I am trying to make use of the self-pipe trick to get a portable implementation (across Solaris, MacOSX, Linux, BSDs) of my application.
So in addition to the two pipes for stderr
and stdout
which I am using to fetch the output of the forked child (I use no exec
in the child, the child executes the same code as the parent), I have the pipe for signals (enum {SIG_PIPE, STDOUT_PIPE, STDERR_PIPE, MAX_PIPE}
provides the symbolic names).
O_NONBLOCK
is set on the pipes, prior to callinghandle_child_output()
.- the child has the write ends of the
stderr
andstdout
pipes and continues usingprintf()
and friends, effectively writing to each respective pipe (setvbuf
is used to turn off buffering inside the child).
The code to follow is a bit lengthy, as is the self-pipe trick in general. It is already the condensed form. So let me try to explain what should happen and where it gets stuck.
I need to collect the exit status and I have to be able to find out whether the child terminated with a signal or via exit. The handling of those conditions is in place elsewhere. What's relevant is that handle_child_output()
returns the exit code of the child inside the int
pointed to by pstatus
.
The outer do
-while
loop in handle_child_output()
will set up the FD_SET
to use in the select
call. It adds the signal pipe read-end plus the read-ends of the stderr
and stdout
pipe from the child.
Then the if(FD_ISSET(fds[SIG_PIPE], &rd))
checks whether the signal pipe contains anything new and drains it, handling any potential signals.
The for
loop after that loops through the remaining file descriptors to see whether anything is pending and then handles by parroting what it gets on the parent's respective stdio
channel.
The read call in that second loop is where it gets stalled.
The symptom is that the parent is stuck in the read call:
(gdb) bt 1
#0 0x00007f2daaa9e530 in __read_nocancel () from /lib64/libpthread.so.0
It's as if there is a race condition between reading the signal pipe and the other pipes. The child seems to have exited already by the time the file descriptors get inspected and consequently ends up as a zombie, because the parent is still stuck in the read()
and will never reach one of the wait()
calls.
What am I doing wrong? Would it be legit to add an if(exitloop) break;
before the for
loop to break out of the outer do
-while
loop? It seems to me that this could leave unread content in the pipes, no?
#define __MAX__(x,y) ((x) > (y) ? (x) : (y))
int childpid;
typedef enum { READ, WRITE, BOTH } pipefd_type_t;
static void avoid_zombie(int* pstatus)
{
int temp;
pstatus = (pstatus) ? pstatus : &temp;
if(0 > childpid && 0 != childpid) {
kill(childpid, SIGKILL); /* kill the child */
}
wait(pstatus); /* wait to avoid lingering zombies */
}
static void select_signal_handler(int sig)
{
int savedErrno = errno;
const int sigw = sig;
if(0 > write(sigpipe[WRITE], &sigw, sizeof(sigw))) {
avoid_zombie(NULL);
_exit(EXIT_FAILURE); /* actual code also shows error etc */
}
errno = savedErrno;
}
void handle_child_output(int *pstatus)
{
enum {SIG_PIPE, STDOUT_PIPE, STDERR_PIPE, MAX_PIPE};
fd_set rd;
int ready, n = 0, fds[MAX_PIPE];
size_t i, exitloop, sigint;
struct sigaction sa;
struct {
int sig;
struct sigaction oldsa;
} old_sigactions[3];
old_sigactions[0].sig = SIGINT;
old_sigactions[1].sig = SIGCHLD;
old_sigactions[2].sig = SIGQUIT;
/* fds have been initialized at this point (read-ends) */
for(i = 0; i < sizeof(old_sigactions)/sizeof(old_sigactions[0]); i++) {
sigemptyset(&sa.sa_mask);
sa.sa_flags = SA_RESTART;
sa.sa_handler = select_signal_handler;
if(0 > sigaction(old_sigactions[i].sig, &sa, &old_sigactions[i].oldsa)) {
avoid_zombie(pstatus);
_exit(EXIT_FAILURE); /* actual code also shows error etc */
}
}
do {
sigint = 0;
exitloop = 0;
FD_ZERO(&rd);
for(i = 0; i < MAX_PIPE; i++) {
if(fds[i] >= FD_SETSIZE) {
avoid_zombie(pstatus);
_exit(EXIT_FAILURE); /* actual code also shows error etc */
}
FD_SET(fds[i], &rd);
n = __MAX__(n, fds[i]);
}
while(0 > (ready = select(n+1, &rd, NULL, NULL, NULL)))
if (EINTR == errno) continue;
if(0 > ready) {
avoid_zombie(pstatus);
_exit(EXIT_FAILURE); /* actual code also shows error etc */
}
if(FD_ISSET(fds[SIG_PIPE], &rd)) {
do { /* drain the signal pipe */
int sig = -1;
if(0 > read(fds[SIG_PIPE], &sig, sizeof(sig))) {
if(EAGAIN == errno)
break;
else {
avoid_zombie(pstatus);
_exit(EXIT_FAILURE); /* actual code also shows error etc */
}
}
switch(sig) {
case SIGINT:
if(0 > childpid && 0 != childpid) {
kill(childpid, SIGINT); /* pass to child */
wait(pstatus);
}
sigint++; exitloop++;
break;
case SIGCHLD:
exitloop++;
break;
}
} while(1);
}
for(i = STDOUT_PIPE; i < MAX_PIPE; i++) {
if(FD_ISSET(fds[i], &rd)) {
do {
const size_t max_tries = 5;
char buf[0x1000];
ssize_t bytesWritten, bytesRead = read(fds[i], buf, sizeof(buf));
if(0 == bytesRead)
break;
int outchan = STDERR_FILENO;
if(0 > bytesRead) {
/* Retry (inner do-while loop) if we get an EAGAIN */
if(EAGAIN == errno) break;
/* fatal error */
avoid_zombie(pstatus);
_exit(EXIT_FAILURE); /* actual code also shows error etc */
}
if(STDOUT_PIPE == i)
outchan = STDOUT_FILENO;
bytesWritten = write(outchan, buf, bytesRead);
if(0 > bytesWritten) {
/* Retry if we get an EAGAIN */
if(EAGAIN == errno) {
size_t tries;
for(tries = 0; tries < max_tries; tries++) {
bytesWritten = write(outchan, buf, bytesRead);
if((0 > bytesWritten) && (EAGAIN == errno))
continue;
break;
}
}
if(0 > bytesWritten) {
avoid_zombie(pstatus);
_exit(EXIT_FAILURE); /* actual code also shows error etc */
}
}
if(bytesWritten < bytesRead) {
const ssize_t bytesToWrite = bytesRead - bytesWritten;
/* try to write the remainder */
ssize_t bytesWritten2 = write(outchan, &buf[bytesWritten], bytesToWrite);
if((0 > bytesWritten2) || (bytesWritten2 != bytesToWrite)) {
/* fatal error */
avoid_zombie(pstatus);
_exit(EXIT_FAILURE); /* actual code also shows error etc */
}
}
} while(1);
}
}
} while(0 == exitloop);
/* restore old signal handlers */
for(i = 0; i < sizeof(old_sigactions)/sizeof(old_sigactions[0]); i++) {
if (sigaction(old_sigactions[i].sig, &old_sigactions[i].oldsa, NULL) == -1) {
avoid_zombie(pstatus);
_exit(EXIT_FAILURE); /* actual code also shows error etc */
}
}
avoid_zombie(pstatus);
}