I am trying to create a process that manage some other process in the way that if a child die then the parent restart the process and the process that depend from it.
The problem is that I notice that if I create a tree structure of process when I restart a process in the middle of this structure I am unable to be signaled when new child process terminates.
I write an example; suppose that we have 3 process, grandparent, parent and child. Grandparent fork and start parent that fork and start child (I put the code at the end of this post). Now if I kill child everything works well, child is restarted correctly.
The problem occurs if I kill parent... The grandparent restart parent that restart child, but if I kill child the process remain in the Zombie state and the SIGCHLD is not delivered to the parent process.
In other words:
- Start grandparent process and wait that all 3 processes have been up
- Kill parent process and wait that grandparent restart parent that restart child
- now kill child process, the process remain in the zombie state.
I'm not able to understand this behavior... I have read a tons of example and documentation about signal and wait, try to reset default handler before the fork in parent and grandparent, but nothing seem to work... Here is the code sample...
grandparent.cpp
#include <cstdio>
#include <string>
#include <cstring>
#include <stdlib.h>
#include <signal.h>
#include <wait.h>
using namespace std;
void startProcess(string processFile);
void childDieHandler(int sig, siginfo_t *child_info, void *context);
FILE *logFile;
int currentChildPid;
int main(int argc, char** argv)
{
currentChildPid = 0;
logFile = stdout;
daemon(1,1);
struct sigaction sa;
bzero(&sa, sizeof(sa));
sa.sa_sigaction = childDieHandler;
sigemptyset(&sa.sa_mask);
sa.sa_flags = SA_SIGINFO;
sigaction(SIGCHLD, &sa, NULL);
startProcess("parent");
while(true) {
sleep(60);
}
return 0;
}
void startProcess(string processFile)
{
fprintf(logFile, "\nGP:Starting new process %s\n",processFile.c_str());
// Get process field and start a new process via fork + execl
int pid = fork();
if (pid == -1){
fprintf(logFile,"GP:*** FORK ERROR on process %s !!!\n",processFile.c_str());
fflush(logFile);
return;
}
// New child process
if (pid == 0) {
string execString = get_current_dir_name()+(string)"/"+processFile;
fprintf(logFile, "GP: %s \n",execString.c_str());
execl(execString.c_str(), processFile.c_str(), NULL);
fprintf(logFile, "GP:*** ERROR on execv for process %s\n",processFile.c_str());
fflush(logFile);
exit(1);
} else {
// Parent process
fprintf(logFile, "GP:New process %s pid is %d .\n", processFile.c_str(), pid);
fflush(logFile);
currentChildPid = pid;
sleep(2);
}
}
// Intercept a signal SIGCHLD
void childDieHandler(int sig, siginfo_t *child_info, void *context){
int status;
pid_t childPid;
while((childPid = waitpid(-1,&status, WNOHANG)) > 0) {
int pid = (int) childPid;
fprintf(logFile,"GP:*** PROCESS KILLED [pid %d]\n",pid);
sigset_t set;
sigpending(&set);
if(sigismember(&set, SIGCHLD)){
fprintf(logFile, "GP: SIGCHLD is pending or blocked!!!!\n");
fflush(logFile);
}
fflush(logFile);
// identify exited process and then restart it
if(currentChildPid == childPid){
// kill any child
system("killall child");
fprintf(logFile,"GP: Restarting parent process...\n");
fflush(logFile);
startProcess("parent");
}
}
fprintf(logFile,"GP:End of childDieHandler()... [%d]\n\n",(int)childPid);
fflush(logFile);
}
parent.cpp
#include <cstdio>
#include <string>
#include <cstring>
#include <stdlib.h>
#include <signal.h>
#include <wait.h>
using namespace std;
void startProcess(string processFile);
void childDieHandler(int sig, siginfo_t *child_info, void *context);
FILE *logFile;
int currentChildPid;
int main(int argc, char** argv)
{
currentChildPid = 0;
logFile = stdout;
struct sigaction sa;
bzero(&sa, sizeof(sa));
sa.sa_sigaction = childDieHandler;
sigemptyset(&sa.sa_mask);
sa.sa_flags = SA_SIGINFO;
sigaction(SIGCHLD, &sa, NULL);
startProcess("child");
while(true) {
sleep(60);
}
return 0;
}
void startProcess(string processFile)
{
fprintf(logFile, "\nP : Starting new process %s\n",processFile.c_str());
// Get process field and start a new process via fork + execl
int pid = fork();
if (pid == -1){
fprintf(logFile,"P : *** FORK ERROR on process %s !!!\n",processFile.c_s开发者_C百科tr());
fflush(logFile);
return;
}
// New child process
if (pid == 0) {
string execString = get_current_dir_name()+(string)"/"+processFile;
execl(execString.c_str(), processFile.c_str(), NULL);
fprintf(logFile, "P : *** ERROR on execv for process %s\n",processFile.c_str());
fflush(logFile);
exit(1);
} else {
// Parent process
fprintf(logFile, "P : New process %s pid is %d .\n", processFile.c_str(), pid);
fflush(logFile);
currentChildPid = pid;
sleep(2);
}
}
// Intercept a signal SIGCHLD
void childDieHandler(int sig, siginfo_t *child_info, void *context){
int status;
pid_t childPid;
while((childPid = waitpid(-1,&status, WNOHANG)) > 0) {
int pid = (int) childPid;
fprintf(logFile,"P : *** PROCESS KILLED [pid %d]\n",pid);
sigset_t set;
sigpending(&set);
if(sigismember(&set, SIGCHLD)){
fprintf(logFile, "P : SIGCHLD is pending or blocked!!!!\n");
fflush(logFile);
}
fflush(logFile);
// identify exited process and then restart it
if(currentChildPid == childPid){
fprintf(logFile,"P : Restarting child process...\n");
fflush(logFile);
startProcess("child");
}
}
fprintf(logFile,"P : End of childDieHandler()... [%d]\n\n",(int)childPid);
fflush(logFile);
}
child.cpp
#include <cstdio>
#include <string>
#include <cstring>
int main(int argc, char** argv)
{
printf("\nC : I'm born...\n\n");
while(true) {
sleep(60);
}
return 0;
}
Well, I have a guess...
Inside the signal handler, the SIGCHLD
signal is blocked (i.e., it is a member of the process's signal mask).
So when the grandparent calls execl
from inside the signal handler, the new parent starts up with SIGCHLD
blocked. Thus it never sees the signal and never waits for the new child.
Try calling sigprocmask
at the beginning of parent.cpp in order to (a) verify this theory and (b) unblock SIGCHLD.
精彩评论