Doug Owens <owens2@llnl.gov> wrote:
I am trying to use HAM to restart applications in my system.
I can get it to work for a single application, but when I try to register
2 applications for restart, HAM will ignore the
death of the registered apps until I use hamctrl to kill HAM, at which
point it will restart the registered apps. I am using
ham_attach after obtaining process PIDs from the /proc filesystem (QNX
cookbook). Everything in /proc/ham looks OK but HAM will not respond to
the death of the registered processes. Anyone have any ideas?
ham_attach() will only work for processes that:
- die abnormally (would create a dump, should dumper be running)
- are in session 1 (generally system daemons)
I’ve included a (fairly) simple program with this response that attaches
to pipe and random (two fairly innocuous servers to kill off & restart)
and does both a notify & restart action on their death. Both servers are
restarted “immediately”.
This was tested against 6.3.0 SP1 running on x86.
-David
/*
 * ham_attach_other.c
 *
 * This module contains sample source code for attaching
 * an already running process to ham.
 *
 * It will attach random, the process that maintains /dev/random.
 *  - random will be restarted automatically
 *  - we will get a pulse every time random dies
 *  - the condition and action will be persistent
 *
 * It will also attach pipe the same way.
 */
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <libgen.h>
#include <errno.h>
#include <fcntl.h>
#include <sys/neutrino.h>
#include <sys/netmgr.h>
#include <ha/ham.h>
/* Pulse codes passed to monitor() and matched in main()'s receive loop;
 * one per monitored server so the two deaths can be told apart. */
#define PULSE_CODE_RANDOM_DIED (_PULSE_CODE_MINAVAIL + 4)
#define PULSE_CODE_PIPE_DIED (_PULSE_CODE_MINAVAIL + 5)
/* Parses the command line and sets optv. */
void options (int argc, char **argv);
/* Return the pid of the server behind /dev/random and /dev/pipe respectively. */
int get_random_pid (void);
int get_pipe_pid( void );
/* Verbose flag: set to 1 by options() when -v is given, 0 otherwise. */
int optv;
/* Channel id created in main(); monitor() names it as the pulse target and
 * main() receives on it. */
int chid;
/*
 * monitor
 *
 * Attaches an already running process to ham and arms a persistent death
 * condition on it with two actions: restart the process using `cmd`, and
 * send this process a pulse with code `pcode` on the global channel `chid`.
 *
 *   pid      - process id of the process to attach
 *   ent_name - entity name to register with ham
 *   cmd      - command line ham uses to restart the process
 *   pcode    - pulse code delivered to us when the process dies
 *
 * Returns 1 on success, 0 on failure (the reason is printed with perror).
 * All ham handles obtained here are released before returning, on both the
 * success and the failure paths.
 */
int monitor( int pid, char *ent_name, char *cmd, int pcode )
{
    ham_entity_t *hent;
    ham_condition_t *hcond;
    ham_action_t *hact;
    int ok = 0;

    /* tell ham to monitor pid */
    hent = ham_attach( ent_name, ND_LOCAL_NODE, pid, NULL, 0 );
    if( NULL == hent )
    {
        perror("Attaching to ham");
        return 0;
    }

    /* create a death condition, and make it survive restarts */
    hcond = ham_condition( hent, CONDDEATH, "death", HREARMAFTERRESTART );
    if( NULL == hcond )
    {
        /* report before cleanup so the handle-free call cannot disturb errno */
        perror("Specifying ham condition");
        goto out_entity;
    }

    /* have ham restart the process every time it dies */
    hact = ham_action_restart( hcond, "restart", cmd, HREARMAFTERRESTART );
    if( NULL == hact )
    {
        perror("Specifying restart action");
        goto out_condition;
    }
    ham_action_handle_free( hact );

    /* have ham send us a pulse every time the process dies */
    hact = ham_action_notify_pulse( hcond, "pulse", ND_LOCAL_NODE, getpid(), chid,
                                    pcode, 0, HREARMAFTERRESTART );
    if( NULL == hact )
    {
        perror("Specifying notify pulse action");
        goto out_condition;
    }
    ham_action_handle_free( hact );
    ok = 1;

    /* cleanup: release the handles in reverse order of acquisition */
out_condition:
    ham_condition_handle_free( hcond );
out_entity:
    ham_entity_handle_free( hent );
    return ok;
}
int main( int argc, char *argv )
{
int pid; / pid for server */
options( argc, argv );
chid = ChannelCreate( 0 );
if( ham_connect( 0 ) == -1 )
{
perror(“Connecting to ham”);
printf(“Is ham running?\n”);
return EXIT_FAILURE;
}
/* find the pid of the server that maintains /dev/random */
pid = get_random_pid();
if( ! monitor( pid, “random”, “/usr/sbin/random -t”, PULSE_CODE_RANDOM_DIED ))
{
printf(“failed to monitor random\n”);
}
pid = get_pipe_pid();
if( ! monitor( pid, “pipe”, “/sbin/pipe”, PULSE_CODE_PIPE_DIED ))
{
printf(“failed to monitor pipe\n”);
}
ham_disconnect( 0 );
/* loop waiting for our pulses */
while(1)
{
struct _pulse pulse;
int rcvid;
rcvid = MsgReceive( chid, &pulse, sizeof(pulse), NULL );
if (0 == rcvid )
{
switch( pulse.code )
{
case PULSE_CODE_RANDOM_DIED:
printf(“random died\n”);
break;
case PULSE_CODE_PIPE_DIED:
printf(“pipe died\n”);
break;
default:
printf(“unexpected pulse\n”);
}
} else if (-1 == rcvid )
{
perror(“MsgReceive”);
} else
{
if (optv)
printf(“Unexpected message.\n”);
MsgError(rcvid, ENOSYS);
}
}
}
/*
 * options
 *
 * This routine handles the command line options.
 * We support:
 *
 *   -v   verbose operation (sets the global optv to 1)
 *
 * optv is reset to 0 before parsing. Any other option is left to getopt()
 * and otherwise ignored.
 */
void
options (int argc, char **argv)
{
    int opt;

    optv = 0;
    while ((opt = getopt (argc, argv, "v")) != -1)
    {
        switch (opt)
        {
        case 'v':
            optv = 1;
            break;
        default:
            /* unrecognised option: nothing to record */
            break;
        }
    }
}
/*
 * get_random_pid
 *
 * Finds the process id for the server that maintains /dev/random by
 * opening the device and asking the kernel who owns that connection.
 *
 * Returns the server's pid. Exits the program with EXIT_FAILURE if
 * /dev/random cannot be opened or the connection cannot be queried.
 */
int get_random_pid( void )
{
    int fd;
    struct _server_info info;

    fd = open( "/dev/random", O_RDONLY );
    if( -1 == fd )
    {
        perror("opening /dev/random");
        printf("Is random running? Please run random.\n");
        exit( EXIT_FAILURE );
    }

    /* info is only filled in for the connection id that is returned, so
     * anything other than fd means info.pid cannot be trusted */
    if( ConnectServerInfo( getpid(), fd, &info ) != fd )
    {
        perror("ConnectServerInfo on /dev/random");
        close(fd);
        exit( EXIT_FAILURE );
    }
    close(fd);

    if (optv)
    {
        printf("random has pid %d\n", info.pid );
    }
    return( info.pid );
}
/*
 * get_pipe_pid
 *
 * Finds the process id for the server that maintains /dev/pipe by
 * opening the device and asking the kernel who owns that connection.
 *
 * Returns the server's pid. Exits the program with EXIT_FAILURE if
 * /dev/pipe cannot be opened or the connection cannot be queried.
 */
int get_pipe_pid( void )
{
    int fd;
    struct _server_info info;

    fd = open( "/dev/pipe", O_RDONLY );
    if( -1 == fd )
    {
        perror("opening /dev/pipe");
        printf("Is pipe running? Please run pipe.\n");
        exit( EXIT_FAILURE );
    }

    /* info is only filled in for the connection id that is returned, so
     * anything other than fd means info.pid cannot be trusted */
    if( ConnectServerInfo( getpid(), fd, &info ) != fd )
    {
        perror("ConnectServerInfo on /dev/pipe");
        close(fd);
        exit( EXIT_FAILURE );
    }
    close(fd);

    if (optv)
    {
        printf("pipe has pid %d\n", info.pid );
    }
    return( info.pid );
}
-David
David Gibbs
QNX Training Services
dagibbs@qnx.com