Does HAM work?

I am trying to use HAM to restart applications in my system.
I can get it to work for a single application, but when I try to register
2 applications for restart, HAM will ignore the
death of the registered apps until I use hamctrl to kill HAM, at which
point it will restart the registered apps. I am using
ham_attach after obtaining process PIDs from the /proc filesystem (QNX
cookbook). Everything in /proc/ham looks OK but HAM will not respond to
the death of the registered processes. Anyone have any ideas?

Thanks,
Doug Owens
owens2@llnl.gov

Doug Owens <owens2@llnl.gov> wrote:

I am trying to use HAM to restart applications in my system.
I can get it to work for a single application, but when I try to register
2 applications for restart, HAM will ignore the
death of the registered apps until I use hamctrl to kill HAM, at which
point it will restart the registered apps. I am using
ham_attach after obtaining process PIDs from the /proc filesystem (QNX
cookbook). Everything in /proc/ham looks OK but HAM will not respond to
the death of the registered processes. Anyone have any ideas?

ham_attach() will only work for processes that:

  1. die abnormally (would create a dump, should dumper be running)
  2. are in session 1 (generally system daemons)

I’ve included a (fairly) simple program with this response that attaches
to pipe and random (two fairly innocuous servers to kill off & restart)
and does both a notify & restart action on their death. Both servers are
restarted “immediately”.

This was tested against 6.3.0 SP1 running on x86.

-David

/*
 * ham_attach_other.c
 *
 * This module contains sample source code for attaching
 * an already running process to ham.
 *
 * It will attach random, the process that maintains /dev/random.
 *    - random will be restarted automatically
 *    - we will get a pulse every time random dies
 *    - the condition and action will be persistent
 *
 * It will also attach pipe the same way.
 */

#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <libgen.h>
#include <errno.h>
#include <fcntl.h>
#include <sys/neutrino.h>
#include <sys/netmgr.h>
#include <ha/ham.h>

#define PULSE_CODE_RANDOM_DIED (_PULSE_CODE_MINAVAIL + 4)
#define PULSE_CODE_PIPE_DIED (_PULSE_CODE_MINAVAIL + 5)

void options (int argc, char **argv);
int get_random_pid (void);
int get_pipe_pid( void );

int optv;
int chid;

/*
 * monitor
 *
 * Attaches the already running process `pid` to ham under the entity name
 * `ent_name`, creates a death condition on it, and gives that condition two
 * actions: restart the process using the command line `cmd`, and send a
 * pulse with code `pcode` to this process on the global channel `chid`.
 *
 * Returns 1 on success, 0 on failure (after reporting it with perror()).
 * Every ham handle obtained here is released before returning, on the
 * failure paths as well as on success.
 */
int monitor( int pid, char *ent_name, char *cmd, int pcode )
{
    ham_entity_t *hent;
    ham_condition_t *hcond = NULL;
    ham_action_t *hact;
    int ok = 0;

    /* tell ham to monitor pid */
    hent = ham_attach( ent_name, ND_LOCAL_NODE, pid, NULL, 0 );
    if( NULL == hent )
    {
        perror("Attaching to ham");
        return 0;
    }

    /* create a death condition, and make it survive restarts */
    hcond = ham_condition( hent, CONDDEATH, "death", HREARMAFTERRESTART );
    if( NULL == hcond )
    {
        perror("Specifying ham condition");
        goto done;
    }

    /* have ham restart the process every time it dies */
    hact = ham_action_restart( hcond, "restart", cmd, HREARMAFTERRESTART );
    if( NULL == hact )
    {
        perror("Specifying restart action");
        goto done;
    }
    ham_action_handle_free( hact );

    /* have ham send us a pulse every time the process dies */
    hact = ham_action_notify_pulse( hcond, "pulse", ND_LOCAL_NODE, getpid(), chid,
                                    pcode, 0, HREARMAFTERRESTART );
    if( NULL == hact )
    {
        perror("Specifying pulse notification action");
        goto done;
    }
    ham_action_handle_free( hact );
    ok = 1;

done:
    /* cleanup: release our handles whether or not setup completed */
    if( NULL != hcond )
    {
        ham_condition_handle_free( hcond );
    }
    ham_entity_handle_free( hent );
    return ok;
}


int main( int argc, char *argv )
{
int pid; /
pid for server */



options( argc, argv );

chid = ChannelCreate( 0 );


if( ham_connect( 0 ) == -1 )
{
perror(“Connecting to ham”);
printf(“Is ham running?\n”);
return EXIT_FAILURE;
}

/* find the pid of the server that maintains /dev/random */
pid = get_random_pid();
if( ! monitor( pid, “random”, “/usr/sbin/random -t”, PULSE_CODE_RANDOM_DIED ))
{
printf(“failed to monitor random\n”);
}

pid = get_pipe_pid();
if( ! monitor( pid, “pipe”, “/sbin/pipe”, PULSE_CODE_PIPE_DIED ))
{
printf(“failed to monitor pipe\n”);
}

ham_disconnect( 0 );

/* loop waiting for our pulses */
while(1)
{
struct _pulse pulse;
int rcvid;

rcvid = MsgReceive( chid, &pulse, sizeof(pulse), NULL );
if (0 == rcvid )
{
switch( pulse.code )
{
case PULSE_CODE_RANDOM_DIED:
printf(“random died\n”);
break;
case PULSE_CODE_PIPE_DIED:
printf(“pipe died\n”);
break;

default:
printf(“unexpected pulse\n”);

}

} else if (-1 == rcvid )
{
perror(“MsgReceive”);

} else
{
if (optv)
printf(“Unexpected message.\n”);
MsgError(rcvid, ENOSYS);
}
}

}



/*
 * options
 *
 * This routine handles the command line options.
 * We support:
 *  -v      verbose operation (sets the global optv to 1)
 *
 * optv is reset to 0 before parsing. Any other option is ignored here.
 */

void
options (int argc, char **argv)
{
    int opt;

    optv = 0;

    while ((opt = getopt (argc, argv, "v")) != -1)
    {
        switch (opt)
        {
        case 'v':
            optv = 1;
            break;
        default:
            /* unknown option: nothing to do */
            break;
        }
    }
}

/*
 * get_random_pid
 *
 * Finds the process id for the server that maintains /dev/random by
 * opening the device and asking which server is behind that connection.
 *
 * Returns the server's pid. Exits the program if /dev/random cannot be
 * opened or the connection cannot be queried.
 */

int get_random_pid( void )
{
    int fd;
    struct _server_info info;

    fd = open( "/dev/random", O_RDONLY );
    if( -1 == fd )
    {
        perror("opening /dev/random");
        printf("Is random running? Please run random.\n");
        exit( EXIT_FAILURE );
    }

    /* info is only filled in when the call reports our own connection id */
    if( ConnectServerInfo( getpid(), fd, &info ) != fd )
    {
        fprintf( stderr, "Unable to query the server behind /dev/random\n" );
        close(fd);
        exit( EXIT_FAILURE );
    }
    close(fd);

    if (optv)
        printf("random has pid %d\n", info.pid );

    return( info.pid );
}

/*
 * get_pipe_pid
 *
 * Finds the process id for the server that maintains /dev/pipe by
 * opening the device and asking which server is behind that connection.
 *
 * Returns the server's pid. Exits the program if /dev/pipe cannot be
 * opened or the connection cannot be queried.
 */
int get_pipe_pid( void )
{
    int fd;
    struct _server_info info;

    fd = open( "/dev/pipe", O_RDONLY );
    if( -1 == fd )
    {
        perror("opening /dev/pipe");
        printf("Is pipe running? Please run pipe.\n");
        exit( EXIT_FAILURE );
    }

    /* info is only filled in when the call reports our own connection id */
    if( ConnectServerInfo( getpid(), fd, &info ) != fd )
    {
        fprintf( stderr, "Unable to query the server behind /dev/pipe\n" );
        close(fd);
        exit( EXIT_FAILURE );
    }
    close(fd);

    if (optv)
        printf("pipe has pid %d\n", info.pid );

    return( info.pid );
}

-David

David Gibbs
QNX Training Services
dagibbs@qnx.com

Thanks for the info. Is there a way to determine a process’s
Session ID? I can use pidin sess and when a process
is started by ham it does show up in session one. This
seems to indicate to me that Ham can only deal with these
‘daemon’ processes. If it is possible to determine
(or change) a running process’s session ID this would
be nice to know since ham will let you register ANY process
without complaining. I have seen the procmgr_daemon() info
but it would be nice to use this without modifying the
applications if possible.

Thanks,
Doug Owens

David Gibbs wrote:

Doug Owens <owens2@llnl.gov> wrote:
I am trying to use HAM to restart applications in my system.
I can get it to work for a single application, but when I try to register
2 applications for restart, HAM will ignore the
death of the registered apps until I use hamctrl to kill HAM, at which
point it will restart the registered apps. I am using
ham_attach after obtaining process PIDs from the /proc filesystem (QNX
cookbook). Everything in /proc/ham looks OK but HAM will not respond to
the death of the registered processes. Anyone have any ideas?

ham_attach() will only work for processes that:

  1. die abnormally (would create a dump, should dumper be running)
  2. are in session 1 (generally system daemons)

I’ve included a (fairly) simple program with this response that attaches
to pipe and random (two fairly innocuous servers to kill off & restart)
and does both a notify & restart action on their death. Both servers are
restarted “immediately”.

This was tested against 6.3.0 SP1 running on x86.

-David

/*
 * ham_attach_other.c
 *
 * This module contains sample source code for attaching
 * an already running process to ham.
 *
 * It will attach random, the process that maintains /dev/random.
 *    - random will be restarted automatically
 *    - we will get a pulse every time random dies
 *    - the condition and action will be persistent
 *
 * It will also attach pipe the same way.
 */

#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <libgen.h>
#include <errno.h>
#include <fcntl.h>
#include <sys/neutrino.h>
#include <sys/netmgr.h>
#include <ha/ham.h>

#define PULSE_CODE_RANDOM_DIED (_PULSE_CODE_MINAVAIL + 4)
#define PULSE_CODE_PIPE_DIED (_PULSE_CODE_MINAVAIL + 5)

void options (int argc, char **argv);
int get_random_pid (void);
int get_pipe_pid( void );

int optv;
int chid;

/*
 * monitor
 *
 * Attaches the already running process `pid` to ham under the entity name
 * `ent_name`, creates a death condition on it, and gives that condition two
 * actions: restart the process using the command line `cmd`, and send a
 * pulse with code `pcode` to this process on the global channel `chid`.
 *
 * Returns 1 on success, 0 on failure (after reporting it with perror()).
 * Every ham handle obtained here is released before returning, on the
 * failure paths as well as on success.
 */
int monitor( int pid, char *ent_name, char *cmd, int pcode )
{
    ham_entity_t *hent;
    ham_condition_t *hcond = NULL;
    ham_action_t *hact;
    int ok = 0;

    /* tell ham to monitor pid */
    hent = ham_attach( ent_name, ND_LOCAL_NODE, pid, NULL, 0 );
    if( NULL == hent )
    {
        perror("Attaching to ham");
        return 0;
    }

    /* create a death condition, and make it survive restarts */
    hcond = ham_condition( hent, CONDDEATH, "death", HREARMAFTERRESTART );
    if( NULL == hcond )
    {
        perror("Specifying ham condition");
        goto done;
    }

    /* have ham restart the process every time it dies */
    hact = ham_action_restart( hcond, "restart", cmd, HREARMAFTERRESTART );
    if( NULL == hact )
    {
        perror("Specifying restart action");
        goto done;
    }
    ham_action_handle_free( hact );

    /* have ham send us a pulse every time the process dies */
    hact = ham_action_notify_pulse( hcond, "pulse", ND_LOCAL_NODE, getpid(), chid,
                                    pcode, 0, HREARMAFTERRESTART );
    if( NULL == hact )
    {
        perror("Specifying pulse notification action");
        goto done;
    }
    ham_action_handle_free( hact );
    ok = 1;

done:
    /* cleanup: release our handles whether or not setup completed */
    if( NULL != hcond )
    {
        ham_condition_handle_free( hcond );
    }
    ham_entity_handle_free( hent );
    return ok;
}



/*
 * main
 *
 * Parses the command line, creates the channel on which the death pulses
 * will arrive, asks ham to monitor random and pipe, and then loops forever
 * printing a line for every pulse received.
 */
int main( int argc, char **argv )
{
    int pid;    /* pid for server */

    options( argc, argv );

    /* monitor() gives this channel id to ham as the pulse destination */
    chid = ChannelCreate( 0 );
    if( -1 == chid )
    {
        perror("ChannelCreate");
        return EXIT_FAILURE;
    }

    if( ham_connect( 0 ) == -1 )
    {
        perror("Connecting to ham");
        printf("Is ham running?\n");
        return EXIT_FAILURE;
    }

    /* find the pid of the server that maintains /dev/random */
    pid = get_random_pid();
    if( ! monitor( pid, "random", "/usr/sbin/random -t", PULSE_CODE_RANDOM_DIED ))
    {
        printf("failed to monitor random\n");
    }

    /* find the pid of the server that maintains /dev/pipe */
    pid = get_pipe_pid();
    if( ! monitor( pid, "pipe", "/sbin/pipe", PULSE_CODE_PIPE_DIED ))
    {
        printf("failed to monitor pipe\n");
    }

    ham_disconnect( 0 );

    /* loop waiting for our pulses */
    while(1)
    {
        struct _pulse pulse;
        int rcvid;

        rcvid = MsgReceive( chid, &pulse, sizeof(pulse), NULL );
        if( 0 == rcvid )
        {
            /* a receive id of 0 is treated as a pulse */
            switch( pulse.code )
            {
            case PULSE_CODE_RANDOM_DIED:
                printf("random died\n");
                break;
            case PULSE_CODE_PIPE_DIED:
                printf("pipe died\n");
                break;
            default:
                printf("unexpected pulse\n");
                break;
            }
        }
        else if( -1 == rcvid )
        {
            perror("MsgReceive");
        }
        else
        {
            /* we handle no real messages, so fail the sender with ENOSYS */
            if (optv)
                printf("Unexpected message.\n");
            MsgError(rcvid, ENOSYS);
        }
    }
}



/*
 * options
 *
 * This routine handles the command line options.
 * We support:
 *  -v      verbose operation (sets the global optv to 1)
 *
 * optv is reset to 0 before parsing. Any other option is ignored here.
 */

void
options (int argc, char **argv)
{
    int opt;

    optv = 0;

    while ((opt = getopt (argc, argv, "v")) != -1)
    {
        switch (opt)
        {
        case 'v':
            optv = 1;
            break;
        default:
            /* unknown option: nothing to do */
            break;
        }
    }
}

/*
 * get_random_pid
 *
 * Finds the process id for the server that maintains /dev/random by
 * opening the device and asking which server is behind that connection.
 *
 * Returns the server's pid. Exits the program if /dev/random cannot be
 * opened or the connection cannot be queried.
 */

int get_random_pid( void )
{
    int fd;
    struct _server_info info;

    fd = open( "/dev/random", O_RDONLY );
    if( -1 == fd )
    {
        perror("opening /dev/random");
        printf("Is random running? Please run random.\n");
        exit( EXIT_FAILURE );
    }

    /* info is only filled in when the call reports our own connection id */
    if( ConnectServerInfo( getpid(), fd, &info ) != fd )
    {
        fprintf( stderr, "Unable to query the server behind /dev/random\n" );
        close(fd);
        exit( EXIT_FAILURE );
    }
    close(fd);

    if (optv)
        printf("random has pid %d\n", info.pid );

    return( info.pid );
}

/*
 * get_pipe_pid
 *
 * Finds the process id for the server that maintains /dev/pipe by
 * opening the device and asking which server is behind that connection.
 *
 * Returns the server's pid. Exits the program if /dev/pipe cannot be
 * opened or the connection cannot be queried.
 */
int get_pipe_pid( void )
{
    int fd;
    struct _server_info info;

    fd = open( "/dev/pipe", O_RDONLY );
    if( -1 == fd )
    {
        perror("opening /dev/pipe");
        printf("Is pipe running? Please run pipe.\n");
        exit( EXIT_FAILURE );
    }

    /* info is only filled in when the call reports our own connection id */
    if( ConnectServerInfo( getpid(), fd, &info ) != fd )
    {
        fprintf( stderr, "Unable to query the server behind /dev/pipe\n" );
        close(fd);
        exit( EXIT_FAILURE );
    }
    close(fd);

    if (optv)
        printf("pipe has pid %d\n", info.pid );

    return( info.pid );
}

-David

David Gibbs
QNX Training Services
dagibbs@qnx.com

Doug Owens <owens2@llnl.gov> wrote:

Thanks for the info. Is there a way to determine a process’s
Session ID?

Well, as you see “pidin se” will give it to you at command line.

For a running process, from code, getsid(pid) will give the
session id.

I can use pidin sess and when a process
is started by ham it does show up in session one.
This
seems to indicate to me that Ham can only deal with these
‘daemon’ processes.

That’s what I said – daemons or crashes. The documentation
for ham_attach() states this limitation as well.

If it is possible to determine
(or change) a running process’s session ID this would
be nice to know since ham will let you register ANY process
without complaining. I have seen the procmgr_daemon() info
but it would be nice to use this without modifying the
applications if possible.

The rules for who can change what about session (and process
group) membership are a bit complicated. They are, also, very
Unixy (POSIXy), and we try to duplicate POSIX behaviour here.
So, I’d suggest checking in a Unix programming book, but at
first glance, you might be able to do a setpgid(pid, 1) to
put a process in Proc’s process group, which I think might
also put it in Proc’s session – but you can’t do it to a
process that is a session leader.

Or, I think also sessions are inherited – a child generally
starts in its parent’s session. So, depending on how your
system startup is done, you may be able to make it such that
everything you want to monitor is in session 1.

(Among other things, this could just be done by writing an
appropriate starter program that just puts itself into session
1, then execs whatever program with whatever arguments you pass
on the command line – then whenever you want to run a program,
just do “starter program arguments” instead of “program arguments”
and that would mean program wouldn’t need to be changed.)

Or, just have ham start everything, as it starts things in session
1. (If ham can be started early enough.)

-David

P.S. The reason for the restriction, is that QNX doesn’t provide
a general case asynchronous notification of death. It provides
the POSIX case – SIGCHLD on child death, plus a few other cases:

  1. client/server notification of lost connection if connection
    exists between processes
  2. procmgr_event_notify() for death of process in session 1
  3. /proc/dumper notification for “core dump” deaths of processes

For a process that has not done a ham_attach_self(), #1 isn’t
available, so only 2, and 3 are possible.

-David

Thanks,
Doug Owens

David Gibbs wrote:

Doug Owens <owens2@llnl.gov> wrote:
I am trying to use HAM to restart applications in my system.
I can get it to work for a single application, but when I try to register
2 applications for restart, HAM will ignore the
death of the registered apps until I use hamctrl to kill HAM, at which
point it will restart the registered apps. I am using
ham_attach after obtaining process PIDs from the /proc filesystem (QNX
cookbook). Everything in /proc/ham looks OK but HAM will not respond to
the death of the registered processes. Anyone have any ideas?

ham_attach() will only work for processes that:

  1. die abnormally (would create a dump, should dumper be running)
  2. are in session 1 (generally system daemons)

I’ve included a (fairly) simple program with this response that attaches
to pipe and random (two fairly innocuous servers to kill off & restart)
and does both a notify & restart action on their death. Both servers are
restarted “immediately”.

This was tested against 6.3.0 SP1 running on x86.

-David

/*
 * ham_attach_other.c
 *
 * This module contains sample source code for attaching
 * an already running process to ham.
 *
 * It will attach random, the process that maintains /dev/random.
 *    - random will be restarted automatically
 *    - we will get a pulse every time random dies
 *    - the condition and action will be persistent
 *
 * It will also attach pipe the same way.
 */

#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <libgen.h>
#include <errno.h>
#include <fcntl.h>
#include <sys/neutrino.h>
#include <sys/netmgr.h>
#include <ha/ham.h>

#define PULSE_CODE_RANDOM_DIED (_PULSE_CODE_MINAVAIL + 4)
#define PULSE_CODE_PIPE_DIED (_PULSE_CODE_MINAVAIL + 5)

void options (int argc, char **argv);
int get_random_pid (void);
int get_pipe_pid( void );

int optv;
int chid;

/*
 * monitor
 *
 * Attaches the already running process `pid` to ham under the entity name
 * `ent_name`, creates a death condition on it, and gives that condition two
 * actions: restart the process using the command line `cmd`, and send a
 * pulse with code `pcode` to this process on the global channel `chid`.
 *
 * Returns 1 on success, 0 on failure (after reporting it with perror()).
 * Every ham handle obtained here is released before returning, on the
 * failure paths as well as on success.
 */
int monitor( int pid, char *ent_name, char *cmd, int pcode )
{
    ham_entity_t *hent;
    ham_condition_t *hcond = NULL;
    ham_action_t *hact;
    int ok = 0;

    /* tell ham to monitor pid */
    hent = ham_attach( ent_name, ND_LOCAL_NODE, pid, NULL, 0 );
    if( NULL == hent )
    {
        perror("Attaching to ham");
        return 0;
    }

    /* create a death condition, and make it survive restarts */
    hcond = ham_condition( hent, CONDDEATH, "death", HREARMAFTERRESTART );
    if( NULL == hcond )
    {
        perror("Specifying ham condition");
        goto done;
    }

    /* have ham restart the process every time it dies */
    hact = ham_action_restart( hcond, "restart", cmd, HREARMAFTERRESTART );
    if( NULL == hact )
    {
        perror("Specifying restart action");
        goto done;
    }
    ham_action_handle_free( hact );

    /* have ham send us a pulse every time the process dies */
    hact = ham_action_notify_pulse( hcond, "pulse", ND_LOCAL_NODE, getpid(), chid,
                                    pcode, 0, HREARMAFTERRESTART );
    if( NULL == hact )
    {
        perror("Specifying pulse notification action");
        goto done;
    }
    ham_action_handle_free( hact );
    ok = 1;

done:
    /* cleanup: release our handles whether or not setup completed */
    if( NULL != hcond )
    {
        ham_condition_handle_free( hcond );
    }
    ham_entity_handle_free( hent );
    return ok;
}



/*
 * main
 *
 * Parses the command line, creates the channel on which the death pulses
 * will arrive, asks ham to monitor random and pipe, and then loops forever
 * printing a line for every pulse received.
 */
int main( int argc, char **argv )
{
    int pid;    /* pid for server */

    options( argc, argv );

    /* monitor() gives this channel id to ham as the pulse destination */
    chid = ChannelCreate( 0 );
    if( -1 == chid )
    {
        perror("ChannelCreate");
        return EXIT_FAILURE;
    }

    if( ham_connect( 0 ) == -1 )
    {
        perror("Connecting to ham");
        printf("Is ham running?\n");
        return EXIT_FAILURE;
    }

    /* find the pid of the server that maintains /dev/random */
    pid = get_random_pid();
    if( ! monitor( pid, "random", "/usr/sbin/random -t", PULSE_CODE_RANDOM_DIED ))
    {
        printf("failed to monitor random\n");
    }

    /* find the pid of the server that maintains /dev/pipe */
    pid = get_pipe_pid();
    if( ! monitor( pid, "pipe", "/sbin/pipe", PULSE_CODE_PIPE_DIED ))
    {
        printf("failed to monitor pipe\n");
    }

    ham_disconnect( 0 );

    /* loop waiting for our pulses */
    while(1)
    {
        struct _pulse pulse;
        int rcvid;

        rcvid = MsgReceive( chid, &pulse, sizeof(pulse), NULL );
        if( 0 == rcvid )
        {
            /* a receive id of 0 is treated as a pulse */
            switch( pulse.code )
            {
            case PULSE_CODE_RANDOM_DIED:
                printf("random died\n");
                break;
            case PULSE_CODE_PIPE_DIED:
                printf("pipe died\n");
                break;
            default:
                printf("unexpected pulse\n");
                break;
            }
        }
        else if( -1 == rcvid )
        {
            perror("MsgReceive");
        }
        else
        {
            /* we handle no real messages, so fail the sender with ENOSYS */
            if (optv)
                printf("Unexpected message.\n");
            MsgError(rcvid, ENOSYS);
        }
    }
}



/*
 * options
 *
 * This routine handles the command line options.
 * We support:
 *  -v      verbose operation (sets the global optv to 1)
 *
 * optv is reset to 0 before parsing. Any other option is ignored here.
 */

void
options (int argc, char **argv)
{
    int opt;

    optv = 0;

    while ((opt = getopt (argc, argv, "v")) != -1)
    {
        switch (opt)
        {
        case 'v':
            optv = 1;
            break;
        default:
            /* unknown option: nothing to do */
            break;
        }
    }
}

/*
 * get_random_pid
 *
 * Finds the process id for the server that maintains /dev/random by
 * opening the device and asking which server is behind that connection.
 *
 * Returns the server's pid. Exits the program if /dev/random cannot be
 * opened or the connection cannot be queried.
 */

int get_random_pid( void )
{
    int fd;
    struct _server_info info;

    fd = open( "/dev/random", O_RDONLY );
    if( -1 == fd )
    {
        perror("opening /dev/random");
        printf("Is random running? Please run random.\n");
        exit( EXIT_FAILURE );
    }

    /* info is only filled in when the call reports our own connection id */
    if( ConnectServerInfo( getpid(), fd, &info ) != fd )
    {
        fprintf( stderr, "Unable to query the server behind /dev/random\n" );
        close(fd);
        exit( EXIT_FAILURE );
    }
    close(fd);

    if (optv)
        printf("random has pid %d\n", info.pid );

    return( info.pid );
}

/*
 * get_pipe_pid
 *
 * Finds the process id for the server that maintains /dev/pipe by
 * opening the device and asking which server is behind that connection.
 *
 * Returns the server's pid. Exits the program if /dev/pipe cannot be
 * opened or the connection cannot be queried.
 */
int get_pipe_pid( void )
{
    int fd;
    struct _server_info info;

    fd = open( "/dev/pipe", O_RDONLY );
    if( -1 == fd )
    {
        perror("opening /dev/pipe");
        printf("Is pipe running? Please run pipe.\n");
        exit( EXIT_FAILURE );
    }

    /* info is only filled in when the call reports our own connection id */
    if( ConnectServerInfo( getpid(), fd, &info ) != fd )
    {
        fprintf( stderr, "Unable to query the server behind /dev/pipe\n" );
        close(fd);
        exit( EXIT_FAILURE );
    }
    close(fd);

    if (optv)
        printf("pipe has pid %d\n", info.pid );

    return( info.pid );
}

-David

David Gibbs
QNX Training Services
dagibbs@qnx.com




David Gibbs
QNX Training Services
dagibbs@qnx.com