io-net bug on slow machine?

I have a problem that occurs on a 200mhz PC box but not on a 400mhz PC

I wrote an io-net producer that sends a single raw ethernet packet once a second. On
my 400mhz PC it works perfectly. On a 200mhz PC it sends packets out intermittently,
around 2-3% of the time.

I wrote a simple producer and converter. On either machine all calls to io-net return
success and the tx_done function is called 100% of the time. I only found the problem when
I used tcpdump from a different machine to sniff packets and noticed that they weren’t
always getting sent.

Here is my code (actually more comments than code), and if you would like to try
my simple producer send me an email for the converter and/or tarball.


/*
** raw.c
**
** raw ethernet pkt support for QNX
**
** This producer will eventually provide
** a simple resource manager to
** read/write raw ethernet packets
**
** right now it sends a single raw (non-IP) ethernet
** packet once a second
*/

#include <stdio.h>
#include <unistd.h>
#include <atomic.h>
#include <malloc.h>
#include <string.h>
#include <errno.h>
#include <stdlib.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_types.h>
#include <netinet/in.h>
#include <sys/syspage.h>
#include <sys/neutrino.h>
#include <syslog.h>
#include <sys/dispatch.h>
#include <sys/io-net.h>

#include <stdbool.h>

// Forward declarations.
// NOTE(review): the parameter lists of the rx_up/cntrl/reg handlers were
// truncated in the posting; they are reconstructed here to end with
// "uint16_t iface" per the io-net rx_up callback signature -- confirm
// against <sys/io-net.h>.
static int raw_init(void *dll_hdl, dispatch_t *dpp, io_net_self_t *n, char *options);
static int raw_rx_up(npkt_t *npkt, void *func_hdl, int off, int len_sub, uint16_t cell, uint16_t endpoint, uint16_t iface);
static int raw_tx_done(npkt_t *npkt, void *done_hdl, void *func_hdl);
void proc_cntrl_msg(npkt_t *npkt, void *func_hdl, int off, int len_sub, uint16_t cell, uint16_t endpoint, uint16_t iface);
void proc_reg_msg(npkt_t *npkt, void *func_hdl, int off, int len_sub, uint16_t cell, uint16_t endpoint, uint16_t iface);
void spawn_producer(void);
npkt_t *alloc_pkt(void);

// Default DLL registration table: io-net looks this symbol up when the
// DLL is mounted and calls raw_init() through it.
io_net_dll_entry_t io_net_dll_entry = {
2, // number of function pointers that follow
raw_init, // init fct -- called once at mount time
NULL // destroy fct -- nothing to tear down
};

// Callback table; raw_reg (the io_net_registrant_t below) points at this.
static io_net_registrant_funcs_t raw_funcs = {
9, // number of function pointers that follow
raw_rx_up, // called for upward bound pkts
NULL, // called for downward bound pkts (we only produce, never relay)
raw_tx_done, // called when a lower layer is done with our downward pkt
NULL, // shutdown 1 (shutdown requested)
NULL, // shutdown 2 (shutdown demanded)
NULL, // function to advertise yourself upward
NULL, //raw_devctl, // used to proc devctl commands
NULL, //raw_flush, // flush outstanding pkts (unused)
NULL // open
};

/*
** we have a list of interfaces for each advertised
** interface
*/
struct bpf_if {
struct bpf_if *next; // singly-linked list (head: raw_ctrl.ifs)
char ifname[IFNAMSIZ]; // interface name copied from the DL advert
uint16_t cell; // io-net addressing triple: cell ...
uint16_t endpoint; // ... endpoint ...
uint16_t iface; // ... interface index
u_char sdl_type; // link-layer type from the advert's sdl_type
};

/* global control structure */
// contains stuff that I want to keep track of
typedef struct {
void *dll_hdl; // a handle to the dll (us) when it was loaded
int reg_hdl; // a dll can register multiple times, each
// registration is given a handle
dispatch_t *dpp; // points to the io-net dispatch (state?) struct
io_net_self_t *npi; // points to the io-net handler fcts
struct qtime_entry *qtp; // qtime syspage entry (copied from bsp driver;
// set in raw_init but otherwise unused here)
struct bpf_if *ifs; // interfaces learned from DL adverts

} raw_ctrl_t;

void *raw_tx_thread(void *);

// now declare it, so that we can reference it
raw_ctrl_t raw_ctrl;


// We register with io-net using this struct.
// The name in the "up" field is what shows up under /dev/io-net;
// if up & down types don't match, the mount fails.
// BUG FIX: the string literals below had typographic ("smart") quotes,
// which do not compile -- restored to plain double quotes.
static io_net_registrant_t raw_reg = {
_REG_PRODUCER_DOWN,
"raw.so", // lib name
"raw", // our io-net "name" and lbl used by any DOWN producers
"raw", // what we produce
&raw_ctrl, // per-registration state (was bpf_ctrl)
// we can register multiple times; it is highly
// recommended that a different structure get passed
// in this field each time & that it contain the
// state information for each separate registration
&raw_funcs, // the list of callbacks above
0 // number of dependencies
};


// ethernet pkt
typedef struct {
unsigned char dst[6] attribute ((packed));;
unsigned char src[6] attribute ((packed));;
unsigned char len[2] attribute ((packed));;
unsigned char data[16] attribute ((packed));;
unsigned int p1 attribute ((packed));;
} ether_pkt_t;

int debug=1; // non-zero => verbose syslog tracing
int pktinroute=false; // true while a pkt is between tx_down() and tx_done(); guarded by mutex
unsigned int pcnt1=0; // monotonically increasing packet sequence number
pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; // protects pktinroute
pthread_cond_t cond = PTHREAD_COND_INITIALIZER; // signalled when pktinroute changes

/*
** raw_init -- io-net entry point, called once when the DLL is mounted.
** Initializes the global control struct, registers us as a DOWN
** producer, and starts the transmit thread.
** Returns EOK on success, errno on failure.
** BUG FIX: syslog format strings had typographic quotes (would not
** compile); pthread_self() is cast to int to match the %d specifier.
*/
static int raw_init(void *dll_hdl, dispatch_t *dpp, io_net_self_t *n, char *options)
{
    raw_ctrl_t *bcp = &raw_ctrl;

    if(debug)
        syslog(LOG_INFO, "%d:raw_init", (int)pthread_self());

    memset(bcp, 0, sizeof(*bcp));
    bcp->dll_hdl = dll_hdl;
    bcp->dpp = dpp;
    bcp->npi = n;
    bcp->qtp = SYSPAGE_ENTRY(qtime);
    bcp->ifs = NULL;

    // the bigtime registration function; all the good stuff is in raw_reg
    if(bcp->npi->reg(bcp->dll_hdl, &raw_reg, &bcp->reg_hdl, NULL, NULL) == -1)
    {
        syslog(LOG_INFO, "raw: reg failed (%d)", errno);
        return errno;
    }

    // ask io-net to hand us every upward packet regardless of byte pattern
    if(bcp->npi->reg_byte_pat(bcp->reg_hdl, 0, 0, NULL, _BYTE_PAT_ALL) == -1)
    {
        syslog(LOG_INFO, "raw: reg_byte_pat failed (%d)", errno);
        bcp->npi->dereg(bcp->reg_hdl);
        return errno;
    }

    spawn_producer();

    return EOK;
}

/*
** raw_rx_up -- io-net callback for packets travelling up to us.
** Dispatches control messages (_NPKT_MSG) and regular data packets
** to the appropriate handler; both handlers call tx_done().
** NOTE(review): the signature was truncated in the posting and is
** reconstructed to end with "uint16_t iface" -- confirm against
** <sys/io-net.h>.
** BUG FIX: the debug if/else was an unbraced dangling-else; braces
** added so the intent (both logs guarded by debug) is explicit.
*/
static int raw_rx_up(npkt_t *npkt, void *func_hdl, int off, int len_sub, uint16_t cell, uint16_t endpoint, uint16_t iface)
{
    if(debug) {
        if(npkt->flags & _NPKT_MSG)
            syslog(LOG_INFO, "%d:raw_rx_up: ctrl", (int)pthread_self());
        else
            syslog(LOG_INFO, "%d:raw_rx_up: data (%d)", (int)pthread_self(), pktinroute);
    }

    // control message or ordinary data?
    if(npkt->flags & _NPKT_MSG) {
        proc_cntrl_msg(npkt, func_hdl, off, len_sub, cell, endpoint, iface);
    }
    else {
        proc_reg_msg(npkt, func_hdl, off, len_sub, cell, endpoint, iface);
    }

    return 0;
}


/*
** raw_tx_done -- called by io-net when the hardware (or a failing
** lower layer) is finished with a packet we sent with tx_down().
** Frees the packet and clears the in-flight flag so the producer
** thread can send the next one.
**
** BUG FIX: the original code *waited* here on the condvar until
** pktinroute became true.  tx_done can run on io-net's own thread,
** and blocking inside this callback can stall all of io-net (which
** matches the observed freezes).  The producer sets pktinroute under
** the mutex before this callback can acquire it, so the wait was
** unnecessary -- just clear the flag and signal.
*/
static int raw_tx_done(npkt_t *npkt, void *done_hdl, void *func_hdl)
{
    raw_ctrl_t *bcp = func_hdl;
    ether_pkt_t *pkt;

    pkt = (ether_pkt_t *)npkt->buffers.tqh_first->net_iov->iov_base;

    if(debug)
        syslog(LOG_INFO, "%d:raw_tx_done pktnum:%x NPKT_NOT_TXED=0x%x",
            (int)pthread_self(),
            ntohl(pkt->p1),
            npkt->flags & _NPKT_NOT_TXED);

    // release the buffer -- pkt must not be touched after this point
    bcp->npi->free(npkt);

    // clear the in-flight flag and wake the producer thread
    pthread_mutex_lock(&mutex);
    pktinroute = false;
    pthread_cond_signal(&cond);
    pthread_mutex_unlock(&mutex);

    return 0;
}

/*
** proc_cntrl_msg -- handle an io-net control message travelling up.
** Maintains the bpf_if interface list: removes entries when an
** endpoint dies, adds/updates entries on a DL advertisement.
** Always hands the packet back via tx_done().
** NOTE(review): signature reconstructed from a truncated line; it is
** assumed to end with "uint16_t iface" -- confirm against the header.
**
** BUG FIXES:
**  - The dying-endpoint loop freed bpif and then advanced through
**    bpif->next (use-after-free), and mis-maintained the back pointer
**    (bpifp was updated even after an unlink). Rewritten with a
**    pointer-to-pointer walk.
**  - snprintf format string had typographic quotes (would not compile).
*/
void proc_cntrl_msg(npkt_t *npkt, void *func_hdl, int off, int len_sub, uint16_t cell, uint16_t endpoint, uint16_t iface)
{
    raw_ctrl_t *bcp = func_hdl;
    struct bpf_if *bpif, **bpifp;
    uint16_t *type;

    // is an endpoint dying?
    if(npkt->flags & _NPKT_MSG_DYING) {
        // unlink and free every entry matching this cell/endpoint/iface
        bpifp = &bcp->ifs;
        while((bpif = *bpifp) != NULL) {
            if(bpif->cell == cell &&
               bpif->endpoint == endpoint &&
               bpif->iface == iface) {
                // goodbye
                *bpifp = bpif->next;
                bcp->npi->free(bpif);
            } else {
                bpifp = &bpif->next;
            }
        }
    }
    else
    {
        // is it a new endpoint?
        type = (uint16_t *)(npkt->buffers.tqh_first->net_iov->iov_base);

        switch(*type) {
        case _IO_NET_MSG_DL_ADVERT: // only type supported in tcp & bpf
        {
            io_net_msg_dl_advert_t *ad = (io_net_msg_dl_advert_t *)type;

            // look for this interface in our list
            for(bpif = bcp->ifs; bpif != NULL; bpif = bpif->next) {
                if(stricmp(bpif->ifname, ad->dl.sdl_data) == 0)
                    break;
            }

            if(bpif == NULL) {
                // not known yet -- add it at the head of the list
                if((bpif = bcp->npi->alloc(sizeof(*bpif), 0)) != NULL) {
                    memset(bpif, 0, sizeof(*bpif));
                    snprintf(bpif->ifname, IFNAMSIZ, "%s", ad->dl.sdl_data);
                    bpif->cell = cell;
                    bpif->endpoint = endpoint;
                    bpif->iface = iface;
                    bpif->sdl_type = ad->dl.sdl_type;
                    bpif->next = bcp->ifs;
                    bcp->ifs = bpif;
                }
            } else {
                // already known: refresh cell/endpoint/iface (as per the bsp)
                bpif->cell = cell;
                bpif->endpoint = endpoint;
                bpif->iface = iface;
            }
        }
            break;
        default:
            break;
        }
    }
    // we are done with this upward packet, let io-net know
    bcp->npi->tx_done(bcp->reg_hdl, npkt);
}

/*
** proc_reg_msg -- handle a regular (data) packet travelling up.
** Nothing is consumed yet; the packet is handed straight back.
** NOTE(review): signature reconstructed from a truncated line; it is
** assumed to end with "uint16_t iface" -- confirm against the header.
*/
void proc_reg_msg(npkt_t *npkt, void *func_hdl, int off, int len_sub, uint16_t cell, uint16_t endpoint, uint16_t iface)
{
    raw_ctrl_t *bcp = func_hdl;

    // TODO: save it on a queue for any local consumers

    // we are done, let io-net know
    bcp->npi->tx_done(bcp->reg_hdl, npkt);
}

/*
** spawn_producer -- start the once-a-second transmit thread.
** BUG FIX: pthread_create was called with a NULL thread-id pointer
** (QNX tolerates this, POSIX does not require it) and its return
** value was ignored; both are fixed.
*/
void spawn_producer()
{
    pthread_t tid;
    int rc;

    if((rc = pthread_create(&tid, NULL, raw_tx_thread, NULL)) != 0)
        syslog(LOG_INFO, "raw: pthread_create failed (%d)", rc);
}

void *raw_tx_thread(void *argv)
{
raw_ctrl_t *bcp = &raw_ctrl;
npkt_t *npkt;

while(1) {
// pktinroute could have a race-condition
pthread_mutex_lock(&mutex);

while(pktinroute == true)
pthread_cond_wait(&cond, &mutex);

if(debug)
syslog(LOG_INFO,"%d:raw_tx_thread: sending pkt (%x)",pthread_self(),pcnt1);

// build/send our pkt
npkt = alloc_pkt();

if(npkt != NULL)
// note if tx_down fails a lower layer will be responsible
// or calling tx_done()
bcp->npi->tx_down(bcp->reg_hdl, npkt);
else
syslog(LOG_INFO,“raw_tx_thread: ERROR: npkt == NULL”);

pktinroute = true;
pthread_cond_signal(&cond);
pthread_mutex_unlock(&mutex);
sleep(1);
//usleep(100000); still skips every other pkt
//sleep(3); didn’t make any difference
}
}

npkt_t *alloc_pkt()
{
npkt_t *npkt;
raw_ctrl_t *bcp = &raw_ctrl;
net_buf_t *nbp;
net_iov_t *niovp;
ether_pkt_t *pkt;
struct bpf_if *bpif;

/*
** ok, we have to allocate space for 5 different structures as
** per the info from training
**
** npkt_t: main hdr
** N*npkt_done_t: 1 per module that add’s data to a down pkt (1 in this case)
** (tcpip uses 2, one for the tcp module & 1 for the converter)
** net_buf_t: 1 per cluster of iov’s (note this could be bigger than the default
** structure. It contains an array of pointers as the last element in the struct. One
** array element for each net_iov_t supported by this net_buf_t
** net_iov_t: we only need one iov for this application
** ether_pkt_t: DATA, what actually goes out on the wire
*/

// lets allocate space for the pkt, to minimize mallocs it reccomended
// that you allocate all three structs at once (if N>1 then other layers
// could allocate their own npkt_done_t/net_buf_t/net_iov_t structs

// DANGER: the tcpip code doesn’t use alloc_down_npkt, it uses malloc
// and then fills in all the structures by hand, arp.c does it more
// in line w/the spec
npkt = bcp->npi->alloc_down_npkt(bcp->reg_hdl,
sizeof(npkt_t) + 1*sizeof(npkt_done_t) // 1st struct (and 1.a)

  • sizeof(net_buf_t) // 2nd struct
  • sizeof(net_iov_t) // 3rd struct
  • sizeof(ether_pkt_t), // data
    (void **)&nbp); // 3rd struct

if(npkt == NULL)
return npkt;

// npkt/nbp were filled in by alloc_down_npkt, lets get the other two structs
niovp = (net_iov_t *)(nbp + 1);
pkt = (ether_pkt_t *)(niovp + 1);

// now fill in the pkt

// struct #1: npkt (from arp.c not the tcp code)
bpif = bcp->ifs;
while(bpif == NULL)
sleep(1);

npkt->cell = bpif->cell;
npkt->endpoint = bpif->endpoint;
npkt->iface = bpif->iface;

// fill in the links between struct #1 & #2
// (both arp.c and tcp/if_ndi.c do this)
nbp->ptrs.tqe_next = NULL;
npkt->buffers.tqh_last = &nbp->ptrs.tqe_next;
npkt->buffers.tqh_first = nbp;
nbp->ptrs.tqe_prev = &npkt->buffers.tqh_first;

// struct #1.b: net_buf_t…not filled in?

// struct #2
nbp->niov = 1;
nbp->net_iov = niovp;

// struct #3
// iov_phys == physical address of the data (for DMA?)
SETIOV(niovp, pkt, sizeof(*pkt));
niovp->iov_phys =(paddr_t)(bcp->npi->mphys(niovp->iov_base));

// data
pkt->dst[0] = 0x00;
pkt->dst[1] = 0x01;
pkt->dst[2] = 0x02;
pkt->dst[3] = 0x03;
pkt->dst[4] = 0x04;
pkt->dst[5] = 0x05;

pkt->src[0] = 0xcc;
pkt->src[1] = 0x10;
pkt->src[2] = 0x4b;
pkt->src[3] = 0x1f;
pkt->src[4] = 0x77;
pkt->src[5] = 0x2c;

pkt->len[0] = 0x00;
pkt->len[1] = 0x10;

pkt->data[0] = 0x82;
pkt->data[1] = 0x82;
pkt->data[2] = 0x03;
pkt->data[3] = 0x01;
pkt->data[4] = 0x04;
pkt->data[5] = 0x00;
pkt->data[6] = 0x05;
pkt->data[7] = 0x0F;
pkt->data[8] = 0x0C;
pkt->data[9] = 0x0C;
pkt->data[10] = 0x02;
pkt->data[11] = 0x00;
pkt->data[12] = 0x00;
pkt->data[13] = 0x01;
pkt->data[14] = 0x19;
pkt->data[15] = 0x70;

pkt->p1 = htonl(pcnt1++);

// and finally fill out the npkt_done_t (struct #1.a)
// not sure about the NULL param is for, but arp.c uses it

if(bcp->npi->reg_tx_done(bcp->reg_hdl,npkt,NULL) == -1)
{
bcp->npi->free(npkt);
syslog(LOG_INFO,“raw: reg_tx_done failed!”);
}

return npkt;
}

A couple of notes. Not sure if it’s the root of your problem though…

Chris Goebel <cgoebel@tridium.com> wrote:
: I have a problem that occurs on a 200mhz PC box but not on a 400mhz PC

: I wrote an io-net producer that sends a single raw ethernet packet once a second. On
: my 400mhz PC it works perfectly. On a 200mhz PC it sends packets out intermittently,
: around 2-3% of the time.

: I wrote a simple producer and converter. On either machine all calls to io-net return
: success and the tx_done function is called 100% of the time. I only found the problem when
: I used tcpdump from a different machine to sniff packets and noticed that they weren’t
: always getting sent.

: Here is my code (actually more comments than code), and if you would like to try
: my simple producer send me an email for the converter and/or tarball.


: /*
: ** raw.c
: **
: ** raw ethernet pkt support for QNX
: **
: ** This producer will eventually provide
: ** a simple resource manger to
: ** read/write raw ethernet packets
: **
: ** right now it sends a single raw (non-IP) ethernet
: ** packet once a second
: */

: #include <stdio.h>
: #include <unistd.h>
: #include <atomic.h>
: #include <malloc.h>
: #include <string.h>
: #include <errno.h>
: #include <stdlib.h>
: #include <net/if.h>
: #include <net/if_arp.h>
: #include <net/if_types.h>
: #include <netinet/in.h>
: #include <sys/syspage.h>
: #include <sys/neutrino.h>
: #include <syslog.h>
: #include <sys/dispatch.h>
: #include <sys/io-net.h>

: #include <stdbool.h>

: static int raw_init(void *dll_hdl, dispatch_t *dpp, io_net_self_t *n, char *options);
: static int raw_rx_up(npkt_t *npkt, void *func_hdl, int off, int len_sub, uint16_t cell, uint16_t endpoint, ui
: static int raw_tx_done(npkt_t *npkt, void *done_hdl, void *func_hdl);
: void proc_cntrl_msg(npkt_t *npkt, void *func_hdl, int off, int len_sub, uint16_t cell, uint16_t endpoint, uin
: void proc_reg_msg(npkt_t *npkt, void *func_hdl, int off, int len_sub, uint16_t cell, uint16_t endpoint, uint1
: void spawn_producer();
: npkt_t *alloc_pkt();

: // default registration, io-net finds this and
: // calls “raw_init”
: io_net_dll_entry_t io_net_dll_entry = {
: 2,
: raw_init, // init fct
: NULL // destroy fct
: };

: // the io_net_registrant_t struct refers to this one
: static io_net_registrant_funcs_t raw_funcs = {
: 9,
: raw_rx_up, // called for upward bound pkts
: NULL, // called for downward bound pkts
: raw_tx_done, // called when they are done with downward pkt
: NULL, // shutdown 1 (shutdown requested)
: NULL, // shutdown 2 (shutdown demanded)
: NULL, // function to advertise yourself upward
: NULL, //raw_devctl, // used to proc devctl commands
: NULL, //raw_flush, //
: NULL // open
: };

: /*
: ** we have a list of interfaces for each advertised
: ** interface
: */
: struct bpf_if {
: struct bpf_if *next;
: char ifname[IFNAMSIZ];
: uint16_t cell;
: uint16_t endpoint;
: uint16_t iface;
: u_char sdl_type;
: };

: /* global control structure */
: // contains stuff that I want to keep track of
: typedef struct {
: void *dll_hdl; // a handle to the dll (us) when it was loaded
: int reg_hdl; // a dll can register multiple times, each
: // registration is given a handle
: dispatch_t *dpp; // points to the io-net dispatch (state?) struct
: io_net_self_t *npi; // points to the io-net handler fcts
: struct qtime_entry *qtp; // ??? (copied from bsp driver)
: struct bpf_if *ifs;

: } raw_ctrl_t;

: void *raw_tx_thread(void *);

: // now declare it, so that we can reference it
: raw_ctrl_t raw_ctrl;


: // we “register” with io-net using this struct
: // for a name to show up in /dev/io-net it uses the name in the
: // up field.
: // plus if up & down don’t match then it will fail
: // to load
: static io_net_registrant_t raw_reg = {
: _REG_PRODUCER_DOWN,
: “raw.so”, // lib name
: “raw”, // our io-net “name” and lbl used by any DOWN producers
: “raw”, // what we produce
: &raw_ctrl, // (was bpf_ctrl)
: // struct with misc data in it
: // we can register multiple times, it is highly
: // reccomended that a different structure get passed
: // in this field each time & that it contain the
: // state information for each separate registration
: &raw_funcs, // the list of callbacks above
: 0 // number of dependancies
: };


: // ethernet pkt
: typedef struct {
: unsigned char dst[6] attribute ((packed));;
: unsigned char src[6] attribute ((packed));;
: unsigned char len[2] attribute ((packed));;
: unsigned char data[16] attribute ((packed));;
: unsigned int p1 attribute ((packed));;
: } ether_pkt_t;

: int debug=1;
: int pktinroute=false;
: unsigned int pcnt1=0;
: pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
: pthread_cond_t cond = PTHREAD_COND_INITIALIZER;

: static int raw_init(void *dll_hdl, dispatch_t *dpp, io_net_self_t *n, char *options)
: {
: raw_ctrl_t *bcp = &raw_ctrl;

: if(debug)
: syslog(LOG_INFO,"%d:raw_init",pthread_self());

: memset(bcp, 0, sizeof(*bcp));
: bcp->dll_hdl = dll_hdl;
: bcp->dpp = dpp;
: bcp->npi = n;
: bcp->qtp = SYSPAGE_ENTRY(qtime);
: bcp->ifs = NULL;

: // the bigtime registration function,all the good stuff is in the raw_reg struct
: if(bcp->npi->reg(bcp->dll_hdl, &raw_reg, &bcp->reg_hdl, NULL, NULL) == -1)
: {
: syslog(LOG_INFO,“raw: reg failed (%d)”,errno);
: return errno;
: }

: // no-clue what this one is about…NO DOCS! (can we remove this?)
: if(bcp->npi->reg_byte_pat(bcp->reg_hdl, 0, 0, NULL, _BYTE_PAT_ALL) == -1)
: {
: syslog(LOG_INFO,“raw: reg_byte_pat failed (%d)”,errno);
: bcp->npi->dereg(bcp->reg_hdl);
: return errno;
: }

: spawn_producer();

: return EOK;
: }

: static int raw_rx_up(npkt_t *npkt, void *func_hdl, int off, int len_sub, uint16_t cell, uint16_t endpoint, ui
: {
: if(debug)
: if(npkt->flags & _NPKT_MSG)
: syslog(LOG_INFO,"%d:raw_rx_up: ctrl",pthread_self());
: else
: syslog(LOG_INFO,"%d:raw_rx_up: data (%d)",pthread_self(),pktinroute);

: // is it a control msg?
: if(npkt->flags & _NPKT_MSG) {
: proc_cntrl_msg(npkt,func_hdl,off,len_sub,cell,endpoint,iface);
: }
: else {
: proc_reg_msg(npkt,func_hdl,off,len_sub,cell,endpoint,iface);
: }



: return 0;
: }


: static int raw_tx_done(npkt_t *npkt, void *done_hdl, void *func_hdl)
: {
: raw_ctrl_t *bcp = func_hdl;
: ether_pkt_t *pkt;

: pkt = (ether_pkt_t *)npkt->buffers.tqh_first->net_iov->iov_base;

: if(debug)
: syslog(LOG_INFO,"%d:raw_tx_done pktnum:%x NPKT_NOT_TXED=0x%x",
: pthread_self(),
: ntohl(pkt->p1),
: npkt->flags & _NPKT_NOT_TXED);

: // release the buff
: bcp->npi->free(npkt);

: // there is a potential race condition w/the pktinroute var
: pthread_mutex_lock(&mutex);
: while(pktinroute == false)
: pthread_cond_wait(&cond, &mutex);
: pktinroute = false;
: pthread_cond_signal(&cond);
: pthread_mutex_unlock(&mutex);

: return 0;
: }

: void proc_cntrl_msg(npkt_t *npkt, void *func_hdl, int off, int len_sub, uint16_t cell, uint16_t endpoint, uin
: {
: raw_ctrl_t *bcp = func_hdl;
: struct bpf_if *bpif, **bpifp;
: uint16_t *type;

: // is an endpoint dying?
: if(npkt->flags & _NPKT_MSG_DYING) {
: for(bpifp = &bcp->ifs, bpif = bcp->ifs;bpif != NULL;bpif=bpif->next,bpifp=&bpif->next)
: if(bpif->cell == cell &&
: bpif->endpoint == endpoint &&
: bpif->iface == iface) {

: // goodbye
: *bpifp = bpif->next;
: bcp->npi->free(bpif);
: }
: }
: else
: {
: // is it a new endpoint?
: type = (uint16_t *)(npkt->buffers.tqh_first->net_iov->iov_base);

: switch(*type) {
: case _IO_NET_MSG_DL_ADVERT: // only type supported in tcp & bpf
: {
: io_net_msg_dl_advert_t *ad = (io_net_msg_dl_advert_t *) type;

: // look for this interface in our list
: for(bpif = bcp->ifs;bpif != NULL;bpif=bpif->next) {
: if(stricmp(bpif->ifname,ad->dl.sdl_data) == 0)
: break;
: }

: // is it in there?
: if(bpif == NULL) {
: // no
: if((bpif = bcp->npi->alloc(sizeof(*bpif), 0)) != NULL) {
: memset(bpif, 0, sizeof(*bpif));
: snprintf(bpif->ifname, IFNAMSIZ, “%s”, ad->dl.sdl_data);
: bpif->cell = cell;
: bpif->endpoint = endpoint;
: bpif->iface = iface;
: bpif->sdl_type = ad->dl.sdl_type;
: bpif->next = bcp->ifs;
: bcp->ifs = bpif;
: }
: } else {
: // yes, copy in the new cell/iface/endpoint (as per the bsp)
: bpif->cell = cell;
: bpif->endpoint = endpoint;
: bpif->iface = iface;
: }
: }
: break;
: default:
: break;

: }
: }
: // we are done, let io-net know
: bcp->npi->tx_done(bcp->reg_hdl,npkt);
: }

: void proc_reg_msg(npkt_t *npkt, void *func_hdl, int off, int len_sub, uint16_t cell, uint16_t endpoint, uint1
: {
: raw_ctrl_t *bcp = func_hdl;

: // save it on a queue for any lcl proc’s

: // we are done, let io-net know
: bcp->npi->tx_done(bcp->reg_hdl,npkt);
: }

: void spawn_producer()
: {
: pthread_create (NULL,NULL,raw_tx_thread,NULL);
: }

: void *raw_tx_thread(void *argv)
: {
: raw_ctrl_t *bcp = &raw_ctrl;
: npkt_t *npkt;

: while(1) {
: // pktinroute could have a race-condition
: pthread_mutex_lock(&mutex);

: while(pktinroute == true)
: pthread_cond_wait(&cond, &mutex);

: if(debug)
: syslog(LOG_INFO,"%d:raw_tx_thread: sending pkt (%x)",pthread_self(),pcnt1);

: // build/send our pkt
: npkt = alloc_pkt();

: if(npkt != NULL)
: // note if tx_down fails a lower layer will be responsible
: // or calling tx_done()
: bcp->npi->tx_down(bcp->reg_hdl, npkt);
: else
: syslog(LOG_INFO,“raw_tx_thread: ERROR: npkt == NULL”);

: pktinroute = true;
: pthread_cond_signal(&cond);
: pthread_mutex_unlock(&mutex);
: sleep(1);
: //usleep(100000); still skips every other pkt
: //sleep(3); didn’t make any difference
: }
: }

: npkt_t *alloc_pkt()
: {
: npkt_t *npkt;
: raw_ctrl_t *bcp = &raw_ctrl;
: net_buf_t *nbp;
: net_iov_t *niovp;
: ether_pkt_t *pkt;
: struct bpf_if *bpif;

: /*
: ** ok, we have to allocate space for 5 different structures as
: ** per the info from training
: **
: ** npkt_t: main hdr
: ** N*npkt_done_t: 1 per module that add’s data to a down pkt (1 in this case)
: ** (tcpip uses 2, one for the tcp module & 1 for the converter)
: ** net_buf_t: 1 per cluster of iov’s (note this could be bigger than the default
: ** structure. It contains an array of pointers as the last element in the struct. One
: ** array element for each net_iov_t supported by this net_buf_t
: ** net_iov_t: we only need one iov for this application
: ** ether_pkt_t: DATA, what actually goes out on the wire
: */

: // lets allocate space for the pkt, to minimize mallocs it reccomended
: // that you allocate all three structs at once (if N>1 then other layers
: // could allocate their own npkt_done_t/net_buf_t/net_iov_t structs

: // DANGER: the tcpip code doesn’t use alloc_down_npkt, it uses malloc
: // and then fills in all the structures by hand, arp.c does it more
: // in line w/the spec
: npkt = bcp->npi->alloc_down_npkt(bcp->reg_hdl,
: sizeof(npkt_t) + 1*sizeof(npkt_done_t) // 1st struct (and 1.a)
: + sizeof(net_buf_t) // 2nd struct
: + sizeof(net_iov_t) // 3rd struct
: + sizeof(ether_pkt_t), // data
: (void **)&nbp); // 3rd struct

The above should be something like:
npkt = bcp->npi->alloc_down_npkt(bcp->reg_hdl, sizeof(net_buf_t) +
sizeof(net_iov_t) +
sizeof(ether_pkt_t), (void **)&nbp);

io-net will alloc the npkt and the appropriate number of npkt_done_t’s. Don’t think
it’s really a problem though, just overallocating.

: if(npkt == NULL)
: return npkt;

: // npkt/nbp were filled in by alloc_down_npkt, lets get the other two structs
: niovp = (net_iov_t *)(nbp + 1);
: pkt = (ether_pkt_t *)(niovp + 1);

: // now fill in the pkt

: // struct #1: npkt (from arp.c not the tcp code)
: bpif = bcp->ifs;
: while(bpif == NULL)
: sleep(1);

: npkt->cell = bpif->cell;
: npkt->endpoint = bpif->endpoint;
: npkt->iface = bpif->iface;

: // fill in the links between struct #1 & #2
: // (both arp.c and tcp/if_ndi.c do this)
: nbp->ptrs.tqe_next = NULL;
: npkt->buffers.tqh_last = &nbp->ptrs.tqe_next;
: npkt->buffers.tqh_first = nbp;
: nbp->ptrs.tqe_prev = &npkt->buffers.tqh_first;

: // struct #1.b: net_buf_t…not filled in?

: // struct #2
: nbp->niov = 1;
: nbp->net_iov = niovp;

: // struct #3
: // iov_phys == physical address of the data (for DMA?)
: SETIOV(niovp, pkt, sizeof(*pkt));
: niovp->iov_phys =(paddr_t)(bcp->npi->mphys(niovp->iov_base));

: // data
: pkt->dst[0] = 0x00;
: pkt->dst[1] = 0x01;
: pkt->dst[2] = 0x02;
: pkt->dst[3] = 0x03;
: pkt->dst[4] = 0x04;
: pkt->dst[5] = 0x05;

: pkt->src[0] = 0xcc;
: pkt->src[1] = 0x10;
: pkt->src[2] = 0x4b;
: pkt->src[3] = 0x1f;
: pkt->src[4] = 0x77;
: pkt->src[5] = 0x2c;

: pkt->len[0] = 0x00;
: pkt->len[1] = 0x10;

: pkt->data[0] = 0x82;
: pkt->data[1] = 0x82;
: pkt->data[2] = 0x03;
: pkt->data[3] = 0x01;
: pkt->data[4] = 0x04;
: pkt->data[5] = 0x00;
: pkt->data[6] = 0x05;
: pkt->data[7] = 0x0F;
: pkt->data[8] = 0x0C;
: pkt->data[9] = 0x0C;
: pkt->data[10] = 0x02;
: pkt->data[11] = 0x00;
: pkt->data[12] = 0x00;
: pkt->data[13] = 0x01;
: pkt->data[14] = 0x19;
: pkt->data[15] = 0x70;

: pkt->p1 = htonl(pcnt1++);

You need to set npkt->tot_iov. Here:
npkt->tot_iov = 1;

: // and finally fill out the npkt_done_t (struct #1.a)
: // not sure about the NULL param is for, but arp.c uses it

Whatever you pass here, comes out as the last parameter (done_hdl here)
in your tx_done function.


: if(bcp->npi->reg_tx_done(bcp->reg_hdl,npkt,NULL) == -1)
: {
: bcp->npi->free(npkt);
: syslog(LOG_INFO,“raw: reg_tx_done failed!”);
: }

: return npkt;
: }

halfway there…

ok, I changed two things:
-initialized npkt->tot_iov as you suggested
-reformat & fresh install of QNX

This time “mount raw.so” caused a single packet to go
out immediately, then everything froze for 30 seconds.

Sniffing the network shows that every other packet
hit the wire (30, 32, 34, 36, …) while syslog shows that the io-net
handshakes were completed for every packet and 2-3 minutes later
io-net locks up completely



Sean Boudreau wrote:

A couple of notes. Not sure if it’s the root of your problem though…

Chris Goebel <> cgoebel@tridium.com> > wrote:
: I have a problem that occurs on a 200mhz PC box but not on a 400mhz PC

: I wrote an io-net producer that sends a single raw ethernet packet once a second. On
: my 400mhz PC it works perfectly. On a 200mhz PC it sends packets out intermittently,
: around 2-3% of the time.

: I wrote a simple producer and converter. On either machine all calls to io-net return
: success and the tx_done function is called 100% of the time. I only found the problem when
: I used tcpdump from a different machine to sniff packets and noticed that they weren’t
: always getting sent.

: Here is my code (actually more comments than code), and if you would like to try
: my simple producer send me an email for the converter and/or tarball.

: /*
: ** raw.c
: **
: ** raw ethernet pkt support for QNX
: **
: ** This producer will eventually provide
: ** a simple resource manger to
: ** read/write raw ethernet packets
: **
: ** right now it sends a single raw (non-IP) ethernet
: ** packet once a second
: */

: #include <stdio.h
: #include <unistd.h
: #include <atomic.h
: #include <malloc.h
: #include <string.h
: #include <errno.h
: #include <stdlib.h
: #include <net/if.h
: #include <net/if_arp.h
: #include <net/if_types.h
: #include <netinet/in.h
: #include <sys/syspage.h
: #include <sys/neutrino.h
: #include <syslog.h
: #include <sys/dispatch.h
: #include <sys/io-net.h

: #include <stdbool.h

: static int raw_init(void *dll_hdl, dispatch_t *dpp, io_net_self_t *n, char *options);
: static int raw_rx_up(npkt_t *npkt, void *func_hdl, int off, int len_sub, uint16_t cell, uint16_t endpoint, ui
: static int raw_tx_done(npkt_t *npkt, void *done_hdl, void *func_hdl);
: void proc_cntrl_msg(npkt_t *npkt, void *func_hdl, int off, int len_sub, uint16_t cell, uint16_t endpoint, uin
: void proc_reg_msg(npkt_t *npkt, void *func_hdl, int off, int len_sub, uint16_t cell, uint16_t endpoint, uint1
: void spawn_producer();
: npkt_t *alloc_pkt();

: // default registration, io-net finds this and
: // calls “raw_init”
: io_net_dll_entry_t io_net_dll_entry = {
: 2,
: raw_init, // init fct
: NULL // destroy fct
: };

: // the io_net_registrant_t struct refers to this one
: static io_net_registrant_funcs_t raw_funcs = {
: 9,
: raw_rx_up, // called for upward bound pkts
: NULL, // called for downward bound pkts
: raw_tx_done, // called when they are done with downward pkt
: NULL, // shutdown 1 (shutdown requested)
: NULL, // shutdown 2 (shutdown demanded)
: NULL, // function to advertise yourself upward
: NULL, //raw_devctl, // used to proc devctl commands
: NULL, //raw_flush, //
: NULL // open
: };

: /*
: ** we have a list of interfaces for each advertised
: ** interface
: */
: struct bpf_if {
: struct bpf_if *next;
: char ifname[IFNAMSIZ];
: uint16_t cell;
: uint16_t endpoint;
: uint16_t iface;
: u_char sdl_type;
: };

: /* global control structure */
: // contains stuff that I want to keep track of
: typedef struct {
: void *dll_hdl; // a handle to the dll (us) when it was loaded
: int reg_hdl; // a dll can register multiple times, each
: // registration is given a handle
: dispatch_t *dpp; // points to the io-net dispatch (state?) struct
: io_net_self_t *npi; // points to the io-net handler fcts
: struct qtime_entry *qtp; // ??? (copied from bsp driver)
: struct bpf_if *ifs;

: } raw_ctrl_t;

: void *raw_tx_thread(void *);

: // now declare it, so that we can reference it
: raw_ctrl_t raw_ctrl;

: // we “register” with io-net using this struct
: // for a name to show up in /dev/io-net it uses the name in the
: // up field.
: // plus if up & down don’t match then it will fail
: // to load
: static io_net_registrant_t raw_reg = {
: _REG_PRODUCER_DOWN,
: “raw.so”, // lib name
: “raw”, // our io-net “name” and lbl used by any DOWN producers
: “raw”, // what we produce
: &raw_ctrl, // (was bpf_ctrl)
: // struct with misc data in it
: // we can register multiple times, it is highly
: // reccomended that a different structure get passed
: // in this field each time & that it contain the
: // state information for each separate registration
: &raw_funcs, // the list of callbacks above
: 0 // number of dependancies
: };

: // ethernet pkt
: typedef struct {
: unsigned char dst[6] attribute ((packed));;
: unsigned char src[6] attribute ((packed));;
: unsigned char len[2] attribute ((packed));;
: unsigned char data[16] attribute ((packed));;
: unsigned int p1 attribute ((packed));;
: } ether_pkt_t;

: int debug=1;
: int pktinroute=false;
: unsigned int pcnt1=0;
: pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
: pthread_cond_t cond = PTHREAD_COND_INITIALIZER;

: static int raw_init(void *dll_hdl, dispatch_t *dpp, io_net_self_t *n, char *options)
: {
: raw_ctrl_t *bcp = &raw_ctrl;

: if(debug)
: syslog(LOG_INFO,"%d:raw_init",pthread_self());

: memset(bcp, 0, sizeof(*bcp));
: bcp->dll_hdl = dll_hdl;
: bcp->dpp = dpp;
: bcp->npi = n;
: bcp->qtp = SYSPAGE_ENTRY(qtime);
: bcp->ifs = NULL;

: // the bigtime registration function,all the good stuff is in the raw_reg struct
: if(bcp->npi->reg(bcp->dll_hdl, &raw_reg, &bcp->reg_hdl, NULL, NULL) == -1)
: {
: syslog(LOG_INFO,“raw: reg failed (%d)”,errno);
: return errno;
: }

: // no-clue what this one is about…NO DOCS! (can we remove this?)
: if(bcp->npi->reg_byte_pat(bcp->reg_hdl, 0, 0, NULL, _BYTE_PAT_ALL) == -1)
: {
: syslog(LOG_INFO,“raw: reg_byte_pat failed (%d)”,errno);
: bcp->npi->dereg(bcp->reg_hdl);
: return errno;
: }

: spawn_producer();

: return EOK;
: }

: static int raw_rx_up(npkt_t *npkt, void *func_hdl, int off, int len_sub, uint16_t cell, uint16_t endpoint, ui
: {
: if(debug)
: if(npkt->flags & _NPKT_MSG)
: syslog(LOG_INFO,"%d:raw_rx_up: ctrl",pthread_self());
: else
: syslog(LOG_INFO,"%d:raw_rx_up: data (%d)",pthread_self(),pktinroute);

: // is it a control msg?
: if(npkt->flags & _NPKT_MSG) {
: proc_cntrl_msg(npkt,func_hdl,off,len_sub,cell,endpoint,iface);
: }
: else {
: proc_reg_msg(npkt,func_hdl,off,len_sub,cell,endpoint,iface);
: }

: return 0;
: }

: static int raw_tx_done(npkt_t *npkt, void *done_hdl, void *func_hdl)
: {
: raw_ctrl_t *bcp = func_hdl;
: ether_pkt_t *pkt;

: pkt = (ether_pkt_t *)npkt->buffers.tqh_first->net_iov->iov_base;

: if(debug)
: syslog(LOG_INFO,"%d:raw_tx_done pktnum:%x NPKT_NOT_TXED=0x%x",
: pthread_self(),
: ntohl(pkt->p1),
: npkt->flags & _NPKT_NOT_TXED);

: // release the buff
: bcp->npi->free(npkt);

: // there is a potential race condition w/the pktinroute var
: pthread_mutex_lock(&mutex);
: while(pktinroute == false)
: pthread_cond_wait(&cond, &mutex);
: pktinroute = false;
: pthread_cond_signal(&cond);
: pthread_mutex_unlock(&mutex);

: return 0;
: }

: void proc_cntrl_msg(npkt_t *npkt, void *func_hdl, int off, int len_sub, uint16_t cell, uint16_t endpoint, uin
: {
: raw_ctrl_t *bcp = func_hdl;
: struct bpf_if *bpif, **bpifp;
: uint16_t *type;

: // is an endpoint dying?
: if(npkt->flags & _NPKT_MSG_DYING) {
: for(bpifp = &bcp->ifs, bpif = bcp->ifs;bpif != NULL;bpif=bpif->next,bpifp=&bpif->next)
: if(bpif->cell == cell &&
: bpif->endpoint == endpoint &&
: bpif->iface == iface) {

: // goodbye
: *bpifp = bpif->next;
: bcp->npi->free(bpif);
: }
: }
: else
: {
: // is it a new endpoint?
: type = (uint16_t *)(npkt->buffers.tqh_first->net_iov->iov_base);

: switch(*type) {
: case _IO_NET_MSG_DL_ADVERT: // only type supported in tcp & bpf
: {
: io_net_msg_dl_advert_t *ad = (io_net_msg_dl_advert_t *) type;

: // look for this interface in our list
: for(bpif = bcp->ifs;bpif != NULL;bpif=bpif->next) {
: if(stricmp(bpif->ifname,ad->dl.sdl_data) == 0)
: break;
: }

: // is it in there?
: if(bpif == NULL) {
: // no
: if((bpif = bcp->npi->alloc(sizeof(*bpif), 0)) != NULL) {
: memset(bpif, 0, sizeof(*bpif));
: snprintf(bpif->ifname, IFNAMSIZ, "%s", ad->dl.sdl_data);
: bpif->cell = cell;
: bpif->endpoint = endpoint;
: bpif->iface = iface;
: bpif->sdl_type = ad->dl.sdl_type;
: bpif->next = bcp->ifs;
: bcp->ifs = bpif;
: }
: } else {
: // yes, copy in the new cell/iface/endpoint (as per the bsp)
: bpif->cell = cell;
: bpif->endpoint = endpoint;
: bpif->iface = iface;
: }
: }
: break;
: default:
: break;

: }
: }
: // we are done, let io-net know
: bcp->npi->tx_done(bcp->reg_hdl,npkt);
: }

: void proc_reg_msg(npkt_t *npkt, void *func_hdl, int off, int len_sub, uint16_t cell, uint16_t endpoint, uint1
: {
: raw_ctrl_t *bcp = func_hdl;

: // save it on a queue for any lcl proc’s

: // we are done, let io-net know
: bcp->npi->tx_done(bcp->reg_hdl,npkt);
: }

: void spawn_producer()
: {
: pthread_create (NULL,NULL,raw_tx_thread,NULL);
: }

: void *raw_tx_thread(void *argv)
: {
: raw_ctrl_t *bcp = &raw_ctrl;
: npkt_t *npkt;

: while(1) {
: // pktinroute could have a race-condition
: pthread_mutex_lock(&mutex);

: while(pktinroute == true)
: pthread_cond_wait(&cond, &mutex);

: if(debug)
: syslog(LOG_INFO,"%d:raw_tx_thread: sending pkt (%x)",pthread_self(),pcnt1);

: // build/send our pkt
: npkt = alloc_pkt();

: if(npkt != NULL)
: // note if tx_down fails a lower layer will be responsible
: // or calling tx_done()
: bcp->npi->tx_down(bcp->reg_hdl, npkt);
: else
: syslog(LOG_INFO,"raw_tx_thread: ERROR: npkt == NULL");

: pktinroute = true;
: pthread_cond_signal(&cond);
: pthread_mutex_unlock(&mutex);
: sleep(1);
: //usleep(100000); still skips every other pkt
: //sleep(3); didn’t make any difference
: }
: }

: npkt_t *alloc_pkt()
: {
: npkt_t *npkt;
: raw_ctrl_t *bcp = &raw_ctrl;
: net_buf_t *nbp;
: net_iov_t *niovp;
: ether_pkt_t *pkt;
: struct bpf_if *bpif;

: /*
: ** ok, we have to allocate space for 5 different structures as
: ** per the info from training
: **
: ** npkt_t: main hdr
: ** N*npkt_done_t: 1 per module that add’s data to a down pkt (1 in this case)
: ** (tcpip uses 2, one for the tcp module & 1 for the converter)
: ** net_buf_t: 1 per cluster of iov’s (note this could be bigger than the default
: ** structure. It contains an array of pointers as the last element in the struct. One
: ** array element for each net_iov_t supported by this net_buf_t
: ** net_iov_t: we only need one iov for this application
: ** ether_pkt_t: DATA, what actually goes out on the wire
: */

: // lets allocate space for the pkt, to minimize mallocs it reccomended
: // that you allocate all three structs at once (if N>1 then other layers
: // could allocate their own npkt_done_t/net_buf_t/net_iov_t structs

: // DANGER: the tcpip code doesn’t use alloc_down_npkt, it uses malloc
: // and then fills in all the structures by hand, arp.c does it more
: // in line w/the spec
: npkt = bcp->npi->alloc_down_npkt(bcp->reg_hdl,
: sizeof(npkt_t) + 1*sizeof(npkt_done_t) // 1st struct (and 1.a)
: + sizeof(net_buf_t) // 2nd struct
: + sizeof(net_iov_t) // 3rd struct
: + sizeof(ether_pkt_t), // data
: (void **)&nbp); // 3rd struct

The above should be something like:
npkt = bcp->npi->alloc_down_npkt(bcp->reg_hdl, sizeof(net_buf_t) +
sizeof(net_iov_t) +
sizeof(ether_pkt_t), (void **)&nbp);

io-net will alloc the npkt and the appropriate number of npkt_done_t’s. Don’t think
it’s really a problem though, just overallocating.

: if(npkt == NULL)
: return npkt;

: // npkt/nbp were filled in by alloc_down_npkt, lets get the other two structs
: niovp = (net_iov_t *)(nbp + 1);
: pkt = (ether_pkt_t *)(niovp + 1);

: // now fill in the pkt

: // struct #1: npkt (from arp.c not the tcp code)
: bpif = bcp->ifs;
: while(bpif == NULL)
: sleep(1);

: npkt->cell = bpif->cell;
: npkt->endpoint = bpif->endpoint;
: npkt->iface = bpif->iface;

: // fill in the links between struct #1 & #2
: // (both arp.c and tcp/if_ndi.c do this)
: nbp->ptrs.tqe_next = NULL;
: npkt->buffers.tqh_last = &nbp->ptrs.tqe_next;
: npkt->buffers.tqh_first = nbp;
: nbp->ptrs.tqe_prev = &npkt->buffers.tqh_first;

: // struct #1.b: net_buf_t…not filled in?

: // struct #2
: nbp->niov = 1;
: nbp->net_iov = niovp;

: // struct #3
: // iov_phys == physical address of the data (for DMA?)
: SETIOV(niovp, pkt, sizeof(*pkt));
: niovp->iov_phys =(paddr_t)(bcp->npi->mphys(niovp->iov_base));

: // data
: pkt->dst[0] = 0x00;
: pkt->dst[1] = 0x01;
: pkt->dst[2] = 0x02;
: pkt->dst[3] = 0x03;
: pkt->dst[4] = 0x04;
: pkt->dst[5] = 0x05;

: pkt->src[0] = 0xcc;
: pkt->src[1] = 0x10;
: pkt->src[2] = 0x4b;
: pkt->src[3] = 0x1f;
: pkt->src[4] = 0x77;
: pkt->src[5] = 0x2c;

: pkt->len[0] = 0x00;
: pkt->len[1] = 0x10;

: pkt->data[0] = 0x82;
: pkt->data[1] = 0x82;
: pkt->data[2] = 0x03;
: pkt->data[3] = 0x01;
: pkt->data[4] = 0x04;
: pkt->data[5] = 0x00;
: pkt->data[6] = 0x05;
: pkt->data[7] = 0x0F;
: pkt->data[8] = 0x0C;
: pkt->data[9] = 0x0C;
: pkt->data[10] = 0x02;
: pkt->data[11] = 0x00;
: pkt->data[12] = 0x00;
: pkt->data[13] = 0x01;
: pkt->data[14] = 0x19;
: pkt->data[15] = 0x70;

: pkt->p1 = htonl(pcnt1++);

You need to set npkt->tot_iov. Here:
npkt->tot_iov = 1;

: // and finally fill out the npkt_done_t (struct #1.a)
: // not sure about the NULL param is for, but arp.c uses it

Whatever you pass here, comes out as the last parameter (done_hdl here)
in your tx_done function.

: if(bcp->npi->reg_tx_done(bcp->reg_hdl,npkt,NULL) == -1)
: {
: bcp->npi->free(npkt);
: syslog(LOG_INFO,"raw: reg_tx_done failed!");
: }

: return npkt;
: }

When locked up, what does pidin show?

Are you running a stack or just your raw thingy?

Are you running the same driver on the ‘fast’
and ‘slow’ machines.

Don’t think it’s a good idea to be waiting
on a condvar in your tx_done func. This can
hold up a driver thread, although don’t see
where it would actually wait here.

It’s probably not a good idea to assume your
tx_done func will be called as soon as the
packet is tx’d. Some drivers may wait for
the next packet to come down before reaping
tx’d packets. You see the deadlock…
Try removing the mutex and condvars.



-seanb

Chris Goebel <cgoebel@tridium.com> wrote:
: halfway there…

: ok, I changed two things:
: -initialized npkt->iov as you suggested
: -reformat & fresh install of QNX

: This time “mount raw.so” caused a single packet to go
: out immediately, then everything froze for 30 seconds.

: Sniffing the network shows that every other packet
: hit the wire (30, 32, 34, 36, …) while syslog shows that the io-net
: handshakes were completed for every packet and 2-3 minutes later
: io-net locks up completely



: Sean Boudreau wrote:

:> A couple of notes. Not sure if it’s the root of your problem though…
:>
:> Chris Goebel <cgoebel@tridium.com> wrote:
:> : I have a problem that occurs on a 200mhz PC box but not on a 400mhz PC
:>
:> : I wrote an io-net producer that sends a single raw ethernet packet once a second. On
:> : my 400mhz PC it works perfectly. On a 200mhz PC it sends packets out intermittently,
:> : around 2-3% of the time.
:>
:> : I wrote a simple producer and converter. On either machine all calls to io-net return
:> : success and the tx_done function is called 100% of the time. I only found the problem when
:> : I used tcpdump from a different machine to sniff packets and noticed that they weren’t
:> : always getting sent.
:>
:> : Here is my code (actually more comments than code), and if you would like to try
:> : my simple producer send me an email for the converter and/or tarball.
:>
:> : /*
:> : ** raw.c
:> : **
:> : ** raw ethernet pkt support for QNX
:> : **
:> : ** This producer will eventually provide
:> : ** a simple resource manger to
:> : ** read/write raw ethernet packets
:> : **
:> : ** right now it sends a single raw (non-IP) ethernet
:> : ** packet once a second
:> : */
:>
:> : #include <stdio.h>
:> : #include <unistd.h>
:> : #include <atomic.h>
:> : #include <malloc.h>
:> : #include <string.h>
:> : #include <errno.h>
:> : #include <stdlib.h>
:> : #include <net/if.h>
:> : #include <net/if_arp.h>
:> : #include <net/if_types.h>
:> : #include <netinet/in.h>
:> : #include <sys/syspage.h>
:> : #include <sys/neutrino.h>
:> : #include <syslog.h>
:> : #include <sys/dispatch.h>
:> : #include <sys/io-net.h>
:>
:> : #include <stdbool.h>
:>
:> : static int raw_init(void *dll_hdl, dispatch_t *dpp, io_net_self_t *n, char *options);
:> : static int raw_rx_up(npkt_t *npkt, void *func_hdl, int off, int len_sub, uint16_t cell, uint16_t endpoint, ui
:> : static int raw_tx_done(npkt_t *npkt, void *done_hdl, void *func_hdl);
:> : void proc_cntrl_msg(npkt_t *npkt, void *func_hdl, int off, int len_sub, uint16_t cell, uint16_t endpoint, uin
:> : void proc_reg_msg(npkt_t *npkt, void *func_hdl, int off, int len_sub, uint16_t cell, uint16_t endpoint, uint1
:> : void spawn_producer();
:> : npkt_t *alloc_pkt();
:>
:> : // default registration, io-net finds this and
:> : // calls “raw_init”
:> : io_net_dll_entry_t io_net_dll_entry = {
:> : 2,
:> : raw_init, // init fct
:> : NULL // destroy fct
:> : };
:>
:> : // the io_net_registrant_t struct refers to this one
:> : static io_net_registrant_funcs_t raw_funcs = {
:> : 9,
:> : raw_rx_up, // called for upward bound pkts
:> : NULL, // called for downward bound pkts
:> : raw_tx_done, // called when they are done with downward pkt
:> : NULL, // shutdown 1 (shutdown requested)
:> : NULL, // shutdown 2 (shutdown demanded)
:> : NULL, // function to advertise yourself upward
:> : NULL, //raw_devctl, // used to proc devctl commands
:> : NULL, //raw_flush, //
:> : NULL // open
:> : };
:>
:> : /*
:> : ** we have a list of interfaces for each advertised
:> : ** interface
:> : */
:> : struct bpf_if {
:> : struct bpf_if *next;
:> : char ifname[IFNAMSIZ];
:> : uint16_t cell;
:> : uint16_t endpoint;
:> : uint16_t iface;
:> : u_char sdl_type;
:> : };
:>
:> : /* global control structure */
:> : // contains stuff that I want to keep track of
:> : typedef struct {
:> : void *dll_hdl; // a handle to the dll (us) when it was loaded
:> : int reg_hdl; // a dll can register multiple times, each
:> : // registration is given a handle
:> : dispatch_t *dpp; // points to the io-net dispatch (state?) struct
:> : io_net_self_t *npi; // points to the io-net handler fcts
:> : struct qtime_entry *qtp; // ??? (copied from bsp driver)
:> : struct bpf_if *ifs;
:>
:> : } raw_ctrl_t;
:>
:> : void *raw_tx_thread(void *);
:>
:> : // now declare it, so that we can reference it
:> : raw_ctrl_t raw_ctrl;
:>
:> : // we “register” with io-net using this struct
:> : // for a name to show up in /dev/io-net it uses the name in the
:> : // up field.
:> : // plus if up & down don’t match then it will fail
:> : // to load
:> : static io_net_registrant_t raw_reg = {
:> : _REG_PRODUCER_DOWN,
:> : “raw.so”, // lib name
:> : “raw”, // our io-net “name” and lbl used by any DOWN producers
:> : “raw”, // what we produce
:> : &raw_ctrl, // (was bpf_ctrl)
:> : // struct with misc data in it
:> : // we can register multiple times, it is highly
:> : // reccomended that a different structure get passed
:> : // in this field each time & that it contain the
:> : // state information for each separate registration
:> : &raw_funcs, // the list of callbacks above
:> : 0 // number of dependancies
:> : };
:>
:> : // ethernet pkt
:> : typedef struct {
:> : unsigned char dst[6] __attribute__((packed));
:> : unsigned char src[6] __attribute__((packed));
:> : unsigned char len[2] __attribute__((packed));
:> : unsigned char data[16] __attribute__((packed));
:> : unsigned int p1 __attribute__((packed));
:> : } ether_pkt_t;
:>
:> : int debug=1;
:> : int pktinroute=false;
:> : unsigned int pcnt1=0;
:> : pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
:> : pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
:>
:> : static int raw_init(void *dll_hdl, dispatch_t *dpp, io_net_self_t *n, char *options)
:> : {
:> : raw_ctrl_t *bcp = &raw_ctrl;
:>
:> : if(debug)
:> : syslog(LOG_INFO,"%d:raw_init",pthread_self());
:>
:> : memset(bcp, 0, sizeof(*bcp));
:> : bcp->dll_hdl = dll_hdl;
:> : bcp->dpp = dpp;
:> : bcp->npi = n;
:> : bcp->qtp = SYSPAGE_ENTRY(qtime);
:> : bcp->ifs = NULL;
:>
:> : // the bigtime registration function,all the good stuff is in the raw_reg struct
:> : if(bcp->npi->reg(bcp->dll_hdl, &raw_reg, &bcp->reg_hdl, NULL, NULL) == -1)
:> : {
:> : syslog(LOG_INFO,“raw: reg failed (%d)”,errno);
:> : return errno;
:> : }
:>
:> : // no-clue what this one is about…NO DOCS! (can we remove this?)
:> : if(bcp->npi->reg_byte_pat(bcp->reg_hdl, 0, 0, NULL, _BYTE_PAT_ALL) == -1)
:> : {
:> : syslog(LOG_INFO,“raw: reg_byte_pat failed (%d)”,errno);
:> : bcp->npi->dereg(bcp->reg_hdl);
:> : return errno;
:> : }
:>
:> : spawn_producer();
:>
:> : return EOK;
:> : }
:>
:> : static int raw_rx_up(npkt_t *npkt, void *func_hdl, int off, int len_sub, uint16_t cell, uint16_t endpoint, ui
:> : {
:> : if(debug)
:> : if(npkt->flags & _NPKT_MSG)
:> : syslog(LOG_INFO,"%d:raw_rx_up: ctrl",pthread_self());
:> : else
:> : syslog(LOG_INFO,"%d:raw_rx_up: data (%d)",pthread_self(),pktinroute);
:>
:> : // is it a control msg?
:> : if(npkt->flags & _NPKT_MSG) {
:> : proc_cntrl_msg(npkt,func_hdl,off,len_sub,cell,endpoint,iface);
:> : }
:> : else {
:> : proc_reg_msg(npkt,func_hdl,off,len_sub,cell,endpoint,iface);
:> : }
:>
:> : return 0;
:> : }
:>
:> : static int raw_tx_done(npkt_t *npkt, void *done_hdl, void *func_hdl)
:> : {
:> : raw_ctrl_t *bcp = func_hdl;
:> : ether_pkt_t *pkt;
:>
:> : pkt = (ether_pkt_t *)npkt->buffers.tqh_first->net_iov->iov_base;
:>
:> : if(debug)
:> : syslog(LOG_INFO,"%d:raw_tx_done pktnum:%x NPKT_NOT_TXED=0x%x",
:> : pthread_self(),
:> : ntohl(pkt->p1),
:> : npkt->flags & _NPKT_NOT_TXED);
:>
:> : // release the buff
:> : bcp->npi->free(npkt);
:>
:> : // there is a potential race condition w/the pktinroute var
:> : pthread_mutex_lock(&mutex);
:> : while(pktinroute == false)
:> : pthread_cond_wait(&cond, &mutex);
:> : pktinroute = false;
:> : pthread_cond_signal(&cond);
:> : pthread_mutex_unlock(&mutex);
:>
:> : return 0;
:> : }
:>
:> : void proc_cntrl_msg(npkt_t *npkt, void *func_hdl, int off, int len_sub, uint16_t cell, uint16_t endpoint, uin
:> : {
:> : raw_ctrl_t *bcp = func_hdl;
:> : struct bpf_if *bpif, **bpifp;
:> : uint16_t *type;
:>
:> : // is an endpoint dying?
:> : if(npkt->flags & _NPKT_MSG_DYING) {
:> : for(bpifp = &bcp->ifs, bpif = bcp->ifs;bpif != NULL;bpif=bpif->next,bpifp=&bpif->next)
:> : if(bpif->cell == cell &&
:> : bpif->endpoint == endpoint &&
:> : bpif->iface == iface) {
:>
:> : // goodbye
:> : *bpifp = bpif->next;
:> : bcp->npi->free(bpif);
:> : }
:> : }
:> : else
:> : {
:> : // is it a new endpoint?
:> : type = (uint16_t *)(npkt->buffers.tqh_first->net_iov->iov_base);
:>
:> : switch(*type) {
:> : case _IO_NET_MSG_DL_ADVERT: // only type supported in tcp & bpf
:> : {
:> : io_net_msg_dl_advert_t *ad = (io_net_msg_dl_advert_t *) type;
:>
:> : // look for this interface in our list
:> : for(bpif = bcp->ifs;bpif != NULL;bpif=bpif->next) {
:> : if(stricmp(bpif->ifname,ad->dl.sdl_data) == 0)
:> : break;
:> : }
:>
:> : // is it in there?
:> : if(bpif == NULL) {
:> : // no
:> : if((bpif = bcp->npi->alloc(sizeof(*bpif), 0)) != NULL) {
:> : memset(bpif, 0, sizeof(*bpif));
:> : snprintf(bpif->ifname, IFNAMSIZ, “%s”, ad->dl.sdl_data);
:> : bpif->cell = cell;
:> : bpif->endpoint = endpoint;
:> : bpif->iface = iface;
:> : bpif->sdl_type = ad->dl.sdl_type;
:> : bpif->next = bcp->ifs;
:> : bcp->ifs = bpif;
:> : }
:> : } else {
:> : // yes, copy in the new cell/iface/endpoint (as per the bsp)
:> : bpif->cell = cell;
:> : bpif->endpoint = endpoint;
:> : bpif->iface = iface;
:> : }
:> : }
:> : break;
:> : default:
:> : break;
:>
:> : }
:> : }
:> : // we are done, let io-net know
:> : bcp->npi->tx_done(bcp->reg_hdl,npkt);
:> : }
:>
:> : void proc_reg_msg(npkt_t *npkt, void *func_hdl, int off, int len_sub, uint16_t cell, uint16_t endpoint, uint1
:> : {
:> : raw_ctrl_t *bcp = func_hdl;
:>
:> : // save it on a queue for any lcl proc’s
:>
:> : // we are done, let io-net know
:> : bcp->npi->tx_done(bcp->reg_hdl,npkt);
:> : }
:>
:> : void spawn_producer()
:> : {
:> : pthread_create (NULL,NULL,raw_tx_thread,NULL);
:> : }
:>
:> : void *raw_tx_thread(void *argv)
:> : {
:> : raw_ctrl_t *bcp = &raw_ctrl;
:> : npkt_t *npkt;
:>
:> : while(1) {
:> : // pktinroute could have a race-condition
:> : pthread_mutex_lock(&mutex);
:>
:> : while(pktinroute == true)
:> : pthread_cond_wait(&cond, &mutex);
:>
:> : if(debug)
:> : syslog(LOG_INFO,"%d:raw_tx_thread: sending pkt (%x)",pthread_self(),pcnt1);
:>
:> : // build/send our pkt
:> : npkt = alloc_pkt();
:>
:> : if(npkt != NULL)
:> : // note if tx_down fails a lower layer will be responsible
:> : // or calling tx_done()
:> : bcp->npi->tx_down(bcp->reg_hdl, npkt);
:> : else
:> : syslog(LOG_INFO,“raw_tx_thread: ERROR: npkt == NULL”);
:>
:> : pktinroute = true;
:> : pthread_cond_signal(&cond);
:> : pthread_mutex_unlock(&mutex);
:> : sleep(1);
:> : //usleep(100000); still skips every other pkt
:> : //sleep(3); didn’t make any difference
:> : }
:> : }
:>
:> : npkt_t *alloc_pkt()
:> : {
:> : npkt_t *npkt;
:> : raw_ctrl_t *bcp = &raw_ctrl;
:> : net_buf_t *nbp;
:> : net_iov_t *niovp;
:> : ether_pkt_t *pkt;
:> : struct bpf_if *bpif;
:>
:> : /*
:> : ** ok, we have to allocate space for 5 different structures as
:> : ** per the info from training
:> : **
:> : ** npkt_t: main hdr
:> : ** N*npkt_done_t: 1 per module that add's data to a down pkt (1 in this case)
:> : ** (tcpip uses 2, one for the tcp module & 1 for the converter)
:> : ** net_buf_t: 1 per cluster of iov’s (note this could be bigger than the default
:> : ** structure. It contains an array of pointers as the last element in the struct. One
:> : ** array element for each net_iov_t supported by this net_buf_t
:> : ** net_iov_t: we only need one iov for this application
:> : ** ether_pkt_t: DATA, what actually goes out on the wire
:> : */
:>
:> : // lets allocate space for the pkt, to minimize mallocs it reccomended
:> : // that you allocate all three structs at once (if N>1 then other layers
:> : // could allocate their own npkt_done_t/net_buf_t/net_iov_t structs
:>
:> : // DANGER: the tcpip code doesn’t use alloc_down_npkt, it uses malloc
:> : // and then fills in all the structures by hand, arp.c does it more
:> : // in line w/the spec
:> : npkt = bcp->npi->alloc_down_npkt(bcp->reg_hdl,
:> : sizeof(npkt_t) + 1*sizeof(npkt_done_t) // 1st struct (and 1.a)
:> : + sizeof(net_buf_t) // 2nd struct
:> : + sizeof(net_iov_t) // 3rd struct
:> : + sizeof(ether_pkt_t), // data
:> : (void **)&nbp); // 3rd struct
:>
:> The above should be something like:
:> npkt = bcp->npi->alloc_down_npkt(bcp->reg_hdl, sizeof(net_buf_t) +
:> sizeof(net_iov_t) +
:> sizeof(ether_pkt_t), (void **)&nbp);
:>
:> io-net will alloc the npkt and the appropriate number of npkt_done_t’s. Don’t think
:> it’s really a problem though, just overallocating.
:>
:> : if(npkt == NULL)
:> : return npkt;
:>
:> : // npkt/nbp were filled in by alloc_down_npkt, lets get the other two structs
:> : niovp = (net_iov_t *)(nbp + 1);
:> : pkt = (ether_pkt_t *)(niovp + 1);
:>
:> : // now fill in the pkt
:>
:> : // struct #1: npkt (from arp.c not the tcp code)
:> : bpif = bcp->ifs;
:> : while(bpif == NULL)
:> : sleep(1);
:>
:> : npkt->cell = bpif->cell;
:> : npkt->endpoint = bpif->endpoint;
:> : npkt->iface = bpif->iface;
:>
:> : // fill in the links between struct #1 & #2
:> : // (both arp.c and tcp/if_ndi.c do this)
:> : nbp->ptrs.tqe_next = NULL;
:> : npkt->buffers.tqh_last = &nbp->ptrs.tqe_next;
:> : npkt->buffers.tqh_first = nbp;
:> : nbp->ptrs.tqe_prev = &npkt->buffers.tqh_first;
:>
:> : // struct #1.b: net_buf_t…not filled in?
:>
:> : // struct #2
:> : nbp->niov = 1;
:> : nbp->net_iov = niovp;
:>
:> : // struct #3
:> : // iov_phys == physical address of the data (for DMA?)
:> : SETIOV(niovp, pkt, sizeof(*pkt));
:> : niovp->iov_phys =(paddr_t)(bcp->npi->mphys(niovp->iov_base));
:>
:> : // data
:> : pkt->dst[0] = 0x00;
:> : pkt->dst[1] = 0x01;
:> : pkt->dst[2] = 0x02;
:> : pkt->dst[3] = 0x03;
:> : pkt->dst[4] = 0x04;
:> : pkt->dst[5] = 0x05;
:>
:> : pkt->src[0] = 0xcc;
:> : pkt->src[1] = 0x10;
:> : pkt->src[2] = 0x4b;
:> : pkt->src[3] = 0x1f;
:> : pkt->src[4] = 0x77;
:> : pkt->src[5] = 0x2c;
:>
:> : pkt->len[0] = 0x00;
:> : pkt->len[1] = 0x10;
:>
:> : pkt->data[0] = 0x82;
:> : pkt->data[1] = 0x82;
:> : pkt->data[2] = 0x03;
:> : pkt->data[3] = 0x01;
:> : pkt->data[4] = 0x04;
:> : pkt->data[5] = 0x00;
:> : pkt->data[6] = 0x05;
:> : pkt->data[7] = 0x0F;
:> : pkt->data[8] = 0x0C;
:> : pkt->data[9] = 0x0C;
:> : pkt->data[10] = 0x02;
:> : pkt->data[11] = 0x00;
:> : pkt->data[12] = 0x00;
:> : pkt->data[13] = 0x01;
:> : pkt->data[14] = 0x19;
:> : pkt->data[15] = 0x70;
:>
:> : pkt->p1 = htonl(pcnt1++);
:>
:> You need to set npkt->tot_iov. Here:
:> npkt->tot_iov = 1;
:>
:> : // and finally fill out the npkt_done_t (struct #1.a)
:> : // not sure about the NULL param is for, but arp.c uses it
:>
:> Whatever you pass here, comes out as the last parameter (done_hdl here)
:> in your tx_done function.
:>
:> : if(bcp->npi->reg_tx_done(bcp->reg_hdl,npkt,NULL) == -1)
:> : {
:> : bcp->npi->free(npkt);
:> : syslog(LOG_INFO,“raw: reg_tx_done failed!”);
:> : }
:>
:> : return npkt;
:> : }

pidin on the “slow” machine:
77839 1 sbin/io-net 10o SIGWAITINFO 52K 520K 8192(516K)*
77839 2 sbin/io-net 18o RECEIVE 52K 520K 8192(12K)
77839 3 sbin/io-net 10o RECEIVE 52K 520K 4096(12K)
77839 4 sbin/io-net 10o RECEIVE 52K 520K 8192(12K)
77839 5 sbin/io-net 10o RECEIVE 52K 520K 4096(12K)
77839 6 sbin/io-net 20o RECEIVE 52K 520K 4096(132K)
77839 7 sbin/io-net 10o RECEIVE 52K 520K 4096(132K)
77839 8 sbin/io-net 21r RECEIVE 52K 520K 4096(132K)
77839 13 sbin/io-net 10o CONDVAR 52K 520K 4096(132K)
77839 14 sbin/io-net 10o CONDVAR 52K 520K 4096(132K)
ldqnx.so.2 @b0300000 296K 16K
npm-tcpip.so @b034e000 184K 44K
devn-el900.so @b0387000 48K 4096
npm-pppmgr.so @b0394000 24K 8192
npm-qnet.so @b039c000 132K 4096
eth-raw_conv.so @b8200000 8192 4096
eth-raw.so @b8203000 8192 4096

As you can see I am running the full tcpip stack

On the “fast” machine (which has been running the posted code for 4 days now):
81935 1 sbin/io-net 10o SIGWAITINFO 52K 1704K 8192(516K)*
81935 2 sbin/io-net 9o RECEIVE 52K 1704K 4096(12K)
81935 5 sbin/io-net 10o RECEIVE 52K 1704K 8192(12K)
81935 6 sbin/io-net 20o RECEIVE 52K 1704K 4096(132K)
81935 7 sbin/io-net 10o RECEIVE 52K 1704K 4096(132K)
81935 8 sbin/io-net 21r RECEIVE 52K 1704K 8192(132K)
81935 9 sbin/io-net 18o RECEIVE 52K 1704K 4096(12K)
81935 10 sbin/io-net 10o RECEIVE 52K 1704K 4096(12K)
81935 11 sbin/io-net 10o NANOSLEEP 52K 1704K 4096(132K)
81935 13 sbin/io-net 10o CONDVAR 52K 1704K 4096(132K)
ldqnx.so.2 @b0300000 296K 16K
npm-tcpip.so @b034e000 184K 44K
devn-el900.so @b0387000 48K 4096
npm-pppmgr.so @b0394000 24K 8192
npm-qnet.so @b039c000 132K 4096
eth-raw_conv.so @b8200000 8192 4096
eth-raw.so @b8203000 8192 4096

Looks like they are both running the same drivers

Hmm, I originally didn’t have the condvar & had
a situation where the tx_done() thread was pre-empting after tx_down()
was called but before the pktinroute varable was set to true. I put the condvar
in to simplify debugging and putting all the callbacks in lockstep

If tx_done() is only called when a pkt is reaped that would explain
the delays while the driver waits for a timeout/more-packets but it doesn’t
explain why I get a tx_done() for every packet but they don’t all go out on the wire.

But I don’t think this is what is happening. Timestamps in the syslog show that tx_done()
is called immediately after tx_down()

I can “break” the fast machine by taking the sleep() statement out, it sends about 1000-1500 pkts
and then io-net freezes.




Sean Boudreau wrote:

When locked up, what does pidin show?

Are you running a stack or just your raw thingy?

Are you running the same driver on the ‘fast’
and ‘slow’ machines.

Don’t think it’s a good idea to be waiting
on a condvar in your tx_done func. This can
hold up a driver thread, although don’t see
where it would actually wait here.

It’s probably not a good idea to assume your
tx_done func will be called as soon as the
packet is tx’d. Some drivers may wait for
the next packet to come down before reaping
tx’d packets. You see the deadlock…
Try removing the mutex and condvars.

-seanb

Chris Goebel <> cgoebel@tridium.com> > wrote:
: halfway there…

: ok, I changed two things:
: -initialized npkt->iov as you suggested
: -reformat & fresh install of QNX

: This time “mount raw.so” caused a single packet to go
: out immediately, then everything froze for 30 seconds.

: Sniffing the network shows that every other packet
: hit the wire (30, 32, 34, 36, …) while syslog shows that the io-net
: handshakes were completed for every packet and 2-3 minutes later
: io-net locks up completely

: Sean Boudreau wrote:

:> A couple of notes. Not sure if it’s the root of your problem though…
:
:> Chris Goebel <> cgoebel@tridium.com> > wrote:
:> : I have a problem that occurs on a 200mhz PC box but not on a 400mhz PC
:
:> : I wrote an io-net producer that sends a single raw ethernet packet once a second. On
:> : my 400mhz PC it works perfectly. On a 200mhz PC it sends packets out intermittently,
:> : around 2-3% of the time.
:
:> : I wrote a simple producer and converter. On either machine all calls to io-net return
:> : success and the tx_done function is called 100% of the time. I only found the problem when
:> : I used tcpdump from a different machine to sniff packets and noticed that they weren’t
:> : always getting sent.
:
:> : Here is my code (actually more comments than code), and if you would like to try
:> : my simple producer send me an email for the converter and/or tarball.
:
:> : /*
:> : ** raw.c
:> : **
:> : ** raw ethernet pkt support for QNX
:> : **
:> : ** This producer will eventually provide
:> : ** a simple resource manger to
:> : ** read/write raw ethernet packets
:> : **
:> : ** right now it sends a single raw (non-IP) ethernet
:> : ** packet once a second
:> : */
:
:> : #include <stdio.h>
:> : #include <unistd.h>
:> : #include <atomic.h>
:> : #include <malloc.h>
:> : #include <string.h>
:> : #include <errno.h>
:> : #include <stdlib.h>
:> : #include <net/if.h>
:> : #include <net/if_arp.h>
:> : #include <net/if_types.h>
:> : #include <netinet/in.h>
:> : #include <sys/syspage.h>
:> : #include <sys/neutrino.h>
:> : #include <syslog.h>
:> : #include <sys/dispatch.h>
:> : #include <sys/io-net.h>
:
:> : #include <stdbool.h>
:
:> : static int raw_init(void *dll_hdl, dispatch_t *dpp, io_net_self_t *n, char *options);
:> : static int raw_rx_up(npkt_t *npkt, void *func_hdl, int off, int len_sub, uint16_t cell, uint16_t endpoint, ui
:> : static int raw_tx_done(npkt_t *npkt, void *done_hdl, void *func_hdl);
:> : void proc_cntrl_msg(npkt_t *npkt, void *func_hdl, int off, int len_sub, uint16_t cell, uint16_t endpoint, uin
:> : void proc_reg_msg(npkt_t *npkt, void *func_hdl, int off, int len_sub, uint16_t cell, uint16_t endpoint, uint1
:> : void spawn_producer();
:> : npkt_t *alloc_pkt();
:
:> : // default registration, io-net finds this and
:> : // calls “raw_init”
:> : io_net_dll_entry_t io_net_dll_entry = {
:> : 2,
:> : raw_init, // init fct
:> : NULL // destroy fct
:> : };
:
:> : // the io_net_registrant_t struct refers to this one
:> : static io_net_registrant_funcs_t raw_funcs = {
:> : 9,
:> : raw_rx_up, // called for upward bound pkts
:> : NULL, // called for downward bound pkts
:> : raw_tx_done, // called when they are done with downward pkt
:> : NULL, // shutdown 1 (shutdown requested)
:> : NULL, // shutdown 2 (shutdown demanded)
:> : NULL, // function to advertise yourself upward
:> : NULL, //raw_devctl, // used to proc devctl commands
:> : NULL, //raw_flush, //
:> : NULL // open
:> : };
:
:> : /*
:> : ** we have a list of interfaces for each advertised
:> : ** interface
:> : */
:> : struct bpf_if {
:> : struct bpf_if *next;
:> : char ifname[IFNAMSIZ];
:> : uint16_t cell;
:> : uint16_t endpoint;
:> : uint16_t iface;
:> : u_char sdl_type;
:> : };
:
:> : /* global control structure */
:> : // contains stuff that I want to keep track of
:> : typedef struct {
:> : void *dll_hdl; // a handle to the dll (us) when it was loaded
:> : int reg_hdl; // a dll can register multiple times, each
:> : // registration is given a handle
:> : dispatch_t *dpp; // points to the io-net dispatch (state?) struct
:> : io_net_self_t *npi; // points to the io-net handler fcts
:> : struct qtime_entry *qtp; // ??? (copied from bsp driver)
:> : struct bpf_if *ifs;
:
:> : } raw_ctrl_t;
:
:> : void *raw_tx_thread(void *);
:
:> : // now declare it, so that we can reference it
:> : raw_ctrl_t raw_ctrl;
:
:> : // we “register” with io-net using this struct
:> : // for a name to show up in /dev/io-net it uses the name in the
:> : // up field.
:> : // plus if up & down don’t match then it will fail
:> : // to load
:> : static io_net_registrant_t raw_reg = {
:> : _REG_PRODUCER_DOWN,
:> : “raw.so”, // lib name
:> : “raw”, // our io-net “name” and lbl used by any DOWN producers
:> : “raw”, // what we produce
:> : &raw_ctrl, // (was bpf_ctrl)
:> : // struct with misc data in it
:> : // we can register multiple times, it is highly
:> : // reccomended that a different structure get passed
:> : // in this field each time & that it contain the
:> : // state information for each separate registration
:> : &raw_funcs, // the list of callbacks above
:> : 0 // number of dependancies
:> : };
:
:> : // ethernet pkt
:> : typedef struct {
:> : unsigned char dst[6] attribute ((packed));;
:> : unsigned char src[6] attribute ((packed));;
:> : unsigned char len[2] attribute ((packed));;
:> : unsigned char data[16] attribute ((packed));;
:> : unsigned int p1 attribute ((packed));;
:> : } ether_pkt_t;
:
:> : int debug=1;
:> : int pktinroute=false;
:> : unsigned int pcnt1=0;
:> : pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
:> : pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
:
:> : static int raw_init(void *dll_hdl, dispatch_t *dpp, io_net_self_t *n, char *options)
:> : {
:> : raw_ctrl_t *bcp = &raw_ctrl;
:
:> : if(debug)
:> : syslog(LOG_INFO,"%d:raw_init",pthread_self());
:
:> : memset(bcp, 0, sizeof(*bcp));
:> : bcp->dll_hdl = dll_hdl;
:> : bcp->dpp = dpp;
:> : bcp->npi = n;
:> : bcp->qtp = SYSPAGE_ENTRY(qtime);
:> : bcp->ifs = NULL;
:
:> : // the bigtime registration function,all the good stuff is in the raw_reg struct
:> : if(bcp->npi->reg(bcp->dll_hdl, &raw_reg, &bcp->reg_hdl, NULL, NULL) == -1)
:> : {
:> : syslog(LOG_INFO,“raw: reg failed (%d)”,errno);
:> : return errno;
:> : }
:
:> : // no-clue what this one is about…NO DOCS! (can we remove this?)
:> : if(bcp->npi->reg_byte_pat(bcp->reg_hdl, 0, 0, NULL, _BYTE_PAT_ALL) == -1)
:> : {
:> : syslog(LOG_INFO,“raw: reg_byte_pat failed (%d)”,errno);
:> : bcp->npi->dereg(bcp->reg_hdl);
:> : return errno;
:> : }
:
:> : spawn_producer();
:
:> : return EOK;
:> : }
:
:> : static int raw_rx_up(npkt_t *npkt, void *func_hdl, int off, int len_sub, uint16_t cell, uint16_t endpoint, ui
:> : {
:> : if(debug)
:> : if(npkt->flags & _NPKT_MSG)
:> : syslog(LOG_INFO,"%d:raw_rx_up: ctrl",pthread_self());
:> : else
:> : syslog(LOG_INFO,"%d:raw_rx_up: data (%d)",pthread_self(),pktinroute);
:
:> : // is it a control msg?
:> : if(npkt->flags & _NPKT_MSG) {
:> : proc_cntrl_msg(npkt,func_hdl,off,len_sub,cell,endpoint,iface);
:> : }
:> : else {
:> : proc_reg_msg(npkt,func_hdl,off,len_sub,cell,endpoint,iface);
:> : }
:
:> : return 0;
:> : }
:
:> : static int raw_tx_done(npkt_t *npkt, void *done_hdl, void *func_hdl)
:> : {
:> : raw_ctrl_t *bcp = func_hdl;
:> : ether_pkt_t *pkt;
:
:> : pkt = (ether_pkt_t *)npkt->buffers.tqh_first->net_iov->iov_base;
:
:> : if(debug)
:> : syslog(LOG_INFO,"%d:raw_tx_done pktnum:%x NPKT_NOT_TXED=0x%x",
:> : pthread_self(),
:> : ntohl(pkt->p1),
:> : npkt->flags & _NPKT_NOT_TXED);
:
:> : // release the buff
:> : bcp->npi->free(npkt);
:
:> : // there is a potential race condition w/the pktinroute var
:> : pthread_mutex_lock(&mutex);
:> : while(pktinroute == false)
:> : pthread_cond_wait(&cond, &mutex);
:> : pktinroute = false;
:> : pthread_cond_signal(&cond);
:> : pthread_mutex_unlock(&mutex);
:
:> : return 0;
:> : }
:
:> : void proc_cntrl_msg(npkt_t *npkt, void *func_hdl, int off, int len_sub, uint16_t cell, uint16_t endpoint, uin
:> : {
:> : raw_ctrl_t *bcp = func_hdl;
:> : struct bpf_if *bpif, **bpifp;
:> : uint16_t *type;
:
:> : // is an endpoint dying?
:> : if(npkt->flags & _NPKT_MSG_DYING) {
:> : for(bpifp = &bcp->ifs, bpif = bcp->ifs;bpif != NULL;bpif=bpif->next,bpifp=&bpif->next)
:> : if(bpif->cell == cell &&
:> : bpif->endpoint == endpoint &&
:> : bpif->iface == iface) {
:
:> : // goodbye
:> : *bpifp = bpif->next;
:> : bcp->npi->free(bpif);
:> : }
:> : }
:> : else
:> : {
:> : // is it a new endpoint?
:> : type = (uint16_t *)(npkt->buffers.tqh_first->net_iov->iov_base);
:
:> : switch(*type) {
:> : case _IO_NET_MSG_DL_ADVERT: // only type supported in tcp & bpf
:> : {
:> : io_net_msg_dl_advert_t *ad = (io_net_msg_dl_advert_t *) type;
:
:> : // look for this interface in our list
:> : for(bpif = bcp->ifs;bpif != NULL;bpif=bpif->next) {
:> : if(stricmp(bpif->ifname,ad->dl.sdl_data) == 0)
:> : break;
:> : }
:
:> : // is it in there?
:> : if(bpif == NULL) {
:> : // no
:> : if((bpif = bcp->npi->alloc(sizeof(*bpif), 0)) != NULL) {
:> : memset(bpif, 0, sizeof(*bpif));
:> : snprintf(bpif->ifname, IFNAMSIZ, “%s”, ad->dl.sdl_data);
:> : bpif->cell = cell;
:> : bpif->endpoint = endpoint;
:> : bpif->iface = iface;
:> : bpif->sdl_type = ad->dl.sdl_type;
:> : bpif->next = bcp->ifs;
:> : bcp->ifs = bpif;
:> : }
:> : } else {
:> : // yes, copy in the new cell/iface/endpoint (as per the bsp)
:> : bpif->cell = cell;
:> : bpif->endpoint = endpoint;
:> : bpif->iface = iface;
:> : }
:> : }
:> : break;
:> : default:
:> : break;
:
:> : }
:> : }
:> : // we are done, let io-net know
:> : bcp->npi->tx_done(bcp->reg_hdl,npkt);
:> : }
:
:> : void proc_reg_msg(npkt_t *npkt, void *func_hdl, int off, int len_sub, uint16_t cell, uint16_t endpoint, uint1
:> : {
:> : raw_ctrl_t *bcp = func_hdl;
:
:> : // save it on a queue for any lcl proc’s
:
:> : // we are done, let io-net know
:> : bcp->npi->tx_done(bcp->reg_hdl,npkt);
:> : }
:
:> : void spawn_producer()
:> : {
:> : pthread_create (NULL,NULL,raw_tx_thread,NULL);
:> : }
:
:> : void *raw_tx_thread(void *argv)
:> : {
:> : raw_ctrl_t *bcp = &raw_ctrl;
:> : npkt_t *npkt;
:
:> : while(1) {
:> : // pktinroute could have a race-condition
:> : pthread_mutex_lock(&mutex);
:
:> : while(pktinroute == true)
:> : pthread_cond_wait(&cond, &mutex);
:
:> : if(debug)
:> : syslog(LOG_INFO,"%d:raw_tx_thread: sending pkt (%x)",pthread_self(),pcnt1);
:
:> : // build/send our pkt
:> : npkt = alloc_pkt();
:
:> : if(npkt != NULL)
:> : // note if tx_down fails a lower layer will be responsible
:> : // or calling tx_done()
:> : bcp->npi->tx_down(bcp->reg_hdl, npkt);
:> : else
:> : syslog(LOG_INFO,“raw_tx_thread: ERROR: npkt == NULL”);
:
:> : pktinroute = true;
:> : pthread_cond_signal(&cond);
:> : pthread_mutex_unlock(&mutex);
:> : sleep(1);
:> : //usleep(100000); still skips every other pkt
:> : //sleep(3); didn’t make any difference
:> : }
:> : }
:
:> : npkt_t *alloc_pkt()
:> : {
:> : npkt_t *npkt;
:> : raw_ctrl_t *bcp = &raw_ctrl;
:> : net_buf_t *nbp;
:> : net_iov_t *niovp;
:> : ether_pkt_t pkt;
:> : struct bpf_if bpif;
:
:> : /

:> : ** ok, we have to allocate space for 5 different structures as
:> : ** per the info from training
:> : **
:> : ** npkt_t: main hdr
:> : ** N
npkt_done_t: 1 per module that add’s data to a down pkt (1 in this case)
:> : ** (tcpip uses 2, one for the tcp module & 1 for the converter)
:> : ** net_buf_t: 1 per cluster of iov’s (note this could be bigger than the default
:> : ** structure. It contains an array of pointers as the last element in the struct. One
:> : ** array element for each net_iov_t supported by this net_buf_t
:> : ** net_iov_t: we only need one iov for this application
:> : ** ether_pkt_t: DATA, what actually goes out on the wire
:> : /
:
:> : // lets allocate space for the pkt, to minimize mallocs it reccomended
:> : // that you allocate all three structs at once (if N>1 then other layers
:> : // could allocate their own npkt_done_t/net_buf_t/net_iov_t structs
:
:> : // DANGER: the tcpip code doesn’t use alloc_down_npkt, it uses malloc
:> : // and then fills in all the structures by hand, arp.c does it more
:> : // in line w/the spec
:> : npkt = bcp->npi->alloc_down_npkt(bcp->reg_hdl,
:> : sizeof(npkt_t) + 1
sizeof(npkt_done_t) // 1st struct (and 1.a)
:> : + sizeof(net_buf_t) // 2nd struct
:> : + sizeof(net_iov_t) // 3rd struct
:> : + sizeof(ether_pkt_t), // data
:> : (void **)&nbp); // 3rd struct
:
:> The above should be something like:
:> npkt = bcp->npi->alloc_down_npkt(bcp->reg_hdl, sizeof(net_buf_t) +
:> sizeof(net_iov_t) +
:> sizeof(ether_pkt_t), (void **)&nbp);
:
:> io-net will alloc the npkt and the appropriate number of npkt_done_t’s. Don’t think
:> it’s really a problem though, just overallocating.
:
:> : if(npkt == NULL)
:> : return npkt;
:
:> : // npkt/nbp were filled in by alloc_down_npkt, lets get the other two structs
:> : niovp = (net_iov_t *)(nbp + 1);
:> : pkt = (ether_pkt_t *)(niovp + 1);
:
:> : // now fill in the pkt
:
:> : // struct #1: npkt (from arp.c not the tcp code)
:> : bpif = bcp->ifs;
:> : while(bpif == NULL)
:> : sleep(1);
:
:> : npkt->cell = bpif->cell;
:> : npkt->endpoint = bpif->endpoint;
:> : npkt->iface = bpif->iface;
:
:> : // fill in the links between struct #1 & #2
:> : // (both arp.c and tcp/if_ndi.c do this)
:> : nbp->ptrs.tqe_next = NULL;
:> : npkt->buffers.tqh_last = &nbp->ptrs.tqe_next;
:> : npkt->buffers.tqh_first = nbp;
:> : nbp->ptrs.tqe_prev = &npkt->buffers.tqh_first;
:
:> : // struct #1.b: net_buf_t…not filled in?
:
:> : // struct #2
:> : nbp->niov = 1;
:> : nbp->net_iov = niovp;
:
:> : // struct #3
:> : // iov_phys == physical address of the data (for DMA?)
:> : SETIOV(niovp, pkt, sizeof(*pkt));
:> : niovp->iov_phys =(paddr_t)(bcp->npi->mphys(niovp->iov_base));
:
:> : // data
:> : pkt->dst[0] = 0x00;
:> : pkt->dst[1] = 0x01;
:> : pkt->dst[2] = 0x02;
:> : pkt->dst[3] = 0x03;
:> : pkt->dst[4] = 0x04;
:> : pkt->dst[5] = 0x05;
:
:> : pkt->src[0] = 0xcc;
:> : pkt->src[1] = 0x10;
:> : pkt->src[2] = 0x4b;
:> : pkt->src[3] = 0x1f;
:> : pkt->src[4] = 0x77;
:> : pkt->src[5] = 0x2c;
:
:> : pkt->len[0] = 0x00;
:> : pkt->len[1] = 0x10;
:
:> : pkt->data[0] = 0x82;
:> : pkt->data[1] = 0x82;
:> : pkt->data[2] = 0x03;
:> : pkt->data[3] = 0x01;
:> : pkt->data[4] = 0x04;
:> : pkt->data[5] = 0x00;
:> : pkt->data[6] = 0x05;
:> : pkt->data[7] = 0x0F;
:> : pkt->data[8] = 0x0C;
:> : pkt->data[9] = 0x0C;
:> : pkt->data[10] = 0x02;
:> : pkt->data[11] = 0x00;
:> : pkt->data[12] = 0x00;
:> : pkt->data[13] = 0x01;
:> : pkt->data[14] = 0x19;
:> : pkt->data[15] = 0x70;
:
:> : pkt->p1 = htonl(pcnt1++);
:
:> You need to set npkt->tot_iov. Here:
:> npkt->tot_iov = 1;
:
:> : // and finally fill out the npkt_done_t (struct #1.a)
:> : // not sure about the NULL param is for, but arp.c uses it
:
:> Whatever you pass here, comes out as the last parameter (done_hdl here)
:> in your tx_done function.
:
:> : if(bcp->npi->reg_tx_done(bcp->reg_hdl,npkt,NULL) == -1)
:> : {
:> : bcp->npi->free(npkt);
:> : syslog(LOG_INFO,“raw: reg_tx_done failed!”);
:> : }
:
:> : return npkt;
:> : }

Also noticed you aren’t setting npkt->framelen anywhere on your
tx packets:

npkt->framelen = sizeof(ether_pkt_t);

Might explain what you are seeing…

-seanb

wow! that was it!

works on the p200, works on my target embedded board, all current problems
are solved!

THANK YOU!


Sean Boudreau wrote:

Also noticed you aren’t setting npkt->framelen anywhere on your
tx packets:

npkt->framelen = sizeof(ether_pkt_t);

Might explain what you are seeing…

-seanb

Just as a final (famous last words :slight_smile: point, still don’t
see what you are protecting with the mutex / condvar. You
should be able to send packets whenever and have your tx_done()
func called some time after they are tx’d. As mentioned previously
you shouldn’t rely on your tx_done() func being called immediately.
From what I’ve seen, you should be able to just remove all the
extra locking.

-seanb

Chris Goebel <cgoebel@tridium.com> wrote:
: wow! that was it!

: works on the p200, works on my target embedded board, all current problems
: are solved!

: THANK YOU!


: Sean Boudreau wrote:

:> Also noticed you aren’t setting npkt->framelen anywhere on your
:> tx packets:
:>
:> npkt->framelen = sizeof(ether_pkt_t);
:>
:> Might explain what you are seeing…
:>
:> -seanb

I use the condvar to avoid a classic deadlock scenario:

tx_done()
{
pktdone = true
}

generator()
{
while(pktdone == false) sleep();
tx_down()
pktdone=false
}

-generator() & tx_done() run in separate threads
-it is possible for:
-generator to call tx_down()
-tx_done() to be called & set pktdone == true
-generator to set pktdone == false
-generator to loop forever in its while() {sleep()} statement since tx_done()
has already been called


Originally I didn’t have any sleep() calls and this would happen frequently.

I don’t really care how long it takes between calls, I just want to make
sure the cycle never stops. This code was written as an exercise to learn
the minimum number of steps necessary to send a packet. It is by no
means optimal.

I could call generator() from tx_done() but I didn’t think of it (good idea btw) :slight_smile:



Sean Boudreau wrote:

Just as a final (famous last words > :slight_smile: > point, still don’t
see what you are protecting with the mutex / condvar. You
should be able to send packets whenever and have your tx_done()
func called some time after they are tx’d. As mentioned previously
you shouldn’t rely on your tx_done() func being called immediately.
From what I’ve seen, you should be able to just remove all the
extra locking.

-seanb

Chris Goebel <> cgoebel@tridium.com> > wrote:
: wow! that was it!

: works on the p200, works on my target embedded board, all current problems
: are solved!

: THANK YOU!

: Sean Boudreau wrote:

:> Also noticed you aren’t setting npkt->framelen anywhere on your
:> tx packets:
:
:> npkt->framelen = sizeof(ether_pkt_t);
:
:> Might explain what you are seeing…
:
:> -seanb

I meant just get rid of pktdone altogether. You send
a packet periodically, it is reaped sometime thereafter.

-seanb

Chris Goebel <cgoebel@tridium.com> wrote:
: I use the condvar to avoid a classic deadlock scenario:

: tx_done()
: {
: pktdone = true
: }

: generator()
: {
: while(pktdone == false) sleep();
: tx_down()
: pktdone=false
: }

: -generator() & tx_done() run in separate threads
: -it is possible for:
: -generator to call tx_down()
: -tx_done() to be called & set pktdone == true
: -generator to set pktdone == false
: -generator to loop forever in its while() {sleep()} statement since tx_done()
: has already been called


: Originally I didn’t have any sleep() calls and this would happen frequently.

: I don’t really care how long it takes between calls, I just want to make
: sure the cycle never stops. This code was written as an exercise to learn
: the minimum number of steps necessary to send a packet. It is by no
: means optimal.

: I could call generator() from tx_done() but I didn’t think of it (good idea btw) :slight_smile:



: Sean Boudreau wrote:

:> Just as a final (famous last words :slight_smile: point, still don’t
:> see what you are protecting with the mutex / condvar. You
:> should be able to send packets whenever and have your tx_done()
:> func called some time after they are tx’d. As mentined previously
:> you shouldn’t rely on your tx_done() func being called immediately.
:> From what I’ve seen, you should be able to just remove all the
:> extra locking.
:>
:> -seanb
:>
:> Chris Goebel <cgoebel@tridium.com> wrote:
:> : wow! that was it!
:>
:> : works on the p200, works on my target embedded board, all current problems
:> : are solved!
:>
:> : THANK YOU!
:>
:> : Sean Boudreau wrote:
:>
:> :> Also noticed you aren’t setting npkt->framelen anywhere on your
:> :> tx packets:
:> :>
:> :> npkt->framelen = sizeof(ether_pkt_t);
:> :>
:> :> Might explain what you are seeing…
:> :>
:> :> -seanb

Chris Goebel <cgoebel@tridium.com> wrote:

I use the condvar to avoid a classic deadlock scenario:

tx_done()
{
pktdone = true
}

generator()
{
while(pktdone == false) sleep();
tx_down()
pktdone=false
}

-generator() & tx_done() run in separate threads
-it is possible for:
-generator to call tx_down()
-tx_done() to be called & set pktdone == true
-generator to set pktdone == false
-generator to loop forever in its while() {sleep()} statement since tx_done()
has already been called

I'm not sure I totally follow, but change generator() as:

{
while(pktdone == false) sleep();
pktdone=false;
tx_down();
}

Should this solve the problem ?

-xtang


Originally I didn’t have any sleep() calls and this would happen frequently.

I don’t really care how long it takes between calls, I just want to make
sure the cycle never stops. This code was written as an exercise to learn
the minimum number of steps necessary to send a packet. It is by no
means optimal.

I could call generator() from tx_done() but I didn’t think of it (good idea btw) > :slight_smile:



Sean Boudreau wrote:

Just as a final (famous last words > :slight_smile: > point, still don’t
see what you are protecting with the mutex / condvar. You
should be able to send packets whenever and have your tx_done()
func called some time after they are tx’d. As mentined previously
you shouldn’t rely on your tx_done() func being called immediately.
From what I’ve seen, you should be able to just remove all the
extra locking.

-seanb

Chris Goebel <> cgoebel@tridium.com> > wrote:
: wow! that was it!

: works on the p200, works on my target embedded board, all current problems
: are solved!

: THANK YOU!

: Sean Boudreau wrote:

:> Also noticed you aren’t setting npkt->framelen anywhere on your
:> tx packets:
:
:> npkt->framelen = sizeof(ether_pkt_t);
:
:> Might explain what you are seeing…
:
:> -seanb

I agree, that is another way to avoid using a condvar

I am in the process of adding a pool of pkt buffers & replacing generator()
with a resource manager. I am planning to use a mutex to prevent race
conditions when buff’s are added removed from the pool.

I don’t think I’ll need a condvar but I am curious about the
amount of concern it has drawn

Is there a fundamental reason to avoid condvar’s and/or mutex’s?



Xiaodan Tang wrote:

Chris Goebel <> cgoebel@tridium.com> > wrote:
I use the condvar to avoid a classic deadlock scenario:

tx_done()
{
pktdone = true
}

generator()
{
while(pktdone == false) sleep();
tx_down()
pktdone=false
}

-generator() & tx_done() run in separate threads
-it is possible for:
-generator to call tx_down()
-tx_done() to be called & set pktdone == true
-generator to set pktdone == false
-generator to loop forever in its while() {sleep()} statement since tx_done()
has already been called

I am not totally followed, but change generator() as:

{
while(pktdone == false) sleep();
pktdone=false;
tx_down();
}

Should this solve the problem ?

-xtang

Originally I didn’t have any sleep() calls and this would happen frequently.

I don’t really care how long it takes between calls, I just want to make
sure the cycle never stops. This code was written as an exercise to learn
the minimum number of steps necessary to send a packet. It is by no
means optimal.

I could call generator() from tx_done() but I didn’t think of it (good idea btw) > :slight_smile:

Sean Boudreau wrote:

Just as a final (famous last words > :slight_smile: > point, still don’t
see what you are protecting with the mutex / condvar. You
should be able to send packets whenever and have your tx_done()
func called some time after they are tx’d. As mentined previously
you shouldn’t rely on your tx_done() func being called immediately.
From what I’ve seen, you should be able to just remove all the
extra locking.

-seanb

Chris Goebel <> cgoebel@tridium.com> > wrote:
: wow! that was it!

: works on the p200, works on my target embedded board, all current problems
: are solved!

: THANK YOU!

: Sean Boudreau wrote:

:> Also noticed you aren’t setting npkt->framelen anywhere on your
:> tx packets:
:
:> npkt->framelen = sizeof(ether_pkt_t);
:
:> Might explain what you are seeing…
:
:> -seanb

Chris Goebel <cgoebel@tridium.com> wrote:
: I agree, that is another way to avoid using a condvar

: I am in the process of adding a pool of pkt buffers & replacing generator()
: with a resource manager. I am planning to use a mutex to prevent race
: conditions when buff’s are added removed from the pool.

: I don’t think I’ll need a condvar but I am curious about the
: amount of concern it has drawn

: Is there a fundamental reason to avoid condvar’s and/or mutex’s?


Not in general but as mentioned, the way you are using them
assumes the driver will release the packet as soon as it is
tx’d. If that doesn’t happen (maybe it’s waiting for another
packet to come down before checking whether it has anything to release),
you have a deadlock in your code as posted.

-seanb