6.3.2 virtual memory manager slower, page reference delay

According to QNX 6.3.2 release notes,

‧QNX 6.3.2 includes a completely rearchitected virtual memory (VM)
manager.
‧In some cases, the new code may delay memory-manager operations
until a page is referenced, which could affect some code. You can use
the POSIX mlock() or mlockall() functions to disable this behavior.
‧Large block writes to /dev/shmem are slower than in the last
release.

We ran the test both with and without mlockall() on QNX 6.3.2 and on QNX
6.3.0 SP2, using the same hardware. Ordered by measured access time
(shortest first), we found that

“w/o mlockall on QNX 6.3.0 SP2” < “mlockall on QNX
6.3.2” < “w/o mlockall on QNX 6.3.2”.

These results raise two questions:
1. Even with mlockall(), QNX 6.3.2 is slower than QNX 6.3.0 SP2 without
mlockall(). The completely rearchitected virtual memory manager seems
slower than the older version.

2. There are still delayed memory operations when a page is referenced,
even after calling mlockall().

Below is our test code without mlockall(), used on both 6.3.2 and 6.3.0 SP2:

#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <errno.h>
#include <stdlib.h>
#include <unistd.h>
#include <limits.h>
#include <sys/mman.h>
#include <sys/neutrino.h>
#include <sys/dispatch.h>
#include <sys/syspage.h>
#include <iostream>
#include <inttypes.h>
#include <atomic.h>
using namespace std;

int MemTesting(unsigned short *);
unsigned short *createMem(int &fd);


/*
 * Test driver: create and map the shared memory object, run the timing
 * test on the mapping, then close the descriptor.
 *
 * Returns 0 on success, EXIT_FAILURE when the mapping could not be set up.
 */
int main(int argc, char **argv)
{
    (void)argc;
    (void)argv;

    int fd = -1;
    unsigned short *addr = createMem(fd);

    if (addr == NULL) {
        /*
         * createMem() has already printed the reason. MemTesting()
         * dereferences its argument, so it must not be called here.
         */
        if (fd != -1) {
            close(fd);
        }
        return EXIT_FAILURE;
    }

    MemTesting(addr);
    close(fd);
    return 0;
}

/*
 * Create (or reopen) the shared memory object "/dev/shmem/Memo00", size it
 * to 0x00241000 bytes and map it read/write into this process.
 *
 * fd      [out] receives the descriptor returned by shm_open(), or -1 when
 *               the open itself failed. The descriptor is left open on
 *               every path after a successful open; the caller closes it.
 * returns the start of the mapping, or NULL after printing the failing
 *         step to stderr.
 */
unsigned short *createMem(int &fd)
{
    const size_t map_size = 0x00241000;
    mode_t nmask;
    void *mapping;

    nmask = S_IRUSR | S_IWUSR |     /* owner read write */
            S_IRGRP | S_IWGRP |     /* group read write */
            S_IROTH | S_IWOTH;      /* other read write */
    nmask |= S_IWRITE | S_IREAD | S_IEXEC;

    /*
     * Create a new memory object. O_CREAT is used without O_EXCL, so an
     * object left behind by a run that never reached shm_unlink() is
     * reopened instead of causing a failure.
     */
    fd = shm_open("/dev/shmem/Memo00", O_RDWR | O_CREAT, nmask);
    if (fd == -1) {
        fprintf(stderr, "Open failed:%s\n", strerror(errno));
        return NULL;
    }

    /* Set the memory object's size */
    if (ftruncate(fd, (off_t)map_size) == -1) {
        fprintf(stderr, "ftruncate: %s\n", strerror(errno));
        return NULL;
    }

    /* Map the memory object; compare against MAP_FAILED before casting. */
    mapping = mmap(0, map_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if (mapping == MAP_FAILED) {
        fprintf(stderr, "mmap failed: %s\n", strerror(errno));
        return NULL;
    }

    return (unsigned short *)mapping;
}

/*
 * Time single accesses to the mapped region with ClockCycles() and print
 * the per-iteration results.
 *
 * Three series of RUNTIME samples are taken:
 *   A - one read per sample, starting at incAddr + 4095, stride 60 elements
 *   B - one write of 0 per sample, starting at incAddr, stride 40 elements
 *   C - two back-to-back ClockCycles() calls with nothing in between
 * The average of series B is printed last, then the shared memory object
 * is unlinked.
 *
 * incAddr  start of a mapping of at least (4095 + 60 * 1023 + 1) elements.
 * returns  EXIT_SUCCESS, or EXIT_FAILURE when incAddr is NULL.
 */
int MemTesting(unsigned short *incAddr)
{
    enum { RUNTIME = 1024 };

    /* volatile: the timed load/store must not be optimised away */
    volatile unsigned short *pwData;
    unsigned wData;
    long long int TimerRunTbA[RUNTIME];
    long long int TimerRunTbB[RUNTIME];
    long long int TimerRunTbC[RUNTIME];
    long long int TimerStart;
    long long int TimerEnd;
    long long int tmp = 0;
    int i;
    char pstrBuff[128];

    if (incAddr == NULL) {
        fprintf(stderr, "MemTesting: no mapping to test\n");
        return EXIT_FAILURE;
    }

    /* Series A: timed reads */
    pwData = incAddr + 4095;
    for (i = 0; i < RUNTIME; i++) {
        TimerStart = ClockCycles();
        wData = *pwData;
        TimerEnd = ClockCycles();
        (void)wData;
        TimerRunTbA[i] = TimerEnd - TimerStart;
        pwData += 60;
    }

    /* Series B: timed writes to shared memory */
    pwData = incAddr;
    for (i = 0; i < RUNTIME; i++) {
        TimerStart = ClockCycles();
        *pwData = 0x0;
        TimerEnd = ClockCycles();
        TimerRunTbB[i] = TimerEnd - TimerStart;
        tmp = tmp + TimerRunTbB[i];
        pwData += 40;
    }

    /* Series C: cost of the two ClockCycles() calls alone */
    for (i = 0; i < RUNTIME; i++) {
        TimerStart = ClockCycles();
        TimerEnd = ClockCycles();
        TimerRunTbC[i] = TimerEnd - TimerStart;
    }

    printf(" Read Write Clk cycle\n");

    for (i = 0; i < RUNTIME; i++) {
        snprintf(pstrBuff, sizeof pstrBuff, "\t%lld \t\t%lld \t\t%lld",
                 TimerRunTbA[i], TimerRunTbB[i], TimerRunTbC[i]);
        printf("%4d ) %s\n", i, pstrBuff);
    }
    tmp = tmp / RUNTIME;
    printf("the average is %lld\n", tmp);

    /*
     * The memory object remains in the system after the close.
     * To remove a memory object you must unlink it like a file.
     * This may be done by another process.
     */
    shm_unlink("/dev/shmem/Memo00");

    return EXIT_SUCCESS;
}


Below is our test code with mlockall(), used on QNX 6.3.2:

#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <errno.h>
#include <stdlib.h>
#include <unistd.h>
#include <limits.h>
#include <sys/mman.h>
#include <sys/neutrino.h>
#include <sys/dispatch.h>
#include <sys/syspage.h>
#include <iostream>
#include <inttypes.h>
#include <atomic.h>
using namespace std;

int MemTesting(unsigned short *);
unsigned short *createMem(int &fd);


/*
 * Test driver: lock and map the shared memory object via createMem(), run
 * the timing test on the mapping, then close the descriptor.
 *
 * Returns 0 on success, EXIT_FAILURE when the mapping could not be set up.
 */
int main(int argc, char **argv)
{
    (void)argc;
    (void)argv;

    int fd = -1;
    unsigned short *addr = createMem(fd);

    if (addr == NULL) {
        /*
         * createMem() has already printed the reason. MemTesting()
         * dereferences its argument, so it must not be called here.
         */
        if (fd != -1) {
            close(fd);
        }
        return EXIT_FAILURE;
    }

    MemTesting(addr);
    close(fd);
    return 0;
}

/*
 * Request locking of future mappings with mlockall(MCL_FUTURE), then create
 * (or reopen) the shared memory object "/dev/shmem/Memo00", size it to
 * 0x00241000 bytes, map it read/write, and time two initial reads from it.
 *
 * fd      [out] receives the descriptor returned by shm_open(), or -1 when
 *               the open itself failed. The descriptor is left open on
 *               every path after a successful open; the caller closes it.
 * returns the start of the mapping, or NULL after printing the failing
 *         step to stderr.
 */
unsigned short *createMem(int &fd)
{
    const size_t map_size = 0x00241000;
    mode_t nmask;
    void *mapping;
    volatile unsigned short *probe;   /* volatile: keep the timed reads */
    unsigned wData, wData2;
    long long int TimerStart;
    long long int TimerEnd;

    nmask = S_IRUSR | S_IWUSR |     /* owner read write */
            S_IRGRP | S_IWGRP |     /* group read write */
            S_IROTH | S_IWOTH;      /* other read write */
    nmask |= S_IWRITE | S_IREAD | S_IEXEC;

    /*
     * mlockall: must precede mmap() so that MCL_FUTURE applies to the
     * mapping created below. A failure is reported but not fatal; the
     * test then simply runs without locking.
     */
    if (mlockall(MCL_FUTURE) == -1) {
        fprintf(stderr, "mlockall failed: %s\n", strerror(errno));
    }

    /*
     * Create a new memory object. O_CREAT is used without O_EXCL, so an
     * object left behind by a run that never reached shm_unlink() is
     * reopened instead of causing a failure.
     */
    fd = shm_open("/dev/shmem/Memo00", O_RDWR | O_CREAT, nmask);
    if (fd == -1) {
        fprintf(stderr, "Open failed:%s\n", strerror(errno));
        return NULL;
    }

    /* Set the memory object's size */
    if (ftruncate(fd, (off_t)map_size) == -1) {
        fprintf(stderr, "ftruncate: %s\n", strerror(errno));
        return NULL;
    }

    /*
     * Map the memory object. The result is checked before it is touched:
     * dereferencing MAP_FAILED would fault.
     */
    mapping = mmap(0, map_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if (mapping == MAP_FAILED) {
        fprintf(stderr, "mmap failed: %s\n", strerror(errno));
        return NULL;
    }

    /*
     * Time the first two reads from the fresh mapping (elements 0 and 2).
     * A separate probe pointer is used so the value returned to the caller
     * stays the start of the mapping.
     */
    probe = (unsigned short *)mapping;
    TimerStart = ClockCycles();
    wData = *probe;
    probe = probe + 2;
    wData2 = *probe;
    TimerEnd = ClockCycles();
    (void)wData;
    (void)wData2;
    cout << "time to run add1 is " <<
        TimerEnd - TimerStart << "clock \n";

    return (unsigned short *)mapping;
}

    /*
     * Time single accesses to the mapped region with ClockCycles() and
     * print the per-iteration results.
     *
     * Three series of RUNTIME samples are taken:
     *   A - one read per sample, starting at incAddr + 4095, stride 60
     *       elements
     *   B - one write of 0 per sample, starting at incAddr, stride 40
     *       elements
     *   C - two back-to-back ClockCycles() calls with nothing in between
     * The average of series B is printed last, then the shared memory
     * object is unlinked.
     *
     * incAddr  start of a mapping of at least (4095 + 60 * 1023 + 1)
     *          elements.
     * returns  EXIT_SUCCESS, or EXIT_FAILURE when incAddr is NULL.
     */
    int MemTesting(unsigned short *incAddr)
    {
        enum { RUNTIME = 1024 };

        /* volatile: the timed load/store must not be optimised away */
        volatile unsigned short *pwData;
        unsigned wData;
        long long int TimerRunTbA[RUNTIME];
        long long int TimerRunTbB[RUNTIME];
        long long int TimerRunTbC[RUNTIME];
        long long int TimerStart;
        long long int TimerEnd;
        long long int tmp = 0;
        int i;
        char pstrBuff[128];

        if (incAddr == NULL) {
            fprintf(stderr, "MemTesting: no mapping to test\n");
            return EXIT_FAILURE;
        }

        /* Series A: timed reads */
        pwData = incAddr + 4095;
        for (i = 0; i < RUNTIME; i++) {
            TimerStart = ClockCycles();
            wData = *pwData;
            TimerEnd = ClockCycles();
            (void)wData;
            TimerRunTbA[i] = TimerEnd - TimerStart;
            pwData += 60;
        }

        /* Series B: timed writes to shared memory */
        pwData = incAddr;
        for (i = 0; i < RUNTIME; i++) {
            TimerStart = ClockCycles();
            *pwData = 0x0;
            TimerEnd = ClockCycles();
            TimerRunTbB[i] = TimerEnd - TimerStart;
            tmp = tmp + TimerRunTbB[i];
            pwData += 40;
        }

        /* Series C: cost of the two ClockCycles() calls alone */
        for (i = 0; i < RUNTIME; i++) {
            TimerStart = ClockCycles();
            TimerEnd = ClockCycles();
            TimerRunTbC[i] = TimerEnd - TimerStart;
        }

        printf(" Read Write Clk cycle\n");

        for (i = 0; i < RUNTIME; i++) {
            snprintf(pstrBuff, sizeof pstrBuff, "\t%lld \t\t%lld \t\t%lld",
                     TimerRunTbA[i], TimerRunTbB[i], TimerRunTbC[i]);
            printf("%d )\t %s\n", i, pstrBuff);
        }
        tmp = tmp / RUNTIME;
        printf("the average is %lld\n", tmp);

        /*
         * The memory object remains in the system after the close.
         * To remove a memory object you must unlink it like a file.
         * This may be done by another process.
         */
        shm_unlink("/dev/shmem/Memo00");

        return EXIT_SUCCESS;
    }