PxTranslateToUTF

Alright I think I’m just lost here but maybe someone can straighten me out:
I have the following Korean Character:
KSC5601 0xD3CB (Looks like a box with a 7 and A above it) (Sorry about
the description)
When I look this up I find that:
KSC5601 0xD3CB = Unicode 0x6793 = U+6793

I’ve then looked up the UTF-8 FAQ
http://www.cl.cam.ac.uk/~mgk25/unicode.html
This states that codes greater than U+007F are encoded as a sequence of
several bytes, etc…

But when I use PtTranslateToUTF() on 0xD3CB I get as output 0x6C9372
This looks like Unicode and Not UTF-8. Is this correct or is the
documentation saying UTF-8 really Unicode?

Here is a a callback that I have on a button simple app builder program to
demonstrate.

Thanks for any info people can give

Jeff


Jeffrey B. Holtz
Software Engineering
Electro Scientific Industries, Inc.
Ph: 734-332-7054
Fax: 734-332-7077
email: holtzj@esi.com

/* Callback /
/
AppBuilder Photon Code Lib /
/
Version 2.01 */

/* Standard headers */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>

/* Local headers */
#include “ablibs.h”
#include “abimport.h”
#include “proto.h”

// The following equates to:
// KSC5601 - 0xD3CB
// Unicode - 0x6C93
char * pcKSC5601 = “\xD3\xCB”;
char * pcUnicode = “\x6C\x93”;
#define BUFFER_SIZE 256

int
ButtonPressed( PtWidget_t *widget, ApInfo_t *apinfo, PtCallbackInfo_t
*cbinfo )
{
struct PxTransCtrl * trans = NULL;
FontDetails * fontDetails = NULL;
char * pmbUTF8 = NULL;
int nConsumed = 0, nProduced = 0;
int nTranslateCode = 0;
int lenKSC5601 = 0;
int lenUnicode = 0;
int nNumFonts = 0;
int nfont = 0;
int nloop = 0;

uchar_t fontName[MAX_FONT_TAG];

memset(fontName,0x00,sizeof(fontName));

/* eliminate ‘unreferenced’ warnings */
widget = widget, apinfo = apinfo, cbinfo = cbinfo;

trans = PxTranslateSet(NULL,“EUC-KR”);
if (trans == NULL)
{
printf(“Unknown translation State\n”);
return (Pt_CONTINUE);
}

if (pmbUTF8 == NULL)
pmbUTF8 = calloc(BUFFER_SIZE, sizeof(char));

memset(pmbUTF8,0x00,sizeof(BUFFER_SIZE));

printf("\nTranslating pcKSC5601 To UTF-8\n");
lenKSC5601 = strlen(pcKSC5601);
printf("pcKSC5601 = ");
for (nloop=0; nloop<lenKSC5601; nloop=nloop+2)
{
printf(“0x%02X%02X “,
(unsigned char)pcKSC5601[nloop],
(unsigned char)pcKSC5601[nloop+1]);
}
printf(”\n”);

nTranslateCode = PxTranslateToUTF(trans, // Translation Code
pcKSC5601, // Source String to Convert
lenKSC5601, // Source String Size
&nConsumed, // Number of Source Bytes Consumed
pmbUTF8, // UTF-8 Destination buffer
BUFFER_SIZE, // Max UTF-8 Destination buffer
&nProduced); // Number of Destination Bytes Produced
if (nTranslateCode == -1)
{
printf(“Could Not Translate to EUC-KR\n”);
return (Pt_CONTINUE);
}
printf(“Source Consumed = %d\n”,nConsumed);
printf(“Destination Produced = %d\n”,nProduced);

printf("\npmbUTF8 = 0x");
for (nloop=0; nloop <nProduced; nloop=nloop+1)
printf("%02X",(unsigned char)pcUnicode[nloop]);
printf("\n");

if (PfGenerateFontName(“Bitstream SB Gothic KR Uni”, PF_STYLE_BOLD, 24,
fontName) == NULL)
printf(“Can’t create FontName4\n”);
PtMessageBox(widget,NULL,pmbUTF8,fontName,“Press ME”);

return( Pt_CONTINUE );
}

Jeff Holtz <holtzj@esi.com> wrote:

Alright I think I’m just lost here but maybe someone can straighten me out:
I have the following Korean Character:
KSC5601 0xD3CB (Looks like a box with a 7 and A above it) (Sorry about
the description)
When I look this up I find that:
KSC5601 0xD3CB = Unicode 0x6793 = U+6793

I’ve then looked up the UTF-8 FAQ
http://www.cl.cam.ac.uk/~mgk25/unicode.html
This states that codes greater than U+007F are encoded as a sequence of
several bytes, etc…

But when I use PtTranslateToUTF() on 0xD3CB I get as output 0x6C9372

When I try it, I get “\xE6\xB2\x93”, which is the correct UTF-8 sequence
for U+6C93.

(Yes, that’s a ‘C’, not a ‘7’. But when I look at the pictures at

http://www.unicode.org/charts/PDF/U4E00.pdf

(warning: it’s a big file), U+6C93 looks much more similar to your
description than U+6793. Are you sure you didn’t make a mistake?..)

Here’s my code and its output:

#include <stdio.h>
#include <stdlib.h>
#include <photon/PxProto.h>

int main( void ) {
static const char kc[] = { 0xD3, 0xCB };
char buf[ 100 ];
wchar_t wc = 0;
int ret, srctaken, dstmade, i;
struct PxTransCtrl * trans = PxTranslateSet(NULL,“EUC-KR”);
ret = PxTranslateToUTF( trans, kc, sizeof(kc), &srctaken, buf, sizeof(buf), &dstmade );
printf( “ret=%d, srctaken=%d, dstmade=%d, dst =”, ret, srctaken, dstmade );
for ( i=0; i<dstmade; ++i )
printf( " %02X", (unsigned char) buf );

i = mbtowc( &wc, buf, dstmade );
printf( “\nmbtowc() returned %d, wc = 0x%04X\n”, i, (unsigned) wc );
return 0;
}

ret=0, srctaken=2, dstmade=3, dst = E6 B2 93
mbtowc() returned 3, wc = 0x6C93

Are you running QNX 6.2 or something else?

Could you run my program on your machine and see if the output is the
same?

This looks like Unicode and Not UTF-8. Is this correct or is the
documentation saying UTF-8 really Unicode?

Here is a a callback that I have on a button simple app builder program to
demonstrate.

It seems there’s something wrong with your callback: it tells
PtTranslateToUTF() to put the output in pmbUTF8, but then the printf()
loop prints out the contents of pcUnicode, which is initialized to
“\x6C\x93” and never changed afterwards.

_Thanks for any info people can give

Jeff
\

Jeffrey B. Holtz
Software Engineering
Electro Scientific Industries, Inc.
Ph: 734-332-7054
Fax: 734-332-7077
email: > holtzj@esi.com

/* Callback /
/
AppBuilder Photon Code Lib /
/
Version 2.01 /

/
Standard headers /
#include <stdio.h
#include <stdlib.h
#include <unistd.h
#include <string.h

/
Local headers */
#include “ablibs.h”
#include “abimport.h”
#include “proto.h”

// The following equates to:
// KSC5601 - 0xD3CB
// Unicode - 0x6C93
char * pcKSC5601 = “\xD3\xCB”;
char * pcUnicode = “\x6C\x93”;
#define BUFFER_SIZE 256

int
ButtonPressed( PtWidget_t *widget, ApInfo_t *apinfo, PtCallbackInfo_t
cbinfo )
{
struct PxTransCtrl * trans = NULL;
FontDetails * fontDetails = NULL;
char * pmbUTF8 = NULL;
int nConsumed = 0, nProduced = 0;
int nTranslateCode = 0;
int lenKSC5601 = 0;
int lenUnicode = 0;
int nNumFonts = 0;
int nfont = 0;
int nloop = 0;

uchar_t fontName[MAX_FONT_TAG];

memset(fontName,0x00,sizeof(fontName));

/
eliminate ‘unreferenced’ warnings */
widget = widget, apinfo = apinfo, cbinfo = cbinfo;

trans = PxTranslateSet(NULL,“EUC-KR”);
if (trans == NULL)
{
printf(“Unknown translation State\n”);
return (Pt_CONTINUE);
}

if (pmbUTF8 == NULL)
pmbUTF8 = calloc(BUFFER_SIZE, sizeof(char));

memset(pmbUTF8,0x00,sizeof(BUFFER_SIZE));

printf("\nTranslating pcKSC5601 To UTF-8\n");
lenKSC5601 = strlen(pcKSC5601);
printf(“pcKSC5601 = “);
for (nloop=0; nloop<lenKSC5601; nloop=nloop+2)
{
printf(“0x%02X%02X “,
(unsigned char)pcKSC5601[nloop],
(unsigned char)pcKSC5601[nloop+1]);
}
printf(”\n”);

nTranslateCode = PxTranslateToUTF(trans, // Translation Code
pcKSC5601, // Source String to Convert
lenKSC5601, // Source String Size
&nConsumed, // Number of Source Bytes Consumed
pmbUTF8, // UTF-8 Destination buffer
BUFFER_SIZE, // Max UTF-8 Destination buffer
&nProduced); // Number of Destination Bytes Produced
if (nTranslateCode == -1)
{
printf(“Could Not Translate to EUC-KR\n”);
return (Pt_CONTINUE);
}
printf(“Source Consumed = %d\n”,nConsumed);
printf(“Destination Produced = %d\n”,nProduced);

printf(”\npmbUTF8 = 0x”);
for (nloop=0; nloop <nProduced; nloop=nloop+1)
printf("%02X",(unsigned char)pcUnicode[nloop]);
printf("\n");

if (PfGenerateFontName(“Bitstream SB Gothic KR Uni”, PF_STYLE_BOLD, 24,
fontName) == NULL)
printf(“Can’t create FontName4\n”);
PtMessageBox(widget,NULL,pmbUTF8,fontName,“Press ME”);

return( Pt_CONTINUE );
}_


Wojtek Lerch QNX Software Systems Ltd.

THUMP!!! THUMP!!! THUMP!!!
That would be my head meeting the desk!

Thanks for bringing me back into the real world, I’m not sure where I was.


Jeffrey B. Holtz
Software Engineering
Electro Scientific Industries, Inc.
Ph: 734-332-7054
Fax: 734-332-7077
email: holtzj@esi.com
“Wojtek Lerch” <wojtek_l@yahoo.ca> wrote in message
news:ame0ep$ihe$1@nntp.qnx.com

Jeff Holtz <> holtzj@esi.com> > wrote:
Alright I think I’m just lost here but maybe someone can straighten me
out:
I have the following Korean Character:
KSC5601 0xD3CB (Looks like a box with a 7 and A above it) (Sorry
about
the description)
When I look this up I find that:
KSC5601 0xD3CB = Unicode 0x6793 = U+6793

I’ve then looked up the UTF-8 FAQ
http://www.cl.cam.ac.uk/~mgk25/unicode.html
This states that codes greater than U+007F are encoded as a sequence of
several bytes, etc…

But when I use PtTranslateToUTF() on 0xD3CB I get as output 0x6C9372

When I try it, I get “\xE6\xB2\x93”, which is the correct UTF-8 sequence
for U+6C93.

(Yes, that’s a ‘C’, not a ‘7’. But when I look at the pictures at

http://www.unicode.org/charts/PDF/U4E00.pdf

(warning: it’s a big file), U+6C93 looks much more similar to your
description than U+6793. Are you sure you didn’t make a mistake?..)

Here’s my code and its output:

#include <stdio.h
#include <stdlib.h
#include <photon/PxProto.h

int main( void ) {
static const char kc[] = { 0xD3, 0xCB };
char buf[ 100 ];
wchar_t wc = 0;
int ret, srctaken, dstmade, i;
struct PxTransCtrl * trans = PxTranslateSet(NULL,“EUC-KR”);
ret = PxTranslateToUTF( trans, kc, sizeof(kc), &srctaken, buf,
sizeof(buf), &dstmade );
printf( “ret=%d, srctaken=%d, dstmade=%d, dst =”, ret, srctaken,
dstmade );
for ( i=0; i<dstmade; ++i )
printf( " %02X", (unsigned char) buf > _);

i = mbtowc( &wc, buf, dstmade );
printf( “\nmbtowc() returned %d, wc = 0x%04X\n”, i, (unsigned)
wc );
return 0;
}

ret=0, srctaken=2, dstmade=3, dst = E6 B2 93
mbtowc() returned 3, wc = 0x6C93

Are you running QNX 6.2 or something else?

Could you run my program on your machine and see if the output is the
same?

This looks like Unicode and Not UTF-8. Is this correct or is the
documentation saying UTF-8 really Unicode?

Here is a a callback that I have on a button simple app builder program
to
demonstrate.

It seems there’s something wrong with your callback: it tells
PtTranslateToUTF() to put the output in pmbUTF8, but then the printf()
loop prints out the contents of pcUnicode, which is initialized to
“\x6C\x93” and never changed afterwards.

Thanks for any info people can give

Jeff
\

Jeffrey B. Holtz
Software Engineering
Electro Scientific Industries, Inc.
Ph: 734-332-7054
Fax: 734-332-7077
email: > holtzj@esi.com

/* Callback /
/
AppBuilder Photon Code Lib /
/
Version 2.01 /

/
Standard headers /
#include <stdio.h
#include <stdlib.h
#include <unistd.h
#include <string.h

/
Local headers */
#include “ablibs.h”
#include “abimport.h”
#include “proto.h”

// The following equates to:
// KSC5601 - 0xD3CB
// Unicode - 0x6C93
char * pcKSC5601 = “\xD3\xCB”;
char * pcUnicode = “\x6C\x93”;
#define BUFFER_SIZE 256

int
ButtonPressed( PtWidget_t *widget, ApInfo_t *apinfo, PtCallbackInfo_t
cbinfo )
{
struct PxTransCtrl * trans = NULL;
FontDetails * fontDetails = NULL;
char * pmbUTF8 = NULL;
int nConsumed = 0, nProduced = 0;
int nTranslateCode = 0;
int lenKSC5601 = 0;
int lenUnicode = 0;
int nNumFonts = 0;
int nfont = 0;
int nloop = 0;

uchar_t fontName[MAX_FONT_TAG];

memset(fontName,0x00,sizeof(fontName));

/
eliminate ‘unreferenced’ warnings */
widget = widget, apinfo = apinfo, cbinfo = cbinfo;

trans = PxTranslateSet(NULL,“EUC-KR”);
if (trans == NULL)
{
printf(“Unknown translation State\n”);
return (Pt_CONTINUE);
}

if (pmbUTF8 == NULL)
pmbUTF8 = calloc(BUFFER_SIZE, sizeof(char));

memset(pmbUTF8,0x00,sizeof(BUFFER_SIZE));

printf("\nTranslating pcKSC5601 To UTF-8\n");
lenKSC5601 = strlen(pcKSC5601);
printf(“pcKSC5601 = “);
for (nloop=0; nloop<lenKSC5601; nloop=nloop+2)
{
printf(“0x%02X%02X “,
(unsigned char)pcKSC5601[nloop],
(unsigned char)pcKSC5601[nloop+1]);
}
printf(”\n”);

nTranslateCode = PxTranslateToUTF(trans, // Translation Code
pcKSC5601, // Source String to Convert
lenKSC5601, // Source String Size
&nConsumed, // Number of Source Bytes Consumed
pmbUTF8, // UTF-8 Destination buffer
BUFFER_SIZE, // Max UTF-8 Destination buffer
&nProduced); // Number of Destination Bytes Produced
if (nTranslateCode == -1)
{
printf(“Could Not Translate to EUC-KR\n”);
return (Pt_CONTINUE);
}
printf(“Source Consumed = %d\n”,nConsumed);
printf(“Destination Produced = %d\n”,nProduced);

printf(”\npmbUTF8 = 0x”);
for (nloop=0; nloop <nProduced; nloop=nloop+1)
printf("%02X",(unsigned char)pcUnicode[nloop]);
printf("\n");

if (PfGenerateFontName(“Bitstream SB Gothic KR Uni”, PF_STYLE_BOLD, 24,
fontName) == NULL)
printf(“Can’t create FontName4\n”);
PtMessageBox(widget,NULL,pmbUTF8,fontName,“Press ME”);

return( Pt_CONTINUE );
}




\

Wojtek Lerch QNX Software Systems Ltd._