Examples
The following example uses the mbtowc subroutine to convert a character in multibyte character code to wide character code:
#include <stdlib.h>
#include <locale.h>

/*
** Converts the first multibyte character of the string s to wide
** character code with the mbtowc subroutine.
** Returns 0 on success and 1 if the character is not valid.
*/
int main(void)
{
    /* Sample input; replace it with the string that needs converting. */
    const char *s = "h";
    wchar_t wc;
    int n;

    /* Take the locale from the environment so that MB_CUR_MAX and the
    ** conversion follow the current code set. */
    (void)setlocale(LC_ALL, "");
    /*
    ** s points to the character string that needs to be
    ** converted to a wide character to be stored in wc.
    */
    n = mbtowc(&wc, s, MB_CUR_MAX);
    if (n == -1) {
        /* Error handle: s does not begin with a valid multibyte
        ** character, so wc must not be used. */
        return 1;
    }
    if (n == 0) {
        /* Case of s pointing to the null character */
        return 0;
    }
    /*
    ** wc contains the process code for the multibyte character
    ** pointed to by s, and n is the number of bytes in that character.
    */
    return 0;
}
The following example uses the wctomb subroutine to convert a character in wide character code to multibyte character code:
#include <stdlib.h>
#include <limits.h>
#include <locale.h>

/*
** Converts one wide character to multibyte character code with the
** wctomb subroutine.
** Returns 0 on success and 1 if the wide character is not valid.
*/
int main(void)
{
    /* MB_LEN_MAX (limits.h) is the largest value MB_CUR_MAX can take,
    ** so s can hold one multibyte character in any locale. */
    char s[MB_LEN_MAX];
    /* Sample input; replace it with the wide character to convert. */
    wchar_t wc = L'h';
    int n;

    (void)setlocale(LC_ALL, "");
    /*
    ** wc is the wide character code that needs to be converted
    ** to multibyte character code to be stored in s.
    */
    n = wctomb(s, wc);
    if (n == -1) {
        /* Error handle: wc is not a valid wide character */
        return 1;
    }
    /*
    ** n is the number of bytes in the multibyte character stored
    ** in s. wctomb does not append a terminating null to s.
    */
    return 0;
}
The following example uses the mblen subroutine to find the byte length of a character in multibyte character code:
#include <stdlib.h>
#include <locale.h>
/*
** Finds the byte length of the first multibyte character of name
** with the mblen subroutine.
** Returns 0 on success and 1 if the character is not valid.
*/
int main(void)
{
    const char *name = "h";
    int n;

    /* Take the locale from the environment so that MB_CUR_MAX
    ** reflects the current code set. */
    (void)setlocale(LC_ALL, "");
    n = mblen(name, MB_CUR_MAX);
    /*
    ** The count returned in n is the multibyte length.
    ** It is always less than or equal to the value of
    ** MB_CUR_MAX in stdlib.h
    */
    if (n == -1) {
        /* Error Handling */
        return 1;
    }
    return 0;
}
The following example obtains a previous character position in a multibyte string. If you need to determine the previous character position, starting from a current character position (not a random byte position), step through the buffer starting at the beginning. Use the mblen subroutine until the current character position is reached, and save the previous character position to obtain the needed character position.
char buf[]; /* contains the multibyte string */
char *cur, /* points to the current character position */
char *prev, /* points to previous multibyte character */
char *p; /* moving pointer */
/* initialize the buffer and pointers as needed */
/* loop through the buffer until the moving pointer reaches
** the current character position in the buffer, always
** saving the last character position in prev pointer */
p = prev = buf;
/* cur points to a valid character somewhere in buf */
while(p< cur){
prev = p;
if( (i=mblen(p, mbcurmax))<=0){
/* invalid multibyte character or null */
/* You can have a different error handling
** strategy */
p++; /* skip it */
}else {
p += i;
}
}
/* prev will point to the previous character position */
/* Note that if( prev == cur), then it means that there was
** no previous character. Also, if all bytes up to the
** current character are invalid, it will treat them as
** all valid single-byte characters and this may not be what
** you want. One may change this to handle another method of
** error recovery. */
The following example uses the mbstowcs subroutine to convert a multibyte string to a wide character string:
#include <stdlib.h>
#include <string.h>
#include <locale.h>

/*
** Converts the multibyte string s to a wide character string with
** the mbstowcs subroutine.
** Returns 0 on success and 1 if allocation or conversion fails.
*/
int main(void)
{
    /* Sample input; replace it with the string that needs converting. */
    const char *s = "hello";
    wchar_t *pwcs;
    size_t retval, n;

    (void)setlocale(LC_ALL, "");
    /* A multibyte string never has more characters than bytes, so
    ** strlen(s) + 1 wide characters are always enough. */
    n = strlen(s) + 1; /* string length + terminating null */
    /* Allocate required wchar array */
    pwcs = malloc(n * sizeof *pwcs);
    if (pwcs == NULL) {
        /* Error handle: out of memory */
        return 1;
    }
    retval = mbstowcs(pwcs, s, n);
    /* mbstowcs returns a size_t, so the error value is (size_t)-1 */
    if (retval == (size_t)-1) {
        /* Error handle: s contains a multibyte character that is not valid */
        free(pwcs);
        return 1;
    }
    /*
    ** pwcs contains the wide character string, and retval is the
    ** number of wide characters stored, not counting the
    ** terminating null.
    */
    free(pwcs);
    return 0;
}
The following example illustrates the problems with using the mbstowcs subroutine on a large block of data for conversion to wide character form. When it encounters a multibyte character that is not valid, the mbstowcs subroutine returns a value of -1 but does not specify where the error occurred. Therefore, the mbtowc subroutine must be used repeatedly to convert one character at a time to wide character code.
During the conversion of single-byte code sets, a read can never end in the middle of a character. During the conversion of multibyte code sets, however, a partial multibyte character at the end of the data read is copied to a save buffer. During the next call to the read subroutine, the saved partial character is prefixed to the rest of the byte sequence.
#include <stdio.h>
#include <locale.h>
#include <stdlib.h>
#include <limits.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>

/*
** Reads the file named by argv[1] in pieces of up to BUFSIZ bytes and
** converts each piece to wide character code.
**
** Single-byte code sets are converted a whole buffer at a time with
** mbstowcs. Multibyte code sets are converted one character at a time
** with mbtowc, so that a character split across two reads can be
** saved and completed, and so that a character that is not valid can
** be detected where it occurs.
**
** Returns 0 when the file has been processed and 1 if no file name
** was given or the file cannot be opened.
*/
int main(int argc, char *argv[])
{
    char *curp, *cure;
    int bytesread, bytestoconvert, leftover;
    int invalid_multibyte, mbcnt;
    wchar_t *pwcs;
    wchar_t wbuf[BUFSIZ+1];
    char buf[BUFSIZ+1];
    char savebuf[MB_LEN_MAX];
    size_t mb_cur_max;
    int fd;

    /*
    ** MB_LEN_MAX specifies the system wide constant for
    ** the maximum number of bytes in a multibyte character.
    */
    if (argc < 2) {
        /* error handle: no file name given */
        return 1;
    }
    (void)setlocale(LC_ALL, "");
    mb_cur_max = MB_CUR_MAX;
    fd = open(argv[1], O_RDONLY);
    if (fd < 0) {
        /* error handle */
        return 1;
    }
    if (mb_cur_max == 1) { /* Single byte code sets case */
        for (;;) {
            bytesread = read(fd, buf, BUFSIZ);
            if (bytesread <= 0)
                break;
            /* read does not terminate the data; mbstowcs needs a
            ** null-terminated string */
            buf[bytesread] = '\0';
            mbstowcs(wbuf, buf, bytesread + 1);
            /* Process using the wide character buffer */
        }
        /* File processed ... */
    } else { /* Multibyte code sets */
        leftover = 0;
        for (;;) {
            /* Put the partial character saved from the previous read
            ** in front of the new data. These are raw bytes, not a
            ** string, so memcpy is used. */
            if (leftover)
                memcpy(buf, savebuf, leftover);
            bytesread = read(fd, buf + leftover, BUFSIZ - leftover);
            if (bytesread <= 0)
                break;
            buf[leftover + bytesread] = '\0';
            /* Null terminate string */
            invalid_multibyte = 0;
            bytestoconvert = leftover + bytesread;
            cure = buf + bytestoconvert;
            leftover = 0;
            pwcs = wbuf;
            /* Stop processing when invalid mbyte found. */
            curp = buf;
            while (curp < cure) {
                mbcnt = mbtowc(pwcs, curp, mb_cur_max);
                if (mbcnt > 0) {
                    curp += mbcnt;
                    pwcs++;
                    continue;
                }
                /* mbtowc returned 0 or -1 */
                if ((size_t)(cure - curp) < mb_cur_max) {
                    /* More data needed on next read */
                    leftover = cure - curp;
                    memcpy(savebuf, curp, leftover);
                    /* Null terminate before partial mbyte */
                    *curp = 0;
                } else {
                    /* Invalid multibyte found */
                    invalid_multibyte = 1;
                }
                break;
            }
            /* Terminate the converted text. At most BUFSIZ wide
            ** characters were stored, so wbuf has room for the null. */
            *pwcs = L'\0';
            if (invalid_multibyte) { /* error handle */
            }
            /* Process the wide char buffer */
        }
    }
    (void)close(fd);
    return 0; /* End of program */
}
The following example uses the wcstombs and wcslen subroutines to convert a wide character string to multibyte form:
#include <stdlib.h>
#include <locale.h>
main()
{
wchar_t *pwcs; /* Source wide character string */
char *s; /* Destination multibyte character string */
size_t n;
size_t retval;
(void)setlocale(LC_ALL, "");
/*
** Calculate the maximum number of bytes needed to
** store the wide character buffer in multibyte form in the
** current code page and malloc() the appropriate storage,
** including the terminating null.
*/
s = (char *) malloc( wcslen(pwcs) * MB_CUR_MAX + 1 );
retval= wcstombs( s, pwcs, n);
if( retval == -1) {
/* Error handle */
/* s points to the multibyte character string. */
}