为单字节代码集优化的单源双路径版本

术语单源双路径是指在一个应用程序中有两条路径,在运行时选择哪一条路径要取决于当前语言环境的设置,这一设置指明正在使用的代码集是单字节还是多字节。

如果程序可以保持自身的性能,并且不会过多地增加其可执行文件的大小,那么单源双路径方式可作为首选。 应该对命令或实用程序逐个进行评估,以评估可执行文件大小的增长。

在单源双路径方法中, MB_CUR_MAX 宏指定了当前语言环境中的多字节字符的最大字节数。 这应被用来在运行时确定要选择的处理路径是单字节路径还是多字节路径。 可使用一个布尔标志来指出要选择的路径,例如:

int mbcodeset ;
/* After setlocale(LC_ALL,"") is done, determine the path to
** be chosen.
*/
if(MB_CUR_MAX == 1)
        mbcodeset = 0;
else    mbcodeset = 1;

这样,将检查当前代码集以查看它是否是多字节代码集,如果是,将相应地设置标志 mbcodeset 。 相比多次测试 MB_CUR_MAX 宏,测试此标志对性能的影响较小。

if(mbcodeset){
        /* Multibyte path: taken when the flag is nonzero. This
        ** path also works for single-byte code sets.
        */
        /* Use the multibyte or wide character processing
        ** functions here.
        */
}else{
        /* Single-byte path: taken when the flag is zero. */
        /* Process the data one byte per character. */
}

如果全球化只影响模块的很小一部分,那么前面的方法是恰当的。 过多地使用测试来提供双路径可能会降低性能。 因此,应在足够高的层次上进行这种测试(例如在整个处理循环之外只判断一次),以避免频繁地重复测试该条件。

以下版本的 my_example 实用程序只产生一个目标文件,但在运行时会根据当前代码集选择相应的路径,以便针对该代码集优化性能。 请注意,这里只区分单字节代码集和多字节代码集。

/*
 * COMPONENT_NAME:
 *
 * FUNCTIONS: my_example
 *
 * The following code shows how to count the number of bytes and
 * the number of characters in a text file.
 *
 * This example is for illustration purposes only. Performance
 * improvements may still be possible.
 *
 */

#include        <stdio.h>
#include        <ctype.h>
#include        <fcntl.h>
#include        <locale.h>
#include        <nl_types.h>
#include        <stdlib.h>
#include        <string.h>
#include        <unistd.h>
#include        "my_example_msg.h"

/*
 * MSGSTR(Num,Str): fetch message Num of set MS_MY_EXAMPLE through
 * catgets(), passing Str as the default-string argument.
 * The expansion refers to a variable named catd, so a catalog
 * descriptor with that name must be in scope wherever the macro is used.
 */
#define MSGSTR(Num,Str) catgets(catd,MS_MY_EXAMPLE,Num,Str)

/*
 * NAME: count_chars
 *
 * FUNCTION: Counts the multibyte characters held in buf[0..len-1] and
 *           adds that count to *charct.
 *
 * buf     - data to scan; on a partial trailing character its bytes
 *           are moved to the start of this buffer
 * len     - number of valid bytes in buf
 * mb_max  - maximum number of bytes in a character (MB_CUR_MAX)
 * at_eof  - nonzero when no further data will follow this buffer
 * charct  - running character count, updated in place
 *
 * A byte that mbtowc() cannot convert is counted as one character so
 * that the scan always advances.  When at_eof is zero and the buffer
 * ends with fewer than mb_max unconvertible bytes, those bytes may be
 * the start of a character that continues in the next read, so they
 * are kept uncounted for the caller to complete.
 *
 * RETURNS: the number of trailing bytes kept at the start of buf
 *          (always 0 when at_eof is nonzero).
 */
static int
count_chars(char *buf, int len, int mb_max, int at_eof, int *charct)
{
    char    *curp = buf;        /* current position in the buffer */
    char    *cure = buf + len;  /* one past the last valid byte */
    wchar_t wc;                 /* conversion target; value unused */
    int     remaining;          /* bytes from curp to cure */
    int     mbcnt;              /* number of bytes in a character */

    while (curp < cure) {
        remaining = (int)(cure - curp);

        /* Never let mbtowc() examine bytes beyond the data read. */
        mbcnt = mbtowc(&wc, curp,
                (size_t)(remaining < mb_max ? remaining : mb_max));

        if (mbcnt == 0) {
            /* A null character occupies one byte. */
            mbcnt = 1;
        } else if (mbcnt < 0) {
            if (!at_eof && remaining < mb_max) {
                /* Possibly a character split across two reads:
                ** keep the bytes and ask for more data.
                ** memmove() because source and target overlap.
                */
                memmove(buf, curp, (size_t)remaining);
                return remaining;
            }
            /* Invalid byte: count it as one character. */
            mbcnt = 1;
        }
        /* mbcnt > 0 here: a complete character of mbcnt bytes. */
        curp += mbcnt;
        (*charct)++;
    }
    return 0;
}

/*
 * NAME: my_example
 *
 * FUNCTION: Counts the number of bytes and the number of characters
 *           in the file named by argv[1] and prints both counts on
 *           stderr.
 *
 * The processing path is selected once, from MB_CUR_MAX, after the
 * locale has been set: multibyte code sets are scanned with mbtowc(),
 * single-byte code sets take a path that needs no conversion.
 *
 * EXIT STATUS: 0 on success, 2 if no file was named or the file
 *              cannot be opened.
 */

int
main(int argc, char **argv)
{
    int     bytesread;          /* number of bytes read */
    int     leftover;           /* bytes of a split character carried
                                ** over to the next read */
    int     f;                  /* File descriptor */
    int     mb_cur_max;
    int     bytect;             /* byte count */
    int     charct;             /* for real character count */
    char    buf[BUFSIZ];

    nl_catd catd;

    /* flag to indicate if current code set is a
    ** multibyte code set
    */
    int     multibytecodeset;

    /* Obtain the current locale */
    (void) setlocale(LC_ALL,"");

    /* after setting the locale, open the message catalog */
    catd = catopen(MF_MY_EXAMPLE,NL_CAT_LOCALE);

    /* Parse the arguments if any */
    if (argc < 2) {
        /* NOTE(review): no catalog message ID for a usage line is
        ** visible here, so this text is not translated; add one to
        ** the message catalog.
        */
        fprintf(stderr, "usage: my_example file\n");
        exit(2);
    }

    /*
    ** Obtain the maximum number of bytes in a character in the
    ** current locale.
    */
    mb_cur_max = (int)MB_CUR_MAX;
    multibytecodeset = (mb_cur_max > 1);

    /* Open the specified file and issue error messages if any */
    f = open(argv[1], O_RDONLY);
    if (f < 0) {
        fprintf(stderr,MSGSTR(CANTOPEN,              /*MSG*/
            "my_example: cannot open %s\n"), argv[1]);      /*MSG*/
        exit(2);
    }

    /* Initialize the variables for the count */
    bytect = 0;
    charct = 0;
    leftover = 0;

    /* Start count of bytes and characters  */

    if (multibytecodeset) {
        /* Full globalization */
        /* Handles supported multibyte code sets */
        for (;;) {
            /* New data is appended after any carried-over bytes. */
            bytesread = read(f, buf + leftover,
                    BUFSIZ - leftover);
            /* issue any error messages here, if needed */
            if (bytesread <= 0)
                break;

            bytect += bytesread;
            leftover = count_chars(buf, leftover + bytesread,
                    mb_cur_max, 0, &charct);
        }
        /* Bytes still held back at end of input cannot be completed
        ** any more; count them now.
        */
        (void) count_chars(buf, leftover, mb_cur_max, 1, &charct);
    } else {

        /* Code specific to single-byte code sets that
        ** avoids conversion to widechars and thus optimizes
        ** performance for single-byte code sets.
        */

        for (;;) {
            bytesread = read(f, buf, BUFSIZ);
            /* issue any error messages here, if needed */
            if (bytesread <= 0)
                break;

            /* One byte is one character on this path. */
            bytect += bytesread;
            charct += bytesread;
        }
    }

    /* print number of chars and bytes */
    fprintf(stderr,MSGSTR(BYTECNT, "number of bytes:%d\n"),
            bytect);
    fprintf(stderr,MSGSTR(CHARCNT, "number of characters:%d\n"),
            charct);
    close(f);
    return 0;
}