qu8.h

(Moved here unchanged from http://www.mindspring.com/~markus.scherer/unicode/qu8.h)

/*
*   file name:  qu8.h
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2006feb05
*   created by: Markus W. Scherer
*
*   Byte sequences handled by the code in this file
*   (L = byte 0x80..0xbf, H = byte 0xc0..0xff):
*
*     single  00..7f                    c = 0..0x7f
*     L H     L = 0x80 | (c >> 6)       c = 0x80..0xfff     (L is 0x82..0xbf)
*             H = 0xc0 | (c & 0x3f)
*     H L L   H = 0xc0 | (c >> 12)      c = 0x1000..0x3dfff (H is 0xc1..0xfd)
*             H = 0xfe                  c = 0xe0000..0xe0fff
*             H = 0xff                  c = 0x10f000..0x10ffff
*             L = 0x80 | ((c >> 6) & 0x3f)
*             L = 0x80 | (c & 0x3f)
*
*   Lead bytes 0x80, 0x81 (in L H) and 0xc0 (in H L L) would only encode
*   values that have a shorter form and are treated as errors.
*
*   This header includes nothing itself: the including file must already
*   provide uint8_t, int32_t, uint32_t, NULL and UChar32
*   (presumably via ICU's unicode/utypes.h -- confirm against the build).
*/

#ifndef __UQ8_H__
#define __UQ8_H__

/*
 * Read one code point from s starting at index i and advance i.
 *
 *   s       byte array (char * or uint8_t *); not modified
 *   i       integer lvalue, index of the first byte to read, i<length;
 *           advanced past the bytes that were consumed
 *   length  number of bytes in s
 *   c       integer lvalue that receives the code point, or -1 on error;
 *           its type must be able to hold -1 and values up to 0x10ffff
 *
 * On error i has moved past the lead byte only, except when the trail bytes
 * were well-formed and just the lead byte was a non-shortest-form one
 * (0x80, 0x81, 0xc0): then the whole sequence has been consumed.
 *
 * s, i and c are evaluated more than once; do not pass expressions with
 * side effects.
 */
#define UQ8_NEXT(s, i, length, c) { \
    (c)=(uint8_t)(s)[(i)++]; \
    if((c)>=0x80) { \
        uint8_t _d_, _e_; \
        if((c)<0xc0) { /* LH */ \
            if((i)<(length) && (_d_=(uint8_t)(s)[(i)])>=0xc0) { \
                ++(i); \
                if((c)>=0x82) { \
                    /* strip the 0x80 marker bit before shifting */ \
                    (c)=(((c)&0x3f)<<6)|(_d_&0x3f); \
                } else { \
                    (c)=-1; \
                } \
            } else { \
                (c)=-1; \
            } \
        } else { /* HLL */ \
            /* after subtracting 0x80, both trail bytes must be 0..0x3f */ \
            if( ((i)+1)<(length) && \
                ((_d_=(uint8_t)((s)[(i)]-0x80))| \
                 (_e_=(uint8_t)((s)[(i)+1]-0x80))) \
                    <=0x3f \
            ) { \
                (i)+=2; \
                if((c)!=0xc0) { \
                    if((c)<0xfe) { \
                        (c)=((c)&0x3f)<<12; \
                    } else if((c)==0xfe) { \
                        (c)=0xe0000; \
                    } else /* 0xff */ { \
                        (c)=0x10f000; \
                    } \
                    (c)|=(_d_<<6)|_e_; \
                } else { \
                    (c)=-1; \
                } \
            } else { \
                (c)=-1; \
            } \
        } \
    } \
}

/*
 * UQ8_PREV() is symmetrical to UQ8_NEXT():
 * fetch c=s[--i];
 * Single->use c; H->should be preceded by L; L->should be preceded by HL
 */

/*
 * Write a representable code point c to s starting at index i and advance i
 * by the number of bytes written (1, 2 or 3).
 *
 * "Unsafe": there is no bounds check on s and no validation of c.
 * Representable values are 0..0x3dfff, 0xe0000..0xe0fff and
 * 0x10f000..0x10ffff; any other value above 0x3dfff is written with the
 * lead byte 0xff.
 *
 * s, i and c are evaluated more than once; do not pass expressions with
 * side effects.
 */
#define UQ8_APPEND_UNSAFE(s, i, c) { \
    if((uint32_t)(c)<=0x7f) { \
        (s)[(i)++]=(uint8_t)(c); \
    } else { \
        if((uint32_t)(c)<=0xfff) { \
            (s)[(i)++]=(uint8_t)(((c)>>6)|0x80); \
            (s)[(i)++]=(uint8_t)(((c)&0x3f)|0xc0); \
        } else { \
            int32_t _c12_=(int32_t)(c)>>12; \
            if((uint32_t)_c12_<=0x3d) { \
                (s)[(i)++]=(uint8_t)(_c12_|0xc0); \
            } else if(_c12_==(0xe0000>>12)) { \
                (s)[(i)++]=0xfe; \
            } else /* _c12_==(0x10f000>>12) */ { \
                (s)[(i)++]=0xff; \
            } \
            (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
            (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
        } \
    } \
}

/*
 * Random access into a UQ8 string at index i.
 * Returns an integer with three bit fields:
 * - 31..8 code point at i, or -1 if there is an error
 * -  7..4 distance from the beginning of the code point to i
 * -  3..0 distance from i to the beginning of the next code point
 *
 * The sequence containing s[i] is identified from s[i] and up to two bytes
 * on either side of it; bytes outside 0..length-1 are never read.
 *
 * Returns plain -1 if s is NULL, length is negative, or i is outside
 * 0..length-1.
 *
 * NOTE(review): this is an external function *defined* in a header, so
 * including the header from more than one translation unit of a program
 * produces duplicate definitions at link time.
 */
extern UChar32 uq8_get(const char *s, int32_t i, int32_t length) {
    int32_t a, b, c, d, e;          /* bytes at i-2, i-1, i, i+1, i+2 */
    int32_t lead, trail1, trail2;   /* the sequence that contains s[i] */
    int32_t distances;              /* low byte of the result */

    if(s==NULL || length<0 || i<0 || i>=length) {
        return -1;
    }

    c=(uint8_t)s[i];
    if(c<0x80) {
        return (c<<8)|1; /* c, 0, 1 */
    }

    /* fetch the 4 surrounding bytes; 0 stands in for "outside the string" */
    a=b=d=e=0;
    if(i>=1) {
        b=(uint8_t)s[i-1];
        if(i>=2) {
            a=(uint8_t)s[i-2];
        }
    }
    if((i+1)<length) {
        d=(uint8_t)s[i+1];
        if((i+2)<length) {
            e=(uint8_t)s[i+2];
        }
    }

    /* trail2==0 marks a two-byte (LH) sequence; a real trail byte is >=0x80 */
    if(c<0xc0) {
        /* s[i] is an L byte */
        if(a>=0xc0 && 0x80<=b && b<0xc0) {
            /* hlL.. */
            lead=a; trail1=b; trail2=c;
            distances=0x21;
        } else if(b>=0xc0 && 0x80<=d && d<0xc0) {
            /* .hLl. */
            lead=b; trail1=c; trail2=d;
            distances=0x12;
        } else if(d>=0xc0) {
            /* ..Lh. */
            lead=c; trail1=d; trail2=0;
            distances=2;
        } else {
            /* illegal sequence, report -1, 0, 1 (0xffffff01) */
            return -0x100+1;
        }
    } else {
        /* s[i] is an H byte */
        if(0x80<=d && d<0xc0 && 0x80<=e && e<0xc0) {
            /* ..Hll */
            lead=c; trail1=d; trail2=e;
            distances=3;
        } else if(0x80<=b && b<0xc0) {
            /* .lH.. */
            lead=b; trail1=c; trail2=0;
            distances=0x11;
        } else {
            /* illegal sequence, report -1, 0, 1 (0xffffff01) */
            return -0x100+1;
        }
    }

    if(trail2==0) {
        /* LH; lead bytes 0x80 and 0x81 are not shortest-form */
        if(lead>=0x82) {
            return ((lead&0x3f)<<14)|((trail1&0x3f)<<8)|distances;
        }
    } else {
        /* HLL; lead byte 0xc0 is not shortest-form */
        if(lead!=0xc0) {
            int32_t high;   /* code point bits 12 and up, already shifted by 8 */
            if(lead<0xfe) {
                high=(lead&0x3f)<<20;
            } else if(lead==0xfe) {
                high=0xe000000;
            } else /* 0xff */ {
                high=0x10f00000;
            }
            return high|((trail1&0x3f)<<14)|((trail2&0x3f)<<8)|distances;
        }
    }

    /* illegal sequence, report -1 with the distances (0xffffff00|distances) */
    return -0x100+distances;
}

#endif