uh8-norandom.h

(Moved here unchanged from http://www.mindspring.com/~markus.scherer/unicode/uh8-norandom.h)

/*
 *   file name:  uh8-norandom.h
 *   encoding:   US-ASCII
 *   tab size:   8 (not used)
 *   indentation:4
 *
 *   created on: 2006mar04
 *   created by: Markus W. Scherer
 *
 *   Byte layout handled by the code in this file:
 *     1 byte   00..7F                            code points 0..0x7f
 *     2 bytes  L H    L=80..BF, H=C0..FF         code points 0x80..0xfff
 *     3 bytes  H M L  H=C0..FF, M=80..FF,        19-bit value 0x1000..0x7ffff;
 *                     L=80..BF                   values >=0x50000 stand for
 *                                                code points value+0x90000
 *
 *   This header includes nothing itself: the including file must already
 *   provide uint8_t, int32_t, uint32_t, NULL and UChar32
 *   (presumably via ICU's unicode/utypes.h -- confirm against the build).
 */

#ifndef __UH8_H__
#define __UH8_H__

/*
 * Read one code point from s starting at index i (i<length must hold)
 * and advance i past the bytes that were consumed.
 *
 * s      byte array; elements are cast to uint8_t
 * i      integer lvalue; evaluated several times, so no side effects
 * length number of bytes in s
 * c      signed integer lvalue; receives the code point, or -1 for a
 *        truncated, ill-formed or non-shortest sequence
 *
 * On error only the bytes up to the offending one are consumed; in
 * particular a lead byte without valid trail bytes consumes just itself.
 */
#define UH8_NEXT(s, i, length, c) { \
    (c)=(uint8_t)(s)[(i)++]; \
    if((c)>=0x80) { \
        uint8_t _d_, _e_; \
        if((c)<0xc0) { /* LH */ \
            if((i)<(length) && (_d_=(uint8_t)(s)[i])>=0xc0) { \
                ++(i); \
                if((c)>=0x82) { \
                    /* strip the 0x80 marker from the lead byte before combining */ \
                    (c)=(((c)&0x3f)<<6)|(_d_&0x3f); \
                } else { \
                    (c)=-1; /* 80/81 lead: value would fit in one byte */ \
                } \
            } else { \
                (c)=-1; \
            } \
        } else { /* HML */ \
            if( ((i)+1)<(length) && \
                (_d_=(uint8_t)(s)[i])>=0x80 && \
                (_e_=(uint8_t)((s)[(i)+1]-0x80))<=0x3f \
            ) { \
                (i)+=2; \
                (c)=(((c)&0x3f)<<13)|((_d_&0x7f)<<6)|_e_; \
                if((c)<=0xfff) { \
                    (c)=-1; /* would fit in one or two bytes */ \
                } else if((c)>=0x50000) { \
                    (c)+=0x90000; \
                } \
            } else { \
                (c)=-1; \
            } \
        } \
    } \
}

/*
 * UH8_PREV() is symmetrical to UH8_NEXT():
 * fetch c=s[--i];
 * Single->use c; H->should be preceded by L; L->should be preceded by HM
 */

/*
 * Write a representable code point c to s starting at index i and
 * advance i by the number of bytes written (1, 2 or 3).
 *
 * "Unsafe": there is no check for buffer space and no check that c is
 * representable (0..0x4ffff or 0xe0000..0x10ffff, going by the offset
 * applied here and in UH8_NEXT).
 *
 * c is evaluated once and is not modified; the macro works on a local copy.
 * The three-byte form is written as H M L (lead byte >=0xc0, last byte
 * 0x80..0xbf) so that UH8_NEXT() and uh8_get() can read it back.
 */
#define UH8_APPEND_UNSAFE(s, i, c) { \
    uint32_t _c_=(uint32_t)(c); \
    if(_c_<=0x7f) { \
        (s)[(i)++]=(uint8_t)_c_; \
    } else if(_c_<=0xfff) { \
        (s)[(i)++]=(uint8_t)((_c_>>6)|0x80); \
        (s)[(i)++]=(uint8_t)((_c_&0x3f)|0xc0); \
    } else { \
        if(_c_>=0xe0000) { \
            _c_-=0x90000; \
        } \
        (s)[(i)++]=(uint8_t)(((_c_>>13)&0x3f)|0xc0); \
        (s)[(i)++]=(uint8_t)(((_c_>>6)&0x7f)|0x80); \
        (s)[(i)++]=(uint8_t)((_c_&0x3f)|0x80); \
    } \
}

/*
 * Random access into a UH8 string at index i.
 *
 * Looks at s[i] and up to two bytes on either side of it to work out which
 * sequence s[i] belongs to.
 *
 * Returns an integer with three bit fields:
 * - 31..8 code point at i, or -1 if there is an error
 * -  7..4 distance from the beginning of the code point to i
 * -  3..0 distance from i to the beginning of the next code point
 *
 * Returns plain -1 (all bits set) if s is NULL, length is negative,
 * or i is outside 0..length-1.
 *
 * NOTE(review): this is a definition with external linkage inside a header;
 * including the header from more than one translation unit of a program
 * will produce duplicate-symbol link errors.
 */
extern UChar32
uh8_get(const char *s, int32_t i, int32_t length) {
    int32_t a, b, c, d, e;

    if(s==NULL || length<0 || i<0 || i>=length) {
        return -1;
    }

    c=(uint8_t)s[i];
    if(c<0x80) {
        return ((int32_t)c<<8)|1; /* single byte, return c, 0, 1 */
    }

    /* fetch the 4 surrounding bytes; 0 stands for "not available" */
    a=b=d=e=0;
    if(i>=1) {
        b=(uint8_t)s[i-1];
        if(i>=2) {
            a=(uint8_t)s[i-2];
        }
    }
    if((i+1)<length) {
        d=(uint8_t)s[i+1];
        if((i+2)<length) {
            e=(uint8_t)s[i+2];
        }
    }

    /*
     * Classify s[i] and shift the sequence into c, d, e (e==0 for a
     * two-byte sequence). From here on a holds the two distance nibbles.
     */
    if(c<0xc0) {
        if(a>=0xc0 && 0x80<=b) {
            /* hmL.. */
            e=c;
            d=b;
            c=a;
            a=0x21;
        } else if(b>=0xc0 && 0x80<=d && d<0xc0) {
            /* .hLl. */
            e=d;
            d=c;
            c=b;
            a=0x12;
        } else if(d>=0xc0) {
            /* ..Lh. */
            e=0;
            a=2;
        } else {
            return -0x100+1; /* illegal sequence, return -1, 0, 1 */
        }
    } else {
        if(0x80<=d && 0x80<=e && e<0xc0) {
            /* ..Hml */
            a=3;
        } else if(b>=0xc0 && 0x80<=d && d<0xc0) {
            /* .hHl. */
            e=d;
            d=c;
            c=b;
            a=0x12;
        } else if(0x80<=b && b<0xc0) {
            /* .lH.. */
            e=0;
            d=c;
            c=b;
            a=0x11;
        } else {
            return -0x100+1; /* illegal sequence, return -1, 0, 1 */
        }
    }

    if(e==0) {
        /* cd=LH */
        if(c>=0x82) {
            return ((c&0x3f)<<14)|((d&0x3f)<<8)|a;
        }
    } else {
        /* cde=HML; cp is the code point already shifted left by 8 */
        UChar32 cp=((c&0x3f)<<21)|((d&0x7f)<<14)|((e&0x3f)<<8)|a;
        if(cp>0xfffff) {
            if(cp>=0x5000000) {
                cp+=0x9000000;
            }
            return cp;
        }
    }
    /* non-shortest form */
    return -0x100+a; /* illegal sequence, return -1, a */
}

#endif