uh8.h

(Moved here unchanged from http://www.mindspring.com/~markus.scherer/unicode/uh8.h)

/*
 * file name:  uh8.h
 * encoding:   US-ASCII
 * tab size:   8 (not used)
 * indentation:4
 *
 * created on: 2006mar04
 * created by: Markus W. Scherer
 *
 * Byte layout handled by this header (as implemented by the code below):
 *   - single byte 0x00..0x7f: the code point itself
 *   - three bytes L M H:
 *       L (lead)   0x80..0xbf  carries bits 18..13
 *       M (middle) 0x80..0xff  carries bits 12..6
 *       H (trail)  0xc0..0xff  carries bits  5..0
 *     Stored values 0x80..0x4ffff are the code point itself; stored values
 *     0x50000..0x7ffff stand for code points 0xe0000..0x10ffff (offset
 *     0x90000). Stored values <=0x7f in three bytes are rejected.
 */

#ifndef __UH8_H__
#define __UH8_H__

#include <stddef.h>     /* NULL */
#include <stdint.h>     /* uint8_t, int32_t, uint32_t */

/*
 * UChar32 is not declared in this header.
 * NOTE(review): it is presumably ICU's 32-bit signed code point type from
 * unicode/umachine.h, which also defines U_SENTINEL - confirm.
 * If that header has not been included first, fall back to an equivalent
 * typedef so that this header compiles on its own.
 */
#ifndef U_SENTINEL
typedef int32_t UChar32;
#endif

/*
 * Read one code point from s starting at index i and advance i past it.
 *
 * s       byte array (char or uint8_t elements)
 * i       index lvalue; must be <length on entry; post-incremented
 * length  number of bytes in s
 * c       signed 32-bit lvalue; receives the code point, or -1 on error
 *
 * On error only the first byte is consumed (i is advanced by 1).
 * The arguments are evaluated more than once, and the expansion is a bare
 * { } block, not do { } while(0): do not use it as the unbraced body of an
 * if that has an else.
 */
#define UH8_NEXT(s, i, length, c) { \
    (c)=(uint8_t)(s)[(i)++]; \
    if((c)>=0x80) { \
        uint8_t _d_, _e_; \
        if((c)<0xc0) { \
            /* LMH */ \
            if( ((i)+1)<(length) && \
                (_d_=(uint8_t)(s)[(i)])>=0x80 && \
                (_e_=(uint8_t)(s)[(i)+1])>=0xc0 \
            ) { \
                (i)+=2; \
                (c)=(((c)&0x3f)<<13)|((_d_&0x7f)<<6)|(_e_&0x3f); \
                if((c)<=0x7f) { \
                    /* a single-byte value must not use three bytes */ \
                    (c)=-1; \
                } else if((c)>=0x50000) { \
                    /* upper part of the range maps to 0xe0000..0x10ffff */ \
                    (c)+=0x90000; \
                } \
            } else { \
                /* lead byte without a valid M and H following it */ \
                (c)=-1; \
            } \
        } else { \
            /* 0xc0..0xff cannot start a sequence */ \
            (c)=-1; \
        } \
    } \
}

/*
 * UH8_PREV() is symmetrical to UH8_NEXT():
 * fetch c=s[--i];
 * Single->use c; H->should be preceded by LM
 */

/*
 * Write a representable code point c to s at index i and advance i
 * by 1 or 3. No bounds check and no validation ("unsafe"): the caller must
 * provide enough room and a code point in 0..0x4ffff or 0xe0000..0x10ffff.
 *
 * c is evaluated exactly once and is not modified, so it may be any
 * integer expression, including a constant.
 */
#define UH8_APPEND_UNSAFE(s, i, c) { \
    uint32_t _c_=(uint32_t)(c); \
    if(_c_<=0x7f) { \
        (s)[(i)++]=(uint8_t)_c_; \
    } else { \
        if(_c_>=0xe0000) { \
            /* fold 0xe0000..0x10ffff down to 0x50000..0x7ffff */ \
            _c_-=0x90000; \
        } \
        (s)[(i)++]=(uint8_t)(((_c_>>13)&0x3f)|0x80); \
        (s)[(i)++]=(uint8_t)(((_c_>>6)&0x7f)|0x80); \
        (s)[(i)++]=(uint8_t)((_c_&0x3f)|0xc0); \
    } \
}

/*
 * Random access into a UH8 string at index i.
 * Returns an integer with three bit fields:
 * - 31..8 code point at i, or -1 if there is an error
 * -  7..4 distance from the beginning of the code point to i
 * -  3..0 distance from i to the beginning of the next code point
 *
 * Returns plain -1 (all bits set) if s is NULL or i is outside 0..length-1.
 *
 * NOTE(review): this is an external-linkage definition inside a header, so
 * including the header from more than one translation unit produces
 * duplicate symbols. The linkage is left unchanged here to keep the
 * interface as it was.
 */
extern UChar32
uh8_get(const char *s, int32_t i, int32_t length) {
    int32_t a, b, c, d, e;
    UChar32 cp;

    if(s==NULL || length<0 || i<0 || i>=length) {
        return -1;
    }

    c=(uint8_t)s[i];
    if(c<0x80) {
        return ((int32_t)c<<8)|1; /* single byte, return c, 0, 1 */
    }

    /* fetch the 4 surrounding bytes; positions outside the string read as 0 */
    a=b=d=e=0;
    if(i>=1) {
        b=(uint8_t)s[i-1];
        if(i>=2) {
            a=(uint8_t)s[i-2];
        }
    }
    if((i+1)<length) {
        d=(uint8_t)s[i+1];
        if((i+2)<length) {
            e=(uint8_t)s[i+2];
        }
    }

    /*
     * Work out which position of an LMH triple s[i] occupies, move the
     * triple into c, d, e, and reuse a for the two distance nibbles.
     */
    if(c<0xc0) {
        /* c is in the lead range, which is also the low half of the M range */
        if(0x80<=b && b<0xc0 && d>=0xc0) {
            /* .lLh. */
            e=d;
            d=c;
            c=b;
            a=0x12;
        } else if(d>=0x80 && e>=0xc0) {
            /* ..Lmh */
            a=3;
        } else {
            return (UChar32)-0x100|1; /* illegal sequence, return -1, 0, 1 */
        }
    } else {
        /* c is in the trail range, which is also the high half of the M range */
        if(0x80<=a && a<0xc0 && b>=0x80) {
            /* lmH.. */
            e=c;
            d=b;
            c=a;
            a=0x21;
        } else if(0x80<=b && b<0xc0 && d>=0xc0) {
            /* .lHH. */
            e=d;
            d=c;
            c=b;
            a=0x12;
        } else {
            return (UChar32)-0x100|1; /* illegal sequence, return -1, 0, 1 */
        }
    }

    /* cde=LMH; assemble the value already shifted into bits 31..8 */
    cp=((c&0x3f)<<21)|((d&0x7f)<<14)|((e&0x3f)<<8)|a;
    if(cp>0x7fff) {
        if(cp>=0x5000000) {
            cp+=0x9000000; /* 0x50000..0x7ffff stand for 0xe0000..0x10ffff */
        }
        return cp;
    }
    /* a value <=0x7f must not use three bytes */
    return (UChar32)-0x100|a; /* illegal sequence, return -1, a */
}

#endif