145 #ifndef __UTF_OLD_H__ 146 #define __UTF_OLD_H__ 148 #ifndef U_HIDE_DEPRECATED_API 156 #ifdef U_USE_UTF_DEPRECATES 164 typedef int32_t UTextOffset;
196 #define UTF8_ERROR_VALUE_1 0x15 203 #define UTF8_ERROR_VALUE_2 0x9f 211 #define UTF_ERROR_VALUE 0xffff 219 #define UTF_IS_ERROR(c) \ 220 (((c)&0xfffe)==0xfffe || (c)==UTF8_ERROR_VALUE_1 || (c)==UTF8_ERROR_VALUE_2) 227 #define UTF_IS_VALID(c) \ 228 (UTF_IS_UNICODE_CHAR(c) && \ 229 (c)!=UTF8_ERROR_VALUE_1 && (c)!=UTF8_ERROR_VALUE_2) 235 #define UTF_IS_SURROGATE(uchar) (((uchar)&0xfffff800)==0xd800) 242 #define UTF_IS_UNICODE_NONCHAR(c) \ 244 ((uint32_t)(c)<=0xfdef || ((c)&0xfffe)==0xfffe) && \ 245 (uint32_t)(c)<=0x10ffff) 262 #define UTF_IS_UNICODE_CHAR(c) \ 263 ((uint32_t)(c)<0xd800 || \ 264 ((uint32_t)(c)>0xdfff && \ 265 (uint32_t)(c)<=0x10ffff && \ 266 !UTF_IS_UNICODE_NONCHAR(c))) 274 #define UTF8_COUNT_TRAIL_BYTES(leadByte) (utf8_countTrailBytes[(uint8_t)leadByte]) 280 #define UTF8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1) 283 #define UTF8_IS_SINGLE(uchar) (((uchar)&0x80)==0) 285 #define UTF8_IS_LEAD(uchar) ((uint8_t)((uchar)-0xc0)<0x3e) 287 #define UTF8_IS_TRAIL(uchar) (((uchar)&0xc0)==0x80) 290 #define UTF8_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0x7f) 306 # define UTF8_CHAR_LENGTH(c) \ 307 ((uint32_t)(c)<=0x7f ? 1 : \ 308 ((uint32_t)(c)<=0x7ff ? 2 : \ 309 ((uint32_t)((c)-0x10000)>0xfffff ? 3 : 4) \ 313 # define UTF8_CHAR_LENGTH(c) \ 314 ((uint32_t)(c)<=0x7f ? 1 : \ 315 ((uint32_t)(c)<=0x7ff ? 2 : \ 316 ((uint32_t)(c)<=0xffff ? 3 : \ 317 ((uint32_t)(c)<=0x10ffff ? 4 : \ 318 ((uint32_t)(c)<=0x3ffffff ? 5 : \ 319 ((uint32_t)(c)<=0x7fffffff ? 6 : 3) \ 328 #define UTF8_MAX_CHAR_LENGTH 4 331 #define UTF8_ARRAY_SIZE(size) ((5*(size))/2) 334 #define UTF8_GET_CHAR_UNSAFE(s, i, c) { \ 335 int32_t _utf8_get_char_unsafe_index=(int32_t)(i); \ 336 UTF8_SET_CHAR_START_UNSAFE(s, _utf8_get_char_unsafe_index); \ 337 UTF8_NEXT_CHAR_UNSAFE(s, _utf8_get_char_unsafe_index, c); \ 341 #define UTF8_GET_CHAR_SAFE(s, start, i, length, c, strict) { \ 342 int32_t _utf8_get_char_safe_index=(int32_t)(i); \ 343 UTF8_SET_CHAR_START_SAFE(s, start, _utf8_get_char_safe_index); \ 344 UTF8_NEXT_CHAR_SAFE(s, _utf8_get_char_safe_index, length, c, strict); \ 348 #define UTF8_NEXT_CHAR_UNSAFE(s, i, c) { \ 350 if((uint8_t)((c)-0xc0)<0x35) { \ 351 uint8_t __count=UTF8_COUNT_TRAIL_BYTES(c); \ 352 UTF8_MASK_LEAD_BYTE(c, __count); \ 356 (c)=((c)<<6)|((s)[(i)++]&0x3f); \ 358 (c)=((c)<<6)|((s)[(i)++]&0x3f); \ 360 (c)=((c)<<6)|((s)[(i)++]&0x3f); \ 368 #define UTF8_APPEND_CHAR_UNSAFE(s, i, c) { \ 369 if((uint32_t)(c)<=0x7f) { \ 370 (s)[(i)++]=(uint8_t)(c); \ 372 if((uint32_t)(c)<=0x7ff) { \ 373 (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \ 375 if((uint32_t)(c)<=0xffff) { \ 376 (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \ 378 (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \ 379 (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \ 381 (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \ 383 (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \ 388 #define UTF8_FWD_1_UNSAFE(s, i) { \ 389 (i)+=1+UTF8_COUNT_TRAIL_BYTES((s)[i]); \ 393 #define UTF8_FWD_N_UNSAFE(s, i, n) { \ 396 UTF8_FWD_1_UNSAFE(s, i); \ 402 #define UTF8_SET_CHAR_START_UNSAFE(s, i) { \ 403 while(UTF8_IS_TRAIL((s)[i])) { --(i); } \ 407 #define UTF8_NEXT_CHAR_SAFE(s, i, length, c, strict) { \ 410 if(UTF8_IS_LEAD(c)) { \ 411 (c)=utf8_nextCharSafeBody(s, &(i), (int32_t)(length), c, strict); \ 413 (c)=UTF8_ERROR_VALUE_1; \ 419 #define UTF8_APPEND_CHAR_SAFE(s, i, length, c) { \ 420 if((uint32_t)(c)<=0x7f) { \ 421 (s)[(i)++]=(uint8_t)(c); \ 423 (i)=utf8_appendCharSafeBody(s, (int32_t)(i), (int32_t)(length), c, NULL); \ 428 #define UTF8_FWD_1_SAFE(s, i, length) U8_FWD_1(s, i, length) 431 #define UTF8_FWD_N_SAFE(s, i, length, n) U8_FWD_N(s, i, length, n) 434 #define UTF8_SET_CHAR_START_SAFE(s, start, i) U8_SET_CP_START(s, start, i) 437 #define UTF8_PREV_CHAR_UNSAFE(s, i, c) { \ 439 if(UTF8_IS_TRAIL(c)) { \ 440 uint8_t __b, __count=1, __shift=6; \ 447 UTF8_MASK_LEAD_BYTE(__b, __count); \ 448 (c)|=(UChar32)__b<<__shift; \ 451 (c)|=(UChar32)(__b&0x3f)<<__shift; \ 460 #define UTF8_BACK_1_UNSAFE(s, i) { \ 461 while(UTF8_IS_TRAIL((s)[--(i)])) {} \ 465 #define UTF8_BACK_N_UNSAFE(s, i, n) { \ 468 UTF8_BACK_1_UNSAFE(s, i); \ 474 #define UTF8_SET_CHAR_LIMIT_UNSAFE(s, i) { \ 475 UTF8_BACK_1_UNSAFE(s, i); \ 476 UTF8_FWD_1_UNSAFE(s, i); \ 480 #define UTF8_PREV_CHAR_SAFE(s, start, i, c, strict) { \ 484 (c)=utf8_prevCharSafeBody(s, start, &(i), c, strict); \ 486 (c)=UTF8_ERROR_VALUE_1; \ 492 #define UTF8_BACK_1_SAFE(s, start, i) U8_BACK_1(s, start, i) 495 #define UTF8_BACK_N_SAFE(s, start, i, n) U8_BACK_N(s, start, i, n) 498 #define UTF8_SET_CHAR_LIMIT_SAFE(s, start, i, length) U8_SET_CP_LIMIT(s, start, i, length) 503 #define UTF_IS_FIRST_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xd800) 506 #define UTF_IS_SECOND_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xdc00) 509 #define UTF_IS_SURROGATE_FIRST(c) (((c)&0x400)==0) 512 #define UTF_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000) 515 #define UTF16_GET_PAIR_VALUE(first, second) \ 516 (((first)<<10UL)+(second)-UTF_SURROGATE_OFFSET) 519 #define UTF_FIRST_SURROGATE(supplementary) (UChar)(((supplementary)>>10)+0xd7c0) 522 #define UTF_SECOND_SURROGATE(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00) 525 #define UTF16_LEAD(supplementary) UTF_FIRST_SURROGATE(supplementary) 528 #define UTF16_TRAIL(supplementary) UTF_SECOND_SURROGATE(supplementary) 531 #define UTF16_IS_SINGLE(uchar) !UTF_IS_SURROGATE(uchar) 534 #define UTF16_IS_LEAD(uchar) UTF_IS_FIRST_SURROGATE(uchar) 537 #define UTF16_IS_TRAIL(uchar) UTF_IS_SECOND_SURROGATE(uchar) 540 #define UTF16_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0xffff) 543 #define UTF16_CHAR_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2) 546 #define UTF16_MAX_CHAR_LENGTH 2 549 #define UTF16_ARRAY_SIZE(size) (size) 562 #define UTF16_GET_CHAR_UNSAFE(s, i, c) { \ 564 if(UTF_IS_SURROGATE(c)) { \ 565 if(UTF_IS_SURROGATE_FIRST(c)) { \ 566 (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)+1]); \ 568 (c)=UTF16_GET_PAIR_VALUE((s)[(i)-1], (c)); \ 574 #define UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict) { \ 576 if(UTF_IS_SURROGATE(c)) { \ 578 if(UTF_IS_SURROGATE_FIRST(c)) { \ 579 if((i)+1<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)+1])) { \ 580 (c)=UTF16_GET_PAIR_VALUE((c), __c2); \ 584 (c)=UTF_ERROR_VALUE; \ 587 if((i)-1>=(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \ 588 (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \ 592 (c)=UTF_ERROR_VALUE; \ 595 } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \ 596 (c)=UTF_ERROR_VALUE; \ 601 #define UTF16_NEXT_CHAR_UNSAFE(s, i, c) { \ 603 if(UTF_IS_FIRST_SURROGATE(c)) { \ 604 (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)++]); \ 609 #define UTF16_APPEND_CHAR_UNSAFE(s, i, c) { \ 610 if((uint32_t)(c)<=0xffff) { \ 611 (s)[(i)++]=(uint16_t)(c); \ 613 (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \ 614 (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \ 619 #define UTF16_FWD_1_UNSAFE(s, i) { \ 620 if(UTF_IS_FIRST_SURROGATE((s)[(i)++])) { \ 626 #define UTF16_FWD_N_UNSAFE(s, i, n) { \ 629 UTF16_FWD_1_UNSAFE(s, i); \ 635 #define UTF16_SET_CHAR_START_UNSAFE(s, i) { \ 636 if(UTF_IS_SECOND_SURROGATE((s)[i])) { \ 642 #define UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict) { \ 644 if(UTF_IS_FIRST_SURROGATE(c)) { \ 646 if((i)<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)])) { \ 648 (c)=UTF16_GET_PAIR_VALUE((c), __c2); \ 652 (c)=UTF_ERROR_VALUE; \ 654 } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \ 656 (c)=UTF_ERROR_VALUE; \ 661 #define UTF16_APPEND_CHAR_SAFE(s, i, length, c) { \ 662 if((uint32_t)(c)<=0xffff) { \ 663 (s)[(i)++]=(uint16_t)(c); \ 664 } else if((uint32_t)(c)<=0x10ffff) { \ 665 if((i)+1<(length)) { \ 666 (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \ 667 (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \ 669 (s)[(i)++]=UTF_ERROR_VALUE; \ 672 (s)[(i)++]=UTF_ERROR_VALUE; \ 677 #define UTF16_FWD_1_SAFE(s, i, length) U16_FWD_1(s, i, length) 680 #define UTF16_FWD_N_SAFE(s, i, length, n) U16_FWD_N(s, i, length, n) 683 #define UTF16_SET_CHAR_START_SAFE(s, start, i) U16_SET_CP_START(s, start, i) 686 #define UTF16_PREV_CHAR_UNSAFE(s, i, c) { \ 688 if(UTF_IS_SECOND_SURROGATE(c)) { \ 689 (c)=UTF16_GET_PAIR_VALUE((s)[--(i)], (c)); \ 694 #define UTF16_BACK_1_UNSAFE(s, i) { \ 695 if(UTF_IS_SECOND_SURROGATE((s)[--(i)])) { \ 701 #define UTF16_BACK_N_UNSAFE(s, i, n) { \ 704 UTF16_BACK_1_UNSAFE(s, i); \ 710 #define UTF16_SET_CHAR_LIMIT_UNSAFE(s, i) { \ 711 if(UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \ 717 #define UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) { \ 719 if(UTF_IS_SECOND_SURROGATE(c)) { \ 721 if((i)>(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \ 723 (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \ 727 (c)=UTF_ERROR_VALUE; \ 729 } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \ 731 (c)=UTF_ERROR_VALUE; \ 736 #define UTF16_BACK_1_SAFE(s, start, i) U16_BACK_1(s, start, i) 739 #define UTF16_BACK_N_SAFE(s, start, i, n) U16_BACK_N(s, start, i, n) 742 #define UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length) 762 #define UTF32_IS_SAFE(c, strict) \ 764 (uint32_t)(c)<=0x10ffff : \ 765 UTF_IS_UNICODE_CHAR(c)) 778 #define UTF32_IS_SINGLE(uchar) 1 780 #define UTF32_IS_LEAD(uchar) 0 782 #define UTF32_IS_TRAIL(uchar) 0 787 #define UTF32_NEED_MULTIPLE_UCHAR(c) 0 789 #define UTF32_CHAR_LENGTH(c) 1 791 #define UTF32_MAX_CHAR_LENGTH 1 796 #define UTF32_ARRAY_SIZE(size) (size) 799 #define UTF32_GET_CHAR_UNSAFE(s, i, c) { \ 804 #define UTF32_GET_CHAR_SAFE(s, start, i, length, c, strict) { \ 806 if(!UTF32_IS_SAFE(c, strict)) { \ 807 (c)=UTF_ERROR_VALUE; \ 814 #define UTF32_NEXT_CHAR_UNSAFE(s, i, c) { \ 819 #define UTF32_APPEND_CHAR_UNSAFE(s, i, c) { \ 824 #define UTF32_FWD_1_UNSAFE(s, i) { \ 829 #define UTF32_FWD_N_UNSAFE(s, i, n) { \ 834 #define UTF32_SET_CHAR_START_UNSAFE(s, i) { \ 838 #define UTF32_NEXT_CHAR_SAFE(s, i, length, c, strict) { \ 840 if(!UTF32_IS_SAFE(c, strict)) { \ 841 (c)=UTF_ERROR_VALUE; \ 846 #define UTF32_APPEND_CHAR_SAFE(s, i, length, c) { \ 847 if((uint32_t)(c)<=0x10ffff) { \ 855 #define UTF32_FWD_1_SAFE(s, i, length) { \ 860 #define UTF32_FWD_N_SAFE(s, i, length, n) { \ 861 if(((i)+=(n))>(length)) { \ 867 #define UTF32_SET_CHAR_START_SAFE(s, start, i) { \ 873 #define UTF32_PREV_CHAR_UNSAFE(s, i, c) { \ 878 #define UTF32_BACK_1_UNSAFE(s, i) { \ 883 #define UTF32_BACK_N_UNSAFE(s, i, n) { \ 888 #define UTF32_SET_CHAR_LIMIT_UNSAFE(s, i) { \ 892 #define UTF32_PREV_CHAR_SAFE(s, start, i, c, strict) { \ 894 if(!UTF32_IS_SAFE(c, strict)) { \ 895 (c)=UTF_ERROR_VALUE; \ 900 #define UTF32_BACK_1_SAFE(s, start, i) { \ 905 #define UTF32_BACK_N_SAFE(s, start, i, n) { \ 913 #define UTF32_SET_CHAR_LIMIT_SAFE(s, i, length) { \ 923 #define UTF_ARRAY_SIZE(size) UTF16_ARRAY_SIZE(size) 926 #define UTF_GET_CHAR_UNSAFE(s, i, c) UTF16_GET_CHAR_UNSAFE(s, i, c) 929 #define UTF_GET_CHAR_SAFE(s, start, i, length, c, strict) UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict) 933 #define UTF_NEXT_CHAR_UNSAFE(s, i, c) UTF16_NEXT_CHAR_UNSAFE(s, i, c) 936 #define UTF_NEXT_CHAR_SAFE(s, i, length, c, strict) UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict) 940 #define UTF_APPEND_CHAR_UNSAFE(s, i, c) UTF16_APPEND_CHAR_UNSAFE(s, i, c) 943 #define UTF_APPEND_CHAR_SAFE(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c) 947 #define UTF_FWD_1_UNSAFE(s, i) UTF16_FWD_1_UNSAFE(s, i) 950 #define UTF_FWD_1_SAFE(s, i, length) UTF16_FWD_1_SAFE(s, i, length) 954 #define UTF_FWD_N_UNSAFE(s, i, n) UTF16_FWD_N_UNSAFE(s, i, n) 957 #define UTF_FWD_N_SAFE(s, i, length, n) UTF16_FWD_N_SAFE(s, i, length, n) 961 #define UTF_SET_CHAR_START_UNSAFE(s, i) UTF16_SET_CHAR_START_UNSAFE(s, i) 964 #define UTF_SET_CHAR_START_SAFE(s, start, i) UTF16_SET_CHAR_START_SAFE(s, start, i) 968 #define UTF_PREV_CHAR_UNSAFE(s, i, c) UTF16_PREV_CHAR_UNSAFE(s, i, c) 971 #define UTF_PREV_CHAR_SAFE(s, start, i, c, strict) UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) 975 #define UTF_BACK_1_UNSAFE(s, i) UTF16_BACK_1_UNSAFE(s, i) 978 #define UTF_BACK_1_SAFE(s, start, i) UTF16_BACK_1_SAFE(s, start, i) 982 #define UTF_BACK_N_UNSAFE(s, i, n) UTF16_BACK_N_UNSAFE(s, i, n) 985 #define UTF_BACK_N_SAFE(s, start, i, n) UTF16_BACK_N_SAFE(s, start, i, n) 989 #define UTF_SET_CHAR_LIMIT_UNSAFE(s, i) UTF16_SET_CHAR_LIMIT_UNSAFE(s, i) 992 #define UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length) UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length) 1001 #define UTF_IS_SINGLE(uchar) U16_IS_SINGLE(uchar) 1008 #define UTF_IS_LEAD(uchar) U16_IS_LEAD(uchar) 1015 #define UTF_IS_TRAIL(uchar) U16_IS_TRAIL(uchar) 1022 #define UTF_NEED_MULTIPLE_UCHAR(c) UTF16_NEED_MULTIPLE_UCHAR(c) 1029 #define UTF_CHAR_LENGTH(c) U16_LENGTH(c) 1036 #define UTF_MAX_CHAR_LENGTH U16_MAX_LENGTH 1047 #define UTF_GET_CHAR(s, start, i, length, c) U16_GET(s, start, i, length, c) 1060 #define UTF_NEXT_CHAR(s, i, length, c) U16_NEXT(s, i, length, c) 1073 #define UTF_APPEND_CHAR(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c) 1084 #define UTF_FWD_1(s, i, length) U16_FWD_1(s, i, length) 1095 #define UTF_FWD_N(s, i, length, n) U16_FWD_N(s, i, length, n) 1111 #define UTF_SET_CHAR_START(s, start, i) U16_SET_CP_START(s, start, i) 1124 #define UTF_PREV_CHAR(s, start, i, c) U16_PREV(s, start, i, c) 1137 #define UTF_BACK_1(s, start, i) U16_BACK_1(s, start, i) 1150 #define UTF_BACK_N(s, start, i, n) U16_BACK_N(s, start, i, n) 1166 #define UTF_SET_CHAR_LIMIT(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length) C API: 8-bit Unicode handling macros.
C API: Code point macros.
C API: 16-bit Unicode handling macros.