1 /* 2 * Copyright 2008, Ingo Weinhold, ingo_weinhold@gmx.de. 3 * Distributed under the terms of the MIT License. 4 */ 5 #ifndef UTF8_CHAR_H 6 #define UTF8_CHAR_H 7 8 #include <ctype.h> 9 #include <string.h> 10 11 #include <UnicodeChar.h> 12 13 14 struct UTF8Char { 15 char bytes[4]; 16 17 UTF8Char() 18 { 19 bytes[0] = 0; 20 } 21 22 UTF8Char(char c) 23 { 24 bytes[0] = c; 25 } 26 27 UTF8Char(const char* c) 28 { 29 SetTo(c, ByteCount(*c)); 30 } 31 32 UTF8Char(const char* c, int32 count) 33 { 34 SetTo(c, count); 35 } 36 37 void SetTo(const char* c, int32 count) 38 { 39 bytes[0] = c[0]; 40 if (count > 1) { 41 bytes[1] = c[1]; 42 if (count > 2) { 43 bytes[2] = c[2]; 44 if (count > 3) 45 bytes[3] = c[3]; 46 } 47 } 48 } 49 50 static int32 ByteCount(char firstChar) 51 { 52 // Note, this does not recognize invalid chars 53 uchar c = firstChar; 54 if (c < 0x80) 55 return 1; 56 if (c < 0xe0) 57 return 2; 58 return c < 0xf0 ? 3 : 4; 59 } 60 61 int32 ByteCount() const 62 { 63 return ByteCount(bytes[0]); 64 } 65 66 bool IsFullWidth() const 67 { 68 switch (BUnicodeChar::EastAsianWidth(BUnicodeChar::FromUTF8(bytes))) { 69 case B_UNICODE_EA_FULLWIDTH: 70 case B_UNICODE_EA_WIDE: 71 return true; 72 default: 73 break; 74 } 75 return false; 76 } 77 78 bool IsSpace() const 79 { 80 return BUnicodeChar::IsSpace(BUnicodeChar::FromUTF8(bytes)); 81 } 82 83 bool IsAlNum() const 84 { 85 return BUnicodeChar::IsAlNum(BUnicodeChar::FromUTF8(bytes)); 86 } 87 88 UTF8Char ToLower() const 89 { 90 uint32 c = BUnicodeChar::ToLower(BUnicodeChar::FromUTF8(bytes)); 91 92 UTF8Char character; 93 char* utf8 = character.bytes; 94 BUnicodeChar::ToUTF8(c, &utf8); 95 96 return character; 97 } 98 99 bool operator==(const UTF8Char& other) const 100 { 101 int32 byteCount = ByteCount(); 102 bool equals = bytes[0] == other.bytes[0]; 103 if (byteCount > 1 && equals) { 104 equals = bytes[1] == other.bytes[1]; 105 if (byteCount > 2 && equals) { 106 equals = bytes[2] == other.bytes[2]; 107 if (byteCount > 3 && equals) 108 equals = bytes[3] == other.bytes[3]; 109 } 110 } 111 return equals; 112 } 113 114 bool operator!=(const UTF8Char& other) const 115 { 116 return !(*this == other); 117 } 118 }; 119 120 121 #endif // UTF8_CHAR_H 122