1 /* 2 * Copyright 2008, Ingo Weinhold, ingo_weinhold@gmx.de. 3 * Distributed under the terms of the MIT License. 4 */ 5 #ifndef UTF8_CHAR_H 6 #define UTF8_CHAR_H 7 8 #include <ctype.h> 9 #include <string.h> 10 11 12 struct UTF8Char { 13 char bytes[4]; 14 15 UTF8Char() 16 { 17 } 18 19 UTF8Char(char c) 20 { 21 bytes[0] = c; 22 } 23 24 UTF8Char(const char* c, int32 count) 25 { 26 SetTo(c, count); 27 } 28 29 void SetTo(const char* c, int32 count) 30 { 31 bytes[0] = c[0]; 32 if (count > 1) { 33 bytes[1] = c[1]; 34 if (count > 2) { 35 bytes[2] = c[2]; 36 if (count > 3) 37 bytes[3] = c[3]; 38 } 39 } 40 } 41 42 static int32 ByteCount(char firstChar) 43 { 44 // Note, this does not recognize invalid chars 45 uchar c = firstChar; 46 if (c < 0x80) 47 return 1; 48 if (c < 0xe0) 49 return 2; 50 return c < 0xf0 ? 3 : 4; 51 } 52 53 int32 ByteCount() const 54 { 55 return ByteCount(bytes[0]); 56 } 57 58 bool IsFullWidth() const 59 { 60 // TODO: Implement! 61 return false; 62 } 63 64 bool IsSpace() const 65 { 66 // TODO: Support multi-byte chars! 67 return ByteCount() == 1 ? isspace(bytes[0]) : false; 68 } 69 70 UTF8Char ToLower() const 71 { 72 // TODO: Support multi-byte chars! 73 if (ByteCount() > 1) 74 return *this; 75 76 return UTF8Char((char)tolower(bytes[0])); 77 } 78 79 bool operator==(const UTF8Char& other) const 80 { 81 int32 byteCount = ByteCount(); 82 bool equals = bytes[0] == other.bytes[0]; 83 if (byteCount > 1 && equals) { 84 equals = bytes[1] == other.bytes[1]; 85 if (byteCount > 2 && equals) { 86 equals = bytes[2] == other.bytes[2]; 87 if (byteCount > 3 && equals) 88 equals = bytes[3] == other.bytes[3]; 89 } 90 } 91 return equals; 92 } 93 94 bool operator!=(const UTF8Char& other) const 95 { 96 return !(*this == other); 97 } 98 }; 99 100 101 #endif // UTF8_CHAR_H 102