152b1d543SIngo Weinhold /* 252b1d543SIngo Weinhold * Copyright 2008, Ingo Weinhold, ingo_weinhold@gmx.de. 352b1d543SIngo Weinhold * Distributed under the terms of the MIT License. 452b1d543SIngo Weinhold */ 552b1d543SIngo Weinhold #ifndef UTF8_CHAR_H 652b1d543SIngo Weinhold #define UTF8_CHAR_H 752b1d543SIngo Weinhold 852b1d543SIngo Weinhold #include <ctype.h> 952b1d543SIngo Weinhold #include <string.h> 1052b1d543SIngo Weinhold 1152b1d543SIngo Weinhold 1252b1d543SIngo Weinhold struct UTF8Char { 1352b1d543SIngo Weinhold char bytes[4]; 1452b1d543SIngo Weinhold 1552b1d543SIngo Weinhold UTF8Char() 1652b1d543SIngo Weinhold { 1752b1d543SIngo Weinhold } 1852b1d543SIngo Weinhold 1952b1d543SIngo Weinhold UTF8Char(char c) 2052b1d543SIngo Weinhold { 2152b1d543SIngo Weinhold bytes[0] = c; 2252b1d543SIngo Weinhold } 2352b1d543SIngo Weinhold 24bdc33077SIngo Weinhold UTF8Char(const char* c, int32 count) 25bdc33077SIngo Weinhold { 26bdc33077SIngo Weinhold SetTo(c, count); 27bdc33077SIngo Weinhold } 28bdc33077SIngo Weinhold 29bdc33077SIngo Weinhold void SetTo(const char* c, int32 count) 30bdc33077SIngo Weinhold { 31bdc33077SIngo Weinhold bytes[0] = c[0]; 32bdc33077SIngo Weinhold if (count > 1) { 33bdc33077SIngo Weinhold bytes[1] = c[1]; 34bdc33077SIngo Weinhold if (count > 2) { 35bdc33077SIngo Weinhold bytes[2] = c[2]; 36bdc33077SIngo Weinhold if (count > 3) 37bdc33077SIngo Weinhold bytes[3] = c[3]; 38bdc33077SIngo Weinhold } 39bdc33077SIngo Weinhold } 40bdc33077SIngo Weinhold } 41bdc33077SIngo Weinhold 4252b1d543SIngo Weinhold static int32 ByteCount(char firstChar) 4352b1d543SIngo Weinhold { 4452b1d543SIngo Weinhold // Note, this does not recognize invalid chars 45*82224430SStephan Aßmus uchar c = firstChar; 4652b1d543SIngo Weinhold if (c < 0x80) 4752b1d543SIngo Weinhold return 1; 4852b1d543SIngo Weinhold if (c < 0xe0) 4952b1d543SIngo Weinhold return 2; 5052b1d543SIngo Weinhold return c < 0xf0 ? 3 : 4; 5152b1d543SIngo Weinhold } 5252b1d543SIngo Weinhold 5352b1d543SIngo Weinhold int32 ByteCount() const 5452b1d543SIngo Weinhold { 5552b1d543SIngo Weinhold return ByteCount(bytes[0]); 5652b1d543SIngo Weinhold } 5752b1d543SIngo Weinhold 584c9d4b02SIngo Weinhold bool IsFullWidth() const 594c9d4b02SIngo Weinhold { 604c9d4b02SIngo Weinhold // TODO: Implement! 614c9d4b02SIngo Weinhold return false; 624c9d4b02SIngo Weinhold } 634c9d4b02SIngo Weinhold 6452b1d543SIngo Weinhold bool IsSpace() const 6552b1d543SIngo Weinhold { 6652b1d543SIngo Weinhold // TODO: Support multi-byte chars! 6752b1d543SIngo Weinhold return ByteCount() == 1 ? isspace(bytes[0]) : false; 6852b1d543SIngo Weinhold } 6952b1d543SIngo Weinhold 7052b1d543SIngo Weinhold UTF8Char ToLower() const 7152b1d543SIngo Weinhold { 7252b1d543SIngo Weinhold // TODO: Support multi-byte chars! 7352b1d543SIngo Weinhold if (ByteCount() > 1) 7452b1d543SIngo Weinhold return *this; 7552b1d543SIngo Weinhold 7652b1d543SIngo Weinhold return UTF8Char((char)tolower(bytes[0])); 7752b1d543SIngo Weinhold } 7852b1d543SIngo Weinhold 7952b1d543SIngo Weinhold bool operator==(const UTF8Char& other) const 8052b1d543SIngo Weinhold { 8152b1d543SIngo Weinhold int32 byteCount = ByteCount(); 8252b1d543SIngo Weinhold bool equals = bytes[0] == other.bytes[0]; 8352b1d543SIngo Weinhold if (byteCount > 1 && equals) { 8452b1d543SIngo Weinhold equals = bytes[1] == other.bytes[1]; 8552b1d543SIngo Weinhold if (byteCount > 2 && equals) { 8652b1d543SIngo Weinhold equals = bytes[2] == other.bytes[2]; 8752b1d543SIngo Weinhold if (byteCount > 3 && equals) 8852b1d543SIngo Weinhold equals = bytes[3] == other.bytes[3]; 8952b1d543SIngo Weinhold } 9052b1d543SIngo Weinhold } 9152b1d543SIngo Weinhold return equals; 9252b1d543SIngo Weinhold } 9352b1d543SIngo Weinhold 9452b1d543SIngo Weinhold bool operator!=(const UTF8Char& other) const 9552b1d543SIngo Weinhold { 9652b1d543SIngo Weinhold return !(*this == other); 9752b1d543SIngo Weinhold } 9852b1d543SIngo Weinhold }; 9952b1d543SIngo Weinhold 10052b1d543SIngo Weinhold 10152b1d543SIngo Weinhold #endif // UTF8_CHAR_H 102