1 #include <string.h> 2 #include <CharacterSet.h> 3 #include <Debug.h> 4 #include "character_sets.h" 5 6 namespace BPrivate { 7 8 /** 9 * These variables are used in defining the character_sets_by_id array below. 10 * @see http://www.iana.org/assignments/character-sets 11 **/ 12 13 const BCharacterSet unicode(0,106,"Unicode","UTF-8","UTF-8",NULL); 14 15 const char * isoLatin1aliases[] = 16 { "iso-ir-100","ISO_8859-1","ISO-8859-1","latin1","11","IBM819","CP819","csISOLatin1",NULL }; 17 const BCharacterSet isoLatin1(1,4,"ISO Latin 1","ISO_8859-1:1987","ISO_8859-1",isoLatin1aliases); 18 19 const char * isoLatin2aliases[] = 20 { "iso-ir-101","ISO_8859-2","ISO-8859-2","latin2","12","csISOLatin2",NULL }; 21 const BCharacterSet isoLatin2(2,5,"ISO Latin 2","ISO_8859-2:1987","ISO_8859-2",isoLatin2aliases); 22 23 const char * isoLatin3aliases[] = 24 { "iso-ir-109","ISO_8859-3","ISO-8859-3","latin3","13","csISOLatin3",NULL }; 25 const BCharacterSet isoLatin3(3,6,"ISO Latin 3","ISO_8859-3:1988","ISO_8859-3",isoLatin3aliases); 26 27 const char * isoLatin4aliases[] = 28 { "iso-ir-110","ISO_8859-4","ISO-8859-4","latin4","14","csISOLatin4",NULL }; 29 const BCharacterSet isoLatin4(4,7,"ISO Latin 4","ISO_8859-4:1988","ISO_8859-4",isoLatin4aliases); 30 31 const char * isoLatin5aliases[] = 32 { "iso-ir-144","ISO_8859-5","ISO-8859-5","cyrillic","csISOLatinCyrillic",NULL }; 33 const BCharacterSet isoLatin5(5,8,"ISO Cyrillic","ISO_8859-5:1988","ISO_8859-5",isoLatin5aliases); 34 35 const char * isoLatin6aliases[] = 36 { "iso-ir-127","ISO_8859-6","ISO-8859-6","ECMA-114","ASMO-708","arabic","csISOLatinArabic",NULL }; 37 const BCharacterSet isoLatin6(6,9,"ISO Arabic","ISO_8859-6:1987","ISO_8859-6",isoLatin6aliases); 38 39 const char * isoLatin7aliases[] = 40 { "iso-ir-126","ISO_8859-7","ISO-8859-7","ELOT_928","ECMA-118","greek","greek8","csISOLatinGreek",NULL }; 41 const BCharacterSet isoLatin7(7,10,"ISO Greek","ISO_8859-7:1987","ISO_8859-7",isoLatin7aliases); 42 43 const char * isoLatin8aliases[] = 44 { "iso-ir-138","ISO_8859-8","ISO-8859-8","hebrew","csISOLatinHebrew",NULL }; 45 const BCharacterSet isoLatin8(8,11,"ISO Hebrew","ISO_8859-8:1988","ISO-8859-8",isoLatin8aliases); 46 47 const char * isoLatin9aliases[] = 48 { "iso-ir-148","ISO_8859-9","ISO-8859-9","latin5","15","csISOLatin5",NULL }; 49 const BCharacterSet isoLatin9(9,12,"ISO Latin 5","ISO_8859-9:1989","ISO-8859-9",isoLatin9aliases); 50 51 const char * isoLatin10aliases[] = 52 { "iso-ir-157","16","ISO_8859-10:1992","csISOLatin6","latin6",NULL }; 53 const BCharacterSet isoLatin10(10,13,"ISO Latin 6","ISO-8859-10","ISO-8859-10",isoLatin10aliases); 54 55 const char * macintoshAliases[] = 56 { "mac","csMacintosh",NULL }; 57 const BCharacterSet macintosh(11,2027,"Macintosh Roman","macintosh",NULL,macintoshAliases); 58 59 const char * shiftJISaliases[] = 60 { "MS_Kanji","csShiftJIS",NULL }; 61 const BCharacterSet shiftJIS(12,17,"Shift JIS","Shift_JIS","Shift_JIS",shiftJISaliases); 62 63 const char * EUCPackedJapaneseAliases[] = 64 { "EUC-JP","csEUCPkdFmtJapanese",NULL }; 65 const BCharacterSet packedJapanese(13,18,"EUC Packed Format Japanese", 66 "Extended_UNIX_Code_Packed_Format_for_Japanese","EUC-JP", 67 EUCPackedJapaneseAliases); 68 69 const char * JIS0208aliases[] = 70 { "iso-ir-87","x0208","JIS_X0208-1983","csISO87JISX0208",NULL }; 71 const BCharacterSet JIS0208(14,63,"JIS 0208","JIS_C6226-1983",NULL,JIS0208aliases); 72 73 const BCharacterSet windows1252(15,2252,"MS-Windows Codepage 1252","windows-1252",NULL,NULL); 74 75 const char * unicode2aliases[] = 76 { "csUnicode",NULL }; 77 const BCharacterSet unicode2(16,1000,"Unicode 2.0","ISO-10646-UCS-2",NULL,unicode2aliases); 78 79 const char * KOI8Raliases[] = 80 { "csKOI8R",NULL }; 81 const BCharacterSet KOI8R(17,2084,"KOI8-R Cyrillic","KOI8-R","KOI8-R",KOI8Raliases); 82 83 const BCharacterSet windows1251(18,2251,"MS-Windows Codepage 1251","windows-1251",NULL,NULL); 84 85 const char * IBM866aliases[] = 86 { "cp866","866","csIBM866",NULL }; 87 const BCharacterSet IBM866(19,2086,"IBM Codepage 866","IBM866","IBM866",IBM866aliases); 88 89 const char * IBM437aliases[] = 90 { "cp437","437","csPC8CodePage437",NULL }; 91 const BCharacterSet IBM437(20,2011,"IBM Codepage 437","IBM437","IBM437",IBM437aliases); 92 93 const char * eucKRaliases[] = 94 { "csEUCKR",NULL }; 95 const BCharacterSet eucKR(21,38,"EUC Korean","EUC-KR","EUC-KR",eucKRaliases); 96 97 const BCharacterSet iso13(22,109,"ISO 8859-13","ISO-8859-13","ISO-8859-13",NULL); 98 99 const char * iso14aliases[] = 100 { "iso-ir-199","ISO_8859-14:1998","ISO_8859-14","latin8","iso-celtic","l8",NULL }; 101 const BCharacterSet iso14(23,110,"ISO 8859-14","ISO-8859-14","ISO-8859-14",iso14aliases); 102 103 const char * iso15aliases[] = 104 { "ISO_8859-14","Latin-9",NULL }; 105 const BCharacterSet iso15(24,111,"ISO 8859-15","ISO-8859-15","ISO-8859-15",iso15aliases); 106 107 // chinese character set testing 108 109 const char * big5aliases[] = 110 { "csBig5",NULL }; 111 const BCharacterSet big5(25,2026,"Big5","Big5","Big5",big5aliases); 112 113 const BCharacterSet gb18030(26,114,"GB18030","GB18030",NULL,NULL); 114 115 /** 116 * The following initializes the global character set array. 117 * It is organized by id for efficient retrieval using predefined constants in UTF8.h and Font.h. 118 * Character sets are stored contiguously and may be efficiently iterated over. 119 * To add a new character set, define the character set above -- remember to increment the id -- 120 * and then add &<charSetName> to the _end_ of the following list. That's all. 121 **/ 122 123 const BCharacterSet * character_sets_by_id[] = { 124 &unicode, 125 &isoLatin1, &isoLatin2, &isoLatin3, &isoLatin4, &isoLatin5, 126 &isoLatin6, &isoLatin7, &isoLatin8, &isoLatin9, &isoLatin10, 127 &macintosh, 128 // R5 BFont encodings end here 129 &shiftJIS, &packedJapanese, &JIS0208, 130 &windows1252, &unicode2, &KOI8R, &windows1251, 131 &IBM866, &IBM437, &eucKR, &iso13, &iso14, &iso15, 132 // R5 convert_to/from_utf8 encodings end here 133 &big5,&gb18030, 134 }; 135 const uint32 character_sets_by_id_count = sizeof(character_sets_by_id)/sizeof(const BCharacterSet*); 136 137 /** 138 * The following code initializes the global MIBenum array. 139 * This sparsely populated array exists as an efficient way to access character sets by MIBenum. 140 * The MIBenum array is automatically allocated, and initialized by the following class. 141 * The following class should only be instantiated once, this is assured by using an assertion. 142 * No changes are required to the following code to add a new character set. 143 **/ 144 145 const BCharacterSet ** character_sets_by_MIBenum; 146 uint32 maximum_valid_MIBenum; 147 148 class MIBenumArrayInitializer { 149 public: 150 MIBenumArrayInitializer() { 151 DEBUG_ONLY(static int onlyOneTime = 0;) 152 ASSERT_WITH_MESSAGE(onlyOneTime++ == 0,"MIBenumArrayInitializer should be instantiated only one time."); 153 // analyzing character_sets_by_id 154 uint32 max_MIBenum = 0; 155 for (uint32 index = 0 ; index < character_sets_by_id_count ; index++ ) { 156 if (max_MIBenum < character_sets_by_id[index]->GetMIBenum()) { 157 max_MIBenum = character_sets_by_id[index]->GetMIBenum(); 158 } 159 } 160 // initializing extern variables 161 character_sets_by_MIBenum = new (const BCharacterSet*)[max_MIBenum+2]; 162 maximum_valid_MIBenum = max_MIBenum; 163 // initializing MIBenum array 164 memset(character_sets_by_MIBenum,0,sizeof(BCharacterSet*)*(max_MIBenum+2)); 165 for (uint32 index2 = 0 ; index2 < character_sets_by_id_count ; index2++ ) { 166 const BCharacterSet * charset = character_sets_by_id[index2]; 167 character_sets_by_MIBenum[charset->GetMIBenum()] = charset; 168 } 169 } 170 ~MIBenumArrayInitializer() 171 { 172 delete [] character_sets_by_MIBenum; 173 } 174 } runTheInitializer; 175 176 } 177 178