1 #include <UTF8.h> 2 #include <iconv.h> 3 #include <CharacterSet.h> 4 #include <CharacterSetRoster.h> 5 #include <Errors.h> 6 #include <errno.h> 7 #include <stdio.h> 8 9 using namespace BPrivate; 10 11 typedef char ** input_buffer_t; 12 13 int iconvctl (iconv_t icd, int request, void* argument); 14 15 status_t 16 convert_encoding(const char * from, const char * to, 17 const char * src, int32 * srcLen, 18 char * dst, int32 * dstLen, 19 int32 * state, char substitute) 20 { 21 status_t status; 22 if (*srcLen == 0) { 23 // nothing to do! 24 return B_OK; 25 } 26 iconv_t conversion = iconv_open(to,from); 27 if (conversion == (iconv_t)-1) { 28 return B_ERROR; 29 } 30 if (state == 0) { 31 return B_ERROR; 32 } 33 if (*state == 0) { 34 iconv(conversion,0,0,0,0); 35 } 36 input_buffer_t inputBuffer = const_cast<input_buffer_t>(&src); 37 size_t inputLeft = *srcLen; 38 size_t outputLeft = *dstLen; 39 do { 40 size_t nonReversibleConversions = iconv(conversion,inputBuffer,&inputLeft,&dst,&outputLeft); 41 if (nonReversibleConversions == (size_t)-1) { 42 if (errno == E2BIG) { 43 // Not enough room in the output buffer for the next converted character 44 // This is not a "real" error, we just quit out. 45 break; 46 } 47 switch (errno) { 48 case EILSEQ: // unable to generate a corresponding character 49 { 50 // discard the input character 51 const int one = 1, zero = 0; 52 iconvctl(conversion,ICONV_SET_DISCARD_ILSEQ,(void*)&one); 53 iconv(conversion,inputBuffer,&inputLeft,&dst,&outputLeft); 54 iconvctl(conversion,ICONV_SET_DISCARD_ILSEQ,(void*)&zero); 55 // prepare to convert the substitute character to target encoding 56 char * original = new char[1]; 57 original[0] = substitute; 58 size_t len = 1; 59 char * copy = original; 60 // Perform the conversion 61 // We ignore any errors during this as part of robustness/best-effort 62 // We use ISO-8859-1 as a source because it is a single byte encoding 63 // It also overlaps UTF-8 for the lower 128 characters. It is also 64 // likely to have a mapping to almost any target encoding. 65 iconv_t iso8859_1to = iconv_open(to,"ISO-8859-1"); 66 if (iso8859_1to != (iconv_t)-1) { 67 iconv(iso8859_1to,0,0,0,0); 68 iconv(iso8859_1to,const_cast<input_buffer_t>(©),&len,&dst,&outputLeft); 69 iconv_close(iso8859_1to); 70 } 71 delete original; 72 } 73 break; 74 case EINVAL: // incomplete multibyte sequence in the input 75 // we just eat bad bytes, as part of robustness/best-effort 76 inputBuffer++; 77 inputLeft--; 78 break; 79 default: 80 // unknown error, completely bail 81 status = errno; 82 iconv_close(conversion); 83 return status; 84 } 85 } 86 } while ((inputLeft > 0) && (outputLeft > 0)); 87 *srcLen -= inputLeft; 88 *dstLen -= outputLeft; 89 iconv_close(conversion); 90 if (*srcLen != 0) { 91 // able to convert at least one character 92 return B_OK; 93 } else { 94 // not able to convert at least one character 95 return B_ERROR; 96 } 97 } 98 99 status_t 100 convert_to_utf8(uint32 srcEncoding, 101 const char * src, int32 * srcLen, 102 char * dst, int32 * dstLen, 103 int32 * state, char substitute = B_SUBSTITUTE) 104 { 105 const BCharacterSet * charset = BCharacterSetRoster::GetCharacterSetByConversionID(srcEncoding); 106 if (charset == 0) { 107 return B_ERROR; 108 } 109 return convert_encoding(charset->GetName(),"UTF-8",src,srcLen,dst,dstLen,state,substitute); 110 } 111 112 status_t 113 convert_from_utf8(uint32 dstEncoding, 114 const char * src, int32 * srcLen, 115 char * dst, int32 * dstLen, 116 int32 * state, char substitute = B_SUBSTITUTE) 117 { 118 const BCharacterSet * charset = BCharacterSetRoster::GetCharacterSetByConversionID(dstEncoding); 119 if (charset == 0) { 120 return B_ERROR; 121 } 122 return convert_encoding("UTF-8",charset->GetName(),src,srcLen,dst,dstLen,state,substitute); 123 } 124