1 /* 2 * Copyright 2016, Haiku, inc. 3 * Distributed under terms of the MIT license. 4 */ 5 6 7 #include "TextEncoding.h" 8 9 #include <unicode/ucnv.h> 10 #include <unicode/ucsdet.h> 11 12 #include <algorithm> 13 14 15 namespace BPrivate { 16 17 18 BTextEncoding::BTextEncoding(BString name) 19 : 20 fName(name), 21 fUtf8Converter(NULL), 22 fConverter(NULL) 23 { 24 } 25 26 27 BTextEncoding::BTextEncoding(const char* data, size_t length) 28 : 29 fUtf8Converter(NULL), 30 fConverter(NULL) 31 { 32 UErrorCode error = U_ZERO_ERROR; 33 34 UCharsetDetector* detector = ucsdet_open(&error); 35 ucsdet_setText(detector, data, length, &error); 36 const UCharsetMatch* encoding = ucsdet_detect(detector, &error); 37 38 fName = ucsdet_getName(encoding, &error); 39 ucsdet_close(detector); 40 } 41 42 43 BTextEncoding::~BTextEncoding() 44 { 45 if (fUtf8Converter != NULL) 46 ucnv_close(fUtf8Converter); 47 48 if (fConverter != NULL) 49 ucnv_close(fConverter); 50 } 51 52 53 status_t 54 BTextEncoding::InitCheck() 55 { 56 if (fName.IsEmpty()) 57 return B_NO_INIT; 58 else 59 return B_OK; 60 } 61 62 63 status_t 64 BTextEncoding::Decode(const char* input, size_t& inputLength, char* output, 65 size_t& outputLength) 66 { 67 const char* base = input; 68 char* target = output; 69 70 // Optimize the easy case. 71 // Note: we don't check the input to be valid UTF-8 when doing that. 72 if (fName == "UTF-8") { 73 outputLength = std::min(inputLength, outputLength); 74 inputLength = outputLength; 75 memcpy(output, input, inputLength); 76 return B_OK; 77 } 78 79 UErrorCode error = U_ZERO_ERROR; 80 81 if (fUtf8Converter == NULL) 82 fUtf8Converter = ucnv_open("UTF-8", &error); 83 84 if (fConverter == NULL) 85 fConverter = ucnv_open(fName.String(), &error); 86 87 ucnv_convertEx(fUtf8Converter, fConverter, &target, output + outputLength, 88 &base, input + inputLength, NULL, NULL, NULL, NULL, FALSE, TRUE, 89 &error); 90 91 // inputLength is set to the number of bytes consumed. We may not use all of 92 // the input data (for example if it is cut in the middle of an utf-8 char). 93 inputLength = base - input; 94 outputLength = target - output; 95 96 if (!U_SUCCESS(error)) 97 return B_ERROR; 98 99 return B_OK; 100 } 101 102 103 status_t 104 BTextEncoding::Encode(const char* input, size_t& inputLength, char* output, 105 size_t& outputLength) 106 { 107 const char* base = input; 108 char* target = output; 109 110 // Optimize the easy case. 111 // Note: we don't check the input to be valid UTF-8 when doing that. 112 if (fName == "UTF-8") { 113 outputLength = std::min(inputLength, outputLength); 114 inputLength = outputLength; 115 memcpy(output, input, inputLength); 116 return B_OK; 117 } 118 119 UErrorCode error = U_ZERO_ERROR; 120 121 if (fUtf8Converter == NULL) 122 fUtf8Converter = ucnv_open("UTF-8", &error); 123 124 if (fConverter == NULL) 125 fConverter = ucnv_open(fName.String(), &error); 126 127 ucnv_convertEx(fConverter, fUtf8Converter, &target, output + outputLength, 128 &base, input + inputLength, NULL, NULL, NULL, NULL, FALSE, TRUE, 129 &error); 130 131 // inputLength is set to the number of bytes consumed. We may not use all of 132 // the input data (for example if it is cut in the middle of an utf-8 char). 133 inputLength = base - input; 134 outputLength = target - output; 135 136 if (!U_SUCCESS(error)) 137 return B_ERROR; 138 139 return B_OK; 140 } 141 142 143 status_t 144 BTextEncoding::Flush(char* output, size_t& outputLength) 145 { 146 char* target = output; 147 148 if (fName == "UTF-8") 149 return B_OK; 150 151 if (fUtf8Converter == NULL || fConverter == NULL) 152 return B_NO_INIT; 153 154 UErrorCode error = U_ZERO_ERROR; 155 156 ucnv_convertEx(fConverter, fUtf8Converter, &target, output + outputLength, 157 NULL, NULL, NULL, NULL, NULL, NULL, FALSE, TRUE, 158 &error); 159 160 if (!U_SUCCESS(error)) 161 return B_ERROR; 162 163 return B_OK; 164 } 165 166 167 BString 168 BTextEncoding::GetName() 169 { 170 return fName; 171 } 172 173 174 }; 175