xref: /haiku/src/kits/locale/TextEncoding.cpp (revision 13581b3d2a71545960b98fefebc5225b5bf29072)
1 /*
2  * Copyright 2016, Haiku, inc.
3  * Distributed under terms of the MIT license.
4  */
5 
6 
7 #include "TextEncoding.h"
8 
9 #include <unicode/ucnv.h>
10 #include <unicode/ucsdet.h>
11 
12 #include <algorithm>
13 
14 
15 namespace BPrivate {
16 
17 
18 BTextEncoding::BTextEncoding(BString name)
19 	:
20 	fName(name),
21 	fUtf8Converter(NULL),
22 	fConverter(NULL)
23 {
24 }
25 
26 
27 BTextEncoding::BTextEncoding(const char* data, size_t length)
28 	:
29 	fUtf8Converter(NULL),
30 	fConverter(NULL)
31 {
32 	UErrorCode error = U_ZERO_ERROR;
33 
34 	UCharsetDetector* detector = ucsdet_open(&error);
35 	ucsdet_setText(detector, data, length, &error);
36 	const UCharsetMatch* encoding = ucsdet_detect(detector, &error);
37 
38 	fName = ucsdet_getName(encoding, &error);
39 	ucsdet_close(detector);
40 }
41 
42 
43 BTextEncoding::~BTextEncoding()
44 {
45 	if (fUtf8Converter != NULL)
46 		ucnv_close(fUtf8Converter);
47 
48 	if (fConverter != NULL)
49 		ucnv_close(fConverter);
50 }
51 
52 
53 status_t
54 BTextEncoding::InitCheck()
55 {
56 	if (fName.IsEmpty())
57 		return B_NO_INIT;
58 	else
59 		return B_OK;
60 }
61 
62 
63 status_t
64 BTextEncoding::Decode(const char* input, size_t& inputLength, char* output,
65 	size_t& outputLength)
66 {
67 	const char* base = input;
68 	char* target = output;
69 
70 	// Optimize the easy case.
71 	// Note: we don't check the input to be valid UTF-8 when doing that.
72 	if (fName == "UTF-8") {
73 		outputLength = std::min(inputLength, outputLength);
74 		inputLength = outputLength;
75 		memcpy(output, input, inputLength);
76 		return B_OK;
77 	}
78 
79 	UErrorCode error = U_ZERO_ERROR;
80 
81 	if (fUtf8Converter == NULL)
82 		fUtf8Converter = ucnv_open("UTF-8", &error);
83 
84 	if (fConverter == NULL)
85 		fConverter = ucnv_open(fName.String(), &error);
86 
87 	ucnv_convertEx(fUtf8Converter, fConverter, &target, output + outputLength,
88 		&base, input + inputLength, NULL, NULL, NULL, NULL, FALSE, TRUE,
89 		&error);
90 
91 	// inputLength is set to the number of bytes consumed. We may not use all of
92 	// the input data (for example if it is cut in the middle of an utf-8 char).
93 	inputLength = base - input;
94 	outputLength = target - output;
95 
96 	if (!U_SUCCESS(error))
97 		return B_ERROR;
98 
99 	return B_OK;
100 }
101 
102 
103 status_t
104 BTextEncoding::Encode(const char* input, size_t& inputLength, char* output,
105 	size_t& outputLength)
106 {
107 	const char* base = input;
108 	char* target = output;
109 
110 	// Optimize the easy case.
111 	// Note: we don't check the input to be valid UTF-8 when doing that.
112 	if (fName == "UTF-8") {
113 		outputLength = std::min(inputLength, outputLength);
114 		inputLength = outputLength;
115 		memcpy(output, input, inputLength);
116 		return B_OK;
117 	}
118 
119 	UErrorCode error = U_ZERO_ERROR;
120 
121 	if (fUtf8Converter == NULL)
122 		fUtf8Converter = ucnv_open("UTF-8", &error);
123 
124 	if (fConverter == NULL)
125 		fConverter = ucnv_open(fName.String(), &error);
126 
127 	ucnv_convertEx(fConverter, fUtf8Converter, &target, output + outputLength,
128 		&base, input + inputLength, NULL, NULL, NULL, NULL, FALSE, TRUE,
129 		&error);
130 
131 	// inputLength is set to the number of bytes consumed. We may not use all of
132 	// the input data (for example if it is cut in the middle of an utf-8 char).
133 	inputLength = base - input;
134 	outputLength = target - output;
135 
136 	if (!U_SUCCESS(error))
137 		return B_ERROR;
138 
139 	return B_OK;
140 }
141 
142 
143 status_t
144 BTextEncoding::Flush(char* output, size_t& outputLength)
145 {
146 	char* target = output;
147 
148 	if (fName == "UTF-8")
149 		return B_OK;
150 
151 	if (fUtf8Converter == NULL || fConverter == NULL)
152 		return B_NO_INIT;
153 
154 	UErrorCode error = U_ZERO_ERROR;
155 
156 	ucnv_convertEx(fConverter, fUtf8Converter, &target, output + outputLength,
157 		NULL, NULL, NULL, NULL, NULL, NULL, FALSE, TRUE,
158 		&error);
159 
160 	if (!U_SUCCESS(error))
161 		return B_ERROR;
162 
163 	return B_OK;
164 }
165 
166 
167 BString
168 BTextEncoding::GetName()
169 {
170 	return fName;
171 }
172 
173 
174 };
175