File: | jdk/src/jdk.jdwp.agent/share/native/libjdwp/utf_util.c |
Warning: | line 199, column 13 Value stored to 'byte3' is never read |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | /* |
2 | * Copyright (c) 1998, 2016, Oracle and/or its affiliates. All rights reserved. |
3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 | * |
5 | * This code is free software; you can redistribute it and/or modify it |
6 | * under the terms of the GNU General Public License version 2 only, as |
7 | * published by the Free Software Foundation. Oracle designates this |
8 | * particular file as subject to the "Classpath" exception as provided |
9 | * by Oracle in the LICENSE file that accompanied this code. |
10 | * |
11 | * This code is distributed in the hope that it will be useful, but WITHOUT |
12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
14 | * version 2 for more details (a copy is included in the LICENSE file that |
15 | * accompanied this code). |
16 | * |
17 | * You should have received a copy of the GNU General Public License version |
18 | * 2 along with this work; if not, write to the Free Software Foundation, |
19 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
20 | * |
21 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
22 | * or visit www.oracle.com if you need additional information or have any |
23 | * questions. |
24 | */ |
25 | |
26 | #include <stdlib.h> |
27 | #include <ctype.h> |
28 | |
29 | #include "jni.h" |
30 | |
31 | #include "utf_util.h" |
32 | |
33 | |
/* Error and assert macros.
 *
 * UTF_ERROR(m)  reports message m through utfError(), tagged with the
 *               source file and line of the call site.
 * UTF_ASSERT(x) evaluates x once; when it compares equal to 0 the failure
 *               is reported through UTF_ERROR with the text of x appended
 *               to "ASSERT ERROR ". Otherwise it is a no-op expression.
 */
#define UTF_ERROR(m) utfError(__FILE__, __LINE__, m)
#define UTF_ASSERT(x) ( (x)==0 ? UTF_ERROR("ASSERT ERROR " #x) : (void)0 )
37 | |
38 | // Platform independent part |
39 | |
/* Report a fatal UTF conversion error and terminate the process.
 *
 *   file     name of the source file reporting the error
 *   line     line number within that file
 *   message  description of the failure
 *
 * Writes one line to stderr and then calls abort(); it never returns.
 * The string parameters are const-qualified because callers pass string
 * literals (__FILE__ and message constants) and nothing is written
 * through them.
 */
static void utfError(const char *file, int line, const char *message) {
    (void)fprintf(stderr, "UTF ERROR [\"%s\":%d]: %s\n", file, line, message);
    abort();
}
44 | |
45 | /* Determine length of this Standard UTF-8 in Modified UTF-8. |
46 | * Validation is done of the basic UTF encoding rules, returns |
47 | * length (no change) when errors are detected in the UTF encoding. |
48 | * |
49 | * Note: Accepts Modified UTF-8 also, no verification on the |
50 | * correctness of Standard UTF-8 is done. e,g, 0xC080 input is ok. |
51 | */ |
52 | int JNICALL utf8sToUtf8mLength(jbyte *string, int length) { |
53 | int newLength; |
54 | int i; |
55 | |
56 | newLength = 0; |
57 | for ( i = 0 ; i < length ; i++ ) { |
58 | unsigned byte; |
59 | |
60 | byte = (unsigned char)string[i]; |
61 | if ( (byte & 0x80) == 0 ) { /* 1byte encoding */ |
62 | newLength++; |
63 | if ( byte == 0 ) { |
64 | newLength++; /* We gain one byte in length on NULL bytes */ |
65 | } |
66 | } else if ( (byte & 0xE0) == 0xC0 ) { /* 2byte encoding */ |
67 | /* Check encoding of following bytes */ |
68 | if ( (i+1) >= length || (string[i+1] & 0xC0) != 0x80 ) { |
69 | break; /* Error condition */ |
70 | } |
71 | i++; /* Skip next byte */ |
72 | newLength += 2; |
73 | } else if ( (byte & 0xF0) == 0xE0 ) { /* 3byte encoding */ |
74 | /* Check encoding of following bytes */ |
75 | if ( (i+2) >= length || (string[i+1] & 0xC0) != 0x80 |
76 | || (string[i+2] & 0xC0) != 0x80 ) { |
77 | break; /* Error condition */ |
78 | } |
79 | i += 2; /* Skip next two bytes */ |
80 | newLength += 3; |
81 | } else if ( (byte & 0xF8) == 0xF0 ) { /* 4byte encoding */ |
82 | /* Check encoding of following bytes */ |
83 | if ( (i+3) >= length || (string[i+1] & 0xC0) != 0x80 |
84 | || (string[i+2] & 0xC0) != 0x80 |
85 | || (string[i+3] & 0xC0) != 0x80 ) { |
86 | break; /* Error condition */ |
87 | } |
88 | i += 3; /* Skip next 3 bytes */ |
89 | newLength += 6; /* 4byte encoding turns into 2 3byte ones */ |
90 | } else { |
91 | break; /* Error condition */ |
92 | } |
93 | } |
94 | if ( i != length ) { |
95 | /* Error in finding new length, return old length so no conversion */ |
96 | /* FIXUP: ERROR_MESSAGE? */ |
97 | return length; |
98 | } |
99 | return newLength; |
100 | } |
101 | |
102 | /* Convert Standard UTF-8 to Modified UTF-8. |
103 | * Assumes the UTF-8 encoding was validated by utf8mLength() above. |
104 | * |
105 | * Note: Accepts Modified UTF-8 also, no verification on the |
106 | * correctness of Standard UTF-8 is done. e,g, 0xC080 input is ok. |
107 | */ |
108 | void JNICALL utf8sToUtf8m(jbyte *string, int length, jbyte *newString, int newLength) { |
109 | int i; |
110 | int j; |
111 | |
112 | j = 0; |
113 | for ( i = 0 ; i < length ; i++ ) { |
114 | unsigned byte1; |
115 | |
116 | byte1 = (unsigned char)string[i]; |
117 | |
118 | /* NULL bytes and bytes starting with 11110xxx are special */ |
119 | if ( (byte1 & 0x80) == 0 ) { /* 1byte encoding */ |
120 | if ( byte1 == 0 ) { |
121 | /* Bits out: 11000000 10000000 */ |
122 | newString[j++] = (jbyte)0xC0; |
123 | newString[j++] = (jbyte)0x80; |
124 | } else { |
125 | /* Single byte */ |
126 | newString[j++] = byte1; |
127 | } |
128 | } else if ( (byte1 & 0xE0) == 0xC0 ) { /* 2byte encoding */ |
129 | newString[j++] = byte1; |
130 | newString[j++] = string[++i]; |
131 | } else if ( (byte1 & 0xF0) == 0xE0 ) { /* 3byte encoding */ |
132 | newString[j++] = byte1; |
133 | newString[j++] = string[++i]; |
134 | newString[j++] = string[++i]; |
135 | } else if ( (byte1 & 0xF8) == 0xF0 ) { /* 4byte encoding */ |
136 | /* Beginning of 4byte encoding, turn into 2 3byte encodings */ |
137 | unsigned byte2, byte3, byte4, u21; |
138 | |
139 | /* Bits in: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ |
140 | byte2 = (unsigned char)string[++i]; |
141 | byte3 = (unsigned char)string[++i]; |
142 | byte4 = (unsigned char)string[++i]; |
143 | /* Reconstruct full 21bit value */ |
144 | u21 = (byte1 & 0x07) << 18; |
145 | u21 += (byte2 & 0x3F) << 12; |
146 | u21 += (byte3 & 0x3F) << 6; |
147 | u21 += (byte4 & 0x3F); |
148 | /* Bits out: 11101101 1010xxxx 10xxxxxx */ |
149 | newString[j++] = (jbyte)0xED; |
150 | newString[j++] = (jbyte)(0xA0 + (((u21 >> 16) - 1) & 0x0F)); |
151 | newString[j++] = (jbyte)(0x80 + ((u21 >> 10) & 0x3F)); |
152 | /* Bits out: 11101101 1011xxxx 10xxxxxx */ |
153 | newString[j++] = (jbyte)0xED; |
154 | newString[j++] = (jbyte)(0xB0 + ((u21 >> 6) & 0x0F)); |
155 | newString[j++] = byte4; |
156 | } |
157 | } |
158 | UTF_ASSERT(i==length)( (i==length)==0 ? utfError("/home/daniel/Projects/java/jdk/src/jdk.jdwp.agent/share/native/libjdwp/utf_util.c" , 158, "ASSERT ERROR " "i==length") : (void)0 ); |
159 | UTF_ASSERT(j==newLength)( (j==newLength)==0 ? utfError("/home/daniel/Projects/java/jdk/src/jdk.jdwp.agent/share/native/libjdwp/utf_util.c" , 159, "ASSERT ERROR " "j==newLength") : (void)0 ); |
160 | newString[j] = (jbyte)0; |
161 | } |
162 | |
163 | /* Given a Modified UTF-8 string, calculate the Standard UTF-8 length. |
164 | * Basic validation of the UTF encoding rules is done, and length is |
165 | * returned (no change) when errors are detected. |
166 | * |
167 | * Note: No validation is made that this is indeed Modified UTF-8 coming in. |
168 | * |
169 | */ |
170 | int JNICALL utf8mToUtf8sLength(jbyte *string, int length) { |
171 | int newLength; |
172 | int i; |
173 | |
174 | newLength = 0; |
175 | for ( i = 0 ; i < length ; i++ ) { |
176 | unsigned byte1, byte2, byte3, byte4, byte5, byte6; |
177 | |
178 | byte1 = (unsigned char)string[i]; |
179 | if ( (byte1 & 0x80) == 0 ) { /* 1byte encoding */ |
180 | newLength++; |
181 | } else if ( (byte1 & 0xE0) == 0xC0 ) { /* 2byte encoding */ |
182 | /* Check encoding of following bytes */ |
183 | if ( (i+1) >= length || (string[i+1] & 0xC0) != 0x80 ) { |
184 | break; /* Error condition */ |
185 | } |
186 | byte2 = (unsigned char)string[++i]; |
187 | if ( byte1 != 0xC0 || byte2 != 0x80 ) { |
188 | newLength += 2; /* Normal 2byte encoding, not 0xC080 */ |
189 | } else { |
190 | newLength++; /* We will turn 0xC080 into 0 */ |
191 | } |
192 | } else if ( (byte1 & 0xF0) == 0xE0 ) { /* 3byte encoding */ |
193 | /* Check encoding of following bytes */ |
194 | if ( (i+2) >= length || (string[i+1] & 0xC0) != 0x80 |
195 | || (string[i+2] & 0xC0) != 0x80 ) { |
196 | break; /* Error condition */ |
197 | } |
198 | byte2 = (unsigned char)string[++i]; |
199 | byte3 = (unsigned char)string[++i]; |
Value stored to 'byte3' is never read | |
200 | newLength += 3; |
201 | /* Possible process a second 3byte encoding */ |
202 | if ( (i+3) < length && byte1 == 0xED && (byte2 & 0xF0) == 0xA0 ) { |
203 | /* See if this is a pair of 3byte encodings */ |
204 | byte4 = (unsigned char)string[i+1]; |
205 | byte5 = (unsigned char)string[i+2]; |
206 | byte6 = (unsigned char)string[i+3]; |
207 | if ( byte4 == 0xED && (byte5 & 0xF0) == 0xB0 ) { |
208 | /* Check encoding of 3rd byte */ |
209 | if ( (byte6 & 0xC0) != 0x80 ) { |
210 | break; /* Error condition */ |
211 | } |
212 | newLength++; /* New string will have 4byte encoding */ |
213 | i += 3; /* Skip next 3 bytes */ |
214 | } |
215 | } |
216 | } else { |
217 | break; /* Error condition */ |
218 | } |
219 | } |
220 | if ( i != length ) { |
221 | /* Error in UTF encoding */ |
222 | /* FIXUP: ERROR_MESSAGE()? */ |
223 | return length; |
224 | } |
225 | return newLength; |
226 | } |
227 | |
228 | /* Convert a Modified UTF-8 string into a Standard UTF-8 string |
229 | * It is assumed that this string has been validated in terms of the |
230 | * basic UTF encoding rules by utf8Length() above. |
231 | * |
232 | * Note: No validation is made that this is indeed Modified UTF-8 coming in. |
233 | * |
234 | */ |
235 | void JNICALL utf8mToUtf8s(jbyte *string, int length, jbyte *newString, int newLength) { |
236 | int i; |
237 | int j; |
238 | |
239 | j = 0; |
240 | for ( i = 0 ; i < length ; i++ ) { |
241 | unsigned byte1, byte2, byte3, byte4, byte5, byte6; |
242 | |
243 | byte1 = (unsigned char)string[i]; |
244 | if ( (byte1 & 0x80) == 0 ) { /* 1byte encoding */ |
245 | /* Single byte */ |
246 | newString[j++] = byte1; |
247 | } else if ( (byte1 & 0xE0) == 0xC0 ) { /* 2byte encoding */ |
248 | byte2 = (unsigned char)string[++i]; |
249 | if ( byte1 != 0xC0 || byte2 != 0x80 ) { |
250 | newString[j++] = byte1; |
251 | newString[j++] = byte2; |
252 | } else { |
253 | newString[j++] = 0; |
254 | } |
255 | } else if ( (byte1 & 0xF0) == 0xE0 ) { /* 3byte encoding */ |
256 | byte2 = (unsigned char)string[++i]; |
257 | byte3 = (unsigned char)string[++i]; |
258 | if ( i+3 < length && byte1 == 0xED && (byte2 & 0xF0) == 0xA0 ) { |
259 | /* See if this is a pair of 3byte encodings */ |
260 | byte4 = (unsigned char)string[i+1]; |
261 | byte5 = (unsigned char)string[i+2]; |
262 | byte6 = (unsigned char)string[i+3]; |
263 | if ( byte4 == 0xED && (byte5 & 0xF0) == 0xB0 ) { |
264 | unsigned u21; |
265 | |
266 | /* Bits in: 11101101 1010xxxx 10xxxxxx */ |
267 | /* Bits in: 11101101 1011xxxx 10xxxxxx */ |
268 | i += 3; |
269 | |
270 | /* Reconstruct 21 bit code */ |
271 | u21 = ((byte2 & 0x0F) + 1) << 16; |
272 | u21 += (byte3 & 0x3F) << 10; |
273 | u21 += (byte5 & 0x0F) << 6; |
274 | u21 += (byte6 & 0x3F); |
275 | |
276 | /* Bits out: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ |
277 | |
278 | /* Convert to 4byte encoding */ |
279 | newString[j++] = 0xF0 + ((u21 >> 18) & 0x07); |
280 | newString[j++] = 0x80 + ((u21 >> 12) & 0x3F); |
281 | newString[j++] = 0x80 + ((u21 >> 6) & 0x3F); |
282 | newString[j++] = 0x80 + (u21 & 0x3F); |
283 | continue; |
284 | } |
285 | } |
286 | /* Normal 3byte encoding */ |
287 | newString[j++] = byte1; |
288 | newString[j++] = byte2; |
289 | newString[j++] = byte3; |
290 | } |
291 | } |
292 | UTF_ASSERT(i==length)( (i==length)==0 ? utfError("/home/daniel/Projects/java/jdk/src/jdk.jdwp.agent/share/native/libjdwp/utf_util.c" , 292, "ASSERT ERROR " "i==length") : (void)0 ); |
293 | UTF_ASSERT(j==newLength)( (j==newLength)==0 ? utfError("/home/daniel/Projects/java/jdk/src/jdk.jdwp.agent/share/native/libjdwp/utf_util.c" , 293, "ASSERT ERROR " "j==newLength") : (void)0 ); |
294 | newString[j] = 0; |
295 | } |
296 | |
297 | #ifdef _WIN32 |
298 | // Microsoft Windows specific part |
299 | |
300 | #include <windows.h> |
301 | |
302 | static UINT getCodepage() { |
303 | LANGID langID; |
304 | LCID localeID; |
305 | TCHAR strCodePage[7]; // ANSI code page id |
306 | |
307 | static UINT intCodePage = -1; |
308 | |
309 | if (intCodePage == -1) { |
310 | // Firts call, get codepage from the os |
311 | langID = LANGIDFROMLCID(GetUserDefaultLCID()); |
312 | localeID = MAKELCID(langID, SORT_DEFAULT); |
313 | if (GetLocaleInfo(localeID, LOCALE_IDEFAULTANSICODEPAGE, |
314 | strCodePage, sizeof(strCodePage)/sizeof(TCHAR)) > 0 ) { |
315 | intCodePage = atoi(strCodePage); |
316 | } |
317 | else { |
318 | intCodePage = GetACP(); |
319 | } |
320 | } |
321 | |
322 | return intCodePage; |
323 | } |
324 | |
325 | /* |
326 | * Get wide string (assumes len>0) |
327 | */ |
328 | static WCHAR* getWideString(UINT codePage, char* str, int len, int *pwlen) { |
329 | int wlen; |
330 | WCHAR* wstr; |
331 | |
332 | /* Convert the string to WIDE string */ |
333 | wlen = MultiByteToWideChar(codePage, 0, str, len, NULL((void*)0), 0); |
334 | *pwlen = wlen; |
335 | if (wlen <= 0) { |
336 | UTF_ERROR(("Can't get WIDE string length"))utfError("/home/daniel/Projects/java/jdk/src/jdk.jdwp.agent/share/native/libjdwp/utf_util.c" , 336, ("Can't get WIDE string length")); |
337 | return NULL((void*)0); |
338 | } |
339 | wstr = (WCHAR*)malloc(wlen * sizeof(WCHAR)); |
340 | if (wstr == NULL((void*)0)) { |
341 | UTF_ERROR(("Can't malloc() any space"))utfError("/home/daniel/Projects/java/jdk/src/jdk.jdwp.agent/share/native/libjdwp/utf_util.c" , 341, ("Can't malloc() any space")); |
342 | return NULL((void*)0); |
343 | } |
344 | if (MultiByteToWideChar(codePage, 0, str, len, wstr, wlen) == 0) { |
345 | UTF_ERROR(("Can't get WIDE string"))utfError("/home/daniel/Projects/java/jdk/src/jdk.jdwp.agent/share/native/libjdwp/utf_util.c" , 345, ("Can't get WIDE string")); |
346 | return NULL((void*)0); |
347 | } |
348 | return wstr; |
349 | } |
350 | |
351 | /* |
352 | * Convert UTF-8 to a platform string |
353 | * NOTE: outputBufSize includes the space for the trailing 0. |
354 | */ |
355 | int JNICALL utf8ToPlatform(jbyte *utf8, int len, char* output, int outputBufSize) { |
356 | int wlen; |
357 | int plen; |
358 | WCHAR* wstr; |
359 | UINT codepage; |
360 | int outputMaxLen; |
361 | |
362 | UTF_ASSERT(utf8)( (utf8)==0 ? utfError("/home/daniel/Projects/java/jdk/src/jdk.jdwp.agent/share/native/libjdwp/utf_util.c" , 362, "ASSERT ERROR " "utf8") : (void)0 ); |
363 | UTF_ASSERT(output)( (output)==0 ? utfError("/home/daniel/Projects/java/jdk/src/jdk.jdwp.agent/share/native/libjdwp/utf_util.c" , 363, "ASSERT ERROR " "output") : (void)0 ); |
364 | UTF_ASSERT(len >= 0)( (len >= 0)==0 ? utfError("/home/daniel/Projects/java/jdk/src/jdk.jdwp.agent/share/native/libjdwp/utf_util.c" , 364, "ASSERT ERROR " "len >= 0") : (void)0 ); |
365 | UTF_ASSERT(outputBufSize > len)( (outputBufSize > len)==0 ? utfError("/home/daniel/Projects/java/jdk/src/jdk.jdwp.agent/share/native/libjdwp/utf_util.c" , 365, "ASSERT ERROR " "outputBufSize > len") : (void)0 ); |
366 | outputMaxLen = outputBufSize - 1; // leave space for trailing 0 |
367 | |
368 | /* Zero length is ok, but we don't need to do much */ |
369 | if ( len == 0 ) { |
370 | output[0] = 0; |
371 | return 0; |
372 | } |
373 | |
374 | /* Get WIDE string version (assumes len>0) */ |
375 | wstr = getWideString(CP_UTF8, (char*)utf8, len, &wlen); |
376 | if ( wstr == NULL((void*)0) ) { |
377 | // Can't allocate WIDE string |
378 | goto just_copy_bytes; |
379 | } |
380 | |
381 | /* Convert WIDE string to MultiByte string */ |
382 | codepage = getCodepage(); |
383 | plen = WideCharToMultiByte(codepage, 0, wstr, wlen, |
384 | output, outputMaxLen, NULL((void*)0), NULL((void*)0)); |
385 | free(wstr); |
386 | if (plen <= 0) { |
387 | // Can't convert WIDE string to multi-byte |
388 | goto just_copy_bytes; |
389 | } |
390 | output[plen] = '\0'; |
391 | return plen; |
392 | |
393 | just_copy_bytes: |
394 | (void)memcpy(output, utf8, len); |
395 | output[len] = 0; |
396 | return len; |
397 | } |
398 | |
399 | /* |
400 | * Convert Platform Encoding to UTF-8. |
401 | * NOTE: outputBufSize includes the space for the trailing 0. |
402 | */ |
403 | int JNICALL utf8FromPlatform(char *str, int len, jbyte *output, int outputBufSize) { |
404 | int wlen; |
405 | int plen; |
406 | WCHAR* wstr; |
407 | UINT codepage; |
408 | int outputMaxLen; |
409 | |
410 | UTF_ASSERT(str)( (str)==0 ? utfError("/home/daniel/Projects/java/jdk/src/jdk.jdwp.agent/share/native/libjdwp/utf_util.c" , 410, "ASSERT ERROR " "str") : (void)0 ); |
411 | UTF_ASSERT(output)( (output)==0 ? utfError("/home/daniel/Projects/java/jdk/src/jdk.jdwp.agent/share/native/libjdwp/utf_util.c" , 411, "ASSERT ERROR " "output") : (void)0 ); |
412 | UTF_ASSERT(len >= 0)( (len >= 0)==0 ? utfError("/home/daniel/Projects/java/jdk/src/jdk.jdwp.agent/share/native/libjdwp/utf_util.c" , 412, "ASSERT ERROR " "len >= 0") : (void)0 ); |
413 | UTF_ASSERT(outputBufSize > len)( (outputBufSize > len)==0 ? utfError("/home/daniel/Projects/java/jdk/src/jdk.jdwp.agent/share/native/libjdwp/utf_util.c" , 413, "ASSERT ERROR " "outputBufSize > len") : (void)0 ); |
414 | outputMaxLen = outputBufSize - 1; // leave space for trailing 0 |
415 | |
416 | /* Zero length is ok, but we don't need to do much */ |
417 | if ( len == 0 ) { |
418 | output[0] = 0; |
419 | return 0; |
420 | } |
421 | |
422 | /* Get WIDE string version (assumes len>0) */ |
423 | codepage = getCodepage(); |
424 | wstr = getWideString(codepage, str, len, &wlen); |
425 | if ( wstr == NULL((void*)0) ) { |
426 | goto just_copy_bytes; |
427 | } |
428 | |
429 | /* Convert WIDE string to UTF-8 string */ |
430 | plen = WideCharToMultiByte(CP_UTF8, 0, wstr, wlen, |
431 | (char*)output, outputMaxLen, NULL((void*)0), NULL((void*)0)); |
432 | free(wstr); |
433 | if (plen <= 0) { |
434 | UTF_ERROR(("Can't convert WIDE string to multi-byte"))utfError("/home/daniel/Projects/java/jdk/src/jdk.jdwp.agent/share/native/libjdwp/utf_util.c" , 434, ("Can't convert WIDE string to multi-byte")); |
435 | goto just_copy_bytes; |
436 | } |
437 | output[plen] = '\0'; |
438 | return plen; |
439 | |
440 | just_copy_bytes: |
441 | (void)memcpy(output, str, len); |
442 | output[len] = 0; |
443 | return len; |
444 | } |
445 | |
446 | |
447 | #else |
448 | // *NIX specific part |
449 | |
450 | #include <iconv.h> |
451 | #include <locale.h> |
452 | #include <langinfo.h> |
453 | #include <string.h> |
454 | |
/* Direction of an iconvConvert() call, seen from the UTF-8 side */
typedef enum {TO_UTF8, FROM_UTF8} conv_direction;

/*
 * Do iconv() conversion between UTF-8 and the codeset of the process locale.
 *
 *   drn            TO_UTF8:   bytes are in the locale codeset, output is UTF-8
 *                  FROM_UTF8: bytes are UTF-8, output is in the locale codeset
 *   bytes          input bytes
 *   len            number of input bytes
 *   output         destination buffer
 *   outputBufSize  size of output in bytes (must be > len)
 *
 * Returns the number of bytes stored in output, not counting the
 * trailing 0. Whenever the conversion cannot be carried out completely
 * (locale codeset unavailable, unsupported codeset pair, invalid input,
 * output too small) the input bytes are copied to output unchanged and
 * len is returned.
 * NOTE: outputBufSize includes the space for the trailing 0.
 */
static int iconvConvert(conv_direction drn, char *bytes, size_t len, char *output, size_t outputBufSize) {

    /* NULL: not looked up yet, (char *) -1: lookup or iconv_open failed */
    static char *codeset = 0;
    iconv_t func;
    size_t rc;
    size_t inLeft, outLeft;
    size_t outputMaxLen;
    size_t stored;
    char *inbuf, *outbuf;

    UTF_ASSERT(bytes);
    UTF_ASSERT(output);
    UTF_ASSERT(outputBufSize > len);
    outputMaxLen = outputBufSize - 1; // leave space for trailing 0

    /* Zero length is ok, but we don't need to do much */
    if ( len == 0 ) {
        output[0] = 0;
        return 0;
    }

    if (codeset == NULL) {
        // locale is not initialized, do it now
        if (setlocale(LC_ALL, "") != NULL) {
            // nl_langinfo returns ANSI_X3.4-1968 by default
            codeset = (char*)nl_langinfo(CODESET);
        }

        if (codeset == NULL) {
            // Not able to initialize process locale from platform one.
            codeset = (char *) -1;
        }
    }

    if (codeset == (char *) -1) {
        // There was an error during initialization, so just bail out
        goto just_copy_bytes;
    }

    // iconv_open() takes the TARGET codeset first and the SOURCE codeset second
    func = (drn == TO_UTF8) ? iconv_open("UTF-8", codeset) : iconv_open(codeset, "UTF-8");
    if (func == (iconv_t) -1) {
        // Requested charset combination is not supported, conversion couldn't be done.
        // make sure we will not try it again
        codeset = (char *) -1;
        goto just_copy_bytes;
    }

    // perform conversion
    inbuf = bytes;
    outbuf = output;
    inLeft = len;
    outLeft = outputMaxLen;

    rc = iconv(func, (void*)&inbuf, &inLeft, &outbuf, &outLeft);
    iconv_close(func);

    // iconv() returns (size_t) -1 on error; any other value, including 0,
    // is the count of non-reversible conversions and means success.
    if (rc == (size_t) -1 || inLeft != 0) {
        // Input string is invalid, not able to convert entire string
        // or some other iconv error happens.
        goto just_copy_bytes;
    }

    // Number of bytes actually stored is what was consumed from the output space
    stored = outputMaxLen - outLeft;
    output[stored] = 0;
    return (int)stored;


just_copy_bytes:
    (void)memcpy(output, bytes, len);
    output[len] = 0;
    return (int)len;
}
534 | |
535 | /* |
536 | * Convert UTF-8 to Platform Encoding. |
537 | * Returns length or -1 if output overflows. |
538 | * NOTE: outputBufSize includes the space for the trailing 0. |
539 | */ |
540 | int JNICALL utf8ToPlatform(jbyte *utf8, int len, char *output, int outputBufSize) { |
541 | return iconvConvert(FROM_UTF8, (char*)utf8, len, output, outputBufSize); |
542 | } |
543 | |
544 | /* |
545 | * Convert Platform Encoding to UTF-8. |
546 | * Returns length or -1 if output overflows. |
547 | * NOTE: outputBufSize includes the space for the trailing 0. |
548 | */ |
549 | int JNICALL utf8FromPlatform(char *str, int len, jbyte *output, int outputBufSize) { |
550 | return iconvConvert(TO_UTF8, str, len, (char*) output, outputBufSize); |
551 | } |
552 | |
553 | #endif |