e14743d1 |
1 | /* |
2 | SDL - Simple DirectMedia Layer |
3 | Copyright (C) 1997-2009 Sam Lantinga |
4 | |
5 | This library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | This library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with this library; if not, write to the Free Software |
17 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
18 | |
19 | Sam Lantinga |
20 | slouken@libsdl.org |
21 | */ |
22 | #include "SDL_config.h" |
23 | |
24 | /* This file contains portable iconv functions for SDL */ |
25 | |
26 | #include "SDL_stdinc.h" |
27 | #include "SDL_endian.h" |
28 | |
29 | #ifdef HAVE_ICONV |
30 | |
31 | /* Depending on which standard the iconv() was implemented with, |
32 | iconv() may or may not use const char ** for the inbuf param. |
33 | If we get this wrong, it's just a warning, so no big deal. |
34 | */ |
35 | #if defined(_XGP6) || \ |
36 | defined(__GLIBC__) && ((__GLIBC__ > 2) || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2)) |
37 | #define ICONV_INBUF_NONCONST |
38 | #endif |
39 | |
40 | #include <errno.h> |
41 | |
42 | size_t SDL_iconv(SDL_iconv_t cd, |
43 | const char **inbuf, size_t *inbytesleft, |
44 | char **outbuf, size_t *outbytesleft) |
45 | { |
46 | size_t retCode; |
47 | #ifdef ICONV_INBUF_NONCONST |
48 | retCode = iconv(cd, (char **)inbuf, inbytesleft, outbuf, outbytesleft); |
49 | #else |
50 | retCode = iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft); |
51 | #endif |
52 | if ( retCode == (size_t)-1 ) { |
53 | switch(errno) { |
54 | case E2BIG: |
55 | return SDL_ICONV_E2BIG; |
56 | case EILSEQ: |
57 | return SDL_ICONV_EILSEQ; |
58 | case EINVAL: |
59 | return SDL_ICONV_EINVAL; |
60 | default: |
61 | return SDL_ICONV_ERROR; |
62 | } |
63 | } |
64 | return retCode; |
65 | } |
66 | |
67 | #else |
68 | |
69 | /* Lots of useful information on Unicode at: |
70 | http://www.cl.cam.ac.uk/~mgk25/unicode.html |
71 | */ |
72 | |
/* Byte order mark code point, written in front of "UTF-16"/"UTF-32" output. */
#define UNICODE_BOM 0xFEFF

/* Substitutes used when a character cannot be represented or decoded. */
#define UNKNOWN_ASCII '?'
#define UNKNOWN_UNICODE 0xFFFD

/* Internal identifiers for the encodings the portable converter handles. */
enum {
    ENCODING_UNKNOWN,
    ENCODING_ASCII,
    ENCODING_LATIN1,
    ENCODING_UTF8,
    ENCODING_UTF16,     /* Needs byte order marker */
    ENCODING_UTF16BE,
    ENCODING_UTF16LE,
    ENCODING_UTF32,     /* Needs byte order marker */
    ENCODING_UTF32BE,
    ENCODING_UTF32LE,
    ENCODING_UCS2,      /* Native byte order assumed */
    ENCODING_UCS4       /* Native byte order assumed */
};

/* Host-order aliases, used when no byte order marker settles the question. */
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
#define ENCODING_UTF16NATIVE ENCODING_UTF16BE
#define ENCODING_UTF32NATIVE ENCODING_UTF32BE
#else
#define ENCODING_UTF16NATIVE ENCODING_UTF16LE
#define ENCODING_UTF32NATIVE ENCODING_UTF32LE
#endif

/* Conversion descriptor: the source and destination ENCODING_* values. */
struct _SDL_iconv_t
{
    int src_fmt;
    int dst_fmt;
};

/* Read-only table mapping the accepted encoding names to ENCODING_* values. */
static const struct {
    const char *name;
    int format;
} encodings[] = {
    { "ASCII",      ENCODING_ASCII },
    { "US-ASCII",   ENCODING_ASCII },
    { "8859-1",     ENCODING_LATIN1 },
    { "ISO-8859-1", ENCODING_LATIN1 },
    { "UTF8",       ENCODING_UTF8 },
    { "UTF-8",      ENCODING_UTF8 },
    { "UTF16",      ENCODING_UTF16 },
    { "UTF-16",     ENCODING_UTF16 },
    { "UTF16BE",    ENCODING_UTF16BE },
    { "UTF-16BE",   ENCODING_UTF16BE },
    { "UTF16LE",    ENCODING_UTF16LE },
    { "UTF-16LE",   ENCODING_UTF16LE },
    { "UTF32",      ENCODING_UTF32 },
    { "UTF-32",     ENCODING_UTF32 },
    { "UTF32BE",    ENCODING_UTF32BE },
    { "UTF-32BE",   ENCODING_UTF32BE },
    { "UTF32LE",    ENCODING_UTF32LE },
    { "UTF-32LE",   ENCODING_UTF32LE },
    { "UCS2",       ENCODING_UCS2 },
    { "UCS-2",      ENCODING_UCS2 },
    { "UCS4",       ENCODING_UCS4 },
    { "UCS-4",      ENCODING_UCS4 },
};
133 | |
/* Derive an encoding name from the locale environment variables.
 *
 * LC_ALL, LC_CTYPE, LC_MESSAGES and LANG are consulted in that order and
 * the first one that is set to a non-empty value wins (an empty value is
 * treated like an unset variable, so it no longer hides the variables
 * that follow it).  If none is usable, or the value is "C", the name
 * "ASCII" is used.
 *
 * The value is then trimmed to its codeset part: text up to and including
 * the first '.' is dropped, and anything from '@' onward is cut off, so
 * "en_US.UTF-8@blah" becomes "UTF-8".
 *
 * buffer/bufsize: caller-provided storage that receives the result.
 * Returns buffer.
 */
static const char *getlocale(char *buffer, size_t bufsize)
{
    static const char *const variables[] = {
        "LC_ALL", "LC_CTYPE", "LC_MESSAGES", "LANG"
    };
    const char *lang = NULL;
    const char *dot;
    char *at;
    size_t i;

    for (i = 0; i < sizeof(variables) / sizeof(variables[0]); ++i) {
        const char *value = SDL_getenv(variables[i]);
        if (value && *value) {
            lang = value;
            break;
        }
    }
    if (!lang || SDL_strcmp(lang, "C") == 0) {
        lang = "ASCII";
    }

    /* We need to trim down strings like "en_US.UTF-8@blah" to "UTF-8" */
    dot = SDL_strchr(lang, '.');
    if (dot != NULL) {
        lang = dot + 1;
    }

    SDL_strlcpy(buffer, lang, bufsize);
    at = SDL_strchr(buffer, '@');
    if (at != NULL) {
        *at = '\0';  /* chop off the modifier */
    }

    return buffer;
}
167 | |
168 | SDL_iconv_t SDL_iconv_open(const char *tocode, const char *fromcode) |
169 | { |
170 | int src_fmt = ENCODING_UNKNOWN; |
171 | int dst_fmt = ENCODING_UNKNOWN; |
172 | int i; |
173 | char fromcode_buffer[64]; |
174 | char tocode_buffer[64]; |
175 | |
176 | if ( !fromcode || !*fromcode ) { |
177 | fromcode = getlocale(fromcode_buffer, sizeof(fromcode_buffer)); |
178 | } |
179 | if ( !tocode || !*tocode ) { |
180 | tocode = getlocale(tocode_buffer, sizeof(tocode_buffer)); |
181 | } |
182 | for ( i = 0; i < SDL_arraysize(encodings); ++i ) { |
183 | if ( SDL_strcasecmp(fromcode, encodings[i].name) == 0 ) { |
184 | src_fmt = encodings[i].format; |
185 | if ( dst_fmt != ENCODING_UNKNOWN ) { |
186 | break; |
187 | } |
188 | } |
189 | if ( SDL_strcasecmp(tocode, encodings[i].name) == 0 ) { |
190 | dst_fmt = encodings[i].format; |
191 | if ( src_fmt != ENCODING_UNKNOWN ) { |
192 | break; |
193 | } |
194 | } |
195 | } |
196 | if ( src_fmt != ENCODING_UNKNOWN && dst_fmt != ENCODING_UNKNOWN ) { |
197 | SDL_iconv_t cd = (SDL_iconv_t)SDL_malloc(sizeof(*cd)); |
198 | if ( cd ) { |
199 | cd->src_fmt = src_fmt; |
200 | cd->dst_fmt = dst_fmt; |
201 | return cd; |
202 | } |
203 | } |
204 | return (SDL_iconv_t)-1; |
205 | } |
206 | |
207 | size_t SDL_iconv(SDL_iconv_t cd, |
208 | const char **inbuf, size_t *inbytesleft, |
209 | char **outbuf, size_t *outbytesleft) |
210 | { |
211 | /* For simplicity, we'll convert everything to and from UCS-4 */ |
212 | const char *src; |
213 | char *dst; |
214 | size_t srclen, dstlen; |
215 | Uint32 ch = 0; |
216 | size_t total; |
217 | |
218 | if ( !inbuf || !*inbuf ) { |
219 | /* Reset the context */ |
220 | return 0; |
221 | } |
222 | if ( !outbuf || !*outbuf || !outbytesleft || !*outbytesleft ) { |
223 | return SDL_ICONV_E2BIG; |
224 | } |
225 | src = *inbuf; |
226 | srclen = (inbytesleft ? *inbytesleft : 0); |
227 | dst = *outbuf; |
228 | dstlen = *outbytesleft; |
229 | |
230 | switch ( cd->src_fmt ) { |
231 | case ENCODING_UTF16: |
232 | /* Scan for a byte order marker */ |
233 | { |
234 | Uint8 *p = (Uint8 *)src; |
235 | size_t n = srclen / 2; |
236 | while ( n ) { |
237 | if ( p[0] == 0xFF && p[1] == 0xFE ) { |
238 | cd->src_fmt = ENCODING_UTF16BE; |
239 | break; |
240 | } else if ( p[0] == 0xFE && p[1] == 0xFF ) { |
241 | cd->src_fmt = ENCODING_UTF16LE; |
242 | break; |
243 | } |
244 | p += 2; |
245 | --n; |
246 | } |
247 | if ( n == 0 ) { |
248 | /* We can't tell, default to host order */ |
249 | cd->src_fmt = ENCODING_UTF16NATIVE; |
250 | } |
251 | } |
252 | break; |
253 | case ENCODING_UTF32: |
254 | /* Scan for a byte order marker */ |
255 | { |
256 | Uint8 *p = (Uint8 *)src; |
257 | size_t n = srclen / 4; |
258 | while ( n ) { |
259 | if ( p[0] == 0xFF && p[1] == 0xFE && |
260 | p[2] == 0x00 && p[3] == 0x00 ) { |
261 | cd->src_fmt = ENCODING_UTF32BE; |
262 | break; |
263 | } else if ( p[0] == 0x00 && p[1] == 0x00 && |
264 | p[2] == 0xFE && p[3] == 0xFF ) { |
265 | cd->src_fmt = ENCODING_UTF32LE; |
266 | break; |
267 | } |
268 | p += 4; |
269 | --n; |
270 | } |
271 | if ( n == 0 ) { |
272 | /* We can't tell, default to host order */ |
273 | cd->src_fmt = ENCODING_UTF32NATIVE; |
274 | } |
275 | } |
276 | break; |
277 | } |
278 | |
279 | switch ( cd->dst_fmt ) { |
280 | case ENCODING_UTF16: |
281 | /* Default to host order, need to add byte order marker */ |
282 | if ( dstlen < 2 ) { |
283 | return SDL_ICONV_E2BIG; |
284 | } |
285 | *(Uint16 *)dst = UNICODE_BOM; |
286 | dst += 2; |
287 | dstlen -= 2; |
288 | cd->dst_fmt = ENCODING_UTF16NATIVE; |
289 | break; |
290 | case ENCODING_UTF32: |
291 | /* Default to host order, need to add byte order marker */ |
292 | if ( dstlen < 4 ) { |
293 | return SDL_ICONV_E2BIG; |
294 | } |
295 | *(Uint32 *)dst = UNICODE_BOM; |
296 | dst += 4; |
297 | dstlen -= 4; |
298 | cd->dst_fmt = ENCODING_UTF32NATIVE; |
299 | break; |
300 | } |
301 | |
302 | total = 0; |
303 | while ( srclen > 0 ) { |
304 | /* Decode a character */ |
305 | switch ( cd->src_fmt ) { |
306 | case ENCODING_ASCII: |
307 | { |
308 | Uint8 *p = (Uint8 *)src; |
309 | ch = (Uint32)(p[0] & 0x7F); |
310 | ++src; |
311 | --srclen; |
312 | } |
313 | break; |
314 | case ENCODING_LATIN1: |
315 | { |
316 | Uint8 *p = (Uint8 *)src; |
317 | ch = (Uint32)p[0]; |
318 | ++src; |
319 | --srclen; |
320 | } |
321 | break; |
322 | case ENCODING_UTF8: /* RFC 3629 */ |
323 | { |
324 | Uint8 *p = (Uint8 *)src; |
325 | size_t left = 0; |
326 | SDL_bool overlong = SDL_FALSE; |
327 | if ( p[0] >= 0xFC ) { |
328 | if ( (p[0] & 0xFE) != 0xFC ) { |
329 | /* Skip illegal sequences |
330 | return SDL_ICONV_EILSEQ; |
331 | */ |
332 | ch = UNKNOWN_UNICODE; |
333 | } else { |
334 | if ( p[0] == 0xFC ) { |
335 | overlong = SDL_TRUE; |
336 | } |
337 | ch = (Uint32)(p[0] & 0x01); |
338 | left = 5; |
339 | } |
340 | } else if ( p[0] >= 0xF8 ) { |
341 | if ( (p[0] & 0xFC) != 0xF8 ) { |
342 | /* Skip illegal sequences |
343 | return SDL_ICONV_EILSEQ; |
344 | */ |
345 | ch = UNKNOWN_UNICODE; |
346 | } else { |
347 | if ( p[0] == 0xF8 ) { |
348 | overlong = SDL_TRUE; |
349 | } |
350 | ch = (Uint32)(p[0] & 0x03); |
351 | left = 4; |
352 | } |
353 | } else if ( p[0] >= 0xF0 ) { |
354 | if ( (p[0] & 0xF8) != 0xF0 ) { |
355 | /* Skip illegal sequences |
356 | return SDL_ICONV_EILSEQ; |
357 | */ |
358 | ch = UNKNOWN_UNICODE; |
359 | } else { |
360 | if ( p[0] == 0xF0 ) { |
361 | overlong = SDL_TRUE; |
362 | } |
363 | ch = (Uint32)(p[0] & 0x07); |
364 | left = 3; |
365 | } |
366 | } else if ( p[0] >= 0xE0 ) { |
367 | if ( (p[0] & 0xF0) != 0xE0 ) { |
368 | /* Skip illegal sequences |
369 | return SDL_ICONV_EILSEQ; |
370 | */ |
371 | ch = UNKNOWN_UNICODE; |
372 | } else { |
373 | if ( p[0] == 0xE0 ) { |
374 | overlong = SDL_TRUE; |
375 | } |
376 | ch = (Uint32)(p[0] & 0x0F); |
377 | left = 2; |
378 | } |
379 | } else if ( p[0] >= 0xC0 ) { |
380 | if ( (p[0] & 0xE0) != 0xC0 ) { |
381 | /* Skip illegal sequences |
382 | return SDL_ICONV_EILSEQ; |
383 | */ |
384 | ch = UNKNOWN_UNICODE; |
385 | } else { |
386 | if ( (p[0] & 0xCE) == 0xC0 ) { |
387 | overlong = SDL_TRUE; |
388 | } |
389 | ch = (Uint32)(p[0] & 0x1F); |
390 | left = 1; |
391 | } |
392 | } else { |
393 | if ( (p[0] & 0x80) != 0x00 ) { |
394 | /* Skip illegal sequences |
395 | return SDL_ICONV_EILSEQ; |
396 | */ |
397 | ch = UNKNOWN_UNICODE; |
398 | } else { |
399 | ch = (Uint32)p[0]; |
400 | } |
401 | } |
402 | ++src; |
403 | --srclen; |
404 | if ( srclen < left ) { |
405 | return SDL_ICONV_EINVAL; |
406 | } |
407 | while ( left-- ) { |
408 | ++p; |
409 | if ( (p[0] & 0xC0) != 0x80 ) { |
410 | /* Skip illegal sequences |
411 | return SDL_ICONV_EILSEQ; |
412 | */ |
413 | ch = UNKNOWN_UNICODE; |
414 | break; |
415 | } |
416 | ch <<= 6; |
417 | ch |= (p[0] & 0x3F); |
418 | ++src; |
419 | --srclen; |
420 | } |
421 | if ( overlong ) { |
422 | /* Potential security risk |
423 | return SDL_ICONV_EILSEQ; |
424 | */ |
425 | ch = UNKNOWN_UNICODE; |
426 | } |
427 | if ( (ch >= 0xD800 && ch <= 0xDFFF) || |
428 | (ch == 0xFFFE || ch == 0xFFFF) || |
429 | ch > 0x10FFFF ) { |
430 | /* Skip illegal sequences |
431 | return SDL_ICONV_EILSEQ; |
432 | */ |
433 | ch = UNKNOWN_UNICODE; |
434 | } |
435 | } |
436 | break; |
437 | case ENCODING_UTF16BE: /* RFC 2781 */ |
438 | { |
439 | Uint8 *p = (Uint8 *)src; |
440 | Uint16 W1, W2; |
441 | if ( srclen < 2 ) { |
442 | return SDL_ICONV_EINVAL; |
443 | } |
444 | W1 = ((Uint16)p[0] << 8) | |
445 | (Uint16)p[1]; |
446 | src += 2; |
447 | srclen -= 2; |
448 | if ( W1 < 0xD800 || W1 > 0xDFFF ) { |
449 | ch = (Uint32)W1; |
450 | break; |
451 | } |
452 | if ( W1 > 0xDBFF ) { |
453 | /* Skip illegal sequences |
454 | return SDL_ICONV_EILSEQ; |
455 | */ |
456 | ch = UNKNOWN_UNICODE; |
457 | break; |
458 | } |
459 | if ( srclen < 2 ) { |
460 | return SDL_ICONV_EINVAL; |
461 | } |
462 | p = (Uint8 *)src; |
463 | W2 = ((Uint16)p[0] << 8) | |
464 | (Uint16)p[1]; |
465 | src += 2; |
466 | srclen -= 2; |
467 | if ( W2 < 0xDC00 || W2 > 0xDFFF ) { |
468 | /* Skip illegal sequences |
469 | return SDL_ICONV_EILSEQ; |
470 | */ |
471 | ch = UNKNOWN_UNICODE; |
472 | break; |
473 | } |
474 | ch = (((Uint32)(W1 & 0x3FF) << 10) | |
475 | (Uint32)(W2 & 0x3FF)) + 0x10000; |
476 | } |
477 | break; |
478 | case ENCODING_UTF16LE: /* RFC 2781 */ |
479 | { |
480 | Uint8 *p = (Uint8 *)src; |
481 | Uint16 W1, W2; |
482 | if ( srclen < 2 ) { |
483 | return SDL_ICONV_EINVAL; |
484 | } |
485 | W1 = ((Uint16)p[1] << 8) | |
486 | (Uint16)p[0]; |
487 | src += 2; |
488 | srclen -= 2; |
489 | if ( W1 < 0xD800 || W1 > 0xDFFF ) { |
490 | ch = (Uint32)W1; |
491 | break; |
492 | } |
493 | if ( W1 > 0xDBFF ) { |
494 | /* Skip illegal sequences |
495 | return SDL_ICONV_EILSEQ; |
496 | */ |
497 | ch = UNKNOWN_UNICODE; |
498 | break; |
499 | } |
500 | if ( srclen < 2 ) { |
501 | return SDL_ICONV_EINVAL; |
502 | } |
503 | p = (Uint8 *)src; |
504 | W2 = ((Uint16)p[1] << 8) | |
505 | (Uint16)p[0]; |
506 | src += 2; |
507 | srclen -= 2; |
508 | if ( W2 < 0xDC00 || W2 > 0xDFFF ) { |
509 | /* Skip illegal sequences |
510 | return SDL_ICONV_EILSEQ; |
511 | */ |
512 | ch = UNKNOWN_UNICODE; |
513 | break; |
514 | } |
515 | ch = (((Uint32)(W1 & 0x3FF) << 10) | |
516 | (Uint32)(W2 & 0x3FF)) + 0x10000; |
517 | } |
518 | break; |
519 | case ENCODING_UTF32BE: |
520 | { |
521 | Uint8 *p = (Uint8 *)src; |
522 | if ( srclen < 4 ) { |
523 | return SDL_ICONV_EINVAL; |
524 | } |
525 | ch = ((Uint32)p[0] << 24) | |
526 | ((Uint32)p[1] << 16) | |
527 | ((Uint32)p[2] << 8) | |
528 | (Uint32)p[3]; |
529 | src += 4; |
530 | srclen -= 4; |
531 | } |
532 | break; |
533 | case ENCODING_UTF32LE: |
534 | { |
535 | Uint8 *p = (Uint8 *)src; |
536 | if ( srclen < 4 ) { |
537 | return SDL_ICONV_EINVAL; |
538 | } |
539 | ch = ((Uint32)p[3] << 24) | |
540 | ((Uint32)p[2] << 16) | |
541 | ((Uint32)p[1] << 8) | |
542 | (Uint32)p[0]; |
543 | src += 4; |
544 | srclen -= 4; |
545 | } |
546 | break; |
547 | case ENCODING_UCS2: |
548 | { |
549 | Uint16 *p = (Uint16 *)src; |
550 | if ( srclen < 2 ) { |
551 | return SDL_ICONV_EINVAL; |
552 | } |
553 | ch = *p; |
554 | src += 2; |
555 | srclen -= 2; |
556 | } |
557 | break; |
558 | case ENCODING_UCS4: |
559 | { |
560 | Uint32 *p = (Uint32 *)src; |
561 | if ( srclen < 4 ) { |
562 | return SDL_ICONV_EINVAL; |
563 | } |
564 | ch = *p; |
565 | src += 4; |
566 | srclen -= 4; |
567 | } |
568 | break; |
569 | } |
570 | |
571 | /* Encode a character */ |
572 | switch ( cd->dst_fmt ) { |
573 | case ENCODING_ASCII: |
574 | { |
575 | Uint8 *p = (Uint8 *)dst; |
576 | if ( dstlen < 1 ) { |
577 | return SDL_ICONV_E2BIG; |
578 | } |
579 | if ( ch > 0x7F ) { |
580 | *p = UNKNOWN_ASCII; |
581 | } else { |
582 | *p = (Uint8)ch; |
583 | } |
584 | ++dst; |
585 | --dstlen; |
586 | } |
587 | break; |
588 | case ENCODING_LATIN1: |
589 | { |
590 | Uint8 *p = (Uint8 *)dst; |
591 | if ( dstlen < 1 ) { |
592 | return SDL_ICONV_E2BIG; |
593 | } |
594 | if ( ch > 0xFF ) { |
595 | *p = UNKNOWN_ASCII; |
596 | } else { |
597 | *p = (Uint8)ch; |
598 | } |
599 | ++dst; |
600 | --dstlen; |
601 | } |
602 | break; |
603 | case ENCODING_UTF8: /* RFC 3629 */ |
604 | { |
605 | Uint8 *p = (Uint8 *)dst; |
606 | if ( ch > 0x10FFFF ) { |
607 | ch = UNKNOWN_UNICODE; |
608 | } |
609 | if ( ch <= 0x7F ) { |
610 | if ( dstlen < 1 ) { |
611 | return SDL_ICONV_E2BIG; |
612 | } |
613 | *p = (Uint8)ch; |
614 | ++dst; |
615 | --dstlen; |
616 | } else if ( ch <= 0x7FF ) { |
617 | if ( dstlen < 2 ) { |
618 | return SDL_ICONV_E2BIG; |
619 | } |
620 | p[0] = 0xC0 | (Uint8)((ch >> 6) & 0x1F); |
621 | p[1] = 0x80 | (Uint8)(ch & 0x3F); |
622 | dst += 2; |
623 | dstlen -= 2; |
624 | } else if ( ch <= 0xFFFF ) { |
625 | if ( dstlen < 3 ) { |
626 | return SDL_ICONV_E2BIG; |
627 | } |
628 | p[0] = 0xE0 | (Uint8)((ch >> 12) & 0x0F); |
629 | p[1] = 0x80 | (Uint8)((ch >> 6) & 0x3F); |
630 | p[2] = 0x80 | (Uint8)(ch & 0x3F); |
631 | dst += 3; |
632 | dstlen -= 3; |
633 | } else if ( ch <= 0x1FFFFF ) { |
634 | if ( dstlen < 4 ) { |
635 | return SDL_ICONV_E2BIG; |
636 | } |
637 | p[0] = 0xF0 | (Uint8)((ch >> 18) & 0x07); |
638 | p[1] = 0x80 | (Uint8)((ch >> 12) & 0x3F); |
639 | p[2] = 0x80 | (Uint8)((ch >> 6) & 0x3F); |
640 | p[3] = 0x80 | (Uint8)(ch & 0x3F); |
641 | dst += 4; |
642 | dstlen -= 4; |
643 | } else if ( ch <= 0x3FFFFFF ) { |
644 | if ( dstlen < 5 ) { |
645 | return SDL_ICONV_E2BIG; |
646 | } |
647 | p[0] = 0xF8 | (Uint8)((ch >> 24) & 0x03); |
648 | p[1] = 0x80 | (Uint8)((ch >> 18) & 0x3F); |
649 | p[2] = 0x80 | (Uint8)((ch >> 12) & 0x3F); |
650 | p[3] = 0x80 | (Uint8)((ch >> 6) & 0x3F); |
651 | p[4] = 0x80 | (Uint8)(ch & 0x3F); |
652 | dst += 5; |
653 | dstlen -= 5; |
654 | } else { |
655 | if ( dstlen < 6 ) { |
656 | return SDL_ICONV_E2BIG; |
657 | } |
658 | p[0] = 0xFC | (Uint8)((ch >> 30) & 0x01); |
659 | p[1] = 0x80 | (Uint8)((ch >> 24) & 0x3F); |
660 | p[2] = 0x80 | (Uint8)((ch >> 18) & 0x3F); |
661 | p[3] = 0x80 | (Uint8)((ch >> 12) & 0x3F); |
662 | p[4] = 0x80 | (Uint8)((ch >> 6) & 0x3F); |
663 | p[5] = 0x80 | (Uint8)(ch & 0x3F); |
664 | dst += 6; |
665 | dstlen -= 6; |
666 | } |
667 | } |
668 | break; |
669 | case ENCODING_UTF16BE: /* RFC 2781 */ |
670 | { |
671 | Uint8 *p = (Uint8 *)dst; |
672 | if ( ch > 0x10FFFF ) { |
673 | ch = UNKNOWN_UNICODE; |
674 | } |
675 | if ( ch < 0x10000 ) { |
676 | if ( dstlen < 2 ) { |
677 | return SDL_ICONV_E2BIG; |
678 | } |
679 | p[0] = (Uint8)(ch >> 8); |
680 | p[1] = (Uint8)ch; |
681 | dst += 2; |
682 | dstlen -= 2; |
683 | } else { |
684 | Uint16 W1, W2; |
685 | if ( dstlen < 4 ) { |
686 | return SDL_ICONV_E2BIG; |
687 | } |
688 | ch = ch - 0x10000; |
689 | W1 = 0xD800 | (Uint16)((ch >> 10) & 0x3FF); |
690 | W2 = 0xDC00 | (Uint16)(ch & 0x3FF); |
691 | p[0] = (Uint8)(W1 >> 8); |
692 | p[1] = (Uint8)W1; |
693 | p[2] = (Uint8)(W2 >> 8); |
694 | p[3] = (Uint8)W2; |
695 | dst += 4; |
696 | dstlen -= 4; |
697 | } |
698 | } |
699 | break; |
700 | case ENCODING_UTF16LE: /* RFC 2781 */ |
701 | { |
702 | Uint8 *p = (Uint8 *)dst; |
703 | if ( ch > 0x10FFFF ) { |
704 | ch = UNKNOWN_UNICODE; |
705 | } |
706 | if ( ch < 0x10000 ) { |
707 | if ( dstlen < 2 ) { |
708 | return SDL_ICONV_E2BIG; |
709 | } |
710 | p[1] = (Uint8)(ch >> 8); |
711 | p[0] = (Uint8)ch; |
712 | dst += 2; |
713 | dstlen -= 2; |
714 | } else { |
715 | Uint16 W1, W2; |
716 | if ( dstlen < 4 ) { |
717 | return SDL_ICONV_E2BIG; |
718 | } |
719 | ch = ch - 0x10000; |
720 | W1 = 0xD800 | (Uint16)((ch >> 10) & 0x3FF); |
721 | W2 = 0xDC00 | (Uint16)(ch & 0x3FF); |
722 | p[1] = (Uint8)(W1 >> 8); |
723 | p[0] = (Uint8)W1; |
724 | p[3] = (Uint8)(W2 >> 8); |
725 | p[2] = (Uint8)W2; |
726 | dst += 4; |
727 | dstlen -= 4; |
728 | } |
729 | } |
730 | break; |
731 | case ENCODING_UTF32BE: |
732 | { |
733 | Uint8 *p = (Uint8 *)dst; |
734 | if ( ch > 0x10FFFF ) { |
735 | ch = UNKNOWN_UNICODE; |
736 | } |
737 | if ( dstlen < 4 ) { |
738 | return SDL_ICONV_E2BIG; |
739 | } |
740 | p[0] = (Uint8)(ch >> 24); |
741 | p[1] = (Uint8)(ch >> 16); |
742 | p[2] = (Uint8)(ch >> 8); |
743 | p[3] = (Uint8)ch; |
744 | dst += 4; |
745 | dstlen -= 4; |
746 | } |
747 | break; |
748 | case ENCODING_UTF32LE: |
749 | { |
750 | Uint8 *p = (Uint8 *)dst; |
751 | if ( ch > 0x10FFFF ) { |
752 | ch = UNKNOWN_UNICODE; |
753 | } |
754 | if ( dstlen < 4 ) { |
755 | return SDL_ICONV_E2BIG; |
756 | } |
757 | p[3] = (Uint8)(ch >> 24); |
758 | p[2] = (Uint8)(ch >> 16); |
759 | p[1] = (Uint8)(ch >> 8); |
760 | p[0] = (Uint8)ch; |
761 | dst += 4; |
762 | dstlen -= 4; |
763 | } |
764 | break; |
765 | case ENCODING_UCS2: |
766 | { |
767 | Uint16 *p = (Uint16 *)dst; |
768 | if ( ch > 0xFFFF ) { |
769 | ch = UNKNOWN_UNICODE; |
770 | } |
771 | if ( dstlen < 2 ) { |
772 | return SDL_ICONV_E2BIG; |
773 | } |
774 | *p = (Uint16)ch; |
775 | dst += 2; |
776 | dstlen -= 2; |
777 | } |
778 | break; |
779 | case ENCODING_UCS4: |
780 | { |
781 | Uint32 *p = (Uint32 *)dst; |
782 | if ( ch > 0x7FFFFFFF ) { |
783 | ch = UNKNOWN_UNICODE; |
784 | } |
785 | if ( dstlen < 4 ) { |
786 | return SDL_ICONV_E2BIG; |
787 | } |
788 | *p = ch; |
789 | dst += 4; |
790 | dstlen -= 4; |
791 | } |
792 | break; |
793 | } |
794 | |
795 | /* Update state */ |
796 | *inbuf = src; |
797 | *inbytesleft = srclen; |
798 | *outbuf = dst; |
799 | *outbytesleft = dstlen; |
800 | ++total; |
801 | } |
802 | return total; |
803 | } |
804 | |
805 | int SDL_iconv_close(SDL_iconv_t cd) |
806 | { |
807 | if ( cd && cd != (SDL_iconv_t)-1 ) { |
808 | SDL_free(cd); |
809 | } |
810 | return 0; |
811 | } |
812 | |
813 | #endif /* !HAVE_ICONV */ |
814 | |
815 | char *SDL_iconv_string(const char *tocode, const char *fromcode, const char *inbuf, size_t inbytesleft) |
816 | { |
817 | SDL_iconv_t cd; |
818 | char *string; |
819 | size_t stringsize; |
820 | char *outbuf; |
821 | size_t outbytesleft; |
822 | size_t retCode = 0; |
823 | |
824 | cd = SDL_iconv_open(tocode, fromcode); |
825 | if ( cd == (SDL_iconv_t)-1 ) { |
826 | /* See if we can recover here (fixes iconv on Solaris 11) */ |
827 | if ( !tocode || !*tocode ) { |
828 | tocode = "UTF-8"; |
829 | } |
830 | if ( !fromcode || !*fromcode ) { |
831 | fromcode = "UTF-8"; |
832 | } |
833 | cd = SDL_iconv_open(tocode, fromcode); |
834 | } |
835 | if ( cd == (SDL_iconv_t)-1 ) { |
836 | return NULL; |
837 | } |
838 | |
839 | stringsize = inbytesleft > 4 ? inbytesleft : 4; |
840 | string = SDL_malloc(stringsize); |
841 | if ( !string ) { |
842 | SDL_iconv_close(cd); |
843 | return NULL; |
844 | } |
845 | outbuf = string; |
846 | outbytesleft = stringsize; |
847 | SDL_memset(outbuf, 0, 4); |
848 | |
849 | while ( inbytesleft > 0 ) { |
850 | retCode = SDL_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); |
851 | switch (retCode) { |
852 | case SDL_ICONV_E2BIG: |
853 | { |
854 | char *oldstring = string; |
855 | stringsize *= 2; |
856 | string = SDL_realloc(string, stringsize); |
857 | if ( !string ) { |
858 | SDL_iconv_close(cd); |
859 | return NULL; |
860 | } |
861 | outbuf = string + (outbuf - oldstring); |
862 | outbytesleft = stringsize - (outbuf - string); |
863 | SDL_memset(outbuf, 0, 4); |
864 | } |
865 | break; |
866 | case SDL_ICONV_EILSEQ: |
867 | /* Try skipping some input data - not perfect, but... */ |
868 | ++inbuf; |
869 | --inbytesleft; |
870 | break; |
871 | case SDL_ICONV_EINVAL: |
872 | case SDL_ICONV_ERROR: |
873 | /* We can't continue... */ |
874 | inbytesleft = 0; |
875 | break; |
876 | } |
877 | } |
878 | SDL_iconv_close(cd); |
879 | |
880 | return string; |
881 | } |