SDL-1.2.14
[sdl_omap.git] / src / stdlib / SDL_iconv.c
1 /*
2     SDL - Simple DirectMedia Layer
3     Copyright (C) 1997-2009 Sam Lantinga
4
5     This library is free software; you can redistribute it and/or
6     modify it under the terms of the GNU Lesser General Public
7     License as published by the Free Software Foundation; either
8     version 2.1 of the License, or (at your option) any later version.
9
10     This library is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13     Lesser General Public License for more details.
14
15     You should have received a copy of the GNU Lesser General Public
16     License along with this library; if not, write to the Free Software
17     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
18
19     Sam Lantinga
20     slouken@libsdl.org
21 */
22 #include "SDL_config.h"
23
24 /* This file contains portable iconv functions for SDL */
25
26 #include "SDL_stdinc.h"
27 #include "SDL_endian.h"
28
29 #ifdef HAVE_ICONV
30
31 /* Depending on which standard the iconv() was implemented with,
32    iconv() may or may not use const char ** for the inbuf param.
33    If we get this wrong, it's just a warning, so no big deal.
34 */
35 #if defined(_XGP6) || \
36     defined(__GLIBC__) && ((__GLIBC__ > 2) || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2))
37 #define ICONV_INBUF_NONCONST
38 #endif
39
40 #include <errno.h>
41
42 size_t SDL_iconv(SDL_iconv_t cd,
43                  const char **inbuf, size_t *inbytesleft,
44                  char **outbuf, size_t *outbytesleft)
45 {
46         size_t retCode;
47 #ifdef ICONV_INBUF_NONCONST
48         retCode = iconv(cd, (char **)inbuf, inbytesleft, outbuf, outbytesleft);
49 #else
50         retCode = iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft);
51 #endif
52         if ( retCode == (size_t)-1 ) {
53                 switch(errno) {
54                     case E2BIG:
55                         return SDL_ICONV_E2BIG;
56                     case EILSEQ:
57                         return SDL_ICONV_EILSEQ;
58                     case EINVAL:
59                         return SDL_ICONV_EINVAL;
60                     default:
61                         return SDL_ICONV_ERROR;
62                 }
63         }
64         return retCode;
65 }
66
67 #else
68
69 /* Lots of useful information on Unicode at:
70         http://www.cl.cam.ac.uk/~mgk25/unicode.html
71 */
72
73 #define UNICODE_BOM     0xFEFF
74
75 #define UNKNOWN_ASCII   '?'
76 #define UNKNOWN_UNICODE 0xFFFD
77
/* Identifiers for the text encodings known to the built-in converter.
   The values are consecutive, starting with ENCODING_UNKNOWN at zero.
 */
enum {
        ENCODING_UNKNOWN = 0,
        ENCODING_ASCII   = 1,
        ENCODING_LATIN1  = 2,
        ENCODING_UTF8    = 3,
        ENCODING_UTF16   = 4,   /* Needs byte order marker */
        ENCODING_UTF16BE = 5,
        ENCODING_UTF16LE = 6,
        ENCODING_UTF32   = 7,   /* Needs byte order marker */
        ENCODING_UTF32BE = 8,
        ENCODING_UTF32LE = 9,
        ENCODING_UCS2    = 10,  /* Native byte order assumed */
        ENCODING_UCS4    = 11   /* Native byte order assumed */
};

/* Aliases that select the UTF-16 / UTF-32 variant matching the byte order
   reported by SDL_BYTEORDER.
 */
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
#define ENCODING_UTF16NATIVE    ENCODING_UTF16BE
#define ENCODING_UTF32NATIVE    ENCODING_UTF32BE
#else
#define ENCODING_UTF16NATIVE    ENCODING_UTF16LE
#define ENCODING_UTF32NATIVE    ENCODING_UTF32LE
#endif
99
/* Conversion descriptor used by the built-in converter: the pair of
   ENCODING_* identifiers recorded when the descriptor is opened.
 */
struct _SDL_iconv_t
{
        int src_fmt;    /* ENCODING_* id the input is decoded as */
        int dst_fmt;    /* ENCODING_* id the output is encoded as */
};
105
106 static struct {
107         const char *name;
108         int format;
109 } encodings[] = {
110         { "ASCII",      ENCODING_ASCII },
111         { "US-ASCII",   ENCODING_ASCII },
112         { "8859-1",     ENCODING_LATIN1 },
113         { "ISO-8859-1", ENCODING_LATIN1 },
114         { "UTF8",       ENCODING_UTF8 },
115         { "UTF-8",      ENCODING_UTF8 },
116         { "UTF16",      ENCODING_UTF16 },
117         { "UTF-16",     ENCODING_UTF16 },
118         { "UTF16BE",    ENCODING_UTF16BE },
119         { "UTF-16BE",   ENCODING_UTF16BE },
120         { "UTF16LE",    ENCODING_UTF16LE },
121         { "UTF-16LE",   ENCODING_UTF16LE },
122         { "UTF32",      ENCODING_UTF32 },
123         { "UTF-32",     ENCODING_UTF32 },
124         { "UTF32BE",    ENCODING_UTF32BE },
125         { "UTF-32BE",   ENCODING_UTF32BE },
126         { "UTF32LE",    ENCODING_UTF32LE },
127         { "UTF-32LE",   ENCODING_UTF32LE },
128         { "UCS2",       ENCODING_UCS2 },
129         { "UCS-2",      ENCODING_UCS2 },
130         { "UCS4",       ENCODING_UCS4 },
131         { "UCS-4",      ENCODING_UCS4 },
132 };
133
/* Work out the charset name implied by the environment.

   The variables LC_ALL, LC_CTYPE, LC_MESSAGES and LANG are consulted in
   that order and the first one that is set to a non-empty string wins.
   A variable that is set but empty is treated as unset, so it no longer
   hides a later variable that carries a real value.  If nothing usable
   is found, or the locale is "C" or "POSIX", the result is "ASCII".

   A value such as "en_US.UTF-8@blah" is reduced to "UTF-8": everything
   up to and including the first '.' is dropped, and the copy in buffer
   is cut at the first '@'.

   buffer/bufsize: caller-supplied storage that receives the name
                   (copied with SDL_strlcpy, so it is truncated to fit).
   Returns buffer.
 */
static const char *getlocale(char *buffer, size_t bufsize)
{
        static const char *const vars[] = {
                "LC_ALL", "LC_CTYPE", "LC_MESSAGES", "LANG"
        };
        const char *lang = NULL;
        const char *dot;
        char *at;
        size_t i;

        for ( i = 0; i < SDL_arraysize(vars); ++i ) {
                const char *value = SDL_getenv(vars[i]);
                if ( value && *value ) {
                        lang = value;
                        break;
                }
        }
        if ( !lang || SDL_strcmp(lang, "C") == 0 ||
             SDL_strcmp(lang, "POSIX") == 0 ) {
                lang = "ASCII";
        }

        /* We need to trim down strings like "en_US.UTF-8@blah" to "UTF-8" */
        dot = SDL_strchr(lang, '.');
        if ( dot != NULL ) {
                lang = dot + 1;
        }

        SDL_strlcpy(buffer, lang, bufsize);
        at = SDL_strchr(buffer, '@');
        if ( at != NULL ) {
                *at = '\0';  /* chop end of string. */
        }

        return buffer;
}
167
168 SDL_iconv_t SDL_iconv_open(const char *tocode, const char *fromcode)
169 {
170         int src_fmt = ENCODING_UNKNOWN;
171         int dst_fmt = ENCODING_UNKNOWN;
172         int i;
173         char fromcode_buffer[64];
174         char tocode_buffer[64];
175
176         if ( !fromcode || !*fromcode ) {
177                 fromcode = getlocale(fromcode_buffer, sizeof(fromcode_buffer));
178         }
179         if ( !tocode || !*tocode ) {
180                 tocode = getlocale(tocode_buffer, sizeof(tocode_buffer));
181         }
182         for ( i = 0; i < SDL_arraysize(encodings); ++i ) {
183                 if ( SDL_strcasecmp(fromcode, encodings[i].name) == 0 ) {
184                         src_fmt = encodings[i].format;
185                         if ( dst_fmt != ENCODING_UNKNOWN ) {
186                                 break;
187                         }
188                 }
189                 if ( SDL_strcasecmp(tocode, encodings[i].name) == 0 ) {
190                         dst_fmt = encodings[i].format;
191                         if ( src_fmt != ENCODING_UNKNOWN ) {
192                                 break;
193                         }
194                 }
195         }
196         if ( src_fmt != ENCODING_UNKNOWN && dst_fmt != ENCODING_UNKNOWN ) {
197                 SDL_iconv_t cd = (SDL_iconv_t)SDL_malloc(sizeof(*cd));
198                 if ( cd ) {
199                         cd->src_fmt = src_fmt;
200                         cd->dst_fmt = dst_fmt;
201                         return cd;
202                 }
203         }
204         return (SDL_iconv_t)-1;
205 }
206
207 size_t SDL_iconv(SDL_iconv_t cd,
208                  const char **inbuf, size_t *inbytesleft,
209                  char **outbuf, size_t *outbytesleft)
210 {
211         /* For simplicity, we'll convert everything to and from UCS-4 */
212         const char *src;
213         char *dst;
214         size_t srclen, dstlen;
215         Uint32 ch = 0;
216         size_t total;
217
218         if ( !inbuf || !*inbuf ) {
219                 /* Reset the context */
220                 return 0;
221         }
222         if ( !outbuf || !*outbuf || !outbytesleft || !*outbytesleft ) {
223                 return SDL_ICONV_E2BIG;
224         }
225         src = *inbuf;
226         srclen = (inbytesleft ? *inbytesleft : 0);
227         dst = *outbuf;
228         dstlen = *outbytesleft;
229
230         switch ( cd->src_fmt ) {
231             case ENCODING_UTF16:
232                 /* Scan for a byte order marker */
233                 {
234                         Uint8 *p = (Uint8 *)src;
235                         size_t n = srclen / 2;
236                         while ( n ) {
237                                 if ( p[0] == 0xFF && p[1] == 0xFE ) {
238                                         cd->src_fmt = ENCODING_UTF16BE;
239                                         break;
240                                 } else if ( p[0] == 0xFE && p[1] == 0xFF ) {
241                                         cd->src_fmt = ENCODING_UTF16LE;
242                                         break;
243                                 }
244                                 p += 2;
245                                 --n;
246                         }
247                         if ( n == 0 ) {
248                                 /* We can't tell, default to host order */
249                                 cd->src_fmt = ENCODING_UTF16NATIVE;
250                         }
251                 }
252                 break;
253             case ENCODING_UTF32:
254                 /* Scan for a byte order marker */
255                 {
256                         Uint8 *p = (Uint8 *)src;
257                         size_t n = srclen / 4;
258                         while ( n ) {
259                                 if ( p[0] == 0xFF && p[1] == 0xFE &&
260                                      p[2] == 0x00 && p[3] == 0x00 ) {
261                                         cd->src_fmt = ENCODING_UTF32BE;
262                                         break;
263                                 } else if ( p[0] == 0x00 && p[1] == 0x00 &&
264                                             p[2] == 0xFE && p[3] == 0xFF ) {
265                                         cd->src_fmt = ENCODING_UTF32LE;
266                                         break;
267                                 }
268                                 p += 4;
269                                 --n;
270                         }
271                         if ( n == 0 ) {
272                                 /* We can't tell, default to host order */
273                                 cd->src_fmt = ENCODING_UTF32NATIVE;
274                         }
275                 }
276                 break;
277         }
278
279         switch ( cd->dst_fmt ) {
280             case ENCODING_UTF16:
281                 /* Default to host order, need to add byte order marker */
282                 if ( dstlen < 2 ) {
283                         return SDL_ICONV_E2BIG;
284                 }
285                 *(Uint16 *)dst = UNICODE_BOM;
286                 dst += 2;
287                 dstlen -= 2;
288                 cd->dst_fmt = ENCODING_UTF16NATIVE;
289                 break;
290             case ENCODING_UTF32:
291                 /* Default to host order, need to add byte order marker */
292                 if ( dstlen < 4 ) {
293                         return SDL_ICONV_E2BIG;
294                 }
295                 *(Uint32 *)dst = UNICODE_BOM;
296                 dst += 4;
297                 dstlen -= 4;
298                 cd->dst_fmt = ENCODING_UTF32NATIVE;
299                 break;
300         }
301
302         total = 0;
303         while ( srclen > 0 ) {
304                 /* Decode a character */
305                 switch ( cd->src_fmt ) {
306                     case ENCODING_ASCII:
307                         {
308                                 Uint8 *p = (Uint8 *)src;
309                                 ch = (Uint32)(p[0] & 0x7F);
310                                 ++src;
311                                 --srclen;
312                         }
313                         break;
314                     case ENCODING_LATIN1:
315                         {
316                                 Uint8 *p = (Uint8 *)src;
317                                 ch = (Uint32)p[0];
318                                 ++src;
319                                 --srclen;
320                         }
321                         break;
322                     case ENCODING_UTF8: /* RFC 3629 */
323                         {
324                                 Uint8 *p = (Uint8 *)src;
325                                 size_t left = 0;
326                                 SDL_bool overlong = SDL_FALSE;
327                                 if ( p[0] >= 0xFC ) {
328                                         if ( (p[0] & 0xFE) != 0xFC ) {
329                                                 /* Skip illegal sequences
330                                                 return SDL_ICONV_EILSEQ;
331                                                 */
332                                                 ch = UNKNOWN_UNICODE;
333                                         } else {
334                                                 if ( p[0] == 0xFC ) {
335                                                         overlong = SDL_TRUE;
336                                                 }
337                                                 ch = (Uint32)(p[0] & 0x01);
338                                                 left = 5;
339                                         }
340                                 } else if ( p[0] >= 0xF8 ) {
341                                         if ( (p[0] & 0xFC) != 0xF8 ) {
342                                                 /* Skip illegal sequences
343                                                 return SDL_ICONV_EILSEQ;
344                                                 */
345                                                 ch = UNKNOWN_UNICODE;
346                                         } else {
347                                                 if ( p[0] == 0xF8 ) {
348                                                         overlong = SDL_TRUE;
349                                                 }
350                                                 ch = (Uint32)(p[0] & 0x03);
351                                                 left = 4;
352                                         }
353                                 } else if ( p[0] >= 0xF0 ) {
354                                         if ( (p[0] & 0xF8) != 0xF0 ) {
355                                                 /* Skip illegal sequences
356                                                 return SDL_ICONV_EILSEQ;
357                                                 */
358                                                 ch = UNKNOWN_UNICODE;
359                                         } else {
360                                                 if ( p[0] == 0xF0 ) {
361                                                         overlong = SDL_TRUE;
362                                                 }
363                                                 ch = (Uint32)(p[0] & 0x07);
364                                                 left = 3;
365                                         }
366                                 } else if ( p[0] >= 0xE0 ) {
367                                         if ( (p[0] & 0xF0) != 0xE0 ) {
368                                                 /* Skip illegal sequences
369                                                 return SDL_ICONV_EILSEQ;
370                                                 */
371                                                 ch = UNKNOWN_UNICODE;
372                                         } else {
373                                                 if ( p[0] == 0xE0 ) {
374                                                         overlong = SDL_TRUE;
375                                                 }
376                                                 ch = (Uint32)(p[0] & 0x0F);
377                                                 left = 2;
378                                         }
379                                 } else if ( p[0] >= 0xC0 ) {
380                                         if ( (p[0] & 0xE0) != 0xC0 ) {
381                                                 /* Skip illegal sequences
382                                                 return SDL_ICONV_EILSEQ;
383                                                 */
384                                                 ch = UNKNOWN_UNICODE;
385                                         } else {
386                                                 if ( (p[0] & 0xCE) == 0xC0 ) {
387                                                         overlong = SDL_TRUE;
388                                                 }
389                                                 ch = (Uint32)(p[0] & 0x1F);
390                                                 left = 1;
391                                         }
392                                 } else {
393                                         if ( (p[0] & 0x80) != 0x00 ) {
394                                                 /* Skip illegal sequences
395                                                 return SDL_ICONV_EILSEQ;
396                                                 */
397                                                 ch = UNKNOWN_UNICODE;
398                                         } else {
399                                                 ch = (Uint32)p[0];
400                                         }
401                                 }
402                                 ++src;
403                                 --srclen;
404                                 if ( srclen < left ) {
405                                         return SDL_ICONV_EINVAL;
406                                 }
407                                 while ( left-- ) {
408                                         ++p;
409                                         if ( (p[0] & 0xC0) != 0x80 ) {
410                                                 /* Skip illegal sequences
411                                                 return SDL_ICONV_EILSEQ;
412                                                 */
413                                                 ch = UNKNOWN_UNICODE;
414                                                 break;
415                                         }
416                                         ch <<= 6;
417                                         ch |= (p[0] & 0x3F);
418                                         ++src;
419                                         --srclen;
420                                 }
421                                 if ( overlong ) {
422                                         /* Potential security risk
423                                         return SDL_ICONV_EILSEQ;
424                                         */
425                                         ch = UNKNOWN_UNICODE;
426                                 }
427                                 if ( (ch >= 0xD800 && ch <= 0xDFFF) ||
428                                      (ch == 0xFFFE || ch == 0xFFFF) ||
429                                      ch > 0x10FFFF ) {
430                                         /* Skip illegal sequences
431                                         return SDL_ICONV_EILSEQ;
432                                         */
433                                         ch = UNKNOWN_UNICODE;
434                                 }
435                         }
436                         break;
437                     case ENCODING_UTF16BE: /* RFC 2781 */
438                         {
439                                 Uint8 *p = (Uint8 *)src;
440                                 Uint16 W1, W2;
441                                 if ( srclen < 2 ) {
442                                         return SDL_ICONV_EINVAL;
443                                 }
444                                 W1 = ((Uint16)p[0] << 8) |
445                                       (Uint16)p[1];
446                                 src += 2;
447                                 srclen -= 2;
448                                 if ( W1 < 0xD800 || W1 > 0xDFFF ) {
449                                         ch = (Uint32)W1;
450                                         break;
451                                 }
452                                 if ( W1 > 0xDBFF ) {
453                                         /* Skip illegal sequences
454                                         return SDL_ICONV_EILSEQ;
455                                         */
456                                         ch = UNKNOWN_UNICODE;
457                                         break;
458                                 }
459                                 if ( srclen < 2 ) {
460                                         return SDL_ICONV_EINVAL;
461                                 }
462                                 p = (Uint8 *)src;
463                                 W2 = ((Uint16)p[0] << 8) |
464                                       (Uint16)p[1];
465                                 src += 2;
466                                 srclen -= 2;
467                                 if ( W2 < 0xDC00 || W2 > 0xDFFF ) {
468                                         /* Skip illegal sequences
469                                         return SDL_ICONV_EILSEQ;
470                                         */
471                                         ch = UNKNOWN_UNICODE;
472                                         break;
473                                 }
474                                 ch = (((Uint32)(W1 & 0x3FF) << 10) |
475                                       (Uint32)(W2 & 0x3FF)) + 0x10000;
476                         }
477                         break;
478                     case ENCODING_UTF16LE: /* RFC 2781 */
479                         {
480                                 Uint8 *p = (Uint8 *)src;
481                                 Uint16 W1, W2;
482                                 if ( srclen < 2 ) {
483                                         return SDL_ICONV_EINVAL;
484                                 }
485                                 W1 = ((Uint16)p[1] << 8) |
486                                       (Uint16)p[0];
487                                 src += 2;
488                                 srclen -= 2;
489                                 if ( W1 < 0xD800 || W1 > 0xDFFF ) {
490                                         ch = (Uint32)W1;
491                                         break;
492                                 }
493                                 if ( W1 > 0xDBFF ) {
494                                         /* Skip illegal sequences
495                                         return SDL_ICONV_EILSEQ;
496                                         */
497                                         ch = UNKNOWN_UNICODE;
498                                         break;
499                                 }
500                                 if ( srclen < 2 ) {
501                                         return SDL_ICONV_EINVAL;
502                                 }
503                                 p = (Uint8 *)src;
504                                 W2 = ((Uint16)p[1] << 8) |
505                                       (Uint16)p[0];
506                                 src += 2;
507                                 srclen -= 2;
508                                 if ( W2 < 0xDC00 || W2 > 0xDFFF ) {
509                                         /* Skip illegal sequences
510                                         return SDL_ICONV_EILSEQ;
511                                         */
512                                         ch = UNKNOWN_UNICODE;
513                                         break;
514                                 }
515                                 ch = (((Uint32)(W1 & 0x3FF) << 10) |
516                                       (Uint32)(W2 & 0x3FF)) + 0x10000;
517                         }
518                         break;
519                     case ENCODING_UTF32BE:
520                         {
521                                 Uint8 *p = (Uint8 *)src;
522                                 if ( srclen < 4 ) {
523                                         return SDL_ICONV_EINVAL;
524                                 }
525                                 ch = ((Uint32)p[0] << 24) |
526                                      ((Uint32)p[1] << 16) |
527                                      ((Uint32)p[2] << 8) |
528                                       (Uint32)p[3];
529                                 src += 4;
530                                 srclen -= 4;
531                         }
532                         break;
533                     case ENCODING_UTF32LE:
534                         {
535                                 Uint8 *p = (Uint8 *)src;
536                                 if ( srclen < 4 ) {
537                                         return SDL_ICONV_EINVAL;
538                                 }
539                                 ch = ((Uint32)p[3] << 24) |
540                                      ((Uint32)p[2] << 16) |
541                                      ((Uint32)p[1] << 8) |
542                                       (Uint32)p[0];
543                                 src += 4;
544                                 srclen -= 4;
545                         }
546                         break;
547                     case ENCODING_UCS2:
548                         {
549                                 Uint16 *p = (Uint16 *)src;
550                                 if ( srclen < 2 ) {
551                                         return SDL_ICONV_EINVAL;
552                                 }
553                                 ch = *p;
554                                 src += 2;
555                                 srclen -= 2;
556                         }
557                         break;
558                     case ENCODING_UCS4:
559                         {
560                                 Uint32 *p = (Uint32 *)src;
561                                 if ( srclen < 4 ) {
562                                         return SDL_ICONV_EINVAL;
563                                 }
564                                 ch = *p;
565                                 src += 4;
566                                 srclen -= 4;
567                         }
568                         break;
569                 }
570
571                 /* Encode a character */
572                 switch ( cd->dst_fmt ) {
573                     case ENCODING_ASCII:
574                         {
575                                 Uint8 *p = (Uint8 *)dst;
576                                 if ( dstlen < 1 ) {
577                                         return SDL_ICONV_E2BIG;
578                                 }
579                                 if ( ch > 0x7F ) {
580                                         *p = UNKNOWN_ASCII;
581                                 } else {
582                                         *p = (Uint8)ch;
583                                 }
584                                 ++dst;
585                                 --dstlen;
586                         }
587                         break;
588                     case ENCODING_LATIN1:
589                         {
590                                 Uint8 *p = (Uint8 *)dst;
591                                 if ( dstlen < 1 ) {
592                                         return SDL_ICONV_E2BIG;
593                                 }
594                                 if ( ch > 0xFF ) {
595                                         *p = UNKNOWN_ASCII;
596                                 } else {
597                                         *p = (Uint8)ch;
598                                 }
599                                 ++dst;
600                                 --dstlen;
601                         }
602                         break;
603                     case ENCODING_UTF8: /* RFC 3629 */
604                         {
605                                 Uint8 *p = (Uint8 *)dst;
606                                 if ( ch > 0x10FFFF ) {
607                                         ch = UNKNOWN_UNICODE;
608                                 }
609                                 if ( ch <= 0x7F ) {
610                                         if ( dstlen < 1 ) {
611                                                 return SDL_ICONV_E2BIG;
612                                         }
613                                         *p = (Uint8)ch;
614                                         ++dst;
615                                         --dstlen;
616                                 } else if ( ch <= 0x7FF ) {
617                                         if ( dstlen < 2 ) {
618                                                 return SDL_ICONV_E2BIG;
619                                         }
620                                         p[0] = 0xC0 | (Uint8)((ch >> 6) & 0x1F);
621                                         p[1] = 0x80 | (Uint8)(ch & 0x3F);
622                                         dst += 2;
623                                         dstlen -= 2;
624                                 } else if ( ch <= 0xFFFF ) {
625                                         if ( dstlen < 3 ) {
626                                                 return SDL_ICONV_E2BIG;
627                                         }
628                                         p[0] = 0xE0 | (Uint8)((ch >> 12) & 0x0F);
629                                         p[1] = 0x80 | (Uint8)((ch >> 6) & 0x3F);
630                                         p[2] = 0x80 | (Uint8)(ch & 0x3F);
631                                         dst += 3;
632                                         dstlen -= 3;
633                                 } else if ( ch <= 0x1FFFFF ) {
634                                         if ( dstlen < 4 ) {
635                                                 return SDL_ICONV_E2BIG;
636                                         }
637                                         p[0] = 0xF0 | (Uint8)((ch >> 18) & 0x07);
638                                         p[1] = 0x80 | (Uint8)((ch >> 12) & 0x3F);
639                                         p[2] = 0x80 | (Uint8)((ch >> 6) & 0x3F);
640                                         p[3] = 0x80 | (Uint8)(ch & 0x3F);
641                                         dst += 4;
642                                         dstlen -= 4;
643                                 } else if ( ch <= 0x3FFFFFF ) {
644                                         if ( dstlen < 5 ) {
645                                                 return SDL_ICONV_E2BIG;
646                                         }
647                                         p[0] = 0xF8 | (Uint8)((ch >> 24) & 0x03);
648                                         p[1] = 0x80 | (Uint8)((ch >> 18) & 0x3F);
649                                         p[2] = 0x80 | (Uint8)((ch >> 12) & 0x3F);
650                                         p[3] = 0x80 | (Uint8)((ch >> 6) & 0x3F);
651                                         p[4] = 0x80 | (Uint8)(ch & 0x3F);
652                                         dst += 5;
653                                         dstlen -= 5;
654                                 } else {
655                                         if ( dstlen < 6 ) {
656                                                 return SDL_ICONV_E2BIG;
657                                         }
658                                         p[0] = 0xFC | (Uint8)((ch >> 30) & 0x01);
659                                         p[1] = 0x80 | (Uint8)((ch >> 24) & 0x3F);
660                                         p[2] = 0x80 | (Uint8)((ch >> 18) & 0x3F);
661                                         p[3] = 0x80 | (Uint8)((ch >> 12) & 0x3F);
662                                         p[4] = 0x80 | (Uint8)((ch >> 6) & 0x3F);
663                                         p[5] = 0x80 | (Uint8)(ch & 0x3F);
664                                         dst += 6;
665                                         dstlen -= 6;
666                                 }
667                         }
668                         break;
669                     case ENCODING_UTF16BE: /* RFC 2781 */
670                         {
671                                 Uint8 *p = (Uint8 *)dst;
672                                 if ( ch > 0x10FFFF ) {
673                                         ch = UNKNOWN_UNICODE;
674                                 }
675                                 if ( ch < 0x10000 ) {
676                                         if ( dstlen < 2 ) {
677                                                 return SDL_ICONV_E2BIG;
678                                         }
679                                         p[0] = (Uint8)(ch >> 8);
680                                         p[1] = (Uint8)ch;
681                                         dst += 2;
682                                         dstlen -= 2;
683                                 } else {
684                                         Uint16 W1, W2;
685                                         if ( dstlen < 4 ) {
686                                                 return SDL_ICONV_E2BIG;
687                                         }
688                                         ch = ch - 0x10000;
689                                         W1 = 0xD800 | (Uint16)((ch >> 10) & 0x3FF);
690                                         W2 = 0xDC00 | (Uint16)(ch & 0x3FF);
691                                         p[0] = (Uint8)(W1 >> 8);
692                                         p[1] = (Uint8)W1;
693                                         p[2] = (Uint8)(W2 >> 8);
694                                         p[3] = (Uint8)W2;
695                                         dst += 4;
696                                         dstlen -= 4;
697                                 }
698                         }
699                         break;
700                     case ENCODING_UTF16LE: /* RFC 2781 */
701                         {
702                                 Uint8 *p = (Uint8 *)dst;
703                                 if ( ch > 0x10FFFF ) {
704                                         ch = UNKNOWN_UNICODE;
705                                 }
706                                 if ( ch < 0x10000 ) {
707                                         if ( dstlen < 2 ) {
708                                                 return SDL_ICONV_E2BIG;
709                                         }
710                                         p[1] = (Uint8)(ch >> 8);
711                                         p[0] = (Uint8)ch;
712                                         dst += 2;
713                                         dstlen -= 2;
714                                 } else {
715                                         Uint16 W1, W2;
716                                         if ( dstlen < 4 ) {
717                                                 return SDL_ICONV_E2BIG;
718                                         }
719                                         ch = ch - 0x10000;
720                                         W1 = 0xD800 | (Uint16)((ch >> 10) & 0x3FF);
721                                         W2 = 0xDC00 | (Uint16)(ch & 0x3FF);
722                                         p[1] = (Uint8)(W1 >> 8);
723                                         p[0] = (Uint8)W1;
724                                         p[3] = (Uint8)(W2 >> 8);
725                                         p[2] = (Uint8)W2;
726                                         dst += 4;
727                                         dstlen -= 4;
728                                 }
729                         }
730                         break;
731                     case ENCODING_UTF32BE:
732                         {
733                                 Uint8 *p = (Uint8 *)dst;
734                                 if ( ch > 0x10FFFF ) {
735                                         ch = UNKNOWN_UNICODE;
736                                 }
737                                 if ( dstlen < 4 ) {
738                                         return SDL_ICONV_E2BIG;
739                                 }
740                                 p[0] = (Uint8)(ch >> 24);
741                                 p[1] = (Uint8)(ch >> 16);
742                                 p[2] = (Uint8)(ch >> 8);
743                                 p[3] = (Uint8)ch;
744                                 dst += 4;
745                                 dstlen -= 4;
746                         }
747                         break;
748                     case ENCODING_UTF32LE:
749                         {
750                                 Uint8 *p = (Uint8 *)dst;
751                                 if ( ch > 0x10FFFF ) {
752                                         ch = UNKNOWN_UNICODE;
753                                 }
754                                 if ( dstlen < 4 ) {
755                                         return SDL_ICONV_E2BIG;
756                                 }
757                                 p[3] = (Uint8)(ch >> 24);
758                                 p[2] = (Uint8)(ch >> 16);
759                                 p[1] = (Uint8)(ch >> 8);
760                                 p[0] = (Uint8)ch;
761                                 dst += 4;
762                                 dstlen -= 4;
763                         }
764                         break;
765                     case ENCODING_UCS2:
766                         {
767                                 Uint16 *p = (Uint16 *)dst;
768                                 if ( ch > 0xFFFF ) {
769                                         ch = UNKNOWN_UNICODE;
770                                 }
771                                 if ( dstlen < 2 ) {
772                                         return SDL_ICONV_E2BIG;
773                                 }
774                                 *p = (Uint16)ch;
775                                 dst += 2;
776                                 dstlen -= 2;
777                         }
778                         break;
779                     case ENCODING_UCS4:
780                         {
781                                 Uint32 *p = (Uint32 *)dst;
782                                 if ( ch > 0x7FFFFFFF ) {
783                                         ch = UNKNOWN_UNICODE;
784                                 }
785                                 if ( dstlen < 4 ) {
786                                         return SDL_ICONV_E2BIG;
787                                 }
788                                 *p = ch;
789                                 dst += 4;
790                                 dstlen -= 4;
791                         }
792                         break;
793                 }
794
795                 /* Update state */
796                 *inbuf = src;
797                 *inbytesleft = srclen;
798                 *outbuf = dst;
799                 *outbytesleft = dstlen;
800                 ++total;
801         }
802         return total;
803 }
804
805 int SDL_iconv_close(SDL_iconv_t cd)
806 {
807         if ( cd && cd != (SDL_iconv_t)-1 ) {
808                 SDL_free(cd);
809         }
810         return 0;
811 }
812
813 #endif /* !HAVE_ICONV */
814
815 char *SDL_iconv_string(const char *tocode, const char *fromcode, const char *inbuf, size_t inbytesleft)
816 {
817         SDL_iconv_t cd;
818         char *string;
819         size_t stringsize;
820         char *outbuf;
821         size_t outbytesleft;
822         size_t retCode = 0;
823
824         cd = SDL_iconv_open(tocode, fromcode);
825         if ( cd == (SDL_iconv_t)-1 ) {
826                 /* See if we can recover here (fixes iconv on Solaris 11) */
827                 if ( !tocode || !*tocode ) {
828                         tocode = "UTF-8";
829                 }
830                 if ( !fromcode || !*fromcode ) {
831                         fromcode = "UTF-8";
832                 }
833                 cd = SDL_iconv_open(tocode, fromcode);
834         }
835         if ( cd == (SDL_iconv_t)-1 ) {
836                 return NULL;
837         }
838
839         stringsize = inbytesleft > 4 ? inbytesleft : 4;
840         string = SDL_malloc(stringsize);
841         if ( !string ) {
842                 SDL_iconv_close(cd);
843                 return NULL;
844         }
845         outbuf = string;
846         outbytesleft = stringsize;
847         SDL_memset(outbuf, 0, 4);
848
849         while ( inbytesleft > 0 ) {
850                 retCode = SDL_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
851                 switch (retCode) {
852                     case SDL_ICONV_E2BIG:
853                         {
854                                 char *oldstring = string;
855                                 stringsize *= 2;
856                                 string = SDL_realloc(string, stringsize);
857                                 if ( !string ) {
858                                         SDL_iconv_close(cd);
859                                         return NULL;
860                                 }
861                                 outbuf = string + (outbuf - oldstring);
862                                 outbytesleft = stringsize - (outbuf - string);
863                                 SDL_memset(outbuf, 0, 4);
864                         }
865                         break;
866                     case SDL_ICONV_EILSEQ:
867                         /* Try skipping some input data - not perfect, but... */
868                         ++inbuf;
869                         --inbytesleft;
870                         break;
871                     case SDL_ICONV_EINVAL:
872                     case SDL_ICONV_ERROR:
873                         /* We can't continue... */
874                         inbytesleft = 0;
875                         break;
876                 }
877         }
878         SDL_iconv_close(cd);
879
880         return string;
881 }