Merge pull request #389 from notaz/master
[pcsx_rearmed.git] / deps / flac-1.3.2 / src / share / utf8 / iconvert.c
CommitLineData
ce188d4d 1/*
2 * Copyright (C) 2001 Edmund Grimley Evans <edmundo@rano.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License along
15 * with this program; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
17 */
18
19#ifdef HAVE_CONFIG_H
20# include <config.h>
21#endif
22
23#if !defined _WIN32 && defined HAVE_ICONV
24
25#include <assert.h>
26#include <errno.h>
27#include <iconv.h>
28#include <stdlib.h>
29#include <string.h>
30
31#include "iconvert.h"
32#include "share/alloc.h"
33#include "share/safe_str.h"
34
/*
 * Return non-zero if `code` names UTF-8 (ASCII case-insensitive).
 * Don't use strcasecmp() as it's locale-dependent.  Each test
 * short-circuits on the first mismatch (including the terminator),
 * so a short name is never read past its end.
 */
static int is_utf8_name(const char *code)
{
	return (code[0] == 'U' || code[0] == 'u') &&
	       (code[1] == 'T' || code[1] == 't') &&
	       (code[2] == 'F' || code[2] == 'f') &&
	       code[3] == '-' &&
	       code[4] == '8' &&
	       code[5] == '\0';
}

/*
 * Convert data from one encoding to another. Return:
 *
 *  -2 : memory allocation failed
 *  -1 : unknown encoding
 *   0 : data was converted exactly
 *   1 : data was converted inexactly
 *   2 : data was invalid (but still converted)
 *
 * We convert in two steps, via UTF-8, as this is the only
 * reliable way of distinguishing between invalid input
 * and valid input which iconv refuses to transliterate.
 * We convert from UTF-8 twice, because we have no way of
 * knowing whether the conversion was exact if iconv returns
 * E2BIG (due to a bug in the specification of iconv).
 * An alternative approach is to assume that the output of
 * iconv is never more than 4 times as long as the input,
 * but I prefer to avoid that assumption if possible.
 *
 * Parameters:
 *   fromcode, tocode : iconv names of the source and target encodings.
 *   from, fromlen    : input bytes and their count.
 *   to               : if non-NULL, receives a heap-allocated,
 *                      NUL-terminated result on success; the function
 *                      does not free it, so ownership passes to the caller.
 *   tolen            : if non-NULL, receives the result length in bytes
 *                      (terminator excluded) on success.
 *
 * Neither *to nor *tolen is written when a negative value is returned.
 */

int iconvert(const char *fromcode, const char *tocode,
	     const char *from, size_t fromlen,
	     char **to, size_t *tolen)
{
	int ret = 0;
	iconv_t cd1, cd2;
	char *ib;
	char *ob;
	char *utfbuf = NULL, *outbuf, *newbuf;
	size_t utflen, outlen, ibl, obl, k;
	char tbuf[2048];

	/* cd1: source encoding -> UTF-8 */
	cd1 = iconv_open("UTF-8", fromcode);
	if (cd1 == (iconv_t)(-1))
		return -1;

	/* cd2: UTF-8 -> target encoding; stays -1 when the target is UTF-8 */
	cd2 = (iconv_t)(-1);
	if (!is_utf8_name(tocode)) {
		char *tocode1;
		size_t dest_len = strlen(tocode) + 11; /* "//TRANSLIT" + NUL */
		/*
		 * Try using this non-standard feature of glibc and libiconv.
		 * This is deliberately not a config option as people often
		 * change their iconv library without rebuilding applications.
		 */
		tocode1 = safe_malloc_(dest_len);
		if (!tocode1)
			goto fail;

		safe_strncpy(tocode1, tocode, dest_len);
		safe_strncat(tocode1, "//TRANSLIT", dest_len);
		cd2 = iconv_open(tocode1, "UTF-8");
		free(tocode1);

		/*
		 * Fall back to a plain descriptor.  Its input is the
		 * intermediate UTF-8 buffer, so the source encoding must be
		 * UTF-8 here, not fromcode.
		 */
		if (cd2 == (iconv_t)(-1))
			cd2 = iconv_open(tocode, "UTF-8");

		if (cd2 == (iconv_t)(-1)) {
			iconv_close(cd1);
			return -1;
		}
	}

	/* Start tiny and let the loop below grow the buffer on demand. */
	utflen = 1; /*fromlen * 2 + 1; XXX */
	utfbuf = malloc(utflen);
	if (!utfbuf)
		goto fail;

	/* Convert to UTF-8 */
	ib = (char *)from; /* iconv() takes char **, but only reads the input */
	ibl = fromlen;
	ob = utfbuf;
	obl = utflen;
	for (;;) {
		k = iconv(cd1, &ib, &ibl, &ob, &obl);
		assert((!k && !ibl) ||
		       (k == (size_t)(-1) && errno == E2BIG && ibl && obl < 6) ||
		       (k == (size_t)(-1) &&
			(errno == EILSEQ || errno == EINVAL) && ibl));
		if (!ibl)
			break;
		if (obl < 6) {
			/* Enlarge the buffer */
			if (utflen * 2 < utflen) /* overflow check */
				goto fail;
			utflen *= 2;
			newbuf = realloc(utfbuf, utflen);
			if (!newbuf)
				goto fail;
			/* Re-base the write cursor onto the new allocation. */
			ob = (ob - utfbuf) + newbuf;
			obl = utflen - (ob - newbuf);
			utfbuf = newbuf;
		}
		else {
			/* Invalid input: drop one byte, emit '#', reset state */
			ib++, ibl--;
			*ob++ = '#', obl--;
			ret = 2;
			iconv(cd1, 0, 0, 0, 0);
		}
	}

	if (cd2 == (iconv_t)(-1)) {
		/* The target encoding was UTF-8 */
		size_t len = ob - utfbuf;

		if (!to) {
			free(utfbuf);
			iconv_close(cd1);
			if (tolen)
				*tolen = len;
			return ret;
		}
		/*
		 * NOTE(review): this assumes safe_realloc_add_2op_() leaves
		 * utfbuf valid on failure, as realloc() does; confirm against
		 * share/alloc.h, since the fail path frees utfbuf.
		 */
		newbuf = safe_realloc_add_2op_(utfbuf, len, /*+*/1);
		if (!newbuf)
			goto fail;
		newbuf[len] = '\0';
		*to = newbuf;
		if (tolen)
			*tolen = len;
		iconv_close(cd1);
		return ret;
	}

	/*
	 * Truncate the buffer to be tidy.  Skip this for empty output:
	 * realloc() with size 0 may free the block and return NULL, which
	 * would lead to a double free.  A failed shrink is harmless because
	 * the old block remains valid.
	 */
	utflen = ob - utfbuf;
	if (utflen > 0) {
		newbuf = realloc(utfbuf, utflen);
		if (newbuf)
			utfbuf = newbuf;
	}

	/* Convert from UTF-8 to discover how long the output is */
	outlen = 0;
	ib = utfbuf;
	ibl = utflen;
	while (ibl) {
		ob = tbuf;
		obl = sizeof(tbuf);
		k = iconv(cd2, &ib, &ibl, &ob, &obl);
		assert((k != (size_t)(-1) && !ibl) ||
		       (k == (size_t)(-1) && errno == E2BIG && ibl) ||
		       (k == (size_t)(-1) && errno == EILSEQ && ibl));
		if (ibl && !(k == (size_t)(-1) && errno == E2BIG)) {
			/* Replace one character */
			char *tb = "?";
			size_t tbl = 1;

			outlen += ob - tbuf;
			ob = tbuf;
			obl = sizeof(tbuf);
			k = iconv(cd2, &tb, &tbl, &ob, &obl);
			assert((!k && !tbl) ||
			       (k == (size_t)(-1) && errno == EILSEQ && tbl));
			/*
			 * Skip the offending byte and every following byte
			 * with the high bit set.
			 * NOTE(review): this also skips the lead bytes of any
			 * directly following multi-byte sequences, so a run of
			 * adjacent non-ASCII characters yields a single '?'.
			 * Kept as-is; the real pass below must skip identically.
			 */
			for (++ib, --ibl; ibl && (*ib & 0x80); ib++, ibl--)
				;
		}
		outlen += ob - tbuf;
	}
	/* Flush any pending shift sequence and return cd2 to its initial state. */
	ob = tbuf;
	obl = sizeof(tbuf);
	k = iconv(cd2, 0, 0, &ob, &obl);
	assert(!k);
	outlen += ob - tbuf;

	/* Convert from UTF-8 for real */
	outbuf = safe_malloc_add_2op_(outlen, /*+*/1);
	if (!outbuf)
		goto fail;
	ib = utfbuf;
	ibl = utflen;
	ob = outbuf;
	obl = outlen;
	while (ibl) {
		k = iconv(cd2, &ib, &ibl, &ob, &obl);
		assert((k != (size_t)(-1) && !ibl) ||
		       (k == (size_t)(-1) && errno == EILSEQ && ibl));
		/* Any non-zero result means the conversion was not exact. */
		if (k && !ret)
			ret = 1;
		if (ibl && !(k == (size_t)(-1) && errno == E2BIG)) {
			/* Replace one character */
			char *tb = "?";
			size_t tbl = 1;

			k = iconv(cd2, &tb, &tbl, &ob, &obl);
			assert((!k && !tbl) ||
			       (k == (size_t)(-1) && errno == EILSEQ && tbl));
			/* Same skip rule as in the sizing pass above. */
			for (++ib, --ibl; ibl && (*ib & 0x80); ib++, ibl--)
				;
		}
	}
	k = iconv(cd2, 0, 0, &ob, &obl);
	assert(!k);
	assert(!obl); /* the sizing pass must have predicted the length exactly */
	*ob = '\0';

	free(utfbuf);
	iconv_close(cd1);
	iconv_close(cd2);
	if (tolen)
		*tolen = outlen;
	if (!to) {
		free(outbuf);
		return ret;
	}
	*to = outbuf;
	return ret;

 fail:
	free(utfbuf); /* free(NULL) is a no-op */
	iconv_close(cd1);
	if (cd2 != (iconv_t)(-1))
		iconv_close(cd2);
	return -2;
}
253
254#endif /* HAVE_ICONV */