[pcsx_rearmed.git] / deps / flac-1.3.2 / src / share / utf8 / iconvert.c

/*
 * Copyright (C) 2001 Edmund Grimley Evans <edmundo@rano.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

#ifdef HAVE_CONFIG_H
#  include <config.h>
#endif

#if !defined _WIN32 && defined HAVE_ICONV

#include <assert.h>
#include <errno.h>
#include <iconv.h>
#include <stdlib.h>
#include <string.h>

#include "iconvert.h"
#include "share/alloc.h"
#include "share/safe_str.h"

/*
 * Return non-zero if `name' is exactly "UTF-8", comparing the letters
 * without regard to ASCII case.
 *
 * strcasecmp() is deliberately avoided as it's locale-dependent.
 * The comparison stops at the first mismatch, which includes the
 * terminator of a too-short name, so it never reads past the end
 * of `name'.
 */
static int iconvert_is_utf8_(const char *name)
{
  static const char lower[] = "utf-8";
  static const char upper[] = "UTF-8";
  size_t i;

  /* sizeof includes the '\0', so the terminator is compared too. */
  for (i = 0; i < sizeof(lower); i++) {
    if (name[i] != lower[i] && name[i] != upper[i])
      return 0;
  }
  return 1;
}

/*
 * Convert data from one encoding to another.
 *
 * Parameters:
 *
 *  fromcode : iconv name of the source encoding
 *  tocode   : iconv name of the target encoding
 *  from     : input bytes (need not be NUL-terminated)
 *  fromlen  : number of input bytes
 *  to       : if non-NULL, receives a newly allocated, NUL-terminated
 *             buffer holding the converted data; this function does
 *             not free it
 *  tolen    : if non-NULL, receives the converted length in bytes,
 *             not counting the terminating NUL
 *
 * Return:
 *
 *  -2 : memory allocation failed
 *  -1 : unknown encoding
 *   0 : data was converted exactly
 *   1 : data was converted inexactly
 *   2 : data was invalid (but still converted)
 *
 * We convert in two steps, via UTF-8, as this is the only
 * reliable way of distinguishing between invalid input
 * and valid input which iconv refuses to transliterate.
 * We convert from UTF-8 twice, because we have no way of
 * knowing whether the conversion was exact if iconv returns
 * E2BIG (due to a bug in the specification of iconv).
 * An alternative approach is to assume that the output of
 * iconv is never more than 4 times as long as the input,
 * but I prefer to avoid that assumption if possible.
 */

int iconvert(const char *fromcode, const char *tocode,
	     const char *from, size_t fromlen,
	     char **to, size_t *tolen)
{
  int ret = 0;
  iconv_t cd1, cd2;
  char *ib;
  char *ob;
  char *utfbuf = 0, *outbuf, *newbuf;
  size_t utflen, outlen, ibl, obl, k;
  char tbuf[2048];

  /* cd1: source encoding -> UTF-8 */
  cd1 = iconv_open("UTF-8", fromcode);
  if (cd1 == (iconv_t)(-1))
    return -1;

  /* cd2: UTF-8 -> target encoding; stays -1 when the target is UTF-8 */
  cd2 = (iconv_t)(-1);
  if (!iconvert_is_utf8_(tocode)) {
    char *tocode1;
    size_t dest_len = strlen(tocode) + 11; /* + "//TRANSLIT" + '\0' */
    /*
     * Try using this non-standard feature of glibc and libiconv.
     * This is deliberately not a config option as people often
     * change their iconv library without rebuilding applications.
     */
    tocode1 = safe_malloc_(dest_len);
    if (!tocode1)
      goto fail;

    safe_strncpy(tocode1, tocode, dest_len);
    safe_strncat(tocode1, "//TRANSLIT", dest_len);
    cd2 = iconv_open(tocode1, "UTF-8");
    free(tocode1);

    /*
     * Fall back to the plain target name.  The input of cd2 is always
     * the intermediate UTF-8 buffer, so the source must be "UTF-8"
     * here as well, not fromcode.
     */
    if (cd2 == (iconv_t)(-1))
      cd2 = iconv_open(tocode, "UTF-8");

    if (cd2 == (iconv_t)(-1)) {
      iconv_close(cd1);
      return -1;
    }
  }

  utflen = 1; /*fromlen * 2 + 1; XXX */
  utfbuf = malloc(utflen);
  if (!utfbuf)
    goto fail;

  /* Convert to UTF-8 */
  ib = (char *)from; /* iconv() takes a non-const input pointer */
  ibl = fromlen;
  ob = utfbuf;
  obl = utflen;
  for (;;) {
    k = iconv(cd1, &ib, &ibl, &ob, &obl);
    assert((!k && !ibl) ||
	   (k == (size_t)(-1) && errno == E2BIG && ibl && obl < 6) ||
	   (k == (size_t)(-1) &&
	    (errno == EILSEQ || errno == EINVAL) && ibl));
    if (!ibl)
      break;
    if (obl < 6) {
      /* Enlarge the buffer */
      if (utflen * 2 < utflen) /* overflow check */
	goto fail;
      utflen *= 2;
      newbuf = realloc(utfbuf, utflen);
      if (!newbuf)
	goto fail;
      ob = (ob - utfbuf) + newbuf;
      obl = utflen - (ob - newbuf);
      utfbuf = newbuf;
    }
    else {
      /* Invalid input: drop one byte, emit '#', reset the state */
      ib++, ibl--;
      *ob++ = '#', obl--;
      ret = 2;
      iconv(cd1, 0, 0, 0, 0);
    }
  }

  if (cd2 == (iconv_t)(-1)) {
    /* The target encoding was UTF-8 */
    if (tolen)
      *tolen = ob - utfbuf;
    if (!to) {
      free(utfbuf);
      iconv_close(cd1);
      return ret;
    }
    /*
     * NOTE(review): confirm whether safe_realloc_add_2op_ frees its
     * argument on failure; if it does, the goto below frees utfbuf
     * a second time.
     */
    newbuf = safe_realloc_add_2op_(utfbuf, (ob - utfbuf), /*+*/1);
    if (!newbuf)
      goto fail;
    ob = (ob - utfbuf) + newbuf;
    *ob = '\0';
    *to = newbuf;
    iconv_close(cd1);
    return ret;
  }

  /*
   * Truncate the buffer to be tidy.  Skip this for empty output:
   * realloc(p, 0) may free p and return NULL, which would be taken
   * for an allocation failure and lead to a double free at `fail'.
   */
  utflen = ob - utfbuf;
  if (utflen) {
    newbuf = realloc(utfbuf, utflen);
    if (!newbuf)
      goto fail;
    utfbuf = newbuf;
  }

  /* Convert from UTF-8 to discover how long the output is */
  outlen = 0;
  ib = utfbuf;
  ibl = utflen;
  while (ibl) {
    ob = tbuf;
    obl = sizeof(tbuf);
    k = iconv(cd2, &ib, &ibl, &ob, &obl);
    assert((k != (size_t)(-1) && !ibl) ||
	   (k == (size_t)(-1) && errno == E2BIG && ibl) ||
	   (k == (size_t)(-1) && errno == EILSEQ && ibl));
    if (ibl && !(k == (size_t)(-1) && errno == E2BIG)) {
      /* Replace one character */
      char *tb = "?";
      size_t tbl = 1;

      outlen += ob - tbuf;
      ob = tbuf;
      obl = sizeof(tbuf);
      k = iconv(cd2, &tb, &tbl, &ob, &obl);
      assert((!k && !tbl) ||
	     (k == (size_t)(-1) && errno == EILSEQ && tbl));
      /*
       * Skip the lead byte, then only the continuation bytes
       * (10xxxxxx) of that same character, so that following
       * non-ASCII characters are still converted.
       */
      for (++ib, --ibl; ibl && ((unsigned char)*ib & 0xC0) == 0x80; ib++, ibl--)
	;
    }
    outlen += ob - tbuf;
  }
  /* Account for any bytes needed to return to the initial shift state */
  ob = tbuf;
  obl = sizeof(tbuf);
  k = iconv(cd2, 0, 0, &ob, &obl);
  assert(!k);
  outlen += ob - tbuf;

  /* Convert from UTF-8 for real */
  outbuf = safe_malloc_add_2op_(outlen, /*+*/1);
  if (!outbuf)
    goto fail;
  ib = utfbuf;
  ibl = utflen;
  ob = outbuf;
  obl = outlen;
  while (ibl) {
    k = iconv(cd2, &ib, &ibl, &ob, &obl);
    assert((k != (size_t)(-1) && !ibl) ||
	   (k == (size_t)(-1) && errno == EILSEQ && ibl));
    /* Any non-zero result (irreversible conversions or error) is inexact */
    if (k && !ret)
      ret = 1;
    if (ibl && !(k == (size_t)(-1) && errno == E2BIG)) {
      /* Replace one character */
      char *tb = "?";
      size_t tbl = 1;

      k = iconv(cd2, &tb, &tbl, &ob, &obl);
      assert((!k && !tbl) ||
	     (k == (size_t)(-1) && errno == EILSEQ && tbl));
      /* Must skip exactly as the measuring pass above does */
      for (++ib, --ibl; ibl && ((unsigned char)*ib & 0xC0) == 0x80; ib++, ibl--)
	;
    }
  }
  k = iconv(cd2, 0, 0, &ob, &obl);
  assert(!k);
  assert(!obl); /* both passes must have produced the same length */
  *ob = '\0';

  free(utfbuf);
  iconv_close(cd1);
  iconv_close(cd2);
  if (tolen)
    *tolen = outlen;
  if (!to) {
    free(outbuf);
    return ret;
  }
  *to = outbuf;
  return ret;

 fail:
  free(utfbuf); /* free(NULL) is a no-op */
  iconv_close(cd1);
  if (cd2 != (iconv_t)(-1))
    iconv_close(cd2);
  return -2;
}

#endif /* HAVE_ICONV */
Commit	Line	Data
ce188d4d	1	/*
	2	* Copyright (C) 2001 Edmund Grimley Evans <edmundo@rano.org>
	3	*
	4	* This program is free software; you can redistribute it and/or modify
	5	* it under the terms of the GNU General Public License as published by
	6	* the Free Software Foundation; either version 2 of the License, or
	7	* (at your option) any later version.
	8	*
	9	* This program is distributed in the hope that it will be useful,
	10	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	11	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	12	* GNU General Public License for more details.
	13	*
	14	* You should have received a copy of the GNU General Public License along
	15	* with this program; if not, write to the Free Software Foundation, Inc.,
	16	* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
	17	*/
	18
	19	#ifdef HAVE_CONFIG_H
	20	# include <config.h>
	21	#endif
	22
	23	#if !defined _WIN32 && defined HAVE_ICONV
	24
	25	#include <assert.h>
	26	#include <errno.h>
	27	#include <iconv.h>
	28	#include <stdlib.h>
	29	#include <string.h>
	30
	31	#include "iconvert.h"
	32	#include "share/alloc.h"
	33	#include "share/safe_str.h"
	34
	35	/*
	36	* Convert data from one encoding to another. Return:
	37	*
	38	* -2 : memory allocation failed
	39	* -1 : unknown encoding
	40	* 0 : data was converted exactly
	41	* 1 : data was converted inexactly
	42	* 2 : data was invalid (but still converted)
	43	*
	44	* We convert in two steps, via UTF-8, as this is the only
	45	* reliable way of distinguishing between invalid input
	46	* and valid input which iconv refuses to transliterate.
	47	* We convert from UTF-8 twice, because we have no way of
	48	* knowing whether the conversion was exact if iconv returns
	49	* E2BIG (due to a bug in the specification of iconv).
	50	* An alternative approach is to assume that the output of
	51	* iconv is never more than 4 times as long as the input,
	52	* but I prefer to avoid that assumption if possible.
	53	*/
	54
	55	int iconvert(const char *fromcode, const char *tocode,
	56	const char *from, size_t fromlen,
	57	char **to, size_t *tolen)
	58	{
	59	int ret = 0;
	60	iconv_t cd1, cd2;
	61	char *ib;
	62	char *ob;
	63	char *utfbuf = 0, *outbuf, *newbuf;
	64	size_t utflen, outlen, ibl, obl, k;
65	char tbuf[2048];
66
67	cd1 = iconv_open("UTF-8", fromcode);
68	if (cd1 == (iconv_t)(-1))
69	return -1;
70
71	cd2 = (iconv_t)(-1);
72	/* Don't use strcasecmp() as it's locale-dependent. */
73	if (!strchr("Uu", tocode[0]) \|\|
74	!strchr("Tt", tocode[1]) \|\|
75	!strchr("Ff", tocode[2]) \|\|
76	tocode[3] != '-' \|\|
77	tocode[4] != '8' \|\|
78	tocode[5] != '\0') {
79	char *tocode1;
80	size_t dest_len = strlen(tocode) + 11;
81	/*
82	* Try using this non-standard feature of glibc and libiconv.
83	* This is deliberately not a config option as people often
84	* change their iconv library without rebuilding applications.
85	*/
86	tocode1 = safe_malloc_(dest_len);
87	if (!tocode1)
88	goto fail;
89
90	safe_strncpy(tocode1, tocode, dest_len);
91	safe_strncat(tocode1, "//TRANSLIT", dest_len);
92	cd2 = iconv_open(tocode1, "UTF-8");
93	free(tocode1);
94
95	if (cd2 == (iconv_t)(-1))
96	cd2 = iconv_open(tocode, fromcode);
97
98	if (cd2 == (iconv_t)(-1)) {
99	iconv_close(cd1);
100	return -1;
101	}
102	}
103
104	utflen = 1; /*fromlen * 2 + 1; XXX */
105	utfbuf = malloc(utflen);
106	if (!utfbuf)
107	goto fail;
108
109	/* Convert to UTF-8 */
110	ib = (char *)from;
111	ibl = fromlen;
112	ob = utfbuf;
113	obl = utflen;
114	for (;;) {
115	k = iconv(cd1, &ib, &ibl, &ob, &obl);
116	assert((!k && !ibl) \|\|
117	(k == (size_t)(-1) && errno == E2BIG && ibl && obl < 6) \|\|
118	(k == (size_t)(-1) &&
119	(errno == EILSEQ \|\| errno == EINVAL) && ibl));
120	if (!ibl)
121	break;
122	if (obl < 6) {
123	/* Enlarge the buffer */
124	if(utflen*2 < utflen) /* overflow check */
125	goto fail;
126	utflen *= 2;
127	newbuf = realloc(utfbuf, utflen);
128	if (!newbuf)
129	goto fail;
130	ob = (ob - utfbuf) + newbuf;
131	obl = utflen - (ob - newbuf);
132	utfbuf = newbuf;
133	}
134	else {
135	/* Invalid input */
136	ib++, ibl--;
137	*ob++ = '#', obl--;
138	ret = 2;
139	iconv(cd1, 0, 0, 0, 0);
140	}
141	}
142
143	if (cd2 == (iconv_t)(-1)) {
144	/* The target encoding was UTF-8 */
145	if (tolen)
146	*tolen = ob - utfbuf;
147	if (!to) {
148	free(utfbuf);
149	iconv_close(cd1);
150	return ret;
151	}
152	newbuf = safe_realloc_add_2op_(utfbuf, (ob - utfbuf), /*+*/1);
153	if (!newbuf)
154	goto fail;
155	ob = (ob - utfbuf) + newbuf;
156	*ob = '\0';
157	*to = newbuf;
158	iconv_close(cd1);
159	return ret;
160	}
161
162	/* Truncate the buffer to be tidy */
163	utflen = ob - utfbuf;
164	newbuf = realloc(utfbuf, utflen);
165	if (!newbuf)
166	goto fail;
167	utfbuf = newbuf;
168
169	/* Convert from UTF-8 to discover how long the output is */
170	outlen = 0;
171	ib = utfbuf;
172	ibl = utflen;
173	while (ibl) {
174	ob = tbuf;
175	obl = sizeof(tbuf);
176	k = iconv(cd2, &ib, &ibl, &ob, &obl);
177	assert((k != (size_t)(-1) && !ibl) \|\|
178	(k == (size_t)(-1) && errno == E2BIG && ibl) \|\|
179	(k == (size_t)(-1) && errno == EILSEQ && ibl));
180	if (ibl && !(k == (size_t)(-1) && errno == E2BIG)) {
181	/* Replace one character */
182	char *tb = "?";
183	size_t tbl = 1;
184
185	outlen += ob - tbuf;
186	ob = tbuf;
187	obl = sizeof(tbuf);
188	k = iconv(cd2, &tb, &tbl, &ob, &obl);
189	assert((!k && !tbl) \|\|
190	(k == (size_t)(-1) && errno == EILSEQ && tbl));
191	for (++ib, --ibl; ibl && (*ib & 0x80); ib++, ibl--)
192	;
193	}
194	outlen += ob - tbuf;
195	}
196	ob = tbuf;
197	obl = sizeof(tbuf);
198	k = iconv(cd2, 0, 0, &ob, &obl);
199	assert(!k);
200	outlen += ob - tbuf;
201
202	/* Convert from UTF-8 for real */
203	outbuf = safe_malloc_add_2op_(outlen, /*+*/1);
204	if (!outbuf)
205	goto fail;
206	ib = utfbuf;
207	ibl = utflen;
208	ob = outbuf;
209	obl = outlen;
210	while (ibl) {
211	k = iconv(cd2, &ib, &ibl, &ob, &obl);
212	assert((k != (size_t)(-1) && !ibl) \|\|
213	(k == (size_t)(-1) && errno == EILSEQ && ibl));
214	if (k && !ret)
215	ret = 1;
216	if (ibl && !(k == (size_t)(-1) && errno == E2BIG)) {
217	/* Replace one character */
218	char *tb = "?";
219	size_t tbl = 1;
220
221	k = iconv(cd2, &tb, &tbl, &ob, &obl);
222	assert((!k && !tbl) \|\|
223	(k == (size_t)(-1) && errno == EILSEQ && tbl));
224	for (++ib, --ibl; ibl && (*ib & 0x80); ib++, ibl--)
225	;
226	}
227	}
228	k = iconv(cd2, 0, 0, &ob, &obl);
229	assert(!k);
230	assert(!obl);
231	*ob = '\0';
232
233	free(utfbuf);
234	iconv_close(cd1);
235	iconv_close(cd2);
236	if (tolen)
237	*tolen = outlen;
238	if (!to) {
239	free(outbuf);
240	return ret;
241	}
242	*to = outbuf;
243	return ret;
244
245	fail:
246	if(0 != utfbuf)
247	free(utfbuf);
248	iconv_close(cd1);
249	if (cd2 != (iconv_t)(-1))
250	iconv_close(cd2);
251	return -2;
252	}
253
254	#endif /* HAVE_ICONV */