[pcsx_rearmed.git] / deps / flac-1.3.2 / src / share / utf8 / charset.c

/*
 * Copyright (C) 2001 Edmund Grimley Evans <edmundo@rano.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

/*
 * See the corresponding header file for a description of the functions
 * that this file provides.
 *
 * This was first written for Ogg Vorbis but could be of general use.
 *
 * The only deliberate assumption about data sizes is that a short has
 * at least 16 bits, but this code has only been tested on systems with
 * 8-bit char, 16-bit short and 32-bit int.
 */

#ifdef HAVE_CONFIG_H
#  include <config.h>
#endif

#if !defined _WIN32 && !defined HAVE_ICONV /* should be && defined USE_CHARSET_CONVERT */

#include <stdlib.h>

#include "share/alloc.h"
#include "charset.h"

#include "charmaps.h"

/*
 * Locale-independent, ASCII-only stand-in for strcasecmp().
 *
 * Only the letters 'a'..'z' are folded, so the result never depends on
 * the current locale (the classic failure being "iso" vs "ISO" under a
 * Turkish locale).
 *
 * Returns 0 when the strings are equal ignoring ASCII case.  Otherwise
 * the result is the difference of the raw (unfolded) bytes at the first
 * mismatch, so callers should only rely on zero versus non-zero, not on
 * the sign.
 */

static int ascii_strcasecmp(const char *s1, const char *s2)
{
  while (*s1 && *s2) {
    if (*s1 != *s2) {
      char a = *s1;
      char b = *s2;

      /* Fold both sides to upper case before deciding they differ. */
      if (a >= 'a' && a <= 'z')
        a -= 'a' - 'A';
      if (b >= 'a' && b <= 'z')
        b -= 'a' - 'A';
      if (a != b)
        break;
    }
    s1++;
    s2++;
  }
  return (unsigned char)*s1 - (unsigned char)*s2;
}

/*
 * UTF-8 equivalents of the C library's wctomb() and mbtowc().
 */

/*
 * Decode one UTF-8 sequence (1 to 6 bytes) from the first N bytes of S.
 *
 * Returns the number of bytes consumed, 0 if S is null, N is zero or
 * the character is NUL, and -1 for an invalid, overlong or truncated
 * sequence.  The decoded value is stored in *PWC when PWC is non-null.
 */
int utf8_mbtowc(int *pwc, const char *s, size_t n)
{
  const unsigned char *bytes = (const unsigned char *)s;
  unsigned char lead;
  int len, value, idx;

  if (!s || !n)
    return 0;

  lead = bytes[0];

  /* Single byte: plain ASCII; a NUL byte is stored but reported as 0. */
  if (lead < 0x80) {
    if (pwc)
      *pwc = lead;
    return lead != 0;
  }

  /* Stray continuation bytes and the overlong leads 0xc0/0xc1. */
  if (lead < 0xc2)
    return -1;

  /* Two bytes: handled directly, no overlong check needed past 0xc2. */
  if (lead < 0xe0) {
    if (n < 2 || (bytes[1] & 0xc0) != 0x80)
      return -1;
    if (pwc)
      *pwc = ((lead & 0x1f) << 6) | (bytes[1] & 0x3f);
    return 2;
  }

  /* 0xfe and 0xff never appear in UTF-8. */
  if (lead >= 0xfe)
    return -1;

  len = lead < 0xf0 ? 3 : lead < 0xf8 ? 4 : lead < 0xfc ? 5 : 6;
  if (n < (size_t)len)
    return -1;

  /* The lead byte contributes its low (7 - len) bits. */
  value = lead & ((1 << (7 - len)) - 1);
  for (idx = 1; idx < len; idx++) {
    if ((bytes[idx] & 0xc0) != 0x80)
      return -1;
    value = (value << 6) | (bytes[idx] & 0x3f);
  }

  /* Reject values that would have fitted in a shorter sequence. */
  if (value < (1 << (5 * len - 4)))
    return -1;

  if (pwc)
    *pwc = value;
  return len;
}

/*
 * Encode WC1 as UTF-8 into S, which must have room for up to 6 bytes.
 *
 * Returns the number of bytes written (1 to 6), 0 if S is null, and
 * -1 if WC1 is negative or otherwise does not fit in 31 bits.
 */
int utf8_wctomb(char *s, int wc1)
{
  /* Marker bits of the lead byte, indexed by sequence length. */
  static const unsigned char lead_marks[7] = {
    0x00, 0x00, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc
  };
  unsigned int code = wc1;
  int len, pos;

  if (!s)
    return 0;

  if (code < (1u << 7))
    len = 1;
  else if (code < (1u << 11))
    len = 2;
  else if (code < (1u << 16))
    len = 3;
  else if (code < (1u << 21))
    len = 4;
  else if (code < (1u << 26))
    len = 5;
  else if (code < (1u << 31))
    len = 6;
  else
    return -1;

  /* Fill the continuation bytes from the end, six bits at a time. */
  for (pos = len - 1; pos > 0; pos--) {
    s[pos] = 0x80 | (code & 0x3f);
    code >>= 6;
  }
  /* Whatever is left belongs in the lead byte. */
  s[0] = lead_marks[len] | code;
  return len;
}

/*
 * The charset "object" and methods.
 */

/*
 * One character set: a size bound plus the decode/encode callbacks and
 * the opaque table those callbacks are handed on every call.
 */
struct charset {
  /* Value reported by charset_max(); charset_convert() multiplies the
   * input length by it when sizing its output buffer. */
  int max;
  /* Decoder, invoked through charset_mbtowc() with `map` as TABLE. */
  int (*mbtowc)(void *table, int *pwc, const char *s, size_t n);
  /* Encoder, invoked through charset_wctomb() with `map` as TABLE. */
  int (*wctomb)(void *table, char *s, int wc);
  /* Callback-private data: 0 for the built-in UTF-8, US-ASCII and
   * ISO-8859-1 charsets, a struct map for table-driven 8-bit ones. */
  void *map;
};

/*
 * Decode one character from the first N bytes of S by dispatching to
 * CHARSET's decoder callback; the callback's result is returned as is.
 */
int charset_mbtowc(struct charset *charset, int *pwc, const char *s, size_t n)
{
  void *table = charset->map;

  return charset->mbtowc(table, pwc, s, n);
}

/*
 * Encode WC into S by dispatching to CHARSET's encoder callback; the
 * callback's result is returned as is.
 */
int charset_wctomb(struct charset *charset, char *s, int wc)
{
  void *table = charset->map;

  return charset->wctomb(table, s, wc);
}

/* Report the `max` field recorded for CHARSET. */
int charset_max(struct charset *charset)
{
  const int widest = (*charset).max;

  return widest;
}

/*
 * Implementation of UTF-8.
 */

/* Decoder callback for UTF-8: forwards to utf8_mbtowc(); MAP is unused. */
static int mbtowc_utf8(void *map, int *pwc, const char *s, size_t n)
{
  int consumed = utf8_mbtowc(pwc, s, n);

  (void)map;
  return consumed;
}

/* Encoder callback for UTF-8: forwards to utf8_wctomb(); MAP is unused. */
static int wctomb_utf8(void *map, char *s, int wc)
{
  int written = utf8_wctomb(s, wc);

  (void)map;
  return written;
}

/*
 * Implementation of US-ASCII.
 * Probably on most architectures this compiles to less than 256 bytes
 * of code, so we can save space by not having a table for this one.
 */

/*
 * Decoder callback for US-ASCII; MAP is unused.
 * Returns 1 for a non-NUL 7-bit byte, 0 for NUL or when S is null or N
 * is zero, and -1 for any byte with the high bit set.
 */
static int mbtowc_ascii(void *map, int *pwc, const char *s, size_t n)
{
  unsigned char byte;

  (void)map;
  if (!s || !n)
    return 0;

  byte = (unsigned char)*s;
  if (byte > 0x7f)
    return -1;

  if (pwc)
    *pwc = byte;
  return byte != 0;
}

/*
 * Encoder callback for US-ASCII; MAP is unused.
 * Returns 1 after storing WC in *S, 0 if S is null, and -1 when WC is
 * outside 0..0x7f.
 */
static int wctomb_ascii(void *map, char *s, int wc)
{
  (void)map;

  if (!s)
    return 0;
  if (wc < 0 || wc > 0x7f)
    return -1;

  s[0] = wc;
  return 1;
}

/*
 * Implementation of ISO-8859-1.
 * Probably on most architectures this compiles to less than 256 bytes
 * of code, so we can save space by not having a table for this one.
 */

/*
 * Decoder callback for ISO-8859-1; MAP is unused.
 * The byte value is the character value.  Returns 1 for a non-NUL byte,
 * 0 for NUL or when S is null or N is zero, and -1 if the byte value
 * exceeds 0xff (only possible where char is wider than 8 bits).
 */
static int mbtowc_iso1(void *map, int *pwc, const char *s, size_t n)
{
  int value;

  (void)map;
  if (!s || !n)
    return 0;

  value = (unsigned char)*s;
  if (value > 0xff)
    return -1;

  if (pwc)
    *pwc = value;
  return value != 0;
}

/*
 * Encoder callback for ISO-8859-1; MAP is unused.
 * Returns 1 after storing WC in *S, 0 if S is null, and -1 when WC is
 * outside 0..0xff.
 */
static int wctomb_iso1(void *map, char *s, int wc)
{
  (void)map;

  if (!s)
    return 0;
  if (wc < 0 || wc > 0xff)
    return -1;

  s[0] = wc;
  return 1;
}

/*
 * Implementation of any 8-bit charset.
 */

/* State shared by mbtowc_8bit() and wctomb_8bit() for one 8-bit charset. */
struct map {
  /* Forward table indexed by byte value; 0xffff marks a byte that has
   * no character assigned. */
  const unsigned short *from;
  /* Reverse lookup table; 0 until wctomb_8bit() builds it on first use,
   * and it stays 0 if that allocation fails. */
  struct inverse_map *to;
};

static int mbtowc_8bit(void *map1, int *pwc, const char *s, size_t n)
{
  struct map *map = map1;
  unsigned short wc;

  if (!n || !s)
    return 0;
  wc = map->from[(unsigned char)*s];
  if (wc == 0xffff)
    return -1;
  if (pwc)
    *pwc = (int)wc;
  return wc ? 1 : 0;
}

/*
 * For the inverse map we use a hash table, which has the advantages
 * of small constant memory requirement and simple memory allocation,
 * but the disadvantage of slow conversion in the worst case.
 * If you need real-time performance while letting a potentially
 * malicious user define their own map, then the method used in
 * linux/drivers/char/consolemap.c would be more appropriate.
 */

/*
 * Chained hash table from character value back to byte value, stored
 * as byte indices into the forward table rather than as pointers.
 */
struct inverse_map {
  /* first[h]: byte index at the head of the chain for hash value h. */
  unsigned char first[256];
  /* next[i]: byte index following i in its chain; 0 ends the chain. */
  unsigned char next[256];
};

/*
 * Bucket selector for the inverse map: the low 8 bits of the character
 * value, which is good enough for this application.
 * Swap in one of the constant hashes below to force every character
 * into a single bucket when testing the chain handling.
 */
#define HASH(i) ((i) & 0xff)
/* #define HASH(i) 0 */
/* #define HASH(i) 99 */

/*
 * Build the reverse lookup table for the 256-entry forward table FROM.
 * Returns a malloc()ed table, or 0 if the allocation fails.
 */
static struct inverse_map *make_inverse_map(const unsigned short *from)
{
  struct inverse_map *inv;
  char occupied[256];
  int idx, bucket, tail;

  inv = malloc(sizeof(struct inverse_map));
  if (!inv)
    return 0;

  for (idx = 0; idx < 256; idx++) {
    inv->first[idx] = 0;
    inv->next[idx] = 0;
    occupied[idx] = 0;
  }

  /*
   * Push every mapped byte onto the front of its bucket's chain, going
   * from 255 down to 0.  Byte 0 is therefore always pushed last and can
   * only ever be the head of a chain, which is what allows 0 to serve
   * as the end-of-chain marker in next[].
   */
  idx = 256;
  while (idx-- > 0) {
    if (from[idx] == 0xffff)
      continue;
    bucket = HASH(from[idx]);
    inv->next[idx] = inv->first[bucket];
    inv->first[bucket] = idx;
    occupied[bucket] = 1;
  }

  /*
   * Buckets that received nothing still have first[] == 0, which would
   * send lookups down byte 0's chain.  Point them instead at some index
   * whose next[] is 0, so the walk stops after one comparison.
   */
  tail = 0;
  while (tail < 256 && inv->next[tail])
    tail++;
  if (tail < 256) {
    for (bucket = 0; bucket < 256; bucket++) {
      if (!occupied[bucket])
        inv->first[bucket] = tail;
    }
  }

  return inv;
}

/*
 * Encoder callback for table-driven 8-bit charsets; MAP1 is a struct map.
 *
 * Finds the byte whose forward-table entry equals WC1 and stores it in
 * *S.  The reverse hash table is built on first use; if that allocation
 * fails the forward table is searched linearly instead.
 *
 * Returns 1 on success, 0 if S is null, and -1 if WC1 has no byte in
 * this charset.
 */
static int wctomb_8bit(void *map1, char *s, int wc1)
{
  struct map *map = map1;
  unsigned short wc;
  int i;

  if (!s)
    return 0;

  /* The forward table only holds 16-bit values. */
  if (wc1 & ~0xffff)
    return -1;

  /*
   * 0xffff is the forward table's "unmapped byte" marker, not a
   * character.  Without this check a request for U+FFFF would compare
   * equal to an unmapped slot and be "encoded" as an undefined byte.
   */
  if (wc1 == 0xffff)
    return -1;

  wc = wc1;

  if (1) /* Change 1 to 0 to test the case where malloc fails. */
    if (!map->to)
      map->to = make_inverse_map(map->from);

  if (map->to) {
    /* Use the inverse map: walk the chain for this hash bucket. */
    i = map->to->first[HASH(wc)];
    for (;;) {
      if (map->from[i] == wc) {
        *s = i;
        return 1;
      }
      /* A next[] value of 0 marks the end of the chain. */
      if (!(i = map->to->next[i]))
        break;
    }
  }
  else {
    /* We don't have an inverse map, so do a linear search. */
    for (i = 0; i < 256; i++)
      if (map->from[i] == wc) {
        *s = i;
        return 1;
      }
  }

  return -1;
}

/*
 * The "constructor" charset_find().
 */

/* Built-in UTF-8 charset: up to 6 bytes per character, no table. */
struct charset charset_utf8 = {
  6,            /* max */
  mbtowc_utf8,  /* mbtowc */
  wctomb_utf8,  /* wctomb */
  0             /* map */
};

/* Built-in ISO-8859-1 charset: one byte per character, no table. */
struct charset charset_iso1 = {
  1,            /* max */
  mbtowc_iso1,  /* mbtowc */
  wctomb_iso1,  /* wctomb */
  0             /* map */
};

/* Built-in US-ASCII charset: one byte per character, no table. */
struct charset charset_ascii = {
  1,             /* max */
  mbtowc_ascii,  /* mbtowc */
  wctomb_ascii,  /* wctomb */
  0              /* map */
};

struct charset *charset_find(const char *code)
{
  int i;

  /* Find good (MIME) name. */
  for (i = 0; names[i].bad; i++)
    if (!ascii_strcasecmp(code, names[i].bad)) {
      code = names[i].good;
      break;
    }

  /* Recognise some charsets for which we avoid using a table. */
  if (!ascii_strcasecmp(code, "UTF-8"))
    return &charset_utf8;
  if (!ascii_strcasecmp(code, "US-ASCII"))
    return &charset_ascii;
  if (!ascii_strcasecmp(code, "ISO-8859-1"))
    return &charset_iso1;

  /* Look for a mapping for a simple 8-bit encoding. */
  for (i = 0; maps[i].name; i++)
    if (!ascii_strcasecmp(code, maps[i].name)) {
      if (!maps[i].charset) {
	maps[i].charset = malloc(sizeof(struct charset));
	if (maps[i].charset) {
	  struct map *map = malloc(sizeof(struct map));
	  if (!map) {
	    free(maps[i].charset);
	    maps[i].charset = 0;
	  }
	  else {
	    maps[i].charset->max = 1;
	    maps[i].charset->mbtowc = &mbtowc_8bit;
	    maps[i].charset->wctomb = &wctomb_8bit;
	    maps[i].charset->map = map;
	    map->from = maps[i].map;
	    map->to = 0; /* inverse mapping is created when required */
	  }
	}
      }
      return maps[i].charset;
    }

  return 0;
}

/*
 * Convert the FROMLEN bytes at FROM from encoding FROMCODE to encoding
 * TOCODE.
 *
 * Bytes that are invalid in the source encoding are replaced by '#',
 * and characters the target encoding cannot represent by '?'.
 * Either of TO and TOLEN may be zero if that result is not needed.
 * The output buffer is null-terminated (the terminator is not counted
 * in *TOLEN), so it is all right to use
 * charset_convert(fromcode, tocode, s, strlen(s), &t, 0).
 * The buffer stored in *TO comes from the allocator and must be
 * released by the caller.
 *
 * Returns 0 for a clean conversion, 1 if some character had to be
 * replaced by '?', 2 if some input byte was invalid, -1 if either
 * encoding name is unknown, and -2 if the output buffer could not be
 * allocated.
 */

int charset_convert(const char *fromcode, const char *tocode,
                    const char *from, size_t fromlen,
                    char **to, size_t *tolen)
{
  struct charset *src, *dst;
  char *outbuf, *out, *shrunk;
  int status = 0;
  int consumed, produced, wc;

  src = charset_find(fromcode);
  dst = charset_find(tocode);
  if (!(src && dst))
    return -1;

  /* Room for every input byte expanding to dst->max bytes, plus the
   * terminator. */
  outbuf = safe_malloc_mul2add_(fromlen, /*times*/dst->max, /*+*/1);
  if (!outbuf)
    return -2;

  out = outbuf;
  while (fromlen) {
    consumed = charset_mbtowc(src, &wc, from, fromlen);
    switch (consumed) {
    case -1:
      /* Undecodable byte: substitute and remember the failure. */
      wc = '#';
      status = 2;
      /* fall through */
    case 0:
      /* A NUL character (or a substituted byte) still uses one byte. */
      consumed = 1;
      break;
    default:
      break;
    }

    produced = charset_wctomb(dst, out, wc);
    if (produced == -1) {
      if (status == 0)
        status = 1;
      produced = charset_wctomb(dst, out, '?');
      /* If even '?' cannot be encoded, emit nothing for this one. */
      if (produced == -1)
        produced = 0;
    }

    from += consumed;
    fromlen -= consumed;
    out += produced;
  }

  if (tolen)
    *tolen = out - outbuf;
  *out++ = '\0';

  if (!to) {
    free(outbuf);
    return status;
  }

  /* Trim to the size actually used; keep the original if that fails. */
  shrunk = realloc(outbuf, out - outbuf);
  *to = shrunk ? shrunk : outbuf;
  return status;
}

#endif /* !defined _WIN32 && !defined HAVE_ICONV */
Commit	Line	Data
ce188d4d	1	/*
	2	* Copyright (C) 2001 Edmund Grimley Evans <edmundo@rano.org>
	3	*
	4	* This program is free software; you can redistribute it and/or modify
	5	* it under the terms of the GNU General Public License as published by
	6	* the Free Software Foundation; either version 2 of the License, or
	7	* (at your option) any later version.
	8	*
	9	* This program is distributed in the hope that it will be useful,
	10	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	11	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	12	* GNU General Public License for more details.
	13	*
	14	* You should have received a copy of the GNU General Public License along
	15	* with this program; if not, write to the Free Software Foundation, Inc.,
	16	* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
	17	*/
	18
	19	/*
	20	* See the corresponding header file for a description of the functions
	21	* that this file provides.
	22	*
	23	* This was first written for Ogg Vorbis but could be of general use.
	24	*
	25	* The only deliberate assumption about data sizes is that a short has
	26	* at least 16 bits, but this code has only been tested on systems with
	27	* 8-bit char, 16-bit short and 32-bit int.
	28	*/
	29
	30	#ifdef HAVE_CONFIG_H
	31	# include <config.h>
	32	#endif
	33
	34	#if !defined _WIN32 && !defined HAVE_ICONV /* should be && defined USE_CHARSET_CONVERT */
	35
	36	#include <stdlib.h>
	37
	38	#include "share/alloc.h"
	39	#include "charset.h"
	40
	41	#include "charmaps.h"
	42
	43	/*
	44	* This is like the standard strcasecmp, but it does not depend
	45	* on the locale. Locale-dependent functions can be dangerous:
	46	* we once had a bug involving strcasecmp("iso", "ISO") in a
	47	* Turkish locale!
	48	*
	49	* (I'm not really sure what the official standard says
	50	* about the sign of strcasecmp("Z", "["), but usually
	51	* we're only interested in whether it's zero.)
	52	*/
	53
	54	static int ascii_strcasecmp(const char s1, const char s2)
	55	{
	56	char c1, c2;
	57
	58	for (;; s1++, s2++) {
	59	if (!s1 \|\| !s2)
	60	break;
	61	if (s1 == s2)
	62	continue;
	63	c1 = *s1;
	64	if ('a' <= c1 && c1 <= 'z')
65	c1 += 'A' - 'a';
66	c2 = *s2;
67	if ('a' <= c2 && c2 <= 'z')
68	c2 += 'A' - 'a';
69	if (c1 != c2)
70	break;
71	}
72	return (unsigned char)s1 - (unsigned char)s2;
73	}
74
75	/*
76	* UTF-8 equivalents of the C library's wctomb() and mbtowc().
77	*/
78
79	int utf8_mbtowc(int pwc, const char s, size_t n)
80	{
81	unsigned char c;
82	int wc, i, k;
83
84	if (!n \|\| !s)
85	return 0;
86
87	c = *s;
88	if (c < 0x80) {
89	if (pwc)
90	*pwc = c;
91	return c ? 1 : 0;
92	}
93	else if (c < 0xc2)
94	return -1;
95	else if (c < 0xe0) {
96	if (n >= 2 && (s[1] & 0xc0) == 0x80) {
97	if (pwc)
98	*pwc = ((c & 0x1f) << 6) \| (s[1] & 0x3f);
99	return 2;
100	}
101	else
102	return -1;
103	}
104	else if (c < 0xf0)
105	k = 3;
106	else if (c < 0xf8)
107	k = 4;
108	else if (c < 0xfc)
109	k = 5;
110	else if (c < 0xfe)
111	k = 6;
112	else
113	return -1;
114
115	if (n < (size_t)k)
116	return -1;
117	wc = *s++ & ((1 << (7 - k)) - 1);
118	for (i = 1; i < k; i++) {
119	if ((*s & 0xc0) != 0x80)
120	return -1;
121	wc = (wc << 6) \| (*s++ & 0x3f);
122	}
123	if (wc < (1 << (5 * k - 4)))
124	return -1;
125	if (pwc)
126	*pwc = wc;
127	return k;
128	}
129
130	int utf8_wctomb(char *s, int wc1)
131	{
132	unsigned int wc = wc1;
133
134	if (!s)
135	return 0;
136	if (wc < (1u << 7)) {
137	*s++ = wc;
138	return 1;
139	}
140	else if (wc < (1u << 11)) {
141	*s++ = 0xc0 \| (wc >> 6);
142	*s++ = 0x80 \| (wc & 0x3f);
143	return 2;
144	}
145	else if (wc < (1u << 16)) {
146	*s++ = 0xe0 \| (wc >> 12);
147	*s++ = 0x80 \| ((wc >> 6) & 0x3f);
148	*s++ = 0x80 \| (wc & 0x3f);
149	return 3;
150	}
151	else if (wc < (1u << 21)) {
152	*s++ = 0xf0 \| (wc >> 18);
153	*s++ = 0x80 \| ((wc >> 12) & 0x3f);
154	*s++ = 0x80 \| ((wc >> 6) & 0x3f);
155	*s++ = 0x80 \| (wc & 0x3f);
156	return 4;
157	}
158	else if (wc < (1u << 26)) {
159	*s++ = 0xf8 \| (wc >> 24);
160	*s++ = 0x80 \| ((wc >> 18) & 0x3f);
161	*s++ = 0x80 \| ((wc >> 12) & 0x3f);
162	*s++ = 0x80 \| ((wc >> 6) & 0x3f);
163	*s++ = 0x80 \| (wc & 0x3f);
164	return 5;
165	}
166	else if (wc < (1u << 31)) {
167	*s++ = 0xfc \| (wc >> 30);
168	*s++ = 0x80 \| ((wc >> 24) & 0x3f);
169	*s++ = 0x80 \| ((wc >> 18) & 0x3f);
170	*s++ = 0x80 \| ((wc >> 12) & 0x3f);
171	*s++ = 0x80 \| ((wc >> 6) & 0x3f);
172	*s++ = 0x80 \| (wc & 0x3f);
173	return 6;
174	}
175	else
176	return -1;
177	}
178
179	/*
180	* The charset "object" and methods.
181	*/
182
183	struct charset {
184	int max;
185	int (mbtowc)(void table, int pwc, const char s, size_t n);
186	int (wctomb)(void table, char *s, int wc);
187	void *map;
188	};
189
190	int charset_mbtowc(struct charset charset, int pwc, const char *s, size_t n)
191	{
192	return (*charset->mbtowc)(charset->map, pwc, s, n);
193	}
194
195	int charset_wctomb(struct charset charset, char s, int wc)
196	{
197	return (*charset->wctomb)(charset->map, s, wc);
198	}
199
200	int charset_max(struct charset *charset)
201	{
202	return charset->max;
203	}
204
205	/*
206	* Implementation of UTF-8.
207	*/
208
209	static int mbtowc_utf8(void map, int pwc, const char *s, size_t n)
210	{
211	(void)map;
212	return utf8_mbtowc(pwc, s, n);
213	}
214
215	static int wctomb_utf8(void map, char s, int wc)
216	{
217	(void)map;
218	return utf8_wctomb(s, wc);
219	}
220
221	/*
222	* Implementation of US-ASCII.
223	* Probably on most architectures this compiles to less than 256 bytes
224	* of code, so we can save space by not having a table for this one.
225	*/
226
227	static int mbtowc_ascii(void map, int pwc, const char *s, size_t n)
228	{
229	int wc;
230
231	(void)map;
232	if (!n \|\| !s)
233	return 0;
234	wc = (unsigned char)*s;
235	if (wc & ~0x7f)
236	return -1;
237	if (pwc)
238	*pwc = wc;
239	return wc ? 1 : 0;
240	}
241
242	static int wctomb_ascii(void map, char s, int wc)
243	{
244	(void)map;
245	if (!s)
246	return 0;
247	if (wc & ~0x7f)
248	return -1;
249	*s = wc;
250	return 1;
251	}
252
253	/*
254	* Implementation of ISO-8859-1.
255	* Probably on most architectures this compiles to less than 256 bytes
256	* of code, so we can save space by not having a table for this one.
257	*/
258
259	static int mbtowc_iso1(void map, int pwc, const char *s, size_t n)
260	{
261	int wc;
262
263	(void)map;
264	if (!n \|\| !s)
265	return 0;
266	wc = (unsigned char)*s;
267	if (wc & ~0xff)
268	return -1;
269	if (pwc)
270	*pwc = wc;
271	return wc ? 1 : 0;
272	}
273
274	static int wctomb_iso1(void map, char s, int wc)
275	{
276	(void)map;
277	if (!s)
278	return 0;
279	if (wc & ~0xff)
280	return -1;
281	*s = wc;
282	return 1;
283	}
284
285	/*
286	* Implementation of any 8-bit charset.
287	*/
288
289	struct map {
290	const unsigned short *from;
291	struct inverse_map *to;
292	};
293
294	static int mbtowc_8bit(void map1, int pwc, const char *s, size_t n)
295	{
296	struct map *map = map1;
297	unsigned short wc;
298
299	if (!n \|\| !s)
300	return 0;
301	wc = map->from[(unsigned char)*s];
302	if (wc == 0xffff)
303	return -1;
304	if (pwc)
305	*pwc = (int)wc;
306	return wc ? 1 : 0;
307	}
308
309	/*
310	* For the inverse map we use a hash table, which has the advantages
311	* of small constant memory requirement and simple memory allocation,
312	* but the disadvantage of slow conversion in the worst case.
313	* If you need real-time performance while letting a potentially
314	* malicious user define their own map, then the method used in
315	* linux/drivers/char/consolemap.c would be more appropriate.
316	*/
317
318	struct inverse_map {
319	unsigned char first[256];
320	unsigned char next[256];
321	};
322
323	/*
324	* The simple hash is good enough for this application.
325	* Use the alternative trivial hashes for testing.
326	*/
327	#define HASH(i) ((i) & 0xff)
328	/* #define HASH(i) 0 */
329	/* #define HASH(i) 99 */
330
331	static struct inverse_map make_inverse_map(const unsigned short from)
332	{
333	struct inverse_map *to;
334	char used[256];
335	int i, j, k;
336
337	to = malloc(sizeof(struct inverse_map));
338	if (!to)
339	return 0;
340	for (i = 0; i < 256; i++)
341	to->first[i] = to->next[i] = used[i] = 0;
342	for (i = 255; i >= 0; i--)
343	if (from[i] != 0xffff) {
344	k = HASH(from[i]);
345	to->next[i] = to->first[k];
346	to->first[k] = i;
347	used[k] = 1;
348	}
349
350	/* Point the empty buckets at an empty list. */
351	for (i = 0; i < 256; i++)
352	if (!to->next[i])
353	break;
354	if (i < 256)
355	for (j = 0; j < 256; j++)
356	if (!used[j])
357	to->first[j] = i;
358
359	return to;
360	}
361
362	static int wctomb_8bit(void map1, char s, int wc1)
363	{
364	struct map *map = map1;
365	unsigned short wc = wc1;
366	int i;
367
368	if (!s)
369	return 0;
370
371	if (wc1 & ~0xffff)
372	return -1;
373
374	if (1) /* Change 1 to 0 to test the case where malloc fails. */
375	if (!map->to)
376	map->to = make_inverse_map(map->from);
377
378	if (map->to) {
379	/* Use the inverse map. */
380	i = map->to->first[HASH(wc)];
381	for (;;) {
382	if (map->from[i] == wc) {
383	*s = i;
384	return 1;
385	}
386	if (!(i = map->to->next[i]))
387	break;
388	}
389	}
390	else {
391	/* We don't have an inverse map, so do a linear search. */
392	for (i = 0; i < 256; i++)
393	if (map->from[i] == wc) {
394	*s = i;
395	return 1;
396	}
397	}
398
399	return -1;
400	}
401
402	/*
403	* The "constructor" charset_find().
404	*/
405
406	struct charset charset_utf8 = {
407	6,
408	&mbtowc_utf8,
409	&wctomb_utf8,
410	0
411	};
412
413	struct charset charset_iso1 = {
414	1,
415	&mbtowc_iso1,
416	&wctomb_iso1,
417	0
418	};
419
420	struct charset charset_ascii = {
421	1,
422	&mbtowc_ascii,
423	&wctomb_ascii,
424	0
425	};
426
427	struct charset charset_find(const char code)
428	{
429	int i;
430
431	/* Find good (MIME) name. */
432	for (i = 0; names[i].bad; i++)
433	if (!ascii_strcasecmp(code, names[i].bad)) {
434	code = names[i].good;
435	break;
436	}
437
438	/* Recognise some charsets for which we avoid using a table. */
439	if (!ascii_strcasecmp(code, "UTF-8"))
440	return &charset_utf8;
441	if (!ascii_strcasecmp(code, "US-ASCII"))
442	return &charset_ascii;
443	if (!ascii_strcasecmp(code, "ISO-8859-1"))
444	return &charset_iso1;
445
446	/* Look for a mapping for a simple 8-bit encoding. */
447	for (i = 0; maps[i].name; i++)
448	if (!ascii_strcasecmp(code, maps[i].name)) {
449	if (!maps[i].charset) {
450	maps[i].charset = malloc(sizeof(struct charset));
451	if (maps[i].charset) {
452	struct map *map = malloc(sizeof(struct map));
453	if (!map) {
454	free(maps[i].charset);
455	maps[i].charset = 0;
456	}
457	else {
458	maps[i].charset->max = 1;
459	maps[i].charset->mbtowc = &mbtowc_8bit;
460	maps[i].charset->wctomb = &wctomb_8bit;
461	maps[i].charset->map = map;
462	map->from = maps[i].map;
463	map->to = 0; /* inverse mapping is created when required */
464	}
465	}
466	}
467	return maps[i].charset;
468	}
469
470	return 0;
471	}
472
473	/*
474	* Function to convert a buffer from one encoding to another.
475	* Invalid bytes are replaced by '#', and characters that are
476	* not available in the target encoding are replaced by '?'.
477	* Each of TO and TOLEN may be zero, if the result is not needed.
478	* The output buffer is null-terminated, so it is all right to
479	* use charset_convert(fromcode, tocode, s, strlen(s), &t, 0).
480	*/
481
482	int charset_convert(const char fromcode, const char tocode,
483	const char *from, size_t fromlen,
484	char *to, size_t tolen)
485	{
486	int ret = 0;
487	struct charset charset1, charset2;
488	char tobuf, p, *newbuf;
489	int i, j, wc;
490
491	charset1 = charset_find(fromcode);
492	charset2 = charset_find(tocode);
493	if (!charset1 \|\| !charset2 )
494	return -1;
495
496	tobuf = safe_malloc_mul2add_(fromlen, /times/charset2->max, /+/1);
497	if (!tobuf)
498	return -2;
499
500	for (p = tobuf; fromlen; from += i, fromlen -= i, p += j) {
501	i = charset_mbtowc(charset1, &wc, from, fromlen);
502	if (!i)
503	i = 1;
504	else if (i == -1) {
505	i = 1;
506	wc = '#';
507	ret = 2;
508	}
509	j = charset_wctomb(charset2, p, wc);
510	if (j == -1) {
511	if (!ret)
512	ret = 1;
513	j = charset_wctomb(charset2, p, '?');
514	if (j == -1)
515	j = 0;
516	}
517	}
518
519	if (tolen)
520	*tolen = p - tobuf;
521	*p++ = '\0';
522	if (to) {
523	newbuf = realloc(tobuf, p - tobuf);
524	*to = newbuf ? newbuf : tobuf;
525	}
526	else
527	free(tobuf);
528
529	return ret;
530	}
531
532	#endif /* USE_CHARSET_ICONV */