[pcsx_rearmed.git] / deps / flac-1.3.2 / src / share / utf8 / charset.c

/*
 * Copyright (C) 2001 Edmund Grimley Evans <edmundo@rano.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

/*
 * See the corresponding header file for a description of the functions
 * that this file provides.
 *
 * This was first written for Ogg Vorbis but could be of general use.
 *
 * The only deliberate assumption about data sizes is that a short has
 * at least 16 bits, but this code has only been tested on systems with
 * 8-bit char, 16-bit short and 32-bit int.
 */

#ifdef HAVE_CONFIG_H
#  include <config.h>
#endif

#if !defined _WIN32 && !defined HAVE_ICONV /* should be && defined USE_CHARSET_CONVERT */

#include <stdlib.h>

#include "share/alloc.h"
#include "charset.h"

#include "charmaps.h"

/*
 * Locale-independent, ASCII-only replacement for strcasecmp().
 * Locale-dependent functions can be dangerous: we once had a bug
 * involving strcasecmp("iso", "ISO") in a Turkish locale!
 *
 * Only the letters 'a'..'z' are folded (to upper case); every other
 * byte is compared by its unsigned value.
 *
 * Returns zero if the strings are equal ignoring ASCII case. Otherwise
 * returns the difference between the first pair of differing bytes
 * AFTER case folding, so the sign agrees with a case-insensitive
 * ordering ("a" sorts before "B"). Because folding is to upper case,
 * "Z" sorts before "[".
 */
static int ascii_strcasecmp(const char *s1, const char *s2)
{
  unsigned char c1, c2;

  for (;; s1++, s2++) {
    c1 = (unsigned char)*s1;
    c2 = (unsigned char)*s2;
    if ('a' <= c1 && c1 <= 'z')
      c1 -= 'a' - 'A';
    if ('a' <= c2 && c2 <= 'z')
      c2 -= 'a' - 'A';
    /* Stop at the first mismatch, or when both strings end together. */
    if (c1 != c2 || !c1)
      break;
  }
  /* Compare the folded bytes; the raw bytes would give the wrong sign. */
  return (int)c1 - (int)c2;
}

/*
 * UTF-8 equivalents of the C library's wctomb() and mbtowc().
 */

/*
 * Decode a single UTF-8 sequence found in the first N bytes of S.
 *
 * Returns the number of bytes consumed (1..6) and, if PWC is non-null,
 * stores the decoded value there. Returns 0 when S is null, N is zero
 * or the first byte is NUL (the NUL is still stored through PWC).
 * Returns -1 for a malformed, truncated or overlong sequence.
 */
int utf8_mbtowc(int *pwc, const char *s, size_t n)
{
  const unsigned char *bytes = (const unsigned char *)s;
  unsigned char lead;
  int value, len, pos;

  if (!n || !s)
    return 0;

  lead = bytes[0];

  /* Single byte: plain ASCII. */
  if (lead < 0x80) {
    if (pwc)
      *pwc = lead;
    return lead ? 1 : 0;
  }

  /*
   * 0x80..0xbf are continuation bytes, 0xc0/0xc1 could only start an
   * overlong two-byte form, and 0xfe/0xff never start a sequence.
   */
  if (lead < 0xc2 || lead >= 0xfe)
    return -1;

  /* Two-byte form is handled directly. */
  if (lead < 0xe0) {
    if (n < 2 || (bytes[1] & 0xc0) != 0x80)
      return -1;
    if (pwc)
      *pwc = ((lead & 0x1f) << 6) | (bytes[1] & 0x3f);
    return 2;
  }

  /* Three to six bytes: the lead byte determines the length. */
  if (lead < 0xf0)
    len = 3;
  else if (lead < 0xf8)
    len = 4;
  else if (lead < 0xfc)
    len = 5;
  else
    len = 6;

  if (n < (size_t)len)
    return -1;

  /* The lead byte contributes its low (7 - len) bits. */
  value = lead & ((1 << (7 - len)) - 1);
  for (pos = 1; pos < len; pos++) {
    if ((bytes[pos] & 0xc0) != 0x80)
      return -1;
    value = (value << 6) | (bytes[pos] & 0x3f);
  }

  /* Reject overlong encodings: the value must need all LEN bytes. */
  if (value < (1 << (5 * len - 4)))
    return -1;

  if (pwc)
    *pwc = value;
  return len;
}

/*
 * Encode WC1 as UTF-8 into S, using the original 1..6 byte scheme that
 * covers values up to 2^31 - 1.
 *
 * Returns the number of bytes written, 0 if S is null, or -1 if WC1 is
 * negative (i.e. does not fit in 31 bits). No terminator is written.
 */
int utf8_wctomb(char *s, int wc1)
{
  /* Marker bits of the lead byte, indexed by sequence length. */
  static const unsigned char lead_mark[7] = {
    0x00, 0x00, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc
  };
  unsigned int code = wc1;
  int len, pos;

  if (!s)
    return 0;

  if (code < (1u << 7))
    len = 1;
  else if (code < (1u << 11))
    len = 2;
  else if (code < (1u << 16))
    len = 3;
  else if (code < (1u << 21))
    len = 4;
  else if (code < (1u << 26))
    len = 5;
  else if (code < (1u << 31))
    len = 6;
  else
    return -1;

  /* Fill the continuation bytes from the end, six bits at a time. */
  for (pos = len - 1; pos > 0; pos--) {
    s[pos] = 0x80 | (code & 0x3f);
    code >>= 6;
  }
  /* Whatever is left fits in the payload bits of the lead byte. */
  s[0] = lead_mark[len] | code;

  return len;
}

/*
 * The charset "object" and methods.
 */

/*
 * A character set: a pair of conversion callbacks plus the private
 * data they operate on. The field order matters because the static
 * instances in this file use positional initializers.
 */
struct charset {
  /* Multiplier charset_convert() uses to size its output buffer:
   * the most bytes one character can occupy in this encoding. */
  int max;
  /* Decoder; called with the `map` field as its first argument. */
  int (*mbtowc)(void *table, int *pwc, const char *s, size_t n);
  /* Encoder; called with the `map` field as its first argument. */
  int (*wctomb)(void *table, char *s, int wc);
  /* Opaque per-charset data handed to the callbacks (0 if unused). */
  void *map;
};

/*
 * Decode one character from the first N bytes of S by dispatching to
 * CHARSET's own decoder, passing along the charset's private map.
 * The return value is whatever that decoder returns.
 */
int charset_mbtowc(struct charset *charset, int *pwc, const char *s, size_t n)
{
  void *table = charset->map;

  return charset->mbtowc(table, pwc, s, n);
}

/*
 * Encode WC into S by dispatching to CHARSET's own encoder, passing
 * along the charset's private map. The return value is whatever that
 * encoder returns.
 */
int charset_wctomb(struct charset *charset, char *s, int wc)
{
  void *table = charset->map;

  return charset->wctomb(table, s, wc);
}

/*
 * Report the `max` field of CHARSET: the largest number of bytes a
 * single character can occupy in that encoding.
 */
int charset_max(struct charset *charset)
{
  int most_bytes = charset->max;

  return most_bytes;
}

/*
 * Implementation of UTF-8.
 */

/* Decoder callback for UTF-8: forwards to utf8_mbtowc(); MAP is unused. */
static int mbtowc_utf8(void *map, int *pwc, const char *s, size_t n)
{
  int consumed = utf8_mbtowc(pwc, s, n);

  (void)map; /* UTF-8 needs no lookup table */
  return consumed;
}

/* Encoder callback for UTF-8: forwards to utf8_wctomb(); MAP is unused. */
static int wctomb_utf8(void *map, char *s, int wc)
{
  int written = utf8_wctomb(s, wc);

  (void)map; /* UTF-8 needs no lookup table */
  return written;
}

/*
 * Implementation of US-ASCII.
 * Probably on most architectures this compiles to less than 256 bytes
 * of code, so we can save space by not having a table for this one.
 */

/*
 * Decoder callback for US-ASCII; MAP is unused.
 * Returns 1 for a non-NUL ASCII byte, 0 for NUL, a null S or N == 0,
 * and -1 if the byte has its high bit set. On success (including NUL)
 * the value is stored through PWC when PWC is non-null.
 */
static int mbtowc_ascii(void *map, int *pwc, const char *s, size_t n)
{
  unsigned char byte;

  (void)map;
  if (!s || !n)
    return 0;

  byte = (unsigned char)*s;
  if (byte > 0x7f)
    return -1;

  if (pwc)
    *pwc = byte;
  return byte != 0;
}

/*
 * Encoder callback for US-ASCII; MAP is unused.
 * Writes one byte and returns 1 when WC is in 0..0x7f, returns -1 for
 * any other value (S is left untouched), and 0 when S is null.
 */
static int wctomb_ascii(void *map, char *s, int wc)
{
  (void)map;
  if (!s)
    return 0;

  if (wc < 0 || wc > 0x7f)
    return -1;

  *s = wc;
  return 1;
}

/*
 * Implementation of ISO-8859-1.
 * Probably on most architectures this compiles to less than 256 bytes
 * of code, so we can save space by not having a table for this one.
 */

/*
 * Decoder callback for ISO-8859-1; MAP is unused.
 * Each byte maps to the code point with the same numeric value.
 * Returns 1 for a non-NUL byte, 0 for NUL, a null S or N == 0. The
 * -1 branch only triggers if a byte value exceeds 0xff, which cannot
 * happen with 8-bit chars.
 */
static int mbtowc_iso1(void *map, int *pwc, const char *s, size_t n)
{
  int value;

  (void)map;
  if (!s || !n)
    return 0;

  value = (unsigned char)*s;
  if (value > 0xff)
    return -1;

  if (pwc)
    *pwc = value;
  return value != 0;
}

/*
 * Encoder callback for ISO-8859-1; MAP is unused.
 * Writes one byte and returns 1 when WC is in 0..0xff, returns -1 for
 * any other value (S is left untouched), and 0 when S is null.
 */
static int wctomb_iso1(void *map, char *s, int wc)
{
  (void)map;
  if (!s)
    return 0;

  if (wc < 0 || wc > 0xff)
    return -1;

  *s = wc;
  return 1;
}

/*
 * Implementation of any 8-bit charset.
 */

/* Private data of a table-driven 8-bit charset. */
struct map {
  /* Forward table indexed by byte value; an entry of 0xffff marks a
   * byte that has no mapping. */
  const unsigned short *from;
  /* Inverse (character -> byte) hash table, built by wctomb_8bit()
   * on first use; null until then or if its allocation failed. */
  struct inverse_map *to;
};

static int mbtowc_8bit(void *map1, int *pwc, const char *s, size_t n)
{
  struct map *map = map1;
  unsigned short wc;

  if (!n || !s)
    return 0;
  wc = map->from[(unsigned char)*s];
  if (wc == 0xffff)
    return -1;
  if (pwc)
    *pwc = (int)wc;
  return wc ? 1 : 0;
}

/*
 * For the inverse map we use a hash table, which has the advantages
 * of small constant memory requirement and simple memory allocation,
 * but the disadvantage of slow conversion in the worst case.
 * If you need real-time performance while letting a potentially
 * malicious user define their own map, then the method used in
 * linux/drivers/char/consolemap.c would be more appropriate.
 */

/* Chained hash table mapping characters back to byte values. */
struct inverse_map {
  /* first[h]: byte value at the head of the chain for hash bucket h. */
  unsigned char first[256];
  /* next[b]: byte value following b in its chain; 0 ends the chain. */
  unsigned char next[256];
};

/*
 * The simple hash is good enough for this application: it keeps the
 * low 8 bits of the character, so the result always indexes the
 * 256-entry bucket array.
 * Use the alternative trivial hashes for testing.
 */
#define HASH(i) ((i) & 0xff)
/* #define HASH(i) 0 */
/* #define HASH(i) 99 */

/*
 * Build the character -> byte hash table for the forward table FROM
 * (256 entries, 0xffff meaning "unmapped").
 *
 * Returns a newly malloc'd table, or 0 if the allocation fails.
 */
static struct inverse_map *make_inverse_map(const unsigned short *from)
{
  struct inverse_map *inv;
  char bucket_used[256];
  int byte, bucket, tail;

  inv = malloc(sizeof(struct inverse_map));
  if (!inv)
    return 0;

  for (byte = 0; byte < 256; byte++) {
    inv->first[byte] = 0;
    inv->next[byte] = 0;
    bucket_used[byte] = 0;
  }

  /*
   * Insert in descending order, each entry at the head of its chain.
   * Byte 0 is inserted last, so it can only ever be a chain head and
   * a `next` value of 0 unambiguously means "end of chain".
   */
  for (byte = 255; byte >= 0; byte--) {
    if (from[byte] == 0xffff)
      continue;
    bucket = HASH(from[byte]);
    inv->next[byte] = inv->first[bucket];
    inv->first[bucket] = byte;
    bucket_used[bucket] = 1;
  }

  /*
   * Point the empty buckets at an empty list: pick the first entry
   * with no successor, so a lookup in an empty bucket stops after a
   * single comparison.
   */
  tail = 0;
  while (tail < 256 && inv->next[tail])
    tail++;
  if (tail < 256) {
    for (bucket = 0; bucket < 256; bucket++) {
      if (!bucket_used[bucket])
        inv->first[bucket] = tail;
    }
  }

  return inv;
}

/*
 * Encoder callback for table-driven 8-bit charsets. MAP1 is the
 * charset's struct map.
 *
 * Writes the byte that maps to WC1 into S and returns 1. Returns 0 if
 * S is null and -1 if no byte maps to WC1. The inverse hash table is
 * created on first use; if that allocation fails, a linear search of
 * the forward table is used instead.
 */
static int wctomb_8bit(void *map1, char *s, int wc1)
{
  struct map *map = map1;
  unsigned short wc = wc1;
  int i;

  if (!s)
    return 0;

  /*
   * The forward table holds 16-bit values, so nothing above 0xffff
   * can be present. 0xffff itself is the "unmapped" marker: without
   * this check it would match an unmapped slot and be reported as
   * encodable.
   */
  if ((wc1 & ~0xffff) || wc1 == 0xffff)
    return -1;

  if (1) /* Change 1 to 0 to test the case where malloc fails. */
    if (!map->to)
      map->to = make_inverse_map(map->from);

  if (map->to) {
    /* Use the inverse map: walk the chain for this hash bucket. */
    i = map->to->first[HASH(wc)];
    for (;;) {
      if (map->from[i] == wc) {
        *s = i;
        return 1;
      }
      /* A next value of 0 terminates the chain. */
      if (!(i = map->to->next[i]))
        break;
    }
  }
  else {
    /* We don't have an inverse map, so do a linear search. */
    for (i = 0; i < 256; i++)
      if (map->from[i] == wc) {
        *s = i;
        return 1;
      }
  }

  return -1;
}

/*
 * The "constructor" charset_find().
 */

/* UTF-8: no table needed; fields are max, mbtowc, wctomb, map. */
struct charset charset_utf8 = {
  6, /* utf8_wctomb() can emit up to six bytes per character */
  &mbtowc_utf8,
  &wctomb_utf8,
  0
};

/* ISO-8859-1: no table needed; fields are max, mbtowc, wctomb, map. */
struct charset charset_iso1 = {
  1, /* one byte per character */
  &mbtowc_iso1,
  &wctomb_iso1,
  0
};

/* US-ASCII: no table needed; fields are max, mbtowc, wctomb, map. */
struct charset charset_ascii = {
  1, /* one byte per character */
  &mbtowc_ascii,
  &wctomb_ascii,
  0
};

struct charset *charset_find(const char *code)
{
  int i;

  /* Find good (MIME) name. */
  for (i = 0; names[i].bad; i++)
    if (!ascii_strcasecmp(code, names[i].bad)) {
      code = names[i].good;
      break;
    }

  /* Recognise some charsets for which we avoid using a table. */
  if (!ascii_strcasecmp(code, "UTF-8"))
    return &charset_utf8;
  if (!ascii_strcasecmp(code, "US-ASCII"))
    return &charset_ascii;
  if (!ascii_strcasecmp(code, "ISO-8859-1"))
    return &charset_iso1;

  /* Look for a mapping for a simple 8-bit encoding. */
  for (i = 0; maps[i].name; i++)
    if (!ascii_strcasecmp(code, maps[i].name)) {
      if (!maps[i].charset) {
	maps[i].charset = malloc(sizeof(struct charset));
	if (maps[i].charset) {
	  struct map *map = malloc(sizeof(struct map));
	  if (!map) {
	    free(maps[i].charset);
	    maps[i].charset = 0;
	  }
	  else {
	    maps[i].charset->max = 1;
	    maps[i].charset->mbtowc = &mbtowc_8bit;
	    maps[i].charset->wctomb = &wctomb_8bit;
	    maps[i].charset->map = map;
	    map->from = maps[i].map;
	    map->to = 0; /* inverse mapping is created when required */
	  }
	}
      }
      return maps[i].charset;
    }

  return 0;
}

/*
 * Convert the FROMLEN bytes at FROM from encoding FROMCODE to encoding
 * TOCODE.
 *
 * Invalid input bytes are replaced by '#', and characters that are
 * not available in the target encoding are replaced by '?' (or dropped
 * if even '?' cannot be encoded).
 *
 * Each of TO and TOLEN may be zero, if the result is not needed.
 * *TOLEN receives the output length excluding the terminator. *TO
 * receives a heap-allocated buffer (presumably for the caller to
 * free()). The output buffer is null-terminated, so it is all right
 * to use charset_convert(fromcode, tocode, s, strlen(s), &t, 0).
 *
 * Returns:
 *    0  every character was converted,
 *    1  some character was not representable in TOCODE,
 *    2  the input contained invalid bytes,
 *   -1  FROMCODE or TOCODE is not a known charset,
 *   -2  the output buffer could not be allocated.
 */
int charset_convert(const char *fromcode, const char *tocode,
		    const char *from, size_t fromlen,
		    char **to, size_t *tolen)
{
  struct charset *src, *dst;
  char *out, *cursor, *shrunk;
  int status = 0;
  int consumed, produced, wc;

  src = charset_find(fromcode);
  dst = charset_find(tocode);
  if (!src || !dst)
    return -1;

  /* Worst case: every input byte expands to dst->max bytes, plus NUL. */
  out = safe_malloc_mul2add_(fromlen, /*times*/dst->max, /*+*/1);
  if (!out)
    return -2;

  cursor = out;
  while (fromlen) {
    consumed = charset_mbtowc(src, &wc, from, fromlen);
    if (consumed == -1) {
      /* Invalid input: skip one byte and substitute '#'. */
      consumed = 1;
      wc = '#';
      status = 2;
    }
    else if (consumed == 0) {
      /* A decoded NUL reports length 0 but still occupies one byte. */
      consumed = 1;
    }

    produced = charset_wctomb(dst, cursor, wc);
    if (produced == -1) {
      /* Not representable: record it unless a worse status is set. */
      if (status == 0)
        status = 1;
      produced = charset_wctomb(dst, cursor, '?');
      if (produced == -1)
        produced = 0;
    }

    from += consumed;
    fromlen -= consumed;
    cursor += produced;
  }

  if (tolen)
    *tolen = cursor - out;
  *cursor++ = '\0';

  if (!to) {
    free(out);
    return status;
  }

  /* Shrink to the used size; keep the original if realloc fails. */
  shrunk = realloc(out, cursor - out);
  *to = shrunk ? shrunk : out;

  return status;
}

#endif /* !defined _WIN32 && !defined HAVE_ICONV */
Commit	Line	Data
	1	/*
	2	* Copyright (C) 2001 Edmund Grimley Evans <edmundo@rano.org>
	3	*
	4	* This program is free software; you can redistribute it and/or modify
	5	* it under the terms of the GNU General Public License as published by
	6	* the Free Software Foundation; either version 2 of the License, or
	7	* (at your option) any later version.
	8	*
	9	* This program is distributed in the hope that it will be useful,
	10	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	11	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	12	* GNU General Public License for more details.
	13	*
	14	* You should have received a copy of the GNU General Public License along
	15	* with this program; if not, write to the Free Software Foundation, Inc.,
	16	* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
	17	*/
	18
	19	/*
	20	* See the corresponding header file for a description of the functions
	21	* that this file provides.
	22	*
	23	* This was first written for Ogg Vorbis but could be of general use.
	24	*
	25	* The only deliberate assumption about data sizes is that a short has
	26	* at least 16 bits, but this code has only been tested on systems with
	27	* 8-bit char, 16-bit short and 32-bit int.
	28	*/
	29
	30	#ifdef HAVE_CONFIG_H
	31	# include <config.h>
	32	#endif
	33
	34	#if !defined _WIN32 && !defined HAVE_ICONV /* should be && defined USE_CHARSET_CONVERT */
	35
	36	#include <stdlib.h>
	37
	38	#include "share/alloc.h"
	39	#include "charset.h"
	40
	41	#include "charmaps.h"
	42
	43	/*
	44	* This is like the standard strcasecmp, but it does not depend
	45	* on the locale. Locale-dependent functions can be dangerous:
	46	* we once had a bug involving strcasecmp("iso", "ISO") in a
	47	* Turkish locale!
	48	*
	49	* (I'm not really sure what the official standard says
	50	* about the sign of strcasecmp("Z", "["), but usually
	51	* we're only interested in whether it's zero.)
	52	*/
	53
	54	static int ascii_strcasecmp(const char s1, const char s2)
	55	{
	56	char c1, c2;
	57
	58	for (;; s1++, s2++) {
	59	if (!s1 \|\| !s2)
	60	break;
	61	if (s1 == s2)
	62	continue;
	63	c1 = *s1;
	64	if ('a' <= c1 && c1 <= 'z')
	65	c1 += 'A' - 'a';
	66	c2 = *s2;
	67	if ('a' <= c2 && c2 <= 'z')
	68	c2 += 'A' - 'a';
	69	if (c1 != c2)
	70	break;
	71	}
	72	return (unsigned char)s1 - (unsigned char)s2;
	73	}
	74
	75	/*
	76	* UTF-8 equivalents of the C library's wctomb() and mbtowc().
	77	*/
	78
	79	int utf8_mbtowc(int pwc, const char s, size_t n)
	80	{
	81	unsigned char c;
	82	int wc, i, k;
	83
	84	if (!n \|\| !s)
	85	return 0;
	86
	87	c = *s;
	88	if (c < 0x80) {
	89	if (pwc)
	90	*pwc = c;
	91	return c ? 1 : 0;
	92	}
	93	else if (c < 0xc2)
	94	return -1;
	95	else if (c < 0xe0) {
	96	if (n >= 2 && (s[1] & 0xc0) == 0x80) {
	97	if (pwc)
	98	*pwc = ((c & 0x1f) << 6) \| (s[1] & 0x3f);
	99	return 2;
	100	}
	101	else
	102	return -1;
	103	}
	104	else if (c < 0xf0)
	105	k = 3;
	106	else if (c < 0xf8)
	107	k = 4;
	108	else if (c < 0xfc)
	109	k = 5;
	110	else if (c < 0xfe)
	111	k = 6;
	112	else
	113	return -1;
	114
	115	if (n < (size_t)k)
	116	return -1;
	117	wc = *s++ & ((1 << (7 - k)) - 1);
	118	for (i = 1; i < k; i++) {
	119	if ((*s & 0xc0) != 0x80)
	120	return -1;
	121	wc = (wc << 6) \| (*s++ & 0x3f);
	122	}
	123	if (wc < (1 << (5 * k - 4)))
	124	return -1;
	125	if (pwc)
	126	*pwc = wc;
	127	return k;
	128	}
	129
	130	int utf8_wctomb(char *s, int wc1)
	131	{
	132	unsigned int wc = wc1;
	133
	134	if (!s)
	135	return 0;
	136	if (wc < (1u << 7)) {
	137	*s++ = wc;
	138	return 1;
	139	}
	140	else if (wc < (1u << 11)) {
	141	*s++ = 0xc0 \| (wc >> 6);
	142	*s++ = 0x80 \| (wc & 0x3f);
	143	return 2;
	144	}
	145	else if (wc < (1u << 16)) {
	146	*s++ = 0xe0 \| (wc >> 12);
	147	*s++ = 0x80 \| ((wc >> 6) & 0x3f);
	148	*s++ = 0x80 \| (wc & 0x3f);
	149	return 3;
	150	}
	151	else if (wc < (1u << 21)) {
	152	*s++ = 0xf0 \| (wc >> 18);
	153	*s++ = 0x80 \| ((wc >> 12) & 0x3f);
	154	*s++ = 0x80 \| ((wc >> 6) & 0x3f);
	155	*s++ = 0x80 \| (wc & 0x3f);
	156	return 4;
	157	}
	158	else if (wc < (1u << 26)) {
	159	*s++ = 0xf8 \| (wc >> 24);
	160	*s++ = 0x80 \| ((wc >> 18) & 0x3f);
	161	*s++ = 0x80 \| ((wc >> 12) & 0x3f);
	162	*s++ = 0x80 \| ((wc >> 6) & 0x3f);
	163	*s++ = 0x80 \| (wc & 0x3f);
	164	return 5;
	165	}
	166	else if (wc < (1u << 31)) {
	167	*s++ = 0xfc \| (wc >> 30);
	168	*s++ = 0x80 \| ((wc >> 24) & 0x3f);
	169	*s++ = 0x80 \| ((wc >> 18) & 0x3f);
	170	*s++ = 0x80 \| ((wc >> 12) & 0x3f);
	171	*s++ = 0x80 \| ((wc >> 6) & 0x3f);
	172	*s++ = 0x80 \| (wc & 0x3f);
	173	return 6;
	174	}
	175	else
	176	return -1;
	177	}
	178
	179	/*
	180	* The charset "object" and methods.
	181	*/
	182
	183	struct charset {
	184	int max;
	185	int (mbtowc)(void table, int pwc, const char s, size_t n);
	186	int (wctomb)(void table, char *s, int wc);
	187	void *map;
	188	};
	189
	190	int charset_mbtowc(struct charset charset, int pwc, const char *s, size_t n)
	191	{
	192	return (*charset->mbtowc)(charset->map, pwc, s, n);
	193	}
	194
	195	int charset_wctomb(struct charset charset, char s, int wc)
	196	{
	197	return (*charset->wctomb)(charset->map, s, wc);
	198	}
	199
	200	int charset_max(struct charset *charset)
	201	{
	202	return charset->max;
	203	}
	204
	205	/*
	206	* Implementation of UTF-8.
	207	*/
	208
	209	static int mbtowc_utf8(void map, int pwc, const char *s, size_t n)
	210	{
	211	(void)map;
	212	return utf8_mbtowc(pwc, s, n);
	213	}
	214
	215	static int wctomb_utf8(void map, char s, int wc)
	216	{
	217	(void)map;
	218	return utf8_wctomb(s, wc);
	219	}
	220
	221	/*
	222	* Implementation of US-ASCII.
	223	* Probably on most architectures this compiles to less than 256 bytes
	224	* of code, so we can save space by not having a table for this one.
	225	*/
	226
	227	static int mbtowc_ascii(void map, int pwc, const char *s, size_t n)
	228	{
	229	int wc;
	230
	231	(void)map;
	232	if (!n \|\| !s)
	233	return 0;
	234	wc = (unsigned char)*s;
	235	if (wc & ~0x7f)
	236	return -1;
	237	if (pwc)
	238	*pwc = wc;
	239	return wc ? 1 : 0;
	240	}
	241
	242	static int wctomb_ascii(void map, char s, int wc)
	243	{
	244	(void)map;
	245	if (!s)
	246	return 0;
	247	if (wc & ~0x7f)
	248	return -1;
	249	*s = wc;
	250	return 1;
	251	}
	252
	253	/*
	254	* Implementation of ISO-8859-1.
	255	* Probably on most architectures this compiles to less than 256 bytes
	256	* of code, so we can save space by not having a table for this one.
	257	*/
	258
	259	static int mbtowc_iso1(void map, int pwc, const char *s, size_t n)
	260	{
	261	int wc;
	262
	263	(void)map;
	264	if (!n \|\| !s)
	265	return 0;
	266	wc = (unsigned char)*s;
	267	if (wc & ~0xff)
	268	return -1;
	269	if (pwc)
	270	*pwc = wc;
	271	return wc ? 1 : 0;
	272	}
	273
	274	static int wctomb_iso1(void map, char s, int wc)
	275	{
	276	(void)map;
	277	if (!s)
	278	return 0;
	279	if (wc & ~0xff)
	280	return -1;
	281	*s = wc;
	282	return 1;
	283	}
	284
	285	/*
	286	* Implementation of any 8-bit charset.
	287	*/
	288
	289	struct map {
	290	const unsigned short *from;
	291	struct inverse_map *to;
	292	};
	293
	294	static int mbtowc_8bit(void map1, int pwc, const char *s, size_t n)
	295	{
	296	struct map *map = map1;
	297	unsigned short wc;
	298
	299	if (!n \|\| !s)
	300	return 0;
	301	wc = map->from[(unsigned char)*s];
	302	if (wc == 0xffff)
	303	return -1;
	304	if (pwc)
	305	*pwc = (int)wc;
	306	return wc ? 1 : 0;
	307	}
	308
	309	/*
	310	* For the inverse map we use a hash table, which has the advantages
	311	* of small constant memory requirement and simple memory allocation,
	312	* but the disadvantage of slow conversion in the worst case.
	313	* If you need real-time performance while letting a potentially
	314	* malicious user define their own map, then the method used in
	315	* linux/drivers/char/consolemap.c would be more appropriate.
	316	*/
	317
	318	struct inverse_map {
	319	unsigned char first[256];
	320	unsigned char next[256];
	321	};
	322
	323	/*
	324	* The simple hash is good enough for this application.
	325	* Use the alternative trivial hashes for testing.
	326	*/
	327	#define HASH(i) ((i) & 0xff)
	328	/* #define HASH(i) 0 */
	329	/* #define HASH(i) 99 */
	330
	331	static struct inverse_map make_inverse_map(const unsigned short from)
	332	{
	333	struct inverse_map *to;
	334	char used[256];
	335	int i, j, k;
	336
	337	to = malloc(sizeof(struct inverse_map));
	338	if (!to)
	339	return 0;
	340	for (i = 0; i < 256; i++)
	341	to->first[i] = to->next[i] = used[i] = 0;
	342	for (i = 255; i >= 0; i--)
	343	if (from[i] != 0xffff) {
	344	k = HASH(from[i]);
	345	to->next[i] = to->first[k];
	346	to->first[k] = i;
	347	used[k] = 1;
	348	}
	349
	350	/* Point the empty buckets at an empty list. */
	351	for (i = 0; i < 256; i++)
	352	if (!to->next[i])
	353	break;
	354	if (i < 256)
	355	for (j = 0; j < 256; j++)
	356	if (!used[j])
	357	to->first[j] = i;
	358
	359	return to;
	360	}
	361
	362	static int wctomb_8bit(void map1, char s, int wc1)
	363	{
	364	struct map *map = map1;
	365	unsigned short wc = wc1;
	366	int i;
	367
	368	if (!s)
	369	return 0;
	370
	371	if (wc1 & ~0xffff)
	372	return -1;
	373
	374	if (1) /* Change 1 to 0 to test the case where malloc fails. */
	375	if (!map->to)
	376	map->to = make_inverse_map(map->from);
	377
	378	if (map->to) {
	379	/* Use the inverse map. */
	380	i = map->to->first[HASH(wc)];
	381	for (;;) {
	382	if (map->from[i] == wc) {
	383	*s = i;
	384	return 1;
	385	}
	386	if (!(i = map->to->next[i]))
	387	break;
	388	}
	389	}
	390	else {
	391	/* We don't have an inverse map, so do a linear search. */
	392	for (i = 0; i < 256; i++)
	393	if (map->from[i] == wc) {
	394	*s = i;
	395	return 1;
	396	}
	397	}
	398
	399	return -1;
	400	}
	401
	402	/*
	403	* The "constructor" charset_find().
	404	*/
	405
	406	struct charset charset_utf8 = {
	407	6,
	408	&mbtowc_utf8,
	409	&wctomb_utf8,
	410	0
	411	};
	412
	413	struct charset charset_iso1 = {
	414	1,
	415	&mbtowc_iso1,
	416	&wctomb_iso1,
	417	0
	418	};
	419
	420	struct charset charset_ascii = {
	421	1,
	422	&mbtowc_ascii,
	423	&wctomb_ascii,
	424	0
	425	};
	426
	427	struct charset charset_find(const char code)
	428	{
	429	int i;
	430
	431	/* Find good (MIME) name. */
	432	for (i = 0; names[i].bad; i++)
	433	if (!ascii_strcasecmp(code, names[i].bad)) {
	434	code = names[i].good;
	435	break;
	436	}
	437
	438	/* Recognise some charsets for which we avoid using a table. */
	439	if (!ascii_strcasecmp(code, "UTF-8"))
	440	return &charset_utf8;
	441	if (!ascii_strcasecmp(code, "US-ASCII"))
	442	return &charset_ascii;
	443	if (!ascii_strcasecmp(code, "ISO-8859-1"))
	444	return &charset_iso1;
	445
	446	/* Look for a mapping for a simple 8-bit encoding. */
	447	for (i = 0; maps[i].name; i++)
	448	if (!ascii_strcasecmp(code, maps[i].name)) {
	449	if (!maps[i].charset) {
	450	maps[i].charset = malloc(sizeof(struct charset));
	451	if (maps[i].charset) {
	452	struct map *map = malloc(sizeof(struct map));
	453	if (!map) {
	454	free(maps[i].charset);
	455	maps[i].charset = 0;
	456	}
	457	else {
	458	maps[i].charset->max = 1;
	459	maps[i].charset->mbtowc = &mbtowc_8bit;
	460	maps[i].charset->wctomb = &wctomb_8bit;
	461	maps[i].charset->map = map;
	462	map->from = maps[i].map;
	463	map->to = 0; /* inverse mapping is created when required */
	464	}
	465	}
	466	}
	467	return maps[i].charset;
	468	}
	469
	470	return 0;
	471	}
	472
	473	/*
	474	* Function to convert a buffer from one encoding to another.
	475	* Invalid bytes are replaced by '#', and characters that are
	476	* not available in the target encoding are replaced by '?'.
	477	* Each of TO and TOLEN may be zero, if the result is not needed.
	478	* The output buffer is null-terminated, so it is all right to
	479	* use charset_convert(fromcode, tocode, s, strlen(s), &t, 0).
	480	*/
	481
	482	int charset_convert(const char fromcode, const char tocode,
	483	const char *from, size_t fromlen,
	484	char *to, size_t tolen)
	485	{
	486	int ret = 0;
	487	struct charset charset1, charset2;
	488	char tobuf, p, *newbuf;
	489	int i, j, wc;
	490
	491	charset1 = charset_find(fromcode);
	492	charset2 = charset_find(tocode);
	493	if (!charset1 \|\| !charset2 )
	494	return -1;
	495
	496	tobuf = safe_malloc_mul2add_(fromlen, /times/charset2->max, /+/1);
	497	if (!tobuf)
	498	return -2;
	499
	500	for (p = tobuf; fromlen; from += i, fromlen -= i, p += j) {
	501	i = charset_mbtowc(charset1, &wc, from, fromlen);
	502	if (!i)
	503	i = 1;
	504	else if (i == -1) {
	505	i = 1;
	506	wc = '#';
	507	ret = 2;
	508	}
	509	j = charset_wctomb(charset2, p, wc);
	510	if (j == -1) {
	511	if (!ret)
	512	ret = 1;
	513	j = charset_wctomb(charset2, p, '?');
	514	if (j == -1)
	515	j = 0;
	516	}
	517	}
	518
	519	if (tolen)
	520	*tolen = p - tobuf;
	521	*p++ = '\0';
	522	if (to) {
	523	newbuf = realloc(tobuf, p - tobuf);
	524	*to = newbuf ? newbuf : tobuf;
	525	}
	526	else
	527	free(tobuf);
	528
	529	return ret;
	530	}
	531
	532	#endif /* USE_CHARSET_ICONV */