[pcsx_rearmed.git] / deps / libretro-common / encodings / encoding_utf.c

/* Copyright  (C) 2010-2020 The RetroArch team
 *
 * ---------------------------------------------------------------------------------------
 * The following license statement only applies to this file (encoding_utf.c).
 * ---------------------------------------------------------------------------------------
 *
 * Permission is hereby granted, free of charge,
 * to any person obtaining a copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
 * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include <stdint.h>
#include <stdlib.h>
#include <stddef.h>
#include <string.h>

#include <boolean.h>
#include <compat/strl.h>
#include <retro_inline.h>

#include <encodings/utf.h>

#if defined(_WIN32) && !defined(_XBOX)
#include <windows.h>
#elif defined(_XBOX)
#include <xtl.h>
#endif

/* Reads one byte through a pointer-to-pointer cursor:
 * @string is a pointer to a char pointer; the macro yields the
 * char currently pointed at and then post-increments the inner
 * pointer. The argument is evaluated exactly once. */
#define UTF8_WALKBYTE(string) (*((*(string))++))

/**
 * leading_ones:
 * @c : byte to inspect.
 *
 * Counts how many consecutive 1 bits @c has, starting from
 * the most significant bit (0 for 0x7F and below, 8 for 0xFF).
 *
 * @return Number of leading set bits.
 **/
static unsigned leading_ones(uint8_t c)
{
   unsigned count;

   /* Shift the byte left until its top bit is clear. */
   for (count = 0; (c & 0x80) != 0; count++)
      c = (uint8_t)(c << 1);

   return count;
}

/**
 * utf8_conv_utf32:
 * @out       : destination array of decoded code points.
 * @out_chars : capacity of @out, in code points.
 * @in        : UTF-8 encoded input bytes.
 * @in_size   : number of bytes available at @in.
 *
 * Decodes UTF-8 into 32-bit code points. This is a simple decoder:
 * it only inspects the lead byte of each sequence and does not
 * verify that the following bytes are continuation bytes, so the
 * input is expected to be properly synchronized.
 *
 * Decoding stops when either buffer is exhausted, when a lead byte
 * has exactly one or more than six leading 1 bits, or when a
 * sequence would run past @in_size.
 *
 * @return Number of code points written to @out.
 **/
size_t utf8_conv_utf32(uint32_t *out, size_t out_chars,
      const char *in, size_t in_size)
{
   size_t written = 0;

   while (in_size != 0 && out_chars != 0)
   {
      unsigned k;
      unsigned trailing;
      uint32_t cp;
      uint8_t lead    = *in++;
      unsigned prefix = leading_ones(lead);

      /* A lone continuation byte (10xxxxxx) or an
       * impossible lead byte: give up. */
      if (prefix == 1 || prefix > 6)
         break;

      /* ASCII has no trailing bytes; otherwise the prefix length
       * counts the lead byte itself. */
      trailing = (prefix == 0) ? 0 : prefix - 1;

      /* The whole sequence must fit in the remaining input. */
      if (1 + trailing > in_size)
         break;

      /* Payload bits of the lead byte go to the top. */
      cp = (lead & ((1 << (7 - prefix)) - 1)) << (6 * trailing);

      /* Each trailing byte contributes six bits,
       * most significant group first. */
      for (k = trailing; k > 0; k--)
         cp |= (*in++ & 0x3f) << ((k - 1) * 6);

      *out++   = cp;
      in_size -= 1 + trailing;
      out_chars--;
      written++;
   }

   return written;
}

/**
 * utf16_conv_utf8:
 * @out       : destination for the UTF-8 bytes, or NULL to only
 *              count how many bytes would be produced.
 * @out_chars : receives the number of bytes produced (or counted).
 *              It is output-only; the capacity of @out is NOT
 *              checked by this function.
 * @in        : UTF-16 code units to convert.
 * @in_size   : number of code units at @in.
 *
 * Converts UTF-16 to UTF-8, combining surrogate pairs into a
 * single code point. No terminator is appended.
 *
 * Leaf function.
 *
 * @return true if all of @in was converted, false if an unpaired
 * or out-of-order surrogate was met (@out_chars then holds the
 * number of bytes produced before the error).
 **/
bool utf16_conv_utf8(uint8_t *out, size_t *out_chars,
     const uint16_t *in, size_t in_size)
{
   /* Marker bits of the lead byte, indexed by
    * (number of continuation bytes - 1). */
   static const uint8_t lead_marker[5] = { 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
   size_t rd = 0;
   size_t wr = 0;

   while (rd != in_size)
   {
      unsigned tail;
      uint32_t cp = in[rd++];

      /* Plain ASCII: one byte, copied as-is. */
      if (cp < 0x80)
      {
         if (out)
            out[wr] = (char)cp;
         wr++;
         continue;
      }

      /* Surrogate range: must be a high surrogate
       * immediately followed by a low surrogate. */
      if (cp >= 0xD800 && cp < 0xE000)
      {
         uint32_t low;

         if (cp >= 0xDC00 || rd == in_size)
         {
            *out_chars = wr;
            return false;
         }

         low = in[rd++];
         if (low < 0xDC00 || low >= 0xE000)
         {
            *out_chars = wr;
            return false;
         }

         cp = (((cp - 0xD800) << 10) | (low - 0xDC00)) + 0x10000;
      }

      /* Find how many continuation bytes are needed:
       * 1 for < 0x800, 2 for < 0x10000, 3 for < 0x200000, ... */
      tail = 1;
      while (tail < 5 && cp >= (((uint32_t)1) << (tail * 5 + 6)))
         tail++;

      /* Lead byte: marker plus the topmost payload bits. */
      if (out)
         out[wr] = (char)(lead_marker[tail - 1] + (cp >> (6 * tail)));
      wr++;

      /* Continuation bytes, six bits each, high group first. */
      for (; tail != 0; tail--)
      {
         if (out)
            out[wr] = (char)(0x80 + ((cp >> (6 * (tail - 1))) & 0x3F));
         wr++;
      }
   }

   *out_chars = wr;
   return true;
}

/**
 * utf8cpy:
 * @d     : destination buffer.
 * @d_len : size of @d in bytes, including room for the terminator.
 * @s     : source string, assumed to be valid UTF-8.
 * @chars : maximum number of UTF-8 characters to copy.
 *
 * Acts mostly like strlcpy.
 *
 * Copies up to @chars UTF-8 characters, but at most @d_len - 1
 * bytes, and then NUL-terminates @d. Does not copy half a
 * character: if the byte limit falls inside a multi-byte sequence,
 * the whole sequence is dropped.
 *
 * If @d is NULL or @d_len is 0 nothing can be stored, so nothing
 * is written and 0 is returned. If @s is NULL, @d is left untouched
 * and 0 is returned.
 *
 * Use only if @chars is considerably less than @d_len.
 *
 * @return Number of bytes copied, not counting the terminator.
 **/
size_t utf8cpy(char *d, size_t d_len, const char *s, size_t chars)
{
   const uint8_t *sb     = (const uint8_t*)s;
   const uint8_t *sb_org = sb;

   /* Without this guard, d_len - 1 below would wrap around to
    * SIZE_MAX and the copy would be unbounded. */
   if (!d || d_len == 0)
      return 0;

   if (!s)
      return 0;

   /* Advance over @chars characters: one lead byte plus any
    * continuation bytes (10xxxxxx) that follow it. */
   while (*sb && chars-- > 0)
   {
      sb++;
      while ((*sb & 0xC0) == 0x80)
         sb++;
   }

   if ((size_t)(sb - sb_org) > d_len-1 /* NUL */)
   {
      /* Too long: cut at the byte limit, then back up to the start
       * of the character we landed in. Never step before the start
       * of the source, even on malformed input. */
      sb = sb_org + d_len-1;
      while (sb > sb_org && (*sb & 0xC0) == 0x80)
         sb--;
   }

   memcpy(d, sb_org, (size_t)(sb - sb_org));
   d[sb-sb_org] = '\0';

   return (size_t)(sb - sb_org);
}

/**
 * utf8skip:
 * @str   : NUL-terminated string, assumed to be valid UTF-8.
 * @chars : number of UTF-8 characters to skip.
 *
 * Advances over @chars characters of @str, where a character is
 * one byte followed by any number of continuation bytes
 * (10xxxxxx). Skipping stops early at the NUL terminator, so the
 * returned pointer never goes past the end of the string.
 *
 * Leaf function
 *
 * @return Pointer to the character that follows the skipped ones,
 * or to the terminator if the string has fewer than @chars
 * characters. Returns @str unchanged when @chars is 0.
 **/
const char *utf8skip(const char *str, size_t chars)
{
   const uint8_t *strb = (const uint8_t*)str;

   if (!chars)
      return str;

   /* Checking *strb keeps the walk inside the string when the
    * caller asks for more characters than there are. */
   while (chars && *strb)
   {
      strb++;
      while ((*strb & 0xC0)==0x80)
         strb++;
      chars--;
   }

   return (const char*)strb;
}

/**
 * utf8len:
 * @string : NUL-terminated UTF-8 string, may be NULL.
 *
 * Counts the characters in @string by counting every byte that
 * is not a continuation byte (10xxxxxx).
 *
 * Leaf function.
 *
 * @return Number of characters, or 0 if @string is NULL.
 **/
size_t utf8len(const char *string)
{
   const char *cur;
   size_t count = 0;

   if (string == NULL)
      return 0;

   for (cur = string; *cur != '\0'; cur++)
   {
      /* Continuation bytes belong to the previous character. */
      if ((*cur & 0xC0) == 0x80)
         continue;
      count++;
   }

   return count;
}

/**
 * utf8_walk:
 * @string : pointer to the read cursor; advanced past the
 *           bytes that were consumed.
 *
 * Decodes one code point of up to four bytes from *@string.
 * The length of the sequence is chosen from the first byte alone:
 * below 0x80 one byte, below 0xE0 two, below 0xF0 three, else four.
 *
 * Does not validate the input; the trailing bytes are consumed
 * whatever their value.
 *
 * Leaf function.
 *
 * @return The decoded code point. Returns garbage if it's not UTF-8.
 **/
uint32_t utf8_walk(const char **string)
{
   uint32_t acc;
   uint8_t lead = UTF8_WALKBYTE(string);

   /* Single-byte (ASCII) character. */
   if (lead < 0x80)
      return lead;

   /* Every multi-byte sequence has at least one trailing byte. */
   acc = UTF8_WALKBYTE(string) & 0x3F;
   if (lead < 0xE0)
      return acc | (lead & 0x1F) << 6;

   acc = (acc << 6) | (UTF8_WALKBYTE(string) & 0x3F);
   if (lead < 0xF0)
      return acc | (lead & 0x0F) << 12;

   acc = (acc << 6) | (UTF8_WALKBYTE(string) & 0x3F);
   return acc | (lead & 0x07) << 18;
}

/**
 * utf16_to_char:
 * @utf_data : receives a malloc'd buffer holding the UTF-8 bytes
 *             (set to NULL if the allocation fails).
 * @dest_len : receives the number of UTF-8 bytes produced.
 * @in       : zero-terminated UTF-16 input.
 *
 * Measures @in, allocates one byte more than the converted length
 * and converts into the new buffer. The buffer is not terminated
 * here. On a conversion failure the buffer stays allocated, so the
 * caller has to free *@utf_data in every case.
 *
 * @return true if the whole input was converted, false otherwise.
 **/
static bool utf16_to_char(uint8_t **utf_data,
      size_t *dest_len, const uint16_t *in)
{
   uint8_t *buffer;
   unsigned units = 0;

   /* Length of the input in 16-bit code units. */
   while (in[units] != '\0')
      units++;

   /* Dry run with a NULL destination to learn the output size,
    * then reserve one extra byte. */
   utf16_conv_utf8(NULL, dest_len, in, units);
   *dest_len += 1;

   buffer    = (uint8_t*)malloc(*dest_len);
   *utf_data = buffer;
   if (!buffer)
      return false;

   return utf16_conv_utf8(buffer, dest_len, in, units);
}

/**
 * utf16_to_char_string:
 * @in  : zero-terminated UTF-16 input.
 * @s   : destination buffer for the UTF-8 string.
 * @len : size of @s in bytes.
 *
 * Converts @in to UTF-8 through a temporary heap buffer and
 * copies the result into @s with strlcpy. @s is not written
 * when the conversion fails.
 *
 * @return true on success, false if the conversion or the
 * temporary allocation failed.
 **/
bool utf16_to_char_string(const uint16_t *in, char *s, size_t len)
{
   uint8_t *converted = NULL;
   size_t n_bytes     = 0;

   if (!utf16_to_char(&converted, &n_bytes, in))
   {
      /* The helper may have allocated before failing. */
      free(converted);
      return false;
   }

   /* The helper reserved one spare byte for this terminator. */
   converted[n_bytes] = 0;
   strlcpy(s, (const char*)converted, len);

   free(converted);
   return true;
}

#if defined(_WIN32) && !defined(_XBOX) && !defined(UNICODE)
/**
 * mb_to_mb_string_alloc:
 * @str    : zero-terminated multibyte string to convert.
 * @cp_in  : code page @str is encoded in.
 * @cp_out : code page to convert to.
 *
 * Converts @str from one code page to another by going through
 * a temporary wide-character copy. If either size query returns 0,
 * a plain duplicate of @str is returned instead.
 *
 * @return Returned pointer MUST be freed by the caller if non-NULL.
 * NULL is returned on allocation failure or when a conversion
 * yields an empty string.
 **/
static char *mb_to_mb_string_alloc(const char *str,
      enum CodePage cp_in, enum CodePage cp_out)
{
   char *narrow   = NULL;
   wchar_t *wide  = NULL;
   int narrow_len = 0;
   int wide_len   = MultiByteToWideChar(cp_in, 0, str, -1, NULL, 0);

   /* Windows 95 will return 0 from these functions with 
    * a UTF8 codepage set without MSLU.
    *
    * From an unknown MSDN version (others omit this info):
    *   - CP_UTF8 Windows 98/Me, Windows NT 4.0 and later: 
    *   Translate using UTF-8. When this is set, dwFlags must be zero.
    *   - Windows 95: Under the Microsoft Layer for Unicode, 
    *   MultiByteToWideChar also supports CP_UTF7 and CP_UTF8.
    */
   if (wide_len == 0)
      return strdup(str);

   wide = (wchar_t*)calloc(wide_len + sizeof(wchar_t), sizeof(wchar_t));
   if (!wide)
      return NULL;

   /* Step 1: input code page -> wide characters. */
   MultiByteToWideChar(cp_in, 0, str, -1, wide, wide_len);

   if (!*wide)
   {
      free(wide);
      return NULL;
   }

   /* Step 2: wide characters -> output code page. */
   narrow_len = WideCharToMultiByte(cp_out, 0,
         wide, -1, NULL, 0, NULL, NULL);

   if (narrow_len == 0)
   {
      free(wide);
      return strdup(str);
   }

   narrow = (char*)calloc(narrow_len + sizeof(char), sizeof(char));
   if (!narrow)
   {
      free(wide);
      return NULL;
   }

   WideCharToMultiByte(cp_out, 0,
         wide, -1, narrow, narrow_len, NULL, NULL);
   free(wide);

   /* An empty result counts as a failure. */
   if (!*narrow)
   {
      free(narrow);
      return NULL;
   }

   return narrow;
}
#endif

/**
 * utf8_to_local_string_alloc:
 * @str : UTF-8 string to convert, may be NULL.
 *
 * On Windows builds without UNICODE (and not Xbox) the string is
 * converted from the UTF-8 code page to the local code page;
 * everywhere else it is simply duplicated.
 *
 * @return Returned pointer MUST be freed by the caller if non-NULL.
 * NULL is returned when @str is NULL or empty.
 **/
char* utf8_to_local_string_alloc(const char *str)
{
   if (!str || !*str)
      return NULL;

#if defined(_WIN32) && !defined(_XBOX) && !defined(UNICODE)
   return mb_to_mb_string_alloc(str, CODEPAGE_UTF8, CODEPAGE_LOCAL);
#else
   /* Assume string needs no modification if not on Windows */
   return strdup(str);
#endif
}

/**
 * local_to_utf8_string_alloc:
 * @str : string in the local code page, may be NULL.
 *
 * On Windows builds without UNICODE (and not Xbox) the string is
 * converted from the local code page to the UTF-8 code page;
 * everywhere else it is simply duplicated.
 *
 * @return Returned pointer MUST be freed by the caller if non-NULL.
 * NULL is returned when @str is NULL or empty.
 **/
char *local_to_utf8_string_alloc(const char *str)
{
   if (!str || !*str)
      return NULL;

#if defined(_WIN32) && !defined(_XBOX) && !defined(UNICODE)
   return mb_to_mb_string_alloc(str, CODEPAGE_LOCAL, CODEPAGE_UTF8);
#else
   /* Assume string needs no modification if not on Windows */
   return strdup(str);
#endif
}

/**
 * utf8_to_utf16_string_alloc:
 * @str : UTF-8 string to convert, may be NULL.
 *
 * Converts @str to a newly allocated, zero-terminated wide string.
 *
 * On Windows the conversion uses MultiByteToWideChar with CP_UTF8
 * and falls back to the ANSI code page if the UTF-8 size query
 * fails. Elsewhere it uses mbstowcs, i.e. the current C locale.
 *
 * @return Returned pointer MUST be freed by the caller if non-NULL.
 * NULL is returned when @str is NULL or empty, on allocation
 * failure, or when the conversion fails.
 **/
wchar_t* utf8_to_utf16_string_alloc(const char *str)
{
#ifdef _WIN32
   int len        = 0;
#else
   size_t len     = 0;
#endif
   wchar_t *buf   = NULL;

   if (!str || !*str)
      return NULL;

#ifdef _WIN32
   {
      UINT code_page = CP_UTF8;

      /* Fallback to ANSI codepage instead */
      if (!(len = MultiByteToWideChar(code_page, 0, str, -1, NULL, 0)))
      {
         code_page   = CP_ACP;
         len         = MultiByteToWideChar(code_page, 0, str, -1, NULL, 0);
      }

      /* Neither code page could size the string. */
      if (len <= 0)
         return NULL;

      if (!(buf = (wchar_t*)calloc(len, sizeof(wchar_t))))
         return NULL;

      /* MultiByteToWideChar signals failure by returning 0,
       * never a negative value. */
      if (!MultiByteToWideChar(code_page, 0, str, -1, buf, len))
      {
         free(buf);
         return NULL;
      }
   }
#else
   /* NOTE: For now, assume non-Windows platforms' locale is already UTF-8. */
   /* mbstowcs returns (size_t)-1 on an invalid sequence; adding 1
    * turns that into 0, which skips the block and returns NULL. */
   if ((len = mbstowcs(NULL, str, 0) + 1))
   {
      if (!(buf = (wchar_t*)calloc(len, sizeof(wchar_t))))
         return NULL;

      if ((mbstowcs(buf, str, len)) == (size_t)-1)
      {
         free(buf);
         return NULL;
      }
   }
#endif

   return buf;
}

/**
 * utf16_to_utf8_string_alloc:
 * @str : zero-terminated wide string to convert, may be NULL.
 *
 * Converts @str to a newly allocated, zero-terminated multibyte
 * string.
 *
 * On Windows the conversion uses WideCharToMultiByte with CP_UTF8
 * and falls back to the ANSI code page if the UTF-8 size query
 * fails. Elsewhere it uses wcstombs, i.e. the current C locale.
 *
 * @return Returned pointer MUST be freed by the caller if non-NULL.
 * NULL is returned when @str is NULL or empty, on allocation
 * failure, or when the conversion fails.
 **/
char* utf16_to_utf8_string_alloc(const wchar_t *str)
{
#ifdef _WIN32
   int len        = 0;
#else
   size_t len     = 0;
#endif
   char *buf      = NULL;

   if (!str || !*str)
      return NULL;

#ifdef _WIN32
   {
      UINT code_page = CP_UTF8;

      /* fallback to ANSI codepage instead */
      if (!(len = WideCharToMultiByte(code_page,
            0, str, -1, NULL, 0, NULL, NULL)))
      {
         code_page   = CP_ACP;
         len         = WideCharToMultiByte(code_page,
               0, str, -1, NULL, 0, NULL, NULL);
      }

      /* Neither code page could size the string. Bail out rather
       * than hand back a zero-sized allocation as if it were a
       * terminated string. */
      if (len <= 0)
         return NULL;

      if (!(buf = (char*)calloc(len, sizeof(char))))
         return NULL;

      /* WideCharToMultiByte signals failure by returning 0,
       * never a negative value. */
      if (!WideCharToMultiByte(code_page,
            0, str, -1, buf, len, NULL, NULL))
      {
         free(buf);
         return NULL;
      }
   }
#else
   /* NOTE: For now, assume non-Windows platforms' 
    * locale is already UTF-8. */
   /* wcstombs returns (size_t)-1 on an unconvertible character;
    * adding 1 turns that into 0, which skips the block and
    * returns NULL. */
   if ((len = wcstombs(NULL, str, 0) + 1))
   {
      if (!(buf = (char*)calloc(len, sizeof(char))))
         return NULL;

      if (wcstombs(buf, str, len) == (size_t)-1)
      {
         free(buf);
         return NULL;
      }
   }
#endif

   return buf;
}
Commit	Line	Data
	1	/* Copyright (C) 2010-2020 The RetroArch team
	2	*
	3	* ---------------------------------------------------------------------------------------
	4	* The following license statement only applies to this file (encoding_utf.c).
	5	* ---------------------------------------------------------------------------------------
	6	*
	7	* Permission is hereby granted, free of charge,
	8	* to any person obtaining a copy of this software and associated documentation files (the "Software"),
	9	* to deal in the Software without restriction, including without limitation the rights to
	10	* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
	11	* and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
	12	*
	13	* The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
	14	*
	15	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
	16	* INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	17	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
	18	* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
	19	* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	20	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
	21	*/
	22
	23	#include <stdint.h>
	24	#include <stdlib.h>
	25	#include <stddef.h>
	26	#include <string.h>
	27
	28	#include <boolean.h>
	29	#include <compat/strl.h>
	30	#include <retro_inline.h>
	31
	32	#include <encodings/utf.h>
	33
	34	#if defined(_WIN32) && !defined(_XBOX)
	35	#include <windows.h>
	36	#elif defined(_XBOX)
	37	#include <xtl.h>
	38	#endif
	39
	40	#define UTF8_WALKBYTE(string) ((((string))++))
	41
	42	static unsigned leading_ones(uint8_t c)
	43	{
	44	unsigned ones = 0;
	45	while (c & 0x80)
	46	{
	47	ones++;
	48	c <<= 1;
	49	}
	50
	51	return ones;
	52	}
	53
	54	/**
	55	* utf8_conv_utf32:
	56	*
	57	* Simple implementation. Assumes the sequence is
	58	* properly synchronized and terminated.
	59	**/
	60	size_t utf8_conv_utf32(uint32_t *out, size_t out_chars,
	61	const char *in, size_t in_size)
	62	{
	63	unsigned i;
	64	size_t ret = 0;
	65	while (in_size && out_chars)
	66	{
	67	unsigned extra, shift;
	68	uint32_t c;
	69	uint8_t first = *in++;
	70	unsigned ones = leading_ones(first);
	71
	72	if (ones > 6 \|\| ones == 1) /* Invalid or desync. */
	73	break;
	74
	75	extra = ones ? ones - 1 : ones;
	76	if (1 + extra > in_size) /* Overflow. */
	77	break;
	78
	79	shift = (extra - 1) * 6;
	80	c = (first & ((1 << (7 - ones)) - 1)) << (6 * extra);
	81
	82	for (i = 0; i < extra; i++, in++, shift -= 6)
	83	c \|= (*in & 0x3f) << shift;
	84
	85	*out++ = c;
	86	in_size -= 1 + extra;
	87	out_chars--;
	88	ret++;
	89	}
	90
	91	return ret;
	92	}
	93
	94	/**
	95	* utf16_conv_utf8:
	96	*
	97	* Leaf function.
	98	**/
	99	bool utf16_conv_utf8(uint8_t out, size_t out_chars,
	100	const uint16_t *in, size_t in_size)
	101	{
	102	size_t out_pos = 0;
	103	size_t in_pos = 0;
	104	static const
	105	uint8_t utf8_limits[5] = { 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
	106
	107	for (;;)
	108	{
	109	unsigned num_adds;
	110	uint32_t value;
	111
	112	if (in_pos == in_size)
	113	{
	114	*out_chars = out_pos;
	115	return true;
	116	}
	117	value = in[in_pos++];
	118	if (value < 0x80)
	119	{
	120	if (out)
	121	out[out_pos] = (char)value;
	122	out_pos++;
	123	continue;
	124	}
	125
	126	if (value >= 0xD800 && value < 0xE000)
	127	{
	128	uint32_t c2;
	129
	130	if (value >= 0xDC00 \|\| in_pos == in_size)
	131	break;
	132	c2 = in[in_pos++];
	133	if (c2 < 0xDC00 \|\| c2 >= 0xE000)
	134	break;
	135	value = (((value - 0xD800) << 10) \| (c2 - 0xDC00)) + 0x10000;
	136	}
	137
	138	for (num_adds = 1; num_adds < 5; num_adds++)
	139	if (value < (((uint32_t)1) << (num_adds * 5 + 6)))
	140	break;
	141	if (out)
	142	out[out_pos] = (char)(utf8_limits[num_adds - 1]
	143	+ (value >> (6 * num_adds)));
	144	out_pos++;
	145	do
	146	{
	147	num_adds--;
	148	if (out)
	149	out[out_pos] = (char)(0x80
	150	+ ((value >> (6 * num_adds)) & 0x3F));
	151	out_pos++;
	152	}while (num_adds != 0);
	153	}
	154
	155	*out_chars = out_pos;
	156	return false;
	157	}
	158
	159	/**
	160	* utf8cpy:
	161	*
	162	* Acts mostly like strlcpy.
	163	*
	164	* Copies the given number of UTF-8 characters,
	165	* but at most @d_len bytes.
	166	*
	167	* Always NULL terminates. Does not copy half a character.
	168	* @s is assumed valid UTF-8.
	169	* Use only if @chars is considerably less than @d_len.
	170	*
	171	* @return Number of bytes.
	172	**/
	173	size_t utf8cpy(char d, size_t d_len, const char s, size_t chars)
	174	{
	175	const uint8_t sb = (const uint8_t)s;
	176	const uint8_t *sb_org = sb;
	177
	178	if (!s)
	179	return 0;
	180
	181	while (*sb && chars-- > 0)
	182	{
	183	sb++;
	184	while ((*sb & 0xC0) == 0x80)
	185	sb++;
	186	}
	187
	188	if ((size_t)(sb - sb_org) > d_len-1 /* NUL */)
	189	{
	190	sb = sb_org + d_len-1;
	191	while ((*sb & 0xC0) == 0x80)
	192	sb--;
	193	}
	194
	195	memcpy(d, sb_org, sb-sb_org);
	196	d[sb-sb_org] = '\0';
	197
	198	return sb-sb_org;
	199	}
	200
	201	/**
	202	* utf8skip:
	203	*
	204	* Leaf function
	205	**/
	206	const char utf8skip(const char str, size_t chars)
	207	{
	208	const uint8_t strb = (const uint8_t)str;
	209
	210	if (!chars)
	211	return str;
	212
	213	do
	214	{
	215	strb++;
	216	while ((*strb & 0xC0)==0x80)
	217	strb++;
	218	chars--;
	219	}while (chars);
	220
	221	return (const char*)strb;
	222	}
	223
	224	/**
	225	* utf8len:
	226	*
	227	* Leaf function.
	228	**/
	229	size_t utf8len(const char *string)
	230	{
	231	size_t ret = 0;
	232
	233	if (!string)
	234	return 0;
	235
	236	while (*string)
	237	{
	238	if ((*string & 0xC0) != 0x80)
	239	ret++;
	240	string++;
	241	}
	242	return ret;
	243	}
	244
	245	/**
	246	* utf8_walk:
	247	*
	248	* Does not validate the input.
	249	*
	250	* Leaf function.
	251	*
	252	* @return Returns garbage if it's not UTF-8.
	253	**/
	254	uint32_t utf8_walk(const char **string)
	255	{
	256	uint8_t first = UTF8_WALKBYTE(string);
	257	uint32_t ret = 0;
	258
	259	if (first < 128)
	260	return first;
	261
	262	ret = (ret << 6) \| (UTF8_WALKBYTE(string) & 0x3F);
	263	if (first >= 0xE0)
	264	{
	265	ret = (ret << 6) \| (UTF8_WALKBYTE(string) & 0x3F);
	266	if (first >= 0xF0)
	267	{
	268	ret = (ret << 6) \| (UTF8_WALKBYTE(string) & 0x3F);
	269	return ret \| (first & 7) << 18;
	270	}
	271	return ret \| (first & 15) << 12;
	272	}
	273
	274	return ret \| (first & 31) << 6;
	275	}
	276
	277	static bool utf16_to_char(uint8_t **utf_data,
	278	size_t dest_len, const uint16_t in)
	279	{
	280	unsigned len = 0;
	281	while (in[len] != '\0')
	282	len++;
	283	utf16_conv_utf8(NULL, dest_len, in, len);
	284	*dest_len += 1;
	285	if ((utf_data = (uint8_t)malloc(*dest_len)) != 0)
	286	return utf16_conv_utf8(*utf_data, dest_len, in, len);
	287	return false;
	288	}
	289
	290	/**
	291	* utf16_to_char_string:
	292	**/
	293	bool utf16_to_char_string(const uint16_t in, char s, size_t len)
	294	{
	295	size_t dest_len = 0;
	296	uint8_t *utf16_data = NULL;
	297	bool ret = utf16_to_char(&utf16_data, &dest_len, in);
	298
	299	if (ret)
	300	{
	301	utf16_data[dest_len] = 0;
	302	strlcpy(s, (const char*)utf16_data, len);
	303	}
	304
	305	free(utf16_data);
	306	utf16_data = NULL;
	307
	308	return ret;
	309	}
	310
	311	#if defined(_WIN32) && !defined(_XBOX) && !defined(UNICODE)
	312	/**
	313	* mb_to_mb_string_alloc:
	314	*
	315	* @return Returned pointer MUST be freed by the caller if non-NULL.
	316	**/
	317	static char mb_to_mb_string_alloc(const char str,
	318	enum CodePage cp_in, enum CodePage cp_out)
	319	{
	320	wchar_t *path_buf_wide = NULL;
	321	int path_buf_wide_len = MultiByteToWideChar(cp_in, 0, str, -1, NULL, 0);
	322
	323	/* Windows 95 will return 0 from these functions with
	324	* a UTF8 codepage set without MSLU.
	325	*
	326	* From an unknown MSDN version (others omit this info):
	327	* - CP_UTF8 Windows 98/Me, Windows NT 4.0 and later:
	328	* Translate using UTF-8. When this is set, dwFlags must be zero.
	329	* - Windows 95: Under the Microsoft Layer for Unicode,
	330	* MultiByteToWideChar also supports CP_UTF7 and CP_UTF8.
	331	*/
	332
	333	if (!path_buf_wide_len)
	334	return strdup(str);
	335
	336	if ((path_buf_wide = (wchar_t*)
	337	calloc(path_buf_wide_len + sizeof(wchar_t), sizeof(wchar_t))))
	338	{
	339	MultiByteToWideChar(cp_in, 0,
	340	str, -1, path_buf_wide, path_buf_wide_len);
	341
	342	if (*path_buf_wide)
	343	{
	344	int path_buf_len = WideCharToMultiByte(cp_out, 0,
	345	path_buf_wide, -1, NULL, 0, NULL, NULL);
	346
	347	if (path_buf_len)
	348	{
	349	char path_buf = (char)
	350	calloc(path_buf_len + sizeof(char), sizeof(char));
	351
	352	if (path_buf)
	353	{
	354	WideCharToMultiByte(cp_out, 0,
	355	path_buf_wide, -1, path_buf,
	356	path_buf_len, NULL, NULL);
	357
	358	free(path_buf_wide);
	359
	360	if (*path_buf)
	361	return path_buf;
	362
	363	free(path_buf);
	364	return NULL;
	365	}
	366	}
	367	else
	368	{
	369	free(path_buf_wide);
	370	return strdup(str);
	371	}
	372	}
	373
	374	free(path_buf_wide);
	375	}
	376
	377	return NULL;
	378	}
	379	#endif
	380
	381	/**
	382	* utf8_to_local_string_alloc:
	383	*
	384	* @return Returned pointer MUST be freed by the caller if non-NULL.
	385	**/
	386	char* utf8_to_local_string_alloc(const char *str)
	387	{
	388	if (str && *str)
	389	#if defined(_WIN32) && !defined(_XBOX) && !defined(UNICODE)
	390	return mb_to_mb_string_alloc(str, CODEPAGE_UTF8, CODEPAGE_LOCAL);
	391	#else
	392	return strdup(str); /* Assume string needs no modification if not on Windows */
	393	#endif
	394	return NULL;
	395	}
	396
	397	/**
	398	* local_to_utf8_string_alloc:
	399	*
	400	* @return Returned pointer MUST be freed by the caller if non-NULL.
	401	**/
	402	char local_to_utf8_string_alloc(const char str)
	403	{
	404	if (str && *str)
	405	#if defined(_WIN32) && !defined(_XBOX) && !defined(UNICODE)
	406	return mb_to_mb_string_alloc(str, CODEPAGE_LOCAL, CODEPAGE_UTF8);
	407	#else
	408	return strdup(str); /* Assume string needs no modification if not on Windows */
	409	#endif
	410	return NULL;
	411	}
	412
	413	/**
	414	* utf8_to_utf16_string_alloc:
	415	*
	416	* @return Returned pointer MUST be freed by the caller if non-NULL.
	417	**/
	418	wchar_t* utf8_to_utf16_string_alloc(const char *str)
	419	{
	420	#ifdef _WIN32
	421	int len = 0;
	422	#else
	423	size_t len = 0;
	424	#endif
	425	wchar_t *buf = NULL;
	426
	427	if (!str \|\| !*str)
	428	return NULL;
	429
	430	#ifdef _WIN32
	431	if ((len = MultiByteToWideChar(CP_UTF8, 0, str, -1, NULL, 0)))
	432	{
	433	if (!(buf = (wchar_t*)calloc(len, sizeof(wchar_t))))
	434	return NULL;
	435
	436	if ((MultiByteToWideChar(CP_UTF8, 0, str, -1, buf, len)) < 0)
	437	{
	438	free(buf);
	439	return NULL;
	440	}
	441	}
	442	else
	443	{
	444	/* Fallback to ANSI codepage instead */
	445	if ((len = MultiByteToWideChar(CP_ACP, 0, str, -1, NULL, 0)))
	446	{
	447	if (!(buf = (wchar_t*)calloc(len, sizeof(wchar_t))))
	448	return NULL;
	449
	450	if ((MultiByteToWideChar(CP_ACP, 0, str, -1, buf, len)) < 0)
	451	{
	452	free(buf);
	453	return NULL;
	454	}
	455	}
	456	}
	457	#else
	458	/* NOTE: For now, assume non-Windows platforms' locale is already UTF-8. */
	459	if ((len = mbstowcs(NULL, str, 0) + 1))
	460	{
	461	if (!(buf = (wchar_t*)calloc(len, sizeof(wchar_t))))
	462	return NULL;
	463
	464	if ((mbstowcs(buf, str, len)) == (size_t)-1)
	465	{
	466	free(buf);
	467	return NULL;
	468	}
	469	}
	470	#endif
	471
	472	return buf;
	473	}
	474
	475	/**
	476	* utf16_to_utf8_string_alloc:
	477	*
	478	* @return Returned pointer MUST be freed by the caller if non-NULL.
	479	**/
	480	char* utf16_to_utf8_string_alloc(const wchar_t *str)
	481	{
	482	#ifdef _WIN32
	483	int len = 0;
	484	#else
	485	size_t len = 0;
	486	#endif
	487	char *buf = NULL;
	488
	489	if (!str \|\| !*str)
	490	return NULL;
	491
	492	#ifdef _WIN32
	493	{
	494	UINT code_page = CP_UTF8;
	495
	496	/* fallback to ANSI codepage instead */
	497	if (!(len = WideCharToMultiByte(code_page,
	498	0, str, -1, NULL, 0, NULL, NULL)))
	499	{
	500	code_page = CP_ACP;
	501	len = WideCharToMultiByte(code_page,
	502	0, str, -1, NULL, 0, NULL, NULL);
	503	}
	504
	505	if (!(buf = (char*)calloc(len, sizeof(char))))
	506	return NULL;
	507
	508	if (WideCharToMultiByte(code_page,
	509	0, str, -1, buf, len, NULL, NULL) < 0)
	510	{
	511	free(buf);
	512	return NULL;
	513	}
	514	}
	515	#else
	516	/* NOTE: For now, assume non-Windows platforms'
	517	* locale is already UTF-8. */
	518	if ((len = wcstombs(NULL, str, 0) + 1))
	519	{
	520	if (!(buf = (char*)calloc(len, sizeof(char))))
	521	return NULL;
	522
	523	if (wcstombs(buf, str, len) == (size_t)-1)
	524	{
	525	free(buf);
	526	return NULL;
	527	}
	528	}
	529	#endif
	530
	531	return buf;
	532	}