[pcsx_rearmed.git] / libretro-common / encodings / encoding_utf.c

/* Copyright  (C) 2010-2020 The RetroArch team
 *
 * ---------------------------------------------------------------------------------------
 * The following license statement only applies to this file (encoding_utf.c).
 * ---------------------------------------------------------------------------------------
 *
 * Permission is hereby granted, free of charge,
 * to any person obtaining a copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
 * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include <stdint.h>
#include <stdlib.h>
#include <stddef.h>
#include <string.h>

#include <boolean.h>
#include <compat/strl.h>
#include <retro_inline.h>

#include <encodings/utf.h>

#if defined(_WIN32) && !defined(_XBOX)
#include <windows.h>
#elif defined(_XBOX)
#include <xtl.h>
#endif

/* Yields the char currently pointed to by *string, then advances
 * *string by one. 'string' must be a pointer to a char pointer;
 * the pointed-to cursor is modified as a side effect. */
#define UTF8_WALKBYTE(string) (*((*(string))++))

/* Counts the consecutive 1 bits of 'c', starting at the most
 * significant bit. Yields 0 when the top bit is clear and 8 for 0xFF. */
static unsigned leading_ones(uint8_t c)
{
   unsigned count = 0;
   uint8_t  mask;

   /* Slide a single-bit mask down from bit 7 until a 0 bit is met
    * or the mask has been shifted out entirely. */
   for (mask = 0x80; mask != 0 && (c & mask) != 0; mask >>= 1)
      count++;

   return count;
}

/* Minimal UTF-8 -> UTF-32 decoder.
 *
 * Reads at most 'in_size' bytes from 'in' and stores at most
 * 'out_chars' code points into 'out'. Returns the number of
 * code points stored.
 *
 * Decoding stops early when a lead byte has exactly one or more
 * than six leading 1 bits (desync / invalid), or when a sequence
 * would extend past 'in_size'. Continuation bytes themselves are
 * not validated: the input is assumed to be properly synchronized. */

size_t utf8_conv_utf32(uint32_t *out, size_t out_chars,
      const char *in, size_t in_size)
{
   size_t written = 0;

   while (in_size != 0 && out_chars != 0)
   {
      uint32_t cp;
      unsigned tail, remaining;
      uint8_t  lead = *in++;
      unsigned ones = leading_ones(lead);

      /* A lone continuation byte or an impossible lead byte. */
      if (ones == 1 || ones > 6)
         break;

      /* Number of continuation bytes that follow the lead byte. */
      tail = (ones == 0) ? 0 : ones - 1;

      /* The whole sequence has to fit in what is left of the input. */
      if (in_size < 1 + tail)
         break;

      /* Payload bits of the lead byte go to the top of the code point. */
      cp = (lead & ((1 << (7 - ones)) - 1)) << (6 * tail);

      /* Each continuation byte contributes six bits, highest first. */
      for (remaining = tail; remaining > 0; remaining--, in++)
         cp |= (*in & 0x3f) << (6 * (remaining - 1));

      out[written++] = cp;
      in_size       -= 1 + tail;
      out_chars--;
   }

   return written;
}

/* Converts 'in_size' UTF-16 code units from 'in' to UTF-8.
 *
 * When 'out' is NULL nothing is written and only the byte count
 * is computed. '*out_chars' is output only: it receives the number
 * of bytes produced (writes to 'out' are not bounded by it).
 *
 * Returns true when the whole input was converted. Returns false
 * on an unpaired surrogate; '*out_chars' then holds the number of
 * bytes produced before the offending unit. */
bool utf16_conv_utf8(uint8_t *out, size_t *out_chars,
     const uint16_t *in, size_t in_size)
{
   /* Lead-byte markers for sequences with 1..5 continuation bytes. */
   static const uint8_t lead_marker[5] = { 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
   size_t written  = 0;
   size_t consumed = 0;
   bool   ok       = true;

   while (consumed < in_size)
   {
      unsigned trail;
      uint32_t cp = in[consumed++];

      /* Plain ASCII maps to a single byte. */
      if (cp < 0x80)
      {
         if (out)
            out[written] = (uint8_t)cp;
         written++;
         continue;
      }

      /* Surrogate range: need a high surrogate followed by a low one. */
      if (cp >= 0xD800 && cp < 0xE000)
      {
         uint32_t low;

         if (cp >= 0xDC00 || consumed == in_size)
         {
            ok = false;
            break;
         }

         low = in[consumed++];
         if (low < 0xDC00 || low >= 0xE000)
         {
            ok = false;
            break;
         }

         cp = (((cp - 0xD800) << 10) | (low - 0xDC00)) + 0x10000;
      }

      /* Find how many continuation bytes the code point needs. */
      trail = 1;
      while (trail < 5 && cp >= (((uint32_t)1) << (trail * 5 + 6)))
         trail++;

      /* Lead byte: marker plus the topmost payload bits. */
      if (out)
         out[written] = (uint8_t)(lead_marker[trail - 1]
               + (cp >> (6 * trail)));
      written++;

      /* Continuation bytes, six bits each, most significant first. */
      for (; trail != 0; trail--)
      {
         if (out)
            out[written] = (uint8_t)(0x80
                  + ((cp >> (6 * (trail - 1))) & 0x3F));
         written++;
      }
   }

   *out_chars = written;
   return ok;
}

/* Acts mostly like strlcpy.
 *
 * Copies the given number of UTF-8 characters,
 * but at most d_len bytes (including the terminator).
 *
 * Always NUL terminates when there is room for it, i.e. whenever
 * 'd' is non-NULL and 'd_len' is non-zero (also when 's' is NULL).
 * Does not copy half a character.
 *
 * Returns the number of bytes copied, not counting the terminator.
 * Returns 0 without touching 'd' if 'd' is NULL or 'd_len' is 0.
 * 's' is assumed valid UTF-8.
 * Use only if 'chars' is considerably less than 'd_len'. */
size_t utf8cpy(char *d, size_t d_len, const char *s, size_t chars)
{
   const uint8_t *sb     = (const uint8_t*)s;
   const uint8_t *sb_org = sb;

   /* Without this guard 'd_len - 1' below would wrap around to
    * SIZE_MAX, disabling truncation and overflowing 'd'. */
   if (!d || d_len == 0)
      return 0;

   if (!s)
   {
      d[0] = '\0';
      return 0;
   }

   /* Advance over up to 'chars' characters: one lead byte
    * followed by any number of continuation bytes (10xxxxxx). */
   while (*sb && chars-- > 0)
   {
      sb++;
      while ((*sb & 0xC0) == 0x80)
         sb++;
   }

   if ((size_t)(sb - sb_org) > d_len-1 /* NUL */)
   {
      /* Too long for the destination: cut at the capacity, then
       * back up to a character boundary so no sequence is split. */
      sb = sb_org + d_len-1;
      while ((*sb & 0xC0) == 0x80)
         sb--;
   }

   memcpy(d, sb_org, sb-sb_org);
   d[sb-sb_org] = '\0';

   return sb-sb_org;
}

/* Returns a pointer 'chars' UTF-8 characters past 'str'.
 *
 * A character is one byte followed by any continuation bytes
 * (10xxxxxx). Skipping never moves past the NUL terminator: if the
 * string holds fewer than 'chars' characters, a pointer to the
 * terminator is returned. Returns NULL when 'str' is NULL. */
const char *utf8skip(const char *str, size_t chars)
{
   const uint8_t *strb = (const uint8_t*)str;

   if (!str)
      return NULL;

   /* Stop at the terminator so an oversized 'chars' cannot
    * walk off the end of the string. */
   while (chars && *strb)
   {
      strb++;
      while ((*strb & 0xC0) == 0x80)
         strb++;
      chars--;
   }

   return (const char*)strb;
}

/* Returns the number of UTF-8 characters in 'string', computed as
 * the number of bytes before the terminator that are not
 * continuation bytes (10xxxxxx). Returns 0 for a NULL pointer. */
size_t utf8len(const char *string)
{
   const char *p;
   size_t count = 0;

   if (string)
   {
      for (p = string; *p != '\0'; p++)
         count += ((*p & 0xC0) != 0x80);
   }

   return count;
}

/* Decodes one code point from *string and advances *string past
 * the bytes it consumed (1 to 4, chosen from the first byte).
 * Does not validate the input, returns garbage if it's not UTF-8. */
uint32_t utf8_walk(const char **string)
{
   uint8_t  lead = UTF8_WALKBYTE(string);
   uint32_t acc;

   /* Single-byte (ASCII) character. */
   if (lead < 128)
      return lead;

   /* Every multi-byte sequence has at least one continuation byte. */
   acc = UTF8_WALKBYTE(string) & 0x3F;
   if (lead < 0xE0)
      return acc | (lead & 31) << 6;

   acc = (acc << 6) | (UTF8_WALKBYTE(string) & 0x3F);
   if (lead < 0xF0)
      return acc | (lead & 15) << 12;

   acc = (acc << 6) | (UTF8_WALKBYTE(string) & 0x3F);
   return acc | (lead & 7) << 18;
}

/* Converts the zero-terminated UTF-16 string 'in' to UTF-8 in a
 * freshly malloc'ed buffer stored in '*utf_data'.
 *
 * The buffer is sized with one spare byte beyond the converted
 * data. On return '*dest_len' is the number of bytes produced by
 * the conversion. Returns false if the allocation fails (with
 * '*utf_data' NULL) or if the conversion fails; in the latter case
 * '*utf_data' is still allocated and must be freed by the caller. */
static bool utf16_to_char(uint8_t **utf_data,
      size_t *dest_len, const uint16_t *in)
{
   unsigned units;

   /* Length of the input in 16-bit units, terminator excluded. */
   for (units = 0; in[units] != '\0'; units++)
   {
   }

   /* Dry run: only measures how many bytes are required. */
   utf16_conv_utf8(NULL, dest_len, in, units);
   *dest_len += 1;

   *utf_data  = (uint8_t*)malloc(*dest_len);
   if (!*utf_data)
      return false;

   return utf16_conv_utf8(*utf_data, dest_len, in, units);
}

/* Converts the zero-terminated UTF-16 string 'in' to UTF-8 and
 * copies the result into 's' with strlcpy, using 'len' as the
 * destination size. Returns false, leaving 's' untouched, if the
 * conversion fails. */
bool utf16_to_char_string(const uint16_t *in, char *s, size_t len)
{
   uint8_t *converted = NULL;
   size_t   n_bytes   = 0;

   if (!utf16_to_char(&converted, &n_bytes, in))
   {
      /* The helper may have allocated before failing. */
      free(converted);
      return false;
   }

   /* The buffer has one spare byte reserved for this terminator. */
   converted[n_bytes] = 0;
   strlcpy(s, (const char*)converted, len);
   free(converted);

   return true;
}

#if defined(_WIN32) && !defined(_XBOX) && !defined(UNICODE)
/* Re-encodes 'str' from code page 'cp_in' to code page 'cp_out',
 * going through a temporary wide-character buffer.
 *
 * If either size query reports 0, a plain copy of 'str' is
 * returned instead. Returns NULL on allocation failure or when a
 * conversion yields an empty string.
 *
 * Returned pointer MUST be freed by the caller if non-NULL. */
static char *mb_to_mb_string_alloc(const char *str,
      enum CodePage cp_in, enum CodePage cp_out)
{
   char    *narrow     = NULL;
   wchar_t *wide       = NULL;
   int      narrow_len = 0;
   int      wide_len   = MultiByteToWideChar(cp_in, 0, str, -1, NULL, 0);

   /* Windows 95 will return 0 from these functions with
    * a UTF8 codepage set without MSLU.
    *
    * From an unknown MSDN version (others omit this info):
    *   - CP_UTF8 Windows 98/Me, Windows NT 4.0 and later:
    *   Translate using UTF-8. When this is set, dwFlags must be zero.
    *   - Windows 95: Under the Microsoft Layer for Unicode,
    *   MultiByteToWideChar also supports CP_UTF7 and CP_UTF8.
    */
   if (!wide_len)
      return strdup(str);

   wide = (wchar_t*)
      calloc(wide_len + sizeof(wchar_t), sizeof(wchar_t));
   if (!wide)
      return NULL;

   MultiByteToWideChar(cp_in, 0, str, -1, wide, wide_len);

   /* The buffer is zero-initialized, so an empty result here
    * means nothing was converted. */
   if (!*wide)
   {
      free(wide);
      return NULL;
   }

   narrow_len = WideCharToMultiByte(cp_out, 0,
         wide, -1, NULL, 0, NULL, NULL);

   /* Same fallback as above when the target size query reports 0. */
   if (!narrow_len)
   {
      free(wide);
      return strdup(str);
   }

   narrow = (char*)calloc(narrow_len + sizeof(char), sizeof(char));
   if (!narrow)
   {
      free(wide);
      return NULL;
   }

   WideCharToMultiByte(cp_out, 0,
         wide, -1, narrow, narrow_len, NULL, NULL);
   free(wide);

   if (!*narrow)
   {
      free(narrow);
      return NULL;
   }

   return narrow;
}
#endif

/* Returns a newly allocated copy of the UTF-8 string 'str'; in
 * non-UNICODE Windows builds (excluding Xbox) it is re-encoded to
 * CODEPAGE_LOCAL. Returns NULL when 'str' is NULL or empty.
 *
 * Returned pointer MUST be freed by the caller if non-NULL. */
char* utf8_to_local_string_alloc(const char *str)
{
   if (!str || !*str)
      return NULL;

#if defined(_WIN32) && !defined(_XBOX) && !defined(UNICODE)
   return mb_to_mb_string_alloc(str, CODEPAGE_UTF8, CODEPAGE_LOCAL);
#else
   /* assume string needs no modification if not on Windows */
   return strdup(str);
#endif
}

/* Returns a newly allocated copy of the local-encoded string
 * 'str'; in non-UNICODE Windows builds (excluding Xbox) it is
 * re-encoded from CODEPAGE_LOCAL to UTF-8. Returns NULL when 'str'
 * is NULL or empty.
 *
 * Returned pointer MUST be freed by the caller if non-NULL. */
char* local_to_utf8_string_alloc(const char *str)
{
   if (!str || !*str)
      return NULL;

#if defined(_WIN32) && !defined(_XBOX) && !defined(UNICODE)
   return mb_to_mb_string_alloc(str, CODEPAGE_LOCAL, CODEPAGE_UTF8);
#else
   /* assume string needs no modification if not on Windows */
   return strdup(str);
#endif
}

/* Converts the UTF-8 string 'str' to a newly allocated,
 * zero-terminated wide string.
 *
 * On Windows the conversion uses MultiByteToWideChar with CP_UTF8,
 * falling back to CP_ACP when the UTF-8 size query fails. Elsewhere
 * it uses mbstowcs and therefore depends on the current locale.
 *
 * Returns NULL when 'str' is NULL or empty, when memory cannot be
 * allocated, or when the conversion fails.
 *
 * Returned pointer MUST be freed by the caller if non-NULL. */
wchar_t* utf8_to_utf16_string_alloc(const char *str)
{
#ifdef _WIN32
   UINT code_page = CP_UTF8;
   int len        = 0;
#else
   size_t len     = 0;
#endif
   wchar_t *buf   = NULL;

   if (!str || !*str)
      return NULL;

#ifdef _WIN32
   len = MultiByteToWideChar(code_page, 0, str, -1, NULL, 0);

   if (len <= 0)
   {
      /* fallback to ANSI codepage instead */
      code_page = CP_ACP;
      len       = MultiByteToWideChar(code_page, 0, str, -1, NULL, 0);
   }

   if (len <= 0)
      return NULL;

   buf = (wchar_t*)calloc(len, sizeof(wchar_t));

   if (!buf)
      return NULL;

   /* MultiByteToWideChar reports failure by returning 0 (never a
    * negative value), so that is what has to be checked here. */
   if (MultiByteToWideChar(code_page, 0, str, -1, buf, len) <= 0)
   {
      free(buf);
      return NULL;
   }
#else
   /* NOTE: For now, assume non-Windows platforms' locale is already UTF-8. */
   len = mbstowcs(NULL, str, 0);

   /* (size_t)-1 signals an invalid multibyte sequence. */
   if (len == (size_t)-1)
      return NULL;

   /* Room for the terminator. */
   len++;

   buf = (wchar_t*)calloc(len, sizeof(wchar_t));

   if (!buf)
      return NULL;

   if (mbstowcs(buf, str, len) == (size_t)-1)
   {
      free(buf);
      return NULL;
   }
#endif

   return buf;
}

/* Converts the wide string 'str' to a newly allocated,
 * zero-terminated multibyte string.
 *
 * On Windows the conversion uses WideCharToMultiByte with CP_UTF8,
 * falling back to CP_ACP when the UTF-8 size query fails. Elsewhere
 * it uses wcstombs and therefore depends on the current locale.
 *
 * Returns NULL when 'str' is NULL or empty, when memory cannot be
 * allocated, or when the conversion fails.
 *
 * Returned pointer MUST be freed by the caller if non-NULL. */
char* utf16_to_utf8_string_alloc(const wchar_t *str)
{
#ifdef _WIN32
   UINT code_page = CP_UTF8;
   int len        = 0;
#else
   size_t len     = 0;
#endif
   char *buf      = NULL;

   if (!str || !*str)
      return NULL;

#ifdef _WIN32
   len = WideCharToMultiByte(code_page,
         0, str, -1, NULL, 0, NULL, NULL);

   /* fallback to ANSI codepage instead */
   if (len <= 0)
   {
      code_page = CP_ACP;
      len       = WideCharToMultiByte(code_page,
            0, str, -1, NULL, 0, NULL, NULL);
   }

   /* Both size queries failed. Bail out: a zero-sized allocation
    * could otherwise be handed back as an unterminated "string". */
   if (len <= 0)
      return NULL;

   buf = (char*)calloc(len, sizeof(char));

   if (!buf)
      return NULL;

   /* WideCharToMultiByte reports failure by returning 0 (never a
    * negative value), so that is what has to be checked here. */
   if (WideCharToMultiByte(code_page,
         0, str, -1, buf, len, NULL, NULL) <= 0)
   {
      free(buf);
      return NULL;
   }
#else
   /* NOTE: For now, assume non-Windows platforms'
    * locale is already UTF-8. */
   len = wcstombs(NULL, str, 0);

   /* (size_t)-1 signals a wide character that cannot be converted. */
   if (len == (size_t)-1)
      return NULL;

   /* Room for the terminator. */
   len++;

   buf = (char*)calloc(len, sizeof(char));

   if (!buf)
      return NULL;

   if (wcstombs(buf, str, len) == (size_t)-1)
   {
      free(buf);
      return NULL;
   }
#endif

   return buf;
}
Commit	Line	Data
07a88422	1	/* Copyright (C) 2010-2020 The RetroArch team
	2	*
	3	* ---------------------------------------------------------------------------------------
	4	* The following license statement only applies to this file (encoding_utf.c).
	5	* ---------------------------------------------------------------------------------------
	6	*
	7	* Permission is hereby granted, free of charge,
	8	* to any person obtaining a copy of this software and associated documentation files (the "Software"),
	9	* to deal in the Software without restriction, including without limitation the rights to
	10	* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
	11	* and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
	12	*
	13	* The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
	14	*
	15	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
	16	* INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	17	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
	18	* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
	19	* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	20	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
	21	*/
	22
	23	#include <stdint.h>
	24	#include <stdlib.h>
	25	#include <stddef.h>
	26	#include <string.h>
	27
	28	#include <boolean.h>
	29	#include <compat/strl.h>
	30	#include <retro_inline.h>
	31
	32	#include <encodings/utf.h>
	33
	34	#if defined(_WIN32) && !defined(_XBOX)
	35	#include <windows.h>
	36	#elif defined(_XBOX)
	37	#include <xtl.h>
	38	#endif
	39
	40	#define UTF8_WALKBYTE(string) ((((string))++))
	41
	42	static unsigned leading_ones(uint8_t c)
	43	{
	44	unsigned ones = 0;
	45	while (c & 0x80)
	46	{
	47	ones++;
	48	c <<= 1;
	49	}
	50
	51	return ones;
	52	}
	53
	54	/* Simple implementation. Assumes the sequence is
	55	* properly synchronized and terminated. */
	56
	57	size_t utf8_conv_utf32(uint32_t *out, size_t out_chars,
	58	const char *in, size_t in_size)
	59	{
	60	unsigned i;
	61	size_t ret = 0;
	62	while (in_size && out_chars)
	63	{
	64	unsigned extra, shift;
65	uint32_t c;
66	uint8_t first = *in++;
67	unsigned ones = leading_ones(first);
68
69	if (ones > 6 \|\| ones == 1) /* Invalid or desync. */
70	break;
71
72	extra = ones ? ones - 1 : ones;
73	if (1 + extra > in_size) /* Overflow. */
74	break;
75
76	shift = (extra - 1) * 6;
77	c = (first & ((1 << (7 - ones)) - 1)) << (6 * extra);
78
79	for (i = 0; i < extra; i++, in++, shift -= 6)
80	c \|= (*in & 0x3f) << shift;
81
82	*out++ = c;
83	in_size -= 1 + extra;
84	out_chars--;
85	ret++;
86	}
87
88	return ret;
89	}
90
91	bool utf16_conv_utf8(uint8_t out, size_t out_chars,
92	const uint16_t *in, size_t in_size)
93	{
94	size_t out_pos = 0;
95	size_t in_pos = 0;
96	static const
97	uint8_t utf8_limits[5] = { 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
98
99	for (;;)
100	{
101	unsigned num_adds;
102	uint32_t value;
103
104	if (in_pos == in_size)
105	{
106	*out_chars = out_pos;
107	return true;
108	}
109	value = in[in_pos++];
110	if (value < 0x80)
111	{
112	if (out)
113	out[out_pos] = (char)value;
114	out_pos++;
115	continue;
116	}
117
118	if (value >= 0xD800 && value < 0xE000)
119	{
120	uint32_t c2;
121
122	if (value >= 0xDC00 \|\| in_pos == in_size)
123	break;
124	c2 = in[in_pos++];
125	if (c2 < 0xDC00 \|\| c2 >= 0xE000)
126	break;
127	value = (((value - 0xD800) << 10) \| (c2 - 0xDC00)) + 0x10000;
128	}
129
130	for (num_adds = 1; num_adds < 5; num_adds++)
131	if (value < (((uint32_t)1) << (num_adds * 5 + 6)))
132	break;
133	if (out)
134	out[out_pos] = (char)(utf8_limits[num_adds - 1]
135	+ (value >> (6 * num_adds)));
136	out_pos++;
137	do
138	{
139	num_adds--;
140	if (out)
141	out[out_pos] = (char)(0x80
142	+ ((value >> (6 * num_adds)) & 0x3F));
143	out_pos++;
144	}while (num_adds != 0);
145	}
146
147	*out_chars = out_pos;
148	return false;
149	}
150
151	/* Acts mostly like strlcpy.
152	*
153	* Copies the given number of UTF-8 characters,
154	* but at most d_len bytes.
155	*
156	* Always NULL terminates.
157	* Does not copy half a character.
158	*
159	* Returns number of bytes. 's' is assumed valid UTF-8.
160	* Use only if 'chars' is considerably less than 'd_len'. */
161	size_t utf8cpy(char d, size_t d_len, const char s, size_t chars)
162	{
163	const uint8_t sb = (const uint8_t)s;
164	const uint8_t *sb_org = sb;
165
166	if (!s)
167	return 0;
168
169	while (*sb && chars-- > 0)
170	{
171	sb++;
172	while ((*sb & 0xC0) == 0x80)
173	sb++;
174	}
175
176	if ((size_t)(sb - sb_org) > d_len-1 /* NUL */)
177	{
178	sb = sb_org + d_len-1;
179	while ((*sb & 0xC0) == 0x80)
180	sb--;
181	}
182
183	memcpy(d, sb_org, sb-sb_org);
184	d[sb-sb_org] = '\0';
185
186	return sb-sb_org;
187	}
188
189	const char utf8skip(const char str, size_t chars)
190	{
191	const uint8_t strb = (const uint8_t)str;
192
193	if (!chars)
194	return str;
195
196	do
197	{
198	strb++;
199	while ((*strb & 0xC0)==0x80)
200	strb++;
201	chars--;
202	}while (chars);
203
204	return (const char*)strb;
205	}
206
207	size_t utf8len(const char *string)
208	{
209	size_t ret = 0;
210
211	if (!string)
212	return 0;
213
214	while (*string)
215	{
216	if ((*string & 0xC0) != 0x80)
217	ret++;
218	string++;
219	}
220	return ret;
221	}
222
223	/* Does not validate the input, returns garbage if it's not UTF-8. */
224	uint32_t utf8_walk(const char **string)
225	{
226	uint8_t first = UTF8_WALKBYTE(string);
227	uint32_t ret = 0;
228
229	if (first < 128)
230	return first;
231
232	ret = (ret << 6) \| (UTF8_WALKBYTE(string) & 0x3F);
233	if (first >= 0xE0)
234	{
235	ret = (ret << 6) \| (UTF8_WALKBYTE(string) & 0x3F);
236	if (first >= 0xF0)
237	{
238	ret = (ret << 6) \| (UTF8_WALKBYTE(string) & 0x3F);
239	return ret \| (first & 7) << 18;
240	}
241	return ret \| (first & 15) << 12;
242	}
243
244	return ret \| (first & 31) << 6;
245	}
246
247	static bool utf16_to_char(uint8_t **utf_data,
248	size_t dest_len, const uint16_t in)
249	{
250	unsigned len = 0;
251
252	while (in[len] != '\0')
253	len++;
254
255	utf16_conv_utf8(NULL, dest_len, in, len);
256	*dest_len += 1;
257	utf_data = (uint8_t)malloc(*dest_len);
258	if (*utf_data == 0)
259	return false;
260
261	return utf16_conv_utf8(*utf_data, dest_len, in, len);
262	}
263
264	bool utf16_to_char_string(const uint16_t in, char s, size_t len)
265	{
266	size_t dest_len = 0;
267	uint8_t *utf16_data = NULL;
268	bool ret = utf16_to_char(&utf16_data, &dest_len, in);
269
270	if (ret)
271	{
272	utf16_data[dest_len] = 0;
273	strlcpy(s, (const char*)utf16_data, len);
274	}
275
276	free(utf16_data);
277	utf16_data = NULL;
278
279	return ret;
280	}
281
282	#if defined(_WIN32) && !defined(_XBOX) && !defined(UNICODE)
283	/* Returned pointer MUST be freed by the caller if non-NULL. */
284	static char mb_to_mb_string_alloc(const char str,
285	enum CodePage cp_in, enum CodePage cp_out)
286	{
287	wchar_t *path_buf_wide = NULL;
288	int path_buf_wide_len = MultiByteToWideChar(cp_in, 0, str, -1, NULL, 0);
289
290	/* Windows 95 will return 0 from these functions with
291	* a UTF8 codepage set without MSLU.
292	*
293	* From an unknown MSDN version (others omit this info):
294	* - CP_UTF8 Windows 98/Me, Windows NT 4.0 and later:
295	* Translate using UTF-8. When this is set, dwFlags must be zero.
296	* - Windows 95: Under the Microsoft Layer for Unicode,
297	* MultiByteToWideChar also supports CP_UTF7 and CP_UTF8.
298	*/
299
300	if (!path_buf_wide_len)
301	return strdup(str);
302
303	path_buf_wide = (wchar_t*)
304	calloc(path_buf_wide_len + sizeof(wchar_t), sizeof(wchar_t));
305
306	if (path_buf_wide)
307	{
308	MultiByteToWideChar(cp_in, 0,
309	str, -1, path_buf_wide, path_buf_wide_len);
310
311	if (*path_buf_wide)
312	{
313	int path_buf_len = WideCharToMultiByte(cp_out, 0,
314	path_buf_wide, -1, NULL, 0, NULL, NULL);
315
316	if (path_buf_len)
317	{
318	char path_buf = (char)
319	calloc(path_buf_len + sizeof(char), sizeof(char));
320
321	if (path_buf)
322	{
323	WideCharToMultiByte(cp_out, 0,
324	path_buf_wide, -1, path_buf,
325	path_buf_len, NULL, NULL);
326
327	free(path_buf_wide);
328
329	if (*path_buf)
330	return path_buf;
331
332	free(path_buf);
333	return NULL;
334	}
335	}
336	else
337	{
338	free(path_buf_wide);
339	return strdup(str);
340	}
341	}
342
343	free(path_buf_wide);
344	}
345
346	return NULL;
347	}
348	#endif
349
350	/* Returned pointer MUST be freed by the caller if non-NULL. */
351	char* utf8_to_local_string_alloc(const char *str)
352	{
353	if (str && *str)
354	{
355	#if defined(_WIN32) && !defined(_XBOX) && !defined(UNICODE)
356	return mb_to_mb_string_alloc(str, CODEPAGE_UTF8, CODEPAGE_LOCAL);
357	#else
358	/* assume string needs no modification if not on Windows */
359	return strdup(str);
360	#endif
361	}
362	return NULL;
363	}
364
365	/* Returned pointer MUST be freed by the caller if non-NULL. */
366	char* local_to_utf8_string_alloc(const char *str)
367	{
368	if (str && *str)
369	{
370	#if defined(_WIN32) && !defined(_XBOX) && !defined(UNICODE)
371	return mb_to_mb_string_alloc(str, CODEPAGE_LOCAL, CODEPAGE_UTF8);
372	#else
373	/* assume string needs no modification if not on Windows */
374	return strdup(str);
375	#endif
376	}
377	return NULL;
378	}
379
380	/* Returned pointer MUST be freed by the caller if non-NULL. */
381	wchar_t* utf8_to_utf16_string_alloc(const char *str)
382	{
383	#ifdef _WIN32
384	int len = 0;
385	int out_len = 0;
386	#else
387	size_t len = 0;
388	size_t out_len = 0;
389	#endif
390	wchar_t *buf = NULL;
391
392	if (!str \|\| !*str)
393	return NULL;
394
395	#ifdef _WIN32
396	len = MultiByteToWideChar(CP_UTF8, 0, str, -1, NULL, 0);
397
398	if (len)
399	{
400	buf = (wchar_t*)calloc(len, sizeof(wchar_t));
401
402	if (!buf)
403	return NULL;
404
405	out_len = MultiByteToWideChar(CP_UTF8, 0, str, -1, buf, len);
406	}
407	else
408	{
409	/* fallback to ANSI codepage instead */
410	len = MultiByteToWideChar(CP_ACP, 0, str, -1, NULL, 0);
411
412	if (len)
413	{
414	buf = (wchar_t*)calloc(len, sizeof(wchar_t));
415
416	if (!buf)
417	return NULL;
418
419	out_len = MultiByteToWideChar(CP_ACP, 0, str, -1, buf, len);
420	}
421	}
422
423	if (out_len < 0)
424	{
425	free(buf);
426	return NULL;
427	}
428	#else
429	/* NOTE: For now, assume non-Windows platforms' locale is already UTF-8. */
430	len = mbstowcs(NULL, str, 0) + 1;
431
432	if (len)
433	{
434	buf = (wchar_t*)calloc(len, sizeof(wchar_t));
435
436	if (!buf)
437	return NULL;
438
439	out_len = mbstowcs(buf, str, len);
440	}
441
442	if (out_len == (size_t)-1)
443	{
444	free(buf);
445	return NULL;
446	}
447	#endif
448
449	return buf;
450	}
451
452	/* Returned pointer MUST be freed by the caller if non-NULL. */
453	char* utf16_to_utf8_string_alloc(const wchar_t *str)
454	{
455	#ifdef _WIN32
456	int len = 0;
457	#else
458	size_t len = 0;
459	#endif
460	char *buf = NULL;
461
462	if (!str \|\| !*str)
463	return NULL;
464
465	#ifdef _WIN32
466	{
467	UINT code_page = CP_UTF8;
468	len = WideCharToMultiByte(code_page,
469	0, str, -1, NULL, 0, NULL, NULL);
470
471	/* fallback to ANSI codepage instead */
472	if (!len)
473	{
474	code_page = CP_ACP;
475	len = WideCharToMultiByte(code_page,
476	0, str, -1, NULL, 0, NULL, NULL);
477	}
478
479	buf = (char*)calloc(len, sizeof(char));
480
481	if (!buf)
482	return NULL;
483
484	if (WideCharToMultiByte(code_page,
485	0, str, -1, buf, len, NULL, NULL) < 0)
486	{
487	free(buf);
488	return NULL;
489	}
490	}
491	#else
492	/* NOTE: For now, assume non-Windows platforms'
493	* locale is already UTF-8. */
494	len = wcstombs(NULL, str, 0) + 1;
495
496	if (len)
497	{
498	buf = (char*)calloc(len, sizeof(char));
499
500	if (!buf)
501	return NULL;
502
503	if (wcstombs(buf, str, len) == (size_t)-1)
504	{
505	free(buf);
506	return NULL;
507	}
508	}
509	#endif
510
511	return buf;
512	}