ce188d4d |
1 | /* |
2 | * Copyright (C) 2001 Peter Harris <peter.harris@hummingbird.com> |
3 | * Copyright (C) 2001 Edmund Grimley Evans <edmundo@rano.org> |
4 | * |
5 | * Buffer overflow checking added: Josh Coalson, 9/9/2007 |
6 | * |
7 | * Win32 part rewritten: lvqcl, 2/2/2016 |
8 | * |
9 | * This program is free software; you can redistribute it and/or modify |
10 | * it under the terms of the GNU General Public License as published by |
11 | * the Free Software Foundation; either version 2 of the License, or |
12 | * (at your option) any later version. |
13 | * |
14 | * This program is distributed in the hope that it will be useful, |
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17 | * GNU General Public License for more details. |
18 | * |
19 | * You should have received a copy of the GNU General Public License along |
20 | * with this program; if not, write to the Free Software Foundation, Inc., |
21 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
22 | */ |
23 | |
24 | /* |
25 | * Convert a string between UTF-8 and the locale's charset. |
26 | */ |
27 | |
28 | #ifdef HAVE_CONFIG_H |
29 | # include <config.h> |
30 | #endif |
31 | |
32 | #include <stdlib.h> |
33 | |
34 | #include "share/alloc.h" |
35 | #include "share/utf8.h" |
36 | |
37 | #ifdef _WIN32 |
38 | |
39 | #include <windows.h> |
40 | |
41 | int utf8_encode(const char *from, char **to) |
42 | { |
43 | wchar_t *unicode = NULL; |
44 | char *utf8 = NULL; |
45 | int ret = -1; |
46 | |
47 | do { |
48 | int len; |
49 | |
50 | len = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, from, -1, NULL, 0); |
51 | if(len == 0) break; |
52 | |
53 | unicode = (wchar_t*) safe_malloc_mul_2op_((size_t)len, sizeof(wchar_t)); |
54 | if(unicode == NULL) break; |
55 | |
56 | len = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, from, -1, unicode, len); |
57 | if(len == 0) break; |
58 | |
59 | len = WideCharToMultiByte(CP_UTF8, 0, unicode, -1, NULL, 0, NULL, NULL); |
60 | if(len == 0) break; |
61 | |
62 | utf8 = (char*) safe_malloc_mul_2op_((size_t)len, sizeof(char)); |
63 | if(utf8 == NULL) break; |
64 | |
65 | len = WideCharToMultiByte(CP_UTF8, 0, unicode, -1, utf8, len, NULL, NULL); |
66 | if(len == 0) break; |
67 | |
68 | ret = 0; |
69 | |
70 | } while(0); |
71 | |
72 | free(unicode); |
73 | |
74 | if(ret == 0) { |
75 | *to = utf8; |
76 | } else { |
77 | free(utf8); |
78 | *to = NULL; |
79 | } |
80 | |
81 | return ret; |
82 | } |
83 | |
84 | int utf8_decode(const char *from, char **to) |
85 | { |
86 | wchar_t *unicode = NULL; |
87 | char *acp = NULL; |
88 | int ret = -1; |
89 | |
90 | do { |
91 | int len; |
92 | |
93 | len = MultiByteToWideChar(CP_UTF8, 0, from, -1, NULL, 0); |
94 | if(len == 0) break; |
95 | |
96 | unicode = (wchar_t*) safe_malloc_mul_2op_((size_t)len, sizeof(wchar_t)); |
97 | if(unicode == NULL) break; |
98 | |
99 | len = MultiByteToWideChar(CP_UTF8, 0, from, -1, unicode, len); |
100 | if(len == 0) break; |
101 | |
102 | len = WideCharToMultiByte(CP_ACP, WC_COMPOSITECHECK, unicode, -1, NULL, 0, NULL, NULL); |
103 | if(len == 0) break; |
104 | |
105 | acp = (char*) safe_malloc_mul_2op_((size_t)len, sizeof(char)); |
106 | if(acp == NULL) break; |
107 | |
108 | len = WideCharToMultiByte(CP_ACP, WC_COMPOSITECHECK, unicode, -1, acp, len, NULL, NULL); |
109 | if(len == 0) break; |
110 | |
111 | ret = 0; |
112 | |
113 | } while(0); |
114 | |
115 | free(unicode); |
116 | |
117 | if(ret == 0) { |
118 | *to = acp; |
119 | } else { |
120 | free(acp); |
121 | *to = NULL; |
122 | } |
123 | |
124 | return ret; |
125 | } |
126 | |
127 | #else /* End win32. Rest is for real operating systems */ |
128 | |
129 | |
130 | #ifdef HAVE_LANGINFO_CODESET |
131 | #include <langinfo.h> |
132 | #endif |
133 | |
134 | #include <string.h> |
135 | |
136 | #include "share/safe_str.h" |
137 | #include "iconvert.h" |
138 | #include "charset.h" |
139 | |
/*
 * Return the name of the character set to use for the current locale.
 *
 * Lookup order: nl_langinfo(CODESET) when HAVE_LANGINFO_CODESET is defined,
 * then the CHARSET environment variable, and finally the literal
 * "US-ASCII".  The result is never NULL and must not be freed by the caller.
 */
static const char *current_charset(void)
{
	const char *name = NULL;

#ifdef HAVE_LANGINFO_CODESET
	name = nl_langinfo(CODESET);
#endif

	if (name == NULL)
		name = getenv("CHARSET");

	if (name == NULL)
		return "US-ASCII";

	return name;
}
152 | |
/*
 * Convert `fromlen` bytes at `from` from charset `fromcode` to charset
 * `tocode`, handing the work to iconvert() when HAVE_ICONV is defined and
 * to charset_convert() otherwise.
 *
 * `to` and `tolen` are passed straight through to the selected converter,
 * and its return value is returned unchanged.
 */
static int convert_buffer(const char *fromcode, const char *tocode,
                          const char *from, size_t fromlen,
                          char **to, size_t *tolen)
{
#ifdef HAVE_ICONV
	return iconvert(fromcode, tocode, from, fromlen, to, tolen);
#else /* should be ifdef USE_CHARSET_CONVERT */
	return charset_convert(fromcode, tocode, from, fromlen, to, tolen);
#endif
}
173 | |
/*
 * Convert the NUL-terminated string `from` from charset `fromcode` to
 * charset `tocode`, storing the result in *to.
 *
 * Return value:
 *   - whatever convert_buffer() returned, when that is neither -1 nor -2;
 *   - -1 when convert_buffer() returned -2, or when the fallback copy could
 *     not be allocated (*to is not written in the allocation-failure case);
 *   - 3 when convert_buffer() returned -1 and the fallback was used: *to is
 *     then a copy of `from` in which every byte outside 0x00..0x7f has been
 *     overwritten with `replace`.
 */
static int convert_string(const char *fromcode, const char *tocode,
                          const char *from, char **to, char replace)
{
	const size_t srclen = strlen(from);
	char *copy;
	char *p;
	int status;

	status = convert_buffer(fromcode, tocode, from, srclen, to, NULL);
	switch (status) {
	case -1:
		/* Converter gave up: fall through to the ASCII-only fallback below. */
		break;
	case -2:
		return -1;
	default:
		return status;
	}

	copy = safe_malloc_add_2op_(srclen, /*+*/1);
	if (copy == NULL)
		return -1;

	safe_strncpy(copy, from, srclen + 1);
	*to = copy;

	/* Blank out anything that is not plain 7-bit ASCII. */
	for (p = copy; *p != '\0'; p++) {
		if ((*p & ~0x7f) != 0)
			*p = replace;
	}

	return 3;
}
198 | |
/*
 * Convert `from` from the current locale's charset to UTF-8, storing the
 * result in *to.  Returns the result of convert_string(); '#' is the
 * substitute character used by its ASCII-only fallback.
 */
int utf8_encode(const char *from, char **to)
{
	const char *locale_charset = current_charset();

	return convert_string(locale_charset, "UTF-8", from, to, '#');
}
203 | |
/*
 * Convert `from` from UTF-8 to the current locale's charset, storing the
 * result in *to.  Returns the result of convert_string(); '?' is the
 * substitute character used by its ASCII-only fallback.
 */
int utf8_decode(const char *from, char **to)
{
	const char *locale_charset = current_charset();

	return convert_string("UTF-8", locale_charset, from, to, '?');
}
208 | |
209 | #endif |