226a5691 |
1 | /* Copyright (C) 2010-2020 The RetroArch team |
2 | * |
3 | * --------------------------------------------------------------------------------------- |
4 | * The following license statement only applies to this file (stdstring.c). |
5 | * --------------------------------------------------------------------------------------- |
6 | * |
7 | * Permission is hereby granted, free of charge, |
8 | * to any person obtaining a copy of this software and associated documentation files (the "Software"), |
9 | * to deal in the Software without restriction, including without limitation the rights to |
10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, |
11 | * and to permit persons to whom the Software is furnished to do so, subject to the following conditions: |
12 | * |
13 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. |
14 | * |
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, |
16 | * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
18 | * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, |
19 | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
21 | */ |
22 | |
23 | #include <stdint.h> |
24 | #include <ctype.h> |
25 | |
26 | #include <string/stdstring.h> |
27 | #include <encodings/utf.h> |
28 | |
/* Per-byte character property table, indexed by the
 * unsigned value of a character (0x00-0xFF).
 * Each entry is a set of bit flags; reading the rows below:
 *   0x80 - whitespace: '\t' (0x09), '\n' (0x0A), '\r' (0x0D)
 *          and ' ' (0x20) only; '\v' (0x0B) and '\f' (0x0C)
 *          are NOT flagged
 *   0x40 - decimal digit  '0'-'9'
 *   0x20 - ASCII letter   'A'-'Z', 'a'-'z'
 *   0x08 - underscore     '_'
 *   0x04 - lowercase letter
 *   0x02 - uppercase letter
 *   0x01 - hexadecimal digit '0'-'9', 'A'-'F', 'a'-'f'
 * Every byte >= 0x80 has no flag set.
 * NOTE(review): presumably this table backs the ISSPACE()/ISDIGIT()
 * style macros used further down; their definitions are not in
 * this file - confirm against the header. */
const uint8_t lr_char_props[256] = {
   /*x0   x1   x2   x3   x4   x5   x6   x7   x8   x9   xA   xB   xC   xD   xE   xF */
   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,0x80,0x00,0x00,0x80,0x00,0x00, /* 0x */
   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 1x */
   0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 2x  !"#$%&'()*+,-./ */
   0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x00,0x00,0x00,0x00,0x00,0x00, /* 3x 0123456789:;<=>? */
   0x00,0x23,0x23,0x23,0x23,0x23,0x23,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22, /* 4x @ABCDEFGHIJKLMNO */
   0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x00,0x00,0x00,0x00,0x08, /* 5x PQRSTUVWXYZ[\]^_ */
   0x00,0x25,0x25,0x25,0x25,0x25,0x25,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24, /* 6x `abcdefghijklmno */
   0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x00,0x00,0x00,0x00,0x00, /* 7x pqrstuvwxyz{|}~ */
   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 8x */
   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 9x */
   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* Ax */
   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* Bx */
   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* Cx */
   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* Dx */
   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* Ex */
   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* Fx */
};
48 | |
/* Creates a heap-allocated duplicate of 'src'.
 * > Returns NULL if 'src' is NULL (or if strdup() fails)
 * > Returned string must be free()'d by the caller */
char *string_init(const char *src)
{
   if (!src)
      return NULL;

   return strdup(src);
}
53 | |
/* Replaces the heap string referenced by 'string' with a
 * freshly allocated copy of 'src'.
 * > The previous '*string' is free()'d
 * > '*string' is set to NULL when 'src' is NULL
 * > 'src' is allowed to alias, or point inside,
 *   the old '*string' buffer */
void string_set(char **string, const char *src)
{
   /* Duplicate BEFORE releasing the old buffer: if 'src'
    * refers to memory owned by '*string', freeing first
    * would make string_init() read freed memory */
   char *copy = string_init(src);

   free(*string);
   *string    = copy;
}
59 | |
60 | |
/* Converts every character of 's' to upper case, in place
 * (via toupper()).
 * > Returns 's' */
char *string_to_upper(char *s)
{
   char *pos = s;

   while (*pos != '\0')
   {
      *pos = toupper((unsigned char)*pos);
      pos++;
   }

   return s;
}
68 | |
/* Converts every character of 's' to lower case, in place
 * (via tolower()).
 * > Returns 's' */
char *string_to_lower(char *s)
{
   char *pos = s;

   while (*pos != '\0')
   {
      *pos = tolower((unsigned char)*pos);
      pos++;
   }

   return s;
}
76 | |
/* Upper-cases, in place, the first character of 's' and
 * every character that directly follows a space (' ').
 * Only ' ' counts as a word separator here.
 * > Returns 's' */
char *string_ucwords(char *s)
{
   size_t i;

   for (i = 0; s[i] != '\0'; i++)
   {
      if (s[i] != ' ')
         continue;

      /* s[i + 1] is at worst the terminator, which
       * toupper() leaves unchanged */
      s[i + 1] = toupper((unsigned char)s[i + 1]);
   }

   s[0] = toupper((unsigned char)s[0]);

   return s;
}
89 | |
/* Returns a newly allocated copy of 'in' in which every
 * (non-overlapping, left-to-right) occurrence of 'pattern'
 * has been replaced with 'replacement'.
 * > Returned string must be free()'d
 * > Returns NULL if 'in' is NULL or if allocation fails
 * > If 'pattern' is NULL or empty, or 'replacement' is NULL,
 *   an unmodified copy of 'in' is returned */
char *string_replace_substring(const char *in,
      const char *pattern, const char *replacement)
{
   size_t in_len, pattern_len, replacement_len, numhits, outlen;
   const char *inat   = NULL;
   const char *inprev = NULL;
   char *out          = NULL;
   char *outat        = NULL;

   if (!in)
      return NULL;

   /* Nothing sensible to replace: duplicate 'in' and let
    * the caller handle it.
    * An empty pattern must be rejected here - strstr()
    * matches it at every position without advancing, so
    * the loops below would never terminate */
   if (!pattern || !replacement || (*pattern == '\0'))
      return strdup(in);

   in_len          = strlen(in);
   pattern_len     = strlen(pattern);
   replacement_len = strlen(replacement);
   numhits         = 0;

   /* First pass: count occurrences so that the output
    * can be allocated in one go */
   for (inat = strstr(in, pattern); inat;
        inat = strstr(inat + pattern_len, pattern))
      numhits++;

   outlen = in_len - pattern_len * numhits + replacement_len * numhits;
   out    = (char *)malloc(outlen + 1);

   if (!out)
      return NULL;

   /* Second pass: copy the text between matches,
    * followed by the replacement */
   outat  = out;
   inprev = in;

   while ((inat = strstr(inprev, pattern)))
   {
      size_t gap = (size_t)(inat - inprev);

      memcpy(outat, inprev, gap);
      outat += gap;
      memcpy(outat, replacement, replacement_len);
      outat += replacement_len;
      inprev = inat + pattern_len;
   }

   /* Copy whatever follows the last match,
    * including the terminator */
   memcpy(outat, inprev, in_len - (size_t)(inprev - in) + 1);

   return out;
}
138 | |
/* Strips leading whitespace (as classified by ISSPACE())
 * from 's', in place, by shifting the remaining text to
 * the start of the buffer.
 * > NULL and empty strings are returned untouched
 * > Returns 's' */
char *string_trim_whitespace_left(char *const s)
{
   char *first = s;

   if (!s || (*s == '\0'))
      return s;

   /* Find the first character that is not whitespace */
   while (*first && ISSPACE((unsigned char)*first))
      first++;

   /* Source and destination overlap, hence memmove();
    * the '+ 1' carries the terminator along */
   if (first != s)
      memmove(s, first, strlen(first) + 1);

   return s;
}
159 | |
/* Strips trailing whitespace (as classified by ISSPACE())
 * from 's', in place, by writing a new terminator.
 * > NULL and empty strings are returned untouched
 * > Returns 's' */
char *string_trim_whitespace_right(char *const s)
{
   char *last = NULL;

   if (!s || (*s == '\0'))
      return s;

   /* Walk back from the final character, stopping at
    * the first non-whitespace one or at s[0] */
   last = s + strlen(s) - 1;

   while ((last != s) && ISSPACE((unsigned char)*last))
      last--;

   /* 'last' is whitespace only when the whole string
    * was whitespace (the walk stopped at s[0]) */
   if (ISSPACE((unsigned char)*last))
      last[0] = '\0';
   else
      last[1] = '\0';

   return s;
}
179 | |
/* Strips both leading and trailing whitespace from 's',
 * in place.
 * > Returns 's' */
char *string_trim_whitespace(char *const s)
{
   /* Order matters: the right-hand side is trimmed first
    * (inner call), then the left-hand side. Both helpers
    * return the pointer they were given */
   return string_trim_whitespace_left(
         string_trim_whitespace_right(s));
}
188 | |
/* Copies 'string' into 'buffer', replacing selected space
 * characters with '\n' so that lines do not exceed
 * 'line_width'.
 * > 'buffer' has no size parameter: it must be able to hold
 *   at least strlen(string) + 1 bytes
 * > 'line_width' is counted in characters (as delimited by
 *   utf8skip()) when 'unicode' is true, and in bytes when
 *   'unicode' is false
 * > 'max_lines' limits the number of lines produced;
 *   0 means no limit. Once the limit is reached no further
 *   line breaks are inserted
 * > Newlines already present in 'string' are kept and
 *   counted as lines
 * > Returns 'buffer', which is always NUL-terminated */
char *word_wrap(char* buffer, const char *string, int line_width, bool unicode, unsigned max_lines)
{
   unsigned i     = 0;
   unsigned len   = (unsigned)strlen(string);
   unsigned lines = 1;

   while (i < len)
   {
      unsigned counter;
      /* Index at which the current output line starts */
      unsigned line_start = i;

      /* copy string until the end of the line is reached */
      for (counter = 1; counter <= (unsigned)line_width; counter++)
      {
         const char *character;
         unsigned char_len;
         unsigned j = i;

         /* check if end of string reached */
         if (i == len)
         {
            buffer[i] = 0;
            return buffer;
         }

         character = utf8skip(&string[i], 1);
         char_len  = (unsigned)(character - &string[i]);

         /* In byte mode every byte of a multi-byte
          * character counts towards the line width */
         if (!unicode)
            counter += char_len - 1;

         do
         {
            buffer[i] = string[i];
            char_len--;
            i++;
         } while (char_len);

         /* check for newlines embedded in the original input
          * and reset the index */
         if (buffer[j] == '\n')
         {
            lines++;
            counter = 1;
         }
      }

      /* check for whitespace */
      if (string[i] == ' ')
      {
         if ((max_lines == 0 || lines < max_lines))
         {
            buffer[i] = '\n';
            i++;
            lines++;
         }
      }
      else
      {
         int k;
         /* End of the data copied so far; 'i' may be
          * rewound by the search below */
         unsigned copied_end = i;

         /* check for nearest whitespace back in string
          * (pointless once the line limit has been reached,
          * since no break may be inserted any more) */
         if (max_lines == 0 || lines < max_lines)
         {
            for (k = i; k > 0; k--)
            {
               if (string[k] != ' ')
                  continue;

               buffer[k] = '\n';
               /* set string index back to character after this one */
               i         = k + 1;
               lines++;
               break;
            }
         }

         /* No progress was made on this line (e.g. a single
          * word wider than 'line_width' following a break):
          * give up. Terminate what has been copied so far,
          * otherwise the caller receives an unterminated
          * buffer */
         if (i == line_start)
         {
            buffer[copied_end] = 0;
            return buffer;
         }
      }
   }

   buffer[i] = 0;

   return buffer;
}
272 | |
/* Splits string into tokens separated by 'delim'
 * > 'delim' is matched as a whole substring, not as
 *   a set of single-character delimiters
 * > Returned token string must be free()'d
 * > Returns NULL if token is not found
 *   (also if 'str' is NULL, '*str' is NULL, 'delim'
 *   is empty, or allocation fails)
 * > After each call, 'str' is set to the position after the
 *   last found token (NULL once the final token has
 *   been returned)
 * > Tokens *include* empty strings
 * > The input string itself is not modified
 * Usage example:
 *    char *str      = "1,2,3,4,5,6,7,,,10,";
 *    char **str_ptr = &str;
 *    char *token    = NULL;
 *    while ((token = string_tokenize(str_ptr, ",")))
 *    {
 *        printf("%s\n", token);
 *        free(token);
 *        token = NULL;
 *    }
 */
char* string_tokenize(char **str, const char *delim)
{
   /* Taken from https://codereview.stackexchange.com/questions/216956/strtok-function-thread-safe-supports-empty-tokens-doesnt-change-string# */
   char *str_ptr    = NULL;
   char *delim_ptr  = NULL;
   char *token      = NULL;
   size_t token_len = 0;

   /* Sanity checks */
   if (!str || string_is_empty(delim))
      return NULL;

   str_ptr = *str;

   /* Note: we don't check string_is_empty() here,
    * empty strings are valid */
   if (!str_ptr)
      return NULL;

   /* Search for delimiter */
   delim_ptr = strstr(str_ptr, delim);

   if (delim_ptr)
      token_len = (size_t)(delim_ptr - str_ptr);
   else
      token_len = strlen(str_ptr);

   /* Allocate token string */
   token = (char *)malloc(token_len + 1);

   if (!token)
      return NULL;

   /* Copy token
    * > The length is already known, so copy exactly that
    *   many bytes; a strlcpy() here would rescan the whole
    *   remaining input on every call */
   memcpy(token, str_ptr, token_len);
   token[token_len] = '\0';

   /* Update input string pointer */
   *str = delim_ptr ? delim_ptr + strlen(delim) : NULL;

   return token;
}
332 | |
/* Removes every instance of character 'c' from 'str',
 * in place, compacting the remaining characters towards
 * the start of the buffer.
 * > Does nothing if 'str' is NULL or empty */
void string_remove_all_chars(char *str, char c)
{
   char *src = NULL;
   char *dst = NULL;

   if (string_is_empty(str))
      return;

   /* 'dst' trails 'src' and only advances when
    * a character is kept */
   for (src = str, dst = str; *src != '\0'; src++)
   {
      if (*src != c)
         *dst++ = *src;
   }

   *dst = '\0';
}
353 | |
/* Replaces every instance of character 'find' in 'str'
 * with character 'replace', in place.
 * > Does nothing if 'str' is NULL or empty
 * > The terminator is never treated as a match, so
 *   passing '\0' as 'find' leaves 'str' unchanged */
void string_replace_all_chars(char *str, char find, char replace)
{
   char *str_ptr = str;

   if (string_is_empty(str))
      return;

   /* A plain scan is used instead of strchr(): strchr()
    * matches the terminator when asked for '\0', which
    * would overwrite it and keep scanning past the end
    * of the buffer */
   for (; *str_ptr != '\0'; str_ptr++)
   {
      if (*str_ptr == find)
         *str_ptr = replace;
   }
}
366 | |
/* Converts decimal string to unsigned integer.
 * > Every character must pass ISDIGIT(): signs,
 *   whitespace and any other character make the
 *   string invalid
 * > Returns 0 if string is NULL, empty or invalid */
unsigned string_to_unsigned(const char *str)
{
   const char *cursor = str;

   if (string_is_empty(str))
      return 0;

   /* Reject the input on the first non-digit */
   while (*cursor != '\0')
   {
      if (!ISDIGIT((unsigned char)*cursor))
         return 0;

      cursor++;
   }

   return (unsigned)strtoul(str, NULL, 10);
}
384 | |
/* Converts hexadecimal string to unsigned integer.
 * > Handles optional leading '0x' / '0X'
 * > After the optional prefix, every character must
 *   pass isxdigit() and at least one must be present
 * > Returns 0 if string is NULL, empty or invalid */
unsigned string_hex_to_unsigned(const char *str)
{
   const char *digits = str;
   const char *cursor = NULL;

   if (string_is_empty(str))
      return 0;

   /* Skip leading '0x', if present.
    * 'str' is not empty at this point, so str[0] is a real
    * character and str[1] is at worst the terminator */
   if ((str[0] == '0') &&
       ((str[1] == 'x') || (str[1] == 'X')))
      digits = str + 2;

   /* A bare prefix is not a number */
   if (string_is_empty(digits))
      return 0;

   /* Check for valid characters */
   cursor = digits;

   while (*cursor != '\0')
   {
      if (!isxdigit((unsigned char)*cursor))
         return 0;

      cursor++;
   }

   return (unsigned)strtoul(digits, NULL, 16);
}