git subrepo clone https://github.com/libretro/libretro-common.git deps/libretro-common
[pcsx_rearmed.git] / deps / libretro-common / string / stdstring.c
CommitLineData
3719602c
PC
1/* Copyright (C) 2010-2020 The RetroArch team
2 *
3 * ---------------------------------------------------------------------------------------
4 * The following license statement only applies to this file (stdstring.c).
5 * ---------------------------------------------------------------------------------------
6 *
7 * Permission is hereby granted, free of charge,
8 * to any person obtaining a copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation the rights to
10 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
11 * and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
16 * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
19 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23#include <stdint.h>
24#include <ctype.h>
25#include <string.h>
26
27#include <compat/strl.h>
28#include <string/stdstring.h>
29#include <encodings/utf.h>
30
/* Character property table, one entry per byte value (0x00-0xFF).
 *
 * Bits that appear in the entries below:
 *   0x80 : TAB (0x09), LF (0x0A), CR (0x0D) and SPACE (0x20)
 *   0x40 : decimal digits '0'-'9'
 *   0x20 : ASCII letters 'A'-'Z' and 'a'-'z'
 *   0x08 : underscore '_'
 *   0x04 : lowercase letters 'a'-'z'
 *   0x02 : uppercase letters 'A'-'Z'
 *   0x01 : hexadecimal digits '0'-'9', 'A'-'F', 'a'-'f'
 *
 * Vertical tab (0x0B), form feed (0x0C) and every byte >= 0x80
 * carry no property at all.
 *
 * NOTE(review): presumably this is the lookup table behind the
 * ISSPACE()/ISDIGIT() macros used in this file - confirm against
 * string/stdstring.h. */
const uint8_t lr_char_props[256] = {
   /* 0x00-0x0F : control characters */
   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,0x80,0x00,0x00,0x80,0x00,0x00,
   /* 0x10-0x1F : control characters */
   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
   /* 0x20-0x2F :  !"#$%&'()*+,-./ */
   0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
   /* 0x30-0x3F : 0123456789:;<=>? */
   0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x00,0x00,0x00,0x00,0x00,0x00,
   /* 0x40-0x4F : @ABCDEFGHIJKLMNO */
   0x00,0x23,0x23,0x23,0x23,0x23,0x23,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,
   /* 0x50-0x5F : PQRSTUVWXYZ[\]^_ */
   0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x00,0x00,0x00,0x00,0x08,
   /* 0x60-0x6F : `abcdefghijklmno */
   0x00,0x25,0x25,0x25,0x25,0x25,0x25,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24,
   /* 0x70-0x7F : pqrstuvwxyz{|}~ and DEL */
   0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x00,0x00,0x00,0x00,0x00,
   /* 0x80-0x8F */
   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
   /* 0x90-0x9F */
   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
   /* 0xA0-0xAF */
   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
   /* 0xB0-0xBF */
   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
   /* 0xC0-0xCF */
   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
   /* 0xD0-0xDF */
   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
   /* 0xE0-0xEF */
   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
   /* 0xF0-0xFF */
   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
};
50
/**
 * string_init:
 * @src : string to duplicate (may be NULL)
 *
 * Duplicates @src with strdup().
 *
 * @return newly allocated copy of @src that the caller must free(),
 * or NULL when @src is NULL (strdup() itself may also return NULL).
 **/
char *string_init(const char *src)
{
   if (!src)
      return NULL;

   return strdup(src);
}
55
/**
 * string_set:
 * @string : address of the heap-allocated string to replace
 *           (must be non-NULL; *@string may be NULL)
 * @src    : new contents (may be NULL)
 *
 * Releases the string currently stored in *@string and stores a
 * fresh copy of @src, made with string_init(), in its place.
 * *@string becomes NULL when @src is NULL or the copy fails.
 *
 * The copy is taken before the old buffer is freed, so @src may
 * alias *@string (or point inside it) without touching freed memory.
 **/
void string_set(char **string, const char *src)
{
   char *copy = string_init(src);

   free(*string);
   *string = copy;
}
61
62
/**
 * string_to_upper:
 * @s : NUL-terminated string, modified in place (must be non-NULL)
 *
 * Converts every character of @s with toupper().
 *
 * @return @s
 **/
char *string_to_upper(char *s)
{
   size_t i;

   for (i = 0; s[i] != '\0'; i++)
      s[i] = toupper((unsigned char)s[i]);

   return s;
}
70
/**
 * string_to_lower:
 * @s : NUL-terminated string, modified in place (must be non-NULL)
 *
 * Converts every character of @s with tolower().
 *
 * @return @s
 **/
char *string_to_lower(char *s)
{
   size_t i;

   for (i = 0; s[i] != '\0'; i++)
      s[i] = tolower((unsigned char)s[i]);

   return s;
}
78
/**
 * string_ucwords:
 * @s : NUL-terminated string, modified in place (must be non-NULL)
 *
 * Upper-cases the first character of @s and every character that
 * directly follows a space (' ').
 *
 * @return @s
 **/
char *string_ucwords(char *s)
{
   char *p = s;

   while (*p != '\0')
   {
      char *next = p + 1;

      /* When the space is the last character, 'next' is the
       * terminator and toupper() leaves it untouched */
      if (*p == ' ')
         *next = toupper((unsigned char)*next);

      p = next;
   }

   *s = toupper((unsigned char)*s);
   return s;
}
91
/**
 * string_replace_substring:
 * @in              : input string
 * @pattern         : substring to search for
 * @pattern_len     : length of @pattern; the scan advances by this
 *                    many bytes after each match
 * @replacement     : string substituted for every match
 * @replacement_len : number of bytes copied from @replacement
 *
 * Builds a new string in which every occurrence of @pattern in @in
 * is replaced by @replacement.
 *
 * When @pattern or @replacement is NULL, or the pattern is empty,
 * there is nothing to replace and a plain copy of @in is returned.
 * (An empty pattern matches at every position without consuming
 * input, so it must never reach the search loops.)
 *
 * @return newly allocated string that the caller must free(),
 * or NULL when @in is NULL or the allocation fails.
 **/
char *string_replace_substring(const char *in,
      const char *pattern, size_t pattern_len,
      const char *replacement, size_t replacement_len)
{
   size_t in_len;
   size_t out_len;
   size_t hits       = 0;
   const char *match = NULL;
   const char *prev  = NULL;
   char *out         = NULL;
   char *out_pos     = NULL;

   if (!in)
      return NULL;

   in_len = strlen(in);

   /* Count matches; skipped entirely when no replacement
    * can take place */
   if (     pattern
         && replacement
         && pattern_len > 0
         && *pattern != '\0')
   {
      for (match = strstr(in, pattern);
            match;
            match = strstr(match + pattern_len, pattern))
         hits++;
   }

   out_len = in_len - pattern_len * hits + replacement_len * hits;

   if (!(out = (char *)malloc(out_len + 1)))
      return NULL;

   out_pos = out;
   prev    = in;

   if (hits > 0)
   {
      for (match = strstr(in, pattern);
            match;
            match = strstr(prev, pattern))
      {
         size_t gap = (size_t)(match - prev);

         /* Text between the previous match and this one */
         memcpy(out_pos, prev, gap);
         out_pos += gap;

         memcpy(out_pos, replacement, replacement_len);
         out_pos += replacement_len;

         prev     = match + pattern_len;
      }
   }

   /* Remainder after the last match, including the terminator */
   strcpy(out_pos, prev);

   return out;
}
138
/**
 * string_trim_whitespace_left:
 * @s : string to trim in place (may be NULL)
 *
 * Removes leading whitespace, as classified by ISSPACE(), by
 * shifting the remaining text to the start of the buffer.
 *
 * @return @s
 **/
char *string_trim_whitespace_left(char *const s)
{
   char *first = s;

   if (!s || *s == '\0')
      return s;

   /* Locate the first character that is not whitespace */
   while (*first != '\0' && ISSPACE((unsigned char)*first))
      first++;

   /* Regions overlap, hence memmove(); +1 carries the terminator */
   if (first != s)
      memmove(s, first, strlen(first) + 1);

   return s;
}
163
/**
 * string_trim_whitespace_right:
 * @s : string to trim in place (may be NULL)
 *
 * Removes trailing whitespace, as classified by ISSPACE(), by
 * writing a new terminator after the last non-whitespace character.
 *
 * @return @s
 **/
char *string_trim_whitespace_right(char *const s)
{
   char *last;

   if (!s || *s == '\0')
      return s;

   /* Walk back from the final character, never past the start */
   last = s + strlen(s) - 1;
   while (last > s && ISSPACE((unsigned char)*last))
      last--;

   /* 'last' is whitespace only when the scan reached the first
    * character and that one is whitespace too: the whole string
    * is whitespace and becomes empty */
   if (ISSPACE((unsigned char)*last))
      *last   = '\0';
   else
      last[1] = '\0';

   return s;
}
187
/**
 * string_trim_whitespace:
 * @s : string to trim in place
 *
 * Removes trailing, then leading, whitespace from @s.
 *
 * @return @s
 **/
char *string_trim_whitespace(char *const s)
{
   /* Trailing whitespace goes first; the original code notes
    * that this order matters */
   string_trim_whitespace_right(s);

   /* The left trim hands back the pointer it was given */
   return string_trim_whitespace_left(s);
}
200
201/**
202 * word_wrap:
203 * @dst : pointer to destination buffer.
204 * @dst_size : size of destination buffer.
205 * @src : pointer to input string.
206 * @line_width : max number of characters per line.
207 * @wideglyph_width : not used, but is necessary to keep
208 * compatibility with word_wrap_wideglyph().
209 * @max_lines : max lines of destination string.
210 * 0 means no limit.
211 *
212 * Wraps string specified by 'src' to destination buffer
213 * specified by 'dst' and 'dst_size'.
214 * This function assumes that all glyphs in the string
215 * have an on-screen pixel width similar to that of
216 * regular Latin characters - i.e. it will not wrap
217 * correctly any text containing so-called 'wide' Unicode
218 * characters (e.g. CJK languages, emojis, etc.).
219 **/
220void word_wrap(
221 char *dst, size_t dst_size,
222 const char *src, size_t src_len,
223 int line_width, int wideglyph_width, unsigned max_lines)
224{
225 char *lastspace = NULL;
226 unsigned counter = 0;
227 unsigned lines = 1;
228 const char *src_end = src + src_len;
229
230 /* Prevent buffer overflow */
231 if (dst_size < src_len + 1)
232 return;
233
234 /* Early return if src string length is less
235 * than line width */
236 if (src_len < (size_t)line_width)
237 {
238 strlcpy(dst, src, dst_size);
239 return;
240 }
241
242 while (*src != '\0')
243 {
244 unsigned char_len = (unsigned)(utf8skip(src, 1) - src);
245 counter++;
246
247 if (*src == ' ')
248 lastspace = dst; /* Remember the location of the whitespace */
249 else if (*src == '\n')
250 {
251 /* If newlines embedded in the input,
252 * reset the index */
253 lines++;
254 counter = 0;
255
256 /* Early return if remaining src string
257 * length is less than line width */
258 if (src_end - src <= line_width)
259 {
260 strlcpy(dst, src, dst_size);
261 return;
262 }
263 }
264
265 while (char_len--)
266 *dst++ = *src++;
267
268 if (counter >= (unsigned)line_width)
269 {
270 counter = 0;
271
272 if (lastspace && (max_lines == 0 || lines < max_lines))
273 {
274 /* Replace nearest (previous) whitespace
275 * with newline character */
276 *lastspace = '\n';
277 lines++;
278
279 src -= dst - lastspace - 1;
280 dst = lastspace + 1;
281 lastspace = NULL;
282
283 /* Early return if remaining src string
284 * length is less than line width */
285 if (src_end - src < line_width)
286 {
287 strlcpy(dst, src, dst_size);
288 return;
289 }
290 }
291 }
292 }
293
294 *dst = '\0';
295}
296
297/**
298 * word_wrap_wideglyph:
299 * @dst : pointer to destination buffer.
300 * @dst_size : size of destination buffer.
301 * @src : pointer to input string.
302 * @line_width : max number of characters per line.
303 * @wideglyph_width : effective width of 'wide' Unicode glyphs.
304 * the value here is normalised relative to the
305 * typical on-screen pixel width of a regular
306 * Latin character:
307 * - a regular Latin character is defined to
308 * have an effective width of 100
309 * - wideglyph_width = 100 * (wide_character_pixel_width / latin_character_pixel_width)
310 * - e.g. if 'wide' Unicode characters in 'src'
311 * have an on-screen pixel width twice that of
312 * regular Latin characters, wideglyph_width
313 * would be 200
314 * @max_lines : max lines of destination string.
315 * 0 means no limit.
316 *
317 * Wraps string specified by @src to destination buffer
318 * specified by @dst and @dst_size.
319 * This function assumes that all glyphs in the string
320 * are:
321 * - EITHER 'non-wide' Unicode glyphs, with an on-screen
322 * pixel width similar to that of regular Latin characters
323 * - OR 'wide' Unicode glyphs (e.g. CJK languages, emojis, etc.)
324 * with an on-screen pixel width defined by @wideglyph_width
325 * Note that wrapping may occur in inappropriate locations
326 * if @src string contains 'wide' Unicode characters whose
327 * on-screen pixel width deviates greatly from the set
328 * @wideglyph_width value.
329 **/
330void word_wrap_wideglyph(char *dst, size_t dst_size,
331 const char *src, size_t src_len, int line_width,
332 int wideglyph_width, unsigned max_lines)
333{
334 char *lastspace = NULL;
335 char *lastwideglyph = NULL;
336 const char *src_end = src + src_len;
337 unsigned lines = 1;
338 /* 'line_width' means max numbers of characters per line,
339 * but this metric is only meaningful when dealing with
340 * 'regular' glyphs that have an on-screen pixel width
341 * similar to that of regular Latin characters.
342 * When handing so-called 'wide' Unicode glyphs, it is
343 * necessary to consider the actual on-screen pixel width
344 * of each character.
345 * In order to do this, we create a distinction between
346 * regular Latin 'non-wide' glyphs and 'wide' glyphs, and
347 * normalise all values relative to the on-screen pixel
348 * width of regular Latin characters:
349 * - Regular 'non-wide' glyphs have a normalised width of 100
350 * - 'line_width' is therefore normalised to 100 * (width_in_characters)
351 * - 'wide' glyphs have a normalised width of
352 * 100 * (wide_character_pixel_width / latin_character_pixel_width)
353 * - When a character is detected, the position in the current
354 * line is incremented by the regular normalised width of 100
355 * - If that character is then determined to be a 'wide'
356 * glyph, the position in the current line is further incremented
357 * by the difference between the normalised 'wide' and 'non-wide'
358 * width values */
359 unsigned counter_normalized = 0;
360 int line_width_normalized = line_width * 100;
361 int additional_counter_normalized = wideglyph_width - 100;
362
363 /* Early return if src string length is less
364 * than line width */
365 if (src_end - src < line_width)
366 {
367 strlcpy(dst, src, dst_size);
368 return;
369 }
370
371 while (*src != '\0')
372 {
373 unsigned char_len = (unsigned)(utf8skip(src, 1) - src);
374 counter_normalized += 100;
375
376 /* Prevent buffer overflow */
377 if (char_len >= dst_size)
378 break;
379
380 if (*src == ' ')
381 lastspace = dst; /* Remember the location of the whitespace */
382 else if (*src == '\n')
383 {
384 /* If newlines embedded in the input,
385 * reset the index */
386 lines++;
387 counter_normalized = 0;
388
389 /* Early return if remaining src string
390 * length is less than line width */
391 if (src_end - src <= line_width)
392 {
393 strlcpy(dst, src, dst_size);
394 return;
395 }
396 }
397 else if (char_len >= 3)
398 {
399 /* Remember the location of the first byte
400 * whose length as UTF-8 >= 3*/
401 lastwideglyph = dst;
402 counter_normalized += additional_counter_normalized;
403 }
404
405 dst_size -= char_len;
406 while (char_len--)
407 *dst++ = *src++;
408
409 if (counter_normalized >= (unsigned)line_width_normalized)
410 {
411 counter_normalized = 0;
412
413 if (max_lines != 0 && lines >= max_lines)
414 continue;
415 else if (lastwideglyph && (!lastspace || lastwideglyph > lastspace))
416 {
417 /* Insert newline character */
418 *lastwideglyph = '\n';
419 lines++;
420 src -= dst - lastwideglyph;
421 dst = lastwideglyph + 1;
422 lastwideglyph = NULL;
423
424 /* Early return if remaining src string
425 * length is less than line width */
426 if (src_end - src <= line_width)
427 {
428 strlcpy(dst, src, dst_size);
429 return;
430 }
431 }
432 else if (lastspace)
433 {
434 /* Replace nearest (previous) whitespace
435 * with newline character */
436 *lastspace = '\n';
437 lines++;
438 src -= dst - lastspace - 1;
439 dst = lastspace + 1;
440 lastspace = NULL;
441
442 /* Early return if remaining src string
443 * length is less than line width */
444 if (src_end - src < line_width)
445 {
446 strlcpy(dst, src, dst_size);
447 return;
448 }
449 }
450 }
451 }
452
453 *dst = '\0';
454}
455
/**
 * string_tokenize:
 * @str   : address of the current read position; updated on
 *          every successful call
 * @delim : delimiter string (a multi-character delimiter is
 *          matched as a whole)
 *
 * Splits string into tokens separated by @delim
 * > Returned token string must be free()'d
 * > Returns NULL if token is not found
 * > After each call, @str is set to the position after the
 *   last found token (NULL once the final token was returned)
 * > Tokens *include* empty strings
 * Usage example:
 *    char *str      = "1,2,3,4,5,6,7,,,10,";
 *    char **str_ptr = &str;
 *    char *token    = NULL;
 *    while ((token = string_tokenize(str_ptr, ",")))
 *    {
 *        printf("%s\n", token);
 *        free(token);
 *        token = NULL;
 *    }
 **/
char* string_tokenize(char **str, const char *delim)
{
   /* Taken from https://codereview.stackexchange.com/questions/216956/strtok-function-thread-safe-supports-empty-tokens-doesnt-change-string# */
   char *start      = NULL;
   char *delim_at   = NULL;
   char *token      = NULL;
   size_t token_len = 0;

   /* Sanity checks */
   if (!str || string_is_empty(delim))
      return NULL;

   /* Note: we don't check string_is_empty() here,
    * empty strings are valid */
   if (!(start = *str))
      return NULL;

   /* The token ends at the next delimiter, or at the end
    * of the input when no delimiter is left */
   delim_at  = strstr(start, delim);
   token_len = delim_at ? (size_t)(delim_at - start) : strlen(start);

   /* Allocate token string */
   if (!(token = (char *)malloc(token_len + 1)))
      return NULL;

   /* Copy exactly the token bytes. memcpy() touches only
    * token_len bytes, whereas strlcpy() would also measure the
    * whole remaining input on every call */
   memcpy(token, start, token_len);
   token[token_len] = '\0';

   /* Update input string pointer */
   *str = delim_at ? delim_at + strlen(delim) : NULL;

   return token;
}
513
/**
 * string_remove_all_chars:
 * @str : input string (must be non-NULL, otherwise UB)
 * @c   : character to remove
 *
 * Leaf function.
 *
 * Removes every instance of character @c from @str, compacting
 * the string in place.
 **/
void string_remove_all_chars(char *str, char c)
{
   const char *in = str;
   char *out      = str;

   /* 'out' never overtakes 'in', so the copy is safe in place */
   for (; *in != '\0'; in++)
   {
      if (*in != c)
         *out++ = *in;
   }

   *out = '\0';
}
536
/**
 * string_replace_all_chars:
 * @str     : input string, modified in place
 * @find    : character to find
 * @replace : character to replace @find with
 *
 * Replaces every instance of character @find in @str
 * with character @replace.
 *
 * Does nothing when @str is NULL or @find is '\0': strchr()
 * reports the terminator as a match for '\0', so replacing it
 * would un-terminate the string and send the scan past the end
 * of the buffer.
 **/
void string_replace_all_chars(char *str, char find, char replace)
{
   char *cursor = str;

   if (!str || find == '\0')
      return;

   while ((cursor = strchr(cursor, find)))
      *cursor++ = replace;
}
552
/**
 * string_to_unsigned:
 * @str : input string
 *
 * Converts a string made up solely of decimal digits to an
 * unsigned integer using strtoul().
 *
 * @return 0 when string_is_empty() rejects @str or when @str
 * contains any character that is not a decimal digit; otherwise
 * the converted value. Note that a valid "0" also yields 0.
 **/
unsigned string_to_unsigned(const char *str)
{
   const char *p = str;

   if (string_is_empty(str))
      return 0;

   /* Signs, whitespace and any other non-digit are rejected */
   while (*p != '\0')
   {
      if (!ISDIGIT((unsigned char)*p))
         return 0;
      p++;
   }

   return (unsigned)strtoul(str, NULL, 10);
}
576
/**
 * string_hex_to_unsigned:
 * @str : input string
 *
 * Converts hexadecimal string to unsigned integer.
 * Handles optional leading '0x' / '0X'.
 *
 * Single-digit input such as "F" is accepted; only a NULL or
 * empty string, a bare "0x" prefix, or a string containing a
 * non-hexadecimal character is treated as invalid.
 *
 * @return 0 if string is invalid, otherwise the converted value
 * (which is also 0 for input such as "0").
 **/
unsigned string_hex_to_unsigned(const char *str)
{
   const char *digits = str;
   const char *p      = NULL;

   if (!str)
      return 0;

   /* Remove leading '0x', if required. digits[1] is only read
    * once digits[0] is known not to be the terminator */
   if (     (digits[0] == '0')
         && ((digits[1] == 'x') || (digits[1] == 'X')))
      digits += 2;

   /* Nothing left to convert */
   if (*digits == '\0')
      return 0;

   /* Check for valid characters */
   for (p = digits; *p != '\0'; p++)
   {
      if (!isxdigit((unsigned char)*p))
         return 0;
   }

   return (unsigned)strtoul(digits, NULL, 16);
}
615
/**
 * string_count_occurrences_single_character:
 * @str : NUL-terminated string to scan (must be non-NULL)
 * @c   : character to count
 *
 * Leaf function.
 *
 * Counts how many times character @c appears in @str.
 *
 * @return Total number of occurrences of character @c
 */
int string_count_occurrences_single_character(const char *str, char c)
{
   const char *p = str;
   int total     = 0;

   while (*p != '\0')
   {
      if (*p == c)
         total++;
      p++;
   }

   return total;
}
635
/**
 * string_replace_whitespace_with_single_character:
 * @str : string to modify in place (must be non-NULL)
 * @c   : character written over each whitespace character
 *
 * Leaf function.
 *
 * Replaces every character classified as whitespace by ISSPACE()
 * with the given character @c.
 **/
void string_replace_whitespace_with_single_character(char *str, char c)
{
   for (; *str; str++)
   {
      /* Cast to unsigned char before classifying, as the trim
       * helpers in this file do, so bytes >= 0x80 never reach
       * ISSPACE() as negative values where plain char is signed */
      if (ISSPACE((unsigned char)*str))
         *str = c;
   }
}
649
/**
 * string_replace_multi_space_with_single_space:
 * @str : string to modify in place (must be non-NULL)
 *
 * Leaf function.
 *
 * Collapses every run of consecutive whitespace characters (as
 * classified by ISSPACE()) down to its first character. The
 * surviving character is kept as-is, so a run that starts with
 * a tab leaves a tab rather than a space.
 **/
void string_replace_multi_space_with_single_space(char *str)
{
   char *out          = str;
   bool prev_is_space = false;

   for (; *str; str++)
   {
      /* Cast to unsigned char before classifying, as the trim
       * helpers in this file do, so bytes >= 0x80 never reach
       * ISSPACE() as negative values where plain char is signed */
      bool curr_is_space = (ISSPACE((unsigned char)*str) != 0);

      /* Second or later whitespace of a run: drop it */
      if (prev_is_space && curr_is_space)
         continue;

      *out++        = *str;
      prev_is_space = curr_is_space;
   }

   *out = '\0';
}
673
/**
 * string_remove_all_whitespace:
 * @str_trimmed : destination buffer; must have room for the
 *                characters that are kept plus the terminator
 * @str         : input string (must be non-NULL)
 *
 * Leaf function.
 *
 * Copies @str to @str_trimmed, leaving out every character
 * classified as whitespace by ISSPACE().
 **/
void string_remove_all_whitespace(char *str_trimmed, const char *str)
{
   for (; *str; str++)
   {
      /* Cast to unsigned char before classifying, as the trim
       * helpers in this file do, so bytes >= 0x80 never reach
       * ISSPACE() as negative values where plain char is signed */
      if (!ISSPACE((unsigned char)*str))
         *str_trimmed++ = *str;
   }

   *str_trimmed = '\0';
}
688
/**
 * string_index_last_occurance:
 * @str : string to search (must be non-NULL)
 * @c   : character to look for
 *
 * Retrieves the last occurrence of the given character in a string.
 *
 * @return zero-based index of the last @c in @str,
 * or -1 when @c does not occur.
 */
int string_index_last_occurance(const char *str, char c)
{
   const char *hit = strrchr(str, c);

   return hit ? (int)(hit - str) : -1;
}
699
/**
 * string_find_index_substring_string:
 * @str    : input string (must be non-NULL, otherwise UB)
 * @substr : substring to find in @str
 *
 * Finds the position of the first occurrence of substring
 * @substr in string @str.
 *
 * @return zero-based index of the match,
 * or -1 when @substr does not occur in @str.
 **/
int string_find_index_substring_string(const char *str, const char *substr)
{
   const char *hit = strstr(str, substr);

   return hit ? (int)(hit - str) : -1;
}
714
/**
 * string_copy_only_ascii:
 * @str_stripped : destination buffer; must have room for the
 *                 characters that are kept plus the terminator
 * @str          : input string (must be non-NULL)
 *
 * Leaf function.
 *
 * Copies @str to @str_stripped, keeping only printable ASCII
 * (0x20 to 0x7E inclusive). Control characters, DEL (0x7F) and
 * all non-ASCII bytes are dropped.
 **/
void string_copy_only_ascii(char *str_stripped, const char *str)
{
   const char *in = str;
   char *out      = str_stripped;

   while (*in != '\0')
   {
      const char ch = *in++;

      if (ch >= 0x20 && ch <= 0x7E)
         *out++ = ch;
   }

   *out = '\0';
}