[pcsx_rearmed.git] / libretro-common / string / stdstring.c

/* Copyright  (C) 2010-2020 The RetroArch team
 *
 * ---------------------------------------------------------------------------------------
 * The following license statement only applies to this file (stdstring.c).
 * ---------------------------------------------------------------------------------------
 *
 * Permission is hereby granted, free of charge,
 * to any person obtaining a copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
 * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include <stdint.h>
#include <ctype.h>
#include <string.h>

#include <compat/strl.h>
#include <string/stdstring.h>
#include <encodings/utf.h>

/* Per-byte character property table, indexed by the unsigned value
 * of a byte (256 entries, one row of 16 per high nibble).
 *
 * Judging from the entries below, the flag bits appear to be:
 *   0x01 : hexadecimal digit (0-9, A-F, a-f)
 *   0x02 : upper-case letter
 *   0x04 : lower-case letter
 *   0x08 : underscore ('_' is the only entry carrying this bit)
 *   0x20 : alphabetic
 *   0x40 : decimal digit
 *   0x80 : whitespace - only tab (0x09), LF (0x0A), CR (0x0D)
 *          and space (0x20); VT (0x0B) and FF (0x0C) are NOT flagged
 * All bytes >= 0x80 have no properties set.
 *
 * NOTE(review): the bit meanings are inferred from the table contents;
 * confirm against the macros that consume this table. */
const uint8_t lr_char_props[256] = {
	/*x0   x1   x2   x3   x4   x5   x6   x7   x8   x9   xA   xB   xC   xD   xE   xF */
	0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,0x80,0x00,0x00,0x80,0x00,0x00, /* 0x                  */
	0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 1x                  */
	0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 2x  !"#$%&'()*+,-./ */
	0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x00,0x00,0x00,0x00,0x00,0x00, /* 3x 0123456789:;<=>? */
	0x00,0x23,0x23,0x23,0x23,0x23,0x23,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22, /* 4x @ABCDEFGHIJKLMNO */
	0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x00,0x00,0x00,0x00,0x08, /* 5x PQRSTUVWXYZ[\]^_ */
	0x00,0x25,0x25,0x25,0x25,0x25,0x25,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24, /* 6x `abcdefghijklmno */
	0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x00,0x00,0x00,0x00,0x00, /* 7x pqrstuvwxyz{|}~  */
	0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 8x                  */
	0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 9x                  */
	0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* Ax                  */
	0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* Bx                  */
	0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* Cx                  */
	0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* Dx                  */
	0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* Ex                  */
	0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* Fx                  */
};

/**
 * string_init:
 * @src                : string to duplicate (may be NULL)
 *
 * Creates a heap-allocated copy of @src.
 *
 * @return newly allocated copy that must be free()'d by the
 * caller, or NULL if @src is NULL or allocation fails
 **/
char *string_init(const char *src)
{
   if (!src)
      return NULL;
   return strdup(src);
}

/**
 * string_set:
 * @string             : address of a heap-allocated string
 *                       (or of a NULL pointer) to be replaced
 * @src                : new contents (may be NULL)
 *
 * Frees the string currently referenced by @string and
 * replaces it with a fresh copy of @src (NULL if @src is NULL).
 * @src is allowed to point into the string being replaced.
 **/
void string_set(char **string, const char *src)
{
   char *replacement = NULL;

   if (!string)
      return;

   /* Duplicate *before* freeing: @src may alias (part of)
    * the old buffer, in which case freeing first would make
    * the copy read released memory. */
   replacement = string_init(src);
   free(*string);
   *string     = replacement;
}


/**
 * string_to_upper:
 * @s                  : string to convert in place (must be non-NULL)
 *
 * Converts every character of @s to upper case via toupper().
 *
 * @return @s
 **/
char *string_to_upper(char *s)
{
   size_t i;

   for (i = 0; s[i] != '\0'; i++)
      s[i] = toupper((unsigned char)s[i]);

   return s;
}

/**
 * string_to_lower:
 * @s                  : string to convert in place (must be non-NULL)
 *
 * Converts every character of @s to lower case via tolower().
 *
 * @return @s
 **/
char *string_to_lower(char *s)
{
   char *cursor = s;

   while (*cursor != '\0')
   {
      *cursor = tolower((unsigned char)*cursor);
      cursor++;
   }

   return s;
}

/**
 * string_ucwords:
 * @s                  : string to convert in place (must be non-NULL)
 *
 * Upper-cases the first character of @s and every character
 * that directly follows a space (' '). Other whitespace
 * characters do not start a new word.
 *
 * @return @s
 **/
char *string_ucwords(char *s)
{
   size_t i = 0;

   while (s[i] != '\0')
   {
      /* s[i + 1] is at worst the terminator, which
       * toupper() leaves unchanged */
      if (s[i] == ' ')
         s[i + 1] = toupper((unsigned char)s[i + 1]);
      i++;
   }

   s[0] = toupper((unsigned char)s[0]);
   return s;
}

/**
 * string_replace_substring:
 * @in                 : input string
 * @pattern            : substring to search for
 * @pattern_len        : length of @pattern in bytes; must match
 *                       the actual length of @pattern
 * @replacement        : string substituted for each match
 * @replacement_len    : number of bytes copied from @replacement
 *                       for each match
 *
 * Builds a new string in which every non-overlapping
 * occurrence of @pattern in @in is replaced by @replacement.
 *
 * @return newly allocated string that must be free()'d by the
 * caller. If @pattern or @replacement is NULL, or @pattern is
 * empty, a plain copy of @in is returned. Returns NULL if @in
 * is NULL or allocation fails.
 **/
char *string_replace_substring(const char *in,
      const char *pattern, size_t pattern_len,
      const char *replacement, size_t replacement_len)
{
   size_t outlen;
   size_t numhits     = 0;
   const char *inat   = NULL;
   const char *inprev = NULL;
   char          *out = NULL;
   char        *outat = NULL;

   if (!in)
      return NULL;

   /* If either pattern or replacement is NULL,
    * duplicate in and let caller handle it.
    * An empty pattern is treated the same way: strstr()
    * matches it at every position without advancing,
    * so the search loops below would never terminate. */
   if (!pattern || !replacement ||
       pattern_len == 0 || *pattern == '\0')
      return strdup(in);

   /* First pass: count matches to size the output */
   inat            = in;

   while ((inat = strstr(inat, pattern)))
   {
      inat += pattern_len;
      numhits++;
   }

   outlen          = strlen(in) - pattern_len*numhits + replacement_len*numhits;

   if (!(out = (char *)malloc(outlen+1)))
      return NULL;

   /* Second pass: copy the text between matches,
    * emitting the replacement after each gap */
   outat           = out;
   inat            = in;
   inprev          = in;

   while ((inat = strstr(inat, pattern)))
   {
      memcpy(outat, inprev, inat-inprev);
      outat += inat-inprev;
      memcpy(outat, replacement, replacement_len);
      outat += replacement_len;
      inat  += pattern_len;
      inprev = inat;
   }

   /* Remainder after the last match, including terminator */
   strcpy(outat, inprev);

   return out;
}

/**
 * string_trim_whitespace_left:
 * @s                  : string to trim in place (may be NULL)
 *
 * Removes leading whitespace (as classified by ISSPACE)
 * by shifting the remaining characters to the start of @s.
 *
 * @return @s
 **/
char *string_trim_whitespace_left(char *const s)
{
   size_t total;
   size_t skip = 0;

   if (!s || *s == '\0')
      return s;

   total = strlen(s);

   /* Count leading whitespace characters */
   while (s[skip] != '\0' && ISSPACE((unsigned char)s[skip]))
      skip++;

   /* Shift the rest (plus terminator) to the front */
   if (skip != 0)
      memmove(s, s + skip, (total - skip) + 1);

   return s;
}

/**
 * string_trim_whitespace_right:
 * @s                  : string to trim in place (may be NULL)
 *
 * Removes trailing whitespace (as classified by ISSPACE)
 * by writing a new terminator after the last
 * non-whitespace character.
 *
 * @return @s
 **/
char *string_trim_whitespace_right(char *const s)
{
   char *last = NULL;

   if (!s || *s == '\0')
      return s;

   /* Walk back from the final character, but never
    * step before the start of the string */
   last = s + strlen(s) - 1;
   while (last != s && ISSPACE((unsigned char)*last))
      last--;

   /* 'last' is either the last character to keep, or
    * the first character of an all-whitespace string */
   if (ISSPACE((unsigned char)*last))
      last[0] = '\0';
   else
      last[1] = '\0';

   return s;
}

/**
 * string_trim_whitespace:
 * @s                  : string to trim in place (may be NULL)
 *
 * Removes both leading and trailing whitespace.
 *
 * @return @s
 **/
char *string_trim_whitespace(char *const s)
{
   /* Order matters: trailing whitespace is cut first, so
    * the left trim measures and moves the shortened string */
   string_trim_whitespace_right(s);
   return string_trim_whitespace_left(s);
}

/**
 * word_wrap:
 * @dst                : pointer to destination buffer.
 * @dst_size           : size of destination buffer.
 * @src                : pointer to input string.
 * @src_len            : length of input string in bytes
 *                       (src + src_len is treated as the
 *                       end of @src).
 * @line_width         : max number of characters per line.
 * @wideglyph_width    : not used, but is necessary to keep
 *                       compatibility with word_wrap_wideglyph().
 * @max_lines          : max lines of destination string.
 *                       0 means no limit. Once this many lines
 *                       exist, no further line breaks are inserted.
 *
 * Wraps string specified by 'src' to destination buffer
 * specified by 'dst' and 'dst_size', by replacing the last
 * space seen on an over-long line with a newline.
 * This function assumes that all glyphs in the string
 * have an on-screen pixel width similar to that of
 * regular Latin characters - i.e. it will not wrap
 * correctly any text containing so-called 'wide' Unicode
 * characters (e.g. CJK languages, emojis, etc.).
 *
 * Note: if 'dst_size' is smaller than 'src_len' + 1 the
 * function returns immediately and 'dst' is left untouched
 * (it is NOT terminated).
 **/
void word_wrap(
      char *dst,       size_t dst_size,
      const char *src, size_t src_len,
      int line_width,  int wideglyph_width, unsigned max_lines)
{
   char *lastspace     = NULL;
   unsigned counter    = 0;
   unsigned lines      = 1;
   const char *src_end = src + src_len;

   /* Prevent buffer overflow: output never grows beyond
    * the input, since spaces are replaced (not inserted) */
   if (dst_size < src_len + 1)
      return;

   /* Early return if src string length is less
    * than line width */
   if (src_len < (size_t)line_width)
   {
      strlcpy(dst, src, dst_size);
      return;
   }

   while (*src != '\0')
   {
      /* Byte length of the UTF-8 character at 'src';
       * each character counts as one column */
      unsigned char_len = (unsigned)(utf8skip(src, 1) - src);
      counter++;

      if (*src == ' ')
         lastspace = dst; /* Remember the location of the whitespace */
      else if (*src == '\n')
      {
         /* If newlines embedded in the input,
          * reset the index */
         lines++;
         counter = 0;

         /* Early return if remaining src string
          * is no longer than line width.
          * dst and src have advanced by the same amount, so
          * the size check at the top still bounds this copy
          * (assuming src_len is the true length of src) */
         if (src_end - src <= line_width)
         {
            strlcpy(dst, src, dst_size);
            return;
         }
     }

      /* Copy all bytes of the current character */
      while (char_len--)
         *dst++ = *src++;

      if (counter >= (unsigned)line_width)
      {
         counter = 0;

         /* Without a recorded space (or once max_lines is
          * reached) the line is simply left over-long */
         if (lastspace && (max_lines == 0 || lines < max_lines))
         {
            /* Replace nearest (previous) whitespace
             * with newline character */
            *lastspace = '\n';
            lines++;

            /* Rewind both cursors to just after the new line
             * break, so that the characters following it are
             * re-processed and counted towards the next line */
            src -= dst - lastspace - 1;
            dst = lastspace + 1;
            lastspace  = NULL;

            /* Early return if remaining src string
             * length is less than line width */
            if (src_end - src < line_width)
            {
               strlcpy(dst, src, dst_size);
               return;
            }
         }
      }
   }

   *dst = '\0';
}

/**
 * word_wrap_wideglyph:
 * @dst                : pointer to destination buffer.
 * @dst_size           : size of destination buffer.
 * @src                : pointer to input string.
 * @src_len            : length of input string in bytes
 *                       (src + src_len is treated as the
 *                       end of @src).
 * @line_width         : max number of characters per line.
 * @wideglyph_width    : effective width of 'wide' Unicode glyphs.
 *                       the value here is normalised relative to the
 *                       typical on-screen pixel width of a regular
 *                       Latin character:
 *                       - a regular Latin character is defined to
 *                         have an effective width of 100
 *                       - wideglyph_width = 100 * (wide_character_pixel_width / latin_character_pixel_width)
 *                       - e.g. if 'wide' Unicode characters in 'src'
 *                         have an on-screen pixel width twice that of
 *                         regular Latin characters, wideglyph_width
 *                         would be 200
 * @max_lines          : max lines of destination string.
 *                       0 means no limit.
 *
 * Wraps string specified by @src to destination buffer
 * specified by @dst and @dst_size.
 * This function assumes that all glyphs in the string
 * are:
 * - EITHER 'non-wide' Unicode glyphs, with an on-screen
 *   pixel width similar to that of regular Latin characters
 * - OR 'wide' Unicode glyphs (e.g. CJK languages, emojis, etc.)
 *   with an on-screen pixel width defined by @wideglyph_width
 * Any character encoded as 3 or more UTF-8 bytes is
 * treated as a 'wide' glyph.
 * Note that wrapping may occur in inappropriate locations
 * if @src string contains 'wide' Unicode characters whose
 * on-screen pixel width deviates greatly from the set
 * @wideglyph_width value.
 **/
void word_wrap_wideglyph(char *dst, size_t dst_size,
      const char *src, size_t src_len, int line_width,
      int wideglyph_width, unsigned max_lines)
{
   char *lastspace                   = NULL;
   char *lastwideglyph               = NULL;
   const char *src_end               = src + src_len;
   unsigned lines                    = 1;
   /* 'line_width' means max numbers of characters per line,
    * but this metric is only meaningful when dealing with
    * 'regular' glyphs that have an on-screen pixel width
    * similar to that of regular Latin characters.
    * When handling so-called 'wide' Unicode glyphs, it is
    * necessary to consider the actual on-screen pixel width
    * of each character.
    * In order to do this, we create a distinction between
    * regular Latin 'non-wide' glyphs and 'wide' glyphs, and
    * normalise all values relative to the on-screen pixel
    * width of regular Latin characters:
    * - Regular 'non-wide' glyphs have a normalised width of 100
    * - 'line_width' is therefore normalised to 100 * (width_in_characters)
    * - 'wide' glyphs have a normalised width of
    *   100 * (wide_character_pixel_width / latin_character_pixel_width)
    * - When a character is detected, the position in the current
    *   line is incremented by the regular normalised width of 100
    * - If that character is then determined to be a 'wide'
    *   glyph, the position in the current line is further incremented
    *   by the difference between the normalised 'wide' and 'non-wide'
    *   width values */
   unsigned counter_normalized       = 0;
   int line_width_normalized         = line_width * 100;
   int additional_counter_normalized = wideglyph_width - 100;
 
   /* Early return if src string length is less
    * than line width */
   if (src_end - src < line_width)
   {
      strlcpy(dst, src, dst_size);
      return;
   }

   while (*src != '\0')
   {
      /* Byte length of the UTF-8 character at 'src' */
      unsigned char_len   = (unsigned)(utf8skip(src, 1) - src);
      counter_normalized += 100;

      /* Prevent buffer overflow: stop when the character
       * plus a terminator no longer fits in what is left */
      if (char_len >= dst_size)
         break;

      if (*src == ' ')
         lastspace          = dst; /* Remember the location of the whitespace */
      else if (*src == '\n')
      {
         /* If newlines embedded in the input,
          * reset the index */
         lines++;
         counter_normalized = 0;

         /* Early return if remaining src string
          * is no longer than line width */
         if (src_end - src <= line_width)
         {
            strlcpy(dst, src, dst_size);
            return;
         }
      }
      else if (char_len >= 3)
      {
         /* Remember the location of the first byte
          * whose length as UTF-8 >= 3*/
         lastwideglyph       = dst;
         counter_normalized += additional_counter_normalized;
      }

      /* Account for the bytes about to be written.
       * NOTE(review): dst_size is not restored when dst is
       * rewound below, so the remaining space is
       * under-estimated after a wrap (output may be cut
       * short, but is never overrun) - confirm intended */
      dst_size -= char_len;
      while (char_len--)
         *dst++ = *src++;

      if (counter_normalized >= (unsigned)line_width_normalized)
      {
         counter_normalized = 0;

         /* Line limit reached: keep copying without
          * inserting any further breaks */
         if (max_lines != 0 && lines >= max_lines)
            continue;
         else if (lastwideglyph && (!lastspace || lastwideglyph > lastspace))
         {
            /* Insert newline character: the first byte of the
             * wide glyph in dst is overwritten with '\n', then
             * src is rewound to the start of that glyph so it
             * is copied again after the newline */
            *lastwideglyph = '\n';
            lines++;
            src           -= dst - lastwideglyph;
            dst            = lastwideglyph + 1;
            lastwideglyph  = NULL;

            /* Early return if remaining src string
             * is no longer than line width */
            if (src_end - src <= line_width)
            {
               strlcpy(dst, src, dst_size);
               return;
            }
         }
         else if (lastspace)
         {
            /* Replace nearest (previous) whitespace
             * with newline character */
            *lastspace = '\n';
            lines++;
            /* Rewind both cursors to just after the break so
             * the following characters are re-processed */
            src       -= dst - lastspace - 1;
            dst        = lastspace + 1;
            lastspace  = NULL;

            /* Early return if remaining src string
             * length is less than line width */
            if (src_end - src < line_width)
            {
               strlcpy(dst, src, dst_size);
               return;
            }
         }
      }
   }

   *dst = '\0';
}

/**
 * string_tokenize:
 * @str                : address of the current read position;
 *                       updated on every successful call
 * @delim              : delimiter string (must not be empty)
 *
 * Splits a string into tokens separated by @delim.
 * > Returned token string must be free()'d
 * > Returns NULL if no token is found (@str is NULL,
 *   *@str is NULL, @delim is empty, or allocation fails)
 * > After each call, *@str is set to the position after the
 *   delimiter that ended the token, or to NULL once the
 *   last token has been returned
 * > Tokens *include* empty strings
 * > The input string itself is not modified
 * Usage example:
 *    char *str      = "1,2,3,4,5,6,7,,,10,";
 *    char **str_ptr = &str;
 *    char *token    = NULL;
 *    while ((token = string_tokenize(str_ptr, ",")))
 *    {
 *        printf("%s\n", token);
 *        free(token);
 *        token = NULL;
 *    }
 **/
char* string_tokenize(char **str, const char *delim)
{
   /* Taken from https://codereview.stackexchange.com/questions/216956/strtok-function-thread-safe-supports-empty-tokens-doesnt-change-string# */
   char *start  = NULL;
   char *hit    = NULL;
   char *result = NULL;
   size_t n     = 0;

   /* Sanity checks */
   if (!str || string_is_empty(delim))
      return NULL;

   /* An empty (but non-NULL) input is valid and
    * yields one empty token */
   start = *str;
   if (!start)
      return NULL;

   /* Token runs up to the next delimiter, or to the
    * end of the string if there is none */
   hit = strstr(start, delim);
   if (hit)
      n = hit - start;
   else
      n = strlen(start);

   result = (char *)malloc((n + 1) * sizeof(char));
   if (!result)
      return NULL;

   strlcpy(result, start, (n + 1) * sizeof(char));
   result[n] = '\0';

   /* Advance past the delimiter; NULL marks the end */
   if (hit)
      *str = hit + strlen(delim);
   else
      *str = NULL;

   return result;
}

/**
 * string_remove_all_chars:
 * @str                : input string (must be non-NULL, otherwise UB)
 * @c                  : character to remove
 *
 * Leaf function.
 *
 * Removes every instance of character @c from @str,
 * compacting the string in place.
 **/
void string_remove_all_chars(char *str, char c)
{
   const char *in = str;
   char *out      = str;

   for (; *in != '\0'; in++)
   {
      /* Keep only characters that differ from @c */
      if (*in != c)
         *out++ = *in;
   }

   *out = '\0';
}

/**
 * string_replace_all_chars:
 * @str                : input string (NULL is ignored)
 * @find               : character to find
 * @replace            : character to replace @find with
 *
 * Replaces every instance of character @find in @str
 * with character @replace. Does nothing if @find is the
 * terminating null character.
 **/
void string_replace_all_chars(char *str, char find, char replace)
{
   char *str_ptr = str;

   /* strchr() treats the terminator as part of the string,
    * so searching for '\0' would overwrite the terminator
    * and keep scanning beyond the end of the buffer */
   if (!str || find == '\0')
      return;

   while ((str_ptr = strchr(str_ptr, find)))
      *str_ptr++ = replace;
}

/**
 * string_to_unsigned:
 * @str                : input string
 *
 * Converts a string consisting solely of decimal digits
 * to an unsigned integer. No sign, whitespace or other
 * characters are accepted.
 *
 * @return 0 if string is empty or contains a non-digit
 * character, otherwise the converted value (which is
 * itself 0 for inputs such as "0")
 **/
unsigned string_to_unsigned(const char *str)
{
   size_t i;

   if (string_is_empty(str))
      return 0;

   /* Reject the whole string on the first non-digit */
   for (i = 0; str[i] != '\0'; i++)
      if (!ISDIGIT((unsigned char)str[i]))
         return 0;

   return (unsigned)strtoul(str, NULL, 10);
}

/**
 * string_hex_to_unsigned:
 * @str                : input string (must be non-NULL, otherwise UB)
 *
 * Converts hexadecimal string to unsigned integer.
 * Handles optional leading '0x' / '0X'.
 * Single-digit inputs such as "F" are accepted.
 *
 * @return 0 if string is empty, consists only of the
 * prefix, or contains a non-hexadecimal character;
 * otherwise the converted value
 **/
unsigned string_hex_to_unsigned(const char *str)
{
   const char *hex_str = str;
   const char *ptr     = NULL;

   /* Remove leading '0x', if present.
    * str[1] is only read when str[0] is '0', i.e. when
    * the string is known to be at least one byte long */
   if ( (str[0] == '0') &&
       ((str[1] == 'x') ||
        (str[1] == 'X')))
      hex_str = str + 2;

   /* Empty input, or nothing after the prefix */
   if (*hex_str == '\0')
      return 0;

   /* Check for valid characters */
   for (ptr = hex_str; *ptr != '\0'; ptr++)
   {
      if (!isxdigit((unsigned char)*ptr))
         return 0;
   }

   return (unsigned)strtoul(hex_str, NULL, 16);
}

/**
 * string_count_occurrences_single_character:
 * @str                : input string (must be non-NULL)
 * @c                  : character to count
 *
 * Leaf function.
 *
 * Get the total number of occurrences of character @c in @str.
 * The terminating null character is never counted.
 *
 * @return Total number of occurrences of character @c
 */
int string_count_occurrences_single_character(const char *str, char c)
{
   int hits = 0;

   while (*str != '\0')
   {
      if (*str == c)
         hits++;
      str++;
   }

   return hits;
}

/**
 * string_replace_whitespace_with_single_character:
 * @str                : string to modify in place (must be non-NULL)
 * @c                  : replacement character
 *
 * Leaf function.
 *
 * Replaces every whitespace character (as classified
 * by ISSPACE) in @str with character @c.
 **/
void string_replace_whitespace_with_single_character(char *str, char c)
{
   char *cursor = str;

   while (*cursor)
   {
      if (ISSPACE(*cursor))
         *cursor = c;
      cursor++;
   }
}

/**
 * string_replace_multi_space_with_single_space:
 * @str                : string to modify in place (must be non-NULL)
 *
 * Leaf function.
 *
 * Collapses every run of consecutive whitespace characters
 * (as classified by ISSPACE) down to its first character.
 * The surviving character is kept as-is, i.e. it is not
 * converted to a literal space.
 **/
void string_replace_multi_space_with_single_space(char *str)
{
   const char *in   = str;
   char *out        = str;
   bool after_space = false;

   while (*in)
   {
      bool is_space = ISSPACE(*in);

      /* Drop whitespace that directly follows whitespace;
       * everything else is kept */
      if (!(after_space && is_space))
      {
         *out++      = *in;
         after_space = is_space;
      }
      in++;
   }

   *out = '\0';
}

/**
 * string_remove_all_whitespace:
 * @str_trimmed        : output buffer; must be able to hold
 *                       the filtered string plus terminator
 * @str                : input string (must be non-NULL)
 *
 * Leaf function.
 *
 * Copies @str to @str_trimmed, leaving out every
 * whitespace character (as classified by ISSPACE).
 **/
void string_remove_all_whitespace(char *str_trimmed, const char *str)
{
   char *out = str_trimmed;

   while (*str)
   {
      if (!ISSPACE(*str))
         *out++ = *str;
      str++;
   }

   *out = '\0';
}

/**
 * string_index_last_occurance:
 * @str                : input string (must be non-NULL)
 * @c                  : character to find
 *
 * Retrieve the index of the last occurrence of the given
 * character in a string.
 *
 * @return zero-based index of the last @c in @str,
 * or -1 if @c does not occur
 */
int string_index_last_occurance(const char *str, char c)
{
   const char *hit = strrchr(str, c);

   return hit ? (int)(hit - str) : -1;
}

/**
 * string_find_index_substring_string:
 * @str                : input string (must be non-NULL, otherwise UB)
 * @substr             : substring to find in @str
 *
 * Find the position of substring @substr in string @str.
 *
 * @return zero-based index of the first occurrence of
 * @substr in @str, or -1 if it does not occur
 **/
int string_find_index_substring_string(const char *str, const char *substr)
{
   const char *hit = strstr(str, substr);

   return hit ? (int)(hit - str) : -1;
}

/**
 * string_copy_only_ascii:
 * @str_stripped       : output buffer; must be able to hold
 *                       the filtered string plus terminator
 * @str                : input string (must be non-NULL)
 *
 * Leaf function.
 *
 * Copies @str to @str_stripped, keeping only printable
 * ASCII characters (0x20 - 0x7E). Control characters and
 * all non-ASCII bytes are dropped.
 **/
void string_copy_only_ascii(char *str_stripped, const char *str)
{
   char *out = str_stripped;
   char ch;

   while ((ch = *str++) != '\0')
   {
      if (ch >= 0x20 && ch <= 0x7E)
         *out++ = ch;
   }

   *out = '\0';
}
Commit	Line	Data
	1	/* Copyright (C) 2010-2020 The RetroArch team
	2	*
	3	* ---------------------------------------------------------------------------------------
	4	* The following license statement only applies to this file (stdstring.c).
	5	* ---------------------------------------------------------------------------------------
	6	*
	7	* Permission is hereby granted, free of charge,
	8	* to any person obtaining a copy of this software and associated documentation files (the "Software"),
	9	* to deal in the Software without restriction, including without limitation the rights to
	10	* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
	11	* and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
	12	*
	13	* The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
	14	*
	15	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
	16	* INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	17	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
	18	* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
	19	* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	20	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
	21	*/
	22
	23	#include <stdint.h>
	24	#include <ctype.h>
	25	#include <string.h>
	26
	27	#include <compat/strl.h>
	28	#include <string/stdstring.h>
	29	#include <encodings/utf.h>
	30
	31	const uint8_t lr_char_props[256] = {
	32	/x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF /
	33	0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,0x80,0x00,0x00,0x80,0x00,0x00, /* 0x */
	34	0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 1x */
	35	0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 2x !"#$%&'()+,-./ /
	36	0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x00,0x00,0x00,0x00,0x00,0x00, /* 3x 0123456789:;<=>? */
	37	0x00,0x23,0x23,0x23,0x23,0x23,0x23,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22, /* 4x @ABCDEFGHIJKLMNO */
	38	0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x00,0x00,0x00,0x00,0x08, /* 5x PQRSTUVWXYZ[\]^_ */
	39	0x00,0x25,0x25,0x25,0x25,0x25,0x25,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24, /* 6x `abcdefghijklmno */
	40	0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x00,0x00,0x00,0x00,0x00, /* 7x pqrstuvwxyz{\|}~ */
	41	0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 8x */
	42	0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 9x */
	43	0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* Ax */
	44	0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* Bx */
	45	0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* Cx */
	46	0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* Dx */
	47	0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* Ex */
	48	0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* Fx */
	49	};
	50
	51	char string_init(const char src)
	52	{
	53	return src ? strdup(src) : NULL;
	54	}
	55
	56	void string_set(char *string, const char src)
	57	{
	58	free(*string);
	59	*string = string_init(src);
	60	}
	61
	62
	63	char string_to_upper(char s)
	64	{
	65	char cs = (char )s;
	66	for ( ; *cs != '\0'; cs++)
	67	cs = toupper((unsigned char)cs);
	68	return s;
	69	}
	70
	71	char string_to_lower(char s)
	72	{
	73	char cs = (char )s;
	74	for ( ; *cs != '\0'; cs++)
	75	cs = tolower((unsigned char)cs);
	76	return s;
	77	}
	78
	79	char string_ucwords(char s)
	80	{
	81	char cs = (char )s;
	82	for ( ; *cs != '\0'; cs++)
	83	{
	84	if (*cs == ' ')
	85	(cs+1) = toupper((unsigned char)(cs+1));
	86	}
	87
	88	s[0] = toupper((unsigned char)s[0]);
	89	return s;
	90	}
	91
	92	char string_replace_substring(const char in,
	93	const char *pattern, size_t pattern_len,
	94	const char *replacement, size_t replacement_len)
	95	{
	96	size_t outlen;
	97	size_t numhits = 0;
	98	const char *inat = NULL;
	99	const char *inprev = NULL;
	100	char *out = NULL;
	101	char *outat = NULL;
	102
	103	/* if either pattern or replacement is NULL,
	104	* duplicate in and let caller handle it. */
	105	if (!pattern \|\| !replacement)
	106	return strdup(in);
	107
	108	inat = in;
	109
	110	while ((inat = strstr(inat, pattern)))
	111	{
	112	inat += pattern_len;
	113	numhits++;
	114	}
	115
	116	outlen = strlen(in) - pattern_lennumhits + replacement_lennumhits;
	117
	118	if (!(out = (char *)malloc(outlen+1)))
	119	return NULL;
	120
	121	outat = out;
	122	inat = in;
	123	inprev = in;
	124
	125	while ((inat = strstr(inat, pattern)))
	126	{
	127	memcpy(outat, inprev, inat-inprev);
	128	outat += inat-inprev;
	129	memcpy(outat, replacement, replacement_len);
	130	outat += replacement_len;
	131	inat += pattern_len;
	132	inprev = inat;
	133	}
	134	strcpy(outat, inprev);
	135
	136	return out;
	137	}
	138
	139	/**
	140	* string_trim_whitespace_left:
	141	*
	142	* Remove leading whitespaces
	143	**/
	144	char string_trim_whitespace_left(char const s)
	145	{
	146	if (s && *s)
	147	{
	148	size_t len = strlen(s);
	149	char *current = s;
	150
	151	while (current && ISSPACE((unsigned char)current))
	152	{
	153	++current;
	154	--len;
	155	}
	156
	157	if (s != current)
	158	memmove(s, current, len + 1);
	159	}
	160
	161	return s;
	162	}
	163
	164	/**
	165	* string_trim_whitespace_right:
	166	*
	167	* Remove trailing whitespaces
	168	**/
	169	char string_trim_whitespace_right(char const s)
	170	{
	171	if (s && *s)
	172	{
	173	size_t len = strlen(s);
	174	char *current = s + len - 1;
	175
	176	while (current != s && ISSPACE((unsigned char)*current))
	177	{
	178	--current;
	179	--len;
	180	}
	181
	182	current[ISSPACE((unsigned char)*current) ? 0 : 1] = '\0';
	183	}
	184
	185	return s;
	186	}
	187
	188	/**
	189	* string_trim_whitespace:
	190	*
	191	* Remove leading and trailing whitespaces
	192	**/
	193	char string_trim_whitespace(char const s)
	194	{
	195	string_trim_whitespace_right(s); /* order matters */
	196	string_trim_whitespace_left(s);
	197
	198	return s;
	199	}
	200
	201	/**
	202	* word_wrap:
	203	* @dst : pointer to destination buffer.
	204	* @dst_size : size of destination buffer.
	205	* @src : pointer to input string.
	206	* @line_width : max number of characters per line.
	207	* @wideglyph_width : not used, but is necessary to keep
	208	* compatibility with word_wrap_wideglyph().
	209	* @max_lines : max lines of destination string.
	210	* 0 means no limit.
	211	*
	212	* Wraps string specified by 'src' to destination buffer
	213	* specified by 'dst' and 'dst_size'.
	214	* This function assumes that all glyphs in the string
	215	* have an on-screen pixel width similar to that of
	216	* regular Latin characters - i.e. it will not wrap
	217	* correctly any text containing so-called 'wide' Unicode
	218	* characters (e.g. CJK languages, emojis, etc.).
	219	**/
	220	void word_wrap(
	221	char *dst, size_t dst_size,
	222	const char *src, size_t src_len,
	223	int line_width, int wideglyph_width, unsigned max_lines)
	224	{
	225	char *lastspace = NULL;
	226	unsigned counter = 0;
	227	unsigned lines = 1;
	228	const char *src_end = src + src_len;
	229
	230	/* Prevent buffer overflow */
	231	if (dst_size < src_len + 1)
	232	return;
	233
	234	/* Early return if src string length is less
	235	* than line width */
	236	if (src_len < (size_t)line_width)
	237	{
	238	strlcpy(dst, src, dst_size);
	239	return;
	240	}
	241
	242	while (*src != '\0')
	243	{
	244	unsigned char_len = (unsigned)(utf8skip(src, 1) - src);
	245	counter++;
	246
	247	if (*src == ' ')
	248	lastspace = dst; /* Remember the location of the whitespace */
	249	else if (*src == '\n')
	250	{
	251	/* If newlines embedded in the input,
	252	* reset the index */
	253	lines++;
	254	counter = 0;
	255
	256	/* Early return if remaining src string
	257	* length is less than line width */
	258	if (src_end - src <= line_width)
	259	{
	260	strlcpy(dst, src, dst_size);
	261	return;
	262	}
	263	}
	264
	265	while (char_len--)
	266	dst++ = src++;
	267
	268	if (counter >= (unsigned)line_width)
	269	{
	270	counter = 0;
	271
	272	if (lastspace && (max_lines == 0 \|\| lines < max_lines))
	273	{
	274	/* Replace nearest (previous) whitespace
	275	* with newline character */
	276	*lastspace = '\n';
	277	lines++;
	278
	279	src -= dst - lastspace - 1;
	280	dst = lastspace + 1;
	281	lastspace = NULL;
	282
	283	/* Early return if remaining src string
	284	* length is less than line width */
	285	if (src_end - src < line_width)
	286	{
	287	strlcpy(dst, src, dst_size);
	288	return;
	289	}
	290	}
	291	}
	292	}
	293
	294	*dst = '\0';
	295	}
	296
	297	/**
	298	* word_wrap_wideglyph:
	299	* @dst : pointer to destination buffer.
	300	* @dst_size : size of destination buffer.
	301	* @src : pointer to input string.
	302	* @line_width : max number of characters per line.
	303	* @wideglyph_width : effective width of 'wide' Unicode glyphs.
	304	* the value here is normalised relative to the
	305	* typical on-screen pixel width of a regular
	306	* Latin character:
	307	* - a regular Latin character is defined to
	308	* have an effective width of 100
	309	* - wideglyph_width = 100 * (wide_character_pixel_width / latin_character_pixel_width)
	310	* - e.g. if 'wide' Unicode characters in 'src'
	311	* have an on-screen pixel width twice that of
	312	* regular Latin characters, wideglyph_width
	313	* would be 200
	314	* @max_lines : max lines of destination string.
	315	* 0 means no limit.
	316	*
	317	* Wraps string specified by @src to destination buffer
	318	* specified by @dst and @dst_size.
	319	* This function assumes that all glyphs in the string
	320	* are:
	321	* - EITHER 'non-wide' Unicode glyphs, with an on-screen
	322	* pixel width similar to that of regular Latin characters
	323	* - OR 'wide' Unicode glyphs (e.g. CJK languages, emojis, etc.)
	324	* with an on-screen pixel width defined by @wideglyph_width
	325	* Note that wrapping may occur in inappropriate locations
	326	* if @src string contains 'wide' Unicode characters whose
	327	* on-screen pixel width deviates greatly from the set
	328	* @wideglyph_width value.
	329	**/
	330	void word_wrap_wideglyph(char *dst, size_t dst_size,
	331	const char *src, size_t src_len, int line_width,
	332	int wideglyph_width, unsigned max_lines)
	333	{
	334	char *lastspace = NULL;
	335	char *lastwideglyph = NULL;
	336	const char *src_end = src + src_len;
	337	unsigned lines = 1;
	338	/* 'line_width' means max numbers of characters per line,
	339	* but this metric is only meaningful when dealing with
	340	* 'regular' glyphs that have an on-screen pixel width
	341	* similar to that of regular Latin characters.
	342	* When handing so-called 'wide' Unicode glyphs, it is
	343	* necessary to consider the actual on-screen pixel width
	344	* of each character.
	345	* In order to do this, we create a distinction between
	346	* regular Latin 'non-wide' glyphs and 'wide' glyphs, and
	347	* normalise all values relative to the on-screen pixel
	348	* width of regular Latin characters:
	349	* - Regular 'non-wide' glyphs have a normalised width of 100
	350	* - 'line_width' is therefore normalised to 100 * (width_in_characters)
	351	* - 'wide' glyphs have a normalised width of
	352	* 100 * (wide_character_pixel_width / latin_character_pixel_width)
	353	* - When a character is detected, the position in the current
	354	* line is incremented by the regular normalised width of 100
	355	* - If that character is then determined to be a 'wide'
	356	* glyph, the position in the current line is further incremented
	357	* by the difference between the normalised 'wide' and 'non-wide'
	358	* width values */
	359	unsigned counter_normalized = 0;
	360	int line_width_normalized = line_width * 100;
	361	int additional_counter_normalized = wideglyph_width - 100;
	362
	363	/* Early return if src string length is less
	364	* than line width */
	365	if (src_end - src < line_width)
	366	{
	367	strlcpy(dst, src, dst_size);
	368	return;
	369	}
	370
	371	while (*src != '\0')
	372	{
	373	unsigned char_len = (unsigned)(utf8skip(src, 1) - src);
	374	counter_normalized += 100;
	375
	376	/* Prevent buffer overflow */
	377	if (char_len >= dst_size)
	378	break;
	379
	380	if (*src == ' ')
	381	lastspace = dst; /* Remember the location of the whitespace */
	382	else if (*src == '\n')
	383	{
	384	/* If newlines embedded in the input,
	385	* reset the index */
	386	lines++;
	387	counter_normalized = 0;
	388
	389	/* Early return if remaining src string
	390	* length is less than line width */
	391	if (src_end - src <= line_width)
	392	{
	393	strlcpy(dst, src, dst_size);
	394	return;
	395	}
	396	}
	397	else if (char_len >= 3)
	398	{
	399	/* Remember the location of the first byte
	400	* whose length as UTF-8 >= 3*/
	401	lastwideglyph = dst;
	402	counter_normalized += additional_counter_normalized;
	403	}
	404
	405	dst_size -= char_len;
	406	while (char_len--)
	407	dst++ = src++;
	408
	409	if (counter_normalized >= (unsigned)line_width_normalized)
	410	{
	411	counter_normalized = 0;
	412
	413	if (max_lines != 0 && lines >= max_lines)
	414	continue;
	415	else if (lastwideglyph && (!lastspace \|\| lastwideglyph > lastspace))
	416	{
	417	/* Insert newline character */
	418	*lastwideglyph = '\n';
	419	lines++;
	420	src -= dst - lastwideglyph;
	421	dst = lastwideglyph + 1;
	422	lastwideglyph = NULL;
	423
	424	/* Early return if remaining src string
	425	* length is less than line width */
	426	if (src_end - src <= line_width)
	427	{
	428	strlcpy(dst, src, dst_size);
	429	return;
	430	}
	431	}
	432	else if (lastspace)
	433	{
	434	/* Replace nearest (previous) whitespace
	435	* with newline character */
	436	*lastspace = '\n';
	437	lines++;
	438	src -= dst - lastspace - 1;
	439	dst = lastspace + 1;
	440	lastspace = NULL;
	441
	442	/* Early return if remaining src string
	443	* length is less than line width */
	444	if (src_end - src < line_width)
	445	{
	446	strlcpy(dst, src, dst_size);
	447	return;
	448	}
	449	}
	450	}
	451	}
	452
	453	*dst = '\0';
	454	}
	455
	456	/**
	457	* string_tokenize:
	458	*
	459	* Splits string into tokens seperated by @delim
	460	* > Returned token string must be free()'d
	461	* > Returns NULL if token is not found
	462	* > After each call, @str is set to the position after the
	463	* last found token
	464	* > Tokens include empty strings
	465	* Usage example:
	466	* char *str = "1,2,3,4,5,6,7,,,10,";
	467	* char **str_ptr = &str;
	468	* char *token = NULL;
	469	* while ((token = string_tokenize(str_ptr, ",")))
	470	* {
	471	* printf("%s\n", token);
	472	* free(token);
	473	* token = NULL;
	474	* }
	475	**/
	476	char* string_tokenize(char *str, const char delim)
	477	{
	478	/* Taken from https://codereview.stackexchange.com/questions/216956/strtok-function-thread-safe-supports-empty-tokens-doesnt-change-string# */
	479	char *str_ptr = NULL;
	480	char *delim_ptr = NULL;
	481	char *token = NULL;
	482	size_t token_len = 0;
	483
	484	/* Sanity checks */
	485	if (!str \|\| string_is_empty(delim))
	486	return NULL;
	487
	488
	489	/* Note: we don't check string_is_empty() here,
	490	* empty strings are valid */
	491	if (!(str_ptr = *str))
	492	return NULL;
	493
	494	/* Search for delimiter */
	495	if ((delim_ptr = strstr(str_ptr, delim)))
	496	token_len = delim_ptr - str_ptr;
	497	else
	498	token_len = strlen(str_ptr);
	499
	500	/* Allocate token string */
	501	if (!(token = (char )malloc((token_len + 1) sizeof(char))))
	502	return NULL;
	503
	504	/* Copy token */
	505	strlcpy(token, str_ptr, (token_len + 1) * sizeof(char));
	506	token[token_len] = '\0';
	507
	508	/* Update input string pointer */
	509	*str = delim_ptr ? delim_ptr + strlen(delim) : NULL;
	510
	511	return token;
	512	}
	513
	514	/**
	515	* string_remove_all_chars:
	516	* @str : input string (must be non-NULL, otherwise UB)
	517	*
	518	* Leaf function.
	519	*
	520	* Removes every instance of character @c from @str
	521	**/
	522	void string_remove_all_chars(char *str, char c)
	523	{
	524	char *read_ptr = str;
	525	char *write_ptr = str;
	526
	527	while (*read_ptr != '\0')
	528	{
	529	write_ptr = read_ptr++;
	530	if (*write_ptr != c)
	531	write_ptr++;
	532	}
	533
	534	*write_ptr = '\0';
	535	}
	536
	537	/**
	538	* string_replace_all_chars:
	539	* @str : input string (must be non-NULL, otherwise UB)
	540	* @find : character to find
	541	* @replace : character to replace @find with
	542	*
	543	* Replaces every instance of character @find in @str
	544	* with character @replace
	545	**/
	546	void string_replace_all_chars(char *str, char find, char replace)
	547	{
	548	char *str_ptr = str;
	549	while ((str_ptr = strchr(str_ptr, find)))
	550	*str_ptr++ = replace;
	551	}
	552
	553	/**
	554	* string_to_unsigned:
	555	* @str : input string
	556	*
	557	* Converts string to unsigned integer.
	558	*
	559	* @return 0 if string is invalid, otherwise > 0
	560	**/
	561	unsigned string_to_unsigned(const char *str)
	562	{
	563	const char *ptr = NULL;
	564
	565	if (string_is_empty(str))
	566	return 0;
	567
	568	for (ptr = str; *ptr != '\0'; ptr++)
	569	{
	570	if (!ISDIGIT((unsigned char)*ptr))
	571	return 0;
	572	}
	573
	574	return (unsigned)strtoul(str, NULL, 10);
	575	}
	576
	577	/**
	578	* string_hex_to_unsigned:
	579	* @str : input string (must be non-NULL, otherwise UB)
	580	*
	581	* Converts hexadecimal string to unsigned integer.
	582	* Handles optional leading '0x'.
	583	*
	584	* @return 0 if string is invalid, otherwise > 0
	585	**/
	586	unsigned string_hex_to_unsigned(const char *str)
	587	{
	588	const char *hex_str = str;
	589	const char *ptr = NULL;
	590
	591	/* Remove leading '0x', if required */
	592	if (str[0] != '\0' && str[1] != '\0')
	593	{
	594	if ( (str[0] == '0') &&
	595	((str[1] == 'x') \|\|
	596	(str[1] == 'X')))
	597	{
	598	hex_str = str + 2;
	599	if (string_is_empty(hex_str))
	600	return 0;
	601	}
	602	}
	603	else
	604	return 0;
	605
	606	/* Check for valid characters */
	607	for (ptr = hex_str; *ptr != '\0'; ptr++)
	608	{
	609	if (!isxdigit((unsigned char)*ptr))
	610	return 0;
	611	}
	612
	613	return (unsigned)strtoul(hex_str, NULL, 16);
	614	}
	615
	616	/**
	617	* string_count_occurrences_single_character:
	618	*
	619	* Leaf function.
	620	*
	621	* Get the total number of occurrences of character @c in @str.
	622	*
	623	* @return Total number of occurrences of character @c
	624	*/
	625	int string_count_occurrences_single_character(const char *str, char c)
	626	{
	627	int count = 0;
	628
	629	for (; *str; str++)
	630	if (*str == c)
	631	count++;
	632
	633	return count;
	634	}
	635
	636	/**
	637	* string_replace_whitespace_with_single_character:
	638	*
	639	* Leaf function.
	640	*
	641	* Replaces all spaces with given character @c.
	642	**/
	643	void string_replace_whitespace_with_single_character(char *str, char c)
	644	{
	645	for (; *str; str++)
	646	if (ISSPACE(*str))
	647	*str = c;
	648	}
	649
	650	/**
	651	* string_replace_multi_space_with_single_space:
	652	*
	653	* Leaf function.
	654	*
	655	* Replaces multiple spaces with a single space in a string.
	656	**/
	657	void string_replace_multi_space_with_single_space(char *str)
	658	{
	659	char *str_trimmed = str;
	660	bool prev_is_space = false;
	661	bool curr_is_space = false;
	662
	663	for (; *str; str++)
	664	{
	665	curr_is_space = ISSPACE(*str);
	666	if (prev_is_space && curr_is_space)
	667	continue;
	668	str_trimmed++ = str;
	669	prev_is_space = curr_is_space;
	670	}
	671	*str_trimmed = '\0';
	672	}
	673
	674	/**
	675	* string_remove_all_whitespace:
	676	*
	677	* Leaf function.
	678	*
	679	* Remove all spaces from the given string.
	680	**/
	681	void string_remove_all_whitespace(char str_trimmed, const char str)
	682	{
	683	for (; *str; str++)
	684	if (!ISSPACE(*str))
	685	str_trimmed++ = str;
	686	*str_trimmed = '\0';
	687	}
	688
	689	/**
	690	* Retrieve the last occurance of the given character in a string.
	691	*/
	692	int string_index_last_occurance(const char *str, char c)
	693	{
	694	const char *pos = strrchr(str, c);
	695	if (pos)
	696	return (int)(pos - str);
	697	return -1;
	698	}
	699
	700	/**
	701	* string_find_index_substring_string:
	702	* @str : input string (must be non-NULL, otherwise UB)
	703	* @substr : substring to find in @str
	704	*
	705	* Find the position of substring @substr in string @str.
	706	**/
	707	int string_find_index_substring_string(const char str, const char substr)
	708	{
	709	const char *pos = strstr(str, substr);
	710	if (pos)
	711	return (int)(pos - str);
	712	return -1;
	713	}
	714
	715	/**
	716	* string_copy_only_ascii:
	717	*
	718	* Leaf function.
	719	*
	720	* Strips non-ASCII characters from a string.
	721	**/
	722	void string_copy_only_ascii(char str_stripped, const char str)
	723	{
	724	for (; *str; str++)
	725	if (str > 0x1F && str < 0x7F)
	726	str_stripped++ = str;
	727	*str_stripped = '\0';
	728	}