deps/libchdr/deps/zstd-1.5.6/programs/lorem.c

   1 /*
   2  * Copyright (c) Meta Platforms, Inc. and affiliates.
   3  * All rights reserved.
   4  *
   5  * This source code is licensed under both the BSD-style license (found in the
   6  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
   7  * in the COPYING file in the root directory of this source tree).
   8  * You may select, at your option, one of the above-listed licenses.
   9  */
  10
  11 /* Implementation notes:
  12  *
  13  * This is a very simple lorem ipsum generator
  14  * which features a static list of words
  15  * and print them one after another randomly
  16  * with a fake sentence / paragraph structure.
  17  *
  18  * The goal is to generate a printable text
  19  * that can be used to fake a text compression scenario.
  20  * The resulting compression / ratio curve of the lorem ipsum generator
  21  * is more satisfying than the previous statistical generator,
  22  * which was initially designed for entropy compression,
  23  * and lacks a regularity more representative of text.
  24  *
  25  * The compression ratio achievable on the generated lorem ipsum
  26  * is still a bit too good, presumably because the dictionary is a bit too
  27  * small. It would be possible to create some more complex scheme, notably by
  28  * enlarging the dictionary with a word generator, and adding grammatical rules
  29  * (composition) and syntax rules. But that's probably overkill for the intended
  30  * goal.
  31  */
  32
  33 #include "lorem.h"
  34 #include <assert.h>
  35 #include <limits.h> /* INT_MAX */
  36 #include <string.h> /* memcpy */
  37
  38 #define WORD_MAX_SIZE 20
  39
  40 /* Define the word pool */
  41 static const char* kWords[] = {
  42     "lorem",        "ipsum",      "dolor",       "sit",          "amet",
  43     "consectetur",  "adipiscing", "elit",        "sed",          "do",
  44     "eiusmod",      "tempor",     "incididunt",  "ut",           "labore",
  45     "et",           "dolore",     "magna",       "aliqua",       "dis",
  46     "lectus",       "vestibulum", "mattis",      "ullamcorper",  "velit",
  47     "commodo",      "a",          "lacus",       "arcu",         "magnis",
  48     "parturient",   "montes",     "nascetur",    "ridiculus",    "mus",
  49     "mauris",       "nulla",      "malesuada",   "pellentesque", "eget",
  50     "gravida",      "in",         "dictum",      "non",          "erat",
  51     "nam",          "voluptat",   "maecenas",    "blandit",      "aliquam",
  52     "etiam",        "enim",       "lobortis",    "scelerisque",  "fermentum",
  53     "dui",          "faucibus",   "ornare",      "at",           "elementum",
  54     "eu",           "facilisis",  "odio",        "morbi",        "quis",
  55     "eros",         "donec",      "ac",          "orci",         "purus",
  56     "turpis",       "cursus",     "leo",         "vel",          "porta",
  57     "consequat",    "interdum",   "varius",      "vulputate",    "aliquet",
  58     "pharetra",     "nunc",       "auctor",      "urna",         "id",
  59     "metus",        "viverra",    "nibh",        "cras",         "mi",
  60     "unde",         "omnis",      "iste",        "natus",        "error",
  61     "perspiciatis", "voluptatem", "accusantium", "doloremque",   "laudantium",
  62     "totam",        "rem",        "aperiam",     "eaque",        "ipsa",
  63     "quae",         "ab",         "illo",        "inventore",    "veritatis",
  64     "quasi",        "architecto", "beatae",      "vitae",        "dicta",
  65     "sunt",         "explicabo",  "nemo",        "ipsam",        "quia",
  66     "voluptas",     "aspernatur", "aut",         "odit",         "fugit",
  67     "consequuntur", "magni",      "dolores",     "eos",          "qui",
  68     "ratione",      "sequi",      "nesciunt",    "neque",        "porro",
  69     "quisquam",     "est",        "dolorem",     "adipisci",     "numquam",
  70     "eius",         "modi",       "tempora",     "incidunt",     "magnam",
  71     "quaerat",      "ad",         "minima",      "veniam",       "nostrum",
  72     "ullam",        "corporis",   "suscipit",    "laboriosam",   "nisi",
  73     "aliquid",      "ex",         "ea",          "commodi",      "consequatur",
  74     "autem",        "eum",        "iure",        "voluptate",    "esse",
  75     "quam",         "nihil",      "molestiae",   "illum",        "fugiat",
  76     "quo",          "pariatur",   "vero",        "accusamus",    "iusto",
  77     "dignissimos",  "ducimus",    "blanditiis",  "praesentium",  "voluptatum",
  78     "deleniti",     "atque",      "corrupti",    "quos",         "quas",
  79     "molestias",    "excepturi",  "sint",        "occaecati",    "cupiditate",
  80     "provident",    "similique",  "culpa",       "officia",      "deserunt",
  81     "mollitia",     "animi",      "laborum",     "dolorum",      "fuga",
  82     "harum",        "quidem",     "rerum",       "facilis",      "expedita",
  83     "distinctio",   "libero",     "tempore",     "cum",          "soluta",
  84     "nobis",        "eligendi",   "optio",       "cumque",       "impedit",
  85     "minus",        "quod",       "maxime",      "placeat",      "facere",
  86     "possimus",     "assumenda",  "repellendus", "temporibus",   "quibusdam",
  87     "officiis",     "debitis",    "saepe",       "eveniet",      "voluptates",
  88     "repudiandae",  "recusandae", "itaque",      "earum",        "hic",
  89     "tenetur",      "sapiente",   "delectus",    "reiciendis",   "cillum",
  90     "maiores",      "alias",      "perferendis", "doloribus",    "asperiores",
  91     "repellat",     "minim",      "nostrud",     "exercitation", "ullamco",
  92     "laboris",      "aliquip",    "duis",        "aute",         "irure",
  93 };
  94 static const unsigned kNbWords = sizeof(kWords) / sizeof(kWords[0]);
  95
  96 /* simple 1-dimension distribution, based on word's length, favors small words
  97  */
  98 static const int kWeights[]    = { 0, 8, 6, 4, 3, 2 };
  99 static const size_t kNbWeights = sizeof(kWeights) / sizeof(kWeights[0]);
 100
 101 #define DISTRIB_SIZE_MAX 650
 102 static int g_distrib[DISTRIB_SIZE_MAX] = { 0 };
 103 static unsigned g_distribCount         = 0;
 104
 105 static void countFreqs(
 106         const char* words[],
 107         size_t nbWords,
 108         const int* weights,
 109         size_t nbWeights)
 110 {
 111     unsigned total = 0;
 112     size_t w;
 113     for (w = 0; w < nbWords; w++) {
 114         size_t len = strlen(words[w]);
 115         int lmax;
 116         if (len >= nbWeights)
 117             len = nbWeights - 1;
 118         lmax = weights[len];
 119         total += (unsigned)lmax;
 120     }
 121     g_distribCount = total;
 122     assert(g_distribCount <= DISTRIB_SIZE_MAX);
 123 }
 124
 125 static void init_word_distrib(
 126         const char* words[],
 127         size_t nbWords,
 128         const int* weights,
 129         size_t nbWeights)
 130 {
 131     size_t w, d = 0;
 132     countFreqs(words, nbWords, weights, nbWeights);
 133     for (w = 0; w < nbWords; w++) {
 134         size_t len = strlen(words[w]);
 135         int l, lmax;
 136         if (len >= nbWeights)
 137             len = nbWeights - 1;
 138         lmax = weights[len];
 139         for (l = 0; l < lmax; l++) {
 140             g_distrib[d++] = (int)w;
 141         }
 142     }
 143 }
 144
 145 /* Note: this unit only works when invoked sequentially.
 146  * No concurrent access is allowed */
 147 static char* g_ptr         = NULL;
 148 static size_t g_nbChars    = 0;
 149 static size_t g_maxChars   = 10000000;
 150 static unsigned g_randRoot = 0;
 151
 152 #define RDG_rotl32(x, r) ((x << r) | (x >> (32 - r)))
 153 static unsigned LOREM_rand(unsigned range)
 154 {
 155     static const unsigned prime1 = 2654435761U;
 156     static const unsigned prime2 = 2246822519U;
 157     unsigned rand32              = g_randRoot;
 158     rand32 *= prime1;
 159     rand32 ^= prime2;
 160     rand32     = RDG_rotl32(rand32, 13);
 161     g_randRoot = rand32;
 162     return (unsigned)(((unsigned long long)rand32 * range) >> 32);
 163 }
 164
 165 static void writeLastCharacters(void)
 166 {
 167     size_t lastChars = g_maxChars - g_nbChars;
 168     assert(g_maxChars >= g_nbChars);
 169     if (lastChars == 0)
 170         return;
 171     g_ptr[g_nbChars++] = '.';
 172     if (lastChars > 2) {
 173         memset(g_ptr + g_nbChars, ' ', lastChars - 2);
 174     }
 175     if (lastChars > 1) {
 176         g_ptr[g_maxChars - 1] = '\n';
 177     }
 178     g_nbChars = g_maxChars;
 179 }
 180
 181 static void generateWord(const char* word, const char* separator, int upCase)
 182 {
 183     size_t const len = strlen(word) + strlen(separator);
 184     if (g_nbChars + len > g_maxChars) {
 185         writeLastCharacters();
 186         return;
 187     }
 188     memcpy(g_ptr + g_nbChars, word, strlen(word));
 189     if (upCase) {
 190         static const char toUp = 'A' - 'a';
 191         g_ptr[g_nbChars]       = (char)(g_ptr[g_nbChars] + toUp);
 192     }
 193     g_nbChars += strlen(word);
 194     memcpy(g_ptr + g_nbChars, separator, strlen(separator));
 195     g_nbChars += strlen(separator);
 196 }
 197
 198 static int about(unsigned target)
 199 {
 200     return (int)(LOREM_rand(target) + LOREM_rand(target) + 1);
 201 }
 202
 203 /* Function to generate a random sentence */
 204 static void generateSentence(int nbWords)
 205 {
 206     int commaPos       = about(9);
 207     int comma2         = commaPos + about(7);
 208     int qmark          = (LOREM_rand(11) == 7);
 209     const char* endSep = qmark ? "? " : ". ";
 210     int i;
 211     for (i = 0; i < nbWords; i++) {
 212         int const wordID       = g_distrib[LOREM_rand(g_distribCount)];
 213         const char* const word = kWords[wordID];
 214         const char* sep        = " ";
 215         if (i == commaPos)
 216             sep = ", ";
 217         if (i == comma2)
 218             sep = ", ";
 219         if (i == nbWords - 1)
 220             sep = endSep;
 221         generateWord(word, sep, i == 0);
 222     }
 223 }
 224
 225 static void generateParagraph(int nbSentences)
 226 {
 227     int i;
 228     for (i = 0; i < nbSentences; i++) {
 229         int wordsPerSentence = about(11);
 230         generateSentence(wordsPerSentence);
 231     }
 232     if (g_nbChars < g_maxChars) {
 233         g_ptr[g_nbChars++] = '\n';
 234     }
 235     if (g_nbChars < g_maxChars) {
 236         g_ptr[g_nbChars++] = '\n';
 237     }
 238 }
 239
 240 /* It's "common" for lorem ipsum generators to start with the same first
 241  * pre-defined sentence */
 242 static void generateFirstSentence(void)
 243 {
 244     int i;
 245     for (i = 0; i < 18; i++) {
 246         const char* word      = kWords[i];
 247         const char* separator = " ";
 248         if (i == 4)
 249             separator = ", ";
 250         if (i == 7)
 251             separator = ", ";
 252         generateWord(word, separator, i == 0);
 253     }
 254     generateWord(kWords[18], ". ", 0);
 255 }
 256
 257 size_t
 258 LOREM_genBlock(void* buffer, size_t size, unsigned seed, int first, int fill)
 259 {
 260     g_ptr = (char*)buffer;
 261     assert(size < INT_MAX);
 262     g_maxChars = size;
 263     g_nbChars  = 0;
 264     g_randRoot = seed;
 265     if (g_distribCount == 0) {
 266         init_word_distrib(kWords, kNbWords, kWeights, kNbWeights);
 267     }
 268
 269     if (first) {
 270         generateFirstSentence();
 271     }
 272     while (g_nbChars < g_maxChars) {
 273         int sentencePerParagraph = about(7);
 274         generateParagraph(sentencePerParagraph);
 275         if (!fill)
 276             break; /* only generate one paragraph in not-fill mode */
 277     }
 278     g_ptr = NULL;
 279     return g_nbChars;
 280 }
 281
 282 void LOREM_genBuffer(void* buffer, size_t size, unsigned seed)
 283 {
 284     LOREM_genBlock(buffer, size, seed, 1, 1);
 285 }