[pcsx_rearmed.git] / deps / libchdr / deps / zstd-1.5.6 / programs / lorem.c

/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

/* Implementation notes:
 *
 * This is a very simple lorem ipsum generator
 * which features a static list of words
 * and prints them one after another randomly
 * with a fake sentence / paragraph structure.
 *
 * The goal is to generate a printable text
 * that can be used to fake a text compression scenario.
 * The resulting compression / ratio curve of the lorem ipsum generator
 * is more satisfying than the previous statistical generator,
 * which was initially designed for entropy compression,
 * and lacks a regularity more representative of text.
 *
 * The compression ratio achievable on the generated lorem ipsum
 * is still a bit too good, presumably because the dictionary is a bit too
 * small. It would be possible to create some more complex scheme, notably by
 * enlarging the dictionary with a word generator, and adding grammatical rules
 * (composition) and syntax rules. But that's probably overkill for the intended
 * goal.
 */

#include "lorem.h"
#include <assert.h>
#include <limits.h> /* INT_MAX */
#include <string.h> /* memcpy */

/* NOTE(review): WORD_MAX_SIZE is not referenced anywhere else in this file;
 * the longest entry currently in kWords is 12 characters.
 * Confirm whether it is still needed before relying on it. */
#define WORD_MAX_SIZE 20

/* Define the word pool.
 * All entries are lowercase; generateWord() capitalizes the first letter
 * when asked to.
 * Order matters for the first 19 entries (indices 0..18): they are emitted
 * verbatim, in order, by generateFirstSentence(). The remaining entries are
 * only reached through the weighted table g_distrib. */
static const char* kWords[] = {
    "lorem",        "ipsum",      "dolor",       "sit",          "amet",
    "consectetur",  "adipiscing", "elit",        "sed",          "do",
    "eiusmod",      "tempor",     "incididunt",  "ut",           "labore",
    "et",           "dolore",     "magna",       "aliqua",       "dis",
    "lectus",       "vestibulum", "mattis",      "ullamcorper",  "velit",
    "commodo",      "a",          "lacus",       "arcu",         "magnis",
    "parturient",   "montes",     "nascetur",    "ridiculus",    "mus",
    "mauris",       "nulla",      "malesuada",   "pellentesque", "eget",
    "gravida",      "in",         "dictum",      "non",          "erat",
    "nam",          "voluptat",   "maecenas",    "blandit",      "aliquam",
    "etiam",        "enim",       "lobortis",    "scelerisque",  "fermentum",
    "dui",          "faucibus",   "ornare",      "at",           "elementum",
    "eu",           "facilisis",  "odio",        "morbi",        "quis",
    "eros",         "donec",      "ac",          "orci",         "purus",
    "turpis",       "cursus",     "leo",         "vel",          "porta",
    "consequat",    "interdum",   "varius",      "vulputate",    "aliquet",
    "pharetra",     "nunc",       "auctor",      "urna",         "id",
    "metus",        "viverra",    "nibh",        "cras",         "mi",
    "unde",         "omnis",      "iste",        "natus",        "error",
    "perspiciatis", "voluptatem", "accusantium", "doloremque",   "laudantium",
    "totam",        "rem",        "aperiam",     "eaque",        "ipsa",
    "quae",         "ab",         "illo",        "inventore",    "veritatis",
    "quasi",        "architecto", "beatae",      "vitae",        "dicta",
    "sunt",         "explicabo",  "nemo",        "ipsam",        "quia",
    "voluptas",     "aspernatur", "aut",         "odit",         "fugit",
    "consequuntur", "magni",      "dolores",     "eos",          "qui",
    "ratione",      "sequi",      "nesciunt",    "neque",        "porro",
    "quisquam",     "est",        "dolorem",     "adipisci",     "numquam",
    "eius",         "modi",       "tempora",     "incidunt",     "magnam",
    "quaerat",      "ad",         "minima",      "veniam",       "nostrum",
    "ullam",        "corporis",   "suscipit",    "laboriosam",   "nisi",
    "aliquid",      "ex",         "ea",          "commodi",      "consequatur",
    "autem",        "eum",        "iure",        "voluptate",    "esse",
    "quam",         "nihil",      "molestiae",   "illum",        "fugiat",
    "quo",          "pariatur",   "vero",        "accusamus",    "iusto",
    "dignissimos",  "ducimus",    "blanditiis",  "praesentium",  "voluptatum",
    "deleniti",     "atque",      "corrupti",    "quos",         "quas",
    "molestias",    "excepturi",  "sint",        "occaecati",    "cupiditate",
    "provident",    "similique",  "culpa",       "officia",      "deserunt",
    "mollitia",     "animi",      "laborum",     "dolorum",      "fuga",
    "harum",        "quidem",     "rerum",       "facilis",      "expedita",
    "distinctio",   "libero",     "tempore",     "cum",          "soluta",
    "nobis",        "eligendi",   "optio",       "cumque",       "impedit",
    "minus",        "quod",       "maxime",      "placeat",      "facere",
    "possimus",     "assumenda",  "repellendus", "temporibus",   "quibusdam",
    "officiis",     "debitis",    "saepe",       "eveniet",      "voluptates",
    "repudiandae",  "recusandae", "itaque",      "earum",        "hic",
    "tenetur",      "sapiente",   "delectus",    "reiciendis",   "cillum",
    "maiores",      "alias",      "perferendis", "doloribus",    "asperiores",
    "repellat",     "minim",      "nostrud",     "exercitation", "ullamco",
    "laboris",      "aliquip",    "duis",        "aute",         "irure",
};
/* Number of entries in kWords, derived from the array so it cannot drift. */
static const unsigned kNbWords = sizeof(kWords) / sizeof(kWords[0]);

/* Simple 1-dimension distribution, based on word's length, favors small words.
 * kWeights[len] is the number of slots a word of length `len` receives in
 * g_distrib; words whose length is >= kNbWeights use the last entry
 * (see countFreqs() and init_word_distrib()).
 */
static const int kWeights[]    = { 0, 8, 6, 4, 3, 2 };
static const size_t kNbWeights = sizeof(kWeights) / sizeof(kWeights[0]);

/* Capacity of the weighted lookup table. countFreqs() asserts that the sum
 * of all word weights fits within it. */
#define DISTRIB_SIZE_MAX 650
/* g_distrib[i] is an index into kWords; each word index is repeated as many
 * times as its weight, so a uniform pick in [0, g_distribCount) yields a
 * weighted pick of a word. */
static int g_distrib[DISTRIB_SIZE_MAX] = { 0 };
/* Number of valid entries in g_distrib. LOREM_genBlock() also uses the
 * value 0 to mean "table not built yet". */
static unsigned g_distribCount         = 0;

/* Add up the weight of every word to learn how many slots of g_distrib
 * will be populated, and store that total in g_distribCount.
 *
 * A word's weight is weights[strlen(word)]; lengths that fall outside the
 * table are clamped to its last entry.
 *
 * words     : array of NUL-terminated words
 * nbWords   : number of entries in `words`
 * weights   : per-length weight table
 * nbWeights : number of entries in `weights`
 */
static void countFreqs(
        const char* words[],
        size_t nbWords,
        const int* weights,
        size_t nbWeights)
{
    unsigned sum = 0;
    size_t idx   = 0;
    while (idx < nbWords) {
        size_t const wlen = strlen(words[idx]);
        size_t const slot = (wlen < nbWeights) ? wlen : nbWeights - 1;
        sum += (unsigned)weights[slot];
        idx++;
    }
    g_distribCount = sum;
    /* the weighted table is a fixed-size array: the total must fit */
    assert(g_distribCount <= DISTRIB_SIZE_MAX);
}

/* Build the weighted lookup table g_distrib.
 *
 * First calls countFreqs() to set g_distribCount, then writes each word's
 * index into g_distrib as many times as its weight
 * (weights[strlen(word)], clamped to the last table entry for long words).
 * Words with a weight of zero or less get no slot.
 *
 * words     : array of NUL-terminated words
 * nbWords   : number of entries in `words`
 * weights   : per-length weight table
 * nbWeights : number of entries in `weights`
 */
static void init_word_distrib(
        const char* words[],
        size_t nbWords,
        const int* weights,
        size_t nbWeights)
{
    size_t slot = 0;
    size_t idx;
    countFreqs(words, nbWords, weights, nbWeights);
    for (idx = 0; idx < nbWords; idx++) {
        size_t const wlen = strlen(words[idx]);
        int copies = weights[(wlen < nbWeights) ? wlen : nbWeights - 1];
        for (; copies > 0; copies--) {
            g_distrib[slot++] = (int)idx;
        }
    }
}

/* Note: this unit only works when invoked sequentially.
 * No concurrent access is allowed: all generation state lives in the
 * file-scope variables below. */
/* Output buffer for the generation in progress; set at the start of
 * LOREM_genBlock() and reset to NULL before it returns. */
static char* g_ptr         = NULL;
/* Number of characters written to g_ptr so far. */
static size_t g_nbChars    = 0;
/* Capacity of g_ptr; overwritten with the caller's `size` on every
 * LOREM_genBlock() call, so the initializer is only a placeholder. */
static size_t g_maxChars   = 10000000;
/* State of the pseudo-random generator: seeded by LOREM_genBlock() and
 * advanced by every LOREM_rand() call. */
static unsigned g_randRoot = 0;

/* Rotate the 32-bit value `x` left by `r` bits (0 < r < 32).
 * Both arguments are fully parenthesized so that compound expressions
 * (e.g. `a | b`, `n + 1`) expand with the intended precedence.
 * `x` and `r` are each evaluated twice: pass side-effect-free expressions. */
#define RDG_rotl32(x, r) (((x) << (r)) | ((x) >> (32 - (r))))

/* Advance the generator and return a pseudo-random value scaled to `range`.
 *
 * The state is multiplied by one constant, xored with another, rotated,
 * then stored back into g_randRoot. The result is the high 32 bits of
 * (state * range), which lies in [0, range) and is 0 when range is 0.
 * NOTE(review): the arithmetic assumes `unsigned` is exactly 32 bits wide.
 */
static unsigned LOREM_rand(unsigned range)
{
    static const unsigned prime1 = 2654435761U;
    static const unsigned prime2 = 2246822519U;
    unsigned rand32              = g_randRoot;
    rand32 *= prime1;
    rand32 ^= prime2;
    rand32     = RDG_rotl32(rand32, 13);
    g_randRoot = rand32;
    /* multiply-then-shift scales the value without a modulo */
    return (unsigned)(((unsigned long long)rand32 * range) >> 32);
}

/* Use up whatever room is left in the output buffer so it ends cleanly:
 * a '.', followed by spaces, with '\n' in the very last byte.
 * With only one byte left just the '.' is written; with two, ".\n".
 * Does nothing when the buffer is already full.
 * On return g_nbChars == g_maxChars. */
static void writeLastCharacters(void)
{
    size_t const room = g_maxChars - g_nbChars;
    assert(g_maxChars >= g_nbChars);
    if (room != 0) {
        char* const tail = g_ptr + g_nbChars;
        tail[0] = '.';
        if (room >= 3) {
            /* everything between the '.' and the final '\n' */
            memset(tail + 1, ' ', room - 2);
        }
        if (room >= 2) {
            tail[room - 1] = '\n';
        }
        g_nbChars = g_maxChars;
    }
}

/* Append `word` followed by `separator` to the output buffer.
 *
 * word      : NUL-terminated lowercase word
 * separator : NUL-terminated text written right after the word
 * upCase    : when non-zero, the first letter of the word is upper-cased
 *
 * If word + separator do not both fit in the remaining space, nothing of
 * the word is written and the rest of the buffer is padded by
 * writeLastCharacters() instead. */
static void generateWord(const char* word, const char* separator, int upCase)
{
    size_t const wordLen = strlen(word);
    size_t const sepLen  = strlen(separator);
    char* dst;

    if (g_nbChars + (wordLen + sepLen) > g_maxChars) {
        writeLastCharacters();
        return;
    }

    dst = g_ptr + g_nbChars;
    memcpy(dst, word, wordLen);
    if (upCase) {
        /* distance from a lowercase letter to its uppercase form */
        static const char caseShift = 'A' - 'a';
        dst[0] = (char)(dst[0] + caseShift);
    }
    memcpy(dst + wordLen, separator, sepLen);
    g_nbChars += wordLen;
    g_nbChars += sepLen;
}

/* Return a pseudo-random count "around" `target`:
 * the sum of two LOREM_rand(target) draws, plus one.
 * Consumes exactly two values from the generator. */
static int about(unsigned target)
{
    unsigned const firstDraw  = LOREM_rand(target);
    unsigned const secondDraw = LOREM_rand(target);
    return (int)(firstDraw + secondDraw + 1);
}

/* Function to generate a random sentence */
/* Generate one random sentence of `nbWords` words.
 *
 * Two comma positions and the end punctuation ("? " or ". ") are drawn
 * first; then each word is picked through the weighted table g_distrib.
 * The first word is capitalized. The last word always takes the end
 * punctuation, even if a comma position lands on it. */
static void generateSentence(int nbWords)
{
    int const firstComma         = about(9);
    int const secondComma        = firstComma + about(7);
    const char* const terminator = (LOREM_rand(11) == 7) ? "? " : ". ";
    int pos = 0;

    while (pos < nbWords) {
        const char* const word = kWords[g_distrib[LOREM_rand(g_distribCount)]];
        const char* sep;
        if (pos == nbWords - 1) {
            sep = terminator;
        } else if (pos == firstComma || pos == secondComma) {
            sep = ", ";
        } else {
            sep = " ";
        }
        generateWord(word, sep, pos == 0);
        pos++;
    }
}

/* Generate a paragraph of `nbSentences` sentences, each of a random length
 * drawn with about(11), then terminate it with up to two '\n' characters,
 * as many as still fit in the output buffer. */
static void generateParagraph(int nbSentences)
{
    int remaining;
    int newlines;

    for (remaining = nbSentences; remaining > 0; remaining--) {
        generateSentence(about(11));
    }

    /* blank line between paragraphs, truncated if the buffer is full */
    for (newlines = 0; newlines < 2 && g_nbChars < g_maxChars; newlines++) {
        g_ptr[g_nbChars++] = '\n';
    }
}

/* It's "common" for lorem ipsum generators to start with the same first
 * pre-defined sentence */
static void generateFirstSentence(void)
{
    int i;
    for (i = 0; i < 18; i++) {
        const char* word      = kWords[i];
        const char* separator = " ";
        if (i == 4)
            separator = ", ";
        if (i == 7)
            separator = ", ";
        generateWord(word, separator, i == 0);
    }
    generateWord(kWords[18], ". ", 0);
}

/* Generate lorem ipsum text into `buffer`.
 *
 * buffer : destination, at least `size` bytes
 * size   : capacity of `buffer`; asserted to be below INT_MAX
 * seed   : initial state of the pseudo-random generator
 * first  : when non-zero, start with the fixed opening sentence
 * fill   : when non-zero, keep generating paragraphs until the buffer is
 *          full; otherwise generate a single paragraph
 *
 * Returns the number of bytes written into `buffer`.
 * The weighted word table is built on the first call.
 * Uses file-scope state, so calls must not run concurrently. */
size_t
LOREM_genBlock(void* buffer, size_t size, unsigned seed, int first, int fill)
{
    g_ptr = (char*)buffer;
    assert(size < INT_MAX);
    g_maxChars = size;
    g_nbChars  = 0;
    g_randRoot = seed;

    /* a zero count means the table has not been built yet */
    if (g_distribCount == 0) {
        init_word_distrib(kWords, kNbWords, kWeights, kNbWeights);
    }

    if (first) {
        generateFirstSentence();
    }

    if (g_nbChars < g_maxChars) {
        do {
            generateParagraph(about(7));
            /* in not-fill mode, a single paragraph is generated */
        } while (fill && g_nbChars < g_maxChars);
    }

    g_ptr = NULL;
    return g_nbChars;
}

/* Fill all `size` bytes of `buffer` with lorem ipsum text generated from
 * `seed`, starting with the fixed opening sentence.
 * Convenience wrapper around LOREM_genBlock() with first=1 and fill=1;
 * the returned byte count is discarded. */
void LOREM_genBuffer(void* buffer, size_t size, unsigned seed)
{
    int const withFirstSentence = 1;
    int const fillWholeBuffer   = 1;
    (void)LOREM_genBlock(buffer, size, seed, withFirstSentence, fillWholeBuffer);
}
Commit	Line	Data
	1	/*
	2	* Copyright (c) Meta Platforms, Inc. and affiliates.
	3	* All rights reserved.
	4	*
	5	* This source code is licensed under both the BSD-style license (found in the
	6	* LICENSE file in the root directory of this source tree) and the GPLv2 (found
	7	* in the COPYING file in the root directory of this source tree).
	8	* You may select, at your option, one of the above-listed licenses.
	9	*/
	10
	11	/* Implementation notes:
	12	*
	13	* This is a very simple lorem ipsum generator
	14	* which features a static list of words
	15	* and print them one after another randomly
	16	* with a fake sentence / paragraph structure.
	17	*
	18	* The goal is to generate a printable text
	19	* that can be used to fake a text compression scenario.
	20	* The resulting compression / ratio curve of the lorem ipsum generator
	21	* is more satisfying than the previous statistical generator,
	22	* which was initially designed for entropy compression,
	23	* and lacks a regularity more representative of text.
	24	*
	25	* The compression ratio achievable on the generated lorem ipsum
	26	* is still a bit too good, presumably because the dictionary is a bit too
	27	* small. It would be possible to create some more complex scheme, notably by
	28	* enlarging the dictionary with a word generator, and adding grammatical rules
	29	* (composition) and syntax rules. But that's probably overkill for the intended
	30	* goal.
	31	*/
	32
	33	#include "lorem.h"
	34	#include <assert.h>
	35	#include <limits.h> /* INT_MAX */
	36	#include <string.h> /* memcpy */
	37
	38	#define WORD_MAX_SIZE 20
	39
	40	/* Define the word pool */
	41	static const char* kWords[] = {
	42	"lorem", "ipsum", "dolor", "sit", "amet",
	43	"consectetur", "adipiscing", "elit", "sed", "do",
	44	"eiusmod", "tempor", "incididunt", "ut", "labore",
	45	"et", "dolore", "magna", "aliqua", "dis",
	46	"lectus", "vestibulum", "mattis", "ullamcorper", "velit",
	47	"commodo", "a", "lacus", "arcu", "magnis",
	48	"parturient", "montes", "nascetur", "ridiculus", "mus",
	49	"mauris", "nulla", "malesuada", "pellentesque", "eget",
	50	"gravida", "in", "dictum", "non", "erat",
	51	"nam", "voluptat", "maecenas", "blandit", "aliquam",
	52	"etiam", "enim", "lobortis", "scelerisque", "fermentum",
	53	"dui", "faucibus", "ornare", "at", "elementum",
	54	"eu", "facilisis", "odio", "morbi", "quis",
	55	"eros", "donec", "ac", "orci", "purus",
	56	"turpis", "cursus", "leo", "vel", "porta",
	57	"consequat", "interdum", "varius", "vulputate", "aliquet",
	58	"pharetra", "nunc", "auctor", "urna", "id",
	59	"metus", "viverra", "nibh", "cras", "mi",
	60	"unde", "omnis", "iste", "natus", "error",
	61	"perspiciatis", "voluptatem", "accusantium", "doloremque", "laudantium",
	62	"totam", "rem", "aperiam", "eaque", "ipsa",
	63	"quae", "ab", "illo", "inventore", "veritatis",
	64	"quasi", "architecto", "beatae", "vitae", "dicta",
	65	"sunt", "explicabo", "nemo", "ipsam", "quia",
	66	"voluptas", "aspernatur", "aut", "odit", "fugit",
	67	"consequuntur", "magni", "dolores", "eos", "qui",
	68	"ratione", "sequi", "nesciunt", "neque", "porro",
	69	"quisquam", "est", "dolorem", "adipisci", "numquam",
	70	"eius", "modi", "tempora", "incidunt", "magnam",
	71	"quaerat", "ad", "minima", "veniam", "nostrum",
	72	"ullam", "corporis", "suscipit", "laboriosam", "nisi",
	73	"aliquid", "ex", "ea", "commodi", "consequatur",
	74	"autem", "eum", "iure", "voluptate", "esse",
	75	"quam", "nihil", "molestiae", "illum", "fugiat",
	76	"quo", "pariatur", "vero", "accusamus", "iusto",
	77	"dignissimos", "ducimus", "blanditiis", "praesentium", "voluptatum",
	78	"deleniti", "atque", "corrupti", "quos", "quas",
	79	"molestias", "excepturi", "sint", "occaecati", "cupiditate",
	80	"provident", "similique", "culpa", "officia", "deserunt",
	81	"mollitia", "animi", "laborum", "dolorum", "fuga",
	82	"harum", "quidem", "rerum", "facilis", "expedita",
	83	"distinctio", "libero", "tempore", "cum", "soluta",
	84	"nobis", "eligendi", "optio", "cumque", "impedit",
	85	"minus", "quod", "maxime", "placeat", "facere",
	86	"possimus", "assumenda", "repellendus", "temporibus", "quibusdam",
	87	"officiis", "debitis", "saepe", "eveniet", "voluptates",
	88	"repudiandae", "recusandae", "itaque", "earum", "hic",
	89	"tenetur", "sapiente", "delectus", "reiciendis", "cillum",
	90	"maiores", "alias", "perferendis", "doloribus", "asperiores",
	91	"repellat", "minim", "nostrud", "exercitation", "ullamco",
	92	"laboris", "aliquip", "duis", "aute", "irure",
	93	};
	94	static const unsigned kNbWords = sizeof(kWords) / sizeof(kWords[0]);
	95
	96	/* simple 1-dimension distribution, based on word's length, favors small words
	97	*/
	98	static const int kWeights[] = { 0, 8, 6, 4, 3, 2 };
	99	static const size_t kNbWeights = sizeof(kWeights) / sizeof(kWeights[0]);
	100
	101	#define DISTRIB_SIZE_MAX 650
	102	static int g_distrib[DISTRIB_SIZE_MAX] = { 0 };
	103	static unsigned g_distribCount = 0;
	104
	105	static void countFreqs(
	106	const char* words[],
	107	size_t nbWords,
	108	const int* weights,
	109	size_t nbWeights)
	110	{
	111	unsigned total = 0;
	112	size_t w;
	113	for (w = 0; w < nbWords; w++) {
	114	size_t len = strlen(words[w]);
	115	int lmax;
	116	if (len >= nbWeights)
	117	len = nbWeights - 1;
	118	lmax = weights[len];
	119	total += (unsigned)lmax;
	120	}
	121	g_distribCount = total;
	122	assert(g_distribCount <= DISTRIB_SIZE_MAX);
	123	}
	124
	125	static void init_word_distrib(
	126	const char* words[],
	127	size_t nbWords,
	128	const int* weights,
	129	size_t nbWeights)
	130	{
	131	size_t w, d = 0;
	132	countFreqs(words, nbWords, weights, nbWeights);
	133	for (w = 0; w < nbWords; w++) {
	134	size_t len = strlen(words[w]);
	135	int l, lmax;
	136	if (len >= nbWeights)
	137	len = nbWeights - 1;
	138	lmax = weights[len];
	139	for (l = 0; l < lmax; l++) {
	140	g_distrib[d++] = (int)w;
	141	}
	142	}
	143	}
	144
	145	/* Note: this unit only works when invoked sequentially.
	146	* No concurrent access is allowed */
	147	static char* g_ptr = NULL;
	148	static size_t g_nbChars = 0;
	149	static size_t g_maxChars = 10000000;
	150	static unsigned g_randRoot = 0;
	151
	152	#define RDG_rotl32(x, r) ((x << r) \| (x >> (32 - r)))
	153	static unsigned LOREM_rand(unsigned range)
	154	{
	155	static const unsigned prime1 = 2654435761U;
	156	static const unsigned prime2 = 2246822519U;
	157	unsigned rand32 = g_randRoot;
	158	rand32 *= prime1;
	159	rand32 ^= prime2;
	160	rand32 = RDG_rotl32(rand32, 13);
	161	g_randRoot = rand32;
	162	return (unsigned)(((unsigned long long)rand32 * range) >> 32);
	163	}
	164
	165	static void writeLastCharacters(void)
	166	{
	167	size_t lastChars = g_maxChars - g_nbChars;
	168	assert(g_maxChars >= g_nbChars);
	169	if (lastChars == 0)
	170	return;
	171	g_ptr[g_nbChars++] = '.';
	172	if (lastChars > 2) {
	173	memset(g_ptr + g_nbChars, ' ', lastChars - 2);
	174	}
	175	if (lastChars > 1) {
	176	g_ptr[g_maxChars - 1] = '\n';
	177	}
	178	g_nbChars = g_maxChars;
	179	}
	180
	181	static void generateWord(const char* word, const char* separator, int upCase)
	182	{
	183	size_t const len = strlen(word) + strlen(separator);
	184	if (g_nbChars + len > g_maxChars) {
	185	writeLastCharacters();
	186	return;
	187	}
	188	memcpy(g_ptr + g_nbChars, word, strlen(word));
	189	if (upCase) {
	190	static const char toUp = 'A' - 'a';
	191	g_ptr[g_nbChars] = (char)(g_ptr[g_nbChars] + toUp);
	192	}
	193	g_nbChars += strlen(word);
	194	memcpy(g_ptr + g_nbChars, separator, strlen(separator));
	195	g_nbChars += strlen(separator);
	196	}
	197
	198	static int about(unsigned target)
	199	{
	200	return (int)(LOREM_rand(target) + LOREM_rand(target) + 1);
	201	}
	202
	203	/* Function to generate a random sentence */
	204	static void generateSentence(int nbWords)
	205	{
	206	int commaPos = about(9);
	207	int comma2 = commaPos + about(7);
	208	int qmark = (LOREM_rand(11) == 7);
	209	const char* endSep = qmark ? "? " : ". ";
	210	int i;
	211	for (i = 0; i < nbWords; i++) {
	212	int const wordID = g_distrib[LOREM_rand(g_distribCount)];
	213	const char* const word = kWords[wordID];
	214	const char* sep = " ";
	215	if (i == commaPos)
	216	sep = ", ";
	217	if (i == comma2)
	218	sep = ", ";
	219	if (i == nbWords - 1)
	220	sep = endSep;
	221	generateWord(word, sep, i == 0);
	222	}
	223	}
	224
	225	static void generateParagraph(int nbSentences)
	226	{
	227	int i;
	228	for (i = 0; i < nbSentences; i++) {
	229	int wordsPerSentence = about(11);
	230	generateSentence(wordsPerSentence);
	231	}
	232	if (g_nbChars < g_maxChars) {
	233	g_ptr[g_nbChars++] = '\n';
	234	}
	235	if (g_nbChars < g_maxChars) {
	236	g_ptr[g_nbChars++] = '\n';
	237	}
	238	}
	239
	240	/* It's "common" for lorem ipsum generators to start with the same first
	241	* pre-defined sentence */
	242	static void generateFirstSentence(void)
	243	{
	244	int i;
	245	for (i = 0; i < 18; i++) {
	246	const char* word = kWords[i];
	247	const char* separator = " ";
	248	if (i == 4)
	249	separator = ", ";
	250	if (i == 7)
	251	separator = ", ";
	252	generateWord(word, separator, i == 0);
	253	}
	254	generateWord(kWords[18], ". ", 0);
	255	}
	256
	257	size_t
	258	LOREM_genBlock(void* buffer, size_t size, unsigned seed, int first, int fill)
	259	{
	260	g_ptr = (char*)buffer;
	261	assert(size < INT_MAX);
	262	g_maxChars = size;
	263	g_nbChars = 0;
	264	g_randRoot = seed;
	265	if (g_distribCount == 0) {
	266	init_word_distrib(kWords, kNbWords, kWeights, kNbWeights);
	267	}
	268
	269	if (first) {
	270	generateFirstSentence();
	271	}
	272	while (g_nbChars < g_maxChars) {
	273	int sentencePerParagraph = about(7);
	274	generateParagraph(sentencePerParagraph);
	275	if (!fill)
	276	break; /* only generate one paragraph in not-fill mode */
	277	}
	278	g_ptr = NULL;
	279	return g_nbChars;
	280	}
	281
	282	void LOREM_genBuffer(void* buffer, size_t size, unsigned seed)
	283	{
	284	LOREM_genBlock(buffer, size, seed, 1, 1);
	285	}