[pcsx_rearmed.git] / deps / libchdr / deps / zstd-1.5.5 / tests / regression / data.c

/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

#include "data.h"

#include <assert.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>   /* free() */

#include <sys/stat.h>

#include <curl/curl.h>

#include "mem.h"
#include "util.h"
#define XXH_STATIC_LINKING_ONLY
#include "xxhash.h"

/**
 * Data objects
 */

/* Builds the download URL of a regression asset: the string literal `x` is
 * pasted after the release base URL via adjacent-literal concatenation, so
 * `x` must itself be a string literal. */
#define REGRESSION_RELEASE(x) \
    "https://github.com/facebook/zstd/releases/download/regression-data/" x

/* "silesia" data set, stored as a directory. No dictionary: `.dict` is left
 * zero-initialized, so data_has_dict() reports false for it. */
data_t silesia = {
    .type = data_type_dir,
    .name = "silesia",
    .data = {
        .xxhash64 = 0x48a199f92f93e977ULL,
        .url = REGRESSION_RELEASE("silesia.tar.zst"),
    },
};

/* "silesia.tar" data set: the same archive as `silesia`, kept as one file.
 * No dictionary (`.dict` stays zero-initialized). */
data_t silesia_tar = {
    .type = data_type_file,
    .name = "silesia.tar",
    .data = {
        .xxhash64 = 0x48a199f92f93e977ULL,
        .url = REGRESSION_RELEASE("silesia.tar.zst"),
    },
};

/* "github" data set, stored as a directory, with an accompanying dictionary. */
data_t github = {
    .type = data_type_dir,
    .name = "github",
    .data = {
        .xxhash64 = 0xa9b1b44b020df292ULL,
        .url = REGRESSION_RELEASE("github.tar.zst"),
    },
    .dict = {
        .xxhash64 = 0x1eddc6f737d3cb53ULL,
        .url = REGRESSION_RELEASE("github.dict.zst"),
    },
};

/* "github.tar" data set: the same archive as `github`, kept as one file,
 * with the same dictionary. */
data_t github_tar = {
    .type = data_type_file,
    .name = "github.tar",
    .data = {
        .xxhash64 = 0xa9b1b44b020df292ULL,
        .url = REGRESSION_RELEASE("github.tar.zst"),
    },
    .dict = {
        .xxhash64 = 0x1eddc6f737d3cb53ULL,
        .url = REGRESSION_RELEASE("github.dict.zst"),
    },
};

/* NULL-terminated list of every data object defined above. Kept non-const
 * because data_init()/data_finish() fill in and release the `path` members. */
static data_t* g_data[] = {
    &silesia,
    &silesia_tar,
    &github,
    &github_tar,
    NULL,
};

/* Const-qualified view of the same g_data array. */
data_t const* const* data = (data_t const* const*)g_data;

/**
 * data helpers.
 */

/* Returns 1 when `data` has a dictionary resource (its dict URL is set),
 * 0 otherwise. */
int data_has_dict(data_t const* data) {
    if (data->dict.url == NULL)
        return 0;
    return 1;
}

/**
 * data buffer helper functions (documented in header).
 */

/* Allocates an uninitialized buffer of `capacity` bytes. On allocation
 * failure the returned buffer is all-zero (data == NULL, capacity == 0). */
data_buffer_t data_buffer_create(size_t const capacity) {
    data_buffer_t result = {};
    uint8_t* const storage = (uint8_t*)malloc(capacity);

    if (storage != NULL) {
        result.data = storage;
        result.capacity = capacity;
    }
    return result;
}

/*
 * Reads the whole file `filename` into a freshly allocated buffer.
 *
 * On success `data` is non-NULL and `size == capacity ==` the file size (an
 * empty file yields a valid, non-NULL buffer of size 0). On any failure a
 * diagnostic is printed to stderr and an all-zero buffer (data == NULL) is
 * returned. The caller releases the buffer with data_buffer_free().
 */
data_buffer_t data_buffer_read(char const* filename) {
    data_buffer_t buffer = {};

    uint64_t const size = UTIL_getFileSize(filename);
    if (size == UTIL_FILESIZE_UNKNOWN) {
        fprintf(stderr, "unknown size for %s\n", filename);
        return buffer;
    }
    /* The size is 64-bit but malloc/fread take size_t: refuse files that
     * cannot be represented instead of silently truncating the length. */
    if (size > SIZE_MAX) {
        fprintf(stderr, "file too large for %s\n", filename);
        return buffer;
    }

    /* malloc(0) is allowed to return NULL, which would be indistinguishable
     * from an allocation failure; always request at least one byte. */
    buffer.data = (uint8_t*)malloc(size == 0 ? 1 : (size_t)size);
    if (buffer.data == NULL) {
        fprintf(stderr, "malloc failed\n");
        return buffer;
    }
    buffer.capacity = (size_t)size;

    FILE* file = fopen(filename, "rb");
    if (file == NULL) {
        fprintf(stderr, "file null\n");
        goto err;
    }
    buffer.size = fread(buffer.data, 1, buffer.capacity, file);
    fclose(file);
    /* A short read means the file changed or an I/O error occurred. */
    if (buffer.size != buffer.capacity) {
        fprintf(stderr, "read %zu != %zu\n", buffer.size, buffer.capacity);
        goto err;
    }

    return buffer;
err:
    free(buffer.data);
    memset(&buffer, 0, sizeof(buffer));
    return buffer;
}

/* Loads the data file of `data` into a buffer. Only single-file data sets
 * (data_type_file) can be loaded; any other type yields an all-zero buffer. */
data_buffer_t data_buffer_get_data(data_t const* data) {
    if (data->type == data_type_file)
        return data_buffer_read(data->data.path);

    data_buffer_t const empty = {};
    return empty;
}

/* Loads the dictionary file of `data` into a buffer, or returns an all-zero
 * buffer when the data set has no dictionary. */
data_buffer_t data_buffer_get_dict(data_t const* data) {
    if (data_has_dict(data))
        return data_buffer_read(data->dict.path);

    data_buffer_t const empty = {};
    return empty;
}

/*
 * Orders two buffers: first by the bytes of their common prefix (memcmp
 * result), then by length. Returns <0, 0 or >0 like memcmp.
 */
int data_buffer_compare(data_buffer_t buffer1, data_buffer_t buffer2) {
    size_t const size =
        buffer1.size < buffer2.size ? buffer1.size : buffer2.size;

    /* An empty buffer may carry a NULL data pointer, and passing NULL to
     * memcmp is undefined even for a zero length, so skip the call then. */
    if (size != 0) {
        int const cmp = memcmp(buffer1.data, buffer2.data, size);
        if (cmp != 0)
            return cmp;
    }
    if (buffer1.size < buffer2.size)
        return -1;
    if (buffer1.size > buffer2.size)
        return 1;
    return 0;
}

/* Releases the storage owned by `buffer`; a NULL data pointer is fine. */
void data_buffer_free(data_buffer_t buffer) {
    uint8_t* const storage = buffer.data;
    free(storage);
}

/**
 * data filenames helpers.
 */

/* Builds a file-name table by expanding the data path of `data`, without
 * following links. The result comes straight from UTIL_createExpandedFNT(). */
FileNamesTable* data_filenames_get(data_t const* data)
{
    char const* const roots[1] = { data->data.path };
    return UTIL_createExpandedFNT(roots, 1, 0 /* followLinks */ );
}

/**
 * data buffers helpers.
 */

/*
 * Reads every file found under the data path of `data` into its own buffer.
 *
 * Returns a data_buffers_t whose `size` is the number of files and whose
 * `buffers` array holds one loaded buffer per file. If the file list cannot
 * be built, is empty, or any allocation/read fails, an empty result
 * (size == 0) is returned and nothing stays allocated.
 */
data_buffers_t data_buffers_get(data_t const* data) {
    data_buffers_t buffers = {.size = 0};
    FileNamesTable* const filenames = data_filenames_get(data);
    if (filenames == NULL)
        return buffers;
    if (filenames->tableSize == 0) {
        UTIL_freeFileNamesTable(filenames);
        return buffers;
    }

    data_buffer_t* buffersPtr =
        (data_buffer_t*)malloc(filenames->tableSize * sizeof(*buffersPtr));
    if (buffersPtr == NULL) {
        UTIL_freeFileNamesTable(filenames);
        return buffers;
    }
    buffers.buffers = (data_buffer_t const*)buffersPtr;
    buffers.size = filenames->tableSize;

    for (size_t i = 0; i < filenames->tableSize; ++i) {
        buffersPtr[i] = data_buffer_read(filenames->fileNames[i]);
        if (buffersPtr[i].data == NULL) {
            data_buffers_t const kEmptyBuffer = {};
            /* Entries [0, i) were read successfully and own heap storage;
             * release them before dropping the array so nothing leaks. */
            for (size_t j = 0; j < i; ++j)
                data_buffer_free(buffersPtr[j]);
            free(buffersPtr);
            UTIL_freeFileNamesTable(filenames);
            return kEmptyBuffer;
        }
    }

    UTIL_freeFileNamesTable(filenames);
    return buffers;
}

/**
 * Frees the data buffers.
 *
 * Only the array of buffer descriptors is released here.
 * NOTE(review): the `data` storage of each element (allocated through
 * data_buffer_read() in data_buffers_get()) is not freed by this function;
 * confirm whether callers release it themselves or whether this leaks.
 */
void data_buffers_free(data_buffers_t buffers) {
    /* The cast only drops the const qualifier so free() accepts the pointer. */
    free((data_buffer_t*)buffers.buffers);
}

/**
 * Initialization and download functions.
 */

static char* g_data_dir = NULL;

/*
 * mkdir -p: creates `indir` and every missing parent directory (mode
 * S_IRWXU), one path component at a time.
 *
 * Returns 0 on success, EINVAL for an empty path, ENOMEM if the working
 * copy of the path cannot be allocated, or the errno of the failing mkdir().
 */
static int ensure_directory_exists(char const* indir) {
    /* The scan below always starts at index 1 (to step over a leading '/'),
     * so an empty string would be read past its terminator. */
    if (indir[0] == '\0')
        return EINVAL;

    char* const dir = strdup(indir);
    if (dir == NULL)
        return ENOMEM;

    char* end = dir;
    int ret = 0;
    do {
        /* Find the next directory level. */
        for (++end; *end != '\0' && *end != '/'; ++end)
            ;
        /* End the string there, make the directory, and restore the string. */
        char const save = *end;
        *end = '\0';
        int const isdir = UTIL_isDirectory(dir);
        ret = mkdir(dir, S_IRWXU);
        /* Capture errno right away so nothing later can overwrite it. */
        int const mkdir_errno = errno;
        *end = save;
        /* It's okay if the directory already exists. */
        if (ret == 0 || (mkdir_errno == EEXIST && isdir))
            continue;
        ret = mkdir_errno;
        fprintf(stderr, "mkdir() failed\n");
        goto out;
    } while (*end != '\0');

    ret = 0;
out:
    free(dir);
    return ret;
}

/**
 * Joins up to three strings into one newly malloc'ed, NUL-terminated string.
 * `str3` may be NULL, in which case only `str1` and `str2` are joined.
 * Returns NULL if the allocation fails; the caller frees the result.
 */
static char* cat3(char const* str1, char const* str2, char const* str3) {
    size_t const len1 = strlen(str1);
    size_t const len2 = strlen(str2);
    size_t const len3 = (str3 != NULL) ? strlen(str3) : 0;
    char* const joined = (char*)malloc(len1 + len2 + len3 + 1);
    char* cursor = joined;

    if (joined == NULL)
        return NULL;

    /* Each copy writes its own terminator, which the next copy overwrites. */
    strcpy(cursor, str1);
    cursor += len1;
    strcpy(cursor, str2);
    cursor += len2;
    if (str3 != NULL)
        strcpy(cursor, str3);

    assert(strlen(joined) == len1 + len2 + len3);
    return joined;
}

/* Joins two strings into a newly allocated string: cat3() with no third
 * piece. Returns NULL on allocation failure. */
static char* cat2(char const* str1, char const* str2) {
    char const* const no_third = NULL;
    return cat3(str1, str2, no_third);
}

/**
 * State needed by the curl callback.
 * It takes data from curl, hashes it, and writes it to the file.
 */
typedef struct {
    /* Write end of the pipe opened with popen() in curl_data_create();
     * closed with pclose() in curl_data_free(). */
    FILE* file;
    /* Running XXH64 state over the downloaded bytes (see curl_write()). */
    XXH64_state_t xxhash64;
    /* 0 when creation succeeded, otherwise an errno-style code. */
    int error;
} curl_data_t;

/**
 * Create the curl state.
 *
 * Resets the hash and opens a pipe to a shell command that consumes the
 * compressed download:
 *  - data_type_file: `zstd -dqfo '<resource->path>'` decompresses to the path;
 *  - otherwise: `zstd -dc | tar -x -C '<g_data_dir>'` extracts into the cache.
 *
 * On failure `error` is non-zero (ENOMEM, or the popen() errno, or EIO when
 * popen() failed without setting errno) and `file` is NULL.
 *
 * NOTE(review): the path is only wrapped in single quotes, so a path that
 * itself contains a single quote would break the shell command.
 */
static curl_data_t curl_data_create(
    data_resource_t const* resource,
    data_type_t type) {
    curl_data_t cdata = {};

    XXH64_reset(&cdata.xxhash64, 0);

    assert(UTIL_isDirectory(g_data_dir));

    char* const cmd = (type == data_type_file)
        /* Decompress the resource and store to the path. */
        ? cat3("zstd -dqfo '", resource->path, "'")
        /* Decompress and extract the resource to the cache directory. */
        : cat3("zstd -dc | tar -x -C '", g_data_dir, "'");
    if (cmd == NULL) {
        cdata.error = ENOMEM;
        return cdata;
    }

    /* Clear errno first and read it before free(): popen() is not required
     * to set it on every failure, and free() may overwrite it. A NULL file
     * must never be reported with error == 0. */
    errno = 0;
    cdata.file = popen(cmd, "w");
    if (cdata.file == NULL) {
        cdata.error = (errno != 0) ? errno : EIO;
    }
    free(cmd);

    return cdata;
}

/** Closes the pipe held by the curl state and returns pclose()'s status. */
static int curl_data_free(curl_data_t cdata) {
    int const status = pclose(cdata.file);
    return status;
}

/**
 * curl write callback. Writes the received chunk to the pipe and feeds the
 * bytes that were actually written into the running hash.
 *
 * `ptr` is the curl_data_t registered with CURLOPT_WRITEDATA. Returns the
 * number of BYTES handled; libcurl treats any value other than
 * `size * count` as an error and aborts the transfer.
 */
static size_t curl_write(void* data, size_t size, size_t count, void* ptr) {
    curl_data_t* const cdata = (curl_data_t*)ptr;
    /* fwrite reports whole items; convert to bytes for both the hash and the
     * return value so the units stay right even if size != 1. */
    size_t const written = fwrite(data, size, count, cdata->file);
    size_t const bytes = written * size;
    XXH64_update(&cdata->xxhash64, data, bytes);
    return bytes;
}

/*
 * Downloads one resource through `curl`, piping it into the decompression
 * command set up by curl_data_create(), then checks that the expected output
 * exists and that the XXH64 of the downloaded bytes matches the resource.
 *
 * Returns 0 on success; EINVAL if a curl option cannot be set or the
 * checksum differs; the curl_data_create() error; or EIO when the transfer,
 * the pipe, or the output check fails.
 */
static int curl_download_resource(
    CURL* curl,
    data_resource_t const* resource,
    data_type_t type) {
    curl_data_t state;

    /* Point curl at the URL and at our (not yet initialized) state. */
    if (curl_easy_setopt(curl, CURLOPT_URL, resource->url) != 0)
        return EINVAL;
    if (curl_easy_setopt(curl, CURLOPT_WRITEDATA, &state) != 0)
        return EINVAL;

    state = curl_data_create(resource, type);
    if (state.error != 0)
        return state.error;

    /* Always close the pipe, even when the transfer itself failed. */
    int const transfer_failed = (curl_easy_perform(curl) != 0);
    int const pipe_failed = (curl_data_free(state) != 0);

    if (transfer_failed) {
        fprintf(
            stderr,
            "downloading '%s' for '%s' failed\n",
            resource->url,
            resource->path);
        return EIO;
    }
    if (pipe_failed) {
        fprintf(stderr, "writing data to '%s' failed\n", resource->path);
        return EIO;
    }

    /* The decompression command must have produced the expected output. */
    if (type == data_type_file) {
        if (!UTIL_isRegularFile(resource->path)) {
            fprintf(
                stderr, "output file '%s' does not exist\n", resource->path);
            return EIO;
        }
    }
    if (type == data_type_dir) {
        if (!UTIL_isDirectory(resource->path)) {
            fprintf(
                stderr,
                "output directory '%s' does not exist\n",
                resource->path);
            return EIO;
        }
    }

    /* Compare the hash of the downloaded bytes with the recorded one. */
    XXH64_hash_t const actual = XXH64_digest(&state.xxhash64);
    if (actual != resource->xxhash64) {
        fprintf(
            stderr,
            "checksum does not match: 0x%llxLL != 0x%llxLL\n",
            (unsigned long long)actual,
            (unsigned long long)resource->xxhash64);
        return EINVAL;
    }

    return 0;
}

/**
 * Downloads one data object: its data resource first, then its dictionary
 * (always as a single file) when it has one. Returns the first error, or 0.
 */
static int curl_download_datum(CURL* curl, data_t const* data) {
    int const data_err = curl_download_resource(curl, &data->data, data->type);
    if (data_err != 0)
        return data_err;

    if (!data_has_dict(data))
        return 0;

    return curl_download_resource(curl, &data->dict, data_type_file);
}

/**
 * Download all the data.
 *
 * Initializes libcurl, downloads every object of the NULL-terminated list
 * `data` with one shared easy handle, and tears libcurl down again on every
 * path. Returns 0 on success and EFAULT on any failure.
 */
static int curl_download_data(data_t const* const* data) {
    if (curl_global_init(CURL_GLOBAL_ALL) != 0)
        return EFAULT;

    int err = EFAULT;
    CURL* const curl = curl_easy_init();

    /* curl_global_init() succeeded, so the global cleanup must still run
     * even when no easy handle could be created. */
    if (curl == NULL)
        goto out_global;

    if (curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 1L) != 0)
        goto out;
    if (curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L) != 0)
        goto out;
    if (curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, curl_write) != 0)
        goto out;

    assert(data != NULL);
    for (; *data != NULL; ++data) {
        if (curl_download_datum(curl, *data) != 0)
            goto out;
    }

    err = 0;
out:
    curl_easy_cleanup(curl);
out_global:
    curl_global_cleanup();
    return err;
}

/**
 * Fill the path member variable of the data objects.
 *
 * For every object in the NULL-terminated list `data`, sets
 * data.path = "<dir>/<name>" and, when the object has a dictionary,
 * dict.path = "<data.path>.dict". The strings are heap-allocated and are
 * released by data_free_paths().
 *
 * Returns 0 on success or ENOMEM; on ENOMEM the paths assigned so far stay
 * set.
 */
static int data_create_paths(data_t* const* data, char const* dir) {
    assert(data != NULL);
    for (; *data != NULL; ++data) {
        data_t* const datum = *data;
        datum->data.path = cat3(dir, "/", datum->name);
        if (datum->data.path == NULL)
            return ENOMEM;
        if (data_has_dict(datum)) {
            datum->dict.path = cat2(datum->data.path, ".dict");
            if (datum->dict.path == NULL)
                return ENOMEM;
        }
    }
    return 0;
}

/**
 * Releases the data and dictionary path strings of every object in the
 * NULL-terminated list `data` and resets both members to NULL.
 */
static void data_free_paths(data_t* const* data) {
    assert(data != NULL);
    for (size_t i = 0; data[i] != NULL; ++i) {
        data_t* const entry = data[i];

        free((void*)entry->data.path);
        entry->data.path = NULL;

        free((void*)entry->dict.path);
        entry->dict.path = NULL;
    }
}

/* File name, inside the cache directory, of the stamp that records the hash
 * of the cached data set list (see stamp_check() and stamp_write()). */
static char const kStampName[] = "STAMP";

/* Feeds a 64-bit value into the hash in little-endian byte order, so the
 * result does not depend on the host's endianness. */
static void xxh_update_le(XXH64_state_t* state, uint64_t data) {
    uint64_t const le = MEM_isLittleEndian() ? data : MEM_swap64(data);
    XXH64_update(state, &le, sizeof(le));
}

/**
 * Computes the stamp: an XXH64 (seed 0) over, for each object in the
 * NULL-terminated list, its name, data checksum, dict checksum and type.
 * URLs are deliberately left out, and paths are derived from the names.
 */
static uint64_t stamp_hash(data_t const* const* data) {
    XXH64_state_t hasher;

    XXH64_reset(&hasher, 0);
    assert(data != NULL);
    for (size_t i = 0; data[i] != NULL; ++i) {
        data_t const* const entry = data[i];
        size_t const name_len = strlen(entry->name);

        XXH64_update(&hasher, entry->name, name_len);
        xxh_update_le(&hasher, entry->data.xxhash64);
        xxh_update_le(&hasher, entry->dict.xxhash64);
        xxh_update_le(&hasher, entry->type);
    }
    return XXH64_digest(&hasher);
}

/**
 * Returns 1 when the stamp file in `dir` holds the hash expected for `data`,
 * 0 otherwise (missing, unreadable, truncated or different stamp). Each
 * outcome other than an allocation failure is reported on stderr.
 */
static int stamp_check(char const* dir, data_t const* const* data) {
    char* const stamp_path = cat3(dir, "/", kStampName);
    uint64_t const expected = stamp_hash(data);
    int matches = 0;

    if (stamp_path == NULL)
        return 0;

    if (!UTIL_isRegularFile(stamp_path)) {
        fprintf(stderr, "stamp does not exist: recreating the data cache\n");
        free(stamp_path);
        return 0;
    }

    FILE* const stampfile = fopen(stamp_path, "rb");
    free(stamp_path);
    if (stampfile == NULL) {
        fprintf(stderr, "could not open stamp: recreating the data cache\n");
        return 0;
    }

    XXH64_canonical_t stored;
    if (fread(&stored, sizeof(stored), 1, stampfile) != 1) {
        fprintf(stderr, "invalid stamp: recreating the data cache\n");
    } else {
        matches = (expected == XXH64_hashFromCanonical(&stored));
        if (matches)
            fprintf(stderr, "stamp matches: reusing the cached data\n");
        else
            fprintf(
                stderr, "stamp does not match: recreating the data cache\n");
    }

    fclose(stampfile);
    return matches;
}

/**
 * On success write a new stamp, on failure delete the old stamp.
 *
 * `data_err` is the outcome of populating the cache. When it is 0 the stamp
 * for `data` is written to "<dir>/STAMP". When it is non-zero, or when the
 * stamp cannot be written and flushed, the stamp file is removed so the
 * next run rebuilds the cache.
 *
 * Returns 0 on success, ENOMEM if the stamp path cannot be built, `data_err`
 * when it is non-zero, or EIO when writing the stamp fails.
 */
static int
stamp_write(char const* dir, data_t const* const* data, int const data_err) {
    char* stamp = cat3(dir, "/", kStampName);
    FILE* stampfile = NULL;
    XXH64_canonical_t hash;
    int err = EIO;

    if (stamp == NULL)
        return ENOMEM;

    if (data_err != 0) {
        err = data_err;
        goto out;
    }

    XXH64_canonicalFromHash(&hash, stamp_hash(data));

    stampfile = fopen(stamp, "wb");
    if (stampfile == NULL)
        goto out;
    if (fwrite(&hash, sizeof(hash), 1, stampfile) != 1)
        goto out;
    /* fwrite only fills the stdio buffer; the stamp is on disk only once
     * fclose has flushed it, so success is declared after a clean close. */
    {
        int const close_failed = (fclose(stampfile) != 0);
        stampfile = NULL;
        if (close_failed)
            goto out;
    }
    err = 0;
    fprintf(stderr, "stamped new data cache\n");
out:
    if (stampfile != NULL)
        fclose(stampfile);
    if (err != 0)
        /* Ignore errors. */
        unlink(stamp);
    free(stamp);
    return err;
}

/*
 * Prepares the data cache in `dir`: creates the directory, records it in
 * g_data_dir, fills in the path of every data object, and downloads the data
 * unless the stamp in `dir` already matches.
 *
 * Returns 0 on success, EINVAL for a NULL `dir`, ENOMEM on allocation
 * failure, or the error of the directory/path/download step that failed.
 */
int data_init(char const* dir) {
    if (dir == NULL)
        return EINVAL;

    /* This must be first to simplify logic. */
    int const mkdir_err = ensure_directory_exists(dir);
    if (mkdir_err != 0)
        return mkdir_err;

    /* Save the cache directory. */
    g_data_dir = strdup(dir);
    if (g_data_dir == NULL)
        return ENOMEM;

    int const paths_err = data_create_paths(g_data, dir);
    if (paths_err != 0)
        return paths_err;

    /* If the stamp matches then we are good to go.
     * This must be called before any modifications to the data cache.
     * After this point, we MUST call stamp_write() to update the STAMP,
     * since we've updated the data cache.
     */
    if (stamp_check(dir, data))
        return 0;

    int const download_err = curl_download_data(data);

    /* This must be last, since it must know if data_init() succeeded.
     * Its own result is not propagated; only the download outcome is. */
    stamp_write(dir, data, download_err);
    return download_err;
}

/* Releases everything data_init() allocated: the per-object paths and the
 * saved cache directory, which is reset to NULL. */
void data_finish(void) {
    char* const saved_dir = g_data_dir;

    data_free_paths(g_data);
    g_data_dir = NULL;
    free(saved_dir);
}
Commit	Line	Data
	1	/*
	2	* Copyright (c) Meta Platforms, Inc. and affiliates.
	3	* All rights reserved.
	4	*
	5	* This source code is licensed under both the BSD-style license (found in the
	6	* LICENSE file in the root directory of this source tree) and the GPLv2 (found
	7	* in the COPYING file in the root directory of this source tree).
	8	* You may select, at your option, one of the above-listed licenses.
	9	*/
	10
	11	#include "data.h"
	12
	13	#include <assert.h>
	14	#include <errno.h>
	15	#include <stdio.h>
	16	#include <string.h>
	17	#include <stdlib.h> /* free() */
	18
	19	#include <sys/stat.h>
	20
	21	#include <curl/curl.h>
	22
	23	#include "mem.h"
	24	#include "util.h"
	25	#define XXH_STATIC_LINKING_ONLY
	26	#include "xxhash.h"
	27
	28	/**
	29	* Data objects
	30	*/
	31
	32	#define REGRESSION_RELEASE(x) \
	33	"https://github.com/facebook/zstd/releases/download/regression-data/" x
	34
	35	data_t silesia = {
	36	.name = "silesia",
	37	.type = data_type_dir,
	38	.data =
	39	{
	40	.url = REGRESSION_RELEASE("silesia.tar.zst"),
	41	.xxhash64 = 0x48a199f92f93e977LL,
	42	},
	43	};
	44
	45	data_t silesia_tar = {
	46	.name = "silesia.tar",
	47	.type = data_type_file,
	48	.data =
	49	{
	50	.url = REGRESSION_RELEASE("silesia.tar.zst"),
	51	.xxhash64 = 0x48a199f92f93e977LL,
	52	},
	53	};
	54
	55	data_t github = {
	56	.name = "github",
	57	.type = data_type_dir,
	58	.data =
	59	{
	60	.url = REGRESSION_RELEASE("github.tar.zst"),
	61	.xxhash64 = 0xa9b1b44b020df292LL,
	62	},
	63	.dict =
	64	{
	65	.url = REGRESSION_RELEASE("github.dict.zst"),
	66	.xxhash64 = 0x1eddc6f737d3cb53LL,
	67
	68	},
	69	};
	70
	71	data_t github_tar = {
	72	.name = "github.tar",
	73	.type = data_type_file,
	74	.data =
	75	{
	76	.url = REGRESSION_RELEASE("github.tar.zst"),
	77	.xxhash64 = 0xa9b1b44b020df292LL,
	78	},
	79	.dict =
	80	{
	81	.url = REGRESSION_RELEASE("github.dict.zst"),
	82	.xxhash64 = 0x1eddc6f737d3cb53LL,
	83
	84	},
	85	};
	86
	87	static data_t* g_data[] = {
	88	&silesia,
	89	&silesia_tar,
	90	&github,
	91	&github_tar,
	92	NULL,
	93	};
	94
	95	data_t const* const* data = (data_t const* const*)g_data;
	96
	97	/**
	98	* data helpers.
	99	*/
	100
	101	int data_has_dict(data_t const* data) {
	102	return data->dict.url != NULL;
	103	}
	104
	105	/**
	106	* data buffer helper functions (documented in header).
	107	*/
	108
	109	data_buffer_t data_buffer_create(size_t const capacity) {
	110	data_buffer_t buffer = {};
	111
	112	buffer.data = (uint8_t*)malloc(capacity);
	113	if (buffer.data == NULL)
	114	return buffer;
	115	buffer.capacity = capacity;
	116	return buffer;
	117	}
	118
	119	data_buffer_t data_buffer_read(char const* filename) {
	120	data_buffer_t buffer = {};
	121
	122	uint64_t const size = UTIL_getFileSize(filename);
	123	if (size == UTIL_FILESIZE_UNKNOWN) {
	124	fprintf(stderr, "unknown size for %s\n", filename);
	125	return buffer;
	126	}
	127
	128	buffer.data = (uint8_t*)malloc(size);
	129	if (buffer.data == NULL) {
	130	fprintf(stderr, "malloc failed\n");
	131	return buffer;
	132	}
	133	buffer.capacity = size;
	134
	135	FILE* file = fopen(filename, "rb");
	136	if (file == NULL) {
	137	fprintf(stderr, "file null\n");
	138	goto err;
	139	}
	140	buffer.size = fread(buffer.data, 1, buffer.capacity, file);
	141	fclose(file);
	142	if (buffer.size != buffer.capacity) {
	143	fprintf(stderr, "read %zu != %zu\n", buffer.size, buffer.capacity);
	144	goto err;
	145	}
	146
	147	return buffer;
	148	err:
	149	free(buffer.data);
	150	memset(&buffer, 0, sizeof(buffer));
	151	return buffer;
	152	}
	153
	154	data_buffer_t data_buffer_get_data(data_t const* data) {
	155	data_buffer_t const kEmptyBuffer = {};
	156
	157	if (data->type != data_type_file)
	158	return kEmptyBuffer;
	159
	160	return data_buffer_read(data->data.path);
	161	}
	162
	163	data_buffer_t data_buffer_get_dict(data_t const* data) {
	164	data_buffer_t const kEmptyBuffer = {};
	165
	166	if (!data_has_dict(data))
	167	return kEmptyBuffer;
	168
	169	return data_buffer_read(data->dict.path);
	170	}
	171
	172	int data_buffer_compare(data_buffer_t buffer1, data_buffer_t buffer2) {
	173	size_t const size =
	174	buffer1.size < buffer2.size ? buffer1.size : buffer2.size;
	175	int const cmp = memcmp(buffer1.data, buffer2.data, size);
	176	if (cmp != 0)
	177	return cmp;
	178	if (buffer1.size < buffer2.size)
	179	return -1;
	180	if (buffer1.size == buffer2.size)
	181	return 0;
	182	assert(buffer1.size > buffer2.size);
	183	return 1;
	184	}
	185
	186	void data_buffer_free(data_buffer_t buffer) {
	187	free(buffer.data);
	188	}
	189
	190	/**
	191	* data filenames helpers.
	192	*/
	193
	194	FileNamesTable* data_filenames_get(data_t const* data)
	195	{
	196	char const* const path = data->data.path;
	197	return UTIL_createExpandedFNT(&path, 1, 0 /* followLinks */ );
	198	}
	199
	200	/**
	201	* data buffers helpers.
	202	*/
	203
	204	data_buffers_t data_buffers_get(data_t const* data) {
	205	data_buffers_t buffers = {.size = 0};
	206	FileNamesTable* const filenames = data_filenames_get(data);
	207	if (filenames == NULL) return buffers;
	208	if (filenames->tableSize == 0) {
	209	UTIL_freeFileNamesTable(filenames);
	210	return buffers;
	211	}
	212
	213	data_buffer_t* buffersPtr =
	214	(data_buffer_t)malloc(filenames->tableSize sizeof(*buffersPtr));
	215	if (buffersPtr == NULL) {
	216	UTIL_freeFileNamesTable(filenames);
	217	return buffers;
	218	}
	219	buffers.buffers = (data_buffer_t const*)buffersPtr;
	220	buffers.size = filenames->tableSize;
	221
	222	for (size_t i = 0; i < filenames->tableSize; ++i) {
	223	buffersPtr[i] = data_buffer_read(filenames->fileNames[i]);
	224	if (buffersPtr[i].data == NULL) {
	225	data_buffers_t const kEmptyBuffer = {};
	226	data_buffers_free(buffers);
	227	UTIL_freeFileNamesTable(filenames);
	228	return kEmptyBuffer;
	229	}
	230	}
	231
	232	UTIL_freeFileNamesTable(filenames);
	233	return buffers;
	234	}
	235
	236	/**
	237	* Frees the data buffers.
	238	*/
	239	void data_buffers_free(data_buffers_t buffers) {
	240	free((data_buffer_t*)buffers.buffers);
	241	}
	242
	243	/**
	244	* Initialization and download functions.
	245	*/
	246
	247	static char* g_data_dir = NULL;
	248
	249	/* mkdir -p */
	250	static int ensure_directory_exists(char const* indir) {
	251	char* const dir = strdup(indir);
	252	char* end = dir;
	253	int ret = 0;
	254	if (dir == NULL) {
	255	ret = EINVAL;
	256	goto out;
	257	}
	258	do {
	259	/* Find the next directory level. */
	260	for (++end; end != '\0' && end != '/'; ++end)
	261	;
	262	/* End the string there, make the directory, and restore the string. */
	263	char const save = *end;
	264	*end = '\0';
	265	int const isdir = UTIL_isDirectory(dir);
	266	ret = mkdir(dir, S_IRWXU);
	267	*end = save;
	268	/* Its okay if the directory already exists. */
	269	if (ret == 0 \|\| (errno == EEXIST && isdir))
	270	continue;
	271	ret = errno;
	272	fprintf(stderr, "mkdir() failed\n");
	273	goto out;
	274	} while (*end != '\0');
	275
	276	ret = 0;
	277	out:
	278	free(dir);
	279	return ret;
	280	}
	281
	282	/** Concatenate 3 strings into a new buffer. */
	283	static char* cat3(char const* str1, char const* str2, char const* str3) {
	284	size_t const size1 = strlen(str1);
	285	size_t const size2 = strlen(str2);
	286	size_t const size3 = str3 == NULL ? 0 : strlen(str3);
	287	size_t const size = size1 + size2 + size3 + 1;
	288	char* const dst = (char*)malloc(size);
	289	if (dst == NULL)
	290	return NULL;
	291	strcpy(dst, str1);
	292	strcpy(dst + size1, str2);
	293	if (str3 != NULL)
	294	strcpy(dst + size1 + size2, str3);
	295	assert(strlen(dst) == size1 + size2 + size3);
	296	return dst;
	297	}
	298
	299	static char* cat2(char const* str1, char const* str2) {
	300	return cat3(str1, str2, NULL);
	301	}
	302
	303	/**
	304	* State needed by the curl callback.
	305	* It takes data from curl, hashes it, and writes it to the file.
	306	*/
	307	typedef struct {
	308	FILE* file;
	309	XXH64_state_t xxhash64;
	310	int error;
	311	} curl_data_t;
	312
	313	/** Create the curl state. */
	314	static curl_data_t curl_data_create(
	315	data_resource_t const* resource,
	316	data_type_t type) {
	317	curl_data_t cdata = {};
	318
	319	XXH64_reset(&cdata.xxhash64, 0);
	320
	321	assert(UTIL_isDirectory(g_data_dir));
	322
	323	if (type == data_type_file) {
	324	/* Decompress the resource and store to the path. */
	325	char* cmd = cat3("zstd -dqfo '", resource->path, "'");
	326	if (cmd == NULL) {
	327	cdata.error = ENOMEM;
	328	return cdata;
	329	}
	330	cdata.file = popen(cmd, "w");
	331	free(cmd);
	332	} else {
	333	/* Decompress and extract the resource to the cache directory. */
	334	char* cmd = cat3("zstd -dc \| tar -x -C '", g_data_dir, "'");
	335	if (cmd == NULL) {
	336	cdata.error = ENOMEM;
	337	return cdata;
	338	}
	339	cdata.file = popen(cmd, "w");
	340	free(cmd);
	341	}
	342	if (cdata.file == NULL) {
	343	cdata.error = errno;
	344	}
	345
	346	return cdata;
	347	}
	348
	349	/** Free the curl state. */
	350	static int curl_data_free(curl_data_t cdata) {
	351	return pclose(cdata.file);
	352	}
	353
	354	/** curl callback. Updates the hash, and writes to the file. */
	355	static size_t curl_write(void* data, size_t size, size_t count, void* ptr) {
	356	curl_data_t* cdata = (curl_data_t*)ptr;
	357	size_t const written = fwrite(data, size, count, cdata->file);
	358	XXH64_update(&cdata->xxhash64, data, written * size);
	359	return written;
	360	}
	361
	362	static int curl_download_resource(
	363	CURL* curl,
	364	data_resource_t const* resource,
	365	data_type_t type) {
	366	curl_data_t cdata;
	367	/* Download the data. */
	368	if (curl_easy_setopt(curl, CURLOPT_URL, resource->url) != 0)
	369	return EINVAL;
	370	if (curl_easy_setopt(curl, CURLOPT_WRITEDATA, &cdata) != 0)
	371	return EINVAL;
	372	cdata = curl_data_create(resource, type);
	373	if (cdata.error != 0)
	374	return cdata.error;
	375	int const curl_err = curl_easy_perform(curl);
	376	int const close_err = curl_data_free(cdata);
	377	if (curl_err) {
	378	fprintf(
	379	stderr,
	380	"downloading '%s' for '%s' failed\n",
	381	resource->url,
	382	resource->path);
	383	return EIO;
	384	}
	385	if (close_err) {
	386	fprintf(stderr, "writing data to '%s' failed\n", resource->path);
	387	return EIO;
	388	}
	389	/* check that the file exists. */
	390	if (type == data_type_file && !UTIL_isRegularFile(resource->path)) {
	391	fprintf(stderr, "output file '%s' does not exist\n", resource->path);
	392	return EIO;
	393	}
	394	if (type == data_type_dir && !UTIL_isDirectory(resource->path)) {
	395	fprintf(
	396	stderr, "output directory '%s' does not exist\n", resource->path);
	397	return EIO;
	398	}
	399	/* Check that the hash matches. */
	400	if (XXH64_digest(&cdata.xxhash64) != resource->xxhash64) {
	401	fprintf(
	402	stderr,
	403	"checksum does not match: 0x%llxLL != 0x%llxLL\n",
	404	(unsigned long long)XXH64_digest(&cdata.xxhash64),
	405	(unsigned long long)resource->xxhash64);
	406	return EINVAL;
	407	}
	408
	409	return 0;
	410	}
	411
	412	/** Download a single data object. */
	413	static int curl_download_datum(CURL* curl, data_t const* data) {
	414	int ret;
	415	ret = curl_download_resource(curl, &data->data, data->type);
	416	if (ret != 0)
	417	return ret;
	418	if (data_has_dict(data)) {
	419	ret = curl_download_resource(curl, &data->dict, data_type_file);
	420	if (ret != 0)
	421	return ret;
	422	}
	423	return ret;
	424	}
	425
	426	/** Download all the data. */
	427	static int curl_download_data(data_t const* const* data) {
	428	if (curl_global_init(CURL_GLOBAL_ALL) != 0)
	429	return EFAULT;
	430
	431	curl_data_t cdata = {};
	432	CURL* curl = curl_easy_init();
	433	int err = EFAULT;
	434
	435	if (curl == NULL)
	436	return EFAULT;
	437
	438	if (curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 1L) != 0)
	439	goto out;
	440	if (curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L) != 0)
	441	goto out;
	442	if (curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, curl_write) != 0)
	443	goto out;
	444
	445	assert(data != NULL);
	446	for (; *data != NULL; ++data) {
	447	if (curl_download_datum(curl, *data) != 0)
	448	goto out;
	449	}
	450
	451	err = 0;
	452	out:
	453	curl_easy_cleanup(curl);
	454	curl_global_cleanup();
	455	return err;
	456	}
	457
	458	/** Fill the path member variable of the data objects. */
	459	static int data_create_paths(data_t* const* data, char const* dir) {
	460	size_t const dirlen = strlen(dir);
	461	assert(data != NULL);
	462	for (; *data != NULL; ++data) {
	463	data_t* const datum = *data;
	464	datum->data.path = cat3(dir, "/", datum->name);
	465	if (datum->data.path == NULL)
	466	return ENOMEM;
	467	if (data_has_dict(datum)) {
	468	datum->dict.path = cat2(datum->data.path, ".dict");
	469	if (datum->dict.path == NULL)
	470	return ENOMEM;
	471	}
	472	}
	473	return 0;
	474	}
	475
	476	/** Free the path member variable of the data objects. */
	477	static void data_free_paths(data_t* const* data) {
	478	assert(data != NULL);
	479	for (; *data != NULL; ++data) {
	480	data_t* datum = *data;
	481	free((void*)datum->data.path);
	482	free((void*)datum->dict.path);
	483	datum->data.path = NULL;
	484	datum->dict.path = NULL;
	485	}
	486	}
	487
	488	static char const kStampName[] = "STAMP";
	489
	490	static void xxh_update_le(XXH64_state_t* state, uint64_t data) {
	491	if (!MEM_isLittleEndian())
	492	data = MEM_swap64(data);
	493	XXH64_update(state, &data, sizeof(data));
	494	}
	495
	496	/** Hash the data to create the stamp. */
	497	static uint64_t stamp_hash(data_t const* const* data) {
	498	XXH64_state_t state;
	499
	500	XXH64_reset(&state, 0);
	501	assert(data != NULL);
	502	for (; *data != NULL; ++data) {
	503	data_t const* datum = *data;
	504	/* We don't care about the URL that we fetch from. */
	505	/* The path is derived from the name. */
	506	XXH64_update(&state, datum->name, strlen(datum->name));
	507	xxh_update_le(&state, datum->data.xxhash64);
	508	xxh_update_le(&state, datum->dict.xxhash64);
	509	xxh_update_le(&state, datum->type);
	510	}
	511	return XXH64_digest(&state);
	512	}
	513
	514	/** Check if the stamp matches the stamp in the cache directory. */
	515	static int stamp_check(char const* dir, data_t const* const* data) {
	516	char* stamp = cat3(dir, "/", kStampName);
	517	uint64_t const expected = stamp_hash(data);
	518	XXH64_canonical_t actual;
	519	FILE* stampfile = NULL;
	520	int matches = 0;
	521
	522	if (stamp == NULL)
	523	goto out;
	524	if (!UTIL_isRegularFile(stamp)) {
	525	fprintf(stderr, "stamp does not exist: recreating the data cache\n");
	526	goto out;
	527	}
	528
	529	stampfile = fopen(stamp, "rb");
	530	if (stampfile == NULL) {
	531	fprintf(stderr, "could not open stamp: recreating the data cache\n");
	532	goto out;
	533	}
	534
	535	size_t b;
	536	if ((b = fread(&actual, sizeof(actual), 1, stampfile)) != 1) {
	537	fprintf(stderr, "invalid stamp: recreating the data cache\n");
	538	goto out;
	539	}
	540
	541	matches = (expected == XXH64_hashFromCanonical(&actual));
	542	if (matches)
	543	fprintf(stderr, "stamp matches: reusing the cached data\n");
	544	else
	545	fprintf(stderr, "stamp does not match: recreating the data cache\n");
	546
	547	out:
	548	free(stamp);
	549	if (stampfile != NULL)
	550	fclose(stampfile);
	551	return matches;
	552	}
	553
	554	/** On success write a new stamp, on failure delete the old stamp. */
	555	static int
	556	stamp_write(char const* dir, data_t const* const* data, int const data_err) {
	557	char* stamp = cat3(dir, "/", kStampName);
	558	FILE* stampfile = NULL;
	559	int err = EIO;
	560
	561	if (stamp == NULL)
	562	return ENOMEM;
	563
	564	if (data_err != 0) {
	565	err = data_err;
	566	goto out;
	567	}
	568	XXH64_canonical_t hash;
	569
	570	XXH64_canonicalFromHash(&hash, stamp_hash(data));
	571
	572	stampfile = fopen(stamp, "wb");
	573	if (stampfile == NULL)
	574	goto out;
	575	if (fwrite(&hash, sizeof(hash), 1, stampfile) != 1)
	576	goto out;
	577	err = 0;
	578	fprintf(stderr, "stamped new data cache\n");
	579	out:
	580	if (err != 0)
	581	/* Ignore errors. */
	582	unlink(stamp);
	583	free(stamp);
	584	if (stampfile != NULL)
	585	fclose(stampfile);
	586	return err;
	587	}
	588
	589	int data_init(char const* dir) {
	590	int err;
	591
	592	if (dir == NULL)
	593	return EINVAL;
	594
	595	/* This must be first to simplify logic. */
	596	err = ensure_directory_exists(dir);
	597	if (err != 0)
	598	return err;
	599
	600	/* Save the cache directory. */
	601	g_data_dir = strdup(dir);
	602	if (g_data_dir == NULL)
	603	return ENOMEM;
	604
	605	err = data_create_paths(g_data, dir);
	606	if (err != 0)
	607	return err;
	608
	609	/* If the stamp matches then we are good to go.
	610	* This must be called before any modifications to the data cache.
	611	* After this point, we MUST call stamp_write() to update the STAMP,
	612	* since we've updated the data cache.
	613	*/
	614	if (stamp_check(dir, data))
	615	return 0;
	616
	617	err = curl_download_data(data);
	618	if (err != 0)
	619	goto out;
	620
	621	out:
	622	/* This must be last, since it must know if data_init() succeeded. */
	623	stamp_write(dir, data, err);
	624	return err;
	625	}
	626
	627	void data_finish(void) {
	628	data_free_paths(g_data);
	629	free(g_data_dir);
	630	g_data_dir = NULL;
	631	}