[pcsx_rearmed.git] / deps / libchdr / deps / zstd-1.5.5 / programs / benchfn.c

/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */


/* *************************************
*  Includes
***************************************/
#include <stdlib.h>      /* malloc, free */
#include <string.h>      /* memset */
#include <assert.h>      /* assert */

#include "timefn.h"        /* UTIL_time_t, UTIL_getTime */
#include "benchfn.h"


/* *************************************
*  Constants
***************************************/
/* One-second durations, expressed in the unit named by the macro suffix.
 * TIMELOOP_NANOSEC is the base used below to convert millisecond budgets into nanoseconds. */
#define TIMELOOP_MICROSEC     SEC_TO_MICRO      /* 1 second */
#define TIMELOOP_NANOSEC      (1*1000000000ULL) /* 1 second */

/* Postfix size multipliers : each expands to `*(constant)`,
 * so that `4 KB` reads as `4 *(1 <<10)`. */
#define KB *(1 <<10)
#define MB *(1 <<20)
#define GB *(1U<<30)


/* *************************************
*  Debug errors
***************************************/
/* Debug traces are only compiled in when DEBUG is defined and >= 1 :
 * in that case DEBUGOUTPUT() forwards its printf-style arguments to stderr through DISPLAY().
 * Otherwise DEBUGOUTPUT() expands to nothing, and DISPLAY() is not defined at all. */
#if defined(DEBUG) && (DEBUG >= 1)
#  include <stdio.h>       /* fprintf */
#  define DISPLAY(...)       fprintf(stderr, __VA_ARGS__)
#  define DEBUGOUTPUT(...) { if (DEBUG) DISPLAY(__VA_ARGS__); }
#else
#  define DEBUGOUTPUT(...)
#endif


/* error without displaying :
 * makes the calling function return `retValue`.
 * The source location and the printf-style message (__VA_ARGS__) only go through DEBUGOUTPUT(),
 * hence nothing is printed when DEBUGOUTPUT() is compiled out.
 * note : expands to a brace-enclosed block that contains a `return` statement. */
#define RETURN_QUIET_ERROR(retValue, ...) {           \
    DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__);    \
    DEBUGOUTPUT("Error : ");                          \
    DEBUGOUTPUT(__VA_ARGS__);                         \
    DEBUGOUTPUT(" \n");                               \
    return retValue;                                  \
}

/* Abort execution if a condition is not met.
 * The stringified condition is sent to DEBUGOUTPUT() before calling abort().
 * Unlike assert(), the check does not depend on NDEBUG. */
#define CONTROL(c) { if (!(c)) { DEBUGOUTPUT("error: %s \n", #c); abort(); } }


/* *************************************
*  Benchmarking an arbitrary function
***************************************/

/* Tells whether a run outcome carries a valid measurement.
 * @return : 1 when the outcome's error tag is clear, 0 when it is set. */
int BMK_isSuccessful_runOutcome(BMK_runOutcome_t outcome)
{
    int const taggedAsError = (outcome.error_tag_never_ever_use_directly != 0);
    return !taggedAsError;
}

/* Unwraps the timing measurement stored inside a successful outcome.
 * warning : the program is aborted (through CONTROL) when the outcome is tagged as an error !
 *           check it first, using BMK_isSuccessful_runOutcome() */
BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome)
{
    BMK_runTime_t measurement;
    CONTROL(outcome.error_tag_never_ever_use_directly == 0);
    measurement = outcome.internal_never_ever_use_directly;
    return measurement;
}

/* Unwraps the error value stored inside a failed outcome.
 * warning : the program is aborted (through CONTROL) when the outcome is NOT tagged as an error. */
size_t BMK_extract_errorResult(BMK_runOutcome_t outcome)
{
    size_t errorValue;
    CONTROL(outcome.error_tag_never_ever_use_directly != 0);
    errorValue = outcome.error_result_never_ever_use_directly;
    return errorValue;
}

/* Builds an outcome tagged as failed, carrying `errorResult`.
 * All remaining bytes of the outcome are zeroed. */
static BMK_runOutcome_t BMK_runOutcome_error(size_t errorResult)
{
    BMK_runOutcome_t failure;
    memset(&failure, 0, sizeof(failure));   /* start from an all-zero outcome */
    failure.error_result_never_ever_use_directly = errorResult;
    failure.error_tag_never_ever_use_directly = 1;
    return failure;
}

/* Builds an outcome tagged as successful, carrying `runTime`.
 * The outcome is zeroed first, like BMK_runOutcome_error() does,
 * so that the error-result field, which is meaningless for a valid outcome,
 * is never returned with an indeterminate value. */
static BMK_runOutcome_t BMK_setValid_runTime(BMK_runTime_t runTime)
{
    BMK_runOutcome_t outcome;
    memset(&outcome, 0, sizeof(outcome));   /* no field left uninitialized */
    outcome.error_tag_never_ever_use_directly = 0;
    outcome.internal_never_ever_use_directly = runTime;
    return outcome;
}


/* BMK_benchFunction() :
 * Calls p.benchFn on every block, `nbLoops` times in a row, and reports the average time per loop.
 * - p.initFn is optional (NULL if none) : it is invoked once, inside the timed section.
 * - p.benchFn must return a size_t value that p.errorFn can interpret.
 * - p.errorFn is optional (NULL if none) : it is only consulted during the first loop.
 * - p.blockResults is optional (NULL if not required) :
 *   it receives the value returned by benchFn for each block during the first loop.
 * - nbLoops == 0 is handled as nbLoops == 1.
 * @return : a valid outcome holding the time per loop and the sum of first-loop return values,
 *           or an error outcome holding the benchFn result rejected by errorFn.
 * note : time per loop can be reported as zero if run time < timer resolution */
BMK_runOutcome_t BMK_benchFunction(BMK_benchParams_t p,
                                   unsigned nbLoops)
{
    size_t firstPassSum = 0;
    UTIL_time_t clockStart;
    unsigned loopNb;
    unsigned blockNb;
    size_t warmNb;

    if (nbLoops == 0) nbLoops = 1;   /* always run at least once */

    /* touch every destination buffer : warms it up and erases any previous result */
    for (warmNb = 0; warmNb < p.blockCount; warmNb++) {
        memset(p.dstBuffers[warmNb], 0xE5, p.dstCapacities[warmNb]);
    }

    /* timed section : optional init, followed by nbLoops passes over all blocks */
    clockStart = UTIL_getTime();
    if (p.initFn != NULL) p.initFn(p.initPayload);

    for (loopNb = 0; loopNb < nbLoops; loopNb++) {
        int const isFirstPass = (loopNb == 0);
        for (blockNb = 0; blockNb < p.blockCount; blockNb++) {
            size_t const res = p.benchFn(p.srcBuffers[blockNb], p.srcSizes[blockNb],
                                         p.dstBuffers[blockNb], p.dstCapacities[blockNb],
                                         p.benchPayload);

            /* results are only recorded, checked and summed during the first pass */
            if (!isFirstPass) continue;

            if (p.blockResults != NULL) p.blockResults[blockNb] = res;
            if ((p.errorFn != NULL) && (p.errorFn(res))) {
                RETURN_QUIET_ERROR(BMK_runOutcome_error(res),
                    "Function benchmark failed on block %u (of size %u) with error %i",
                    blockNb, (unsigned)p.srcSizes[blockNb], (int)res);
            }
            firstPassSum += res;
        }
    }

    {   PTime const elapsed_ns = UTIL_clockSpanNano(clockStart);
        BMK_runTime_t rt;
        rt.sumOfReturn = firstPassSum;
        rt.nanoSecPerRun = (double)elapsed_ns / nbLoops;
        return BMK_setValid_runTime(rt);
    }
}


/* ====  Benchmarking any function, providing intermediate results  ==== */

/* State carried across successive BMK_benchTimedFn() calls.
 * All fields are (re)initialized by BMK_resetTimedFnState(). */
struct BMK_timedFnState_s {
    PTime timeSpent_ns;        /* sum of measured run durations; starts at 0, grows in BMK_benchTimedFn() */
    PTime timeBudget_ns;       /* total budget (from total_ms); compared with timeSpent_ns by BMK_isCompleted_TimedFn() */
    PTime runBudget_ns;        /* target duration of a single run (from run_ms, capped to total_ms) */
    BMK_runTime_t fastestRun;  /* seeded with a very large time at reset; read by BMK_benchTimedFn() as its initial best */
    unsigned nbLoops;          /* nb of loops requested for the next BMK_benchFunction() call; starts at 1 */
    UTIL_time_t coolTime;      /* timestamp taken at reset; not read anywhere else in this file */
};  /* typedef'd to BMK_timedFnState_t within bench.h */

/* Allocates a timed-benchmark state with malloc() and initializes it through BMK_resetTimedFnState().
 * @return : the new state, or NULL if the allocation failed.
 *           Release it with BMK_freeTimedFnState(). */
BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms)
{
    BMK_timedFnState_t* const state = (BMK_timedFnState_t*)malloc(sizeof(*state));
    if (state != NULL) {
        BMK_resetTimedFnState(state, total_ms, run_ms);
    }
    return state;   /* NULL on malloc() error */
}

/* Releases a state with free(). Passing NULL is accepted, since free(NULL) is a no-op. */
void BMK_freeTimedFnState(BMK_timedFnState_t* state)
{
    free(state);
}

/* Initializes a timed-benchmark state inside a caller-provided buffer, without any allocation.
 * @return : `buffer` typed as a state, after initialization through BMK_resetTimedFnState(),
 *           or NULL if `buffer` is NULL, `size` is smaller than the state, or `buffer` is misaligned. */
BMK_timedFnState_t*
BMK_initStatic_timedFnState(void* buffer, size_t size, unsigned total_ms, unsigned run_ms)
{
    /* static assert : a compilation failure indicates that BMK_timedFnState_shell is not large enough */
    typedef char check_size[ 2 * (sizeof(BMK_timedFnState_shell) >= sizeof(struct BMK_timedFnState_s)) - 1];
    /* a single leading char pushes tfs to its first properly aligned offset */
    typedef struct { check_size c; BMK_timedFnState_t tfs; } tfs_align;
    /* that offset is the minimal alignment requirement of BMK_timedFnState_t */
    size_t const requiredAlignment = offsetof(tfs_align, tfs);

    if ( (buffer == NULL)
      || (size < sizeof(struct BMK_timedFnState_s))
      || ((size_t)buffer % requiredAlignment) ) {   /* buffer must be properly aligned */
        return NULL;
    }

    {   BMK_timedFnState_t* const state = (BMK_timedFnState_t*)buffer;
        BMK_resetTimedFnState(state, total_ms, run_ms);
        return state;
    }
}

/* (Re)initializes a timed-benchmark state.
 * - total_ms : overall time budget, in milliseconds; 0 is handled as 1.
 * - run_ms   : budget of a single run, in milliseconds; 0 is handled as 1, and it is capped to total_ms.
 * Time spent is cleared, nbLoops restarts at 1,
 * and fastestRun is seeded with a time larger than any potential measurement. */
void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms)
{
    unsigned const totalBudget_ms = total_ms ? total_ms : 1;
    unsigned runBudget_ms = run_ms ? run_ms : 1;
    if (runBudget_ms > totalBudget_ms) runBudget_ms = totalBudget_ms;

    timedFnState->nbLoops = 1;
    timedFnState->timeSpent_ns = 0;
    timedFnState->timeBudget_ns = (PTime)totalBudget_ms * TIMELOOP_NANOSEC / 1000;
    timedFnState->runBudget_ns = (PTime)runBudget_ms * TIMELOOP_NANOSEC / 1000;
    timedFnState->fastestRun.sumOfReturn = (size_t)(-1LL);
    timedFnState->fastestRun.nanoSecPerRun = (double)TIMELOOP_NANOSEC * 2000000000;  /* hopefully large enough : must be larger than any potential measurement */
    timedFnState->coolTime = UTIL_getTime();
}

/* Tells if nb of seconds set in timedFnState for all runs is spent.
 * note : this function will return 1 if BMK_benchFunctionTimed() has actually errored. */
int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState)
{
    return (timedFnState->timeSpent_ns >= timedFnState->timeBudget_ns);
}


/* Smaller of two values. Any earlier definition of MIN is discarded first.
 * note : the selected argument is evaluated twice, so avoid arguments with side effects. */
#undef MIN
#define MIN(a,b)   ( (a) < (b) ? (a) : (b) )

/* Half of TIMELOOP_NANOSEC, in nanoseconds. */
#define MINUSABLETIME  (TIMELOOP_NANOSEC / 2)  /* 0.5 seconds */

/* BMK_benchTimedFn() :
 * Calls BMK_benchFunction() repeatedly, re-tuning cont->nbLoops after each run,
 * until one run lasts at least half of the per-run budget (cont->runBudget_ns).
 * The measured duration of every successful run is added to cont->timeSpent_ns.
 * @return : a valid outcome holding the fastest time between cont->fastestRun and the accepted run,
 *           or the error outcome of the first BMK_benchFunction() call that failed. */
BMK_runOutcome_t BMK_benchTimedFn(BMK_timedFnState_t* cont,
                                  BMK_benchParams_t p)
{
    PTime const runBudget_ns = cont->runBudget_ns;
    double const minAcceptable_ns = (double)(runBudget_ns / 2);
    double const retuneThreshold_ns = (double)runBudget_ns / 50;
    BMK_runTime_t best = cont->fastestRun;

    for (;;) {
        BMK_runOutcome_t const outcome = BMK_benchFunction(p, cont->nbLoops);
        BMK_runTime_t measured;
        double elapsed_ns;

        if (!BMK_isSuccessful_runOutcome(outcome)) {
            return outcome;   /* error : move out */
        }

        measured = BMK_extract_runTime(outcome);
        elapsed_ns = measured.nanoSecPerRun * cont->nbLoops;
        cont->timeSpent_ns += (unsigned long long)elapsed_ns;

        /* select nbLoops for the next run, aiming at one run budget */
        if (elapsed_ns > retuneThreshold_ns) {
            /* long enough to extrapolate from the fastest known time per loop */
            double const fastest_ns = (best.nanoSecPerRun < measured.nanoSecPerRun) ?
                                       best.nanoSecPerRun : measured.nanoSecPerRun;
            cont->nbLoops = (unsigned)((double)runBudget_ns / fastest_ns) + 1;
        } else {
            /* run was too short to extrapolate : blindly increase workload by x multiplier */
            const unsigned multiplier = 10;
            assert(cont->nbLoops < ((unsigned)-1) / multiplier);  /* avoid overflow */
            cont->nbLoops *= multiplier;
        }

        /* runs shorter than half the budget are not reported : increased risks of rounding errors */
        if (elapsed_ns < minAcceptable_ns) continue;

        if (measured.nanoSecPerRun < best.nanoSecPerRun) {
            best = measured;
        }
        return BMK_setValid_runTime(best);
    }
}
Commit	Line	Data
648db22b	1	/*
	2	* Copyright (c) Meta Platforms, Inc. and affiliates.
	3	* All rights reserved.
	4	*
	5	* This source code is licensed under both the BSD-style license (found in the
	6	* LICENSE file in the root directory of this source tree) and the GPLv2 (found
	7	* in the COPYING file in the root directory of this source tree).
	8	* You may select, at your option, one of the above-listed licenses.
	9	*/
	10
	11
	12
	13	/* *************************************
	14	* Includes
	15	***************************************/
	16	#include <stdlib.h> /* malloc, free */
	17	#include <string.h> /* memset */
	18	#include <assert.h> /* assert */
	19
	20	#include "timefn.h" /* UTIL_time_t, UTIL_getTime */
	21	#include "benchfn.h"
	22
	23
	24	/* *************************************
	25	* Constants
	26	***************************************/
	27	#define TIMELOOP_MICROSEC SEC_TO_MICRO /* 1 second */
	28	#define TIMELOOP_NANOSEC (1*1000000000ULL) /* 1 second */
	29
	30	#define KB *(1 <<10)
	31	#define MB *(1 <<20)
	32	#define GB *(1U<<30)
	33
	34
	35	/* *************************************
	36	* Debug errors
	37	***************************************/
	38	#if defined(DEBUG) && (DEBUG >= 1)
	39	# include <stdio.h> /* fprintf */
	40	# define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
	41	# define DEBUGOUTPUT(...) { if (DEBUG) DISPLAY(__VA_ARGS__); }
	42	#else
	43	# define DEBUGOUTPUT(...)
	44	#endif
	45
	46
	47	/* error without displaying */
	48	#define RETURN_QUIET_ERROR(retValue, ...) { \
	49	DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \
	50	DEBUGOUTPUT("Error : "); \
	51	DEBUGOUTPUT(__VA_ARGS__); \
	52	DEBUGOUTPUT(" \n"); \
	53	return retValue; \
	54	}
	55
	56	/* Abort execution if a condition is not met */
	57	#define CONTROL(c) { if (!(c)) { DEBUGOUTPUT("error: %s \n", #c); abort(); } }
	58
	59
	60	/* *************************************
	61	* Benchmarking an arbitrary function
	62	***************************************/
	63
	64	int BMK_isSuccessful_runOutcome(BMK_runOutcome_t outcome)
65	{
66	return outcome.error_tag_never_ever_use_directly == 0;
67	}
68
69	/* warning : this function will stop program execution if outcome is invalid !
70	* check outcome validity first, using BMK_isValid_runResult() */
71	BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome)
72	{
73	CONTROL(outcome.error_tag_never_ever_use_directly == 0);
74	return outcome.internal_never_ever_use_directly;
75	}
76
77	size_t BMK_extract_errorResult(BMK_runOutcome_t outcome)
78	{
79	CONTROL(outcome.error_tag_never_ever_use_directly != 0);
80	return outcome.error_result_never_ever_use_directly;
81	}
82
83	static BMK_runOutcome_t BMK_runOutcome_error(size_t errorResult)
84	{
85	BMK_runOutcome_t b;
86	memset(&b, 0, sizeof(b));
87	b.error_tag_never_ever_use_directly = 1;
88	b.error_result_never_ever_use_directly = errorResult;
89	return b;
90	}
91
92	static BMK_runOutcome_t BMK_setValid_runTime(BMK_runTime_t runTime)
93	{
94	BMK_runOutcome_t outcome;
95	outcome.error_tag_never_ever_use_directly = 0;
96	outcome.internal_never_ever_use_directly = runTime;
97	return outcome;
98	}
99
100
101	/* initFn will be measured once, benchFn will be measured `nbLoops` times */
102	/* initFn is optional, provide NULL if none */
103	/* benchFn must return a size_t value that errorFn can interpret */
104	/* takes # of blocks and list of size & stuff for each. */
105	/* can report result of benchFn for each block into blockResult. */
106	/* blockResult is optional, provide NULL if this information is not required */
107	/* note : time per loop can be reported as zero if run time < timer resolution */
108	BMK_runOutcome_t BMK_benchFunction(BMK_benchParams_t p,
109	unsigned nbLoops)
110	{
111	size_t dstSize = 0;
112	nbLoops += !nbLoops; /* minimum nbLoops is 1 */
113
114	/* init */
115	{ size_t i;
116	for(i = 0; i < p.blockCount; i++) {
117	memset(p.dstBuffers[i], 0xE5, p.dstCapacities[i]); /* warm up and erase result buffer */
118	} }
119
120	/* benchmark */
121	{ UTIL_time_t const clockStart = UTIL_getTime();
122	unsigned loopNb, blockNb;
123	if (p.initFn != NULL) p.initFn(p.initPayload);
124	for (loopNb = 0; loopNb < nbLoops; loopNb++) {
125	for (blockNb = 0; blockNb < p.blockCount; blockNb++) {
126	size_t const res = p.benchFn(p.srcBuffers[blockNb], p.srcSizes[blockNb],
127	p.dstBuffers[blockNb], p.dstCapacities[blockNb],
128	p.benchPayload);
129	if (loopNb == 0) {
130	if (p.blockResults != NULL) p.blockResults[blockNb] = res;
131	if ((p.errorFn != NULL) && (p.errorFn(res))) {
132	RETURN_QUIET_ERROR(BMK_runOutcome_error(res),
133	"Function benchmark failed on block %u (of size %u) with error %i",
134	blockNb, (unsigned)p.srcSizes[blockNb], (int)res);
135	}
136	dstSize += res;
137	} }
138	} /* for (loopNb = 0; loopNb < nbLoops; loopNb++) */
139
140	{ PTime const totalTime = UTIL_clockSpanNano(clockStart);
141	BMK_runTime_t rt;
142	rt.nanoSecPerRun = (double)totalTime / nbLoops;
143	rt.sumOfReturn = dstSize;
144	return BMK_setValid_runTime(rt);
145	} }
146	}
147
148
149	/* ==== Benchmarking any function, providing intermediate results ==== */
150
151	struct BMK_timedFnState_s {
152	PTime timeSpent_ns;
153	PTime timeBudget_ns;
154	PTime runBudget_ns;
155	BMK_runTime_t fastestRun;
156	unsigned nbLoops;
157	UTIL_time_t coolTime;
158	}; /* typedef'd to BMK_timedFnState_t within bench.h */
159
160	BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms)
161	{
	162	BMK_timedFnState_t* const r = (BMK_timedFnState_t*)malloc(sizeof(*r));
163	if (r == NULL) return NULL; /* malloc() error */
164	BMK_resetTimedFnState(r, total_ms, run_ms);
165	return r;
166	}
167
168	void BMK_freeTimedFnState(BMK_timedFnState_t* state) { free(state); }
169
170	BMK_timedFnState_t*
171	BMK_initStatic_timedFnState(void* buffer, size_t size, unsigned total_ms, unsigned run_ms)
172	{
173	typedef char check_size[ 2 * (sizeof(BMK_timedFnState_shell) >= sizeof(struct BMK_timedFnState_s)) - 1]; /* static assert : a compilation failure indicates that BMK_timedFnState_shell is not large enough */
174	typedef struct { check_size c; BMK_timedFnState_t tfs; } tfs_align; /* force tfs to be aligned at its next best position */
175	size_t const tfs_alignment = offsetof(tfs_align, tfs); /* provides the minimal alignment restriction for BMK_timedFnState_t */
176	BMK_timedFnState_t* const r = (BMK_timedFnState_t*)buffer;
177	if (buffer == NULL) return NULL;
178	if (size < sizeof(struct BMK_timedFnState_s)) return NULL;
179	if ((size_t)buffer % tfs_alignment) return NULL; /* buffer must be properly aligned */
180	BMK_resetTimedFnState(r, total_ms, run_ms);
181	return r;
182	}
183
184	void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms)
185	{
186	if (!total_ms) total_ms = 1 ;
187	if (!run_ms) run_ms = 1;
188	if (run_ms > total_ms) run_ms = total_ms;
189	timedFnState->timeSpent_ns = 0;
190	timedFnState->timeBudget_ns = (PTime)total_ms * TIMELOOP_NANOSEC / 1000;
191	timedFnState->runBudget_ns = (PTime)run_ms * TIMELOOP_NANOSEC / 1000;
192	timedFnState->fastestRun.nanoSecPerRun = (double)TIMELOOP_NANOSEC * 2000000000; /* hopefully large enough : must be larger than any potential measurement */
193	timedFnState->fastestRun.sumOfReturn = (size_t)(-1LL);
194	timedFnState->nbLoops = 1;
195	timedFnState->coolTime = UTIL_getTime();
196	}
197
198	/* Tells if nb of seconds set in timedFnState for all runs is spent.
199	* note : this function will return 1 if BMK_benchFunctionTimed() has actually errored. */
200	int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState)
201	{
202	return (timedFnState->timeSpent_ns >= timedFnState->timeBudget_ns);
203	}
204
205
206	#undef MIN
207	#define MIN(a,b) ( (a) < (b) ? (a) : (b) )
208
209	#define MINUSABLETIME (TIMELOOP_NANOSEC / 2) /* 0.5 seconds */
210
211	BMK_runOutcome_t BMK_benchTimedFn(BMK_timedFnState_t* cont,
212	BMK_benchParams_t p)
213	{
214	PTime const runBudget_ns = cont->runBudget_ns;
215	PTime const runTimeMin_ns = runBudget_ns / 2;
216	int completed = 0;
217	BMK_runTime_t bestRunTime = cont->fastestRun;
218
219	while (!completed) {
220	BMK_runOutcome_t const runResult = BMK_benchFunction(p, cont->nbLoops);
221
222	if(!BMK_isSuccessful_runOutcome(runResult)) { /* error : move out */
223	return runResult;
224	}
225
226	{ BMK_runTime_t const newRunTime = BMK_extract_runTime(runResult);
227	double const loopDuration_ns = newRunTime.nanoSecPerRun * cont->nbLoops;
228
229	cont->timeSpent_ns += (unsigned long long)loopDuration_ns;
230
231	/* estimate nbLoops for next run to last approximately 1 second */
232	if (loopDuration_ns > ((double)runBudget_ns / 50)) {
233	double const fastestRun_ns = MIN(bestRunTime.nanoSecPerRun, newRunTime.nanoSecPerRun);
234	cont->nbLoops = (unsigned)((double)runBudget_ns / fastestRun_ns) + 1;
235	} else {
236	/* previous run was too short : blindly increase workload by x multiplier */
237	const unsigned multiplier = 10;
238	assert(cont->nbLoops < ((unsigned)-1) / multiplier); /* avoid overflow */
239	cont->nbLoops *= multiplier;
240	}
241
242	if(loopDuration_ns < (double)runTimeMin_ns) {
243	/* don't report results for which benchmark run time was too small : increased risks of rounding errors */
244	assert(completed == 0);
245	continue;
246	} else {
247	if(newRunTime.nanoSecPerRun < bestRunTime.nanoSecPerRun) {
248	bestRunTime = newRunTime;
249	}
250	completed = 1;
251	}
252	}
253	} /* while (!completed) */
254
255	return BMK_setValid_runTime(bestRunTime);
256	}