[pcsx_rearmed.git] / deps / libchdr / deps / zstd-1.5.5 / contrib / match_finders / zstd_edist.c

/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

/*-*************************************
*  Dependencies
***************************************/

/* Currently relies on qsort when combining contiguous matches. This can probably 
 * be avoided but would require changes to the algorithm. The qsort is far from 
 * the bottleneck in this algorithm even for medium sized files so it's probably 
 * not worth trying to address */ 
#include <stdlib.h>
#include <assert.h>

#include "zstd_edist.h"
#include "mem.h"

/*-*************************************
*  Constants
***************************************/

/* Just a sential for the entries of the diagonal matrix */
#define ZSTD_EDIST_DIAG_MAX (S32)(1 << 30)

/* How large should a snake be to be considered a 'big' snake. 
 * For an explanation of what a 'snake' is with respect to the 
 * edit distance matrix, see the linked paper in zstd_edist.h */
#define ZSTD_EDIST_SNAKE_THRESH 20

/* After how many iterations should we start to use the heuristic
 * based on 'big' snakes */
#define ZSTD_EDIST_SNAKE_ITER_THRESH 200

/* After how many iterations should be just give up and take 
 * the best available edit script for this round */ 
#define ZSTD_EDIST_EXPENSIVE_THRESH 1024

/*-*************************************
*  Structures
***************************************/

typedef struct {
    U32 dictIdx;
    U32 srcIdx;
    U32 matchLength;
} ZSTD_eDist_match;

typedef struct {
    const BYTE* dict;
    const BYTE* src;
    size_t dictSize;
    size_t srcSize;
    S32* forwardDiag;            /* Entries of the forward diagonal stored here */
    S32* backwardDiag;           /* Entries of the backward diagonal stored here.
                                  *   Note: this buffer and the 'forwardDiag' buffer 
                                  *   are contiguous. See the ZSTD_eDist_genSequences */
    ZSTD_eDist_match* matches;   /* Accumulate matches of length 1 in this buffer. 
                                  *   In a subsequence post-processing step, we combine 
                                  *   contiguous matches. */
    U32 nbMatches;
} ZSTD_eDist_state;

typedef struct {
    S32 dictMid;           /* The mid diagonal for the dictionary */
    S32 srcMid;            /* The mid diagonal for the source */ 
    int lowUseHeuristics;  /* Should we use heuristics for the low part */
    int highUseHeuristics; /* Should we use heuristics for the high part */ 
} ZSTD_eDist_partition;

/*-*************************************
*  Internal
***************************************/

static void ZSTD_eDist_diag(ZSTD_eDist_state* state,
                    ZSTD_eDist_partition* partition,
                    S32 dictLow, S32 dictHigh, S32 srcLow, 
                    S32 srcHigh, int useHeuristics)
{
    S32* const forwardDiag = state->forwardDiag;
    S32* const backwardDiag = state->backwardDiag;
    const BYTE* const dict = state->dict;
    const BYTE* const src = state->src;

    S32 const diagMin = dictLow - srcHigh;
    S32 const diagMax = dictHigh - srcLow;
    S32 const forwardMid = dictLow - srcLow;
    S32 const backwardMid = dictHigh - srcHigh;

    S32 forwardMin = forwardMid;
    S32 forwardMax = forwardMid;
    S32 backwardMin = backwardMid;
    S32 backwardMax = backwardMid;
    int odd = (forwardMid - backwardMid) & 1;
    U32 iterations;

    forwardDiag[forwardMid] = dictLow;
    backwardDiag[backwardMid] = dictHigh;

    /* Main loop for updating diag entries. Unless useHeuristics is 
     * set to false, this loop will run until it finds the minimal 
     * edit script */ 
    for (iterations = 1;;iterations++) {
        S32 diag;
        int bigSnake = 0;
        
        if (forwardMin > diagMin) {
            forwardMin--;
            forwardDiag[forwardMin - 1] = -1;
        } else {
            forwardMin++;
        }

        if (forwardMax < diagMax) {
            forwardMax++;
            forwardDiag[forwardMax + 1] = -1;
        } else {
            forwardMax--;
        }

        for (diag = forwardMax; diag >= forwardMin; diag -= 2) {
            S32 dictIdx;
            S32 srcIdx;
            S32 low = forwardDiag[diag - 1];
            S32 high = forwardDiag[diag + 1];
            S32 dictIdx0 = low < high ? high : low + 1;

            for (dictIdx = dictIdx0, srcIdx = dictIdx0 - diag;
                dictIdx < dictHigh && srcIdx < srcHigh && dict[dictIdx] == src[srcIdx];
                dictIdx++, srcIdx++) continue;

            if (dictIdx - dictIdx0 > ZSTD_EDIST_SNAKE_THRESH)
                bigSnake = 1;

            forwardDiag[diag] = dictIdx;

            if (odd && backwardMin <= diag && diag <= backwardMax && backwardDiag[diag] <= dictIdx) {
                partition->dictMid = dictIdx;
                partition->srcMid = srcIdx;
                partition->lowUseHeuristics = 0;
                partition->highUseHeuristics = 0;
                return;
            }
        }

        if (backwardMin > diagMin) {
            backwardMin--;
            backwardDiag[backwardMin - 1] = ZSTD_EDIST_DIAG_MAX;
        } else {
            backwardMin++;
        }

        if (backwardMax < diagMax) {
            backwardMax++;
            backwardDiag[backwardMax + 1] = ZSTD_EDIST_DIAG_MAX;
        } else {
            backwardMax--;
        }


        for (diag = backwardMax; diag >= backwardMin; diag -= 2) {
            S32 dictIdx;
            S32 srcIdx;
            S32 low = backwardDiag[diag - 1];
            S32 high = backwardDiag[diag + 1];
            S32 dictIdx0 = low < high ? low : high - 1;

            for (dictIdx = dictIdx0, srcIdx = dictIdx0 - diag;
                dictLow < dictIdx && srcLow < srcIdx && dict[dictIdx - 1] == src[srcIdx - 1];
                dictIdx--, srcIdx--) continue;

            if (dictIdx0 - dictIdx > ZSTD_EDIST_SNAKE_THRESH)
                bigSnake = 1;

            backwardDiag[diag] = dictIdx;

            if (!odd && forwardMin <= diag && diag <= forwardMax && dictIdx <= forwardDiag[diag]) {
                partition->dictMid = dictIdx;
                partition->srcMid = srcIdx;
                partition->lowUseHeuristics = 0;
                partition->highUseHeuristics = 0;
                return;
            }
        }

        if (!useHeuristics)
            continue;

        /* Everything under this point is a heuristic. Using these will 
         * substantially speed up the match finding. In some cases, taking 
         * the total match finding time from several minutes to seconds.
         * Of course, the caveat is that the edit script found may no longer 
         * be optimal */ 

        /* Big snake heuristic */ 
        if (iterations > ZSTD_EDIST_SNAKE_ITER_THRESH && bigSnake) {
            {
                S32 best = 0;
                
                for (diag = forwardMax; diag >= forwardMin; diag -= 2) {
                    S32 diagDiag = diag - forwardMid;
                    S32 dictIdx = forwardDiag[diag];
                    S32 srcIdx = dictIdx - diag;
                    S32 v = (dictIdx - dictLow) * 2 - diagDiag;

                    if (v > 12 * (iterations + (diagDiag < 0 ? -diagDiag : diagDiag))) {
                        if (v > best 
                          && dictLow + ZSTD_EDIST_SNAKE_THRESH <= dictIdx && dictIdx <= dictHigh
                          && srcLow + ZSTD_EDIST_SNAKE_THRESH <= srcIdx && srcIdx <= srcHigh) {
                            S32 k;
                            for (k = 1; dict[dictIdx - k] == src[srcIdx - k]; k++) {
                                if (k == ZSTD_EDIST_SNAKE_THRESH) {
                                    best = v;
                                    partition->dictMid = dictIdx;
                                    partition->srcMid = srcIdx;
                                    break;
                                }
                            }
                        }
                    }
                }

                if (best > 0) {
                    partition->lowUseHeuristics = 0;
                    partition->highUseHeuristics = 1;
                    return;
                }
            }

            {
                S32 best = 0;

                for (diag = backwardMax; diag >= backwardMin; diag -= 2) {
                    S32 diagDiag = diag - backwardMid;
                    S32 dictIdx = backwardDiag[diag];
                    S32 srcIdx = dictIdx - diag;
                    S32 v = (dictHigh - dictIdx) * 2 + diagDiag;

                    if (v > 12 * (iterations + (diagDiag < 0 ? -diagDiag : diagDiag))) {
                        if (v > best 
                          && dictLow < dictIdx && dictIdx <= dictHigh - ZSTD_EDIST_SNAKE_THRESH
                          && srcLow < srcIdx && srcIdx <= srcHigh - ZSTD_EDIST_SNAKE_THRESH) {
                            int k;
                            for (k = 0; dict[dictIdx + k] == src[srcIdx + k]; k++) {
                                if (k == ZSTD_EDIST_SNAKE_THRESH - 1) { 
                                    best = v;
                                    partition->dictMid = dictIdx;
                                    partition->srcMid = srcIdx;
                                    break; 
                                }
                            }
                        }
                    }
                }

                if (best > 0) {
                    partition->lowUseHeuristics = 1;
                    partition->highUseHeuristics = 0;
                    return;
                }
            }
        }

        /* More general 'too expensive' heuristic */ 
        if (iterations >= ZSTD_EDIST_EXPENSIVE_THRESH) {
            S32 forwardDictSrcBest;
            S32 forwardDictBest = 0;
            S32 backwardDictSrcBest;
            S32 backwardDictBest = 0;

            forwardDictSrcBest = -1;
            for (diag = forwardMax; diag >= forwardMin; diag -= 2) {
                S32 dictIdx = MIN(forwardDiag[diag], dictHigh);
                S32 srcIdx = dictIdx - diag;

                if (srcHigh < srcIdx) {
                    dictIdx = srcHigh + diag;
                    srcIdx = srcHigh;
                }

                if (forwardDictSrcBest < dictIdx + srcIdx) {
                    forwardDictSrcBest = dictIdx + srcIdx;
                    forwardDictBest = dictIdx;
                }
            }

            backwardDictSrcBest = ZSTD_EDIST_DIAG_MAX;
            for (diag = backwardMax; diag >= backwardMin; diag -= 2) {
                S32 dictIdx = MAX(dictLow, backwardDiag[diag]);
                S32 srcIdx = dictIdx - diag;

                if (srcIdx < srcLow) {
                    dictIdx = srcLow + diag;
                    srcIdx = srcLow;
                }

                if (dictIdx + srcIdx < backwardDictSrcBest) {
                    backwardDictSrcBest = dictIdx + srcIdx;
                    backwardDictBest = dictIdx;
                }
            }

            if ((dictHigh + srcHigh) - backwardDictSrcBest < forwardDictSrcBest - (dictLow + srcLow)) {
                partition->dictMid = forwardDictBest;
                partition->srcMid = forwardDictSrcBest - forwardDictBest;
                partition->lowUseHeuristics = 0;
                partition->highUseHeuristics = 1;
            } else {
                partition->dictMid = backwardDictBest;
                partition->srcMid = backwardDictSrcBest - backwardDictBest;
                partition->lowUseHeuristics = 1;
                partition->highUseHeuristics = 0;
            }
            return;
        }
    }
}

static void ZSTD_eDist_insertMatch(ZSTD_eDist_state* state, 
                    S32 const dictIdx, S32 const srcIdx)
{
    state->matches[state->nbMatches].dictIdx = dictIdx;
    state->matches[state->nbMatches].srcIdx = srcIdx;
    state->matches[state->nbMatches].matchLength = 1;
    state->nbMatches++;
}

static int ZSTD_eDist_compare(ZSTD_eDist_state* state,
                    S32 dictLow, S32 dictHigh, S32 srcLow,
                    S32 srcHigh, int useHeuristics)
{
    const BYTE* const dict = state->dict;
    const BYTE* const src = state->src;

    /* Found matches while traversing from the low end */ 
    while (dictLow < dictHigh && srcLow < srcHigh && dict[dictLow] == src[srcLow]) {
        ZSTD_eDist_insertMatch(state, dictLow, srcLow);
        dictLow++;
        srcLow++;
    }

    /* Found matches while traversing from the high end */
    while (dictLow < dictHigh && srcLow < srcHigh && dict[dictHigh - 1] == src[srcHigh - 1]) {
        ZSTD_eDist_insertMatch(state, dictHigh - 1, srcHigh - 1);
        dictHigh--;
        srcHigh--;
    }
    
    /* If the low and high end end up touching. If we wanted to make 
     * note of the differences like most diffing algorithms do, we would 
     * do so here. In our case, we're only concerned with matches 
     * Note: if you wanted to find the edit distance of the algorithm, 
     *   you could just accumulate the cost for an insertion/deletion 
     *   below. */ 
    if (dictLow == dictHigh) {
        while (srcLow < srcHigh) {
            /* Reaching this point means inserting src[srcLow] into 
             * the current position of dict */ 
            srcLow++;
        }
    } else if (srcLow == srcHigh) {
        while (dictLow < dictHigh) {
            /* Reaching this point means deleting dict[dictLow] from 
             * the current position of dict */ 
            dictLow++;
        }
    } else {
        ZSTD_eDist_partition partition;
        partition.dictMid = 0;
        partition.srcMid = 0;
        ZSTD_eDist_diag(state, &partition, dictLow, dictHigh, 
            srcLow, srcHigh, useHeuristics);
        if (ZSTD_eDist_compare(state, dictLow, partition.dictMid, 
          srcLow, partition.srcMid, partition.lowUseHeuristics))
            return 1;
        if (ZSTD_eDist_compare(state, partition.dictMid, dictHigh,
          partition.srcMid, srcHigh, partition.highUseHeuristics))
            return 1;
    }

    return 0;
}

static int ZSTD_eDist_matchComp(const void* p, const void* q)
{
    S32 const l = ((ZSTD_eDist_match*)p)->srcIdx;
    S32 const r = ((ZSTD_eDist_match*)q)->srcIdx;
    return (l - r);
}

/* The matches from the approach above will all be of the form 
 * (dictIdx, srcIdx, 1). This method combines contiguous matches 
 * of length MINMATCH or greater. Matches less than MINMATCH 
 * are discarded */ 
static void ZSTD_eDist_combineMatches(ZSTD_eDist_state* state)
{
    /* Create a new buffer to put the combined matches into 
     * and memcpy to state->matches after */ 
    ZSTD_eDist_match* combinedMatches = 
        ZSTD_malloc(state->nbMatches * sizeof(ZSTD_eDist_match), 
        ZSTD_defaultCMem);

    U32 nbCombinedMatches = 1;
    size_t i;

    /* Make sure that the srcIdx and dictIdx are in sorted order.
     * The combination step won't work otherwise */ 
    qsort(state->matches, state->nbMatches, sizeof(ZSTD_eDist_match), ZSTD_eDist_matchComp);

    memcpy(combinedMatches, state->matches, sizeof(ZSTD_eDist_match));
    for (i = 1; i < state->nbMatches; i++) {
        ZSTD_eDist_match const match = state->matches[i];
        ZSTD_eDist_match const combinedMatch = 
            combinedMatches[nbCombinedMatches - 1];
        if (combinedMatch.srcIdx + combinedMatch.matchLength == match.srcIdx && 
          combinedMatch.dictIdx + combinedMatch.matchLength == match.dictIdx) {
            combinedMatches[nbCombinedMatches - 1].matchLength++;
        } else {
            /* Discard matches that are less than MINMATCH */
            if (combinedMatches[nbCombinedMatches - 1].matchLength < MINMATCH) {
                nbCombinedMatches--;
            }

            memcpy(combinedMatches + nbCombinedMatches, 
                state->matches + i, sizeof(ZSTD_eDist_match));
            nbCombinedMatches++;
        }
    }
    memcpy(state->matches, combinedMatches, nbCombinedMatches * sizeof(ZSTD_eDist_match));
    state->nbMatches = nbCombinedMatches;
    ZSTD_free(combinedMatches, ZSTD_defaultCMem);
}

static size_t ZSTD_eDist_convertMatchesToSequences(ZSTD_Sequence* sequences, 
    ZSTD_eDist_state* state)
{
    const ZSTD_eDist_match* matches = state->matches;
    size_t const nbMatches = state->nbMatches;
    size_t const dictSize = state->dictSize;
    size_t nbSequences = 0;
    size_t i;
    for (i = 0; i < nbMatches; i++) {
        ZSTD_eDist_match const match = matches[i];
        U32 const litLength = !i ? match.srcIdx : 
            match.srcIdx - (matches[i - 1].srcIdx + matches[i - 1].matchLength);
        U32 const offset = (match.srcIdx + dictSize) - match.dictIdx;
        U32 const matchLength = match.matchLength;
        sequences[nbSequences].offset = offset;
        sequences[nbSequences].litLength = litLength;
        sequences[nbSequences].matchLength = matchLength;
        nbSequences++;
    }
    return nbSequences;
}

/*-*************************************
*  Internal utils
***************************************/

static size_t ZSTD_eDist_hamingDist(const BYTE* const a,
                        const BYTE* const b, size_t n)
{
    size_t i;
    size_t dist = 0;
    for (i = 0; i < n; i++)
        dist += a[i] != b[i];
    return dist; 
}

/* This is a pretty naive recursive implementation that should only
 * be used for quick tests obviously. Don't try and run this on a 
 * GB file or something. There are faster implementations. Use those
 * if you need to run it for large files. */
static size_t ZSTD_eDist_levenshteinDist(const BYTE* const s,
                        size_t const sn, const BYTE* const t,
                        size_t const tn)
{
    size_t a, b, c;

    if (!sn)
        return tn;
    if (!tn)
        return sn;
    
    if (s[sn - 1] == t[tn - 1])
        return ZSTD_eDist_levenshteinDist(
            s, sn - 1, t, tn - 1);
    
    a = ZSTD_eDist_levenshteinDist(s, sn - 1, t, tn - 1);
    b = ZSTD_eDist_levenshteinDist(s, sn, t, tn - 1);
    c = ZSTD_eDist_levenshteinDist(s, sn - 1, t, tn);

    if (a > b)
        a = b;
    if (a > c)
        a = c;
    
    return a + 1;
}

static void ZSTD_eDist_validateMatches(ZSTD_eDist_match* matches,
                        size_t const nbMatches, const BYTE* const dict,
                        size_t const dictSize, const BYTE* const src,
                        size_t const srcSize)
{
    size_t i;
    for (i = 0; i < nbMatches; i++) {
        ZSTD_eDist_match match = matches[i];
        U32 const dictIdx = match.dictIdx;
        U32 const srcIdx = match.srcIdx;
        U32 const matchLength = match.matchLength;
        
        assert(dictIdx + matchLength < dictSize);
        assert(srcIdx + matchLength < srcSize);
        assert(!memcmp(dict + dictIdx, src + srcIdx, matchLength));
    }
}

/*-*************************************
*  API
***************************************/

size_t ZSTD_eDist_genSequences(ZSTD_Sequence* sequences, 
                        const void* dict, size_t dictSize,
                        const void* src, size_t srcSize,
                        int useHeuristics)
{
    size_t const nbDiags = dictSize + srcSize + 3;
    S32* buffer = ZSTD_malloc(nbDiags * 2 * sizeof(S32), ZSTD_defaultCMem);
    ZSTD_eDist_state state;
    size_t nbSequences = 0;

    state.dict = (const BYTE*)dict;
    state.src = (const BYTE*)src;
    state.dictSize = dictSize;
    state.srcSize = srcSize;
    state.forwardDiag = buffer;
    state.backwardDiag = buffer + nbDiags;
    state.forwardDiag += srcSize + 1;
    state.backwardDiag += srcSize + 1;
    state.matches = ZSTD_malloc(srcSize * sizeof(ZSTD_eDist_match), ZSTD_defaultCMem);
    state.nbMatches = 0;

    ZSTD_eDist_compare(&state, 0, dictSize, 0, srcSize, 1);
    ZSTD_eDist_combineMatches(&state);
    nbSequences = ZSTD_eDist_convertMatchesToSequences(sequences, &state);

    ZSTD_free(buffer, ZSTD_defaultCMem);
    ZSTD_free(state.matches, ZSTD_defaultCMem);

    return nbSequences;
}
Commit	Line	Data
	1	/*
	2	* Copyright (c) Meta Platforms, Inc. and affiliates.
	3	* All rights reserved.
	4	*
	5	* This source code is licensed under both the BSD-style license (found in the
	6	* LICENSE file in the root directory of this source tree) and the GPLv2 (found
	7	* in the COPYING file in the root directory of this source tree).
	8	* You may select, at your option, one of the above-listed licenses.
	9	*/
	10
	11	/-************************************
	12	* Dependencies
	13	***************************************/
	14
	15	/* Currently relies on qsort when combining contiguous matches. This can probably
	16	* be avoided but would require changes to the algorithm. The qsort is far from
	17	* the bottleneck in this algorithm even for medium sized files so it's probably
	18	* not worth trying to address */
	19	#include <stdlib.h>
	20	#include <assert.h>
	21
	22	#include "zstd_edist.h"
	23	#include "mem.h"
	24
	25	/-************************************
	26	* Constants
	27	***************************************/
	28
	29	/* Just a sential for the entries of the diagonal matrix */
	30	#define ZSTD_EDIST_DIAG_MAX (S32)(1 << 30)
	31
	32	/* How large should a snake be to be considered a 'big' snake.
	33	* For an explanation of what a 'snake' is with respect to the
	34	* edit distance matrix, see the linked paper in zstd_edist.h */
	35	#define ZSTD_EDIST_SNAKE_THRESH 20
	36
	37	/* After how many iterations should we start to use the heuristic
	38	* based on 'big' snakes */
	39	#define ZSTD_EDIST_SNAKE_ITER_THRESH 200
	40
	41	/* After how many iterations should be just give up and take
	42	* the best available edit script for this round */
	43	#define ZSTD_EDIST_EXPENSIVE_THRESH 1024
	44
	45	/-************************************
	46	* Structures
	47	***************************************/
	48
	49	typedef struct {
	50	U32 dictIdx;
	51	U32 srcIdx;
	52	U32 matchLength;
	53	} ZSTD_eDist_match;
	54
	55	typedef struct {
	56	const BYTE* dict;
	57	const BYTE* src;
	58	size_t dictSize;
	59	size_t srcSize;
	60	S32* forwardDiag; /* Entries of the forward diagonal stored here */
	61	S32* backwardDiag; /* Entries of the backward diagonal stored here.
	62	* Note: this buffer and the 'forwardDiag' buffer
	63	* are contiguous. See the ZSTD_eDist_genSequences */
	64	ZSTD_eDist_match* matches; /* Accumulate matches of length 1 in this buffer.
	65	* In a subsequence post-processing step, we combine
	66	* contiguous matches. */
	67	U32 nbMatches;
	68	} ZSTD_eDist_state;
	69
	70	typedef struct {
	71	S32 dictMid; /* The mid diagonal for the dictionary */
	72	S32 srcMid; /* The mid diagonal for the source */
	73	int lowUseHeuristics; /* Should we use heuristics for the low part */
	74	int highUseHeuristics; /* Should we use heuristics for the high part */
	75	} ZSTD_eDist_partition;
	76
	77	/-************************************
	78	* Internal
	79	***************************************/
	80
	81	static void ZSTD_eDist_diag(ZSTD_eDist_state* state,
	82	ZSTD_eDist_partition* partition,
	83	S32 dictLow, S32 dictHigh, S32 srcLow,
	84	S32 srcHigh, int useHeuristics)
	85	{
	86	S32* const forwardDiag = state->forwardDiag;
	87	S32* const backwardDiag = state->backwardDiag;
	88	const BYTE* const dict = state->dict;
	89	const BYTE* const src = state->src;
	90
	91	S32 const diagMin = dictLow - srcHigh;
	92	S32 const diagMax = dictHigh - srcLow;
	93	S32 const forwardMid = dictLow - srcLow;
	94	S32 const backwardMid = dictHigh - srcHigh;
	95
	96	S32 forwardMin = forwardMid;
	97	S32 forwardMax = forwardMid;
	98	S32 backwardMin = backwardMid;
	99	S32 backwardMax = backwardMid;
	100	int odd = (forwardMid - backwardMid) & 1;
	101	U32 iterations;
	102
	103	forwardDiag[forwardMid] = dictLow;
	104	backwardDiag[backwardMid] = dictHigh;
	105
	106	/* Main loop for updating diag entries. Unless useHeuristics is
	107	* set to false, this loop will run until it finds the minimal
	108	* edit script */
	109	for (iterations = 1;;iterations++) {
	110	S32 diag;
	111	int bigSnake = 0;
	112
	113	if (forwardMin > diagMin) {
	114	forwardMin--;
	115	forwardDiag[forwardMin - 1] = -1;
	116	} else {
	117	forwardMin++;
	118	}
	119
	120	if (forwardMax < diagMax) {
	121	forwardMax++;
	122	forwardDiag[forwardMax + 1] = -1;
	123	} else {
	124	forwardMax--;
	125	}
	126
	127	for (diag = forwardMax; diag >= forwardMin; diag -= 2) {
	128	S32 dictIdx;
	129	S32 srcIdx;
	130	S32 low = forwardDiag[diag - 1];
	131	S32 high = forwardDiag[diag + 1];
	132	S32 dictIdx0 = low < high ? high : low + 1;
	133
	134	for (dictIdx = dictIdx0, srcIdx = dictIdx0 - diag;
	135	dictIdx < dictHigh && srcIdx < srcHigh && dict[dictIdx] == src[srcIdx];
	136	dictIdx++, srcIdx++) continue;
	137
	138	if (dictIdx - dictIdx0 > ZSTD_EDIST_SNAKE_THRESH)
	139	bigSnake = 1;
	140
	141	forwardDiag[diag] = dictIdx;
	142
	143	if (odd && backwardMin <= diag && diag <= backwardMax && backwardDiag[diag] <= dictIdx) {
	144	partition->dictMid = dictIdx;
	145	partition->srcMid = srcIdx;
	146	partition->lowUseHeuristics = 0;
	147	partition->highUseHeuristics = 0;
	148	return;
	149	}
	150	}
	151
	152	if (backwardMin > diagMin) {
	153	backwardMin--;
	154	backwardDiag[backwardMin - 1] = ZSTD_EDIST_DIAG_MAX;
	155	} else {
	156	backwardMin++;
	157	}
	158
	159	if (backwardMax < diagMax) {
	160	backwardMax++;
	161	backwardDiag[backwardMax + 1] = ZSTD_EDIST_DIAG_MAX;
	162	} else {
	163	backwardMax--;
	164	}
	165
	166
	167	for (diag = backwardMax; diag >= backwardMin; diag -= 2) {
	168	S32 dictIdx;
	169	S32 srcIdx;
	170	S32 low = backwardDiag[diag - 1];
	171	S32 high = backwardDiag[diag + 1];
	172	S32 dictIdx0 = low < high ? low : high - 1;
	173
	174	for (dictIdx = dictIdx0, srcIdx = dictIdx0 - diag;
	175	dictLow < dictIdx && srcLow < srcIdx && dict[dictIdx - 1] == src[srcIdx - 1];
	176	dictIdx--, srcIdx--) continue;
	177
	178	if (dictIdx0 - dictIdx > ZSTD_EDIST_SNAKE_THRESH)
	179	bigSnake = 1;
	180
	181	backwardDiag[diag] = dictIdx;
	182
	183	if (!odd && forwardMin <= diag && diag <= forwardMax && dictIdx <= forwardDiag[diag]) {
	184	partition->dictMid = dictIdx;
	185	partition->srcMid = srcIdx;
	186	partition->lowUseHeuristics = 0;
	187	partition->highUseHeuristics = 0;
	188	return;
	189	}
	190	}
	191
	192	if (!useHeuristics)
	193	continue;
	194
	195	/* Everything under this point is a heuristic. Using these will
	196	* substantially speed up the match finding. In some cases, taking
	197	* the total match finding time from several minutes to seconds.
	198	* Of course, the caveat is that the edit script found may no longer
	199	* be optimal */
	200
	201	/* Big snake heuristic */
	202	if (iterations > ZSTD_EDIST_SNAKE_ITER_THRESH && bigSnake) {
	203	{
	204	S32 best = 0;
	205
	206	for (diag = forwardMax; diag >= forwardMin; diag -= 2) {
	207	S32 diagDiag = diag - forwardMid;
	208	S32 dictIdx = forwardDiag[diag];
	209	S32 srcIdx = dictIdx - diag;
	210	S32 v = (dictIdx - dictLow) * 2 - diagDiag;
	211
	212	if (v > 12 * (iterations + (diagDiag < 0 ? -diagDiag : diagDiag))) {
	213	if (v > best
	214	&& dictLow + ZSTD_EDIST_SNAKE_THRESH <= dictIdx && dictIdx <= dictHigh
	215	&& srcLow + ZSTD_EDIST_SNAKE_THRESH <= srcIdx && srcIdx <= srcHigh) {
	216	S32 k;
	217	for (k = 1; dict[dictIdx - k] == src[srcIdx - k]; k++) {
	218	if (k == ZSTD_EDIST_SNAKE_THRESH) {
	219	best = v;
	220	partition->dictMid = dictIdx;
	221	partition->srcMid = srcIdx;
	222	break;
	223	}
	224	}
	225	}
	226	}
	227	}
	228
	229	if (best > 0) {
	230	partition->lowUseHeuristics = 0;
	231	partition->highUseHeuristics = 1;
	232	return;
	233	}
	234	}
	235
	236	{
	237	S32 best = 0;
	238
	239	for (diag = backwardMax; diag >= backwardMin; diag -= 2) {
	240	S32 diagDiag = diag - backwardMid;
	241	S32 dictIdx = backwardDiag[diag];
	242	S32 srcIdx = dictIdx - diag;
	243	S32 v = (dictHigh - dictIdx) * 2 + diagDiag;
	244
	245	if (v > 12 * (iterations + (diagDiag < 0 ? -diagDiag : diagDiag))) {
	246	if (v > best
	247	&& dictLow < dictIdx && dictIdx <= dictHigh - ZSTD_EDIST_SNAKE_THRESH
	248	&& srcLow < srcIdx && srcIdx <= srcHigh - ZSTD_EDIST_SNAKE_THRESH) {
	249	int k;
	250	for (k = 0; dict[dictIdx + k] == src[srcIdx + k]; k++) {
	251	if (k == ZSTD_EDIST_SNAKE_THRESH - 1) {
	252	best = v;
	253	partition->dictMid = dictIdx;
	254	partition->srcMid = srcIdx;
	255	break;
	256	}
	257	}
	258	}
	259	}
	260	}
	261
	262	if (best > 0) {
	263	partition->lowUseHeuristics = 1;
	264	partition->highUseHeuristics = 0;
	265	return;
	266	}
	267	}
	268	}
	269
	270	/* More general 'too expensive' heuristic */
	271	if (iterations >= ZSTD_EDIST_EXPENSIVE_THRESH) {
	272	S32 forwardDictSrcBest;
	273	S32 forwardDictBest = 0;
	274	S32 backwardDictSrcBest;
	275	S32 backwardDictBest = 0;
	276
	277	forwardDictSrcBest = -1;
	278	for (diag = forwardMax; diag >= forwardMin; diag -= 2) {
	279	S32 dictIdx = MIN(forwardDiag[diag], dictHigh);
	280	S32 srcIdx = dictIdx - diag;
	281
	282	if (srcHigh < srcIdx) {
	283	dictIdx = srcHigh + diag;
	284	srcIdx = srcHigh;
	285	}
	286
	287	if (forwardDictSrcBest < dictIdx + srcIdx) {
	288	forwardDictSrcBest = dictIdx + srcIdx;
	289	forwardDictBest = dictIdx;
	290	}
	291	}
	292
	293	backwardDictSrcBest = ZSTD_EDIST_DIAG_MAX;
	294	for (diag = backwardMax; diag >= backwardMin; diag -= 2) {
	295	S32 dictIdx = MAX(dictLow, backwardDiag[diag]);
	296	S32 srcIdx = dictIdx - diag;
	297
	298	if (srcIdx < srcLow) {
	299	dictIdx = srcLow + diag;
	300	srcIdx = srcLow;
	301	}
	302
	303	if (dictIdx + srcIdx < backwardDictSrcBest) {
	304	backwardDictSrcBest = dictIdx + srcIdx;
	305	backwardDictBest = dictIdx;
	306	}
	307	}
	308
	309	if ((dictHigh + srcHigh) - backwardDictSrcBest < forwardDictSrcBest - (dictLow + srcLow)) {
	310	partition->dictMid = forwardDictBest;
	311	partition->srcMid = forwardDictSrcBest - forwardDictBest;
	312	partition->lowUseHeuristics = 0;
	313	partition->highUseHeuristics = 1;
	314	} else {
	315	partition->dictMid = backwardDictBest;
	316	partition->srcMid = backwardDictSrcBest - backwardDictBest;
	317	partition->lowUseHeuristics = 1;
	318	partition->highUseHeuristics = 0;
	319	}
	320	return;
	321	}
	322	}
	323	}
	324
	325	static void ZSTD_eDist_insertMatch(ZSTD_eDist_state* state,
	326	S32 const dictIdx, S32 const srcIdx)
	327	{
	328	state->matches[state->nbMatches].dictIdx = dictIdx;
	329	state->matches[state->nbMatches].srcIdx = srcIdx;
	330	state->matches[state->nbMatches].matchLength = 1;
	331	state->nbMatches++;
	332	}
	333
	334	static int ZSTD_eDist_compare(ZSTD_eDist_state* state,
	335	S32 dictLow, S32 dictHigh, S32 srcLow,
	336	S32 srcHigh, int useHeuristics)
	337	{
	338	const BYTE* const dict = state->dict;
	339	const BYTE* const src = state->src;
	340
	341	/* Found matches while traversing from the low end */
	342	while (dictLow < dictHigh && srcLow < srcHigh && dict[dictLow] == src[srcLow]) {
	343	ZSTD_eDist_insertMatch(state, dictLow, srcLow);
	344	dictLow++;
	345	srcLow++;
	346	}
	347
	348	/* Found matches while traversing from the high end */
	349	while (dictLow < dictHigh && srcLow < srcHigh && dict[dictHigh - 1] == src[srcHigh - 1]) {
	350	ZSTD_eDist_insertMatch(state, dictHigh - 1, srcHigh - 1);
	351	dictHigh--;
	352	srcHigh--;
	353	}
	354
	355	/* If the low and high end end up touching. If we wanted to make
	356	* note of the differences like most diffing algorithms do, we would
	357	* do so here. In our case, we're only concerned with matches
	358	* Note: if you wanted to find the edit distance of the algorithm,
	359	* you could just accumulate the cost for an insertion/deletion
	360	* below. */
	361	if (dictLow == dictHigh) {
	362	while (srcLow < srcHigh) {
	363	/* Reaching this point means inserting src[srcLow] into
	364	* the current position of dict */
	365	srcLow++;
	366	}
	367	} else if (srcLow == srcHigh) {
	368	while (dictLow < dictHigh) {
	369	/* Reaching this point means deleting dict[dictLow] from
	370	* the current position of dict */
	371	dictLow++;
	372	}
	373	} else {
	374	ZSTD_eDist_partition partition;
	375	partition.dictMid = 0;
	376	partition.srcMid = 0;
	377	ZSTD_eDist_diag(state, &partition, dictLow, dictHigh,
	378	srcLow, srcHigh, useHeuristics);
	379	if (ZSTD_eDist_compare(state, dictLow, partition.dictMid,
	380	srcLow, partition.srcMid, partition.lowUseHeuristics))
	381	return 1;
	382	if (ZSTD_eDist_compare(state, partition.dictMid, dictHigh,
	383	partition.srcMid, srcHigh, partition.highUseHeuristics))
	384	return 1;
	385	}
	386
	387	return 0;
	388	}
	389
	390	static int ZSTD_eDist_matchComp(const void* p, const void* q)
	391	{
	392	S32 const l = ((ZSTD_eDist_match*)p)->srcIdx;
	393	S32 const r = ((ZSTD_eDist_match*)q)->srcIdx;
	394	return (l - r);
	395	}
	396
	397	/* The matches from the approach above will all be of the form
	398	* (dictIdx, srcIdx, 1). This method combines contiguous matches
	399	* of length MINMATCH or greater. Matches less than MINMATCH
	400	* are discarded */
	401	static void ZSTD_eDist_combineMatches(ZSTD_eDist_state* state)
	402	{
	403	/* Create a new buffer to put the combined matches into
	404	* and memcpy to state->matches after */
	405	ZSTD_eDist_match* combinedMatches =
	406	ZSTD_malloc(state->nbMatches * sizeof(ZSTD_eDist_match),
	407	ZSTD_defaultCMem);
	408
	409	U32 nbCombinedMatches = 1;
	410	size_t i;
	411
	412	/* Make sure that the srcIdx and dictIdx are in sorted order.
	413	* The combination step won't work otherwise */
	414	qsort(state->matches, state->nbMatches, sizeof(ZSTD_eDist_match), ZSTD_eDist_matchComp);
	415
	416	memcpy(combinedMatches, state->matches, sizeof(ZSTD_eDist_match));
	417	for (i = 1; i < state->nbMatches; i++) {
	418	ZSTD_eDist_match const match = state->matches[i];
	419	ZSTD_eDist_match const combinedMatch =
	420	combinedMatches[nbCombinedMatches - 1];
	421	if (combinedMatch.srcIdx + combinedMatch.matchLength == match.srcIdx &&
	422	combinedMatch.dictIdx + combinedMatch.matchLength == match.dictIdx) {
	423	combinedMatches[nbCombinedMatches - 1].matchLength++;
	424	} else {
	425	/* Discard matches that are less than MINMATCH */
	426	if (combinedMatches[nbCombinedMatches - 1].matchLength < MINMATCH) {
	427	nbCombinedMatches--;
	428	}
	429
	430	memcpy(combinedMatches + nbCombinedMatches,
	431	state->matches + i, sizeof(ZSTD_eDist_match));
	432	nbCombinedMatches++;
	433	}
	434	}
	435	memcpy(state->matches, combinedMatches, nbCombinedMatches * sizeof(ZSTD_eDist_match));
	436	state->nbMatches = nbCombinedMatches;
	437	ZSTD_free(combinedMatches, ZSTD_defaultCMem);
	438	}
	439
	440	static size_t ZSTD_eDist_convertMatchesToSequences(ZSTD_Sequence* sequences,
	441	ZSTD_eDist_state* state)
	442	{
	443	const ZSTD_eDist_match* matches = state->matches;
	444	size_t const nbMatches = state->nbMatches;
	445	size_t const dictSize = state->dictSize;
	446	size_t nbSequences = 0;
	447	size_t i;
	448	for (i = 0; i < nbMatches; i++) {
	449	ZSTD_eDist_match const match = matches[i];
	450	U32 const litLength = !i ? match.srcIdx :
	451	match.srcIdx - (matches[i - 1].srcIdx + matches[i - 1].matchLength);
	452	U32 const offset = (match.srcIdx + dictSize) - match.dictIdx;
	453	U32 const matchLength = match.matchLength;
	454	sequences[nbSequences].offset = offset;
	455	sequences[nbSequences].litLength = litLength;
	456	sequences[nbSequences].matchLength = matchLength;
	457	nbSequences++;
	458	}
	459	return nbSequences;
	460	}
	461
	462	/-************************************
	463	* Internal utils
	464	***************************************/
	465
	466	static size_t ZSTD_eDist_hamingDist(const BYTE* const a,
	467	const BYTE* const b, size_t n)
	468	{
	469	size_t i;
	470	size_t dist = 0;
	471	for (i = 0; i < n; i++)
	472	dist += a[i] != b[i];
	473	return dist;
	474	}
	475
	476	/* This is a pretty naive recursive implementation that should only
	477	* be used for quick tests obviously. Don't try and run this on a
	478	* GB file or something. There are faster implementations. Use those
	479	* if you need to run it for large files. */
	480	static size_t ZSTD_eDist_levenshteinDist(const BYTE* const s,
	481	size_t const sn, const BYTE* const t,
	482	size_t const tn)
	483	{
	484	size_t a, b, c;
	485
	486	if (!sn)
	487	return tn;
	488	if (!tn)
	489	return sn;
	490
	491	if (s[sn - 1] == t[tn - 1])
	492	return ZSTD_eDist_levenshteinDist(
	493	s, sn - 1, t, tn - 1);
	494
	495	a = ZSTD_eDist_levenshteinDist(s, sn - 1, t, tn - 1);
	496	b = ZSTD_eDist_levenshteinDist(s, sn, t, tn - 1);
	497	c = ZSTD_eDist_levenshteinDist(s, sn - 1, t, tn);
	498
	499	if (a > b)
	500	a = b;
	501	if (a > c)
	502	a = c;
	503
	504	return a + 1;
	505	}
	506
	507	static void ZSTD_eDist_validateMatches(ZSTD_eDist_match* matches,
	508	size_t const nbMatches, const BYTE* const dict,
	509	size_t const dictSize, const BYTE* const src,
	510	size_t const srcSize)
	511	{
	512	size_t i;
	513	for (i = 0; i < nbMatches; i++) {
	514	ZSTD_eDist_match match = matches[i];
	515	U32 const dictIdx = match.dictIdx;
	516	U32 const srcIdx = match.srcIdx;
	517	U32 const matchLength = match.matchLength;
	518
	519	assert(dictIdx + matchLength < dictSize);
	520	assert(srcIdx + matchLength < srcSize);
	521	assert(!memcmp(dict + dictIdx, src + srcIdx, matchLength));
	522	}
	523	}
	524
	525	/-************************************
	526	* API
	527	***************************************/
	528
	529	size_t ZSTD_eDist_genSequences(ZSTD_Sequence* sequences,
	530	const void* dict, size_t dictSize,
	531	const void* src, size_t srcSize,
	532	int useHeuristics)
	533	{
	534	size_t const nbDiags = dictSize + srcSize + 3;
	535	S32* buffer = ZSTD_malloc(nbDiags * 2 * sizeof(S32), ZSTD_defaultCMem);
	536	ZSTD_eDist_state state;
	537	size_t nbSequences = 0;
	538
	539	state.dict = (const BYTE*)dict;
	540	state.src = (const BYTE*)src;
	541	state.dictSize = dictSize;
	542	state.srcSize = srcSize;
	543	state.forwardDiag = buffer;
	544	state.backwardDiag = buffer + nbDiags;
	545	state.forwardDiag += srcSize + 1;
	546	state.backwardDiag += srcSize + 1;
	547	state.matches = ZSTD_malloc(srcSize * sizeof(ZSTD_eDist_match), ZSTD_defaultCMem);
	548	state.nbMatches = 0;
	549
	550	ZSTD_eDist_compare(&state, 0, dictSize, 0, srcSize, 1);
	551	ZSTD_eDist_combineMatches(&state);
	552	nbSequences = ZSTD_eDist_convertMatchesToSequences(sequences, &state);
	553
	554	ZSTD_free(buffer, ZSTD_defaultCMem);
	555	ZSTD_free(state.matches, ZSTD_defaultCMem);
	556
	557	return nbSequences;
	558	}