[pcsx_rearmed.git] / deps / lightning / lib / aarch64-logical-immediates.c

// AArch64 Logical Immediate Encoding and Decoding
//
// I hereby place this code in the public domain, as per the terms of the
// CC0 license: https://creativecommons.org/publicdomain/zero/1.0/

#include <stdint.h>
#include <stdbool.h>

// Number of zero bits below the lowest set bit of n.
// Precondition: n != 0 (the underlying builtin has no defined result for 0);
// use countTrailingZeros64 when n may be zero.
static inline int nonzeroCountTrailingZeros64(uint64_t n) {
    const int trailing = __builtin_ctzll(n);
    return trailing;
}

// Number of trailing zero bits in n, with the zero input defined to yield 64
// (i.e. "every bit is a trailing zero").
static inline int countTrailingZeros64(uint64_t n) {
    if (n == 0) {
        return 64;
    }
    return nonzeroCountTrailingZeros64(n);
}

// Number of zero bits above the highest set bit of a 64-bit value.
// Precondition: n != 0 (the underlying builtin has no defined result for 0).
static inline int nonzeroCountLeadingZeros64(uint64_t n) {
    const int leading = __builtin_clzll(n);
    return leading;
}

// Number of zero bits above the highest set bit of a 32-bit value.
// Precondition: n != 0 (the underlying builtin has no defined result for 0).
static inline int nonzeroCountLeadingZeros32(uint32_t n) {
    const int leading = __builtin_clz(n);
    return leading;
}

// Rotates the 64-bit value v right by n bit positions.
//
// Only the low six bits of n are significant, so any int is accepted: n == 64
// behaves like 0, and a negative n rotates left by the same magnitude.
//
// The shift counts are derived in unsigned arithmetic so that negating n can
// never overflow (negating INT_MIN as a signed int is undefined behaviour).
// Both counts are masked to [0, 63], which keeps each shift well-defined;
// when the count is 0 the two halves are both v and OR together to v.
static inline uint64_t rotateRight64(uint64_t v, int n) {
    const unsigned amount = (unsigned)n;
    const unsigned right = amount & 63u;
    const unsigned left = (0u - amount) & 63u;
    return (v >> right) | (v << left);
}

// Returns n with its lowest contiguous run of one bits (starting at bit 0)
// cleared. Adding 1 carries through exactly that run, flipping it to zeroes
// and setting the bit just above it; ANDing with n then drops the run while
// leaving every higher bit of n unchanged. All-ones wraps to 0.
static inline uint64_t clearTrailingOnes64(uint64_t n) {
    const uint64_t carried = n + 1;
    return carried & n;
}

#define ENCODE_FAILED (-1)

// Encodes a 64-bit constant as an AArch64 logical immediate.
//
// Returns the 13-bit encoding packed as (N << 12) | (immr << 6) | imms, or
// ENCODE_FAILED if val cannot be expressed as a logical immediate.
// Example: 0xff encodes as 0x1007 (N=1, immr=0, imms=7).
//
// Model: a logical immediate is an element made of a run of "o" one bits with
// "z" zero bits above it (o > 0, z > 0, o + z a power of two in [2,64]),
// replicated to fill 64 bits, then rotated right by "immr". The element shape
// is carried by "imms" together with "N". The architecture describes the
// rotate as happening on the element before replication; because the result
// is periodic, rotating after replication gives the same value.
int encodeLogicalImmediate64(uint64_t val) {
    // All-zero and all-one have no run boundary and are not representable.
    if (val == 0 || val == UINT64_MAX) {
        return ENCODE_FAILED;
    }

    // Step 1: undo the rotation. Drop the ones at the very bottom, then the
    // number of trailing zeroes of what remains is the position of a 0 -> 1
    // edge. Rotating the original value right by that distance puts a one in
    // bit 0 and a zero in bit 63. If no edge exists above the bottom run the
    // distance comes out as 64, which the "& 63" turns into "no rotation".
    const uint64_t above_low_run = clearTrailingOnes64(val);
    const int undo_rotate = countTrailingZeros64(above_low_run);
    const uint64_t canonical = rotateRight64(val, undo_rotate & 63);

    // Step 2: measure the candidate element. The canonical form has a one in
    // bit 0 and a zero in bit 63, so it is neither 0 nor ~0 and both counts
    // below are well-defined and strictly positive.
    const int run_of_zeroes = nonzeroCountLeadingZeros64(canonical);
    const int run_of_ones = nonzeroCountTrailingZeros64(~canonical);
    const int element_bits = run_of_zeroes + run_of_ones;

    // Step 3: the value must repeat with period element_bits, i.e. rotating
    // by one element must reproduce it. (For a 64-bit element the rotate is
    // by 0 and the check passes trivially.)
    //
    // No separate power-of-two check is needed: a pattern that tiles 64 bits
    // and contains no smaller repetition must have a length dividing 64, and
    // ours is such a pattern because it is a single run of ones bounded by at
    // least one zero. Length 1 is impossible since 0 and ~0 were rejected.
    const bool tiles = rotateRight64(val, element_bits & 63) == val;
    if (!tiles) {
        return ENCODE_FAILED;
    }

    // Step 4: pack the fields. undo_rotate is how far we rotated right to
    // cancel immr, so immr is its negation modulo the element size.
    //
    //   size 2:  N=0 immr=00000r imms=11110s
    //   size 4:  N=0 immr=0000rr imms=1110ss
    //   size 8:  N=0 immr=000rrr imms=110sss
    //   size 16: N=0 immr=00rrrr imms=10ssss
    //   size 32: N=0 immr=0rrrrr imms=0sssss
    //   size 64: N=1 immr=rrrrrr imms=ssssss
    //
    // -(size << 1) supplies the high "length marker" ones of imms followed by
    // a zero; (ones - 1) fills in the low bits; the result is cut to 6 bits.
    const int n_field = element_bits >> 6;
    const int immr_field = -undo_rotate & (element_bits - 1);
    const int imms_field = (-(element_bits << 1) | (run_of_ones - 1)) & 0x3f;

    return (n_field << 12) | (immr_field << 6) | imms_field;
}

// Encodes a 32-bit constant as a logical immediate by duplicating it into
// both halves of a 64-bit word and reusing the 64-bit encoder. Returns the
// same packed (N, immr, imms) value, or the encoder's failure code.
int encodeLogicalImmediate32(uint32_t val) {
    const uint64_t low_half = val;
    const uint64_t replicated = (low_half << 32) | low_half;
    return encodeLogicalImmediate64(replicated);
}

// Decoding!

// Reports whether val (N in bit 12, imms in bits 5:0; other bits are not
// examined) is a decodable logical-immediate encoding.
//
// N followed by the inverted imms forms a 7-bit marker whose highest set bit
// selects the element size. The marker must contain at least two set bits:
// zero or a lone bit means either no element size is selected or the run
// length bits of imms are all ones, neither of which is a valid encoding.
bool isValidLogicalImmediate64(unsigned val) {
    const unsigned n_bit = (val >> 12) & 1u;
    const unsigned inverted_imms = ~val & 0x3fu;
    const unsigned size_marker = (n_bit << 6) | inverted_imms;

    // x & (x - 1) clears the lowest set bit; a nonzero remainder means a
    // second set bit exists.
    const unsigned without_lowest = size_marker & (size_marker - 1);
    return without_lowest != 0;
}

// Reports whether val is a decodable logical-immediate encoding for a 32-bit
// operation: the N bit (bit 12) must be clear, and the encoding must also
// pass the 64-bit validity check.
bool isValidLogicalImmediate32(unsigned val) {
    if ((val >> 12) & 1) {
        return false;
    }
    return isValidLogicalImmediate64(val);
}

#define DECODE_FAILED 0

// Decodes a packed logical immediate (N in bit 12, immr in bits 11:6, imms in
// bits 5:0) into the 64-bit constant it represents.
// Returns DECODE_FAILED (zero) if the encoding is invalid.
uint64_t decodeLogicalImmediate64(unsigned val) {
    // One entry per element size. Each entry alternates an all-ones element
    // with an all-zeroes element (for size 64 there is just the one all-ones
    // element). XORing an entry with itself shifted left by S cancels
    // everything except the lowest S bits of every element, which is exactly
    // the replicated run of S ones.
    static const uint64_t element_masks[] = {
        0xffffffffffffffff, // size = 64
        0x00000000ffffffff, // size = 32
        0x0000ffff0000ffff, // size = 16
        0x00ff00ff00ff00ff, // size = 8
        0x0f0f0f0f0f0f0f0f, // size = 4
        0x3333333333333333, // size = 2
    };

    const unsigned n_bit = (val >> 12) & 1;
    const int rotate_by = (val >> 6) & 0x3f;
    const unsigned imms = val & 0x3f;

    // N followed by inverted imms: the highest set bit selects the element
    // size. Fewer than two set bits means the encoding is not valid.
    const unsigned size_marker = (n_bit << 6) | (~imms & 0x3f);
    if ((size_marker & (size_marker - 1)) == 0) {
        return DECODE_FAILED;
    }

    // size_marker is now in [3, 127], so its leading-zero count as a 32-bit
    // value is in [25, 30]: 25 selects size 64, 30 selects size 2.
    const int lz = nonzeroCountLeadingZeros32(size_marker);
    const uint64_t base = element_masks[lz - 25];

    // Keep only the imms bits below the size marker; adding one turns the
    // stored (ones - 1) into the run length.
    const unsigned run_length = (imms + 1) & (0x7fffffff >> lz);

    const uint64_t unrotated = base ^ (base << run_length);
    return rotateRight64(unrotated, rotate_by);
}

// Decodes a packed logical immediate for a 32-bit operation. An encoding with
// the N bit (bit 12) set is rejected with DECODE_FAILED; otherwise the result
// is the low 32 bits of the 64-bit decode (which itself yields DECODE_FAILED
// for an invalid encoding).
uint32_t decodeLogicalImmediate32(unsigned val) {
    const bool n_is_set = ((val >> 12) & 1) != 0;
    if (n_is_set) {
        return DECODE_FAILED;
    }
    const uint64_t wide = decodeLogicalImmediate64(val);
    return (uint32_t)wide;
}
Commit	Line	Data
	1	// AArch64 Logical Immediate Encoding and Decoding
	2	//
	3	// I hereby place this code in the public domain, as per the terms of the
	4	// CC0 license: https://creativecommons.org/publicdomain/zero/1.0/
	5
	6	#include <stdint.h>
	7	#include <stdbool.h>
	8
	9	static inline int nonzeroCountTrailingZeros64(uint64_t n) {
	10	return __builtin_ctzll(n);
	11	}
	12
	13	static inline int countTrailingZeros64(uint64_t n) {
	14	return n ? nonzeroCountTrailingZeros64(n) : 64;
	15	}
	16
	17	static inline int nonzeroCountLeadingZeros64(uint64_t n) {
	18	return __builtin_clzll(n);
	19	}
	20
	21	static inline int nonzeroCountLeadingZeros32(uint32_t n) {
	22	return __builtin_clz(n);
	23	}
	24
	25	static inline uint64_t rotateRight64(uint64_t v, int n) {
	26	// return __builtin_rotateright64(v, n);
	27	return (v >> (n & 63)) \| (v << (-n & 63));
	28	}
	29
	30	static inline uint64_t clearTrailingOnes64(uint64_t n) {
	31	return n & (n + 1);
	32	}
	33
	34	#define ENCODE_FAILED (-1)
	35
	36	int encodeLogicalImmediate64(uint64_t val) {
	37	// Consider an ARM64 logical immediate as a pattern of "o" ones preceded
	38	// by "z" more-significant zeroes, repeated to fill a 64-bit integer.
	39	// o > 0, z > 0, and the size (o + z) is a power of two in [2,64]. This
	40	// part of the pattern is encoded in the fields "imms" and "N".
	41	//
	42	// "immr" encodes a further right rotate of the repeated pattern, allowing
	43	// a wide range of useful bitwise constants to be represented.
	44	//
	45	// (The spec describes the "immr" rotate as rotating the "o + z" bit
	46	// pattern before repeating it to fill 64-bits, but, as it's a repeating
	47	// pattern, rotating afterwards is equivalent.)
	48
	49	// This encoding is not allowed to represent all-zero or all-one values.
	50	if (val == 0 \|\| ~val == 0)
	51	return ENCODE_FAILED;
	52
	53	// To detect an immediate that may be encoded in this scheme, we first
	54	// remove the right-rotate, by rotating such that the least significant
	55	// bit is a one and the most significant bit is a zero.
	56	//
	57	// We do this by clearing any trailing one bits, then counting the
	58	// trailing zeroes. This finds an "edge", where zero goes to one.
	59	// We then rotate the original value right by that amount, moving
	60	// the first one to the least significant bit.
	61
	62	int rotation = countTrailingZeros64(clearTrailingOnes64(val));
	63	uint64_t normalized = rotateRight64(val, rotation & 63);
	64
	65	// Now we have normalized the value, and determined the rotation, we can
	66	// determine "z" by counting the leading zeroes, and "o" by counting the
	67	// trailing ones. (These will both be positive, as we already rejected 0
	68	// and ~0, and rotated the value to start with a zero and end with a one.)
	69
	70	int zeroes = nonzeroCountLeadingZeros64(normalized);
	71	int ones = nonzeroCountTrailingZeros64(~normalized);
	72	int size = zeroes + ones;
	73
	74	// Detect the repeating pattern (by comparing every repetition to the
	75	// one next to it, using rotate).
	76
	77	if (rotateRight64(val, size & 63) != val)
	78	return ENCODE_FAILED;
	79
	80	// We do not need to further validate size to ensure it is a power of two
	81	// between 2 and 64. The only "minimal" patterns that can repeat to fill a
	82	// 64-bit value must have a length that is a factor of 64 (i.e. it is a
	83	// power of two in the range [1,64]). And our pattern cannot be of length
	84	// one (as we already rejected 0 and ~0).
	85	//
	86	// By "minimal" patterns I refer to patterns which do not themselves
	87	// contain repetitions. For example, '010101' is a non-minimal pattern of
	88	// a non-power-of-two length that can pass the above rotational test. It
	89	// consists of the minimal pattern '01'. All our patterns are minimal, as
	90	// they contain only one contiguous run of ones separated by at least one
	91	// zero.
	92
	93	// Finally, we encode the values. "rotation" is the amount we rotated
	94	// right by to "undo" the right-rotate encoded in immr, so must be
	95	// negated.
	96
	97	// size 2: N=0 immr=00000r imms=11110s
	98	// size 4: N=0 immr=0000rr imms=1110ss
	99	// size 8: N=0 immr=000rrr imms=110sss
	100	// size 16: N=0 immr=00rrrr imms=10ssss
	101	// size 32: N=0 immr=0rrrrr imms=0sssss
	102	// size 64: N=1 immr=rrrrrr imms=ssssss
	103	int immr = -rotation & (size - 1);
	104	int imms = -(size << 1) \| (ones - 1);
	105	int N = (size >> 6);
	106
	107	return (N << 12) \| (immr << 6) \| (imms & 0x3f);
	108	}
	109
	110	int encodeLogicalImmediate32(uint32_t val) {
	111	return encodeLogicalImmediate64(((uint64_t)val << 32) \| val);
	112	}
	113
	114	// Decoding!
	115
	116	bool isValidLogicalImmediate64(unsigned val) {
	117	unsigned N = (val >> 12) & 1;
	118	unsigned imms = val & 0x3f;
	119	unsigned pattern = (N << 6) \| (~imms & 0x3f);
	120	return (pattern & (pattern - 1)) != 0;
	121	}
	122
	123	bool isValidLogicalImmediate32(unsigned val) {
	124	unsigned N = (val >> 12) & 1;
	125	return N == 0 && isValidLogicalImmediate64(val);
	126	}
	127
	128	#define DECODE_FAILED 0
	129
	130	// returns DECODE_FAILED (zero) if the encoding is invalid
	131	uint64_t decodeLogicalImmediate64(unsigned val) {
	132	// Fun way to generate the immediates with mask ^ (mask << S)
	133	static const uint64_t mask_lookup[] = {
	134	0xffffffffffffffff, // size = 64
	135	0x00000000ffffffff, // size = 32
	136	0x0000ffff0000ffff, // size = 16
	137	0x00ff00ff00ff00ff, // size = 8
	138	0x0f0f0f0f0f0f0f0f, // size = 4
	139	0x3333333333333333, // size = 2
	140	};
	141
	142	unsigned N = (val >> 12) & 1;
	143	int immr = (val >> 6) & 0x3f;
	144	unsigned imms = val & 0x3f;
	145
	146	unsigned pattern = (N << 6) \| (~imms & 0x3f);
	147
	148	if (!(pattern & (pattern - 1))) return DECODE_FAILED;
	149
	150	int leading_zeroes = nonzeroCountLeadingZeros32(pattern);
	151	unsigned imms_mask = 0x7fffffff >> leading_zeroes;
	152	uint64_t mask = mask_lookup[leading_zeroes - 25];
	153	unsigned S = (imms + 1) & imms_mask;
	154	return rotateRight64(mask ^ (mask << S), immr);
	155	}
	156
	157	uint32_t decodeLogicalImmediate32(unsigned val) {
	158	unsigned N = (val >> 12) & 1;
	159	if (N) return DECODE_FAILED;
	160	return (uint32_t)decodeLogicalImmediate64(val);
	161	}