[pcsx_rearmed.git] / deps / lightning / lib / aarch64-logical-immediates.c

// AArch64 Logical Immediate Encoding and Decoding
//
// I hereby place this code in the public domain, as per the terms of the
// CC0 license: https://creativecommons.org/publicdomain/zero/1.0/

#include <stdint.h>
#include <stdbool.h>

// Position of the lowest set bit of n, i.e. the number of zero bits below it.
// The argument must be nonzero: the underlying builtin gives no defined
// result for zero (see countTrailingZeros64 for a zero-safe wrapper).
static inline int nonzeroCountTrailingZeros64(uint64_t n) {
    const int lowestSetBit = __builtin_ctzll(n);
    return lowestSetBit;
}

// Number of trailing zero bits in n. Unlike nonzeroCountTrailingZeros64 this
// accepts n == 0, for which it returns 64.
static inline int countTrailingZeros64(uint64_t n) {
    if (n == 0)
        return 64;
    return nonzeroCountTrailingZeros64(n);
}

// Number of zero bits above the highest set bit of a 64-bit value.
// The argument must be nonzero: the underlying builtin gives no defined
// result for zero.
static inline int nonzeroCountLeadingZeros64(uint64_t n) {
    const int zeroesAboveTopBit = __builtin_clzll(n);
    return zeroesAboveTopBit;
}

// Number of zero bits above the highest set bit of a 32-bit value.
// The argument must be nonzero: the underlying builtin gives no defined
// result for zero.
static inline int nonzeroCountLeadingZeros32(uint32_t n) {
    const int zeroesAboveTopBit = __builtin_clz(n);
    return zeroesAboveTopBit;
}

// Rotate the 64-bit value v right by n bit positions.
//
// Only the low six bits of n matter, so any int is accepted: n == 64 behaves
// like n == 0, and a negative n rotates left by -n.
static inline uint64_t rotateRight64(uint64_t v, int n) {
    // Convert the count to unsigned before negating it. "-n" on a signed int
    // overflows (undefined behaviour) when n == INT_MIN; unsigned negation
    // wraps instead and leaves the low six bits we need intact.
    const unsigned right = (unsigned)n & 63u;
    const unsigned left = (0u - (unsigned)n) & 63u;

    // When right is 0, left is 0 as well, so neither shift ever uses the
    // undefined count of 64 and the result is simply v.
    return (v >> right) | (v << left);
}

// Clear the contiguous run of one bits at the least significant end of n
// (binary 1011 becomes 1000). A value whose bit 0 is clear is returned as is.
static inline uint64_t clearTrailingOnes64(uint64_t n) {
    // Adding one carries through the trailing ones, zeroing them and setting
    // the first zero bit above them; the AND with n then drops that bit.
    const uint64_t carried = n + 1;
    return carried & n;
}

#define ENCODE_FAILED (-1)

// Encode a 64-bit constant as an AArch64 logical immediate.
//
// On success the result packs the three fields as
// (N << 12) | (immr << 6) | imms. ENCODE_FAILED is returned when val cannot
// be represented.
//
// A representable value is a run of "o" ones sitting below a run of "z"
// zeroes (o > 0, z > 0), forming an element whose width o + z is a power of
// two in [2,64]. The element is repeated to fill 64 bits and the result is
// rotated right. N and imms describe the element; immr is the rotation.
// (The architecture rotates the element before repeating it, but for a
// repeating pattern rotating afterwards gives the same value.)
int encodeLogicalImmediate64(uint64_t val) {
    // All-zero and all-one values have no encoding.
    if (val == 0)
        return ENCODE_FAILED;
    if (~val == 0)
        return ENCODE_FAILED;

    // Undo the rotation. Dropping the trailing ones and then counting the
    // trailing zeroes of what remains locates a zero-to-one edge; rotating
    // right by that distance leaves a one in bit 0 and a zero in bit 63.
    // If nothing remains (val is a single run of ones starting at bit 0) the
    // count is 64, which rotates by nothing, and val is already aligned.
    const uint64_t aboveLowRun = clearTrailingOnes64(val);
    const int undo = countTrailingZeros64(aboveLowRun);
    const uint64_t aligned = rotateRight64(val, undo & 63);

    // On the aligned value the two run lengths can be read off directly.
    // Bit 0 is set and bit 63 is clear, so neither counter sees a zero
    // argument and both lengths are positive.
    const int onesRun = nonzeroCountTrailingZeros64(~aligned);
    const int zeroRun = nonzeroCountLeadingZeros64(aligned);
    const int element = onesRun + zeroRun;

    // The value is representable only if every element equals its
    // neighbour, i.e. rotating by one element width changes nothing.
    if (rotateRight64(val, element & 63) != val)
        return ENCODE_FAILED;

    // No separate power-of-two check on the width is required. A pattern
    // that contains no shorter repetition of itself can only tile 64 bits
    // if its length divides 64, and ours is such a pattern: one contiguous
    // run of ones followed by at least one zero. Length one is impossible
    // because 0 and ~0 were rejected above.

    // Field layout by element width (r = rotation bits, s = ones - 1 bits):
    //   width 2:  N=0 immr=00000r imms=11110s
    //   width 4:  N=0 immr=0000rr imms=1110ss
    //   width 8:  N=0 immr=000rrr imms=110sss
    //   width 16: N=0 immr=00rrrr imms=10ssss
    //   width 32: N=0 immr=0rrrrr imms=0sssss
    //   width 64: N=1 immr=rrrrrr imms=ssssss
    //
    // "undo" is how far we rotated right to cancel the encoded rotation, so
    // immr is its negation, reduced modulo the element width.
    const int immr = -undo & (element - 1);
    const int imms = (-(element << 1) | (onesRun - 1)) & 0x3f;
    const int N = element >> 6;

    return (N << 12) | (immr << 6) | imms;
}

// Encode a 32-bit constant as a logical immediate. The value is copied into
// both halves of a 64-bit word and handed to encodeLogicalImmediate64, whose
// result (packed fields or ENCODE_FAILED) is returned unchanged.
int encodeLogicalImmediate32(uint32_t val) {
    const uint64_t half = val;
    const uint64_t replicated = (half << 32) | half;
    return encodeLogicalImmediate64(replicated);
}

// Decoding!

// Report whether val is a decodable logical-immediate encoding.
//
// Only N (bit 12) and imms (bits 0-5) are examined; immr and any higher bits
// of val do not affect the answer.
bool isValidLogicalImmediate64(unsigned val) {
    const unsigned nBit = (val >> 12) & 1u;
    const unsigned invertedImms = ~val & 0x3fu;
    const unsigned pattern = (nBit << 6) | invertedImms;

    // The encoding is accepted when N:~imms has at least two bits set:
    // clearing its lowest set bit must leave something behind.
    const unsigned withoutLowestBit = pattern & (pattern - 1);
    return withoutLowestBit != 0;
}

// Report whether val is a decodable logical-immediate encoding for the
// 32-bit form: N (bit 12) must be clear, and the encoding must also pass
// isValidLogicalImmediate64.
bool isValidLogicalImmediate32(unsigned val) {
    if ((val >> 12) & 1)
        return false;
    return isValidLogicalImmediate64(val);
}

#define DECODE_FAILED 0

// Expand a packed logical-immediate encoding (N in bit 12, immr in bits
// 6-11, imms in bits 0-5) into the 64-bit constant it denotes.
//
// Returns DECODE_FAILED (zero) if the encoding is invalid.
uint64_t decodeLogicalImmediate64(unsigned val) {
    // One entry per element size, indexed by (leading zeroes of pattern) - 25.
    // XORing an entry with itself shifted left by S leaves exactly S ones at
    // the bottom of every element.
    static const uint64_t elementMasks[] = {
        0xffffffffffffffff, // size = 64
        0x00000000ffffffff, // size = 32
        0x0000ffff0000ffff, // size = 16
        0x00ff00ff00ff00ff, // size = 8
        0x0f0f0f0f0f0f0f0f, // size = 4
        0x3333333333333333, // size = 2
    };

    const unsigned imms = val & 0x3f;
    const int immr = (val >> 6) & 0x3f;
    const unsigned N = (val >> 12) & 1;

    // N:~imms must have at least two bits set to be a valid encoding.
    const unsigned pattern = (N << 6) | (~imms & 0x3f);
    if ((pattern & (pattern - 1)) == 0)
        return DECODE_FAILED;

    // pattern is a 7-bit value with two or more bits set, so its highest set
    // bit is at position 1..6 and the leading-zero count is 25..30. That
    // count selects both the table entry and how many low bits of imms hold
    // the run length.
    const int lz = nonzeroCountLeadingZeros32(pattern);
    const uint64_t mask = elementMasks[lz - 25];
    const unsigned lengthBits = 0x7fffffff >> lz;
    const unsigned S = (imms + 1) & lengthBits;

    // Build the unrotated pattern, then apply the encoded right rotation.
    const uint64_t unrotated = mask ^ (mask << S);
    return rotateRight64(unrotated, immr);
}

// Expand a packed logical-immediate encoding into a 32-bit constant.
//
// Encodings with N (bit 12) set are rejected with DECODE_FAILED (zero).
// Otherwise the result is the low 32 bits of decodeLogicalImmediate64(val),
// which is itself DECODE_FAILED for an invalid encoding.
uint32_t decodeLogicalImmediate32(unsigned val) {
    const unsigned nBit = (val >> 12) & 1;
    if (nBit != 0)
        return DECODE_FAILED;

    const uint64_t wide = decodeLogicalImmediate64(val);
    return (uint32_t)wide;
}
Commit	Line	Data
79bfeef6 PC	1	// AArch64 Logical Immediate Encoding and Decoding
	2	//
	3	// I hereby place this code in the public domain, as per the terms of the
	4	// CC0 license: https://creativecommons.org/publicdomain/zero/1.0/
	5
	6	#include <stdint.h>
	7	#include <stdbool.h>
	8
	9	static inline int nonzeroCountTrailingZeros64(uint64_t n) {
	10	return __builtin_ctzll(n);
	11	}
	12
	13	static inline int countTrailingZeros64(uint64_t n) {
	14	return n ? nonzeroCountTrailingZeros64(n) : 64;
	15	}
	16
	17	static inline int nonzeroCountLeadingZeros64(uint64_t n) {
	18	return __builtin_clzll(n);
	19	}
	20
	21	static inline int nonzeroCountLeadingZeros32(uint32_t n) {
	22	return __builtin_clz(n);
	23	}
	24
	25	static inline uint64_t rotateRight64(uint64_t v, int n) {
	26	// return __builtin_rotateright64(v, n);
	27	return (v >> (n & 63)) \| (v << (-n & 63));
	28	}
	29
	30	static inline uint64_t clearTrailingOnes64(uint64_t n) {
	31	return n & (n + 1);
	32	}
	33
	34	#define ENCODE_FAILED (-1)
	35
	36	int encodeLogicalImmediate64(uint64_t val) {
	37	// Consider an ARM64 logical immediate as a pattern of "o" ones preceded
	38	// by "z" more-significant zeroes, repeated to fill a 64-bit integer.
	39	// o > 0, z > 0, and the size (o + z) is a power of two in [2,64]. This
	40	// part of the pattern is encoded in the fields "imms" and "N".
	41	//
	42	// "immr" encodes a further right rotate of the repeated pattern, allowing
	43	// a wide range of useful bitwise constants to be represented.
	44	//
	45	// (The spec describes the "immr" rotate as rotating the "o + z" bit
	46	// pattern before repeating it to fill 64-bits, but, as it's a repeating
	47	// pattern, rotating afterwards is equivalent.)
	48
	49	// This encoding is not allowed to represent all-zero or all-one values.
	50	if (val == 0 \|\| ~val == 0)
	51	return ENCODE_FAILED;
	52
	53	// To detect an immediate that may be encoded in this scheme, we first
	54	// remove the right-rotate, by rotating such that the least significant
	55	// bit is a one and the most significant bit is a zero.
	56	//
	57	// We do this by clearing any trailing one bits, then counting the
	58	// trailing zeroes. This finds an "edge", where zero goes to one.
	59	// We then rotate the original value right by that amount, moving
	60	// the first one to the least significant bit.
	61
	62	int rotation = countTrailingZeros64(clearTrailingOnes64(val));
	63	uint64_t normalized = rotateRight64(val, rotation & 63);
	64
65	// Now we have normalized the value, and determined the rotation, we can
66	// determine "z" by counting the leading zeroes, and "o" by counting the
67	// trailing ones. (These will both be positive, as we already rejected 0
68	// and ~0, and rotated the value to start with a zero and end with a one.)
69
70	int zeroes = nonzeroCountLeadingZeros64(normalized);
71	int ones = nonzeroCountTrailingZeros64(~normalized);
72	int size = zeroes + ones;
73
74	// Detect the repeating pattern (by comparing every repetition to the
75	// one next to it, using rotate).
76
77	if (rotateRight64(val, size & 63) != val)
78	return ENCODE_FAILED;
79
80	// We do not need to further validate size to ensure it is a power of two
81	// between 2 and 64. The only "minimal" patterns that can repeat to fill a
82	// 64-bit value must have a length that is a factor of 64 (i.e. it is a
83	// power of two in the range [1,64]). And our pattern cannot be of length
84	// one (as we already rejected 0 and ~0).
85	//
86	// By "minimal" patterns I refer to patterns which do not themselves
87	// contain repetitions. For example, '010101' is a non-minimal pattern of
88	// a non-power-of-two length that can pass the above rotational test. It
89	// consists of the minimal pattern '01'. All our patterns are minimal, as
90	// they contain only one contiguous run of ones separated by at least one
91	// zero.
92
93	// Finally, we encode the values. "rotation" is the amount we rotated
94	// right by to "undo" the right-rotate encoded in immr, so must be
95	// negated.
96
97	// size 2: N=0 immr=00000r imms=11110s
98	// size 4: N=0 immr=0000rr imms=1110ss
99	// size 8: N=0 immr=000rrr imms=110sss
100	// size 16: N=0 immr=00rrrr imms=10ssss
101	// size 32: N=0 immr=0rrrrr imms=0sssss
102	// size 64: N=1 immr=rrrrrr imms=ssssss
103	int immr = -rotation & (size - 1);
104	int imms = -(size << 1) \| (ones - 1);
105	int N = (size >> 6);
106
107	return (N << 12) \| (immr << 6) \| (imms & 0x3f);
108	}
109
110	int encodeLogicalImmediate32(uint32_t val) {
111	return encodeLogicalImmediate64(((uint64_t)val << 32) \| val);
112	}
113
114	// Decoding!
115
116	bool isValidLogicalImmediate64(unsigned val) {
117	unsigned N = (val >> 12) & 1;
118	unsigned imms = val & 0x3f;
119	unsigned pattern = (N << 6) \| (~imms & 0x3f);
120	return (pattern & (pattern - 1)) != 0;
121	}
122
123	bool isValidLogicalImmediate32(unsigned val) {
124	unsigned N = (val >> 12) & 1;
125	return N == 0 && isValidLogicalImmediate64(val);
126	}
127
128	#define DECODE_FAILED 0
129
130	// returns DECODE_FAILED (zero) if the encoding is invalid
131	uint64_t decodeLogicalImmediate64(unsigned val) {
132	// Fun way to generate the immediates with mask ^ (mask << S)
133	static const uint64_t mask_lookup[] = {
134	0xffffffffffffffff, // size = 64
135	0x00000000ffffffff, // size = 32
136	0x0000ffff0000ffff, // size = 16
137	0x00ff00ff00ff00ff, // size = 8
138	0x0f0f0f0f0f0f0f0f, // size = 4
139	0x3333333333333333, // size = 2
140	};
141
142	unsigned N = (val >> 12) & 1;
143	int immr = (val >> 6) & 0x3f;
144	unsigned imms = val & 0x3f;
145
146	unsigned pattern = (N << 6) \| (~imms & 0x3f);
147
148	if (!(pattern & (pattern - 1))) return DECODE_FAILED;
149
150	int leading_zeroes = nonzeroCountLeadingZeros32(pattern);
151	unsigned imms_mask = 0x7fffffff >> leading_zeroes;
152	uint64_t mask = mask_lookup[leading_zeroes - 25];
153	unsigned S = (imms + 1) & imms_mask;
154	return rotateRight64(mask ^ (mask << S), immr);
155	}
156
157	uint32_t decodeLogicalImmediate32(unsigned val) {
158	unsigned N = (val >> 12) & 1;
159	if (N) return DECODE_FAILED;
160	return (uint32_t)decodeLogicalImmediate64(val);
161	}