Commit | Line | Data |
---|---|---|
79bfeef6 PC |
1 | // AArch64 Logical Immediate Encoding and Decoding |
2 | // | |
3 | // I hereby place this code in the public domain, as per the terms of the | |
4 | // CC0 license: https://creativecommons.org/publicdomain/zero/1.0/ | |
5 | ||
6 | #include <stdint.h> | |
7 | #include <stdbool.h> | |
8 | ||
9 | static inline int nonzeroCountTrailingZeros64(uint64_t n) { | |
10 | return __builtin_ctzll(n); | |
11 | } | |
12 | ||
13 | static inline int countTrailingZeros64(uint64_t n) { | |
14 | return n ? nonzeroCountTrailingZeros64(n) : 64; | |
15 | } | |
16 | ||
17 | static inline int nonzeroCountLeadingZeros64(uint64_t n) { | |
18 | return __builtin_clzll(n); | |
19 | } | |
20 | ||
21 | static inline int nonzeroCountLeadingZeros32(uint32_t n) { | |
22 | return __builtin_clz(n); | |
23 | } | |
24 | ||
25 | static inline uint64_t rotateRight64(uint64_t v, int n) { | |
26 | // return __builtin_rotateright64(v, n); | |
27 | return (v >> (n & 63)) | (v << (-n & 63)); | |
28 | } | |
29 | ||
30 | static inline uint64_t clearTrailingOnes64(uint64_t n) { | |
31 | return n & (n + 1); | |
32 | } | |
33 | ||
34 | #define ENCODE_FAILED (-1) | |
35 | ||
36 | int encodeLogicalImmediate64(uint64_t val) { | |
37 | // Consider an ARM64 logical immediate as a pattern of "o" ones preceded | |
38 | // by "z" more-significant zeroes, repeated to fill a 64-bit integer. | |
39 | // o > 0, z > 0, and the size (o + z) is a power of two in [2,64]. This | |
40 | // part of the pattern is encoded in the fields "imms" and "N". | |
41 | // | |
42 | // "immr" encodes a further right rotate of the repeated pattern, allowing | |
43 | // a wide range of useful bitwise constants to be represented. | |
44 | // | |
45 | // (The spec describes the "immr" rotate as rotating the "o + z" bit | |
46 | // pattern before repeating it to fill 64-bits, but, as it's a repeating | |
47 | // pattern, rotating afterwards is equivalent.) | |
48 | ||
49 | // This encoding is not allowed to represent all-zero or all-one values. | |
50 | if (val == 0 || ~val == 0) | |
51 | return ENCODE_FAILED; | |
52 | ||
53 | // To detect an immediate that may be encoded in this scheme, we first | |
54 | // remove the right-rotate, by rotating such that the least significant | |
55 | // bit is a one and the most significant bit is a zero. | |
56 | // | |
57 | // We do this by clearing any trailing one bits, then counting the | |
58 | // trailing zeroes. This finds an "edge", where zero goes to one. | |
59 | // We then rotate the original value right by that amount, moving | |
60 | // the first one to the least significant bit. | |
61 | ||
62 | int rotation = countTrailingZeros64(clearTrailingOnes64(val)); | |
63 | uint64_t normalized = rotateRight64(val, rotation & 63); | |
64 | ||
65 | // Now we have normalized the value, and determined the rotation, we can | |
66 | // determine "z" by counting the leading zeroes, and "o" by counting the | |
67 | // trailing ones. (These will both be positive, as we already rejected 0 | |
68 | // and ~0, and rotated the value to start with a zero and end with a one.) | |
69 | ||
70 | int zeroes = nonzeroCountLeadingZeros64(normalized); | |
71 | int ones = nonzeroCountTrailingZeros64(~normalized); | |
72 | int size = zeroes + ones; | |
73 | ||
74 | // Detect the repeating pattern (by comparing every repetition to the | |
75 | // one next to it, using rotate). | |
76 | ||
77 | if (rotateRight64(val, size & 63) != val) | |
78 | return ENCODE_FAILED; | |
79 | ||
80 | // We do not need to further validate size to ensure it is a power of two | |
81 | // between 2 and 64. The only "minimal" patterns that can repeat to fill a | |
82 | // 64-bit value must have a length that is a factor of 64 (i.e. it is a | |
83 | // power of two in the range [1,64]). And our pattern cannot be of length | |
84 | // one (as we already rejected 0 and ~0). | |
85 | // | |
86 | // By "minimal" patterns I refer to patterns which do not themselves | |
87 | // contain repetitions. For example, '010101' is a non-minimal pattern of | |
88 | // a non-power-of-two length that can pass the above rotational test. It | |
89 | // consists of the minimal pattern '01'. All our patterns are minimal, as | |
90 | // they contain only one contiguous run of ones separated by at least one | |
91 | // zero. | |
92 | ||
93 | // Finally, we encode the values. "rotation" is the amount we rotated | |
94 | // right by to "undo" the right-rotate encoded in immr, so must be | |
95 | // negated. | |
96 | ||
97 | // size 2: N=0 immr=00000r imms=11110s | |
98 | // size 4: N=0 immr=0000rr imms=1110ss | |
99 | // size 8: N=0 immr=000rrr imms=110sss | |
100 | // size 16: N=0 immr=00rrrr imms=10ssss | |
101 | // size 32: N=0 immr=0rrrrr imms=0sssss | |
102 | // size 64: N=1 immr=rrrrrr imms=ssssss | |
103 | int immr = -rotation & (size - 1); | |
104 | int imms = -(size << 1) | (ones - 1); | |
105 | int N = (size >> 6); | |
106 | ||
107 | return (N << 12) | (immr << 6) | (imms & 0x3f); | |
108 | } | |
109 | ||
110 | int encodeLogicalImmediate32(uint32_t val) { | |
111 | return encodeLogicalImmediate64(((uint64_t)val << 32) | val); | |
112 | } | |
113 | ||
114 | // Decoding! | |
115 | ||
116 | bool isValidLogicalImmediate64(unsigned val) { | |
117 | unsigned N = (val >> 12) & 1; | |
118 | unsigned imms = val & 0x3f; | |
119 | unsigned pattern = (N << 6) | (~imms & 0x3f); | |
120 | return (pattern & (pattern - 1)) != 0; | |
121 | } | |
122 | ||
123 | bool isValidLogicalImmediate32(unsigned val) { | |
124 | unsigned N = (val >> 12) & 1; | |
125 | return N == 0 && isValidLogicalImmediate64(val); | |
126 | } | |
127 | ||
128 | #define DECODE_FAILED 0 | |
129 | ||
130 | // returns DECODE_FAILED (zero) if the encoding is invalid | |
131 | uint64_t decodeLogicalImmediate64(unsigned val) { | |
132 | // Fun way to generate the immediates with mask ^ (mask << S) | |
133 | static const uint64_t mask_lookup[] = { | |
134 | 0xffffffffffffffff, // size = 64 | |
135 | 0x00000000ffffffff, // size = 32 | |
136 | 0x0000ffff0000ffff, // size = 16 | |
137 | 0x00ff00ff00ff00ff, // size = 8 | |
138 | 0x0f0f0f0f0f0f0f0f, // size = 4 | |
139 | 0x3333333333333333, // size = 2 | |
140 | }; | |
141 | ||
142 | unsigned N = (val >> 12) & 1; | |
143 | int immr = (val >> 6) & 0x3f; | |
144 | unsigned imms = val & 0x3f; | |
145 | ||
146 | unsigned pattern = (N << 6) | (~imms & 0x3f); | |
147 | ||
148 | if (!(pattern & (pattern - 1))) return DECODE_FAILED; | |
149 | ||
150 | int leading_zeroes = nonzeroCountLeadingZeros32(pattern); | |
151 | unsigned imms_mask = 0x7fffffff >> leading_zeroes; | |
152 | uint64_t mask = mask_lookup[leading_zeroes - 25]; | |
153 | unsigned S = (imms + 1) & imms_mask; | |
154 | return rotateRight64(mask ^ (mask << S), immr); | |
155 | } | |
156 | ||
157 | uint32_t decodeLogicalImmediate32(unsigned val) { | |
158 | unsigned N = (val >> 12) & 1; | |
159 | if (N) return DECODE_FAILED; | |
160 | return (uint32_t)decodeLogicalImmediate64(val); | |
161 | } |