| 1 | // AArch64 Logical Immediate Encoding and Decoding |
| 2 | // |
| 3 | // I hereby place this code in the public domain, as per the terms of the |
| 4 | // CC0 license: https://creativecommons.org/publicdomain/zero/1.0/ |
| 5 | |
| 6 | #include <stdint.h> |
| 7 | #include <stdbool.h> |
| 8 | |
| 9 | static inline int nonzeroCountTrailingZeros64(uint64_t n) { |
| 10 | return __builtin_ctzll(n); |
| 11 | } |
| 12 | |
| 13 | static inline int countTrailingZeros64(uint64_t n) { |
| 14 | return n ? nonzeroCountTrailingZeros64(n) : 64; |
| 15 | } |
| 16 | |
| 17 | static inline int nonzeroCountLeadingZeros64(uint64_t n) { |
| 18 | return __builtin_clzll(n); |
| 19 | } |
| 20 | |
| 21 | static inline int nonzeroCountLeadingZeros32(uint32_t n) { |
| 22 | return __builtin_clz(n); |
| 23 | } |
| 24 | |
| 25 | static inline uint64_t rotateRight64(uint64_t v, int n) { |
| 26 | // return __builtin_rotateright64(v, n); |
| 27 | return (v >> (n & 63)) | (v << (-n & 63)); |
| 28 | } |
| 29 | |
| 30 | static inline uint64_t clearTrailingOnes64(uint64_t n) { |
| 31 | return n & (n + 1); |
| 32 | } |
| 33 | |
| 34 | #define ENCODE_FAILED (-1) |
| 35 | |
| 36 | int encodeLogicalImmediate64(uint64_t val) { |
| 37 | // Consider an ARM64 logical immediate as a pattern of "o" ones preceded |
| 38 | // by "z" more-significant zeroes, repeated to fill a 64-bit integer. |
| 39 | // o > 0, z > 0, and the size (o + z) is a power of two in [2,64]. This |
| 40 | // part of the pattern is encoded in the fields "imms" and "N". |
| 41 | // |
| 42 | // "immr" encodes a further right rotate of the repeated pattern, allowing |
| 43 | // a wide range of useful bitwise constants to be represented. |
| 44 | // |
| 45 | // (The spec describes the "immr" rotate as rotating the "o + z" bit |
| 46 | // pattern before repeating it to fill 64-bits, but, as it's a repeating |
| 47 | // pattern, rotating afterwards is equivalent.) |
| 48 | |
| 49 | // This encoding is not allowed to represent all-zero or all-one values. |
| 50 | if (val == 0 || ~val == 0) |
| 51 | return ENCODE_FAILED; |
| 52 | |
| 53 | // To detect an immediate that may be encoded in this scheme, we first |
| 54 | // remove the right-rotate, by rotating such that the least significant |
| 55 | // bit is a one and the most significant bit is a zero. |
| 56 | // |
| 57 | // We do this by clearing any trailing one bits, then counting the |
| 58 | // trailing zeroes. This finds an "edge", where zero goes to one. |
| 59 | // We then rotate the original value right by that amount, moving |
| 60 | // the first one to the least significant bit. |
| 61 | |
| 62 | int rotation = countTrailingZeros64(clearTrailingOnes64(val)); |
| 63 | uint64_t normalized = rotateRight64(val, rotation & 63); |
| 64 | |
| 65 | // Now we have normalized the value, and determined the rotation, we can |
| 66 | // determine "z" by counting the leading zeroes, and "o" by counting the |
| 67 | // trailing ones. (These will both be positive, as we already rejected 0 |
| 68 | // and ~0, and rotated the value to start with a zero and end with a one.) |
| 69 | |
| 70 | int zeroes = nonzeroCountLeadingZeros64(normalized); |
| 71 | int ones = nonzeroCountTrailingZeros64(~normalized); |
| 72 | int size = zeroes + ones; |
| 73 | |
| 74 | // Detect the repeating pattern (by comparing every repetition to the |
| 75 | // one next to it, using rotate). |
| 76 | |
| 77 | if (rotateRight64(val, size & 63) != val) |
| 78 | return ENCODE_FAILED; |
| 79 | |
| 80 | // We do not need to further validate size to ensure it is a power of two |
| 81 | // between 2 and 64. The only "minimal" patterns that can repeat to fill a |
| 82 | // 64-bit value must have a length that is a factor of 64 (i.e. it is a |
| 83 | // power of two in the range [1,64]). And our pattern cannot be of length |
| 84 | // one (as we already rejected 0 and ~0). |
| 85 | // |
| 86 | // By "minimal" patterns I refer to patterns which do not themselves |
| 87 | // contain repetitions. For example, '010101' is a non-minimal pattern of |
| 88 | // a non-power-of-two length that can pass the above rotational test. It |
| 89 | // consists of the minimal pattern '01'. All our patterns are minimal, as |
| 90 | // they contain only one contiguous run of ones separated by at least one |
| 91 | // zero. |
| 92 | |
| 93 | // Finally, we encode the values. "rotation" is the amount we rotated |
| 94 | // right by to "undo" the right-rotate encoded in immr, so must be |
| 95 | // negated. |
| 96 | |
| 97 | // size 2: N=0 immr=00000r imms=11110s |
| 98 | // size 4: N=0 immr=0000rr imms=1110ss |
| 99 | // size 8: N=0 immr=000rrr imms=110sss |
| 100 | // size 16: N=0 immr=00rrrr imms=10ssss |
| 101 | // size 32: N=0 immr=0rrrrr imms=0sssss |
| 102 | // size 64: N=1 immr=rrrrrr imms=ssssss |
| 103 | int immr = -rotation & (size - 1); |
| 104 | int imms = -(size << 1) | (ones - 1); |
| 105 | int N = (size >> 6); |
| 106 | |
| 107 | return (N << 12) | (immr << 6) | (imms & 0x3f); |
| 108 | } |
| 109 | |
| 110 | int encodeLogicalImmediate32(uint32_t val) { |
| 111 | return encodeLogicalImmediate64(((uint64_t)val << 32) | val); |
| 112 | } |
| 113 | |
| 114 | // Decoding! |
| 115 | |
| 116 | bool isValidLogicalImmediate64(unsigned val) { |
| 117 | unsigned N = (val >> 12) & 1; |
| 118 | unsigned imms = val & 0x3f; |
| 119 | unsigned pattern = (N << 6) | (~imms & 0x3f); |
| 120 | return (pattern & (pattern - 1)) != 0; |
| 121 | } |
| 122 | |
| 123 | bool isValidLogicalImmediate32(unsigned val) { |
| 124 | unsigned N = (val >> 12) & 1; |
| 125 | return N == 0 && isValidLogicalImmediate64(val); |
| 126 | } |
| 127 | |
| 128 | #define DECODE_FAILED 0 |
| 129 | |
| 130 | // returns DECODE_FAILED (zero) if the encoding is invalid |
| 131 | uint64_t decodeLogicalImmediate64(unsigned val) { |
| 132 | // Fun way to generate the immediates with mask ^ (mask << S) |
| 133 | static const uint64_t mask_lookup[] = { |
| 134 | 0xffffffffffffffff, // size = 64 |
| 135 | 0x00000000ffffffff, // size = 32 |
| 136 | 0x0000ffff0000ffff, // size = 16 |
| 137 | 0x00ff00ff00ff00ff, // size = 8 |
| 138 | 0x0f0f0f0f0f0f0f0f, // size = 4 |
| 139 | 0x3333333333333333, // size = 2 |
| 140 | }; |
| 141 | |
| 142 | unsigned N = (val >> 12) & 1; |
| 143 | int immr = (val >> 6) & 0x3f; |
| 144 | unsigned imms = val & 0x3f; |
| 145 | |
| 146 | unsigned pattern = (N << 6) | (~imms & 0x3f); |
| 147 | |
| 148 | if (!(pattern & (pattern - 1))) return DECODE_FAILED; |
| 149 | |
| 150 | int leading_zeroes = nonzeroCountLeadingZeros32(pattern); |
| 151 | unsigned imms_mask = 0x7fffffff >> leading_zeroes; |
| 152 | uint64_t mask = mask_lookup[leading_zeroes - 25]; |
| 153 | unsigned S = (imms + 1) & imms_mask; |
| 154 | return rotateRight64(mask ^ (mask << S), immr); |
| 155 | } |
| 156 | |
| 157 | uint32_t decodeLogicalImmediate32(unsigned val) { |
| 158 | unsigned N = (val >> 12) & 1; |
| 159 | if (N) return DECODE_FAILED; |
| 160 | return (uint32_t)decodeLogicalImmediate64(val); |
| 161 | } |