- asm ("sub %[diff], %[uDst], %[uSrc]\n\t"
- "add %[diff], %[diff], %[mask]\n\t"
- "eor %[mix], %[uDst], %[uSrc]\n\t"
- "and %[mix], %[mix], %[mask]\n\t"
- "sub %[mix], %[diff], %[mix]\n\t"
- "and %[mix], %[mix], %[mask]\n\t"
- "sub %[diff], %[diff], %[mix]\n\t"
- "sub %[mix], %[mix], %[mix], lsr #0x05\n\t"
- "and %[mix], %[diff], %[mix]"
- : [diff] "=&r" (diff), [mix] "=&r" (mix)
- : [uSrc] "r" (uSrc), [uDst] "r" (uDst), [mask] "r" (0x8420));
+ asm ("sub %[diff], %[uDst], %[uSrc]\n\t" // uDst - uSrc
+ "add %[diff], %[diff], %[mask]\n\t" // diff = ... + 0x8420
+ "eor %[mix], %[uDst], %[uSrc]\n\t" // uDst ^ uSrc
+ "and %[mix], %[mix], %[mask]\n\t" // low_bits = ... & 0x8420
+ "sub %[mix], %[diff], %[mix]\n\t" // diff - low_bits
+ "and %[mix], %[mix], %[mask]\n\t" // borrows = ... & 0x8420
+ "sub %[diff], %[diff], %[mix]\n\t" // modulo = diff - borrows
+ "sub %[mix], %[mix], %[mix], lsr #0x05\n\t" // clamp = borrows - (borrows >> 5)
+ "and %[mix], %[diff], %[mix]" // mix = modulo & clamp
+ : [diff] "=&r" (diff), [mix] "=&r" (mix)
+ : [uSrc] "r" (uSrc), [uDst] "r" (uDst), [mask] "r" (0x8420));
+ }
+
+ // This function is never entered with SKIP_USRC_MSB_MASK false while the msb
+ // of uSrc is unset, so when the flag is false the msb is set unconditionally.
+ if (!SKIP_USRC_MSB_MASK) {
+ asm ("orr %[mix], %[mix], #0x8000" : [mix] "+r" (mix));