+ /* w0 = address, w1 = data, w2 = cycles; stores the top 1-4 bytes of w1 depending on the low two bits of the computed pointer. NOTE(review): pattern looks like a little-endian MIPS SWL - confirm */
+ ldr x3, [fp, #LO_mem_wtab] // x3 = table pointer kept at fp + LO_mem_wtab (presumably the memory write table)
+ mov w4, w0, lsr #12 // w4 = address >> 12, the table index; NOTE(review): lsr w4, w0, #12 is the portable spelling - confirm
+ ldr x3, [x3, w4, uxtw #3] // x3 = 64-bit table entry number w4 (index scaled by 8)
+ adds x3, x3, x3 // x3 = entry << 1; carry out = bit 63 of the entry
+ bcs 4f // entry had bit 63 set: take the path at 4 instead of storing directly
+ add x3, x0, x3 // x3 = x0 + (entry << 1), the pointer used by all stores below
+ mov w0, w2 // w0 = cycles; w2 is free as scratch from here on
+ tbz x3, #1, 10f // (x3 & 2) == 0: low bits are 0 or 1
+ tbz x3, #0, 2f // (x3 & 1) == 0: low bits are 2
+3: // (x3 & 3) == 3
+ stur w1, [x3, #-3] // store the whole word at x3 - 3
+ ret
+2: // (x3 & 3) == 2: store the upper three bytes of w1
+ lsr w2, w1, #8 // w2 low halfword = bits 8..23 of the data
+ lsr w1, w1, #24 // w1 = bits 24..31 of the data
+ sturh w2, [x3, #-2] // halfword at x3 - 2
+ strb w1, [x3] // top byte at x3
+ ret
+10: // bit 1 of x3 is clear
+ tbz x3, #0, 0f // (x3 & 1) == 0: low bits are 0
+1: // (x3 & 3) == 1: store the upper halfword of w1
+ lsr w1, w1, #16 // w1 = bits 16..31 of the data
+ sturh w1, [x3, #-1] // halfword at x3 - 1
+ ret
+0: // (x3 & 3) == 0: store only the top byte of w1
+ lsr w2, w1, #24 // w2 = bits 24..31 of the data
+ strb w2, [x3] // byte at x3
+ ret
+4: // reached when the table entry has bit 63 set
+ mov w0, w2 // w0 = cycles, as on the direct-store path; todo