pld [r1]
mov r3, #0x07c0
vdup.16 q15, r3
+ tst r0, #8
+ beq 0f
+ @ align the dst
+ vld1.16 {d0}, [r1]!
+ sub r2, r2, #8
+ vshl.u16 d0, d0, #1
+ vshl.u16 d1, d0, #10
+ vsri.u16 d1, d0, #11
+ vbit d1, d0, d30
+ vst1.16 {d1}, [r0]!
+0:
subs r2, r2, #64
blt btr16_end64
0:
umull r12,r2, r3, r2
0:
pld [r1, #48*3]
- vld3.8 {d0-d2}, [r1, :64]!
- vld3.8 {d3-d5}, [r1, :64]!
+ vld3.8 {d0-d2}, [r1]!
+ vld3.8 {d3-d5}, [r1]!
vswp d0, d2
vswp d3, d5
vst3.8 {d0-d2}, [r0, :64]!
vdup.16 q15, r3
0:
pld [r1, #48*3]
- vld3.8 {d1-d3}, [r1, :64]!
- vld3.8 {d5-d7}, [r1, :64]!
+ vld3.8 {d1-d3}, [r1]!
+ vld3.8 {d5-d7}, [r1]!
vshll.u8 q8, d2, #3 @ g
vshll.u8 q9, d6, #3