-.global gteMVMVA_neon @ r0=CP2 (d,c), op
-gteMVMVA_neon:
- push {r4-r5,lr}
-
- add r12, r0, #4*32
-
- ubfx r2, r1, #15, #2 @ v
-
- vmov.i32 q0, #0 @ d0,d1
- vmov.i32 q1, #0 @ d2,d3
- vmov.i32 q2, #0 @ d4,d5
- cmp r2, #3
- addeq r4, r0, #4*9
- addne r3, r0, r2, lsl #3
- ldmeqia r4, {r3-r5}
- ldmneia r3, {r4,r5}
- pkhbteq r4, r3, r4, lsl #16
+@ note: non-standard calling convention used; register inputs are listed below
+@ r0 = CP2 (d,c) (must preserve)
+@ r1 = op
+@ r4,r5 = VXYZ(v) packed
+@ r6 = &MX11(mx)
+@ r7 = &CV1(cv)
+.global gteMVMVA_part_neon
+gteMVMVA_part_neon: