59774ed0 |
1 | /* |
 * (C) Gražvydas "notaz" Ignotas, 2011
3 | * |
0c2ca3ba |
4 | * This work is licensed under the terms of GNU GPL version 2 or later. |
59774ed0 |
5 | * See the COPYING file in the top-level directory. |
6 | */ |
7 | |
8 | /* .equiv HAVE_ARMV7, 1 */ |
9 | |
10 | .text |
11 | .align 2 |
12 | |
7c621bf0 |
@ sign-extend the low 16 bits of \rs into \rd
@ in:  rs - source; out: rd - sign-extended value (rd may equal rs)
.macro sgnxt16 rd rs
.if HAVE_ARMV7
    sxth    \rd, \rs                @ single-instruction extend on ARMv7
.else
    lsl     \rd, \rs, #16           @ shift up, then arithmetic shift back
    asr     \rd, \rd, #16
.endif
.endm
21 | |
0c2ca3ba |
@ prepare work reg for ssatx
@ in: wr reg, bit to saturate to
.macro ssatx_prep wr bit
.if !HAVE_ARMV7
    mov     \wr, #(1<<(\bit-1))     @ wr = 2^(bit-1), the saturation limit
.endif
.endm
29 | |
@ signed saturate \rd to 'bit' bits: clamp into [-2^(bit-1), 2^(bit-1)-1]
@ in: rd - value; wr - 2^(bit-1) from ssatx_prep (unused on ARMv7)
.macro ssatx rd wr bit
.if HAVE_ARMV7
    ssat    \rd, #\bit, \rd
.else
    cmp     \rd, \wr
    subge   \rd, \wr, #1            @ rd >= limit  -> limit-1
    cmn     \rd, \wr
    rsblt   \rd, \wr, #0            @ rd < -limit  -> -limit
.endif
.endm
40 | |
054175e9 |
@ prepare work reg for ssatx0 (sat to 0..2^(bit-1))
@ in: wr reg, bit to saturate to
@ note: unconditional (unlike ssatx_prep) - ssatx0 has no ARMv7 fast path
.macro ssatx0_prep wr bit
    mov     \wr, #(1<<(\bit-1))     @ wr = 2^(bit-1), the upper limit
.endm
46 | |
@ saturate \rd into [0, 2^(bit-1)-1] (zero floor, signed upper clamp)
@ in: rd - value; wr - 2^(bit-1) from ssatx0_prep
.macro ssatx0 rd wr bit
    cmp     \rd, \wr
    subge   \rd, \wr, #1            @ rd >= limit -> limit-1
    cmn     \rd, #0
    movlt   \rd, #0                 @ rd < 0      -> 0
.endm
53 | |
0c2ca3ba |
@ unsigned saturate: rd = clamp(rs, 0, 0xffff)
.macro usat16_ rd rs
.if HAVE_ARMV7
    usat    \rd, #16, \rs
.else
    subs    \rd, \rs, #0            @ copy rs while setting N flag
    movlt   \rd, #0                 @ negative   -> 0
    cmp     \rd, #0x10000
    movge   \rd, #0x0ff00           @ >= 0x10000 -> 0xffff, built from two
    orrge   \rd, #0x000ff           @ ARM-encodable immediates
.endif
.endm
65 | |
7c621bf0 |
@ unsigned divide, restoring shift-subtract style: rd = (rm << 16) / rs
@ (16.16 fixed-point quotient); no divide-by-zero check
@ trash: rm, rs
.macro udiv_ rd rm rs
    lsl     \rm, #16                @ 16.16 dividend
    clz     \rd, \rs
    lsl     \rs, \rs, \rd           @ shift up divisor
    orr     \rd, \rd, #1<<31
    lsr     \rd, \rd, \rd           @ place a marker bit; when it shifts out
                                    @ of rd the loop has run enough times
0:
    cmp     \rm, \rs
    subcs   \rm, \rs                @ subtract divisor when it fits
    adcs    \rd, \rd, \rd           @ shift in quotient bit; C set once the
                                    @ marker bit falls out -> loop ends
    lsr     \rs, #1
    bcc     0b
.endm
79 | |
7c621bf0 |
@ one Newton-Raphson refinement step for a fixed-point reciprocal estimate:
@ rcp = rcp * (2 - den * rcp), computed with 64-bit intermediates
@ in: rcp - current estimate (updated); den - normalized divisor;
@     zero - register holding 0; t1,t2 - scratch
.macro newton_step rcp den zero t1 t2
    umull   \t2, \t1, \den, \rcp    @ \t2 is dummy
    sub     \t1, \zero, \t1, lsl #2
    smlal   \t2, \rcp, \t1, \rcp
.endm
85 | |
@ unsigned divide via Newton-Raphson reciprocal: rd = (rm << 16) / rs
@ (16.16 fixed-point quotient); no divide-by-zero check
@ trash: rm, rs, t1-t4
.macro udiv_newton rd rm rs t1 t2 t3 t4
    lsl     \rd, \rm, #16           @ 16.16 dividend
    clz     \t1, \rs
    mov     \t2, #0                 @ zero reg for newton_step
    lsl     \rs, \t1                @ normalize for the algo
    mov     \rm, #0x4d000000        @ initial estimate ~1.2

    newton_step \rm, \rs, \t2, \t3, \t4
    newton_step \rm, \rs, \t2, \t3, \t4
    newton_step \rm, \rs, \t2, \t3, \t4
    newton_step \rm, \rs, \t2, \t3, \t4

    umull   \t4, \rd, \rm, \rd      @ dividend * reciprocal, keep high word
    rsb     \t2, \t1, #30           @ here t1 is 1..15
    mov     \rd, \rd, lsr \t2       @ undo the normalization shift
.endm
102 | |
@ unsigned divide rd = rm / rs; 16.16 result
@ no div by 0 check
@ in: rm, rs
@ trash: rm rs t*
.macro udiv rd rm rs t1 t2 t3 t4
    @udiv_  \rd, \rm, \rs           @ alternative: restoring-division version
    udiv_newton \rd, \rm, \rs, \t1, \t2, \t3, \t4
.endm
0c2ca3ba |
111 | |
@ calculate RTPS/RTPT MAC values:
@ MACn = (gteTRn << 12 + rotation row n . VXYZ) >> 12
@ in: r0 context, r8,r9 VXYZ (three packed s16 halfwords)
@ out: r10-r12 MAC123
@ trash: r1-r7
.macro do_rtpx_mac
    add     r1, r0, #4*32
    add     r2, r0, #4*(32+5)       @ gteTRX
    ldmia   r1!,{r5-r7}             @ gteR1*,gteR2* (packed halfword pairs)
    ldmia   r2, {r10-r12}           @ 3 words starting at gteTRX
    smulbb  r2, r5, r8              @ gteR11 * gteVX0
    smultt  r3, r5, r8              @ gteR12 * gteVY0
    smulbb  r4, r6, r9              @ gteR13 * gteVZ0
    qadd    r2, r2, r3
    asr     r4, r4, #1              @ prevent oflow, lose a bit
    add     r3, r4, r2, asr #1
    add     r10,r10,r3, asr #11     @ gteMAC1
    smultb  r2, r6, r8              @ gteR21 * gteVX0
    smulbt  r3, r7, r8              @ gteR22 * gteVY0
    smultb  r4, r7, r9              @ gteR23 * gteVZ0
    ldmia   r1!,{r5-r6}             @ gteR3*
    qadd    r2, r2, r3
    asr     r4, r4, #1
    add     r3, r4, r2, asr #1
    add     r11,r11,r3, asr #11     @ gteMAC2
    @ be more accurate for gteMAC3, since it's also a divider
    smulbb  r2, r5, r8              @ gteR31 * gteVX0
    smultt  r3, r5, r8              @ gteR32 * gteVY0
    smulbb  r4, r6, r9              @ gteR33 * gteVZ0
    qadd    r2, r2, r3
    asr     r3, r4, #31             @ expand to 64bit
    adds    r1, r2, r4
    adc     r3, r2, asr #31         @ 64bit sum in r3,r1
    add     r12,r12,r3, lsl #20
    add     r12,r12,r1, lsr #12     @ gteMAC3
.endm
147 | |
148 | |
.global gteRTPS_nf_arm @ r0=CP2 (d,c),
@ RTPS (perspective transform, single vertex), "nf" = no MAC flag tracking.
@ Computes MAC1-3/IR1-3, pushes the SZ FIFO, projects to SX2/SY2 and
@ updates MAC0/IR0 depth cueing.
gteRTPS_nf_arm:
    push    {r4-r11,lr}

    ldmia   r0, {r8,r9}             @ VXYZ(0)
    do_rtpx_mac                     @ r10-r12 = gteMAC1..3
    add     r1, r0, #4*25           @ gteMAC1
    add     r2, r0, #4*17           @ gteSZ1
    stmia   r1, {r10-r12}           @ gteMAC123 save
    ldmia   r2, {r3-r5}             @ old SZ1..SZ3, for the FIFO shift
    add     r1, r0, #4*16           @ gteSZ0
    add     r2, r0, #4*9            @ gteIR1
    ssatx_prep r6, 16
    usat16_ lr, r12                 @ limD: lr = new SZ3 = clamp(MAC3, 0, 0xffff)
    ssatx   r10,r6, 16              @ IR1..3 = signed-16 saturated MAC1..3
    ssatx   r11,r6, 16
    ssatx   r12,r6, 16
    stmia   r1, {r3-r5,lr}          @ gteSZ*: FIFO shifted down, new SZ3 last
    ldr     r3, [r0,#4*(32+26)]     @ gteH
    stmia   r2, {r10,r11,r12}       @ gteIR123 save
    cmp     r3, lr, lsl #1          @ gteH < gteSZ3*2 ?
    mov     r9, #1<<30              @ default quotient for the overflow case
    bhs     1f
.if 1
    udiv    r9, r3, lr, r1, r2, r6, r7  @ r9 = (gteH << 16) / gteSZ3
.else
    push    {r0, r12}
    mov     r0, r3
    mov     r1, lr
    bl      DIVIDE
    mov     r9, r0
    pop     {r0, r12}
.endif
1:
    ldrd    r6, [r0,#4*(32+24)]     @ gteOFXY
    cmp     r9, #0x20000            @ clamp quotient to 0x1ffff
    add     r1, r0, #4*12           @ gteSXY0
    movhs   r9, #0x20000
    ldmia   r1, {r2-r4}             @ old SXY FIFO entries
/* quotient */ subhs r9, #1
    mov     r2, #0
    smlal   r6, r2, r10, r9         @ 64bit: gteOFX + gteIR1 * q
    stmia   r1!,{r3,r4}             @ shift gteSXY
    mov     r3, #0
    smlal   r7, r3, r11, r9         @ 64bit: gteOFY + gteIR2 * q
    lsr     r6, #16
/* gteDQA, gteDQB */ ldrd r10,[r0, #4*(32+27)]
    orr     r6, r2, lsl #16         @ (gteOFX + gteIR1 * q) >> 16
    ssatx_prep r2, 11
    lsr     r7, #16
/* gteDQB + gteDQA * q */ mla r4, r10, r9, r11
    orr     r7, r3, lsl #16         @ (gteOFY + gteIR2 * q) >> 16
    ssatx   r6, r2, 11              @ gteSX2
    ssatx   r7, r2, 11              @ gteSY2
    strh    r6, [r1]
    strh    r7, [r1, #2]
    str     r4, [r0,#4*24]          @ gteMAC0
    asrs    r4, #12
    movmi   r4, #0                  @ limH: clamp MAC0>>12 into 0..0x1000
    cmp     r4, #0x1000
    movgt   r4, #0x1000
    str     r4, [r0,#4*8]           @ gteIR0

    pop     {r4-r11,pc}
.size gteRTPS_nf_arm, .-gteRTPS_nf_arm
214 | |
215 | |
.global gteRTPT_nf_arm @ r0=CP2 (d,c),
@ RTPT (perspective transform, three vertices), "nf" = no MAC flag tracking.
@ Loops the RTPS core over V0..V2; depth cueing (MAC0/IR0) uses the last
@ vertex's quotient, and IR1..3 keep the last vertex's values.
gteRTPT_nf_arm:
    ldr     r1, [r0, #4*19]         @ gteSZ3
    push    {r4-r11,lr}
    str     r1, [r0, #4*16]         @ gteSZ0: old SZ3 becomes SZ0
    mov     lr, #0                  @ lr = loop counter, +4 per vertex

rtpt_arm_loop:
    add     r1, r0, lr, lsl #1      @ vertices are 8 bytes apart: 0,8,16
    ldrd    r8, [r1]                @ VXYZ(v)
    do_rtpx_mac                     @ r10-r12 = gteMAC1..3

    ssatx_prep r6, 16
    usat16_ r2, r12                 @ limD: r2 = clamp(MAC3, 0, 0xffff)
    add     r1, r0, #4*25           @ gteMAC1
    ldr     r3, [r0,#4*(32+26)]     @ gteH
    stmia   r1, {r10-r12}           @ gteMAC123 save
    add     r1, r0, #4*17           @ gteSZ1
    ssatx   r10,r6, 16              @ IR1..3 = signed-16 saturated MAC1..3
    ssatx   r11,r6, 16
    ssatx   r12,r6, 16
    str     r2, [r1, lr]            @ fSZ(v)
    cmp     r3, r2, lsl #1          @ gteH < gteSZ3*2 ?
    mov     r9, #1<<30              @ default quotient for the overflow case
    bhs     1f
.if 1
    udiv    r9, r3, r2, r1, r4, r6, r7  @ r9 = (gteH << 16) / fSZ(v)
.else
    push    {r0, r12, lr}
    mov     r0, r3
    mov     r1, r2
    bl      DIVIDE
    mov     r9, r0
    pop     {r0, r12, lr}
.endif
1:  cmp     r9, #0x20000            @ clamp quotient to 0x1ffff
    add     r1, r0, #4*12           @ gteSXY0
    movhs   r9, #0x20000
    ldrd    r6, [r0,#4*(32+24)]     @ gteOFXY
/* quotient */ subhs r9, #1
    mov     r2, #0
    smlal   r6, r2, r10, r9         @ 64bit: gteOFX + gteIR1 * q
    mov     r3, #0
    smlal   r7, r3, r11, r9         @ 64bit: gteOFY + gteIR2 * q
    lsr     r6, #16
    orr     r6, r2, lsl #16         @ (gteOFX + gteIR1 * q) >> 16
    ssatx_prep r2, 11
    lsr     r7, #16
    orr     r7, r3, lsl #16         @ (gteOFY + gteIR2 * q) >> 16
    ssatx   r6, r2, 11              @ gteSX(v)
    ssatx   r7, r2, 11              @ gteSY(v)
    strh    r6, [r1, lr]!
    add     lr, #4
    strh    r7, [r1, #2]
    cmp     lr, #12                 @ all 3 vertices done?
    blt     rtpt_arm_loop

    ldrd    r4, [r0, #4*(32+27)]    @ gteDQA, gteDQB
    add     r1, r0, #4*9            @ gteIR1
    mla     r3, r4, r9, r5          @ gteDQB + gteDQA * q (last vertex's q)
    stmia   r1, {r10,r11,r12}       @ gteIR123 save (last vertex)

    str     r3, [r0,#4*24]          @ gteMAC0
    asrs    r3, #12
    movmi   r3, #0                  @ limH: clamp MAC0>>12 into 0..0x1000
    cmp     r3, #0x1000
    movgt   r3, #0x1000
    str     r3, [r0,#4*8]           @ gteIR0

    pop     {r4-r11,pc}
.size gteRTPT_nf_arm, .-gteRTPT_nf_arm
287 | |
59774ed0 |
288 | |
054175e9 |
@ MVMVA core: MACn = (CVn << 12 + matrix row n . VXYZ) >> (shift ? 12 : 0),
@ with optional gteFLAG overflow tracking (do_flags).
@ note: not std calling convention used
@ r0 = CP2 (d,c) (must preserve)
@ r1 = needs_shift12
@ r4,r5 = VXYZ(v) packed
@ r6 = &MX11(mx)
@ r7 = &CV1(cv)
.macro mvma_op do_flags
    push    {r8-r11}

.if \do_flags
    ands    r3, r1, #1              @ gteFLAG, shift_need
.else
    tst     r1, #1                  @ Z clear = shift by 12 needed
.endif
    ldmia   r7, {r7-r9}             @ CV123
    ldmia   r6!,{r10-r12}           @ MX1*,MX2* (packed halfword pairs)
    asr     r1, r7, #20
    lsl     r7, #12                 @ expand to 64bit
    smlalbb r7, r1, r10, r4         @ MX11 * vx
    smlaltt r7, r1, r10, r4         @ MX12 * vy
    smlalbb r7, r1, r11, r5         @ MX13 * vz
    lsrne   r7, #12                 @ >> 12 when shift_need
    orrne   r7, r1, lsl #20         @ gteMAC1 (stored at r0+4*25 below)
.if \do_flags
    asrne   r1, #20
    adds    r2, r7, #0x80000000     @ 64bit range check on r1:r7
    adcs    r1, #0
    orrgt   r3, #(1<<30)            @ FLAG: MAC1 positive overflow
    orrmi   r3, #(1<<31)|(1<<27)    @ FLAG: MAC1 negative overflow
    tst     r3, #1                  @ repeat shift test
.endif
    asr     r1, r8, #20
    lsl     r8, #12                 @ expand to 64bit
    smlaltb r8, r1, r11, r4         @ MX21 * vx
    smlalbt r8, r1, r12, r4         @ MX22 * vy
    smlaltb r8, r1, r12, r5         @ MX23 * vz
    lsrne   r8, #12
    orrne   r8, r1, lsl #20         @ gteMAC2
.if \do_flags
    asrne   r1, #20
    adds    r2, r8, #0x80000000
    adcs    r1, #0
    orrgt   r3, #(1<<29)            @ FLAG: MAC2 positive overflow
    orrmi   r3, #(1<<31)|(1<<26)    @ FLAG: MAC2 negative overflow
    tst     r3, #1                  @ repeat shift test
.endif
    ldmia   r6!,{r10-r11}           @ MX3*
    asr     r1, r9, #20
    lsl     r9, #12                 @ expand to 64bit
    smlalbb r9, r1, r10, r4         @ MX31 * vx
    smlaltt r9, r1, r10, r4         @ MX32 * vy
    smlalbb r9, r1, r11, r5         @ MX33 * vz
    lsrne   r9, #12
    orrne   r9, r1, lsl #20         @ gteMAC3
.if \do_flags
    asrne   r1, #20
    adds    r2, r9, #0x80000000
    adcs    r1, #0
    orrgt   r3, #(1<<28)            @ FLAG: MAC3 positive overflow
    orrmi   r3, #(1<<31)|(1<<25)    @ FLAG: MAC3 negative overflow
    bic     r3, #1                  @ drop the borrowed shift_need bit
.else
    mov     r3, #0
.endif
    str     r3, [r0, #4*(32+31)]    @ gteFLAG
    add     r1, r0, #4*25
    stmia   r1, {r7-r9}             @ gteMAC123

    pop     {r8-r11}
    bx      lr
.endm
360 | |
.global gteMVMVA_part_arm
@ MVMVA core with gteFLAG overflow tracking; register contract: see mvma_op
gteMVMVA_part_arm:
    mvma_op 1
.size gteMVMVA_part_arm, .-gteMVMVA_part_arm
365 | |
.global gteMVMVA_part_nf_arm
@ MVMVA core, no-flags variant (gteFLAG written as 0); regs: see mvma_op
gteMVMVA_part_nf_arm:
    mvma_op 0
.size gteMVMVA_part_nf_arm, .-gteMVMVA_part_nf_arm
370 | |
@ common version of MVMVA with cv3 (== 0) and shift12,
@ can't overflow so no gteMAC flags needed
@ note: not std calling convention used
@ r0 = CP2 (d,c) (must preserve)
@ r4,r5 = VXYZ(v) packed
@ r6 = &MX11(mx)
.global gteMVMVA_part_cv3sh12_arm
gteMVMVA_part_cv3sh12_arm:
    push    {r8-r9}
    ldmia   r6!,{r7-r9}             @ MX1*,MX2* (packed halfword pairs)
    smulbb  r1, r7, r4              @ MX11 * vx
    smultt  r2, r7, r4              @ MX12 * vy
    smulbb  r3, r8, r5              @ MX13 * vz
    qadd    r1, r1, r2
    asr     r3, #1                  @ prevent oflow, lose a bit
    add     r1, r3, r1, asr #1
    asr     r7, r1, #11             @ r7 = gteMAC1 (>>12 total with asr#1 above)
    smultb  r1, r8, r4              @ MX21 * vx
    smulbt  r2, r9, r4              @ MX22 * vy
    smultb  r3, r9, r5              @ MX23 * vz
    qadd    r1, r1, r2
    asr     r3, #1
    add     r1, r3, r1, asr #1
    asr     r8, r1, #11             @ r8 = gteMAC2
    ldmia   r6, {r6,r9}             @ MX3*
    smulbb  r1, r6, r4              @ MX31 * vx
    smultt  r2, r6, r4              @ MX32 * vy
    smulbb  r3, r9, r5              @ MX33 * vz
    qadd    r1, r1, r2
    asr     r3, #1
    add     r1, r3, r1, asr #1
    asr     r9, r1, #11             @ r9 = gteMAC3
    add     r1, r0, #4*25
    mov     r2, #0
    stmia   r1, {r7-r9}             @ gteMAC123 save
    str     r2, [r0, #4*(32+31)]    @ gteFLAG = 0
    pop     {r8-r9}
    bx      lr
.size gteMVMVA_part_cv3sh12_arm, .-gteMVMVA_part_cv3sh12_arm
410 | |
411 | |
59774ed0 |
.global gteNCLIP_arm @ r0=CP2 (d,c),
@ NCLIP: winding order / normal clip from the screen XY FIFO,
@ MAC0 = SX0*(SY1-SY2) + SX1*(SY2-SY0) + SX2*(SY0-SY1),
@ accumulated in 64 bits so 32-bit overflow can set gteFLAG.
gteNCLIP_arm:
    push    {r4-r6,lr}
    ldrsh   r4, [r0, #4*12+2]       @ gteSY0
    ldrsh   r5, [r0, #4*13+2]       @ gteSY1
    ldrsh   r6, [r0, #4*14+2]       @ gteSY2
    ldrsh   lr, [r0, #4*12]         @ gteSX0
    ldrsh   r2, [r0, #4*13]         @ gteSX1
    sub     r12, r4, r5             @ 3: gteSY0 - gteSY1
    sub     r5, r5, r6              @ 1: gteSY1 - gteSY2
    smull   r1, r5, lr, r5          @ RdLo, RdHi
    sub     r6, r4                  @ 2: gteSY2 - gteSY0
    ldrsh   r3, [r0, #4*14]         @ gteSX2
    smlal   r1, r5, r2, r6
    mov     lr, #0                  @ gteFLAG
    smlal   r1, r5, r3, r12         @ 64bit sum now in r5:r1
    mov     r6, #1<<31
    orr     r6, #1<<15              @ FLAG bits for negative MAC0 overflow
    movs    r2, r1, lsl #1
    adc     r5, r5                  @ r5 = bits [62:31] of sum (= sum >> 31)
    cmp     r5, #0                  @ sum >= 2^31 -> positive overflow
.if HAVE_ARMV7
    movtgt  lr, #((1<<31)|(1<<16))>>16
.else
    movgt   lr, #(1<<31)
    orrgt   lr, #(1<<16)
.endif
    cmn     r5, #1                  @ sum < -2^31 -> negative overflow
    orrmi   lr, r6
    str     r1, [r0, #4*24]         @ gteMAC0 (low 32 bits)
    str     lr, [r0, #4*(32+31)]    @ gteFLAG

    pop     {r4-r6,pc}
.size gteNCLIP_arm, .-gteNCLIP_arm
446 | |
447 | |
054175e9 |
@ saturate gteMAC1..3 into gteIR1..3, updating gteFLAG saturation bits
@ lm=1: clamp into [0, 0x7fff]; lm=0: clamp into [-0x8000, 0x7fff]
@ in: r0 = CP2 (d,c); trash: r1-r3, r12; returns via bx lr
.macro gteMACtoIR lm
    ldr     r2, [r0, #4*25]         @ gteMAC1
    mov     r1, #1<<15              @ saturation limit 0x8000
    ldr     r12,[r0, #4*(32+31)]    @ gteFLAG
    cmp     r2, r1
    subge   r2, r1, #1              @ >= 0x8000 -> 0x7fff
    orrge   r12, #(1<<31)|(1<<24)   @ FLAG: IR1 saturated
.if \lm
    cmp     r2, #0
    movlt   r2, #0                  @ lm: floor at 0
.else
    cmn     r2, r1
    rsblt   r2, r1, #0              @ < -0x8000 -> -0x8000
.endif
    str     r2, [r0, #4*9]          @ gteIR1
    ldrd    r2, [r0, #4*26]         @ gteMAC23
    orrlt   r12, #(1<<31)|(1<<24)   @ FLAG: IR1 saturated (lower bound path)
    cmp     r2, r1
    subge   r2, r1, #1
    orrge   r12, #1<<23             @ FLAG: IR2 saturated
    orrge   r12, #1<<31
.if \lm
    cmp     r2, #0
    movlt   r2, #0
.else
    cmn     r2, r1
    rsblt   r2, r1, #0
.endif
    orrlt   r12, #1<<23             @ FLAG: IR2 saturated (lower bound path)
    orrlt   r12, #1<<31
    cmp     r3, r1
    subge   r3, r1, #1
    orrge   r12, #1<<22             @ FLAG: IR3 saturated (no bit31 for IR3)
.if \lm
    cmp     r3, #0
    movlt   r3, #0
.else
    cmn     r3, r1
    rsblt   r3, r1, #0
.endif
    orrlt   r12, #1<<22             @ FLAG: IR3 saturated (lower bound path)
    strd    r2, [r0, #4*10]         @ gteIR23
    str     r12,[r0, #4*(32+31)]    @ gteFLAG
    bx      lr
.endm
493 | |
.global gteMACtoIR_lm0 @ r0=CP2 (d,c)
@ MAC->IR with flags, signed clamp [-0x8000, 0x7fff] (lm=0)
gteMACtoIR_lm0:
    gteMACtoIR 0
.size gteMACtoIR_lm0, .-gteMACtoIR_lm0
498 | |
.global gteMACtoIR_lm1 @ r0=CP2 (d,c)
@ MAC->IR with flags, zero-floored clamp [0, 0x7fff] (lm=1)
gteMACtoIR_lm1:
    gteMACtoIR 1
.size gteMACtoIR_lm1, .-gteMACtoIR_lm1
503 | |
504 | |
.global gteMACtoIR_lm0_nf @ r0=CP2 (d,c)
@ MAC1..3 -> IR1..3, signed 16-bit saturation, no gteFLAG updates
gteMACtoIR_lm0_nf:
    add     r12, r0, #4*25          @ gteMAC1
    ldmia   r12, {r1-r3}
    ssatx_prep r12, 16
    ssatx   r1, r12, 16             @ clamp each into [-0x8000, 0x7fff]
    ssatx   r2, r12, 16
    ssatx   r3, r12, 16
    add     r12, r0, #4*9           @ gteIR1
    stmia   r12, {r1-r3}
    bx      lr
.size gteMACtoIR_lm0_nf, .-gteMACtoIR_lm0_nf
517 | |
518 | |
.global gteMACtoIR_lm1_nf @ r0=CP2 (d,c)
@ MAC1..3 -> IR1..3, zero-floored saturation, no gteFLAG updates
gteMACtoIR_lm1_nf:
    add     r12, r0, #4*25          @ gteMAC1
    ldmia   r12, {r1-r3}
    ssatx0_prep r12, 16
    ssatx0  r1, r12, 16             @ clamp each into [0, 0x7fff]
    ssatx0  r2, r12, 16
    ssatx0  r3, r12, 16
    add     r12, r0, #4*9           @ gteIR1
    stmia   r12, {r1-r3}
    bx      lr
.size gteMACtoIR_lm1_nf, .-gteMACtoIR_lm1_nf
531 | |
532 | |
.if 0   @ disabled: MVMVA dispatcher/test harness (references *_neon variants)
.global gteMVMVA_test
@ decode MVMVA opcode fields from r1 (v, mx, cv, sf, lm) and dispatch
gteMVMVA_test:
    push    {r4-r7,lr}
    push    {r1}
    and     r2, r1, #0x18000        @ v
    cmp     r2, #0x18000            @ v == 3?
    addeq   r4, r0, #4*9            @ v == 3: use IR123 as the vector
    addne   r3, r0, r2, lsr #12
    ldmeqia r4, {r3-r5}
    ldmneia r3, {r4,r5}
    lsleq   r3, #16                 @ pack IR1/IR2 low halves together
    lsreq   r3, #16
    orreq   r4, r3, r4, lsl #16     @ r4,r5 = VXYZ(v)
    @and    r5, #0xffff
    add     r12, r0, #4*32
    and     r3, r1, #0x60000        @ mx
    lsr     r3, #17
    add     r6, r12, r3, lsl #5     @ r6 = &MX11(mx)
    cmp     r3, #3
    adreq   r6, zeroes              @ mx == 3: zero matrix
    and     r2, r1, #0x06000        @ cv
    lsr     r2, #13
    add     r7, r12, r2, lsl #5     @ r7 = &CV1(cv)
    add     r7, #4*5
    cmp     r2, #3
    adreq   r7, zeroes              @ cv == 3: zero translation
.if 1
    adr     lr, 1f
    bne     0f                      @ cv != 3: general path
    tst     r1, #1<<19
    bne     gteMVMVA_part_cv3sh12_arm   @ cv==3 && sf: fast path
0:
    and     r1, #1<<19
    lsr     r1, #19                 @ r1 = needs_shift12
    b       gteMVMVA_part_arm
1:
    pop     {r1}
    tst     r1, #1<<10              @ lm bit selects the IR clamp mode
    adr     lr, 0f
    beq     gteMACtoIR_lm0
    bne     gteMACtoIR_lm1
0:
.else
    bl      gteMVMVA_part_neon
    pop     {r1}
    and     r1, #1<<10
    bl      gteMACtoIR_flags_neon
.endif
    pop     {r4-r7,pc}

zeroes:
    .word 0,0,0,0,0
.endif
587 | |
588 | |
59774ed0 |
589 | @ vim:filetype=armasm |
590 | |