/*
 * (C) Gražvydas "notaz" Ignotas, 2011
 *
 * This work is licensed under the terms of GNU GPL version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "arm_features.h"

.text
.align 2

@ sign-extend the low 16 bits of rs into rd
.macro sgnxt16 rd rs
#ifdef HAVE_ARMV7
    sxth    \rd, \rs
#else
    lsl     \rd, \rs, #16
    asr     \rd, \rd, #16
#endif
.endm
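@ usage sketch: "sgnxt16 r1, r1" leaves r1 = (int32_t)(int16_t)r1;
@ ARMv7 does it in one sxth, the fallback uses a lsl/asr pair.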

@ prepare work reg for ssatx
@ in: wr reg, bit to saturate to
.macro ssatx_prep wr bit
#ifndef HAVE_ARMV7
    mov     \wr, #(1<<(\bit-1))
#endif
.endm

.macro ssatx rd wr bit
#ifdef HAVE_ARMV7
    ssat    \rd, #\bit, \rd
#else
    cmp     \rd, \wr
    subge   \rd, \wr, #1
    cmn     \rd, \wr
    rsblt   \rd, \wr, #0
#endif
.endm
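@ C sketch of the pre-ARMv7 path above, with m = 1<<(bit-1) from ssatx_prep:
@   if (rd >= m) rd = m - 1;     /* cmp/subge */
@   if (rd < -m) rd = -m;        /* cmn/rsblt */
@ e.g. clamping r4 to the signed 16-bit range:
@   ssatx_prep r5, 16
@   ssatx      r4, r5, 16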

@ prepare work reg for ssatx0 (sat to 0..2^(bit-1)-1)
@ in: wr reg, bit to saturate to
.macro ssatx0_prep wr bit
    mov     \wr, #(1<<(\bit-1))
.endm

.macro ssatx0 rd wr bit
    cmp     \rd, \wr
    subge   \rd, \wr, #1
    cmn     \rd, #0
    movlt   \rd, #0
.endm
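@ unsigned-lower-bound variant of ssatx: rd = min(max(rd, 0), (1<<(bit-1))-1);
@ there is no ARMv7 shortcut here, so ssatx0_prep loads the limit unconditionally.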

.macro usat16_ rd rs
#ifdef HAVE_ARMV7
    usat    \rd, #16, \rs
#else
    subs    \rd, \rs, #0
    movlt   \rd, #0
    cmp     \rd, #0x10000
    movge   \rd, #0x0ff00
    orrge   \rd, #0x000ff
#endif
.endm
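@ C sketch: rd = rs < 0 ? 0 : rs > 0xffff ? 0xffff : rs;
@ 0xffff is not encodable as a single ARM immediate, hence the movge/orrge pair.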

#ifdef HAVE_ARMV5

.macro udiv_ rd rm rs
    lsl     \rm, #16
    clz     \rd, \rs
    lsl     \rs, \rs, \rd        @ shift up divisor
    orr     \rd, \rd, #1<<31
    lsr     \rd, \rd, \rd
0:
    cmp     \rm, \rs
    subcs   \rm, \rs
    adcs    \rd, \rd, \rd
    lsr     \rs, #1
    bcc     0b
.endm
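@ classic restoring shift-and-subtract divide, rm/rs in 16.16 format (the
@ numerator is pre-shifted left by 16); the clz/orr/lsr prologue plants a
@ marker bit in rd so the loop terminates (carry set, bcc not taken) after
@ exactly the number of steps the normalized divisor needs.  Kept as a
@ reference; the udiv wrapper below uses the Newton-Raphson version instead.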

.macro newton_step rcp den zero t1 t2
    umull   \t2, \t1, \den, \rcp @ \t2 is dummy
    sub     \t1, \zero, \t1, lsl #2
    smlal   \t2, \rcp, \t1, \rcp
.endm

.macro udiv_newton rd rm rs t1 t2 t3 t4
    lsl     \rd, \rm, #16
    clz     \t1, \rs
    mov     \t2, #0
    lsl     \rs, \t1             @ normalize for the algo
    mov     \rm, #0x4d000000     @ initial estimate ~1.2

    newton_step \rm, \rs, \t2, \t3, \t4
    newton_step \rm, \rs, \t2, \t3, \t4
    newton_step \rm, \rs, \t2, \t3, \t4
    newton_step \rm, \rs, \t2, \t3, \t4

    umull   \t4, \rd, \rm, \rd
    rsb     \t2, \t1, #30        @ here t1 is 1..15
    mov     \rd, \rd, lsr \t2
.endm
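@ Newton-Raphson reciprocal divide: with the divisor normalized so its top
@ bit is set, each newton_step refines the reciprocal estimate with the
@ standard iteration
@   rcp' = rcp * (2 - den*rcp)
@ evaluated in fixed point (the sub ... lsl #2 forms the "2 - den*rcp" error
@ term, the smlal applies it).  Starting from ~1.2 (0x4d000000 in what is
@ effectively 2.30 fixed point), four steps converge for all normalized
@ divisors; the final umull/lsr undoes the clz normalization.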

@ unsigned divide rd = rm / rs; 16.16 result
@ no div by 0 check
@ in: rm, rs
@ trash: rm rs t*
.macro udiv rd rm rs t1 t2 t3 t4
    @udiv_  \rd, \rm, \rs
    udiv_newton \rd, \rm, \rs, \t1, \t2, \t3, \t4
.endm

@ calculate RTPS/RTPT MAC values
@ in: r0 context, r8,r9 VXYZ
@ out: r10-r12 MAC123
@ trash: r1-r7
.macro do_rtpx_mac
    add     r1, r0, #4*32
    add     r2, r0, #4*(32+5)    @ gteTRX
    ldmia   r1!,{r5-r7}          @ gteR1*,gteR2*
    ldmia   r2, {r10-r12}
    smulbb  r2, r5, r8           @ gteR11 * gteVX0
    smultt  r3, r5, r8           @ gteR12 * gteVY0
    smulbb  r4, r6, r9           @ gteR13 * gteVZ0
    qadd    r2, r2, r3
    asr     r4, r4, #1           @ prevent oflow, lose a bit
    add     r3, r4, r2, asr #1
    add     r10,r10,r3, asr #11  @ gteMAC1
    smultb  r2, r6, r8           @ gteR21 * gteVX0
    smulbt  r3, r7, r8           @ gteR22 * gteVY0
    smultb  r4, r7, r9           @ gteR23 * gteVZ0
    ldmia   r1!,{r5-r6}          @ gteR3*
    qadd    r2, r2, r3
    asr     r4, r4, #1
    add     r3, r4, r2, asr #1
    add     r11,r11,r3, asr #11  @ gteMAC2
    @ be more accurate for gteMAC3, since it's also a divider
    smulbb  r2, r5, r8           @ gteR31 * gteVX0
    smultt  r3, r5, r8           @ gteR32 * gteVY0
    smulbb  r4, r6, r9           @ gteR33 * gteVZ0
    qadd    r2, r2, r3
    asr     r3, r4, #31          @ expand to 64bit
    adds    r1, r2, r4
    adc     r3, r2, asr #31      @ 64bit sum in r3,r1
    add     r12,r12,r3, lsl #20
    add     r12,r12,r1, lsr #12  @ gteMAC3
.endm
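@ each row above is, in C terms (a sketch):
@   gteMACn = gteTRn + ((Rn1*VX + Rn2*VY + Rn3*VZ) >> 12)
@ rows 1/2 qadd the first two products and halve the operands so the sum
@ fits in 32 bits (losing the lowest bit); row 3 keeps a full 64-bit sum
@ in r3:r1 since gteMAC3 also feeds the projection divide.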


.global gteRTPS_nf_arm @ r0=CP2 (d,c),
gteRTPS_nf_arm:
    push    {r4-r11,lr}
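@ rough flow of the body below (a sketch; flags are not computed):
@   MAC1..3 = TR + R*V0                        (do_rtpx_mac)
@   IR1..3  = lim16(MAC1..3); SZ fifo shifts; SZ3 = limD(MAC3)
@   q = gteH/SZ3 in 16.16, saturated to 0x1ffff when gteH >= gteSZ3*2
@   SX2 = lim11((gteOFX + IR1*q) >> 16), SY2 likewise
@   gteMAC0 = gteDQB + gteDQA*q; gteIR0 = limH(gteMAC0 >> 12)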

    ldmia   r0, {r8,r9}          @ VXYZ(0)
    do_rtpx_mac
    add     r1, r0, #4*25        @ gteMAC1
    add     r2, r0, #4*17        @ gteSZ1
    stmia   r1, {r10-r12}        @ gteMAC123 save
    ldmia   r2, {r3-r5}
    add     r1, r0, #4*16        @ gteSZ0
    add     r2, r0, #4*9         @ gteIR1
    ssatx_prep r6, 16
    usat16_ lr, r12              @ limD
    ssatx   r10,r6, 16
    ssatx   r11,r6, 16
    ssatx   r12,r6, 16
    stmia   r1, {r3-r5,lr}       @ gteSZ*
    ldr     r3, [r0,#4*(32+26)]  @ gteH
    stmia   r2, {r10,r11,r12}    @ gteIR123 save
    cmp     r3, lr, lsl #1       @ gteH < gteSZ3*2 ?
    mov     r9, #1<<30
    bhs     1f
.if 1
    udiv    r9, r3, lr, r1, r2, r6, r7
.else
    push    {r0, r12}
    mov     r0, r3
    mov     r1, lr
    bl      DIVIDE
    mov     r9, r0
    pop     {r0, r12}
.endif
1:
    ldrd    r6, [r0,#4*(32+24)]  @ gteOFXY
    cmp     r9, #0x20000
    add     r1, r0, #4*12        @ gteSXY0
    movhs   r9, #0x20000
    ldmia   r1, {r2-r4}
/* quotient */ subhs r9, #1
    mov     r2, r6, asr #31
    smlal   r6, r2, r10, r9
    stmia   r1!,{r3,r4}          @ shift gteSXY
    mov     r3, r7, asr #31
    smlal   r7, r3, r11, r9
    lsr     r6, #16
/* gteDQA, gteDQB */ ldrd r10,[r0, #4*(32+27)]
    orr     r6, r2, lsl #16      @ (gteOFX + gteIR1 * q) >> 16
    ssatx_prep r2, 11
    lsr     r7, #16
/* gteDQB + gteDQA * q */ mla r4, r10, r9, r11
    orr     r7, r3, lsl #16      @ (gteOFY + gteIR2 * q) >> 16
    ssatx   r6, r2, 11           @ gteSX2
    ssatx   r7, r2, 11           @ gteSY2
    strh    r6, [r1]
    strh    r7, [r1, #2]
    str     r4, [r0,#4*24]       @ gteMAC0
    asrs    r4, #12
    movmi   r4, #0
    cmp     r4, #0x1000          @ limH
    movgt   r4, #0x1000
    str     r4, [r0,#4*8]        @ gteIR0

    pop     {r4-r11,pc}
.size gteRTPS_nf_arm, .-gteRTPS_nf_arm


.global gteRTPT_nf_arm @ r0=CP2 (d,c),
gteRTPT_nf_arm:
    ldr     r1, [r0, #4*19]      @ gteSZ3
    push    {r4-r11,lr}
    str     r1, [r0, #4*16]      @ gteSZ0
    mov     lr, #0
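@ same per-vertex flow as gteRTPS above, run for v = 0..2: lr steps 0,4,8
@ and indexes both VXYZ(v) (stride 8 via lsl #1) and the SXY/fSZ fifo
@ slots; the gteIR123/gteMAC0/gteIR0 writeback after the loop uses the
@ last vertex's results only.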

rtpt_arm_loop:
    add     r1, r0, lr, lsl #1
    ldrd    r8, [r1]             @ VXYZ(v)
    do_rtpx_mac

    ssatx_prep r6, 16
    usat16_ r2, r12              @ limD
    add     r1, r0, #4*25        @ gteMAC1
    ldr     r3, [r0,#4*(32+26)]  @ gteH
    stmia   r1, {r10-r12}        @ gteMAC123 save
    add     r1, r0, #4*17
    ssatx   r10,r6, 16
    ssatx   r11,r6, 16
    ssatx   r12,r6, 16
    str     r2, [r1, lr]         @ fSZ(v)
    cmp     r3, r2, lsl #1       @ gteH < gteSZ3*2 ?
    mov     r9, #1<<30
    bhs     1f
.if 1
    udiv    r9, r3, r2, r1, r4, r6, r7
.else
    push    {r0, r12, lr}
    mov     r0, r3
    mov     r1, r2
    bl      DIVIDE
    mov     r9, r0
    pop     {r0, r12, lr}
.endif
1:  cmp     r9, #0x20000
    add     r1, r0, #4*12
    movhs   r9, #0x20000
    ldrd    r6, [r0,#4*(32+24)]  @ gteOFXY
/* quotient */ subhs r9, #1
    mov     r2, r6, asr #31
    smlal   r6, r2, r10, r9
    mov     r3, r7, asr #31
    smlal   r7, r3, r11, r9
    lsr     r6, #16
    orr     r6, r2, lsl #16      @ (gteOFX + gteIR1 * q) >> 16
    ssatx_prep r2, 11
    lsr     r7, #16
    orr     r7, r3, lsl #16      @ (gteOFY + gteIR2 * q) >> 16
    ssatx   r6, r2, 11           @ gteSX(v)
    ssatx   r7, r2, 11           @ gteSY(v)
    strh    r6, [r1, lr]!
    add     lr, #4
    strh    r7, [r1, #2]
    cmp     lr, #12
    blt     rtpt_arm_loop

    ldrd    r4, [r0, #4*(32+27)] @ gteDQA, gteDQB
    add     r1, r0, #4*9         @ gteIR1
    mla     r3, r4, r9, r5       @ gteDQB + gteDQA * q
    stmia   r1, {r10,r11,r12}    @ gteIR123 save

    str     r3, [r0,#4*24]       @ gteMAC0
    asrs    r3, #12
    movmi   r3, #0
    cmp     r3, #0x1000          @ limH
    movgt   r3, #0x1000
    str     r3, [r0,#4*8]        @ gteIR0

    pop     {r4-r11,pc}
.size gteRTPT_nf_arm, .-gteRTPT_nf_arm


@ note: non-standard calling convention is used
@ r0 = CP2 (d,c)  (must preserve)
@ r1 = needs_shift12
@ r4,r5 = VXYZ(v) packed
@ r6 = &MX11(mx)
@ r7 = &CV1(cv)
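@ each row of the macro accumulates CVn<<12 + MXn1*vx + MXn2*vy + MXn3*vz
@ as a 64-bit value in r1:rN and shifts it back down by 12 when
@ needs_shift12 is set.  In the flags build, the pair
@   adds    r2, rN, #0x80000000
@   adcs    r1, #0
@ tests whether that 64-bit sum still fits in a signed 32-bit gteMACn:
@ GT afterwards means > 0x7fffffff (positive overflow bit), MI means
@ < -0x80000000 (negative overflow bit).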
.macro mvma_op do_flags
    push    {r8-r11}

.if \do_flags
    ands    r3, r1, #1           @ gteFLAG, shift_need
.else
    tst     r1, #1
.endif
    ldmia   r7, {r7-r9}          @ CV123
    ldmia   r6!,{r10-r12}        @ MX1*,MX2*
    asr     r1, r7, #20
    lsl     r7, #12              @ expand to 64bit
    smlalbb r7, r1, r10, r4      @ MX11 * vx
    smlaltt r7, r1, r10, r4      @ MX12 * vy
    smlalbb r7, r1, r11, r5      @ MX13 * vz
    lsrne   r7, #12
    orrne   r7, r1, lsl #20      @ gteMAC1
.if \do_flags
    asrne   r1, #20
    adds    r2, r7, #0x80000000
    adcs    r1, #0
    orrgt   r3, #(1<<30)
    orrmi   r3, #(1<<31)|(1<<27)
    tst     r3, #1               @ repeat shift test
.endif
    asr     r1, r8, #20
    lsl     r8, #12              @ expand to 64bit
    smlaltb r8, r1, r11, r4      @ MX21 * vx
    smlalbt r8, r1, r12, r4      @ MX22 * vy
    smlaltb r8, r1, r12, r5      @ MX23 * vz
    lsrne   r8, #12
    orrne   r8, r1, lsl #20      @ gteMAC2
.if \do_flags
    asrne   r1, #20
    adds    r2, r8, #0x80000000
    adcs    r1, #0
    orrgt   r3, #(1<<29)
    orrmi   r3, #(1<<31)|(1<<26)
    tst     r3, #1               @ repeat shift test
.endif
    ldmia   r6!,{r10-r11}        @ MX3*
    asr     r1, r9, #20
    lsl     r9, #12              @ expand to 64bit
    smlalbb r9, r1, r10, r4      @ MX31 * vx
    smlaltt r9, r1, r10, r4      @ MX32 * vy
    smlalbb r9, r1, r11, r5      @ MX33 * vz
    lsrne   r9, #12
    orrne   r9, r1, lsl #20      @ gteMAC3
.if \do_flags
    asrne   r1, #20
    adds    r2, r9, #0x80000000
    adcs    r1, #0
    orrgt   r3, #(1<<28)
    orrmi   r3, #(1<<31)|(1<<25)
    bic     r3, #1
.else
    mov     r3, #0
.endif
    str     r3, [r0, #4*(32+31)] @ gteFLAG
    add     r1, r0, #4*25
    stmia   r1, {r7-r9}

    pop     {r8-r11}
    bx      lr
.endm

.global gteMVMVA_part_arm
gteMVMVA_part_arm:
    mvma_op 1
.size gteMVMVA_part_arm, .-gteMVMVA_part_arm

.global gteMVMVA_part_nf_arm
gteMVMVA_part_nf_arm:
    mvma_op 0
.size gteMVMVA_part_nf_arm, .-gteMVMVA_part_nf_arm

@ common version of MVMVA with cv3 (== 0) and shift12,
@ can't overflow so no gteMAC flags needed
@ note: non-standard calling convention is used
@ r0 = CP2 (d,c)  (must preserve)
@ r4,r5 = VXYZ(v) packed
@ r6 = &MX11(mx)
.global gteMVMVA_part_cv3sh12_arm
gteMVMVA_part_cv3sh12_arm:
    push    {r8-r9}
    ldmia   r6!,{r7-r9}          @ MX1*,MX2*
    smulbb  r1, r7, r4           @ MX11 * vx
    smultt  r2, r7, r4           @ MX12 * vy
    smulbb  r3, r8, r5           @ MX13 * vz
    qadd    r1, r1, r2
    asr     r3, #1               @ prevent oflow, lose a bit
    add     r1, r3, r1, asr #1
    asr     r7, r1, #11
    smultb  r1, r8, r4           @ MX21 * vx
    smulbt  r2, r9, r4           @ MX22 * vy
    smultb  r3, r9, r5           @ MX23 * vz
    qadd    r1, r1, r2
    asr     r3, #1
    add     r1, r3, r1, asr #1
    asr     r8, r1, #11
    ldmia   r6, {r6,r9}          @ MX3*
    smulbb  r1, r6, r4           @ MX31 * vx
    smultt  r2, r6, r4           @ MX32 * vy
    smulbb  r3, r9, r5           @ MX33 * vz
    qadd    r1, r1, r2
    asr     r3, #1
    add     r1, r3, r1, asr #1
    asr     r9, r1, #11
    add     r1, r0, #4*25
    mov     r2, #0
    stmia   r1, {r7-r9}
    str     r2, [r0, #4*(32+31)] @ gteFLAG
    pop     {r8-r9}
    bx      lr
.size gteMVMVA_part_cv3sh12_arm, .-gteMVMVA_part_cv3sh12_arm

#endif /* HAVE_ARMV5 */

.global gteNCLIP_arm @ r0=CP2 (d,c),
gteNCLIP_arm:
    push    {r4-r6,lr}
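@ computes the winding test value
@   gteMAC0 = SX0*(SY1-SY2) + SX1*(SY2-SY0) + SX2*(SY0-SY1)
@ in 64 bits (smull/smlal into r5:r1); the movs/adc below folds bit 31 of
@ the low word into r5 so the >0x7fffffff / <-0x80000000 flag checks can
@ run on r5 alone.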
    ldrsh   r4, [r0, #4*12+2]    @ gteSY0
    ldrsh   r5, [r0, #4*13+2]    @ gteSY1
    ldrsh   r6, [r0, #4*14+2]    @ gteSY2
    ldrsh   lr, [r0, #4*12]      @ gteSX0
    ldrsh   r2, [r0, #4*13]      @ gteSX1
    sub     r12, r4, r5          @ 3: gteSY0 - gteSY1
    sub     r5, r5, r6           @ 1: gteSY1 - gteSY2
    smull   r1, r5, lr, r5       @ RdLo, RdHi
    sub     r6, r4               @ 2: gteSY2 - gteSY0
    ldrsh   r3, [r0, #4*14]      @ gteSX2
    smlal   r1, r5, r2, r6
    mov     lr, #0               @ gteFLAG
    smlal   r1, r5, r3, r12
    mov     r6, #1<<31
    orr     r6, #1<<15
    movs    r2, r1, lsl #1
    adc     r5, r5
    cmp     r5, #0
#ifdef HAVE_ARMV7
    movtgt  lr, #((1<<31)|(1<<16))>>16
#else
    movgt   lr, #(1<<31)
    orrgt   lr, #(1<<16)
#endif
    cmn     r5, #1
    orrmi   lr, r6
    str     r1, [r0, #4*24]
    str     lr, [r0, #4*(32+31)] @ gteFLAG

    pop     {r4-r6,pc}
.size gteNCLIP_arm, .-gteNCLIP_arm


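@ saturate gteMAC1..3 into gteIR1..3 with flags; per channel (a C sketch):
@   if (mac > 0x7fff)      { ir = 0x7fff; FLAG |= sat_bit; }
@   else if (mac < lower)  { ir = lower;  FLAG |= sat_bit; }
@   else                     ir = mac;
@ lower is 0 when \lm is set, -0x8000 otherwise; IR1/IR2 saturation also
@ raises the error summary bit 31, IR3's bit 22 does not.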
.macro gteMACtoIR lm
    ldr     r2, [r0, #4*25]      @ gteMAC1
    mov     r1, #1<<15
    ldr     r12,[r0, #4*(32+31)] @ gteFLAG
    cmp     r2, r1
    subge   r2, r1, #1
    orrge   r12, #(1<<31)|(1<<24)
.if \lm
    cmp     r2, #0
    movlt   r2, #0
.else
    cmn     r2, r1
    rsblt   r2, r1, #0
.endif
    str     r2, [r0, #4*9]
#ifdef HAVE_ARMV5
    ldrd    r2, [r0, #4*26]      @ gteMAC23
#else
    ldr     r2, [r0, #4*26]
    ldr     r3, [r0, #4*27]
#endif
    orrlt   r12, #(1<<31)|(1<<24)
    cmp     r2, r1
    subge   r2, r1, #1
    orrge   r12, #1<<23
    orrge   r12, #1<<31
.if \lm
    cmp     r2, #0
    movlt   r2, #0
.else
    cmn     r2, r1
    rsblt   r2, r1, #0
.endif
    orrlt   r12, #1<<23
    orrlt   r12, #1<<31
    cmp     r3, r1
    subge   r3, r1, #1
    orrge   r12, #1<<22
.if \lm
    cmp     r3, #0
    movlt   r3, #0
.else
    cmn     r3, r1
    rsblt   r3, r1, #0
.endif
    orrlt   r12, #1<<22
#ifdef HAVE_ARMV5
    strd    r2, [r0, #4*10]      @ gteIR23
#else
    str     r2, [r0, #4*10]
    str     r3, [r0, #4*11]
#endif
    str     r12,[r0, #4*(32+31)] @ gteFLAG
    bx      lr
.endm

.global gteMACtoIR_lm0 @ r0=CP2 (d,c)
gteMACtoIR_lm0:
    gteMACtoIR 0
.size gteMACtoIR_lm0, .-gteMACtoIR_lm0

.global gteMACtoIR_lm1 @ r0=CP2 (d,c)
gteMACtoIR_lm1:
    gteMACtoIR 1
.size gteMACtoIR_lm1, .-gteMACtoIR_lm1


.global gteMACtoIR_lm0_nf @ r0=CP2 (d,c)
gteMACtoIR_lm0_nf:
    add     r12, r0, #4*25
    ldmia   r12, {r1-r3}
    ssatx_prep r12, 16
    ssatx   r1, r12, 16
    ssatx   r2, r12, 16
    ssatx   r3, r12, 16
    add     r12, r0, #4*9
    stmia   r12, {r1-r3}
    bx      lr
.size gteMACtoIR_lm0_nf, .-gteMACtoIR_lm0_nf


.global gteMACtoIR_lm1_nf @ r0=CP2 (d,c)
gteMACtoIR_lm1_nf:
    add     r12, r0, #4*25
    ldmia   r12, {r1-r3}
    ssatx0_prep r12, 16
    ssatx0  r1, r12, 16
    ssatx0  r2, r12, 16
    ssatx0  r3, r12, 16
    add     r12, r0, #4*9
    stmia   r12, {r1-r3}
    bx      lr
.size gteMACtoIR_lm1_nf, .-gteMACtoIR_lm1_nf


.if 0
.global gteMVMVA_test
gteMVMVA_test:
    push    {r4-r7,lr}
    push    {r1}
    and     r2, r1, #0x18000     @ v
    cmp     r2, #0x18000         @ v == 3?
    addeq   r4, r0, #4*9
    addne   r3, r0, r2, lsr #12
    ldmeqia r4, {r3-r5}
    ldmneia r3, {r4,r5}
    lsleq   r3, #16
    lsreq   r3, #16
    orreq   r4, r3, r4, lsl #16  @ r4,r5 = VXYZ(v)
    @and    r5, #0xffff
    add     r12, r0, #4*32
    and     r3, r1, #0x60000     @ mx
    lsr     r3, #17
    add     r6, r12, r3, lsl #5
    cmp     r3, #3
    adreq   r6, zeroes
    and     r2, r1, #0x06000     @ cv
    lsr     r2, #13
    add     r7, r12, r2, lsl #5
    add     r7, #4*5
    cmp     r2, #3
    adreq   r7, zeroes
.if 1
    adr     lr, 1f
    bne     0f
    tst     r1, #1<<19
    bne     gteMVMVA_part_cv3sh12_arm
0:
    and     r1, #1<<19
    lsr     r1, #19
    b       gteMVMVA_part_arm
1:
    pop     {r1}
    tst     r1, #1<<10
    adr     lr, 0f
    beq     gteMACtoIR_lm0
    bne     gteMACtoIR_lm1
0:
.else
    bl      gteMVMVA_part_neon
    pop     {r1}
    and     r1, #1<<10
    bl      gteMACtoIR_flags_neon
.endif
    pop     {r4-r7,pc}

zeroes:
    .word 0,0,0,0,0
.endif


@ vim:filetype=armasm