/*
 * (C) Gražvydas "notaz" Ignotas, 2011
 *
 * This work is licensed under the terms of GNU GPL version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "arm_features.h"

.text
.align 2

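@ sign-extend the low 16 bits of \rs into \rd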
.macro sgnxt16 rd rs
#ifdef HAVE_ARMV7
    sxth    \rd, \rs
#else
    lsl     \rd, \rs, #16
    asr     \rd, \rd, #16
#endif
.endm

@ prepare work reg for ssatx
@ in: wr reg, bit to saturate to
.macro ssatx_prep wr bit
#ifndef HAVE_ARMV7
    mov     \wr, #(1<<(\bit-1))
#endif
.endm

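@ signed saturate \rd to \bit bits, i.e. -2^(\bit-1) .. 2^(\bit-1)-1
@ (\wr must be prepared with ssatx_prep)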
.macro ssatx rd wr bit
#ifdef HAVE_ARMV7
    ssat    \rd, #\bit, \rd
#else
    cmp     \rd, \wr
    subge   \rd, \wr, #1
    cmn     \rd, \wr
    rsblt   \rd, \wr, #0
#endif
.endm

@ prepare work reg for ssatx0 (sat to 0 .. 2^(bit-1)-1)
@ in: wr reg, bit to saturate to
.macro ssatx0_prep wr bit
    mov     \wr, #(1<<(\bit-1))
.endm

.macro ssatx0 rd wr bit
    cmp     \rd, \wr
    subge   \rd, \wr, #1
    cmn     \rd, #0
    movlt   \rd, #0
.endm

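@ unsigned saturate \rs to 0..0xffff into \rd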
.macro usat16_ rd rs
#ifdef HAVE_ARMV7
    usat    \rd, #16, \rs
#else
    subs    \rd, \rs, #0
    movlt   \rd, #0
    cmp     \rd, #0x10000
    movge   \rd, #0x0ff00
    orrge   \rd, #0x000ff
#endif
.endm

#ifdef HAVE_ARMV5

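@ unsigned 16.16 divide \rd = (\rm << 16) / \rs, classic restoring
@ shift-subtract division; kept as a reference alternative to udiv_newton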
.macro udiv_ rd rm rs
    lsl     \rm, #16
    clz     \rd, \rs
    lsl     \rs, \rs, \rd         @ shift up divisor
    orr     \rd, \rd, #1<<31
    lsr     \rd, \rd, \rd
0:
    cmp     \rm, \rs
    subcs   \rm, \rs
    adcs    \rd, \rd, \rd
    lsr     \rs, #1
    bcc     0b
.endm

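@ one Newton-Raphson step refining the reciprocal estimate \rcp of \den:
@ x1 = x0 * (2 - d*x0), evaluated in fixed point (\zero must hold 0)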
.macro newton_step rcp den zero t1 t2
    umull   \t2, \t1, \den, \rcp  @ \t2 is dummy
    sub     \t1, \zero, \t1, lsl #2
    smlal   \t2, \rcp, \t1, \rcp
.endm

.macro udiv_newton rd rm rs t1 t2 t3 t4
    lsl     \rd, \rm, #16
    clz     \t1, \rs
    mov     \t2, #0
    lsl     \rs, \t1              @ normalize for the algo
    mov     \rm, #0x4d000000      @ initial estimate ~1.2

    newton_step \rm, \rs, \t2, \t3, \t4
    newton_step \rm, \rs, \t2, \t3, \t4
    newton_step \rm, \rs, \t2, \t3, \t4
    newton_step \rm, \rs, \t2, \t3, \t4

    umull   \t4, \rd, \rm, \rd
    rsb     \t2, \t1, #30         @ here t1 is 1..15
    mov     \rd, \rd, lsr \t2
.endm


@ unsigned divide rd = rm / rs; 16.16 result
@ no div by 0 check
@ in: rm, rs
@ trash: rm rs t*
.macro udiv rd rm rs t1 t2 t3 t4
    @udiv_  \rd, \rm, \rs
    udiv_newton \rd, \rm, \rs, \t1, \t2, \t3, \t4
.endm

@ calculate RTPS/RTPT MAC values
@ in: r0 context, r8,r9 VXYZ
@ out: r10-r12 MAC123
@ trash: r1-r7
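@ note: the MAC1/MAC2 sums are formed at half scale (operands asr #1), so
@ the final shift is #11 instead of #12, trading one low bit for headroom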
.macro do_rtpx_mac
    add     r1, r0, #4*32
    add     r2, r0, #4*(32+5)     @ gteTRX
    ldmia   r1!,{r5-r7}           @ gteR1*,gteR2*
    ldmia   r2, {r10-r12}
    smulbb  r2, r5, r8            @ gteR11 * gteVX0
    smultt  r3, r5, r8            @ gteR12 * gteVY0
    smulbb  r4, r6, r9            @ gteR13 * gteVZ0
    qadd    r2, r2, r3
    asr     r4, r4, #1            @ prevent oflow, lose a bit
    add     r3, r4, r2, asr #1
    add     r10,r10,r3, asr #11   @ gteMAC1
    smultb  r2, r6, r8            @ gteR21 * gteVX0
    smulbt  r3, r7, r8            @ gteR22 * gteVY0
    smultb  r4, r7, r9            @ gteR23 * gteVZ0
    ldmia   r1!,{r5-r6}           @ gteR3*
    qadd    r2, r2, r3
    asr     r4, r4, #1
    add     r3, r4, r2, asr #1
    add     r11,r11,r3, asr #11   @ gteMAC2
    @ be more accurate for gteMAC3, since it's also used as the divisor
    smulbb  r2, r5, r8            @ gteR31 * gteVX0
    smultt  r3, r5, r8            @ gteR32 * gteVY0
    smulbb  r4, r6, r9            @ gteR33 * gteVZ0
    qadd    r2, r2, r3
    asr     r3, r4, #31           @ expand to 64bit
    adds    r1, r2, r4
    adc     r3, r2, asr #31       @ 64bit sum in r3,r1
    add     r12,r12,r3, lsl #20
    add     r12,r12,r1, lsr #12   @ gteMAC3
.endm


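@ RTPS: rotate, translate and perspective-project vertex 0
@ (_nf = no gteFLAG calculation)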
FUNCTION(gteRTPS_nf_arm): @ r0=CP2 (d,c)
    push    {r4-r11,lr}

    ldmia   r0, {r8,r9}           @ VXYZ(0)
    do_rtpx_mac
    add     r1, r0, #4*25         @ gteMAC1
    add     r2, r0, #4*17         @ gteSZ1
    stmia   r1, {r10-r12}         @ gteMAC123 save
    ldmia   r2, {r3-r5}
    add     r1, r0, #4*16         @ gteSZ0
    add     r2, r0, #4*9          @ gteIR1
    ssatx_prep r6, 16
    usat16_ lr, r12               @ limD
    ssatx   r10,r6, 16
    ssatx   r11,r6, 16
    ssatx   r12,r6, 16
    stmia   r1, {r3-r5,lr}        @ gteSZ*
    ldr     r3, [r0,#4*(32+26)]   @ gteH
    stmia   r2, {r10,r11,r12}     @ gteIR123 save
    cmp     r3, lr, lsl #1        @ gteH < gteSZ3*2 ?
    mov     r9, #1<<30
    bhs     1f
.if 1
    udiv    r9, r3, lr, r1, r2, r6, r7
.else
    push    {r0, r12}
    mov     r0, r3
    mov     r1, lr
    bl      DIVIDE
    mov     r9, r0
    pop     {r0, r12}
.endif
1:
    ldrd    r6, r7, [r0, #4*(32+24)] @ gteOFXY
    cmp     r9, #0x20000
    add     r1, r0, #4*12         @ gteSXY0
    movhs   r9, #0x20000
    ldmia   r1, {r2-r4}
/* quotient */ subhs r9, #1
    mov     r2, r6, asr #31
    smlal   r6, r2, r10, r9
    stmia   r1!,{r3,r4}           @ shift gteSXY
    mov     r3, r7, asr #31
    smlal   r7, r3, r11, r9
    lsr     r6, #16
/* gteDQA, gteDQB */ ldrd r10,r11, [r0, #4*(32+27)]
    orr     r6, r2, lsl #16       @ (gteOFX + gteIR1 * q) >> 16
    ssatx_prep r2, 11
    lsr     r7, #16
/* gteDQB + gteDQA * q */ mla r4, r10, r9, r11
    orr     r7, r3, lsl #16       @ (gteOFY + gteIR2 * q) >> 16
    ssatx   r6, r2, 11            @ gteSX2
    ssatx   r7, r2, 11            @ gteSY2
    strh    r6, [r1]
    strh    r7, [r1, #2]
    str     r4, [r0,#4*24]        @ gteMAC0
    asrs    r4, #12
    movmi   r4, #0
    cmp     r4, #0x1000           @ limH
    movgt   r4, #0x1000
    str     r4, [r0,#4*8]         @ gteIR0

    pop     {r4-r11,pc}
.size gteRTPS_nf_arm, .-gteRTPS_nf_arm


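@ RTPT: same as RTPS, but processes vertices 0-2 in a loop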
FUNCTION(gteRTPT_nf_arm): @ r0=CP2 (d,c)
    ldr     r1, [r0, #4*19]       @ gteSZ3
    push    {r4-r11,lr}
    str     r1, [r0, #4*16]       @ gteSZ0
    mov     lr, #0

rtpt_arm_loop:
    add     r1, r0, lr, lsl #1
    ldrd    r8, r9, [r1]          @ VXYZ(v)
    do_rtpx_mac

    ssatx_prep r6, 16
    usat16_ r2, r12               @ limD
    add     r1, r0, #4*25         @ gteMAC1
    ldr     r3, [r0,#4*(32+26)]   @ gteH
    stmia   r1, {r10-r12}         @ gteMAC123 save
    add     r1, r0, #4*17
    ssatx   r10,r6, 16
    ssatx   r11,r6, 16
    ssatx   r12,r6, 16
    str     r2, [r1, lr]          @ fSZ(v)
    cmp     r3, r2, lsl #1        @ gteH < gteSZ3*2 ?
    mov     r9, #1<<30
    bhs     1f
.if 1
    udiv    r9, r3, r2, r1, r4, r6, r7
.else
    push    {r0, r12, lr}
    mov     r0, r3
    mov     r1, r2
    bl      DIVIDE
    mov     r9, r0
    pop     {r0, r12, lr}
.endif
1:
    cmp     r9, #0x20000
    add     r1, r0, #4*12
    movhs   r9, #0x20000
    ldrd    r6, r7, [r0,#4*(32+24)] @ gteOFXY
/* quotient */ subhs r9, #1
    mov     r2, r6, asr #31
    smlal   r6, r2, r10, r9
    mov     r3, r7, asr #31
    smlal   r7, r3, r11, r9
    lsr     r6, #16
    orr     r6, r2, lsl #16       @ (gteOFX + gteIR1 * q) >> 16
    ssatx_prep r2, 11
    lsr     r7, #16
    orr     r7, r3, lsl #16       @ (gteOFY + gteIR2 * q) >> 16
    ssatx   r6, r2, 11            @ gteSX(v)
    ssatx   r7, r2, 11            @ gteSY(v)
    strh    r6, [r1, lr]!
    add     lr, #4
    strh    r7, [r1, #2]
    cmp     lr, #12
    blt     rtpt_arm_loop

    ldrd    r4, r5, [r0, #4*(32+27)] @ gteDQA, gteDQB
    add     r1, r0, #4*9          @ gteIR1
    mla     r3, r4, r9, r5        @ gteDQB + gteDQA * q
    stmia   r1, {r10,r11,r12}     @ gteIR123 save

    str     r3, [r0,#4*24]        @ gteMAC0
    asrs    r3, #12
    movmi   r3, #0
    cmp     r3, #0x1000           @ limH
    movgt   r3, #0x1000
    str     r3, [r0,#4*8]         @ gteIR0

    pop     {r4-r11,pc}
.size gteRTPT_nf_arm, .-gteRTPT_nf_arm


@ note: non-standard calling convention is used here
@ r0 = CP2 (d,c) (must preserve)
@ r1 = needs_shift12
@ r4,r5 = VXYZ(v) packed
@ r6 = &MX11(mx)
@ r7 = &CV1(cv)
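@ gteFLAG bits set below: 30/29/28 = MAC1/2/3 positive overflow,
@ 27/26/25 = MAC1/2/3 negative overflow, 31 = error summary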
.macro mvma_op do_flags
    push    {r8-r11}

.if \do_flags
    ands    r3, r1, #1            @ gteFLAG, shift_need
.else
    tst     r1, #1
.endif
    ldmia   r7, {r7-r9}           @ CV123
    ldmia   r6!,{r10-r12}         @ MX1*,MX2*
    asr     r1, r7, #20
    lsl     r7, #12               @ expand to 64bit
    smlalbb r7, r1, r10, r4       @ MX11 * vx
    smlaltt r7, r1, r10, r4       @ MX12 * vy
    smlalbb r7, r1, r11, r5       @ MX13 * vz
    lsrne   r7, #12
    orrne   r7, r1, lsl #20       @ gteMAC1
.if \do_flags
    asrne   r1, #20
    adds    r2, r7, #0x80000000
    adcs    r1, #0
    orrgt   r3, #(1<<30)
    orrmi   r3, #(1<<31)|(1<<27)
    tst     r3, #1                @ repeat shift test
.endif
    asr     r1, r8, #20
    lsl     r8, #12               @ expand to 64bit
    smlaltb r8, r1, r11, r4       @ MX21 * vx
    smlalbt r8, r1, r12, r4       @ MX22 * vy
    smlaltb r8, r1, r12, r5       @ MX23 * vz
    lsrne   r8, #12
    orrne   r8, r1, lsl #20       @ gteMAC2
.if \do_flags
    asrne   r1, #20
    adds    r2, r8, #0x80000000
    adcs    r1, #0
    orrgt   r3, #(1<<29)
    orrmi   r3, #(1<<31)|(1<<26)
    tst     r3, #1                @ repeat shift test
.endif
    ldmia   r6!,{r10-r11}         @ MX3*
    asr     r1, r9, #20
    lsl     r9, #12               @ expand to 64bit
    smlalbb r9, r1, r10, r4       @ MX31 * vx
    smlaltt r9, r1, r10, r4       @ MX32 * vy
    smlalbb r9, r1, r11, r5       @ MX33 * vz
    lsrne   r9, #12
    orrne   r9, r1, lsl #20       @ gteMAC3
.if \do_flags
    asrne   r1, #20
    adds    r2, r9, #0x80000000
    adcs    r1, #0
    orrgt   r3, #(1<<28)
    orrmi   r3, #(1<<31)|(1<<25)
    bic     r3, #1
.else
    mov     r3, #0
.endif
    str     r3, [r0, #4*(32+31)]  @ gteFLAG
    add     r1, r0, #4*25
    stmia   r1, {r7-r9}

    pop     {r8-r11}
    bx      lr
.endm

FUNCTION(gteMVMVA_part_arm):
    mvma_op 1
.size gteMVMVA_part_arm, .-gteMVMVA_part_arm

FUNCTION(gteMVMVA_part_nf_arm):
    mvma_op 0
.size gteMVMVA_part_nf_arm, .-gteMVMVA_part_nf_arm

@ common version of MVMVA with cv3 (== 0) and shift12;
@ can't overflow, so no gteMAC flags are needed
@ note: non-standard calling convention is used here
@ r0 = CP2 (d,c) (must preserve)
@ r4,r5 = VXYZ(v) packed
@ r6 = &MX11(mx)
FUNCTION(gteMVMVA_part_cv3sh12_arm):
    push    {r8-r9}
    ldmia   r6!,{r7-r9}           @ MX1*,MX2*
    smulbb  r1, r7, r4            @ MX11 * vx
    smultt  r2, r7, r4            @ MX12 * vy
    smulbb  r3, r8, r5            @ MX13 * vz
    qadd    r1, r1, r2
    asr     r3, #1                @ prevent oflow, lose a bit
    add     r1, r3, r1, asr #1
    asr     r7, r1, #11
    smultb  r1, r8, r4            @ MX21 * vx
    smulbt  r2, r9, r4            @ MX22 * vy
    smultb  r3, r9, r5            @ MX23 * vz
    qadd    r1, r1, r2
    asr     r3, #1
    add     r1, r3, r1, asr #1
    asr     r8, r1, #11
    ldmia   r6, {r6,r9}           @ MX3*
    smulbb  r1, r6, r4            @ MX31 * vx
    smultt  r2, r6, r4            @ MX32 * vy
    smulbb  r3, r9, r5            @ MX33 * vz
    qadd    r1, r1, r2
    asr     r3, #1
    add     r1, r3, r1, asr #1
    asr     r9, r1, #11
    add     r1, r0, #4*25
    mov     r2, #0
    stmia   r1, {r7-r9}
    str     r2, [r0, #4*(32+31)]  @ gteFLAG
    pop     {r8-r9}
    bx      lr
.size gteMVMVA_part_cv3sh12_arm, .-gteMVMVA_part_cv3sh12_arm

#endif /* HAVE_ARMV5 */

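@ NCLIP: winding test of the screen triangle,
@ MAC0 = SX0*(SY1-SY2) + SX1*(SY2-SY0) + SX2*(SY0-SY1)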
FUNCTION(gteNCLIP_arm): @ r0=CP2 (d,c)
    push    {r4-r6,lr}
    ldrsh   r4, [r0, #4*12+2]
    ldrsh   r5, [r0, #4*13+2]
    ldrsh   r6, [r0, #4*14+2]
    ldrsh   lr, [r0, #4*12]
    ldrsh   r2, [r0, #4*13]
    sub     r12, r4, r5           @ 3: gteSY0 - gteSY1
    sub     r5, r5, r6            @ 1: gteSY1 - gteSY2
    smull   r1, r5, lr, r5        @ RdLo, RdHi
    sub     r6, r4                @ 2: gteSY2 - gteSY0
    ldrsh   r3, [r0, #4*14]
    smlal   r1, r5, r2, r6
    mov     lr, #0                @ gteFLAG
    smlal   r1, r5, r3, r12
    mov     r6, #1<<31
    orr     r6, #1<<15
    movs    r2, r1, lsl #1
    adc     r5, r5
    cmp     r5, #0
#ifdef HAVE_ARMV7
    movtgt  lr, #((1<<31)|(1<<16))>>16
#else
    movgt   lr, #(1<<31)
    orrgt   lr, #(1<<16)
#endif
    cmn     r5, #1
    orrmi   lr, r6
    str     r1, [r0, #4*24]
    str     lr, [r0, #4*(32+31)]  @ gteFLAG

    pop     {r4-r6,pc}
.size gteNCLIP_arm, .-gteNCLIP_arm


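@ saturate gteMAC1-3 into gteIR1-3 (-0x8000..0x7fff, or 0..0x7fff when \lm
@ is set) and set gteFLAG saturation bits 24/23/22 (31 also for IR1/IR2)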
.macro gteMACtoIR lm
    ldr     r2, [r0, #4*25]       @ gteMAC1
    mov     r1, #1<<15
    ldr     r12,[r0, #4*(32+31)]  @ gteFLAG
    cmp     r2, r1
    subge   r2, r1, #1
    orrge   r12, #(1<<31)|(1<<24)
.if \lm
    cmp     r2, #0
    movlt   r2, #0
.else
    cmn     r2, r1
    rsblt   r2, r1, #0
.endif
    str     r2, [r0, #4*9]
#ifdef HAVE_ARMV5
    ldrd    r2, r3, [r0, #4*26]   @ gteMAC23
#else
    ldr     r2, [r0, #4*26]
    ldr     r3, [r0, #4*27]
#endif
    orrlt   r12, #(1<<31)|(1<<24)
    cmp     r2, r1
    subge   r2, r1, #1
    orrge   r12, #1<<23
    orrge   r12, #1<<31
.if \lm
    cmp     r2, #0
    movlt   r2, #0
.else
    cmn     r2, r1
    rsblt   r2, r1, #0
.endif
    orrlt   r12, #1<<23
    orrlt   r12, #1<<31
    cmp     r3, r1
    subge   r3, r1, #1
    orrge   r12, #1<<22
.if \lm
    cmp     r3, #0
    movlt   r3, #0
.else
    cmn     r3, r1
    rsblt   r3, r1, #0
.endif
    orrlt   r12, #1<<22
#ifdef HAVE_ARMV5
    strd    r2, r3, [r0, #4*10]   @ gteIR23
#else
    str     r2, [r0, #4*10]
    str     r3, [r0, #4*11]
#endif
    str     r12,[r0, #4*(32+31)]  @ gteFLAG
    bx      lr
.endm

FUNCTION(gteMACtoIR_lm0): @ r0=CP2 (d,c)
    gteMACtoIR 0
.size gteMACtoIR_lm0, .-gteMACtoIR_lm0

FUNCTION(gteMACtoIR_lm1): @ r0=CP2 (d,c)
    gteMACtoIR 1
.size gteMACtoIR_lm1, .-gteMACtoIR_lm1


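@ _nf variants: saturate MAC1-3 into IR1-3 without updating gteFLAG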
FUNCTION(gteMACtoIR_lm0_nf): @ r0=CP2 (d,c)
    add     r12, r0, #4*25
    ldmia   r12, {r1-r3}
    ssatx_prep r12, 16
    ssatx   r1, r12, 16
    ssatx   r2, r12, 16
    ssatx   r3, r12, 16
    add     r12, r0, #4*9
    stmia   r12, {r1-r3}
    bx      lr
.size gteMACtoIR_lm0_nf, .-gteMACtoIR_lm0_nf


FUNCTION(gteMACtoIR_lm1_nf): @ r0=CP2 (d,c)
    add     r12, r0, #4*25
    ldmia   r12, {r1-r3}
    ssatx0_prep r12, 16
    ssatx0  r1, r12, 16
    ssatx0  r2, r12, 16
    ssatx0  r3, r12, 16
    add     r12, r0, #4*9
    stmia   r12, {r1-r3}
    bx      lr
.size gteMACtoIR_lm1_nf, .-gteMACtoIR_lm1_nf


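@ disabled MVMVA dispatcher, apparently kept as a testing reference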
.if 0
FUNCTION(gteMVMVA_test):
    push    {r4-r7,lr}
    push    {r1}
    and     r2, r1, #0x18000      @ v
    cmp     r2, #0x18000          @ v == 3?
    addeq   r4, r0, #4*9
    addne   r3, r0, r2, lsr #12
    ldmeqia r4, {r3-r5}
    ldmneia r3, {r4,r5}
    lsleq   r3, #16
    lsreq   r3, #16
    orreq   r4, r3, r4, lsl #16   @ r4,r5 = VXYZ(v)
    @and    r5, #0xffff
    add     r12, r0, #4*32
    and     r3, r1, #0x60000      @ mx
    lsr     r3, #17
    add     r6, r12, r3, lsl #5
    cmp     r3, #3
    adreq   r6, zeroes
    and     r2, r1, #0x06000      @ cv
    lsr     r2, #13
    add     r7, r12, r2, lsl #5
    add     r7, #4*5
    cmp     r2, #3
    adreq   r7, zeroes
.if 1
    adr     lr, 1f
    bne     0f
    tst     r1, #1<<19
    bne     gteMVMVA_part_cv3sh12_arm
0:
    and     r1, #1<<19
    lsr     r1, #19
    b       gteMVMVA_part_arm
1:
    pop     {r1}
    tst     r1, #1<<10
    adr     lr, 0f
    beq     gteMACtoIR_lm0
    bne     gteMACtoIR_lm1
0:
.else
    bl      gteMVMVA_part_neon
    pop     {r1}
    and     r1, #1<<10
    bl      gteMACtoIR_flags_neon
.endif
    pop     {r4-r7,pc}

zeroes:
    .word 0,0,0,0,0
.endif


@ vim:filetype=armasm