/*
 * (C) Gražvydas "notaz" Ignotas, 2011
 *
 * This work is licensed under the terms of GNU GPL version 2 or later.
 * See the COPYING file in the top-level directory.
 */

/* .equiv HAVE_ARMV7, 1 */

.text
.align 2

@ sign-extend the low 16 bits of \rd to 32 bits
.macro sgnxt16 rd
.if HAVE_ARMV7
    sxth \rd, \rd
.else
    lsl \rd, \rd, #16
    asr \rd, \rd, #16
.endif
.endm
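
@ Rough C equivalent of sgnxt16 (a sketch, not part of the original):
@   rd = (int32_t)(int16_t)rd;
@ The pre-ARMv7 lsl/asr pair emulates sxth: push the low halfword to the
@ top of the register, then shift it back down arithmetically.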

@ prepare work reg for ssatx
@ in: wr reg, bit to saturate to
.macro ssatx_prep wr bit
.if !HAVE_ARMV7
    mov \wr, #(1<<(\bit-1))
.endif
.endm

@ signed-saturate \rd to \bit bits; \wr must hold 1<<(\bit-1) (see ssatx_prep)
.macro ssatx rd wr bit
.if HAVE_ARMV7
    ssat \rd, #\bit, \rd
.else
    cmp \rd, \wr
    subge \rd, \wr, #1
    cmn \rd, \wr
    rsblt \rd, \wr, #0
.endif
.endm
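
@ What ssatx computes, as C pseudocode (sketch; wr = 1<<(bit-1) from
@ ssatx_prep on the non-ARMv7 path):
@   if (rd >= wr)  rd = wr - 1;   @ cmp/subge
@   if (rd < -wr)  rd = -wr;      @ cmn/rsblt
@ i.e. clamp rd to [-2^(bit-1), 2^(bit-1)-1], matching the ARMv7 ssat.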

@ prepare work reg for ssatx0 (saturate to 0..2^(\bit-1)-1)
@ in: wr reg, bit to saturate to
.macro ssatx0_prep wr bit
    mov \wr, #(1<<(\bit-1))
.endm

.macro ssatx0 rd wr bit
    cmp \rd, \wr
    subge \rd, \wr, #1
    cmn \rd, #0
    movlt \rd, #0
.endm
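
@ ssatx0 is the clamp-at-zero variant, as C pseudocode (sketch):
@   if (rd >= wr) rd = wr - 1;   @ wr = 1<<(bit-1)
@   if (rd < 0)   rd = 0;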

@ unsigned-saturate \rs to 16 bits into \rd
.macro usat16_ rd rs
.if HAVE_ARMV7
    usat \rd, #16, \rs
.else
    subs \rd, \rs, #0
    movlt \rd, #0
    cmp \rd, #0x10000
    movge \rd, #0x0ff00
    orrge \rd, #0x000ff
.endif
.endm
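
@ The fallback path clamps rs to [0, 0xffff]; 0xffff is not a valid ARM
@ immediate, so it is assembled as 0xff00 | 0x00ff (movge + orrge).
@ C sketch:  rd = rs < 0 ? 0 : (rs > 0xffff ? 0xffff : rs);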

@ unsigned divide rd = rm / rs
@ no div by 0 check
@ in: rm, rs
@ trash: rm rs
.macro udiv rd rm rs
    clz \rd, \rs
    lsl \rs, \rs, \rd       @ shift up divisor
    orr \rd, \rd, #1<<31
    lsr \rd, \rd, \rd
0:
    cmp \rm, \rs
    subcs \rm, \rs
    adcs \rd, \rd, \rd
    lsr \rs, #1
    bcc 0b
.endm
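
@ Classic restoring (shift-and-subtract) division.  clz/lsl normalize the
@ divisor; the single bit planted in rd by the orr/lsr pair is a sentinel
@ that adcs shifts left once per iteration, ending the loop when it falls
@ out as carry.  C sketch of the same algorithm:
@   n = clz(rs); rs <<= n;
@   for (rd = 0, i = 0; i <= n; i++, rs >>= 1) {
@       rd <<= 1;
@       if (rm >= rs) { rm -= rs; rd |= 1; }
@   }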


@ calculate RTPS/RTPT MAC values
@ in: r0 context, r8,r9 VXYZ
@ out: r10-r12 MAC123
@ trash: r1-r7
.macro do_rtpx_mac
    add r1, r0, #4*32
    add r2, r0, #4*(32+5)   @ gteTRX
    ldmia r1!, {r5-r7}      @ gteR1*,gteR2*
    ldmia r2, {r10-r12}
    smulbb r2, r5, r8       @ gteR11 * gteVX0
    smultt r3, r5, r8       @ gteR12 * gteVY0
    smulbb r4, r6, r9       @ gteR13 * gteVZ0
    qadd r2, r2, r3
    asr r4, r4, #1          @ prevent oflow, lose a bit
    add r3, r4, r2, asr #1
    add r10, r10, r3, asr #11  @ gteMAC1
    smultb r2, r6, r8       @ gteR21 * gteVX0
    smulbt r3, r7, r8       @ gteR22 * gteVY0
    smultb r4, r7, r9       @ gteR23 * gteVZ0
    ldmia r1!, {r5-r6}      @ gteR3*
    qadd r2, r2, r3
    asr r4, r4, #1
    add r3, r4, r2, asr #1
    add r11, r11, r3, asr #11  @ gteMAC2
    @ be more accurate for gteMAC3, since it's also used as a divisor
    smulbb r2, r5, r8       @ gteR31 * gteVX0
    smultt r3, r5, r8       @ gteR32 * gteVY0
    smulbb r4, r6, r9       @ gteR33 * gteVZ0
    qadd r2, r2, r3
    asr r3, r4, #31         @ expand to 64bit
    adds r1, r2, r4
    adc r3, r2, asr #31     @ 64bit sum in r3,r1
    add r12, r12, r3, lsl #20
    add r12, r12, r1, lsr #12  @ gteMAC3
.endm
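
@ Roughly, for each row i (nf = "no flags" approximation):
@   MACi = TRi + (Ri1*VX + Ri2*VY + Ri3*VZ) >> 12
@ Rows 1-2 halve the partial sums (the asr #1 pairs) to gain overflow
@ headroom at the cost of one low bit; row 3 keeps a full 64-bit sum
@ because MAC3 becomes SZ3, the perspective-divide divisor.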


.global gteRTPS_nf_arm  @ r0=CP2 (d,c),
gteRTPS_nf_arm:
    push {r4-r11,lr}

    ldmia r0, {r8,r9}       @ VXYZ(0)
    do_rtpx_mac
    add r1, r0, #4*25       @ gteMAC1
    add r2, r0, #4*17       @ gteSZ1
    stmia r1, {r10-r12}     @ gteMAC123 save
    ldmia r2, {r3-r5}
    add r1, r0, #4*16       @ gteSZ0
    add r2, r0, #4*9        @ gteIR1
    ssatx_prep r6, 16
    usat16_ lr, r12         @ limD
    ssatx r10, r6, 16
    ssatx r11, r6, 16
    ssatx r12, r6, 16
    stmia r1, {r3-r5,lr}    @ gteSZ*
    ldr r3, [r0, #4*(32+26)] @ gteH
    stmia r2, {r10,r11,r12} @ gteIR123 save
    cmp r3, lr, lsl #1      @ gteH < gteSZ3*2 ?
    mov r9, #1<<30
    bhs 1f
.if 1
    lsl r3, #16
    udiv r9, r3, lr
.else
    push {r0, r12}
    mov r0, r3
    mov r1, lr
    bl DIVIDE
    mov r9, r0
    pop {r0, r12}
.endif
1:
    ldrd r6, [r0, #4*(32+24)] @ gteOFXY
    cmp r9, #0x20000
    add r1, r0, #4*12       @ gteSXY0
    movhs r9, #0x20000
    ldmia r1, {r2-r4}
    /* quotient */ subhs r9, #1
    mov r2, #0
    smlal r6, r2, r10, r9
    stmia r1!, {r3,r4}      @ shift gteSXY
    mov r3, #0
    smlal r7, r3, r11, r9
    lsr r6, #16
    /* gteDQA, gteDQB */ ldrd r10, [r0, #4*(32+27)]
    orr r6, r2, lsl #16     @ (gteOFX + gteIR1 * q) >> 16
    ssatx_prep r2, 11
    lsr r7, #16
    /* gteDQB + gteDQA * q */ mla r4, r10, r9, r11
    orr r7, r3, lsl #16     @ (gteOFY + gteIR2 * q) >> 16
    ssatx r6, r2, 11        @ gteSX2
    ssatx r7, r2, 11        @ gteSY2
    strh r6, [r1]
    strh r7, [r1, #2]
    str r4, [r0, #4*24]     @ gteMAC0
    asrs r4, #12
    movmi r4, #0
    cmp r4, #0x1000         @ limH
    movgt r4, #0x1000
    str r4, [r0, #4*8]      @ gteIR0

    pop {r4-r11,pc}
.size gteRTPS_nf_arm, .-gteRTPS_nf_arm
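
@ Projection step of the routine above, as C pseudocode (a sketch using
@ PSX GTE register names):
@   q    = min((H << 16) / SZ3, 0x1ffff);  @ 1<<30 sentinel also clamps
@   SX2  = sat11((OFX + IR1 * q) >> 16);
@   SY2  = sat11((OFY + IR2 * q) >> 16);
@   MAC0 = DQB + DQA * q;
@   IR0  = clamp(MAC0 >> 12, 0, 0x1000);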


.global gteRTPT_nf_arm  @ r0=CP2 (d,c),
gteRTPT_nf_arm:
    ldr r1, [r0, #4*19]     @ gteSZ3
    push {r4-r11,lr}
    str r1, [r0, #4*16]     @ gteSZ0
    mov lr, #0

rtpt_arm_loop:
    add r1, r0, lr, lsl #1
    ldrd r8, [r1]           @ VXYZ(v)
    do_rtpx_mac

    ssatx_prep r6, 16
    usat16_ r2, r12         @ limD
    add r1, r0, #4*25       @ gteMAC1
    ldr r3, [r0, #4*(32+26)] @ gteH
    stmia r1, {r10-r12}     @ gteMAC123 save
    add r1, r0, #4*17
    ssatx r10, r6, 16
    ssatx r11, r6, 16
    ssatx r12, r6, 16
    str r2, [r1, lr]        @ fSZ(v)
    cmp r3, r2, lsl #1      @ gteH < gteSZ3*2 ?
    mov r9, #1<<30
    bhs 1f
.if 1
    lsl r3, #16
    udiv r9, r3, r2
.else
    push {r0, r12, lr}
    mov r0, r3
    mov r1, r2
    bl DIVIDE
    mov r9, r0
    pop {r0, r12, lr}
.endif
1:
    cmp r9, #0x20000
    add r1, r0, #4*12
    movhs r9, #0x20000
    ldrd r6, [r0, #4*(32+24)] @ gteOFXY
    /* quotient */ subhs r9, #1
    mov r2, #0
    smlal r6, r2, r10, r9
    mov r3, #0
    smlal r7, r3, r11, r9
    lsr r6, #16
    orr r6, r2, lsl #16     @ (gteOFX + gteIR1 * q) >> 16
    ssatx_prep r2, 11
    lsr r7, #16
    orr r7, r3, lsl #16     @ (gteOFY + gteIR2 * q) >> 16
    ssatx r6, r2, 11        @ gteSX(v)
    ssatx r7, r2, 11        @ gteSY(v)
    strh r6, [r1, lr]!
    add lr, #4
    strh r7, [r1, #2]
    cmp lr, #12
    blt rtpt_arm_loop

    ldrd r4, [r0, #4*(32+27)] @ gteDQA, gteDQB
    add r1, r0, #4*9        @ gteIR1
    mla r3, r4, r9, r5      @ gteDQB + gteDQA * q
    stmia r1, {r10,r11,r12} @ gteIR123 save

    str r3, [r0, #4*24]     @ gteMAC0
    asrs r3, #12
    movmi r3, #0
    cmp r3, #0x1000         @ limH
    movgt r3, #0x1000
    str r3, [r0, #4*8]      @ gteIR0

    pop {r4-r11,pc}
.size gteRTPT_nf_arm, .-gteRTPT_nf_arm


@ note: non-standard calling convention used
@ r0 = CP2 (d,c)  (must preserve)
@ r1 = needs_shift12
@ r4,r5 = VXYZ(v) packed
@ r6 = &MX11(mx)
@ r7 = &CV1(cv)
.macro mvma_op do_flags
    push {r8-r11}

.if \do_flags
    ands r3, r1, #1         @ gteFLAG, shift_need
.else
    tst r1, #1
.endif
    ldmia r7, {r7-r9}       @ CV123
    ldmia r6!, {r10-r12}    @ MX1*,MX2*
    asr r1, r7, #20
    lsl r7, #12             @ expand to 64bit
    smlalbb r7, r1, r10, r4 @ MX11 * vx
    smlaltt r7, r1, r10, r4 @ MX12 * vy
    smlalbb r7, r1, r11, r5 @ MX13 * vz
    lsrne r7, #12
    orrne r7, r1, lsl #20   @ gteMAC1
.if \do_flags
    asrne r1, #20
    adds r2, r7, #0x80000000
    adcs r1, #0
    orrgt r3, #(1<<30)
    orrmi r3, #(1<<31)|(1<<27)
    tst r3, #1              @ repeat shift test
.endif
    asr r1, r8, #20
    lsl r8, #12             @ expand to 64bit
    smlaltb r8, r1, r11, r4 @ MX21 * vx
    smlalbt r8, r1, r12, r4 @ MX22 * vy
    smlaltb r8, r1, r12, r5 @ MX23 * vz
    lsrne r8, #12
    orrne r8, r1, lsl #20   @ gteMAC2
.if \do_flags
    asrne r1, #20
    adds r2, r8, #0x80000000
    adcs r1, #0
    orrgt r3, #(1<<29)
    orrmi r3, #(1<<31)|(1<<26)
    tst r3, #1              @ repeat shift test
.endif
    ldmia r6!, {r10-r11}    @ MX3*
    asr r1, r9, #20
    lsl r9, #12             @ expand to 64bit
    smlalbb r9, r1, r10, r4 @ MX31 * vx
    smlaltt r9, r1, r10, r4 @ MX32 * vy
    smlalbb r9, r1, r11, r5 @ MX33 * vz
    lsrne r9, #12
    orrne r9, r1, lsl #20   @ gteMAC3
.if \do_flags
    asrne r1, #20
    adds r2, r9, #0x80000000
    adcs r1, #0
    orrgt r3, #(1<<28)
    orrmi r3, #(1<<31)|(1<<25)
    bic r3, #1
.else
    mov r3, #0
.endif
    str r3, [r0, #4*(32+31)] @ gteFLAG
    add r1, r0, #4*25
    stmia r1, {r7-r9}

    pop {r8-r11}
    bx lr
.endm
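
@ Per-row C sketch of mvma_op (i = 1..3; 64-bit intermediate assumed,
@ flag-bit helper names are illustrative only):
@   mac = ((int64_t)CVi << 12) + MXi1*vx + MXi2*vy + MXi3*vz;
@   if (needs_shift12) mac >>= 12;
@   if (do_flags) {  @ the adds/adcs pair tests whether mac fits in 32 bits
@       if (mac > INT32_MAX) FLAG |= pos_ovf_bit(i);            @ bits 30/29/28
@       if (mac < INT32_MIN) FLAG |= (1u<<31) | neg_ovf_bit(i); @ bits 27/26/25
@   }
@   MACi = (int32_t)mac;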

.global gteMVMVA_part_arm
gteMVMVA_part_arm:
    mvma_op 1
.size gteMVMVA_part_arm, .-gteMVMVA_part_arm

.global gteMVMVA_part_nf_arm
gteMVMVA_part_nf_arm:
    mvma_op 0
.size gteMVMVA_part_nf_arm, .-gteMVMVA_part_nf_arm

@ common version of MVMVA with cv3 (== 0) and shift12;
@ can't overflow, so no gteMAC flags needed
@ note: non-standard calling convention used
@ r0 = CP2 (d,c)  (must preserve)
@ r4,r5 = VXYZ(v) packed
@ r6 = &MX11(mx)
.global gteMVMVA_part_cv3sh12_arm
gteMVMVA_part_cv3sh12_arm:
    push {r8-r9}
    ldmia r6!, {r7-r9}      @ MX1*,MX2*
    smulbb r1, r7, r4       @ MX11 * vx
    smultt r2, r7, r4       @ MX12 * vy
    smulbb r3, r8, r5       @ MX13 * vz
    qadd r1, r1, r2
    asr r3, #1              @ prevent oflow, lose a bit
    add r1, r3, r1, asr #1
    asr r7, r1, #11
    smultb r1, r8, r4       @ MX21 * vx
    smulbt r2, r9, r4       @ MX22 * vy
    smultb r3, r9, r5       @ MX23 * vz
    qadd r1, r1, r2
    asr r3, #1
    add r1, r3, r1, asr #1
    asr r8, r1, #11
    ldmia r6, {r6,r9}       @ MX3*
    smulbb r1, r6, r4       @ MX31 * vx
    smultt r2, r6, r4       @ MX32 * vy
    smulbb r3, r9, r5       @ MX33 * vz
    qadd r1, r1, r2
    asr r3, #1
    add r1, r3, r1, asr #1
    asr r9, r1, #11
    add r1, r0, #4*25
    mov r2, #0
    stmia r1, {r7-r9}
    str r2, [r0, #4*(32+31)] @ gteFLAG
    pop {r8-r9}
    bx lr
.size gteMVMVA_part_cv3sh12_arm, .-gteMVMVA_part_cv3sh12_arm
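
@ With CV == 0 and forced shift12 each row reduces to (sketch):
@   MACi = (MXi1*vx + MXi2*vy + MXi3*vz) >> 12
@ evaluated as halved partial sums shifted by 11, like do_rtpx_mac.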


.global gteNCLIP_arm  @ r0=CP2 (d,c),
gteNCLIP_arm:
    push {r4-r6,lr}

    add r1, r0, #4*12
    ldmia r1, {r1-r3}
    mov r4, r1, asr #16
    mov r5, r2, asr #16
    mov r6, r3, asr #16
    sub r12, r4, r5         @ 3: gteSY0 - gteSY1
    sub r5, r5, r6          @ 1: gteSY1 - gteSY2
    sgnxt16 r1
    smull r1, r5, r1, r5    @ RdLo, RdHi
    sub r6, r4              @ 2: gteSY2 - gteSY0
    sgnxt16 r2
    smlal r1, r5, r2, r6
    mov lr, #0              @ gteFLAG
    sgnxt16 r3
    smlal r1, r5, r3, r12
    mov r6, #1<<31
    orr r6, #1<<15
    movs r2, r1, lsl #1
    adc r5, r5
    cmp r5, #0
.if HAVE_ARMV7
    movtgt lr, #((1<<31)|(1<<16))>>16
.else
    movgt lr, #(1<<31)
    orrgt lr, #(1<<16)
.endif
    mvngt r1, #1<<31        @ maxint
    cmn r5, #1
    movmi r1, #1<<31        @ minint
    orrmi lr, r6
    str r1, [r0, #4*24]
    str lr, [r0, #4*(32+31)] @ gteFLAG

    pop {r4-r6,pc}
.size gteNCLIP_arm, .-gteNCLIP_arm
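
@ NCLIP computes the winding (doubled signed area) of the screen triangle;
@ in C pseudocode (sketch):
@   MAC0 = SX0*(SY1-SY2) + SX1*(SY2-SY0) + SX2*(SY0-SY1);
@ The smull/smlal chain keeps a 64-bit sum; the lsl #1/adc fold then tests
@ whether it fits in 32 bits, otherwise MAC0 saturates to maxint/minint and
@ gteFLAG gets bit 31 plus bit 16 (positive) or bit 15 (negative overflow).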


@ saturate gteMAC1..3 into gteIR1..3 and update gteFLAG
@ (lm=1 clamps to 0..0x7fff, lm=0 to -0x8000..0x7fff)
.macro gteMACtoIR lm
    ldr r2, [r0, #4*25]     @ gteMAC1
    mov r1, #1<<15
    ldr r12, [r0, #4*(32+31)] @ gteFLAG
    cmp r2, r1
    subge r2, r1, #1
    orrge r12, #(1<<31)|(1<<24)
.if \lm
    cmp r2, #0
    movlt r2, #0
.else
    cmn r2, r1
    rsblt r2, r1, #0
.endif
    str r2, [r0, #4*9]
    ldrd r2, [r0, #4*26]    @ gteMAC23
    orrlt r12, #(1<<31)|(1<<24)
    cmp r2, r1
    subge r2, r1, #1
    orrge r12, #1<<23
    orrge r12, #1<<31
.if \lm
    cmp r2, #0
    movlt r2, #0
.else
    cmn r2, r1
    rsblt r2, r1, #0
.endif
    orrlt r12, #1<<23
    orrlt r12, #1<<31
    cmp r3, r1
    subge r3, r1, #1
    orrge r12, #1<<22
.if \lm
    cmp r3, #0
    movlt r3, #0
.else
    cmn r3, r1
    rsblt r3, r1, #0
.endif
    orrlt r12, #1<<22
    strd r2, [r0, #4*10]    @ gteIR23
    str r12, [r0, #4*(32+31)] @ gteFLAG
    bx lr
.endm
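
@ Per-component C sketch of gteMACtoIR (i = 1..3):
@   IRi = clamp(MACi, lm ? 0 : -0x8000, 0x7fff);
@   if clamped, FLAG gets the IRi saturation bit (24/23/22 for IR1/IR2/IR3),
@   and for IR1/IR2 also the error summary bit 31, matching GTE flag rules.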

.global gteMACtoIR_lm0  @ r0=CP2 (d,c)
gteMACtoIR_lm0:
    gteMACtoIR 0
.size gteMACtoIR_lm0, .-gteMACtoIR_lm0

.global gteMACtoIR_lm1  @ r0=CP2 (d,c)
gteMACtoIR_lm1:
    gteMACtoIR 1
.size gteMACtoIR_lm1, .-gteMACtoIR_lm1


.global gteMACtoIR_lm0_nf  @ r0=CP2 (d,c)
gteMACtoIR_lm0_nf:
    add r12, r0, #4*25
    ldmia r12, {r1-r3}
    ssatx_prep r12, 16
    ssatx r1, r12, 16
    ssatx r2, r12, 16
    ssatx r3, r12, 16
    add r12, r0, #4*9
    stmia r12, {r1-r3}
    bx lr
.size gteMACtoIR_lm0_nf, .-gteMACtoIR_lm0_nf


.global gteMACtoIR_lm1_nf  @ r0=CP2 (d,c)
gteMACtoIR_lm1_nf:
    add r12, r0, #4*25
    ldmia r12, {r1-r3}
    ssatx0_prep r12, 16
    ssatx0 r1, r12, 16
    ssatx0 r2, r12, 16
    ssatx0 r3, r12, 16
    add r12, r0, #4*9
    stmia r12, {r1-r3}
    bx lr
.size gteMACtoIR_lm1_nf, .-gteMACtoIR_lm1_nf


.if 0
.global gteMVMVA_test
gteMVMVA_test:
    push {r4-r7,lr}
    push {r1}
    and r2, r1, #0x18000    @ v
    cmp r2, #0x18000        @ v == 3?
    addeq r4, r0, #4*9
    addne r3, r0, r2, lsr #12
    ldmeqia r4, {r3-r5}
    ldmneia r3, {r4,r5}
    lsleq r3, #16
    lsreq r3, #16
    orreq r4, r3, r4, lsl #16 @ r4,r5 = VXYZ(v)
    @and r5, #0xffff
    add r12, r0, #4*32
    and r3, r1, #0x60000    @ mx
    lsr r3, #17
    add r6, r12, r3, lsl #5
    cmp r3, #3
    adreq r6, zeroes
    and r2, r1, #0x06000    @ cv
    lsr r2, #13
    add r7, r12, r2, lsl #5
    add r7, #4*5
    cmp r2, #3
    adreq r7, zeroes
.if 1
    adr lr, 1f
    bne 0f
    tst r1, #1<<19
    bne gteMVMVA_part_cv3sh12_arm
0:
    and r1, #1<<19
    lsr r1, #19
    b gteMVMVA_part_arm
1:
    pop {r1}
    tst r1, #1<<10
    adr lr, 0f
    beq gteMACtoIR_lm0
    bne gteMACtoIR_lm1
0:
.else
    bl gteMVMVA_part_neon
    pop {r1}
    and r1, #1<<10
    bl gteMACtoIR_flags_neon
.endif
    pop {r4-r7,pc}

zeroes:
    .word 0,0,0,0,0
.endif


@ vim:filetype=armasm