@ Source: picodrive.git, pico/carthw/svp/stub_arm.S
@ Commit subject: supporting caanoo, line doublers, refactoring
@ vim:filetype=armasm

@ Compiler helper functions and some SVP HLE code

@ (c) Copyright 2008, Grazvydas "notaz" Ignotas
@ Free for non-commercial use.

@ Entry points exported to the rest of the emulator / the block translator.
.global ssp_drc_entry
.global ssp_drc_next
.global ssp_drc_next_patch
.global ssp_drc_end
.global ssp_hle_800
.global ssp_hle_902
.global ssp_hle_07_030
.global ssp_hle_07_036
.global ssp_hle_07_6d6
.global ssp_hle_11_12c
.global ssp_hle_11_384
.global ssp_hle_11_38a

.text
.align 2

@ SSP_GR0, SSP_X, SSP_Y, SSP_A,
@ SSP_ST, SSP_STACK, SSP_PC, SSP_P,
@ SSP_PM0, SSP_PM1, SSP_PM2, SSP_XST,
@ SSP_PM4, SSP_gr13, SSP_PMC, SSP_AL

@ register map:
@ r4: XXYY
@ r5: A
@ r6: STACK and emu flags: sss0 * .uu. .lll NZCV (NZCV is PSR bits from ARM)
@ r7: SSP context
@ r8: r0-r2 (.210)
@ r9: r4-r6 (.654)
@ r10: P
@ r11: cycles
@ r12: tmp


@ Byte offsets into the SSP context addressed through r7, plus the indices
@ of the general registers that this file accesses by name.  A register is
@ reached as [r7, #(SSP_OFFS_GR + index*4)].
#define SSP_OFFS_GR         0x400
#define SSP_PC                  6
#define SSP_P                   7
#define SSP_PM0                 8
#define SSP_PMC                14
#define SSP_OFFS_PM_WRITE   0x46c // pmac_write[]
#define SSP_OFFS_EMUSTAT    0x484 // emu_status
#define SSP_OFFS_IRAM_ROM   0x48c // ptr_iram_rom
#define SSP_OFFS_DRAM       0x490 // ptr_dram
#define SSP_OFFS_IRAM_DIRTY 0x494
#define SSP_OFFS_IRAM_CTX   0x498 // iram_context
#define SSP_OFFS_BLTAB      0x49c // block_table
#define SSP_OFFS_BLTAB_IRAM 0x4a0
#define SSP_OFFS_TMP0       0x4a4 // for entry PC
#define SSP_OFFS_TMP1       0x4a8
#define SSP_OFFS_TMP2       0x4ac
#define SSP_WAIT_PM0        0x2000


@ ssp_drc_do_next: find the translated block for the SSP PC in r0,
@ translating it first when the table slot is still empty, then leave.
@
@ In:    r0 = SSP PC (only the low 16 bits are used), r7 = SSP context
@ Param: patch_jump = 0  jump to the block with bx r2
@        patch_jump = 1  store lr in the TMP2 slot and continue at
@                        ssp_drc_do_patch with r2 = block address
@ Uses:  r0-r2 here; the bl targets are external and may clobber more
@        (presumably the usual call-scratch registers; verify).
@
@ PCs below 0x400 go through the per-IRAM-context table, every other PC
@ through the table at SSP_OFFS_BLTAB.  On exit r0 holds the entry PC again.
.macro ssp_drc_do_next patch_jump=0
.if \patch_jump
    str     lr, [r7, #SSP_OFFS_TMP2]    @ jump instr. (actually call) address + 4
.endif
    mov     r0, r0, lsl #16
    mov     r0, r0, lsr #16             @ keep low 16 bits of the PC
    str     r0, [r7, #SSP_OFFS_TMP0]    @ remember the entry PC across calls
    cmp     r0, #0x400
    blt     0f                          @ ssp_de_iram

    ldr     r2, [r7, #SSP_OFFS_BLTAB]
    ldr     r2, [r2, r0, lsl #2]        @ block_table[PC]
    tst     r2, r2
.if \patch_jump
    bne     ssp_drc_do_patch
.else
    bxne    r2
.endif
    bl      ssp_translate_block         @ slot empty: translate now
    mov     r2, r0
    ldr     r0, [r7, #SSP_OFFS_TMP0]    @ entry PC
    ldr     r1, [r7, #SSP_OFFS_BLTAB]
    str     r2, [r1, r0, lsl #2]        @ cache the new block pointer
.if \patch_jump
    b       ssp_drc_do_patch
.else
    bx      r2
.endif

0:  @ ssp_de_iram:
    ldr     r1, [r7, #SSP_OFFS_IRAM_DIRTY]
    tst     r1, r1
    ldreq   r1, [r7, #SSP_OFFS_IRAM_CTX]
    beq     1f                          @ ssp_de_iram_ctx

    bl      ssp_get_iram_context        @ dirty: ask for the context again
    mov     r1, #0
    str     r1, [r7, #SSP_OFFS_IRAM_DIRTY]
    mov     r1, r0
    str     r1, [r7, #SSP_OFFS_IRAM_CTX]
    ldr     r0, [r7, #SSP_OFFS_TMP0]    @ entry PC

1:  @ ssp_de_iram_ctx:
    ldr     r2, [r7, #SSP_OFFS_BLTAB_IRAM]
    add     r2, r2, r1, lsl #12         @ block_tab_iram + iram_context * 0x800/2*4
    add     r1, r2, r0, lsl #2
    ldr     r2, [r1]
    tst     r2, r2
.if \patch_jump
    bne     ssp_drc_do_patch
.else
    bxne    r2
.endif
    str     r1, [r7, #SSP_OFFS_TMP1]    @ slot address must survive the call
    bl      ssp_translate_block
    mov     r2, r0
    ldr     r0, [r7, #SSP_OFFS_TMP0]    @ entry PC
    ldr     r1, [r7, #SSP_OFFS_TMP1]    @ &block_table_iram[iram_context][rPC]
    str     r2, [r1]
.if \patch_jump
    b       ssp_drc_do_patch
.else
    bx      r2
.endif
.endm @ ssp_drc_do_next


@ ssp_drc_entry: enter translated SSP code.
@ In:  r0 = cycle count, kept in r11 while translated code runs
@ Loads the SSP register file from the context into the ARM registers
@ described by the register map, then falls through into ssp_drc_next
@ with r0 = SSP PC.
@ NOTE(review): nine words are pushed, so sp is only 4-byte aligned at the
@ bl calls made from this file if the caller had it 8-byte aligned --
@ confirm the target ABI tolerates that.
ssp_drc_entry:
    stmfd   sp!, {r4-r11, lr}
    mov     r11, r0
ssp_regfile_load:
    ldr     r7, =ssp
    ldr     r7, [r7]                    @ r7 = SSP context pointer
    add     r2, r7, #SSP_OFFS_GR
    add     r2, r2, #4                  @ skip GR0
    ldmia   r2, {r3,r4,r5,r6,r8}        @ X, Y, A, ST, STACK
    mov     r3, r3, lsr #16
    mov     r3, r3, lsl #16
    orr     r4, r3, r4, lsr #16         @ XXYY

    and     r8, r8, #0x0f0000
    mov     r8, r8, lsl #13             @ sss0 *
    and     r9, r6, #0x670000
    tst     r6, #0x80000000
    orrne   r8, r8, #0x8
    tst     r6, #0x20000000
    orrne   r8, r8, #0x4                @ sss0 * NZ..
    orr     r6, r8, r9, lsr #12         @ sss0 * .uu. .lll NZ..

    ldr     r8, [r7, #0x440]            @ r0-r2
    ldr     r9, [r7, #0x444]            @ r4-r6
    ldr     r10,[r7, #(SSP_OFFS_GR+SSP_P*4)]    @ P

    ldr     r0, [r7, #(SSP_OFFS_GR+SSP_PC*4)]
    mov     r0, r0, lsr #16             @ PC lives in the upper halfword


@ ssp_drc_next: continue at the block for the SSP PC in r0.
ssp_drc_next:
    ssp_drc_do_next 0


@ ssp_drc_next_patch: same, but saves lr and ends in ssp_drc_do_patch.
ssp_drc_next_patch:
    ssp_drc_do_next 1

@ ssp_drc_do_patch: rewrite the call that brought us here so that next time
@ it reaches the target block directly.
@ In:  r2 = address of the target block
@      TMP2 slot = address of the word after the call instruction
@ Three cases, keyed on r12 = r2 - r1:
@   0     the target follows the call directly: the call becomes a nop
@   4     the word after the call is moved up (plus 1) and its old
@         place is filled with a nop
@   else  the call becomes a branch (L bit cleared) straight to r2
@ Afterwards the touched words are handed to cache_flush_d_inval_i and
@ control goes to the target with r0 = entry PC.
ssp_drc_do_patch:
    ldr     r1, [r7, #SSP_OFFS_TMP2]    @ jump instr. (actually call) address + 4
    subs    r12,r2, r1
    moveq   r3, #0xe1000000
    orreq   r3, r3, #0x00a00000         @ nop
    streq   r3, [r1, #-4]
    beq     ssp_drc_dp_end

    cmp     r12,#4
    ldreq   r3, [r1]
    addeq   r3, r3, #1
    streq   r3, [r1, #-4]               @ move the other cond up
    moveq   r3, #0xe1000000
    orreq   r3, r3, #0x00a00000
    streq   r3, [r1]                    @ fill its place with nop
    beq     ssp_drc_dp_end

    ldr     r3, [r1, #-4]
    sub     r12,r12,#4                  @ offset relative to (call address + 8)
    mov     r3, r3, lsr #24             @ keep only the top byte of the call
    bic     r3, r3, #1                  @ L bit
    orr     r3, r3, r12,lsl #6          @ byte offset << 6 == word offset << 8
    mov     r3, r3, ror #8              @ patched branch instruction
    str     r3, [r1, #-4]               @ patch the bl/b to jump directly to another handler

ssp_drc_dp_end:
    str     r2, [r7, #SSP_OFFS_TMP1]    @ keep the target across the call
    sub     r0, r1, #4
    add     r1, r1, #4
    bl      cache_flush_d_inval_i       @ presumably (start, end) -- confirm
    ldr     r2, [r7, #SSP_OFFS_TMP1]
    ldr     r0, [r7, #SSP_OFFS_TMP0]
    bx      r2


@ ssp_drc_end: leave translated code.
@ In:  r0 = SSP PC to resume at
@ Out: r0 = value of r11 (cycles)
@ Writes the cached SSP registers back to the context, restores r4-r11
@ and returns to the address pushed by ssp_drc_entry.
ssp_drc_end:
    mov     r0, r0, lsl #16
    str     r0, [r7, #(SSP_OFFS_GR+SSP_PC*4)]

ssp_regfile_store:
    str     r10,[r7, #(SSP_OFFS_GR+SSP_P*4)]    @ P
    str     r8, [r7, #0x440]            @ r0-r2
    str     r9, [r7, #0x444]            @ r4-r6

    mov     r9, r6, lsr #13
    and     r9, r9, #(7<<16)            @ STACK
    mov     r3, r6, lsl #28
    msr     cpsr_f, r3                  @ to ARM PSR
    and     r6, r6, #0x670
    mov     r6, r6, lsl #12
    orrmi   r6, r6, #0x80000000         @ N
    orreq   r6, r6, #0x20000000         @ Z

    mov     r3, r4, lsl #16             @ Y
    mov     r2, r4, lsr #16
    mov     r2, r2, lsl #16             @ X
    add     r8, r7, #SSP_OFFS_GR
    add     r8, r8, #4                  @ skip GR0
    stmia   r8, {r2,r3,r5,r6,r9}        @ X, Y, A, ST, STACK

    mov     r0, r11
    ldmfd   sp!, {r4-r11, lr}
    bx      lr



@ HLE replacement for the SSP sequence at 0x800:
@ ld A, PM0
@ andi 2
@ bra z=1, gloc_0800
@
@ While bit 17 of the PM0 word is clear: set SSP_WAIT_PM0 in emu_status,
@ charge 1024 cycles and leave through ssp_drc_end with PC = 0x400.
@ Otherwise continue at PC 0x404.
ssp_hle_800:
    ldr     r0, [r7, #(SSP_OFFS_GR+SSP_PM0*4)]
    ldr     r1, [r7, #SSP_OFFS_EMUSTAT]
    tst     r0, #0x20000
    orreq   r1, r1, #SSP_WAIT_PM0
    subeq   r11,r11, #1024
    streq   r1, [r7, #SSP_OFFS_EMUSTAT]
    mov     r0, #0x400
    beq     ssp_drc_end
    orrne   r0, r0, #0x004
    b       ssp_drc_next


@ hle_flushflags: copy the ARM NZCV bits (CPSR 31:28) into the low nibble
@ of r6.  Clobbers r1.
.macro hle_flushflags
    bic     r6, r6, #0xf
    mrs     r1, cpsr
    orr     r6, r6, r1, lsr #28
.endm

@ hle_popstack: pop one entry from the SSP stack.
@ Decrements the stack pointer held in r6 bits 31:29 and loads the halfword
@ at context offset 0x448 + 2*sp into r0.  Clobbers r1.
.macro hle_popstack
    sub     r6, r6, #0x20000000
    add     r1, r7, #SSP_OFFS_GR
    add     r1, r1, #0x048              @ stack
    add     r1, r1, r6, lsr #28         @ + sp*2 (bit 28 of r6 is the 0 in sss0)
    ldrh    r0, [r1]
.endm

@ ssp_hle_902: HLE block copy into IRAM.
@ The halfword at context offset 0x200 indexes a header inside the
@ iram_rom area: destination (low 10 bits used), then length, followed by
@ the data.  Halfwords are copied two at a time to the destination, the
@ IRAM dirty flag is set, the source index at 0x200 is advanced, and PMC
@ plus pmac_write[4] receive the end index or-ed with 0x081c8000.
@ Returns to the PC popped from the SSP stack.  Cost: 3 * (length + 1) + 16.
ssp_hle_902:
    cmp     r11, #0
    ble     ssp_drc_end                 @ out of cycles: leave

    add     r1, r7, #0x200
    ldrh    r0, [r1]
    ldr     r3, [r7, #SSP_OFFS_IRAM_ROM]
    add     r2, r3, r0, lsl #1          @ (r7|00)
    ldrh    r0, [r2], #2
    mov     r5, r5, lsl #16
    mov     r5, r5, lsr #16             @ clear the upper half of A
    bic     r0, r0, #0xfc00
    add     r3, r3, r0, lsl #1          @ IRAM dest
    ldrh    r12,[r2], #2                @ length
    bic     r3, r3, #3                  @ always seen aligned
@   orr     r5, r5, #0x08000000
@   orr     r5, r5, #0x00880000
@   sub     r5, r5, r12, lsl #16
    bic     r6, r6, #0xf                @ clear the cached NZCV bits
    add     r12,r12,#1
    mov     r0, #1
    str     r0, [r7, #SSP_OFFS_IRAM_DIRTY]
    sub     r11,r11,r12,lsl #1
    sub     r11,r11,r12                 @ -= length*3

ssp_hle_902_loop:
    ldrh    r0, [r2], #2
    ldrh    r1, [r2], #2
    subs    r12,r12,#2
    orr     r0, r0, r1, lsl #16
    str     r0, [r3], #4
    bgt     ssp_hle_902_loop

    tst     r12, #1
    ldrneh  r0, [r2], #2
    strneh  r0, [r3], #2

    ldr     r0, [r7, #SSP_OFFS_IRAM_ROM]
    add     r1, r7, #0x200
    sub     r2, r2, r0
    mov     r2, r2, lsr #1
    strh    r2, [r1]                    @ (r7|00)

    sub     r0, r3, r0
    mov     r0, r0, lsr #1
    orr     r0, r0, #0x08000000
    orr     r0, r0, #0x001c8000
    str     r0, [r7, #(SSP_OFFS_GR+SSP_PMC*4)]
    str     r0, [r7, #(SSP_OFFS_PM_WRITE+4*4)]

    hle_popstack
    subs    r11,r11,#16                 @ timeslice is likely to end
    ble     ssp_drc_end
    b       ssp_drc_next


@ this one is car rendering related
@
@ hle_11_12c_mla offs_in: one row of a multiply-accumulate.
@ With h[0..3] the four signed halfwords at context offset offs_in:
@   r5 = (r2*h[0] + r3*h[1] + r4*h[2] + (h[3] << 11)) >> 13   (logical shift)
@ The low halfword of r5 is stored at r7 + (r8 >> 23), then r8 += 1<<24.
@ Clobbers r0, r1, r5, r12 and the flags.
.macro hle_11_12c_mla offs_in
    ldrsh   r5, [r7, #(\offs_in+0)]
    ldrsh   r0, [r7, #(\offs_in+2)]
    ldrsh   r1, [r7, #(\offs_in+4)]
    mul     r5, r2, r5
    ldrsh   r12,[r7, #(\offs_in+6)]
    mla     r5, r3, r0, r5
    mla     r5, r4, r1, r5
    add     r5, r5, r12,lsl #11

    movs    r5, r5, lsr #13
    add     r1, r7, r8, lsr #23
    strh    r5, [r1]
    add     r8, r8, #(1<<24)
.endm

@ ssp_hle_11_12c: read three values with ssp_pm_read(0), sign-extend the
@ low halfword of each and double it, then run hle_11_12c_mla on the rows
@ at context offsets 0x20, 0x28 and 0x30.
@ r8 is rotated by 16 for the duration so that the macro can step the
@ byte in its top bits; on exit the low byte of r8 is 0x1c.
@ Overwrites r4 and r5.  Returns to the PC popped from the SSP stack.
@ Cost: 33 cycles.
ssp_hle_11_12c:
    cmp     r11, #0
    ble     ssp_drc_end                 @ out of cycles: leave

    mov     r0, #0
    bl      ssp_pm_read
    mov     r4, r0

    mov     r0, #0
    bl      ssp_pm_read
    mov     r5, r0

    mov     r0, #0
    bl      ssp_pm_read

    mov     r2, r4, lsl #16
    mov     r2, r2, asr #15             @ (r7|00) << 1
    mov     r3, r5, lsl #16
    mov     r3, r3, asr #15             @ (r7|01) << 1
    mov     r4, r0, lsl #16
    mov     r4, r4, asr #15             @ (r7|10) << 1

    bic     r8, r8, #0xff
    mov     r8, r8, ror #16

    hle_11_12c_mla 0x20
    hle_11_12c_mla 0x28
    hle_11_12c_mla 0x30

    mov     r8, r8, ror #16
    orr     r8, r8, #0x1c
@   hle_flushflags
    hle_popstack
    sub     r11,r11,#33
    b       ssp_drc_next


@ ssp_hle_11_384 / ssp_hle_11_38a: shared range-check loop.
@ The two entries differ only in the loop counter: r3 = 2 gives three
@ passes, r3 = 3 gives four (the loop runs while the counter stays >= 0).
@ Each pass consumes three halfwords starting at context offset 0x1c0,
@ tests the first and the third against limits taken from 0x224, 0x220
@ and 0x1e8, and sets the bit selected by r1 in the upper and/or lower
@ half of r2.  The resulting mask is stored at 0x1dc.
@ On exit the low byte of r8 is the halfword index reached and the low
@ byte of r9 is the final bit value.  Overwrites r5.
@ Cost: 9 + 30*4 cycles.
ssp_hle_11_384:
    mov     r3, #2
    b       ssp_hle_11_38x

ssp_hle_11_38a:
    mov     r3, #3                      @ r5

ssp_hle_11_38x:
    cmp     r11, #0
    ble     ssp_drc_end                 @ out of cycles: leave

    mov     r2, #0                      @ EFh, EEh
    mov     r1, #1                      @ r4
    add     r0, r7, #0x1c0              @ r0 (based)

ssp_hle_11_38x_loop:
    ldrh    r5, [r0], #2
    ldr     r12,[r7, #0x224]
    mov     r5, r5, lsl #16
    eor     r5, r5, r5, asr #31
    @ NOTE(review): the eor above already cleared the sign bit, so this add
    @ contributes 0 and a negative input ends up as ~x, not -x -- confirm
    @ that this is intended.
    add     r5, r5, r5, lsr #31         @ abs(r5)
    cmp     r5, r12,lsl #16
    orrpl   r2, r2, r1,lsl #16          @ EFh |= r4

    ldrh    r5, [r0, #2]!               @ skips one halfword
    ldr     r12,[r7, #0x220]
    cmp     r5, r12,lsr #16
    orrpl   r2, r2, r1,lsl #16          @ EFh |= r4

    ldr     r12,[r7, #0x1e8]
    add     r0, r0, #2
    mov     r12,r12,lsl #16
    cmp     r5, r12,lsr #16
    orrmi   r2, r2, r1

    mov     r1, r1, lsl #1
    subs    r3, r3, #1
    bpl     ssp_hle_11_38x_loop

    str     r2, [r7, #0x1dc]
    sub     r0, r0, r7
    bic     r8, r8, #0xff
    orr     r8, r8, r0, lsr #1
    bic     r9, r9, #0xff
    orr     r9, r9, r1

@   hle_flushflags
    hle_popstack
    sub     r11,r11,#(9+30*4)
    b       ssp_drc_next


@ ssp_hle_07_6d6: running minimum / maximum over a word list.
@ Starts at the halfword index in the low byte of r8 and reads 32-bit
@ words until one with the top bit set is found.  The low halfword of
@ each word (moved to bits 31:16) lowers r1 (seeded from 106h) or raises
@ r2 (seeded from 107h).  Both are written back to context offset 0x20c
@ and the low byte of r8 is updated with the index reached.
@ Overwrites r5.  Cost: 16 cycles per word plus 6.
ssp_hle_07_6d6:
    cmp     r11, #0
    ble     ssp_drc_end                 @ out of cycles: leave

    ldr     r1, [r7, #0x20c]
    and     r0, r8, #0xff               @ assuming alignment
    add     r0, r7, r0, lsl #1
    mov     r2, r1, lsr #16
    mov     r1, r1, lsl #16             @ 106h << 16
    mov     r2, r2, lsl #16             @ 107h << 16

ssp_hle_07_6d6_loop:
    ldr     r5, [r0], #4
    tst     r5, r5
    bmi     ssp_hle_07_6d6_end          @ top bit set: end of list
    mov     r5, r5, lsl #16
    cmp     r5, r1
    movmi   r1, r5                      @ new minimum
    cmp     r5, r2
    sub     r11,r11,#16
    bmi     ssp_hle_07_6d6_loop
    mov     r2, r5                      @ new maximum
    b       ssp_hle_07_6d6_loop

ssp_hle_07_6d6_end:
    sub     r0, r0, r7
    mov     r0, r0, lsr #1
    bic     r8, r8, #0xff
    orr     r8, r8, r0
    orr     r1, r2, r1, lsr #16
    str     r1, [r7, #0x20c]
    hle_popstack
    sub     r11,r11,#6
    b       ssp_drc_next


@ ssp_hle_07_030: rotate the halfword at context offset 0 left by 4 bits
@ (the bits shifted out on top come back in at the bottom), charge
@ 3 cycles and fall through into ssp_hle_07_036.
ssp_hle_07_030:
    ldrh    r0, [r7]
    mov     r0, r0, lsl #4
    orr     r0, r0, r0, lsr #16
    strh    r0, [r7]
    sub     r11,r11,#3

@ ssp_hle_07_036: HLE pattern fill into DRAM.
@ Outline, as far as this file shows:
@   - r5 = (F1h - F0h) << 16; below 4 goes to hle_07_036_ending2, at or
@     above EEh << 16 returns immediately.
@   - Otherwise a value is written with ssp_pm_write, a length is derived
@     from F5h, F8h and EAh, and that many halfwords of the pattern at
@     context offset 0 are stored from the DRAM address selected by the
@     low 16 bits of PMC.
@   - Mode (context offset 6) 0x4018 stores the pattern as is.  Mode 0x4418
@     keeps the old nibble wherever the pattern nibble is zero.  Any other
@     mode calls tr_unhandled.
@   - hle_07_036_ending1 updates pmac_write[4], the r2 pointer byte in r8
@     and A (r5), then does a second ssp_pm_write.
@ All exits except the last one in hle_07_036_ending2 pop the return PC.
ssp_hle_07_036:
    ldr     r1, [r7, #0x1e0]            @ F1h F0h
    rsb     r5, r1, r1, lsr #16
    mov     r5, r5, lsl #16             @ AL not needed
    cmp     r5, #(4<<16)
    sub     r11,r11,#5
    bmi     hle_07_036_ending2
    ldr     r1, [r7, #0x1dc]            @ EEh
    cmp     r5, r1, lsl #16
    sub     r11,r11,#5
    bpl     hle_07_036_ret

    mov     r0, r5, lsr #16
    add     r1, r7, #0x100
    strh    r0, [r1, #0xea]             @ F5h
    ldr     r0, [r7, #0x1e0]            @ F0h
    and     r0, r0, #3
    strh    r0, [r1, #0xf0]             @ F8h
    add     r2, r0, #0xc0               @ r2
    add     r2, r7, r2, lsl #1
    ldrh    r2, [r2]
    ldr     r0, [r7]
    mov     r1, #4
    and     r0, r0, r2
    bl      ssp_pm_write
    @ will handle PMC later
    ldr     r0, [r7, #0x1e8]            @ F5h << 16
    ldr     r1, [r7, #0x1f0]            @ F8h
    ldr     r2, [r7, #0x1d4]            @ EAh
    sub     r0, r0, #(3<<16)
    add     r0, r0, r1, lsl #16
    sub     r0, r2, r0, asr #18
    and     r0, r0, #0x7f
    rsbs    r0, r0, #0x78               @ length
    ble     hle_07_036_ending1

    sub     r11,r11,r0                  @ one cycle per element

    @ copy part
    ldr     r1, [r7, #(SSP_OFFS_GR+SSP_PMC*4)]
    ldr     r2, [r7, #SSP_OFFS_DRAM]
    mov     r1, r1, lsl #16
    add     r1, r2, r1, lsr #15         @ addr (based)
    ldrh    r2, [r7, #0]                @ pattern
    ldrh    r3, [r7, #6]                @ mode

    @ r12 ends up 0 for mode 0x4018 or 0x4418; anything else is reported
    mov     r12, #0x4000
    orr     r12,r12,#0x0018
    subs    r12,r3, r12
    subnes  r12,r12,#0x0400
    blne    tr_unhandled

    orr     r2, r2, r2, lsl #16         @ pattern in both halfwords
    tst     r3, #0x400
    bne     hle_07_036_ovrwr

hle_07_036_no_ovrwr:
    tst     r1, #2
    strneh  r2, [r1], #0x3e             @ align
    subne   r0, r0, #1
    subs    r0, r0, #4
    blt     hle_07_036_l2

hle_07_036_l1:
    subs    r0, r0, #4
    str     r2, [r1], #0x40
    str     r2, [r1], #0x40
    bge     hle_07_036_l1

hle_07_036_l2:
    tst     r0, #2
    strne   r2, [r1], #0x40
    tst     r0, #1
    strneh  r2, [r1], #2
    b       hle_07_036_end_copy

hle_07_036_ovrwr:
    @ build in r12 a mask of the nibbles where the pattern is zero
    tst     r2, #0x000f
    orreq   r12,r12,#0x000f
    tst     r2, #0x00f0
    orreq   r12,r12,#0x00f0
    tst     r2, #0x0f00
    orreq   r12,r12,#0x0f00
    tst     r2, #0xf000
    orreq   r12,r12,#0xf000
    orrs    r12,r12,r12,lsl #16
    beq     hle_07_036_no_ovrwr         @ no zero nibble: plain fill

    tst     r1, #2
    beq     hle_07_036_ol0
    ldrh    r3, [r1]
    and     r3, r3, r12
    orr     r3, r3, r2
    strh    r3, [r1], #0x3e             @ align
    sub     r0, r0, #1

hle_07_036_ol0:
    subs    r0, r0, #2
    blt     hle_07_036_ol2

hle_07_036_ol1:
    subs    r0, r0, #2
    ldr     r3, [r1]
    and     r3, r3, r12
    orr     r3, r3, r2
    str     r3, [r1], #0x40
    bge     hle_07_036_ol1

hle_07_036_ol2:
    tst     r0, #1
    ldrneh  r3, [r1]
    andne   r3, r3, r12
    orrne   r3, r3, r2
    strneh  r3, [r1], #2

hle_07_036_end_copy:
    ldr     r2, [r7, #SSP_OFFS_DRAM]
    add     r3, r7, #0x400
    sub     r0, r1, r2                  @ new addr
    mov     r0, r0, lsr #1
    strh    r0, [r3, #(0x6c+4*4)]       @ SSP_OFFS_PM_WRITE+4*4 (low)

hle_07_036_ending1:
    ldr     r0, [r7, #0x1e0]            @ F1h << 16
    add     r0, r0, #(1<<16)
    and     r0, r0, #(3<<16)
    add     r0, r0, #(0xc4<<16)
    bic     r8, r8, #0xff0000
    orr     r8, r8, r0                  @ r2
    add     r0, r7, r0, lsr #15
    ldrh    r0, [r0]
    ldr     r2, [r7]
    and     r0, r0, r2
    movs    r5, r0, lsl #16

    ldr     r1, [r7, #4]                @ new mode
    add     r2, r7, #0x400
    strh    r1, [r2, #(0x6c+4*4+2)]     @ SSP_OFFS_PM_WRITE+4*4 (high)
    mov     r1, #4
    bl      ssp_pm_write
    sub     r11,r11,#35

hle_07_036_ret:
    hle_popstack
    b       ssp_drc_next

hle_07_036_ending2:
    sub     r11,r11,#3
    movs    r5, r5, lsl #1
    bmi     hle_07_036_ret
    mov     r0, #0x87
    b       ssp_drc_next                @ let the dispatcher finish this
