@ Source: picodrive.git / Pico / carthw / svp / stub_arm.S
@ (text recovered from a repository blame view; last listed change:
@  "some fixes and adjustments")
@ vim:filetype=armasm

@ The header is pulled in by the C preprocessor only; the .if 0 / .endif pair
@ makes the assembler skip whatever text the include expands to.
@ NOTE(review): TCACHE_SIZE is not defined in this file and presumably comes
@ from compiler.h - confirm.
.if 0
#include "compiler.h"
.endif

@ translation cache storage
.global tcache

@ cache maintenance helper and dispatcher entry points
.global flush_inval_caches
.global ssp_drc_entry
.global ssp_drc_next
.global ssp_drc_next_patch
.global ssp_drc_end

@ hand written replacements (HLE) for specific SSP code locations
.global ssp_hle_800
.global ssp_hle_902
.global ssp_hle_07_030
.global ssp_hle_07_036
.global ssp_hle_07_6d6
.global ssp_hle_11_12c
.global ssp_hle_11_384
.global ssp_hle_11_38a

@ translation cache buffer
@ TCACHE_SIZE bytes reserved in .text (so the generated code lives in an
@ executable section), aligned to 2^12 bytes.
.text
.align 12 @ 4096
.size tcache, TCACHE_SIZE
tcache:
    .space  TCACHE_SIZE


.text
.align 2


@ flush_inval_caches
@ Issues swi 0x9f0002 with r2 forced to 0. On ARM Linux (OABI encoding) this
@ is the cacheflush system call, which makes freshly written code visible to
@ instruction fetch.
@ In:    r0 = start address, r1 = end address of the modified range
@        (the only caller in this file, ssp_drc_dp_end, passes the patched
@        instruction slot and the one after it)
@ Clobb: r2, plus whatever the system call itself changes
flush_inval_caches:
    mov     r2, #0x0                @ must be 0
    swi     0x9f0002
    bx      lr


@ SSP general register file order (index into the words at SSP_OFFS_GR):
@ SSP_GR0, SSP_X, SSP_Y, SSP_A,
@ SSP_ST, SSP_STACK, SSP_PC, SSP_P,
@ SSP_PM0, SSP_PM1, SSP_PM2, SSP_XST,
@ SSP_PM4, SSP_gr13, SSP_PMC, SSP_AL

@ register map:
@ r4: XXYY
@ r5: A
@ r6: STACK and emu flags: sss0 * .uu. .lll NZCV (NZCV is PSR bits from ARM)
@ r7: SSP context
@ r8: r0-r2 (.210)
@ r9: r4-r6 (.654)
@ r10: P
@ r11: cycles


@ Byte offsets into the SSP context (base in r7) and register indices.
@ A general register N is the 32 bit word at SSP_OFFS_GR + N*4.
@ NOTE(review): these must match the layout of the context structure on the
@ C side, which is not visible in this file - confirm when either changes.
#define SSP_OFFS_GR         0x400
#define SSP_PC                  6
#define SSP_P                   7
#define SSP_PM0                 8
#define SSP_PMC                14
#define SSP_OFFS_PM_WRITE   0x46c // pmac_write[]
#define SSP_OFFS_EMUSTAT    0x484 // emu_status
#define SSP_OFFS_IRAM_ROM   0x48c // ptr_iram_rom
#define SSP_OFFS_DRAM       0x490 // ptr_dram
#define SSP_OFFS_IRAM_DIRTY 0x494
#define SSP_OFFS_IRAM_CTX   0x498 // iram_context
#define SSP_OFFS_BLTAB      0x49c // block_table
#define SSP_OFFS_BLTAB_IRAM 0x4a0
#define SSP_OFFS_TMP0       0x4a4 // for entry PC
#define SSP_OFFS_TMP1       0x4a8
#define SSP_OFFS_TMP2       0x4ac
#define SSP_WAIT_PM0        0x2000


@ ssp_drc_do_next patch_jump=0
@ Dispatcher body: find (or translate) the code block for an SSP PC and
@ continue there.
@ In:    r0 = SSP PC (only the low 16 bits are used), r7 = SSP context
@        patch_jump=1 only: lr = address following the calling instruction
@ Out:   never falls through. With patch_jump=0 it jumps to the block with
@        r0 = entry PC and r2 = block address; with patch_jump=1 it goes to
@        ssp_drc_do_patch with r2 = block address.
@ Uses:  SSP_OFFS_TMP0 (entry PC), SSP_OFFS_TMP1 (IRAM table slot),
@        SSP_OFFS_TMP2 (saved lr, patch_jump=1 only)
@ Clobb: r0-r2 here, plus anything ssp_translate_block and
@        ssp_get_iram_context do not preserve
@ PCs below 0x400 use a second table selected by the IRAM context, which is
@ recomputed through ssp_get_iram_context whenever the dirty flag is set.
.macro ssp_drc_do_next patch_jump=0
.if \patch_jump
    str     lr, [r7, #SSP_OFFS_TMP2]        @ jump instr. (actually call) address + 4
.endif
    mov     r0, r0, lsl #16
    mov     r0, r0, lsr #16                 @ keep the low 16 bits only
    str     r0, [r7, #SSP_OFFS_TMP0]
    cmp     r0, #0x400
    blt     0f                              @ ssp_de_iram

    @ PC >= 0x400: one table entry (code pointer) per PC
    ldr     r2, [r7, #SSP_OFFS_BLTAB]
    ldr     r2, [r2, r0, lsl #2]
    tst     r2, r2
.if \patch_jump
    bne     ssp_drc_do_patch
.else
    bxne    r2
.endif
    @ no code for this PC yet: translate and remember the result
    bl      ssp_translate_block
    mov     r2, r0
    ldr     r0, [r7, #SSP_OFFS_TMP0]        @ entry PC
    ldr     r1, [r7, #SSP_OFFS_BLTAB]
    str     r2, [r1, r0, lsl #2]
.if \patch_jump
    b       ssp_drc_do_patch
.else
    bx      r2
.endif

0: @ ssp_de_iram:
    ldr     r1, [r7, #SSP_OFFS_IRAM_DIRTY]
    tst     r1, r1
    ldreq   r1, [r7, #SSP_OFFS_IRAM_CTX]    @ clean: reuse the cached context
    beq     1f                              @ ssp_de_iram_ctx

    @ IRAM was written: get a fresh context id and clear the dirty flag
    bl      ssp_get_iram_context
    mov     r1, #0
    str     r1, [r7, #SSP_OFFS_IRAM_DIRTY]
    mov     r1, r0
    str     r1, [r7, #SSP_OFFS_IRAM_CTX]
    ldr     r0, [r7, #SSP_OFFS_TMP0]        @ entry PC

1: @ ssp_de_iram_ctx:
    ldr     r2, [r7, #SSP_OFFS_BLTAB_IRAM]
    add     r2, r2, r1, lsl #12             @ block_tab_iram + iram_context * 0x800/2*4
    add     r1, r2, r0, lsl #2
    ldr     r2, [r1]
    tst     r2, r2
.if \patch_jump
    bne     ssp_drc_do_patch
.else
    bxne    r2
.endif
    str     r1, [r7, #SSP_OFFS_TMP1]        @ table slot must survive the call
    bl      ssp_translate_block
    mov     r2, r0
    ldr     r0, [r7, #SSP_OFFS_TMP0]        @ entry PC
    ldr     r1, [r7, #SSP_OFFS_TMP1]        @ &block_table_iram[iram_context][rPC]
    str     r2, [r1]
.if \patch_jump
    b       ssp_drc_do_patch
.else
    bx      r2
.endif
.endm @ ssp_drc_do_next


@ ssp_drc_entry
@ Entry point into the recompiled code.
@ In:    r0 = cycle count to run (kept in r11)
@ Out:   does not return here; control leaves through ssp_drc_end, which
@        restores r4-r11 and returns r11 in r0.
@ Saves r4-r11 and lr, loads the SSP state from the context pointed to by
@ the variable ssp into the host registers listed in the register map, then
@ falls through into ssp_drc_next with r0 = SSP PC.
@ NOTE(review): nine words are pushed, so sp is only 4 byte aligned while the
@ dispatcher calls out to other routines - confirm this suits the target ABI.
ssp_drc_entry:
    stmfd   sp!, {r4-r11, lr}
    mov     r11, r0
ssp_regfile_load:
    ldr     r7, =ssp
    ldr     r7, [r7]                        @ r7 = SSP context
    add     r2, r7, #0x400
    add     r2, r2, #4
    ldmia   r2, {r3,r4,r5,r6,r8}            @ five words at 0x404: X, Y, A, ST, STACK
    mov     r3, r3, lsr #16
    mov     r3, r3, lsl #16
    orr     r4, r3, r4, lsr #16             @ XXYY

    and     r8, r8, #0x0f0000
    mov     r8, r8, lsl #13                 @ sss0 *
    and     r9, r6, #0x670000
    tst     r6, #0x80000000
    orrne   r8, r8, #0x8
    tst     r6, #0x20000000
    orrne   r8, r8, #0x4                    @ sss0 * NZ..
    orr     r6, r8, r9, lsr #12             @ sss0 * .uu. .lll NZ..

    ldr     r8, [r7, #0x440]                @ r0-r2
    ldr     r9, [r7, #0x444]                @ r4-r6
    ldr     r10,[r7, #(0x400+SSP_P*4)]      @ P

    ldr     r0, [r7, #(SSP_OFFS_GR+SSP_PC*4)]
    mov     r0, r0, lsr #16                 @ PC is kept in the upper halfword


@ ssp_drc_next: dispatch to the block for the SSP PC in r0 (jumped to).
ssp_drc_next:
    ssp_drc_do_next 0


@ ssp_drc_next_patch: same lookup, but reached with a call (lr valid) so
@ that ssp_drc_do_patch can rewrite the calling instruction afterwards.
ssp_drc_next_patch:
    ssp_drc_do_next 1

@ ssp_drc_do_patch
@ Rewrites the instruction that called ssp_drc_next_patch so that it reaches
@ the target block directly next time, then continues at the target.
@ In:    r2 = target code address, r7 = SSP context,
@        SSP_OFFS_TMP2 = address following the calling instruction,
@        SSP_OFFS_TMP0 = entry PC
@ Out:   jumps to r2 with r0 = entry PC
@ Clobb: r0-r3, r12, lr, SSP_OFFS_TMP1
@ Three cases, by distance from the slot after the call to the target:
@   0     target is the very next instruction: the call becomes a nop
@   4     the instruction after the call is moved up into the call slot
@         (word value + 1) and its old slot becomes a nop
@   else  the call becomes a plain branch (L bit cleared, condition kept)
@ The nop word is 0xe1a00000.
ssp_drc_do_patch:
    ldr     r1, [r7, #SSP_OFFS_TMP2]        @ jump instr. (actually call) address + 4
    subs    r12,r2, r1
    moveq   r3,     #0xe1000000
    orreq   r3, r3, #0x00a00000             @ nop
    streq   r3, [r1, #-4]
    beq     ssp_drc_dp_end

    cmp     r12,#4
    ldreq   r3, [r1]
    addeq   r3, r3, #1                      @ NOTE(review): presumably a branch whose
                                            @ word offset grows by one when moved up
    streq   r3, [r1, #-4]                   @ move the other cond up
    moveq   r3,     #0xe1000000
    orreq   r3, r3, #0x00a00000
    streq   r3, [r1]                        @ fill its place with nop
    beq     ssp_drc_dp_end

    ldr     r3, [r1, #-4]
    sub     r12,r12,#4                      @ offset relative to call address + 8
    mov     r3, r3, lsr #24                 @ keep only the top byte of the call
    bic     r3, r3, #1                      @ L bit
    orr     r3, r3, r12,lsl #6              @ word offset lands in bits 8-31
    mov     r3, r3, ror #8                  @ patched branch instruction
    str     r3, [r1, #-4]

ssp_drc_dp_end:
    str     r2, [r7, #SSP_OFFS_TMP1]        @ r2 is not preserved by the flush
    sub     r0, r1, #4
    add     r1, r1, #4                      @ covers both slots that may have changed
    bl      flush_inval_caches
    ldr     r2, [r7, #SSP_OFFS_TMP1]
    ldr     r0, [r7, #SSP_OFFS_TMP0]
    bx      r2


@ ssp_drc_end
@ Leaves the recompiled code: writes the host register state back into the
@ SSP context and returns to the caller of ssp_drc_entry.
@ In:    r0 = SSP PC to resume at, r4-r11 as in the register map
@ Out:   r0 = r11 (cycles); r4-r11 restored from the stack
@ This is the exact inverse of the unpacking done at ssp_regfile_load.
ssp_drc_end:
    mov     r0, r0, lsl #16                 @ PC is stored in the upper halfword
    str     r0, [r7, #(SSP_OFFS_GR+SSP_PC*4)]

ssp_regfile_store:
    str     r10,[r7, #(0x400+SSP_P*4)]      @ P
    str     r8, [r7, #0x440]                @ r0-r2
    str     r9, [r7, #0x444]                @ r4-r6

    mov     r9, r6, lsr #13
    and     r9, r9, #(7<<16)                @ STACK
    mov     r3, r6, lsl #28
    msr     cpsr_flg, r3                    @ saved NZCV to ARM PSR
    and     r6, r6, #0x670
    mov     r6, r6, lsl #12
    orrmi   r6, r6, #0x80000000             @ N
    orreq   r6, r6, #0x20000000             @ Z

    mov     r3, r4, lsl #16                 @ Y
    mov     r2, r4, lsr #16
    mov     r2, r2, lsl #16                 @ X
    add     r8, r7, #0x400
    add     r8, r8, #4
    stmia   r8, {r2,r3,r5,r6,r9}            @ X, Y, A, ST, STACK

    mov     r0, r11
    ldmfd   sp!, {r4-r11, lr}
    bx      lr



@ ssp_hle_800 - replaces the SSP sequence
@ ld A, PM0
@ andi 2
@ bra z=1, gloc_0800
@ Tests bit 17 of the PM0 register word. While it is clear, SSP_WAIT_PM0 is
@ set in emu_status, 1024 cycles are dropped and execution stops with
@ PC = 0x400 (so the same check runs again on the next entry). Once the bit
@ is set, execution continues at PC 0x404.
@ Clobb: r0, r1, flags
ssp_hle_800:
    ldr     r0, [r7, #(SSP_OFFS_GR+SSP_PM0*4)]
    ldr     r1, [r7, #SSP_OFFS_EMUSTAT]
    tst     r0, #0x20000
    orreq   r1, r1,  #SSP_WAIT_PM0
    subeq   r11,r11, #1024
    streq   r1, [r7, #SSP_OFFS_EMUSTAT]
    mov     r0,     #0x400                  @ mov leaves the flags from tst intact
    beq     ssp_drc_end
    orrne   r0, r0, #0x004
    b       ssp_drc_next


@ hle_flushflags
@ Replaces the saved NZCV nibble in r6 (bits 0-3) with the current ARM
@ condition flags. Clobbers r1.
.macro hle_flushflags
    bic     r6, r6, #0xf
    mrs     r1, cpsr
    orr     r6, r6, r1, lsr #28
.endm

@ hle_popstack
@ Pops one entry from the SSP stack: decrements the stack pointer kept in
@ the top three bits of r6 and loads the halfword it then selects from the
@ stack area at context offset 0x448.
@ Out:   r0 = popped value (used by callers as the PC to continue at)
@ Clobb: r1
.macro hle_popstack
    sub     r6, r6, #0x20000000             @ one step of the sss field
    add     r1, r7, #0x400
    add     r1, r1, #0x048                  @ stack
    add     r1, r1, r6, lsr #28             @ sss0 = index * 2 bytes
    ldrh    r0, [r1]
.endm

@ ssp_hle_902
@ Block copy into IRAM. The halfword at context offset 0x200 is an index
@ (in 16 bit words) into the memory at ptr_iram_rom; the two words found
@ there give the destination (low 10 bits used) and a length, followed by
@ length+1 words of data which are copied to the destination.
@ Afterwards the source index is written back, PMC and pmac_write[4] are set
@ from the end of the destination, IRAM is marked dirty, and the routine
@ returns through the SSP stack.
@ In:    r7 = SSP context, r11 = cycles
@ Out:   continues at the popped PC (or leaves if the timeslice is over)
@ Clobb: r0-r3, r12; r5 is reduced to its low 16 bits; saved flags in r6
@        are cleared
@ The copy moves two words per iteration. The count is tested before the
@ first pair and the loop runs only while a whole pair remains, so an odd
@ count is finished by the single word tail instead of running past the end.
ssp_hle_902:
    cmp     r11, #0
    ble     ssp_drc_end

    add     r1, r7, #0x200
    ldrh    r0, [r1]
    ldr     r3, [r7, #SSP_OFFS_IRAM_ROM]
    add     r2, r3, r0, lsl #1              @ (r7|00)
    ldrh    r0, [r2], #2
    mov     r5, r5, lsl #16
    mov     r5, r5, lsr #16
    bic     r0, r0, #0xfc00
    add     r3, r3, r0, lsl #1              @ IRAM dest
    ldrh    r12,[r2], #2                    @ length
    bic     r3, r3, #3                      @ always seen aligned
@   orr     r5, r5, #0x08000000
@   orr     r5, r5, #0x00880000
@   sub     r5, r5, r12, lsl #16
    bic     r6, r6, #0xf
    add     r12,r12,#1                      @ words to copy
    mov     r0, #1
    str     r0, [r7, #SSP_OFFS_IRAM_DIRTY]
    sub     r11,r11,r12,lsl #1
    sub     r11,r11,r12                     @ -= length*3

    subs    r12,r12,#2                      @ is there a whole pair to copy?
    blt     ssp_hle_902_tail

ssp_hle_902_loop:
    ldrh    r0, [r2], #2
    ldrh    r1, [r2], #2
    subs    r12,r12,#2
    orr     r0, r0, r1, lsl #16
    str     r0, [r3], #4
    bge     ssp_hle_902_loop

ssp_hle_902_tail:
    tst     r12, #1                         @ -1 here means one word is left
    ldrneh  r0, [r2], #2
    strneh  r0, [r3], #2

    ldr     r0, [r7, #SSP_OFFS_IRAM_ROM]
    add     r1, r7, #0x200
    sub     r2, r2, r0
    mov     r2, r2, lsr #1
    strh    r2, [r1]                        @ (r7|00)

    sub     r0, r3, r0
    mov     r0, r0, lsr #1
    orr     r0, r0, #0x08000000
    orr     r0, r0, #0x001c8000
    str     r0, [r7, #(SSP_OFFS_GR+SSP_PMC*4)]
    str     r0, [r7, #(SSP_OFFS_PM_WRITE+4*4)]

    hle_popstack
    subs    r11,r11,#16                     @ timeslice is likely to end
    ble     ssp_drc_end
    b       ssp_drc_next


@ this one is car rendering related
@ hle_11_12c_mla offs_in
@ One output element of ssp_hle_11_12c. Reads four signed halfwords c0..c3
@ at context offset offs_in and computes
@   r5 = (r2*c0 + r3*c1 + r4*c2 + (c3 << 11)) >> 13   (logical shift)
@ then stores the low halfword at context offset (r8 >> 23) and adds 1<<24
@ to r8 (the caller has rotated r8 so that this steps the pointer byte).
@ In:    r2, r3, r4 = multiplicands, r7 = SSP context, r8 = rotated pointers
@ Out:   r5 = result, flags set from the final shift
@ Clobb: r0, r1, r12
.macro hle_11_12c_mla offs_in
    ldrsh   r5, [r7, #(\offs_in+0)]
    ldrsh   r0, [r7, #(\offs_in+2)]
    ldrsh   r1, [r7, #(\offs_in+4)]
    mul     r5, r2, r5
    ldrsh   r12,[r7, #(\offs_in+6)]
    mla     r5, r3, r0, r5
    mla     r5, r4, r1, r5
    add     r5, r5, r12,lsl #11

    movs    r5, r5, lsr #13
    add     r1, r7, r8, lsr #23             @ pointer byte * 2 = byte offset
    strh    r5, [r1]
    add     r8, r8, #(1<<24)
.endm

@ ssp_hle_11_12c
@ Reads three values through ssp_pm_read(0), sign extends the low 16 bits of
@ each and doubles them, then produces three outputs with hle_11_12c_mla
@ from the coefficient groups at context offsets 0x20, 0x28 and 0x30.
@ The low byte of r8 is cleared first, so the outputs go to context offsets
@ 0, 2 and 4; it is left as 0x1c on exit.
@ In:    r7 = SSP context, r11 = cycles
@ Out:   continues at the PC popped from the SSP stack; 33 cycles charged
@ Clobb: r0-r5, r12, flags (r4 and r5 no longer hold XXYY and A afterwards)
ssp_hle_11_12c:
    cmp     r11, #0
    ble     ssp_drc_end

    mov     r0, #0
    bl      ssp_pm_read
    mov     r4, r0

    mov     r0, #0
    bl      ssp_pm_read
    mov     r5, r0

    mov     r0, #0
    bl      ssp_pm_read

    mov     r2, r4, lsl #16
    mov     r2, r2, asr #15                 @ (r7|00) << 1
    mov     r3, r5, lsl #16
    mov     r3, r3, asr #15                 @ (r7|01) << 1
    mov     r4, r0, lsl #16
    mov     r4, r4, asr #15                 @ (r7|10) << 1

    bic     r8, r8, #0xff
    mov     r8, r8, ror #16                 @ pointer byte now sits in bits 24-31

    hle_11_12c_mla 0x20
    hle_11_12c_mla 0x28
    hle_11_12c_mla 0x30

    mov     r8, r8, ror #16                 @ rotate back; low byte is 0 again
    orr     r8, r8, #0x1c
@   hle_flushflags
    hle_popstack
    sub     r11,r11,#33
    b       ssp_drc_next


@ ssp_hle_11_384 / ssp_hle_11_38a
@ Two entry points into the same loop; they differ only in the iteration
@ count (r3 = 2 gives three passes, r3 = 3 gives four).
@ Each pass reads from a running pointer that starts at context offset
@ 0x1c0 and advances 6 bytes, compares against values at context offsets
@ 0x224, 0x220 and 0x1e8, and collects result bits in r2 using a mask in r1
@ that is shifted left once per pass. The collected word is stored at
@ context offset 0x1dc.
@ In:    r7 = SSP context, r11 = cycles
@ Out:   low byte of r8 = final pointer as a word index, low byte of r9 =
@        final mask; continues at the PC popped from the SSP stack
@ Clobb: r0-r3, r5, r12, flags
ssp_hle_11_384:
    mov     r3, #2
    b       ssp_hle_11_38x

ssp_hle_11_38a:
    mov     r3, #3                          @ r5

ssp_hle_11_38x:
    cmp     r11, #0
    ble     ssp_drc_end

    mov     r2, #0                          @ EFh, EEh
    mov     r1, #1                          @ r4
    add     r0, r7, #0x1c0                  @ r0 (based)

ssp_hle_11_38x_loop:
    ldrh    r5, [r0], #2
    ldr     r12,[r7, #0x224]
    mov     r5, r5, lsl #16
    eor     r5, r5, r5, asr #31
    add     r5, r5, r5, lsr #31             @ abs(r5)
    @ NOTE(review): after the eor the sign bit is already clear, so the add
    @ contributes 0 and a negative input yields its one-complement rather
    @ than its negation. Left as is; confirm against the SSP code replaced.
    cmp     r5, r12,lsl #16
    orrpl   r2, r2, r1,lsl #16              @ EFh |= r4

    ldrh    r5, [r0, #2]!
    ldr     r12,[r7, #0x220]
    cmp     r5, r12,lsr #16
    orrpl   r2, r2, r1,lsl #16              @ EFh |= r4

    ldr     r12,[r7, #0x1e8]
    add     r0, r0, #2
    mov     r12,r12,lsl #16
    cmp     r5, r12,lsr #16
    orrmi   r2, r2, r1

    mov     r1, r1, lsl #1
    subs    r3, r3, #1
    bpl     ssp_hle_11_38x_loop

    str     r2, [r7, #0x1dc]
    sub     r0, r0, r7
    bic     r8, r8, #0xff
    orr     r8, r8, r0, lsr #1
    bic     r9, r9, #0xff
    orr     r9, r9, r1

@   hle_flushflags
    hle_popstack
    sub     r11,r11,#(9+30*4)
    b       ssp_drc_next


@ ssp_hle_07_6d6
@ Scans 32 bit entries starting at the context offset selected by the low
@ byte of r8 (a word index) until an entry with bit 31 set is found. The low
@ halfword of every other entry is compared as a signed value against a
@ running pair kept in the word at context offset 0x20c: the low half
@ (106h) is replaced when the entry is smaller, the high half (107h) when
@ the entry is not smaller. The updated pair is written back.
@ In:    r7 = SSP context, r8 low byte = start index, r11 = cycles
@ Out:   r8 low byte = index just past the terminating entry; continues at
@        the PC popped from the SSP stack; 16 cycles charged per entry
@        processed plus 6
@ Clobb: r0-r2, r5, flags
ssp_hle_07_6d6:
    cmp     r11, #0
    ble     ssp_drc_end

    ldr     r1, [r7, #0x20c]
    and     r0, r8, #0xff                   @ assuming alignment
    add     r0, r7, r0, lsl #1
    mov     r2, r1, lsr #16
    mov     r1, r1, lsl #16                 @ 106h << 16
    mov     r2, r2, lsl #16                 @ 107h << 16

ssp_hle_07_6d6_loop:
    ldr     r5, [r0], #4
    tst     r5, r5
    bmi     ssp_hle_07_6d6_end              @ bit 31 set ends the list
    mov     r5, r5, lsl #16
    cmp     r5, r1
    movmi   r1, r5
    cmp     r5, r2
    sub     r11,r11,#16                     @ sub without s keeps the cmp flags
    bmi     ssp_hle_07_6d6_loop
    mov     r2, r5
    b       ssp_hle_07_6d6_loop

ssp_hle_07_6d6_end:
    sub     r0, r0, r7
    mov     r0, r0, lsr #1
    bic     r8, r8, #0xff
    orr     r8, r8, r0
    orr     r1, r2, r1, lsr #16
    str     r1, [r7, #0x20c]
    hle_popstack
    sub     r11,r11,#6
    b       ssp_drc_next


@ ssp_hle_07_030 / ssp_hle_07_036
@ ssp_hle_07_030 first shifts the halfword at context offset 0 left by four
@ bits, folding the bits shifted out back into the low end, then falls into
@ ssp_hle_07_036.
@
@ ssp_hle_07_036 takes d = F1h - F0h (the two halves of the word at context
@ offset 0x1e0) as a signed 16 bit value:
@   d < 4         -> hle_07_036_ending2: return through the SSP stack if bit
@                    14 of d is set, otherwise hand PC 0x87 to the dispatcher
@   d >= EEh      -> return through the SSP stack
@   otherwise     -> write through ssp_pm_write, fill a computed number of
@                    DRAM cells with the pattern at context offset 0, then
@                    finish in hle_07_036_ending1
@ The fill supports the two mode values 0x4018 and 0x4418; anything else is
@ reported through tr_unhandled. With bit 0x400 set in the mode, pattern
@ nibbles that are zero keep the existing memory contents.
@ In:    r7 = SSP context, r11 = cycles
@ Out:   continues through ssp_drc_next
@ Clobb: r0-r3, r5, r12, flags, bits 16-23 of r8 (on the ending1 path)
ssp_hle_07_030:
    ldrh    r0, [r7]
    mov     r0, r0, lsl #4
    orr     r0, r0, r0, lsr #16
    strh    r0, [r7]
    sub     r11,r11,#3

ssp_hle_07_036:
    ldr     r1, [r7, #0x1e0]                @ F1h F0h
    rsb     r5, r1, r1, lsr #16
    mov     r5, r5, lsl #16                 @ AL not needed
    cmp     r5, #(4<<16)
    sub     r11,r11,#5
    bmi     hle_07_036_ending2
    ldr     r1, [r7, #0x1dc]                @ EEh
    cmp     r5, r1, lsl #16
    sub     r11,r11,#5
    bpl     hle_07_036_ret

    mov     r0, r5, lsr #16
    add     r1, r7, #0x100
    strh    r0, [r1, #0xea]                 @ F5h
    ldr     r0, [r7, #0x1e0]                @ F0h
    and     r0, r0, #3
    strh    r0, [r1, #0xf0]                 @ F8h
    add     r2, r0, #0xc0                   @ r2
    add     r2, r7, r2, lsl #1
    ldrh    r2, [r2]
    ldr     r0, [r7]
    mov     r1, #4
    and     r0, r0, r2
    bl      ssp_pm_write
    @ will handle PMC later
    ldr     r0, [r7, #0x1e8]                @ F5h << 16
    ldr     r1, [r7, #0x1f0]                @ F8h
    ldr     r2, [r7, #0x1d4]                @ EAh
    sub     r0, r0, #(3<<16)
    add     r0, r0, r1, lsl #16
    sub     r0, r2, r0, asr #18
    and     r0, r0, #0x7f
    rsbs    r0, r0, #0x78                   @ length
    ble     hle_07_036_ending1

    sub     r11,r11,r0

    @ copy part
    ldr     r1, [r7, #(SSP_OFFS_GR+SSP_PMC*4)]
    ldr     r2, [r7, #SSP_OFFS_DRAM]
    mov     r1, r1, lsl #16
    add     r1, r2, r1, lsr #15             @ addr (based)
    ldrh    r2, [r7, #0]                    @ pattern
    ldrh    r3, [r7, #6]                    @ mode

    @ r12 becomes 0 for mode 0x4018 or 0x4418; anything else is reported
    mov     r12,    #0x4000
    orr     r12,r12,#0x0018
    subs    r12,r3, r12
    subnes  r12,r12,#0x0400
    blne    tr_unhandled

    orr     r2, r2, r2, lsl #16             @ pattern in both halfwords
    tst     r3, #0x400
    bne     hle_07_036_ovrwr

    @ plain fill: r0 counts cells, each word store covers two of them
hle_07_036_no_ovrwr:
    tst     r1, #2
    strneh  r2, [r1], #0x3e                 @ align
    subne   r0, r0, #1
    subs    r0, r0, #4
    blt     hle_07_036_l2

hle_07_036_l1:
    subs    r0, r0, #4
    str     r2, [r1], #0x40
    str     r2, [r1], #0x40
    bge     hle_07_036_l1

hle_07_036_l2:
    tst     r0, #2                          @ low bits of r0 hold the remainder
    strne   r2, [r1], #0x40
    tst     r0, #1
    strneh  r2, [r1], #2
    b       hle_07_036_end_copy

    @ masked fill: r12 gets 0xf in every nibble where the pattern is zero
hle_07_036_ovrwr:
    tst     r2, #0x000f
    orreq   r12,r12,#0x000f
    tst     r2, #0x00f0
    orreq   r12,r12,#0x00f0
    tst     r2, #0x0f00
    orreq   r12,r12,#0x0f00
    tst     r2, #0xf000
    orreq   r12,r12,#0xf000
    orrs    r12,r12,r12,lsl #16
    beq     hle_07_036_no_ovrwr             @ nothing to keep: plain fill will do

    tst     r1, #2
    beq     hle_07_036_ol0
    ldrh    r3, [r1]
    and     r3, r3, r12
    orr     r3, r3, r2
    strh    r3, [r1], #0x3e                 @ align
    sub     r0, r0, #1

hle_07_036_ol0:
    subs    r0, r0, #2
    blt     hle_07_036_ol2

hle_07_036_ol1:
    subs    r0, r0, #2
    ldr     r3, [r1]
    and     r3, r3, r12
    orr     r3, r3, r2
    str     r3, [r1], #0x40
    bge     hle_07_036_ol1

hle_07_036_ol2:
    tst     r0, #1
    ldrneh  r3, [r1]
    andne   r3, r3, r12
    orrne   r3, r3, r2
    strneh  r3, [r1], #2

hle_07_036_end_copy:
    ldr     r2, [r7, #SSP_OFFS_DRAM]
    add     r3, r7, #0x400
    sub     r0, r1, r2                      @ new addr
    mov     r0, r0, lsr #1
    strh    r0, [r3, #(0x6c+4*4)]           @ SSP_OFFS_PM_WRITE+4*4 (low)

hle_07_036_ending1:
    ldr     r0, [r7, #0x1e0]                @ F1h << 16
    add     r0, r0, #(1<<16)
    and     r0, r0, #(3<<16)
    add     r0, r0, #(0xc4<<16)
    bic     r8, r8, #0xff0000
    orr     r8, r8, r0                      @ r2
    add     r0, r7, r0, lsr #15
    ldrh    r0, [r0]
    ldr     r2, [r7]
    and     r0, r0, r2
    movs    r5, r0, lsl #16

    ldr     r1, [r7, #4]                    @ new mode
    add     r2, r7, #0x400
    strh    r1, [r2, #(0x6c+4*4+2)]         @ SSP_OFFS_PM_WRITE+4*4 (high)
    mov     r1, #4
    bl      ssp_pm_write
    sub     r11,r11,#35

hle_07_036_ret:
    hle_popstack
    b       ssp_drc_next

hle_07_036_ending2:
    sub     r11,r11,#3
    movs    r5, r5, lsl #1
    bmi     hle_07_036_ret
    mov     r0, #0x87
    b       ssp_drc_next                    @ let the dispatcher finish this
