bugfixes, new config system and messed code for it
[picodrive.git] / Pico / carthw / svp / stub_arm.S
CommitLineData
e807ac75 1@ vim:filetype=armasm
2
3.if 0
4#include "compiler.h"
5.endif
6
@ Symbols made visible to the linker: the translation cache and block
@ tables, the cache flush helper, the recompiler entry/dispatch labels and
@ the hand-written ssp_hle_* replacement routines defined in this file.
7.global tcache
1ca2ea4f 8.global ssp_block_table
9.global ssp_block_table_iram
e807ac75 10
11.global flush_inval_caches
71bb1b7b 12.global ssp_drc_entry
13.global ssp_drc_next
45883918 14.global ssp_drc_next_patch
15.global ssp_drc_end
d5276282 16.global ssp_hle_800
f5d1115f 17.global ssp_hle_902
ee9ee9fd 18.global ssp_hle_07_030
19.global ssp_hle_07_036
20.global ssp_hle_07_6d6
21.global ssp_hle_11_12c
22.global ssp_hle_11_384
23.global ssp_hle_11_38a
e807ac75 24
1ca2ea4f 25@ translation cache buffer + pointer table
@ Three regions are reserved back to back with .space, starting on a
@ 4096-byte boundary inside .text: tcache, ssp_block_table and
@ ssp_block_table_iram, followed by SSP_BLOCKTAB_ALIGN_SIZE bytes of padding.
@ The SSP_*_SIZE constants are not defined in this file.
@ NOTE(review): presumably they come from compiler.h, and the buffer lives in
@ .text so that generated code can be executed from it - confirm.
e807ac75 26.text
27.align 12 @ 4096
1ca2ea4f 28.size tcache, SSP_TCACHE_SIZE
29.size ssp_block_table, SSP_BLOCKTAB_SIZE
30.size ssp_block_table_iram, SSP_BLOCKTAB_IRAM_SIZE
e807ac75 31tcache:
1ca2ea4f 32 .space SSP_TCACHE_SIZE
33ssp_block_table:
34 .space SSP_BLOCKTAB_SIZE
35ssp_block_table_iram:
36 .space SSP_BLOCKTAB_IRAM_SIZE
37 .space SSP_BLOCKTAB_ALIGN_SIZE
e807ac75 38
39
40.text
41.align 2
42
43
@ flush_inval_caches: raise software interrupt 0x9f0002 with r2 forced to 0.
@ r0 and r1 are passed through untouched from the caller; ssp_drc_dp_end
@ calls this with r0/r1 bracketing the instruction words it has just patched.
@ NOTE(review): presumably the ARM Linux cacheflush syscall taking
@ (start, end, flags) - confirm against the target kernel ABI.
44flush_inval_caches:
45 mov r2, #0x0 @ must be 0
46 swi 0x9f0002
47 bx lr
48
49
50@ SSP_GR0, SSP_X, SSP_Y, SSP_A,
51@ SSP_ST, SSP_STACK, SSP_PC, SSP_P,
52@ SSP_PM0, SSP_PM1, SSP_PM2, SSP_XST,
53@ SSP_PM4, SSP_gr13, SSP_PMC, SSP_AL
54
55@ register map:
56@ r4: XXYY
57@ r5: A
b9c1d012 58@ r6: STACK and emu flags: sss0 * .uu. .lll NZCV (NZCV is PSR bits from ARM)
e807ac75 59@ r7: SSP context
5d817c91 60@ r8: r0-r2 (.210)
61@ r9: r4-r6 (.654)
e807ac75 62@ r10: P
63@ r11: cycles
64
e807ac75 65
@ Layout constants for the SSP context that r7 points to.
@ SSP_OFFS_GR is the byte offset of the general register array; register n
@ is the 32-bit word at SSP_OFFS_GR + n*4 (SSP_PC, SSP_P, SSP_PM0 and
@ SSP_PMC are such indices). The remaining SSP_OFFS_* values are byte
@ offsets of individual context fields.
@ NOTE(review): these are hard-coded here and presumably must match the
@ C-side structure layout - confirm whenever that structure changes.
45883918 66#define SSP_OFFS_GR 0x400
67#define SSP_PC 6
68#define SSP_P 7
69#define SSP_PM0 8
f5d1115f 70#define SSP_PMC 14
71#define SSP_OFFS_PM_WRITE 0x46c // pmac_write[]
45883918 72#define SSP_OFFS_EMUSTAT 0x484 // emu_status
f5d1115f 73#define SSP_OFFS_IRAM_ROM 0x48c // ptr_iram_rom
ee9ee9fd 74#define SSP_OFFS_DRAM 0x490 // ptr_dram
45883918 75#define SSP_OFFS_IRAM_DIRTY 0x494
76#define SSP_OFFS_IRAM_CTX 0x498 // iram_context
77#define SSP_OFFS_BLTAB 0x49c // block_table
78#define SSP_OFFS_BLTAB_IRAM 0x4a0
79#define SSP_OFFS_TMP0 0x4a4 // for entry PC
80#define SSP_OFFS_TMP1 0x4a8
81#define SSP_OFFS_TMP2 0x4ac
82#define SSP_WAIT_PM0 0x2000
83
84
@ ssp_drc_do_next: find (translating on demand) the native code block for
@ the SSP program counter in r0 and transfer control to it.
@
@ In:  r0 = target SSP PC (only the low 16 bits are used), r7 = SSP context.
@ patch_jump=0: jump straight to the block with bx.
@ patch_jump=1: first save lr in the TMP2 slot, then go through
@               ssp_drc_do_patch, which rewrites the calling instruction.
@ PCs below 0x400 are looked up in the per-context IRAM table, everything
@ else in the table pointed to by SSP_OFFS_BLTAB. A zero table entry means
@ "not translated yet"; ssp_translate_block is then called and its return
@ value (r0) is stored into the table as the block address.
@ The entry PC is kept in the TMP0 slot so it can be reloaded after calls.
@ Uses r0-r2 directly; whatever the called C helpers clobber is also lost.
85.macro ssp_drc_do_next patch_jump=0
86.if \patch_jump
87 str lr, [r7, #SSP_OFFS_TMP2] @ jump instr. (actually call) address + 4
88.endif
@ zero-extend the PC to 16 bits and remember it
89 mov r0, r0, lsl #16
90 mov r0, r0, lsr #16
91 str r0, [r7, #SSP_OFFS_TMP0]
92 cmp r0, #0x400
93 blt 0f @ ssp_de_iram
94
@ PC >= 0x400: r2 = block_table[PC]
95 ldr r2, [r7, #SSP_OFFS_BLTAB]
96 ldr r2, [r2, r0, lsl #2]
97 tst r2, r2
98.if \patch_jump
99 bne ssp_drc_do_patch
100.else
101 bxne r2
102.endif
@ no entry yet: translate, record the result in the table, then go there
103 bl ssp_translate_block
104 mov r2, r0
105 ldr r0, [r7, #SSP_OFFS_TMP0] @ entry PC
106 ldr r1, [r7, #SSP_OFFS_BLTAB]
107 str r2, [r1, r0, lsl #2]
108.if \patch_jump
109 b ssp_drc_do_patch
110.else
111 bx r2
112.endif
113
@ PC < 0x400: if the IRAM dirty flag is clear, reuse the stored context id
1140: @ ssp_de_iram:
115 ldr r1, [r7, #SSP_OFFS_IRAM_DIRTY]
116 tst r1, r1
117 ldreq r1, [r7, #SSP_OFFS_IRAM_CTX]
118 beq 1f @ ssp_de_iram_ctx
119
@ dirty: ask ssp_get_iram_context for the context id (returned in r0),
@ clear the dirty flag and store the new id
120 bl ssp_get_iram_context
121 mov r1, #0
122 str r1, [r7, #SSP_OFFS_IRAM_DIRTY]
123 mov r1, r0
124 str r1, [r7, #SSP_OFFS_IRAM_CTX]
125 ldr r0, [r7, #SSP_OFFS_TMP0] @ entry PC
126
@ r1 = context id, r0 = PC; afterwards r1 = address of the table slot
1271: @ ssp_de_iram_ctx:
128 ldr r2, [r7, #SSP_OFFS_BLTAB_IRAM]
129 add r2, r2, r1, lsl #12 @ block_tab_iram + iram_context * 0x800/2*4
130 add r1, r2, r0, lsl #2
131 ldr r2, [r1]
132 tst r2, r2
133.if \patch_jump
134 bne ssp_drc_do_patch
135.else
136 bxne r2
137.endif
@ slot is empty: keep its address in TMP1 across the translator call
138 str r1, [r7, #SSP_OFFS_TMP1]
139 bl ssp_translate_block
140 mov r2, r0
141 ldr r0, [r7, #SSP_OFFS_TMP0] @ entry PC
142 ldr r1, [r7, #SSP_OFFS_TMP1] @ &block_table_iram[iram_context][rPC]
143 str r2, [r1]
144.if \patch_jump
145 b ssp_drc_do_patch
146.else
147 bx r2
148.endif
149.endm @ ssp_drc_do_next
150
151
@ ssp_drc_entry: entry point into translated code.
@ In: r0 = cycle count, copied to r11 (see the register map above).
@ Saves r4-r11 and lr on the stack, loads the SSP register file from the
@ context into the mapped ARM registers and falls through into
@ ssp_drc_next with r0 = upper 16 bits of the stored PC register.
152ssp_drc_entry:
153 stmfd sp!, {r4-r11, lr}
154 mov r11, r0
71bb1b7b 155ssp_regfile_load:
@ r7 = value stored at the symbol ssp (the context pointer)
e807ac75 156 ldr r7, =ssp
157 ldr r7, [r7]
@ load five consecutive register words starting at context + 0x404;
@ per the register list above these are X, Y, A, ST and STACK
158 add r2, r7, #0x400
159 add r2, r2, #4
160 ldmia r2, {r3,r4,r5,r6,r8}
@ r4 = upper half of X in bits 31-16, upper half of Y in bits 15-0
161 mov r3, r3, lsr #16
162 mov r3, r3, lsl #16
163 orr r4, r3, r4, lsr #16 @ XXYY
b9c1d012 164
@ build r6: bits 16-18 of the STACK word go to bits 29-31, bits 0x670000 of
@ ST go to bits 0x670, and ST bits 31 and 29 become the N (0x8) and
@ Z (0x4) flag bits
165 and r8, r8, #0x0f0000
166 mov r8, r8, lsl #13 @ sss0 *
167 and r9, r6, #0x670000
168 tst r6, #0x80000000
169 orrne r8, r8, #0x8
170 tst r6, #0x20000000
171 orrne r8, r8, #0x4 @ sss0 * NZ..
a6fb500b 172 orr r6, r8, r9, lsr #12 @ sss0 * .uu. .lll NZ..
b9c1d012 173
e807ac75 174 ldr r8, [r7, #0x440] @ r0-r2
175 ldr r9, [r7, #0x444] @ r4-r6
45883918 176 ldr r10,[r7, #(0x400+SSP_P*4)] @ P
177
@ the PC is kept in the upper half of its register word
178 ldr r0, [r7, #(SSP_OFFS_GR+SSP_PC*4)]
179 mov r0, r0, lsr #16
180
181
@ ssp_drc_next: dispatch to the block for the SSP PC in r0 without patching
@ the caller. Every path of the macro ends in a branch, so execution does
@ not fall through into ssp_drc_next_patch.
182ssp_drc_next:
183 ssp_drc_do_next 0
184
185
@ ssp_drc_next_patch: same lookup, but lr is saved in the TMP2 slot and
@ control continues in ssp_drc_do_patch, which rewrites the instruction
@ just before the return address to branch to the block directly.
186ssp_drc_next_patch:
187 ssp_drc_do_next 1
188
@ ssp_drc_do_patch: rewrite the instruction that called ssp_drc_next_patch
@ so that it reaches the target block directly next time.
@ In:  r2 = address of the target block,
@      TMP2 slot = return address of the call (call instruction address + 4).
@ Three cases, selected by the distance r12 = target - return address:
@   0: the target directly follows the call; the call becomes a nop.
@   4: the target is one word further; the instruction after the call is
@      copied over the call with 1 added to it, and its old place is
@      filled with a nop.
@   otherwise: the call is turned into a plain branch (L bit cleared) whose
@      24-bit word offset is (target - return address - 4) / 4.
@ 0xe1a00000 is the nop encoding used.
@ Afterwards the patched range is passed to flush_inval_caches and
@ execution continues at the target with r0 = entry PC from TMP0.
189ssp_drc_do_patch:
190 ldr r1, [r7, #SSP_OFFS_TMP2] @ jump instr. (actually call) address + 4
191 subs r12,r2, r1
192 moveq r3, #0xe1000000
193 orreq r3, r3, #0x00a00000 @ nop
194 streq r3, [r1, #-4]
195 beq ssp_drc_dp_end
196
197 cmp r12,#4
198 ldreq r3, [r1]
199 addeq r3, r3, #1
200 streq r3, [r1, #-4] @ move the other cond up
201 moveq r3, #0xe1000000
202 orreq r3, r3, #0x00a00000
203 streq r3, [r1] @ fill its place with nop
204 beq ssp_drc_dp_end
205
@ general case: keep the top byte of the old instruction (minus the L bit)
@ and insert the new offset below it; the final ror #8 moves the top byte
@ back to bits 31-24
206 ldr r3, [r1, #-4]
207 sub r12,r12,#4
208 mov r3, r3, lsr #24
209 bic r3, r3, #1 @ L bit
210 orr r3, r3, r12,lsl #6
211 mov r3, r3, ror #8 @ patched branch instruction
212 str r3, [r1, #-4]
213
@ keep the target in TMP1 across the flush call, which receives
@ r0 = address of the patched call and r1 = return address + 4
214ssp_drc_dp_end:
215 str r2, [r7, #SSP_OFFS_TMP1]
216 sub r0, r1, #4
217 add r1, r1, #4
218 bl flush_inval_caches
219 ldr r2, [r7, #SSP_OFFS_TMP1]
220 ldr r0, [r7, #SSP_OFFS_TMP0]
221 bx r2
e807ac75 222
223
@ ssp_drc_end: leave translated code.
@ In: r0 = SSP PC to resume at; it is stored in the upper half of the PC
@ register word. The mapped ARM registers are then written back to the
@ context (the inverse of ssp_regfile_load), r4-r11 and lr are restored
@ from the stack and the routine returns with r0 = r11 (cycles).
45883918 224ssp_drc_end:
225 mov r0, r0, lsl #16
226 str r0, [r7, #(SSP_OFFS_GR+SSP_PC*4)]
227
71bb1b7b 228ssp_regfile_store:
45883918 229 str r10,[r7, #(0x400+SSP_P*4)] @ P
e807ac75 230 str r8, [r7, #0x440] @ r0-r2
231 str r9, [r7, #0x444] @ r4-r6
b9c1d012 232
@ r9 = stack pointer bits of r6 (bits 29-31) moved down to bits 16-18
233 mov r9, r6, lsr #13
e807ac75 234 and r9, r9, #(7<<16) @ STACK
@ move the cached NZCV nibble of r6 into the ARM flags so that the
@ conditional orr instructions below can rebuild ST bits 31 (N) and 29 (Z)
b9c1d012 235 mov r3, r6, lsl #28
236 msr cpsr_flg, r3 @ to ARM PSR
237 and r6, r6, #0x670
238 mov r6, r6, lsl #12
239 orrmi r6, r6, #0x80000000 @ N
240 orreq r6, r6, #0x20000000 @ Z
241
@ split r4 (XXYY) back into separate words with the value in the upper half
e807ac75 242 mov r3, r4, lsl #16 @ Y
243 mov r2, r4, lsr #16
244 mov r2, r2, lsl #16 @ X
@ store X, Y, A, ST, STACK as five consecutive words at context + 0x404
245 add r8, r7, #0x400
246 add r8, r8, #4
247 stmia r8, {r2,r3,r5,r6,r9}
71bb1b7b 248
71bb1b7b 249 mov r0, r11
250 ldmfd sp!, {r4-r11, lr}
251 bx lr
252
253
d5276282 254
@ ssp_hle_800: native replacement for the SSP sequence listed below.
255@ ld A, PM0
256@ andi 2
257@ bra z=1, gloc_0800
@ Tests bit 17 (0x20000) of the PM0 register word.
@ Bit clear: set SSP_WAIT_PM0 in emu_status, take 1024 off the cycle
@            counter and leave through ssp_drc_end with PC = 0x400.
@ Bit set:   continue at PC = 0x404 through ssp_drc_next.
258ssp_hle_800:
d5276282 259 ldr r0, [r7, #(SSP_OFFS_GR+SSP_PM0*4)]
71bb1b7b 260 ldr r1, [r7, #SSP_OFFS_EMUSTAT]
d5276282 261 tst r0, #0x20000
f5d1115f 262 orreq r1, r1, #SSP_WAIT_PM0
45883918 263 subeq r11,r11, #1024
71bb1b7b 264 streq r1, [r7, #SSP_OFFS_EMUSTAT]
@ mov does not touch the flags, so the result of the tst is still live here
45883918 265 mov r0, #0x400
266 beq ssp_drc_end
267 orrne r0, r0, #0x004
45883918 268 b ssp_drc_next
d5276282 269
e807ac75 270
@ hle_flushflags: replace the low nibble of r6 with the current ARM NZCV
@ flags (CPSR bits 31-28). Clobbers r1.
ee9ee9fd 271.macro hle_flushflags
272 bic r6, r6, #0xf
273 mrs r1, cpsr
274 orr r6, r6, r1, lsr #28
275.endm
276
@ hle_popstack: pop one entry from the SSP stack.
@ The stack pointer lives in bits 31-29 of r6; it is decremented by one and
@ the halfword at context + 0x448 + 2*pointer is loaded into r0.
@ Out: r0 = popped value. Clobbers r1. Flags are not changed.
277.macro hle_popstack
278 sub r6, r6, #0x20000000
279 add r1, r7, #0x400
280 add r1, r1, #0x048 @ stack
281 add r1, r1, r6, lsr #28
282 ldrh r0, [r1]
283.endm
284
@ ssp_hle_902: native replacement for an SSP block copy routine.
@ NOTE(review): presumably stands in for the SSP code at address 0x902 -
@ confirm against the translator that installs this handler.
@
@ The halfword at context + 0x200, (r7|00), is a word index into the
@ iram_rom image. Starting there the routine reads:
@   word 0:  destination word address (only the low 10 bits are used)
@   word 1:  length; length + 1 halfwords are copied
@   word 2+: the data
@ The destination pointer is forced to 4-byte alignment and the data is
@ copied two halfwords per store, with one trailing halfword when the count
@ is odd.
@
@ In:  r7 = SSP context, r11 = remaining cycles, r5 = A, r6 = stack/flags.
@ Out: (r7|00) advanced past the data, PMC and pmac_write[4] set from the
@      final destination address, upper half of r5 cleared, flag nibble
@      of r6 cleared, IRAM marked dirty, r11 reduced by 3 * count + 16.
@      Leaves via ssp_drc_next / ssp_drc_end with r0 = popped return PC.
@ Clobbers r0-r3, r12 and the ARM flags.
@
@ The copy loop pre-decrements the counter and loops on bge. Testing with
@ bgt after the first pair had already been copied made an odd count N copy
@ N + 2 halfwords. Even counts behave exactly as before.
ssp_hle_902:
        cmp     r11, #0
        ble     ssp_drc_end             @ no cycles left in this timeslice

        add     r1, r7, #0x200
        ldrh    r0, [r1]                @ (r7|00): source word index
        ldr     r3, [r7, #SSP_OFFS_IRAM_ROM]
        add     r2, r3, r0, lsl #1      @ r2 = source pointer
        ldrh    r0, [r2], #2            @ destination word address
        mov     r5, r5, lsl #16
        mov     r5, r5, lsr #16         @ keep only the low half of r5
        bic     r0, r0, #0xfc00         @ low 10 bits only
        add     r3, r3, r0, lsl #1      @ IRAM dest
        ldrh    r12,[r2], #2            @ length
        bic     r3, r3, #3              @ always seen aligned
@       orr     r5, r5, #0x08000000
@       orr     r5, r5, #0x00880000
@       sub     r5, r5, r12, lsl #16
        bic     r6, r6, #0xf            @ clear the cached NZCV nibble
        add     r12,r12,#1              @ r12 = halfwords to copy (>= 1)
        mov     r0, #1
        str     r0, [r7, #SSP_OFFS_IRAM_DIRTY]
        sub     r11,r11,r12,lsl #1
        sub     r11,r11,r12             @ -= length*3

        subs    r12,r12,#2              @ is there at least one full pair?
        blt     ssp_hle_902_tail

ssp_hle_902_loop:
        ldrh    r0, [r2], #2
        ldrh    r1, [r2], #2
        subs    r12,r12,#2              @ flags stay live until the bge
        orr     r0, r0, r1, lsl #16
        str     r0, [r3], #4
        bge     ssp_hle_902_loop

ssp_hle_902_tail:
        tst     r12, #1                 @ r12 is -1 (odd count) or -2 (even)
        ldrneh  r0, [r2], #2
        strneh  r0, [r3], #2

        @ write the advanced source index back to (r7|00)
        ldr     r0, [r7, #SSP_OFFS_IRAM_ROM]
        add     r1, r7, #0x200
        sub     r2, r2, r0
        mov     r2, r2, lsr #1
        strh    r2, [r1]                @ (r7|00)

        @ final destination word address plus fixed upper bits -> PMC, pmac_write[4]
        sub     r0, r3, r0
        mov     r0, r0, lsr #1
        orr     r0, r0, #0x08000000
        orr     r0, r0, #0x001c8000
        str     r0, [r7, #(SSP_OFFS_GR+SSP_PMC*4)]
        str     r0, [r7, #(SSP_OFFS_PM_WRITE+4*4)]

        hle_popstack                    @ r0 = return PC
        subs    r11,r11,#16             @ timeslice is likely to end
        ble     ssp_drc_end
        b       ssp_drc_next
339
ee9ee9fd 340
341@ this one is car rendering related
342.macro hle_11_12c_mla offs_in
343 ldrsh r5, [r7, #(\offs_in+0)]
344 ldrsh r0, [r7, #(\offs_in+2)]
345 ldrsh r1, [r7, #(\offs_in+4)]
346 mul r5, r2, r5
347 ldrsh r12,[r7, #(\offs_in+6)]
348 mla r5, r3, r0, r5
349 mla r5, r4, r1, r5
350 add r5, r5, r12,lsl #11
351
352 movs r5, r5, lsr #13
353 add r1, r7, r8, lsr #23
354 strh r5, [r1]
355 add r8, r8, #(1<<24)
356.endm
357
@ ssp_hle_11_12c: native replacement for an SSP routine.
@ Fetches three values with ssp_pm_read(0), sign-extends the low 16 bits
@ of each and doubles them, then runs hle_11_12c_mla on the coefficient
@ rows at context offsets 0x20, 0x28 and 0x30. Results are stored through
@ the pointer held in byte 1 of r8, which advances by one per row.
@ On exit the low byte of r8 has bits 0x1c set (it was cleared first),
@ 33 cycles are charged and control returns through the SSP stack.
@ Overwrites r4 and r5 (XXYY and A in the register map) as scratch.
358ssp_hle_11_12c:
359 cmp r11, #0
360 ble ssp_drc_end
361
362 mov r0, #0
363 bl ssp_pm_read
364 mov r4, r0
365
366 mov r0, #0
367 bl ssp_pm_read
368 mov r5, r0
369
370 mov r0, #0
371 bl ssp_pm_read
372
@ lsl #16 followed by asr #15 = sign-extend the low halfword, times 2
373 mov r2, r4, lsl #16
374 mov r2, r2, asr #15 @ (r7|00) << 1
375 mov r3, r5, lsl #16
376 mov r3, r3, asr #15 @ (r7|01) << 1
377 mov r4, r0, lsl #16
378 mov r4, r4, asr #15 @ (r7|10) << 1
379
@ clear byte 0 of r8 and rotate so that byte 1 sits in bits 31-24, where
@ the macro can use r8 >> 23 as a byte offset and add 1 << 24 to step it
380 bic r8, r8, #0xff
381 mov r8, r8, ror #16
382
383 hle_11_12c_mla 0x20
384 hle_11_12c_mla 0x28
385 hle_11_12c_mla 0x30
386
@ rotate back to the normal layout
387 mov r8, r8, ror #16
388 orr r8, r8, #0x1c
389@ hle_flushflags
390 hle_popstack
391 sub r11,r11,#33
392 b ssp_drc_next
393
394
@ ssp_hle_11_384 / ssp_hle_11_38a: two entry points into the same routine.
@ They differ only in the initial value of r3 (2 or 3); because the loop
@ ends on bpl after decrementing, it runs r3 + 1 times (3 or 4).
395ssp_hle_11_384:
396 mov r3, #2
397 b ssp_hle_11_38x
398
399ssp_hle_11_38a:
400 mov r3, #3 @ r5
401
@ ssp_hle_11_38x: walk records of three halfwords starting at context +
@ 0x1c0 and collect comparison results as bits in r2. r1 is a one-hot mask
@ that is shifted left once per record.
@   upper half of r2: set when the first halfword (shifted to the upper half
@     and passed through the abs sequence) is >= the word at 0x224 shifted
@     left by 16, or when the third halfword is >= the upper half of the
@     word at 0x220
@   lower half of r2: set when the third halfword is below the lower half of
@     the word at 0x1e8
@ The result word is stored at context + 0x1dc. On exit byte 0 of r8 holds
@ the final pointer as a word index, byte 0 of r9 holds the final mask, and
@ 9 + 30*4 cycles are charged. Overwrites r5 (A) and clobbers r0-r3, r12.
402ssp_hle_11_38x:
403 cmp r11, #0
404 ble ssp_drc_end
405
406 mov r2, #0 @ EFh, EEh
407 mov r1, #1 @ r4
408 add r0, r7, #0x1c0 @ r0 (based)
409
410ssp_hle_11_38x_loop:
411 ldrh r5, [r0], #2
412 ldr r12,[r7, #0x224]
413 mov r5, r5, lsl #16
@ NOTE(review): after the eor the value is never negative, so the add below
@ always adds 0 and a negative input x yields ~x rather than -x.
@ Confirm whether the off-by-one against a true abs matters here.
414 eor r5, r5, r5, asr #31
415 add r5, r5, r5, lsr #31 @ abs(r5)
416 cmp r5, r12,lsl #16
417 orrpl r2, r2, r1,lsl #16 @ EFh |= r4
418
@ pre-indexed load with writeback: skips the second halfword of the record
419 ldrh r5, [r0, #2]!
420 ldr r12,[r7, #0x220]
421 cmp r5, r12,lsr #16
422 orrpl r2, r2, r1,lsl #16 @ EFh |= r4
423
424 ldr r12,[r7, #0x1e8]
425 add r0, r0, #2
426 mov r12,r12,lsl #16
427 cmp r5, r12,lsr #16
428 orrmi r2, r2, r1
429
430 mov r1, r1, lsl #1
431 subs r3, r3, #1
432 bpl ssp_hle_11_38x_loop
433
434 str r2, [r7, #0x1dc]
@ turn the native pointer back into a word index
435 sub r0, r0, r7
436 bic r8, r8, #0xff
437 orr r8, r8, r0, lsr #1
438 bic r9, r9, #0xff
439 orr r9, r9, r1
440
441@ hle_flushflags
442 hle_popstack
443 sub r11,r11,#(9+30*4)
444 b ssp_drc_next
445
446
@ ssp_hle_07_6d6: scan a list of 32-bit entries and track the minimum and
@ maximum of their low halfwords.
@ The list starts at context + 2 * (low byte of r8) and is read one word at
@ a time until a word with bit 31 set is found. The running minimum
@ (lower half) and maximum (upper half) are seeded from the word at
@ context + 0x20c and written back there at the end. Comparisons are done
@ on the halfwords shifted into the upper 16 bits, i.e. as signed values.
@ Each processed entry costs 16 cycles, the exit path another 6.
@ On exit the low byte of r8 holds the word index just past the terminator.
@ Overwrites r5 (A) and clobbers r0-r2.
447ssp_hle_07_6d6:
448 cmp r11, #0
449 ble ssp_drc_end
450
451 ldr r1, [r7, #0x20c]
452 and r0, r8, #0xff @ assuming alignment
453 add r0, r7, r0, lsl #1
454 mov r2, r1, lsr #16
455 mov r1, r1, lsl #16 @ 106h << 16
456 mov r2, r2, lsl #16 @ 107h << 16
457
458ssp_hle_07_6d6_loop:
459 ldr r5, [r0], #4
460 tst r5, r5
461 bmi ssp_hle_07_6d6_end
462 mov r5, r5, lsl #16
@ r1 = min(r1, r5)
463 cmp r5, r1
464 movmi r1, r5
@ the sub below does not set flags, so bmi still sees the result of this cmp
465 cmp r5, r2
466 sub r11,r11,#16
467 bmi ssp_hle_07_6d6_loop
@ r5 >= r2: new maximum
468 mov r2, r5
469 b ssp_hle_07_6d6_loop
470
471ssp_hle_07_6d6_end:
472 sub r0, r0, r7
473 mov r0, r0, lsr #1
474 bic r8, r8, #0xff
475 orr r8, r8, r0
@ repack: maximum in the upper half, minimum in the lower half
476 orr r1, r2, r1, lsr #16
477 str r1, [r7, #0x20c]
478 hle_popstack
479 sub r11,r11,#6
480 b ssp_drc_next
481
482
@ ssp_hle_07_030: rotate the halfword at context + 0 left by 4 bits within
@ 16 bits, charge 3 cycles, then fall through into ssp_hle_07_036.
483ssp_hle_07_030:
484 ldrh r0, [r7]
485 mov r0, r0, lsl #4
486 orr r0, r0, r0, lsr #16
487 strh r0, [r7]
488 sub r11,r11,#3
489
@ ssp_hle_07_036: native replacement for an SSP fill routine.
@ Outline, as far as the code below shows:
@   1. r5 = (upper half - lower half of the word at 0x1e0) << 16.
@      Below 4: take the short exit hle_07_036_ending2.
@      Not below the lower half of the word at 0x1dc: just return.
@   2. Store the difference and (word at 0x1e0) & 3 into the halfwords at
@      0x1ea and 0x1f0, mask the word at context + 0 with a table entry
@      and hand it to ssp_pm_write with r1 = 4.
@   3. Compute a fill length (at most 0x78); if it is <= 0 skip the fill.
@   4. Fill memory at ptr_dram + 2 * (low 16 bits of PMC) with the halfword
@      pattern from context + 0. Consecutive writes are 0x40 bytes apart.
@      The mode halfword at context + 6 must be 0x4018 or 0x4418, otherwise
@      tr_unhandled is called. With bit 0x400 set, pattern nibbles that are
@      zero leave the destination nibble unchanged ("overwrite" mode).
@   5. hle_07_036_ending1 updates the pointer byte in bits 16-23 of r8, sets
@      r5 and the flags from another masked value, writes it with
@      ssp_pm_write and charges 35 cycles.
@ The cycle counter r11 is adjusted along the way; r0-r3, r12 are scratch.
490ssp_hle_07_036:
34e243f1 491 ldr r1, [r7, #0x1e0] @ F1h F0h
ee9ee9fd 492 rsb r5, r1, r1, lsr #16
34e243f1 493 mov r5, r5, lsl #16 @ AL not needed
ee9ee9fd 494 cmp r5, #(4<<16)
@ sub leaves the flags alone, the bmi still tests the cmp above
495 sub r11,r11,#5
496 bmi hle_07_036_ending2
34e243f1 497 ldr r1, [r7, #0x1dc] @ EEh
ee9ee9fd 498 cmp r5, r1, lsl #16
34e243f1 499 sub r11,r11,#5
ee9ee9fd 500 bpl hle_07_036_ret
34e243f1 501
502 mov r0, r5, lsr #16
ee9ee9fd 503 add r1, r7, #0x100
34e243f1 504 strh r0, [r1, #0xea] @ F5h
505 ldr r0, [r7, #0x1e0] @ F0h
ee9ee9fd 506 and r0, r0, #3
34e243f1 507 strh r0, [r1, #0xf0] @ F8h
@ r2 = halfword at word index 0xc0 + (F0h & 3), used as a mask
508 add r2, r0, #0xc0 @ r2
ee9ee9fd 509 add r2, r7, r2, lsl #1
510 ldrh r2, [r2]
511 ldr r0, [r7]
512 mov r1, #4
513 and r0, r0, r2
514 bl ssp_pm_write
515 @ will handle PMC later
@ length = 0x78 - ((EAh word - ((F5h<<16) - (3<<16) + (F8h<<16)) >> 18) & 0x7f)
34e243f1 516 ldr r0, [r7, #0x1e8] @ F5h << 16
517 ldr r1, [r7, #0x1f0] @ F8h
518 ldr r2, [r7, #0x1d4] @ EAh
ee9ee9fd 519 sub r0, r0, #(3<<16)
520 add r0, r0, r1, lsl #16
34e243f1 521 sub r0, r2, r0, asr #18
ee9ee9fd 522 and r0, r0, #0x7f
523 rsbs r0, r0, #0x78 @ length
524 ble hle_07_036_ending1
525
34e243f1 526 sub r11,r11,r0
527
528 @ copy part
ee9ee9fd 529 ldr r1, [r7, #(SSP_OFFS_GR+SSP_PMC*4)]
530 ldr r2, [r7, #SSP_OFFS_DRAM]
531 mov r1, r1, lsl #16
532 add r1, r2, r1, lsr #15 @ addr (based)
533 ldrh r2, [r7, #0] @ pattern
534 ldrh r3, [r7, #6] @ mode
535
@ r12 becomes 0 when mode is 0x4018 or 0x4418, anything else is reported
536 mov r12, #0x4000
537 orr r12,r12,#0x0018
34e243f1 538 subs r12,r3, r12
539 subnes r12,r12,#0x0400
540 blne tr_unhandled
ee9ee9fd 541
@ duplicate the pattern into both halves for word-sized stores
542 orr r2, r2, r2, lsl #16
543 tst r3, #0x400
544 bne hle_07_036_ovrwr
545
@ plain fill; r0 = remaining halfwords, r1 = destination
34e243f1 546hle_07_036_no_ovrwr:
@ destination not word aligned: write one halfword first
ee9ee9fd 547 tst r1, #2
34e243f1 548 strneh r2, [r1], #0x3e @ align
ee9ee9fd 549 subne r0, r0, #1
34e243f1 550 subs r0, r0, #4
ee9ee9fd 551 blt hle_07_036_l2
552
@ two word stores (four halfwords) per iteration
553hle_07_036_l1:
554 subs r0, r0, #4
34e243f1 555 str r2, [r1], #0x40
556 str r2, [r1], #0x40
557 bge hle_07_036_l1
ee9ee9fd 558
@ remainder: bit 1 of the count = one more word, bit 0 = one more halfword
559hle_07_036_l2:
560 tst r0, #2
34e243f1 561 strne r2, [r1], #0x40
ee9ee9fd 562 tst r0, #1
563 strneh r2, [r1], #2
34e243f1 564 b hle_07_036_end_copy
ee9ee9fd 565
@ overwrite mode: build a keep-mask in r12 with 0xf for every pattern
@ nibble that is zero; an all-zero mask falls back to the plain fill
566hle_07_036_ovrwr:
34e243f1 567 tst r2, #0x000f
568 orreq r12,r12,#0x000f
569 tst r2, #0x00f0
570 orreq r12,r12,#0x00f0
571 tst r2, #0x0f00
572 orreq r12,r12,#0x0f00
573 tst r2, #0xf000
574 orreq r12,r12,#0xf000
575 orrs r12,r12,r12,lsl #16
576 beq hle_07_036_no_ovrwr
577
@ unaligned start: read-modify-write a single halfword
578 tst r1, #2
579 beq hle_07_036_ol0
580 ldrh r3, [r1]
581 and r3, r3, r12
582 orr r3, r3, r2
583 strh r3, [r1], #0x3e @ align
584 sub r0, r0, #1
585
586hle_07_036_ol0:
587 subs r0, r0, #2
588 blt hle_07_036_ol2
589
@ one read-modify-write word (two halfwords) per iteration
590hle_07_036_ol1:
591 subs r0, r0, #2
592 ldr r3, [r1]
593 and r3, r3, r12
594 orr r3, r3, r2
595 str r3, [r1], #0x40
596 bge hle_07_036_ol1
597
598hle_07_036_ol2:
599 tst r0, #1
600 ldrneh r3, [r1]
601 andne r3, r3, r12
602 orrne r3, r3, r2
603 strneh r3, [r1], #2
604
@ store the new destination word address in the low half of pmac_write[4]
605hle_07_036_end_copy:
606 ldr r2, [r7, #SSP_OFFS_DRAM]
607 add r3, r7, #0x400
608 sub r0, r1, r2 @ new addr
609 mov r0, r0, lsr #1
610 strh r0, [r3, #(0x6c+4*4)] @ SSP_OFFS_PM_WRITE+4*4 (low)
ee9ee9fd 611
612hle_07_036_ending1:
@ pointer = 0xc4 + ((F1h + 1) & 3), kept in bits 16-23 of r0
34e243f1 613 ldr r0, [r7, #0x1e0] @ F1h << 16
ee9ee9fd 614 add r0, r0, #(1<<16)
615 and r0, r0, #(3<<16)
616 add r0, r0, #(0xc4<<16)
617 bic r8, r8, #0xff0000
34e243f1 618 orr r8, r8, r0 @ r2
619 add r0, r7, r0, lsr #15
ee9ee9fd 620 ldrh r0, [r0]
621 ldr r2, [r7]
34e243f1 622 and r0, r0, r2
623 movs r5, r0, lsl #16
ee9ee9fd 624
34e243f1 625 ldr r1, [r7, #4] @ new mode
626 add r2, r7, #0x400
627 strh r1, [r2, #(0x6c+4*4+2)] @ SSP_OFFS_PM_WRITE+4*4 (high)
ee9ee9fd 628 mov r1, #4
629 bl ssp_pm_write
34e243f1 630 sub r11,r11,#35
ee9ee9fd 631
632hle_07_036_ret:
633 hle_popstack
634 b ssp_drc_next
635
@ short exit: return normally if bit 30 of r5 was set, otherwise continue
@ at SSP address 0x87
636hle_07_036_ending2:
637 sub r11,r11,#3
638 movs r5, r5, lsl #1
639 bmi hle_07_036_ret
34e243f1 640 mov r0, #0x87
ee9ee9fd 641 b ssp_drc_next @ let the dispatcher finish this
642