clarify PicoDrive's license
[picodrive.git] / pico / carthw / svp / stub_arm.S
CommitLineData
@*
@* Compiler helper functions and some SVP HLE code
@* (C) notaz, 2008,2009
@*
@* This work is licensed under the terms of MAME license.
@* See COPYING file in the top-level directory.
@*
65ca3034 8
@ Symbols made visible to the linker.
@ Dispatcher entry points:
.global ssp_drc_entry
.global ssp_drc_next
.global ssp_drc_next_patch
.global ssp_drc_end
@ HLE handlers:
.global ssp_hle_800
.global ssp_hle_902
.global ssp_hle_07_030
.global ssp_hle_07_036
.global ssp_hle_07_6d6
.global ssp_hle_11_12c
.global ssp_hle_11_384
.global ssp_hle_11_38a

.text
.align 2
24
@ SSP_GR0, SSP_X, SSP_Y, SSP_A,
@ SSP_ST, SSP_STACK, SSP_PC, SSP_P,
@ SSP_PM0, SSP_PM1, SSP_PM2, SSP_XST,
@ SSP_PM4, SSP_gr13, SSP_PMC, SSP_AL

@ register map:
@ r4:  XXYY
@ r5:  A
@ r6:  STACK and emu flags: sss0 * .uu. .lll NZCV (NZCV is PSR bits from ARM)
@ r7:  SSP context
@ r8:  r0-r2 (.210)
@ r9:  r4-r6 (.654)
@ r10: P
@ r11: cycles
@ r12: tmp
e807ac75 40
e807ac75 41
@ Layout constants for the SSP context addressed through r7.
@ SSP_OFFS_* are byte offsets from r7; SSP_PC, SSP_P, SSP_PM0 and SSP_PMC
@ are register indices that get scaled by 4 and added to SSP_OFFS_GR.
@ SSP_WAIT_PM0 is a flag bit that gets ORed into emu_status.
#define SSP_OFFS_GR         0x400
#define SSP_PC                  6
#define SSP_P                   7
#define SSP_PM0                 8
#define SSP_PMC                14
#define SSP_OFFS_PM_WRITE   0x46c // pmac_write[]
#define SSP_OFFS_EMUSTAT    0x484 // emu_status
#define SSP_OFFS_IRAM_ROM   0x48c // ptr_iram_rom
#define SSP_OFFS_DRAM       0x490 // ptr_dram
#define SSP_OFFS_IRAM_DIRTY 0x494
#define SSP_OFFS_IRAM_CTX   0x498 // iram_context
#define SSP_OFFS_BLTAB      0x49c // block_table
#define SSP_OFFS_BLTAB_IRAM 0x4a0
#define SSP_OFFS_TMP0       0x4a4 // for entry PC
#define SSP_OFFS_TMP1       0x4a8
#define SSP_OFFS_TMP2       0x4ac
#define SSP_WAIT_PM0       0x2000
59
60
@ ssp_drc_do_next: find (or create) the translated block for the SSP PC
@ held in r0 and transfer control to it.
@
@ In:   r0 = target PC (only the low 16 bits are used), r7 = SSP context,
@       lr = return address of the calling bl (used when patch_jump is set)
@ Out:  control leaves through r2 (block entry) or ssp_drc_do_patch
@ Uses: r0-r2, flags; TMP0 holds the entry PC, TMP1 the IRAM table slot,
@       TMP2 the caller address when patch_jump is set.
@
@ PCs of 0x400 and above are looked up in block_table. Lower PCs go through
@ the per-context IRAM table, refreshing iram_context first when the IRAM
@ dirty flag is set. An empty table slot triggers ssp_translate_block and
@ the returned pointer is stored back into the slot.
@ With patch_jump=1 the lookup result is handed to ssp_drc_do_patch
@ instead of being branched to directly.
.macro ssp_drc_do_next patch_jump=0
.if \patch_jump
    str     lr, [r7, #SSP_OFFS_TMP2]    @ jump instr. (actually call) address + 4
.endif
    mov     r0, r0, lsl #16
    mov     r0, r0, lsr #16             @ r0 &= 0xffff
    str     r0, [r7, #SSP_OFFS_TMP0]
    cmp     r0, #0x400
    blt     0f                          @ ssp_de_iram

    ldr     r2, [r7, #SSP_OFFS_BLTAB]
    ldr     r2, [r2, r0, lsl #2]        @ r2 = block_table[PC]
    tst     r2, r2
.if \patch_jump
    bne     ssp_drc_do_patch
.else
    bxne    r2
.endif
    bl      ssp_translate_block
    mov     r2, r0
    ldr     r0, [r7, #SSP_OFFS_TMP0]    @ entry PC
    ldr     r1, [r7, #SSP_OFFS_BLTAB]
    str     r2, [r1, r0, lsl #2]        @ cache the new block pointer
.if \patch_jump
    b       ssp_drc_do_patch
.else
    bx      r2
.endif

0: @ ssp_de_iram:
    ldr     r1, [r7, #SSP_OFFS_IRAM_DIRTY]
    tst     r1, r1
    ldreq   r1, [r7, #SSP_OFFS_IRAM_CTX]
    beq     1f                          @ ssp_de_iram_ctx

    bl      ssp_get_iram_context
    mov     r1, #0
    str     r1, [r7, #SSP_OFFS_IRAM_DIRTY]
    mov     r1, r0
    str     r1, [r7, #SSP_OFFS_IRAM_CTX]
    ldr     r0, [r7, #SSP_OFFS_TMP0]    @ entry PC

1: @ ssp_de_iram_ctx:
    ldr     r2, [r7, #SSP_OFFS_BLTAB_IRAM]
    add     r2, r2, r1, lsl #12         @ block_tab_iram + iram_context * 0x800/2*4
    add     r1, r2, r0, lsl #2
    ldr     r2, [r1]
    tst     r2, r2
.if \patch_jump
    bne     ssp_drc_do_patch
.else
    bxne    r2
.endif
    str     r1, [r7, #SSP_OFFS_TMP1]
    bl      ssp_translate_block
    mov     r2, r0
    ldr     r0, [r7, #SSP_OFFS_TMP0]    @ entry PC
    ldr     r1, [r7, #SSP_OFFS_TMP1]    @ &block_table_iram[iram_context][rPC]
    str     r2, [r1]
.if \patch_jump
    b       ssp_drc_do_patch
.else
    bx      r2
.endif
.endm @ ssp_drc_do_next
126
127
@ ssp_drc_entry: enter the recompiled SSP code.
@ In:  r0 = cycle count, kept in r11 while running
@ Saves r4-r11 and lr (restored in ssp_regfile_store), loads the SSP
@ register file into the ARM registers described by the register map,
@ then falls through into ssp_drc_next with r0 = current SSP PC.
@ NOTE(review): nine registers are pushed, so sp is only 4-byte aligned
@ while the C helpers are called; AAPCS asks for 8-byte alignment at
@ public interfaces - confirm whether the target ABI cares.
ssp_drc_entry:
    stmfd   sp!, {r4-r11, lr}
    mov     r11, r0
ssp_regfile_load:
    ldr     r7, =ssp
    ldr     r7, [r7]                    @ r7 = SSP context pointer
    add     r2, r7, #SSP_OFFS_GR
    add     r2, r2, #4                  @ skip the first GR slot
    ldmia   r2, {r3,r4,r5,r6,r8}        @ X, Y, A, ST, STACK
    mov     r3, r3, lsr #16
    mov     r3, r3, lsl #16
    orr     r4, r3, r4, lsr #16         @ XXYY

    and     r8, r8, #0x0f0000
    mov     r8, r8, lsl #13             @ sss0 *
    and     r9, r6, #0x670000
    tst     r6, #0x80000000
    orrne   r8, r8, #0x8
    tst     r6, #0x20000000
    orrne   r8, r8, #0x4                @ sss0 * NZ..
    orr     r6, r8, r9, lsr #12         @ sss0 * .uu. .lll NZ..

    ldr     r8, [r7, #0x440]            @ r0-r2
    ldr     r9, [r7, #0x444]            @ r4-r6
    ldr     r10,[r7, #(SSP_OFFS_GR+SSP_P*4)]  @ P

    ldr     r0, [r7, #(SSP_OFFS_GR+SSP_PC*4)]
    mov     r0, r0, lsr #16             @ PC lives in the upper halfword


@ ssp_drc_next: dispatch to the block for the PC in r0 (no call patching).
ssp_drc_next:
    ssp_drc_do_next 0
160
161
@ ssp_drc_next_patch: like ssp_drc_next, but reached with bl; the call
@ site (lr) is remembered in TMP2 so that it can be rewritten to reach
@ the target block directly.
ssp_drc_next_patch:
    ssp_drc_do_next 1

@ ssp_drc_do_patch: rewrite the call instruction at [TMP2 - 4].
@ In:  r2 = target block, TMP2 = address following the call, TMP0 = entry PC
@ Three cases, keyed on the distance r12 = target - (call address + 4):
@   0: target follows the call directly, the call becomes a nop
@   4: the instruction after the call is moved up one word (its word is
@      incremented by 1) and its old slot becomes a nop
@   otherwise: the existing word keeps its top byte minus the L bit and
@      gets a new 24-bit word offset
@ The nop word built here is 0xe1a00000.
ssp_drc_do_patch:
    ldr     r1, [r7, #SSP_OFFS_TMP2]    @ jump instr. (actually call) address + 4
    subs    r12,r2, r1
    moveq   r3,     #0xe1000000
    orreq   r3, r3, #0x00a00000         @ nop
    streq   r3, [r1, #-4]
    beq     ssp_drc_dp_end

    cmp     r12,#4
    ldreq   r3, [r1]
    addeq   r3, r3, #1
    streq   r3, [r1, #-4]               @ move the other cond up
    moveq   r3,     #0xe1000000
    orreq   r3, r3, #0x00a00000
    streq   r3, [r1]                    @ fill its place with nop
    beq     ssp_drc_dp_end

    ldr     r3, [r1, #-4]
    sub     r12,r12,#4                  @ offset relative to call address + 8
    mov     r3, r3, lsr #24
    bic     r3, r3, #1                  @ L bit
    orr     r3, r3, r12,lsl #6
    mov     r3, r3, ror #8              @ patched branch instruction
    str     r3, [r1, #-4]               @ patch the bl/b to jump directly to another handler

@ Flush the two words around the patch site, then continue at the target.
ssp_drc_dp_end:
    str     r2, [r7, #SSP_OFFS_TMP1]    @ keep the target across the call
    sub     r0, r1, #4
    add     r1, r1, #4
    bl      cache_flush_d_inval_i
    ldr     r2, [r7, #SSP_OFFS_TMP1]
    ldr     r0, [r7, #SSP_OFFS_TMP0]
    bx      r2
e807ac75 198
199
@ ssp_drc_end: leave the recompiled code.
@ In:  r0 = SSP PC to resume at, r11 = cycles
@ Out: r0 = r11, returned to the caller of ssp_drc_entry
@ Stores PC, then writes the cached SSP registers back to the context
@ (the inverse of ssp_regfile_load) and restores r4-r11.
ssp_drc_end:
    mov     r0, r0, lsl #16
    str     r0, [r7, #(SSP_OFFS_GR+SSP_PC*4)]

ssp_regfile_store:
    str     r10,[r7, #(SSP_OFFS_GR+SSP_P*4)]  @ P
    str     r8, [r7, #0x440]            @ r0-r2
    str     r9, [r7, #0x444]            @ r4-r6

    mov     r9, r6, lsr #13
    and     r9, r9, #(7<<16)            @ STACK
    mov     r3, r6, lsl #28
    msr     cpsr_f, r3                  @ to ARM PSR
    and     r6, r6, #0x670
    mov     r6, r6, lsl #12
    orrmi   r6, r6, #0x80000000         @ N
    orreq   r6, r6, #0x20000000         @ Z

    mov     r3, r4, lsl #16             @ Y
    mov     r2, r4, lsr #16
    mov     r2, r2, lsl #16             @ X
    add     r8, r7, #SSP_OFFS_GR
    add     r8, r8, #4
    stmia   r8, {r2,r3,r5,r6,r9}        @ X, Y, A, ST, STACK

    mov     r0, r11
    ldmfd   sp!, {r4-r11, lr}
    bx      lr
228
229
d5276282 230
@ HLE replacement for the SSP sequence:
@       ld A, PM0
@       andi 2
@       bra z=1, gloc_0800
@ If bit 0x20000 of the PM0 word is clear, SSP_WAIT_PM0 is set in
@ emu_status, 1024 cycles are dropped and execution ends with PC 0x400.
@ Otherwise dispatch continues at PC 0x404.
ssp_hle_800:
    ldr     r0, [r7, #(SSP_OFFS_GR+SSP_PM0*4)]
    ldr     r1, [r7, #SSP_OFFS_EMUSTAT]
    tst     r0, #0x20000
    orreq   r1, r1,  #SSP_WAIT_PM0
    subeq   r11,r11, #1024
    streq   r1, [r7, #SSP_OFFS_EMUSTAT]
    mov     r0,     #0x400
    beq     ssp_drc_end
    orrne   r0, r0, #0x004
    b       ssp_drc_next
d5276282 245
e807ac75 246
@ hle_flushflags: copy the current ARM NZCV flags into the low nibble
@ of r6. Clobbers r1.
.macro hle_flushflags
    bic     r6, r6, #0xf
    mrs     r1, cpsr
    orr     r6, r6, r1, lsr #28
.endm
252
@ hle_popstack: decrement the stack index kept in the top bits of r6 and
@ load the halfword at context offset 0x448 + (r6 >> 28) into r0.
@ Clobbers r1.
.macro hle_popstack
    sub     r6, r6, #0x20000000
    add     r1, r7, #0x400
    add     r1, r1, #0x048              @ stack
    add     r1, r1, r6, lsr #28
    ldrh    r0, [r1]
.endm
260
@ ssp_hle_902: HLE block copy into IRAM.
@ Reads a source index from the halfword at context offset 0x200, then a
@ destination halfword (masked to 10 bits) and a length from the source
@ stream, and copies length+1 halfwords to the destination. Marks IRAM
@ dirty, charges 3 cycles per halfword, writes the advanced source index
@ back and stores the end address with mode bits into PMC and
@ pmac_write[4]. Ends with hle_popstack and a 16 cycle charge.
@ NOTE(review): the copy loop moves two halfwords per pass and repeats
@ while the remaining count is above zero, so an odd count copies two
@ halfwords more than requested before the trailing single halfword.
@ The destination is force-aligned (always seen aligned); confirm odd
@ counts never occur.
ssp_hle_902:
    cmp     r11, #0
    ble     ssp_drc_end

    add     r1, r7, #0x200
    ldrh    r0, [r1]
    ldr     r3, [r7, #SSP_OFFS_IRAM_ROM]
    add     r2, r3, r0, lsl #1          @ (r7|00)
    ldrh    r0, [r2], #2
    mov     r5, r5, lsl #16
    mov     r5, r5, lsr #16
    bic     r0, r0, #0xfc00
    add     r3, r3, r0, lsl #1          @ IRAM dest
    ldrh    r12,[r2], #2                @ length
    bic     r3, r3, #3                  @ always seen aligned
@   orr     r5, r5, #0x08000000
@   orr     r5, r5, #0x00880000
@   sub     r5, r5, r12, lsl #16
    bic     r6, r6, #0xf
    add     r12,r12,#1
    mov     r0, #1
    str     r0, [r7, #SSP_OFFS_IRAM_DIRTY]
    sub     r11,r11,r12,lsl #1
    sub     r11,r11,r12                 @ -= length*3

ssp_hle_902_loop:
    ldrh    r0, [r2], #2
    ldrh    r1, [r2], #2
    subs    r12,r12,#2
    orr     r0, r0, r1, lsl #16
    str     r0, [r3], #4
    bgt     ssp_hle_902_loop

    tst     r12, #1
    ldrneh  r0, [r2], #2
    strneh  r0, [r3], #2

    ldr     r0, [r7, #SSP_OFFS_IRAM_ROM]
    add     r1, r7, #0x200
    sub     r2, r2, r0
    mov     r2, r2, lsr #1
    strh    r2, [r1]                    @ (r7|00)

    sub     r0, r3, r0
    mov     r0, r0, lsr #1
    orr     r0, r0, #0x08000000
    orr     r0, r0, #0x001c8000
    str     r0, [r7, #(SSP_OFFS_GR+SSP_PMC*4)]
    str     r0, [r7, #(SSP_OFFS_PM_WRITE+4*4)]

    hle_popstack
    subs    r11,r11,#16                 @ timeslice is likely to end
    ble     ssp_drc_end
    b       ssp_drc_next
315
ee9ee9fd 316
@ this one is car rendering related
@ hle_11_12c_mla: one multiply-accumulate row for ssp_hle_11_12c.
@ Loads four signed halfwords from the context at offs_in, computes
@   r5 = (r2*h0 + r3*h1 + r4*h2 + (h3 << 11)) >> 13
@ and stores the low halfword at context + (r8 >> 23), then adds 1<<24
@ to r8 to advance that index.
@ In:   r2, r3, r4 = multiplicands; r8 = rotated pointer register word
@ Uses: r0, r1, r5, r12, flags
.macro hle_11_12c_mla offs_in
    ldrsh   r5, [r7, #(\offs_in+0)]
    ldrsh   r0, [r7, #(\offs_in+2)]
    ldrsh   r1, [r7, #(\offs_in+4)]
    mul     r5, r2, r5
    ldrsh   r12,[r7, #(\offs_in+6)]
    mla     r5, r3, r0, r5
    mla     r5, r4, r1, r5
    add     r5, r5, r12,lsl #11

    movs    r5, r5, lsr #13
    add     r1, r7, r8, lsr #23
    strh    r5, [r1]
    add     r8, r8, #(1<<24)
.endm
333
@ ssp_hle_11_12c: HLE routine built from three hle_11_12c_mla rows.
@ Reads three values through ssp_pm_read(0), sign-extends each from 16
@ bits and doubles it, then runs the rows at context offsets 0x20, 0x28
@ and 0x30. The low byte of r8 is replaced with 0x1c afterwards.
@ Returns through hle_popstack after charging 33 cycles.
ssp_hle_11_12c:
    cmp     r11, #0
    ble     ssp_drc_end

    mov     r0, #0
    bl      ssp_pm_read
    mov     r4, r0

    mov     r0, #0
    bl      ssp_pm_read
    mov     r5, r0

    mov     r0, #0
    bl      ssp_pm_read

    mov     r2, r4, lsl #16
    mov     r2, r2, asr #15             @ (r7|00) << 1
    mov     r3, r5, lsl #16
    mov     r3, r3, asr #15             @ (r7|01) << 1
    mov     r4, r0, lsl #16
    mov     r4, r4, asr #15             @ (r7|10) << 1

    bic     r8, r8, #0xff
    mov     r8, r8, ror #16             @ rotate so the macro can index via r8 >> 23

    hle_11_12c_mla 0x20
    hle_11_12c_mla 0x28
    hle_11_12c_mla 0x30

    mov     r8, r8, ror #16             @ rotate back
    orr     r8, r8, #0x1c
@   hle_flushflags
    hle_popstack
    sub     r11,r11,#33
    b       ssp_drc_next
369
370
@ ssp_hle_11_384 / ssp_hle_11_38a: two entry points into one HLE loop.
@ They differ only in the initial value of r3 (2 or 3); the loop runs
@ r3+1 times. Each pass walks three halfwords starting at context offset
@ 0x1c0, compares them against limits read from context offsets 0x224,
@ 0x220 and 0x1e8, and collects result bits in r2 using the moving mask
@ r1. The result word is stored at context offset 0x1dc; the low bytes
@ of r8 and r9 are replaced with the final pointer index and mask.
@ Returns through hle_popstack after charging 9+30*4 cycles.
ssp_hle_11_384:
    mov     r3, #2
    b       ssp_hle_11_38x

ssp_hle_11_38a:
    mov     r3, #3                      @ r5

ssp_hle_11_38x:
    cmp     r11, #0
    ble     ssp_drc_end

    mov     r2, #0                      @ EFh, EEh
    mov     r1, #1                      @ r4
    add     r0, r7, #0x1c0              @ r0 (based)

ssp_hle_11_38x_loop:
    ldrh    r5, [r0], #2
    ldr     r12,[r7, #0x224]
    mov     r5, r5, lsl #16
    eor     r5, r5, r5, asr #31
    add     r5, r5, r5, lsr #31         @ abs(r5)
    cmp     r5, r12,lsl #16
    orrpl   r2, r2, r1,lsl #16          @ EFh |= r4

    ldrh    r5, [r0, #2]!
    ldr     r12,[r7, #0x220]
    cmp     r5, r12,lsr #16
    orrpl   r2, r2, r1,lsl #16          @ EFh |= r4

    ldr     r12,[r7, #0x1e8]
    add     r0, r0, #2
    mov     r12,r12,lsl #16
    cmp     r5, r12,lsr #16
    orrmi   r2, r2, r1

    mov     r1, r1, lsl #1
    subs    r3, r3, #1
    bpl     ssp_hle_11_38x_loop

    str     r2, [r7, #0x1dc]
    sub     r0, r0, r7
    bic     r8, r8, #0xff
    orr     r8, r8, r0, lsr #1
    bic     r9, r9, #0xff
    orr     r9, r9, r1

@   hle_flushflags
    hle_popstack
    sub     r11,r11,#(9+30*4)
    b       ssp_drc_next
421
422
@ ssp_hle_07_6d6: HLE scan over a list of words.
@ Starting at the context halfword index held in the low byte of r8,
@ reads words until one with the top bit set is found. For every other
@ word the low halfword (moved to the upper half) is compared with two
@ running values taken from the word at context offset 0x20c: r1 is
@ replaced when the compare against it is negative, r2 when the compare
@ against it is not. Each pass charges 16 cycles. Afterwards the low byte
@ of r8 gets the new index, both values are packed back into 0x20c, and
@ the routine returns through hle_popstack after 6 more cycles.
ssp_hle_07_6d6:
    cmp     r11, #0
    ble     ssp_drc_end

    ldr     r1, [r7, #0x20c]
    and     r0, r8, #0xff               @ assuming alignment
    add     r0, r7, r0, lsl #1
    mov     r2, r1, lsr #16
    mov     r1, r1, lsl #16             @ 106h << 16
    mov     r2, r2, lsl #16             @ 107h << 16

ssp_hle_07_6d6_loop:
    ldr     r5, [r0], #4
    tst     r5, r5
    bmi     ssp_hle_07_6d6_end
    mov     r5, r5, lsl #16
    cmp     r5, r1
    movmi   r1, r5
    cmp     r5, r2
    sub     r11,r11,#16
    bmi     ssp_hle_07_6d6_loop
    mov     r2, r5
    b       ssp_hle_07_6d6_loop

ssp_hle_07_6d6_end:
    sub     r0, r0, r7
    mov     r0, r0, lsr #1
    bic     r8, r8, #0xff
    orr     r8, r8, r0
    orr     r1, r2, r1, lsr #16
    str     r1, [r7, #0x20c]
    hle_popstack
    sub     r11,r11,#6
    b       ssp_drc_next
457
458
@ ssp_hle_07_030: rotates the halfword at context offset 0 left by four
@ bits, charges 3 cycles, then falls through into ssp_hle_07_036.
ssp_hle_07_030:
    ldrh    r0, [r7]
    mov     r0, r0, lsl #4
    orr     r0, r0, r0, lsr #16
    strh    r0, [r7]
    sub     r11,r11,#3

@ ssp_hle_07_036: HLE fill routine.
@ Outline of what the code does:
@   - r5 = (F1h - F0h) << 16; when that is below 4<<16 go to the short
@     exit at hle_07_036_ending2
@   - when it is not below EEh << 16, return at once
@   - otherwise store F5h and F8h, issue ssp_pm_write(pattern & mask, 4)
@     and compute a length as 0x78 minus a 7-bit value
@   - fill length units at the DRAM address taken from PMC with the
@     pattern halfword at context offset 0; the mode halfword at offset 6
@     must be 0x4018 (plain store) or 0x4418 (keep destination nibbles
@     where the pattern nibble is zero), anything else calls tr_unhandled
@   - write the new address and mode into pmac_write[4], update the r2
@     byte of r8 and issue a second ssp_pm_write
@ Both fill variants step the destination by 0x40 bytes per word.
ssp_hle_07_036:
    ldr     r1, [r7, #0x1e0]            @ F1h F0h
    rsb     r5, r1, r1, lsr #16
    mov     r5, r5, lsl #16             @ AL not needed
    cmp     r5, #(4<<16)
    sub     r11,r11,#5
    bmi     hle_07_036_ending2
    ldr     r1, [r7, #0x1dc]            @ EEh
    cmp     r5, r1, lsl #16
    sub     r11,r11,#5
    bpl     hle_07_036_ret

    mov     r0, r5, lsr #16
    add     r1, r7, #0x100
    strh    r0, [r1, #0xea]             @ F5h
    ldr     r0, [r7, #0x1e0]            @ F0h
    and     r0, r0, #3
    strh    r0, [r1, #0xf0]             @ F8h
    add     r2, r0, #0xc0               @ r2
    add     r2, r7, r2, lsl #1
    ldrh    r2, [r2]
    ldr     r0, [r7]
    mov     r1, #4
    and     r0, r0, r2
    bl      ssp_pm_write
    @ will handle PMC later
    ldr     r0, [r7, #0x1e8]            @ F5h << 16
    ldr     r1, [r7, #0x1f0]            @ F8h
    ldr     r2, [r7, #0x1d4]            @ EAh
    sub     r0, r0, #(3<<16)
    add     r0, r0, r1, lsl #16
    sub     r0, r2, r0, asr #18
    and     r0, r0, #0x7f
    rsbs    r0, r0, #0x78               @ length
    ble     hle_07_036_ending1

    sub     r11,r11,r0

    @ copy part
    ldr     r1, [r7, #(SSP_OFFS_GR+SSP_PMC*4)]
    ldr     r2, [r7, #SSP_OFFS_DRAM]
    mov     r1, r1, lsl #16
    add     r1, r2, r1, lsr #15         @ addr (based)
    ldrh    r2, [r7, #0]                @ pattern
    ldrh    r3, [r7, #6]                @ mode

    mov     r12,    #0x4000
    orr     r12,r12,#0x0018
    subs    r12,r3, r12                 @ mode 0x4018?
    subnes  r12,r12,#0x0400             @ or mode 0x4418?
    blne    tr_unhandled

    orr     r2, r2, r2, lsl #16         @ pattern in both halfwords
    tst     r3, #0x400
    bne     hle_07_036_ovrwr

hle_07_036_no_ovrwr:
    tst     r1, #2
    strneh  r2, [r1], #0x3e             @ align
    subne   r0, r0, #1
    subs    r0, r0, #4
    blt     hle_07_036_l2

hle_07_036_l1:
    subs    r0, r0, #4
    str     r2, [r1], #0x40
    str     r2, [r1], #0x40
    bge     hle_07_036_l1

hle_07_036_l2:
    tst     r0, #2
    strne   r2, [r1], #0x40
    tst     r0, #1
    strneh  r2, [r1], #2
    b       hle_07_036_end_copy

hle_07_036_ovrwr:
    @ r12 is zero here; build a keep-mask with 0xf for each zero pattern nibble
    tst     r2, #0x000f
    orreq   r12,r12,#0x000f
    tst     r2, #0x00f0
    orreq   r12,r12,#0x00f0
    tst     r2, #0x0f00
    orreq   r12,r12,#0x0f00
    tst     r2, #0xf000
    orreq   r12,r12,#0xf000
    orrs    r12,r12,r12,lsl #16
    beq     hle_07_036_no_ovrwr         @ nothing to keep, use the plain fill

    tst     r1, #2
    beq     hle_07_036_ol0
    ldrh    r3, [r1]
    and     r3, r3, r12
    orr     r3, r3, r2
    strh    r3, [r1], #0x3e             @ align
    sub     r0, r0, #1

hle_07_036_ol0:
    subs    r0, r0, #2
    blt     hle_07_036_ol2

hle_07_036_ol1:
    subs    r0, r0, #2
    ldr     r3, [r1]
    and     r3, r3, r12
    orr     r3, r3, r2
    str     r3, [r1], #0x40
    bge     hle_07_036_ol1

hle_07_036_ol2:
    tst     r0, #1
    ldrneh  r3, [r1]
    andne   r3, r3, r12
    orrne   r3, r3, r2
    strneh  r3, [r1], #2

hle_07_036_end_copy:
    ldr     r2, [r7, #SSP_OFFS_DRAM]
    add     r3, r7, #0x400
    sub     r0, r1, r2                  @ new addr
    mov     r0, r0, lsr #1
    strh    r0, [r3, #(0x6c+4*4)]       @ SSP_OFFS_PM_WRITE+4*4 (low)

hle_07_036_ending1:
    ldr     r0, [r7, #0x1e0]            @ F1h << 16
    add     r0, r0, #(1<<16)
    and     r0, r0, #(3<<16)
    add     r0, r0, #(0xc4<<16)
    bic     r8, r8, #0xff0000
    orr     r8, r8, r0                  @ r2
    add     r0, r7, r0, lsr #15
    ldrh    r0, [r0]
    ldr     r2, [r7]
    and     r0, r0, r2
    movs    r5, r0, lsl #16

    ldr     r1, [r7, #4]                @ new mode
    add     r2, r7, #0x400
    strh    r1, [r2, #(0x6c+4*4+2)]     @ SSP_OFFS_PM_WRITE+4*4 (high)
    mov     r1, #4
    bl      ssp_pm_write
    sub     r11,r11,#35

hle_07_036_ret:
    hle_popstack
    b       ssp_drc_next

hle_07_036_ending2:
    sub     r11,r11,#3
    movs    r5, r5, lsl #1
    bmi     hle_07_036_ret
    mov     r0, #0x87
    b       ssp_drc_next                @ let the dispatcher finish this
618
@ vim:filetype=armasm