C89 MSVC doesn't support 'inline' - will have to make define for
[picodrive.git] / pico / carthw / svp / stub_arm.S
... / ...
CommitLineData
1@*
2@* Compiler helper functions and some SVP HLE code
3@* (C) notaz, 2008,2009
4@*
5@* This work is licensed under the terms of MAME license.
6@* See COPYING file in the top-level directory.
7@*
8
9#include "../../arm_features.h"
10
11.syntax unified
12.text
13.align 2
14
15@ SSP_GR0, SSP_X, SSP_Y, SSP_A,
16@ SSP_ST, SSP_STACK, SSP_PC, SSP_P,
17@ SSP_PM0, SSP_PM1, SSP_PM2, SSP_XST,
18@ SSP_PM4, SSP_gr13, SSP_PMC, SSP_AL
19
20@ register map:
21@ r4: XXYY
22@ r5: A
23@ r6: STACK and emu flags: sss0 * .uu. .lll NZCV (NZCV is PSR bits from ARM)
24@ r7: SSP context
25@ r8: r0-r2 (.210)
26@ r9: r4-r6 (.654)
27@ r10: P
28@ r11: cycles
29@ r12: tmp
30
31
@ Layout of the SSP context that r7 points to, as used by this file.
@ SSP_PC, SSP_P, SSP_PM0 and SSP_PMC are register indexes (they follow the
@ register order listed above); the code multiplies them by 4 and adds
@ SSP_OFFS_GR to get a byte offset. The SSP_OFFS_ values are byte offsets.
32#define SSP_OFFS_GR 0x400
33#define SSP_PC 6
34#define SSP_P 7
35#define SSP_PM0 8
36#define SSP_PMC 14
37#define SSP_OFFS_PM_WRITE 0x46c // pmac_write[]
38#define SSP_OFFS_EMUSTAT 0x484 // emu_status
39#define SSP_OFFS_IRAM_ROM 0x48c // ptr_iram_rom
40#define SSP_OFFS_DRAM 0x490 // ptr_dram
41#define SSP_OFFS_IRAM_DIRTY 0x494
42#define SSP_OFFS_IRAM_CTX 0x498 // iram_context
43#define SSP_OFFS_BLTAB 0x49c // block_table
44#define SSP_OFFS_BLTAB_IRAM 0x4a0
45#define SSP_OFFS_TMP0 0x4a4 // for entry PC
46#define SSP_OFFS_TMP1 0x4a8
47#define SSP_OFFS_TMP2 0x4ac
@ SSP_WAIT_PM0 is a flag bit that ssp_hle_800 ORs into emu_status.
48#define SSP_WAIT_PM0 0x2000
49
50
@ ssp_drc_do_next: find the translated handler for the PC in r0 and go there.
@ In:   r0 = PC (masked to 16 bits here), r7 = SSP context.
@ Arg:  patch_jump = 0: branch straight to the handler (bx r2).
@       patch_jump = 1: first save lr in the TMP2 slot, then continue at
@                       ssp_drc_do_patch with the handler address in r2.
@ PCs of 0x400 and above are looked up in block_table; lower PCs are looked
@ up in the IRAM table selected by iram_context. A zero table entry makes
@ the macro call ssp_translate_block and store its result in the table.
@ Uses the local numeric labels 0 and 1.
51.macro ssp_drc_do_next patch_jump=0
52.if \patch_jump
53 str lr, [r7, #SSP_OFFS_TMP2] @ jump instr. (actually call) address + 4
54.endif
55 mov r0, r0, lsl #16
56 mov r0, r0, lsr #16 @ keep only the low 16 bits of r0
57 str r0, [r7, #SSP_OFFS_TMP0] @ remember the entry PC across calls
58 cmp r0, #0x400
59 blt 0f @ ssp_de_iram
60
61 ldr r2, [r7, #SSP_OFFS_BLTAB]
62 ldr r2, [r2, r0, lsl #2] @ r2 = block_table[PC]
63 tst r2, r2 @ Z set when there is no handler yet
64.if \patch_jump
65 bne ssp_drc_do_patch
66.else
67 bxne r2
68.endif
69 bl ssp_translate_block @ called with the PC still in r0
70 mov r2, r0 @ r2 = value returned in r0, used as the handler
71 ldr r0, [r7, #SSP_OFFS_TMP0] @ entry PC
72 ldr r1, [r7, #SSP_OFFS_BLTAB]
73 str r2, [r1, r0, lsl #2] @ block_table[PC] = handler
74.if \patch_jump
75 b ssp_drc_do_patch
76.else
77 bx r2
78.endif
79
800: @ ssp_de_iram:
81 ldr r1, [r7, #SSP_OFFS_IRAM_DIRTY]
82 tst r1, r1
83 ldreq r1, [r7, #SSP_OFFS_IRAM_CTX] @ dirty flag clear: reuse the stored context
84 beq 1f @ ssp_de_iram_ctx
85
86 bl ssp_get_iram_context
87 mov r1, #0
88 str r1, [r7, #SSP_OFFS_IRAM_DIRTY] @ clear the dirty flag
89 mov r1, r0
90 str r1, [r7, #SSP_OFFS_IRAM_CTX] @ store the value returned in r0
91 ldr r0, [r7, #SSP_OFFS_TMP0] @ entry PC
92
931: @ ssp_de_iram_ctx:
94 ldr r2, [r7, #SSP_OFFS_BLTAB_IRAM]
95 add r2, r2, r1, lsl #12 @ block_tab_iram + iram_context * 0x800/2*4
96 add r1, r2, r0, lsl #2 @ r1 = address of the table entry for this PC
97 ldr r2, [r1]
98 tst r2, r2 @ Z set when there is no handler yet
99.if \patch_jump
100 bne ssp_drc_do_patch
101.else
102 bxne r2
103.endif
104 str r1, [r7, #SSP_OFFS_TMP1] @ keep the entry address across the call
105 bl ssp_translate_block
106 mov r2, r0
107 ldr r0, [r7, #SSP_OFFS_TMP0] @ entry PC
108 ldr r1, [r7, #SSP_OFFS_TMP1] @ &block_table_iram[iram_context][rPC]
109 str r2, [r1]
110.if \patch_jump
111 b ssp_drc_do_patch
112.else
113 bx r2
114.endif
115.endm @ ssp_drc_do_next
116
117
@ ssp_drc_entry: entry point into the dispatcher.
@ In:  r0 = SSP context pointer (kept in r7), r1 = value kept in r11
@      (cycles, per the register map at the top of the file).
@ Saves r4-r11 and lr on the stack, loads the register map from the
@ context, then falls through into the code that follows (ssp_drc_next)
@ with r0 = upper half of the stored PC word.
118FUNCTION(ssp_drc_entry):
119 stmfd sp!, {r4-r11, lr}
120 mov r7, r0 @ ssp
121 mov r11, r1
122ssp_regfile_load:
123 add r2, r7, #0x400
124 add r2, r2, #4
@ five words starting at context offset 0x404; by the register order listed
@ at the top of the file these are X, Y, A, ST, STACK
125 ldmia r2, {r3,r4,r5,r6,r8}
126 mov r3, r3, lsr #16
127 mov r3, r3, lsl #16 @ keep the upper half of the first word
128 orr r4, r3, r4, lsr #16 @ XXYY
129
130 and r8, r8, #0x0f0000
131 mov r8, r8, lsl #13 @ sss0 *
132 and r9, r6, #0x670000
133 tst r6, #0x80000000
134 orrne r8, r8, #0x8 @ bit 31 of the ST word becomes bit 3 (N)
135 tst r6, #0x20000000
136 orrne r8, r8, #0x4 @ sss0 * NZ..
137 orr r6, r8, r9, lsr #12 @ sss0 * .uu. .lll NZ..
138
139 ldr r8, [r7, #0x440] @ r0-r2
140 ldr r9, [r7, #0x444] @ r4-r6
141 ldr r10,[r7, #(0x400+SSP_P*4)] @ P
142
143 ldr r0, [r7, #(SSP_OFFS_GR+SSP_PC*4)]
144 mov r0, r0, lsr #16 @ r0 = upper half of the PC word
145
146
@ ssp_drc_next: continue execution at the PC given in r0.
@ Expands ssp_drc_do_next with patch_jump=0, so the calling code is not
@ modified. Does not return here; control goes to the selected handler.
147FUNCTION(ssp_drc_next):
148 ssp_drc_do_next 0
149
150
@ ssp_drc_next_patch: like ssp_drc_next, but expands ssp_drc_do_next with
@ patch_jump=1: lr (address after the calling instruction) is saved in the
@ TMP2 slot and the lookup ends in ssp_drc_do_patch below.
151FUNCTION(ssp_drc_next_patch):
152 ssp_drc_do_next 1
153
@ ssp_drc_do_patch: rewrite the instruction at (saved lr - 4).
@ In: r2 = handler address, TMP2 slot = saved lr, TMP0 slot = entry PC.
@ Three cases, by the distance r2 - lr:
@   0:     the instruction at lr-4 is replaced by 0xe1a00000 (nop)
@   4:     the word at lr plus 1 is stored at lr-4 and lr gets the nop
@   other: the instruction at lr-4 keeps its top byte with bit 0 (L bit)
@          cleared and gets (r2 - lr - 4) >> 2 as its low 24 bits
@ Afterwards cache_flush_d_inval_i is called with r0 = lr-4, r1 = lr+4 and
@ control goes to the handler with r0 = entry PC.
154ssp_drc_do_patch:
155 ldr r1, [r7, #SSP_OFFS_TMP2] @ jump instr. (actually call) address + 4
156 subs r12,r2, r1
157 moveq r3, #0xe1000000
158 orreq r3, r3, #0x00a00000 @ nop
159 streq r3, [r1, #-4]
160 beq ssp_drc_dp_end
161
162 cmp r12,#4
163 ldreq r3, [r1]
@ NOTE(review): presumably the word at r1 is a branch whose offset field
@ has to grow by one word when it is moved up - confirm with the translator
164 addeq r3, r3, #1
165 streq r3, [r1, #-4] @ move the other cond up
166 moveq r3, #0xe1000000
167 orreq r3, r3, #0x00a00000
168 streq r3, [r1] @ fill its place with nop
169 beq ssp_drc_dp_end
170
171 ldr r3, [r1, #-4]
172 sub r12,r12,#4 @ r12 = handler - (address of the instruction + 8)
173 mov r3, r3, lsr #24 @ keep only the top byte of the instruction
174 bic r3, r3, #1 @ L bit
175 orr r3, r3, r12,lsl #6
176 mov r3, r3, ror #8 @ patched branch instruction
177 str r3, [r1, #-4] @ patch the bl/b to jump directly to another handler
178
179ssp_drc_dp_end:
180 str r2, [r7, #SSP_OFFS_TMP1] @ keep the handler address across the call
181 sub r0, r1, #4
182 add r1, r1, #4
183 bl cache_flush_d_inval_i
184 ldr r2, [r7, #SSP_OFFS_TMP1]
185 ldr r0, [r7, #SSP_OFFS_TMP0] @ entry PC
186 bx r2
187
188
@ ssp_drc_end: leave the dispatcher.
@ In:  r0 = PC; it is stored to the context shifted left by 16.
@ Writes the register map (r4-r6, r8-r10) back into the context, restores
@ r4-r11 and lr from the stack and returns with r0 = r11.
189FUNCTION(ssp_drc_end):
190 mov r0, r0, lsl #16
191 str r0, [r7, #(SSP_OFFS_GR+SSP_PC*4)]
192
193ssp_regfile_store:
194 str r10,[r7, #(0x400+SSP_P*4)] @ P
195 str r8, [r7, #0x440] @ r0-r2
196 str r9, [r7, #0x444] @ r4-r6
197
198 mov r9, r6, lsr #13
199 and r9, r9, #(7<<16) @ STACK
200 mov r3, r6, lsl #28
201 msr cpsr_f, r3 @ to ARM PSR
202 and r6, r6, #0x670
203 mov r6, r6, lsl #12
204 orrmi r6, r6, #0x80000000 @ N
205 orreq r6, r6, #0x20000000 @ Z
206
207 mov r3, r4, lsl #16 @ Y
208 mov r2, r4, lsr #16
209 mov r2, r2, lsl #16 @ X
210 add r8, r7, #0x400
211 add r8, r8, #4
@ five words starting at context offset 0x404, same order as the load in
@ ssp_regfile_load
212 stmia r8, {r2,r3,r5,r6,r9}
213
214 mov r0, r11 @ return value
215 ldmfd sp!, {r4-r11, lr}
216 bx lr
217
218
219
220@ ld A, PM0
221@ andi 2
222@ bra z=1, gloc_0800
@ ssp_hle_800: tests bit 0x20000 of the PM0 word in the context.
@ Bit clear: sets SSP_WAIT_PM0 in emu_status, takes 1024 off r11 and ends
@            via ssp_drc_end with r0 = 0x400.
@ Bit set:   continues via ssp_drc_next with r0 = 0x404.
223FUNCTION(ssp_hle_800):
224 ldr r0, [r7, #(SSP_OFFS_GR+SSP_PM0*4)]
225 ldr r1, [r7, #SSP_OFFS_EMUSTAT]
226 tst r0, #0x20000
227 orreq r1, r1, #SSP_WAIT_PM0
228 subeq r11,r11, #1024
229 streq r1, [r7, #SSP_OFFS_EMUSTAT]
230 mov r0, #0x400 @ mov without s: flags from the tst are still valid
231 beq ssp_drc_end
232 orrne r0, r0, #0x004 @ always taken here, since the eq case branched away
233 b ssp_drc_next
234
235
@ hle_flushflags: replace the low 4 bits of r6 with the top 4 bits of the
@ CPSR (the NZCV nibble of the register map). Clobbers r1.
@ All uses visible in this file are commented out.
236.macro hle_flushflags
237 bic r6, r6, #0xf
238 mrs r1, cpsr
239 orr r6, r6, r1, lsr #28
240.endm
241
@ hle_popstack: subtract 0x20000000 from r6 (one step of the stack field
@ in its top bits) and load the halfword at
@ context + 0x448 + (r6 >> 28) into r0. Clobbers r1.
@ Every use in this file is followed by ssp_drc_next or ssp_drc_end,
@ which take r0 as the PC.
242.macro hle_popstack
243 sub r6, r6, #0x20000000
244 add r1, r7, #0x400
245 add r1, r1, #0x048 @ stack
246 add r1, r1, r6, lsr #28 @ top nibble of r6 used as a byte offset
247 ldrh r0, [r1]
248.endm
249
@ ssp_hle_902: copies a run of halfwords inside the memory that
@ ptr_iram_rom points to.
@ The halfword at context offset 0x200 is the source index. The first
@ source halfword (low 10 bits) is the destination index, the second is
@ the length, and the data follows. Marks IRAM dirty, writes the advanced
@ source index back to offset 0x200, stores the end destination index
@ ORed with 0x081c8000 to PMC and to pmac_write[4], then pops the return
@ PC and continues, or ends if r11 has run out.
@ Ends immediately via ssp_drc_end when r11 <= 0 on entry.
250FUNCTION(ssp_hle_902):
251 cmp r11, #0
252 ble ssp_drc_end
253
254 add r1, r7, #0x200
255 ldrh r0, [r1]
256 ldr r3, [r7, #SSP_OFFS_IRAM_ROM]
257 add r2, r3, r0, lsl #1 @ (r7|00)
258 ldrh r0, [r2], #2
259 mov r5, r5, lsl #16
260 mov r5, r5, lsr #16 @ r5 (A) keeps only its low 16 bits
261 bic r0, r0, #0xfc00
262 add r3, r3, r0, lsl #1 @ IRAM dest
263 ldrh r12,[r2], #2 @ length
264 bic r3, r3, #3 @ always seen aligned
265@ orr r5, r5, #0x08000000
266@ orr r5, r5, #0x00880000
267@ sub r5, r5, r12, lsl #16
268 bic r6, r6, #0xf @ clear the NZCV nibble kept in r6
269 add r12,r12,#1
270 mov r0, #1
271 str r0, [r7, #SSP_OFFS_IRAM_DIRTY]
272 sub r11,r11,r12,lsl #1
273 sub r11,r11,r12 @ -= length*3
274
@ copy two halfwords per iteration as one word store
275ssp_hle_902_loop:
276 ldrh r0, [r2], #2
277 ldrh r1, [r2], #2
278 subs r12,r12,#2
279 orr r0, r0, r1, lsl #16
280 str r0, [r3], #4
281 bgt ssp_hle_902_loop
282
283 tst r12, #1 @ one more halfword when the remaining count is odd
284 ldrhne r0, [r2], #2
285 strhne r0, [r3], #2
286
287 ldr r0, [r7, #SSP_OFFS_IRAM_ROM]
288 add r1, r7, #0x200
289 sub r2, r2, r0
290 mov r2, r2, lsr #1 @ byte offset back to a halfword index
291 strh r2, [r1] @ (r7|00)
292
293 sub r0, r3, r0
294 mov r0, r0, lsr #1
295 orr r0, r0, #0x08000000
296 orr r0, r0, #0x001c8000
297 str r0, [r7, #(SSP_OFFS_GR+SSP_PMC*4)]
298 str r0, [r7, #(SSP_OFFS_PM_WRITE+4*4)]
299
300 hle_popstack
301 subs r11,r11,#16 @ timeslice is likely to end
302 ble ssp_drc_end
303 b ssp_drc_next
304
305
306@ this one is car rendering related
@ hle_11_12c_mla offs_in: one row of a multiply-accumulate.
@ Reads four signed halfwords c0..c3 at context offset offs_in and computes
@   r5 = (r2*c0 + r3*c1 + r4*c2 + (c3 << 11)) >> 13   (logical shift)
@ The low halfword of r5 is stored at context + (r8 >> 23), then r8 is
@ advanced by 1 << 24. Clobbers r0, r1, r12 and the flags.
307.macro hle_11_12c_mla offs_in
308 ldrsh r5, [r7, #(\offs_in+0)]
309 ldrsh r0, [r7, #(\offs_in+2)]
310 ldrsh r1, [r7, #(\offs_in+4)]
311 mul r5, r2, r5
312 ldrsh r12,[r7, #(\offs_in+6)]
313 mla r5, r3, r0, r5
314 mla r5, r4, r1, r5
315 add r5, r5, r12,lsl #11
316
317 movs r5, r5, lsr #13
318 add r1, r7, r8, lsr #23
319 strh r5, [r1]
320 add r8, r8, #(1<<24)
321.endm
322
@ ssp_hle_11_12c: reads three values with ssp_pm_read (r0 = 0 each time),
@ sign-extends each from 16 bits and doubles it, then runs
@ hle_11_12c_mla on the coefficient rows at context offsets 0x20, 0x28
@ and 0x30. Finally sets the low byte of r8 to 0x1c, pops the return PC,
@ takes 33 off r11 and continues via ssp_drc_next.
@ Ends immediately via ssp_drc_end when r11 <= 0 on entry.
323FUNCTION(ssp_hle_11_12c):
324 cmp r11, #0
325 ble ssp_drc_end
326
327 mov r0, #0
328 bl ssp_pm_read
329 mov r4, r0
330
331 mov r0, #0
332 bl ssp_pm_read
333 mov r5, r0
334
335 mov r0, #0
336 bl ssp_pm_read
337
338 mov r2, r4, lsl #16
339 mov r2, r2, asr #15 @ (r7|00) << 1
340 mov r3, r5, lsl #16
341 mov r3, r3, asr #15 @ (r7|01) << 1
342 mov r4, r0, lsl #16
343 mov r4, r4, asr #15 @ (r7|10) << 1
344
345 bic r8, r8, #0xff
346 mov r8, r8, ror #16 @ second byte of r8 moves to the top for the macro
347
348 hle_11_12c_mla 0x20
349 hle_11_12c_mla 0x28
350 hle_11_12c_mla 0x30
351
352 mov r8, r8, ror #16 @ rotate r8 back to its normal layout
353 orr r8, r8, #0x1c
354@ hle_flushflags
355 hle_popstack
356 sub r11,r11,#33
357 b ssp_drc_next
358
359
@ ssp_hle_11_384 / ssp_hle_11_38a: two entry points into one routine that
@ differ only in the loop counter r3 (2 or 3). The loop runs while r3
@ stays non-negative after its decrement.
@ Each iteration reads halfwords starting at context offset 0x1c0 (r0
@ advances 6 bytes per iteration), compares them against values from
@ context offsets 0x224, 0x220 and 0x1e8, and ORs the bit mask r1 into
@ the upper or lower half of r2. r1 is shifted left once per iteration.
@ Afterwards r2 is stored at offset 0x1dc, the low byte of r8 becomes
@ (r0 - context) / 2, the low byte of r9 becomes r1, the return PC is
@ popped and 9+30*4 is taken off r11.
@ Ends immediately via ssp_drc_end when r11 <= 0 on entry.
360FUNCTION(ssp_hle_11_384):
361 mov r3, #2
362 b ssp_hle_11_38x
363
364FUNCTION(ssp_hle_11_38a):
365 mov r3, #3 @ r5
366
367ssp_hle_11_38x:
368 cmp r11, #0
369 ble ssp_drc_end
370
371 mov r2, #0 @ EFh, EEh
372 mov r1, #1 @ r4
373 add r0, r7, #0x1c0 @ r0 (based)
374
375ssp_hle_11_38x_loop:
376 ldrh r5, [r0], #2
377 ldr r12,[r7, #0x224]
378 mov r5, r5, lsl #16
379 eor r5, r5, r5, asr #31
380 add r5, r5, r5, lsr #31 @ abs(r5)
381 cmp r5, r12,lsl #16
382 orrpl r2, r2, r1,lsl #16 @ EFh |= r4
383
384 ldrh r5, [r0, #2]! @ pre-indexed: skips one halfword, r0 is updated
385 ldr r12,[r7, #0x220]
386 cmp r5, r12,lsr #16
387 orrpl r2, r2, r1,lsl #16 @ EFh |= r4
388
389 ldr r12,[r7, #0x1e8]
390 add r0, r0, #2
391 mov r12,r12,lsl #16
392 cmp r5, r12,lsr #16 @ compare against the low halfword at 0x1e8
393 orrmi r2, r2, r1 @ low half of r2 (EEh) |= r4
394
395 mov r1, r1, lsl #1
396 subs r3, r3, #1
397 bpl ssp_hle_11_38x_loop
398
399 str r2, [r7, #0x1dc]
400 sub r0, r0, r7
401 bic r8, r8, #0xff
402 orr r8, r8, r0, lsr #1
403 bic r9, r9, #0xff
404 orr r9, r9, r1
405
406@ hle_flushflags
407 hle_popstack
408 sub r11,r11,#(9+30*4)
409 b ssp_drc_next
410
411
@ ssp_hle_07_6d6: scans words starting at context + (low byte of r8) * 2
@ until one with bit 31 set is read. For every other word its low
@ halfword (shifted to the top) replaces r1 when it compares below r1 and
@ replaces r2 when it does not compare below r2. Each such word costs 16
@ on r11.
@ r1 and r2 start as the low and high halfword of the word at context
@ offset 0x20c, and are written back there in the same positions.
@ The low byte of r8 becomes (r0 - context) / 2, the return PC is popped
@ and another 6 is taken off r11.
@ Ends immediately via ssp_drc_end when r11 <= 0 on entry.
412FUNCTION(ssp_hle_07_6d6):
413 cmp r11, #0
414 ble ssp_drc_end
415
416 ldr r1, [r7, #0x20c]
417 and r0, r8, #0xff @ assuming alignment
418 add r0, r7, r0, lsl #1
419 mov r2, r1, lsr #16
420 mov r1, r1, lsl #16 @ 106h << 16
421 mov r2, r2, lsl #16 @ 107h << 16
422
423ssp_hle_07_6d6_loop:
424 ldr r5, [r0], #4
425 tst r5, r5
426 bmi ssp_hle_07_6d6_end @ bit 31 set ends the scan
427 mov r5, r5, lsl #16
428 cmp r5, r1
429 movmi r1, r5 @ new lower bound
430 cmp r5, r2
431 sub r11,r11,#16 @ sub without s: flags from the cmp are kept
432 bmi ssp_hle_07_6d6_loop
433 mov r2, r5 @ new upper bound
434 b ssp_hle_07_6d6_loop
435
436ssp_hle_07_6d6_end:
437 sub r0, r0, r7
438 mov r0, r0, lsr #1
439 bic r8, r8, #0xff
440 orr r8, r8, r0
441 orr r1, r2, r1, lsr #16
442 str r1, [r7, #0x20c]
443 hle_popstack
444 sub r11,r11,#6
445 b ssp_drc_next
446
447
@ ssp_hle_07_030: rotates the halfword at context offset 0 left by 4 bits
@ (within 16 bits), takes 3 off r11 and falls through into the code that
@ follows (ssp_hle_07_036). Clobbers r0.
448FUNCTION(ssp_hle_07_030):
449 ldrh r0, [r7]
450 mov r0, r0, lsl #4
451 orr r0, r0, r0, lsr #16 @ bring the 4 bits shifted out back in at the bottom
452 strh r0, [r7] @ only the low 16 bits are stored
453 sub r11,r11,#3
454
@ ssp_hle_07_036: pattern fill into the memory that ptr_dram points to.
@ Outline, as far as this file shows:
@  - r5 = (F1h - F0h) << 16. If that is below 4<<16, go to
@    hle_07_036_ending2; if it is not below EEh << 16, just pop and
@    continue.
@  - Otherwise stores F5h and F8h, calls ssp_pm_write with r1 = 4, and
@    computes a length (0x78 minus a 7 bit value). A length <= 0 skips the
@    copy.
@  - The copy writes the halfword pattern from context offset 0 at a
@    stride of 0x40 bytes starting at ptr_dram + (low half of PMC) * 2.
@    The mode halfword at context offset 6 must be 0x4018 or 0x4418,
@    otherwise tr_unhandled is called. With bit 0x400 set, destination
@    nibbles are kept wherever the pattern nibble is zero.
@  - The end address goes to the low half of pmac_write[4], the mode word
@    at context offset 4 to its high half, and ssp_pm_write is called
@    again with r1 = 4.
@ Exits through ssp_drc_next, either after hle_popstack or with
@ r0 = 0x87.
455FUNCTION(ssp_hle_07_036):
456 ldr r1, [r7, #0x1e0] @ F1h F0h
457 rsb r5, r1, r1, lsr #16 @ low half of r5 = F1h - F0h
458 mov r5, r5, lsl #16 @ AL not needed
459 cmp r5, #(4<<16)
460 sub r11,r11,#5
461 bmi hle_07_036_ending2
462 ldr r1, [r7, #0x1dc] @ EEh
463 cmp r5, r1, lsl #16
464 sub r11,r11,#5
465 bpl hle_07_036_ret
466
467 mov r0, r5, lsr #16
468 add r1, r7, #0x100
469 strh r0, [r1, #0xea] @ F5h
470 ldr r0, [r7, #0x1e0] @ F0h
471 and r0, r0, #3
472 strh r0, [r1, #0xf0] @ F8h
473 add r2, r0, #0xc0 @ r2
474 add r2, r7, r2, lsl #1
475 ldrh r2, [r2]
476 ldr r0, [r7]
477 mov r1, #4
478 and r0, r0, r2
479 bl ssp_pm_write
480 @ will handle PMC later
481 ldr r0, [r7, #0x1e8] @ F5h << 16
482 ldr r1, [r7, #0x1f0] @ F8h
483 ldr r2, [r7, #0x1d4] @ EAh
484 sub r0, r0, #(3<<16)
485 add r0, r0, r1, lsl #16
486 sub r0, r2, r0, asr #18
487 and r0, r0, #0x7f
488 rsbs r0, r0, #0x78 @ length
489 ble hle_07_036_ending1
490
491 sub r11,r11,r0 @ the length is also taken off r11
492
493 @ copy part
494 ldr r1, [r7, #(SSP_OFFS_GR+SSP_PMC*4)]
495 ldr r2, [r7, #SSP_OFFS_DRAM]
496 mov r1, r1, lsl #16
497 add r1, r2, r1, lsr #15 @ addr (based)
498 ldrh r2, [r7, #0] @ pattern
499 ldrh r3, [r7, #6] @ mode
500
@ r12 ends up zero when the mode is 0x4018 or 0x4418; any other mode
@ reaches tr_unhandled
501 mov r12, #0x4000
502 orr r12,r12,#0x0018
503 subs r12,r3, r12
504 subsne r12,r12,#0x0400
505 blne tr_unhandled
506
507 orr r2, r2, r2, lsl #16 @ pattern in both halves for word stores
508 tst r3, #0x400
509 bne hle_07_036_ovrwr
510
@ plain fill: r0 counts the remaining halfwords
511hle_07_036_no_ovrwr:
512 tst r1, #2
513 strhne r2, [r1], #0x3e @ align
514 subne r0, r0, #1
515 subs r0, r0, #4
516 blt hle_07_036_l2
517
518hle_07_036_l1:
519 subs r0, r0, #4
520 str r2, [r1], #0x40
521 str r2, [r1], #0x40
522 bge hle_07_036_l1
523
524hle_07_036_l2:
525 tst r0, #2
526 strne r2, [r1], #0x40
527 tst r0, #1
528 strhne r2, [r1], #2
529 b hle_07_036_end_copy
530
@ masked fill: r12 gets an f nibble wherever the pattern nibble is zero
531hle_07_036_ovrwr:
532 tst r2, #0x000f
533 orreq r12,r12,#0x000f
534 tst r2, #0x00f0
535 orreq r12,r12,#0x00f0
536 tst r2, #0x0f00
537 orreq r12,r12,#0x0f00
538 tst r2, #0xf000
539 orreq r12,r12,#0xf000
540 orrs r12,r12,r12,lsl #16
541 beq hle_07_036_no_ovrwr @ empty mask: same result as the plain fill
542
543 tst r1, #2
544 beq hle_07_036_ol0
545 ldrh r3, [r1]
546 and r3, r3, r12
547 orr r3, r3, r2
548 strh r3, [r1], #0x3e @ align
549 sub r0, r0, #1
550
551hle_07_036_ol0:
552 subs r0, r0, #2
553 blt hle_07_036_ol2
554
555hle_07_036_ol1:
556 subs r0, r0, #2
557 ldr r3, [r1]
558 and r3, r3, r12 @ keep destination nibbles selected by the mask
559 orr r3, r3, r2
560 str r3, [r1], #0x40
561 bge hle_07_036_ol1
562
563hle_07_036_ol2:
564 tst r0, #1
565 ldrhne r3, [r1]
566 andne r3, r3, r12
567 orrne r3, r3, r2
568 strhne r3, [r1], #2
569
570hle_07_036_end_copy:
571 ldr r2, [r7, #SSP_OFFS_DRAM]
572 add r3, r7, #0x400
573 sub r0, r1, r2 @ new addr
574 mov r0, r0, lsr #1
575 strh r0, [r3, #(0x6c+4*4)] @ SSP_OFFS_PM_WRITE+4*4 (low)
576
577hle_07_036_ending1:
578 ldr r0, [r7, #0x1e0] @ F1h << 16
579 add r0, r0, #(1<<16)
580 and r0, r0, #(3<<16)
581 add r0, r0, #(0xc4<<16)
582 bic r8, r8, #0xff0000
583 orr r8, r8, r0 @ r2
584 add r0, r7, r0, lsr #15
585 ldrh r0, [r0]
586 ldr r2, [r7]
587 and r0, r0, r2
588 movs r5, r0, lsl #16
589
590 ldr r1, [r7, #4] @ new mode
591 add r2, r7, #0x400
592 strh r1, [r2, #(0x6c+4*4+2)] @ SSP_OFFS_PM_WRITE+4*4 (high)
593 mov r1, #4
594 bl ssp_pm_write
595 sub r11,r11,#35
596
597hle_07_036_ret:
598 hle_popstack
599 b ssp_drc_next
600
601hle_07_036_ending2:
602 sub r11,r11,#3
603 movs r5, r5, lsl #1
604 bmi hle_07_036_ret
605 mov r0, #0x87
606 b ssp_drc_next @ let the dispatcher finish this
607
608
609@ ios stuff - preserving r9 on external calls
610#ifdef __MACH__
611
@ APPLE_WRAP(f): defines a label f that pushes r9 and lr, calls the
@ underscore-prefixed symbol _f, then pops r9 and returns by popping the
@ saved lr into pc. Per the comment above, this keeps r9 intact across
@ external calls on this platform. One wrapper is instantiated for every
@ external routine that this file calls with bl.
612#define APPLE_WRAP(f) \
613 f:; \
614 push {r9,lr}; \
615 bl _##f; \
616 pop {r9,pc}
617
618APPLE_WRAP(cache_flush_d_inval_i)
619APPLE_WRAP(ssp_get_iram_context)
620APPLE_WRAP(ssp_pm_read)
621APPLE_WRAP(ssp_pm_write)
622APPLE_WRAP(ssp_translate_block)
623APPLE_WRAP(tr_unhandled)
624
625#endif
626
627@ vim:filetype=armasm