@ stub_arm.S -- picodrive.git: Pico/carthw/svp/stub_arm.S
@ (revision subject: "svp compiler: jump fixup")
@ vim:filetype=armasm

@ The .if 0 / .endif pair hides whatever compiler.h expands to from the
@ assembler; the preprocessor still processes the #include, so macros
@ defined there remain usable below.
@ NOTE(review): TCACHE_SIZE is presumably provided by compiler.h -- confirm.
.if 0
#include "compiler.h"
.endif

@ exported symbols
.global tcache

.global flush_inval_caches
.global ssp_drc_entry
.global ssp_drc_next
.global ssp_drc_next_patch
.global ssp_drc_end
.global ssp_hle_800
e807ac75 15
@ translation cache buffer
@ TCACHE_SIZE bytes reserved in .text, aligned to 2^12 = 4096 bytes.
@ NOTE(review): TCACHE_SIZE is not defined in this file; it is presumably
@ supplied by compiler.h -- confirm.
.text
.align 12                               @ 4096
.size tcache, TCACHE_SIZE
tcache:
    .space  TCACHE_SIZE
22
23
.text
.align 2


@ flush_inval_caches: issue software interrupt 0x9f0002 with r2 = 0.
@ NOTE(review): 0x9f0002 looks like the Linux ARM OABI cacheflush
@ syscall (start in r0, end in r1, flags in r2) -- confirm for the
@ target platform.
@ In:    r0, r1 = passed through unchanged to the swi
@        (ssp_drc_dp_end passes patch address - 4 and patch address + 4)
@ Clobb: r2, plus whatever the swi handler changes
flush_inval_caches:
    mov     r2, #0x0                    @ must be 0
    swi     0x9f0002
    bx      lr
32
33
@ SSP general register order (one 32-bit slot each, starting at
@ context offset 0x400):
@   SSP_GR0, SSP_X,     SSP_Y,   SSP_A,
@   SSP_ST,  SSP_STACK, SSP_PC,  SSP_P,
@   SSP_PM0, SSP_PM1,   SSP_PM2, SSP_XST,
@   SSP_PM4, SSP_gr13,  SSP_PMC, SSP_AL

@ register map:
@ r4:  XXYY
@ r5:  A
@ r6:  STACK and emu flags: sss0 * .uu. .lll NZCV (NZCV is PSR bits from ARM)
@ r7:  SSP context
@ r8:  r0-r2 (.210)
@ r9:  r4-r6 (.654)
@ r10: P
@ r11: cycles
48
e807ac75 49
/* Offsets into the SSP context (base in r7) and register indices. */
#define SSP_OFFS_GR         0x400   // general registers, 4 bytes each
#define SSP_PC              6       // index into the general registers
#define SSP_P               7
#define SSP_PM0             8
#define SSP_OFFS_EMUSTAT    0x484   // emu_status
#define SSP_OFFS_IRAM_DIRTY 0x494
#define SSP_OFFS_IRAM_CTX   0x498   // iram_context
#define SSP_OFFS_BLTAB      0x49c   // block_table
#define SSP_OFFS_BLTAB_IRAM 0x4a0
#define SSP_OFFS_TMP0       0x4a4   // for entry PC
#define SSP_OFFS_TMP1       0x4a8
#define SSP_OFFS_TMP2       0x4ac

/* emu_status flag set by ssp_hle_800 */
#define SSP_WAIT_PM0        0x2000
63
64
@ ssp_drc_do_next: find the translated block for the PC in r0,
@ translating it first if the table entry is still zero, then hand
@ control to it.
@   patch_jump = 0: enter the block directly (bx r2).
@   patch_jump = 1: save lr in SSP_OFFS_TMP2 and continue at
@                   ssp_drc_do_patch with r2 = block address.
@ In:    r0 = PC (only the low 16 bits are used), r7 = SSP context
@ Out:   r2 = block address, r0 = entry PC
@ Uses:  r0-r2, SSP_OFFS_TMP0 / TMP1 (/ TMP2); lr is overwritten by the
@        calls to ssp_translate_block and ssp_get_iram_context.
@ The numeric labels 0 and 1 keep the two expansions from clashing.
.macro ssp_drc_do_next patch_jump=0
.if \patch_jump
    str     lr, [r7, #SSP_OFFS_TMP2]    @ jump instr. (actually call) address + 4
.endif
    mov     r0, r0, lsl #16
    mov     r0, r0, lsr #16             @ keep the low 16 bits only
    str     r0, [r7, #SSP_OFFS_TMP0]    @ entry PC, reloaded after calls
    cmp     r0, #0x400
    blt     0f                          @ ssp_de_iram

    @ PC >= 0x400: single block table indexed by PC
    ldr     r2, [r7, #SSP_OFFS_BLTAB]
    ldr     r2, [r2, r0, lsl #2]
    tst     r2, r2                      @ nonzero entry: already translated
.if \patch_jump
    bne     ssp_drc_do_patch
.else
    bxne    r2
.endif
    bl      ssp_translate_block
    mov     r2, r0                      @ r2 = new block address
    ldr     r0, [r7, #SSP_OFFS_TMP0]    @ entry PC
    ldr     r1, [r7, #SSP_OFFS_BLTAB]
    str     r2, [r1, r0, lsl #2]        @ remember it in the table
.if \patch_jump
    b       ssp_drc_do_patch
.else
    bx      r2
.endif

0: @ ssp_de_iram:
    @ PC < 0x400: the table is selected by the IRAM context, which is
    @ recomputed only when the dirty flag is set
    ldr     r1, [r7, #SSP_OFFS_IRAM_DIRTY]
    tst     r1, r1
    ldreq   r1, [r7, #SSP_OFFS_IRAM_CTX]
    beq     1f                          @ ssp_de_iram_ctx

    bl      ssp_get_iram_context
    mov     r1, #0
    str     r1, [r7, #SSP_OFFS_IRAM_DIRTY]
    mov     r1, r0
    str     r1, [r7, #SSP_OFFS_IRAM_CTX]
    ldr     r0, [r7, #SSP_OFFS_TMP0]    @ entry PC

1: @ ssp_de_iram_ctx:
    ldr     r2, [r7, #SSP_OFFS_BLTAB_IRAM]
    add     r2, r2, r1, lsl #12         @ block_tab_iram + iram_context * 0x800/2*4
    add     r1, r2, r0, lsl #2          @ r1 = address of the table entry
    ldr     r2, [r1]
    tst     r2, r2
.if \patch_jump
    bne     ssp_drc_do_patch
.else
    bxne    r2
.endif
    str     r1, [r7, #SSP_OFFS_TMP1]    @ entry address must survive the call
    bl      ssp_translate_block
    mov     r2, r0
    ldr     r0, [r7, #SSP_OFFS_TMP0]    @ entry PC
    ldr     r1, [r7, #SSP_OFFS_TMP1]    @ &block_table_iram[iram_context][rPC]
    str     r2, [r1]
.if \patch_jump
    b       ssp_drc_do_patch
.else
    bx      r2
.endif
.endm @ ssp_drc_do_next
130
131
@ ssp_drc_entry: entry point into translated code.
@ In:   r0 = value kept in r11 while translated code runs (cycles, per
@       the register map); ssp_regfile_store hands r11 back in r0.
@ Saves r4-r11 and lr (restored by ssp_regfile_store), loads the SSP
@ state into the mapped ARM registers and falls through into
@ ssp_drc_next with r0 = upper 16 bits of the PC slot.
ssp_drc_entry:
    stmfd   sp!, {r4-r11, lr}
    mov     r11, r0

ssp_regfile_load:
    ldr     r7, =ssp
    ldr     r7, [r7]                    @ r7 = SSP context pointer
    add     r2, r7, #SSP_OFFS_GR
    add     r2, r2, #4                  @ skip GR0
    ldmia   r2, {r3,r4,r5,r6,r8}        @ X, Y, A, ST, STACK slots
    mov     r3, r3, lsr #16
    mov     r3, r3, lsl #16             @ upper half of the X slot
    orr     r4, r3, r4, lsr #16         @ XXYY

    and     r8, r8, #0x0f0000
    mov     r8, r8, lsl #13             @ sss0 *
    and     r9, r6, #0x670000
    tst     r6, #0x80000000
    orrne   r8, r8, #0x8
    tst     r6, #0x20000000
    orrne   r8, r8, #0x4                @ sss0 * NZ..
    orr     r6, r8, r9, lsr #12         @ sss0 * .uu. .lll NZ..

    ldr     r8, [r7, #0x440]            @ r0-r2
    ldr     r9, [r7, #0x444]            @ r4-r6
    ldr     r10,[r7, #(SSP_OFFS_GR+SSP_P*4)] @ P

    ldr     r0, [r7, #(SSP_OFFS_GR+SSP_PC*4)]
    mov     r0, r0, lsr #16             @ PC is kept in the upper 16 bits


@ ssp_drc_next: dispatch to the block for the PC in r0.
@ ssp_regfile_load above falls through into this label.
ssp_drc_next:
    ssp_drc_do_next 0
164
165
@ ssp_drc_next_patch: like ssp_drc_next, but the macro first saves lr
@ in SSP_OFFS_TMP2, so the instruction at lr - 4 (the call that got us
@ here) can be rewritten to reach the target block directly.
ssp_drc_next_patch:
    ssp_drc_do_next 1

@ ssp_drc_do_patch: rewrite the instruction at [TMP2 - 4].
@ In:  r2 = target block address, r7 = SSP context,
@      SSP_OFFS_TMP2 = address following the instruction to patch
@ Three cases, with r1 = TMP2:
@   r2 == r1     : target follows directly; store a nop (0xe1a00000).
@   r2 == r1 + 4 : copy the word at r1 to r1 - 4 with 1 added, and
@                  store a nop at r1.
@                  NOTE(review): presumably the word at r1 is a branch
@                  whose offset must grow by one word after moving up
@                  -- confirm against the code emitter.
@   otherwise    : keep the top byte of the old instruction with bit 0
@                  (L bit) cleared and put (r2 - r1 - 4) / 4 into the
@                  low 24 bits.
ssp_drc_do_patch:
    ldr     r1, [r7, #SSP_OFFS_TMP2]    @ jump instr. (actually call) address + 4
    subs    r12,r2, r1
    moveq   r3, #0xe1000000
    orreq   r3, r3, #0x00a00000         @ nop
    streq   r3, [r1, #-4]
    beq     ssp_drc_dp_end

    cmp     r12,#4
    ldreq   r3, [r1]
    addeq   r3, r3, #1
    streq   r3, [r1, #-4]               @ move the other cond up
    moveq   r3, #0xe1000000
    orreq   r3, r3, #0x00a00000
    streq   r3, [r1]                    @ fill its place with nop
    beq     ssp_drc_dp_end

    ldr     r3, [r1, #-4]
    sub     r12,r12,#4                  @ offset counted from r1 + 4
    mov     r3, r3, lsr #24             @ top byte of the old instruction
    bic     r3, r3, #1                  @ L bit
    orr     r3, r3, r12,lsl #6          @ byte offset / 4, placed above bit 7
    mov     r3, r3, ror #8              @ patched branch instruction
    str     r3, [r1, #-4]

@ ssp_drc_dp_end: call flush_inval_caches with r0 = r1 - 4 and
@ r1 = r1 + 4, then enter the target block with r0 = entry PC.
@ r2 is parked in SSP_OFFS_TMP1 across the call.
ssp_drc_dp_end:
    str     r2, [r7, #SSP_OFFS_TMP1]
    sub     r0, r1, #4
    add     r1, r1, #4
    bl      flush_inval_caches
    ldr     r2, [r7, #SSP_OFFS_TMP1]
    ldr     r0, [r7, #SSP_OFFS_TMP0]
    bx      r2
e807ac75 202
203
@ ssp_drc_end: leave translated code.
@ In:  r0 = PC to store (written to the upper 16 bits of the PC slot),
@      r4-r11 as described in the register map
@ Out: r0 = r11
@ Writes the mapped registers back to the SSP context, then restores
@ r4-r11 and lr pushed by ssp_drc_entry and returns to its caller.
ssp_drc_end:
    mov     r0, r0, lsl #16
    str     r0, [r7, #(SSP_OFFS_GR+SSP_PC*4)]

ssp_regfile_store:
    str     r10,[r7, #(SSP_OFFS_GR+SSP_P*4)] @ P
    str     r8, [r7, #0x440]            @ r0-r2
    str     r9, [r7, #0x444]            @ r4-r6

    mov     r9, r6, lsr #13
    and     r9, r9, #(7<<16)            @ STACK
    mov     r3, r6, lsl #28
    msr     cpsr_flg, r3                @ NZCV bits of r6 to the ARM PSR
    and     r6, r6, #0x670              @ neither and nor mov touches the flags
    mov     r6, r6, lsl #12
    orrmi   r6, r6, #0x80000000         @ N
    orreq   r6, r6, #0x20000000         @ Z

    mov     r3, r4, lsl #16             @ Y
    mov     r2, r4, lsr #16
    mov     r2, r2, lsl #16             @ X
    add     r8, r7, #SSP_OFFS_GR
    add     r8, r8, #4                  @ skip GR0
    stmia   r8, {r2,r3,r5,r6,r9}        @ X, Y, A, ST, STACK slots

    mov     r0, r11
    ldmfd   sp!, {r4-r11, lr}
    bx      lr
232
233
d5276282 234
@ ssp_hle_800: native stand-in for this SSP sequence:
@   ld A, PM0
@   andi 2
@   bra z=1, gloc_0800
@ In:  r7 = SSP context, r11 = cycles
@ If bit 17 of the PM0 slot is clear: set SSP_WAIT_PM0 in emu_status,
@ take 1024 off r11 and leave through ssp_drc_end with r0 = 0x400.
@ Otherwise continue at ssp_drc_next with r0 = 0x404.
ssp_hle_800:
    ldr     r0, [r7, #(SSP_OFFS_GR+SSP_PM0*4)]
    ldr     r1, [r7, #SSP_OFFS_EMUSTAT]
    tst     r0, #0x20000
    orreq   r1, r1, #SSP_WAIT_PM0
    subeq   r11,r11, #1024
    streq   r1, [r7, #SSP_OFFS_EMUSTAT]
    mov     r0, #0x400                  @ mov leaves the flags from tst intact
    beq     ssp_drc_end
    orrne   r0, r0, #0x004

    b       ssp_drc_next
d5276282 250
e807ac75 251