drc/gte: add some stall handling
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / linkage_arm64.S
CommitLineData
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 *   linkage_arm64.S for PCSX                                              *
 *   Copyright (C) 2009-2011 Ari64                                         *
 *   Copyright (C) 2021 notaz                                              *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the          *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the                         *
 *   Free Software Foundation, Inc.,                                       *
 *   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.          *
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
22#include "arm_features.h"
23#include "new_dynarec_config.h"
24#include "assem_arm64.h"
25#include "linkage_offsets.h"
26
.bss
	/*
	 * dynarec_local: one block of LO_dynarec_local_size bytes in .bss,
	 * aligned to 2^4 bytes.  The DRC_VAR symbols defined after it are
	 * fixed offsets into this block.
	 */
	.p2align 4
	.globl	dynarec_local
	.type	dynarec_local, %object
	.size	dynarec_local, LO_dynarec_local_size
dynarec_local:
	.skip	LO_dynarec_local_size
34
/*
 * DRC_VAR_(name, vname, size_)
 *   Defines `vname` as dynarec_local + LO_<name>, makes it global and
 *   marks it as an object of `size_` bytes.
 *
 * DRC_VAR(name, size_)
 *   Same as DRC_VAR_ with the symbol name produced by ESYM(name).
 */
#define DRC_VAR_(name, vname, size_) \
	vname = dynarec_local + LO_##name; \
	.global vname; \
	.type vname, %object; \
	.size vname, size_

#define DRC_VAR(name, size_) \
	DRC_VAR_(name, ESYM(name), size_)
43
/*
 * Symbols exported for fields of dynarec_local.  The second argument is the
 * size in bytes recorded for the symbol.  Entries kept only as comments
 * define no symbol in this file.
 */
DRC_VAR(next_interupt, 4)
DRC_VAR(cycle_count, 4)
DRC_VAR(last_count, 4)
DRC_VAR(pending_exception, 4)
DRC_VAR(stop, 4)
DRC_VAR(branch_target, 4)
DRC_VAR(address, 4)
/* disabled: DRC_VAR(align0, 16) -- unused/alignment */
DRC_VAR(psxRegs, LO_psxRegs_end - LO_psxRegs)

/* fields inside psxRegs */
/* disabled: DRC_VAR(reg, 128) */
DRC_VAR(lo, 4)
DRC_VAR(hi, 4)
DRC_VAR(reg_cop0, 128)
DRC_VAR(reg_cop2d, 128)
DRC_VAR(reg_cop2c, 128)
DRC_VAR(pcaddr, 4)
/* disabled: DRC_VAR(code, 4) */
/* disabled: DRC_VAR(cycle, 4) */
/* disabled: DRC_VAR(interrupt, 4) */
/* disabled: DRC_VAR(intCycle, 256) */

DRC_VAR(rcnts, 7*4*4)
DRC_VAR(inv_code_start, 4)
DRC_VAR(inv_code_end, 4)
DRC_VAR(mem_rtab, 8)
DRC_VAR(mem_wtab, 8)
DRC_VAR(psxH_ptr, 8)
DRC_VAR(invc_ptr, 8)
DRC_VAR(zeromem_ptr, 8)
DRC_VAR(scratch_buf_ptr, 8)
/* disabled: DRC_VAR(align1, 16) -- unused/alignment */
DRC_VAR(mini_ht, 256)
DRC_VAR(restore_candidate, 512)
79
80
/* ---- code ---- */
	.text
	.p2align 2

/*
 * dyna_linker_main: common body of dyna_linker and dyna_linker_ds.
 *
 * Inputs, as named by the original comments (ARM "r" notation):
 *   r0 = virtual target address
 *   r1 = instruction to patch
 * The second input is not read here: the macro only calls get_addr_ht
 * and branches to the address returned in x0.  `bl` overwrites lr and the
 * macro never returns to its caller.
 */
.macro dyna_linker_main
	/* XXX TODO: should be able to do better than this... */
	bl      get_addr_ht
	br      x0
.endm
91
92
FUNCTION(dyna_linker):
	/*
	 * In:  r0 = virtual target address, r1 = instruction to patch
	 * Expands dyna_linker_main, which ends in `br x0`, so control never
	 * falls out of this function.
	 */
	dyna_linker_main
	.size	dyna_linker, .-dyna_linker
98
FUNCTION(exec_pagefault):
	/*
	 * In:  r0 = instruction pointer, r1 = fault address, r2 = cause
	 * None of the inputs are used; the function just calls abort.
	 * There is no instruction after the call, so it relies on abort
	 * not returning.
	 */
	bl      abort
	.size	exec_pagefault, .-exec_pagefault
105
/*
 * dyna_linker_ds: linker entry for the case where a page fault may occur
 * in a branch delay slot.  In this file its body is the same
 * dyna_linker_main expansion used by dyna_linker.
 */
FUNCTION(dyna_linker_ds):
	/* In: r0 = virtual target address, r1 = instruction to patch */
	dyna_linker_main
	.size	dyna_linker_ds, .-dyna_linker_ds
113
/*
 * cc_interrupt
 *
 * Sequence:
 *   1. rCC += last_count; the sum is stored to LO_cycle and
 *      pending_exception is cleared.
 *   2. One 32-bit word of restore_candidate is selected with the byte
 *      offset (rCC >> 17) & 0x1fc.  If it is non-zero the word is cleared
 *      and clean_blocks is called once per set bit, with
 *      w0 = offset * 8 + bit number.
 *   3. gen_interupt is called.
 *   4. rCC is reloaded from LO_cycle, last_count = next_interupt and
 *      rCC -= next_interupt.
 *   5. If `stop` is non-zero, branch to new_dyna_leave.  Otherwise, if
 *      pending_exception is non-zero, branch to get_addr_ht(pcaddr).
 *      Otherwise return to the caller.
 *
 * Registers written here: w0-w3, w19, w20, x21 and rCC (plus whatever the
 * called functions clobber).  The caller's lr is kept in x21 across the
 * calls.
 */
	.p2align 2
FUNCTION(cc_interrupt):
	ldr     w0, [rFP, #LO_last_count]
	mov     w2, #0x1fc
	add     rCC, w0, rCC
	str     wzr, [rFP, #LO_pending_exception]
	and     w2, w2, rCC, lsr #17            /* (rCC >> 17) & 0x1fc */
	add     x3, rFP, #LO_restore_candidate
	str     rCC, [rFP, #LO_cycle]           /* PCSX cycles */
	/* disabled: str rCC, [rFP, #LO_reg_cop0+36] -- Count */
	ldr     w19, [x3, w2, uxtw]             /* candidate bits for this slot */
	mov     x21, lr
	cbnz    w19, .Lcc_restore_blocks
.Lcc_run_events:
	bl      gen_interupt
	mov     lr, x21
	ldr     rCC, [rFP, #LO_cycle]
	ldr     w0, [rFP, #LO_next_interupt]
	ldr     w1, [rFP, #LO_pending_exception]
	ldr     w2, [rFP, #LO_stop]
	str     w0, [rFP, #LO_last_count]
	sub     rCC, rCC, w0
	cbnz    w2, new_dyna_leave
	cbnz    w1, .Lcc_take_exception
	ret
.Lcc_take_exception:
	ldr     w0, [rFP, #LO_pcaddr]
	bl      get_addr_ht
	br      x0
.Lcc_restore_blocks:
	/* Move 'dirty' blocks to the 'clean' list */
	lsl     w20, w2, #3                     /* first index = byte offset * 8 */
	str     wzr, [x3, w2, uxtw]             /* clear the word just read */
.Lcc_next_bit:
	mov     w0, w20
	add     w20, w20, #1
	tbz     w19, #0, .Lcc_bit_clear
	bl      clean_blocks
.Lcc_bit_clear:
	lsr     w19, w19, #1
	tst     w20, #31                        /* stop after 32 bits */
	bne     .Lcc_next_bit
	b       .Lcc_run_events
	.size	cc_interrupt, .-cc_interrupt
158
/*
 * fp_exception / fp_exception_ds
 *
 * In:  w0 = value to store in reg_cop0+56 (EPC)
 *
 * Both entry points write the same three cop0 words and then continue at
 * the code returned by get_addr_ht(0x80000080):
 *   EPC    (reg_cop0+56) = w0
 *   Cause  (reg_cop0+52) = 0x1000002c for fp_exception,
 *                          0x9000002c for fp_exception_ds
 *                          (high bit set for the delay-slot variant)
 *   Status (reg_cop0+48) |= 2
 * Neither entry point returns to its caller.
 */
	.p2align 2
FUNCTION(fp_exception):
	mov     w2, #0x10000000
.Lfp_exception_common:
	str     w0, [rFP, #LO_reg_cop0+56]      /* EPC */
	add     w2, w2, #0x2c
	str     w2, [rFP, #LO_reg_cop0+52]      /* Cause */
	ldr     w1, [rFP, #LO_reg_cop0+48]      /* Status */
	orr     w1, w1, #2
	str     w1, [rFP, #LO_reg_cop0+48]      /* Status */
	mov     w3, #0x80000000
	add     w0, w3, #0x80                   /* w0 = 0x80000080 */
	bl      get_addr_ht
	br      x0
	.size	fp_exception, .-fp_exception

	.p2align 2
FUNCTION(fp_exception_ds):
	mov     w2, #0x90000000                 /* Set high bit if delay slot */
	b       .Lfp_exception_common
	.size	fp_exception_ds, .-fp_exception_ds
179
/*
 * jump_syscall
 *
 * In:  w0 = value to store in reg_cop0+56 (EPC)
 *
 * Writes EPC = w0, Cause (reg_cop0+52) = 0x20 and sets bit 1 of Status
 * (reg_cop0+48), then continues at the code returned by
 * get_addr_ht(0x80000080).  Does not return to its caller.
 */
	.p2align 2
FUNCTION(jump_syscall):
	str     w0, [rFP, #LO_reg_cop0+56]      /* EPC */
	mov     w2, #0x20
	str     w2, [rFP, #LO_reg_cop0+52]      /* Cause */
	ldr     w1, [rFP, #LO_reg_cop0+48]      /* Status */
	orr     w1, w1, #2
	str     w1, [rFP, #LO_reg_cop0+48]      /* Status */
	mov     w3, #0x80000000
	add     w0, w3, #0x80                   /* w0 = 0x80000080 */
	bl      get_addr_ht
	br      x0
	.size	jump_syscall, .-jump_syscall
/*
 * jump_to_new_pc
 *
 * Re-bases the cycle counter and resumes at pcaddr:
 *   last_count = next_interupt
 *   rCC        = cycle - next_interupt
 *   branch to get_addr_ht(pcaddr)
 *
 * note: psxException might do a recursive recompiler call from its HLE
 * code, so be ready for this.
 */
	.p2align 2
FUNCTION(jump_to_new_pc):
	ldr     w1, [rFP, #LO_next_interupt]
	ldr     rCC, [rFP, #LO_cycle]
	str     w1, [rFP, #LO_last_count]
	sub     rCC, rCC, w1
	ldr     w0, [rFP, #LO_pcaddr]
	bl      get_addr_ht
	br      x0
	.size	jump_to_new_pc, .-jump_to_new_pc
be516ebe 206
/*
 * new_dyna_start
 *
 * In:  x0 = base address that becomes rFP (it is indexed with LO_* offsets)
 *
 * Pushes an SSP_ALL-byte frame holding x29/x30 and x19-x28, then sets
 *   last_count = next_interupt
 *   rCC        = cycle - next_interupt
 * and branches to get_addr_ht(pcaddr).  The frame is popped again by
 * new_dyna_leave.
 *
 * stack must be aligned by 16, and include space for save_regs() use
 */
	.p2align 2
FUNCTION(new_dyna_start):
	stp     x29, x30, [sp, #-SSP_ALL]!
	stp     x19, x20, [sp, #16*1]
	stp     x21, x22, [sp, #16*2]
	stp     x23, x24, [sp, #16*3]
	stp     x25, x26, [sp, #16*4]
	stp     x27, x28, [sp, #16*5]
	mov     rFP, x0
	ldr     w1, [rFP, #LO_next_interupt]
	ldr     w2, [rFP, #LO_cycle]
	ldr     w0, [rFP, #LO_pcaddr]
	str     w1, [rFP, #LO_last_count]
	sub     rCC, w2, w1
	bl      get_addr_ht
	br      x0
	.size	new_dyna_start, .-new_dyna_start
225
/*
 * new_dyna_leave
 *
 * Stores cycle = rCC + last_count, reloads x19-x28 and x29/x30 from the
 * frame laid out by new_dyna_start, releases its SSP_ALL bytes and
 * returns through the restored x30.
 */
	.p2align 2
FUNCTION(new_dyna_leave):
	ldr     w0, [rFP, #LO_last_count]
	add     rCC, rCC, w0
	str     rCC, [rFP, #LO_cycle]
	ldp     x27, x28, [sp, #16*5]
	ldp     x25, x26, [sp, #16*4]
	ldp     x23, x24, [sp, #16*3]
	ldp     x21, x22, [sp, #16*2]
	ldp     x19, x20, [sp, #16*1]
	ldp     x29, x30, [sp], #SSP_ALL
	ret
	.size	new_dyna_leave, .-new_dyna_leave
239
/* --------------------------------------- */

.p2align 2

/*
 * memhandler_pre: publish the current cycle count before a handler call.
 *
 * In:  w0 = addr/data, x1 = rhandler, w2 = cycles, x3 = whandler
 * Stores cycle = last_count + w2.  Only w4 is modified.
 */
.macro memhandler_pre
	ldr     w4, [rFP, #LO_last_count]
	add     w4, w2, w4
	str     w4, [rFP, #LO_cycle]
.endm
250
/*
 * memhandler_post: re-base the cycle count after a handler call.
 *
 * Out: w0 = cycle - next_interupt
 * Also stores last_count = next_interupt.  Overwrites w0, w1 and w2.
 */
.macro memhandler_post
	ldr     w1, [rFP, #LO_cycle]
	ldr     w2, [rFP, #LO_next_interupt]
	str     w2, [rFP, #LO_last_count]
	sub     w0, w1, w2
.endm
257
/*
 * do_memhandler_pre: callable wrapper around the memhandler_pre macro.
 * In:  w2 = cycles.  Stores cycle = last_count + w2; clobbers w4.
 */
FUNCTION(do_memhandler_pre):
	memhandler_pre
	ret
261
/*
 * do_memhandler_post: callable wrapper around the memhandler_post macro.
 * Out: w0 = cycle - next_interupt; last_count is set to next_interupt.
 * Clobbers w1 and w2.
 */
FUNCTION(do_memhandler_post):
	memhandler_post
	ret
265
/*
 * pcsx_read_mem readop tab_shift
 *
 * In:  w0 = address, x1 = handler_tab, w2 = cycles
 *
 * The table index is address bits [11:tab_shift].  The 64-bit entry is
 * doubled, and the bit shifted out into the carry flag picks the path:
 *   carry clear: the doubled entry is used as a base; \readop loads from
 *                base + (index << tab_shift) into w0 and the macro
 *                returns to the caller.
 *   carry set:   the doubled entry is called (blr x3) after
 *                memhandler_pre, with lr pushed on the stack.  The macro
 *                ends right after the call, so the code following each
 *                expansion must pop that 16-byte frame.
 * Clobbers x3, w4 and the flags.
 */
.macro pcsx_read_mem readop tab_shift
	ubfx    w4, w0, #\tab_shift, #(12 - \tab_shift)
	ldr     x3, [x1, w4, uxtw #3]
	adds    x3, x3, x3
	b.cs    0f
	\readop w0, [x3, w4, uxtw #\tab_shift]
	ret
0:
	memhandler_pre
	stp     xzr, x30, [sp, #-16]!
	blr     x3
.endm
279
/*
 * jump_handler_read8
 * In:  w0 = address, x1 = handler_tab, w2 = cycles
 * Out: w0 = value from the direct ldrb or left by the handler
 */
FUNCTION(jump_handler_read8):
	/* skip the 0x1000/4 and 0x1000/2 eight-byte entries in front of the r8 part */
	add     x1, x1, #0x1000/4*8 + 0x1000/2*8
	pcsx_read_mem ldrb, 0
	b       handler_read_end
be516ebe 284
/*
 * jump_handler_read16
 * In:  w0 = address, x1 = handler_tab, w2 = cycles
 * Out: w0 = value from the direct ldrh or left by the handler
 */
FUNCTION(jump_handler_read16):
	/* skip the 0x1000/4 eight-byte entries in front of the r16 part */
	add     x1, x1, #0x1000/4*8
	pcsx_read_mem ldrh, 1
	b       handler_read_end
be516ebe 289
/*
 * jump_handler_read32
 * In:  w0 = address, x1 = handler_tab, w2 = cycles
 * Out: w0 = value from the direct ldr or left by the handler
 * The handler path falls straight through into handler_read_end.
 */
FUNCTION(jump_handler_read32):
	pcsx_read_mem ldr, 2

/*
 * handler_read_end: shared tail of the handler path of all three read
 * entry points.  Pops the 16-byte frame pushed by pcsx_read_mem,
 * restoring lr, and returns.
 */
handler_read_end:
	ldp     xzr, x30, [sp], #16
	ret
296
/*
 * pcsx_write_mem wrtop movop tab_shift
 *
 * In:  w0 = address, w1 = data, w2 = cycles, x3 = handler_tab
 *
 * The table index is address bits [11:tab_shift].  The 64-bit entry is
 * doubled, and the bit shifted out into the carry flag picks the path:
 *   carry clear: \wrtop stores w1 at doubled entry + (index << tab_shift),
 *                w0 is set to the incoming cycle count, and the macro
 *                returns to the caller.
 *   carry set:   the address is saved to LO_address, w0 receives the data
 *                via \movop, memhandler_pre runs, and the doubled entry is
 *                called (blr x3) with lr pushed on the stack.  The macro
 *                ends right after the call, so the code following each
 *                expansion must pop that 16-byte frame.
 * Clobbers x3, w4 and the flags.
 */
.macro pcsx_write_mem wrtop movop tab_shift
	ubfx    w4, w0, #\tab_shift, #(12 - \tab_shift)
	ldr     x3, [x3, w4, uxtw #3]
	adds    x3, x3, x3
	b.cs    0f
	\wrtop  w1, [x3, w4, uxtw #\tab_shift]
	mov     w0, w2                          /* cycle return */
	ret
0:
	str     w0, [rFP, #LO_address]          /* some handlers still need it... */
	\movop  w0, w1
	memhandler_pre
	stp     xzr, x30, [sp, #-16]!
	blr     x3
.endm
be516ebe 313
/*
 * jump_handler_write8
 * In:  w0 = address, w1 = data, w2 = cycles, x3 = handler_tab
 * Out: w0 = cycle value (see pcsx_write_mem / handler_write_end)
 */
FUNCTION(jump_handler_write8):
	/* skip the 0x1000/4 and 0x1000/2 eight-byte entries in front of the 8-bit part */
	add     x3, x3, #0x1000/4*8 + 0x1000/2*8
	pcsx_write_mem strb uxtb 0
	b       handler_write_end
be516ebe 318
/*
 * jump_handler_write16
 * In:  w0 = address, w1 = data, w2 = cycles, x3 = handler_tab
 * Out: w0 = cycle value (see pcsx_write_mem / handler_write_end)
 */
FUNCTION(jump_handler_write16):
	/* skip the 0x1000/4 eight-byte entries in front of the 16-bit part */
	add     x3, x3, #0x1000/4*8
	pcsx_write_mem strh uxth 1
	b       handler_write_end
be516ebe 323
/*
 * jump_handler_write32
 * In:  w0 = address, w1 = data, w2 = cycles, x3 = handler_tab
 * Out: w0 = cycle value (see pcsx_write_mem / handler_write_end)
 * The handler path falls straight through into handler_write_end.
 */
FUNCTION(jump_handler_write32):
	pcsx_write_mem str mov 2

/*
 * handler_write_end: shared tail of the handler path of all three write
 * entry points.  Runs memhandler_post (w0 = cycle - next_interupt,
 * last_count = next_interupt), pops the 16-byte frame pushed by
 * pcsx_write_mem, restoring lr, and returns.
 */
handler_write_end:
	memhandler_post
	ldp     xzr, x30, [sp], #16
	ret
be516ebe 331
/*
 * jump_handle_swl
 *
 * In:  w0 = address, w1 = data, w2 = cycles
 * Out: w0 = cycles (unchanged)
 *
 * Looks up mem_wtab[address >> 12] and doubles the entry.  If the bit
 * shifted out is set, the access is not handled here and the code calls
 * abort (see the "todo" below).  Otherwise target = address + doubled
 * entry, and the upper bytes of w1 are stored so that the last byte
 * written is at `target`:
 *   (target & 3) == 0 : 1 byte  (w1 >> 24)  at target
 *   (target & 3) == 1 : 2 bytes (w1 >> 16)  at target-1
 *   (target & 3) == 2 : 3 bytes (w1 >> 8)   at target-2
 *   (target & 3) == 3 : 4 bytes (w1)        at target-3
 * Clobbers w1-w4 and the flags.
 */
FUNCTION(jump_handle_swl):
	ldr     x3, [rFP, #LO_mem_wtab]
	/* plain LSR instead of the non-standard "mov w4, w0, lsr #12" form,
	 * which not every assembler accepts */
	lsr     w4, w0, #12
	ldr     x3, [x3, w4, uxtw #3]
	adds    x3, x3, x3
	bcs     .Lswl_unhandled
	/* zero-extend the 32-bit address explicitly rather than relying on
	 * the upper half of x0 being clear */
	add     x3, x3, w0, uxtw
	mov     w0, w2
	tbz     x3, #1, .Lswl_0_or_1            /* (target & 2) == 0 */
	tbz     x3, #0, .Lswl_2                 /* (target & 1) == 0 */
	/* (target & 3) == 3 */
	stur    w1, [x3, #-3]
	ret
.Lswl_2:
	lsr     w2, w1, #8
	lsr     w1, w1, #24
	sturh   w2, [x3, #-2]                   /* data bits 8..23 */
	strb    w1, [x3]                        /* data bits 24..31 */
	ret
.Lswl_0_or_1:
	tbz     x3, #0, .Lswl_0                 /* (target & 1) == 0 */
	/* (target & 3) == 1 */
	lsr     w1, w1, #16
	sturh   w1, [x3, #-1]
	ret
.Lswl_0:
	lsr     w2, w1, #24
	strb    w2, [x3]
	ret
.Lswl_unhandled:
	mov     w0, w2                          // todo
	bl      abort
	ret
be516ebe 366
/*
 * jump_handle_swr
 *
 * In:  w0 = address, w1 = data, w2 = cycles
 * Out: w0 = cycles (unchanged)
 *
 * Looks up mem_wtab[address >> 12] and doubles the entry.  If the bit
 * shifted out is set, the access is not handled here and the code calls
 * abort (see the "todo" below).  Otherwise target = address + doubled
 * entry, and the low bytes of w1 are stored starting at `target`:
 *   (target & 3) == 0 : 4 bytes
 *   (target & 3) == 1 : 3 bytes
 *   (target & 3) == 2 : 2 bytes
 *   (target & 3) == 3 : 1 byte
 * Clobbers w2-w4 and the flags.
 */
FUNCTION(jump_handle_swr):
	ldr     x3, [rFP, #LO_mem_wtab]
	/* plain LSR instead of the non-standard "mov w4, w0, lsr #12" form,
	 * which not every assembler accepts */
	lsr     w4, w0, #12
	ldr     x3, [x3, w4, uxtw #3]
	adds    x3, x3, x3
	bcs     .Lswr_unhandled
	/* zero-extend the 32-bit address explicitly rather than relying on
	 * the upper half of x0 being clear */
	add     x3, x3, w0, uxtw
	mov     w0, w2
	tbz     x3, #1, .Lswr_0_or_1            /* (target & 2) == 0 */
	tbz     x3, #0, .Lswr_2                 /* (target & 1) == 0 */
	/* (target & 3) == 3 */
	strb    w1, [x3]
	ret
.Lswr_2:
	strh    w1, [x3]
	ret
.Lswr_0_or_1:
	tbz     x3, #0, .Lswr_0                 /* (target & 1) == 0 */
	/* (target & 3) == 1 */
	lsr     w2, w1, #8
	strb    w1, [x3]                        /* data bits 0..7 */
	sturh   w2, [x3, #1]                    /* data bits 8..23 */
	ret
.Lswr_0:
	str     w1, [x3]
	ret
.Lswr_unhandled:
	mov     w0, w2                          // todo
	bl      abort
	ret
be516ebe 398
/*
 * call_gteStall
 *
 * In:  w0 = op_cycles, w1 = cycles
 *
 * Stores cycle = cycles + last_count, then calls gteCheckStallRaw with w0
 * unchanged and x1 = address of psxRegs (rFP + LO_psxRegs).  The value it
 * returns in w0 is added to rCC.  The return address is parked in
 * LO_saved_lr across the call.
 */
FUNCTION(call_gteStall):
	str     x30, [rFP, #LO_saved_lr]
	ldr     w2, [rFP, #LO_last_count]
	add     w1, w2, w1
	str     w1, [rFP, #LO_cycle]
	add     x1, rFP, #LO_psxRegs
	bl      gteCheckStallRaw
	ldr     x30, [rFP, #LO_saved_lr]
	add     rCC, rCC, w0
	ret
410