| 1 | /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * |
| 2 | * linkage_arm.s for PCSX * |
| 3 | * Copyright (C) 2009-2011 Ari64 * |
| 4 | * Copyright (C) 2021 notaz * |
| 5 | * * |
| 6 | * This program is free software; you can redistribute it and/or modify * |
| 7 | * it under the terms of the GNU General Public License as published by * |
| 8 | * the Free Software Foundation; either version 2 of the License, or * |
| 9 | * (at your option) any later version. * |
| 10 | * * |
| 11 | * This program is distributed in the hope that it will be useful, * |
| 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of * |
| 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * |
| 14 | * GNU General Public License for more details. * |
| 15 | * * |
| 16 | * You should have received a copy of the GNU General Public License * |
| 17 | * along with this program; if not, write to the * |
| 18 | * Free Software Foundation, Inc., * |
| 19 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * |
| 20 | * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ |
| 21 | |
| 22 | #include "arm_features.h" |
| 23 | #include "new_dynarec_config.h" |
| 24 | #include "assem_arm64.h" |
| 25 | #include "linkage_offsets.h" |
| 26 | |
/* mem_wtab is loaded as a 64-bit value relative to rFP, so the build is
 * rejected unless its offset is a multiple of 8. */
#if (LO_mem_wtab & 7)
#error misaligned pointers
#endif
| 30 | |
/* Backing storage for the recompiler state block; the DRC_VAR() symbols
 * are defined as offsets into this object. */
	.bss
	.p2align 4			/* 2^4 = 16-byte alignment */
	.globl	dynarec_local
	.type	dynarec_local, %object
	.size	dynarec_local, LO_dynarec_local_size
dynarec_local:
	.space	LO_dynarec_local_size
| 38 | |
/*
 * DRC_VAR_(name, vname, size_)
 *   Define the global object symbol `vname`, `size_` bytes long, located
 *   LO_<name> bytes past the start of dynarec_local.
 */
#define DRC_VAR_(name, vname, size_) \
	.globl	vname; \
	.set	vname, dynarec_local + LO_##name; \
	.type	vname, %object; \
	.size	vname, size_

/* DRC_VAR(name, size_): as above, with ESYM(name) as the symbol name. */
#define DRC_VAR(name, size_) \
	DRC_VAR_(name, ESYM(name), size_)
| 47 | |
/* Named symbols inside dynarec_local, one per LO_<name> offset.
 * Entries that are intentionally not exported are kept as // comments. */
DRC_VAR(next_interupt, 4)
DRC_VAR(cycle_count, 4)
DRC_VAR(last_count, 4)
DRC_VAR(pending_exception, 4)
DRC_VAR(stop, 4)
DRC_VAR(branch_target, 4)
DRC_VAR(address, 4)
DRC_VAR(hack_addr, 4)
DRC_VAR(psxRegs, LO_psxRegs_end - LO_psxRegs)

/* psxRegs */
//DRC_VAR(reg, 128)
DRC_VAR(lo, 4)
DRC_VAR(hi, 4)
DRC_VAR(reg_cop0, 128)
DRC_VAR(reg_cop2d, 128)
DRC_VAR(reg_cop2c, 128)
DRC_VAR(pcaddr, 4)
//DRC_VAR(code, 4)
//DRC_VAR(cycle, 4)
//DRC_VAR(interrupt, 4)
//DRC_VAR(intCycle, 256)

/* remaining recompiler state */
DRC_VAR(rcnts, 7*4*4)
DRC_VAR(inv_code_start, 4)
DRC_VAR(inv_code_end, 4)
DRC_VAR(mem_rtab, 8)
DRC_VAR(mem_wtab, 8)
DRC_VAR(psxH_ptr, 8)
DRC_VAR(invc_ptr, 8)
DRC_VAR(zeromem_ptr, 8)
DRC_VAR(scratch_buf_ptr, 8)
DRC_VAR(ram_offset, 8)
DRC_VAR(mini_ht, 256)
DRC_VAR(restore_candidate, 512)
| 83 | |
| 84 | |
| 85 | .text |
| 86 | .align 2 |
| 87 | |
| 88 | /* r0 = virtual target address */ |
| 89 | /* r1 = instruction to patch */ |
| 90 | .macro dyna_linker_main |
| 91 | /* XXX TODO: should be able to do better than this... */ |
| 92 | bl get_addr_ht |
| 93 | br x0 |
| 94 | .endm |
| 95 | |
| 96 | |
/*
 * dyna_linker
 *   w0 = virtual target address
 *   w1 = instruction to patch
 * Expands dyna_linker_main; control leaves through its final branch.
 */
FUNCTION(dyna_linker):
	dyna_linker_main
	.size	dyna_linker, .-dyna_linker
| 102 | |
/*
 * exec_pagefault
 *   w0 = instruction pointer
 *   w1 = fault address
 *   w2 = cause
 * Not handled on this target: the only action is a call to abort.
 */
FUNCTION(exec_pagefault):
	bl	abort
	.size	exec_pagefault, .-exec_pagefault
| 109 | |
/*
 * dyna_linker_ds - special dynamic linker for the case where a page fault
 * may occur in a branch delay slot.
 *   w0 = virtual target address
 *   w1 = instruction to patch
 * Currently expands to the same body as dyna_linker.
 */
FUNCTION(dyna_linker_ds):
	dyna_linker_main
	.size	dyna_linker_ds, .-dyna_linker_ds
| 117 | |
/*
 * cc_interrupt
 *   rCC holds the cycle counter relative to last_count.
 * Stores the absolute cycle count, optionally processes one word of the
 * restore_candidate bitmap, calls gen_interupt and then either returns to
 * the caller, leaves through new_dyna_leave (stop set) or jumps to the
 * block for pcaddr (pending_exception set).
 * Uses w19/w20 as loop state and x21 to hold lr across the calls.
 */
	.p2align 2
FUNCTION(cc_interrupt):
	mov	x21, lr				/* lr is clobbered by the bl's below */
	ldr	w0, [rFP, #LO_last_count]
	str	wzr, [rFP, #LO_pending_exception]
	add	x3, rFP, #LO_restore_candidate
	add	rCC, w0, rCC			/* absolute cycle count */
	mov	w2, #0x1fc
	str	rCC, [rFP, #LO_cycle]		/* PCSX cycles */
//	str	rCC, [rFP, #LO_reg_cop0+36]	/* Count */
	and	w2, w2, rCC, lsr #17		/* byte offset of one bitmap word */
	ldr	w19, [x3, w2, uxtw]
	cbnz	w19, 3f
1:
	bl	gen_interupt
	mov	lr, x21
	ldr	w2, [rFP, #LO_stop]
	ldr	w1, [rFP, #LO_pending_exception]
	ldr	w0, [rFP, #LO_next_interupt]
	ldr	rCC, [rFP, #LO_cycle]
	str	w0, [rFP, #LO_last_count]
	sub	rCC, rCC, w0			/* back to a relative count */
	cbnz	w2, new_dyna_leave
	cbnz	w1, 2f
	ret
2:
	/* an exception is pending: continue at the block for pcaddr */
	ldr	w0, [rFP, #LO_pcaddr]
	bl	get_addr_ht
	br	x0
3:
	/* Move 'dirty' blocks to the 'clean' list */
	str	wzr, [x3, w2, uxtw]		/* clear the word just read */
	lsl	w20, w2, #3			/* index of bit 0 of that word */
4:
	mov	w0, w20
	add	w20, w20, #1
	tbz	w19, #0, 5f			/* bit clear: nothing to clean */
	bl	clean_blocks
5:
	lsr	w19, w19, #1
	tst	w20, #31			/* stop after 32 bits */
	bne	4b
	b	1b
	.size	cc_interrupt, .-cc_interrupt
| 162 | |
/*
 * fp_exception / fp_exception_ds
 *   w0 = value stored to the EPC slot (reg_cop0+56)
 * Sets bit 1 of Status, writes Cause = base + 0x2c, then continues at the
 * block for address 0x80000080.  The _ds entry uses a base with the high
 * bit set to mark a delay slot.
 */
	.p2align 2
FUNCTION(fp_exception):
	mov	w2, #0x10000000
fp_exception_common:
	ldr	w1, [rFP, #LO_reg_cop0+48]	/* Status */
	str	w0, [rFP, #LO_reg_cop0+56]	/* EPC */
	mov	w3, #0x80000000
	add	w2, w2, #0x2c
	orr	w1, w1, #2
	str	w2, [rFP, #LO_reg_cop0+52]	/* Cause */
	str	w1, [rFP, #LO_reg_cop0+48]	/* Status */
	add	w0, w3, #0x80			/* w0 = 0x80000080 */
	bl	get_addr_ht
	br	x0
	.size	fp_exception, .-fp_exception

	.p2align 2
FUNCTION(fp_exception_ds):
	mov	w2, #0x90000000			/* Set high bit if delay slot */
	b	fp_exception_common
	.size	fp_exception_ds, .-fp_exception_ds
| 183 | |
/*
 * jump_break[_ds] / jump_syscall[_ds]
 *   w2 = value stored to pcaddr before the exception is raised
 * Each entry loads w0 (0x24 for break, 0x20 for syscall) and w1 (1 for the
 * _ds variants, 0 otherwise) and continues in call_psxException, which
 * stores the absolute cycle count and calls psxException(w0, w1).
 * Execution then falls through into jump_to_new_pc.
 */
	.p2align 2
FUNCTION(jump_break_ds):
	mov	w1, #1
	mov	w0, #0x24
	b	call_psxException
FUNCTION(jump_break):
	mov	w1, #0
	mov	w0, #0x24
	b	call_psxException
FUNCTION(jump_syscall_ds):
	mov	w1, #1
	mov	w0, #0x20
	b	call_psxException
FUNCTION(jump_syscall):
	mov	w1, #0
	mov	w0, #0x20
	/* falls through */

call_psxException:
	str	w2, [rFP, #LO_pcaddr]
	ldr	w3, [rFP, #LO_last_count]
	add	rCC, w3, rCC
	str	rCC, [rFP, #LO_cycle]		/* PCSX cycles */
	bl	psxException
	/* falls through */

/*
 * jump_to_new_pc - reload the cycle state and continue at pcaddr.
 * note: psxException might do a recursive recompiler call from its HLE
 * code, so be ready for this
 */
FUNCTION(jump_to_new_pc):
	ldr	w0, [rFP, #LO_pcaddr]
	ldr	w1, [rFP, #LO_next_interupt]
	ldr	rCC, [rFP, #LO_cycle]
	str	w1, [rFP, #LO_last_count]
	sub	rCC, rCC, w1			/* relative to next_interupt */
	bl	get_addr_ht
	br	x0
	.size	jump_to_new_pc, .-jump_to_new_pc
| 219 | |
/*
 * new_dyna_start - enter recompiled code.
 *   x0 = base address used with the LO_* offsets (copied into rFP)
 * Saves x19-x30 in a frame of SSP_ALL bytes, sets up last_count and rCC,
 * then jumps to the block for pcaddr.  The frame is released by
 * new_dyna_leave.
 * The stack must be aligned by 16, and include space for save_regs() use.
 */
	.p2align 2
FUNCTION(new_dyna_start):
	stp	x29, x30, [sp, #-SSP_ALL]!
	stp	x19, x20, [sp, #16*1]
	stp	x21, x22, [sp, #16*2]
	stp	x23, x24, [sp, #16*3]
	stp	x25, x26, [sp, #16*4]
	stp	x27, x28, [sp, #16*5]
	ldr	w2, [x0, #LO_cycle]
	ldr	w1, [x0, #LO_next_interupt]
	mov	rFP, x0
	str	w1, [rFP, #LO_last_count]
	ldr	w0, [rFP, #LO_pcaddr]
	sub	rCC, w2, w1			/* rCC = cycle - next_interupt */
	bl	get_addr_ht
	br	x0
	.size	new_dyna_start, .-new_dyna_start
| 238 | |
/*
 * new_dyna_leave - counterpart of new_dyna_start.
 * Writes the absolute cycle count (rCC + last_count) back to the cycle
 * slot, reloads x19-x30 from the frame, pops it and returns.
 */
	.p2align 2
FUNCTION(new_dyna_leave):
	/* write the counter back before the saved registers are reloaded */
	ldr	w0, [rFP, #LO_last_count]
	add	rCC, rCC, w0
	str	rCC, [rFP, #LO_cycle]
	ldp	x27, x28, [sp, #16*5]
	ldp	x25, x26, [sp, #16*4]
	ldp	x23, x24, [sp, #16*3]
	ldp	x21, x22, [sp, #16*2]
	ldp	x19, x20, [sp, #16*1]
	ldp	x29, x30, [sp], #SSP_ALL
	ret
	.size	new_dyna_leave, .-new_dyna_leave
| 252 | |
| 253 | /* --------------------------------------- */ |
| 254 | |
.p2align 2

/*
 * memhandler_pre - publish the absolute cycle count before a handler call.
 *   w0 = addr/data, x1 = rhandler, w2 = cycles, x3 = whandler
 * Stores last_count + w2 to the cycle slot.  Clobbers w4 only.
 */
.macro memhandler_pre
	ldr	w4, [rFP, #LO_last_count]
	add	w4, w2, w4
	str	w4, [rFP, #LO_cycle]
.endm
| 263 | |
/*
 * memhandler_post - resynchronise the cycle state after a handler call.
 * Copies next_interupt to last_count and leaves
 * w0 = cycle - next_interupt.  Clobbers w2.
 */
.macro memhandler_post
	ldr	w2, [rFP, #LO_cycle]		// memhandlers can modify cc, like dma
	ldr	w0, [rFP, #LO_next_interupt]
	str	w0, [rFP, #LO_last_count]
	sub	w0, w2, w0
.endm
| 270 | |
/*
 * do_memhandler_pre - callable wrapper around the memhandler_pre macro.
 *   w2 = cycles
 * Stores last_count + w2 to the cycle slot; clobbers w4.
 */
FUNCTION(do_memhandler_pre):
	memhandler_pre
	ret
	.size	do_memhandler_pre, .-do_memhandler_pre
| 274 | |
/*
 * do_memhandler_post - callable wrapper around the memhandler_post macro.
 * Copies next_interupt to last_count and returns
 * w0 = cycle - next_interupt; clobbers w2.
 */
FUNCTION(do_memhandler_post):
	memhandler_post
	ret
	.size	do_memhandler_post, .-do_memhandler_post
| 278 | |
/*
 * pcsx_read_mem readop, tab_shift
 *   w0 = address, x1 = handler_tab, w2 = cycles
 * Indexes handler_tab with address bits [11:tab_shift] and doubles the
 * entry.  If that does not carry out, the doubled value is used as the
 * base for a direct \readop load into w0 and the macro returns.
 * Otherwise the doubled value is called as a handler, with lr pushed on
 * the stack; the code following the macro must pop it again.
 * Clobbers x3, w4.
 */
.macro pcsx_read_mem readop tab_shift
	ubfx	w4, w0, #\tab_shift, #(12 - \tab_shift)
	ldr	x3, [x1, w4, uxtw #3]
	adds	x3, x3, x3			/* carry = top bit of the entry */
	bcs	0f
	\readop	w0, [x3, w4, uxtw #\tab_shift]
	ret
0:
	stp	xzr, x30, [sp, #-16]!
	memhandler_pre
	blr	x3
.endm
| 292 | |
/*
 * jump_handler_read8
 *   w0 = address, x1 = handler_tab, w2 = cycles
 * Skips the 0x1000/4 + 0x1000/2 eight-byte entries that precede the
 * 8-bit part of the table, then performs the read.
 */
FUNCTION(jump_handler_read8):
	add	x1, x1, #((0x1000/4 + 0x1000/2) * 8)	/* shift to r8 part */
	pcsx_read_mem ldrb, 0
	b	handler_read_end
| 297 | |
/*
 * jump_handler_read16
 *   w0 = address, x1 = handler_tab, w2 = cycles
 * Skips the 0x1000/4 eight-byte entries that precede the 16-bit part of
 * the table, then performs the read.
 */
FUNCTION(jump_handler_read16):
	add	x1, x1, #(0x1000/4 * 8)		/* shift to r16 part */
	pcsx_read_mem ldrh, 1
	b	handler_read_end
| 302 | |
/*
 * jump_handler_read32
 *   w0 = address, x1 = handler_tab, w2 = cycles
 * Uses the table from its start and falls through into the shared tail.
 */
FUNCTION(jump_handler_read32):
	pcsx_read_mem ldr, 2
	/* falls through */

/* Shared tail of the handler path: pop the lr pushed by pcsx_read_mem. */
handler_read_end:
	ldp	xzr, x30, [sp], #16
	ret
| 309 | |
/*
 * pcsx_write_mem wrtop, movop, tab_shift
 *   w0 = address, w1 = data, w2 = cycles, x3 = handler_tab
 * Indexes handler_tab with address bits [11:tab_shift] and doubles the
 * entry.  If that does not carry out, the data is stored directly with
 * \wrtop and the macro returns with w0 = w2.
 * Otherwise the address is saved to the `address` slot, the data is moved
 * to w0 with \movop and the doubled value is called as a handler, with lr
 * pushed on the stack; the code following the macro must pop it again.
 * Clobbers x3, w4.
 */
.macro pcsx_write_mem wrtop movop tab_shift
	ubfx	w4, w0, #\tab_shift, #(12 - \tab_shift)
	ldr	x3, [x3, w4, uxtw #3]
	adds	x3, x3, x3			/* carry = top bit of the entry */
	bcs	0f
	\wrtop	w1, [x3, w4, uxtw #\tab_shift]
	mov	w0, w2				/* cycle return */
	ret
0:
	stp	xzr, x30, [sp, #-16]!
	str	w0, [rFP, #LO_address]		/* some handlers still need it... */
	\movop	w0, w1
	memhandler_pre
	blr	x3
.endm
| 326 | |
/*
 * jump_handler_write8
 *   w0 = address, w1 = data, w2 = cycles, x3 = handler_tab
 * Skips the 0x1000/4 + 0x1000/2 eight-byte entries that precede the
 * 8-bit part of the table, then performs the write.
 */
FUNCTION(jump_handler_write8):
	add	x3, x3, #((0x1000/4 + 0x1000/2) * 8)	/* shift to 8-bit part */
	pcsx_write_mem strb uxtb 0
	b	handler_write_end
| 331 | |
/*
 * jump_handler_write16
 *   w0 = address, w1 = data, w2 = cycles, x3 = handler_tab
 * Skips the 0x1000/4 eight-byte entries that precede the 16-bit part of
 * the table, then performs the write.
 */
FUNCTION(jump_handler_write16):
	add	x3, x3, #(0x1000/4 * 8)		/* shift to 16-bit part */
	pcsx_write_mem strh uxth 1
	b	handler_write_end
| 336 | |
/*
 * jump_handler_write32
 *   w0 = address, w1 = data, w2 = cycles, x3 = handler_tab
 * Uses the table from its start and falls through into the shared tail.
 */
FUNCTION(jump_handler_write32):
	pcsx_write_mem str mov 2
	/* falls through */

/* Shared tail of the handler path: recompute the cycle value returned in
 * w0, then pop the lr pushed by pcsx_write_mem. */
handler_write_end:
	memhandler_post
	ldp	xzr, x30, [sp], #16
	ret
| 344 | |
/*
 * jump_handle_swl - store-word-left into directly mapped memory.
 *   w0 = address, w1 = data, w2 = cycles
 * Returns w0 = cycles.  Clobbers w1, w2, x3, w4.
 *
 * The mem_wtab entry for page (address >> 12) is doubled; without a carry
 * out the doubled value plus the address is the host pointer.  Only w0 is
 * defined by the contract, so the address is zero-extended explicitly
 * instead of relying on the upper half of x0 being clear.
 * The low two bits of the host pointer select the case (presumably the
 * mapping keeps them equal to the low bits of the address - TODO confirm).
 * Table entries that need a handler are not implemented and end in abort.
 */
FUNCTION(jump_handle_swl):
	ldr	x3, [rFP, #LO_mem_wtab]
	lsr	w4, w0, #12			/* page index */
	ldr	x3, [x3, w4, uxtw #3]
	adds	x3, x3, x3			/* carry = top bit of the entry */
	bcs	4f
	add	x3, x3, w0, uxtw		/* host pointer */
	mov	w0, w2				/* cycle return */
	tbz	x3, #1, 10f			// & 2
	tbz	x3, #0, 2f			// & 1
	/* offset 3: the whole word goes to the aligned location */
	stur	w1, [x3, #-3]
	ret
2:
	/* offset 2: data bits 31..8 go to bytes 0..2 of the word */
	lsr	w2, w1, #8
	lsr	w1, w1, #24
	sturh	w2, [x3, #-2]
	strb	w1, [x3]
	ret
10:
	tbz	x3, #0, 0f			// & 1
	/* offset 1: data bits 31..16 go to bytes 0..1 of the word */
	lsr	w1, w1, #16
	sturh	w1, [x3, #-1]
	ret
0:
	/* offset 0: data bits 31..24 go to byte 0 */
	lsr	w2, w1, #24
	strb	w2, [x3]
	ret
4:
	/* handler-backed page: not implemented, abort does not return */
	mov	w0, w2				// todo
	bl	abort
	ret
	.size	jump_handle_swl, .-jump_handle_swl
| 379 | |
/*
 * jump_handle_swr - store-word-right into directly mapped memory.
 *   w0 = address, w1 = data, w2 = cycles
 * Returns w0 = cycles.  Clobbers w2, x3, w4.
 *
 * The mem_wtab entry for page (address >> 12) is doubled; without a carry
 * out the doubled value plus the address is the host pointer.  Only w0 is
 * defined by the contract, so the address is zero-extended explicitly
 * instead of relying on the upper half of x0 being clear.
 * The low two bits of the host pointer select the case (presumably the
 * mapping keeps them equal to the low bits of the address - TODO confirm).
 * Table entries that need a handler are not implemented and end in abort.
 */
FUNCTION(jump_handle_swr):
	ldr	x3, [rFP, #LO_mem_wtab]
	lsr	w4, w0, #12			/* page index */
	ldr	x3, [x3, w4, uxtw #3]
	adds	x3, x3, x3			/* carry = top bit of the entry */
	bcs	4f
	add	x3, x3, w0, uxtw		/* host pointer */
	mov	w0, w2				/* cycle return */
	tbz	x3, #1, 10f			// & 2
	tbz	x3, #0, 2f			// & 1
	/* offset 3: data bits 7..0 go to byte 3 of the word */
	strb	w1, [x3]
	ret
2:
	/* offset 2: data bits 15..0 go to bytes 2..3 of the word */
	strh	w1, [x3]
	ret
10:
	tbz	x3, #0, 0f			// & 1
	/* offset 1: data bits 23..0 go to bytes 1..3 of the word */
	lsr	w2, w1, #8
	strb	w1, [x3]
	sturh	w2, [x3, #1]
	ret
0:
	/* offset 0: the whole word is stored */
	str	w1, [x3]
	ret
4:
	/* handler-backed page: not implemented, abort does not return */
	mov	w0, w2				// todo
	bl	abort
	ret
	.size	jump_handle_swr, .-jump_handle_swr
| 411 | |
/*
 * call_gteStall
 *   w0 = op_cycles, w1 = cycles
 * Stores last_count + w1 to the cycle slot, calls gteCheckStallRaw with
 * w0 unchanged and x1 = address of psxRegs, then adds the value it
 * returns in w0 to rCC.  lr is parked in the saved_lr slot across the
 * call.  Clobbers w1, w2 and whatever the callee clobbers.
 */
FUNCTION(call_gteStall):
	ldr	w2, [rFP, #LO_last_count]
	str	lr, [rFP, #LO_saved_lr]
	add	w1, w1, w2
	str	w1, [rFP, #LO_cycle]
	add	x1, rFP, #LO_psxRegs
	bl	gteCheckStallRaw
	ldr	lr, [rFP, #LO_saved_lr]
	add	rCC, rCC, w0
	ret
	.size	call_gteStall, .-call_gteStall
| 423 | |