X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?p=pcsx_rearmed.git;a=blobdiff_plain;f=libpcsxcore%2Fnew_dynarec%2Flinkage_arm.S;h=bbc52c3c02c2058ef1244aaff156709e10d98033;hp=e31b9b46422ad1f56397983da30af1286553e160;hb=3968e69e7fa8f9cb0d44ac79477d5929b9649271;hpb=b1f89e6f247c9b11c745cc1a7201cce5fb4fe08f diff --git a/libpcsxcore/new_dynarec/linkage_arm.S b/libpcsxcore/new_dynarec/linkage_arm.S index e31b9b46..bbc52c3c 100644 --- a/libpcsxcore/new_dynarec/linkage_arm.S +++ b/libpcsxcore/new_dynarec/linkage_arm.S @@ -20,6 +20,7 @@ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ #include "arm_features.h" +#include "new_dynarec_config.h" #include "linkage_offsets.h" @@ -31,8 +32,6 @@ #define get_addr_ht ESYM(get_addr_ht) #define clean_blocks ESYM(clean_blocks) #define gen_interupt ESYM(gen_interupt) -#define psxException ESYM(psxException) -#define execI ESYM(execI) #define invalidate_addr ESYM(invalidate_addr) #endif @@ -58,12 +57,13 @@ DRC_VAR(cycle_count, 4) DRC_VAR(last_count, 4) DRC_VAR(pending_exception, 4) DRC_VAR(stop, 4) -DRC_VAR(invc_ptr, 4) +DRC_VAR(branch_target, 4) DRC_VAR(address, 4) +@DRC_VAR(align0, 4) /* unused/alignment */ DRC_VAR(psxRegs, LO_psxRegs_end - LO_psxRegs) /* psxRegs */ -DRC_VAR(reg, 128) +@DRC_VAR(reg, 128) DRC_VAR(lo, 4) DRC_VAR(hi, 4) DRC_VAR(reg_cop0, 128) @@ -76,20 +76,34 @@ DRC_VAR(pcaddr, 4) @DRC_VAR(intCycle, 256) DRC_VAR(rcnts, 7*4*4) +DRC_VAR(inv_code_start, 4) +DRC_VAR(inv_code_end, 4) DRC_VAR(mem_rtab, 4) DRC_VAR(mem_wtab, 4) DRC_VAR(psxH_ptr, 4) DRC_VAR(zeromem_ptr, 4) -DRC_VAR(inv_code_start, 4) -DRC_VAR(inv_code_end, 4) -DRC_VAR(branch_target, 4) -@DRC_VAR(align0, 16) /* unused/alignment */ +DRC_VAR(invc_ptr, 4) +DRC_VAR(scratch_buf_ptr, 4) +@DRC_VAR(align1, 8) /* unused/alignment */ DRC_VAR(mini_ht, 256) DRC_VAR(restore_candidate, 512) -/* unused */ -DRC_VAR(FCR0, 4) -DRC_VAR(FCR31, 4) + +#ifdef TEXRELS_FORBIDDEN + .data + .align 2 +ptr_jump_in: + .word ESYM(jump_in) +ptr_jump_dirty: + .word ESYM(jump_dirty) +ptr_hash_table: + .word ESYM(hash_table) +#endif + + + .syntax unified + .text + .align 2 #ifndef HAVE_ARMV5 .macro blx rd @@ -99,7 +113,12 @@ DRC_VAR(FCR31, 4) #endif .macro load_varadr reg var -#if defined(__ARM_ARCH_7A__) && !defined(__PIC__) +#if defined(HAVE_ARMV7) && defined(TEXRELS_FORBIDDEN) + movw \reg, #:lower16:(\var-(1678f+8)) + movt \reg, #:upper16:(\var-(1678f+8)) +1678: + add \reg, pc +#elif defined(HAVE_ARMV7) && !defined(__PIC__) movw \reg, #:lower16:\var movt \reg, #:upper16:\var #else @@ -107,8 +126,19 @@ DRC_VAR(FCR31, 4) #endif .endm +.macro load_varadr_ext reg var +#if defined(HAVE_ARMV7) && defined(TEXRELS_FORBIDDEN) + movw \reg, #:lower16:(ptr_\var-(1678f+8)) + movt \reg, #:upper16:(ptr_\var-(1678f+8)) +1678: + ldr \reg, [pc, \reg] +#else + load_varadr \reg \var +#endif +.endm + .macro mov_16 reg imm -#ifdef __ARM_ARCH_7A__ +#ifdef HAVE_ARMV7 movw \reg, #\imm #else mov \reg, #(\imm & 0x00ff) @@ -117,7 +147,7 @@ DRC_VAR(FCR31, 4) .endm .macro mov_24 reg imm -#ifdef __ARM_ARCH_7A__ +#ifdef HAVE_ARMV7 movw \reg, #(\imm & 0xffff) movt \reg, #(\imm >> 16) #else @@ -127,10 +157,11 @@ DRC_VAR(FCR31, 4) #endif .endm +/* r0 = virtual target address */ +/* r1 = instruction to patch */ .macro dyna_linker_main - /* r0 = virtual target address */ - /* r1 = instruction to patch */ - ldr r3, .jiptr +#ifndef NO_WRITE_EXEC + load_varadr_ext r3, jump_in /* get_page */ lsr r2, r0, #12 mov r6, #4096 @@ -151,14 +182,10 @@ DRC_VAR(FCR31, 4) 1: movs r4, r5 beq 2f - ldr r3, [r5] - ldr r5, [r4, #12] + ldr r3, [r5] /* ll_entry .vaddr */ + ldrd r4, r5, [r4, #8] /* ll_entry .next, .addr */ teq r3, r0 bne 1b - ldr r3, [r4, #4] - ldr r4, [r4, #8] - tst r3, r3 - bne 1b teq r4, r6 moveq pc, r4 /* Stale i-cache */ mov r8, r4 @@ -180,18 +207,18 @@ DRC_VAR(FCR31, 4) 3: /* hash_table lookup */ cmp r2, #2048 - ldr r3, .jdptr + load_varadr_ext r3, jump_dirty eor r4, r0, r0, lsl #16 lslcc r2, r0, #9 - ldr r6, .htptr + load_varadr_ext r6, hash_table lsr r4, r4, #12 lsrcc r2, r2, #21 bic r4, r4, #15 ldr r5, [r3, r2, lsl #2] ldr r7, [r6, r4]! teq r7, r0 - ldreq pc, [r6, #4] - ldr r7, [r6, #8] + ldreq pc, [r6, #8] + ldr r7, [r6, #4] teq r7, r0 ldreq pc, [r6, #12] /* jump_dirty lookup */ @@ -206,17 +233,20 @@ DRC_VAR(FCR31, 4) ldr r1, [r4, #8] /* hash_table insert */ ldr r2, [r6] - ldr r3, [r6, #4] + ldr r3, [r6, #8] str r0, [r6] - str r1, [r6, #4] - str r2, [r6, #8] + str r1, [r6, #8] + str r2, [r6, #4] str r3, [r6, #12] mov pc, r1 8: +#else + /* XXX: should be able to do better than this... */ + bl get_addr_ht + mov pc, r0 +#endif .endm - .text - .align 2 FUNCTION(dyna_linker): /* r0 = virtual target address */ @@ -281,12 +311,6 @@ FUNCTION(dyna_linker_ds): sub r0, r1, #4 b exec_pagefault .size dyna_linker_ds, .-dyna_linker_ds -.jiptr: - .word jump_in -.jdptr: - .word jump_dirty -.htptr: - .word hash_table .align 2 @@ -349,13 +373,13 @@ FUNCTION(jump_vaddr_r7): add r0, r7, #0 .size jump_vaddr_r7, .-jump_vaddr_r7 FUNCTION(jump_vaddr): - ldr r1, .htptr + load_varadr_ext r1, hash_table mvn r3, #15 and r2, r3, r2, lsr #12 ldr r2, [r1, r2]! teq r2, r0 - ldreq pc, [r1, #4] - ldr r2, [r1, #8] + ldreq pc, [r1, #8] + ldr r2, [r1, #4] teq r2, r0 ldreq pc, [r1, #12] str r10, [fp, #LO_cycle_count] @@ -367,8 +391,7 @@ FUNCTION(jump_vaddr): .align 2 FUNCTION(verify_code_ds): - str r8, [fp, #LO_branch_target] -FUNCTION(verify_code_vm): + str r8, [fp, #LO_branch_target] @ preserve HOST_BTREG? FUNCTION(verify_code): /* r1 = source */ /* r2 = target */ @@ -403,7 +426,7 @@ FUNCTION(verify_code): bl get_addr mov pc, r0 .size verify_code, .-verify_code - .size verify_code_vm, .-verify_code_vm + .size verify_code_ds, .-verify_code_ds .align 2 FUNCTION(cc_interrupt): @@ -430,7 +453,7 @@ FUNCTION(cc_interrupt): str r0, [fp, #LO_last_count] sub r10, r10, r0 tst r2, r2 - ldmnefd sp!, {r4, r5, r6, r7, r8, r9, sl, fp, ip, pc} + ldmfdne sp!, {r4, r5, r6, r7, r8, r9, sl, fp, ip, pc} tst r1, r1 moveq pc, lr .E2: @@ -451,14 +474,6 @@ FUNCTION(cc_interrupt): b .E1 .size cc_interrupt, .-cc_interrupt - .align 2 -FUNCTION(do_interrupt): - ldr r0, [fp, #LO_pcaddr] - bl get_addr_ht - add r10, r10, #2 - mov pc, r0 - .size do_interrupt, .-do_interrupt - .align 2 FUNCTION(fp_exception): mov r2, #0x10000000 @@ -495,19 +510,9 @@ FUNCTION(jump_syscall): .size jump_syscall, .-jump_syscall .align 2 - .align 2 -FUNCTION(jump_syscall_hle): - str r0, [fp, #LO_pcaddr] /* PC must be set to EPC for psxException */ - ldr r2, [fp, #LO_last_count] - mov r1, #0 /* in delay slot */ - add r2, r2, r10 - mov r0, #0x20 /* cause */ - str r2, [fp, #LO_cycle] /* PCSX cycle counter */ - bl psxException - /* note: psxException might do recursive recompiler call from it's HLE code, * so be ready for this */ -pcsx_return: +FUNCTION(jump_to_new_pc): ldr r1, [fp, #LO_next_interupt] ldr r10, [fp, #LO_cycle] ldr r0, [fp, #LO_pcaddr] @@ -515,27 +520,7 @@ pcsx_return: str r1, [fp, #LO_last_count] bl get_addr_ht mov pc, r0 - .size jump_syscall_hle, .-jump_syscall_hle - - .align 2 -FUNCTION(jump_hlecall): - ldr r2, [fp, #LO_last_count] - str r0, [fp, #LO_pcaddr] - add r2, r2, r10 - adr lr, pcsx_return - str r2, [fp, #LO_cycle] /* PCSX cycle counter */ - bx r1 - .size jump_hlecall, .-jump_hlecall - - .align 2 -FUNCTION(jump_intcall): - ldr r2, [fp, #LO_last_count] - str r0, [fp, #LO_pcaddr] - add r2, r2, r10 - adr lr, pcsx_return - str r2, [fp, #LO_cycle] /* PCSX cycle counter */ - b execI - .size jump_hlecall, .-jump_hlecall + .size jump_to_new_pc, .-jump_to_new_pc .align 2 FUNCTION(new_dyna_leave): @@ -548,72 +533,72 @@ FUNCTION(new_dyna_leave): .align 2 FUNCTION(invalidate_addr_r0): - stmia fp, {r0, r1, r2, r3, r12, lr} + stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} b invalidate_addr_call .size invalidate_addr_r0, .-invalidate_addr_r0 .align 2 FUNCTION(invalidate_addr_r1): - stmia fp, {r0, r1, r2, r3, r12, lr} + stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} mov r0, r1 b invalidate_addr_call .size invalidate_addr_r1, .-invalidate_addr_r1 .align 2 FUNCTION(invalidate_addr_r2): - stmia fp, {r0, r1, r2, r3, r12, lr} + stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} mov r0, r2 b invalidate_addr_call .size invalidate_addr_r2, .-invalidate_addr_r2 .align 2 FUNCTION(invalidate_addr_r3): - stmia fp, {r0, r1, r2, r3, r12, lr} + stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} mov r0, r3 b invalidate_addr_call .size invalidate_addr_r3, .-invalidate_addr_r3 .align 2 FUNCTION(invalidate_addr_r4): - stmia fp, {r0, r1, r2, r3, r12, lr} + stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} mov r0, r4 b invalidate_addr_call .size invalidate_addr_r4, .-invalidate_addr_r4 .align 2 FUNCTION(invalidate_addr_r5): - stmia fp, {r0, r1, r2, r3, r12, lr} + stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} mov r0, r5 b invalidate_addr_call .size invalidate_addr_r5, .-invalidate_addr_r5 .align 2 FUNCTION(invalidate_addr_r6): - stmia fp, {r0, r1, r2, r3, r12, lr} + stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} mov r0, r6 b invalidate_addr_call .size invalidate_addr_r6, .-invalidate_addr_r6 .align 2 FUNCTION(invalidate_addr_r7): - stmia fp, {r0, r1, r2, r3, r12, lr} + stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} mov r0, r7 b invalidate_addr_call .size invalidate_addr_r7, .-invalidate_addr_r7 .align 2 FUNCTION(invalidate_addr_r8): - stmia fp, {r0, r1, r2, r3, r12, lr} + stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} mov r0, r8 b invalidate_addr_call .size invalidate_addr_r8, .-invalidate_addr_r8 .align 2 FUNCTION(invalidate_addr_r9): - stmia fp, {r0, r1, r2, r3, r12, lr} + stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} mov r0, r9 b invalidate_addr_call .size invalidate_addr_r9, .-invalidate_addr_r9 .align 2 FUNCTION(invalidate_addr_r10): - stmia fp, {r0, r1, r2, r3, r12, lr} + stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} mov r0, r10 b invalidate_addr_call .size invalidate_addr_r10, .-invalidate_addr_r10 .align 2 FUNCTION(invalidate_addr_r12): - stmia fp, {r0, r1, r2, r3, r12, lr} + stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} mov r0, r12 .size invalidate_addr_r12, .-invalidate_addr_r12 .align 2 @@ -623,14 +608,14 @@ invalidate_addr_call: cmp r0, r12 cmpcs lr, r0 blcc invalidate_addr - ldmia fp, {r0, r1, r2, r3, r12, pc} + ldmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, pc} .size invalidate_addr_call, .-invalidate_addr_call .align 2 FUNCTION(new_dyna_start): /* ip is stored to conform EABI alignment */ stmfd sp!, {r4, r5, r6, r7, r8, r9, sl, fp, ip, lr} - load_varadr fp, dynarec_local + mov fp, r0 /* dynarec_local */ ldr r0, [fp, #LO_pcaddr] bl get_addr_ht ldr r1, [fp, #LO_next_interupt] @@ -665,11 +650,11 @@ FUNCTION(new_dyna_start): FUNCTION(jump_handler_read8): add r1, #0x1000/4*4 + 0x1000/2*4 @ shift to r8 part - pcsx_read_mem ldrccb, 0 + pcsx_read_mem ldrbcc, 0 FUNCTION(jump_handler_read16): add r1, #0x1000/4*4 @ shift to r16 part - pcsx_read_mem ldrcch, 1 + pcsx_read_mem ldrhcc, 1 FUNCTION(jump_handler_read32): pcsx_read_mem ldrcc, 2 @@ -698,19 +683,19 @@ FUNCTION(jump_handler_read32): blx r3 ldr r0, [fp, #LO_next_interupt] - pop {r2, r3} + pop {r2, lr} str r0, [fp, #LO_last_count] sub r0, r2, r0 - bx r3 + bx lr .endm FUNCTION(jump_handler_write8): add r3, #0x1000/4*4 + 0x1000/2*4 @ shift to r8 part - pcsx_write_mem strccb, 0 + pcsx_write_mem strbcc, 0 FUNCTION(jump_handler_write16): add r3, #0x1000/4*4 @ shift to r16 part - pcsx_write_mem strcch, 1 + pcsx_write_mem strhcc, 1 FUNCTION(jump_handler_write32): pcsx_write_mem strcc, 2 @@ -726,10 +711,10 @@ FUNCTION(jump_handler_write_h): blx r3 ldr r0, [fp, #LO_next_interupt] - pop {r2, r3} + pop {r2, lr} str r0, [fp, #LO_last_count] sub r0, r2, r0 - bx r3 + bx lr FUNCTION(jump_handle_swl): /* r0 = address, r1 = data, r2 = cycles */ @@ -757,8 +742,8 @@ FUNCTION(jump_handle_swl): tst r3, #1 lsrne r1, #16 @ 1 lsreq r12, r1, #24 @ 0 - strneh r1, [r3, #-1] - streqb r12, [r3] + strhne r1, [r3, #-1] + strbeq r12, [r3] bx lr 4: mov r0, r2 @@ -777,8 +762,8 @@ FUNCTION(jump_handle_swr): and r12,r3, #3 mov r0, r2 cmp r12,#2 - strgtb r1, [r3] @ 3 - streqh r1, [r3] @ 2 + strbgt r1, [r3] @ 3 + strheq r1, [r3] @ 2 cmp r12,#1 strlt r1, [r3] @ 0 bxne lr @@ -796,7 +781,7 @@ FUNCTION(jump_handle_swr): /* r0 = address, r2 = cycles */ ldr r3, [fp, #LO_rcnts+6*4+7*4*\num] @ cycleStart mov r0, r2, lsl #16 - sub r0, r3, lsl #16 + sub r0, r0, r3, lsl #16 lsr r0, #16 bx lr .endm @@ -831,7 +816,7 @@ FUNCTION(rcnt2_read_count_m1): /* r0 = address, r2 = cycles */ ldr r3, [fp, #LO_rcnts+6*4+7*4*2] mov r0, r2, lsl #16-3 - sub r0, r3, lsl #16-3 + sub r0, r0, r3, lsl #16-3 lsr r0, #16 @ /= 8 bx lr