X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=libpcsxcore%2Fnew_dynarec%2Flinkage_arm.S;h=1a16aa04da6ff8b08cae51104ea1a8946b9fbab2;hb=cdc2da64d4c55a97c4507b6c9389bf4dca04695b;hp=50b577b391b0fbb0d318144aead5191cdf45a97e;hpb=de5a60c397c1ec31ed2a133230011fe241b2b953;p=pcsx_rearmed.git diff --git a/libpcsxcore/new_dynarec/linkage_arm.S b/libpcsxcore/new_dynarec/linkage_arm.S index 50b577b3..1a16aa04 100644 --- a/libpcsxcore/new_dynarec/linkage_arm.S +++ b/libpcsxcore/new_dynarec/linkage_arm.S @@ -20,20 +20,21 @@ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ #include "arm_features.h" +#include "new_dynarec_config.h" #include "linkage_offsets.h" #ifdef __MACH__ #define dynarec_local ESYM(dynarec_local) -#define add_link ESYM(add_link) +#define add_jump_out ESYM(add_jump_out) #define new_recompile_block ESYM(new_recompile_block) #define get_addr ESYM(get_addr) #define get_addr_ht ESYM(get_addr_ht) #define clean_blocks ESYM(clean_blocks) #define gen_interupt ESYM(gen_interupt) -#define psxException ESYM(psxException) -#define execI ESYM(execI) #define invalidate_addr ESYM(invalidate_addr) +#define gteCheckStallRaw ESYM(gteCheckStallRaw) +#define psxException ESYM(psxException) #endif .bss @@ -58,12 +59,13 @@ DRC_VAR(cycle_count, 4) DRC_VAR(last_count, 4) DRC_VAR(pending_exception, 4) DRC_VAR(stop, 4) -DRC_VAR(invc_ptr, 4) +DRC_VAR(branch_target, 4) DRC_VAR(address, 4) +DRC_VAR(hack_addr, 4) DRC_VAR(psxRegs, LO_psxRegs_end - LO_psxRegs) /* psxRegs */ -DRC_VAR(reg, 128) +@DRC_VAR(reg, 128) DRC_VAR(lo, 4) DRC_VAR(hi, 4) DRC_VAR(reg_cop0, 128) @@ -76,23 +78,20 @@ DRC_VAR(pcaddr, 4) @DRC_VAR(intCycle, 256) DRC_VAR(rcnts, 7*4*4) +DRC_VAR(inv_code_start, 4) +DRC_VAR(inv_code_end, 4) DRC_VAR(mem_rtab, 4) DRC_VAR(mem_wtab, 4) DRC_VAR(psxH_ptr, 4) DRC_VAR(zeromem_ptr, 4) -DRC_VAR(inv_code_start, 4) -DRC_VAR(inv_code_end, 4) -DRC_VAR(branch_target, 4) +DRC_VAR(invc_ptr, 4) DRC_VAR(scratch_buf_ptr, 4) -@DRC_VAR(align0, 12) /* unused/alignment */ +DRC_VAR(ram_offset, 4) DRC_VAR(mini_ht, 256) DRC_VAR(restore_candidate, 512) -/* unused */ -DRC_VAR(FCR0, 4) -DRC_VAR(FCR31, 4) -#ifdef __MACH__ +#ifdef TEXRELS_FORBIDDEN .data .align 2 ptr_jump_in: @@ -116,21 +115,21 @@ ptr_hash_table: #endif .macro load_varadr reg var -#if defined(__ARM_ARCH_7A__) && !defined(__PIC__) - movw \reg, #:lower16:\var - movt \reg, #:upper16:\var -#elif defined(__ARM_ARCH_7A__) && defined(__MACH__) +#if defined(HAVE_ARMV7) && defined(TEXRELS_FORBIDDEN) movw \reg, #:lower16:(\var-(1678f+8)) movt \reg, #:upper16:(\var-(1678f+8)) 1678: add \reg, pc +#elif defined(HAVE_ARMV7) && !defined(__PIC__) + movw \reg, #:lower16:\var + movt \reg, #:upper16:\var #else ldr \reg, =\var #endif .endm .macro load_varadr_ext reg var -#if defined(__ARM_ARCH_7A__) && defined(__MACH__) && defined(__PIC__) +#if defined(HAVE_ARMV7) && defined(TEXRELS_FORBIDDEN) movw \reg, #:lower16:(ptr_\var-(1678f+8)) movt \reg, #:upper16:(ptr_\var-(1678f+8)) 1678: @@ -141,7 +140,7 @@ ptr_hash_table: .endm .macro mov_16 reg imm -#ifdef __ARM_ARCH_7A__ +#ifdef HAVE_ARMV7 movw \reg, #\imm #else mov \reg, #(\imm & 0x00ff) @@ -150,7 +149,7 @@ ptr_hash_table: .endm .macro mov_24 reg imm -#ifdef __ARM_ARCH_7A__ +#ifdef HAVE_ARMV7 movw \reg, #(\imm & 0xffff) movt \reg, #(\imm >> 16) #else @@ -160,9 +159,10 @@ ptr_hash_table: #endif .endm +/* r0 = virtual target address */ +/* r1 = instruction to patch */ .macro dyna_linker_main - /* r0 = virtual target address */ - /* r1 = instruction to patch */ +#ifndef NO_WRITE_EXEC load_varadr_ext r3, jump_in /* get_page */ lsr r2, r0, #12 @@ -178,7 +178,7 @@ ptr_hash_table: orrcs r2, r6, #2048 ldr r5, [r3, r2, lsl #2] lsl r12, r12, #8 - add r6, r1, r12, asr #6 + add r6, r1, r12, asr #6 /* old target */ mov r8, #0 /* jump_in lookup */ 1: @@ -198,7 +198,7 @@ ptr_hash_table: mov r5, r1 mov r1, r6 - bl add_link + bl add_jump_out sub r2, r8, r5 and r1, r7, #0xff000000 lsl r2, r2, #6 @@ -219,8 +219,8 @@ ptr_hash_table: ldr r5, [r3, r2, lsl #2] ldr r7, [r6, r4]! teq r7, r0 - ldreq pc, [r6, #4] - ldr r7, [r6, #8] + ldreq pc, [r6, #8] + ldr r7, [r6, #4] teq r7, r0 ldreq pc, [r6, #12] /* jump_dirty lookup */ @@ -235,13 +235,18 @@ ptr_hash_table: ldr r1, [r4, #8] /* hash_table insert */ ldr r2, [r6] - ldr r3, [r6, #4] + ldr r3, [r6, #8] str r0, [r6] - str r1, [r6, #4] - str r2, [r6, #8] + str r1, [r6, #8] + str r2, [r6, #4] str r3, [r6, #12] mov pc, r1 8: +#else + /* XXX: should be able to do better than this... */ + bl get_addr_ht + mov pc, r0 +#endif .endm @@ -259,7 +264,7 @@ FUNCTION(dyna_linker): beq dyna_linker /* pagefault */ mov r1, r0 - mov r2, #8 + mov r2, #(4<<2) /* Address error (fetch) */ .size dyna_linker, .-dyna_linker FUNCTION(exec_pagefault): @@ -267,21 +272,13 @@ FUNCTION(exec_pagefault): /* r1 = fault address */ /* r2 = cause */ ldr r3, [fp, #LO_reg_cop0+48] /* Status */ - mvn r6, #0xF000000F - ldr r4, [fp, #LO_reg_cop0+16] /* Context */ - bic r6, r6, #0x0F800000 str r0, [fp, #LO_reg_cop0+56] /* EPC */ orr r3, r3, #2 str r1, [fp, #LO_reg_cop0+32] /* BadVAddr */ - bic r4, r4, r6 str r3, [fp, #LO_reg_cop0+48] /* Status */ - and r5, r6, r1, lsr #9 str r2, [fp, #LO_reg_cop0+52] /* Cause */ - and r1, r1, r6, lsl #9 - str r1, [fp, #LO_reg_cop0+40] /* EntryHi */ - orr r4, r4, r5 - str r4, [fp, #LO_reg_cop0+16] /* Context */ mov r0, #0x80000000 + orr r0, r0, #0x80 bl get_addr_ht mov pc, r0 .size exec_pagefault, .-exec_pagefault @@ -375,8 +372,8 @@ FUNCTION(jump_vaddr): and r2, r3, r2, lsr #12 ldr r2, [r1, r2]! teq r2, r0 - ldreq pc, [r1, #4] - ldr r2, [r1, #8] + ldreq pc, [r1, #8] + ldr r2, [r1, #4] teq r2, r0 ldreq pc, [r1, #12] str r10, [fp, #LO_cycle_count] @@ -388,8 +385,7 @@ FUNCTION(jump_vaddr): .align 2 FUNCTION(verify_code_ds): - str r8, [fp, #LO_branch_target] -FUNCTION(verify_code_vm): + str r8, [fp, #LO_branch_target] @ preserve HOST_BTREG? FUNCTION(verify_code): /* r1 = source */ /* r2 = target */ @@ -424,7 +420,7 @@ FUNCTION(verify_code): bl get_addr mov pc, r0 .size verify_code, .-verify_code - .size verify_code_vm, .-verify_code_vm + .size verify_code_ds, .-verify_code_ds .align 2 FUNCTION(cc_interrupt): @@ -436,7 +432,7 @@ FUNCTION(cc_interrupt): and r2, r2, r10, lsr #17 add r3, fp, #LO_restore_candidate str r10, [fp, #LO_cycle] /* PCSX cycles */ -@@ str r10, [fp, #LO_reg_cop0+36] /* Count */ +@@ str r10, [fp, #LO_reg_cop0+36] /* Count - not on PSX */ ldr r4, [r2, r3] mov r10, lr tst r4, r4 @@ -472,14 +468,6 @@ FUNCTION(cc_interrupt): b .E1 .size cc_interrupt, .-cc_interrupt - .align 2 -FUNCTION(do_interrupt): - ldr r0, [fp, #LO_pcaddr] - bl get_addr_ht - add r10, r10, #2 - mov pc, r0 - .size do_interrupt, .-do_interrupt - .align 2 FUNCTION(fp_exception): mov r2, #0x10000000 @@ -502,33 +490,32 @@ FUNCTION(fp_exception_ds): .size fp_exception_ds, .-fp_exception_ds .align 2 +FUNCTION(jump_break_ds): + mov r0, #0x24 + mov r1, #1 + b call_psxException +FUNCTION(jump_break): + mov r0, #0x24 + mov r1, #0 + b call_psxException +FUNCTION(jump_syscall_ds): + mov r0, #0x20 + mov r1, #1 + b call_psxException FUNCTION(jump_syscall): - ldr r1, [fp, #LO_reg_cop0+48] /* Status */ - mov r3, #0x80000000 - str r0, [fp, #LO_reg_cop0+56] /* EPC */ - orr r1, #2 - mov r2, #0x20 - str r1, [fp, #LO_reg_cop0+48] /* Status */ - str r2, [fp, #LO_reg_cop0+52] /* Cause */ - add r0, r3, #0x80 - bl get_addr_ht - mov pc, r0 - .size jump_syscall, .-jump_syscall - .align 2 + mov r0, #0x20 + mov r1, #0 - .align 2 -FUNCTION(jump_syscall_hle): - str r0, [fp, #LO_pcaddr] /* PC must be set to EPC for psxException */ - ldr r2, [fp, #LO_last_count] - mov r1, #0 /* in delay slot */ - add r2, r2, r10 - mov r0, #0x20 /* cause */ - str r2, [fp, #LO_cycle] /* PCSX cycle counter */ +call_psxException: + ldr r3, [fp, #LO_last_count] + str r2, [fp, #LO_pcaddr] + add r10, r3, r10 + str r10, [fp, #LO_cycle] /* PCSX cycles */ bl psxException /* note: psxException might do recursive recompiler call from it's HLE code, * so be ready for this */ -pcsx_return: +FUNCTION(jump_to_new_pc): ldr r1, [fp, #LO_next_interupt] ldr r10, [fp, #LO_cycle] ldr r0, [fp, #LO_pcaddr] @@ -536,27 +523,7 @@ pcsx_return: str r1, [fp, #LO_last_count] bl get_addr_ht mov pc, r0 - .size jump_syscall_hle, .-jump_syscall_hle - - .align 2 -FUNCTION(jump_hlecall): - ldr r2, [fp, #LO_last_count] - str r0, [fp, #LO_pcaddr] - add r2, r2, r10 - adr lr, pcsx_return - str r2, [fp, #LO_cycle] /* PCSX cycle counter */ - bx r1 - .size jump_hlecall, .-jump_hlecall - - .align 2 -FUNCTION(jump_intcall): - ldr r2, [fp, #LO_last_count] - str r0, [fp, #LO_pcaddr] - add r2, r2, r10 - adr lr, pcsx_return - str r2, [fp, #LO_cycle] /* PCSX cycle counter */ - b execI - .size jump_hlecall, .-jump_hlecall + .size jump_to_new_pc, .-jump_to_new_pc .align 2 FUNCTION(new_dyna_leave): @@ -651,7 +618,7 @@ invalidate_addr_call: FUNCTION(new_dyna_start): /* ip is stored to conform EABI alignment */ stmfd sp!, {r4, r5, r6, r7, r8, r9, sl, fp, ip, lr} - load_varadr fp, dynarec_local + mov fp, r0 /* dynarec_local */ ldr r0, [fp, #LO_pcaddr] bl get_addr_ht ldr r1, [fp, #LO_next_interupt] @@ -696,6 +663,13 @@ FUNCTION(jump_handler_read32): pcsx_read_mem ldrcc, 2 +.macro memhandler_post + ldr r0, [fp, #LO_next_interupt] + ldr r2, [fp, #LO_cycle] @ memhandlers can modify cc, like dma + str r0, [fp, #LO_last_count] + sub r0, r2, r0 +.endm + .macro pcsx_write_mem wrtop tab_shift /* r0 = address, r1 = data, r2 = cycles, r3 = handler_tab */ lsl r12,r0, #20 @@ -703,7 +677,7 @@ FUNCTION(jump_handler_read32): ldr r3, [r3, r12, lsl #2] str r0, [fp, #LO_address] @ some handlers still need it.. lsls r3, #1 - mov r0, r2 @ cycle return in case of direct store + mov r0, r2 @ cycle return in case of direct store .if \tab_shift == 1 lsl r12, #1 \wrtop r1, [r3, r12] @@ -714,15 +688,14 @@ FUNCTION(jump_handler_read32): ldr r12, [fp, #LO_last_count] mov r0, r1 add r2, r2, r12 - push {r2, lr} str r2, [fp, #LO_cycle] + + str lr, [fp, #LO_saved_lr] blx r3 + ldr lr, [fp, #LO_saved_lr] - ldr r0, [fp, #LO_next_interupt] - pop {r2, r3} - str r0, [fp, #LO_last_count] - sub r0, r2, r0 - bx r3 + memhandler_post + bx lr .endm FUNCTION(jump_handler_write8): @@ -742,15 +715,14 @@ FUNCTION(jump_handler_write_h): str r0, [fp, #LO_address] @ some handlers still need it.. add r2, r2, r12 mov r0, r1 - push {r2, lr} str r2, [fp, #LO_cycle] + + str lr, [fp, #LO_saved_lr] blx r3 + ldr lr, [fp, #LO_saved_lr] - ldr r0, [fp, #LO_next_interupt] - pop {r2, r3} - str r0, [fp, #LO_last_count] - sub r0, r2, r0 - bx r3 + memhandler_post + bx lr FUNCTION(jump_handle_swl): /* r0 = address, r1 = data, r2 = cycles */ @@ -856,4 +828,56 @@ FUNCTION(rcnt2_read_count_m1): lsr r0, #16 @ /= 8 bx lr +FUNCTION(call_gteStall): + /* r0 = op_cycles, r1 = cycles */ + ldr r2, [fp, #LO_last_count] + str lr, [fp, #LO_saved_lr] + add r1, r1, r2 + str r1, [fp, #LO_cycle] + add r1, fp, #LO_psxRegs + bl gteCheckStallRaw + ldr lr, [fp, #LO_saved_lr] + add r10, r10, r0 + bx lr + +#ifdef HAVE_ARMV6 + +FUNCTION(get_reg): + ldr r12, [r0] + and r1, r1, #0xff + ldr r2, [r0, #4] + orr r1, r1, r1, lsl #8 + ldr r3, [r0, #8] + orr r1, r1, r1, lsl #16 @ searched char in every byte + ldrb r0, [r0, #12] @ last byte + eor r12, r12, r1 + eor r2, r2, r1 + eor r3, r3, r1 + cmp r0, r1, lsr #24 + mov r0, #12 + mvn r1, #0 @ r1=~0 + bxeq lr + orr r3, r3, #0xff000000 @ EXCLUDE_REG + uadd8 r0, r12, r1 @ add and set GE bits when not 0 (match) + mov r12, #0 + sel r0, r12, r1 @ 0 if no match, else ff in some byte + uadd8 r2, r2, r1 + sel r2, r12, r1 + uadd8 r3, r3, r1 + sel r3, r12, r1 + mov r12, #3 + clz r0, r0 @ 0, 8, 16, 24 or 32 + clz r2, r2 + clz r3, r3 + sub r0, r12, r0, lsr #3 @ 3, 2, 1, 0 or -1 + sub r2, r12, r2, lsr #3 + sub r3, r12, r3, lsr #3 + orr r2, r2, #4 + orr r3, r3, #8 + and r0, r0, r2 + and r0, r0, r3 + bx lr + +#endif /* HAVE_ARMV6 */ + @ vim:filetype=armasm