X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?p=pcsx_rearmed.git;a=blobdiff_plain;f=libpcsxcore%2Fnew_dynarec%2Flinkage_arm.s;h=19c9686b2644b4db01c2ca101e7e950e5bb05a1a;hp=f838fcbf0d93c8ed8e2f91e6d2b9eda8e4cef505;hb=b1be1eeee94d3547c20719acfa6b0082404897f1;hpb=57871462a0b157066bbc4a763c59b61085436609 diff --git a/libpcsxcore/new_dynarec/linkage_arm.s b/libpcsxcore/new_dynarec/linkage_arm.s index f838fcbf..19c9686b 100644 --- a/libpcsxcore/new_dynarec/linkage_arm.s +++ b/libpcsxcore/new_dynarec/linkage_arm.s @@ -1,6 +1,7 @@ /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * - * Mupen64plus - linkage_arm.s * - * Copyright (C) 2009-2010 Ari64 * + * linkage_arm.s for PCSX * + * Copyright (C) 2009-2011 Ari64 * + * Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * @@ -17,29 +18,20 @@ * Free Software Foundation, Inc., * * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ - .cpu arm9tdmi - .fpu softvfp - .eabi_attribute 20, 1 - .eabi_attribute 21, 1 - .eabi_attribute 23, 3 - .eabi_attribute 24, 1 - .eabi_attribute 25, 1 - .eabi_attribute 26, 2 - .eabi_attribute 30, 6 - .eabi_attribute 18, 4 - .file "linkage_arm.s" + +/* .equiv HAVE_ARMV7, 1 */ + .global rdram rdram = 0x80000000 .global dynarec_local .global reg .global hi .global lo - .global reg_cop1_simple - .global reg_cop1_double .global reg_cop0 + .global reg_cop2d + .global reg_cop2c .global FCR0 .global FCR31 - .global rounding_modes .global next_interupt .global cycle_count .global last_count @@ -48,23 +40,25 @@ rdram = 0x80000000 .global stop .global invc_ptr .global address - .global readmem_dword - .global dword - .global word - .global hword - .global byte .global branch_target .global PC - .global fake_pc .global mini_ht .global restore_candidate - .global memory_map + /* psx */ + .global psxRegs + .global mem_rtab + .global mem_wtab + .global psxH_ptr + .global inv_code_start + .global inv_code_end + .global rcnts + .bss .align 4 .type dynarec_local, %object - .size dynarec_local, 64 + .size dynarec_local, dynarec_local_end-dynarec_local dynarec_local: - .space 64+16+16+8+8+8+8+256+8+8+128+128+128+16+8+132+4+256+512+4194304 + .space dynarec_local_end-dynarec_local next_interupt = dynarec_local + 64 .type next_interupt, %object .size next_interupt, 4 @@ -77,10 +71,7 @@ last_count = cycle_count + 4 pending_exception = last_count + 4 .type pending_exception, %object .size pending_exception, 4 -pcaddr = pending_exception + 4 - .type pcaddr, %object - .size pcaddr, 4 -stop = pcaddr + 4 +stop = pending_exception + 4 .type stop, %object .size stop, 4 invc_ptr = stop + 4 @@ -89,120 +80,169 @@ invc_ptr = stop + 4 address = invc_ptr + 4 .type address, %object .size address, 4 -readmem_dword = address + 4 - .type readmem_dword, %object - .size readmem_dword, 8 -dword = readmem_dword + 8 - .type dword, %object - .size dword, 8 -word = dword + 8 - .type word, %object - .size word, 4 -hword = word + 4 - .type hword, %object - .size hword, 2 -byte = hword + 2 - .type byte, %object - .size byte, 1 /* 1 byte free */ -FCR0 = hword + 4 - .type FCR0, %object - .size FCR0, 4 -FCR31 = FCR0 + 4 - .type FCR31, %object - .size FCR31, 4 -reg = FCR31 + 4 +psxRegs = address + 4 + +/* psxRegs */ + .type psxRegs, %object + .size psxRegs, psxRegs_end-psxRegs +reg = psxRegs .type reg, %object - .size reg, 256 -hi = reg + 256 - .type hi, %object - .size hi, 8 -lo = hi + 8 + .size reg, 128 +lo = reg + 128 .type lo, %object - .size lo, 8 -reg_cop0 = lo + 8 + .size lo, 4 +hi = lo + 4 + .type hi, %object + .size hi, 4 +reg_cop0 = hi + 4 .type reg_cop0, %object .size reg_cop0, 128 -reg_cop1_simple = reg_cop0 + 128 - .type reg_cop1_simple, %object - .size reg_cop1_simple, 128 -reg_cop1_double = reg_cop1_simple + 128 - .type reg_cop1_double, %object - .size reg_cop1_double, 128 -rounding_modes = reg_cop1_double + 128 - .type rounding_modes, %object - .size rounding_modes, 16 -branch_target = rounding_modes + 16 - .type branch_target, %object - .size branch_target, 4 -PC = branch_target + 4 +reg_cop2d = reg_cop0 + 128 + .type reg_cop2d, %object + .size reg_cop2d, 128 +reg_cop2c = reg_cop2d + 128 + .type reg_cop2c, %object + .size reg_cop2c, 128 +PC = reg_cop2c + 128 +pcaddr = PC .type PC, %object .size PC, 4 -fake_pc = PC + 4 - .type fake_pc, %object - .size fake_pc, 132 -/* 4 bytes free */ -mini_ht = fake_pc + 136 +code = PC + 4 + .type code, %object + .size code, 4 +cycle = code + 4 + .type cycle, %object + .size cycle, 4 +interrupt = cycle + 4 + .type interrupt, %object + .size interrupt, 4 +intCycle = interrupt + 4 + .type intCycle, %object + .size intCycle, 256 +psxRegs_end = intCycle + 256 + +rcnts = psxRegs_end + .type rcnts, %object + .size rcnts, 7*4*4 +rcnts_end = rcnts + 7*4*4 + +mem_rtab = rcnts_end + .type mem_rtab, %object + .size mem_rtab, 4 +mem_wtab = mem_rtab + 4 + .type mem_wtab, %object + .size mem_wtab, 4 +psxH_ptr = mem_wtab + 4 + .type psxH_ptr, %object + .size psxH_ptr, 4 +inv_code_start = psxH_ptr + 4 + .type inv_code_start, %object + .size inv_code_start, 4 +inv_code_end = inv_code_start + 4 + .type inv_code_end, %object + .size inv_code_end, 4 +branch_target = inv_code_end + 4 + .type branch_target, %object + .size branch_target, 4 +align0 = branch_target + 4 /* unused/alignment */ + .type align0, %object + .size align0, 4 +mini_ht = align0 + 4 .type mini_ht, %object .size mini_ht, 256 restore_candidate = mini_ht + 256 .type restore_candidate, %object .size restore_candidate, 512 -memory_map = restore_candidate + 512 - .type memory_map, %object - .size memory_map, 4194304 +dynarec_local_end = restore_candidate + 512 - .text - .align 2 - .global dyna_linker - .type dyna_linker, %function -dyna_linker: +/* unused */ +FCR0 = align0 + .type FCR0, %object + .size FCR0, 4 +FCR31 = align0 + .type FCR31, %object + .size FCR31, 4 + +.macro load_var_adr reg var +.if HAVE_ARMV7 + movw \reg, #:lower16:\var + movt \reg, #:upper16:\var +.else + ldr \reg, =\var +.endif +.endm + +.macro mov_16 reg imm +.if HAVE_ARMV7 + movw \reg, #\imm +.else + mov \reg, #(\imm & 0x00ff) + orr \reg, #(\imm & 0xff00) +.endif +.endm + +.macro mov_24 reg imm +.if HAVE_ARMV7 + movw \reg, #(\imm & 0xffff) + movt \reg, #(\imm >> 16) +.else + mov \reg, #(\imm & 0x0000ff) + orr \reg, #(\imm & 0x00ff00) + orr \reg, #(\imm & 0xff0000) +.endif +.endm + +.macro dyna_linker_main /* r0 = virtual target address */ /* r1 = instruction to patch */ - ldr r4, .tlbptr - lsr r5, r0, #12 - mov r12, r0 - cmp r0, #0xC0000000 - mov r6, #4096 - ldrge r12, [r4, r5, lsl #2] - mov r2, #0x80000 ldr r3, .jiptr - tst r12, r12 + /* get_page */ + lsr r2, r0, #12 + mov r6, #4096 + bic r2, r2, #0xe0000 sub r6, r6, #1 - moveq r12, r0 + cmp r2, #0x1000 ldr r7, [r1] - eor r2, r2, r12, lsr #12 - and r6, r6, r12, lsr #12 + biclt r2, #0x0e00 + and r6, r6, r2 cmp r2, #2048 add r12, r7, #2 orrcs r2, r6, #2048 ldr r5, [r3, r2, lsl #2] lsl r12, r12, #8 + add r6, r1, r12, asr #6 + mov r8, #0 /* jump_in lookup */ -.A1: +1: movs r4, r5 - beq .A3 + beq 2f ldr r3, [r5] ldr r5, [r4, #12] teq r3, r0 - bne .A1 + bne 1b ldr r3, [r4, #4] ldr r4, [r4, #8] tst r3, r3 - bne .A1 -.A2: - mov r5, r1 - add r1, r1, r12, asr #6 - teq r1, r4 + bne 1b + teq r4, r6 moveq pc, r4 /* Stale i-cache */ + mov r8, r4 + b 1b /* jump_in may have dupes, continue search */ +2: + tst r8, r8 + beq 3f /* r0 not in jump_in */ + + mov r5, r1 + mov r1, r6 bl add_link - sub r2, r4, r5 + sub r2, r8, r5 and r1, r7, #0xff000000 lsl r2, r2, #6 sub r1, r1, #2 add r1, r1, r2, lsr #8 str r1, [r5] - mov pc, r4 -.A3: + mov pc, r8 +3: /* hash_table lookup */ cmp r2, #2048 ldr r3, .jdptr @@ -220,14 +260,14 @@ dyna_linker: teq r7, r0 ldreq pc, [r6, #12] /* jump_dirty lookup */ -.A6: +6: movs r4, r5 - beq .A8 + beq 8f ldr r3, [r5] ldr r5, [r4, #12] teq r3, r0 - bne .A6 -.A7: + bne 6b +7: ldr r1, [r4, #8] /* hash_table insert */ ldr r2, [r6] @@ -237,7 +277,18 @@ dyna_linker: str r2, [r6, #8] str r3, [r6, #12] mov pc, r1 -.A8: +8: +.endm + + .text + .align 2 + .global dyna_linker + .type dyna_linker, %function +dyna_linker: + /* r0 = virtual target address */ + /* r1 = instruction to patch */ + dyna_linker_main + mov r4, r0 mov r5, r1 bl new_recompile_block @@ -274,6 +325,7 @@ exec_pagefault: bl get_addr_ht mov pc, r0 .size exec_pagefault, .-exec_pagefault + /* Special dynamic linker for the case where a page fault may occur in a branch delay slot */ .global dyna_linker_ds @@ -281,86 +333,8 @@ exec_pagefault: dyna_linker_ds: /* r0 = virtual target address */ /* r1 = instruction to patch */ - ldr r4, .tlbptr - lsr r5, r0, #12 - mov r12, r0 - cmp r0, #0xC0000000 - mov r6, #4096 - ldrge r12, [r4, r5, lsl #2] - mov r2, #0x80000 - ldr r3, .jiptr - tst r12, r12 - sub r6, r6, #1 - moveq r12, r0 - ldr r7, [r1] - eor r2, r2, r12, lsr #12 - and r6, r6, r12, lsr #12 - cmp r2, #2048 - add r12, r7, #2 - orrcs r2, r6, #2048 - ldr r5, [r3, r2, lsl #2] - lsl r12, r12, #8 - /* jump_in lookup */ -.B1: - movs r4, r5 - beq .B3 - ldr r3, [r5] - ldr r5, [r4, #12] - teq r3, r0 - bne .B1 - ldr r3, [r4, #4] - ldr r4, [r4, #8] - tst r3, r3 - bne .B1 -.B2: - mov r5, r1 - add r1, r1, r12, asr #6 - teq r1, r4 - moveq pc, r4 /* Stale i-cache */ - bl add_link - sub r2, r4, r5 - and r1, r7, #0xff000000 - lsl r2, r2, #6 - sub r1, r1, #2 - add r1, r1, r2, lsr #8 - str r1, [r5] - mov pc, r4 -.B3: - /* hash_table lookup */ - cmp r2, #2048 - ldr r3, .jdptr - eor r4, r0, r0, lsl #16 - lslcc r2, r0, #9 - ldr r6, .htptr - lsr r4, r4, #12 - lsrcc r2, r2, #21 - bic r4, r4, #15 - ldr r5, [r3, r2, lsl #2] - ldr r7, [r6, r4]! - teq r7, r0 - ldreq pc, [r6, #4] - ldr r7, [r6, #8] - teq r7, r0 - ldreq pc, [r6, #12] - /* jump_dirty lookup */ -.B6: - movs r4, r5 - beq .B8 - ldr r3, [r5] - ldr r5, [r4, #12] - teq r3, r0 - bne .B6 -.B7: - ldr r1, [r4, #8] - /* hash_table insert */ - ldr r2, [r6] - ldr r3, [r6, #4] - str r0, [r6] - str r1, [r6, #4] - str r2, [r6, #8] - str r3, [r6, #12] - mov pc, r1 -.B8: + dyna_linker_main + mov r4, r0 bic r0, r0, #7 mov r5, r1 @@ -380,10 +354,9 @@ dyna_linker_ds: .word jump_in .jdptr: .word jump_dirty -.tlbptr: - .word tlb_LUT_r .htptr: .word hash_table + .align 2 .global jump_vaddr_r0 .type jump_vaddr_r0, %function @@ -484,6 +457,7 @@ jump_vaddr: ldr r10, [fp, #cycle_count-dynarec_local] mov pc, r0 .size jump_vaddr, .-jump_vaddr + .align 2 .global verify_code_ds .type verify_code_ds, %function @@ -493,30 +467,6 @@ verify_code_ds: .global verify_code_vm .type verify_code_vm, %function verify_code_vm: - /* r0 = instruction pointer (virtual address) */ - /* r1 = source (virtual address) */ - /* r2 = target */ - /* r3 = length */ - cmp r1, #0xC0000000 - blt verify_code - add r12, fp, #memory_map-dynarec_local - lsr r4, r1, #12 - add r5, r1, r3 - sub r5, #1 - ldr r6, [r12, r4, lsl #2] - lsr r5, r5, #12 - movs r7, r6 - bmi .D5 - add r1, r1, r6, lsl #2 - lsl r6, r6, #2 -.D1: - add r4, r4, #1 - teq r6, r7, lsl #2 - bne .D5 - ldr r7, [r12, r4, lsl #2] - cmp r4, r5 - bls .D1 - .size verify_code_vm, .-verify_code_vm .global verify_code .type verify_code, %function verify_code: @@ -553,6 +503,8 @@ verify_code: bl get_addr mov pc, r0 .size verify_code, .-verify_code + .size verify_code_vm, .-verify_code_vm + .align 2 .global cc_interrupt .type cc_interrupt, %function @@ -564,7 +516,8 @@ cc_interrupt: str r1, [fp, #pending_exception-dynarec_local] and r2, r2, r10, lsr #17 add r3, fp, #restore_candidate-dynarec_local - str r10, [fp, #reg_cop0+36-dynarec_local] /* Count */ + str r10, [fp, #cycle-dynarec_local] /* PCSX cycles */ +@@ str r10, [fp, #reg_cop0+36-dynarec_local] /* Count */ ldr r4, [r2, r3] mov r10, lr tst r4, r4 @@ -572,23 +525,20 @@ cc_interrupt: .E1: bl gen_interupt mov lr, r10 - ldr r10, [fp, #reg_cop0+36-dynarec_local] /* Count */ + ldr r10, [fp, #cycle-dynarec_local] ldr r0, [fp, #next_interupt-dynarec_local] ldr r1, [fp, #pending_exception-dynarec_local] ldr r2, [fp, #stop-dynarec_local] str r0, [fp, #last_count-dynarec_local] sub r10, r10, r0 tst r2, r2 - bne .E3 + ldmnefd sp!, {r4, r5, r6, r7, r8, r9, sl, fp, ip, pc} tst r1, r1 moveq pc, lr .E2: ldr r0, [fp, #pcaddr-dynarec_local] bl get_addr_ht mov pc, r0 -.E3: - add r12, fp, #28 - ldmia r12, {r4, r5, r6, r7, r8, r9, sl, fp, pc} .E4: /* Move 'dirty' blocks to the 'clean' list */ lsl r5, r2, #3 @@ -601,21 +551,18 @@ cc_interrupt: tst r5, #31 bne .E5 b .E1 - .size cc_interrupt, .-cc_interrupt + .align 2 .global do_interrupt .type do_interrupt, %function do_interrupt: ldr r0, [fp, #pcaddr-dynarec_local] bl get_addr_ht - ldr r1, [fp, #next_interupt-dynarec_local] - ldr r10, [fp, #reg_cop0+36-dynarec_local] /* Count */ - str r1, [fp, #last_count-dynarec_local] - sub r10, r10, r1 add r10, r10, #2 mov pc, r0 .size do_interrupt, .-do_interrupt + .align 2 .global fp_exception .type fp_exception, %function @@ -629,7 +576,7 @@ fp_exception: add r2, r2, #0x2c str r1, [fp, #reg_cop0+48-dynarec_local] /* Status */ str r2, [fp, #reg_cop0+52-dynarec_local] /* Cause */ - add r0, r3, #0x180 + add r0, r3, #0x80 bl get_addr_ht mov pc, r0 .size fp_exception, .-fp_exception @@ -640,6 +587,7 @@ fp_exception_ds: mov r2, #0x90000000 /* Set high bit if delay slot */ b .E7 .size fp_exception_ds, .-fp_exception_ds + .align 2 .global jump_syscall .type jump_syscall, %function @@ -651,352 +599,402 @@ jump_syscall: mov r2, #0x20 str r1, [fp, #reg_cop0+48-dynarec_local] /* Status */ str r2, [fp, #reg_cop0+52-dynarec_local] /* Cause */ - add r0, r3, #0x180 + add r0, r3, #0x80 bl get_addr_ht mov pc, r0 .size jump_syscall, .-jump_syscall .align 2 - .global indirect_jump_indexed - .type indirect_jump_indexed, %function -indirect_jump_indexed: - ldr r0, [r0, r1, lsl #2] - .size indirect_jump_indexed, .-indirect_jump_indexed - .align 2 - .global indirect_jump - .type indirect_jump, %function -indirect_jump: - ldr r12, [fp, #last_count-dynarec_local] - add r2, r2, r12 - str r2, [fp, #reg_cop0+36-dynarec_local] /* Count */ - mov pc, r0 - .size indirect_jump, .-indirect_jump + .align 2 - .global jump_eret - .type jump_eret, %function -jump_eret: - ldr r1, [fp, #reg_cop0+48-dynarec_local] /* Status */ - ldr r0, [fp, #last_count-dynarec_local] - bic r1, r1, #2 - add r10, r0, r10 - str r1, [fp, #reg_cop0+48-dynarec_local] /* Status */ - str r10, [fp, #reg_cop0+36-dynarec_local] /* Count */ - bl check_interupt + .global jump_syscall_hle + .type jump_syscall_hle, %function +jump_syscall_hle: + str r0, [fp, #pcaddr-dynarec_local] /* PC must be set to EPC for psxException */ + ldr r2, [fp, #last_count-dynarec_local] + mov r1, #0 /* in delay slot */ + add r2, r2, r10 + mov r0, #0x20 /* cause */ + str r2, [fp, #cycle-dynarec_local] /* PCSX cycle counter */ + bl psxException + + /* note: psxException might do recorsive recompiler call from it's HLE code, + * so be ready for this */ +pcsx_return: ldr r1, [fp, #next_interupt-dynarec_local] - ldr r0, [fp, #reg_cop0+56-dynarec_local] /* EPC */ + ldr r10, [fp, #cycle-dynarec_local] + ldr r0, [fp, #pcaddr-dynarec_local] + sub r10, r10, r1 str r1, [fp, #last_count-dynarec_local] - subs r10, r10, r1 - bpl .E11 -.E8: - add r6, fp, #reg+256-dynarec_local - mov r5, #248 - mov r1, #0 -.E9: - ldr r2, [r6, #-8]! - ldr r3, [r6, #4] - eor r3, r3, r2, asr #31 - subs r3, r3, #1 - adc r1, r1, r1 - subs r5, r5, #8 - bne .E9 - ldr r2, [fp, #hi-dynarec_local] - ldr r3, [fp, #hi+4-dynarec_local] - eors r3, r3, r2, asr #31 - ldr r2, [fp, #lo-dynarec_local] - ldreq r3, [fp, #lo+4-dynarec_local] - eoreq r3, r3, r2, asr #31 - subs r3, r3, #1 - adc r1, r1, r1 - bl get_addr_32 + bl get_addr_ht mov pc, r0 -.E11: + .size jump_syscall_hle, .-jump_syscall_hle + + .align 2 + .global jump_hlecall + .type jump_hlecall, %function +jump_hlecall: + ldr r2, [fp, #last_count-dynarec_local] str r0, [fp, #pcaddr-dynarec_local] - bl cc_interrupt - ldr r0, [fp, #pcaddr-dynarec_local] - b .E8 - .size jump_eret, .-jump_eret + add r2, r2, r10 + adr lr, pcsx_return + str r2, [fp, #cycle-dynarec_local] /* PCSX cycle counter */ + bx r1 + .size jump_hlecall, .-jump_hlecall + .align 2 - .global new_dyna_start - .type new_dyna_start, %function -new_dyna_start: - ldr r12, .dlptr - mov r0, #0xa4000000 - stmia r12, {r4, r5, r6, r7, r8, r9, sl, fp, lr} - sub fp, r12, #28 - add r0, r0, #0x40 - bl new_recompile_block - ldr r0, [fp, #next_interupt-dynarec_local] - ldr r10, [fp, #reg_cop0+36-dynarec_local] /* Count */ - str r0, [fp, #last_count-dynarec_local] - sub r10, r10, r0 - mov pc, #0x7000000 -.dlptr: - .word dynarec_local+28 - .size new_dyna_start, .-new_dyna_start + .global jump_intcall + .type jump_intcall, %function +jump_intcall: + ldr r2, [fp, #last_count-dynarec_local] + str r0, [fp, #pcaddr-dynarec_local] + add r2, r2, r10 + adr lr, pcsx_return + str r2, [fp, #cycle-dynarec_local] /* PCSX cycle counter */ + b execI + .size jump_hlecall, .-jump_hlecall + +new_dyna_leave: .align 2 - .global write_rdram_new - .type write_rdram_new, %function -write_rdram_new: - ldr r2, [fp, #address-dynarec_local] - ldr r0, [fp, #word-dynarec_local] - str r0, [r2] - b .E12 - .size write_rdram_new, .-write_rdram_new + .global new_dyna_leave + .type new_dyna_leave, %function + ldr r0, [fp, #last_count-dynarec_local] + add r12, fp, #28 + add r10, r0, r10 + str r10, [fp, #cycle-dynarec_local] + ldmfd sp!, {r4, r5, r6, r7, r8, r9, sl, fp, ip, pc} + .size new_dyna_leave, .-new_dyna_leave + .align 2 - .global write_rdramb_new - .type write_rdramb_new, %function -write_rdramb_new: - ldr r2, [fp, #address-dynarec_local] - ldrb r0, [fp, #byte-dynarec_local] - eor r2, r2, #3 - strb r0, [r2] - b .E12 - .size write_rdramb_new, .-write_rdramb_new + .global invalidate_addr_r0 + .type invalidate_addr_r0, %function +invalidate_addr_r0: + stmia fp, {r0, r1, r2, r3, r12, lr} + b invalidate_addr_call + .size invalidate_addr_r0, .-invalidate_addr_r0 .align 2 - .global write_rdramh_new - .type write_rdramh_new, %function -write_rdramh_new: - ldr r2, [fp, #address-dynarec_local] - ldrh r0, [fp, #hword-dynarec_local] - eor r2, r2, #2 - strh r0, [r2] - b .E12 - .size write_rdramh_new, .-write_rdramh_new + .global invalidate_addr_r1 + .type invalidate_addr_r1, %function +invalidate_addr_r1: + stmia fp, {r0, r1, r2, r3, r12, lr} + mov r0, r1 + b invalidate_addr_call + .size invalidate_addr_r1, .-invalidate_addr_r1 .align 2 - .global write_rdramd_new - .type write_rdramd_new, %function -write_rdramd_new: - ldr r2, [fp, #address-dynarec_local] -/* ldrd r0, [fp, #dword-dynarec_local]*/ - ldr r0, [fp, #dword-dynarec_local] - ldr r1, [fp, #dword+4-dynarec_local] - str r0, [r2, #4] - str r1, [r2] - b .E12 - .size write_rdramd_new, .-write_rdramd_new + .global invalidate_addr_r2 + .type invalidate_addr_r2, %function +invalidate_addr_r2: + stmia fp, {r0, r1, r2, r3, r12, lr} + mov r0, r2 + b invalidate_addr_call + .size invalidate_addr_r2, .-invalidate_addr_r2 .align 2 - .global do_invalidate - .type do_invalidate, %function -do_invalidate: - ldr r2, [fp, #address-dynarec_local] -.E12: - ldr r1, [fp, #invc_ptr-dynarec_local] - lsr r0, r2, #12 - ldrb r2, [r1, r0] - tst r2, r2 - beq invalidate_block - mov pc, lr - .size do_invalidate, .-do_invalidate + .global invalidate_addr_r3 + .type invalidate_addr_r3, %function +invalidate_addr_r3: + stmia fp, {r0, r1, r2, r3, r12, lr} + mov r0, r3 + b invalidate_addr_call + .size invalidate_addr_r3, .-invalidate_addr_r3 + .align 2 + .global invalidate_addr_r4 + .type invalidate_addr_r4, %function +invalidate_addr_r4: + stmia fp, {r0, r1, r2, r3, r12, lr} + mov r0, r4 + b invalidate_addr_call + .size invalidate_addr_r4, .-invalidate_addr_r4 .align 2 - .global read_nomem_new - .type read_nomem_new, %function -read_nomem_new: - ldr r2, [fp, #address-dynarec_local] - add r12, fp, #memory_map-dynarec_local - lsr r0, r2, #12 - ldr r12, [r12, r0, lsl #2] - mov r1, #8 - tst r12, r12 - bmi tlb_exception - ldr r0, [r2, r12, lsl #2] - str r0, [fp, #readmem_dword-dynarec_local] - mov pc, lr - .size read_nomem_new, .-read_nomem_new + .global invalidate_addr_r5 + .type invalidate_addr_r5, %function +invalidate_addr_r5: + stmia fp, {r0, r1, r2, r3, r12, lr} + mov r0, r5 + b invalidate_addr_call + .size invalidate_addr_r5, .-invalidate_addr_r5 .align 2 - .global read_nomemb_new - .type read_nomemb_new, %function -read_nomemb_new: - ldr r2, [fp, #address-dynarec_local] - add r12, fp, #memory_map-dynarec_local - lsr r0, r2, #12 - ldr r12, [r12, r0, lsl #2] - mov r1, #8 - tst r12, r12 - bmi tlb_exception - eor r2, r2, #3 - ldrb r0, [r2, r12, lsl #2] - str r0, [fp, #readmem_dword-dynarec_local] - mov pc, lr - .size read_nomemb_new, .-read_nomemb_new + .global invalidate_addr_r6 + .type invalidate_addr_r6, %function +invalidate_addr_r6: + stmia fp, {r0, r1, r2, r3, r12, lr} + mov r0, r6 + b invalidate_addr_call + .size invalidate_addr_r6, .-invalidate_addr_r6 .align 2 - .global read_nomemh_new - .type read_nomemh_new, %function -read_nomemh_new: - ldr r2, [fp, #address-dynarec_local] - add r12, fp, #memory_map-dynarec_local - lsr r0, r2, #12 - ldr r12, [r12, r0, lsl #2] - mov r1, #8 - tst r12, r12 - bmi tlb_exception - lsl r12, r12, #2 - eor r2, r2, #2 - ldrh r0, [r2, r12] - str r0, [fp, #readmem_dword-dynarec_local] - mov pc, lr - .size read_nomemh_new, .-read_nomemh_new + .global invalidate_addr_r7 + .type invalidate_addr_r7, %function +invalidate_addr_r7: + stmia fp, {r0, r1, r2, r3, r12, lr} + mov r0, r7 + b invalidate_addr_call + .size invalidate_addr_r7, .-invalidate_addr_r7 .align 2 - .global read_nomemd_new - .type read_nomemd_new, %function -read_nomemd_new: - ldr r2, [fp, #address-dynarec_local] - add r12, fp, #memory_map-dynarec_local - lsr r0, r2, #12 - ldr r12, [r12, r0, lsl #2] - mov r1, #8 - tst r12, r12 - bmi tlb_exception - lsl r12, r12, #2 -/* ldrd r0, [r2, r12]*/ - add r3, r2, #4 - ldr r0, [r2, r12] - ldr r1, [r3, r12] - str r0, [fp, #readmem_dword+4-dynarec_local] - str r1, [fp, #readmem_dword-dynarec_local] - mov pc, lr - .size read_nomemd_new, .-read_nomemd_new + .global invalidate_addr_r8 + .type invalidate_addr_r8, %function +invalidate_addr_r8: + stmia fp, {r0, r1, r2, r3, r12, lr} + mov r0, r8 + b invalidate_addr_call + .size invalidate_addr_r8, .-invalidate_addr_r8 .align 2 - .global write_nomem_new - .type write_nomem_new, %function -write_nomem_new: - str r3, [fp, #24] - str lr, [fp, #28] - bl do_invalidate - ldr r2, [fp, #address-dynarec_local] - add r12, fp, #memory_map-dynarec_local - ldr lr, [fp, #28] - lsr r0, r2, #12 - ldr r3, [fp, #24] - ldr r12, [r12, r0, lsl #2] - mov r1, #0xc - tst r12, #0x40000000 - bne tlb_exception - ldr r0, [fp, #word-dynarec_local] - str r0, [r2, r12, lsl #2] - mov pc, lr - .size write_nomem_new, .-write_nomem_new + .global invalidate_addr_r9 + .type invalidate_addr_r9, %function +invalidate_addr_r9: + stmia fp, {r0, r1, r2, r3, r12, lr} + mov r0, r9 + b invalidate_addr_call + .size invalidate_addr_r9, .-invalidate_addr_r9 .align 2 - .global write_nomemb_new - .type write_nomemb_new, %function -write_nomemb_new: - str r3, [fp, #24] - str lr, [fp, #28] - bl do_invalidate - ldr r2, [fp, #address-dynarec_local] - add r12, fp, #memory_map-dynarec_local - ldr lr, [fp, #28] - lsr r0, r2, #12 - ldr r3, [fp, #24] - ldr r12, [r12, r0, lsl #2] - mov r1, #0xc - tst r12, #0x40000000 - bne tlb_exception - eor r2, r2, #3 - ldrb r0, [fp, #byte-dynarec_local] - strb r0, [r2, r12, lsl #2] - mov pc, lr - .size write_nomemb_new, .-write_nomemb_new + .global invalidate_addr_r10 + .type invalidate_addr_r10, %function +invalidate_addr_r10: + stmia fp, {r0, r1, r2, r3, r12, lr} + mov r0, r10 + b invalidate_addr_call + .size invalidate_addr_r10, .-invalidate_addr_r10 .align 2 - .global write_nomemh_new - .type write_nomemh_new, %function -write_nomemh_new: - str r3, [fp, #24] - str lr, [fp, #28] - bl do_invalidate - ldr r2, [fp, #address-dynarec_local] - add r12, fp, #memory_map-dynarec_local - ldr lr, [fp, #28] - lsr r0, r2, #12 - ldr r3, [fp, #24] - ldr r12, [r12, r0, lsl #2] - mov r1, #0xc - lsls r12, #2 - bcs tlb_exception - eor r2, r2, #2 - ldrh r0, [fp, #hword-dynarec_local] - strh r0, [r2, r12] - mov pc, lr - .size write_nomemh_new, .-write_nomemh_new + .global invalidate_addr_r12 + .type invalidate_addr_r12, %function +invalidate_addr_r12: + stmia fp, {r0, r1, r2, r3, r12, lr} + mov r0, r12 + .size invalidate_addr_r12, .-invalidate_addr_r12 .align 2 - .global write_nomemd_new - .type write_nomemd_new, %function -write_nomemd_new: - str r3, [fp, #24] - str lr, [fp, #28] - bl do_invalidate - ldr r2, [fp, #address-dynarec_local] - add r12, fp, #memory_map-dynarec_local - ldr lr, [fp, #28] - lsr r0, r2, #12 - ldr r3, [fp, #24] - ldr r12, [r12, r0, lsl #2] - mov r1, #0xc - lsls r12, #2 - bcs tlb_exception - add r3, r2, #4 - ldr r0, [fp, #dword+4-dynarec_local] - ldr r1, [fp, #dword-dynarec_local] -/* strd r0, [r2, r12]*/ - str r0, [r2, r12] - str r1, [r3, r12] - mov pc, lr - .size write_nomemd_new, .-write_nomemd_new + .global invalidate_addr_call + .type invalidate_addr_call, %function +invalidate_addr_call: + ldr r12, [fp, #inv_code_start-dynarec_local] + ldr lr, [fp, #inv_code_end-dynarec_local] + cmp r0, r12 + cmpcs lr, r0 + blcc invalidate_addr + ldmia fp, {r0, r1, r2, r3, r12, pc} + .size invalidate_addr_call, .-invalidate_addr_call + .align 2 - .global tlb_exception - .type tlb_exception, %function -tlb_exception: - /* r1 = cause */ - /* r2 = address */ - /* r3 = instr addr/flags */ - ldr r4, [fp, #reg_cop0+48-dynarec_local] /* Status */ - add r5, fp, #memory_map-dynarec_local - lsr r6, r3, #12 - orr r1, r1, r3, lsl #31 - orr r4, r4, #2 - ldr r7, [r5, r6, lsl #2] - bic r8, r3, #3 - str r4, [fp, #reg_cop0+48-dynarec_local] /* Status */ - mov r6, #0x6000000 - str r1, [fp, #reg_cop0+52-dynarec_local] /* Cause */ - orr r6, r6, #0x22 - ldr r0, [r8, r7, lsl #2] - add r4, r8, r1, asr #29 - add r5, fp, #reg-dynarec_local - str r4, [fp, #reg_cop0+56-dynarec_local] /* EPC */ - mov r7, #0xf8 - ldr r8, [fp, #reg_cop0+16-dynarec_local] /* Context */ - lsl r1, r0, #16 - lsr r4, r0, #26 - and r7, r7, r0, lsr #18 - mvn r9, #0xF000000F - sub r2, r2, r1, asr #16 - bic r9, r9, #0x0F800000 - rors r6, r6, r4 - mov r0, #0x80000000 - ldrcs r2, [r5, r7] - bic r8, r8, r9 - tst r3, #2 - str r2, [r5, r7] - add r4, r2, r1, asr #16 - add r6, fp, #reg+4-dynarec_local - asr r3, r2, #31 - str r4, [fp, #reg_cop0+32-dynarec_local] /* BadVAddr */ - add r0, r0, #0x180 - and r4, r9, r4, lsr #9 - strne r3, [r6, r7] - orr r8, r8, r4 - str r8, [fp, #reg_cop0+16-dynarec_local] /* Context */ + .global new_dyna_start + .type new_dyna_start, %function +new_dyna_start: + /* ip is stored to conform EABI alignment */ + stmfd sp!, {r4, r5, r6, r7, r8, r9, sl, fp, ip, lr} + load_var_adr fp, dynarec_local + ldr r0, [fp, #pcaddr-dynarec_local] bl get_addr_ht ldr r1, [fp, #next_interupt-dynarec_local] - ldr r10, [fp, #reg_cop0+36-dynarec_local] /* Count */ + ldr r10, [fp, #cycle-dynarec_local] str r1, [fp, #last_count-dynarec_local] sub r10, r10, r1 - mov pc, r0 - .size tlb_exception, .-tlb_exception - .align 2 - .global breakpoint - .type breakpoint, %function -breakpoint: - /* Set breakpoint here for debugging */ - mov pc, lr - .size breakpoint, .-breakpoint - .section .note.GNU-stack,"",%progbits + mov pc, r0 + .size new_dyna_start, .-new_dyna_start + +/* --------------------------------------- */ + +.align 2 +.global jump_handler_read8 +.global jump_handler_read16 +.global jump_handler_read32 +.global jump_handler_write8 +.global jump_handler_write16 +.global jump_handler_write32 +.global jump_handler_write_h +.global jump_handle_swl +.global jump_handle_swr +.global rcnt0_read_count_m0 +.global rcnt0_read_count_m1 +.global rcnt1_read_count_m0 +.global rcnt1_read_count_m1 +.global rcnt2_read_count_m0 +.global rcnt2_read_count_m1 + + +.macro pcsx_read_mem readop tab_shift + /* r0 = address, r1 = handler_tab, r2 = cycles */ + lsl r3, r0, #20 + lsr r3, #(20+\tab_shift) + ldr r12, [fp, #last_count-dynarec_local] + ldr r1, [r1, r3, lsl #2] + add r2, r2, r12 + lsls r1, #1 +.if \tab_shift == 1 + lsl r3, #1 + \readop r0, [r1, r3] +.else + \readop r0, [r1, r3, lsl #\tab_shift] +.endif + movcc pc, lr + str r2, [fp, #cycle-dynarec_local] + bx r1 +.endm + +jump_handler_read8: + add r1, #0x1000/4*4 + 0x1000/2*4 @ shift to r8 part + pcsx_read_mem ldrccb, 0 + +jump_handler_read16: + add r1, #0x1000/4*4 @ shift to r16 part + pcsx_read_mem ldrcch, 1 + +jump_handler_read32: + pcsx_read_mem ldrcc, 2 + + +.macro pcsx_write_mem wrtop tab_shift + /* r0 = address, r1 = data, r2 = cycles, r3 = handler_tab */ + lsl r12,r0, #20 + lsr r12, #(20+\tab_shift) + ldr r3, [r3, r12, lsl #2] + str r0, [fp, #address-dynarec_local] @ some handlers still need it.. + lsls r3, #1 + mov r0, r2 @ cycle return in case of direct store +.if \tab_shift == 1 + lsl r12, #1 + \wrtop r1, [r3, r12] +.else + \wrtop r1, [r3, r12, lsl #\tab_shift] +.endif + movcc pc, lr + ldr r12, [fp, #last_count-dynarec_local] + mov r0, r1 + add r2, r2, r12 + push {r2, lr} + str r2, [fp, #cycle-dynarec_local] + blx r3 + + ldr r0, [fp, #next_interupt-dynarec_local] + pop {r2, r3} + str r0, [fp, #last_count-dynarec_local] + sub r0, r2, r0 + bx r3 +.endm + +jump_handler_write8: + add r3, #0x1000/4*4 + 0x1000/2*4 @ shift to r8 part + pcsx_write_mem strccb, 0 + +jump_handler_write16: + add r3, #0x1000/4*4 @ shift to r16 part + pcsx_write_mem strcch, 1 + +jump_handler_write32: + pcsx_write_mem strcc, 2 + +jump_handler_write_h: + /* r0 = address, r1 = data, r2 = cycles, r3 = handler */ + ldr r12, [fp, #last_count-dynarec_local] + str r0, [fp, #address-dynarec_local] @ some handlers still need it.. + add r2, r2, r12 + mov r0, r1 + push {r2, lr} + str r2, [fp, #cycle-dynarec_local] + blx r3 + + ldr r0, [fp, #next_interupt-dynarec_local] + pop {r2, r3} + str r0, [fp, #last_count-dynarec_local] + sub r0, r2, r0 + bx r3 + +jump_handle_swl: + /* r0 = address, r1 = data, r2 = cycles */ + ldr r3, [fp, #mem_wtab-dynarec_local] + mov r12,r0,lsr #12 + ldr r3, [r3, r12, lsl #2] + lsls r3, #1 + bcs 4f + add r3, r0, r3 + mov r0, r2 + tst r3, #2 + beq 101f + tst r3, #1 + beq 2f +3: + str r1, [r3, #-3] + bx lr +2: + lsr r2, r1, #8 + lsr r1, #24 + strh r2, [r3, #-2] + strb r1, [r3] + bx lr +101: + tst r3, #1 + lsrne r1, #16 @ 1 + lsreq r12, r1, #24 @ 0 + strneh r1, [r3, #-1] + streqb r12, [r3] + bx lr +4: + mov r0, r2 +@ b abort + bx lr @ TODO? + + +jump_handle_swr: + /* r0 = address, r1 = data, r2 = cycles */ + ldr r3, [fp, #mem_wtab-dynarec_local] + mov r12,r0,lsr #12 + ldr r3, [r3, r12, lsl #2] + lsls r3, #1 + bcs 4f + add r3, r0, r3 + and r12,r3, #3 + mov r0, r2 + cmp r12,#2 + strgtb r1, [r3] @ 3 + streqh r1, [r3] @ 2 + cmp r12,#1 + strlt r1, [r3] @ 0 + bxne lr + lsr r2, r1, #8 @ 1 + strb r1, [r3] + strh r2, [r3, #1] + bx lr +4: + mov r0, r2 +@ b abort + bx lr @ TODO? + + +.macro rcntx_read_mode0 num + /* r0 = address, r2 = cycles */ + ldr r3, [fp, #rcnts-dynarec_local+6*4+7*4*\num] @ cycleStart + mov r0, r2, lsl #16 + sub r0, r3, lsl #16 + lsr r0, #16 + bx lr +.endm + +rcnt0_read_count_m0: + rcntx_read_mode0 0 + +rcnt1_read_count_m0: + rcntx_read_mode0 1 + +rcnt2_read_count_m0: + rcntx_read_mode0 2 + +rcnt0_read_count_m1: + /* r0 = address, r2 = cycles */ + ldr r3, [fp, #rcnts-dynarec_local+6*4+7*4*0] @ cycleStart + mov_16 r1, 0x3334 + sub r2, r2, r3 + mul r0, r1, r2 @ /= 5 + lsr r0, #16 + bx lr + +rcnt1_read_count_m1: + /* r0 = address, r2 = cycles */ + ldr r3, [fp, #rcnts-dynarec_local+6*4+7*4*1] + mov_24 r1, 0x1e6cde + sub r2, r2, r3 + umull r3, r0, r1, r2 @ ~ /= hsync_cycles, max ~0x1e6cdd + bx lr + +rcnt2_read_count_m1: + /* r0 = address, r2 = cycles */ + ldr r3, [fp, #rcnts-dynarec_local+6*4+7*4*2] + mov r0, r2, lsl #16-3 + sub r0, r3, lsl #16-3 + lsr r0, #16 @ /= 8 + bx lr + +@ vim:filetype=armasm