X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?p=pcsx_rearmed.git;a=blobdiff_plain;f=libpcsxcore%2Fnew_dynarec%2Flinkage_arm.s;h=ac4929f42db3e32097516b1451d42c5fddb0086e;hp=8f1a2b2f590f00d1df8e2323e6e6eae21cf0ba87;hb=76f71c2748608a51e1f9a49273eb3ff58e715700;hpb=a06c1d6e156b90ce9bfa18664256dfe8f815857e diff --git a/libpcsxcore/new_dynarec/linkage_arm.s b/libpcsxcore/new_dynarec/linkage_arm.s index 8f1a2b2f..ac4929f4 100644 --- a/libpcsxcore/new_dynarec/linkage_arm.s +++ b/libpcsxcore/new_dynarec/linkage_arm.s @@ -1,7 +1,7 @@ /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * linkage_arm.s for PCSX * - * Copyright (C) 2009-2010 Ari64 * - * Copyright (C) 2010 Gražvydas "notaz" Ignotas * + * Copyright (C) 2009-2011 Ari64 * + * Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * @@ -203,52 +203,66 @@ memory_map = restore_candidate + 512 .size memory_map, 4194304 dynarec_local_end = memory_map + 4194304 - .text - .align 2 - .global dyna_linker - .type dyna_linker, %function -dyna_linker: +.macro load_var_adr reg var +.if HAVE_ARMV7 + movw \reg, #:lower16:\var + movt \reg, #:upper16:\var +.else + ldr \reg, =\var +.endif +.endm + +.macro dyna_linker_main /* r0 = virtual target address */ /* r1 = instruction to patch */ - mov r12, r0 - mov r6, #4096 - mov r2, #0x80000 ldr r3, .jiptr + /* get_page */ + lsr r2, r0, #12 + mov r6, #4096 + bic r2, r2, #0xe0000 sub r6, r6, #1 + cmp r2, #0x1000 ldr r7, [r1] - eor r2, r2, r12, lsr #12 - and r6, r6, r12, lsr #12 + biclt r2, #0x0e00 + and r6, r6, r2 cmp r2, #2048 add r12, r7, #2 orrcs r2, r6, #2048 ldr r5, [r3, r2, lsl #2] lsl r12, r12, #8 + add r6, r1, r12, asr #6 + mov r8, #0 /* jump_in lookup */ -.A1: +1: movs r4, r5 - beq .A3 + beq 2f ldr r3, [r5] ldr r5, [r4, #12] teq r3, r0 - bne .A1 + bne 1b ldr r3, [r4, #4] ldr r4, [r4, #8] tst r3, r3 - bne .A1 -.A2: - mov r5, r1 - add r1, r1, r12, asr #6 - teq r1, r4 + bne 1b + teq r4, r6 moveq pc, r4 /* Stale i-cache */ + mov r8, r4 + b 1b /* jump_in may have dupes, continue search */ +2: + tst r8, r8 + beq 3f /* r0 not in jump_in */ + + mov r5, r1 + mov r1, r6 bl add_link - sub r2, r4, r5 + sub r2, r8, r5 and r1, r7, #0xff000000 lsl r2, r2, #6 sub r1, r1, #2 add r1, r1, r2, lsr #8 str r1, [r5] - mov pc, r4 -.A3: + mov pc, r8 +3: /* hash_table lookup */ cmp r2, #2048 ldr r3, .jdptr @@ -266,14 +280,14 @@ dyna_linker: teq r7, r0 ldreq pc, [r6, #12] /* jump_dirty lookup */ -.A6: +6: movs r4, r5 - beq .A8 + beq 8f ldr r3, [r5] ldr r5, [r4, #12] teq r3, r0 - bne .A6 -.A7: + bne 6b +7: ldr r1, [r4, #8] /* hash_table insert */ ldr r2, [r6] @@ -283,7 +297,18 @@ dyna_linker: str r2, [r6, #8] str r3, [r6, #12] mov pc, r1 -.A8: +8: +.endm + + .text + .align 2 + .global dyna_linker + .type dyna_linker, %function +dyna_linker: + /* r0 = virtual target address */ + /* r1 = instruction to patch */ + dyna_linker_main + mov r4, r0 mov r5, r1 bl new_recompile_block @@ -328,80 +353,8 @@ exec_pagefault: dyna_linker_ds: /* r0 = virtual target address */ /* r1 = instruction to patch */ - mov r12, r0 - mov r6, #4096 - mov r2, #0x80000 - ldr r3, .jiptr - sub r6, r6, #1 - ldr r7, [r1] - eor r2, r2, r12, lsr #12 - and r6, r6, r12, lsr #12 - cmp r2, #2048 - add r12, r7, #2 - orrcs r2, r6, #2048 - ldr r5, [r3, r2, lsl #2] - lsl r12, r12, #8 - /* jump_in lookup */ -.B1: - movs r4, r5 - beq .B3 - ldr r3, [r5] - ldr r5, [r4, #12] - teq r3, r0 - bne .B1 - ldr r3, [r4, #4] - ldr r4, [r4, #8] - tst r3, r3 - bne .B1 -.B2: - mov r5, r1 - add r1, r1, r12, asr #6 - teq r1, r4 - moveq pc, r4 /* Stale i-cache */ - bl add_link - sub r2, r4, r5 - and r1, r7, #0xff000000 - lsl r2, r2, #6 - sub r1, r1, #2 - add r1, r1, r2, lsr #8 - str r1, [r5] - mov pc, r4 -.B3: - /* hash_table lookup */ - cmp r2, #2048 - ldr r3, .jdptr - eor r4, r0, r0, lsl #16 - lslcc r2, r0, #9 - ldr r6, .htptr - lsr r4, r4, #12 - lsrcc r2, r2, #21 - bic r4, r4, #15 - ldr r5, [r3, r2, lsl #2] - ldr r7, [r6, r4]! - teq r7, r0 - ldreq pc, [r6, #4] - ldr r7, [r6, #8] - teq r7, r0 - ldreq pc, [r6, #12] - /* jump_dirty lookup */ -.B6: - movs r4, r5 - beq .B8 - ldr r3, [r5] - ldr r5, [r4, #12] - teq r3, r0 - bne .B6 -.B7: - ldr r1, [r4, #8] - /* hash_table insert */ - ldr r2, [r6] - ldr r3, [r6, #4] - str r0, [r6] - str r1, [r6, #4] - str r2, [r6, #8] - str r3, [r6, #12] - mov pc, r1 -.B8: + dyna_linker_main + mov r4, r0 bic r0, r0, #7 mov r5, r1 @@ -624,16 +577,12 @@ cc_interrupt: .global do_interrupt .type do_interrupt, %function do_interrupt: - /* FIXME: cycles already calculated, not needed? */ ldr r0, [fp, #pcaddr-dynarec_local] bl get_addr_ht - ldr r1, [fp, #next_interupt-dynarec_local] - ldr r10, [fp, #cycle-dynarec_local] - str r1, [fp, #last_count-dynarec_local] - sub r10, r10, r1 add r10, r10, #2 mov pc, r0 .size do_interrupt, .-do_interrupt + .align 2 .global fp_exception .type fp_exception, %function @@ -707,11 +656,23 @@ jump_hlecall: ldr r2, [fp, #last_count-dynarec_local] str r0, [fp, #pcaddr-dynarec_local] add r2, r2, r10 - str r2, [fp, #cycle-dynarec_local] /* PCSX cycle counter */ adr lr, pcsx_return + str r2, [fp, #cycle-dynarec_local] /* PCSX cycle counter */ bx r1 .size jump_hlecall, .-jump_hlecall + .align 2 + .global jump_intcall + .type jump_intcall, %function +jump_intcall: + ldr r2, [fp, #last_count-dynarec_local] + str r0, [fp, #pcaddr-dynarec_local] + add r2, r2, r10 + adr lr, pcsx_return + str r2, [fp, #cycle-dynarec_local] /* PCSX cycle counter */ + b execI + .size jump_hlecall, .-jump_hlecall + new_dyna_leave: .align 2 .global new_dyna_leave @@ -740,30 +701,107 @@ indirect_jump: .size indirect_jump_indexed, .-indirect_jump_indexed .align 2 - .global jump_eret - .type jump_eret, %function -jump_eret: - ldr r1, [fp, #reg_cop0+48-dynarec_local] /* Status */ - ldr r0, [fp, #last_count-dynarec_local] - bic r1, r1, #2 - add r10, r0, r10 - str r1, [fp, #reg_cop0+48-dynarec_local] /* Status */ - str r10, [fp, #cycle-dynarec_local] - bl check_interupt - ldr r1, [fp, #next_interupt-dynarec_local] - ldr r0, [fp, #reg_cop0+56-dynarec_local] /* EPC */ - str r1, [fp, #last_count-dynarec_local] - subs r10, r10, r1 - bpl .E11 -.E8: - bl get_addr - mov pc, r0 -.E11: - str r0, [fp, #pcaddr-dynarec_local] - bl cc_interrupt - ldr r0, [fp, #pcaddr-dynarec_local] - b .E8 - .size jump_eret, .-jump_eret + .global invalidate_addr_r0 + .type invalidate_addr_r0, %function +invalidate_addr_r0: + stmia fp, {r0, r1, r2, r3, r12, lr} + lsr r0, r0, #12 + b invalidate_addr_call + .size invalidate_addr_r0, .-invalidate_addr_r0 + .align 2 + .global invalidate_addr_r1 + .type invalidate_addr_r1, %function +invalidate_addr_r1: + stmia fp, {r0, r1, r2, r3, r12, lr} + lsr r0, r1, #12 + b invalidate_addr_call + .size invalidate_addr_r1, .-invalidate_addr_r1 + .align 2 + .global invalidate_addr_r2 + .type invalidate_addr_r2, %function +invalidate_addr_r2: + stmia fp, {r0, r1, r2, r3, r12, lr} + lsr r0, r2, #12 + b invalidate_addr_call + .size invalidate_addr_r2, .-invalidate_addr_r2 + .align 2 + .global invalidate_addr_r3 + .type invalidate_addr_r3, %function +invalidate_addr_r3: + stmia fp, {r0, r1, r2, r3, r12, lr} + lsr r0, r3, #12 + b invalidate_addr_call + .size invalidate_addr_r3, .-invalidate_addr_r3 + .align 2 + .global invalidate_addr_r4 + .type invalidate_addr_r4, %function +invalidate_addr_r4: + stmia fp, {r0, r1, r2, r3, r12, lr} + lsr r0, r4, #12 + b invalidate_addr_call + .size invalidate_addr_r4, .-invalidate_addr_r4 + .align 2 + .global invalidate_addr_r5 + .type invalidate_addr_r5, %function +invalidate_addr_r5: + stmia fp, {r0, r1, r2, r3, r12, lr} + lsr r0, r5, #12 + b invalidate_addr_call + .size invalidate_addr_r5, .-invalidate_addr_r5 + .align 2 + .global invalidate_addr_r6 + .type invalidate_addr_r6, %function +invalidate_addr_r6: + stmia fp, {r0, r1, r2, r3, r12, lr} + lsr r0, r6, #12 + b invalidate_addr_call + .size invalidate_addr_r6, .-invalidate_addr_r6 + .align 2 + .global invalidate_addr_r7 + .type invalidate_addr_r7, %function +invalidate_addr_r7: + stmia fp, {r0, r1, r2, r3, r12, lr} + lsr r0, r7, #12 + b invalidate_addr_call + .size invalidate_addr_r7, .-invalidate_addr_r7 + .align 2 + .global invalidate_addr_r8 + .type invalidate_addr_r8, %function +invalidate_addr_r8: + stmia fp, {r0, r1, r2, r3, r12, lr} + lsr r0, r8, #12 + b invalidate_addr_call + .size invalidate_addr_r8, .-invalidate_addr_r8 + .align 2 + .global invalidate_addr_r9 + .type invalidate_addr_r9, %function +invalidate_addr_r9: + stmia fp, {r0, r1, r2, r3, r12, lr} + lsr r0, r9, #12 + b invalidate_addr_call + .size invalidate_addr_r9, .-invalidate_addr_r9 + .align 2 + .global invalidate_addr_r10 + .type invalidate_addr_r10, %function +invalidate_addr_r10: + stmia fp, {r0, r1, r2, r3, r12, lr} + lsr r0, r10, #12 + b invalidate_addr_call + .size invalidate_addr_r10, .-invalidate_addr_r10 + .align 2 + .global invalidate_addr_r12 + .type invalidate_addr_r12, %function +invalidate_addr_r12: + stmia fp, {r0, r1, r2, r3, r12, lr} + lsr r0, r12, #12 + .size invalidate_addr_r12, .-invalidate_addr_r12 + .align 2 + .global invalidate_addr_call + .type invalidate_addr_call, %function +invalidate_addr_call: + bl invalidate_block + ldmia fp, {r0, r1, r2, r3, r12, pc} + .size invalidate_addr_call, .-invalidate_addr_call .align 2 .global new_dyna_start @@ -771,12 +809,7 @@ jump_eret: new_dyna_start: /* ip is stored to conform EABI alignment */ stmfd sp!, {r4, r5, r6, r7, r8, r9, sl, fp, ip, lr} -.if HAVE_ARMV7 - movw fp, #:lower16:dynarec_local - movt fp, #:upper16:dynarec_local -.else - ldr fp, .dlptr -.endif + load_var_adr fp, dynarec_local ldr r0, [fp, #pcaddr-dynarec_local] bl get_addr_ht ldr r1, [fp, #next_interupt-dynarec_local] @@ -784,8 +817,6 @@ new_dyna_start: str r1, [fp, #last_count-dynarec_local] sub r10, r10, r1 mov pc, r0 -.dlptr: - .word dynarec_local .size new_dyna_start, .-new_dyna_start /* --------------------------------------- */ @@ -803,6 +834,7 @@ new_dyna_start: .global ari_write_ram_mirror8 .global ari_write_ram_mirror16 .global ari_write_ram_mirror32 +.global ari_write_ram_mirror_ro32 .global ari_read_bios8 .global ari_read_bios16 .global ari_read_bios32 @@ -852,34 +884,34 @@ ari_read_ram_mirror32: ari_read_ram_mirror (3<<11), ldr /* invalidation is already taken care of by the caller */ -.macro ari_write_ram bic_const var op +.macro ari_write_ram bic_const var pf ldr r0, [fp, #address-dynarec_local] - ldr r1, [fp, #\var-dynarec_local] + ldr\pf r1, [fp, #\var-dynarec_local] .if \bic_const bic r0, r0, #\bic_const .endif - \op r1, [r0] + str\pf r1, [r0] mov pc, lr .endm ari_write_ram8: - ari_write_ram 0, byte, strb + ari_write_ram 0, byte, b ari_write_ram16: - ari_write_ram 1, hword, strh + ari_write_ram 1, hword, h ari_write_ram32: - ari_write_ram 3, word, str + ari_write_ram 3, word, -.macro ari_write_ram_mirror mvn_const var op +.macro ari_write_ram_mirror mvn_const var pf ldr r0, [fp, #address-dynarec_local] mvn r3, #\mvn_const - ldr r1, [fp, #\var-dynarec_local] + ldr\pf r1, [fp, #\var-dynarec_local] and r0, r3, lsr #11 ldr r2, [fp, #invc_ptr-dynarec_local] orr r0, r0, #1<<31 ldrb r2, [r2, r0, lsr #12] - \op r1, [r0] + str\pf r1, [r0] tst r2, r2 movne pc, lr lsr r0, r0, #12 @@ -887,13 +919,21 @@ ari_write_ram32: .endm ari_write_ram_mirror8: - ari_write_ram_mirror 0, byte, strb + ari_write_ram_mirror 0, byte, b ari_write_ram_mirror16: - ari_write_ram_mirror (1<<11), hword, strh + ari_write_ram_mirror (1<<11), hword, h ari_write_ram_mirror32: - ari_write_ram_mirror (3<<11), word, str + ari_write_ram_mirror (3<<11), word, + +ari_write_ram_mirror_ro32: + load_var_adr r0, pcsx_ram_is_ro + ldr r0, [r0] + tst r0, r0 + movne pc, lr + nop + b ari_write_ram_mirror32 .macro ari_read_bios_mirror bic_const op @@ -993,9 +1033,9 @@ ari_read_io32: .endif .endm -.macro ari_write_io opvl opst var mem_tab tab_shift +.macro ari_write_io pf var mem_tab tab_shift ldr r0, [fp, #address-dynarec_local] - \opvl r1, [fp, #\var-dynarec_local] + ldr\pf r1, [fp, #\var-dynarec_local] .if \tab_shift == 0 bic r0, r0, #3 .endif @@ -1015,26 +1055,52 @@ ari_read_io32: bxne r12 0: ldr r3, [fp, #psxH_ptr-dynarec_local] - \opst r1, [r2, r3] + str\pf r1, [r2, r3] mov pc, lr 1: -.if \tab_shift == 1 @ write16 cmp r2, #0x1c00 blo 0b cmp r2, #0x1e00 +.if \tab_shift != 0 ldrlo pc, [fp, #spu_writef-dynarec_local] - nop +.else + @ write32 to SPU - very rare case (is this correct?) + bhs 0b + add r2, r0, #2 + mov r3, r1, lsr #16 + push {r2,r3,lr} + mov lr, pc + ldr pc, [fp, #spu_writef-dynarec_local] + pop {r0,r1,lr} + ldr pc, [fp, #spu_writef-dynarec_local] .endif + nop b 0b .endm ari_write_io8: - ari_write_io ldrb, strb, byte, tab_write8, 2 + @ PCSX always writes to psxH, so do we for consistency + ldr r0, [fp, #address-dynarec_local] + ldr r3, [fp, #psxH_ptr-dynarec_local] + ldrb r1, [fp, #byte-dynarec_local] + bic r2, r0, #0x1f800000 + ldr r12,[fp, #tab_write8-dynarec_local] + strb r1, [r2, r3] + subs r3, r2, #0x1000 + movlo pc, lr +@ ari_write_io_old 2 + cmp r3, #0x880 + movhs pc, lr + ldr r12,[r12, r3, lsl #2] + mov r0, r1 + tst r12,r12 + bxne r12 + mov pc, lr ari_write_io16: - ari_write_io ldrh, strh, hword, tab_write16, 1 + ari_write_io h, hword, tab_write16, 1 ari_write_io32: - ari_write_io ldr, str, word, tab_write32, 0 + ari_write_io , word, tab_write32, 0 @ vim:filetype=armasm