drc: implement memory access speculation
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / linkage_arm.s
index ac4929f..c1bc8c9 100644 (file)
  *   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.          *
  * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 
-.equiv HAVE_ARMV7, 1
+/* .equiv HAVE_ARMV7, 1 */
 
-.if HAVE_ARMV7
-       .cpu cortex-a8
-       .fpu vfp
-.else
-       .cpu arm9tdmi
-       .fpu softvfp
-.endif 
        .global rdram
 rdram = 0x80000000
        .global dynarec_local
@@ -60,8 +53,12 @@ rdram = 0x80000000
        .global memory_map
        /* psx */
        .global psxRegs
+       .global mem_rtab
+       .global mem_wtab
        .global nd_pcsx_io
        .global psxH_ptr
+       .global inv_code_start
+       .global inv_code_end
 
        .bss
        .align  4
@@ -153,8 +150,15 @@ intCycle = interrupt + 4
        .size   intCycle, 256
 psxRegs_end = intCycle + 256
 
+mem_rtab = psxRegs_end /* 4-byte slot placed at psxRegs_end */
+       .type   mem_rtab, %object
+       .size   mem_rtab, 4
+mem_wtab = mem_rtab + 4 /* next 4-byte slot; jump_handle_swl/swr load it as a table base pointer */
+       .type   mem_wtab, %object
+       .size   mem_wtab, 4
+
 /* nd_pcsx_io */
-nd_pcsx_io = psxRegs_end
+nd_pcsx_io = mem_wtab + 4
        .type   nd_pcsx_io, %object
        .size   nd_pcsx_io, nd_pcsx_io_end-nd_pcsx_io
 tab_read8 = nd_pcsx_io
@@ -186,7 +190,13 @@ nd_pcsx_io_end = spu_writef + 4
 psxH_ptr = nd_pcsx_io_end
        .type   psxH_ptr, %object
        .size   psxH_ptr, 4
-align0 = psxH_ptr + 4 /* just for alignment */
+inv_code_start = psxH_ptr + 4 /* 4-byte slot; lower bound of the range test in invalidate_addr_call */
+       .type   inv_code_start, %object
+       .size   inv_code_start, 4
+inv_code_end = inv_code_start + 4 /* 4-byte slot; upper bound of the same range test */
+       .type   inv_code_end, %object
+       .size   inv_code_end, 4
+align0 = inv_code_end + 4 /* just for alignment */
        .type   align0, %object
        .size   align0, 4
 branch_target = align0 + 4
@@ -705,7 +715,6 @@ indirect_jump:
        .type   invalidate_addr_r0, %function
 invalidate_addr_r0:
        stmia   fp, {r0, r1, r2, r3, r12, lr}
-       lsr     r0, r0, #12     
        b       invalidate_addr_call
        .size   invalidate_addr_r0, .-invalidate_addr_r0
        .align  2
@@ -713,7 +722,7 @@ invalidate_addr_r0:
        .type   invalidate_addr_r1, %function
 invalidate_addr_r1:
        stmia   fp, {r0, r1, r2, r3, r12, lr}
-       lsr     r0, r1, #12     
+       mov     r0, r1          @ r0 = address taken from r1, passed on unshifted
        b       invalidate_addr_call
        .size   invalidate_addr_r1, .-invalidate_addr_r1
        .align  2
@@ -721,7 +730,7 @@ invalidate_addr_r1:
        .type   invalidate_addr_r2, %function
 invalidate_addr_r2:
        stmia   fp, {r0, r1, r2, r3, r12, lr}
-       lsr     r0, r2, #12     
+       mov     r0, r2          @ r0 = address taken from r2, passed on unshifted
        b       invalidate_addr_call
        .size   invalidate_addr_r2, .-invalidate_addr_r2
        .align  2
@@ -729,7 +738,7 @@ invalidate_addr_r2:
        .type   invalidate_addr_r3, %function
 invalidate_addr_r3:
        stmia   fp, {r0, r1, r2, r3, r12, lr}
-       lsr     r0, r3, #12     
+       mov     r0, r3          @ r0 = address taken from r3, passed on unshifted
        b       invalidate_addr_call
        .size   invalidate_addr_r3, .-invalidate_addr_r3
        .align  2
@@ -737,7 +746,7 @@ invalidate_addr_r3:
        .type   invalidate_addr_r4, %function
 invalidate_addr_r4:
        stmia   fp, {r0, r1, r2, r3, r12, lr}
-       lsr     r0, r4, #12     
+       mov     r0, r4          @ r0 = address taken from r4, passed on unshifted
        b       invalidate_addr_call
        .size   invalidate_addr_r4, .-invalidate_addr_r4
        .align  2
@@ -745,7 +754,7 @@ invalidate_addr_r4:
        .type   invalidate_addr_r5, %function
 invalidate_addr_r5:
        stmia   fp, {r0, r1, r2, r3, r12, lr}
-       lsr     r0, r5, #12     
+       mov     r0, r5          @ r0 = address taken from r5, passed on unshifted
        b       invalidate_addr_call
        .size   invalidate_addr_r5, .-invalidate_addr_r5
        .align  2
@@ -753,7 +762,7 @@ invalidate_addr_r5:
        .type   invalidate_addr_r6, %function
 invalidate_addr_r6:
        stmia   fp, {r0, r1, r2, r3, r12, lr}
-       lsr     r0, r6, #12     
+       mov     r0, r6          @ r0 = address taken from r6, passed on unshifted
        b       invalidate_addr_call
        .size   invalidate_addr_r6, .-invalidate_addr_r6
        .align  2
@@ -761,7 +770,7 @@ invalidate_addr_r6:
        .type   invalidate_addr_r7, %function
 invalidate_addr_r7:
        stmia   fp, {r0, r1, r2, r3, r12, lr}
-       lsr     r0, r7, #12     
+       mov     r0, r7          @ r0 = address taken from r7, passed on unshifted
        b       invalidate_addr_call
        .size   invalidate_addr_r7, .-invalidate_addr_r7
        .align  2
@@ -769,7 +778,7 @@ invalidate_addr_r7:
        .type   invalidate_addr_r8, %function
 invalidate_addr_r8:
        stmia   fp, {r0, r1, r2, r3, r12, lr}
-       lsr     r0, r8, #12     
+       mov     r0, r8          @ r0 = address taken from r8, passed on unshifted
        b       invalidate_addr_call
        .size   invalidate_addr_r8, .-invalidate_addr_r8
        .align  2
@@ -777,7 +786,7 @@ invalidate_addr_r8:
        .type   invalidate_addr_r9, %function
 invalidate_addr_r9:
        stmia   fp, {r0, r1, r2, r3, r12, lr}
-       lsr     r0, r9, #12     
+       mov     r0, r9          @ r0 = address taken from r9, passed on unshifted
        b       invalidate_addr_call
        .size   invalidate_addr_r9, .-invalidate_addr_r9
        .align  2
@@ -785,7 +794,7 @@ invalidate_addr_r9:
        .type   invalidate_addr_r10, %function
 invalidate_addr_r10:
        stmia   fp, {r0, r1, r2, r3, r12, lr}
-       lsr     r0, r10, #12    
+       mov     r0, r10         @ r0 = address taken from r10, passed on unshifted
        b       invalidate_addr_call
        .size   invalidate_addr_r10, .-invalidate_addr_r10
        .align  2
@@ -793,13 +802,17 @@ invalidate_addr_r10:
        .type   invalidate_addr_r12, %function
 invalidate_addr_r12:
        stmia   fp, {r0, r1, r2, r3, r12, lr}
-       lsr     r0, r12, #12    
+       mov     r0, r12         @ r0 = address taken from r12, unshifted; no branch, execution continues into invalidate_addr_call below
        .size   invalidate_addr_r12, .-invalidate_addr_r12
        .align  2
        .global invalidate_addr_call
        .type   invalidate_addr_call, %function
 invalidate_addr_call:
-       bl      invalidate_block
+       ldr     r12, [fp, #inv_code_start-dynarec_local]   @ r12 = inv_code_start (r12 itself was saved at fp by the entry stub)
+       ldr     lr, [fp, #inv_code_end-dynarec_local]      @ lr = inv_code_end (return address was saved at fp as well)
+       cmp     r0, r12                                    @ carry set when r0 >= inv_code_start (unsigned)
+       cmpcs   lr, r0                                     @ only then: carry stays set when r0 <= inv_code_end
+       blcc    invalidate_addr                            @ r0 outside [inv_code_start, inv_code_end]: call invalidate_addr
        ldmia   fp, {r0, r1, r2, r3, r12, pc}
        .size   invalidate_addr_call, .-invalidate_addr_call
 
@@ -844,6 +857,15 @@ new_dyna_start:
 .global        ari_write_io8
 .global        ari_write_io16
 .global        ari_write_io32
+.global        jump_handler_read8
+.global        jump_handler_read16
+.global        jump_handler_read32
+.global        jump_handler_write8
+.global        jump_handler_write16
+.global        jump_handler_write32
+.global        jump_handler_write_h
+.global jump_handle_swl
+.global jump_handle_swr
 
 .macro ari_read_ram bic_const op
        ldr     r0, [fp, #address-dynarec_local]
@@ -914,8 +936,13 @@ ari_write_ram32:
        str\pf  r1, [r0]
        tst     r2, r2
        movne   pc, lr
-       lsr     r0, r0, #12
-       b       invalidate_block
+       ldr     r1, [fp, #inv_code_start-dynarec_local]
+       ldr     r2, [fp, #inv_code_end-dynarec_local]
+       cmp     r0, r1
+       cmpcs   r2, r0
+       movcs   pc, lr
+       nop
+       b       invalidate_addr
 .endm
 
 ari_write_ram_mirror8:
@@ -1103,4 +1130,154 @@ ari_write_io16:
 ari_write_io32:
        ari_write_io , word, tab_write32, 0
 
+/* memory access helpers: jump_handler_read*, jump_handler_write*, jump_handle_swl/swr */
+
+.macro pcsx_read_mem readop tab_shift
+       /* r0 = address, r1 = handler_tab, r2 = cycles; on the direct path the loaded value is returned in r0 */
+       lsl     r3, r0, #20                          @ discard the upper 20 address bits
+       lsr     r3, #(20+\tab_shift)                 @ r3 = (address & 0xfff) >> tab_shift = table index
+       ldr     r12, [fp, #last_count-dynarec_local]
+       ldr     r1, [r1, r3, lsl #2]                 @ r1 = 32-bit table entry for that index
+       add     r2, r2, r12                          @ r2 = cycles + last_count
+       lsls    r1, #1                               @ entry's top bit -> carry; r1 = entry << 1
+.if \tab_shift == 1
+       lsl     r3, #1                               @ index -> byte offset; no flag update, carry is kept
+       \readop r0, [r1, r3]                         @ readop is passed as a carry-clear (cc) load by the stubs below
+.else
+       \readop r0, [r1, r3, lsl #\tab_shift]        @ same load, with the index scaled inside the addressing mode
+.endif
+       movcc   pc, lr                               @ carry clear: direct read done, return
+       str     r2, [fp, #cycle-dynarec_local]       @ carry set: cycle = cycles + last_count
+       bx      r1                                   @ jump to entry << 1 with lr untouched
+.endm
+
+jump_handler_read8:                                 @ byte read; register inputs as listed in pcsx_read_mem
+       add     r1, #0x1000/4*4 + 0x1000/2*4 @ shift to r8 part: step over 0x1000/4 + 0x1000/2 four-byte entries
+       pcsx_read_mem ldrccb, 0
+
+jump_handler_read16:                                @ halfword read; register inputs as listed in pcsx_read_mem
+       add     r1, #0x1000/4*4              @ shift to r16 part: step over 0x1000/4 four-byte entries
+       pcsx_read_mem ldrcch, 1
+
+jump_handler_read32:                                @ word read; uses the table at r1 without any offset
+       pcsx_read_mem ldrcc, 2
+
+
+.macro pcsx_write_mem wrtop tab_shift
+       /* r0 = address, r1 = data, r2 = cycles, r3 = handler_tab; returns a cycle value in r0 */
+       lsl     r12,r0, #20                           @ discard the upper 20 address bits
+       lsr     r12, #(20+\tab_shift)                 @ r12 = (address & 0xfff) >> tab_shift = table index
+       ldr     r3, [r3, r12, lsl #2]                 @ r3 = 32-bit table entry for that index
+       str     r0, [fp, #address-dynarec_local]      @ some handlers still need it..
+       lsls    r3, #1                                @ entry's top bit -> carry; r3 = entry << 1
+       mov     r0, r2                                @ cycle return in case of direct store
+.if \tab_shift == 1
+       lsl     r12, #1                               @ index -> byte offset; no flag update, carry is kept
+       \wrtop  r1, [r3, r12]                         @ wrtop is passed as a carry-clear (cc) store by the stubs below
+.else
+       \wrtop  r1, [r3, r12, lsl #\tab_shift]        @ same store, with the index scaled inside the addressing mode
+.endif
+       movcc   pc, lr                                @ carry clear: direct store done, return r0 = cycles
+       ldr     r12, [fp, #last_count-dynarec_local]
+       mov     r0, r1                                @ carry set: r0 = data for the call below
+       add     r2, r2, r12                           @ r2 = cycles + last_count
+       push    {r2, lr}                              @ keep that sum and the return address across the call
+       str     r2, [fp, #cycle-dynarec_local]        @ cycle = cycles + last_count
+       blx     r3                                    @ call the routine at entry << 1
+
+       ldr     r0, [fp, #next_interupt-dynarec_local]
+       pop     {r2, r3}                              @ r2 = saved sum, r3 = saved return address
+       str     r0, [fp, #last_count-dynarec_local]   @ last_count = next_interupt
+       sub     r0, r2, r0                            @ r0 = saved sum - next_interupt
+       bx      r3                                    @ return to the original caller
+.endm
+
+jump_handler_write8:                                @ byte write; register inputs as listed in pcsx_write_mem
+       add     r3, #0x1000/4*4 + 0x1000/2*4 @ shift to the 8-bit part of the write table: step over 0x1000/4 + 0x1000/2 four-byte entries
+       pcsx_write_mem strccb, 0
+
+jump_handler_write16:                               @ halfword write; register inputs as listed in pcsx_write_mem
+       add     r3, #0x1000/4*4              @ shift to the 16-bit part of the write table: step over 0x1000/4 four-byte entries
+       pcsx_write_mem strcch, 1
+
+jump_handler_write32:                               @ word write; uses the table at r3 without any offset
+       pcsx_write_mem strcc, 2
+
+jump_handler_write_h:
+       /* r0 = address, r1 = data, r2 = cycles, r3 = handler; returns a cycle value in r0 */
+       ldr     r12, [fp, #last_count-dynarec_local]
+       str     r0, [fp, #address-dynarec_local]      @ some handlers still need it..
+       add     r2, r2, r12                           @ r2 = cycles + last_count
+       mov     r0, r1                                @ r0 = data for the handler call
+       push    {r2, lr}                              @ keep that sum and the return address across the call
+       str     r2, [fp, #cycle-dynarec_local]        @ cycle = cycles + last_count
+       blx     r3                                    @ call the handler given in r3
+
+       ldr     r0, [fp, #next_interupt-dynarec_local]
+       pop     {r2, r3}                              @ r2 = saved sum, r3 = saved return address
+       str     r0, [fp, #last_count-dynarec_local]   @ last_count = next_interupt
+       sub     r0, r2, r0                            @ r0 = saved sum - next_interupt
+       bx      r3                                    @ return to the original caller
+
+jump_handle_swl:
+       /* r0 = address, r1 = data, r2 = cycles; returns r0 = cycles */
+       ldr     r3, [fp, #mem_wtab-dynarec_local]     @ r3 = table base pointer stored in mem_wtab
+       mov     r12,r0,lsr #12                        @ r12 = address >> 12 = table index
+       ldr     r3, [r3, r12, lsl #2]                 @ r3 = 32-bit table entry
+       lsls    r3, #1                                @ entry's top bit -> carry; r3 = entry << 1
+       bcs     4f                                    @ top bit set: not handled as a direct store
+       add     r3, r0, r3                            @ r3 = address + (entry << 1) = store target
+       mov     r0, r2                                @ return value = cycles
+       tst     r3, #2
+       beq     101f                                  @ low two bits of r3 are 0 or 1
+       tst     r3, #1
+       beq     2f                                    @ low two bits of r3 are 2
+3:                                                   @ low two bits are 3: store the whole word ending at r3
+       str     r1, [r3, #-3]
+       bx      lr
+2:
+       lsr     r2, r1, #8                            @ data >> 8: low halfword goes to r3-2
+       lsr     r1, #24                               @ top data byte goes to r3
+       strh    r2, [r3, #-2]
+       strb    r1, [r3]
+       bx      lr
+101:
+       tst     r3, #1
+       lsrne   r1, #16         @ 1: upper halfword of data
+       lsreq   r12, r1, #24    @ 0: top byte of data
+       strneh  r1, [r3, #-1]   @ 1: halfword at r3-1
+       streqb  r12, [r3]       @ 0: single byte at r3
+       bx      lr
+4:
+       mov     r0, r2
+       b       abort           @ unconditional, so the bx below is never reached
+       bx      lr              @ TODO?
+
+
+jump_handle_swr:
+       /* r0 = address, r1 = data, r2 = cycles; returns r0 = cycles */
+       ldr     r3, [fp, #mem_wtab-dynarec_local]     @ r3 = table base pointer stored in mem_wtab
+       mov     r12,r0,lsr #12                        @ r12 = address >> 12 = table index
+       ldr     r3, [r3, r12, lsl #2]                 @ r3 = 32-bit table entry
+       lsls    r3, #1                                @ entry's top bit -> carry; r3 = entry << 1
+       bcs     4f                                    @ top bit set: not handled as a direct store
+       add     r3, r0, r3                            @ r3 = address + (entry << 1) = store target
+       and     r12,r3, #3                            @ r12 = low two bits of the target
+       mov     r0, r2                                @ return value = cycles
+       cmp     r12,#2
+       strgtb  r1, [r3]        @ 3: low data byte only
+       streqh  r1, [r3]        @ 2: low data halfword
+       cmp     r12,#1
+       strlt   r1, [r3]        @ 0: whole word
+       bxne    lr              @ cases 0, 2 and 3 are finished
+       lsr     r2, r1, #8      @ 1: three bytes, low byte at r3 ...
+       strb    r1, [r3]
+       strh    r2, [r3, #1]    @ ... and data bits 8..23 at r3+1
+       bx      lr
+4:
+       mov     r0, r2
+       b       abort           @ unconditional, so the bx below is never reached
+       bx      lr              @ TODO?
+
+
 @ vim:filetype=armasm