inline/parametrize rootcounter reads
diff --git a/libpcsxcore/new_dynarec/linkage_arm.s b/libpcsxcore/new_dynarec/linkage_arm.s
index 6107e27..19c9686 100644
--- a/libpcsxcore/new_dynarec/linkage_arm.s
+++ b/libpcsxcore/new_dynarec/linkage_arm.s
  *   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.          *
  * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 
-.equiv HAVE_ARMV7, 1
+/* .equiv HAVE_ARMV7, 1 */
 
-.if HAVE_ARMV7
-       .cpu cortex-a8
-       .fpu vfp
-.else
-       .cpu arm9tdmi
-       .fpu softvfp
-.endif 
        .global rdram
 rdram = 0x80000000
        .global dynarec_local
@@ -47,28 +40,25 @@ rdram = 0x80000000
        .global stop
        .global invc_ptr
        .global address
-       .global readmem_dword
-       .global readmem_word
-       .global dword
-       .global word
-       .global hword
-       .global byte
        .global branch_target
        .global PC
        .global mini_ht
        .global restore_candidate
-       .global memory_map
        /* psx */
        .global psxRegs
-       .global nd_pcsx_io
+       .global mem_rtab
+       .global mem_wtab
        .global psxH_ptr
+       .global inv_code_start
+       .global inv_code_end
+       .global rcnts
 
        .bss
        .align  4
        .type   dynarec_local, %object
        .size   dynarec_local, dynarec_local_end-dynarec_local
 dynarec_local:
-       .space  dynarec_local_end-dynarec_local /*0x400630*/
+       .space  dynarec_local_end-dynarec_local
 next_interupt = dynarec_local + 64
        .type   next_interupt, %object
        .size   next_interupt, 4
@@ -90,29 +80,7 @@ invc_ptr = stop + 4
 address = invc_ptr + 4
        .type   address, %object
        .size   address, 4
-readmem_dword = address + 4
-readmem_word = readmem_dword
-       .type   readmem_dword, %object
-       .size   readmem_dword, 8
-dword = readmem_dword + 8
-       .type   dword, %object
-       .size   dword, 8
-word = dword + 8
-       .type   word, %object
-       .size   word, 4
-hword = word + 4
-       .type   hword, %object
-       .size   hword, 2
-byte = hword + 2
-       .type   byte, %object
-       .size   byte, 1 /* 1 byte free */
-FCR0 = hword + 4
-       .type   FCR0, %object
-       .size   FCR0, 4
-FCR31 = FCR0 + 4
-       .type   FCR31, %object
-       .size   FCR31, 4
-psxRegs = FCR31 + 4
+psxRegs = address + 4
 
 /* psxRegs */
        .type   psxRegs, %object
@@ -153,55 +121,47 @@ intCycle = interrupt + 4
        .size   intCycle, 256
 psxRegs_end = intCycle + 256
 
-/* nd_pcsx_io */
-nd_pcsx_io = psxRegs_end
-       .type   nd_pcsx_io, %object
-       .size   nd_pcsx_io, nd_pcsx_io_end-nd_pcsx_io
-tab_read8 = nd_pcsx_io
-       .type   tab_read8, %object
-       .size   tab_read8, 4
-tab_read16 = tab_read8 + 4
-       .type   tab_read16, %object
-       .size   tab_read16, 4
-tab_read32 = tab_read16 + 4
-       .type   tab_read32, %object
-       .size   tab_read32, 4
-tab_write8 = tab_read32 + 4
-       .type   tab_write8, %object
-       .size   tab_write8, 4
-tab_write16 = tab_write8 + 4
-       .type   tab_write16, %object
-       .size   tab_write16, 4
-tab_write32 = tab_write16 + 4
-       .type   tab_write32, %object
-       .size   tab_write32, 4
-spu_readf = tab_write32 + 4
-       .type   spu_readf, %object
-       .size   spu_readf, 4
-spu_writef = spu_readf + 4
-       .type   spu_writef, %object
-       .size   spu_writef, 4
-nd_pcsx_io_end = spu_writef + 4
-
-psxH_ptr = nd_pcsx_io_end
+rcnts = psxRegs_end
+       .type   rcnts, %object
+       .size   rcnts, 7*4*4
+rcnts_end = rcnts + 7*4*4
+
+mem_rtab = rcnts_end
+       .type   mem_rtab, %object
+       .size   mem_rtab, 4
+mem_wtab = mem_rtab + 4
+       .type   mem_wtab, %object
+       .size   mem_wtab, 4
+psxH_ptr = mem_wtab + 4
        .type   psxH_ptr, %object
        .size   psxH_ptr, 4
-align0 = psxH_ptr + 4 /* just for alignment */
-       .type   align0, %object
-       .size   align0, 4
-branch_target = align0 + 4
+inv_code_start = psxH_ptr + 4
+       .type   inv_code_start, %object
+       .size   inv_code_start, 4
+inv_code_end = inv_code_start + 4
+       .type   inv_code_end, %object
+       .size   inv_code_end, 4
+branch_target = inv_code_end + 4
        .type   branch_target, %object
        .size   branch_target, 4
-mini_ht = branch_target + 4
+align0 = branch_target + 4 /* unused/alignment */
+       .type   align0, %object
+       .size   align0, 4
+mini_ht = align0 + 4
        .type   mini_ht, %object
        .size   mini_ht, 256
 restore_candidate = mini_ht + 256
        .type   restore_candidate, %object
        .size   restore_candidate, 512
-memory_map = restore_candidate + 512
-       .type   memory_map, %object
-       .size   memory_map, 4194304
-dynarec_local_end = memory_map + 4194304
+dynarec_local_end = restore_candidate + 512
+
+/* unused */
+FCR0 = align0
+       .type   FCR0, %object
+       .size   FCR0, 4
+FCR31 = align0
+       .type   FCR31, %object
+       .size   FCR31, 4
 
 .macro load_var_adr reg var
 .if HAVE_ARMV7
@@ -212,52 +172,77 @@ dynarec_local_end = memory_map + 4194304
 .endif
 .endm
 
-       .text
-       .align  2
-       .global dyna_linker
-       .type   dyna_linker, %function
-dyna_linker:
+.macro mov_16 reg imm
+.if HAVE_ARMV7
+       movw    \reg, #\imm
+.else
+       mov     \reg, #(\imm & 0x00ff)
+       orr     \reg, #(\imm & 0xff00)
+.endif
+.endm
+
+.macro mov_24 reg imm
+.if HAVE_ARMV7
+       movw    \reg, #(\imm & 0xffff)
+       movt    \reg, #(\imm >> 16)
+.else
+       mov     \reg, #(\imm & 0x0000ff)
+       orr     \reg, #(\imm & 0x00ff00)
+       orr     \reg, #(\imm & 0xff0000)
+.endif
+.endm
+
+.macro dyna_linker_main
        /* r0 = virtual target address */
        /* r1 = instruction to patch */
-       mov     r12, r0
-       mov     r6, #4096
-       mov     r2, #0x80000
        ldr     r3, .jiptr
+       /* get_page */
+       lsr     r2, r0, #12
+       mov     r6, #4096
+       bic     r2, r2, #0xe0000
        sub     r6, r6, #1
+       cmp     r2, #0x1000
        ldr     r7, [r1]
-       eor     r2, r2, r12, lsr #12
-       and     r6, r6, r12, lsr #12
+       biclt   r2, #0x0e00
+       and     r6, r6, r2
        cmp     r2, #2048
        add     r12, r7, #2
        orrcs   r2, r6, #2048
        ldr     r5, [r3, r2, lsl #2]
        lsl     r12, r12, #8
+       add     r6, r1, r12, asr #6
+       mov     r8, #0
        /* jump_in lookup */
-.A1:
+1:
        movs    r4, r5
-       beq     .A3
+       beq     2f
        ldr     r3, [r5]
        ldr     r5, [r4, #12]
        teq     r3, r0
-       bne     .A1
+       bne     1b
        ldr     r3, [r4, #4]
        ldr     r4, [r4, #8]
        tst     r3, r3
-       bne     .A1
-.A2:
-       mov     r5, r1
-       add     r1, r1, r12, asr #6
-       teq     r1, r4
+       bne     1b
+       teq     r4, r6
        moveq   pc, r4 /* Stale i-cache */
+       mov     r8, r4
+       b       1b     /* jump_in may have dupes, continue search */
+2:
+       tst     r8, r8
+       beq     3f     /* r0 not in jump_in */
+
+       mov     r5, r1
+       mov     r1, r6
        bl      add_link
-       sub     r2, r4, r5
+       sub     r2, r8, r5
        and     r1, r7, #0xff000000
        lsl     r2, r2, #6
        sub     r1, r1, #2
        add     r1, r1, r2, lsr #8
        str     r1, [r5]
-       mov     pc, r4
-.A3:
+       mov     pc, r8
+3:
        /* hash_table lookup */
        cmp     r2, #2048
        ldr     r3, .jdptr
@@ -275,14 +260,14 @@ dyna_linker:
        teq     r7, r0
        ldreq   pc, [r6, #12]
        /* jump_dirty lookup */
-.A6:
+6:
        movs    r4, r5
-       beq     .A8
+       beq     8f
        ldr     r3, [r5]
        ldr     r5, [r4, #12]
        teq     r3, r0
-       bne     .A6
-.A7:
+       bne     6b
+7:
        ldr     r1, [r4, #8]
        /* hash_table insert */
        ldr     r2, [r6]
@@ -292,7 +277,18 @@ dyna_linker:
        str     r2, [r6, #8]
        str     r3, [r6, #12]
        mov     pc, r1
-.A8:
+8:
+.endm
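
For orientation, the get_page computation folded into dyna_linker_main above (the
lsr/bic/biclt/orrcs sequence) corresponds roughly to the C below; this is an illustrative
transliteration of the assembly, not code lifted from new_dynarec.c. The other visible
change is that the jump_in scan no longer stops at the first match, since the list may
contain duplicates (see the inline comment).

    /* rough C equivalent of the page-index computation in dyna_linker_main */
    static unsigned int get_page(unsigned int vaddr)
    {
        unsigned int page = vaddr >> 12;
        page &= ~0xe0000u;            /* fold the KSEG0/KSEG1 mirrors onto KUSEG */
        if (page < 0x1000)            /* low 16MB: fold the 2MB RAM mirrors */
            page &= ~0x0e00u;
        if (page >= 2048)             /* everything else shares pages 2048..4095 */
            page = 2048 | (page & 4095);
        return page;
    }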
+
+       .text
+       .align  2
+       .global dyna_linker
+       .type   dyna_linker, %function
+dyna_linker:
+       /* r0 = virtual target address */
+       /* r1 = instruction to patch */
+       dyna_linker_main
+
        mov     r4, r0
        mov     r5, r1
        bl      new_recompile_block
@@ -337,80 +333,8 @@ exec_pagefault:
 dyna_linker_ds:
        /* r0 = virtual target address */
        /* r1 = instruction to patch */
-       mov     r12, r0
-       mov     r6, #4096
-       mov     r2, #0x80000
-       ldr     r3, .jiptr
-       sub     r6, r6, #1
-       ldr     r7, [r1]
-       eor     r2, r2, r12, lsr #12
-       and     r6, r6, r12, lsr #12
-       cmp     r2, #2048
-       add     r12, r7, #2
-       orrcs   r2, r6, #2048
-       ldr     r5, [r3, r2, lsl #2]
-       lsl     r12, r12, #8
-       /* jump_in lookup */
-.B1:
-       movs    r4, r5
-       beq     .B3
-       ldr     r3, [r5]
-       ldr     r5, [r4, #12]
-       teq     r3, r0
-       bne     .B1
-       ldr     r3, [r4, #4]
-       ldr     r4, [r4, #8]
-       tst     r3, r3
-       bne     .B1
-.B2:
-       mov     r5, r1
-       add     r1, r1, r12, asr #6
-       teq     r1, r4
-       moveq   pc, r4 /* Stale i-cache */
-       bl      add_link
-       sub     r2, r4, r5
-       and     r1, r7, #0xff000000
-       lsl     r2, r2, #6
-       sub     r1, r1, #2
-       add     r1, r1, r2, lsr #8
-       str     r1, [r5]
-       mov     pc, r4
-.B3:
-       /* hash_table lookup */
-       cmp     r2, #2048
-       ldr     r3, .jdptr
-       eor     r4, r0, r0, lsl #16
-       lslcc   r2, r0, #9
-       ldr     r6, .htptr
-       lsr     r4, r4, #12
-       lsrcc   r2, r2, #21
-       bic     r4, r4, #15
-       ldr     r5, [r3, r2, lsl #2]
-       ldr     r7, [r6, r4]!
-       teq     r7, r0
-       ldreq   pc, [r6, #4]
-       ldr     r7, [r6, #8]
-       teq     r7, r0
-       ldreq   pc, [r6, #12]
-       /* jump_dirty lookup */
-.B6:
-       movs    r4, r5
-       beq     .B8
-       ldr     r3, [r5]
-       ldr     r5, [r4, #12]
-       teq     r3, r0
-       bne     .B6
-.B7:
-       ldr     r1, [r4, #8]
-       /* hash_table insert */
-       ldr     r2, [r6]
-       ldr     r3, [r6, #4]
-       str     r0, [r6]
-       str     r1, [r6, #4]
-       str     r2, [r6, #8]
-       str     r3, [r6, #12]
-       mov     pc, r1
-.B8:
+       dyna_linker_main
+
        mov     r4, r0
        bic     r0, r0, #7
        mov     r5, r1
@@ -740,28 +664,11 @@ new_dyna_leave:
        ldmfd   sp!, {r4, r5, r6, r7, r8, r9, sl, fp, ip, pc}
        .size   new_dyna_leave, .-new_dyna_leave
 
-       /* these are used to call memhandlers */
-       .align  2
-       .global indirect_jump_indexed
-       .type   indirect_jump_indexed, %function
-indirect_jump_indexed:
-       ldr     r0, [r0, r1, lsl #2]
-       .global indirect_jump
-       .type   indirect_jump, %function
-indirect_jump:
-       ldr     r12, [fp, #last_count-dynarec_local]
-       add     r2, r2, r12 
-       str     r2, [fp, #cycle-dynarec_local]
-       mov     pc, r0
-       .size   indirect_jump, .-indirect_jump
-       .size   indirect_jump_indexed, .-indirect_jump_indexed
-
        .align  2
        .global invalidate_addr_r0
        .type   invalidate_addr_r0, %function
 invalidate_addr_r0:
        stmia   fp, {r0, r1, r2, r3, r12, lr}
-       lsr     r0, r0, #12     
        b       invalidate_addr_call
        .size   invalidate_addr_r0, .-invalidate_addr_r0
        .align  2
@@ -769,7 +676,7 @@ invalidate_addr_r0:
        .type   invalidate_addr_r1, %function
 invalidate_addr_r1:
        stmia   fp, {r0, r1, r2, r3, r12, lr}
-       lsr     r0, r1, #12     
+       mov     r0, r1
        b       invalidate_addr_call
        .size   invalidate_addr_r1, .-invalidate_addr_r1
        .align  2
@@ -777,7 +684,7 @@ invalidate_addr_r1:
        .type   invalidate_addr_r2, %function
 invalidate_addr_r2:
        stmia   fp, {r0, r1, r2, r3, r12, lr}
-       lsr     r0, r2, #12     
+       mov     r0, r2
        b       invalidate_addr_call
        .size   invalidate_addr_r2, .-invalidate_addr_r2
        .align  2
@@ -785,7 +692,7 @@ invalidate_addr_r2:
        .type   invalidate_addr_r3, %function
 invalidate_addr_r3:
        stmia   fp, {r0, r1, r2, r3, r12, lr}
-       lsr     r0, r3, #12     
+       mov     r0, r3
        b       invalidate_addr_call
        .size   invalidate_addr_r3, .-invalidate_addr_r3
        .align  2
@@ -793,7 +700,7 @@ invalidate_addr_r3:
        .type   invalidate_addr_r4, %function
 invalidate_addr_r4:
        stmia   fp, {r0, r1, r2, r3, r12, lr}
-       lsr     r0, r4, #12     
+       mov     r0, r4
        b       invalidate_addr_call
        .size   invalidate_addr_r4, .-invalidate_addr_r4
        .align  2
@@ -801,7 +708,7 @@ invalidate_addr_r4:
        .type   invalidate_addr_r5, %function
 invalidate_addr_r5:
        stmia   fp, {r0, r1, r2, r3, r12, lr}
-       lsr     r0, r5, #12     
+       mov     r0, r5
        b       invalidate_addr_call
        .size   invalidate_addr_r5, .-invalidate_addr_r5
        .align  2
@@ -809,7 +716,7 @@ invalidate_addr_r5:
        .type   invalidate_addr_r6, %function
 invalidate_addr_r6:
        stmia   fp, {r0, r1, r2, r3, r12, lr}
-       lsr     r0, r6, #12     
+       mov     r0, r6
        b       invalidate_addr_call
        .size   invalidate_addr_r6, .-invalidate_addr_r6
        .align  2
@@ -817,7 +724,7 @@ invalidate_addr_r6:
        .type   invalidate_addr_r7, %function
 invalidate_addr_r7:
        stmia   fp, {r0, r1, r2, r3, r12, lr}
-       lsr     r0, r7, #12     
+       mov     r0, r7
        b       invalidate_addr_call
        .size   invalidate_addr_r7, .-invalidate_addr_r7
        .align  2
@@ -825,7 +732,7 @@ invalidate_addr_r7:
        .type   invalidate_addr_r8, %function
 invalidate_addr_r8:
        stmia   fp, {r0, r1, r2, r3, r12, lr}
-       lsr     r0, r8, #12     
+       mov     r0, r8
        b       invalidate_addr_call
        .size   invalidate_addr_r8, .-invalidate_addr_r8
        .align  2
@@ -833,7 +740,7 @@ invalidate_addr_r8:
        .type   invalidate_addr_r9, %function
 invalidate_addr_r9:
        stmia   fp, {r0, r1, r2, r3, r12, lr}
-       lsr     r0, r9, #12     
+       mov     r0, r9
        b       invalidate_addr_call
        .size   invalidate_addr_r9, .-invalidate_addr_r9
        .align  2
@@ -841,7 +748,7 @@ invalidate_addr_r9:
        .type   invalidate_addr_r10, %function
 invalidate_addr_r10:
        stmia   fp, {r0, r1, r2, r3, r12, lr}
-       lsr     r0, r10, #12    
+       mov     r0, r10
        b       invalidate_addr_call
        .size   invalidate_addr_r10, .-invalidate_addr_r10
        .align  2
@@ -849,13 +756,17 @@ invalidate_addr_r10:
        .type   invalidate_addr_r12, %function
 invalidate_addr_r12:
        stmia   fp, {r0, r1, r2, r3, r12, lr}
-       lsr     r0, r12, #12    
+       mov     r0, r12
        .size   invalidate_addr_r12, .-invalidate_addr_r12
        .align  2
        .global invalidate_addr_call
        .type   invalidate_addr_call, %function
 invalidate_addr_call:
-       bl      invalidate_block
+       ldr     r12, [fp, #inv_code_start-dynarec_local]
+       ldr     lr, [fp, #inv_code_end-dynarec_local]
+       cmp     r0, r12
+       cmpcs   lr, r0
+       blcc    invalidate_addr
        ldmia   fp, {r0, r1, r2, r3, r12, pc}
        .size   invalidate_addr_call, .-invalidate_addr_call
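
The per-register stubs above now hand the raw address to the common tail (the lsr #12
page conversion is gone), and invalidate_addr_call only calls out when the address falls
outside [inv_code_start, inv_code_end], which appears to cache a range already known to
contain no translated code. The cmp/cmpcs/blcc triple is the usual carry-chained unsigned
range test; in C terms:

    #include <stdint.h>

    extern uint32_t inv_code_start, inv_code_end;   /* dynarec_local fields above */
    void invalidate_addr(uint32_t addr);            /* C side of the dynarec */

    /* illustrative equivalent of the cmp/cmpcs/blcc sequence above */
    static void maybe_invalidate(uint32_t addr)
    {
        if (addr < inv_code_start || addr > inv_code_end)
            invalidate_addr(addr);
        /* else: addr lies in a range known to hold no compiled blocks, skip */
    }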
 
@@ -878,285 +789,212 @@ new_dyna_start:
 /* --------------------------------------- */
 
 .align 2
-.global        ari_read_ram8
-.global        ari_read_ram16
-.global        ari_read_ram32
-.global        ari_read_ram_mirror8
-.global        ari_read_ram_mirror16
-.global        ari_read_ram_mirror32
-.global        ari_write_ram8
-.global        ari_write_ram16
-.global        ari_write_ram32
-.global        ari_write_ram_mirror8
-.global        ari_write_ram_mirror16
-.global        ari_write_ram_mirror32
-.global        ari_write_ram_mirror_ro32
-.global        ari_read_bios8
-.global        ari_read_bios16
-.global        ari_read_bios32
-.global        ari_read_io8
-.global        ari_read_io16
-.global        ari_read_io32
-.global        ari_write_io8
-.global        ari_write_io16
-.global        ari_write_io32
-
-.macro ari_read_ram bic_const op
-       ldr     r0, [fp, #address-dynarec_local]
-.if \bic_const
-       bic     r0, r0, #\bic_const
+.global        jump_handler_read8
+.global        jump_handler_read16
+.global        jump_handler_read32
+.global        jump_handler_write8
+.global        jump_handler_write16
+.global        jump_handler_write32
+.global        jump_handler_write_h
+.global jump_handle_swl
+.global jump_handle_swr
+.global rcnt0_read_count_m0
+.global rcnt0_read_count_m1
+.global rcnt1_read_count_m0
+.global rcnt1_read_count_m1
+.global rcnt2_read_count_m0
+.global rcnt2_read_count_m1
+
+
+.macro pcsx_read_mem readop tab_shift
+       /* r0 = address, r1 = handler_tab, r2 = cycles */
+       lsl     r3, r0, #20
+       lsr     r3, #(20+\tab_shift)
+       ldr     r12, [fp, #last_count-dynarec_local]
+       ldr     r1, [r1, r3, lsl #2]
+       add     r2, r2, r12
+       lsls    r1, #1
+.if \tab_shift == 1
+       lsl     r3, #1
+       \readop r0, [r1, r3]
+.else
+       \readop r0, [r1, r3, lsl #\tab_shift]
 .endif
-       \op     r0, [r0]
-       str     r0, [fp, #readmem_dword-dynarec_local]
-       mov     pc, lr
+       movcc   pc, lr
+       str     r2, [fp, #cycle-dynarec_local]
+       bx      r1
 .endm
 
-ari_read_ram8:
-       ari_read_ram 0, ldrb
-
-ari_read_ram16:
-       ari_read_ram 1, ldrh
-
-ari_read_ram32:
-       ari_read_ram 3, ldr
-
-.macro ari_read_ram_mirror mvn_const, op
-       ldr     r0, [fp, #address-dynarec_local]
-       mvn     r1, #\mvn_const
-       and     r0, r1, lsr #11
-       orr     r0, r0, #1<<31
-       \op     r0, [r0]
-       str     r0, [fp, #readmem_dword-dynarec_local]
-       mov     pc, lr
-.endm
+jump_handler_read8:
+       add     r1, #0x1000/4*4 + 0x1000/2*4 @ shift to r8 part
+       pcsx_read_mem ldrccb, 0
 
-ari_read_ram_mirror8:
-       ari_read_ram_mirror 0, ldrb
+jump_handler_read16:
+       add     r1, #0x1000/4*4              @ shift to r16 part
+       pcsx_read_mem ldrcch, 1
 
-ari_read_ram_mirror16:
-       ari_read_ram_mirror (1<<11), ldrh
+jump_handler_read32:
+       pcsx_read_mem ldrcc, 2
 
-ari_read_ram_mirror32:
-       ari_read_ram_mirror (3<<11), ldr
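
The table-driven read path that replaces the ari_read_* family works off per-page handler
tables: the generated code passes the table for the relevant page in r1, and
jump_handler_read8/16/32 just offset into its byte/halfword/word sections (that is what
the "shift to r8/r16 part" adds do). Each entry is stored shifted right by one, with
bit 31 acting as a "call a handler" flag; lsls #1 recovers the value and moves the flag
into the carry, so direct loads run under a cc condition and handler entries get a
tail-call. A hedged C sketch of the 32-bit decode step (names illustrative, 32-bit host
assumed as in the asm):

    #include <stdint.h>

    /* Each table entry is stored >>1; bit 31 set means "call this handler". */
    static uint32_t decode_read32(const uint32_t *handler_tab, uint32_t addr)
    {
        uint32_t e = handler_tab[(addr & 0xfffu) >> 2];
        if (!(e & 0x80000000u))
            /* direct: entry<<1 is the host base of this guest page */
            return *(const uint32_t *)(uintptr_t)((e << 1) + (addr & 0xffcu));
        /* handler: entry<<1 is a function pointer; the asm stores the cycle
           count to dynarec_local and tail-calls it with the address in r0 */
        return ((uint32_t (*)(uint32_t))(uintptr_t)(e << 1))(addr);
    }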
 
-/* invalidation is already taken care of by the caller */
-.macro ari_write_ram bic_const var pf
-       ldr     r0, [fp, #address-dynarec_local]
-       ldr\pf  r1, [fp, #\var-dynarec_local]
-.if \bic_const
-       bic     r0, r0, #\bic_const
+.macro pcsx_write_mem wrtop tab_shift
+       /* r0 = address, r1 = data, r2 = cycles, r3 = handler_tab */
+       lsl     r12,r0, #20
+       lsr     r12, #(20+\tab_shift)
+       ldr     r3, [r3, r12, lsl #2]
+       str     r0, [fp, #address-dynarec_local]      @ some handlers still need it..
+       lsls    r3, #1
+       mov     r0, r2                                @ cycle return in case of direct store
+.if \tab_shift == 1
+       lsl     r12, #1
+       \wrtop  r1, [r3, r12]
+.else
+       \wrtop  r1, [r3, r12, lsl #\tab_shift]
 .endif
-       str\pf  r1, [r0]
-       mov     pc, lr
-.endm
-
-ari_write_ram8:
-       ari_write_ram 0, byte, b
-
-ari_write_ram16:
-       ari_write_ram 1, hword, h
-
-ari_write_ram32:
-       ari_write_ram 3, word,
-
-.macro ari_write_ram_mirror mvn_const var pf
-       ldr     r0, [fp, #address-dynarec_local]
-       mvn     r3, #\mvn_const
-       ldr\pf  r1, [fp, #\var-dynarec_local]
-       and     r0, r3, lsr #11
-       ldr     r2, [fp, #invc_ptr-dynarec_local]
-       orr     r0, r0, #1<<31
-       ldrb    r2, [r2, r0, lsr #12]
-       str\pf  r1, [r0]
-       tst     r2, r2
-       movne   pc, lr
-       lsr     r0, r0, #12
-       b       invalidate_block
-.endm
-
-ari_write_ram_mirror8:
-       ari_write_ram_mirror 0, byte, b
-
-ari_write_ram_mirror16:
-       ari_write_ram_mirror (1<<11), hword, h
-
-ari_write_ram_mirror32:
-       ari_write_ram_mirror (3<<11), word,
-
-ari_write_ram_mirror_ro32:
-       load_var_adr r0, pcsx_ram_is_ro
-       ldr     r0, [r0]
-       tst     r0, r0
-       movne   pc, lr
-       nop
-       b       ari_write_ram_mirror32
-
+       movcc   pc, lr
+       ldr     r12, [fp, #last_count-dynarec_local]
+       mov     r0, r1
+       add     r2, r2, r12
+       push    {r2, lr}
+       str     r2, [fp, #cycle-dynarec_local]
+       blx     r3
 
-.macro ari_read_bios_mirror bic_const op
-       ldr     r0, [fp, #address-dynarec_local]
-       orr     r0, r0, #0x80000000
-       bic     r0, r0, #(0x20000000|\bic_const)        @ map to 0x9fc...
-       \op     r0, [r0]
-       str     r0, [fp, #readmem_dword-dynarec_local]
-       mov     pc, lr
+       ldr     r0, [fp, #next_interupt-dynarec_local]
+       pop     {r2, r3}
+       str     r0, [fp, #last_count-dynarec_local]
+       sub     r0, r2, r0
+       bx      r3
 .endm
 
-ari_read_bios8:
-       ari_read_bios_mirror 0, ldrb
-
-ari_read_bios16:
-       ari_read_bios_mirror 1, ldrh
+jump_handler_write8:
+       add     r3, #0x1000/4*4 + 0x1000/2*4 @ shift to r8 part
+       pcsx_write_mem strccb, 0
 
-ari_read_bios32:
-       ari_read_bios_mirror 3, ldr
+jump_handler_write16:
+       add     r3, #0x1000/4*4              @ shift to r16 part
+       pcsx_write_mem strcch, 1
 
+jump_handler_write32:
+       pcsx_write_mem strcc, 2
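
The write macro mirrors the read one, with two extra details: for direct stores it
returns the caller's cycle count unchanged in r0 ("cycle return in case of direct
store"), and when a C handler has to run it converts the JIT's relative cycle count to
an absolute one first and rebases it against next_interupt afterwards, since the handler
may reschedule events. A sketch of that bookkeeping, assuming the convention that the
absolute cycle equals last_count plus the JIT's relative count (names refer to the
dynarec_local fields above):

    #include <stdint.h>

    static uint32_t last_count, next_interupt, cycle;  /* dynarec_local fields, modeled as globals */

    static uint32_t call_write_handler(void (*handler)(uint32_t), uint32_t data,
                                       uint32_t rel_cycles)
    {
        uint32_t abs = last_count + rel_cycles;
        cycle = abs;                  /* publish absolute time for the handler */
        handler(data);                /* may move next_interupt */
        last_count = next_interupt;   /* rebase */
        return abs - last_count;      /* new relative count, returned in r0 */
    }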
 
-@ for testing
-.macro ari_read_io_old tab_shift
-       str     lr, [sp, #-8]! @ EABI alignment..
-.if \tab_shift == 0
-       bl      psxHwRead32
-.endif
-.if \tab_shift == 1
-       bl      psxHwRead16
-.endif
-.if \tab_shift == 2
-       bl      psxHwRead8
-.endif
-       str     r0, [fp, #readmem_dword-dynarec_local]
-       ldr     pc, [sp], #8
-.endm
+jump_handler_write_h:
+       /* r0 = address, r1 = data, r2 = cycles, r3 = handler */
+       ldr     r12, [fp, #last_count-dynarec_local]
+       str     r0, [fp, #address-dynarec_local]      @ some handlers still need it..
+       add     r2, r2, r12
+       mov     r0, r1
+       push    {r2, lr}
+       str     r2, [fp, #cycle-dynarec_local]
+       blx     r3
 
-.macro ari_read_io readop mem_tab tab_shift
-       ldr     r0, [fp, #address-dynarec_local]
-       ldr     r1, [fp, #psxH_ptr-dynarec_local]
-.if \tab_shift == 0
-       bic     r0, r0, #3
-.endif
-.if \tab_shift == 1
-       bic     r0, r0, #1
-.endif
-       bic     r2, r0, #0x1f800000
-       ldr     r12,[fp, #\mem_tab-dynarec_local]
-       subs    r3, r2, #0x1000
-       blo     2f
-@      ari_read_io_old \tab_shift
-       cmp     r3, #0x880
-       bhs     1f
-       ldr     r12,[r12, r3, lsl #\tab_shift]
-       tst     r12,r12
+       ldr     r0, [fp, #next_interupt-dynarec_local]
+       pop     {r2, r3}
+       str     r0, [fp, #last_count-dynarec_local]
+       sub     r0, r2, r0
+       bx      r3
+
+jump_handle_swl:
+       /* r0 = address, r1 = data, r2 = cycles */
+       ldr     r3, [fp, #mem_wtab-dynarec_local]
+       mov     r12,r0,lsr #12
+       ldr     r3, [r3, r12, lsl #2]
+       lsls    r3, #1
+       bcs     4f
+       add     r3, r0, r3
+       mov     r0, r2
+       tst     r3, #2
+       beq     101f
+       tst     r3, #1
        beq     2f
-0:
-       str     lr, [sp, #-8]! @ EABI alignment..
-       blx     r12
-       str     r0, [fp, #readmem_dword-dynarec_local]
-       ldr     pc, [sp], #8
-
-1:
-.if \tab_shift == 1 @ read16
-       cmp     r2, #0x1c00
-       blo     2f
-       cmp     r2, #0x1e00
-       bhs     2f
-       ldr     r12,[fp, #spu_readf-dynarec_local]
-       b       0b
-.endif
+3:
+       str     r1, [r3, #-3]
+       bx      lr
 2:
-       @ no handler, just read psxH
-       \readop r0, [r1, r2]
-       str     r0, [fp, #readmem_dword-dynarec_local]
-       mov     pc, lr
-.endm
-
-ari_read_io8:
-       ari_read_io ldrb, tab_read8, 2
-
-ari_read_io16:
-       ari_read_io ldrh, tab_read16, 1
+       lsr     r2, r1, #8
+       lsr     r1, #24
+       strh    r2, [r3, #-2]
+       strb    r1, [r3]
+       bx      lr
+101:
+       tst     r3, #1
+       lsrne   r1, #16         @ 1
+       lsreq   r12, r1, #24    @ 0
+       strneh  r1, [r3, #-1]
+       streqb  r12, [r3]
+       bx      lr
+4:
+       mov     r0, r2
+@      b       abort
+       bx      lr              @ TODO?
+
+
+jump_handle_swr:
+       /* r0 = address, r1 = data, r2 = cycles */
+       ldr     r3, [fp, #mem_wtab-dynarec_local]
+       mov     r12,r0,lsr #12
+       ldr     r3, [r3, r12, lsl #2]
+       lsls    r3, #1
+       bcs     4f
+       add     r3, r0, r3
+       and     r12,r3, #3
+       mov     r0, r2
+       cmp     r12,#2
+       strgtb  r1, [r3]        @ 3
+       streqh  r1, [r3]        @ 2
+       cmp     r12,#1
+       strlt   r1, [r3]        @ 0
+       bxne    lr
+       lsr     r2, r1, #8      @ 1
+       strb    r1, [r3]
+       strh    r2, [r3, #1]
+       bx      lr
+4:
+       mov     r0, r2
+@      b       abort
+       bx      lr              @ TODO?
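
jump_handle_swl/swr implement the unaligned MIPS SWL/SWR stores against directly mapped
pages (handler-mapped pages currently just return, per the TODO). The byte/halfword
patterns above match the usual little-endian semantics, sketched here in C (illustrative,
assumes a little-endian host like the ARM target):

    #include <stdint.h>
    #include <string.h>

    /* SWL: store the most significant (addr%4)+1 bytes of rt, ending at addr */
    static void swl(uint8_t *mem, uint32_t addr, uint32_t rt)
    {
        uint32_t n = (addr & 3) + 1;
        memcpy(mem + (addr & ~3u), (uint8_t *)&rt + 4 - n, n);
    }

    /* SWR: store the least significant 4-(addr%4) bytes of rt, starting at addr */
    static void swr(uint8_t *mem, uint32_t addr, uint32_t rt)
    {
        uint32_t n = 4 - (addr & 3);
        memcpy(mem + addr, &rt, n);
    }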
 
-ari_read_io32:
-       ari_read_io ldr, tab_read32, 0
 
-.macro ari_write_io_old tab_shift
-.if \tab_shift == 0
-       b       psxHwWrite32
-.endif
-.if \tab_shift == 1
-       b       psxHwWrite16
-.endif
-.if \tab_shift == 2
-       b       psxHwWrite8
-.endif
+.macro rcntx_read_mode0 num
+       /* r0 = address, r2 = cycles */
+       ldr     r3, [fp, #rcnts-dynarec_local+6*4+7*4*\num] @ cycleStart
+       mov     r0, r2, lsl #16
+       sub     r0, r3, lsl #16
+       lsr     r0, #16
+       bx      lr
 .endm
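
rcnts was added to dynarec_local above with size 7*4*4, i.e. four 7-word counter entries;
the 6*4 offset used here selects the last word of entry \num, which the inline comment
identifies as cycleStart. Under that assumption a mode-0 read is simply the free-running
cycle difference truncated to 16 bits (the paired shifts by 16 do the truncation):

    #include <stdint.h>

    /* Layout assumed from ".size rcnts, 7*4*4" and the 6*4 offset above;
       field names are illustrative, only cycleStart's position matters. */
    struct rcnt { uint32_t mode_target, rate, irq, counterState, irqState, cycle, cycleStart; };
    static struct rcnt rcnts[4];

    static uint16_t rcnt_read_count_m0(int num, uint32_t cycles)
    {
        return (uint16_t)(cycles - rcnts[num].cycleStart);  /* one tick per CPU cycle */
    }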
 
-.macro ari_write_io pf var mem_tab tab_shift
-       ldr     r0, [fp, #address-dynarec_local]
-       ldr\pf  r1, [fp, #\var-dynarec_local]
-.if \tab_shift == 0
-       bic     r0, r0, #3
-.endif
-.if \tab_shift == 1
-       bic     r0, r0, #1
-.endif
-       bic     r2, r0, #0x1f800000
-       ldr     r12,[fp, #\mem_tab-dynarec_local]
-       subs    r3, r2, #0x1000
-       blo     0f
-@      ari_write_io_old \tab_shift
-       cmp     r3, #0x880
-       bhs     1f
-       ldr     r12,[r12, r3, lsl #\tab_shift]
-       mov     r0, r1
-       tst     r12,r12
-       bxne    r12
-0:
-       ldr     r3, [fp, #psxH_ptr-dynarec_local]
-       str\pf  r1, [r2, r3]
-       mov     pc, lr
-1:
-       cmp     r2, #0x1c00
-       blo     0b
-       cmp     r2, #0x1e00
-.if \tab_shift != 0
-       ldrlo   pc, [fp, #spu_writef-dynarec_local]
-.else
-       @ write32 to SPU - very rare case (is this correct?)
-       bhs     0b
-       add     r2, r0, #2
-       mov     r3, r1, lsr #16
-       push    {r2,r3,lr}
-       mov     lr, pc
-       ldr     pc, [fp, #spu_writef-dynarec_local]
-       pop     {r0,r1,lr}
-       ldr     pc, [fp, #spu_writef-dynarec_local]
-.endif
-       nop
-       b       0b
-.endm
-
-ari_write_io8:
-       @ PCSX always writes to psxH, so do we for consistency
-       ldr     r0, [fp, #address-dynarec_local]
-       ldr     r3, [fp, #psxH_ptr-dynarec_local]
-       ldrb    r1, [fp, #byte-dynarec_local]
-       bic     r2, r0, #0x1f800000
-       ldr     r12,[fp, #tab_write8-dynarec_local]
-       strb    r1, [r2, r3]
-       subs    r3, r2, #0x1000
-       movlo   pc, lr
-@      ari_write_io_old 2
-       cmp     r3, #0x880
-       movhs   pc, lr
-       ldr     r12,[r12, r3, lsl #2]
-       mov     r0, r1
-       tst     r12,r12
-       bxne    r12
-       mov     pc, lr
-
-ari_write_io16:
-       ari_write_io h, hword, tab_write16, 1
-
-ari_write_io32:
-       ari_write_io , word, tab_write32, 0
+rcnt0_read_count_m0:
+       rcntx_read_mode0 0
+
+rcnt1_read_count_m0:
+       rcntx_read_mode0 1
+
+rcnt2_read_count_m0:
+       rcntx_read_mode0 2
+
+rcnt0_read_count_m1:
+       /* r0 = address, r2 = cycles */
+       ldr     r3, [fp, #rcnts-dynarec_local+6*4+7*4*0] @ cycleStart
+       mov_16  r1, 0x3334
+       sub     r2, r2, r3
+       mul     r0, r1, r2              @ /= 5
+       lsr     r0, #16
+       bx      lr
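
The "/= 5" above is a fixed-point reciprocal multiply: 0x3334 = 13108 is 65536/5 rounded
up, so (elapsed * 0x3334) >> 16 approximates elapsed/5 (exact for small differences,
slightly high for larger ones). In C:

    #include <stdint.h>

    /* 0x3334 ~= 65536/5, so the multiply-and-shift divides by ~5 */
    static uint16_t div5_approx(uint32_t elapsed)
    {
        return (uint16_t)((elapsed * 0x3334u) >> 16);
    }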
+
+rcnt1_read_count_m1:
+       /* r0 = address, r2 = cycles */
+       ldr     r3, [fp, #rcnts-dynarec_local+6*4+7*4*1]
+       mov_24  r1, 0x1e6cde
+       sub     r2, r2, r3
+       umull   r3, r0, r1, r2          @ ~ /= hsync_cycles, max ~0x1e6cdd
+       bx      lr
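
Same trick with a 32-bit reciprocal: umull forms the full 64-bit product and only the
high word is kept, which computes floor(elapsed * K / 2^32). Here K = 0x1e6cde is about
2^32/2154, so the result is roughly the elapsed cycles divided by a ~2154-cycle hsync
period, matching the "~ /= hsync_cycles" comment; with elapsed = 0xffffffff the high
word tops out just below K, hence "max ~0x1e6cdd". Sketch:

    #include <stdint.h>

    /* high word of the 32x32 product == floor(elapsed * K / 2^32), K ~= 2^32/2154 */
    static uint32_t div_hsync_approx(uint32_t elapsed)
    {
        return (uint32_t)(((uint64_t)elapsed * 0x1e6cdeu) >> 32);
    }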
+
+rcnt2_read_count_m1:
+       /* r0 = address, r2 = cycles */
+       ldr     r3, [fp, #rcnts-dynarec_local+6*4+7*4*2]
+       mov     r0, r2, lsl #16-3
+       sub     r0, r3, lsl #16-3
+       lsr     r0, #16                 @ /= 8
+       bx      lr
 
 @ vim:filetype=armasm