drc: merge Ari64's patch: 11_reduce_invstub_memory_usage
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / linkage_arm.s
index 5451110..57fb3d2 100644 (file)
@@ -1,6 +1,7 @@
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
- *   Mupen64plus - linkage_arm.s                                           *
- *   Copyright (C) 2009-2010 Ari64                                         *
+ *   linkage_arm.s for PCSX                                                *
+ *   Copyright (C) 2009-2011 Ari64                                         *
+ *   Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas                     *
  *                                                                         *
  *   This program is free software; you can redistribute it and/or modify  *
  *   it under the terms of the GNU General Public License as published by  *
  *   Free Software Foundation, Inc.,                                       *
  *   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.          *
  * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+.equiv HAVE_ARMV7, 1   /* build switch: nonzero picks the Cortex-A8/VFP directives here and the movw/movt load in new_dyna_start */
+
+.if HAVE_ARMV7
+       .cpu cortex-a8
+       .fpu vfp
+.else  /* HAVE_ARMV7 == 0: assemble for ARM9TDMI with soft-float */
        .cpu arm9tdmi
        .fpu softvfp
-       .eabi_attribute 20, 1
-       .eabi_attribute 21, 1
-       .eabi_attribute 23, 3
-       .eabi_attribute 24, 1
-       .eabi_attribute 25, 1
-       .eabi_attribute 26, 2
-       .eabi_attribute 30, 6
-       .eabi_attribute 18, 4
-       .file   "linkage_arm.s"
+.endif /* HAVE_ARMV7 */
        .global rdram
 rdram = 0x80000000
        .global dynarec_local
@@ -60,6 +60,8 @@ rdram = 0x80000000
        .global memory_map
        /* psx */
        .global psxRegs
+       .global nd_pcsx_io      /* handler-pointer block defined right after psxRegs */
+       .global psxH_ptr        /* one-word base pointer read by the ari_*_io handlers */
 
        .bss
        .align  4
@@ -110,13 +112,14 @@ FCR0 = hword + 4
 FCR31 = FCR0 + 4
        .type   FCR31, %object
        .size   FCR31, 4
-reg = FCR31 + 4
+psxRegs = FCR31 + 4    /* psxRegs is now the primary symbol, placed directly after FCR31 */
 
 /* psxRegs */
-psxRegs = reg
+       .type   psxRegs, %object
+       .size   psxRegs, psxRegs_end-psxRegs
+reg = psxRegs  /* reg aliases the start of psxRegs (its first 128 bytes) */
        .type   reg, %object
        .size   reg, 128
-       .size   psxRegs, psxRegs_end-psxRegs
 lo = reg + 128
        .type   lo, %object
        .size   lo, 4
@@ -147,13 +150,46 @@ interrupt = cycle + 4
        .size   interrupt, 4
 intCycle = interrupt + 4
        .type   intCycle, %object
-       .size   intCycle, 128
-psxRegs_end = intCycle + 128
+       .size   intCycle, 256
+psxRegs_end = intCycle + 256   /* first byte past psxRegs; feeds the psxRegs .size expression */
+
+/* nd_pcsx_io: six table pointers plus two SPU function pointers, placed directly after psxRegs */
+nd_pcsx_io = psxRegs_end
+       .type   nd_pcsx_io, %object
+       .size   nd_pcsx_io, nd_pcsx_io_end-nd_pcsx_io
+tab_read8 = nd_pcsx_io /* table base loaded by ari_read_io8 */
+       .type   tab_read8, %object
+       .size   tab_read8, 4
+tab_read16 = tab_read8 + 4     /* table base loaded by ari_read_io16 */
+       .type   tab_read16, %object
+       .size   tab_read16, 4
+tab_read32 = tab_read16 + 4    /* table base loaded by ari_read_io32 */
+       .type   tab_read32, %object
+       .size   tab_read32, 4
+tab_write8 = tab_read32 + 4    /* table base loaded by ari_write_io8 */
+       .type   tab_write8, %object
+       .size   tab_write8, 4
+tab_write16 = tab_write8 + 4   /* table base loaded by ari_write_io16 */
+       .type   tab_write16, %object
+       .size   tab_write16, 4
+tab_write32 = tab_write16 + 4  /* table base loaded by ari_write_io32 */
+       .type   tab_write32, %object
+       .size   tab_write32, 4
+spu_readf = tab_write32 + 4    /* function pointer called by ari_read_io16 for offsets 0x1c00-0x1dff */
+       .type   spu_readf, %object
+       .size   spu_readf, 4
+spu_writef = spu_readf + 4     /* function pointer jumped to by ari_write_io16/32 for offsets 0x1c00-0x1dff */
+       .type   spu_writef, %object
+       .size   spu_writef, 4
+nd_pcsx_io_end = spu_writef + 4        /* end marker for the nd_pcsx_io .size expression */
 
-align0 = psxRegs_end /* just for alignment */
+psxH_ptr = nd_pcsx_io_end      /* one word: base pointer the ari_*_io handlers add the I/O offset to */
+       .type   psxH_ptr, %object
+       .size   psxH_ptr, 4
+align0 = psxH_ptr + 4 /* just for alignment */
        .type   align0, %object
-       .size   align0, 8
-branch_target = align0 + 8
+       .size   align0, 4
+branch_target = align0 + 4     /* padding shrank from 8 to 4 bytes because psxH_ptr takes the other word */
        .type   branch_target, %object
        .size   branch_target, 4
 mini_ht = branch_target + 4
@@ -563,16 +599,13 @@ cc_interrupt:
        str     r0, [fp, #last_count-dynarec_local]
        sub     r10, r10, r0
        tst     r2, r2
-       bne     .E3
+       ldmnefd sp!, {r4, r5, r6, r7, r8, r9, sl, fp, ip, pc}
        tst     r1, r1
        moveq   pc, lr
 .E2:
        ldr     r0, [fp, #pcaddr-dynarec_local]
        bl      get_addr_ht
        mov     pc, r0
-.E3:
-       add     r12, fp, #28
-       ldmia   r12, {r4, r5, r6, r7, r8, r9, sl, fp, pc}
 .E4:
        /* Move 'dirty' blocks to the 'clean' list */
        lsl     r5, r2, #3
@@ -593,13 +626,10 @@ cc_interrupt:
 do_interrupt:
        ldr     r0, [fp, #pcaddr-dynarec_local]
        bl      get_addr_ht
-       ldr     r1, [fp, #next_interupt-dynarec_local]
-       ldr     r10, [fp, #cycle-dynarec_local]
-       str     r1, [fp, #last_count-dynarec_local]
-       sub     r10, r10, r1
        add     r10, r10, #2
        mov     pc, r0
        .size   do_interrupt, .-do_interrupt
+
        .align  2
        .global fp_exception
        .type   fp_exception, %function
@@ -673,11 +703,23 @@ jump_hlecall:
        ldr     r2, [fp, #last_count-dynarec_local]
        str     r0, [fp, #pcaddr-dynarec_local]
        add     r2, r2, r10
-       str     r2, [fp, #cycle-dynarec_local] /* PCSX cycle counter */
        adr     lr, pcsx_return
+       str     r2, [fp, #cycle-dynarec_local] /* PCSX cycle counter */
        bx      r1
        .size   jump_hlecall, .-jump_hlecall
 
+       .align  2
+       .global jump_intcall
+       .type   jump_intcall, %function
+jump_intcall:  /* in: r0 = PC to record, r10 = cycle delta; stores both, then tail-calls execI */
+       ldr     r2, [fp, #last_count-dynarec_local]
+       str     r0, [fp, #pcaddr-dynarec_local]        /* pcaddr = r0 */
+       add     r2, r2, r10     /* r2 = last_count + r10 */
+       adr     lr, pcsx_return /* execI returns to pcsx_return, not to our caller */
+       str     r2, [fp, #cycle-dynarec_local] /* PCSX cycle counter */
+       b       execI
+       .size   jump_intcall, .-jump_intcall    /* was sized as jump_hlecall (copy-paste slip) */
+
 new_dyna_leave:
        .align  2
        .global new_dyna_leave
@@ -686,7 +728,7 @@ new_dyna_leave:
        add     r12, fp, #28
        add     r10, r0, r10
        str     r10, [fp, #cycle-dynarec_local]
-       ldmia   r12, {r4, r5, r6, r7, r8, r9, sl, fp, pc}
+       ldmfd   sp!, {r4, r5, r6, r7, r8, r9, sl, fp, ip, pc}
        .size   new_dyna_leave, .-new_dyna_leave
 
        /* these are used to call memhandlers */
@@ -706,40 +748,121 @@ indirect_jump:
        .size   indirect_jump_indexed, .-indirect_jump_indexed
 
        .align  2
-       .global jump_eret
-       .type   jump_eret, %function
-jump_eret:
-       ldr     r1, [fp, #reg_cop0+48-dynarec_local] /* Status */
-       ldr     r0, [fp, #last_count-dynarec_local]
-       bic     r1, r1, #2
-       add     r10, r0, r10
-       str     r1, [fp, #reg_cop0+48-dynarec_local] /* Status */
-       str     r10, [fp, #cycle-dynarec_local]
-       bl      check_interupt
-       ldr     r1, [fp, #next_interupt-dynarec_local]
-       ldr     r0, [fp, #reg_cop0+56-dynarec_local] /* EPC */
-       str     r1, [fp, #last_count-dynarec_local]
-       subs    r10, r10, r1
-       bpl     .E11
-.E8:
-       bl      get_addr
-       mov     pc, r0
-.E11:
-       str     r0, [fp, #pcaddr-dynarec_local]
-       bl      cc_interrupt
-       ldr     r0, [fp, #pcaddr-dynarec_local]
-       b       .E8
-       .size   jump_eret, .-jump_eret
+       .global invalidate_addr_r0
+       .type   invalidate_addr_r0, %function
+invalidate_addr_r0:    /* invalidate_block(r0 >> 12) with r0-r3 and r12 preserved for the caller */
+       stmia   fp, {r0, r1, r2, r3, r12, lr}   /* spill r0-r3, r12 and lr into the first six words at fp */
+       lsr     r0, r0, #12     /* r0 = r0 >> 12, the argument handed to invalidate_block */
+       b       invalidate_addr_call    /* shared tail: call, restore, return */
+       .size   invalidate_addr_r0, .-invalidate_addr_r0
+       .align  2
+       .global invalidate_addr_r1
+       .type   invalidate_addr_r1, %function
+invalidate_addr_r1:    /* same as invalidate_addr_r0, source register r1 */
+       stmia   fp, {r0, r1, r2, r3, r12, lr}
+       lsr     r0, r1, #12     /* r0 = r1 >> 12 */
+       b       invalidate_addr_call
+       .size   invalidate_addr_r1, .-invalidate_addr_r1
+       .align  2
+       .global invalidate_addr_r2
+       .type   invalidate_addr_r2, %function
+invalidate_addr_r2:    /* same as invalidate_addr_r0, source register r2 */
+       stmia   fp, {r0, r1, r2, r3, r12, lr}
+       lsr     r0, r2, #12     /* r0 = r2 >> 12 */
+       b       invalidate_addr_call
+       .size   invalidate_addr_r2, .-invalidate_addr_r2
+       .align  2
+       .global invalidate_addr_r3
+       .type   invalidate_addr_r3, %function
+invalidate_addr_r3:    /* same as invalidate_addr_r0, source register r3 */
+       stmia   fp, {r0, r1, r2, r3, r12, lr}
+       lsr     r0, r3, #12     /* r0 = r3 >> 12 */
+       b       invalidate_addr_call
+       .size   invalidate_addr_r3, .-invalidate_addr_r3
+       .align  2
+       .global invalidate_addr_r4
+       .type   invalidate_addr_r4, %function
+invalidate_addr_r4:    /* same as invalidate_addr_r0, source register r4 */
+       stmia   fp, {r0, r1, r2, r3, r12, lr}
+       lsr     r0, r4, #12     /* r0 = r4 >> 12 */
+       b       invalidate_addr_call
+       .size   invalidate_addr_r4, .-invalidate_addr_r4
+       .align  2
+       .global invalidate_addr_r5
+       .type   invalidate_addr_r5, %function
+invalidate_addr_r5:    /* same as invalidate_addr_r0, source register r5 */
+       stmia   fp, {r0, r1, r2, r3, r12, lr}
+       lsr     r0, r5, #12     /* r0 = r5 >> 12 */
+       b       invalidate_addr_call
+       .size   invalidate_addr_r5, .-invalidate_addr_r5
+       .align  2
+       .global invalidate_addr_r6
+       .type   invalidate_addr_r6, %function
+invalidate_addr_r6:    /* same as invalidate_addr_r0, source register r6 */
+       stmia   fp, {r0, r1, r2, r3, r12, lr}
+       lsr     r0, r6, #12     /* r0 = r6 >> 12 */
+       b       invalidate_addr_call
+       .size   invalidate_addr_r6, .-invalidate_addr_r6
+       .align  2
+       .global invalidate_addr_r7
+       .type   invalidate_addr_r7, %function
+invalidate_addr_r7:    /* same as invalidate_addr_r0, source register r7 */
+       stmia   fp, {r0, r1, r2, r3, r12, lr}
+       lsr     r0, r7, #12     /* r0 = r7 >> 12 */
+       b       invalidate_addr_call
+       .size   invalidate_addr_r7, .-invalidate_addr_r7
+       .align  2
+       .global invalidate_addr_r8
+       .type   invalidate_addr_r8, %function
+invalidate_addr_r8:    /* same as invalidate_addr_r0, source register r8 */
+       stmia   fp, {r0, r1, r2, r3, r12, lr}
+       lsr     r0, r8, #12     /* r0 = r8 >> 12 */
+       b       invalidate_addr_call
+       .size   invalidate_addr_r8, .-invalidate_addr_r8
+       .align  2
+       .global invalidate_addr_r9
+       .type   invalidate_addr_r9, %function
+invalidate_addr_r9:    /* same as invalidate_addr_r0, source register r9 */
+       stmia   fp, {r0, r1, r2, r3, r12, lr}
+       lsr     r0, r9, #12     /* r0 = r9 >> 12 */
+       b       invalidate_addr_call
+       .size   invalidate_addr_r9, .-invalidate_addr_r9
+       .align  2
+       .global invalidate_addr_r10
+       .type   invalidate_addr_r10, %function
+invalidate_addr_r10:   /* same as invalidate_addr_r0, source register r10 */
+       stmia   fp, {r0, r1, r2, r3, r12, lr}
+       lsr     r0, r10, #12    /* r0 = r10 >> 12 */
+       b       invalidate_addr_call
+       .size   invalidate_addr_r10, .-invalidate_addr_r10
+       .align  2
+       .global invalidate_addr_r12
+       .type   invalidate_addr_r12, %function
+invalidate_addr_r12:   /* last stub: no branch, it falls straight through into invalidate_addr_call */
+       stmia   fp, {r0, r1, r2, r3, r12, lr}
+       lsr     r0, r12, #12    /* r0 = r12 >> 12 */
+       .size   invalidate_addr_r12, .-invalidate_addr_r12
+       .align  2
+       .global invalidate_addr_call
+       .type   invalidate_addr_call, %function
+invalidate_addr_call:  /* common tail of the invalidate_addr_rN stubs; expects r0 already shifted and regs spilled at fp */
+       bl      invalidate_block
+       ldmia   fp, {r0, r1, r2, r3, r12, pc}   /* reload what the stub spilled; the saved lr goes into pc, i.e. return */
+       .size   invalidate_addr_call, .-invalidate_addr_call
 
        .align  2
        .global new_dyna_start
        .type   new_dyna_start, %function
 new_dyna_start:
-       ldr     r12, .dlptr
-       stmia   r12, {r4, r5, r6, r7, r8, r9, sl, fp, lr}
-       sub     fp, r12, #28
+       /* ip is pushed only as padding: ten words keep sp 8-byte aligned for EABI */
+       stmfd   sp!, {r4, r5, r6, r7, r8, r9, sl, fp, ip, lr}   /* undone by the ten-register ldmfd sequences elsewhere in this file */
+.if HAVE_ARMV7
+       movw    fp, #:lower16:dynarec_local     /* fp = address of dynarec_local, built from two 16-bit halves */
+       movt    fp, #:upper16:dynarec_local
+.else
+       ldr     fp, .dlptr      /* fp = address of dynarec_local, loaded from the literal word at .dlptr */
+.endif
        ldr     r0, [fp, #pcaddr-dynarec_local]
-       /*bl    new_recompile_block*/
        bl      get_addr_ht
        ldr     r1, [fp, #next_interupt-dynarec_local]
        ldr     r10, [fp, #cycle-dynarec_local]
@@ -747,146 +870,282 @@ new_dyna_start:
        sub     r10, r10, r1
        mov     pc, r0
 .dlptr:
-       .word   dynarec_local+28
+       .word   dynarec_local
        .size   new_dyna_start, .-new_dyna_start
 
-       .align  2
-       .global write_rdram_new
-       .type   write_rdram_new, %function
-write_rdram_new:
-       ldr     r2, [fp, #address-dynarec_local]
-       ldr     r0, [fp, #word-dynarec_local]
-       str     r0, [r2]
-       b       .E12
-       .size   write_rdram_new, .-write_rdram_new
-       .align  2
-       .global write_rdramb_new
-       .type   write_rdramb_new, %function
-write_rdramb_new:
-       ldr     r2, [fp, #address-dynarec_local]
-       ldrb    r0, [fp, #byte-dynarec_local]
-       eor     r2, r2, #3
-       strb    r0, [r2]
-       b       .E12
-       .size   write_rdramb_new, .-write_rdramb_new
-       .align  2
-       .global write_rdramh_new
-       .type   write_rdramh_new, %function
-write_rdramh_new:
-       ldr     r2, [fp, #address-dynarec_local]
-       ldrh    r0, [fp, #hword-dynarec_local]
-       eor     r2, r2, #2
-       strh    r0, [r2]
-       b       .E12
-       .size   write_rdramh_new, .-write_rdramh_new
+/* --------------------------------------- */
 
-       .align  2
-       .global do_invalidate
-       .type   do_invalidate, %function
-do_invalidate:
-       ldr     r2, [fp, #address-dynarec_local]
-.E12:
-       ldr     r1, [fp, #invc_ptr-dynarec_local]
-       lsr     r0, r2, #12
-       ldrb    r2, [r1, r0]
-       tst     r2, r2
-       beq     invalidate_block
+.align 2
+.global        ari_read_ram8   @ every symbol in this list is a label defined further down, each followed by one macro expansion
+.global        ari_read_ram16
+.global        ari_read_ram32
+.global        ari_read_ram_mirror8
+.global        ari_read_ram_mirror16
+.global        ari_read_ram_mirror32
+.global        ari_write_ram8
+.global        ari_write_ram16
+.global        ari_write_ram32
+.global        ari_write_ram_mirror8
+.global        ari_write_ram_mirror16
+.global        ari_write_ram_mirror32
+.global        ari_read_bios8
+.global        ari_read_bios16
+.global        ari_read_bios32
+.global        ari_read_io8
+.global        ari_read_io16
+.global        ari_read_io32
+.global        ari_write_io8   @ hand-written, not a macro expansion
+.global        ari_write_io16
+.global        ari_write_io32
+
+.macro ari_read_ram bic_const op       @ direct read: bic_const = low address bits to clear (0 = none), op = load instruction
+       ldr     r0, [fp, #address-dynarec_local]       @ r0 = address stored by the caller
+.if \bic_const
+       bic     r0, r0, #\bic_const     @ force natural alignment for the access width
+.endif
+       \op     r0, [r0]                @ the address is dereferenced as-is
+       str     r0, [fp, #readmem_dword-dynarec_local] @ result is returned through readmem_dword
        mov     pc, lr
-       .size   do_invalidate, .-do_invalidate
+.endm
 
-       .align  2
-       .global read_nomem_new
-       .type   read_nomem_new, %function
-/*read_nomem_new:*/
-read_nomemb_new:
-read_nomemh_new:
-read_nomemd_new:
-       /* should never happen */
-       b       read_nomem_new
-/*
-       ldr     r2, [fp, #address-dynarec_local]
-       add     r12, fp, #memory_map-dynarec_local
-       lsr     r0, r2, #12
-       ldr     r12, [r12, r0, lsl #2]
-       mov     r1, #8
-       tst     r12, r12
-       bmi     tlb_exception
-       ldr     r0, [r2, r12, lsl #2]
+ari_read_ram8:         @ byte read, no alignment mask
+       ari_read_ram 0, ldrb
+
+ari_read_ram16:        @ halfword read, bit 0 of the address cleared
+       ari_read_ram 1, ldrh
+
+ari_read_ram32:        @ word read, bits 0-1 of the address cleared
+       ari_read_ram 3, ldr
+
+.macro ari_read_ram_mirror mvn_const, op       @ mirrored read: mvn_const = alignment bits pre-shifted left by 11, op = load instruction
+       ldr     r0, [fp, #address-dynarec_local]
+       mvn     r1, #\mvn_const         @ r1 = all ones except the alignment bits at position 11 and up
+       and     r0, r1, lsr #11         @ keep the low 21 address bits (2 MiB window), alignment bits dropped
+       orr     r0, r0, #1<<31          @ rebase the folded offset at 0x80000000
+       \op     r0, [r0]
        str     r0, [fp, #readmem_dword-dynarec_local]
        mov     pc, lr
-*/
-       .size   read_nomem_new, .-read_nomem_new
-/*
-       .align  2
-       .global read_nomemb_new
-       .type   read_nomemb_new, %function
-write_nomem_new:
-       str     r3, [fp, #24]
-       str     lr, [fp, #28]
-       bl      do_invalidate
-       ldr     r2, [fp, #address-dynarec_local]
-       add     r12, fp, #memory_map-dynarec_local
-       ldr     lr, [fp, #28]
-       lsr     r0, r2, #12
-       ldr     r3, [fp, #24]
-       ldr     r12, [r12, r0, lsl #2]
-       mov     r1, #0xc
-       tst     r12, #0x40000000
-       bne     tlb_exception
-       ldr     r0, [fp, #word-dynarec_local]
-       str     r0, [r2, r12, lsl #2]
+.endm
+
+ari_read_ram_mirror8:  @ byte read, no alignment bits cleared
+       ari_read_ram_mirror 0, ldrb
+
+ari_read_ram_mirror16: @ halfword read; (1<<11) becomes address bit 0 after the lsr #11 in the macro
+       ari_read_ram_mirror (1<<11), ldrh
+
+ari_read_ram_mirror32: @ word read; (3<<11) becomes address bits 0-1 after the lsr #11 in the macro
+       ari_read_ram_mirror (3<<11), ldr
+
+/* invalidation is already taken care of by the caller */
+.macro ari_write_ram bic_const var pf   @ direct write: var = dynarec_local field holding the value, pf = ldr/str size suffix
+       ldr     r0, [fp, #address-dynarec_local]
+       ldr\pf  r1, [fp, #\var-dynarec_local]  @ r1 = value to store, loaded at the access width
+.if \bic_const
+       bic     r0, r0, #\bic_const     @ force natural alignment for the access width
+.endif
+       str\pf  r1, [r0]
        mov     pc, lr
-       .size   write_nomem_new, .-write_nomem_new
+.endm
 
-       .align  2
-       .global write_nomemb_new
-       .type   write_nomemb_new, %function
-write_nomemb_new:
-       str     r3, [fp, #24]
-       str     lr, [fp, #28]
-       bl      do_invalidate
-       ldr     r2, [fp, #address-dynarec_local]
-       add     r12, fp, #memory_map-dynarec_local
-       ldr     lr, [fp, #28]
-       lsr     r0, r2, #12
-       ldr     r3, [fp, #24]
-       ldr     r12, [r12, r0, lsl #2]
-       mov     r1, #0xc
-       tst     r12, #0x40000000
-       bne     tlb_exception
-       eor     r2, r2, #3
-       ldrb    r0, [fp, #byte-dynarec_local]
-       strb    r0, [r2, r12, lsl #2]
+ari_write_ram8:        @ byte store from the byte field
+       ari_write_ram 0, byte, b
+
+ari_write_ram16:       @ halfword store from the hword field, address bit 0 cleared
+       ari_write_ram 1, hword, h
+
+ari_write_ram32:       @ word store from the word field (empty suffix), address bits 0-1 cleared
+       ari_write_ram 3, word,
+
+.macro ari_write_ram_mirror mvn_const var pf   @ mirrored write that also checks the per-page byte at invc_ptr
+       ldr     r0, [fp, #address-dynarec_local]
+       mvn     r3, #\mvn_const         @ r3 = all ones except the alignment bits at position 11 and up
+       ldr\pf  r1, [fp, #\var-dynarec_local]  @ r1 = value to store
+       and     r0, r3, lsr #11         @ keep the low 21 address bits, alignment bits dropped
+       ldr     r2, [fp, #invc_ptr-dynarec_local]
+       orr     r0, r0, #1<<31          @ rebase the folded offset at 0x80000000
+       ldrb    r2, [r2, r0, lsr #12]   @ r2 = invc_ptr[r0 >> 12], one byte per 4 KiB page
+       str\pf  r1, [r0]                @ the store itself happens unconditionally
+       tst     r2, r2
+       movne   pc, lr                  @ nonzero byte: nothing to invalidate, return
+       lsr     r0, r0, #12             @ zero byte: r0 = page number
+       b       invalidate_block        @ tail call; invalidate_block returns to our caller
+.endm
+
+ari_write_ram_mirror8: @ byte store, no alignment bits cleared
+       ari_write_ram_mirror 0, byte, b
+
+ari_write_ram_mirror16:        @ halfword store, address bit 0 cleared
+       ari_write_ram_mirror (1<<11), hword, h
+
+ari_write_ram_mirror32:        @ word store (empty suffix), address bits 0-1 cleared
+       ari_write_ram_mirror (3<<11), word,
+
+
+.macro ari_read_bios_mirror bic_const op       @ BIOS read: bic_const = low address bits to clear, op = load instruction
+       ldr     r0, [fp, #address-dynarec_local]
+       orr     r0, r0, #0x80000000     @ set bit 31
+       bic     r0, r0, #(0x20000000|\bic_const)        @ map to 0x9fc...
+       \op     r0, [r0]
+       str     r0, [fp, #readmem_dword-dynarec_local] @ result is returned through readmem_dword
        mov     pc, lr
-       .size   write_nomemb_new, .-write_nomemb_new
+.endm
 
-       .align  2
-       .global write_nomemh_new
-       .type   write_nomemh_new, %function
-write_nomemh_new:
-       str     r3, [fp, #24]
-       str     lr, [fp, #28]
-       bl      do_invalidate
-       ldr     r2, [fp, #address-dynarec_local]
-       add     r12, fp, #memory_map-dynarec_local
-       ldr     lr, [fp, #28]
-       lsr     r0, r2, #12
-       ldr     r3, [fp, #24]
-       ldr     r12, [r12, r0, lsl #2]
-       mov     r1, #0xc
-       lsls    r12, #2
-       bcs     tlb_exception
-       eor     r2, r2, #2
-       ldrh    r0, [fp, #hword-dynarec_local]
-       strh    r0, [r2, r12]
+ari_read_bios8:        @ byte read
+       ari_read_bios_mirror 0, ldrb
+
+ari_read_bios16:       @ halfword read, address bit 0 cleared
+       ari_read_bios_mirror 1, ldrh
+
+ari_read_bios32:       @ word read, address bits 0-1 cleared
+       ari_read_bios_mirror 3, ldr
+
+
+@ for testing: calls psxHwRead32/16/8 directly; only referenced from a commented-out line in ari_read_io
+.macro ari_read_io_old tab_shift
+       str     lr, [sp, #-8]! @ EABI alignment..
+.if \tab_shift == 0    @ shift 0 is the 32-bit variant
+       bl      psxHwRead32
+.endif
+.if \tab_shift == 1    @ shift 1 is the 16-bit variant
+       bl      psxHwRead16
+.endif
+.if \tab_shift == 2    @ shift 2 is the 8-bit variant
+       bl      psxHwRead8
+.endif
+       str     r0, [fp, #readmem_dword-dynarec_local] @ result is returned through readmem_dword
+       ldr     pc, [sp], #8    @ pop the saved lr straight into pc
+.endm
+
+.macro ari_read_io readop mem_tab tab_shift    @ I/O read: use a handler from the table when one exists, else load from psxH_ptr + offset
+       ldr     r0, [fp, #address-dynarec_local]
+       ldr     r1, [fp, #psxH_ptr-dynarec_local]
+.if \tab_shift == 0
+       bic     r0, r0, #3
+.endif
+.if \tab_shift == 1
+       bic     r0, r0, #1
+.endif
+       bic     r2, r0, #0x1f800000     @ r2 = address with the 0x1f800000 bits cleared, used as the offset below
+       ldr     r12,[fp, #\mem_tab-dynarec_local]      @ r12 = base of the handler table
+       subs    r3, r2, #0x1000         @ r3 = offset relative to 0x1000
+       blo     2f                      @ offset below 0x1000: plain memory read
+@      ari_read_io_old \tab_shift
+       cmp     r3, #0x880
+       bhs     1f                      @ the table only covers offsets 0x1000-0x187f
+       ldr     r12,[r12, r3, lsl #\tab_shift]  @ the shift turns the byte offset into a 4-byte table index
+       tst     r12,r12
+       beq     2f                      @ null entry: no handler registered
+0:
+       str     lr, [sp, #-8]! @ EABI alignment..
+       blx     r12                     @ NOTE(review): blx needs ARMv5T or later, but the HAVE_ARMV7 == 0 branch selects arm9tdmi - confirm that build
+       str     r0, [fp, #readmem_dword-dynarec_local]
+       ldr     pc, [sp], #8
+
+1:
+.if \tab_shift == 1 @ read16
+       cmp     r2, #0x1c00
+       blo     2f
+       cmp     r2, #0x1e00
+       bhs     2f
+       ldr     r12,[fp, #spu_readf-dynarec_local]     @ offsets 0x1c00-0x1dff go to the spu_readf pointer
+       b       0b
+.endif
+2:
+       @ no handler, just read psxH
+       \readop r0, [r1, r2]
+       str     r0, [fp, #readmem_dword-dynarec_local]
        mov     pc, lr
-       .size   write_nomemh_new, .-write_nomemh_new
-*/
-       .align  2
-       .global breakpoint
-       .type   breakpoint, %function
-breakpoint:
-       /* Set breakpoint here for debugging */
+.endm
+
+ari_read_io8:          @ byte read; shift 2 means one table entry per byte offset
+       ari_read_io ldrb, tab_read8, 2
+
+ari_read_io16:         @ halfword read; shift 1 means one table entry per halfword
+       ari_read_io ldrh, tab_read16, 1
+
+ari_read_io32:         @ word read; shift 0 means one table entry per word
+       ari_read_io ldr, tab_read32, 0
+
+.macro ari_write_io_old tab_shift      @ tail-branches to psxHwWrite32/16/8; only referenced from commented-out lines
+.if \tab_shift == 0    @ shift 0 is the 32-bit variant
+       b       psxHwWrite32
+.endif
+.if \tab_shift == 1    @ shift 1 is the 16-bit variant
+       b       psxHwWrite16
+.endif
+.if \tab_shift == 2    @ shift 2 is the 8-bit variant
+       b       psxHwWrite8
+.endif
+.endm
+
+.macro ari_write_io pf var mem_tab tab_shift   @ I/O write: table handler if present, spu_writef for 0x1c00-0x1dff, else store at psxH_ptr + offset
+       ldr     r0, [fp, #address-dynarec_local]
+       ldr\pf  r1, [fp, #\var-dynarec_local]  @ r1 = value to write
+.if \tab_shift == 0
+       bic     r0, r0, #3
+.endif
+.if \tab_shift == 1
+       bic     r0, r0, #1
+.endif
+       bic     r2, r0, #0x1f800000     @ r2 = address with the 0x1f800000 bits cleared, used as the offset below
+       ldr     r12,[fp, #\mem_tab-dynarec_local]      @ r12 = base of the handler table
+       subs    r3, r2, #0x1000
+       blo     0f                      @ offset below 0x1000: plain store
+@      ari_write_io_old \tab_shift
+       cmp     r3, #0x880
+       bhs     1f                      @ the table only covers offsets 0x1000-0x187f
+       ldr     r12,[r12, r3, lsl #\tab_shift]  @ the shift turns the byte offset into a 4-byte table index
+       mov     r0, r1                  @ table handlers receive the value in r0
+       tst     r12,r12
+       bxne    r12                     @ tail call: the handler returns to our caller
+0:
+       ldr     r3, [fp, #psxH_ptr-dynarec_local]
+       str\pf  r1, [r2, r3]
        mov     pc, lr
-       .size   breakpoint, .-breakpoint
-       .section        .note.GNU-stack,"",%progbits
+1:
+       cmp     r2, #0x1c00
+       blo     0b
+       cmp     r2, #0x1e00
+.if \tab_shift != 0
+       ldrlo   pc, [fp, #spu_writef-dynarec_local]    @ tail call with r0 = address, r1 = value
+.else
+       @ write32 to SPU - very rare case (is this correct?)
+       bhs     0b
+       add     r2, r0, #2              @ arguments for the second 16-bit write: address + 2 ...
+       mov     r3, r1, lsr #16         @ ... and the upper halfword of the value
+       push    {r2,r3,r12,lr}          @ r12 is padding only: four words keep sp 8-byte aligned across the call
+       mov     lr, pc                  @ return address = the pop below
+       ldr     pc, [fp, #spu_writef-dynarec_local]    @ first call: r0 = address, r1 = value
+       pop     {r0,r1,r12,lr}          @ r0 = address + 2, r1 = value >> 16, lr = our caller
+       ldr     pc, [fp, #spu_writef-dynarec_local]    @ second call as a tail call
+.endif
+       nop
+       b       0b
+.endm
+
+ari_write_io8:
+       @ PCSX always writes to psxH, so do we for consistency
+       ldr     r0, [fp, #address-dynarec_local]
+       ldr     r3, [fp, #psxH_ptr-dynarec_local]
+       ldrb    r1, [fp, #byte-dynarec_local]  @ r1 = byte to write
+       bic     r2, r0, #0x1f800000     @ r2 = address with the 0x1f800000 bits cleared, used as the offset below
+       ldr     r12,[fp, #tab_write8-dynarec_local]
+       strb    r1, [r2, r3]            @ unconditional store at psxH_ptr + offset, before any handler runs
+       subs    r3, r2, #0x1000
+       movlo   pc, lr                  @ offset below 0x1000: the store above was all there is to do
+@      ari_write_io_old 2
+       cmp     r3, #0x880
+       movhs   pc, lr                  @ outside the table range 0x1000-0x187f: done
+       ldr     r12,[r12, r3, lsl #2]   @ one 4-byte table entry per byte offset
+       mov     r0, r1                  @ handlers receive the value in r0
+       tst     r12,r12
+       bxne    r12                     @ tail call: the handler returns to our caller
+       mov     pc, lr
+
+ari_write_io16:        @ halfword write from the hword field; shift 1 means one table entry per halfword
+       ari_write_io h, hword, tab_write16, 1
+
+ari_write_io32:        @ word write from the word field (empty suffix); shift 0 means one table entry per word
+       ari_write_io , word, tab_write32, 0
+
+@ vim:filetype=armasm