drc: merge Ari64's patch: 11_reduce_invstub_memory_usage
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / linkage_arm.s
index f8bdca2..57fb3d2 100644 (file)
@@ -1,6 +1,6 @@
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
  *   linkage_arm.s for PCSX                                                *
- *   Copyright (C) 2009-2010 Ari64                                         *
+ *   Copyright (C) 2009-2011 Ari64                                         *
  *   Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas                     *
  *                                                                         *
  *   This program is free software; you can redistribute it and/or modify  *
@@ -624,16 +624,12 @@ cc_interrupt:
        .global do_interrupt
        .type   do_interrupt, %function
 do_interrupt:
-       /* FIXME: cycles already calculated, not needed? */
        ldr     r0, [fp, #pcaddr-dynarec_local]
        bl      get_addr_ht
-       ldr     r1, [fp, #next_interupt-dynarec_local]
-       ldr     r10, [fp, #cycle-dynarec_local]
-       str     r1, [fp, #last_count-dynarec_local]
-       sub     r10, r10, r1
        add     r10, r10, #2
        mov     pc, r0
        .size   do_interrupt, .-do_interrupt
+
        .align  2
        .global fp_exception
        .type   fp_exception, %function
@@ -707,11 +703,23 @@ jump_hlecall:
        ldr     r2, [fp, #last_count-dynarec_local]
        str     r0, [fp, #pcaddr-dynarec_local]
        add     r2, r2, r10
-       str     r2, [fp, #cycle-dynarec_local] /* PCSX cycle counter */
        adr     lr, pcsx_return
+       str     r2, [fp, #cycle-dynarec_local] /* PCSX cycle counter */
        bx      r1
        .size   jump_hlecall, .-jump_hlecall
 
+       .align  2
+       .global jump_intcall
+       .type   jump_intcall, %function
+jump_intcall:                                   @ in: r0 = value stored to pcaddr, r10 = count added to last_count
+       ldr     r2, [fp, #last_count-dynarec_local]
+       str     r0, [fp, #pcaddr-dynarec_local]
+       add     r2, r2, r10                     @ r2 = last_count + r10
+       adr     lr, pcsx_return                 @ lr = pcsx_return; presumably execI returns through lr - confirm
+       str     r2, [fp, #cycle-dynarec_local] /* PCSX cycle counter */
+       b       execI                           @ plain branch: lr keeps the pcsx_return value set above
+       .size   jump_intcall, .-jump_intcall
+
 new_dyna_leave:
        .align  2
        .global new_dyna_leave
@@ -739,6 +747,109 @@ indirect_jump:
        .size   indirect_jump, .-indirect_jump
        .size   indirect_jump_indexed, .-indirect_jump_indexed
 
+       .align  2
+       .global invalidate_addr_r0
+       .type   invalidate_addr_r0, %function
+invalidate_addr_r0:                             @ stub: call invalidate_block with r0 >> 12
+       stmia   fp, {r0, r1, r2, r3, r12, lr}   @ save r0-r3, r12, lr in the six words at [fp] (NOTE(review): assumes that area is scratch)
+       lsr     r0, r0, #12     @ r0 = r0 >> 12 (argument passed on to invalidate_block)
+       b       invalidate_addr_call            @ shared tail: calls invalidate_block, reloads the saved registers, returns
+       .size   invalidate_addr_r0, .-invalidate_addr_r0
+       .align  2
+       .global invalidate_addr_r1
+       .type   invalidate_addr_r1, %function
+invalidate_addr_r1:                             @ stub: call invalidate_block with r1 >> 12
+       stmia   fp, {r0, r1, r2, r3, r12, lr}   @ save r0-r3, r12, lr in the six words at [fp] (NOTE(review): assumes that area is scratch)
+       lsr     r0, r1, #12     @ r0 = r1 >> 12 (argument passed on to invalidate_block)
+       b       invalidate_addr_call            @ shared tail: calls invalidate_block, reloads the saved registers, returns
+       .size   invalidate_addr_r1, .-invalidate_addr_r1
+       .align  2
+       .global invalidate_addr_r2
+       .type   invalidate_addr_r2, %function
+invalidate_addr_r2:                             @ stub: call invalidate_block with r2 >> 12
+       stmia   fp, {r0, r1, r2, r3, r12, lr}   @ save r0-r3, r12, lr in the six words at [fp] (NOTE(review): assumes that area is scratch)
+       lsr     r0, r2, #12     @ r0 = r2 >> 12 (argument passed on to invalidate_block)
+       b       invalidate_addr_call            @ shared tail: calls invalidate_block, reloads the saved registers, returns
+       .size   invalidate_addr_r2, .-invalidate_addr_r2
+       .align  2
+       .global invalidate_addr_r3
+       .type   invalidate_addr_r3, %function
+invalidate_addr_r3:                             @ stub: call invalidate_block with r3 >> 12
+       stmia   fp, {r0, r1, r2, r3, r12, lr}   @ save r0-r3, r12, lr in the six words at [fp] (NOTE(review): assumes that area is scratch)
+       lsr     r0, r3, #12     @ r0 = r3 >> 12 (argument passed on to invalidate_block)
+       b       invalidate_addr_call            @ shared tail: calls invalidate_block, reloads the saved registers, returns
+       .size   invalidate_addr_r3, .-invalidate_addr_r3
+       .align  2
+       .global invalidate_addr_r4
+       .type   invalidate_addr_r4, %function
+invalidate_addr_r4:                             @ stub: call invalidate_block with r4 >> 12
+       stmia   fp, {r0, r1, r2, r3, r12, lr}   @ save r0-r3, r12, lr in the six words at [fp] (NOTE(review): assumes that area is scratch)
+       lsr     r0, r4, #12     @ r0 = r4 >> 12 (argument passed on to invalidate_block)
+       b       invalidate_addr_call            @ shared tail: calls invalidate_block, reloads the saved registers, returns
+       .size   invalidate_addr_r4, .-invalidate_addr_r4
+       .align  2
+       .global invalidate_addr_r5
+       .type   invalidate_addr_r5, %function
+invalidate_addr_r5:                             @ stub: call invalidate_block with r5 >> 12
+       stmia   fp, {r0, r1, r2, r3, r12, lr}   @ save r0-r3, r12, lr in the six words at [fp] (NOTE(review): assumes that area is scratch)
+       lsr     r0, r5, #12     @ r0 = r5 >> 12 (argument passed on to invalidate_block)
+       b       invalidate_addr_call            @ shared tail: calls invalidate_block, reloads the saved registers, returns
+       .size   invalidate_addr_r5, .-invalidate_addr_r5
+       .align  2
+       .global invalidate_addr_r6
+       .type   invalidate_addr_r6, %function
+invalidate_addr_r6:                             @ stub: call invalidate_block with r6 >> 12
+       stmia   fp, {r0, r1, r2, r3, r12, lr}   @ save r0-r3, r12, lr in the six words at [fp] (NOTE(review): assumes that area is scratch)
+       lsr     r0, r6, #12     @ r0 = r6 >> 12 (argument passed on to invalidate_block)
+       b       invalidate_addr_call            @ shared tail: calls invalidate_block, reloads the saved registers, returns
+       .size   invalidate_addr_r6, .-invalidate_addr_r6
+       .align  2
+       .global invalidate_addr_r7
+       .type   invalidate_addr_r7, %function
+invalidate_addr_r7:                             @ stub: call invalidate_block with r7 >> 12
+       stmia   fp, {r0, r1, r2, r3, r12, lr}   @ save r0-r3, r12, lr in the six words at [fp] (NOTE(review): assumes that area is scratch)
+       lsr     r0, r7, #12     @ r0 = r7 >> 12 (argument passed on to invalidate_block)
+       b       invalidate_addr_call            @ shared tail: calls invalidate_block, reloads the saved registers, returns
+       .size   invalidate_addr_r7, .-invalidate_addr_r7
+       .align  2
+       .global invalidate_addr_r8
+       .type   invalidate_addr_r8, %function
+invalidate_addr_r8:                             @ stub: call invalidate_block with r8 >> 12
+       stmia   fp, {r0, r1, r2, r3, r12, lr}   @ save r0-r3, r12, lr in the six words at [fp] (NOTE(review): assumes that area is scratch)
+       lsr     r0, r8, #12     @ r0 = r8 >> 12 (argument passed on to invalidate_block)
+       b       invalidate_addr_call            @ shared tail: calls invalidate_block, reloads the saved registers, returns
+       .size   invalidate_addr_r8, .-invalidate_addr_r8
+       .align  2
+       .global invalidate_addr_r9
+       .type   invalidate_addr_r9, %function
+invalidate_addr_r9:                             @ stub: call invalidate_block with r9 >> 12
+       stmia   fp, {r0, r1, r2, r3, r12, lr}   @ save r0-r3, r12, lr in the six words at [fp] (NOTE(review): assumes that area is scratch)
+       lsr     r0, r9, #12     @ r0 = r9 >> 12 (argument passed on to invalidate_block)
+       b       invalidate_addr_call            @ shared tail: calls invalidate_block, reloads the saved registers, returns
+       .size   invalidate_addr_r9, .-invalidate_addr_r9
+       .align  2
+       .global invalidate_addr_r10
+       .type   invalidate_addr_r10, %function
+invalidate_addr_r10:                            @ stub: call invalidate_block with r10 >> 12
+       stmia   fp, {r0, r1, r2, r3, r12, lr}   @ save r0-r3, r12, lr in the six words at [fp] (NOTE(review): assumes that area is scratch)
+       lsr     r0, r10, #12    @ r0 = r10 >> 12 (argument passed on to invalidate_block)
+       b       invalidate_addr_call            @ shared tail: calls invalidate_block, reloads the saved registers, returns
+       .size   invalidate_addr_r10, .-invalidate_addr_r10
+       .align  2
+       .global invalidate_addr_r12
+       .type   invalidate_addr_r12, %function
+invalidate_addr_r12:                            @ stub: call invalidate_block with r12 >> 12
+       stmia   fp, {r0, r1, r2, r3, r12, lr}   @ save r0-r3, r12, lr in the six words at [fp] (NOTE(review): assumes that area is scratch)
+       lsr     r0, r12, #12    @ r0 = r12 >> 12; no branch here - execution falls through into invalidate_addr_call, which must stay directly below
+       .size   invalidate_addr_r12, .-invalidate_addr_r12
+       .align  2
+       .global invalidate_addr_call
+       .type   invalidate_addr_call, %function
+invalidate_addr_call:                           @ shared tail of the invalidate_addr_rN stubs; entered with r0 already shifted right by 12
+       bl      invalidate_block                @ external routine, receives r0 as prepared by the stub
+       ldmia   fp, {r0, r1, r2, r3, r12, pc}   @ reload what the stub saved at [fp]; the saved lr is loaded into pc, returning to the stub's caller
+       .size   invalidate_addr_call, .-invalidate_addr_call
+
        .align  2
        .global new_dyna_start
        .type   new_dyna_start, %function
@@ -826,34 +937,34 @@ ari_read_ram_mirror32:
        ari_read_ram_mirror (3<<11), ldr
 
 /* invalidation is already taken care of by the caller */
-.macro ari_write_ram bic_const var op
+.macro ari_write_ram bic_const var pf
        ldr     r0, [fp, #address-dynarec_local]
-       ldr     r1, [fp, #\var-dynarec_local]
+       ldr\pf  r1, [fp, #\var-dynarec_local]   @ value to write, loaded with the same width suffix (b, h or none) as the store below
 .if \bic_const
        bic     r0, r0, #\bic_const
 .endif
-       \op     r1, [r0]
+       str\pf  r1, [r0]                        @ store to the address in r0 (low bits cleared above when bic_const is non-zero)
        mov     pc, lr
 .endm
 
 ari_write_ram8:
-       ari_write_ram 0, byte, strb
+       ari_write_ram 0, byte, b
 
 ari_write_ram16:
-       ari_write_ram 1, hword, strh
+       ari_write_ram 1, hword, h
 
 ari_write_ram32:
-       ari_write_ram 3, word, str
+       ari_write_ram 3, word,
 
-.macro ari_write_ram_mirror mvn_const var op
+.macro ari_write_ram_mirror mvn_const var pf
        ldr     r0, [fp, #address-dynarec_local]
        mvn     r3, #\mvn_const
-       ldr     r1, [fp, #\var-dynarec_local]
+       ldr\pf  r1, [fp, #\var-dynarec_local]
        and     r0, r3, lsr #11
        ldr     r2, [fp, #invc_ptr-dynarec_local]
        orr     r0, r0, #1<<31
        ldrb    r2, [r2, r0, lsr #12]
-       \op     r1, [r0]
+       str\pf  r1, [r0]
        tst     r2, r2
        movne   pc, lr
        lsr     r0, r0, #12
@@ -861,13 +972,13 @@ ari_write_ram32:
 .endm
 
 ari_write_ram_mirror8:
-       ari_write_ram_mirror 0, byte, strb
+       ari_write_ram_mirror 0, byte, b
 
 ari_write_ram_mirror16:
-       ari_write_ram_mirror (1<<11), hword, strh
+       ari_write_ram_mirror (1<<11), hword, h
 
 ari_write_ram_mirror32:
-       ari_write_ram_mirror (3<<11), word, str
+       ari_write_ram_mirror (3<<11), word,
 
 
 .macro ari_read_bios_mirror bic_const op
@@ -967,9 +1078,9 @@ ari_read_io32:
 .endif
 .endm
 
-.macro ari_write_io opvl opst var mem_tab tab_shift
+.macro ari_write_io pf var mem_tab tab_shift
        ldr     r0, [fp, #address-dynarec_local]
-       \opvl   r1, [fp, #\var-dynarec_local]
+       ldr\pf  r1, [fp, #\var-dynarec_local]
 .if \tab_shift == 0
        bic     r0, r0, #3
 .endif
@@ -989,16 +1100,26 @@ ari_read_io32:
        bxne    r12
 0:
        ldr     r3, [fp, #psxH_ptr-dynarec_local]
-       \opst   r1, [r2, r3]
+       str\pf  r1, [r2, r3]
        mov     pc, lr
 1:
-.if \tab_shift == 1 @ write16
        cmp     r2, #0x1c00
        blo     0b
        cmp     r2, #0x1e00
+.if \tab_shift != 0
        ldrlo   pc, [fp, #spu_writef-dynarec_local]
-       nop
+.else
+       @ write32 to SPU - very rare case (is this correct?)
+       bhs     0b
+       add     r2, r0, #2
+       mov     r3, r1, lsr #16
+       push    {r2,r3,lr}
+       mov     lr, pc
+       ldr     pc, [fp, #spu_writef-dynarec_local]
+       pop     {r0,r1,lr}
+       ldr     pc, [fp, #spu_writef-dynarec_local]
 .endif
+       nop
        b       0b
 .endm
 
@@ -1022,9 +1143,9 @@ ari_write_io8:
        mov     pc, lr
 
 ari_write_io16:
-       ari_write_io ldrh, strh, hword, tab_write16, 1
+       ari_write_io h, hword, tab_write16, 1
 
 ari_write_io32:
-       ari_write_io ldr, str, word, tab_write32, 0
+       ari_write_io , word, tab_write32, 0
 
 @ vim:filetype=armasm