From 33a1eda13ebe624fc1233d820c8db7dcd7aba0d8 Mon Sep 17 00:00:00 2001
From: notaz <notasas@gmail.com>
Date: Thu, 3 Aug 2023 01:20:45 +0300
Subject: [PATCH] drc: some cleanup

---
 libpcsxcore/new_dynarec/assem_arm.h     |  3 +-
 libpcsxcore/new_dynarec/assem_arm64.h   |  6 ++-
 libpcsxcore/new_dynarec/emu_if.h        |  1 -
 libpcsxcore/new_dynarec/linkage_arm64.S | 24 ++++++++++
 libpcsxcore/new_dynarec/new_dynarec.c   | 59 ++++++++++---------------
 5 files changed, 53 insertions(+), 40 deletions(-)

diff --git a/libpcsxcore/new_dynarec/assem_arm.h b/libpcsxcore/new_dynarec/assem_arm.h
index 75273aa8..b3558767 100644
--- a/libpcsxcore/new_dynarec/assem_arm.h
+++ b/libpcsxcore/new_dynarec/assem_arm.h
@@ -16,7 +16,6 @@
 
 #define HOST_REGS 13
 #define HOST_CCREG 10
-#define HOST_BTREG 8
 #define EXCLUDE_REG 11
 
 // Note: FP is set to &dynarec_local when executing generated code.
@@ -33,6 +32,8 @@
 #define PREFERRED_REG_FIRST 4
 #define PREFERRED_REG_LAST  9
 
+#define DRC_DBG_REGMASK CALLER_SAVE_REGS // mask for the host reg list drc_dbg_emit_do_cmp hands to save_regs
+
 extern char *invc_ptr;
 
 #define TARGET_SIZE_2 24 // 2^24 = 16 megabytes
diff --git a/libpcsxcore/new_dynarec/assem_arm64.h b/libpcsxcore/new_dynarec/assem_arm64.h
index 6d1a17f0..f8ee042f 100644
--- a/libpcsxcore/new_dynarec/assem_arm64.h
+++ b/libpcsxcore/new_dynarec/assem_arm64.h
@@ -5,7 +5,6 @@
    r19-r29: callee-save */
 
 #define HOST_REGS 29
-#define HOST_BTREG 27
 #define EXCLUDE_REG -1
 
 #define SP 31
@@ -27,8 +26,11 @@
 #define PREFERRED_REG_FIRST 19
 #define PREFERRED_REG_LAST  27
 
+#define DRC_DBG_REGMASK 3 // host regs 0-1 only; others done by do_insn_cmp_arm64
+#define do_insn_cmp do_insn_cmp_arm64 // go through the asm wrapper that saves the remaining regs
+
 // stack space
-#define SSP_CALLEE_REGS (8*12)
+#define SSP_CALLEE_REGS (8*12) // callee-saved regs of new_dyna_start's caller
 #define SSP_CALLER_REGS (8*20)
 #define SSP_ALL (SSP_CALLEE_REGS+SSP_CALLER_REGS)
 
diff --git a/libpcsxcore/new_dynarec/emu_if.h b/libpcsxcore/new_dynarec/emu_if.h
index 3f4aba6f..ec307fc4 100644
--- a/libpcsxcore/new_dynarec/emu_if.h
+++ b/libpcsxcore/new_dynarec/emu_if.h
@@ -83,4 +83,3 @@ void pcsx_mtc0_ds(u32 reg, u32 val);
 /* misc */
 extern void SysPrintf(const char *fmt, ...);
 
-#define rdram ((u_char *)psxM)
diff --git a/libpcsxcore/new_dynarec/linkage_arm64.S b/libpcsxcore/new_dynarec/linkage_arm64.S
index 644b0356..501a4fe7 100644
--- a/libpcsxcore/new_dynarec/linkage_arm64.S
+++ b/libpcsxcore/new_dynarec/linkage_arm64.S
@@ -374,3 +374,27 @@ FUNCTION(call_gteStall):
 	add	rCC, rCC, w0
 	ret
 
+#ifdef DRC_DBG
+#undef do_insn_cmp /* drop the do_insn_cmp -> do_insn_cmp_arm64 alias so the bl below names the real function */
+FUNCTION(do_insn_cmp_arm64): /* calls do_insn_cmp with x2-x18 and x30 preserved; x0/x1 are not saved here */
+	stp	x2,  x3,  [sp, #(SSP_CALLEE_REGS + 2*8)] /* xN is stored at SSP_CALLEE_REGS + N*8 */
+	stp	x4,  x5,  [sp, #(SSP_CALLEE_REGS + 4*8)]
+	stp	x6,  x7,  [sp, #(SSP_CALLEE_REGS + 6*8)]
+	stp	x8,  x9,  [sp, #(SSP_CALLEE_REGS + 8*8)]
+	stp	x10, x11, [sp, #(SSP_CALLEE_REGS + 10*8)]
+	stp	x12, x13, [sp, #(SSP_CALLEE_REGS + 12*8)]
+	stp	x14, x15, [sp, #(SSP_CALLEE_REGS + 14*8)]
+	stp	x16, x17, [sp, #(SSP_CALLEE_REGS + 16*8)]
+	stp	x18, x30, [sp, #(SSP_CALLEE_REGS + 18*8)] /* x30 (link register) takes the slot after x18 */
+	bl	do_insn_cmp /* bl overwrites x30, which is why it was saved above */
+	ldp	x2,  x3,  [sp, #(SSP_CALLEE_REGS + 2*8)] /* reload everything from the same slots */
+	ldp	x4,  x5,  [sp, #(SSP_CALLEE_REGS + 4*8)]
+	ldp	x6,  x7,  [sp, #(SSP_CALLEE_REGS + 6*8)]
+	ldp	x8,  x9,  [sp, #(SSP_CALLEE_REGS + 8*8)]
+	ldp	x10, x11, [sp, #(SSP_CALLEE_REGS + 10*8)]
+	ldp	x12, x13, [sp, #(SSP_CALLEE_REGS + 12*8)]
+	ldp	x14, x15, [sp, #(SSP_CALLEE_REGS + 14*8)]
+	ldp	x16, x17, [sp, #(SSP_CALLEE_REGS + 16*8)]
+	ldp	x18, x30, [sp, #(SSP_CALLEE_REGS + 18*8)] /* x30 is back, so ret goes to this wrapper's caller */
+	ret
+#endif
diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c
index 00d307b3..db751266 100644
--- a/libpcsxcore/new_dynarec/new_dynarec.c
+++ b/libpcsxcore/new_dynarec/new_dynarec.c
@@ -338,9 +338,9 @@ static struct compile_info
 #define CCREG 36 // Cycle count
 #define INVCP 37 // Pointer to invalid_code
 //#define MMREG 38 // Pointer to memory_map
-#define ROREG 39 // ram offset (if rdram!=0x80000000)
+#define ROREG 39 // ram offset (if psxM != 0x80000000)
 #define TEMPREG 40
-#define FTEMP 40 // FPU temporary register
+#define FTEMP 40 // Load/store temporary register (was fpu)
 #define PTEMP 41 // Prefetch temporary register
 //#define TLREG 42 // TLB mapping offset
 #define RHASH 43 // Return address hash
@@ -349,7 +349,6 @@ static struct compile_info
 #define MAXREG 45
 #define AGEN1 46 // Address generation temporary register (pass5b_preallocate2)
 //#define AGEN2 47 // Address generation temporary register
-#define BTREG 50 // Branch target temporary register
 
   /* instruction types */
 #define NOP 0     // No operation
@@ -380,7 +379,6 @@ static struct compile_info
   /* branch codes */
 #define TAKEN 1
 #define NOTTAKEN 2
-#define NULLDS 3
 
 #define DJT_1 (void *)1l // no function, just a label in assem_debug log
 #define DJT_2 (void *)2l
@@ -1054,12 +1052,8 @@ static void lsn(u_char hsn[], int i)
   if(dops[i].itype==C2LS) {
     hsn[FTEMP]=0;
   }
-  // Load L/R also uses FTEMP as a temporary register
-  if(dops[i].itype==LOADLR) {
-    hsn[FTEMP]=0;
-  }
-  // Also SWL/SWR/SDL/SDR
-  if(dops[i].opcode==0x2a||dops[i].opcode==0x2e||dops[i].opcode==0x2c||dops[i].opcode==0x2d) {
+  // Load/store L/R also use FTEMP as a temporary register
+  if (dops[i].itype == LOADLR || dops[i].itype == STORELR) {
     hsn[FTEMP]=0;
   }
   // Don't remove the miniht registers
@@ -1236,7 +1230,11 @@ static const struct {
   FUNCNAME(do_memhandler_post),
 #endif
 #ifdef DRC_DBG
+# ifdef __aarch64__
+  FUNCNAME(do_insn_cmp_arm64),
+# else
   FUNCNAME(do_insn_cmp),
+# endif
 #endif
 };
 
@@ -2837,11 +2835,11 @@ static void *emit_fastpath_cmp_jump(int i, const struct regstat *i_regs,
     // alignment check
     u_int op = dops[i].opcode;
     int mask = ((op & 0x37) == 0x21 || op == 0x25) ? 1 : 3; // LH/SH/LHU
-    void *jaddr;
+    void *jaddr2;
     emit_testimm(addr, mask);
-    jaddr = out;
+    jaddr2 = out;
     emit_jne(0);
-    add_stub_r(ALIGNMENT_STUB, jaddr, out, i, addr, i_regs, ccadj_, 0);
+    add_stub_r(ALIGNMENT_STUB, jaddr2, out, i, addr, i_regs, ccadj_, 0);
   }
 
   if(type==MTYPE_8020) { // RAM 80200000+ mirror
@@ -4491,7 +4489,7 @@ static void address_generation(int i, const struct regstat *i_regs, signed char
     {
       int offset = cinfo[i].imm;
       int add_offset = offset != 0;
-      int c=(i_regs->wasconst>>rs)&1;
+      int c = rs >= 0 && ((i_regs->wasconst >> rs) & 1);
       if(dops[i].rs1==0) {
         // Using r0 as a base address
         assert(ra >= 0);
@@ -4942,6 +4940,8 @@ static void drc_dbg_emit_do_cmp(int i, int ccadj_)
   extern void do_insn_cmp();
   //extern int cycle;
   u_int hr, reglist = get_host_reglist(regs[i].regmap);
+  reglist |= get_host_reglist(regs[i].regmap_entry);
+  reglist &= DRC_DBG_REGMASK;
 
   assem_debug("//do_insn_cmp %08x\n", start+i*4);
   save_regs(reglist);
@@ -5090,11 +5090,7 @@ static void do_ccstub(int n)
   assem_debug("do_ccstub %x\n",start+(u_int)stubs[n].b*4);
   set_jump_target(stubs[n].addr, out);
   int i=stubs[n].b;
-  if(stubs[n].d==NULLDS) {
-    // Delay slot instruction is nullified ("likely" branch)
-    wb_dirtys(regs[i].regmap,regs[i].dirty);
-  }
-  else if(stubs[n].d!=TAKEN) {
+  if (stubs[n].d != TAKEN) {
     wb_dirtys(branch_regs[i].regmap,branch_regs[i].dirty);
   }
   else {
@@ -5259,10 +5255,6 @@ static void do_ccstub(int n)
   }else if(stubs[n].d==NOTTAKEN) {
     if(i<slen-2) load_needed_regs(branch_regs[i].regmap,regmap_pre[i+2]);
     else load_all_regs(branch_regs[i].regmap);
-  }else if(stubs[n].d==NULLDS) {
-    // Delay slot instruction is nullified ("likely" branch)
-    if(i<slen-2) load_needed_regs(regs[i].regmap,regmap_pre[i+2]);
-    else load_all_regs(regs[i].regmap);
   }else{
     load_all_regs(branch_regs[i].regmap);
   }
@@ -6145,7 +6137,7 @@ void disassemble_inst(int i)
     #ifndef REGMAP_PRINT
     return;
     #endif
-    printf("D: %"PRIx64"  WD: %"PRIx64"  U: %"PRIx64"  hC: %x  hWC: %x  hLC: %x\n",
+    printf("D: %x  WD: %x  U: %"PRIx64"  hC: %x  hWC: %x  hLC: %x\n",
       regs[i].dirty, regs[i].wasdirty, unneeded_reg[i],
       regs[i].isconst, regs[i].wasconst, regs[i].loadedconst);
     print_regmap("pre:   ", regmap_pre[i]);
@@ -6302,7 +6294,7 @@ void new_dynarec_init(void)
 #endif
   arch_init();
   new_dynarec_test();
-  ram_offset=(uintptr_t)rdram-0x80000000;
+  ram_offset = (uintptr_t)psxM - 0x80000000;
   if (ram_offset!=0)
     SysPrintf("warning: RAM is not directly mapped, performance will suffer\n");
   SysPrintf("Mapped (RAM/scrp/ROM/LUTs/TC):\n");
@@ -6339,12 +6331,13 @@ void new_dynarec_cleanup(void)
 
 static u_int *get_source_start(u_int addr, u_int *limit)
 {
-  if (addr < 0x00200000 ||
-    (0xa0000000 <= addr && addr < 0xa0200000))
+  if (addr < 0x00800000
+      || (0x80000000 <= addr && addr < 0x80800000)
+      || (0xa0000000 <= addr && addr < 0xa0800000))
   {
     // used for BIOS calls mostly?
-    *limit = (addr&0xa0000000)|0x00200000;
-    return (u_int *)(rdram + (addr&0x1fffff));
+    *limit = (addr & 0xa0600000) + 0x00200000;
+    return (u_int *)(psxM + (addr & 0x1fffff));
   }
   else if (!Config.HLE && (
     /* (0x9fc00000 <= addr && addr < 0x9fc80000) ||*/
@@ -6358,10 +6351,6 @@ static u_int *get_source_start(u_int addr, u_int *limit)
     *limit = (addr & 0xfff00000) | 0x80000;
     return (u_int *)((u_char *)psxR + (addr&0x7ffff));
   }
-  else if (addr >= 0x80000000 && addr < 0x80000000+RAM_SIZE) {
-    *limit = (addr & 0x80600000) + 0x00200000;
-    return (u_int *)(rdram + (addr&0x1fffff));
-  }
   return NULL;
 }
 
@@ -7203,7 +7192,6 @@ static noinline void pass3_register_alloc(u_int addr)
     dops[1].bt=1;
     ds=1;
     unneeded_reg[0]=1;
-    current.regmap[HOST_BTREG]=BTREG;
   }
 
   for(i=0;i<slen;i++)
@@ -7764,7 +7752,6 @@ static noinline void pass3_register_alloc(u_int addr)
         }
       }
     }
-    if(current.regmap[HOST_BTREG]==BTREG) current.regmap[HOST_BTREG]=-1;
     //regs[i].waswritten=current.waswritten;
   }
 }
@@ -8987,7 +8974,7 @@ static int new_recompile_block(u_int addr)
   /* Pass 6 - Optimize clean/dirty state */
   pass6_clean_registers(0, slen-1, 1);
 
-  /* Pass 7 - Identify 32-bit registers */
+  /* Pass 7 */
   for (i=slen-1;i>=0;i--)
   {
     if(dops[i].itype==CJUMP||dops[i].itype==SJUMP)
-- 
2.39.5