sh2 drc: RISC-V (RV64IM) code emitter, some work on MIPS64
author kub <derkub@gmail.com>
Wed, 13 Nov 2019 20:05:35 +0000 (21:05 +0100)
committer kub <derkub@gmail.com>
Wed, 13 Nov 2019 20:56:11 +0000 (21:56 +0100)
21 files changed:
Makefile
config.aarch64 [deleted file]
config.caanoo [deleted file]
config.caanoo47 [deleted file]
config.dingux [deleted file]
config.dingux54 [deleted file]
config.gcw0 [deleted file]
config.gp2x [deleted file]
config.gp2x47 [deleted file]
config.i386 [deleted file]
config.x86 [deleted file]
cpu/drc/emit_arm.c
cpu/drc/emit_arm64.c
cpu/drc/emit_mips.c
cpu/drc/emit_x86.c
cpu/sh2/compiler.c
cpu/sh2/compiler.h
pico/32x/32x.c
platform/common/dismips.c
platform/linux/emu.c
tools/mkoffsets.sh

diff --git a/Makefile b/Makefile
index a79c054..0a0ab12 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -68,6 +68,10 @@ else ifneq (,$(findstring aarch64,$(ARCH)))
 use_fame ?= 1
 use_cz80 ?= 1
 use_sh2drc ?= 1
+else ifneq (,$(findstring riscv,$(ARCH)))
+use_fame ?= 1
+use_cz80 ?= 1
+use_sh2drc ?= 1
 endif
 
 -include Makefile.local
@@ -266,7 +270,7 @@ pico/carthw_cfg.c: pico/carthw.cfg
 # random deps
 pico/carthw/svp/compiler.o : cpu/drc/emit_arm.c
 cpu/sh2/compiler.o : cpu/drc/emit_arm.c cpu/drc/emit_arm64.c
-cpu/sh2/compiler.o : cpu/drc/emit_x86.c cpu/drc/emit_mips.c
+cpu/sh2/compiler.o : cpu/drc/emit_x86.c cpu/drc/emit_mips.c cpu/drc/emit_riscv.c
 cpu/sh2/mame/sh2pico.o : cpu/sh2/mame/sh2.c
 pico/pico.o pico/cd/mcd.o pico/32x/32x.o : pico/pico_cmn.c pico/pico_int.h
 pico/memory.o pico/cd/memory.o pico/32x/memory.o : pico/pico_int.h pico/memory.h
diff --git a/config.aarch64 b/config.aarch64
deleted file mode 100644 (file)
index 9631d64..0000000
+++ /dev/null
@@ -1,14 +0,0 @@
-# Automatically generated by configure
-# Configured with: './configure' '--platform=generic'
-CC = aarch64-linux-gnu-gcc
-CXX = aarch64-linux-gnu-g++
-AS = aarch64-linux-gnu-as
-STRIP = aarch64-linux-gnu-strip
-CFLAGS += -I/usr/include/SDL -D_GNU_SOURCE=1 -D_REENTRANT -Wno-unused-result
-ASFLAGS += 
-LDFLAGS += # --sysroot ${HOME}/opt/aarch64/debian-arm64
-LDLIBS += -lSDL -lasound -lpng -lz -lm -ldl
-
-ARCH = aarch64
-PLATFORM = generic
-SOUND_DRIVERS = oss alsa sdl
diff --git a/config.caanoo b/config.caanoo
deleted file mode 100644 (file)
index 1ffc54d..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-# Automatically generated by configure
-# Configured with: './configure' '--platform=gp2x'
-CC = arm-gph-linux-gnueabi-gcc
-CXX = arm-gph-linux-gnueabi-g++
-AS = arm-gph-linux-gnueabi-as
-STRIP = arm-gph-linux-gnueabi-strip
-CFLAGS += -I${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/include -I${HOME}/src/gp2x/armroot-eabi/include -D__GP2X__ -Wno-unused-result
-CFLAGS += -mfloat-abi=soft -mcpu=arm920t -mtune=arm920t -mno-thumb-interwork -fno-stack-protector -fno-common
-CFLAGS += -finline-limit=42 -fipa-pta -fno-ipa-pure-const
-ASFLAGS += -mfloat-abi=soft -mcpu=arm920t
-LDFLAGS += --sysroot ${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root -L${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/lib -L${HOME}/src/gp2x/armroot-eabi/lib -static
-LDLIBS += -lpng -lm -ldl
-
-ARCH = arm
-PLATFORM = gp2x
-SOUND_DRIVERS = oss
diff --git a/config.caanoo47 b/config.caanoo47
deleted file mode 100644 (file)
index 5bcf860..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-# Automatically generated by configure
-# Configured with: './configure' '--platform=gp2x'
-CC = arm-linux-gnueabi-gcc
-CXX = arm-linux-gnueabi-g++
-AS = arm-linux-gnueabi-as
-STRIP = arm-linux-gnueabi-strip
-CFLAGS += -I${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/include -I${HOME}/src/gp2x/armroot-eabi/include  -D__GP2X__  -Wno-unused-result
-CFLAGS += -mfloat-abi=soft -mcpu=arm920t -mtune=arm920t -mno-thumb-interwork -fno-stack-protector -fno-common
-CFLAGS += -finline-limit=42 -fipa-pta -fno-ipa-sra -fno-ipa-pure-const
-ASFLAGS += -mfloat-abi=soft -mcpu=arm920t
-LDFLAGS += -B${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/lib/gcc/arm-gph-linux-gnueabi/4.2.4 -B${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/lib -L${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/lib -L${HOME}/src/gp2x/armroot-eabi/lib -static
-LDLIBS += -lpng -lm -ldl
-
-ARCH = arm
-PLATFORM = gp2x
-SOUND_DRIVERS = oss
diff --git a/config.dingux b/config.dingux
deleted file mode 100644 (file)
index b981bd3..0000000
+++ /dev/null
@@ -1,15 +0,0 @@
-# Automatically generated by configure
-# Configured with: './configure' '--platform=opendingux'
-CC = mipsel-linux-gcc
-CXX = mipsel-linux-g++
-AS = mipsel-linux-as
-STRIP = mipsel-linux-strip
-CFLAGS +=  -I${HOME}/opt/opendingux-toolchain/usr/include/ -I${HOME}/opt/opendingux-toolchain/usr/include/SDL
-CFLAGS += -D_GNU_SOURCE=1 -D_REENTRANT -Wno-unused-result -fno-stack-protector
-ASFLAGS += 
-LDFLAGS += --sysroot ${HOME}/opt/opendingux-toolchain -L${HOME}/opt/opendingux-toolchain/lib
-LDLIBS += -lSDL -lasound -lpng -lz -lm -ldl
-
-ARCH = mipsel
-PLATFORM = opendingux
-SOUND_DRIVERS = sdl
diff --git a/config.dingux54 b/config.dingux54
deleted file mode 100644 (file)
index a232d95..0000000
+++ /dev/null
@@ -1,15 +0,0 @@
-# Automatically generated by configure
-# Configured with: './configure' '--platform=opendingux'
-CC = mipsel-linux-gnu-gcc
-CXX = mipsel-linux-gnu-g++
-AS = mipsel-linux-gnu-as
-STRIP = mipsel-linux-gnu-strip
-CFLAGS += -I${HOME}/opt/opendingux-toolchain/usr/include/ -I${HOME}/opt/opendingux-toolchain/usr/include/SDL
-CFLAGS += -D_GNU_SOURCE=1 -D_REENTRANT -Wno-unused-result -fno-stack-protector
-ASFLAGS += 
-LDFLAGS += -B${HOME}/opt/opendingux-toolchain/usr/lib -B${HOME}/opt/opendingux-toolchain/lib -Wl,-rpath-link=${HOME}/opt/opendingux-toolchain/usr/lib -Wl,-rpath-link=${HOME}/opt/opendingux-toolchain/lib
-LDLIBS += -lSDL -lasound -lpng -lz -lm -ldl
-
-ARCH = mipsel
-PLATFORM = opendingux
-SOUND_DRIVERS = sdl
diff --git a/config.gcw0 b/config.gcw0
deleted file mode 100644 (file)
index cebe79a..0000000
+++ /dev/null
@@ -1,15 +0,0 @@
-# Automatically generated by configure
-# Configured with: './configure' '--platform=gcw0'
-CC = mipsel-gcw0-linux-uclibc-gcc
-CXX = mipsel-gcw0-linux-uclibc-g++
-AS = mipsel-gcw0-linux-uclibc-as
-STRIP = mipsel-gcw0-linux-uclibc-strip
-CFLAGS +=  -I${HOME}/opt/gcw0-toolchain/usr/mipsel-gcw0-linux-uclibc/sysroot/usr/include/ -I${HOME}/opt/gcw0-toolchain/usr/mipsel-gcw0-linux-uclibc/sysroot/usr/include/SDL
-CFLAGS += -D_GNU_SOURCE=1 -D_REENTRANT -Wno-unused-result -fno-stack-protector
-ASFLAGS += 
-LDFLAGS += --sysroot ${HOME}/opt/gcw0-toolchain/usr/mipsel-gcw0-linux-uclibc/sysroot
-LDLIBS += -lSDL -lasound -lpng -lz -lm -ldl
-
-ARCH = mipsel
-PLATFORM = opendingux
-SOUND_DRIVERS = sdl
diff --git a/config.gp2x b/config.gp2x
deleted file mode 100644 (file)
index cf99bd7..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-# Automatically generated by configure
-# Configured with: './configure' '--platform=gp2x'
-CC = arm-open2x-linux-gcc
-CXX = arm-open2x-linux-g++
-AS = arm-open2x-linux-as
-STRIP = arm-open2x-linux-strip
-CFLAGS += -I${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/include -I${HOME}/src/gp2x/armroot/include  -D__GP2X__
-CFLAGS += -mfloat-abi=soft -mcpu=arm920t -mtune=arm920t -mno-thumb-interwork -fno-stack-protector -fno-common
-CFLAGS += -finline-limit=42 -fipa-cp -fno-ipa-pure-const
-ASFLAGS += -mcpu=arm920t -mfloat-abi=soft
-LDFLAGS += --sysroot ${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux -L${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib -L${HOME}/src/gp2x/armroot/lib -static
-LDLIBS += -lpng -lm -ldl
-
-ARCH = arm
-PLATFORM = gp2x
-SOUND_DRIVERS = oss
diff --git a/config.gp2x47 b/config.gp2x47
deleted file mode 100644 (file)
index 8a86e85..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-# Automatically generated by configure
-# Configured with: './configure' '--platform=gp2x'
-CC = arm-linux-gnueabi-gcc
-CXX = arm-linux-gnueabi-g++
-AS = arm-linux-gnueabi-as
-STRIP = arm-linux-gnueabi-strip
-CFLAGS += -I${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/include -I${HOME}/src/gp2x/armroot/include -D__GP2X__ -Wno-unused-result
-CFLAGS += -mabi=apcs-gnu -mfloat-abi=soft -mfpu=fpa -mcpu=arm920t -mtune=arm920t -mno-thumb-interwork -fno-stack-protector -fno-common
-CFLAGS += -finline-limit=42 -fipa-pta -fno-ipa-sra -fno-ipa-pure-const
-ASFLAGS += -mabi=apcs-gnu -mfloat-abi=soft -mfpu=fpa -mcpu=arm920t
-LDFLAGS += -mabi=apcs-gnu -mfpu=fpa -B${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/lib/gcc/arm-open2x-linux/4.1.1 -B${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib -L${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/usr/lib -L${HOME}/src/gp2x/armroot/lib -static
-LDLIBS += -lpng -lm -ldl
-
-ARCH = arm
-PLATFORM = gp2x
-SOUND_DRIVERS = oss
diff --git a/config.i386 b/config.i386
deleted file mode 100644 (file)
index 9c8c2e6..0000000
+++ /dev/null
@@ -1,14 +0,0 @@
-# Automatically generated by configure
-# Configured with: './configure' '--platform=generic'
-CC = gcc
-CXX = g++
-AS = as
-STRIP = strip
-CFLAGS +=  -I/usr/include/SDL -D_GNU_SOURCE=1 -D_REENTRANT -Wno-unused-result -m32
-ASFLAGS += 
-LDFLAGS += -m32 -L/usr/lib/i386-linux-gnu -L${HOME}/opt/lib32
-LDLIBS += -lSDL-1.2 -lasound -lpng -lz -lm -ldl
-
-ARCH = i386
-PLATFORM = generic
-SOUND_DRIVERS = oss alsa sdl
diff --git a/config.x86 b/config.x86
deleted file mode 100644 (file)
index 4544001..0000000
+++ /dev/null
@@ -1,14 +0,0 @@
-# Automatically generated by configure
-# Configured with: './configure' '--platform=generic'
-CC = gcc
-CXX = g++
-AS = as
-STRIP = strip
-CFLAGS +=  -I/usr/include/SDL -D_GNU_SOURCE=1 -D_REENTRANT -Wno-unused-result
-ASFLAGS += 
-LDFLAGS += -L/usr/lib/x86_64-linux-gnu
-LDLIBS += -lSDL-1.2 -lasound -lpng -lz -lm -ldl
-
-ARCH = x86_64
-PLATFORM = generic
-SOUND_DRIVERS = oss alsa sdl
diff --git a/cpu/drc/emit_arm.c b/cpu/drc/emit_arm.c
index 25a2c72..8f633fa 100644 (file)
@@ -365,7 +365,7 @@ static void emith_flush(void)
 
 #define EOP_LDR_REG_LSL(cond,rd,rn,rm,shift_imm) EOP_C_AM2_REG(cond,1,0,1,rn,rd,shift_imm,A_AM1_LSL,rm)
 #define EOP_LDR_REG_LSL_WB(cond,rd,rn,rm,shift_imm) EOP_C_AM2_REG(cond,1,0,3,rn,rd,shift_imm,A_AM1_LSL,rm)
-#define EOP_LDRB_REG_LSL(cond,rd,rn,rm,shift_imm) EOP_C_AM2_REG(cond,1,1,1,rn,rd,shift_imm,A_AM1_LSL,rm);
+#define EOP_LDRB_REG_LSL(cond,rd,rn,rm,shift_imm) EOP_C_AM2_REG(cond,1,1,1,rn,rd,shift_imm,A_AM1_LSL,rm)
 #define EOP_STR_REG_LSL_WB(cond,rd,rn,rm,shift_imm) EOP_C_AM2_REG(cond,1,0,2,rn,rd,shift_imm,A_AM1_LSL,rm)
 
 #define EOP_LDRH_IMM2(cond,rd,rn,offset_8)  EOP_C_AM3_IMM(cond,(offset_8) >= 0,1,rn,rd,0,1,abs(offset_8))
@@ -470,84 +470,89 @@ static void emith_op_imm2(int cond, int s, int op, int rd, int rn, unsigned int
        if (cond == A_COND_NV)
                return;
 
-       switch (op) {
-       case A_OP_MOV:
-               rn = 0;
-               // count bits in imm and use MVN if more bits 1 than 0
-               if (count_bits(imm) > 16) {
-                       imm = ~imm;
-                       op = A_OP_MVN;
-               }
-               // count insns needed for mov/orr #imm
+       do {
+               u32 u;
+               // try to get the topmost byte empty to possibly save an insn
                for (v = imm, ror2 = 0; (v >> 24) && ror2 < 32/2; ror2++)
                        v = (v << 2) | (v >> 30);
-#ifdef HAVE_ARMV7
-               for (i = 2; i > 0; i--, v >>= 8)
-                       while (v > 0xff && !(v & 3))
-                               v >>= 2;
-               if (v) { // 3+ insns needed...
-                       if (op == A_OP_MVN)
-                               imm = ~imm;
-                       // ...prefer movw/movt
-                       EOP_MOVW(rd, imm);
-                       if (imm & 0xffff0000)
-                               EOP_MOVT(rd, imm);
-                       return;
-               }
-#else
-               for (i = 3; i > 0; i--, v >>= 8)
-                       while (v > 0xff && !(v & 3))
-                               v >>= 2;
-               if (v) { // 4 insns needed...
-                       if (op == A_OP_MVN)
+
+               switch (op) {
+               case A_OP_MOV:
+                       rn = 0;
+                       // use MVN if more bits 1 than 0
+                       if (count_bits(imm) > 16) {
                                imm = ~imm;
-                       // ...emit literal load
-                       int idx, o;
-                       if (literal_iindex >= MAX_HOST_LITERALS) {
-                               elprintf(EL_STATUS|EL_SVP|EL_ANOMALY,
-                                       "pool overflow");
-                               exit(1);
+                               op = A_OP_MVN;
+                               ror2 = -1;
+                               break;
+                       }
+                       // count insns needed for mov/orr #imm
+#ifdef HAVE_ARMV7
+                       for (i = 2, u = v; i > 0; i--, u >>= 8)
+                               while (u > 0xff && !(u & 3))
+                                       u >>= 2;
+                       if (u) { // 3+ insns needed...
+                               if (op == A_OP_MVN)
+                                       imm = ~imm;
+                               // ...prefer movw/movt
+                               EOP_MOVW(rd, imm);
+                               if (imm & 0xffff0000)
+                                       EOP_MOVT(rd, imm);
+                               return;
                        }
-                       idx = emith_pool_literal(imm, &o);
-                       literal_insn[literal_iindex++] = (u32 *)tcache_ptr;
-                       EOP_LDR_IMM2(cond, rd, PC, idx * sizeof(u32));
-                       if (o > 0)
-                               EOP_C_DOP_IMM(cond, A_OP_ADD, 0, rd, rd, 0, o);
-                       else if (o < 0)
-                               EOP_C_DOP_IMM(cond, A_OP_SUB, 0, rd, rd, 0, -o);
+#else
+                       for (i = 2, u = v; i > 0; i--, u >>= 8)
+                               while (u > 0xff && !(u & 3))
+                                       u >>= 2;
+                       if (u) { // 4 insns needed...
+                               if (op == A_OP_MVN)
+                                       imm = ~imm;
+                               // ...emit literal load
+                               int idx, o;
+                               if (literal_iindex >= MAX_HOST_LITERALS) {
+                                       elprintf(EL_STATUS|EL_SVP|EL_ANOMALY,
+                                               "pool overflow");
+                                       exit(1);
+                               }
+                               idx = emith_pool_literal(imm, &o);
+                               literal_insn[literal_iindex++] = (u32 *)tcache_ptr;
+                               EOP_LDR_IMM2(cond, rd, PC, idx * sizeof(u32));
+                               if (o > 0)
+                                   EOP_C_DOP_IMM(cond, A_OP_ADD, 0,rd,rd,0,o);
+                               else if (o < 0)
+                                   EOP_C_DOP_IMM(cond, A_OP_SUB, 0,rd,rd,0,-o);
                        return;
-               }
+                       }
 #endif
-               break;
+                       break;
 
-       case A_OP_AND:
-               // AND must fit into 1 insn. if not, use BIC
-               for (v = imm, ror2 = 0; (v >> 8) && ror2 < 32/2; ror2++)
-                       v = (v << 2) | (v >> 30);
-               if (v >> 8) {
-                       imm = ~imm;
-                       op = A_OP_BIC;
-               }
-               break;
-
-       case A_OP_SUB:
-       case A_OP_ADD:
-               // count bits in imm and swap ADD and SUB if more bits 1 than 0
-               if (s == 0 && count_bits(imm) > 16) {
-                       imm = -imm;
-                       op ^= (A_OP_ADD^A_OP_SUB);
+               case A_OP_AND:
+                       // AND must fit into 1 insn. if not, use BIC
+                       for (u = v; u > 0xff && !(u & 3); u >>= 2) ;
+                       if (u >> 8) {
+                               imm = ~imm;
+                               op = A_OP_BIC;
+                               ror2 = -1;
+                       }
+                       break;
+
+               case A_OP_SUB:
+               case A_OP_ADD:
+                       // swap ADD and SUB if more bits 1 than 0
+                       if (s == 0 && count_bits(imm) > 16) {
+                               imm = -imm;
+                               op ^= (A_OP_ADD^A_OP_SUB);
+                               ror2 = -1;
+                       }
+               case A_OP_EOR:
+               case A_OP_ORR:
+               case A_OP_BIC:
+                       if (s == 0 && imm == 0 && rd == rn)
+                               return;
+                       break;
                }
-       case A_OP_EOR:
-       case A_OP_ORR:
-       case A_OP_BIC:
-               if (s == 0 && imm == 0 && rd == rn)
-                       return;
-               break;
-       }
+       } while (ror2 < 0);
 
-       // try to get the topmost byte empty to possibly save an insn
-       for (v = imm, ror2 = 0; (v >> 24) && ror2 < 32/2; ror2++)
-               v = (v << 2) | (v >> 30);
        do {
                // shift down to get 'best' rot2
                while (v > 0xff && !(v & 3))
diff --git a/cpu/drc/emit_arm64.c b/cpu/drc/emit_arm64.c
index dc0cf55..3f40d4c 100644 (file)
@@ -134,9 +134,9 @@ enum { XT_UXTW=0x4, XT_UXTX=0x6, XT_LSL=0x7, XT_SXTW=0xc, XT_SXTX=0xe };
 #define A64_TST_REG(rn, rm, stype, simm) \
        A64_ANDS_REG(Z0, rn, rm, stype, simm)
 #define A64_MOV_REG(rd, rm, stype, simm) \
-       A64_OR_REG(rd, Z0, rm, stype, simm);
+       A64_OR_REG(rd, Z0, rm, stype, simm)
 #define A64_MVN_REG(rd, rm, stype, simm) \
-       A64_ORN_REG(rd, Z0, rm, stype, simm);
+       A64_ORN_REG(rd, Z0, rm, stype, simm)
 
 // rd = rn OP (rm EXTEND simm)
 #define A64_ADD_XREG(rd, rn, rm, xtopt, simm) \
diff --git a/cpu/drc/emit_mips.c b/cpu/drc/emit_mips.c
index 8252747..6f07e50 100644 (file)
 // opcode field (encoded in op)
 enum { OP__FN=000, OP__RT, OP_J, OP_JAL, OP_BEQ, OP_BNE, OP_BLEZ, OP_BGTZ };
 enum { OP_ADDI=010, OP_ADDIU, OP_SLTI, OP_SLTIU, OP_ANDI, OP_ORI, OP_XORI, OP_LUI };
-enum { OP_LB=040, OP_LH, OP_LWL, OP_LW, OP_LBU, OP_LHU, OP_LWR };
-enum { OP_SB=050, OP_SH, OP_SWL, OP_SW, __(54), __(55), OP_SWR };
+enum { OP_LB=040, OP_LH, OP_LWL, OP_LW, OP_LBU, OP_LHU, OP_LWR, OP_LWU };
+enum { OP_SB=050, OP_SH, OP_SWL, OP_SW, OP_SDL, OP_SDR, OP_SWR };
+enum { OP_DADDI=030, OP_DADDIU, OP_LDL, OP_LDR, OP_SD=067, OP_LD=077 };
 // function field (encoded in fn if opcode = OP__FN)
 enum { FN_SLL=000, __(01), FN_SRL, FN_SRA, FN_SLLV, __(05), FN_SRLV, FN_SRAV };
-enum { FN_MFHI=020, FN_MTHI, FN_MFLO, FN_MTLO };
-enum { FN_MULT=030, FN_MULTU, FN_DIV, FN_DIVU };
+enum { FN_JR=010, FN_JALR, FN_MOVZ, FN_MOVN, FN_SYNC=017 };
+enum { FN_MFHI=020, FN_MTHI, FN_MFLO, FN_MTLO, FN_DSSLV, __(25), FN_DSLRV, FN_DSRAV };
+enum { FN_MULT=030, FN_MULTU, FN_DIV, FN_DIVU, FN_DMULT, FN_DMULTU, FN_DDIV, FN_DDIVU };
 enum { FN_ADD=040, FN_ADDU, FN_SUB, FN_SUBU, FN_AND, FN_OR, FN_XOR, FN_NOR };
-enum { FN_JR=010, FN_JALR, FN_MOVZ, FN_MOVN, FN_SYNC=017, FN_SLT=052, FN_SLTU };
+enum { FN_SLT=052, FN_SLTU, FN_DADD, FN_DADDU, FN_DSUB, FN_DSUBU };
+enum { FN_DSLL=070, __(71), FN_DSRL, FN_DSRA, FN_DSLL32, __(75), FN_DSRL32, FN_DSRA32 };
 // rt field (encoded in rt if opcode = OP__RT)
 enum { RT_BLTZ=000, RT_BGEZ, RT_BLTZAL=020, RT_BGEZAL, RT_SYNCI=037 };
 
@@ -85,8 +88,12 @@ enum { RT_BLTZ=000, RT_BGEZ, RT_BLTZAL=020, RT_BGEZAL, RT_SYNCI=037 };
 // rd = rs OP rt
 #define MIPS_ADD_REG(rd, rs, rt) \
        MIPS_OP_REG(FN_ADDU, rd, rs, rt)
+#define MIPS_DADD_REG(rd, rs, rt) \
+       MIPS_OP_REG(FN_DADDU, rd, rs, rt)
 #define MIPS_SUB_REG(rd, rs, rt) \
        MIPS_OP_REG(FN_SUBU, rd, rs, rt)
+#define MIPS_DSUB_REG(rd, rs, rt) \
+       MIPS_OP_REG(FN_DSUBU, rd, rs, rt)
 
 #define MIPS_NEG_REG(rd, rt) \
        MIPS_SUB_REG(rd, Z0, rt)
@@ -122,6 +129,8 @@ enum { RT_BLTZ=000, RT_BGEZ, RT_BLTZAL=020, RT_BGEZAL, RT_SYNCI=037 };
 // rt = rs OP imm16
 #define MIPS_ADD_IMM(rt, rs, imm16) \
        MIPS_OP_IMM(OP_ADDIU, rt, rs, imm16)
+#define MIPS_DADD_IMM(rt, rs, imm16) \
+       MIPS_OP_IMM(OP_DADDIU, rt, rs, imm16)
 
 #define MIPS_XOR_IMM(rt, rs, imm16) \
        MIPS_OP_IMM(OP_XORI, rt, rs, imm16)
@@ -144,6 +153,11 @@ enum { RT_BLTZ=000, RT_BGEZ, RT_BLTZAL=020, RT_BGEZAL, RT_SYNCI=037 };
 #define MIPS_ASR_IMM(rd, rt, bits) \
        MIPS_INSN(OP__FN, _, rt, rd, bits, FN_SRA)
 
+#define MIPS_DLSL_IMM(rd, rt, bits) \
+       MIPS_INSN(OP__FN, _, rt, rd, bits, FN_DSLL)
+#define MIPS_DLSL32_IMM(rd, rt, bits) \
+       MIPS_INSN(OP__FN, _, rt, rd, bits, FN_DSLL32)
+
 // rt = (rs < imm16)
 #define MIPS_SLT_IMM(rt, rs, imm16) \
        MIPS_OP_IMM(OP_SLTI, rt, rs, imm16)
@@ -193,23 +207,45 @@ enum { RT_BLTZ=000, RT_BGEZ, RT_BLTZAL=020, RT_BGEZAL, RT_SYNCI=037 };
 
 // load/store indexed base
 
+#define MIPS_LD(rt, rs, offs16) \
+       MIPS_OP_IMM(OP_LD, rt, rs, (u16)(offs16))
 #define MIPS_LW(rt, rs, offs16) \
-       MIPS_INSN(OP_LW, rs, rt, _,_, (u16)(offs16))
+       MIPS_OP_IMM(OP_LW, rt, rs, (u16)(offs16))
 #define MIPS_LH(rt, rs, offs16) \
-       MIPS_INSN(OP_LH, rs, rt, _,_, (u16)(offs16))
+       MIPS_OP_IMM(OP_LH, rt, rs, (u16)(offs16))
 #define MIPS_LB(rt, rs, offs16) \
-       MIPS_INSN(OP_LB, rs, rt, _,_, (u16)(offs16))
+       MIPS_OP_IMM(OP_LB, rt, rs, (u16)(offs16))
 #define MIPS_LHU(rt, rs, offs16) \
-       MIPS_INSN(OP_LHU, rs, rt, _,_, (u16)(offs16))
+       MIPS_OP_IMM(OP_LHU, rt, rs, (u16)(offs16))
 #define MIPS_LBU(rt, rs, offs16) \
-       MIPS_INSN(OP_LBU, rs, rt, _,_, (u16)(offs16))
+       MIPS_OP_IMM(OP_LBU, rt, rs, (u16)(offs16))
 
+#define MIPS_SD(rt, rs, offs16) \
+       MIPS_OP_IMM(OP_SD, rt, rs, (u16)(offs16))
 #define MIPS_SW(rt, rs, offs16) \
-       MIPS_INSN(OP_SW, rs, rt, _,_, (u16)(offs16))
+       MIPS_OP_IMM(OP_SW, rt, rs, (u16)(offs16))
 #define MIPS_SH(rt, rs, offs16) \
-       MIPS_INSN(OP_SH, rs, rt, _,_, (u16)(offs16))
+       MIPS_OP_IMM(OP_SH, rt, rs, (u16)(offs16))
 #define MIPS_SB(rt, rs, offs16) \
-       MIPS_INSN(OP_SB, rs, rt, _,_, (u16)(offs16))
+       MIPS_OP_IMM(OP_SB, rt, rs, (u16)(offs16))
+
+// pointer operations
+
+#if __mips == 4 || __mips == 64
+#define OP_LP                          OP_LD
+#define OP_SP                          OP_SD
+#define OP_PADDIU                      OP_DADDIU
+#define FN_PADDU                       FN_DADDU
+#define FN_PSUBU                       FN_DSUBU
+#define PTR_SCALE                      3
+#else
+#define OP_LP                          OP_LW
+#define OP_SP                          OP_SW
+#define OP_PADDIU                      OP_ADDIU
+#define FN_PADDU                       FN_ADDU
+#define FN_PSUBU                       FN_SUBU
+#define PTR_SCALE                      2
+#endif
 
 // XXX: tcache_ptr type for SVP and SH2 compilers differs..
 #define EMIT_PTR(ptr, x) \
@@ -442,14 +478,14 @@ static void emith_set_arith_flags(int rd, int rs, int rt, s32 imm, int sub)
 
        if (emith_flg_hint & _FHV) {
                emith_flg_noV = 0;
-               if (rt >0)                            // Nt^Ns in FV, bit 31
+               if (rt > Z0)                            // Nt^Ns in FV, bit 31
                        EMIT(MIPS_XOR_REG(FV, rs, rt));
-               else if (imm == 0)
+               else if (rt == Z0 || imm == 0)
                        emith_flg_noV = 1;              // imm #0 can't overflow
                else if ((imm < 0) == !sub)
                        EMIT(MIPS_NOR_REG(FV, rs, Z0));
                else if ((imm > 0) == !sub)
-                       EMIT(MIPS_OR_REG(FV, rs, Z0));
+                       EMIT(MIPS_XOR_REG(FV, rs, Z0));
        }
        // full V = Nd^Nt^Ns^C calculation is deferred until really needed
 
@@ -483,13 +519,17 @@ static void emith_set_compare_flags(int rs, int rt, s32 imm)
        EMIT(MIPS_MVN_REG(d, s))
 
 #define emith_add_r_r_r_lsl_ptr(d, s1, s2, simm) do { \
+       if (simm) { \
+               EMIT(MIPS_LSL_IMM(AT, s2, simm)); \
+               EMIT(MIPS_OP_REG(FN_PADDU, d, s1, AT)); \
+       } else  EMIT(MIPS_OP_REG(FN_PADDU, d, s1, s2)); \
+} while (0)
+#define emith_add_r_r_r_lsl(d, s1, s2, simm) do { \
        if (simm) { \
                EMIT(MIPS_LSL_IMM(AT, s2, simm)); \
                EMIT(MIPS_ADD_REG(d, s1, AT)); \
        } else  EMIT(MIPS_ADD_REG(d, s1, s2)); \
 } while (0)
-#define emith_add_r_r_r_lsl(d, s1, s2, simm) \
-       emith_add_r_r_r_lsl_ptr(d, s1, s2, simm)
 
 #define emith_add_r_r_r_lsr(d, s1, s2, simm) do { \
        if (simm) { \
@@ -498,6 +538,16 @@ static void emith_set_compare_flags(int rs, int rt, s32 imm)
        } else  EMIT(MIPS_ADD_REG(d, s1, s2)); \
 } while (0)
 
+#define emith_addf_r_r_r_lsl_ptr(d, s1, s2, simm) do { \
+       if (simm) { \
+               EMIT(MIPS_LSL_IMM(AT, s2, simm)); \
+               EMIT(MIPS_OP_REG(FN_PADDU, FNZ, s1, AT)); \
+               emith_set_arith_flags(d, s1, AT, 0, 0); \
+       } else { \
+               EMIT(MIPS_OP_REG(FN_PADDU, FNZ, s1, s2)); \
+               emith_set_arith_flags(d, s1, s2, 0, 0); \
+       } \
+} while (0)
 #define emith_addf_r_r_r_lsl(d, s1, s2, simm) do { \
        if (simm) { \
                EMIT(MIPS_LSL_IMM(AT, s2, simm)); \
@@ -586,6 +636,8 @@ static void emith_set_compare_flags(int rs, int rt, s32 imm)
 #define emith_add_r_r_r(d, s1, s2) \
        emith_add_r_r_r_lsl(d, s1, s2, 0)
 
+#define emith_addf_r_r_r_ptr(d, s1, s2) \
+       emith_addf_r_r_r_lsl_ptr(d, s1, s2, 0)
 #define emith_addf_r_r_r(d, s1, s2) \
        emith_addf_r_r_r_lsl(d, s1, s2, 0)
 
@@ -697,14 +749,26 @@ static void emith_set_compare_flags(int rs, int rt, s32 imm)
 // move immediate
 static void emith_move_imm(int r, uintptr_t imm)
 {
-       if ((s16)imm == imm) {
+#if __mips == 4 || __mips == 64
+       if ((s32)imm != imm) {
+               emith_move_imm(r, imm >> 32);
+               if (imm & 0xffff0000) {
+                       EMIT(MIPS_DLSL_IMM(r, r, 16));
+                       EMIT(MIPS_OR_IMM(r, r, (imm >> 16) & 0xffff));
+                       EMIT(MIPS_DLSL_IMM(r, r, 16));
+               } else  EMIT(MIPS_DLSL32_IMM(r, r, 0));
+               if (imm & 0x0000ffff)
+                       EMIT(MIPS_OR_IMM(r, r, imm & 0xffff));
+       } else
+#endif
+        if ((s16)imm == imm) {
                EMIT(MIPS_ADD_IMM(r, Z0, imm));
-       } else if (!(imm >> 16)) {
+       } else if (!((u32)imm >> 16)) {
                EMIT(MIPS_OR_IMM(r, Z0, imm));
        } else {
                int s = Z0;
-               if (imm >> 16) {
-                       EMIT(MIPS_MOVT_IMM(r, imm >> 16));
+               if ((u32)imm >> 16) {
+                       EMIT(MIPS_MOVT_IMM(r, (u32)imm >> 16));
                        s = r;
                }
                if ((u16)imm)
@@ -729,17 +793,17 @@ static void emith_move_imm(int r, uintptr_t imm)
 } while (0)
 
 // arithmetic, immediate - can only be ADDI[U], since SUBI[U] doesn't exist
-static void emith_arith_imm(int op, int rd, int rs, u32 imm)
+static void emith_add_imm(int ptr, int rd, int rs, u32 imm)
 {
        if ((s16)imm == imm) {
                if (imm || rd != rs)
-                       EMIT(MIPS_OP_IMM(op, rd, rs, imm));
+                       EMIT(MIPS_OP_IMM(ptr ? OP_PADDIU:OP_ADDIU, rd,rs,imm));
        } else if ((s32)imm  < 0) {
                emith_move_r_imm(AT, -imm);
-               EMIT(MIPS_OP_REG(FN_SUB + (op-OP_ADDI), rd, rs, AT));
+               EMIT(MIPS_OP_REG((ptr ? FN_PSUBU:FN_SUBU), rd,rs,AT));
        } else {
                emith_move_r_imm(AT, imm);
-               EMIT(MIPS_OP_REG(FN_ADD + (op-OP_ADDI), rd, rs, AT));
+               EMIT(MIPS_OP_REG((ptr ? FN_PADDU:FN_ADDU), rd,rs,AT));
        }
 }
 
@@ -760,7 +824,7 @@ static void emith_arith_imm(int op, int rd, int rs, u32 imm)
        emith_subf_r_r_imm(r, r, imm)
 
 #define emith_adc_r_imm(r, imm) \
-       emith_adc_r_r_imm(r, r, imm);
+       emith_adc_r_r_imm(r, r, imm)
 
 #define emith_adcf_r_imm(r, imm) \
        emith_adcf_r_r_imm(r, r, imm)
@@ -770,10 +834,10 @@ static void emith_arith_imm(int op, int rd, int rs, u32 imm)
 //     emith_subf_r_r_imm(FNZ, r, (s16)imm)
 
 #define emith_add_r_r_ptr_imm(d, s, imm) \
-       emith_arith_imm(OP_ADDIU, d, s, imm)
+       emith_add_imm(1, d, s, imm)
 
 #define emith_add_r_r_imm(d, s, imm) \
-       emith_add_r_r_ptr_imm(d, s, imm)
+       emith_add_imm(0, d, s, imm)
 
 #define emith_addf_r_r_imm(d, s, imm) do { \
        emith_add_r_r_imm(FNZ, s, imm); \
@@ -1043,22 +1107,24 @@ static void emith_lohi_nops(void)
 
 // load/store. offs has 16 bits signed, which is currently sufficient
 #define emith_read_r_r_offs_ptr(r, rs, offs) \
-       EMIT(MIPS_LW(r, rs, offs))
+       EMIT(MIPS_OP_IMM(OP_LP, r, rs, offs))
 #define emith_read_r_r_offs_ptr_c(cond, r, rs, offs) \
        emith_read_r_r_offs_ptr(r, rs, offs)
 
 #define emith_read_r_r_offs(r, rs, offs) \
-       emith_read_r_r_offs_ptr(r, rs, offs)
+       EMIT(MIPS_LW(r, rs, offs))
 #define emith_read_r_r_offs_c(cond, r, rs, offs) \
        emith_read_r_r_offs(r, rs, offs)
  
 #define emith_read_r_r_r_ptr(r, rs, rm) do { \
        emith_add_r_r_r(AT, rs, rm); \
-       EMIT(MIPS_LW(r, AT, 0)); \
+       EMIT(MIPS_OP_IMM(OP_LP, r, AT, 0)); \
 } while (0)
 
-#define emith_read_r_r_r(r, rs, rm) \
-       emith_read_r_r_r_ptr(r, rs, rm)
+#define emith_read_r_r_r(r, rs, rm) do { \
+       emith_add_r_r_r(AT, rs, rm); \
+       EMIT(MIPS_LW(r, AT, 0)); \
+} while (0)
 #define emith_read_r_r_r_c(cond, r, rs, rm) \
        emith_read_r_r_r(r, rs, rm)
 
@@ -1112,24 +1178,26 @@ static void emith_lohi_nops(void)
 
 
 #define emith_write_r_r_offs_ptr(r, rs, offs) \
-       EMIT(MIPS_SW(r, rs, offs))
+       EMIT(MIPS_OP_IMM(OP_SP, r, rs, offs))
 #define emith_write_r_r_offs_ptr_c(cond, r, rs, offs) \
        emith_write_r_r_offs_ptr(r, rs, offs)
 
 #define emith_write_r_r_r_ptr(r, rs, rm) do { \
        emith_add_r_r_r(AT, rs, rm); \
-       EMIT(MIPS_SW(r, AT, 0)); \
+       EMIT(MIPS_OP_IMM(OP_SP, r, AT, 0)); \
 } while (0)
 #define emith_write_r_r_r_ptr_c(cond, r, rs, rm) \
        emith_write_r_r_r_ptr(r, rs, rm)
 
 #define emith_write_r_r_offs(r, rs, offs) \
-       emith_write_r_r_offs_ptr(r, rs, offs)
+       EMIT(MIPS_SW(r, rs, offs))
 #define emith_write_r_r_offs_c(cond, r, rs, offs) \
        emith_write_r_r_offs(r, rs, offs)
 
-#define emith_write_r_r_r(r, rs, rm) \
-       emith_write_r_r_r_ptr(r, rs, rm)
+#define emith_write_r_r_r(r, rs, rm) do { \
+       emith_add_r_r_r(AT, rs, rm); \
+       EMIT(MIPS_SW(r, AT, 0)); \
+} while (0)
 #define emith_write_r_r_r_c(cond, r, rs, rm) \
        emith_write_r_r_r(r, rs, rm)
 
@@ -1164,7 +1232,7 @@ static void emith_lohi_nops(void)
        int _c; u32 _m = mask & 0x300fffc; /* r2-r15,r24-r25 */ \
        if (__builtin_parity(_m) == 1) _m |= 0x1; /* ABI align */ \
        int _s = count_bits(_m) * 4, _o = _s; \
-       if (_s) emith_sub_r_imm(SP, _s); \
+       if (_s) emith_add_r_r_ptr_imm(SP, SP, -_s); \
        for (_c = HOST_REGS-1; _m && _c >= 0; _m &= ~(1 << _c), _c--) \
                if (_m & (1 << _c)) \
                        { _o -= 4; if (_c) emith_write_r_r_offs(_c, SP, _o); } \
@@ -1177,7 +1245,7 @@ static void emith_lohi_nops(void)
        for (_c = 0; _m && _c < HOST_REGS; _m &= ~(1 << _c), _c++) \
                if (_m & (1 << _c)) \
                        { if (_c) emith_read_r_r_offs(_c, SP, _o); _o += 4; } \
-       if (_s) emith_add_r_imm(SP, _s); \
+       if (_s) emith_add_r_r_ptr_imm(SP, SP, _s); \
 } while (0)
 
 #define host_arg2reg(rd, arg) \
@@ -1343,8 +1411,8 @@ static int emith_cond_check(int cond, int *r)
        emith_jump_cond(cond, target)
 
 #define emith_jump_cond_inrange(target) \
-       ((u8 *)target - (u8 *)tcache_ptr - 4 <  0x00020000U || \
-        (u8 *)target - (u8 *)tcache_ptr - 4 >= 0xfffe0010U) // mind cond_check
+       ((u8 *)target - (u8 *)tcache_ptr - 4 <   0x20000 && \
+        (u8 *)target - (u8 *)tcache_ptr - 4 >= -0x20000+0x10) //mind cond_check
 
 // NB: returns position of patch for cache maintenance
 #define emith_jump_patch(ptr, target, pos) do { \
@@ -1359,8 +1427,8 @@ static int emith_cond_check(int cond, int *r)
 } while (0)
 
 #define emith_jump_patch_inrange(ptr, target) \
-       ((u8 *)target - (u8 *)ptr - 4 <  0x00020000U || \
-        (u8 *)target - (u8 *)ptr - 4 >= 0xfffe0010U) // mind cond_check
+       ((u8 *)target - (u8 *)ptr - 4 <   0x20000 && \
+        (u8 *)target - (u8 *)ptr - 4 >= -0x20000+0x10) // mind cond_check
 #define emith_jump_patch_size() 4
 
 #define emith_jump_at(ptr, target) do { \
@@ -1410,7 +1478,7 @@ static int emith_cond_check(int cond, int *r)
 
 // NB: ABI SP alignment is 8 for compatibility with MIPS IV
 #define emith_push_ret(r) do { \
-       emith_sub_r_imm(SP, 8+16); /* reserve new arg save area (16) */ \
+       emith_add_r_r_ptr_imm(SP, SP, -8-16); /* ABI: 16 byte arg save area */ \
        emith_write_r_r_offs(LR, SP, 4+16); \
        if ((r) > 0) emith_write_r_r_offs(r, SP, 0+16); \
 } while (0)
@@ -1418,7 +1486,7 @@ static int emith_cond_check(int cond, int *r)
 #define emith_pop_and_ret(r) do { \
        if ((r) > 0) emith_read_r_r_offs(r, SP, 0+16); \
        emith_read_r_r_offs(LR, SP, 4+16); \
-       emith_add_r_imm(SP, 8+16); \
+       emith_add_r_r_ptr_imm(SP, SP, 8+16); \
        emith_ret(); \
 } while (0)
 
@@ -1436,7 +1504,7 @@ static int emith_cond_check(int cond, int *r)
        int _c; u32 _m = 0xd0ff0000; \
        if (__builtin_parity(_m) == 1) _m |= 0x1; /* ABI align for SP is 8 */ \
        int _s = count_bits(_m) * 4 + 16, _o = _s; /* 16 byte arg save area */ \
-       if (_s) emith_sub_r_imm(SP, _s); \
+       if (_s) emith_add_r_r_ptr_imm(SP, SP, -_s); \
        for (_c = HOST_REGS-1; _m && _c >= 0; _m &= ~(1 << _c), _c--) \
                if (_m & (1 << _c)) \
                        { _o -= 4; if (_c) emith_write_r_r_offs(_c, SP, _o); } \
@@ -1448,23 +1516,23 @@ static int emith_cond_check(int cond, int *r)
        for (_c = 0; _m && _c < HOST_REGS; _m &= ~(1 << _c), _c++) \
                if (_m & (1 << _c)) \
                        { if (_c) emith_read_r_r_offs(_c, SP, _o); _o += 4; } \
-       if (_s) emith_add_r_imm(SP, _s); \
+       if (_s) emith_add_r_r_ptr_imm(SP, SP, _s); \
        emith_ret(); \
 } while (0)
 
 // NB: assumes a is in arg0, tab, func and mask are temp
 #define emith_sh2_rcall(a, tab, func, mask) do { \
        emith_lsr(mask, a, SH2_READ_SHIFT); \
-       emith_add_r_r_r_lsl_ptr(tab, tab, mask, 3); \
+       emith_add_r_r_r_lsl_ptr(tab, tab, mask, PTR_SCALE+1); \
        emith_read_r_r_offs_ptr(func, tab, 0); \
-       emith_read_r_r_offs(mask, tab, 4); \
-       emith_addf_r_r_r/*_ptr*/(func, func, func); \
+       emith_read_r_r_offs(mask, tab, (1 << PTR_SCALE)); \
+       emith_addf_r_r_r_ptr(func, func, func); \
 } while (0)
 
 // NB: assumes a, val are in arg0 and arg1, tab and func are temp
 #define emith_sh2_wcall(a, val, tab, func) do { \
        emith_lsr(func, a, SH2_WRITE_SHIFT); \
-       emith_lsl(func, func, 2); \
+       emith_lsl(func, func, PTR_SCALE); \
        emith_read_r_r_r_ptr(func, tab, func); \
        emith_move_r_r_ptr(6, CONTEXT_REG); /* arg2 */ \
        emith_jump_reg(func); \
index e728449..9ed8b56 100644 (file)
@@ -448,11 +448,11 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI,        // x86-64,i386 common
 
 // fake conditionals (using SJMP instead)
 #define emith_move_r_imm_c(cond, r, imm) \
-       emith_move_r_imm(r, imm);
+       emith_move_r_imm(r, imm)
 #define emith_add_r_imm_c(cond, r, imm) \
-       emith_add_r_imm(r, imm);
+       emith_add_r_imm(r, imm)
 #define emith_sub_r_imm_c(cond, r, imm) \
-       emith_sub_r_imm(r, imm);
+       emith_sub_r_imm(r, imm)
 #define emith_or_r_imm_c(cond, r, imm) \
        emith_or_r_imm(r, imm)
 #define emith_eor_r_imm_c(cond, r, imm) \
@@ -468,11 +468,11 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI,        // x86-64,i386 common
 #define emith_ror_c(cond, d, s, cnt) \
        emith_ror(d, s, cnt)
 #define emith_and_r_r_c(cond, d, s) \
-       emith_and_r_r(d, s);
+       emith_and_r_r(d, s)
 #define emith_add_r_r_imm_c(cond, d, s, imm) \
-       emith_add_r_r_imm(d, s, imm);
+       emith_add_r_r_imm(d, s, imm)
 #define emith_sub_r_r_imm_c(cond, d, s, imm) \
-       emith_sub_r_r_imm(d, s, imm);
+       emith_sub_r_r_imm(d, s, imm)
 
 #define emith_read8_r_r_r_c(cond, r, rs, rm) \
        emith_read8_r_r_r(r, rs, rm)
index 2320c50..d1cde69 100644 (file)
 #define LOOP_OPTIMIZER          1
 #define T_OPTIMIZER             1
 
-// limits (per block)
-#define MAX_BLOCK_SIZE          (BLOCK_INSN_LIMIT * 6 * 6)
-
-// max literal offset from the block end
 #define MAX_LITERAL_OFFSET      0x200  // max. MOVA, MOV @(PC) offset
-#define MAX_LITERALS            (BLOCK_INSN_LIMIT / 4)
-#define MAX_LOCAL_BRANCHES      (BLOCK_INSN_LIMIT / 4)
+#define MAX_LOCAL_TARGETS       (BLOCK_INSN_LIMIT / 4)
+#define MAX_LOCAL_BRANCHES      (BLOCK_INSN_LIMIT / 2)
 
 // debug stuff
 // 01 - warnings/errors
@@ -294,7 +290,7 @@ struct block_link {
   u32 target_pc;
   void *jump;                // insn address
   void *blx;                 // block link/exit  area if any
-  u8 jdisp[8];               // jump backup buffer
+  u8 jdisp[12];              // jump backup buffer
   struct block_link *next;   // either in block_entry->links or unresolved
   struct block_link *o_next; //     ...in block_entry->o_links
   struct block_link *prev;
@@ -443,6 +439,8 @@ static void rcache_free_tmp(int hr);
 #include "../drc/emit_arm64.c"
 #elif defined(__mips__)
 #include "../drc/emit_mips.c"
+#elif defined(__riscv__) || defined(__riscv)
+#include "../drc/emit_riscv.c"
 #elif defined(__i386__)
 #include "../drc/emit_x86.c"
 #elif defined(__x86_64__)
@@ -1207,45 +1205,10 @@ static void dr_flush_tcache(int tcid)
 
 static void *dr_failure(void)
 {
-  lprintf("recompilation failed\n");
+  printf("recompilation failed\n");
   exit(1);
 }
 
-#define ADD_TO_ARRAY(array, count, item, failcode) { \
-  if (count >= ARRAY_SIZE(array)) { \
-    dbg(1, "warning: " #array " overflow"); \
-    failcode; \
-  } else \
-    array[count++] = item; \
-}
-
-static inline int find_in_array(u32 *array, size_t size, u32 what)
-{
-  size_t i;
-  for (i = 0; i < size; i++)
-    if (what == array[i])
-      return i;
-
-  return -1;
-}
-
-static int find_in_sorted_array(u32 *array, size_t size, u32 what)
-{
-  // binary search in sorted array
-  int left = 0, right = size-1;
-  while (left <= right)
-  {
-    int middle = (left + right) / 2;
-    if (array[middle] == what)
-      return middle;
-    else if (array[middle] < what)
-      left = middle + 1;
-    else
-      right = middle - 1;
-  }
-  return -1;
-}
-
 // ---------------------------------------------------------------
 
 // NB rcache allocation dependencies:
@@ -2868,6 +2831,88 @@ static void emit_do_static_regs(int is_write, int tmpr)
   }
 }
 
+// block local link stuff: one record per branch target, pending branch or pinned loop
+struct linkage {
+  u32 pc;                // SH2 pc of the target; pending exits are stored as pc|1
+  void *ptr;             // host code address: the target's code, or the jump to patch
+  struct block_link *bl; // block link of an external branch, NULL if there is none
+  u32 mask;              // pinned loops: bit mask of the regs pinned on loop entry
+};
+
+// linear scan for pc in array[0..size-1]; returns its index, or -1 if absent
+static inline int find_in_linkage(const struct linkage *array, int size, u32 pc)
+{
+  int i; // signed like size: a size <= 0 scans nothing instead of wrapping
+  for (i = 0; i < size; i++)
+    if (pc == array[i].pc)
+      return i;
+  return -1;
+}
+
+// binary search for pc in a linkage array sorted by ascending pc; index or -1
+static int find_in_sorted_linkage(const struct linkage *array, int size, u32 pc)
+{
+  int lo, hi, mid;
+
+  for (lo = 0, hi = size-1; lo <= hi; ) {
+    mid = (lo + hi) / 2;
+    if (array[mid].pc < pc)
+      lo = mid + 1;   // wanted pc can only be in the upper part
+    else if (array[mid].pc > pc)
+      hi = mid - 1;   // wanted pc can only be in the lower part
+    else
+      return mid;
+  }
+  return -1;
+}
+
+static void emit_branch_linkage_code(SH2 *sh2, struct block_desc *block, int tcache_id,
+                                const struct linkage *targets, int target_count,
+                                const struct linkage *links, int link_count)
+{
+  struct block_link *bl;
+  int u, v, tmp;
+  // resolve every pending link: patch the jump at links[u].ptr to its final destination
+  for (u = 0; u < link_count; u++) {
+    emith_pool_check();
+    // look up local branch targets (targets[] is searched by links[u].pc)
+    v = find_in_sorted_linkage(targets, target_count, links[u].pc);
+    if (v >= 0) {
+      if (! targets[v].ptr) {
+        // forward branch not yet resolved, prepare external linking
+        emith_jump_patch(links[u].ptr, tcache_ptr, NULL);
+        bl = dr_prepare_ext_branch(block->entryp, links[u].pc, sh2->is_slave, tcache_id);
+        if (bl) {
+          emith_flush(); // flush to inhibit insn swapping
+          bl->type = BL_LDJMP;
+        }
+        // code emitted here: load the target pc, then a patchable jump to the dispatcher
+        tmp = rcache_get_tmp_arg(0);
+        emith_move_r_imm(tmp, links[u].pc);
+        rcache_free_tmp(tmp);
+        emith_jump_patchable(sh2_drc_dispatcher);
+      } else if (emith_jump_patch_inrange(links[u].ptr, targets[v].ptr)) {
+        // inrange local branch: point the original jump straight at the target code
+        emith_jump_patch(links[u].ptr, targets[v].ptr, NULL);
+      } else {
+        // far local branch: original jump goes here, and a jump emitted here reaches the target
+        emith_jump_patch(links[u].ptr, tcache_ptr, NULL);
+        emith_jump(targets[v].ptr);
+      }
+    } else {
+      // external or exit, emit blx area entry; bit 0 of pc selects exit over dispatcher
+      void *target = (links[u].pc & 1 ? sh2_drc_exit : sh2_drc_dispatcher);
+      if (links[u].bl)
+        links[u].bl->blx = tcache_ptr; // record where this link's blx entry starts
+      emith_jump_patch(links[u].ptr, tcache_ptr, NULL);
+      tmp = rcache_get_tmp_arg(0);
+      emith_move_r_imm(tmp, links[u].pc & ~1); // pc without the exit marker bit
+      rcache_free_tmp(tmp);
+      emith_jump(target);
+    }
+  }
+}
+
 #define DELAY_SAVE_T(sr) { \
   int t_ = rcache_get_tmp(); \
   emith_bic_r_imm(sr, T_save); \
@@ -2887,17 +2932,10 @@ static void *dr_get_pc_base(u32 pc, SH2 *sh2);
 static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
 {
   // branch targets in current block
-  u32 branch_target_pc[MAX_LOCAL_BRANCHES];
-  void *branch_target_ptr[MAX_LOCAL_BRANCHES];
+  struct linkage branch_targets[MAX_LOCAL_TARGETS];
   int branch_target_count = 0;
-  // unresolved local forward branches, for fixup at block end
-  u32 branch_patch_pc[MAX_LOCAL_BRANCHES];
-  void *branch_patch_ptr[MAX_LOCAL_BRANCHES];
-  int branch_patch_count = 0;
-  // external branch targets with a block link/exit area
-  u32 blx_target_pc[MAX_LOCAL_BRANCHES];
-  void *blx_target_ptr[MAX_LOCAL_BRANCHES];
-  struct block_link *blx_target_bl[MAX_LOCAL_BRANCHES];
+  // unresolved local or external targets with block link/exit area if needed
+  struct linkage blx_targets[MAX_LOCAL_BRANCHES];
   int blx_target_count = 0;
 
   u8 op_flags[BLOCK_INSN_LIMIT];
@@ -2906,6 +2944,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
     int delay_reg:8;
     u32 loop_type:8;
     u32 polling:8;
+    u32 pinning:1;
     u32 test_irq:1;
     u32 pending_branch_direct:1;
     u32 pending_branch_indirect:1;
@@ -2914,23 +2953,20 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
 #if LOOP_OPTIMIZER
   // loops with pinned registers for optimzation
   // pinned regs are like statics and don't need saving/restoring inside a loop
-  u32 pinned_loop_pc[MAX_LOCAL_BRANCHES/16];
-  void *pinned_loop_ptr[MAX_LOCAL_BRANCHES/16];
-  u32 pinned_loop_mask[MAX_LOCAL_BRANCHES/16];
+  struct linkage pinned_loops[MAX_LOCAL_TARGETS/16];
   int pinned_loop_count = 0;
 #endif
 
   // PC of current, first, last SH2 insn
   u32 pc, base_pc, end_pc;
   u32 base_literals, end_literals;
-  void *block_entry_ptr;
+  u8 *block_entry_ptr;
   struct block_desc *block;
   struct block_entry *entry;
   struct block_link *bl;
   u16 *dr_pc_base;
   struct op_data *opd;
   int blkid_main = 0;
-  int skip_op = 0;
   int tmp, tmp2;
   int cycles;
   int i, v;
@@ -2971,8 +3007,15 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
   for (pc = base_pc, i = 0; pc < end_pc; i++, pc += 2) {
     if (op_flags[i] & OF_DELAY_OP)
       op_flags[i] &= ~OF_BTARGET;
-    if (op_flags[i] & OF_BTARGET)
-      ADD_TO_ARRAY(branch_target_pc, branch_target_count, pc, );
+    if (op_flags[i] & OF_BTARGET) {
+      if (branch_target_count < ARRAY_SIZE(branch_targets))
+        branch_targets[branch_target_count++] = (struct linkage) { .pc = pc };
+      else {
+        printf("warning: linkage overflow\n");
+        end_pc = pc;
+        break;
+      }
+    }
     if (ops[i].op == OP_LDC && (ops[i].dest & BITMASK1(SHR_SR)) && pc+2 < end_pc)
       op_flags[i+1] |= OF_BTARGET; // RTE entrypoint in case of SR.IMASK change
     // unify T and SR since rcache doesn't know about "virtual" guest regs
@@ -3040,9 +3083,9 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
         if (op_flags[v] & OF_BASIC_LOOP) {
           m3 &= ~rcache_regs_static & ~BITMASK5(SHR_PC, SHR_PR, SHR_SR, SHR_T, SHR_MEM);
           if (m3 && count_bits(m3) < count_bits(rcache_vregs_reg) &&
-              pinned_loop_count < ARRAY_SIZE(pinned_loop_pc)-1) {
-            pinned_loop_mask[pinned_loop_count] = m3;
-            pinned_loop_pc[pinned_loop_count++] = base_pc + 2*v;
+              pinned_loop_count < ARRAY_SIZE(pinned_loops)-1) {
+            pinned_loops[pinned_loop_count++] =
+                (struct linkage) { .mask = m3, .pc = base_pc + 2*v };
           } else
             op_flags[v] &= ~OF_BASIC_LOOP;
         }
@@ -3052,10 +3095,6 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
 #endif
   }
 
-  if (branch_target_count > 0) {
-    memset(branch_target_ptr, 0, sizeof(branch_target_ptr[0]) * branch_target_count);
-  }
-
   tcache_ptr = dr_prepare_cache(tcache_id, (end_pc - base_pc) / 2);
 #if (DRC_DEBUG & 4)
   tcache_dsm_ptrs[tcache_id] = tcache_ptr;
@@ -3076,7 +3115,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
   emith_invalidate_t();
   drcf = (struct drcf) { 0 };
 #if LOOP_OPTIMIZER
-  pinned_loop_pc[pinned_loop_count] = -1;
+  pinned_loops[pinned_loop_count].pc = -1;
   pinned_loop_count = 0;
 #endif
 
@@ -3090,24 +3129,6 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
     int tmp3, tmp4;
     int sr;
 
-    opd = &ops[i];
-    op = FETCH_OP(pc);
-
-#if (DRC_DEBUG & 2)
-    insns_compiled++;
-#endif
-#if (DRC_DEBUG & 4)
-    DasmSH2(sh2dasm_buff, pc, op);
-    if (op_flags[i] & OF_BTARGET) {
-      if ((op_flags[i] & OF_LOOP) == OF_DELAY_LOOP)     tmp3 = '+';
-      else if ((op_flags[i] & OF_LOOP) == OF_POLL_LOOP) tmp3 = '=';
-      else if ((op_flags[i] & OF_LOOP) == OF_IDLE_LOOP) tmp3 = '~';
-      else                                              tmp3 = '*';
-    } else if (drcf.loop_type)                          tmp3 = '.';
-    else                                                tmp3 = ' ';
-    printf("%c%08x %04x %s\n", tmp3, pc, op, sh2dasm_buff);
-#endif
-
     if (op_flags[i] & OF_BTARGET)
     {
       if (pc != base_pc)
@@ -3143,9 +3164,9 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
         break;
       }
 
-      v = find_in_sorted_array(branch_target_pc, branch_target_count, pc);
+      v = find_in_sorted_linkage(branch_targets, branch_target_count, pc);
       if (v >= 0)
-        branch_target_ptr[v] = tcache_ptr;
+        branch_targets[v].ptr = tcache_ptr;
 #if LOOP_DETECTION
       drcf.loop_type = op_flags[i] & OF_LOOP;
       drcf.delay_reg = -1;
@@ -3176,12 +3197,13 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
 
 #if LOOP_OPTIMIZER
       if (op_flags[i] & OF_BASIC_LOOP) {
-        if (pinned_loop_pc[pinned_loop_count] == pc) {
+        if (pinned_loops[pinned_loop_count].pc == pc) {
           // pin needed regs on loop entry 
-          FOR_ALL_BITS_SET_DO(pinned_loop_mask[pinned_loop_count], v, rcache_pin_reg(v));
+          FOR_ALL_BITS_SET_DO(pinned_loops[pinned_loop_count].mask, v, rcache_pin_reg(v));
           emith_flush();
           // store current PC as loop target
-          pinned_loop_ptr[pinned_loop_count] = tcache_ptr;
+          pinned_loops[pinned_loop_count].ptr = tcache_ptr;
+          drcf.pinning = 1;
         } else
           op_flags[i] &= ~OF_BASIC_LOOP;
       }
@@ -3193,11 +3215,10 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
         EMITH_JMP_START(DCOND_GT);
         rcache_save_pinned();
 
-        if (blx_target_count < ARRAY_SIZE(blx_target_pc)) {
+        if (blx_target_count < ARRAY_SIZE(blx_targets)) {
           // exit via stub in blx table (saves some 1-3 insns in the main flow)
-          blx_target_ptr[blx_target_count] = tcache_ptr;
-          blx_target_pc[blx_target_count] = pc|1;
-          blx_target_bl[blx_target_count++] = NULL;
+          blx_targets[blx_target_count++] =
+              (struct linkage) { .ptr = tcache_ptr, .pc = pc|1, .bl = NULL };
           emith_jump_patchable(tcache_ptr);
         } else {
           // blx table full, must inline exit code
@@ -3210,12 +3231,11 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
       } else
 #endif
       {
-        if (blx_target_count < ARRAY_SIZE(blx_target_pc)) {
+        if (blx_target_count < ARRAY_SIZE(blx_targets)) {
           // exit via stub in blx table (saves some 1-3 insns in the main flow)
-          blx_target_pc[blx_target_count] = pc|1;
-          blx_target_bl[blx_target_count] = NULL;
           emith_cmp_r_imm(sr, 0);
-          blx_target_ptr[blx_target_count++] = tcache_ptr;
+          blx_targets[blx_target_count++] =
+              (struct linkage) { .ptr = tcache_ptr, .pc = pc|1, .bl = NULL };
           emith_jump_cond_patchable(DCOND_LE, tcache_ptr);
         } else {
           // blx table full, must inline exit code
@@ -3282,13 +3302,40 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
     }
 #endif
 
+    // emit blx area if limits are approached
+    if (blx_target_count && (blx_target_count > ARRAY_SIZE(blx_targets)-4 || 
+        !emith_jump_patch_inrange(blx_targets[0].ptr, tcache_ptr+0x100))) {
+      u8 *jp;
+      rcache_invalidate_tmp();
+      jp = tcache_ptr;
+      emith_jump_patchable(tcache_ptr);
+      emit_branch_linkage_code(sh2, block, tcache_id, branch_targets,
+                          branch_target_count, blx_targets, blx_target_count);
+      blx_target_count = 0;
+      do_host_disasm(tcache_id);
+      emith_jump_patch(jp, tcache_ptr, NULL);
+    }
+
     emith_pool_check();
-    pc += 2;
 
-    if (skip_op > 0) {
-      skip_op--;
-      continue;
-    }
+    opd = &ops[i];
+    op = FETCH_OP(pc);
+#if (DRC_DEBUG & 4)
+    DasmSH2(sh2dasm_buff, pc, op);
+    if (op_flags[i] & OF_BTARGET) {
+      if ((op_flags[i] & OF_LOOP) == OF_DELAY_LOOP)     tmp3 = '+';
+      else if ((op_flags[i] & OF_LOOP) == OF_POLL_LOOP) tmp3 = '=';
+      else if ((op_flags[i] & OF_LOOP) == OF_IDLE_LOOP) tmp3 = '~';
+      else                                              tmp3 = '*';
+    } else if (drcf.loop_type)                          tmp3 = '.';
+    else                                                tmp3 = ' ';
+    printf("%c%08x %04x %s\n", tmp3, pc, op, sh2dasm_buff);
+#endif
+
+    pc += 2;
+#if (DRC_DEBUG & 2)
+    insns_compiled++;
+#endif
 
     if (op_flags[i] & OF_DELAY_OP)
     {
@@ -4422,7 +4469,7 @@ end_op:
         emit_sync_t_to_sr();
         emith_sh2_delay_loop(cycles, drcf.delay_reg);
         rcache_unlock_all(); // may lock delay_reg
-        drcf.polling = drcf.loop_type = 0;
+        drcf.polling = drcf.loop_type = drcf.pinning = 0;
       }
 #endif
 
@@ -4464,33 +4511,39 @@ end_op:
         emith_sync_t(sr);
       // no modification of host status/flags between here and branching!
 
-      v = find_in_sorted_array(branch_target_pc, branch_target_count, target_pc);
+      v = find_in_sorted_linkage(branch_targets, branch_target_count, target_pc);
       if (v >= 0)
       {
         // local branch
-        if (branch_target_ptr[v]) {
+        if (branch_targets[v].ptr) {
           // local backward jump, link here now since host PC is already known
-          target = branch_target_ptr[v];
+          target = branch_targets[v].ptr;
 #if LOOP_OPTIMIZER
-          if (pinned_loop_pc[pinned_loop_count] == target_pc) {
+          if (pinned_loops[pinned_loop_count].pc == target_pc) {
             // backward jump at end of optimized loop
             rcache_unpin_all();
-            target = pinned_loop_ptr[pinned_loop_count];
+            target = pinned_loops[pinned_loop_count].ptr;
             pinned_loop_count ++;
           }
 #endif
-          if (cond != -1)
-            emith_jump_cond(cond, target);
-          else {
+          if (cond != -1) {
+            if (emith_jump_patch_inrange(tcache_ptr, target)) {
+              emith_jump_cond(cond, target);
+            } else {
+              // not reachable directly, must use far branch
+              EMITH_JMP_START(emith_invert_cond(cond));
+              emith_jump(target);
+              EMITH_JMP_END(emith_invert_cond(cond));
+            }
+          } else {
             emith_jump(target);
             rcache_invalidate();
           }
-        } else if (branch_patch_count < MAX_LOCAL_BRANCHES) {
+        } else if (blx_target_count < MAX_LOCAL_BRANCHES) {
           // local forward jump
           target = tcache_ptr;
-          branch_patch_pc[branch_patch_count] = target_pc;
-          branch_patch_ptr[branch_patch_count] = target;
-          branch_patch_count++;
+          blx_targets[blx_target_count++] =
+              (struct linkage) { .pc = target_pc, .ptr = target, .bl = NULL };
           if (cond != -1)
             emith_jump_cond_patchable(cond, target);
           else {
@@ -4498,7 +4551,7 @@ end_op:
             rcache_invalidate();
           }
         } else
-          dbg(1, "warning: too many local branches");
+          dbg(1, "warning: too many unresolved branches");
       }
 
       if (target == NULL)
@@ -4507,13 +4560,12 @@ end_op:
         bl = dr_prepare_ext_branch(block->entryp, target_pc, sh2->is_slave, tcache_id);
         if (cond != -1) {
 #if 1
-          if (bl && blx_target_count < ARRAY_SIZE(blx_target_pc)) {
+          if (bl && blx_target_count < ARRAY_SIZE(blx_targets)) {
             // conditional jumps get a blx stub for the far jump
-            blx_target_pc[blx_target_count] = target_pc;
-            blx_target_bl[blx_target_count] = bl;
-            blx_target_ptr[blx_target_count++] = tcache_ptr;
             bl->type = BL_JCCBLX;
             target = tcache_ptr;
+            blx_targets[blx_target_count++] =
+                (struct linkage) { .pc = target_pc, .ptr = target, .bl = bl };
             emith_jump_cond_patchable(cond, target);
           } else {
             // not linkable, or blx table full; inline jump @dispatcher
@@ -4660,44 +4712,15 @@ end_op:
   } else
     rcache_flush();
 
-  // emit blx area
-  for (i = 0; i < blx_target_count; i++) {
-    void *target = (blx_target_pc[i] & 1 ? sh2_drc_exit : sh2_drc_dispatcher);
-
-    emith_pool_check();
-    bl = blx_target_bl[i];
-    if (bl)
-      bl->blx = tcache_ptr;
-    emith_jump_patch(blx_target_ptr[i], tcache_ptr, NULL);
-    tmp = rcache_get_tmp_arg(0);
-    emith_move_r_imm(tmp, blx_target_pc[i] & ~1);
-    emith_jump(target);
-    rcache_invalidate();
-  }
+  // link unresolved branches, emitting blx area entries as needed
+  emit_branch_linkage_code(sh2, block, tcache_id, branch_targets,
+                      branch_target_count, blx_targets, blx_target_count);
 
   emith_flush();
   do_host_disasm(tcache_id);
 
   emith_pool_commit(0);
 
-  // link local branches
-  for (i = 0; i < branch_patch_count; i++) {
-    void *target;
-    int t;
-    t = find_in_sorted_array(branch_target_pc, branch_target_count, branch_patch_pc[i]);
-    target = branch_target_ptr[t];
-    if (target == NULL) {
-      // flush pc and go back to dispatcher (this should no longer happen)
-      dbg(1, "stray branch to %08x %p", branch_patch_pc[i], tcache_ptr);
-      target = tcache_ptr;
-      tmp = rcache_get_tmp_arg(0);
-      emith_move_r_imm(tmp, branch_patch_pc[i]);
-      emith_jump(sh2_drc_dispatcher);
-      rcache_flush();
-    }
-    emith_jump_patch(branch_patch_ptr[i], target, NULL);
-  }
-
   // fill blx backup; do this last to backup final patched code
   for (i = 0; i < block->entry_count; i++)
     for (bl = block->entryp[i].o_links; bl; bl = bl->o_next)
@@ -4927,7 +4950,7 @@ static void sh2_generate_utils(void)
   // pc = sh2_drc_dispatcher_call(u32 pc)
   sh2_drc_dispatcher_call = (void *)tcache_ptr;
   emith_ctx_read(arg2, offsetof(SH2, rts_cache_idx));
-  emith_add_r_imm(arg2, 2*sizeof(void *));
+  emith_add_r_imm(arg2, (u32)(2*sizeof(void *)));
   emith_and_r_imm(arg2, (ARRAY_SIZE(sh2s->rts_cache)-1) * 2*sizeof(void *));
   emith_ctx_write(arg2, offsetof(SH2, rts_cache_idx));
   emith_add_r_r_r_lsl_ptr(arg2, CONTEXT_REG, arg2, 0);
@@ -4957,7 +4980,7 @@ static void sh2_generate_utils(void)
   emith_jump_cond(DCOND_NE, sh2_drc_dispatcher);
 #endif
   emith_read_r_r_offs_ptr(arg0, arg1, offsetof(SH2, rts_cache) + sizeof(void *));
-  emith_sub_r_imm(arg2, 2*sizeof(void *));
+  emith_sub_r_imm(arg2, (u32)(2*sizeof(void *)));
   emith_and_r_imm(arg2, (ARRAY_SIZE(sh2s->rts_cache)-1) * 2*sizeof(void *));
   emith_ctx_write(arg2, offsetof(SH2, rts_cache_idx));
 #if (DRC_DEBUG & 128)
index 415f01b..44620f4 100644 (file)
@@ -41,6 +41,8 @@ unsigned short scan_block(unsigned int base_pc, int is_slave,
 #define        DRC_SR_REG      "r28"
 #elif defined(__mips__)
 #define        DRC_SR_REG      "s6"
+#elif defined(__riscv__) || defined(__riscv)
+#define        DRC_SR_REG      "s11"
 #elif defined(__i386__)
 #define        DRC_SR_REG      "edi"
 #elif defined(__x86_64__)
index 7e2e039..896b5aa 100644 (file)
@@ -506,7 +506,7 @@ void sync_sh2s_normal(unsigned int m68k_target)
         if (CYCLES_GT(now, ssh2.m68krcycles_done))
           now = ssh2.m68krcycles_done;
       }
-      if (now - timer_cycles >= STEP_N) {
+      if (CYCLES_GT(now, timer_cycles+STEP_N)) {
         p32x_timers_do(now - timer_cycles);
         timer_cycles = now;
       }
index 41c0f7a..f9888f2 100644 (file)
@@ -127,7 +127,7 @@ static const struct insn special_insns[] = {
        {0x38, S_IMM_DT, "dsll"},
        {0x3A, S_IMM_DT, "dsrl"},
        {0x3B, S_IMM_DT, "dsra"},
-       {0x3D, S_IMM_DT, "dsll32"},
+       {0x3C, S_IMM_DT, "dsll32"},
        {0x3E, S_IMM_DT, "dsrl32"},
        {0x3F, S_IMM_DT, "dsra32"},
 };
index 887d783..9366526 100644 (file)
@@ -29,7 +29,7 @@ void pemu_prep_defconfig(void)
 \r
 void pemu_validate_config(void)\r
 {\r
-#if !defined(__arm__) && !defined(__aarch64__) && !defined(__mips__) && !defined(__i386__) && !defined(__x86_64__)\r
+#if !defined(__arm__) && !defined(__aarch64__) && !defined(__mips__) && !defined(__riscv__) &&  !defined(__riscv) && !defined(__i386__) && !defined(__x86_64__)\r
        PicoIn.opt &= ~POPT_EN_DRC;\r
 #endif\r
 }\r
index 8a0557c..349b860 100755 (executable)
@@ -16,7 +16,7 @@ compile_rodata ()
        # $CC $CFLAGS -I .. -nostdlib -Wl,-edummy /tmp/getoffs.c \
        #                                       -o /tmp/getoffs.o || exit 1
        # find the name of the .rodata section (in case -fdata-sections is used)
-       rosect=$(readelf -S /tmp/getoffs.o | grep '\.rodata' |
+       rosect=$(readelf -S /tmp/getoffs.o | grep '\.rodata\|\.sdata' |
                                                sed 's/^[^.]*././;s/ .*//')
        # read out .rodata section as hex string (should be only 4 or 8 bytes)
        ro=$(readelf -x $rosect /tmp/getoffs.o | grep '0x' | cut -c14-48 |