From: kub Date: Wed, 13 Nov 2019 20:05:35 +0000 (+0100) Subject: sh2 drc: RISC-V (RV64IM) code emitter, some work on MIPS64 X-Git-Tag: v2.00~815 X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=e7ee50107538e504f6ece2ccffdc63b98fbd2ba7;p=picodrive.git sh2 drc: RISC-V (RV64IM) code emitter, some work on MIPS64 --- diff --git a/Makefile b/Makefile index a79c054b..0a0ab127 100644 --- a/Makefile +++ b/Makefile @@ -68,6 +68,10 @@ else ifneq (,$(findstring aarch64,$(ARCH))) use_fame ?= 1 use_cz80 ?= 1 use_sh2drc ?= 1 +else ifneq (,$(findstring riscv,$(ARCH))) +use_fame ?= 1 +use_cz80 ?= 1 +use_sh2drc ?= 1 endif -include Makefile.local @@ -266,7 +270,7 @@ pico/carthw_cfg.c: pico/carthw.cfg # random deps pico/carthw/svp/compiler.o : cpu/drc/emit_arm.c cpu/sh2/compiler.o : cpu/drc/emit_arm.c cpu/drc/emit_arm64.c -cpu/sh2/compiler.o : cpu/drc/emit_x86.c cpu/drc/emit_mips.c +cpu/sh2/compiler.o : cpu/drc/emit_x86.c cpu/drc/emit_mips.c cpu/drc/emit_riscv.c cpu/sh2/mame/sh2pico.o : cpu/sh2/mame/sh2.c pico/pico.o pico/cd/mcd.o pico/32x/32x.o : pico/pico_cmn.c pico/pico_int.h pico/memory.o pico/cd/memory.o pico/32x/memory.o : pico/pico_int.h pico/memory.h diff --git a/config.aarch64 b/config.aarch64 deleted file mode 100644 index 9631d64e..00000000 --- a/config.aarch64 +++ /dev/null @@ -1,14 +0,0 @@ -# Automatically generated by configure -# Configured with: './configure' '--platform=generic' -CC = aarch64-linux-gnu-gcc -CXX = aarch64-linux-gnu-g++ -AS = aarch64-linux-gnu-as -STRIP = aarch64-linux-gnu-strip -CFLAGS += -I/usr/include/SDL -D_GNU_SOURCE=1 -D_REENTRANT -Wno-unused-result -ASFLAGS += -LDFLAGS += # --sysroot ${HOME}/opt/aarch64/debian-arm64 -LDLIBS += -lSDL -lasound -lpng -lz -lm -ldl - -ARCH = aarch64 -PLATFORM = generic -SOUND_DRIVERS = oss alsa sdl diff --git a/config.caanoo b/config.caanoo deleted file mode 100644 index 1ffc54da..00000000 --- a/config.caanoo +++ /dev/null @@ -1,16 +0,0 @@ -# Automatically generated by configure -# Configured with: './configure' '--platform=gp2x' -CC = arm-gph-linux-gnueabi-gcc -CXX = arm-gph-linux-gnueabi-g++ -AS = arm-gph-linux-gnueabi-as -STRIP = arm-gph-linux-gnueabi-strip -CFLAGS += -I${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/include -I${HOME}/src/gp2x/armroot-eabi/include -D__GP2X__ -Wno-unused-result -CFLAGS += -mfloat-abi=soft -mcpu=arm920t -mtune=arm920t -mno-thumb-interwork -fno-stack-protector -fno-common -CFLAGS += -finline-limit=42 -fipa-pta -fno-ipa-pure-const -ASFLAGS += -mfloat-abi=soft -mcpu=arm920t -LDFLAGS += --sysroot ${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root -L${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/lib -L${HOME}/src/gp2x/armroot-eabi/lib -static -LDLIBS += -lpng -lm -ldl - -ARCH = arm -PLATFORM = gp2x -SOUND_DRIVERS = oss diff --git a/config.caanoo47 b/config.caanoo47 deleted file mode 100644 index 5bcf8608..00000000 --- a/config.caanoo47 +++ /dev/null @@ -1,16 +0,0 @@ -# Automatically generated by configure -# Configured with: './configure' '--platform=gp2x' -CC = arm-linux-gnueabi-gcc -CXX = arm-linux-gnueabi-g++ -AS = arm-linux-gnueabi-as -STRIP = arm-linux-gnueabi-strip -CFLAGS += -I${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/include -I${HOME}/src/gp2x/armroot-eabi/include -D__GP2X__ -Wno-unused-result -CFLAGS += -mfloat-abi=soft -mcpu=arm920t -mtune=arm920t -mno-thumb-interwork -fno-stack-protector -fno-common -CFLAGS += -finline-limit=42 -fipa-pta -fno-ipa-sra -fno-ipa-pure-const -ASFLAGS += -mfloat-abi=soft -mcpu=arm920t -LDFLAGS += -B${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/lib/gcc/arm-gph-linux-gnueabi/4.2.4 -B${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/lib -L${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/lib -L${HOME}/src/gp2x/armroot-eabi/lib -static -LDLIBS += -lpng -lm -ldl - -ARCH = arm -PLATFORM = gp2x -SOUND_DRIVERS = oss diff --git a/config.dingux b/config.dingux deleted file mode 100644 index b981bd3f..00000000 --- a/config.dingux +++ /dev/null @@ -1,15 +0,0 @@ -# Automatically generated by configure -# Configured with: './configure' '--platform=opendingux' -CC = mipsel-linux-gcc -CXX = mipsel-linux-g++ -AS = mipsel-linux-as -STRIP = mipsel-linux-strip -CFLAGS += -I${HOME}/opt/opendingux-toolchain/usr/include/ -I${HOME}/opt/opendingux-toolchain/usr/include/SDL -CFLAGS += -D_GNU_SOURCE=1 -D_REENTRANT -Wno-unused-result -fno-stack-protector -ASFLAGS += -LDFLAGS += --sysroot ${HOME}/opt/opendingux-toolchain -L${HOME}/opt/opendingux-toolchain/lib -LDLIBS += -lSDL -lasound -lpng -lz -lm -ldl - -ARCH = mipsel -PLATFORM = opendingux -SOUND_DRIVERS = sdl diff --git a/config.dingux54 b/config.dingux54 deleted file mode 100644 index a232d952..00000000 --- a/config.dingux54 +++ /dev/null @@ -1,15 +0,0 @@ -# Automatically generated by configure -# Configured with: './configure' '--platform=opendingux' -CC = mipsel-linux-gnu-gcc -CXX = mipsel-linux-gnu-g++ -AS = mipsel-linux-gnu-as -STRIP = mipsel-linux-gnu-strip -CFLAGS += -I${HOME}/opt/opendingux-toolchain/usr/include/ -I${HOME}/opt/opendingux-toolchain/usr/include/SDL -CFLAGS += -D_GNU_SOURCE=1 -D_REENTRANT -Wno-unused-result -fno-stack-protector -ASFLAGS += -LDFLAGS += -B${HOME}/opt/opendingux-toolchain/usr/lib -B${HOME}/opt/opendingux-toolchain/lib -Wl,-rpath-link=${HOME}/opt/opendingux-toolchain/usr/lib -Wl,-rpath-link=${HOME}/opt/opendingux-toolchain/lib -LDLIBS += -lSDL -lasound -lpng -lz -lm -ldl - -ARCH = mipsel -PLATFORM = opendingux -SOUND_DRIVERS = sdl diff --git a/config.gcw0 b/config.gcw0 deleted file mode 100644 index cebe79a1..00000000 --- a/config.gcw0 +++ /dev/null @@ -1,15 +0,0 @@ -# Automatically generated by configure -# Configured with: './configure' '--platform=gcw0' -CC = mipsel-gcw0-linux-uclibc-gcc -CXX = mipsel-gcw0-linux-uclibc-g++ -AS = mipsel-gcw0-linux-uclibc-as -STRIP = mipsel-gcw0-linux-uclibc-strip -CFLAGS += -I${HOME}/opt/gcw0-toolchain/usr/mipsel-gcw0-linux-uclibc/sysroot/usr/include/ -I${HOME}/opt/gcw0-toolchain/usr/mipsel-gcw0-linux-uclibc/sysroot/usr/include/SDL -CFLAGS += -D_GNU_SOURCE=1 -D_REENTRANT -Wno-unused-result -fno-stack-protector -ASFLAGS += -LDFLAGS += --sysroot ${HOME}/opt/gcw0-toolchain/usr/mipsel-gcw0-linux-uclibc/sysroot -LDLIBS += -lSDL -lasound -lpng -lz -lm -ldl - -ARCH = mipsel -PLATFORM = opendingux -SOUND_DRIVERS = sdl diff --git a/config.gp2x b/config.gp2x deleted file mode 100644 index cf99bd77..00000000 --- a/config.gp2x +++ /dev/null @@ -1,16 +0,0 @@ -# Automatically generated by configure -# Configured with: './configure' '--platform=gp2x' -CC = arm-open2x-linux-gcc -CXX = arm-open2x-linux-g++ -AS = arm-open2x-linux-as -STRIP = arm-open2x-linux-strip -CFLAGS += -I${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/include -I${HOME}/src/gp2x/armroot/include -D__GP2X__ -CFLAGS += -mfloat-abi=soft -mcpu=arm920t -mtune=arm920t -mno-thumb-interwork -fno-stack-protector -fno-common -CFLAGS += -finline-limit=42 -fipa-cp -fno-ipa-pure-const -ASFLAGS += -mcpu=arm920t -mfloat-abi=soft -LDFLAGS += --sysroot ${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux -L${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib -L${HOME}/src/gp2x/armroot/lib -static -LDLIBS += -lpng -lm -ldl - -ARCH = arm -PLATFORM = gp2x -SOUND_DRIVERS = oss diff --git a/config.gp2x47 b/config.gp2x47 deleted file mode 100644 index 8a86e850..00000000 --- a/config.gp2x47 +++ /dev/null @@ -1,16 +0,0 @@ -# Automatically generated by configure -# Configured with: './configure' '--platform=gp2x' -CC = arm-linux-gnueabi-gcc -CXX = arm-linux-gnueabi-g++ -AS = arm-linux-gnueabi-as -STRIP = arm-linux-gnueabi-strip -CFLAGS += -I${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/include -I${HOME}/src/gp2x/armroot/include -D__GP2X__ -Wno-unused-result -CFLAGS += -mabi=apcs-gnu -mfloat-abi=soft -mfpu=fpa -mcpu=arm920t -mtune=arm920t -mno-thumb-interwork -fno-stack-protector -fno-common -CFLAGS += -finline-limit=42 -fipa-pta -fno-ipa-sra -fno-ipa-pure-const -ASFLAGS += -mabi=apcs-gnu -mfloat-abi=soft -mfpu=fpa -mcpu=arm920t -LDFLAGS += -mabi=apcs-gnu -mfpu=fpa -B${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/lib/gcc/arm-open2x-linux/4.1.1 -B${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib -L${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/usr/lib -L${HOME}/src/gp2x/armroot/lib -static -LDLIBS += -lpng -lm -ldl - -ARCH = arm -PLATFORM = gp2x -SOUND_DRIVERS = oss diff --git a/config.i386 b/config.i386 deleted file mode 100644 index 9c8c2e65..00000000 --- a/config.i386 +++ /dev/null @@ -1,14 +0,0 @@ -# Automatically generated by configure -# Configured with: './configure' '--platform=generic' -CC = gcc -CXX = g++ -AS = as -STRIP = strip -CFLAGS += -I/usr/include/SDL -D_GNU_SOURCE=1 -D_REENTRANT -Wno-unused-result -m32 -ASFLAGS += -LDFLAGS += -m32 -L/usr/lib/i386-linux-gnu -L${HOME}/opt/lib32 -LDLIBS += -lSDL-1.2 -lasound -lpng -lz -lm -ldl - -ARCH = i386 -PLATFORM = generic -SOUND_DRIVERS = oss alsa sdl diff --git a/config.x86 b/config.x86 deleted file mode 100644 index 45440011..00000000 --- a/config.x86 +++ /dev/null @@ -1,14 +0,0 @@ -# Automatically generated by configure -# Configured with: './configure' '--platform=generic' -CC = gcc -CXX = g++ -AS = as -STRIP = strip -CFLAGS += -I/usr/include/SDL -D_GNU_SOURCE=1 -D_REENTRANT -Wno-unused-result -ASFLAGS += -LDFLAGS += -L/usr/lib/x86_64-linux-gnu -LDLIBS += -lSDL-1.2 -lasound -lpng -lz -lm -ldl - -ARCH = x86_64 -PLATFORM = generic -SOUND_DRIVERS = oss alsa sdl diff --git a/cpu/drc/emit_arm.c b/cpu/drc/emit_arm.c index 25a2c72f..8f633fa3 100644 --- a/cpu/drc/emit_arm.c +++ b/cpu/drc/emit_arm.c @@ -365,7 +365,7 @@ static void emith_flush(void) #define EOP_LDR_REG_LSL(cond,rd,rn,rm,shift_imm) EOP_C_AM2_REG(cond,1,0,1,rn,rd,shift_imm,A_AM1_LSL,rm) #define EOP_LDR_REG_LSL_WB(cond,rd,rn,rm,shift_imm) EOP_C_AM2_REG(cond,1,0,3,rn,rd,shift_imm,A_AM1_LSL,rm) -#define EOP_LDRB_REG_LSL(cond,rd,rn,rm,shift_imm) EOP_C_AM2_REG(cond,1,1,1,rn,rd,shift_imm,A_AM1_LSL,rm); +#define EOP_LDRB_REG_LSL(cond,rd,rn,rm,shift_imm) EOP_C_AM2_REG(cond,1,1,1,rn,rd,shift_imm,A_AM1_LSL,rm) #define EOP_STR_REG_LSL_WB(cond,rd,rn,rm,shift_imm) EOP_C_AM2_REG(cond,1,0,2,rn,rd,shift_imm,A_AM1_LSL,rm) #define EOP_LDRH_IMM2(cond,rd,rn,offset_8) EOP_C_AM3_IMM(cond,(offset_8) >= 0,1,rn,rd,0,1,abs(offset_8)) @@ -470,84 +470,89 @@ static void emith_op_imm2(int cond, int s, int op, int rd, int rn, unsigned int if (cond == A_COND_NV) return; - switch (op) { - case A_OP_MOV: - rn = 0; - // count bits in imm and use MVN if more bits 1 than 0 - if (count_bits(imm) > 16) { - imm = ~imm; - op = A_OP_MVN; - } - // count insns needed for mov/orr #imm + do { + u32 u; + // try to get the topmost byte empty to possibly save an insn for (v = imm, ror2 = 0; (v >> 24) && ror2 < 32/2; ror2++) v = (v << 2) | (v >> 30); -#ifdef HAVE_ARMV7 - for (i = 2; i > 0; i--, v >>= 8) - while (v > 0xff && !(v & 3)) - v >>= 2; - if (v) { // 3+ insns needed... - if (op == A_OP_MVN) - imm = ~imm; - // ...prefer movw/movt - EOP_MOVW(rd, imm); - if (imm & 0xffff0000) - EOP_MOVT(rd, imm); - return; - } -#else - for (i = 3; i > 0; i--, v >>= 8) - while (v > 0xff && !(v & 3)) - v >>= 2; - if (v) { // 4 insns needed... - if (op == A_OP_MVN) + + switch (op) { + case A_OP_MOV: + rn = 0; + // use MVN if more bits 1 than 0 + if (count_bits(imm) > 16) { imm = ~imm; - // ...emit literal load - int idx, o; - if (literal_iindex >= MAX_HOST_LITERALS) { - elprintf(EL_STATUS|EL_SVP|EL_ANOMALY, - "pool overflow"); - exit(1); + op = A_OP_MVN; + ror2 = -1; + break; + } + // count insns needed for mov/orr #imm +#ifdef HAVE_ARMV7 + for (i = 2, u = v; i > 0; i--, u >>= 8) + while (u > 0xff && !(u & 3)) + u >>= 2; + if (u) { // 3+ insns needed... + if (op == A_OP_MVN) + imm = ~imm; + // ...prefer movw/movt + EOP_MOVW(rd, imm); + if (imm & 0xffff0000) + EOP_MOVT(rd, imm); + return; } - idx = emith_pool_literal(imm, &o); - literal_insn[literal_iindex++] = (u32 *)tcache_ptr; - EOP_LDR_IMM2(cond, rd, PC, idx * sizeof(u32)); - if (o > 0) - EOP_C_DOP_IMM(cond, A_OP_ADD, 0, rd, rd, 0, o); - else if (o < 0) - EOP_C_DOP_IMM(cond, A_OP_SUB, 0, rd, rd, 0, -o); +#else + for (i = 2, u = v; i > 0; i--, u >>= 8) + while (u > 0xff && !(u & 3)) + u >>= 2; + if (u) { // 4 insns needed... + if (op == A_OP_MVN) + imm = ~imm; + // ...emit literal load + int idx, o; + if (literal_iindex >= MAX_HOST_LITERALS) { + elprintf(EL_STATUS|EL_SVP|EL_ANOMALY, + "pool overflow"); + exit(1); + } + idx = emith_pool_literal(imm, &o); + literal_insn[literal_iindex++] = (u32 *)tcache_ptr; + EOP_LDR_IMM2(cond, rd, PC, idx * sizeof(u32)); + if (o > 0) + EOP_C_DOP_IMM(cond, A_OP_ADD, 0,rd,rd,0,o); + else if (o < 0) + EOP_C_DOP_IMM(cond, A_OP_SUB, 0,rd,rd,0,-o); return; - } + } #endif - break; + break; - case A_OP_AND: - // AND must fit into 1 insn. if not, use BIC - for (v = imm, ror2 = 0; (v >> 8) && ror2 < 32/2; ror2++) - v = (v << 2) | (v >> 30); - if (v >> 8) { - imm = ~imm; - op = A_OP_BIC; - } - break; - - case A_OP_SUB: - case A_OP_ADD: - // count bits in imm and swap ADD and SUB if more bits 1 than 0 - if (s == 0 && count_bits(imm) > 16) { - imm = -imm; - op ^= (A_OP_ADD^A_OP_SUB); + case A_OP_AND: + // AND must fit into 1 insn. if not, use BIC + for (u = v; u > 0xff && !(u & 3); u >>= 2) ; + if (u >> 8) { + imm = ~imm; + op = A_OP_BIC; + ror2 = -1; + } + break; + + case A_OP_SUB: + case A_OP_ADD: + // swap ADD and SUB if more bits 1 than 0 + if (s == 0 && count_bits(imm) > 16) { + imm = -imm; + op ^= (A_OP_ADD^A_OP_SUB); + ror2 = -1; + } + case A_OP_EOR: + case A_OP_ORR: + case A_OP_BIC: + if (s == 0 && imm == 0 && rd == rn) + return; + break; } - case A_OP_EOR: - case A_OP_ORR: - case A_OP_BIC: - if (s == 0 && imm == 0 && rd == rn) - return; - break; - } + } while (ror2 < 0); - // try to get the topmost byte empty to possibly save an insn - for (v = imm, ror2 = 0; (v >> 24) && ror2 < 32/2; ror2++) - v = (v << 2) | (v >> 30); do { // shift down to get 'best' rot2 while (v > 0xff && !(v & 3)) diff --git a/cpu/drc/emit_arm64.c b/cpu/drc/emit_arm64.c index dc0cf559..3f40d4cd 100644 --- a/cpu/drc/emit_arm64.c +++ b/cpu/drc/emit_arm64.c @@ -134,9 +134,9 @@ enum { XT_UXTW=0x4, XT_UXTX=0x6, XT_LSL=0x7, XT_SXTW=0xc, XT_SXTX=0xe }; #define A64_TST_REG(rn, rm, stype, simm) \ A64_ANDS_REG(Z0, rn, rm, stype, simm) #define A64_MOV_REG(rd, rm, stype, simm) \ - A64_OR_REG(rd, Z0, rm, stype, simm); + A64_OR_REG(rd, Z0, rm, stype, simm) #define A64_MVN_REG(rd, rm, stype, simm) \ - A64_ORN_REG(rd, Z0, rm, stype, simm); + A64_ORN_REG(rd, Z0, rm, stype, simm) // rd = rn OP (rm EXTEND simm) #define A64_ADD_XREG(rd, rn, rm, xtopt, simm) \ diff --git a/cpu/drc/emit_mips.c b/cpu/drc/emit_mips.c index 82527474..6f07e509 100644 --- a/cpu/drc/emit_mips.c +++ b/cpu/drc/emit_mips.c @@ -62,14 +62,17 @@ // opcode field (encoded in op) enum { OP__FN=000, OP__RT, OP_J, OP_JAL, OP_BEQ, OP_BNE, OP_BLEZ, OP_BGTZ }; enum { OP_ADDI=010, OP_ADDIU, OP_SLTI, OP_SLTIU, OP_ANDI, OP_ORI, OP_XORI, OP_LUI }; -enum { OP_LB=040, OP_LH, OP_LWL, OP_LW, OP_LBU, OP_LHU, OP_LWR }; -enum { OP_SB=050, OP_SH, OP_SWL, OP_SW, __(54), __(55), OP_SWR }; +enum { OP_LB=040, OP_LH, OP_LWL, OP_LW, OP_LBU, OP_LHU, OP_LWR, OP_LWU }; +enum { OP_SB=050, OP_SH, OP_SWL, OP_SW, OP_SDL, OP_SDR, OP_SWR }; +enum { OP_DADDI=030, OP_DADDIU, OP_LDL, OP_LDR, OP_SD=067, OP_LD=077 }; // function field (encoded in fn if opcode = OP__FN) enum { FN_SLL=000, __(01), FN_SRL, FN_SRA, FN_SLLV, __(05), FN_SRLV, FN_SRAV }; -enum { FN_MFHI=020, FN_MTHI, FN_MFLO, FN_MTLO }; -enum { FN_MULT=030, FN_MULTU, FN_DIV, FN_DIVU }; +enum { FN_JR=010, FN_JALR, FN_MOVZ, FN_MOVN, FN_SYNC=017 }; +enum { FN_MFHI=020, FN_MTHI, FN_MFLO, FN_MTLO, FN_DSSLV, __(25), FN_DSLRV, FN_DSRAV }; +enum { FN_MULT=030, FN_MULTU, FN_DIV, FN_DIVU, FN_DMULT, FN_DMULTU, FN_DDIV, FN_DDIVU }; enum { FN_ADD=040, FN_ADDU, FN_SUB, FN_SUBU, FN_AND, FN_OR, FN_XOR, FN_NOR }; -enum { FN_JR=010, FN_JALR, FN_MOVZ, FN_MOVN, FN_SYNC=017, FN_SLT=052, FN_SLTU }; +enum { FN_SLT=052, FN_SLTU, FN_DADD, FN_DADDU, FN_DSUB, FN_DSUBU }; +enum { FN_DSLL=070, __(71), FN_DSRL, FN_DSRA, FN_DSLL32, __(75), FN_DSRL32, FN_DSRA32 }; // rt field (encoded in rt if opcode = OP__RT) enum { RT_BLTZ=000, RT_BGEZ, RT_BLTZAL=020, RT_BGEZAL, RT_SYNCI=037 }; @@ -85,8 +88,12 @@ enum { RT_BLTZ=000, RT_BGEZ, RT_BLTZAL=020, RT_BGEZAL, RT_SYNCI=037 }; // rd = rs OP rt #define MIPS_ADD_REG(rd, rs, rt) \ MIPS_OP_REG(FN_ADDU, rd, rs, rt) +#define MIPS_DADD_REG(rd, rs, rt) \ + MIPS_OP_REG(FN_DADDU, rd, rs, rt) #define MIPS_SUB_REG(rd, rs, rt) \ MIPS_OP_REG(FN_SUBU, rd, rs, rt) +#define MIPS_DSUB_REG(rd, rs, rt) \ + MIPS_OP_REG(FN_DSUBU, rd, rs, rt) #define MIPS_NEG_REG(rd, rt) \ MIPS_SUB_REG(rd, Z0, rt) @@ -122,6 +129,8 @@ enum { RT_BLTZ=000, RT_BGEZ, RT_BLTZAL=020, RT_BGEZAL, RT_SYNCI=037 }; // rt = rs OP imm16 #define MIPS_ADD_IMM(rt, rs, imm16) \ MIPS_OP_IMM(OP_ADDIU, rt, rs, imm16) +#define MIPS_DADD_IMM(rt, rs, imm16) \ + MIPS_OP_IMM(OP_DADDIU, rt, rs, imm16) #define MIPS_XOR_IMM(rt, rs, imm16) \ MIPS_OP_IMM(OP_XORI, rt, rs, imm16) @@ -144,6 +153,11 @@ enum { RT_BLTZ=000, RT_BGEZ, RT_BLTZAL=020, RT_BGEZAL, RT_SYNCI=037 }; #define MIPS_ASR_IMM(rd, rt, bits) \ MIPS_INSN(OP__FN, _, rt, rd, bits, FN_SRA) +#define MIPS_DLSL_IMM(rd, rt, bits) \ + MIPS_INSN(OP__FN, _, rt, rd, bits, FN_DSLL) +#define MIPS_DLSL32_IMM(rd, rt, bits) \ + MIPS_INSN(OP__FN, _, rt, rd, bits, FN_DSLL32) + // rt = (rs < imm16) #define MIPS_SLT_IMM(rt, rs, imm16) \ MIPS_OP_IMM(OP_SLTI, rt, rs, imm16) @@ -193,23 +207,45 @@ enum { RT_BLTZ=000, RT_BGEZ, RT_BLTZAL=020, RT_BGEZAL, RT_SYNCI=037 }; // load/store indexed base +#define MIPS_LD(rt, rs, offs16) \ + MIPS_OP_IMM(OP_LD, rt, rs, (u16)(offs16)) #define MIPS_LW(rt, rs, offs16) \ - MIPS_INSN(OP_LW, rs, rt, _,_, (u16)(offs16)) + MIPS_OP_IMM(OP_LW, rt, rs, (u16)(offs16)) #define MIPS_LH(rt, rs, offs16) \ - MIPS_INSN(OP_LH, rs, rt, _,_, (u16)(offs16)) + MIPS_OP_IMM(OP_LH, rt, rs, (u16)(offs16)) #define MIPS_LB(rt, rs, offs16) \ - MIPS_INSN(OP_LB, rs, rt, _,_, (u16)(offs16)) + MIPS_OP_IMM(OP_LB, rt, rs, (u16)(offs16)) #define MIPS_LHU(rt, rs, offs16) \ - MIPS_INSN(OP_LHU, rs, rt, _,_, (u16)(offs16)) + MIPS_OP_IMM(OP_LHU, rt, rs, (u16)(offs16)) #define MIPS_LBU(rt, rs, offs16) \ - MIPS_INSN(OP_LBU, rs, rt, _,_, (u16)(offs16)) + MIPS_OP_IMM(OP_LBU, rt, rs, (u16)(offs16)) +#define MIPS_SD(rt, rs, offs16) \ + MIPS_OP_IMM(OP_SD, rt, rs, (u16)(offs16)) #define MIPS_SW(rt, rs, offs16) \ - MIPS_INSN(OP_SW, rs, rt, _,_, (u16)(offs16)) + MIPS_OP_IMM(OP_SW, rt, rs, (u16)(offs16)) #define MIPS_SH(rt, rs, offs16) \ - MIPS_INSN(OP_SH, rs, rt, _,_, (u16)(offs16)) + MIPS_OP_IMM(OP_SH, rt, rs, (u16)(offs16)) #define MIPS_SB(rt, rs, offs16) \ - MIPS_INSN(OP_SB, rs, rt, _,_, (u16)(offs16)) + MIPS_OP_IMM(OP_SB, rt, rs, (u16)(offs16)) + +// pointer operations + +#if __mips == 4 || __mips == 64 +#define OP_LP OP_LD +#define OP_SP OP_SD +#define OP_PADDIU OP_DADDIU +#define FN_PADDU FN_DADDU +#define FN_PSUBU FN_DSUBU +#define PTR_SCALE 3 +#else +#define OP_LP OP_LW +#define OP_SP OP_SW +#define OP_PADDIU OP_ADDIU +#define FN_PADDU FN_ADDU +#define FN_PSUBU FN_SUBU +#define PTR_SCALE 2 +#endif // XXX: tcache_ptr type for SVP and SH2 compilers differs.. #define EMIT_PTR(ptr, x) \ @@ -442,14 +478,14 @@ static void emith_set_arith_flags(int rd, int rs, int rt, s32 imm, int sub) if (emith_flg_hint & _FHV) { emith_flg_noV = 0; - if (rt >= 0) // Nt^Ns in FV, bit 31 + if (rt > Z0) // Nt^Ns in FV, bit 31 EMIT(MIPS_XOR_REG(FV, rs, rt)); - else if (imm == 0) + else if (rt == Z0 || imm == 0) emith_flg_noV = 1; // imm #0 can't overflow else if ((imm < 0) == !sub) EMIT(MIPS_NOR_REG(FV, rs, Z0)); else if ((imm > 0) == !sub) - EMIT(MIPS_OR_REG(FV, rs, Z0)); + EMIT(MIPS_XOR_REG(FV, rs, Z0)); } // full V = Nd^Nt^Ns^C calculation is deferred until really needed @@ -483,13 +519,17 @@ static void emith_set_compare_flags(int rs, int rt, s32 imm) EMIT(MIPS_MVN_REG(d, s)) #define emith_add_r_r_r_lsl_ptr(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(MIPS_LSL_IMM(AT, s2, simm)); \ + EMIT(MIPS_OP_REG(FN_PADDU, d, s1, AT)); \ + } else EMIT(MIPS_OP_REG(FN_PADDU, d, s1, s2)); \ +} while (0) +#define emith_add_r_r_r_lsl(d, s1, s2, simm) do { \ if (simm) { \ EMIT(MIPS_LSL_IMM(AT, s2, simm)); \ EMIT(MIPS_ADD_REG(d, s1, AT)); \ } else EMIT(MIPS_ADD_REG(d, s1, s2)); \ } while (0) -#define emith_add_r_r_r_lsl(d, s1, s2, simm) \ - emith_add_r_r_r_lsl_ptr(d, s1, s2, simm) #define emith_add_r_r_r_lsr(d, s1, s2, simm) do { \ if (simm) { \ @@ -498,6 +538,16 @@ static void emith_set_compare_flags(int rs, int rt, s32 imm) } else EMIT(MIPS_ADD_REG(d, s1, s2)); \ } while (0) +#define emith_addf_r_r_r_lsl_ptr(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(MIPS_LSL_IMM(AT, s2, simm)); \ + EMIT(MIPS_OP_REG(FN_PADDU, FNZ, s1, AT)); \ + emith_set_arith_flags(d, s1, AT, 0, 0); \ + } else { \ + EMIT(MIPS_OP_REG(FN_PADDU, FNZ, s1, s2)); \ + emith_set_arith_flags(d, s1, s2, 0, 0); \ + } \ +} while (0) #define emith_addf_r_r_r_lsl(d, s1, s2, simm) do { \ if (simm) { \ EMIT(MIPS_LSL_IMM(AT, s2, simm)); \ @@ -586,6 +636,8 @@ static void emith_set_compare_flags(int rs, int rt, s32 imm) #define emith_add_r_r_r(d, s1, s2) \ emith_add_r_r_r_lsl(d, s1, s2, 0) +#define emith_addf_r_r_r_ptr(d, s1, s2) \ + emith_addf_r_r_r_lsl_ptr(d, s1, s2, 0) #define emith_addf_r_r_r(d, s1, s2) \ emith_addf_r_r_r_lsl(d, s1, s2, 0) @@ -697,14 +749,26 @@ static void emith_set_compare_flags(int rs, int rt, s32 imm) // move immediate static void emith_move_imm(int r, uintptr_t imm) { - if ((s16)imm == imm) { +#if __mips == 4 || __mips == 64 + if ((s32)imm != imm) { + emith_move_imm(r, imm >> 32); + if (imm & 0xffff0000) { + EMIT(MIPS_DLSL_IMM(r, r, 16)); + EMIT(MIPS_OR_IMM(r, r, (imm >> 16) & 0xffff)); + EMIT(MIPS_DLSL_IMM(r, r, 16)); + } else EMIT(MIPS_DLSL32_IMM(r, r, 0)); + if (imm & 0x0000ffff) + EMIT(MIPS_OR_IMM(r, r, imm & 0xffff)); + } else +#endif + if ((s16)imm == imm) { EMIT(MIPS_ADD_IMM(r, Z0, imm)); - } else if (!(imm >> 16)) { + } else if (!((u32)imm >> 16)) { EMIT(MIPS_OR_IMM(r, Z0, imm)); } else { int s = Z0; - if (imm >> 16) { - EMIT(MIPS_MOVT_IMM(r, imm >> 16)); + if ((u32)imm >> 16) { + EMIT(MIPS_MOVT_IMM(r, (u32)imm >> 16)); s = r; } if ((u16)imm) @@ -729,17 +793,17 @@ static void emith_move_imm(int r, uintptr_t imm) } while (0) // arithmetic, immediate - can only be ADDI[U], since SUBI[U] doesn't exist -static void emith_arith_imm(int op, int rd, int rs, u32 imm) +static void emith_add_imm(int ptr, int rd, int rs, u32 imm) { if ((s16)imm == imm) { if (imm || rd != rs) - EMIT(MIPS_OP_IMM(op, rd, rs, imm)); + EMIT(MIPS_OP_IMM(ptr ? OP_PADDIU:OP_ADDIU, rd,rs,imm)); } else if ((s32)imm < 0) { emith_move_r_imm(AT, -imm); - EMIT(MIPS_OP_REG(FN_SUB + (op-OP_ADDI), rd, rs, AT)); + EMIT(MIPS_OP_REG((ptr ? FN_PSUBU:FN_SUBU), rd,rs,AT)); } else { emith_move_r_imm(AT, imm); - EMIT(MIPS_OP_REG(FN_ADD + (op-OP_ADDI), rd, rs, AT)); + EMIT(MIPS_OP_REG((ptr ? FN_PADDU:FN_ADDU), rd,rs,AT)); } } @@ -760,7 +824,7 @@ static void emith_arith_imm(int op, int rd, int rs, u32 imm) emith_subf_r_r_imm(r, r, imm) #define emith_adc_r_imm(r, imm) \ - emith_adc_r_r_imm(r, r, imm); + emith_adc_r_r_imm(r, r, imm) #define emith_adcf_r_imm(r, imm) \ emith_adcf_r_r_imm(r, r, imm) @@ -770,10 +834,10 @@ static void emith_arith_imm(int op, int rd, int rs, u32 imm) // emith_subf_r_r_imm(FNZ, r, (s16)imm) #define emith_add_r_r_ptr_imm(d, s, imm) \ - emith_arith_imm(OP_ADDIU, d, s, imm) + emith_add_imm(1, d, s, imm) #define emith_add_r_r_imm(d, s, imm) \ - emith_add_r_r_ptr_imm(d, s, imm) + emith_add_imm(0, d, s, imm) #define emith_addf_r_r_imm(d, s, imm) do { \ emith_add_r_r_imm(FNZ, s, imm); \ @@ -1043,22 +1107,24 @@ static void emith_lohi_nops(void) // load/store. offs has 16 bits signed, which is currently sufficient #define emith_read_r_r_offs_ptr(r, rs, offs) \ - EMIT(MIPS_LW(r, rs, offs)) + EMIT(MIPS_OP_IMM(OP_LP, r, rs, offs)) #define emith_read_r_r_offs_ptr_c(cond, r, rs, offs) \ emith_read_r_r_offs_ptr(r, rs, offs) #define emith_read_r_r_offs(r, rs, offs) \ - emith_read_r_r_offs_ptr(r, rs, offs) + EMIT(MIPS_LW(r, rs, offs)) #define emith_read_r_r_offs_c(cond, r, rs, offs) \ emith_read_r_r_offs(r, rs, offs) #define emith_read_r_r_r_ptr(r, rs, rm) do { \ emith_add_r_r_r(AT, rs, rm); \ - EMIT(MIPS_LW(r, AT, 0)); \ + EMIT(MIPS_OP_IMM(OP_LP, r, AT, 0)); \ } while (0) -#define emith_read_r_r_r(r, rs, rm) \ - emith_read_r_r_r_ptr(r, rs, rm) +#define emith_read_r_r_r(r, rs, rm) do { \ + emith_add_r_r_r(AT, rs, rm); \ + EMIT(MIPS_LW(r, AT, 0)); \ +} while (0) #define emith_read_r_r_r_c(cond, r, rs, rm) \ emith_read_r_r_r(r, rs, rm) @@ -1112,24 +1178,26 @@ static void emith_lohi_nops(void) #define emith_write_r_r_offs_ptr(r, rs, offs) \ - EMIT(MIPS_SW(r, rs, offs)) + EMIT(MIPS_OP_IMM(OP_SP, r, rs, offs)) #define emith_write_r_r_offs_ptr_c(cond, r, rs, offs) \ emith_write_r_r_offs_ptr(r, rs, offs) #define emith_write_r_r_r_ptr(r, rs, rm) do { \ emith_add_r_r_r(AT, rs, rm); \ - EMIT(MIPS_SW(r, AT, 0)); \ + EMIT(MIPS_OP_IMM(OP_SP, r, AT, 0)); \ } while (0) #define emith_write_r_r_r_ptr_c(cond, r, rs, rm) \ emith_write_r_r_r_ptr(r, rs, rm) #define emith_write_r_r_offs(r, rs, offs) \ - emith_write_r_r_offs_ptr(r, rs, offs) + EMIT(MIPS_SW(r, rs, offs)) #define emith_write_r_r_offs_c(cond, r, rs, offs) \ emith_write_r_r_offs(r, rs, offs) -#define emith_write_r_r_r(r, rs, rm) \ - emith_write_r_r_r_ptr(r, rs, rm) +#define emith_write_r_r_r(r, rs, rm) do { \ + emith_add_r_r_r(AT, rs, rm); \ + EMIT(MIPS_SW(r, AT, 0)); \ +} while (0) #define emith_write_r_r_r_c(cond, r, rs, rm) \ emith_write_r_r_r(r, rs, rm) @@ -1164,7 +1232,7 @@ static void emith_lohi_nops(void) int _c; u32 _m = mask & 0x300fffc; /* r2-r15,r24-r25 */ \ if (__builtin_parity(_m) == 1) _m |= 0x1; /* ABI align */ \ int _s = count_bits(_m) * 4, _o = _s; \ - if (_s) emith_sub_r_imm(SP, _s); \ + if (_s) emith_add_r_r_ptr_imm(SP, SP, -_s); \ for (_c = HOST_REGS-1; _m && _c >= 0; _m &= ~(1 << _c), _c--) \ if (_m & (1 << _c)) \ { _o -= 4; if (_c) emith_write_r_r_offs(_c, SP, _o); } \ @@ -1177,7 +1245,7 @@ static void emith_lohi_nops(void) for (_c = 0; _m && _c < HOST_REGS; _m &= ~(1 << _c), _c++) \ if (_m & (1 << _c)) \ { if (_c) emith_read_r_r_offs(_c, SP, _o); _o += 4; } \ - if (_s) emith_add_r_imm(SP, _s); \ + if (_s) emith_add_r_r_ptr_imm(SP, SP, _s); \ } while (0) #define host_arg2reg(rd, arg) \ @@ -1343,8 +1411,8 @@ static int emith_cond_check(int cond, int *r) emith_jump_cond(cond, target) #define emith_jump_cond_inrange(target) \ - ((u8 *)target - (u8 *)tcache_ptr - 4 < 0x00020000U || \ - (u8 *)target - (u8 *)tcache_ptr - 4 >= 0xfffe0010U) // mind cond_check + ((u8 *)target - (u8 *)tcache_ptr - 4 < 0x20000 && \ + (u8 *)target - (u8 *)tcache_ptr - 4 >= -0x20000+0x10) //mind cond_check // NB: returns position of patch for cache maintenance #define emith_jump_patch(ptr, target, pos) do { \ @@ -1359,8 +1427,8 @@ static int emith_cond_check(int cond, int *r) } while (0) #define emith_jump_patch_inrange(ptr, target) \ - ((u8 *)target - (u8 *)ptr - 4 < 0x00020000U || \ - (u8 *)target - (u8 *)ptr - 4 >= 0xfffe0010U) // mind cond_check + ((u8 *)target - (u8 *)ptr - 4 < 0x20000 && \ + (u8 *)target - (u8 *)ptr - 4 >= -0x20000+0x10) // mind cond_check #define emith_jump_patch_size() 4 #define emith_jump_at(ptr, target) do { \ @@ -1410,7 +1478,7 @@ static int emith_cond_check(int cond, int *r) // NB: ABI SP alignment is 8 for compatibility with MIPS IV #define emith_push_ret(r) do { \ - emith_sub_r_imm(SP, 8+16); /* reserve new arg save area (16) */ \ + emith_add_r_r_ptr_imm(SP, SP, -8-16); /* ABI: 16 byte arg save area */ \ emith_write_r_r_offs(LR, SP, 4+16); \ if ((r) > 0) emith_write_r_r_offs(r, SP, 0+16); \ } while (0) @@ -1418,7 +1486,7 @@ static int emith_cond_check(int cond, int *r) #define emith_pop_and_ret(r) do { \ if ((r) > 0) emith_read_r_r_offs(r, SP, 0+16); \ emith_read_r_r_offs(LR, SP, 4+16); \ - emith_add_r_imm(SP, 8+16); \ + emith_add_r_r_ptr_imm(SP, SP, 8+16); \ emith_ret(); \ } while (0) @@ -1436,7 +1504,7 @@ static int emith_cond_check(int cond, int *r) int _c; u32 _m = 0xd0ff0000; \ if (__builtin_parity(_m) == 1) _m |= 0x1; /* ABI align for SP is 8 */ \ int _s = count_bits(_m) * 4 + 16, _o = _s; /* 16 byte arg save area */ \ - if (_s) emith_sub_r_imm(SP, _s); \ + if (_s) emith_add_r_r_ptr_imm(SP, SP, -_s); \ for (_c = HOST_REGS-1; _m && _c >= 0; _m &= ~(1 << _c), _c--) \ if (_m & (1 << _c)) \ { _o -= 4; if (_c) emith_write_r_r_offs(_c, SP, _o); } \ @@ -1448,23 +1516,23 @@ static int emith_cond_check(int cond, int *r) for (_c = 0; _m && _c < HOST_REGS; _m &= ~(1 << _c), _c++) \ if (_m & (1 << _c)) \ { if (_c) emith_read_r_r_offs(_c, SP, _o); _o += 4; } \ - if (_s) emith_add_r_imm(SP, _s); \ + if (_s) emith_add_r_r_ptr_imm(SP, SP, _s); \ emith_ret(); \ } while (0) // NB: assumes a is in arg0, tab, func and mask are temp #define emith_sh2_rcall(a, tab, func, mask) do { \ emith_lsr(mask, a, SH2_READ_SHIFT); \ - emith_add_r_r_r_lsl_ptr(tab, tab, mask, 3); \ + emith_add_r_r_r_lsl_ptr(tab, tab, mask, PTR_SCALE+1); \ emith_read_r_r_offs_ptr(func, tab, 0); \ - emith_read_r_r_offs(mask, tab, 4); \ - emith_addf_r_r_r/*_ptr*/(func, func, func); \ + emith_read_r_r_offs(mask, tab, (1 << PTR_SCALE)); \ + emith_addf_r_r_r_ptr(func, func, func); \ } while (0) // NB: assumes a, val are in arg0 and arg1, tab and func are temp #define emith_sh2_wcall(a, val, tab, func) do { \ emith_lsr(func, a, SH2_WRITE_SHIFT); \ - emith_lsl(func, func, 2); \ + emith_lsl(func, func, PTR_SCALE); \ emith_read_r_r_r_ptr(func, tab, func); \ emith_move_r_r_ptr(6, CONTEXT_REG); /* arg2 */ \ emith_jump_reg(func); \ diff --git a/cpu/drc/emit_x86.c b/cpu/drc/emit_x86.c index e7284499..9ed8b563 100644 --- a/cpu/drc/emit_x86.c +++ b/cpu/drc/emit_x86.c @@ -448,11 +448,11 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common // fake conditionals (using SJMP instead) #define emith_move_r_imm_c(cond, r, imm) \ - emith_move_r_imm(r, imm); + emith_move_r_imm(r, imm) #define emith_add_r_imm_c(cond, r, imm) \ - emith_add_r_imm(r, imm); + emith_add_r_imm(r, imm) #define emith_sub_r_imm_c(cond, r, imm) \ - emith_sub_r_imm(r, imm); + emith_sub_r_imm(r, imm) #define emith_or_r_imm_c(cond, r, imm) \ emith_or_r_imm(r, imm) #define emith_eor_r_imm_c(cond, r, imm) \ @@ -468,11 +468,11 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common #define emith_ror_c(cond, d, s, cnt) \ emith_ror(d, s, cnt) #define emith_and_r_r_c(cond, d, s) \ - emith_and_r_r(d, s); + emith_and_r_r(d, s) #define emith_add_r_r_imm_c(cond, d, s, imm) \ - emith_add_r_r_imm(d, s, imm); + emith_add_r_r_imm(d, s, imm) #define emith_sub_r_r_imm_c(cond, d, s, imm) \ - emith_sub_r_r_imm(d, s, imm); + emith_sub_r_r_imm(d, s, imm) #define emith_read8_r_r_r_c(cond, r, rs, rm) \ emith_read8_r_r_r(r, rs, rm) diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index 2320c501..d1cde69e 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -47,13 +47,9 @@ #define LOOP_OPTIMIZER 1 #define T_OPTIMIZER 1 -// limits (per block) -#define MAX_BLOCK_SIZE (BLOCK_INSN_LIMIT * 6 * 6) - -// max literal offset from the block end #define MAX_LITERAL_OFFSET 0x200 // max. MOVA, MOV @(PC) offset -#define MAX_LITERALS (BLOCK_INSN_LIMIT / 4) -#define MAX_LOCAL_BRANCHES (BLOCK_INSN_LIMIT / 4) +#define MAX_LOCAL_TARGETS (BLOCK_INSN_LIMIT / 4) +#define MAX_LOCAL_BRANCHES (BLOCK_INSN_LIMIT / 2) // debug stuff // 01 - warnings/errors @@ -294,7 +290,7 @@ struct block_link { u32 target_pc; void *jump; // insn address void *blx; // block link/exit area if any - u8 jdisp[8]; // jump backup buffer + u8 jdisp[12]; // jump backup buffer struct block_link *next; // either in block_entry->links or unresolved struct block_link *o_next; // ...in block_entry->o_links struct block_link *prev; @@ -443,6 +439,8 @@ static void rcache_free_tmp(int hr); #include "../drc/emit_arm64.c" #elif defined(__mips__) #include "../drc/emit_mips.c" +#elif defined(__riscv__) || defined(__riscv) +#include "../drc/emit_riscv.c" #elif defined(__i386__) #include "../drc/emit_x86.c" #elif defined(__x86_64__) @@ -1207,45 +1205,10 @@ static void dr_flush_tcache(int tcid) static void *dr_failure(void) { - lprintf("recompilation failed\n"); + printf("recompilation failed\n"); exit(1); } -#define ADD_TO_ARRAY(array, count, item, failcode) { \ - if (count >= ARRAY_SIZE(array)) { \ - dbg(1, "warning: " #array " overflow"); \ - failcode; \ - } else \ - array[count++] = item; \ -} - -static inline int find_in_array(u32 *array, size_t size, u32 what) -{ - size_t i; - for (i = 0; i < size; i++) - if (what == array[i]) - return i; - - return -1; -} - -static int find_in_sorted_array(u32 *array, size_t size, u32 what) -{ - // binary search in sorted array - int left = 0, right = size-1; - while (left <= right) - { - int middle = (left + right) / 2; - if (array[middle] == what) - return middle; - else if (array[middle] < what) - left = middle + 1; - else - right = middle - 1; - } - return -1; -} - // --------------------------------------------------------------- // NB rcache allocation dependencies: @@ -2868,6 +2831,88 @@ static void emit_do_static_regs(int is_write, int tmpr) } } +// block local link stuff +struct linkage { + u32 pc; + void *ptr; + struct block_link *bl; + u32 mask; +}; + +static inline int find_in_linkage(const struct linkage *array, int size, u32 pc) +{ + size_t i; + for (i = 0; i < size; i++) + if (pc == array[i].pc) + return i; + + return -1; +} + +static int find_in_sorted_linkage(const struct linkage *array, int size, u32 pc) +{ + // binary search in sorted array + int left = 0, right = size-1; + while (left <= right) + { + int middle = (left + right) / 2; + if (array[middle].pc == pc) + return middle; + else if (array[middle].pc < pc) + left = middle + 1; + else + right = middle - 1; + } + return -1; +} + +static void emit_branch_linkage_code(SH2 *sh2, struct block_desc *block, int tcache_id, + const struct linkage *targets, int target_count, + const struct linkage *links, int link_count) +{ + struct block_link *bl; + int u, v, tmp; + + for (u = 0; u < link_count; u++) { + emith_pool_check(); + // look up local branch targets + v = find_in_sorted_linkage(targets, target_count, links[u].pc); + if (v >= 0) { + if (! targets[v].ptr) { + // forward branch not yet resolved, prepare external linking + emith_jump_patch(links[u].ptr, tcache_ptr, NULL); + bl = dr_prepare_ext_branch(block->entryp, links[u].pc, sh2->is_slave, tcache_id); + if (bl) { + emith_flush(); // flush to inhibit insn swapping + bl->type = BL_LDJMP; + } + + tmp = rcache_get_tmp_arg(0); + emith_move_r_imm(tmp, links[u].pc); + rcache_free_tmp(tmp); + emith_jump_patchable(sh2_drc_dispatcher); + } else if (emith_jump_patch_inrange(links[u].ptr, targets[v].ptr)) { + // inrange local branch + emith_jump_patch(links[u].ptr, targets[v].ptr, NULL); + } else { + // far local branch + emith_jump_patch(links[u].ptr, tcache_ptr, NULL); + emith_jump(targets[v].ptr); + } + } else { + // external or exit, emit blx area entry + void *target = (links[u].pc & 1 ? sh2_drc_exit : sh2_drc_dispatcher); + if (links[u].bl) + links[u].bl->blx = tcache_ptr; + emith_jump_patch(links[u].ptr, tcache_ptr, NULL); + tmp = rcache_get_tmp_arg(0); + emith_move_r_imm(tmp, links[u].pc & ~1); + rcache_free_tmp(tmp); + emith_jump(target); + } + } +} + #define DELAY_SAVE_T(sr) { \ int t_ = rcache_get_tmp(); \ emith_bic_r_imm(sr, T_save); \ @@ -2887,17 +2932,10 @@ static void *dr_get_pc_base(u32 pc, SH2 *sh2); static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) { // branch targets in current block - u32 branch_target_pc[MAX_LOCAL_BRANCHES]; - void *branch_target_ptr[MAX_LOCAL_BRANCHES]; + struct linkage branch_targets[MAX_LOCAL_TARGETS]; int branch_target_count = 0; - // unresolved local forward branches, for fixup at block end - u32 branch_patch_pc[MAX_LOCAL_BRANCHES]; - void *branch_patch_ptr[MAX_LOCAL_BRANCHES]; - int branch_patch_count = 0; - // external branch targets with a block link/exit area - u32 blx_target_pc[MAX_LOCAL_BRANCHES]; - void *blx_target_ptr[MAX_LOCAL_BRANCHES]; - struct block_link *blx_target_bl[MAX_LOCAL_BRANCHES]; + // unresolved local or external targets with block link/exit area if needed + struct linkage blx_targets[MAX_LOCAL_BRANCHES]; int blx_target_count = 0; u8 op_flags[BLOCK_INSN_LIMIT]; @@ -2906,6 +2944,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) int delay_reg:8; u32 loop_type:8; u32 polling:8; + u32 pinning:1; u32 test_irq:1; u32 pending_branch_direct:1; u32 pending_branch_indirect:1; @@ -2914,23 +2953,20 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) #if LOOP_OPTIMIZER // loops with pinned registers for optimzation // pinned regs are like statics and don't need saving/restoring inside a loop - u32 pinned_loop_pc[MAX_LOCAL_BRANCHES/16]; - void *pinned_loop_ptr[MAX_LOCAL_BRANCHES/16]; - u32 pinned_loop_mask[MAX_LOCAL_BRANCHES/16]; + struct linkage pinned_loops[MAX_LOCAL_TARGETS/16]; int pinned_loop_count = 0; #endif // PC of current, first, last SH2 insn u32 pc, base_pc, end_pc; u32 base_literals, end_literals; - void *block_entry_ptr; + u8 *block_entry_ptr; struct block_desc *block; struct block_entry *entry; struct block_link *bl; u16 *dr_pc_base; struct op_data *opd; int blkid_main = 0; - int skip_op = 0; int tmp, tmp2; int cycles; int i, v; @@ -2971,8 +3007,15 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) for (pc = base_pc, i = 0; pc < end_pc; i++, pc += 2) { if (op_flags[i] & OF_DELAY_OP) op_flags[i] &= ~OF_BTARGET; - if (op_flags[i] & OF_BTARGET) - ADD_TO_ARRAY(branch_target_pc, branch_target_count, pc, ); + if (op_flags[i] & OF_BTARGET) { + if (branch_target_count < ARRAY_SIZE(branch_targets)) + branch_targets[branch_target_count++] = (struct linkage) { .pc = pc }; + else { + printf("warning: linkage overflow\n"); + end_pc = pc; + break; + } + } if (ops[i].op == OP_LDC && (ops[i].dest & BITMASK1(SHR_SR)) && pc+2 < end_pc) op_flags[i+1] |= OF_BTARGET; // RTE entrypoint in case of SR.IMASK change // unify T and SR since rcache doesn't know about "virtual" guest regs @@ -3040,9 +3083,9 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) if (op_flags[v] & OF_BASIC_LOOP) { m3 &= ~rcache_regs_static & ~BITMASK5(SHR_PC, SHR_PR, SHR_SR, SHR_T, SHR_MEM); if (m3 && count_bits(m3) < count_bits(rcache_vregs_reg) && - pinned_loop_count < ARRAY_SIZE(pinned_loop_pc)-1) { - pinned_loop_mask[pinned_loop_count] = m3; - pinned_loop_pc[pinned_loop_count++] = base_pc + 2*v; + pinned_loop_count < ARRAY_SIZE(pinned_loops)-1) { + pinned_loops[pinned_loop_count++] = + (struct linkage) { .mask = m3, .pc = base_pc + 2*v }; } else op_flags[v] &= ~OF_BASIC_LOOP; } @@ -3052,10 +3095,6 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) #endif } - if (branch_target_count > 0) { - memset(branch_target_ptr, 0, sizeof(branch_target_ptr[0]) * branch_target_count); - } - tcache_ptr = dr_prepare_cache(tcache_id, (end_pc - base_pc) / 2); #if (DRC_DEBUG & 4) tcache_dsm_ptrs[tcache_id] = tcache_ptr; @@ -3076,7 +3115,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) emith_invalidate_t(); drcf = (struct drcf) { 0 }; #if LOOP_OPTIMIZER - pinned_loop_pc[pinned_loop_count] = -1; + pinned_loops[pinned_loop_count].pc = -1; pinned_loop_count = 0; #endif @@ -3090,24 +3129,6 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) int tmp3, tmp4; int sr; - opd = &ops[i]; - op = FETCH_OP(pc); - -#if (DRC_DEBUG & 2) - insns_compiled++; -#endif -#if (DRC_DEBUG & 4) - DasmSH2(sh2dasm_buff, pc, op); - if (op_flags[i] & OF_BTARGET) { - if ((op_flags[i] & OF_LOOP) == OF_DELAY_LOOP) tmp3 = '+'; - else if ((op_flags[i] & OF_LOOP) == OF_POLL_LOOP) tmp3 = '='; - else if ((op_flags[i] & OF_LOOP) == OF_IDLE_LOOP) tmp3 = '~'; - else tmp3 = '*'; - } else if (drcf.loop_type) tmp3 = '.'; - else tmp3 = ' '; - printf("%c%08x %04x %s\n", tmp3, pc, op, sh2dasm_buff); -#endif - if (op_flags[i] & OF_BTARGET) { if (pc != base_pc) @@ -3143,9 +3164,9 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) break; } - v = find_in_sorted_array(branch_target_pc, branch_target_count, pc); + v = find_in_sorted_linkage(branch_targets, branch_target_count, pc); if (v >= 0) - branch_target_ptr[v] = tcache_ptr; + branch_targets[v].ptr = tcache_ptr; #if LOOP_DETECTION drcf.loop_type = op_flags[i] & OF_LOOP; drcf.delay_reg = -1; @@ -3176,12 +3197,13 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) #if LOOP_OPTIMIZER if (op_flags[i] & OF_BASIC_LOOP) { - if (pinned_loop_pc[pinned_loop_count] == pc) { + if (pinned_loops[pinned_loop_count].pc == pc) { // pin needed regs on loop entry - FOR_ALL_BITS_SET_DO(pinned_loop_mask[pinned_loop_count], v, rcache_pin_reg(v)); + FOR_ALL_BITS_SET_DO(pinned_loops[pinned_loop_count].mask, v, rcache_pin_reg(v)); emith_flush(); // store current PC as loop target - pinned_loop_ptr[pinned_loop_count] = tcache_ptr; + pinned_loops[pinned_loop_count].ptr = tcache_ptr; + drcf.pinning = 1; } else op_flags[i] &= ~OF_BASIC_LOOP; } @@ -3193,11 +3215,10 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) EMITH_JMP_START(DCOND_GT); rcache_save_pinned(); - if (blx_target_count < ARRAY_SIZE(blx_target_pc)) { + if (blx_target_count < ARRAY_SIZE(blx_targets)) { // exit via stub in blx table (saves some 1-3 insns in the main flow) - blx_target_ptr[blx_target_count] = tcache_ptr; - blx_target_pc[blx_target_count] = pc|1; - blx_target_bl[blx_target_count++] = NULL; + blx_targets[blx_target_count++] = + (struct linkage) { .ptr = tcache_ptr, .pc = pc|1, .bl = NULL }; emith_jump_patchable(tcache_ptr); } else { // blx table full, must inline exit code @@ -3210,12 +3231,11 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) } else #endif { - if (blx_target_count < ARRAY_SIZE(blx_target_pc)) { + if (blx_target_count < ARRAY_SIZE(blx_targets)) { // exit via stub in blx table (saves some 1-3 insns in the main flow) - blx_target_pc[blx_target_count] = pc|1; - blx_target_bl[blx_target_count] = NULL; emith_cmp_r_imm(sr, 0); - blx_target_ptr[blx_target_count++] = tcache_ptr; + blx_targets[blx_target_count++] = + (struct linkage) { .ptr = tcache_ptr, .pc = pc|1, .bl = NULL }; emith_jump_cond_patchable(DCOND_LE, tcache_ptr); } else { // blx table full, must inline exit code @@ -3282,13 +3302,40 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) } #endif + // emit blx area if limits are approached + if (blx_target_count && (blx_target_count > ARRAY_SIZE(blx_targets)-4 || + !emith_jump_patch_inrange(blx_targets[0].ptr, tcache_ptr+0x100))) { + u8 *jp; + rcache_invalidate_tmp(); + jp = tcache_ptr; + emith_jump_patchable(tcache_ptr); + emit_branch_linkage_code(sh2, block, tcache_id, branch_targets, + branch_target_count, blx_targets, blx_target_count); + blx_target_count = 0; + do_host_disasm(tcache_id); + emith_jump_patch(jp, tcache_ptr, NULL); + } + emith_pool_check(); - pc += 2; - if (skip_op > 0) { - skip_op--; - continue; - } + opd = &ops[i]; + op = FETCH_OP(pc); +#if (DRC_DEBUG & 4) + DasmSH2(sh2dasm_buff, pc, op); + if (op_flags[i] & OF_BTARGET) { + if ((op_flags[i] & OF_LOOP) == OF_DELAY_LOOP) tmp3 = '+'; + else if ((op_flags[i] & OF_LOOP) == OF_POLL_LOOP) tmp3 = '='; + else if ((op_flags[i] & OF_LOOP) == OF_IDLE_LOOP) tmp3 = '~'; + else tmp3 = '*'; + } else if (drcf.loop_type) tmp3 = '.'; + else tmp3 = ' '; + printf("%c%08x %04x %s\n", tmp3, pc, op, sh2dasm_buff); +#endif + + pc += 2; +#if (DRC_DEBUG & 2) + insns_compiled++; +#endif if (op_flags[i] & OF_DELAY_OP) { @@ -4422,7 +4469,7 @@ end_op: emit_sync_t_to_sr(); emith_sh2_delay_loop(cycles, drcf.delay_reg); rcache_unlock_all(); // may lock delay_reg - drcf.polling = drcf.loop_type = 0; + drcf.polling = drcf.loop_type = drcf.pinning = 0; } #endif @@ -4464,33 +4511,39 @@ end_op: emith_sync_t(sr); // no modification of host status/flags between here and branching! - v = find_in_sorted_array(branch_target_pc, branch_target_count, target_pc); + v = find_in_sorted_linkage(branch_targets, branch_target_count, target_pc); if (v >= 0) { // local branch - if (branch_target_ptr[v]) { + if (branch_targets[v].ptr) { // local backward jump, link here now since host PC is already known - target = branch_target_ptr[v]; + target = branch_targets[v].ptr; #if LOOP_OPTIMIZER - if (pinned_loop_pc[pinned_loop_count] == target_pc) { + if (pinned_loops[pinned_loop_count].pc == target_pc) { // backward jump at end of optimized loop rcache_unpin_all(); - target = pinned_loop_ptr[pinned_loop_count]; + target = pinned_loops[pinned_loop_count].ptr; pinned_loop_count ++; } #endif - if (cond != -1) - emith_jump_cond(cond, target); - else { + if (cond != -1) { + if (emith_jump_patch_inrange(tcache_ptr, target)) { + emith_jump_cond(cond, target); + } else { + // not reachable directly, must use far branch + EMITH_JMP_START(emith_invert_cond(cond)); + emith_jump(target); + EMITH_JMP_END(emith_invert_cond(cond)); + } + } else { emith_jump(target); rcache_invalidate(); } - } else if (branch_patch_count < MAX_LOCAL_BRANCHES) { + } else if (blx_target_count < MAX_LOCAL_BRANCHES) { // local forward jump target = tcache_ptr; - branch_patch_pc[branch_patch_count] = target_pc; - branch_patch_ptr[branch_patch_count] = target; - branch_patch_count++; + blx_targets[blx_target_count++] = + (struct linkage) { .pc = target_pc, .ptr = target, .bl = NULL }; if (cond != -1) emith_jump_cond_patchable(cond, target); else { @@ -4498,7 +4551,7 @@ end_op: rcache_invalidate(); } } else - dbg(1, "warning: too many local branches"); + dbg(1, "warning: too many unresolved branches"); } if (target == NULL) @@ -4507,13 +4560,12 @@ end_op: bl = dr_prepare_ext_branch(block->entryp, target_pc, sh2->is_slave, tcache_id); if (cond != -1) { #if 1 - if (bl && blx_target_count < ARRAY_SIZE(blx_target_pc)) { + if (bl && blx_target_count < ARRAY_SIZE(blx_targets)) { // conditional jumps get a blx stub for the far jump - blx_target_pc[blx_target_count] = target_pc; - blx_target_bl[blx_target_count] = bl; - blx_target_ptr[blx_target_count++] = tcache_ptr; bl->type = BL_JCCBLX; target = tcache_ptr; + blx_targets[blx_target_count++] = + (struct linkage) { .pc = target_pc, .ptr = target, .bl = bl }; emith_jump_cond_patchable(cond, target); } else { // not linkable, or blx table full; inline jump @dispatcher @@ -4660,44 +4712,15 @@ end_op: } else rcache_flush(); - // emit blx area - for (i = 0; i < blx_target_count; i++) { - void *target = (blx_target_pc[i] & 1 ? sh2_drc_exit : sh2_drc_dispatcher); - - emith_pool_check(); - bl = blx_target_bl[i]; - if (bl) - bl->blx = tcache_ptr; - emith_jump_patch(blx_target_ptr[i], tcache_ptr, NULL); - tmp = rcache_get_tmp_arg(0); - emith_move_r_imm(tmp, blx_target_pc[i] & ~1); - emith_jump(target); - rcache_invalidate(); - } + // link unresolved branches, emitting blx area entries as needed + emit_branch_linkage_code(sh2, block, tcache_id, branch_targets, + branch_target_count, blx_targets, blx_target_count); emith_flush(); do_host_disasm(tcache_id); emith_pool_commit(0); - // link local branches - for (i = 0; i < branch_patch_count; i++) { - void *target; - int t; - t = find_in_sorted_array(branch_target_pc, branch_target_count, branch_patch_pc[i]); - target = branch_target_ptr[t]; - if (target == NULL) { - // flush pc and go back to dispatcher (this should no longer happen) - dbg(1, "stray branch to %08x %p", branch_patch_pc[i], tcache_ptr); - target = tcache_ptr; - tmp = rcache_get_tmp_arg(0); - emith_move_r_imm(tmp, branch_patch_pc[i]); - emith_jump(sh2_drc_dispatcher); - rcache_flush(); - } - emith_jump_patch(branch_patch_ptr[i], target, NULL); - } - // fill blx backup; do this last to backup final patched code for (i = 0; i < block->entry_count; i++) for (bl = block->entryp[i].o_links; bl; bl = bl->o_next) @@ -4927,7 +4950,7 @@ static void sh2_generate_utils(void) // pc = sh2_drc_dispatcher_call(u32 pc) sh2_drc_dispatcher_call = (void *)tcache_ptr; emith_ctx_read(arg2, offsetof(SH2, rts_cache_idx)); - emith_add_r_imm(arg2, 2*sizeof(void *)); + emith_add_r_imm(arg2, (u32)(2*sizeof(void *))); emith_and_r_imm(arg2, (ARRAY_SIZE(sh2s->rts_cache)-1) * 2*sizeof(void *)); emith_ctx_write(arg2, offsetof(SH2, rts_cache_idx)); emith_add_r_r_r_lsl_ptr(arg2, CONTEXT_REG, arg2, 0); @@ -4957,7 +4980,7 @@ static void sh2_generate_utils(void) emith_jump_cond(DCOND_NE, sh2_drc_dispatcher); #endif emith_read_r_r_offs_ptr(arg0, arg1, offsetof(SH2, rts_cache) + sizeof(void *)); - emith_sub_r_imm(arg2, 2*sizeof(void *)); + emith_sub_r_imm(arg2, (u32)(2*sizeof(void *))); emith_and_r_imm(arg2, (ARRAY_SIZE(sh2s->rts_cache)-1) * 2*sizeof(void *)); emith_ctx_write(arg2, offsetof(SH2, rts_cache_idx)); #if (DRC_DEBUG & 128) diff --git a/cpu/sh2/compiler.h b/cpu/sh2/compiler.h index 415f01ba..44620f48 100644 --- a/cpu/sh2/compiler.h +++ b/cpu/sh2/compiler.h @@ -41,6 +41,8 @@ unsigned short scan_block(unsigned int base_pc, int is_slave, #define DRC_SR_REG "r28" #elif defined(__mips__) #define DRC_SR_REG "s6" +#elif defined(__riscv__) || defined(__riscv) +#define DRC_SR_REG "s11" #elif defined(__i386__) #define DRC_SR_REG "edi" #elif defined(__x86_64__) diff --git a/pico/32x/32x.c b/pico/32x/32x.c index 7e2e039e..896b5aa1 100644 --- a/pico/32x/32x.c +++ b/pico/32x/32x.c @@ -506,7 +506,7 @@ void sync_sh2s_normal(unsigned int m68k_target) if (CYCLES_GT(now, ssh2.m68krcycles_done)) now = ssh2.m68krcycles_done; } - if (now - timer_cycles >= STEP_N) { + if (CYCLES_GT(now, timer_cycles+STEP_N)) { p32x_timers_do(now - timer_cycles); timer_cycles = now; } diff --git a/platform/common/dismips.c b/platform/common/dismips.c index 41c0f7a5..f9888f2a 100644 --- a/platform/common/dismips.c +++ b/platform/common/dismips.c @@ -127,7 +127,7 @@ static const struct insn special_insns[] = { {0x38, S_IMM_DT, "dsll"}, {0x3A, S_IMM_DT, "dsrl"}, {0x3B, S_IMM_DT, "dsra"}, - {0x3D, S_IMM_DT, "dsll32"}, + {0x3C, S_IMM_DT, "dsll32"}, {0x3E, S_IMM_DT, "dsrl32"}, {0x3F, S_IMM_DT, "dsra32"}, }; diff --git a/platform/linux/emu.c b/platform/linux/emu.c index 887d7836..93665263 100644 --- a/platform/linux/emu.c +++ b/platform/linux/emu.c @@ -29,7 +29,7 @@ void pemu_prep_defconfig(void) void pemu_validate_config(void) { -#if !defined(__arm__) && !defined(__aarch64__) && !defined(__mips__) && !defined(__i386__) && !defined(__x86_64__) +#if !defined(__arm__) && !defined(__aarch64__) && !defined(__mips__) && !defined(__riscv__) && !defined(__riscv) && !defined(__i386__) && !defined(__x86_64__) PicoIn.opt &= ~POPT_EN_DRC; #endif } diff --git a/tools/mkoffsets.sh b/tools/mkoffsets.sh index 8a0557c7..349b8605 100755 --- a/tools/mkoffsets.sh +++ b/tools/mkoffsets.sh @@ -16,7 +16,7 @@ compile_rodata () # $CC $CFLAGS -I .. -nostdlib -Wl,-edummy /tmp/getoffs.c \ # -o /tmp/getoffs.o || exit 1 # find the name of the .rodata section (in case -fdata-sections is used) - rosect=$(readelf -S /tmp/getoffs.o | grep '\.rodata' | + rosect=$(readelf -S /tmp/getoffs.o | grep '\.rodata\|\.sdata' | sed 's/^[^.]*././;s/ .*//') # read out .rodata section as hex string (should be only 4 or 8 bytes) ro=$(readelf -x $rosect /tmp/getoffs.o | grep '0x' | cut -c14-48 |