From e7ee50107538e504f6ece2ccffdc63b98fbd2ba7 Mon Sep 17 00:00:00 2001 From: kub Date: Wed, 13 Nov 2019 21:05:35 +0100 Subject: [PATCH] sh2 drc: RISC-V (RV64IM) code emitter, some work on MIPS64 --- Makefile | 6 +- config.aarch64 | 14 -- config.caanoo | 16 -- config.caanoo47 | 16 -- config.dingux | 15 -- config.dingux54 | 15 -- config.gcw0 | 15 -- config.gp2x | 16 -- config.gp2x47 | 16 -- config.i386 | 14 -- config.x86 | 14 -- cpu/drc/emit_arm.c | 145 +++++++++-------- cpu/drc/emit_arm64.c | 4 +- cpu/drc/emit_mips.c | 174 ++++++++++++++------ cpu/drc/emit_x86.c | 12 +- cpu/sh2/compiler.c | 335 ++++++++++++++++++++------------------ cpu/sh2/compiler.h | 2 + pico/32x/32x.c | 2 +- platform/common/dismips.c | 2 +- platform/linux/emu.c | 2 +- tools/mkoffsets.sh | 2 +- 21 files changed, 394 insertions(+), 443 deletions(-) delete mode 100644 config.aarch64 delete mode 100644 config.caanoo delete mode 100644 config.caanoo47 delete mode 100644 config.dingux delete mode 100644 config.dingux54 delete mode 100644 config.gcw0 delete mode 100644 config.gp2x delete mode 100644 config.gp2x47 delete mode 100644 config.i386 delete mode 100644 config.x86 diff --git a/Makefile b/Makefile index a79c054b..0a0ab127 100644 --- a/Makefile +++ b/Makefile @@ -68,6 +68,10 @@ else ifneq (,$(findstring aarch64,$(ARCH))) use_fame ?= 1 use_cz80 ?= 1 use_sh2drc ?= 1 +else ifneq (,$(findstring riscv,$(ARCH))) +use_fame ?= 1 +use_cz80 ?= 1 +use_sh2drc ?= 1 endif -include Makefile.local @@ -266,7 +270,7 @@ pico/carthw_cfg.c: pico/carthw.cfg # random deps pico/carthw/svp/compiler.o : cpu/drc/emit_arm.c cpu/sh2/compiler.o : cpu/drc/emit_arm.c cpu/drc/emit_arm64.c -cpu/sh2/compiler.o : cpu/drc/emit_x86.c cpu/drc/emit_mips.c +cpu/sh2/compiler.o : cpu/drc/emit_x86.c cpu/drc/emit_mips.c cpu/drc/emit_riscv.c cpu/sh2/mame/sh2pico.o : cpu/sh2/mame/sh2.c pico/pico.o pico/cd/mcd.o pico/32x/32x.o : pico/pico_cmn.c pico/pico_int.h pico/memory.o pico/cd/memory.o pico/32x/memory.o : pico/pico_int.h pico/memory.h diff --git a/config.aarch64 b/config.aarch64 deleted file mode 100644 index 9631d64e..00000000 --- a/config.aarch64 +++ /dev/null @@ -1,14 +0,0 @@ -# Automatically generated by configure -# Configured with: './configure' '--platform=generic' -CC = aarch64-linux-gnu-gcc -CXX = aarch64-linux-gnu-g++ -AS = aarch64-linux-gnu-as -STRIP = aarch64-linux-gnu-strip -CFLAGS += -I/usr/include/SDL -D_GNU_SOURCE=1 -D_REENTRANT -Wno-unused-result -ASFLAGS += -LDFLAGS += # --sysroot ${HOME}/opt/aarch64/debian-arm64 -LDLIBS += -lSDL -lasound -lpng -lz -lm -ldl - -ARCH = aarch64 -PLATFORM = generic -SOUND_DRIVERS = oss alsa sdl diff --git a/config.caanoo b/config.caanoo deleted file mode 100644 index 1ffc54da..00000000 --- a/config.caanoo +++ /dev/null @@ -1,16 +0,0 @@ -# Automatically generated by configure -# Configured with: './configure' '--platform=gp2x' -CC = arm-gph-linux-gnueabi-gcc -CXX = arm-gph-linux-gnueabi-g++ -AS = arm-gph-linux-gnueabi-as -STRIP = arm-gph-linux-gnueabi-strip -CFLAGS += -I${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/include -I${HOME}/src/gp2x/armroot-eabi/include -D__GP2X__ -Wno-unused-result -CFLAGS += -mfloat-abi=soft -mcpu=arm920t -mtune=arm920t -mno-thumb-interwork -fno-stack-protector -fno-common -CFLAGS += -finline-limit=42 -fipa-pta -fno-ipa-pure-const -ASFLAGS += -mfloat-abi=soft -mcpu=arm920t -LDFLAGS += --sysroot ${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root -L${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/lib -L${HOME}/src/gp2x/armroot-eabi/lib -static -LDLIBS += -lpng -lm -ldl - -ARCH = arm -PLATFORM = gp2x -SOUND_DRIVERS = oss diff --git a/config.caanoo47 b/config.caanoo47 deleted file mode 100644 index 5bcf8608..00000000 --- a/config.caanoo47 +++ /dev/null @@ -1,16 +0,0 @@ -# Automatically generated by configure -# Configured with: './configure' '--platform=gp2x' -CC = arm-linux-gnueabi-gcc -CXX = arm-linux-gnueabi-g++ -AS = arm-linux-gnueabi-as -STRIP = arm-linux-gnueabi-strip -CFLAGS += -I${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/include -I${HOME}/src/gp2x/armroot-eabi/include -D__GP2X__ -Wno-unused-result -CFLAGS += -mfloat-abi=soft -mcpu=arm920t -mtune=arm920t -mno-thumb-interwork -fno-stack-protector -fno-common -CFLAGS += -finline-limit=42 -fipa-pta -fno-ipa-sra -fno-ipa-pure-const -ASFLAGS += -mfloat-abi=soft -mcpu=arm920t -LDFLAGS += -B${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/lib/gcc/arm-gph-linux-gnueabi/4.2.4 -B${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/lib -L${HOME}/opt/GPH_SDK/tools/gcc-4.2.4-glibc-2.7-eabi/arm-gph-linux-gnueabi/sys-root/usr/lib -L${HOME}/src/gp2x/armroot-eabi/lib -static -LDLIBS += -lpng -lm -ldl - -ARCH = arm -PLATFORM = gp2x -SOUND_DRIVERS = oss diff --git a/config.dingux b/config.dingux deleted file mode 100644 index b981bd3f..00000000 --- a/config.dingux +++ /dev/null @@ -1,15 +0,0 @@ -# Automatically generated by configure -# Configured with: './configure' '--platform=opendingux' -CC = mipsel-linux-gcc -CXX = mipsel-linux-g++ -AS = mipsel-linux-as -STRIP = mipsel-linux-strip -CFLAGS += -I${HOME}/opt/opendingux-toolchain/usr/include/ -I${HOME}/opt/opendingux-toolchain/usr/include/SDL -CFLAGS += -D_GNU_SOURCE=1 -D_REENTRANT -Wno-unused-result -fno-stack-protector -ASFLAGS += -LDFLAGS += --sysroot ${HOME}/opt/opendingux-toolchain -L${HOME}/opt/opendingux-toolchain/lib -LDLIBS += -lSDL -lasound -lpng -lz -lm -ldl - -ARCH = mipsel -PLATFORM = opendingux -SOUND_DRIVERS = sdl diff --git a/config.dingux54 b/config.dingux54 deleted file mode 100644 index a232d952..00000000 --- a/config.dingux54 +++ /dev/null @@ -1,15 +0,0 @@ -# Automatically generated by configure -# Configured with: './configure' '--platform=opendingux' -CC = mipsel-linux-gnu-gcc -CXX = mipsel-linux-gnu-g++ -AS = mipsel-linux-gnu-as -STRIP = mipsel-linux-gnu-strip -CFLAGS += -I${HOME}/opt/opendingux-toolchain/usr/include/ -I${HOME}/opt/opendingux-toolchain/usr/include/SDL -CFLAGS += -D_GNU_SOURCE=1 -D_REENTRANT -Wno-unused-result -fno-stack-protector -ASFLAGS += -LDFLAGS += -B${HOME}/opt/opendingux-toolchain/usr/lib -B${HOME}/opt/opendingux-toolchain/lib -Wl,-rpath-link=${HOME}/opt/opendingux-toolchain/usr/lib -Wl,-rpath-link=${HOME}/opt/opendingux-toolchain/lib -LDLIBS += -lSDL -lasound -lpng -lz -lm -ldl - -ARCH = mipsel -PLATFORM = opendingux -SOUND_DRIVERS = sdl diff --git a/config.gcw0 b/config.gcw0 deleted file mode 100644 index cebe79a1..00000000 --- a/config.gcw0 +++ /dev/null @@ -1,15 +0,0 @@ -# Automatically generated by configure -# Configured with: './configure' '--platform=gcw0' -CC = mipsel-gcw0-linux-uclibc-gcc -CXX = mipsel-gcw0-linux-uclibc-g++ -AS = mipsel-gcw0-linux-uclibc-as -STRIP = mipsel-gcw0-linux-uclibc-strip -CFLAGS += -I${HOME}/opt/gcw0-toolchain/usr/mipsel-gcw0-linux-uclibc/sysroot/usr/include/ -I${HOME}/opt/gcw0-toolchain/usr/mipsel-gcw0-linux-uclibc/sysroot/usr/include/SDL -CFLAGS += -D_GNU_SOURCE=1 -D_REENTRANT -Wno-unused-result -fno-stack-protector -ASFLAGS += -LDFLAGS += --sysroot ${HOME}/opt/gcw0-toolchain/usr/mipsel-gcw0-linux-uclibc/sysroot -LDLIBS += -lSDL -lasound -lpng -lz -lm -ldl - -ARCH = mipsel -PLATFORM = opendingux -SOUND_DRIVERS = sdl diff --git a/config.gp2x b/config.gp2x deleted file mode 100644 index cf99bd77..00000000 --- a/config.gp2x +++ /dev/null @@ -1,16 +0,0 @@ -# Automatically generated by configure -# Configured with: './configure' '--platform=gp2x' -CC = arm-open2x-linux-gcc -CXX = arm-open2x-linux-g++ -AS = arm-open2x-linux-as -STRIP = arm-open2x-linux-strip -CFLAGS += -I${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/include -I${HOME}/src/gp2x/armroot/include -D__GP2X__ -CFLAGS += -mfloat-abi=soft -mcpu=arm920t -mtune=arm920t -mno-thumb-interwork -fno-stack-protector -fno-common -CFLAGS += -finline-limit=42 -fipa-cp -fno-ipa-pure-const -ASFLAGS += -mcpu=arm920t -mfloat-abi=soft -LDFLAGS += --sysroot ${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux -L${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib -L${HOME}/src/gp2x/armroot/lib -static -LDLIBS += -lpng -lm -ldl - -ARCH = arm -PLATFORM = gp2x -SOUND_DRIVERS = oss diff --git a/config.gp2x47 b/config.gp2x47 deleted file mode 100644 index 8a86e850..00000000 --- a/config.gp2x47 +++ /dev/null @@ -1,16 +0,0 @@ -# Automatically generated by configure -# Configured with: './configure' '--platform=gp2x' -CC = arm-linux-gnueabi-gcc -CXX = arm-linux-gnueabi-g++ -AS = arm-linux-gnueabi-as -STRIP = arm-linux-gnueabi-strip -CFLAGS += -I${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/include -I${HOME}/src/gp2x/armroot/include -D__GP2X__ -Wno-unused-result -CFLAGS += -mabi=apcs-gnu -mfloat-abi=soft -mfpu=fpa -mcpu=arm920t -mtune=arm920t -mno-thumb-interwork -fno-stack-protector -fno-common -CFLAGS += -finline-limit=42 -fipa-pta -fno-ipa-sra -fno-ipa-pure-const -ASFLAGS += -mabi=apcs-gnu -mfloat-abi=soft -mfpu=fpa -mcpu=arm920t -LDFLAGS += -mabi=apcs-gnu -mfpu=fpa -B${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/lib/gcc/arm-open2x-linux/4.1.1 -B${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib -L${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/usr/lib -L${HOME}/src/gp2x/armroot/lib -static -LDLIBS += -lpng -lm -ldl - -ARCH = arm -PLATFORM = gp2x -SOUND_DRIVERS = oss diff --git a/config.i386 b/config.i386 deleted file mode 100644 index 9c8c2e65..00000000 --- a/config.i386 +++ /dev/null @@ -1,14 +0,0 @@ -# Automatically generated by configure -# Configured with: './configure' '--platform=generic' -CC = gcc -CXX = g++ -AS = as -STRIP = strip -CFLAGS += -I/usr/include/SDL -D_GNU_SOURCE=1 -D_REENTRANT -Wno-unused-result -m32 -ASFLAGS += -LDFLAGS += -m32 -L/usr/lib/i386-linux-gnu -L${HOME}/opt/lib32 -LDLIBS += -lSDL-1.2 -lasound -lpng -lz -lm -ldl - -ARCH = i386 -PLATFORM = generic -SOUND_DRIVERS = oss alsa sdl diff --git a/config.x86 b/config.x86 deleted file mode 100644 index 45440011..00000000 --- a/config.x86 +++ /dev/null @@ -1,14 +0,0 @@ -# Automatically generated by configure -# Configured with: './configure' '--platform=generic' -CC = gcc -CXX = g++ -AS = as -STRIP = strip -CFLAGS += -I/usr/include/SDL -D_GNU_SOURCE=1 -D_REENTRANT -Wno-unused-result -ASFLAGS += -LDFLAGS += -L/usr/lib/x86_64-linux-gnu -LDLIBS += -lSDL-1.2 -lasound -lpng -lz -lm -ldl - -ARCH = x86_64 -PLATFORM = generic -SOUND_DRIVERS = oss alsa sdl diff --git a/cpu/drc/emit_arm.c b/cpu/drc/emit_arm.c index 25a2c72f..8f633fa3 100644 --- a/cpu/drc/emit_arm.c +++ b/cpu/drc/emit_arm.c @@ -365,7 +365,7 @@ static void emith_flush(void) #define EOP_LDR_REG_LSL(cond,rd,rn,rm,shift_imm) EOP_C_AM2_REG(cond,1,0,1,rn,rd,shift_imm,A_AM1_LSL,rm) #define EOP_LDR_REG_LSL_WB(cond,rd,rn,rm,shift_imm) EOP_C_AM2_REG(cond,1,0,3,rn,rd,shift_imm,A_AM1_LSL,rm) -#define EOP_LDRB_REG_LSL(cond,rd,rn,rm,shift_imm) EOP_C_AM2_REG(cond,1,1,1,rn,rd,shift_imm,A_AM1_LSL,rm); +#define EOP_LDRB_REG_LSL(cond,rd,rn,rm,shift_imm) EOP_C_AM2_REG(cond,1,1,1,rn,rd,shift_imm,A_AM1_LSL,rm) #define EOP_STR_REG_LSL_WB(cond,rd,rn,rm,shift_imm) EOP_C_AM2_REG(cond,1,0,2,rn,rd,shift_imm,A_AM1_LSL,rm) #define EOP_LDRH_IMM2(cond,rd,rn,offset_8) EOP_C_AM3_IMM(cond,(offset_8) >= 0,1,rn,rd,0,1,abs(offset_8)) @@ -470,84 +470,89 @@ static void emith_op_imm2(int cond, int s, int op, int rd, int rn, unsigned int if (cond == A_COND_NV) return; - switch (op) { - case A_OP_MOV: - rn = 0; - // count bits in imm and use MVN if more bits 1 than 0 - if (count_bits(imm) > 16) { - imm = ~imm; - op = A_OP_MVN; - } - // count insns needed for mov/orr #imm + do { + u32 u; + // try to get the topmost byte empty to possibly save an insn for (v = imm, ror2 = 0; (v >> 24) && ror2 < 32/2; ror2++) v = (v << 2) | (v >> 30); -#ifdef HAVE_ARMV7 - for (i = 2; i > 0; i--, v >>= 8) - while (v > 0xff && !(v & 3)) - v >>= 2; - if (v) { // 3+ insns needed... - if (op == A_OP_MVN) - imm = ~imm; - // ...prefer movw/movt - EOP_MOVW(rd, imm); - if (imm & 0xffff0000) - EOP_MOVT(rd, imm); - return; - } -#else - for (i = 3; i > 0; i--, v >>= 8) - while (v > 0xff && !(v & 3)) - v >>= 2; - if (v) { // 4 insns needed... - if (op == A_OP_MVN) + + switch (op) { + case A_OP_MOV: + rn = 0; + // use MVN if more bits 1 than 0 + if (count_bits(imm) > 16) { imm = ~imm; - // ...emit literal load - int idx, o; - if (literal_iindex >= MAX_HOST_LITERALS) { - elprintf(EL_STATUS|EL_SVP|EL_ANOMALY, - "pool overflow"); - exit(1); + op = A_OP_MVN; + ror2 = -1; + break; + } + // count insns needed for mov/orr #imm +#ifdef HAVE_ARMV7 + for (i = 2, u = v; i > 0; i--, u >>= 8) + while (u > 0xff && !(u & 3)) + u >>= 2; + if (u) { // 3+ insns needed... + if (op == A_OP_MVN) + imm = ~imm; + // ...prefer movw/movt + EOP_MOVW(rd, imm); + if (imm & 0xffff0000) + EOP_MOVT(rd, imm); + return; } - idx = emith_pool_literal(imm, &o); - literal_insn[literal_iindex++] = (u32 *)tcache_ptr; - EOP_LDR_IMM2(cond, rd, PC, idx * sizeof(u32)); - if (o > 0) - EOP_C_DOP_IMM(cond, A_OP_ADD, 0, rd, rd, 0, o); - else if (o < 0) - EOP_C_DOP_IMM(cond, A_OP_SUB, 0, rd, rd, 0, -o); +#else + for (i = 2, u = v; i > 0; i--, u >>= 8) + while (u > 0xff && !(u & 3)) + u >>= 2; + if (u) { // 4 insns needed... + if (op == A_OP_MVN) + imm = ~imm; + // ...emit literal load + int idx, o; + if (literal_iindex >= MAX_HOST_LITERALS) { + elprintf(EL_STATUS|EL_SVP|EL_ANOMALY, + "pool overflow"); + exit(1); + } + idx = emith_pool_literal(imm, &o); + literal_insn[literal_iindex++] = (u32 *)tcache_ptr; + EOP_LDR_IMM2(cond, rd, PC, idx * sizeof(u32)); + if (o > 0) + EOP_C_DOP_IMM(cond, A_OP_ADD, 0,rd,rd,0,o); + else if (o < 0) + EOP_C_DOP_IMM(cond, A_OP_SUB, 0,rd,rd,0,-o); return; - } + } #endif - break; + break; - case A_OP_AND: - // AND must fit into 1 insn. if not, use BIC - for (v = imm, ror2 = 0; (v >> 8) && ror2 < 32/2; ror2++) - v = (v << 2) | (v >> 30); - if (v >> 8) { - imm = ~imm; - op = A_OP_BIC; - } - break; - - case A_OP_SUB: - case A_OP_ADD: - // count bits in imm and swap ADD and SUB if more bits 1 than 0 - if (s == 0 && count_bits(imm) > 16) { - imm = -imm; - op ^= (A_OP_ADD^A_OP_SUB); + case A_OP_AND: + // AND must fit into 1 insn. if not, use BIC + for (u = v; u > 0xff && !(u & 3); u >>= 2) ; + if (u >> 8) { + imm = ~imm; + op = A_OP_BIC; + ror2 = -1; + } + break; + + case A_OP_SUB: + case A_OP_ADD: + // swap ADD and SUB if more bits 1 than 0 + if (s == 0 && count_bits(imm) > 16) { + imm = -imm; + op ^= (A_OP_ADD^A_OP_SUB); + ror2 = -1; + } + case A_OP_EOR: + case A_OP_ORR: + case A_OP_BIC: + if (s == 0 && imm == 0 && rd == rn) + return; + break; } - case A_OP_EOR: - case A_OP_ORR: - case A_OP_BIC: - if (s == 0 && imm == 0 && rd == rn) - return; - break; - } + } while (ror2 < 0); - // try to get the topmost byte empty to possibly save an insn - for (v = imm, ror2 = 0; (v >> 24) && ror2 < 32/2; ror2++) - v = (v << 2) | (v >> 30); do { // shift down to get 'best' rot2 while (v > 0xff && !(v & 3)) diff --git a/cpu/drc/emit_arm64.c b/cpu/drc/emit_arm64.c index dc0cf559..3f40d4cd 100644 --- a/cpu/drc/emit_arm64.c +++ b/cpu/drc/emit_arm64.c @@ -134,9 +134,9 @@ enum { XT_UXTW=0x4, XT_UXTX=0x6, XT_LSL=0x7, XT_SXTW=0xc, XT_SXTX=0xe }; #define A64_TST_REG(rn, rm, stype, simm) \ A64_ANDS_REG(Z0, rn, rm, stype, simm) #define A64_MOV_REG(rd, rm, stype, simm) \ - A64_OR_REG(rd, Z0, rm, stype, simm); + A64_OR_REG(rd, Z0, rm, stype, simm) #define A64_MVN_REG(rd, rm, stype, simm) \ - A64_ORN_REG(rd, Z0, rm, stype, simm); + A64_ORN_REG(rd, Z0, rm, stype, simm) // rd = rn OP (rm EXTEND simm) #define A64_ADD_XREG(rd, rn, rm, xtopt, simm) \ diff --git a/cpu/drc/emit_mips.c b/cpu/drc/emit_mips.c index 82527474..6f07e509 100644 --- a/cpu/drc/emit_mips.c +++ b/cpu/drc/emit_mips.c @@ -62,14 +62,17 @@ // opcode field (encoded in op) enum { OP__FN=000, OP__RT, OP_J, OP_JAL, OP_BEQ, OP_BNE, OP_BLEZ, OP_BGTZ }; enum { OP_ADDI=010, OP_ADDIU, OP_SLTI, OP_SLTIU, OP_ANDI, OP_ORI, OP_XORI, OP_LUI }; -enum { OP_LB=040, OP_LH, OP_LWL, OP_LW, OP_LBU, OP_LHU, OP_LWR }; -enum { OP_SB=050, OP_SH, OP_SWL, OP_SW, __(54), __(55), OP_SWR }; +enum { OP_LB=040, OP_LH, OP_LWL, OP_LW, OP_LBU, OP_LHU, OP_LWR, OP_LWU }; +enum { OP_SB=050, OP_SH, OP_SWL, OP_SW, OP_SDL, OP_SDR, OP_SWR }; +enum { OP_DADDI=030, OP_DADDIU, OP_LDL, OP_LDR, OP_SD=067, OP_LD=077 }; // function field (encoded in fn if opcode = OP__FN) enum { FN_SLL=000, __(01), FN_SRL, FN_SRA, FN_SLLV, __(05), FN_SRLV, FN_SRAV }; -enum { FN_MFHI=020, FN_MTHI, FN_MFLO, FN_MTLO }; -enum { FN_MULT=030, FN_MULTU, FN_DIV, FN_DIVU }; +enum { FN_JR=010, FN_JALR, FN_MOVZ, FN_MOVN, FN_SYNC=017 }; +enum { FN_MFHI=020, FN_MTHI, FN_MFLO, FN_MTLO, FN_DSSLV, __(25), FN_DSLRV, FN_DSRAV }; +enum { FN_MULT=030, FN_MULTU, FN_DIV, FN_DIVU, FN_DMULT, FN_DMULTU, FN_DDIV, FN_DDIVU }; enum { FN_ADD=040, FN_ADDU, FN_SUB, FN_SUBU, FN_AND, FN_OR, FN_XOR, FN_NOR }; -enum { FN_JR=010, FN_JALR, FN_MOVZ, FN_MOVN, FN_SYNC=017, FN_SLT=052, FN_SLTU }; +enum { FN_SLT=052, FN_SLTU, FN_DADD, FN_DADDU, FN_DSUB, FN_DSUBU }; +enum { FN_DSLL=070, __(71), FN_DSRL, FN_DSRA, FN_DSLL32, __(75), FN_DSRL32, FN_DSRA32 }; // rt field (encoded in rt if opcode = OP__RT) enum { RT_BLTZ=000, RT_BGEZ, RT_BLTZAL=020, RT_BGEZAL, RT_SYNCI=037 }; @@ -85,8 +88,12 @@ enum { RT_BLTZ=000, RT_BGEZ, RT_BLTZAL=020, RT_BGEZAL, RT_SYNCI=037 }; // rd = rs OP rt #define MIPS_ADD_REG(rd, rs, rt) \ MIPS_OP_REG(FN_ADDU, rd, rs, rt) +#define MIPS_DADD_REG(rd, rs, rt) \ + MIPS_OP_REG(FN_DADDU, rd, rs, rt) #define MIPS_SUB_REG(rd, rs, rt) \ MIPS_OP_REG(FN_SUBU, rd, rs, rt) +#define MIPS_DSUB_REG(rd, rs, rt) \ + MIPS_OP_REG(FN_DSUBU, rd, rs, rt) #define MIPS_NEG_REG(rd, rt) \ MIPS_SUB_REG(rd, Z0, rt) @@ -122,6 +129,8 @@ enum { RT_BLTZ=000, RT_BGEZ, RT_BLTZAL=020, RT_BGEZAL, RT_SYNCI=037 }; // rt = rs OP imm16 #define MIPS_ADD_IMM(rt, rs, imm16) \ MIPS_OP_IMM(OP_ADDIU, rt, rs, imm16) +#define MIPS_DADD_IMM(rt, rs, imm16) \ + MIPS_OP_IMM(OP_DADDIU, rt, rs, imm16) #define MIPS_XOR_IMM(rt, rs, imm16) \ MIPS_OP_IMM(OP_XORI, rt, rs, imm16) @@ -144,6 +153,11 @@ enum { RT_BLTZ=000, RT_BGEZ, RT_BLTZAL=020, RT_BGEZAL, RT_SYNCI=037 }; #define MIPS_ASR_IMM(rd, rt, bits) \ MIPS_INSN(OP__FN, _, rt, rd, bits, FN_SRA) +#define MIPS_DLSL_IMM(rd, rt, bits) \ + MIPS_INSN(OP__FN, _, rt, rd, bits, FN_DSLL) +#define MIPS_DLSL32_IMM(rd, rt, bits) \ + MIPS_INSN(OP__FN, _, rt, rd, bits, FN_DSLL32) + // rt = (rs < imm16) #define MIPS_SLT_IMM(rt, rs, imm16) \ MIPS_OP_IMM(OP_SLTI, rt, rs, imm16) @@ -193,23 +207,45 @@ enum { RT_BLTZ=000, RT_BGEZ, RT_BLTZAL=020, RT_BGEZAL, RT_SYNCI=037 }; // load/store indexed base +#define MIPS_LD(rt, rs, offs16) \ + MIPS_OP_IMM(OP_LD, rt, rs, (u16)(offs16)) #define MIPS_LW(rt, rs, offs16) \ - MIPS_INSN(OP_LW, rs, rt, _,_, (u16)(offs16)) + MIPS_OP_IMM(OP_LW, rt, rs, (u16)(offs16)) #define MIPS_LH(rt, rs, offs16) \ - MIPS_INSN(OP_LH, rs, rt, _,_, (u16)(offs16)) + MIPS_OP_IMM(OP_LH, rt, rs, (u16)(offs16)) #define MIPS_LB(rt, rs, offs16) \ - MIPS_INSN(OP_LB, rs, rt, _,_, (u16)(offs16)) + MIPS_OP_IMM(OP_LB, rt, rs, (u16)(offs16)) #define MIPS_LHU(rt, rs, offs16) \ - MIPS_INSN(OP_LHU, rs, rt, _,_, (u16)(offs16)) + MIPS_OP_IMM(OP_LHU, rt, rs, (u16)(offs16)) #define MIPS_LBU(rt, rs, offs16) \ - MIPS_INSN(OP_LBU, rs, rt, _,_, (u16)(offs16)) + MIPS_OP_IMM(OP_LBU, rt, rs, (u16)(offs16)) +#define MIPS_SD(rt, rs, offs16) \ + MIPS_OP_IMM(OP_SD, rt, rs, (u16)(offs16)) #define MIPS_SW(rt, rs, offs16) \ - MIPS_INSN(OP_SW, rs, rt, _,_, (u16)(offs16)) + MIPS_OP_IMM(OP_SW, rt, rs, (u16)(offs16)) #define MIPS_SH(rt, rs, offs16) \ - MIPS_INSN(OP_SH, rs, rt, _,_, (u16)(offs16)) + MIPS_OP_IMM(OP_SH, rt, rs, (u16)(offs16)) #define MIPS_SB(rt, rs, offs16) \ - MIPS_INSN(OP_SB, rs, rt, _,_, (u16)(offs16)) + MIPS_OP_IMM(OP_SB, rt, rs, (u16)(offs16)) + +// pointer operations + +#if __mips == 4 || __mips == 64 +#define OP_LP OP_LD +#define OP_SP OP_SD +#define OP_PADDIU OP_DADDIU +#define FN_PADDU FN_DADDU +#define FN_PSUBU FN_DSUBU +#define PTR_SCALE 3 +#else +#define OP_LP OP_LW +#define OP_SP OP_SW +#define OP_PADDIU OP_ADDIU +#define FN_PADDU FN_ADDU +#define FN_PSUBU FN_SUBU +#define PTR_SCALE 2 +#endif // XXX: tcache_ptr type for SVP and SH2 compilers differs.. #define EMIT_PTR(ptr, x) \ @@ -442,14 +478,14 @@ static void emith_set_arith_flags(int rd, int rs, int rt, s32 imm, int sub) if (emith_flg_hint & _FHV) { emith_flg_noV = 0; - if (rt >= 0) // Nt^Ns in FV, bit 31 + if (rt > Z0) // Nt^Ns in FV, bit 31 EMIT(MIPS_XOR_REG(FV, rs, rt)); - else if (imm == 0) + else if (rt == Z0 || imm == 0) emith_flg_noV = 1; // imm #0 can't overflow else if ((imm < 0) == !sub) EMIT(MIPS_NOR_REG(FV, rs, Z0)); else if ((imm > 0) == !sub) - EMIT(MIPS_OR_REG(FV, rs, Z0)); + EMIT(MIPS_XOR_REG(FV, rs, Z0)); } // full V = Nd^Nt^Ns^C calculation is deferred until really needed @@ -483,13 +519,17 @@ static void emith_set_compare_flags(int rs, int rt, s32 imm) EMIT(MIPS_MVN_REG(d, s)) #define emith_add_r_r_r_lsl_ptr(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(MIPS_LSL_IMM(AT, s2, simm)); \ + EMIT(MIPS_OP_REG(FN_PADDU, d, s1, AT)); \ + } else EMIT(MIPS_OP_REG(FN_PADDU, d, s1, s2)); \ +} while (0) +#define emith_add_r_r_r_lsl(d, s1, s2, simm) do { \ if (simm) { \ EMIT(MIPS_LSL_IMM(AT, s2, simm)); \ EMIT(MIPS_ADD_REG(d, s1, AT)); \ } else EMIT(MIPS_ADD_REG(d, s1, s2)); \ } while (0) -#define emith_add_r_r_r_lsl(d, s1, s2, simm) \ - emith_add_r_r_r_lsl_ptr(d, s1, s2, simm) #define emith_add_r_r_r_lsr(d, s1, s2, simm) do { \ if (simm) { \ @@ -498,6 +538,16 @@ static void emith_set_compare_flags(int rs, int rt, s32 imm) } else EMIT(MIPS_ADD_REG(d, s1, s2)); \ } while (0) +#define emith_addf_r_r_r_lsl_ptr(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(MIPS_LSL_IMM(AT, s2, simm)); \ + EMIT(MIPS_OP_REG(FN_PADDU, FNZ, s1, AT)); \ + emith_set_arith_flags(d, s1, AT, 0, 0); \ + } else { \ + EMIT(MIPS_OP_REG(FN_PADDU, FNZ, s1, s2)); \ + emith_set_arith_flags(d, s1, s2, 0, 0); \ + } \ +} while (0) #define emith_addf_r_r_r_lsl(d, s1, s2, simm) do { \ if (simm) { \ EMIT(MIPS_LSL_IMM(AT, s2, simm)); \ @@ -586,6 +636,8 @@ static void emith_set_compare_flags(int rs, int rt, s32 imm) #define emith_add_r_r_r(d, s1, s2) \ emith_add_r_r_r_lsl(d, s1, s2, 0) +#define emith_addf_r_r_r_ptr(d, s1, s2) \ + emith_addf_r_r_r_lsl_ptr(d, s1, s2, 0) #define emith_addf_r_r_r(d, s1, s2) \ emith_addf_r_r_r_lsl(d, s1, s2, 0) @@ -697,14 +749,26 @@ static void emith_set_compare_flags(int rs, int rt, s32 imm) // move immediate static void emith_move_imm(int r, uintptr_t imm) { - if ((s16)imm == imm) { +#if __mips == 4 || __mips == 64 + if ((s32)imm != imm) { + emith_move_imm(r, imm >> 32); + if (imm & 0xffff0000) { + EMIT(MIPS_DLSL_IMM(r, r, 16)); + EMIT(MIPS_OR_IMM(r, r, (imm >> 16) & 0xffff)); + EMIT(MIPS_DLSL_IMM(r, r, 16)); + } else EMIT(MIPS_DLSL32_IMM(r, r, 0)); + if (imm & 0x0000ffff) + EMIT(MIPS_OR_IMM(r, r, imm & 0xffff)); + } else +#endif + if ((s16)imm == imm) { EMIT(MIPS_ADD_IMM(r, Z0, imm)); - } else if (!(imm >> 16)) { + } else if (!((u32)imm >> 16)) { EMIT(MIPS_OR_IMM(r, Z0, imm)); } else { int s = Z0; - if (imm >> 16) { - EMIT(MIPS_MOVT_IMM(r, imm >> 16)); + if ((u32)imm >> 16) { + EMIT(MIPS_MOVT_IMM(r, (u32)imm >> 16)); s = r; } if ((u16)imm) @@ -729,17 +793,17 @@ static void emith_move_imm(int r, uintptr_t imm) } while (0) // arithmetic, immediate - can only be ADDI[U], since SUBI[U] doesn't exist -static void emith_arith_imm(int op, int rd, int rs, u32 imm) +static void emith_add_imm(int ptr, int rd, int rs, u32 imm) { if ((s16)imm == imm) { if (imm || rd != rs) - EMIT(MIPS_OP_IMM(op, rd, rs, imm)); + EMIT(MIPS_OP_IMM(ptr ? OP_PADDIU:OP_ADDIU, rd,rs,imm)); } else if ((s32)imm < 0) { emith_move_r_imm(AT, -imm); - EMIT(MIPS_OP_REG(FN_SUB + (op-OP_ADDI), rd, rs, AT)); + EMIT(MIPS_OP_REG((ptr ? FN_PSUBU:FN_SUBU), rd,rs,AT)); } else { emith_move_r_imm(AT, imm); - EMIT(MIPS_OP_REG(FN_ADD + (op-OP_ADDI), rd, rs, AT)); + EMIT(MIPS_OP_REG((ptr ? FN_PADDU:FN_ADDU), rd,rs,AT)); } } @@ -760,7 +824,7 @@ static void emith_arith_imm(int op, int rd, int rs, u32 imm) emith_subf_r_r_imm(r, r, imm) #define emith_adc_r_imm(r, imm) \ - emith_adc_r_r_imm(r, r, imm); + emith_adc_r_r_imm(r, r, imm) #define emith_adcf_r_imm(r, imm) \ emith_adcf_r_r_imm(r, r, imm) @@ -770,10 +834,10 @@ static void emith_arith_imm(int op, int rd, int rs, u32 imm) // emith_subf_r_r_imm(FNZ, r, (s16)imm) #define emith_add_r_r_ptr_imm(d, s, imm) \ - emith_arith_imm(OP_ADDIU, d, s, imm) + emith_add_imm(1, d, s, imm) #define emith_add_r_r_imm(d, s, imm) \ - emith_add_r_r_ptr_imm(d, s, imm) + emith_add_imm(0, d, s, imm) #define emith_addf_r_r_imm(d, s, imm) do { \ emith_add_r_r_imm(FNZ, s, imm); \ @@ -1043,22 +1107,24 @@ static void emith_lohi_nops(void) // load/store. offs has 16 bits signed, which is currently sufficient #define emith_read_r_r_offs_ptr(r, rs, offs) \ - EMIT(MIPS_LW(r, rs, offs)) + EMIT(MIPS_OP_IMM(OP_LP, r, rs, offs)) #define emith_read_r_r_offs_ptr_c(cond, r, rs, offs) \ emith_read_r_r_offs_ptr(r, rs, offs) #define emith_read_r_r_offs(r, rs, offs) \ - emith_read_r_r_offs_ptr(r, rs, offs) + EMIT(MIPS_LW(r, rs, offs)) #define emith_read_r_r_offs_c(cond, r, rs, offs) \ emith_read_r_r_offs(r, rs, offs) #define emith_read_r_r_r_ptr(r, rs, rm) do { \ emith_add_r_r_r(AT, rs, rm); \ - EMIT(MIPS_LW(r, AT, 0)); \ + EMIT(MIPS_OP_IMM(OP_LP, r, AT, 0)); \ } while (0) -#define emith_read_r_r_r(r, rs, rm) \ - emith_read_r_r_r_ptr(r, rs, rm) +#define emith_read_r_r_r(r, rs, rm) do { \ + emith_add_r_r_r(AT, rs, rm); \ + EMIT(MIPS_LW(r, AT, 0)); \ +} while (0) #define emith_read_r_r_r_c(cond, r, rs, rm) \ emith_read_r_r_r(r, rs, rm) @@ -1112,24 +1178,26 @@ static void emith_lohi_nops(void) #define emith_write_r_r_offs_ptr(r, rs, offs) \ - EMIT(MIPS_SW(r, rs, offs)) + EMIT(MIPS_OP_IMM(OP_SP, r, rs, offs)) #define emith_write_r_r_offs_ptr_c(cond, r, rs, offs) \ emith_write_r_r_offs_ptr(r, rs, offs) #define emith_write_r_r_r_ptr(r, rs, rm) do { \ emith_add_r_r_r(AT, rs, rm); \ - EMIT(MIPS_SW(r, AT, 0)); \ + EMIT(MIPS_OP_IMM(OP_SP, r, AT, 0)); \ } while (0) #define emith_write_r_r_r_ptr_c(cond, r, rs, rm) \ emith_write_r_r_r_ptr(r, rs, rm) #define emith_write_r_r_offs(r, rs, offs) \ - emith_write_r_r_offs_ptr(r, rs, offs) + EMIT(MIPS_SW(r, rs, offs)) #define emith_write_r_r_offs_c(cond, r, rs, offs) \ emith_write_r_r_offs(r, rs, offs) -#define emith_write_r_r_r(r, rs, rm) \ - emith_write_r_r_r_ptr(r, rs, rm) +#define emith_write_r_r_r(r, rs, rm) do { \ + emith_add_r_r_r(AT, rs, rm); \ + EMIT(MIPS_SW(r, AT, 0)); \ +} while (0) #define emith_write_r_r_r_c(cond, r, rs, rm) \ emith_write_r_r_r(r, rs, rm) @@ -1164,7 +1232,7 @@ static void emith_lohi_nops(void) int _c; u32 _m = mask & 0x300fffc; /* r2-r15,r24-r25 */ \ if (__builtin_parity(_m) == 1) _m |= 0x1; /* ABI align */ \ int _s = count_bits(_m) * 4, _o = _s; \ - if (_s) emith_sub_r_imm(SP, _s); \ + if (_s) emith_add_r_r_ptr_imm(SP, SP, -_s); \ for (_c = HOST_REGS-1; _m && _c >= 0; _m &= ~(1 << _c), _c--) \ if (_m & (1 << _c)) \ { _o -= 4; if (_c) emith_write_r_r_offs(_c, SP, _o); } \ @@ -1177,7 +1245,7 @@ static void emith_lohi_nops(void) for (_c = 0; _m && _c < HOST_REGS; _m &= ~(1 << _c), _c++) \ if (_m & (1 << _c)) \ { if (_c) emith_read_r_r_offs(_c, SP, _o); _o += 4; } \ - if (_s) emith_add_r_imm(SP, _s); \ + if (_s) emith_add_r_r_ptr_imm(SP, SP, _s); \ } while (0) #define host_arg2reg(rd, arg) \ @@ -1343,8 +1411,8 @@ static int emith_cond_check(int cond, int *r) emith_jump_cond(cond, target) #define emith_jump_cond_inrange(target) \ - ((u8 *)target - (u8 *)tcache_ptr - 4 < 0x00020000U || \ - (u8 *)target - (u8 *)tcache_ptr - 4 >= 0xfffe0010U) // mind cond_check + ((u8 *)target - (u8 *)tcache_ptr - 4 < 0x20000 && \ + (u8 *)target - (u8 *)tcache_ptr - 4 >= -0x20000+0x10) //mind cond_check // NB: returns position of patch for cache maintenance #define emith_jump_patch(ptr, target, pos) do { \ @@ -1359,8 +1427,8 @@ static int emith_cond_check(int cond, int *r) } while (0) #define emith_jump_patch_inrange(ptr, target) \ - ((u8 *)target - (u8 *)ptr - 4 < 0x00020000U || \ - (u8 *)target - (u8 *)ptr - 4 >= 0xfffe0010U) // mind cond_check + ((u8 *)target - (u8 *)ptr - 4 < 0x20000 && \ + (u8 *)target - (u8 *)ptr - 4 >= -0x20000+0x10) // mind cond_check #define emith_jump_patch_size() 4 #define emith_jump_at(ptr, target) do { \ @@ -1410,7 +1478,7 @@ static int emith_cond_check(int cond, int *r) // NB: ABI SP alignment is 8 for compatibility with MIPS IV #define emith_push_ret(r) do { \ - emith_sub_r_imm(SP, 8+16); /* reserve new arg save area (16) */ \ + emith_add_r_r_ptr_imm(SP, SP, -8-16); /* ABI: 16 byte arg save area */ \ emith_write_r_r_offs(LR, SP, 4+16); \ if ((r) > 0) emith_write_r_r_offs(r, SP, 0+16); \ } while (0) @@ -1418,7 +1486,7 @@ static int emith_cond_check(int cond, int *r) #define emith_pop_and_ret(r) do { \ if ((r) > 0) emith_read_r_r_offs(r, SP, 0+16); \ emith_read_r_r_offs(LR, SP, 4+16); \ - emith_add_r_imm(SP, 8+16); \ + emith_add_r_r_ptr_imm(SP, SP, 8+16); \ emith_ret(); \ } while (0) @@ -1436,7 +1504,7 @@ static int emith_cond_check(int cond, int *r) int _c; u32 _m = 0xd0ff0000; \ if (__builtin_parity(_m) == 1) _m |= 0x1; /* ABI align for SP is 8 */ \ int _s = count_bits(_m) * 4 + 16, _o = _s; /* 16 byte arg save area */ \ - if (_s) emith_sub_r_imm(SP, _s); \ + if (_s) emith_add_r_r_ptr_imm(SP, SP, -_s); \ for (_c = HOST_REGS-1; _m && _c >= 0; _m &= ~(1 << _c), _c--) \ if (_m & (1 << _c)) \ { _o -= 4; if (_c) emith_write_r_r_offs(_c, SP, _o); } \ @@ -1448,23 +1516,23 @@ static int emith_cond_check(int cond, int *r) for (_c = 0; _m && _c < HOST_REGS; _m &= ~(1 << _c), _c++) \ if (_m & (1 << _c)) \ { if (_c) emith_read_r_r_offs(_c, SP, _o); _o += 4; } \ - if (_s) emith_add_r_imm(SP, _s); \ + if (_s) emith_add_r_r_ptr_imm(SP, SP, _s); \ emith_ret(); \ } while (0) // NB: assumes a is in arg0, tab, func and mask are temp #define emith_sh2_rcall(a, tab, func, mask) do { \ emith_lsr(mask, a, SH2_READ_SHIFT); \ - emith_add_r_r_r_lsl_ptr(tab, tab, mask, 3); \ + emith_add_r_r_r_lsl_ptr(tab, tab, mask, PTR_SCALE+1); \ emith_read_r_r_offs_ptr(func, tab, 0); \ - emith_read_r_r_offs(mask, tab, 4); \ - emith_addf_r_r_r/*_ptr*/(func, func, func); \ + emith_read_r_r_offs(mask, tab, (1 << PTR_SCALE)); \ + emith_addf_r_r_r_ptr(func, func, func); \ } while (0) // NB: assumes a, val are in arg0 and arg1, tab and func are temp #define emith_sh2_wcall(a, val, tab, func) do { \ emith_lsr(func, a, SH2_WRITE_SHIFT); \ - emith_lsl(func, func, 2); \ + emith_lsl(func, func, PTR_SCALE); \ emith_read_r_r_r_ptr(func, tab, func); \ emith_move_r_r_ptr(6, CONTEXT_REG); /* arg2 */ \ emith_jump_reg(func); \ diff --git a/cpu/drc/emit_x86.c b/cpu/drc/emit_x86.c index e7284499..9ed8b563 100644 --- a/cpu/drc/emit_x86.c +++ b/cpu/drc/emit_x86.c @@ -448,11 +448,11 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common // fake conditionals (using SJMP instead) #define emith_move_r_imm_c(cond, r, imm) \ - emith_move_r_imm(r, imm); + emith_move_r_imm(r, imm) #define emith_add_r_imm_c(cond, r, imm) \ - emith_add_r_imm(r, imm); + emith_add_r_imm(r, imm) #define emith_sub_r_imm_c(cond, r, imm) \ - emith_sub_r_imm(r, imm); + emith_sub_r_imm(r, imm) #define emith_or_r_imm_c(cond, r, imm) \ emith_or_r_imm(r, imm) #define emith_eor_r_imm_c(cond, r, imm) \ @@ -468,11 +468,11 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common #define emith_ror_c(cond, d, s, cnt) \ emith_ror(d, s, cnt) #define emith_and_r_r_c(cond, d, s) \ - emith_and_r_r(d, s); + emith_and_r_r(d, s) #define emith_add_r_r_imm_c(cond, d, s, imm) \ - emith_add_r_r_imm(d, s, imm); + emith_add_r_r_imm(d, s, imm) #define emith_sub_r_r_imm_c(cond, d, s, imm) \ - emith_sub_r_r_imm(d, s, imm); + emith_sub_r_r_imm(d, s, imm) #define emith_read8_r_r_r_c(cond, r, rs, rm) \ emith_read8_r_r_r(r, rs, rm) diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index 2320c501..d1cde69e 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -47,13 +47,9 @@ #define LOOP_OPTIMIZER 1 #define T_OPTIMIZER 1 -// limits (per block) -#define MAX_BLOCK_SIZE (BLOCK_INSN_LIMIT * 6 * 6) - -// max literal offset from the block end #define MAX_LITERAL_OFFSET 0x200 // max. MOVA, MOV @(PC) offset -#define MAX_LITERALS (BLOCK_INSN_LIMIT / 4) -#define MAX_LOCAL_BRANCHES (BLOCK_INSN_LIMIT / 4) +#define MAX_LOCAL_TARGETS (BLOCK_INSN_LIMIT / 4) +#define MAX_LOCAL_BRANCHES (BLOCK_INSN_LIMIT / 2) // debug stuff // 01 - warnings/errors @@ -294,7 +290,7 @@ struct block_link { u32 target_pc; void *jump; // insn address void *blx; // block link/exit area if any - u8 jdisp[8]; // jump backup buffer + u8 jdisp[12]; // jump backup buffer struct block_link *next; // either in block_entry->links or unresolved struct block_link *o_next; // ...in block_entry->o_links struct block_link *prev; @@ -443,6 +439,8 @@ static void rcache_free_tmp(int hr); #include "../drc/emit_arm64.c" #elif defined(__mips__) #include "../drc/emit_mips.c" +#elif defined(__riscv__) || defined(__riscv) +#include "../drc/emit_riscv.c" #elif defined(__i386__) #include "../drc/emit_x86.c" #elif defined(__x86_64__) @@ -1207,45 +1205,10 @@ static void dr_flush_tcache(int tcid) static void *dr_failure(void) { - lprintf("recompilation failed\n"); + printf("recompilation failed\n"); exit(1); } -#define ADD_TO_ARRAY(array, count, item, failcode) { \ - if (count >= ARRAY_SIZE(array)) { \ - dbg(1, "warning: " #array " overflow"); \ - failcode; \ - } else \ - array[count++] = item; \ -} - -static inline int find_in_array(u32 *array, size_t size, u32 what) -{ - size_t i; - for (i = 0; i < size; i++) - if (what == array[i]) - return i; - - return -1; -} - -static int find_in_sorted_array(u32 *array, size_t size, u32 what) -{ - // binary search in sorted array - int left = 0, right = size-1; - while (left <= right) - { - int middle = (left + right) / 2; - if (array[middle] == what) - return middle; - else if (array[middle] < what) - left = middle + 1; - else - right = middle - 1; - } - return -1; -} - // --------------------------------------------------------------- // NB rcache allocation dependencies: @@ -2868,6 +2831,88 @@ static void emit_do_static_regs(int is_write, int tmpr) } } +// block local link stuff +struct linkage { + u32 pc; + void *ptr; + struct block_link *bl; + u32 mask; +}; + +static inline int find_in_linkage(const struct linkage *array, int size, u32 pc) +{ + size_t i; + for (i = 0; i < size; i++) + if (pc == array[i].pc) + return i; + + return -1; +} + +static int find_in_sorted_linkage(const struct linkage *array, int size, u32 pc) +{ + // binary search in sorted array + int left = 0, right = size-1; + while (left <= right) + { + int middle = (left + right) / 2; + if (array[middle].pc == pc) + return middle; + else if (array[middle].pc < pc) + left = middle + 1; + else + right = middle - 1; + } + return -1; +} + +static void emit_branch_linkage_code(SH2 *sh2, struct block_desc *block, int tcache_id, + const struct linkage *targets, int target_count, + const struct linkage *links, int link_count) +{ + struct block_link *bl; + int u, v, tmp; + + for (u = 0; u < link_count; u++) { + emith_pool_check(); + // look up local branch targets + v = find_in_sorted_linkage(targets, target_count, links[u].pc); + if (v >= 0) { + if (! targets[v].ptr) { + // forward branch not yet resolved, prepare external linking + emith_jump_patch(links[u].ptr, tcache_ptr, NULL); + bl = dr_prepare_ext_branch(block->entryp, links[u].pc, sh2->is_slave, tcache_id); + if (bl) { + emith_flush(); // flush to inhibit insn swapping + bl->type = BL_LDJMP; + } + + tmp = rcache_get_tmp_arg(0); + emith_move_r_imm(tmp, links[u].pc); + rcache_free_tmp(tmp); + emith_jump_patchable(sh2_drc_dispatcher); + } else if (emith_jump_patch_inrange(links[u].ptr, targets[v].ptr)) { + // inrange local branch + emith_jump_patch(links[u].ptr, targets[v].ptr, NULL); + } else { + // far local branch + emith_jump_patch(links[u].ptr, tcache_ptr, NULL); + emith_jump(targets[v].ptr); + } + } else { + // external or exit, emit blx area entry + void *target = (links[u].pc & 1 ? sh2_drc_exit : sh2_drc_dispatcher); + if (links[u].bl) + links[u].bl->blx = tcache_ptr; + emith_jump_patch(links[u].ptr, tcache_ptr, NULL); + tmp = rcache_get_tmp_arg(0); + emith_move_r_imm(tmp, links[u].pc & ~1); + rcache_free_tmp(tmp); + emith_jump(target); + } + } +} + #define DELAY_SAVE_T(sr) { \ int t_ = rcache_get_tmp(); \ emith_bic_r_imm(sr, T_save); \ @@ -2887,17 +2932,10 @@ static void *dr_get_pc_base(u32 pc, SH2 *sh2); static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) { // branch targets in current block - u32 branch_target_pc[MAX_LOCAL_BRANCHES]; - void *branch_target_ptr[MAX_LOCAL_BRANCHES]; + struct linkage branch_targets[MAX_LOCAL_TARGETS]; int branch_target_count = 0; - // unresolved local forward branches, for fixup at block end - u32 branch_patch_pc[MAX_LOCAL_BRANCHES]; - void *branch_patch_ptr[MAX_LOCAL_BRANCHES]; - int branch_patch_count = 0; - // external branch targets with a block link/exit area - u32 blx_target_pc[MAX_LOCAL_BRANCHES]; - void *blx_target_ptr[MAX_LOCAL_BRANCHES]; - struct block_link *blx_target_bl[MAX_LOCAL_BRANCHES]; + // unresolved local or external targets with block link/exit area if needed + struct linkage blx_targets[MAX_LOCAL_BRANCHES]; int blx_target_count = 0; u8 op_flags[BLOCK_INSN_LIMIT]; @@ -2906,6 +2944,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) int delay_reg:8; u32 loop_type:8; u32 polling:8; + u32 pinning:1; u32 test_irq:1; u32 pending_branch_direct:1; u32 pending_branch_indirect:1; @@ -2914,23 +2953,20 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) #if LOOP_OPTIMIZER // loops with pinned registers for optimzation // pinned regs are like statics and don't need saving/restoring inside a loop - u32 pinned_loop_pc[MAX_LOCAL_BRANCHES/16]; - void *pinned_loop_ptr[MAX_LOCAL_BRANCHES/16]; - u32 pinned_loop_mask[MAX_LOCAL_BRANCHES/16]; + struct linkage pinned_loops[MAX_LOCAL_TARGETS/16]; int pinned_loop_count = 0; #endif // PC of current, first, last SH2 insn u32 pc, base_pc, end_pc; u32 base_literals, end_literals; - void *block_entry_ptr; + u8 *block_entry_ptr; struct block_desc *block; struct block_entry *entry; struct block_link *bl; u16 *dr_pc_base; struct op_data *opd; int blkid_main = 0; - int skip_op = 0; int tmp, tmp2; int cycles; int i, v; @@ -2971,8 +3007,15 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) for (pc = base_pc, i = 0; pc < end_pc; i++, pc += 2) { if (op_flags[i] & OF_DELAY_OP) op_flags[i] &= ~OF_BTARGET; - if (op_flags[i] & OF_BTARGET) - ADD_TO_ARRAY(branch_target_pc, branch_target_count, pc, ); + if (op_flags[i] & OF_BTARGET) { + if (branch_target_count < ARRAY_SIZE(branch_targets)) + branch_targets[branch_target_count++] = (struct linkage) { .pc = pc }; + else { + printf("warning: linkage overflow\n"); + end_pc = pc; + break; + } + } if (ops[i].op == OP_LDC && (ops[i].dest & BITMASK1(SHR_SR)) && pc+2 < end_pc) op_flags[i+1] |= OF_BTARGET; // RTE entrypoint in case of SR.IMASK change // unify T and SR since rcache doesn't know about "virtual" guest regs @@ -3040,9 +3083,9 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) if (op_flags[v] & OF_BASIC_LOOP) { m3 &= ~rcache_regs_static & ~BITMASK5(SHR_PC, SHR_PR, SHR_SR, SHR_T, SHR_MEM); if (m3 && count_bits(m3) < count_bits(rcache_vregs_reg) && - pinned_loop_count < ARRAY_SIZE(pinned_loop_pc)-1) { - pinned_loop_mask[pinned_loop_count] = m3; - pinned_loop_pc[pinned_loop_count++] = base_pc + 2*v; + pinned_loop_count < ARRAY_SIZE(pinned_loops)-1) { + pinned_loops[pinned_loop_count++] = + (struct linkage) { .mask = m3, .pc = base_pc + 2*v }; } else op_flags[v] &= ~OF_BASIC_LOOP; } @@ -3052,10 +3095,6 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) #endif } - if (branch_target_count > 0) { - memset(branch_target_ptr, 0, sizeof(branch_target_ptr[0]) * branch_target_count); - } - tcache_ptr = dr_prepare_cache(tcache_id, (end_pc - base_pc) / 2); #if (DRC_DEBUG & 4) tcache_dsm_ptrs[tcache_id] = tcache_ptr; @@ -3076,7 +3115,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) emith_invalidate_t(); drcf = (struct drcf) { 0 }; #if LOOP_OPTIMIZER - pinned_loop_pc[pinned_loop_count] = -1; + pinned_loops[pinned_loop_count].pc = -1; pinned_loop_count = 0; #endif @@ -3090,24 +3129,6 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) int tmp3, tmp4; int sr; - opd = &ops[i]; - op = FETCH_OP(pc); - -#if (DRC_DEBUG & 2) - insns_compiled++; -#endif -#if (DRC_DEBUG & 4) - DasmSH2(sh2dasm_buff, pc, op); - if (op_flags[i] & OF_BTARGET) { - if ((op_flags[i] & OF_LOOP) == OF_DELAY_LOOP) tmp3 = '+'; - else if ((op_flags[i] & OF_LOOP) == OF_POLL_LOOP) tmp3 = '='; - else if ((op_flags[i] & OF_LOOP) == OF_IDLE_LOOP) tmp3 = '~'; - else tmp3 = '*'; - } else if (drcf.loop_type) tmp3 = '.'; - else tmp3 = ' '; - printf("%c%08x %04x %s\n", tmp3, pc, op, sh2dasm_buff); -#endif - if (op_flags[i] & OF_BTARGET) { if (pc != base_pc) @@ -3143,9 +3164,9 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) break; } - v = find_in_sorted_array(branch_target_pc, branch_target_count, pc); + v = find_in_sorted_linkage(branch_targets, branch_target_count, pc); if (v >= 0) - branch_target_ptr[v] = tcache_ptr; + branch_targets[v].ptr = tcache_ptr; #if LOOP_DETECTION drcf.loop_type = op_flags[i] & OF_LOOP; drcf.delay_reg = -1; @@ -3176,12 +3197,13 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) #if LOOP_OPTIMIZER if (op_flags[i] & OF_BASIC_LOOP) { - if (pinned_loop_pc[pinned_loop_count] == pc) { + if (pinned_loops[pinned_loop_count].pc == pc) { // pin needed regs on loop entry - FOR_ALL_BITS_SET_DO(pinned_loop_mask[pinned_loop_count], v, rcache_pin_reg(v)); + FOR_ALL_BITS_SET_DO(pinned_loops[pinned_loop_count].mask, v, rcache_pin_reg(v)); emith_flush(); // store current PC as loop target - pinned_loop_ptr[pinned_loop_count] = tcache_ptr; + pinned_loops[pinned_loop_count].ptr = tcache_ptr; + drcf.pinning = 1; } else op_flags[i] &= ~OF_BASIC_LOOP; } @@ -3193,11 +3215,10 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) EMITH_JMP_START(DCOND_GT); rcache_save_pinned(); - if (blx_target_count < ARRAY_SIZE(blx_target_pc)) { + if (blx_target_count < ARRAY_SIZE(blx_targets)) { // exit via stub in blx table (saves some 1-3 insns in the main flow) - blx_target_ptr[blx_target_count] = tcache_ptr; - blx_target_pc[blx_target_count] = pc|1; - blx_target_bl[blx_target_count++] = NULL; + blx_targets[blx_target_count++] = + (struct linkage) { .ptr = tcache_ptr, .pc = pc|1, .bl = NULL }; emith_jump_patchable(tcache_ptr); } else { // blx table full, must inline exit code @@ -3210,12 +3231,11 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) } else #endif { - if (blx_target_count < ARRAY_SIZE(blx_target_pc)) { + if (blx_target_count < ARRAY_SIZE(blx_targets)) { // exit via stub in blx table (saves some 1-3 insns in the main flow) - blx_target_pc[blx_target_count] = pc|1; - blx_target_bl[blx_target_count] = NULL; emith_cmp_r_imm(sr, 0); - blx_target_ptr[blx_target_count++] = tcache_ptr; + blx_targets[blx_target_count++] = + (struct linkage) { .ptr = tcache_ptr, .pc = pc|1, .bl = NULL }; emith_jump_cond_patchable(DCOND_LE, tcache_ptr); } else { // blx table full, must inline exit code @@ -3282,13 +3302,40 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) } #endif + // emit blx area if limits are approached + if (blx_target_count && (blx_target_count > ARRAY_SIZE(blx_targets)-4 || + !emith_jump_patch_inrange(blx_targets[0].ptr, tcache_ptr+0x100))) { + u8 *jp; + rcache_invalidate_tmp(); + jp = tcache_ptr; + emith_jump_patchable(tcache_ptr); + emit_branch_linkage_code(sh2, block, tcache_id, branch_targets, + branch_target_count, blx_targets, blx_target_count); + blx_target_count = 0; + do_host_disasm(tcache_id); + emith_jump_patch(jp, tcache_ptr, NULL); + } + emith_pool_check(); - pc += 2; - if (skip_op > 0) { - skip_op--; - continue; - } + opd = &ops[i]; + op = FETCH_OP(pc); +#if (DRC_DEBUG & 4) + DasmSH2(sh2dasm_buff, pc, op); + if (op_flags[i] & OF_BTARGET) { + if ((op_flags[i] & OF_LOOP) == OF_DELAY_LOOP) tmp3 = '+'; + else if ((op_flags[i] & OF_LOOP) == OF_POLL_LOOP) tmp3 = '='; + else if ((op_flags[i] & OF_LOOP) == OF_IDLE_LOOP) tmp3 = '~'; + else tmp3 = '*'; + } else if (drcf.loop_type) tmp3 = '.'; + else tmp3 = ' '; + printf("%c%08x %04x %s\n", tmp3, pc, op, sh2dasm_buff); +#endif + + pc += 2; +#if (DRC_DEBUG & 2) + insns_compiled++; +#endif if (op_flags[i] & OF_DELAY_OP) { @@ -4422,7 +4469,7 @@ end_op: emit_sync_t_to_sr(); emith_sh2_delay_loop(cycles, drcf.delay_reg); rcache_unlock_all(); // may lock delay_reg - drcf.polling = drcf.loop_type = 0; + drcf.polling = drcf.loop_type = drcf.pinning = 0; } #endif @@ -4464,33 +4511,39 @@ end_op: emith_sync_t(sr); // no modification of host status/flags between here and branching! - v = find_in_sorted_array(branch_target_pc, branch_target_count, target_pc); + v = find_in_sorted_linkage(branch_targets, branch_target_count, target_pc); if (v >= 0) { // local branch - if (branch_target_ptr[v]) { + if (branch_targets[v].ptr) { // local backward jump, link here now since host PC is already known - target = branch_target_ptr[v]; + target = branch_targets[v].ptr; #if LOOP_OPTIMIZER - if (pinned_loop_pc[pinned_loop_count] == target_pc) { + if (pinned_loops[pinned_loop_count].pc == target_pc) { // backward jump at end of optimized loop rcache_unpin_all(); - target = pinned_loop_ptr[pinned_loop_count]; + target = pinned_loops[pinned_loop_count].ptr; pinned_loop_count ++; } #endif - if (cond != -1) - emith_jump_cond(cond, target); - else { + if (cond != -1) { + if (emith_jump_patch_inrange(tcache_ptr, target)) { + emith_jump_cond(cond, target); + } else { + // not reachable directly, must use far branch + EMITH_JMP_START(emith_invert_cond(cond)); + emith_jump(target); + EMITH_JMP_END(emith_invert_cond(cond)); + } + } else { emith_jump(target); rcache_invalidate(); } - } else if (branch_patch_count < MAX_LOCAL_BRANCHES) { + } else if (blx_target_count < MAX_LOCAL_BRANCHES) { // local forward jump target = tcache_ptr; - branch_patch_pc[branch_patch_count] = target_pc; - branch_patch_ptr[branch_patch_count] = target; - branch_patch_count++; + blx_targets[blx_target_count++] = + (struct linkage) { .pc = target_pc, .ptr = target, .bl = NULL }; if (cond != -1) emith_jump_cond_patchable(cond, target); else { @@ -4498,7 +4551,7 @@ end_op: rcache_invalidate(); } } else - dbg(1, "warning: too many local branches"); + dbg(1, "warning: too many unresolved branches"); } if (target == NULL) @@ -4507,13 +4560,12 @@ end_op: bl = dr_prepare_ext_branch(block->entryp, target_pc, sh2->is_slave, tcache_id); if (cond != -1) { #if 1 - if (bl && blx_target_count < ARRAY_SIZE(blx_target_pc)) { + if (bl && blx_target_count < ARRAY_SIZE(blx_targets)) { // conditional jumps get a blx stub for the far jump - blx_target_pc[blx_target_count] = target_pc; - blx_target_bl[blx_target_count] = bl; - blx_target_ptr[blx_target_count++] = tcache_ptr; bl->type = BL_JCCBLX; target = tcache_ptr; + blx_targets[blx_target_count++] = + (struct linkage) { .pc = target_pc, .ptr = target, .bl = bl }; emith_jump_cond_patchable(cond, target); } else { // not linkable, or blx table full; inline jump @dispatcher @@ -4660,44 +4712,15 @@ end_op: } else rcache_flush(); - // emit blx area - for (i = 0; i < blx_target_count; i++) { - void *target = (blx_target_pc[i] & 1 ? sh2_drc_exit : sh2_drc_dispatcher); - - emith_pool_check(); - bl = blx_target_bl[i]; - if (bl) - bl->blx = tcache_ptr; - emith_jump_patch(blx_target_ptr[i], tcache_ptr, NULL); - tmp = rcache_get_tmp_arg(0); - emith_move_r_imm(tmp, blx_target_pc[i] & ~1); - emith_jump(target); - rcache_invalidate(); - } + // link unresolved branches, emitting blx area entries as needed + emit_branch_linkage_code(sh2, block, tcache_id, branch_targets, + branch_target_count, blx_targets, blx_target_count); emith_flush(); do_host_disasm(tcache_id); emith_pool_commit(0); - // link local branches - for (i = 0; i < branch_patch_count; i++) { - void *target; - int t; - t = find_in_sorted_array(branch_target_pc, branch_target_count, branch_patch_pc[i]); - target = branch_target_ptr[t]; - if (target == NULL) { - // flush pc and go back to dispatcher (this should no longer happen) - dbg(1, "stray branch to %08x %p", branch_patch_pc[i], tcache_ptr); - target = tcache_ptr; - tmp = rcache_get_tmp_arg(0); - emith_move_r_imm(tmp, branch_patch_pc[i]); - emith_jump(sh2_drc_dispatcher); - rcache_flush(); - } - emith_jump_patch(branch_patch_ptr[i], target, NULL); - } - // fill blx backup; do this last to backup final patched code for (i = 0; i < block->entry_count; i++) for (bl = block->entryp[i].o_links; bl; bl = bl->o_next) @@ -4927,7 +4950,7 @@ static void sh2_generate_utils(void) // pc = sh2_drc_dispatcher_call(u32 pc) sh2_drc_dispatcher_call = (void *)tcache_ptr; emith_ctx_read(arg2, offsetof(SH2, rts_cache_idx)); - emith_add_r_imm(arg2, 2*sizeof(void *)); + emith_add_r_imm(arg2, (u32)(2*sizeof(void *))); emith_and_r_imm(arg2, (ARRAY_SIZE(sh2s->rts_cache)-1) * 2*sizeof(void *)); emith_ctx_write(arg2, offsetof(SH2, rts_cache_idx)); emith_add_r_r_r_lsl_ptr(arg2, CONTEXT_REG, arg2, 0); @@ -4957,7 +4980,7 @@ static void sh2_generate_utils(void) emith_jump_cond(DCOND_NE, sh2_drc_dispatcher); #endif emith_read_r_r_offs_ptr(arg0, arg1, offsetof(SH2, rts_cache) + sizeof(void *)); - emith_sub_r_imm(arg2, 2*sizeof(void *)); + emith_sub_r_imm(arg2, (u32)(2*sizeof(void *))); emith_and_r_imm(arg2, (ARRAY_SIZE(sh2s->rts_cache)-1) * 2*sizeof(void *)); emith_ctx_write(arg2, offsetof(SH2, rts_cache_idx)); #if (DRC_DEBUG & 128) diff --git a/cpu/sh2/compiler.h b/cpu/sh2/compiler.h index 415f01ba..44620f48 100644 --- a/cpu/sh2/compiler.h +++ b/cpu/sh2/compiler.h @@ -41,6 +41,8 @@ unsigned short scan_block(unsigned int base_pc, int is_slave, #define DRC_SR_REG "r28" #elif defined(__mips__) #define DRC_SR_REG "s6" +#elif defined(__riscv__) || defined(__riscv) +#define DRC_SR_REG "s11" #elif defined(__i386__) #define DRC_SR_REG "edi" #elif defined(__x86_64__) diff --git a/pico/32x/32x.c b/pico/32x/32x.c index 7e2e039e..896b5aa1 100644 --- a/pico/32x/32x.c +++ b/pico/32x/32x.c @@ -506,7 +506,7 @@ void sync_sh2s_normal(unsigned int m68k_target) if (CYCLES_GT(now, ssh2.m68krcycles_done)) now = ssh2.m68krcycles_done; } - if (now - timer_cycles >= STEP_N) { + if (CYCLES_GT(now, timer_cycles+STEP_N)) { p32x_timers_do(now - timer_cycles); timer_cycles = now; } diff --git a/platform/common/dismips.c b/platform/common/dismips.c index 41c0f7a5..f9888f2a 100644 --- a/platform/common/dismips.c +++ b/platform/common/dismips.c @@ -127,7 +127,7 @@ static const struct insn special_insns[] = { {0x38, S_IMM_DT, "dsll"}, {0x3A, S_IMM_DT, "dsrl"}, {0x3B, S_IMM_DT, "dsra"}, - {0x3D, S_IMM_DT, "dsll32"}, + {0x3C, S_IMM_DT, "dsll32"}, {0x3E, S_IMM_DT, "dsrl32"}, {0x3F, S_IMM_DT, "dsra32"}, }; diff --git a/platform/linux/emu.c b/platform/linux/emu.c index 887d7836..93665263 100644 --- a/platform/linux/emu.c +++ b/platform/linux/emu.c @@ -29,7 +29,7 @@ void pemu_prep_defconfig(void) void pemu_validate_config(void) { -#if !defined(__arm__) && !defined(__aarch64__) && !defined(__mips__) && !defined(__i386__) && !defined(__x86_64__) +#if !defined(__arm__) && !defined(__aarch64__) && !defined(__mips__) && !defined(__riscv__) && !defined(__riscv) && !defined(__i386__) && !defined(__x86_64__) PicoIn.opt &= ~POPT_EN_DRC; #endif } diff --git a/tools/mkoffsets.sh b/tools/mkoffsets.sh index 8a0557c7..349b8605 100755 --- a/tools/mkoffsets.sh +++ b/tools/mkoffsets.sh @@ -16,7 +16,7 @@ compile_rodata () # $CC $CFLAGS -I .. -nostdlib -Wl,-edummy /tmp/getoffs.c \ # -o /tmp/getoffs.o || exit 1 # find the name of the .rodata section (in case -fdata-sections is used) - rosect=$(readelf -S /tmp/getoffs.o | grep '\.rodata' | + rosect=$(readelf -S /tmp/getoffs.o | grep '\.rodata\|\.sdata' | sed 's/^[^.]*././;s/ .*//') # read out .rodata section as hex string (should be only 4 or 8 bytes) ro=$(readelf -x $rosect /tmp/getoffs.o | grep '0x' | cut -c14-48 | -- 2.39.2