From: LibretroAdmin <105389611+LibretroAdmin@users.noreply.github.com> Date: Mon, 30 May 2022 17:56:24 +0000 (+0100) Subject: Merge pull request #657 from pcercuei/update-lightrec-20220529 X-Git-Tag: r24l~472 X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=437b1e617808119c3a24a72c77cd2fa86a5d3220;hp=aa314e8ebc2281c4b2d3db3378f143de5d68f335;p=pcsx_rearmed.git Merge pull request #657 from pcercuei/update-lightrec-20220529 Update lightrec 20220529 --- diff --git a/Makefile b/Makefile index bff1a1c3..d1585a17 100644 --- a/Makefile +++ b/Makefile @@ -48,6 +48,7 @@ CFLAGS += -DPCNT endif LIGHTREC_CUSTOM_MAP ?= 0 +CFLAGS += -DLIGHTREC_CUSTOM_MAP=$(LIGHTREC_CUSTOM_MAP) # core OBJS += libpcsxcore/cdriso.o libpcsxcore/cdrom.o libpcsxcore/cheat.o libpcsxcore/database.o \ @@ -91,10 +92,9 @@ libpcsxcore/psxbios.o: CFLAGS += -Wno-nonnull # dynarec ifeq "$(DYNAREC)" "lightrec" CFLAGS += -Ideps/lightning/include -Ideps/lightrec -Iinclude/lightning -Iinclude/lightrec \ - -DLIGHTREC -DLIGHTREC_STATIC \ - -DLIGHTREC_CUSTOM_MAP=$(LIGHTREC_CUSTOM_MAP) -LDLIBS += -lrt + -DLIGHTREC -DLIGHTREC_STATIC -DHAVE_MMAP ifeq ($(LIGHTREC_CUSTOM_MAP),1) +LDLIBS += -lrt OBJS += libpcsxcore/lightrec/mem.o endif OBJS += libpcsxcore/lightrec/plugin.o diff --git a/deps/lightning/.gitignore b/deps/lightning/.gitignore index 62ca42aa..6fc5bf95 100644 --- a/deps/lightning/.gitignore +++ b/deps/lightning/.gitignore @@ -1,4 +1,14 @@ +* + +*.o +*.lo +*.la + +.libs/ +.deps/ +*/.libs/ +*/.deps/ + autom4te.cache aclocal.m4 depcomp @@ -20,14 +30,14 @@ missing size stamp-h1 test-driver -check/.deps -doc/.deps -lib/.deps + m4/libtool.m4 m4/lt~obsolete.m4 m4/ltoptions.m4 m4/ltsugar.m4 m4/ltversion.m4 -doc/mdate-sh -doc/texinfo.tex + lightning.pc +include/lightning.h + +build-aux/ diff --git a/deps/lightning/.gitrepo b/deps/lightning/.gitrepo index f0ed7b55..05d58f39 100644 --- a/deps/lightning/.gitrepo +++ b/deps/lightning/.gitrepo @@ -6,7 +6,7 @@ [subrepo] remote = 
https://github.com/pcercuei/gnu_lightning.git branch = pcsx_rearmed - commit = 6f101bf8eccef737d60bf7e6ba85558db49e7908 - parent = 02dbc8694f303728f19734328166a1c6dfef289c + commit = 2a199e4d3cb250a76bd91f42eaf56f6233d34663 + parent = db4140baf19c727fa1a705236130edfc6f363ce0 method = merge cmdver = 0.4.3 diff --git a/deps/lightning/ChangeLog b/deps/lightning/ChangeLog index 9964207e..a8420405 100644 --- a/deps/lightning/ChangeLog +++ b/deps/lightning/ChangeLog @@ -1,3 +1,24 @@ +2022-05-14 Paulo Andrade + + * include/lightning.h.in: Reorder jit_mov{n,z}r in instruction list. + * lib/jit_alpha.c, lib/jit_alpha-cpu.c, lib/jit_hppa.c, + lib/jit_hppa-cpu.c, lib/jit_ia64.c, lib/jit_ia64-cpu.c, + lib/jit_riscv.c, lib/jit_riscv-cpu.c, lib/jit_s390.c, + lib/jit_s390-cpu.c, lib/jit_sparc.c, lib/jit_sparc-cpu.c: + Implement fallback jit_mov{n,z}r. These are a somewhat cheap + implementation, but should be reviewed for the arches that already + have a proper conditional move. + * lib/jit_arm-sz.c, lib/jit_mips-sz.c: Add missing maximum size + estimative and reorder. + * lib/jit_aarch64-sz.c, lib/jit_x86-sz.c, lib/jit_ppc-sz.c: + Reorder entry to match definition order. + * lib/jit_aarch64-sz.c, lib/jit_alpha-sz.c, lib/jit_hppa-sz.c, + lib/jit_ia64-sz.c, lib/jit_riscv-sz.c, lib/jit_s390-sz.c, + lib/jit_sparc-sz.c: Add heuristic value, basically the sum of + the cost of a movr + beqr. + * lib/jit_names.c: Add entries for debug output of mov{n,z}r. + * lib/lightning.c: Use proper bitmask in jit_classify. + 2021-04-03 Marc Nieper-Wißkirchen * check/Makefile.am: Add test for the live instruction. diff --git a/deps/lightning/README b/deps/lightning/README index ae36ea57..7e3df424 100644 --- a/deps/lightning/README +++ b/deps/lightning/README @@ -1,3 +1,5 @@ GNU lightning is a library to aid in making portable programs that compile assembly code at run time. For more information, look at the info documentation. + +For help building lightning, see README-hacking. 
diff --git a/deps/lightning/README-hacking b/deps/lightning/README-hacking index 285f3c93..cc615985 100644 --- a/deps/lightning/README-hacking +++ b/deps/lightning/README-hacking @@ -22,6 +22,12 @@ for Debian-based systems such as Ubuntu: ** Building +If you intend to do development work with lightning, it's useful to build +lightning with its disassembler feature enabled. This optional feature +requires additional dependencies. On Ubuntu, this command should work: + + $ sudo apt-get install binutils-dev libiberty-dev zlib1g-dev + After getting the git sources, and installing the tools above, you can run $ ./bootstrap @@ -38,6 +44,10 @@ should output no difference. After that first time, running make should suffice. +To install lightning: + + $ sudo make install + ** Gnulib This distribution also uses Gnulib (https://www.gnu.org/software/gnulib) to diff --git a/deps/lightning/check/.gitignore b/deps/lightning/check/.gitignore new file mode 100644 index 00000000..a0047bba --- /dev/null +++ b/deps/lightning/check/.gitignore @@ -0,0 +1,65 @@ +*.nodata +nodata +*.log +*.trs + +3to2 +bswap +add +align +allocai +allocar +alu_add +alu_and +alu_com +alu_div +alu_lsh +alu_mul +alu_neg +alu_or +alu_rem +alu_rsb +alu_rsh +alu_sub +alu_xor +alux_add +alux_sub +bp +branch +call +carg +carry +ccall +clobber +ctramp +cva_list +cvt +divi +fib +float +fop_abs +fop_sqrt +hton +jmpr +ldsti +ldstr +ldstr-c +ldstxi +ldstxi-c +ldstxr +ldstxr-c +lightning +live +put +qalu_div +qalu_mul +range +ranger +ret +rpn +self +setcode +stack +tramp +va_list +varargs diff --git a/deps/lightning/check/Makefile.am b/deps/lightning/check/Makefile.am index f1155d7d..fc9f232e 100644 --- a/deps/lightning/check/Makefile.am +++ b/deps/lightning/check/Makefile.am @@ -65,6 +65,7 @@ EXTRA_DIST = \ ldstxi-c.tst ldstxi-c.ok \ cvt.tst cvt.ok \ hton.tst hton.ok \ + bswap.tst bswap.ok \ branch.tst branch.ok \ alu.inc \ alu_add.tst alu_add.ok \ @@ -117,7 +118,7 @@ base_TESTS = \ ldstr ldsti \ ldstxr ldstxi \ 
ldstr-c ldstxr-c ldstxi-c \ - cvt hton branch \ + cvt hton bswap branch \ alu_add alux_add \ alu_sub alux_sub alu_rsb \ alu_mul alu_div alu_rem \ @@ -196,7 +197,7 @@ arm_TESTS = \ rpn.arm ldstr.arm ldsti.arm \ ldstxr.arm ldstxi.arm \ ldstr-c.arm ldstxr-c.arm ldstxi-c.arm \ - cvt.arm hton.arm branch.arm \ + cvt.arm hton.arm bswap.arm branch.arm \ alu_add.arm alux_add.arm \ alu_sub.arm alux_sub.arm alu_rsb.arm \ alu_mul.arm alu_div.arm alu_rem.arm \ @@ -222,7 +223,7 @@ swf_TESTS = \ rpn.swf ldstr.swf ldsti.swf \ ldstxr.swf ldstxi.swf \ ldstr-c.swf ldstxr-c.swf ldstxi-c.swf \ - cvt.swf hton.swf branch.swf \ + cvt.swf hton.swf bswap.swf branch.swf \ alu_add.swf alux_add.swf \ alu_sub.swf alux_sub.swf alu_rsb.swf \ alu_mul.swf alu_div.swf alu_rem.swf \ @@ -246,7 +247,7 @@ arm_swf_TESTS = \ rpn.arm.swf ldstr.arm.swf ldsti.arm.swf \ ldstxr.arm.swf ldstxi.arm.swf \ ldstr-c.arm.swf ldstxr-c.arm.swf ldstxi-c.arm.swf \ - cvt.arm.swf hton.arm.swf branch.arm.swf \ + cvt.arm.swf hton.arm.swf bswap.arm.swf branch.arm.swf \ alu_add.arm.swf alux_add.arm.swf \ alu_sub.arm.swf alux_sub.arm.swf alu_rsb.arm.swf \ alu_mul.arm.swf alu_div.arm.swf alu_rem.arm.swf \ @@ -271,8 +272,8 @@ arm4_swf_TESTS = \ rpn.arm4.swf ldstr.arm4.swf ldsti.arm4.swf \ ldstxr.arm4.swf ldstxi.arm4.swf \ ldstr-c.arm4.swf ldstxr-c.arm4.swf ldstxi-c.arm4.swf \ - cvt.arm4.swf hton.arm4.swf branch.arm4.swf \ - alu_add.arm4.swf alux_add.arm4.swf \ + cvt.arm4.swf hton.arm4.swf bswap.arm4.swf \ + branch.arm4.swf alu_add.arm4.swf alux_add.arm4.swf \ alu_sub.arm4.swf alux_sub.arm4.swf alu_rsb.arm4.swf \ alu_mul.arm4.swf alu_div.arm4.swf alu_rem.arm4.swf \ alu_and.arm4.swf alu_or.arm4.swf alu_xor.arm4.swf \ diff --git a/deps/lightning/check/bswap.ok b/deps/lightning/check/bswap.ok new file mode 100644 index 00000000..9766475a --- /dev/null +++ b/deps/lightning/check/bswap.ok @@ -0,0 +1 @@ +ok diff --git a/deps/lightning/check/bswap.tst b/deps/lightning/check/bswap.tst new file mode 100644 index 00000000..f123e950 --- 
/dev/null +++ b/deps/lightning/check/bswap.tst @@ -0,0 +1,154 @@ +.data 16 +ok: +.c "ok\n" + +#define us12_i 0x1234 +#define us7f_i 0x7ff7 +#define us80_i 0x8008 +#define usff_i 0xffff +#define ui12_i 0x01234567 +#define ui7f_i 0x7f7ff7f7 +#define ui80_i 0x80800808 +#define uiff_i 0xffffffff +#define ul12_i 0x0123456789abcdef +#define ul7f_i 0x7f7f7f7ff7f7f7f7 +#define ul80_i 0x8080808008080808 +#define ulff_i 0xffffffffffffffff + +#if __WORDSIZE == 32 +# define xus12_i 0xffff1234 +# define xus7f_i 0x10107ff7 +# define xus80_i 0x81188008 +# define xusff_i 0xeaaeffff +#else +# define xus12_i 0xffffffffffff1234 +# define xus7f_i 0x1010100101017ff7 +# define xus80_i 0x8181811818818008 +# define xusff_i 0xeaeaeaaeaeaeffff +# define xui12_i 0xffffffff01234567 +# define xui7f_i 0x101001017f7ff7f7 +# define xui80_i 0x8181181880800808 +# define xuiff_i 0xeaeaaeaeffffffff +#endif + +# define us12_o 0x3412 +# define us7f_o 0xf77f +# define us80_o 0x0880 +# define usff_o 0xffff +# define ui12_o 0x67452301 +# define ui7f_o 0xf7f77f7f +# define ui80_o 0x08088080 +# define uiff_o 0xffffffff +# define ul12_o 0xefcdab8967452301 +# define ul7f_o 0xf7f7f7f77f7f7f7f +# define ul80_o 0x0808080880808080 +# define ulff_o 0xffffffffffffffff + +#define BSWAP4(I, O, T, R0, R1) \ + movi %R0 I \ + bswapr_##T %R1 %R0 \ + beqi T##R0##R1##I %R1 O \ + calli @abort \ +T##R0##R1##I: + +#define BSWAP3(T, R0, R1) \ + BSWAP4(T##12_i, T##12_o, T, R0, R1) \ + BSWAP4(x##T##12_i, T##12_o, T, R0, R1) \ + BSWAP4(T##7f_i, T##7f_o, T, R0, R1) \ + BSWAP4(x##T##7f_i, T##7f_o, T, R0, R1) \ + BSWAP4(T##80_i, T##80_o, T, R0, R1) \ + BSWAP4(x##T##80_i, T##80_o, T, R0, R1) \ + BSWAP4(T##ff_i, T##ff_o, T, R0, R1) \ + BSWAP4(x##T##ff_i, T##ff_o, T, R0, R1) + +#define BSWAP3x(T, R0, R1) \ + BSWAP4(T##12_i, T##12_o, T, R0, R1) \ + BSWAP4(T##7f_i, T##7f_o, T, R0, R1) \ + BSWAP4(T##80_i, T##80_o, T, R0, R1) \ + BSWAP4(T##ff_i, T##ff_o, T, R0, R1) + +#define BSWAP2(T, V0, V1, V2, R0, R1, R2) \ + BSWAP3(T, V0, V0) \ + 
BSWAP3(T, V0, V1) \ + BSWAP3(T, V0, V2) \ + BSWAP3(T, V0, R0) \ + BSWAP3(T, V0, R1) \ + BSWAP3(T, V0, R2) \ + +#define BSWAP2x(T, V0, V1, V2, R0, R1, R2) \ + BSWAP3x(T, V0, V0) \ + BSWAP3x(T, V0, V1) \ + BSWAP3x(T, V0, V2) \ + BSWAP3x(T, V0, R0) \ + BSWAP3x(T, V0, R1) \ + BSWAP3x(T, V0, R2) \ + +#define BSWAP1(T, V0, V1, V2, R0, R1, R2) \ + BSWAP2(T, V0, V1, V2, R0, R1, R2) \ + BSWAP2(T, V1, V2, R0, R1, R2, V0) \ + BSWAP2(T, V2, R0, R1, R2, V0, V1) \ + BSWAP2(T, R0, R1, R2, V0, V1, V2) \ + BSWAP2(T, R1, R2, V0, V1, V2, R0) \ + BSWAP2(T, R2, V0, V1, V2, R0, R1) + +#define BSWAP1x(T, V0, V1, V2, R0, R1, R2) \ + BSWAP2x(T, V0, V1, V2, R0, R1, R2) \ + BSWAP2x(T, V1, V2, R0, R1, R2, V0) \ + BSWAP2x(T, V2, R0, R1, R2, V0, V1) \ + BSWAP2x(T, R0, R1, R2, V0, V1, V2) \ + BSWAP2x(T, R1, R2, V0, V1, V2, R0) \ + BSWAP2x(T, R2, V0, V1, V2, R0, R1) + +#if __WORDSIZE == 32 +# define BSWAP(V0, V1, V2, R0, R1, R2) \ + BSWAP1(us, V0, V1, V2, R0, R1, R2) \ + BSWAP1x(ui, V0, V1, V2, R0, R1, R2) +#else +# define BSWAP(V0, V1, V2, R0, R1, R2) \ + BSWAP1(us, V0, V1, V2, R0, R1, R2) \ + BSWAP1(ui, V0, V1, V2, R0, R1, R2) \ + BSWAP1x(ul, V0, V1, V2, R0, R1, R2) +#endif + +.code + prolog + /* simple sequence for easier disassembly reading and encoding check */ + movi %r0 us12_i + bswapr_us %r1 %r0 + beqi us %r1 us12_o + calli @abort +us: + + movi %r0 xus12_i + bswapr_us %r1 %r0 + beqi xus %r1 us12_o + calli @abort +xus: + movi %r0 ui12_i + bswapr_ui %r1 %r0 + beqi ui %r1 ui12_o + calli @abort +ui: +#if __WORDSIZE == 64 + movi %r0 xui12_i + bswapr_ui %r1 %r0 + beqi xui %r1 ui12_o + calli @abort +xui: + movi %r0 ul12_i + bswapr_ul %r1 %r0 + beqi ul %r1 ul12_o + calli @abort +ul: +#endif + + BSWAP(v0, v1, v2, r0, r1, r2) + + // just to know did not abort + prepare + pushargi ok + ellipsis + finishi @printf + + ret + epilog diff --git a/deps/lightning/check/lightning.c b/deps/lightning/check/lightning.c index c92364af..3cf3e70d 100644 --- a/deps/lightning/check/lightning.c +++ 
b/deps/lightning/check/lightning.c @@ -30,6 +30,7 @@ #include #include #include +#include #if defined(__linux__) && (defined(__i386__) || defined(__x86_64__)) # include @@ -327,6 +328,11 @@ static void htonr_ui(void); static void ntohr_ui(void); static void htonr_ul(void); static void ntohr_ul(void); #endif static void htonr(void); static void ntohr(void); +static void bswapr_us(void); static void bswapr_ui(void); +#if __WORDSIZE == 64 +static void bswapr_ul(void); +#endif +static void bswapr(void); static void movnr(void); static void movzr(void); static void ldr_c(void); static void ldi_c(void); static void ldr_uc(void); static void ldi_uc(void); @@ -642,6 +648,11 @@ static instr_t instr_vector[] = { entry(htonr_ul), entry(ntohr_ul), #endif entry(htonr), entry(ntohr), + entry(bswapr_us), entry(bswapr_ui), +#if __WORDSIZE == 64 + entry(bswapr_ul), +#endif + entry(bswapr), entry(movnr), entry(movzr), entry(ldr_c), entry(ldi_c), entry(ldr_uc), entry(ldi_uc), @@ -1491,6 +1502,11 @@ entry_ir_ir(htonr_ui) entry_ir_ir(ntohr_ui) entry_ir_ir(htonr_ul) entry_ir_ir(ntohr_ul) #endif entry_ir_ir(htonr) entry_ir_ir(ntohr) +entry_ir_ir(bswapr_us) entry_ir_ir(bswapr_ui) +#if __WORDSIZE == 64 +entry_ir_ir(bswapr_ul) +#endif +entry_ir_ir(bswapr) entry_ir_ir_ir(movnr) entry_ir_ir_ir(movzr) entry_ir_ir(ldr_c) entry_ir_pm(ldi_c) entry_ir_ir(ldr_uc) entry_ir_pm(ldi_uc) diff --git a/deps/lightning/configure.ac b/deps/lightning/configure.ac index 1f2c6b4a..5b582d28 100644 --- a/deps/lightning/configure.ac +++ b/deps/lightning/configure.ac @@ -63,7 +63,7 @@ case "$target_cpu" in *) ;; esac -AC_CHECK_FUNCS(mremap ffsl getopt_long_only isnan isinf,,) +AC_CHECK_FUNCS(mmap mremap ffsl getopt_long_only isnan isinf,,) AC_CHECK_HEADERS([getopt.h stdint.h],,,) @@ -72,7 +72,7 @@ AC_ARG_ENABLE(disassembler, [Enable jit disassembler using binutils]), [DISASSEMBLER=$enableval], [DISASSEMBLER=auto]) if test "x$DISASSEMBLER" != "xno"; then - # FIXME need to check for libiberty first or will fail to 
link + AC_CHECK_LIB(iberty, htab_try_create, , [HAVE_IBERTY="no"]) AC_CHECK_LIB(bfd, bfd_init, , @@ -81,17 +81,43 @@ if test "x$DISASSEMBLER" != "xno"; then [HAVE_Z="no"]) AC_CHECK_LIB(opcodes, init_disassemble_info, , [HAVE_OPCODES="no"]) - if test "x$HAVE_IBERTY" = "xno" -o \ - "x$HAVE_BFD" = "xno" -o \ - "x$HAVE_Z" = "xno" -o \ - "x$HAVE_OPCODES" = "xno"; then - if test "x$DISASSEMBLER" != "xauto"; then - AC_MSG_ERROR([binutils not found, see http://www.gnu.org/software/binutils/]) - else - AC_MSG_WARN([binutils not found, see http://www.gnu.org/software/binutils/]) - DISASSEMBLER="no" - fi + + if test "x$HAVE_IBERTY" = "xno"; then + if test "x$DISASSEMBLER" = "xyes"; then + AC_MSG_ERROR([libiberty not found]) + else + AC_MSG_WARN([libiberty not found]) + DISASSEMBLER="no" + fi fi + + if test "x$HAVE_BFD" = "xno"; then + if test "x$DISASSEMBLER" = "xyes"; then + AC_MSG_ERROR([binutils BFD not found, see http://www.gnu.org/software/binutils/]) + else + AC_MSG_WARN([binutils BFD not found, see http://www.gnu.org/software/binutils/]) + DISASSEMBLER="no" + fi + fi + + if test "x$HAVE_Z" = "xno"; then + if test "x$DISASSEMBLER" = "xyes"; then + AC_MSG_ERROR([zlib not found, see https://zlib.net/]) + else + AC_MSG_WARN([zlib not found, see https://zlib.net/]) + DISASSEMBLER="no" + fi + fi + + if test "x$HAVE_OPCODES" = "xno"; then + if test "x$DISASSEMBLER" = "xyes"; then + AC_MSG_ERROR([binutils opcodes not found, see https://www.gnu.org/software/binutils/]) + else + AC_MSG_WARN([binutils opcodes not found, see https://www.gnu.org/software/binutils/]) + DISASSEMBLER="no" + fi + fi + fi AM_CONDITIONAL(with_disassembler, [test "x$DISASSEMBLER" != "xno"]) if test "x$DISASSEMBLER" != "xno"; then diff --git a/deps/lightning/doc/.gitignore b/deps/lightning/doc/.gitignore index ae3678f8..0e0a6da3 100644 --- a/deps/lightning/doc/.gitignore +++ b/deps/lightning/doc/.gitignore @@ -1,3 +1,13 @@ *.info* stamp-* /version.texi + +texinfo.tex +mdate-sh + +fact +ifib +incr +printf 
+rfib +rpn diff --git a/deps/lightning/doc/body.texi b/deps/lightning/doc/body.texi index 51c08d33..c174fcfb 100644 --- a/deps/lightning/doc/body.texi +++ b/deps/lightning/doc/body.texi @@ -89,7 +89,11 @@ assembles machine instructions without further tests. @node Installation @chapter Configuring and installing @lightning{} -The first thing to do to use @lightning{} is to configure the +Here we will assume that your system already has the dependencies +necessary to build @lightning{}. For more on dependencies, see +@lightning{}'s @file{README-hacking} file. + +The first thing to do to build @lightning{} is to configure the program, picking the set of macros to be used on the host architecture; this configuration is automatically performed by the @file{configure} shell script; to run it, merely type: @@ -368,6 +372,14 @@ htonr _us _ui _ul @r{Host-to-network (big endian) order} ntohr _us _ui _ul @r{Network-to-host order } @end example +@code{bswapr} can be used to unconditionally byte-swap an operand. +On little-endian architectures, @code{htonr} and @code{ntohr} resolve +to this. +The @code{_ul} variant is only available in 64-bit architectures. 
+@example +bswapr _us _ui _ul 01 = byte_swap(02) +@end example + @item Load operations @code{ld} accepts two operands while @code{ldx} accepts three; in both cases, the last can be either a register or an immediate diff --git a/deps/lightning/include/lightning.h.in b/deps/lightning/include/lightning.h.in index 422fc138..887a951a 100644 --- a/deps/lightning/include/lightning.h.in +++ b/deps/lightning/include/lightning.h.in @@ -123,6 +123,11 @@ typedef jit_int32_t jit_bool_t; typedef jit_int32_t jit_gpr_t; typedef jit_int32_t jit_fpr_t; +#if !defined(__powerpc__) && \ + (defined(__POWERPC__) || defined(__ppc__) || defined(__PPC__)) +#define __powerpc__ 1 +#endif + #if defined(__i386__) || defined(__x86_64__) # include #elif defined(__mips__) @@ -339,6 +344,11 @@ typedef enum { #define jit_movr(u,v) jit_new_node_ww(jit_code_movr,u,v) #define jit_movi(u,v) jit_new_node_ww(jit_code_movi,u,v) jit_code_movr, jit_code_movi, + +#define jit_movnr(u,v,w) jit_new_node_www(jit_code_movnr,u,v,w) +#define jit_movzr(u,v,w) jit_new_node_www(jit_code_movzr,u,v,w) + jit_code_movnr, jit_code_movzr, + #define jit_extr_c(u,v) jit_new_node_ww(jit_code_extr_c,u,v) #define jit_extr_uc(u,v) jit_new_node_ww(jit_code_extr_uc,u,v) jit_code_extr_c, jit_code_extr_uc, @@ -891,9 +901,17 @@ typedef enum { #define jit_movr_d_w(u, v) jit_new_node_ww(jit_code_movr_d_w, u, v) #define jit_movi_d_w(u, v) jit_new_node_wd(jit_code_movi_d_w, u, v) -#define jit_movnr(u,v,w) jit_new_node_www(jit_code_movnr,u,v,w) -#define jit_movzr(u,v,w) jit_new_node_www(jit_code_movzr,u,v,w) - jit_code_movnr, jit_code_movzr, +#define jit_bswapr_us(u,v) jit_new_node_ww(jit_code_bswapr_us,u,v) + jit_code_bswapr_us, +#define jit_bswapr_ui(u,v) jit_new_node_ww(jit_code_bswapr_ui,u,v) + jit_code_bswapr_ui, +#define jit_bswapr_ul(u,v) jit_new_node_ww(jit_code_bswapr_ul,u,v) + jit_code_bswapr_ul, +#if __WORDSIZE == 32 +#define jit_bswapr(u,v) jit_new_node_ww(jit_code_bswapr_ui,u,v) +#else +#define jit_bswapr(u,v) 
jit_new_node_ww(jit_code_bswapr_ul,u,v) +#endif jit_code_last_code } jit_code_t; diff --git a/deps/lightning/include/lightning/jit_private.h b/deps/lightning/include/lightning/jit_private.h index e00e74d6..0af24cbc 100644 --- a/deps/lightning/include/lightning/jit_private.h +++ b/deps/lightning/include/lightning/jit_private.h @@ -264,8 +264,9 @@ extern jit_node_t *_jit_data(jit_state_t*, const void*, #define jit_cc_a0_flt 0x00000020 /* arg0 is immediate float */ #define jit_cc_a0_dbl 0x00000040 /* arg0 is immediate double */ #define jit_cc_a0_arg 0x00000080 /* arg1 is an argument int id */ -#define jit_cc_a1_reg 0x00000100 /* arg1 is a register */ -#define jit_cc_a1_chg 0x00000200 /* arg1 is modified */ +#define jit_cc_a0_cnd 0x00000100 /* arg1 is a conditinally set register */ +#define jit_cc_a1_reg 0x00000200 /* arg1 is a register */ +#define jit_cc_a1_chg 0x00000400 /* arg1 is modified */ #define jit_cc_a1_int 0x00001000 /* arg1 is immediate word */ #define jit_cc_a1_flt 0x00002000 /* arg1 is immediate float */ #define jit_cc_a1_dbl 0x00004000 /* arg1 is immediate double */ @@ -718,6 +719,7 @@ _emit_ldxi_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); extern void _emit_stxi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); +extern void jit_init_print(void); extern void jit_init_debug(const char*); extern void jit_finish_debug(void); diff --git a/deps/lightning/lib/jit_aarch64-cpu.c b/deps/lightning/lib/jit_aarch64-cpu.c index 53698b08..7d2a99d6 100644 --- a/deps/lightning/lib/jit_aarch64-cpu.c +++ b/deps/lightning/lib/jit_aarch64-cpu.c @@ -663,17 +663,11 @@ static void _stxi_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); # define stxr_l(r0,r1,r2) STR(r2,r1,r0) # define stxi_l(i0,r0,r1) _stxi_l(_jit,i0,r0,r1) static void _stxi_l(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); -# if __BYTE_ORDER == __LITTLE_ENDIAN -# define htonr_us(r0,r1) _htonr_us(_jit,r0,r1) -static void _htonr_us(jit_state_t*,jit_int32_t,jit_int32_t); -# define 
htonr_ui(r0,r1) _htonr_ui(_jit,r0,r1) -static void _htonr_ui(jit_state_t*,jit_int32_t,jit_int32_t); -# define htonr_ul(r0,r1) REV(r0,r1) -# else -# define htonr_us(r0,r1) extr_us(r0,r1) -# define htonr_ui(r0,r1) extr_ui(r0,r1) -# define htonr_ul(r0,r1) movr(r0,r1) -# endif +# define bswapr_us(r0,r1) _bswapr_us(_jit,r0,r1) +static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t); +# define bswapr_ui(r0,r1) _bswapr_ui(_jit,r0,r1) +static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t); +# define bswapr_ul(r0,r1) REV(r0,r1) # define extr_c(r0,r1) SXTB(r0,r1) # define extr_uc(r0,r1) UXTB(r0,r1) # define extr_s(r0,r1) SXTH(r0,r1) @@ -1461,21 +1455,19 @@ _xori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } } -#if __BYTE_ORDER == __LITTLE_ENDIAN static void -_htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { - htonr_ul(r0, r1); + bswapr_ul(r0, r1); rshi_u(r0, r0, 48); } static void -_htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { - htonr_ul(r0, r1); + bswapr_ul(r0, r1); rshi_u(r0, r0, 32); } -#endif static void _ldi_c(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) diff --git a/deps/lightning/lib/jit_aarch64-sz.c b/deps/lightning/lib/jit_aarch64-sz.c index 4fa7a426..e1f6d961 100644 --- a/deps/lightning/lib/jit_aarch64-sz.c +++ b/deps/lightning/lib/jit_aarch64-sz.c @@ -95,6 +95,8 @@ 8, /* nei */ 4, /* movr */ 16, /* movi */ + 8, /* movnr */ + 8, /* movzr */ 4, /* extr_c */ 4, /* extr_uc */ 4, /* extr_s */ @@ -399,6 +401,7 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ - 8, /* movnr */ - 8, /* movzr */ + 8, /* bswapr_us */ + 8, /* bswapr_ui */ + 4, /* bswapr_ul */ #endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_aarch64.c b/deps/lightning/lib/jit_aarch64.c index 369408c5..f0be046c 100644 --- a/deps/lightning/lib/jit_aarch64.c +++ b/deps/lightning/lib/jit_aarch64.c @@ 
-1128,6 +1128,9 @@ _emit_code(jit_state_t *_jit) case_rr(hton, _us); case_rr(hton, _ui); case_rr(hton, _ul); + case_rr(bswap, _us); + case_rr(bswap, _ui); + case_rr(bswap, _ul); case_rr(ext, _c); case_rr(ext, _uc); case_rr(ext, _s); diff --git a/deps/lightning/lib/jit_alpha-cpu.c b/deps/lightning/lib/jit_alpha-cpu.c index 8bfef9ca..2dd701d7 100644 --- a/deps/lightning/lib/jit_alpha-cpu.c +++ b/deps/lightning/lib/jit_alpha-cpu.c @@ -311,6 +311,10 @@ static void _movr(jit_state_t*,jit_int32_t,jit_int32_t); static void _movi(jit_state_t*,jit_int32_t,jit_word_t); # define movi_p(r0,i0) _movi_p(_jit,r0,i0) static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t); +# define movnr(r0,r1,r2) _movnr(_jit,r0,r1,r2) +static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2) +static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define negr(r0,r1) NEGQ(r1,r0) # define comr(r0,r1) NOT(r1,r0) # define addr(r0,r1,r2) ADDQ(r1,r2,r0) @@ -622,18 +626,12 @@ static void _extr_us(jit_state_t*,jit_int32_t,jit_int32_t); static void _extr_i(jit_state_t*,jit_int32_t,jit_int32_t); # define extr_ui(r0,r1) _extr_ui(_jit,r0,r1) static void _extr_ui(jit_state_t*,jit_int32_t,jit_int32_t); -# if __BYTE_ORDER == __LITTLE_ENDIAN -# define htonr_us(r0,r1) _htonr_us(_jit,r0,r1) -static void _htonr_us(jit_state_t*,jit_int32_t,jit_int32_t); -# define htonr_ui(r0,r1) _htonr_ui(_jit,r0,r1) -static void _htonr_ui(jit_state_t*,jit_int32_t,jit_int32_t); -# define htonr_ul(r0,r1) _htonr_ul(_jit,r0,r1) -static void _htonr_ul(jit_state_t*,jit_int32_t,jit_int32_t); -# else -# define htonr_us(r0,r1) extr_us(r0,r1) -# define htonr_ui(r0,r1) extr_ui(r0,r1) -# define htonr_ul(r0,r1) movr(r0,r1) -# endif +# define bswapr_us(r0,r1) _bswapr_us(_jit,r0,r1) +static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t); +# define bswapr_ui(r0,r1) _bswapr_ui(_jit,r0,r1) +static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t); +# 
define bswapr_ul(r0,r1) _bswapr_ul(_jit,r0,r1) +static void _bswapr_ul(jit_state_t*,jit_int32_t,jit_int32_t); # define jmpr(r0) JMP(_R31_REGNO,r0,0) # define jmpi(i0) _jmpi(_jit,i0) static void _jmpi(jit_state_t*, jit_word_t); @@ -811,6 +809,24 @@ _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) return (w); } +static void +_movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_word_t w; + w = beqi(_jit->pc.w, r2, 0); + MOV(r1, r0); + patch_at(w, _jit->pc.w); +} + +static void +_movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_word_t w; + w = bnei(_jit->pc.w, r2, 0); + MOV(r1, r0); + patch_at(w, _jit->pc.w); +} + static void _addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { @@ -2453,7 +2469,7 @@ _extr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) } static void -_htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { jit_int32_t t0; t0 = jit_get_reg(jit_class_gpr); @@ -2465,7 +2481,7 @@ _htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) } static void -_htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { jit_int32_t t0; jit_int32_t t1; @@ -2491,7 +2507,7 @@ _htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) } static void -_htonr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +_bswapr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { jit_int32_t t0; jit_int32_t t1; diff --git a/deps/lightning/lib/jit_alpha-sz.c b/deps/lightning/lib/jit_alpha-sz.c index e1a572aa..ecfeba3b 100644 --- a/deps/lightning/lib/jit_alpha-sz.c +++ b/deps/lightning/lib/jit_alpha-sz.c @@ -95,6 +95,8 @@ 12, /* nei */ 4, /* movr */ 32, /* movi */ + 12, /* movnr */ + 12, /* movzr */ 8, /* extr_c */ 8, /* extr_uc */ 8, /* extr_s */ @@ -399,4 +401,7 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ + 16, /* bswapr_us */ + 
36, /* bswapr_ui */ + 36, /* bswapr_ul */ #endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_alpha.c b/deps/lightning/lib/jit_alpha.c index 9a067aa4..d7bb3ecf 100644 --- a/deps/lightning/lib/jit_alpha.c +++ b/deps/lightning/lib/jit_alpha.c @@ -1086,12 +1086,17 @@ _emit_code(jit_state_t *_jit) case_rr(hton, _us); case_rr(hton, _ui); case_rr(hton, _ul); + case_rr(bswap, _us); + case_rr(bswap, _ui); + case_rr(bswap, _ul); case_rr(ext, _c); case_rr(ext, _uc); case_rr(ext, _s); case_rr(ext, _us); case_rr(ext, _i); case_rr(ext, _ui); + case_rrr(movn,); + case_rrr(movz,); case_rr(mov,); case jit_code_movi: if (node->flag & jit_flag_node) { diff --git a/deps/lightning/lib/jit_arm-cpu.c b/deps/lightning/lib/jit_arm-cpu.c index 1cf36e08..14ba36bb 100644 --- a/deps/lightning/lib/jit_arm-cpu.c +++ b/deps/lightning/lib/jit_arm-cpu.c @@ -612,7 +612,7 @@ static void _torl(jit_state_t*,int,int,int) maybe_unused; # define CMNI(rn,im) CC_CMNI(ARM_CC_AL,rn,im) # define T2_CMNI(rn,im) torri(THUMB2_CMNI,rn,_R15_REGNO,im) # define CC_TST(cc,rn,rm) corrr(cc,ARM_TST,rn,r0,rm) -# define TST(rn,rm) CC_TST(ARM_CC_AL,rn,rm) +# define TST(rn,rm) corrr(ARM_CC_AL,ARM_TST,rn,0,rm) # define T1_TST(rn,rm) is(THUMB_TST|(_u3(rm)<<3)|_u3(rn)) # define T2_TST(rn,rm) torrr(THUMB2_TST,rn,_R15_REGNO,rm) # define CC_TSTI(cc,rn,im) corri(cc,ARM_TST|ARM_I,rn,0,im) @@ -1095,15 +1095,10 @@ static void _sti_i(jit_state_t*,jit_word_t,jit_int32_t); static void _stxr_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); # define stxi_i(r0,r1,i0) _stxi_i(_jit,r0,r1,i0) static void _stxi_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); -# if __BYTE_ORDER == __LITTLE_ENDIAN -# define htonr_us(r0,r1) _htonr_us(_jit,r0,r1) -static void _htonr_us(jit_state_t*,jit_int32_t,jit_int32_t); -# define htonr_ui(r0,r1) _htonr_ui(_jit,r0,r1) -static void _htonr_ui(jit_state_t*,jit_int32_t,jit_int32_t); -# else -# define htonr_us(r0,r1) extr_us(r0,r1) -# define htonr(r0,r1) movr(r0,r1) -# endif +# define bswapr_us(r0,r1) 
_bswapr_us(_jit,r0,r1) +static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t); +# define bswapr_ui(r0,r1) _bswapr_ui(_jit,r0,r1) +static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t); # define extr_c(r0,r1) _extr_c(_jit,r0,r1) static void _extr_c(jit_state_t*,jit_int32_t,jit_int32_t); # define extr_uc(r0,r1) _extr_uc(_jit,r0,r1) @@ -3609,11 +3604,9 @@ _stxi_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) } } -# if __BYTE_ORDER == __LITTLE_ENDIAN static void -_htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { - jit_int32_t t0; if (jit_thumb_p()) { if ((r0|r1) < 8) T1_REV(r0, r1); @@ -3627,20 +3620,14 @@ _htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) rshi_u(r0, r0, 16); } else { - t0 = jit_get_reg(jit_class_gpr); - rshi(rn(t0), r1, 8); - andi(r0, r1, 0xff); - andi(rn(t0), rn(t0), 0xff); - lshi(r0, r0, 8); - orr(r0, r0, rn(t0)); - jit_unget_reg(t0); + generic_bswapr_us(_jit, r0, r1); } } } /* inline glibc htonl (without register clobber) */ static void -_htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { jit_int32_t reg; if (jit_thumb_p()) { @@ -3662,7 +3649,6 @@ _htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) } } } -#endif static void _extr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) diff --git a/deps/lightning/lib/jit_arm-sz.c b/deps/lightning/lib/jit_arm-sz.c index 6368c138..293d3069 100644 --- a/deps/lightning/lib/jit_arm-sz.c +++ b/deps/lightning/lib/jit_arm-sz.c @@ -96,6 +96,8 @@ 14, /* nei */ 4, /* movr */ 8, /* movi */ + 8, /* movnr */ + 8, /* movzr */ 4, /* extr_c */ 4, /* extr_uc */ 4, /* extr_s */ @@ -400,6 +402,9 @@ 12, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ + 8, /* bswapr_us */ + 4, /* bswapr_ui */ + 0, /* bswapr_ul */ #endif /* __ARM_PCS_VFP */ #endif /* __WORDSIZE */ @@ -500,6 +505,8 @@ 14, /* nei */ 4, /* movr */ 8, /* movi 
*/ + 22, /* movnr */ + 22, /* movzr */ 8, /* extr_c */ 4, /* extr_uc */ 8, /* extr_s */ @@ -804,7 +811,8 @@ 12, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ - 8, /* movnr */ - 8, /* movzr */ + 20, /* bswapr_us */ + 16, /* bswapr_ui */ + 0, /* bswapr_ul */ #endif /* __ARM_PCS_VFP */ #endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_arm.c b/deps/lightning/lib/jit_arm.c index 051f84da..0fdd1a7a 100644 --- a/deps/lightning/lib/jit_arm.c +++ b/deps/lightning/lib/jit_arm.c @@ -1498,6 +1498,8 @@ _emit_code(jit_state_t *_jit) case_wrr(stx, _i); case_rr(hton, _us); case_rr(hton, _ui); + case_rr(bswap, _us); + case_rr(bswap, _ui); case_rr(ext, _c); case_rr(ext, _uc); case_rr(ext, _s); diff --git a/deps/lightning/lib/jit_disasm.c b/deps/lightning/lib/jit_disasm.c index 27a3ed1e..25983a68 100644 --- a/deps/lightning/lib/jit_disasm.c +++ b/deps/lightning/lib/jit_disasm.c @@ -59,6 +59,7 @@ static FILE *disasm_stream; void jit_init_debug(const char *progname) { + jit_init_print(); #if DISASSEMBLER bfd_init(); @@ -74,65 +75,24 @@ jit_init_debug(const char *progname) bfd_check_format(disasm_bfd, bfd_object); bfd_check_format(disasm_bfd, bfd_archive); if (!disasm_stream) - disasm_stream = stderr; + disasm_stream = stdout; + INIT_DISASSEMBLE_INFO(disasm_info, disasm_stream, fprintf); -# if defined(__i386__) || defined(__x86_64__) - disasm_info.arch = bfd_arch_i386; -# if defined(__x86_64__) -# if __WORDSIZE == 32 - disasm_info.mach = bfd_mach_x64_32; -# else - disasm_info.mach = bfd_mach_x86_64; -# endif -# else - disasm_info.mach = bfd_mach_i386_i386; -# endif -# endif -# if defined(__powerpc__) - disasm_info.arch = bfd_arch_powerpc; - disasm_info.mach = bfd_mach_ppc64; -# if HAVE_DISASSEMBLE_INIT_FOR_TARGET + disasm_info.arch = bfd_get_arch(disasm_bfd); + disasm_info.mach = bfd_get_mach(disasm_bfd); + +# if HAVE_DISASSEMBLE_INIT_FOR_TARGET disassemble_init_for_target(&disasm_info); -# elif HAVE_DISASSEMBLE_INIT_POWERPC - disassemble_init_powerpc(&disasm_info); -# 
endif -# if defined(__powerpc64__) +# endif + +# if defined(__powerpc64__) disasm_info.disassembler_options = "64"; -# endif -# if HAVE_DISASSEMBLE_INIT_FOR_TARGET - disassemble_init_for_target(&disasm_info); -# elif HAVE_DISASSEMBLE_INIT_POWERPC - disassemble_init_powerpc(&disasm_info); -# endif # endif -# if defined(__sparc__) +# if defined(__sparc__) || defined(__s390__) || defined(__s390x__) disasm_info.endian = disasm_info.display_endian = BFD_ENDIAN_BIG; # endif # if defined(__s390__) || defined(__s390x__) - disasm_info.arch = bfd_arch_s390; -# if __WORDSIZE == 32 - disasm_info.mach = bfd_mach_s390_31; -# else - disasm_info.mach = bfd_mach_s390_64; -# endif - disasm_info.endian = disasm_info.display_endian = BFD_ENDIAN_BIG; disasm_info.disassembler_options = "zarch"; -# endif -# if defined(__alpha__) - disasm_info.arch = bfd_arch_alpha; - disasm_info.mach = bfd_mach_alpha_ev6; -# endif -# if defined(__hppa__) - disasm_info.arch = bfd_arch_hppa; - disasm_info.mach = bfd_mach_hppa10; -# endif -# if defined(__riscv) - disasm_info.arch = bfd_arch_riscv; -# if __WORDSIZE == 32 - disasm_info.mach = bfd_mach_riscv32; -# else - disasm_info.mach = bfd_mach_riscv64; -# endif # endif disasm_info.print_address_func = disasm_print_address; diff --git a/deps/lightning/lib/jit_hppa-cpu.c b/deps/lightning/lib/jit_hppa-cpu.c index db5a36a1..6ca54f36 100644 --- a/deps/lightning/lib/jit_hppa-cpu.c +++ b/deps/lightning/lib/jit_hppa-cpu.c @@ -648,18 +648,18 @@ static void _movr(jit_state_t*,jit_int32_t,jit_int32_t); static void _movi(jit_state_t*,jit_int32_t,jit_word_t); #define movi_p(r0,i0) _movi_p(_jit,r0,i0) static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t); +# define movnr(r0,r1,r2) _movnr(_jit,r0,r1,r2) +static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2) +static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); #define comr(r0,r1) UADDCM(_R0_REGNO,r1,r0) #define negr(r0,r1) 
SUB(_R0_REGNO,r1,r0) #define extr_c(r0,r1) EXTRWR(r1,31,8,r0) #define extr_uc(r0,r1) EXTRWR_U(r1,31,8,r0) #define extr_s(r0,r1) EXTRWR(r1,31,16,r0) #define extr_us(r0,r1) EXTRWR_U(r1,31,16,r0) -#if __BYTE_ORDER == __BIG_ENDIAN -# define htonr_us(r0,r1) extr_us(r0,r1) -# define htonr_ui(r0,r1) movr(r0,r1) -#else -# error need htonr implementation -#endif +#define bswapr_us(r0,r1) generic_bswapr_us(_jit,r0,r1) +#define bswapr_ui(r0,r1) generic_bswapr_ui(_jit,r0,r1) #define addr(r0,r1,r2) ADD(r1,r2,r0) #define addi(r0,r1,i0) _addi(_jit,r0,r1,i0) static void _addi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); @@ -1633,6 +1633,24 @@ _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) return (w); } +static void +_movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_word_t w; + w = beqi(_jit->pc.w, r2, 0); + COPY(r1, r0); + patch_at(w, _jit->pc.w); +} + +static void +_movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_word_t w; + w = bnei(_jit->pc.w, r2, 0); + COPY(r1, r0); + patch_at(w, _jit->pc.w); +} + static void _addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { diff --git a/deps/lightning/lib/jit_hppa-sz.c b/deps/lightning/lib/jit_hppa-sz.c index 3c04f637..1bfb7e63 100644 --- a/deps/lightning/lib/jit_hppa-sz.c +++ b/deps/lightning/lib/jit_hppa-sz.c @@ -95,6 +95,8 @@ 8, /* nei */ 4, /* movr */ 8, /* movi */ + 16, /* movnr */ + 16, /* movzr */ 4, /* extr_c */ 4, /* extr_uc */ 4, /* extr_s */ @@ -399,4 +401,7 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ + 36, /* bswapr_us */ + 80, /* bswapr_ui */ + 0, /* bswapr_ul */ #endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_hppa.c b/deps/lightning/lib/jit_hppa.c index 21fe20c8..26688429 100644 --- a/deps/lightning/lib/jit_hppa.c +++ b/deps/lightning/lib/jit_hppa.c @@ -1026,6 +1026,8 @@ _emit_code(jit_state_t *_jit) case_rrw(rsh,); case_rrr(rsh, _u); case_rrw(rsh, _u); + case_rrr(movn,); + case_rrr(movz,); 
case_rr(mov,); case jit_code_movi: if (node->flag & jit_flag_node) { @@ -1052,6 +1054,8 @@ _emit_code(jit_state_t *_jit) case_rr(ext, _us); case_rr(hton, _us); case_rr(hton, _ui); + case_rr(bswap, _us); + case_rr(bswap, _ui); case_rrr(lt,); case_rrw(lt,); case_rrr(lt, _u); diff --git a/deps/lightning/lib/jit_ia64-cpu.c b/deps/lightning/lib/jit_ia64-cpu.c index dec14650..63bb92db 100644 --- a/deps/lightning/lib/jit_ia64-cpu.c +++ b/deps/lightning/lib/jit_ia64-cpu.c @@ -1307,17 +1307,15 @@ static void _movr(jit_state_t*,jit_int32_t,jit_int32_t); static void _movi(jit_state_t*,jit_int32_t,jit_word_t); #define movi_p(r0,i0) _movi_p(_jit,r0,i0) static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t); -#if __BYTE_ORDER == __LITTLE_ENDIAN -# define htonr_us(r0,r1) _htonr_us(_jit,r0,r1) -static void _htonr_us(jit_state_t*,jit_int32_t,jit_int32_t); -# define htonr_ui(r0,r1) _htonr_ui(_jit,r0,r1) -static void _htonr_ui(jit_state_t*,jit_int32_t,jit_int32_t); -# define htonr_ul(r0,r1) MUX1(r0,r1,MUX_REV) -#else -# define htonr_us(r0,r1) extr_us(r0,r1) -# define htonr_ui(r0,r1) extr_ui(r0,r1) -# define htonr_ul(r0,r1) movr(r0,r1) -#endif +# define movnr(r0,r1,r2) _movnr(_jit,r0,r1,r2) +static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2) +static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define bswapr_us(r0,r1) _bswapr_us(_jit,r0,r1) +static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t); +# define bswapr_ui(r0,r1) _bswapr_ui(_jit,r0,r1) +static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t); +# define bswapr_ul(r0,r1) MUX1(r0,r1,MUX_REV) #define extr_c(r0,r1) SXT1(r0,r1) #define extr_uc(r0,r1) ZXT1(r0,r1) #define extr_s(r0,r1) SXT2(r0,r1) @@ -3483,6 +3481,38 @@ _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) return (w); } +static void +_movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_word_t w; + w = beqi(_jit->pc.w, r2, 0); + 
movr(r0, r1); + patch_at(w, _jit->pc.w); +} + +static void +_movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_word_t w; + w = bnei(_jit->pc.w, r2, 0); + movr(r0, r1); + patch_at(w, _jit->pc.w); +} + +static void +_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + bswapr_ul(r0, r1); + rshi_u(r0, r0, 48); +} + +static void +_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + bswapr_ul(r0, r1); + rshi_u(r0, r0, 32); +} + static void _addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { @@ -3949,48 +3979,6 @@ _xori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } } -#if __BYTE_ORDER == __LITTLE_ENDIAN -static void -_htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) -{ - jit_int32_t t0; - t0 = jit_get_reg(jit_class_gpr); - rshi(rn(t0), r1, 8); - andi(r0, r1, 0xff); - andi(rn(t0), rn(t0), 0xff); - lshi(r0, r0, 8); - orr(r0, r0, rn(t0)); - jit_unget_reg(t0); -} - -static void -_htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) -{ - jit_int32_t t0; - jit_int32_t t1; - jit_int32_t t2; - t0 = jit_get_reg(jit_class_gpr); - t1 = jit_get_reg(jit_class_gpr); - t2 = jit_get_reg(jit_class_gpr); - rshi(rn(t0), r1, 24); - rshi(rn(t1), r1, 16); - rshi(rn(t2), r1, 8); - andi(rn(t0), rn(t0), 0xff); - andi(rn(t1), rn(t1), 0xff); - andi(rn(t2), rn(t2), 0xff); - andi(r0, r1, 0xff); - lshi(r0, r0, 24); - lshi(rn(t1), rn(t1), 8); - orr(r0, r0, rn(t0)); - lshi(rn(t2), rn(t2), 16); - orr(r0, r0, rn(t1)); - orr(r0, r0, rn(t2)); - jit_unget_reg(t2); - jit_unget_reg(t1); - jit_unget_reg(t0); -} -#endif - static void _lshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { diff --git a/deps/lightning/lib/jit_ia64-sz.c b/deps/lightning/lib/jit_ia64-sz.c index 59826d99..c81b3ea6 100644 --- a/deps/lightning/lib/jit_ia64-sz.c +++ b/deps/lightning/lib/jit_ia64-sz.c @@ -68,9 +68,9 @@ 16, /* lshr */ 16, /* lshi */ 16, /* rshr */ - 16, /* rshi */ + 32, /* rshi */ 16, 
/* rshr_u */ - 16, /* rshi_u */ + 32, /* rshi_u */ 16, /* negr */ 16, /* comr */ 32, /* ltr */ @@ -95,14 +95,16 @@ 32, /* nei */ 16, /* movr */ 16, /* movi */ + 48, /* movnr */ + 48, /* movzr */ 16, /* extr_c */ 16, /* extr_uc */ 16, /* extr_s */ 16, /* extr_us */ 16, /* extr_i */ 16, /* extr_ui */ - 64, /* htonr_us */ - 160, /* htonr_ui */ + 48, /* htonr_us */ + 48, /* htonr_ui */ 16, /* htonr_ul */ 16, /* ldr_c */ 32, /* ldi_c */ @@ -399,4 +401,7 @@ 0, /* movi_d_ww */ 16, /* movr_d_w */ 32, /* movi_d_w */ + 48, /* bswapr_us */ + 48, /* bswapr_ui */ + 16, /* bswapr_ul */ #endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_ia64.c b/deps/lightning/lib/jit_ia64.c index 9207d817..8b4cd004 100644 --- a/deps/lightning/lib/jit_ia64.c +++ b/deps/lightning/lib/jit_ia64.c @@ -1175,6 +1175,8 @@ _emit_code(jit_state_t *_jit) case_rrw(rsh, _u); case_rr(neg,); case_rr(com,); + case_rrr(movn,); + case_rrr(movz,); case_rr(mov,); case jit_code_movi: if (node->flag & jit_flag_node) { @@ -1196,6 +1198,9 @@ _emit_code(jit_state_t *_jit) case_rr(hton, _us); case_rr(hton, _ui); case_rr(hton, _ul); + case_rr(bswap, _us); + case_rr(bswap, _ui); + case_rr(bswap, _ul); case_rr(ext, _c); case_rr(ext, _uc); case_rr(ext, _s); diff --git a/deps/lightning/lib/jit_memory.c b/deps/lightning/lib/jit_memory.c index 4d7f92da..d1e3a144 100644 --- a/deps/lightning/lib/jit_memory.c +++ b/deps/lightning/lib/jit_memory.c @@ -19,7 +19,6 @@ #include #include -#include /* * Prototypes diff --git a/deps/lightning/lib/jit_mips-cpu.c b/deps/lightning/lib/jit_mips-cpu.c index 119547d0..06255891 100644 --- a/deps/lightning/lib/jit_mips-cpu.c +++ b/deps/lightning/lib/jit_mips-cpu.c @@ -54,8 +54,11 @@ typedef union { #endif int op; } jit_instr_t; -/* FIXME */ +#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2) +# define jit_mips2_p() 1 +#else # define jit_mips2_p() 0 +#endif # define _ZERO_REGNO 0 # define _T0_REGNO 0x08 # define _T1_REGNO 0x09 @@ -107,7 +110,8 @@ typedef union { # endif # 
define can_sign_extend_short_p(im) ((im) >= -32678 && (im) <= 32767) # define can_zero_extend_short_p(im) ((im) >= 0 && (im) <= 65535) -# define is_low_mask(im) (((im) & 1) ? (__builtin_popcountl((im) + 1) == 1) : 0) +# define is_low_mask(im) (((im) & 1) ? (__builtin_popcountl((im) + 1) <= 1) : 0) +# define is_middle_mask(im) ((im) ? (__builtin_popcountl((im) + (1 << __builtin_ctzl(im))) <= 1) : 0) # define is_high_mask(im) ((im) ? (__builtin_popcountl((im) + (1 << __builtin_ctzl(im))) == 0) : 0) # define masked_bits_count(im) __builtin_popcountl(im) # define unmasked_bits_count(im) (__WORDSIZE - masked_bits_count(im)) @@ -321,6 +325,7 @@ static void _nop(jit_state_t*,jit_int32_t); # define DADDIU(rt,rs,im) hrri(MIPS_DADDIU,rs,rt,im) # define SUBU(rd,rs,rt) rrr_t(rs,rt,rd,MIPS_SUBU) # define DSUBU(rd,rs,rt) rrr_t(rs,rt,rd,MIPS_DSUBU) +# define MUL(rd,rs,rt) hrrr_t(MIPS_SPECIAL2,rs,rt,rd,MIPS_MUL) # define MULT(rs,rt) rrr_t(rs,rt,_ZERO_REGNO,MIPS_MULT) # define MULTU(rs,rt) rrr_t(rs,rt,_ZERO_REGNO,MIPS_MULTU) # define DMULT(rs,rt) rrr_t(rs,rt,_ZERO_REGNO,MIPS_DMULT) @@ -346,8 +351,12 @@ static void _nop(jit_state_t*,jit_int32_t); # define DSRL32(rd,rt,sa) rrit(rt,rd,sa,MIPS_DSRL32) # define INS(rt,rs,pos,size) hrrrit(MIPS_SPECIAL3,rs,rt,pos+size-1,pos,MIPS_INS) # define DINS(rt,rs,pos,size) hrrrit(MIPS_SPECIAL3,rs,rt,pos+size-1,pos,MIPS_DINS) +# define DINSU(rt,rs,pos,size) hrrrit(MIPS_SPECIAL3,rs,rt,pos+size-32-1,pos-32,MIPS_DINSU) +# define DINSM(rt,rs,pos,size) hrrrit(MIPS_SPECIAL3,rs,rt,pos+size-32-1,pos,MIPS_DINSM) # define EXT(rt,rs,pos,size) hrrrit(MIPS_SPECIAL3,rs,rt,size-1,pos,MIPS_EXT) # define DEXT(rt,rs,pos,size) hrrrit(MIPS_SPECIAL3,rs,rt,size-1,pos,MIPS_DEXT) +# define DEXTU(rt,rs,pos,size) hrrrit(MIPS_SPECIAL3,rs,rt,size-1,pos-32,MIPS_DEXTU) +# define DEXTM(rt,rs,pos,size) hrrrit(MIPS_SPECIAL3,rs,rt,size-32-1,pos,MIPS_DEXTM) # define ROTR(rd,rt,sa) hrrrit(MIPS_SPECIAL,1,rt,rd,sa,MIPS_SRL) # define DROTR(rd,rt,sa) 
hrrrit(MIPS_SPECIAL,1,rt,rd,sa,MIPS_DSRL) # define MFHI(rd) rrr_t(_ZERO_REGNO,_ZERO_REGNO,rd,MIPS_MFHI) @@ -412,6 +421,10 @@ static void _nop(jit_state_t*,jit_int32_t); # define div(rs,rt) DDIV(rs,rt) # define divu(rs,rt) DDIVU(rs,rt) # endif +# define extr(rd,rt,lsb,nb) _extr(_jit,rd,rt,lsb,nb) +static void _extr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define insr(rd,rt,lsb,nb) _insr(_jit,rd,rt,lsb,nb) +static void _insr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); # define addi(r0,r1,i0) _addi(_jit,r0,r1,i0) static void _addi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); #define addcr(r0,r1,r2) _addcr(_jit,r0,r1,r2) @@ -594,23 +607,12 @@ static void _stxr_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define stxi_l(i0,r0,r1) _stxi_l(_jit,i0,r0,r1) static void _stxi_l(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); # endif -# if __BYTE_ORDER == __LITTLE_ENDIAN -# define htonr_us(r0,r1) _htonr_us(_jit,r0,r1) -static void _htonr_us(jit_state_t*,jit_int32_t,jit_int32_t); -# define htonr_ui(r0,r1) _htonr_ui(_jit,r0,r1) -static void _htonr_ui(jit_state_t*,jit_int32_t,jit_int32_t); -# if __WORDSIZE == 64 -# define htonr_ul(r0,r1) _htonr_ul(_jit,r0,r1) -static void _htonr_ul(jit_state_t*,jit_int32_t,jit_int32_t); -# endif -# else -# define htonr_us(r0,r1) extr_us(r0,r1) -# if __WORDSIZE == 32 -# define htonr_ui(r0,r1) movr(r0,r1) -# else -# define htonr_ui(r0,r1) extr_ui(r0,r1) -# define htonr_ul(r0,r1) movr(r0,r1) -# endif +# define bswapr_us(r0,r1) _bswapr_us(_jit,r0,r1) +static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t); +# define bswapr_ui(r0,r1) _bswapr_ui(_jit,r0,r1) +static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t); +# if __WORDSIZE == 64 +# define bswapr_ul(r0,r1) generic_bswapr_ul(_jit,r0,r1) # endif # define extr_c(r0,r1) _extr_c(_jit,r0,r1) static void _extr_c(jit_state_t*,jit_int32_t,jit_int32_t); @@ -809,6 +811,38 @@ _nop(jit_state_t *_jit, jit_int32_t i0) assert(i0 == 0); } 
+static void +_extr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t pos, jit_int32_t size) +{ + assert(size > 0); + + if (__WORDSIZE == 32) + EXT(r0, r1, pos, size); + else if (pos >= 32) + DEXTU(r0, r1, pos, size); + else if (size > 32) + DEXTM(r0, r1, pos, size); + else + DEXT(r0, r1, pos, size); +} + +static void +_insr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t pos, jit_int32_t size) +{ + assert(size > 0); + + if (__WORDSIZE == 32) + INS(r0, r1, pos, size); + else if (pos >= 32) + DINSU(r0, r1, pos, size); + else if (size > 32) + DINSM(r0, r1, pos, size); + else + DINS(r0, r1, pos, size); +} + static void _addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { @@ -1003,8 +1037,12 @@ _rsbi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) static void _mulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { - multu(r1, r2); - MFLO(r0); + if (__WORDSIZE == 32) + MUL(r0, r1, r2); + else { + multu(r1, r2); + MFLO(r0); + } } static void @@ -1169,25 +1207,33 @@ _andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; if (can_zero_extend_short_p(i0)) - ANDI(r0, r1, i0); + ANDI(r0, r1, i0); else if (is_low_mask(i0)) { -#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2) - if (masked_bits_count(i0) <= 32) - EXT(r0, r1, 0, masked_bits_count(i0)); - else -#endif - { - lshi(r0, r1, unmasked_bits_count(i0)); - rshi_u(r0, r0, unmasked_bits_count(i0)); - } + if (jit_mips2_p()) + extr(r0, r1, 0, masked_bits_count(i0)); + else { + lshi(r0, r1, unmasked_bits_count(i0)); + rshi_u(r0, r0, unmasked_bits_count(i0)); + } } else if (is_high_mask(i0)) { - rshi(r0, r1, unmasked_bits_count(i0)); - lshi(r0, r0, unmasked_bits_count(i0)); + if (jit_mips2_p() && r0 == r1) + insr(r0, _ZERO_REGNO, 0, unmasked_bits_count(i0)); + else { + rshi(r0, r1, unmasked_bits_count(i0)); + lshi(r0, r0, unmasked_bits_count(i0)); + } + } else if (jit_mips2_p() && 
is_middle_mask(i0)) { + extr(r0, r1, __builtin_ctzl(i0), masked_bits_count(i0)); + lshi(r0, r0, __builtin_ctzl(i0)); + } else if (jit_mips2_p() && is_middle_mask(~i0)) { + if (r0 != r1) + movr(r0, r1); + insr(r0, _ZERO_REGNO, __builtin_ctzl(~i0), masked_bits_count(~i0)); } else { - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), i0); - AND(r0, r1, rn(reg)); - jit_unget_reg(reg); + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + AND(r0, r1, rn(reg)); + jit_unget_reg(reg); } } @@ -1708,60 +1754,34 @@ _stxi_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) } #endif -# if __BYTE_ORDER == __LITTLE_ENDIAN static void -_htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { - jit_int32_t t0; - t0 = jit_get_reg(jit_class_gpr); - rshi(rn(t0), r1, 8); - andi(r0, r1, 0xff); - andi(rn(t0), rn(t0), 0xff); - lshi(r0, r0, 8); - orr(r0, r0, rn(t0)); - jit_unget_reg(t0); -} - -static void -_htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) -{ - jit_int32_t t0; - jit_int32_t t1; - jit_int32_t t2; - t0 = jit_get_reg(jit_class_gpr); - t1 = jit_get_reg(jit_class_gpr); - t2 = jit_get_reg(jit_class_gpr); - rshi(rn(t0), r1, 24); - rshi(rn(t1), r1, 16); - rshi(rn(t2), r1, 8); - andi(rn(t0), rn(t0), 0xff); - andi(rn(t1), rn(t1), 0xff); - andi(rn(t2), rn(t2), 0xff); - andi(r0, r1, 0xff); - lshi(r0, r0, 24); - lshi(rn(t1), rn(t1), 8); - orr(r0, r0, rn(t0)); - lshi(rn(t2), rn(t2), 16); - orr(r0, r0, rn(t1)); - orr(r0, r0, rn(t2)); - jit_unget_reg(t2); - jit_unget_reg(t1); - jit_unget_reg(t0); + if (jit_mips2_p()) { + extr_us(r0, r1); + WSBH(r0, r0); + } else { + generic_bswapr_us(_jit, r0, r1); + } } static void -_htonr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { - jit_int32_t reg; - reg = jit_get_reg(jit_class_gpr); - rshi_u(rn(reg), r1, 32); - htonr_ui(r0, r1); - htonr_ui(rn(reg), rn(reg)); - lshi(r0, r0, 32); 
- orr(r0, r0, rn(reg)); - jit_unget_reg(reg); + if (jit_mips2_p()) { + if (__WORDSIZE == 64) { + SLL(r0, r1, 0); + WSBH(r0, r0); + ROTR(r0, r0, 16); + extr(r0, r0, 0, 32); + } else { + WSBH(r0, r1); + ROTR(r0, r0, 16); + } + } else { + generic_bswapr_ui(_jit, r0, r1); + } } -# endif static void _extr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) @@ -1789,8 +1809,12 @@ _extr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) static void _extr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { - lshi(r0, r1, 32); - rshi_u(r0, r0, 32); + if (jit_mips2_p()) + DEXT(r0, r1, 0, 32); + else { + lshi(r0, r1, 32); + rshi_u(r0, r0, 32); + } } # endif @@ -1836,10 +1860,8 @@ _lei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; - if (i0 == 0) { - SLT(r0, _ZERO_REGNO, r1); - XORI(r0, r0, 1); - } + if (can_sign_extend_short_p(i0 + 1)) + SLTI(r0, r1, i0 + 1); else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i0); @@ -1860,10 +1882,8 @@ _lei_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; - if (i0 == 0) { - SLTU(r0, _ZERO_REGNO, r1); - XORI(r0, r0, 1); - } + if (i0 != -1 && can_sign_extend_short_p(i0 + 1)) + SLTIU(r0, r1, i0 + 1); else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i0); @@ -1904,10 +1924,15 @@ _gei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), i0); - ger(r0, r1, rn(reg)); - jit_unget_reg(reg); + if (can_sign_extend_short_p(i0)) { + SLTI(r0, r1, i0); + XORI(r0, r0, 1); + } else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + ger(r0, r1, rn(reg)); + jit_unget_reg(reg); + } } static void @@ -1922,10 +1947,15 @@ _gei_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), i0); - ger_u(r0, r1, rn(reg)); - jit_unget_reg(reg); + if (can_sign_extend_short_p(i0)) { + SLTIU(r0, r1, i0); + XORI(r0, r0, 1); +
} else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + ger_u(r0, r1, rn(reg)); + jit_unget_reg(reg); + } } static void @@ -2848,16 +2878,12 @@ _bmsi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) jit_word_t w; jit_int32_t t0; t0 = jit_get_reg(jit_class_gpr|jit_class_nospill); - if (can_zero_extend_short_p(i1)) { - ANDI(rn(t0), r0, i1); - w = _jit->pc.w; - BNE(_ZERO_REGNO, rn(t0), ((i0 - w) >> 2) - 1); - NOP(1); - } - else { - movi(rn(t0), i1); - w = bmsr(i0, r0, rn(t0)); - } + + andi(rn(t0), r0, i1); + w = _jit->pc.w; + BNE(_ZERO_REGNO, rn(t0), ((i0 - w) >> 2) - 1); + NOP(1); + jit_unget_reg(t0); return (w); } @@ -2882,16 +2908,12 @@ _bmci(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) jit_word_t w; jit_int32_t t0; t0 = jit_get_reg(jit_class_gpr|jit_class_nospill); - if (can_zero_extend_short_p(i1)) { - ANDI(rn(t0), r0, i1); - w = _jit->pc.w; - BEQ(_ZERO_REGNO, rn(t0), ((i0 - w) >> 2) - 1); - NOP(1); - } - else { - movi(rn(t0), i1); - w = bmcr(i0, r0, rn(t0)); - } + + andi(rn(t0), r0, i1); + w = _jit->pc.w; + BEQ(_ZERO_REGNO, rn(t0), ((i0 - w) >> 2) - 1); + NOP(1); + jit_unget_reg(t0); return (w); } diff --git a/deps/lightning/lib/jit_mips-sz.c b/deps/lightning/lib/jit_mips-sz.c index b33fef2f..b4642fa9 100644 --- a/deps/lightning/lib/jit_mips-sz.c +++ b/deps/lightning/lib/jit_mips-sz.c @@ -1,7 +1,7 @@ #if __WORDSIZE == 32 #if NEW_ABI -#define JIT_INSTR_MAX 44 +#define JIT_INSTR_MAX 52 0, /* data */ 0, /* live */ 0, /* align */ @@ -42,8 +42,8 @@ 28, /* subxr */ 28, /* subxi */ 16, /* rsbi */ - 8, /* mulr */ - 16, /* muli */ + 4, /* mulr */ + 12, /* muli */ 12, /* qmulr */ 20, /* qmuli */ 12, /* qmulr_u */ @@ -96,6 +96,8 @@ 8, /* nei */ 4, /* movr */ 8, /* movi */ + 4, /* movnr */ + 4, /* movzr */ 8, /* extr_c */ 4, /* extr_uc */ 8, /* extr_s */ @@ -400,6 +402,9 @@ 0, /* movi_d_ww */ 4, /* movr_d_w */ 12, /* movi_d_w */ + 20, /* bswapr_us */ + 52, /* bswapr_ui */ + 0, /* bswapr_ul */ #endif /* NEW_ABI */ #endif /* 
__WORDSIZE */ @@ -446,8 +451,8 @@ 28, /* subxr */ 28, /* subxi */ 16, /* rsbi */ - 8, /* mulr */ - 16, /* muli */ + 4, /* mulr */ + 12, /* muli */ 12, /* qmulr */ 20, /* qmuli */ 12, /* qmulr_u */ @@ -500,6 +505,8 @@ 8, /* nei */ 4, /* movr */ 8, /* movi */ + 4, /* movnr */ + 4, /* movzr */ 8, /* extr_c */ 4, /* extr_uc */ 8, /* extr_s */ @@ -804,11 +811,14 @@ 8, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ + 20, /* bswapr_us */ + 52, /* bswapr_ui */ + 0, /* bswapr_ul */ #endif /* NEW_ABI */ #endif /* __WORDSIZE */ #if __WORDSIZE == 64 -#define JIT_INSTR_MAX 44 +#define JIT_INSTR_MAX 116 0, /* data */ 0, /* live */ 4, /* align */ @@ -903,6 +913,8 @@ 8, /* nei */ 4, /* movr */ 28, /* movi */ + 4, /* movnr */ + 4, /* movzr */ 8, /* extr_c */ 4, /* extr_uc */ 8, /* extr_s */ @@ -1207,6 +1219,7 @@ 0, /* movi_d_ww */ 4, /* movr_d_w */ 12, /* movi_d_w */ - 4, /* movnr */ - 4, /* movzr */ + 20, /* bswapr_us */ + 52, /* bswapr_ui */ + 116, /* bswapr_ul */ #endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_mips.c b/deps/lightning/lib/jit_mips.c index 5ffad2b5..94fe797c 100644 --- a/deps/lightning/lib/jit_mips.c +++ b/deps/lightning/lib/jit_mips.c @@ -1419,6 +1419,11 @@ _emit_code(jit_state_t *_jit) case_rr(hton, _ui); #if __WORDSIZE == 64 case_rr(hton, _ul); +#endif + case_rr(bswap, _us); + case_rr(bswap, _ui); +#if __WORDSIZE == 64 + case_rr(bswap, _ul); #endif case_rr(ext, _c); case_rr(ext, _uc); diff --git a/deps/lightning/lib/jit_names.c b/deps/lightning/lib/jit_names.c index 475bc96c..ebd3d56f 100644 --- a/deps/lightning/lib/jit_names.c +++ b/deps/lightning/lib/jit_names.c @@ -69,6 +69,7 @@ static char *code_name[] = { "gtr_u", "gti_u", "ner", "nei", "movr", "movi", + "movnr", "movzr", "extr_c", "extr_uc", "extr_s", "extr_us", "extr_i", "extr_ui", @@ -227,4 +228,6 @@ static char *code_name[] = { "movr_f_w", "movi_f_w", "movr_d_ww", "movi_d_ww", "movr_d_w", "movi_d_w", + "bswapr_us", + "bswapr_ui", "bswapr_ul", }; diff --git 
a/deps/lightning/lib/jit_ppc-cpu.c b/deps/lightning/lib/jit_ppc-cpu.c index 0046a058..cab085fd 100644 --- a/deps/lightning/lib/jit_ppc-cpu.c +++ b/deps/lightning/lib/jit_ppc-cpu.c @@ -521,23 +521,12 @@ static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t); # define extr_i(r0,r1) EXTSW(r0,r1) # define extr_ui(r0,r1) CLRLDI(r0,r1,32) # endif -# if __BYTE_ORDER == __BIG_ENDIAN -# define htonr_us(r0,r1) extr_us(r0,r1) -# if __WORDSIZE == 32 -# define htonr_ui(r0,r1) movr(r0,r1) -# else -# define htonr_ui(r0,r1) extr_ui(r0,r1) -# define htonr_ul(r0,r1) movr(r0,r1) -# endif -# else -# define htonr_us(r0,r1) _htonr_us(_jit,r0,r1) -static void _htonr_us(jit_state_t*,jit_int32_t,jit_int32_t); -# define htonr_ui(r0,r1) _htonr_ui(_jit,r0,r1) -static void _htonr_ui(jit_state_t*,jit_int32_t,jit_int32_t); -# if __WORDSIZE == 64 -# define htonr_ul(r0,r1) _htonr_ul(_jit,r0,r1) -static void _htonr_ul(jit_state_t*,jit_int32_t,jit_int32_t); -# endif +# define bswapr_us(r0,r1) _bswapr_us(_jit,r0,r1) +static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t); +# define bswapr_ui(r0,r1) _bswapr_ui(_jit,r0,r1) +static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t); +# if __WORDSIZE == 64 +# define bswapr_ul(r0,r1) generic_bswapr_ul(_jit,r0,r1) # endif # define addr(r0,r1,r2) ADD(r0,r1,r2) # define addi(r0,r1,i0) _addi(_jit,r0,r1,i0) @@ -1158,22 +1147,20 @@ _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) return (word); } -# if __BYTE_ORDER == __LITTLE_ENDIAN static void -_htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { - jit_int32_t t0; - t0 = jit_get_reg(jit_class_gpr); - rshi(rn(t0), r1, 8); - andi(r0, r1, 0xff); - andi(rn(t0), rn(t0), 0xff); - lshi(r0, r0, 8); - orr(r0, r0, rn(t0)); - jit_unget_reg(t0); + if (r0 == r1) { + RLWIMI(r0, r0, 16, 8, 15); + RLWINM(r0, r0, 24, 16, 31); + } else { + RLWINM(r0, r1, 8, 16, 23); + RLWIMI(r0, r1, 24, 24, 31); + } } static void 
-_htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { jit_int32_t reg; reg = jit_get_reg(jit_class_gpr); @@ -1188,22 +1175,6 @@ _htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) jit_unget_reg(reg); } -# if __WORDSIZE == 64 -static void -_htonr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) -{ - jit_int32_t reg; - reg = jit_get_reg(jit_class_gpr); - rshi_u(rn(reg), r1, 32); - htonr_ui(r0, r1); - htonr_ui(rn(reg), rn(reg)); - lshi(r0, r0, 32); - orr(r0, r0, rn(reg)); - jit_unget_reg(reg); -} -# endif -# endif - static void _addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { diff --git a/deps/lightning/lib/jit_ppc-fpu.c b/deps/lightning/lib/jit_ppc-fpu.c index 1e84f8e3..387cc6fd 100644 --- a/deps/lightning/lib/jit_ppc-fpu.c +++ b/deps/lightning/lib/jit_ppc-fpu.c @@ -511,7 +511,11 @@ _truncr_d_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) FCTIWZ(rn(reg), r1); /* use reserved 8 bytes area */ stxi_d(alloca_offset - 8, _FP_REGNO, rn(reg)); +# if __BYTE_ORDER == __BIG_ENDIAN ldxi_i(r0, _FP_REGNO, alloca_offset - 4); +# else + ldxi_i(r0, _FP_REGNO, alloca_offset - 8); +# endif jit_unget_reg(reg); } diff --git a/deps/lightning/lib/jit_ppc-sz.c b/deps/lightning/lib/jit_ppc-sz.c index 28251b42..0be7047b 100644 --- a/deps/lightning/lib/jit_ppc-sz.c +++ b/deps/lightning/lib/jit_ppc-sz.c @@ -97,6 +97,8 @@ 16, /* nei */ 4, /* movr */ 8, /* movi */ + 12, /* movnr */ + 12, /* movzr */ 4, /* extr_c */ 4, /* extr_uc */ 4, /* extr_s */ @@ -401,8 +403,9 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ - 12, /* movnr */ - 12, /* movzr */ + 20, /* bswapr_us */ + 16, /* bswapr_ui */ + 0, /* bswapr_ul */ #endif /* _CALL_SYV */ #endif /* __BYTE_ORDER */ #endif /* __powerpc__ */ @@ -507,6 +510,8 @@ 16, /* nei */ 4, /* movr */ 8, /* movi */ + 12, /* movnr */ + 12, /* movzr */ 4, /* extr_c */ 4, /* extr_uc */ 4, /* extr_s */ @@ -811,8 +816,9 @@ 0, /* movi_d_ww */ 0, 
/* movr_d_w */ 0, /* movi_d_w */ - 12, /* movnr */ - 12, /* movzr */ + 20, /* bswapr_us */ + 16, /* bswapr_ui */ + 0, /* bswapr_ul */ #endif /* _CALL_AIX */ #endif /* __BYTEORDER */ #endif /* __powerpc__ */ @@ -916,6 +922,8 @@ 16, /* nei */ 4, /* movr */ 36, /* movi */ + 12, /* movnr */ + 12, /* movzr */ 4, /* extr_c */ 4, /* extr_uc */ 4, /* extr_s */ @@ -1220,8 +1228,9 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ - 12, /* movnr */ - 12, /* movzr */ + 20, /* bswapr_us */ + 16, /* bswapr_ui */ + 44, /* bswapr_ul */ #endif /* __BYTEORDER */ #endif /* __powerpc__ */ #endif /* __WORDSIZE */ @@ -1324,6 +1333,8 @@ 16, /* nei */ 4, /* movr */ 36, /* movi */ + 12, /* movnr */ + 12, /* movzr */ 4, /* extr_c */ 4, /* extr_uc */ 4, /* extr_s */ @@ -1628,8 +1639,9 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ - 12, /* movnr */ - 12, /* movzr */ + 20, /* bswapr_us */ + 16, /* bswapr_ui */ + 44, /* bswapr_ul */ #endif /* __BYTE_ORDER */ #endif /* __powerpc__ */ #endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_ppc.c b/deps/lightning/lib/jit_ppc.c index d05d4b1a..e94d1a5e 100644 --- a/deps/lightning/lib/jit_ppc.c +++ b/deps/lightning/lib/jit_ppc.c @@ -1355,6 +1355,11 @@ _emit_code(jit_state_t *_jit) case_rr(hton, _ui); # if __WORDSIZE == 64 case_rr(hton, _ul); +# endif + case_rr(bswap, _us); + case_rr(bswap, _ui); +# if __WORDSIZE == 64 + case_rr(bswap, _ul); # endif case_rr(neg,); case_rr(com,); diff --git a/deps/lightning/lib/jit_print.c b/deps/lightning/lib/jit_print.c index c44623a0..61d9650c 100644 --- a/deps/lightning/lib/jit_print.c +++ b/deps/lightning/lib/jit_print.c @@ -55,12 +55,16 @@ static FILE *print_stream; * Implementation */ void -_jit_print(jit_state_t *_jit) +jit_init_print(void) { - jit_node_t *node; - if (!print_stream) print_stream = stderr; +} + +void +_jit_print(jit_state_t *_jit) +{ + jit_node_t *node; if ((node = _jitc->head)) { jit_print_node(node); diff --git a/deps/lightning/lib/jit_riscv-cpu.c 
b/deps/lightning/lib/jit_riscv-cpu.c index 388489fb..9f029c03 100644 --- a/deps/lightning/lib/jit_riscv-cpu.c +++ b/deps/lightning/lib/jit_riscv-cpu.c @@ -434,12 +434,9 @@ static void _stxi_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); static void _stxr_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define stxi_l(i0, r0, r1) _stxi_l(_jit, i0, r0, r1) static void _stxi_l(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); -# define htonr_us(r0, r1) _htonr_us(_jit, r0, r1) -static void _htonr_us(jit_state_t*,jit_int32_t,jit_int32_t); -# define htonr_ui(r0, r1) _htonr_ui(_jit, r0, r1) -static void _htonr_ui(jit_state_t*,jit_int32_t,jit_int32_t); -# define htonr_ul(r0, r1) _htonr_ul(_jit, r0, r1) -static void _htonr_ul(jit_state_t*,jit_int32_t,jit_int32_t); +# define bswapr_us(r0, r1) generic_bswapr_us(_jit, r0, r1) +# define bswapr_ui(r0, r1) generic_bswapr_ui(_jit, r0, r1) +# define bswapr_ul(r0, r1) generic_bswapr_ul(_jit, r0, r1) # define extr_c(r0, r1) _extr_c(_jit, r0, r1) static void _extr_c(jit_state_t*,jit_int32_t,jit_int32_t); # define extr_uc(r0, r1) andi(r0, r1, 0xff) @@ -455,6 +452,10 @@ static void _extr_ui(jit_state_t*,jit_int32_t,jit_int32_t); static void _movi(jit_state_t*,jit_int32_t,jit_word_t); # define movi_p(r0, im) _movi_p(_jit, r0, im) static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t); +# define movnr(r0,r1,r2) _movnr(_jit,r0,r1,r2) +static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2) +static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define ltr(r0, r1, r2) SLT(r0, r1, r2) # define lti(r0, r1, im) _lti(_jit, r0, r1, im) static void _lti(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); @@ -1243,59 +1244,6 @@ DEFST(s, H) DEFST(i, W) DEFST(l, D) -static void -_htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) -{ - jit_int32_t t0; - t0 = jit_get_reg(jit_class_gpr); - rshi(rn(t0), r1, 8); - andi(r0, r1, 0xff); - andi(rn(t0), 
rn(t0), 0xff); - lshi(r0, r0, 8); - orr(r0, r0, rn(t0)); - jit_unget_reg(t0); -} - -static void -_htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) -{ - jit_int32_t t0; - jit_int32_t t1; - jit_int32_t t2; - t0 = jit_get_reg(jit_class_gpr); - t1 = jit_get_reg(jit_class_gpr); - t2 = jit_get_reg(jit_class_gpr); - rshi(rn(t0), r1, 24); - rshi(rn(t1), r1, 16); - rshi(rn(t2), r1, 8); - andi(rn(t0), rn(t0), 0xff); - andi(rn(t1), rn(t1), 0xff); - andi(rn(t2), rn(t2), 0xff); - andi(r0, r1, 0xff); - lshi(r0, r0, 24); - lshi(rn(t1), rn(t1), 8); - orr(r0, r0, rn(t0)); - lshi(rn(t2), rn(t2), 16); - orr(r0, r0, rn(t1)); - orr(r0, r0, rn(t2)); - jit_unget_reg(t2); - jit_unget_reg(t1); - jit_unget_reg(t0); -} - -static void -_htonr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) -{ - jit_int32_t t0; - t0 = jit_get_reg(jit_class_gpr); - rshi_u(rn(t0), r1, 32); - htonr_ui(r0, r1); - htonr_ui(rn(t0), rn(t0)); - lshi(r0, r0, 32); - orr(r0, r0, rn(t0)); - jit_unget_reg(t0); -} - static void _extr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { @@ -1373,6 +1321,24 @@ _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) return (w); } +static void +_movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_word_t w; + w = beqi(_jit->pc.w, r2, 0); + movr(r0, r1); + patch_at(w, _jit->pc.w); +} + +static void +_movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_word_t w; + w = bnei(_jit->pc.w, r2, 0); + movr(r0, r1); + patch_at(w, _jit->pc.w); +} + static void _lti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { diff --git a/deps/lightning/lib/jit_riscv-sz.c b/deps/lightning/lib/jit_riscv-sz.c index 2f1d7258..c8908d88 100644 --- a/deps/lightning/lib/jit_riscv-sz.c +++ b/deps/lightning/lib/jit_riscv-sz.c @@ -94,6 +94,8 @@ 8, /* nei */ 4, /* movr */ 24, /* movi */ + 8, /* movnr */ + 8, /* movzr */ 8, /* extr_c */ 4, /* extr_uc */ 8, /* extr_s */ @@ -398,4 +400,7 @@ 0, /* movi_d_ww */ 4, /*
movr_d_w */ 16, /* movi_d_w */ + 20, /* bswapr_us */ + 52, /* bswapr_ui */ + 116, /* bswapr_ul */ #endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_riscv.c b/deps/lightning/lib/jit_riscv.c index 55b23914..1dc3c9ec 100644 --- a/deps/lightning/lib/jit_riscv.c +++ b/deps/lightning/lib/jit_riscv.c @@ -1125,12 +1125,17 @@ _emit_code(jit_state_t *_jit) case_rr(hton, _us); case_rr(hton, _ui); case_rr(hton, _ul); + case_rr(bswap, _us); + case_rr(bswap, _ui); + case_rr(bswap, _ul); case_rr(ext, _c); case_rr(ext, _uc); case_rr(ext, _s); case_rr(ext, _us); case_rr(ext, _i); case_rr(ext, _ui); + case_rrr(movn,); + case_rrr(movz,); case_rr(mov,); case jit_code_movi: if (node->flag & jit_flag_node) { diff --git a/deps/lightning/lib/jit_s390-cpu.c b/deps/lightning/lib/jit_s390-cpu.c index 02cac604..619ab152 100644 --- a/deps/lightning/lib/jit_s390-cpu.c +++ b/deps/lightning/lib/jit_s390-cpu.c @@ -966,6 +966,13 @@ static void _movr(jit_state_t*,jit_int32_t,jit_int32_t); static void _movi(jit_state_t*,jit_int32_t,jit_word_t); # define movi_p(r0,i0) _movi_p(_jit,r0,i0) static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t); +# define bswapr_us(r0, r1) generic_bswapr_us(_jit, r0, r1) +# define bswapr_ui(r0, r1) generic_bswapr_ui(_jit, r0, r1) +# define bswapr_ul(r0, r1) generic_bswapr_ul(_jit, r0, r1) +# define movnr(r0,r1,r2) _movnr(_jit,r0,r1,r2) +static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2) +static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define addr(r0,r1,r2) _addr(_jit,r0,r1,r2) static void _addr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define addi(r0,r1,i0) _addi(_jit,r0,r1,i0) @@ -1079,13 +1086,6 @@ static void _ori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); static void _xorr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define xori(r0,r1,i0) _xori(_jit,r0,r1,i0) static void _xori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); -# 
define htonr_us(r0,r1) extr_us(r0,r1) -# if __WORDSIZE == 32 -# define htonr_ui(r0,r1) movr(r0,r1) -# else -# define htonr_ui(r0,r1) extr_ui(r0,r1) -# define htonr_ul(r0,r1) movr(r0,r1) -# endif # define extr_c(r0,r1) LGBR(r0,r1) # define extr_uc(r0,r1) LLGCR(r0,r1) # define extr_s(r0,r1) LGHR(r0,r1) @@ -2442,6 +2442,32 @@ _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) return (w); } +static void +_movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_word_t w; + w = beqi_p(_jit->pc.w, r2, 0); +#if __WORDSIZE == 32 + LR(r0, r1); +#else + LGR(r0, r1); +#endif + patch_at(w, _jit->pc.w); +} + +static void +_movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_word_t w; + w = bnei_p(_jit->pc.w, r2, 0); +#if __WORDSIZE == 32 + LR(r0, r1); +#else + LGR(r0, r1); +#endif + patch_at(w, _jit->pc.w); +} + static void _addr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { diff --git a/deps/lightning/lib/jit_s390-sz.c b/deps/lightning/lib/jit_s390-sz.c index bb8b2dc9..bb9071d1 100644 --- a/deps/lightning/lib/jit_s390-sz.c +++ b/deps/lightning/lib/jit_s390-sz.c @@ -1,6 +1,6 @@ #if __WORDSIZE == 32 -#define JIT_INSTR_MAX 104 +#define JIT_INSTR_MAX 128 0, /* data */ 0, /* live */ 6, /* align */ @@ -95,6 +95,8 @@ 24, /* nei */ 4, /* movr */ 16, /* movi */ + 14, /* movnr */ + 14, /* movzr */ 4, /* extr_c */ 4, /* extr_uc */ 4, /* extr_s */ @@ -399,10 +401,13 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ + 52, /* bswapr_us */ + 128, /* bswapr_ui */ + 0, /* bswapr_ul */ #endif /* __WORDSIZE */ #if __WORDSIZE == 64 -#define JIT_INSTR_MAX 104 +#define JIT_INSTR_MAX 344 0, /* data */ 0, /* live */ 6, /* align */ @@ -497,6 +502,8 @@ 24, /* nei */ 4, /* movr */ 16, /* movi */ + 14, /* movnr */ + 14, /* movzr */ 4, /* extr_c */ 4, /* extr_uc */ 4, /* extr_s */ @@ -801,4 +808,7 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ + 68, /* bswapr_us */ + 160, /* bswapr_ui */ + 344, /* 
bswapr_ul */ #endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_s390.c b/deps/lightning/lib/jit_s390.c index 7cd1d7f3..4b89bea0 100644 --- a/deps/lightning/lib/jit_s390.c +++ b/deps/lightning/lib/jit_s390.c @@ -1151,6 +1151,11 @@ _emit_code(jit_state_t *_jit) case_rr(hton, _ui); #if __WORDSIZE == 64 case_rr(hton, _ul); +#endif + case_rr(bswap, _us); + case_rr(bswap, _ui); +#if __WORDSIZE == 64 + case_rr(bswap, _ul); #endif case_rr(ext, _c); case_rr(ext, _uc); @@ -1160,6 +1165,8 @@ _emit_code(jit_state_t *_jit) case_rr(ext, _i); case_rr(ext, _ui); #endif + case_rrr(movn,); + case_rrr(movz,); case_rr(mov,); case jit_code_movi: if (node->flag & jit_flag_node) { diff --git a/deps/lightning/lib/jit_sparc-cpu.c b/deps/lightning/lib/jit_sparc-cpu.c index 051647a7..90c3767b 100644 --- a/deps/lightning/lib/jit_sparc-cpu.c +++ b/deps/lightning/lib/jit_sparc-cpu.c @@ -545,6 +545,13 @@ static void _movr(jit_state_t*, jit_int32_t, jit_int32_t); static void _movi(jit_state_t*, jit_int32_t, jit_word_t); # define movi_p(r0, i0) _movi_p(_jit, r0, i0) static jit_word_t _movi_p(jit_state_t*, jit_int32_t, jit_word_t); +# define bswapr_us(r0, r1) generic_bswapr_us(_jit, r0, r1) +# define bswapr_ui(r0, r1) generic_bswapr_ui(_jit, r0, r1) +# define bswapr_ul(r0, r1) generic_bswapr_ul(_jit, r0, r1) +# define movnr(r0,r1,r2) _movnr(_jit,r0,r1,r2) +static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2) +static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define comr(r0, r1) XNOR(r1, 0, r0) # define negr(r0, r1) NEG(r1, r0) # define addr(r0, r1, r2) ADD(r1, r2, r0) @@ -669,7 +676,6 @@ static void _xori(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); # define rshr_u(r0, r1, r2) SRLX(r1, r2, r0) # define rshi_u(r0, r1, i0) SRLXI(r1, i0, r0) # endif -# define htonr_us(r0,r1) extr_us(r0,r1) # define extr_c(r0,r1) _extr_c(_jit,r0,r1) static void _extr_c(jit_state_t*,jit_int32_t,jit_int32_t); # define 
extr_uc(r0,r1) andi(r0, r1, 0xff) @@ -677,11 +683,7 @@ static void _extr_c(jit_state_t*,jit_int32_t,jit_int32_t); static void _extr_s(jit_state_t*,jit_int32_t,jit_int32_t); # define extr_us(r0,r1) _extr_us(_jit,r0,r1) static void _extr_us(jit_state_t*,jit_int32_t,jit_int32_t); -# if __WORDSIZE == 32 -# define htonr_ui(r0,r1) movr(r0,r1) -# else -# define htonr_ui(r0,r1) extr_ui(r0,r1) -# define htonr_ul(r0,r1) movr(r0,r1) +# if __WORDSIZE == 64 # define extr_i(r0,r1) _extr_i(_jit,r0,r1) static void _extr_i(jit_state_t*,jit_int32_t,jit_int32_t); # define extr_ui(r0,r1) _extr_ui(_jit,r0,r1) @@ -1213,6 +1215,24 @@ _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) return (w); } +static void +_movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_word_t w; + w = beqi(_jit->pc.w, r2, 0); + ORI(r1, 0, r0); + patch_at(w, _jit->pc.w); +} + +static void +_movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_word_t w; + w = bnei(_jit->pc.w, r2, 0); + ORI(r1, 0, r0); + patch_at(w, _jit->pc.w); +} + static void _addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { diff --git a/deps/lightning/lib/jit_sparc-sz.c b/deps/lightning/lib/jit_sparc-sz.c index ac683b66..5ec051d9 100644 --- a/deps/lightning/lib/jit_sparc-sz.c +++ b/deps/lightning/lib/jit_sparc-sz.c @@ -1,5 +1,5 @@ #if __WORDSIZE == 32 -#define JIT_INSTR_MAX 44 +#define JIT_INSTR_MAX 52 0, /* data */ 0, /* live */ 0, /* align */ @@ -94,6 +94,8 @@ 16, /* nei */ 4, /* movr */ 8, /* movi */ + 20, /* movnr */ + 20, /* movzr */ 8, /* extr_c */ 4, /* extr_uc */ 8, /* extr_s */ @@ -398,10 +400,13 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ + 20, /* bswapr_us */ + 52, /* bswapr_ui */ + 0, /* bswapr_ul */ #endif /* __WORDSIZE */ #if __WORDSIZE == 64 -#define JIT_INSTR_MAX 64 +#define JIT_INSTR_MAX 116 0, /* data */ 0, /* live */ 4, /* align */ @@ -496,6 +501,8 @@ 16, /* nei */ 4, /* movr */ 24, /* movi */ + 20, /* movnr */ + 20, /* 
movzr */ 8, /* extr_c */ 4, /* extr_uc */ 8, /* extr_s */ @@ -800,4 +807,7 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ + 20, /* bswapr_us */ + 52, /* bswapr_ui */ + 116, /* bswapr_ul */ #endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_sparc.c b/deps/lightning/lib/jit_sparc.c index 158c09d6..23d44425 100644 --- a/deps/lightning/lib/jit_sparc.c +++ b/deps/lightning/lib/jit_sparc.c @@ -1463,6 +1463,11 @@ _emit_code(jit_state_t *_jit) case_rr(hton, _ui); #if __WORDSIZE == 64 case_rr(hton, _ul); +#endif + case_rr(bswap, _us); + case_rr(bswap, _ui); +#if __WORDSIZE == 64 + case_rr(bswap, _ul); #endif case_rr(ext, _c); case_rr(ext, _uc); @@ -1472,6 +1477,8 @@ _emit_code(jit_state_t *_jit) case_rr(ext, _i); case_rr(ext, _ui); #endif + case_rrr(movn,); + case_rrr(movz,); case_rr(mov,); case jit_code_movi: if (node->flag & jit_flag_node) { diff --git a/deps/lightning/lib/jit_x86-cpu.c b/deps/lightning/lib/jit_x86-cpu.c index 6dcf6727..81534f08 100644 --- a/deps/lightning/lib/jit_x86-cpu.c +++ b/deps/lightning/lib/jit_x86-cpu.c @@ -379,13 +379,13 @@ static void _movir(jit_state_t*,jit_int32_t,jit_int32_t); # define movir_u(r0, r1) _movir_u(_jit, r0, r1) static void _movir_u(jit_state_t*,jit_int32_t,jit_int32_t); # endif -# define htonr_us(r0, r1) _htonr_us(_jit, r0, r1) -static void _htonr_us(jit_state_t*,jit_int32_t,jit_int32_t); -# define htonr_ui(r0, r1) _htonr_ui(_jit, r0, r1) -static void _htonr_ui(jit_state_t*,jit_int32_t,jit_int32_t); +# define bswapr_us(r0, r1) _bswapr_us(_jit, r0, r1) +static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t); +# define bswapr_ui(r0, r1) _bswapr_ui(_jit, r0, r1) +static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t); # if __X64 && !__X64_32 -#define htonr_ul(r0, r1) _htonr_ul(_jit, r0, r1) -static void _htonr_ul(jit_state_t*,jit_int32_t,jit_int32_t); +#define bswapr_ul(r0, r1) _bswapr_ul(_jit, r0, r1) +static void _bswapr_ul(jit_state_t*,jit_int32_t,jit_int32_t); #endif # define extr_c(r0, r1) 
_extr_c(_jit, r0, r1) static void _extr_c(jit_state_t*,jit_int32_t,jit_int32_t); @@ -2263,7 +2263,7 @@ _movir_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) #endif static void -_htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { extr_us(r0, r1); ic(0x66); @@ -2274,7 +2274,7 @@ _htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) } static void -_htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { movr(r0, r1); rex(0, 0, _NOREG, _NOREG, r0); @@ -2284,7 +2284,7 @@ _htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) #if __X64 && !__X64_32 static void -_htonr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +_bswapr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { movr(r0, r1); rex(0, 1, _NOREG, _NOREG, r0); diff --git a/deps/lightning/lib/jit_x86-sz.c b/deps/lightning/lib/jit_x86-sz.c index 2cf88808..bd4b9a08 100644 --- a/deps/lightning/lib/jit_x86-sz.c +++ b/deps/lightning/lib/jit_x86-sz.c @@ -95,6 +95,8 @@ 16, /* nei */ 2, /* movr */ 5, /* movi */ + 7, /* movnr */ + 7, /* movzr */ 11, /* extr_c */ 11, /* extr_uc */ 3, /* extr_s */ @@ -399,8 +401,9 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ - 7, /* movnr */ - 7, /* movzr */ + 7, /* bswapr_us */ + 4, /* bswapr_ui */ + 0, /* bswapr_ul */ #endif #if __X64 @@ -500,6 +503,8 @@ 14, /* nei */ 3, /* movr */ 10, /* movi */ + 7, /* movnr */ + 7, /* movzr */ 7, /* extr_c */ 7, /* extr_uc */ 4, /* extr_s */ @@ -804,8 +809,9 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ - 7, /* movnr */ - 7, /* movzr */ + 9, /* bswapr_us */ + 6, /* bswapr_ui */ + 6, /* bswapr_ul */ #else # if __X64_32 @@ -904,6 +910,8 @@ 14, /* nei */ 3, /* movr */ 6, /* movi */ + 7, /* movnr */ + 7, /* movzr */ 7, /* extr_c */ 7, /* extr_uc */ 4, /* extr_s */ @@ -1208,8 +1216,9 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ - 7, /* movnr */ - 7, 
/* movzr */ + 9, /* bswapr_us */ + 6, /* bswapr_ui */ + 0, /* bswapr_ul */ # else #define JIT_INSTR_MAX 115 @@ -1307,6 +1316,8 @@ 14, /* nei */ 3, /* movr */ 10, /* movi */ + 7, /* movnr */ + 7, /* movzr */ 4, /* extr_c */ 4, /* extr_uc */ 4, /* extr_s */ @@ -1611,8 +1622,9 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ - 7, /* movnr */ - 7, /* movzr */ + 9, /* bswapr_us */ + 6, /* bswapr_ui */ + 6, /* bswapr_ul */ #endif /* __CYGWIN__ || _WIN32 */ # endif /* __X64_32 */ #endif /* __X64 */ diff --git a/deps/lightning/lib/jit_x86.c b/deps/lightning/lib/jit_x86.c index 133ee39d..e3e13834 100644 --- a/deps/lightning/lib/jit_x86.c +++ b/deps/lightning/lib/jit_x86.c @@ -1698,6 +1698,11 @@ _emit_code(jit_state_t *_jit) case_rr(hton, _ui); #if __X64 && !__X64_32 case_rr(hton, _ul); +#endif + case_rr(bswap, _us); + case_rr(bswap, _ui); +#if __X64 && !__X64_32 + case_rr(bswap, _ul); #endif case_rr(ext, _c); case_rr(ext, _uc); diff --git a/deps/lightning/lib/lightning.c b/deps/lightning/lib/lightning.c index 30632939..b78bd07c 100644 --- a/deps/lightning/lib/lightning.c +++ b/deps/lightning/lib/lightning.c @@ -19,7 +19,9 @@ #include #include -#include +#if HAVE_MMAP +# include +#endif #if defined(__sgi) # include #endif @@ -956,10 +958,12 @@ _jit_destroy_state(jit_state_t *_jit) #if DEVEL_DISASSEMBLER jit_really_clear_state(); #endif +#if HAVE_MMAP if (!_jit->user_code) munmap(_jit->code.ptr, _jit->code.length); if (!_jit->user_data) munmap(_jit->data.ptr, _jit->data.length); +#endif jit_free((jit_pointer_t *)&_jit); } @@ -1380,6 +1384,7 @@ _jit_classify(jit_state_t *_jit, jit_code_t code) case jit_code_truncr_f_i: case jit_code_truncr_f_l: case jit_code_truncr_d_i: case jit_code_truncr_d_l: case jit_code_htonr_us: case jit_code_htonr_ui: case jit_code_htonr_ul: + case jit_code_bswapr_us: case jit_code_bswapr_ui: case jit_code_bswapr_ul: case jit_code_ldr_c: case jit_code_ldr_uc: case jit_code_ldr_s: case jit_code_ldr_us: case jit_code_ldr_i: case jit_code_ldr_ui: 
case jit_code_ldr_l: case jit_code_negr_f: @@ -1435,7 +1440,6 @@ _jit_classify(jit_state_t *_jit, jit_code_t code) case jit_code_unordi_d: mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_reg|jit_cc_a2_dbl; break; - case jit_code_movnr: case jit_code_movzr: case jit_code_addr: case jit_code_addxr: case jit_code_addcr: case jit_code_subr: case jit_code_subxr: case jit_code_subcr: case jit_code_mulr: case jit_code_divr: case jit_code_divr_u: @@ -1532,6 +1536,9 @@ _jit_classify(jit_state_t *_jit, jit_code_t code) case jit_code_bxsubr: case jit_code_bxsubr_u: mask = jit_cc_a0_jmp|jit_cc_a1_reg|jit_cc_a1_chg|jit_cc_a2_reg; break; + case jit_code_movnr: case jit_code_movzr: + mask = jit_cc_a0_reg|jit_cc_a0_cnd|jit_cc_a1_reg|jit_cc_a2_reg; + break; default: abort(); } @@ -1892,6 +1899,9 @@ _jit_dataset(jit_state_t *_jit) #endif assert(!_jitc->dataset); +#if !HAVE_MMAP + assert(_jit->user_data); +#else if (!_jit->user_data) { /* create read only data buffer */ @@ -1909,6 +1919,7 @@ _jit_dataset(jit_state_t *_jit) close(mmap_fd); #endif } +#endif /* !HAVE_MMAP */ if (!_jitc->no_data) jit_memcpy(_jit->data.ptr, _jitc->data.ptr, _jitc->data.offset); @@ -2023,6 +2034,9 @@ _jit_emit(jit_state_t *_jit) _jitc->emit = 1; +#if !HAVE_MMAP + assert(_jit->user_code); +#else if (!_jit->user_code) { #if defined(__sgi) mmap_fd = open("/dev/zero", O_RDWR); @@ -2032,6 +2046,7 @@ _jit_emit(jit_state_t *_jit) MAP_PRIVATE | MAP_ANON, mmap_fd, 0); assert(_jit->code.ptr != MAP_FAILED); } +#endif /* !HAVE_MMAP */ _jitc->code.end = _jit->code.ptr + _jit->code.length - jit_get_max_instr(); _jit->pc.uc = _jit->code.ptr; @@ -2045,6 +2060,9 @@ _jit_emit(jit_state_t *_jit) node->code == jit_code_epilog)) node->flag &= ~jit_flag_patch; } +#if !HAVE_MMAP + assert(_jit->user_code); +#else if (_jit->user_code) goto fail; #if GET_JIT_SIZE @@ -2078,6 +2096,7 @@ _jit_emit(jit_state_t *_jit) _jitc->code.end = _jit->code.ptr + _jit->code.length - jit_get_max_instr(); _jit->pc.uc = _jit->code.ptr; +#endif /* 
!HAVE_MMAP */ } else break; @@ -2094,6 +2113,7 @@ _jit_emit(jit_state_t *_jit) if (_jit->user_data) jit_free((jit_pointer_t *)&_jitc->data.ptr); +#if HAVE_MMAP else { result = mprotect(_jit->data.ptr, _jit->data.length, PROT_READ); assert(result == 0); @@ -2103,6 +2123,7 @@ _jit_emit(jit_state_t *_jit) PROT_READ | PROT_EXEC); assert(result == 0); } +#endif /* HAVE_MMAP */ return (_jit->code.ptr); fail: @@ -3304,7 +3325,7 @@ _register_change_p(jit_state_t *_jit, jit_node_t *node, jit_node_t *link, default: value = jit_classify(node->code); /* lack of extra information */ - if (value & jit_cc_a0_jmp) + if (value & (jit_cc_a0_jmp|jit_cc_a0_cnd)) return (jit_reg_change); else if ((value & (jit_cc_a0_reg|jit_cc_a0_chg)) == (jit_cc_a0_reg|jit_cc_a0_chg) && @@ -3489,6 +3510,31 @@ _patch_register(jit_state_t *_jit, jit_node_t *node, jit_node_t *link, } } +#if __BYTE_ORDER == __LITTLE_ENDIAN +# define htonr_us(r0,r1) bswapr_us(r0,r1) +# define htonr_ui(r0,r1) bswapr_ui(r0,r1) +# if __WORDSIZE == 64 +# define htonr_ul(r0,r1) bswapr_ul(r0,r1) +# endif +#else +# define htonr_us(r0,r1) extr_us(r0,r1) +# if __WORDSIZE == 32 +# define htonr_ui(r0,r1) movr(r0,r1) +# else +# define htonr_ui(r0,r1) extr_ui(r0,r1) +# define htonr_ul(r0,r1) movr(r0,r1) +# endif +#endif + +static maybe_unused void +generic_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1); +static maybe_unused void +generic_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1); +#if __WORDSIZE == 64 +static maybe_unused void +generic_bswapr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1); +#endif + #if defined(__i386__) || defined(__x86_64__) # include "jit_x86.c" #elif defined(__mips__) @@ -3512,3 +3558,47 @@ _patch_register(jit_state_t *_jit, jit_node_t *node, jit_node_t *link, #elif defined(__riscv) # include "jit_riscv.c" #endif + +static maybe_unused void +generic_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t reg = jit_get_reg(jit_class_gpr); + + 
rshi(rn(reg), r1, 8); + andi(r0, r1, 0xff); + andi(rn(reg), rn(reg), 0xff); + lshi(r0, r0, 8); + orr(r0, r0, rn(reg)); + + jit_unget_reg(reg); +} + +static maybe_unused void +generic_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t reg = jit_get_reg(jit_class_gpr); + + rshi(rn(reg), r1, 16); + bswapr_us(r0, r1); + bswapr_us(rn(reg), rn(reg)); + lshi(r0, r0, 16); + orr(r0, r0, rn(reg)); + + jit_unget_reg(reg); +} + +#if __WORDSIZE == 64 +static maybe_unused void +generic_bswapr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t reg = jit_get_reg(jit_class_gpr); + + rshi_u(rn(reg), r1, 32); + bswapr_ui(r0, r1); + bswapr_ui(rn(reg), rn(reg)); + lshi(r0, r0, 32); + orr(r0, r0, rn(reg)); + + jit_unget_reg(reg); +} +#endif diff --git a/deps/lightrec/.gitrepo b/deps/lightrec/.gitrepo index ae2fc77c..c9d423a7 100644 --- a/deps/lightrec/.gitrepo +++ b/deps/lightrec/.gitrepo @@ -6,7 +6,7 @@ [subrepo] remote = https://github.com/pcercuei/lightrec.git branch = master - commit = de06670b257004d98d30e8585a4e6530e77d3acd - parent = e24732050e902bd5402b2b7da7c391d2ca8fa799 + commit = 49ef275a66aad8540ab73b09b0dd2128ebe4d6dc + parent = a0467ff492a25521867fcfb7d66b9c617017151a method = merge cmdver = 0.4.3 diff --git a/deps/lightrec/CMakeLists.txt b/deps/lightrec/CMakeLists.txt index 9ff58d62..7b285185 100644 --- a/deps/lightrec/CMakeLists.txt +++ b/deps/lightrec/CMakeLists.txt @@ -108,6 +108,16 @@ if (ENABLE_THREADED_COMPILER) target_link_libraries(${PROJECT_NAME} PRIVATE ${PTHREAD_LIBRARIES}) endif (ENABLE_THREADED_COMPILER) +option(ENABLE_CODE_BUFFER "Enable external code buffer" OFF) +if (ENABLE_CODE_BUFFER) + target_sources(${PROJECT_NAME} PRIVATE tlsf/tlsf.c) + target_include_directories(${PROJECT_NAME} PRIVATE tlsf) +endif (ENABLE_CODE_BUFFER) + +if (ENABLE_CODE_BUFFER AND ENABLE_THREADED_COMPILER) + message(SEND_ERROR "External code buffer cannot be used along with the threaded compiler") +endif (ENABLE_CODE_BUFFER AND 
ENABLE_THREADED_COMPILER) + find_library(LIBLIGHTNING lightning REQUIRED) find_path(LIBLIGHTNING_INCLUDE_DIR lightning.h REQUIRED) diff --git a/deps/lightrec/blockcache.c b/deps/lightrec/blockcache.c index 4512392d..2182f298 100644 --- a/deps/lightrec/blockcache.c +++ b/deps/lightrec/blockcache.c @@ -63,8 +63,8 @@ void remove_from_code_lut(struct blockcache *cache, struct block *block) u32 offset = lut_offset(block->pc); if (block->function) { - memset(&state->code_lut[offset], 0, - block->nb_ops * sizeof(*state->code_lut)); + memset(lut_address(state, offset), 0, + block->nb_ops * lut_elm_size(state)); } } @@ -152,10 +152,11 @@ u32 lightrec_calculate_block_hash(const struct block *block) bool lightrec_block_is_outdated(struct lightrec_state *state, struct block *block) { - void **lut_entry = &state->code_lut[lut_offset(block->pc)]; + u32 offset = lut_offset(block->pc); bool outdated; + void *addr; - if (*lut_entry) + if (lut_read(state, offset)) return false; outdated = block->hash != lightrec_calculate_block_hash(block); @@ -163,9 +164,11 @@ bool lightrec_block_is_outdated(struct lightrec_state *state, struct block *bloc /* The block was marked as outdated, but the content is still * the same */ if (block->function) - *lut_entry = block->function; + addr = block->function; else - *lut_entry = state->get_next_block; + addr = state->get_next_block; + + lut_write(state, offset, addr); } return outdated; diff --git a/deps/lightrec/disassembler.c b/deps/lightrec/disassembler.c index 43ac6772..cb332c67 100644 --- a/deps/lightrec/disassembler.c +++ b/deps/lightrec/disassembler.c @@ -382,22 +382,23 @@ static int print_op(union code c, u32 pc, char *buf, size_t len, } } -void lightrec_print_disassembly(const struct block *block, const u32 *code) +void lightrec_print_disassembly(const struct block *block, const u32 *code_ptr) { const struct opcode *op; const char **flags_ptr; size_t nb_flags, count, count2; char buf[256], buf2[256], buf3[256]; unsigned int i; - u32 pc, 
branch_pc; + u32 pc, branch_pc, code; bool is_io; for (i = 0; i < block->nb_ops; i++) { op = &block->opcode_list[i]; branch_pc = get_branch_pc(block, i, 0); pc = block->pc + (i << 2); + code = LE32TOH(code_ptr[i]); - count = print_op((union code)code[i], pc, buf, sizeof(buf), + count = print_op((union code)code, pc, buf, sizeof(buf), &flags_ptr, &nb_flags, &is_io); flags_ptr = NULL; @@ -406,7 +407,7 @@ void lightrec_print_disassembly(const struct block *block, const u32 *code) count2 = print_op(op->c, branch_pc, buf2, sizeof(buf2), &flags_ptr, &nb_flags, &is_io); - if (code[i] == op->c.opcode) { + if (code == op->c.opcode) { *buf2 = '\0'; count2 = 0; } diff --git a/deps/lightrec/emitter.c b/deps/lightrec/emitter.c index fa74cc09..fd289356 100644 --- a/deps/lightrec/emitter.c +++ b/deps/lightrec/emitter.c @@ -392,11 +392,34 @@ static void rec_alu_shiftv(struct lightrec_cstate *state, const struct block *bl lightrec_free_reg(reg_cache, rd); } +static void rec_movi(struct lightrec_cstate *state, + const struct block *block, u16 offset) +{ + struct regcache *reg_cache = state->reg_cache; + union code c = block->opcode_list[offset].c; + jit_state_t *_jit = block->_jit; + u16 flags = REG_EXT; + u8 rt; + + if (!(c.i.imm & 0x8000)) + flags |= REG_ZEXT; + + rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt, flags); + + jit_movi(rt, (s32)(s16) c.i.imm); + + lightrec_free_reg(reg_cache, rt); +} + static void rec_ADDIU(struct lightrec_cstate *state, const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); - rec_alu_imm(state, block, offset, jit_code_addi, false); + + if (block->opcode_list[offset].c.i.rs) + rec_alu_imm(state, block, offset, jit_code_addi, false); + else + rec_movi(state, block, offset); } static void rec_ADDI(struct lightrec_cstate *state, @@ -404,7 +427,7 @@ static void rec_ADDI(struct lightrec_cstate *state, { /* TODO: Handle the exception? 
*/ _jit_name(block->_jit, __func__); - rec_alu_imm(state, block, offset, jit_code_addi, false); + rec_ADDIU(state, block, offset); } static void rec_SLTIU(struct lightrec_cstate *state, @@ -1022,22 +1045,31 @@ static void rec_io(struct lightrec_cstate *state, } } +static u32 rec_ram_mask(struct lightrec_state *state) +{ + return (RAM_SIZE << (state->mirrors_mapped * 2)) - 1; +} + static void rec_store_memory(struct lightrec_cstate *cstate, const struct block *block, u16 offset, jit_code_t code, + jit_code_t swap_code, uintptr_t addr_offset, u32 addr_mask, bool invalidate) { + const struct lightrec_state *state = cstate->state; struct regcache *reg_cache = cstate->reg_cache; struct opcode *op = &block->opcode_list[offset]; jit_state_t *_jit = block->_jit; union code c = op->c; u8 rs, rt, tmp, tmp2, tmp3, addr_reg, addr_reg2; s16 imm = (s16)c.i.imm; - s32 simm = (s32)imm << (__WORDSIZE / 32 - 1); + s32 simm = (s32)imm << (1 - lut_is_32bit(state)); s32 lut_offt = offsetof(struct lightrec_state, code_lut); bool no_mask = op->flags & LIGHTREC_NO_MASK; - bool add_imm = c.i.imm && invalidate && simm + lut_offt != (s16)(simm + lut_offt); + bool add_imm = c.i.imm && + ((!state->mirrors_mapped && !no_mask) || (invalidate && + ((imm & 0x3) || simm + lut_offt != (s16)(simm + lut_offt)))); bool need_tmp = !no_mask || addr_offset || add_imm; bool need_tmp2 = addr_offset || invalidate; @@ -1071,7 +1103,17 @@ static void rec_store_memory(struct lightrec_cstate *cstate, addr_reg2 = addr_reg; } - jit_new_node_www(code, imm, addr_reg2, rt); + if (is_big_endian() && swap_code && c.i.rt) { + tmp3 = lightrec_alloc_reg_temp(reg_cache, _jit); + + jit_new_node_ww(swap_code, tmp3, rt); + jit_new_node_www(code, imm, addr_reg2, tmp3); + + lightrec_free_reg(reg_cache, tmp3); + } else { + jit_new_node_www(code, imm, addr_reg2, rt); + } + lightrec_free_reg(reg_cache, rt); if (invalidate) { @@ -1082,17 +1124,22 @@ static void rec_store_memory(struct lightrec_cstate *cstate, addr_reg = tmp2; } - 
if (__WORDSIZE == 64) { + if (!lut_is_32bit(state)) { jit_lshi(tmp2, addr_reg, 1); addr_reg = tmp2; } - if (__WORDSIZE == 64 || addr_reg != rs || c.i.rs != 0) { + if (addr_reg == rs && c.i.rs == 0) { + addr_reg = LIGHTREC_REG_STATE; + } else { jit_addr(tmp2, addr_reg, LIGHTREC_REG_STATE); addr_reg = tmp2; } - jit_stxi(lut_offt, addr_reg, tmp3); + if (lut_is_32bit(state)) + jit_stxi_i(lut_offt, addr_reg, tmp3); + else + jit_stxi(lut_offt, addr_reg, tmp3); lightrec_free_reg(reg_cache, tmp3); } @@ -1107,29 +1154,32 @@ static void rec_store_memory(struct lightrec_cstate *cstate, static void rec_store_ram(struct lightrec_cstate *cstate, const struct block *block, u16 offset, jit_code_t code, - bool invalidate) + jit_code_t swap_code, bool invalidate) { + struct lightrec_state *state = cstate->state; + _jit_note(block->_jit, __FILE__, __LINE__); - return rec_store_memory(cstate, block, offset, code, - cstate->state->offset_ram, - RAM_SIZE - 1, invalidate); + return rec_store_memory(cstate, block, offset, code, swap_code, + state->offset_ram, rec_ram_mask(state), + invalidate); } static void rec_store_scratch(struct lightrec_cstate *cstate, - const struct block *block, - u16 offset, jit_code_t code) + const struct block *block, u16 offset, + jit_code_t code, jit_code_t swap_code) { _jit_note(block->_jit, __FILE__, __LINE__); - return rec_store_memory(cstate, block, offset, code, + return rec_store_memory(cstate, block, offset, code, swap_code, cstate->state->offset_scratch, 0x1fffffff, false); } static void rec_store_direct_no_invalidate(struct lightrec_cstate *cstate, const struct block *block, - u16 offset, jit_code_t code) + u16 offset, jit_code_t code, + jit_code_t swap_code) { struct lightrec_state *state = cstate->state; struct regcache *reg_cache = cstate->reg_cache; @@ -1181,14 +1231,24 @@ static void rec_store_direct_no_invalidate(struct lightrec_cstate *cstate, } rt = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rt, 0); - jit_new_node_www(code, imm, tmp, rt); + + 
if (is_big_endian() && swap_code && c.i.rt) { + tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit); + + jit_new_node_ww(swap_code, tmp2, rt); + jit_new_node_www(code, imm, tmp, tmp2); + + lightrec_free_reg(reg_cache, tmp2); + } else { + jit_new_node_www(code, imm, tmp, rt); + } lightrec_free_reg(reg_cache, rt); lightrec_free_reg(reg_cache, tmp); } static void rec_store_direct(struct lightrec_cstate *cstate, const struct block *block, - u16 offset, jit_code_t code) + u16 offset, jit_code_t code, jit_code_t swap_code) { struct lightrec_state *state = cstate->state; u32 ram_size = state->mirrors_mapped ? RAM_SIZE * 4 : RAM_SIZE; @@ -1219,12 +1279,15 @@ static void rec_store_direct(struct lightrec_cstate *cstate, const struct block /* Compute the offset to the code LUT */ jit_andi(tmp, tmp2, (RAM_SIZE - 1) & ~3); - if (__WORDSIZE == 64) + if (!lut_is_32bit(state)) jit_lshi(tmp, tmp, 1); jit_addr(tmp, LIGHTREC_REG_STATE, tmp); /* Write NULL to the code LUT to invalidate any block that's there */ - jit_stxi(offsetof(struct lightrec_state, code_lut), tmp, tmp3); + if (lut_is_32bit(state)) + jit_stxi_i(offsetof(struct lightrec_state, code_lut), tmp, tmp3); + else + jit_stxi(offsetof(struct lightrec_state, code_lut), tmp, tmp3); if (state->offset_ram != state->offset_scratch) { jit_movi(tmp, state->offset_ram); @@ -1247,14 +1310,25 @@ static void rec_store_direct(struct lightrec_cstate *cstate, const struct block lightrec_free_reg(reg_cache, tmp3); rt = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rt, 0); - jit_new_node_www(code, 0, tmp2, rt); + + if (is_big_endian() && swap_code && c.i.rt) { + tmp = lightrec_alloc_reg_temp(reg_cache, _jit); + + jit_new_node_ww(swap_code, tmp, rt); + jit_new_node_www(code, 0, tmp2, tmp); + + lightrec_free_reg(reg_cache, tmp); + } else { + jit_new_node_www(code, 0, tmp2, rt); + } lightrec_free_reg(reg_cache, rt); lightrec_free_reg(reg_cache, tmp2); } static void rec_store(struct lightrec_cstate *state, - const struct block *block, u16 offset, 
jit_code_t code) + const struct block *block, u16 offset, + jit_code_t code, jit_code_t swap_code) { u16 flags = block->opcode_list[offset].flags; bool no_invalidate = (flags & LIGHTREC_NO_INVALIDATE) || @@ -1262,16 +1336,19 @@ static void rec_store(struct lightrec_cstate *state, switch (LIGHTREC_FLAGS_GET_IO_MODE(flags)) { case LIGHTREC_IO_RAM: - rec_store_ram(state, block, offset, code, !no_invalidate); + rec_store_ram(state, block, offset, code, + swap_code, !no_invalidate); break; case LIGHTREC_IO_SCRATCH: - rec_store_scratch(state, block, offset, code); + rec_store_scratch(state, block, offset, code, swap_code); break; case LIGHTREC_IO_DIRECT: - if (no_invalidate) - rec_store_direct_no_invalidate(state, block, offset, code); - else - rec_store_direct(state, block, offset, code); + if (no_invalidate) { + rec_store_direct_no_invalidate(state, block, offset, + code, swap_code); + } else { + rec_store_direct(state, block, offset, code, swap_code); + } break; default: rec_io(state, block, offset, true, false); @@ -1283,14 +1360,15 @@ static void rec_SB(struct lightrec_cstate *state, const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); - rec_store(state, block, offset, jit_code_stxi_c); + rec_store(state, block, offset, jit_code_stxi_c, 0); } static void rec_SH(struct lightrec_cstate *state, const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); - rec_store(state, block, offset, jit_code_stxi_s); + rec_store(state, block, offset, + jit_code_stxi_s, jit_code_bswapr_us); } static void rec_SW(struct lightrec_cstate *state, @@ -1298,7 +1376,8 @@ static void rec_SW(struct lightrec_cstate *state, { _jit_name(block->_jit, __func__); - rec_store(state, block, offset, jit_code_stxi_i); + rec_store(state, block, offset, + jit_code_stxi_i, jit_code_bswapr_ui); } static void rec_SWL(struct lightrec_cstate *state, @@ -1323,15 +1402,17 @@ static void rec_SWC2(struct lightrec_cstate *state, } static void rec_load_memory(struct 
lightrec_cstate *cstate, - const struct block *block, - u16 offset, jit_code_t code, bool is_unsigned, + const struct block *block, u16 offset, + jit_code_t code, jit_code_t swap_code, bool is_unsigned, uintptr_t addr_offset, u32 addr_mask) { struct regcache *reg_cache = cstate->reg_cache; struct opcode *op = &block->opcode_list[offset]; jit_state_t *_jit = block->_jit; u8 rs, rt, addr_reg, flags = REG_EXT; + bool no_mask = op->flags & LIGHTREC_NO_MASK; union code c = op->c; + s16 imm; if (!c.i.rt) return; @@ -1342,11 +1423,18 @@ static void rec_load_memory(struct lightrec_cstate *cstate, rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0); rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt, flags); - if (!(op->flags & LIGHTREC_NO_MASK)) { - jit_andi(rt, rs, addr_mask); + if (!cstate->state->mirrors_mapped && c.i.imm && !no_mask) { + jit_addi(rt, rs, (s16)c.i.imm); addr_reg = rt; + imm = 0; } else { addr_reg = rs; + imm = (s16)c.i.imm; + } + + if (!no_mask) { + jit_andi(rt, addr_reg, addr_mask); + addr_reg = rt; } if (addr_offset) { @@ -1354,44 +1442,55 @@ static void rec_load_memory(struct lightrec_cstate *cstate, addr_reg = rt; } - jit_new_node_www(code, rt, addr_reg, (s16)c.i.imm); + jit_new_node_www(code, rt, addr_reg, imm); + + if (is_big_endian() && swap_code) { + jit_new_node_ww(swap_code, rt, rt); + + if (c.i.op == OP_LH) + jit_extr_s(rt, rt); + else if (c.i.op == OP_LW && __WORDSIZE == 64) + jit_extr_i(rt, rt); + } lightrec_free_reg(reg_cache, rs); lightrec_free_reg(reg_cache, rt); } static void rec_load_ram(struct lightrec_cstate *cstate, - const struct block *block, - u16 offset, jit_code_t code, bool is_unsigned) + const struct block *block, u16 offset, + jit_code_t code, jit_code_t swap_code, bool is_unsigned) { _jit_note(block->_jit, __FILE__, __LINE__); - rec_load_memory(cstate, block, offset, code, is_unsigned, - cstate->state->offset_ram, RAM_SIZE - 1); + rec_load_memory(cstate, block, offset, code, swap_code, is_unsigned, + 
cstate->state->offset_ram, rec_ram_mask(cstate->state)); } static void rec_load_bios(struct lightrec_cstate *cstate, - const struct block *block, - u16 offset, jit_code_t code, bool is_unsigned) + const struct block *block, u16 offset, + jit_code_t code, jit_code_t swap_code, bool is_unsigned) { _jit_note(block->_jit, __FILE__, __LINE__); - rec_load_memory(cstate, block, offset, code, is_unsigned, + rec_load_memory(cstate, block, offset, code, swap_code, is_unsigned, cstate->state->offset_bios, 0x1fffffff); } static void rec_load_scratch(struct lightrec_cstate *cstate, - const struct block *block, - u16 offset, jit_code_t code, bool is_unsigned) + const struct block *block, u16 offset, + jit_code_t code, jit_code_t swap_code, bool is_unsigned) { _jit_note(block->_jit, __FILE__, __LINE__); - rec_load_memory(cstate, block, offset, code, is_unsigned, + rec_load_memory(cstate, block, offset, code, swap_code, is_unsigned, cstate->state->offset_scratch, 0x1fffffff); } -static void rec_load_direct(struct lightrec_cstate *cstate, const struct block *block, - u16 offset, jit_code_t code, bool is_unsigned) +static void rec_load_direct(struct lightrec_cstate *cstate, + const struct block *block, u16 offset, + jit_code_t code, jit_code_t swap_code, + bool is_unsigned) { struct lightrec_state *state = cstate->state; struct regcache *reg_cache = cstate->reg_cache; @@ -1483,28 +1582,38 @@ static void rec_load_direct(struct lightrec_cstate *cstate, const struct block * jit_new_node_www(code, rt, rt, imm); + if (is_big_endian() && swap_code) { + jit_new_node_ww(swap_code, rt, rt); + + if (c.i.op == OP_LH) + jit_extr_s(rt, rt); + else if (c.i.op == OP_LW && __WORDSIZE == 64) + jit_extr_i(rt, rt); + } + lightrec_free_reg(reg_cache, addr_reg); lightrec_free_reg(reg_cache, rt); lightrec_free_reg(reg_cache, tmp); } static void rec_load(struct lightrec_cstate *state, const struct block *block, - u16 offset, jit_code_t code, bool is_unsigned) + u16 offset, jit_code_t code, jit_code_t 
swap_code, + bool is_unsigned) { u16 flags = block->opcode_list[offset].flags; switch (LIGHTREC_FLAGS_GET_IO_MODE(flags)) { case LIGHTREC_IO_RAM: - rec_load_ram(state, block, offset, code, is_unsigned); + rec_load_ram(state, block, offset, code, swap_code, is_unsigned); break; case LIGHTREC_IO_BIOS: - rec_load_bios(state, block, offset, code, is_unsigned); + rec_load_bios(state, block, offset, code, swap_code, is_unsigned); break; case LIGHTREC_IO_SCRATCH: - rec_load_scratch(state, block, offset, code, is_unsigned); + rec_load_scratch(state, block, offset, code, swap_code, is_unsigned); break; case LIGHTREC_IO_DIRECT: - rec_load_direct(state, block, offset, code, is_unsigned); + rec_load_direct(state, block, offset, code, swap_code, is_unsigned); break; default: rec_io(state, block, offset, false, true); @@ -1515,25 +1624,25 @@ static void rec_load(struct lightrec_cstate *state, const struct block *block, static void rec_LB(struct lightrec_cstate *state, const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); - rec_load(state, block, offset, jit_code_ldxi_c, false); + rec_load(state, block, offset, jit_code_ldxi_c, 0, false); } static void rec_LBU(struct lightrec_cstate *state, const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); - rec_load(state, block, offset, jit_code_ldxi_uc, true); + rec_load(state, block, offset, jit_code_ldxi_uc, 0, true); } static void rec_LH(struct lightrec_cstate *state, const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); - rec_load(state, block, offset, jit_code_ldxi_s, false); + rec_load(state, block, offset, jit_code_ldxi_s, jit_code_bswapr_us, false); } static void rec_LHU(struct lightrec_cstate *state, const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); - rec_load(state, block, offset, jit_code_ldxi_us, true); + rec_load(state, block, offset, jit_code_ldxi_us, jit_code_bswapr_us, true); } static void rec_LWL(struct lightrec_cstate *state, const 
struct block *block, u16 offset) @@ -1551,7 +1660,7 @@ static void rec_LWR(struct lightrec_cstate *state, const struct block *block, u1 static void rec_LW(struct lightrec_cstate *state, const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); - rec_load(state, block, offset, jit_code_ldxi_i, false); + rec_load(state, block, offset, jit_code_ldxi_i, jit_code_bswapr_ui, false); } static void rec_LWC2(struct lightrec_cstate *state, const struct block *block, u16 offset) @@ -1759,6 +1868,26 @@ static void rec_cp0_CTC0(struct lightrec_cstate *state, rec_mtc0(state, block, offset); } +static unsigned int cp2d_i_offset(u8 reg) +{ + return offsetof(struct lightrec_state, regs.cp2d[reg]); +} + +static unsigned int cp2d_s_offset(u8 reg) +{ + return cp2d_i_offset(reg) + is_big_endian() * 2; +} + +static unsigned int cp2c_i_offset(u8 reg) +{ + return offsetof(struct lightrec_state, regs.cp2c[reg]); +} + +static unsigned int cp2c_s_offset(u8 reg) +{ + return cp2c_i_offset(reg) + is_big_endian() * 2; +} + static void rec_cp2_basic_MFC2(struct lightrec_cstate *state, const struct block *block, u16 offset) { @@ -1783,16 +1912,14 @@ static void rec_cp2_basic_MFC2(struct lightrec_cstate *state, case 9: case 10: case 11: - jit_ldxi_s(rt, LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, regs.cp2d[reg])); + jit_ldxi_s(rt, LIGHTREC_REG_STATE, cp2d_s_offset(reg)); break; case 7: case 16: case 17: case 18: case 19: - jit_ldxi_us(rt, LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, regs.cp2d[reg])); + jit_ldxi_us(rt, LIGHTREC_REG_STATE, cp2d_s_offset(reg)); break; case 28: case 29: @@ -1803,8 +1930,7 @@ static void rec_cp2_basic_MFC2(struct lightrec_cstate *state, for (i = 0; i < 3; i++) { out = i == 0 ? 
rt : tmp; - jit_ldxi_s(tmp, LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, regs.cp2d[9 + i])); + jit_ldxi_s(tmp, LIGHTREC_REG_STATE, cp2d_s_offset(9 + i)); jit_movi(tmp2, 0x1f); jit_rshi(out, tmp, 7); @@ -1826,8 +1952,7 @@ static void rec_cp2_basic_MFC2(struct lightrec_cstate *state, lightrec_free_reg(reg_cache, tmp3); break; default: - jit_ldxi_i(rt, LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, regs.cp2d[reg])); + jit_ldxi_i(rt, LIGHTREC_REG_STATE, cp2d_i_offset(reg)); break; } @@ -1853,13 +1978,11 @@ static void rec_cp2_basic_CFC2(struct lightrec_cstate *state, case 29: case 30: rt = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rt, REG_EXT); - jit_ldxi_s(rt, LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, regs.cp2c[c.r.rd])); + jit_ldxi_s(rt, LIGHTREC_REG_STATE, cp2c_s_offset(c.r.rd)); break; default: rt = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rt, REG_ZEXT); - jit_ldxi_i(rt, LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, regs.cp2c[c.r.rd])); + jit_ldxi_i(rt, LIGHTREC_REG_STATE, cp2c_i_offset(c.r.rd)); break; } @@ -1888,19 +2011,14 @@ static void rec_cp2_basic_MTC2(struct lightrec_cstate *state, switch (c.r.rd) { case 15: tmp = lightrec_alloc_reg_temp(reg_cache, _jit); - jit_ldxi_i(tmp, LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, regs.cp2d[13])); + jit_ldxi_i(tmp, LIGHTREC_REG_STATE, cp2d_i_offset(13)); tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit); - jit_ldxi_i(tmp2, LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, regs.cp2d[14])); + jit_ldxi_i(tmp2, LIGHTREC_REG_STATE, cp2d_i_offset(14)); - jit_stxi_i(offsetof(struct lightrec_state, regs.cp2d[12]), - LIGHTREC_REG_STATE, tmp); - jit_stxi_i(offsetof(struct lightrec_state, regs.cp2d[13]), - LIGHTREC_REG_STATE, tmp2); - jit_stxi_i(offsetof(struct lightrec_state, regs.cp2d[14]), - LIGHTREC_REG_STATE, rt); + jit_stxi_i(cp2d_i_offset(12), LIGHTREC_REG_STATE, tmp); + jit_stxi_i(cp2d_i_offset(13), LIGHTREC_REG_STATE, tmp2); + jit_stxi_i(cp2d_i_offset(14), 
LIGHTREC_REG_STATE, rt); lightrec_free_reg(reg_cache, tmp); lightrec_free_reg(reg_cache, tmp2); @@ -1910,18 +2028,15 @@ static void rec_cp2_basic_MTC2(struct lightrec_cstate *state, jit_lshi(tmp, rt, 7); jit_andi(tmp, tmp, 0xf80); - jit_stxi_s(offsetof(struct lightrec_state, regs.cp2d[9]), - LIGHTREC_REG_STATE, tmp); + jit_stxi_s(cp2d_s_offset(9), LIGHTREC_REG_STATE, tmp); jit_lshi(tmp, rt, 2); jit_andi(tmp, tmp, 0xf80); - jit_stxi_s(offsetof(struct lightrec_state, regs.cp2d[10]), - LIGHTREC_REG_STATE, tmp); + jit_stxi_s(cp2d_s_offset(10), LIGHTREC_REG_STATE, tmp); jit_rshi(tmp, rt, 3); jit_andi(tmp, tmp, 0xf80); - jit_stxi_s(offsetof(struct lightrec_state, regs.cp2d[11]), - LIGHTREC_REG_STATE, tmp); + jit_stxi_s(cp2d_s_offset(11), LIGHTREC_REG_STATE, tmp); lightrec_free_reg(reg_cache, tmp); break; @@ -1945,17 +2060,14 @@ static void rec_cp2_basic_MTC2(struct lightrec_cstate *state, jit_patch_at(to_loop, loop); - jit_stxi_i(offsetof(struct lightrec_state, regs.cp2d[31]), - LIGHTREC_REG_STATE, tmp2); - jit_stxi_i(offsetof(struct lightrec_state, regs.cp2d[30]), - LIGHTREC_REG_STATE, rt); + jit_stxi_i(cp2d_i_offset(31), LIGHTREC_REG_STATE, tmp2); + jit_stxi_i(cp2d_i_offset(30), LIGHTREC_REG_STATE, rt); lightrec_free_reg(reg_cache, tmp); lightrec_free_reg(reg_cache, tmp2); break; default: - jit_stxi_i(offsetof(struct lightrec_state, regs.cp2d[c.r.rd]), - LIGHTREC_REG_STATE, rt); + jit_stxi_i(cp2d_i_offset(c.r.rd), LIGHTREC_REG_STATE, rt); break; } @@ -1982,8 +2094,7 @@ static void rec_cp2_basic_CTC2(struct lightrec_cstate *state, case 27: case 29: case 30: - jit_stxi_s(offsetof(struct lightrec_state, regs.cp2c[c.r.rd]), - LIGHTREC_REG_STATE, rt); + jit_stxi_s(cp2c_s_offset(c.r.rd), LIGHTREC_REG_STATE, rt); break; case 31: tmp = lightrec_alloc_reg_temp(reg_cache, _jit); @@ -1996,16 +2107,14 @@ static void rec_cp2_basic_CTC2(struct lightrec_cstate *state, jit_andi(tmp2, rt, 0x7ffff000); jit_orr(tmp, tmp2, tmp); - jit_stxi_i(offsetof(struct lightrec_state, regs.cp2c[31]), 
- LIGHTREC_REG_STATE, tmp); + jit_stxi_i(cp2c_i_offset(31), LIGHTREC_REG_STATE, tmp); lightrec_free_reg(reg_cache, tmp); lightrec_free_reg(reg_cache, tmp2); break; default: - jit_stxi_i(offsetof(struct lightrec_state, regs.cp2c[c.r.rd]), - LIGHTREC_REG_STATE, rt); + jit_stxi_i(cp2c_i_offset(c.r.rd), LIGHTREC_REG_STATE, rt); } lightrec_free_reg(reg_cache, rt); diff --git a/deps/lightrec/lightrec-config.h.cmakein b/deps/lightrec/lightrec-config.h.cmakein index 3cef2b8c..f5524e9d 100644 --- a/deps/lightrec/lightrec-config.h.cmakein +++ b/deps/lightrec/lightrec-config.h.cmakein @@ -10,6 +10,7 @@ #cmakedefine01 ENABLE_FIRST_PASS #cmakedefine01 ENABLE_DISASSEMBLER #cmakedefine01 ENABLE_TINYMM +#cmakedefine01 ENABLE_CODE_BUFFER #cmakedefine01 HAS_DEFAULT_ELM diff --git a/deps/lightrec/lightrec-private.h b/deps/lightrec/lightrec-private.h index 75940b3f..87565a63 100644 --- a/deps/lightrec/lightrec-private.h +++ b/deps/lightrec/lightrec-private.h @@ -6,6 +6,7 @@ #ifndef __LIGHTREC_PRIVATE_H__ #define __LIGHTREC_PRIVATE_H__ +#include "lightning-wrapper.h" #include "lightrec-config.h" #include "disassembler.h" #include "lightrec.h" @@ -135,6 +136,7 @@ struct lightrec_state { struct recompiler *rec; struct lightrec_cstate *cstate; struct reaper *reaper; + void *tlsf; void (*eob_wrapper_func)(void); void (*memset_func)(void); void (*get_next_block)(void); @@ -143,6 +145,7 @@ struct lightrec_state { unsigned int nb_maps; const struct lightrec_mem_map *maps; uintptr_t offset_ram, offset_bios, offset_scratch; + _Bool with_32bit_lut; _Bool mirrors_mapped; _Bool invalidate_from_dma_only; void *code_lut[]; @@ -156,6 +159,9 @@ void lightrec_free_block(struct lightrec_state *state, struct block *block); void remove_from_code_lut(struct blockcache *cache, struct block *block); +enum psx_map +lightrec_get_map_idx(struct lightrec_state *state, u32 kaddr); + const struct lightrec_mem_map * lightrec_get_map(struct lightrec_state *state, void **host, u32 kaddr); @@ -175,6 +181,50 @@ static 
inline u32 lut_offset(u32 pc) return (pc & (RAM_SIZE - 1)) >> 2; // RAM } +static inline _Bool is_big_endian(void) +{ + return __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__; +} + +static inline _Bool lut_is_32bit(const struct lightrec_state *state) +{ + return __WORDSIZE == 32 || + (ENABLE_CODE_BUFFER && state->with_32bit_lut); +} + +static inline size_t lut_elm_size(const struct lightrec_state *state) +{ + return lut_is_32bit(state) ? 4 : sizeof(void *); +} + +static inline void ** lut_address(struct lightrec_state *state, u32 offset) +{ + if (lut_is_32bit(state)) + return (void **) ((uintptr_t) state->code_lut + offset * 4); + else + return &state->code_lut[offset]; +} + +static inline void * lut_read(struct lightrec_state *state, u32 offset) +{ + void **lut_entry = lut_address(state, lut_offset(offset)); + + if (lut_is_32bit(state)) + return (void *)(uintptr_t) *(u32 *) lut_entry; + else + return *lut_entry; +} + +static inline void lut_write(struct lightrec_state *state, u32 offset, void *ptr) +{ + void **lut_entry = lut_address(state, offset); + + if (lut_is_32bit(state)) + *(u32 *) lut_entry = (u32)(uintptr_t) ptr; + else + *lut_entry = ptr; +} + static inline u32 get_ds_pc(const struct block *block, u16 offset, s16 imm) { u16 flags = block->opcode_list[offset].flags; diff --git a/deps/lightrec/lightrec.c b/deps/lightrec/lightrec.c index 9889272a..d172a30a 100644 --- a/deps/lightrec/lightrec.c +++ b/deps/lightrec/lightrec.c @@ -16,6 +16,7 @@ #include "recompiler.h" #include "regcache.h" #include "optimizer.h" +#include "tlsf/tlsf.h" #include #include @@ -198,30 +199,39 @@ static void lightrec_invalidate_map(struct lightrec_state *state, const struct lightrec_mem_map *map, u32 addr, u32 len) { if (map == &state->maps[PSX_MAP_KERNEL_USER_RAM]) { - memset(&state->code_lut[lut_offset(addr)], 0, - ((len + 3) / 4) * sizeof(void *)); + memset(lut_address(state, lut_offset(addr)), 0, + ((len + 3) / 4) * lut_elm_size(state)); } } -const struct lightrec_mem_map * 
-lightrec_get_map(struct lightrec_state *state, void **host, u32 kaddr) +enum psx_map +lightrec_get_map_idx(struct lightrec_state *state, u32 kaddr) { const struct lightrec_mem_map *map; unsigned int i; - u32 addr; for (i = 0; i < state->nb_maps; i++) { - const struct lightrec_mem_map *mapi = &state->maps[i]; + map = &state->maps[i]; - if (kaddr >= mapi->pc && kaddr < mapi->pc + mapi->length) { - map = mapi; - break; - } + if (kaddr >= map->pc && kaddr < map->pc + map->length) + return (enum psx_map) i; } - if (i == state->nb_maps) + return PSX_MAP_UNKNOWN; +} + +const struct lightrec_mem_map * +lightrec_get_map(struct lightrec_state *state, void **host, u32 kaddr) +{ + const struct lightrec_mem_map *map; + enum psx_map idx; + u32 addr; + + idx = lightrec_get_map_idx(state, kaddr); + if (idx == PSX_MAP_UNKNOWN) return NULL; + map = &state->maps[idx]; addr = kaddr - map->pc; while (map->mirror_of) @@ -615,7 +625,7 @@ static void * get_next_block_func(struct lightrec_state *state, u32 pc) void *func; for (;;) { - func = state->code_lut[lut_offset(pc)]; + func = lut_read(state, pc); if (func && func != state->get_next_block) break; @@ -686,13 +696,51 @@ static s32 c_function_wrapper(struct lightrec_state *state, s32 cycles_delta, return state->target_cycle - state->current_cycle; } +static void * lightrec_emit_code(struct lightrec_state *state, + jit_state_t *_jit, unsigned int *size) +{ + bool has_code_buffer = ENABLE_CODE_BUFFER && state->tlsf; + jit_word_t code_size, new_code_size; + void *code; + + jit_realize(); + + if (!ENABLE_DISASSEMBLER) + jit_set_data(NULL, 0, JIT_DISABLE_DATA | JIT_DISABLE_NOTE); + + if (has_code_buffer) { + jit_get_code(&code_size); + code = tlsf_malloc(state->tlsf, (size_t) code_size); + if (!code) + return NULL; + + jit_set_code(code, code_size); + } + + code = jit_emit(); + + jit_get_code(&new_code_size); + lightrec_register(MEM_FOR_CODE, new_code_size); + + if (has_code_buffer) { + tlsf_realloc(state->tlsf, code, new_code_size); + + 
pr_debug("Creating code block at address 0x%" PRIxPTR ", " + "code size: %" PRIuPTR " new: %" PRIuPTR "\n", + (uintptr_t) code, code_size, new_code_size); + } + + *size = (unsigned int) new_code_size; + + return code; +} + static struct block * generate_wrapper(struct lightrec_state *state) { struct block *block; jit_state_t *_jit; unsigned int i; int stack_ptr; - jit_word_t code_size; jit_node_t *to_tramp, *to_fn_epilog; jit_node_t *addr[C_WRAPPERS_COUNT - 1]; @@ -767,21 +815,20 @@ static struct block * generate_wrapper(struct lightrec_state *state) jit_epilog(); block->_jit = _jit; - block->function = jit_emit(); block->opcode_list = NULL; block->flags = 0; block->nb_ops = 0; + block->function = lightrec_emit_code(state, _jit, + &block->code_size); + if (!block->function) + goto err_free_block; + state->wrappers_eps[C_WRAPPERS_COUNT - 1] = block->function; for (i = 0; i < C_WRAPPERS_COUNT - 1; i++) state->wrappers_eps[i] = jit_address(addr[i]); - jit_get_code(&code_size); - lightrec_register(MEM_FOR_CODE, code_size); - - block->code_size = code_size; - if (ENABLE_DISASSEMBLER) { pr_debug("Wrapper block:\n"); jit_disassemble(); @@ -825,10 +872,9 @@ static struct block * generate_dispatcher(struct lightrec_state *state) { struct block *block; jit_state_t *_jit; - jit_node_t *to_end, *to_c, *loop, *addr, *addr2, *addr3; + jit_node_t *to_end, *loop, *addr, *addr2, *addr3; unsigned int i; - u32 offset, ram_len; - jit_word_t code_size; + u32 offset; block = lightrec_malloc(state, MEM_FOR_IR, sizeof(*block)); if (!block) @@ -888,21 +934,27 @@ static struct block * generate_dispatcher(struct lightrec_state *state) to_end = jit_blei(LIGHTREC_REG_CYCLE, 0); /* Convert next PC to KUNSEG and avoid mirrors */ - ram_len = state->maps[PSX_MAP_KERNEL_USER_RAM].length; - jit_andi(JIT_R0, JIT_V0, 0x10000000 | (ram_len - 1)); - to_c = jit_bgei(JIT_R0, ram_len); - - /* Fast path: code is running from RAM, use the code LUT */ - if (__WORDSIZE == 64) + jit_andi(JIT_R0, JIT_V0, 
0x10000000 | (RAM_SIZE - 1)); + jit_rshi_u(JIT_R1, JIT_R0, 28); + jit_andi(JIT_R2, JIT_V0, BIOS_SIZE - 1); + jit_addi(JIT_R2, JIT_R2, RAM_SIZE); + jit_movnr(JIT_R0, JIT_R2, JIT_R1); + + /* If possible, use the code LUT */ + if (!lut_is_32bit(state)) jit_lshi(JIT_R0, JIT_R0, 1); jit_addr(JIT_R0, JIT_R0, LIGHTREC_REG_STATE); - jit_ldxi(JIT_R0, JIT_R0, offsetof(struct lightrec_state, code_lut)); + + offset = offsetof(struct lightrec_state, code_lut); + if (lut_is_32bit(state)) + jit_ldxi_ui(JIT_R0, JIT_R0, offset); + else + jit_ldxi(JIT_R0, JIT_R0, offset); /* If we get non-NULL, loop */ jit_patch_at(jit_bnei(JIT_R0, 0), loop); /* Slow path: call C function get_next_block_func() */ - jit_patch(to_c); if (ENABLE_FIRST_PASS || OPT_DETECT_IMPOSSIBLE_BRANCHES) { /* We may call the interpreter - update state->current_cycle */ @@ -946,15 +998,14 @@ static struct block * generate_dispatcher(struct lightrec_state *state) jit_epilog(); block->_jit = _jit; - block->function = jit_emit(); block->opcode_list = NULL; block->flags = 0; block->nb_ops = 0; - jit_get_code(&code_size); - lightrec_register(MEM_FOR_CODE, code_size); - - block->code_size = code_size; + block->function = lightrec_emit_code(state, _jit, + &block->code_size); + if (!block->function) + goto err_free_block; state->eob_wrapper_func = jit_address(addr2); if (OPT_REPLACE_MEMSET) @@ -984,7 +1035,7 @@ union code lightrec_read_opcode(struct lightrec_state *state, u32 pc) lightrec_get_map(state, &host, kunseg(pc)); const u32 *code = (u32 *)host; - return (union code) *code; + return (union code) LE32TOH(*code); } unsigned int lightrec_cycles_of_opcode(union code code) @@ -1101,7 +1152,7 @@ static struct block * lightrec_precompile_block(struct lightrec_state *state, block->flags |= BLOCK_FULLY_TAGGED; if (OPT_REPLACE_MEMSET && (block->flags & BLOCK_IS_MEMSET)) - state->code_lut[lut_offset(pc)] = state->memset_func; + lut_write(state, lut_offset(pc), state->memset_func); block->hash = 
lightrec_calculate_block_hash(block); @@ -1160,6 +1211,19 @@ static void lightrec_reap_jit(struct lightrec_state *state, void *data) _jit_destroy_state(data); } +static void lightrec_free_function(struct lightrec_state *state, void *fn) +{ + if (ENABLE_CODE_BUFFER && state->tlsf) { + pr_debug("Freeing code block at 0x%" PRIxPTR "\n", (uintptr_t) fn); + tlsf_free(state->tlsf, fn); + } +} + +static void lightrec_reap_function(struct lightrec_state *state, void *data) +{ + lightrec_free_function(state, data); +} + int lightrec_compile_block(struct lightrec_cstate *cstate, struct block *block) { @@ -1171,7 +1235,7 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, jit_state_t *_jit, *oldjit; jit_node_t *start_of_block; bool skip_next = false; - jit_word_t code_size; + void *old_fn; unsigned int i, j; u32 offset; @@ -1184,6 +1248,7 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, return -ENOMEM; oldjit = block->_jit; + old_fn = block->function; block->_jit = _jit; lightrec_regcache_reset(cstate->reg_cache); @@ -1261,11 +1326,16 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, jit_ret(); jit_epilog(); - block->function = jit_emit(); + block->function = lightrec_emit_code(state, _jit, + &block->code_size); + if (!block->function) { + pr_err("Unable to compile block!\n"); + } + block->flags &= ~BLOCK_SHOULD_RECOMPILE; /* Add compiled function to the LUT */ - state->code_lut[lut_offset(block->pc)] = block->function; + lut_write(state, lut_offset(block->pc), block->function); if (ENABLE_THREADED_COMPILER) { /* Since we might try to reap the same block multiple times, @@ -1302,7 +1372,7 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, * be compiled. We can override the LUT entry with our new * block's entry point. 
*/ offset = lut_offset(block->pc) + target->offset; - state->code_lut[offset] = jit_address(target->label); + lut_write(state, offset, jit_address(target->label)); if (block2) { pr_debug("Reap block 0x%08x as it's covered by block " @@ -1323,11 +1393,6 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, if (ENABLE_THREADED_COMPILER) lightrec_reaper_continue(state->reaper); - jit_get_code(&code_size); - lightrec_register(MEM_FOR_CODE, code_size); - - block->code_size = code_size; - if (ENABLE_DISASSEMBLER) { pr_debug("Compiling block at PC: 0x%08x\n", block->pc); jit_disassemble(); @@ -1350,11 +1415,15 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, pr_debug("Block 0x%08x recompiled, reaping old jit context.\n", block->pc); - if (ENABLE_THREADED_COMPILER) + if (ENABLE_THREADED_COMPILER) { lightrec_reaper_add(state->reaper, lightrec_reap_jit, oldjit); - else + lightrec_reaper_add(state->reaper, + lightrec_reap_function, old_fn); + } else { _jit_destroy_state(oldjit); + lightrec_free_function(state, old_fn); + } } return 0; @@ -1435,6 +1504,7 @@ void lightrec_free_block(struct lightrec_state *state, struct block *block) lightrec_free_opcode_list(state, block); if (block->_jit) _jit_destroy_state(block->_jit); + lightrec_free_function(state, block->function); lightrec_unregister(MEM_FOR_CODE, block->code_size); lightrec_free(state, MEM_FOR_IR, sizeof(*block), block); } @@ -1469,7 +1539,12 @@ struct lightrec_state * lightrec_init(char *argv0, size_t nb, const struct lightrec_ops *ops) { + const struct lightrec_mem_map *codebuf_map; struct lightrec_state *state; + uintptr_t addr; + void *tlsf = NULL; + bool with_32bit_lut = false; + size_t lut_size; /* Sanity-check ops */ if (!ops || !ops->cop2_op || !ops->enable_ram) { @@ -1477,15 +1552,37 @@ struct lightrec_state * lightrec_init(char *argv0, return NULL; } + if (ENABLE_CODE_BUFFER && nb > PSX_MAP_CODE_BUFFER) { + codebuf_map = &map[PSX_MAP_CODE_BUFFER]; + + tlsf = 
tlsf_create_with_pool(codebuf_map->address, + codebuf_map->length); + if (!tlsf) { + pr_err("Unable to initialize code buffer\n"); + return NULL; + } + + if (__WORDSIZE == 64) { + addr = (uintptr_t) codebuf_map->address + codebuf_map->length - 1; + with_32bit_lut = addr == (u32) addr; + } + } + + if (with_32bit_lut) + lut_size = CODE_LUT_SIZE * 4; + else + lut_size = CODE_LUT_SIZE * sizeof(void *); + init_jit(argv0); - state = calloc(1, sizeof(*state) + - sizeof(*state->code_lut) * CODE_LUT_SIZE); + state = calloc(1, sizeof(*state) + lut_size); if (!state) goto err_finish_jit; - lightrec_register(MEM_FOR_LIGHTREC, sizeof(*state) + - sizeof(*state->code_lut) * CODE_LUT_SIZE); + lightrec_register(MEM_FOR_LIGHTREC, sizeof(*state) + lut_size); + + state->tlsf = tlsf; + state->with_32bit_lut = with_32bit_lut; #if ENABLE_TINYMM state->tinymm = tinymm_init(malloc, free, 4096); @@ -1554,6 +1651,9 @@ struct lightrec_state * lightrec_init(char *argv0, pr_info("Memory map is sub-par. Emitted code will be slow.\n"); } + if (state->with_32bit_lut) + pr_info("Using 32-bit LUT\n"); + return state; err_free_dispatcher: @@ -1574,10 +1674,12 @@ err_free_tinymm: err_free_state: #endif lightrec_unregister(MEM_FOR_LIGHTREC, sizeof(*state) + - sizeof(*state->code_lut) * CODE_LUT_SIZE); + lut_elm_size(state) * CODE_LUT_SIZE); free(state); err_finish_jit: finish_jit(); + if (ENABLE_CODE_BUFFER && tlsf) + tlsf_destroy(tlsf); return NULL; } @@ -1598,12 +1700,14 @@ void lightrec_destroy(struct lightrec_state *state) lightrec_free_block(state, state->dispatcher); lightrec_free_block(state, state->c_wrapper_block); finish_jit(); + if (ENABLE_CODE_BUFFER && state->tlsf) + tlsf_destroy(state->tlsf); #if ENABLE_TINYMM tinymm_shutdown(state->tinymm); #endif lightrec_unregister(MEM_FOR_LIGHTREC, sizeof(*state) + - sizeof(*state->code_lut) * CODE_LUT_SIZE); + lut_elm_size(state) * CODE_LUT_SIZE); free(state); } @@ -1625,7 +1729,7 @@ void lightrec_invalidate(struct lightrec_state *state, u32 addr, 
u32 len) void lightrec_invalidate_all(struct lightrec_state *state) { - memset(state->code_lut, 0, sizeof(*state->code_lut) * CODE_LUT_SIZE); + memset(state->code_lut, 0, lut_elm_size(state) * CODE_LUT_SIZE); } void lightrec_set_invalidate_mode(struct lightrec_state *state, bool dma_only) diff --git a/deps/lightrec/lightrec.h b/deps/lightrec/lightrec.h index e418c706..4f51e1f6 100644 --- a/deps/lightrec/lightrec.h +++ b/deps/lightrec/lightrec.h @@ -58,6 +58,9 @@ enum psx_map { PSX_MAP_MIRROR1, PSX_MAP_MIRROR2, PSX_MAP_MIRROR3, + PSX_MAP_CODE_BUFFER, + + PSX_MAP_UNKNOWN, }; struct lightrec_mem_map_ops { diff --git a/deps/lightrec/optimizer.c b/deps/lightrec/optimizer.c index 7304abca..562f7e00 100644 --- a/deps/lightrec/optimizer.c +++ b/deps/lightrec/optimizer.c @@ -817,6 +817,66 @@ static void lightrec_optimize_sll_sra(struct opcode *list, unsigned int offset) to_nop->opcode = 0; } +static void lightrec_remove_useless_lui(struct block *block, unsigned int offset, + u32 known, u32 *values) +{ + struct opcode *list = block->opcode_list, + *op = &block->opcode_list[offset]; + int reader; + + if (!(op->flags & LIGHTREC_SYNC) && (known & BIT(op->i.rt)) && + values[op->i.rt] == op->i.imm << 16) { + pr_debug("Converting duplicated LUI to NOP\n"); + op->opcode = 0x0; + return; + } + + if (op->i.imm != 0 || op->i.rt == 0) + return; + + reader = find_next_reader(list, offset + 1, op->i.rt); + if (reader <= 0) + return; + + if (opcode_writes_register(list[reader].c, op->i.rt) || + reg_is_dead(list, reader, op->i.rt)) { + pr_debug("Removing useless LUI 0x0\n"); + + if (list[reader].i.rs == op->i.rt) + list[reader].i.rs = 0; + if (list[reader].i.op == OP_SPECIAL && + list[reader].i.rt == op->i.rt) + list[reader].i.rt = 0; + op->opcode = 0x0; + } +} + +static void lightrec_modify_lui(struct block *block, unsigned int offset) +{ + union code c, *lui = &block->opcode_list[offset].c; + bool stop = false, stop_next = false; + unsigned int i; + + for (i = offset + 1; !stop && i < 
block->nb_ops; i++) { + c = block->opcode_list[i].c; + stop = stop_next; + + if ((opcode_is_store(c) && c.i.rt == lui->i.rt) + || (!opcode_is_load(c) && opcode_reads_register(c, lui->i.rt))) + break; + + if (opcode_writes_register(c, lui->i.rt)) { + pr_debug("Convert LUI at offset 0x%x to kuseg\n", + i - 1 << 2); + lui->i.imm = kunseg(lui->i.imm << 16) >> 16; + break; + } + + if (has_delay_slot(c)) + stop_next = true; + } +} + static int lightrec_transform_ops(struct lightrec_state *state, struct block *block) { struct opcode *list = block->opcode_list; @@ -824,7 +884,6 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl u32 known = BIT(0); u32 values[32] = { 0 }; unsigned int i; - int reader; for (i = 0; i < block->nb_ops; i++) { prev = op; @@ -863,30 +922,8 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl break; case OP_LUI: - if (!(op->flags & LIGHTREC_SYNC) && - (known & BIT(op->i.rt)) && - values[op->i.rt] == op->i.imm << 16) { - pr_debug("Converting duplicated LUI to NOP\n"); - op->opcode = 0x0; - } - - if (op->i.imm != 0 || op->i.rt == 0) - break; - - reader = find_next_reader(list, i + 1, op->i.rt); - if (reader > 0 && - (opcode_writes_register(list[reader].c, op->i.rt) || - reg_is_dead(list, reader, op->i.rt))) { - - pr_debug("Removing useless LUI 0x0\n"); - - if (list[reader].i.rs == op->i.rt) - list[reader].i.rs = 0; - if (list[reader].i.op == OP_SPECIAL && - list[reader].i.rt == op->i.rt) - list[reader].i.rt = 0; - op->opcode = 0x0; - } + lightrec_modify_lui(block, i); + lightrec_remove_useless_lui(block, i, known, values); break; /* Transform ORI/ADDI/ADDIU with imm #0 or ORR/ADD/ADDU/SUB/SUBU @@ -1233,15 +1270,14 @@ static int lightrec_early_unload(struct lightrec_state *state, struct block *blo static int lightrec_flag_io(struct lightrec_state *state, struct block *block) { - const struct lightrec_mem_map *map; - struct opcode *prev2, *prev = NULL, *list = NULL; + struct opcode *prev 
= NULL, *list = NULL; + enum psx_map psx_map; u32 known = BIT(0); u32 values[32] = { 0 }; unsigned int i; - u32 val; + u32 val, kunseg_val; for (i = 0; i < block->nb_ops; i++) { - prev2 = prev; prev = list; list = &block->opcode_list[i]; @@ -1289,42 +1325,38 @@ static int lightrec_flag_io(struct lightrec_state *state, struct block *block) case OP_LWR: case OP_LWC2: if (OPT_FLAG_IO && (known & BIT(list->i.rs))) { - if (prev && prev->i.op == OP_LUI && - !(prev2 && has_delay_slot(prev2->c)) && - prev->i.rt == list->i.rs && - list->i.rt == list->i.rs && - prev->i.imm & 0x8000) { - pr_debug("Convert LUI at offset 0x%x to kuseg\n", - i - 1 << 2); - - val = kunseg(prev->i.imm << 16); - prev->i.imm = val >> 16; - values[list->i.rs] = val; - } - val = values[list->i.rs] + (s16) list->i.imm; - map = lightrec_get_map(state, NULL, kunseg(val)); - - if (!map || map->ops || - map == &state->maps[PSX_MAP_PARALLEL_PORT]) { - pr_debug("Flagging opcode %u as I/O access\n", - i); - list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_HW); - break; - } - - if (val - map->pc < map->length) - list->flags |= LIGHTREC_NO_MASK; - - if (map == &state->maps[PSX_MAP_KERNEL_USER_RAM]) { + kunseg_val = kunseg(val); + psx_map = lightrec_get_map_idx(state, kunseg_val); + + switch (psx_map) { + case PSX_MAP_KERNEL_USER_RAM: + if (val == kunseg_val) + list->flags |= LIGHTREC_NO_MASK; + /* fall-through */ + case PSX_MAP_MIRROR1: + case PSX_MAP_MIRROR2: + case PSX_MAP_MIRROR3: pr_debug("Flaging opcode %u as RAM access\n", i); list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_RAM); - } else if (map == &state->maps[PSX_MAP_BIOS]) { + break; + case PSX_MAP_BIOS: pr_debug("Flaging opcode %u as BIOS access\n", i); list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_BIOS); - } else if (map == &state->maps[PSX_MAP_SCRATCH_PAD]) { + break; + case PSX_MAP_SCRATCH_PAD: pr_debug("Flaging opcode %u as scratchpad access\n", i); list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_SCRATCH); + + /* Consider that we're never going to run code from 
+ * the scratchpad. */ + list->flags |= LIGHTREC_NO_INVALIDATE; + break; + default: + pr_debug("Flagging opcode %u as I/O access\n", + i); + list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_HW); + break; } } default: /* fall-through */ diff --git a/deps/lightrec/tlsf/.gitrepo b/deps/lightrec/tlsf/.gitrepo new file mode 100644 index 00000000..692e5425 --- /dev/null +++ b/deps/lightrec/tlsf/.gitrepo @@ -0,0 +1,12 @@ +; DO NOT EDIT (unless you know what you are doing) +; +; This subdirectory is a git "subrepo", and this file is maintained by the +; git-subrepo command. See https://github.com/git-commands/git-subrepo#readme +; +[subrepo] + remote = https://github.com/mattconte/tlsf + branch = master + commit = deff9ab509341f264addbd3c8ada533678591905 + parent = 1dc0344052e7379e16753e4a285c30fd158bf78d + method = merge + cmdver = 0.4.3 diff --git a/deps/lightrec/tlsf/README.md b/deps/lightrec/tlsf/README.md new file mode 100644 index 00000000..982919fc --- /dev/null +++ b/deps/lightrec/tlsf/README.md @@ -0,0 +1,92 @@ +# tlsf +Two-Level Segregated Fit memory allocator implementation. +Written by Matthew Conte (matt@baisoku.org). +Released under the BSD license. + +Features +-------- + * O(1) cost for malloc, free, realloc, memalign + * Extremely low overhead per allocation (4 bytes) + * Low overhead per TLSF management of pools (~3kB) + * Low fragmentation + * Compiles to only a few kB of code and data + * Support for adding and removing memory pool regions on the fly + +Caveats +------- + * Currently, assumes architecture can make 4-byte aligned accesses + * Not designed to be thread safe; the user must provide this + +Notes +----- +This code was based on the TLSF 1.4 spec and documentation found at: + + http://www.gii.upv.es/tlsf/main/docs + +It also leverages the TLSF 2.0 improvement to shrink the per-block overhead from 8 to 4 bytes. 
+ +History +------- +2016/04/10 - v3.1 + * Code moved to github + * tlsfbits.h rolled into tlsf.c + * License changed to BSD + +2014/02/08 - v3.0 + * This version is based on improvements from 3DInteractive GmbH + * Interface changed to allow more than one memory pool + * Separated pool handling from control structure (adding, removing, debugging) + * Control structure and pools can still be constructed in the same memory block + * Memory blocks for control structure and pools are checked for alignment + * Added functions to retrieve control structure size, alignment size, min and max block size, overhead of pool structure, and overhead of a single allocation + * Minimal Pool size is tlsf_block_size_min() + tlsf_pool_overhead() + * Pool must be empty when it is removed, in order to allow O(1) removal + +2011/10/20 - v2.0 + * 64-bit support + * More compiler intrinsics for ffs/fls + * ffs/fls verification during TLSF creation in debug builds + +2008/04/04 - v1.9 + * Add tlsf_heap_check, a heap integrity check + * Support a predefined tlsf_assert macro + * Fix realloc case where block should shrink; if adjacent block is in use, execution would go down the slow path + +2007/02/08 - v1.8 + * Fix for unnecessary reallocation in tlsf_realloc + +2007/02/03 - v1.7 + * tlsf_heap_walk takes a callback + * tlsf_realloc now returns NULL on failure + * tlsf_memalign optimization for 4-byte alignment + * Usage of size_t where appropriate + +2006/11/21 - v1.6 + * ffs/fls broken out into tlsfbits.h + * tlsf_overhead queries per-pool overhead + +2006/11/07 - v1.5 + * Smart realloc implementation + * Smart memalign implementation + +2006/10/11 - v1.4 + * Add some ffs/fls implementations + * Minor code footprint reduction + +2006/09/14 - v1.3 + * Profiling indicates heavy use of blocks of size 1-128, so implement small block handling + * Reduce pool overhead by about 1kb + * Reduce minimum block size from 32 to 12 bytes + * Realloc bug fix + +2006/09/09 - v1.2 + * Add tlsf_block_size 
+ * Static assertion mechanism for invariants + * Minor bugfixes + +2006/09/01 - v1.1 + * Add tlsf_realloc + * Add tlsf_walk_heap + +2006/08/25 - v1.0 + * First release diff --git a/deps/lightrec/tlsf/tlsf.c b/deps/lightrec/tlsf/tlsf.c new file mode 100644 index 00000000..af575737 --- /dev/null +++ b/deps/lightrec/tlsf/tlsf.c @@ -0,0 +1,1264 @@ +#include +#include +#include +#include +#include +#include + +#include "tlsf.h" + +#if defined(__cplusplus) +#define tlsf_decl inline +#else +#define tlsf_decl static +#endif + +/* +** Architecture-specific bit manipulation routines. +** +** TLSF achieves O(1) cost for malloc and free operations by limiting +** the search for a free block to a free list of guaranteed size +** adequate to fulfill the request, combined with efficient free list +** queries using bitmasks and architecture-specific bit-manipulation +** routines. +** +** Most modern processors provide instructions to count leading zeroes +** in a word, find the lowest and highest set bit, etc. These +** specific implementations will be used when available, falling back +** to a reasonably efficient generic implementation. +** +** NOTE: TLSF spec relies on ffs/fls returning value 0..31. +** ffs/fls return 1-32 by default, returning 0 for error. +*/ + +/* +** Detect whether or not we are building for a 32- or 64-bit (LP/LLP) +** architecture. There is no reliable portable method at compile-time. +*/ +#if defined (__alpha__) || defined (__ia64__) || defined (__x86_64__) \ + || defined (_WIN64) || defined (__LP64__) || defined (__LLP64__) +#define TLSF_64BIT +#endif + +/* +** gcc 3.4 and above have builtin support, specialized for architecture. +** Some compilers masquerade as gcc; patchlevel test filters them out. +*/ +#if defined (__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) \ + && defined (__GNUC_PATCHLEVEL__) + +#if defined (__SNC__) +/* SNC for Playstation 3. 
*/ + +tlsf_decl int tlsf_ffs(unsigned int word) +{ + const unsigned int reverse = word & (~word + 1); + const int bit = 32 - __builtin_clz(reverse); + return bit - 1; +} + +#else + +tlsf_decl int tlsf_ffs(unsigned int word) +{ + return __builtin_ffs(word) - 1; +} + +#endif + +tlsf_decl int tlsf_fls(unsigned int word) +{ + const int bit = word ? 32 - __builtin_clz(word) : 0; + return bit - 1; +} + +#elif defined (_MSC_VER) && (_MSC_VER >= 1400) && (defined (_M_IX86) || defined (_M_X64)) +/* Microsoft Visual C++ support on x86/X64 architectures. */ + +#include + +#pragma intrinsic(_BitScanReverse) +#pragma intrinsic(_BitScanForward) + +tlsf_decl int tlsf_fls(unsigned int word) +{ + unsigned long index; + return _BitScanReverse(&index, word) ? index : -1; +} + +tlsf_decl int tlsf_ffs(unsigned int word) +{ + unsigned long index; + return _BitScanForward(&index, word) ? index : -1; +} + +#elif defined (_MSC_VER) && defined (_M_PPC) +/* Microsoft Visual C++ support on PowerPC architectures. */ + +#include + +tlsf_decl int tlsf_fls(unsigned int word) +{ + const int bit = 32 - _CountLeadingZeros(word); + return bit - 1; +} + +tlsf_decl int tlsf_ffs(unsigned int word) +{ + const unsigned int reverse = word & (~word + 1); + const int bit = 32 - _CountLeadingZeros(reverse); + return bit - 1; +} + +#elif defined (__ARMCC_VERSION) +/* RealView Compilation Tools for ARM */ + +tlsf_decl int tlsf_ffs(unsigned int word) +{ + const unsigned int reverse = word & (~word + 1); + const int bit = 32 - __clz(reverse); + return bit - 1; +} + +tlsf_decl int tlsf_fls(unsigned int word) +{ + const int bit = word ? 32 - __clz(word) : 0; + return bit - 1; +} + +#elif defined (__ghs__) +/* Green Hills support for PowerPC */ + +#include + +tlsf_decl int tlsf_ffs(unsigned int word) +{ + const unsigned int reverse = word & (~word + 1); + const int bit = 32 - __CLZ32(reverse); + return bit - 1; +} + +tlsf_decl int tlsf_fls(unsigned int word) +{ + const int bit = word ? 
32 - __CLZ32(word) : 0; + return bit - 1; +} + +#else +/* Fall back to generic implementation. */ + +tlsf_decl int tlsf_fls_generic(unsigned int word) +{ + int bit = 32; + + if (!word) bit -= 1; + if (!(word & 0xffff0000)) { word <<= 16; bit -= 16; } + if (!(word & 0xff000000)) { word <<= 8; bit -= 8; } + if (!(word & 0xf0000000)) { word <<= 4; bit -= 4; } + if (!(word & 0xc0000000)) { word <<= 2; bit -= 2; } + if (!(word & 0x80000000)) { word <<= 1; bit -= 1; } + + return bit; +} + +/* Implement ffs in terms of fls. */ +tlsf_decl int tlsf_ffs(unsigned int word) +{ + return tlsf_fls_generic(word & (~word + 1)) - 1; +} + +tlsf_decl int tlsf_fls(unsigned int word) +{ + return tlsf_fls_generic(word) - 1; +} + +#endif + +/* Possibly 64-bit version of tlsf_fls. */ +#if defined (TLSF_64BIT) +tlsf_decl int tlsf_fls_sizet(size_t size) +{ + int high = (int)(size >> 32); + int bits = 0; + if (high) + { + bits = 32 + tlsf_fls(high); + } + else + { + bits = tlsf_fls((int)size & 0xffffffff); + + } + return bits; +} +#else +#define tlsf_fls_sizet tlsf_fls +#endif + +#undef tlsf_decl + +/* +** Constants. +*/ + +/* Public constants: may be modified. */ +enum tlsf_public +{ + /* log2 of number of linear subdivisions of block sizes. Larger + ** values require more memory in the control structure. Values of + ** 4 or 5 are typical. + */ + SL_INDEX_COUNT_LOG2 = 5, +}; + +/* Private constants: do not modify. */ +enum tlsf_private +{ +#if defined (TLSF_64BIT) + /* All allocation sizes and addresses are aligned to 8 bytes. */ + ALIGN_SIZE_LOG2 = 3, +#else + /* All allocation sizes and addresses are aligned to 4 bytes. */ + ALIGN_SIZE_LOG2 = 2, +#endif + ALIGN_SIZE = (1 << ALIGN_SIZE_LOG2), + + /* + ** We support allocations of sizes up to (1 << FL_INDEX_MAX) bits. 
+ ** However, because we linearly subdivide the second-level lists, and + ** our minimum size granularity is 4 bytes, it doesn't make sense to + ** create first-level lists for sizes smaller than SL_INDEX_COUNT * 4, + ** or (1 << (SL_INDEX_COUNT_LOG2 + 2)) bytes, as there we will be + ** trying to split size ranges into more slots than we have available. + ** Instead, we calculate the minimum threshold size, and place all + ** blocks below that size into the 0th first-level list. + */ + +#if defined (TLSF_64BIT) + /* + ** TODO: We can increase this to support larger sizes, at the expense + ** of more overhead in the TLSF structure. + */ + FL_INDEX_MAX = 32, +#else + FL_INDEX_MAX = 30, +#endif + SL_INDEX_COUNT = (1 << SL_INDEX_COUNT_LOG2), + FL_INDEX_SHIFT = (SL_INDEX_COUNT_LOG2 + ALIGN_SIZE_LOG2), + FL_INDEX_COUNT = (FL_INDEX_MAX - FL_INDEX_SHIFT + 1), + + SMALL_BLOCK_SIZE = (1 << FL_INDEX_SHIFT), +}; + +/* +** Cast and min/max macros. +*/ + +#define tlsf_cast(t, exp) ((t) (exp)) +#define tlsf_min(a, b) ((a) < (b) ? (a) : (b)) +#define tlsf_max(a, b) ((a) > (b) ? (a) : (b)) + +/* +** Set assert macro, if it has not been provided by the user. +*/ +#if !defined (tlsf_assert) +#define tlsf_assert assert +#endif + +/* +** Static assertion mechanism. +*/ + +#define _tlsf_glue2(x, y) x ## y +#define _tlsf_glue(x, y) _tlsf_glue2(x, y) +#define tlsf_static_assert(exp) \ + typedef char _tlsf_glue(static_assert, __LINE__) [(exp) ? 1 : -1] + +/* This code has been tested on 32- and 64-bit (LP/LLP) architectures. */ +tlsf_static_assert(sizeof(int) * CHAR_BIT == 32); +tlsf_static_assert(sizeof(size_t) * CHAR_BIT >= 32); +tlsf_static_assert(sizeof(size_t) * CHAR_BIT <= 64); + +/* SL_INDEX_COUNT must be <= number of bits in sl_bitmap's storage type. */ +tlsf_static_assert(sizeof(unsigned int) * CHAR_BIT >= SL_INDEX_COUNT); + +/* Ensure we've properly tuned our sizes. 
*/ +tlsf_static_assert(ALIGN_SIZE == SMALL_BLOCK_SIZE / SL_INDEX_COUNT); + +/* +** Data structures and associated constants. +*/ + +/* +** Block header structure. +** +** There are several implementation subtleties involved: +** - The prev_phys_block field is only valid if the previous block is free. +** - The prev_phys_block field is actually stored at the end of the +** previous block. It appears at the beginning of this structure only to +** simplify the implementation. +** - The next_free / prev_free fields are only valid if the block is free. +*/ +typedef struct block_header_t +{ + /* Points to the previous physical block. */ + struct block_header_t* prev_phys_block; + + /* The size of this block, excluding the block header. */ + size_t size; + + /* Next and previous free blocks. */ + struct block_header_t* next_free; + struct block_header_t* prev_free; +} block_header_t; + +/* +** Since block sizes are always at least a multiple of 4, the two least +** significant bits of the size field are used to store the block status: +** - bit 0: whether block is busy or free +** - bit 1: whether previous block is busy or free +*/ +static const size_t block_header_free_bit = 1 << 0; +static const size_t block_header_prev_free_bit = 1 << 1; + +/* +** The size of the block header exposed to used blocks is the size field. +** The prev_phys_block field is stored *inside* the previous free block. +*/ +static const size_t block_header_overhead = sizeof(size_t); + +/* User data starts directly after the size field in a used block. */ +static const size_t block_start_offset = + offsetof(block_header_t, size) + sizeof(size_t); + +/* +** A free block must be large enough to store its header minus the size of +** the prev_phys_block field, and no larger than the number of addressable +** bits for FL_INDEX. 
+*/ +static const size_t block_size_min = + sizeof(block_header_t) - sizeof(block_header_t*); +static const size_t block_size_max = tlsf_cast(size_t, 1) << FL_INDEX_MAX; + + +/* The TLSF control structure. */ +typedef struct control_t +{ + /* Empty lists point at this block to indicate they are free. */ + block_header_t block_null; + + /* Bitmaps for free lists. */ + unsigned int fl_bitmap; + unsigned int sl_bitmap[FL_INDEX_COUNT]; + + /* Head of free lists. */ + block_header_t* blocks[FL_INDEX_COUNT][SL_INDEX_COUNT]; +} control_t; + +/* A type used for casting when doing pointer arithmetic. */ +typedef ptrdiff_t tlsfptr_t; + +/* +** block_header_t member functions. +*/ + +static size_t block_size(const block_header_t* block) +{ + return block->size & ~(block_header_free_bit | block_header_prev_free_bit); +} + +static void block_set_size(block_header_t* block, size_t size) +{ + const size_t oldsize = block->size; + block->size = size | (oldsize & (block_header_free_bit | block_header_prev_free_bit)); +} + +static int block_is_last(const block_header_t* block) +{ + return block_size(block) == 0; +} + +static int block_is_free(const block_header_t* block) +{ + return tlsf_cast(int, block->size & block_header_free_bit); +} + +static void block_set_free(block_header_t* block) +{ + block->size |= block_header_free_bit; +} + +static void block_set_used(block_header_t* block) +{ + block->size &= ~block_header_free_bit; +} + +static int block_is_prev_free(const block_header_t* block) +{ + return tlsf_cast(int, block->size & block_header_prev_free_bit); +} + +static void block_set_prev_free(block_header_t* block) +{ + block->size |= block_header_prev_free_bit; +} + +static void block_set_prev_used(block_header_t* block) +{ + block->size &= ~block_header_prev_free_bit; +} + +static block_header_t* block_from_ptr(const void* ptr) +{ + return tlsf_cast(block_header_t*, + tlsf_cast(unsigned char*, ptr) - block_start_offset); +} + +static void* block_to_ptr(const block_header_t* 
block) +{ + return tlsf_cast(void*, + tlsf_cast(unsigned char*, block) + block_start_offset); +} + +/* Return location of next block after block of given size. */ +static block_header_t* offset_to_block(const void* ptr, size_t size) +{ + return tlsf_cast(block_header_t*, tlsf_cast(tlsfptr_t, ptr) + size); +} + +/* Return location of previous block. */ +static block_header_t* block_prev(const block_header_t* block) +{ + tlsf_assert(block_is_prev_free(block) && "previous block must be free"); + return block->prev_phys_block; +} + +/* Return location of next existing block. */ +static block_header_t* block_next(const block_header_t* block) +{ + block_header_t* next = offset_to_block(block_to_ptr(block), + block_size(block) - block_header_overhead); + tlsf_assert(!block_is_last(block)); + return next; +} + +/* Link a new block with its physical neighbor, return the neighbor. */ +static block_header_t* block_link_next(block_header_t* block) +{ + block_header_t* next = block_next(block); + next->prev_phys_block = block; + return next; +} + +static void block_mark_as_free(block_header_t* block) +{ + /* Link the block to the next block, first. 
*/ + block_header_t* next = block_link_next(block); + block_set_prev_free(next); + block_set_free(block); +} + +static void block_mark_as_used(block_header_t* block) +{ + block_header_t* next = block_next(block); + block_set_prev_used(next); + block_set_used(block); +} + +static size_t align_up(size_t x, size_t align) +{ + tlsf_assert(0 == (align & (align - 1)) && "must align to a power of two"); + return (x + (align - 1)) & ~(align - 1); +} + +static size_t align_down(size_t x, size_t align) +{ + tlsf_assert(0 == (align & (align - 1)) && "must align to a power of two"); + return x - (x & (align - 1)); +} + +static void* align_ptr(const void* ptr, size_t align) +{ + const tlsfptr_t aligned = + (tlsf_cast(tlsfptr_t, ptr) + (align - 1)) & ~(align - 1); + tlsf_assert(0 == (align & (align - 1)) && "must align to a power of two"); + return tlsf_cast(void*, aligned); +} + +/* +** Adjust an allocation size to be aligned to word size, and no smaller +** than internal minimum. +*/ +static size_t adjust_request_size(size_t size, size_t align) +{ + size_t adjust = 0; + if (size) + { + const size_t aligned = align_up(size, align); + + /* aligned sized must not exceed block_size_max or we'll go out of bounds on sl_bitmap */ + if (aligned < block_size_max) + { + adjust = tlsf_max(aligned, block_size_min); + } + } + return adjust; +} + +/* +** TLSF utility functions. In most cases, these are direct translations of +** the documentation found in the white paper. +*/ + +static void mapping_insert(size_t size, int* fli, int* sli) +{ + int fl, sl; + if (size < SMALL_BLOCK_SIZE) + { + /* Store small blocks in first list. 
*/ + fl = 0; + sl = tlsf_cast(int, size) / (SMALL_BLOCK_SIZE / SL_INDEX_COUNT); + } + else + { + fl = tlsf_fls_sizet(size); + sl = tlsf_cast(int, size >> (fl - SL_INDEX_COUNT_LOG2)) ^ (1 << SL_INDEX_COUNT_LOG2); + fl -= (FL_INDEX_SHIFT - 1); + } + *fli = fl; + *sli = sl; +} + +/* This version rounds up to the next block size (for allocations) */ +static void mapping_search(size_t size, int* fli, int* sli) +{ + if (size >= SMALL_BLOCK_SIZE) + { + const size_t round = (1 << (tlsf_fls_sizet(size) - SL_INDEX_COUNT_LOG2)) - 1; + size += round; + } + mapping_insert(size, fli, sli); +} + +static block_header_t* search_suitable_block(control_t* control, int* fli, int* sli) +{ + int fl = *fli; + int sl = *sli; + + /* + ** First, search for a block in the list associated with the given + ** fl/sl index. + */ + unsigned int sl_map = control->sl_bitmap[fl] & (~0U << sl); + if (!sl_map) + { + /* No block exists. Search in the next largest first-level list. */ + const unsigned int fl_map = control->fl_bitmap & (~0U << (fl + 1)); + if (!fl_map) + { + /* No free blocks available, memory has been exhausted. */ + return 0; + } + + fl = tlsf_ffs(fl_map); + *fli = fl; + sl_map = control->sl_bitmap[fl]; + } + tlsf_assert(sl_map && "internal error - second level bitmap is null"); + sl = tlsf_ffs(sl_map); + *sli = sl; + + /* Return the first block in the free list. */ + return control->blocks[fl][sl]; +} + +/* Remove a free block from the free list.*/ +static void remove_free_block(control_t* control, block_header_t* block, int fl, int sl) +{ + block_header_t* prev = block->prev_free; + block_header_t* next = block->next_free; + tlsf_assert(prev && "prev_free field can not be null"); + tlsf_assert(next && "next_free field can not be null"); + next->prev_free = prev; + prev->next_free = next; + + /* If this block is the head of the free list, set new head. */ + if (control->blocks[fl][sl] == block) + { + control->blocks[fl][sl] = next; + + /* If the new head is null, clear the bitmap. 
*/ + if (next == &control->block_null) + { + control->sl_bitmap[fl] &= ~(1U << sl); + + /* If the second bitmap is now empty, clear the fl bitmap. */ + if (!control->sl_bitmap[fl]) + { + control->fl_bitmap &= ~(1U << fl); + } + } + } +} + +/* Insert a free block into the free block list. */ +static void insert_free_block(control_t* control, block_header_t* block, int fl, int sl) +{ + block_header_t* current = control->blocks[fl][sl]; + tlsf_assert(current && "free list cannot have a null entry"); + tlsf_assert(block && "cannot insert a null entry into the free list"); + block->next_free = current; + block->prev_free = &control->block_null; + current->prev_free = block; + + tlsf_assert(block_to_ptr(block) == align_ptr(block_to_ptr(block), ALIGN_SIZE) + && "block not aligned properly"); + /* + ** Insert the new block at the head of the list, and mark the first- + ** and second-level bitmaps appropriately. + */ + control->blocks[fl][sl] = block; + control->fl_bitmap |= (1U << fl); + control->sl_bitmap[fl] |= (1U << sl); +} + +/* Remove a given block from the free list. */ +static void block_remove(control_t* control, block_header_t* block) +{ + int fl, sl; + mapping_insert(block_size(block), &fl, &sl); + remove_free_block(control, block, fl, sl); +} + +/* Insert a given block into the free list. */ +static void block_insert(control_t* control, block_header_t* block) +{ + int fl, sl; + mapping_insert(block_size(block), &fl, &sl); + insert_free_block(control, block, fl, sl); +} + +static int block_can_split(block_header_t* block, size_t size) +{ + return block_size(block) >= sizeof(block_header_t) + size; +} + +/* Split a block into two, the second of which is free. */ +static block_header_t* block_split(block_header_t* block, size_t size) +{ + /* Calculate the amount of space left in the remaining block. 
*/ + block_header_t* remaining = + offset_to_block(block_to_ptr(block), size - block_header_overhead); + + const size_t remain_size = block_size(block) - (size + block_header_overhead); + + tlsf_assert(block_to_ptr(remaining) == align_ptr(block_to_ptr(remaining), ALIGN_SIZE) + && "remaining block not aligned properly"); + + tlsf_assert(block_size(block) == remain_size + size + block_header_overhead); + block_set_size(remaining, remain_size); + tlsf_assert(block_size(remaining) >= block_size_min && "block split with invalid size"); + + block_set_size(block, size); + block_mark_as_free(remaining); + + return remaining; +} + +/* Absorb a free block's storage into an adjacent previous free block. */ +static block_header_t* block_absorb(block_header_t* prev, block_header_t* block) +{ + tlsf_assert(!block_is_last(prev) && "previous block can't be last"); + /* Note: Leaves flags untouched. */ + prev->size += block_size(block) + block_header_overhead; + block_link_next(prev); + return prev; +} + +/* Merge a just-freed block with an adjacent previous free block. */ +static block_header_t* block_merge_prev(control_t* control, block_header_t* block) +{ + if (block_is_prev_free(block)) + { + block_header_t* prev = block_prev(block); + tlsf_assert(prev && "prev physical block can't be null"); + tlsf_assert(block_is_free(prev) && "prev block is not free though marked as such"); + block_remove(control, prev); + block = block_absorb(prev, block); + } + + return block; +} + +/* Merge a just-freed block with an adjacent free block. 
*/ +static block_header_t* block_merge_next(control_t* control, block_header_t* block) +{ + block_header_t* next = block_next(block); + tlsf_assert(next && "next physical block can't be null"); + + if (block_is_free(next)) + { + tlsf_assert(!block_is_last(block) && "previous block can't be last"); + block_remove(control, next); + block = block_absorb(block, next); + } + + return block; +} + +/* Trim any trailing block space off the end of a block, return to pool. */ +static void block_trim_free(control_t* control, block_header_t* block, size_t size) +{ + tlsf_assert(block_is_free(block) && "block must be free"); + if (block_can_split(block, size)) + { + block_header_t* remaining_block = block_split(block, size); + block_link_next(block); + block_set_prev_free(remaining_block); + block_insert(control, remaining_block); + } +} + +/* Trim any trailing block space off the end of a used block, return to pool. */ +static void block_trim_used(control_t* control, block_header_t* block, size_t size) +{ + tlsf_assert(!block_is_free(block) && "block must be used"); + if (block_can_split(block, size)) + { + /* If the next block is free, we must coalesce. */ + block_header_t* remaining_block = block_split(block, size); + block_set_prev_used(remaining_block); + + remaining_block = block_merge_next(control, remaining_block); + block_insert(control, remaining_block); + } +} + +static block_header_t* block_trim_free_leading(control_t* control, block_header_t* block, size_t size) +{ + block_header_t* remaining_block = block; + if (block_can_split(block, size)) + { + /* We want the 2nd block. 
*/ + remaining_block = block_split(block, size - block_header_overhead); + block_set_prev_free(remaining_block); + + block_link_next(block); + block_insert(control, block); + } + + return remaining_block; +} + +static block_header_t* block_locate_free(control_t* control, size_t size) +{ + int fl = 0, sl = 0; + block_header_t* block = 0; + + if (size) + { + mapping_search(size, &fl, &sl); + + /* + ** mapping_search can futz with the size, so for excessively large sizes it can sometimes wind up + ** with indices that are off the end of the block array. + ** So, we protect against that here, since this is the only callsite of mapping_search. + ** Note that we don't need to check sl, since it comes from a modulo operation that guarantees it's always in range. + */ + if (fl < FL_INDEX_COUNT) + { + block = search_suitable_block(control, &fl, &sl); + } + } + + if (block) + { + tlsf_assert(block_size(block) >= size); + remove_free_block(control, block, fl, sl); + } + + return block; +} + +static void* block_prepare_used(control_t* control, block_header_t* block, size_t size) +{ + void* p = 0; + if (block) + { + tlsf_assert(size && "size must be non-zero"); + block_trim_free(control, block, size); + block_mark_as_used(block); + p = block_to_ptr(block); + } + return p; +} + +/* Clear structure and point all empty lists at the null block. */ +static void control_construct(control_t* control) +{ + int i, j; + + control->block_null.next_free = &control->block_null; + control->block_null.prev_free = &control->block_null; + + control->fl_bitmap = 0; + for (i = 0; i < FL_INDEX_COUNT; ++i) + { + control->sl_bitmap[i] = 0; + for (j = 0; j < SL_INDEX_COUNT; ++j) + { + control->blocks[i][j] = &control->block_null; + } + } +} + +/* +** Debugging utilities. 
+*/ + +typedef struct integrity_t +{ + int prev_status; + int status; +} integrity_t; + +#define tlsf_insist(x) { tlsf_assert(x); if (!(x)) { status--; } } + +static void integrity_walker(void* ptr, size_t size, int used, void* user) +{ + block_header_t* block = block_from_ptr(ptr); + integrity_t* integ = tlsf_cast(integrity_t*, user); + const int this_prev_status = block_is_prev_free(block) ? 1 : 0; + const int this_status = block_is_free(block) ? 1 : 0; + const size_t this_block_size = block_size(block); + + int status = 0; + (void)used; + tlsf_insist(integ->prev_status == this_prev_status && "prev status incorrect"); + tlsf_insist(size == this_block_size && "block size incorrect"); + + integ->prev_status = this_status; + integ->status += status; +} + +int tlsf_check(tlsf_t tlsf) +{ + int i, j; + + control_t* control = tlsf_cast(control_t*, tlsf); + int status = 0; + + /* Check that the free lists and bitmaps are accurate. */ + for (i = 0; i < FL_INDEX_COUNT; ++i) + { + for (j = 0; j < SL_INDEX_COUNT; ++j) + { + const int fl_map = control->fl_bitmap & (1U << i); + const int sl_list = control->sl_bitmap[i]; + const int sl_map = sl_list & (1U << j); + const block_header_t* block = control->blocks[i][j]; + + /* Check that first- and second-level lists agree. */ + if (!fl_map) + { + tlsf_insist(!sl_map && "second-level map must be null"); + } + + if (!sl_map) + { + tlsf_insist(block == &control->block_null && "block list must be null"); + continue; + } + + /* Check that there is at least one free block. 
*/ + tlsf_insist(sl_list && "no free blocks in second-level map"); + tlsf_insist(block != &control->block_null && "block should not be null"); + + while (block != &control->block_null) + { + int fli, sli; + tlsf_insist(block_is_free(block) && "block should be free"); + tlsf_insist(!block_is_prev_free(block) && "blocks should have coalesced"); + tlsf_insist(!block_is_free(block_next(block)) && "blocks should have coalesced"); + tlsf_insist(block_is_prev_free(block_next(block)) && "block should be free"); + tlsf_insist(block_size(block) >= block_size_min && "block not minimum size"); + + mapping_insert(block_size(block), &fli, &sli); + tlsf_insist(fli == i && sli == j && "block size indexed in wrong list"); + block = block->next_free; + } + } + } + + return status; +} + +#undef tlsf_insist + +static void default_walker(void* ptr, size_t size, int used, void* user) +{ + (void)user; + printf("\t%p %s size: %x (%p)\n", ptr, used ? "used" : "free", (unsigned int)size, block_from_ptr(ptr)); +} + +void tlsf_walk_pool(pool_t pool, tlsf_walker walker, void* user) +{ + tlsf_walker pool_walker = walker ? walker : default_walker; + block_header_t* block = + offset_to_block(pool, -(int)block_header_overhead); + + while (block && !block_is_last(block)) + { + pool_walker( + block_to_ptr(block), + block_size(block), + !block_is_free(block), + user); + block = block_next(block); + } +} + +size_t tlsf_block_size(void* ptr) +{ + size_t size = 0; + if (ptr) + { + const block_header_t* block = block_from_ptr(ptr); + size = block_size(block); + } + return size; +} + +int tlsf_check_pool(pool_t pool) +{ + /* Check that the blocks are physically correct. 
*/ + integrity_t integ = { 0, 0 }; + tlsf_walk_pool(pool, integrity_walker, &integ); + + return integ.status; +} + +/* +** Size of the TLSF structures in a given memory block passed to +** tlsf_create, equal to the size of a control_t +*/ +size_t tlsf_size(void) +{ + return sizeof(control_t); +} + +size_t tlsf_align_size(void) +{ + return ALIGN_SIZE; +} + +size_t tlsf_block_size_min(void) +{ + return block_size_min; +} + +size_t tlsf_block_size_max(void) +{ + return block_size_max; +} + +/* +** Overhead of the TLSF structures in a given memory block passed to +** tlsf_add_pool, equal to the overhead of a free block and the +** sentinel block. +*/ +size_t tlsf_pool_overhead(void) +{ + return 2 * block_header_overhead; +} + +size_t tlsf_alloc_overhead(void) +{ + return block_header_overhead; +} + +pool_t tlsf_add_pool(tlsf_t tlsf, void* mem, size_t bytes) +{ + block_header_t* block; + block_header_t* next; + + const size_t pool_overhead = tlsf_pool_overhead(); + const size_t pool_bytes = align_down(bytes - pool_overhead, ALIGN_SIZE); + + if (((ptrdiff_t)mem % ALIGN_SIZE) != 0) + { + printf("tlsf_add_pool: Memory must be aligned by %u bytes.\n", + (unsigned int)ALIGN_SIZE); + return 0; + } + + if (pool_bytes < block_size_min || pool_bytes > block_size_max) + { +#if defined (TLSF_64BIT) + printf("tlsf_add_pool: Memory size must be between 0x%x and 0x%x00 bytes.\n", + (unsigned int)(pool_overhead + block_size_min), + (unsigned int)((pool_overhead + block_size_max) / 256)); +#else + printf("tlsf_add_pool: Memory size must be between %u and %u bytes.\n", + (unsigned int)(pool_overhead + block_size_min), + (unsigned int)(pool_overhead + block_size_max)); +#endif + return 0; + } + + /* + ** Create the main free block. Offset the start of the block slightly + ** so that the prev_phys_block field falls outside of the pool - + ** it will never be used. 
+ */ + block = offset_to_block(mem, -(tlsfptr_t)block_header_overhead); + block_set_size(block, pool_bytes); + block_set_free(block); + block_set_prev_used(block); + block_insert(tlsf_cast(control_t*, tlsf), block); + + /* Split the block to create a zero-size sentinel block. */ + next = block_link_next(block); + block_set_size(next, 0); + block_set_used(next); + block_set_prev_free(next); + + return mem; +} + +void tlsf_remove_pool(tlsf_t tlsf, pool_t pool) +{ + control_t* control = tlsf_cast(control_t*, tlsf); + block_header_t* block = offset_to_block(pool, -(int)block_header_overhead); + + int fl = 0, sl = 0; + + tlsf_assert(block_is_free(block) && "block should be free"); + tlsf_assert(!block_is_free(block_next(block)) && "next block should not be free"); + tlsf_assert(block_size(block_next(block)) == 0 && "next block size should be zero"); + + mapping_insert(block_size(block), &fl, &sl); + remove_free_block(control, block, fl, sl); +} + +/* +** TLSF main interface. +*/ + +#if _DEBUG +int test_ffs_fls() +{ + /* Verify ffs/fls work properly. */ + int rv = 0; + rv += (tlsf_ffs(0) == -1) ? 0 : 0x1; + rv += (tlsf_fls(0) == -1) ? 0 : 0x2; + rv += (tlsf_ffs(1) == 0) ? 0 : 0x4; + rv += (tlsf_fls(1) == 0) ? 0 : 0x8; + rv += (tlsf_ffs(0x80000000) == 31) ? 0 : 0x10; + rv += (tlsf_ffs(0x80008000) == 15) ? 0 : 0x20; + rv += (tlsf_fls(0x80000008) == 31) ? 0 : 0x40; + rv += (tlsf_fls(0x7FFFFFFF) == 30) ? 0 : 0x80; + +#if defined (TLSF_64BIT) + rv += (tlsf_fls_sizet(0x80000000) == 31) ? 0 : 0x100; + rv += (tlsf_fls_sizet(0x100000000) == 32) ? 0 : 0x200; + rv += (tlsf_fls_sizet(0xffffffffffffffff) == 63) ? 
0 : 0x400; +#endif + + if (rv) + { + printf("test_ffs_fls: %x ffs/fls tests failed.\n", rv); + } + return rv; +} +#endif + +tlsf_t tlsf_create(void* mem) +{ +#if _DEBUG + if (test_ffs_fls()) + { + return 0; + } +#endif + + if (((tlsfptr_t)mem % ALIGN_SIZE) != 0) + { + printf("tlsf_create: Memory must be aligned to %u bytes.\n", + (unsigned int)ALIGN_SIZE); + return 0; + } + + control_construct(tlsf_cast(control_t*, mem)); + + return tlsf_cast(tlsf_t, mem); +} + +tlsf_t tlsf_create_with_pool(void* mem, size_t bytes) +{ + tlsf_t tlsf = tlsf_create(mem); + tlsf_add_pool(tlsf, (char*)mem + tlsf_size(), bytes - tlsf_size()); + return tlsf; +} + +void tlsf_destroy(tlsf_t tlsf) +{ + /* Nothing to do. */ + (void)tlsf; +} + +pool_t tlsf_get_pool(tlsf_t tlsf) +{ + return tlsf_cast(pool_t, (char*)tlsf + tlsf_size()); +} + +void* tlsf_malloc(tlsf_t tlsf, size_t size) +{ + control_t* control = tlsf_cast(control_t*, tlsf); + const size_t adjust = adjust_request_size(size, ALIGN_SIZE); + block_header_t* block = block_locate_free(control, adjust); + return block_prepare_used(control, block, adjust); +} + +void* tlsf_memalign(tlsf_t tlsf, size_t align, size_t size) +{ + control_t* control = tlsf_cast(control_t*, tlsf); + const size_t adjust = adjust_request_size(size, ALIGN_SIZE); + + /* + ** We must allocate an additional minimum block size bytes so that if + ** our free block will leave an alignment gap which is smaller, we can + ** trim a leading free block and release it back to the pool. We must + ** do this because the previous physical block is in use, therefore + ** the prev_phys_block field is not valid, and we can't simply adjust + ** the size of that block. + */ + const size_t gap_minimum = sizeof(block_header_t); + const size_t size_with_gap = adjust_request_size(adjust + align + gap_minimum, align); + + /* + ** If alignment is less than or equals base alignment, we're done. + ** If we requested 0 bytes, return null, as tlsf_malloc(0) does. 
+ */ + const size_t aligned_size = (adjust && align > ALIGN_SIZE) ? size_with_gap : adjust; + + block_header_t* block = block_locate_free(control, aligned_size); + + /* This can't be a static assert. */ + tlsf_assert(sizeof(block_header_t) == block_size_min + block_header_overhead); + + if (block) + { + void* ptr = block_to_ptr(block); + void* aligned = align_ptr(ptr, align); + size_t gap = tlsf_cast(size_t, + tlsf_cast(tlsfptr_t, aligned) - tlsf_cast(tlsfptr_t, ptr)); + + /* If gap size is too small, offset to next aligned boundary. */ + if (gap && gap < gap_minimum) + { + const size_t gap_remain = gap_minimum - gap; + const size_t offset = tlsf_max(gap_remain, align); + const void* next_aligned = tlsf_cast(void*, + tlsf_cast(tlsfptr_t, aligned) + offset); + + aligned = align_ptr(next_aligned, align); + gap = tlsf_cast(size_t, + tlsf_cast(tlsfptr_t, aligned) - tlsf_cast(tlsfptr_t, ptr)); + } + + if (gap) + { + tlsf_assert(gap >= gap_minimum && "gap size too small"); + block = block_trim_free_leading(control, block, gap); + } + } + + return block_prepare_used(control, block, adjust); +} + +void tlsf_free(tlsf_t tlsf, void* ptr) +{ + /* Don't attempt to free a NULL pointer. */ + if (ptr) + { + control_t* control = tlsf_cast(control_t*, tlsf); + block_header_t* block = block_from_ptr(ptr); + tlsf_assert(!block_is_free(block) && "block already marked as free"); + block_mark_as_free(block); + block = block_merge_prev(control, block); + block = block_merge_next(control, block); + block_insert(control, block); + } +} + +/* +** The TLSF block information provides us with enough information to +** provide a reasonably intelligent implementation of realloc, growing or +** shrinking the currently allocated block as required. 
+** +** This routine handles the somewhat esoteric edge cases of realloc: +** - a non-zero size with a null pointer will behave like malloc +** - a zero size with a non-null pointer will behave like free +** - a request that cannot be satisfied will leave the original buffer +** untouched +** - an extended buffer size will leave the newly-allocated area with +** contents undefined +*/ +void* tlsf_realloc(tlsf_t tlsf, void* ptr, size_t size) +{ + control_t* control = tlsf_cast(control_t*, tlsf); + void* p = 0; + + /* Zero-size requests are treated as free. */ + if (ptr && size == 0) + { + tlsf_free(tlsf, ptr); + } + /* Requests with NULL pointers are treated as malloc. */ + else if (!ptr) + { + p = tlsf_malloc(tlsf, size); + } + else + { + block_header_t* block = block_from_ptr(ptr); + block_header_t* next = block_next(block); + + const size_t cursize = block_size(block); + const size_t combined = cursize + block_size(next) + block_header_overhead; + const size_t adjust = adjust_request_size(size, ALIGN_SIZE); + + tlsf_assert(!block_is_free(block) && "block already marked as free"); + + /* + ** If the next block is used, or when combined with the current + ** block, does not offer enough space, we must reallocate and copy. + */ + if (adjust > cursize && (!block_is_free(next) || adjust > combined)) + { + p = tlsf_malloc(tlsf, size); + if (p) + { + const size_t minsize = tlsf_min(cursize, size); + memcpy(p, ptr, minsize); + tlsf_free(tlsf, ptr); + } + } + else + { + /* Do we need to expand to the next block? */ + if (adjust > cursize) + { + block_merge_next(control, block); + block_mark_as_used(block); + } + + /* Trim the resulting block and return the original pointer. 
*/ + block_trim_used(control, block, adjust); + p = ptr; + } + } + + return p; +} diff --git a/deps/lightrec/tlsf/tlsf.h b/deps/lightrec/tlsf/tlsf.h new file mode 100644 index 00000000..e9b5a91c --- /dev/null +++ b/deps/lightrec/tlsf/tlsf.h @@ -0,0 +1,90 @@ +#ifndef INCLUDED_tlsf +#define INCLUDED_tlsf + +/* +** Two Level Segregated Fit memory allocator, version 3.1. +** Written by Matthew Conte +** http://tlsf.baisoku.org +** +** Based on the original documentation by Miguel Masmano: +** http://www.gii.upv.es/tlsf/main/docs +** +** This implementation was written to the specification +** of the document, therefore no GPL restrictions apply. +** +** Copyright (c) 2006-2016, Matthew Conte +** All rights reserved. +** +** Redistribution and use in source and binary forms, with or without +** modification, are permitted provided that the following conditions are met: +** * Redistributions of source code must retain the above copyright +** notice, this list of conditions and the following disclaimer. +** * Redistributions in binary form must reproduce the above copyright +** notice, this list of conditions and the following disclaimer in the +** documentation and/or other materials provided with the distribution. +** * Neither the name of the copyright holder nor the +** names of its contributors may be used to endorse or promote products +** derived from this software without specific prior written permission. +** +** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +** ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +** WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +** DISCLAIMED. 
IN NO EVENT SHALL MATTHEW CONTE BE LIABLE FOR ANY +** DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +** (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +** LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +** ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +** SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include <stddef.h> + +#if defined(__cplusplus) +extern "C" { +#endif + +/* tlsf_t: a TLSF structure. Can contain 1 to N pools. */ +/* pool_t: a block of memory that TLSF can manage. */ +typedef void* tlsf_t; +typedef void* pool_t; + +/* Create/destroy a memory pool. */ +tlsf_t tlsf_create(void* mem); +tlsf_t tlsf_create_with_pool(void* mem, size_t bytes); +void tlsf_destroy(tlsf_t tlsf); +pool_t tlsf_get_pool(tlsf_t tlsf); + +/* Add/remove memory pools. */ +pool_t tlsf_add_pool(tlsf_t tlsf, void* mem, size_t bytes); +void tlsf_remove_pool(tlsf_t tlsf, pool_t pool); + +/* malloc/memalign/realloc/free replacements. */ +void* tlsf_malloc(tlsf_t tlsf, size_t bytes); +void* tlsf_memalign(tlsf_t tlsf, size_t align, size_t bytes); +void* tlsf_realloc(tlsf_t tlsf, void* ptr, size_t size); +void tlsf_free(tlsf_t tlsf, void* ptr); + +/* Returns internal block size, not original request size */ +size_t tlsf_block_size(void* ptr); + +/* Overheads/limits of internal structures. */ +size_t tlsf_size(void); +size_t tlsf_align_size(void); +size_t tlsf_block_size_min(void); +size_t tlsf_block_size_max(void); +size_t tlsf_pool_overhead(void); +size_t tlsf_alloc_overhead(void); + +/* Debugging. */ +typedef void (*tlsf_walker)(void* ptr, size_t size, int used, void* user); +void tlsf_walk_pool(pool_t pool, tlsf_walker walker, void* user); +/* Returns nonzero if any internal consistency check fails.
*/ +int tlsf_check(tlsf_t tlsf); +int tlsf_check_pool(pool_t pool); + +#if defined(__cplusplus) +}; +#endif + +#endif diff --git a/include/lightning/lightning.h b/include/lightning/lightning.h index a155b8a8..e7afc5c2 100644 --- a/include/lightning/lightning.h +++ b/include/lightning/lightning.h @@ -123,6 +123,11 @@ typedef jit_int32_t jit_bool_t; typedef jit_int32_t jit_gpr_t; typedef jit_int32_t jit_fpr_t; +#if !defined(__powerpc__) && \ + (defined(__POWERPC__) || defined(__ppc__) || defined(__PPC__)) +#define __powerpc__ 1 +#endif + #if defined(__i386__) || defined(__x86_64__) # include <lightning/jit_x86.h> #elif defined(__mips__) @@ -305,10 +310,6 @@ typedef enum { #define jit_comr(u,v) jit_new_node_ww(jit_code_comr,u,v) jit_code_negr, jit_code_comr, -#define jit_ffsr(u,v) jit_new_node_ww(jit_code_ffsr,u,v) -#define jit_clzr(u,v) jit_new_node_ww(jit_code_clzr,u,v) - jit_code_ffsr, jit_code_clzr, - #define jit_ltr(u,v,w) jit_new_node_www(jit_code_ltr,u,v,w) #define jit_lti(u,v,w) jit_new_node_www(jit_code_lti,u,v,w) jit_code_ltr, jit_code_lti, @@ -343,9 +344,11 @@ typedef enum { #define jit_movr(u,v) jit_new_node_ww(jit_code_movr,u,v) #define jit_movi(u,v) jit_new_node_ww(jit_code_movi,u,v) jit_code_movr, jit_code_movi, + #define jit_movnr(u,v,w) jit_new_node_www(jit_code_movnr,u,v,w) #define jit_movzr(u,v,w) jit_new_node_www(jit_code_movzr,u,v,w) jit_code_movnr, jit_code_movzr, + #define jit_extr_c(u,v) jit_new_node_ww(jit_code_extr_c,u,v) #define jit_extr_uc(u,v) jit_new_node_ww(jit_code_extr_uc,u,v) jit_code_extr_c, jit_code_extr_uc, @@ -898,6 +901,18 @@ typedef enum { #define jit_movr_d_w(u, v) jit_new_node_ww(jit_code_movr_d_w, u, v) #define jit_movi_d_w(u, v) jit_new_node_wd(jit_code_movi_d_w, u, v) +#define jit_bswapr_us(u,v) jit_new_node_ww(jit_code_bswapr_us,u,v) + jit_code_bswapr_us, +#define jit_bswapr_ui(u,v) jit_new_node_ww(jit_code_bswapr_ui,u,v) + jit_code_bswapr_ui, +#define jit_bswapr_ul(u,v) jit_new_node_ww(jit_code_bswapr_ul,u,v) + jit_code_bswapr_ul, +#if
__WORDSIZE == 32 +#define jit_bswapr(u,v) jit_new_node_ww(jit_code_bswapr_ui,u,v) +#else +#define jit_bswapr(u,v) jit_new_node_ww(jit_code_bswapr_ul,u,v) +#endif + jit_code_last_code } jit_code_t; diff --git a/include/lightrec/lightrec-config.h b/include/lightrec/lightrec-config.h index bbb2329c..34ac7a6e 100644 --- a/include/lightrec/lightrec-config.h +++ b/include/lightrec/lightrec-config.h @@ -10,6 +10,7 @@ #define ENABLE_FIRST_PASS 1 #define ENABLE_DISASSEMBLER 0 #define ENABLE_TINYMM 0 +#define ENABLE_CODE_BUFFER 0 #define HAS_DEFAULT_ELM 1