endif
LIGHTREC_CUSTOM_MAP ?= 0
+CFLAGS += -DLIGHTREC_CUSTOM_MAP=$(LIGHTREC_CUSTOM_MAP)
# core
OBJS += libpcsxcore/cdriso.o libpcsxcore/cdrom.o libpcsxcore/cheat.o libpcsxcore/database.o \
# dynarec
ifeq "$(DYNAREC)" "lightrec"
CFLAGS += -Ideps/lightning/include -Ideps/lightrec -Iinclude/lightning -Iinclude/lightrec \
- -DLIGHTREC -DLIGHTREC_STATIC \
- -DLIGHTREC_CUSTOM_MAP=$(LIGHTREC_CUSTOM_MAP)
-LDLIBS += -lrt
+ -DLIGHTREC -DLIGHTREC_STATIC -DHAVE_MMAP
ifeq ($(LIGHTREC_CUSTOM_MAP),1)
+LDLIBS += -lrt
OBJS += libpcsxcore/lightrec/mem.o
endif
OBJS += libpcsxcore/lightrec/plugin.o
+*
+
+*.o
+*.lo
+*.la
+
+.libs/
+.deps/
+*/.libs/
+*/.deps/
+
autom4te.cache
aclocal.m4
depcomp
size
stamp-h1
test-driver
-check/.deps
-doc/.deps
-lib/.deps
+
m4/libtool.m4
m4/lt~obsolete.m4
m4/ltoptions.m4
m4/ltsugar.m4
m4/ltversion.m4
-doc/mdate-sh
-doc/texinfo.tex
+
lightning.pc
+include/lightning.h
+
+build-aux/
[subrepo]
remote = https://github.com/pcercuei/gnu_lightning.git
branch = pcsx_rearmed
- commit = 6f101bf8eccef737d60bf7e6ba85558db49e7908
- parent = 02dbc8694f303728f19734328166a1c6dfef289c
+ commit = 2a199e4d3cb250a76bd91f42eaf56f6233d34663
+ parent = db4140baf19c727fa1a705236130edfc6f363ce0
method = merge
cmdver = 0.4.3
+2022-05-14 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning.h.in: Reorder jit_mov{n,z}r in instruction list.
+ * lib/jit_alpha.c, lib/jit_alpha-cpu.c, lib/jit_hppa.c,
+ lib/jit_hppa-cpu.c, lib/jit_ia64.c, lib/jit_ia64-cpu.c,
+ lib/jit_riscv.c, lib/jit_riscv-cpu.c, lib/jit_s390.c,
+ lib/jit_s390-cpu.c, lib/jit_sparc.c, lib/jit_sparc-cpu.c:
+ Implement fallback jit_mov{n,z}r. These are a somewhat cheap
+ implementation, but should be reviewed for the arches that already
+ have a proper conditional move.
+ * lib/jit_arm-sz.c, lib/jit_mips-sz.c: Add missing maximum size
+ estimate and reorder.
+ * lib/jit_aarch64-sz.c, lib/jit_x86-sz.c, lib/jit_ppc-sz.c:
+ Reorder entry to match definition order.
+ * lib/jit_aarch64-sz.c, lib/jit_alpha-sz.c, lib/jit_hppa-sz.c,
+ lib/jit_ia64-sz.c, lib/jit_riscv-sz.c, lib/jit_s390-sz.c,
+ lib/jit_sparc-sz.c: Add heuristic value, basically the sum of
+ the cost of a movr + beqr.
+ * lib/jit_names.c: Add entries for debug output of mov{n,z}r.
+ * lib/lightning.c: Use proper bitmask in jit_classify.
+
2021-04-03 Marc Nieper-Wißkirchen <marc@nieper-wisskirchen.de>
* check/Makefile.am: Add test for the live instruction.
GNU lightning is a library to aid in making portable programs
that compile assembly code at run time. For more information,
look at the info documentation.
+
+For help building lightning, see README-hacking.
** Building
+If you intend to do development work with lightning, it's useful to build
+lightning with its disassembler feature enabled. This optional feature
+requires additional dependencies. On Ubuntu, this command should work:
+
+ $ sudo apt-get install binutils-dev libiberty-dev zlib1g-dev
+
After getting the git sources, and installing the tools above, you can run
$ ./bootstrap
After that first time, running make should suffice.
+To install lightning:
+
+ $ sudo make install
+
** Gnulib
This distribution also uses Gnulib (https://www.gnu.org/software/gnulib) to
--- /dev/null
+*.nodata
+nodata
+*.log
+*.trs
+
+3to2
+bswap
+add
+align
+allocai
+allocar
+alu_add
+alu_and
+alu_com
+alu_div
+alu_lsh
+alu_mul
+alu_neg
+alu_or
+alu_rem
+alu_rsb
+alu_rsh
+alu_sub
+alu_xor
+alux_add
+alux_sub
+bp
+branch
+call
+carg
+carry
+ccall
+clobber
+ctramp
+cva_list
+cvt
+divi
+fib
+float
+fop_abs
+fop_sqrt
+hton
+jmpr
+ldsti
+ldstr
+ldstr-c
+ldstxi
+ldstxi-c
+ldstxr
+ldstxr-c
+lightning
+live
+put
+qalu_div
+qalu_mul
+range
+ranger
+ret
+rpn
+self
+setcode
+stack
+tramp
+va_list
+varargs
ldstxi-c.tst ldstxi-c.ok \
cvt.tst cvt.ok \
hton.tst hton.ok \
+ bswap.tst bswap.ok \
branch.tst branch.ok \
alu.inc \
alu_add.tst alu_add.ok \
ldstr ldsti \
ldstxr ldstxi \
ldstr-c ldstxr-c ldstxi-c \
- cvt hton branch \
+ cvt hton bswap branch \
alu_add alux_add \
alu_sub alux_sub alu_rsb \
alu_mul alu_div alu_rem \
rpn.arm ldstr.arm ldsti.arm \
ldstxr.arm ldstxi.arm \
ldstr-c.arm ldstxr-c.arm ldstxi-c.arm \
- cvt.arm hton.arm branch.arm \
+ cvt.arm hton.arm bswap.arm branch.arm \
alu_add.arm alux_add.arm \
alu_sub.arm alux_sub.arm alu_rsb.arm \
alu_mul.arm alu_div.arm alu_rem.arm \
rpn.swf ldstr.swf ldsti.swf \
ldstxr.swf ldstxi.swf \
ldstr-c.swf ldstxr-c.swf ldstxi-c.swf \
- cvt.swf hton.swf branch.swf \
+ cvt.swf hton.swf bswap.swf branch.swf \
alu_add.swf alux_add.swf \
alu_sub.swf alux_sub.swf alu_rsb.swf \
alu_mul.swf alu_div.swf alu_rem.swf \
rpn.arm.swf ldstr.arm.swf ldsti.arm.swf \
ldstxr.arm.swf ldstxi.arm.swf \
ldstr-c.arm.swf ldstxr-c.arm.swf ldstxi-c.arm.swf \
- cvt.arm.swf hton.arm.swf branch.arm.swf \
+ cvt.arm.swf hton.arm.swf bswap.arm.swf branch.arm.swf \
alu_add.arm.swf alux_add.arm.swf \
alu_sub.arm.swf alux_sub.arm.swf alu_rsb.arm.swf \
alu_mul.arm.swf alu_div.arm.swf alu_rem.arm.swf \
rpn.arm4.swf ldstr.arm4.swf ldsti.arm4.swf \
ldstxr.arm4.swf ldstxi.arm4.swf \
ldstr-c.arm4.swf ldstxr-c.arm4.swf ldstxi-c.arm4.swf \
- cvt.arm4.swf hton.arm4.swf branch.arm4.swf \
- alu_add.arm4.swf alux_add.arm4.swf \
+ cvt.arm4.swf hton.arm4.swf bswap.arm4.swf \
+ branch.arm4.swf alu_add.arm4.swf alux_add.arm4.swf \
alu_sub.arm4.swf alux_sub.arm4.swf alu_rsb.arm4.swf \
alu_mul.arm4.swf alu_div.arm4.swf alu_rem.arm4.swf \
alu_and.arm4.swf alu_or.arm4.swf alu_xor.arm4.swf \
--- /dev/null
+.data 16
+ok:
+.c "ok\n"
+
+#define us12_i 0x1234
+#define us7f_i 0x7ff7
+#define us80_i 0x8008
+#define usff_i 0xffff
+#define ui12_i 0x01234567
+#define ui7f_i 0x7f7ff7f7
+#define ui80_i 0x80800808
+#define uiff_i 0xffffffff
+#define ul12_i 0x0123456789abcdef
+#define ul7f_i 0x7f7f7f7ff7f7f7f7
+#define ul80_i 0x8080808008080808
+#define ulff_i 0xffffffffffffffff
+
+#if __WORDSIZE == 32
+# define xus12_i 0xffff1234
+# define xus7f_i 0x10107ff7
+# define xus80_i 0x81188008
+# define xusff_i 0xeaaeffff
+#else
+# define xus12_i 0xffffffffffff1234
+# define xus7f_i 0x1010100101017ff7
+# define xus80_i 0x8181811818818008
+# define xusff_i 0xeaeaeaaeaeaeffff
+# define xui12_i 0xffffffff01234567
+# define xui7f_i 0x101001017f7ff7f7
+# define xui80_i 0x8181181880800808
+# define xuiff_i 0xeaeaaeaeffffffff
+#endif
+
+# define us12_o 0x3412
+# define us7f_o 0xf77f
+# define us80_o 0x0880
+# define usff_o 0xffff
+# define ui12_o 0x67452301
+# define ui7f_o 0xf7f77f7f
+# define ui80_o 0x08088080
+# define uiff_o 0xffffffff
+# define ul12_o 0xefcdab8967452301
+# define ul7f_o 0xf7f7f7f77f7f7f7f
+# define ul80_o 0x0808080880808080
+# define ulff_o 0xffffffffffffffff
+
+#define BSWAP4(I, O, T, R0, R1) \
+ movi %R0 I \
+ bswapr_##T %R1 %R0 \
+ beqi T##R0##R1##I %R1 O \
+ calli @abort \
+T##R0##R1##I:
+
+#define BSWAP3(T, R0, R1) \
+ BSWAP4(T##12_i, T##12_o, T, R0, R1) \
+ BSWAP4(x##T##12_i, T##12_o, T, R0, R1) \
+ BSWAP4(T##7f_i, T##7f_o, T, R0, R1) \
+ BSWAP4(x##T##7f_i, T##7f_o, T, R0, R1) \
+ BSWAP4(T##80_i, T##80_o, T, R0, R1) \
+ BSWAP4(x##T##80_i, T##80_o, T, R0, R1) \
+ BSWAP4(T##ff_i, T##ff_o, T, R0, R1) \
+ BSWAP4(x##T##ff_i, T##ff_o, T, R0, R1)
+
+#define BSWAP3x(T, R0, R1) \
+ BSWAP4(T##12_i, T##12_o, T, R0, R1) \
+ BSWAP4(T##7f_i, T##7f_o, T, R0, R1) \
+ BSWAP4(T##80_i, T##80_o, T, R0, R1) \
+ BSWAP4(T##ff_i, T##ff_o, T, R0, R1)
+
+#define BSWAP2(T, V0, V1, V2, R0, R1, R2) \
+ BSWAP3(T, V0, V0) \
+ BSWAP3(T, V0, V1) \
+ BSWAP3(T, V0, V2) \
+ BSWAP3(T, V0, R0) \
+ BSWAP3(T, V0, R1) \
+ BSWAP3(T, V0, R2) \
+
+#define BSWAP2x(T, V0, V1, V2, R0, R1, R2) \
+ BSWAP3x(T, V0, V0) \
+ BSWAP3x(T, V0, V1) \
+ BSWAP3x(T, V0, V2) \
+ BSWAP3x(T, V0, R0) \
+ BSWAP3x(T, V0, R1) \
+ BSWAP3x(T, V0, R2) \
+
+#define BSWAP1(T, V0, V1, V2, R0, R1, R2) \
+ BSWAP2(T, V0, V1, V2, R0, R1, R2) \
+ BSWAP2(T, V1, V2, R0, R1, R2, V0) \
+ BSWAP2(T, V2, R0, R1, R2, V0, V1) \
+ BSWAP2(T, R0, R1, R2, V0, V1, V2) \
+ BSWAP2(T, R1, R2, V0, V1, V2, R0) \
+ BSWAP2(T, R2, V0, V1, V2, R0, R1)
+
+#define BSWAP1x(T, V0, V1, V2, R0, R1, R2) \
+ BSWAP2x(T, V0, V1, V2, R0, R1, R2) \
+ BSWAP2x(T, V1, V2, R0, R1, R2, V0) \
+ BSWAP2x(T, V2, R0, R1, R2, V0, V1) \
+ BSWAP2x(T, R0, R1, R2, V0, V1, V2) \
+ BSWAP2x(T, R1, R2, V0, V1, V2, R0) \
+ BSWAP2x(T, R2, V0, V1, V2, R0, R1)
+
+#if __WORDSIZE == 32
+# define BSWAP(V0, V1, V2, R0, R1, R2) \
+ BSWAP1(us, V0, V1, V2, R0, R1, R2) \
+ BSWAP1x(ui, V0, V1, V2, R0, R1, R2)
+#else
+# define BSWAP(V0, V1, V2, R0, R1, R2) \
+ BSWAP1(us, V0, V1, V2, R0, R1, R2) \
+ BSWAP1(ui, V0, V1, V2, R0, R1, R2) \
+ BSWAP1x(ul, V0, V1, V2, R0, R1, R2)
+#endif
+
+.code
+ prolog
+ /* simple sequence for easier disassembly reading and encoding check */
+ movi %r0 us12_i
+ bswapr_us %r1 %r0
+ beqi us %r1 us12_o
+ calli @abort
+us:
+
+ movi %r0 xus12_i
+ bswapr_us %r1 %r0
+ beqi xus %r1 us12_o
+ calli @abort
+xus:
+ movi %r0 ui12_i
+ bswapr_ui %r1 %r0
+ beqi ui %r1 ui12_o
+ calli @abort
+ui:
+#if __WORDSIZE == 64
+ movi %r0 xui12_i
+ bswapr_ui %r1 %r0
+ beqi xui %r1 ui12_o
+ calli @abort
+xui:
+ movi %r0 ul12_i
+ bswapr_ul %r1 %r0
+ beqi ul %r1 ul12_o
+ calli @abort
+ul:
+#endif
+
+ BSWAP(v0, v1, v2, r0, r1, r2)
+
+ // just to know it did not abort
+ prepare
+ pushargi ok
+ ellipsis
+ finishi @printf
+
+ ret
+ epilog
#include <stdarg.h>
#include <lightning.h>
#include <dlfcn.h>
+#include <math.h>
#if defined(__linux__) && (defined(__i386__) || defined(__x86_64__))
# include <fpu_control.h>
static void htonr_ul(void); static void ntohr_ul(void);
#endif
static void htonr(void); static void ntohr(void);
+static void bswapr_us(void); static void bswapr_ui(void);
+#if __WORDSIZE == 64
+static void bswapr_ul(void);
+#endif
+static void bswapr(void);
static void movnr(void); static void movzr(void);
static void ldr_c(void); static void ldi_c(void);
static void ldr_uc(void); static void ldi_uc(void);
entry(htonr_ul), entry(ntohr_ul),
#endif
entry(htonr), entry(ntohr),
+ entry(bswapr_us), entry(bswapr_ui),
+#if __WORDSIZE == 64
+ entry(bswapr_ul),
+#endif
+ entry(bswapr),
entry(movnr), entry(movzr),
entry(ldr_c), entry(ldi_c),
entry(ldr_uc), entry(ldi_uc),
entry_ir_ir(htonr_ul) entry_ir_ir(ntohr_ul)
#endif
entry_ir_ir(htonr) entry_ir_ir(ntohr)
+entry_ir_ir(bswapr_us) entry_ir_ir(bswapr_ui)
+#if __WORDSIZE == 64
+entry_ir_ir(bswapr_ul)
+#endif
+entry_ir_ir(bswapr)
entry_ir_ir_ir(movnr) entry_ir_ir_ir(movzr)
entry_ir_ir(ldr_c) entry_ir_pm(ldi_c)
entry_ir_ir(ldr_uc) entry_ir_pm(ldi_uc)
*) ;;
esac
-AC_CHECK_FUNCS(mremap ffsl getopt_long_only isnan isinf,,)
+AC_CHECK_FUNCS(mmap mremap ffsl getopt_long_only isnan isinf,,)
AC_CHECK_HEADERS([getopt.h stdint.h],,,)
[Enable jit disassembler using binutils]),
[DISASSEMBLER=$enableval], [DISASSEMBLER=auto])
if test "x$DISASSEMBLER" != "xno"; then
- # FIXME need to check for libiberty first or will fail to link
+
AC_CHECK_LIB(iberty, htab_try_create, ,
[HAVE_IBERTY="no"])
AC_CHECK_LIB(bfd, bfd_init, ,
[HAVE_Z="no"])
AC_CHECK_LIB(opcodes, init_disassemble_info, ,
[HAVE_OPCODES="no"])
- if test "x$HAVE_IBERTY" = "xno" -o \
- "x$HAVE_BFD" = "xno" -o \
- "x$HAVE_Z" = "xno" -o \
- "x$HAVE_OPCODES" = "xno"; then
- if test "x$DISASSEMBLER" != "xauto"; then
- AC_MSG_ERROR([binutils not found, see http://www.gnu.org/software/binutils/])
- else
- AC_MSG_WARN([binutils not found, see http://www.gnu.org/software/binutils/])
- DISASSEMBLER="no"
- fi
+
+ if test "x$HAVE_IBERTY" = "xno"; then
+ if test "x$DISASSEMBLER" = "xyes"; then
+ AC_MSG_ERROR([libiberty not found])
+ else
+ AC_MSG_WARN([libiberty not found])
+ DISASSEMBLER="no"
+ fi
fi
+
+ if test "x$HAVE_BFD" = "xno"; then
+ if test "x$DISASSEMBLER" = "xyes"; then
+ AC_MSG_ERROR([binutils BFD not found, see http://www.gnu.org/software/binutils/])
+ else
+ AC_MSG_WARN([binutils BFD not found, see http://www.gnu.org/software/binutils/])
+ DISASSEMBLER="no"
+ fi
+ fi
+
+ if test "x$HAVE_Z" = "xno"; then
+ if test "x$DISASSEMBLER" = "xyes"; then
+ AC_MSG_ERROR([zlib not found, see https://zlib.net/])
+ else
+ AC_MSG_WARN([zlib not found, see https://zlib.net/])
+ DISASSEMBLER="no"
+ fi
+ fi
+
+ if test "x$HAVE_OPCODES" = "xno"; then
+ if test "x$DISASSEMBLER" = "xyes"; then
+ AC_MSG_ERROR([binutils opcodes not found, see https://www.gnu.org/software/binutils/])
+ else
+ AC_MSG_WARN([binutils opcodes not found, see https://www.gnu.org/software/binutils/])
+ DISASSEMBLER="no"
+ fi
+ fi
+
fi
AM_CONDITIONAL(with_disassembler, [test "x$DISASSEMBLER" != "xno"])
if test "x$DISASSEMBLER" != "xno"; then
*.info*
stamp-*
/version.texi
+
+texinfo.tex
+mdate-sh
+
+fact
+ifib
+incr
+printf
+rfib
+rpn
@node Installation
@chapter Configuring and installing @lightning{}
-The first thing to do to use @lightning{} is to configure the
+Here we will assume that your system already has the dependencies
+necessary to build @lightning{}. For more on dependencies, see
+@lightning{}'s @file{README-hacking} file.
+
+The first thing to do to build @lightning{} is to configure the
program, picking the set of macros to be used on the host
architecture; this configuration is automatically performed by
the @file{configure} shell script; to run it, merely type:
ntohr _us _ui _ul @r{Network-to-host order }
@end example
+@code{bswapr} can be used to unconditionally byte-swap an operand.
+On little-endian architectures, @code{htonr} and @code{ntohr} resolve
+to this.
+The @code{_ul} variant is only available on 64-bit architectures.
+@example
+bswapr _us _ui _ul O1 = byte_swap(O2)
+@end example
+
@item Load operations
@code{ld} accepts two operands while @code{ldx} accepts three;
in both cases, the last can be either a register or an immediate
typedef jit_int32_t jit_gpr_t;
typedef jit_int32_t jit_fpr_t;
+#if !defined(__powerpc__) && \
+ (defined(__POWERPC__) || defined(__ppc__) || defined(__PPC__))
+#define __powerpc__ 1
+#endif
+
#if defined(__i386__) || defined(__x86_64__)
# include <lightning/jit_x86.h>
#elif defined(__mips__)
#define jit_movr(u,v) jit_new_node_ww(jit_code_movr,u,v)
#define jit_movi(u,v) jit_new_node_ww(jit_code_movi,u,v)
jit_code_movr, jit_code_movi,
+
+#define jit_movnr(u,v,w) jit_new_node_www(jit_code_movnr,u,v,w)
+#define jit_movzr(u,v,w) jit_new_node_www(jit_code_movzr,u,v,w)
+ jit_code_movnr, jit_code_movzr,
+
#define jit_extr_c(u,v) jit_new_node_ww(jit_code_extr_c,u,v)
#define jit_extr_uc(u,v) jit_new_node_ww(jit_code_extr_uc,u,v)
jit_code_extr_c, jit_code_extr_uc,
#define jit_movr_d_w(u, v) jit_new_node_ww(jit_code_movr_d_w, u, v)
#define jit_movi_d_w(u, v) jit_new_node_wd(jit_code_movi_d_w, u, v)
-#define jit_movnr(u,v,w) jit_new_node_www(jit_code_movnr,u,v,w)
-#define jit_movzr(u,v,w) jit_new_node_www(jit_code_movzr,u,v,w)
- jit_code_movnr, jit_code_movzr,
+#define jit_bswapr_us(u,v) jit_new_node_ww(jit_code_bswapr_us,u,v)
+ jit_code_bswapr_us,
+#define jit_bswapr_ui(u,v) jit_new_node_ww(jit_code_bswapr_ui,u,v)
+ jit_code_bswapr_ui,
+#define jit_bswapr_ul(u,v) jit_new_node_ww(jit_code_bswapr_ul,u,v)
+ jit_code_bswapr_ul,
+#if __WORDSIZE == 32
+#define jit_bswapr(u,v) jit_new_node_ww(jit_code_bswapr_ui,u,v)
+#else
+#define jit_bswapr(u,v) jit_new_node_ww(jit_code_bswapr_ul,u,v)
+#endif
jit_code_last_code
} jit_code_t;
#define jit_cc_a0_flt 0x00000020 /* arg0 is immediate float */
#define jit_cc_a0_dbl 0x00000040 /* arg0 is immediate double */
#define jit_cc_a0_arg 0x00000080 /* arg1 is an argument int id */
-#define jit_cc_a1_reg 0x00000100 /* arg1 is a register */
-#define jit_cc_a1_chg 0x00000200 /* arg1 is modified */
+#define jit_cc_a0_cnd 0x00000100 /* arg0 is a conditionally set register */
+#define jit_cc_a1_reg 0x00000200 /* arg1 is a register */
+#define jit_cc_a1_chg 0x00000400 /* arg1 is modified */
#define jit_cc_a1_int 0x00001000 /* arg1 is immediate word */
#define jit_cc_a1_flt 0x00002000 /* arg1 is immediate float */
#define jit_cc_a1_dbl 0x00004000 /* arg1 is immediate double */
extern void
_emit_stxi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
+extern void jit_init_print(void);
extern void jit_init_debug(const char*);
extern void jit_finish_debug(void);
# define stxr_l(r0,r1,r2) STR(r2,r1,r0)
# define stxi_l(i0,r0,r1) _stxi_l(_jit,i0,r0,r1)
static void _stxi_l(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
-# if __BYTE_ORDER == __LITTLE_ENDIAN
-# define htonr_us(r0,r1) _htonr_us(_jit,r0,r1)
-static void _htonr_us(jit_state_t*,jit_int32_t,jit_int32_t);
-# define htonr_ui(r0,r1) _htonr_ui(_jit,r0,r1)
-static void _htonr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
-# define htonr_ul(r0,r1) REV(r0,r1)
-# else
-# define htonr_us(r0,r1) extr_us(r0,r1)
-# define htonr_ui(r0,r1) extr_ui(r0,r1)
-# define htonr_ul(r0,r1) movr(r0,r1)
-# endif
+# define bswapr_us(r0,r1) _bswapr_us(_jit,r0,r1)
+static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t);
+# define bswapr_ui(r0,r1) _bswapr_ui(_jit,r0,r1)
+static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
+# define bswapr_ul(r0,r1) REV(r0,r1)
# define extr_c(r0,r1) SXTB(r0,r1)
# define extr_uc(r0,r1) UXTB(r0,r1)
# define extr_s(r0,r1) SXTH(r0,r1)
}
}
-#if __BYTE_ORDER == __LITTLE_ENDIAN
static void
-_htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
- htonr_ul(r0, r1);
+ bswapr_ul(r0, r1);
rshi_u(r0, r0, 48);
}
static void
-_htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
- htonr_ul(r0, r1);
+ bswapr_ul(r0, r1);
rshi_u(r0, r0, 32);
}
-#endif
static void
_ldi_c(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
8, /* nei */
4, /* movr */
16, /* movi */
+ 8, /* movnr */
+ 8, /* movzr */
4, /* extr_c */
4, /* extr_uc */
4, /* extr_s */
0, /* movi_d_ww */
0, /* movr_d_w */
0, /* movi_d_w */
- 8, /* movnr */
- 8, /* movzr */
+ 8, /* bswapr_us */
+ 8, /* bswapr_ui */
+ 4, /* bswapr_ul */
#endif /* __WORDSIZE */
case_rr(hton, _us);
case_rr(hton, _ui);
case_rr(hton, _ul);
+ case_rr(bswap, _us);
+ case_rr(bswap, _ui);
+ case_rr(bswap, _ul);
case_rr(ext, _c);
case_rr(ext, _uc);
case_rr(ext, _s);
static void _movi(jit_state_t*,jit_int32_t,jit_word_t);
# define movi_p(r0,i0) _movi_p(_jit,r0,i0)
static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t);
+# define movnr(r0,r1,r2) _movnr(_jit,r0,r1,r2)
+static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+# define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2)
+static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define negr(r0,r1) NEGQ(r1,r0)
# define comr(r0,r1) NOT(r1,r0)
# define addr(r0,r1,r2) ADDQ(r1,r2,r0)
static void _extr_i(jit_state_t*,jit_int32_t,jit_int32_t);
# define extr_ui(r0,r1) _extr_ui(_jit,r0,r1)
static void _extr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
-# if __BYTE_ORDER == __LITTLE_ENDIAN
-# define htonr_us(r0,r1) _htonr_us(_jit,r0,r1)
-static void _htonr_us(jit_state_t*,jit_int32_t,jit_int32_t);
-# define htonr_ui(r0,r1) _htonr_ui(_jit,r0,r1)
-static void _htonr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
-# define htonr_ul(r0,r1) _htonr_ul(_jit,r0,r1)
-static void _htonr_ul(jit_state_t*,jit_int32_t,jit_int32_t);
-# else
-# define htonr_us(r0,r1) extr_us(r0,r1)
-# define htonr_ui(r0,r1) extr_ui(r0,r1)
-# define htonr_ul(r0,r1) movr(r0,r1)
-# endif
+# define bswapr_us(r0,r1) _bswapr_us(_jit,r0,r1)
+static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t);
+# define bswapr_ui(r0,r1) _bswapr_ui(_jit,r0,r1)
+static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
+# define bswapr_ul(r0,r1) _bswapr_ul(_jit,r0,r1)
+static void _bswapr_ul(jit_state_t*,jit_int32_t,jit_int32_t);
# define jmpr(r0) JMP(_R31_REGNO,r0,0)
# define jmpi(i0) _jmpi(_jit,i0)
static void _jmpi(jit_state_t*, jit_word_t);
return (w);
}
+static void
+_movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+ jit_word_t w;
+ w = beqi(_jit->pc.w, r2, 0);
+ MOV(r1, r0);
+ patch_at(w, _jit->pc.w);
+}
+
+static void
+_movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+ jit_word_t w;
+ w = bnei(_jit->pc.w, r2, 0);
+ MOV(r1, r0);
+ patch_at(w, _jit->pc.w);
+}
+
static void
_addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
{
}
static void
-_htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
jit_int32_t t0;
t0 = jit_get_reg(jit_class_gpr);
}
static void
-_htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
jit_int32_t t0;
jit_int32_t t1;
}
static void
-_htonr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+_bswapr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
jit_int32_t t0;
jit_int32_t t1;
12, /* nei */
4, /* movr */
32, /* movi */
+ 12, /* movnr */
+ 12, /* movzr */
8, /* extr_c */
8, /* extr_uc */
8, /* extr_s */
0, /* movi_d_ww */
0, /* movr_d_w */
0, /* movi_d_w */
+ 16, /* bswapr_us */
+ 36, /* bswapr_ui */
+ 36, /* bswapr_ul */
#endif /* __WORDSIZE */
case_rr(hton, _us);
case_rr(hton, _ui);
case_rr(hton, _ul);
+ case_rr(bswap, _us);
+ case_rr(bswap, _ui);
+ case_rr(bswap, _ul);
case_rr(ext, _c);
case_rr(ext, _uc);
case_rr(ext, _s);
case_rr(ext, _us);
case_rr(ext, _i);
case_rr(ext, _ui);
+ case_rrr(movn,);
+ case_rrr(movz,);
case_rr(mov,);
case jit_code_movi:
if (node->flag & jit_flag_node) {
# define CMNI(rn,im) CC_CMNI(ARM_CC_AL,rn,im)
# define T2_CMNI(rn,im) torri(THUMB2_CMNI,rn,_R15_REGNO,im)
# define CC_TST(cc,rn,rm) corrr(cc,ARM_TST,rn,r0,rm)
-# define TST(rn,rm) CC_TST(ARM_CC_AL,rn,rm)
+# define TST(rn,rm) corrr(ARM_CC_AL,ARM_TST,rn,0,rm)
# define T1_TST(rn,rm) is(THUMB_TST|(_u3(rm)<<3)|_u3(rn))
# define T2_TST(rn,rm) torrr(THUMB2_TST,rn,_R15_REGNO,rm)
# define CC_TSTI(cc,rn,im) corri(cc,ARM_TST|ARM_I,rn,0,im)
static void _stxr_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
# define stxi_i(r0,r1,i0) _stxi_i(_jit,r0,r1,i0)
static void _stxi_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
-# if __BYTE_ORDER == __LITTLE_ENDIAN
-# define htonr_us(r0,r1) _htonr_us(_jit,r0,r1)
-static void _htonr_us(jit_state_t*,jit_int32_t,jit_int32_t);
-# define htonr_ui(r0,r1) _htonr_ui(_jit,r0,r1)
-static void _htonr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
-# else
-# define htonr_us(r0,r1) extr_us(r0,r1)
-# define htonr(r0,r1) movr(r0,r1)
-# endif
+# define bswapr_us(r0,r1) _bswapr_us(_jit,r0,r1)
+static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t);
+# define bswapr_ui(r0,r1) _bswapr_ui(_jit,r0,r1)
+static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
# define extr_c(r0,r1) _extr_c(_jit,r0,r1)
static void _extr_c(jit_state_t*,jit_int32_t,jit_int32_t);
# define extr_uc(r0,r1) _extr_uc(_jit,r0,r1)
}
}
-# if __BYTE_ORDER == __LITTLE_ENDIAN
static void
-_htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
- jit_int32_t t0;
if (jit_thumb_p()) {
if ((r0|r1) < 8)
T1_REV(r0, r1);
rshi_u(r0, r0, 16);
}
else {
- t0 = jit_get_reg(jit_class_gpr);
- rshi(rn(t0), r1, 8);
- andi(r0, r1, 0xff);
- andi(rn(t0), rn(t0), 0xff);
- lshi(r0, r0, 8);
- orr(r0, r0, rn(t0));
- jit_unget_reg(t0);
+ generic_bswapr_us(_jit, r0, r1);
}
}
}
/* inline glibc htonl (without register clobber) */
static void
-_htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
jit_int32_t reg;
if (jit_thumb_p()) {
}
}
}
-#endif
static void
_extr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
14, /* nei */
4, /* movr */
8, /* movi */
+ 8, /* movnr */
+ 8, /* movzr */
4, /* extr_c */
4, /* extr_uc */
4, /* extr_s */
12, /* movi_d_ww */
0, /* movr_d_w */
0, /* movi_d_w */
+ 8, /* bswapr_us */
+ 4, /* bswapr_ui */
+ 0, /* bswapr_ul */
#endif /* __ARM_PCS_VFP */
#endif /* __WORDSIZE */
14, /* nei */
4, /* movr */
8, /* movi */
+ 22, /* movnr */
+ 22, /* movzr */
8, /* extr_c */
4, /* extr_uc */
8, /* extr_s */
12, /* movi_d_ww */
0, /* movr_d_w */
0, /* movi_d_w */
- 8, /* movnr */
- 8, /* movzr */
+ 20, /* bswapr_us */
+ 16, /* bswapr_ui */
+ 0, /* bswapr_ul */
#endif /* __ARM_PCS_VFP */
#endif /* __WORDSIZE */
case_wrr(stx, _i);
case_rr(hton, _us);
case_rr(hton, _ui);
+ case_rr(bswap, _us);
+ case_rr(bswap, _ui);
case_rr(ext, _c);
case_rr(ext, _uc);
case_rr(ext, _s);
void
jit_init_debug(const char *progname)
{
+ jit_init_print();
#if DISASSEMBLER
bfd_init();
bfd_check_format(disasm_bfd, bfd_object);
bfd_check_format(disasm_bfd, bfd_archive);
if (!disasm_stream)
- disasm_stream = stderr;
+ disasm_stream = stdout;
+
INIT_DISASSEMBLE_INFO(disasm_info, disasm_stream, fprintf);
-# if defined(__i386__) || defined(__x86_64__)
- disasm_info.arch = bfd_arch_i386;
-# if defined(__x86_64__)
-# if __WORDSIZE == 32
- disasm_info.mach = bfd_mach_x64_32;
-# else
- disasm_info.mach = bfd_mach_x86_64;
-# endif
-# else
- disasm_info.mach = bfd_mach_i386_i386;
-# endif
-# endif
-# if defined(__powerpc__)
- disasm_info.arch = bfd_arch_powerpc;
- disasm_info.mach = bfd_mach_ppc64;
-# if HAVE_DISASSEMBLE_INIT_FOR_TARGET
+ disasm_info.arch = bfd_get_arch(disasm_bfd);
+ disasm_info.mach = bfd_get_mach(disasm_bfd);
+
+# if HAVE_DISASSEMBLE_INIT_FOR_TARGET
disassemble_init_for_target(&disasm_info);
-# elif HAVE_DISASSEMBLE_INIT_POWERPC
- disassemble_init_powerpc(&disasm_info);
-# endif
-# if defined(__powerpc64__)
+# endif
+
+# if defined(__powerpc64__)
disasm_info.disassembler_options = "64";
-# endif
-# if HAVE_DISASSEMBLE_INIT_FOR_TARGET
- disassemble_init_for_target(&disasm_info);
-# elif HAVE_DISASSEMBLE_INIT_POWERPC
- disassemble_init_powerpc(&disasm_info);
-# endif
# endif
-# if defined(__sparc__)
+# if defined(__sparc__) || defined(__s390__) || defined(__s390x__)
disasm_info.endian = disasm_info.display_endian = BFD_ENDIAN_BIG;
# endif
# if defined(__s390__) || defined(__s390x__)
- disasm_info.arch = bfd_arch_s390;
-# if __WORDSIZE == 32
- disasm_info.mach = bfd_mach_s390_31;
-# else
- disasm_info.mach = bfd_mach_s390_64;
-# endif
- disasm_info.endian = disasm_info.display_endian = BFD_ENDIAN_BIG;
disasm_info.disassembler_options = "zarch";
-# endif
-# if defined(__alpha__)
- disasm_info.arch = bfd_arch_alpha;
- disasm_info.mach = bfd_mach_alpha_ev6;
-# endif
-# if defined(__hppa__)
- disasm_info.arch = bfd_arch_hppa;
- disasm_info.mach = bfd_mach_hppa10;
-# endif
-# if defined(__riscv)
- disasm_info.arch = bfd_arch_riscv;
-# if __WORDSIZE == 32
- disasm_info.mach = bfd_mach_riscv32;
-# else
- disasm_info.mach = bfd_mach_riscv64;
-# endif
# endif
disasm_info.print_address_func = disasm_print_address;
static void _movi(jit_state_t*,jit_int32_t,jit_word_t);
#define movi_p(r0,i0) _movi_p(_jit,r0,i0)
static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t);
+# define movnr(r0,r1,r2) _movnr(_jit,r0,r1,r2)
+static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+# define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2)
+static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
#define comr(r0,r1) UADDCM(_R0_REGNO,r1,r0)
#define negr(r0,r1) SUB(_R0_REGNO,r1,r0)
#define extr_c(r0,r1) EXTRWR(r1,31,8,r0)
#define extr_uc(r0,r1) EXTRWR_U(r1,31,8,r0)
#define extr_s(r0,r1) EXTRWR(r1,31,16,r0)
#define extr_us(r0,r1) EXTRWR_U(r1,31,16,r0)
-#if __BYTE_ORDER == __BIG_ENDIAN
-# define htonr_us(r0,r1) extr_us(r0,r1)
-# define htonr_ui(r0,r1) movr(r0,r1)
-#else
-# error need htonr implementation
-#endif
+#define bswapr_us(r0,r1) generic_bswapr_us(_jit,r0,r1)
+#define bswapr_ui(r0,r1) generic_bswapr_ui(_jit,r0,r1)
#define addr(r0,r1,r2) ADD(r1,r2,r0)
#define addi(r0,r1,i0) _addi(_jit,r0,r1,i0)
static void _addi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
return (w);
}
+static void
+_movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+ jit_word_t w;
+ w = beqi(_jit->pc.w, r2, 0);
+ COPY(r1, r0);
+ patch_at(w, _jit->pc.w);
+}
+
+static void
+_movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+ jit_word_t w;
+ w = bnei(_jit->pc.w, r2, 0);
+ COPY(r1, r0);
+ patch_at(w, _jit->pc.w);
+}
+
static void
_addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
{
8, /* nei */
4, /* movr */
8, /* movi */
+ 16, /* movnr */
+ 16, /* movzr */
4, /* extr_c */
4, /* extr_uc */
4, /* extr_s */
0, /* movi_d_ww */
0, /* movr_d_w */
0, /* movi_d_w */
+ 36, /* bswapr_us */
+ 80, /* bswapr_ui */
+ 0, /* bswapr_ul */
#endif /* __WORDSIZE */
case_rrw(rsh,);
case_rrr(rsh, _u);
case_rrw(rsh, _u);
+ case_rrr(movn,);
+ case_rrr(movz,);
case_rr(mov,);
case jit_code_movi:
if (node->flag & jit_flag_node) {
case_rr(ext, _us);
case_rr(hton, _us);
case_rr(hton, _ui);
+ case_rr(bswap, _us);
+ case_rr(bswap, _ui);
case_rrr(lt,);
case_rrw(lt,);
case_rrr(lt, _u);
static void _movi(jit_state_t*,jit_int32_t,jit_word_t);
#define movi_p(r0,i0) _movi_p(_jit,r0,i0)
static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t);
-#if __BYTE_ORDER == __LITTLE_ENDIAN
-# define htonr_us(r0,r1) _htonr_us(_jit,r0,r1)
-static void _htonr_us(jit_state_t*,jit_int32_t,jit_int32_t);
-# define htonr_ui(r0,r1) _htonr_ui(_jit,r0,r1)
-static void _htonr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
-# define htonr_ul(r0,r1) MUX1(r0,r1,MUX_REV)
-#else
-# define htonr_us(r0,r1) extr_us(r0,r1)
-# define htonr_ui(r0,r1) extr_ui(r0,r1)
-# define htonr_ul(r0,r1) movr(r0,r1)
-#endif
+# define movnr(r0,r1,r2) _movnr(_jit,r0,r1,r2)
+static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+# define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2)
+static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+# define bswapr_us(r0,r1) _bswapr_us(_jit,r0,r1)
+static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t);
+# define bswapr_ui(r0,r1) _bswapr_ui(_jit,r0,r1)
+static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
+# define bswapr_ul(r0,r1) MUX1(r0,r1,MUX_REV)
#define extr_c(r0,r1) SXT1(r0,r1)
#define extr_uc(r0,r1) ZXT1(r0,r1)
#define extr_s(r0,r1) SXT2(r0,r1)
return (w);
}
+static void
+_movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+ jit_word_t w;
+ w = beqi(_jit->pc.w, r2, 0);
+ movr(r0, r1);
+ patch_at(w, _jit->pc.w);
+}
+
+static void
+_movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+ jit_word_t w;
+ w = bnei(_jit->pc.w, r2, 0);
+ movr(r0, r1);
+ patch_at(w, _jit->pc.w);
+}
+
+static void
+_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+ bswapr_ul(r0, r1);
+ rshi_u(r0, r0, 48);
+}
+
+static void
+_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+ bswapr_ul(r0, r1);
+ rshi_u(r0, r0, 32);
+}
+
static void
_addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
{
}
}
-#if __BYTE_ORDER == __LITTLE_ENDIAN
-static void
-_htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
-{
- jit_int32_t t0;
- t0 = jit_get_reg(jit_class_gpr);
- rshi(rn(t0), r1, 8);
- andi(r0, r1, 0xff);
- andi(rn(t0), rn(t0), 0xff);
- lshi(r0, r0, 8);
- orr(r0, r0, rn(t0));
- jit_unget_reg(t0);
-}
-
-static void
-_htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
-{
- jit_int32_t t0;
- jit_int32_t t1;
- jit_int32_t t2;
- t0 = jit_get_reg(jit_class_gpr);
- t1 = jit_get_reg(jit_class_gpr);
- t2 = jit_get_reg(jit_class_gpr);
- rshi(rn(t0), r1, 24);
- rshi(rn(t1), r1, 16);
- rshi(rn(t2), r1, 8);
- andi(rn(t0), rn(t0), 0xff);
- andi(rn(t1), rn(t1), 0xff);
- andi(rn(t2), rn(t2), 0xff);
- andi(r0, r1, 0xff);
- lshi(r0, r0, 24);
- lshi(rn(t1), rn(t1), 8);
- orr(r0, r0, rn(t0));
- lshi(rn(t2), rn(t2), 16);
- orr(r0, r0, rn(t1));
- orr(r0, r0, rn(t2));
- jit_unget_reg(t2);
- jit_unget_reg(t1);
- jit_unget_reg(t0);
-}
-#endif
-
static void
_lshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
{
16, /* lshr */
16, /* lshi */
16, /* rshr */
- 16, /* rshi */
+ 32, /* rshi */
16, /* rshr_u */
- 16, /* rshi_u */
+ 32, /* rshi_u */
16, /* negr */
16, /* comr */
32, /* ltr */
32, /* nei */
16, /* movr */
16, /* movi */
+ 48, /* movnr */
+ 48, /* movzr */
16, /* extr_c */
16, /* extr_uc */
16, /* extr_s */
16, /* extr_us */
16, /* extr_i */
16, /* extr_ui */
- 64, /* htonr_us */
- 160, /* htonr_ui */
+ 48, /* htonr_us */
+ 48, /* htonr_ui */
16, /* htonr_ul */
16, /* ldr_c */
32, /* ldi_c */
0, /* movi_d_ww */
16, /* movr_d_w */
32, /* movi_d_w */
+ 48, /* bswapr_us */
+ 48, /* bswapr_ui */
+ 16, /* bswapr_ul */
#endif /* __WORDSIZE */
case_rrw(rsh, _u);
case_rr(neg,);
case_rr(com,);
+ case_rrr(movn,);
+ case_rrr(movz,);
case_rr(mov,);
case jit_code_movi:
if (node->flag & jit_flag_node) {
case_rr(hton, _us);
case_rr(hton, _ui);
case_rr(hton, _ul);
+ case_rr(bswap, _us);
+ case_rr(bswap, _ui);
+ case_rr(bswap, _ul);
case_rr(ext, _c);
case_rr(ext, _uc);
case_rr(ext, _s);
#include <lightning.h>
#include <lightning/jit_private.h>
-#include <sys/mman.h>
/*
* Prototypes
#endif
int op;
} jit_instr_t;
-/* FIXME */
+#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
+# define jit_mips2_p() 1
+#else
# define jit_mips2_p() 0
+#endif
# define _ZERO_REGNO 0
# define _T0_REGNO 0x08
# define _T1_REGNO 0x09
# endif
# define can_sign_extend_short_p(im) ((im) >= -32678 && (im) <= 32767)
# define can_zero_extend_short_p(im) ((im) >= 0 && (im) <= 65535)
-# define is_low_mask(im) (((im) & 1) ? (__builtin_popcountl((im) + 1) == 1) : 0)
+# define is_low_mask(im) (((im) & 1) ? (__builtin_popcountl((im) + 1) <= 1) : 0)
+# define is_middle_mask(im) ((im) ? (__builtin_popcountl((im) + (1 << __builtin_ctzl(im))) <= 1) : 0)
# define is_high_mask(im) ((im) ? (__builtin_popcountl((im) + (1 << __builtin_ctzl(im))) == 0) : 0)
# define masked_bits_count(im) __builtin_popcountl(im)
# define unmasked_bits_count(im) (__WORDSIZE - masked_bits_count(im))
# define DADDIU(rt,rs,im) hrri(MIPS_DADDIU,rs,rt,im)
# define SUBU(rd,rs,rt) rrr_t(rs,rt,rd,MIPS_SUBU)
# define DSUBU(rd,rs,rt) rrr_t(rs,rt,rd,MIPS_DSUBU)
+# define MUL(rd,rs,rt) hrrr_t(MIPS_SPECIAL2,rs,rt,rd,MIPS_MUL)
# define MULT(rs,rt) rrr_t(rs,rt,_ZERO_REGNO,MIPS_MULT)
# define MULTU(rs,rt) rrr_t(rs,rt,_ZERO_REGNO,MIPS_MULTU)
# define DMULT(rs,rt) rrr_t(rs,rt,_ZERO_REGNO,MIPS_DMULT)
# define DSRL32(rd,rt,sa) rrit(rt,rd,sa,MIPS_DSRL32)
# define INS(rt,rs,pos,size) hrrrit(MIPS_SPECIAL3,rs,rt,pos+size-1,pos,MIPS_INS)
# define DINS(rt,rs,pos,size) hrrrit(MIPS_SPECIAL3,rs,rt,pos+size-1,pos,MIPS_DINS)
+# define DINSU(rt,rs,pos,size) hrrrit(MIPS_SPECIAL3,rs,rt,pos+size-32-1,pos-32,MIPS_DINSU)
+# define DINSM(rt,rs,pos,size) hrrrit(MIPS_SPECIAL3,rs,rt,pos+size-32-1,pos,MIPS_DINSM)
# define EXT(rt,rs,pos,size) hrrrit(MIPS_SPECIAL3,rs,rt,size-1,pos,MIPS_EXT)
# define DEXT(rt,rs,pos,size) hrrrit(MIPS_SPECIAL3,rs,rt,size-1,pos,MIPS_DEXT)
+# define DEXTU(rt,rs,pos,size) hrrrit(MIPS_SPECIAL3,rs,rt,size-1,pos-32,MIPS_DEXTU)
+# define DEXTM(rt,rs,pos,size) hrrrit(MIPS_SPECIAL3,rs,rt,size-32-1,pos,MIPS_DEXTM)
# define ROTR(rd,rt,sa) hrrrit(MIPS_SPECIAL,1,rt,rd,sa,MIPS_SRL)
# define DROTR(rd,rt,sa) hrrrit(MIPS_SPECIAL,1,rt,rd,sa,MIPS_DSRL)
# define MFHI(rd) rrr_t(_ZERO_REGNO,_ZERO_REGNO,rd,MIPS_MFHI)
# define div(rs,rt) DDIV(rs,rt)
# define divu(rs,rt) DDIVU(rs,rt)
# endif
+# define extr(rd,rt,lsb,nb) _extr(_jit,rd,rt,lsb,nb)
+static void _extr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+# define insr(rd,rt,lsb,nb) _insr(_jit,rd,rt,lsb,nb)
+static void _insr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
# define addi(r0,r1,i0) _addi(_jit,r0,r1,i0)
static void _addi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
#define addcr(r0,r1,r2) _addcr(_jit,r0,r1,r2)
# define stxi_l(i0,r0,r1) _stxi_l(_jit,i0,r0,r1)
static void _stxi_l(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
# endif
-# if __BYTE_ORDER == __LITTLE_ENDIAN
-# define htonr_us(r0,r1) _htonr_us(_jit,r0,r1)
-static void _htonr_us(jit_state_t*,jit_int32_t,jit_int32_t);
-# define htonr_ui(r0,r1) _htonr_ui(_jit,r0,r1)
-static void _htonr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
-# if __WORDSIZE == 64
-# define htonr_ul(r0,r1) _htonr_ul(_jit,r0,r1)
-static void _htonr_ul(jit_state_t*,jit_int32_t,jit_int32_t);
-# endif
-# else
-# define htonr_us(r0,r1) extr_us(r0,r1)
-# if __WORDSIZE == 32
-# define htonr_ui(r0,r1) movr(r0,r1)
-# else
-# define htonr_ui(r0,r1) extr_ui(r0,r1)
-# define htonr_ul(r0,r1) movr(r0,r1)
-# endif
+# define bswapr_us(r0,r1) _bswapr_us(_jit,r0,r1)
+static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t);
+# define bswapr_ui(r0,r1) _bswapr_ui(_jit,r0,r1)
+static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
+# if __WORDSIZE == 64
+# define bswapr_ul(r0,r1) generic_bswapr_ul(_jit,r0,r1)
# endif
# define extr_c(r0,r1) _extr_c(_jit,r0,r1)
static void _extr_c(jit_state_t*,jit_int32_t,jit_int32_t);
assert(i0 == 0);
}
+static void
+_extr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+ jit_int32_t pos, jit_int32_t size)
+{
+    /* Emit a MIPS bitfield extract of <size> bits starting at bit
+     * <pos> of r1 into r0, picking the instruction variant that can
+     * encode the operands: plain EXT on 32-bit; on 64-bit, DEXTU for
+     * fields starting at bit 32 or above, DEXTM for fields wider than
+     * 32 bits, and DEXT otherwise.  size must be non-zero. */
+ assert(size > 0);
+
+ if (__WORDSIZE == 32)
+ EXT(r0, r1, pos, size);
+ else if (pos >= 32)
+ DEXTU(r0, r1, pos, size);
+ else if (size > 32)
+ DEXTM(r0, r1, pos, size);
+ else
+ DEXT(r0, r1, pos, size);
+}
+
+static void
+_insr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+ jit_int32_t pos, jit_int32_t size)
+{
+    /* Emit a MIPS bitfield insert of the low <size> bits of r1 into
+     * r0 at bit <pos>, mirroring _extr's variant selection: INS on
+     * 32-bit; on 64-bit, DINSU for pos >= 32, DINSM for fields wider
+     * than 32 bits, DINS otherwise.  size must be non-zero. */
+ assert(size > 0);
+
+ if (__WORDSIZE == 32)
+ INS(r0, r1, pos, size);
+ else if (pos >= 32)
+ DINSU(r0, r1, pos, size);
+ else if (size > 32)
+ DINSM(r0, r1, pos, size);
+ else
+ DINS(r0, r1, pos, size);
+}
+
static void
_addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
{
static void
_mulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
- multu(r1, r2);
- MFLO(r0);
+ if (__WORDSIZE == 32)
+ MUL(r0, r1, r2);
+ else {
+ multu(r1, r2);
+ MFLO(r0);
+ }
}
static void
{
jit_int32_t reg;
if (can_zero_extend_short_p(i0))
- ANDI(r0, r1, i0);
+ ANDI(r0, r1, i0);
else if (is_low_mask(i0)) {
-#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
- if (masked_bits_count(i0) <= 32)
- EXT(r0, r1, 0, masked_bits_count(i0));
- else
-#endif
- {
- lshi(r0, r1, unmasked_bits_count(i0));
- rshi_u(r0, r0, unmasked_bits_count(i0));
- }
+ if (jit_mips2_p())
+ extr(r0, r1, 0, masked_bits_count(i0));
+ else {
+ lshi(r0, r1, unmasked_bits_count(i0));
+ rshi_u(r0, r0, unmasked_bits_count(i0));
+ }
} else if (is_high_mask(i0)) {
- rshi(r0, r1, unmasked_bits_count(i0));
- lshi(r0, r0, unmasked_bits_count(i0));
+ if (jit_mips2_p() && r0 == r1)
+ insr(r0, _ZERO_REGNO, 0, unmasked_bits_count(i0));
+ else {
+ rshi(r0, r1, unmasked_bits_count(i0));
+ lshi(r0, r0, unmasked_bits_count(i0));
+ }
+ } else if (jit_mips2_p() && is_middle_mask(i0)) {
+ extr(r0, r1, __builtin_ctzl(i0), masked_bits_count(i0));
+ lshi(r0, r0, __builtin_ctzl(i0));
+ } else if (jit_mips2_p() && is_middle_mask(~i0)) {
+ if (r0 != r1)
+ movr(r0, r1);
+ insr(r0, _ZERO_REGNO, __builtin_ctzl(~i0), masked_bits_count(~i0));
} else {
- reg = jit_get_reg(jit_class_gpr);
- movi(rn(reg), i0);
- AND(r0, r1, rn(reg));
- jit_unget_reg(reg);
+ reg = jit_get_reg(jit_class_gpr);
+ movi(rn(reg), i0);
+ AND(r0, r1, rn(reg));
+ jit_unget_reg(reg);
}
}
}
#endif
-# if __BYTE_ORDER == __LITTLE_ENDIAN
static void
-_htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
- jit_int32_t t0;
- t0 = jit_get_reg(jit_class_gpr);
- rshi(rn(t0), r1, 8);
- andi(r0, r1, 0xff);
- andi(rn(t0), rn(t0), 0xff);
- lshi(r0, r0, 8);
- orr(r0, r0, rn(t0));
- jit_unget_reg(t0);
-}
-
-static void
-_htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
-{
- jit_int32_t t0;
- jit_int32_t t1;
- jit_int32_t t2;
- t0 = jit_get_reg(jit_class_gpr);
- t1 = jit_get_reg(jit_class_gpr);
- t2 = jit_get_reg(jit_class_gpr);
- rshi(rn(t0), r1, 24);
- rshi(rn(t1), r1, 16);
- rshi(rn(t2), r1, 8);
- andi(rn(t0), rn(t0), 0xff);
- andi(rn(t1), rn(t1), 0xff);
- andi(rn(t2), rn(t2), 0xff);
- andi(r0, r1, 0xff);
- lshi(r0, r0, 24);
- lshi(rn(t1), rn(t1), 8);
- orr(r0, r0, rn(t0));
- lshi(rn(t2), rn(t2), 16);
- orr(r0, r0, rn(t1));
- orr(r0, r0, rn(t2));
- jit_unget_reg(t2);
- jit_unget_reg(t1);
- jit_unget_reg(t0);
+ if (jit_mips2_p()) {
+ extr_us(r0, r1);
+ WSBH(r0, r0);
+ } else {
+ generic_bswapr_us(_jit, r0, r1);
+ }
}
static void
-_htonr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
- jit_int32_t reg;
- reg = jit_get_reg(jit_class_gpr);
- rshi_u(rn(reg), r1, 32);
- htonr_ui(r0, r1);
- htonr_ui(rn(reg), rn(reg));
- lshi(r0, r0, 32);
- orr(r0, r0, rn(reg));
- jit_unget_reg(reg);
+ if (jit_mips2_p()) {
+ if (__WORDSIZE == 64) {
+ SLL(r0, r1, 0);
+ WSBH(r0, r0);
+ ROTR(r0, r0, 16);
+ extr(r0, r0, 0, 32);
+ } else {
+ WSBH(r0, r1);
+ ROTR(r0, r0, 16);
+ }
+ } else {
+ generic_bswapr_ui(_jit, r0, r1);
+ }
}
-# endif
static void
_extr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
static void
_extr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
- lshi(r0, r1, 32);
- rshi_u(r0, r0, 32);
+ if (jit_mips2_p())
+ DEXT(r0, r1, 0, 32);
+ else {
+ lshi(r0, r1, 32);
+ rshi_u(r0, r0, 32);
+ }
}
# endif
{
jit_int32_t reg;
- if (i0 == 0) {
- SLT(r0, _ZERO_REGNO, r1);
- XORI(r0, r0, 1);
- }
+ if (can_sign_extend_short_p(i0 + 1))
+ SLTI(r0, r1, i0 + 1);
else {
reg = jit_get_reg(jit_class_gpr);
movi(rn(reg), i0);
{
jit_int32_t reg;
- if (i0 == 0) {
- SLTU(r0, _ZERO_REGNO, r1);
- XORI(r0, r0, 1);
- }
+ if (can_sign_extend_short_p(i0 + 1))
+ SLTIU(r0, r1, i0 + 1);
else {
reg = jit_get_reg(jit_class_gpr);
movi(rn(reg), i0);
{
jit_int32_t reg;
- reg = jit_get_reg(jit_class_gpr);
- movi(rn(reg), i0);
- ger(r0, r1, rn(reg));
- jit_unget_reg(reg);
+ if (can_sign_extend_short_p(i0)) {
+ SLTI(r0, r1, i0);
+ XORI(r0, r0, 1);
+ } else {
+ reg = jit_get_reg(jit_class_gpr);
+ movi(rn(reg), i0);
+ ger(r0, r1, rn(reg));
+ jit_unget_reg(reg);
+ }
}
static void
{
jit_int32_t reg;
- reg = jit_get_reg(jit_class_gpr);
- movi(rn(reg), i0);
- ger_u(r0, r1, rn(reg));
- jit_unget_reg(reg);
+ if (can_sign_extend_short_p(i0)) {
+ SLTIU(r0, r1, i0);
+ XORI(r0, r0, 1);
+ } else {
+ reg = jit_get_reg(jit_class_gpr);
+ movi(rn(reg), i0);
+ ger_u(r0, r1, rn(reg));
+ jit_unget_reg(reg);
+ }
}
static void
jit_word_t w;
jit_int32_t t0;
t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
- if (can_zero_extend_short_p(i1)) {
- ANDI(rn(t0), r0, i1);
- w = _jit->pc.w;
- BNE(_ZERO_REGNO, rn(t0), ((i0 - w) >> 2) - 1);
- NOP(1);
- }
- else {
- movi(rn(t0), i1);
- w = bmsr(i0, r0, rn(t0));
- }
+
+ andi(rn(t0), r0, i1);
+ w = _jit->pc.w;
+ BNE(_ZERO_REGNO, rn(t0), ((i0 - w) >> 2) - 1);
+ NOP(1);
+
jit_unget_reg(t0);
return (w);
}
jit_word_t w;
jit_int32_t t0;
t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
- if (can_zero_extend_short_p(i1)) {
- ANDI(rn(t0), r0, i1);
- w = _jit->pc.w;
- BEQ(_ZERO_REGNO, rn(t0), ((i0 - w) >> 2) - 1);
- NOP(1);
- }
- else {
- movi(rn(t0), i1);
- w = bmcr(i0, r0, rn(t0));
- }
+
+ andi(rn(t0), r0, i1);
+ w = _jit->pc.w;
+ BEQ(_ZERO_REGNO, rn(t0), ((i0 - w) >> 2) - 1);
+ NOP(1);
+
jit_unget_reg(t0);
return (w);
}
#if __WORDSIZE == 32
#if NEW_ABI
-#define JIT_INSTR_MAX 44
+#define JIT_INSTR_MAX 52
0, /* data */
0, /* live */
0, /* align */
28, /* subxr */
28, /* subxi */
16, /* rsbi */
- 8, /* mulr */
- 16, /* muli */
+ 4, /* mulr */
+ 12, /* muli */
12, /* qmulr */
20, /* qmuli */
12, /* qmulr_u */
8, /* nei */
4, /* movr */
8, /* movi */
+ 4, /* movnr */
+ 4, /* movzr */
8, /* extr_c */
4, /* extr_uc */
8, /* extr_s */
0, /* movi_d_ww */
4, /* movr_d_w */
12, /* movi_d_w */
+ 20, /* bswapr_us */
+ 52, /* bswapr_ui */
+ 0, /* bswapr_ul */
#endif /* NEW_ABI */
#endif /* __WORDSIZE */
28, /* subxr */
28, /* subxi */
16, /* rsbi */
- 8, /* mulr */
- 16, /* muli */
+ 4, /* mulr */
+ 12, /* muli */
12, /* qmulr */
20, /* qmuli */
12, /* qmulr_u */
8, /* nei */
4, /* movr */
8, /* movi */
+ 4, /* movnr */
+ 4, /* movzr */
8, /* extr_c */
4, /* extr_uc */
8, /* extr_s */
8, /* movi_d_ww */
0, /* movr_d_w */
0, /* movi_d_w */
+ 20, /* bswapr_us */
+ 52, /* bswapr_ui */
+ 0, /* bswapr_ul */
#endif /* NEW_ABI */
#endif /* __WORDSIZE */
#if __WORDSIZE == 64
-#define JIT_INSTR_MAX 44
+#define JIT_INSTR_MAX 116
0, /* data */
0, /* live */
4, /* align */
8, /* nei */
4, /* movr */
28, /* movi */
+ 4, /* movnr */
+ 4, /* movzr */
8, /* extr_c */
4, /* extr_uc */
8, /* extr_s */
0, /* movi_d_ww */
4, /* movr_d_w */
12, /* movi_d_w */
- 4, /* movnr */
- 4, /* movzr */
+ 20, /* bswapr_us */
+ 52, /* bswapr_ui */
+ 116, /* bswapr_ul */
#endif /* __WORDSIZE */
case_rr(hton, _ui);
#if __WORDSIZE == 64
case_rr(hton, _ul);
+#endif
+ case_rr(bswap, _us);
+ case_rr(bswap, _ui);
+#if __WORDSIZE == 64
+ case_rr(bswap, _ul);
#endif
case_rr(ext, _c);
case_rr(ext, _uc);
"gtr_u", "gti_u",
"ner", "nei",
"movr", "movi",
+ "movnr", "movzr",
"extr_c", "extr_uc",
"extr_s", "extr_us",
"extr_i", "extr_ui",
"movr_f_w", "movi_f_w",
"movr_d_ww", "movi_d_ww",
"movr_d_w", "movi_d_w",
+ "bswapr_us",
+ "bswapr_ui", "bswapr_ul",
};
# define extr_i(r0,r1) EXTSW(r0,r1)
# define extr_ui(r0,r1) CLRLDI(r0,r1,32)
# endif
-# if __BYTE_ORDER == __BIG_ENDIAN
-# define htonr_us(r0,r1) extr_us(r0,r1)
-# if __WORDSIZE == 32
-# define htonr_ui(r0,r1) movr(r0,r1)
-# else
-# define htonr_ui(r0,r1) extr_ui(r0,r1)
-# define htonr_ul(r0,r1) movr(r0,r1)
-# endif
-# else
-# define htonr_us(r0,r1) _htonr_us(_jit,r0,r1)
-static void _htonr_us(jit_state_t*,jit_int32_t,jit_int32_t);
-# define htonr_ui(r0,r1) _htonr_ui(_jit,r0,r1)
-static void _htonr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
-# if __WORDSIZE == 64
-# define htonr_ul(r0,r1) _htonr_ul(_jit,r0,r1)
-static void _htonr_ul(jit_state_t*,jit_int32_t,jit_int32_t);
-# endif
+# define bswapr_us(r0,r1) _bswapr_us(_jit,r0,r1)
+static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t);
+# define bswapr_ui(r0,r1) _bswapr_ui(_jit,r0,r1)
+static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
+# if __WORDSIZE == 64
+# define bswapr_ul(r0,r1) generic_bswapr_ul(_jit,r0,r1)
# endif
# define addr(r0,r1,r2) ADD(r0,r1,r2)
# define addi(r0,r1,i0) _addi(_jit,r0,r1,i0)
return (word);
}
-# if __BYTE_ORDER == __LITTLE_ENDIAN
static void
-_htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
- jit_int32_t t0;
- t0 = jit_get_reg(jit_class_gpr);
- rshi(rn(t0), r1, 8);
- andi(r0, r1, 0xff);
- andi(rn(t0), rn(t0), 0xff);
- lshi(r0, r0, 8);
- orr(r0, r0, rn(t0));
- jit_unget_reg(t0);
+ if (r0 == r1) {
+ RLWIMI(r0, r0, 16, 8, 15);
+ RLWINM(r0, r0, 24, 16, 31);
+ } else {
+ RLWINM(r0, r1, 8, 16, 23);
+ RLWIMI(r0, r1, 24, 24, 31);
+ }
}
static void
-_htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
jit_int32_t reg;
reg = jit_get_reg(jit_class_gpr);
jit_unget_reg(reg);
}
-# if __WORDSIZE == 64
-static void
-_htonr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
-{
- jit_int32_t reg;
- reg = jit_get_reg(jit_class_gpr);
- rshi_u(rn(reg), r1, 32);
- htonr_ui(r0, r1);
- htonr_ui(rn(reg), rn(reg));
- lshi(r0, r0, 32);
- orr(r0, r0, rn(reg));
- jit_unget_reg(reg);
-}
-# endif
-# endif
-
static void
_addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
{
FCTIWZ(rn(reg), r1);
/* use reserved 8 bytes area */
stxi_d(alloca_offset - 8, _FP_REGNO, rn(reg));
+# if __BYTE_ORDER == __BIG_ENDIAN
ldxi_i(r0, _FP_REGNO, alloca_offset - 4);
+# else
+ ldxi_i(r0, _FP_REGNO, alloca_offset - 8);
+# endif
jit_unget_reg(reg);
}
16, /* nei */
4, /* movr */
8, /* movi */
+ 12, /* movnr */
+ 12, /* movzr */
4, /* extr_c */
4, /* extr_uc */
4, /* extr_s */
0, /* movi_d_ww */
0, /* movr_d_w */
0, /* movi_d_w */
- 12, /* movnr */
- 12, /* movzr */
+ 20, /* bswapr_us */
+ 16, /* bswapr_ui */
+ 0, /* bswapr_ul */
#endif /* _CALL_SYV */
#endif /* __BYTE_ORDER */
#endif /* __powerpc__ */
16, /* nei */
4, /* movr */
8, /* movi */
+ 12, /* movnr */
+ 12, /* movzr */
4, /* extr_c */
4, /* extr_uc */
4, /* extr_s */
0, /* movi_d_ww */
0, /* movr_d_w */
0, /* movi_d_w */
- 12, /* movnr */
- 12, /* movzr */
+ 20, /* bswapr_us */
+ 16, /* bswapr_ui */
+ 0, /* bswapr_ul */
#endif /* _CALL_AIX */
#endif /* __BYTEORDER */
#endif /* __powerpc__ */
16, /* nei */
4, /* movr */
36, /* movi */
+ 12, /* movnr */
+ 12, /* movzr */
4, /* extr_c */
4, /* extr_uc */
4, /* extr_s */
0, /* movi_d_ww */
0, /* movr_d_w */
0, /* movi_d_w */
- 12, /* movnr */
- 12, /* movzr */
+ 20, /* bswapr_us */
+ 16, /* bswapr_ui */
+ 44, /* bswapr_ul */
#endif /* __BYTEORDER */
#endif /* __powerpc__ */
#endif /* __WORDSIZE */
16, /* nei */
4, /* movr */
36, /* movi */
+ 12, /* movnr */
+ 12, /* movzr */
4, /* extr_c */
4, /* extr_uc */
4, /* extr_s */
0, /* movi_d_ww */
0, /* movr_d_w */
0, /* movi_d_w */
- 12, /* movnr */
- 12, /* movzr */
+ 20, /* bswapr_us */
+ 16, /* bswapr_ui */
+ 44, /* bswapr_ul */
#endif /* __BYTE_ORDER */
#endif /* __powerpc__ */
#endif /* __WORDSIZE */
case_rr(hton, _ui);
# if __WORDSIZE == 64
case_rr(hton, _ul);
+# endif
+ case_rr(bswap, _us);
+ case_rr(bswap, _ui);
+# if __WORDSIZE == 64
+ case_rr(bswap, _ul);
# endif
case_rr(neg,);
case_rr(com,);
* Implementation
*/
void
-_jit_print(jit_state_t *_jit)
+jit_init_print(void)
{
- jit_node_t *node;
-
if (!print_stream)
print_stream = stderr;
+}
+
+void
+_jit_print(jit_state_t *_jit)
+{
+ jit_node_t *node;
if ((node = _jitc->head)) {
jit_print_node(node);
static void _stxr_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define stxi_l(i0, r0, r1) _stxi_l(_jit, i0, r0, r1)
static void _stxi_l(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
-# define htonr_us(r0, r1) _htonr_us(_jit, r0, r1)
-static void _htonr_us(jit_state_t*,jit_int32_t,jit_int32_t);
-# define htonr_ui(r0, r1) _htonr_ui(_jit, r0, r1)
-static void _htonr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
-# define htonr_ul(r0, r1) _htonr_ul(_jit, r0, r1)
-static void _htonr_ul(jit_state_t*,jit_int32_t,jit_int32_t);
+# define bswapr_us(r0, r1) generic_bswapr_us(_jit, r0, r1)
+# define bswapr_ui(r0, r1) generic_bswapr_ui(_jit, r0, r1)
+# define bswapr_ul(r0, r1) generic_bswapr_ul(_jit, r0, r1)
# define extr_c(r0, r1) _extr_c(_jit, r0, r1)
static void _extr_c(jit_state_t*,jit_int32_t,jit_int32_t);
# define extr_uc(r0, r1) andi(r0, r1, 0xff)
static void _movi(jit_state_t*,jit_int32_t,jit_word_t);
# define movi_p(r0, im) _movi_p(_jit, r0, im)
static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t);
+# define movnr(r0,r1,r2) _movnr(_jit,r0,r1,r2)
+static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+# define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2)
+static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define ltr(r0, r1, r2) SLT(r0, r1, r2)
# define lti(r0, r1, im) _lti(_jit, r0, r1, im)
static void _lti(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
DEFST(i, W)
DEFST(l, D)
-static void
-_htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
-{
- jit_int32_t t0;
- t0 = jit_get_reg(jit_class_gpr);
- rshi(rn(t0), r1, 8);
- andi(r0, r1, 0xff);
- andi(rn(t0), rn(t0), 0xff);
- lshi(r0, r0, 8);
- orr(r0, r0, rn(t0));
- jit_unget_reg(t0);
-}
-
-static void
-_htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
-{
- jit_int32_t t0;
- jit_int32_t t1;
- jit_int32_t t2;
- t0 = jit_get_reg(jit_class_gpr);
- t1 = jit_get_reg(jit_class_gpr);
- t2 = jit_get_reg(jit_class_gpr);
- rshi(rn(t0), r1, 24);
- rshi(rn(t1), r1, 16);
- rshi(rn(t2), r1, 8);
- andi(rn(t0), rn(t0), 0xff);
- andi(rn(t1), rn(t1), 0xff);
- andi(rn(t2), rn(t2), 0xff);
- andi(r0, r1, 0xff);
- lshi(r0, r0, 24);
- lshi(rn(t1), rn(t1), 8);
- orr(r0, r0, rn(t0));
- lshi(rn(t2), rn(t2), 16);
- orr(r0, r0, rn(t1));
- orr(r0, r0, rn(t2));
- jit_unget_reg(t2);
- jit_unget_reg(t1);
- jit_unget_reg(t0);
-}
-
-static void
-_htonr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
-{
- jit_int32_t t0;
- t0 = jit_get_reg(jit_class_gpr);
- rshi_u(rn(t0), r1, 32);
- htonr_ui(r0, r1);
- htonr_ui(rn(t0), rn(t0));
- lshi(r0, r0, 32);
- orr(r0, r0, rn(t0));
- jit_unget_reg(t0);
-}
-
static void
_extr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
return (w);
}
+static void
+_movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    /* Fallback conditional move: r0 = r1 iff r2 != 0.
+     * Emit a forward branch taken when r2 == 0 that skips the move,
+     * then patch the branch target to the current position. */
+ jit_word_t w;
+ w = beqi(_jit->pc.w, r2, 0);
+ movr(r0, r1);	/* was movr(r1, r0): clobbered the source and
+			 * left the destination r0 unchanged */
+ patch_at(w, _jit->pc.w);
+}
+
+static void
+_movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    /* Fallback conditional move: r0 = r1 iff r2 == 0.
+     * Emit a forward branch taken when r2 != 0 that skips the move,
+     * then patch the branch target to the current position. */
+ jit_word_t w;
+ w = bnei(_jit->pc.w, r2, 0);
+ movr(r0, r1);	/* was movr(r1, r0): clobbered the source and
+			 * left the destination r0 unchanged */
+ patch_at(w, _jit->pc.w);
+}
+
static void
_lti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
{
8, /* nei */
4, /* movr */
24, /* movi */
+ 8, /* movnr */
+ 8, /* movzr */
8, /* extr_c */
4, /* extr_uc */
8, /* extr_s */
0, /* movi_d_ww */
4, /* movr_d_w */
16, /* movi_d_w */
+ 20, /* bswapr_us */
+ 52, /* bswapr_ui */
+ 116, /* bswapr_ul */
#endif /* __WORDSIZE */
case_rr(hton, _us);
case_rr(hton, _ui);
case_rr(hton, _ul);
+ case_rr(bswap, _us);
+ case_rr(bswap, _ui);
+ case_rr(bswap, _ul);
case_rr(ext, _c);
case_rr(ext, _uc);
case_rr(ext, _s);
case_rr(ext, _us);
case_rr(ext, _i);
case_rr(ext, _ui);
+ case_rrr(movn,);
+ case_rrr(movz,);
case_rr(mov,);
case jit_code_movi:
if (node->flag & jit_flag_node) {
static void _movi(jit_state_t*,jit_int32_t,jit_word_t);
# define movi_p(r0,i0) _movi_p(_jit,r0,i0)
static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t);
+# define bswapr_us(r0, r1) generic_bswapr_us(_jit, r0, r1)
+# define bswapr_ui(r0, r1) generic_bswapr_ui(_jit, r0, r1)
+# define bswapr_ul(r0, r1) generic_bswapr_ul(_jit, r0, r1)
+# define movnr(r0,r1,r2) _movnr(_jit,r0,r1,r2)
+static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+# define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2)
+static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define addr(r0,r1,r2) _addr(_jit,r0,r1,r2)
static void _addr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define addi(r0,r1,i0) _addi(_jit,r0,r1,i0)
static void _xorr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define xori(r0,r1,i0) _xori(_jit,r0,r1,i0)
static void _xori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
-# define htonr_us(r0,r1) extr_us(r0,r1)
-# if __WORDSIZE == 32
-# define htonr_ui(r0,r1) movr(r0,r1)
-# else
-# define htonr_ui(r0,r1) extr_ui(r0,r1)
-# define htonr_ul(r0,r1) movr(r0,r1)
-# endif
# define extr_c(r0,r1) LGBR(r0,r1)
# define extr_uc(r0,r1) LLGCR(r0,r1)
# define extr_s(r0,r1) LGHR(r0,r1)
return (w);
}
+static void
+_movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    /* s390 fallback conditional move: r0 = r1 iff r2 != 0.
+     * Branch over the register copy when r2 == 0, using the 32-bit
+     * LR or 64-bit LGR load-register form as appropriate, then patch
+     * the branch target to the current position. */
+ jit_word_t w;
+ w = beqi_p(_jit->pc.w, r2, 0);
+#if __WORDSIZE == 32
+ LR(r0, r1);
+#else
+ LGR(r0, r1);
+#endif
+ patch_at(w, _jit->pc.w);
+}
+
+static void
+_movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    /* s390 fallback conditional move: r0 = r1 iff r2 == 0.
+     * Branch over the register copy when r2 != 0, using the 32-bit
+     * LR or 64-bit LGR load-register form as appropriate, then patch
+     * the branch target to the current position. */
+ jit_word_t w;
+ w = bnei_p(_jit->pc.w, r2, 0);
+#if __WORDSIZE == 32
+ LR(r0, r1);
+#else
+ LGR(r0, r1);
+#endif
+ patch_at(w, _jit->pc.w);
+}
+
static void
_addr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
#if __WORDSIZE == 32
-#define JIT_INSTR_MAX 104
+#define JIT_INSTR_MAX 128
0, /* data */
0, /* live */
6, /* align */
24, /* nei */
4, /* movr */
16, /* movi */
+ 14, /* movnr */
+ 14, /* movzr */
4, /* extr_c */
4, /* extr_uc */
4, /* extr_s */
0, /* movi_d_ww */
0, /* movr_d_w */
0, /* movi_d_w */
+ 52, /* bswapr_us */
+ 128, /* bswapr_ui */
+ 0, /* bswapr_ul */
#endif /* __WORDSIZE */
#if __WORDSIZE == 64
-#define JIT_INSTR_MAX 104
+#define JIT_INSTR_MAX 344
0, /* data */
0, /* live */
6, /* align */
24, /* nei */
4, /* movr */
16, /* movi */
+ 14, /* movnr */
+ 14, /* movzr */
4, /* extr_c */
4, /* extr_uc */
4, /* extr_s */
0, /* movi_d_ww */
0, /* movr_d_w */
0, /* movi_d_w */
+ 68, /* bswapr_us */
+ 160, /* bswapr_ui */
+ 344, /* bswapr_ul */
#endif /* __WORDSIZE */
case_rr(hton, _ui);
#if __WORDSIZE == 64
case_rr(hton, _ul);
+#endif
+ case_rr(bswap, _us);
+ case_rr(bswap, _ui);
+#if __WORDSIZE == 64
+ case_rr(bswap, _ul);
#endif
case_rr(ext, _c);
case_rr(ext, _uc);
case_rr(ext, _i);
case_rr(ext, _ui);
#endif
+ case_rrr(movn,);
+ case_rrr(movz,);
case_rr(mov,);
case jit_code_movi:
if (node->flag & jit_flag_node) {
static void _movi(jit_state_t*, jit_int32_t, jit_word_t);
# define movi_p(r0, i0) _movi_p(_jit, r0, i0)
static jit_word_t _movi_p(jit_state_t*, jit_int32_t, jit_word_t);
+# define bswapr_us(r0, r1) generic_bswapr_us(_jit, r0, r1)
+# define bswapr_ui(r0, r1) generic_bswapr_ui(_jit, r0, r1)
+# define bswapr_ul(r0, r1) generic_bswapr_ul(_jit, r0, r1)
+# define movnr(r0,r1,r2) _movnr(_jit,r0,r1,r2)
+static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+# define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2)
+static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define comr(r0, r1) XNOR(r1, 0, r0)
# define negr(r0, r1) NEG(r1, r0)
# define addr(r0, r1, r2) ADD(r1, r2, r0)
# define rshr_u(r0, r1, r2) SRLX(r1, r2, r0)
# define rshi_u(r0, r1, i0) SRLXI(r1, i0, r0)
# endif
-# define htonr_us(r0,r1) extr_us(r0,r1)
# define extr_c(r0,r1) _extr_c(_jit,r0,r1)
static void _extr_c(jit_state_t*,jit_int32_t,jit_int32_t);
# define extr_uc(r0,r1) andi(r0, r1, 0xff)
static void _extr_s(jit_state_t*,jit_int32_t,jit_int32_t);
# define extr_us(r0,r1) _extr_us(_jit,r0,r1)
static void _extr_us(jit_state_t*,jit_int32_t,jit_int32_t);
-# if __WORDSIZE == 32
-# define htonr_ui(r0,r1) movr(r0,r1)
-# else
-# define htonr_ui(r0,r1) extr_ui(r0,r1)
-# define htonr_ul(r0,r1) movr(r0,r1)
+# if __WORDSIZE == 64
# define extr_i(r0,r1) _extr_i(_jit,r0,r1)
static void _extr_i(jit_state_t*,jit_int32_t,jit_int32_t);
# define extr_ui(r0,r1) _extr_ui(_jit,r0,r1)
return (w);
}
+static void
+_movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    /* SPARC fallback conditional move: r0 = r1 iff r2 != 0.
+     * Branch over the copy when r2 == 0; the copy is an or with zero
+     * immediate (ORI operand order here presumably rs, imm, rd —
+     * matches the sibling s390/riscv fallbacks copying r1 into r0). */
+ jit_word_t w;
+ w = beqi(_jit->pc.w, r2, 0);
+ ORI(r1, 0, r0);
+ patch_at(w, _jit->pc.w);
+}
+
+static void
+_movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    /* SPARC fallback conditional move: r0 = r1 iff r2 == 0.
+     * Branch over the copy when r2 != 0; the copy is an or with zero
+     * immediate (ORI operand order here presumably rs, imm, rd —
+     * matches the sibling s390/riscv fallbacks copying r1 into r0). */
+ jit_word_t w;
+ w = bnei(_jit->pc.w, r2, 0);
+ ORI(r1, 0, r0);
+ patch_at(w, _jit->pc.w);
+}
+
static void
_addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
{
#if __WORDSIZE == 32
-#define JIT_INSTR_MAX 44
+#define JIT_INSTR_MAX 52
0, /* data */
0, /* live */
0, /* align */
16, /* nei */
4, /* movr */
8, /* movi */
+ 20, /* movnr */
+ 20, /* movzr */
8, /* extr_c */
4, /* extr_uc */
8, /* extr_s */
0, /* movi_d_ww */
0, /* movr_d_w */
0, /* movi_d_w */
+ 20, /* bswapr_us */
+ 52, /* bswapr_ui */
+ 0, /* bswapr_ul */
#endif /* __WORDSIZE */
#if __WORDSIZE == 64
-#define JIT_INSTR_MAX 64
+#define JIT_INSTR_MAX 116
0, /* data */
0, /* live */
4, /* align */
16, /* nei */
4, /* movr */
24, /* movi */
+ 20, /* movnr */
+ 20, /* movzr */
8, /* extr_c */
4, /* extr_uc */
8, /* extr_s */
0, /* movi_d_ww */
0, /* movr_d_w */
0, /* movi_d_w */
+ 20, /* bswapr_us */
+ 52, /* bswapr_ui */
+ 116, /* bswapr_ul */
#endif /* __WORDSIZE */
case_rr(hton, _ui);
#if __WORDSIZE == 64
case_rr(hton, _ul);
+#endif
+ case_rr(bswap, _us);
+ case_rr(bswap, _ui);
+#if __WORDSIZE == 64
+ case_rr(bswap, _ul);
#endif
case_rr(ext, _c);
case_rr(ext, _uc);
case_rr(ext, _i);
case_rr(ext, _ui);
#endif
+ case_rrr(movn,);
+ case_rrr(movz,);
case_rr(mov,);
case jit_code_movi:
if (node->flag & jit_flag_node) {
# define movir_u(r0, r1) _movir_u(_jit, r0, r1)
static void _movir_u(jit_state_t*,jit_int32_t,jit_int32_t);
# endif
-# define htonr_us(r0, r1) _htonr_us(_jit, r0, r1)
-static void _htonr_us(jit_state_t*,jit_int32_t,jit_int32_t);
-# define htonr_ui(r0, r1) _htonr_ui(_jit, r0, r1)
-static void _htonr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
+# define bswapr_us(r0, r1) _bswapr_us(_jit, r0, r1)
+static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t);
+# define bswapr_ui(r0, r1) _bswapr_ui(_jit, r0, r1)
+static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
# if __X64 && !__X64_32
-#define htonr_ul(r0, r1) _htonr_ul(_jit, r0, r1)
-static void _htonr_ul(jit_state_t*,jit_int32_t,jit_int32_t);
+#define bswapr_ul(r0, r1) _bswapr_ul(_jit, r0, r1)
+static void _bswapr_ul(jit_state_t*,jit_int32_t,jit_int32_t);
#endif
# define extr_c(r0, r1) _extr_c(_jit, r0, r1)
static void _extr_c(jit_state_t*,jit_int32_t,jit_int32_t);
#endif
static void
-_htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
extr_us(r0, r1);
ic(0x66);
}
static void
-_htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
movr(r0, r1);
rex(0, 0, _NOREG, _NOREG, r0);
#if __X64 && !__X64_32
static void
-_htonr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+_bswapr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
movr(r0, r1);
rex(0, 1, _NOREG, _NOREG, r0);
16, /* nei */
2, /* movr */
5, /* movi */
+ 7, /* movnr */
+ 7, /* movzr */
11, /* extr_c */
11, /* extr_uc */
3, /* extr_s */
0, /* movi_d_ww */
0, /* movr_d_w */
0, /* movi_d_w */
- 7, /* movnr */
- 7, /* movzr */
+ 7, /* bswapr_us */
+ 4, /* bswapr_ui */
+ 0, /* bswapr_ul */
#endif
#if __X64
14, /* nei */
3, /* movr */
10, /* movi */
+ 7, /* movnr */
+ 7, /* movzr */
7, /* extr_c */
7, /* extr_uc */
4, /* extr_s */
0, /* movi_d_ww */
0, /* movr_d_w */
0, /* movi_d_w */
- 7, /* movnr */
- 7, /* movzr */
+ 9, /* bswapr_us */
+ 6, /* bswapr_ui */
+ 6, /* bswapr_ul */
#else
# if __X64_32
14, /* nei */
3, /* movr */
6, /* movi */
+ 7, /* movnr */
+ 7, /* movzr */
7, /* extr_c */
7, /* extr_uc */
4, /* extr_s */
0, /* movi_d_ww */
0, /* movr_d_w */
0, /* movi_d_w */
- 7, /* movnr */
- 7, /* movzr */
+ 9, /* bswapr_us */
+ 6, /* bswapr_ui */
+ 0, /* bswapr_ul */
# else
#define JIT_INSTR_MAX 115
14, /* nei */
3, /* movr */
10, /* movi */
+ 7, /* movnr */
+ 7, /* movzr */
4, /* extr_c */
4, /* extr_uc */
4, /* extr_s */
0, /* movi_d_ww */
0, /* movr_d_w */
0, /* movi_d_w */
- 7, /* movnr */
- 7, /* movzr */
+ 9, /* bswapr_us */
+ 6, /* bswapr_ui */
+ 6, /* bswapr_ul */
#endif /* __CYGWIN__ || _WIN32 */
# endif /* __X64_32 */
#endif /* __X64 */
case_rr(hton, _ui);
#if __X64 && !__X64_32
case_rr(hton, _ul);
+#endif
+ case_rr(bswap, _us);
+ case_rr(bswap, _ui);
+#if __X64 && !__X64_32
+ case_rr(bswap, _ul);
#endif
case_rr(ext, _c);
case_rr(ext, _uc);
#include <lightning.h>
#include <lightning/jit_private.h>
-#include <sys/mman.h>
+#if HAVE_MMAP
+# include <sys/mman.h>
+#endif
#if defined(__sgi)
# include <fcntl.h>
#endif
#if DEVEL_DISASSEMBLER
jit_really_clear_state();
#endif
+#if HAVE_MMAP
if (!_jit->user_code)
munmap(_jit->code.ptr, _jit->code.length);
if (!_jit->user_data)
munmap(_jit->data.ptr, _jit->data.length);
+#endif
jit_free((jit_pointer_t *)&_jit);
}
case jit_code_truncr_f_i: case jit_code_truncr_f_l:
case jit_code_truncr_d_i: case jit_code_truncr_d_l:
case jit_code_htonr_us: case jit_code_htonr_ui: case jit_code_htonr_ul:
+ case jit_code_bswapr_us: case jit_code_bswapr_ui: case jit_code_bswapr_ul:
case jit_code_ldr_c: case jit_code_ldr_uc:
case jit_code_ldr_s: case jit_code_ldr_us: case jit_code_ldr_i:
case jit_code_ldr_ui: case jit_code_ldr_l: case jit_code_negr_f:
case jit_code_unordi_d:
mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_reg|jit_cc_a2_dbl;
break;
- case jit_code_movnr: case jit_code_movzr:
case jit_code_addr: case jit_code_addxr: case jit_code_addcr:
case jit_code_subr: case jit_code_subxr: case jit_code_subcr:
case jit_code_mulr: case jit_code_divr: case jit_code_divr_u:
case jit_code_bxsubr: case jit_code_bxsubr_u:
mask = jit_cc_a0_jmp|jit_cc_a1_reg|jit_cc_a1_chg|jit_cc_a2_reg;
break;
+ case jit_code_movnr: case jit_code_movzr:
+ mask = jit_cc_a0_reg|jit_cc_a0_cnd|jit_cc_a1_reg|jit_cc_a2_reg;
+ break;
default:
abort();
}
#endif
assert(!_jitc->dataset);
+#if !HAVE_MMAP
+ assert(_jit->user_data);
+#else
if (!_jit->user_data) {
/* create read only data buffer */
close(mmap_fd);
#endif
}
+#endif /* !HAVE_MMAP */
if (!_jitc->no_data)
jit_memcpy(_jit->data.ptr, _jitc->data.ptr, _jitc->data.offset);
_jitc->emit = 1;
+#if !HAVE_MMAP
+ assert(_jit->user_code);
+#else
if (!_jit->user_code) {
#if defined(__sgi)
mmap_fd = open("/dev/zero", O_RDWR);
MAP_PRIVATE | MAP_ANON, mmap_fd, 0);
assert(_jit->code.ptr != MAP_FAILED);
}
+#endif /* !HAVE_MMAP */
_jitc->code.end = _jit->code.ptr + _jit->code.length -
jit_get_max_instr();
_jit->pc.uc = _jit->code.ptr;
node->code == jit_code_epilog))
node->flag &= ~jit_flag_patch;
}
+#if !HAVE_MMAP
+ assert(_jit->user_code);
+#else
if (_jit->user_code)
goto fail;
#if GET_JIT_SIZE
_jitc->code.end = _jit->code.ptr + _jit->code.length -
jit_get_max_instr();
_jit->pc.uc = _jit->code.ptr;
+#endif /* !HAVE_MMAP */
}
else
break;
if (_jit->user_data)
jit_free((jit_pointer_t *)&_jitc->data.ptr);
+#if HAVE_MMAP
else {
result = mprotect(_jit->data.ptr, _jit->data.length, PROT_READ);
assert(result == 0);
PROT_READ | PROT_EXEC);
assert(result == 0);
}
+#endif /* HAVE_MMAP */
return (_jit->code.ptr);
fail:
default:
value = jit_classify(node->code);
/* lack of extra information */
- if (value & jit_cc_a0_jmp)
+ if (value & (jit_cc_a0_jmp|jit_cc_a0_cnd))
return (jit_reg_change);
else if ((value & (jit_cc_a0_reg|jit_cc_a0_chg)) ==
(jit_cc_a0_reg|jit_cc_a0_chg) &&
}
}
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+# define htonr_us(r0,r1) bswapr_us(r0,r1)
+# define htonr_ui(r0,r1) bswapr_ui(r0,r1)
+# if __WORDSIZE == 64
+# define htonr_ul(r0,r1) bswapr_ul(r0,r1)
+# endif
+#else
+# define htonr_us(r0,r1) extr_us(r0,r1)
+# if __WORDSIZE == 32
+# define htonr_ui(r0,r1) movr(r0,r1)
+# else
+# define htonr_ui(r0,r1) extr_ui(r0,r1)
+# define htonr_ul(r0,r1) movr(r0,r1)
+# endif
+#endif
+
+static maybe_unused void
+generic_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1);
+static maybe_unused void
+generic_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1);
+#if __WORDSIZE == 64
+static maybe_unused void
+generic_bswapr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1);
+#endif
+
#if defined(__i386__) || defined(__x86_64__)
# include "jit_x86.c"
#elif defined(__mips__)
#elif defined(__riscv)
# include "jit_riscv.c"
#endif
+
+static maybe_unused void
+generic_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+ jit_int32_t reg = jit_get_reg(jit_class_gpr);
+
+ rshi(rn(reg), r1, 8);
+ andi(r0, r1, 0xff);
+ andi(rn(reg), rn(reg), 0xff);
+ lshi(r0, r0, 8);
+ orr(r0, r0, rn(reg));
+
+ jit_unget_reg(reg);
+}
+
+static maybe_unused void
+generic_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+ jit_int32_t reg = jit_get_reg(jit_class_gpr);
+
+ rshi(rn(reg), r1, 16);
+ bswapr_us(r0, r1);
+ bswapr_us(rn(reg), rn(reg));
+ lshi(r0, r0, 16);
+ orr(r0, r0, rn(reg));
+
+ jit_unget_reg(reg);
+}
+
+#if __WORDSIZE == 64
+static maybe_unused void
+generic_bswapr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+ jit_int32_t reg = jit_get_reg(jit_class_gpr);
+
+ rshi_u(rn(reg), r1, 32);
+ bswapr_ui(r0, r1);
+ bswapr_ui(rn(reg), rn(reg));
+ lshi(r0, r0, 32);
+ orr(r0, r0, rn(reg));
+
+ jit_unget_reg(reg);
+}
+#endif
[subrepo]
remote = https://github.com/pcercuei/lightrec.git
branch = master
- commit = de06670b257004d98d30e8585a4e6530e77d3acd
- parent = e24732050e902bd5402b2b7da7c391d2ca8fa799
+ commit = 49ef275a66aad8540ab73b09b0dd2128ebe4d6dc
+ parent = a0467ff492a25521867fcfb7d66b9c617017151a
method = merge
cmdver = 0.4.3
target_link_libraries(${PROJECT_NAME} PRIVATE ${PTHREAD_LIBRARIES})
endif (ENABLE_THREADED_COMPILER)
+option(ENABLE_CODE_BUFFER "Enable external code buffer" OFF)
+if (ENABLE_CODE_BUFFER)
+ target_sources(${PROJECT_NAME} PRIVATE tlsf/tlsf.c)
+ target_include_directories(${PROJECT_NAME} PRIVATE tlsf)
+endif (ENABLE_CODE_BUFFER)
+
+if (ENABLE_CODE_BUFFER AND ENABLE_THREADED_COMPILER)
+ message(SEND_ERROR "External code buffer cannot be used along with the threaded compiler")
+endif (ENABLE_CODE_BUFFER AND ENABLE_THREADED_COMPILER)
+
find_library(LIBLIGHTNING lightning REQUIRED)
find_path(LIBLIGHTNING_INCLUDE_DIR lightning.h REQUIRED)
u32 offset = lut_offset(block->pc);
if (block->function) {
- memset(&state->code_lut[offset], 0,
- block->nb_ops * sizeof(*state->code_lut));
+ memset(lut_address(state, offset), 0,
+ block->nb_ops * lut_elm_size(state));
}
}
bool lightrec_block_is_outdated(struct lightrec_state *state, struct block *block)
{
- void **lut_entry = &state->code_lut[lut_offset(block->pc)];
+ u32 offset = lut_offset(block->pc);
bool outdated;
+ void *addr;
- if (*lut_entry)
+ if (lut_read(state, offset))
return false;
outdated = block->hash != lightrec_calculate_block_hash(block);
/* The block was marked as outdated, but the content is still
* the same */
if (block->function)
- *lut_entry = block->function;
+ addr = block->function;
else
- *lut_entry = state->get_next_block;
+ addr = state->get_next_block;
+
+ lut_write(state, offset, addr);
}
return outdated;
}
}
-void lightrec_print_disassembly(const struct block *block, const u32 *code)
+void lightrec_print_disassembly(const struct block *block, const u32 *code_ptr)
{
const struct opcode *op;
const char **flags_ptr;
size_t nb_flags, count, count2;
char buf[256], buf2[256], buf3[256];
unsigned int i;
- u32 pc, branch_pc;
+ u32 pc, branch_pc, code;
bool is_io;
for (i = 0; i < block->nb_ops; i++) {
op = &block->opcode_list[i];
branch_pc = get_branch_pc(block, i, 0);
pc = block->pc + (i << 2);
+ code = LE32TOH(code_ptr[i]);
- count = print_op((union code)code[i], pc, buf, sizeof(buf),
+ count = print_op((union code)code, pc, buf, sizeof(buf),
&flags_ptr, &nb_flags, &is_io);
flags_ptr = NULL;
count2 = print_op(op->c, branch_pc, buf2, sizeof(buf2),
&flags_ptr, &nb_flags, &is_io);
- if (code[i] == op->c.opcode) {
+ if (code == op->c.opcode) {
*buf2 = '\0';
count2 = 0;
}
lightrec_free_reg(reg_cache, rd);
}
+static void rec_movi(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
+{
+ struct regcache *reg_cache = state->reg_cache;
+ union code c = block->opcode_list[offset].c;
+ jit_state_t *_jit = block->_jit;
+ u16 flags = REG_EXT;
+ u8 rt;
+
+ if (!(c.i.imm & 0x8000))
+ flags |= REG_ZEXT;
+
+ rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt, flags);
+
+ jit_movi(rt, (s32)(s16) c.i.imm);
+
+ lightrec_free_reg(reg_cache, rt);
+}
+
static void rec_ADDIU(struct lightrec_cstate *state,
const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_alu_imm(state, block, offset, jit_code_addi, false);
+
+ if (block->opcode_list[offset].c.i.rs)
+ rec_alu_imm(state, block, offset, jit_code_addi, false);
+ else
+ rec_movi(state, block, offset);
}
static void rec_ADDI(struct lightrec_cstate *state,
{
/* TODO: Handle the exception? */
_jit_name(block->_jit, __func__);
- rec_alu_imm(state, block, offset, jit_code_addi, false);
+ rec_ADDIU(state, block, offset);
}
static void rec_SLTIU(struct lightrec_cstate *state,
}
}
+static u32 rec_ram_mask(struct lightrec_state *state)
+{
+ return (RAM_SIZE << (state->mirrors_mapped * 2)) - 1;
+}
+
static void rec_store_memory(struct lightrec_cstate *cstate,
const struct block *block,
u16 offset, jit_code_t code,
+ jit_code_t swap_code,
uintptr_t addr_offset, u32 addr_mask,
bool invalidate)
{
+ const struct lightrec_state *state = cstate->state;
struct regcache *reg_cache = cstate->reg_cache;
struct opcode *op = &block->opcode_list[offset];
jit_state_t *_jit = block->_jit;
union code c = op->c;
u8 rs, rt, tmp, tmp2, tmp3, addr_reg, addr_reg2;
s16 imm = (s16)c.i.imm;
- s32 simm = (s32)imm << (__WORDSIZE / 32 - 1);
+ s32 simm = (s32)imm << (1 - lut_is_32bit(state));
s32 lut_offt = offsetof(struct lightrec_state, code_lut);
bool no_mask = op->flags & LIGHTREC_NO_MASK;
- bool add_imm = c.i.imm && invalidate && simm + lut_offt != (s16)(simm + lut_offt);
+ bool add_imm = c.i.imm &&
+ ((!state->mirrors_mapped && !no_mask) || (invalidate &&
+ ((imm & 0x3) || simm + lut_offt != (s16)(simm + lut_offt))));
bool need_tmp = !no_mask || addr_offset || add_imm;
bool need_tmp2 = addr_offset || invalidate;
addr_reg2 = addr_reg;
}
- jit_new_node_www(code, imm, addr_reg2, rt);
+ if (is_big_endian() && swap_code && c.i.rt) {
+ tmp3 = lightrec_alloc_reg_temp(reg_cache, _jit);
+
+ jit_new_node_ww(swap_code, tmp3, rt);
+ jit_new_node_www(code, imm, addr_reg2, tmp3);
+
+ lightrec_free_reg(reg_cache, tmp3);
+ } else {
+ jit_new_node_www(code, imm, addr_reg2, rt);
+ }
+
lightrec_free_reg(reg_cache, rt);
if (invalidate) {
addr_reg = tmp2;
}
- if (__WORDSIZE == 64) {
+ if (!lut_is_32bit(state)) {
jit_lshi(tmp2, addr_reg, 1);
addr_reg = tmp2;
}
- if (__WORDSIZE == 64 || addr_reg != rs || c.i.rs != 0) {
+ if (addr_reg == rs && c.i.rs == 0) {
+ addr_reg = LIGHTREC_REG_STATE;
+ } else {
jit_addr(tmp2, addr_reg, LIGHTREC_REG_STATE);
addr_reg = tmp2;
}
- jit_stxi(lut_offt, addr_reg, tmp3);
+ if (lut_is_32bit(state))
+ jit_stxi_i(lut_offt, addr_reg, tmp3);
+ else
+ jit_stxi(lut_offt, addr_reg, tmp3);
lightrec_free_reg(reg_cache, tmp3);
}
static void rec_store_ram(struct lightrec_cstate *cstate,
const struct block *block,
u16 offset, jit_code_t code,
- bool invalidate)
+ jit_code_t swap_code, bool invalidate)
{
+ struct lightrec_state *state = cstate->state;
+
_jit_note(block->_jit, __FILE__, __LINE__);
- return rec_store_memory(cstate, block, offset, code,
- cstate->state->offset_ram,
- RAM_SIZE - 1, invalidate);
+ return rec_store_memory(cstate, block, offset, code, swap_code,
+ state->offset_ram, rec_ram_mask(state),
+ invalidate);
}
static void rec_store_scratch(struct lightrec_cstate *cstate,
- const struct block *block,
- u16 offset, jit_code_t code)
+ const struct block *block, u16 offset,
+ jit_code_t code, jit_code_t swap_code)
{
_jit_note(block->_jit, __FILE__, __LINE__);
- return rec_store_memory(cstate, block, offset, code,
+ return rec_store_memory(cstate, block, offset, code, swap_code,
cstate->state->offset_scratch,
0x1fffffff, false);
}
static void rec_store_direct_no_invalidate(struct lightrec_cstate *cstate,
const struct block *block,
- u16 offset, jit_code_t code)
+ u16 offset, jit_code_t code,
+ jit_code_t swap_code)
{
struct lightrec_state *state = cstate->state;
struct regcache *reg_cache = cstate->reg_cache;
}
rt = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rt, 0);
- jit_new_node_www(code, imm, tmp, rt);
+
+ if (is_big_endian() && swap_code && c.i.rt) {
+ tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
+
+ jit_new_node_ww(swap_code, tmp2, rt);
+ jit_new_node_www(code, imm, tmp, tmp2);
+
+ lightrec_free_reg(reg_cache, tmp2);
+ } else {
+ jit_new_node_www(code, imm, tmp, rt);
+ }
lightrec_free_reg(reg_cache, rt);
lightrec_free_reg(reg_cache, tmp);
}
static void rec_store_direct(struct lightrec_cstate *cstate, const struct block *block,
- u16 offset, jit_code_t code)
+ u16 offset, jit_code_t code, jit_code_t swap_code)
{
struct lightrec_state *state = cstate->state;
u32 ram_size = state->mirrors_mapped ? RAM_SIZE * 4 : RAM_SIZE;
/* Compute the offset to the code LUT */
jit_andi(tmp, tmp2, (RAM_SIZE - 1) & ~3);
- if (__WORDSIZE == 64)
+ if (!lut_is_32bit(state))
jit_lshi(tmp, tmp, 1);
jit_addr(tmp, LIGHTREC_REG_STATE, tmp);
/* Write NULL to the code LUT to invalidate any block that's there */
- jit_stxi(offsetof(struct lightrec_state, code_lut), tmp, tmp3);
+ if (lut_is_32bit(state))
+ jit_stxi_i(offsetof(struct lightrec_state, code_lut), tmp, tmp3);
+ else
+ jit_stxi(offsetof(struct lightrec_state, code_lut), tmp, tmp3);
if (state->offset_ram != state->offset_scratch) {
jit_movi(tmp, state->offset_ram);
lightrec_free_reg(reg_cache, tmp3);
rt = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rt, 0);
- jit_new_node_www(code, 0, tmp2, rt);
+
+ if (is_big_endian() && swap_code && c.i.rt) {
+ tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
+
+ jit_new_node_ww(swap_code, tmp, rt);
+ jit_new_node_www(code, 0, tmp2, tmp);
+
+ lightrec_free_reg(reg_cache, tmp);
+ } else {
+ jit_new_node_www(code, 0, tmp2, rt);
+ }
lightrec_free_reg(reg_cache, rt);
lightrec_free_reg(reg_cache, tmp2);
}
static void rec_store(struct lightrec_cstate *state,
- const struct block *block, u16 offset, jit_code_t code)
+ const struct block *block, u16 offset,
+ jit_code_t code, jit_code_t swap_code)
{
u16 flags = block->opcode_list[offset].flags;
bool no_invalidate = (flags & LIGHTREC_NO_INVALIDATE) ||
switch (LIGHTREC_FLAGS_GET_IO_MODE(flags)) {
case LIGHTREC_IO_RAM:
- rec_store_ram(state, block, offset, code, !no_invalidate);
+ rec_store_ram(state, block, offset, code,
+ swap_code, !no_invalidate);
break;
case LIGHTREC_IO_SCRATCH:
- rec_store_scratch(state, block, offset, code);
+ rec_store_scratch(state, block, offset, code, swap_code);
break;
case LIGHTREC_IO_DIRECT:
- if (no_invalidate)
- rec_store_direct_no_invalidate(state, block, offset, code);
- else
- rec_store_direct(state, block, offset, code);
+ if (no_invalidate) {
+ rec_store_direct_no_invalidate(state, block, offset,
+ code, swap_code);
+ } else {
+ rec_store_direct(state, block, offset, code, swap_code);
+ }
break;
default:
rec_io(state, block, offset, true, false);
const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_store(state, block, offset, jit_code_stxi_c);
+ rec_store(state, block, offset, jit_code_stxi_c, 0);
}
static void rec_SH(struct lightrec_cstate *state,
const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_store(state, block, offset, jit_code_stxi_s);
+ rec_store(state, block, offset,
+ jit_code_stxi_s, jit_code_bswapr_us);
}
static void rec_SW(struct lightrec_cstate *state,
{
_jit_name(block->_jit, __func__);
- rec_store(state, block, offset, jit_code_stxi_i);
+ rec_store(state, block, offset,
+ jit_code_stxi_i, jit_code_bswapr_ui);
}
static void rec_SWL(struct lightrec_cstate *state,
}
static void rec_load_memory(struct lightrec_cstate *cstate,
- const struct block *block,
- u16 offset, jit_code_t code, bool is_unsigned,
+ const struct block *block, u16 offset,
+ jit_code_t code, jit_code_t swap_code, bool is_unsigned,
uintptr_t addr_offset, u32 addr_mask)
{
struct regcache *reg_cache = cstate->reg_cache;
struct opcode *op = &block->opcode_list[offset];
jit_state_t *_jit = block->_jit;
u8 rs, rt, addr_reg, flags = REG_EXT;
+ bool no_mask = op->flags & LIGHTREC_NO_MASK;
union code c = op->c;
+ s16 imm;
if (!c.i.rt)
return;
rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0);
rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt, flags);
- if (!(op->flags & LIGHTREC_NO_MASK)) {
- jit_andi(rt, rs, addr_mask);
+ if (!cstate->state->mirrors_mapped && c.i.imm && !no_mask) {
+ jit_addi(rt, rs, (s16)c.i.imm);
addr_reg = rt;
+ imm = 0;
} else {
addr_reg = rs;
+ imm = (s16)c.i.imm;
+ }
+
+ if (!no_mask) {
+ jit_andi(rt, addr_reg, addr_mask);
+ addr_reg = rt;
}
if (addr_offset) {
addr_reg = rt;
}
- jit_new_node_www(code, rt, addr_reg, (s16)c.i.imm);
+ jit_new_node_www(code, rt, addr_reg, imm);
+
+ if (is_big_endian() && swap_code) {
+ jit_new_node_ww(swap_code, rt, rt);
+
+ if (c.i.op == OP_LH)
+ jit_extr_s(rt, rt);
+ else if (c.i.op == OP_LW && __WORDSIZE == 64)
+ jit_extr_i(rt, rt);
+ }
lightrec_free_reg(reg_cache, rs);
lightrec_free_reg(reg_cache, rt);
}
static void rec_load_ram(struct lightrec_cstate *cstate,
- const struct block *block,
- u16 offset, jit_code_t code, bool is_unsigned)
+ const struct block *block, u16 offset,
+ jit_code_t code, jit_code_t swap_code, bool is_unsigned)
{
_jit_note(block->_jit, __FILE__, __LINE__);
- rec_load_memory(cstate, block, offset, code, is_unsigned,
- cstate->state->offset_ram, RAM_SIZE - 1);
+ rec_load_memory(cstate, block, offset, code, swap_code, is_unsigned,
+ cstate->state->offset_ram, rec_ram_mask(cstate->state));
}
static void rec_load_bios(struct lightrec_cstate *cstate,
- const struct block *block,
- u16 offset, jit_code_t code, bool is_unsigned)
+ const struct block *block, u16 offset,
+ jit_code_t code, jit_code_t swap_code, bool is_unsigned)
{
_jit_note(block->_jit, __FILE__, __LINE__);
- rec_load_memory(cstate, block, offset, code, is_unsigned,
+ rec_load_memory(cstate, block, offset, code, swap_code, is_unsigned,
cstate->state->offset_bios, 0x1fffffff);
}
static void rec_load_scratch(struct lightrec_cstate *cstate,
- const struct block *block,
- u16 offset, jit_code_t code, bool is_unsigned)
+ const struct block *block, u16 offset,
+ jit_code_t code, jit_code_t swap_code, bool is_unsigned)
{
_jit_note(block->_jit, __FILE__, __LINE__);
- rec_load_memory(cstate, block, offset, code, is_unsigned,
+ rec_load_memory(cstate, block, offset, code, swap_code, is_unsigned,
cstate->state->offset_scratch, 0x1fffffff);
}
-static void rec_load_direct(struct lightrec_cstate *cstate, const struct block *block,
- u16 offset, jit_code_t code, bool is_unsigned)
+static void rec_load_direct(struct lightrec_cstate *cstate,
+ const struct block *block, u16 offset,
+ jit_code_t code, jit_code_t swap_code,
+ bool is_unsigned)
{
struct lightrec_state *state = cstate->state;
struct regcache *reg_cache = cstate->reg_cache;
jit_new_node_www(code, rt, rt, imm);
+ if (is_big_endian() && swap_code) {
+ jit_new_node_ww(swap_code, rt, rt);
+
+ if (c.i.op == OP_LH)
+ jit_extr_s(rt, rt);
+ else if (c.i.op == OP_LW && __WORDSIZE == 64)
+ jit_extr_i(rt, rt);
+ }
+
lightrec_free_reg(reg_cache, addr_reg);
lightrec_free_reg(reg_cache, rt);
lightrec_free_reg(reg_cache, tmp);
}
static void rec_load(struct lightrec_cstate *state, const struct block *block,
- u16 offset, jit_code_t code, bool is_unsigned)
+ u16 offset, jit_code_t code, jit_code_t swap_code,
+ bool is_unsigned)
{
u16 flags = block->opcode_list[offset].flags;
switch (LIGHTREC_FLAGS_GET_IO_MODE(flags)) {
case LIGHTREC_IO_RAM:
- rec_load_ram(state, block, offset, code, is_unsigned);
+ rec_load_ram(state, block, offset, code, swap_code, is_unsigned);
break;
case LIGHTREC_IO_BIOS:
- rec_load_bios(state, block, offset, code, is_unsigned);
+ rec_load_bios(state, block, offset, code, swap_code, is_unsigned);
break;
case LIGHTREC_IO_SCRATCH:
- rec_load_scratch(state, block, offset, code, is_unsigned);
+ rec_load_scratch(state, block, offset, code, swap_code, is_unsigned);
break;
case LIGHTREC_IO_DIRECT:
- rec_load_direct(state, block, offset, code, is_unsigned);
+ rec_load_direct(state, block, offset, code, swap_code, is_unsigned);
break;
default:
rec_io(state, block, offset, false, true);
static void rec_LB(struct lightrec_cstate *state, const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_load(state, block, offset, jit_code_ldxi_c, false);
+ rec_load(state, block, offset, jit_code_ldxi_c, 0, false);
}
static void rec_LBU(struct lightrec_cstate *state, const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_load(state, block, offset, jit_code_ldxi_uc, true);
+ rec_load(state, block, offset, jit_code_ldxi_uc, 0, true);
}
static void rec_LH(struct lightrec_cstate *state, const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_load(state, block, offset, jit_code_ldxi_s, false);
+ rec_load(state, block, offset, jit_code_ldxi_s, jit_code_bswapr_us, false);
}
static void rec_LHU(struct lightrec_cstate *state, const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_load(state, block, offset, jit_code_ldxi_us, true);
+ rec_load(state, block, offset, jit_code_ldxi_us, jit_code_bswapr_us, true);
}
static void rec_LWL(struct lightrec_cstate *state, const struct block *block, u16 offset)
static void rec_LW(struct lightrec_cstate *state, const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_load(state, block, offset, jit_code_ldxi_i, false);
+ rec_load(state, block, offset, jit_code_ldxi_i, jit_code_bswapr_ui, false);
}
static void rec_LWC2(struct lightrec_cstate *state, const struct block *block, u16 offset)
rec_mtc0(state, block, offset);
}
+static unsigned int cp2d_i_offset(u8 reg)
+{
+ return offsetof(struct lightrec_state, regs.cp2d[reg]);
+}
+
+static unsigned int cp2d_s_offset(u8 reg)
+{
+ return cp2d_i_offset(reg) + is_big_endian() * 2;
+}
+
+static unsigned int cp2c_i_offset(u8 reg)
+{
+ return offsetof(struct lightrec_state, regs.cp2c[reg]);
+}
+
+static unsigned int cp2c_s_offset(u8 reg)
+{
+ return cp2c_i_offset(reg) + is_big_endian() * 2;
+}
+
static void rec_cp2_basic_MFC2(struct lightrec_cstate *state,
const struct block *block, u16 offset)
{
case 9:
case 10:
case 11:
- jit_ldxi_s(rt, LIGHTREC_REG_STATE,
- offsetof(struct lightrec_state, regs.cp2d[reg]));
+ jit_ldxi_s(rt, LIGHTREC_REG_STATE, cp2d_s_offset(reg));
break;
case 7:
case 16:
case 17:
case 18:
case 19:
- jit_ldxi_us(rt, LIGHTREC_REG_STATE,
- offsetof(struct lightrec_state, regs.cp2d[reg]));
+ jit_ldxi_us(rt, LIGHTREC_REG_STATE, cp2d_s_offset(reg));
break;
case 28:
case 29:
for (i = 0; i < 3; i++) {
out = i == 0 ? rt : tmp;
- jit_ldxi_s(tmp, LIGHTREC_REG_STATE,
- offsetof(struct lightrec_state, regs.cp2d[9 + i]));
+ jit_ldxi_s(tmp, LIGHTREC_REG_STATE, cp2d_s_offset(9 + i));
jit_movi(tmp2, 0x1f);
jit_rshi(out, tmp, 7);
lightrec_free_reg(reg_cache, tmp3);
break;
default:
- jit_ldxi_i(rt, LIGHTREC_REG_STATE,
- offsetof(struct lightrec_state, regs.cp2d[reg]));
+ jit_ldxi_i(rt, LIGHTREC_REG_STATE, cp2d_i_offset(reg));
break;
}
case 29:
case 30:
rt = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rt, REG_EXT);
- jit_ldxi_s(rt, LIGHTREC_REG_STATE,
- offsetof(struct lightrec_state, regs.cp2c[c.r.rd]));
+ jit_ldxi_s(rt, LIGHTREC_REG_STATE, cp2c_s_offset(c.r.rd));
break;
default:
rt = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rt, REG_ZEXT);
- jit_ldxi_i(rt, LIGHTREC_REG_STATE,
- offsetof(struct lightrec_state, regs.cp2c[c.r.rd]));
+ jit_ldxi_i(rt, LIGHTREC_REG_STATE, cp2c_i_offset(c.r.rd));
break;
}
switch (c.r.rd) {
case 15:
tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
- jit_ldxi_i(tmp, LIGHTREC_REG_STATE,
- offsetof(struct lightrec_state, regs.cp2d[13]));
+ jit_ldxi_i(tmp, LIGHTREC_REG_STATE, cp2d_i_offset(13));
tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
- jit_ldxi_i(tmp2, LIGHTREC_REG_STATE,
- offsetof(struct lightrec_state, regs.cp2d[14]));
+ jit_ldxi_i(tmp2, LIGHTREC_REG_STATE, cp2d_i_offset(14));
- jit_stxi_i(offsetof(struct lightrec_state, regs.cp2d[12]),
- LIGHTREC_REG_STATE, tmp);
- jit_stxi_i(offsetof(struct lightrec_state, regs.cp2d[13]),
- LIGHTREC_REG_STATE, tmp2);
- jit_stxi_i(offsetof(struct lightrec_state, regs.cp2d[14]),
- LIGHTREC_REG_STATE, rt);
+ jit_stxi_i(cp2d_i_offset(12), LIGHTREC_REG_STATE, tmp);
+ jit_stxi_i(cp2d_i_offset(13), LIGHTREC_REG_STATE, tmp2);
+ jit_stxi_i(cp2d_i_offset(14), LIGHTREC_REG_STATE, rt);
lightrec_free_reg(reg_cache, tmp);
lightrec_free_reg(reg_cache, tmp2);
jit_lshi(tmp, rt, 7);
jit_andi(tmp, tmp, 0xf80);
- jit_stxi_s(offsetof(struct lightrec_state, regs.cp2d[9]),
- LIGHTREC_REG_STATE, tmp);
+ jit_stxi_s(cp2d_s_offset(9), LIGHTREC_REG_STATE, tmp);
jit_lshi(tmp, rt, 2);
jit_andi(tmp, tmp, 0xf80);
- jit_stxi_s(offsetof(struct lightrec_state, regs.cp2d[10]),
- LIGHTREC_REG_STATE, tmp);
+ jit_stxi_s(cp2d_s_offset(10), LIGHTREC_REG_STATE, tmp);
jit_rshi(tmp, rt, 3);
jit_andi(tmp, tmp, 0xf80);
- jit_stxi_s(offsetof(struct lightrec_state, regs.cp2d[11]),
- LIGHTREC_REG_STATE, tmp);
+ jit_stxi_s(cp2d_s_offset(11), LIGHTREC_REG_STATE, tmp);
lightrec_free_reg(reg_cache, tmp);
break;
jit_patch_at(to_loop, loop);
- jit_stxi_i(offsetof(struct lightrec_state, regs.cp2d[31]),
- LIGHTREC_REG_STATE, tmp2);
- jit_stxi_i(offsetof(struct lightrec_state, regs.cp2d[30]),
- LIGHTREC_REG_STATE, rt);
+ jit_stxi_i(cp2d_i_offset(31), LIGHTREC_REG_STATE, tmp2);
+ jit_stxi_i(cp2d_i_offset(30), LIGHTREC_REG_STATE, rt);
lightrec_free_reg(reg_cache, tmp);
lightrec_free_reg(reg_cache, tmp2);
break;
default:
- jit_stxi_i(offsetof(struct lightrec_state, regs.cp2d[c.r.rd]),
- LIGHTREC_REG_STATE, rt);
+ jit_stxi_i(cp2d_i_offset(c.r.rd), LIGHTREC_REG_STATE, rt);
break;
}
case 27:
case 29:
case 30:
- jit_stxi_s(offsetof(struct lightrec_state, regs.cp2c[c.r.rd]),
- LIGHTREC_REG_STATE, rt);
+ jit_stxi_s(cp2c_s_offset(c.r.rd), LIGHTREC_REG_STATE, rt);
break;
case 31:
tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
jit_andi(tmp2, rt, 0x7ffff000);
jit_orr(tmp, tmp2, tmp);
- jit_stxi_i(offsetof(struct lightrec_state, regs.cp2c[31]),
- LIGHTREC_REG_STATE, tmp);
+ jit_stxi_i(cp2c_i_offset(31), LIGHTREC_REG_STATE, tmp);
lightrec_free_reg(reg_cache, tmp);
lightrec_free_reg(reg_cache, tmp2);
break;
default:
- jit_stxi_i(offsetof(struct lightrec_state, regs.cp2c[c.r.rd]),
- LIGHTREC_REG_STATE, rt);
+ jit_stxi_i(cp2c_i_offset(c.r.rd), LIGHTREC_REG_STATE, rt);
}
lightrec_free_reg(reg_cache, rt);
#cmakedefine01 ENABLE_FIRST_PASS
#cmakedefine01 ENABLE_DISASSEMBLER
#cmakedefine01 ENABLE_TINYMM
+#cmakedefine01 ENABLE_CODE_BUFFER
#cmakedefine01 HAS_DEFAULT_ELM
#ifndef __LIGHTREC_PRIVATE_H__
#define __LIGHTREC_PRIVATE_H__
+#include "lightning-wrapper.h"
#include "lightrec-config.h"
#include "disassembler.h"
#include "lightrec.h"
struct recompiler *rec;
struct lightrec_cstate *cstate;
struct reaper *reaper;
+ void *tlsf;
void (*eob_wrapper_func)(void);
void (*memset_func)(void);
void (*get_next_block)(void);
unsigned int nb_maps;
const struct lightrec_mem_map *maps;
uintptr_t offset_ram, offset_bios, offset_scratch;
+ _Bool with_32bit_lut;
_Bool mirrors_mapped;
_Bool invalidate_from_dma_only;
void *code_lut[];
void remove_from_code_lut(struct blockcache *cache, struct block *block);
+enum psx_map
+lightrec_get_map_idx(struct lightrec_state *state, u32 kaddr);
+
const struct lightrec_mem_map *
lightrec_get_map(struct lightrec_state *state, void **host, u32 kaddr);
return (pc & (RAM_SIZE - 1)) >> 2; // RAM
}
+static inline _Bool is_big_endian(void)
+{
+ return __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__;
+}
+
+static inline _Bool lut_is_32bit(const struct lightrec_state *state)
+{
+ return __WORDSIZE == 32 ||
+ (ENABLE_CODE_BUFFER && state->with_32bit_lut);
+}
+
+static inline size_t lut_elm_size(const struct lightrec_state *state)
+{
+ return lut_is_32bit(state) ? 4 : sizeof(void *);
+}
+
+static inline void ** lut_address(struct lightrec_state *state, u32 offset)
+{
+ if (lut_is_32bit(state))
+ return (void **) ((uintptr_t) state->code_lut + offset * 4);
+ else
+ return &state->code_lut[offset];
+}
+
+static inline void * lut_read(struct lightrec_state *state, u32 offset)
+{
+ void **lut_entry = lut_address(state, lut_offset(offset));
+
+ if (lut_is_32bit(state))
+ return (void *)(uintptr_t) *(u32 *) lut_entry;
+ else
+ return *lut_entry;
+}
+
+static inline void lut_write(struct lightrec_state *state, u32 offset, void *ptr)
+{
+ void **lut_entry = lut_address(state, offset);
+
+ if (lut_is_32bit(state))
+ *(u32 *) lut_entry = (u32)(uintptr_t) ptr;
+ else
+ *lut_entry = ptr;
+}
+
static inline u32 get_ds_pc(const struct block *block, u16 offset, s16 imm)
{
u16 flags = block->opcode_list[offset].flags;
#include "recompiler.h"
#include "regcache.h"
#include "optimizer.h"
+#include "tlsf/tlsf.h"
#include <errno.h>
#include <inttypes.h>
const struct lightrec_mem_map *map, u32 addr, u32 len)
{
if (map == &state->maps[PSX_MAP_KERNEL_USER_RAM]) {
- memset(&state->code_lut[lut_offset(addr)], 0,
- ((len + 3) / 4) * sizeof(void *));
+ memset(lut_address(state, lut_offset(addr)), 0,
+ ((len + 3) / 4) * lut_elm_size(state));
}
}
-const struct lightrec_mem_map *
-lightrec_get_map(struct lightrec_state *state, void **host, u32 kaddr)
+enum psx_map
+lightrec_get_map_idx(struct lightrec_state *state, u32 kaddr)
{
const struct lightrec_mem_map *map;
unsigned int i;
- u32 addr;
for (i = 0; i < state->nb_maps; i++) {
- const struct lightrec_mem_map *mapi = &state->maps[i];
+ map = &state->maps[i];
- if (kaddr >= mapi->pc && kaddr < mapi->pc + mapi->length) {
- map = mapi;
- break;
- }
+ if (kaddr >= map->pc && kaddr < map->pc + map->length)
+ return (enum psx_map) i;
}
- if (i == state->nb_maps)
+ return PSX_MAP_UNKNOWN;
+}
+
+const struct lightrec_mem_map *
+lightrec_get_map(struct lightrec_state *state, void **host, u32 kaddr)
+{
+ const struct lightrec_mem_map *map;
+ enum psx_map idx;
+ u32 addr;
+
+ idx = lightrec_get_map_idx(state, kaddr);
+ if (idx == PSX_MAP_UNKNOWN)
return NULL;
+ map = &state->maps[idx];
addr = kaddr - map->pc;
while (map->mirror_of)
void *func;
for (;;) {
- func = state->code_lut[lut_offset(pc)];
+ func = lut_read(state, pc);
if (func && func != state->get_next_block)
break;
return state->target_cycle - state->current_cycle;
}
+static void * lightrec_emit_code(struct lightrec_state *state,
+ jit_state_t *_jit, unsigned int *size)
+{
+ bool has_code_buffer = ENABLE_CODE_BUFFER && state->tlsf;
+ jit_word_t code_size, new_code_size;
+ void *code;
+
+ jit_realize();
+
+ if (!ENABLE_DISASSEMBLER)
+ jit_set_data(NULL, 0, JIT_DISABLE_DATA | JIT_DISABLE_NOTE);
+
+ if (has_code_buffer) {
+ jit_get_code(&code_size);
+ code = tlsf_malloc(state->tlsf, (size_t) code_size);
+ if (!code)
+ return NULL;
+
+ jit_set_code(code, code_size);
+ }
+
+ code = jit_emit();
+
+ jit_get_code(&new_code_size);
+ lightrec_register(MEM_FOR_CODE, new_code_size);
+
+ if (has_code_buffer) {
+ tlsf_realloc(state->tlsf, code, new_code_size);
+
+ pr_debug("Creating code block at address 0x%" PRIxPTR ", "
+ "code size: %" PRIuPTR " new: %" PRIuPTR "\n",
+ (uintptr_t) code, code_size, new_code_size);
+ }
+
+ *size = (unsigned int) new_code_size;
+
+ return code;
+}
+
static struct block * generate_wrapper(struct lightrec_state *state)
{
struct block *block;
jit_state_t *_jit;
unsigned int i;
int stack_ptr;
- jit_word_t code_size;
jit_node_t *to_tramp, *to_fn_epilog;
jit_node_t *addr[C_WRAPPERS_COUNT - 1];
jit_epilog();
block->_jit = _jit;
- block->function = jit_emit();
block->opcode_list = NULL;
block->flags = 0;
block->nb_ops = 0;
+ block->function = lightrec_emit_code(state, _jit,
+ &block->code_size);
+ if (!block->function)
+ goto err_free_block;
+
state->wrappers_eps[C_WRAPPERS_COUNT - 1] = block->function;
for (i = 0; i < C_WRAPPERS_COUNT - 1; i++)
state->wrappers_eps[i] = jit_address(addr[i]);
- jit_get_code(&code_size);
- lightrec_register(MEM_FOR_CODE, code_size);
-
- block->code_size = code_size;
-
if (ENABLE_DISASSEMBLER) {
pr_debug("Wrapper block:\n");
jit_disassemble();
{
struct block *block;
jit_state_t *_jit;
- jit_node_t *to_end, *to_c, *loop, *addr, *addr2, *addr3;
+ jit_node_t *to_end, *loop, *addr, *addr2, *addr3;
unsigned int i;
- u32 offset, ram_len;
- jit_word_t code_size;
+ u32 offset;
block = lightrec_malloc(state, MEM_FOR_IR, sizeof(*block));
if (!block)
to_end = jit_blei(LIGHTREC_REG_CYCLE, 0);
/* Convert next PC to KUNSEG and avoid mirrors */
- ram_len = state->maps[PSX_MAP_KERNEL_USER_RAM].length;
- jit_andi(JIT_R0, JIT_V0, 0x10000000 | (ram_len - 1));
- to_c = jit_bgei(JIT_R0, ram_len);
-
- /* Fast path: code is running from RAM, use the code LUT */
- if (__WORDSIZE == 64)
+ jit_andi(JIT_R0, JIT_V0, 0x10000000 | (RAM_SIZE - 1));
+ jit_rshi_u(JIT_R1, JIT_R0, 28);
+ jit_andi(JIT_R2, JIT_V0, BIOS_SIZE - 1);
+ jit_addi(JIT_R2, JIT_R2, RAM_SIZE);
+ jit_movnr(JIT_R0, JIT_R2, JIT_R1);
+
+ /* If possible, use the code LUT */
+ if (!lut_is_32bit(state))
jit_lshi(JIT_R0, JIT_R0, 1);
jit_addr(JIT_R0, JIT_R0, LIGHTREC_REG_STATE);
- jit_ldxi(JIT_R0, JIT_R0, offsetof(struct lightrec_state, code_lut));
+
+ offset = offsetof(struct lightrec_state, code_lut);
+ if (lut_is_32bit(state))
+ jit_ldxi_ui(JIT_R0, JIT_R0, offset);
+ else
+ jit_ldxi(JIT_R0, JIT_R0, offset);
/* If we get non-NULL, loop */
jit_patch_at(jit_bnei(JIT_R0, 0), loop);
/* Slow path: call C function get_next_block_func() */
- jit_patch(to_c);
if (ENABLE_FIRST_PASS || OPT_DETECT_IMPOSSIBLE_BRANCHES) {
/* We may call the interpreter - update state->current_cycle */
jit_epilog();
block->_jit = _jit;
- block->function = jit_emit();
block->opcode_list = NULL;
block->flags = 0;
block->nb_ops = 0;
- jit_get_code(&code_size);
- lightrec_register(MEM_FOR_CODE, code_size);
-
- block->code_size = code_size;
+ block->function = lightrec_emit_code(state, _jit,
+ &block->code_size);
+ if (!block->function)
+ goto err_free_block;
state->eob_wrapper_func = jit_address(addr2);
if (OPT_REPLACE_MEMSET)
lightrec_get_map(state, &host, kunseg(pc));
const u32 *code = (u32 *)host;
- return (union code) *code;
+ return (union code) LE32TOH(*code);
}
unsigned int lightrec_cycles_of_opcode(union code code)
block->flags |= BLOCK_FULLY_TAGGED;
if (OPT_REPLACE_MEMSET && (block->flags & BLOCK_IS_MEMSET))
- state->code_lut[lut_offset(pc)] = state->memset_func;
+ lut_write(state, lut_offset(pc), state->memset_func);
block->hash = lightrec_calculate_block_hash(block);
_jit_destroy_state(data);
}
+static void lightrec_free_function(struct lightrec_state *state, void *fn)
+{
+ if (ENABLE_CODE_BUFFER && state->tlsf) {
+ pr_debug("Freeing code block at 0x%" PRIxPTR "\n", (uintptr_t) fn);
+ tlsf_free(state->tlsf, fn);
+ }
+}
+
+static void lightrec_reap_function(struct lightrec_state *state, void *data)
+{
+ lightrec_free_function(state, data);
+}
+
int lightrec_compile_block(struct lightrec_cstate *cstate,
struct block *block)
{
jit_state_t *_jit, *oldjit;
jit_node_t *start_of_block;
bool skip_next = false;
- jit_word_t code_size;
+ void *old_fn;
unsigned int i, j;
u32 offset;
return -ENOMEM;
oldjit = block->_jit;
+ old_fn = block->function;
block->_jit = _jit;
lightrec_regcache_reset(cstate->reg_cache);
jit_ret();
jit_epilog();
- block->function = jit_emit();
+ block->function = lightrec_emit_code(state, _jit,
+ &block->code_size);
+ if (!block->function) {
+ pr_err("Unable to compile block!\n");
+ }
+
block->flags &= ~BLOCK_SHOULD_RECOMPILE;
/* Add compiled function to the LUT */
- state->code_lut[lut_offset(block->pc)] = block->function;
+ lut_write(state, lut_offset(block->pc), block->function);
if (ENABLE_THREADED_COMPILER) {
/* Since we might try to reap the same block multiple times,
* be compiled. We can override the LUT entry with our new
* block's entry point. */
offset = lut_offset(block->pc) + target->offset;
- state->code_lut[offset] = jit_address(target->label);
+ lut_write(state, offset, jit_address(target->label));
if (block2) {
pr_debug("Reap block 0x%08x as it's covered by block "
if (ENABLE_THREADED_COMPILER)
lightrec_reaper_continue(state->reaper);
- jit_get_code(&code_size);
- lightrec_register(MEM_FOR_CODE, code_size);
-
- block->code_size = code_size;
-
if (ENABLE_DISASSEMBLER) {
pr_debug("Compiling block at PC: 0x%08x\n", block->pc);
jit_disassemble();
pr_debug("Block 0x%08x recompiled, reaping old jit context.\n",
block->pc);
- if (ENABLE_THREADED_COMPILER)
+ if (ENABLE_THREADED_COMPILER) {
lightrec_reaper_add(state->reaper,
lightrec_reap_jit, oldjit);
- else
+ lightrec_reaper_add(state->reaper,
+ lightrec_reap_function, old_fn);
+ } else {
_jit_destroy_state(oldjit);
+ lightrec_free_function(state, old_fn);
+ }
}
return 0;
lightrec_free_opcode_list(state, block);
if (block->_jit)
_jit_destroy_state(block->_jit);
+ lightrec_free_function(state, block->function);
lightrec_unregister(MEM_FOR_CODE, block->code_size);
lightrec_free(state, MEM_FOR_IR, sizeof(*block), block);
}
size_t nb,
const struct lightrec_ops *ops)
{
+ const struct lightrec_mem_map *codebuf_map;
struct lightrec_state *state;
+ uintptr_t addr;
+ void *tlsf = NULL;
+ bool with_32bit_lut = false;
+ size_t lut_size;
/* Sanity-check ops */
if (!ops || !ops->cop2_op || !ops->enable_ram) {
return NULL;
}
+ if (ENABLE_CODE_BUFFER && nb > PSX_MAP_CODE_BUFFER) {
+ codebuf_map = &map[PSX_MAP_CODE_BUFFER];
+
+ tlsf = tlsf_create_with_pool(codebuf_map->address,
+ codebuf_map->length);
+ if (!tlsf) {
+ pr_err("Unable to initialize code buffer\n");
+ return NULL;
+ }
+
+ if (__WORDSIZE == 64) {
+ addr = (uintptr_t) codebuf_map->address + codebuf_map->length - 1;
+ with_32bit_lut = addr == (u32) addr;
+ }
+ }
+
+ if (with_32bit_lut)
+ lut_size = CODE_LUT_SIZE * 4;
+ else
+ lut_size = CODE_LUT_SIZE * sizeof(void *);
+
init_jit(argv0);
- state = calloc(1, sizeof(*state) +
- sizeof(*state->code_lut) * CODE_LUT_SIZE);
+ state = calloc(1, sizeof(*state) + lut_size);
if (!state)
goto err_finish_jit;
- lightrec_register(MEM_FOR_LIGHTREC, sizeof(*state) +
- sizeof(*state->code_lut) * CODE_LUT_SIZE);
+ lightrec_register(MEM_FOR_LIGHTREC, sizeof(*state) + lut_size);
+
+ state->tlsf = tlsf;
+ state->with_32bit_lut = with_32bit_lut;
#if ENABLE_TINYMM
state->tinymm = tinymm_init(malloc, free, 4096);
pr_info("Memory map is sub-par. Emitted code will be slow.\n");
}
+ if (state->with_32bit_lut)
+ pr_info("Using 32-bit LUT\n");
+
return state;
err_free_dispatcher:
err_free_state:
#endif
lightrec_unregister(MEM_FOR_LIGHTREC, sizeof(*state) +
- sizeof(*state->code_lut) * CODE_LUT_SIZE);
+ lut_elm_size(state) * CODE_LUT_SIZE);
free(state);
err_finish_jit:
finish_jit();
+ if (ENABLE_CODE_BUFFER && tlsf)
+ tlsf_destroy(tlsf);
return NULL;
}
lightrec_free_block(state, state->dispatcher);
lightrec_free_block(state, state->c_wrapper_block);
finish_jit();
+ if (ENABLE_CODE_BUFFER && state->tlsf)
+ tlsf_destroy(state->tlsf);
#if ENABLE_TINYMM
tinymm_shutdown(state->tinymm);
#endif
lightrec_unregister(MEM_FOR_LIGHTREC, sizeof(*state) +
- sizeof(*state->code_lut) * CODE_LUT_SIZE);
+ lut_elm_size(state) * CODE_LUT_SIZE);
free(state);
}
void lightrec_invalidate_all(struct lightrec_state *state)
{
- memset(state->code_lut, 0, sizeof(*state->code_lut) * CODE_LUT_SIZE);
+ memset(state->code_lut, 0, lut_elm_size(state) * CODE_LUT_SIZE);
}
void lightrec_set_invalidate_mode(struct lightrec_state *state, bool dma_only)
PSX_MAP_MIRROR1,
PSX_MAP_MIRROR2,
PSX_MAP_MIRROR3,
+ PSX_MAP_CODE_BUFFER,
+
+ PSX_MAP_UNKNOWN,
};
struct lightrec_mem_map_ops {
to_nop->opcode = 0;
}
+static void lightrec_remove_useless_lui(struct block *block, unsigned int offset,
+ u32 known, u32 *values)
+{
+ struct opcode *list = block->opcode_list,
+ *op = &block->opcode_list[offset];
+ int reader;
+
+ if (!(op->flags & LIGHTREC_SYNC) && (known & BIT(op->i.rt)) &&
+ values[op->i.rt] == op->i.imm << 16) {
+ pr_debug("Converting duplicated LUI to NOP\n");
+ op->opcode = 0x0;
+ return;
+ }
+
+ if (op->i.imm != 0 || op->i.rt == 0)
+ return;
+
+ reader = find_next_reader(list, offset + 1, op->i.rt);
+ if (reader <= 0)
+ return;
+
+ if (opcode_writes_register(list[reader].c, op->i.rt) ||
+ reg_is_dead(list, reader, op->i.rt)) {
+ pr_debug("Removing useless LUI 0x0\n");
+
+ if (list[reader].i.rs == op->i.rt)
+ list[reader].i.rs = 0;
+ if (list[reader].i.op == OP_SPECIAL &&
+ list[reader].i.rt == op->i.rt)
+ list[reader].i.rt = 0;
+ op->opcode = 0x0;
+ }
+}
+
+static void lightrec_modify_lui(struct block *block, unsigned int offset)
+{
+ union code c, *lui = &block->opcode_list[offset].c;
+ bool stop = false, stop_next = false;
+ unsigned int i;
+
+ for (i = offset + 1; !stop && i < block->nb_ops; i++) {
+ c = block->opcode_list[i].c;
+ stop = stop_next;
+
+ if ((opcode_is_store(c) && c.i.rt == lui->i.rt)
+ || (!opcode_is_load(c) && opcode_reads_register(c, lui->i.rt)))
+ break;
+
+ if (opcode_writes_register(c, lui->i.rt)) {
+ pr_debug("Convert LUI at offset 0x%x to kuseg\n",
+ i - 1 << 2);
+ lui->i.imm = kunseg(lui->i.imm << 16) >> 16;
+ break;
+ }
+
+ if (has_delay_slot(c))
+ stop_next = true;
+ }
+}
+
static int lightrec_transform_ops(struct lightrec_state *state, struct block *block)
{
struct opcode *list = block->opcode_list;
u32 known = BIT(0);
u32 values[32] = { 0 };
unsigned int i;
- int reader;
for (i = 0; i < block->nb_ops; i++) {
prev = op;
break;
case OP_LUI:
- if (!(op->flags & LIGHTREC_SYNC) &&
- (known & BIT(op->i.rt)) &&
- values[op->i.rt] == op->i.imm << 16) {
- pr_debug("Converting duplicated LUI to NOP\n");
- op->opcode = 0x0;
- }
-
- if (op->i.imm != 0 || op->i.rt == 0)
- break;
-
- reader = find_next_reader(list, i + 1, op->i.rt);
- if (reader > 0 &&
- (opcode_writes_register(list[reader].c, op->i.rt) ||
- reg_is_dead(list, reader, op->i.rt))) {
-
- pr_debug("Removing useless LUI 0x0\n");
-
- if (list[reader].i.rs == op->i.rt)
- list[reader].i.rs = 0;
- if (list[reader].i.op == OP_SPECIAL &&
- list[reader].i.rt == op->i.rt)
- list[reader].i.rt = 0;
- op->opcode = 0x0;
- }
+ lightrec_modify_lui(block, i);
+ lightrec_remove_useless_lui(block, i, known, values);
break;
/* Transform ORI/ADDI/ADDIU with imm #0 or ORR/ADD/ADDU/SUB/SUBU
static int lightrec_flag_io(struct lightrec_state *state, struct block *block)
{
- const struct lightrec_mem_map *map;
- struct opcode *prev2, *prev = NULL, *list = NULL;
+ struct opcode *prev = NULL, *list = NULL;
+ enum psx_map psx_map;
u32 known = BIT(0);
u32 values[32] = { 0 };
unsigned int i;
- u32 val;
+ u32 val, kunseg_val;
for (i = 0; i < block->nb_ops; i++) {
- prev2 = prev;
prev = list;
list = &block->opcode_list[i];
case OP_LWR:
case OP_LWC2:
if (OPT_FLAG_IO && (known & BIT(list->i.rs))) {
- if (prev && prev->i.op == OP_LUI &&
- !(prev2 && has_delay_slot(prev2->c)) &&
- prev->i.rt == list->i.rs &&
- list->i.rt == list->i.rs &&
- prev->i.imm & 0x8000) {
- pr_debug("Convert LUI at offset 0x%x to kuseg\n",
- i - 1 << 2);
-
- val = kunseg(prev->i.imm << 16);
- prev->i.imm = val >> 16;
- values[list->i.rs] = val;
- }
-
val = values[list->i.rs] + (s16) list->i.imm;
- map = lightrec_get_map(state, NULL, kunseg(val));
-
- if (!map || map->ops ||
- map == &state->maps[PSX_MAP_PARALLEL_PORT]) {
- pr_debug("Flagging opcode %u as I/O access\n",
- i);
- list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_HW);
- break;
- }
-
- if (val - map->pc < map->length)
- list->flags |= LIGHTREC_NO_MASK;
-
- if (map == &state->maps[PSX_MAP_KERNEL_USER_RAM]) {
+ kunseg_val = kunseg(val);
+ psx_map = lightrec_get_map_idx(state, kunseg_val);
+
+ switch (psx_map) {
+ case PSX_MAP_KERNEL_USER_RAM:
+ if (val == kunseg_val)
+ list->flags |= LIGHTREC_NO_MASK;
+ /* fall-through */
+ case PSX_MAP_MIRROR1:
+ case PSX_MAP_MIRROR2:
+ case PSX_MAP_MIRROR3:
pr_debug("Flaging opcode %u as RAM access\n", i);
list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_RAM);
- } else if (map == &state->maps[PSX_MAP_BIOS]) {
+ break;
+ case PSX_MAP_BIOS:
pr_debug("Flaging opcode %u as BIOS access\n", i);
list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_BIOS);
- } else if (map == &state->maps[PSX_MAP_SCRATCH_PAD]) {
+ break;
+ case PSX_MAP_SCRATCH_PAD:
pr_debug("Flaging opcode %u as scratchpad access\n", i);
list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_SCRATCH);
+
+ /* Consider that we're never going to run code from
+ * the scratchpad. */
+ list->flags |= LIGHTREC_NO_INVALIDATE;
+ break;
+ default:
+ pr_debug("Flagging opcode %u as I/O access\n",
+ i);
+ list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_HW);
+ break;
}
}
default: /* fall-through */
--- /dev/null
+; DO NOT EDIT (unless you know what you are doing)
+;
+; This subdirectory is a git "subrepo", and this file is maintained by the
+; git-subrepo command. See https://github.com/git-commands/git-subrepo#readme
+;
+[subrepo]
+ remote = https://github.com/mattconte/tlsf
+ branch = master
+ commit = deff9ab509341f264addbd3c8ada533678591905
+ parent = 1dc0344052e7379e16753e4a285c30fd158bf78d
+ method = merge
+ cmdver = 0.4.3
--- /dev/null
+# tlsf
+Two-Level Segregated Fit memory allocator implementation.
+Written by Matthew Conte (matt@baisoku.org).
+Released under the BSD license.
+
+Features
+--------
+ * O(1) cost for malloc, free, realloc, memalign
+ * Extremely low overhead per allocation (4 bytes)
+ * Low overhead for TLSF pool management (~3kB)
+ * Low fragmentation
+ * Compiles to only a few kB of code and data
+ * Support for adding and removing memory pool regions on the fly
+
+Caveats
+-------
+ * Currently assumes the architecture can make 4-byte aligned accesses
+ * Not designed to be thread safe; the user must provide this
+
+Notes
+-----
+This code was based on the TLSF 1.4 spec and documentation found at:
+
+ http://www.gii.upv.es/tlsf/main/docs
+
+It also leverages the TLSF 2.0 improvement to shrink the per-block overhead from 8 to 4 bytes.
+
+History
+-------
+2016/04/10 - v3.1
+ * Code moved to github
+ * tlsfbits.h rolled into tlsf.c
+ * License changed to BSD
+
+2014/02/08 - v3.0
+ * This version is based on improvements from 3DInteractive GmbH
+ * Interface changed to allow more than one memory pool
+ * Separated pool handling from control structure (adding, removing, debugging)
+ * Control structure and pools can still be constructed in the same memory block
+ * Memory blocks for control structure and pools are checked for alignment
+ * Added functions to retrieve control structure size, alignment size, min and max block size, overhead of pool structure, and overhead of a single allocation
+ * Minimal Pool size is tlsf_block_size_min() + tlsf_pool_overhead()
+ * Pool must be empty when it is removed, in order to allow O(1) removal
+
+2011/10/20 - v2.0
+ * 64-bit support
+ * More compiler intrinsics for ffs/fls
+ * ffs/fls verification during TLSF creation in debug builds
+
+2008/04/04 - v1.9
+ * Add tlsf_heap_check, a heap integrity check
+ * Support a predefined tlsf_assert macro
+ * Fix realloc case where block should shrink; if adjacent block is in use, execution would go down the slow path
+
+2007/02/08 - v1.8
+ * Fix for unnecessary reallocation in tlsf_realloc
+
+2007/02/03 - v1.7
+ * tlsf_heap_walk takes a callback
+ * tlsf_realloc now returns NULL on failure
+ * tlsf_memalign optimization for 4-byte alignment
+ * Usage of size_t where appropriate
+
+2006/11/21 - v1.6
+ * ffs/fls broken out into tlsfbits.h
+ * tlsf_overhead queries per-pool overhead
+
+2006/11/07 - v1.5
+ * Smart realloc implementation
+ * Smart memalign implementation
+
+2006/10/11 - v1.4
+ * Add some ffs/fls implementations
+ * Minor code footprint reduction
+
+2006/09/14 - v1.3
+ * Profiling indicates heavy use of blocks of size 1-128, so implement small block handling
+ * Reduce pool overhead by about 1kb
+ * Reduce minimum block size from 32 to 12 bytes
+ * Realloc bug fix
+
+2006/09/09 - v1.2
+ * Add tlsf_block_size
+ * Static assertion mechanism for invariants
+ * Minor bugfixes
+
+2006/09/01 - v1.1
+ * Add tlsf_realloc
+ * Add tlsf_walk_heap
+
+2006/08/25 - v1.0
+ * First release
--- /dev/null
+#include <assert.h>\r
+#include <limits.h>\r
+#include <stddef.h>\r
+#include <stdio.h>\r
+#include <stdlib.h>\r
+#include <string.h>\r
+\r
+#include "tlsf.h"\r
+\r
+#if defined(__cplusplus)\r
+#define tlsf_decl inline\r
+#else\r
+#define tlsf_decl static\r
+#endif\r
+\r
+/*\r
+** Architecture-specific bit manipulation routines.\r
+**\r
+** TLSF achieves O(1) cost for malloc and free operations by limiting\r
+** the search for a free block to a free list of guaranteed size\r
+** adequate to fulfill the request, combined with efficient free list\r
+** queries using bitmasks and architecture-specific bit-manipulation\r
+** routines.\r
+**\r
+** Most modern processors provide instructions to count leading zeroes\r
+** in a word, find the lowest and highest set bit, etc. These\r
+** specific implementations will be used when available, falling back\r
+** to a reasonably efficient generic implementation.\r
+**\r
+** NOTE: TLSF spec relies on ffs/fls returning value 0..31.\r
+** ffs/fls return 1-32 by default, returning 0 for error.\r
+*/\r
+\r
+/*\r
+** Detect whether or not we are building for a 32- or 64-bit (LP/LLP)\r
+** architecture. There is no reliable portable method at compile-time.\r
+*/\r
+#if defined (__alpha__) || defined (__ia64__) || defined (__x86_64__) \\r
+ || defined (_WIN64) || defined (__LP64__) || defined (__LLP64__)\r
+#define TLSF_64BIT\r
+#endif\r
+\r
+/*\r
+** gcc 3.4 and above have builtin support, specialized for architecture.\r
+** Some compilers masquerade as gcc; patchlevel test filters them out.\r
+*/\r
+#if defined (__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) \\r
+ && defined (__GNUC_PATCHLEVEL__)\r
+\r
+#if defined (__SNC__)\r
+/* SNC for Playstation 3. */\r
+\r
+tlsf_decl int tlsf_ffs(unsigned int word)\r
+{\r
+ const unsigned int reverse = word & (~word + 1);\r
+ const int bit = 32 - __builtin_clz(reverse);\r
+ return bit - 1;\r
+}\r
+\r
+#else\r
+\r
+tlsf_decl int tlsf_ffs(unsigned int word)\r
+{\r
+ return __builtin_ffs(word) - 1;\r
+}\r
+\r
+#endif\r
+\r
+tlsf_decl int tlsf_fls(unsigned int word)\r
+{\r
+ const int bit = word ? 32 - __builtin_clz(word) : 0;\r
+ return bit - 1;\r
+}\r
+\r
+#elif defined (_MSC_VER) && (_MSC_VER >= 1400) && (defined (_M_IX86) || defined (_M_X64))\r
+/* Microsoft Visual C++ support on x86/X64 architectures. */\r
+\r
+#include <intrin.h>\r
+\r
+#pragma intrinsic(_BitScanReverse)\r
+#pragma intrinsic(_BitScanForward)\r
+\r
+tlsf_decl int tlsf_fls(unsigned int word)\r
+{\r
+ unsigned long index;\r
+ return _BitScanReverse(&index, word) ? index : -1;\r
+}\r
+\r
+tlsf_decl int tlsf_ffs(unsigned int word)\r
+{\r
+ unsigned long index;\r
+ return _BitScanForward(&index, word) ? index : -1;\r
+}\r
+\r
+#elif defined (_MSC_VER) && defined (_M_PPC)\r
+/* Microsoft Visual C++ support on PowerPC architectures. */\r
+\r
+#include <ppcintrinsics.h>\r
+\r
+tlsf_decl int tlsf_fls(unsigned int word)\r
+{\r
+ const int bit = 32 - _CountLeadingZeros(word);\r
+ return bit - 1;\r
+}\r
+\r
+tlsf_decl int tlsf_ffs(unsigned int word)\r
+{\r
+ const unsigned int reverse = word & (~word + 1);\r
+ const int bit = 32 - _CountLeadingZeros(reverse);\r
+ return bit - 1;\r
+}\r
+\r
+#elif defined (__ARMCC_VERSION)\r
+/* RealView Compilation Tools for ARM */\r
+\r
+tlsf_decl int tlsf_ffs(unsigned int word)\r
+{\r
+ const unsigned int reverse = word & (~word + 1);\r
+ const int bit = 32 - __clz(reverse);\r
+ return bit - 1;\r
+}\r
+\r
+tlsf_decl int tlsf_fls(unsigned int word)\r
+{\r
+ const int bit = word ? 32 - __clz(word) : 0;\r
+ return bit - 1;\r
+}\r
+\r
+#elif defined (__ghs__)\r
+/* Green Hills support for PowerPC */\r
+\r
+#include <ppc_ghs.h>\r
+\r
+tlsf_decl int tlsf_ffs(unsigned int word)\r
+{\r
+ const unsigned int reverse = word & (~word + 1);\r
+ const int bit = 32 - __CLZ32(reverse);\r
+ return bit - 1;\r
+}\r
+\r
+tlsf_decl int tlsf_fls(unsigned int word)\r
+{\r
+ const int bit = word ? 32 - __CLZ32(word) : 0;\r
+ return bit - 1;\r
+}\r
+\r
+#else\r
+/* Fall back to generic implementation. */\r
+\r
+tlsf_decl int tlsf_fls_generic(unsigned int word)\r
+{\r
+ int bit = 32;\r
+\r
+ if (!word) bit -= 1;\r
+ if (!(word & 0xffff0000)) { word <<= 16; bit -= 16; }\r
+ if (!(word & 0xff000000)) { word <<= 8; bit -= 8; }\r
+ if (!(word & 0xf0000000)) { word <<= 4; bit -= 4; }\r
+ if (!(word & 0xc0000000)) { word <<= 2; bit -= 2; }\r
+ if (!(word & 0x80000000)) { word <<= 1; bit -= 1; }\r
+\r
+ return bit;\r
+}\r
+\r
+/* Implement ffs in terms of fls. */\r
+tlsf_decl int tlsf_ffs(unsigned int word)\r
+{\r
+ return tlsf_fls_generic(word & (~word + 1)) - 1;\r
+}\r
+\r
+tlsf_decl int tlsf_fls(unsigned int word)\r
+{\r
+ return tlsf_fls_generic(word) - 1;\r
+}\r
+\r
+#endif\r
+\r
+/* Possibly 64-bit version of tlsf_fls. */\r
+#if defined (TLSF_64BIT)\r
+tlsf_decl int tlsf_fls_sizet(size_t size)\r
+{\r
+ int high = (int)(size >> 32);\r
+ int bits = 0;\r
+ if (high)\r
+ {\r
+ bits = 32 + tlsf_fls(high);\r
+ }\r
+ else\r
+ {\r
+ bits = tlsf_fls((int)size & 0xffffffff);\r
+\r
+ }\r
+ return bits;\r
+}\r
+#else\r
+#define tlsf_fls_sizet tlsf_fls\r
+#endif\r
+\r
+#undef tlsf_decl\r
+\r
+/*\r
+** Constants.\r
+*/\r
+\r
+/* Public constants: may be modified. */\r
+enum tlsf_public\r
+{\r
+ /* log2 of number of linear subdivisions of block sizes. Larger\r
+ ** values require more memory in the control structure. Values of\r
+ ** 4 or 5 are typical.\r
+ */\r
+ SL_INDEX_COUNT_LOG2 = 5,\r
+};\r
+\r
+/* Private constants: do not modify. */\r
+enum tlsf_private\r
+{\r
+#if defined (TLSF_64BIT)\r
+ /* All allocation sizes and addresses are aligned to 8 bytes. */\r
+ ALIGN_SIZE_LOG2 = 3,\r
+#else\r
+ /* All allocation sizes and addresses are aligned to 4 bytes. */\r
+ ALIGN_SIZE_LOG2 = 2,\r
+#endif\r
+ ALIGN_SIZE = (1 << ALIGN_SIZE_LOG2),\r
+\r
+ /*\r
+ ** We support allocations of sizes up to (1 << FL_INDEX_MAX) bits.\r
+ ** However, because we linearly subdivide the second-level lists, and\r
+ ** our minimum size granularity is 4 bytes, it doesn't make sense to\r
+ ** create first-level lists for sizes smaller than SL_INDEX_COUNT * 4,\r
+ ** or (1 << (SL_INDEX_COUNT_LOG2 + 2)) bytes, as there we will be\r
+ ** trying to split size ranges into more slots than we have available.\r
+ ** Instead, we calculate the minimum threshold size, and place all\r
+ ** blocks below that size into the 0th first-level list.\r
+ */\r
+\r
+#if defined (TLSF_64BIT)\r
+ /*\r
+ ** TODO: We can increase this to support larger sizes, at the expense\r
+ ** of more overhead in the TLSF structure.\r
+ */\r
+ FL_INDEX_MAX = 32,\r
+#else\r
+ FL_INDEX_MAX = 30,\r
+#endif\r
+ SL_INDEX_COUNT = (1 << SL_INDEX_COUNT_LOG2),\r
+ FL_INDEX_SHIFT = (SL_INDEX_COUNT_LOG2 + ALIGN_SIZE_LOG2),\r
+ FL_INDEX_COUNT = (FL_INDEX_MAX - FL_INDEX_SHIFT + 1),\r
+\r
+ SMALL_BLOCK_SIZE = (1 << FL_INDEX_SHIFT),\r
+};\r
+\r
+/*\r
+** Cast and min/max macros.\r
+*/\r
+\r
+#define tlsf_cast(t, exp) ((t) (exp))\r
+#define tlsf_min(a, b) ((a) < (b) ? (a) : (b))\r
+#define tlsf_max(a, b) ((a) > (b) ? (a) : (b))\r
+\r
+/*\r
+** Set assert macro, if it has not been provided by the user.\r
+*/\r
+#if !defined (tlsf_assert)\r
+#define tlsf_assert assert\r
+#endif\r
+\r
+/*\r
+** Static assertion mechanism.\r
+*/\r
+\r
+#define _tlsf_glue2(x, y) x ## y\r
+#define _tlsf_glue(x, y) _tlsf_glue2(x, y)\r
+#define tlsf_static_assert(exp) \\r
+ typedef char _tlsf_glue(static_assert, __LINE__) [(exp) ? 1 : -1]\r
+\r
+/* This code has been tested on 32- and 64-bit (LP/LLP) architectures. */\r
+tlsf_static_assert(sizeof(int) * CHAR_BIT == 32);\r
+tlsf_static_assert(sizeof(size_t) * CHAR_BIT >= 32);\r
+tlsf_static_assert(sizeof(size_t) * CHAR_BIT <= 64);\r
+\r
+/* SL_INDEX_COUNT must be <= number of bits in sl_bitmap's storage type. */\r
+tlsf_static_assert(sizeof(unsigned int) * CHAR_BIT >= SL_INDEX_COUNT);\r
+\r
+/* Ensure we've properly tuned our sizes. */\r
+tlsf_static_assert(ALIGN_SIZE == SMALL_BLOCK_SIZE / SL_INDEX_COUNT);\r
+\r
+/*\r
+** Data structures and associated constants.\r
+*/\r
+\r
+/*\r
+** Block header structure.\r
+**\r
+** There are several implementation subtleties involved:\r
+** - The prev_phys_block field is only valid if the previous block is free.\r
+** - The prev_phys_block field is actually stored at the end of the\r
+** previous block. It appears at the beginning of this structure only to\r
+** simplify the implementation.\r
+** - The next_free / prev_free fields are only valid if the block is free.\r
+*/\r
+typedef struct block_header_t\r
+{\r
+ /* Points to the previous physical block. */\r
+ struct block_header_t* prev_phys_block;\r
+\r
+ /* The size of this block, excluding the block header. */\r
+ size_t size;\r
+\r
+ /* Next and previous free blocks. */\r
+ struct block_header_t* next_free;\r
+ struct block_header_t* prev_free;\r
+} block_header_t;\r
+\r
+/*\r
+** Since block sizes are always at least a multiple of 4, the two least\r
+** significant bits of the size field are used to store the block status:\r
+** - bit 0: whether block is busy or free\r
+** - bit 1: whether previous block is busy or free\r
+*/\r
+static const size_t block_header_free_bit = 1 << 0;\r
+static const size_t block_header_prev_free_bit = 1 << 1;\r
+\r
+/*\r
+** The size of the block header exposed to used blocks is the size field.\r
+** The prev_phys_block field is stored *inside* the previous free block.\r
+*/\r
+static const size_t block_header_overhead = sizeof(size_t);\r
+\r
+/* User data starts directly after the size field in a used block. */\r
+static const size_t block_start_offset =\r
+ offsetof(block_header_t, size) + sizeof(size_t);\r
+\r
+/*\r
+** A free block must be large enough to store its header minus the size of\r
+** the prev_phys_block field, and no larger than the number of addressable\r
+** bits for FL_INDEX.\r
+*/\r
+static const size_t block_size_min = \r
+ sizeof(block_header_t) - sizeof(block_header_t*);\r
+static const size_t block_size_max = tlsf_cast(size_t, 1) << FL_INDEX_MAX;\r
+\r
+\r
+/* The TLSF control structure. */\r
+typedef struct control_t\r
+{\r
+ /* Empty lists point at this block to indicate they are free. */\r
+ block_header_t block_null;\r
+\r
+ /* Bitmaps for free lists. */\r
+ unsigned int fl_bitmap;\r
+ unsigned int sl_bitmap[FL_INDEX_COUNT];\r
+\r
+ /* Head of free lists. */\r
+ block_header_t* blocks[FL_INDEX_COUNT][SL_INDEX_COUNT];\r
+} control_t;\r
+\r
+/* A type used for casting when doing pointer arithmetic. */\r
+typedef ptrdiff_t tlsfptr_t;\r
+\r
+/*\r
+** block_header_t member functions.\r
+*/\r
+\r
+static size_t block_size(const block_header_t* block)\r
+{\r
+ return block->size & ~(block_header_free_bit | block_header_prev_free_bit);\r
+}\r
+\r
+static void block_set_size(block_header_t* block, size_t size)\r
+{\r
+ const size_t oldsize = block->size;\r
+ block->size = size | (oldsize & (block_header_free_bit | block_header_prev_free_bit));\r
+}\r
+\r
+static int block_is_last(const block_header_t* block)\r
+{\r
+ return block_size(block) == 0;\r
+}\r
+\r
+static int block_is_free(const block_header_t* block)\r
+{\r
+ return tlsf_cast(int, block->size & block_header_free_bit);\r
+}\r
+\r
+static void block_set_free(block_header_t* block)\r
+{\r
+ block->size |= block_header_free_bit;\r
+}\r
+\r
+static void block_set_used(block_header_t* block)\r
+{\r
+ block->size &= ~block_header_free_bit;\r
+}\r
+\r
+static int block_is_prev_free(const block_header_t* block)\r
+{\r
+ return tlsf_cast(int, block->size & block_header_prev_free_bit);\r
+}\r
+\r
+static void block_set_prev_free(block_header_t* block)\r
+{\r
+ block->size |= block_header_prev_free_bit;\r
+}\r
+\r
+static void block_set_prev_used(block_header_t* block)\r
+{\r
+ block->size &= ~block_header_prev_free_bit;\r
+}\r
+\r
+static block_header_t* block_from_ptr(const void* ptr)\r
+{\r
+ return tlsf_cast(block_header_t*,\r
+ tlsf_cast(unsigned char*, ptr) - block_start_offset);\r
+}\r
+\r
+static void* block_to_ptr(const block_header_t* block)\r
+{\r
+ return tlsf_cast(void*,\r
+ tlsf_cast(unsigned char*, block) + block_start_offset);\r
+}\r
+\r
+/* Return location of next block after block of given size. */\r
+static block_header_t* offset_to_block(const void* ptr, size_t size)\r
+{\r
+ return tlsf_cast(block_header_t*, tlsf_cast(tlsfptr_t, ptr) + size);\r
+}\r
+\r
+/* Return location of previous block. */\r
+static block_header_t* block_prev(const block_header_t* block)\r
+{\r
+ tlsf_assert(block_is_prev_free(block) && "previous block must be free");\r
+ return block->prev_phys_block;\r
+}\r
+\r
+/* Return location of next existing block. */\r
+static block_header_t* block_next(const block_header_t* block)\r
+{\r
+ block_header_t* next = offset_to_block(block_to_ptr(block),\r
+ block_size(block) - block_header_overhead);\r
+ tlsf_assert(!block_is_last(block));\r
+ return next;\r
+}\r
+\r
+/* Link a new block with its physical neighbor, return the neighbor. */\r
+static block_header_t* block_link_next(block_header_t* block)\r
+{\r
+ block_header_t* next = block_next(block);\r
+ next->prev_phys_block = block;\r
+ return next;\r
+}\r
+\r
+static void block_mark_as_free(block_header_t* block)\r
+{\r
+ /* Link the block to the next block, first. */\r
+ block_header_t* next = block_link_next(block);\r
+ block_set_prev_free(next);\r
+ block_set_free(block);\r
+}\r
+\r
+static void block_mark_as_used(block_header_t* block)\r
+{\r
+ block_header_t* next = block_next(block);\r
+ block_set_prev_used(next);\r
+ block_set_used(block);\r
+}\r
+\r
+static size_t align_up(size_t x, size_t align)\r
+{\r
+ tlsf_assert(0 == (align & (align - 1)) && "must align to a power of two");\r
+ return (x + (align - 1)) & ~(align - 1);\r
+}\r
+\r
+static size_t align_down(size_t x, size_t align)\r
+{\r
+ tlsf_assert(0 == (align & (align - 1)) && "must align to a power of two");\r
+ return x - (x & (align - 1));\r
+}\r
+\r
+static void* align_ptr(const void* ptr, size_t align)\r
+{\r
+ const tlsfptr_t aligned =\r
+ (tlsf_cast(tlsfptr_t, ptr) + (align - 1)) & ~(align - 1);\r
+ tlsf_assert(0 == (align & (align - 1)) && "must align to a power of two");\r
+ return tlsf_cast(void*, aligned);\r
+}\r
+\r
+/*\r
+** Adjust an allocation size to be aligned to word size, and no smaller\r
+** than internal minimum.\r
+*/\r
+static size_t adjust_request_size(size_t size, size_t align)\r
+{\r
+ size_t adjust = 0;\r
+ if (size)\r
+ {\r
+ const size_t aligned = align_up(size, align);\r
+\r
+ /* aligned size must not exceed block_size_max or we'll go out of bounds on sl_bitmap */\r
+ if (aligned < block_size_max) \r
+ {\r
+ adjust = tlsf_max(aligned, block_size_min);\r
+ }\r
+ }\r
+ return adjust;\r
+}\r
+\r
+/*\r
+** TLSF utility functions. In most cases, these are direct translations of\r
+** the documentation found in the white paper.\r
+*/\r
+\r
+static void mapping_insert(size_t size, int* fli, int* sli)\r
+{\r
+ int fl, sl;\r
+ if (size < SMALL_BLOCK_SIZE)\r
+ {\r
+ /* Store small blocks in first list. */\r
+ fl = 0;\r
+ sl = tlsf_cast(int, size) / (SMALL_BLOCK_SIZE / SL_INDEX_COUNT);\r
+ }\r
+ else\r
+ {\r
+ fl = tlsf_fls_sizet(size);\r
+ sl = tlsf_cast(int, size >> (fl - SL_INDEX_COUNT_LOG2)) ^ (1 << SL_INDEX_COUNT_LOG2);\r
+ fl -= (FL_INDEX_SHIFT - 1);\r
+ }\r
+ *fli = fl;\r
+ *sli = sl;\r
+}\r
+\r
+/* This version rounds up to the next block size (for allocations) */\r
+static void mapping_search(size_t size, int* fli, int* sli)\r
+{\r
+ if (size >= SMALL_BLOCK_SIZE)\r
+ {\r
+ const size_t round = (1 << (tlsf_fls_sizet(size) - SL_INDEX_COUNT_LOG2)) - 1;\r
+ size += round;\r
+ }\r
+ mapping_insert(size, fli, sli);\r
+}\r
+\r
+static block_header_t* search_suitable_block(control_t* control, int* fli, int* sli)\r
+{\r
+ int fl = *fli;\r
+ int sl = *sli;\r
+\r
+ /*\r
+ ** First, search for a block in the list associated with the given\r
+ ** fl/sl index.\r
+ */\r
+ unsigned int sl_map = control->sl_bitmap[fl] & (~0U << sl);\r
+ if (!sl_map)\r
+ {\r
+ /* No block exists. Search in the next largest first-level list. */\r
+ const unsigned int fl_map = control->fl_bitmap & (~0U << (fl + 1));\r
+ if (!fl_map)\r
+ {\r
+ /* No free blocks available, memory has been exhausted. */\r
+ return 0;\r
+ }\r
+\r
+ fl = tlsf_ffs(fl_map);\r
+ *fli = fl;\r
+ sl_map = control->sl_bitmap[fl];\r
+ }\r
+ tlsf_assert(sl_map && "internal error - second level bitmap is null");\r
+ sl = tlsf_ffs(sl_map);\r
+ *sli = sl;\r
+\r
+ /* Return the first block in the free list. */\r
+ return control->blocks[fl][sl];\r
+}\r
+\r
+/* Remove a free block from the free list.*/\r
+static void remove_free_block(control_t* control, block_header_t* block, int fl, int sl)\r
+{\r
+ block_header_t* prev = block->prev_free;\r
+ block_header_t* next = block->next_free;\r
+ tlsf_assert(prev && "prev_free field can not be null");\r
+ tlsf_assert(next && "next_free field can not be null");\r
+ next->prev_free = prev;\r
+ prev->next_free = next;\r
+\r
+ /* If this block is the head of the free list, set new head. */\r
+ if (control->blocks[fl][sl] == block)\r
+ {\r
+ control->blocks[fl][sl] = next;\r
+\r
+ /* If the new head is null, clear the bitmap. */\r
+ if (next == &control->block_null)\r
+ {\r
+ control->sl_bitmap[fl] &= ~(1U << sl);\r
+\r
+ /* If the second bitmap is now empty, clear the fl bitmap. */\r
+ if (!control->sl_bitmap[fl])\r
+ {\r
+ control->fl_bitmap &= ~(1U << fl);\r
+ }\r
+ }\r
+ }\r
+}\r
+\r
+/* Insert a free block into the free block list. */\r
+static void insert_free_block(control_t* control, block_header_t* block, int fl, int sl)\r
+{\r
+ block_header_t* current = control->blocks[fl][sl];\r
+ tlsf_assert(current && "free list cannot have a null entry");\r
+ tlsf_assert(block && "cannot insert a null entry into the free list");\r
+ block->next_free = current;\r
+ block->prev_free = &control->block_null;\r
+ current->prev_free = block;\r
+\r
+ tlsf_assert(block_to_ptr(block) == align_ptr(block_to_ptr(block), ALIGN_SIZE)\r
+ && "block not aligned properly");\r
+ /*\r
+ ** Insert the new block at the head of the list, and mark the first-\r
+ ** and second-level bitmaps appropriately.\r
+ */\r
+ control->blocks[fl][sl] = block;\r
+ control->fl_bitmap |= (1U << fl);\r
+ control->sl_bitmap[fl] |= (1U << sl);\r
+}\r
+\r
+/* Remove a given block from the free list. */\r
+static void block_remove(control_t* control, block_header_t* block)\r
+{\r
+ int fl, sl;\r
+ mapping_insert(block_size(block), &fl, &sl);\r
+ remove_free_block(control, block, fl, sl);\r
+}\r
+\r
+/* Insert a given block into the free list. */\r
+static void block_insert(control_t* control, block_header_t* block)\r
+{\r
+ int fl, sl;\r
+ mapping_insert(block_size(block), &fl, &sl);\r
+ insert_free_block(control, block, fl, sl);\r
+}\r
+\r
+static int block_can_split(block_header_t* block, size_t size)\r
+{\r
+ return block_size(block) >= sizeof(block_header_t) + size;\r
+}\r
+\r
+/* Split a block into two, the second of which is free. */\r
+/*\r
+** The remainder starts `size` bytes into the original payload (minus the\r
+** header overhead, so its prev_phys_block field overlays the tail of the\r
+** first block's payload). The first block is trimmed to exactly `size`.\r
+*/\r
+static block_header_t* block_split(block_header_t* block, size_t size)\r
+{\r
+ /* Calculate the amount of space left in the remaining block. */\r
+ block_header_t* remaining =\r
+ offset_to_block(block_to_ptr(block), size - block_header_overhead);\r
+\r
+ const size_t remain_size = block_size(block) - (size + block_header_overhead);\r
+\r
+ tlsf_assert(block_to_ptr(remaining) == align_ptr(block_to_ptr(remaining), ALIGN_SIZE)\r
+ && "remaining block not aligned properly");\r
+\r
+ tlsf_assert(block_size(block) == remain_size + size + block_header_overhead);\r
+ block_set_size(remaining, remain_size);\r
+ tlsf_assert(block_size(remaining) >= block_size_min && "block split with invalid size");\r
+\r
+ block_set_size(block, size);\r
+ block_mark_as_free(remaining);\r
+\r
+ return remaining;\r
+}\r
+\r
+/* Absorb a free block's storage into an adjacent previous free block. */\r
+/* Caller must already have unlinked both blocks from the free lists\r
+** (see block_merge_prev/block_merge_next); only physical linkage and\r
+** size are updated here. */\r
+static block_header_t* block_absorb(block_header_t* prev, block_header_t* block)\r
+{\r
+ tlsf_assert(!block_is_last(prev) && "previous block can't be last");\r
+ /* Note: Leaves flags untouched. */\r
+ prev->size += block_size(block) + block_header_overhead;\r
+ block_link_next(prev);\r
+ return prev;\r
+}\r
+\r
+/* Merge a just-freed block with an adjacent previous free block. */\r
+/* Returns the (possibly new) head of the coalesced block; `block` itself\r
+** is assumed not to be on any free list yet. */\r
+static block_header_t* block_merge_prev(control_t* control, block_header_t* block)\r
+{\r
+ if (block_is_prev_free(block))\r
+ {\r
+ block_header_t* prev = block_prev(block);\r
+ tlsf_assert(prev && "prev physical block can't be null");\r
+ tlsf_assert(block_is_free(prev) && "prev block is not free though marked as such");\r
+ /* Take prev off its free list before growing it. */\r
+ block_remove(control, prev);\r
+ block = block_absorb(prev, block);\r
+ }\r
+\r
+ return block;\r
+}\r
+\r
+/* Merge a just-freed block with an adjacent free block. */\r
+/* Symmetric to block_merge_prev: absorbs the next physical block if free. */\r
+static block_header_t* block_merge_next(control_t* control, block_header_t* block)\r
+{\r
+ block_header_t* next = block_next(block);\r
+ tlsf_assert(next && "next physical block can't be null");\r
+\r
+ if (block_is_free(next))\r
+ {\r
+ tlsf_assert(!block_is_last(block) && "previous block can't be last");\r
+ block_remove(control, next);\r
+ block = block_absorb(block, next);\r
+ }\r
+\r
+ return block;\r
+}\r
+\r
+/* Trim any trailing block space off the end of a block, return to pool. */\r
+/* No-op when the block is too small to split (see block_can_split). */\r
+static void block_trim_free(control_t* control, block_header_t* block, size_t size)\r
+{\r
+ tlsf_assert(block_is_free(block) && "block must be free");\r
+ if (block_can_split(block, size))\r
+ {\r
+ block_header_t* remaining_block = block_split(block, size);\r
+ block_link_next(block);\r
+ block_set_prev_free(remaining_block);\r
+ block_insert(control, remaining_block);\r
+ }\r
+}\r
+\r
+/* Trim any trailing block space off the end of a used block, return to pool. */\r
+/* Unlike block_trim_free, the remainder may border a free neighbor and is\r
+** coalesced with it before being reinserted. */\r
+static void block_trim_used(control_t* control, block_header_t* block, size_t size)\r
+{\r
+ tlsf_assert(!block_is_free(block) && "block must be used");\r
+ if (block_can_split(block, size))\r
+ {\r
+ /* If the next block is free, we must coalesce. */\r
+ block_header_t* remaining_block = block_split(block, size);\r
+ block_set_prev_used(remaining_block);\r
+\r
+ remaining_block = block_merge_next(control, remaining_block);\r
+ block_insert(control, remaining_block);\r
+ }\r
+}\r
+\r
+/* Release a leading gap of `size` bytes from a free block back to the pool\r
+** and return the trailing remainder (used by tlsf_memalign to realign the\r
+** payload). If the block cannot be split, the original block is returned. */\r
+static block_header_t* block_trim_free_leading(control_t* control, block_header_t* block, size_t size)\r
+{\r
+ block_header_t* remaining_block = block;\r
+ if (block_can_split(block, size))\r
+ {\r
+ /* We want the 2nd block. */\r
+ remaining_block = block_split(block, size - block_header_overhead);\r
+ block_set_prev_free(remaining_block);\r
+\r
+ block_link_next(block);\r
+ block_insert(control, block);\r
+ }\r
+\r
+ return remaining_block;\r
+}\r
+\r
+/* Find and unlink a free block large enough for `size` bytes, or return 0.\r
+** search_suitable_block updates fl/sl to the class the block was actually\r
+** taken from, which is what remove_free_block needs below. */\r
+static block_header_t* block_locate_free(control_t* control, size_t size)\r
+{\r
+ int fl = 0, sl = 0;\r
+ block_header_t* block = 0;\r
+\r
+ if (size)\r
+ {\r
+ mapping_search(size, &fl, &sl);\r
+ \r
+ /*\r
+ ** mapping_search can futz with the size, so for excessively large sizes it can sometimes wind up \r
+ ** with indices that are off the end of the block array.\r
+ ** So, we protect against that here, since this is the only callsite of mapping_search.\r
+ ** Note that we don't need to check sl, since it comes from a modulo operation that guarantees it's always in range.\r
+ */\r
+ if (fl < FL_INDEX_COUNT)\r
+ {\r
+ block = search_suitable_block(control, &fl, &sl);\r
+ }\r
+ }\r
+\r
+ if (block)\r
+ {\r
+ tlsf_assert(block_size(block) >= size);\r
+ remove_free_block(control, block, fl, sl);\r
+ }\r
+\r
+ return block;\r
+}\r
+\r
+/* Turn a located free block into a used allocation of `size` bytes:\r
+** trim the excess back to the pool, mark the block used, and return the\r
+** user payload pointer. Returns 0 when `block` is null. */\r
+static void* block_prepare_used(control_t* control, block_header_t* block, size_t size)\r
+{\r
+ void* p = 0;\r
+ if (block)\r
+ {\r
+ tlsf_assert(size && "size must be non-zero");\r
+ block_trim_free(control, block, size);\r
+ block_mark_as_used(block);\r
+ p = block_to_ptr(block);\r
+ }\r
+ return p;\r
+}\r
+\r
+/* Clear structure and point all empty lists at the null block. */\r
+/* block_null is self-linked so list operations never dereference NULL. */\r
+static void control_construct(control_t* control)\r
+{\r
+ int i, j;\r
+\r
+ control->block_null.next_free = &control->block_null;\r
+ control->block_null.prev_free = &control->block_null;\r
+\r
+ /* All bitmaps start empty: no free blocks in any size class. */\r
+ control->fl_bitmap = 0;\r
+ for (i = 0; i < FL_INDEX_COUNT; ++i)\r
+ {\r
+ control->sl_bitmap[i] = 0;\r
+ for (j = 0; j < SL_INDEX_COUNT; ++j)\r
+ {\r
+ control->blocks[i][j] = &control->block_null;\r
+ }\r
+ }\r
+}\r
+\r
+/*\r
+** Debugging utilities.\r
+*/\r
+\r
+/* Walker state: carries the previous block's free status across calls and\r
+** accumulates a (negative) count of failed checks in `status`. */\r
+typedef struct integrity_t\r
+{\r
+ int prev_status;\r
+ int status;\r
+} integrity_t;\r
+\r
+/* Assert `x` in debug builds; in all builds, decrement the local `status`\r
+** counter when `x` is false so failures are tallied rather than fatal. */\r
+#define tlsf_insist(x) { tlsf_assert(x); if (!(x)) { status--; } }\r
+\r
+/* tlsf_walk_pool callback: verifies each physical block's size and\r
+** prev-free flag against the walk, folding failures into integ->status. */\r
+static void integrity_walker(void* ptr, size_t size, int used, void* user)\r
+{\r
+ block_header_t* block = block_from_ptr(ptr);\r
+ integrity_t* integ = tlsf_cast(integrity_t*, user);\r
+ const int this_prev_status = block_is_prev_free(block) ? 1 : 0;\r
+ const int this_status = block_is_free(block) ? 1 : 0;\r
+ const size_t this_block_size = block_size(block);\r
+\r
+ int status = 0;\r
+ (void)used;\r
+ tlsf_insist(integ->prev_status == this_prev_status && "prev status incorrect");\r
+ tlsf_insist(size == this_block_size && "block size incorrect");\r
+\r
+ integ->prev_status = this_status;\r
+ integ->status += status;\r
+}\r
+\r
+/* Verify that the free lists and first/second-level bitmaps agree with each\r
+** other and with the blocks they index. Returns 0 when fully consistent;\r
+** otherwise a negative value (each failed check decrements the result via\r
+** tlsf_insist). */\r
+int tlsf_check(tlsf_t tlsf)\r
+{\r
+ int i, j;\r
+\r
+ control_t* control = tlsf_cast(control_t*, tlsf);\r
+ int status = 0;\r
+\r
+ /* Check that the free lists and bitmaps are accurate. */\r
+ for (i = 0; i < FL_INDEX_COUNT; ++i)\r
+ {\r
+ for (j = 0; j < SL_INDEX_COUNT; ++j)\r
+ {\r
+ const int fl_map = control->fl_bitmap & (1U << i);\r
+ const int sl_list = control->sl_bitmap[i];\r
+ const int sl_map = sl_list & (1U << j);\r
+ const block_header_t* block = control->blocks[i][j];\r
+\r
+ /* Check that first- and second-level lists agree. */\r
+ if (!fl_map)\r
+ {\r
+ tlsf_insist(!sl_map && "second-level map must be null");\r
+ }\r
+\r
+ if (!sl_map)\r
+ {\r
+ tlsf_insist(block == &control->block_null && "block list must be null");\r
+ continue;\r
+ }\r
+\r
+ /* Check that there is at least one free block. */\r
+ tlsf_insist(sl_list && "no free blocks in second-level map");\r
+ tlsf_insist(block != &control->block_null && "block should not be null");\r
+\r
+ /* Walk every block on this free list and verify its invariants:\r
+ ** free, fully coalesced, minimum size, indexed in the right class. */\r
+ while (block != &control->block_null)\r
+ {\r
+ int fli, sli;\r
+ tlsf_insist(block_is_free(block) && "block should be free");\r
+ tlsf_insist(!block_is_prev_free(block) && "blocks should have coalesced");\r
+ tlsf_insist(!block_is_free(block_next(block)) && "blocks should have coalesced");\r
+ tlsf_insist(block_is_prev_free(block_next(block)) && "block should be free");\r
+ tlsf_insist(block_size(block) >= block_size_min && "block not minimum size");\r
+\r
+ mapping_insert(block_size(block), &fli, &sli);\r
+ tlsf_insist(fli == i && sli == j && "block size indexed in wrong list");\r
+ block = block->next_free;\r
+ }\r
+ }\r
+ }\r
+\r
+ return status;\r
+}\r
+\r
+#undef tlsf_insist\r
+\r
+/* Fallback walker: prints each block's address, status and size. */\r
+static void default_walker(void* ptr, size_t size, int used, void* user)\r
+{\r
+ (void)user;\r
+ printf("\t%p %s size: %x (%p)\n", ptr, used ? "used" : "free", (unsigned int)size, block_from_ptr(ptr));\r
+}\r
+\r
+/* Walk every physical block in a pool in address order, invoking `walker`\r
+** (or default_walker when null) for each. The walk stops at the zero-size\r
+** sentinel block created by tlsf_add_pool. */\r
+void tlsf_walk_pool(pool_t pool, tlsf_walker walker, void* user)\r
+{\r
+ tlsf_walker pool_walker = walker ? walker : default_walker;\r
+ block_header_t* block =\r
+ offset_to_block(pool, -(int)block_header_overhead);\r
+\r
+ while (block && !block_is_last(block))\r
+ {\r
+ pool_walker(\r
+ block_to_ptr(block),\r
+ block_size(block),\r
+ !block_is_free(block),\r
+ user);\r
+ block = block_next(block);\r
+ }\r
+}\r
+\r
+/* Internal block size of an allocation (>= the requested size); 0 for a\r
+** null pointer. */\r
+size_t tlsf_block_size(void* ptr)\r
+{\r
+ size_t size = 0;\r
+ if (ptr)\r
+ {\r
+ const block_header_t* block = block_from_ptr(ptr);\r
+ size = block_size(block);\r
+ }\r
+ return size;\r
+}\r
+\r
+/* Walk a pool with integrity_walker; returns 0 when physically consistent,\r
+** negative (failed-check count) otherwise. */\r
+int tlsf_check_pool(pool_t pool)\r
+{\r
+ /* Check that the blocks are physically correct. */\r
+ integrity_t integ = { 0, 0 };\r
+ tlsf_walk_pool(pool, integrity_walker, &integ);\r
+\r
+ return integ.status;\r
+}\r
+\r
+/*\r
+** Size of the TLSF structures in a given memory block passed to\r
+** tlsf_create, equal to the size of a control_t\r
+*/\r
+size_t tlsf_size(void)\r
+{\r
+ return sizeof(control_t);\r
+}\r
+\r
+/* Base alignment of all returned allocations. */\r
+size_t tlsf_align_size(void)\r
+{\r
+ return ALIGN_SIZE;\r
+}\r
+\r
+/* Smallest internal block size the allocator will create. */\r
+size_t tlsf_block_size_min(void)\r
+{\r
+ return block_size_min;\r
+}\r
+\r
+/* Largest single allocation the allocator can satisfy. */\r
+size_t tlsf_block_size_max(void)\r
+{\r
+ return block_size_max;\r
+}\r
+\r
+/*\r
+** Overhead of the TLSF structures in a given memory block passed to\r
+** tlsf_add_pool, equal to the overhead of a free block and the\r
+** sentinel block.\r
+*/\r
+size_t tlsf_pool_overhead(void)\r
+{\r
+ return 2 * block_header_overhead;\r
+}\r
+\r
+/* Per-allocation bookkeeping overhead (one block header). */\r
+size_t tlsf_alloc_overhead(void)\r
+{\r
+ return block_header_overhead;\r
+}\r
+\r
+/* Add `bytes` of memory at `mem` as a pool managed by `tlsf`.\r
+** `mem` must be ALIGN_SIZE-aligned and large enough for one minimum block\r
+** plus the pool overhead. Returns `mem` on success, 0 on failure (with a\r
+** diagnostic printed via printf). */\r
+pool_t tlsf_add_pool(tlsf_t tlsf, void* mem, size_t bytes)\r
+{\r
+ block_header_t* block;\r
+ block_header_t* next;\r
+\r
+ const size_t pool_overhead = tlsf_pool_overhead();\r
+ const size_t pool_bytes = align_down(bytes - pool_overhead, ALIGN_SIZE);\r
+\r
+ if (((ptrdiff_t)mem % ALIGN_SIZE) != 0)\r
+ {\r
+ printf("tlsf_add_pool: Memory must be aligned by %u bytes.\n",\r
+ (unsigned int)ALIGN_SIZE);\r
+ return 0;\r
+ }\r
+\r
+ if (pool_bytes < block_size_min || pool_bytes > block_size_max)\r
+ {\r
+#if defined (TLSF_64BIT)\r
+ printf("tlsf_add_pool: Memory size must be between 0x%x and 0x%x00 bytes.\n", \r
+ (unsigned int)(pool_overhead + block_size_min),\r
+ (unsigned int)((pool_overhead + block_size_max) / 256));\r
+#else\r
+ printf("tlsf_add_pool: Memory size must be between %u and %u bytes.\n", \r
+ (unsigned int)(pool_overhead + block_size_min),\r
+ (unsigned int)(pool_overhead + block_size_max));\r
+#endif\r
+ return 0;\r
+ }\r
+\r
+ /*\r
+ ** Create the main free block. Offset the start of the block slightly\r
+ ** so that the prev_phys_block field falls outside of the pool -\r
+ ** it will never be used.\r
+ */\r
+ block = offset_to_block(mem, -(tlsfptr_t)block_header_overhead);\r
+ block_set_size(block, pool_bytes);\r
+ block_set_free(block);\r
+ block_set_prev_used(block);\r
+ block_insert(tlsf_cast(control_t*, tlsf), block);\r
+\r
+ /* Split the block to create a zero-size sentinel block. */\r
+ next = block_link_next(block);\r
+ block_set_size(next, 0);\r
+ block_set_used(next);\r
+ block_set_prev_free(next);\r
+\r
+ return mem;\r
+}\r
+\r
+/* Remove a pool previously added with tlsf_add_pool. The asserts require\r
+** the pool to be entirely unused: one free block followed immediately by\r
+** the zero-size sentinel. */\r
+void tlsf_remove_pool(tlsf_t tlsf, pool_t pool)\r
+{\r
+ control_t* control = tlsf_cast(control_t*, tlsf);\r
+ block_header_t* block = offset_to_block(pool, -(int)block_header_overhead);\r
+\r
+ int fl = 0, sl = 0;\r
+\r
+ tlsf_assert(block_is_free(block) && "block should be free");\r
+ tlsf_assert(!block_is_free(block_next(block)) && "next block should not be free");\r
+ tlsf_assert(block_size(block_next(block)) == 0 && "next block size should be zero");\r
+\r
+ mapping_insert(block_size(block), &fl, &sl);\r
+ remove_free_block(control, block, fl, sl);\r
+}\r
+\r
+/*\r
+** TLSF main interface.\r
+*/\r
+\r
+#if _DEBUG\r
+/* Debug-build self-test of the bit-scan primitives. Returns 0 on success;\r
+** otherwise a bitmask identifying which ffs/fls case failed. */\r
+int test_ffs_fls()\r
+{\r
+ /* Verify ffs/fls work properly. */\r
+ int rv = 0;\r
+ rv += (tlsf_ffs(0) == -1) ? 0 : 0x1;\r
+ rv += (tlsf_fls(0) == -1) ? 0 : 0x2;\r
+ rv += (tlsf_ffs(1) == 0) ? 0 : 0x4;\r
+ rv += (tlsf_fls(1) == 0) ? 0 : 0x8;\r
+ rv += (tlsf_ffs(0x80000000) == 31) ? 0 : 0x10;\r
+ rv += (tlsf_ffs(0x80008000) == 15) ? 0 : 0x20;\r
+ rv += (tlsf_fls(0x80000008) == 31) ? 0 : 0x40;\r
+ rv += (tlsf_fls(0x7FFFFFFF) == 30) ? 0 : 0x80;\r
+\r
+#if defined (TLSF_64BIT)\r
+ rv += (tlsf_fls_sizet(0x80000000) == 31) ? 0 : 0x100;\r
+ rv += (tlsf_fls_sizet(0x100000000) == 32) ? 0 : 0x200;\r
+ rv += (tlsf_fls_sizet(0xffffffffffffffff) == 63) ? 0 : 0x400;\r
+#endif\r
+\r
+ if (rv)\r
+ {\r
+ printf("test_ffs_fls: %x ffs/fls tests failed.\n", rv);\r
+ }\r
+ return rv;\r
+}\r
+#endif\r
+\r
+/* Construct a TLSF instance in caller-supplied memory of at least\r
+** tlsf_size() bytes. Returns the instance, or 0 if `mem` is misaligned. */\r
+tlsf_t tlsf_create(void* mem)\r
+{\r
+#if _DEBUG\r
+ if (test_ffs_fls())\r
+ {\r
+ return 0;\r
+ }\r
+#endif\r
+\r
+ if (((tlsfptr_t)mem % ALIGN_SIZE) != 0)\r
+ {\r
+ printf("tlsf_create: Memory must be aligned to %u bytes.\n",\r
+ (unsigned int)ALIGN_SIZE);\r
+ return 0;\r
+ }\r
+\r
+ control_construct(tlsf_cast(control_t*, mem));\r
+\r
+ return tlsf_cast(tlsf_t, mem);\r
+}\r
+\r
+/* Construct a TLSF instance at `mem` and add the remainder of the buffer\r
+** as its first pool.\r
+** NOTE(review): tlsf_create returns 0 when `mem` is misaligned, and that\r
+** result is passed to tlsf_add_pool unchecked - callers must guarantee\r
+** alignment; confirm against call sites. */\r
+tlsf_t tlsf_create_with_pool(void* mem, size_t bytes)\r
+{\r
+ tlsf_t tlsf = tlsf_create(mem);\r
+ tlsf_add_pool(tlsf, (char*)mem + tlsf_size(), bytes - tlsf_size());\r
+ return tlsf;\r
+}\r
+\r
+/* No-op: TLSF owns no resources beyond the caller-supplied memory. */\r
+void tlsf_destroy(tlsf_t tlsf)\r
+{\r
+ /* Nothing to do. */\r
+ (void)tlsf;\r
+}\r
+\r
+/* The pool created by tlsf_create_with_pool lives immediately after the\r
+** control structure. */\r
+pool_t tlsf_get_pool(tlsf_t tlsf)\r
+{\r
+ return tlsf_cast(pool_t, (char*)tlsf + tlsf_size());\r
+}\r
+\r
+/* malloc replacement: round the request up to the allocator's granularity,\r
+** locate a suitable free block, and hand it out. Returns 0 on failure or\r
+** when the adjusted size is 0. */\r
+void* tlsf_malloc(tlsf_t tlsf, size_t size)\r
+{\r
+ control_t* control = tlsf_cast(control_t*, tlsf);\r
+ const size_t adjust = adjust_request_size(size, ALIGN_SIZE);\r
+ block_header_t* block = block_locate_free(control, adjust);\r
+ return block_prepare_used(control, block, adjust);\r
+}\r
+\r
+/* memalign replacement: allocate `size` bytes whose payload address is a\r
+** multiple of `align`. Over-allocates, then trims the leading alignment\r
+** gap back to the pool via block_trim_free_leading. */\r
+void* tlsf_memalign(tlsf_t tlsf, size_t align, size_t size)\r
+{\r
+ control_t* control = tlsf_cast(control_t*, tlsf);\r
+ const size_t adjust = adjust_request_size(size, ALIGN_SIZE);\r
+\r
+ /*\r
+ ** We must allocate an additional minimum block size bytes so that if\r
+ ** our free block will leave an alignment gap which is smaller, we can\r
+ ** trim a leading free block and release it back to the pool. We must\r
+ ** do this because the previous physical block is in use, therefore\r
+ ** the prev_phys_block field is not valid, and we can't simply adjust\r
+ ** the size of that block.\r
+ */\r
+ const size_t gap_minimum = sizeof(block_header_t);\r
+ const size_t size_with_gap = adjust_request_size(adjust + align + gap_minimum, align);\r
+\r
+ /*\r
+ ** If alignment is less than or equals base alignment, we're done.\r
+ ** If we requested 0 bytes, return null, as tlsf_malloc(0) does.\r
+ */\r
+ const size_t aligned_size = (adjust && align > ALIGN_SIZE) ? size_with_gap : adjust;\r
+\r
+ block_header_t* block = block_locate_free(control, aligned_size);\r
+\r
+ /* This can't be a static assert. */\r
+ tlsf_assert(sizeof(block_header_t) == block_size_min + block_header_overhead);\r
+\r
+ if (block)\r
+ {\r
+ void* ptr = block_to_ptr(block);\r
+ void* aligned = align_ptr(ptr, align);\r
+ size_t gap = tlsf_cast(size_t,\r
+ tlsf_cast(tlsfptr_t, aligned) - tlsf_cast(tlsfptr_t, ptr));\r
+\r
+ /* If gap size is too small, offset to next aligned boundary. */\r
+ if (gap && gap < gap_minimum)\r
+ {\r
+ const size_t gap_remain = gap_minimum - gap;\r
+ const size_t offset = tlsf_max(gap_remain, align);\r
+ const void* next_aligned = tlsf_cast(void*,\r
+ tlsf_cast(tlsfptr_t, aligned) + offset);\r
+\r
+ aligned = align_ptr(next_aligned, align);\r
+ gap = tlsf_cast(size_t,\r
+ tlsf_cast(tlsfptr_t, aligned) - tlsf_cast(tlsfptr_t, ptr));\r
+ }\r
+\r
+ if (gap)\r
+ {\r
+ tlsf_assert(gap >= gap_minimum && "gap size too small");\r
+ block = block_trim_free_leading(control, block, gap);\r
+ }\r
+ }\r
+\r
+ return block_prepare_used(control, block, adjust);\r
+}\r
+\r
+/* free replacement: mark the block free, coalesce with both physical\r
+** neighbors, and return it to the free lists. Null pointers are ignored. */\r
+void tlsf_free(tlsf_t tlsf, void* ptr)\r
+{\r
+ /* Don't attempt to free a NULL pointer. */\r
+ if (ptr)\r
+ {\r
+ control_t* control = tlsf_cast(control_t*, tlsf);\r
+ block_header_t* block = block_from_ptr(ptr);\r
+ tlsf_assert(!block_is_free(block) && "block already marked as free");\r
+ block_mark_as_free(block);\r
+ block = block_merge_prev(control, block);\r
+ block = block_merge_next(control, block);\r
+ block_insert(control, block);\r
+ }\r
+}\r
+\r
+/*\r
+** The TLSF block information provides us with enough information to\r
+** provide a reasonably intelligent implementation of realloc, growing or\r
+** shrinking the currently allocated block as required.\r
+**\r
+** This routine handles the somewhat esoteric edge cases of realloc:\r
+** - a non-zero size with a null pointer will behave like malloc\r
+** - a zero size with a non-null pointer will behave like free\r
+** - a request that cannot be satisfied will leave the original buffer\r
+** untouched\r
+** - an extended buffer size will leave the newly-allocated area with\r
+** contents undefined\r
+*/\r
+void* tlsf_realloc(tlsf_t tlsf, void* ptr, size_t size)\r
+{\r
+ control_t* control = tlsf_cast(control_t*, tlsf);\r
+ void* p = 0;\r
+\r
+ /* Zero-size requests are treated as free. */\r
+ if (ptr && size == 0)\r
+ {\r
+ tlsf_free(tlsf, ptr);\r
+ }\r
+ /* Requests with NULL pointers are treated as malloc. */\r
+ else if (!ptr)\r
+ {\r
+ p = tlsf_malloc(tlsf, size);\r
+ }\r
+ else\r
+ {\r
+ block_header_t* block = block_from_ptr(ptr);\r
+ block_header_t* next = block_next(block);\r
+\r
+ const size_t cursize = block_size(block);\r
+ const size_t combined = cursize + block_size(next) + block_header_overhead;\r
+ const size_t adjust = adjust_request_size(size, ALIGN_SIZE);\r
+\r
+ tlsf_assert(!block_is_free(block) && "block already marked as free");\r
+\r
+ /*\r
+ ** If the next block is used, or when combined with the current\r
+ ** block, does not offer enough space, we must reallocate and copy.\r
+ */\r
+ if (adjust > cursize && (!block_is_free(next) || adjust > combined))\r
+ {\r
+ p = tlsf_malloc(tlsf, size);\r
+ if (p)\r
+ {\r
+ const size_t minsize = tlsf_min(cursize, size);\r
+ memcpy(p, ptr, minsize);\r
+ tlsf_free(tlsf, ptr);\r
+ }\r
+ }\r
+ else\r
+ {\r
+ /* Grow or shrink in place: absorb the free neighbor if needed,\r
+ ** then trim any excess back to the pool. */\r
+ /* Do we need to expand to the next block? */\r
+ if (adjust > cursize)\r
+ {\r
+ block_merge_next(control, block);\r
+ block_mark_as_used(block);\r
+ }\r
+\r
+ /* Trim the resulting block and return the original pointer. */\r
+ block_trim_used(control, block, adjust);\r
+ p = ptr;\r
+ }\r
+ }\r
+\r
+ return p;\r
+}\r
--- /dev/null
+#ifndef INCLUDED_tlsf\r
+#define INCLUDED_tlsf\r
+\r
+/*\r
+** Two Level Segregated Fit memory allocator, version 3.1.\r
+** Written by Matthew Conte\r
+** http://tlsf.baisoku.org\r
+**\r
+** Based on the original documentation by Miguel Masmano:\r
+** http://www.gii.upv.es/tlsf/main/docs\r
+**\r
+** This implementation was written to the specification\r
+** of the document, therefore no GPL restrictions apply.\r
+** \r
+** Copyright (c) 2006-2016, Matthew Conte\r
+** All rights reserved.\r
+** \r
+** Redistribution and use in source and binary forms, with or without\r
+** modification, are permitted provided that the following conditions are met:\r
+** * Redistributions of source code must retain the above copyright\r
+** notice, this list of conditions and the following disclaimer.\r
+** * Redistributions in binary form must reproduce the above copyright\r
+** notice, this list of conditions and the following disclaimer in the\r
+** documentation and/or other materials provided with the distribution.\r
+** * Neither the name of the copyright holder nor the\r
+** names of its contributors may be used to endorse or promote products\r
+** derived from this software without specific prior written permission.\r
+** \r
+** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND\r
+** ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\r
+** WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\r
+** DISCLAIMED. IN NO EVENT SHALL MATTHEW CONTE BE LIABLE FOR ANY\r
+** DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES\r
+** (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\r
+** LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND\r
+** ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\r
+** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\r
+** SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\r
+*/\r
+\r
+#include <stddef.h>\r
+\r
+#if defined(__cplusplus)\r
+extern "C" {\r
+#endif\r
+\r
+/* tlsf_t: a TLSF structure. Can contain 1 to N pools. */\r
+/* pool_t: a block of memory that TLSF can manage. */\r
+typedef void* tlsf_t;\r
+typedef void* pool_t;\r
+\r
+/* Create/destroy a memory pool. */\r
+tlsf_t tlsf_create(void* mem);\r
+tlsf_t tlsf_create_with_pool(void* mem, size_t bytes);\r
+void tlsf_destroy(tlsf_t tlsf);\r
+pool_t tlsf_get_pool(tlsf_t tlsf);\r
+\r
+/* Add/remove memory pools. */\r
+pool_t tlsf_add_pool(tlsf_t tlsf, void* mem, size_t bytes);\r
+void tlsf_remove_pool(tlsf_t tlsf, pool_t pool);\r
+\r
+/* malloc/memalign/realloc/free replacements. */\r
+void* tlsf_malloc(tlsf_t tlsf, size_t bytes);\r
+void* tlsf_memalign(tlsf_t tlsf, size_t align, size_t bytes);\r
+void* tlsf_realloc(tlsf_t tlsf, void* ptr, size_t size);\r
+void tlsf_free(tlsf_t tlsf, void* ptr);\r
+\r
+/* Returns internal block size, not original request size */\r
+size_t tlsf_block_size(void* ptr);\r
+\r
+/* Overheads/limits of internal structures. */\r
+size_t tlsf_size(void);\r
+size_t tlsf_align_size(void);\r
+size_t tlsf_block_size_min(void);\r
+size_t tlsf_block_size_max(void);\r
+size_t tlsf_pool_overhead(void);\r
+size_t tlsf_alloc_overhead(void);\r
+\r
+/* Debugging. */\r
+typedef void (*tlsf_walker)(void* ptr, size_t size, int used, void* user);\r
+void tlsf_walk_pool(pool_t pool, tlsf_walker walker, void* user);\r
+/* Returns nonzero if any internal consistency check fails. */\r
+int tlsf_check(tlsf_t tlsf);\r
+int tlsf_check_pool(pool_t pool);\r
+\r
+#if defined(__cplusplus)\r
+};\r
+#endif\r
+\r
+#endif\r
typedef jit_int32_t jit_gpr_t;
typedef jit_int32_t jit_fpr_t;
+#if !defined(__powerpc__) && \
+ (defined(__POWERPC__) || defined(__ppc__) || defined(__PPC__))
+#define __powerpc__ 1
+#endif
+
#if defined(__i386__) || defined(__x86_64__)
# include <lightning/jit_x86.h>
#elif defined(__mips__)
#define jit_comr(u,v) jit_new_node_ww(jit_code_comr,u,v)
jit_code_negr, jit_code_comr,
-#define jit_ffsr(u,v) jit_new_node_ww(jit_code_ffsr,u,v)
-#define jit_clzr(u,v) jit_new_node_ww(jit_code_clzr,u,v)
- jit_code_ffsr, jit_code_clzr,
-
#define jit_ltr(u,v,w) jit_new_node_www(jit_code_ltr,u,v,w)
#define jit_lti(u,v,w) jit_new_node_www(jit_code_lti,u,v,w)
jit_code_ltr, jit_code_lti,
#define jit_movr(u,v) jit_new_node_ww(jit_code_movr,u,v)
#define jit_movi(u,v) jit_new_node_ww(jit_code_movi,u,v)
jit_code_movr, jit_code_movi,
+
#define jit_movnr(u,v,w) jit_new_node_www(jit_code_movnr,u,v,w)
#define jit_movzr(u,v,w) jit_new_node_www(jit_code_movzr,u,v,w)
jit_code_movnr, jit_code_movzr,
+
#define jit_extr_c(u,v) jit_new_node_ww(jit_code_extr_c,u,v)
#define jit_extr_uc(u,v) jit_new_node_ww(jit_code_extr_uc,u,v)
jit_code_extr_c, jit_code_extr_uc,
#define jit_movr_d_w(u, v) jit_new_node_ww(jit_code_movr_d_w, u, v)
#define jit_movi_d_w(u, v) jit_new_node_wd(jit_code_movi_d_w, u, v)
+#define jit_bswapr_us(u,v) jit_new_node_ww(jit_code_bswapr_us,u,v)
+ jit_code_bswapr_us,
+#define jit_bswapr_ui(u,v) jit_new_node_ww(jit_code_bswapr_ui,u,v)
+ jit_code_bswapr_ui,
+#define jit_bswapr_ul(u,v) jit_new_node_ww(jit_code_bswapr_ul,u,v)
+ jit_code_bswapr_ul,
+#if __WORDSIZE == 32
+#define jit_bswapr(u,v) jit_new_node_ww(jit_code_bswapr_ui,u,v)
+#else
+#define jit_bswapr(u,v) jit_new_node_ww(jit_code_bswapr_ul,u,v)
+#endif
+
jit_code_last_code
} jit_code_t;
#define ENABLE_FIRST_PASS 1
#define ENABLE_DISASSEMBLER 0
#define ENABLE_TINYMM 0
+#define ENABLE_CODE_BUFFER 0
#define HAS_DEFAULT_ELM 1