From: Paul Cercueil Date: Wed, 12 Oct 2022 10:50:18 +0000 (+0100) Subject: git subrepo pull --force deps/lightning X-Git-Tag: r24l~359^2 X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c0c162422385a60ea7c8fa1dfe439e83e0a13d88;p=pcsx_rearmed.git git subrepo pull --force deps/lightning subrepo: subdir: "deps/lightning" merged: "4941e101f8" upstream: origin: "https://github.com/pcercuei/gnu_lightning.git" branch: "pcsx_rearmed" commit: "4941e101f8" git-subrepo: version: "0.4.3" origin: "https://github.com/ingydotnet/git-subrepo.git" commit: "2f68596" --- diff --git a/deps/lightning/.gitmodules b/deps/lightning/.gitmodules index acb26693..e69de29b 100644 --- a/deps/lightning/.gitmodules +++ b/deps/lightning/.gitmodules @@ -1,3 +0,0 @@ -[submodule "gnulib"] - path = gnulib - url = git://git.sv.gnu.org/gnulib.git diff --git a/deps/lightning/.gitrepo b/deps/lightning/.gitrepo index e1611ab6..5db5b905 100644 --- a/deps/lightning/.gitrepo +++ b/deps/lightning/.gitrepo @@ -6,7 +6,7 @@ [subrepo] remote = https://github.com/pcercuei/gnu_lightning.git branch = pcsx_rearmed - commit = b1dfc564e2327621d15e688911a398c3a729bd82 - parent = 7393802c34796806043533cd379e5bcbd66cfd54 + commit = 4941e101f8b88837fc4aebe59ada85a0c0b17534 + parent = fdf33147c33ab1cb27c4bd06b377f744411030c3 method = merge cmdver = 0.4.3 diff --git a/deps/lightning/ChangeLog b/deps/lightning/ChangeLog index 7fe5c7c5..d25df3d0 100644 --- a/deps/lightning/ChangeLog +++ b/deps/lightning/ChangeLog @@ -1,3 +1,37 @@ +2022-10-05 Paulo Andrade + + * check/lightning.c: Remove -Dmacro=value from usage and attempt + to parse it. It was buggy and not properly implemented. Now + it pass any extra options to the generated jit. To pass any + option starting with '-' need to also use '--'. + * check/collatz.e: New sample file showing an example of jit + generation. + +2022-10-04 Paulo Andrade + + * include/lightning/jit_private.h: Add new flag to jit_block_t. 
+ * lib/lightning.c: Rewrite register liveness and state at block + entry code to avoid a very expensive and non scaling code path. + Now it attempts to do as few as possible recomputations when + merging state of adjacent blocks, still doing one extra nop pass + (in the sense that it will not find any changes) to make sure the + logic is correct. + +2022-09-30 Paulo Andrade + + * include/lightning/jit_private.h: Implement new data structures + specific to riscv. + * lib/jit_disasm.c: Extra disassemble code for riscv constant pool. + * lib/jit_riscv-cpu.c: Modify movi to use constant pool if 3 or + more instructions are required to construct constant and modify + movi_p to use a pc relative load from a constant pool. + lib/jit_riscv-sz.c: Update for new constant pool code. Most + instructions that need 64 bit constants are significantly reduced. + * lib/jit_riscv.c: Implement most of the constant pool code. + * lib/jit_size.c: Update for estimate of code generation size. + * lib/lightning.c: Update for riscv specific code, and also make + sure to mprotect the constant pool as executable. + 2022-09-08 Paulo Andrade * lib/jit_fallback.c: Implement fallback compare and swap with diff --git a/deps/lightning/check/.gitignore b/deps/lightning/check/.gitignore index a0047bba..3ca81ec2 100644 --- a/deps/lightning/check/.gitignore +++ b/deps/lightning/check/.gitignore @@ -29,6 +29,7 @@ branch call carg carry +catomic ccall clobber ctramp @@ -50,6 +51,7 @@ ldstxr ldstxr-c lightning live +movzr put qalu_div qalu_mul diff --git a/deps/lightning/check/Makefile.am b/deps/lightning/check/Makefile.am index 3cc54d10..10537b1f 100644 --- a/deps/lightning/check/Makefile.am +++ b/deps/lightning/check/Makefile.am @@ -1,5 +1,5 @@ # -# Copyright 2012-2019 Free Software Foundation, Inc. +# Copyright 2012-2022 Free Software Foundation, Inc. # # This file is part of GNU lightning. 
# diff --git a/deps/lightning/check/align.tst b/deps/lightning/check/align.tst index 5d5348ca..7bc10101 100644 --- a/deps/lightning/check/align.tst +++ b/deps/lightning/check/align.tst @@ -17,7 +17,7 @@ L2: align $(__WORDSIZE / 8) /* possible nops */ L3: jmpi L1 - align $(__WORDSIZE / 8) /* possible nops */ + align 32 /* Force nops */ L4: prepare pushargi fmt diff --git a/deps/lightning/check/catomic.c b/deps/lightning/check/catomic.c index 04a2f89d..ef09076c 100644 --- a/deps/lightning/check/catomic.c +++ b/deps/lightning/check/catomic.c @@ -4,6 +4,46 @@ #include #include +#if DEBUG +volatile +#endif +jit_word_t lock; +pthread_t tids[4]; + +#if DEBUG +int debug_offset(void) +{ + int i; + pthread_t self = pthread_self(); + for (i = 0; i < 4; ++i) + if (tids[i] == self) + return i; + return -1; +} + +void debug_spin(void) +{ + printf(" spin %d : %ld\n", debug_offset(), lock); +} + +void debug_lock(void) +{ + printf(" lock %d : %ld\n", debug_offset(), lock); +} + +void debug_unlock(void) +{ + printf("unlock %d : %ld\n", debug_offset(), lock); +} +#define DEBUG_SPIN() jit_calli(debug_spin) +#define DEBUG_LOCK() jit_calli(debug_lock) +#define DEBUG_UNLOCK() jit_calli(debug_unlock) +#else +#define DEBUG_SPIN() /**/ +#define DEBUG_LOCK() /**/ +#define DEBUG_UNLOCK() /**/ +#endif + void alarm_handler(int unused) { _exit(1); @@ -17,8 +57,6 @@ main(int argc, char *argv[]) jit_node_t *jmpi_main, *label; jit_node_t *func0, *func1, *func2, *func3; jit_node_t *patch0, *patch1, *patch2, *patch3; - jit_word_t lock; - pthread_t tids[4]; /* If there is any bug, do not hang in "make check" */ signal(SIGALRM, alarm_handler); @@ -35,31 +73,36 @@ main(int argc, char *argv[]) name = jit_label(); \ jit_prolog(); \ jit_movi(JIT_V0, (jit_word_t)&lock); \ - jit_movi(JIT_R1, 0); \ - jit_movi(JIT_R2, line); \ + jit_movi(JIT_V1, 0); \ + jit_movi(JIT_V2, line); \ /* spin until get the lock */ \ + DEBUG_SPIN(); \ label = jit_label(); \ - jit_casr(JIT_R0, JIT_V0, JIT_R1, JIT_R2); \ + 
jit_casr(JIT_R0, JIT_V0, JIT_V1, JIT_V2); \ jit_patch_at(jit_beqi(JIT_R0, 0), label); \ /* lock acquired */ \ + DEBUG_LOCK(); \ jit_prepare(); \ - /* pretend to be doing something useful for 0.01 usec + /* pretend to be doing something useful for 0.01 sec * while holding the lock */ \ jit_pushargi(10000); \ jit_finishi(usleep); \ /* release lock */ \ - jit_movi(JIT_R1, 0); \ - jit_str(JIT_V0, JIT_R1); \ + DEBUG_UNLOCK(); \ + jit_movi(JIT_V1, 0); \ + jit_str(JIT_V0, JIT_V1); \ /* Now test casi */ \ - jit_movi(JIT_R1, 0); \ - jit_movi(JIT_R2, line); \ + jit_movi(JIT_V1, 0); \ + jit_movi(JIT_V2, line); \ /* spin until get the lock */ \ + DEBUG_SPIN(); \ label = jit_label(); \ - jit_casi(JIT_R0, (jit_word_t)&lock, JIT_R1, JIT_R2); \ + jit_casi(JIT_R0, (jit_word_t)&lock, JIT_V1, JIT_V2); \ jit_patch_at(jit_beqi(JIT_R0, 0), label); \ /* lock acquired */ \ + DEBUG_LOCK(); \ jit_prepare(); \ - /* pretend to be doing something useful for 0.01 usec + /* pretend to be doing something useful for 0.01 sec * while holding the lock */ \ jit_pushargi(10000); \ jit_finishi(usleep); \ @@ -69,8 +112,9 @@ main(int argc, char *argv[]) /*jit_pushargi((jit_word_t)#name);*/ \ jit_finishi(puts); \ /* release lock */ \ - jit_movi(JIT_R1, 0); \ - jit_str(JIT_V0, JIT_R1); \ + DEBUG_UNLOCK(); \ + jit_movi(JIT_V1, 0); \ + jit_str(JIT_V0, JIT_V1); \ jit_ret(); \ jit_epilog(); defun(func0, __LINE__); @@ -126,7 +170,7 @@ main(int argc, char *argv[]) code = jit_emit(); -#if 1 +#if DEBUG jit_disassemble(); #endif diff --git a/deps/lightning/check/collatz.tst b/deps/lightning/check/collatz.tst new file mode 100644 index 00000000..85c4d41a --- /dev/null +++ b/deps/lightning/check/collatz.tst @@ -0,0 +1,79 @@ +.data 32 +str: +.c "%lu\n" +.code + jmpi main +/* + * unsigned long collatz(unsigned long n) { + * unsigned long r = n; + * if (!(r & 1)) { + * r = r / 2; + * return r; + * } + * r = r * 3; + * r = r + 1; + * return r; + * } + */ +collatz: + prolog + arg $n + getarg %r0 $n + bmsi odd %r0 1 + 
//divi_u %r0 %r0 2 + rshi_u %r0 %r0 1 + retr %r0 +odd: + muli %r0 %r0 3 + addi %r0 %r0 1 + retr %r0 + epilog + +/* + * int main(int argc, char *argv[]) { + * unsigned long v; + * if (argc == 2) + * v = strtoul(argv[1], NULL, 0); + * else + * v = (1L << __WORDSIZE / 2) - 1; + * while (1) { + * printf("%ld\n", v); + * if (v <= 1) + * break; + * v = collatz(v); + * } + * return 0; + * } + */ +main: + prolog + arg $argc + arg $argv + getarg %r0 $argc + bnei default %r0 2 + getarg %v0 $argv + ldxi %r0 %v0 $(__WORDSIZE >> 3) + prepare + pushargr %r0 + pushargi 0 + pushargi 0 + finishi @strtoul + retval %v0 + jmpi loop +default: + movi %v0 $((1 << __WORDSIZE / 2) - 1) +loop: + prepare + pushargi str + ellipsis + pushargr %v0 + finishi @printf + blei_u done %v0 1 + prepare + pushargr %v0 + finishi collatz + retval %v0 + jmpi loop +done: + reti 0 + epilog diff --git a/deps/lightning/check/lightning.c b/deps/lightning/check/lightning.c index 34b5440e..3d916f79 100644 --- a/deps/lightning/check/lightning.c +++ b/deps/lightning/check/lightning.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2019 Free Software Foundation, Inc. + * Copyright (C) 2012-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* @@ -3829,6 +3829,11 @@ execute(int argc, char *argv[]) jit_disassemble(); fprintf(stderr, " - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n"); } + if (flag_verbose && argc) { + for (result = 0; result < argc; result++) + printf("argv[%d] = %s\n", result, argv[result]); + fprintf(stderr, " - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n"); + } jit_clear_state(); if (flag_disasm) @@ -4028,12 +4033,11 @@ usage(void) { #if HAVE_GETOPT_LONG_ONLY fprintf(stderr, "\ -Usage: %s [jit assembler options] file [jit program options]\n\ +Usage: %s [jit assembler options] file [--] [jit program options]\n\ Jit assembler options:\n\ -help Display this information\n\ -v[0-3] Verbose output level\n\ - -d Do not use a data buffer\n\ - -D[=] Preprocessor options\n" + -d Do not use a data buffer\n" # if defined(__i386__) && __WORDSIZE == 32 " -mx87=1 Force using x87 when sse2 available\n" # endif @@ -4049,11 +4053,10 @@ Jit assembler options:\n\ , progname); #else fprintf(stderr, "\ -Usage: %s [jit assembler options] file [jit program options]\n\ +Usage: %s [jit assembler options] file [--] [jit program options]\n\ Jit assembler options:\n\ -h Display this information\n\ - -v Verbose output level\n\ - -D[=] Preprocessor options\n", progname); + -v Verbose output level\n", progname); #endif finish_jit(); exit(1); @@ -4228,16 +4231,6 @@ main(int argc, char *argv[]) # define cc "gcc" #endif opt_short = snprintf(cmdline, sizeof(cmdline), cc " -E -x c %s", argv[opt_index]); - for (++opt_index; opt_index < argc; opt_index++) { - if (argv[opt_index][0] == '-') - opt_short += snprintf(cmdline + opt_short, - sizeof(cmdline) - opt_short, - " %s", argv[opt_index]); - else { - --opt_index; - break; - } - } opt_short += snprintf(cmdline + opt_short, sizeof(cmdline) - opt_short, " -D__WORDSIZE=%d", __WORDSIZE); diff --git a/deps/lightning/check/setcode.c b/deps/lightning/check/setcode.c index 0047f348..62719eef 100644 --- 
a/deps/lightning/check/setcode.c +++ b/deps/lightning/check/setcode.c @@ -31,14 +31,24 @@ main(int argc, char *argv[]) int mmap_fd; #endif void (*function)(void); + int mmap_prot, mmap_flags; #if defined(__sgi) mmap_fd = open("/dev/zero", O_RDWR); #endif - ptr = mmap(NULL, 1024 * 1024, - PROT_EXEC | PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANON, mmap_fd, 0); + mmap_prot = PROT_READ | PROT_WRITE; +#if !__OpenBSD__ + mmap_prot |= PROT_EXEC; +#endif +#if __NetBSD__ + mmap_prot = PROT_MPROTECT(mmap_prot); + mmap_flags = 0; +#else + mmap_flags = MAP_PRIVATE; +#endif + mmap_flags |= MAP_ANON; + ptr = mmap(NULL, 1024 * 1024, mmap_prot, mmap_flags, mmap_fd, 0); assert(ptr != MAP_FAILED); #if defined(__sgi) close(mmap_fd); @@ -72,6 +82,9 @@ main(int argc, char *argv[]) if (function != NULL) abort(); +#if __NetBSD__ + assert(mprotect(ptr, 1024 * 1024, PROT_READ | PROT_WRITE) == 0); +#endif /* and calling again with enough space works */ jit_set_code(ptr, 1024 * 1024); function = jit_emit(); @@ -79,6 +92,9 @@ main(int argc, char *argv[]) abort(); jit_clear_state(); +#if __NetBSD__ || __OpenBSD__ + assert(mprotect(ptr, 1024 * 1024, PROT_READ | PROT_EXEC) == 0); +#endif (*function)(); jit_destroy_state(); finish_jit(); diff --git a/deps/lightning/configure.ac b/deps/lightning/configure.ac index 8200651c..63bbadb5 100644 --- a/deps/lightning/configure.ac +++ b/deps/lightning/configure.ac @@ -123,10 +123,8 @@ AM_CONDITIONAL(with_disassembler, [test "x$DISASSEMBLER" != "xno"]) if test "x$DISASSEMBLER" != "xno"; then LIGHTNING_CFLAGS="$LIGHTNING_CFLAGS -DDISASSEMBLER=1" save_CFLAGS=$CFLAGS - CFLAGS="$CFLAGS -I$PWD/include -D_GNU_SOURCE" + CFLAGS="$CFLAGS -D_GNU_SOURCE" AC_COMPILE_IFELSE([AC_LANG_SOURCE( - #include - #include #include int main(int argc, char *argv[]) { @@ -225,12 +223,11 @@ ac_cv_test_arm_arm= ac_cv_test_arm_swf= save_CFLAGS=$CFLAGS -CFLAGS="$CFLAGS -I$PWD/include -D_GNU_SOURCE" +CFLAGS="$CFLAGS -D_GNU_SOURCE" if test x$cpu = x; then AC_MSG_ERROR([cpu $target_cpu 
not supported]) elif test $cpu = x86; then AC_RUN_IFELSE([AC_LANG_SOURCE([[ - #include int main(void) { int ac, flags; unsigned int eax, ebx, ecx, edx; diff --git a/deps/lightning/doc/Makefile.am b/deps/lightning/doc/Makefile.am index c46e0ab6..6398bceb 100644 --- a/deps/lightning/doc/Makefile.am +++ b/deps/lightning/doc/Makefile.am @@ -1,5 +1,5 @@ # -# Copyright 2012-2019 Free Software Foundation, Inc. +# Copyright 2012-2022 Free Software Foundation, Inc. # # This file is part of GNU lightning. # diff --git a/deps/lightning/include/Makefile.am b/deps/lightning/include/Makefile.am index 8f915943..ce622e20 100644 --- a/deps/lightning/include/Makefile.am +++ b/deps/lightning/include/Makefile.am @@ -1,5 +1,5 @@ # -# Copyright 2000, 2001, 2002, 2012-2019 Free Software Foundation, Inc. +# Copyright 2000, 2001, 2002, 2012-2022 Free Software Foundation, Inc. # # This file is part of GNU lightning. # diff --git a/deps/lightning/include/lightning.h.in b/deps/lightning/include/lightning.h.in index 6f8ee030..48957cb7 100644 --- a/deps/lightning/include/lightning.h.in +++ b/deps/lightning/include/lightning.h.in @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2019 Free Software Foundation, Inc. + * Copyright (C) 2012-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * diff --git a/deps/lightning/include/lightning/Makefile.am b/deps/lightning/include/lightning/Makefile.am index 9b1b3e6c..9bc1e86d 100644 --- a/deps/lightning/include/lightning/Makefile.am +++ b/deps/lightning/include/lightning/Makefile.am @@ -1,5 +1,5 @@ # -# Copyright 2000, 2001, 2002, 2012-2019 Free Software Foundation, Inc. +# Copyright 2000, 2001, 2002, 2012-2022 Free Software Foundation, Inc. # # This file is part of GNU lightning. 
# diff --git a/deps/lightning/include/lightning/jit_aarch64.h b/deps/lightning/include/lightning/jit_aarch64.h index 6e7d8be9..6a435f1a 100644 --- a/deps/lightning/include/lightning/jit_aarch64.h +++ b/deps/lightning/include/lightning/jit_aarch64.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2019 Free Software Foundation, Inc. + * Copyright (C) 2013-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * diff --git a/deps/lightning/include/lightning/jit_alpha.h b/deps/lightning/include/lightning/jit_alpha.h index 9bae3437..35934319 100644 --- a/deps/lightning/include/lightning/jit_alpha.h +++ b/deps/lightning/include/lightning/jit_alpha.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2014-2019 Free Software Foundation, Inc. + * Copyright (C) 2014-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * diff --git a/deps/lightning/include/lightning/jit_arm.h b/deps/lightning/include/lightning/jit_arm.h index 81451f12..8f7278db 100644 --- a/deps/lightning/include/lightning/jit_arm.h +++ b/deps/lightning/include/lightning/jit_arm.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2019 Free Software Foundation, Inc. + * Copyright (C) 2012-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * diff --git a/deps/lightning/include/lightning/jit_hppa.h b/deps/lightning/include/lightning/jit_hppa.h index ddc3950f..afdf21da 100644 --- a/deps/lightning/include/lightning/jit_hppa.h +++ b/deps/lightning/include/lightning/jit_hppa.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2019 Free Software Foundation, Inc. + * Copyright (C) 2013-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * diff --git a/deps/lightning/include/lightning/jit_ia64.h b/deps/lightning/include/lightning/jit_ia64.h index 718f191f..7b212b9a 100644 --- a/deps/lightning/include/lightning/jit_ia64.h +++ b/deps/lightning/include/lightning/jit_ia64.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2019 Free Software Foundation, Inc. 
+ * Copyright (C) 2013-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * diff --git a/deps/lightning/include/lightning/jit_mips.h b/deps/lightning/include/lightning/jit_mips.h index 45f3851f..a2388c9c 100644 --- a/deps/lightning/include/lightning/jit_mips.h +++ b/deps/lightning/include/lightning/jit_mips.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2019 Free Software Foundation, Inc. + * Copyright (C) 2012-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * diff --git a/deps/lightning/include/lightning/jit_ppc.h b/deps/lightning/include/lightning/jit_ppc.h index f1bdbcbb..d3d25d39 100644 --- a/deps/lightning/include/lightning/jit_ppc.h +++ b/deps/lightning/include/lightning/jit_ppc.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2019 Free Software Foundation, Inc. + * Copyright (C) 2012-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * diff --git a/deps/lightning/include/lightning/jit_private.h b/deps/lightning/include/lightning/jit_private.h index 4925a864..8b4f5289 100644 --- a/deps/lightning/include/lightning/jit_private.h +++ b/deps/lightning/include/lightning/jit_private.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2019 Free Software Foundation, Inc. + * Copyright (C) 2012-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -365,6 +365,8 @@ typedef struct jit_register jit_register_t; # if DISASSEMBLER typedef struct jit_data_info jit_data_info_t; # endif +#elif __riscv +typedef struct jit_const jit_const_t; #endif union jit_data { @@ -414,6 +416,9 @@ struct jit_block { jit_node_t *label; jit_regset_t reglive; jit_regset_t regmask; + jit_bool_t again; /* Flag need to rebuild regset masks + * due to changes in live and unknown + * state. 
*/ }; struct jit_value { @@ -436,6 +441,12 @@ struct jit_data_info { jit_uword_t code; /* pointer in code buffer */ jit_word_t length; /* length of constant vector */ }; +#elif __riscv && __WORDSIZE == 64 +struct jit_const { + jit_word_t value; + jit_word_t address; + jit_const_t *next; +}; #endif struct jit_function { @@ -595,6 +606,27 @@ struct jit_compiler { jit_word_t length; } prolog; jit_bool_t jump; +#elif __riscv && __WORDSIZE == 64 + struct { + /* Hash table for constants to be resolved and patched */ + struct { + jit_const_t **table; /* very simple hash table */ + jit_word_t size; /* number of vectors in table */ + jit_word_t count; /* number of distinct entries */ + } hash; + struct { + jit_const_t **ptr; /* keep a single pointer */ + jit_const_t *list; /* free list */ + jit_word_t length; /* length of pool */ + } pool; + /* Linear list for constants that cannot be encoded easily */ + struct { + jit_word_t *instrs; /* list of direct movi instructions */ + jit_word_t *values; /* list of direct movi constants */ + jit_word_t offset; /* offset in instrs/values vector */ + jit_word_t length; /* length of instrs/values vector */ + } vector; + } consts; #endif #if GET_JIT_SIZE /* Temporary storage to calculate instructions length */ diff --git a/deps/lightning/include/lightning/jit_riscv.h b/deps/lightning/include/lightning/jit_riscv.h index 1b4f93d3..ad3f76fa 100644 --- a/deps/lightning/include/lightning/jit_riscv.h +++ b/deps/lightning/include/lightning/jit_riscv.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019 Free Software Foundation, Inc. + * Copyright (C) 2019-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * diff --git a/deps/lightning/include/lightning/jit_s390.h b/deps/lightning/include/lightning/jit_s390.h index 6ab196b1..a28b0dd3 100644 --- a/deps/lightning/include/lightning/jit_s390.h +++ b/deps/lightning/include/lightning/jit_s390.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2019 Free Software Foundation, Inc. 
+ * Copyright (C) 2013-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * diff --git a/deps/lightning/include/lightning/jit_sparc.h b/deps/lightning/include/lightning/jit_sparc.h index bee440bb..e5988e11 100644 --- a/deps/lightning/include/lightning/jit_sparc.h +++ b/deps/lightning/include/lightning/jit_sparc.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2019 Free Software Foundation, Inc. + * Copyright (C) 2013-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * diff --git a/deps/lightning/include/lightning/jit_x86.h b/deps/lightning/include/lightning/jit_x86.h index a278d062..91f91244 100644 --- a/deps/lightning/include/lightning/jit_x86.h +++ b/deps/lightning/include/lightning/jit_x86.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2019 Free Software Foundation, Inc. + * Copyright (C) 2012-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * diff --git a/deps/lightning/lib/Makefile.am b/deps/lightning/lib/Makefile.am index 7e9bd89e..28baee72 100644 --- a/deps/lightning/lib/Makefile.am +++ b/deps/lightning/lib/Makefile.am @@ -1,5 +1,5 @@ # -# Copyright 2000, 2001, 2002, 2012-2019 Free Software Foundation, Inc. +# Copyright 2000, 2001, 2002, 2012-2022 Free Software Foundation, Inc. # # This file is part of GNU lightning. # @@ -35,6 +35,7 @@ liblightning_la_SOURCES = \ lightning.c EXTRA_DIST = \ + jit_fallback.c \ jit_rewind.c \ jit_aarch64.c \ jit_aarch64-cpu.c \ diff --git a/deps/lightning/lib/jit_aarch64-cpu.c b/deps/lightning/lib/jit_aarch64-cpu.c index 7572be7c..35ddabfd 100644 --- a/deps/lightning/lib/jit_aarch64-cpu.c +++ b/deps/lightning/lib/jit_aarch64-cpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2019 Free Software Foundation, Inc. + * Copyright (C) 2013-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* @@ -1849,8 +1849,9 @@ _casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, /* retry: */ retry = _jit->pc.w; LDAXR(r0, r1); - jump0 = bner(_jit->pc.w, r0, r2); /* bne done r0 r2 */ - STLXR(r0, r3, r1); + eqr(r0, r0, r2); + jump0 = beqi(_jit->pc.w r0, 0); /* beqi done r0 0 */ + STLXR(r3, r0, r1); jump1 = bnei(_jit->pc.w, r0, 0); /* bnei retry r0 0 */ /* done: */ CSET(r0, CC_EQ); diff --git a/deps/lightning/lib/jit_aarch64-fpu.c b/deps/lightning/lib/jit_aarch64-fpu.c index 871ba7e2..7c405393 100644 --- a/deps/lightning/lib/jit_aarch64-fpu.c +++ b/deps/lightning/lib/jit_aarch64-fpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2019 Free Software Foundation, Inc. + * Copyright (C) 2013-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * diff --git a/deps/lightning/lib/jit_aarch64-sz.c b/deps/lightning/lib/jit_aarch64-sz.c index 90c87747..b1f451f2 100644 --- a/deps/lightning/lib/jit_aarch64-sz.c +++ b/deps/lightning/lib/jit_aarch64-sz.c @@ -404,6 +404,6 @@ 8, /* bswapr_us */ 8, /* bswapr_ui */ 4, /* bswapr_ul */ - 0, /* casr */ - 0, /* casi */ + 28, /* casr */ + 36, /* casi */ #endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_aarch64.c b/deps/lightning/lib/jit_aarch64.c index dadf76eb..b54d0070 100644 --- a/deps/lightning/lib/jit_aarch64.c +++ b/deps/lightning/lib/jit_aarch64.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2019 Free Software Foundation, Inc. + * Copyright (C) 2013-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* @@ -1018,11 +1018,10 @@ _emit_code(jit_state_t *_jit) jit_regarg_set(node, value); switch (node->code) { case jit_code_align: - assert(!(node->u.w & (node->u.w - 1)) && - node->u.w <= sizeof(jit_word_t)); - if (node->u.w == sizeof(jit_word_t) && - (word = _jit->pc.w & (sizeof(jit_word_t) - 1))) - nop(sizeof(jit_word_t) - word); + /* Must align to a power of two */ + assert(!(node->u.w & (node->u.w - 1))); + if ((word = _jit->pc.w & (node->u.w - 1))) + nop(node->u.w - word); break; case jit_code_note: case jit_code_name: node->u.w = _jit->pc.w; diff --git a/deps/lightning/lib/jit_alpha-cpu.c b/deps/lightning/lib/jit_alpha-cpu.c index 3809aa3f..40f31267 100644 --- a/deps/lightning/lib/jit_alpha-cpu.c +++ b/deps/lightning/lib/jit_alpha-cpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2014-2019 Free Software Foundation, Inc. + * Copyright (C) 2014-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -311,13 +311,13 @@ static void _movr(jit_state_t*,jit_int32_t,jit_int32_t); static void _movi(jit_state_t*,jit_int32_t,jit_word_t); # define movi_p(r0,i0) _movi_p(_jit,r0,i0) static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t); -# define movnr(r0,r1,r2) _movnr(_jit,r0,r1,r2) -static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); -# define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2) -static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define movnr(r0,r1,r2) CMOVNE(r2, r1, r0) +# define movzr(r0,r1,r2) CMOVEQ(r2, r1, r0) # define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0) static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t, jit_int32_t,jit_int32_t,jit_word_t); +#define casr(r0, r1, r2, r3) casx(r0, r1, r2, r3, 0) +#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0) # define negr(r0,r1) NEGQ(r1,r0) # define comr(r0,r1) NOT(r1,r0) # define addr(r0,r1,r2) ADDQ(r1,r2,r0) @@ -812,29 +812,32 @@ _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) return (w); } -static void 
-_movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) -{ - jit_word_t w; - w = beqi(_jit->pc.w, r2, 0); - MOV(r1, r0); - patch_at(w, _jit->pc.w); -} - -static void -_movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) -{ - jit_word_t w; - w = bnei(_jit->pc.w, r2, 0); - MOV(r1, r0); - patch_at(w, _jit->pc.w); -} - static void _casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3, jit_word_t i0) { - fallback_casx(r0, r1, r2, r3, i0); + jit_word_t jump0, jump1, again, done; + jit_int32_t iscasi, r1_reg; + if ((iscasi = (r1 == _NOREG))) { + r1_reg = jit_get_reg(jit_class_gpr); + r1 = rn(r1_reg); + movi(r1, i0); + } + again = _jit->pc.w; /* AGAIN */ + LDQ_L(r0, r1, 0); /* Load r0 locked */ + jump0 = bner(0, r0, r2); /* bne FAIL r0 r2 */ + movr(r0, r3); /* Move to r0 to attempt to store */ + STQ_C(r0, r1, 0); /* r0 is an in/out argument */ + jump1 = _jit->pc.w; + BEQ(r0, 0); /* beqi AGAIN r0 0 */ + patch_at(jump1, again); + jump1 = _jit->pc.w; + BR(_R31_REGNO, 0); /* r0 set to 1 if store succeeded */ + patch_at(jump0, _jit->pc.w); /* FAIL: */ + movi(r0, 0); /* Already locked */ + patch_at(jump1, _jit->pc.w); + if (iscasi) + jit_unget_reg(r1_reg); } static void diff --git a/deps/lightning/lib/jit_alpha-fpu.c b/deps/lightning/lib/jit_alpha-fpu.c index ea5c7465..5452a1ea 100644 --- a/deps/lightning/lib/jit_alpha-fpu.c +++ b/deps/lightning/lib/jit_alpha-fpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2014-2019 Free Software Foundation, Inc. + * Copyright (C) 2014-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* diff --git a/deps/lightning/lib/jit_alpha-sz.c b/deps/lightning/lib/jit_alpha-sz.c index 9653e35e..ac314f27 100644 --- a/deps/lightning/lib/jit_alpha-sz.c +++ b/deps/lightning/lib/jit_alpha-sz.c @@ -1,6 +1,5 @@ - #if __WORDSIZE == 64 -#define JIT_INSTR_MAX 76 +#define JIT_INSTR_MAX 168 0, /* data */ 0, /* live */ 4, /* align */ @@ -9,7 +8,7 @@ 0, /* #name */ 0, /* #note */ 0, /* label */ - 76, /* prolog */ + 88, /* prolog */ 0, /* ellipsis */ 0, /* va_push */ 0, /* allocai */ @@ -24,9 +23,9 @@ 0, /* getarg_l */ 0, /* putargr */ 0, /* putargi */ - 0, /* va_start */ - 0, /* va_arg */ - 0, /* va_arg_d */ + 20, /* va_start */ + 24, /* va_arg */ + 44, /* va_arg_d */ 0, /* va_end */ 4, /* addr */ 32, /* addi */ @@ -47,18 +46,18 @@ 56, /* qmuli */ 12, /* qmulr_u */ 32, /* qmuli_u */ - 48, /* divr */ - 72, /* divi */ - 48, /* divr_u */ - 72, /* divi_u */ - 56, /* qdivr */ - 56, /* qdivi */ - 56, /* qdivr_u */ - 56, /* qdivi_u */ - 48, /* remr */ - 72, /* remi */ - 48, /* remr_u */ - 72, /* remi_u */ + 44, /* divr */ + 68, /* divi */ + 44, /* divr_u */ + 68, /* divi_u */ + 52, /* qdivr */ + 52, /* qdivi */ + 52, /* qdivr_u */ + 52, /* qdivi_u */ + 44, /* remr */ + 68, /* remi */ + 44, /* remr_u */ + 68, /* remi_u */ 4, /* andr */ 32, /* andi */ 4, /* orr */ @@ -95,8 +94,8 @@ 12, /* nei */ 4, /* movr */ 32, /* movi */ - 12, /* movnr */ - 12, /* movzr */ + 4, /* movnr */ + 4, /* movzr */ 8, /* extr_c */ 8, /* extr_uc */ 8, /* extr_s */ @@ -121,19 +120,19 @@ 4, /* ldr_l */ 32, /* ldi_l */ 16, /* ldxr_c */ - 12, /* ldxi_c */ + 44, /* ldxi_c */ 8, /* ldxr_uc */ - 4, /* ldxi_uc */ + 36, /* ldxi_uc */ 16, /* ldxr_s */ - 12, /* ldxi_s */ + 44, /* ldxi_s */ 8, /* ldxr_us */ - 4, /* ldxi_us */ + 36, /* ldxi_us */ 8, /* ldxr_i */ - 4, /* ldxi_i */ + 36, /* ldxi_i */ 16, /* ldxr_ui */ - 12, /* ldxi_ui */ + 44, /* ldxi_ui */ 8, /* ldxr_l */ - 4, /* ldxi_l */ + 36, /* ldxi_l */ 4, /* str_c */ 32, /* sti_c */ 4, /* str_s */ @@ -143,13 +142,13 @@ 4, /* str_l */ 32, /* sti_l */ 8, /* 
stxr_c */ - 4, /* stxi_c */ + 36, /* stxi_c */ 8, /* stxr_s */ - 4, /* stxi_s */ + 36, /* stxi_s */ 8, /* stxr_i */ - 4, /* stxi_i */ + 36, /* stxi_i */ 8, /* stxr_l */ - 4, /* stxi_l */ + 36, /* stxi_l */ 8, /* bltr */ 8, /* blti */ 8, /* bltr_u */ @@ -190,7 +189,7 @@ 32, /* bxsubi */ 16, /* bxsubr_u */ 16, /* bxsubi_u */ - 0, /* jmpr */ + 4, /* jmpr */ 36, /* jmpi */ 8, /* callr */ 36, /* calli */ @@ -209,93 +208,93 @@ 0, /* retval_i */ 0, /* retval_ui */ 0, /* retval_l */ - 68, /* epilog */ + 76, /* epilog */ 0, /* arg_f */ 0, /* getarg_f */ 0, /* putargr_f */ 0, /* putargi_f */ 8, /* addr_f */ - 32, /* addi_f */ + 40, /* addi_f */ 8, /* subr_f */ - 32, /* subi_f */ - 32, /* rsbi_f */ + 40, /* subi_f */ + 40, /* rsbi_f */ 8, /* mulr_f */ - 32, /* muli_f */ + 40, /* muli_f */ 8, /* divr_f */ - 32, /* divi_f */ + 40, /* divi_f */ 4, /* negr_f */ 4, /* absr_f */ 8, /* sqrtr_f */ 32, /* ltr_f */ - 56, /* lti_f */ + 64, /* lti_f */ 32, /* ler_f */ - 56, /* lei_f */ + 64, /* lei_f */ 32, /* eqr_f */ - 56, /* eqi_f */ + 64, /* eqi_f */ 32, /* ger_f */ - 56, /* gei_f */ + 64, /* gei_f */ 32, /* gtr_f */ - 56, /* gti_f */ + 64, /* gti_f */ 32, /* ner_f */ - 56, /* nei_f */ + 64, /* nei_f */ 32, /* unltr_f */ - 56, /* unlti_f */ + 64, /* unlti_f */ 32, /* unler_f */ - 56, /* unlei_f */ + 64, /* unlei_f */ 32, /* uneqr_f */ - 56, /* uneqi_f */ + 64, /* uneqi_f */ 32, /* unger_f */ - 56, /* ungei_f */ + 64, /* ungei_f */ 32, /* ungtr_f */ - 56, /* ungti_f */ + 64, /* ungti_f */ 32, /* ltgtr_f */ - 56, /* ltgti_f */ + 64, /* ltgti_f */ 20, /* ordr_f */ - 44, /* ordi_f */ + 52, /* ordi_f */ 20, /* unordr_f */ - 44, /* unordi_f */ + 52, /* unordi_f */ 16, /* truncr_f_i */ 16, /* truncr_f_l */ 12, /* extr_f */ 4, /* extr_d_f */ 4, /* movr_f */ - 24, /* movi_f */ + 32, /* movi_f */ 4, /* ldr_f */ 32, /* ldi_f */ 8, /* ldxr_f */ - 4, /* ldxi_f */ + 36, /* ldxi_f */ 4, /* str_f */ 32, /* sti_f */ 8, /* stxr_f */ - 4, /* stxi_f */ + 36, /* stxi_f */ 24, /* bltr_f */ - 48, /* blti_f 
*/ + 56, /* blti_f */ 24, /* bler_f */ - 48, /* blei_f */ + 56, /* blei_f */ 24, /* beqr_f */ - 48, /* beqi_f */ + 56, /* beqi_f */ 24, /* bger_f */ - 48, /* bgei_f */ + 56, /* bgei_f */ 24, /* bgtr_f */ - 48, /* bgti_f */ + 56, /* bgti_f */ 28, /* bner_f */ - 52, /* bnei_f */ + 60, /* bnei_f */ 28, /* bunltr_f */ - 52, /* bunlti_f */ + 60, /* bunlti_f */ 28, /* bunler_f */ - 52, /* bunlei_f */ + 60, /* bunlei_f */ 28, /* buneqr_f */ - 52, /* buneqi_f */ + 60, /* buneqi_f */ 28, /* bunger_f */ - 52, /* bungei_f */ + 60, /* bungei_f */ 28, /* bungtr_f */ - 52, /* bungti_f */ + 60, /* bungti_f */ 28, /* bltgtr_f */ - 52, /* bltgti_f */ + 60, /* bltgti_f */ 12, /* bordr_f */ - 36, /* bordi_f */ + 44, /* bordi_f */ 12, /* bunordr_f */ - 36, /* bunordi_f */ + 44, /* bunordi_f */ 0, /* pushargr_f */ 0, /* pushargi_f */ 0, /* retr_f */ @@ -306,87 +305,87 @@ 0, /* putargr_d */ 0, /* putargi_d */ 8, /* addr_d */ - 28, /* addi_d */ + 40, /* addi_d */ 8, /* subr_d */ - 28, /* subi_d */ - 28, /* rsbi_d */ + 40, /* subi_d */ + 40, /* rsbi_d */ 8, /* mulr_d */ - 28, /* muli_d */ + 40, /* muli_d */ 8, /* divr_d */ - 28, /* divi_d */ + 40, /* divi_d */ 4, /* negr_d */ 4, /* absr_d */ 8, /* sqrtr_d */ 32, /* ltr_d */ - 52, /* lti_d */ + 64, /* lti_d */ 32, /* ler_d */ - 52, /* lei_d */ + 64, /* lei_d */ 32, /* eqr_d */ - 52, /* eqi_d */ + 64, /* eqi_d */ 32, /* ger_d */ - 52, /* gei_d */ + 64, /* gei_d */ 32, /* gtr_d */ - 52, /* gti_d */ + 64, /* gti_d */ 32, /* ner_d */ - 52, /* nei_d */ + 64, /* nei_d */ 32, /* unltr_d */ - 52, /* unlti_d */ + 64, /* unlti_d */ 32, /* unler_d */ - 52, /* unlei_d */ + 64, /* unlei_d */ 32, /* uneqr_d */ - 52, /* uneqi_d */ + 64, /* uneqi_d */ 32, /* unger_d */ - 52, /* ungei_d */ + 64, /* ungei_d */ 32, /* ungtr_d */ - 52, /* ungti_d */ + 64, /* ungti_d */ 32, /* ltgtr_d */ - 52, /* ltgti_d */ + 64, /* ltgti_d */ 20, /* ordr_d */ - 40, /* ordi_d */ + 52, /* ordi_d */ 20, /* unordr_d */ - 40, /* unordi_d */ + 52, /* unordi_d */ 16, /* truncr_d_i 
*/ 16, /* truncr_d_l */ 12, /* extr_d */ 4, /* extr_f_d */ 4, /* movr_d */ - 20, /* movi_d */ + 32, /* movi_d */ 4, /* ldr_d */ 32, /* ldi_d */ 8, /* ldxr_d */ - 4, /* ldxi_d */ + 36, /* ldxi_d */ 4, /* str_d */ 32, /* sti_d */ 8, /* stxr_d */ - 4, /* stxi_d */ + 36, /* stxi_d */ 24, /* bltr_d */ - 44, /* blti_d */ + 56, /* blti_d */ 24, /* bler_d */ - 44, /* blei_d */ + 56, /* blei_d */ 24, /* beqr_d */ - 44, /* beqi_d */ + 56, /* beqi_d */ 24, /* bger_d */ - 44, /* bgei_d */ + 56, /* bgei_d */ 24, /* bgtr_d */ - 44, /* bgti_d */ + 56, /* bgti_d */ 28, /* bner_d */ - 48, /* bnei_d */ + 60, /* bnei_d */ 28, /* bunltr_d */ - 48, /* bunlti_d */ + 60, /* bunlti_d */ 28, /* bunler_d */ - 48, /* bunlei_d */ + 60, /* bunlei_d */ 28, /* buneqr_d */ - 48, /* buneqi_d */ + 60, /* buneqi_d */ 28, /* bunger_d */ - 48, /* bungei_d */ + 60, /* bungei_d */ 28, /* bungtr_d */ - 48, /* bungti_d */ + 60, /* bungti_d */ 28, /* bltgtr_d */ - 48, /* bltgti_d */ + 60, /* bltgti_d */ 12, /* bordr_d */ - 32, /* bordi_d */ + 44, /* bordi_d */ 12, /* bunordr_d */ - 32, /* bunordi_d */ + 44, /* bunordi_d */ 0, /* pushargr_d */ 0, /* pushargi_d */ 0, /* retr_d */ @@ -404,6 +403,6 @@ 16, /* bswapr_us */ 36, /* bswapr_ui */ 36, /* bswapr_ul */ - 0, /* casr */ - 0, /* casi */ + 32, /* casr */ + 60, /* casi */ #endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_alpha.c b/deps/lightning/lib/jit_alpha.c index 1a78b907..678d5c6e 100644 --- a/deps/lightning/lib/jit_alpha.c +++ b/deps/lightning/lib/jit_alpha.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2014-2019 Free Software Foundation, Inc. + * Copyright (C) 2014-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* @@ -64,7 +64,6 @@ static void _patch(jit_state_t*,jit_word_t,jit_node_t*); #define PROTO 1 # include "jit_alpha-cpu.c" # include "jit_alpha-fpu.c" -# include "jit_fallback.c" #undef PROTO /* @@ -979,11 +978,10 @@ _emit_code(jit_state_t *_jit) jit_regarg_set(node, value); switch (node->code) { case jit_code_align: - assert(!(node->u.w & (node->u.w - 1)) && - node->u.w <= sizeof(jit_word_t)); - if (node->u.w == sizeof(jit_word_t) && - (word = _jit->pc.w & (sizeof(jit_word_t) - 1))) - nop(sizeof(jit_word_t) - word); + /* Must align to a power of two */ + assert(!(node->u.w & (node->u.w - 1))); + if ((word = _jit->pc.w & (node->u.w - 1))) + nop(node->u.w - word); break; case jit_code_note: case jit_code_name: node->u.w = _jit->pc.w; @@ -1512,7 +1510,6 @@ _emit_code(jit_state_t *_jit) #define CODE 1 # include "jit_alpha-cpu.c" # include "jit_alpha-fpu.c" -# include "jit_fallback.c" #undef CODE void diff --git a/deps/lightning/lib/jit_arm-cpu.c b/deps/lightning/lib/jit_arm-cpu.c index 91bb17c9..12f9a2f7 100644 --- a/deps/lightning/lib/jit_arm-cpu.c +++ b/deps/lightning/lib/jit_arm-cpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2019 Free Software Foundation, Inc. + * Copyright (C) 2012-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* @@ -1679,13 +1679,14 @@ _casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, /* retry: */ retry = _jit->pc.w; T2_LDREX(r0, r1, 0); - jump0 = bner(_jit->pc.w, r0, r2); /* bne done r0 r2 */ + eqr(r0, r0, r2); + jump0 = beqi(_jit->pc.w, r0, 0); /* beqi done r0 0 */ T2_STREX(r0, r3, r1, 0); jump1 = bnei(_jit->pc.w, r0, 0); /* bnei retry r0 0 */ - /* done: */ - done = _jit->pc.w; /* r0 = 0 if memory updated, 1 otherwise */ xori(r0, r0, 1); + /* done: */ + done = _jit->pc.w; T2_DMB(DMB_ISH); } else { @@ -1693,13 +1694,14 @@ _casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, /* retry: */ retry = _jit->pc.w; LDREX(r0, r1); - jump0 = bner(_jit->pc.w, r0, r2); /* bne done r0 r2 */ + eqr(r0, r0, r2); + jump0 = beqi(_jit->pc.w, r0, 0); /* beqi done r0 0 */ STREX(r0, r3, r1); jump1 = bnei(_jit->pc.w, r0, 0); /* bnei retry r0 0 */ - /* done: */ - done = _jit->pc.w; /* r0 = 0 if memory updated, 1 otherwise */ xori(r0, r0, 1); + /* done: */ + done = _jit->pc.w; DMB(DMB_ISH); } patch_at(arm_patch_jump, jump0, done); diff --git a/deps/lightning/lib/jit_arm-swf.c b/deps/lightning/lib/jit_arm-swf.c index bf86ca1c..c88f9e3c 100644 --- a/deps/lightning/lib/jit_arm-swf.c +++ b/deps/lightning/lib/jit_arm-swf.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2019 Free Software Foundation, Inc. + * Copyright (C) 2012-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* diff --git a/deps/lightning/lib/jit_arm-sz.c b/deps/lightning/lib/jit_arm-sz.c index 79970098..14f085ae 100644 --- a/deps/lightning/lib/jit_arm-sz.c +++ b/deps/lightning/lib/jit_arm-sz.c @@ -48,18 +48,18 @@ 12, /* qmuli */ 4, /* qmulr_u */ 8, /* qmuli_u */ - 40, /* divr */ - 48, /* divi */ - 40, /* divr_u */ - 44, /* divi_u */ - 34, /* qdivr */ - 38, /* qdivi */ - 34, /* qdivr_u */ - 38, /* qdivi_u */ - 40, /* remr */ - 48, /* remi */ - 40, /* remr_u */ - 44, /* remi_u */ + 32, /* divr */ + 36, /* divi */ + 24, /* divr_u */ + 28, /* divi_u */ + 18, /* qdivr */ + 22, /* qdivi */ + 18, /* qdivr_u */ + 22, /* qdivi_u */ + 24, /* remr */ + 32, /* remi */ + 24, /* remr_u */ + 28, /* remi_u */ 4, /* andr */ 12, /* andi */ 4, /* orr */ @@ -405,8 +405,8 @@ 8, /* bswapr_us */ 4, /* bswapr_ui */ 0, /* bswapr_ul */ - 0, /* casr */ - 0, /* casi */ + 40, /* casr */ + 48, /* casi */ #endif /* __ARM_PCS_VFP */ #endif /* __WORDSIZE */ @@ -459,18 +459,18 @@ 12, /* qmuli */ 4, /* qmulr_u */ 8, /* qmuli_u */ - 40, /* divr */ - 48, /* divi */ - 40, /* divr_u */ - 44, /* divi_u */ - 34, /* qdivr */ - 38, /* qdivi */ - 34, /* qdivr_u */ - 38, /* qdivi_u */ - 40, /* remr */ - 48, /* remi */ - 40, /* remr_u */ - 44, /* remi_u */ + 32, /* divr */ + 36, /* divi */ + 24, /* divr_u */ + 28, /* divi_u */ + 18, /* qdivr */ + 22, /* qdivi */ + 18, /* qdivr_u */ + 22, /* qdivi_u */ + 24, /* remr */ + 32, /* remi */ + 24, /* remr_u */ + 28, /* remi_u */ 4, /* andr */ 12, /* andi */ 4, /* orr */ @@ -507,8 +507,8 @@ 14, /* nei */ 4, /* movr */ 8, /* movi */ - 22, /* movnr */ - 22, /* movzr */ + 8, /* movnr */ + 8, /* movzr */ 8, /* extr_c */ 4, /* extr_uc */ 8, /* extr_s */ @@ -626,50 +626,50 @@ 0, /* getarg_f */ 0, /* putargr_f */ 0, /* putargi_f */ - 40, /* addr_f */ - 40, /* addi_f */ - 40, /* subr_f */ - 40, /* subi_f */ - 40, /* rsbi_f */ - 40, /* mulr_f */ - 40, /* muli_f */ - 40, /* divr_f */ - 40, /* divi_f */ + 24, /* addr_f */ + 24, /* addi_f */ + 24, /* subr_f */ + 24, /* subi_f */ + 
24, /* rsbi_f */ + 24, /* mulr_f */ + 24, /* muli_f */ + 24, /* divr_f */ + 24, /* divi_f */ 12, /* negr_f */ 12, /* absr_f */ - 36, /* sqrtr_f */ - 40, /* ltr_f */ - 44, /* lti_f */ - 40, /* ler_f */ - 44, /* lei_f */ - 40, /* eqr_f */ - 44, /* eqi_f */ - 40, /* ger_f */ - 44, /* gei_f */ - 40, /* gtr_f */ - 44, /* gti_f */ - 44, /* ner_f */ - 48, /* nei_f */ - 72, /* unltr_f */ - 80, /* unlti_f */ - 72, /* unler_f */ - 80, /* unlei_f */ - 72, /* uneqr_f */ - 80, /* uneqi_f */ - 72, /* unger_f */ - 80, /* ungei_f */ - 72, /* ungtr_f */ - 80, /* ungti_f */ - 76, /* ltgtr_f */ - 84, /* ltgti_f */ - 44, /* ordr_f */ - 48, /* ordi_f */ - 72, /* unordr_f */ - 80, /* unordi_f */ - 36, /* truncr_f_i */ + 20, /* sqrtr_f */ + 24, /* ltr_f */ + 30, /* lti_f */ + 24, /* ler_f */ + 32, /* lei_f */ + 24, /* eqr_f */ + 30, /* eqi_f */ + 24, /* ger_f */ + 30, /* gei_f */ + 24, /* gtr_f */ + 30, /* gti_f */ + 28, /* ner_f */ + 32, /* nei_f */ + 56, /* unltr_f */ + 64, /* unlti_f */ + 56, /* unler_f */ + 64, /* unlei_f */ + 56, /* uneqr_f */ + 64, /* uneqi_f */ + 56, /* unger_f */ + 64, /* ungei_f */ + 56, /* ungtr_f */ + 64, /* ungti_f */ + 60, /* ltgtr_f */ + 68, /* ltgti_f */ + 28, /* ordr_f */ + 32, /* ordi_f */ + 56, /* unordr_f */ + 64, /* unordi_f */ + 20, /* truncr_f_i */ 0, /* truncr_f_l */ - 36, /* extr_f */ - 38, /* extr_d_f */ + 28, /* extr_f */ + 22, /* extr_d_f */ 8, /* movr_f */ 12, /* movi_f */ 8, /* ldr_f */ @@ -680,34 +680,34 @@ 16, /* sti_f */ 8, /* stxr_f */ 16, /* stxi_f */ - 44, /* bltr_f */ - 48, /* blti_f */ - 44, /* bler_f */ - 48, /* blei_f */ - 44, /* beqr_f */ - 52, /* beqi_f */ - 44, /* bger_f */ - 48, /* bgei_f */ - 44, /* bgtr_f */ - 48, /* bgti_f */ - 44, /* bner_f */ - 48, /* bnei_f */ - 44, /* bunltr_f */ - 48, /* bunlti_f */ - 44, /* bunler_f */ - 48, /* bunlei_f */ - 76, /* buneqr_f */ - 84, /* buneqi_f */ - 44, /* bunger_f */ - 48, /* bungei_f */ - 44, /* bungtr_f */ - 48, /* bungti_f */ - 76, /* bltgtr_f */ - 84, /* bltgti_f */ - 44, /* 
bordr_f */ - 48, /* bordi_f */ - 44, /* bunordr_f */ - 48, /* bunordi_f */ + 28, /* bltr_f */ + 32, /* blti_f */ + 28, /* bler_f */ + 32, /* blei_f */ + 28, /* beqr_f */ + 40, /* beqi_f */ + 28, /* bger_f */ + 32, /* bgei_f */ + 28, /* bgtr_f */ + 32, /* bgti_f */ + 28, /* bner_f */ + 32, /* bnei_f */ + 28, /* bunltr_f */ + 32, /* bunlti_f */ + 28, /* bunler_f */ + 32, /* bunlei_f */ + 60, /* buneqr_f */ + 68, /* buneqi_f */ + 28, /* bunger_f */ + 32, /* bungei_f */ + 28, /* bungtr_f */ + 32, /* bungti_f */ + 60, /* bltgtr_f */ + 68, /* bltgti_f */ + 28, /* bordr_f */ + 32, /* bordi_f */ + 28, /* bunordr_f */ + 32, /* bunordi_f */ 0, /* pushargr_f */ 0, /* pushargi_f */ 0, /* retr_f */ @@ -717,50 +717,50 @@ 0, /* getarg_d */ 0, /* putargr_d */ 0, /* putargi_d */ - 50, /* addr_d */ - 52, /* addi_d */ - 50, /* subr_d */ - 52, /* subi_d */ - 52, /* rsbi_d */ - 50, /* mulr_d */ - 52, /* muli_d */ - 50, /* divr_d */ - 52, /* divi_d */ + 34, /* addr_d */ + 36, /* addi_d */ + 34, /* subr_d */ + 36, /* subi_d */ + 36, /* rsbi_d */ + 34, /* mulr_d */ + 36, /* muli_d */ + 34, /* divr_d */ + 36, /* divi_d */ 20, /* negr_d */ 20, /* absr_d */ - 42, /* sqrtr_d */ - 44, /* ltr_d */ - 48, /* lti_d */ - 44, /* ler_d */ - 48, /* lei_d */ - 44, /* eqr_d */ - 48, /* eqi_d */ - 44, /* ger_d */ - 48, /* gei_d */ - 44, /* gtr_d */ - 48, /* gti_d */ - 48, /* ner_d */ - 52, /* nei_d */ - 82, /* unltr_d */ - 88, /* unlti_d */ - 82, /* unler_d */ - 88, /* unlei_d */ - 82, /* uneqr_d */ - 88, /* uneqi_d */ - 82, /* unger_d */ - 88, /* ungei_d */ - 82, /* ungtr_d */ - 88, /* ungti_d */ - 86, /* ltgtr_d */ - 92, /* ltgti_d */ - 48, /* ordr_d */ - 52, /* ordi_d */ - 82, /* unordr_d */ - 88, /* unordi_d */ - 36, /* truncr_d_i */ + 26, /* sqrtr_d */ + 28, /* ltr_d */ + 34, /* lti_d */ + 28, /* ler_d */ + 36, /* lei_d */ + 28, /* eqr_d */ + 34, /* eqi_d */ + 28, /* ger_d */ + 34, /* gei_d */ + 28, /* gtr_d */ + 34, /* gti_d */ + 32, /* ner_d */ + 36, /* nei_d */ + 66, /* unltr_d */ + 72, /* 
unlti_d */ + 66, /* unler_d */ + 72, /* unlei_d */ + 66, /* uneqr_d */ + 72, /* uneqi_d */ + 66, /* unger_d */ + 72, /* ungei_d */ + 66, /* ungtr_d */ + 72, /* ungti_d */ + 70, /* ltgtr_d */ + 76, /* ltgti_d */ + 32, /* ordr_d */ + 36, /* ordi_d */ + 66, /* unordr_d */ + 72, /* unordi_d */ + 20, /* truncr_d_i */ 0, /* truncr_d_l */ - 36, /* extr_d */ - 38, /* extr_f_d */ + 28, /* extr_d */ + 22, /* extr_f_d */ 16, /* movr_d */ 20, /* movi_d */ 16, /* ldr_d */ @@ -771,34 +771,34 @@ 24, /* sti_d */ 20, /* stxr_d */ 28, /* stxi_d */ - 48, /* bltr_d */ - 52, /* blti_d */ - 48, /* bler_d */ - 52, /* blei_d */ - 48, /* beqr_d */ - 60, /* beqi_d */ - 48, /* bger_d */ - 52, /* bgei_d */ - 48, /* bgtr_d */ - 52, /* bgti_d */ - 48, /* bner_d */ - 52, /* bnei_d */ - 48, /* bunltr_d */ - 52, /* bunlti_d */ - 48, /* bunler_d */ - 52, /* bunlei_d */ - 84, /* buneqr_d */ - 92, /* buneqi_d */ - 48, /* bunger_d */ - 52, /* bungei_d */ - 48, /* bungtr_d */ - 52, /* bungti_d */ - 84, /* bltgtr_d */ - 92, /* bltgti_d */ - 48, /* bordr_d */ - 52, /* bordi_d */ - 48, /* bunordr_d */ - 52, /* bunordi_d */ + 32, /* bltr_d */ + 36, /* blti_d */ + 32, /* bler_d */ + 36, /* blei_d */ + 32, /* beqr_d */ + 52, /* beqi_d */ + 32, /* bger_d */ + 36, /* bgei_d */ + 32, /* bgtr_d */ + 36, /* bgti_d */ + 32, /* bner_d */ + 36, /* bnei_d */ + 32, /* bunltr_d */ + 36, /* bunlti_d */ + 32, /* bunler_d */ + 36, /* bunlei_d */ + 68, /* buneqr_d */ + 76, /* buneqi_d */ + 32, /* bunger_d */ + 36, /* bungei_d */ + 32, /* bungtr_d */ + 36, /* bungti_d */ + 68, /* bltgtr_d */ + 76, /* bltgti_d */ + 32, /* bordr_d */ + 36, /* bordi_d */ + 32, /* bunordr_d */ + 36, /* bunordi_d */ 0, /* pushargr_d */ 0, /* pushargi_d */ 0, /* retr_d */ @@ -816,7 +816,7 @@ 20, /* bswapr_us */ 16, /* bswapr_ui */ 0, /* bswapr_ul */ - 0, /* casr */ - 0, /* casi */ + 40, /* casr */ + 44, /* casi */ #endif /* __ARM_PCS_VFP */ #endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_arm-vfp.c b/deps/lightning/lib/jit_arm-vfp.c 
index 743a3ef5..4b146d25 100644 --- a/deps/lightning/lib/jit_arm-vfp.c +++ b/deps/lightning/lib/jit_arm-vfp.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2019 Free Software Foundation, Inc. + * Copyright (C) 2012-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * diff --git a/deps/lightning/lib/jit_arm.c b/deps/lightning/lib/jit_arm.c index ae0e9f52..6b121bf3 100644 --- a/deps/lightning/lib/jit_arm.c +++ b/deps/lightning/lib/jit_arm.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2019 Free Software Foundation, Inc. + * Copyright (C) 2012-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -1400,11 +1400,10 @@ _emit_code(jit_state_t *_jit) jit_regarg_set(node, value); switch (node->code) { case jit_code_align: - assert(!(node->u.w & (node->u.w - 1)) && - node->u.w <= sizeof(jit_word_t)); - if (node->u.w == sizeof(jit_word_t) && - (word = _jit->pc.w & (sizeof(jit_word_t) - 1))) - nop(sizeof(jit_word_t) - word); + /* Must align to a power of two */ + assert(!(node->u.w & (node->u.w - 1))); + if ((word = _jit->pc.w & (node->u.w - 1))) + nop(node->u.w - word); break; case jit_code_note: case jit_code_name: if (must_align_p(node->next)) diff --git a/deps/lightning/lib/jit_disasm.c b/deps/lightning/lib/jit_disasm.c index 856a70bb..9ad84f1b 100644 --- a/deps/lightning/lib/jit_disasm.c +++ b/deps/lightning/lib/jit_disasm.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2019 Free Software Foundation, Inc. + * Copyright (C) 2012-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -60,7 +60,7 @@ static int fprintf_styled(void *, enum disassembler_style, const char* fmt, ...) 
int r; va_start(args, fmt); - r = vprintf(fmt, args); + r = vfprintf(disasm_stream, fmt, args); va_end(args); return r; @@ -319,7 +319,10 @@ _disassemble(jit_state_t *_jit, jit_pointer_t code, jit_int32_t length) char *name, *old_name; char *file, *old_file; int line, old_line; -#if __arm__ +#if __riscv && __WORDSIZE == 64 + jit_word_t *vector; + jit_int32_t offset; +#elif __arm__ jit_int32_t offset; jit_bool_t data_info; jit_int32_t data_offset; @@ -332,6 +335,10 @@ _disassemble(jit_state_t *_jit, jit_pointer_t code, jit_int32_t length) jit_uword_t prevw; #endif +#if __riscv && __WORDSIZE == 64 + end -= _jitc->consts.hash.count * 8; +#endif + #if __arm__ data_info = _jitc && _jitc->data_info.ptr; data_offset = 0; @@ -354,7 +361,7 @@ _disassemble(jit_state_t *_jit, jit_pointer_t code, jit_int32_t length) } while (node && (jit_uword_t)(prevw + node->offset) == (jit_uword_t)pc) { jit_print_node(node); - fputc('\n', stdout); + fputc('\n', disasm_stream); prevw += node->offset; node = node->next; } @@ -405,5 +412,16 @@ _disassemble(jit_state_t *_jit, jit_pointer_t code, jit_int32_t length) pc += (*disasm_print)(pc, &disasm_info); putc('\n', disasm_stream); } +#if __riscv && __WORDSIZE == 64 + for (vector = (jit_word_t *)end, offset = 0; + offset < _jitc->consts.hash.count; offset++) { + bytes = sprintf(buffer, address_buffer_format, + (long long)end + offset * sizeof(jit_word_t)); + (*disasm_info.fprintf_func)(disasm_stream, + "%*c0x%s\t.quad\t0x%016lx\t# (%ld)\n", + 16 - bytes, ' ', buffer, + vector[offset], vector[offset]); + } +#endif } #endif diff --git a/deps/lightning/lib/jit_fallback.c b/deps/lightning/lib/jit_fallback.c index 9251947a..8912691d 100644 --- a/deps/lightning/lib/jit_fallback.c +++ b/deps/lightning/lib/jit_fallback.c @@ -24,7 +24,7 @@ _fallback_save(jit_state_t *_jit, jit_int32_t r0) regno = jit_regno(spec); if (regno == r0) { if (!(spec & jit_class_sav)) - stxi(_jitc->function->regoff[offset], rn(JIT_FP), regno); + 
stxi(_jitc->function->regoff[JIT_R(offset)], rn(JIT_FP), regno); break; } } @@ -39,7 +39,7 @@ _fallback_load(jit_state_t *_jit, jit_int32_t r0) regno = jit_regno(spec); if (regno == r0) { if (!(spec & jit_class_sav)) - ldxi(regno, rn(JIT_FP), _jitc->function->regoff[offset]); + ldxi(regno, rn(JIT_FP), _jitc->function->regoff[JIT_R(offset)]); break; } } @@ -48,35 +48,25 @@ _fallback_load(jit_state_t *_jit, jit_int32_t r0) static void _fallback_save_regs(jit_state_t *_jit, jit_int32_t r0) { - jit_int32_t offset, regno, spec; - for (offset = 0; offset < JIT_R_NUM; offset++) { - regno = JIT_R(offset); - spec = _rvs[regno].spec; - if ((spec & jit_class_gpr) && regno == r0) - continue; - if (!(spec & jit_class_sav)) { - if (!_jitc->function->regoff[regno]) { - _jitc->function->regoff[regno] = - jit_allocai(sizeof(jit_word_t)); - _jitc->again = 1; - } - jit_regset_setbit(&_jitc->regsav, regno); - emit_stxi(_jitc->function->regoff[regno], JIT_FP, regno); - } - } - /* If knew for certain float registers are not used by - * pthread_mutex_lock and pthread_mutex_unlock, could skip this */ - for (offset = 0; offset < JIT_F_NUM; offset++) { - regno = JIT_F(offset); + jit_int32_t regno, spec; + for (regno = 0; regno < _jitc->reglen; regno++) { spec = _rvs[regno].spec; - if (!(spec & jit_class_sav)) { + if ((jit_regset_tstbit(&_jitc->regarg, regno) || + jit_regset_tstbit(&_jitc->reglive, regno)) && + !(spec & jit_class_sav)) { if (!_jitc->function->regoff[regno]) { _jitc->function->regoff[regno] = - jit_allocai(sizeof(jit_word_t)); + jit_allocai(spec & jit_class_gpr ? 
+ sizeof(jit_word_t) : sizeof(jit_float64_t)); _jitc->again = 1; } + if ((spec & jit_class_gpr) && rn(regno) == r0) + continue; jit_regset_setbit(&_jitc->regsav, regno); - emit_stxi_d(_jitc->function->regoff[regno], JIT_FP, regno); + if (spec & jit_class_gpr) + emit_stxi(_jitc->function->regoff[regno], JIT_FP, regno); + else + emit_stxi_d(_jitc->function->regoff[regno], JIT_FP, regno); } } } @@ -84,25 +74,19 @@ _fallback_save_regs(jit_state_t *_jit, jit_int32_t r0) static void _fallback_load_regs(jit_state_t *_jit, jit_int32_t r0) { - jit_int32_t offset, regno, spec; - for (offset = 0; offset < JIT_R_NUM; offset++) { - regno = JIT_R(offset); - spec = _rvs[regno].spec; - if ((spec & jit_class_gpr) && regno == r0) - continue; - if (!(spec & jit_class_sav)) { - jit_regset_clrbit(&_jitc->regsav, regno); - emit_ldxi(regno, JIT_FP, _jitc->function->regoff[regno]); - } - } - /* If knew for certain float registers are not used by - * pthread_mutex_lock and pthread_mutex_unlock, could skip this */ - for (offset = 0; offset < JIT_F_NUM; offset++) { - regno = JIT_F(offset); + jit_int32_t regno, spec; + for (regno = 0; regno < _jitc->reglen; regno++) { spec = _rvs[regno].spec; - if (!(spec & jit_class_sav)) { - jit_regset_clrbit(&_jitc->regsav, regno); - emit_ldxi_d(regno, JIT_FP, _jitc->function->regoff[regno]); + if ((jit_regset_tstbit(&_jitc->regarg, regno) || + jit_regset_tstbit(&_jitc->reglive, regno)) && + !(spec & jit_class_sav)) { + if ((spec & jit_class_gpr) && rn(regno) == r0) + continue; + jit_regset_setbit(&_jitc->regsav, regno); + if (spec & jit_class_gpr) + emit_ldxi(regno, JIT_FP, _jitc->function->regoff[regno]); + else + emit_ldxi_d(regno, JIT_FP, _jitc->function->regoff[regno]); } } } @@ -110,12 +94,8 @@ _fallback_load_regs(jit_state_t *_jit, jit_int32_t r0) static void _fallback_calli(jit_state_t *_jit, jit_word_t i0, jit_word_t i1) { -# if defined(__mips__) - movi(rn(_A0), i1); -# elif defined(__arm__) +# if defined(__arm__) movi(rn(_R0), i1); -# elif 
defined(__sparc__) - movi(rn(_O0), i1); # elif defined(__ia64__) /* avoid confusion with pushargi patching */ if (i1 >= -2097152 && i1 <= 2097151) @@ -124,13 +104,7 @@ _fallback_calli(jit_state_t *_jit, jit_word_t i0, jit_word_t i1) MOVL(_jitc->rout, i1); # elif defined(__hppa__) movi(_R26_REGNO, i1); -# elif defined(__s390__) || defined(__s390x__) - movi(rn(_R2), i1); -# elif defined(__alpha__) - movi(rn(_A0), i1); -# elif defined(__riscv__) - movi(rn(JIT_RA0), i1); -# endif +#endif calli(i0); } @@ -143,7 +117,7 @@ _fallback_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, /* XXX only attempts to fallback cas for lightning jit code */ static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; if ((iscasi = r1 == _NOREG)) { - r1_reg = jit_get_reg(jit_class_gpr); + r1_reg = jit_get_reg(jit_class_gpr|jit_class_sav); r1 = rn(r1_reg); movi(r1, i0); } @@ -162,11 +136,16 @@ _fallback_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, str_l(r1, r3); # endif /* done: */ +# if defined(__ia64__) + sync(); +# endif done = _jit->pc.w; fallback_calli((jit_word_t)pthread_mutex_unlock, (jit_word_t)&mutex); fallback_load(r0); # if defined(__arm__) patch_at(arm_patch_jump, jump, done); +# elif defined(__ia64__) + patch_at(jit_code_bnei, jump, done); # else patch_at(jump, done); # endif diff --git a/deps/lightning/lib/jit_hppa-cpu.c b/deps/lightning/lib/jit_hppa-cpu.c index 155ec91f..013460c1 100644 --- a/deps/lightning/lib/jit_hppa-cpu.c +++ b/deps/lightning/lib/jit_hppa-cpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2019 Free Software Foundation, Inc. + * Copyright (C) 2013-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* @@ -1660,7 +1660,48 @@ static void _casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3, jit_word_t i0) { +#if defined(__linux__) && defined(SYS_atomic_cmpxchg_32) && __WORDSIZE == 32 + /* Not defined, and unlikely to ever be defined, but could be a way to do it */ + movi(_R26_REGNO, SYS_atomic_cmpxchg_32); + if (r1 == _NOREG) + movi(_R25_REGNO, i0); + else + movr(_R25_REGNO, r1); + movr(_R24_REGNO, r2); + movr(_R23_REGNO, r3); + /* Should only fail for an invalid or unaligned address. + * Do not handle this condition. */ + calli(syscall); + movr(r0, _R28_REGNO); +#else + /* + * The only atomic operations are LDCW and LDCD, that load a value, + * and store zero at the address atomically. The (semaphore) address + * must be 16 byte aligned. + */ fallback_casx(r0, r1, r2, r3, i0); + /* + * It is important to be aware of the delayed nature of cache flush and + * purge operations, and to use SYNC instructions to force completion + * where necessary. The following example illustrates this. + * Consider two processes sharing a memory location x which is protected + * by a semaphore s. + * + * process A on Processor 1 | process B on Processor 2 | note + * -------------------------+---------------------------+------------ + * LDCW s | | A acquires semaphore + * PDC x | | A executes purge + * SYNC | | Force completion of purge + * STW s | | A releases semaphore + * | LDCW s | B acquires semaphore + * | STW x + * + * In the absence of the SYNC instruction, it would be possible for + * process B's store to x to complete before the purge of x is completed + * (since the purge may have been delayed). The purge of x could then + * destroy the new value. 
+ */ +#endif } static void diff --git a/deps/lightning/lib/jit_hppa-fpu.c b/deps/lightning/lib/jit_hppa-fpu.c index 5fa68561..6b2838d1 100644 --- a/deps/lightning/lib/jit_hppa-fpu.c +++ b/deps/lightning/lib/jit_hppa-fpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2019 Free Software Foundation, Inc. + * Copyright (C) 2013-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * diff --git a/deps/lightning/lib/jit_hppa-sz.c b/deps/lightning/lib/jit_hppa-sz.c index e984bacd..33ac908d 100644 --- a/deps/lightning/lib/jit_hppa-sz.c +++ b/deps/lightning/lib/jit_hppa-sz.c @@ -1,6 +1,6 @@ #if __WORDSIZE == 32 -#define JIT_INSTR_MAX 64 +#define JIT_INSTR_MAX 196 0, /* data */ 0, /* live */ 0, /* align */ @@ -9,7 +9,7 @@ 0, /* #name */ 0, /* #note */ 0, /* label */ - 64, /* prolog */ + 156, /* prolog */ 0, /* ellipsis */ 0, /* va_push */ 0, /* allocai */ @@ -24,9 +24,9 @@ 0, /* getarg_l */ 0, /* putargr */ 0, /* putargi */ - 0, /* va_start */ - 0, /* va_arg */ - 0, /* va_arg_d */ + 4, /* va_start */ + 8, /* va_arg */ + 20, /* va_arg_d */ 0, /* va_end */ 4, /* addr */ 12, /* addi */ @@ -40,13 +40,13 @@ 12, /* subci */ 4, /* subxr */ 8, /* subxi */ - 16, /* rsbi */ - 28, /* mulr */ - 36, /* muli */ + 12, /* rsbi */ + 48, /* mulr */ + 56, /* muli */ 40, /* qmulr */ 44, /* qmuli */ - 32, /* qmulr_u */ - 40, /* qmuli_u */ + 52, /* qmulr_u */ + 60, /* qmuli_u */ 36, /* divr */ 40, /* divi */ 36, /* divr_u */ @@ -95,8 +95,8 @@ 8, /* nei */ 4, /* movr */ 8, /* movi */ - 16, /* movnr */ - 16, /* movzr */ + 12, /* movnr */ + 12, /* movzr */ 4, /* extr_c */ 4, /* extr_uc */ 4, /* extr_s */ @@ -105,7 +105,7 @@ 0, /* extr_ui */ 4, /* htonr_us */ 4, /* htonr_ui */ - 0, /* htonr_l */ + 0, /* htonr_ul */ 8, /* ldr_c */ 12, /* ldi_c */ 4, /* ldr_uc */ @@ -121,15 +121,15 @@ 0, /* ldr_l */ 0, /* ldi_l */ 8, /* ldxr_c */ - 8, /* ldxi_c */ + 12, /* ldxi_c */ 4, /* ldxr_uc */ - 4, /* ldxi_uc */ + 8, /* ldxi_uc */ 8, /* ldxr_s */ - 8, /* ldxi_s */ + 12, /* ldxi_s */ 4, /* 
ldxr_us */ - 4, /* ldxi_us */ + 8, /* ldxi_us */ 4, /* ldxr_i */ - 4, /* ldxi_i */ + 8, /* ldxi_i */ 0, /* ldxr_ui */ 0, /* ldxi_ui */ 0, /* ldxr_l */ @@ -143,11 +143,11 @@ 0, /* str_l */ 0, /* sti_l */ 8, /* stxr_c */ - 4, /* stxi_c */ + 12, /* stxi_c */ 8, /* stxr_s */ - 4, /* stxi_s */ + 12, /* stxi_s */ 8, /* stxr_i */ - 4, /* stxi_i */ + 12, /* stxi_i */ 0, /* stxr_l */ 0, /* stxi_l */ 8, /* bltr */ @@ -190,7 +190,7 @@ 16, /* bxsubi */ 16, /* bxsubr_u */ 20, /* bxsubi_u */ - 0, /* jmpr */ + 4, /* jmpr */ 12, /* jmpi */ 40, /* callr */ 44, /* calli */ @@ -209,93 +209,93 @@ 0, /* retval_i */ 0, /* retval_ui */ 0, /* retval_l */ - 64, /* epilog */ + 196, /* epilog */ 0, /* arg_f */ 0, /* getarg_f */ 0, /* putargr_f */ 0, /* putargi_f */ 4, /* addr_f */ - 16, /* addi_f */ + 24, /* addi_f */ 4, /* subr_f */ - 16, /* subi_f */ - 16, /* rsbi_f */ + 24, /* subi_f */ + 24, /* rsbi_f */ 4, /* mulr_f */ - 16, /* muli_f */ + 24, /* muli_f */ 4, /* divr_f */ - 16, /* divi_f */ + 24, /* divi_f */ 4, /* negr_f */ 4, /* absr_f */ 4, /* sqrtr_f */ 16, /* ltr_f */ - 28, /* lti_f */ + 36, /* lti_f */ 16, /* ler_f */ - 28, /* lei_f */ + 36, /* lei_f */ 16, /* eqr_f */ - 28, /* eqi_f */ + 36, /* eqi_f */ 16, /* ger_f */ - 28, /* gei_f */ + 36, /* gei_f */ 16, /* gtr_f */ - 28, /* gti_f */ + 36, /* gti_f */ 16, /* ner_f */ - 28, /* nei_f */ + 36, /* nei_f */ 16, /* unltr_f */ - 28, /* unlti_f */ + 36, /* unlti_f */ 16, /* unler_f */ - 28, /* unlei_f */ + 36, /* unlei_f */ 16, /* uneqr_f */ - 28, /* uneqi_f */ + 36, /* uneqi_f */ 16, /* unger_f */ - 28, /* ungei_f */ + 36, /* ungei_f */ 16, /* ungtr_f */ - 28, /* ungti_f */ + 36, /* ungti_f */ 16, /* ltgtr_f */ - 28, /* ltgti_f */ + 36, /* ltgti_f */ 16, /* ordr_f */ - 28, /* ordi_f */ + 36, /* ordi_f */ 16, /* unordr_f */ - 28, /* unordi_f */ - 12, /* truncr_f_i */ + 36, /* unordi_f */ + 16, /* truncr_f_i */ 0, /* truncr_f_l */ - 12, /* extr_f */ + 20, /* extr_f */ 4, /* extr_d_f */ 4, /* movr_f */ - 12, /* movi_f */ + 20, /* 
movi_f */ 4, /* ldr_f */ 12, /* ldi_f */ - 4, /* ldxr_f */ - 4, /* ldxi_f */ + 8, /* ldxr_f */ + 12, /* ldxi_f */ 4, /* str_f */ 12, /* sti_f */ 8, /* stxr_f */ - 4, /* stxi_f */ + 12, /* stxi_f */ 16, /* bltr_f */ - 28, /* blti_f */ + 36, /* blti_f */ 16, /* bler_f */ - 28, /* blei_f */ + 36, /* blei_f */ 16, /* beqr_f */ - 28, /* beqi_f */ + 36, /* beqi_f */ 16, /* bger_f */ - 28, /* bgei_f */ + 36, /* bgei_f */ 16, /* bgtr_f */ - 28, /* bgti_f */ + 36, /* bgti_f */ 16, /* bner_f */ - 28, /* bnei_f */ + 36, /* bnei_f */ 16, /* bunltr_f */ - 28, /* bunlti_f */ + 36, /* bunlti_f */ 16, /* bunler_f */ - 28, /* bunlei_f */ + 36, /* bunlei_f */ 16, /* buneqr_f */ - 28, /* buneqi_f */ + 36, /* buneqi_f */ 16, /* bunger_f */ - 28, /* bungei_f */ + 36, /* bungei_f */ 16, /* bungtr_f */ - 28, /* bungti_f */ + 36, /* bungti_f */ 16, /* bltgtr_f */ - 28, /* bltgti_f */ + 36, /* bltgti_f */ 16, /* bordr_f */ - 28, /* bordi_f */ + 36, /* bordi_f */ 16, /* bunordr_f */ - 28, /* bunordi_f */ + 36, /* bunordi_f */ 0, /* pushargr_f */ 0, /* pushargi_f */ 0, /* retr_f */ @@ -306,87 +306,87 @@ 0, /* putargr_d */ 0, /* putargi_d */ 4, /* addr_d */ - 24, /* addi_d */ + 32, /* addi_d */ 4, /* subr_d */ - 24, /* subi_d */ - 24, /* rsbi_d */ + 32, /* subi_d */ + 32, /* rsbi_d */ 4, /* mulr_d */ - 24, /* muli_d */ + 32, /* muli_d */ 4, /* divr_d */ - 24, /* divi_d */ + 32, /* divi_d */ 4, /* negr_d */ 4, /* absr_d */ 4, /* sqrtr_d */ 16, /* ltr_d */ - 36, /* lti_d */ + 44, /* lti_d */ 16, /* ler_d */ - 36, /* lei_d */ + 44, /* lei_d */ 16, /* eqr_d */ - 36, /* eqi_d */ + 44, /* eqi_d */ 16, /* ger_d */ - 36, /* gei_d */ + 44, /* gei_d */ 16, /* gtr_d */ - 36, /* gti_d */ + 44, /* gti_d */ 16, /* ner_d */ - 36, /* nei_d */ + 44, /* nei_d */ 16, /* unltr_d */ - 36, /* unlti_d */ + 44, /* unlti_d */ 16, /* unler_d */ - 36, /* unlei_d */ + 44, /* unlei_d */ 16, /* uneqr_d */ - 36, /* uneqi_d */ + 44, /* uneqi_d */ 16, /* unger_d */ - 36, /* ungei_d */ + 44, /* ungei_d */ 16, /* ungtr_d */ - 
36, /* ungti_d */ + 44, /* ungti_d */ 16, /* ltgtr_d */ - 36, /* ltgti_d */ + 44, /* ltgti_d */ 16, /* ordr_d */ - 36, /* ordi_d */ + 44, /* ordi_d */ 16, /* unordr_d */ - 36, /* unordi_d */ - 12, /* truncr_d_i */ + 44, /* unordi_d */ + 16, /* truncr_d_i */ 0, /* truncr_d_l */ - 12, /* extr_d */ + 20, /* extr_d */ 4, /* extr_f_d */ 4, /* movr_d */ - 20, /* movi_d */ + 28, /* movi_d */ 4, /* ldr_d */ 12, /* ldi_d */ - 4, /* ldxr_d */ - 4, /* ldxi_d */ + 8, /* ldxr_d */ + 12, /* ldxi_d */ 4, /* str_d */ 12, /* sti_d */ 8, /* stxr_d */ - 4, /* stxi_d */ + 12, /* stxi_d */ 16, /* bltr_d */ - 36, /* blti_d */ + 44, /* blti_d */ 16, /* bler_d */ - 36, /* blei_d */ + 44, /* blei_d */ 16, /* beqr_d */ - 36, /* beqi_d */ + 44, /* beqi_d */ 16, /* bger_d */ - 36, /* bgei_d */ + 44, /* bgei_d */ 16, /* bgtr_d */ - 36, /* bgti_d */ + 44, /* bgti_d */ 16, /* bner_d */ - 36, /* bnei_d */ + 44, /* bnei_d */ 16, /* bunltr_d */ - 36, /* bunlti_d */ + 44, /* bunlti_d */ 16, /* bunler_d */ - 36, /* bunlei_d */ + 44, /* bunlei_d */ 16, /* buneqr_d */ - 36, /* buneqi_d */ + 44, /* buneqi_d */ 16, /* bunger_d */ - 36, /* bungei_d */ + 44, /* bungei_d */ 16, /* bungtr_d */ - 36, /* bungti_d */ + 44, /* bungti_d */ 16, /* bltgtr_d */ - 36, /* bltgti_d */ + 44, /* bltgti_d */ 16, /* bordr_d */ - 36, /* bordi_d */ + 44, /* bordi_d */ 16, /* bunordr_d */ - 36, /* bunordi_d */ + 44, /* bunordi_d */ 0, /* pushargr_d */ 0, /* pushargi_d */ 0, /* retr_d */ @@ -401,9 +401,9 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ - 36, /* bswapr_us */ - 80, /* bswapr_ui */ + 28, /* bswapr_us */ + 68, /* bswapr_ui */ 0, /* bswapr_ul */ - 0, /* casr */ - 0, /* casi */ + 88, /* casr */ + 96, /* casi */ #endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_hppa.c b/deps/lightning/lib/jit_hppa.c index b994571d..2c826d83 100644 --- a/deps/lightning/lib/jit_hppa.c +++ b/deps/lightning/lib/jit_hppa.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2019 Free Software Foundation, Inc. 
+ * Copyright (C) 2013-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -19,6 +19,10 @@ #include #include +#if defined(__linux__) +# include +# include +#endif #define jit_arg_reg_p(i) (i >= 0 && i < 4) @@ -970,11 +974,10 @@ _emit_code(jit_state_t *_jit) jit_regarg_set(node, value); switch (node->code) { case jit_code_align: - assert(!(node->u.w & (node->u.w - 1)) && - node->u.w <= sizeof(jit_word_t)); - if (node->u.w == sizeof(jit_word_t) && - (word = _jit->pc.w & (sizeof(jit_word_t) - 1))) - nop(sizeof(jit_word_t) - word); + /* Must align to a power of two */ + assert(!(node->u.w & (node->u.w - 1))); + if ((word = _jit->pc.w & (node->u.w - 1))) + nop(node->u.w - word); break; case jit_code_note: case jit_code_name: node->u.w = _jit->pc.w; diff --git a/deps/lightning/lib/jit_ia64-cpu.c b/deps/lightning/lib/jit_ia64-cpu.c index b28e8f1a..068bc07e 100644 --- a/deps/lightning/lib/jit_ia64-cpu.c +++ b/deps/lightning/lib/jit_ia64-cpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2019 Free Software Foundation, Inc. + * Copyright (C) 2013-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* @@ -1175,6 +1175,8 @@ static void _X5(jit_state_t*,jit_word_t, #define ZXT2(r1,r3) I29(0x11,r3,r1) #define ZXT4(r1,r3) I29(0x12,r3,r1) +# define nop(i0) _nop(_jit,i0) +static void _nop(jit_state_t*, jit_int32_t); #define addr(r0,r1,r2) ADD(r0,r1,r2) #define addi(r0,r1,i0) _addi(_jit,r0,r1,i0) static void _addi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); @@ -1515,7 +1517,7 @@ static void _epilog(jit_state_t*,jit_node_t*); static void _vastart(jit_state_t*, jit_int32_t); # define vaarg(r0, r1) _vaarg(_jit, r0, r1) static void _vaarg(jit_state_t*, jit_int32_t, jit_int32_t); -#define patch_at(node,instr,label) _patch_at(_jit,node,instr,label) +#define patch_at(code,instr,label) _patch_at(_jit,code,instr,label) static void _patch_at(jit_state_t*,jit_code_t,jit_word_t,jit_word_t); #endif @@ -3029,7 +3031,7 @@ _M29(jit_state_t *_jit, jit_word_t _p, jit_word_t ar, jit_word_t r2) { assert(!(_p & ~0x3fL)); - assert(!(ar & ~0x7L)); + assert(!(ar & ~0x7fL)); assert(!(r2 & ~0x7fL)); TSTREG1(r2); TSTPRED(_p); @@ -3453,6 +3455,16 @@ _X5(jit_state_t *_jit, jit_word_t _p, inst((i1<<36)|(1L<<27)|(y<<26)|(i20<<6)|_p, INST_X); } +static void +_nop(jit_state_t *_jit, jit_int32_t i0) +{ + for (; i0 > 0; i0 -= 8) { + NOP_M(0); + sync(); + } + assert(i0 == 0); +} + static void _movr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { @@ -3489,28 +3501,36 @@ _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) static void _movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { - jit_word_t w; - w = beqi(_jit->pc.w, r2, 0); - movr(r0, r1); - patch_at(w, _jit->pc.w); + CMP_EQ(PR_6, PR_7, r2, GR_0); + MOV_p(r0, r1, PR_7); } static void _movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { - jit_word_t w; - w = bnei(_jit->pc.w, r2, 0); - movr(r0, r1); - patch_at(w, _jit->pc.w); + CMP_EQ(PR_6, PR_7, r2, GR_0); + MOV_p(r0, r1, PR_6); } static void _casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3, 
jit_word_t i0) { - fallback_casx(r0, r1, r2, r3, i0); + jit_int32_t r1_reg, iscasi; + if ((iscasi = (r1 == _NOREG))) { + r1_reg = jit_get_reg(jit_class_gpr); + r1 = rn(r1_reg); + movi(r1, i0); + } + sync(); + MOV_M_ar_rn(AR_CCV, r2); + CMPXCHG8_ACQ(r0, r1, r3); + eqr(r0, r0, r2); + if (iscasi) + jit_unget_reg(r1_reg); } + static void _bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { diff --git a/deps/lightning/lib/jit_ia64-fpu.c b/deps/lightning/lib/jit_ia64-fpu.c index 19cc381a..344977ea 100644 --- a/deps/lightning/lib/jit_ia64-fpu.c +++ b/deps/lightning/lib/jit_ia64-fpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2019 Free Software Foundation, Inc. + * Copyright (C) 2013-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * diff --git a/deps/lightning/lib/jit_ia64-sz.c b/deps/lightning/lib/jit_ia64-sz.c index 020349d8..e65da549 100644 --- a/deps/lightning/lib/jit_ia64-sz.c +++ b/deps/lightning/lib/jit_ia64-sz.c @@ -1,4 +1,3 @@ - #if __WORDSIZE == 64 #define JIT_INSTR_MAX 224 0, /* data */ @@ -68,9 +67,9 @@ 16, /* lshr */ 16, /* lshi */ 16, /* rshr */ - 32, /* rshi */ + 16, /* rshi */ 16, /* rshr_u */ - 32, /* rshi_u */ + 16, /* rshi_u */ 16, /* negr */ 16, /* comr */ 32, /* ltr */ @@ -95,16 +94,16 @@ 32, /* nei */ 16, /* movr */ 16, /* movi */ - 48, /* movnr */ - 48, /* movzr */ + 16, /* movnr */ + 16, /* movzr */ 16, /* extr_c */ 16, /* extr_uc */ 16, /* extr_s */ 16, /* extr_us */ 16, /* extr_i */ 16, /* extr_ui */ - 48, /* htonr_us */ - 48, /* htonr_ui */ + 32, /* htonr_us */ + 32, /* htonr_ui */ 16, /* htonr_ul */ 16, /* ldr_c */ 32, /* ldi_c */ @@ -401,9 +400,9 @@ 0, /* movi_d_ww */ 16, /* movr_d_w */ 32, /* movi_d_w */ - 48, /* bswapr_us */ - 48, /* bswapr_ui */ + 32, /* bswapr_us */ + 32, /* bswapr_ui */ 16, /* bswapr_ul */ - 0, /* casr */ - 0, /* casi */ + 48, /* casr */ + 64, /* casi */ #endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_ia64.c b/deps/lightning/lib/jit_ia64.c index 5664762f..1c35fb16 100644 
--- a/deps/lightning/lib/jit_ia64.c +++ b/deps/lightning/lib/jit_ia64.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2019 Free Software Foundation, Inc. + * Copyright (C) 2013-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -1117,9 +1117,10 @@ _emit_code(jit_state_t *_jit) jit_regarg_set(node, value); switch (node->code) { case jit_code_align: - assert(!(node->u.w & (node->u.w - 1)) && - node->u.w <= sizeof(jit_word_t)); - /* nothing done */ + assert(!(node->u.w & (node->u.w - 1))); + sync(); + if (node->u.w > 8) + nop(node->u.w - 8); break; case jit_code_note: case jit_code_name: sync(); diff --git a/deps/lightning/lib/jit_memory.c b/deps/lightning/lib/jit_memory.c index d1e3a144..e4e5deb3 100644 --- a/deps/lightning/lib/jit_memory.c +++ b/deps/lightning/lib/jit_memory.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2019 Free Software Foundation, Inc. + * Copyright (C) 2013-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * diff --git a/deps/lightning/lib/jit_mips-cpu.c b/deps/lightning/lib/jit_mips-cpu.c index 08625923..f52d6dc8 100644 --- a/deps/lightning/lib/jit_mips-cpu.c +++ b/deps/lightning/lib/jit_mips-cpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2019 Free Software Foundation, Inc. + * Copyright (C) 2012-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* @@ -359,6 +359,7 @@ static void _nop(jit_state_t*,jit_int32_t); # define DEXTM(rt,rs,pos,size) hrrrit(MIPS_SPECIAL3,rs,rt,size-32-1,pos,MIPS_DEXTM) # define ROTR(rd,rt,sa) hrrrit(MIPS_SPECIAL,1,rt,rd,sa,MIPS_SRL) # define DROTR(rd,rt,sa) hrrrit(MIPS_SPECIAL,1,rt,rd,sa,MIPS_DSRL) +# define SYNC() rrr_t(_ZERO_REGNO,_ZERO_REGNO,_ZERO_REGNO,MIPS_SYNC) # define MFHI(rd) rrr_t(_ZERO_REGNO,_ZERO_REGNO,rd,MIPS_MFHI) # define MFLO(rd) rrr_t(_ZERO_REGNO,_ZERO_REGNO,rd,MIPS_MFLO) # define MTHI(rs) rrr_t(rs,_ZERO_REGNO,_ZERO_REGNO,MIPS_MTHI) @@ -376,10 +377,14 @@ static void _nop(jit_state_t*,jit_int32_t); # define LW(rt,of,rb) hrri(MIPS_LW,rb,rt,of) # define LWU(rt,of,rb) hrri(MIPS_LWU,rb,rt,of) # define LD(rt,of,rb) hrri(MIPS_LD,rb,rt,of) +# define LL(rt,of,rb) hrri(MIPS_LL,rb,rt,of) +# define LLD(rt,of,rb) hrri(MIPS_LLD,rb,rt,of) # define SB(rt,of,rb) hrri(MIPS_SB,rb,rt,of) # define SH(rt,of,rb) hrri(MIPS_SH,rb,rt,of) # define SW(rt,of,rb) hrri(MIPS_SW,rb,rt,of) # define SD(rt,of,rb) hrri(MIPS_SD,rb,rt,of) +# define SC(rt,of,rb) hrri(MIPS_SC,rb,rt,of) +# define SCD(rt,of,rb) hrri(MIPS_SCD,rb,rt,of) # define WSBH(rd,rt) hrrrit(MIPS_SPECIAL3,0,rt,rd,MIPS_WSBH,MIPS_BSHFL) # define SEB(rd,rt) hrrrit(MIPS_SPECIAL3,0,rt,rd,MIPS_SEB,MIPS_BSHFL) # define SEH(rd,rt) hrrrit(MIPS_SPECIAL3,0,rt,rd,MIPS_SEH,MIPS_BSHFL) @@ -400,6 +405,7 @@ static void _nop(jit_state_t*,jit_int32_t); # define JR(r0) hrrrit(MIPS_SPECIAL,r0,0,0,0,MIPS_JR) # endif # define J(i0) hi(MIPS_J,i0) +# define JAL(i0) hi(MIPS_JAL,i0) # define MOVN(rd,rs,rt) hrrrit(0,rs,rt,rd,0,MIPS_MOVN) # define MOVZ(rd,rs,rt) hrrrit(0,rs,rt,rd,0,MIPS_MOVZ) # define comr(r0,r1) xori(r0,r1,-1) @@ -1042,8 +1048,8 @@ _rsbi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) static void _mulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { - if (__WORDSIZE == 32) - MUL(r0, r1, r2); + if (jit_mips2_p() && __WORDSIZE == 32) + MUL(r0, r1, r2); else { multu(r1, r2); MFLO(r0); @@ -1337,7 +1343,41 @@ 
static void _casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3, jit_word_t i0) { - fallback_casx(r0, r1, r2, r3, i0); + jit_int32_t r1_reg, iscasi; + jit_word_t retry, done, jump0, jump1; + if ((iscasi = (r1 == _NOREG))) { + r1_reg = jit_get_reg(jit_class_gpr); + r1 = rn(r1_reg); + movi(r1, i0); + } + SYNC(); + /* retry: */ + retry = _jit->pc.w; +# if __WORDSIZE == 32 + LL(r0, 0, r1); +# else + LLD(r0, 0, r1); +# endif + jump0 = _jit->pc.w; + BNE(r0, r2, 1); /* bne done r0 r2 */ + movi(r0, 0); /* set to 0 in delay slot */ + movr(r0, r3); /* after jump and delay slot */ + /* store new value */ +# if __WORDSIZE == 32 + SC(r0, 0, r1); +# else + SCD(r0, 0, r1); +# endif + jump1 = _jit->pc.w; + BEQ(r0, _ZERO_REGNO, 0); /* beqi retry r0 0 */ + movi(r0, 1); /* set to 1 in delay slot */ + SYNC(); + /* done: */ + done = _jit->pc.w; + patch_at(jump0, done); + patch_at(jump1, retry); + if (iscasi) + jit_unget_reg(r1_reg); } static void diff --git a/deps/lightning/lib/jit_mips-fpu.c b/deps/lightning/lib/jit_mips-fpu.c index 7513219b..6209fd68 100644 --- a/deps/lightning/lib/jit_mips-fpu.c +++ b/deps/lightning/lib/jit_mips-fpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2019 Free Software Foundation, Inc. + * Copyright (C) 2012-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* diff --git a/deps/lightning/lib/jit_mips-sz.c b/deps/lightning/lib/jit_mips-sz.c index 25f0712f..91deb4b4 100644 --- a/deps/lightning/lib/jit_mips-sz.c +++ b/deps/lightning/lib/jit_mips-sz.c @@ -405,8 +405,8 @@ 20, /* bswapr_us */ 52, /* bswapr_ui */ 0, /* bswapr_ul */ - 0, /* casr */ - 0, /* casi */ + 36, /* casr */ + 44, /* casi */ #endif /* NEW_ABI */ #endif /* __WORDSIZE */ @@ -816,8 +816,8 @@ 20, /* bswapr_us */ 52, /* bswapr_ui */ 0, /* bswapr_ul */ - 0, /* casr */ - 0, /* casi */ + 36, /* casr */ + 44, /* casi */ #endif /* NEW_ABI */ #endif /* __WORDSIZE */ @@ -1226,6 +1226,6 @@ 20, /* bswapr_us */ 52, /* bswapr_ui */ 116, /* bswapr_ul */ - 0, /* casr */ - 0, /* casi */ + 36, /* casr */ + 44, /* casi */ #endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_mips.c b/deps/lightning/lib/jit_mips.c index ecf025d0..d98d94e8 100644 --- a/deps/lightning/lib/jit_mips.c +++ b/deps/lightning/lib/jit_mips.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2019 Free Software Foundation, Inc. + * Copyright (C) 2012-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* @@ -67,7 +67,6 @@ static void _patch(jit_state_t*,jit_word_t,jit_node_t*); # include "jit_rewind.c" # include "jit_mips-cpu.c" # include "jit_mips-fpu.c" -# include "jit_fallback.c" #undef PROTO /* @@ -1300,11 +1299,10 @@ _emit_code(jit_state_t *_jit) jit_regarg_set(node, value); switch (node->code) { case jit_code_align: - assert(!(node->u.w & (node->u.w - 1)) && - node->u.w <= sizeof(jit_word_t)); - if (node->u.w == sizeof(jit_word_t) && - (word = _jit->pc.w & (sizeof(jit_word_t) - 1))) - nop(sizeof(jit_word_t) - word); + /* Must align to a power of two */ + assert(!(node->u.w & (node->u.w - 1))); + if ((word = _jit->pc.w & (node->u.w - 1))) + nop(node->u.w - word); break; case jit_code_note: case jit_code_name: node->u.w = _jit->pc.w; @@ -1883,7 +1881,6 @@ _emit_code(jit_state_t *_jit) # include "jit_rewind.c" # include "jit_mips-cpu.c" # include "jit_mips-fpu.c" -# include "jit_fallback.c" #undef CODE void diff --git a/deps/lightning/lib/jit_names.c b/deps/lightning/lib/jit_names.c index 664adff8..b663b672 100644 --- a/deps/lightning/lib/jit_names.c +++ b/deps/lightning/lib/jit_names.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2014-2019 Free Software Foundation, Inc. + * Copyright (C) 2014-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * diff --git a/deps/lightning/lib/jit_note.c b/deps/lightning/lib/jit_note.c index c79b8186..f1c149fc 100644 --- a/deps/lightning/lib/jit_note.c +++ b/deps/lightning/lib/jit_note.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2019 Free Software Foundation, Inc. + * Copyright (C) 2013-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * diff --git a/deps/lightning/lib/jit_ppc-cpu.c b/deps/lightning/lib/jit_ppc-cpu.c index ef47f9af..f205db07 100644 --- a/deps/lightning/lib/jit_ppc-cpu.c +++ b/deps/lightning/lib/jit_ppc-cpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2019 Free Software Foundation, Inc. + * Copyright (C) 2012-2022 Free Software Foundation, Inc. 
* * This file is part of GNU lightning. * @@ -1181,11 +1181,13 @@ _casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, # else STDCX_(r3, _R0_REGNO, r1); # endif - jump1 = bnei(_jit->pc.w, r0, 0); /* bne retry r0 0 */ + jump1 = _jit->pc.w; + BNE(0); /* BNE retry */ /* done: */ done = _jit->pc.w; ISYNC(); MFCR(r0); + EXTRWI(r0, r0, 1, CR_EQ); patch_at(jump0, done); patch_at(jump1, retry); if (iscasi) diff --git a/deps/lightning/lib/jit_ppc-fpu.c b/deps/lightning/lib/jit_ppc-fpu.c index 18cc621a..a2edbd89 100644 --- a/deps/lightning/lib/jit_ppc-fpu.c +++ b/deps/lightning/lib/jit_ppc-fpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2019 Free Software Foundation, Inc. + * Copyright (C) 2012-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * diff --git a/deps/lightning/lib/jit_ppc-sz.c b/deps/lightning/lib/jit_ppc-sz.c index 9cd006cd..212e6372 100644 --- a/deps/lightning/lib/jit_ppc-sz.c +++ b/deps/lightning/lib/jit_ppc-sz.c @@ -62,7 +62,7 @@ 12, /* remr_u */ 20, /* remi_u */ 4, /* andr */ - 12, /* andi */ + 4, /* andi */ 4, /* orr */ 12, /* ori */ 4, /* xorr */ @@ -97,8 +97,8 @@ 16, /* nei */ 4, /* movr */ 8, /* movi */ - 12, /* movnr */ - 12, /* movzr */ + 12, /* movnr */ + 12, /* movzr */ 4, /* extr_c */ 4, /* extr_uc */ 4, /* extr_s */ @@ -258,7 +258,7 @@ 24, /* unordi_f */ 12, /* truncr_f_i */ 0, /* truncr_f_l */ - 20, /* extr_f */ + 36, /* extr_f */ 4, /* extr_d_f */ 4, /* movr_f */ 12, /* movi_f */ @@ -349,7 +349,7 @@ 32, /* unordi_d */ 12, /* truncr_d_i */ 0, /* truncr_d_l */ - 20, /* extr_d */ + 36, /* extr_d */ 4, /* extr_f_d */ 4, /* movr_d */ 24, /* movi_d */ @@ -403,12 +403,12 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ - 20, /* bswapr_us */ + 8, /* bswapr_us */ 16, /* bswapr_ui */ 0, /* bswapr_ul */ - 0, /* casr */ - 0, /* casi */ -#endif /* _CALL_SYV */ + 36, /* casr */ + 44, /* casi */ +#endif /* _CALL_SYSV */ #endif /* __BYTE_ORDER */ #endif /* __powerpc__ */ #endif /* __WORDSIZE */ @@ -821,8 +821,8 @@ 20, /* 
bswapr_us */ 16, /* bswapr_ui */ 0, /* bswapr_ul */ - 0, /* casr */ - 0, /* casi */ + 36, /* casr */ + 44, /* casi */ #endif /* _CALL_AIX */ #endif /* __BYTEORDER */ #endif /* __powerpc__ */ @@ -926,8 +926,8 @@ 16, /* nei */ 4, /* movr */ 36, /* movi */ - 12, /* movnr */ - 12, /* movzr */ + 12, /* movnr */ + 12, /* movzr */ 4, /* extr_c */ 4, /* extr_uc */ 4, /* extr_s */ @@ -1024,7 +1024,7 @@ 8, /* jmpr */ 4, /* jmpi */ 28, /* callr */ - 56, /* calli */ + 52, /* calli */ 0, /* prepare */ 0, /* pushargr */ 0, /* pushargi */ @@ -1232,12 +1232,12 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ - 20, /* bswapr_us */ + 8, /* bswapr_us */ 16, /* bswapr_ui */ 44, /* bswapr_ul */ - 0, /* casr */ - 0, /* casi */ -#endif /* __BYTEORDER */ + 36, /* casr */ + 44, /* casi */ +#endif /* __BYTE_ORDER */ #endif /* __powerpc__ */ #endif /* __WORDSIZE */ @@ -1339,15 +1339,15 @@ 16, /* nei */ 4, /* movr */ 36, /* movi */ - 12, /* movnr */ - 12, /* movzr */ + 12, /* movnr */ + 12, /* movzr */ 4, /* extr_c */ 4, /* extr_uc */ 4, /* extr_s */ 4, /* extr_us */ 4, /* extr_i */ 4, /* extr_ui */ - 20, /* htonr_us */ + 8, /* htonr_us */ 16, /* htonr_ui */ 44, /* htonr_ul */ 8, /* ldr_c */ @@ -1437,7 +1437,7 @@ 8, /* jmpr */ 4, /* jmpi */ 12, /* callr */ - 36, /* calli */ + 32, /* calli */ 0, /* prepare */ 0, /* pushargr */ 0, /* pushargi */ @@ -1645,11 +1645,11 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ - 20, /* bswapr_us */ + 8, /* bswapr_us */ 16, /* bswapr_ui */ 44, /* bswapr_ul */ - 0, /* casr */ - 0, /* casi */ + 36, /* casr */ + 44, /* casi */ #endif /* __BYTE_ORDER */ #endif /* __powerpc__ */ #endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_ppc.c b/deps/lightning/lib/jit_ppc.c index fd6964e2..5d2b74b1 100644 --- a/deps/lightning/lib/jit_ppc.c +++ b/deps/lightning/lib/jit_ppc.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2019 Free Software Foundation, Inc. + * Copyright (C) 2012-2022 Free Software Foundation, Inc. 
* * This file is part of GNU lightning. * @@ -1288,11 +1288,10 @@ _emit_code(jit_state_t *_jit) jit_regarg_set(node, value); switch (node->code) { case jit_code_align: - assert(!(node->u.w & (node->u.w - 1)) && - node->u.w <= sizeof(jit_word_t)); - if (node->u.w == sizeof(jit_word_t) && - (word = _jit->pc.w & (sizeof(jit_word_t) - 1))) - nop(sizeof(jit_word_t) - word); + /* Must align to a power of two */ + assert(!(node->u.w & (node->u.w - 1))); + if ((word = _jit->pc.w & (node->u.w - 1))) + nop(node->u.w - word); break; case jit_code_note: case jit_code_name: node->u.w = _jit->pc.w; diff --git a/deps/lightning/lib/jit_print.c b/deps/lightning/lib/jit_print.c index ee37b025..a6f93380 100644 --- a/deps/lightning/lib/jit_print.c +++ b/deps/lightning/lib/jit_print.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2019 Free Software Foundation, Inc. + * Copyright (C) 2012-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -20,8 +20,21 @@ #include #include +#if __WORDSIZE == 32 +# define MININT 0x80000000 +#else +# define MININT 0x8000000000000000 +#endif + + #define print_chr(value) fputc(value, print_stream) -#define print_hex(value) fprintf(print_stream, "0x%lx", value) +#define print_hex(value) \ + do { \ + if (value < 0 && value != MININT) \ + fprintf(print_stream, "-0x%lx", -value); \ + else \ + fprintf(print_stream, "0x%lx", value); \ + } while (0) #define print_dec(value) fprintf(print_stream, "%ld", value) #define print_flt(value) fprintf(print_stream, "%g", value) #define print_str(value) fprintf(print_stream, "%s", value) @@ -304,12 +317,12 @@ _jit_print_node(jit_state_t *_jit, jit_node_t *node) case jit_code_name: print_chr(' '); if (node->v.p && _jitc->emit) - print_str(node->v.n->u.p); + print_str((char *)node->v.n->u.p); break; case jit_code_note: print_chr(' '); if (node->v.p && _jitc->emit) - print_str(node->v.n->u.p); + print_str((char *)node->v.n->u.p); if (node->v.p && _jitc->emit && node->w.w) print_chr(':'); if (node->w.w) 
diff --git a/deps/lightning/lib/jit_rewind.c b/deps/lightning/lib/jit_rewind.c index 5ef1be5e..89e94916 100644 --- a/deps/lightning/lib/jit_rewind.c +++ b/deps/lightning/lib/jit_rewind.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2015-2019 Free Software Foundation, Inc. + * Copyright (C) 2015-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * diff --git a/deps/lightning/lib/jit_riscv-cpu.c b/deps/lightning/lib/jit_riscv-cpu.c index 5046fac6..2ae11b92 100644 --- a/deps/lightning/lib/jit_riscv-cpu.c +++ b/deps/lightning/lib/jit_riscv-cpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019 Free Software Foundation, Inc. + * Copyright (C) 2019-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -456,7 +456,7 @@ static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t); static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2) static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); - define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0) +# define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0) static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t, jit_int32_t,jit_int32_t,jit_word_t); #define casr(r0, r1, r2, r3) casx(r0, r1, r2, r3, 0) @@ -1280,7 +1280,9 @@ _extr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) static void _movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) { +# if __WORDSIZE == 64 if (simm32_p(i0)) { +# endif jit_int32_t lo = (jit_int32_t)i0 << 20 >> 20; jit_int32_t hi = i0 - lo; if (hi) { @@ -1290,39 +1292,26 @@ _movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) } else ADDIW(r0, _ZERO_REGNO, lo); +# if __WORDSIZE == 64 } - else { - jit_int32_t lo = i0 << 32 >> 32; - jit_word_t hi = i0 - lo; - jit_int32_t t0 = jit_get_reg(jit_class_gpr); - movi(rn(t0), (jit_int32_t)(hi >> 32)); - movi(r0, lo); - lshi(rn(t0), rn(t0), 32); - addr(r0, r0, rn(t0)); - jit_unget_reg(t0); - } + else + load_const(r0, 
i0); +# endif } static jit_word_t _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) { jit_word_t w; - jit_int32_t t0; - jit_int32_t ww = i0 << 32 >> 32; - jit_int32_t lo = ww << 20 >> 20; - jit_int32_t hi = ww - lo; w = _jit->pc.w; - t0 = jit_get_reg(jit_class_gpr); - LUI(r0, hi >> 12); - ADDIW(r0, r0, lo); - ww = i0 >> 32; - lo = ww << 20 >> 20; - hi = ww - lo; - LUI(rn(t0), hi >> 12); - ADDIW(rn(t0), rn(t0), lo); - SLLI(rn(t0), rn(t0), 32); - ADD(r0, r0, rn(t0)); - jit_unget_reg(t0); +# if __WORDSIZE == 64 + AUIPC(r0, 0); + ADDI(r0, r0, 0); + LD(r0, r0, 0); +# else + LUI(r0, 0); + ADDIW(r0, r0, 0); +# endif return (w); } @@ -1331,7 +1320,7 @@ _movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; w = beqi(_jit->pc.w, r2, 0); - movr(r1, r0); + movr(r0, r1); patch_at(w, _jit->pc.w); } @@ -1340,7 +1329,7 @@ _movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; w = bnei(_jit->pc.w, r2, 0); - movr(r1, r0); + movr(r0, r1); patch_at(w, _jit->pc.w); } @@ -1348,7 +1337,36 @@ static void _casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3, jit_word_t i0) { - fallback_casx(r0, r1, r2, r3, i0); + jit_int32_t t0, r1_reg, iscasi; + jit_word_t retry, done, jump0, jump1; + if ((iscasi = (r1 == _NOREG))) { + r1_reg = jit_get_reg(jit_class_gpr); + r1 = rn(r1_reg); + movi(r1, i0); + } + t0 = jit_get_reg(jit_class_gpr); + retry = _jit->pc.w; +# if __WORDSIZE == 32 + LR_W(r0, r1); +# else + LR_D(r0, r1); +# endif + jump0 = _jit->pc.w; + BNE(r0, r2, 0); +# if __WORDSIZE == 32 + SC_W(rn(t0), r1, r3); +# else + SC_D(rn(t0), r1, r3); +# endif + jump1 = _jit->pc.w; + BNE(rn(t0), _ZERO_REGNO, 0); + done = _jit->pc.w; + eqr(r0, r0, r2); + patch_at(jump0, done); + patch_at(jump1, retry); + jit_unget_reg(t0); + if (iscasi) + jit_unget_reg(r1_reg); } static void @@ -2296,42 +2314,44 @@ _patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label) u.w = instr; i.w = u.i[0]; /* 
movi_p? */ +# if __WORDSIZE == 64 + if (i.U.opcode == 23) { /* AUIPC */ + jit_int32_t lo, hi; + jit_word_t address, relative; + address = get_const(label); + relative = address - instr; + assert(simm32_p(relative)); + lo = (jit_int32_t)relative << 20 >> 20; + hi = relative - lo; + i.U.imm12_31 = hi >> 12; + u.i[0] = i.w; + i.w = u.i[1]; + if (i.I.opcode == 19 && i.I.funct3 == 0) { /* ADDI */ + i.I.imm11_0 = lo; + u.i[1] = i.w; + i.w = u.i[2]; + } + else + abort(); + i.w = u.i[1]; + assert(i.I.opcode == 3 && i.I.funct3 == 3); /* LD */ + } +# else if (i.U.opcode == 55) { /* LUI */ - jit_int32_t ww = label << 32 >> 32; - jit_int32_t lo = ww << 20 >> 20; - jit_int32_t hi = ww - lo; + jit_int32_t lo = (jit_int32_t)label << 20 >> 20; + jit_int32_t hi = label - lo; i.U.imm12_31 = hi >> 12; u.i[0] = i.w; i.w = u.i[1]; if (i.I.opcode == 27 && i.I.funct3 == 0) { /* ADDIW */ - i.I.imm11_0 = lo & 0xfff; + i.I.imm11_0 = lo; u.i[1] = i.w; i.w = u.i[2]; - if (i.U.opcode == 55) { /* LUI */ - ww = label >> 32; - lo = ww << 20 >> 20; - hi = ww - lo; - i.U.imm12_31 = hi >> 12; - u.i[2] = i.w; - i.w = u.i[3]; - if (i.I.opcode == 27 && i.I.funct3 == 0) { /* ADDIW */ - i.I.imm11_0 = lo & 0xfff; - u.i[3] = i.w; - i.w = u.i[4]; - assert(i.IS.opcode == 19); /* SLLI */ - assert(i.IS.shamt == 32); - i.w = u.i[5]; - assert(i.R.opcode == 51); /* ADD */ - } - else - abort(); - } - else - abort(); } else abort(); } +# endif /* b{lt,le,eq,ge,gt,ne}{,_u}? */ else if (i.B.opcode == 99) { /* B{EQ,NE,LT,GE,LTU,GEU} */ jit_word_t jmp = label - instr; diff --git a/deps/lightning/lib/jit_riscv-fpu.c b/deps/lightning/lib/jit_riscv-fpu.c index 367975e8..e7884cb9 100644 --- a/deps/lightning/lib/jit_riscv-fpu.c +++ b/deps/lightning/lib/jit_riscv-fpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019 Free Software Foundation, Inc. + * Copyright (C) 2019-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* diff --git a/deps/lightning/lib/jit_riscv-sz.c b/deps/lightning/lib/jit_riscv-sz.c index ea2911fe..8c4cf048 100644 --- a/deps/lightning/lib/jit_riscv-sz.c +++ b/deps/lightning/lib/jit_riscv-sz.c @@ -28,42 +28,42 @@ 8, /* va_arg_d */ 0, /* va_end */ 4, /* addr */ - 20, /* addi */ + 16, /* addi */ 12, /* addcr */ - 28, /* addci */ + 24, /* addci */ 28, /* addxr */ 28, /* addxi */ 4, /* subr */ - 20, /* subi */ + 16, /* subi */ 12, /* subcr */ - 28, /* subci */ + 24, /* subci */ 28, /* subxr */ 28, /* subxi */ - 28, /* rsbi */ + 20, /* rsbi */ 4, /* mulr */ - 20, /* muli */ + 16, /* muli */ 12, /* qmulr */ - 24, /* qmuli */ + 20, /* qmuli */ 12, /* qmulr_u */ - 24, /* qmuli_u */ + 20, /* qmuli_u */ 4, /* divr */ - 20, /* divi */ + 16, /* divi */ 4, /* divr_u */ - 20, /* divi_u */ + 16, /* divi_u */ 20, /* qdivr */ 16, /* qdivi */ 20, /* qdivr_u */ 16, /* qdivi_u */ 4, /* remr */ - 20, /* remi */ + 16, /* remi */ 4, /* remr_u */ - 20, /* remi_u */ + 16, /* remi_u */ 4, /* andr */ - 20, /* andi */ + 16, /* andi */ 4, /* orr */ - 20, /* ori */ + 16, /* ori */ 4, /* xorr */ - 20, /* xori */ + 16, /* xori */ 4, /* lshr */ 4, /* lshi */ 4, /* rshr */ @@ -93,9 +93,9 @@ 8, /* ner */ 8, /* nei */ 4, /* movr */ - 24, /* movi */ - 8, /* movnr */ - 8, /* movzr */ + 12, /* movi */ + 12, /* movnr */ + 12, /* movzr */ 8, /* extr_c */ 4, /* extr_uc */ 8, /* extr_s */ @@ -106,19 +106,19 @@ 52, /* htonr_ui */ 116, /* htonr_ul */ 4, /* ldr_c */ - 12, /* ldi_c */ + 16, /* ldi_c */ 4, /* ldr_uc */ - 12, /* ldi_uc */ + 16, /* ldi_uc */ 4, /* ldr_s */ - 12, /* ldi_s */ + 16, /* ldi_s */ 4, /* ldr_us */ - 12, /* ldi_us */ + 16, /* ldi_us */ 4, /* ldr_i */ - 12, /* ldi_i */ + 16, /* ldi_i */ 4, /* ldr_ui */ - 12, /* ldi_ui */ + 16, /* ldi_ui */ 4, /* ldr_l */ - 12, /* ldi_l */ + 16, /* ldi_l */ 8, /* ldxr_c */ 16, /* ldxi_c */ 8, /* ldxr_uc */ @@ -134,13 +134,13 @@ 8, /* ldxr_l */ 16, /* ldxi_l */ 4, /* str_c */ - 12, /* sti_c */ + 16, /* sti_c */ 4, /* str_s */ - 12, /* sti_s */ + 16, /* 
sti_s */ 4, /* str_i */ - 12, /* sti_i */ + 16, /* sti_i */ 4, /* str_l */ - 12, /* sti_l */ + 16, /* sti_l */ 8, /* stxr_c */ 16, /* stxi_c */ 8, /* stxr_s */ @@ -158,7 +158,7 @@ 4, /* bler_u */ 8, /* blei_u */ 4, /* beqr */ - 28, /* beqi */ + 16, /* beqi */ 4, /* bger */ 8, /* bgei */ 4, /* bger_u */ @@ -168,7 +168,7 @@ 4, /* bgtr_u */ 8, /* bgti_u */ 4, /* bner */ - 20, /* bnei */ + 16, /* bnei */ 8, /* bmsr */ 12, /* bmsi */ 8, /* bmcr */ @@ -190,9 +190,9 @@ 16, /* bxsubr_u */ 20, /* bxsubi_u */ 4, /* jmpr */ - 28, /* jmpi */ + 16, /* jmpi */ 4, /* callr */ - 28, /* calli */ + 16, /* calli */ 0, /* prepare */ 0, /* pushargr */ 0, /* pushargi */ @@ -260,11 +260,11 @@ 4, /* movr_f */ 8, /* movi_f */ 4, /* ldr_f */ - 12, /* ldi_f */ + 16, /* ldi_f */ 8, /* ldxr_f */ 16, /* ldxi_f */ 4, /* str_f */ - 12, /* sti_f */ + 16, /* sti_f */ 8, /* stxr_f */ 16, /* stxi_f */ 8, /* bltr_f */ @@ -305,87 +305,87 @@ 0, /* putargr_d */ 0, /* putargi_d */ 4, /* addr_d */ - 24, /* addi_d */ + 20, /* addi_d */ 4, /* subr_d */ - 24, /* subi_d */ - 24, /* rsbi_d */ + 20, /* subi_d */ + 20, /* rsbi_d */ 4, /* mulr_d */ - 24, /* muli_d */ + 20, /* muli_d */ 4, /* divr_d */ - 24, /* divi_d */ + 20, /* divi_d */ 4, /* negr_d */ 4, /* absr_d */ 4, /* sqrtr_d */ 4, /* ltr_d */ - 24, /* lti_d */ + 20, /* lti_d */ 4, /* ler_d */ - 24, /* lei_d */ + 20, /* lei_d */ 4, /* eqr_d */ - 24, /* eqi_d */ + 20, /* eqi_d */ 4, /* ger_d */ - 24, /* gei_d */ + 20, /* gei_d */ 4, /* gtr_d */ - 24, /* gti_d */ + 20, /* gti_d */ 8, /* ner_d */ - 28, /* nei_d */ + 24, /* nei_d */ 28, /* unltr_d */ - 48, /* unlti_d */ + 44, /* unlti_d */ 28, /* unler_d */ - 48, /* unlei_d */ + 44, /* unlei_d */ 28, /* uneqr_d */ - 48, /* uneqi_d */ + 44, /* uneqi_d */ 28, /* unger_d */ - 48, /* ungei_d */ + 44, /* ungei_d */ 28, /* ungtr_d */ - 48, /* ungti_d */ + 44, /* ungti_d */ 40, /* ltgtr_d */ - 60, /* ltgti_d */ + 56, /* ltgti_d */ 28, /* ordr_d */ - 48, /* ordi_d */ + 44, /* ordi_d */ 20, /* unordr_d */ - 40, /* 
unordi_d */ + 36, /* unordi_d */ 4, /* truncr_d_i */ 4, /* truncr_d_l */ 4, /* extr_d */ 4, /* extr_f_d */ 4, /* movr_d */ - 20, /* movi_d */ + 16, /* movi_d */ 4, /* ldr_d */ - 12, /* ldi_d */ + 16, /* ldi_d */ 8, /* ldxr_d */ 16, /* ldxi_d */ 4, /* str_d */ - 12, /* sti_d */ + 16, /* sti_d */ 8, /* stxr_d */ 16, /* stxi_d */ 8, /* bltr_d */ - 28, /* blti_d */ + 24, /* blti_d */ 8, /* bler_d */ - 28, /* blei_d */ + 24, /* blei_d */ 8, /* beqr_d */ - 28, /* beqi_d */ + 24, /* beqi_d */ 8, /* bger_d */ - 28, /* bgei_d */ + 24, /* bgei_d */ 8, /* bgtr_d */ - 28, /* bgti_d */ + 24, /* bgti_d */ 8, /* bner_d */ - 28, /* bnei_d */ + 24, /* bnei_d */ 32, /* bunltr_d */ - 52, /* bunlti_d */ + 48, /* bunlti_d */ 32, /* bunler_d */ - 52, /* bunlei_d */ + 48, /* bunlei_d */ 32, /* buneqr_d */ - 52, /* buneqi_d */ + 48, /* buneqi_d */ 32, /* bunger_d */ - 52, /* bungei_d */ + 48, /* bungei_d */ 32, /* bungtr_d */ - 52, /* bungti_d */ + 48, /* bungti_d */ 44, /* bltgtr_d */ - 64, /* bltgti_d */ + 60, /* bltgti_d */ 32, /* bordr_d */ - 52, /* bordi_d */ + 48, /* bordi_d */ 24, /* bunordr_d */ - 44, /* bunordi_d */ + 40, /* bunordi_d */ 0, /* pushargr_d */ 0, /* pushargi_d */ 0, /* retr_d */ @@ -399,10 +399,10 @@ 0, /* movr_d_ww */ 0, /* movi_d_ww */ 4, /* movr_d_w */ - 16, /* movi_d_w */ + 12, /* movi_d_w */ 20, /* bswapr_us */ 52, /* bswapr_ui */ 116, /* bswapr_ul */ - 0, /* casr */ - 0, /* casi */ + 28, /* casr */ + 40, /* casi */ #endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_riscv.c b/deps/lightning/lib/jit_riscv.c index 966604a0..8828d4ab 100644 --- a/deps/lightning/lib/jit_riscv.c +++ b/deps/lightning/lib/jit_riscv.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019 Free Software Foundation, Inc. + * Copyright (C) 2019-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* @@ -28,13 +28,21 @@ typedef jit_pointer_t jit_va_list_t; /* * Prototypes */ +#if __WORDSIZE == 64 +# define load_const(r0, i0) _load_const(_jit, r0, i0) +static void _load_const(jit_state_t*, jit_int32_t, jit_word_t); +static jit_word_t hash_const(jit_word_t); +# define put_const(i0) _put_const(_jit, i0) +static void _put_const(jit_state_t*, jit_word_t); +# define get_const(i0) _get_const(_jit, i0) +static jit_word_t _get_const(jit_state_t*, jit_word_t); +#endif #define patch(instr, node) _patch(_jit, instr, node) static void _patch(jit_state_t*,jit_word_t,jit_node_t*); #define PROTO 1 # include "jit_riscv-cpu.c" # include "jit_riscv-fpu.c" -# include "jit_fallback.c" #undef PROTO /* @@ -894,6 +902,43 @@ _emit_code(jit_state_t *_jit) jit_word_t prevw; #endif +#if __WORDSIZE == 64 + if (!_jitc->consts.hash.table) { + jit_alloc((jit_pointer_t *)&_jitc->consts.hash.table, + 16 * sizeof(jit_const_t *)); + _jitc->consts.hash.size = 16; + jit_alloc((jit_pointer_t *)&_jitc->consts.pool.ptr, + sizeof(jit_const_t *)); + jit_alloc((jit_pointer_t *)_jitc->consts.pool.ptr, + 1024 * sizeof(jit_const_t)); + _jitc->consts.pool.length = 1; + } + /* Reset table if starting over jit generation */ + else + memset(_jitc->consts.hash.table, 0, + _jitc->consts.hash.size * sizeof(jit_word_t)); + for (offset = 0; offset < _jitc->consts.pool.length; offset++) { + jit_int32_t i; + jit_const_t *list = _jitc->consts.pool.ptr[offset]; + for (i = 0; i < 1023; ++i, ++list) + list->next = list + 1; + if (offset + 1 < _jitc->consts.pool.length) + list->next = _jitc->consts.pool.ptr[offset + 1]; + else + list->next = NULL; + } + _jitc->consts.pool.list = _jitc->consts.pool.ptr[0]; + _jitc->consts.hash.count = 0; + if (!_jitc->consts.vector.instrs) { + jit_alloc((jit_pointer_t *)&_jitc->consts.vector.instrs, + 16 * sizeof(jit_word_t)); + jit_alloc((jit_pointer_t *)&_jitc->consts.vector.values, + 16 * sizeof(jit_word_t)); + _jitc->consts.vector.length = 16; + } + _jitc->consts.vector.offset = 0; 
+#endif + _jitc->function = NULL; jit_reglive_setup(); @@ -1016,11 +1061,10 @@ _emit_code(jit_state_t *_jit) jit_regarg_set(node, value); switch (node->code) { case jit_code_align: - assert(!(node->u.w & (node->u.w - 1)) && - node->u.w <= sizeof(jit_word_t)); - if (node->u.w == sizeof(jit_word_t) && - (word = _jit->pc.w & (sizeof(jit_word_t) - 1))) - nop(sizeof(jit_word_t) - word); + /* Must align to a power of two */ + assert(!(node->u.w & (node->u.w - 1))); + if ((word = _jit->pc.w & (node->u.w - 1))) + nop(node->u.w - word); break; case jit_code_note: case jit_code_name: node->u.w = _jit->pc.w; @@ -1552,6 +1596,35 @@ _emit_code(jit_state_t *_jit) #undef case_rw #undef case_rr +#if __WORDSIZE == 64 + /* Record all constants to be patched */ + for (offset = 0; offset < _jitc->patches.offset; offset++) { + node = _jitc->patches.ptr[offset].node; + value = node->code == jit_code_movi ? node->v.n->u.w : node->u.n->u.w; + put_const(value); + } + /* Record all direct constants */ + for (offset = 0; offset < _jitc->consts.vector.offset; offset++) + put_const(_jitc->consts.vector.values[offset]); + /* Now actually inject constants at the end of code buffer */ + if (_jitc->consts.hash.count) { + jit_const_t *entry; + /* Insert nop if aligned at 4 bytes */ + if (_jit->pc.w % sizeof(jit_word_t)) + nop(_jit->pc.w % sizeof(jit_word_t)); + for (offset = 0; offset < _jitc->consts.hash.size; offset++) { + entry = _jitc->consts.hash.table[offset]; + for (; entry; entry = entry->next) { + /* Make sure to not write out of bounds */ + if (_jit->pc.uc >= _jitc->code.end) + return (NULL); + entry->address = _jit->pc.w; + *_jit->pc.ul++ = entry->value; + } + } + } +#endif + for (offset = 0; offset < _jitc->patches.offset; offset++) { node = _jitc->patches.ptr[offset].node; word = _jitc->patches.ptr[offset].inst; @@ -1559,6 +1632,25 @@ _emit_code(jit_state_t *_jit) patch_at(word, value); } +#if __WORDSIZE == 64 + /* Patch direct complex constants */ + if (_jitc->consts.vector.instrs) { 
+ for (offset = 0; offset < _jitc->consts.vector.offset; offset++) + patch_at(_jitc->consts.vector.instrs[offset], + _jitc->consts.vector.values[offset]); + jit_free((jit_pointer_t *)&_jitc->consts.vector.instrs); + jit_free((jit_pointer_t *)&_jitc->consts.vector.values); + } + + /* Hash table no longer need */ + if (_jitc->consts.hash.table) { + jit_free((jit_pointer_t *)&_jitc->consts.hash.table); + for (offset = 0; offset < _jitc->consts.pool.length; offset++) + jit_free((jit_pointer_t *)_jitc->consts.pool.ptr + offset); + jit_free((jit_pointer_t *)&_jitc->consts.pool.ptr); + } +#endif + jit_flush(_jit->code.ptr, _jit->pc.uc); return (_jit->code.ptr); @@ -1567,9 +1659,116 @@ _emit_code(jit_state_t *_jit) #define CODE 1 # include "jit_riscv-cpu.c" # include "jit_riscv-fpu.c" -# include "jit_fallback.c" #undef CODE +static void +_load_const(jit_state_t *_jit, jit_int32_t reg, jit_word_t value) +{ + if (_jitc->consts.vector.offset >= _jitc->consts.vector.length) { + jit_word_t new_size = _jitc->consts.vector.length * + 2 * sizeof(jit_word_t); + jit_realloc((jit_pointer_t *)&_jitc->consts.vector.instrs, + _jitc->consts.vector.length * sizeof(jit_word_t), new_size); + jit_realloc((jit_pointer_t *)&_jitc->consts.vector.values, + _jitc->consts.vector.length * sizeof(jit_word_t), new_size); + _jitc->consts.vector.length *= 2; + } + _jitc->consts.vector.instrs[_jitc->consts.vector.offset] = _jit->pc.w; + _jitc->consts.vector.values[_jitc->consts.vector.offset] = value; + ++_jitc->consts.vector.offset; + /* Resolve later the pc relative address */ + put_const(value); + AUIPC(reg, 0); + ADDI(reg, reg, 0); + LD(reg, reg, 0); +} + +static jit_word_t +hash_const(jit_word_t value) +{ + const jit_uint8_t *ptr; + jit_word_t i, key; + for (i = key = 0, ptr = (jit_uint8_t *)&value; i < 4; ++i) + key = (key << (key & 1)) ^ ptr[i]; + return (key); + +} + +static void +_put_const(jit_state_t *_jit, jit_word_t value) +{ + jit_word_t key; + jit_const_t *entry; + + /* Check if already 
inserted in table */ + key = hash_const(value) % _jitc->consts.hash.size; + for (entry = _jitc->consts.hash.table[key]; entry; entry = entry->next) { + if (entry->value == value) + return; + } + + /* Check if need to increase pool size */ + if (_jitc->consts.pool.list->next == NULL) { + jit_const_t *list; + jit_word_t offset; + jit_word_t new_size = (_jitc->consts.pool.length + 1) * + sizeof(jit_const_t*); + jit_realloc((jit_pointer_t *)&_jitc->consts.pool.ptr, + _jitc->consts.pool.length * sizeof(jit_const_t*), new_size); + jit_alloc((jit_pointer_t *) + _jitc->consts.pool.ptr + _jitc->consts.pool.length, + 1024 * sizeof(jit_const_t)); + list = _jitc->consts.pool.ptr[_jitc->consts.pool.length]; + _jitc->consts.pool.list->next = list; + for (offset = 0; offset < 1023; ++offset, ++list) + list->next = list + 1; + list->next = NULL; + ++_jitc->consts.pool.length; + } + + /* Rehash if more than 75% used table */ + if (_jitc->consts.hash.count > (_jitc->consts.hash.size / 4) * 3) { + jit_word_t i, k; + jit_const_t *next; + jit_const_t **table; + jit_alloc((jit_pointer_t *)&table, + _jitc->consts.hash.size * 2 * sizeof(jit_const_t *)); + for (i = 0; i < _jitc->consts.hash.size; ++i) { + for (entry = _jitc->consts.hash.table[i]; entry; entry = next) { + next = entry->next; + k = hash_const(entry->value) % (_jitc->consts.hash.size * 2); + entry->next = table[k]; + table[k] = entry; + } + } + jit_free((jit_pointer_t *)&_jitc->consts.hash.table); + _jitc->consts.hash.size *= 2; + _jitc->consts.hash.table = table; + } + + /* Insert in hash */ + entry = _jitc->consts.pool.list; + _jitc->consts.pool.list = entry->next; + ++_jitc->consts.hash.count; + entry->value = value; + entry->next = _jitc->consts.hash.table[key]; + _jitc->consts.hash.table[key] = entry; +} + +static jit_word_t +_get_const(jit_state_t *_jit, jit_word_t value) +{ + jit_word_t key; + jit_const_t *entry; + key = hash_const(value) % _jitc->consts.hash.size; + for (entry = _jitc->consts.hash.table[key]; entry; 
entry = entry->next) { + if (entry->value == value) + return (entry->address); + } + /* Only the final patch should call get_const() */ + abort(); +} + void jit_flush(void *fptr, void *tptr) { diff --git a/deps/lightning/lib/jit_s390-cpu.c b/deps/lightning/lib/jit_s390-cpu.c index 2c107877..55b7e1fe 100644 --- a/deps/lightning/lib/jit_s390-cpu.c +++ b/deps/lightning/lib/jit_s390-cpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2019 Free Software Foundation, Inc. + * Copyright (C) 2013-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -2477,7 +2477,33 @@ static void _casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3, jit_word_t i0) { - fallback_casx(r0, r1, r2, r3, i0); + jit_int32_t iscasi, r1_reg; + if ((iscasi = (r1 == _NOREG))) { + r1_reg = jit_get_reg_but_zero(0); + r1 = rn(r1_reg); + movi(r1, i0); + } + /* Do not clobber r2 */ + movr(r0, r2); + /* The CS and CSG instructions below effectively do atomically: + * if (*r1 == r0) + * *r1 = r3; + * else + * r0 = *r1 + * So, we do not need to check cpu flags to know if it did work, + * just compare if values are different. + * Obviously it is somewhat of undefined behavior if old_value (r2) + * and new_value (r3) have the same value, but should still work + * as expected as a noop. + */ +# if __WORDSIZE == 32 + CS(r0, r3, 0, r1); +# else + CSG(r0, r3, 0, r1); +# endif + eqr(r0, r0, r2); + if (iscasi) + jit_unget_reg(r1_reg); } static void diff --git a/deps/lightning/lib/jit_s390-fpu.c b/deps/lightning/lib/jit_s390-fpu.c index 6d605135..edf9ddd2 100644 --- a/deps/lightning/lib/jit_s390-fpu.c +++ b/deps/lightning/lib/jit_s390-fpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2019 Free Software Foundation, Inc. + * Copyright (C) 2013-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* diff --git a/deps/lightning/lib/jit_s390-sz.c b/deps/lightning/lib/jit_s390-sz.c index cea2d444..e70c65f4 100644 --- a/deps/lightning/lib/jit_s390-sz.c +++ b/deps/lightning/lib/jit_s390-sz.c @@ -1,15 +1,15 @@ #if __WORDSIZE == 32 -#define JIT_INSTR_MAX 128 +#define JIT_INSTR_MAX 94 0, /* data */ 0, /* live */ - 6, /* align */ + 2, /* align */ 0, /* save */ 0, /* load */ 0, /* #name */ 0, /* #note */ 2, /* label */ - 42, /* prolog */ + 38, /* prolog */ 0, /* ellipsis */ 0, /* va_push */ 0, /* allocai */ @@ -24,176 +24,176 @@ 0, /* getarg_l */ 0, /* putargr */ 0, /* putargi */ - 44, /* va_start */ - 104, /* va_arg */ - 100, /* va_arg_d */ + 40, /* va_start */ + 86, /* va_arg */ + 82, /* va_arg_d */ 0, /* va_end */ - 8, /* addr */ - 24, /* addi */ - 8, /* addcr */ - 20, /* addci */ - 8, /* addxr */ - 12, /* addxi */ - 12, /* subr */ - 24, /* subi */ - 12, /* subcr */ - 20, /* subci */ - 12, /* subxr */ - 12, /* subxi */ - 28, /* rsbi */ - 8, /* mulr */ - 24, /* muli */ - 60, /* qmulr */ - 68, /* qmuli */ - 16, /* qmulr_u */ - 32, /* qmuli_u */ - 12, /* divr */ - 28, /* divi */ + 4, /* addr */ + 12, /* addi */ + 4, /* addcr */ + 10, /* addci */ + 6, /* addxr */ + 10, /* addxi */ + 6, /* subr */ + 12, /* subi */ + 6, /* subcr */ + 10, /* subci */ + 8, /* subxr */ + 10, /* subxi */ + 14, /* rsbi */ + 6, /* mulr */ + 14, /* muli */ + 46, /* qmulr */ + 50, /* qmuli */ + 10, /* qmulr_u */ + 18, /* qmuli_u */ + 10, /* divr */ + 18, /* divi */ 16, /* divr_u */ - 32, /* divi_u */ - 16, /* qdivr */ - 20, /* qdivi */ - 20, /* qdivr_u */ - 24, /* qdivi_u */ - 12, /* remr */ - 28, /* remi */ + 24, /* divi_u */ + 12, /* qdivr */ + 16, /* qdivi */ + 18, /* qdivr_u */ + 22, /* qdivi_u */ + 10, /* remr */ + 18, /* remi */ 16, /* remr_u */ - 32, /* remi_u */ - 8, /* andr */ - 20, /* andi */ - 8, /* orr */ - 20, /* ori */ - 8, /* xorr */ - 24, /* xori */ - 6, /* lshr */ + 24, /* remi_u */ + 4, /* andr */ + 10, /* andi */ + 4, /* orr */ + 10, /* ori */ + 4, /* xorr */ + 12, /* xori */ 
+ 8, /* lshr */ 10, /* lshi */ - 6, /* rshr */ + 8, /* rshr */ 10, /* rshi */ - 6, /* rshr_u */ + 8, /* rshr_u */ 10, /* rshi_u */ - 4, /* negr */ - 12, /* comr */ - 20, /* ltr */ - 24, /* lti */ - 20, /* ltr_u */ - 24, /* lti_u */ - 20, /* ler */ - 24, /* lei */ - 20, /* ler_u */ - 24, /* lei_u */ - 20, /* eqr */ - 24, /* eqi */ - 20, /* ger */ - 24, /* gei */ - 20, /* ger_u */ - 24, /* gei_u */ - 20, /* gtr */ - 24, /* gti */ - 20, /* gtr_u */ - 24, /* gti_u */ - 20, /* ner */ - 24, /* nei */ - 4, /* movr */ - 16, /* movi */ - 14, /* movnr */ - 14, /* movzr */ + 2, /* negr */ + 8, /* comr */ + 16, /* ltr */ + 20, /* lti */ + 16, /* ltr_u */ + 20, /* lti_u */ + 16, /* ler */ + 20, /* lei */ + 16, /* ler_u */ + 20, /* lei_u */ + 16, /* eqr */ + 20, /* eqi */ + 16, /* ger */ + 20, /* gei */ + 16, /* ger_u */ + 20, /* gei_u */ + 16, /* gtr */ + 20, /* gti */ + 16, /* gtr_u */ + 20, /* gti_u */ + 16, /* ner */ + 20, /* nei */ + 2, /* movr */ + 8, /* movi */ + 14, /* movnr */ + 14, /* movzr */ 4, /* extr_c */ 4, /* extr_uc */ 4, /* extr_s */ 4, /* extr_us */ - 4, /* extr_i */ - 4, /* extr_ui */ + 0, /* extr_i */ + 0, /* extr_ui */ 4, /* htonr_us */ - 4, /* htonr_ui */ - 4, /* htonr_ul */ + 2, /* htonr_ui */ + 0, /* htonr_ul */ 6, /* ldr_c */ - 18, /* ldi_c */ + 12, /* ldi_c */ 6, /* ldr_uc */ - 18, /* ldi_uc */ - 6, /* ldr_s */ - 18, /* ldi_s */ + 14, /* ldi_uc */ + 4, /* ldr_s */ + 10, /* ldi_s */ 6, /* ldr_us */ - 18, /* ldi_us */ + 12, /* ldi_us */ 6, /* ldr_i */ - 18, /* ldi_i */ - 6, /* ldr_ui */ - 18, /* ldi_ui */ - 6, /* ldr_l */ - 18, /* ldi_l */ - 14, /* ldxr_c */ - 26, /* ldxi_c */ - 14, /* ldxr_uc */ - 26, /* ldxi_uc */ - 14, /* ldxr_s */ - 26, /* ldxi_s */ - 14, /* ldxr_us */ - 26, /* ldxi_us */ - 14, /* ldxr_i */ - 26, /* ldxi_i */ - 14, /* ldxr_ui */ - 26, /* ldxi_ui */ - 14, /* ldxr_l */ - 26, /* ldxi_l */ + 12, /* ldi_i */ + 0, /* ldr_ui */ + 0, /* ldi_ui */ + 0, /* ldr_l */ + 0, /* ldi_l */ + 10, /* ldxr_c */ + 16, /* ldxi_c */ + 10, /* ldxr_uc */ + 
16, /* ldxi_uc */ + 8, /* ldxr_s */ + 14, /* ldxi_s */ + 10, /* ldxr_us */ + 16, /* ldxi_us */ + 10, /* ldxr_i */ + 16, /* ldxi_i */ + 0, /* ldxr_ui */ + 0, /* ldxi_ui */ + 0, /* ldxr_l */ + 0, /* ldxi_l */ 4, /* str_c */ - 16, /* sti_c */ + 12, /* sti_c */ 4, /* str_s */ - 16, /* sti_s */ + 10, /* sti_s */ 4, /* str_i */ - 16, /* sti_i */ - 6, /* str_l */ - 18, /* sti_l */ - 12, /* stxr_c */ - 28, /* stxi_c */ - 12, /* stxr_s */ - 28, /* stxi_s */ - 12, /* stxr_i */ - 28, /* stxi_i */ - 14, /* stxr_l */ - 30, /* stxi_l */ - 10, /* bltr */ - 14, /* blti */ - 10, /* bltr_u */ - 14, /* blti_u */ - 10, /* bler */ - 14, /* blei */ - 10, /* bler_u */ - 14, /* blei_u */ - 10, /* beqr */ - 26, /* beqi */ - 10, /* bger */ - 14, /* bgei */ - 10, /* bger_u */ - 14, /* bgei_u */ - 10, /* bgtr */ - 14, /* bgti */ - 10, /* bgtr_u */ - 14, /* bgti_u */ - 10, /* bner */ - 26, /* bnei */ - 18, /* bmsr */ - 18, /* bmsi */ - 18, /* bmcr */ - 18, /* bmci */ - 10, /* boaddr */ - 14, /* boaddi */ - 10, /* boaddr_u */ - 14, /* boaddi_u */ - 10, /* bxaddr */ - 14, /* bxaddi */ - 10, /* bxaddr_u */ - 14, /* bxaddi_u */ - 10, /* bosubr */ - 14, /* bosubi */ - 10, /* bosubr_u */ - 14, /* bosubi_u */ - 10, /* bxsubr */ - 14, /* bxsubi */ - 10, /* bxsubr_u */ - 14, /* bxsubi_u */ + 10, /* sti_i */ + 0, /* str_l */ + 0, /* sti_l */ + 8, /* stxr_c */ + 16, /* stxi_c */ + 8, /* stxr_s */ + 16, /* stxi_s */ + 8, /* stxr_i */ + 16, /* stxi_i */ + 0, /* stxr_l */ + 0, /* stxi_l */ + 8, /* bltr */ + 12, /* blti */ + 8, /* bltr_u */ + 12, /* blti_u */ + 8, /* bler */ + 12, /* blei */ + 8, /* bler_u */ + 12, /* blei_u */ + 8, /* beqr */ + 16, /* beqi */ + 8, /* bger */ + 12, /* bgei */ + 8, /* bger_u */ + 12, /* bgei_u */ + 8, /* bgtr */ + 12, /* bgti */ + 8, /* bgtr_u */ + 12, /* bgti_u */ + 8, /* bner */ + 16, /* bnei */ + 12, /* bmsr */ + 14, /* bmsi */ + 12, /* bmcr */ + 14, /* bmci */ + 8, /* boaddr */ + 12, /* boaddi */ + 8, /* boaddr_u */ + 12, /* boaddi_u */ + 8, /* bxaddr */ + 12, /* bxaddi 
*/ + 8, /* bxaddr_u */ + 12, /* bxaddi_u */ + 8, /* bosubr */ + 12, /* bosubi */ + 8, /* bosubr_u */ + 12, /* bosubi_u */ + 8, /* bxsubr */ + 12, /* bxsubi */ + 8, /* bxsubr_u */ + 12, /* bxsubi_u */ 2, /* jmpr */ - 18, /* jmpi */ + 10, /* jmpi */ 2, /* callr */ - 18, /* calli */ + 10, /* calli */ 0, /* prepare */ 0, /* pushargr */ 0, /* pushargi */ @@ -209,20 +209,20 @@ 0, /* retval_i */ 0, /* retval_ui */ 0, /* retval_l */ - 40, /* epilog */ + 36, /* epilog */ 0, /* arg_f */ 0, /* getarg_f */ 0, /* putargr_f */ 0, /* putargi_f */ 6, /* addr_f */ - 26, /* addi_f */ + 24, /* addi_f */ 8, /* subr_f */ - 26, /* subi_f */ + 24, /* subi_f */ 28, /* rsbi_f */ 6, /* mulr_f */ - 26, /* muli_f */ + 24, /* muli_f */ 8, /* divr_f */ - 26, /* divi_f */ + 24, /* divi_f */ 4, /* negr_f */ 4, /* absr_f */ 4, /* sqrtr_f */ @@ -255,21 +255,21 @@ 16, /* unordr_f */ 36, /* unordi_f */ 4, /* truncr_f_i */ - 4, /* truncr_f_l */ + 0, /* truncr_f_l */ 4, /* extr_f */ 4, /* extr_d_f */ 2, /* movr_f */ 20, /* movi_f */ 4, /* ldr_f */ - 16, /* ldi_f */ - 12, /* ldxr_f */ - 24, /* ldxi_f */ + 10, /* ldi_f */ + 8, /* ldxr_f */ + 14, /* ldxi_f */ 4, /* str_f */ - 16, /* sti_f */ - 12, /* stxr_f */ - 24, /* stxi_f */ + 10, /* sti_f */ + 8, /* stxr_f */ + 14, /* stxi_f */ 10, /* bltr_f */ - 30, /* blti_f */ + 28, /* blti_f */ 10, /* bler_f */ 30, /* blei_f */ 10, /* beqr_f */ @@ -281,11 +281,11 @@ 10, /* bner_f */ 30, /* bnei_f */ 10, /* bunltr_f */ - 30, /* bunlti_f */ + 28, /* bunlti_f */ 10, /* bunler_f */ - 30, /* bunlei_f */ + 28, /* bunlei_f */ 18, /* buneqr_f */ - 38, /* buneqi_f */ + 36, /* buneqi_f */ 10, /* bunger_f */ 30, /* bungei_f */ 10, /* bungtr_f */ @@ -295,7 +295,7 @@ 10, /* bordr_f */ 30, /* bordi_f */ 10, /* bunordr_f */ - 30, /* bunordi_f */ + 28, /* bunordi_f */ 0, /* pushargr_f */ 0, /* pushargi_f */ 0, /* retr_f */ @@ -306,87 +306,87 @@ 0, /* putargr_d */ 0, /* putargi_d */ 6, /* addr_d */ - 26, /* addi_d */ + 34, /* addi_d */ 8, /* subr_d */ - 26, /* subi_d */ - 28, /* 
rsbi_d */ + 34, /* subi_d */ + 38, /* rsbi_d */ 6, /* mulr_d */ - 26, /* muli_d */ + 34, /* muli_d */ 8, /* divr_d */ - 26, /* divi_d */ + 34, /* divi_d */ 4, /* negr_d */ 4, /* absr_d */ 4, /* sqrtr_d */ 16, /* ltr_d */ - 36, /* lti_d */ + 46, /* lti_d */ 16, /* ler_d */ - 36, /* lei_d */ + 46, /* lei_d */ 16, /* eqr_d */ - 36, /* eqi_d */ + 46, /* eqi_d */ 16, /* ger_d */ - 36, /* gei_d */ + 46, /* gei_d */ 16, /* gtr_d */ - 36, /* gti_d */ + 46, /* gti_d */ 16, /* ner_d */ - 36, /* nei_d */ + 46, /* nei_d */ 16, /* unltr_d */ - 36, /* unlti_d */ + 46, /* unlti_d */ 16, /* unler_d */ - 36, /* unlei_d */ + 46, /* unlei_d */ 20, /* uneqr_d */ - 40, /* uneqi_d */ + 50, /* uneqi_d */ 16, /* unger_d */ - 36, /* ungei_d */ + 46, /* ungei_d */ 16, /* ungtr_d */ - 36, /* ungti_d */ + 46, /* ungti_d */ 20, /* ltgtr_d */ - 40, /* ltgti_d */ + 50, /* ltgti_d */ 16, /* ordr_d */ - 36, /* ordi_d */ + 46, /* ordi_d */ 16, /* unordr_d */ - 36, /* unordi_d */ + 46, /* unordi_d */ 4, /* truncr_d_i */ - 4, /* truncr_d_l */ + 0, /* truncr_d_l */ 4, /* extr_d */ 4, /* extr_f_d */ 2, /* movr_d */ - 24, /* movi_d */ + 30, /* movi_d */ 4, /* ldr_d */ - 16, /* ldi_d */ - 12, /* ldxr_d */ - 24, /* ldxi_d */ + 10, /* ldi_d */ + 8, /* ldxr_d */ + 14, /* ldxi_d */ 4, /* str_d */ - 16, /* sti_d */ - 12, /* stxr_d */ - 24, /* stxi_d */ + 10, /* sti_d */ + 8, /* stxr_d */ + 14, /* stxi_d */ 10, /* bltr_d */ - 30, /* blti_d */ + 38, /* blti_d */ 10, /* bler_d */ - 30, /* blei_d */ + 38, /* blei_d */ 10, /* beqr_d */ - 34, /* beqi_d */ + 40, /* beqi_d */ 10, /* bger_d */ - 30, /* bgei_d */ + 40, /* bgei_d */ 10, /* bgtr_d */ - 30, /* bgti_d */ + 40, /* bgti_d */ 10, /* bner_d */ - 30, /* bnei_d */ + 40, /* bnei_d */ 10, /* bunltr_d */ - 30, /* bunlti_d */ + 38, /* bunlti_d */ 10, /* bunler_d */ - 30, /* bunlei_d */ + 38, /* bunlei_d */ 18, /* buneqr_d */ - 38, /* buneqi_d */ + 46, /* buneqi_d */ 10, /* bunger_d */ - 30, /* bungei_d */ + 40, /* bungei_d */ 10, /* bungtr_d */ - 30, /* bungti_d */ 
+ 40, /* bungti_d */ 18, /* bltgtr_d */ - 38, /* bltgti_d */ + 48, /* bltgti_d */ 10, /* bordr_d */ - 30, /* bordi_d */ + 40, /* bordi_d */ 10, /* bunordr_d */ - 30, /* bunordi_d */ + 38, /* bunordi_d */ 0, /* pushargr_d */ 0, /* pushargi_d */ 0, /* retr_d */ @@ -401,15 +401,15 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ - 52, /* bswapr_us */ - 128, /* bswapr_ui */ + 38, /* bswapr_us */ + 94, /* bswapr_ui */ 0, /* bswapr_ul */ - 0, /* casr */ - 0, /* casi */ + 22, /* casr */ + 28, /* casi */ #endif /* __WORDSIZE */ #if __WORDSIZE == 64 -#define JIT_INSTR_MAX 344 +#define JIT_INSTR_MAX 300 0, /* data */ 0, /* live */ 6, /* align */ @@ -504,8 +504,8 @@ 24, /* nei */ 4, /* movr */ 16, /* movi */ - 14, /* movnr */ - 14, /* movzr */ + 18, /* movnr */ + 18, /* movzr */ 4, /* extr_c */ 4, /* extr_uc */ 4, /* extr_s */ @@ -810,9 +810,9 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ - 68, /* bswapr_us */ - 160, /* bswapr_ui */ - 344, /* bswapr_ul */ - 0, /* casr */ - 0, /* casi */ + 60, /* bswapr_us */ + 140, /* bswapr_ui */ + 300, /* bswapr_ul */ + 30, /* casr */ + 42, /* casi */ #endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_s390.c b/deps/lightning/lib/jit_s390.c index ef0c8998..30ab760c 100644 --- a/deps/lightning/lib/jit_s390.c +++ b/deps/lightning/lib/jit_s390.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2019 Free Software Foundation, Inc. + * Copyright (C) 2013-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* @@ -88,7 +88,6 @@ extern void __clear_cache(void *, void *); #define PROTO 1 # include "jit_s390-cpu.c" # include "jit_s390-fpu.c" -# include "jit_fallback.c" #undef PROTO /* @@ -1028,11 +1027,10 @@ _emit_code(jit_state_t *_jit) jit_regarg_set(node, value); switch (node->code) { case jit_code_align: - assert(!(node->u.w & (node->u.w - 1)) && - node->u.w <= sizeof(jit_word_t)); - if (node->u.w == sizeof(jit_word_t) && - (word = _jit->pc.w & (sizeof(jit_word_t) - 1))) - nop(sizeof(jit_word_t) - word); + /* Must align to a power of two */ + assert(!(node->u.w & (node->u.w - 1))); + if ((word = _jit->pc.w & (node->u.w - 1))) + nop(node->u.w - word); break; case jit_code_note: case jit_code_name: node->u.w = _jit->pc.w; @@ -1567,7 +1565,6 @@ _emit_code(jit_state_t *_jit) #define CODE 1 # include "jit_s390-cpu.c" # include "jit_s390-fpu.c" -# include "jit_fallback.c" #undef CODE void diff --git a/deps/lightning/lib/jit_size.c b/deps/lightning/lib/jit_size.c index 3a78394d..dcfa0b0e 100644 --- a/deps/lightning/lib/jit_size.c +++ b/deps/lightning/lib/jit_size.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2019 Free Software Foundation, Inc. + * Copyright (C) 2013-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -28,7 +28,7 @@ */ static jit_int16_t _szs[jit_code_last_code] = { #if GET_JIT_SIZE -# define JIT_INSTR_MAX 256 +# define JIT_INSTR_MAX 512 #else # if defined(__i386__) || defined(__x86_64__) # include "jit_x86-sz.c" @@ -101,9 +101,30 @@ _jit_get_size(jit_state_t *_jit) { jit_word_t size; jit_node_t *node; +# if __riscv && __WORDSIZE == 64 + jit_word_t extra = 0; +# endif - for (size = JIT_INSTR_MAX, node = _jitc->head; node; node = node->next) + for (size = JIT_INSTR_MAX, node = _jitc->head; node; node = node->next) { +# if __riscv && __WORDSIZE == 64 + /* Get estimative of extra memory for constants at end of code. 
*/ + switch (node->code) { + case jit_code_movi: + case jit_code_movi_f: + case jit_code_movi_d: + case jit_code_jmpi: + case jit_code_calli: + extra += sizeof(jit_word_t); + default: + break; + } +# endif size += _szs[node->code]; + } +# if __riscv && __WORDSIZE == 64 + /* Heuristically only 20% of constants are unique. */ + size += extra / 5; +# endif return size; } diff --git a/deps/lightning/lib/jit_sparc-cpu.c b/deps/lightning/lib/jit_sparc-cpu.c index ecea5066..86eb05e1 100644 --- a/deps/lightning/lib/jit_sparc-cpu.c +++ b/deps/lightning/lib/jit_sparc-cpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2019 Free Software Foundation, Inc. + * Copyright (C) 2013-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -100,6 +100,9 @@ _f2bp(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t, static void _f3r(jit_state_t*, jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); # if __WORDSIZE == 64 +# define f3ri(op, rd, op3, rs1, rs2) _f3ri(_jit, op, rd, op3, rs1, rs2) +static void _f3ri(jit_state_t*, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); # define f3rx(op, rd, op3, rs1, rs2) _f3rx(_jit, op, rd, op3, rs1, rs2) static void _f3rx(jit_state_t*, jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); @@ -113,7 +116,7 @@ static void _f3s(jit_state_t*, # define f3t(cond, rs1, i, ri) _f3t(_jit, cond, rs1, i, ri) static void _f3t(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t) maybe_unused; -# define f3a(op, rd, op3, rs1, rs2) _f3a(_jit, op, rd, op3, rs1, asi, rs2) +# define f3a(op,rd,op3,rs1,asi,rs2) _f3a(_jit, op, rd, op3, rs1, asi, rs2) static void _f3a(jit_state_t*,jit_int32_t, jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t) maybe_unused; @@ -194,6 +197,11 @@ static void _f3a(jit_state_t*,jit_int32_t, # define SWAP(rs1, rs2, rd) f3r(3, rd, 15, rs1, rs2) # define SWAPI(rs1, imm, rd) f3r(3, rd, 15, rs1, imm) # define SWAPA(rs1, rs2, asi, rd) f3a(3, rd, 23, rs1, 
asi, rs2) +/* Sparc v9 deprecates SWAP* in favor of CAS*A */ +# define CASA(rs1, rs2, rd) f3a(3, rd, 60, rs1, 128, rs2) +# if __WORDSIZE == 64 +# define CASXA(rs1, rs2, rd) f3a(3, rd, 62, rs1, 128, rs2) +# endif # define NOP() SETHI(0, 0) # define HI(im) ((im) >> 10) # define LO(im) ((im) & 0x3ff) @@ -1035,6 +1043,26 @@ _f3r(jit_state_t *_jit, jit_int32_t op, jit_int32_t rd, } # if __WORDSIZE == 64 +static void +_f3ri(jit_state_t *_jit, jit_int32_t op, jit_int32_t rd, + jit_int32_t op3, jit_int32_t rs1, jit_int32_t rs2) +{ + jit_instr_t v; + assert(!(op & 0xfffffffc)); + assert(!(rd & 0xffffffe0)); + assert(!(op3 & 0xffffffc0)); + assert(!(rs1 & 0xffffffe0)); + assert(!(rs2 & 0xffffffe0)); + v.op.b = op; + v.rd.b = rd; + v.op3.b = op3; + v.rs1.b = rs1; + v.i.b = 1; + v.asi.b = 0; + v.rs2.b = rs2; + ii(v.v); +} + static void _f3rx(jit_state_t *_jit, jit_int32_t op, jit_int32_t rd, jit_int32_t op3, jit_int32_t rs1, jit_int32_t rs2) @@ -1242,7 +1270,30 @@ static void _casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3, jit_word_t i0) { - fallback_casx(r0, r1, r2, r3, i0); + jit_int32_t iscasi, r1_reg; + if ((iscasi = (r1 == _NOREG))) { + r1_reg = jit_get_reg(jit_class_gpr); + r1 = rn(r1_reg); + movi(r1, i0); + } + /* Do not clobber r2 */ + movr(r0, r3); + /* The CASXA instruction compares the value in register r[rs2] with + * the doubleword in memory pointed to by the doubleword address in + * r[rs1]. If the values are equal, the value in r[rd] is swapped + * with the doubleword pointed to by the doubleword address in r[rs1]. + * If the values are not equal, the contents of the doubleword pointed + * to by r[rs1] replaces the value in r[rd], but the memory location + * remains unchanged. 
+ */ +# if __WORDSIZE == 32 + CASA(r1, r2, r0); +# else + CASXA(r1, r2, r0); +# endif + eqr(r0, r0, r2); + if (iscasi) + jit_unget_reg(r1_reg); } static void @@ -2421,8 +2472,12 @@ _calli(jit_state_t *_jit, jit_word_t i0) { jit_word_t w; w = (i0 - _jit->pc.w) >> 2; - CALLI(w); - NOP(); + if (s30_p(w)) { + CALLI(w); + NOP(); + } + else + (void)calli_p(i0); } static jit_word_t diff --git a/deps/lightning/lib/jit_sparc-fpu.c b/deps/lightning/lib/jit_sparc-fpu.c index ae2cbab3..95313477 100644 --- a/deps/lightning/lib/jit_sparc-fpu.c +++ b/deps/lightning/lib/jit_sparc-fpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2019 Free Software Foundation, Inc. + * Copyright (C) 2013-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * diff --git a/deps/lightning/lib/jit_sparc-sz.c b/deps/lightning/lib/jit_sparc-sz.c index 5e7ef95f..265769dd 100644 --- a/deps/lightning/lib/jit_sparc-sz.c +++ b/deps/lightning/lib/jit_sparc-sz.c @@ -94,8 +94,8 @@ 16, /* nei */ 4, /* movr */ 8, /* movi */ - 20, /* movnr */ - 20, /* movzr */ + 16, /* movnr */ + 16, /* movzr */ 8, /* extr_c */ 4, /* extr_uc */ 8, /* extr_s */ @@ -403,8 +403,8 @@ 20, /* bswapr_us */ 52, /* bswapr_ui */ 0, /* bswapr_ul */ - 0, /* casr */ - 0, /* casi */ + 24, /* casr */ + 32, /* casi */ #endif /* __WORDSIZE */ #if __WORDSIZE == 64 @@ -503,8 +503,8 @@ 16, /* nei */ 4, /* movr */ 24, /* movi */ - 20, /* movnr */ - 20, /* movzr */ + 16, /* movnr */ + 16, /* movzr */ 8, /* extr_c */ 4, /* extr_uc */ 8, /* extr_s */ @@ -515,19 +515,19 @@ 8, /* htonr_ui */ 4, /* htonr_ul */ 4, /* ldr_c */ - 28, /* ldi_c */ + 24, /* ldi_c */ 4, /* ldr_uc */ - 28, /* ldi_uc */ + 24, /* ldi_uc */ 4, /* ldr_s */ - 28, /* ldi_s */ + 24, /* ldi_s */ 4, /* ldr_us */ - 28, /* ldi_us */ + 24, /* ldi_us */ 4, /* ldr_i */ - 28, /* ldi_i */ + 24, /* ldi_i */ 4, /* ldr_ui */ - 28, /* ldi_ui */ + 24, /* ldi_ui */ 4, /* ldr_l */ - 28, /* ldi_l */ + 24, /* ldi_l */ 4, /* ldxr_c */ 24, /* ldxi_c */ 4, /* ldxr_uc */ @@ -543,13 +543,13 
@@ 4, /* ldxr_l */ 24, /* ldxi_l */ 4, /* str_c */ - 28, /* sti_c */ + 24, /* sti_c */ 4, /* str_s */ - 28, /* sti_s */ + 24, /* sti_s */ 4, /* str_i */ - 28, /* sti_i */ + 24, /* sti_i */ 4, /* str_l */ - 28, /* sti_l */ + 24, /* sti_l */ 4, /* stxr_c */ 24, /* stxi_c */ 4, /* stxr_s */ @@ -669,11 +669,11 @@ 16, /* movr_f */ 32, /* movi_f */ 8, /* ldr_f */ - 32, /* ldi_f */ + 28, /* ldi_f */ 8, /* ldxr_f */ 28, /* ldxi_f */ 8, /* str_f */ - 32, /* sti_f */ + 28, /* sti_f */ 8, /* stxr_f */ 28, /* stxi_f */ 20, /* bltr_f */ @@ -760,11 +760,11 @@ 4, /* movr_d */ 32, /* movi_d */ 4, /* ldr_d */ - 28, /* ldi_d */ + 24, /* ldi_d */ 4, /* ldxr_d */ 24, /* ldxi_d */ 4, /* str_d */ - 28, /* sti_d */ + 24, /* sti_d */ 4, /* stxr_d */ 24, /* stxi_d */ 12, /* bltr_d */ @@ -812,6 +812,6 @@ 20, /* bswapr_us */ 52, /* bswapr_ui */ 116, /* bswapr_ul */ - 0, /* casr */ - 0, /* casi */ + 24, /* casr */ + 44, /* casi */ #endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_sparc.c b/deps/lightning/lib/jit_sparc.c index a677998f..cd45d236 100644 --- a/deps/lightning/lib/jit_sparc.c +++ b/deps/lightning/lib/jit_sparc.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2019 Free Software Foundation, Inc. + * Copyright (C) 2013-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* @@ -40,7 +40,6 @@ static void _patch(jit_state_t*,jit_word_t,jit_node_t*); #define PROTO 1 # include "jit_sparc-cpu.c" # include "jit_sparc-fpu.c" -# include "jit_fallback.c" #undef PROTO /* @@ -1322,11 +1321,10 @@ _emit_code(jit_state_t *_jit) jit_regarg_set(node, value); switch (node->code) { case jit_code_align: - assert(!(node->u.w & (node->u.w - 1)) && - node->u.w <= sizeof(jit_word_t)); - if (node->u.w == sizeof(jit_word_t) && - (word = _jit->pc.w & (sizeof(jit_word_t) - 1))) - nop(sizeof(jit_word_t) - word); + /* Must align to a power of two */ + assert(!(node->u.w & (node->u.w - 1))); + if ((word = _jit->pc.w & (node->u.w - 1))) + nop(node->u.w - word); break; case jit_code_note: case jit_code_name: node->u.w = _jit->pc.w; @@ -1884,7 +1882,6 @@ _emit_code(jit_state_t *_jit) #define CODE 1 # include "jit_sparc-cpu.c" # include "jit_sparc-fpu.c" -# include "jit_fallback.c" #undef CODE void diff --git a/deps/lightning/lib/jit_x86-cpu.c b/deps/lightning/lib/jit_x86-cpu.c index 0d8affe8..1a473dee 100644 --- a/deps/lightning/lib/jit_x86-cpu.c +++ b/deps/lightning/lib/jit_x86-cpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2019 Free Software Foundation, Inc. + * Copyright (C) 2012-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* @@ -802,44 +802,49 @@ _rx(jit_state_t *_jit, jit_int32_t rd, jit_int32_t md, static void _nop(jit_state_t *_jit, jit_int32_t count) { - switch (count) { - case 0: - break; - case 1: /* NOP */ - ic(0x90); break; - case 2: /* 66 NOP */ - ic(0x66); ic(0x90); - break; - case 3: /* NOP DWORD ptr [EAX] */ - ic(0x0f); ic(0x1f); ic(0x00); - break; - case 4: /* NOP DWORD ptr [EAX + 00H] */ - ic(0x0f); ic(0x1f); ic(0x40); ic(0x00); - break; - case 5: /* NOP DWORD ptr [EAX + EAX*1 + 00H] */ - ic(0x0f); ic(0x1f); ic(0x44); ic(0x00); - ic(0x00); - break; - case 6: /* 66 NOP DWORD ptr [EAX + EAX*1 + 00H] */ - ic(0x66); ic(0x0f); ic(0x1f); ic(0x44); - ic(0x00); ic(0x00); - break; - case 7: /* NOP DWORD ptr [EAX + 00000000H] */ - ic(0x0f); ic(0x1f); ic(0x80); ii(0x0000); - break; - case 8: /* NOP DWORD ptr [EAX + EAX*1 + 00000000H] */ - ic(0x0f); ic(0x1f); ic(0x84); ic(0x00); - ii(0x0000); - break; - case 9: /* 66 NOP DWORD ptr [EAX + EAX*1 + 00000000H] */ - ic(0x66); ic(0x0f); ic(0x1f); ic(0x84); - ic(0x00); ii(0x0000); - break; - default: - abort(); + jit_int32_t i; + while (count) { + if (count > 9) + i = 9; + else + i = count; + switch (i) { + case 0: + break; + case 1: /* NOP */ + ic(0x90); break; + case 2: /* 66 NOP */ + ic(0x66); ic(0x90); + break; + case 3: /* NOP DWORD ptr [EAX] */ + ic(0x0f); ic(0x1f); ic(0x00); + break; + case 4: /* NOP DWORD ptr [EAX + 00H] */ + ic(0x0f); ic(0x1f); ic(0x40); ic(0x00); + break; + case 5: /* NOP DWORD ptr [EAX + EAX*1 + 00H] */ + ic(0x0f); ic(0x1f); ic(0x44); ic(0x00); + ic(0x00); + break; + case 6: /* 66 NOP DWORD ptr [EAX + EAX*1 + 00H] */ + ic(0x66); ic(0x0f); ic(0x1f); ic(0x44); + ic(0x00); ic(0x00); + break; + case 7: /* NOP DWORD ptr [EAX + 00000000H] */ + ic(0x0f); ic(0x1f); ic(0x80); ii(0x0000); + break; + case 8: /* NOP DWORD ptr [EAX + EAX*1 + 00000000H] */ + ic(0x0f); ic(0x1f); ic(0x84); ic(0x00); + ii(0x0000); + break; + case 9: /* 66 NOP DWORD ptr [EAX + EAX*1 + 00000000H] */ + ic(0x66); ic(0x0f); ic(0x1f); ic(0x84); + 
ic(0x00); ii(0x0000); + break; + } + count -= i; } } - static void _lea(jit_state_t *_jit, jit_int32_t md, jit_int32_t rb, jit_int32_t ri, jit_int32_t ms, jit_int32_t rd) diff --git a/deps/lightning/lib/jit_x86-sse.c b/deps/lightning/lib/jit_x86-sse.c index d09bda9b..4447a52e 100644 --- a/deps/lightning/lib/jit_x86-sse.c +++ b/deps/lightning/lib/jit_x86-sse.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2019 Free Software Foundation, Inc. + * Copyright (C) 2012-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * diff --git a/deps/lightning/lib/jit_x86-sz.c b/deps/lightning/lib/jit_x86-sz.c index ff7548a1..eb668b3c 100644 --- a/deps/lightning/lib/jit_x86-sz.c +++ b/deps/lightning/lib/jit_x86-sz.c @@ -95,8 +95,8 @@ 16, /* nei */ 2, /* movr */ 5, /* movi */ - 7, /* movnr */ - 7, /* movzr */ + 5, /* movnr */ + 5, /* movzr */ 11, /* extr_c */ 11, /* extr_uc */ 3, /* extr_s */ @@ -209,93 +209,93 @@ 0, /* retval_i */ 0, /* retval_ui */ 0, /* retval_l */ - 24, /* epilog */ + 23, /* epilog */ 0, /* arg_f */ 0, /* getarg_f */ 0, /* putargr_f */ 0, /* putargi_f */ 8, /* addr_f */ - 19, /* addi_f */ + 17, /* addi_f */ 12, /* subr_f */ - 19, /* subi_f */ + 17, /* subi_f */ 21, /* rsbi_f */ 8, /* mulr_f */ - 19, /* muli_f */ + 17, /* muli_f */ 12, /* divr_f */ - 19, /* divi_f */ + 17, /* divi_f */ 12, /* negr_f */ 12, /* absr_f */ - 6, /* sqrtr_f */ - 13, /* ltr_f */ + 4, /* sqrtr_f */ + 12, /* ltr_f */ 27, /* lti_f */ - 13, /* ler_f */ + 12, /* ler_f */ 27, /* lei_f */ - 15, /* eqr_f */ + 14, /* eqr_f */ 29, /* eqi_f */ - 13, /* ger_f */ + 12, /* ger_f */ 27, /* gei_f */ - 13, /* gtr_f */ + 12, /* gtr_f */ 27, /* gti_f */ - 18, /* ner_f */ + 17, /* ner_f */ 32, /* nei_f */ - 13, /* unltr_f */ + 12, /* unltr_f */ 27, /* unlti_f */ - 13, /* unler_f */ + 12, /* unler_f */ 27, /* unlei_f */ - 13, /* uneqr_f */ + 12, /* uneqr_f */ 27, /* uneqi_f */ - 13, /* unger_f */ + 12, /* unger_f */ 27, /* ungei_f */ - 13, /* ungtr_f */ + 12, /* ungtr_f */ 27, /* ungti_f 
*/ - 13, /* ltgtr_f */ + 12, /* ltgtr_f */ 27, /* ltgti_f */ - 13, /* ordr_f */ + 12, /* ordr_f */ 27, /* ordi_f */ - 13, /* unordr_f */ + 12, /* unordr_f */ 27, /* unordi_f */ - 8, /* truncr_f_i */ + 4, /* truncr_f_i */ 0, /* truncr_f_l */ - 8, /* extr_f */ + 4, /* extr_f */ 4, /* extr_d_f */ 10, /* movr_f */ - 19, /* movi_f */ + 15, /* movi_f */ 4, /* ldr_f */ 8, /* ldi_f */ 5, /* ldxr_f */ 8, /* ldxi_f */ - 6, /* str_f */ - 10, /* sti_f */ - 7, /* stxr_f */ + 4, /* str_f */ + 8, /* sti_f */ + 5, /* stxr_f */ 8, /* stxi_f */ - 10, /* bltr_f */ - 23, /* blti_f */ - 10, /* bler_f */ - 23, /* blei_f */ - 12, /* beqr_f */ - 25, /* beqi_f */ - 10, /* bger_f */ - 23, /* bgei_f */ - 10, /* bgtr_f */ - 23, /* bgti_f */ - 13, /* bner_f */ - 26, /* bnei_f */ - 10, /* bunltr_f */ - 23, /* bunlti_f */ - 10, /* bunler_f */ - 23, /* bunlei_f */ - 10, /* buneqr_f */ - 23, /* buneqi_f */ - 10, /* bunger_f */ - 23, /* bungei_f */ - 10, /* bungtr_f */ - 23, /* bungti_f */ - 10, /* bltgtr_f */ - 23, /* bltgti_f */ - 10, /* bordr_f */ - 23, /* bordi_f */ - 10, /* bunordr_f */ - 23, /* bunordi_f */ + 9, /* bltr_f */ + 18, /* blti_f */ + 9, /* bler_f */ + 18, /* blei_f */ + 11, /* beqr_f */ + 20, /* beqi_f */ + 9, /* bger_f */ + 18, /* bgei_f */ + 9, /* bgtr_f */ + 18, /* bgti_f */ + 12, /* bner_f */ + 21, /* bnei_f */ + 9, /* bunltr_f */ + 18, /* bunlti_f */ + 9, /* bunler_f */ + 18, /* bunlei_f */ + 9, /* buneqr_f */ + 18, /* buneqi_f */ + 9, /* bunger_f */ + 18, /* bungei_f */ + 9, /* bungtr_f */ + 18, /* bungti_f */ + 9, /* bltgtr_f */ + 18, /* bltgti_f */ + 9, /* bordr_f */ + 18, /* bordi_f */ + 9, /* bunordr_f */ + 18, /* bunordi_f */ 0, /* pushargr_f */ 0, /* pushargi_f */ 0, /* retr_f */ @@ -316,7 +316,7 @@ 26, /* divi_d */ 18, /* negr_d */ 13, /* absr_d */ - 6, /* sqrtr_d */ + 4, /* sqrtr_d */ 13, /* ltr_d */ 37, /* lti_d */ 13, /* ler_d */ @@ -345,9 +345,9 @@ 37, /* ordi_d */ 13, /* unordr_d */ 37, /* unordi_d */ - 8, /* truncr_d_i */ + 4, /* truncr_d_i */ 0, /* truncr_d_l 
*/ - 8, /* extr_d */ + 4, /* extr_d */ 4, /* extr_f_d */ 10, /* movr_d */ 24, /* movi_d */ @@ -355,9 +355,9 @@ 8, /* ldi_d */ 5, /* ldxr_d */ 8, /* ldxi_d */ - 6, /* str_d */ - 10, /* sti_d */ - 7, /* stxr_d */ + 4, /* str_d */ + 8, /* sti_d */ + 5, /* stxr_d */ 8, /* stxi_d */ 10, /* bltr_d */ 28, /* blti_d */ @@ -405,7 +405,7 @@ 4, /* bswapr_ui */ 0, /* bswapr_ul */ 9, /* casr */ - 0, /* casi */ + 13, /* casi */ #endif #if __X64 @@ -1251,9 +1251,9 @@ 0, /* getarg_l */ 0, /* putargr */ 0, /* putargi */ - 42, /* va_start */ + 38, /* va_start */ 41, /* va_arg */ - 50, /* va_arg_d */ + 48, /* va_arg_d */ 0, /* va_end */ 5, /* addr */ 13, /* addi */ @@ -1417,10 +1417,10 @@ 10, /* bxsubi */ 9, /* bxsubr_u */ 10, /* bxsubi_u */ - 3, /* jmpr */ - 5, /* jmpi */ + 2, /* jmpr */ + 13, /* jmpi */ 3, /* callr */ - 13, /* calli */ + 12, /* calli */ 0, /* prepare */ 0, /* pushargr */ 0, /* pushargi */ @@ -1442,14 +1442,14 @@ 0, /* putargr_f */ 0, /* putargi_f */ 10, /* addr_f */ - 21, /* addi_f */ + 20, /* addi_f */ 15, /* subr_f */ - 21, /* subi_f */ + 20, /* subi_f */ 30, /* rsbi_f */ 10, /* mulr_f */ - 21, /* muli_f */ + 20, /* muli_f */ 15, /* divr_f */ - 21, /* divi_f */ + 20, /* divi_f */ 15, /* negr_f */ 15, /* absr_f */ 5, /* sqrtr_f */ @@ -1496,7 +1496,7 @@ 7, /* stxr_f */ 9, /* stxi_f */ 10, /* bltr_f */ - 21, /* blti_f */ + 20, /* blti_f */ 10, /* bler_f */ 25, /* blei_f */ 12, /* beqr_f */ @@ -1631,8 +1631,8 @@ 9, /* bswapr_us */ 6, /* bswapr_ui */ 6, /* bswapr_ul */ - 0, /* casr */ - 0, /* casi */ + 11, /* casr */ + 16, /* casi */ #endif /* __CYGWIN__ || _WIN32 */ # endif /* __X64_32 */ #endif /* __X64 */ diff --git a/deps/lightning/lib/jit_x86-x87.c b/deps/lightning/lib/jit_x86-x87.c index 4453bf30..227b1a2f 100644 --- a/deps/lightning/lib/jit_x86-x87.c +++ b/deps/lightning/lib/jit_x86-x87.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2019 Free Software Foundation, Inc. + * Copyright (C) 2012-2022 Free Software Foundation, Inc. 
* * This file is part of GNU lightning. * diff --git a/deps/lightning/lib/jit_x86.c b/deps/lightning/lib/jit_x86.c index fb0b06ba..6472e566 100644 --- a/deps/lightning/lib/jit_x86.c +++ b/deps/lightning/lib/jit_x86.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2019 Free Software Foundation, Inc. + * Copyright (C) 2012-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -1593,8 +1593,8 @@ _emit_code(jit_state_t *_jit) jit_regarg_set(node, value); switch (node->code) { case jit_code_align: - assert(!(node->u.w & (node->u.w - 1)) && - node->u.w <= sizeof(jit_word_t)); + /* Must align to a power of two */ + assert(!(node->u.w & (node->u.w - 1))); if ((word = _jit->pc.w & (node->u.w - 1))) nop(node->u.w - word); break; diff --git a/deps/lightning/lib/lightning.c b/deps/lightning/lib/lightning.c index e7ce3832..b3c245eb 100644 --- a/deps/lightning/lib/lightning.c +++ b/deps/lightning/lib/lightning.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2019 Free Software Foundation, Inc. + * Copyright (C) 2012-2022 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* @@ -64,13 +64,25 @@ static void _del_label(jit_state_t*, jit_node_t*, jit_node_t*); static void _jit_dataset(jit_state_t *_jit); +#define block_update_set(block, target) _block_update_set(_jit, block, target) +static jit_bool_t _block_update_set(jit_state_t*, jit_block_t*, jit_block_t*); + +#define check_block_again() _check_block_again(_jit) +static jit_bool_t _check_block_again(jit_state_t*); + +#define do_setup() _do_setup(_jit) +static void _do_setup(jit_state_t*); + #define jit_setup(block) _jit_setup(_jit, block) static void _jit_setup(jit_state_t *_jit, jit_block_t *block); -#define jit_follow(block, todo) _jit_follow(_jit, block, todo) +#define do_follow(always) _do_follow(_jit, always) +static void _do_follow(jit_state_t*, jit_bool_t); + +#define jit_follow(block) _jit_follow(_jit, block) static void -_jit_follow(jit_state_t *_jit, jit_block_t *block, jit_bool_t *todo); +_jit_follow(jit_state_t *_jit, jit_block_t *block); #define jit_update(node, live, mask) _jit_update(_jit, node, live, mask) static void @@ -1623,104 +1635,208 @@ _jit_patch_at(jit_state_t *_jit, jit_node_t *instr, jit_node_t *label) label->link = instr; } -void -_jit_optimize(jit_state_t *_jit) +static void +_do_setup(jit_state_t *_jit) { - jit_int32_t pass; - jit_bool_t jump; - jit_bool_t todo; - jit_int32_t mask; - jit_node_t *node; jit_block_t *block; jit_word_t offset; - _jitc->function = NULL; - - thread_jumps(); - sequential_labels(); - split_branches(); - - pass = 0; - -second_pass: /* create initial mapping of live register values * at the start of a basic block */ for (offset = 0; offset < _jitc->blocks.offset; offset++) { block = _jitc->blocks.ptr + offset; - if (!block->label) + if (!block->label || block->label->code == jit_code_epilog) continue; - if (block->label->code != jit_code_epilog) - jit_setup(block); + jit_setup(block); } +} - /* set live state of registers not referenced in a block, but - * referenced in a jump target or normal flow */ - do { - todo = 0; - for 
(offset = 0; offset < _jitc->blocks.offset; offset++) { - block = _jitc->blocks.ptr + offset; - if (!block->label) - continue; - if (block->label->code != jit_code_epilog) - jit_follow(block, &todo); - } - } while (todo); +static jit_bool_t +_block_update_set(jit_state_t *_jit, + jit_block_t *block, jit_block_t *target) +{ + jit_regset_t regmask; + + jit_regset_ior(&regmask, &block->reglive, &target->reglive); + jit_regset_and(&regmask, &regmask, &block->regmask); + if (jit_regset_set_p(&regmask)) { + jit_regset_ior(&block->reglive, &block->reglive, &regmask); + jit_regset_and(&regmask, &block->reglive, &block->regmask); + jit_regset_com(&regmask, &regmask); + jit_regset_and(&block->regmask, &block->regmask, &regmask); + block->again = 1; + return (1); + } + return (0); +} - if (pass == 0) { - todo = 0; +static jit_bool_t +_check_block_again(jit_state_t *_jit) +{ + jit_int32_t todo; + jit_word_t offset; + jit_node_t *node, *label; + jit_block_t *block, *target; - patch_registers(); - if (simplify()) + todo = 0; + for (offset = 0; offset < _jitc->blocks.offset; offset++) { + block = _jitc->blocks.ptr + offset; + if (block->again) { todo = 1; - - /* figure out labels that are only reached with a jump - * and is required to do a simple redundant_store removal - * on jit_beqi below */ - jump = 1; - for (node = _jitc->head; node; node = node->next) { - switch (node->code) { - case jit_code_label: - if (!jump) - node->flag |= jit_flag_head; - break; - case jit_code_jmpi: case jit_code_jmpr: - case jit_code_epilog: - jump = 1; - break; - case jit_code_data: case jit_code_note: - break; - default: - jump = 0; - break; - } + break; } + } + /* If no block changed state */ + if (!todo) + return (0); + do { + todo = 0; + block = NULL; for (node = _jitc->head; node; node = node->next) { - mask = jit_classify(node->code); - if (mask & jit_cc_a0_reg) - node->u.w &= ~jit_regno_patch; - if (mask & jit_cc_a1_reg) - node->v.w &= ~jit_regno_patch; - if (mask & jit_cc_a2_reg) - node->w.w &= ~jit_regno_patch; -
if (node->code == jit_code_beqi) { - if (redundant_store(node, 1)) + /* Special jumps that match jit_cc_a0_jmp */ + if (node->code == jit_code_calli || node->code == jit_code_callr) + continue; + + /* Remember current label */ + if (node->code == jit_code_label || + node->code == jit_code_prolog || + node->code == jit_code_epilog) { + + /* If previous block does not pass through */ + if (!(node->flag & jit_flag_head)) + block = NULL; + + target = _jitc->blocks.ptr + node->v.w; + /* Update if previous block pass through */ + if (block && block->again && block_update_set(target, block)) todo = 1; + block = target; + if (!block->again) + continue; } - else if (node->code == jit_code_bnei) { - if (redundant_store(node, 0)) + /* If not the first jmpi */ + else if (block) { + /* If a jump to dynamic address or if a jump to raw address */ + if (!(jit_classify(node->code) & jit_cc_a0_jmp) || + !(node->flag & jit_flag_node)) + continue; + label = node->u.n; + /* Mark predecessor needs updating due to target change */ + target = _jitc->blocks.ptr + label->v.w; + if (target->again && block_update_set(block, target)) todo = 1; } } + } + while (todo); + + return (1); +} + +static void +_do_follow(jit_state_t *_jit, jit_bool_t always) +{ + jit_block_t *block; + jit_word_t offset; + + /* set live state of registers not referenced in a block, but + * referenced in a jump target or normal flow */ + for (offset = 0; offset < _jitc->blocks.offset; offset++) { + block = _jitc->blocks.ptr + offset; + if (!block->label || block->label->code == jit_code_epilog) + continue; + if (always || block->again) { + block->again = 0; + jit_follow(block); + } + } +} + +void +_jit_optimize(jit_state_t *_jit) +{ + jit_bool_t jump; + jit_bool_t todo; + jit_int32_t mask; + jit_node_t *node; + jit_block_t *block; + jit_word_t offset; + + todo = 0; + _jitc->function = NULL; + + thread_jumps(); + sequential_labels(); + split_branches(); + do_setup(); + do_follow(1); + + patch_registers(); + if 
(simplify()) + todo = 1; + + /* Figure out labels that are only reached with a jump + * and is required to do a simple redundant_store removal + * on jit_beqi below */ + jump = 1; + for (node = _jitc->head; node; node = node->next) { + switch (node->code) { + case jit_code_label: + if (!jump) + node->flag |= jit_flag_head; + break; + case jit_code_jmpi: case jit_code_jmpr: + case jit_code_epilog: + jump = 1; + break; + case jit_code_data: case jit_code_note: + break; + default: + jump = 0; + break; + } + } - /* If instructions were removed, must recompute state at - * start of blocks. */ - if (todo) { - pass = 1; - goto second_pass; + for (node = _jitc->head; node; node = node->next) { + mask = jit_classify(node->code); + if (mask & jit_cc_a0_reg) + node->u.w &= ~jit_regno_patch; + if (mask & jit_cc_a1_reg) + node->v.w &= ~jit_regno_patch; + if (mask & jit_cc_a2_reg) + node->w.w &= ~jit_regno_patch; + if (node->code == jit_code_beqi) { + if (redundant_store(node, 1)) { + block = _jitc->blocks.ptr + ((jit_node_t *)node->u.n)->v.w; + block->again = 1; + todo = 1; + } + } + else if (node->code == jit_code_bnei) { + if (redundant_store(node, 0)) { + block = _jitc->blocks.ptr + ((jit_node_t *)node->u.n)->v.w; + block->again = 1; + todo = 1; + } } } + if (!todo) + todo = check_block_again(); + + /* If instructions were removed or first pass did modify the entry + * state of any block */ + if (todo) { + do_setup(); + todo = 0; + do { + do_follow(0); + /* If any block again has the entry state modified. 
*/ + todo = check_block_again(); + } while (todo); + } + for (node = _jitc->head; node; node = node->next) { mask = jit_classify(node->code); if (mask & jit_cc_a0_reg) @@ -2094,6 +2210,7 @@ _jit_emit(jit_state_t *_jit) #if defined(__sgi) int mmap_fd; #endif + int mmap_prot, mmap_flags; if (!_jitc->realize) jit_realize(); @@ -2107,12 +2224,22 @@ _jit_emit(jit_state_t *_jit) assert(_jit->user_code); #else if (!_jit->user_code) { + mmap_prot = PROT_READ | PROT_WRITE; +#if !__OpenBSD__ + mmap_prot |= PROT_EXEC; +#endif +#if __NetBSD__ + mmap_prot = PROT_MPROTECT(mmap_prot); + mmap_flags = 0; +#else + mmap_flags = MAP_PRIVATE; +#endif + mmap_flags |= MAP_ANON; #if defined(__sgi) mmap_fd = open("/dev/zero", O_RDWR); #endif _jit->code.ptr = mmap(NULL, _jit->code.length, - PROT_EXEC | PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANON, mmap_fd, 0); + mmap_prot, mmap_flags, mmap_fd, 0); assert(_jit->code.ptr != MAP_FAILED); } #endif /* !HAVE_MMAP */ @@ -2121,6 +2248,11 @@ _jit_emit(jit_state_t *_jit) _jit->pc.uc = _jit->code.ptr; for (;;) { +#if __NetBSD__ + result = mprotect(_jit->code.ptr, _jit->code.length, + PROT_READ | PROT_WRITE); + assert(result == 0); +#endif if ((code = emit_code()) == NULL) { _jitc->patches.offset = 0; for (node = _jitc->head; node; node = node->next) { @@ -2184,12 +2316,17 @@ _jit_emit(jit_state_t *_jit) jit_free((jit_pointer_t *)&_jitc->data.ptr); #if HAVE_MMAP else { - result = mprotect(_jit->data.ptr, _jit->data.length, PROT_READ); + result = mprotect(_jit->data.ptr, + _jit->data.length, PROT_READ); assert(result == 0); } if (!_jit->user_code) { - result = mprotect(_jit->code.ptr, _jit->code.length, - PROT_READ | PROT_EXEC); + length = _jit->pc.uc - _jit->code.ptr; +# if __riscv && __WORDSIZE == 64 + /* FIXME should start adding consts at a page boundary */ + length -= _jitc->consts.hash.count * sizeof(jit_word_t); +# endif + result = mprotect(_jit->code.ptr, length, PROT_READ | PROT_EXEC); assert(result == 0); } #endif /* HAVE_MMAP */ @@ 
-2314,7 +2451,7 @@ _jit_setup(jit_state_t *_jit, jit_block_t *block) * or normal flow that have a live register not used in this block. */ static void -_jit_follow(jit_state_t *_jit, jit_block_t *block, jit_bool_t *todo) +_jit_follow(jit_state_t *_jit, jit_block_t *block) { jit_node_t *node; jit_block_t *next; @@ -2343,7 +2480,7 @@ _jit_follow(jit_state_t *_jit, jit_block_t *block, jit_bool_t *todo) /* Remove from unknown state bitmask. */ jit_regset_com(&regtemp, &regtemp); jit_regset_and(&block->regmask, &block->regmask, &regtemp); - *todo = 1; + block->again = 1; } case jit_code_prolog: case jit_code_epilog: @@ -2445,7 +2582,7 @@ _jit_follow(jit_state_t *_jit, jit_block_t *block, jit_bool_t *todo) jit_regset_com(&regtemp, &regtemp); jit_regset_and(&block->regmask, &block->regmask, &regtemp); - *todo = 1; + block->again = 1; } } else { @@ -3095,6 +3232,8 @@ _redundant_store(jit_state_t *_jit, jit_node_t *node, jit_bool_t jump) break; } } + + return (result); } static jit_bool_t diff --git a/deps/lightning/size.c b/deps/lightning/size.c index 4e933701..1728fb2e 100644 --- a/deps/lightning/size.c +++ b/deps/lightning/size.c @@ -99,12 +99,14 @@ main(int argc, char *argv[]) fprintf(fp, "#endif /* NEW_ABI */\n"); # endif #elif defined(__powerpc__) +# if __WORDSIZE == 32 fprintf(fp, "#endif /* " -# if !_CALL_SYSV +# if !_CALL_SYSV "!" -# endif +# endif "_CALL_SYSV" " */\n"); +# endif fprintf(fp, "#endif /* __BYTE_ORDER */\n"); fprintf(fp, "#endif /* __powerpc__ */\n"); #endif