gpu_neon: fix some missing ebuf updates

[pcsx_rearmed.git] / deps / lightning / doc / body.texi
diff --git a/deps/lightning/doc/body.texi b/deps/lightning/doc/body.texi

index 51c08d3..f71b77c 100644 (file)
--- a/deps/lightning/doc/body.texi
+++ b/deps/lightning/doc/body.texi
@@ -89,7 +89,11 @@ assembles machine instructions without further tests.
  @node Installation
  @chapter Configuring and installing @lightning{}
  
-The first thing to do to use @lightning{} is to configure the
+Here we will assume that your system already has the dependencies
+necessary to build @lightning{}. For more on dependencies, see
+@lightning{}'s @file{README-hacking} file.
+
+The first thing to do to build @lightning{} is to configure the
  program, picking the set of macros to be used on the host
  architecture; this configuration is automatically performed by
  the @file{configure} shell script; to run it, merely type:
@@ -97,17 +101,30 @@ the @file{configure} shell script; to run it, merely type:
       ./configure
  @end example
  
-@lightning{} supports the @code{--enable-disassembler} option, that
-enables linking to GNU binutils and optionally print human readable
+The @file{configure} accepts the @code{--enable-disassembler} option,
+that enables linking to GNU binutils and optionally print human readable
  disassembly of the jit code. This option can be disabled by the
  @code{--disable-disassembler} option.
  
-Another option that @file{configure} accepts is
-@code{--enable-assertions}, which enables several consistency checks in
-the run-time assemblers.  These are not usually needed, so you can
-decide to simply forget about it; also remember that these consistency
+@file{configure} also accepts the @code{--enable-devel-disassembler}
+option, useful to check exactly what machine instructions were generated
+for a @lightning{} instruction. It basically mixes @code{jit_print} and
+@code{jit_disassembly}.
+
+The @code{--enable-assertions} option enables several consistency
+checks in the run-time assemblers.  These are not usually needed, so you
+can decide to simply forget about it; also remember that these consistency
  checks tend to slow down your code generator.
  
+The @code{--enable-devel-strong-type-checking} option does extra type
+checking using @code{assert}. This option also enables the
+@code{--enable-assertions} unless it is explicitly disabled.
+
+The option @code{--enable-devel-get-jit-size} should only be used
+when doing updates or maintenance to lightning. It regenerates the
+@code{jit_$ARCH-sz.c} file, creating a table of maximum byte usage when
+translating a @lightning{} instruction to machine code.
+
  After you've configured @lightning{}, run @file{make} as usual.
  
  @lightning{} has an extensive set of tests to validate it is working
@@ -230,6 +247,8 @@ rsbr         _f  _d  O1 = O3 - O1
  rsbi         _f  _d  O1 = O3 - O1
  mulr         _f  _d  O1 = O2 * O3
  muli         _f  _d  O1 = O2 * O3
+hmulr    _u          O1 = ((O2 * O3) >> WORDSIZE)
+hmuli    _u          O1 = ((O2 * O3) >> WORDSIZE)
  divr     _u  _f  _d  O1 = O2 / O3
  divi     _u  _f  _d  O1 = O2 / O3
  remr     _u          O1 = O2 % O3
@@ -244,10 +263,18 @@ lshr                 O1 = O2 << O3
  lshi                 O1 = O2 << O3
  rshr     _u          O1 = O2 >> O3@footnote{The sign bit is propagated unless using the @code{_u} modifier.}
  rshi     _u          O1 = O2 >> O3@footnote{The sign bit is propagated unless using the @code{_u} modifier.}
+lrotr                O1 = (O2 << O3) | (O2 >> (WORDSIZE - O3))
+lroti                O1 = (O2 << O3) | (O2 >> (WORDSIZE - O3))
+rrotr                O1 = (O2 >> O3) | (O2 << (WORDSIZE - O3))
+rroti                O1 = (O2 >> O3) | (O2 << (WORDSIZE - O3))
  movzr                O1 = O3 ? O1 : O2
  movnr                O1 = O3 ? O2 : O1
  @end example
  
+Note that @code{lrotr}, @code{lroti}, @code{rrotr} and @code{rroti}
+are described as the fallback operation. These are bit shift/rotation
+operations.
+
  @item Four operand binary ALU operations
  These accept two result registers, and two operands; the last one can
  be an immediate. The first two arguments cannot be the same register.
@@ -262,28 +289,108 @@ minus one.
  @code{O2}. It can be used as quick way to check if a division is
  exact, in which case the remainder is zero.
  
+@code{qlsh} shifts from 0 to @emph{wordsize}, doing a normal left
+shift for the first result register and setting the second result
+register to the overflow bits. @code{qlsh} can be used as a quick
+way to multiply by powers of two.
+
+@code{qrsh} shifts from 0 to @emph{wordsize}, doing a normal right
+shift for the first result register and setting the second result
+register to the overflow bits. @code{qrsh} can be used as a quick
+way to divide by powers of two.
+
+Note that @code{qlsh} and @code{qrsh} are basically implemented as
+two shifts. It is undefined behavior to pass a value not in the range
+0 to @emph{wordsize}. Most cpus will usually @code{and} the shift
+amount with @emph{wordsize} - 1, or possibly use the @emph{remainder}.
+@lightning{} only generates code to specially handle 0 and @emph{wordsize}
+shifts. Since a code generator for a @emph{safe language} should
+usually check the shift amount, these instructions usually should be
+used as a fast path to check for division without remainder or
+multiplication that does not overflow.
+
  @example
  qmulr    _u       O1 O2 = O3 * O4
  qmuli    _u       O1 O2 = O3 * O4
  qdivr    _u       O1 O2 = O3 / O4
  qdivi    _u       O1 O2 = O3 / O4
+qlshr    _u       O1 = O3 << O4, O2 = O3 >> (WORDSIZE - O4)
+qlshi    _u       O1 = O3 << O4, O2 = O3 >> (WORDSIZE - O4)
+qrshr    _u       O1 = O3 >> O4, O2 = O3 << (WORDSIZE - O4)
+qrshi    _u       O1 = O3 >> O4, O2 = O3 << (WORDSIZE - O4)
  @end example
  
+These four operand ALU operations are only defined for float operands.
+
+@example
+fmar         _f  _d  O1 =  O2 * O3 + O4
+fmai         _f  _d  O1 =  O2 * O3 + O4
+fmsr         _f  _d  O1 =  O2 * O3 - O4
+fmsi         _f  _d  O1 =  O2 * O3 - O4
+fnmar        _f  _d  O1 = -O2 * O3 - O4
+fnmai        _f  _d  O1 = -O2 * O3 - O4
+fnmsr        _f  _d  O1 = -O2 * O3 + O4
+fnmsi        _f  _d  O1 = -O2 * O3 + O4
+@end example
+
+These are a family of fused multiply-add instructions.
+Note that @lightning{} does not handle rounding modes nor math exceptions.
+Also note that not all backends provide an instruction for the equivalent
+@lightning{} instruction presented above. Some are completely implemented
+as fallbacks and some are composed of one or more instructions. For common
+input this should not cause major issues, but note that when implemented by
+the cpu, these are implemented as the multiplication calculated with infinite
+precision, and after the addition step rounding is done. Due to this, for
+specially crafted input different ports might show different output. When
+implemented by the CPU, it is also possible to have exceptions that do
+not happen if implemented as a fallback.
+
  @item Unary ALU operations
-These accept two operands, both of which must be registers.
+These accept two operands, the first must be a register and the
+second is a register if the @code{r} modifier is used, otherwise,
+the @code{i} modifier is used and the second argument is a constant.
+
  @example
  negr         _f  _d  O1 = -O2
+negi         _f  _d  O1 = -O2
  comr                 O1 = ~O2
+comi                 O1 = ~O2
+clor                O1 = number of leading one bits in O2
+cloi                O1 = number of leading one bits in O2
+clzr                O1 = number of leading zero bits in O2
+clzi                O1 = number of leading zero bits in O2
+ctor                O1 = number of trailing one bits in O2
+ctoi                O1 = number of trailing one bits in O2
+ctzr                O1 = number of trailing zero bits in O2
+ctzi                O1 = number of trailing zero bits in O2
+rbitr               O1 = bits of O2 reversed
+rbiti               O1 = bits of O2 reversed
+popcntr                     O1 = number of bits set in O2
+popcnti                     O1 = number of bits set in O2
  @end example
  
+Note that @code{ctzr} is basically equivalent to a @code{C} call to
+@code{ffs} but indexed at bit zero, not one.
+
+Contrary to @code{__builtin_ctz} and @code{__builtin_clz}, an input
+value of zero is not an error, it just returns the number of bits
+in a word, 64 if @lightning{} generates 64 bit instructions, otherwise
+it returns 32.
+
+The @code{clor} and @code{ctor} are just counterparts of the versions
+that search for zero bits.
+
  These unary ALU operations are only defined for float operands.
+
  @example
  absr         _f  _d  O1 = fabs(O2)
-sqrtr                O1 = sqrt(O2)
+absi         _f  _d  O1 = fabs(O2)
+sqrtr        _f  _d  O1 = sqrt(O2)
+sqrti        _f  _d  O1 = sqrt(O2)
  @end example
  
-Besides requiring the @code{r} modifier, there are no unary operations
-with an immediate operand.
+Note that for @code{float} and @code{double} unary operations, @lightning{}
+will generate code to actually execute the operation at runtime.
  
  @item Compare instructions
  These accept three operands; again, the last can be an immediate.
@@ -333,6 +440,35 @@ movr                                 _f  _d  O1 = O2
  movi                                 _f  _d  O1 = O2
  extr      _c  _uc  _s  _us  _i  _ui  _f  _d  O1 = O2
  truncr                               _f  _d  O1 = trunc(O2)
+extr                                         O1 = sign_extend(O2[O3:O3+O4])
+extr_u                                       O1 = O2[O3:O3+O4]
+depr                                         O1[O3:O3+O4] = O2
+@end example
+
+@code{extr}, @code{extr_u} and @code{depr} are useful to access @code{C}
+compatible bit fields, provided that these are contained in a machine
+word. @code{extr} is used to @emph{extract} and sign extend a value
+from a bit field. @code{extr_u} is used to @emph{extract} and zero
+extend a value from a bit field. @code{depr} is used to @emph{deposit}
+a value into a bit field.
+
+@example
+extr(result, source, offset, length)
+extr_u(result, source, offset, length)
+depr(result, source, offset, length)
+@end example
+
+A common way to declare @code{C} and @lightning{} compatible bit fields is:
+@example
+union @{
+    struct @{
+        jit_word_t  signed_bits: @code{length};
+        jit_uword_t unsigned_bits: @code{length};
+        ...
+    @} s;
+    jit_word_t  signed_value;
+    jit_uword_t unsigned_value;
+@} u;
  @end example
  
  In 64-bit architectures it may be required to use @code{truncr_f_i},
@@ -341,10 +477,10 @@ the equivalent C code.  Only the @code{_i} modifier is available in
  32-bit architectures.
  
  @example
-truncr_f_i    = <int> O1 = <float> O2
-truncr_f_l    = <long>O1 = <float> O2
-truncr_d_i    = <int> O1 = <double>O2
-truncr_d_l    = <long>O1 = <double>O2
+truncr_f_i    <int> O1 = <float> O2
+truncr_f_l    <long>O1 = <float> O2
+truncr_d_i    <int> O1 = <double>O2
+truncr_d_l    <long>O1 = <double>O2
  @end example
  
  The float conversion operations are @emph{destination first,
@@ -352,10 +488,38 @@ source second}, but the order of the types is reversed.  This happens
  for historical reasons.
  
  @example
-extr_f_d    = <double>O1 = <float> O2
-extr_d_f    = <float> O1 = <double>O2
+extr_f_d      <double>O1 = <float> O2
+extr_d_f      <float> O1 = <double>O2
  @end example
  
+The float to/from integer transfer operations are also @emph{destination
+first, source second}. These were added later, but follow the same
+historic pattern.
+
+@example
+movr_w_f     <float>O1 = <int>O2
+movi_w_f     <float>O1 = <int>O2
+movr_f_w     <int>O1 = <float>O2
+movi_f_w     <int>O1 = <float>O2
+movr_w_d     <double>O1 = <long>O2
+movi_w_d     <double>O1 = <long>O2
+movr_d_w     <long>O1 = <double>O2
+movi_d_w     <long>O1 = <double>O2
+movr_ww_d    <double>O1 = [<int>O2:<int>O3]
+movi_ww_d    <double>O1 = [<int>O2:<int>O3]
+movr_d_ww    [<int>O1:<int>O2] = <double>O3
+movi_d_ww    [<int>O1:<int>O2] = <double>O3
+@end example
+
+These are used to transfer bits to/from floats to/from integers, and are
+useful to access bits of floating point values.
+
+@code{movr_w_d}, @code{movi_w_d}, @code{movr_d_w} and @code{movi_d_w} are
+only available in 64-bit. Conversely, @code{movr_ww_d}, @code{movi_ww_d},
+@code{movr_d_ww} and @code{movi_d_ww} are only available in 32-bit.
+For the int pair to/from double transfers, integer arguments must respect
+endianness, to match how the cpu handles the verbatim byte values.
+
  @item Network extensions
  These accept two operands, both of which must be registers; these
  two instructions actually perform the same task, yet they are
@@ -368,6 +532,14 @@ htonr    _us _ui _ul @r{Host-to-network (big endian) order}
  ntohr    _us _ui _ul @r{Network-to-host order }
  @end example
  
+@code{bswapr} can be used to unconditionally byte-swap an operand.
+On little-endian architectures, @code{htonr} and @code{ntohr} resolve
+to this.
+The @code{_ul} variant is only available in 64-bit architectures.
+@example
+bswapr    _us _ui _ul  O1 = byte_swap(O2)
+@end example
+
  @item Load operations
  @code{ld} accepts two operands while @code{ldx} accepts three;
  in both cases, the last can be either a register or an immediate
@@ -389,31 +561,56 @@ ldxi    _c  _uc  _s  _us  _i  _ui  _l  _f  _d  O1 = *(O2+O3)
  both cases, the first can be either a register or an immediate
  value. Values are sign-extended to fit a whole register.
  @example
-str     _c  _uc  _s  _us  _i  _ui  _l  _f  _d  *O1 = O2
-sti     _c  _uc  _s  _us  _i  _ui  _l  _f  _d  *O1 = O2
-stxr    _c  _uc  _s  _us  _i  _ui  _l  _f  _d  *(O1+O2) = O3
-stxi    _c  _uc  _s  _us  _i  _ui  _l  _f  _d  *(O1+O2) = O3
+str     _c       _s       _i       _l  _f  _d  *O1 = O2
+sti     _c       _s       _i       _l  _f  _d  *O1 = O2
+stxr    _c       _s       _i       _l  _f  _d  *(O1+O2) = O3
+stxi    _c       _s       _i       _l  _f  _d  *(O1+O2) = O3
  @end example
-As for the load operations, the @code{_ui} and @code{_l} types are
-only available in 64-bit architectures, and for convenience, there
-is a version without a type modifier for integer or pointer operands
-that uses the appropriate wordsize call.
+Note that the unsigned type modifier is not available, as the store
+only writes to the 1, 2, 4 or 8 sized memory address.
+The @code{_l} type is only available in 64-bit architectures, and for
+convenience, there is a version without a type modifier for integer or
+pointer operands that uses the appropriate wordsize call.
+
+@item Unaligned memory access
+These allow access to integers of size 3, in 32-bit, and extra sizes
+5, 6 and 7 in 64-bit.
+For floating point values only support for size 4 and 8 is provided.
+@example
+unldr       O1 = *(signed O3 byte integer)O2
+unldi       O1 = *(signed O3 byte integer)O2
+unldr_u     O1 = *(unsigned O3 byte integer)O2
+unldi_u     O1 = *(unsigned O3 byte integer)O2
+unldr_x     O1 = *(O3 byte float)O2
+unldi_x     O1 = *(O3 byte float)O2
+unstr       *(O3 byte integer)O1 = O2
+unsti       *(O3 byte integer)O1 = O2
+unstr_x     *(O3 byte float)O1 = O2
+unsti_x     *(O3 byte float)O1 = O2
+@end example
+With the exception of non standard sized integers, these might be
+implemented as normal loads and stores, if the processor supports
+unaligned memory access, or a mode can be chosen at jit initialization
+time to generate, or not generate, code that traps on unaligned
+memory access. Letting the kernel trap means smaller code generation,
+as otherwise it is required to check alignment at runtime@footnote{This requires changing jit_cpu.unaligned to 0 to disable or 1 to enable unaligned code generation. Not all ports have the C jit_cpu.unaligned value.}.
  
  @item Argument management
  These are:
  @example
  prepare     (not specified)
  va_start    (not specified)
-pushargr                                   _f  _d
-pushargi                                   _f  _d
+pushargr    _c  _uc  _s  _us  _i  _ui  _l  _f  _d
+pushargi    _c  _uc  _s  _us  _i  _ui  _l  _f  _d
  va_push     (not specified)
-arg                                        _f  _d
+arg         _c  _uc  _s  _us  _i  _ui  _l  _f  _d
  getarg      _c  _uc  _s  _us  _i  _ui  _l  _f  _d
  va_arg                                         _d
-putargr                                    _f  _d
-putargi                                    _f  _d
+putargr     _c  _uc  _s  _us  _i  _ui  _l  _f  _d
+putargi     _c  _uc  _s  _us  _i  _ui  _l  _f  _d
  ret         (not specified)
-retr                                       _f  _d
+retr        _c  _uc  _s  _us  _i  _ui  _l  _f  _d
+reti        _c  _uc  _s  _us  _i  _ui  _l  _f  _d
  reti                                       _f  _d
  va_end      (not specified)
  retval      _c  _uc  _s  _us  _i  _ui  _l  _f  _d
@@ -432,6 +629,15 @@ the @code{pushargr} or @code{pushargi} to push the arguments @strong{in
  left to right order}; and use @code{finish} or @code{call} (explained below)
  to perform the actual call.
  
+Note that @code{arg}, @code{pusharg}, @code{putarg} and @code{ret} when
+handling integer types can be used without a type modifier.
+It is suggested to use matching type modifiers to @code{arg}, @code{putarg}
+and @code{getarg} otherwise problems will happen if generating jit for
+environments that require arguments to be truncated and zero or sign
+extended by the caller and/or excess arguments might be passed packed
+in the stack. Currently only Apple systems with @code{aarch64} cpus are
+known to have this restriction.
+
  @code{va_start} returns a @code{C} compatible @code{va_list}. To fetch
  arguments, use @code{va_arg} for integers and @code{va_arg_d} for doubles.
  @code{va_push} is required when passing a @code{va_list} to another function,
@@ -553,6 +759,10 @@ bxsubr    _u          O2 -= O3@r{, goto }O1@r{ if no overflow}
  bxsubi    _u          O2 -= O3@r{, goto }O1@r{ if no overflow}
  @end example
  
+Note that the @code{C} code does not have an @code{O1} argument. It is
+required to always use the return value as an argument to @code{patch},
+@code{patch_at} or @code{patch_abs}.
+
  @item Jump and return operations
  These accept one argument except @code{ret} and @code{jmpi} which
  have none; the difference between @code{finishi} and @code{calli}
@@ -585,6 +795,20 @@ forward   (not specified)                @r{forward label}
  indirect  (not specified)                @r{special simple label}
  @end example
  
+The following instruction is used to specify a minimal alignment for
+the next instruction, usually with a label:
+@example
+align     (not specified)                @r{align code}
+@end example
+
+Similar to @code{align} is the next instruction, also usually used with
+a label:
+@example
+skip      (not specified)                @r{skip code}
+@end example
+It is used to specify a minimal number of bytes of nops to be inserted
+before the next instruction.
+
  @code{label} is normally used as @code{patch_at} argument for backward
  jumps.
  
@@ -637,6 +861,42 @@ that automatically binds the implicit label added by @code{patch} with
  the @code{movi}, but on some special conditions it is required to create
  an "unbound" label.
  
+@code{align} is useful for creating multiple entry points to a
+(trampoline) function that are all accessible through a single
+function pointer.  @code{align} receives an integer argument that
+defines the minimal alignment of the address of a label directly
+following the @code{align} instruction.  The integer argument must be
+a power of two and the effective alignment will be a power of two no
+less than the argument to @code{align}.  If the argument to
+@code{align} is 16 or more, the effective alignment will match the
+specified minimal alignment exactly.
+
+@example
+          jit_node_t *forward, *label1, *label2, *jump;
+          unsigned char *addr1, *addr2;
+forward = jit_forward();
+          jit_align(16);
+label1  = jit_indirect();                @rem{/* first entry point */}
+jump    = jit_jmpi();                    @rem{/* jump to first handler */}
+          jit_patch_at(jump, forward);
+          jit_align(16);
+label2  = jit_indirect();                @rem{/* second entry point */}
+          ...                            @rem{/* second handler */}
+          jit_jmpr(...);
+          jit_link(forward);
+          ...                            @rem{/* first handler */}
+          jit_jmpr(...);
+          ...
+          jit_emit();
+          addr1 = jit_address(label1);
+          addr2 = jit_address(label2);
+          assert(addr2 - addr1 == 16);   @rem{/* only one of the addresses needs to be remembered */}
+@end example
+
+@code{skip} is useful for reserving space in the code buffer that can
+later be filled (possibly with the help of the pair of functions
+@code{jit_unprotect} and @code{jit_protect}).
+
  @item Function prolog
  
  These macros are used to set up a function prolog.  The @code{allocai}
@@ -869,7 +1129,7 @@ will return non zero if the argument lives in a register. This call
  is useful to know the live range of register arguments, as those
  are very fast to read and write, but have volatile values.
  
-@code{callee_save_p} exects a valid @code{JIT_Rn}, @code{JIT_Vn}, or
+@code{callee_save_p} expects a valid @code{JIT_Rn}, @code{JIT_Vn}, or
  @code{JIT_Fn}, and will return non zero if the register is callee
  save. This call is useful because on several ports, the @code{JIT_Rn}
  and @code{JIT_Fn} registers are actually callee save; no need
@@ -878,6 +1138,34 @@ to save and load the values when making function calls.
  @code{pointer_p} expects a pointer argument, and will return non
  zero if the pointer is inside the generated jit code. Must be
  called after @code{jit_emit} and before @code{jit_destroy_state}.
+
+@item Atomic operations
+Only compare-and-swap is implemented. It accepts four operands;
+the second can be an immediate.
+
+The first argument is set with a boolean value telling if the operation
+did succeed.
+
+Arguments must be different; the result register cannot also be used
+to pass an argument.
+
+The second argument is the address of a machine word.
+
+The third argument is the old value.
+
+The fourth argument is the new value.
+
+@example
+casr                                  O1 = (*O2 == O3) ? (*O2 = O4, 1) : 0
+casi                                  O1 = (*O2 == O3) ? (*O2 = O4, 1) : 0
+@end example
+
+If the value at the address in the second argument is equal to the third
+argument, the address value is atomically modified to the value of the
+fourth argument and the first argument is set to a non zero value.
+
+If the value at the address in the second argument is not equal to the
+third argument nothing is done and the first argument is set to zero.
  @end table
  
  @node GNU lightning examples
@@ -1066,26 +1354,13 @@ maps to @code{%g2} on the SPARC).
  @table @b
  @item x86_64
  @example
-    sub   $0x30,%rsp
-    mov   %rbp,(%rsp)
-    mov   %rsp,%rbp
-    sub   $0x18,%rsp
-    mov   %rdi,%rax            mov %rdi, %rax
-    add   $0x1,%rax            inc %rax
-    mov   %rbp,%rsp
-    mov   (%rsp),%rbp
-    add   $0x30,%rsp
-    retq                       retq
+    mov   %rdi,%rax
+    add   $0x1,%rax
+    ret
  @end example
-In this case, the main overhead is due to the function's prolog and
-epilog, and stack alignment after reserving stack space for word
-to/from float conversions or moving data from/to x87 to/from SSE.
-Note that besides allocating space to save callee saved registers,
-no registers are saved/restored because @lightning{} notices those
-registers are not modified. There is currently no logic to detect
-if it needs to allocate stack space for type conversions neither
-proper leaf function detection, but these are subject to change
-(FIXME).
+In this case, for the x86 port, @lightning{} has simple optimizations
+to understand it is a leaf function, and that it is not required to
+create a stack frame nor update the stack pointer.
  @end table
  
  @node printf
@@ -1249,7 +1524,7 @@ jit_node_t *compile_rpn(char *expr)
    in = jit_arg();
    stack_ptr = stack_base = jit_allocai (32 * sizeof (int));
  
-  jit_getarg_i(JIT_R2, in);
+  jit_getarg(JIT_R2, in);
  
    while (*expr) @{
      char buf[32];
@@ -1602,6 +1877,28 @@ Get the current memory allocation function. Also, unlike the GNU GMP
  counterpart, it is an error to pass @code{NULL} pointers as arguments.
  @end deftypefun
  
+@section Protection
+Unless an alternate code buffer is used (see below), @code{jit_emit}
+sets the access protections so that the code buffer's memory can be read and
+executed, but not modified.  One can use the following functions after
+@code{jit_emit} but before @code{jit_clear} to temporarily lift the
+protection:
+
+@deftypefun void jit_unprotect ()
+Changes the access protection so that the code buffer's memory can be read and
+modified.  Before the emitted code can be invoked, @code{jit_protect}
+has to be called to reset the change.
+
+This procedure has no effect when an alternate code buffer (see below) is used.
+@end deftypefun
+
+@deftypefun void jit_protect ()
+Changes the access protection so that the code buffer's memory can be read and
+executed.
+
+This procedure has no effect when an alternate code buffer (see below) is used.
+@end deftypefun
+
  @section Alternate code buffer
  To instruct @lightning{} to use an alternate code buffer it is required
  to call @code{jit_realize} before @code{jit_emit}, and then query states