drc: avoid excessive recursion in hle mode
[pcsx_rearmed.git] / deps / lightning / lib / jit_aarch64-cpu.c
CommitLineData
4a71579b 1/*
79bfeef6 2 * Copyright (C) 2013-2023 Free Software Foundation, Inc.
4a71579b
PC
3 *
4 * This file is part of GNU lightning.
5 *
6 * GNU lightning is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU Lesser General Public License as published
8 * by the Free Software Foundation; either version 3, or (at your option)
9 * any later version.
10 *
11 * GNU lightning is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
14 * License for more details.
15 *
16 * Authors:
17 * Paulo Cesar Pereira de Andrade
18 */
19
20#if PROTO
21typedef union {
22/* aarch64-opc.c */
23# define ui jit_uint32_t
24# if __BYTE_ORDER == __LITTLE_ENDIAN
25 /* cond2: condition in truly conditional-executed inst. */
26 struct { ui b: 4; } cond2;
27 /* nzcv: flag bit specifier, encoded in the "nzcv" field. */
28 struct { ui b: 4; } nzcv;
29 /* defgh: d:e:f:g:h bits in AdvSIMD modified immediate. */
30 struct { ui _: 5; ui b: 5; } defgh;
31 /* abc: a:b:c bits in AdvSIMD modified immediate. */
32 struct { ui _: 16; ui b: 3; } abc;
33 /* imm19: e.g. in CBZ. */
34 struct { ui _: 5; ui b: 19; } imm19;
35 /* immhi: e.g. in ADRP. */
36 struct { ui _: 5; ui b: 19; } immhi;
37 /* immlo: e.g. in ADRP. */
38 struct { ui _: 29; ui b: 2; } immlo;
39 /* size: in most AdvSIMD and floating-point instructions. */
40 struct { ui _: 22; ui b: 2; } size;
41 /* vldst_size: size field in the AdvSIMD load/store inst. */
42 struct { ui _: 10; ui b: 2; } vldst_size;
43 /* op: in AdvSIMD modified immediate instructions. */
44 struct { ui _: 29; ui b: 1; } op;
45 /* Q: in most AdvSIMD instructions. */
46 struct { ui _: 30; ui b: 1; } Q;
47 /* Rt: in load/store instructions. */
48 struct { ui b: 5; } Rt;
49 /* Rd: in many integer instructions. */
50 struct { ui b: 5; } Rd;
51 /* Rn: in many integer instructions. */
52 struct { ui _: 5; ui b: 5; } Rn;
53 /* Rt2: in load/store pair instructions. */
54 struct { ui _: 10; ui b: 5; } Rt2;
55 /* Ra: in fp instructions. */
56 struct { ui _: 10; ui b: 5; } Ra;
57 /* op2: in the system instructions. */
58 struct { ui _: 5; ui b: 3; } op2;
59 /* CRm: in the system instructions. */
60 struct { ui _: 8; ui b: 4; } CRm;
61 /* CRn: in the system instructions. */
62 struct { ui _: 12; ui b: 4; } CRn;
63 /* op1: in the system instructions. */
64 struct { ui _: 16; ui b: 3; } op1;
65 /* op0: in the system instructions. */
66 struct { ui _: 19; ui b: 2; } op0;
67 /* imm3: in add/sub extended reg instructions. */
68 struct { ui _: 10; ui b: 3; } imm3;
69 /* cond: condition flags as a source operand. */
70 struct { ui _: 12; ui b: 4; } cond;
71 /* opcode: in advsimd load/store instructions. */
72 struct { ui _: 12; ui b: 4; } opcode;
73 /* cmode: in advsimd modified immediate instructions. */
74 struct { ui _: 12; ui b: 4; } cmode;
75 /* asisdlso_opcode: opcode in advsimd ld/st single element. */
76 struct { ui _: 13; ui b: 3; } asisdlso_opcode;
77 /* len: in advsimd tbl/tbx instructions. */
78 struct { ui _: 13; ui b: 2; } len;
79 /* Rm: in ld/st reg offset and some integer inst. */
80 struct { ui _: 16; ui b: 5; } Rm;
81 /* Rs: in load/store exclusive instructions. */
82 struct { ui _: 16; ui b: 5; } Rs;
83 /* option: in ld/st reg offset + add/sub extended reg inst. */
84 struct { ui _: 13; ui b: 3; } option;
85 /* S: in load/store reg offset instructions. */
86 struct { ui _: 12; ui b: 1; } S;
87 /* hw: in move wide constant instructions. */
88 struct { ui _: 21; ui b: 2; } hw;
89 /* opc: in load/store reg offset instructions. */
90 struct { ui _: 22; ui b: 2; } opc;
91 /* opc1: in load/store reg offset instructions. */
92 struct { ui _: 23; ui b: 1; } opc1;
93 /* shift: in add/sub reg/imm shifted instructions. */
94 struct { ui _: 22; ui b: 2; } shift;
95 /* type: floating point type field in fp data inst. */
96 struct { ui _: 22; ui b: 2; } type;
97 /* ldst_size: size field in ld/st reg offset inst. */
98 struct { ui _: 30; ui b: 2; } ldst_size;
99 /* imm6: in add/sub reg shifted instructions. */
100 struct { ui _: 10; ui b: 6; } imm6;
101 /* imm4: in advsimd ext and advsimd ins instructions. */
102 struct { ui _: 11; ui b: 4; } imm4;
103 /* imm5: in conditional compare (immediate) instructions. */
104 struct { ui _: 16; ui b: 5; } imm5;
105 /* imm7: in load/store pair pre/post index instructions. */
106 struct { ui _: 15; ui b: 7; } imm7;
107 /* imm8: in floating-point scalar move immediate inst. */
108 struct { ui _: 13; ui b: 8; } imm8;
109 /* imm9: in load/store pre/post index instructions. */
110 struct { ui _: 12; ui b: 9; } imm9;
111 /* imm12: in ld/st unsigned imm or add/sub shifted inst. */
112 struct { ui _: 10; ui b: 12; } imm12;
113 /* imm14: in test bit and branch instructions. */
114 struct { ui _: 5; ui b: 14; } imm14;
115 /* imm16: in exception instructions. */
116 struct { ui _: 5; ui b: 16; } imm16;
117 /* imm26: in unconditional branch instructions. */
118 struct { ui b: 26; } imm26;
119 /* imms: in bitfield and logical immediate instructions. */
120 struct { ui _: 10; ui b: 6; } imms;
121 /* immr: in bitfield and logical immediate instructions. */
122 struct { ui _: 16; ui b: 6; } immr;
123 /* immb: in advsimd shift by immediate instructions. */
124 struct { ui _: 16; ui b: 3; } immb;
125 /* immh: in advsimd shift by immediate instructions. */
126 struct { ui _: 19; ui b: 4; } immh;
127 /* N: in logical (immediate) instructions. */
128 struct { ui _: 22; ui b: 1; } N;
129 /* index: in ld/st inst deciding the pre/post-index. */
130 struct { ui _: 11; ui b: 1; } index;
131 /* index2: in ld/st pair inst deciding the pre/post-index. */
132 struct { ui _: 24; ui b: 1; } index2;
133 /* sf: in integer data processing instructions. */
134 struct { ui _: 31; ui b: 1; } sf;
135 /* H: in advsimd scalar x indexed element instructions. */
136 struct { ui _: 11; ui b: 1; } H;
137 /* L: in advsimd scalar x indexed element instructions. */
138 struct { ui _: 21; ui b: 1; } L;
139 /* M: in advsimd scalar x indexed element instructions. */
140 struct { ui _: 20; ui b: 1; } M;
141 /* b5: in the test bit and branch instructions. */
142 struct { ui _: 31; ui b: 1; } b5;
143 /* b40: in the test bit and branch instructions. */
144 struct { ui _: 19; ui b: 5; } b40;
145 /* scale: in the fixed-point scalar to fp converting inst. */
146 struct { ui _: 10; ui b: 6; } scale;
147# else
148 struct { ui _: 28; ui b: 4; } cond2;
149 struct { ui _: 28; ui b: 4; } nzcv;
150 struct { ui _: 22; ui b: 5; } defgh;
151 struct { ui _: 13; ui b: 3; } abc;
152 struct { ui _: 8; ui b: 19; } imm19;
153 struct { ui _: 8; ui b: 19; } immhi;
154 struct { ui _: 1; ui b: 29; } immlo;
155 struct { ui _: 8; ui b: 2; } size;
156 struct { ui _: 20; ui b: 2; } vldst_size;
157 struct { ui _: 2; ui b: 1; } op;
158 struct { ui _: 1; ui b: 1; } Q;
159 struct { ui _: 27; ui b: 1; } Rt;
160 struct { ui _: 27; ui b: 1; } Rd;
161 struct { ui _: 22; ui b: 5; } Rn;
162 struct { ui _: 17; ui b: 5; } Rt2;
163 struct { ui _: 17; ui b: 5; } Ra;
164 struct { ui _: 24; ui b: 3; } op2;
165 struct { ui _: 20; ui b: 4; } CRm;
166 struct { ui _: 16; ui b: 4; } CRn;
167 struct { ui _: 13; ui b: 3; } op1;
168 struct { ui _: 11; ui b: 2; } op0;
169 struct { ui _: 19; ui b: 3; } imm3;
170 struct { ui _: 16; ui b: 4; } cond;
171 struct { ui _: 16; ui b: 4; } opcode;
172 struct { ui _: 16; ui b: 4; } cmode;
173 struct { ui _: 16; ui b: 3; } asisdlso_opcode;
174 struct { ui _: 17; ui b: 2; } len;
175 struct { ui _: 11; ui b: 5; } Rm;
176 struct { ui _: 11; ui b: 5; } Rs;
177 struct { ui _: 16; ui b: 3; } option;
178 struct { ui _: 19; ui b: 1; } S;
179 struct { ui _: 9; ui b: 2; } hw;
180 struct { ui _: 8; ui b: 2; } opc;
181 struct { ui _: 8; ui b: 1; } opc1;
182 struct { ui _: 8; ui b: 2; } shift;
183 struct { ui _: 8; ui b: 2; } type;
184 struct { ui b: 2; } ldst_size;
185 struct { ui _: 16; ui b: 6; } imm6;
186 struct { ui _: 17; ui b: 4; } imm4;
187 struct { ui _: 11; ui b: 5; } imm5;
188 struct { ui _: 10; ui b: 7; } imm7;
189 struct { ui _: 11; ui b: 8; } imm8;
190 struct { ui _: 11; ui b: 9; } imm9;
191 struct { ui _: 10; ui b: 12; } imm12;
192 struct { ui _: 13; ui b: 14; } imm14;
193 struct { ui _: 11; ui b: 16; } imm16;
194 struct { ui _: 6; ui b: 26; } imm26;
195 struct { ui _: 16; ui b: 6; } imms;
196 struct { ui _: 10; ui b: 6; } immr;
197 struct { ui _: 13; ui b: 3; } immb;
198 struct { ui _: 9; ui b: 4; } immh;
199 struct { ui _: 9; ui b: 1; } N;
200 struct { ui _: 20; ui b: 1; } index;
201 struct { ui _: 7; ui b: 1; } index2;
202 struct { ui b: 1; } sf;
203 struct { ui _: 20; ui b: 1; } H;
204 struct { ui _: 10; ui b: 1; } L;
205 struct { ui _: 11; ui b: 1; } M;
206 struct { ui b: 1; } b5;
207 struct { ui _: 8; ui b: 5; } b40;
208 struct { ui _: 16; ui b: 6; } scale;
209# endif
210 jit_int32_t w;
211# undef ui
212} instr_t;
79bfeef6 213# define s26_p(d) ((d) >= -33554432 && (d) <= 33554431)
4a71579b
PC
214# define ii(i) *_jit->pc.ui++ = i
215# define ldr(r0,r1) ldr_l(r0,r1)
ba86ff93 216# define ldi(r0,i0) ldi_l(r0,i0)
4a71579b
PC
217# define ldxr(r0,r1,r2) ldxr_l(r0,r1,r2)
218# define ldxi(r0,r1,i0) ldxi_l(r0,r1,i0)
ba86ff93
PC
219# define str(r0,r1) str_l(r0,r1)
220# define sti(i0,r0) sti_l(i0,r0)
221# define stxr(r0,r1,r2) stxr_l(r0,r1,r2)
4a71579b
PC
222# define stxi(i0,r0,r1) stxi_l(i0,r0,r1)
223# define FP_REGNO 0x1d
224# define LR_REGNO 0x1e
225# define SP_REGNO 0x1f
226# define XZR_REGNO 0x1f
227# define WZR_REGNO XZR_REGNO
228# define LSL_12 0x00400000
229# define MOVI_LSL_16 0x00200000
230# define MOVI_LSL_32 0x00400000
231# define MOVI_LSL_48 0x00600000
232# define XS 0x80000000 /* Wn -> Xn */
233# define DS 0x00400000 /* Sn -> Dn */
234# define CC_NE 0x0
235# define CC_EQ 0x1
236# define CC_CC 0x2
237# define CC_LO CC_CC
238# define CC_CS 0x3
239# define CC_HS CC_CS
240# define CC_PL 0x4
241# define CC_MI 0x5
242# define CC_VC 0x6
243# define CC_VS 0x7
244# define CC_LS 0x8
245# define CC_HI 0x9
246# define CC_LT 0xa
247# define CC_GE 0xb
248# define CC_LE 0xc
249# define CC_GT 0xd
250# define CC_NV 0xe
251# define CC_AL 0xf
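/* B.cond takes the architectural condition encodings (BCC_* below).
 * The CC_* values above are their inverses, which is what CSET needs
 * because it is emitted as CSINC Rd,XZR,XZR,<inverted condition>. */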
253# define BCC_EQ 0x0
254# define BCC_NE 0x1
255# define BCC_CS 0x2
256# define BCC_HS BCC_CS
257# define BCC_CC 0x3
258# define BCC_LO BCC_CC
259# define BCC_MI 0x4
260# define BCC_PL 0x5
261# define BCC_VS 0x6
262# define BCC_VC 0x7
263# define BCC_HI 0x8
264# define BCC_LS 0x9
265# define BCC_GE 0xa
266# define BCC_LT 0xb
267# define BCC_GT 0xc
268# define BCC_LE 0xd
269# define BCC_AL 0xe
270# define BCC_NV 0xf
271/* adapted and cut down to only tested and required by lightning,
272 * from data in binutils/aarch64-tbl.h */
273# define A64_ADCS 0x3a000000
274# define A64_SBCS 0x7a000000
275# define A64_ADDI 0x11000000
276# define A64_ADDSI 0xb1000000
277# define A64_SUBI 0x51000000
278# define A64_SUBSI 0x71000000
279# define A64_ADD 0x0b000000
280# define A64_ADDS 0x2b000000
281# define A64_SUB 0x4b000000
282# define A64_NEG 0x4b0003e0
283# define A64_SUBS 0x6b000000
284# define A64_CMP 0x6b00001f
ba86ff93 285# define A64_BFM 0x33400000
4a71579b 286# define A64_SBFM 0x93400000
ba86ff93 287# define A64_SBFX 0x13400000
4a71579b 288# define A64_UBFM 0x53400000
ba86ff93 289# define A64_UBFX 0x53400000
4a71579b
PC
290# define A64_B 0x14000000
291# define A64_BL 0x94000000
292# define A64_BR 0xd61f0000
293# define A64_BLR 0xd63f0000
294# define A64_RET 0xd65f0000
295# define A64_CBZ 0x34000000
296# define A64_CBNZ 0x35000000
297# define A64_B_C 0x54000000
298# define A64_CSINC 0x1a800400
e0659411 299# define A64_CSSEL 0x1a800000
4a71579b
PC
300# define A64_REV 0xdac00c00
301# define A64_UDIV 0x1ac00800
302# define A64_SDIV 0x1ac00c00
303# define A64_LSL 0x1ac02000
304# define A64_LSR 0x1ac02400
305# define A64_ASR 0x1ac02800
ba86ff93
PC
306# define A64_RORV 0x1ac02c00
307# define A64_EXTR 0x13800000
4a71579b
PC
308# define A64_MUL 0x1b007c00
309# define A64_SMULL 0x9b207c00
310# define A64_SMULH 0x9b407c00
311# define A64_UMULL 0x9ba07c00
312# define A64_UMULH 0x9bc07c00
313# define A64_STRBI 0x39000000
314# define A64_LDRBI 0x39400000
315# define A64_LDRSBI 0x39800000
316# define A64_STRI 0xf9000000
317# define A64_LDRI 0xf9400000
318# define A64_STRHI 0x79000000
319# define A64_LDRHI 0x79400000
320# define A64_LDRSHI 0x79800000
321# define A64_STRWI 0xb9000000
322# define A64_LDRWI 0xb9400000
323# define A64_LDRSWI 0xb9800000
324# define A64_STRB 0x38206800
325# define A64_LDRB 0x38606800
326# define A64_LDRSB 0x38e06800
327# define A64_STR 0xf8206800
328# define A64_LDR 0xf8606800
ba3814c1
PC
329# define A64_LDAXR 0xc85ffc00
330# define A64_STLXR 0xc800fc00
4a71579b
PC
331# define A64_STRH 0x78206800
332# define A64_LDRH 0x78606800
333# define A64_LDRSH 0x78a06800
334# define A64_STRW 0xb8206800
335# define A64_LDRW 0xb8606800
336# define A64_LDRSW 0xb8a06800
337# define A64_STURB 0x38000000
338# define A64_LDURB 0x38400000
339# define A64_LDURSB 0x38800000
340# define A64_STUR 0xf8000000
341# define A64_LDUR 0xf8400000
342# define A64_STURH 0x78000000
343# define A64_LDURH 0x78400000
344# define A64_LDURSH 0x78800000
345# define A64_STURW 0xb8000000
346# define A64_LDURW 0xb8400000
347# define A64_LDURSW 0xb8800000
348# define A64_STP 0x29000000
349# define A64_LDP 0x29400000
350# define A64_STP_POS 0x29800000
351# define A64_LDP_PRE 0x28c00000
352# define A64_ANDI 0x12400000
353# define A64_ORRI 0x32400000
354# define A64_EORI 0x52400000
355# define A64_ANDSI 0x72000000
356# define A64_AND 0x0a000000
357# define A64_ORR 0x2a000000
358# define A64_MOV 0x2a0003e0 /* AKA orr Rd,xzr,Rm */
359# define A64_MVN 0x2a2003e0
79bfeef6
PC
360# define A64_CLS 0x5ac01400
361# define A64_CLZ 0x5ac01000
362# define A64_RBIT 0x5ac00000
4a71579b
PC
363# define A64_UXTW 0x2a0003e0 /* AKA MOV */
364# define A64_EOR 0x4a000000
365# define A64_ANDS 0x6a000000
366# define A64_MOVN 0x12800000
367# define A64_MOVZ 0x52800000
368# define A64_MOVK 0x72800000
ba86ff93 369# define BFM(Rd,Rn,ImmR,ImmS) oxxrs(A64_BFM|XS,Rd,Rn,ImmR,ImmS)
4a71579b
PC
370# define SBFM(Rd,Rn,ImmR,ImmS) oxxrs(A64_SBFM|XS,Rd,Rn,ImmR,ImmS)
371# define UBFM(Rd,Rn,ImmR,ImmS) oxxrs(A64_UBFM|XS,Rd,Rn,ImmR,ImmS)
ba86ff93
PC
372# define SBFX(Rd,Rn,ImmR,ImmS) oxxrs(A64_SBFX|XS,Rd,Rn,ImmR,ImmS)
373# define UBFX(Rd,Rn,ImmR,ImmS) oxxrs(A64_UBFX|XS,Rd,Rn,ImmR,ImmS)
4a71579b
PC
374# define CMP(Rn,Rm) oxx_(A64_CMP|XS,Rn,Rm)
375# define CMPI(Rn,Imm12) oxxi(A64_SUBSI|XS,XZR_REGNO,Rn,Imm12)
376# define CMPI_12(Rn,Imm12) oxxi(A64_SUBSI|XS|LSL_12,XZR_REGNO,Rn,Imm12)
377# define CMNI(Rn,Imm12) oxxi(A64_ADDSI|XS,XZR_REGNO,Rn,Imm12)
378# define CMNI_12(Rn,Imm12) oxxi(A64_ADDSI|XS|LSL_12,XZR_REGNO,Rn,Imm12)
379# define CSINC(Rd,Rn,Rm,Cc) oxxxc(A64_CSINC|XS,Rd,Rn,Rm,Cc)
380# define TST(Rn,Rm) oxxx(A64_ANDS|XS,XZR_REGNO,Rn,Rm)
381/* actually should use oxxrs but logical_immediate returns proper encoding */
382# define TSTI(Rn,Imm12) oxxi(A64_ANDSI,XZR_REGNO,Rn,Imm12)
383# define MOV(Rd,Rm) ox_x(A64_MOV|XS,Rd,Rm)
384# define MVN(Rd,Rm) ox_x(A64_MVN|XS,Rd,Rm)
385# define NEG(Rd,Rm) ox_x(A64_NEG|XS,Rd,Rm)
79bfeef6
PC
386# define CLS(Rd,Rm) o_xx(A64_CLS|XS,Rd,Rm)
387# define CLZ(Rd,Rm) o_xx(A64_CLZ|XS,Rd,Rm)
388# define RBIT(Rd,Rm) o_xx(A64_RBIT|XS,Rd,Rm)
4a71579b
PC
389# define MOVN(Rd,Imm16) ox_h(A64_MOVN|XS,Rd,Imm16)
390# define MOVN_16(Rd,Imm16) ox_h(A64_MOVN|XS|MOVI_LSL_16,Rd,Imm16)
391# define MOVN_32(Rd,Imm16) ox_h(A64_MOVN|XS|MOVI_LSL_32,Rd,Imm16)
392# define MOVN_48(Rd,Imm16) ox_h(A64_MOVN|XS|MOVI_LSL_48,Rd,Imm16)
393# define MOVZ(Rd,Imm16) ox_h(A64_MOVZ|XS,Rd,Imm16)
394# define MOVZ_16(Rd,Imm16) ox_h(A64_MOVZ|XS|MOVI_LSL_16,Rd,Imm16)
395# define MOVZ_32(Rd,Imm16) ox_h(A64_MOVZ|XS|MOVI_LSL_32,Rd,Imm16)
396# define MOVZ_48(Rd,Imm16) ox_h(A64_MOVZ|XS|MOVI_LSL_48,Rd,Imm16)
397# define MOVK(Rd,Imm16) ox_h(A64_MOVK|XS,Rd,Imm16)
398# define MOVK_16(Rd,Imm16) ox_h(A64_MOVK|XS|MOVI_LSL_16,Rd,Imm16)
399# define MOVK_32(Rd,Imm16) ox_h(A64_MOVK|XS|MOVI_LSL_32,Rd,Imm16)
400# define MOVK_48(Rd,Imm16) ox_h(A64_MOVK|XS|MOVI_LSL_48,Rd,Imm16)
401# define ADD(Rd,Rn,Rm) oxxx(A64_ADD|XS,Rd,Rn,Rm)
402# define ADDI(Rd,Rn,Imm12) oxxi(A64_ADDI|XS,Rd,Rn,Imm12)
403# define ADDI_12(Rd,Rn,Imm12) oxxi(A64_ADDI|XS|LSL_12,Rd,Rn,Imm12)
404# define MOV_XSP(Rd,Rn) ADDI(Rd,Rn,0)
405# define ADDS(Rd,Rn,Rm) oxxx(A64_ADDS|XS,Rd,Rn,Rm)
406# define ADDSI(Rd,Rn,Imm12) oxxi(A64_ADDSI|XS,Rd,Rn,Imm12)
407# define ADDSI_12(Rd,Rn,Imm12) oxxi(A64_ADDSI|XS|LSL_12,Rd,Rn,Imm12)
408# define ADCS(Rd,Rn,Rm) oxxx(A64_ADCS|XS,Rd,Rn,Rm)
409# define SUB(Rd,Rn,Rm) oxxx(A64_SUB|XS,Rd,Rn,Rm)
410# define SUBI(Rd,Rn,Imm12) oxxi(A64_SUBI|XS,Rd,Rn,Imm12)
411# define SUBI_12(Rd,Rn,Imm12) oxxi(A64_SUBI|XS|LSL_12,Rd,Rn,Imm12)
412# define SUBS(Rd,Rn,Rm) oxxx(A64_SUBS|XS,Rd,Rn,Rm)
413# define SUBSI(Rd,Rn,Imm12) oxxi(A64_SUBSI|XS,Rd,Rn,Imm12)
414# define SUBSI_12(Rd,Rn,Imm12) oxxi(A64_SUBSI|XS|LSL_12,Rd,Rn,Imm12)
415# define SBCS(Rd,Rn,Rm) oxxx(A64_SBCS|XS,Rd,Rn,Rm)
416# define MUL(Rd,Rn,Rm) oxxx(A64_MUL|XS,Rd,Rn,Rm)
417# define SMULL(Rd,Rn,Rm) oxxx(A64_SMULL,Rd,Rn,Rm)
418# define SMULH(Rd,Rn,Rm) oxxx(A64_SMULH,Rd,Rn,Rm)
419# define UMULL(Rd,Rn,Rm) oxxx(A64_UMULL,Rd,Rn,Rm)
420# define UMULH(Rd,Rn,Rm) oxxx(A64_UMULH,Rd,Rn,Rm)
421# define SDIV(Rd,Rn,Rm) oxxx(A64_SDIV|XS,Rd,Rn,Rm)
422# define UDIV(Rd,Rn,Rm) oxxx(A64_UDIV|XS,Rd,Rn,Rm)
423# define LSL(Rd,Rn,Rm) oxxx(A64_LSL|XS,Rd,Rn,Rm)
/* Logical shift left by an immediate, expressed as UBFM with
 * immr = (64 - i0) mod 64 and imms = 63 - i0.  i0 is parenthesised at
 * each use so that an expression argument (e.g. `a + b`) is subtracted
 * as a whole. */
#  define LSLI(r0,r1,i0)		UBFM(r0,r1,(64-(i0))&63,63-(i0))
425# define ASR(Rd,Rn,Rm) oxxx(A64_ASR|XS,Rd,Rn,Rm)
426# define ASRI(r0,r1,i0) SBFM(r0,r1,i0,63)
427# define LSR(Rd,Rn,Rm) oxxx(A64_LSR|XS,Rd,Rn,Rm)
428# define LSRI(r0,r1,i0) UBFM(r0,r1,i0,63)
ba86ff93
PC
429# define RORV(Rd,Rn,Rm) oxxx(A64_RORV|XS,Rd,Rn,Rm)
430# define EXTR(Rd,Rn,Rm,Im) oxxx6(A64_EXTR|XS|DS,Rm,Im,Rn,Rd)
431# define ROR(Rd,Rn,Rm,Im) EXTR(Rd,Rn,Rm,Im)
4a71579b
PC
432# define AND(Rd,Rn,Rm) oxxx(A64_AND|XS,Rd,Rn,Rm)
433/* actually should use oxxrs but logical_immediate returns proper encoding */
434# define ANDI(Rd,Rn,Imm12) oxxi(A64_ANDI|XS,Rd,Rn,Imm12)
435# define ORR(Rd,Rn,Rm) oxxx(A64_ORR|XS,Rd,Rn,Rm)
436/* actually should use oxxrs but logical_immediate returns proper encoding */
437# define ORRI(Rd,Rn,Imm12) oxxi(A64_ORRI|XS,Rd,Rn,Imm12)
438# define EOR(Rd,Rn,Rm) oxxx(A64_EOR|XS,Rd,Rn,Rm)
439/* actually should use oxxrs but logical_immediate returns proper encoding */
440# define EORI(Rd,Rn,Imm12) oxxi(A64_EORI|XS,Rd,Rn,Imm12)
441# define SXTB(Rd,Rn) SBFM(Rd,Rn,0,7)
442# define SXTH(Rd,Rn) SBFM(Rd,Rn,0,15)
443# define SXTW(Rd,Rn) SBFM(Rd,Rn,0,31)
ba86ff93
PC
444# define UXTB(Rd,Rn) oxxrs(A64_UBFX & ~DS,Rd,Rn,0,7)
445# define UXTH(Rd,Rn) oxxrs(A64_UBFX & ~DS,Rd,Rn,0,15)
4a71579b
PC
446# define UXTW(Rd,Rm) ox_x(A64_UXTW,Rd,Rm)
447# define REV(Rd,Rn) o_xx(A64_REV,Rd,Rn)
448# define LDRSB(Rt,Rn,Rm) oxxx(A64_LDRSB,Rt,Rn,Rm)
449# define LDRSBI(Rt,Rn,Imm12) oxxi(A64_LDRSBI,Rt,Rn,Imm12)
450# define LDURSB(Rt,Rn,Imm9) oxx9(A64_LDURSB,Rt,Rn,Imm9)
451# define LDRB(Rt,Rn,Rm) oxxx(A64_LDRB,Rt,Rn,Rm)
452# define LDRBI(Rt,Rn,Imm12) oxxi(A64_LDRBI,Rt,Rn,Imm12)
453# define LDURB(Rt,Rn,Imm9) oxx9(A64_LDURB,Rt,Rn,Imm9)
454# define LDRSH(Rt,Rn,Rm) oxxx(A64_LDRSH,Rt,Rn,Rm)
455# define LDRSHI(Rt,Rn,Imm12) oxxi(A64_LDRSHI,Rt,Rn,Imm12)
456# define LDURSH(Rt,Rn,Imm9) oxx9(A64_LDURSH,Rt,Rn,Imm9)
457# define LDRH(Rt,Rn,Rm) oxxx(A64_LDRH,Rt,Rn,Rm)
458# define LDRHI(Rt,Rn,Imm12) oxxi(A64_LDRHI,Rt,Rn,Imm12)
459# define LDURH(Rt,Rn,Imm9) oxx9(A64_LDURH,Rt,Rn,Imm9)
460# define LDRSW(Rt,Rn,Rm) oxxx(A64_LDRSW,Rt,Rn,Rm)
461# define LDRSWI(Rt,Rn,Imm12) oxxi(A64_LDRSWI,Rt,Rn,Imm12)
462# define LDURSW(Rt,Rn,Imm9) oxx9(A64_LDURSW,Rt,Rn,Imm9)
463# define LDRW(Rt,Rn,Rm) oxxx(A64_LDRW,Rt,Rn,Rm)
464# define LDRWI(Rt,Rn,Imm12) oxxi(A64_LDRWI,Rt,Rn,Imm12)
465# define LDURW(Rt,Rn,Imm9) oxx9(A64_LDURW,Rt,Rn,Imm9)
466# define LDR(Rt,Rn,Rm) oxxx(A64_LDR,Rt,Rn,Rm)
467# define LDRI(Rt,Rn,Imm12) oxxi(A64_LDRI,Rt,Rn,Imm12)
468# define LDUR(Rt,Rn,Imm9) oxx9(A64_LDUR,Rt,Rn,Imm9)
ba3814c1
PC
469# define LDAXR(Rt,Rn) o_xx(A64_LDAXR,Rt,Rn)
470# define STLXR(Rs,Rt,Rn) oxxx(A64_STLXR,Rs,Rn,Rt)
4a71579b
PC
471# define STRB(Rt,Rn,Rm) oxxx(A64_STRB,Rt,Rn,Rm)
472# define STRBI(Rt,Rn,Imm12) oxxi(A64_STRBI,Rt,Rn,Imm12)
473# define STURB(Rt,Rn,Imm9) oxx9(A64_STURB,Rt,Rn,Imm9)
474# define STRH(Rt,Rn,Rm) oxxx(A64_STRH,Rt,Rn,Rm)
475# define STRHI(Rt,Rn,Imm12) oxxi(A64_STRHI,Rt,Rn,Imm12)
476# define STURH(Rt,Rn,Imm9) oxx9(A64_STURH,Rt,Rn,Imm9)
477# define STRW(Rt,Rn,Rm) oxxx(A64_STRW,Rt,Rn,Rm)
478# define STRWI(Rt,Rn,Imm12) oxxi(A64_STRWI,Rt,Rn,Imm12)
479# define STURW(Rt,Rn,Imm9) oxx9(A64_STURW,Rt,Rn,Imm9)
480# define STR(Rt,Rn,Rm) oxxx(A64_STR,Rt,Rn,Rm)
481# define STRI(Rt,Rn,Imm12) oxxi(A64_STRI,Rt,Rn,Imm12)
482# define STUR(Rt,Rn,Imm9) oxx9(A64_STUR,Rt,Rn,Imm9)
483# define LDPI(Rt,Rt2,Rn,Simm7) oxxx7(A64_LDP|XS,Rt,Rt2,Rn,Simm7)
484# define STPI(Rt,Rt2,Rn,Simm7) oxxx7(A64_STP|XS,Rt,Rt2,Rn,Simm7)
485# define LDPI_PRE(Rt,Rt2,Rn,Simm7) oxxx7(A64_LDP_PRE|XS,Rt,Rt2,Rn,Simm7)
486# define STPI_POS(Rt,Rt2,Rn,Simm7) oxxx7(A64_STP_POS|XS,Rt,Rt2,Rn,Simm7)
487# define CSET(Rd,Cc) CSINC(Rd,XZR_REGNO,XZR_REGNO,Cc)
e0659411 488# define CSEL(Rd,Rn,Rm,Cc) oxxxc(A64_CSSEL|XS,Rd,Rn,Rm,Cc)
4a71579b
PC
489# define B(Simm26) o26(A64_B,Simm26)
490# define BL(Simm26) o26(A64_BL,Simm26)
491# define BR(Rn) o_x_(A64_BR,Rn)
492# define BLR(Rn) o_x_(A64_BLR,Rn)
493# define RET() o_x_(A64_RET,LR_REGNO)
494# define B_C(Cc,Simm19) oc19(A64_B_C,Cc,Simm19)
495# define CBZ(Rd,Simm19) ox19(A64_CBZ|XS,Rd,Simm19)
496# define CBNZ(Rd,Simm19) ox19(A64_CBNZ|XS,Rd,Simm19)
497# define NOP() ii(0xd503201f)
498static jit_int32_t logical_immediate(jit_word_t);
499# define oxxx(Op,Rd,Rn,Rm) _oxxx(_jit,Op,Rd,Rn,Rm)
500static void _oxxx(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
501# define oxxi(Op,Rd,Rn,Imm12) _oxxi(_jit,Op,Rd,Rn,Imm12)
502static void _oxxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
503# define oxx9(Op,Rd,Rn,Imm9) _oxx9(_jit,Op,Rd,Rn,Imm9)
504static void _oxx9(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
505# define ox19(Op,Rd,Simm19) _ox19(_jit,Op,Rd,Simm19)
506static void _ox19(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
507# define oc19(Op,Cc,Simm19) _oc19(_jit,Op,Cc,Simm19)
508static void _oc19(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
509# define o26(Op,Simm26) _o26(_jit,Op,Simm26)
510static void _oc26(jit_state_t*,jit_int32_t,jit_int32_t);
511# define ox_x(Op,Rd,Rn) _ox_x(_jit,Op,Rd,Rn)
512static void _ox_x(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
513# define o_xx(Op,Rd,Rn) _o_xx(_jit,Op,Rd,Rn)
514static void _o_xx(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
515# define oxx_(Op,Rn,Rm) _oxx_(_jit,Op,Rn,Rm)
516static void _oxx_(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
517# define o_x_(Op,Rn) _o_x_(_jit,Op,Rn)
518static void _o_x_(jit_state_t*,jit_int32_t,jit_int32_t);
519# define ox_h(Op,Rd,Imm16) _ox_h(_jit,Op,Rd,Imm16)
520static void _ox_h(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
521# define oxxrs(Op,Rd,Rn,R,S) _oxxrs(_jit,Op,Rd,Rn,R,S)
522static void _oxxrs(jit_state_t*,jit_int32_t,jit_int32_t,
523 jit_int32_t,jit_int32_t,jit_int32_t);
524# define oxxxc(Op,Rd,Rn,Rm,Cc) _oxxxc(_jit,Op,Rd,Rn,Rm,Cc)
525static void _oxxxc(jit_state_t*,jit_int32_t,jit_int32_t,
526 jit_int32_t,jit_int32_t,jit_int32_t);
527# define oxxx7(Op,Rt,Rt2,Rn,Simm7) _oxxx7(_jit,Op,Rt,Rt2,Rn,Simm7)
528static void _oxxx7(jit_state_t*,jit_int32_t,
529 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
ba86ff93
PC
530# define oxxx6(Op,Rm,Imm6,Rn,Rd) _oxxx6(_jit,Op,Rm,Imm6,Rn,Rd)
531static void _oxxx6(jit_state_t*,jit_int32_t,
532 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
4a71579b
PC
533# define nop(i0) _nop(_jit,i0)
534static void _nop(jit_state_t*,jit_int32_t);
535# define addr(r0,r1,r2) ADD(r0,r1,r2)
536# define addi(r0,r1,i0) _addi(_jit,r0,r1,i0)
537static void _addi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
538# define addcr(r0,r1,r2) ADDS(r0,r1,r2)
539# define addci(r0,r1,i0) _addci(_jit,r0,r1,i0)
540static void _addci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
541# define addxr(r0,r1,r2) ADCS(r0,r1,r2)
542# define addxi(r0,r1,i0) _addxi(_jit,r0,r1,i0)
543static void _addxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
544# define subr(r0,r1,r2) SUB(r0,r1,r2)
545# define subi(r0,r1,i0) _subi(_jit,r0,r1,i0)
546static void _subi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
547# define subcr(r0,r1,r2) SUBS(r0,r1,r2)
548# define subci(r0,r1,i0) _subci(_jit,r0,r1,i0)
549static void _subci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
550# define subxr(r0,r1,r2) SBCS(r0,r1,r2)
551# define subxi(r0,r1,i0) _subxi(_jit,r0,r1,i0)
552static void _subxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
553# define rsbi(r0, r1, i0) _rsbi(_jit, r0, r1, i0)
554static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
555# define mulr(r0,r1,r2) MUL(r0,r1,r2)
556# define muli(r0,r1,i0) _muli(_jit,r0,r1,i0)
557static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
ba86ff93
PC
558# define hmulr(r0,r1,r2) SMULH(r0,r1,r2)
559# define hmuli(r0,r1,i0) _hmuli(_jit,r0,r1,i0)
560static void _hmuli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
561# define hmulr_u(r0,r1,r2) UMULH(r0,r1,r2)
562# define hmuli_u(r0,r1,i0) _hmuli_u(_jit,r0,r1,i0)
563static void _hmuli_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
4a71579b
PC
564# define qmulr(r0,r1,r2,r3) _qmulr(_jit,r0,r1,r2,r3)
565static void _qmulr(jit_state_t*,jit_int32_t,
566 jit_int32_t,jit_int32_t,jit_int32_t);
567# define qmuli(r0,r1,r2,i0) _qmuli(_jit,r0,r1,r2,i0)
568static void _qmuli(jit_state_t*,jit_int32_t,
569 jit_int32_t,jit_int32_t,jit_word_t);
570# define qmulr_u(r0,r1,r2,r3) _qmulr_u(_jit,r0,r1,r2,r3)
571static void _qmulr_u(jit_state_t*,jit_int32_t,
572 jit_int32_t,jit_int32_t,jit_int32_t);
573# define qmuli_u(r0,r1,r2,i0) _qmuli_u(_jit,r0,r1,r2,i0)
574static void _qmuli_u(jit_state_t*,jit_int32_t,
575 jit_int32_t,jit_int32_t,jit_word_t);
576# define divr(r0,r1,r2) SDIV(r0,r1,r2)
577# define divi(r0,r1,i0) _divi(_jit,r0,r1,i0)
578static void _divi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
579# define divr_u(r0,r1,r2) UDIV(r0,r1,r2)
580# define divi_u(r0,r1,i0) _divi_u(_jit,r0,r1,i0)
581static void _divi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
582# define qdivr(r0,r1,r2,r3) _iqdivr(_jit,1,r0,r1,r2,r3)
583# define qdivr_u(r0,r1,r2,r3) _iqdivr(_jit,0,r0,r1,r2,r3)
584static void _iqdivr(jit_state_t*,jit_bool_t,
585 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
586# define qdivi(r0,r1,r2,i0) _qdivi(_jit,r0,r1,r2,i0)
587static void _qdivi(jit_state_t*,jit_int32_t,
588 jit_int32_t,jit_int32_t,jit_word_t);
589# define qdivi_u(r0,r1,r2,i0) _qdivi_u(_jit,r0,r1,r2,i0)
590static void _qdivi_u(jit_state_t*,jit_int32_t,
591 jit_int32_t,jit_int32_t,jit_word_t);
592# define remr(r0,r1,r2) _remr(_jit,r0,r1,r2)
593static void _remr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
594# define remi(r0,r1,i0) _remi(_jit,r0,r1,i0)
595static void _remi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
596# define remr_u(r0,r1,r2) _remr_u(_jit,r0,r1,r2)
597static void _remr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
598# define remi_u(r0,r1,i0) _remi_u(_jit,r0,r1,i0)
599static void _remi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
600# define lshr(r0,r1,r2) LSL(r0,r1,r2)
601# define lshi(r0,r1,i0) _lshi(_jit,r0,r1,i0)
602static void _lshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
603# define rshr(r0,r1,r2) ASR(r0,r1,r2)
604# define rshi(r0,r1,i0) _rshi(_jit,r0,r1,i0)
605static void _rshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
606# define rshr_u(r0,r1,r2) LSR(r0,r1,r2)
607# define rshi_u(r0,r1,i0) _rshi_u(_jit,r0,r1,i0)
608static void _rshi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
ba86ff93
PC
609# define qlshr(r0,r1,r2,r3) xlshr(1,r0,r1,r2,r3)
610# define qlshr_u(r0, r1, r2, r3) xlshr(0, r0, r1, r2, r3)
611# define xlshr(s,r0,r1,r2,r3) _xlshr(_jit,s,r0,r1,r2,r3)
612static void
613_xlshr(jit_state_t*,jit_bool_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
614# define qlshi(r0, r1, r2, i0) xlshi(1, r0, r1, r2, i0)
615# define qlshi_u(r0, r1, r2, i0) xlshi(0, r0, r1, r2, i0)
616# define xlshi(s, r0, r1, r2, i0) _xlshi(_jit, s, r0, r1, r2, i0)
617static void
618_xlshi(jit_state_t*,jit_bool_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t);
619# define qrshr(r0, r1, r2, r3) xrshr(1, r0, r1, r2, r3)
620# define qrshr_u(r0, r1, r2, r3) xrshr(0, r0, r1, r2, r3)
621# define xrshr(s, r0, r1, r2, r3) _xrshr(_jit, s, r0, r1, r2, r3)
622static void
623_xrshr(jit_state_t*,jit_bool_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
624# define qrshi(r0, r1, r2, i0) xrshi(1, r0, r1, r2, i0)
625# define qrshi_u(r0, r1, r2, i0) xrshi(0, r0, r1, r2, i0)
626# define xrshi(s, r0, r1, r2, i0) _xrshi(_jit, s, r0, r1, r2, i0)
627static void
628_xrshi(jit_state_t*,jit_bool_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t);
629# define lrotr(r0,r1,r2) _lrotr(_jit,r0,r1,r2)
630static void _lrotr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
/* Rotate left by an immediate, expressed as a rotate right by 64 - i0.
 * i0 is parenthesised so that an expression argument is subtracted as a
 * whole.
 * NOTE(review): i0 == 0 expands to a rotate by 64; presumably callers
 * filter that case out before reaching this macro -- confirm. */
#  define lroti(r0,r1,i0)		rroti(r0,r1,64-(i0))
632# define rrotr(r0,r1,r2) RORV(r0,r1,r2)
633# define rroti(r0,r1,i0) ROR(r0,r1,r1,i0)
e0659411
PC
634# define movnr(r0,r1,r2) _movnr(_jit,r0,r1,r2)
635static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
636# define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2)
637static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
4a71579b
PC
638# define negr(r0,r1) NEG(r0,r1)
639# define comr(r0,r1) MVN(r0,r1)
79bfeef6
PC
640# define clor(r0, r1) _clor(_jit, r0, r1)
641static void _clor(jit_state_t*, jit_int32_t, jit_int32_t);
642# define clzr(r0, r1) CLZ(r0,r1)
643static void _clzr(jit_state_t*, jit_int32_t, jit_int32_t);
644# define ctor(r0, r1) _ctor(_jit, r0, r1)
645static void _ctor(jit_state_t*, jit_int32_t, jit_int32_t);
646# define ctzr(r0, r1) _ctzr(_jit, r0, r1)
647static void _ctzr(jit_state_t*, jit_int32_t, jit_int32_t);
ba86ff93 648# define rbitr(r0, r1) RBIT(r0, r1)
4a71579b
PC
649# define andr(r0,r1,r2) AND(r0,r1,r2)
650# define andi(r0,r1,i0) _andi(_jit,r0,r1,i0)
651static void _andi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
652# define orr(r0,r1,r2) ORR(r0,r1,r2)
653# define ori(r0,r1,i0) _ori(_jit,r0,r1,i0)
654static void _ori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
655# define xorr(r0,r1,r2) EOR(r0,r1,r2)
656# define xori(r0,r1,i0) _xori(_jit,r0,r1,i0)
657static void _xori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
658# define ldr_c(r0,r1) LDRSBI(r0,r1,0)
659# define ldi_c(r0,i0) _ldi_c(_jit,r0,i0)
660static void _ldi_c(jit_state_t*,jit_int32_t,jit_word_t);
661# define ldr_uc(r0,r1) _ldr_uc(_jit,r0,r1)
662static void _ldr_uc(jit_state_t*,jit_int32_t,jit_int32_t);
663# define ldi_uc(r0,i0) _ldi_uc(_jit,r0,i0)
664static void _ldi_uc(jit_state_t*,jit_int32_t,jit_word_t);
665# define ldr_s(r0,r1) LDRSHI(r0,r1,0)
666# define ldi_s(r0,i0) _ldi_s(_jit,r0,i0)
667static void _ldi_s(jit_state_t*,jit_int32_t,jit_word_t);
668# define ldr_us(r0,r1) _ldr_us(_jit,r0,r1)
669static void _ldr_us(jit_state_t*,jit_int32_t,jit_int32_t);
670# define ldi_us(r0,i0) _ldi_us(_jit,r0,i0)
671static void _ldi_us(jit_state_t*,jit_int32_t,jit_word_t);
672# define ldr_i(r0,r1) LDRSWI(r0,r1,0)
673# define ldi_i(r0,i0) _ldi_i(_jit,r0,i0)
674static void _ldi_i(jit_state_t*,jit_int32_t,jit_word_t);
675# define ldr_ui(r0,r1) _ldr_ui(_jit,r0,r1)
676static void _ldr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
677# define ldi_ui(r0,i0) _ldi_ui(_jit,r0,i0)
678static void _ldi_ui(jit_state_t*,jit_int32_t,jit_word_t);
679# define ldr_l(r0,r1) LDRI(r0,r1,0)
680static void _ldr_l(jit_state_t*,jit_int32_t,jit_int32_t);
681# define ldi_l(r0,i0) _ldi_l(_jit,r0,i0)
682static void _ldi_l(jit_state_t*,jit_int32_t,jit_word_t);
683# define ldxr_c(r0,r1,r2) _ldxr_c(_jit,r0,r1,r2)
684static void _ldxr_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
685# define ldxi_c(r0,r1,i0) _ldxi_c(_jit,r0,r1,i0)
686static void _ldxi_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
687# define ldxr_uc(r0,r1,r2) _ldxr_uc(_jit,r0,r1,r2)
688static void _ldxr_uc(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
689# define ldxi_uc(r0,r1,i0) _ldxi_uc(_jit,r0,r1,i0)
690static void _ldxi_uc(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
691# define ldxr_s(r0,r1,r2) LDRSH(r0,r1,r2)
692# define ldxi_s(r0,r1,i0) _ldxi_s(_jit,r0,r1,i0)
693static void _ldxi_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
694# define ldxr_us(r0,r1,r2) _ldxr_us(_jit,r0,r1,r2)
695static void _ldxr_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
696# define ldxi_us(r0,r1,i0) _ldxi_us(_jit,r0,r1,i0)
697static void _ldxi_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
698# define ldxr_i(r0,r1,r2) LDRSW(r0,r1,r2)
699# define ldxi_i(r0,r1,i0) _ldxi_i(_jit,r0,r1,i0)
700static void _ldxi_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
701# define ldxr_ui(r0,r1,r2) _ldxr_ui(_jit,r0,r1,r2)
702static void _ldxr_ui(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
703# define ldxi_ui(r0,r1,i0) _ldxi_ui(_jit,r0,r1,i0)
704static void _ldxi_ui(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
705# define ldxr_l(r0,r1,r2) LDR(r0,r1,r2)
706# define ldxi_l(r0,r1,i0) _ldxi_l(_jit,r0,r1,i0)
707static void _ldxi_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
ba86ff93
PC
708# define unldr(r0, r1, i0) generic_unldr(r0, r1, i0)
709# define unldi(r0, i0, i1) generic_unldi(r0, i0, i1)
710# define unldr_u(r0, r1, i0) generic_unldr_u(r0, r1, i0)
711# define unldi_u(r0, i0, i1) generic_unldi_u(r0, i0, i1)
4a71579b
PC
712# define str_c(r0,r1) STRBI(r1,r0,0)
713# define sti_c(i0,r0) _sti_c(_jit,i0,r0)
714static void _sti_c(jit_state_t*,jit_word_t,jit_int32_t);
715# define str_s(r0,r1) STRHI(r1,r0,0)
716# define sti_s(i0,r0) _sti_s(_jit,i0,r0)
717static void _sti_s(jit_state_t*,jit_word_t,jit_int32_t);
718# define str_i(r0,r1) STRWI(r1,r0,0)
719# define sti_i(i0,r0) _sti_i(_jit,i0,r0)
720static void _sti_i(jit_state_t*,jit_word_t,jit_int32_t);
721# define str_l(r0,r1) STRI(r1,r0,0)
722# define sti_l(i0,r0) _sti_l(_jit,i0,r0)
723static void _sti_l(jit_state_t*,jit_word_t,jit_int32_t);
724# define stxr_c(r0,r1,r2) STRB(r2,r1,r0)
725# define stxi_c(i0,r0,r1) _stxi_c(_jit,i0,r0,r1)
726static void _stxi_c(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
727# define stxr_s(r0,r1,r2) STRH(r2,r1,r0)
728# define stxi_s(i0,r0,r1) _stxi_s(_jit,i0,r0,r1)
729static void _stxi_s(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
730# define stxr_i(r0,r1,r2) STRW(r2,r1,r0)
731# define stxi_i(i0,r0,r1) _stxi_i(_jit,i0,r0,r1)
732static void _stxi_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
733# define stxr_l(r0,r1,r2) STR(r2,r1,r0)
734# define stxi_l(i0,r0,r1) _stxi_l(_jit,i0,r0,r1)
735static void _stxi_l(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
ba86ff93
PC
736# define unstr(r0, r1, i0) generic_unstr(r0, r1, i0)
737# define unsti(i0, r0, i1) generic_unsti(i0, r0, i1)
40a44dcb
PC
738# define bswapr_us(r0,r1) _bswapr_us(_jit,r0,r1)
739static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t);
740# define bswapr_ui(r0,r1) _bswapr_ui(_jit,r0,r1)
741static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
742# define bswapr_ul(r0,r1) REV(r0,r1)
ba86ff93
PC
743#define extr(r0,r1,i0,i1) _extr(_jit,r0,r1,i0,i1)
744static void _extr(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t,jit_word_t);
745#define extr_u(r0,r1,i0,i1) _extr_u(_jit,r0,r1,i0,i1)
746static void _extr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t,jit_word_t);
747#define depr(r0,r1,i0,i1) _depr(_jit,r0,r1,i0,i1)
748static void _depr(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t,jit_word_t);
4a71579b
PC
749# define extr_c(r0,r1) SXTB(r0,r1)
750# define extr_uc(r0,r1) UXTB(r0,r1)
751# define extr_s(r0,r1) SXTH(r0,r1)
752# define extr_us(r0,r1) UXTH(r0,r1)
753# define extr_i(r0,r1) SXTW(r0,r1)
754# define extr_ui(r0,r1) UXTW(r0,r1)
ba3814c1
PC
755# define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0)
756static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t,
757 jit_int32_t,jit_int32_t,jit_word_t);
758#define casr(r0, r1, r2, r3) casx(r0, r1, r2, r3, 0)
759#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0)
4a71579b
PC
760# define movr(r0,r1) _movr(_jit,r0,r1)
761static void _movr(jit_state_t*,jit_int32_t,jit_int32_t);
762# define movi(r0,i0) _movi(_jit,r0,i0)
763static void _movi(jit_state_t*,jit_int32_t,jit_word_t);
764# define movi_p(r0,i0) _movi_p(_jit,r0,i0)
765static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t);
766# define ccr(cc,r0,r1,r2) _ccr(_jit,cc,r0,r1,r2)
767static void _ccr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
768# define cci(cc,r0,r1,i0) _cci(_jit,cc,r0,r1,i0)
769static void _cci(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t);
770# define ltr(r0,r1,r2) ccr(CC_LT,r0,r1,r2)
771# define lti(r0,r1,i0) cci(CC_LT,r0,r1,i0)
772# define ltr_u(r0,r1,r2) ccr(CC_CC,r0,r1,r2)
773# define lti_u(r0,r1,i0) cci(CC_CC,r0,r1,i0)
774# define ler(r0,r1,r2) ccr(CC_LE,r0,r1,r2)
775# define lei(r0,r1,i0) cci(CC_LE,r0,r1,i0)
776# define ler_u(r0,r1,r2) ccr(CC_LS,r0,r1,r2)
777# define lei_u(r0,r1,i0) cci(CC_LS,r0,r1,i0)
778# define eqr(r0,r1,r2) ccr(CC_EQ,r0,r1,r2)
779# define eqi(r0,r1,i0) cci(CC_EQ,r0,r1,i0)
780# define ger(r0,r1,r2) ccr(CC_GE,r0,r1,r2)
781# define gei(r0,r1,i0) cci(CC_GE,r0,r1,i0)
782# define ger_u(r0,r1,r2) ccr(CC_CS,r0,r1,r2)
783# define gei_u(r0,r1,i0) cci(CC_CS,r0,r1,i0)
784# define gtr(r0,r1,r2) ccr(CC_GT,r0,r1,r2)
785# define gti(r0,r1,i0) cci(CC_GT,r0,r1,i0)
786# define gtr_u(r0,r1,r2) ccr(CC_HI,r0,r1,r2)
787# define gti_u(r0,r1,i0) cci(CC_HI,r0,r1,i0)
788# define ner(r0,r1,r2) ccr(CC_NE,r0,r1,r2)
789# define nei(r0,r1,i0) cci(CC_NE,r0,r1,i0)
790# define bccr(cc,i0,r0,r1) _bccr(_jit,cc,i0,r0,r1)
791static jit_word_t
792_bccr(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t);
793# define bcci(cc,i0,r0,i1) _bcci(_jit,cc,i0,r0,i1)
794static jit_word_t
795_bcci(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_word_t);
796# define bltr(i0,r0,r1) bccr(BCC_LT,i0,r0,r1)
797# define blti(i0,r0,i1) bcci(BCC_LT,i0,r0,i1)
798# define bltr_u(i0,r0,r1) bccr(BCC_CC,i0,r0,r1)
799# define blti_u(i0,r0,i1) bcci(BCC_CC,i0,r0,i1)
800# define bler(i0,r0,r1) bccr(BCC_LE,i0,r0,r1)
801# define blei(i0,r0,i1) bcci(BCC_LE,i0,r0,i1)
802# define bler_u(i0,r0,r1) bccr(BCC_LS,i0,r0,r1)
803# define blei_u(i0,r0,i1) bcci(BCC_LS,i0,r0,i1)
804# define beqr(i0,r0,r1) bccr(BCC_EQ,i0,r0,r1)
805# define beqi(i0,r0,i1) _beqi(_jit,i0,r0,i1)
806static jit_word_t _beqi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
807# define bger(i0,r0,r1) bccr(BCC_GE,i0,r0,r1)
808# define bgei(i0,r0,i1) bcci(BCC_GE,i0,r0,i1)
809# define bger_u(i0,r0,r1) bccr(BCC_CS,i0,r0,r1)
810# define bgei_u(i0,r0,i1) bcci(BCC_CS,i0,r0,i1)
811# define bgtr(i0,r0,r1) bccr(BCC_GT,i0,r0,r1)
812# define bgti(i0,r0,i1) bcci(BCC_GT,i0,r0,i1)
813# define bgtr_u(i0,r0,r1) bccr(BCC_HI,i0,r0,r1)
814# define bgti_u(i0,r0,i1) bcci(BCC_HI,i0,r0,i1)
815# define bner(i0,r0,r1) bccr(BCC_NE,i0,r0,r1)
816# define bnei(i0,r0,i1) _bnei(_jit,i0,r0,i1)
817static jit_word_t _bnei(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
818# define baddr(cc,i0,r0,r1) _baddr(_jit,cc,i0,r0,r1)
819static jit_word_t
820_baddr(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t);
821# define baddi(cc,i0,r0,i1) _baddi(_jit,cc,i0,r0,i1)
822static jit_word_t
823_baddi(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_word_t);
824# define boaddr(i0,r0,r1) baddr(BCC_VS,i0,r0,r1)
825# define boaddi(i0,r0,i1) baddi(BCC_VS,i0,r0,i1)
826# define boaddr_u(i0,r0,r1) baddr(BCC_HS,i0,r0,r1)
827# define boaddi_u(i0,r0,i1) baddi(BCC_HS,i0,r0,i1)
828# define bxaddr(i0,r0,r1) baddr(BCC_VC,i0,r0,r1)
829# define bxaddi(i0,r0,i1) baddi(BCC_VC,i0,r0,i1)
830# define bxaddr_u(i0,r0,r1) baddr(BCC_LO,i0,r0,r1)
831# define bxaddi_u(i0,r0,i1) baddi(BCC_LO,i0,r0,i1)
832# define bsubr(cc,i0,r0,r1) _bsubr(_jit,cc,i0,r0,r1)
833static jit_word_t
834_bsubr(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t);
835# define bsubi(cc,i0,r0,i1) _bsubi(_jit,cc,i0,r0,i1)
836static jit_word_t
837_bsubi(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_word_t);
838# define bosubr(i0,r0,r1) bsubr(BCC_VS,i0,r0,r1)
839# define bosubi(i0,r0,i1) bsubi(BCC_VS,i0,r0,i1)
840# define bosubr_u(i0,r0,r1) bsubr(BCC_LO,i0,r0,r1)
841# define bosubi_u(i0,r0,i1) bsubi(BCC_LO,i0,r0,i1)
842# define bxsubr(i0,r0,r1) bsubr(BCC_VC,i0,r0,r1)
843# define bxsubi(i0,r0,i1) bsubi(BCC_VC,i0,r0,i1)
844# define bxsubr_u(i0,r0,r1) bsubr(BCC_HS,i0,r0,r1)
845# define bxsubi_u(i0,r0,i1) bsubi(BCC_HS,i0,r0,i1)
846# define bmxr(cc,i0,r0,r1) _bmxr(_jit,cc,i0,r0,r1)
847static jit_word_t
848_bmxr(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t);
849# define bmxi(cc,i0,r0,r1) _bmxi(_jit,cc,i0,r0,r1)
850static jit_word_t
851_bmxi(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_word_t);
852# define bmsr(i0,r0,r1) bmxr(BCC_NE,i0,r0,r1)
853# define bmsi(i0,r0,i1) bmxi(BCC_NE,i0,r0,i1)
854# define bmcr(i0,r0,r1) bmxr(BCC_EQ,i0,r0,r1)
855# define bmci(i0,r0,i1) bmxi(BCC_EQ,i0,r0,i1)
856# define jmpr(r0) BR(r0)
857# define jmpi(i0) _jmpi(_jit,i0)
79bfeef6 858static jit_word_t _jmpi(jit_state_t*,jit_word_t);
4a71579b
PC
859# define jmpi_p(i0) _jmpi_p(_jit,i0)
860static jit_word_t _jmpi_p(jit_state_t*,jit_word_t);
861# define callr(r0) BLR(r0)
862# define calli(i0) _calli(_jit,i0)
79bfeef6 863static jit_word_t _calli(jit_state_t*,jit_word_t);
4a71579b
PC
864# define calli_p(i0) _calli_p(_jit,i0)
865static jit_word_t _calli_p(jit_state_t*,jit_word_t);
866# define prolog(i0) _prolog(_jit,i0)
867static void _prolog(jit_state_t*,jit_node_t*);
868# define epilog(i0) _epilog(_jit,i0)
869static void _epilog(jit_state_t*,jit_node_t*);
870# define vastart(r0) _vastart(_jit, r0)
871static void _vastart(jit_state_t*, jit_int32_t);
872# define vaarg(r0, r1) _vaarg(_jit, r0, r1)
873static void _vaarg(jit_state_t*, jit_int32_t, jit_int32_t);
874# define patch_at(jump,label) _patch_at(_jit,jump,label)
875static void _patch_at(jit_state_t*,jit_word_t,jit_word_t);
876#endif
877
878#if CODE
79bfeef6
PC
879/* https://dougallj.wordpress.com/2021/10/30/bit-twiddling-optimising-aarch64-logical-immediate-encoding-and-decoding/ */
880#include "aarch64-logical-immediates.c"
4a71579b
PC
881static jit_int32_t
882logical_immediate(jit_word_t imm)
883{
79bfeef6
PC
884 jit_int32_t result = encodeLogicalImmediate64(imm);
885 if (result != ENCODE_FAILED) {
886 assert(isValidLogicalImmediate64(result));
887 return (result & 0xfff);
4a71579b 888 }
79bfeef6 889 return (-1);
4a71579b
PC
890}
891
892static void
893_oxxx(jit_state_t *_jit, jit_int32_t Op,
894 jit_int32_t Rd, jit_int32_t Rn, jit_int32_t Rm)
895{
896 instr_t i;
897 assert(!(Rd & ~0x1f));
898 assert(!(Rn & ~0x1f));
899 assert(!(Rm & ~0x1f));
900 assert(!(Op & ~0xffe0fc00));
901 i.w = Op;
902 i.Rd.b = Rd;
903 i.Rn.b = Rn;
904 i.Rm.b = Rm;
905 ii(i.w);
906}
907
908static void
909_oxxi(jit_state_t *_jit, jit_int32_t Op,
910 jit_int32_t Rd, jit_int32_t Rn, jit_int32_t Imm12)
911{
912 instr_t i;
913 assert(!(Rd & ~0x1f));
914 assert(!(Rn & ~0x1f));
915 assert(!(Imm12 & ~0xfff));
916 assert(!(Op & ~0xffe00000));
917 i.w = Op;
918 i.Rd.b = Rd;
919 i.Rn.b = Rn;
920 i.imm12.b = Imm12;
921 ii(i.w);
922}
923
924static void
925_oxx9(jit_state_t *_jit, jit_int32_t Op,
926 jit_int32_t Rd, jit_int32_t Rn, jit_int32_t Imm9)
927{
928 instr_t i;
929 assert(!(Rd & ~0x1f));
930 assert(!(Rn & ~0x1f));
931 assert(!(Imm9 & ~0x1ff));
932 assert(!(Op & ~0xffe00000));
933 i.w = Op;
934 i.Rd.b = Rd;
935 i.Rn.b = Rn;
936 i.imm9.b = Imm9;
937 ii(i.w);
938}
939
940static void
941_ox19(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Rd, jit_int32_t Simm19)
942{
943 instr_t i;
944 assert(!(Rd & ~0x1f));
945 assert(Simm19 >= -262148 && Simm19 <= 262143);
946 assert(!(Op & ~0xff000000));
947 i.w = Op;
948 i.Rd.b = Rd;
949 i.imm19.b = Simm19;
950 ii(i.w);
951}
952
953static void
954_oc19(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Cc, jit_int32_t Simm19)
955{
956 instr_t i;
957 assert(!(Cc & ~0xf));
958 assert(Simm19 >= -262148 && Simm19 <= 262143);
959 assert(!(Op & ~0xff000000));
960 i.w = Op;
961 i.cond2.b = Cc;
962 i.imm19.b = Simm19;
963 ii(i.w);
964}
965
966static void
967_o26(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Simm26)
968{
969 instr_t i;
79bfeef6 970 assert(s26_p(Simm26));
4a71579b
PC
971 assert(!(Op & ~0xfc000000));
972 i.w = Op;
973 i.imm26.b = Simm26;
974 ii(i.w);
975}
976
977static void
978_ox_x(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Rd, jit_int32_t Rm)
979{
980 instr_t i;
981 assert(!(Rd & ~0x1f));
982 assert(!(Rm & ~0x1f));
983 assert(!(Op & ~0xffe0ffe0));
984 i.w = Op;
985 i.Rd.b = Rd;
986 i.Rm.b = Rm;
987 ii(i.w);
988}
989
990static void
991_o_xx(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Rd, jit_int32_t Rn)
992{
993 instr_t i;
994 assert(!(Rd & ~0x1f));
995 assert(!(Rn & ~0x1f));
996 assert(!(Op & ~0xfffffc00));
997 i.w = Op;
998 i.Rd.b = Rd;
999 i.Rn.b = Rn;
1000 ii(i.w);
1001}
1002
1003static void
1004_oxx_(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Rn, jit_int32_t Rm)
1005{
1006 instr_t i;
1007 assert(!(Rn & ~0x1f));
1008 assert(!(Rm & ~0x1f));
1009 assert(!(Op & ~0xffc0fc1f));
1010 i.w = Op;
1011 i.Rn.b = Rn;
1012 i.Rm.b = Rm;
1013 ii(i.w);
1014}
1015
1016static void
1017_o_x_(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Rn)
1018{
1019 instr_t i;
1020 assert(!(Rn & ~0x1f));
1021 assert(!(Op & 0x3e0));
1022 i.w = Op;
1023 i.Rn.b = Rn;
1024 ii(i.w);
1025}
1026
1027static void
1028_ox_h(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Rd, jit_int32_t Imm16)
1029{
1030 instr_t i;
1031 assert(!(Rd & ~0x1f));
1032 assert(!(Imm16 & ~0xffff));
1033 assert(!(Op & ~0xffe00000));
1034 i.w = Op;
1035 i.Rd.b = Rd;
1036 i.imm16.b = Imm16;
1037 ii(i.w);
1038}
1039
1040static void
1041_oxxrs(jit_state_t *_jit, jit_int32_t Op,
1042 jit_int32_t Rd, jit_int32_t Rn, jit_int32_t R, jit_int32_t S)
1043{
1044 instr_t i;
1045 assert(!(Rd & ~0x1f));
1046 assert(!(Rn & ~0x1f));
1047 assert(!(R & ~0x3f));
1048 assert(!(S & ~0x3f));
1049 assert(!(Op & ~0xffc00000));
1050 i.w = Op;
1051 i.Rd.b = Rd;
1052 i.Rn.b = Rn;
1053 i.immr.b = R;
1054 i.imms.b = S;
1055 ii(i.w);
1056}
1057
1058static void
1059_oxxxc(jit_state_t *_jit, jit_int32_t Op,
1060 jit_int32_t Rd, jit_int32_t Rn, jit_int32_t Rm, jit_int32_t Cc)
1061{
1062 instr_t i;
1063 assert(!(Rd & ~0x1f));
1064 assert(!(Rn & ~0x1f));
1065 assert(!(Rm & ~0x1f));
1066 assert(!(Cc & ~0xf));
1067 assert(!(Op & ~0xffc00c00));
1068 i.w = Op;
1069 i.Rd.b = Rd;
1070 i.Rn.b = Rn;
1071 i.Rm.b = Rm;
1072 i.cond.b = Cc;
1073 ii(i.w);
1074}
1075
1076static void
1077_oxxx7(jit_state_t *_jit, jit_int32_t Op,
1078 jit_int32_t Rt, jit_int32_t Rt2, jit_int32_t Rn, jit_int32_t Simm7)
1079{
1080 instr_t i;
1081 assert(!(Rt & ~0x1f));
1082 assert(!(Rt2 & ~0x1f));
1083 assert(!(Rn & ~0x1f));
1084 assert(Simm7 >= -128 && Simm7 <= 127);
1085 assert(!(Op & ~0xffc003e0));
1086 i.w = Op;
1087 i.Rt.b = Rt;
1088 i.Rt2.b = Rt2;
1089 i.Rn.b = Rn;
1090 i.imm7.b = Simm7;
1091 ii(i.w);
1092}
1093
ba86ff93
PC
1094static void
1095_oxxx6(jit_state_t *_jit, jit_int32_t Op,
1096 jit_int32_t Rm, jit_int32_t Imm6, jit_int32_t Rn, jit_int32_t Rd)
1097{
1098 instr_t i;
1099 assert(!(Rm & ~0x1f));
1100 assert(!(Rn & ~0x1f));
1101 assert(!(Rd & ~0x1f));
1102 assert(Imm6 >= 0 && Imm6 <= 63);
1103 assert(!(Op & ~0xffe0fc00));
1104 i.w = Op;
1105 i.Rm.b = Rm;
1106 i.imm6.b = Imm6;
1107 i.Rn.b = Rn;
1108 i.Rd.b = Rd;
1109 ii(i.w);
1110}
1111
4a71579b
PC
1112static void
1113_nop(jit_state_t *_jit, jit_int32_t i0)
1114{
1115 for (; i0 > 0; i0 -= 4)
1116 NOP();
1117 assert(i0 == 0);
1118}
1119
1120static void
1121_addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1122{
1123 jit_int32_t reg;
1124 jit_word_t is = i0 >> 12;
1125 jit_word_t in = -i0;
1126 jit_word_t iS = in >> 12;
1127 if ( i0 >= 0 && i0 <= 0xfff)
1128 ADDI (r0, r1, i0);
1129 else if ((is << 12) == i0 && is >= 0 && is <= 0xfff)
1130 ADDI_12(r0, r1, is);
1131 else if ( in >= 0 && in <= 0xfff)
1132 SUBI (r0, r1, in);
1133 else if ((iS << 12) == is && iS >= 0 && iS <= 0xfff)
1134 SUBI_12(r0, r1, iS);
1135 else {
1136 reg = jit_get_reg(jit_class_gpr);
1137 movi(rn(reg), i0);
1138 addr(r0, r1, rn(reg));
1139 jit_unget_reg(reg);
1140 }
1141}
1142
1143static void
1144_addci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1145{
1146 jit_int32_t reg;
1147 jit_word_t is = i0 >> 12;
1148 jit_word_t in = -i0;
1149 jit_word_t iS = in >> 12;
1150 if ( i0 >= 0 && i0 <= 0xfff)
1151 ADDSI (r0, r1, i0);
1152 else if ((is << 12) == i0 && is >= 0 && is <= 0xfff)
1153 ADDSI_12(r0, r1, is);
1154 else if ( in >= 0 && in <= 0xfff)
1155 SUBSI (r0, r1, in);
1156 else if ((iS << 12) == is && iS >= 0 && iS <= 0xfff)
1157 SUBSI_12(r0, r1, iS);
1158 else {
1159 reg = jit_get_reg(jit_class_gpr);
1160 movi(rn(reg), i0);
1161 addcr(r0, r1, rn(reg));
1162 jit_unget_reg(reg);
1163 }
1164}
1165
1166static void
1167_addxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1168{
1169 jit_int32_t reg;
1170 reg = jit_get_reg(jit_class_gpr);
1171 movi(rn(reg), i0);
1172 addxr(r0, r1, rn(reg));
1173 jit_unget_reg(reg);
1174}
1175
1176static void
1177_subi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1178{
1179 jit_int32_t reg;
1180 jit_word_t is = i0 >> 12;
1181 if ( i0 >= 0 && i0 <= 0xfff)
1182 SUBI (r0, r1, i0);
1183 else if ((is << 12) == i0 && is >= 0 && is <= 0xfff)
1184 SUBI_12(r0, r1, is);
1185 else {
1186 reg = jit_get_reg(jit_class_gpr);
1187 movi(rn(reg), i0);
1188 subr(r0, r1, rn(reg));
1189 jit_unget_reg(reg);
1190 }
1191}
1192
1193static void
1194_subci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1195{
1196 jit_int32_t reg;
1197 jit_word_t is = i0 >> 12;
1198 if ( i0 >= 0 && i0 <= 0xfff)
1199 SUBSI (r0, r1, i0);
1200 else if ((is << 12) == i0 && is >= 0 && is <= 0xfff)
1201 SUBSI_12(r0, r1, is);
1202 else {
1203 reg = jit_get_reg(jit_class_gpr);
1204 movi(rn(reg), i0);
1205 subcr(r0, r1, rn(reg));
1206 jit_unget_reg(reg);
1207 }
1208}
1209
1210static void
1211_subxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1212{
1213 jit_int32_t reg;
1214 reg = jit_get_reg(jit_class_gpr);
1215 movi(rn(reg), i0);
1216 subxr(r0, r1, rn(reg));
1217 jit_unget_reg(reg);
1218}
1219
1220static void
1221_rsbi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1222{
1223 subi(r0, r1, i0);
1224 negr(r0, r0);
1225}
1226
1227static void
1228_muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1229{
1230 jit_int32_t reg;
1231 reg = jit_get_reg(jit_class_gpr);
1232 movi(rn(reg), i0);
1233 mulr(r0, r1, rn(reg));
1234 jit_unget_reg(reg);
1235}
1236
ba86ff93
PC
1237static void
1238_hmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1239{
1240 jit_int32_t reg;
1241 reg = jit_get_reg(jit_class_gpr);
1242 movi(rn(reg), i0);
1243 hmulr(r0, r1, rn(reg));
1244 jit_unget_reg(reg);
1245}
1246
1247static void
1248_hmuli_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1249{
1250 jit_int32_t reg;
1251 reg = jit_get_reg(jit_class_gpr);
1252 movi(rn(reg), i0);
1253 hmulr_u(r0, r1, rn(reg));
1254 jit_unget_reg(reg);
1255}
1256
4a71579b
PC
1257static void
1258_qmulr(jit_state_t *_jit, jit_int32_t r0,
1259 jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
1260{
1261 jit_int32_t reg;
1262 if (r0 == r2 || r0 == r3) {
1263 reg = jit_get_reg(jit_class_gpr);
1264 mulr(rn(reg), r2, r3);
1265 }
1266 else
1267 mulr(r0, r2, r3);
1268 SMULH(r1, r2, r3);
1269 if (r0 == r2 || r0 == r3) {
1270 movr(r0, rn(reg));
1271 jit_unget_reg(reg);
1272 }
1273}
1274
1275static void
1276_qmuli(jit_state_t *_jit, jit_int32_t r0,
1277 jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
1278{
1279 jit_int32_t reg;
1280 reg = jit_get_reg(jit_class_gpr);
1281 movi(rn(reg), i0);
1282 qmulr(r0, r1, r2, rn(reg));
1283 jit_unget_reg(reg);
1284}
1285
1286static void
1287_qmulr_u(jit_state_t *_jit, jit_int32_t r0,
1288 jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
1289{
1290 jit_int32_t reg;
1291 if (r0 == r2 || r0 == r3) {
1292 reg = jit_get_reg(jit_class_gpr);
1293 mulr(rn(reg), r2, r3);
1294 }
1295 else
1296 mulr(r0, r2, r3);
1297 UMULH(r1, r2, r3);
1298 if (r0 == r2 || r0 == r3) {
1299 movr(r0, rn(reg));
1300 jit_unget_reg(reg);
1301 }
1302}
1303
1304static void
1305_qmuli_u(jit_state_t *_jit, jit_int32_t r0,
1306 jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
1307{
1308 jit_int32_t reg;
1309 reg = jit_get_reg(jit_class_gpr);
1310 movi(rn(reg), i0);
1311 qmulr_u(r0, r1, r2, rn(reg));
1312 jit_unget_reg(reg);
1313}
1314
1315static void
1316_divi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1317{
1318 jit_int32_t reg;
1319 reg = jit_get_reg(jit_class_gpr);
1320 movi(rn(reg), i0);
1321 divr(r0, r1, rn(reg));
1322 jit_unget_reg(reg);
1323}
1324
1325static void
1326_divi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1327{
1328 jit_int32_t reg;
1329 reg = jit_get_reg(jit_class_gpr);
1330 movi(rn(reg), i0);
1331 divr_u(r0, r1, rn(reg));
1332 jit_unget_reg(reg);
1333}
1334
1335static void
1336_iqdivr(jit_state_t *_jit, jit_bool_t sign,
1337 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
1338{
1339 jit_int32_t sv0, rg0;
1340 jit_int32_t sv1, rg1;
1341 if (r0 == r2 || r0 == r3) {
1342 sv0 = jit_get_reg(jit_class_gpr);
1343 rg0 = rn(sv0);
1344 }
1345 else
1346 rg0 = r0;
1347 if (r1 == r2 || r1 == r3) {
1348 sv1 = jit_get_reg(jit_class_gpr);
1349 rg1 = rn(sv1);
1350 }
1351 else
1352 rg1 = r1;
1353 if (sign)
1354 divr(rg0, r2, r3);
1355 else
1356 divr_u(rg0, r2, r3);
1357 mulr(rg1, r3, rg0);
1358 subr(rg1, r2, rg1);
1359 if (rg0 != r0) {
1360 movr(r0, rg0);
1361 jit_unget_reg(sv0);
1362 }
1363 if (rg1 != r1) {
1364 movr(r1, rg1);
1365 jit_unget_reg(sv1);
1366 }
1367}
1368
1369static void
1370_qdivi(jit_state_t *_jit, jit_int32_t r0,
1371 jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
1372{
1373 jit_int32_t reg;
1374 reg = jit_get_reg(jit_class_gpr);
1375 movi(rn(reg), i0);
1376 qdivr(r0, r1, r2, rn(reg));
1377 jit_unget_reg(reg);
1378}
1379
1380static void
1381_qdivi_u(jit_state_t *_jit, jit_int32_t r0,
1382 jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
1383{
1384 jit_int32_t reg;
1385 reg = jit_get_reg(jit_class_gpr);
1386 movi(rn(reg), i0);
1387 qdivr_u(r0, r1, r2, rn(reg));
1388 jit_unget_reg(reg);
1389}
1390
1391static void
1392_remr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1393{
1394 jit_int32_t reg;
1395 if (r0 == r1 || r0 == r2) {
1396 reg = jit_get_reg(jit_class_gpr);
1397 divr(rn(reg), r1, r2);
1398 mulr(rn(reg), r2, rn(reg));
1399 subr(r0, r1, rn(reg));
1400 jit_unget_reg(reg);
1401 }
1402 else {
1403 divr(r0, r1, r2);
1404 mulr(r0, r2, r0);
1405 subr(r0, r1, r0);
1406 }
1407}
1408
1409static void
1410_remi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1411{
1412 jit_int32_t reg;
1413 reg = jit_get_reg(jit_class_gpr);
1414 movi(rn(reg), i0);
1415 remr(r0, r1, rn(reg));
1416 jit_unget_reg(reg);
1417}
1418
1419static void
1420_remr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1421{
1422 jit_int32_t reg;
1423 if (r0 == r1 || r0 == r2) {
1424 reg = jit_get_reg(jit_class_gpr);
1425 divr_u(rn(reg), r1, r2);
1426 mulr(rn(reg), r2, rn(reg));
1427 subr(r0, r1, rn(reg));
1428 jit_unget_reg(reg);
1429 }
1430 else {
1431 divr_u(r0, r1, r2);
1432 mulr(r0, r2, r0);
1433 subr(r0, r1, r0);
1434 }
1435}
1436
1437static void
1438_remi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1439{
1440 jit_int32_t reg;
1441 reg = jit_get_reg(jit_class_gpr);
1442 movi(rn(reg), i0);
1443 remr_u(r0, r1, rn(reg));
1444 jit_unget_reg(reg);
1445}
1446
1447static void
1448_lshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1449{
1450 if (i0 == 0)
1451 movr(r0, r1);
1452 else {
1453 assert(i0 > 0 && i0 < 64);
1454 LSLI(r0, r1, i0);
1455 }
1456}
1457
1458static void
1459_rshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1460{
1461 if (i0 == 0)
1462 movr(r0, r1);
1463 else {
1464 assert(i0 > 0 && i0 < 64);
1465 ASRI(r0, r1, i0);
1466 }
1467}
1468
1469static void
1470_rshi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1471{
1472 if (i0 == 0)
1473 movr(r0, r1);
1474 else {
1475 assert(i0 > 0 && i0 < 64);
1476 LSRI(r0, r1, i0);
1477 }
1478}
1479
ba86ff93
PC
1480static void
1481_xlshr(jit_state_t *_jit, jit_bool_t sign,
1482 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
1483{
1484 jit_bool_t branch;
1485 jit_word_t over, zero, done, done_over;
1486 jit_int32_t t0, s0, t1, s1, t2, s2, t3, s3;
1487 s0 = jit_get_reg(jit_class_gpr);
1488 t0 = rn(s0);
1489 if (r0 == r2 || r1 == r2) {
1490 s2 = jit_get_reg(jit_class_gpr);
1491 t2 = rn(s2);
1492 movr(t2, r2);
1493 }
1494 else
1495 t2 = r2;
1496 if (r0 == r3 || r1 == r3) {
1497 s3 = jit_get_reg(jit_class_gpr);
1498 t3 = rn(s3);
1499 movr(t3, r3);
1500 }
1501 else
1502 t3 = r3;
1503 if ((s1 = jit_get_reg(jit_class_gpr|jit_class_nospill|jit_class_chk))) {
1504 t1 = rn(s1);
1505 branch = 0;
1506 }
1507 else
1508 branch = 1;
1509 rsbi(t0, t3, __WORDSIZE);
1510 lshr(r0, t2, t3);
1511 if (sign)
1512 rshr(r1, t2, t0);
1513 else
1514 rshr_u(r1, t2, t0);
1515 if (branch) {
1516 zero = beqi(_jit->pc.w, t3, 0);
1517 over = beqi(_jit->pc.w, t3, __WORDSIZE);
1518 done = jmpi(_jit->pc.w);
1519 patch_at(over, _jit->pc.w);
1520 /* overflow */
1521 movi(r0, 0);
1522 done_over = jmpi(_jit->pc.w);
1523 /* zero */
1524 patch_at(zero, _jit->pc.w);
1525 if (sign)
1526 rshi(r1, t2, __WORDSIZE - 1);
1527 else
1528 movi(r1, 0);
1529 patch_at(done, _jit->pc.w);
1530 patch_at(done_over, _jit->pc.w);
1531 }
1532 else {
1533 if (sign)
1534 rshi(t0, t2, __WORDSIZE - 1);
1535 else
1536 movi(t0, 0);
1537 /* zero? */
1538 movzr(r1, t0, t3);
1539 /* Branchless but 4 bytes longer than branching fallback */
1540 if (sign)
1541 movi(t0, 0);
1542 /* overflow? */
1543 eqi(t1, t3, __WORDSIZE);
1544 movnr(r0, t0, t1);
1545 jit_unget_reg(s1);
1546 }
1547 jit_unget_reg(s0);
1548 if (t2 != r2)
1549 jit_unget_reg(s2);
1550 if (t3 != r3)
1551 jit_unget_reg(s3);
1552}
1553
1554static void
1555_xlshi(jit_state_t *_jit, jit_bool_t sign,
1556 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
1557{
1558 if (i0 == 0) {
1559 movr(r0, r2);
1560 if (sign)
1561 rshi(r1, r2, __WORDSIZE - 1);
1562 else
1563 movi(r1, 0);
1564 }
1565 else if (i0 == __WORDSIZE) {
1566 movr(r1, r2);
1567 movi(r0, 0);
1568 }
1569 else {
1570 assert((jit_uword_t)i0 <= __WORDSIZE);
1571 if (sign)
1572 rshi(r1, r2, __WORDSIZE - i0);
1573 else
1574 rshi_u(r1, r2, __WORDSIZE - i0);
1575 lshi(r0, r2, i0);
1576 }
1577}
1578
1579static void
1580_xrshr(jit_state_t *_jit, jit_bool_t sign,
1581 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
1582{
1583 jit_bool_t branch;
1584 jit_word_t over, zero, done, done_over;
1585 jit_int32_t t0, s0, t1, s1, t2, s2, t3, s3;
1586 s0 = jit_get_reg(jit_class_gpr);
1587 t0 = rn(s0);
1588 if (r0 == r2 || r1 == r2) {
1589 s2 = jit_get_reg(jit_class_gpr);
1590 t2 = rn(s2);
1591 movr(t2, r2);
1592 }
1593 else
1594 t2 = r2;
1595 if (r0 == r3 || r1 == r3) {
1596 s3 = jit_get_reg(jit_class_gpr);
1597 t3 = rn(s3);
1598 movr(t3, r3);
1599 }
1600 else
1601 t3 = r3;
1602 if ((s1 = jit_get_reg(jit_class_gpr|jit_class_nospill|jit_class_chk))) {
1603 t1 = rn(s1);
1604 branch = 0;
1605 }
1606 else
1607 branch = 1;
1608 rsbi(t0, t3, __WORDSIZE);
1609 if (sign)
1610 rshr(r0, t2, t3);
1611 else
1612 rshr_u(r0, t2, t3);
1613 lshr(r1, t2, t0);
1614 if (branch) {
1615 zero = beqi(_jit->pc.w, t3, 0);
1616 over = beqi(_jit->pc.w, t3, __WORDSIZE);
1617 done = jmpi(_jit->pc.w);
1618 patch_at(over, _jit->pc.w);
1619 /* underflow */
1620 if (sign)
1621 rshi(r0, t2, __WORDSIZE - 1);
1622 else
1623 movi(r0, 0);
1624 done_over = jmpi(_jit->pc.w);
1625 /* zero */
1626 patch_at(zero, _jit->pc.w);
1627 if (sign)
1628 rshi(r1, t2, __WORDSIZE - 1);
1629 else
1630 movi(r1, 0);
1631 patch_at(done, _jit->pc.w);
1632 patch_at(done_over, _jit->pc.w);
1633 jit_unget_reg(s1);
1634 }
1635 else {
1636 /* zero? */
1637 if (sign)
1638 rshi(t0, t2, __WORDSIZE - 1);
1639 else
1640 movi(t0, 0);
1641 movzr(r1, t0, t3);
1642 /* underflow? */
1643 eqi(t1, t3, __WORDSIZE);
1644 movnr(r0, t0, t1);
1645 jit_unget_reg(s1);
1646 }
1647 jit_unget_reg(s0);
1648 if (t2 != r2)
1649 jit_unget_reg(s2);
1650 if (t3 != r3)
1651 jit_unget_reg(s3);
1652}
1653
1654static void
1655_xrshi(jit_state_t *_jit, jit_bool_t sign,
1656 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
1657{
1658 if (i0 == 0) {
1659 movr(r0, r2);
1660 if (sign)
1661 rshi(r1, r2, __WORDSIZE - 1);
1662 else
1663 movi(r1, 0);
1664 }
1665 else if (i0 == __WORDSIZE) {
1666 movr(r1, r2);
1667 if (sign)
1668 rshi(r0, r2, __WORDSIZE - 1);
1669 else
1670 movi(r0, 0);
1671 }
1672 else {
1673 assert((jit_uword_t)i0 <= __WORDSIZE);
1674 lshi(r1, r2, __WORDSIZE - i0);
1675 if (sign)
1676 rshi(r0, r2, i0);
1677 else
1678 rshi_u(r0, r2, i0);
1679 }
1680}
1681
1682static void
1683_lrotr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1684{
1685 jit_int32_t reg;
1686 if (r0 != r1 && r0 != r2) {
1687 rsbi(r0, r2, 64);
1688 rrotr(r0, r1, r0);
1689 }
1690 else {
1691 reg = jit_get_reg(jit_class_gpr);
1692 rsbi(rn(reg), r2, 64);
1693 rrotr(r0, r1, rn(reg));
1694 jit_unget_reg(reg);
1695 }
1696}
1697
e0659411
PC
1698static void
1699_movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1700{
1701 CMPI(r2, 0);
1702 CSEL(r0, r0, r1, CC_NE);
1703}
1704
1705static void
1706_movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1707{
1708 CMPI(r2, 0);
1709 CSEL(r0, r0, r1, CC_EQ);
1710}
1711
ba86ff93
PC
1712static void
1713_extr(jit_state_t *_jit,
1714 jit_int32_t r0, jit_int32_t r1, jit_word_t i0, jit_word_t i1)
1715{
1716 assert(i0 >= 0 && i1 >= 1 && i0 + i1 <= __WORDSIZE);
1717 if ( i1 == __WORDSIZE)
1718 movr(r0, r1);
1719 else {
1720# if __BYTE_ORDER == __BIG_ENDIAN
1721 i0 = __WORDSIZE - (i0 + i1);
1722# endif
1723 SBFX(r0, r1, i0, (i0 + i1) - 1);
1724 }
1725}
1726
1727static void
1728_extr_u(jit_state_t *_jit,
1729 jit_int32_t r0, jit_int32_t r1, jit_word_t i0, jit_word_t i1)
1730{
1731 assert(i0 >= 0 && i1 >= 1 && i0 + i1 <= __WORDSIZE);
1732 if (i1 == __WORDSIZE)
1733 movr(r0, r1);
1734 else {
1735# if __BYTE_ORDER == __BIG_ENDIAN
1736 i0 = __WORDSIZE - (i0 + i1);
1737# endif
1738 UBFX(r0, r1, i0, (i0 + i1) - 1);
1739 }
1740}
1741
1742static void
1743_depr(jit_state_t *_jit,
1744 jit_int32_t r0, jit_int32_t r1, jit_word_t i0, jit_word_t i1)
1745{
1746 jit_int32_t t0;
1747 jit_word_t mask;
1748 assert(i0 >= 0 && i1 >= 1 && i0 + i1 <= __WORDSIZE);
1749 if (i1 == __WORDSIZE)
1750 movr(r0, r1);
1751 else {
1752# if __BYTE_ORDER == __BIG_ENDIAN
1753 i0 = __WORDSIZE - (i0 + i1);
1754# endif
1755 BFM(r0, r1, -i0 & 63, i1 - 1);
1756 }
1757}
1758
79bfeef6
PC
1759static void
1760_clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1761{
1762 comr(r0, r1);
1763 clzr(r0, r0);
1764}
1765
1766static void
1767_ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1768{
1769 RBIT(r0, r1);
1770 clor(r0, r0);
1771}
1772
1773static void
1774_ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1775{
1776 RBIT(r0, r1);
1777 clzr(r0, r0);
1778}
1779
4a71579b
PC
1780static void
1781_andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1782{
1783 jit_int32_t reg;
1784 jit_int32_t imm;
1785 if (i0 == 0)
1786 movi(r0, 0);
1787 else if (i0 == -1)
1788 movr(r0, r1);
1789 else {
1790 imm = logical_immediate(i0);
1791 if (imm != -1)
1792 ANDI(r0, r1, imm);
1793 else {
1794 reg = jit_get_reg(jit_class_gpr);
1795 movi(rn(reg), i0);
1796 andr(r0, r1, rn(reg));
1797 jit_unget_reg(reg);
1798 }
1799 }
1800}
1801
1802static void
1803_ori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1804{
1805 jit_int32_t reg;
1806 jit_int32_t imm;
1807 if (i0 == 0)
1808 movr(r0, r1);
1809 else if (i0 == -1)
1810 movi(r0, -1);
1811 else {
1812 imm = logical_immediate(i0);
1813 if (imm != -1)
1814 ORRI(r0, r1, imm);
1815 else {
1816 reg = jit_get_reg(jit_class_gpr);
1817 movi(rn(reg), i0);
1818 orr(r0, r1, rn(reg));
1819 jit_unget_reg(reg);
1820 }
1821 }
1822}
1823
1824static void
1825_xori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1826{
1827 jit_int32_t reg;
1828 jit_int32_t imm;
1829 if (i0 == 0)
1830 movr(r0, r1);
1831 else if (i0 == -1)
1832 comr(r0, r1);
1833 else {
1834 imm = logical_immediate(i0);
1835 if (imm != -1)
1836 EORI(r0, r1, imm);
1837 else {
1838 reg = jit_get_reg(jit_class_gpr);
1839 movi(rn(reg), i0);
1840 xorr(r0, r1, rn(reg));
1841 jit_unget_reg(reg);
1842 }
1843 }
1844}
1845
4a71579b 1846static void
40a44dcb 1847_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
4a71579b 1848{
40a44dcb 1849 bswapr_ul(r0, r1);
4a71579b
PC
1850 rshi_u(r0, r0, 48);
1851}
1852
1853static void
40a44dcb 1854_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
4a71579b 1855{
40a44dcb 1856 bswapr_ul(r0, r1);
4a71579b
PC
1857 rshi_u(r0, r0, 32);
1858}
4a71579b
PC
1859
1860static void
1861_ldi_c(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
1862{
1863 jit_int32_t reg;
1864 reg = jit_get_reg(jit_class_gpr);
1865 movi(rn(reg), i0);
1866 ldr_c(r0, rn(reg));
1867 jit_unget_reg(reg);
1868}
1869
1870static void
1871_ldr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1872{
1873 LDRBI(r0, r1, 0);
1874#if 0
1875 extr_uc(r0, r0);
1876#endif
1877}
1878
1879static void
1880_ldi_uc(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
1881{
1882 jit_int32_t reg;
1883 reg = jit_get_reg(jit_class_gpr);
1884 movi(rn(reg), i0);
1885 ldr_uc(r0, rn(reg));
1886 jit_unget_reg(reg);
1887}
1888
1889static void
1890_ldi_s(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
1891{
1892 jit_int32_t reg;
1893 reg = jit_get_reg(jit_class_gpr);
1894 movi(rn(reg), i0);
1895 ldr_s(r0, rn(reg));
1896 jit_unget_reg(reg);
1897}
1898
1899static void
1900_ldr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1901{
1902 LDRHI(r0, r1, 0);
1903#if 0
1904 extr_us(r0, r0);
1905#endif
1906}
1907
1908static void
1909_ldi_us(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
1910{
1911 jit_int32_t reg;
1912 reg = jit_get_reg(jit_class_gpr);
1913 movi(rn(reg), i0);
1914 ldr_us(r0, rn(reg));
1915 jit_unget_reg(reg);
1916}
1917
1918static void
1919_ldi_i(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
1920{
1921 jit_int32_t reg;
1922 reg = jit_get_reg(jit_class_gpr);
1923 movi(rn(reg), i0);
1924 ldr_i(r0, rn(reg));
1925 jit_unget_reg(reg);
1926}
1927
1928static void
1929_ldr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1930{
1931 LDRWI(r0, r1, 0);
1932#if 0
1933 extr_ui(r0, r0);
1934#endif
1935}
1936
1937static void
1938_ldi_ui(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
1939{
1940 jit_int32_t reg;
1941 reg = jit_get_reg(jit_class_gpr);
1942 movi(rn(reg), i0);
1943 ldr_ui(r0, rn(reg));
1944 jit_unget_reg(reg);
1945}
1946
1947static void
1948_ldi_l(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
1949{
1950 jit_int32_t reg;
1951 reg = jit_get_reg(jit_class_gpr);
1952 movi(rn(reg), i0);
1953 ldr_l(r0, rn(reg));
1954 jit_unget_reg(reg);
1955}
1956
1957static void
1958_ldxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1959{
1960 LDRSB(r0, r1, r2);
1961 extr_c(r0, r0);
1962}
1963
1964static void
1965_ldxi_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1966{
1967 jit_int32_t reg;
1968 if (i0 >= 0 && i0 <= 4095)
1969 LDRSBI(r0, r1, i0);
1970 else if (i0 > -256 && i0 < 0)
1971 LDURSB(r0, r1, i0 & 0x1ff);
1972 else {
1973 reg = jit_get_reg(jit_class_gpr);
1974 movi(rn(reg), i0);
1975 LDRSB(r0, r1, rn(reg));
1976 jit_unget_reg(reg);
1977 }
1978 extr_c(r0, r0);
1979}
1980
1981static void
1982_ldxr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1983{
1984 LDRB(r0, r1, r2);
1985#if 0
1986 extr_uc(r0, r0);
1987#endif
1988}
1989
1990static void
1991_ldxi_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1992{
1993 jit_int32_t reg;
1994 if (i0 >= 0 && i0 <= 4095)
1995 LDRBI(r0, r1, i0);
1996 else if (i0 > -256 && i0 < 0)
1997 LDURB(r0, r1, i0 & 0x1ff);
1998 else {
1999 reg = jit_get_reg(jit_class_gpr);
2000 addi(rn(reg), r1, i0);
2001 ldr_uc(r0, rn(reg));
2002 jit_unget_reg(reg);
2003 }
2004#if 0
2005 extr_uc(r0, r0);
2006#endif
2007}
2008
2009static void
2010_ldxi_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2011{
2012 jit_int32_t reg;
519a9ea1 2013 if (i0 >= 0 && i0 <= 8191 && !(i0 & 1))
4a71579b
PC
2014 LDRSHI(r0, r1, i0 >> 1);
2015 else if (i0 > -256 && i0 < 0)
2016 LDURSH(r0, r1, i0 & 0x1ff);
2017 else {
2018 reg = jit_get_reg(jit_class_gpr);
2019 movi(rn(reg), i0);
2020 LDRSH(r0, r1, rn(reg));
2021 jit_unget_reg(reg);
2022 }
2023}
2024
2025static void
2026_ldxr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2027{
2028 LDRH(r0, r1, r2);
2029#if 0
2030 extr_us(r0, r0);
2031#endif
2032}
2033
2034static void
2035_ldxi_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2036{
2037 jit_int32_t reg;
519a9ea1 2038 if (i0 >= 0 && i0 <= 8191 && !(i0 & 1))
4a71579b
PC
2039 LDRHI(r0, r1, i0 >> 1);
2040 else if (i0 > -256 && i0 < 0)
2041 LDURH(r0, r1, i0 & 0x1ff);
2042 else {
2043 reg = jit_get_reg(jit_class_gpr);
2044 movi(rn(reg), i0);
2045 LDRH(r0, r1, rn(reg));
2046 jit_unget_reg(reg);
2047 }
2048#if 0
2049 extr_us(r0, r0);
2050#endif
2051}
2052
2053static void
2054_ldxi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2055{
2056 jit_int32_t reg;
519a9ea1 2057 if (i0 >= 0 && i0 <= 16383 && !(i0 & 3))
4a71579b
PC
2058 LDRSWI(r0, r1, i0 >> 2);
2059 else if (i0 > -256 && i0 < 0)
2060 LDURSW(r0, r1, i0 & 0x1ff);
2061 else {
2062 reg = jit_get_reg(jit_class_gpr);
2063 addi(rn(reg), r1, i0);
2064 ldr_i(r0, rn(reg));
2065 jit_unget_reg(reg);
2066 }
2067}
2068
2069static void
2070_ldxr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2071{
2072 LDRW(r0, r1, r2);
2073#if 0
2074 extr_ui(r0, r0);
2075#endif
2076}
2077
2078static void
2079_ldxi_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2080{
2081 jit_int32_t reg;
519a9ea1 2082 if (i0 >= 0 && i0 <= 16383 && !(i0 & 3))
4a71579b
PC
2083 LDRWI(r0, r1, i0 >> 2);
2084 else if (i0 > -256 && i0 < 0)
2085 LDURW(r0, r1, i0 & 0x1ff);
2086 else {
2087 reg = jit_get_reg(jit_class_gpr);
2088 movi(rn(reg), i0);
2089 LDRW(r0, r1, rn(reg));
2090 jit_unget_reg(reg);
2091 }
2092#if 0
2093 extr_ui(r0, r0);
2094#endif
2095}
2096
2097static void
2098_ldxi_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2099{
2100 jit_int32_t reg;
519a9ea1 2101 if (i0 >= 0 && i0 <= 32767 && !(i0 & 7))
4a71579b
PC
2102 LDRI(r0, r1, i0 >> 3);
2103 else if (i0 > -256 && i0 < 0)
2104 LDUR(r0, r1, i0 & 0x1ff);
2105 else {
2106 reg = jit_get_reg(jit_class_gpr);
2107 addi(rn(reg), r1, i0);
2108 ldr_l(r0, rn(reg));
2109 jit_unget_reg(reg);
2110 }
2111}
2112
2113static void
2114_sti_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
2115{
2116 jit_int32_t reg;
2117 reg = jit_get_reg(jit_class_gpr);
2118 movi(rn(reg), i0);
2119 str_c(rn(reg), r0);
2120 jit_unget_reg(reg);
2121}
2122
2123static void
2124_sti_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
2125{
2126 jit_int32_t reg;
2127 reg = jit_get_reg(jit_class_gpr);
2128 movi(rn(reg), i0);
2129 str_s(rn(reg), r0);
2130 jit_unget_reg(reg);
2131}
2132
2133static void
2134_sti_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
2135{
2136 jit_int32_t reg;
2137 reg = jit_get_reg(jit_class_gpr);
2138 movi(rn(reg), i0);
2139 str_i(rn(reg), r0);
2140 jit_unget_reg(reg);
2141}
2142
2143static void
2144_sti_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
2145{
2146 jit_int32_t reg;
2147 reg = jit_get_reg(jit_class_gpr);
2148 movi(rn(reg), i0);
2149 str_l(rn(reg), r0);
2150 jit_unget_reg(reg);
2151}
2152
2153static void
2154_stxi_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2155{
2156 jit_int32_t reg;
2157 if (i0 >= 0 && i0 <= 4095)
2158 STRBI(r1, r0, i0);
2159 else if (i0 > -256 && i0 < 0)
2160 STURB(r1, r0, i0 & 0x1ff);
2161 else {
2162 reg = jit_get_reg(jit_class_gpr);
2163 addi(rn(reg), r0, i0);
2164 str_c(rn(reg), r1);
2165 jit_unget_reg(reg);
2166 }
2167}
2168
2169static void
2170_stxi_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2171{
2172 jit_int32_t reg;
519a9ea1 2173 if (i0 >= 0 && i0 <= 8191 && !(i0 & 1))
4a71579b
PC
2174 STRHI(r1, r0, i0 >> 1);
2175 else if (i0 > -256 && i0 < 0)
2176 STURH(r1, r0, i0 & 0x1ff);
2177 else {
2178 reg = jit_get_reg(jit_class_gpr);
2179 addi(rn(reg), r0, i0);
2180 str_s(rn(reg), r1);
2181 jit_unget_reg(reg);
2182 }
2183}
2184
2185static void
2186_stxi_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2187{
2188 jit_int32_t reg;
519a9ea1 2189 if (i0 >= 0 && i0 <= 16383 && !(i0 & 3))
4a71579b
PC
2190 STRWI(r1, r0, i0 >> 2);
2191 else if (i0 > -256 && i0 < 0)
2192 STURW(r1, r0, i0 & 0x1ff);
2193 else {
2194 reg = jit_get_reg(jit_class_gpr);
2195 addi(rn(reg), r0, i0);
2196 str_i(rn(reg), r1);
2197 jit_unget_reg(reg);
2198 }
2199}
2200
2201static void
2202_stxi_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2203{
2204 jit_int32_t reg;
519a9ea1 2205 if (i0 >= 0 && i0 <= 32767 && !(i0 & 7))
4a71579b
PC
2206 STRI(r1, r0, i0 >> 3);
2207 else if (i0 > -256 && i0 < 0)
2208 STUR(r1, r0, i0 & 0x1ff);
2209 else {
2210 reg = jit_get_reg(jit_class_gpr);
2211 addi(rn(reg), r0, i0);
2212 str_l(rn(reg), r1);
2213 jit_unget_reg(reg);
2214 }
2215}
2216
ba3814c1
PC
2217static void
2218_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
2219 jit_int32_t r2, jit_int32_t r3, jit_word_t i0)
2220{
2221 jit_int32_t r1_reg, iscasi;
2222 jit_word_t retry, done, jump0, jump1;
2223 if ((iscasi = (r1 == _NOREG))) {
2224 r1_reg = jit_get_reg(jit_class_gpr);
2225 r1 = rn(r1_reg);
2226 movi(r1, i0);
2227 }
2228 /* retry: */
2229 retry = _jit->pc.w;
2230 LDAXR(r0, r1);
c0c16242 2231 eqr(r0, r0, r2);
79bfeef6 2232 jump0 = beqi(_jit->pc.w, r0, 0); /* beqi done r0 0 */
c0c16242 2233 STLXR(r3, r0, r1);
ba3814c1
PC
2234 jump1 = bnei(_jit->pc.w, r0, 0); /* bnei retry r0 0 */
2235 /* done: */
2236 CSET(r0, CC_EQ);
2237 done = _jit->pc.w;
2238 patch_at(jump0, done);
2239 patch_at(jump1, retry);
2240 if (iscasi)
2241 jit_unget_reg(r1_reg);
2242}
2243
4a71579b
PC
2244static void
2245_movr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2246{
2247 if (r0 != r1)
2248 MOV(r0, r1);
2249}
2250
2251static void
2252_movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
2253{
2254 jit_word_t n0, ibit, nbit;
2255 n0 = ~i0;
2256 ibit = nbit = 0;
2257 if (i0 & 0x000000000000ffffL) ibit |= 1;
2258 if (i0 & 0x00000000ffff0000L) ibit |= 2;
2259 if (i0 & 0x0000ffff00000000L) ibit |= 4;
2260 if (i0 & 0xffff000000000000L) ibit |= 8;
2261 if (n0 & 0x000000000000ffffL) nbit |= 1;
2262 if (n0 & 0x00000000ffff0000L) nbit |= 2;
2263 if (n0 & 0x0000ffff00000000L) nbit |= 4;
2264 if (n0 & 0xffff000000000000L) nbit |= 8;
2265 switch (ibit) {
2266 case 0:
2267 MOVZ (r0, 0);
2268 break;
2269 case 1:
2270 MOVZ (r0, i0 & 0xffff);
2271 break;
2272 case 2:
2273 MOVZ_16(r0, (i0 >> 16) & 0xffff);
2274 break;
2275 case 3:
2276 MOVZ (r0, i0 & 0xffff);
2277 MOVK_16(r0, (i0 >> 16) & 0xffff);
2278 break;
2279 case 4:
2280 MOVZ_32(r0, (i0 >> 32) & 0xffff);
2281 break;
2282 case 5:
2283 MOVZ (r0, i0 & 0xffff);
2284 MOVK_32(r0, (i0 >> 32) & 0xffff);
2285 break;
2286 case 6:
2287 MOVZ_16(r0, (i0 >> 16) & 0xffff);
2288 MOVK_32(r0, (i0 >> 32) & 0xffff);
2289 break;
2290 case 7:
2291 if (nbit == 8)
2292 MOVN_48(r0, (n0 >> 48) & 0xffff);
2293 else {
2294 MOVZ (r0, i0 & 0xffff);
2295 MOVK_16(r0, (i0 >> 16) & 0xffff);
2296 MOVK_32(r0, (i0 >> 32) & 0xffff);
2297 }
2298 break;
2299 case 8:
2300 MOVZ_48(r0, (i0 >> 48) & 0xffff);
2301 break;
2302 case 9:
2303 MOVZ (r0, i0 & 0xffff);
2304 MOVK_48(r0, (i0 >> 48) & 0xffff);
2305 break;
2306 case 10:
2307 MOVZ_16(r0, (i0 >> 16) & 0xffff);
2308 MOVK_48(r0, (i0 >> 48) & 0xffff);
2309 break;
2310 case 11:
2311 if (nbit == 4)
2312 MOVN_32(r0, (n0 >> 32) & 0xffff);
2313 else {
2314 MOVZ (r0, i0 & 0xffff);
2315 MOVK_16(r0, (i0 >> 16) & 0xffff);
2316 MOVK_48(r0, (i0 >> 48) & 0xffff);
2317 }
2318 break;
2319 case 12:
2320 MOVZ_32(r0, (i0 >> 32) & 0xffff);
2321 MOVK_48(r0, (i0 >> 48) & 0xffff);
2322 break;
2323 case 13:
2324 if (nbit == 2)
2325 MOVN_16(r0, (n0 >> 16) & 0xffff);
2326 else {
2327 MOVZ (r0, i0 & 0xffff);
2328 MOVK_32(r0, (i0 >> 32) & 0xffff);
2329 MOVK_48(r0, (i0 >> 48) & 0xffff);
2330 }
2331 break;
2332 case 14:
2333 if (nbit == 1)
2334 MOVN (r0, (n0) & 0xffff);
2335 else {
2336 MOVZ_16(r0, (i0 >> 16) & 0xffff);
2337 MOVK_32(r0, (i0 >> 32) & 0xffff);
2338 MOVK_48(r0, (i0 >> 48) & 0xffff);
2339 }
2340 break;
2341 case 15:
2342 if (nbit == 0)
2343 MOVN (r0, 0);
2344 else if (nbit == 1)
2345 MOVN (r0, n0 & 0xffff);
2346 else if (nbit == 8)
2347 MOVN_48(r0, (n0 >> 48) & 0xffff);
2348 else {
2349 MOVZ (r0, i0 & 0xffff);
2350 MOVK_16(r0, (i0 >> 16) & 0xffff);
2351 MOVK_32(r0, (i0 >> 32) & 0xffff);
2352 MOVK_48(r0, (i0 >> 48) & 0xffff);
2353 }
2354 break;
2355 default:
2356 abort();
2357 }
2358}
2359
2360static jit_word_t
2361_movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
2362{
2363 jit_word_t w;
2364 w = _jit->pc.w;
2365 MOVZ (r0, i0 & 0xffff);
2366 MOVK_16(r0, (i0 >> 16) & 0xffff);
2367 MOVK_32(r0, (i0 >> 32) & 0xffff);
2368 MOVK_48(r0, (i0 >> 48) & 0xffff);
2369 return (w);
2370}
2371
2372static void
2373_ccr(jit_state_t *_jit, jit_int32_t cc,
2374 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2375{
2376 CMP(r1, r2);
2377 CSET(r0, cc);
2378}
2379
2380static void
2381_cci(jit_state_t *_jit, jit_int32_t cc,
2382 jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2383{
2384 jit_int32_t reg;
2385 jit_word_t is = i0 >> 12;
2386 jit_word_t in = -i0;
2387 jit_word_t iS = in >> 12;
2388 if ( i0 >= 0 && i0 <= 0xfff)
2389 CMPI (r1, i0);
2390 else if ((is << 12) == i0 && is >= 0 && is <= 0xfff)
2391 CMPI_12(r1, is);
2392 else if ( in >= 0 && in <= 0xfff)
2393 CMNI (r1, in);
2394 else if ((iS << 12) == is && iS >= 0 && iS <= 0xfff)
2395 CMNI_12(r1, iS);
2396 else {
2397 reg = jit_get_reg(jit_class_gpr);
2398 movi(rn(reg), i0);
2399 CMP(r1, rn(reg));
2400 jit_unget_reg(reg);
2401 }
2402 CSET(r0, cc);
2403}
2404
2405static jit_word_t
2406_bccr(jit_state_t *_jit, jit_int32_t cc,
2407 jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2408{
2409 jit_word_t w, d;
2410 CMP(r0, r1);
2411 w = _jit->pc.w;
2412 d = (i0 - w) >> 2;
2413 B_C(cc, d);
2414 return (w);
2415}
2416
2417static jit_word_t
2418_bcci(jit_state_t *_jit, jit_int32_t cc,
2419 jit_word_t i0, jit_int32_t r0, jit_word_t i1)
2420{
2421 jit_int32_t reg;
2422 jit_word_t w, d;
2423 jit_word_t is = i1 >> 12;
2424 jit_word_t in = -i1;
2425 jit_word_t iS = in >> 12;
2426 if ( i1 >= 0 && i1 <= 0xfff)
2427 CMPI (r0, i1);
2428 else if ((is << 12) == i1 && is >= 0 && is <= 0xfff)
2429 CMPI_12(r0, is);
2430 else if ( in >= 0 && in <= 0xfff)
2431 CMNI (r0, in);
2432 else if ((iS << 12) == is && iS >= 0 && iS <= 0xfff)
2433 CMNI_12(r0, iS);
2434 else {
2435 reg = jit_get_reg(jit_class_gpr);
2436 movi(rn(reg), i1);
2437 CMP(r0, rn(reg));
2438 jit_unget_reg(reg);
2439 }
2440 w = _jit->pc.w;
2441 d = (i0 - w) >> 2;
2442 B_C(cc, d);
2443 return (w);
2444}
2445
2446static jit_word_t
2447_beqi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
2448{
2449 jit_word_t w;
2450 if (i1 == 0) {
2451 w = _jit->pc.w;
2452 CBZ(r0, (i0 - w) >> 2);
2453 }
2454 else
2455 w = bcci(BCC_EQ, i0, r0, i1);
2456 return (w);
2457}
2458
2459static jit_word_t
2460_bnei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
2461{
2462 jit_word_t w;
2463 if (i1 == 0) {
2464 w = _jit->pc.w;
2465 CBNZ(r0, (i0 - w) >> 2);
2466 }
2467 else
2468 w = bcci(BCC_NE, i0, r0, i1);
2469 return (w);
2470}
2471
2472static jit_word_t
2473_baddr(jit_state_t *_jit, jit_int32_t cc,
2474 jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2475{
2476 jit_word_t w;
2477 addcr(r0, r0, r1);
2478 w = _jit->pc.w;
2479 B_C(cc, (i0 - w) >> 2);
2480 return (w);
2481}
2482
2483static jit_word_t
2484_baddi(jit_state_t *_jit, jit_int32_t cc,
2485 jit_word_t i0, jit_int32_t r0, jit_word_t i1)
2486{
2487 jit_word_t w;
2488 addci(r0, r0, i1);
2489 w = _jit->pc.w;
2490 B_C(cc, (i0 - w) >> 2);
2491 return (w);
2492}
2493
2494static jit_word_t
2495_bsubr(jit_state_t *_jit, jit_int32_t cc,
2496 jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2497{
2498 jit_word_t w;
2499 subcr(r0, r0, r1);
2500 w = _jit->pc.w;
2501 B_C(cc, (i0 - w) >> 2);
2502 return (w);
2503}
2504
2505static jit_word_t
2506_bsubi(jit_state_t *_jit, jit_int32_t cc,
2507 jit_word_t i0, jit_int32_t r0, jit_word_t i1)
2508{
2509 jit_word_t w;
2510 subci(r0, r0, i1);
2511 w = _jit->pc.w;
2512 B_C(cc, (i0 - w) >> 2);
2513 return (w);
2514}
2515
2516static jit_word_t
2517_bmxr(jit_state_t *_jit, jit_int32_t cc,
2518 jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2519{
2520 jit_word_t w;
2521 TST(r0, r1);
2522 w = _jit->pc.w;
2523 B_C(cc, (i0 - w) >> 2);
2524 return (w);
2525}
2526
2527static jit_word_t
2528_bmxi(jit_state_t *_jit, jit_int32_t cc,
2529 jit_word_t i0, jit_int32_t r0, jit_word_t i1)
2530{
2531 jit_word_t w;
2532 jit_int32_t reg;
2533 jit_int32_t imm;
2534 imm = logical_immediate(i1);
2535 if (imm != -1)
2536 TSTI(r0, imm);
2537 else {
2538 reg = jit_get_reg(jit_class_gpr);
2539 movi(rn(reg), i1);
2540 TST(r0, rn(reg));
2541 jit_unget_reg(reg);
2542 }
2543 w = _jit->pc.w;
2544 B_C(cc, (i0 - w) >> 2);
2545 return (w);
2546}
2547
79bfeef6 2548static jit_word_t
4a71579b
PC
2549_jmpi(jit_state_t *_jit, jit_word_t i0)
2550{
4a71579b 2551 jit_int32_t reg;
79bfeef6
PC
2552 jit_word_t d, w;
2553 w = _jit->pc.w;
2554 d = (i0 - w) >> 2;
2555 if (s26_p(d))
2556 B(d);
4a71579b
PC
2557 else {
2558 reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
2559 movi(rn(reg), i0);
2560 jmpr(rn(reg));
2561 jit_unget_reg(reg);
2562 }
79bfeef6 2563 return (w);
4a71579b
PC
2564}
2565
2566static jit_word_t
2567_jmpi_p(jit_state_t *_jit, jit_word_t i0)
2568{
2569 jit_word_t w;
2570 jit_int32_t reg;
2571 reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
2572 w = movi_p(rn(reg), i0);
2573 jmpr(rn(reg));
2574 jit_unget_reg(reg);
2575 return (w);
2576}
2577
79bfeef6 2578static jit_word_t
4a71579b
PC
2579_calli(jit_state_t *_jit, jit_word_t i0)
2580{
4a71579b 2581 jit_int32_t reg;
79bfeef6
PC
2582 jit_word_t d, w;
2583 w = _jit->pc.w;
2584 d = (i0 - w) >> 2;
2585 if (s26_p(d))
2586 BL(d);
4a71579b
PC
2587 else {
2588 reg = jit_get_reg(jit_class_gpr);
2589 movi(rn(reg), i0);
2590 callr(rn(reg));
2591 jit_unget_reg(reg);
2592 }
79bfeef6 2593 return (w);
4a71579b
PC
2594}
2595
2596static jit_word_t
2597_calli_p(jit_state_t *_jit, jit_word_t i0)
2598{
2599 jit_word_t w;
2600 jit_int32_t reg;
2601 reg = jit_get_reg(jit_class_gpr);
2602 w = movi_p(rn(reg), i0);
2603 callr(rn(reg));
2604 jit_unget_reg(reg);
2605 return (w);
2606}
2607
4a71579b
PC
2608static void
2609_prolog(jit_state_t *_jit, jit_node_t *node)
2610{
79bfeef6 2611 jit_int32_t reg, rreg, offs;
4a71579b
PC
2612 if (_jitc->function->define_frame || _jitc->function->assume_frame) {
2613 jit_int32_t frame = -_jitc->function->frame;
79bfeef6 2614 jit_check_frame();
4a71579b
PC
2615 assert(_jitc->function->self.aoff >= frame);
2616 if (_jitc->function->assume_frame)
2617 return;
2618 _jitc->function->self.aoff = frame;
2619 }
2620 if (_jitc->function->allocar)
2621 _jitc->function->self.aoff &= -16;
2622 _jitc->function->stack = ((_jitc->function->self.alen -
2623 /* align stack at 16 bytes */
2624 _jitc->function->self.aoff) + 15) & -16;
79bfeef6
PC
2625
2626 if (!_jitc->function->need_frame) {
2627 /* check if any callee save register needs to be saved */
2628 for (reg = 0; reg < _jitc->reglen; ++reg)
2629 if (jit_regset_tstbit(&_jitc->function->regset, reg) &&
2630 (_rvs[reg].spec & jit_class_sav)) {
2631 jit_check_frame();
2632 break;
2633 }
2634 }
2635
2636 if (_jitc->function->need_frame) {
2637 STPI_POS(FP_REGNO, LR_REGNO, SP_REGNO, -(jit_framesize() >> 3));
2638 MOV_XSP(FP_REGNO, SP_REGNO);
2639 }
2640 /* callee save registers */
2641 for (reg = 0, offs = 2; reg < jit_size(iregs);) {
2642 if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
2643 for (rreg = reg + 1; rreg < jit_size(iregs); rreg++) {
2644 if (jit_regset_tstbit(&_jitc->function->regset, iregs[rreg]))
2645 break;
2646 }
2647 if (rreg < jit_size(iregs)) {
2648 STPI(rn(iregs[reg]), rn(iregs[rreg]), SP_REGNO, offs);
2649 offs += 2;
2650 reg = rreg + 1;
2651 }
2652 else {
2653 STRI(rn(iregs[reg]), SP_REGNO, offs);
2654 ++offs;
2655 /* No pair found */
2656 break;
2657 }
2658 }
2659 else
2660 ++reg;
2661 }
2662 for (reg = 0, offs <<= 3; reg < jit_size(fregs); reg++) {
2663 if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) {
2664 stxi_d(offs, SP_REGNO, rn(fregs[reg]));
2665 offs += sizeof(jit_float64_t);
2666 }
2667 }
2668
2669 if (_jitc->function->stack)
4a71579b
PC
2670 subi(SP_REGNO, SP_REGNO, _jitc->function->stack);
2671 if (_jitc->function->allocar) {
2672 reg = jit_get_reg(jit_class_gpr);
2673 movi(rn(reg), _jitc->function->self.aoff);
2674 stxi_i(_jitc->function->aoffoff, FP_REGNO, rn(reg));
2675 jit_unget_reg(reg);
2676 }
2677
79bfeef6 2678#if !__APPLE__
4a71579b
PC
2679 if (_jitc->function->self.call & jit_call_varargs) {
2680 /* Save gp registers in the save area, if any is a vararg */
2681 for (reg = 8 - _jitc->function->vagp / -8;
2682 jit_arg_reg_p(reg); ++reg)
2683 stxi(_jitc->function->vaoff + offsetof(jit_va_list_t, x0) +
2684 reg * 8, FP_REGNO, rn(JIT_RA0 - reg));
2685
2686 for (reg = 8 - _jitc->function->vafp / -16;
2687 jit_arg_f_reg_p(reg); ++reg)
2688 /* Save fp registers in the save area, if any is a vararg */
2689 /* Note that the full 16 byte register is not saved, because
2690 * lightning only handles float and double, and, while
2691 * attempting to provide a va_list compatible pointer as
2692 * jit_va_start return, does not guarantee it (on all ports). */
2693 stxi_d(_jitc->function->vaoff + offsetof(jit_va_list_t, q0) +
2694 reg * 16 + offsetof(jit_qreg_t, l), FP_REGNO, rn(_V0 - reg));
2695 }
79bfeef6 2696#endif
4a71579b
PC
2697}
2698
2699static void
2700_epilog(jit_state_t *_jit, jit_node_t *node)
2701{
79bfeef6 2702 jit_int32_t reg, rreg, offs;
4a71579b
PC
2703 if (_jitc->function->assume_frame)
2704 return;
2705 if (_jitc->function->stack)
2706 MOV_XSP(SP_REGNO, FP_REGNO);
79bfeef6
PC
2707 /* callee save registers */
2708 for (reg = 0, offs = 2; reg < jit_size(iregs);) {
2709 if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
2710 for (rreg = reg + 1; rreg < jit_size(iregs); rreg++) {
2711 if (jit_regset_tstbit(&_jitc->function->regset, iregs[rreg]))
2712 break;
2713 }
2714 if (rreg < jit_size(iregs)) {
2715 LDPI(rn(iregs[reg]), rn(iregs[rreg]), SP_REGNO, offs);
2716 offs += 2;
2717 reg = rreg + 1;
2718 }
2719 else {
2720 LDRI(rn(iregs[reg]), SP_REGNO, offs);
2721 ++offs;
2722 /* No pair found */
2723 break;
2724 }
2725 }
2726 else
2727 ++reg;
2728 }
2729 for (reg = 0, offs <<= 3; reg < jit_size(fregs); reg++) {
2730 if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) {
2731 ldxi_d(rn(fregs[reg]), SP_REGNO, offs);
2732 offs += sizeof(jit_float64_t);
2733 }
2734 }
2735
2736 if (_jitc->function->need_frame)
2737 LDPI_PRE(FP_REGNO, LR_REGNO, SP_REGNO, jit_framesize() >> 3);
4a71579b
PC
2738 RET();
2739}
2740
2741static void
2742_vastart(jit_state_t *_jit, jit_int32_t r0)
2743{
79bfeef6 2744#if !__APPLE__
4a71579b
PC
2745 jit_int32_t reg;
2746
2747 assert(_jitc->function->self.call & jit_call_varargs);
2748
2749 /* Return jit_va_list_t in the register argument */
2750 addi(r0, FP_REGNO, _jitc->function->vaoff);
2751
2752 reg = jit_get_reg(jit_class_gpr);
2753
2754 /* Initialize stack pointer to the first stack argument. */
79bfeef6 2755 addi(rn(reg), FP_REGNO, jit_selfsize());
4a71579b
PC
2756 stxi(offsetof(jit_va_list_t, stack), r0, rn(reg));
2757
2758 /* Initialize gp top pointer to the first stack argument. */
2759 addi(rn(reg), r0, va_gp_top_offset);
2760 stxi(offsetof(jit_va_list_t, gptop), r0, rn(reg));
2761
2762 /* Initialize fp top pointer to the first stack argument. */
2763 addi(rn(reg), r0, va_fp_top_offset);
2764 stxi(offsetof(jit_va_list_t, fptop), r0, rn(reg));
2765
2766 /* Initialize gp offset in the save area. */
2767 movi(rn(reg), _jitc->function->vagp);
2768 stxi_i(offsetof(jit_va_list_t, gpoff), r0, rn(reg));
2769
2770 /* Initialize fp offset in the save area. */
2771 movi(rn(reg), _jitc->function->vafp);
2772 stxi_i(offsetof(jit_va_list_t, fpoff), r0, rn(reg));
2773
2774 jit_unget_reg(reg);
79bfeef6
PC
2775#else
2776 assert(_jitc->function->self.call & jit_call_varargs);
2777 addi(r0, FP_REGNO, jit_selfsize());
2778#endif
4a71579b
PC
2779}
2780
2781static void
2782_vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2783{
79bfeef6 2784#if !__APPLE__
4a71579b
PC
2785 jit_word_t ge_code;
2786 jit_word_t lt_code;
2787 jit_int32_t rg0, rg1;
2788
2789 assert(_jitc->function->self.call & jit_call_varargs);
2790
2791 rg0 = jit_get_reg(jit_class_gpr);
2792 rg1 = jit_get_reg(jit_class_gpr);
2793
2794 /* Load the gp offset in save area in the first temporary. */
2795 ldxi_i(rn(rg0), r1, offsetof(jit_va_list_t, gpoff));
2796
2797 /* Jump over if there are no remaining arguments in the save area. */
2798 ge_code = bgei(_jit->pc.w, rn(rg0), 0);
2799
2800 /* Load the gp save pointer in the second temporary. */
2801 ldxi(rn(rg1), r1, offsetof(jit_va_list_t, gptop));
2802
2803 /* Load the vararg argument in the first argument. */
2804 ldxr(r0, rn(rg1), rn(rg0));
2805
2806 /* Update the gp offset. */
2807 addi(rn(rg0), rn(rg0), 8);
2808 stxi_i(offsetof(jit_va_list_t, gpoff), r1, rn(rg0));
2809
2810 /* Will only need one temporary register below. */
2811 jit_unget_reg(rg1);
2812
2813 /* Jump over overflow code. */
79bfeef6 2814 lt_code = jmpi(_jit->pc.w);
4a71579b
PC
2815
2816 /* Where to land if argument is in overflow area. */
2817 patch_at(ge_code, _jit->pc.w);
2818
2819 /* Load stack pointer. */
2820 ldxi(rn(rg0), r1, offsetof(jit_va_list_t, stack));
2821
2822 /* Load argument. */
2823 ldr(r0, rn(rg0));
2824
2825 /* Update stack pointer. */
2826 addi(rn(rg0), rn(rg0), 8);
2827 stxi(offsetof(jit_va_list_t, stack), r1, rn(rg0));
2828
2829 /* Where to land if argument is in gp save area. */
2830 patch_at(lt_code, _jit->pc.w);
2831
2832 jit_unget_reg(rg0);
79bfeef6
PC
2833#else
2834 assert(_jitc->function->self.call & jit_call_varargs);
2835 ldr(r0, r1);
2836 addi(r1, r1, sizeof(jit_word_t));
2837#endif
4a71579b
PC
2838}
2839
2840static void
2841_patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label)
2842{
2843 instr_t i;
2844 jit_word_t d;
2845 jit_int32_t fc, ff, ffc;
2846 union {
2847 jit_int32_t *i;
2848 jit_word_t w;
2849 } u;
2850 u.w = instr;
2851 i.w = u.i[0];
2852 fc = i.w & 0xfc000000;
2853 ff = i.w & 0xff000000;
2854 ffc = i.w & 0xffc00000;
2855 if (fc == A64_B || fc == A64_BL) {
2856 d = (label - instr) >> 2;
79bfeef6 2857 assert(s26_p(d));
4a71579b
PC
2858 i.imm26.b = d;
2859 u.i[0] = i.w;
2860 }
2861 else if (ff == A64_B_C || ff == (A64_CBZ|XS) || ff == (A64_CBNZ|XS)) {
2862 d = (label - instr) >> 2;
2863 assert(d >= -262148 && d <= 262143);
2864 i.imm19.b = d;
2865 u.i[0] = i.w;
2866 }
2867 else if (ffc == (A64_MOVZ|XS)) {
2868 i.imm16.b = label;
2869 u.i[0] = i.w;
2870 i.w = u.i[1];
2871 assert((i.w & 0xffe00000) == (A64_MOVK|XS|MOVI_LSL_16));
2872 i.imm16.b = label >> 16;
2873 u.i[1] = i.w;
2874 i.w = u.i[2];
2875 assert((i.w & 0xffe00000) == (A64_MOVK|XS|MOVI_LSL_32));
2876 i.imm16.b = label >> 32;
2877 u.i[2] = i.w;
2878 i.w = u.i[3];
2879 assert((i.w & 0xffe00000) == (A64_MOVK|XS|MOVI_LSL_48));
2880 i.imm16.b = label >> 48;
2881 u.i[3] = i.w;
2882 }
2883 else
2884 abort();
2885}
2886#endif