git subrepo pull (merge) --force deps/lightning
[pcsx_rearmed.git] / deps / lightning / lib / jit_arm-cpu.c
1 /*
2  * Copyright (C) 2012-2023  Free Software Foundation, Inc.
3  *
4  * This file is part of GNU lightning.
5  *
6  * GNU lightning is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU Lesser General Public License as published
8  * by the Free Software Foundation; either version 3, or (at your option)
9  * any later version.
10  *
11  * GNU lightning is distributed in the hope that it will be useful, but
12  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
14  * License for more details.
15  *
16  * Authors:
17  *      Paulo Cesar Pereira de Andrade
18  */
19
20 #if PROTO
/* Feature queries that read fields of the jit_cpu descriptor, followed by
 * aliases that map the unsuffixed load/store names onto their "_i" variants
 * (presumably the native 32-bit word forms on this target -- confirm). */
21 #  define jit_unaligned_p()             (jit_cpu.unaligned)
22 #  define jit_vfp_unaligned_p()         (jit_cpu.vfp_unaligned)
23 #  define ldr(r0,r1)                    ldr_i(r0,r1)
24 #  define ldi(r0,i0)                    ldi_i(r0,i0)
25 #  define ldxr(r0,r1,r2)                ldxr_i(r0,r1,r2)
26 #  define ldxi(r0,r1,i0)                ldxi_i(r0,r1,i0)
27 #  define str(r0,r1)                    str_i(r0,r1)
28 #  define sti(i0,r0)                    sti_i(i0,r0)
29 #  define stxr(r0,r1,r2)                stxr_i(r0,r1,r2)
30 #  define stxi(i0,r0,r1)                stxi_i(i0,r0,r1)
/* Range predicates: non-zero when d fits in a signed 20-bit (_s20P) or signed
 * 24-bit (_s24P) field.  Every use of the argument is parenthesized so that
 * expressions containing low-precedence operators (e.g. ?:) are tested as a
 * whole.  Note that d is evaluated twice. */
#  define _s20P(d)                      ((d) >= -(int)0x80000 && (d) <= 0x7ffff)
#  define _s24P(d)                      ((d) >= -(int)0x800000 && (d) <= 0x7fffff)
/* Field masks: _uN(v) keeps only the low N bits of v. */
33 #  define _u3(v)                        ((v) & 0x7)
34 #  define _u4(v)                        ((v) & 0xf)
35 #  define _u5(v)                        ((v) & 0x1f)
36 #  define _u8(v)                        ((v) & 0xff)
37 #  define _u12(v)                       ((v) & 0xfff)
38 #  define _u13(v)                       ((v) & 0x1fff)
39 #  define _u16(v)                       ((v) & 0xffff)
40 #  define _u24(v)                       ((v) & 0xffffff)
/* Code-generation mode and architecture-level predicates.
 * jit_thumb_p() and jit_no_set_flags() read jit_cpu.thumb and
 * _jitc->no_set_flags respectively.  The jit_armvN_p() tests compare
 * jit_cpu.version; the "5e" and "7r" variants additionally accept the exact
 * version only when jit_cpu.extend is set. */
41 #  define jit_thumb_p()                 jit_cpu.thumb
42 #  define jit_no_set_flags()            _jitc->no_set_flags
43 #  define jit_armv5_p()                 (jit_cpu.version >= 5)
44 #  define jit_armv5e_p()                (jit_cpu.version > 5 || (jit_cpu.version == 5 && jit_cpu.extend))
45 #  define jit_armv6_p()                 (jit_cpu.version >= 6)
46 #  define jit_armv7_p()                 (jit_cpu.version >= 7)
47 #  define jit_armv7r_p()                (jit_cpu.version > 7 || (jit_cpu.version == 7 && jit_cpu.extend))
/* External signed/unsigned divide-with-remainder helpers.
 * NOTE(review): these look like the ARM EABI run-time routines, which return
 * quotient and remainder together; the scalar return types declared here are
 * presumably only good enough for taking the function address -- confirm. */
48 extern int      __aeabi_idivmod(int, int);
49 extern unsigned __aeabi_uidivmod(unsigned, unsigned);
/* Core register numbers 0..15, plus role aliases: FP is r11, SP is r13,
 * LR is r14 and PC is r15. */
50 #  define _R0_REGNO                     0x00
51 #  define _R1_REGNO                     0x01
52 #  define _R2_REGNO                     0x02
53 #  define _R3_REGNO                     0x03
54 #  define _R4_REGNO                     0x04
55 #  define _R5_REGNO                     0x05
56 #  define _R6_REGNO                     0x06
57 #  define _R7_REGNO                     0x07
58 #  define _R8_REGNO                     0x08
59 #  define _R9_REGNO                     0x09
60 #  define _R10_REGNO                    0x0a
61 #  define _R11_REGNO                    0x0b
62 #  define _R12_REGNO                    0x0c
63 #  define _R13_REGNO                    0x0d
64 #  define _R14_REGNO                    0x0e
65 #  define _R15_REGNO                    0x0f
66 #  define _FP_REGNO                     _R11_REGNO
67 #  define _SP_REGNO                     _R13_REGNO
68 #  define _LR_REGNO                     _R14_REGNO
69 #  define _PC_REGNO                     _R15_REGNO
/* Condition codes, already positioned in the top four bits (31:28) of a
 * 32-bit word.  The trailing comment on each line gives the flag state under
 * which the condition holds.  CS and CC are alternate names for HS and LO. */
70 #  define ARM_CC_EQ                     0x00000000      /* Z=1 */
71 #  define ARM_CC_NE                     0x10000000      /* Z=0 */
72 #  define ARM_CC_HS                     0x20000000      /* C=1 */
73 #    define ARM_CC_CS                   ARM_CC_HS
74 #  define ARM_CC_LO                     0x30000000      /* C=0 */
75 #    define ARM_CC_CC                   ARM_CC_LO
76 #  define ARM_CC_MI                     0x40000000      /* N=1 */
77 #  define ARM_CC_PL                     0x50000000      /* N=0 */
78 #  define ARM_CC_VS                     0x60000000      /* V=1 */
79 #  define ARM_CC_VC                     0x70000000      /* V=0 */
80 #  define ARM_CC_HI                     0x80000000      /* C=1 && Z=0 */
81 #  define ARM_CC_LS                     0x90000000      /* C=0 || Z=1 */
82 #  define ARM_CC_GE                     0xa0000000      /* N=V */
83 #  define ARM_CC_LT                     0xb0000000      /* N!=V */
84 #  define ARM_CC_GT                     0xc0000000      /* Z=0 && N=V */
85 #  define ARM_CC_LE                     0xd0000000      /* Z=1 || N!=V */
86 #  define ARM_CC_AL                     0xe0000000      /* always */
87 #  define ARM_CC_NV                     0xf0000000      /* reserved */
/* Selector indices (0..14) naming the possible IT-block shapes.  They are
 * passed as the second argument of tcit() by the IT()/ITT()/ITE()/...
 * wrapper macros; the letters after "IT" spell the then/else pattern. */
88 #  define THUMB2_IT                     0
89 #  define THUMB2_ITT                    1
90 #  define THUMB2_ITE                    2
91 #  define THUMB2_ITTT                   3
92 #  define THUMB2_ITET                   4
93 #  define THUMB2_ITTE                   5
94 #  define THUMB2_ITEE                   6
95 #  define THUMB2_ITTTT                  7
96 #  define THUMB2_ITETT                  8
97 #  define THUMB2_ITTET                  9
98 #  define THUMB2_ITEET                  10
99 #  define THUMB2_ITTTE                  11
100 #  define THUMB2_ITETE                  12
101 #  define THUMB2_ITTEE                  13
102 #  define THUMB2_ITEEE                  14
/* Base opcode templates for move, add/subtract and multiply instructions.
 * ARM_* and THUMB2_* values are 32-bit templates, THUMB_* values are 16-bit
 * templates; operand fields are OR'ed in by the emitter helpers.
 * ARM_I and ARM_S are modifier bits combined with a base opcode by the
 * wrapper macros (for example ARM_ADD|ARM_S|ARM_I in ADDSI). */
103 #  define ARM_MOV                       0x01a00000
104 #  define THUMB_MOV                         0x4600
105 #  define ARM_MOVWI                     0x03000000      /* v6t2, v7 */
106 #  define THUMB_MOVI                        0x2000
107 #  define THUMB2_MOVI                   0xf0400000
108 #  define THUMB2_MOVWI                  0xf2400000
109 #  define ARM_MOVTI                     0x03400000
110 #  define THUMB2_MOVTI                  0xf2c00000
111 #  define ARM_MVN                       0x01e00000
112 #  define THUMB_MVN                         0x43c0
113 #  define THUMB2_MVN                    0xea600000
114 #  define THUMB2_MVNI                   0xf0600000
115 #  define ARM_I                         0x02000000 /* immediate */
116 #  define ARM_S                         0x00100000 /* set flags */
117 #  define ARM_ADD                       0x00800000
118 #  define THUMB_ADD                         0x1800
119 #  define THUMB_ADDX                        0x4400
120 #  define THUMB2_ADD                    0xeb000000
121 #  define THUMB_ADDI3                       0x1c00
122 #  define THUMB_ADDI8                       0x3000
123 #  define THUMB2_ADDI                   0xf1000000
124 #  define THUMB2_ADDWI                  0xf2000000
125 #  define ARM_ADC                       0x00a00000
126 #  define THUMB_ADC                         0x4140
127 #  define THUMB2_ADC                    0xeb400000
128 #  define THUMB2_ADCI                   0xf1400000
129 #  define ARM_SUB                       0x00400000
130 #  define THUMB_SUB                         0x1a00
131 #  define THUMB2_SUB                    0xeba00000
132 #  define THUMB_SUBI3                       0x1e00
133 #  define THUMB_SUBI8                       0x3800
134 #  define THUMB2_SUBI                   0xf1a00000
135 #  define THUMB2_SUBWI                  0xf2a00000
136 #  define ARM_SBC                       0x00c00000
137 #  define THUMB_SBC                         0x4180
138 #  define THUMB2_SBC                    0xeb600000
139 #  define THUMB2_SBCI                   0xf1600000
140 #  define ARM_RSB                       0x00600000
141 #  define THUMB_RSBI                        0x4240
142 #  define THUMB2_RSBI                   0xf1c00000
143 #  define ARM_MUL                       0x00000090
144 #  define THUMB_MUL                         0x4340
145 #  define THUMB2_MUL                    0xfb00f000
146 #  define ARM_UMULL                     0x00800090
147 #  define THUMB2_UMULL                  0xfba00000
148 #  define ARM_SMULL                     0x00c00090
149 #  define THUMB2_SMULL                  0xfb800000
/* Signed and unsigned divide opcode templates; the bracketing markers tag
 * them as belonging to the ARMv7r group (cf. jit_armv7r_p()). */
150 /* >> ARMv7r */
151 #  define ARM_SDIV                      0x07100010
152 #  define ARM_UDIV                      0x07300010
153 #  define THUMB2_SDIV                   0xfb90f0f0
154 #  define THUMB2_UDIV                   0xfbb0f0f0
155 /* << ARMv7r */
/* Bitwise-logic opcode templates (and, bit-clear, or, exclusive-or).
 * An "I" suffix names the immediate-operand Thumb-2 form. */
156 #  define ARM_AND                       0x00000000
157 #  define THUMB_AND                         0x4000
158 #  define THUMB2_AND                    0xea000000
159 #  define THUMB2_ANDI                   0xf0000000
160 #  define ARM_BIC                       0x01c00000
161 #  define THUMB2_BIC                    0xea200000
162 #  define THUMB2_BICI                   0xf0200000
163 #  define ARM_ORR                       0x01800000
164 #  define THUMB_ORR                         0x4300
165 #  define THUMB2_ORR                    0xea400000
166 #  define THUMB2_ORRI                   0xf0400000
167 #  define ARM_EOR                       0x00200000
168 #  define THUMB_EOR                         0x4040
169 #  define THUMB2_EOR                    0xea800000
170 #  define THUMB2_EORI                   0xf0800000
/* Opcode templates tagged as ARMv6 and later: byte reversal, sign/zero
 * extension (with the ARM_XTR* rotate modifiers), and the exclusive
 * load/store pair. */
171 /* >> ARMv6* */
172 #  define ARM_REV                       0x06bf0f30
173 #  define THUMB_REV                         0xba00
174 #  define THUMB2_REV                    0xfa90f080
175 #  define ARM_REV16                     0x06bf0fb0
176 #  define THUMB_REV16                       0xba40
177 #  define THUMB2_REV16                  0xfa90f090
178 #  define ARM_SXTB                      0x06af0070
179 #  define THUMB_SXTB                        0xb240
180 #  define THUMB2_SXTB                   0xfa40f080
181 #  define ARM_UXTB                      0x06ef0070
182 #  define THUMB_UXTB                        0xb2c0
183 #  define THUMB2_UXTB                   0xfa50f080
184 #  define ARM_SXTH                      0x06bf0070
185 #  define THUMB_SXTH                        0xb200
186 #  define THUMB2_SXTH                   0xfa00f080
187 #  define ARM_UXTH                      0x06ff0070
188 #  define THUMB_UXTH                        0xb280
189 #  define THUMB2_UXTH                   0xfa10f080
190 #  define ARM_XTR8                      0x00000400 /* ?xt? rotate 8 bits */
191 #  define ARM_XTR16                     0x00000800 /* ?xt? rotate 16 bits */
192 #  define ARM_XTR24                     0x00000c00 /* ?xt? rotate 24 bits */
193 #  define ARM_LDREX                     0x01900090
194 #  define THUMB2_LDREX                  0xe8500000
195 #  define ARM_STREX                     0x01800090
196 #  define THUMB2_STREX                  0xe8400000
197 /* << ARMv6* */
/* Opcode templates tagged as ARMv6t2: count-leading-zeros, bit reversal and
 * the bit-field extract/insert instructions.  ARM_CLZ is listed after the
 * closing marker, i.e. outside the ARMv6t2 group. */
198 /* >> ARMv6t2 */
199 #  define THUMB2_CLZ                    0xfab0f080
200 #  define THUMB2_RBIT                   0xfa90f0a0
201 #  define ARM_RBIT                      0x06f00030
202 #  define THUMB2_SBFX                   0xf3400000
203 #  define ARM_SBFX                      0x07a00050
204 #  define THUMB2_UBFX                   0xf3c00000
205 #  define ARM_UBFX                      0x07e00050
206 #  define THUMB2_BFI                    0xf3600000
207 #  define ARM_BFI                       0x07c00010
208 /* << ARMv6t2 */
209 #  define ARM_CLZ                       0x01600010
/* >> ARMv7 */
/* Data-memory-barrier opcode templates and the 4-bit option values that
 * select the barrier's domain and access type.  The architectural name of
 * option 0x6 is NSHST; DMB_NSHT is the historical spelling used by this file
 * and is kept as an alias so existing users keep compiling. */
#  define ARM_DMB                       0xf57ff050
#  define THUMB2_DMB                    0xf3bf8f50
#  define DMB_SY                        0xf
#  define DMB_ST                        0xe
#  define DMB_ISH                       0xb
#  define DMB_ISHST                     0xa
#  define DMB_NSH                       0x7
#  define DMB_NSHST                     0x6
#  define DMB_NSHT                      DMB_NSHST
#  define DMB_OSH                       0x3
#  define DMB_OSHST                     0x2
/* << ARMv7 */
/* Shift and rotate encodings.  ARM_SHIFT has the same value as ARM_MOV;
 * ARM_LSL/LSR/ASR/ROR are shift-type bits and ARM_R marks a register-
 * specified shift amount.  THUMB*_xxxI are the immediate-amount forms. */
222 #  define ARM_SHIFT                     0x01a00000
223 #  define ARM_R                         0x00000010 /* register shift */
224 #  define ARM_LSL                       0x00000000
225 #  define THUMB_LSL                         0x4080
226 #  define THUMB2_LSL                    0xfa00f000
227 #  define THUMB_LSLI                        0x0000
228 #  define THUMB2_LSLI                   0xea4f0000
229 #  define ARM_LSR                       0x00000020
230 #  define THUMB_LSR                         0x40c0
231 #  define THUMB2_LSR                    0xfa20f000
232 #  define THUMB_LSRI                        0x0800
233 #  define THUMB2_LSRI                   0xea4f0010
234 #  define ARM_ASR                       0x00000040
235 #  define THUMB_ASR                         0x4100
236 #  define THUMB2_ASR                    0xfa40f000
237 #  define THUMB_ASRI                        0x1000
238 #  define THUMB2_ASRI                   0xea4f0020
239 #  define ARM_ROR                       0x00000060
240 #  define THUMB_ROR                         0x41c0
241 #  define THUMB2_ROR                    0xfa60f000
242 #  define THUMB2_RORI                   0xea4f0030
/* Compare and test opcode templates (cmp, cmn, tst, teq).  Only an ARM
 * template is listed for teq. */
243 #  define ARM_CMP                       0x01500000
244 #  define THUMB_CMP                         0x4280
245 #  define THUMB_CMPX                        0x4500
246 #  define THUMB2_CMP                    0xebb00000
247 #  define THUMB_CMPI                        0x2800
248 #  define THUMB2_CMPI                   0xf1b00000
249 #  define ARM_CMN                       0x01700000
250 #  define THUMB_CMN                         0x42c0
251 #  define THUMB2_CMN                    0xeb100000
252 #  define THUMB2_CMNI                   0xf1100000
253 #  define ARM_TST                       0x01100000
254 #  define THUMB_TST                         0x4200
255 #  define THUMB2_TST                    0xea100000
256 #  define THUMB2_TSTI                   0xf0100000
257 #  define ARM_TEQ                       0x01300000
258 /* branch */
/* Branch opcode templates: register-indirect (BX/BLX), immediate branch (B),
 * immediate branch-with-link (BLI/BLXI) and the Thumb conditional forms
 * (THUMB_CC_B, THUMB2_CC_B). */
259 #  define ARM_BX                        0x012fff10
260 #  define ARM_BLX                       0x012fff30
261 #  define THUMB_BLX                         0x4780
262 #  define ARM_BLXI                      0xfa000000
263 #  define THUMB2_BLXI                   0xf000c000
264 #  define ARM_B                         0x0a000000
265 #  define THUMB_CC_B                        0xd000
266 #  define THUMB_B                           0xe000
267 #  define THUMB2_CC_B                   0xf0008000
268 #  define THUMB2_B                      0xf0009000
269 #  define ARM_BLI                       0x0b000000
270 #  define THUMB2_BLI                    0xf000d000
271 /* ldr/str */
/* Load/store opcode templates for signed/unsigned byte, halfword, word and
 * doubleword accesses.  ARM_P and THUMB2_P/U/W are addressing-mode modifier
 * bits.  Names ending in "I" are immediate-offset forms; THUMB_*ISP are
 * presumably the SP-relative 16-bit forms and THUMB2_*WI the wide-immediate
 * forms -- confirm against the emitters that use them. */
272 #  define ARM_P                         0x00800000 /* positive offset */
273 #  define THUMB2_P                      0x00000400
274 #  define THUMB2_U                      0x00000200
275 #  define THUMB2_W                      0x00000100
276 #  define ARM_LDRSB                     0x011000d0
277 #  define THUMB_LDRSB                       0x5600
278 #  define THUMB2_LDRSB                  0xf9100000
279 #  define ARM_LDRSBI                    0x015000d0
280 #  define THUMB2_LDRSBI                 0xf9100c00
281 #  define THUMB2_LDRSBWI                0xf9900000
282 #  define ARM_LDRB                      0x07500000
283 #  define THUMB_LDRB                        0x5c00
284 #  define THUMB2_LDRB                   0xf8100000
285 #  define ARM_LDRBI                     0x05500000
286 #  define THUMB_LDRBI                       0x7800
287 #  define THUMB2_LDRBI                  0xf8100c00
288 #  define THUMB2_LDRBWI                 0xf8900000
289 #  define ARM_LDRSH                     0x011000f0
290 #  define THUMB_LDRSH                       0x5e00
291 #  define THUMB2_LDRSH                  0xf9300000
292 #  define ARM_LDRSHI                    0x015000f0
293 #  define THUMB2_LDRSHI                 0xf9300c00
294 #  define THUMB2_LDRSHWI                0xf9b00000
295 #  define ARM_LDRH                      0x011000b0
296 #  define THUMB_LDRH                        0x5a00
297 #  define THUMB2_LDRH                   0xf8300000
298 #  define ARM_LDRHI                     0x015000b0
299 #  define THUMB_LDRHI                       0x8800
300 #  define THUMB2_LDRHI                  0xf8300c00
301 #  define THUMB2_LDRHWI                 0xf8b00000
302 #  define ARM_LDR                       0x07100000
303 #  define THUMB_LDR                         0x5800
304 #  define THUMB2_LDR                    0xf8500000
305 #  define ARM_LDRI                      0x05100000
306 #  define THUMB_LDRI                        0x6800
307 #  define THUMB_LDRISP                      0x9800
308 #  define THUMB2_LDRI                   0xf8500c00
309 #  define THUMB2_LDRWI                  0xf8d00000
310 #  define ARM_LDRD                      0x010000d0
311 #  define ARM_LDRDI                     0x014000d0
312 #  define THUMB2_LDRDI                  0xe8500000
313 #  define ARM_STRB                      0x07400000
314 #  define THUMB_STRB                        0x5400
315 #  define THUMB2_STRB                   0xf8000000
316 #  define ARM_STRBI                     0x05400000
317 #  define THUMB_STRBI                       0x7000
318 #  define THUMB2_STRBI                  0xf8000c00
319 #  define THUMB2_STRBWI                 0xf8800000
320 #  define ARM_STRH                      0x010000b0
321 #  define THUMB_STRH                        0x5200
322 #  define THUMB2_STRH                   0xf8200000
323 #  define ARM_STRHI                     0x014000b0
324 #  define THUMB_STRHI                       0x8000
325 #  define THUMB2_STRHI                  0xf8200c00
326 #  define THUMB2_STRHWI                 0xf8a00000
327 #  define ARM_STR                       0x07000000
328 #  define THUMB_STR                         0x5000
329 #  define THUMB2_STR                    0xf8400000
330 #  define ARM_STRI                      0x05000000
331 #  define THUMB_STRI                        0x6000
332 # define THUMB2_STRWI                   0xf8c00000
333 #  define THUMB_STRISP                      0x9000
334 #  define THUMB2_STRI                   0xf8400c00
335 #  define ARM_STRD                      0x010000f0
336 # define ARM_STRDI                      0x014000f0
337 #  define THUMB2_STRDI                  0xe8400000
338 /* ldm/stm */
/* Load/store-multiple encodings.  ARM_M is the base opcode; the ARM_M_*
 * bits select load vs store, increment vs decrement, before vs after and
 * base-register update, as described by the per-line comments.  The
 * remaining entries are Thumb ldm, push and pop templates and Thumb-2
 * modifier bits. */
339 #  define ARM_M                         0x08000000
340 #  define ARM_M_L                       0x00100000 /* load; store if not set */
341 #  define ARM_M_I                       0x00800000 /* inc; dec if not set */
342 #  define ARM_M_B                       0x01000000 /* before; after if not set */
343 #  define ARM_M_U                       0x00200000 /* update Rn */
344 #  define THUMB2_LDM_W                  0x00200000
345 #  define THUMB2_LDM_P                  0x00008000
346 #  define THUMB2_LDM_M                  0x00004000
347 #  define THUMB_LDMIA                       0xc800
348 #  define THUMB2_LDMIA                  0xe8900000
349 #  define THUMB2_LDMB                   0xe9100000
350 #  define THUMB_PUSH                        0xb400
351 #  define THUMB2_PUSH                   0xe92d0000
352 #  define THUMB_POP                         0xbc00
353 #  define THUMB2_POP                    0xe8bd0000
/* Raw emit primitives.  ii() stores its argument through _jit->pc.ui and
 * advances that pointer; is() does the same through _jit->pc.us
 * (presumably 32-bit and 16-bit views of the code pointer -- confirm).
 * iss(i, j) emits two units with is(); on little-endian hosts j is written
 * first, otherwise i is.  code2thumb()/thumb2code() copy a pair between
 * "code" order and "thumb" order, swapping the halves on little-endian
 * hosts and copying straight through otherwise. */
354 #  define ii(i)                         *_jit->pc.ui++ = i
355 #  define is(i)                         *_jit->pc.us++ = i
356 #  if __BYTE_ORDER == __LITTLE_ENDIAN
357 #    define iss(i, j)                   do { is(j); is(i); } while (0)
358 #    define code2thumb(t0, t1, c0, c1)  do { t1 = c0; t0 = c1; } while (0)
359 #    define thumb2code(t0, t1, c0, c1)  do { c0 = t1; c1 = t0; } while (0)
360 #  else
361 #    define iss(i, j)                   do { is(i); is(j); } while (0)
362 #    define code2thumb(t0, t1, c0, c1)  do { t0 = c0; t1 = c1; } while (0)
363 #    define thumb2code(t0, t1, c0, c1)  do { c0 = t0; c1 = t1; } while (0)
364 #  endif
/* Forward declarations of the immediate/offset encoders; the definitions are
 * not in this part of the file.  Each takes a value and returns an int
 * (presumably the encoded field, or a failure marker when the value cannot
 * be represented -- confirm against the definitions). */
365 static int encode_arm_immediate(unsigned int v);
366 static int encode_thumb_immediate(unsigned int v);
367 static int encode_thumb_word_immediate(unsigned int v);
368 static int encode_thumb_jump(int v);
369 static int encode_thumb_cc_jump(int v);
370 static int encode_thumb_shift(int v, int type) maybe_unused;
/* Instruction-emitter helpers.  Each macro forwards to the static function
 * of the same name with a leading underscore, passing the ambient _jit state
 * as the first argument so call sites can omit it.  Helpers whose macro
 * takes a cc argument receive a condition code; the remaining letters of a
 * name appear to describe the operand kinds (o = opcode, r = register,
 * i = immediate) -- confirm against the definitions, which are outside this
 * part of the file. */
371 #  define corrr(cc,o,rn,rd,rm)          _corrr(_jit,cc,o,rn,rd,rm)
372 static void _corrr(jit_state_t*,int,int,int,int,int);
373 #  define corri(cc,o,rn,rd,im)          _corri(_jit,cc,o,rn,rd,im)
374 static void _corri(jit_state_t*,int,int,int,int,int);
375 #define corri8(cc,o,rn,rt,im)   _corri8(_jit,cc,o,rn,rt,im)
376 static void _corri8(jit_state_t*,int,int,int,int,int);
377 #  define torrr(o,rn,rd,rm)             _torrr(_jit,o,rn,rd,rm)
378 static void _torrr(jit_state_t*,int,int,int,int);
379 #  define torrrs(o,rn,rd,rm,im)         _torrrs(_jit,o,rn,rd,rm,im)
380 static void _torrrs(jit_state_t*,int,int,int,int,int) maybe_unused;
381 #  define torxr(o,rn,rt,rm)             _torxr(_jit,o,rn,rt,rm)
382 static void _torxr(jit_state_t*,int,int,int,int);
383 #  define torrrr(o,rn,rl,rh,rm)         _torrrr(_jit,o,rn,rl,rh,rm)
384 static void _torrrr(jit_state_t*,int,int,int,int,int) maybe_unused;
385 #  define torrri8(o,rn,rt,rt2,im)       _torrri8(_jit,o,rn,rt,rt2,im)
386 static void _torrri8(jit_state_t*,int,int,int,int,int) maybe_unused;
387 #  define coriw(cc,o,rd,im)             _coriw(_jit,cc,o,rd,im)
388 static void _coriw(jit_state_t*,int,int,int,int);
389 #  define torri(o,rd,rn,im)             _torri(_jit,o,rd,rn,im)
390 static void _torri(jit_state_t*,int,int,int,int);
391 #  define torri8(o,rn,rt,im)            _torri8(_jit,o,rn,rt,im)
392 static void _torri8(jit_state_t*,int,int,int,int);
393 #  define torri12(o,rn,rt,im)           _torri12(_jit,o,rn,rt,im)
394 static void _torri12(jit_state_t*,int,int,int,int);
395 #  define tshift(o,rd,rm,im)            _tshift(_jit,o,rd,rm,im)
396 static void _tshift(jit_state_t*,int,int,int,int);
397 #  define toriw(o,rd,im)                _toriw(_jit,o,rd,im)
398 static void _toriw(jit_state_t*,int,int,int);
399 #  define tc8(cc,im)                    _tc8(_jit,cc,im)
400 static void _tc8(jit_state_t*,int,int) maybe_unused;
401 #  define t11(im)                       _t11(_jit,im)
402 static void _t11(jit_state_t*,int);
403 #  define tcb(cc,im)                    _tcb(_jit,cc,im)
404 static void _tcb(jit_state_t*,int,int);
405 #  define blxi(im)                      _blxi(_jit,im)
406 static void _blxi(jit_state_t*,int) maybe_unused;
407 #  define tb(o,im)                      _tb(_jit,o,im)
408 static void _tb(jit_state_t*,int,int);
409 #  define corrrr(cc,o,rh,rl,rm,rn)      _corrrr(_jit,cc,o,rh,rl,rm,rn)
410 static void _corrrr(jit_state_t*,int,int,int,int,int,int);
411 #  define corrrs(cc,o,rn,rd,rm,im)      _corrrs(_jit,cc,o,rn,rd,rm,im)
412 static void _corrrs(jit_state_t*,int,int,int,int,int,int);
413 #  define cshift(cc,o,rd,rm,rn,im)      _cshift(_jit,cc,o,rd,rm,rn,im)
414 static void _cshift(jit_state_t*,int,int,int,int,int,int);
415 #  define cb(cc,o,im)                   _cb(_jit,cc,o,im)
416 static void _cb(jit_state_t*,int,int,int);
417 #  define cbx(cc,o,rm)                  _cbx(_jit,cc,o,rm)
418 static void _cbx(jit_state_t*,int,int,int);
419 #  define corl(cc,o,r0,i0)              _corl(_jit,cc,o,r0,i0)
420 static void _corl(jit_state_t*,int,int,int,int);
421 #  define c6orr(cc,o,r0,r1)             _c6orr(_jit,cc,o,r0,r1)
422 static void _c6orr(jit_state_t*,int,int,int,int);
423 #  define tcit(cc,it)                   _tcit(_jit,cc,it)
424 static void _tcit(jit_state_t*,unsigned int,int);
/* IT-block wrappers: each passes the condition cc to tcit() together with
 * the THUMB2_IT* selector matching the macro's own name. */
425 #  define IT(cc)                        tcit(cc,THUMB2_IT)
426 #  define ITT(cc)                       tcit(cc,THUMB2_ITT)
427 #  define ITE(cc)                       tcit(cc,THUMB2_ITE)
428 #  define ITTT(cc)                      tcit(cc,THUMB2_ITTT)
429 #  define ITTE(cc)                      tcit(cc,THUMB2_ITTE)
430 #  define ITET(cc)                      tcit(cc,THUMB2_ITET)
431 #  define ITEE(cc)                      tcit(cc,THUMB2_ITEE)
432 #  define ITTTT(cc)                     tcit(cc,THUMB2_ITTTT)
433 #  define ITETT(cc)                     tcit(cc,THUMB2_ITETT)
434 #  define ITTET(cc)                     tcit(cc,THUMB2_ITTET)
435 #  define ITEET(cc)                     tcit(cc,THUMB2_ITEET)
436 #  define ITTTE(cc)                     tcit(cc,THUMB2_ITTTE)
437 #  define ITETE(cc)                     tcit(cc,THUMB2_ITETE)
438 #  define ITTEE(cc)                     tcit(cc,THUMB2_ITTEE)
439 #  define ITEEE(cc)                     tcit(cc,THUMB2_ITEEE)
/* Further emitter forwards (tpp, torl) and the memory-barrier helpers.
 * DMB() and T2_DMB() are upper-case aliases for dmb() and tdmb(), which in
 * turn call _dmb()/_tdmb() with the ambient _jit state. */
440 #  define tpp(o,im)                     _tpp(_jit,o,im)
441 static void _tpp(jit_state_t*,int,int);
442 #  define torl(o,rn,im)                 _torl(_jit,o,rn,im)
443 static void _torl(jit_state_t*,int,int,int) maybe_unused;
444 #  define DMB(im)                       dmb(im)
445 #  define T2_DMB(im)                    tdmb(im)
446 #  define dmb(im)                       _dmb(_jit, im)
447 static void _dmb(jit_state_t *_jit, int im);
448 #  define tdmb(im)                      _tdmb(_jit, im)
449 static void _tdmb(jit_state_t *_jit, int im);
/* Move wrappers.  CC_* variants take an explicit condition code and the
 * unprefixed ARM variants pass ARM_CC_AL.  T1_* variants emit a 16-bit
 * Thumb encoding directly with is(); T2_* variants go through the Thumb-2
 * emitter helpers.
 *
 * T1_MOV splits rd into its high bit (placed at bit 7) and its low three
 * bits; the low bits are taken with _u3() so the argument is parenthesized
 * and compound expressions such as (8|1) are masked as a whole. */
#  define CC_MOV(cc,rd,rm)              corrr(cc,ARM_MOV,0,rd,rm)
#  define MOV(rd,rm)                    CC_MOV(ARM_CC_AL,rd,rm)
#  define T1_MOV(rd,rm)                 is(THUMB_MOV|((_u4(rd)&8)<<4)|(_u4(rm)<<3)|_u3(rd))
#  define T2_MOV(rd,rm)                 T2_ORR(rd,_R15_REGNO,rm)
#  define CC_MOVI(cc,rd,im)             corri(cc,ARM_MOV|ARM_I,0,rd,im)
#  define MOVI(rd,im)                   CC_MOVI(ARM_CC_AL,rd,im)
#  define CC_MOVWI(cc,rd,im)            coriw(cc,ARM_MOVWI,rd,im)
#  define MOVWI(rd,im)                  CC_MOVWI(ARM_CC_AL,rd,im)
#  define T1_MOVI(rd,im)                is(THUMB_MOVI|(_u3(rd)<<8)|_u8(im))
#  define T2_MOVI(rd,im)                torri(THUMB2_MOVI,_R15_REGNO,rd,im)
#  define T2_MOVWI(rd,im)               toriw(THUMB2_MOVWI,rd,im)
#  define CC_MOVTI(cc,rd,im)            coriw(cc,ARM_MOVTI,rd,im)
#  define MOVTI(rd,im)                  CC_MOVTI(ARM_CC_AL,rd,im)
#  define T2_MOVTI(rd,im)               toriw(THUMB2_MOVTI,rd,im)
/* Bitwise-not wrappers.  MVN* build the instruction from the MVN opcode
 * templates; the NOT* names are plain aliases for the matching MVN macro. */
464 #  define CC_MVN(cc,rd,rm)              corrr(cc,ARM_MVN,0,rd,rm)
465 #  define MVN(rd,rm)                    CC_MVN(ARM_CC_AL,rd,rm)
466 #  define T1_MVN(rd,rm)                 is(THUMB_MVN|(_u3(rm)<<3)|_u3(rd))
467 #  define T2_MVN(rd,rm)                 torrr(THUMB2_MVN,_R15_REGNO,rd,rm)
468 #  define CC_MVNI(cc,rd,im)             corri(cc,ARM_MVN|ARM_I,0,rd,im)
469 #  define MVNI(rd,im)                   CC_MVNI(ARM_CC_AL,rd,im)
470 #  define T2_MVNI(rd,im)                torri(THUMB2_MVNI,_R15_REGNO,rd,im)
471 #  define CC_NOT(cc,rd,rm)              CC_MVN(cc,rd,rm)
472 #  define NOT(rd,rm)                    CC_NOT(ARM_CC_AL,rd,rm)
473 #  define T1_NOT(rd,rm)                 T1_MVN(rd,rm)
474 #  define T2_NOT(rd,rm)                 T2_MVN(rd,rm)
/* Emitter forwards used by the bit-field wrappers: torrlw() for Thumb-2 and
 * corrlw() for conditional ARM.  Both take an opcode, two registers, and the
 * lsb / wm1 field arguments (wm1 is presumably "width minus one" -- confirm
 * against the definitions). */
475 #  define torrlw(o,rd,rn,lsb,wm1)       _torrlw(_jit,o,rd,rn,lsb,wm1)
476 static void _torrlw(jit_state_t*,int,int,int,int,int);
477 #  define corrlw(cc,o,rd,rn,lsb,wm1)    _corrlw(_jit,cc,o,rd,rn,lsb,wm1)
478 static void _corrlw(jit_state_t*,int,int,int,int,int,int);
/* Bit-field extract/insert wrappers (SBFX, UBFX, BFI).
 * T2_* forms pass the Thumb-2 opcode to torrlw(); the ARM forms go through
 * corrlw().  CC_SBFX keeps its historical signature with an explicit opcode
 * argument.  CC_UBFX and CC_BFI take no opcode parameter, so they supply
 * ARM_UBFX / ARM_BFI themselves instead of referencing an undeclared `o`;
 * UBFX and BFI are expressed through them. */
#  define T2_SBFX(rd,rn,lsb,wm1)        torrlw(THUMB2_SBFX,rd,rn,lsb,wm1)
#  define CC_SBFX(cc,o,rd,rn,lsb,wm1)   corrlw(cc,o,rd,rn,lsb,wm1)
#  define SBFX(rd,rn,lsb,wm1)           CC_SBFX(ARM_CC_AL,ARM_SBFX,rd,rn,lsb,wm1)
#  define T2_UBFX(rd,rn,lsb,wm1)        torrlw(THUMB2_UBFX,rd,rn,lsb,wm1)
#  define CC_UBFX(cc,rd,rn,lsb,wm1)     corrlw(cc,ARM_UBFX,rd,rn,lsb,wm1)
#  define UBFX(rd,rn,lsb,wm1)           CC_UBFX(ARM_CC_AL,rd,rn,lsb,wm1)
#  define T2_BFI(rd,rn,lsb,wm1)         torrlw(THUMB2_BFI,rd,rn,lsb,wm1)
#  define CC_BFI(cc,rd,rn,lsb,wm1)      corrlw(cc,ARM_BFI,rd,rn,lsb,wm1)
#  define BFI(rd,rn,lsb,wm1)            CC_BFI(ARM_CC_AL,rd,rn,lsb,wm1)
/* Count-leading-zeros and bit-reverse wrappers, plus no-ops.
 * The Thumb-2 forms pass rm in both the rn and rm positions of torrr(); the
 * ARM forms pass _R15_REGNO (0xf) in the two register positions that are
 * not rd or rm.  NOP() is encoded as MOV r0, r0; T1_NOP() emits the fixed
 * halfword 0xbf00. */
488 #  define T2_CLZ(rd,rm)                 torrr(THUMB2_CLZ,rm,rd,rm)
489 #  define CC_CLZ(cc,rd,rm)              corrrr(cc,ARM_CLZ,_R15_REGNO,rd,_R15_REGNO,rm)
490 #  define CLZ(rd,rm)                    CC_CLZ(ARM_CC_AL,rd,rm)
491 #  define T2_RBIT(rd,rm)                torrr(THUMB2_RBIT,rm,rd,rm)
492 #  define CC_RBIT(cc,rd,rm)             corrrr(cc,ARM_RBIT,_R15_REGNO,rd,_R15_REGNO,rm)
493 #  define RBIT(rd,rm)                   CC_RBIT(ARM_CC_AL,rd,rm)
494 #  define NOP()                         MOV(_R0_REGNO, _R0_REGNO)
495 #  define T1_NOP()                      is(0xbf00)
/* Add wrappers.  CC_* variants take an explicit condition code, unprefixed
 * ARM variants use ARM_CC_AL, T1_* emit a 16-bit Thumb encoding with is()
 * and T2_* use the Thumb-2 emitter helpers.  An "S" in the name ORs ARM_S
 * (set flags) into the opcode and an "I" selects the immediate form.
 *
 * T1_ADDX splits rdn into its high bit (placed at bit 7) and its low three
 * bits; the low bits are taken with _u3() so the argument is parenthesized
 * and compound expressions are masked as a whole. */
#  define CC_ADD(cc,rd,rn,rm)           corrr(cc,ARM_ADD,rn,rd,rm)
#  define ADD(rd,rn,rm)                 CC_ADD(ARM_CC_AL,rd,rn,rm)
#  define T1_ADD(rd,rn,rm)              is(THUMB_ADD|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rd))
#  define T1_ADDX(rdn,rm)               is(THUMB_ADDX|((_u4(rdn)&8)<<4)|(_u4(rm)<<3)|_u3(rdn))
#  define T2_ADD(rd,rn,rm)              torrr(THUMB2_ADD,rn,rd,rm)
#  define CC_ADDI(cc,rd,rn,im)          corri(cc,ARM_ADD|ARM_I,rn,rd,im)
#  define ADDI(rd,rn,im)                CC_ADDI(ARM_CC_AL,rd,rn,im)
#  define T1_ADDI3(rd,rn,im)            is(THUMB_ADDI3|(_u3(im)<<6)|(_u3(rn)<<3)|_u3(rd))
#  define T1_ADDI8(rdn,im)              is(THUMB_ADDI8|(_u3(rdn)<<8)|_u8(im))
#  define T2_ADDI(rd,rn,im)             torri(THUMB2_ADDI,rn,rd,im)
#  define T2_ADDWI(rd,rn,im)            torri(THUMB2_ADDWI,rn,rd,im)
#  define CC_ADDS(cc,rd,rn,rm)          corrr(cc,ARM_ADD|ARM_S,rn,rd,rm)
#  define ADDS(rd,rn,rm)                CC_ADDS(ARM_CC_AL,rd,rn,rm)
#  define T2_ADDS(rd,rn,rm)             torrr(THUMB2_ADD|ARM_S,rn,rd,rm)
#  define ADDSI(rd,rn,im)               corri(ARM_CC_AL,ARM_ADD|ARM_S|ARM_I,rn,rd,im)
#  define T2_ADDSI(rd,rn,im)            torri(THUMB2_ADDI|ARM_S,rn,rd,im)
/* Add-with-carry wrappers, following the same naming scheme as the add
 * wrappers: CC_* takes a condition code, the unprefixed form uses ARM_CC_AL,
 * "S" ORs in ARM_S and "I" selects the immediate form.  T1_ADC is the
 * two-operand 16-bit form (rdn is both source and destination field). */
512 #  define CC_ADC(cc,rd,rn,rm)           corrr(cc,ARM_ADC,rn,rd,rm)
513 #  define ADC(rd,rn,rm)                 CC_ADC(ARM_CC_AL,rd,rn,rm)
514 #  define T1_ADC(rdn,rm)                is(THUMB_ADC|(_u3(rm)<<3)|_u3(rdn))
515 #  define T2_ADC(rd,rn,rm)              torrr(THUMB2_ADC,rn,rd,rm)
516 #  define CC_ADCI(cc,rd,rn,im)          corri(cc,ARM_ADC|ARM_I,rn,rd,im)
517 #  define ADCI(rd,rn,im)                CC_ADCI(ARM_CC_AL,rd,rn,im)
518 #  define T2_ADCI(rd,rn,im)             torri(THUMB2_ADCI,rn,rd,im)
519 #  define CC_ADCS(cc,rd,rn,rm)          corrr(cc,ARM_ADC|ARM_S,rn,rd,rm)
520 #  define ADCS(rd,rn,rm)                CC_ADCS(ARM_CC_AL,rd,rn,rm)
521 #  define T2_ADCS(rd,rn,rm)             torrr(THUMB2_ADC|ARM_S,rn,rd,rm)
522 #  define CC_ADCSI(cc,rd,rn,im)         corri(cc,ARM_ADC|ARM_S|ARM_I,rn,rd,im)
523 #  define ADCSI(rd,rn,im)               CC_ADCSI(ARM_CC_AL,rd,rn,im)
524 #  define T2_ADCSI(rd,rn,im)            torri(THUMB2_ADCI|ARM_S,rn,rd,im)
/* Subtract and subtract-with-carry wrappers.  Naming mirrors the add
 * family: CC_* takes a condition code, the unprefixed form uses ARM_CC_AL,
 * T1_* emits a 16-bit encoding with is(), T2_* uses the Thumb-2 helpers,
 * "S" ORs in ARM_S and "I" selects the immediate form. */
525 #  define CC_SUB(cc,rd,rn,rm)           corrr(cc,ARM_SUB,rn,rd,rm)
526 #  define SUB(rd,rn,rm)                 CC_SUB(ARM_CC_AL,rd,rn,rm)
527 #  define T1_SUB(rd,rn,rm)              is(THUMB_SUB|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rd))
528 #  define T2_SUB(rd,rn,rm)              torrr(THUMB2_SUB,rn,rd,rm)
529 #  define CC_SUBI(cc,rd,rn,im)          corri(cc,ARM_SUB|ARM_I,rn,rd,im)
530 #  define SUBI(rd,rn,im)                CC_SUBI(ARM_CC_AL,rd,rn,im)
531 #  define T1_SUBI3(rd,rn,im)            is(THUMB_SUBI3|(_u3(im)<<6)|(_u3(rn)<<3)|_u3(rd))
532 #  define T1_SUBI8(rdn,im)              is(THUMB_SUBI8|(_u3(rdn)<<8)|_u8(im))
533 #  define T2_SUBI(rd,rn,im)             torri(THUMB2_SUBI,rn,rd,im)
534 #  define T2_SUBWI(rd,rn,im)            torri(THUMB2_SUBWI,rn,rd,im)
535 #  define CC_SUBS(cc,rd,rn,rm)          corrr(cc,ARM_SUB|ARM_S,rn,rd,rm)
536 #  define SUBS(rd,rn,rm)                CC_SUBS(ARM_CC_AL,rd,rn,rm)
537 #  define T2_SUBS(rd,rn,rm)             torrr(THUMB2_SUB|ARM_S,rn,rd,rm)
538 #  define CC_SUBSI(cc,rd,rn,im)         corri(cc,ARM_SUB|ARM_S|ARM_I,rn,rd,im)
539 #  define SUBSI(rd,rn,im)               CC_SUBSI(ARM_CC_AL,rd,rn,im)
540 #  define T2_SUBSI(rd,rn,im)            torri(THUMB2_SUBI|ARM_S,rn,rd,im)
541 #  define CC_SBC(cc,rd,rn,rm)           corrr(cc,ARM_SBC,rn,rd,rm)
542 #  define SBC(rd,rn,rm)                 CC_SBC(ARM_CC_AL,rd,rn,rm)
543 #  define T1_SBC(rdn,rm)                is(THUMB_SBC|(_u3(rm)<<3)|_u3(rdn))
544 #  define T2_SBC(rd,rn,rm)              torrr(THUMB2_SBC,rn,rd,rm)
545 #  define CC_SBCI(cc,rd,rn,im)          corri(cc,ARM_SBC|ARM_I,rn,rd,im)
546 #  define SBCI(rd,rn,im)                CC_SBCI(ARM_CC_AL,rd,rn,im)
547 #  define T2_SBCI(rd,rn,im)             torri(THUMB2_SBCI,rn,rd,im)
548 #  define CC_SBCS(cc,rd,rn,rm)          corrr(cc,ARM_SBC|ARM_S,rn,rd,rm)
549 #  define SBCS(rd,rn,rm)                CC_SBCS(ARM_CC_AL,rd,rn,rm)
550 #  define T2_SBCS(rd,rn,rm)             torrr(THUMB2_SBC|ARM_S,rn,rd,rm)
551 #  define CC_SBCSI(cc,rd,rn,im)         corri(cc,ARM_SBC|ARM_S|ARM_I,rn,rd,im)
552 #  define SBCSI(rd,rn,im)               CC_SBCSI(ARM_CC_AL,rd,rn,im)
553 #  define T2_SBCSI(rd,rn,im)            torri(THUMB2_SBCI|ARM_S,rn,rd,im)
554 #  define CC_RSB(cc,rd,rn,rm)           corrr(cc,ARM_RSB,rn,rd,rm)
555 #  define RSB(rd,rn,rm)                 CC_RSB(ARM_CC_AL,rd,rn,rm)
556 #  define T2_RSB(rd,rn,rm)              torrr(THUMB2_RSB,rn,rd,rm)
557 #  define CC_RSBI(cc,rd,rn,im)          corri(cc,ARM_RSB|ARM_I,rn,rd,im)
558 #  define RSBI(rd,rn,im)                CC_RSBI(ARM_CC_AL,rd,rn,im)
559 #  define T1_RSBI(rd,rn)                is(THUMB_RSBI|(_u3(rn)<<3)|_u3(rd))
560 #  define T2_RSBI(rd,rn,im)             torri(THUMB2_RSBI,rn,rd,im)
/*
 * Multiply and divide encoders.
 *
 * The ARM forms go through corrrr(), whose last two arguments are
 * passed as (rm,rn) by CC_MUL/CC_SMULL/CC_UMULL.
 *
 * SDIV()/UDIV() swap rn and rm before calling CC_SDIV()/CC_UDIV(), which
 * forward them unswapped, so SDIV(rd,rn,rm) ends up as
 * corrrr(cc,ARM_SDIV,rd,15,rm,rn) -- the same (rm,rn) order as CC_MUL.
 * NOTE(review): CC_SDIV/CC_UDIV used directly do not perform that swap;
 * confirm the intended operand order before calling them directly.
 */
#  define CC_MUL(cc,rl,rn,rm)           corrrr(cc,ARM_MUL,rl,0,rm,rn)
#  define MUL(rl,rn,rm)                 CC_MUL(ARM_CC_AL,rl,rn,rm)
#  define T1_MUL(rdm,rn)                is(THUMB_MUL|(_u3(rn)<<3)|_u3(rdm))
#  define T2_MUL(rd,rn,rm)              torrr(THUMB2_MUL,rn,rd,rm)
/* Long multiplies: rl/rh name the low and high result registers. */
#  define CC_SMULL(cc,rl,rh,rn,rm)      corrrr(cc,ARM_SMULL,rh,rl,rm,rn)
#  define SMULL(rl,rh,rn,rm)            CC_SMULL(ARM_CC_AL,rl,rh,rn,rm)
#  define T2_SMULL(rl,rh,rn,rm)         torrrr(THUMB2_SMULL,rn,rl,rh,rm)
#  define CC_UMULL(cc,rl,rh,rn,rm)      corrrr(cc,ARM_UMULL,rh,rl,rm,rn)
#  define UMULL(rl,rh,rn,rm)            CC_UMULL(ARM_CC_AL,rl,rh,rn,rm)
#  define T2_UMULL(rl,rh,rn,rm)         torrrr(THUMB2_UMULL,rn,rl,rh,rm)
#  define CC_SDIV(cc,rd,rn,rm)          corrrr(cc,ARM_SDIV,rd,15,rn,rm)
#  define SDIV(rd,rn,rm)                CC_SDIV(ARM_CC_AL,rd,rm,rn)
#  define CC_UDIV(cc,rd,rn,rm)          corrrr(cc,ARM_UDIV,rd,15,rn,rm)
#  define UDIV(rd,rn,rm)                CC_UDIV(ARM_CC_AL,rd,rm,rn)
#  define T2_SDIV(rd,rn,rm)             torrr(THUMB2_SDIV,rn,rd,rm)
#  define T2_UDIV(rd,rn,rm)             torrr(THUMB2_UDIV,rn,rd,rm)
/*
 * Bitwise AND and bit-clear (BIC) encoders.
 *
 * CC_<op> takes an explicit condition code, <op> fixes it to ARM_CC_AL,
 * T1_<op> is the Thumb form built with is(), T2_<op> the Thumb-2 form.
 * torri() takes (opcode,rn,rd,im): the Thumb-2 forms carry no
 * condition-code argument, including the flag-setting T2_ANDSI/T2_BICSI.
 */
#  define CC_AND(cc,rd,rn,rm)           corrr(cc,ARM_AND,rn,rd,rm)
#  define AND(rd,rn,rm)                 CC_AND(ARM_CC_AL,rd,rn,rm)
#  define T1_AND(rdn,rm)                is(THUMB_AND|(_u3(rm)<<3)|_u3(rdn))
#  define T2_AND(rd,rn,rm)              torrr(THUMB2_AND,rn,rd,rm)
#  define CC_ANDI(cc,rd,rn,im)          corri(cc,ARM_AND|ARM_I,rn,rd,im)
#  define ANDI(rd,rn,im)                CC_ANDI(ARM_CC_AL,rd,rn,im)
#  define T2_ANDI(rd,rn,im)             torri(THUMB2_ANDI,rn,rd,im)
#  define CC_ANDS(cc,rd,rn,rm)          corrr(cc,ARM_AND|ARM_S,rn,rd,rm)
#  define ANDS(rd,rn,rm)                CC_ANDS(ARM_CC_AL,rd,rn,rm)
#  define T2_ANDS(rd,rn,rm)             torrr(THUMB2_AND|ARM_S,rn,rd,rm)
#  define CC_ANDSI(cc,rd,rn,im)         corri(cc,ARM_AND|ARM_S|ARM_I,rn,rd,im)
#  define ANDSI(rd,rn,im)               CC_ANDSI(ARM_CC_AL,rd,rn,im)
#  define T2_ANDSI(rd,rn,im)            torri(THUMB2_ANDI|ARM_S,rn,rd,im)
/* BIC */
#  define CC_BIC(cc,rd,rn,rm)           corrr(cc,ARM_BIC,rn,rd,rm)
#  define BIC(rd,rn,rm)                 CC_BIC(ARM_CC_AL,rd,rn,rm)
#  define T2_BIC(rd,rn,rm)              torrr(THUMB2_BIC,rn,rd,rm)
#  define CC_BICI(cc,rd,rn,im)          corri(cc,ARM_BIC|ARM_I,rn,rd,im)
#  define BICI(rd,rn,im)                CC_BICI(ARM_CC_AL,rd,rn,im)
#  define T2_BICI(rd,rn,im)             torri(THUMB2_BICI,rn,rd,im)
#  define CC_BICS(cc,rd,rn,rm)          corrr(cc,ARM_BIC|ARM_S,rn,rd,rm)
#  define BICS(rd,rn,rm)                CC_BICS(ARM_CC_AL,rd,rn,rm)
#  define T2_BICS(rd,rn,rm)             torrr(THUMB2_BIC|ARM_S,rn,rd,rm)
#  define CC_BICSI(cc,rd,rn,im)         corri(cc,ARM_BIC|ARM_S|ARM_I,rn,rd,im)
#  define BICSI(rd,rn,im)               CC_BICSI(ARM_CC_AL,rd,rn,im)
#  define T2_BICSI(rd,rn,im)            torri(THUMB2_BICI|ARM_S,rn,rd,im)
/*
 * Bitwise OR and exclusive-OR encoders.
 *
 * CC_<op> takes an explicit condition code, <op> fixes it to ARM_CC_AL,
 * T1_<op> is the Thumb form built with is(), T2_<op> the Thumb-2 form.
 * The *_SI forms go through corrrs() with a shift type or'ed into the
 * opcode and an extra immediate (presumably the shift amount applied to
 * rm -- confirm against corrrs()).
 */
#  define CC_ORR(cc,rd,rn,rm)           corrr(cc,ARM_ORR,rn,rd,rm)
#  define ORR(rd,rn,rm)                 CC_ORR(ARM_CC_AL,rd,rn,rm)
#  define T1_ORR(rdn,rm)                is(THUMB_ORR|(_u3(rm)<<3)|_u3(rdn))
#  define T2_ORR(rd,rn,rm)              torrr(THUMB2_ORR,rn,rd,rm)
/* The fourth parameter must be named rm: that is what the body expands. */
#  define CC_ORR_SI(cc,rd,rn,rm,sh,im)  corrrs(cc,ARM_ORR|sh,rn,rd,rm,im)
#  define ORR_SI(r0,r1,r2,sh,im)        CC_ORR_SI(ARM_CC_AL,r0,r1,r2,sh,im)
#  define CC_ORRI(cc,rd,rn,im)          corri(cc,ARM_ORR|ARM_I,rn,rd,im)
#  define ORRI(rd,rn,im)                CC_ORRI(ARM_CC_AL,rd,rn,im)
#  define T2_ORRI(rd,rn,im)             torri(THUMB2_ORRI,rn,rd,im)
/* EOR */
#  define CC_EOR(cc,rd,rn,rm)           corrr(cc,ARM_EOR,rn,rd,rm)
#  define EOR(rd,rn,rm)                 CC_EOR(ARM_CC_AL,rd,rn,rm)
#  define T1_EOR(rdn,rm)                is(THUMB_EOR|(_u3(rm)<<3)|_u3(rdn))
#  define T2_EOR(rd,rn,rm)              torrr(THUMB2_EOR,rn,rd,rm)
#  define CC_EOR_SI(cc,rd,rn,rm,sh,im)  corrrs(cc,ARM_EOR|sh,rn,rd,rm,im)
#  define EOR_SI(r0,r1,r2,sh,im)        CC_EOR_SI(ARM_CC_AL,r0,r1,r2,sh,im)
#  define CC_EORI(cc,rd,rn,im)          corri(cc,ARM_EOR|ARM_I,rn,rd,im)
#  define EORI(rd,rn,im)                CC_EORI(ARM_CC_AL,rd,rn,im)
#  define T2_EORI(rd,rn,im)             torri(THUMB2_EORI,rn,rd,im)
/*
 * Byte-reverse (REV, REV16) and sign/zero extension (SXTB, UXTB, SXTH,
 * UXTH) encoders.
 *
 * The ARM forms use c6orr(cc,opcode,rd,rm).  The Thumb-2 REV/REV16 forms
 * pass rm to torrr() in both the rn and rm positions; the Thumb-2
 * extension forms pass _R15_REGNO in the rn position.
 */
#  define CC_REV(cc,rd,rm)              c6orr(cc,ARM_REV,rd,rm)
#  define REV(rd,rm)                    CC_REV(ARM_CC_AL,rd,rm)
#  define T1_REV(rd,rm)                 is(THUMB_REV|(_u3(rm)<<3)|_u3(rd))
#  define T2_REV(rd,rm)                 torrr(THUMB2_REV,rm,rd,rm)
#  define CC_REV16(cc,rd,rm)            c6orr(cc,ARM_REV16,rd,rm)
#  define REV16(rd,rm)                  CC_REV16(ARM_CC_AL,rd,rm)
#  define T1_REV16(rd,rm)               is(THUMB_REV16|(_u3(rm)<<3)|_u3(rd))
#  define T2_REV16(rd,rm)               torrr(THUMB2_REV16,rm,rd,rm)
/* Sign/zero extension of bytes and halfwords. */
#  define CC_SXTB(cc,rd,rm)             c6orr(cc,ARM_SXTB,rd,rm)
#  define SXTB(rd,rm)                   CC_SXTB(ARM_CC_AL,rd,rm)
#  define T1_SXTB(rd,rm)                is(THUMB_SXTB|(_u3(rm)<<3)|_u3(rd))
#  define T2_SXTB(rd,rm)                torrr(THUMB2_SXTB,_R15_REGNO,rd,rm)
#  define CC_UXTB(cc,rd,rm)             c6orr(cc,ARM_UXTB,rd,rm)
#  define UXTB(rd,rm)                   CC_UXTB(ARM_CC_AL,rd,rm)
#  define T1_UXTB(rd,rm)                is(THUMB_UXTB|(_u3(rm)<<3)|_u3(rd))
#  define T2_UXTB(rd,rm)                torrr(THUMB2_UXTB,_R15_REGNO,rd,rm)
#  define CC_SXTH(cc,rd,rm)             c6orr(cc,ARM_SXTH,rd,rm)
#  define SXTH(rd,rm)                   CC_SXTH(ARM_CC_AL,rd,rm)
#  define T1_SXTH(rd,rm)                is(THUMB_SXTH|(_u3(rm)<<3)|_u3(rd))
#  define T2_SXTH(rd,rm)                torrr(THUMB2_SXTH,_R15_REGNO,rd,rm)
#  define CC_UXTH(cc,rd,rm)             c6orr(cc,ARM_UXTH,rd,rm)
#  define UXTH(rd,rm)                   CC_UXTH(ARM_CC_AL,rd,rm)
#  define T1_UXTH(rd,rm)                is(THUMB_UXTH|(_u3(rm)<<3)|_u3(rd))
#  define T2_UXTH(rd,rm)                torrr(THUMB2_UXTH,_R15_REGNO,rd,rm)
/*
 * Shift and rotate encoders (LSL, LSR, ASR, ROR).
 *
 * The ARM forms all funnel through CC_SHIFT()/cshift(cc,o,rd,rm,rn,im):
 *   register-count form:  opcode has ARM_R set, count register passed
 *                         as rm, immediate passed as 0
 *   immediate form (..I): rm passed as 0, count passed as im
 * In both cases the macro's own rn argument (the value being shifted)
 * is forwarded in cshift()'s rn position.
 * Thumb-2 immediate forms use tshift(); there is no T1_RORI.
 */
#  define CC_SHIFT(cc,o,rd,rm,rn,im)    cshift(cc,o,rd,rm,rn,im)
#  define CC_LSL(cc,rd,rn,rm)           CC_SHIFT(cc,ARM_LSL|ARM_R,rd,rm,rn,0)
#  define LSL(rd,rn,rm)                 CC_LSL(ARM_CC_AL,rd,rn,rm)
#  define T1_LSL(rdn,rm)                is(THUMB_LSL|(_u3(rm)<<3)|_u3(rdn))
#  define T2_LSL(rd,rn,rm)              torrr(THUMB2_LSL,rn,rd,rm)
#  define CC_LSLI(cc,rd,rn,im)          CC_SHIFT(cc,ARM_LSL,rd,0,rn,im)
#  define LSLI(rd,rn,im)                CC_LSLI(ARM_CC_AL,rd,rn,im)
#  define T1_LSLI(rd,rm,im)             is(THUMB_LSLI|(_u5(im)<<6)|(_u3(rm)<<3)|_u3(rd))
#  define T2_LSLI(rd,rm,im)             tshift(THUMB2_LSLI,rd,rm,im)
/* LSR */
#  define CC_LSR(cc,rd,rn,rm)           CC_SHIFT(cc,ARM_LSR|ARM_R,rd,rm,rn,0)
#  define LSR(rd,rn,rm)                 CC_LSR(ARM_CC_AL,rd,rn,rm)
#  define T1_LSR(rdn,rm)                is(THUMB_LSR|(_u3(rm)<<3)|_u3(rdn))
#  define T2_LSR(rd,rn,rm)              torrr(THUMB2_LSR,rn,rd,rm)
#  define CC_LSRI(cc,rd,rn,im)          CC_SHIFT(cc,ARM_LSR,rd,0,rn,im)
#  define LSRI(rd,rn,im)                CC_LSRI(ARM_CC_AL,rd,rn,im)
#  define T1_LSRI(rd,rm,im)             is(THUMB_LSRI|(_u5(im)<<6)|(_u3(rm)<<3)|_u3(rd))
#  define T2_LSRI(rd,rm,im)             tshift(THUMB2_LSRI,rd,rm,im)
/* ASR */
#  define CC_ASR(cc,rd,rn,rm)           CC_SHIFT(cc,ARM_ASR|ARM_R,rd,rm,rn,0)
#  define ASR(rd,rn,rm)                 CC_ASR(ARM_CC_AL,rd,rn,rm)
#  define T1_ASR(rdn,rm)                is(THUMB_ASR|(_u3(rm)<<3)|_u3(rdn))
#  define T2_ASR(rd,rn,rm)              torrr(THUMB2_ASR,rn,rd,rm)
#  define CC_ASRI(cc,rd,rn,im)          CC_SHIFT(cc,ARM_ASR,rd,0,rn,im)
#  define ASRI(rd,rn,im)                CC_ASRI(ARM_CC_AL,rd,rn,im)
#  define T1_ASRI(rd,rm,im)             is(THUMB_ASRI|(_u5(im)<<6)|(_u3(rm)<<3)|_u3(rd))
#  define T2_ASRI(rd,rm,im)             tshift(THUMB2_ASRI,rd,rm,im)
/* ROR */
#  define CC_ROR(cc,rd,rn,rm)           CC_SHIFT(cc,ARM_ROR|ARM_R,rd,rm,rn,0)
#  define ROR(rd,rn,rm)                 CC_ROR(ARM_CC_AL,rd,rn,rm)
#  define T1_ROR(rdn,rm)                is(THUMB_ROR|(_u3(rm)<<3)|_u3(rdn))
#  define T2_ROR(rd,rn,rm)              torrr(THUMB2_ROR,rn,rd,rm)
#  define CC_RORI(cc,rd,rn,im)          CC_SHIFT(cc,ARM_ROR,rd,0,rn,im)
#  define RORI(rd,rn,im)                CC_RORI(ARM_CC_AL,rd,rn,im)
#  define T2_RORI(rd,rm,im)             tshift(THUMB2_RORI,rd,rm,im)
/*
 * Compare and test encoders (CMP, CMN, TST, TEQ).
 *
 * These write no destination register: the ARM forms pass 0 in the rd
 * position of corrr()/corri(), and the Thumb-2 forms pass _R15_REGNO.
 * T1_ forms encode rm in bits 5:3 and rn in bits 2:0.  T1_CMPX takes
 * 4-bit register numbers and places bit 3 of rn at bit 7.
 */
#  define CC_CMP(cc,rn,rm)              corrr(cc,ARM_CMP,rn,0,rm)
#  define CMP(rn,rm)                    CC_CMP(ARM_CC_AL,rn,rm)
#  define T1_CMP(rn,rm)                 is(THUMB_CMP|(_u3(rm)<<3)|_u3(rn))
#  define T1_CMPX(rn,rm)                is(THUMB_CMPX|((_u4(rn)&8)<<4)|(_u4(rm)<<3)|((rn)&7))
#  define T2_CMP(rn,rm)                 torrr(THUMB2_CMP,rn,_R15_REGNO,rm)
#  define CC_CMPI(cc,rn,im)             corri(cc,ARM_CMP|ARM_I,rn,0,im)
#  define CMPI(rn,im)                   CC_CMPI(ARM_CC_AL,rn,im)
#  define T1_CMPI(rn,im)                is(THUMB_CMPI|(_u3(rn)<<8)|_u8(im))
#  define T2_CMPI(rn,im)                torri(THUMB2_CMPI,rn,_R15_REGNO,im)
/* CMN */
#  define CC_CMN(cc,rn,rm)              corrr(cc,ARM_CMN,rn,0,rm)
#  define CMN(rn,rm)                    CC_CMN(ARM_CC_AL,rn,rm)
#  define T1_CMN(rn,rm)                 is(THUMB_CMN|(_u3(rm)<<3)|_u3(rn))
#  define T2_CMN(rn,rm)                 torrr(THUMB2_CMN,rn,_R15_REGNO,rm)
#  define CC_CMNI(cc,rn,im)             corri(cc,ARM_CMN|ARM_I,rn,0,im)
#  define CMNI(rn,im)                   CC_CMNI(ARM_CC_AL,rn,im)
#  define T2_CMNI(rn,im)                torri(THUMB2_CMNI,rn,_R15_REGNO,im)
/* TST */
#  define CC_TST(cc,rn,rm)              corrr(cc,ARM_TST,rn,0,rm)
#  define TST(rn,rm)                    CC_TST(ARM_CC_AL,rn,rm)
#  define T1_TST(rn,rm)                 is(THUMB_TST|(_u3(rm)<<3)|_u3(rn))
#  define T2_TST(rn,rm)                 torrr(THUMB2_TST,rn,_R15_REGNO,rm)
#  define CC_TSTI(cc,rn,im)             corri(cc,ARM_TST|ARM_I,rn,0,im)
#  define TSTI(rn,im)                   CC_TSTI(ARM_CC_AL,rn,im)
#  define T2_TSTI(rn,im)                torri(THUMB2_TSTI,rn,_R15_REGNO,im)
/* TEQ (ARM forms only) */
#  define CC_TEQ(cc,rn,rm)              corrr(cc,ARM_TEQ,rn,0,rm)
#  define TEQ(rn,rm)                    CC_TEQ(ARM_CC_AL,rn,rm)
#  define CC_TEQI(cc,rn,im)             corri(cc,ARM_TEQ|ARM_I,rn,0,im)
#  define TEQI(rn,im)                   CC_TEQI(ARM_CC_AL,rn,im)
/*
 * Branch encoders.
 *
 *   BX/BLX   branch to the address in register rm (cbx(); the Thumb
 *            forms place the 4-bit register number at bit 3)
 *   B/BLI    branch with an immediate operand (cb() for ARM; tc8(),
 *            t11(), tcb() and tb() for the Thumb/Thumb-2 variants)
 *   BLXI     immediate BLX via blxi() / tb()
 * CC_ forms take an explicit condition code; the unprefixed ARM forms
 * use ARM_CC_AL.
 */
#  define CC_BX(cc,rm)                  cbx(cc,ARM_BX,rm)
#  define BX(rm)                        CC_BX(ARM_CC_AL,rm)
#  define T1_BX(rm)                     is(0x4700|(_u4(rm)<<3))
#  define CC_BLX(cc,rm)                 cbx(cc,ARM_BLX,rm)
#  define BLX(rm)                       CC_BLX(ARM_CC_AL,rm)
#  define T1_BLX(rm)                    is(THUMB_BLX|(_u4(rm)<<3))
#  define BLXI(im)                      blxi(im)
#  define T2_BLXI(im)                   tb(THUMB2_BLXI,im)
#  define CC_B(cc,im)                   cb(cc,ARM_B,im)
#  define B(im)                         CC_B(ARM_CC_AL,im)
#  define T1_CC_B(cc,im)                tc8(cc,im)
#  define T1_B(im)                      t11(im)
#  define T2_CC_B(cc,im)                tcb(cc,im)
#  define T2_B(im)                      tb(THUMB2_B,im)
#  define CC_BLI(cc,im)                 cb(cc,ARM_BLI,im)
#  define BLI(im)                       CC_BLI(ARM_CC_AL,im)
#  define T2_BLI(im)                    tb(THUMB2_BLI,im)
/*
 * Load encoders: LDRSB, LDRB, LDRSH, LDRH, LDR, LDRD and LDREX.
 *
 * Suffix conventions visible in this group:
 *   <op>(rt,rn,rm)    register-offset form, opcode or'ed with ARM_P
 *   <op>N(rt,rn,rm)   same opcode without ARM_P
 *   <op>I(rt,rn,im)   immediate-offset form, opcode or'ed with ARM_P
 *                     (THUMB2_U for the T2_ 8-bit forms)
 *   <op>IN(rt,rn,im)  immediate form without ARM_P / THUMB2_U
 *   T2_<op>WI         Thumb-2 form taking a 12-bit immediate (torri12)
 * NOTE(review): ARM_P / THUMB2_U presumably select "add the offset" and
 * the N variants "subtract the offset" -- confirm against the opcode
 * definitions earlier in the file.
 */
#  define CC_LDRSB(cc,rt,rn,rm)         corrr(cc,ARM_LDRSB|ARM_P,rn,rt,rm)
#  define LDRSB(rt,rn,rm)               CC_LDRSB(ARM_CC_AL,rt,rn,rm)
#  define T1_LDRSB(rt,rn,rm)            is(THUMB_LDRSB|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt))
#  define T2_LDRSB(rt,rn,rm)            torxr(THUMB2_LDRSB,rn,rt,rm)
#  define CC_LDRSBN(cc,rt,rn,rm)        corrr(cc,ARM_LDRSB,rn,rt,rm)
#  define LDRSBN(rt,rn,rm)              CC_LDRSBN(ARM_CC_AL,rt,rn,rm)
#  define CC_LDRSBI(cc,rt,rn,im)        corri8(cc,ARM_LDRSBI|ARM_P,rn,rt,im)
#  define LDRSBI(rt,rn,im)              CC_LDRSBI(ARM_CC_AL,rt,rn,im)
#  define T2_LDRSBI(rt,rn,im)           torri8(THUMB2_LDRSBI|THUMB2_U,rn,rt,im)
#  define T2_LDRSBWI(rt,rn,im)          torri12(THUMB2_LDRSBWI,rn,rt,im)
#  define CC_LDRSBIN(cc,rt,rn,im)       corri8(cc,ARM_LDRSBI,rn,rt,im)
#  define LDRSBIN(rt,rn,im)             CC_LDRSBIN(ARM_CC_AL,rt,rn,im)
#  define T2_LDRSBIN(rt,rn,im)          torri8(THUMB2_LDRSBI,rn,rt,im)
/* LDRB */
#  define CC_LDRB(cc,rt,rn,rm)          corrr(cc,ARM_LDRB|ARM_P,rn,rt,rm)
#  define LDRB(rt,rn,rm)                CC_LDRB(ARM_CC_AL,rt,rn,rm)
#  define T1_LDRB(rt,rn,rm)             is(THUMB_LDRB|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt))
#  define T2_LDRB(rt,rn,rm)             torxr(THUMB2_LDRB,rn,rt,rm)
#  define CC_LDRBN(cc,rt,rn,rm)         corrr(cc,ARM_LDRB,rn,rt,rm)
#  define LDRBN(rt,rn,rm)               CC_LDRBN(ARM_CC_AL,rt,rn,rm)
#  define CC_LDRBI(cc,rt,rn,im)         corri(cc,ARM_LDRBI|ARM_P,rn,rt,im)
#  define LDRBI(rt,rn,im)               CC_LDRBI(ARM_CC_AL,rt,rn,im)
#  define T1_LDRBI(rt,rn,im)            is(THUMB_LDRBI|(_u5(im)<<6)|(_u3(rn)<<3)|_u3(rt))
#  define T2_LDRBI(rt,rn,im)            torri8(THUMB2_LDRBI|THUMB2_U,rn,rt,im)
#  define T2_LDRBWI(rt,rn,im)           torri12(THUMB2_LDRBWI,rn,rt,im)
#  define CC_LDRBIN(cc,rt,rn,im)        corri(cc,ARM_LDRBI,rn,rt,im)
#  define LDRBIN(rt,rn,im)              CC_LDRBIN(ARM_CC_AL,rt,rn,im)
#  define T2_LDRBIN(rt,rn,im)           torri8(THUMB2_LDRBI,rn,rt,im)
/* LDRSH */
#  define CC_LDRSH(cc,rt,rn,rm)         corrr(cc,ARM_LDRSH|ARM_P,rn,rt,rm)
#  define LDRSH(rt,rn,rm)               CC_LDRSH(ARM_CC_AL,rt,rn,rm)
#  define T1_LDRSH(rt,rn,rm)            is(THUMB_LDRSH|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt))
#  define T2_LDRSH(rt,rn,rm)            torxr(THUMB2_LDRSH,rn,rt,rm)
#  define CC_LDRSHN(cc,rt,rn,rm)        corrr(cc,ARM_LDRSH,rn,rt,rm)
#  define LDRSHN(rt,rn,rm)              CC_LDRSHN(ARM_CC_AL,rt,rn,rm)
#  define CC_LDRSHI(cc,rt,rn,im)        corri8(cc,ARM_LDRSHI|ARM_P,rn,rt,im)
#  define LDRSHI(rt,rn,im)              CC_LDRSHI(ARM_CC_AL,rt,rn,im)
#  define T2_LDRSHI(rt,rn,im)           torri8(THUMB2_LDRSHI|THUMB2_U,rn,rt,im)
#  define T2_LDRSHWI(rt,rn,im)          torri12(THUMB2_LDRSHWI,rn,rt,im)
#  define CC_LDRSHIN(cc,rt,rn,im)       corri8(cc,ARM_LDRSHI,rn,rt,im)
#  define LDRSHIN(rt,rn,im)             CC_LDRSHIN(ARM_CC_AL,rt,rn,im)
#  define T2_LDRSHIN(rt,rn,im)          torri8(THUMB2_LDRSHI,rn,rt,im)
/* LDRH */
#  define CC_LDRH(cc,rt,rn,rm)          corrr(cc,ARM_LDRH|ARM_P,rn,rt,rm)
#  define LDRH(rt,rn,rm)                CC_LDRH(ARM_CC_AL,rt,rn,rm)
#  define T1_LDRH(rt,rn,rm)             is(THUMB_LDRH|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt))
#  define T2_LDRH(rt,rn,rm)             torxr(THUMB2_LDRH,rn,rt,rm)
#  define CC_LDRHN(cc,rt,rn,rm)         corrr(cc,ARM_LDRH,rn,rt,rm)
#  define LDRHN(rt,rn,rm)               CC_LDRHN(ARM_CC_AL,rt,rn,rm)
#  define CC_LDRHI(cc,rt,rn,im)         corri8(cc,ARM_LDRHI|ARM_P,rn,rt,im)
#  define LDRHI(rt,rn,im)               CC_LDRHI(ARM_CC_AL,rt,rn,im)
#  define T1_LDRHI(rt,rn,im)            is(THUMB_LDRHI|(_u5(im)<<6)|(_u3(rn)<<3)|_u3(rt))
#  define T2_LDRHI(rt,rn,im)            torri8(THUMB2_LDRHI|THUMB2_U,rn,rt,im)
#  define T2_LDRHWI(rt,rn,im)           torri12(THUMB2_LDRHWI,rn,rt,im)
#  define CC_LDRHIN(cc,rt,rn,im)        corri8(cc,ARM_LDRHI,rn,rt,im)
#  define LDRHIN(rt,rn,im)              CC_LDRHIN(ARM_CC_AL,rt,rn,im)
#  define T2_LDRHIN(rt,rn,im)           torri8(THUMB2_LDRHI,rn,rt,im)
/* LDR */
#  define CC_LDR(cc,rt,rn,rm)           corrr(cc,ARM_LDR|ARM_P,rn,rt,rm)
#  define LDR(rt,rn,rm)                 CC_LDR(ARM_CC_AL,rt,rn,rm)
#  define T1_LDR(rt,rn,rm)              is(THUMB_LDR|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt))
#  define T2_LDR(rt,rn,rm)              torxr(THUMB2_LDR,rn,rt,rm)
#  define CC_LDRN(cc,rt,rn,rm)          corrr(cc,ARM_LDR,rn,rt,rm)
#  define LDRN(rt,rn,rm)                CC_LDRN(ARM_CC_AL,rt,rn,rm)
#  define CC_LDRI(cc,rt,rn,im)          corri(cc,ARM_LDRI|ARM_P,rn,rt,im)
#  define LDRI(rt,rn,im)                CC_LDRI(ARM_CC_AL,rt,rn,im)
#  define T1_LDRI(rt,rn,im)             is(THUMB_LDRI|(_u5(im)<<6)|(_u3(rn)<<3)|_u3(rt))
#  define T1_LDRISP(rt,im)              is(THUMB_LDRISP|(_u3(rt)<<8)|_u8(im))
#  define T2_LDRI(rt,rn,im)             torri8(THUMB2_LDRI|THUMB2_U,rn,rt,im)
#  define T2_LDRWI(rt,rn,im)            torri12(THUMB2_LDRWI,rn,rt,im)
#  define CC_LDRIN(cc,rt,rn,im)         corri(cc,ARM_LDRI,rn,rt,im)
#  define LDRIN(rt,rn,im)               CC_LDRIN(ARM_CC_AL,rt,rn,im)
#  define T2_LDRIN(rt,rn,im)            torri8(THUMB2_LDRI,rn,rt,im)
/* LDRD: the Thumb-2 forms name both destination registers (rt, rt2). */
#  define CC_LDRD(cc,rt,rn,rm)          corrr(cc,ARM_LDRD|ARM_P,rn,rt,rm)
#  define LDRD(rt,rn,rm)                CC_LDRD(ARM_CC_AL,rt,rn,rm)
#  define T2_LDRDI(rt,rt2,rn,im)        torrri8(THUMB2_LDRDI|ARM_P,rn,rt,rt2,im)
#  define CC_LDRDN(cc,rt,rn,rm)         corrr(cc,ARM_LDRD,rn,rt,rm)
#  define LDRDN(rt,rn,rm)               CC_LDRDN(ARM_CC_AL,rt,rn,rm)
#  define CC_LDRDI(cc,rt,rn,im)         corri8(cc,ARM_LDRDI|ARM_P,rn,rt,im)
#  define LDRDI(rt,rn,im)               CC_LDRDI(ARM_CC_AL,rt,rn,im)
#  define CC_LDRDIN(cc,rt,rn,im)        corri8(cc,ARM_LDRDI,rn,rt,im)
#  define LDRDIN(rt,rn,im)              CC_LDRDIN(ARM_CC_AL,rt,rn,im)
#  define T2_LDRDIN(rt,rt2,rn,im)       torrri8(THUMB2_LDRDI,rn,rt,rt2,im)
/* LDREX */
#  define CC_LDREX(cc,rt,rn)            corrrr(cc,ARM_LDREX,rn,rt,0xf,0xf)
#  define LDREX(rt,rn)                  CC_LDREX(ARM_CC_AL,rt,rn)
#  define T2_LDREX(rt,rn,im)            torrri8(THUMB2_LDREX,rn,rt,0xf,im)
/*
 * Store encoders: STRB, STRH, STR, STRD and STREX.
 *
 * Suffix conventions visible in this group:
 *   <op>(rt,rn,rm)    register-offset form, opcode or'ed with ARM_P
 *   <op>N(rt,rn,rm)   same opcode without ARM_P
 *   <op>I(rt,rn,im)   immediate-offset form, opcode or'ed with ARM_P
 *                     (THUMB2_U for the T2_ 8-bit forms)
 *   <op>IN(rt,rn,im)  immediate form without ARM_P / THUMB2_U
 *   T2_<op>WI         Thumb-2 form taking a 12-bit immediate (torri12)
 * NOTE(review): ARM_P / THUMB2_U presumably select "add the offset" and
 * the N variants "subtract the offset" -- confirm against the opcode
 * definitions earlier in the file.
 */
#  define CC_STRB(cc,rt,rn,rm)          corrr(cc,ARM_STRB|ARM_P,rn,rt,rm)
#  define STRB(rt,rn,rm)                CC_STRB(ARM_CC_AL,rt,rn,rm)
#  define T1_STRB(rt,rn,rm)             is(THUMB_STRB|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt))
#  define T2_STRB(rt,rn,rm)             torxr(THUMB2_STRB,rn,rt,rm)
#  define CC_STRBN(cc,rt,rn,rm)         corrr(cc,ARM_STRB,rn,rt,rm)
#  define STRBN(rt,rn,rm)               CC_STRBN(ARM_CC_AL,rt,rn,rm)
#  define CC_STRBI(cc,rt,rn,im)         corri(cc,ARM_STRBI|ARM_P,rn,rt,im)
#  define STRBI(rt,rn,im)               CC_STRBI(ARM_CC_AL,rt,rn,im)
#  define T1_STRBI(rt,rn,im)            is(THUMB_STRBI|(_u5(im)<<6)|(_u3(rn)<<3)|_u3(rt))
#  define T2_STRBI(rt,rn,im)            torri8(THUMB2_STRBI|THUMB2_U,rn,rt,im)
#  define T2_STRBWI(rt,rn,im)           torri12(THUMB2_STRBWI,rn,rt,im)
#  define CC_STRBIN(cc,rt,rn,im)        corri(cc,ARM_STRBI,rn,rt,im)
#  define STRBIN(rt,rn,im)              CC_STRBIN(ARM_CC_AL,rt,rn,im)
#  define T2_STRBIN(rt,rn,im)           torri8(THUMB2_STRBI,rn,rt,im)
/* STRH */
#  define CC_STRH(cc,rt,rn,rm)          corrr(cc,ARM_STRH|ARM_P,rn,rt,rm)
#  define STRH(rt,rn,rm)                CC_STRH(ARM_CC_AL,rt,rn,rm)
#  define T1_STRH(rt,rn,rm)             is(THUMB_STRH|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt))
#  define T2_STRH(rt,rn,rm)             torxr(THUMB2_STRH,rn,rt,rm)
#  define CC_STRHN(cc,rt,rn,rm)         corrr(cc,ARM_STRH,rn,rt,rm)
#  define STRHN(rt,rn,rm)               CC_STRHN(ARM_CC_AL,rt,rn,rm)
#  define CC_STRHI(cc,rt,rn,im)         corri8(cc,ARM_STRHI|ARM_P,rn,rt,im)
#  define STRHI(rt,rn,im)               CC_STRHI(ARM_CC_AL,rt,rn,im)
#  define T1_STRHI(rt,rn,im)            is(THUMB_STRHI|(_u5(im)<<6)|(_u3(rn)<<3)|_u3(rt))
#  define T2_STRHI(rt,rn,im)            torri8(THUMB2_STRHI|THUMB2_U,rn,rt,im)
#  define T2_STRHWI(rt,rn,im)           torri12(THUMB2_STRHWI,rn,rt,im)
#  define CC_STRHIN(cc,rt,rn,im)        corri8(cc,ARM_STRHI,rn,rt,im)
#  define STRHIN(rt,rn,im)              CC_STRHIN(ARM_CC_AL,rt,rn,im)
#  define T2_STRHIN(rt,rn,im)           torri8(THUMB2_STRHI,rn,rt,im)
/* STR */
#  define CC_STR(cc,rt,rn,rm)           corrr(cc,ARM_STR|ARM_P,rn,rt,rm)
#  define STR(rt,rn,rm)                 CC_STR(ARM_CC_AL,rt,rn,rm)
#  define T1_STR(rt,rn,rm)              is(THUMB_STR|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt))
#  define T2_STR(rt,rn,rm)              torxr(THUMB2_STR,rn,rt,rm)
#  define CC_STRN(cc,rt,rn,rm)          corrr(cc,ARM_STR,rn,rt,rm)
#  define STRN(rt,rn,rm)                CC_STRN(ARM_CC_AL,rt,rn,rm)
#  define CC_STRI(cc,rt,rn,im)          corri(cc,ARM_STRI|ARM_P,rn,rt,im)
#  define STRI(rt,rn,im)                CC_STRI(ARM_CC_AL,rt,rn,im)
#  define T1_STRI(rt,rn,im)             is(THUMB_STRI|(_u5(im)<<6)|(_u3(rn)<<3)|_u3(rt))
#  define T1_STRISP(rt,im)              is(THUMB_STRISP|(_u3(rt)<<8)|(_u8(im)))
#  define T2_STRI(rt,rn,im)             torri8(THUMB2_STRI|THUMB2_U,rn,rt,im)
#  define T2_STRWI(rt,rn,im)            torri12(THUMB2_STRWI,rn,rt,im)
#  define CC_STRIN(cc,rt,rn,im)         corri(cc,ARM_STRI,rn,rt,im)
#  define STRIN(rt,rn,im)               CC_STRIN(ARM_CC_AL,rt,rn,im)
#  define T2_STRIN(rt,rn,im)            torri8(THUMB2_STRI,rn,rt,im)
/* STRD: the Thumb-2 forms name both source registers (rt, rt2). */
#  define CC_STRD(cc,rt,rn,rm)          corrr(cc,ARM_STRD|ARM_P,rn,rt,rm)
#  define STRD(rt,rn,rm)                CC_STRD(ARM_CC_AL,rt,rn,rm)
#  define CC_STRDN(cc,rt,rn,rm)         corrr(cc,ARM_STRD,rn,rt,rm)
#  define STRDN(rt,rn,rm)               CC_STRDN(ARM_CC_AL,rt,rn,rm)
#  define CC_STRDI(cc,rt,rn,im)         corri8(cc,ARM_STRDI|ARM_P,rn,rt,im)
#  define STRDI(rt,rn,im)               CC_STRDI(ARM_CC_AL,rt,rn,im)
#  define T2_STRDI(rt,rt2,rn,im)        torrri8(THUMB2_STRDI|ARM_P,rn,rt,rt2,im)
#  define CC_STRDIN(cc,rt,rn,im)        corri8(cc,ARM_STRDI,rn,rt,im)
#  define STRDIN(rt,rn,im)              CC_STRDIN(ARM_CC_AL,rt,rn,im)
#  define T2_STRDIN(rt,rt2,rn,im)       torrri8(THUMB2_STRDI,rn,rt,rt2,im)
/* STREX: rd is an extra register operand besides rt and rn. */
#  define CC_STREX(cc,rd,rt,rn)         corrrr(cc,ARM_STREX,rn,rd,0xf,rt)
#  define STREX(rd,rt,rn)               CC_STREX(ARM_CC_AL,rd,rt,rn)
#  define T2_STREX(rd,rt,rn,im)         torrri8(THUMB2_STREX,rn,rt,rd,im)
/*
 * Load/store-multiple encoders and the PUSH/POP shorthands.
 *
 * The ARM forms go through corl(cc,opcode,rn,im) where im is the
 * register-list mask.  The opcode is ARM_M or'ed with:
 *   ARM_M_L  for the LDM (load) variants
 *   ARM_M_I  for the I? (IA/IB) variants, absent for D? (DA/DB)
 *   ARM_M_B  for the ?B (IB/DB) variants, absent for ?A (IA/DA)
 *   ARM_M_U  for the _U variants (presumably base-register update /
 *            writeback -- confirm against the ARM_M_* definitions)
 * LDM/STM are aliases for the IA variants.  PUSH is STMDB_U and POP is
 * LDMIA_U on _SP_REGNO.
 * The T1_ PUSH/POP forms keep the low eight list bits and fold bit 14
 * (PUSH) or bit 15 (POP) of the mask down to bit 8.
 */
#  define CC_LDMIA(cc,rn,im)            corl(cc,ARM_M|ARM_M_L|ARM_M_I,rn,im)
#  define LDMIA(rn,im)                  CC_LDMIA(ARM_CC_AL,rn,im)
#  define CC_LDM(cc,rn,im)              CC_LDMIA(cc,rn,im)
#  define LDM(rn,im)                    LDMIA(rn,im)
#  define T1_LDMIA(rn,im)               is(THUMB_LDMIA|(_u3(rn)<<8)|(im))
#  define T2_LDMIA(rn,im)               torl(THUMB2_LDMIA,rn,im)
#  define CC_LDMIA_U(cc,rn,im)          corl(cc,ARM_M|ARM_M_L|ARM_M_I|ARM_M_U,rn,im)
#  define LDMIA_U(rn,im)                CC_LDMIA_U(ARM_CC_AL,rn,im)
#  define LDM_U(r0,i0)                  LDMIA_U(r0,i0)
#  define CC_LDMIB(cc,rn,im)            corl(cc,ARM_M|ARM_M_L|ARM_M_I|ARM_M_B,rn,im)
#  define LDMIB(rn,im)                  CC_LDMIB(ARM_CC_AL,rn,im)
#  define CC_LDMIB_U(cc,rn,im)          corl(cc,ARM_M|ARM_M_L|ARM_M_I|ARM_M_B|ARM_M_U,rn,im)
#  define LDMIB_U(rn,im)                CC_LDMIB_U(ARM_CC_AL,rn,im)
#  define CC_LDMDA(cc,rn,im)            corl(cc,ARM_M|ARM_M_L,rn,im)
#  define LDMDA(rn,im)                  CC_LDMDA(ARM_CC_AL,rn,im)
#  define CC_LDMDA_U(cc,rn,im)          corl(cc,ARM_M|ARM_M_L|ARM_M_U,rn,im)
#  define LDMDA_U(rn,im)                CC_LDMDA_U(ARM_CC_AL,rn,im)
#  define CC_LDMDB(cc,rn,im)            corl(cc,ARM_M|ARM_M_L|ARM_M_B,rn,im)
#  define LDMDB(rn,im)                  CC_LDMDB(ARM_CC_AL,rn,im)
#  define T2_LDMDB(rn,im)               torl(THUMB2_LDMDB,rn,im)
#  define CC_LDMDB_U(cc,rn,im)          corl(cc,ARM_M|ARM_M_L|ARM_M_B|ARM_M_U,rn,im)
#  define LDMDB_U(rn,im)                CC_LDMDB_U(ARM_CC_AL,rn,im)
/* STM */
#  define CC_STMIA(cc,rn,im)            corl(cc,ARM_M|ARM_M_I,rn,im)
#  define STMIA(rn,im)                  CC_STMIA(ARM_CC_AL,rn,im)
#  define CC_STM(cc,rn,im)              CC_STMIA(cc,rn,im)
#  define STM(rn,im)                    STMIA(rn,im)
#  define CC_STMIA_U(cc,rn,im)          corl(cc,ARM_M|ARM_M_I|ARM_M_U,rn,im)
#  define STMIA_U(rn,im)                CC_STMIA_U(ARM_CC_AL,rn,im)
#  define CC_STM_U(cc,rn,im)            CC_STMIA_U(cc,rn,im)
#  define STM_U(rn,im)                  STMIA_U(rn,im)
#  define CC_STMIB(cc,rn,im)            corl(cc,ARM_M|ARM_M_I|ARM_M_B,rn,im)
#  define STMIB(rn,im)                  CC_STMIB(ARM_CC_AL,rn,im)
#  define CC_STMIB_U(cc,rn,im)          corl(cc,ARM_M|ARM_M_I|ARM_M_B|ARM_M_U,rn,im)
#  define STMIB_U(rn,im)                CC_STMIB_U(ARM_CC_AL,rn,im)
#  define CC_STMDA(cc,rn,im)            corl(cc,ARM_M,rn,im)
#  define STMDA(rn,im)                  CC_STMDA(ARM_CC_AL,rn,im)
#  define CC_STMDA_U(cc,rn,im)          corl(cc,ARM_M|ARM_M_U,rn,im)
#  define STMDA_U(rn,im)                CC_STMDA_U(ARM_CC_AL,rn,im)
#  define CC_STMDB(cc,rn,im)            corl(cc,ARM_M|ARM_M_B,rn,im)
#  define STMDB(rn,im)                  CC_STMDB(ARM_CC_AL,rn,im)
#  define CC_STMDB_U(cc,rn,im)          corl(cc,ARM_M|ARM_M_B|ARM_M_U,rn,im)
#  define STMDB_U(rn,im)                CC_STMDB_U(ARM_CC_AL,rn,im)
/* PUSH / POP; im is parenthesized so mask expressions like a|b work. */
#  define CC_PUSH(cc,im)                CC_STMDB_U(cc,_SP_REGNO,im)
#  define PUSH(im)                      STMDB_U(_SP_REGNO,im)
#  define T1_PUSH(im)                   is(THUMB_PUSH|(((im)&0x4000)>>6)|((im)&0xff))
#  define T2_PUSH(im)                   tpp(THUMB2_PUSH,im)
#  define CC_POP(cc,im)                 CC_LDMIA_U(cc,_SP_REGNO,im)
#  define POP(im)                       LDMIA_U(_SP_REGNO,im)
#  define T1_POP(im)                    is(THUMB_POP|(((im)&0x8000)>>7)|((im)&0xff))
#  define T2_POP(im)                    tpp(THUMB2_POP,im)
/*
 * jit_get_reg_args(): run CHECK_REG_ARGS() and jit_check_frame(), then
 * acquire _R0.._R3 by name as general-purpose registers (the values
 * returned by jit_get_reg() are discarded).
 * jit_unget_reg_args(): release the same four registers in the reverse
 * order, _R3 first.
 * Both expand to a single do { } while (0) statement.
 */
#  define jit_get_reg_args()                                            \
    do {                                                                \
        CHECK_REG_ARGS();                                               \
        jit_check_frame();                                              \
        (void)jit_get_reg(_R0|jit_class_named|jit_class_gpr);           \
        (void)jit_get_reg(_R1|jit_class_named|jit_class_gpr);           \
        (void)jit_get_reg(_R2|jit_class_named|jit_class_gpr);           \
        (void)jit_get_reg(_R3|jit_class_named|jit_class_gpr);           \
    } while (0)
#  define jit_unget_reg_args()                                          \
    do {                                                                \
        jit_unget_reg(_R3);                                             \
        jit_unget_reg(_R2);                                             \
        jit_unget_reg(_R1);                                             \
        jit_unget_reg(_R0);                                             \
    } while (0)
924 #  define nop(i0)                       _nop(_jit,i0)
925 static void _nop(jit_state_t*,jit_int32_t);
926 #  define movr(r0,r1)                   _movr(_jit,r0,r1)
927 static void _movr(jit_state_t*,jit_int32_t,jit_int32_t);
928 #  define movi(r0,i0)                   _movi(_jit,r0,i0)
929 static void _movi(jit_state_t*,jit_int32_t,jit_word_t);
930 #  define movi_p(r0,i0)                 _movi_p(_jit,r0,i0)
931 static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t);
932 #  define movnr(r0,r1,r2)               _movnr(_jit,r0,r1,r2)
933 static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
934 #  define movzr(r0,r1,r2)               _movzr(_jit,r0,r1,r2)
935 static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
936 #  define casx(r0, r1, r2, r3, i0)      _casx(_jit, r0, r1, r2, r3, i0)
937 static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t,
938                   jit_int32_t,jit_int32_t,jit_word_t);
939 #define casr(r0, r1, r2, r3)            casx(r0, r1, r2, r3, 0)
940 #define casi(r0, i0, r1, r2)            casx(r0, _NOREG, r1, r2, i0)
941 #  define comr(r0,r1)                   _comr(_jit,r0,r1)
942 static void _comr(jit_state_t*,jit_int32_t,jit_int32_t);
943 #  define negr(r0,r1)                   _negr(_jit,r0,r1)
944 static void _negr(jit_state_t*,jit_int32_t,jit_int32_t);
945 #  define clor(r0, r1)                  _clor(_jit, r0, r1)
946 static void _clor(jit_state_t*, jit_int32_t, jit_int32_t);
947 #  define clzr(r0, r1)                  _clzr(_jit, r0, r1)
948 static void _clzr(jit_state_t*, jit_int32_t, jit_int32_t);
949 #  define ctor(r0, r1)                  _ctor(_jit, r0, r1)
950 static void _ctor(jit_state_t*, jit_int32_t, jit_int32_t);
951 #  define ctzr(r0, r1)                  _ctzr(_jit, r0, r1)
952 static void _ctzr(jit_state_t*, jit_int32_t, jit_int32_t);
953 #  define rbitr(r0, r1)                 _rbitr(_jit, r0, r1)
954 static void _rbitr(jit_state_t*, jit_int32_t, jit_int32_t);
955 #  define popcntr(r0, r1)               _popcntr(_jit, r0, r1)
956 static void _popcntr(jit_state_t*, jit_int32_t, jit_int32_t);
957 #  define addr(r0,r1,r2)                _addr(_jit,r0,r1,r2)
958 static void _addr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
959 #  define addi(r0,r1,i0)                _addi(_jit,r0,r1,i0)
960 static void _addi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
961 #  define addcr(r0,r1,r2)               _addcr(_jit,r0,r1,r2)
962 static void _addcr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
963 #  define addci(r0,r1,i0)               _addci(_jit,r0,r1,i0)
964 static void _addci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
965 #  define addxr(r0,r1,r2)               _addxr(_jit,r0,r1,r2)
966 static void _addxr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
967 #  define addxi(r0,r1,i0)               _addxi(_jit,r0,r1,i0)
968 static void _addxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
969 #  define subr(r0,r1,r2)                _subr(_jit,r0,r1,r2)
970 static void _subr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
971 #  define subi(r0,r1,i0)                _subi(_jit,r0,r1,i0)
972 static void _subi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
973 #  define subcr(r0,r1,r2)               _subcr(_jit,r0,r1,r2)
974 static void _subcr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
975 #  define subci(r0,r1,i0)               _subci(_jit,r0,r1,i0)
976 static void _subci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
977 #  define subxr(r0,r1,r2)               _subxr(_jit,r0,r1,r2)
978 static void _subxr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
979 #  define subxi(r0,r1,i0)               _subxi(_jit,r0,r1,i0)
980 static void _subxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
981 #  define rsbi(r0, r1, i0)              _rsbi(_jit, r0, r1, i0)
982 static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
983 #  define mulr(r0,r1,r2)                _mulr(_jit,r0,r1,r2)
984 static void _mulr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
985 #  define muli(r0,r1,i0)                _muli(_jit,r0,r1,i0)
986 static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
987 #  define hmulr(r0,r1,r2)               ihmulr(r0,r1,r2,1)
988 #  define hmulr_u(r0,r1,r2)             ihmulr(r0,r1,r2,0)
989 #  define ihmulr(r0,r1,r2,cc)           _ihmulr(_jit,r0,r1,r2,cc)
990 static void _ihmulr(jit_state_t*,jit_int32_t,jit_int32_t,
991                     jit_int32_t,jit_bool_t);
992 #  define hmuli(r0,r1,i0)               ihmuli(r0,r1,i0,1)
993 #  define hmuli_u(r0,r1,i0)             ihmuli(r0,r1,i0,0)
994 #  define ihmuli(r0,r1,i0,cc)           _ihmuli(_jit,r0,r1,i0,cc)
995 static void _ihmuli(jit_state_t*,jit_int32_t,jit_int32_t,
996                     jit_word_t,jit_bool_t);
997 #  define qmulr(r0,r1,r2,r3)            iqmulr(r0,r1,r2,r3,1)
998 #  define qmulr_u(r0,r1,r2,r3)          iqmulr(r0,r1,r2,r3,0)
999 #  define iqmulr(r0,r1,r2,r3,cc)        _iqmulr(_jit,r0,r1,r2,r3,cc)
1000 static void _iqmulr(jit_state_t*,jit_int32_t,jit_int32_t,
1001                     jit_int32_t,jit_int32_t,jit_bool_t);
1002 #  define qmuli(r0,r1,r2,i0)            iqmuli(r0,r1,r2,i0,1)
1003 #  define qmuli_u(r0,r1,r2,i0)          iqmuli(r0,r1,r2,i0,0)
1004 #  define iqmuli(r0,r1,r2,i0,cc)        _iqmuli(_jit,r0,r1,r2,i0,cc)
1005 static void _iqmuli(jit_state_t*,jit_int32_t,jit_int32_t,
1006                     jit_int32_t,jit_word_t,jit_bool_t);
1007 #  define divrem(d,s,r0,r1,r2)          _divrem(_jit,d,s,r0,r1,r2)
1008 static void _divrem(jit_state_t*,int,int,jit_int32_t,jit_int32_t,jit_int32_t);
1009 #  define divr(r0,r1,r2)                _divr(_jit,r0,r1,r2)
1010 static void _divr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
1011 #  define divi(r0,r1,i0)                _divi(_jit,r0,r1,i0)
1012 static void _divi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
1013 #  define divr_u(r0,r1,r2)              _divr_u(_jit,r0,r1,r2)
1014 static void _divr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
1015 #  define divi_u(r0,r1,i0)              _divi_u(_jit,r0,r1,i0)
1016 static void _divi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
1017 #  define qdivr(r0,r1,r2,r3)            iqdivr(r0,r1,r2,r3,1)
1018 #  define qdivr_u(r0,r1,r2,r3)          iqdivr(r0,r1,r2,r3,0)
1019 #  define iqdivr(r0,r1,r2,r3,cc)        _iqdivr(_jit,r0,r1,r2,r3,cc)
1020 static void _iqdivr(jit_state_t*,jit_int32_t,jit_int32_t,
1021                     jit_int32_t,jit_int32_t,jit_bool_t);
1022 #  define qdivi(r0,r1,r2,i0)            iqdivi(r0,r1,r2,i0,1)
1023 #  define qdivi_u(r0,r1,r2,i0)          iqdivi(r0,r1,r2,i0,0)
1024 #  define iqdivi(r0,r1,r2,i0,cc)        _iqdivi(_jit,r0,r1,r2,i0,cc)
1025 static void _iqdivi(jit_state_t*,jit_int32_t,jit_int32_t,
1026                     jit_int32_t,jit_word_t,jit_bool_t);
1027 #  define remr(r0,r1,r2)                _remr(_jit,r0,r1,r2)
1028 static void _remr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
1029 #  define remi(r0,r1,i0)                _remi(_jit,r0,r1,i0)
1030 static void _remi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
1031 #  define remr_u(r0,r1,r2)              _remr_u(_jit,r0,r1,r2)
1032 static void _remr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
1033 #  define remi_u(r0,r1,i0)              _remi_u(_jit,r0,r1,i0)
1034 static void _remi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
1035 #  define andr(r0,r1,r2)                _andr(_jit,r0,r1,r2)
1036 static void _andr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
1037 #  define andi(r0,r1,i0)                _andi(_jit,r0,r1,i0)
1038 static void _andi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
1039 #  define orr(r0,r1,r2)                 _orr(_jit,r0,r1,r2)
1040 static void _orr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
1041 #  define ori(r0,r1,i0)                 _ori(_jit,r0,r1,i0)
1042 static void _ori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
1043 #  define xorr(r0,r1,r2)                _xorr(_jit,r0,r1,r2)
1044 static void _xorr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
1045 #  define xori(r0,r1,i0)                _xori(_jit,r0,r1,i0)
1046 static void _xori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
1047 #  define lshr(r0,r1,r2)                _lshr(_jit,r0,r1,r2)
1048 static void _lshr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
1049 #  define lshi(r0,r1,i0)                _lshi(_jit,r0,r1,i0)
1050 static void _lshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
1051 #  define rshr(r0,r1,r2)                _rshr(_jit,r0,r1,r2)
1052 static void _rshr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
1053 #  define rshi(r0,r1,i0)                _rshi(_jit,r0,r1,i0)
1054 static void _rshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
1055 #  define rshr_u(r0,r1,r2)              _rshr_u(_jit,r0,r1,r2)
1056 static void _rshr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
1057 #  define rshi_u(r0,r1,i0)              _rshi_u(_jit,r0,r1,i0)
1058 static void _rshi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
1059 #  define lrotr(r0,r1,r2)               _lrotr(_jit,r0,r1,r2)
1060 static void _lrotr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
/* Left rotate by a constant, expressed as a right rotate by 32 minus the
 * count.  i0 is parenthesized so that an expression argument (for example
 * a - b) is subtracted as a whole. */
#  define lroti(r0,r1,i0)               rroti(r0,r1,32-(i0))
1062 #  define rrotr(r0,r1,r2)               _rrotr(_jit,r0,r1,r2)
1063 static void _rrotr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
1064 #  define rroti(r0,r1,i0)               _rroti(_jit,r0,r1,i0)
1065 static void _rroti(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
1066 #  define ccr(ct,cf,r0,r1,r2)           _ccr(_jit,ct,cf,r0,r1,r2)
1067 static void _ccr(jit_state_t*,int,int,jit_int32_t,jit_int32_t,jit_int32_t);
1068 #  define cci(ct,cf,r0,r1,i0)           _cci(_jit,ct,cf,r0,r1,i0)
1069 static void _cci(jit_state_t*,int,int,jit_int32_t,jit_int32_t,jit_word_t);
1070 #  define ltr(r0, r1, r2)               ccr(ARM_CC_LT,ARM_CC_GE,r0,r1,r2)
1071 #  define lti(r0, r1, i0)               cci(ARM_CC_LT,ARM_CC_GE,r0,r1,i0)
1072 #  define ltr_u(r0, r1, r2)             ccr(ARM_CC_LO,ARM_CC_HS,r0,r1,r2)
1073 #  define lti_u(r0, r1, i0)             cci(ARM_CC_LO,ARM_CC_HS,r0,r1,i0)
1074 #  define ler(r0, r1, r2)               ccr(ARM_CC_LE,ARM_CC_GT,r0,r1,r2)
1075 #  define lei(r0, r1, i0)               cci(ARM_CC_LE,ARM_CC_GT,r0,r1,i0)
1076 #  define ler_u(r0, r1, r2)             ccr(ARM_CC_LS,ARM_CC_HI,r0,r1,r2)
1077 #  define lei_u(r0, r1, i0)             cci(ARM_CC_LS,ARM_CC_HI,r0,r1,i0)
1078 #  define eqr(r0, r1, r2)               ccr(ARM_CC_EQ,ARM_CC_NE,r0,r1,r2)
1079 #  define eqi(r0, r1, i0)               cci(ARM_CC_EQ,ARM_CC_NE,r0,r1,i0)
1080 #  define ger(r0, r1, r2)               ccr(ARM_CC_GE,ARM_CC_LT,r0,r1,r2)
1081 #  define gei(r0, r1, i0)               cci(ARM_CC_GE,ARM_CC_LT,r0,r1,i0)
1082 #  define ger_u(r0, r1, r2)             ccr(ARM_CC_HS,ARM_CC_LO,r0,r1,r2)
1083 #  define gei_u(r0, r1, i0)             cci(ARM_CC_HS,ARM_CC_LO,r0,r1,i0)
1084 #  define gtr(r0, r1, r2)               ccr(ARM_CC_GT,ARM_CC_LE,r0,r1,r2)
1085 #  define gti(r0, r1, i0)               cci(ARM_CC_GT,ARM_CC_LE,r0,r1,i0)
1086 #  define gtr_u(r0, r1, r2)             ccr(ARM_CC_HI,ARM_CC_LS,r0,r1,r2)
1087 #  define gti_u(r0, r1, i0)             cci(ARM_CC_HI,ARM_CC_LS,r0,r1,i0)
1088 #  define ner(r0,r1,r2)                 _ner(_jit,r0,r1,r2)
1089 static void _ner(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
1090 #  define nei(r0,r1,i0)                 _nei(_jit,r0,r1,i0)
1091 static void _nei(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
1092 #  define jmpr(r0)                      _jmpr(_jit,r0)
1093 static void _jmpr(jit_state_t*,jit_int32_t);
1094 #  define jmpi(i0)                      _jmpi(_jit,i0)
1095 static void _jmpi(jit_state_t*,jit_word_t);
1096 #  define jmpi_p(i0, i1)                _jmpi_p(_jit,i0, i1)
1097 static jit_word_t _jmpi_p(jit_state_t*,jit_word_t,jit_bool_t);
1098 #  define bccr(cc,i0,r0,r1)             _bccr(_jit,cc,i0,r0,r1)
1099 static jit_word_t _bccr(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
1100 #  define bcci(cc,i0,r0,i1)             _bcci(_jit,cc,i0,r0,i1)
1101 static jit_word_t _bcci(jit_state_t*,int,jit_word_t,jit_int32_t,jit_word_t);
1102 #  define bltr(i0, r0, r1)              bccr(ARM_CC_LT,i0,r0,r1)
1103 #  define blti(i0, r0, i1)              bcci(ARM_CC_LT,i0,r0,i1)
1104 #  define bltr_u(i0, r0, r1)            bccr(ARM_CC_LO,i0,r0,r1)
1105 #  define blti_u(i0, r0, i1)            bcci(ARM_CC_LO,i0,r0,i1)
1106 #  define bler(i0, r0, r1)              bccr(ARM_CC_LE,i0,r0,r1)
1107 #  define blei(i0, r0, i1)              bcci(ARM_CC_LE,i0,r0,i1)
1108 #  define bler_u(i0, r0, r1)            bccr(ARM_CC_LS,i0,r0,r1)
1109 #  define blei_u(i0, r0, i1)            bcci(ARM_CC_LS,i0,r0,i1)
1110 #  define beqr(i0, r0, r1)              bccr(ARM_CC_EQ,i0,r0,r1)
1111 #  define beqi(i0, r0, i1)              bcci(ARM_CC_EQ,i0,r0,i1)
1112 #  define bger(i0, r0, r1)              bccr(ARM_CC_GE,i0,r0,r1)
1113 #  define bgei(i0, r0, i1)              bcci(ARM_CC_GE,i0,r0,i1)
1114 #  define bger_u(i0, r0, r1)            bccr(ARM_CC_HS,i0,r0,r1)
1115 #  define bgei_u(i0, r0, i1)            bcci(ARM_CC_HS,i0,r0,i1)
1116 #  define bgtr(i0, r0, r1)              bccr(ARM_CC_GT,i0,r0,r1)
1117 #  define bgti(i0, r0, i1)              bcci(ARM_CC_GT,i0,r0,i1)
1118 #  define bgtr_u(i0, r0, r1)            bccr(ARM_CC_HI,i0,r0,r1)
1119 #  define bgti_u(i0, r0, i1)            bcci(ARM_CC_HI,i0,r0,i1)
1120 #  define bner(i0, r0, r1)              bccr(ARM_CC_NE,i0,r0,r1)
1121 #  define bnei(i0, r0, i1)              bcci(ARM_CC_NE,i0,r0,i1)
1122 #  define baddr(cc,i0,r0,r1)            _baddr(_jit,cc,i0,r0,r1)
1123 static jit_word_t _baddr(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
1124 #  define baddi(cc,i0,r0,r1)            _baddi(_jit,cc,i0,r0,r1)
1125 static jit_word_t _baddi(jit_state_t*,int,jit_word_t,jit_int32_t,jit_word_t);
1126 #  define boaddr(i0,r0,r1)              baddr(ARM_CC_VS,i0,r0,r1)
1127 #  define boaddi(i0,r0,i1)              baddi(ARM_CC_VS,i0,r0,i1)
1128 #  define boaddr_u(i0,r0,r1)            baddr(ARM_CC_HS,i0,r0,r1)
1129 #  define boaddi_u(i0,r0,i1)            baddi(ARM_CC_HS,i0,r0,i1)
1130 #  define bxaddr(i0,r0,r1)              baddr(ARM_CC_VC,i0,r0,r1)
1131 #  define bxaddi(i0,r0,i1)              baddi(ARM_CC_VC,i0,r0,i1)
1132 #  define bxaddr_u(i0,r0,r1)            baddr(ARM_CC_LO,i0,r0,r1)
1133 #  define bxaddi_u(i0,r0,i1)            baddi(ARM_CC_LO,i0,r0,i1)
1134 #  define bsubr(cc,i0,r0,r1)            _bsubr(_jit,cc,i0,r0,r1)
1135 static jit_word_t _bsubr(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
1136 #  define bsubi(cc,i0,r0,r1)            _bsubi(_jit,cc,i0,r0,r1)
1137 static jit_word_t _bsubi(jit_state_t*,int,jit_word_t,jit_int32_t,jit_word_t);
1138 #  define bosubr(i0,r0,r1)              bsubr(ARM_CC_VS,i0,r0,r1)
1139 #  define bosubi(i0,r0,i1)              bsubi(ARM_CC_VS,i0,r0,i1)
1140 #  define bosubr_u(i0,r0,r1)            bsubr(ARM_CC_LO,i0,r0,r1)
1141 #  define bosubi_u(i0,r0,i1)            bsubi(ARM_CC_LO,i0,r0,i1)
1142 #  define bxsubr(i0,r0,r1)              bsubr(ARM_CC_VC,i0,r0,r1)
1143 #  define bxsubi(i0,r0,i1)              bsubi(ARM_CC_VC,i0,r0,i1)
1144 #  define bxsubr_u(i0,r0,r1)            bsubr(ARM_CC_HS,i0,r0,r1)
1145 #  define bxsubi_u(i0,r0,i1)            bsubi(ARM_CC_HS,i0,r0,i1)
1146 #  define bmxr(cc,i0,r0,r1)             _bmxr(_jit,cc,i0,r0,r1)
1147 static jit_word_t _bmxr(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
1148 #  define bmxi(cc,i0,r0,r1)             _bmxi(_jit,cc,i0,r0,r1)
1149 static jit_word_t _bmxi(jit_state_t*,int,jit_word_t,jit_int32_t,jit_word_t);
1150 #  define bmsr(i0,r0,r1)                bmxr(ARM_CC_NE,i0,r0,r1)
1151 #  define bmsi(i0,r0,i1)                bmxi(ARM_CC_NE,i0,r0,i1)
1152 #  define bmcr(i0,r0,r1)                bmxr(ARM_CC_EQ,i0,r0,r1)
1153 #  define bmci(i0,r0,i1)                bmxi(ARM_CC_EQ,i0,r0,i1)
1154 #  define ldr_c(r0,r1)                  _ldr_c(_jit,r0,r1)
1155 static void _ldr_c(jit_state_t*,jit_int32_t,jit_int32_t);
1156 #  define ldi_c(r0,i0)                  _ldi_c(_jit,r0,i0)
1157 static void _ldi_c(jit_state_t*,jit_int32_t,jit_word_t);
1158 #  define ldxr_c(r0,r1,r2)              _ldxr_c(_jit,r0,r1,r2)
1159 static void _ldxr_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
1160 #  define ldxi_c(r0,r1,i0)              _ldxi_c(_jit,r0,r1,i0)
1161 static void _ldxi_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
1162 #  define ldr_uc(r0,r1)                 _ldr_uc(_jit,r0,r1)
1163 static void _ldr_uc(jit_state_t*,jit_int32_t,jit_int32_t);
1164 #  define ldi_uc(r0,i0)                 _ldi_uc(_jit,r0,i0)
1165 static void _ldi_uc(jit_state_t*,jit_int32_t,jit_word_t);
1166 #  define ldxr_uc(r0,r1,r2)             _ldxr_uc(_jit,r0,r1,r2)
1167 static void _ldxr_uc(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
1168 #  define ldxi_uc(r0,r1,i0)             _ldxi_uc(_jit,r0,r1,i0)
1169 static void _ldxi_uc(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
1170 #  define ldr_s(r0,r1)                  _ldr_s(_jit,r0,r1)
1171 static void _ldr_s(jit_state_t*,jit_int32_t,jit_int32_t);
1172 #  define ldi_s(r0,i0)                  _ldi_s(_jit,r0,i0)
1173 static void _ldi_s(jit_state_t*,jit_int32_t,jit_word_t);
1174 #  define ldxr_s(r0,r1,r2)              _ldxr_s(_jit,r0,r1,r2)
1175 static void _ldxr_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
1176 #  define ldxi_s(r0,r1,i0)              _ldxi_s(_jit,r0,r1,i0)
1177 static void _ldxi_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
1178 #  define ldr_us(r0,r1)                 _ldr_us(_jit,r0,r1)
1179 static void _ldr_us(jit_state_t*,jit_int32_t,jit_int32_t);
1180 #  define ldi_us(r0,i0)                 _ldi_us(_jit,r0,i0)
1181 static void _ldi_us(jit_state_t*,jit_int32_t,jit_word_t);
1182 #  define ldxr_us(r0,r1,r2)             _ldxr_us(_jit,r0,r1,r2)
1183 static void _ldxr_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
1184 #  define ldxi_us(r0,r1,i0)             _ldxi_us(_jit,r0,r1,i0)
1185 static void _ldxi_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
1186 #  define ldr_i(r0,r1)                  _ldr_i(_jit,r0,r1)
1187 static void _ldr_i(jit_state_t*,jit_int32_t,jit_int32_t);
1188 #  define ldi_i(r0,i0)                  _ldi_i(_jit,r0,i0)
1189 static void _ldi_i(jit_state_t*,jit_int32_t,jit_word_t);
1190 #  define ldxr_i(r0,r1,r2)              _ldxr_i(_jit,r0,r1,r2)
1191 static void _ldxr_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
1192 #  define ldxi_i(r0,r1,i0)              _ldxi_i(_jit,r0,r1,i0)
1193 static void _ldxi_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
1194 #  define unldr(r0, r1, i0)             _unldr(_jit, r0, r1, i0)
1195 static void _unldr(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
1196 #  define unldi(r0, i0, i1)             _unldi(_jit, r0, i0, i1)
1197 static void _unldi(jit_state_t*, jit_int32_t, jit_word_t, jit_word_t);
1198 #  define unldr_u(r0, r1, i0)           _unldr_u(_jit, r0, r1, i0)
1199 static void _unldr_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
1200 #  define unldi_u(r0, i0, i1)           _unldi_u(_jit, r0, i0, i1)
1201 static void _unldi_u(jit_state_t*, jit_int32_t, jit_word_t, jit_word_t);
1202 #  define str_c(r0,r1)                  _str_c(_jit,r0,r1)
1203 static void _str_c(jit_state_t*,jit_int32_t,jit_int32_t);
1204 #  define sti_c(i0,r0)                  _sti_c(_jit,i0,r0)
1205 static void _sti_c(jit_state_t*,jit_word_t,jit_int32_t);
1206 #  define stxr_c(r0,r1,r2)              _stxr_c(_jit,r0,r1,r2)
1207 static void _stxr_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
1208 #  define stxi_c(r0,r1,i0)              _stxi_c(_jit,r0,r1,i0)
1209 static void _stxi_c(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
1210 #  define str_s(r0,r1)                  _str_s(_jit,r0,r1)
1211 static void _str_s(jit_state_t*,jit_int32_t,jit_int32_t);
1212 #  define sti_s(i0,r0)                  _sti_s(_jit,i0,r0)
1213 static void _sti_s(jit_state_t*,jit_word_t,jit_int32_t);
1214 #  define stxr_s(r0,r1,r2)              _stxr_s(_jit,r0,r1,r2)
1215 static void _stxr_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
1216 #  define stxi_s(r0,r1,i0)              _stxi_s(_jit,r0,r1,i0)
1217 static void _stxi_s(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
1218 #  define str_i(r0,r1)                  _str_i(_jit,r0,r1)
1219 static void _str_i(jit_state_t*,jit_int32_t,jit_int32_t);
1220 #  define sti_i(i0,r0)                  _sti_i(_jit,i0,r0)
1221 static void _sti_i(jit_state_t*,jit_word_t,jit_int32_t);
1222 #  define stxr_i(r0,r1,r2)              _stxr_i(_jit,r0,r1,r2)
1223 static void _stxr_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
1224 #  define stxi_i(r0,r1,i0)              _stxi_i(_jit,r0,r1,i0)
1225 static void _stxi_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
1226 #define unstr(r0, r1, i0)               _unstr(_jit, r0, r1, i0)
1227 static void _unstr(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
1228 #define unsti(i0, r0, i1)               _unsti(_jit, i0, r0, i1)
1229 static void _unsti(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
1230 #  define bswapr_us(r0,r1)              _bswapr_us(_jit,r0,r1)
1231 static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t);
1232 #  define bswapr_ui(r0,r1)              _bswapr_ui(_jit,r0,r1)
1233 static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
1234 #  define extr(r0,r1,i0,i1)             _extr(_jit,r0,r1,i0,i1)
1235 static void _extr(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t,jit_word_t);
1236 #  define extr_u(r0,r1,i0,i1)           _extr_u(_jit,r0,r1,i0,i1)
1237 static void _extr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t,jit_word_t);
1238 #  define depr(r0,r1,i0,i1)             _depr(_jit,r0,r1,i0,i1)
1239 static void _depr(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t,jit_word_t);
1240 #  define extr_c(r0,r1)                 _extr_c(_jit,r0,r1)
1241 static void _extr_c(jit_state_t*,jit_int32_t,jit_int32_t);
1242 #  define extr_uc(r0,r1)                _extr_uc(_jit,r0,r1)
1243 static void _extr_uc(jit_state_t*,jit_int32_t,jit_int32_t);
1244 #  define extr_s(r0,r1)                 _extr_s(_jit,r0,r1)
1245 static void _extr_s(jit_state_t*,jit_int32_t,jit_int32_t);
1246 #  define extr_us(r0,r1)                _extr_us(_jit,r0,r1)
1247 static void _extr_us(jit_state_t*,jit_int32_t,jit_int32_t);
1248 #  define prolog(i0)                    _prolog(_jit,i0)
1249 static void _prolog(jit_state_t*,jit_node_t*);
1250 #  define epilog(i0)                    _epilog(_jit,i0)
1251 static void _epilog(jit_state_t*,jit_node_t*);
1252 #  define callr(r0)                     _callr(_jit,r0)
1253 static void _callr(jit_state_t*,jit_int32_t);
1254 #  define calli(i0,i1)                  _calli(_jit,i0,i1)
1255 static void _calli(jit_state_t*,jit_word_t,jit_bool_t);
1256 #  define calli_p(i0,i1)                _calli_p(_jit,i0,i1)
1257 static jit_word_t _calli_p(jit_state_t*,jit_word_t,jit_bool_t);
1258 #  define vastart(r0)                   _vastart(_jit, r0)
1259 static void _vastart(jit_state_t*, jit_int32_t);
1260 #  define vaarg(r0, r1)                 _vaarg(_jit, r0, r1)
1261 static void _vaarg(jit_state_t*, jit_int32_t, jit_int32_t);
1262 #  define patch_at(kind,jump,label)     _patch_at(_jit,kind,jump,label)
1263 static void _patch_at(jit_state_t*,jit_int32_t,jit_word_t,jit_word_t);
1264
1265 #  define NEED_FALLBACK_CASX 1
1266 #endif
1267
1268 #if CODE
/* from binutils */
/* Rotate the 32-bit unsigned value v left by n bits, 0 <= n < 32.  The
 * right shift count is reduced modulo 32 so that n == 0 does not shift by
 * the full width of the type, which is undefined behavior in C. */
#  define rotate_left(v, n)     ((v) << (n) | (v) >> ((32 - (n)) & 31))
/* Look for an even left rotation (0, 2, ... 30) that brings v into the
 * range 0..0xff.  On success return the rotated byte in bits 0-7 combined
 * with the rotation count shifted left by 7 (that is, half the rotation
 * in bits 8-11); return -1 when no such rotation exists. */
static int
encode_arm_immediate(unsigned int v)
{
    unsigned int        a, i;

    for (i = 0; i < 32; i += 2) {
        a = rotate_left(v, i);
        if (a <= 0xff)
            return (a | (i << 7));
    }

    return (-1);
}
1282
/* Encode v in the split immediate layout built below: an 8-bit payload
 * in bits 0-7, a 3-bit field in bits 12-14 and one extra bit at bit 26.
 * Returns the encoded fields, or -1 when v matches none of the patterns.
 *
 * Patterns tried, in order:
 *   - a plain byte;
 *   - the byte replicated in bytes 0 and 2 (selector 1);
 *   - the byte replicated in bytes 1 and 3 (selector 2);
 *   - the byte replicated in all four bytes (selector 3);
 *   - an 8-bit value whose top bit is set, rotated right by 8..31. */
static int
encode_thumb_immediate(unsigned int v)
{
    int                 i;
    unsigned int        m;
    unsigned int        n;
    /* 00000000 00000000 00000000 abcdefgh */
    if ((v & 0xff) == v)
        return (v);
    /* 00000000 abcdefgh 00000000 abcdefgh */
    if ((v & 0xff00ff) == v && ((v & 0xff0000) >> 16) == (v & 0xff))
        return ((v & 0xff) | (1 << 12));
    /* abcdefgh 00000000 abcdefgh 00000000 */
    if (((v & 0xffff0000) >> 16) == (v & 0xffff) && (v & 0xff) == 0)
        /* The low byte is zero here by construction; the payload is the
         * byte at bits 8-15. */
        return (((v & 0xff00) >> 8) | (2 << 12));
    /* abcdefgh abcdefgh abcdefgh abcdefgh */
    if ( (v &    0xff)        == ((v &     0xff00) >>  8) &&
        ((v &   0xff00) >> 8) == ((v &   0xff0000) >> 16) &&
        ((v & 0xff0000) << 8) ==  (v & 0xff000000))
        return ((v & 0xff) | (3 << 12));
    /* 1bcdefgh << 24 ... 1bcdefgh << 1 */
    /* i is the right-rotation count (8..31), m the 8-bit window the value
     * must fit in, n the top bit of that window, which must be set. */
    for (i = 8, m = 0xff000000, n = 0x80000000;
         i < 32; i++, m >>= 1,  n >>= 1) {
        if ((v & m) == v && (v & n)) {
            v >>= 32 - i;
            /* Bit 7 of the payload carries the low bit of the rotation;
             * the always-set top bit of the value is implicit. */
            if (!(i & 1))
                v &= 0x7f;
            i >>= 1;
            return (((i & 7) << 12) | ((i & 8) << 23) | v);
        }
    }
    return (-1);
}
1316
/* Scatter a 12-bit unsigned value into three fields: bit 11 goes to bit
 * 26, bits 8-10 go to bits 12-14 and bits 0-7 stay in place.  Values that
 * do not fit in 12 bits yield -1. */
static int
encode_thumb_word_immediate(unsigned int v)
{
    unsigned int        top, mid, low;

    if ((v & 0xfffff000) != 0)
        return (-1);
    top = (v & 0x800) << 15;
    mid = (v & 0x700) << 4;
    low = v & 0xff;
    return (top | mid | low);
}
1324
/* Encode a signed displacement in [-0x800000, 0x7fffff] as branch fields:
 * the sign (bit 23 of v) at bit 26, bits 11-20 of v at bits 16-25, bits
 * 0-10 of v in place, and bits 22 and 21 of v at bits 13 and 11 after
 * being inverted when the sign is clear.  Out-of-range values yield -1. */
static int
encode_thumb_jump(int v)
{
    int         sign, hi, lo;

    if (v < (int)-0x800000 || v > 0x7fffff)
        return (-1);
    sign = (v & 0x800000) != 0;
    /* Equivalent to "sign ? bit : !bit" for each of the two bits. */
    hi = !(((v & 0x400000) != 0) ^ sign);
    lo = !(((v & 0x200000) != 0) ^ sign);
    return ((sign << 26) | ((v & 0x1ff800) << 5) |
            (hi << 13) | (lo << 11) | (v & 0x7ff));
}
1339
/* Encode a signed displacement in [-0x80000, 0x7ffff] as conditional
 * branch fields: bit 19 of v at bit 26, bit 17 at bit 13, bit 18 at bit
 * 11, bits 11-16 at bits 16-21 and bits 0-10 in place.  Out-of-range
 * values yield -1. */
static int
encode_thumb_cc_jump(int v)
{
    int         code;

    if (v < (int)-0x80000 || v > 0x7ffff)
        return (-1);
    code = ((v & 0x1f800) << 5) | (v & 0x7ff);
    if (v & 0x80000)
        code |= 1 << 26;
    if (v & 0x20000)
        code |= 1 << 13;
    if (v & 0x40000)
        code |= 1 << 11;
    return (code);
}
1352
1353 static int
1354 encode_thumb_shift(int v, int type)
1355 {
1356     switch (type) {
1357         case ARM_ASR:
1358         case ARM_LSL:
1359         case ARM_LSR:           type >>= 1;     break;
1360         default:                assert(!"handled shift");
1361     }
1362     assert(v >= 0 && v <= 31);
1363     return (((v & 0x1c) << 10) | ((v & 3) << 6) | type);
1364 }
1365
1366 static void
1367 _tcit(jit_state_t *_jit, unsigned int tc, int it)
1368 {
1369     int         c;
1370     int         m;
1371     c = (tc >> 28) & 1;
1372     assert(!(tc & 0xfffffff) && tc != ARM_CC_NV);
1373     switch (it) {
1374         case THUMB2_IT:         m =   1<<3;                     break;
1375         case THUMB2_ITT:        m =  (c<<3)| (1<<2);            break;
1376         case THUMB2_ITE:        m = (!c<<3)| (1<<2);            break;
1377         case THUMB2_ITTT:       m =  (c<<3)| (c<<2)| (1<<1);    break;
1378         case THUMB2_ITET:       m = (!c<<3)| (c<<2)| (1<<1);    break;
1379         case THUMB2_ITTE:       m =  (c<<3)|(!c<<2)| (1<<1);    break;
1380         case THUMB2_ITEE:       m = (!c<<3)|(!c<<2)| (1<<1);    break;
1381         case THUMB2_ITTTT:      m =  (c<<3)| (c<<2)| (c<<1)|1;  break;
1382         case THUMB2_ITETT:      m = (!c<<3)| (c<<2)| (c<<1)|1;  break;
1383         case THUMB2_ITTET:      m =  (c<<3)|(!c<<2)| (c<<1)|1;  break;
1384         case THUMB2_ITEET:      m = (!c<<3)|(!c<<2)| (c<<1)|1;  break;
1385         case THUMB2_ITTTE:      m =  (c<<3)| (c<<2)|(!c<<1)|1;  break;
1386         case THUMB2_ITETE:      m = (!c<<3)| (c<<2)|(!c<<1)|1;  break;
1387         case THUMB2_ITTEE:      m =  (c<<3)|(!c<<2)|(!c<<1)|1;  break;
1388         case THUMB2_ITEEE:      m = (!c<<3)|(!c<<2)|(!c<<1)|1;  break;
1389         default:                abort();
1390     }
1391     assert(m && (tc != ARM_CC_AL || !(m & (m - 1))));
1392     is(0xbf00 | (tc >> 24) | m);
1393 }
1394
1395 static void
1396 _corrr(jit_state_t *_jit, int cc, int o, int rn, int rd, int rm)
1397 {
1398     assert(!(cc & 0x0fffffff));
1399     assert(!(o  & 0xf00fff0f));
1400     ii(cc|o|(_u4(rn)<<16)|(_u4(rd)<<12)|_u4(rm));
1401 }
1402
1403 static void
1404 _corri(jit_state_t *_jit, int cc, int o, int rn, int rd, int im)
1405 {
1406     assert(!(cc & 0x0fffffff));
1407     assert(!(o  & 0xf00fffff));
1408     assert(!(im & 0xfffff000));
1409     ii(cc|o|(_u4(rn)<<16)|(_u4(rd)<<12)|_u12(im));
1410 }
1411
1412 static void
1413 _corri8(jit_state_t *_jit, int cc, int o, int rn, int rt, int im)
1414 {
1415     assert(!(cc & 0x0fffffff));
1416     assert(!(o  & 0xf00fff0f));
1417     assert(!(im & 0xffffff00));
1418     ii(cc|o|(_u4(rn)<<16)|(_u4(rt)<<12)|((im&0xf0)<<4)|(im&0x0f));
1419 }
1420
1421 static void
1422 _coriw(jit_state_t *_jit, int cc, int o, int rd, int im)
1423 {
1424     assert(!(cc & 0x0fffffff));
1425     assert(!(o  & 0xf00fffff));
1426     assert(!(im & 0xffff0000));
1427     ii(cc|o|((im&0xf000)<<4)|(_u4(rd)<<12)|(im&0xfff));
1428 }
1429
1430 static void
1431 _torrr(jit_state_t *_jit, int o, int rn, int rd, int rm)
1432 {
1433     jit_thumb_t thumb;
1434     assert(!(o & 0xf0f0f));
1435     thumb.i = o|(_u4(rn)<<16)|(_u4(rd)<<8)|_u4(rm);
1436     iss(thumb.s[0], thumb.s[1]);
1437 }
1438
1439 static void
1440 _torrrs(jit_state_t *_jit, int o, int rn, int rd, int rm, int im)
1441 {
1442     jit_thumb_t thumb;
1443     assert(!(o  & 0x000f0f0f));
1444     assert(!(im & 0xffff8f0f));
1445     thumb.i = o|(_u4(rn)<<16)|(_u4(rd)<<8)|im|_u4(rm);
1446     iss(thumb.s[0], thumb.s[1]);
1447 }
1448
1449 static void
1450 _torxr(jit_state_t *_jit, int o, int rn, int rt, int rm)
1451 {
1452     jit_thumb_t thumb;
1453     assert(!(o & 0xf0f0f));
1454     thumb.i = o|(_u4(rn)<<16)|(_u4(rt)<<12)|_u4(rm);
1455     iss(thumb.s[0], thumb.s[1]);
1456 }
1457
1458 static void
1459 _torrrr(jit_state_t *_jit, int o, int rn, int rl, int rh, int rm)
1460 {
1461     jit_thumb_t thumb;
1462     assert(!(o & 0x000fff0f));
1463     thumb.i = o|(_u4(rn)<<16)|(_u4(rl)<<12)|(_u4(rh)<<8)|_u4(rm);
1464     iss(thumb.s[0], thumb.s[1]);
1465 }
1466
1467 static void
1468 _torrri8(jit_state_t *_jit, int o, int rn, int rt, int rt2, int im)
1469 {
1470     jit_thumb_t thumb;
1471     assert(!(o  & 0x000fffff));
1472     assert(!(im & 0xffffff00));
1473     thumb.i = o|(_u4(rn)<<16)|(_u4(rt)<<12)|(_u4(rt2)<<8)|im;
1474     iss(thumb.s[0], thumb.s[1]);
1475 }
1476
1477 static void
1478 _torri(jit_state_t *_jit, int o, int rn, int rd, int im)
1479 {
1480     jit_thumb_t thumb;
1481     assert(!(o  & 0x0c0f7fff));
1482     assert(!(im & 0xfbff8f00));
1483     thumb.i = o|(_u4(rn)<<16)|(_u4(rd)<<8)|im;
1484     iss(thumb.s[0], thumb.s[1]);
1485 }
1486
1487 static void
1488 _torri8(jit_state_t *_jit, int o, int rn, int rt, int im)
1489 {
1490     jit_thumb_t thumb;
1491     assert(!(o  & 0x000ff0ff));
1492     assert(!(im & 0xffffff00));
1493     thumb.i = o|(_u4(rn)<<16)|(_u4(rt)<<12)|im;
1494     iss(thumb.s[0], thumb.s[1]);
1495 }
1496
1497 static void
1498 _torri12(jit_state_t *_jit, int o, int rn, int rt, int im)
1499 {
1500     jit_thumb_t thumb;
1501     assert(!(o  & 0x000fffff));
1502     assert(!(im & 0xfffff000));
1503     thumb.i = o|(_u4(rn)<<16)|(_u4(rt)<<12)|im;
1504     iss(thumb.s[0], thumb.s[1]);
1505 }
1506
1507 static void
1508 _tshift(jit_state_t *_jit, int o, int rd, int rm, int im)
1509 {
1510     jit_thumb_t thumb;
1511     assert(!(o & 0x7fcf));
1512     assert(im >= 0 && im < 32);
1513     thumb.i = o|((im&0x1c)<<10)|(_u4(rd)<<8)|((im&3)<<6)|_u4(rm);
1514     iss(thumb.s[0], thumb.s[1]);
1515 }
1516
1517 static void
1518 _toriw(jit_state_t *_jit, int o, int rd, int im)
1519 {
1520     jit_thumb_t thumb;
1521     assert(!(im & 0xffff0000));
1522     thumb.i = o|((im&0xf000)<<4)|((im&0x800)<<15)|((im&0x700)<<4)|(_u4(rd)<<8)|(im&0xff);
1523     iss(thumb.s[0], thumb.s[1]);
1524 }
1525
1526 static void
1527 _tc8(jit_state_t *_jit, int cc, int im)
1528 {
1529     assert(!(cc & 0x0fffffff));
1530     assert(cc != ARM_CC_AL && cc != ARM_CC_NV);
1531     assert(im >= -128 && im <= 127);
1532     is(THUMB_CC_B|(cc>>20)|(im&0xff));
1533 }
1534
1535 static void
1536 _t11(jit_state_t *_jit, int im)
1537 {
1538     assert(!(im & 0xfffff800));
1539     is(THUMB_B|im);
1540 }
1541
1542 static void
1543 _tcb(jit_state_t *_jit, int cc, int im)
1544 {
1545     jit_thumb_t thumb;
1546     assert(!(cc & 0xfffffff));
1547     assert(cc != ARM_CC_AL && cc != ARM_CC_NV);
1548     cc = ((jit_uint32_t)cc) >> 6;
1549     assert(!(im & (THUMB2_CC_B|cc)));
1550     thumb.i = THUMB2_CC_B|cc|im;
1551     iss(thumb.s[0], thumb.s[1]);
1552 }
1553
1554 static void
1555 _blxi(jit_state_t *_jit, int im)
1556 {
1557     assert(!(im & 0xfe000000));
1558     ii(ARM_BLXI|im);
1559 }
1560
1561 static void
1562 _tb(jit_state_t *_jit, int o, int im)
1563 {
1564     jit_thumb_t thumb;
1565     assert(!(o & 0x07ff2fff));
1566     assert(!(o & im));
1567     thumb.i = o|im;
1568     iss(thumb.s[0], thumb.s[1]);
1569 }
1570
1571 static void
1572 _corrrr(jit_state_t *_jit, int cc, int o, int rh, int rl, int rm, int rn)
1573 {
1574     assert(!(cc & 0x0fffffff));
1575     assert(!(o & 0xf00fff0f));
1576     ii(cc|o|(_u4(rh)<<16)|(_u4(rl)<<12)|(_u4(rm)<<8)|_u4(rn));
1577 }
1578
1579 static void
1580 _corrrs(jit_state_t *_jit, int cc, int o, int rn, int rd, int rm, int im)
1581 {
1582     assert(!(cc & 0x0fffffff));
1583     assert(!(o  & 0xf000ff8f));
1584     ii(cc|o|(_u4(rd)<<12)|(_u4(rn)<<16)|(im<<7)|_u4(rm));
1585 }
1586
1587 static void
1588 _cshift(jit_state_t *_jit, int cc, int o, int rd, int rm, int rn, int im)
1589 {
1590     assert(!(cc & 0x0fffffff));
1591     assert(!(o  & 0xffe0ff8f));
1592     assert(((_u4(rm)<<8)&(im<<7)) == 0);
1593     ii(cc|ARM_SHIFT|o|(_u4(rd)<<12)|(_u4(rm)<<8)|(im<<7)|_u4(rn));
1594 }
1595
1596 static void
1597 _cb(jit_state_t *_jit, int cc, int o, int im)
1598 {
1599     assert(!(cc & 0x0fffffff));
1600     assert(!(o  & 0xf0ffffff));
1601     ii(cc|o|_u24(im));
1602 }
1603
1604 static void
1605 _cbx(jit_state_t *_jit, int cc, int o, int rm)
1606 {
1607     assert(!(cc & 0x0fffffff));
1608     assert(!(o  & 0xf000000f));
1609     ii(cc|o|_u4(rm));
1610 }
1611
1612 static void
1613 _corl(jit_state_t *_jit, int cc, int o, int r0, int i0)
1614 {
1615     assert(!(cc & 0x0fffffff));
1616     assert(!(o  & 0xf00fffff));
1617     ii(cc|o|(_u4(r0)<<16)|_u16(i0));
1618 }
1619
1620 static void
1621 _c6orr(jit_state_t *_jit, int cc, int o, int rd, int rm)
1622 {
1623     assert(!(cc & 0x0fffffff));
1624     assert(!(o  & 0xf000f00f));
1625     ii(cc|o|(_u4(rd)<<12)|_u4(rm));
1626 }
1627
1628 static void
1629 _tpp(jit_state_t *_jit, int o, int im)
1630 {
1631     jit_thumb_t thumb;
1632     assert(!(o & 0x0000ffff));
1633     if (o == THUMB2_PUSH)
1634         assert(!(im & 0x8000));
1635     assert(__builtin_popcount(im & 0x7fff) > 1);
1636     thumb.i = o|im;
1637     iss(thumb.s[0], thumb.s[1]);
1638 }
1639
1640 static void
1641 _torl(jit_state_t *_jit, int o, int rn, int im)
1642 {
1643     jit_thumb_t thumb;
1644     assert(!(o & 0xf1fff));
1645     assert(rn != _R15 || !im || ((o & 0xc000) == 0xc000));
1646     assert(!(o & THUMB2_LDM_W) || !(im & (1 << rn)));
1647     thumb.i = o | (_u4(rn)<<16)|_u13(im);
1648     iss(thumb.s[0], thumb.s[1]);
1649 }
1650
1651 static void
1652 _dmb(jit_state_t *_jit, int im)
1653 {
1654     assert(!(im & 0xfffffff0));
1655     ii(ARM_DMB|im);
1656 }
1657
1658 static void
1659 _tdmb(jit_state_t *_jit, int im)
1660 {
1661     jit_thumb_t thumb;
1662     assert(!(im & 0xfffffff0));
1663     thumb.i = THUMB2_DMB | im;
1664     iss(thumb.s[0], thumb.s[1]);
1665 }
1666
1667 static void
1668 _torrlw(jit_state_t *_jit, int o, int rd, int rn, int lsb, int wm1)
1669 {
1670     jit_thumb_t thumb;
1671     assert(!(o   & 0x000f0fdf));
1672     assert(!(lsb & 0xffffffe0));
1673     assert(!(wm1 & 0xffffffe0));
1674     thumb.i = o|(_u4(rn)<<16)|((lsb&28)<<10)|(_u4(rd)<<8)|((lsb&3)<<6)|_u5(wm1);
1675     iss(thumb.s[0], thumb.s[1]);
1676 }
1677
1678 static void
1679 _corrlw(jit_state_t *_jit, int cc, int o, int rd, int rn, int lsb, int wm1)
1680 {
1681     assert(!(cc  & 0x0fffffff));
1682     assert(!(o   & 0xf000f00f));
1683     assert(!(lsb & 0xffffffe0));
1684     assert(!(wm1 & 0xffffffe0));
1685     ii(cc|o|(_u5(wm1)<<16)|(_u4(rd)<<12)|(_u5(lsb)<<7)|_u4(rn));
1686 }
1687
1688 static void
1689 _nop(jit_state_t *_jit, jit_int32_t i0)
1690 {
1691     if (jit_thumb_p()) {
1692         for (; i0 > 0; i0 -= 2)
1693             T1_NOP();
1694     }
1695     else {
1696         for (; i0 > 0; i0 -= 4)
1697             NOP();
1698     }
1699     assert(i0 == 0);
1700 }
1701
1702 static void
1703 _movr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1704 {
1705     if (r0 != r1) {
1706         if (jit_thumb_p())
1707             T1_MOV(r0, r1);
1708         else
1709             MOV(r0, r1);
1710     }
1711 }
1712
1713 static void
1714 _movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
1715 {
1716     int                 i;
1717     if (jit_thumb_p()) {
1718         if (!jit_no_set_flags() && r0 < 8 && !(i0 & 0xffffff80))
1719             T1_MOVI(r0, i0);
1720         else if ((i = encode_thumb_immediate(i0)) != -1)
1721             T2_MOVI(r0, i);
1722         else if ((i = encode_thumb_immediate(~i0)) != -1)
1723             T2_MVNI(r0, i);
1724         else {
1725             T2_MOVWI(r0, (jit_uint16_t)i0);
1726             if (i0 & 0xffff0000)
1727                 T2_MOVTI(r0, (jit_uint16_t)((unsigned)i0 >> 16));
1728         }
1729     }
1730     else {
1731         if (jit_armv6_p() && !(i0 & 0xffff0000))
1732             MOVWI(r0, i0);
1733         else if ((i = encode_arm_immediate(i0)) != -1)
1734             MOVI(r0, i);
1735         else if ((i = encode_arm_immediate(~i0)) != -1)
1736             MVNI(r0, i);
1737         else if (jit_armv6_p()) {
1738             MOVWI(r0, (jit_uint16_t)(i0));
1739             if ((i0 & 0xffff0000))
1740                 MOVTI(r0, (jit_uint16_t)((unsigned)i0 >> 16));
1741         }
1742         else
1743             load_const(0, r0, i0);
1744     }
1745 }
1746
1747 static jit_word_t
1748 _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
1749 {
1750     jit_word_t          w;
1751     w = _jit->pc.w;
1752     if (jit_thumb_p()) {
1753         T2_MOVWI(r0, (jit_uint16_t)(i0));
1754         T2_MOVTI(r0, (jit_uint16_t)((unsigned)i0 >> 16));
1755     }
1756     else
1757         load_const(1, r0, 0);
1758     return (w);
1759 }
1760
1761 static void
1762 _movznr(jit_state_t *_jit, int ct, jit_int32_t r0,
1763         jit_int32_t r1, jit_int32_t r2)
1764 {
1765     if (jit_thumb_p()) {
1766         if (r2 < 7)
1767             T1_CMPI(r2, 0);
1768         else
1769             T2_CMPI(r2, 0);
1770         IT(ct);
1771         T1_MOV(r0, r1);
1772     } else {
1773         CMPI(r2, 0);
1774         CC_MOV(ct, r0, r1);
1775     }
1776 }
1777
1778 static void
1779 _movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1780 {
1781     _movznr(_jit, ARM_CC_NE, r0, r1, r2);
1782 }
1783
1784 static void
1785 _movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1786 {
1787     _movznr(_jit, ARM_CC_EQ, r0, r1, r2);
1788 }
1789
1790 static void
1791 _casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
1792       jit_int32_t r2, jit_int32_t r3, jit_word_t i0)
1793 {
1794     jit_int32_t         r1_reg, iscasi;
1795     jit_word_t          retry, done, jump0, jump1;
1796     if (!jit_armv7_p())
1797         fallback_casx(r0, r1, r2, r3, i0);
1798     else {
1799         if ((iscasi = (r1 == _NOREG))) {
1800             r1_reg = jit_get_reg(jit_class_gpr);
1801             r1 = rn(r1_reg);
1802             movi(r1, i0);
1803         }
1804         if (jit_thumb_p()) {
1805             T2_DMB(DMB_ISH);
1806             /* retry: */
1807             retry = _jit->pc.w;
1808             T2_LDREX(r0, r1, 0);
1809             eqr(r0, r0, r2);
1810             jump0 = beqi(_jit->pc.w, r0, 0);    /* beqi done r0 0 */
1811             T2_STREX(r0, r3, r1, 0);
1812             jump1 = bnei(_jit->pc.w, r0, 0);    /* bnei retry r0 0 */
1813             /* r0 = 0 if memory updated, 1 otherwise */
1814             xori(r0, r0, 1);
1815             /* done: */
1816             done = _jit->pc.w;
1817             T2_DMB(DMB_ISH);
1818         }
1819         else {
1820             DMB(DMB_ISH);
1821             /* retry: */
1822             retry = _jit->pc.w;
1823             LDREX(r0, r1);
1824             eqr(r0, r0, r2);
1825             jump0 = beqi(_jit->pc.w, r0, 0);    /* beqi done r0 0 */
1826             STREX(r0, r3, r1);
1827             jump1 = bnei(_jit->pc.w, r0, 0);    /* bnei retry r0 0 */
1828             /* r0 = 0 if memory updated, 1 otherwise */
1829             xori(r0, r0, 1);
1830             /* done: */
1831             done = _jit->pc.w;
1832             DMB(DMB_ISH);
1833         }
1834         patch_at(arm_patch_jump, jump0, done);
1835         patch_at(arm_patch_jump, jump1, retry);
1836         if (iscasi)
1837             jit_unget_reg(r1_reg);
1838     }
1839 }
1840
1841 static void
1842 _comr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1843 {
1844     if (jit_thumb_p()) {
1845         if (!jit_no_set_flags() && (r0|r1) < 8)
1846             T1_NOT(r0, r1);
1847         else
1848             T2_NOT(r0, r1);
1849     }
1850     else
1851         NOT(r0, r1);
1852 }
1853
1854 static void
1855 _negr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1856 {
1857     if (jit_thumb_p()) {
1858         if (!jit_no_set_flags() && (r0|r1) < 8)
1859             T1_RSBI(r0, r1);
1860         else
1861             T2_RSBI(r0, r1, 0);
1862     }
1863     else
1864         RSBI(r0, r1, 0);
1865 }
1866
1867 static void
1868 _clzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1869 {
1870     if (!jit_thumb_p() && jit_armv5e_p())
1871         CLZ(r0, r1);
1872     else if (jit_thumb_p() && jit_armv7_p()) {  /* armv6t2 actually */
1873         T2_CLZ(r0, r1);
1874     }
1875     else
1876         fallback_clz(r0, r0);
1877 }
1878
1879 static void
1880 _clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1881 {
1882     comr(r0, r1);
1883     clzr(r0, r0);
1884 }
1885
1886 static void
1887 _ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1888 {
1889     if (jit_armv7_p()) {        /* armv6t2 actually */
1890         if (jit_thumb_p())
1891             T2_RBIT(r0, r1);
1892         else
1893             RBIT(r0, r1);
1894         clor(r0, r0);
1895     }
1896     else
1897         fallback_cto(r0, r1);
1898 }
1899
1900 static void
1901 _ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1902 {
1903     if (jit_armv7_p()) {        /* armv6t2 actually */
1904         if (jit_thumb_p())
1905             T2_RBIT(r0, r1);
1906         else
1907             RBIT(r0, r1);
1908         clzr(r0, r0);
1909     }
1910     else
1911         fallback_ctz(r0, r1);
1912 }
1913
1914 static void
1915 _rbitr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1916 {
1917     if (jit_armv7_p()) {        /* armv6t2 actually */
1918         if (jit_thumb_p())
1919             T2_RBIT(r0, r1);
1920         else
1921             RBIT(r0, r1);
1922     }
1923     else
1924         fallback_rbit(r0, r1);
1925 }
1926
1927 static void
1928 _popcntr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1929 {
1930     /* FIXME Untested. Apdated from aarch64 pattern. */
1931     if (jit_cpu.vfp >= 4)
1932         vfp_popcntr(r0, r1);
1933     else
1934         fallback_popcnt(r0, r1);
1935 }
1936
1937 static void
1938 _addr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1939 {
1940     if (jit_thumb_p()) {
1941         if (!jit_no_set_flags() && (r0|r1|r2) < 8)
1942             T1_ADD(r0, r1, r2);
1943         else if (r0 == r1 || r0 == r2)
1944             T1_ADDX(r0, r0 == r1 ? r2 : r1);
1945         else
1946             T2_ADD(r0, r1, r2);
1947     }
1948     else
1949         ADD(r0, r1, r2);
1950 }
1951
1952 static void
1953 _addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1954 {
1955     int                 i;
1956     jit_int32_t         reg;
1957     if (jit_thumb_p()) {
1958         if (!jit_no_set_flags() && (r0|r1) < 8 && !(i0 & ~7))
1959             T1_ADDI3(r0, r1, i0);
1960         else if (!jit_no_set_flags() && (r0|r1) < 8 && !(-i0 & ~7))
1961             T1_SUBI3(r0, r1, -i0);
1962         else if (!jit_no_set_flags() && r0 < 8 && r0 == r1 && !(i0 & ~0xff))
1963             T1_ADDI8(r0, i0);
1964         else if (!jit_no_set_flags() && r0 < 8 && r0 == r1 && !(-i0 & ~0xff))
1965             T1_SUBI8(r0, -i0);
1966         else if ((i = encode_thumb_immediate(i0)) != -1)
1967             T2_ADDI(r0, r1, i);
1968         else if ((i = encode_thumb_immediate(-i0)) != -1)
1969             T2_SUBI(r0, r1, i);
1970         else if ((i = encode_thumb_word_immediate(i0)) != -1)
1971             T2_ADDWI(r0, r1, i);
1972         else if ((i = encode_thumb_word_immediate(-i0)) != -1)
1973             T2_SUBWI(r0, r1, i);
1974         else {
1975             reg = jit_get_reg(jit_class_gpr);
1976             movi(rn(reg), i0);
1977             T2_ADD(r0, r1, rn(reg));
1978             jit_unget_reg(reg);
1979         }
1980     }
1981     else {
1982         if ((i = encode_arm_immediate(i0)) != -1)
1983             ADDI(r0, r1, i);
1984         else if ((i = encode_arm_immediate(-i0)) != -1)
1985             SUBI(r0, r1, i);
1986         else if (r0 != r1) {
1987             movi(r0, i0);
1988             ADD(r0, r1, r0);
1989         }
1990         else {
1991             reg = jit_get_reg(jit_class_gpr);
1992             movi(rn(reg), i0);
1993             ADD(r0, r1, rn(reg));
1994             jit_unget_reg(reg);
1995         }
1996     }
1997 }
1998
1999 static void
2000 _addcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2001 {
2002     if (jit_thumb_p()) {
2003         /* thumb auto set carry if not inside IT block */
2004         if ((r0|r1|r2) < 8)
2005             T1_ADD(r0, r1, r2);
2006         else
2007             T2_ADDS(r0, r1, r2);
2008     }
2009     else
2010         ADDS(r0, r1, r2);
2011 }
2012
2013 static void
2014 _addci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2015 {
2016     int                 i;
2017     jit_int32_t         reg;
2018     if (jit_thumb_p()) {
2019         if ((r0|r1) < 8 && !(i0 & ~7))
2020             T1_ADDI3(r0, r1, i0);
2021         else if ((r0|r1) < 8 && !(-i0 & ~7))
2022             T1_SUBI3(r0, r1, -i0);
2023         else if (r0 < 8 && r0 == r1 && !(i0 & ~0xff))
2024             T1_ADDI8(r0, i0);
2025         else if (r0 < 8 && r0 == r1 && !(-i0 & ~0xff))
2026             T1_SUBI8(r0, -i0);
2027         else if ((i = encode_thumb_immediate(i0)) != -1)
2028             T2_ADDSI(r0, r1, i);
2029         else if ((i = encode_thumb_immediate(-i0)) != -1)
2030             T2_SUBSI(r0, r1, i);
2031         else {
2032             reg = jit_get_reg(jit_class_gpr);
2033             movi(rn(reg), i0);
2034             T2_ADDS(r0, r1, rn(reg));
2035             jit_unget_reg(reg);
2036         }
2037     }
2038     else {
2039         if ((i = encode_arm_immediate(i0)) != -1)
2040             ADDSI(r0, r1, i);
2041         else if ((i = encode_arm_immediate(-i0)) != -1)
2042             SUBSI(r0, r1, i);
2043         else if (r0 != r1) {
2044             movi(r0, i0);
2045             ADDS(r0, r1, r0);
2046         }
2047         else {
2048             reg = jit_get_reg(jit_class_gpr);
2049             movi(rn(reg), i0);
2050             ADDS(r0, r1, rn(reg));
2051             jit_unget_reg(reg);
2052         }
2053     }
2054 }
2055
2056 static void
2057 _addxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2058 {
2059     /* keep setting carry because don't know last ADC */
2060     if (jit_thumb_p()) {
2061         /* thumb auto set carry if not inside IT block */
2062         if ((r0|r1|r2) < 8 && (r0 == r1 || r0 == r2))
2063             T1_ADC(r0, r0 == r1 ? r2 : r1);
2064         else
2065             T2_ADCS(r0, r1, r2);
2066     }
2067     else
2068         ADCS(r0, r1, r2);
2069 }
2070
2071 static void
2072 _addxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2073 {
2074     int                 i;
2075     jit_int32_t         reg;
2076     int                 no_set_flags;
2077     if (jit_thumb_p()) {
2078         no_set_flags = jit_no_set_flags();
2079         jit_no_set_flags() = 1;
2080         if ((i = encode_thumb_immediate(i0)) != -1)
2081             T2_ADCSI(r0, r1, i);
2082         else if ((i = encode_thumb_immediate(-i0)) != -1)
2083             T2_SBCSI(r0, r1, i);
2084         else if (r0 != r1) {
2085             movi(r0, i0);
2086             T2_ADCS(r0, r1, r0);
2087         }
2088         else {
2089             reg = jit_get_reg(jit_class_gpr);
2090             movi(rn(reg), i0);
2091             T2_ADCS(r0, r1, rn(reg));
2092             jit_unget_reg(reg);
2093         }
2094         jit_no_set_flags() = no_set_flags;
2095     }
2096     else {
2097         if ((i = encode_arm_immediate(i0)) != -1)
2098             ADCSI(r0, r1, i);
2099         else if ((i = encode_arm_immediate(-i0)) != -1)
2100             SBCSI(r0, r1, i);
2101         else if (r0 != r1) {
2102             movi(r0, i0);
2103             ADCS(r0, r1, r0);
2104         }
2105         else {
2106             reg = jit_get_reg(jit_class_gpr);
2107             movi(rn(reg), i0);
2108             ADCS(r0, r1, rn(reg));
2109             jit_unget_reg(reg);
2110         }
2111     }
2112 }
2113
2114 static void
2115 _subr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2116 {
2117     if (jit_thumb_p()) {
2118         if (!jit_no_set_flags() && (r0|r1|r2) < 8)
2119             T1_SUB(r0, r1, r2);
2120         else
2121             T2_SUB(r0, r1, r2);
2122     }
2123     else
2124         SUB(r0, r1, r2);
2125 }
2126
2127 static void
2128 _subi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2129 {
2130     int                 i;
2131     jit_int32_t         reg;
2132     if (jit_thumb_p()) {
2133         if (!jit_no_set_flags() && (r0|r1) < 8 && !(i0 & ~7))
2134             T1_SUBI3(r0, r1, i0);
2135         else if (!jit_no_set_flags() && (r0|r1) < 8 && !(-i0 & ~7))
2136             T1_ADDI3(r0, r1, -i0);
2137         else if (!jit_no_set_flags() && r0 < 8 && r0 == r1 && !(i0 & ~0xff))
2138             T1_SUBI8(r0, i0);
2139         else if (!jit_no_set_flags() && r0 < 8 && r0 == r1 && !(-i0 & ~0xff))
2140             T1_ADDI8(r0, -i0);
2141         else if ((i = encode_thumb_immediate(i0)) != -1)
2142             T2_SUBI(r0, r1, i);
2143         else if ((i = encode_thumb_immediate(-i0)) != -1)
2144             T2_ADDI(r0, r1, i);
2145         else if ((i = encode_thumb_word_immediate(i0)) != -1)
2146             T2_SUBWI(r0, r1, i);
2147         else if ((i = encode_thumb_word_immediate(-i0)) != -1)
2148             T2_ADDWI(r0, r1, i);
2149         else {
2150             reg = jit_get_reg(jit_class_gpr);
2151             movi(rn(reg), i0);
2152             T2_SUB(r0, r1, rn(reg));
2153             jit_unget_reg(reg);
2154         }
2155     }
2156     else {
2157         if ((i = encode_arm_immediate(i0)) != -1)
2158             SUBI(r0, r1, i);
2159         else if ((i = encode_arm_immediate(-i0)) != -1)
2160             ADDI(r0, r1, i);
2161         else if (r0 != r1) {
2162             movi(r0, i0);
2163             SUB(r0, r1, r0);
2164         }
2165         else {
2166             reg = jit_get_reg(jit_class_gpr);
2167             movi(rn(reg), i0);
2168             SUB(r0, r1, rn(reg));
2169             jit_unget_reg(reg);
2170         }
2171     }
2172 }
2173
2174 static void
2175 _subcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2176 {
2177     if (jit_thumb_p()) {
2178         /* thumb auto set carry if not inside IT block */
2179         if ((r0|r1|r2) < 8)
2180             T1_SUB(r0, r1, r2);
2181         else
2182             T2_SUBS(r0, r1, r2);
2183     }
2184     else
2185         SUBS(r0, r1, r2);
2186 }
2187
2188 static void
2189 _subci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2190 {
2191     int                 i;
2192     jit_int32_t         reg;
2193     if (jit_thumb_p()) {
2194         if ((r0|r1) < 8 && !(i0 & ~7))
2195             T1_SUBI3(r0, r1, i0);
2196         else if ((r0|r1) < 8 && !(-i0 & ~7))
2197             T1_ADDI3(r0, r1, -i0);
2198         else if (r0 < 8 && r0 == r1 && !(i0 & ~0xff))
2199             T1_SUBI8(r0, i0);
2200         else if (r0 < 8 && r0 == r1 && !(-i0 & ~0xff))
2201             T1_ADDI8(r0, -i0);
2202         else if ((i = encode_thumb_immediate(i0)) != -1)
2203             T2_SUBSI(r0, r1, i);
2204         else if ((i = encode_thumb_immediate(-i0)) != -1)
2205             T2_ADDSI(r0, r1, i);
2206         else {
2207             reg = jit_get_reg(jit_class_gpr);
2208             movi(rn(reg), i0);
2209             T2_SUBS(r0, r1, rn(reg));
2210             jit_unget_reg(reg);
2211         }
2212     }
2213     else {
2214         if ((i = encode_arm_immediate(i0)) != -1)
2215             SUBSI(r0, r1, i);
2216         else if ((i = encode_arm_immediate(-i0)) != -1)
2217             ADDSI(r0, r1, i);
2218         else if (r0 != r1) {
2219             movi(r0, i0);
2220             SUBS(r0, r1, r0);
2221         }
2222         else {
2223             reg = jit_get_reg(jit_class_gpr);
2224             movi(rn(reg), i0);
2225             SUBS(r0, r1, rn(reg));
2226             jit_unget_reg(reg);
2227         }
2228     }
2229 }
2230
2231 static void
2232 _subxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2233 {
2234     /* keep setting carry because don't know last SBC */
2235     if (jit_thumb_p()) {
2236         /* thumb auto set carry if not inside IT block */
2237         if ((r0|r1|r2) < 8 && r0 == r1)
2238             T1_SBC(r0, r2);
2239         else
2240             T2_SBCS(r0, r1, r2);
2241     }
2242     else
2243         SBCS(r0, r1, r2);
2244 }
2245
2246 static void
2247 _subxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2248 {
2249     int                 i;
2250     jit_int32_t         reg;
2251     int                 no_set_flags;
2252     if (jit_thumb_p()) {
2253         no_set_flags = jit_no_set_flags();
2254         jit_no_set_flags() = 1;
2255         if ((i = encode_arm_immediate(i0)) != -1)
2256             T2_SBCSI(r0, r1, i);
2257         else if ((i = encode_arm_immediate(-i0)) != -1)
2258             T2_ADCSI(r0, r1, i);
2259         else if (r0 != r1) {
2260             movi(r0, i0);
2261             T2_SBCS(r0, r1, r0);
2262         }
2263         else {
2264             reg = jit_get_reg(jit_class_gpr);
2265             movi(rn(reg), i0);
2266             SBCS(r0, r1, rn(reg));
2267             jit_unget_reg(reg);
2268         }
2269         jit_no_set_flags() = no_set_flags;
2270     }
2271     else {
2272         if ((i = encode_arm_immediate(i0)) != -1)
2273             SBCSI(r0, r1, i);
2274         else if ((i = encode_arm_immediate(-i0)) != -1)
2275             ADCSI(r0, r1, i);
2276         else if (r0 != r1) {
2277             movi(r0, i0);
2278             SBCS(r0, r1, r0);
2279         }
2280         else {
2281             reg = jit_get_reg(jit_class_gpr);
2282             movi(rn(reg), i0);
2283             SBCS(r0, r1, rn(reg));
2284             jit_unget_reg(reg);
2285         }
2286     }
2287 }
2288
2289 static void
2290 _rsbi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2291 {
2292     subi(r0, r1, i0);
2293     negr(r0, r0);
2294 }
2295
2296 static void
2297 _mulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2298 {
2299     jit_int32_t         reg;
2300     if (jit_thumb_p()) {
2301         if (!jit_no_set_flags() && r0 == r2 && (r0|r1) < 8)
2302             T1_MUL(r0, r1);
2303         else if (!jit_no_set_flags() && r0 == r1 && (r0|r2) < 8)
2304             T1_MUL(r0, r2);
2305         else
2306             T2_MUL(r0, r1, r2);
2307     }
2308     else {
2309         if (r0 == r1 && !jit_armv6_p()) {
2310             if (r0 != r2)
2311                 MUL(r0, r2, r1);
2312             else {
2313                 reg = jit_get_reg(jit_class_gpr);
2314                 MOV(rn(reg), r1);
2315                 MUL(r0, rn(reg), r2);
2316                 jit_unget_reg(reg);
2317             }
2318         }
2319         else
2320             MUL(r0, r1, r2);
2321     }
2322 }
2323
2324 static void
2325 _muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2326 {
2327     jit_int32_t         reg;
2328     reg = jit_get_reg(jit_class_gpr);
2329     movi(rn(reg), i0);
2330     mulr(r0, r1, rn(reg));
2331     jit_unget_reg(reg);
2332 }
2333
2334 static void
2335 _ihmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
2336         jit_int32_t r2, jit_bool_t sign)
2337 {
2338     jit_int32_t         reg;
2339     reg = jit_get_reg(jit_class_gpr);
2340     iqmulr(rn(reg), r0, r1, r2, sign);
2341     jit_unget_reg(reg);
2342 }
2343
2344 static void
2345 _ihmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
2346         jit_word_t i0, jit_bool_t sign)
2347 {
2348     jit_int32_t         t0, t1;
2349     t0 = jit_get_reg(jit_class_gpr);
2350     t1 = jit_get_reg(jit_class_gpr);
2351     movi(rn(t1), i0);
2352     iqmulr(rn(t0), r0, r1, rn(t1), sign);
2353     jit_unget_reg(t1);
2354     jit_unget_reg(t0);
2355 }
2356
2357 static void
2358 _iqmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
2359         jit_int32_t r2, jit_int32_t r3, jit_bool_t sign)
2360 {
2361     jit_int32_t         reg;
2362     if (jit_thumb_p()) {
2363         if (r2 == r3) {
2364             reg = jit_get_reg(jit_class_gpr);
2365             movr(rn(reg), r2);
2366             if (sign)
2367                 T2_SMULL(r0, r1, rn(reg), r2);
2368             else
2369                 T2_UMULL(r0, r1, rn(reg), r2);
2370             jit_unget_reg(reg);
2371         }
2372         else if (r0 != r2 && r1 != r2) {
2373             if (sign)
2374                 T2_SMULL(r0, r1, r2, r3);
2375             else
2376                 T2_UMULL(r0, r1, r2, r3);
2377         }
2378         else {
2379             if (sign)
2380                 T2_SMULL(r0, r1, r3, r2);
2381             else
2382                 T2_UMULL(r0, r1, r3, r2);
2383         }
2384     }
2385     else {
2386         if (r2 == r3) {
2387             reg = jit_get_reg(jit_class_gpr);
2388             movr(rn(reg), r2);
2389             if (sign)
2390                 SMULL(r0, r1, rn(reg), r2);
2391             else
2392                 UMULL(r0, r1, rn(reg), r2);
2393             jit_unget_reg(reg);
2394         }
2395         else if (r0 != r2 && r1 != r2) {
2396             if (sign)
2397                 SMULL(r0, r1, r2, r3);
2398             else
2399                 UMULL(r0, r1, r2, r3);
2400         }
2401         else {
2402             if (sign)
2403                 SMULL(r0, r1, r3, r2);
2404             else
2405                 UMULL(r0, r1, r3, r2);
2406         }
2407     }
2408 }
2409
2410 static void
2411 _iqmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
2412         jit_int32_t r2, jit_word_t i0, jit_bool_t sign)
2413 {
2414     jit_int32_t         reg;
2415     reg = jit_get_reg(jit_class_gpr);
2416     movi(rn(reg), i0);
2417     iqmulr(r0, r1, r2, rn(reg), sign);
2418     jit_unget_reg(reg);
2419 }
2420
2421 static void
2422 _divrem(jit_state_t *_jit, int div, int sign,
2423         jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2424 {
2425     jit_word_t          d;
2426     jit_word_t          w;
2427     jit_get_reg_args();
2428     movr(_R0_REGNO, r1);
2429     movr(_R1_REGNO, r2);
2430     if (sign)                   w = (jit_word_t)__aeabi_idivmod;
2431     else                        w = (jit_word_t)__aeabi_uidivmod;
2432     if (!jit_exchange_p()) {
2433         if (jit_thumb_p())      d = ((w - _jit->pc.w) >> 1) - 2;
2434         else                    d = ((w - _jit->pc.w) >> 2) - 2;
2435         if (_s24P(d)) {
2436             if (jit_thumb_p())  T2_BLI(encode_thumb_jump(d));
2437             else                BLI(d & 0x00ffffff);
2438         }
2439         else                    goto fallback;
2440     }
2441     else {
2442     fallback:
2443         movi(_R2_REGNO, w);
2444         if (jit_thumb_p())      T1_BLX(_R2_REGNO);
2445         else                    BLX(_R2_REGNO);
2446     }
2447     if (div)                    movr(r0, _R0_REGNO);
2448     else                        movr(r0, _R1_REGNO);
2449     jit_unget_reg_args();
2450 }
2451
2452 static void
2453 _divr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2454 {
2455     if (jit_armv7r_p()) {
2456         if (jit_thumb_p())
2457             T2_SDIV(r0, r1, r2);
2458         else
2459             SDIV(r0, r1, r2);
2460     }
2461     else
2462         divrem(1, 1, r0, r1, r2);
2463 }
2464
2465 static void
2466 _divi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2467 {
2468     jit_int32_t         reg;
2469     reg = jit_get_reg(jit_class_gpr);
2470     movi(rn(reg), i0);
2471     divr(r0, r1, rn(reg));
2472     jit_unget_reg(reg);
2473 }
2474
2475 static void
2476 _divr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2477 {
2478     if (jit_armv7r_p()) {
2479         if (jit_thumb_p())
2480             T2_UDIV(r0, r1, r2);
2481         else
2482             UDIV(r0, r1, r2);
2483     }
2484     else
2485         divrem(1, 0, r0, r1, r2);
2486 }
2487
2488 static void
2489 _divi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2490 {
2491     jit_int32_t         reg;
2492     reg = jit_get_reg(jit_class_gpr);
2493     movi(rn(reg), i0);
2494     divr_u(r0, r1, rn(reg));
2495     jit_unget_reg(reg);
2496 }
2497
2498 static void
2499 _iqdivr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
2500         jit_int32_t r2, jit_int32_t r3, jit_bool_t sign)
2501 {
2502     jit_word_t          d;
2503     jit_word_t          w;
2504     jit_get_reg_args();
2505     movr(_R0_REGNO, r2);
2506     movr(_R1_REGNO, r3);
2507     if (sign)                   w = (jit_word_t)__aeabi_idivmod;
2508     else                        w = (jit_word_t)__aeabi_uidivmod;
2509     if (!jit_exchange_p()) {
2510         if (jit_thumb_p())      d = ((w - _jit->pc.w) >> 1) - 2;
2511         else                    d = ((w - _jit->pc.w) >> 2) - 2;
2512         if (_s24P(d)) {
2513             if (jit_thumb_p())  T2_BLI(encode_thumb_jump(d));
2514             else                BLI(d & 0x00ffffff);
2515         }
2516         else                    goto fallback;
2517     }
2518     else {
2519     fallback:
2520         movi(_R2_REGNO, w);
2521         if (jit_thumb_p())      T1_BLX(_R2_REGNO);
2522         else                    BLX(_R2_REGNO);
2523     }
2524     movr(r0, _R0_REGNO);
2525     movr(r1, _R1_REGNO);
2526     jit_unget_reg_args();
2527 }
2528
2529 static void
2530 _iqdivi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
2531         jit_int32_t r2, jit_word_t i0, jit_bool_t sign)
2532 {
2533     jit_int32_t         reg;
2534     reg = jit_get_reg(jit_class_gpr);
2535     movi(rn(reg), i0);
2536     iqdivr(r0, r1, r2, rn(reg), sign);
2537     jit_unget_reg(reg);
2538 }
2539
2540 static void
2541 _remr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2542 {
2543     if (jit_armv7r_p()) {
2544         jit_int32_t             reg;
2545         if (r0 == r1 || r0 == r2) {
2546             reg = jit_get_reg(jit_class_gpr);
2547             divr(rn(reg), r1, r2);
2548             mulr(rn(reg), r2, rn(reg));
2549             subr(r0, r1, rn(reg));
2550             jit_unget_reg(reg);
2551         }
2552         else {
2553             divr(r0, r1, r2);
2554             mulr(r0, r2, r0);
2555             subr(r0, r1, r0);
2556         }
2557     }
2558     else
2559         divrem(0, 1, r0, r1, r2);
2560 }
2561
2562 static void
2563 _remi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2564 {
2565     jit_int32_t         reg;
2566     reg = jit_get_reg(jit_class_gpr);
2567     movi(rn(reg), i0);
2568     remr(r0, r1, rn(reg));
2569     jit_unget_reg(reg);
2570 }
2571
2572 static void
2573 _remr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2574 {
2575     if (jit_armv7r_p()) {
2576         jit_int32_t             reg;
2577         if (r0 == r1 || r0 == r2) {
2578             reg = jit_get_reg(jit_class_gpr);
2579             divr_u(rn(reg), r1, r2);
2580             mulr(rn(reg), r2, rn(reg));
2581             subr(r0, r1, rn(reg));
2582             jit_unget_reg(reg);
2583         }
2584         else {
2585             divr_u(r0, r1, r2);
2586             mulr(r0, r2, r0);
2587             subr(r0, r1, r0);
2588         }
2589     }
2590     else
2591         divrem(0, 0, r0, r1, r2);
2592 }
2593
2594 static void
2595 _remi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2596 {
2597     jit_int32_t         reg;
2598     reg = jit_get_reg(jit_class_gpr);
2599     movi(rn(reg), i0);
2600     remr_u(r0, r1,rn(reg));
2601     jit_unget_reg(reg);
2602 }
2603
2604 static void
2605 _andr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2606 {
2607     if (jit_thumb_p()) {
2608         if (!jit_no_set_flags() && (r0|r1|r2) < 8 && (r0 == r1 || r0 == r2))
2609             T1_AND(r0, r0 == r1 ? r2 : r1);
2610         else
2611             T2_AND(r0, r1, r2);
2612     }
2613     else
2614         AND(r0, r1, r2);
2615 }
2616
2617 static void
2618 _andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2619 {
2620     int                 i;
2621     jit_int32_t         reg;
2622     if (jit_thumb_p()) {
2623         if ((i = encode_thumb_immediate(i0)) != -1)
2624             T2_ANDI(r0, r1, i);
2625         else if ((i = encode_thumb_immediate(~i0)) != -1)
2626             T2_BICI(r0, r1, i);
2627         else if (r0 != r1) {
2628             movi(r0, i0);
2629             T2_AND(r0, r1, r0);
2630         }
2631         else {
2632             reg = jit_get_reg(jit_class_gpr);
2633             movi(rn(reg), i0);
2634             T2_AND(r0, r1, rn(reg));
2635             jit_unget_reg(reg);
2636         }
2637     }
2638     else {
2639         if ((i = encode_arm_immediate(i0)) != -1)
2640             ANDI(r0, r1, i);
2641         else if ((i = encode_arm_immediate(~i0)) != -1)
2642             BICI(r0, r1, i);
2643         else if (r0 != r1) {
2644             movi(r0, i0);
2645             AND(r0, r1, r0);
2646         }
2647         else {
2648             reg = jit_get_reg(jit_class_gpr);
2649             movi(rn(reg), i0);
2650             AND(r0, r1, rn(reg));
2651             jit_unget_reg(reg);
2652         }
2653     }
2654 }
2655
2656 static void
2657 _orr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2658 {
2659     if (jit_thumb_p()) {
2660         if (!jit_no_set_flags() && (r0|r1|r2) < 8 && (r0 == r1 || r0 == r2))
2661             T1_ORR(r0, r0 == r1 ? r2 : r1);
2662         else
2663             T2_ORR(r0, r1, r2);
2664     }
2665     else
2666         ORR(r0, r1, r2);
2667 }
2668
2669 static void
2670 _ori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2671 {
2672     int                 i;
2673     jit_int32_t         reg;
2674     if (jit_thumb_p()) {
2675         if ((i = encode_thumb_immediate(i0)) != -1)
2676             T2_ORRI(r0, r1, i);
2677         else if (r0 != r1) {
2678             movi(r0, i0);
2679             T2_ORR(r0, r1, r0);
2680         }
2681         else {
2682             reg = jit_get_reg(jit_class_gpr);
2683             movi(rn(reg), i0);
2684             T2_ORR(r0, r1, rn(reg));
2685             jit_unget_reg(reg);
2686         }
2687     }
2688     else {
2689         if ((i = encode_arm_immediate(i0)) != -1)
2690             ORRI(r0, r1, i);
2691         else if (r0 != r1) {
2692             movi(r0, i0);
2693             ORR(r0, r1, r0);
2694         }
2695         else {
2696             reg = jit_get_reg(jit_class_gpr);
2697             movi(rn(reg), i0);
2698             ORR(r0, r1, rn(reg));
2699             jit_unget_reg(reg);
2700         }
2701     }
2702 }
2703
2704 static void
2705 _xorr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2706 {
2707     if (jit_thumb_p()) {
2708         if (!jit_no_set_flags() && (r0|r1|r2) < 8 && (r0 == r1 || r0 == r2))
2709             T1_EOR(r0, r0 == r1 ? r2 : r1);
2710         else
2711             T2_EOR(r0, r1, r2);
2712     }
2713     else
2714         EOR(r0, r1, r2);
2715 }
2716
2717 static void
2718 _xori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2719 {
2720     int                 i;
2721     jit_int32_t         reg;
2722     if (jit_thumb_p()) {
2723         if ((i = encode_thumb_immediate(i0)) != -1)
2724             T2_EORI(r0, r1, i);
2725         else if (r0 != r1) {
2726             movi(r0, i0);
2727             T2_EOR(r0, r1, r0);
2728         }
2729         else {
2730             reg = jit_get_reg(jit_class_gpr);
2731             movi(rn(reg), i0);
2732             T2_EOR(r0, r1, rn(reg));
2733             jit_unget_reg(reg);
2734         }
2735     }
2736     else {
2737         if ((i = encode_arm_immediate(i0)) != -1)
2738             EORI(r0, r1, i);
2739         else if (r0 != r1) {
2740             movi(r0, i0);
2741             EOR(r0, r1, r0);
2742         }
2743         else {
2744             reg = jit_get_reg(jit_class_gpr);
2745             movi(rn(reg), i0);
2746             EOR(r0, r1, rn(reg));
2747             jit_unget_reg(reg);
2748         }
2749     }
2750 }
2751
2752 static void
2753 _lshr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2754 {
2755     if (jit_thumb_p()) {
2756         if (!jit_no_set_flags() && (r0|r1|r2) < 8 && r0 == r1)
2757             T1_LSL(r0, r2);
2758         else
2759             T2_LSL(r0, r1, r2);
2760     }
2761     else
2762         LSL(r0, r1, r2);
2763 }
2764
2765 static void
2766 _lshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2767 {
2768     assert(i0 >= 0 && i0 <= 31);
2769     if (i0 == 0)
2770         movr(r0, r1);
2771     else if (jit_thumb_p()) {
2772         if (!jit_no_set_flags() && (r0|r1) < 8)
2773             T1_LSLI(r0, r1, i0);
2774         else
2775             T2_LSLI(r0, r1, i0);
2776     }
2777     else
2778         LSLI(r0, r1, i0);
2779 }
2780
2781 static void
2782 _rshr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2783 {
2784     if (jit_thumb_p()) {
2785         if (!jit_no_set_flags() && (r0|r1|r2) < 8 && r0 == r1)
2786             T1_ASR(r0, r2);
2787         else
2788             T2_ASR(r0, r1, r2);
2789     }
2790     else
2791         ASR(r0, r1, r2);
2792 }
2793
2794 static void
2795 _rshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2796 {
2797     assert(i0 >= 0 && i0 <= 31);
2798     if (i0 == 0)
2799         movr(r0, r1);
2800     else if (jit_thumb_p()) {
2801         if (!jit_no_set_flags() && (r0|r1) < 8)
2802             T1_ASRI(r0, r1, i0);
2803         else
2804             T2_ASRI(r0, r1, i0);
2805     }
2806     else
2807         ASRI(r0, r1, i0);
2808 }
2809
2810 static void
2811 _rshr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2812 {
2813     if (jit_thumb_p()) {
2814         if (!jit_no_set_flags() && (r0|r1|r2) < 8 && r0 == r1)
2815             T1_LSR(r0, r2);
2816         else
2817             T2_LSR(r0, r1, r2);
2818     }
2819     else
2820         LSR(r0, r1, r2);
2821 }
2822
2823 static void
2824 _rshi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2825 {
2826     assert(i0 >= 0 && i0 <= 31);
2827     if (i0 == 0)
2828         movr(r0, r1);
2829     else if (jit_thumb_p()) {
2830         if (!jit_no_set_flags() && (r0|r1) < 8)
2831             T1_LSRI(r0, r1, i0);
2832         else
2833             T2_LSRI(r0, r1, i0);
2834     }
2835     else
2836         LSRI(r0, r1, i0);
2837 }
2838
2839 static void
2840 _lrotr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2841 {
2842     jit_int32_t         reg;
2843     if (r0 != r1 && r0 != r2) {
2844         rsbi(r0, r2, 64);
2845         rrotr(r0, r1, r0);
2846     }
2847     else {
2848         reg = jit_get_reg(jit_class_gpr);
2849         rsbi(rn(reg), r2, 64);
2850         rrotr(r0, r1, rn(reg));
2851         jit_unget_reg(reg);
2852     }
2853 }
2854
2855 static void
2856 _rrotr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2857 {
2858     if (jit_thumb_p()) {
2859         if (!jit_no_set_flags() && (r0|r1|r2) < 8 && r0 == r1)
2860             T1_ROR(r0, r2);
2861         else
2862             T2_ROR(r0, r1, r2);
2863     }
2864     else
2865         ROR(r0, r1, r2);
2866 }
2867
2868 static void
2869 _rroti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2870 {
2871     assert(i0 >= 0 && i0 <= 31);
2872     if (i0 == 0)
2873         movr(r0, r1);
2874     else if (jit_thumb_p())
2875         T2_RORI(r0, r1, i0);
2876     else
2877         RORI(r0, r1, i0);
2878 }
2879
2880 static void
2881 _ccr(jit_state_t *_jit, int ct, int cf,
2882      jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2883 {
2884     if (jit_thumb_p()) {
2885         assert((ct ^ cf) >> 28 == 1);
2886         if ((r1|r2) < 8)
2887             T1_CMP(r1, r2);
2888         else if ((r1&r2) & 8)
2889             T1_CMPX(r1, r2);
2890         else
2891             T2_CMP(r1, r2);
2892         ITE(ct);
2893         if (r0 < 8) {
2894             T1_MOVI(r0, 1);
2895             T1_MOVI(r0, 0);
2896         }
2897         else {
2898             T2_MOVI(r0, 1);
2899             T2_MOVI(r0, 0);
2900         }
2901     }
2902     else {
2903         CMP(r1, r2);
2904         CC_MOVI(ct, r0, 1);
2905         CC_MOVI(cf, r0, 0);
2906     }
2907 }
2908
2909 static void
2910 _cci(jit_state_t *_jit, int ct, int cf,
2911      jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2912 {
2913     int                 i;
2914     jit_int32_t         reg;
2915     if (jit_thumb_p()) {
2916         if (r1 < 7 && !(i0 & 0xffffff00))
2917             T1_CMPI(r1, i0);
2918         else if ((i = encode_thumb_immediate(i0)) != -1)
2919             T2_CMPI(r1, i);
2920         else if ((i = encode_thumb_immediate(-i0)) != -1)
2921             T2_CMNI(r1, i);
2922         else {
2923             reg = jit_get_reg(jit_class_gpr);
2924             movi(rn(reg), i0);
2925             ccr(ct, cf, r0, r1, rn(reg));
2926             jit_unget_reg(reg);
2927             return;
2928         }
2929         ITE(ct);
2930         if (r0 < 8) {
2931             T1_MOVI(r0, 1);
2932             T1_MOVI(r0, 0);
2933         }
2934         else {
2935             T2_MOVI(r0, 1);
2936             T2_MOVI(r0, 0);
2937         }
2938     }
2939     else {
2940         if ((i = encode_arm_immediate(i0)) != -1)
2941             CMPI(r1, i);
2942         else if ((i = encode_arm_immediate(-i0)) != -1)
2943             CMNI(r1, i);
2944         else if (r0 != r1) {
2945             movi(r0, i0);
2946             CMP(r1, r0);
2947         }
2948         else {
2949             reg = jit_get_reg(jit_class_gpr);
2950             movi(rn(reg), i0);
2951             CMP(r1, rn(reg));
2952             jit_unget_reg(reg);
2953         }
2954         CC_MOVI(ct, r0, 1);
2955         CC_MOVI(cf, r0, 0);
2956     }
2957 }
2958
2959 static void
2960 _ner(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2961 {
2962     if (jit_thumb_p())
2963         ccr(ARM_CC_NE, ARM_CC_EQ, r0, r1, r2);
2964     else {
2965         SUBS(r0, r1, r2);
2966         CC_MOVI(ARM_CC_NE, r0, 1);
2967     }
2968 }
2969
2970 static void
2971 _nei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2972 {
2973     int                 i;
2974     jit_int32_t         reg;
2975     if (jit_thumb_p())
2976         cci(ARM_CC_NE, ARM_CC_EQ, r0, r1, i0);
2977     else {
2978         if ((i = encode_arm_immediate(i0)) != -1)
2979             SUBSI(r0, r1, i);
2980         else if ((i = encode_arm_immediate(-i0)) != -1)
2981             ADDSI(r0, r1, i);
2982         else if (r0 != r1) {
2983             movi(r0, i0);
2984             SUBS(r0, r1, r0);
2985         }
2986         else {
2987             reg = jit_get_reg(jit_class_gpr);
2988             movi(rn(reg), i0);
2989             SUBS(r0, r1, rn(reg));
2990             jit_unget_reg(reg);
2991         }
2992         CC_MOVI(ARM_CC_NE, r0, 1);
2993     }
2994 }
2995
2996 static void
2997 _jmpr(jit_state_t *_jit, jit_int32_t r0)
2998 {
2999     if (jit_thumb_p())
3000         T1_MOV(_R15_REGNO, r0);
3001     else
3002         MOV(_R15_REGNO, r0);
3003 }
3004
3005 static void
3006 _jmpi(jit_state_t *_jit, jit_word_t i0)
3007 {
3008     jit_word_t          w;
3009     jit_word_t          d;
3010     jit_int32_t         reg;
3011     w = _jit->pc.w;
3012     /* if thumb and in thumb mode */
3013     if (jit_thumb_p() && _jitc->thumb) {
3014         d = ((i0 - w) >> 1) - 2;
3015         if (d >= -1024 && d <= 1023)
3016             T1_B(d & 0x7ff);
3017         else if (_s24P(d))
3018             T2_B(encode_thumb_jump(d));
3019         else {
3020             reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
3021             movi(rn(reg), i0);
3022             jmpr(rn(reg));
3023             jit_unget_reg(reg);
3024         }
3025     }
3026     else {
3027         d = ((i0 - w) >> 2) - 2;
3028         if (_s24P(d))
3029             B(d & 0x00ffffff);
3030         else {
3031             reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
3032             movi(rn(reg), i0);
3033             jmpr(rn(reg));
3034             jit_unget_reg(reg);
3035         }
3036     }
3037 }
3038
3039 static jit_word_t
3040 _jmpi_p(jit_state_t *_jit, jit_word_t i0, jit_bool_t i1)
3041 {
3042     jit_word_t          w;
3043     jit_word_t          d;
3044     jit_int32_t         reg;
3045     /* i1 means jump is reachable in signed 24 bits  */
3046     if (i1) {
3047         w = _jit->pc.w;
3048         /* if thumb and in thumb mode */
3049         if (jit_thumb_p() && _jitc->thumb) {
3050             d = ((i0 - w) >> 1) - 2;
3051             assert(_s24P(d));
3052             T2_B(encode_thumb_jump(d));
3053         }
3054         else {
3055             d = ((i0 - w) >> 2) - 2;
3056             assert(_s24P(d));
3057             B(d & 0x00ffffff);
3058         }
3059     }
3060     else {
3061         reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
3062         w = movi_p(rn(reg), i0);
3063         jmpr(rn(reg));
3064         jit_unget_reg(reg);
3065     }
3066     return (w);
3067 }
3068
3069 static jit_word_t
3070 _bccr(jit_state_t *_jit, int cc, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
3071 {
3072     jit_word_t          w;
3073     jit_word_t          d;
3074     if (jit_thumb_p()) {
3075         if ((r0|r1) < 8)
3076             T1_CMP(r0, r1);
3077         else if ((r0&r1) & 8)
3078             T1_CMPX(r0, r1);
3079         else
3080             T2_CMP(r0, r1);
3081         /* use only thumb2 conditional as does not know if will be patched */
3082         w = _jit->pc.w;
3083         d = ((i0 - w) >> 1) - 2;
3084         assert(_s20P(d));
3085         T2_CC_B(cc, encode_thumb_cc_jump(d));
3086     }
3087     else {
3088         CMP(r0, r1);
3089         w = _jit->pc.w;
3090         d = ((i0 - w) >> 2) - 2;
3091         assert(_s24P(d));
3092         CC_B(cc, d & 0x00ffffff);
3093     }
3094     return (w);
3095 }
3096
3097 static jit_word_t
3098 _bcci(jit_state_t *_jit, int cc, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
3099 {
3100     jit_word_t          w;
3101     jit_word_t          d;
3102     int                 i;
3103     jit_int32_t         reg;
3104     if (jit_thumb_p()) {
3105         if (r0 < 7 && !(i1 & 0xffffff00))
3106             T1_CMPI(r0, i1);
3107         else if ((i = encode_thumb_immediate(i1)) != -1)
3108             T2_CMPI(r0, i);
3109         else if ((i = encode_thumb_immediate(-i1)) != -1)
3110             T2_CMNI(r0, i);
3111         else {
3112             reg = jit_get_reg(jit_class_gpr);
3113             movi(rn(reg), i1);
3114             T2_CMP(r0, rn(reg));
3115             jit_unget_reg(reg);
3116         }
3117         /* use only thumb2 conditional as does not know if will be patched */
3118         w = _jit->pc.w;
3119         d = ((i0 - w) >> 1) - 2;
3120         assert(_s20P(d));
3121         T2_CC_B(cc, encode_thumb_cc_jump(d));
3122     }
3123     else {
3124         if ((i = encode_arm_immediate(i1)) != -1)
3125             CMPI(r0, i);
3126         else if ((i = encode_arm_immediate(-i1)) != -1)
3127             CMNI(r0, i);
3128         else {
3129             reg = jit_get_reg(jit_class_gpr);
3130             movi(rn(reg), i1);
3131             CMP(r0, rn(reg));
3132             jit_unget_reg(reg);
3133         }
3134         w = _jit->pc.w;
3135         d = ((i0 - w) >> 2) - 2;
3136         assert(_s24P(d));
3137         CC_B(cc, d & 0x00ffffff);
3138     }
3139     return (w);
3140 }
3141
3142 static jit_word_t
3143 _baddr(jit_state_t *_jit, int cc, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
3144 {
3145     jit_word_t          w;
3146     jit_word_t          d;
3147     if (jit_thumb_p()) {
3148         if ((r0|r1) < 8)
3149             T1_ADD(r0, r0, r1);
3150         else
3151             T2_ADDS(r0, r0, r1);
3152         w = _jit->pc.w;
3153         d = ((i0 - w) >> 1) - 2;
3154         assert(_s20P(d));
3155         T2_CC_B(cc, encode_thumb_cc_jump(d));
3156     }
3157     else {
3158         ADDS(r0, r0, r1);
3159         w = _jit->pc.w;
3160         d = ((i0 - w) >> 2) - 2;
3161         assert(_s24P(d));
3162         CC_B(cc, d & 0x00ffffff);
3163     }
3164     return (w);
3165 }
3166
3167 static jit_word_t
3168 _baddi(jit_state_t *_jit, int cc, jit_word_t i0, jit_int32_t r0, int i1)
3169 {
3170     int                 i;
3171     jit_word_t          w;
3172     jit_word_t          d;
3173     jit_int32_t         reg;
3174     if (jit_thumb_p()) {
3175         if (r0 < 8 && !(i1 & ~7))
3176             T1_ADDI3(r0, r0, i1);
3177         else if (r0 < 8 && !(-i1 & ~7))
3178             T1_SUBI3(r0, r0, -i1);
3179         else if (r0 < 8 && !(i1 & ~0xff))
3180             T1_ADDI8(r0, i1);
3181         else if (r0 < 8 && !(-i1 & ~0xff))
3182             T1_SUBI8(r0, -i1);
3183         else if ((i = encode_thumb_immediate(i1)) != -1)
3184             T2_ADDSI(r0, r0, i);
3185         else if ((i = encode_thumb_immediate(-i1)) != -1)
3186             T2_SUBSI(r0, r0, i);
3187         else {
3188             reg = jit_get_reg(jit_class_gpr);
3189             movi(rn(reg), i1);
3190             T2_ADDS(r0, r0, rn(reg));
3191             jit_unget_reg(reg);
3192         }
3193         w = _jit->pc.w;
3194         d = ((i0 - w) >> 1) - 2;
3195         assert(_s20P(d));
3196         T2_CC_B(cc, encode_thumb_cc_jump(d));
3197     }
3198     else {
3199         if ((i = encode_arm_immediate(i1)) != -1)
3200             ADDSI(r0, r0, i);
3201         else if ((i = encode_arm_immediate(-i1)) != -1)
3202             SUBSI(r0, r0, i);
3203         else {
3204             reg = jit_get_reg(jit_class_gpr);
3205             movi(rn(reg), i1);
3206             ADDS(r0, r0, rn(reg));
3207             jit_unget_reg(reg);
3208         }
3209         w = _jit->pc.w;
3210         d = ((i0 - w) >> 2) - 2;
3211         assert(_s24P(d));
3212         CC_B(cc, d & 0x00ffffff);
3213     }
3214     return (w);
3215 }
3216
3217 static jit_word_t
3218 _bsubr(jit_state_t *_jit, int cc, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
3219 {
3220     jit_word_t          w;
3221     jit_word_t          d;
3222     if (jit_thumb_p()) {
3223         if ((r0|r1) < 8)
3224             T1_SUB(r0, r0, r1);
3225         else
3226             T2_SUBS(r0, r0, r1);
3227         w = _jit->pc.w;
3228         d = ((i0 - w) >> 1) - 2;
3229         assert(_s20P(d));
3230         T2_CC_B(cc, encode_thumb_cc_jump(d));
3231     }
3232     else {
3233         SUBS(r0, r0, r1);
3234         w = _jit->pc.w;
3235         d = ((i0 - w) >> 2) - 2;
3236         assert(_s24P(d));
3237         CC_B(cc, d & 0x00ffffff);
3238     }
3239     return (w);
3240 }
3241
3242 static jit_word_t
3243 _bsubi(jit_state_t *_jit, int cc, jit_word_t i0, jit_int32_t r0, int i1)
3244 {
3245     int                 i;
3246     jit_word_t          w;
3247     jit_word_t          d;
3248     jit_int32_t         reg;
3249     if (jit_thumb_p()) {
3250         if (r0 < 8 && !(i1 & ~7))
3251             T1_SUBI3(r0, r0, i1);
3252         else if (r0 < 8 && !(-i1 & ~7))
3253             T1_ADDI3(r0, r0, -i1);
3254         else if (r0 < 8 && !(i1 & ~0xff))
3255             T1_SUBI8(r0, i1);
3256         else if (r0 < 8 && !(-i1 & ~0xff))
3257             T1_ADDI8(r0, -i1);
3258         else if ((i = encode_thumb_immediate(i1)) != -1)
3259             T2_SUBSI(r0, r0, i);
3260         else if ((i = encode_thumb_immediate(-i1)) != -1)
3261             T2_SUBSI(r0, r0, i);
3262         else {
3263             reg = jit_get_reg(jit_class_gpr);
3264             movi(rn(reg), i1);
3265             T2_SUBS(r0, r0, rn(reg));
3266             jit_unget_reg(reg);
3267         }
3268         w = _jit->pc.w;
3269         d = ((i0 - w) >> 1) - 2;
3270         assert(_s20P(d));
3271         T2_CC_B(cc, encode_thumb_cc_jump(d));
3272     }
3273     else {
3274         if ((i = encode_arm_immediate(i1)) != -1)
3275             SUBSI(r0, r0, i);
3276         else if ((i = encode_arm_immediate(-i1)) != -1)
3277             ADDSI(r0, r0, i);
3278         else {
3279             reg = jit_get_reg(jit_class_gpr);
3280             movi(rn(reg), i1);
3281             SUBS(r0, r0, rn(reg));
3282             jit_unget_reg(reg);
3283         }
3284         w = _jit->pc.w;
3285         d = ((i0 - w) >> 2) - 2;
3286         assert(_s24P(d));
3287         CC_B(cc, d & 0x00ffffff);
3288     }
3289     return (w);
3290 }
3291
3292 static jit_word_t
3293 _bmxr(jit_state_t *_jit, int cc, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
3294 {
3295     jit_word_t          w;
3296     jit_word_t          d;
3297     jit_int32_t         reg;
3298     if (jit_thumb_p()) {
3299         if ((r0|r1) < 8)
3300             T1_TST(r0, r1);
3301         else
3302             T2_TST(r0, r1);
3303         w = _jit->pc.w;
3304         d = ((i0 - w) >> 1) - 2;
3305         assert(_s20P(d));
3306         T2_CC_B(cc, encode_thumb_cc_jump(d));
3307     }
3308     else {
3309         if (jit_armv5_p())
3310             TST(r0, r1);
3311         else {
3312             reg = jit_get_reg(jit_class_gpr);
3313             ANDS(rn(reg), r0, r1);
3314             jit_unget_reg(reg);
3315         }
3316         w = _jit->pc.w;
3317         d = ((i0 - w) >> 2) - 2;
3318         assert(_s24P(d));
3319         CC_B(cc, d & 0x00ffffff);
3320     }
3321     return (w);
3322 }
3323
3324 static jit_word_t
3325 _bmxi(jit_state_t *_jit, int cc, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
3326 {
3327     int                 i;
3328     jit_word_t          w;
3329     jit_word_t          d;
3330     jit_int32_t         reg;
3331     if (jit_thumb_p()) {
3332         if ((i = encode_thumb_immediate(i1)) != -1)
3333             T2_TSTI(r0, i);
3334         else {
3335             reg = jit_get_reg(jit_class_gpr);
3336             movi(rn(reg), i1);
3337             T2_TST(r0, rn(reg));
3338             jit_unget_reg(reg);
3339         }
3340         w = _jit->pc.w;
3341         d = ((i0 - w) >> 1) - 2;
3342         assert(_s20P(d));
3343         T2_CC_B(cc, encode_thumb_cc_jump(d));
3344     }
3345     else {
3346         if (jit_armv5_p()) {
3347             if ((i = encode_arm_immediate(i1)) != -1)
3348                 TSTI(r0, i);
3349             else {
3350                 reg = jit_get_reg(jit_class_gpr);
3351                 movi(rn(reg), i1);
3352                 TST(r0, rn(reg));
3353                 jit_unget_reg(reg);
3354             }
3355         }
3356         else {
3357             reg = jit_get_reg(jit_class_gpr);
3358             if ((i = encode_arm_immediate(i1)) != -1)
3359                 ANDSI(rn(reg), r0, i);
3360             else if ((i = encode_arm_immediate(~i1)) != -1)
3361                 BICSI(rn(reg), r0, i);
3362             else {
3363                 movi(rn(reg), i1);
3364                 ANDS(rn(reg), r0, rn(reg));
3365             }
3366             jit_unget_reg(reg);
3367         }
3368         w = _jit->pc.w;
3369         d = ((i0 - w) >> 2) - 2;
3370         assert(_s24P(d));
3371         CC_B(cc, d & 0x00ffffff);
3372     }
3373     return (w);
3374 }
3375
3376 static void
3377 _ldr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3378 {
3379     if (jit_thumb_p())
3380         T2_LDRSBI(r0, r1, 0);
3381     else
3382         LDRSBI(r0, r1, 0);
3383 }
3384
3385 static void
3386 _ldi_c(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
3387 {
3388     jit_int32_t         reg;
3389     reg = jit_get_reg(jit_class_gpr);
3390     movi(rn(reg), i0);
3391     if (jit_thumb_p())
3392         T2_LDRSBI(r0, rn(reg), 0);
3393     else
3394         LDRSBI(r0, rn(reg), 0);
3395     jit_unget_reg(reg);
3396 }
3397
3398 static void
3399 _ldxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3400 {
3401     if (jit_thumb_p()) {
3402         if ((r0|r1|r2) < 8)
3403             T1_LDRSB(r0, r1, r2);
3404         else
3405             T2_LDRSB(r0, r1, r2);
3406     }
3407     else
3408         LDRSB(r0, r1, r2);
3409 }
3410
3411 static void
3412 _ldxi_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
3413 {
3414     jit_int32_t         reg;
3415     if (jit_thumb_p()) {
3416         if (jit_ldrt_strt_p() && i0 >= 0 && i0 <= 255)
3417             T2_LDRSBI(r0, r1, i0);
3418         else if (i0 < 0 && i0 >= -255)
3419             T2_LDRSBIN(r0, r1, -i0);
3420         else if (i0 >= 0 && i0 <= 4095)
3421             T2_LDRSBWI(r0, r1, i0);
3422         else if (r0 != r1) {
3423             movi(r0, i0);
3424             if ((r0|r1) < 8)
3425                 T1_LDRSB(r0, r1, r0);
3426             else
3427                 T2_LDRSB(r0, r1, r0);
3428         }
3429         else {
3430             reg = jit_get_reg(jit_class_gpr);
3431             movi(rn(reg), i0);
3432             if ((r0|r1|rn(reg)) < 8)
3433                 T1_LDRSB(r0, r1, rn(reg));
3434             else
3435                 T2_LDRSB(r0, r1, rn(reg));
3436             jit_unget_reg(reg);
3437         }
3438     }
3439     else {
3440         if (i0 >= 0 && i0 <= 255)
3441             LDRSBI(r0, r1, i0);
3442         else if (i0 < 0 && i0 >= -255)
3443             LDRSBIN(r0, r1, -i0);
3444         else if (r0 != r1) {
3445             movi(r0, i0);
3446             LDRSB(r0, r1, r0);
3447         }
3448         else {
3449             reg = jit_get_reg(jit_class_gpr);
3450             movi(rn(reg), i0);
3451             LDRSB(r0, r1, rn(reg));
3452             jit_unget_reg(reg);
3453         }
3454     }
3455 }
3456
3457 static void
3458 _ldr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3459 {
3460     if (jit_thumb_p())
3461         T2_LDRBI(r0, r1, 0);
3462     else
3463         LDRBI(r0, r1, 0);
3464 }
3465
3466 static void
3467 _ldi_uc(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
3468 {
3469     jit_int32_t         reg;
3470     reg = jit_get_reg(jit_class_gpr);
3471     movi(rn(reg), i0);
3472     if (jit_thumb_p())
3473         T2_LDRBI(r0, rn(reg), 0);
3474     else
3475         LDRBI(r0, rn(reg), 0);
3476     jit_unget_reg(reg);
3477 }
3478
3479 static void
3480 _ldxr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3481 {
3482     if (jit_thumb_p()) {
3483         if ((r0|r1|r2) < 8)
3484             T1_LDRB(r0, r1, r2);
3485         else
3486             T2_LDRB(r0, r1, r2);
3487     }
3488     else
3489         LDRB(r0, r1, r2);
3490 }
3491
3492 static void
3493 _ldxi_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
3494 {
3495     jit_int32_t         reg;
3496     if (jit_thumb_p()) {
3497         if ((r0|r1) < 8 && i0 >= 0 && i0 < 0x20)
3498             T1_LDRBI(r0, r1, i0);
3499         else if (jit_ldrt_strt_p() && i0 >= 0 && i0 <= 255)
3500             T2_LDRBI(r0, r1, i0);
3501         else if (i0 < 0 && i0 >= -255)
3502             T2_LDRBIN(r0, r1, -i0);
3503         else if (i0 >= 0 && i0 <= 4095)
3504             T2_LDRBWI(r0, r1, i0);
3505         else if (r0 != r1) {
3506             movi(r0, i0);
3507             if ((r0|r1) < 8)
3508                 T1_LDRB(r0, r1, r0);
3509             else
3510                 T2_LDRB(r0, r1, r0);
3511         }
3512         else {
3513             reg = jit_get_reg(jit_class_gpr);
3514             movi(rn(reg), i0);
3515             if ((r0|r1|rn(reg)) < 8)
3516                 T1_LDRB(r0, r1, rn(reg));
3517             else
3518                 T2_LDRB(r0, r1, rn(reg));
3519             jit_unget_reg(reg);
3520         }
3521     }
3522     else {
3523         if (i0 >= 0 && i0 <= 4095)
3524             LDRBI(r0, r1, i0);
3525         else if (i0 < 0 && i0 >= -4095)
3526             LDRBIN(r0, r1, -i0);
3527         else if (r0 != r1) {
3528             movi(r0, i0);
3529             LDRB(r0, r1, r0);
3530         }
3531         else {
3532             reg = jit_get_reg(jit_class_gpr);
3533             movi(rn(reg), i0);
3534             LDRB(r0, r1, rn(reg));
3535             jit_unget_reg(reg);
3536         }
3537     }
3538 }
3539
3540 static void
3541 _ldr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3542 {
3543     if (jit_thumb_p())
3544         T2_LDRSHI(r0, r1, 0);
3545     else
3546         LDRSHI(r0, r1, 0);
3547 }
3548
3549 static void
3550 _ldi_s(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
3551 {
3552     jit_int32_t         reg;
3553     reg = jit_get_reg(jit_class_gpr);
3554     movi(rn(reg), i0);
3555     if (jit_thumb_p())
3556         T2_LDRSHI(r0, rn(reg), 0);
3557     else
3558         LDRSHI(r0, rn(reg), 0);
3559     jit_unget_reg(reg);
3560 }
3561
3562 static void
3563 _ldxr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3564 {
3565     if (jit_thumb_p()) {
3566         if ((r0|r1|r2) < 8)
3567             T1_LDRSH(r0, r1, r2);
3568         else
3569             T2_LDRSH(r0, r1, r2);
3570     }
3571     else
3572         LDRSH(r0, r1, r2);
3573 }
3574
3575 static void
3576 _ldxi_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
3577 {
3578     jit_int32_t         reg;
3579     if (jit_thumb_p()) {
3580         if (jit_ldrt_strt_p() && i0 >= 0 && i0 <= 255)
3581             T2_LDRSHI(r0, r1, i0);
3582         else if (i0 < 0 && i0 >= -255)
3583             T2_LDRSHIN(r0, r1, -i0);
3584         else if (i0 >= 0 && i0 <= 4095)
3585             T2_LDRSHWI(r0, r1, i0);
3586         else if (r0 != r1) {
3587             movi(r0, i0);
3588             if ((r0|r1) < 8)
3589                 T1_LDRSH(r0, r1, r0);
3590             else
3591                 T2_LDRSH(r0, r1, r0);
3592         }
3593         else {
3594             reg = jit_get_reg(jit_class_gpr);
3595             movi(rn(reg), i0);
3596             if ((r0|r1|rn(reg)) < 8)
3597                 T1_LDRSH(r0, r1, rn(reg));
3598             else
3599                 T2_LDRSH(r0, r1, rn(reg));
3600             jit_unget_reg(reg);
3601         }
3602     }
3603     else {
3604         if (i0 >= 0 && i0 <= 255)
3605             LDRSHI(r0, r1, i0);
3606         else if (i0 < 0 && i0 >= -255)
3607             LDRSHIN(r0, r1, -i0);
3608         else if (r0 != r1) {
3609             movi(r0, i0);
3610             LDRSH(r0, r1, r0);
3611         }
3612         else {
3613             reg = jit_get_reg(jit_class_gpr);
3614             movi(rn(reg), i0);
3615             LDRSH(r0, r1, rn(reg));
3616             jit_unget_reg(reg);
3617         }
3618     }
3619 }
3620
3621 static void
3622 _ldr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3623 {
3624     if (jit_thumb_p())
3625         T2_LDRHI(r0, r1, 0);
3626     else
3627         LDRHI(r0, r1, 0);
3628 }
3629
3630 static void
3631 _ldi_us(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
3632 {
3633     jit_int32_t         reg;
3634     reg = jit_get_reg(jit_class_gpr);
3635     movi(rn(reg), i0);
3636     if (jit_thumb_p())
3637         T2_LDRHI(r0, rn(reg), 0);
3638     else
3639         LDRHI(r0, rn(reg), 0);
3640     jit_unget_reg(reg);
3641 }
3642
3643 static void
3644 _ldxr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3645 {
3646     if (jit_thumb_p()) {
3647         if ((r0|r1|r2) < 8)
3648             T1_LDRH(r0, r1, r2);
3649         else
3650             T2_LDRH(r0, r1, r2);
3651     }
3652     else
3653         LDRH(r0, r1, r2);
3654 }
3655
3656 static void
3657 _ldxi_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
3658 {
3659     jit_int32_t         reg;
3660     if (jit_thumb_p()) {
3661         if ((r0|r1) < 8 && i0 >= 0 && !(i0 & 1) && (i0 >> 1) < 0x20)
3662             T1_LDRHI(r0, r1, i0 >> 1);
3663         else if (jit_ldrt_strt_p() && i0 >= 0 && i0 <= 255)
3664             T2_LDRHI(r0, r1, i0);
3665         else if (i0 < 0 && i0 >= -255)
3666             T2_LDRHIN(r0, r1, -i0);
3667         else if (i0 >= 0 && i0 <= 4095)
3668             T2_LDRHWI(r0, r1, i0);
3669         else if (r0 != r1) {
3670             movi(r0, i0);
3671             if ((r0|r1) < 8)
3672                 T1_LDRH(r0, r1, r0);
3673             else
3674                 T2_LDRH(r0, r1, r0);
3675         }
3676         else {
3677             reg = jit_get_reg(jit_class_gpr);
3678             movi(rn(reg), i0);
3679             if ((r0|r1|rn(reg)) < 8)
3680                 T1_LDRH(r0, r1, rn(reg));
3681             else
3682                 T2_LDRH(r0, r1, rn(reg));
3683             jit_unget_reg(reg);
3684         }
3685     }
3686     else {
3687         if (i0 >= 0 && i0 <= 255)
3688             LDRHI(r0, r1, i0);
3689         else if (i0 < 0 && i0 >= -255)
3690             LDRHIN(r0, r1, -i0);
3691         else if (r0 != r1) {
3692             movi(r0, i0);
3693             LDRH(r0, r1, r0);
3694         }
3695         else {
3696             reg = jit_get_reg(jit_class_gpr);
3697             movi(rn(reg), i0);
3698             LDRH(r0, r1, rn(reg));
3699             jit_unget_reg(reg);
3700         }
3701     }
3702 }
3703
3704 static void
3705 _ldr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3706 {
3707     if (jit_thumb_p())
3708         T2_LDRI(r0, r1, 0);
3709     else
3710         LDRI(r0, r1, 0);
3711 }
3712
3713 static void
3714 _ldi_i(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
3715 {
3716     jit_int32_t         reg;
3717     reg = jit_get_reg(jit_class_gpr);
3718     movi(rn(reg), i0);
3719     if (jit_thumb_p())
3720         T2_LDRI(r0, rn(reg), 0);
3721     else
3722         LDRI(r0, rn(reg), 0);
3723     jit_unget_reg(reg);
3724 }
3725
3726 static void
3727 _ldxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3728 {
3729     if (jit_thumb_p()) {
3730         if ((r0|r1|r2) < 8)
3731             T1_LDR(r0, r1, r2);
3732         else
3733             T2_LDR(r0, r1, r2);
3734     }
3735     else
3736         LDR(r0, r1, r2);
3737 }
3738
3739 static void
3740 _ldxi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
3741 {
3742     jit_int32_t         reg;
3743     if (jit_thumb_p()) {
3744         if ((r0|r1) < 8 && i0 >= 0 && !(i0 & 3) && (i0 >> 2) < 0x20)
3745             T1_LDRI(r0, r1, i0 >> 2);
3746         else if (r1 == _R13_REGNO && r0 < 8 &&
3747                  i0 >= 0 && !(i0 & 3) && (i0 >> 2) <= 255)
3748             T1_LDRISP(r0, i0 >> 2);
3749         else if (jit_ldrt_strt_p() && i0 >= 0 && i0 <= 255)
3750             T2_LDRI(r0, r1, i0);
3751         else if (i0 < 0 && i0 > -255)
3752             T2_LDRIN(r0, r1, -i0);
3753         else if (i0 >= 0 && i0 <= 4095)
3754             T2_LDRWI(r0, r1, i0);
3755         else if (r0 != r1) {
3756             movi(r0, i0);
3757             if ((r0|r1) < 8)
3758                 T1_LDR(r0, r1, r0);
3759             else
3760                 T2_LDR(r0, r1, r0);
3761         }
3762         else {
3763             reg = jit_get_reg(jit_class_gpr);
3764             movi(rn(reg), i0);
3765             if ((r0|r1|rn(reg)) < 8)
3766                 T1_LDR(r0, r1, rn(reg));
3767             else
3768                 T2_LDR(r0, r1, rn(reg));
3769             jit_unget_reg(reg);
3770         }
3771     }
3772     else {
3773         if (i0 >= 0 && i0 <= 4095)
3774             LDRI(r0, r1, i0);
3775         else if (i0 < 0 && i0 >= -4095)
3776             LDRIN(r0, r1, -i0);
3777         else if (r0 != r1) {
3778             movi(r0, i0);
3779             LDR(r0, r1, r0);
3780         }
3781         else {
3782             reg = jit_get_reg(jit_class_gpr);
3783             movi(rn(reg), i0);
3784             LDR(r0, r1, rn(reg));
3785             jit_unget_reg(reg);
3786         }
3787     }
3788 }
3789
3790 static void
3791 _unldr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
3792 {
3793     if (jit_unaligned_p())
3794         fallback_unldr(r0, r1, i0);
3795     else
3796         generic_unldr(r0, r1, i0);
3797 }
3798
3799 static void
3800 _unldi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0, jit_word_t i1)
3801 {
3802     jit_int32_t         t0, r2;
3803     if (jit_unaligned_p())
3804         fallback_unldi(r0, i0, i1);
3805     else
3806         generic_unldi(r0, i0, i1);
3807 }
3808
3809 static void
3810 _unldr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
3811 {
3812     if (jit_unaligned_p())
3813         fallback_unldr_u(r0, r1, i0);
3814     else
3815         generic_unldr_u(r0, r1, i0);
3816 }
3817
3818 static void
3819 _unldi_u(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0, jit_word_t i1)
3820 {
3821     jit_int32_t         t0, r2;
3822     if (jit_unaligned_p())
3823         fallback_unldi_u(r0, i0, i1);
3824     else
3825         generic_unldi_u(r0, i0, i1);
3826 }
3827
3828 static void
3829 _str_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3830 {
3831     if (jit_thumb_p())
3832         T2_STRBI(r1, r0, 0);
3833     else
3834         STRBI(r1, r0, 0);
3835 }
3836
3837 static void
3838 _sti_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
3839 {
3840     jit_int32_t         reg;
3841     reg = jit_get_reg(jit_class_gpr);
3842     movi(rn(reg), i0);
3843     if (jit_thumb_p())
3844         T2_STRBI(r0, rn(reg), 0);
3845     else
3846         STRBI(r0, rn(reg), 0);
3847     jit_unget_reg(reg);
3848 }
3849
3850 static void
3851 _stxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3852 {
3853     if (jit_thumb_p()) {
3854         if ((r0|r1|r2) < 8)
3855             T1_STRB(r2, r1, r0);
3856         else
3857             T2_STRB(r2, r1, r0);
3858     }
3859     else
3860         STRB(r2, r1, r0);
3861 }
3862
3863 static void
3864 _stxi_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
3865 {
3866     jit_int32_t         reg;
3867     if (jit_thumb_p()) {
3868         if ((r0|r1) < 8 && i0 >= 0 && i0 < 0x20)
3869             T1_STRBI(r1, r0, i0);
3870         else if (jit_ldrt_strt_p() && i0 >= 0 && i0 <= 255)
3871             T2_STRBI(r1, r0, i0);
3872         else if (i0 < 0 && i0 >= -255)
3873             T2_STRBIN(r1, r0, -i0);
3874         else if (i0 >= 0 && i0 <= 4095)
3875             T2_STRBWI(r1, r0, i0);
3876         else {
3877             reg = jit_get_reg(jit_class_gpr);
3878             movi(rn(reg), i0);
3879             if ((r0|r1|rn(reg)) < 8)
3880                 T1_STRB(r1, r0, rn(reg));
3881             else
3882                 T2_STRB(r1, r0, rn(reg));
3883             jit_unget_reg(reg);
3884         }
3885     }
3886     else {
3887         if (i0 >= 0 && i0 <= 4095)
3888             STRBI(r1, r0, i0);
3889         else if (i0 < 0 && i0 >= -4095)
3890             STRBIN(r1, r0, -i0);
3891         else {
3892             reg = jit_get_reg(jit_class_gpr);
3893             movi(rn(reg), i0);
3894             STRB(r1, r0, rn(reg));
3895             jit_unget_reg(reg);
3896         }
3897     }
3898 }
3899
3900 static void
3901 _str_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3902 {
3903     if (jit_thumb_p())
3904         T2_STRHI(r1, r0, 0);
3905     else
3906         STRHI(r1, r0, 0);
3907 }
3908
3909 static void
3910 _sti_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
3911 {
3912     jit_int32_t         reg;
3913     reg = jit_get_reg(jit_class_gpr);
3914     movi(rn(reg), i0);
3915     if (jit_thumb_p())
3916         T2_STRHI(r0, rn(reg), 0);
3917     else
3918         STRHI(r0, rn(reg), 0);
3919     jit_unget_reg(reg);
3920 }
3921
3922 static void
3923 _stxr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3924 {
3925     if (jit_thumb_p()) {
3926         if ((r0|r1|r2) < 8)
3927             T1_STRH(r2, r1, r0);
3928         else
3929             T2_STRH(r2, r1, r0);
3930     }
3931     else
3932         STRH(r2, r1, r0);
3933 }
3934
3935 static void
3936 _stxi_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
3937 {
3938     jit_int32_t         reg;
3939     if (jit_thumb_p()) {
3940         if ((r0|r1) < 8 && i0 >= 0 && !(i0 & 1) && (i0 >> 1) < 0x20)
3941             T1_STRHI(r1, r0, i0 >> 1);
3942         else if (jit_ldrt_strt_p() && i0 >= 0 && i0 <= 255)
3943             T2_STRHI(r1, r0, i0);
3944         else if (i0 < 0 && i0 >= -255)
3945             T2_STRHIN(r1, r0, -i0);
3946         else if (i0 >= 0 && i0 <= 4095)
3947             T2_STRHWI(r1, r0, i0);
3948         else {
3949             reg = jit_get_reg(jit_class_gpr);
3950             movi(rn(reg), i0);
3951             if ((r0|r1|rn(reg)) < 8)
3952                 T1_STRH(r1, r0, rn(reg));
3953             else
3954                 T2_STRH(r1, r0, rn(reg));
3955             jit_unget_reg(reg);
3956         }
3957     }
3958     else {
3959         if (i0 >= 0 && i0 <= 255)
3960             STRHI(r1, r0, i0);
3961         else if (i0 < 0 && i0 >= -255)
3962             STRHIN(r1, r0, -i0);
3963         else {
3964             reg = jit_get_reg(jit_class_gpr);
3965             movi(rn(reg), i0);
3966             STRH(r1, r0, rn(reg));
3967             jit_unget_reg(reg);
3968         }
3969     }
3970 }
3971
3972 static void
3973 _str_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3974 {
3975     if (jit_thumb_p())
3976         T2_STRI(r1, r0, 0);
3977     else
3978         STRI(r1, r0, 0);
3979 }
3980
3981 static void
3982 _sti_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
3983 {
3984     jit_int32_t         reg;
3985     reg = jit_get_reg(jit_class_gpr);
3986     movi(rn(reg), i0);
3987     if (jit_thumb_p())
3988         T2_STRI(r0, rn(reg), 0);
3989     else
3990         STRI(r0, rn(reg), 0);
3991     jit_unget_reg(reg);
3992 }
3993
3994 static void
3995 _stxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3996 {
3997     if (jit_thumb_p()) {
3998         if ((r0|r1|r2) < 8)
3999             T1_STR(r2, r1, r0);
4000         else
4001             T2_STR(r2, r1, r0);
4002     }
4003     else
4004         STR(r2, r1, r0);
4005 }
4006
4007 static void
4008 _stxi_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4009 {
4010     jit_int32_t         reg;
4011     if (jit_thumb_p()) {
4012         if ((r0|r1) < 8 && i0 >= 0 && !(i0 & 3) && (i0 >> 2) < 0x20)
4013             T1_STRI(r1, r0, i0 >> 2);
4014         else if (r0 == _R13_REGNO && r1 < 8 &&
4015                  i0 >= 0 && !(i0 & 3) && (i0 >> 2) <= 255)
4016             T1_STRISP(r1, i0 >> 2);
4017         else if (jit_ldrt_strt_p() && i0 >= 0 && i0 <= 255)
4018             T2_STRI(r1, r0, i0);
4019         else if (i0 < 0 && i0 >= -255)
4020             T2_STRIN(r1, r0, -i0);
4021         else if (i0 >= 0 && i0 <= 4095)
4022             T2_STRWI(r1, r0, i0);
4023         else {
4024             reg = jit_get_reg(jit_class_gpr);
4025             movi(rn(reg), i0);
4026             if ((r0|r1|rn(reg)) < 8)
4027                 T1_STR(r1, r0, rn(reg));
4028             else
4029                 T2_STR(r1, r0, rn(reg));
4030             jit_unget_reg(reg);
4031         }
4032     }
4033     else {
4034         if (i0 >= 0 && i0 <= 4095)
4035             STRI(r1, r0, i0);
4036         else if (i0 < 0 && i0 >= -4095)
4037             STRIN(r1, r0, -i0);
4038         else {
4039             reg = jit_get_reg(jit_class_gpr);
4040             movi(rn(reg), i0);
4041             STR(r1, r0, rn(reg));
4042             jit_unget_reg(reg);
4043         }
4044     }
4045 }
4046
4047 static void
4048 _unstr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
4049 {
4050     if (jit_unaligned_p())
4051         fallback_unstr(r0, r1, i0);
4052     else
4053         generic_unstr(r0, r1, i0);
4054 }
4055
4056 static void
4057 _unsti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4058 {
4059     if (jit_unaligned_p())
4060         fallback_unsti(i0, r0, i1);
4061     else
4062         generic_unsti(i0, r0, i1);
4063 }
4064
4065 static void
4066 _bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
4067 {
4068     if (jit_thumb_p()) {
4069         if ((r0|r1) < 8)
4070             T1_REV(r0, r1);
4071         else
4072             T2_REV(r0, r1);
4073         rshi_u(r0, r0, 16);
4074     }
4075     else {
4076         if (jit_armv6_p()) {
4077             REV(r0, r1);
4078             rshi_u(r0, r0, 16);
4079         }
4080         else {
4081                 generic_bswapr_us(_jit, r0, r1);
4082         }
4083     }
4084 }
4085
4086 /* inline glibc htonl (without register clobber) */
4087 static void
4088 _bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
4089 {
4090     jit_int32_t         reg;
4091     if (jit_thumb_p()) {
4092         if ((r0|r1) < 8)
4093             T1_REV(r0, r1);
4094         else
4095             T2_REV(r0, r1);
4096     }
4097     else {
4098         if (jit_armv6_p())
4099             REV(r0, r1);
4100         else {
4101             reg = jit_get_reg(jit_class_gpr);
4102             EOR_SI(rn(reg), r1, r1, ARM_ROR, 16);
4103             LSRI(rn(reg), rn(reg), 8);
4104             BICI(rn(reg), rn(reg), encode_arm_immediate(0xff00));
4105             EOR_SI(r0, rn(reg), r1, ARM_ROR, 8);
4106             jit_unget_reg(reg);
4107         }
4108     }
4109 }
4110
4111 static void
4112 _extr(jit_state_t *_jit,
4113       jit_int32_t r0, jit_int32_t r1,jit_word_t i0, jit_word_t i1)
4114 {
4115     if (jit_armv7_p()) {        /* armv6t2 actually */
4116 #  if __BYTE_ORDER == __BIG_ENDIAN
4117         i0 = __WORDSIZE - (i0 + i1);
4118 #  endif
4119         if (jit_thumb_p())
4120             T2_SBFX(r0, r1, i0, i1 - 1);
4121         else
4122             SBFX(r0, r1, i0, i1 - 1);
4123     }
4124     else
4125         fallback_ext(r0, r1, i0, i1);
4126 }
4127
4128 static void
4129 _extr_u(jit_state_t *_jit,
4130         jit_int32_t r0, jit_int32_t r1,jit_word_t i0, jit_word_t i1)
4131 {
4132     if (jit_armv7_p()) {        /* armv6t2 actually */
4133 #  if __BYTE_ORDER == __BIG_ENDIAN
4134         i0 = __WORDSIZE - (i0 + i1);
4135 #  endif
4136         if (jit_thumb_p())
4137             T2_UBFX(r0, r1, i0, i1 - 1);
4138         else
4139             UBFX(r0, r1, i0, i1 - 1);
4140     }
4141     else
4142         fallback_ext_u(r0, r1, i0, i1);
4143 }
4144
4145 static void
4146 _depr(jit_state_t *_jit,
4147       jit_int32_t r0, jit_int32_t r1,jit_word_t i0, jit_word_t i1)
4148 {
4149     if (jit_armv7_p()) {        /* armv6t2 actually */
4150 #  if __BYTE_ORDER == __BIG_ENDIAN
4151         i0 = __WORDSIZE - (i0 + i1);
4152 #  endif
4153         if (jit_thumb_p())
4154             T2_BFI(r0, r1, i0, i0 + i1 - 1);
4155         else
4156             BFI(r0, r1, i0, i0 + i1 - 1);
4157     }
4158     else
4159         fallback_dep(r0, r1, i0, i1);
4160 }
4161
4162 static void
4163 _extr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
4164 {
4165     if (jit_thumb_p()) {
4166         if ((r0|r1) < 8)
4167             T1_SXTB(r0, r1);
4168         else
4169             T2_SXTB(r0, r1);
4170     }
4171     else {
4172         if (jit_armv6_p())
4173             SXTB(r0, r1);
4174         else {
4175             LSLI(r0, r1, 24);
4176             ASRI(r0, r0, 24);
4177         }
4178     }
4179 }
4180
4181 static void
4182 _extr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
4183 {
4184     if (jit_thumb_p()) {
4185         if ((r0|r1) < 8)
4186             T1_UXTB(r0, r1);
4187         else
4188             T2_UXTB(r0, r1);
4189     }
4190     else {
4191         if (jit_armv6_p())
4192             UXTB(r0, r1);
4193         else
4194             ANDI(r0, r1, 0xff);
4195     }
4196 }
4197
4198 static void
4199 _extr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
4200 {
4201     if (jit_thumb_p()) {
4202         if ((r0|r1) < 8)
4203             T1_SXTH(r0, r1);
4204         else
4205             T2_SXTH(r0, r1);
4206     }
4207     else {
4208         if (jit_armv6_p())
4209             SXTH(r0, r1);
4210         else {
4211             LSLI(r0, r1, 16);
4212             ASRI(r0, r0, 16);
4213         }
4214     }
4215 }
4216
4217 static void
4218 _extr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
4219 {
4220     if (jit_thumb_p()) {
4221         if ((r0|r1) < 8)
4222             T1_UXTH(r0, r1);
4223         else
4224             T2_UXTH(r0, r1);
4225     }
4226     else {
4227         if (jit_armv6_p())
4228             UXTH(r0, r1);
4229         else {
4230             LSLI(r0, r1, 16);
4231             LSRI(r0, r0, 16);
4232         }
4233     }
4234 }
4235
4236 static void
4237 _callr(jit_state_t *_jit, jit_int32_t r0)
4238 {
4239     if (jit_thumb_p())
4240         T1_BLX(r0);
4241     else
4242         BLX(r0);
4243 }
4244
4245 static void
4246 _calli(jit_state_t *_jit, jit_word_t i0, jit_bool_t exchange_p)
4247 {
4248     jit_word_t          d;
4249     jit_int32_t         reg;
4250     if (!exchange_p) {
4251         if (jit_thumb_p()) {
4252             if (jit_exchange_p())
4253                 /* skip switch from  arm to thumb 
4254                  * exchange_p set to zero means a jit function
4255                  * call in the same jit code buffer */
4256                 d = ((i0 + 8 - _jit->pc.w) >> 1) - 2;
4257             else
4258                 d = ((i0 - _jit->pc.w) >> 1) - 2;
4259         }
4260         else                    d = ((i0 - _jit->pc.w) >> 2) - 2;
4261         if (_s24P(d)) {
4262             if (jit_thumb_p())  T2_BLI(encode_thumb_jump(d));
4263             else                BLI(d & 0x00ffffff);
4264         }
4265         else                    goto fallback;
4266     }
4267     else {
4268     fallback:
4269         reg = jit_get_reg(jit_class_gpr);
4270         movi(rn(reg), i0);
4271         if (jit_thumb_p())
4272             T1_BLX(rn(reg));
4273         else
4274             BLX(rn(reg));
4275         jit_unget_reg(reg);
4276     }
4277 }
4278
4279 static jit_word_t
4280 _calli_p(jit_state_t *_jit, jit_word_t i0, jit_bool_t i1)
4281 {
4282     jit_word_t          w;
4283     jit_word_t          d;
4284     jit_int32_t         reg;
4285     /* i1 means call is reachable in signed 24 bits  */
4286     if (i1) {
4287         w = _jit->pc.w;
4288         if (jit_thumb_p())      d = ((i0 - _jit->pc.w) >> 1) - 2;
4289         else                    d = ((i0 - _jit->pc.w) >> 2) - 2;
4290         assert(_s24P(d));
4291         if (jit_thumb_p())      T2_BLI(encode_thumb_jump(d));
4292         else                    BLI(d & 0x00ffffff);
4293     }
4294     else {
4295         reg = jit_get_reg(jit_class_gpr);
4296         w = _jit->pc.w;
4297         movi_p(rn(reg), i0);
4298         if (jit_thumb_p())
4299             T1_BLX(rn(reg));
4300         else
4301             BLX(rn(reg));
4302         jit_unget_reg(reg);
4303     }
4304     return (w);
4305 }
4306
4307 static void
4308 _prolog(jit_state_t *_jit, jit_node_t *node)
4309 {
4310     jit_int32_t         reg, mask, count;
4311     if (_jitc->function->define_frame || _jitc->function->assume_frame) {
4312         jit_int32_t     frame = -_jitc->function->frame;
4313         jit_check_frame();
4314         assert(_jitc->function->self.aoff >= frame);
4315         if (jit_swf_p())
4316             CHECK_SWF_OFFSET();
4317         CHECK_REG_ARGS();
4318         if (_jitc->function->assume_frame) {
4319             if (jit_thumb_p() && !_jitc->thumb)
4320                 _jitc->thumb = _jit->pc.w;
4321             return;
4322         }
4323         _jitc->function->self.aoff = frame;
4324     }
4325     if (_jitc->function->allocar)
4326         _jitc->function->self.aoff &= -8;
4327     _jitc->function->stack = ((_jitc->function->self.alen -
4328                               /* align stack at 8 bytes */
4329                               _jitc->function->self.aoff) + 7) & -8;
4330     /* If this jit_check_frame() succeeds, it actually is just a need_stack,
4331      * usually for arguments, so, allocai was not called, but pusharg*
4332      * was called increasing stack size, for negative access offsets.
4333      * This can be optimized for one less prolog instruction, that is,
4334      * do not create the frame pointer, and only add _jitc->function->stack
4335      * to sp, and on epilog, instead of moving fp to sp, just add negative
4336      * value of _jitc->function->stack. Since this condition requires a
4337      * large function body for excess arguments to called function, keep
4338      * things a bit simpler for now, as this is the only place need_stack
4339      * would be useful. */
4340     if (_jitc->function->stack)
4341         jit_check_frame();
4342
4343     for (reg = mask = count = 0; reg < jit_size(iregs); reg++) {
4344         if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
4345             mask |= 1 << rn(iregs[reg]);
4346             ++count;
4347         }
4348     }
4349     /* One extra register to keep stack 8 bytes aligned */
4350     if (count & 1) {
4351         for (reg = 4; reg < 10; reg++) {
4352             if (!(mask & (1 << reg))) {
4353                 mask |= 1 << reg;
4354                 break;
4355             }
4356         }
4357     }
4358     if (_jitc->function->need_frame || _jitc->function->need_return)
4359         mask |= (1 << _FP_REGNO) | (1 << _LR_REGNO);
4360     if (!jit_swf_p() && _jitc->function->save_reg_args &&
4361         !(_jitc->function->self.call & jit_call_varargs))
4362         mask |= 0xf;
4363
4364     if (jit_thumb_p()) {
4365         /*  switch to thumb mode (better approach would be to
4366          * ORR 1 address being called, but no clear distinction
4367          * of what is a pointer to a jit function, or if patching
4368          * a pointer to a jit function) */
4369         if (jit_exchange_p()) {
4370             ADDI(_R12_REGNO, _R15_REGNO, 1);
4371             BX(_R12_REGNO);
4372         }
4373         if (!_jitc->thumb)
4374             _jitc->thumb = _jit->pc.w;
4375         if (jit_swf_p() || (_jitc->function->save_reg_args &&
4376                             (_jitc->function->self.call & jit_call_varargs)))
4377             T2_PUSH(0xf);
4378         if (mask)
4379             T2_PUSH(mask);
4380     }
4381     else {
4382         if (jit_swf_p() || (_jitc->function->save_reg_args &&
4383                             (_jitc->function->self.call & jit_call_varargs)))
4384             PUSH(0xf);
4385         if (mask)
4386             PUSH(mask);
4387     }
4388     if (_jitc->function->need_frame)
4389         movr(_FP_REGNO, _SP_REGNO);
4390     if (_jitc->function->stack)
4391         subi(_SP_REGNO, _SP_REGNO, _jitc->function->stack);
4392     if (_jitc->function->allocar) {
4393         reg = jit_get_reg(jit_class_gpr);
4394         movi(rn(reg), _jitc->function->self.aoff);
4395         stxi_i(_jitc->function->aoffoff, _FP_REGNO, rn(reg));
4396         jit_unget_reg(reg);
4397     }
4398 }
4399
4400 static void
4401 _epilog(jit_state_t *_jit, jit_node_t *node)
4402 {
4403     jit_int32_t         reg, mask, count;
4404     if (_jitc->function->assume_frame)
4405         return;
4406
4407     for (reg = mask = count = 0; reg < jit_size(iregs); reg++) {
4408         if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
4409             mask |= 1 << rn(iregs[reg]);
4410             ++count;
4411         }
4412     }
4413     /* One extra register to keep stack 8 bytes aligned */
4414     if (count & 1) {
4415         for (reg = 4; reg < 10; reg++) {
4416             if (!(mask & (1 << reg))) {
4417                 mask |= 1 << reg;
4418                 break;
4419             }
4420         }
4421     }
4422     if (_jitc->function->need_frame || _jitc->function->need_return)
4423         mask |= (1 << _FP_REGNO) | (1 << _LR_REGNO);
4424     if (_jitc->function->need_frame)
4425         movr(_SP_REGNO, _FP_REGNO);
4426     if (!jit_swf_p() && _jitc->function->save_reg_args &&
4427         !(_jitc->function->self.call & jit_call_varargs))
4428         addi(_SP_REGNO, _SP_REGNO, 16);
4429     if (mask) {
4430         if (jit_thumb_p())
4431             T2_POP(mask);
4432         else
4433             POP(mask);
4434     }
4435     if (jit_swf_p() || (_jitc->function->save_reg_args &&
4436                         (_jitc->function->self.call & jit_call_varargs)))
4437         addi(_SP_REGNO, _SP_REGNO, 16);
4438     if (jit_thumb_p())
4439         T1_BX(_LR_REGNO);
4440     else
4441         BX(_LR_REGNO);
4442     if (jit_thumb_p() && (_jit->pc.w & 2))
4443         T1_NOP();
4444 }
4445
4446 static void
4447 _vastart(jit_state_t *_jit, jit_int32_t r0)
4448 {
4449     assert(_jitc->function->self.call & jit_call_varargs);
4450
4451     /* Initialize stack pointer to the first stack argument.
4452      * The -16 is to account for the 4 argument registers
4453      * always saved, and _jitc->function->vagp is to account
4454      * for declared arguments. */
4455     addi(r0, _FP_REGNO, jit_selfsize() - 16 + _jitc->function->vagp);
4456 }
4457
4458 static void
4459 _vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
4460 {
4461     assert(_jitc->function->self.call & jit_call_varargs);
4462
4463     /* Load argument. */
4464     ldr(r0, r1);
4465
4466     /* Update stack pointer. */
4467     addi(r1, r1, sizeof(jit_word_t));
4468 }
4469
4470 static void
4471 _patch_at(jit_state_t *_jit,
4472           jit_int32_t kind, jit_word_t instr, jit_word_t label)
4473 {
4474     jit_word_t           d;
4475     jit_thumb_t          thumb;
4476     union {
4477         jit_int16_t     *s;
4478         jit_int32_t     *i;
4479         jit_word_t       w;
4480     } u;
4481     u.w = instr;
4482     if (kind == arm_patch_call) {
4483         if (jit_thumb_p() && (jit_uword_t)instr >= _jitc->thumb) {
4484             code2thumb(thumb.s[0], thumb.s[1], u.s[0], u.s[1]);
4485             assert((thumb.i & THUMB2_BLI) == THUMB2_BLI);
4486             /* skip code to switch from arm to thumb mode */
4487             if (jit_exchange_p())
4488                 d = ((label + 8 - instr) >> 1) - 2;
4489             else
4490                 d = ((label - instr) >> 1) - 2;
4491             assert(_s24P(d));
4492             thumb.i = THUMB2_BLI | encode_thumb_jump(d);
4493             thumb2code(thumb.s[0], thumb.s[1], u.s[0], u.s[1]);
4494         }
4495         else {
4496             thumb.i = u.i[0];
4497             assert((thumb.i & 0x0f000000) == ARM_BLI);
4498             d = ((label - instr) >> 2) - 2;
4499             assert(_s24P(d));
4500             u.i[0] = (thumb.i & 0xff000000) | (d & 0x00ffffff);
4501         }
4502     }
4503     else if (kind == arm_patch_jump) {
4504         if (jit_thumb_p() && (jit_uword_t)instr >= _jitc->thumb) {
4505             code2thumb(thumb.s[0], thumb.s[1], u.s[0], u.s[1]);
4506             if ((thumb.i & THUMB2_B) == THUMB2_B) {
4507                 d = ((label - instr) >> 1) - 2;
4508                 assert(_s24P(d));
4509                 thumb.i = THUMB2_B | encode_thumb_jump(d);
4510                 thumb2code(thumb.s[0], thumb.s[1], u.s[0], u.s[1]);
4511             }
4512             else if ((thumb.i & THUMB2_B) == THUMB2_CC_B) {
4513                 d = ((label - instr) >> 1) - 2;
4514                 assert(_s20P(d));
4515                 thumb.i = THUMB2_CC_B | (thumb.i & 0x3c00000) |
4516                           encode_thumb_cc_jump(d);
4517                 thumb2code(thumb.s[0], thumb.s[1], u.s[0], u.s[1]);
4518             }
4519             else {
4520                 /* for the sake of simplicity in case choose to
4521                  * movw+movt+[bx|blx], e.g. if changing to instead
4522                  * of asserting target is reachable, load constant
4523                  * and do indirect jump if not reachable */
4524                 if ((thumb.i & 0xfbf00000) == THUMB2_MOVWI)
4525                     goto indirect_jump;
4526                 assert(!"handled branch opcode");
4527             }
4528         }
4529         else {
4530             thumb.i = u.i[0];
4531             /* 0x0e000000 because 0x01000000 is (branch&) link modifier */
4532             assert((thumb.i & 0x0e000000) == ARM_B);
4533             d = ((label - instr) >> 2) - 2;
4534             assert(_s24P(d));
4535             u.i[0] = (thumb.i & 0xff000000) | (d & 0x00ffffff);
4536         }
4537     }
4538     else if (kind == arm_patch_load) {
4539         /* offset may be negative for a forward patch because it
4540          * is relative to pc + 8, for example:
4541          *          ldr r0, [pc, #-4]
4542          *          bx r0               ;; [pc, #-8]
4543          *          .data ...           ;; [pc, #-4]
4544          *          ...                 ;; [pc]
4545          */
4546         assert(!jit_thumb_p());
4547         thumb.i = u.i[0];
4548         assert((thumb.i & 0x0f700000) == ARM_LDRI);
4549         d = label - (instr + 8);
4550         if (d < 0) {
4551             thumb.i &= ~ARM_P;
4552             d = -d;
4553         }
4554         else
4555             thumb.i |= ARM_P;
4556         assert(!(d & 0xfffff000));
4557         u.i[0] = (thumb.i & 0xfffff000) | d;
4558     }
4559     else if (kind == arm_patch_word) {
4560         if (jit_thumb_p()) {
4561             code2thumb(thumb.s[0], thumb.s[1], u.s[0], u.s[1]);
4562             assert((thumb.i & 0xfbf00000) == THUMB2_MOVWI);
4563         indirect_jump:
4564             thumb.i = ((thumb.i & 0xfbf00f00) |
4565                        ( (label & 0x0000f000) <<  4) |
4566                        ( (label & 0x00000800) << 15) |
4567                        ( (label & 0x00000700) <<  4) |
4568                        (  label & 0x000000ff));
4569             thumb2code(thumb.s[0], thumb.s[1], u.s[0], u.s[1]);
4570             label >>= 16;
4571             code2thumb(thumb.s[0], thumb.s[1], u.s[2], u.s[3]);
4572             assert((thumb.i & 0xfbf00000) == THUMB2_MOVTI);
4573             thumb.i = ((thumb.i & 0xfbf00f00) |
4574                        ( (label & 0x0000f000) <<  4) |
4575                        ( (label & 0x00000800) << 15) |
4576                        ( (label & 0x00000700) <<  4) |
4577                        (  label & 0x000000ff));
4578             thumb2code(thumb.s[0], thumb.s[1], u.s[2], u.s[3]);
4579         }
4580         else
4581             u.i[0] = label;
4582     }
4583     else
4584         assert(!"handled patch");
4585 }
4586 #endif