git subrepo pull --force deps/lightning
[pcsx_rearmed.git] / deps / lightning / lib / jit_arm-cpu.c
1 /*
2  * Copyright (C) 2012-2022  Free Software Foundation, Inc.
3  *
4  * This file is part of GNU lightning.
5  *
6  * GNU lightning is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU Lesser General Public License as published
8  * by the Free Software Foundation; either version 3, or (at your option)
9  * any later version.
10  *
11  * GNU lightning is distributed in the hope that it will be useful, but
12  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
14  * License for more details.
15  *
16  * Authors:
17  *      Paulo Cesar Pereira de Andrade
18  */
19
20 #if PROTO
21 #  define stxi(i0,r0,r1)                stxi_i(i0,r0,r1)
22 #  define ldxi(r0,r1,i0)                ldxi_i(r0,r1,i0)
23 #  define ldr(r0,r1)                    ldr_i(r0,r1)
/* Range predicates: nonzero when d lies in the signed 20-bit (_s20P) or
 * signed 24-bit (_s24P) range.  d is parenthesized at every use so that an
 * expression argument such as "a & b" or "c ? x : y" is compared as a whole.
 * Note that d is evaluated twice, so it must be free of side effects. */
#  define _s20P(d)                      ((d) >= -(int)0x80000 && (d) <= 0x7ffff)
#  define _s24P(d)                      ((d) >= -(int)0x800000 && (d) <= 0x7fffff)
26 #  define _u3(v)                        ((v) & 0x7)
27 #  define _u4(v)                        ((v) & 0xf)
28 #  define _u5(v)                        ((v) & 0x1f)
29 #  define _u8(v)                        ((v) & 0xff)
30 #  define _u12(v)                       ((v) & 0xfff)
31 #  define _u13(v)                       ((v) & 0x1fff)
32 #  define _u16(v)                       ((v) & 0xffff)
33 #  define _u24(v)                       ((v) & 0xffffff)
/* Code-generation mode and CPU-version predicates.  All but
 * jit_no_set_flags() read the jit_cpu description; jit_no_set_flags()
 * reads the no_set_flags field of _jitc. */
34 #  define jit_thumb_p()                 jit_cpu.thumb
35 #  define jit_no_set_flags()            _jitc->no_set_flags
36 #  define jit_armv5_p()                 (jit_cpu.version >= 5)
/* True for any version above 5, or for version 5 when jit_cpu.extend is set. */
37 #  define jit_armv5e_p()                (jit_cpu.version > 5 || (jit_cpu.version == 5 && jit_cpu.extend))
38 #  define jit_armv6_p()                 (jit_cpu.version >= 6)
39 #  define jit_armv7_p()                 (jit_cpu.version >= 7)
/* Hard-wired to 0: the ARMv7-R predicate never holds in this build. */
40 #  define jit_armv7r_p()                0
/* NOTE(review): presumably the size in bytes of the register save area set
 * up by the prolog -- confirm against the prolog/epilog code. */
41 #  define stack_framesize               48
/* External signed and unsigned division/modulo helpers; the names follow the
 * ARM EABI run-time ABI.
 * NOTE(review): presumably called from generated code when no hardware
 * divide instruction is available -- confirm at the call sites. */
42 extern int      __aeabi_idivmod(int, int);
43 extern unsigned __aeabi_uidivmod(unsigned, unsigned);
44 #  define _R0_REGNO                     0x00
45 #  define _R1_REGNO                     0x01
46 #  define _R2_REGNO                     0x02
47 #  define _R3_REGNO                     0x03
48 #  define _R4_REGNO                     0x04
49 #  define _R5_REGNO                     0x05
50 #  define _R6_REGNO                     0x06
51 #  define _R7_REGNO                     0x07
52 #  define _R8_REGNO                     0x08
53 #  define _R9_REGNO                     0x09
54 #  define _R10_REGNO                    0x0a
55 #  define _R11_REGNO                    0x0b
56 #  define _R12_REGNO                    0x0c
57 #  define _R13_REGNO                    0x0d
58 #  define _R14_REGNO                    0x0e
59 #  define _R15_REGNO                    0x0f
60 #  define _FP_REGNO                     _R11_REGNO
61 #  define _SP_REGNO                     _R13_REGNO
62 #  define _LR_REGNO                     _R14_REGNO
63 #  define _PC_REGNO                     _R15_REGNO
64 #  define ARM_CC_EQ                     0x00000000      /* Z=1 */
65 #  define ARM_CC_NE                     0x10000000      /* Z=0 */
66 #  define ARM_CC_HS                     0x20000000      /* C=1 */
67 #    define ARM_CC_CS                   ARM_CC_HS
68 #  define ARM_CC_LO                     0x30000000      /* C=0 */
69 #    define ARM_CC_CC                   ARM_CC_LO
70 #  define ARM_CC_MI                     0x40000000      /* N=1 */
71 #  define ARM_CC_PL                     0x50000000      /* N=0 */
72 #  define ARM_CC_VS                     0x60000000      /* V=1 */
73 #  define ARM_CC_VC                     0x70000000      /* V=0 */
74 #  define ARM_CC_HI                     0x80000000      /* C=1 && Z=0 */
75 #  define ARM_CC_LS                     0x90000000      /* C=0 || Z=1 */
76 #  define ARM_CC_GE                     0xa0000000      /* N=V */
77 #  define ARM_CC_LT                     0xb0000000      /* N!=V */
78 #  define ARM_CC_GT                     0xc0000000      /* Z=0 && N=V */
79 #  define ARM_CC_LE                     0xd0000000      /* Z=1 || N!=V */
80 #  define ARM_CC_AL                     0xe0000000      /* always */
81 #  define ARM_CC_NV                     0xf0000000      /* reserved */
82 #  define THUMB2_IT                     0
83 #  define THUMB2_ITT                    1
84 #  define THUMB2_ITE                    2
85 #  define THUMB2_ITTT                   3
86 #  define THUMB2_ITET                   4
87 #  define THUMB2_ITTE                   5
88 #  define THUMB2_ITEE                   6
89 #  define THUMB2_ITTTT                  7
90 #  define THUMB2_ITETT                  8
91 #  define THUMB2_ITTET                  9
92 #  define THUMB2_ITEET                  10
93 #  define THUMB2_ITTTE                  11
94 #  define THUMB2_ITETE                  12
95 #  define THUMB2_ITTEE                  13
96 #  define THUMB2_ITEEE                  14
97 #  define ARM_MOV                       0x01a00000
98 #  define THUMB_MOV                         0x4600
99 #  define ARM_MOVWI                     0x03000000      /* v6t2, v7 */
100 #  define THUMB_MOVI                        0x2000
101 #  define THUMB2_MOVI                   0xf0400000
102 #  define THUMB2_MOVWI                  0xf2400000
103 #  define ARM_MOVTI                     0x03400000
104 #  define THUMB2_MOVTI                  0xf2c00000
105 #  define ARM_MVN                       0x01e00000
106 #  define THUMB_MVN                         0x43c0
107 #  define THUMB2_MVN                    0xea600000
108 #  define THUMB2_MVNI                   0xf0600000
109 #  define ARM_I                         0x02000000 /* immediate */
110 #  define ARM_S                         0x00100000 /* set flags */
111 #  define ARM_ADD                       0x00800000
112 #  define THUMB_ADD                         0x1800
113 #  define THUMB_ADDX                        0x4400
114 #  define THUMB2_ADD                    0xeb000000
115 #  define THUMB_ADDI3                       0x1c00
116 #  define THUMB_ADDI8                       0x3000
117 #  define THUMB2_ADDI                   0xf1000000
118 #  define THUMB2_ADDWI                  0xf2000000
119 #  define ARM_ADC                       0x00a00000
120 #  define THUMB_ADC                         0x4140
121 #  define THUMB2_ADC                    0xeb400000
122 #  define THUMB2_ADCI                   0xf1400000
123 #  define ARM_SUB                       0x00400000
124 #  define THUMB_SUB                         0x1a00
125 #  define THUMB2_SUB                    0xeba00000
126 #  define THUMB_SUBI3                       0x1e00
127 #  define THUMB_SUBI8                       0x3800
128 #  define THUMB2_SUBI                   0xf1a00000
129 #  define THUMB2_SUBWI                  0xf2a00000
130 #  define ARM_SBC                       0x00c00000
131 #  define THUMB_SBC                         0x4180
132 #  define THUMB2_SBC                    0xeb600000
133 #  define THUMB2_SBCI                   0xf1600000
134 #  define ARM_RSB                       0x00600000
135 #  define THUMB_RSBI                        0x4240
136 #  define THUMB2_RSBI                   0xf1c00000
137 #  define ARM_MUL                       0x00000090
138 #  define THUMB_MUL                         0x4340
139 #  define THUMB2_MUL                    0xfb00f000
140 #  define ARM_UMULL                     0x00800090
141 #  define THUMB2_UMULL                  0xfba00000
142 #  define ARM_SMULL                     0x00c00090
143 #  define THUMB2_SMULL                  0xfb800000
144 #  define THUMB2_SDIV                   0xfb90f0f0
145 #  define THUMB2_UDIV                   0xfbb0f0f0
146 #  define ARM_AND                       0x00000000
147 #  define THUMB_AND                         0x4000
148 #  define THUMB2_AND                    0xea000000
149 #  define THUMB2_ANDI                   0xf0000000
150 #  define ARM_BIC                       0x01c00000
151 #  define THUMB2_BIC                    0xea200000
152 #  define THUMB2_BICI                   0xf0200000
153 #  define ARM_ORR                       0x01800000
154 #  define THUMB_ORR                         0x4300
155 #  define THUMB2_ORR                    0xea400000
156 #  define THUMB2_ORRI                   0xf0400000
157 #  define ARM_EOR                       0x00200000
158 #  define THUMB_EOR                         0x4040
159 #  define THUMB2_EOR                    0xea800000
160 #  define THUMB2_EORI                   0xf0800000
161 /* >> ARMv6* */
162 #  define ARM_REV                       0x06bf0f30
163 #  define THUMB_REV                         0xba00
164 #  define THUMB2_REV                    0xfa90f080
165 #  define ARM_REV16                     0x06bf0fb0
166 #  define THUMB_REV16                       0xba40
167 #  define THUMB2_REV16                  0xfa90f090
168 #  define ARM_SXTB                      0x06af0070
169 #  define THUMB_SXTB                        0xb240
170 #  define THUMB2_SXTB                   0xfa40f080
171 #  define ARM_UXTB                      0x06ef0070
172 #  define THUMB_UXTB                        0xb2c0
173 #  define THUMB2_UXTB                   0xfa50f080
174 #  define ARM_SXTH                      0x06bf0070
175 #  define THUMB_SXTH                        0xb200
176 #  define THUMB2_SXTH                   0xfa00f080
177 #  define ARM_UXTH                      0x06ff0070
178 #  define THUMB_UXTH                        0xb280
179 #  define THUMB2_UXTH                   0xfa10f080
180 #  define ARM_XTR8                      0x00000400 /* ?xt? rotate 8 bits */
181 #  define ARM_XTR16                     0x00000800 /* ?xt? rotate 16 bits */
182 #  define ARM_XTR24                     0x00000c00 /* ?xt? rotate 24 bits */
183 #  define ARM_LDREX                     0x01900090
184 #  define THUMB2_LDREX                  0xe8500000
185 #  define ARM_STREX                     0x01800090
186 #  define THUMB2_STREX                  0xe8400000
187 /* << ARMv6* */
/* >> ARMv7 */
#  define ARM_DMB                       0xf57ff050
#  define THUMB2_DMB                    0xf3bf8f50
/* DMB option field values; names follow the ARM architecture manual. */
#  define DMB_SY                        0xf
#  define DMB_ST                        0xe
#  define DMB_ISH                       0xb
#  define DMB_ISHST                     0xa
#  define DMB_NSH                       0x7
#  define DMB_NSHST                     0x6
/* Historical misspelling of DMB_NSHST, kept so existing users still build. */
#  define DMB_NSHT                      DMB_NSHST
#  define DMB_OSH                       0x3
#  define DMB_OSHST                     0x2
/* << ARMv7 */
200 #  define ARM_SHIFT                     0x01a00000
201 #  define ARM_R                         0x00000010 /* register shift */
202 #  define ARM_LSL                       0x00000000
203 #  define THUMB_LSL                         0x4080
204 #  define THUMB2_LSL                    0xfa00f000
205 #  define THUMB_LSLI                        0x0000
206 #  define THUMB2_LSLI                   0xea4f0000
207 #  define ARM_LSR                       0x00000020
208 #  define THUMB_LSR                         0x40c0
209 #  define THUMB2_LSR                    0xfa20f000
210 #  define THUMB_LSRI                        0x0800
211 #  define THUMB2_LSRI                   0xea4f0010
212 #  define ARM_ASR                       0x00000040
213 #  define THUMB_ASR                         0x4100
214 #  define THUMB2_ASR                    0xfa40f000
215 #  define THUMB_ASRI                        0x1000
216 #  define THUMB2_ASRI                   0xea4f0020
217 #  define ARM_ROR                       0x00000060
218 #  define ARM_CMP                       0x01500000
219 #  define THUMB_CMP                         0x4280
220 #  define THUMB_CMPX                        0x4500
221 #  define THUMB2_CMP                    0xebb00000
222 #  define THUMB_CMPI                        0x2800
223 #  define THUMB2_CMPI                   0xf1b00000
224 #  define ARM_CMN                       0x01700000
225 #  define THUMB_CMN                         0x42c0
226 #  define THUMB2_CMN                    0xeb100000
227 #  define THUMB2_CMNI                   0xf1100000
228 #  define ARM_TST                       0x01100000
229 #  define THUMB_TST                         0x4200
230 #  define THUMB2_TST                    0xea100000
231 #  define THUMB2_TSTI                   0xf0100000
232 #  define ARM_TEQ                       0x01300000
233 /* branch */
234 #  define ARM_BX                        0x012fff10
235 #  define ARM_BLX                       0x012fff30
236 #  define THUMB_BLX                         0x4780
237 #  define ARM_BLXI                      0xfa000000
238 #  define THUMB2_BLXI                   0xf000c000
239 #  define ARM_B                         0x0a000000
240 #  define THUMB_CC_B                        0xd000
241 #  define THUMB_B                           0xe000
242 #  define THUMB2_CC_B                   0xf0008000
243 #  define THUMB2_B                      0xf0009000
244 #  define ARM_BLI                       0x0b000000
245 #  define THUMB2_BLI                    0xf000d000
246 /* ldr/str */
247 #  define ARM_P                         0x00800000 /* positive offset */
248 #  define THUMB2_P                      0x00000400
249 #  define THUMB2_U                      0x00000200
250 #  define THUMB2_W                      0x00000100
251 #  define ARM_LDRSB                     0x011000d0
252 #  define THUMB_LDRSB                       0x5600
253 #  define THUMB2_LDRSB                  0xf9100000
254 #  define ARM_LDRSBI                    0x015000d0
255 #  define THUMB2_LDRSBI                 0xf9100c00
256 #  define THUMB2_LDRSBWI                0xf9900000
257 #  define ARM_LDRB                      0x07500000
258 #  define THUMB_LDRB                        0x5c00
259 #  define THUMB2_LDRB                   0xf8100000
260 #  define ARM_LDRBI                     0x05500000
261 #  define THUMB_LDRBI                       0x7800
262 #  define THUMB2_LDRBI                  0xf8100c00
263 #  define THUMB2_LDRBWI                 0xf8900000
264 #  define ARM_LDRSH                     0x011000f0
265 #  define THUMB_LDRSH                       0x5e00
266 #  define THUMB2_LDRSH                  0xf9300000
267 #  define ARM_LDRSHI                    0x015000f0
268 #  define THUMB2_LDRSHI                 0xf9300c00
269 #  define THUMB2_LDRSHWI                0xf9b00000
270 #  define ARM_LDRH                      0x011000b0
271 #  define THUMB_LDRH                        0x5a00
272 #  define THUMB2_LDRH                   0xf8300000
273 #  define ARM_LDRHI                     0x015000b0
274 #  define THUMB_LDRHI                       0x8800
275 #  define THUMB2_LDRHI                  0xf8300c00
276 #  define THUMB2_LDRHWI                 0xf8b00000
277 #  define ARM_LDR                       0x07100000
278 #  define THUMB_LDR                         0x5800
279 #  define THUMB2_LDR                    0xf8500000
280 #  define ARM_LDRI                      0x05100000
281 #  define THUMB_LDRI                        0x6800
282 #  define THUMB_LDRISP                      0x9800
283 #  define THUMB2_LDRI                   0xf8500c00
284 #  define THUMB2_LDRWI                  0xf8d00000
285 #  define ARM_LDRD                      0x010000d0
286 #  define ARM_LDRDI                     0x014000d0
287 #  define THUMB2_LDRDI                  0xe8500000
288 #  define ARM_STRB                      0x07400000
289 #  define THUMB_STRB                        0x5400
290 #  define THUMB2_STRB                   0xf8000000
291 #  define ARM_STRBI                     0x05400000
292 #  define THUMB_STRBI                       0x7000
293 #  define THUMB2_STRBI                  0xf8000c00
294 #  define THUMB2_STRBWI                 0xf8800000
295 #  define ARM_STRH                      0x010000b0
296 #  define THUMB_STRH                        0x5200
297 #  define THUMB2_STRH                   0xf8200000
298 #  define ARM_STRHI                     0x014000b0
299 #  define THUMB_STRHI                       0x8000
300 #  define THUMB2_STRHI                  0xf8200c00
301 #  define THUMB2_STRHWI                 0xf8a00000
302 #  define ARM_STR                       0x07000000
303 #  define THUMB_STR                         0x5000
304 #  define THUMB2_STR                    0xf8400000
305 #  define ARM_STRI                      0x05000000
306 #  define THUMB_STRI                        0x6000
307 # define THUMB2_STRWI                   0xf8c00000
308 #  define THUMB_STRISP                      0x9000
309 #  define THUMB2_STRI                   0xf8400c00
310 #  define ARM_STRD                      0x010000f0
311 # define ARM_STRDI                      0x014000f0
312 #  define THUMB2_STRDI                  0xe8400000
313 /* ldm/stm */
314 #  define ARM_M                         0x08000000
315 #  define ARM_M_L                       0x00100000 /* load; store if not set */
316 #  define ARM_M_I                       0x00800000 /* inc; dec if not set */
317 #  define ARM_M_B                       0x01000000 /* before; after if not set */
318 #  define ARM_M_U                       0x00200000 /* update Rn */
319 #  define THUMB2_LDM_W                  0x00200000
320 #  define THUMB2_LDM_P                  0x00008000
321 #  define THUMB2_LDM_M                  0x00004000
322 #  define THUMB_LDMIA                       0xc800
323 #  define THUMB2_LDMIA                  0xe8900000
324 #  define THUMB2_LDMB                   0xe9100000
325 #  define THUMB_PUSH                        0xb400
326 #  define THUMB2_PUSH                   0xe92d0000
327 #  define THUMB_POP                         0xbc00
328 #  define THUMB2_POP                    0xe8bd0000
/* Code emitters: store one value through the pc.ui (ii) or pc.us (is) view
 * of the current code pointer in _jit and post-increment that pointer.
 * The argument and the whole expansion are parenthesized so the macros
 * behave like ordinary expressions for any argument. */
#  define ii(i)                         (*_jit->pc.ui++ = (i))
#  define is(i)                         (*_jit->pc.us++ = (i))
/* iss() emits a pair of halfwords; code2thumb()/thumb2code() move a pair of
 * halfwords between the (t0,t1) and (c0,c1) orderings.  On little-endian
 * hosts the two halves are swapped, otherwise they are kept in order. */
#  if __BYTE_ORDER == __LITTLE_ENDIAN
#    define iss(i, j)                   do { is(j); is(i); } while (0)
#    define code2thumb(t0, t1, c0, c1)  do { (t1) = (c0); (t0) = (c1); } while (0)
#    define thumb2code(t0, t1, c0, c1)  do { (c0) = (t1); (c1) = (t0); } while (0)
#  else
#    define iss(i, j)                   do { is(i); is(j); } while (0)
#    define code2thumb(t0, t1, c0, c1)  do { (t0) = (c0); (t1) = (c1); } while (0)
#    define thumb2code(t0, t1, c0, c1)  do { (c0) = (t0); (c1) = (t1); } while (0)
#  endif
340 static int encode_arm_immediate(unsigned int v);
341 static int encode_thumb_immediate(unsigned int v);
342 static int encode_thumb_word_immediate(unsigned int v);
343 static int encode_thumb_jump(int v);
344 static int encode_thumb_cc_jump(int v);
345 static int encode_thumb_shift(int v, int type) maybe_unused;
346 #  define corrr(cc,o,rn,rd,rm)          _corrr(_jit,cc,o,rn,rd,rm)
347 static void _corrr(jit_state_t*,int,int,int,int,int);
348 #  define corri(cc,o,rn,rd,im)          _corri(_jit,cc,o,rn,rd,im)
349 static void _corri(jit_state_t*,int,int,int,int,int);
350 #define corri8(cc,o,rn,rt,im)   _corri8(_jit,cc,o,rn,rt,im)
351 static void _corri8(jit_state_t*,int,int,int,int,int);
352 #  define torrr(o,rn,rd,rm)             _torrr(_jit,o,rn,rd,rm)
353 static void _torrr(jit_state_t*,int,int,int,int);
354 #  define torrrs(o,rn,rd,rm,im)         _torrrs(_jit,o,rn,rd,rm,im)
355 static void _torrrs(jit_state_t*,int,int,int,int,int) maybe_unused;
356 #  define torxr(o,rn,rt,rm)             _torxr(_jit,o,rn,rt,rm)
357 static void _torxr(jit_state_t*,int,int,int,int);
358 #  define torrrr(o,rn,rl,rh,rm)         _torrrr(_jit,o,rn,rl,rh,rm)
359 static void _torrrr(jit_state_t*,int,int,int,int,int) maybe_unused;
360 #  define torrri8(o,rn,rt,rt2,im)       _torrri8(_jit,o,rn,rt,rt2,im)
361 static void _torrri8(jit_state_t*,int,int,int,int,int) maybe_unused;
362 #  define coriw(cc,o,rd,im)             _coriw(_jit,cc,o,rd,im)
363 static void _coriw(jit_state_t*,int,int,int,int);
364 #  define torri(o,rd,rn,im)             _torri(_jit,o,rd,rn,im)
365 static void _torri(jit_state_t*,int,int,int,int);
366 #  define torri8(o,rn,rt,im)            _torri8(_jit,o,rn,rt,im)
367 static void _torri8(jit_state_t*,int,int,int,int);
368 #  define torri12(o,rn,rt,im)           _torri12(_jit,o,rn,rt,im)
369 static void _torri12(jit_state_t*,int,int,int,int);
370 #  define tshift(o,rd,rm,im)            _tshift(_jit,o,rd,rm,im)
371 static void _tshift(jit_state_t*,int,int,int,int);
372 #  define toriw(o,rd,im)                _toriw(_jit,o,rd,im)
373 static void _toriw(jit_state_t*,int,int,int);
374 #  define tc8(cc,im)                    _tc8(_jit,cc,im)
375 static void _tc8(jit_state_t*,int,int) maybe_unused;
376 #  define t11(im)                       _t11(_jit,im)
377 static void _t11(jit_state_t*,int);
378 #  define tcb(cc,im)                    _tcb(_jit,cc,im)
379 static void _tcb(jit_state_t*,int,int);
380 #  define blxi(im)                      _blxi(_jit,im)
381 static void _blxi(jit_state_t*,int) maybe_unused;
382 #  define tb(o,im)                      _tb(_jit,o,im)
383 static void _tb(jit_state_t*,int,int);
384 #  define corrrr(cc,o,rh,rl,rm,rn)      _corrrr(_jit,cc,o,rh,rl,rm,rn)
385 static void _corrrr(jit_state_t*,int,int,int,int,int,int);
386 #  define corrrs(cc,o,rn,rd,rm,im)      _corrrs(_jit,cc,o,rn,rd,rm,im)
387 static void _corrrs(jit_state_t*,int,int,int,int,int,int);
388 #  define cshift(cc,o,rd,rm,rn,im)      _cshift(_jit,cc,o,rd,rm,rn,im)
389 static void _cshift(jit_state_t*,int,int,int,int,int,int);
390 #  define cb(cc,o,im)                   _cb(_jit,cc,o,im)
391 static void _cb(jit_state_t*,int,int,int);
392 #  define cbx(cc,o,rm)                  _cbx(_jit,cc,o,rm)
393 static void _cbx(jit_state_t*,int,int,int);
394 #  define corl(cc,o,r0,i0)              _corl(_jit,cc,o,r0,i0)
395 static void _corl(jit_state_t*,int,int,int,int);
396 #  define c6orr(cc,o,r0,r1)             _c6orr(_jit,cc,o,r0,r1)
397 static void _c6orr(jit_state_t*,int,int,int,int);
398 #  define tcit(cc,it)                   _tcit(_jit,cc,it)
399 static void _tcit(jit_state_t*,unsigned int,int);
400 #  define IT(cc)                        tcit(cc,THUMB2_IT)
401 #  define ITT(cc)                       tcit(cc,THUMB2_ITT)
402 #  define ITE(cc)                       tcit(cc,THUMB2_ITE)
403 #  define ITTT(cc)                      tcit(cc,THUMB2_ITTT)
404 #  define ITTE(cc)                      tcit(cc,THUMB2_ITTE)
405 #  define ITET(cc)                      tcit(cc,THUMB2_ITET)
406 #  define ITEE(cc)                      tcit(cc,THUMB2_ITEE)
407 #  define ITTTT(cc)                     tcit(cc,THUMB2_ITTTT)
408 #  define ITETT(cc)                     tcit(cc,THUMB2_ITETT)
409 #  define ITTET(cc)                     tcit(cc,THUMB2_ITTET)
410 #  define ITEET(cc)                     tcit(cc,THUMB2_ITEET)
411 #  define ITTTE(cc)                     tcit(cc,THUMB2_ITTTE)
412 #  define ITETE(cc)                     tcit(cc,THUMB2_ITETE)
413 #  define ITTEE(cc)                     tcit(cc,THUMB2_ITTEE)
414 #  define ITEEE(cc)                     tcit(cc,THUMB2_ITEEE)
415 #  define tpp(o,im)                     _tpp(_jit,o,im)
416 static void _tpp(jit_state_t*,int,int);
417 #  define torl(o,rn,im)                 _torl(_jit,o,rn,im)
418 static void _torl(jit_state_t*,int,int,int) maybe_unused;
419 #  define DMB(im)                       dmb(im)
420 #  define T2_DMB(im)                    tdmb(im)
421 #  define dmb(im)                       _dmb(_jit, im)
422 static void _dmb(jit_state_t *_jit, int im);
423 #  define tdmb(im)                      _tdmb(_jit, im)
424 static void _tdmb(jit_state_t *_jit, int im);
425 #  define CC_MOV(cc,rd,rm)              corrr(cc,ARM_MOV,0,rd,rm)
426 #  define MOV(rd,rm)                    CC_MOV(ARM_CC_AL,rd,rm)
/* 16-bit Thumb MOV: bit 3 of rd is placed at bit 7, the 4-bit rm at
 * bits 6:3 and the low three bits of rd at bits 2:0.  rd goes through
 * _u3() rather than a raw "rd&7" so expression arguments are masked whole. */
#  define T1_MOV(rd,rm)                 is(THUMB_MOV|((_u4(rd)&8)<<4)|(_u4(rm)<<3)|_u3(rd))
428 #  define T2_MOV(rd,rm)                 T2_ORR(rd,_R15_REGNO,rm)
429 #  define CC_MOVI(cc,rd,im)             corri(cc,ARM_MOV|ARM_I,0,rd,im)
430 #  define MOVI(rd,im)                   CC_MOVI(ARM_CC_AL,rd,im)
431 #  define CC_MOVWI(cc,rd,im)            coriw(cc,ARM_MOVWI,rd,im)
432 #  define MOVWI(rd,im)                  CC_MOVWI(ARM_CC_AL,rd,im)
433 #  define T1_MOVI(rd,im)                is(THUMB_MOVI|(_u3(rd)<<8)|_u8(im))
434 #  define T2_MOVI(rd,im)                torri(THUMB2_MOVI,_R15_REGNO,rd,im)
435 #  define T2_MOVWI(rd,im)               toriw(THUMB2_MOVWI,rd,im)
436 #  define CC_MOVTI(cc,rd,im)            coriw(cc,ARM_MOVTI,rd,im)
437 #  define MOVTI(rd,im)                  CC_MOVTI(ARM_CC_AL,rd,im)
438 #  define T2_MOVTI(rd,im)               toriw(THUMB2_MOVTI,rd,im)
439 #  define CC_MVN(cc,rd,rm)              corrr(cc,ARM_MVN,0,rd,rm)
440 #  define MVN(rd,rm)                    CC_MVN(ARM_CC_AL,rd,rm)
441 #  define T1_MVN(rd,rm)                 is(THUMB_MVN|(_u3(rm)<<3)|_u3(rd))
442 #  define T2_MVN(rd,rm)                 torrr(THUMB2_MVN,_R15_REGNO,rd,rm)
443 #  define CC_MVNI(cc,rd,im)             corri(cc,ARM_MVN|ARM_I,0,rd,im)
444 #  define MVNI(rd,im)                   CC_MVNI(ARM_CC_AL,rd,im)
445 #  define T2_MVNI(rd,im)                torri(THUMB2_MVNI,_R15_REGNO,rd,im)
446 #  define CC_NOT(cc,rd,rm)              CC_MVN(cc,rd,rm)
447 #  define NOT(rd,rm)                    CC_NOT(ARM_CC_AL,rd,rm)
448 #  define T1_NOT(rd,rm)                 T1_MVN(rd,rm)
449 #  define T2_NOT(rd,rm)                 T2_MVN(rd,rm)
450 #  define NOP()                         MOV(_R0_REGNO, _R0_REGNO)
451 #  define T1_NOP()                      is(0xbf00)
452 #  define CC_ADD(cc,rd,rn,rm)           corrr(cc,ARM_ADD,rn,rd,rm)
453 #  define ADD(rd,rn,rm)                 CC_ADD(ARM_CC_AL,rd,rn,rm)
454 #  define T1_ADD(rd,rn,rm)              is(THUMB_ADD|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rd))
/* 16-bit Thumb ADD on any registers (rdn += rm): bit 3 of rdn is placed at
 * bit 7, the 4-bit rm at bits 6:3 and the low three bits of rdn at
 * bits 2:0.  rdn goes through _u3() rather than a raw "rdn&7" so expression
 * arguments are masked whole. */
#  define T1_ADDX(rdn,rm)               is(THUMB_ADDX|((_u4(rdn)&8)<<4)|(_u4(rm)<<3)|_u3(rdn))
456 #  define T2_ADD(rd,rn,rm)              torrr(THUMB2_ADD,rn,rd,rm)
457 #  define CC_ADDI(cc,rd,rn,im)          corri(cc,ARM_ADD|ARM_I,rn,rd,im)
458 #  define ADDI(rd,rn,im)                CC_ADDI(ARM_CC_AL,rd,rn,im)
459 #  define T1_ADDI3(rd,rn,im)            is(THUMB_ADDI3|(_u3(im)<<6)|(_u3(rn)<<3)|_u3(rd))
460 #  define T1_ADDI8(rdn,im)              is(THUMB_ADDI8|(_u3(rdn)<<8)|_u8(im))
461 #  define T2_ADDI(rd,rn,im)             torri(THUMB2_ADDI,rn,rd,im)
462 #  define T2_ADDWI(rd,rn,im)            torri(THUMB2_ADDWI,rn,rd,im)
463 #  define CC_ADDS(cc,rd,rn,rm)          corrr(cc,ARM_ADD|ARM_S,rn,rd,rm)
464 #  define ADDS(rd,rn,rm)                CC_ADDS(ARM_CC_AL,rd,rn,rm)
465 #  define T2_ADDS(rd,rn,rm)             torrr(THUMB2_ADD|ARM_S,rn,rd,rm)
466 #  define ADDSI(rd,rn,im)               corri(ARM_CC_AL,ARM_ADD|ARM_S|ARM_I,rn,rd,im)
467 #  define T2_ADDSI(rd,rn,im)            torri(THUMB2_ADDI|ARM_S,rn,rd,im)
468 #  define CC_ADC(cc,rd,rn,rm)           corrr(cc,ARM_ADC,rn,rd,rm)
469 #  define ADC(rd,rn,rm)                 CC_ADC(ARM_CC_AL,rd,rn,rm)
470 #  define T1_ADC(rdn,rm)                is(THUMB_ADC|(_u3(rm)<<3)|_u3(rdn))
471 #  define T2_ADC(rd,rn,rm)              torrr(THUMB2_ADC,rn,rd,rm)
472 #  define CC_ADCI(cc,rd,rn,im)          corri(cc,ARM_ADC|ARM_I,rn,rd,im)
473 #  define ADCI(rd,rn,im)                CC_ADCI(ARM_CC_AL,rd,rn,im)
474 #  define T2_ADCI(rd,rn,im)             torri(THUMB2_ADCI,rn,rd,im)
475 #  define CC_ADCS(cc,rd,rn,rm)          corrr(cc,ARM_ADC|ARM_S,rn,rd,rm)
476 #  define ADCS(rd,rn,rm)                CC_ADCS(ARM_CC_AL,rd,rn,rm)
477 #  define T2_ADCS(rd,rn,rm)             torrr(THUMB2_ADC|ARM_S,rn,rd,rm)
478 #  define CC_ADCSI(cc,rd,rn,im)         corri(cc,ARM_ADC|ARM_S|ARM_I,rn,rd,im)
479 #  define ADCSI(rd,rn,im)               CC_ADCSI(ARM_CC_AL,rd,rn,im)
480 #  define T2_ADCSI(rd,rn,im)            torri(THUMB2_ADCI|ARM_S,rn,rd,im)
481 #  define CC_SUB(cc,rd,rn,rm)           corrr(cc,ARM_SUB,rn,rd,rm)
482 #  define SUB(rd,rn,rm)                 CC_SUB(ARM_CC_AL,rd,rn,rm)
483 #  define T1_SUB(rd,rn,rm)              is(THUMB_SUB|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rd))
484 #  define T2_SUB(rd,rn,rm)              torrr(THUMB2_SUB,rn,rd,rm)
485 #  define CC_SUBI(cc,rd,rn,im)          corri(cc,ARM_SUB|ARM_I,rn,rd,im)
486 #  define SUBI(rd,rn,im)                CC_SUBI(ARM_CC_AL,rd,rn,im)
487 #  define T1_SUBI3(rd,rn,im)            is(THUMB_SUBI3|(_u3(im)<<6)|(_u3(rn)<<3)|_u3(rd))
488 #  define T1_SUBI8(rdn,im)              is(THUMB_SUBI8|(_u3(rdn)<<8)|_u8(im))
489 #  define T2_SUBI(rd,rn,im)             torri(THUMB2_SUBI,rn,rd,im)
490 #  define T2_SUBWI(rd,rn,im)            torri(THUMB2_SUBWI,rn,rd,im)
491 #  define CC_SUBS(cc,rd,rn,rm)          corrr(cc,ARM_SUB|ARM_S,rn,rd,rm)
492 #  define SUBS(rd,rn,rm)                CC_SUBS(ARM_CC_AL,rd,rn,rm)
493 #  define T2_SUBS(rd,rn,rm)             torrr(THUMB2_SUB|ARM_S,rn,rd,rm)
494 #  define CC_SUBSI(cc,rd,rn,im)         corri(cc,ARM_SUB|ARM_S|ARM_I,rn,rd,im)
495 #  define SUBSI(rd,rn,im)               CC_SUBSI(ARM_CC_AL,rd,rn,im)
496 #  define T2_SUBSI(rd,rn,im)            torri(THUMB2_SUBI|ARM_S,rn,rd,im)
497 #  define CC_SBC(cc,rd,rn,rm)           corrr(cc,ARM_SBC,rn,rd,rm)
498 #  define SBC(rd,rn,rm)                 CC_SBC(ARM_CC_AL,rd,rn,rm)
499 #  define T1_SBC(rdn,rm)                is(THUMB_SBC|(_u3(rm)<<3)|_u3(rdn))
500 #  define T2_SBC(rd,rn,rm)              torrr(THUMB2_SBC,rn,rd,rm)
501 #  define CC_SBCI(cc,rd,rn,im)          corri(cc,ARM_SBC|ARM_I,rn,rd,im)
502 #  define SBCI(rd,rn,im)                CC_SBCI(ARM_CC_AL,rd,rn,im)
503 #  define T2_SBCI(rd,rn,im)             torri(THUMB2_SBCI,rn,rd,im)
504 #  define CC_SBCS(cc,rd,rn,rm)          corrr(cc,ARM_SBC|ARM_S,rn,rd,rm)
505 #  define SBCS(rd,rn,rm)                CC_SBCS(ARM_CC_AL,rd,rn,rm)
506 #  define T2_SBCS(rd,rn,rm)             torrr(THUMB2_SBC|ARM_S,rn,rd,rm)
507 #  define CC_SBCSI(cc,rd,rn,im)         corri(cc,ARM_SBC|ARM_S|ARM_I,rn,rd,im)
508 #  define SBCSI(rd,rn,im)               CC_SBCSI(ARM_CC_AL,rd,rn,im)
509 #  define T2_SBCSI(rd,rn,im)            torri(THUMB2_SBCI|ARM_S,rn,rd,im)
510 #  define CC_RSB(cc,rd,rn,rm)           corrr(cc,ARM_RSB,rn,rd,rm)
511 #  define RSB(rd,rn,rm)                 CC_RSB(ARM_CC_AL,rd,rn,rm)
/* Thumb-2 register-form RSB.  The opcode table above this point defines only
 * THUMB2_RSBI, so the register-form opcode is supplied here; the guard keeps
 * this harmless if another part of the file already defines it. */
#  ifndef THUMB2_RSB
#    define THUMB2_RSB                  0xebc00000
#  endif
#  define T2_RSB(rd,rn,rm)              torrr(THUMB2_RSB,rn,rd,rm)
513 #  define CC_RSBI(cc,rd,rn,im)          corri(cc,ARM_RSB|ARM_I,rn,rd,im)
514 #  define RSBI(rd,rn,im)                CC_RSBI(ARM_CC_AL,rd,rn,im)
515 #  define T1_RSBI(rd,rn)                is(THUMB_RSBI|(_u3(rn)<<3)|_u3(rd))
516 #  define T2_RSBI(rd,rn,im)             torri(THUMB2_RSBI,rn,rd,im)
517 #  define CC_MUL(cc,rl,rn,rm)           corrrr(cc,ARM_MUL,rl,0,rm,rn)
518 #  define MUL(rl,rn,rm)                 CC_MUL(ARM_CC_AL,rl,rn,rm)
519 #  define T1_MUL(rdm,rn)                is(THUMB_MUL|(_u3(rn)<<3)|_u3(rdm))
520 #  define T2_MUL(rd,rn,rm)              torrr(THUMB2_MUL,rn,rd,rm)
521 #  define CC_SMULL(cc,rl,rh,rn,rm)      corrrr(cc,ARM_SMULL,rh,rl,rm,rn)
522 #  define SMULL(rl,rh,rn,rm)            CC_SMULL(ARM_CC_AL,rl,rh,rn,rm)
523 #  define T2_SMULL(rl,rh,rn,rm)         torrrr(THUMB2_SMULL,rn,rl,rh,rm)
524 #  define CC_UMULL(cc,rl,rh,rn,rm)      corrrr(cc,ARM_UMULL,rh,rl,rm,rn)
525 #  define UMULL(rl,rh,rn,rm)            CC_UMULL(ARM_CC_AL,rl,rh,rn,rm)
526 #  define T2_UMULL(rl,rh,rn,rm)         torrrr(THUMB2_UMULL,rn,rl,rh,rm)
527 #  define T2_SDIV(rd,rn,rm)             torrr(THUMB2_SDIV,rn,rd,rm)
528 #  define T2_UDIV(rd,rn,rm)             torrr(THUMB2_UDIV,rn,rd,rm)
/*
 * AND / ANDI / ANDS / ANDSI emitters.  CC_* variants take an explicit
 * condition code and forward to corrr()/corri(); the unprefixed names pass
 * ARM_CC_AL.  The *S forms OR ARM_S into the opcode.  T1_AND is the
 * two-operand form (rdn,rm) packed into the word handed to is().
 * T2_* variants forward to torrr()/torri(), which take no condition code:
 * torri() is always called as (opcode,rn,rd,im), so T2_ANDSI must not pass
 * ARM_CC_AL as a leading argument.
 */
529 #  define CC_AND(cc,rd,rn,rm)           corrr(cc,ARM_AND,rn,rd,rm)
530 #  define AND(rd,rn,rm)                 CC_AND(ARM_CC_AL,rd,rn,rm)
531 #  define T1_AND(rdn,rm)                is(THUMB_AND|(_u3(rm)<<3)|_u3(rdn))
532 #  define T2_AND(rd,rn,rm)              torrr(THUMB2_AND,rn,rd,rm)
533 #  define CC_ANDI(cc,rd,rn,im)          corri(cc,ARM_AND|ARM_I,rn,rd,im)
534 #  define ANDI(rd,rn,im)                CC_ANDI(ARM_CC_AL,rd,rn,im)
535 #  define T2_ANDI(rd,rn,im)             torri(THUMB2_ANDI,rn,rd,im)
536 #  define CC_ANDS(cc,rd,rn,rm)          corrr(cc,ARM_AND|ARM_S,rn,rd,rm)
537 #  define ANDS(rd,rn,rm)                CC_ANDS(ARM_CC_AL,rd,rn,rm)
538 #  define T2_ANDS(rd,rn,rm)             torrr(THUMB2_AND|ARM_S,rn,rd,rm)
539 #  define CC_ANDSI(cc,rd,rn,im)         corri(cc,ARM_AND|ARM_S|ARM_I,rn,rd,im)
540 #  define ANDSI(rd,rn,im)               CC_ANDSI(ARM_CC_AL,rd,rn,im)
541 #  define T2_ANDSI(rd,rn,im)            torri(THUMB2_ANDI|ARM_S,rn,rd,im)
/*
 * BIC / BICI / BICS / BICSI emitters.  CC_* variants take an explicit
 * condition code and forward to corrr()/corri(); the unprefixed names pass
 * ARM_CC_AL.  The *S forms OR ARM_S into the opcode.
 * T2_* variants forward to torrr()/torri(), which take no condition code:
 * torri() is always called as (opcode,rn,rd,im), so T2_BICSI must not pass
 * ARM_CC_AL as a leading argument.
 */
542 #  define CC_BIC(cc,rd,rn,rm)           corrr(cc,ARM_BIC,rn,rd,rm)
543 #  define BIC(rd,rn,rm)                 CC_BIC(ARM_CC_AL,rd,rn,rm)
544 #  define T2_BIC(rd,rn,rm)              torrr(THUMB2_BIC,rn,rd,rm)
545 #  define CC_BICI(cc,rd,rn,im)          corri(cc,ARM_BIC|ARM_I,rn,rd,im)
546 #  define BICI(rd,rn,im)                CC_BICI(ARM_CC_AL,rd,rn,im)
547 #  define T2_BICI(rd,rn,im)             torri(THUMB2_BICI,rn,rd,im)
548 #  define CC_BICS(cc,rd,rn,rm)          corrr(cc,ARM_BIC|ARM_S,rn,rd,rm)
549 #  define BICS(rd,rn,rm)                CC_BICS(ARM_CC_AL,rd,rn,rm)
550 #  define T2_BICS(rd,rn,rm)             torrr(THUMB2_BIC|ARM_S,rn,rd,rm)
551 #  define CC_BICSI(cc,rd,rn,im)         corri(cc,ARM_BIC|ARM_S|ARM_I,rn,rd,im)
552 #  define BICSI(rd,rn,im)               CC_BICSI(ARM_CC_AL,rd,rn,im)
553 #  define T2_BICSI(rd,rn,im)            torri(THUMB2_BICI|ARM_S,rn,rd,im)
/*
 * ORR / ORR_SI / ORRI emitters.  CC_* variants take an explicit condition
 * code; the unprefixed names pass ARM_CC_AL.  T1_ORR is the two-operand
 * form (rdn,rm) packed into the word handed to is().
 * CC_ORR_SI ORs the shift selector `sh` into the opcode and forwards to
 * corrrs() with the shift amount `im`.  Its third register parameter is
 * named rm to match the expansion (and CC_EOR_SI); callers pass it
 * positionally, so ORR_SI is unaffected.
 */
554 #  define CC_ORR(cc,rd,rn,rm)           corrr(cc,ARM_ORR,rn,rd,rm)
555 #  define ORR(rd,rn,rm)                 CC_ORR(ARM_CC_AL,rd,rn,rm)
556 #  define T1_ORR(rdn,rm)                is(THUMB_ORR|(_u3(rm)<<3)|_u3(rdn))
557 #  define T2_ORR(rd,rn,rm)              torrr(THUMB2_ORR,rn,rd,rm)
558 #  define CC_ORR_SI(cc,rd,rn,rm,sh,im)  corrrs(cc,ARM_ORR|sh,rn,rd,rm,im)
559 #  define ORR_SI(r0,r1,r2,sh,im)        CC_ORR_SI(ARM_CC_AL,r0,r1,r2,sh,im)
560 #  define CC_ORRI(cc,rd,rn,im)          corri(cc,ARM_ORR|ARM_I,rn,rd,im)
561 #  define ORRI(rd,rn,im)                CC_ORRI(ARM_CC_AL,rd,rn,im)
562 #  define T2_ORRI(rd,rn,im)             torri(THUMB2_ORRI,rn,rd,im)
/*
 * EOR / EOR_SI / EORI emitters.  CC_* variants take an explicit condition
 * code; the unprefixed names pass ARM_CC_AL.  T1_EOR is the two-operand
 * form (rdn,rm) packed into the word handed to is().  CC_EOR_SI ORs the
 * shift selector `sh` into the opcode and forwards to corrrs() with the
 * shift amount `im`.
 */
563 #  define CC_EOR(cc,rd,rn,rm)           corrr(cc,ARM_EOR,rn,rd,rm)
564 #  define EOR(rd,rn,rm)                 CC_EOR(ARM_CC_AL,rd,rn,rm)
565 #  define T1_EOR(rdn,rm)                is(THUMB_EOR|(_u3(rm)<<3)|_u3(rdn))
566 #  define T2_EOR(rd,rn,rm)              torrr(THUMB2_EOR,rn,rd,rm)
567 #  define CC_EOR_SI(cc,rd,rn,rm,sh,im)  corrrs(cc,ARM_EOR|sh,rn,rd,rm,im)
568 #  define EOR_SI(r0,r1,r2,sh,im)        CC_EOR_SI(ARM_CC_AL,r0,r1,r2,sh,im)
569 #  define CC_EORI(cc,rd,rn,im)          corri(cc,ARM_EOR|ARM_I,rn,rd,im)
570 #  define EORI(rd,rn,im)                CC_EORI(ARM_CC_AL,rd,rn,im)
571 #  define T2_EORI(rd,rn,im)             torri(THUMB2_EORI,rn,rd,im)
/*
 * Two-operand (rd,rm) emitters: REV, REV16, SXTB, UXTB, SXTH, UXTH.
 * ARM forms forward to c6orr() with a condition code (ARM_CC_AL for the
 * unprefixed names).  T1_* forms pack two 3-bit register fields into the
 * word handed to is().  T2_* forms forward to torrr(): REV/REV16 pass rm in
 * both the first and the last register slot, while the extend forms pass
 * _R15_REGNO in the first slot.
 */
572 #  define CC_REV(cc,rd,rm)              c6orr(cc,ARM_REV,rd,rm)
573 #  define REV(rd,rm)                    CC_REV(ARM_CC_AL,rd,rm)
574 #  define T1_REV(rd,rm)                 is(THUMB_REV|(_u3(rm)<<3)|_u3(rd))
575 #  define T2_REV(rd,rm)                 torrr(THUMB2_REV,rm,rd,rm)
576 #  define CC_REV16(cc,rd,rm)            c6orr(cc,ARM_REV16,rd,rm)
577 #  define REV16(rd,rm)                  CC_REV16(ARM_CC_AL,rd,rm)
578 #  define T1_REV16(rd,rm)               is(THUMB_REV16|(_u3(rm)<<3)|_u3(rd))
579 #  define T2_REV16(rd,rm)               torrr(THUMB2_REV16,rm,rd,rm)
580 #  define CC_SXTB(cc,rd,rm)             c6orr(cc,ARM_SXTB,rd,rm)
581 #  define SXTB(rd,rm)                   CC_SXTB(ARM_CC_AL,rd,rm)
582 #  define T1_SXTB(rd,rm)                is(THUMB_SXTB|(_u3(rm)<<3)|_u3(rd))
583 #  define T2_SXTB(rd,rm)                torrr(THUMB2_SXTB,_R15_REGNO,rd,rm)
584 #  define CC_UXTB(cc,rd,rm)             c6orr(cc,ARM_UXTB,rd,rm)
585 #  define UXTB(rd,rm)                   CC_UXTB(ARM_CC_AL,rd,rm)
586 #  define T1_UXTB(rd,rm)                is(THUMB_UXTB|(_u3(rm)<<3)|_u3(rd))
587 #  define T2_UXTB(rd,rm)                torrr(THUMB2_UXTB,_R15_REGNO,rd,rm)
588 #  define CC_SXTH(cc,rd,rm)             c6orr(cc,ARM_SXTH,rd,rm)
589 #  define SXTH(rd,rm)                   CC_SXTH(ARM_CC_AL,rd,rm)
590 #  define T1_SXTH(rd,rm)                is(THUMB_SXTH|(_u3(rm)<<3)|_u3(rd))
591 #  define T2_SXTH(rd,rm)                torrr(THUMB2_SXTH,_R15_REGNO,rd,rm)
592 #  define CC_UXTH(cc,rd,rm)             c6orr(cc,ARM_UXTH,rd,rm)
593 #  define UXTH(rd,rm)                   CC_UXTH(ARM_CC_AL,rd,rm)
594 #  define T1_UXTH(rd,rm)                is(THUMB_UXTH|(_u3(rm)<<3)|_u3(rd))
595 #  define T2_UXTH(rd,rm)                torrr(THUMB2_UXTH,_R15_REGNO,rd,rm)
/*
 * Shift emitters (LSL, LSR, ASR).  All ARM forms funnel through CC_SHIFT,
 * which forwards unchanged to cshift(cc,o,rd,rm,rn,im).
 *  - Register-shift forms OR ARM_R into the opcode and pass 0 as `im`;
 *    note the macro's (rn,rm) reach CC_SHIFT swapped, as (rm,rn).
 *  - Immediate forms pass 0 in the rm slot and the shift amount as `im`.
 * T1 register forms are two-operand (rdn,rm); T1 immediate forms place a
 * 5-bit immediate at bit 6 above two 3-bit register fields.
 * T2 register forms forward to torrr(), T2 immediate forms to tshift().
 */
596 #  define CC_SHIFT(cc,o,rd,rm,rn,im)    cshift(cc,o,rd,rm,rn,im)
597 #  define CC_LSL(cc,rd,rn,rm)           CC_SHIFT(cc,ARM_LSL|ARM_R,rd,rm,rn,0)
598 #  define LSL(rd,rn,rm)                 CC_LSL(ARM_CC_AL,rd,rn,rm)
599 #  define T1_LSL(rdn,rm)                is(THUMB_LSL|(_u3(rm)<<3)|_u3(rdn))
600 #  define T2_LSL(rd,rn,rm)              torrr(THUMB2_LSL,rn,rd,rm)
601 #  define CC_LSLI(cc,rd,rn,im)          CC_SHIFT(cc,ARM_LSL,rd,0,rn,im)
602 #  define LSLI(rd,rn,im)                CC_LSLI(ARM_CC_AL,rd,rn,im)
603 #  define T1_LSLI(rd,rm,im)             is(THUMB_LSLI|(_u5(im)<<6)|(_u3(rm)<<3)|_u3(rd))
604 #  define T2_LSLI(rd,rm,im)             tshift(THUMB2_LSLI,rd,rm,im)
605 #  define CC_LSR(cc,rd,rn,rm)           CC_SHIFT(cc,ARM_LSR|ARM_R,rd,rm,rn,0)
606 #  define LSR(rd,rn,rm)                 CC_LSR(ARM_CC_AL,rd,rn,rm)
607 #  define T1_LSR(rdn,rm)                is(THUMB_LSR|(_u3(rm)<<3)|_u3(rdn))
608 #  define T2_LSR(rd,rn,rm)              torrr(THUMB2_LSR,rn,rd,rm)
609 #  define CC_LSRI(cc,rd,rn,im)          CC_SHIFT(cc,ARM_LSR,rd,0,rn,im)
610 #  define LSRI(rd,rn,im)                CC_LSRI(ARM_CC_AL,rd,rn,im)
611 #  define T1_LSRI(rd,rm,im)             is(THUMB_LSRI|(_u5(im)<<6)|(_u3(rm)<<3)|_u3(rd))
612 #  define T2_LSRI(rd,rm,im)             tshift(THUMB2_LSRI,rd,rm,im)
613 #  define CC_ASR(cc,rd,rn,rm)           CC_SHIFT(cc,ARM_ASR|ARM_R,rd,rm,rn,0)
614 #  define ASR(rd,rn,rm)                 CC_ASR(ARM_CC_AL,rd,rn,rm)
615 #  define T1_ASR(rdn,rm)                is(THUMB_ASR|(_u3(rm)<<3)|_u3(rdn))
616 #  define T2_ASR(rd,rn,rm)              torrr(THUMB2_ASR,rn,rd,rm)
617 #  define CC_ASRI(cc,rd,rn,im)          CC_SHIFT(cc,ARM_ASR,rd,0,rn,im)
618 #  define ASRI(rd,rn,im)                CC_ASRI(ARM_CC_AL,rd,rn,im)
619 #  define T1_ASRI(rd,rm,im)             is(THUMB_ASRI|(_u5(im)<<6)|(_u3(rm)<<3)|_u3(rd))
620 #  define T2_ASRI(rd,rm,im)             tshift(THUMB2_ASRI,rd,rm,im)
/*
 * CMP / CMPI emitters.  ARM forms pass 0 in the destination-register slot
 * of corrr()/corri(); T2 forms pass _R15_REGNO in that slot of
 * torrr()/torri().  T1_CMP packs two 3-bit register fields; T1_CMPI packs a
 * 3-bit register at bit 8 and an 8-bit immediate.
 * T1_CMPX accepts 4-bit register numbers: bit 3 of rn is moved to bit 7,
 * rm occupies bits 6:3 and the low three bits of rn occupy bits 2:0.  The
 * low field uses _u3() so that the argument is parenthesized like every
 * other field in this file.
 */
621 #  define CC_CMP(cc,rn,rm)              corrr(cc,ARM_CMP,rn,0,rm)
622 #  define CMP(rn,rm)                    CC_CMP(ARM_CC_AL,rn,rm)
623 #  define T1_CMP(rn,rm)                 is(THUMB_CMP|(_u3(rm)<<3)|_u3(rn))
624 #  define T1_CMPX(rn,rm)                is(THUMB_CMPX|((_u4(rn)&8)<<4)|(_u4(rm)<<3)|_u3(rn))
625 #  define T2_CMP(rn,rm)                 torrr(THUMB2_CMP,rn,_R15_REGNO,rm)
626 #  define CC_CMPI(cc,rn,im)             corri(cc,ARM_CMP|ARM_I,rn,0,im)
627 #  define CMPI(rn,im)                   CC_CMPI(ARM_CC_AL,rn,im)
628 #  define T1_CMPI(rn,im)                is(THUMB_CMPI|(_u3(rn)<<8)|_u8(im))
629 #  define T2_CMPI(rn,im)                torri(THUMB2_CMPI,rn,_R15_REGNO,im)
/*
 * CMN / CMNI emitters.  ARM forms pass 0 in the destination-register slot
 * of corrr()/corri(); T2 forms pass _R15_REGNO in that slot of
 * torrr()/torri().
 * T1_CMN uses the same field layout as T1_CMP: rm at bits 5:3 and rn at
 * bits 2:0.  (rn must go in the low field; encoding rm twice would compare
 * rm against itself and ignore rn.)
 */
630 #  define CC_CMN(cc,rn,rm)              corrr(cc,ARM_CMN,rn,0,rm)
631 #  define CMN(rn,rm)                    CC_CMN(ARM_CC_AL,rn,rm)
632 #  define T1_CMN(rn,rm)                 is(THUMB_CMN|(_u3(rm)<<3)|_u3(rn))
633 #  define T2_CMN(rn,rm)                 torrr(THUMB2_CMN,rn,_R15_REGNO,rm)
634 #  define CC_CMNI(cc,rn,im)             corri(cc,ARM_CMN|ARM_I,rn,0,im)
635 #  define CMNI(rn,im)                   CC_CMNI(ARM_CC_AL,rn,im)
636 #  define T2_CMNI(rn,im)                torri(THUMB2_CMNI,rn,_R15_REGNO,im)
/*
 * TST / TSTI emitters.  Like CMP/CMN/TEQ, the ARM forms pass a literal 0 in
 * the destination-register slot of corrr()/corri(); CC_TST must not name
 * `r0` there, since that is not one of its parameters and would capture
 * whatever `r0` happens to be in scope at the use site.  TST is expressed
 * through CC_TST so both share one expansion.
 * T2 forms pass _R15_REGNO in the destination slot of torrr()/torri().
 */
637 #  define CC_TST(cc,rn,rm)              corrr(cc,ARM_TST,rn,0,rm)
638 #  define TST(rn,rm)                    CC_TST(ARM_CC_AL,rn,rm)
639 #  define T1_TST(rn,rm)                 is(THUMB_TST|(_u3(rm)<<3)|_u3(rn))
640 #  define T2_TST(rn,rm)                 torrr(THUMB2_TST,rn,_R15_REGNO,rm)
641 #  define CC_TSTI(cc,rn,im)             corri(cc,ARM_TST|ARM_I,rn,0,im)
642 #  define TSTI(rn,im)                   CC_TSTI(ARM_CC_AL,rn,im)
643 #  define T2_TSTI(rn,im)                torri(THUMB2_TSTI,rn,_R15_REGNO,im)
/*
 * TEQ / TEQI emitters (ARM encodings only in this group).  Both pass a
 * literal 0 in the destination-register slot of corrr()/corri().
 * CC_TEQI's register parameter is named rn so that it is the one actually
 * substituted into the expansion; TEQI passes it positionally.
 */
644 #  define CC_TEQ(cc,rn,rm)              corrr(cc,ARM_TEQ,rn,0,rm)
645 #  define TEQ(rn,rm)                    CC_TEQ(ARM_CC_AL,rn,rm)
646 #  define CC_TEQI(cc,rn,im)             corri(cc,ARM_TEQ|ARM_I,rn,0,im)
647 #  define TEQI(rn,im)                   CC_TEQI(ARM_CC_AL,rn,im)
/*
 * Branch emitters.
 *  - BX/BLX (register): ARM forms forward to cbx(); T1 forms place a 4-bit
 *    register number at bit 3.  T1_BX uses the literal 0x4700 as its
 *    opcode base, whereas T1_BLX uses the named constant THUMB_BLX.
 *  - BLXI forwards to blxi(); T2_BLXI, T2_B and T2_BLI forward to tb().
 *  - B/BLI (immediate): ARM forms forward to cb(); T1 forms forward to
 *    tc8() (conditional) and t11() (unconditional); T2_CC_B forwards to
 *    tcb().
 */
648 #  define CC_BX(cc,rm)                  cbx(cc,ARM_BX,rm)
649 #  define BX(rm)                        CC_BX(ARM_CC_AL,rm)
650 #  define T1_BX(rm)                     is(0x4700|(_u4(rm)<<3))
651 #  define CC_BLX(cc,rm)                 cbx(cc,ARM_BLX,rm)
652 #  define BLX(rm)                       CC_BLX(ARM_CC_AL,rm)
653 #  define T1_BLX(rm)                    is(THUMB_BLX|(_u4(rm)<<3))
654 #  define BLXI(im)                      blxi(im)
655 #  define T2_BLXI(im)                   tb(THUMB2_BLXI,im)
656 #  define CC_B(cc,im)                   cb(cc,ARM_B,im)
657 #  define B(im)                         CC_B(ARM_CC_AL,im)
658 #  define T1_CC_B(cc,im)                tc8(cc,im)
659 #  define T1_B(im)                      t11(im)
660 #  define T2_CC_B(cc,im)                tcb(cc,im)
661 #  define T2_B(im)                      tb(THUMB2_B,im)
662 #  define CC_BLI(cc,im)                 cb(cc,ARM_BLI,im)
663 #  define BLI(im)                       CC_BLI(ARM_CC_AL,im)
664 #  define T2_BLI(im)                    tb(THUMB2_BLI,im)
/*
 * LDRSB emitters (macro arguments are rt, rn, then rm or im).
 *  - Register-offset ARM forms use corrr(); the plain form ORs ARM_P into
 *    the opcode and the *N form omits it.
 *    NOTE(review): presumably ARM_P selects an added offset and *N a
 *    subtracted one -- confirm against the ARM_P definition.
 *  - Immediate ARM forms use corri8(), with the same ARM_P / *N split.
 *  - T1 form packs three 3-bit register fields (rm at bit 6, rn at bit 3).
 *  - T2 forms: torxr() for register offset; torri8() for the immediate
 *    form, with THUMB2_U or'ed in except for the *IN variant; torri12()
 *    for the *WI variant.
 */
665 #  define CC_LDRSB(cc,rt,rn,rm)         corrr(cc,ARM_LDRSB|ARM_P,rn,rt,rm)
666 #  define LDRSB(rt,rn,rm)               CC_LDRSB(ARM_CC_AL,rt,rn,rm)
667 #  define T1_LDRSB(rt,rn,rm)            is(THUMB_LDRSB|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt))
668 #  define T2_LDRSB(rt,rn,rm)            torxr(THUMB2_LDRSB,rn,rt,rm)
669 #  define CC_LDRSBN(cc,rt,rn,rm)        corrr(cc,ARM_LDRSB,rn,rt,rm)
670 #  define LDRSBN(rt,rn,rm)              CC_LDRSBN(ARM_CC_AL,rt,rn,rm)
671 #  define CC_LDRSBI(cc,rt,rn,im)        corri8(cc,ARM_LDRSBI|ARM_P,rn,rt,im)
672 #  define LDRSBI(rt,rn,im)              CC_LDRSBI(ARM_CC_AL,rt,rn,im)
673 #  define T2_LDRSBI(rt,rn,im)           torri8(THUMB2_LDRSBI|THUMB2_U,rn,rt,im)
674 #  define T2_LDRSBWI(rt,rn,im)          torri12(THUMB2_LDRSBWI,rn,rt,im)
675 #  define CC_LDRSBIN(cc,rt,rn,im)       corri8(cc,ARM_LDRSBI,rn,rt,im)
676 #  define LDRSBIN(rt,rn,im)             CC_LDRSBIN(ARM_CC_AL,rt,rn,im)
677 #  define T2_LDRSBIN(rt,rn,im)          torri8(THUMB2_LDRSBI,rn,rt,im)
/*
 * LDRB emitters (macro arguments are rt, rn, then rm or im).
 *  - Register-offset ARM forms use corrr(); the plain form ORs ARM_P into
 *    the opcode and the *N form omits it.
 *  - Immediate ARM forms use corri() (not corri8()), with the same
 *    ARM_P / *N split.
 *  - T1 forms: register offset packs three 3-bit register fields;
 *    T1_LDRBI packs a 5-bit immediate at bit 6.
 *  - T2 forms: torxr() for register offset; torri8() for the immediate
 *    form, with THUMB2_U or'ed in except for the *IN variant; torri12()
 *    for the *WI variant.
 */
678 #  define CC_LDRB(cc,rt,rn,rm)          corrr(cc,ARM_LDRB|ARM_P,rn,rt,rm)
679 #  define LDRB(rt,rn,rm)                CC_LDRB(ARM_CC_AL,rt,rn,rm)
680 #  define T1_LDRB(rt,rn,rm)             is(THUMB_LDRB|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt))
681 #  define T2_LDRB(rt,rn,rm)             torxr(THUMB2_LDRB,rn,rt,rm)
682 #  define CC_LDRBN(cc,rt,rn,rm)         corrr(cc,ARM_LDRB,rn,rt,rm)
683 #  define LDRBN(rt,rn,rm)               CC_LDRBN(ARM_CC_AL,rt,rn,rm)
684 #  define CC_LDRBI(cc,rt,rn,im)         corri(cc,ARM_LDRBI|ARM_P,rn,rt,im)
685 #  define LDRBI(rt,rn,im)               CC_LDRBI(ARM_CC_AL,rt,rn,im)
686 #  define T1_LDRBI(rt,rn,im)            is(THUMB_LDRBI|(_u5(im)<<6)|(_u3(rn)<<3)|_u3(rt))
687 #  define T2_LDRBI(rt,rn,im)            torri8(THUMB2_LDRBI|THUMB2_U,rn,rt,im)
688 #  define T2_LDRBWI(rt,rn,im)           torri12(THUMB2_LDRBWI,rn,rt,im)
689 #  define CC_LDRBIN(cc,rt,rn,im)        corri(cc,ARM_LDRBI,rn,rt,im)
690 #  define LDRBIN(rt,rn,im)              CC_LDRBIN(ARM_CC_AL,rt,rn,im)
691 #  define T2_LDRBIN(rt,rn,im)           torri8(THUMB2_LDRBI,rn,rt,im)
/*
 * LDRSH emitters (macro arguments are rt, rn, then rm or im).
 *  - Register-offset ARM forms use corrr(); the plain form ORs ARM_P into
 *    the opcode and the *N form omits it.
 *  - Immediate ARM forms use corri8(), with the same ARM_P / *N split.
 *  - T1 form packs three 3-bit register fields.
 *  - T2 forms: torxr() for register offset; torri8() for the immediate
 *    form, with THUMB2_U or'ed in except for the *IN variant; torri12()
 *    for the *WI variant.
 */
692 #  define CC_LDRSH(cc,rt,rn,rm)         corrr(cc,ARM_LDRSH|ARM_P,rn,rt,rm)
693 #  define LDRSH(rt,rn,rm)               CC_LDRSH(ARM_CC_AL,rt,rn,rm)
694 #  define T1_LDRSH(rt,rn,rm)            is(THUMB_LDRSH|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt))
695 #  define T2_LDRSH(rt,rn,rm)            torxr(THUMB2_LDRSH,rn,rt,rm)
696 #  define CC_LDRSHN(cc,rt,rn,rm)        corrr(cc,ARM_LDRSH,rn,rt,rm)
697 #  define LDRSHN(rt,rn,rm)              CC_LDRSHN(ARM_CC_AL,rt,rn,rm)
698 #  define CC_LDRSHI(cc,rt,rn,im)        corri8(cc,ARM_LDRSHI|ARM_P,rn,rt,im)
699 #  define LDRSHI(rt,rn,im)              CC_LDRSHI(ARM_CC_AL,rt,rn,im)
700 #  define T2_LDRSHI(rt,rn,im)           torri8(THUMB2_LDRSHI|THUMB2_U,rn,rt,im)
701 #  define T2_LDRSHWI(rt,rn,im)          torri12(THUMB2_LDRSHWI,rn,rt,im)
702 #  define CC_LDRSHIN(cc,rt,rn,im)       corri8(cc,ARM_LDRSHI,rn,rt,im)
703 #  define LDRSHIN(rt,rn,im)             CC_LDRSHIN(ARM_CC_AL,rt,rn,im)
704 #  define T2_LDRSHIN(rt,rn,im)          torri8(THUMB2_LDRSHI,rn,rt,im)
/*
 * LDRH emitters (macro arguments are rt, rn, then rm or im).
 *  - Register-offset ARM forms use corrr(); the plain form ORs ARM_P into
 *    the opcode and the *N form omits it.
 *  - Immediate ARM forms use corri8(), with the same ARM_P / *N split.
 *  - T1 forms: register offset packs three 3-bit register fields;
 *    T1_LDRHI packs a 5-bit immediate at bit 6.
 *  - T2 forms: torxr() for register offset; torri8() for the immediate
 *    form, with THUMB2_U or'ed in except for the *IN variant; torri12()
 *    for the *WI variant.
 */
705 #  define CC_LDRH(cc,rt,rn,rm)          corrr(cc,ARM_LDRH|ARM_P,rn,rt,rm)
706 #  define LDRH(rt,rn,rm)                CC_LDRH(ARM_CC_AL,rt,rn,rm)
707 #  define T1_LDRH(rt,rn,rm)             is(THUMB_LDRH|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt))
708 #  define T2_LDRH(rt,rn,rm)             torxr(THUMB2_LDRH,rn,rt,rm)
709 #  define CC_LDRHN(cc,rt,rn,rm)         corrr(cc,ARM_LDRH,rn,rt,rm)
710 #  define LDRHN(rt,rn,rm)               CC_LDRHN(ARM_CC_AL,rt,rn,rm)
711 #  define CC_LDRHI(cc,rt,rn,im)         corri8(cc,ARM_LDRHI|ARM_P,rn,rt,im)
712 #  define LDRHI(rt,rn,im)               CC_LDRHI(ARM_CC_AL,rt,rn,im)
713 #  define T1_LDRHI(rt,rn,im)            is(THUMB_LDRHI|(_u5(im)<<6)|(_u3(rn)<<3)|_u3(rt))
714 #  define T2_LDRHI(rt,rn,im)            torri8(THUMB2_LDRHI|THUMB2_U,rn,rt,im)
715 #  define T2_LDRHWI(rt,rn,im)           torri12(THUMB2_LDRHWI,rn,rt,im)
716 #  define CC_LDRHIN(cc,rt,rn,im)        corri8(cc,ARM_LDRHI,rn,rt,im)
717 #  define LDRHIN(rt,rn,im)              CC_LDRHIN(ARM_CC_AL,rt,rn,im)
718 #  define T2_LDRHIN(rt,rn,im)           torri8(THUMB2_LDRHI,rn,rt,im)
/*
 * LDR emitters (macro arguments are rt, rn, then rm or im).
 *  - Register-offset ARM forms use corrr(); the plain form ORs ARM_P into
 *    the opcode and the *N form omits it.
 *  - Immediate ARM forms use corri(), with the same ARM_P / *N split.
 *  - T1 forms: register offset packs three 3-bit register fields; T1_LDRI
 *    packs a 5-bit immediate at bit 6; T1_LDRISP takes no base register
 *    and packs a 3-bit rt at bit 8 with an 8-bit immediate.
 *  - T2 forms: torxr() for register offset; torri8() for the immediate
 *    form, with THUMB2_U or'ed in except for the *IN variant; torri12()
 *    for the *WI variant.
 */
719 #  define CC_LDR(cc,rt,rn,rm)           corrr(cc,ARM_LDR|ARM_P,rn,rt,rm)
720 #  define LDR(rt,rn,rm)                 CC_LDR(ARM_CC_AL,rt,rn,rm)
721 #  define T1_LDR(rt,rn,rm)              is(THUMB_LDR|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt))
722 #  define T2_LDR(rt,rn,rm)              torxr(THUMB2_LDR,rn,rt,rm)
723 #  define CC_LDRN(cc,rt,rn,rm)          corrr(cc,ARM_LDR,rn,rt,rm)
724 #  define LDRN(rt,rn,rm)                CC_LDRN(ARM_CC_AL,rt,rn,rm)
725 #  define CC_LDRI(cc,rt,rn,im)          corri(cc,ARM_LDRI|ARM_P,rn,rt,im)
726 #  define LDRI(rt,rn,im)                CC_LDRI(ARM_CC_AL,rt,rn,im)
727 #  define T1_LDRI(rt,rn,im)             is(THUMB_LDRI|(_u5(im)<<6)|(_u3(rn)<<3)|_u3(rt))
728 #  define T1_LDRISP(rt,im)              is(THUMB_LDRISP|(_u3(rt)<<8)|_u8(im))
729 #  define T2_LDRI(rt,rn,im)             torri8(THUMB2_LDRI|THUMB2_U,rn,rt,im)
730 #  define T2_LDRWI(rt,rn,im)            torri12(THUMB2_LDRWI,rn,rt,im)
731 #  define CC_LDRIN(cc,rt,rn,im)         corri(cc,ARM_LDRI,rn,rt,im)
732 #  define LDRIN(rt,rn,im)               CC_LDRIN(ARM_CC_AL,rt,rn,im)
733 #  define T2_LDRIN(rt,rn,im)            torri8(THUMB2_LDRI,rn,rt,im)
/*
 * LDRD emitters.
 *  - Register-offset ARM forms use corrr(); the plain form ORs ARM_P into
 *    the opcode and the *N form omits it.
 *  - Immediate ARM forms use corri8(), with the same ARM_P / *N split.
 *  - T2 forms take an explicit second target register rt2 and forward to
 *    torrri8(); T2_LDRDI ORs ARM_P into the opcode, T2_LDRDIN does not.
 * LDRDN's first parameter is named rt so that it is the one actually
 * substituted into the expansion (a parameter named rd would leave `rt`
 * referring to whatever is in scope at the use site).
 */
734 #  define CC_LDRD(cc,rt,rn,rm)          corrr(cc,ARM_LDRD|ARM_P,rn,rt,rm)
735 #  define LDRD(rt,rn,rm)                CC_LDRD(ARM_CC_AL,rt,rn,rm)
736 #  define T2_LDRDI(rt,rt2,rn,im)        torrri8(THUMB2_LDRDI|ARM_P,rn,rt,rt2,im)
737 #  define CC_LDRDN(cc,rt,rn,rm)         corrr(cc,ARM_LDRD,rn,rt,rm)
738 #  define LDRDN(rt,rn,rm)               CC_LDRDN(ARM_CC_AL,rt,rn,rm)
739 #  define CC_LDRDI(cc,rt,rn,im)         corri8(cc,ARM_LDRDI|ARM_P,rn,rt,im)
740 #  define LDRDI(rt,rn,im)               CC_LDRDI(ARM_CC_AL,rt,rn,im)
741 #  define CC_LDRDIN(cc,rt,rn,im)        corri8(cc,ARM_LDRDI,rn,rt,im)
742 #  define LDRDIN(rt,rn,im)              CC_LDRDIN(ARM_CC_AL,rt,rn,im)
743 #  define T2_LDRDIN(rt,rt2,rn,im)       torrri8(THUMB2_LDRDI,rn,rt,rt2,im)
/*
 * LDREX emitters.  The ARM form forwards to corrrr() with 0xf in the last
 * two register slots.  The T2 form takes an extra immediate and forwards
 * to torrri8() with 0xf in the third register slot.
 */
744 #  define CC_LDREX(cc,rt,rn)            corrrr(cc,ARM_LDREX,rn,rt,0xf,0xf)
745 #  define LDREX(rt,rn)                  CC_LDREX(ARM_CC_AL,rt,rn)
746 #  define T2_LDREX(rt,rn,im)            torrri8(THUMB2_LDREX,rn,rt,0xf,im)
/*
 * STRB emitters (macro arguments are rt, rn, then rm or im).
 *  - Register-offset ARM forms use corrr(); the plain form ORs ARM_P into
 *    the opcode and the *N form omits it.
 *  - Immediate ARM forms use corri(), with the same ARM_P / *N split.
 *  - T1 forms: register offset packs three 3-bit register fields;
 *    T1_STRBI packs a 5-bit immediate at bit 6.
 *  - T2 forms: torxr() for register offset; torri8() for the immediate
 *    form, with THUMB2_U or'ed in except for the *IN variant; torri12()
 *    for the *WI variant.
 */
747 #  define CC_STRB(cc,rt,rn,rm)          corrr(cc,ARM_STRB|ARM_P,rn,rt,rm)
748 #  define STRB(rt,rn,rm)                CC_STRB(ARM_CC_AL,rt,rn,rm)
749 #  define T1_STRB(rt,rn,rm)             is(THUMB_STRB|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt))
750 #  define T2_STRB(rt,rn,rm)             torxr(THUMB2_STRB,rn,rt,rm)
751 #  define CC_STRBN(cc,rt,rn,rm)         corrr(cc,ARM_STRB,rn,rt,rm)
752 #  define STRBN(rt,rn,rm)               CC_STRBN(ARM_CC_AL,rt,rn,rm)
753 #  define CC_STRBI(cc,rt,rn,im)         corri(cc,ARM_STRBI|ARM_P,rn,rt,im)
754 #  define STRBI(rt,rn,im)               CC_STRBI(ARM_CC_AL,rt,rn,im)
755 #  define T1_STRBI(rt,rn,im)            is(THUMB_STRBI|(_u5(im)<<6)|(_u3(rn)<<3)|_u3(rt))
756 #  define T2_STRBI(rt,rn,im)            torri8(THUMB2_STRBI|THUMB2_U,rn,rt,im)
757 #  define T2_STRBWI(rt,rn,im)           torri12(THUMB2_STRBWI,rn,rt,im)
758 #  define CC_STRBIN(cc,rt,rn,im)        corri(cc,ARM_STRBI,rn,rt,im)
759 #  define STRBIN(rt,rn,im)              CC_STRBIN(ARM_CC_AL,rt,rn,im)
760 #  define T2_STRBIN(rt,rn,im)           torri8(THUMB2_STRBI,rn,rt,im)
/*
 * STRH emitters (macro arguments are rt, rn, then rm or im).
 *  - Register-offset ARM forms use corrr(); the plain form ORs ARM_P into
 *    the opcode and the *N form omits it.
 *  - Immediate ARM forms use corri8(), with the same ARM_P / *N split.
 *  - T1 forms: register offset packs three 3-bit register fields;
 *    T1_STRHI packs a 5-bit immediate at bit 6.
 *  - T2 forms: torxr() for register offset; torri8() for the immediate
 *    form, with THUMB2_U or'ed in except for the *IN variant; torri12()
 *    for the *WI variant.
 */
761 #  define CC_STRH(cc,rt,rn,rm)          corrr(cc,ARM_STRH|ARM_P,rn,rt,rm)
762 #  define STRH(rt,rn,rm)                CC_STRH(ARM_CC_AL,rt,rn,rm)
763 #  define T1_STRH(rt,rn,rm)             is(THUMB_STRH|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt))
764 #  define T2_STRH(rt,rn,rm)             torxr(THUMB2_STRH,rn,rt,rm)
765 #  define CC_STRHN(cc,rt,rn,rm)         corrr(cc,ARM_STRH,rn,rt,rm)
766 #  define STRHN(rt,rn,rm)               CC_STRHN(ARM_CC_AL,rt,rn,rm)
767 #  define CC_STRHI(cc,rt,rn,im)         corri8(cc,ARM_STRHI|ARM_P,rn,rt,im)
768 #  define STRHI(rt,rn,im)               CC_STRHI(ARM_CC_AL,rt,rn,im)
769 #  define T1_STRHI(rt,rn,im)            is(THUMB_STRHI|(_u5(im)<<6)|(_u3(rn)<<3)|_u3(rt))
770 #  define T2_STRHI(rt,rn,im)            torri8(THUMB2_STRHI|THUMB2_U,rn,rt,im)
771 #  define T2_STRHWI(rt,rn,im)           torri12(THUMB2_STRHWI,rn,rt,im)
772 #  define CC_STRHIN(cc,rt,rn,im)        corri8(cc,ARM_STRHI,rn,rt,im)
773 #  define STRHIN(rt,rn,im)              CC_STRHIN(ARM_CC_AL,rt,rn,im)
774 #  define T2_STRHIN(rt,rn,im)           torri8(THUMB2_STRHI,rn,rt,im)
/*
 * STR emitters (macro arguments are rt, rn, then rm or im).
 *  - Register-offset ARM forms use corrr(); the plain form ORs ARM_P into
 *    the opcode and the *N form omits it.
 *  - Immediate ARM forms use corri(), with the same ARM_P / *N split.
 *  - T1 forms: register offset packs three 3-bit register fields; T1_STRI
 *    packs a 5-bit immediate at bit 6; T1_STRISP takes no base register
 *    and packs a 3-bit rt at bit 8 with an 8-bit immediate.
 *  - T2 forms: torxr() for register offset; torri8() for the immediate
 *    form, with THUMB2_U or'ed in except for the *IN variant; torri12()
 *    for the *WI variant.
 */
775 #  define CC_STR(cc,rt,rn,rm)           corrr(cc,ARM_STR|ARM_P,rn,rt,rm)
776 #  define STR(rt,rn,rm)                 CC_STR(ARM_CC_AL,rt,rn,rm)
777 #  define T1_STR(rt,rn,rm)              is(THUMB_STR|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt))
778 #  define T2_STR(rt,rn,rm)              torxr(THUMB2_STR,rn,rt,rm)
779 #  define CC_STRN(cc,rt,rn,rm)          corrr(cc,ARM_STR,rn,rt,rm)
780 #  define STRN(rt,rn,rm)                CC_STRN(ARM_CC_AL,rt,rn,rm)
781 #  define CC_STRI(cc,rt,rn,im)          corri(cc,ARM_STRI|ARM_P,rn,rt,im)
782 #  define STRI(rt,rn,im)                CC_STRI(ARM_CC_AL,rt,rn,im)
783 #  define T1_STRI(rt,rn,im)             is(THUMB_STRI|(_u5(im)<<6)|(_u3(rn)<<3)|_u3(rt))
784 #  define T1_STRISP(rt,im)              is(THUMB_STRISP|(_u3(rt)<<8)|(_u8(im)))
785 #  define T2_STRI(rt,rn,im)             torri8(THUMB2_STRI|THUMB2_U,rn,rt,im)
786 #  define T2_STRWI(rt,rn,im)            torri12(THUMB2_STRWI,rn,rt,im)
787 #  define CC_STRIN(cc,rt,rn,im)         corri(cc,ARM_STRI,rn,rt,im)
788 #  define STRIN(rt,rn,im)               CC_STRIN(ARM_CC_AL,rt,rn,im)
789 #  define T2_STRIN(rt,rn,im)            torri8(THUMB2_STRI,rn,rt,im)
/*
 * STRD emitters.
 *  - Register-offset ARM forms use corrr(); the plain form ORs ARM_P into
 *    the opcode and the *N form omits it.
 *  - Immediate ARM forms use corri8(), with the same ARM_P / *N split.
 *  - T2 forms take an explicit second register rt2 and forward to
 *    torrri8(); T2_STRDI ORs ARM_P into the opcode, T2_STRDIN does not.
 */
790 #  define CC_STRD(cc,rt,rn,rm)          corrr(cc,ARM_STRD|ARM_P,rn,rt,rm)
791 #  define STRD(rt,rn,rm)                CC_STRD(ARM_CC_AL,rt,rn,rm)
792 #  define CC_STRDN(cc,rt,rn,rm)         corrr(cc,ARM_STRD,rn,rt,rm)
793 #  define STRDN(rt,rn,rm)               CC_STRDN(ARM_CC_AL,rt,rn,rm)
794 #  define CC_STRDI(cc,rt,rn,im)         corri8(cc,ARM_STRDI|ARM_P,rn,rt,im)
795 #  define STRDI(rt,rn,im)               CC_STRDI(ARM_CC_AL,rt,rn,im)
796 #  define T2_STRDI(rt,rt2,rn,im)        torrri8(THUMB2_STRDI|ARM_P,rn,rt,rt2,im)
797 #  define CC_STRDIN(cc,rt,rn,im)        corri8(cc,ARM_STRDI,rn,rt,im)
798 #  define STRDIN(rt,rn,im)              CC_STRDIN(ARM_CC_AL,rt,rn,im)
799 #  define T2_STRDIN(rt,rt2,rn,im)       torrri8(THUMB2_STRDI,rn,rt,rt2,im)
/*
 * STREX emitters; macro arguments are (rd,rt,rn).  The ARM form forwards
 * to corrrr() as (rn,rd,0xf,rt).  The T2 form takes an extra immediate and
 * forwards to torrri8() as (rn,rt,rd,im).
 */
800 #  define CC_STREX(cc,rd,rt,rn)         corrrr(cc,ARM_STREX,rn,rd,0xf,rt)
801 #  define STREX(rd,rt,rn)               CC_STREX(ARM_CC_AL,rd,rt,rn)
802 #  define T2_STREX(rd,rt,rn,im)         torrri8(THUMB2_STREX,rn,rt,rd,im)
/*
 * Load-multiple emitters; `im` is the register-list operand handed to
 * corl()/torl().  Every ARM form ORs ARM_M|ARM_M_L into the opcode and adds:
 *   ARM_M_I  for the IA and IB variants (absent for DA and DB),
 *   ARM_M_B  for the IB and DB variants,
 *   ARM_M_U  for the *_U variants.
 * LDM / CC_LDM / LDM_U are aliases for the corresponding LDMIA forms.
 * T1_LDMIA packs a 3-bit rn at bit 8 and ORs the list in directly; `im` is
 * parenthesized there so an expression argument cannot regroup with the
 * surrounding `|` chain.  T2 forms forward to torl().
 */
803 #  define CC_LDMIA(cc,rn,im)            corl(cc,ARM_M|ARM_M_L|ARM_M_I,rn,im)
804 #  define LDMIA(rn,im)                  CC_LDMIA(ARM_CC_AL,rn,im)
805 #  define CC_LDM(cc,rn,im)              CC_LDMIA(cc,rn,im)
806 #  define LDM(rn,im)                    LDMIA(rn,im)
807 #  define T1_LDMIA(rn,im)               is(THUMB_LDMIA|(_u3(rn)<<8)|(im))
808 #  define T2_LDMIA(rn,im)               torl(THUMB2_LDMIA,rn,im)
809 #  define CC_LDMIA_U(cc,rn,im)          corl(cc,ARM_M|ARM_M_L|ARM_M_I|ARM_M_U,rn,im)
810 #  define LDMIA_U(rn,im)                CC_LDMIA_U(ARM_CC_AL,rn,im)
811 #  define LDM_U(r0,i0)                  LDMIA_U(r0,i0)
812 #  define CC_LDMIB(cc,rn,im)            corl(cc,ARM_M|ARM_M_L|ARM_M_I|ARM_M_B,rn,im)
813 #  define LDMIB(rn,im)                  CC_LDMIB(ARM_CC_AL,rn,im)
814 #  define CC_LDMIB_U(cc,rn,im)          corl(cc,ARM_M|ARM_M_L|ARM_M_I|ARM_M_B|ARM_M_U,rn,im)
815 #  define LDMIB_U(rn,im)                CC_LDMIB_U(ARM_CC_AL,rn,im)
816 #  define CC_LDMDA(cc,rn,im)            corl(cc,ARM_M|ARM_M_L,rn,im)
817 #  define LDMDA(rn,im)                  CC_LDMDA(ARM_CC_AL,rn,im)
818 #  define CC_LDMDA_U(cc,rn,im)          corl(cc,ARM_M|ARM_M_L|ARM_M_U,rn,im)
819 #  define LDMDA_U(rn,im)                CC_LDMDA_U(ARM_CC_AL,rn,im)
820 #  define CC_LDMDB(cc,rn,im)            corl(cc,ARM_M|ARM_M_L|ARM_M_B,rn,im)
821 #  define LDMDB(rn,im)                  CC_LDMDB(ARM_CC_AL,rn,im)
822 #  define T2_LDMDB(rn,im)               torl(THUMB2_LDMDB,rn,im)
823 #  define CC_LDMDB_U(cc,rn,im)          corl(cc,ARM_M|ARM_M_L|ARM_M_B|ARM_M_U,rn,im)
824 #  define LDMDB_U(rn,im)                CC_LDMDB_U(ARM_CC_AL,rn,im)
/*
 * Store-multiple emitters; `im` is the register-list operand handed to
 * corl().  Every form ORs ARM_M into the opcode (without ARM_M_L, which
 * the load-multiple forms add) and then:
 *   ARM_M_I  for the IA and IB variants (absent for DA and DB),
 *   ARM_M_B  for the IB and DB variants,
 *   ARM_M_U  for the *_U variants.
 * STM / CC_STM / STM_U / CC_STM_U are aliases for the STMIA forms.
 */
825 #  define CC_STMIA(cc,rn,im)            corl(cc,ARM_M|ARM_M_I,rn,im)
826 #  define STMIA(rn,im)                  CC_STMIA(ARM_CC_AL,rn,im)
827 #  define CC_STM(cc,rn,im)              CC_STMIA(cc,rn,im)
828 #  define STM(rn,im)                    STMIA(rn,im)
829 #  define CC_STMIA_U(cc,rn,im)          corl(cc,ARM_M|ARM_M_I|ARM_M_U,rn,im)
830 #  define STMIA_U(rn,im)                CC_STMIA_U(ARM_CC_AL,rn,im)
831 #  define CC_STM_U(cc,rn,im)            CC_STMIA_U(cc,rn,im)
832 #  define STM_U(rn,im)                  STMIA_U(rn,im)
833 #  define CC_STMIB(cc,rn,im)            corl(cc,ARM_M|ARM_M_I|ARM_M_B,rn,im)
834 #  define STMIB(rn,im)                  CC_STMIB(ARM_CC_AL,rn,im)
835 #  define CC_STMIB_U(cc,rn,im)          corl(cc,ARM_M|ARM_M_I|ARM_M_B|ARM_M_U,rn,im)
836 #  define STMIB_U(rn,im)                CC_STMIB_U(ARM_CC_AL,rn,im)
837 #  define CC_STMDA(cc,rn,im)            corl(cc,ARM_M,rn,im)
838 #  define STMDA(rn,im)                  CC_STMDA(ARM_CC_AL,rn,im)
839 #  define CC_STMDA_U(cc,rn,im)          corl(cc,ARM_M|ARM_M_U,rn,im)
840 #  define STMDA_U(rn,im)                CC_STMDA_U(ARM_CC_AL,rn,im)
841 #  define CC_STMDB(cc,rn,im)            corl(cc,ARM_M|ARM_M_B,rn,im)
842 #  define STMDB(rn,im)                  CC_STMDB(ARM_CC_AL,rn,im)
843 #  define CC_STMDB_U(cc,rn,im)          corl(cc,ARM_M|ARM_M_B|ARM_M_U,rn,im)
844 #  define STMDB_U(rn,im)                CC_STMDB_U(ARM_CC_AL,rn,im)
/*
 * PUSH / POP emitters; `im` is a register-list mask.
 *  - ARM forms are STMDB_U / LDMIA_U on _SP_REGNO.  The conditional forms
 *    must use the CC_ helpers: LDMIA_U takes only (rn,im), so CC_POP goes
 *    through CC_LDMIA_U, mirroring CC_PUSH -> CC_STMDB_U.
 *  - T1 forms keep the low eight mask bits and fold one high bit down to
 *    bit 8: mask bit 0x4000 for PUSH, mask bit 0x8000 for POP.  `im` is
 *    parenthesized so an expression argument cannot regroup with `&`.
 *  - T2 forms forward to tpp().
 */
845 #  define CC_PUSH(cc,im)                CC_STMDB_U(cc,_SP_REGNO,im)
846 #  define PUSH(im)                      STMDB_U(_SP_REGNO,im)
847 #  define T1_PUSH(im)                   is(THUMB_PUSH|(((im)&0x4000)>>6)|((im)&0xff))
848 #  define T2_PUSH(im)                   tpp(THUMB2_PUSH,im)
849 #  define CC_POP(cc,im)                 CC_LDMIA_U(cc,_SP_REGNO,im)
850 #  define POP(im)                       LDMIA_U(_SP_REGNO,im)
851 #  define T1_POP(im)                    is(THUMB_POP|(((im)&0x8000)>>7)|((im)&0xff))
852 #  define T2_POP(im)                    tpp(THUMB2_POP,im)
/*
 * Requests _R0, _R1, _R2 and _R3 by name from jit_get_reg(), each with
 * jit_class_named|jit_class_gpr, and discards the returned values.
 * Wrapped in do { } while (0) so it behaves as a single statement.
 * Paired with jit_unget_reg_args().
 */
853 #  define jit_get_reg_args()                                            \
854     do {                                                                \
855         (void)jit_get_reg(_R0|jit_class_named|jit_class_gpr);           \
856         (void)jit_get_reg(_R1|jit_class_named|jit_class_gpr);           \
857         (void)jit_get_reg(_R2|jit_class_named|jit_class_gpr);           \
858         (void)jit_get_reg(_R3|jit_class_named|jit_class_gpr);           \
859     } while (0)
/*
 * Counterpart of jit_get_reg_args(): calls jit_unget_reg() on _R3, _R2,
 * _R1 and _R0, i.e. in the reverse of the order in which they were
 * requested.  Wrapped in do { } while (0) so it behaves as a single
 * statement.
 */
860 #  define jit_unget_reg_args()                                          \
861     do {                                                                \
862         jit_unget_reg(_R3);                                             \
863         jit_unget_reg(_R2);                                             \
864         jit_unget_reg(_R1);                                             \
865         jit_unget_reg(_R0);                                             \
866     } while (0)
/*
 * Prototypes for the nop / move / compare-and-swap code generators.
 * Each lower-case macro forwards to the matching static _name() function
 * and supplies `_jit` as the first argument, so a variable of that name
 * must be in scope wherever the macro is used.
 * _movi_p() is the only one in this group that returns a value
 * (jit_word_t).  casr() and casi() are both expressed through casx():
 * casr passes 0 as the trailing word argument, casi passes _NOREG in the
 * second register slot together with the immediate i0.
 */
867 #  define nop(i0)                       _nop(_jit,i0)
868 static void _nop(jit_state_t*,jit_int32_t);
869 #  define movr(r0,r1)                   _movr(_jit,r0,r1)
870 static void _movr(jit_state_t*,jit_int32_t,jit_int32_t);
871 #  define movi(r0,i0)                   _movi(_jit,r0,i0)
872 static void _movi(jit_state_t*,jit_int32_t,jit_word_t);
873 #  define movi_p(r0,i0)                 _movi_p(_jit,r0,i0)
874 static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t);
875 #  define movnr(r0,r1,r2)               _movnr(_jit,r0,r1,r2)
876 static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
877 #  define movzr(r0,r1,r2)               _movzr(_jit,r0,r1,r2)
878 static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
879 #  define casx(r0, r1, r2, r3, i0)      _casx(_jit, r0, r1, r2, r3, i0)
880 static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t,
881                   jit_int32_t,jit_int32_t,jit_word_t);
882 #define casr(r0, r1, r2, r3)            casx(r0, r1, r2, r3, 0)
883 #define casi(r0, i0, r1, r2)            casx(r0, _NOREG, r1, r2, i0)
/*
 * Prototypes for the complement / negate / add / subtract code generators.
 * Each macro forwards to the matching static _name() function and supplies
 * `_jit` as the first argument.  Names ending in `r` take a third register
 * operand (jit_int32_t); names ending in `i` take a jit_word_t immediate
 * instead.  rsbi has only an immediate form in this group.
 */
884 #  define comr(r0,r1)                   _comr(_jit,r0,r1)
885 static void _comr(jit_state_t*,jit_int32_t,jit_int32_t);
886 #  define negr(r0,r1)                   _negr(_jit,r0,r1)
887 static void _negr(jit_state_t*,jit_int32_t,jit_int32_t);
888 #  define addr(r0,r1,r2)                _addr(_jit,r0,r1,r2)
889 static void _addr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
890 #  define addi(r0,r1,i0)                _addi(_jit,r0,r1,i0)
891 static void _addi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
892 #  define addcr(r0,r1,r2)               _addcr(_jit,r0,r1,r2)
893 static void _addcr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
894 #  define addci(r0,r1,i0)               _addci(_jit,r0,r1,i0)
895 static void _addci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
896 #  define addxr(r0,r1,r2)               _addxr(_jit,r0,r1,r2)
897 static void _addxr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
898 #  define addxi(r0,r1,i0)               _addxi(_jit,r0,r1,i0)
899 static void _addxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
900 #  define subr(r0,r1,r2)                _subr(_jit,r0,r1,r2)
901 static void _subr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
902 #  define subi(r0,r1,i0)                _subi(_jit,r0,r1,i0)
903 static void _subi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
904 #  define subcr(r0,r1,r2)               _subcr(_jit,r0,r1,r2)
905 static void _subcr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
906 #  define subci(r0,r1,i0)               _subci(_jit,r0,r1,i0)
907 static void _subci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
908 #  define subxr(r0,r1,r2)               _subxr(_jit,r0,r1,r2)
909 static void _subxr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
910 #  define subxi(r0,r1,i0)               _subxi(_jit,r0,r1,i0)
911 static void _subxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
912 #  define rsbi(r0, r1, i0)              _rsbi(_jit, r0, r1, i0)
913 static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
/*
 * Prototypes for the multiply / divide / remainder code generators.
 * Each macro forwards to the matching static _name() function and supplies
 * `_jit` as the first argument.
 * The q* macros (qmulr, qmuli, qdivr, qdivi and their _u variants) are
 * thin wrappers over iqmulr/iqmuli/iqdivr/iqdivi: the plain name passes 1
 * and the _u name passes 0 as the trailing jit_bool_t argument.
 * NOTE(review): that flag presumably selects signed vs unsigned -- confirm
 * in the function bodies.
 * _divrem() takes two leading int arguments (d,s) before its registers.
 */
914 #  define mulr(r0,r1,r2)                _mulr(_jit,r0,r1,r2)
915 static void _mulr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
916 #  define muli(r0,r1,i0)                _muli(_jit,r0,r1,i0)
917 static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
918 #  define qmulr(r0,r1,r2,r3)            iqmulr(r0,r1,r2,r3,1)
919 #  define qmulr_u(r0,r1,r2,r3)          iqmulr(r0,r1,r2,r3,0)
920 #  define iqmulr(r0,r1,r2,r3,cc)        _iqmulr(_jit,r0,r1,r2,r3,cc)
921 static void _iqmulr(jit_state_t*,jit_int32_t,jit_int32_t,
922                     jit_int32_t,jit_int32_t,jit_bool_t);
923 #  define qmuli(r0,r1,r2,i0)            iqmuli(r0,r1,r2,i0,1)
924 #  define qmuli_u(r0,r1,r2,i0)          iqmuli(r0,r1,r2,i0,0)
925 #  define iqmuli(r0,r1,r2,i0,cc)        _iqmuli(_jit,r0,r1,r2,i0,cc)
926 static void _iqmuli(jit_state_t*,jit_int32_t,jit_int32_t,
927                     jit_int32_t,jit_word_t,jit_bool_t);
928 #  define divrem(d,s,r0,r1,r2)          _divrem(_jit,d,s,r0,r1,r2)
929 static void _divrem(jit_state_t*,int,int,jit_int32_t,jit_int32_t,jit_int32_t);
930 #  define divr(r0,r1,r2)                _divr(_jit,r0,r1,r2)
931 static void _divr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
932 #  define divi(r0,r1,i0)                _divi(_jit,r0,r1,i0)
933 static void _divi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
934 #  define divr_u(r0,r1,r2)              _divr_u(_jit,r0,r1,r2)
935 static void _divr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
936 #  define divi_u(r0,r1,i0)              _divi_u(_jit,r0,r1,i0)
937 static void _divi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
938 #  define qdivr(r0,r1,r2,r3)            iqdivr(r0,r1,r2,r3,1)
939 #  define qdivr_u(r0,r1,r2,r3)          iqdivr(r0,r1,r2,r3,0)
940 #  define iqdivr(r0,r1,r2,r3,cc)        _iqdivr(_jit,r0,r1,r2,r3,cc)
941 static void _iqdivr(jit_state_t*,jit_int32_t,jit_int32_t,
942                     jit_int32_t,jit_int32_t,jit_bool_t);
943 #  define qdivi(r0,r1,r2,i0)            iqdivi(r0,r1,r2,i0,1)
944 #  define qdivi_u(r0,r1,r2,i0)          iqdivi(r0,r1,r2,i0,0)
945 #  define iqdivi(r0,r1,r2,i0,cc)        _iqdivi(_jit,r0,r1,r2,i0,cc)
946 static void _iqdivi(jit_state_t*,jit_int32_t,jit_int32_t,
947                     jit_int32_t,jit_word_t,jit_bool_t);
948 #  define remr(r0,r1,r2)                _remr(_jit,r0,r1,r2)
949 static void _remr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
950 #  define remi(r0,r1,i0)                _remi(_jit,r0,r1,i0)
951 static void _remi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
952 #  define remr_u(r0,r1,r2)              _remr_u(_jit,r0,r1,r2)
953 static void _remr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
954 #  define remi_u(r0,r1,i0)              _remi_u(_jit,r0,r1,i0)
955 static void _remi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
/*
 * Prototypes for the bitwise (and / or / xor) and shift (lsh / rsh / rsh_u)
 * code generators.  Each macro forwards to the matching static _name()
 * function and supplies `_jit` as the first argument.  Names ending in `r`
 * take a third register operand (jit_int32_t); names ending in `i` take a
 * jit_word_t immediate instead.
 */
956 #  define andr(r0,r1,r2)                _andr(_jit,r0,r1,r2)
957 static void _andr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
958 #  define andi(r0,r1,i0)                _andi(_jit,r0,r1,i0)
959 static void _andi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
960 #  define orr(r0,r1,r2)                 _orr(_jit,r0,r1,r2)
961 static void _orr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
962 #  define ori(r0,r1,i0)                 _ori(_jit,r0,r1,i0)
963 static void _ori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
964 #  define xorr(r0,r1,r2)                _xorr(_jit,r0,r1,r2)
965 static void _xorr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
966 #  define xori(r0,r1,i0)                _xori(_jit,r0,r1,i0)
967 static void _xori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
968 #  define lshr(r0,r1,r2)                _lshr(_jit,r0,r1,r2)
969 static void _lshr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
970 #  define lshi(r0,r1,i0)                _lshi(_jit,r0,r1,i0)
971 static void _lshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
972 #  define rshr(r0,r1,r2)                _rshr(_jit,r0,r1,r2)
973 static void _rshr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
974 #  define rshi(r0,r1,i0)                _rshi(_jit,r0,r1,i0)
975 static void _rshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
976 #  define rshr_u(r0,r1,r2)              _rshr_u(_jit,r0,r1,r2)
977 static void _rshr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
978 #  define rshi_u(r0,r1,i0)              _rshi_u(_jit,r0,r1,i0)
979 static void _rshi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
/*
 * Comparison emitters.  ccr (register operand) and cci (immediate operand)
 * take two ARM condition codes, named ct and cf; every wrapper below passes
 * a condition together with its logical opposite (LT/GE, LO/HS, LE/GT,
 * LS/HI, EQ/NE).  The `_u` wrappers use the unsigned conditions
 * (LO, LS, HS, HI).  "Not equal" is not routed through ccr/cci: it has its
 * own _ner/_nei functions.
 */
980 #  define ccr(ct,cf,r0,r1,r2)           _ccr(_jit,ct,cf,r0,r1,r2)
981 static void _ccr(jit_state_t*,int,int,jit_int32_t,jit_int32_t,jit_int32_t);
982 #  define cci(ct,cf,r0,r1,i0)           _cci(_jit,ct,cf,r0,r1,i0)
983 static void _cci(jit_state_t*,int,int,jit_int32_t,jit_int32_t,jit_word_t);
984 #  define ltr(r0, r1, r2)               ccr(ARM_CC_LT,ARM_CC_GE,r0,r1,r2)
985 #  define lti(r0, r1, i0)               cci(ARM_CC_LT,ARM_CC_GE,r0,r1,i0)
986 #  define ltr_u(r0, r1, r2)             ccr(ARM_CC_LO,ARM_CC_HS,r0,r1,r2)
987 #  define lti_u(r0, r1, i0)             cci(ARM_CC_LO,ARM_CC_HS,r0,r1,i0)
988 #  define ler(r0, r1, r2)               ccr(ARM_CC_LE,ARM_CC_GT,r0,r1,r2)
989 #  define lei(r0, r1, i0)               cci(ARM_CC_LE,ARM_CC_GT,r0,r1,i0)
990 #  define ler_u(r0, r1, r2)             ccr(ARM_CC_LS,ARM_CC_HI,r0,r1,r2)
991 #  define lei_u(r0, r1, i0)             cci(ARM_CC_LS,ARM_CC_HI,r0,r1,i0)
992 #  define eqr(r0, r1, r2)               ccr(ARM_CC_EQ,ARM_CC_NE,r0,r1,r2)
993 #  define eqi(r0, r1, i0)               cci(ARM_CC_EQ,ARM_CC_NE,r0,r1,i0)
994 #  define ger(r0, r1, r2)               ccr(ARM_CC_GE,ARM_CC_LT,r0,r1,r2)
995 #  define gei(r0, r1, i0)               cci(ARM_CC_GE,ARM_CC_LT,r0,r1,i0)
996 #  define ger_u(r0, r1, r2)             ccr(ARM_CC_HS,ARM_CC_LO,r0,r1,r2)
997 #  define gei_u(r0, r1, i0)             cci(ARM_CC_HS,ARM_CC_LO,r0,r1,i0)
998 #  define gtr(r0, r1, r2)               ccr(ARM_CC_GT,ARM_CC_LE,r0,r1,r2)
999 #  define gti(r0, r1, i0)               cci(ARM_CC_GT,ARM_CC_LE,r0,r1,i0)
1000 #  define gtr_u(r0, r1, r2)             ccr(ARM_CC_HI,ARM_CC_LS,r0,r1,r2)
1001 #  define gti_u(r0, r1, i0)             cci(ARM_CC_HI,ARM_CC_LS,r0,r1,i0)
1002 #  define ner(r0,r1,r2)                 _ner(_jit,r0,r1,r2)
1003 static void _ner(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
1004 #  define nei(r0,r1,i0)                 _nei(_jit,r0,r1,i0)
1005 static void _nei(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
/*
 * Jump and branch emitters.
 *
 * _jmpi_p and every b* function return a jit_word_t; _casx in this file
 * hands such a value to patch_at() as the jump to fix up.
 *
 * bccr/bcci take the ARM condition code as their first argument; the
 * blt/ble/beq/bge/bgt/bne wrappers only choose that code, with the `_u`
 * variants using LO, LS, HS and HI.
 *
 * baddr/baddi and bsubr/bsubi likewise take a condition code.  The
 * bo* and bx* wrappers pass opposite conditions: VS versus VC for the plain
 * forms, and for the `_u` forms HS versus LO after an add but LO versus HS
 * after a subtract.
 *
 * bmxr/bmxi are wrapped by bms* (NE) and bmc* (EQ).
 */
1006 #  define jmpr(r0)                      _jmpr(_jit,r0)
1007 static void _jmpr(jit_state_t*,jit_int32_t);
1008 #  define jmpi(i0)                      _jmpi(_jit,i0)
1009 static void _jmpi(jit_state_t*,jit_word_t);
1010 #  define jmpi_p(i0, i1)                _jmpi_p(_jit,i0, i1)
1011 static jit_word_t _jmpi_p(jit_state_t*,jit_word_t,jit_bool_t);
1012 #  define bccr(cc,i0,r0,r1)             _bccr(_jit,cc,i0,r0,r1)
1013 static jit_word_t _bccr(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
1014 #  define bcci(cc,i0,r0,i1)             _bcci(_jit,cc,i0,r0,i1)
1015 static jit_word_t _bcci(jit_state_t*,int,jit_word_t,jit_int32_t,jit_word_t);
1016 #  define bltr(i0, r0, r1)              bccr(ARM_CC_LT,i0,r0,r1)
1017 #  define blti(i0, r0, i1)              bcci(ARM_CC_LT,i0,r0,i1)
1018 #  define bltr_u(i0, r0, r1)            bccr(ARM_CC_LO,i0,r0,r1)
1019 #  define blti_u(i0, r0, i1)            bcci(ARM_CC_LO,i0,r0,i1)
1020 #  define bler(i0, r0, r1)              bccr(ARM_CC_LE,i0,r0,r1)
1021 #  define blei(i0, r0, i1)              bcci(ARM_CC_LE,i0,r0,i1)
1022 #  define bler_u(i0, r0, r1)            bccr(ARM_CC_LS,i0,r0,r1)
1023 #  define blei_u(i0, r0, i1)            bcci(ARM_CC_LS,i0,r0,i1)
1024 #  define beqr(i0, r0, r1)              bccr(ARM_CC_EQ,i0,r0,r1)
1025 #  define beqi(i0, r0, i1)              bcci(ARM_CC_EQ,i0,r0,i1)
1026 #  define bger(i0, r0, r1)              bccr(ARM_CC_GE,i0,r0,r1)
1027 #  define bgei(i0, r0, i1)              bcci(ARM_CC_GE,i0,r0,i1)
1028 #  define bger_u(i0, r0, r1)            bccr(ARM_CC_HS,i0,r0,r1)
1029 #  define bgei_u(i0, r0, i1)            bcci(ARM_CC_HS,i0,r0,i1)
1030 #  define bgtr(i0, r0, r1)              bccr(ARM_CC_GT,i0,r0,r1)
1031 #  define bgti(i0, r0, i1)              bcci(ARM_CC_GT,i0,r0,i1)
1032 #  define bgtr_u(i0, r0, r1)            bccr(ARM_CC_HI,i0,r0,r1)
1033 #  define bgti_u(i0, r0, i1)            bcci(ARM_CC_HI,i0,r0,i1)
1034 #  define bner(i0, r0, r1)              bccr(ARM_CC_NE,i0,r0,r1)
1035 #  define bnei(i0, r0, i1)              bcci(ARM_CC_NE,i0,r0,i1)
1036 #  define baddr(cc,i0,r0,r1)            _baddr(_jit,cc,i0,r0,r1)
1037 static jit_word_t _baddr(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
1038 #  define baddi(cc,i0,r0,r1)            _baddi(_jit,cc,i0,r0,r1)
1039 static jit_word_t _baddi(jit_state_t*,int,jit_word_t,jit_int32_t,jit_word_t);
1040 #  define boaddr(i0,r0,r1)              baddr(ARM_CC_VS,i0,r0,r1)
1041 #  define boaddi(i0,r0,i1)              baddi(ARM_CC_VS,i0,r0,i1)
1042 #  define boaddr_u(i0,r0,r1)            baddr(ARM_CC_HS,i0,r0,r1)
1043 #  define boaddi_u(i0,r0,i1)            baddi(ARM_CC_HS,i0,r0,i1)
1044 #  define bxaddr(i0,r0,r1)              baddr(ARM_CC_VC,i0,r0,r1)
1045 #  define bxaddi(i0,r0,i1)              baddi(ARM_CC_VC,i0,r0,i1)
1046 #  define bxaddr_u(i0,r0,r1)            baddr(ARM_CC_LO,i0,r0,r1)
1047 #  define bxaddi_u(i0,r0,i1)            baddi(ARM_CC_LO,i0,r0,i1)
1048 #  define bsubr(cc,i0,r0,r1)            _bsubr(_jit,cc,i0,r0,r1)
1049 static jit_word_t _bsubr(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
1050 #  define bsubi(cc,i0,r0,r1)            _bsubi(_jit,cc,i0,r0,r1)
1051 static jit_word_t _bsubi(jit_state_t*,int,jit_word_t,jit_int32_t,jit_word_t);
1052 #  define bosubr(i0,r0,r1)              bsubr(ARM_CC_VS,i0,r0,r1)
1053 #  define bosubi(i0,r0,i1)              bsubi(ARM_CC_VS,i0,r0,i1)
1054 #  define bosubr_u(i0,r0,r1)            bsubr(ARM_CC_LO,i0,r0,r1)
1055 #  define bosubi_u(i0,r0,i1)            bsubi(ARM_CC_LO,i0,r0,i1)
1056 #  define bxsubr(i0,r0,r1)              bsubr(ARM_CC_VC,i0,r0,r1)
1057 #  define bxsubi(i0,r0,i1)              bsubi(ARM_CC_VC,i0,r0,i1)
1058 #  define bxsubr_u(i0,r0,r1)            bsubr(ARM_CC_HS,i0,r0,r1)
1059 #  define bxsubi_u(i0,r0,i1)            bsubi(ARM_CC_HS,i0,r0,i1)
1060 #  define bmxr(cc,i0,r0,r1)             _bmxr(_jit,cc,i0,r0,r1)
1061 static jit_word_t _bmxr(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
1062 #  define bmxi(cc,i0,r0,r1)             _bmxi(_jit,cc,i0,r0,r1)
1063 static jit_word_t _bmxi(jit_state_t*,int,jit_word_t,jit_int32_t,jit_word_t);
1064 #  define bmsr(i0,r0,r1)                bmxr(ARM_CC_NE,i0,r0,r1)
1065 #  define bmsi(i0,r0,i1)                bmxi(ARM_CC_NE,i0,r0,i1)
1066 #  define bmcr(i0,r0,r1)                bmxr(ARM_CC_EQ,i0,r0,r1)
1067 #  define bmci(i0,r0,i1)                bmxi(ARM_CC_EQ,i0,r0,i1)
/*
 * Load and store emitters, one family per access width suffix
 * (_c, _uc, _s, _us, _i).  Within a family:
 *   ldr / str    two register operands
 *   ldi / sti    one register operand and one jit_word_t operand
 *   ldxr / stxr  three register operands
 *   ldxi / stxi  two register operands and one jit_word_t operand
 *
 * The stxi_* macros name their parameters (r0,r1,i0) but forward them
 * positionally, and the prototypes take the jit_word_t operand FIRST, so
 * callers must pass the immediate as the first macro argument.
 *
 * NOTE(review): _stxr_i declares its first operand as jit_word_t whereas
 * _stxr_c and _stxr_s use jit_int32_t -- confirm against the definition.
 */
1068 #  define ldr_c(r0,r1)                  _ldr_c(_jit,r0,r1)
1069 static void _ldr_c(jit_state_t*,jit_int32_t,jit_int32_t);
1070 #  define ldi_c(r0,i0)                  _ldi_c(_jit,r0,i0)
1071 static void _ldi_c(jit_state_t*,jit_int32_t,jit_word_t);
1072 #  define ldxr_c(r0,r1,r2)              _ldxr_c(_jit,r0,r1,r2)
1073 static void _ldxr_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
1074 #  define ldxi_c(r0,r1,i0)              _ldxi_c(_jit,r0,r1,i0)
1075 static void _ldxi_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
1076 #  define ldr_uc(r0,r1)                 _ldr_uc(_jit,r0,r1)
1077 static void _ldr_uc(jit_state_t*,jit_int32_t,jit_int32_t);
1078 #  define ldi_uc(r0,i0)                 _ldi_uc(_jit,r0,i0)
1079 static void _ldi_uc(jit_state_t*,jit_int32_t,jit_word_t);
1080 #  define ldxr_uc(r0,r1,r2)             _ldxr_uc(_jit,r0,r1,r2)
1081 static void _ldxr_uc(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
1082 #  define ldxi_uc(r0,r1,i0)             _ldxi_uc(_jit,r0,r1,i0)
1083 static void _ldxi_uc(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
1084 #  define ldr_s(r0,r1)                  _ldr_s(_jit,r0,r1)
1085 static void _ldr_s(jit_state_t*,jit_int32_t,jit_int32_t);
1086 #  define ldi_s(r0,i0)                  _ldi_s(_jit,r0,i0)
1087 static void _ldi_s(jit_state_t*,jit_int32_t,jit_word_t);
1088 #  define ldxr_s(r0,r1,r2)              _ldxr_s(_jit,r0,r1,r2)
1089 static void _ldxr_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
1090 #  define ldxi_s(r0,r1,i0)              _ldxi_s(_jit,r0,r1,i0)
1091 static void _ldxi_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
1092 #  define ldr_us(r0,r1)                 _ldr_us(_jit,r0,r1)
1093 static void _ldr_us(jit_state_t*,jit_int32_t,jit_int32_t);
1094 #  define ldi_us(r0,i0)                 _ldi_us(_jit,r0,i0)
1095 static void _ldi_us(jit_state_t*,jit_int32_t,jit_word_t);
1096 #  define ldxr_us(r0,r1,r2)             _ldxr_us(_jit,r0,r1,r2)
1097 static void _ldxr_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
1098 #  define ldxi_us(r0,r1,i0)             _ldxi_us(_jit,r0,r1,i0)
1099 static void _ldxi_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
1100 #  define ldr_i(r0,r1)                  _ldr_i(_jit,r0,r1)
1101 static void _ldr_i(jit_state_t*,jit_int32_t,jit_int32_t);
1102 #  define ldi_i(r0,i0)                  _ldi_i(_jit,r0,i0)
1103 static void _ldi_i(jit_state_t*,jit_int32_t,jit_word_t);
1104 #  define ldxr_i(r0,r1,r2)              _ldxr_i(_jit,r0,r1,r2)
1105 static void _ldxr_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
1106 #  define ldxi_i(r0,r1,i0)              _ldxi_i(_jit,r0,r1,i0)
1107 static void _ldxi_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
1108 #  define str_c(r0,r1)                  _str_c(_jit,r0,r1)
1109 static void _str_c(jit_state_t*,jit_int32_t,jit_int32_t);
1110 #  define sti_c(i0,r0)                  _sti_c(_jit,i0,r0)
1111 static void _sti_c(jit_state_t*,jit_word_t,jit_int32_t);
1112 #  define stxr_c(r0,r1,r2)              _stxr_c(_jit,r0,r1,r2)
1113 static void _stxr_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
1114 #  define stxi_c(r0,r1,i0)              _stxi_c(_jit,r0,r1,i0)
1115 static void _stxi_c(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
1116 #  define str_s(r0,r1)                  _str_s(_jit,r0,r1)
1117 static void _str_s(jit_state_t*,jit_int32_t,jit_int32_t);
1118 #  define sti_s(i0,r0)                  _sti_s(_jit,i0,r0)
1119 static void _sti_s(jit_state_t*,jit_word_t,jit_int32_t);
1120 #  define stxr_s(r0,r1,r2)              _stxr_s(_jit,r0,r1,r2)
1121 static void _stxr_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
1122 #  define stxi_s(r0,r1,i0)              _stxi_s(_jit,r0,r1,i0)
1123 static void _stxi_s(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
1124 #  define str_i(r0,r1)                  _str_i(_jit,r0,r1)
1125 static void _str_i(jit_state_t*,jit_int32_t,jit_int32_t);
1126 #  define sti_i(i0,r0)                  _sti_i(_jit,i0,r0)
1127 static void _sti_i(jit_state_t*,jit_word_t,jit_int32_t);
1128 #  define stxr_i(r0,r1,r2)              _stxr_i(_jit,r0,r1,r2)
1129 static void _stxr_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
1130 #  define stxi_i(r0,r1,i0)              _stxi_i(_jit,r0,r1,i0)
1131 static void _stxi_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
/*
 * Remaining forwarders: byte swap (bswapr_*), register extension (extr_*),
 * function prolog/epilog (both take a jit_node_t*), calls, varargs helpers
 * and patch_at.  As elsewhere in this section, each macro only inserts
 * `_jit` as the first argument.  _calli_p is the only call variant that
 * returns a value (a jit_word_t).
 */
1132 #  define bswapr_us(r0,r1)              _bswapr_us(_jit,r0,r1)
1133 static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t);
1134 #  define bswapr_ui(r0,r1)              _bswapr_ui(_jit,r0,r1)
1135 static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
1136 #  define extr_c(r0,r1)                 _extr_c(_jit,r0,r1)
1137 static void _extr_c(jit_state_t*,jit_int32_t,jit_int32_t);
1138 #  define extr_uc(r0,r1)                _extr_uc(_jit,r0,r1)
1139 static void _extr_uc(jit_state_t*,jit_int32_t,jit_int32_t);
1140 #  define extr_s(r0,r1)                 _extr_s(_jit,r0,r1)
1141 static void _extr_s(jit_state_t*,jit_int32_t,jit_int32_t);
1142 #  define extr_us(r0,r1)                _extr_us(_jit,r0,r1)
1143 static void _extr_us(jit_state_t*,jit_int32_t,jit_int32_t);
1144 #  define prolog(i0)                    _prolog(_jit,i0)
1145 static void _prolog(jit_state_t*,jit_node_t*);
1146 #  define epilog(i0)                    _epilog(_jit,i0)
1147 static void _epilog(jit_state_t*,jit_node_t*);
1148 #  define callr(r0)                     _callr(_jit,r0)
1149 static void _callr(jit_state_t*,jit_int32_t);
1150 #  define calli(i0)                     _calli(_jit,i0)
1151 static void _calli(jit_state_t*,jit_word_t);
1152 #  define calli_p(i0)                   _calli_p(_jit,i0)
1153 static jit_word_t _calli_p(jit_state_t*,jit_word_t);
1154 #  define vastart(r0)                   _vastart(_jit, r0)
1155 static void _vastart(jit_state_t*, jit_int32_t);
1156 #  define vaarg(r0, r1)                 _vaarg(_jit, r0, r1)
1157 static void _vaarg(jit_state_t*, jit_int32_t, jit_int32_t);
1158 #  define patch_at(kind,jump,label)     _patch_at(_jit,kind,jump,label)
1159 static void _patch_at(jit_state_t*,jit_int32_t,jit_word_t,jit_word_t);
1160 #endif
1161
1162 #if CODE
/* from binutils */
/*
 * Rotate the 32-bit unsigned value v left by n bits, 0 <= n < 32.
 * The right-shift count is masked so that n == 0 never shifts by the full
 * width of the type (undefined behavior in C); for n in 1..31 the result
 * is unchanged.  Arguments are parenthesized so expressions are safe.
 */
#  define rotate_left(v, n)     (((v) << (n)) | ((v) >> ((32 - (n)) & 31)))

/*
 * Try to express v as an 8-bit value rotated right by an even amount.
 *
 * Every even left-rotation i of v is tried in increasing order; the first
 * one that leaves a value <= 0xff wins.
 *
 * Returns that 8-bit value in bits 7:0 with i/2 placed in bits 11:8
 * (written as i << 7), or -1 when no rotation fits.
 */
static int
encode_arm_immediate(unsigned int v)
{
    unsigned int        a, i;

    for (i = 0; i < 32; i += 2) {
        a = rotate_left(v, i);
        if (a <= 0xff)
            return (a | (i << 7));
    }

    return (-1);
}
1176
/*
 * Try to express v as a Thumb-2 modified immediate.
 *
 * Recognized shapes, in order:
 *   - a plain byte                           (selector 0)
 *   - the byte repeated in bytes 0 and 2     (selector 1 in bits 14:12)
 *   - the byte repeated in bytes 1 and 3     (selector 2 in bits 14:12)
 *   - the byte repeated in all four bytes    (selector 3 in bits 14:12)
 *   - an 8-bit value with its top bit set, positioned so that the top bit
 *     lands on bit 31 down to bit 17 (loop index 8..22)
 *
 * Returns the operand bits already placed at their instruction positions
 * (bit 26, bits 14:12 and bits 7:0), or -1 when v matches no shape.
 */
static int
encode_thumb_immediate(unsigned int v)
{
    int                 i;
    unsigned int        m;
    unsigned int        n;
    /* 00000000 00000000 00000000 abcdefgh */
    if ((v & 0xff) == v)
        return (v);
    /* 00000000 abcdefgh 00000000 abcdefgh */
    if ((v & 0xff00ff) == v && ((v & 0xff0000) >> 16) == (v & 0xff))
        return ((v & 0xff) | (1 << 12));
    /* abcdefgh 00000000 abcdefgh 00000000 */
    if (((v & 0xffff0000) >> 16) == (v & 0xffff) && (v & 0xff) == 0)
        /* The low byte is zero on this path, so the payload byte has to
         * come from bits 15:8; masking with 0xff would always yield 0. */
        return (((v & 0x0000ff00) >> 8) | (2 << 12));
    /* abcdefgh abcdefgh abcdefgh abcdefgh */
    if ( (v &    0xff)        == ((v &     0xff00) >>  8) &&
        ((v &   0xff00) >> 8) == ((v &   0xff0000) >> 16) &&
        ((v & 0xff0000) << 8) ==  (v & 0xff000000))
        return ((v & 0xff) | (3 << 12));
    /* 1bcdefgh << 24 ... 1bcdefgh << 1 */
    /* m is the 8-bit window being tested and n its top bit; both slide
     * right one position per iteration while i counts the rotation. */
    for (i = 8, m = 0xff000000, n = 0x80000000;
         i < 23; i++, m >>= 1,  n >>= 1) {
        if ((v & m) == v && (v & n)) {
            v >>= 32 - i;
            /* The lowest rotation bit is stored in bit 7 of the byte:
             * keep the (always set) top bit for odd i, clear it for
             * even i. */
            if (!(i & 1))
                v &= 0x7f;
            i >>= 1;
            return (((i & 7) << 12) | ((i & 8) << 23) | v);
        }
    }
    return (-1);
}
1210
/*
 * Scatter a 12-bit unsigned immediate into three fields: bit 11 moves to
 * bit 26, bits 10:8 move to bits 14:12 and bits 7:0 stay in place.
 *
 * Returns the scattered value, or -1 if v does not fit in 12 bits.
 */
static int
encode_thumb_word_immediate(unsigned int v)
{
    unsigned int        top, mid, low;

    if (v & 0xfffff000)
        return (-1);

    top = (v & 0x800) << 15;
    mid = (v & 0x700) << 4;
    low = v & 0xff;
    return (top | mid | low);
}
1218
/*
 * Pack a signed 24-bit displacement into the S, imm10, J1, J2 and imm11
 * fields of a 32-bit Thumb branch.
 *
 * Bit 23 of v is the sign S.  J1 and J2 are derived from bits 22 and 21:
 * each equals the corresponding bit when S is set and its inverse when S
 * is clear.
 *
 * Returns the packed fields, or -1 if v is outside
 * [-0x800000, 0x7fffff].
 */
static int
encode_thumb_jump(int v)
{
    int         sign, bit22, bit21, j1, j2;

    if (v < (int)-0x800000 || v > 0x7fffff)
        return (-1);

    sign  = (v & 0x800000) != 0;
    bit22 = (v & 0x400000) != 0;
    bit21 = (v & 0x200000) != 0;
    /* "same as S" is the bit itself when S is 1 and its inverse when 0 */
    j1 = (bit22 == sign);
    j2 = (bit21 == sign);

    return ((sign << 26) |
            ((v & 0x1ff800) << 5) |
            (j1 << 13) |
            (j2 << 11) |
            (v & 0x7ff));
}
1233
/*
 * Pack a signed 20-bit displacement into the fields of a 32-bit Thumb
 * conditional branch: bit 19 -> bit 26, bits 16:11 -> bits 21:16,
 * bit 17 -> bit 13 (J1), bit 18 -> bit 11 (J2), bits 10:0 in place.
 *
 * Returns the packed fields, or -1 if v is outside [-0x80000, 0x7ffff].
 */
static int
encode_thumb_cc_jump(int v)
{
    int         sign, j1, j2;

    if (v < (int)-0x80000 || v > 0x7ffff)
        return (-1);

    sign = (v & 0x80000) != 0;
    j1   = (v & 0x20000) != 0;
    j2   = (v & 0x40000) != 0;

    return ((sign << 26) |
            ((v & 0x1f800) << 5) |
            (j1 << 13) |
            (j2 << 11) |
            (v & 0x7ff));
}
1246
1247 static int
1248 encode_thumb_shift(int v, int type)
1249 {
1250     switch (type) {
1251         case ARM_ASR:
1252         case ARM_LSL:
1253         case ARM_LSR:           type >>= 1;     break;
1254         default:                assert(!"handled shift");
1255     }
1256     assert(v >= 0 && v <= 31);
1257     return (((v & 0x1c) << 10) | ((v & 3) << 6) | type);
1258 }
1259
1260 static void
1261 _tcit(jit_state_t *_jit, unsigned int tc, int it)
1262 {
1263     int         c;
1264     int         m;
1265     c = (tc >> 28) & 1;
1266     assert(!(tc & 0xfffffff) && tc != ARM_CC_NV);
1267     switch (it) {
1268         case THUMB2_IT:         m =   1<<3;                     break;
1269         case THUMB2_ITT:        m =  (c<<3)| (1<<2);            break;
1270         case THUMB2_ITE:        m = (!c<<3)| (1<<2);            break;
1271         case THUMB2_ITTT:       m =  (c<<3)| (c<<2)| (1<<1);    break;
1272         case THUMB2_ITET:       m = (!c<<3)| (c<<2)| (1<<1);    break;
1273         case THUMB2_ITTE:       m =  (c<<3)|(!c<<2)| (1<<1);    break;
1274         case THUMB2_ITEE:       m = (!c<<3)|(!c<<2)| (1<<1);    break;
1275         case THUMB2_ITTTT:      m =  (c<<3)| (c<<2)| (c<<1)|1;  break;
1276         case THUMB2_ITETT:      m = (!c<<3)| (c<<2)| (c<<1)|1;  break;
1277         case THUMB2_ITTET:      m =  (c<<3)|(!c<<2)| (c<<1)|1;  break;
1278         case THUMB2_ITEET:      m = (!c<<3)|(!c<<2)| (c<<1)|1;  break;
1279         case THUMB2_ITTTE:      m =  (c<<3)| (c<<2)|(!c<<1)|1;  break;
1280         case THUMB2_ITETE:      m = (!c<<3)| (c<<2)|(!c<<1)|1;  break;
1281         case THUMB2_ITTEE:      m =  (c<<3)|(!c<<2)|(!c<<1)|1;  break;
1282         case THUMB2_ITEEE:      m = (!c<<3)|(!c<<2)|(!c<<1)|1;  break;
1283         default:                abort();
1284     }
1285     assert(m && (tc != ARM_CC_AL || !(m & (m - 1))));
1286     is(0xbf00 | (tc >> 24) | m);
1287 }
1288
1289 static void
1290 _corrr(jit_state_t *_jit, int cc, int o, int rn, int rd, int rm)
1291 {
1292     assert(!(cc & 0x0fffffff));
1293     assert(!(o  & 0xf00fff0f));
1294     ii(cc|o|(_u4(rn)<<16)|(_u4(rd)<<12)|_u4(rm));
1295 }
1296
1297 static void
1298 _corri(jit_state_t *_jit, int cc, int o, int rn, int rd, int im)
1299 {
1300     assert(!(cc & 0x0fffffff));
1301     assert(!(o  & 0xf00fffff));
1302     assert(!(im & 0xfffff000));
1303     ii(cc|o|(_u4(rn)<<16)|(_u4(rd)<<12)|_u12(im));
1304 }
1305
1306 static void
1307 _corri8(jit_state_t *_jit, int cc, int o, int rn, int rt, int im)
1308 {
1309     assert(!(cc & 0x0fffffff));
1310     assert(!(o  & 0xf00fff0f));
1311     assert(!(im & 0xffffff00));
1312     ii(cc|o|(_u4(rn)<<16)|(_u4(rt)<<12)|((im&0xf0)<<4)|(im&0x0f));
1313 }
1314
1315 static void
1316 _coriw(jit_state_t *_jit, int cc, int o, int rd, int im)
1317 {
1318     assert(!(cc & 0x0fffffff));
1319     assert(!(o  & 0xf00fffff));
1320     assert(!(im & 0xffff0000));
1321     ii(cc|o|((im&0xf000)<<4)|(_u4(rd)<<12)|(im&0xfff));
1322 }
1323
1324 static void
1325 _torrr(jit_state_t *_jit, int o, int rn, int rd, int rm)
1326 {
1327     jit_thumb_t thumb;
1328     assert(!(o & 0xf0f0f));
1329     thumb.i = o|(_u4(rn)<<16)|(_u4(rd)<<8)|_u4(rm);
1330     iss(thumb.s[0], thumb.s[1]);
1331 }
1332
1333 static void
1334 _torrrs(jit_state_t *_jit, int o, int rn, int rd, int rm, int im)
1335 {
1336     jit_thumb_t thumb;
1337     assert(!(o  & 0x000f0f0f));
1338     assert(!(im & 0xffff8f0f));
1339     thumb.i = o|(_u4(rn)<<16)|(_u4(rd)<<8)|im|_u4(rm);
1340     iss(thumb.s[0], thumb.s[1]);
1341 }
1342
1343 static void
1344 _torxr(jit_state_t *_jit, int o, int rn, int rt, int rm)
1345 {
1346     jit_thumb_t thumb;
1347     assert(!(o & 0xf0f0f));
1348     thumb.i = o|(_u4(rn)<<16)|(_u4(rt)<<12)|_u4(rm);
1349     iss(thumb.s[0], thumb.s[1]);
1350 }
1351
1352 static void
1353 _torrrr(jit_state_t *_jit, int o, int rn, int rl, int rh, int rm)
1354 {
1355     jit_thumb_t thumb;
1356     assert(!(o & 0x000fff0f));
1357     thumb.i = o|(_u4(rn)<<16)|(_u4(rl)<<12)|(_u4(rh)<<8)|_u4(rm);
1358     iss(thumb.s[0], thumb.s[1]);
1359 }
1360
1361 static void
1362 _torrri8(jit_state_t *_jit, int o, int rn, int rt, int rt2, int im)
1363 {
1364     jit_thumb_t thumb;
1365     assert(!(o  & 0x000fffff));
1366     assert(!(im & 0xffffff00));
1367     thumb.i = o|(_u4(rn)<<16)|(_u4(rt)<<12)|(_u4(rt2)<<8)|im;
1368     iss(thumb.s[0], thumb.s[1]);
1369 }
1370
1371 static void
1372 _torri(jit_state_t *_jit, int o, int rn, int rd, int im)
1373 {
1374     jit_thumb_t thumb;
1375     assert(!(o  & 0x0c0f7fff));
1376     assert(!(im & 0xfbff8f00));
1377     thumb.i = o|(_u4(rn)<<16)|(_u4(rd)<<8)|im;
1378     iss(thumb.s[0], thumb.s[1]);
1379 }
1380
1381 static void
1382 _torri8(jit_state_t *_jit, int o, int rn, int rt, int im)
1383 {
1384     jit_thumb_t thumb;
1385     assert(!(o  & 0x000ff0ff));
1386     assert(!(im & 0xffffff00));
1387     thumb.i = o|(_u4(rn)<<16)|(_u4(rt)<<12)|im;
1388     iss(thumb.s[0], thumb.s[1]);
1389 }
1390
1391 static void
1392 _torri12(jit_state_t *_jit, int o, int rn, int rt, int im)
1393 {
1394     jit_thumb_t thumb;
1395     assert(!(o  & 0x000fffff));
1396     assert(!(im & 0xfffff000));
1397     thumb.i = o|(_u4(rn)<<16)|(_u4(rt)<<12)|im;
1398     iss(thumb.s[0], thumb.s[1]);
1399 }
1400
1401 static void
1402 _tshift(jit_state_t *_jit, int o, int rd, int rm, int im)
1403 {
1404     jit_thumb_t thumb;
1405     assert(!(o & 0x7fcf));
1406     assert(im >= 0 && im < 32);
1407     thumb.i = o|((im&0x1c)<<10)|(_u4(rd)<<8)|((im&3)<<6)|_u4(rm);
1408     iss(thumb.s[0], thumb.s[1]);
1409 }
1410
1411 static void
1412 _toriw(jit_state_t *_jit, int o, int rd, int im)
1413 {
1414     jit_thumb_t thumb;
1415     assert(!(im & 0xffff0000));
1416     thumb.i = o|((im&0xf000)<<4)|((im&0x800)<<15)|((im&0x700)<<4)|(_u4(rd)<<8)|(im&0xff);
1417     iss(thumb.s[0], thumb.s[1]);
1418 }
1419
1420 static void
1421 _tc8(jit_state_t *_jit, int cc, int im)
1422 {
1423     assert(!(cc & 0x0fffffff));
1424     assert(cc != ARM_CC_AL && cc != ARM_CC_NV);
1425     assert(im >= -128 && im <= 127);
1426     is(THUMB_CC_B|(cc>>20)|(im&0xff));
1427 }
1428
1429 static void
1430 _t11(jit_state_t *_jit, int im)
1431 {
1432     assert(!(im & 0xfffff800));
1433     is(THUMB_B|im);
1434 }
1435
1436 static void
1437 _tcb(jit_state_t *_jit, int cc, int im)
1438 {
1439     jit_thumb_t thumb;
1440     assert(!(cc & 0xfffffff));
1441     assert(cc != ARM_CC_AL && cc != ARM_CC_NV);
1442     cc = ((jit_uint32_t)cc) >> 6;
1443     assert(!(im & (THUMB2_CC_B|cc)));
1444     thumb.i = THUMB2_CC_B|cc|im;
1445     iss(thumb.s[0], thumb.s[1]);
1446 }
1447
1448 static void
1449 _blxi(jit_state_t *_jit, int im)
1450 {
1451     assert(!(im & 0xfe000000));
1452     ii(ARM_BLXI|im);
1453 }
1454
1455 static void
1456 _tb(jit_state_t *_jit, int o, int im)
1457 {
1458     jit_thumb_t thumb;
1459     assert(!(o & 0x07ff2fff));
1460     assert(!(o & im));
1461     thumb.i = o|im;
1462     iss(thumb.s[0], thumb.s[1]);
1463 }
1464
1465 static void
1466 _corrrr(jit_state_t *_jit, int cc, int o, int rh, int rl, int rm, int rn)
1467 {
1468     assert(!(cc & 0x0fffffff));
1469     assert(!(o & 0xf00fff0f));
1470     ii(cc|o|(_u4(rh)<<16)|(_u4(rl)<<12)|(_u4(rm)<<8)|_u4(rn));
1471 }
1472
1473 static void
1474 _corrrs(jit_state_t *_jit, int cc, int o, int rn, int rd, int rm, int im)
1475 {
1476     assert(!(cc & 0x0fffffff));
1477     assert(!(o  & 0xf000ff8f));
1478     ii(cc|o|(_u4(rd)<<12)|(_u4(rn)<<16)|(im<<7)|_u4(rm));
1479 }
1480
1481 static void
1482 _cshift(jit_state_t *_jit, int cc, int o, int rd, int rm, int rn, int im)
1483 {
1484     assert(!(cc & 0x0fffffff));
1485     assert(!(o  & 0xffe0ff8f));
1486     assert(((_u4(rm)<<8)&(im<<7)) == 0);
1487     ii(cc|ARM_SHIFT|o|(_u4(rd)<<12)|(_u4(rm)<<8)|(im<<7)|_u4(rn));
1488 }
1489
1490 static void
1491 _cb(jit_state_t *_jit, int cc, int o, int im)
1492 {
1493     assert(!(cc & 0x0fffffff));
1494     assert(!(o  & 0xf0ffffff));
1495     ii(cc|o|_u24(im));
1496 }
1497
1498 static void
1499 _cbx(jit_state_t *_jit, int cc, int o, int rm)
1500 {
1501     assert(!(cc & 0x0fffffff));
1502     assert(!(o  & 0xf000000f));
1503     ii(cc|o|_u4(rm));
1504 }
1505
1506 static void
1507 _corl(jit_state_t *_jit, int cc, int o, int r0, int i0)
1508 {
1509     assert(!(cc & 0x0fffffff));
1510     assert(!(o  & 0xf00fffff));
1511     ii(cc|o|(_u4(r0)<<16)|_u16(i0));
1512 }
1513
1514 static void
1515 _c6orr(jit_state_t *_jit, int cc, int o, int rd, int rm)
1516 {
1517     assert(!(cc & 0x0fffffff));
1518     assert(!(o  & 0xf000f00f));
1519     ii(cc|o|(_u4(rd)<<12)|_u4(rm));
1520 }
1521
1522 static void
1523 _tpp(jit_state_t *_jit, int o, int im)
1524 {
1525     jit_thumb_t thumb;
1526     assert(!(o & 0x0000ffff));
1527     if (o == THUMB2_PUSH)
1528         assert(!(im & 0x8000));
1529     assert(__builtin_popcount(im & 0x1fff) > 1);
1530     thumb.i = o|im;
1531     iss(thumb.s[0], thumb.s[1]);
1532 }
1533
1534 static void
1535 _torl(jit_state_t *_jit, int o, int rn, int im)
1536 {
1537     jit_thumb_t thumb;
1538     assert(!(o & 0xf1fff));
1539     assert(rn != _R15 || !im || ((o & 0xc000) == 0xc000));
1540     assert(!(o & THUMB2_LDM_W) || !(im & (1 << rn)));
1541     thumb.i = o | (_u4(rn)<<16)|_u13(im);
1542     iss(thumb.s[0], thumb.s[1]);
1543 }
1544
1545 static void
1546 _dmb(jit_state_t *_jit, int im)
1547 {
1548     assert(!(im & 0xfffffff0));
1549     ii(ARM_DMB|im);
1550 }
1551
1552 static void
1553 _tdmb(jit_state_t *_jit, int im)
1554 {
1555     jit_thumb_t thumb;
1556     assert(!(im & 0xfffffff0));
1557     thumb.i = THUMB2_DMB | im;
1558     iss(thumb.s[0], thumb.s[1]);
1559 }
1560
1561 static void
1562 _nop(jit_state_t *_jit, jit_int32_t i0)
1563 {
1564     if (jit_thumb_p()) {
1565         for (; i0 > 0; i0 -= 2)
1566             T1_NOP();
1567     }
1568     else {
1569         for (; i0 > 0; i0 -= 4)
1570             NOP();
1571     }
1572     assert(i0 == 0);
1573 }
1574
1575 static void
1576 _movr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1577 {
1578     if (r0 != r1) {
1579         if (jit_thumb_p())
1580             T1_MOV(r0, r1);
1581         else
1582             MOV(r0, r1);
1583     }
1584 }
1585
1586 static void
1587 _movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
1588 {
1589     int                 i;
1590     if (jit_thumb_p()) {
1591         if (!jit_no_set_flags() && r0 < 8 && !(i0 & 0xffffff80))
1592             T1_MOVI(r0, i0);
1593         else if ((i = encode_thumb_immediate(i0)) != -1)
1594             T2_MOVI(r0, i);
1595         else if ((i = encode_thumb_immediate(~i0)) != -1)
1596             T2_MVNI(r0, i);
1597         else {
1598             T2_MOVWI(r0, (jit_uint16_t)i0);
1599             if (i0 & 0xffff0000)
1600                 T2_MOVTI(r0, (jit_uint16_t)((unsigned)i0 >> 16));
1601         }
1602     }
1603     else {
1604         if (jit_armv6_p() && !(i0 & 0xffff0000))
1605             MOVWI(r0, i0);
1606         else if ((i = encode_arm_immediate(i0)) != -1)
1607             MOVI(r0, i);
1608         else if ((i = encode_arm_immediate(~i0)) != -1)
1609             MVNI(r0, i);
1610         else if (jit_armv6_p()) {
1611             MOVWI(r0, (jit_uint16_t)(i0));
1612             if ((i0 & 0xffff0000))
1613                 MOVTI(r0, (jit_uint16_t)((unsigned)i0 >> 16));
1614         }
1615         else
1616             load_const(0, r0, i0);
1617     }
1618 }
1619
1620 static jit_word_t
1621 _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
1622 {
1623     jit_word_t          w;
1624     w = _jit->pc.w;
1625     if (jit_thumb_p()) {
1626         T2_MOVWI(r0, (jit_uint16_t)(i0));
1627         T2_MOVTI(r0, (jit_uint16_t)((unsigned)i0 >> 16));
1628     }
1629     else
1630         load_const(1, r0, 0);
1631     return (w);
1632 }
1633
1634 static void
1635 _movznr(jit_state_t *_jit, int ct, jit_int32_t r0,
1636         jit_int32_t r1, jit_int32_t r2)
1637 {
1638     if (jit_thumb_p()) {
1639         if (r2 < 7)
1640             T1_CMPI(r2, 0);
1641         else
1642             T2_CMPI(r2, 0);
1643         IT(ct);
1644         T1_MOV(r0, r1);
1645     } else {
1646         CMPI(r2, 0);
1647         CC_MOV(ct, r0, r1);
1648     }
1649 }
1650
1651 static void
1652 _movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1653 {
1654     _movznr(_jit, ARM_CC_NE, r0, r1, r2);
1655 }
1656
1657 static void
1658 _movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1659 {
1660     _movznr(_jit, ARM_CC_EQ, r0, r1, r2);
1661 }
1662
1663 static void
1664 _casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
1665       jit_int32_t r2, jit_int32_t r3, jit_word_t i0)
1666 {
1667     jit_int32_t         r1_reg, iscasi;
1668     jit_word_t          retry, done, jump0, jump1;
1669     if (!jit_armv7_p())
1670         fallback_casx(r0, r1, r2, r3, i0);
1671     else {
1672         if ((iscasi = (r1 == _NOREG))) {
1673             r1_reg = jit_get_reg(jit_class_gpr);
1674             r1 = rn(r1_reg);
1675             movi(r1, i0);
1676         }
1677         if (jit_thumb_p()) {
1678             T2_DMB(DMB_ISH);
1679             /* retry: */
1680             retry = _jit->pc.w;
1681             T2_LDREX(r0, r1, 0);
1682             eqr(r0, r0, r2);
1683             jump0 = beqi(_jit->pc.w, r0, 0);    /* beqi done r0 0 */
1684             T2_STREX(r0, r3, r1, 0);
1685             jump1 = bnei(_jit->pc.w, r0, 0);    /* bnei retry r0 0 */
1686             /* r0 = 0 if memory updated, 1 otherwise */
1687             xori(r0, r0, 1);
1688             /* done: */
1689             done = _jit->pc.w;
1690             T2_DMB(DMB_ISH);
1691         }
1692         else {
1693             DMB(DMB_ISH);
1694             /* retry: */
1695             retry = _jit->pc.w;
1696             LDREX(r0, r1);
1697             eqr(r0, r0, r2);
1698             jump0 = beqi(_jit->pc.w, r0, 0);    /* beqi done r0 0 */
1699             STREX(r0, r3, r1);
1700             jump1 = bnei(_jit->pc.w, r0, 0);    /* bnei retry r0 0 */
1701             /* r0 = 0 if memory updated, 1 otherwise */
1702             xori(r0, r0, 1);
1703             /* done: */
1704             done = _jit->pc.w;
1705             DMB(DMB_ISH);
1706         }
1707         patch_at(arm_patch_jump, jump0, done);
1708         patch_at(arm_patch_jump, jump1, retry);
1709         if (iscasi)
1710             jit_unget_reg(r1_reg);
1711     }
1712 }
1713
1714 static void
1715 _comr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1716 {
1717     if (jit_thumb_p()) {
1718         if (!jit_no_set_flags() && (r0|r1) < 8)
1719             T1_NOT(r0, r1);
1720         else
1721             T2_NOT(r0, r1);
1722     }
1723     else
1724         NOT(r0, r1);
1725 }
1726
1727 static void
1728 _negr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1729 {
1730     if (jit_thumb_p()) {
1731         if (!jit_no_set_flags() && (r0|r1) < 8)
1732             T1_RSBI(r0, r1);
1733         else
1734             T2_RSBI(r0, r1, 0);
1735     }
1736     else
1737         RSBI(r0, r1, 0);
1738 }
1739
1740 static void
1741 _addr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1742 {
1743     if (jit_thumb_p()) {
1744         if (!jit_no_set_flags() && (r0|r1|r2) < 8)
1745             T1_ADD(r0, r1, r2);
1746         else if (r0 == r1 || r0 == r2)
1747             T1_ADDX(r0, r0 == r1 ? r2 : r1);
1748         else
1749             T2_ADD(r0, r1, r2);
1750     }
1751     else
1752         ADD(r0, r1, r2);
1753 }
1754
1755 static void
1756 _addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1757 {
1758     int                 i;
1759     jit_int32_t         reg;
1760     if (jit_thumb_p()) {
1761         if (!jit_no_set_flags() && (r0|r1) < 8 && !(i0 & ~7))
1762             T1_ADDI3(r0, r1, i0);
1763         else if (!jit_no_set_flags() && (r0|r1) < 8 && !(-i0 & ~7))
1764             T1_SUBI3(r0, r1, -i0);
1765         else if (!jit_no_set_flags() && r0 < 8 && r0 == r1 && !(i0 & ~0xff))
1766             T1_ADDI8(r0, i0);
1767         else if (!jit_no_set_flags() && r0 < 8 && r0 == r1 && !(-i0 & ~0xff))
1768             T1_SUBI8(r0, -i0);
1769         else if ((i = encode_thumb_immediate(i0)) != -1)
1770             T2_ADDI(r0, r1, i);
1771         else if ((i = encode_thumb_immediate(-i0)) != -1)
1772             T2_SUBI(r0, r1, i);
1773         else if ((i = encode_thumb_word_immediate(i0)) != -1)
1774             T2_ADDWI(r0, r1, i);
1775         else if ((i = encode_thumb_word_immediate(-i0)) != -1)
1776             T2_SUBWI(r0, r1, i);
1777         else {
1778             reg = jit_get_reg(jit_class_gpr);
1779             movi(rn(reg), i0);
1780             T2_ADD(r0, r1, rn(reg));
1781             jit_unget_reg(reg);
1782         }
1783     }
1784     else {
1785         if ((i = encode_arm_immediate(i0)) != -1)
1786             ADDI(r0, r1, i);
1787         else if ((i = encode_arm_immediate(-i0)) != -1)
1788             SUBI(r0, r1, i);
1789         else if (r0 != r1) {
1790             movi(r0, i0);
1791             ADD(r0, r1, r0);
1792         }
1793         else {
1794             reg = jit_get_reg(jit_class_gpr);
1795             movi(rn(reg), i0);
1796             ADD(r0, r1, rn(reg));
1797             jit_unget_reg(reg);
1798         }
1799     }
1800 }
1801
1802 static void
1803 _addcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1804 {
1805     if (jit_thumb_p()) {
1806         /* thumb auto set carry if not inside IT block */
1807         if ((r0|r1|r2) < 8)
1808             T1_ADD(r0, r1, r2);
1809         else
1810             T2_ADDS(r0, r1, r2);
1811     }
1812     else
1813         ADDS(r0, r1, r2);
1814 }
1815
1816 static void
1817 _addci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1818 {
1819     int                 i;
1820     jit_int32_t         reg;
1821     if (jit_thumb_p()) {
1822         if ((r0|r1) < 8 && !(i0 & ~7))
1823             T1_ADDI3(r0, r1, i0);
1824         else if ((r0|r1) < 8 && !(-i0 & ~7))
1825             T1_SUBI3(r0, r1, -i0);
1826         else if (r0 < 8 && r0 == r1 && !(i0 & ~0xff))
1827             T1_ADDI8(r0, i0);
1828         else if (r0 < 8 && r0 == r1 && !(-i0 & ~0xff))
1829             T1_SUBI8(r0, -i0);
1830         else if ((i = encode_thumb_immediate(i0)) != -1)
1831             T2_ADDSI(r0, r1, i);
1832         else if ((i = encode_thumb_immediate(-i0)) != -1)
1833             T2_SUBSI(r0, r1, i);
1834         else {
1835             reg = jit_get_reg(jit_class_gpr);
1836             movi(rn(reg), i0);
1837             T2_ADDS(r0, r1, rn(reg));
1838             jit_unget_reg(reg);
1839         }
1840     }
1841     else {
1842         if ((i = encode_arm_immediate(i0)) != -1)
1843             ADDSI(r0, r1, i);
1844         else if ((i = encode_arm_immediate(-i0)) != -1)
1845             SUBSI(r0, r1, i);
1846         else if (r0 != r1) {
1847             movi(r0, i0);
1848             ADDS(r0, r1, r0);
1849         }
1850         else {
1851             reg = jit_get_reg(jit_class_gpr);
1852             movi(rn(reg), i0);
1853             ADDS(r0, r1, rn(reg));
1854             jit_unget_reg(reg);
1855         }
1856     }
1857 }
1858
1859 static void
1860 _addxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1861 {
1862     /* keep setting carry because don't know last ADC */
1863     if (jit_thumb_p()) {
1864         /* thumb auto set carry if not inside IT block */
1865         if ((r0|r1|r2) < 8 && (r0 == r1 || r0 == r2))
1866             T1_ADC(r0, r0 == r1 ? r2 : r1);
1867         else
1868             T2_ADCS(r0, r1, r2);
1869     }
1870     else
1871         ADCS(r0, r1, r2);
1872 }
1873
1874 static void
1875 _addxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1876 {
1877     int                 i;
1878     jit_int32_t         reg;
1879     int                 no_set_flags;
1880     if (jit_thumb_p()) {
1881         no_set_flags = jit_no_set_flags();
1882         jit_no_set_flags() = 1;
1883         if ((i = encode_thumb_immediate(i0)) != -1)
1884             T2_ADCSI(r0, r1, i);
1885         else if ((i = encode_thumb_immediate(-i0)) != -1)
1886             T2_SBCSI(r0, r1, i);
1887         else if (r0 != r1) {
1888             movi(r0, i0);
1889             T2_ADCS(r0, r1, r0);
1890         }
1891         else {
1892             reg = jit_get_reg(jit_class_gpr);
1893             movi(rn(reg), i0);
1894             T2_ADCS(r0, r1, rn(reg));
1895             jit_unget_reg(reg);
1896         }
1897         jit_no_set_flags() = no_set_flags;
1898     }
1899     else {
1900         if ((i = encode_arm_immediate(i0)) != -1)
1901             ADCSI(r0, r1, i);
1902         else if ((i = encode_arm_immediate(-i0)) != -1)
1903             SBCSI(r0, r1, i);
1904         else if (r0 != r1) {
1905             movi(r0, i0);
1906             ADCS(r0, r1, r0);
1907         }
1908         else {
1909             reg = jit_get_reg(jit_class_gpr);
1910             movi(rn(reg), i0);
1911             ADCS(r0, r1, rn(reg));
1912             jit_unget_reg(reg);
1913         }
1914     }
1915 }
1916
1917 static void
1918 _subr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1919 {
1920     if (jit_thumb_p()) {
1921         if (!jit_no_set_flags() && (r0|r1|r2) < 8)
1922             T1_SUB(r0, r1, r2);
1923         else
1924             T2_SUB(r0, r1, r2);
1925     }
1926     else
1927         SUB(r0, r1, r2);
1928 }
1929
1930 static void
1931 _subi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1932 {
1933     int                 i;
1934     jit_int32_t         reg;
1935     if (jit_thumb_p()) {
1936         if (!jit_no_set_flags() && (r0|r1) < 8 && !(i0 & ~7))
1937             T1_SUBI3(r0, r1, i0);
1938         else if (!jit_no_set_flags() && (r0|r1) < 8 && !(-i0 & ~7))
1939             T1_ADDI3(r0, r1, -i0);
1940         else if (!jit_no_set_flags() && r0 < 8 && r0 == r1 && !(i0 & ~0xff))
1941             T1_SUBI8(r0, i0);
1942         else if (!jit_no_set_flags() && r0 < 8 && r0 == r1 && !(-i0 & ~0xff))
1943             T1_ADDI8(r0, -i0);
1944         else if ((i = encode_thumb_immediate(i0)) != -1)
1945             T2_SUBI(r0, r1, i);
1946         else if ((i = encode_thumb_immediate(-i0)) != -1)
1947             T2_ADDI(r0, r1, i);
1948         else if ((i = encode_thumb_word_immediate(i0)) != -1)
1949             T2_SUBWI(r0, r1, i);
1950         else if ((i = encode_thumb_word_immediate(-i0)) != -1)
1951             T2_ADDWI(r0, r1, i);
1952         else {
1953             reg = jit_get_reg(jit_class_gpr);
1954             movi(rn(reg), i0);
1955             T2_SUB(r0, r1, rn(reg));
1956             jit_unget_reg(reg);
1957         }
1958     }
1959     else {
1960         if ((i = encode_arm_immediate(i0)) != -1)
1961             SUBI(r0, r1, i);
1962         else if ((i = encode_arm_immediate(-i0)) != -1)
1963             ADDI(r0, r1, i);
1964         else if (r0 != r1) {
1965             movi(r0, i0);
1966             SUB(r0, r1, r0);
1967         }
1968         else {
1969             reg = jit_get_reg(jit_class_gpr);
1970             movi(rn(reg), i0);
1971             SUB(r0, r1, rn(reg));
1972             jit_unget_reg(reg);
1973         }
1974     }
1975 }
1976
1977 static void
1978 _subcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1979 {
1980     if (jit_thumb_p()) {
1981         /* thumb auto set carry if not inside IT block */
1982         if ((r0|r1|r2) < 8)
1983             T1_SUB(r0, r1, r2);
1984         else
1985             T2_SUBS(r0, r1, r2);
1986     }
1987     else
1988         SUBS(r0, r1, r2);
1989 }
1990
1991 static void
1992 _subci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1993 {
1994     int                 i;
1995     jit_int32_t         reg;
1996     if (jit_thumb_p()) {
1997         if ((r0|r1) < 8 && !(i0 & ~7))
1998             T1_SUBI3(r0, r1, i0);
1999         else if ((r0|r1) < 8 && !(-i0 & ~7))
2000             T1_ADDI3(r0, r1, -i0);
2001         else if (r0 < 8 && r0 == r1 && !(i0 & ~0xff))
2002             T1_SUBI8(r0, i0);
2003         else if (r0 < 8 && r0 == r1 && !(-i0 & ~0xff))
2004             T1_ADDI8(r0, -i0);
2005         else if ((i = encode_thumb_immediate(i0)) != -1)
2006             T2_SUBSI(r0, r1, i);
2007         else if ((i = encode_thumb_immediate(-i0)) != -1)
2008             T2_ADDSI(r0, r1, i);
2009         else {
2010             reg = jit_get_reg(jit_class_gpr);
2011             movi(rn(reg), i0);
2012             T2_SUBS(r0, r1, rn(reg));
2013             jit_unget_reg(reg);
2014         }
2015     }
2016     else {
2017         if ((i = encode_arm_immediate(i0)) != -1)
2018             SUBSI(r0, r1, i);
2019         else if ((i = encode_arm_immediate(-i0)) != -1)
2020             ADDSI(r0, r1, i);
2021         else if (r0 != r1) {
2022             movi(r0, i0);
2023             SUBS(r0, r1, r0);
2024         }
2025         else {
2026             reg = jit_get_reg(jit_class_gpr);
2027             movi(rn(reg), i0);
2028             SUBS(r0, r1, rn(reg));
2029             jit_unget_reg(reg);
2030         }
2031     }
2032 }
2033
2034 static void
2035 _subxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2036 {
2037     /* keep setting carry because don't know last SBC */
2038     if (jit_thumb_p()) {
2039         /* thumb auto set carry if not inside IT block */
2040         if ((r0|r1|r2) < 8 && r0 == r1)
2041             T1_SBC(r0, r2);
2042         else
2043             T2_SBCS(r0, r1, r2);
2044     }
2045     else
2046         SBCS(r0, r1, r2);
2047 }
2048
2049 static void
2050 _subxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2051 {
2052     int                 i;
2053     jit_int32_t         reg;
2054     int                 no_set_flags;
2055     if (jit_thumb_p()) {
2056         no_set_flags = jit_no_set_flags();
2057         jit_no_set_flags() = 1;
2058         if ((i = encode_arm_immediate(i0)) != -1)
2059             T2_SBCSI(r0, r1, i);
2060         else if ((i = encode_arm_immediate(-i0)) != -1)
2061             T2_ADCSI(r0, r1, i);
2062         else if (r0 != r1) {
2063             movi(r0, i0);
2064             T2_SBCS(r0, r1, r0);
2065         }
2066         else {
2067             reg = jit_get_reg(jit_class_gpr);
2068             movi(rn(reg), i0);
2069             SBCS(r0, r1, rn(reg));
2070             jit_unget_reg(reg);
2071         }
2072         jit_no_set_flags() = no_set_flags;
2073     }
2074     else {
2075         if ((i = encode_arm_immediate(i0)) != -1)
2076             SBCSI(r0, r1, i);
2077         else if ((i = encode_arm_immediate(-i0)) != -1)
2078             ADCSI(r0, r1, i);
2079         else if (r0 != r1) {
2080             movi(r0, i0);
2081             SBCS(r0, r1, r0);
2082         }
2083         else {
2084             reg = jit_get_reg(jit_class_gpr);
2085             movi(rn(reg), i0);
2086             SBCS(r0, r1, rn(reg));
2087             jit_unget_reg(reg);
2088         }
2089     }
2090 }
2091
2092 static void
2093 _rsbi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2094 {
2095     subi(r0, r1, i0);
2096     negr(r0, r0);
2097 }
2098
2099 static void
2100 _mulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2101 {
2102     jit_int32_t         reg;
2103     if (jit_thumb_p()) {
2104         if (!jit_no_set_flags() && r0 == r2 && (r0|r1) < 8)
2105             T1_MUL(r0, r1);
2106         else if (!jit_no_set_flags() && r0 == r1 && (r0|r2) < 8)
2107             T1_MUL(r0, r2);
2108         else
2109             T2_MUL(r0, r1, r2);
2110     }
2111     else {
2112         if (r0 == r1 && !jit_armv6_p()) {
2113             if (r0 != r2)
2114                 MUL(r0, r2, r1);
2115             else {
2116                 reg = jit_get_reg(jit_class_gpr);
2117                 MOV(rn(reg), r1);
2118                 MUL(r0, rn(reg), r2);
2119                 jit_unget_reg(reg);
2120             }
2121         }
2122         else
2123             MUL(r0, r1, r2);
2124     }
2125 }
2126
2127 static void
2128 _muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2129 {
2130     jit_int32_t         reg;
2131     reg = jit_get_reg(jit_class_gpr);
2132     movi(rn(reg), i0);
2133     mulr(r0, r1, rn(reg));
2134     jit_unget_reg(reg);
2135 }
2136
2137 static void
2138 _iqmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
2139         jit_int32_t r2, jit_int32_t r3, jit_bool_t sign)
2140 {
2141     jit_int32_t         reg;
2142     if (jit_thumb_p()) {
2143         if (r2 == r3) {
2144             reg = jit_get_reg(jit_class_gpr);
2145             movr(rn(reg), r2);
2146             if (sign)
2147                 T2_SMULL(r0, r1, rn(reg), r2);
2148             else
2149                 T2_UMULL(r0, r1, rn(reg), r2);
2150             jit_unget_reg(reg);
2151         }
2152         else if (r0 != r2 && r1 != r2) {
2153             if (sign)
2154                 T2_SMULL(r0, r1, r2, r3);
2155             else
2156                 T2_UMULL(r0, r1, r2, r3);
2157         }
2158         else {
2159             if (sign)
2160                 T2_SMULL(r0, r1, r3, r2);
2161             else
2162                 T2_UMULL(r0, r1, r3, r2);
2163         }
2164     }
2165     else {
2166         if (r2 == r3) {
2167             reg = jit_get_reg(jit_class_gpr);
2168             movr(rn(reg), r2);
2169             if (sign)
2170                 SMULL(r0, r1, rn(reg), r2);
2171             else
2172                 UMULL(r0, r1, rn(reg), r2);
2173             jit_unget_reg(reg);
2174         }
2175         else if (r0 != r2 && r1 != r2) {
2176             if (sign)
2177                 SMULL(r0, r1, r2, r3);
2178             else
2179                 UMULL(r0, r1, r2, r3);
2180         }
2181         else {
2182             if (sign)
2183                 SMULL(r0, r1, r3, r2);
2184             else
2185                 UMULL(r0, r1, r3, r2);
2186         }
2187     }
2188 }
2189
2190 static void
2191 _iqmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
2192         jit_int32_t r2, jit_word_t i0, jit_bool_t sign)
2193 {
2194     jit_int32_t         reg;
2195     reg = jit_get_reg(jit_class_gpr);
2196     movi(rn(reg), i0);
2197     iqmulr(r0, r1, r2, rn(reg), sign);
2198     jit_unget_reg(reg);
2199 }
2200
2201 static void
2202 _divrem(jit_state_t *_jit, int div, int sign,
2203         jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2204 {
2205     jit_word_t          d;
2206     jit_word_t          w;
2207     jit_get_reg_args();
2208     movr(_R0_REGNO, r1);
2209     movr(_R1_REGNO, r2);
2210     if (sign)                   w = (jit_word_t)__aeabi_idivmod;
2211     else                        w = (jit_word_t)__aeabi_uidivmod;
2212     if (!jit_exchange_p()) {
2213         if (jit_thumb_p())      d = ((w - _jit->pc.w) >> 1) - 2;
2214         else                    d = ((w - _jit->pc.w) >> 2) - 2;
2215         if (_s24P(d)) {
2216             if (jit_thumb_p())  T2_BLI(encode_thumb_jump(d));
2217             else                BLI(d & 0x00ffffff);
2218         }
2219         else                    goto fallback;
2220     }
2221     else {
2222     fallback:
2223         movi(_R2_REGNO, w);
2224         if (jit_thumb_p())      T1_BLX(_R2_REGNO);
2225         else                    BLX(_R2_REGNO);
2226     }
2227     if (div)                    movr(r0, _R0_REGNO);
2228     else                        movr(r0, _R1_REGNO);
2229     jit_unget_reg_args();
2230 }
2231
2232 static void
2233 _divr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2234 {
2235     if (jit_armv7r_p() && jit_thumb_p())
2236         T2_SDIV(r0, r1, r2);
2237     else
2238         divrem(1, 1, r0, r1, r2);
2239 }
2240
2241 static void
2242 _divi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2243 {
2244     jit_int32_t         reg;
2245     reg = jit_get_reg(jit_class_gpr);
2246     movi(rn(reg), i0);
2247     divr(r0, r1, rn(reg));
2248     jit_unget_reg(reg);
2249 }
2250
2251 static void
2252 _divr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2253 {
2254     if (jit_armv7r_p() && jit_thumb_p())
2255         T2_UDIV(r0, r1, r2);
2256     else
2257         divrem(1, 0, r0, r1, r2);
2258 }
2259
2260 static void
2261 _divi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2262 {
2263     jit_int32_t         reg;
2264     reg = jit_get_reg(jit_class_gpr);
2265     movi(rn(reg), i0);
2266     divr_u(r0, r1, rn(reg));
2267     jit_unget_reg(reg);
2268 }
2269
2270 static void
2271 _iqdivr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
2272         jit_int32_t r2, jit_int32_t r3, jit_bool_t sign)
2273 {
2274     jit_word_t          d;
2275     jit_word_t          w;
2276     jit_get_reg_args();
2277     movr(_R0_REGNO, r2);
2278     movr(_R1_REGNO, r3);
2279     if (sign)                   w = (jit_word_t)__aeabi_idivmod;
2280     else                        w = (jit_word_t)__aeabi_uidivmod;
2281     if (!jit_exchange_p()) {
2282         if (jit_thumb_p())      d = ((w - _jit->pc.w) >> 1) - 2;
2283         else                    d = ((w - _jit->pc.w) >> 2) - 2;
2284         if (_s24P(d)) {
2285             if (jit_thumb_p())  T2_BLI(encode_thumb_jump(d));
2286             else                BLI(d & 0x00ffffff);
2287         }
2288         else                    goto fallback;
2289     }
2290     else {
2291     fallback:
2292         movi(_R2_REGNO, w);
2293         if (jit_thumb_p())      T1_BLX(_R2_REGNO);
2294         else                    BLX(_R2_REGNO);
2295     }
2296     movr(r0, _R0_REGNO);
2297     movr(r1, _R1_REGNO);
2298     jit_unget_reg_args();
2299 }
2300
2301 static void
2302 _iqdivi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
2303         jit_int32_t r2, jit_word_t i0, jit_bool_t sign)
2304 {
2305     jit_int32_t         reg;
2306     reg = jit_get_reg(jit_class_gpr);
2307     movi(rn(reg), i0);
2308     iqdivr(r0, r1, r2, rn(reg), sign);
2309     jit_unget_reg(reg);
2310 }
2311
2312 static void
2313 _remr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2314 {
2315     divrem(0, 1, r0, r1, r2);
2316 }
2317
2318 static void
2319 _remi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2320 {
2321     jit_int32_t         reg;
2322     reg = jit_get_reg(jit_class_gpr);
2323     movi(rn(reg), i0);
2324     remr(r0, r1, rn(reg));
2325     jit_unget_reg(reg);
2326 }
2327
2328 static void
2329 _remr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2330 {
2331     divrem(0, 0, r0, r1, r2);
2332 }
2333
2334 static void
2335 _remi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2336 {
2337     jit_int32_t         reg;
2338     reg = jit_get_reg(jit_class_gpr);
2339     movi(rn(reg), i0);
2340     remr_u(r0, r1,rn(reg));
2341     jit_unget_reg(reg);
2342 }
2343
2344 static void
2345 _andr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2346 {
2347     if (jit_thumb_p()) {
2348         if (!jit_no_set_flags() && (r0|r1|r2) < 8 && (r0 == r1 || r0 == r2))
2349             T1_AND(r0, r0 == r1 ? r2 : r1);
2350         else
2351             T2_AND(r0, r1, r2);
2352     }
2353     else
2354         AND(r0, r1, r2);
2355 }
2356
2357 static void
2358 _andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2359 {
2360     int                 i;
2361     jit_int32_t         reg;
2362     if (jit_thumb_p()) {
2363         if ((i = encode_thumb_immediate(i0)) != -1)
2364             T2_ANDI(r0, r1, i);
2365         else if ((i = encode_thumb_immediate(~i0)) != -1)
2366             T2_BICI(r0, r1, i);
2367         else if (r0 != r1) {
2368             movi(r0, i0);
2369             T2_AND(r0, r1, r0);
2370         }
2371         else {
2372             reg = jit_get_reg(jit_class_gpr);
2373             movi(rn(reg), i0);
2374             T2_AND(r0, r1, rn(reg));
2375             jit_unget_reg(reg);
2376         }
2377     }
2378     else {
2379         if ((i = encode_arm_immediate(i0)) != -1)
2380             ANDI(r0, r1, i);
2381         else if ((i = encode_arm_immediate(~i0)) != -1)
2382             BICI(r0, r1, i);
2383         else if (r0 != r1) {
2384             movi(r0, i0);
2385             AND(r0, r1, r0);
2386         }
2387         else {
2388             reg = jit_get_reg(jit_class_gpr);
2389             movi(rn(reg), i0);
2390             AND(r0, r1, rn(reg));
2391             jit_unget_reg(reg);
2392         }
2393     }
2394 }
2395
2396 static void
2397 _orr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2398 {
2399     if (jit_thumb_p()) {
2400         if (!jit_no_set_flags() && (r0|r1|r2) < 8 && (r0 == r1 || r0 == r2))
2401             T1_ORR(r0, r0 == r1 ? r2 : r1);
2402         else
2403             T2_ORR(r0, r1, r2);
2404     }
2405     else
2406         ORR(r0, r1, r2);
2407 }
2408
2409 static void
2410 _ori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2411 {
2412     int                 i;
2413     jit_int32_t         reg;
2414     if (jit_thumb_p()) {
2415         if ((i = encode_thumb_immediate(i0)) != -1)
2416             T2_ORRI(r0, r1, i);
2417         else if (r0 != r1) {
2418             movi(r0, i0);
2419             T2_ORR(r0, r1, r0);
2420         }
2421         else {
2422             reg = jit_get_reg(jit_class_gpr);
2423             movi(rn(reg), i0);
2424             T2_ORR(r0, r1, rn(reg));
2425             jit_unget_reg(reg);
2426         }
2427     }
2428     else {
2429         if ((i = encode_arm_immediate(i0)) != -1)
2430             ORRI(r0, r1, i);
2431         else if (r0 != r1) {
2432             movi(r0, i0);
2433             ORR(r0, r1, r0);
2434         }
2435         else {
2436             reg = jit_get_reg(jit_class_gpr);
2437             movi(rn(reg), i0);
2438             ORR(r0, r1, rn(reg));
2439             jit_unget_reg(reg);
2440         }
2441     }
2442 }
2443
2444 static void
2445 _xorr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2446 {
2447     if (jit_thumb_p()) {
2448         if (!jit_no_set_flags() && (r0|r1|r2) < 8 && (r0 == r1 || r0 == r2))
2449             T1_EOR(r0, r0 == r1 ? r2 : r1);
2450         else
2451             T2_EOR(r0, r1, r2);
2452     }
2453     else
2454         EOR(r0, r1, r2);
2455 }
2456
2457 static void
2458 _xori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2459 {
2460     int                 i;
2461     jit_int32_t         reg;
2462     if (jit_thumb_p()) {
2463         if ((i = encode_thumb_immediate(i0)) != -1)
2464             T2_EORI(r0, r1, i);
2465         else if (r0 != r1) {
2466             movi(r0, i0);
2467             T2_EOR(r0, r1, r0);
2468         }
2469         else {
2470             reg = jit_get_reg(jit_class_gpr);
2471             movi(rn(reg), i0);
2472             T2_EOR(r0, r1, rn(reg));
2473             jit_unget_reg(reg);
2474         }
2475     }
2476     else {
2477         if ((i = encode_arm_immediate(i0)) != -1)
2478             EORI(r0, r1, i);
2479         else if (r0 != r1) {
2480             movi(r0, i0);
2481             EOR(r0, r1, r0);
2482         }
2483         else {
2484             reg = jit_get_reg(jit_class_gpr);
2485             movi(rn(reg), i0);
2486             EOR(r0, r1, rn(reg));
2487             jit_unget_reg(reg);
2488         }
2489     }
2490 }
2491
2492 static void
2493 _lshr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2494 {
2495     if (jit_thumb_p()) {
2496         if (!jit_no_set_flags() && (r0|r1|r2) < 8 && r0 == r1)
2497             T1_LSL(r0, r2);
2498         else
2499             T2_LSL(r0, r1, r2);
2500     }
2501     else
2502         LSL(r0, r1, r2);
2503 }
2504
2505 static void
2506 _lshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2507 {
2508     assert(i0 >= 0 && i0 <= 31);
2509     if (i0 == 0)
2510         movr(r0, r1);
2511     else if (jit_thumb_p()) {
2512         if (!jit_no_set_flags() && (r0|r1) < 8)
2513             T1_LSLI(r0, r1, i0);
2514         else
2515             T2_LSLI(r0, r1, i0);
2516     }
2517     else
2518         LSLI(r0, r1, i0);
2519 }
2520
2521 static void
2522 _rshr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2523 {
2524     if (jit_thumb_p()) {
2525         if (!jit_no_set_flags() && (r0|r1|r2) < 8 && r0 == r1)
2526             T1_ASR(r0, r2);
2527         else
2528             T2_ASR(r0, r1, r2);
2529     }
2530     else
2531         ASR(r0, r1, r2);
2532 }
2533
2534 static void
2535 _rshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2536 {
2537     assert(i0 >= 0 && i0 <= 31);
2538     if (i0 == 0)
2539         movr(r0, r1);
2540     else if (jit_thumb_p()) {
2541         if (!jit_no_set_flags() && (r0|r1) < 8)
2542             T1_ASRI(r0, r1, i0);
2543         else
2544             T2_ASRI(r0, r1, i0);
2545     }
2546     else
2547         ASRI(r0, r1, i0);
2548 }
2549
2550 static void
2551 _rshr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2552 {
2553     if (jit_thumb_p()) {
2554         if (!jit_no_set_flags() && (r0|r1|r2) < 8 && r0 == r1)
2555             T1_LSR(r0, r2);
2556         else
2557             T2_LSR(r0, r1, r2);
2558     }
2559     else
2560         LSR(r0, r1, r2);
2561 }
2562
2563 static void
2564 _rshi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2565 {
2566     assert(i0 >= 0 && i0 <= 31);
2567     if (i0 == 0)
2568         movr(r0, r1);
2569     else if (jit_thumb_p()) {
2570         if (!jit_no_set_flags() && (r0|r1) < 8)
2571             T1_LSRI(r0, r1, i0);
2572         else
2573             T2_LSRI(r0, r1, i0);
2574     }
2575     else
2576         LSRI(r0, r1, i0);
2577 }
2578
2579 static void
2580 _ccr(jit_state_t *_jit, int ct, int cf,
2581      jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2582 {
2583     if (jit_thumb_p()) {
2584         assert((ct ^ cf) >> 28 == 1);
2585         if ((r1|r2) < 8)
2586             T1_CMP(r1, r2);
2587         else if ((r1&r2) & 8)
2588             T1_CMPX(r1, r2);
2589         else
2590             T2_CMP(r1, r2);
2591         ITE(ct);
2592         if (r0 < 8) {
2593             T1_MOVI(r0, 1);
2594             T1_MOVI(r0, 0);
2595         }
2596         else {
2597             T2_MOVI(r0, 1);
2598             T2_MOVI(r0, 0);
2599         }
2600     }
2601     else {
2602         CMP(r1, r2);
2603         CC_MOVI(ct, r0, 1);
2604         CC_MOVI(cf, r0, 0);
2605     }
2606 }
2607
2608 static void
2609 _cci(jit_state_t *_jit, int ct, int cf,
2610      jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2611 {
2612     int                 i;
2613     jit_int32_t         reg;
2614     if (jit_thumb_p()) {
2615         if (r1 < 7 && !(i0 & 0xffffff00))
2616             T1_CMPI(r1, i0);
2617         else if ((i = encode_thumb_immediate(i0)) != -1)
2618             T2_CMPI(r1, i);
2619         else if ((i = encode_thumb_immediate(-i0)) != -1)
2620             T2_CMNI(r1, i);
2621         else {
2622             reg = jit_get_reg(jit_class_gpr);
2623             movi(rn(reg), i0);
2624             ccr(ct, cf, r0, r1, rn(reg));
2625             jit_unget_reg(reg);
2626             return;
2627         }
2628         ITE(ct);
2629         if (r0 < 8) {
2630             T1_MOVI(r0, 1);
2631             T1_MOVI(r0, 0);
2632         }
2633         else {
2634             T2_MOVI(r0, 1);
2635             T2_MOVI(r0, 0);
2636         }
2637     }
2638     else {
2639         if ((i = encode_arm_immediate(i0)) != -1)
2640             CMPI(r1, i);
2641         else if ((i = encode_arm_immediate(-i0)) != -1)
2642             CMNI(r1, i);
2643         else if (r0 != r1) {
2644             movi(r0, i0);
2645             CMP(r1, r0);
2646         }
2647         else {
2648             reg = jit_get_reg(jit_class_gpr);
2649             movi(rn(reg), i0);
2650             CMP(r1, rn(reg));
2651             jit_unget_reg(reg);
2652         }
2653         CC_MOVI(ct, r0, 1);
2654         CC_MOVI(cf, r0, 0);
2655     }
2656 }
2657
2658 static void
2659 _ner(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2660 {
2661     if (jit_thumb_p())
2662         ccr(ARM_CC_NE, ARM_CC_EQ, r0, r1, r2);
2663     else {
2664         SUBS(r0, r1, r2);
2665         CC_MOVI(ARM_CC_NE, r0, 1);
2666     }
2667 }
2668
2669 static void
2670 _nei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2671 {
2672     int                 i;
2673     jit_int32_t         reg;
2674     if (jit_thumb_p())
2675         cci(ARM_CC_NE, ARM_CC_EQ, r0, r1, i0);
2676     else {
2677         if ((i = encode_arm_immediate(i0)) != -1)
2678             SUBSI(r0, r1, i);
2679         else if ((i = encode_arm_immediate(-i0)) != -1)
2680             ADDSI(r0, r1, i);
2681         else if (r0 != r1) {
2682             movi(r0, i0);
2683             SUBS(r0, r1, r0);
2684         }
2685         else {
2686             reg = jit_get_reg(jit_class_gpr);
2687             movi(rn(reg), i0);
2688             SUBS(r0, r1, rn(reg));
2689             jit_unget_reg(reg);
2690         }
2691         CC_MOVI(ARM_CC_NE, r0, 1);
2692     }
2693 }
2694
2695 static void
2696 _jmpr(jit_state_t *_jit, jit_int32_t r0)
2697 {
2698     if (jit_thumb_p())
2699         T1_MOV(_R15_REGNO, r0);
2700     else
2701         MOV(_R15_REGNO, r0);
2702 }
2703
2704 static void
2705 _jmpi(jit_state_t *_jit, jit_word_t i0)
2706 {
2707     jit_word_t          w;
2708     jit_word_t          d;
2709     jit_int32_t         reg;
2710     w = _jit->pc.w;
2711     /* if thumb and in thumb mode */
2712     if (jit_thumb_p() && _jitc->thumb) {
2713         d = ((i0 - w) >> 1) - 2;
2714         if (d >= -1024 && d <= 1023)
2715             T1_B(d & 0x7ff);
2716         else if (_s24P(d))
2717             T2_B(encode_thumb_jump(d));
2718         else {
2719             reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
2720             movi(rn(reg), i0);
2721             jmpr(rn(reg));
2722             jit_unget_reg(reg);
2723         }
2724     }
2725     else {
2726         d = ((i0 - w) >> 2) - 2;
2727         if (_s24P(d))
2728             B(d & 0x00ffffff);
2729         else {
2730             reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
2731             movi(rn(reg), i0);
2732             jmpr(rn(reg));
2733             jit_unget_reg(reg);
2734         }
2735     }
2736 }
2737
2738 static jit_word_t
2739 _jmpi_p(jit_state_t *_jit, jit_word_t i0, jit_bool_t i1)
2740 {
2741     jit_word_t          w;
2742     jit_word_t          d;
2743     jit_int32_t         reg;
2744     if (i1) {
2745         /* Assume jump is not longer than 23 bits if inside jit */
2746         w = _jit->pc.w;
2747         /* if thumb and in thumb mode */
2748         if (jit_thumb_p() && _jitc->thumb) {
2749             d = ((i0 - w) >> 1) - 2;
2750             assert(_s24P(d));
2751             T2_B(encode_thumb_jump(d));
2752         }
2753         else {
2754             d = ((i0 - w) >> 2) - 2;
2755             assert(_s24P(d));
2756             B(d & 0x00ffffff);
2757         }
2758     }
2759     else {
2760         reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
2761         w = movi_p(rn(reg), i0);
2762         jmpr(rn(reg));
2763         jit_unget_reg(reg);
2764     }
2765     return (w);
2766 }
2767
2768 static jit_word_t
2769 _bccr(jit_state_t *_jit, int cc, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2770 {
2771     jit_word_t          w;
2772     jit_word_t          d;
2773     if (jit_thumb_p()) {
2774         if ((r0|r1) < 8)
2775             T1_CMP(r0, r1);
2776         else if ((r0&r1) & 8)
2777             T1_CMPX(r0, r1);
2778         else
2779             T2_CMP(r0, r1);
2780         /* use only thumb2 conditional as does not know if will be patched */
2781         w = _jit->pc.w;
2782         d = ((i0 - w) >> 1) - 2;
2783         assert(_s20P(d));
2784         T2_CC_B(cc, encode_thumb_cc_jump(d));
2785     }
2786     else {
2787         CMP(r0, r1);
2788         w = _jit->pc.w;
2789         d = ((i0 - w) >> 2) - 2;
2790         assert(_s24P(d));
2791         CC_B(cc, d & 0x00ffffff);
2792     }
2793     return (w);
2794 }
2795
2796 static jit_word_t
2797 _bcci(jit_state_t *_jit, int cc, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
2798 {
2799     jit_word_t          w;
2800     jit_word_t          d;
2801     int                 i;
2802     jit_int32_t         reg;
2803     if (jit_thumb_p()) {
2804         if (r0 < 7 && !(i1 & 0xffffff00))
2805             T1_CMPI(r0, i1);
2806         else if ((i = encode_thumb_immediate(i1)) != -1)
2807             T2_CMPI(r0, i);
2808         else if ((i = encode_thumb_immediate(-i1)) != -1)
2809             T2_CMNI(r0, i);
2810         else {
2811             reg = jit_get_reg(jit_class_gpr);
2812             movi(rn(reg), i1);
2813             T2_CMP(r0, rn(reg));
2814             jit_unget_reg(reg);
2815         }
2816         /* use only thumb2 conditional as does not know if will be patched */
2817         w = _jit->pc.w;
2818         d = ((i0 - w) >> 1) - 2;
2819         assert(_s20P(d));
2820         T2_CC_B(cc, encode_thumb_cc_jump(d));
2821     }
2822     else {
2823         if ((i = encode_arm_immediate(i1)) != -1)
2824             CMPI(r0, i);
2825         else if ((i = encode_arm_immediate(-i1)) != -1)
2826             CMNI(r0, i);
2827         else {
2828             reg = jit_get_reg(jit_class_gpr);
2829             movi(rn(reg), i1);
2830             CMP(r0, rn(reg));
2831             jit_unget_reg(reg);
2832         }
2833         w = _jit->pc.w;
2834         d = ((i0 - w) >> 2) - 2;
2835         assert(_s24P(d));
2836         CC_B(cc, d & 0x00ffffff);
2837     }
2838     return (w);
2839 }
2840
2841 static jit_word_t
2842 _baddr(jit_state_t *_jit, int cc, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2843 {
2844     jit_word_t          w;
2845     jit_word_t          d;
2846     if (jit_thumb_p()) {
2847         if ((r0|r1) < 8)
2848             T1_ADD(r0, r0, r1);
2849         else
2850             T2_ADDS(r0, r0, r1);
2851         w = _jit->pc.w;
2852         d = ((i0 - w) >> 1) - 2;
2853         assert(_s20P(d));
2854         T2_CC_B(cc, encode_thumb_cc_jump(d));
2855     }
2856     else {
2857         ADDS(r0, r0, r1);
2858         w = _jit->pc.w;
2859         d = ((i0 - w) >> 2) - 2;
2860         assert(_s24P(d));
2861         CC_B(cc, d & 0x00ffffff);
2862     }
2863     return (w);
2864 }
2865
2866 static jit_word_t
2867 _baddi(jit_state_t *_jit, int cc, jit_word_t i0, jit_int32_t r0, int i1)
2868 {
2869     int                 i;
2870     jit_word_t          w;
2871     jit_word_t          d;
2872     jit_int32_t         reg;
2873     if (jit_thumb_p()) {
2874         if (r0 < 8 && !(i1 & ~7))
2875             T1_ADDI3(r0, r0, i1);
2876         else if (r0 < 8 && !(-i1 & ~7))
2877             T1_SUBI3(r0, r0, -i1);
2878         else if (r0 < 8 && !(i1 & ~0xff))
2879             T1_ADDI8(r0, i1);
2880         else if (r0 < 8 && !(-i1 & ~0xff))
2881             T1_SUBI8(r0, -i1);
2882         else if ((i = encode_thumb_immediate(i1)) != -1)
2883             T2_ADDSI(r0, r0, i);
2884         else if ((i = encode_thumb_immediate(-i1)) != -1)
2885             T2_SUBSI(r0, r0, i);
2886         else {
2887             reg = jit_get_reg(jit_class_gpr);
2888             movi(rn(reg), i1);
2889             T2_ADDS(r0, r0, rn(reg));
2890             jit_unget_reg(reg);
2891         }
2892         w = _jit->pc.w;
2893         d = ((i0 - w) >> 1) - 2;
2894         assert(_s20P(d));
2895         T2_CC_B(cc, encode_thumb_cc_jump(d));
2896     }
2897     else {
2898         if ((i = encode_arm_immediate(i1)) != -1)
2899             ADDSI(r0, r0, i);
2900         else if ((i = encode_arm_immediate(-i1)) != -1)
2901             SUBSI(r0, r0, i);
2902         else {
2903             reg = jit_get_reg(jit_class_gpr);
2904             movi(rn(reg), i1);
2905             ADDS(r0, r0, rn(reg));
2906             jit_unget_reg(reg);
2907         }
2908         w = _jit->pc.w;
2909         d = ((i0 - w) >> 2) - 2;
2910         assert(_s24P(d));
2911         CC_B(cc, d & 0x00ffffff);
2912     }
2913     return (w);
2914 }
2915
2916 static jit_word_t
2917 _bsubr(jit_state_t *_jit, int cc, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2918 {
2919     jit_word_t          w;
2920     jit_word_t          d;
2921     if (jit_thumb_p()) {
2922         if ((r0|r1) < 8)
2923             T1_SUB(r0, r0, r1);
2924         else
2925             T2_SUBS(r0, r0, r1);
2926         w = _jit->pc.w;
2927         d = ((i0 - w) >> 1) - 2;
2928         assert(_s20P(d));
2929         T2_CC_B(cc, encode_thumb_cc_jump(d));
2930     }
2931     else {
2932         SUBS(r0, r0, r1);
2933         w = _jit->pc.w;
2934         d = ((i0 - w) >> 2) - 2;
2935         assert(_s24P(d));
2936         CC_B(cc, d & 0x00ffffff);
2937     }
2938     return (w);
2939 }
2940
2941 static jit_word_t
2942 _bsubi(jit_state_t *_jit, int cc, jit_word_t i0, jit_int32_t r0, int i1)
2943 {
2944     int                 i;
2945     jit_word_t          w;
2946     jit_word_t          d;
2947     jit_int32_t         reg;
2948     if (jit_thumb_p()) {
2949         if (r0 < 8 && !(i1 & ~7))
2950             T1_SUBI3(r0, r0, i1);
2951         else if (r0 < 8 && !(-i1 & ~7))
2952             T1_ADDI3(r0, r0, -i1);
2953         else if (r0 < 8 && !(i1 & ~0xff))
2954             T1_SUBI8(r0, i1);
2955         else if (r0 < 8 && !(-i1 & ~0xff))
2956             T1_ADDI8(r0, -i1);
2957         else if ((i = encode_thumb_immediate(i1)) != -1)
2958             T2_SUBSI(r0, r0, i);
2959         else if ((i = encode_thumb_immediate(-i1)) != -1)
2960             T2_SUBSI(r0, r0, i);
2961         else {
2962             reg = jit_get_reg(jit_class_gpr);
2963             movi(rn(reg), i1);
2964             T2_SUBS(r0, r0, rn(reg));
2965             jit_unget_reg(reg);
2966         }
2967         w = _jit->pc.w;
2968         d = ((i0 - w) >> 1) - 2;
2969         assert(_s20P(d));
2970         T2_CC_B(cc, encode_thumb_cc_jump(d));
2971     }
2972     else {
2973         if ((i = encode_arm_immediate(i1)) != -1)
2974             SUBSI(r0, r0, i);
2975         else if ((i = encode_arm_immediate(-i1)) != -1)
2976             ADDSI(r0, r0, i);
2977         else {
2978             reg = jit_get_reg(jit_class_gpr);
2979             movi(rn(reg), i1);
2980             SUBS(r0, r0, rn(reg));
2981             jit_unget_reg(reg);
2982         }
2983         w = _jit->pc.w;
2984         d = ((i0 - w) >> 2) - 2;
2985         assert(_s24P(d));
2986         CC_B(cc, d & 0x00ffffff);
2987     }
2988     return (w);
2989 }
2990
2991 static jit_word_t
2992 _bmxr(jit_state_t *_jit, int cc, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2993 {
2994     jit_word_t          w;
2995     jit_word_t          d;
2996     jit_int32_t         reg;
2997     if (jit_thumb_p()) {
2998         if ((r0|r1) < 8)
2999             T1_TST(r0, r1);
3000         else
3001             T2_TST(r0, r1);
3002         w = _jit->pc.w;
3003         d = ((i0 - w) >> 1) - 2;
3004         assert(_s20P(d));
3005         T2_CC_B(cc, encode_thumb_cc_jump(d));
3006     }
3007     else {
3008         if (jit_armv5_p())
3009             TST(r0, r1);
3010         else {
3011             reg = jit_get_reg(jit_class_gpr);
3012             ANDS(rn(reg), r0, r1);
3013             jit_unget_reg(reg);
3014         }
3015         w = _jit->pc.w;
3016         d = ((i0 - w) >> 2) - 2;
3017         assert(_s24P(d));
3018         CC_B(cc, d & 0x00ffffff);
3019     }
3020     return (w);
3021 }
3022
3023 static jit_word_t
3024 _bmxi(jit_state_t *_jit, int cc, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
3025 {
3026     int                 i;
3027     jit_word_t          w;
3028     jit_word_t          d;
3029     jit_int32_t         reg;
3030     if (jit_thumb_p()) {
3031         if ((i = encode_thumb_immediate(i1)) != -1)
3032             T2_TSTI(r0, i);
3033         else {
3034             reg = jit_get_reg(jit_class_gpr);
3035             movi(rn(reg), i1);
3036             T2_TST(r0, rn(reg));
3037             jit_unget_reg(reg);
3038         }
3039         w = _jit->pc.w;
3040         d = ((i0 - w) >> 1) - 2;
3041         assert(_s20P(d));
3042         T2_CC_B(cc, encode_thumb_cc_jump(d));
3043     }
3044     else {
3045         if (jit_armv5_p()) {
3046             if ((i = encode_arm_immediate(i1)) != -1)
3047                 TSTI(r0, i);
3048             else {
3049                 reg = jit_get_reg(jit_class_gpr);
3050                 movi(rn(reg), i1);
3051                 TST(r0, rn(reg));
3052                 jit_unget_reg(reg);
3053             }
3054         }
3055         else {
3056             reg = jit_get_reg(jit_class_gpr);
3057             if ((i = encode_arm_immediate(i1)) != -1)
3058                 ANDSI(rn(reg), r0, i);
3059             else if ((i = encode_arm_immediate(~i1)) != -1)
3060                 BICSI(rn(reg), r0, i);
3061             else {
3062                 movi(rn(reg), i1);
3063                 ANDS(rn(reg), r0, rn(reg));
3064             }
3065             jit_unget_reg(reg);
3066         }
3067         w = _jit->pc.w;
3068         d = ((i0 - w) >> 2) - 2;
3069         assert(_s24P(d));
3070         CC_B(cc, d & 0x00ffffff);
3071     }
3072     return (w);
3073 }
3074
3075 static void
3076 _ldr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3077 {
3078     if (jit_thumb_p())
3079         T2_LDRSBI(r0, r1, 0);
3080     else
3081         LDRSBI(r0, r1, 0);
3082 }
3083
3084 static void
3085 _ldi_c(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
3086 {
3087     jit_int32_t         reg;
3088     reg = jit_get_reg(jit_class_gpr);
3089     movi(rn(reg), i0);
3090     if (jit_thumb_p())
3091         T2_LDRSBI(r0, rn(reg), 0);
3092     else
3093         LDRSBI(r0, rn(reg), 0);
3094     jit_unget_reg(reg);
3095 }
3096
3097 static void
3098 _ldxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3099 {
3100     if (jit_thumb_p()) {
3101         if ((r0|r1|r2) < 8)
3102             T1_LDRSB(r0, r1, r2);
3103         else
3104             T2_LDRSB(r0, r1, r2);
3105     }
3106     else
3107         LDRSB(r0, r1, r2);
3108 }
3109
3110 static void
3111 _ldxi_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
3112 {
3113     jit_int32_t         reg;
3114     if (jit_thumb_p()) {
3115         if (jit_ldrt_strt_p() && i0 >= 0 && i0 <= 255)
3116             T2_LDRSBI(r0, r1, i0);
3117         else if (i0 < 0 && i0 >= -255)
3118             T2_LDRSBIN(r0, r1, -i0);
3119         else if (i0 >= 0 && i0 <= 4095)
3120             T2_LDRSBWI(r0, r1, i0);
3121         else if (r0 != r1) {
3122             movi(r0, i0);
3123             if ((r0|r1) < 8)
3124                 T1_LDRSB(r0, r1, r0);
3125             else
3126                 T2_LDRSB(r0, r1, r0);
3127         }
3128         else {
3129             reg = jit_get_reg(jit_class_gpr);
3130             movi(rn(reg), i0);
3131             if ((r0|r1|rn(reg)) < 8)
3132                 T1_LDRSB(r0, r1, rn(reg));
3133             else
3134                 T2_LDRSB(r0, r1, rn(reg));
3135             jit_unget_reg(reg);
3136         }
3137     }
3138     else {
3139         if (i0 >= 0 && i0 <= 255)
3140             LDRSBI(r0, r1, i0);
3141         else if (i0 < 0 && i0 >= -255)
3142             LDRSBIN(r0, r1, -i0);
3143         else if (r0 != r1) {
3144             movi(r0, i0);
3145             LDRSB(r0, r1, r0);
3146         }
3147         else {
3148             reg = jit_get_reg(jit_class_gpr);
3149             movi(rn(reg), i0);
3150             LDRSB(r0, r1, rn(reg));
3151             jit_unget_reg(reg);
3152         }
3153     }
3154 }
3155
3156 static void
3157 _ldr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3158 {
3159     if (jit_thumb_p())
3160         T2_LDRBI(r0, r1, 0);
3161     else
3162         LDRBI(r0, r1, 0);
3163 }
3164
3165 static void
3166 _ldi_uc(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
3167 {
3168     jit_int32_t         reg;
3169     reg = jit_get_reg(jit_class_gpr);
3170     movi(rn(reg), i0);
3171     if (jit_thumb_p())
3172         T2_LDRBI(r0, rn(reg), 0);
3173     else
3174         LDRBI(r0, rn(reg), 0);
3175     jit_unget_reg(reg);
3176 }
3177
3178 static void
3179 _ldxr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3180 {
3181     if (jit_thumb_p()) {
3182         if ((r0|r1|r2) < 8)
3183             T1_LDRB(r0, r1, r2);
3184         else
3185             T2_LDRB(r0, r1, r2);
3186     }
3187     else
3188         LDRB(r0, r1, r2);
3189 }
3190
3191 static void
3192 _ldxi_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
3193 {
3194     jit_int32_t         reg;
3195     if (jit_thumb_p()) {
3196         if ((r0|r1) < 8 && i0 >= 0 && i0 < 0x20)
3197             T1_LDRBI(r0, r1, i0);
3198         else if (jit_ldrt_strt_p() && i0 >= 0 && i0 <= 255)
3199             T2_LDRBI(r0, r1, i0);
3200         else if (i0 < 0 && i0 >= -255)
3201             T2_LDRBIN(r0, r1, -i0);
3202         else if (i0 >= 0 && i0 <= 4095)
3203             T2_LDRBWI(r0, r1, i0);
3204         else if (r0 != r1) {
3205             movi(r0, i0);
3206             if ((r0|r1) < 8)
3207                 T1_LDRB(r0, r1, r0);
3208             else
3209                 T2_LDRB(r0, r1, r0);
3210         }
3211         else {
3212             reg = jit_get_reg(jit_class_gpr);
3213             movi(rn(reg), i0);
3214             if ((r0|r1|rn(reg)) < 8)
3215                 T1_LDRB(r0, r1, rn(reg));
3216             else
3217                 T2_LDRB(r0, r1, rn(reg));
3218             jit_unget_reg(reg);
3219         }
3220     }
3221     else {
3222         if (i0 >= 0 && i0 <= 4095)
3223             LDRBI(r0, r1, i0);
3224         else if (i0 < 0 && i0 >= -4095)
3225             LDRBIN(r0, r1, -i0);
3226         else if (r0 != r1) {
3227             movi(r0, i0);
3228             LDRB(r0, r1, r0);
3229         }
3230         else {
3231             reg = jit_get_reg(jit_class_gpr);
3232             movi(rn(reg), i0);
3233             LDRB(r0, r1, rn(reg));
3234             jit_unget_reg(reg);
3235         }
3236     }
3237 }
3238
3239 static void
3240 _ldr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3241 {
3242     if (jit_thumb_p())
3243         T2_LDRSHI(r0, r1, 0);
3244     else
3245         LDRSHI(r0, r1, 0);
3246 }
3247
3248 static void
3249 _ldi_s(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
3250 {
3251     jit_int32_t         reg;
3252     reg = jit_get_reg(jit_class_gpr);
3253     movi(rn(reg), i0);
3254     if (jit_thumb_p())
3255         T2_LDRSHI(r0, rn(reg), 0);
3256     else
3257         LDRSHI(r0, rn(reg), 0);
3258     jit_unget_reg(reg);
3259 }
3260
3261 static void
3262 _ldxr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3263 {
3264     if (jit_thumb_p()) {
3265         if ((r0|r1|r2) < 8)
3266             T1_LDRSH(r0, r1, r2);
3267         else
3268             T2_LDRSH(r0, r1, r2);
3269     }
3270     else
3271         LDRSH(r0, r1, r2);
3272 }
3273
3274 static void
3275 _ldxi_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
3276 {
3277     jit_int32_t         reg;
3278     if (jit_thumb_p()) {
3279         if (jit_ldrt_strt_p() && i0 >= 0 && i0 <= 255)
3280             T2_LDRSHI(r0, r1, i0);
3281         else if (i0 < 0 && i0 >= -255)
3282             T2_LDRSHIN(r0, r1, -i0);
3283         else if (i0 >= 0 && i0 <= 4095)
3284             T2_LDRSHWI(r0, r1, i0);
3285         else if (r0 != r1) {
3286             movi(r0, i0);
3287             if ((r0|r1) < 8)
3288                 T1_LDRSH(r0, r1, r0);
3289             else
3290                 T2_LDRSH(r0, r1, r0);
3291         }
3292         else {
3293             reg = jit_get_reg(jit_class_gpr);
3294             movi(rn(reg), i0);
3295             if ((r0|r1|rn(reg)) < 8)
3296                 T1_LDRSH(r0, r1, rn(reg));
3297             else
3298                 T2_LDRSH(r0, r1, rn(reg));
3299             jit_unget_reg(reg);
3300         }
3301     }
3302     else {
3303         if (i0 >= 0 && i0 <= 255)
3304             LDRSHI(r0, r1, i0);
3305         else if (i0 < 0 && i0 >= -255)
3306             LDRSHIN(r0, r1, -i0);
3307         else if (r0 != r1) {
3308             movi(r0, i0);
3309             LDRSH(r0, r1, r0);
3310         }
3311         else {
3312             reg = jit_get_reg(jit_class_gpr);
3313             movi(rn(reg), i0);
3314             LDRSH(r0, r1, rn(reg));
3315             jit_unget_reg(reg);
3316         }
3317     }
3318 }
3319
3320 static void
3321 _ldr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3322 {
3323     if (jit_thumb_p())
3324         T2_LDRHI(r0, r1, 0);
3325     else
3326         LDRHI(r0, r1, 0);
3327 }
3328
3329 static void
3330 _ldi_us(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
3331 {
3332     jit_int32_t         reg;
3333     reg = jit_get_reg(jit_class_gpr);
3334     movi(rn(reg), i0);
3335     if (jit_thumb_p())
3336         T2_LDRHI(r0, rn(reg), 0);
3337     else
3338         LDRHI(r0, rn(reg), 0);
3339     jit_unget_reg(reg);
3340 }
3341
3342 static void
3343 _ldxr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3344 {
3345     if (jit_thumb_p()) {
3346         if ((r0|r1|r2) < 8)
3347             T1_LDRH(r0, r1, r2);
3348         else
3349             T2_LDRH(r0, r1, r2);
3350     }
3351     else
3352         LDRH(r0, r1, r2);
3353 }
3354
3355 static void
3356 _ldxi_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
3357 {
3358     jit_int32_t         reg;
3359     if (jit_thumb_p()) {
3360         if ((r0|r1) < 8 && i0 >= 0 && !(i0 & 1) && (i0 >> 1) < 0x20)
3361             T1_LDRHI(r0, r1, i0 >> 1);
3362         else if (jit_ldrt_strt_p() && i0 >= 0 && i0 <= 255)
3363             T2_LDRHI(r0, r1, i0);
3364         else if (i0 < 0 && i0 >= -255)
3365             T2_LDRHIN(r0, r1, -i0);
3366         else if (i0 >= 0 && i0 <= 4095)
3367             T2_LDRHWI(r0, r1, i0);
3368         else if (r0 != r1) {
3369             movi(r0, i0);
3370             if ((r0|r1) < 8)
3371                 T1_LDRH(r0, r1, r0);
3372             else
3373                 T2_LDRH(r0, r1, r0);
3374         }
3375         else {
3376             reg = jit_get_reg(jit_class_gpr);
3377             movi(rn(reg), i0);
3378             if ((r0|r1|rn(reg)) < 8)
3379                 T1_LDRH(r0, r1, rn(reg));
3380             else
3381                 T2_LDRH(r0, r1, rn(reg));
3382             jit_unget_reg(reg);
3383         }
3384     }
3385     else {
3386         if (i0 >= 0 && i0 <= 255)
3387             LDRHI(r0, r1, i0);
3388         else if (i0 < 0 && i0 >= -255)
3389             LDRHIN(r0, r1, -i0);
3390         else if (r0 != r1) {
3391             movi(r0, i0);
3392             LDRH(r0, r1, r0);
3393         }
3394         else {
3395             reg = jit_get_reg(jit_class_gpr);
3396             movi(rn(reg), i0);
3397             LDRH(r0, r1, rn(reg));
3398             jit_unget_reg(reg);
3399         }
3400     }
3401 }
3402
3403 static void
3404 _ldr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3405 {
3406     if (jit_thumb_p())
3407         T2_LDRI(r0, r1, 0);
3408     else
3409         LDRI(r0, r1, 0);
3410 }
3411
3412 static void
3413 _ldi_i(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
3414 {
3415     jit_int32_t         reg;
3416     reg = jit_get_reg(jit_class_gpr);
3417     movi(rn(reg), i0);
3418     if (jit_thumb_p())
3419         T2_LDRI(r0, rn(reg), 0);
3420     else
3421         LDRI(r0, rn(reg), 0);
3422     jit_unget_reg(reg);
3423 }
3424
3425 static void
3426 _ldxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3427 {
3428     if (jit_thumb_p()) {
3429         if ((r0|r1|r2) < 8)
3430             T1_LDR(r0, r1, r2);
3431         else
3432             T2_LDR(r0, r1, r2);
3433     }
3434     else
3435         LDR(r0, r1, r2);
3436 }
3437
3438 static void
3439 _ldxi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
3440 {
3441     jit_int32_t         reg;
3442     if (jit_thumb_p()) {
3443         if ((r0|r1) < 8 && i0 >= 0 && !(i0 & 3) && (i0 >> 2) < 0x20)
3444             T1_LDRI(r0, r1, i0 >> 2);
3445         else if (r1 == _R13_REGNO && r0 < 8 &&
3446                  i0 >= 0 && !(i0 & 3) && (i0 >> 2) <= 255)
3447             T1_LDRISP(r0, i0 >> 2);
3448         else if (jit_ldrt_strt_p() && i0 >= 0 && i0 <= 255)
3449             T2_LDRI(r0, r1, i0);
3450         else if (i0 < 0 && i0 > -255)
3451             T2_LDRIN(r0, r1, -i0);
3452         else if (i0 >= 0 && i0 <= 4095)
3453             T2_LDRWI(r0, r1, i0);
3454         else if (r0 != r1) {
3455             movi(r0, i0);
3456             if ((r0|r1) < 8)
3457                 T1_LDR(r0, r1, r0);
3458             else
3459                 T2_LDR(r0, r1, r0);
3460         }
3461         else {
3462             reg = jit_get_reg(jit_class_gpr);
3463             movi(rn(reg), i0);
3464             if ((r0|r1|rn(reg)) < 8)
3465                 T1_LDR(r0, r1, rn(reg));
3466             else
3467                 T2_LDR(r0, r1, rn(reg));
3468             jit_unget_reg(reg);
3469         }
3470     }
3471     else {
3472         if (i0 >= 0 && i0 <= 4095)
3473             LDRI(r0, r1, i0);
3474         else if (i0 < 0 && i0 >= -4095)
3475             LDRIN(r0, r1, -i0);
3476         else if (r0 != r1) {
3477             movi(r0, i0);
3478             LDR(r0, r1, r0);
3479         }
3480         else {
3481             reg = jit_get_reg(jit_class_gpr);
3482             movi(rn(reg), i0);
3483             LDR(r0, r1, rn(reg));
3484             jit_unget_reg(reg);
3485         }
3486     }
3487 }
3488
3489 static void
3490 _str_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3491 {
3492     if (jit_thumb_p())
3493         T2_STRBI(r1, r0, 0);
3494     else
3495         STRBI(r1, r0, 0);
3496 }
3497
3498 static void
3499 _sti_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
3500 {
3501     jit_int32_t         reg;
3502     reg = jit_get_reg(jit_class_gpr);
3503     movi(rn(reg), i0);
3504     if (jit_thumb_p())
3505         T2_STRBI(r0, rn(reg), 0);
3506     else
3507         STRBI(r0, rn(reg), 0);
3508     jit_unget_reg(reg);
3509 }
3510
3511 static void
3512 _stxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3513 {
3514     if (jit_thumb_p()) {
3515         if ((r0|r1|r2) < 8)
3516             T1_STRB(r2, r1, r0);
3517         else
3518             T2_STRB(r2, r1, r0);
3519     }
3520     else
3521         STRB(r2, r1, r0);
3522 }
3523
3524 static void
3525 _stxi_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
3526 {
3527     jit_int32_t         reg;
3528     if (jit_thumb_p()) {
3529         if ((r0|r1) < 8 && i0 >= 0 && i0 < 0x20)
3530             T1_STRBI(r1, r0, i0);
3531         else if (jit_ldrt_strt_p() && i0 >= 0 && i0 <= 255)
3532             T2_STRBI(r1, r0, i0);
3533         else if (i0 < 0 && i0 >= -255)
3534             T2_STRBIN(r1, r0, -i0);
3535         else if (i0 >= 0 && i0 <= 4095)
3536             T2_STRBWI(r1, r0, i0);
3537         else {
3538             reg = jit_get_reg(jit_class_gpr);
3539             movi(rn(reg), i0);
3540             if ((r0|r1|rn(reg)) < 8)
3541                 T1_STRB(r1, r0, rn(reg));
3542             else
3543                 T2_STRB(r1, r0, rn(reg));
3544             jit_unget_reg(reg);
3545         }
3546     }
3547     else {
3548         if (i0 >= 0 && i0 <= 4095)
3549             STRBI(r1, r0, i0);
3550         else if (i0 < 0 && i0 >= -4095)
3551             STRBIN(r1, r0, -i0);
3552         else {
3553             reg = jit_get_reg(jit_class_gpr);
3554             movi(rn(reg), i0);
3555             STRB(r1, r0, rn(reg));
3556             jit_unget_reg(reg);
3557         }
3558     }
3559 }
3560
3561 static void
3562 _str_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3563 {
3564     if (jit_thumb_p())
3565         T2_STRHI(r1, r0, 0);
3566     else
3567         STRHI(r1, r0, 0);
3568 }
3569
3570 static void
3571 _sti_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
3572 {
3573     jit_int32_t         reg;
3574     reg = jit_get_reg(jit_class_gpr);
3575     movi(rn(reg), i0);
3576     if (jit_thumb_p())
3577         T2_STRHI(r0, rn(reg), 0);
3578     else
3579         STRHI(r0, rn(reg), 0);
3580     jit_unget_reg(reg);
3581 }
3582
3583 static void
3584 _stxr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3585 {
3586     if (jit_thumb_p()) {
3587         if ((r0|r1|r2) < 8)
3588             T1_STRH(r2, r1, r0);
3589         else
3590             T2_STRH(r2, r1, r0);
3591     }
3592     else
3593         STRH(r2, r1, r0);
3594 }
3595
3596 static void
3597 _stxi_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
3598 {
3599     jit_int32_t         reg;
3600     if (jit_thumb_p()) {
3601         if ((r0|r1) < 8 && i0 >= 0 && !(i0 & 1) && (i0 >> 1) < 0x20)
3602             T1_STRHI(r1, r0, i0 >> 1);
3603         else if (jit_ldrt_strt_p() && i0 >= 0 && i0 <= 255)
3604             T2_STRHI(r1, r0, i0);
3605         else if (i0 < 0 && i0 >= -255)
3606             T2_STRHIN(r1, r0, -i0);
3607         else if (i0 >= 0 && i0 <= 4095)
3608             T2_STRHWI(r1, r0, i0);
3609         else {
3610             reg = jit_get_reg(jit_class_gpr);
3611             movi(rn(reg), i0);
3612             if ((r0|r1|rn(reg)) < 8)
3613                 T1_STRH(r1, r0, rn(reg));
3614             else
3615                 T2_STRH(r1, r0, rn(reg));
3616             jit_unget_reg(reg);
3617         }
3618     }
3619     else {
3620         if (i0 >= 0 && i0 <= 255)
3621             STRHI(r1, r0, i0);
3622         else if (i0 < 0 && i0 >= -255)
3623             STRHIN(r1, r0, -i0);
3624         else {
3625             reg = jit_get_reg(jit_class_gpr);
3626             movi(rn(reg), i0);
3627             STRH(r1, r0, rn(reg));
3628             jit_unget_reg(reg);
3629         }
3630     }
3631 }
3632
3633 static void
3634 _str_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3635 {
3636     if (jit_thumb_p())
3637         T2_STRI(r1, r0, 0);
3638     else
3639         STRI(r1, r0, 0);
3640 }
3641
3642 static void
3643 _sti_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
3644 {
3645     jit_int32_t         reg;
3646     reg = jit_get_reg(jit_class_gpr);
3647     movi(rn(reg), i0);
3648     if (jit_thumb_p())
3649         T2_STRI(r0, rn(reg), 0);
3650     else
3651         STRI(r0, rn(reg), 0);
3652     jit_unget_reg(reg);
3653 }
3654
3655 static void
3656 _stxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3657 {
3658     if (jit_thumb_p()) {
3659         if ((r0|r1|r2) < 8)
3660             T1_STR(r2, r1, r0);
3661         else
3662             T2_STR(r2, r1, r0);
3663     }
3664     else
3665         STR(r2, r1, r0);
3666 }
3667
3668 static void
3669 _stxi_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
3670 {
3671     jit_int32_t         reg;
3672     if (jit_thumb_p()) {
3673         if ((r0|r1) < 8 && i0 >= 0 && !(i0 & 3) && (i0 >> 2) < 0x20)
3674             T1_STRI(r1, r0, i0 >> 2);
3675         else if (r0 == _R13_REGNO && r1 < 8 &&
3676                  i0 >= 0 && !(i0 & 3) && (i0 >> 2) <= 255)
3677             T1_STRISP(r1, i0 >> 2);
3678         else if (jit_ldrt_strt_p() && i0 >= 0 && i0 <= 255)
3679             T2_STRI(r1, r0, i0);
3680         else if (i0 < 0 && i0 >= -255)
3681             T2_STRIN(r1, r0, -i0);
3682         else if (i0 >= 0 && i0 <= 4095)
3683             T2_STRWI(r1, r0, i0);
3684         else {
3685             reg = jit_get_reg(jit_class_gpr);
3686             movi(rn(reg), i0);
3687             if ((r0|r1|rn(reg)) < 8)
3688                 T1_STR(r1, r0, rn(reg));
3689             else
3690                 T2_STR(r1, r0, rn(reg));
3691             jit_unget_reg(reg);
3692         }
3693     }
3694     else {
3695         if (i0 >= 0 && i0 <= 4095)
3696             STRI(r1, r0, i0);
3697         else if (i0 < 0 && i0 >= -4095)
3698             STRIN(r1, r0, -i0);
3699         else {
3700             reg = jit_get_reg(jit_class_gpr);
3701             movi(rn(reg), i0);
3702             STR(r1, r0, rn(reg));
3703             jit_unget_reg(reg);
3704         }
3705     }
3706 }
3707
3708 static void
3709 _bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3710 {
3711     if (jit_thumb_p()) {
3712         if ((r0|r1) < 8)
3713             T1_REV(r0, r1);
3714         else
3715             T2_REV(r0, r1);
3716         rshi_u(r0, r0, 16);
3717     }
3718     else {
3719         if (jit_armv6_p()) {
3720             REV(r0, r1);
3721             rshi_u(r0, r0, 16);
3722         }
3723         else {
3724                 generic_bswapr_us(_jit, r0, r1);
3725         }
3726     }
3727 }
3728
3729 /* inline glibc htonl (without register clobber) */
3730 static void
3731 _bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3732 {
3733     jit_int32_t         reg;
3734     if (jit_thumb_p()) {
3735         if ((r0|r1) < 8)
3736             T1_REV(r0, r1);
3737         else
3738             T2_REV(r0, r1);
3739     }
3740     else {
3741         if (jit_armv6_p())
3742             REV(r0, r1);
3743         else {
3744             reg = jit_get_reg(jit_class_gpr);
3745             EOR_SI(rn(reg), r1, r1, ARM_ROR, 16);
3746             LSRI(rn(reg), rn(reg), 8);
3747             BICI(rn(reg), rn(reg), encode_arm_immediate(0xff00));
3748             EOR_SI(r0, rn(reg), r1, ARM_ROR, 8);
3749             jit_unget_reg(reg);
3750         }
3751     }
3752 }
3753
3754 static void
3755 _extr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3756 {
3757     if (jit_thumb_p()) {
3758         if ((r0|r1) < 8)
3759             T1_SXTB(r0, r1);
3760         else
3761             T2_SXTB(r0, r1);
3762     }
3763     else {
3764         if (jit_armv6_p())
3765             SXTB(r0, r1);
3766         else {
3767             LSLI(r0, r1, 24);
3768             ASRI(r0, r0, 24);
3769         }
3770     }
3771 }
3772
3773 static void
3774 _extr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3775 {
3776     if (jit_thumb_p()) {
3777         if ((r0|r1) < 8)
3778             T1_UXTB(r0, r1);
3779         else
3780             T2_UXTB(r0, r1);
3781     }
3782     else {
3783         if (jit_armv6_p())
3784             UXTB(r0, r1);
3785         else
3786             ANDI(r0, r1, 0xff);
3787     }
3788 }
3789
3790 static void
3791 _extr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3792 {
3793     if (jit_thumb_p()) {
3794         if ((r0|r1) < 8)
3795             T1_SXTH(r0, r1);
3796         else
3797             T2_SXTH(r0, r1);
3798     }
3799     else {
3800         if (jit_armv6_p())
3801             SXTH(r0, r1);
3802         else {
3803             LSLI(r0, r1, 16);
3804             ASRI(r0, r0, 16);
3805         }
3806     }
3807 }
3808
3809 static void
3810 _extr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3811 {
3812     if (jit_thumb_p()) {
3813         if ((r0|r1) < 8)
3814             T1_UXTH(r0, r1);
3815         else
3816             T2_UXTH(r0, r1);
3817     }
3818     else {
3819         if (jit_armv6_p())
3820             UXTH(r0, r1);
3821         else {
3822             LSLI(r0, r1, 16);
3823             LSRI(r0, r0, 16);
3824         }
3825     }
3826 }
3827
3828 static void
3829 _callr(jit_state_t *_jit, jit_int32_t r0)
3830 {
3831     if (jit_thumb_p())
3832         T1_BLX(r0);
3833     else
3834         BLX(r0);
3835 }
3836
3837 static void
3838 _calli(jit_state_t *_jit, jit_word_t i0)
3839 {
3840     jit_word_t          d;
3841     jit_int32_t         reg;
3842     d = ((i0 - _jit->pc.w) >> 2) - 2;
3843     if (!jit_exchange_p() && !jit_thumb_p() && _s24P(d))
3844         BLI(d & 0x00ffffff);
3845     else {
3846         reg = jit_get_reg(jit_class_gpr);
3847         movi(rn(reg), i0);
3848         if (jit_thumb_p())
3849             T1_BLX(rn(reg));
3850         else
3851             BLX(rn(reg));
3852         jit_unget_reg(reg);
3853     }
3854 }
3855
3856 static jit_word_t
3857 _calli_p(jit_state_t *_jit, jit_word_t i0)
3858 {
3859     jit_word_t          w;
3860     jit_int32_t         reg;
3861     reg = jit_get_reg(jit_class_gpr);
3862     w = _jit->pc.w;
3863     movi_p(rn(reg), i0);
3864     if (jit_thumb_p())
3865         T1_BLX(rn(reg));
3866     else
3867         BLX(rn(reg));
3868     jit_unget_reg(reg);
3869     return (w);
3870 }
3871
3872 static void
3873 _prolog(jit_state_t *_jit, jit_node_t *node)
3874 {
3875     jit_int32_t         reg;
3876     if (_jitc->function->define_frame || _jitc->function->assume_frame) {
3877         jit_int32_t     frame = -_jitc->function->frame;
3878         assert(_jitc->function->self.aoff >= frame);
3879         if (_jitc->function->assume_frame) {
3880             if (jit_thumb_p() && !_jitc->thumb)
3881                 _jitc->thumb = _jit->pc.w;
3882             return;
3883         }
3884         _jitc->function->self.aoff = frame;
3885     }
3886     if (_jitc->function->allocar)
3887         _jitc->function->self.aoff &= -8;
3888     _jitc->function->stack = ((_jitc->function->self.alen -
3889                               /* align stack at 8 bytes */
3890                               _jitc->function->self.aoff) + 7) & -8;
3891
3892     if (jit_thumb_p()) {
3893         /*  switch to thumb mode (better approach would be to
3894          * ORR 1 address being called, but no clear distinction
3895          * of what is a pointer to a jit function, or if patching
3896          * a pointer to a jit function) */
3897         ADDI(_R12_REGNO, _R15_REGNO, 1);
3898         BX(_R12_REGNO);
3899         if (!_jitc->thumb)
3900             _jitc->thumb = _jit->pc.w;
3901         if (jit_cpu.abi) {
3902             T2_PUSH(0xf);
3903             T2_PUSH(0x3f0|(1<<_FP_REGNO)|(1<<_LR_REGNO));
3904             VPUSH_F64(_D8_REGNO, 8);
3905         }
3906         else {
3907             T2_PUSH(0xf);
3908             T2_PUSH(0x3f0|(1<<_FP_REGNO)|(1<<_LR_REGNO));
3909         }
3910     }
3911     else {
3912         if (jit_cpu.abi) {
3913             PUSH(0xf);
3914             PUSH(0x3f0|(1<<_FP_REGNO)|(1<<_LR_REGNO));
3915             VPUSH_F64(_D8_REGNO, 8);
3916         }
3917         else {
3918             PUSH(0xf);
3919             PUSH(0x3f0|(1<<_FP_REGNO)|(1<<_LR_REGNO));
3920         }
3921     }
3922     movr(_FP_REGNO, _SP_REGNO);
3923     if (_jitc->function->stack)
3924         subi(_SP_REGNO, _SP_REGNO, _jitc->function->stack);
3925     if (_jitc->function->allocar) {
3926         reg = jit_get_reg(jit_class_gpr);
3927         movi(rn(reg), _jitc->function->self.aoff);
3928         stxi_i(_jitc->function->aoffoff, _FP_REGNO, rn(reg));
3929         jit_unget_reg(reg);
3930     }
3931 }
3932
3933 static void
3934 _epilog(jit_state_t *_jit, jit_node_t *node)
3935 {
3936     if (_jitc->function->assume_frame)
3937         return;
3938
3939     movr(_SP_REGNO, _FP_REGNO);
3940     if (jit_cpu.abi)
3941         VPOP_F64(_D8_REGNO, 8);
3942     if (jit_thumb_p())
3943         T2_POP(0x3f0|(1<<_FP_REGNO)|(1<<_LR_REGNO));
3944     else
3945         POP(0x3f0|(1<<_FP_REGNO)|(1<<_LR_REGNO));
3946     addi(_SP_REGNO, _SP_REGNO, 16);
3947     if (jit_thumb_p())
3948         T1_BX(_LR_REGNO);
3949     else
3950         BX(_LR_REGNO);
3951     if (jit_thumb_p() && (_jit->pc.w & 2))
3952         T1_NOP();
3953 }
3954
3955 static void
3956 _vastart(jit_state_t *_jit, jit_int32_t r0)
3957 {
3958     assert(_jitc->function->self.call & jit_call_varargs);
3959
3960     /* Initialize stack pointer to the first stack argument.
3961      * The -16 is to account for the 4 argument registers
3962      * always saved, and _jitc->function->vagp is to account
3963      * for declared arguments. */
3964     addi(r0, _FP_REGNO, _jitc->function->self.size -
3965          16 + _jitc->function->vagp);
3966 }
3967
3968 static void
3969 _vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3970 {
3971     assert(_jitc->function->self.call & jit_call_varargs);
3972
3973     /* Load argument. */
3974     ldr(r0, r1);
3975
3976     /* Update stack pointer. */
3977     addi(r1, r1, sizeof(jit_word_t));
3978 }
3979
3980 static void
3981 _patch_at(jit_state_t *_jit,
3982           jit_int32_t kind, jit_word_t instr, jit_word_t label)
3983 {
3984     jit_word_t           d;
3985     jit_thumb_t          thumb;
3986     union {
3987         jit_int16_t     *s;
3988         jit_int32_t     *i;
3989         jit_word_t       w;
3990     } u;
3991     u.w = instr;
3992     if (kind == arm_patch_jump) {
3993         if (jit_thumb_p() && (jit_uword_t)instr >= _jitc->thumb) {
3994             code2thumb(thumb.s[0], thumb.s[1], u.s[0], u.s[1]);
3995             if ((thumb.i & THUMB2_B) == THUMB2_B) {
3996                 d = ((label - instr) >> 1) - 2;
3997                 assert(_s24P(d));
3998                 thumb.i = THUMB2_B | encode_thumb_jump(d);
3999                 thumb2code(thumb.s[0], thumb.s[1], u.s[0], u.s[1]);
4000             }
4001             else if ((thumb.i & THUMB2_B) == THUMB2_CC_B) {
4002                 d = ((label - instr) >> 1) - 2;
4003                 assert(_s20P(d));
4004                 thumb.i = THUMB2_CC_B | (thumb.i & 0x3c00000) |
4005                           encode_thumb_cc_jump(d);
4006                 thumb2code(thumb.s[0], thumb.s[1], u.s[0], u.s[1]);
4007             }
4008             else {
4009                 /* for the sake of simplicity in case choose to
4010                  * movw+movt+[bx|blx], e.g. if changing to instead
4011                  * of asserting target is reachable, load constant
4012                  * and do indirect jump if not reachable */
4013                 if ((thumb.i & 0xfbf00000) == THUMB2_MOVWI)
4014                     goto indirect_jump;
4015                 assert(!"handled branch opcode");
4016             }
4017         }
4018         else {
4019             thumb.i = u.i[0];
4020             /* 0x0e000000 because 0x01000000 is (branch&) link modifier */
4021             assert((thumb.i & 0x0e000000) == ARM_B);
4022             d = ((label - instr) >> 2) - 2;
4023             assert(_s24P(d));
4024             u.i[0] = (thumb.i & 0xff000000) | (d & 0x00ffffff);
4025         }
4026     }
4027     else if (kind == arm_patch_load) {
4028         /* offset may be negative for a forward patch because it
4029          * is relative to pc + 8, for example:
4030          *          ldr r0, [pc, #-4]
4031          *          bx r0               ;; [pc, #-8]
4032          *          .data ...           ;; [pc, #-4]
4033          *          ...                 ;; [pc]
4034          */
4035         assert(!jit_thumb_p());
4036         thumb.i = u.i[0];
4037         assert((thumb.i & 0x0f700000) == ARM_LDRI);
4038         d = label - (instr + 8);
4039         if (d < 0) {
4040             thumb.i &= ~ARM_P;
4041             d = -d;
4042         }
4043         else
4044             thumb.i |= ARM_P;
4045         assert(!(d & 0xfffff000));
4046         u.i[0] = (thumb.i & 0xfffff000) | d;
4047     }
4048     else if (kind == arm_patch_word) {
4049         if (jit_thumb_p()) {
4050             code2thumb(thumb.s[0], thumb.s[1], u.s[0], u.s[1]);
4051             assert((thumb.i & 0xfbf00000) == THUMB2_MOVWI);
4052         indirect_jump:
4053             thumb.i = ((thumb.i & 0xfbf00f00) |
4054                        ( (label & 0x0000f000) <<  4) |
4055                        ( (label & 0x00000800) << 15) |
4056                        ( (label & 0x00000700) <<  4) |
4057                        (  label & 0x000000ff));
4058             thumb2code(thumb.s[0], thumb.s[1], u.s[0], u.s[1]);
4059             label >>= 16;
4060             code2thumb(thumb.s[0], thumb.s[1], u.s[2], u.s[3]);
4061             assert((thumb.i & 0xfbf00000) == THUMB2_MOVTI);
4062             thumb.i = ((thumb.i & 0xfbf00f00) |
4063                        ( (label & 0x0000f000) <<  4) |
4064                        ( (label & 0x00000800) << 15) |
4065                        ( (label & 0x00000700) <<  4) |
4066                        (  label & 0x000000ff));
4067             thumb2code(thumb.s[0], thumb.s[1], u.s[2], u.s[3]);
4068         }
4069         else
4070             u.i[0] = label;
4071     }
4072     else
4073         assert(!"handled patch");
4074 }
4075 #endif