a0852a2cd55c8716487ec55276f1fc1c016c4b44
[pcsx_rearmed.git] / deps / lightning / lib / jit_arm-cpu.c
1 /*
2  * Copyright (C) 2012-2023  Free Software Foundation, Inc.
3  *
4  * This file is part of GNU lightning.
5  *
6  * GNU lightning is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU Lesser General Public License as published
8  * by the Free Software Foundation; either version 3, or (at your option)
9  * any later version.
10  *
11  * GNU lightning is distributed in the hope that it will be useful, but
12  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
14  * License for more details.
15  *
16  * Authors:
17  *      Paulo Cesar Pereira de Andrade
18  */
19
20 #if PROTO
/* The unsuffixed load/store names forward to the `_i` variants. */
21 #  define stxi(i0,r0,r1)                stxi_i(i0,r0,r1)
22 #  define ldxi(r0,r1,i0)                ldxi_i(r0,r1,i0)
23 #  define ldr(r0,r1)                    ldr_i(r0,r1)
/*
 * Range predicates: non-zero when d fits in a signed 20-bit
 * (-0x80000..0x7ffff) or signed 24-bit (-0x800000..0x7fffff) field.
 * Both uses of d are parenthesized so that an argument containing a
 * low-precedence operator (e.g. `a ? b : c`) is tested as a whole.
 * Note that d is evaluated twice.
 */
#  define _s20P(d)                      ((d) >= -(int)0x80000 && (d) <= 0x7ffff)
#  define _s24P(d)                      ((d) >= -(int)0x800000 && (d) <= 0x7fffff)
/* _uN(v): keep only the low N bits of v (used when packing a value into an N-bit field). */
26 #  define _u3(v)                        ((v) & 0x7)
27 #  define _u4(v)                        ((v) & 0xf)
28 #  define _u5(v)                        ((v) & 0x1f)
29 #  define _u8(v)                        ((v) & 0xff)
30 #  define _u12(v)                       ((v) & 0xfff)
31 #  define _u13(v)                       ((v) & 0x1fff)
32 #  define _u16(v)                       ((v) & 0xffff)
33 #  define _u24(v)                       ((v) & 0xffffff)
/* Mode/feature predicates: thumb and version come from jit_cpu, no_set_flags from the current _jitc. */
34 #  define jit_thumb_p()                 jit_cpu.thumb
35 #  define jit_no_set_flags()            _jitc->no_set_flags
/*
 * Architecture version tests.  The "5e" and "7r" forms are true for any
 * later version, or for exactly that version when jit_cpu.extend is set.
 */
36 #  define jit_armv5_p()                 (jit_cpu.version >= 5)
37 #  define jit_armv5e_p()                (jit_cpu.version > 5 || (jit_cpu.version == 5 && jit_cpu.extend))
38 #  define jit_armv6_p()                 (jit_cpu.version >= 6)
39 #  define jit_armv7_p()                 (jit_cpu.version >= 7)
40 #  define jit_armv7r_p()                (jit_cpu.version > 7 || (jit_cpu.version == 7 && jit_cpu.extend))
/*
 * Signed and unsigned divide/modulo run-time helpers, declared here so
 * the code in this file can reference them.
 * NOTE(review): presumably used as a fallback when the hardware divide
 * instructions are unavailable -- confirm against the call sites.
 */
41 extern int      __aeabi_idivmod(int, int);
42 extern unsigned __aeabi_uidivmod(unsigned, unsigned);
/* Numeric encodings of the sixteen core registers R0..R15. */
43 #  define _R0_REGNO                     0x00
44 #  define _R1_REGNO                     0x01
45 #  define _R2_REGNO                     0x02
46 #  define _R3_REGNO                     0x03
47 #  define _R4_REGNO                     0x04
48 #  define _R5_REGNO                     0x05
49 #  define _R6_REGNO                     0x06
50 #  define _R7_REGNO                     0x07
51 #  define _R8_REGNO                     0x08
52 #  define _R9_REGNO                     0x09
53 #  define _R10_REGNO                    0x0a
54 #  define _R11_REGNO                    0x0b
55 #  define _R12_REGNO                    0x0c
56 #  define _R13_REGNO                    0x0d
57 #  define _R14_REGNO                    0x0e
58 #  define _R15_REGNO                    0x0f
/* Role aliases: FP = R11, SP = R13, LR = R14, PC = R15. */
59 #  define _FP_REGNO                     _R11_REGNO
60 #  define _SP_REGNO                     _R13_REGNO
61 #  define _LR_REGNO                     _R14_REGNO
62 #  define _PC_REGNO                     _R15_REGNO
/*
 * Condition codes, already positioned in the top four bits (31:28) of a
 * 32-bit word.  The trailing comment on each line gives the flag state
 * for which the condition holds.  CS and CC are aliases of HS and LO.
 */
63 #  define ARM_CC_EQ                     0x00000000      /* Z=1 */
64 #  define ARM_CC_NE                     0x10000000      /* Z=0 */
65 #  define ARM_CC_HS                     0x20000000      /* C=1 */
66 #    define ARM_CC_CS                   ARM_CC_HS
67 #  define ARM_CC_LO                     0x30000000      /* C=0 */
68 #    define ARM_CC_CC                   ARM_CC_LO
69 #  define ARM_CC_MI                     0x40000000      /* N=1 */
70 #  define ARM_CC_PL                     0x50000000      /* N=0 */
71 #  define ARM_CC_VS                     0x60000000      /* V=1 */
72 #  define ARM_CC_VC                     0x70000000      /* V=0 */
73 #  define ARM_CC_HI                     0x80000000      /* C=1 && Z=0 */
74 #  define ARM_CC_LS                     0x90000000      /* C=0 || Z=1 */
75 #  define ARM_CC_GE                     0xa0000000      /* N=V */
76 #  define ARM_CC_LT                     0xb0000000      /* N!=V */
77 #  define ARM_CC_GT                     0xc0000000      /* Z=0 && N=V */
78 #  define ARM_CC_LE                     0xd0000000      /* Z=1 || N!=V */
79 #  define ARM_CC_AL                     0xe0000000      /* always */
80 #  define ARM_CC_NV                     0xf0000000      /* reserved */
/*
 * Selectors (0..14) for the IT-block shapes.  The IT*() macros pass one
 * of these as the second argument of tcit(); the name spells out the
 * then/else pattern (T = then, E = else) after the initial "IT".
 */
81 #  define THUMB2_IT                     0
82 #  define THUMB2_ITT                    1
83 #  define THUMB2_ITE                    2
84 #  define THUMB2_ITTT                   3
85 #  define THUMB2_ITET                   4
86 #  define THUMB2_ITTE                   5
87 #  define THUMB2_ITEE                   6
88 #  define THUMB2_ITTTT                  7
89 #  define THUMB2_ITETT                  8
90 #  define THUMB2_ITTET                  9
91 #  define THUMB2_ITEET                  10
92 #  define THUMB2_ITTTE                  11
93 #  define THUMB2_ITETE                  12
94 #  define THUMB2_ITTEE                  13
95 #  define THUMB2_ITEEE                  14
/*
 * Base opcode bit patterns for move and add/subtract style instructions.
 * ARM_* and THUMB2_* constants are written as 32-bit values, THUMB_*
 * constants as 16-bit values (right-aligned in the column).  Operand
 * fields are OR-ed in by the emitter macros and functions.
 * A trailing "I" in the name marks an immediate-operand form.
 */
96 #  define ARM_MOV                       0x01a00000
97 #  define THUMB_MOV                         0x4600
98 #  define ARM_MOVWI                     0x03000000      /* v6t2, v7 */
99 #  define THUMB_MOVI                        0x2000
/* Note: THUMB2_MOVI has the same value as THUMB2_ORRI. */
100 #  define THUMB2_MOVI                   0xf0400000
101 #  define THUMB2_MOVWI                  0xf2400000
102 #  define ARM_MOVTI                     0x03400000
103 #  define THUMB2_MOVTI                  0xf2c00000
104 #  define ARM_MVN                       0x01e00000
105 #  define THUMB_MVN                         0x43c0
106 #  define THUMB2_MVN                    0xea600000
107 #  define THUMB2_MVNI                   0xf0600000
/* Modifier bits OR-ed into an opcode (ARM_S is also OR-ed into THUMB2_* opcodes by the T2_*S macros). */
108 #  define ARM_I                         0x02000000 /* immediate */
109 #  define ARM_S                         0x00100000 /* set flags */
110 #  define ARM_ADD                       0x00800000
111 #  define THUMB_ADD                         0x1800
112 #  define THUMB_ADDX                        0x4400
113 #  define THUMB2_ADD                    0xeb000000
114 #  define THUMB_ADDI3                       0x1c00
115 #  define THUMB_ADDI8                       0x3000
116 #  define THUMB2_ADDI                   0xf1000000
117 #  define THUMB2_ADDWI                  0xf2000000
118 #  define ARM_ADC                       0x00a00000
119 #  define THUMB_ADC                         0x4140
120 #  define THUMB2_ADC                    0xeb400000
121 #  define THUMB2_ADCI                   0xf1400000
122 #  define ARM_SUB                       0x00400000
123 #  define THUMB_SUB                         0x1a00
124 #  define THUMB2_SUB                    0xeba00000
125 #  define THUMB_SUBI3                       0x1e00
126 #  define THUMB_SUBI8                       0x3800
127 #  define THUMB2_SUBI                   0xf1a00000
128 #  define THUMB2_SUBWI                  0xf2a00000
129 #  define ARM_SBC                       0x00c00000
130 #  define THUMB_SBC                         0x4180
131 #  define THUMB2_SBC                    0xeb600000
132 #  define THUMB2_SBCI                   0xf1600000
/*
 * Reverse-subtract opcodes.  THUMB2_RSB is the Thumb-2 register form
 * (RSB (register), encoding T1); the T2_RSB() macro expands to it and
 * would otherwise reference an undefined identifier.
 */
#  define ARM_RSB                       0x00600000
#  define THUMB_RSBI                        0x4240
#  define THUMB2_RSB                    0xebc00000
#  define THUMB2_RSBI                   0xf1c00000
/* Multiply opcodes: MUL, and the UMULL/SMULL forms used with a low/high register pair. */
136 #  define ARM_MUL                       0x00000090
137 #  define THUMB_MUL                         0x4340
138 #  define THUMB2_MUL                    0xfb00f000
139 #  define ARM_UMULL                     0x00800090
140 #  define THUMB2_UMULL                  0xfba00000
141 #  define ARM_SMULL                     0x00c00090
142 #  define THUMB2_SMULL                  0xfb800000
/* Integer divide opcodes; the surrounding markers tag them as ARMv7r. */
143 /* >> ARMv7r */
144 #  define ARM_SDIV                      0x07100010
145 #  define ARM_UDIV                      0x07300010
146 #  define THUMB2_SDIV                   0xfb90f0f0
147 #  define THUMB2_UDIV                   0xfbb0f0f0
148 /* << ARMv7r */
/* Bitwise-logic opcodes (AND, BIC, ORR, EOR).  ARM_AND is the all-zero pattern. */
149 #  define ARM_AND                       0x00000000
150 #  define THUMB_AND                         0x4000
151 #  define THUMB2_AND                    0xea000000
152 #  define THUMB2_ANDI                   0xf0000000
153 #  define ARM_BIC                       0x01c00000
154 #  define THUMB2_BIC                    0xea200000
155 #  define THUMB2_BICI                   0xf0200000
156 #  define ARM_ORR                       0x01800000
157 #  define THUMB_ORR                         0x4300
158 #  define THUMB2_ORR                    0xea400000
159 #  define THUMB2_ORRI                   0xf0400000
160 #  define ARM_EOR                       0x00200000
161 #  define THUMB_EOR                         0x4040
162 #  define THUMB2_EOR                    0xea800000
163 #  define THUMB2_EORI                   0xf0800000
/* Byte-reverse, sign/zero-extend and exclusive load/store opcodes; the markers tag them as ARMv6 and later. */
164 /* >> ARMv6* */
165 #  define ARM_REV                       0x06bf0f30
166 #  define THUMB_REV                         0xba00
167 #  define THUMB2_REV                    0xfa90f080
168 #  define ARM_REV16                     0x06bf0fb0
169 #  define THUMB_REV16                       0xba40
170 #  define THUMB2_REV16                  0xfa90f090
171 #  define ARM_SXTB                      0x06af0070
172 #  define THUMB_SXTB                        0xb240
173 #  define THUMB2_SXTB                   0xfa40f080
174 #  define ARM_UXTB                      0x06ef0070
175 #  define THUMB_UXTB                        0xb2c0
176 #  define THUMB2_UXTB                   0xfa50f080
177 #  define ARM_SXTH                      0x06bf0070
178 #  define THUMB_SXTH                        0xb200
179 #  define THUMB2_SXTH                   0xfa00f080
180 #  define ARM_UXTH                      0x06ff0070
181 #  define THUMB_UXTH                        0xb280
182 #  define THUMB2_UXTH                   0xfa10f080
/* Rotation selectors for the extend instructions above. */
183 #  define ARM_XTR8                      0x00000400 /* ?xt? rotate 8 bits */
184 #  define ARM_XTR16                     0x00000800 /* ?xt? rotate 16 bits */
185 #  define ARM_XTR24                     0x00000c00 /* ?xt? rotate 24 bits */
186 #  define ARM_LDREX                     0x01900090
187 #  define THUMB2_LDREX                  0xe8500000
188 #  define ARM_STREX                     0x01800090
189 #  define THUMB2_STREX                  0xe8400000
190 /* << ARMv6* */
/* Count-leading-zeros and bit-reverse opcodes; ARM_CLZ sits outside the ARMv6t2 markers. */
191 /* >> ARMv6t2 */
192 #  define THUMB2_CLZ                    0xfab0f080
193 #  define THUMB2_RBIT                   0xfa90f0a0
194 #  define ARM_RBIT                      0x06f00030
195 /* << ARMv6t2 */
196 #  define ARM_CLZ                       0x01600010
/*
 * Data memory barrier opcodes and 4-bit option values, tagged ARMv7.
 * NOTE(review): the DMB_* values are presumably the `im` argument of
 * DMB()/T2_DMB() -- confirm in _dmb()/_tdmb().
 * NOTE(review): DMB_NSHT (0x6) follows the pattern of the other *ST
 * names (value one below the plain form), so it presumably means NSHST.
 */
197 /* >> ARMv7 */
198 #  define ARM_DMB                       0xf57ff050
199 #  define THUMB2_DMB                    0xf3bf8f50
200 #  define DMB_SY                        0xf
201 #  define DMB_ST                        0xe
202 #  define DMB_ISH                       0xb
203 #  define DMB_ISHST                     0xa
204 #  define DMB_NSH                       0x7
205 #  define DMB_NSHT                      0x6
206 #  define DMB_OSH                       0x3
207 #  define DMB_OSHST                     0x2
208 /* << ARMv7 */
/*
 * Shift opcodes.  ARM_SHIFT has the same value as ARM_MOV; the ARM_LSL,
 * ARM_LSR, ARM_ASR and ARM_ROR values select the shift type and ARM_R
 * selects a register-specified shift amount.
 */
209 #  define ARM_SHIFT                     0x01a00000
210 #  define ARM_R                         0x00000010 /* register shift */
211 #  define ARM_LSL                       0x00000000
212 #  define THUMB_LSL                         0x4080
213 #  define THUMB2_LSL                    0xfa00f000
214 #  define THUMB_LSLI                        0x0000
215 #  define THUMB2_LSLI                   0xea4f0000
216 #  define ARM_LSR                       0x00000020
217 #  define THUMB_LSR                         0x40c0
218 #  define THUMB2_LSR                    0xfa20f000
219 #  define THUMB_LSRI                        0x0800
220 #  define THUMB2_LSRI                   0xea4f0010
221 #  define ARM_ASR                       0x00000040
222 #  define THUMB_ASR                         0x4100
223 #  define THUMB2_ASR                    0xfa40f000
224 #  define THUMB_ASRI                        0x1000
225 #  define THUMB2_ASRI                   0xea4f0020
226 #  define ARM_ROR                       0x00000060
/* Compare and test opcodes (CMP, CMN, TST, TEQ). */
227 #  define ARM_CMP                       0x01500000
228 #  define THUMB_CMP                         0x4280
229 #  define THUMB_CMPX                        0x4500
230 #  define THUMB2_CMP                    0xebb00000
231 #  define THUMB_CMPI                        0x2800
232 #  define THUMB2_CMPI                   0xf1b00000
233 #  define ARM_CMN                       0x01700000
234 #  define THUMB_CMN                         0x42c0
235 #  define THUMB2_CMN                    0xeb100000
236 #  define THUMB2_CMNI                   0xf1100000
237 #  define ARM_TST                       0x01100000
238 #  define THUMB_TST                         0x4200
239 #  define THUMB2_TST                    0xea100000
240 #  define THUMB2_TSTI                   0xf0100000
241 #  define ARM_TEQ                       0x01300000
/* Branch opcodes: register forms (BX, BLX), immediate forms (B, BLI, BLXI) and the Thumb conditional forms (CC_B). */
242 /* branch */
243 #  define ARM_BX                        0x012fff10
244 #  define ARM_BLX                       0x012fff30
245 #  define THUMB_BLX                         0x4780
246 #  define ARM_BLXI                      0xfa000000
247 #  define THUMB2_BLXI                   0xf000c000
248 #  define ARM_B                         0x0a000000
249 #  define THUMB_CC_B                        0xd000
250 #  define THUMB_B                           0xe000
251 #  define THUMB2_CC_B                   0xf0008000
252 #  define THUMB2_B                      0xf0009000
253 #  define ARM_BLI                       0x0b000000
254 #  define THUMB2_BLI                    0xf000d000
/*
 * Load/store opcodes, grouped by access size (B = byte, H = halfword,
 * plain = word, D = doubleword) and signedness (S = signed load).
 * Name suffixes: "I" = immediate-offset form, "WI" = a second Thumb-2
 * immediate form, "ISP" = Thumb form with its own opcode for SP.
 * NOTE(review): "WI" presumably denotes the wide 12-bit positive offset
 * encoding and "ISP" the SP-relative encoding -- confirm in the emitters.
 */
255 /* ldr/str */
256 #  define ARM_P                         0x00800000 /* positive offset */
/* Thumb-2 addressing-mode bits for the 8-bit immediate forms. */
257 #  define THUMB2_P                      0x00000400
258 #  define THUMB2_U                      0x00000200
259 #  define THUMB2_W                      0x00000100
260 #  define ARM_LDRSB                     0x011000d0
261 #  define THUMB_LDRSB                       0x5600
262 #  define THUMB2_LDRSB                  0xf9100000
263 #  define ARM_LDRSBI                    0x015000d0
264 #  define THUMB2_LDRSBI                 0xf9100c00
265 #  define THUMB2_LDRSBWI                0xf9900000
266 #  define ARM_LDRB                      0x07500000
267 #  define THUMB_LDRB                        0x5c00
268 #  define THUMB2_LDRB                   0xf8100000
269 #  define ARM_LDRBI                     0x05500000
270 #  define THUMB_LDRBI                       0x7800
271 #  define THUMB2_LDRBI                  0xf8100c00
272 #  define THUMB2_LDRBWI                 0xf8900000
273 #  define ARM_LDRSH                     0x011000f0
274 #  define THUMB_LDRSH                       0x5e00
275 #  define THUMB2_LDRSH                  0xf9300000
276 #  define ARM_LDRSHI                    0x015000f0
277 #  define THUMB2_LDRSHI                 0xf9300c00
278 #  define THUMB2_LDRSHWI                0xf9b00000
279 #  define ARM_LDRH                      0x011000b0
280 #  define THUMB_LDRH                        0x5a00
281 #  define THUMB2_LDRH                   0xf8300000
282 #  define ARM_LDRHI                     0x015000b0
283 #  define THUMB_LDRHI                       0x8800
284 #  define THUMB2_LDRHI                  0xf8300c00
285 #  define THUMB2_LDRHWI                 0xf8b00000
286 #  define ARM_LDR                       0x07100000
287 #  define THUMB_LDR                         0x5800
288 #  define THUMB2_LDR                    0xf8500000
289 #  define ARM_LDRI                      0x05100000
290 #  define THUMB_LDRI                        0x6800
291 #  define THUMB_LDRISP                      0x9800
292 #  define THUMB2_LDRI                   0xf8500c00
293 #  define THUMB2_LDRWI                  0xf8d00000
294 #  define ARM_LDRD                      0x010000d0
295 #  define ARM_LDRDI                     0x014000d0
/* Note: THUMB2_LDRDI has the same value as THUMB2_LDREX, and THUMB2_STRDI the same as THUMB2_STREX. */
296 #  define THUMB2_LDRDI                  0xe8500000
297 #  define ARM_STRB                      0x07400000
298 #  define THUMB_STRB                        0x5400
299 #  define THUMB2_STRB                   0xf8000000
300 #  define ARM_STRBI                     0x05400000
301 #  define THUMB_STRBI                       0x7000
302 #  define THUMB2_STRBI                  0xf8000c00
303 #  define THUMB2_STRBWI                 0xf8800000
304 #  define ARM_STRH                      0x010000b0
305 #  define THUMB_STRH                        0x5200
306 #  define THUMB2_STRH                   0xf8200000
307 #  define ARM_STRHI                     0x014000b0
308 #  define THUMB_STRHI                       0x8000
309 #  define THUMB2_STRHI                  0xf8200c00
310 #  define THUMB2_STRHWI                 0xf8a00000
311 #  define ARM_STR                       0x07000000
312 #  define THUMB_STR                         0x5000
313 #  define THUMB2_STR                    0xf8400000
314 #  define ARM_STRI                      0x05000000
315 #  define THUMB_STRI                        0x6000
316 # define THUMB2_STRWI                   0xf8c00000
317 #  define THUMB_STRISP                      0x9000
318 #  define THUMB2_STRI                   0xf8400c00
319 #  define ARM_STRD                      0x010000f0
320 # define ARM_STRDI                      0x014000f0
321 #  define THUMB2_STRDI                  0xe8400000
/*
 * Load/store-multiple opcodes.  ARM_M is the base pattern; the ARM_M_*
 * bits select load vs. store, increment vs. decrement, before vs. after
 * and base-register update, as noted on each line.
 */
322 /* ldm/stm */
323 #  define ARM_M                         0x08000000
324 #  define ARM_M_L                       0x00100000 /* load; store if not set */
325 #  define ARM_M_I                       0x00800000 /* inc; dec if not set */
326 #  define ARM_M_B                       0x01000000 /* before; after if not set */
327 #  define ARM_M_U                       0x00200000 /* update Rn */
/* Thumb-2 modifier bits and Thumb/Thumb-2 multiple-transfer and push/pop opcodes. */
328 #  define THUMB2_LDM_W                  0x00200000
329 #  define THUMB2_LDM_P                  0x00008000
330 #  define THUMB2_LDM_M                  0x00004000
331 #  define THUMB_LDMIA                       0xc800
332 #  define THUMB2_LDMIA                  0xe8900000
333 #  define THUMB2_LDMB                   0xe9100000
334 #  define THUMB_PUSH                        0xb400
335 #  define THUMB2_PUSH                   0xe92d0000
336 #  define THUMB_POP                         0xbc00
337 #  define THUMB2_POP                    0xe8bd0000
/*
 * Raw emitters.  ii(i) stores i through _jit->pc.ui and post-increments
 * that pointer; is(i) does the same through _jit->pc.us.
 * NOTE(review): ui/us are presumably the 32-bit and 16-bit views of the
 * code pointer -- confirm against the jit_state_t definition.
 * Neither macro parenthesizes its argument or wraps itself in a
 * do/while, so use them as complete statements with simple arguments.
 */
338 #  define ii(i)                         *_jit->pc.ui++ = i
339 #  define is(i)                         *_jit->pc.us++ = i
/*
 * iss(i, j) emits two 16-bit units: j then i on little-endian hosts,
 * i then j otherwise.  code2thumb() and thumb2code() copy a pair of
 * halves between the (c0, c1) and (t0, t1) variables, swapping the two
 * on little-endian hosts and copying straight through otherwise.
 */
340 #  if __BYTE_ORDER == __LITTLE_ENDIAN
341 #    define iss(i, j)                   do { is(j); is(i); } while (0)
342 #    define code2thumb(t0, t1, c0, c1)  do { t1 = c0; t0 = c1; } while (0)
343 #    define thumb2code(t0, t1, c0, c1)  do { c0 = t1; c1 = t0; } while (0)
344 #  else
345 #    define iss(i, j)                   do { is(i); is(j); } while (0)
346 #    define code2thumb(t0, t1, c0, c1)  do { t0 = c0; t1 = c1; } while (0)
347 #    define thumb2code(t0, t1, c0, c1)  do { c0 = t0; c1 = t1; } while (0)
348 #  endif
/*
 * Forward declarations of the immediate/offset encoders.  Each takes the
 * value to encode and returns an int.
 * NOTE(review): presumably the result is the encoded field, with a
 * sentinel when the value is not representable -- confirm against the
 * definitions, which are outside this section.
 */
349 static int encode_arm_immediate(unsigned int v);
350 static int encode_thumb_immediate(unsigned int v);
351 static int encode_thumb_word_immediate(unsigned int v);
352 static int encode_thumb_jump(int v);
353 static int encode_thumb_cc_jump(int v);
354 static int encode_thumb_shift(int v, int type) maybe_unused;
/*
 * Instruction-format emitters.  Each lower-case macro forwards to the
 * static function of the same name with a leading underscore, supplying
 * the in-scope `_jit` as the first argument.
 *
 * Naming visible from the parameter lists: macros starting with "c" take
 * a condition code `cc` followed by the opcode `o`; macros starting with
 * "t" take the opcode `o` first and no condition (tc8 and tcb take `cc`
 * and an immediate, tcit takes `cc` and an IT selector).
 * The remaining parameters are register numbers (rn, rd, rm, rt, ...)
 * and/or an immediate `im`, in the order each format expects.
 */
355 #  define corrr(cc,o,rn,rd,rm)          _corrr(_jit,cc,o,rn,rd,rm)
356 static void _corrr(jit_state_t*,int,int,int,int,int);
357 #  define corri(cc,o,rn,rd,im)          _corri(_jit,cc,o,rn,rd,im)
358 static void _corri(jit_state_t*,int,int,int,int,int);
359 #define corri8(cc,o,rn,rt,im)   _corri8(_jit,cc,o,rn,rt,im)
360 static void _corri8(jit_state_t*,int,int,int,int,int);
361 #  define torrr(o,rn,rd,rm)             _torrr(_jit,o,rn,rd,rm)
362 static void _torrr(jit_state_t*,int,int,int,int);
363 #  define torrrs(o,rn,rd,rm,im)         _torrrs(_jit,o,rn,rd,rm,im)
364 static void _torrrs(jit_state_t*,int,int,int,int,int) maybe_unused;
365 #  define torxr(o,rn,rt,rm)             _torxr(_jit,o,rn,rt,rm)
366 static void _torxr(jit_state_t*,int,int,int,int);
367 #  define torrrr(o,rn,rl,rh,rm)         _torrrr(_jit,o,rn,rl,rh,rm)
368 static void _torrrr(jit_state_t*,int,int,int,int,int) maybe_unused;
369 #  define torrri8(o,rn,rt,rt2,im)       _torrri8(_jit,o,rn,rt,rt2,im)
370 static void _torrri8(jit_state_t*,int,int,int,int,int) maybe_unused;
371 #  define coriw(cc,o,rd,im)             _coriw(_jit,cc,o,rd,im)
372 static void _coriw(jit_state_t*,int,int,int,int);
/* torri takes four arguments after _jit; the T2_* macros call it as (opcode, rn, rd, im). */
373 #  define torri(o,rd,rn,im)             _torri(_jit,o,rd,rn,im)
374 static void _torri(jit_state_t*,int,int,int,int);
375 #  define torri8(o,rn,rt,im)            _torri8(_jit,o,rn,rt,im)
376 static void _torri8(jit_state_t*,int,int,int,int);
377 #  define torri12(o,rn,rt,im)           _torri12(_jit,o,rn,rt,im)
378 static void _torri12(jit_state_t*,int,int,int,int);
379 #  define tshift(o,rd,rm,im)            _tshift(_jit,o,rd,rm,im)
380 static void _tshift(jit_state_t*,int,int,int,int);
381 #  define toriw(o,rd,im)                _toriw(_jit,o,rd,im)
382 static void _toriw(jit_state_t*,int,int,int);
383 #  define tc8(cc,im)                    _tc8(_jit,cc,im)
384 static void _tc8(jit_state_t*,int,int) maybe_unused;
385 #  define t11(im)                       _t11(_jit,im)
386 static void _t11(jit_state_t*,int);
387 #  define tcb(cc,im)                    _tcb(_jit,cc,im)
388 static void _tcb(jit_state_t*,int,int);
389 #  define blxi(im)                      _blxi(_jit,im)
390 static void _blxi(jit_state_t*,int) maybe_unused;
391 #  define tb(o,im)                      _tb(_jit,o,im)
392 static void _tb(jit_state_t*,int,int);
393 #  define corrrr(cc,o,rh,rl,rm,rn)      _corrrr(_jit,cc,o,rh,rl,rm,rn)
394 static void _corrrr(jit_state_t*,int,int,int,int,int,int);
395 #  define corrrs(cc,o,rn,rd,rm,im)      _corrrs(_jit,cc,o,rn,rd,rm,im)
396 static void _corrrs(jit_state_t*,int,int,int,int,int,int);
397 #  define cshift(cc,o,rd,rm,rn,im)      _cshift(_jit,cc,o,rd,rm,rn,im)
398 static void _cshift(jit_state_t*,int,int,int,int,int,int);
399 #  define cb(cc,o,im)                   _cb(_jit,cc,o,im)
400 static void _cb(jit_state_t*,int,int,int);
401 #  define cbx(cc,o,rm)                  _cbx(_jit,cc,o,rm)
402 static void _cbx(jit_state_t*,int,int,int);
403 #  define corl(cc,o,r0,i0)              _corl(_jit,cc,o,r0,i0)
404 static void _corl(jit_state_t*,int,int,int,int);
405 #  define c6orr(cc,o,r0,r1)             _c6orr(_jit,cc,o,r0,r1)
406 static void _c6orr(jit_state_t*,int,int,int,int);
/* tcit is the only emitter here whose condition parameter is declared unsigned. */
407 #  define tcit(cc,it)                   _tcit(_jit,cc,it)
408 static void _tcit(jit_state_t*,unsigned int,int);
/* One macro per IT-block shape: each calls tcit() with the condition cc and the matching THUMB2_IT* selector. */
409 #  define IT(cc)                        tcit(cc,THUMB2_IT)
410 #  define ITT(cc)                       tcit(cc,THUMB2_ITT)
411 #  define ITE(cc)                       tcit(cc,THUMB2_ITE)
412 #  define ITTT(cc)                      tcit(cc,THUMB2_ITTT)
413 #  define ITTE(cc)                      tcit(cc,THUMB2_ITTE)
414 #  define ITET(cc)                      tcit(cc,THUMB2_ITET)
415 #  define ITEE(cc)                      tcit(cc,THUMB2_ITEE)
416 #  define ITTTT(cc)                     tcit(cc,THUMB2_ITTTT)
417 #  define ITETT(cc)                     tcit(cc,THUMB2_ITETT)
418 #  define ITTET(cc)                     tcit(cc,THUMB2_ITTET)
419 #  define ITEET(cc)                     tcit(cc,THUMB2_ITEET)
420 #  define ITTTE(cc)                     tcit(cc,THUMB2_ITTTE)
421 #  define ITETE(cc)                     tcit(cc,THUMB2_ITETE)
422 #  define ITTEE(cc)                     tcit(cc,THUMB2_ITTEE)
423 #  define ITEEE(cc)                     tcit(cc,THUMB2_ITEEE)
/*
 * tpp and torl are further Thumb emitters taking an opcode and an
 * immediate (torl also takes a register rn).
 * NOTE(review): presumably the push/pop and register-list formats, where
 * `im` would be a register mask -- confirm in the definitions.
 */
424 #  define tpp(o,im)                     _tpp(_jit,o,im)
425 static void _tpp(jit_state_t*,int,int);
426 #  define torl(o,rn,im)                 _torl(_jit,o,rn,im)
427 static void _torl(jit_state_t*,int,int,int) maybe_unused;
/* Memory barrier: DMB(im) and T2_DMB(im) forward to _dmb() and _tdmb() with the in-scope _jit. */
428 #  define DMB(im)                       dmb(im)
429 #  define T2_DMB(im)                    tdmb(im)
430 #  define dmb(im)                       _dmb(_jit, im)
431 static void _dmb(jit_state_t *_jit, int im);
432 #  define tdmb(im)                      _tdmb(_jit, im)
433 static void _tdmb(jit_state_t *_jit, int im);
/*
 * Instruction macros.  Conventions used from here on:
 *   CC_X(cc, ...)  ARM form taking an explicit condition code;
 *   X(...)         the same instruction with ARM_CC_AL;
 *   T1_X(...)      a single 16-bit unit emitted directly with is();
 *   T2_X(...)      a form built on one of the t* emitters.
 *
 * Move, move-not and related single-operand instructions.
 */
434 #  define CC_MOV(cc,rd,rm)              corrr(cc,ARM_MOV,0,rd,rm)
435 #  define MOV(rd,rm)                    CC_MOV(ARM_CC_AL,rd,rm)
/* T1_MOV: bit 3 of rd goes to bit 7, rm (4 bits) to bits 6:3, low three bits of rd to bits 2:0. */
436 #  define T1_MOV(rd,rm)                 is(THUMB_MOV|((_u4(rd)&8)<<4)|(_u4(rm)<<3)|(rd&7))
/* T2_MOV expands to T2_ORR with _R15_REGNO in the first source position. */
437 #  define T2_MOV(rd,rm)                 T2_ORR(rd,_R15_REGNO,rm)
438 #  define CC_MOVI(cc,rd,im)             corri(cc,ARM_MOV|ARM_I,0,rd,im)
439 #  define MOVI(rd,im)                   CC_MOVI(ARM_CC_AL,rd,im)
440 #  define CC_MOVWI(cc,rd,im)            coriw(cc,ARM_MOVWI,rd,im)
441 #  define MOVWI(rd,im)                  CC_MOVWI(ARM_CC_AL,rd,im)
/* T1_MOVI: 3-bit rd at bits 10:8, 8-bit immediate in the low byte. */
442 #  define T1_MOVI(rd,im)                is(THUMB_MOVI|(_u3(rd)<<8)|_u8(im))
443 #  define T2_MOVI(rd,im)                torri(THUMB2_MOVI,_R15_REGNO,rd,im)
444 #  define T2_MOVWI(rd,im)               toriw(THUMB2_MOVWI,rd,im)
445 #  define CC_MOVTI(cc,rd,im)            coriw(cc,ARM_MOVTI,rd,im)
446 #  define MOVTI(rd,im)                  CC_MOVTI(ARM_CC_AL,rd,im)
447 #  define T2_MOVTI(rd,im)               toriw(THUMB2_MOVTI,rd,im)
448 #  define CC_MVN(cc,rd,rm)              corrr(cc,ARM_MVN,0,rd,rm)
449 #  define MVN(rd,rm)                    CC_MVN(ARM_CC_AL,rd,rm)
450 #  define T1_MVN(rd,rm)                 is(THUMB_MVN|(_u3(rm)<<3)|_u3(rd))
451 #  define T2_MVN(rd,rm)                 torrr(THUMB2_MVN,_R15_REGNO,rd,rm)
452 #  define CC_MVNI(cc,rd,im)             corri(cc,ARM_MVN|ARM_I,0,rd,im)
453 #  define MVNI(rd,im)                   CC_MVNI(ARM_CC_AL,rd,im)
454 #  define T2_MVNI(rd,im)                torri(THUMB2_MVNI,_R15_REGNO,rd,im)
/* NOT is an alias for MVN in every form. */
455 #  define CC_NOT(cc,rd,rm)              CC_MVN(cc,rd,rm)
456 #  define NOT(rd,rm)                    CC_NOT(ARM_CC_AL,rd,rm)
457 #  define T1_NOT(rd,rm)                 T1_MVN(rd,rm)
458 #  define T2_NOT(rd,rm)                 T2_MVN(rd,rm)
/* CLZ and RBIT: the Thumb-2 forms pass rm in both source positions; the ARM forms pass _R15_REGNO in the unused register slots. */
459 #  define T2_CLZ(rd,rm)                 torrr(THUMB2_CLZ,rm,rd,rm)
460 #  define CC_CLZ(cc,rd,rm)              corrrr(cc,ARM_CLZ,_R15_REGNO,rd,_R15_REGNO,rm)
461 #  define CLZ(rd,rm)                    CC_CLZ(ARM_CC_AL,rd,rm)
462 #  define T2_RBIT(rd,rm)                torrr(THUMB2_RBIT,rm,rd,rm)
463 #  define CC_RBIT(cc,rd,rm)             corrrr(cc,ARM_RBIT,_R15_REGNO,rd,_R15_REGNO,rm)
464 #  define RBIT(rd,rm)                   CC_RBIT(ARM_CC_AL,rd,rm)
/* NOP: the ARM form is MOV r0, r0; the Thumb form emits the fixed value 0xbf00. */
465 #  define NOP()                         MOV(_R0_REGNO, _R0_REGNO)
466 #  define T1_NOP()                      is(0xbf00)
/*
 * Add and add-with-carry.  A trailing "I" takes an immediate instead of
 * rm; a trailing "S" (before any "I") ORs ARM_S into the opcode to
 * request a flag update.
 */
467 #  define CC_ADD(cc,rd,rn,rm)           corrr(cc,ARM_ADD,rn,rd,rm)
468 #  define ADD(rd,rn,rm)                 CC_ADD(ARM_CC_AL,rd,rn,rm)
/* T1_ADD: three 3-bit register fields (rm at bits 8:6, rn at 5:3, rd at 2:0). */
469 #  define T1_ADD(rd,rn,rm)              is(THUMB_ADD|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rd))
/* T1_ADDX: two-operand form accepting 4-bit register numbers; bit 3 of rdn goes to bit 7. */
470 #  define T1_ADDX(rdn,rm)               is(THUMB_ADDX|((_u4(rdn)&8)<<4)|(_u4(rm)<<3)|(rdn&7))
471 #  define T2_ADD(rd,rn,rm)              torrr(THUMB2_ADD,rn,rd,rm)
472 #  define CC_ADDI(cc,rd,rn,im)          corri(cc,ARM_ADD|ARM_I,rn,rd,im)
473 #  define ADDI(rd,rn,im)                CC_ADDI(ARM_CC_AL,rd,rn,im)
/* T1_ADDI3 takes a 3-bit immediate; T1_ADDI8 an 8-bit immediate with one register as both source and destination. */
474 #  define T1_ADDI3(rd,rn,im)            is(THUMB_ADDI3|(_u3(im)<<6)|(_u3(rn)<<3)|_u3(rd))
475 #  define T1_ADDI8(rdn,im)              is(THUMB_ADDI8|(_u3(rdn)<<8)|_u8(im))
476 #  define T2_ADDI(rd,rn,im)             torri(THUMB2_ADDI,rn,rd,im)
477 #  define T2_ADDWI(rd,rn,im)            torri(THUMB2_ADDWI,rn,rd,im)
478 #  define CC_ADDS(cc,rd,rn,rm)          corrr(cc,ARM_ADD|ARM_S,rn,rd,rm)
479 #  define ADDS(rd,rn,rm)                CC_ADDS(ARM_CC_AL,rd,rn,rm)
480 #  define T2_ADDS(rd,rn,rm)             torrr(THUMB2_ADD|ARM_S,rn,rd,rm)
/* ADDSI has no CC_ variant; it calls corri() with ARM_CC_AL directly. */
481 #  define ADDSI(rd,rn,im)               corri(ARM_CC_AL,ARM_ADD|ARM_S|ARM_I,rn,rd,im)
482 #  define T2_ADDSI(rd,rn,im)            torri(THUMB2_ADDI|ARM_S,rn,rd,im)
483 #  define CC_ADC(cc,rd,rn,rm)           corrr(cc,ARM_ADC,rn,rd,rm)
484 #  define ADC(rd,rn,rm)                 CC_ADC(ARM_CC_AL,rd,rn,rm)
485 #  define T1_ADC(rdn,rm)                is(THUMB_ADC|(_u3(rm)<<3)|_u3(rdn))
486 #  define T2_ADC(rd,rn,rm)              torrr(THUMB2_ADC,rn,rd,rm)
487 #  define CC_ADCI(cc,rd,rn,im)          corri(cc,ARM_ADC|ARM_I,rn,rd,im)
488 #  define ADCI(rd,rn,im)                CC_ADCI(ARM_CC_AL,rd,rn,im)
489 #  define T2_ADCI(rd,rn,im)             torri(THUMB2_ADCI,rn,rd,im)
490 #  define CC_ADCS(cc,rd,rn,rm)          corrr(cc,ARM_ADC|ARM_S,rn,rd,rm)
491 #  define ADCS(rd,rn,rm)                CC_ADCS(ARM_CC_AL,rd,rn,rm)
492 #  define T2_ADCS(rd,rn,rm)             torrr(THUMB2_ADC|ARM_S,rn,rd,rm)
493 #  define CC_ADCSI(cc,rd,rn,im)         corri(cc,ARM_ADC|ARM_S|ARM_I,rn,rd,im)
494 #  define ADCSI(rd,rn,im)               CC_ADCSI(ARM_CC_AL,rd,rn,im)
495 #  define T2_ADCSI(rd,rn,im)            torri(THUMB2_ADCI|ARM_S,rn,rd,im)
/*
 * Subtract and subtract-with-carry, following the same pattern as the
 * add family: "I" = immediate operand, "S" = ARM_S OR-ed into the opcode.
 */
496 #  define CC_SUB(cc,rd,rn,rm)           corrr(cc,ARM_SUB,rn,rd,rm)
497 #  define SUB(rd,rn,rm)                 CC_SUB(ARM_CC_AL,rd,rn,rm)
498 #  define T1_SUB(rd,rn,rm)              is(THUMB_SUB|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rd))
499 #  define T2_SUB(rd,rn,rm)              torrr(THUMB2_SUB,rn,rd,rm)
500 #  define CC_SUBI(cc,rd,rn,im)          corri(cc,ARM_SUB|ARM_I,rn,rd,im)
501 #  define SUBI(rd,rn,im)                CC_SUBI(ARM_CC_AL,rd,rn,im)
/* T1_SUBI3 takes a 3-bit immediate; T1_SUBI8 an 8-bit immediate with one register as both source and destination. */
502 #  define T1_SUBI3(rd,rn,im)            is(THUMB_SUBI3|(_u3(im)<<6)|(_u3(rn)<<3)|_u3(rd))
503 #  define T1_SUBI8(rdn,im)              is(THUMB_SUBI8|(_u3(rdn)<<8)|_u8(im))
504 #  define T2_SUBI(rd,rn,im)             torri(THUMB2_SUBI,rn,rd,im)
505 #  define T2_SUBWI(rd,rn,im)            torri(THUMB2_SUBWI,rn,rd,im)
506 #  define CC_SUBS(cc,rd,rn,rm)          corrr(cc,ARM_SUB|ARM_S,rn,rd,rm)
507 #  define SUBS(rd,rn,rm)                CC_SUBS(ARM_CC_AL,rd,rn,rm)
508 #  define T2_SUBS(rd,rn,rm)             torrr(THUMB2_SUB|ARM_S,rn,rd,rm)
509 #  define CC_SUBSI(cc,rd,rn,im)         corri(cc,ARM_SUB|ARM_S|ARM_I,rn,rd,im)
510 #  define SUBSI(rd,rn,im)               CC_SUBSI(ARM_CC_AL,rd,rn,im)
511 #  define T2_SUBSI(rd,rn,im)            torri(THUMB2_SUBI|ARM_S,rn,rd,im)
512 #  define CC_SBC(cc,rd,rn,rm)           corrr(cc,ARM_SBC,rn,rd,rm)
513 #  define SBC(rd,rn,rm)                 CC_SBC(ARM_CC_AL,rd,rn,rm)
514 #  define T1_SBC(rdn,rm)                is(THUMB_SBC|(_u3(rm)<<3)|_u3(rdn))
515 #  define T2_SBC(rd,rn,rm)              torrr(THUMB2_SBC,rn,rd,rm)
516 #  define CC_SBCI(cc,rd,rn,im)          corri(cc,ARM_SBC|ARM_I,rn,rd,im)
517 #  define SBCI(rd,rn,im)                CC_SBCI(ARM_CC_AL,rd,rn,im)
518 #  define T2_SBCI(rd,rn,im)             torri(THUMB2_SBCI,rn,rd,im)
519 #  define CC_SBCS(cc,rd,rn,rm)          corrr(cc,ARM_SBC|ARM_S,rn,rd,rm)
520 #  define SBCS(rd,rn,rm)                CC_SBCS(ARM_CC_AL,rd,rn,rm)
521 #  define T2_SBCS(rd,rn,rm)             torrr(THUMB2_SBC|ARM_S,rn,rd,rm)
522 #  define CC_SBCSI(cc,rd,rn,im)         corri(cc,ARM_SBC|ARM_S|ARM_I,rn,rd,im)
523 #  define SBCSI(rd,rn,im)               CC_SBCSI(ARM_CC_AL,rd,rn,im)
524 #  define T2_SBCSI(rd,rn,im)            torri(THUMB2_SBCI|ARM_S,rn,rd,im)
/* Reverse subtract, in register and immediate ("I") forms. */
525 #  define CC_RSB(cc,rd,rn,rm)           corrr(cc,ARM_RSB,rn,rd,rm)
526 #  define RSB(rd,rn,rm)                 CC_RSB(ARM_CC_AL,rd,rn,rm)
/* NOTE(review): T2_RSB depends on a THUMB2_RSB opcode constant being defined before the macro is used. */
527 #  define T2_RSB(rd,rn,rm)              torrr(THUMB2_RSB,rn,rd,rm)
528 #  define CC_RSBI(cc,rd,rn,im)          corri(cc,ARM_RSB|ARM_I,rn,rd,im)
529 #  define RSBI(rd,rn,im)                CC_RSBI(ARM_CC_AL,rd,rn,im)
/* T1_RSBI takes no immediate argument: only the two 3-bit register fields are encoded. */
530 #  define T1_RSBI(rd,rn)                is(THUMB_RSBI|(_u3(rn)<<3)|_u3(rd))
531 #  define T2_RSBI(rd,rn,im)             torri(THUMB2_RSBI,rn,rd,im)
/*
 * Multiply and divide.  The ARM forms go through corrrr(cc,o,rh,rl,rm,rn);
 * CC_MUL passes its destination in the rh position and 0 in the rl
 * position, while SMULL/UMULL pass the high and low result registers.
 */
532 #  define CC_MUL(cc,rl,rn,rm)           corrrr(cc,ARM_MUL,rl,0,rm,rn)
533 #  define MUL(rl,rn,rm)                 CC_MUL(ARM_CC_AL,rl,rn,rm)
534 #  define T1_MUL(rdm,rn)                is(THUMB_MUL|(_u3(rn)<<3)|_u3(rdm))
535 #  define T2_MUL(rd,rn,rm)              torrr(THUMB2_MUL,rn,rd,rm)
536 #  define CC_SMULL(cc,rl,rh,rn,rm)      corrrr(cc,ARM_SMULL,rh,rl,rm,rn)
537 #  define SMULL(rl,rh,rn,rm)            CC_SMULL(ARM_CC_AL,rl,rh,rn,rm)
538 #  define T2_SMULL(rl,rh,rn,rm)         torrrr(THUMB2_SMULL,rn,rl,rh,rm)
539 #  define CC_UMULL(cc,rl,rh,rn,rm)      corrrr(cc,ARM_UMULL,rh,rl,rm,rn)
540 #  define UMULL(rl,rh,rn,rm)            CC_UMULL(ARM_CC_AL,rl,rh,rn,rm)
541 #  define T2_UMULL(rl,rh,rn,rm)         torrrr(THUMB2_UMULL,rn,rl,rh,rm)
/*
 * Divide: the CC_ forms pass 15 in the rl position.  Note that SDIV and
 * UDIV hand their rn and rm arguments to the CC_ form in swapped order
 * (rm, rn), so the CC_ forms' rn/rm parameter names are reversed relative
 * to SDIV/UDIV.
 * NOTE(review): presumably deliberate, to land each operand in the right
 * corrrr() slot -- confirm before calling CC_SDIV/CC_UDIV directly.
 */
542 #  define CC_SDIV(cc,rd,rn,rm)          corrrr(cc,ARM_SDIV,rd,15,rn,rm)
543 #  define SDIV(rd,rn,rm)                CC_SDIV(ARM_CC_AL,rd,rm,rn)
544 #  define CC_UDIV(cc,rd,rn,rm)          corrrr(cc,ARM_UDIV,rd,15,rn,rm)
545 #  define UDIV(rd,rn,rm)                CC_UDIV(ARM_CC_AL,rd,rm,rn)
546 #  define T2_SDIV(rd,rn,rm)             torrr(THUMB2_SDIV,rn,rd,rm)
547 #  define T2_UDIV(rd,rn,rm)             torrr(THUMB2_UDIV,rn,rd,rm)
/*
 * AND family.  CC_* forms take an explicit condition code and expand to
 * corrr() (register operand) or corri() (immediate operand, ARM_I set);
 * the un-prefixed forms pass ARM_CC_AL.  The *S variants OR ARM_S into
 * the opcode.  T1_/T2_ forms expand to is()/torrr()/torri(); the Thumb
 * helpers take no condition-code argument, so every torri() call here has
 * exactly four arguments: (opcode,rn,rd,im).
 */
#  define CC_AND(cc,rd,rn,rm)           corrr(cc,ARM_AND,rn,rd,rm)
#  define AND(rd,rn,rm)                 CC_AND(ARM_CC_AL,rd,rn,rm)
#  define T1_AND(rdn,rm)                is(THUMB_AND|(_u3(rm)<<3)|_u3(rdn))
#  define T2_AND(rd,rn,rm)              torrr(THUMB2_AND,rn,rd,rm)
#  define CC_ANDI(cc,rd,rn,im)          corri(cc,ARM_AND|ARM_I,rn,rd,im)
#  define ANDI(rd,rn,im)                CC_ANDI(ARM_CC_AL,rd,rn,im)
#  define T2_ANDI(rd,rn,im)             torri(THUMB2_ANDI,rn,rd,im)
#  define CC_ANDS(cc,rd,rn,rm)          corrr(cc,ARM_AND|ARM_S,rn,rd,rm)
#  define ANDS(rd,rn,rm)                CC_ANDS(ARM_CC_AL,rd,rn,rm)
#  define T2_ANDS(rd,rn,rm)             torrr(THUMB2_AND|ARM_S,rn,rd,rm)
#  define CC_ANDSI(cc,rd,rn,im)         corri(cc,ARM_AND|ARM_S|ARM_I,rn,rd,im)
#  define ANDSI(rd,rn,im)               CC_ANDSI(ARM_CC_AL,rd,rn,im)
#  define T2_ANDSI(rd,rn,im)            torri(THUMB2_ANDI|ARM_S,rn,rd,im)
/*
 * BIC family.  CC_* forms take an explicit condition code and expand to
 * corrr() (register operand) or corri() (immediate operand, ARM_I set);
 * the un-prefixed forms pass ARM_CC_AL.  The *S variants OR ARM_S into
 * the opcode.  T2_ forms expand to torrr()/torri(); the Thumb helpers
 * take no condition-code argument, so every torri() call here has exactly
 * four arguments: (opcode,rn,rd,im).
 */
#  define CC_BIC(cc,rd,rn,rm)           corrr(cc,ARM_BIC,rn,rd,rm)
#  define BIC(rd,rn,rm)                 CC_BIC(ARM_CC_AL,rd,rn,rm)
#  define T2_BIC(rd,rn,rm)              torrr(THUMB2_BIC,rn,rd,rm)
#  define CC_BICI(cc,rd,rn,im)          corri(cc,ARM_BIC|ARM_I,rn,rd,im)
#  define BICI(rd,rn,im)                CC_BICI(ARM_CC_AL,rd,rn,im)
#  define T2_BICI(rd,rn,im)             torri(THUMB2_BICI,rn,rd,im)
#  define CC_BICS(cc,rd,rn,rm)          corrr(cc,ARM_BIC|ARM_S,rn,rd,rm)
#  define BICS(rd,rn,rm)                CC_BICS(ARM_CC_AL,rd,rn,rm)
#  define T2_BICS(rd,rn,rm)             torrr(THUMB2_BIC|ARM_S,rn,rd,rm)
#  define CC_BICSI(cc,rd,rn,im)         corri(cc,ARM_BIC|ARM_S|ARM_I,rn,rd,im)
#  define BICSI(rd,rn,im)               CC_BICSI(ARM_CC_AL,rd,rn,im)
#  define T2_BICSI(rd,rn,im)            torri(THUMB2_BICI|ARM_S,rn,rd,im)
/*
 * ORR family.  CC_* forms take an explicit condition code and expand to
 * corrr() (register operand), corrrs() (register operand with the shift
 * type `sh` ORed into the opcode and a shift amount `im`) or corri()
 * (immediate operand, ARM_I set); the un-prefixed forms pass ARM_CC_AL.
 * T1_/T2_ forms expand to is()/torrr()/torri().  CC_ORR_SI names its
 * third register `rm`, matching CC_EOR_SI and the expansion.
 */
#  define CC_ORR(cc,rd,rn,rm)           corrr(cc,ARM_ORR,rn,rd,rm)
#  define ORR(rd,rn,rm)                 CC_ORR(ARM_CC_AL,rd,rn,rm)
#  define T1_ORR(rdn,rm)                is(THUMB_ORR|(_u3(rm)<<3)|_u3(rdn))
#  define T2_ORR(rd,rn,rm)              torrr(THUMB2_ORR,rn,rd,rm)
#  define CC_ORR_SI(cc,rd,rn,rm,sh,im)  corrrs(cc,ARM_ORR|sh,rn,rd,rm,im)
#  define ORR_SI(r0,r1,r2,sh,im)        CC_ORR_SI(ARM_CC_AL,r0,r1,r2,sh,im)
#  define CC_ORRI(cc,rd,rn,im)          corri(cc,ARM_ORR|ARM_I,rn,rd,im)
#  define ORRI(rd,rn,im)                CC_ORRI(ARM_CC_AL,rd,rn,im)
#  define T2_ORRI(rd,rn,im)             torri(THUMB2_ORRI,rn,rd,im)
/*
 * EOR family.  CC_* forms take an explicit condition code and expand to
 * corrr() (register operand), corrrs() (register operand with the shift
 * type `sh` ORed into the opcode and a shift amount `im`) or corri()
 * (immediate operand, ARM_I set); the un-prefixed forms pass ARM_CC_AL.
 * T1_/T2_ forms expand to is()/torrr()/torri().
 */
582 #  define CC_EOR(cc,rd,rn,rm)           corrr(cc,ARM_EOR,rn,rd,rm)
583 #  define EOR(rd,rn,rm)                 CC_EOR(ARM_CC_AL,rd,rn,rm)
584 #  define T1_EOR(rdn,rm)                is(THUMB_EOR|(_u3(rm)<<3)|_u3(rdn))
585 #  define T2_EOR(rd,rn,rm)              torrr(THUMB2_EOR,rn,rd,rm)
586 #  define CC_EOR_SI(cc,rd,rn,rm,sh,im)  corrrs(cc,ARM_EOR|sh,rn,rd,rm,im)
587 #  define EOR_SI(r0,r1,r2,sh,im)        CC_EOR_SI(ARM_CC_AL,r0,r1,r2,sh,im)
588 #  define CC_EORI(cc,rd,rn,im)          corri(cc,ARM_EOR|ARM_I,rn,rd,im)
589 #  define EORI(rd,rn,im)                CC_EORI(ARM_CC_AL,rd,rn,im)
590 #  define T2_EORI(rd,rn,im)             torri(THUMB2_EORI,rn,rd,im)
/*
 * Two-register (rd,rm) operations: REV, REV16, SXTB, UXTB, SXTH, UXTH.
 * ARM forms expand to c6orr(); T1_ forms pack rm and rd as 3-bit fields
 * into a THUMB_ opcode.  The T2_ forms reuse the three-register torrr()
 * helper: REV/REV16 pass rm in both the first and last register slots,
 * while the extend operations pass _R15_REGNO in the first slot.
 */
591 #  define CC_REV(cc,rd,rm)              c6orr(cc,ARM_REV,rd,rm)
592 #  define REV(rd,rm)                    CC_REV(ARM_CC_AL,rd,rm)
593 #  define T1_REV(rd,rm)                 is(THUMB_REV|(_u3(rm)<<3)|_u3(rd))
594 #  define T2_REV(rd,rm)                 torrr(THUMB2_REV,rm,rd,rm)
595 #  define CC_REV16(cc,rd,rm)            c6orr(cc,ARM_REV16,rd,rm)
596 #  define REV16(rd,rm)                  CC_REV16(ARM_CC_AL,rd,rm)
597 #  define T1_REV16(rd,rm)               is(THUMB_REV16|(_u3(rm)<<3)|_u3(rd))
598 #  define T2_REV16(rd,rm)               torrr(THUMB2_REV16,rm,rd,rm)
599 #  define CC_SXTB(cc,rd,rm)             c6orr(cc,ARM_SXTB,rd,rm)
600 #  define SXTB(rd,rm)                   CC_SXTB(ARM_CC_AL,rd,rm)
601 #  define T1_SXTB(rd,rm)                is(THUMB_SXTB|(_u3(rm)<<3)|_u3(rd))
602 #  define T2_SXTB(rd,rm)                torrr(THUMB2_SXTB,_R15_REGNO,rd,rm)
603 #  define CC_UXTB(cc,rd,rm)             c6orr(cc,ARM_UXTB,rd,rm)
604 #  define UXTB(rd,rm)                   CC_UXTB(ARM_CC_AL,rd,rm)
605 #  define T1_UXTB(rd,rm)                is(THUMB_UXTB|(_u3(rm)<<3)|_u3(rd))
606 #  define T2_UXTB(rd,rm)                torrr(THUMB2_UXTB,_R15_REGNO,rd,rm)
607 #  define CC_SXTH(cc,rd,rm)             c6orr(cc,ARM_SXTH,rd,rm)
608 #  define SXTH(rd,rm)                   CC_SXTH(ARM_CC_AL,rd,rm)
609 #  define T1_SXTH(rd,rm)                is(THUMB_SXTH|(_u3(rm)<<3)|_u3(rd))
610 #  define T2_SXTH(rd,rm)                torrr(THUMB2_SXTH,_R15_REGNO,rd,rm)
611 #  define CC_UXTH(cc,rd,rm)             c6orr(cc,ARM_UXTH,rd,rm)
612 #  define UXTH(rd,rm)                   CC_UXTH(ARM_CC_AL,rd,rm)
613 #  define T1_UXTH(rd,rm)                is(THUMB_UXTH|(_u3(rm)<<3)|_u3(rd))
614 #  define T2_UXTH(rd,rm)                torrr(THUMB2_UXTH,_R15_REGNO,rd,rm)
/*
 * Shifts (LSL, LSR, ASR).  All ARM forms funnel through CC_SHIFT, a thin
 * alias for cshift(cc,o,rd,rm,rn,im).  Register-amount forms OR ARM_R
 * into the opcode and pass 0 as the immediate; immediate-amount forms
 * pass 0 in the rm slot and the amount in im.  T1_ immediate forms pack
 * a 5-bit amount at bit 6; T2_ immediate forms use tshift(), T2_ register
 * forms use torrr().
 */
615 #  define CC_SHIFT(cc,o,rd,rm,rn,im)    cshift(cc,o,rd,rm,rn,im)
616 #  define CC_LSL(cc,rd,rn,rm)           CC_SHIFT(cc,ARM_LSL|ARM_R,rd,rm,rn,0)
617 #  define LSL(rd,rn,rm)                 CC_LSL(ARM_CC_AL,rd,rn,rm)
618 #  define T1_LSL(rdn,rm)                is(THUMB_LSL|(_u3(rm)<<3)|_u3(rdn))
619 #  define T2_LSL(rd,rn,rm)              torrr(THUMB2_LSL,rn,rd,rm)
620 #  define CC_LSLI(cc,rd,rn,im)          CC_SHIFT(cc,ARM_LSL,rd,0,rn,im)
621 #  define LSLI(rd,rn,im)                CC_LSLI(ARM_CC_AL,rd,rn,im)
622 #  define T1_LSLI(rd,rm,im)             is(THUMB_LSLI|(_u5(im)<<6)|(_u3(rm)<<3)|_u3(rd))
623 #  define T2_LSLI(rd,rm,im)             tshift(THUMB2_LSLI,rd,rm,im)
624 #  define CC_LSR(cc,rd,rn,rm)           CC_SHIFT(cc,ARM_LSR|ARM_R,rd,rm,rn,0)
625 #  define LSR(rd,rn,rm)                 CC_LSR(ARM_CC_AL,rd,rn,rm)
626 #  define T1_LSR(rdn,rm)                is(THUMB_LSR|(_u3(rm)<<3)|_u3(rdn))
627 #  define T2_LSR(rd,rn,rm)              torrr(THUMB2_LSR,rn,rd,rm)
628 #  define CC_LSRI(cc,rd,rn,im)          CC_SHIFT(cc,ARM_LSR,rd,0,rn,im)
629 #  define LSRI(rd,rn,im)                CC_LSRI(ARM_CC_AL,rd,rn,im)
630 #  define T1_LSRI(rd,rm,im)             is(THUMB_LSRI|(_u5(im)<<6)|(_u3(rm)<<3)|_u3(rd))
631 #  define T2_LSRI(rd,rm,im)             tshift(THUMB2_LSRI,rd,rm,im)
632 #  define CC_ASR(cc,rd,rn,rm)           CC_SHIFT(cc,ARM_ASR|ARM_R,rd,rm,rn,0)
633 #  define ASR(rd,rn,rm)                 CC_ASR(ARM_CC_AL,rd,rn,rm)
634 #  define T1_ASR(rdn,rm)                is(THUMB_ASR|(_u3(rm)<<3)|_u3(rdn))
635 #  define T2_ASR(rd,rn,rm)              torrr(THUMB2_ASR,rn,rd,rm)
636 #  define CC_ASRI(cc,rd,rn,im)          CC_SHIFT(cc,ARM_ASR,rd,0,rn,im)
637 #  define ASRI(rd,rn,im)                CC_ASRI(ARM_CC_AL,rd,rn,im)
638 #  define T1_ASRI(rd,rm,im)             is(THUMB_ASRI|(_u5(im)<<6)|(_u3(rm)<<3)|_u3(rd))
639 #  define T2_ASRI(rd,rm,im)             tshift(THUMB2_ASRI,rd,rm,im)
/*
 * CMP family.  There is no destination register: ARM forms pass 0 in the
 * rd slot of corrr()/corri(), and T2_ forms pass _R15_REGNO in the rd
 * slot of torrr()/torri().  T1_CMP packs two 3-bit registers; T1_CMPX
 * accepts 4-bit registers, placing bit 3 of rn at bit 7, rm at bits 6:3
 * and the low three bits of rn at bits 2:0.  T1_CMPI packs a 3-bit
 * register at bit 8 and an 8-bit immediate.
 */
#  define CC_CMP(cc,rn,rm)              corrr(cc,ARM_CMP,rn,0,rm)
#  define CMP(rn,rm)                    CC_CMP(ARM_CC_AL,rn,rm)
#  define T1_CMP(rn,rm)                 is(THUMB_CMP|(_u3(rm)<<3)|_u3(rn))
/* _u3() parenthesizes rn, so expression arguments are masked as a whole. */
#  define T1_CMPX(rn,rm)                is(THUMB_CMPX|((_u4(rn)&8)<<4)|(_u4(rm)<<3)|_u3(rn))
#  define T2_CMP(rn,rm)                 torrr(THUMB2_CMP,rn,_R15_REGNO,rm)
#  define CC_CMPI(cc,rn,im)             corri(cc,ARM_CMP|ARM_I,rn,0,im)
#  define CMPI(rn,im)                   CC_CMPI(ARM_CC_AL,rn,im)
#  define T1_CMPI(rn,im)                is(THUMB_CMPI|(_u3(rn)<<8)|_u8(im))
#  define T2_CMPI(rn,im)                torri(THUMB2_CMPI,rn,_R15_REGNO,im)
/*
 * CMN family.  There is no destination register: ARM forms pass 0 in the
 * rd slot of corrr()/corri(), and T2_ forms pass _R15_REGNO in the rd
 * slot of torrr()/torri().  T1_CMN packs rm at bit 3 and rn at bit 0,
 * the same layout as T1_CMP and T1_TST.
 */
#  define CC_CMN(cc,rn,rm)              corrr(cc,ARM_CMN,rn,0,rm)
#  define CMN(rn,rm)                    CC_CMN(ARM_CC_AL,rn,rm)
#  define T1_CMN(rn,rm)                 is(THUMB_CMN|(_u3(rm)<<3)|_u3(rn))
#  define T2_CMN(rn,rm)                 torrr(THUMB2_CMN,rn,_R15_REGNO,rm)
#  define CC_CMNI(cc,rn,im)             corri(cc,ARM_CMN|ARM_I,rn,0,im)
#  define CMNI(rn,im)                   CC_CMNI(ARM_CC_AL,rn,im)
#  define T2_CMNI(rn,im)                torri(THUMB2_CMNI,rn,_R15_REGNO,im)
/*
 * TST family.  There is no destination register: ARM forms pass the
 * literal 0 in the rd slot of corrr()/corri(), and T2_ forms pass
 * _R15_REGNO in the rd slot of torrr()/torri().  TST is the ARM_CC_AL
 * specialization of CC_TST, like every other un-prefixed form.
 */
#  define CC_TST(cc,rn,rm)              corrr(cc,ARM_TST,rn,0,rm)
#  define TST(rn,rm)                    CC_TST(ARM_CC_AL,rn,rm)
#  define T1_TST(rn,rm)                 is(THUMB_TST|(_u3(rm)<<3)|_u3(rn))
#  define T2_TST(rn,rm)                 torrr(THUMB2_TST,rn,_R15_REGNO,rm)
#  define CC_TSTI(cc,rn,im)             corri(cc,ARM_TST|ARM_I,rn,0,im)
#  define TSTI(rn,im)                   CC_TSTI(ARM_CC_AL,rn,im)
#  define T2_TSTI(rn,im)                torri(THUMB2_TSTI,rn,_R15_REGNO,im)
/*
 * TEQ family (ARM encodings only in this table).  There is no destination
 * register, so 0 is passed in the rd slot of corrr()/corri().  The
 * immediate form sets ARM_I.  Un-prefixed forms pass ARM_CC_AL.
 */
#  define CC_TEQ(cc,rn,rm)              corrr(cc,ARM_TEQ,rn,0,rm)
#  define TEQ(rn,rm)                    CC_TEQ(ARM_CC_AL,rn,rm)
#  define CC_TEQI(cc,rn,im)             corri(cc,ARM_TEQ|ARM_I,rn,0,im)
#  define TEQI(rn,im)                   CC_TEQI(ARM_CC_AL,rn,im)
/*
 * Branches.  Register-target forms (BX, BLX) expand to cbx(); the T1_
 * variants place a 4-bit register at bit 3 (T1_BX uses the literal
 * opcode 0x4700 rather than a named constant).  Immediate-target forms
 * expand to cb() for ARM, blxi() for BLXI, and tc8()/t11()/tcb()/tb()
 * for the Thumb encodings.
 */
667 #  define CC_BX(cc,rm)                  cbx(cc,ARM_BX,rm)
668 #  define BX(rm)                        CC_BX(ARM_CC_AL,rm)
669 #  define T1_BX(rm)                     is(0x4700|(_u4(rm)<<3))
670 #  define CC_BLX(cc,rm)                 cbx(cc,ARM_BLX,rm)
671 #  define BLX(rm)                       CC_BLX(ARM_CC_AL,rm)
672 #  define T1_BLX(rm)                    is(THUMB_BLX|(_u4(rm)<<3))
673 #  define BLXI(im)                      blxi(im)
674 #  define T2_BLXI(im)                   tb(THUMB2_BLXI,im)
675 #  define CC_B(cc,im)                   cb(cc,ARM_B,im)
676 #  define B(im)                         CC_B(ARM_CC_AL,im)
677 #  define T1_CC_B(cc,im)                tc8(cc,im)
678 #  define T1_B(im)                      t11(im)
679 #  define T2_CC_B(cc,im)                tcb(cc,im)
680 #  define T2_B(im)                      tb(THUMB2_B,im)
681 #  define CC_BLI(cc,im)                 cb(cc,ARM_BLI,im)
682 #  define BLI(im)                       CC_BLI(ARM_CC_AL,im)
683 #  define T2_BLI(im)                    tb(THUMB2_BLI,im)
/*
 * LDRSB family.  Register-offset forms use corrr()/torxr(); immediate
 * forms use corri8() on ARM and torri8()/torri12() on Thumb-2.  The
 * plain forms OR ARM_P (ARM) or THUMB2_U (Thumb-2 8-bit immediate) into
 * the opcode; the *N forms are identical except that bit is left clear
 * (presumably selecting a subtracted offset -- confirm against the
 * opcode definitions).
 */
684 #  define CC_LDRSB(cc,rt,rn,rm)         corrr(cc,ARM_LDRSB|ARM_P,rn,rt,rm)
685 #  define LDRSB(rt,rn,rm)               CC_LDRSB(ARM_CC_AL,rt,rn,rm)
686 #  define T1_LDRSB(rt,rn,rm)            is(THUMB_LDRSB|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt))
687 #  define T2_LDRSB(rt,rn,rm)            torxr(THUMB2_LDRSB,rn,rt,rm)
688 #  define CC_LDRSBN(cc,rt,rn,rm)        corrr(cc,ARM_LDRSB,rn,rt,rm)
689 #  define LDRSBN(rt,rn,rm)              CC_LDRSBN(ARM_CC_AL,rt,rn,rm)
690 #  define CC_LDRSBI(cc,rt,rn,im)        corri8(cc,ARM_LDRSBI|ARM_P,rn,rt,im)
691 #  define LDRSBI(rt,rn,im)              CC_LDRSBI(ARM_CC_AL,rt,rn,im)
692 #  define T2_LDRSBI(rt,rn,im)           torri8(THUMB2_LDRSBI|THUMB2_U,rn,rt,im)
693 #  define T2_LDRSBWI(rt,rn,im)          torri12(THUMB2_LDRSBWI,rn,rt,im)
694 #  define CC_LDRSBIN(cc,rt,rn,im)       corri8(cc,ARM_LDRSBI,rn,rt,im)
695 #  define LDRSBIN(rt,rn,im)             CC_LDRSBIN(ARM_CC_AL,rt,rn,im)
696 #  define T2_LDRSBIN(rt,rn,im)          torri8(THUMB2_LDRSBI,rn,rt,im)
/*
 * LDRB family.  Register-offset forms use corrr()/torxr(); ARM immediate
 * forms use corri() (not corri8() as the LDRSB/LDRH families do), and
 * Thumb-2 immediate forms use torri8()/torri12().  T1_LDRBI packs a
 * 5-bit immediate at bit 6.  The *N forms differ from the plain forms
 * only in leaving ARM_P / THUMB2_U clear.
 */
697 #  define CC_LDRB(cc,rt,rn,rm)          corrr(cc,ARM_LDRB|ARM_P,rn,rt,rm)
698 #  define LDRB(rt,rn,rm)                CC_LDRB(ARM_CC_AL,rt,rn,rm)
699 #  define T1_LDRB(rt,rn,rm)             is(THUMB_LDRB|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt))
700 #  define T2_LDRB(rt,rn,rm)             torxr(THUMB2_LDRB,rn,rt,rm)
701 #  define CC_LDRBN(cc,rt,rn,rm)         corrr(cc,ARM_LDRB,rn,rt,rm)
702 #  define LDRBN(rt,rn,rm)               CC_LDRBN(ARM_CC_AL,rt,rn,rm)
703 #  define CC_LDRBI(cc,rt,rn,im)         corri(cc,ARM_LDRBI|ARM_P,rn,rt,im)
704 #  define LDRBI(rt,rn,im)               CC_LDRBI(ARM_CC_AL,rt,rn,im)
705 #  define T1_LDRBI(rt,rn,im)            is(THUMB_LDRBI|(_u5(im)<<6)|(_u3(rn)<<3)|_u3(rt))
706 #  define T2_LDRBI(rt,rn,im)            torri8(THUMB2_LDRBI|THUMB2_U,rn,rt,im)
707 #  define T2_LDRBWI(rt,rn,im)           torri12(THUMB2_LDRBWI,rn,rt,im)
708 #  define CC_LDRBIN(cc,rt,rn,im)        corri(cc,ARM_LDRBI,rn,rt,im)
709 #  define LDRBIN(rt,rn,im)              CC_LDRBIN(ARM_CC_AL,rt,rn,im)
710 #  define T2_LDRBIN(rt,rn,im)           torri8(THUMB2_LDRBI,rn,rt,im)
/*
 * LDRSH family.  Register-offset forms use corrr()/torxr(); immediate
 * forms use corri8() on ARM and torri8()/torri12() on Thumb-2.  The *N
 * forms differ from the plain forms only in leaving ARM_P / THUMB2_U
 * clear.
 */
711 #  define CC_LDRSH(cc,rt,rn,rm)         corrr(cc,ARM_LDRSH|ARM_P,rn,rt,rm)
712 #  define LDRSH(rt,rn,rm)               CC_LDRSH(ARM_CC_AL,rt,rn,rm)
713 #  define T1_LDRSH(rt,rn,rm)            is(THUMB_LDRSH|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt))
714 #  define T2_LDRSH(rt,rn,rm)            torxr(THUMB2_LDRSH,rn,rt,rm)
715 #  define CC_LDRSHN(cc,rt,rn,rm)        corrr(cc,ARM_LDRSH,rn,rt,rm)
716 #  define LDRSHN(rt,rn,rm)              CC_LDRSHN(ARM_CC_AL,rt,rn,rm)
717 #  define CC_LDRSHI(cc,rt,rn,im)        corri8(cc,ARM_LDRSHI|ARM_P,rn,rt,im)
718 #  define LDRSHI(rt,rn,im)              CC_LDRSHI(ARM_CC_AL,rt,rn,im)
719 #  define T2_LDRSHI(rt,rn,im)           torri8(THUMB2_LDRSHI|THUMB2_U,rn,rt,im)
720 #  define T2_LDRSHWI(rt,rn,im)          torri12(THUMB2_LDRSHWI,rn,rt,im)
721 #  define CC_LDRSHIN(cc,rt,rn,im)       corri8(cc,ARM_LDRSHI,rn,rt,im)
722 #  define LDRSHIN(rt,rn,im)             CC_LDRSHIN(ARM_CC_AL,rt,rn,im)
723 #  define T2_LDRSHIN(rt,rn,im)          torri8(THUMB2_LDRSHI,rn,rt,im)
/*
 * LDRH family.  Register-offset forms use corrr()/torxr(); immediate
 * forms use corri8() on ARM and torri8()/torri12() on Thumb-2.
 * T1_LDRHI packs a 5-bit immediate at bit 6.  The *N forms differ from
 * the plain forms only in leaving ARM_P / THUMB2_U clear.
 */
724 #  define CC_LDRH(cc,rt,rn,rm)          corrr(cc,ARM_LDRH|ARM_P,rn,rt,rm)
725 #  define LDRH(rt,rn,rm)                CC_LDRH(ARM_CC_AL,rt,rn,rm)
726 #  define T1_LDRH(rt,rn,rm)             is(THUMB_LDRH|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt))
727 #  define T2_LDRH(rt,rn,rm)             torxr(THUMB2_LDRH,rn,rt,rm)
728 #  define CC_LDRHN(cc,rt,rn,rm)         corrr(cc,ARM_LDRH,rn,rt,rm)
729 #  define LDRHN(rt,rn,rm)               CC_LDRHN(ARM_CC_AL,rt,rn,rm)
730 #  define CC_LDRHI(cc,rt,rn,im)         corri8(cc,ARM_LDRHI|ARM_P,rn,rt,im)
731 #  define LDRHI(rt,rn,im)               CC_LDRHI(ARM_CC_AL,rt,rn,im)
732 #  define T1_LDRHI(rt,rn,im)            is(THUMB_LDRHI|(_u5(im)<<6)|(_u3(rn)<<3)|_u3(rt))
733 #  define T2_LDRHI(rt,rn,im)            torri8(THUMB2_LDRHI|THUMB2_U,rn,rt,im)
734 #  define T2_LDRHWI(rt,rn,im)           torri12(THUMB2_LDRHWI,rn,rt,im)
735 #  define CC_LDRHIN(cc,rt,rn,im)        corri8(cc,ARM_LDRHI,rn,rt,im)
736 #  define LDRHIN(rt,rn,im)              CC_LDRHIN(ARM_CC_AL,rt,rn,im)
737 #  define T2_LDRHIN(rt,rn,im)           torri8(THUMB2_LDRHI,rn,rt,im)
/*
 * LDR (word) family.  Register-offset forms use corrr()/torxr(); ARM
 * immediate forms use corri(), Thumb-2 immediate forms use
 * torri8()/torri12().  T1_LDRI packs a 5-bit immediate at bit 6;
 * T1_LDRISP takes no base register and packs rt at bit 8 with an 8-bit
 * immediate.  The *N forms differ from the plain forms only in leaving
 * ARM_P / THUMB2_U clear.
 */
738 #  define CC_LDR(cc,rt,rn,rm)           corrr(cc,ARM_LDR|ARM_P,rn,rt,rm)
739 #  define LDR(rt,rn,rm)                 CC_LDR(ARM_CC_AL,rt,rn,rm)
740 #  define T1_LDR(rt,rn,rm)              is(THUMB_LDR|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt))
741 #  define T2_LDR(rt,rn,rm)              torxr(THUMB2_LDR,rn,rt,rm)
742 #  define CC_LDRN(cc,rt,rn,rm)          corrr(cc,ARM_LDR,rn,rt,rm)
743 #  define LDRN(rt,rn,rm)                CC_LDRN(ARM_CC_AL,rt,rn,rm)
744 #  define CC_LDRI(cc,rt,rn,im)          corri(cc,ARM_LDRI|ARM_P,rn,rt,im)
745 #  define LDRI(rt,rn,im)                CC_LDRI(ARM_CC_AL,rt,rn,im)
746 #  define T1_LDRI(rt,rn,im)             is(THUMB_LDRI|(_u5(im)<<6)|(_u3(rn)<<3)|_u3(rt))
747 #  define T1_LDRISP(rt,im)              is(THUMB_LDRISP|(_u3(rt)<<8)|_u8(im))
748 #  define T2_LDRI(rt,rn,im)             torri8(THUMB2_LDRI|THUMB2_U,rn,rt,im)
749 #  define T2_LDRWI(rt,rn,im)            torri12(THUMB2_LDRWI,rn,rt,im)
750 #  define CC_LDRIN(cc,rt,rn,im)         corri(cc,ARM_LDRI,rn,rt,im)
751 #  define LDRIN(rt,rn,im)               CC_LDRIN(ARM_CC_AL,rt,rn,im)
752 #  define T2_LDRIN(rt,rn,im)            torri8(THUMB2_LDRI,rn,rt,im)
/*
 * LDRD and LDREX.  ARM LDRD forms take a single rt and use corrr()
 * (register offset) or corri8() (immediate offset); the Thumb-2 forms
 * take an explicit rt2 and use torrri8().  The plain forms OR ARM_P into
 * the opcode (the Thumb-2 form as well); the *N forms leave it clear.
 * LDREX fills the two unused corrrr() fields with 0xf, and T2_LDREX
 * passes 0xf in the rt2 slot of torrri8().
 */
#  define CC_LDRD(cc,rt,rn,rm)          corrr(cc,ARM_LDRD|ARM_P,rn,rt,rm)
#  define LDRD(rt,rn,rm)                CC_LDRD(ARM_CC_AL,rt,rn,rm)
#  define T2_LDRDI(rt,rt2,rn,im)        torrri8(THUMB2_LDRDI|ARM_P,rn,rt,rt2,im)
#  define CC_LDRDN(cc,rt,rn,rm)         corrr(cc,ARM_LDRD,rn,rt,rm)
#  define LDRDN(rt,rn,rm)               CC_LDRDN(ARM_CC_AL,rt,rn,rm)
#  define CC_LDRDI(cc,rt,rn,im)         corri8(cc,ARM_LDRDI|ARM_P,rn,rt,im)
#  define LDRDI(rt,rn,im)               CC_LDRDI(ARM_CC_AL,rt,rn,im)
#  define CC_LDRDIN(cc,rt,rn,im)        corri8(cc,ARM_LDRDI,rn,rt,im)
#  define LDRDIN(rt,rn,im)              CC_LDRDIN(ARM_CC_AL,rt,rn,im)
#  define T2_LDRDIN(rt,rt2,rn,im)       torrri8(THUMB2_LDRDI,rn,rt,rt2,im)
#  define CC_LDREX(cc,rt,rn)            corrrr(cc,ARM_LDREX,rn,rt,0xf,0xf)
#  define LDREX(rt,rn)                  CC_LDREX(ARM_CC_AL,rt,rn)
#  define T2_LDREX(rt,rn,im)            torrri8(THUMB2_LDREX,rn,rt,0xf,im)
/*
 * STRB family.  Register-offset forms use corrr()/torxr(); ARM immediate
 * forms use corri(), Thumb-2 immediate forms use torri8()/torri12().
 * T1_STRBI packs a 5-bit immediate at bit 6.  The *N forms differ from
 * the plain forms only in leaving ARM_P / THUMB2_U clear.
 */
766 #  define CC_STRB(cc,rt,rn,rm)          corrr(cc,ARM_STRB|ARM_P,rn,rt,rm)
767 #  define STRB(rt,rn,rm)                CC_STRB(ARM_CC_AL,rt,rn,rm)
768 #  define T1_STRB(rt,rn,rm)             is(THUMB_STRB|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt))
769 #  define T2_STRB(rt,rn,rm)             torxr(THUMB2_STRB,rn,rt,rm)
770 #  define CC_STRBN(cc,rt,rn,rm)         corrr(cc,ARM_STRB,rn,rt,rm)
771 #  define STRBN(rt,rn,rm)               CC_STRBN(ARM_CC_AL,rt,rn,rm)
772 #  define CC_STRBI(cc,rt,rn,im)         corri(cc,ARM_STRBI|ARM_P,rn,rt,im)
773 #  define STRBI(rt,rn,im)               CC_STRBI(ARM_CC_AL,rt,rn,im)
774 #  define T1_STRBI(rt,rn,im)            is(THUMB_STRBI|(_u5(im)<<6)|(_u3(rn)<<3)|_u3(rt))
775 #  define T2_STRBI(rt,rn,im)            torri8(THUMB2_STRBI|THUMB2_U,rn,rt,im)
776 #  define T2_STRBWI(rt,rn,im)           torri12(THUMB2_STRBWI,rn,rt,im)
777 #  define CC_STRBIN(cc,rt,rn,im)        corri(cc,ARM_STRBI,rn,rt,im)
778 #  define STRBIN(rt,rn,im)              CC_STRBIN(ARM_CC_AL,rt,rn,im)
779 #  define T2_STRBIN(rt,rn,im)           torri8(THUMB2_STRBI,rn,rt,im)
/*
 * STRH family.  Register-offset forms use corrr()/torxr(); immediate
 * forms use corri8() on ARM and torri8()/torri12() on Thumb-2.
 * T1_STRHI packs a 5-bit immediate at bit 6.  The *N forms differ from
 * the plain forms only in leaving ARM_P / THUMB2_U clear.
 */
780 #  define CC_STRH(cc,rt,rn,rm)          corrr(cc,ARM_STRH|ARM_P,rn,rt,rm)
781 #  define STRH(rt,rn,rm)                CC_STRH(ARM_CC_AL,rt,rn,rm)
782 #  define T1_STRH(rt,rn,rm)             is(THUMB_STRH|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt))
783 #  define T2_STRH(rt,rn,rm)             torxr(THUMB2_STRH,rn,rt,rm)
784 #  define CC_STRHN(cc,rt,rn,rm)         corrr(cc,ARM_STRH,rn,rt,rm)
785 #  define STRHN(rt,rn,rm)               CC_STRHN(ARM_CC_AL,rt,rn,rm)
786 #  define CC_STRHI(cc,rt,rn,im)         corri8(cc,ARM_STRHI|ARM_P,rn,rt,im)
787 #  define STRHI(rt,rn,im)               CC_STRHI(ARM_CC_AL,rt,rn,im)
788 #  define T1_STRHI(rt,rn,im)            is(THUMB_STRHI|(_u5(im)<<6)|(_u3(rn)<<3)|_u3(rt))
789 #  define T2_STRHI(rt,rn,im)            torri8(THUMB2_STRHI|THUMB2_U,rn,rt,im)
790 #  define T2_STRHWI(rt,rn,im)           torri12(THUMB2_STRHWI,rn,rt,im)
791 #  define CC_STRHIN(cc,rt,rn,im)        corri8(cc,ARM_STRHI,rn,rt,im)
792 #  define STRHIN(rt,rn,im)              CC_STRHIN(ARM_CC_AL,rt,rn,im)
793 #  define T2_STRHIN(rt,rn,im)           torri8(THUMB2_STRHI,rn,rt,im)
/*
 * STR (word) family.  Register-offset forms use corrr()/torxr(); ARM
 * immediate forms use corri(), Thumb-2 immediate forms use
 * torri8()/torri12().  T1_STRI packs a 5-bit immediate at bit 6;
 * T1_STRISP takes no base register and packs rt at bit 8 with an 8-bit
 * immediate.  The *N forms differ from the plain forms only in leaving
 * ARM_P / THUMB2_U clear.
 */
794 #  define CC_STR(cc,rt,rn,rm)           corrr(cc,ARM_STR|ARM_P,rn,rt,rm)
795 #  define STR(rt,rn,rm)                 CC_STR(ARM_CC_AL,rt,rn,rm)
796 #  define T1_STR(rt,rn,rm)              is(THUMB_STR|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt))
797 #  define T2_STR(rt,rn,rm)              torxr(THUMB2_STR,rn,rt,rm)
798 #  define CC_STRN(cc,rt,rn,rm)          corrr(cc,ARM_STR,rn,rt,rm)
799 #  define STRN(rt,rn,rm)                CC_STRN(ARM_CC_AL,rt,rn,rm)
800 #  define CC_STRI(cc,rt,rn,im)          corri(cc,ARM_STRI|ARM_P,rn,rt,im)
801 #  define STRI(rt,rn,im)                CC_STRI(ARM_CC_AL,rt,rn,im)
802 #  define T1_STRI(rt,rn,im)             is(THUMB_STRI|(_u5(im)<<6)|(_u3(rn)<<3)|_u3(rt))
803 #  define T1_STRISP(rt,im)              is(THUMB_STRISP|(_u3(rt)<<8)|(_u8(im)))
804 #  define T2_STRI(rt,rn,im)             torri8(THUMB2_STRI|THUMB2_U,rn,rt,im)
805 #  define T2_STRWI(rt,rn,im)            torri12(THUMB2_STRWI,rn,rt,im)
806 #  define CC_STRIN(cc,rt,rn,im)         corri(cc,ARM_STRI,rn,rt,im)
807 #  define STRIN(rt,rn,im)               CC_STRIN(ARM_CC_AL,rt,rn,im)
808 #  define T2_STRIN(rt,rn,im)            torri8(THUMB2_STRI,rn,rt,im)
/*
 * STRD and STREX.  ARM STRD forms take a single rt and use corrr()
 * (register offset) or corri8() (immediate offset); the Thumb-2 forms
 * take an explicit rt2 and use torrri8().  The plain forms OR ARM_P into
 * the opcode; the *N forms leave it clear.  STREX takes (rd,rt,rn) and
 * passes 0xf in the third corrrr() field; T2_STREX passes rd in the slot
 * that the T2_STRDI form uses for rt2.
 */
809 #  define CC_STRD(cc,rt,rn,rm)          corrr(cc,ARM_STRD|ARM_P,rn,rt,rm)
810 #  define STRD(rt,rn,rm)                CC_STRD(ARM_CC_AL,rt,rn,rm)
811 #  define CC_STRDN(cc,rt,rn,rm)         corrr(cc,ARM_STRD,rn,rt,rm)
812 #  define STRDN(rt,rn,rm)               CC_STRDN(ARM_CC_AL,rt,rn,rm)
813 #  define CC_STRDI(cc,rt,rn,im)         corri8(cc,ARM_STRDI|ARM_P,rn,rt,im)
814 #  define STRDI(rt,rn,im)               CC_STRDI(ARM_CC_AL,rt,rn,im)
815 #  define T2_STRDI(rt,rt2,rn,im)        torrri8(THUMB2_STRDI|ARM_P,rn,rt,rt2,im)
816 #  define CC_STRDIN(cc,rt,rn,im)        corri8(cc,ARM_STRDI,rn,rt,im)
817 #  define STRDIN(rt,rn,im)              CC_STRDIN(ARM_CC_AL,rt,rn,im)
818 #  define T2_STRDIN(rt,rt2,rn,im)       torrri8(THUMB2_STRDI,rn,rt,rt2,im)
819 #  define CC_STREX(cc,rd,rt,rn)         corrrr(cc,ARM_STREX,rn,rd,0xf,rt)
820 #  define STREX(rd,rt,rn)               CC_STREX(ARM_CC_AL,rd,rt,rn)
821 #  define T2_STREX(rd,rt,rn,im)         torrri8(THUMB2_STREX,rn,rt,rd,im)
/*
 * Load-multiple.  Every ARM form expands to corl(cc,flags,rn,im) with
 * ARM_M|ARM_M_L always set; the name suffix selects the remaining flags:
 *   IA -> ARM_M_I            IB -> ARM_M_I|ARM_M_B
 *   DA -> (none)             DB -> ARM_M_B
 *   _U -> additionally ARM_M_U
 * LDM/CC_LDM are aliases of LDMIA/CC_LDMIA and LDM_U of LDMIA_U.
 * T1_LDMIA ORs `im` directly into the 16-bit opcode (rn is a 3-bit field
 * at bit 8); T2_ forms use torl().
 */
822 #  define CC_LDMIA(cc,rn,im)            corl(cc,ARM_M|ARM_M_L|ARM_M_I,rn,im)
823 #  define LDMIA(rn,im)                  CC_LDMIA(ARM_CC_AL,rn,im)
824 #  define CC_LDM(cc,rn,im)              CC_LDMIA(cc,rn,im)
825 #  define LDM(rn,im)                    LDMIA(rn,im)
826 #  define T1_LDMIA(rn,im)               is(THUMB_LDMIA|(_u3(rn)<<8)|im)
827 #  define T2_LDMIA(rn,im)               torl(THUMB2_LDMIA,rn,im)
828 #  define CC_LDMIA_U(cc,rn,im)          corl(cc,ARM_M|ARM_M_L|ARM_M_I|ARM_M_U,rn,im)
829 #  define LDMIA_U(rn,im)                CC_LDMIA_U(ARM_CC_AL,rn,im)
830 #  define LDM_U(r0,i0)                  LDMIA_U(r0,i0)
831 #  define CC_LDMIB(cc,rn,im)            corl(cc,ARM_M|ARM_M_L|ARM_M_I|ARM_M_B,rn,im)
832 #  define LDMIB(rn,im)                  CC_LDMIB(ARM_CC_AL,rn,im)
833 #  define CC_LDMIB_U(cc,rn,im)          corl(cc,ARM_M|ARM_M_L|ARM_M_I|ARM_M_B|ARM_M_U,rn,im)
834 #  define LDMIB_U(rn,im)                CC_LDMIB_U(ARM_CC_AL,rn,im)
835 #  define CC_LDMDA(cc,rn,im)            corl(cc,ARM_M|ARM_M_L,rn,im)
836 #  define LDMDA(rn,im)                  CC_LDMDA(ARM_CC_AL,rn,im)
837 #  define CC_LDMDA_U(cc,rn,im)          corl(cc,ARM_M|ARM_M_L|ARM_M_U,rn,im)
838 #  define LDMDA_U(rn,im)                CC_LDMDA_U(ARM_CC_AL,rn,im)
839 #  define CC_LDMDB(cc,rn,im)            corl(cc,ARM_M|ARM_M_L|ARM_M_B,rn,im)
840 #  define LDMDB(rn,im)                  CC_LDMDB(ARM_CC_AL,rn,im)
841 #  define T2_LDMDB(rn,im)               torl(THUMB2_LDMDB,rn,im)
842 #  define CC_LDMDB_U(cc,rn,im)          corl(cc,ARM_M|ARM_M_L|ARM_M_B|ARM_M_U,rn,im)
843 #  define LDMDB_U(rn,im)                CC_LDMDB_U(ARM_CC_AL,rn,im)
/*
 * Store-multiple.  Every form expands to corl(cc,flags,rn,im) with ARM_M
 * always set and ARM_M_L never set; the name suffix selects the
 * remaining flags:
 *   IA -> ARM_M_I            IB -> ARM_M_I|ARM_M_B
 *   DA -> (none)             DB -> ARM_M_B
 *   _U -> additionally ARM_M_U
 * STM/CC_STM alias STMIA/CC_STMIA and STM_U/CC_STM_U alias the IA_U
 * forms.
 */
844 #  define CC_STMIA(cc,rn,im)            corl(cc,ARM_M|ARM_M_I,rn,im)
845 #  define STMIA(rn,im)                  CC_STMIA(ARM_CC_AL,rn,im)
846 #  define CC_STM(cc,rn,im)              CC_STMIA(cc,rn,im)
847 #  define STM(rn,im)                    STMIA(rn,im)
848 #  define CC_STMIA_U(cc,rn,im)          corl(cc,ARM_M|ARM_M_I|ARM_M_U,rn,im)
849 #  define STMIA_U(rn,im)                CC_STMIA_U(ARM_CC_AL,rn,im)
850 #  define CC_STM_U(cc,rn,im)            CC_STMIA_U(cc,rn,im)
851 #  define STM_U(rn,im)                  STMIA_U(rn,im)
852 #  define CC_STMIB(cc,rn,im)            corl(cc,ARM_M|ARM_M_I|ARM_M_B,rn,im)
853 #  define STMIB(rn,im)                  CC_STMIB(ARM_CC_AL,rn,im)
854 #  define CC_STMIB_U(cc,rn,im)          corl(cc,ARM_M|ARM_M_I|ARM_M_B|ARM_M_U,rn,im)
855 #  define STMIB_U(rn,im)                CC_STMIB_U(ARM_CC_AL,rn,im)
856 #  define CC_STMDA(cc,rn,im)            corl(cc,ARM_M,rn,im)
857 #  define STMDA(rn,im)                  CC_STMDA(ARM_CC_AL,rn,im)
858 #  define CC_STMDA_U(cc,rn,im)          corl(cc,ARM_M|ARM_M_U,rn,im)
859 #  define STMDA_U(rn,im)                CC_STMDA_U(ARM_CC_AL,rn,im)
860 #  define CC_STMDB(cc,rn,im)            corl(cc,ARM_M|ARM_M_B,rn,im)
861 #  define STMDB(rn,im)                  CC_STMDB(ARM_CC_AL,rn,im)
862 #  define CC_STMDB_U(cc,rn,im)          corl(cc,ARM_M|ARM_M_B|ARM_M_U,rn,im)
863 #  define STMDB_U(rn,im)                CC_STMDB_U(ARM_CC_AL,rn,im)
/*
 * PUSH/POP.  The ARM forms are STMDB_U / LDMIA_U on _SP_REGNO; the CC_
 * variants forward the condition code to the matching CC_ multiple-
 * transfer macro.  The T1_ forms keep the low eight bits of the register
 * mask and move one extra mask bit into bit 8 of the opcode: bit 14
 * (0x4000) for PUSH and bit 15 (0x8000) for POP.  `im` is parenthesized
 * so that a mask written as an expression, e.g. (1<<a)|(1<<b), is masked
 * and shifted as a whole.  T2_ forms use tpp().
 */
#  define CC_PUSH(cc,im)                CC_STMDB_U(cc,_SP_REGNO,im)
#  define PUSH(im)                      STMDB_U(_SP_REGNO,im)
#  define T1_PUSH(im)                   is(THUMB_PUSH|(((im)&0x4000)>>6)|((im)&0xff))
#  define T2_PUSH(im)                   tpp(THUMB2_PUSH,im)
#  define CC_POP(cc,im)                 CC_LDMIA_U(cc,_SP_REGNO,im)
#  define POP(im)                       LDMIA_U(_SP_REGNO,im)
#  define T1_POP(im)                    is(THUMB_POP|(((im)&0x8000)>>7)|((im)&0xff))
#  define T2_POP(im)                    tpp(THUMB2_POP,im)
/*
 * jit_get_reg_args(): run CHECK_REG_ARGS() and jit_check_frame(), then
 * request _R0.._R3 by name (jit_class_named|jit_class_gpr) through
 * jit_get_reg(), discarding the return values.
 * jit_unget_reg_args(): hand the same four registers back through
 * jit_unget_reg(), in the reverse order (_R3 first).
 * Both are wrapped in do { } while (0) so they behave as one statement.
 */
872 #  define jit_get_reg_args()                                            \
873     do {                                                                \
874         CHECK_REG_ARGS();                                               \
875         jit_check_frame();                                              \
876         (void)jit_get_reg(_R0|jit_class_named|jit_class_gpr);           \
877         (void)jit_get_reg(_R1|jit_class_named|jit_class_gpr);           \
878         (void)jit_get_reg(_R2|jit_class_named|jit_class_gpr);           \
879         (void)jit_get_reg(_R3|jit_class_named|jit_class_gpr);           \
880     } while (0)
881 #  define jit_unget_reg_args()                                          \
882     do {                                                                \
883         jit_unget_reg(_R3);                                             \
884         jit_unget_reg(_R2);                                             \
885         jit_unget_reg(_R1);                                             \
886         jit_unget_reg(_R0);                                             \
887     } while (0)
/*
 * Front-end wrappers: each lower-case macro forwards to a static
 * _name() function, inserting the implicit `_jit` state as the first
 * argument.  This group covers nop, register/immediate moves, the
 * movnr/movzr three-register moves, compare-and-swap and the
 * two-register unary operations (comr, negr, clor, clzr, ctor, ctzr).
 * movi_p is the only one here that returns a value (jit_word_t).
 * casr and casi are both expressed through casx: casr passes 0 as the
 * trailing word argument, casi passes _NOREG in the second register
 * slot and forwards its immediate as the trailing word.
 */
888 #  define nop(i0)                       _nop(_jit,i0)
889 static void _nop(jit_state_t*,jit_int32_t);
890 #  define movr(r0,r1)                   _movr(_jit,r0,r1)
891 static void _movr(jit_state_t*,jit_int32_t,jit_int32_t);
892 #  define movi(r0,i0)                   _movi(_jit,r0,i0)
893 static void _movi(jit_state_t*,jit_int32_t,jit_word_t);
894 #  define movi_p(r0,i0)                 _movi_p(_jit,r0,i0)
895 static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t);
896 #  define movnr(r0,r1,r2)               _movnr(_jit,r0,r1,r2)
897 static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
898 #  define movzr(r0,r1,r2)               _movzr(_jit,r0,r1,r2)
899 static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
900 #  define casx(r0, r1, r2, r3, i0)      _casx(_jit, r0, r1, r2, r3, i0)
901 static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t,
902                   jit_int32_t,jit_int32_t,jit_word_t);
903 #define casr(r0, r1, r2, r3)            casx(r0, r1, r2, r3, 0)
904 #define casi(r0, i0, r1, r2)            casx(r0, _NOREG, r1, r2, i0)
905 #  define comr(r0,r1)                   _comr(_jit,r0,r1)
906 static void _comr(jit_state_t*,jit_int32_t,jit_int32_t);
907 #  define negr(r0,r1)                   _negr(_jit,r0,r1)
908 static void _negr(jit_state_t*,jit_int32_t,jit_int32_t);
909 #  define clor(r0, r1)                  _clor(_jit, r0, r1)
910 static void _clor(jit_state_t*, jit_int32_t, jit_int32_t);
911 #  define clzr(r0, r1)                  _clzr(_jit, r0, r1)
912 static void _clzr(jit_state_t*, jit_int32_t, jit_int32_t);
913 #  define ctor(r0, r1)                  _ctor(_jit, r0, r1)
914 static void _ctor(jit_state_t*, jit_int32_t, jit_int32_t);
915 #  define ctzr(r0, r1)                  _ctzr(_jit, r0, r1)
916 static void _ctzr(jit_state_t*, jit_int32_t, jit_int32_t);
/*
 * Addition and subtraction wrappers.  Names ending in `r` take three
 * registers (jit_int32_t); names ending in `i` take two registers and a
 * jit_word_t immediate.  The add/sub groups each come in plain, `c` and
 * `x` flavours (presumably carry-setting and carry-consuming -- confirm
 * against the definitions).  rsbi has only an immediate form here.
 * Every macro forwards to the matching static _name() with `_jit`
 * prepended.
 */
917 #  define addr(r0,r1,r2)                _addr(_jit,r0,r1,r2)
918 static void _addr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
919 #  define addi(r0,r1,i0)                _addi(_jit,r0,r1,i0)
920 static void _addi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
921 #  define addcr(r0,r1,r2)               _addcr(_jit,r0,r1,r2)
922 static void _addcr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
923 #  define addci(r0,r1,i0)               _addci(_jit,r0,r1,i0)
924 static void _addci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
925 #  define addxr(r0,r1,r2)               _addxr(_jit,r0,r1,r2)
926 static void _addxr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
927 #  define addxi(r0,r1,i0)               _addxi(_jit,r0,r1,i0)
928 static void _addxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
929 #  define subr(r0,r1,r2)                _subr(_jit,r0,r1,r2)
930 static void _subr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
931 #  define subi(r0,r1,i0)                _subi(_jit,r0,r1,i0)
932 static void _subi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
933 #  define subcr(r0,r1,r2)               _subcr(_jit,r0,r1,r2)
934 static void _subcr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
935 #  define subci(r0,r1,i0)               _subci(_jit,r0,r1,i0)
936 static void _subci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
937 #  define subxr(r0,r1,r2)               _subxr(_jit,r0,r1,r2)
938 static void _subxr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
939 #  define subxi(r0,r1,i0)               _subxi(_jit,r0,r1,i0)
940 static void _subxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
941 #  define rsbi(r0, r1, i0)              _rsbi(_jit, r0, r1, i0)
942 static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
/*
 * Multiplication wrappers.  mulr/muli forward to _mulr/_muli.  The
 * four-operand qmul* macros share one implementation each: qmulr and
 * qmulr_u both expand to iqmulr, and qmuli and qmuli_u to iqmuli,
 * passing 1 for the un-suffixed name and 0 for the _u name as the final
 * jit_bool_t argument (presumably a signedness flag -- confirm in
 * _iqmulr/_iqmuli).
 */
943 #  define mulr(r0,r1,r2)                _mulr(_jit,r0,r1,r2)
944 static void _mulr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
945 #  define muli(r0,r1,i0)                _muli(_jit,r0,r1,i0)
946 static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
947 #  define qmulr(r0,r1,r2,r3)            iqmulr(r0,r1,r2,r3,1)
948 #  define qmulr_u(r0,r1,r2,r3)          iqmulr(r0,r1,r2,r3,0)
949 #  define iqmulr(r0,r1,r2,r3,cc)        _iqmulr(_jit,r0,r1,r2,r3,cc)
950 static void _iqmulr(jit_state_t*,jit_int32_t,jit_int32_t,
951                     jit_int32_t,jit_int32_t,jit_bool_t);
952 #  define qmuli(r0,r1,r2,i0)            iqmuli(r0,r1,r2,i0,1)
953 #  define qmuli_u(r0,r1,r2,i0)          iqmuli(r0,r1,r2,i0,0)
954 #  define iqmuli(r0,r1,r2,i0,cc)        _iqmuli(_jit,r0,r1,r2,i0,cc)
955 static void _iqmuli(jit_state_t*,jit_int32_t,jit_int32_t,
956                     jit_int32_t,jit_word_t,jit_bool_t);
/*
 * Division and remainder wrappers.  divrem takes two plain int selectors
 * (d, s) ahead of three registers; their meaning is defined by _divrem,
 * which is not visible here.  divr/divi and remr/remi each have a _u
 * counterpart with its own static function.  The four-operand qdiv*
 * macros share one implementation each: qdivr and qdivr_u both expand to
 * iqdivr, and qdivi and qdivi_u to iqdivi, passing 1 for the un-suffixed
 * name and 0 for the _u name as the final jit_bool_t argument
 * (presumably a signedness flag -- confirm in _iqdivr/_iqdivi).
 */
957 #  define divrem(d,s,r0,r1,r2)          _divrem(_jit,d,s,r0,r1,r2)
958 static void _divrem(jit_state_t*,int,int,jit_int32_t,jit_int32_t,jit_int32_t);
959 #  define divr(r0,r1,r2)                _divr(_jit,r0,r1,r2)
960 static void _divr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
961 #  define divi(r0,r1,i0)                _divi(_jit,r0,r1,i0)
962 static void _divi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
963 #  define divr_u(r0,r1,r2)              _divr_u(_jit,r0,r1,r2)
964 static void _divr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
965 #  define divi_u(r0,r1,i0)              _divi_u(_jit,r0,r1,i0)
966 static void _divi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
967 #  define qdivr(r0,r1,r2,r3)            iqdivr(r0,r1,r2,r3,1)
968 #  define qdivr_u(r0,r1,r2,r3)          iqdivr(r0,r1,r2,r3,0)
969 #  define iqdivr(r0,r1,r2,r3,cc)        _iqdivr(_jit,r0,r1,r2,r3,cc)
970 static void _iqdivr(jit_state_t*,jit_int32_t,jit_int32_t,
971                     jit_int32_t,jit_int32_t,jit_bool_t);
972 #  define qdivi(r0,r1,r2,i0)            iqdivi(r0,r1,r2,i0,1)
973 #  define qdivi_u(r0,r1,r2,i0)          iqdivi(r0,r1,r2,i0,0)
974 #  define iqdivi(r0,r1,r2,i0,cc)        _iqdivi(_jit,r0,r1,r2,i0,cc)
975 static void _iqdivi(jit_state_t*,jit_int32_t,jit_int32_t,
976                     jit_int32_t,jit_word_t,jit_bool_t);
977 #  define remr(r0,r1,r2)                _remr(_jit,r0,r1,r2)
978 static void _remr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
979 #  define remi(r0,r1,i0)                _remi(_jit,r0,r1,i0)
980 static void _remi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
981 #  define remr_u(r0,r1,r2)              _remr_u(_jit,r0,r1,r2)
982 static void _remr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
983 #  define remi_u(r0,r1,i0)              _remi_u(_jit,r0,r1,i0)
984 static void _remi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
985 #  define andr(r0,r1,r2)                _andr(_jit,r0,r1,r2)
986 static void _andr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
987 #  define andi(r0,r1,i0)                _andi(_jit,r0,r1,i0)
988 static void _andi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
989 #  define orr(r0,r1,r2)                 _orr(_jit,r0,r1,r2)
990 static void _orr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
991 #  define ori(r0,r1,i0)                 _ori(_jit,r0,r1,i0)
992 static void _ori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
993 #  define xorr(r0,r1,r2)                _xorr(_jit,r0,r1,r2)
994 static void _xorr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
995 #  define xori(r0,r1,i0)                _xori(_jit,r0,r1,i0)
996 static void _xori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
997 #  define lshr(r0,r1,r2)                _lshr(_jit,r0,r1,r2)
998 static void _lshr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
999 #  define lshi(r0,r1,i0)                _lshi(_jit,r0,r1,i0)
1000 static void _lshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
1001 #  define rshr(r0,r1,r2)                _rshr(_jit,r0,r1,r2)
1002 static void _rshr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
1003 #  define rshi(r0,r1,i0)                _rshi(_jit,r0,r1,i0)
1004 static void _rshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
1005 #  define rshr_u(r0,r1,r2)              _rshr_u(_jit,r0,r1,r2)
1006 static void _rshr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
1007 #  define rshi_u(r0,r1,i0)              _rshi_u(_jit,r0,r1,i0)
1008 static void _rshi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
1009 #  define ccr(ct,cf,r0,r1,r2)           _ccr(_jit,ct,cf,r0,r1,r2)
1010 static void _ccr(jit_state_t*,int,int,jit_int32_t,jit_int32_t,jit_int32_t);
1011 #  define cci(ct,cf,r0,r1,i0)           _cci(_jit,ct,cf,r0,r1,i0)
1012 static void _cci(jit_state_t*,int,int,jit_int32_t,jit_int32_t,jit_word_t);
1013 #  define ltr(r0, r1, r2)               ccr(ARM_CC_LT,ARM_CC_GE,r0,r1,r2)
1014 #  define lti(r0, r1, i0)               cci(ARM_CC_LT,ARM_CC_GE,r0,r1,i0)
1015 #  define ltr_u(r0, r1, r2)             ccr(ARM_CC_LO,ARM_CC_HS,r0,r1,r2)
1016 #  define lti_u(r0, r1, i0)             cci(ARM_CC_LO,ARM_CC_HS,r0,r1,i0)
1017 #  define ler(r0, r1, r2)               ccr(ARM_CC_LE,ARM_CC_GT,r0,r1,r2)
1018 #  define lei(r0, r1, i0)               cci(ARM_CC_LE,ARM_CC_GT,r0,r1,i0)
1019 #  define ler_u(r0, r1, r2)             ccr(ARM_CC_LS,ARM_CC_HI,r0,r1,r2)
1020 #  define lei_u(r0, r1, i0)             cci(ARM_CC_LS,ARM_CC_HI,r0,r1,i0)
1021 #  define eqr(r0, r1, r2)               ccr(ARM_CC_EQ,ARM_CC_NE,r0,r1,r2)
1022 #  define eqi(r0, r1, i0)               cci(ARM_CC_EQ,ARM_CC_NE,r0,r1,i0)
1023 #  define ger(r0, r1, r2)               ccr(ARM_CC_GE,ARM_CC_LT,r0,r1,r2)
1024 #  define gei(r0, r1, i0)               cci(ARM_CC_GE,ARM_CC_LT,r0,r1,i0)
1025 #  define ger_u(r0, r1, r2)             ccr(ARM_CC_HS,ARM_CC_LO,r0,r1,r2)
1026 #  define gei_u(r0, r1, i0)             cci(ARM_CC_HS,ARM_CC_LO,r0,r1,i0)
1027 #  define gtr(r0, r1, r2)               ccr(ARM_CC_GT,ARM_CC_LE,r0,r1,r2)
1028 #  define gti(r0, r1, i0)               cci(ARM_CC_GT,ARM_CC_LE,r0,r1,i0)
1029 #  define gtr_u(r0, r1, r2)             ccr(ARM_CC_HI,ARM_CC_LS,r0,r1,r2)
1030 #  define gti_u(r0, r1, i0)             cci(ARM_CC_HI,ARM_CC_LS,r0,r1,i0)
1031 #  define ner(r0,r1,r2)                 _ner(_jit,r0,r1,r2)
1032 static void _ner(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
1033 #  define nei(r0,r1,i0)                 _nei(_jit,r0,r1,i0)
1034 static void _nei(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
1035 #  define jmpr(r0)                      _jmpr(_jit,r0)
1036 static void _jmpr(jit_state_t*,jit_int32_t);
1037 #  define jmpi(i0)                      _jmpi(_jit,i0)
1038 static void _jmpi(jit_state_t*,jit_word_t);
1039 #  define jmpi_p(i0, i1)                _jmpi_p(_jit,i0, i1)
1040 static jit_word_t _jmpi_p(jit_state_t*,jit_word_t,jit_bool_t);
1041 #  define bccr(cc,i0,r0,r1)             _bccr(_jit,cc,i0,r0,r1)
1042 static jit_word_t _bccr(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
1043 #  define bcci(cc,i0,r0,i1)             _bcci(_jit,cc,i0,r0,i1)
1044 static jit_word_t _bcci(jit_state_t*,int,jit_word_t,jit_int32_t,jit_word_t);
1045 #  define bltr(i0, r0, r1)              bccr(ARM_CC_LT,i0,r0,r1)
1046 #  define blti(i0, r0, i1)              bcci(ARM_CC_LT,i0,r0,i1)
1047 #  define bltr_u(i0, r0, r1)            bccr(ARM_CC_LO,i0,r0,r1)
1048 #  define blti_u(i0, r0, i1)            bcci(ARM_CC_LO,i0,r0,i1)
1049 #  define bler(i0, r0, r1)              bccr(ARM_CC_LE,i0,r0,r1)
1050 #  define blei(i0, r0, i1)              bcci(ARM_CC_LE,i0,r0,i1)
1051 #  define bler_u(i0, r0, r1)            bccr(ARM_CC_LS,i0,r0,r1)
1052 #  define blei_u(i0, r0, i1)            bcci(ARM_CC_LS,i0,r0,i1)
1053 #  define beqr(i0, r0, r1)              bccr(ARM_CC_EQ,i0,r0,r1)
1054 #  define beqi(i0, r0, i1)              bcci(ARM_CC_EQ,i0,r0,i1)
1055 #  define bger(i0, r0, r1)              bccr(ARM_CC_GE,i0,r0,r1)
1056 #  define bgei(i0, r0, i1)              bcci(ARM_CC_GE,i0,r0,i1)
1057 #  define bger_u(i0, r0, r1)            bccr(ARM_CC_HS,i0,r0,r1)
1058 #  define bgei_u(i0, r0, i1)            bcci(ARM_CC_HS,i0,r0,i1)
1059 #  define bgtr(i0, r0, r1)              bccr(ARM_CC_GT,i0,r0,r1)
1060 #  define bgti(i0, r0, i1)              bcci(ARM_CC_GT,i0,r0,i1)
1061 #  define bgtr_u(i0, r0, r1)            bccr(ARM_CC_HI,i0,r0,r1)
1062 #  define bgti_u(i0, r0, i1)            bcci(ARM_CC_HI,i0,r0,i1)
1063 #  define bner(i0, r0, r1)              bccr(ARM_CC_NE,i0,r0,r1)
1064 #  define bnei(i0, r0, i1)              bcci(ARM_CC_NE,i0,r0,i1)
1065 #  define baddr(cc,i0,r0,r1)            _baddr(_jit,cc,i0,r0,r1)
1066 static jit_word_t _baddr(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
1067 #  define baddi(cc,i0,r0,r1)            _baddi(_jit,cc,i0,r0,r1)
1068 static jit_word_t _baddi(jit_state_t*,int,jit_word_t,jit_int32_t,jit_word_t);
1069 #  define boaddr(i0,r0,r1)              baddr(ARM_CC_VS,i0,r0,r1)
1070 #  define boaddi(i0,r0,i1)              baddi(ARM_CC_VS,i0,r0,i1)
1071 #  define boaddr_u(i0,r0,r1)            baddr(ARM_CC_HS,i0,r0,r1)
1072 #  define boaddi_u(i0,r0,i1)            baddi(ARM_CC_HS,i0,r0,i1)
1073 #  define bxaddr(i0,r0,r1)              baddr(ARM_CC_VC,i0,r0,r1)
1074 #  define bxaddi(i0,r0,i1)              baddi(ARM_CC_VC,i0,r0,i1)
1075 #  define bxaddr_u(i0,r0,r1)            baddr(ARM_CC_LO,i0,r0,r1)
1076 #  define bxaddi_u(i0,r0,i1)            baddi(ARM_CC_LO,i0,r0,i1)
1077 #  define bsubr(cc,i0,r0,r1)            _bsubr(_jit,cc,i0,r0,r1)
1078 static jit_word_t _bsubr(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
1079 #  define bsubi(cc,i0,r0,r1)            _bsubi(_jit,cc,i0,r0,r1)
1080 static jit_word_t _bsubi(jit_state_t*,int,jit_word_t,jit_int32_t,jit_word_t);
1081 #  define bosubr(i0,r0,r1)              bsubr(ARM_CC_VS,i0,r0,r1)
1082 #  define bosubi(i0,r0,i1)              bsubi(ARM_CC_VS,i0,r0,i1)
1083 #  define bosubr_u(i0,r0,r1)            bsubr(ARM_CC_LO,i0,r0,r1)
1084 #  define bosubi_u(i0,r0,i1)            bsubi(ARM_CC_LO,i0,r0,i1)
1085 #  define bxsubr(i0,r0,r1)              bsubr(ARM_CC_VC,i0,r0,r1)
1086 #  define bxsubi(i0,r0,i1)              bsubi(ARM_CC_VC,i0,r0,i1)
1087 #  define bxsubr_u(i0,r0,r1)            bsubr(ARM_CC_HS,i0,r0,r1)
1088 #  define bxsubi_u(i0,r0,i1)            bsubi(ARM_CC_HS,i0,r0,i1)
1089 #  define bmxr(cc,i0,r0,r1)             _bmxr(_jit,cc,i0,r0,r1)
1090 static jit_word_t _bmxr(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
1091 #  define bmxi(cc,i0,r0,r1)             _bmxi(_jit,cc,i0,r0,r1)
1092 static jit_word_t _bmxi(jit_state_t*,int,jit_word_t,jit_int32_t,jit_word_t);
1093 #  define bmsr(i0,r0,r1)                bmxr(ARM_CC_NE,i0,r0,r1)
1094 #  define bmsi(i0,r0,i1)                bmxi(ARM_CC_NE,i0,r0,i1)
1095 #  define bmcr(i0,r0,r1)                bmxr(ARM_CC_EQ,i0,r0,r1)
1096 #  define bmci(i0,r0,i1)                bmxi(ARM_CC_EQ,i0,r0,i1)
1097 #  define ldr_c(r0,r1)                  _ldr_c(_jit,r0,r1)
1098 static void _ldr_c(jit_state_t*,jit_int32_t,jit_int32_t);
1099 #  define ldi_c(r0,i0)                  _ldi_c(_jit,r0,i0)
1100 static void _ldi_c(jit_state_t*,jit_int32_t,jit_word_t);
1101 #  define ldxr_c(r0,r1,r2)              _ldxr_c(_jit,r0,r1,r2)
1102 static void _ldxr_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
1103 #  define ldxi_c(r0,r1,i0)              _ldxi_c(_jit,r0,r1,i0)
1104 static void _ldxi_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
1105 #  define ldr_uc(r0,r1)                 _ldr_uc(_jit,r0,r1)
1106 static void _ldr_uc(jit_state_t*,jit_int32_t,jit_int32_t);
1107 #  define ldi_uc(r0,i0)                 _ldi_uc(_jit,r0,i0)
1108 static void _ldi_uc(jit_state_t*,jit_int32_t,jit_word_t);
1109 #  define ldxr_uc(r0,r1,r2)             _ldxr_uc(_jit,r0,r1,r2)
1110 static void _ldxr_uc(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
1111 #  define ldxi_uc(r0,r1,i0)             _ldxi_uc(_jit,r0,r1,i0)
1112 static void _ldxi_uc(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
1113 #  define ldr_s(r0,r1)                  _ldr_s(_jit,r0,r1)
1114 static void _ldr_s(jit_state_t*,jit_int32_t,jit_int32_t);
1115 #  define ldi_s(r0,i0)                  _ldi_s(_jit,r0,i0)
1116 static void _ldi_s(jit_state_t*,jit_int32_t,jit_word_t);
1117 #  define ldxr_s(r0,r1,r2)              _ldxr_s(_jit,r0,r1,r2)
1118 static void _ldxr_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
1119 #  define ldxi_s(r0,r1,i0)              _ldxi_s(_jit,r0,r1,i0)
1120 static void _ldxi_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
1121 #  define ldr_us(r0,r1)                 _ldr_us(_jit,r0,r1)
1122 static void _ldr_us(jit_state_t*,jit_int32_t,jit_int32_t);
1123 #  define ldi_us(r0,i0)                 _ldi_us(_jit,r0,i0)
1124 static void _ldi_us(jit_state_t*,jit_int32_t,jit_word_t);
1125 #  define ldxr_us(r0,r1,r2)             _ldxr_us(_jit,r0,r1,r2)
1126 static void _ldxr_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
1127 #  define ldxi_us(r0,r1,i0)             _ldxi_us(_jit,r0,r1,i0)
1128 static void _ldxi_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
1129 #  define ldr_i(r0,r1)                  _ldr_i(_jit,r0,r1)
1130 static void _ldr_i(jit_state_t*,jit_int32_t,jit_int32_t);
1131 #  define ldi_i(r0,i0)                  _ldi_i(_jit,r0,i0)
1132 static void _ldi_i(jit_state_t*,jit_int32_t,jit_word_t);
1133 #  define ldxr_i(r0,r1,r2)              _ldxr_i(_jit,r0,r1,r2)
1134 static void _ldxr_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
1135 #  define ldxi_i(r0,r1,i0)              _ldxi_i(_jit,r0,r1,i0)
1136 static void _ldxi_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
1137 #  define str_c(r0,r1)                  _str_c(_jit,r0,r1)
1138 static void _str_c(jit_state_t*,jit_int32_t,jit_int32_t);
1139 #  define sti_c(i0,r0)                  _sti_c(_jit,i0,r0)
1140 static void _sti_c(jit_state_t*,jit_word_t,jit_int32_t);
1141 #  define stxr_c(r0,r1,r2)              _stxr_c(_jit,r0,r1,r2)
1142 static void _stxr_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
1143 #  define stxi_c(r0,r1,i0)              _stxi_c(_jit,r0,r1,i0)
1144 static void _stxi_c(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
1145 #  define str_s(r0,r1)                  _str_s(_jit,r0,r1)
1146 static void _str_s(jit_state_t*,jit_int32_t,jit_int32_t);
1147 #  define sti_s(i0,r0)                  _sti_s(_jit,i0,r0)
1148 static void _sti_s(jit_state_t*,jit_word_t,jit_int32_t);
1149 #  define stxr_s(r0,r1,r2)              _stxr_s(_jit,r0,r1,r2)
1150 static void _stxr_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
1151 #  define stxi_s(r0,r1,i0)              _stxi_s(_jit,r0,r1,i0)
1152 static void _stxi_s(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
1153 #  define str_i(r0,r1)                  _str_i(_jit,r0,r1)
1154 static void _str_i(jit_state_t*,jit_int32_t,jit_int32_t);
1155 #  define sti_i(i0,r0)                  _sti_i(_jit,i0,r0)
1156 static void _sti_i(jit_state_t*,jit_word_t,jit_int32_t);
1157 #  define stxr_i(r0,r1,r2)              _stxr_i(_jit,r0,r1,r2)
1158 static void _stxr_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
1159 #  define stxi_i(r0,r1,i0)              _stxi_i(_jit,r0,r1,i0)
1160 static void _stxi_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
1161 #  define bswapr_us(r0,r1)              _bswapr_us(_jit,r0,r1)
1162 static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t);
1163 #  define bswapr_ui(r0,r1)              _bswapr_ui(_jit,r0,r1)
1164 static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
1165 #  define extr_c(r0,r1)                 _extr_c(_jit,r0,r1)
1166 static void _extr_c(jit_state_t*,jit_int32_t,jit_int32_t);
1167 #  define extr_uc(r0,r1)                _extr_uc(_jit,r0,r1)
1168 static void _extr_uc(jit_state_t*,jit_int32_t,jit_int32_t);
1169 #  define extr_s(r0,r1)                 _extr_s(_jit,r0,r1)
1170 static void _extr_s(jit_state_t*,jit_int32_t,jit_int32_t);
1171 #  define extr_us(r0,r1)                _extr_us(_jit,r0,r1)
1172 static void _extr_us(jit_state_t*,jit_int32_t,jit_int32_t);
1173 #  define prolog(i0)                    _prolog(_jit,i0)
1174 static void _prolog(jit_state_t*,jit_node_t*);
1175 #  define epilog(i0)                    _epilog(_jit,i0)
1176 static void _epilog(jit_state_t*,jit_node_t*);
1177 #  define callr(r0)                     _callr(_jit,r0)
1178 static void _callr(jit_state_t*,jit_int32_t);
1179 #  define calli(i0,i1)                  _calli(_jit,i0,i1)
1180 static void _calli(jit_state_t*,jit_word_t,jit_bool_t);
1181 #  define calli_p(i0,i1)                _calli_p(_jit,i0,i1)
1182 static jit_word_t _calli_p(jit_state_t*,jit_word_t,jit_bool_t);
1183 #  define vastart(r0)                   _vastart(_jit, r0)
1184 static void _vastart(jit_state_t*, jit_int32_t);
1185 #  define vaarg(r0, r1)                 _vaarg(_jit, r0, r1)
1186 static void _vaarg(jit_state_t*, jit_int32_t, jit_int32_t);
1187 #  define patch_at(kind,jump,label)     _patch_at(_jit,kind,jump,label)
1188 static void _patch_at(jit_state_t*,jit_int32_t,jit_word_t,jit_word_t);
1189 #endif
1190
1191 #if CODE
/* from binutils */
/*
 * 32-bit rotate left.  The right-shift count is masked so that n == 0
 * yields a shift by 0 rather than by 32 (shifting a 32-bit value by its
 * full width is undefined behaviour in C).
 */
#  define rotate_left(v, n)     ((v) << (n) | (v) >> ((32 - (n)) & 31))
/*
 * Try to express `v` as an 8-bit value rotated by an even amount.
 * Returns the 8-bit payload in bits 7:0 or'ed with (rotation << 7),
 * or -1 if no even left-rotation of `v` fits in 8 bits.
 */
static int
encode_arm_immediate(unsigned int v)
{
    unsigned int        a, i;

    for (i = 0; i < 32; i += 2)
        if ((a = rotate_left(v, i)) <= 0xff)
            return (a | (i << 7));

    return (-1);
}
1205
/*
 * Try to encode `v` as a Thumb-2 modified immediate.  The returned value
 * carries an 8-bit payload in bits 7:0, a 3-bit selector in bits 14:12
 * and one extra selector bit in bit 26.  Returns -1 when none of the
 * patterns tried below matches.
 */
static int
encode_thumb_immediate(unsigned int v)
{
    int                 i;
    unsigned int        m;
    unsigned int        n;
    /* 00000000 00000000 00000000 abcdefgh */
    if ((v & 0xff) == v)
        return (v);
    /* 00000000 abcdefgh 00000000 abcdefgh */
    if ((v & 0xff00ff) == v && ((v & 0xff0000) >> 16) == (v & 0xff))
        return ((v & 0xff) | (1 << 12));
    /* abcdefgh 00000000 abcdefgh 00000000 */
    if (((v & 0xffff0000) >> 16) == (v & 0xffff) && (v & 0xff) == 0)
        /* bits 7:0 are known to be zero here: the payload is in bits 15:8 */
        return (((v & 0x0000ff00) >> 8) | (2 << 12));
    /* abcdefgh abcdefgh abcdefgh abcdefgh */
    if ( (v &    0xff)        == ((v &     0xff00) >>  8) &&
        ((v &   0xff00) >> 8) == ((v &   0xff0000) >> 16) &&
        ((v & 0xff0000) << 8) ==  (v & 0xff000000))
        return ((v & 0xff) | (3 << 12));
    /* 1bcdefgh << 24 ... 1bcdefgh << 1 */
    /* NOTE(review): the loop stops at i == 22, so the lowest window tried
     * is 1bcdefgh << 10; smaller shifts fall through to -1 -- confirm
     * whether the bound was meant to be 32. */
    for (i = 8, m = 0xff000000, n = 0x80000000;
         i < 23; i++, m >>= 1,  n >>= 1) {
        /* v must fit the 8-bit window `m` with the window's top bit set */
        if ((v & m) == v && (v & n)) {
            v >>= 32 - i;
            /* for even i the payload's top bit is dropped; odd i keeps it */
            if (!(i & 1))
                v &= 0x7f;
            i >>= 1;
            return (((i & 7) << 12) | ((i & 8) << 23) | v);
        }
    }
    return (-1);
}
1239
/*
 * Scatter a 12-bit unsigned value into three fields: bit 11 goes to
 * bit 26, bits 10:8 go to bits 14:12, and bits 7:0 stay in place.
 * Returns -1 when `v` needs more than 12 bits.
 */
static int
encode_thumb_word_immediate(unsigned int v)
{
    unsigned int        top, mid, low;

    if (v > 0xfff)
        return (-1);
    top = (v & 0x800) << 15;
    mid = (v & 0x700) << 4;
    low = v & 0xff;
    return (top | mid | low);
}
1247
/*
 * Pack a signed 24-bit displacement `v` into branch fields:
 * sign at bit 26, v[20:11] at bits 25:16, v[10:0] at bits 10:0, and two
 * derived bits at 13 and 11.  Those two bits equal v[22] and v[21] when
 * the sign is set and their inverses otherwise.
 * Returns -1 if `v` is outside [-0x800000, 0x7fffff].
 */
static int
encode_thumb_jump(int v)
{
    int         sign, hi1, hi2, bit13, bit11;

    if (v < (int)-0x800000 || v > 0x7fffff)
        return (-1);

    sign = (v & 0x800000) != 0;
    hi1  = (v & 0x400000) != 0;
    hi2  = (v & 0x200000) != 0;
    if (sign) {
        bit13 = hi1;
        bit11 = hi2;
    }
    else {
        bit13 = !hi1;
        bit11 = !hi2;
    }
    return ((sign << 26) | ((v & 0x1ff800) << 5) |
            (bit13 << 13) | (bit11 << 11) | (v & 0x7ff));
}
1262
/*
 * Pack a signed 20-bit displacement `v` into conditional-branch fields:
 * sign (v[19]) at bit 26, v[16:11] at bits 21:16, v[17] at bit 13,
 * v[18] at bit 11 and v[10:0] at bits 10:0.
 * Returns -1 if `v` is outside [-0x80000, 0x7ffff].
 */
static int
encode_thumb_cc_jump(int v)
{
    int         sign, bit13, bit11;

    if (v < (int)-0x80000 || v > 0x7ffff)
        return (-1);

    sign  = (v & 0x80000) != 0;
    bit13 = (v & 0x20000) != 0;
    bit11 = (v & 0x40000) != 0;
    return ((sign << 26) | ((v & 0x1f800) << 5) |
            (bit13 << 13) | (bit11 << 11) | (v & 0x7ff));
}
1275
1276 static int
1277 encode_thumb_shift(int v, int type)
1278 {
1279     switch (type) {
1280         case ARM_ASR:
1281         case ARM_LSL:
1282         case ARM_LSR:           type >>= 1;     break;
1283         default:                assert(!"handled shift");
1284     }
1285     assert(v >= 0 && v <= 31);
1286     return (((v & 0x1c) << 10) | ((v & 3) << 6) | type);
1287 }
1288
1289 static void
1290 _tcit(jit_state_t *_jit, unsigned int tc, int it)
1291 {
1292     int         c;
1293     int         m;
1294     c = (tc >> 28) & 1;
1295     assert(!(tc & 0xfffffff) && tc != ARM_CC_NV);
1296     switch (it) {
1297         case THUMB2_IT:         m =   1<<3;                     break;
1298         case THUMB2_ITT:        m =  (c<<3)| (1<<2);            break;
1299         case THUMB2_ITE:        m = (!c<<3)| (1<<2);            break;
1300         case THUMB2_ITTT:       m =  (c<<3)| (c<<2)| (1<<1);    break;
1301         case THUMB2_ITET:       m = (!c<<3)| (c<<2)| (1<<1);    break;
1302         case THUMB2_ITTE:       m =  (c<<3)|(!c<<2)| (1<<1);    break;
1303         case THUMB2_ITEE:       m = (!c<<3)|(!c<<2)| (1<<1);    break;
1304         case THUMB2_ITTTT:      m =  (c<<3)| (c<<2)| (c<<1)|1;  break;
1305         case THUMB2_ITETT:      m = (!c<<3)| (c<<2)| (c<<1)|1;  break;
1306         case THUMB2_ITTET:      m =  (c<<3)|(!c<<2)| (c<<1)|1;  break;
1307         case THUMB2_ITEET:      m = (!c<<3)|(!c<<2)| (c<<1)|1;  break;
1308         case THUMB2_ITTTE:      m =  (c<<3)| (c<<2)|(!c<<1)|1;  break;
1309         case THUMB2_ITETE:      m = (!c<<3)| (c<<2)|(!c<<1)|1;  break;
1310         case THUMB2_ITTEE:      m =  (c<<3)|(!c<<2)|(!c<<1)|1;  break;
1311         case THUMB2_ITEEE:      m = (!c<<3)|(!c<<2)|(!c<<1)|1;  break;
1312         default:                abort();
1313     }
1314     assert(m && (tc != ARM_CC_AL || !(m & (m - 1))));
1315     is(0xbf00 | (tc >> 24) | m);
1316 }
1317
1318 static void
1319 _corrr(jit_state_t *_jit, int cc, int o, int rn, int rd, int rm)
1320 {
1321     assert(!(cc & 0x0fffffff));
1322     assert(!(o  & 0xf00fff0f));
1323     ii(cc|o|(_u4(rn)<<16)|(_u4(rd)<<12)|_u4(rm));
1324 }
1325
1326 static void
1327 _corri(jit_state_t *_jit, int cc, int o, int rn, int rd, int im)
1328 {
1329     assert(!(cc & 0x0fffffff));
1330     assert(!(o  & 0xf00fffff));
1331     assert(!(im & 0xfffff000));
1332     ii(cc|o|(_u4(rn)<<16)|(_u4(rd)<<12)|_u12(im));
1333 }
1334
1335 static void
1336 _corri8(jit_state_t *_jit, int cc, int o, int rn, int rt, int im)
1337 {
1338     assert(!(cc & 0x0fffffff));
1339     assert(!(o  & 0xf00fff0f));
1340     assert(!(im & 0xffffff00));
1341     ii(cc|o|(_u4(rn)<<16)|(_u4(rt)<<12)|((im&0xf0)<<4)|(im&0x0f));
1342 }
1343
1344 static void
1345 _coriw(jit_state_t *_jit, int cc, int o, int rd, int im)
1346 {
1347     assert(!(cc & 0x0fffffff));
1348     assert(!(o  & 0xf00fffff));
1349     assert(!(im & 0xffff0000));
1350     ii(cc|o|((im&0xf000)<<4)|(_u4(rd)<<12)|(im&0xfff));
1351 }
1352
1353 static void
1354 _torrr(jit_state_t *_jit, int o, int rn, int rd, int rm)
1355 {
1356     jit_thumb_t thumb;
1357     assert(!(o & 0xf0f0f));
1358     thumb.i = o|(_u4(rn)<<16)|(_u4(rd)<<8)|_u4(rm);
1359     iss(thumb.s[0], thumb.s[1]);
1360 }
1361
1362 static void
1363 _torrrs(jit_state_t *_jit, int o, int rn, int rd, int rm, int im)
1364 {
1365     jit_thumb_t thumb;
1366     assert(!(o  & 0x000f0f0f));
1367     assert(!(im & 0xffff8f0f));
1368     thumb.i = o|(_u4(rn)<<16)|(_u4(rd)<<8)|im|_u4(rm);
1369     iss(thumb.s[0], thumb.s[1]);
1370 }
1371
1372 static void
1373 _torxr(jit_state_t *_jit, int o, int rn, int rt, int rm)
1374 {
1375     jit_thumb_t thumb;
1376     assert(!(o & 0xf0f0f));
1377     thumb.i = o|(_u4(rn)<<16)|(_u4(rt)<<12)|_u4(rm);
1378     iss(thumb.s[0], thumb.s[1]);
1379 }
1380
1381 static void
1382 _torrrr(jit_state_t *_jit, int o, int rn, int rl, int rh, int rm)
1383 {
1384     jit_thumb_t thumb;
1385     assert(!(o & 0x000fff0f));
1386     thumb.i = o|(_u4(rn)<<16)|(_u4(rl)<<12)|(_u4(rh)<<8)|_u4(rm);
1387     iss(thumb.s[0], thumb.s[1]);
1388 }
1389
1390 static void
1391 _torrri8(jit_state_t *_jit, int o, int rn, int rt, int rt2, int im)
1392 {
1393     jit_thumb_t thumb;
1394     assert(!(o  & 0x000fffff));
1395     assert(!(im & 0xffffff00));
1396     thumb.i = o|(_u4(rn)<<16)|(_u4(rt)<<12)|(_u4(rt2)<<8)|im;
1397     iss(thumb.s[0], thumb.s[1]);
1398 }
1399
1400 static void
1401 _torri(jit_state_t *_jit, int o, int rn, int rd, int im)
1402 {
1403     jit_thumb_t thumb;
1404     assert(!(o  & 0x0c0f7fff));
1405     assert(!(im & 0xfbff8f00));
1406     thumb.i = o|(_u4(rn)<<16)|(_u4(rd)<<8)|im;
1407     iss(thumb.s[0], thumb.s[1]);
1408 }
1409
1410 static void
1411 _torri8(jit_state_t *_jit, int o, int rn, int rt, int im)
1412 {
1413     jit_thumb_t thumb;
1414     assert(!(o  & 0x000ff0ff));
1415     assert(!(im & 0xffffff00));
1416     thumb.i = o|(_u4(rn)<<16)|(_u4(rt)<<12)|im;
1417     iss(thumb.s[0], thumb.s[1]);
1418 }
1419
1420 static void
1421 _torri12(jit_state_t *_jit, int o, int rn, int rt, int im)
1422 {
1423     jit_thumb_t thumb;
1424     assert(!(o  & 0x000fffff));
1425     assert(!(im & 0xfffff000));
1426     thumb.i = o|(_u4(rn)<<16)|(_u4(rt)<<12)|im;
1427     iss(thumb.s[0], thumb.s[1]);
1428 }
1429
1430 static void
1431 _tshift(jit_state_t *_jit, int o, int rd, int rm, int im)
1432 {
1433     jit_thumb_t thumb;
1434     assert(!(o & 0x7fcf));
1435     assert(im >= 0 && im < 32);
1436     thumb.i = o|((im&0x1c)<<10)|(_u4(rd)<<8)|((im&3)<<6)|_u4(rm);
1437     iss(thumb.s[0], thumb.s[1]);
1438 }
1439
1440 static void
1441 _toriw(jit_state_t *_jit, int o, int rd, int im)
1442 {
1443     jit_thumb_t thumb;
1444     assert(!(im & 0xffff0000));
1445     thumb.i = o|((im&0xf000)<<4)|((im&0x800)<<15)|((im&0x700)<<4)|(_u4(rd)<<8)|(im&0xff);
1446     iss(thumb.s[0], thumb.s[1]);
1447 }
1448
1449 static void
1450 _tc8(jit_state_t *_jit, int cc, int im)
1451 {
1452     assert(!(cc & 0x0fffffff));
1453     assert(cc != ARM_CC_AL && cc != ARM_CC_NV);
1454     assert(im >= -128 && im <= 127);
1455     is(THUMB_CC_B|(cc>>20)|(im&0xff));
1456 }
1457
1458 static void
1459 _t11(jit_state_t *_jit, int im)
1460 {
1461     assert(!(im & 0xfffff800));
1462     is(THUMB_B|im);
1463 }
1464
1465 static void
1466 _tcb(jit_state_t *_jit, int cc, int im)
1467 {
1468     jit_thumb_t thumb;
1469     assert(!(cc & 0xfffffff));
1470     assert(cc != ARM_CC_AL && cc != ARM_CC_NV);
1471     cc = ((jit_uint32_t)cc) >> 6;
1472     assert(!(im & (THUMB2_CC_B|cc)));
1473     thumb.i = THUMB2_CC_B|cc|im;
1474     iss(thumb.s[0], thumb.s[1]);
1475 }
1476
1477 static void
1478 _blxi(jit_state_t *_jit, int im)
1479 {
1480     assert(!(im & 0xfe000000));
1481     ii(ARM_BLXI|im);
1482 }
1483
1484 static void
1485 _tb(jit_state_t *_jit, int o, int im)
1486 {
1487     jit_thumb_t thumb;
1488     assert(!(o & 0x07ff2fff));
1489     assert(!(o & im));
1490     thumb.i = o|im;
1491     iss(thumb.s[0], thumb.s[1]);
1492 }
1493
1494 static void
1495 _corrrr(jit_state_t *_jit, int cc, int o, int rh, int rl, int rm, int rn)
1496 {
1497     assert(!(cc & 0x0fffffff));
1498     assert(!(o & 0xf00fff0f));
1499     ii(cc|o|(_u4(rh)<<16)|(_u4(rl)<<12)|(_u4(rm)<<8)|_u4(rn));
1500 }
1501
1502 static void
1503 _corrrs(jit_state_t *_jit, int cc, int o, int rn, int rd, int rm, int im)
1504 {
1505     assert(!(cc & 0x0fffffff));
1506     assert(!(o  & 0xf000ff8f));
1507     ii(cc|o|(_u4(rd)<<12)|(_u4(rn)<<16)|(im<<7)|_u4(rm));
1508 }
1509
1510 static void
1511 _cshift(jit_state_t *_jit, int cc, int o, int rd, int rm, int rn, int im)
1512 {
1513     assert(!(cc & 0x0fffffff));
1514     assert(!(o  & 0xffe0ff8f));
1515     assert(((_u4(rm)<<8)&(im<<7)) == 0);
1516     ii(cc|ARM_SHIFT|o|(_u4(rd)<<12)|(_u4(rm)<<8)|(im<<7)|_u4(rn));
1517 }
1518
1519 static void
1520 _cb(jit_state_t *_jit, int cc, int o, int im)
1521 {
1522     assert(!(cc & 0x0fffffff));
1523     assert(!(o  & 0xf0ffffff));
1524     ii(cc|o|_u24(im));
1525 }
1526
1527 static void
1528 _cbx(jit_state_t *_jit, int cc, int o, int rm)
1529 {
1530     assert(!(cc & 0x0fffffff));
1531     assert(!(o  & 0xf000000f));
1532     ii(cc|o|_u4(rm));
1533 }
1534
1535 static void
1536 _corl(jit_state_t *_jit, int cc, int o, int r0, int i0)
1537 {
1538     assert(!(cc & 0x0fffffff));
1539     assert(!(o  & 0xf00fffff));
1540     ii(cc|o|(_u4(r0)<<16)|_u16(i0));
1541 }
1542
1543 static void
1544 _c6orr(jit_state_t *_jit, int cc, int o, int rd, int rm)
1545 {
1546     assert(!(cc & 0x0fffffff));
1547     assert(!(o  & 0xf000f00f));
1548     ii(cc|o|(_u4(rd)<<12)|_u4(rm));
1549 }
1550
1551 static void
1552 _tpp(jit_state_t *_jit, int o, int im)
1553 {
1554     jit_thumb_t thumb;
1555     assert(!(o & 0x0000ffff));
1556     if (o == THUMB2_PUSH)
1557         assert(!(im & 0x8000));
1558     assert(__builtin_popcount(im & 0x7fff) > 1);
1559     thumb.i = o|im;
1560     iss(thumb.s[0], thumb.s[1]);
1561 }
1562
1563 static void
1564 _torl(jit_state_t *_jit, int o, int rn, int im)
1565 {
1566     jit_thumb_t thumb;
1567     assert(!(o & 0xf1fff));
1568     assert(rn != _R15 || !im || ((o & 0xc000) == 0xc000));
1569     assert(!(o & THUMB2_LDM_W) || !(im & (1 << rn)));
1570     thumb.i = o | (_u4(rn)<<16)|_u13(im);
1571     iss(thumb.s[0], thumb.s[1]);
1572 }
1573
1574 static void
1575 _dmb(jit_state_t *_jit, int im)
1576 {
1577     assert(!(im & 0xfffffff0));
1578     ii(ARM_DMB|im);
1579 }
1580
1581 static void
1582 _tdmb(jit_state_t *_jit, int im)
1583 {
1584     jit_thumb_t thumb;
1585     assert(!(im & 0xfffffff0));
1586     thumb.i = THUMB2_DMB | im;
1587     iss(thumb.s[0], thumb.s[1]);
1588 }
1589
1590 static void
1591 _nop(jit_state_t *_jit, jit_int32_t i0)
1592 {
1593     if (jit_thumb_p()) {
1594         for (; i0 > 0; i0 -= 2)
1595             T1_NOP();
1596     }
1597     else {
1598         for (; i0 > 0; i0 -= 4)
1599             NOP();
1600     }
1601     assert(i0 == 0);
1602 }
1603
1604 static void
1605 _movr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1606 {
1607     if (r0 != r1) {
1608         if (jit_thumb_p())
1609             T1_MOV(r0, r1);
1610         else
1611             MOV(r0, r1);
1612     }
1613 }
1614
1615 static void
1616 _movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
1617 {
1618     int                 i;
1619     if (jit_thumb_p()) {
1620         if (!jit_no_set_flags() && r0 < 8 && !(i0 & 0xffffff80))
1621             T1_MOVI(r0, i0);
1622         else if ((i = encode_thumb_immediate(i0)) != -1)
1623             T2_MOVI(r0, i);
1624         else if ((i = encode_thumb_immediate(~i0)) != -1)
1625             T2_MVNI(r0, i);
1626         else {
1627             T2_MOVWI(r0, (jit_uint16_t)i0);
1628             if (i0 & 0xffff0000)
1629                 T2_MOVTI(r0, (jit_uint16_t)((unsigned)i0 >> 16));
1630         }
1631     }
1632     else {
1633         if (jit_armv6_p() && !(i0 & 0xffff0000))
1634             MOVWI(r0, i0);
1635         else if ((i = encode_arm_immediate(i0)) != -1)
1636             MOVI(r0, i);
1637         else if ((i = encode_arm_immediate(~i0)) != -1)
1638             MVNI(r0, i);
1639         else if (jit_armv6_p()) {
1640             MOVWI(r0, (jit_uint16_t)(i0));
1641             if ((i0 & 0xffff0000))
1642                 MOVTI(r0, (jit_uint16_t)((unsigned)i0 >> 16));
1643         }
1644         else
1645             load_const(0, r0, i0);
1646     }
1647 }
1648
1649 static jit_word_t
1650 _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
1651 {
1652     jit_word_t          w;
1653     w = _jit->pc.w;
1654     if (jit_thumb_p()) {
1655         T2_MOVWI(r0, (jit_uint16_t)(i0));
1656         T2_MOVTI(r0, (jit_uint16_t)((unsigned)i0 >> 16));
1657     }
1658     else
1659         load_const(1, r0, 0);
1660     return (w);
1661 }
1662
1663 static void
1664 _movznr(jit_state_t *_jit, int ct, jit_int32_t r0,
1665         jit_int32_t r1, jit_int32_t r2)
1666 {
1667     if (jit_thumb_p()) {
1668         if (r2 < 7)
1669             T1_CMPI(r2, 0);
1670         else
1671             T2_CMPI(r2, 0);
1672         IT(ct);
1673         T1_MOV(r0, r1);
1674     } else {
1675         CMPI(r2, 0);
1676         CC_MOV(ct, r0, r1);
1677     }
1678 }
1679
1680 static void
1681 _movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1682 {
1683     _movznr(_jit, ARM_CC_NE, r0, r1, r2);
1684 }
1685
1686 static void
1687 _movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1688 {
1689     _movznr(_jit, ARM_CC_EQ, r0, r1, r2);
1690 }
1691
1692 static void
1693 _casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
1694       jit_int32_t r2, jit_int32_t r3, jit_word_t i0)
1695 {
1696     jit_int32_t         r1_reg, iscasi;
1697     jit_word_t          retry, done, jump0, jump1;
1698     if (!jit_armv7_p())
1699         fallback_casx(r0, r1, r2, r3, i0);
1700     else {
1701         if ((iscasi = (r1 == _NOREG))) {
1702             r1_reg = jit_get_reg(jit_class_gpr);
1703             r1 = rn(r1_reg);
1704             movi(r1, i0);
1705         }
1706         if (jit_thumb_p()) {
1707             T2_DMB(DMB_ISH);
1708             /* retry: */
1709             retry = _jit->pc.w;
1710             T2_LDREX(r0, r1, 0);
1711             eqr(r0, r0, r2);
1712             jump0 = beqi(_jit->pc.w, r0, 0);    /* beqi done r0 0 */
1713             T2_STREX(r0, r3, r1, 0);
1714             jump1 = bnei(_jit->pc.w, r0, 0);    /* bnei retry r0 0 */
1715             /* r0 = 0 if memory updated, 1 otherwise */
1716             xori(r0, r0, 1);
1717             /* done: */
1718             done = _jit->pc.w;
1719             T2_DMB(DMB_ISH);
1720         }
1721         else {
1722             DMB(DMB_ISH);
1723             /* retry: */
1724             retry = _jit->pc.w;
1725             LDREX(r0, r1);
1726             eqr(r0, r0, r2);
1727             jump0 = beqi(_jit->pc.w, r0, 0);    /* beqi done r0 0 */
1728             STREX(r0, r3, r1);
1729             jump1 = bnei(_jit->pc.w, r0, 0);    /* bnei retry r0 0 */
1730             /* r0 = 0 if memory updated, 1 otherwise */
1731             xori(r0, r0, 1);
1732             /* done: */
1733             done = _jit->pc.w;
1734             DMB(DMB_ISH);
1735         }
1736         patch_at(arm_patch_jump, jump0, done);
1737         patch_at(arm_patch_jump, jump1, retry);
1738         if (iscasi)
1739             jit_unget_reg(r1_reg);
1740     }
1741 }
1742
1743 static void
1744 _comr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1745 {
1746     if (jit_thumb_p()) {
1747         if (!jit_no_set_flags() && (r0|r1) < 8)
1748             T1_NOT(r0, r1);
1749         else
1750             T2_NOT(r0, r1);
1751     }
1752     else
1753         NOT(r0, r1);
1754 }
1755
1756 static void
1757 _negr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1758 {
1759     if (jit_thumb_p()) {
1760         if (!jit_no_set_flags() && (r0|r1) < 8)
1761             T1_RSBI(r0, r1);
1762         else
1763             T2_RSBI(r0, r1, 0);
1764     }
1765     else
1766         RSBI(r0, r1, 0);
1767 }
1768
1769 static void
1770 _clzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1771 {
1772     if (!jit_thumb_p() && jit_armv5e_p())
1773         CLZ(r0, r1);
1774     else if (jit_thumb_p() && jit_armv7_p()) {  /* armv6t2 actually */
1775         T2_CLZ(r0, r1);
1776     }
1777     else
1778         fallback_clz(r0, r0);
1779 }
1780
static void
_clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
    /* Count leading ones of r1 into r0: complement r1 into r0, then
     * count the leading zeros of that complement in place. */
    comr(r0, r1);
    clzr(r0, r0);
}
1787
1788 static void
1789 _ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1790 {
1791     if (jit_armv7_p()) {        /* armv6t2 actually */
1792         if (jit_thumb_p())
1793             T2_RBIT(r0, r1);
1794         else
1795             RBIT(r0, r1);
1796         clor(r0, r0);
1797     }
1798     else
1799         fallback_cto(r0, r1);
1800 }
1801
1802 static void
1803 _ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1804 {
1805     if (jit_armv7_p()) {        /* armv6t2 actually */
1806         if (jit_thumb_p())
1807             T2_RBIT(r0, r1);
1808         else
1809             RBIT(r0, r1);
1810         clzr(r0, r0);
1811     }
1812     else
1813         fallback_ctz(r0, r1);
1814 }
1815
1816 static void
1817 _addr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1818 {
1819     if (jit_thumb_p()) {
1820         if (!jit_no_set_flags() && (r0|r1|r2) < 8)
1821             T1_ADD(r0, r1, r2);
1822         else if (r0 == r1 || r0 == r2)
1823             T1_ADDX(r0, r0 == r1 ? r2 : r1);
1824         else
1825             T2_ADD(r0, r1, r2);
1826     }
1827     else
1828         ADD(r0, r1, r2);
1829 }
1830
1831 static void
1832 _addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1833 {
1834     int                 i;
1835     jit_int32_t         reg;
1836     if (jit_thumb_p()) {
1837         if (!jit_no_set_flags() && (r0|r1) < 8 && !(i0 & ~7))
1838             T1_ADDI3(r0, r1, i0);
1839         else if (!jit_no_set_flags() && (r0|r1) < 8 && !(-i0 & ~7))
1840             T1_SUBI3(r0, r1, -i0);
1841         else if (!jit_no_set_flags() && r0 < 8 && r0 == r1 && !(i0 & ~0xff))
1842             T1_ADDI8(r0, i0);
1843         else if (!jit_no_set_flags() && r0 < 8 && r0 == r1 && !(-i0 & ~0xff))
1844             T1_SUBI8(r0, -i0);
1845         else if ((i = encode_thumb_immediate(i0)) != -1)
1846             T2_ADDI(r0, r1, i);
1847         else if ((i = encode_thumb_immediate(-i0)) != -1)
1848             T2_SUBI(r0, r1, i);
1849         else if ((i = encode_thumb_word_immediate(i0)) != -1)
1850             T2_ADDWI(r0, r1, i);
1851         else if ((i = encode_thumb_word_immediate(-i0)) != -1)
1852             T2_SUBWI(r0, r1, i);
1853         else {
1854             reg = jit_get_reg(jit_class_gpr);
1855             movi(rn(reg), i0);
1856             T2_ADD(r0, r1, rn(reg));
1857             jit_unget_reg(reg);
1858         }
1859     }
1860     else {
1861         if ((i = encode_arm_immediate(i0)) != -1)
1862             ADDI(r0, r1, i);
1863         else if ((i = encode_arm_immediate(-i0)) != -1)
1864             SUBI(r0, r1, i);
1865         else if (r0 != r1) {
1866             movi(r0, i0);
1867             ADD(r0, r1, r0);
1868         }
1869         else {
1870             reg = jit_get_reg(jit_class_gpr);
1871             movi(rn(reg), i0);
1872             ADD(r0, r1, rn(reg));
1873             jit_unget_reg(reg);
1874         }
1875     }
1876 }
1877
1878 static void
1879 _addcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1880 {
1881     if (jit_thumb_p()) {
1882         /* thumb auto set carry if not inside IT block */
1883         if ((r0|r1|r2) < 8)
1884             T1_ADD(r0, r1, r2);
1885         else
1886             T2_ADDS(r0, r1, r2);
1887     }
1888     else
1889         ADDS(r0, r1, r2);
1890 }
1891
1892 static void
1893 _addci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1894 {
1895     int                 i;
1896     jit_int32_t         reg;
1897     if (jit_thumb_p()) {
1898         if ((r0|r1) < 8 && !(i0 & ~7))
1899             T1_ADDI3(r0, r1, i0);
1900         else if ((r0|r1) < 8 && !(-i0 & ~7))
1901             T1_SUBI3(r0, r1, -i0);
1902         else if (r0 < 8 && r0 == r1 && !(i0 & ~0xff))
1903             T1_ADDI8(r0, i0);
1904         else if (r0 < 8 && r0 == r1 && !(-i0 & ~0xff))
1905             T1_SUBI8(r0, -i0);
1906         else if ((i = encode_thumb_immediate(i0)) != -1)
1907             T2_ADDSI(r0, r1, i);
1908         else if ((i = encode_thumb_immediate(-i0)) != -1)
1909             T2_SUBSI(r0, r1, i);
1910         else {
1911             reg = jit_get_reg(jit_class_gpr);
1912             movi(rn(reg), i0);
1913             T2_ADDS(r0, r1, rn(reg));
1914             jit_unget_reg(reg);
1915         }
1916     }
1917     else {
1918         if ((i = encode_arm_immediate(i0)) != -1)
1919             ADDSI(r0, r1, i);
1920         else if ((i = encode_arm_immediate(-i0)) != -1)
1921             SUBSI(r0, r1, i);
1922         else if (r0 != r1) {
1923             movi(r0, i0);
1924             ADDS(r0, r1, r0);
1925         }
1926         else {
1927             reg = jit_get_reg(jit_class_gpr);
1928             movi(rn(reg), i0);
1929             ADDS(r0, r1, rn(reg));
1930             jit_unget_reg(reg);
1931         }
1932     }
1933 }
1934
1935 static void
1936 _addxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1937 {
1938     /* keep setting carry because don't know last ADC */
1939     if (jit_thumb_p()) {
1940         /* thumb auto set carry if not inside IT block */
1941         if ((r0|r1|r2) < 8 && (r0 == r1 || r0 == r2))
1942             T1_ADC(r0, r0 == r1 ? r2 : r1);
1943         else
1944             T2_ADCS(r0, r1, r2);
1945     }
1946     else
1947         ADCS(r0, r1, r2);
1948 }
1949
1950 static void
1951 _addxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1952 {
1953     int                 i;
1954     jit_int32_t         reg;
1955     int                 no_set_flags;
1956     if (jit_thumb_p()) {
1957         no_set_flags = jit_no_set_flags();
1958         jit_no_set_flags() = 1;
1959         if ((i = encode_thumb_immediate(i0)) != -1)
1960             T2_ADCSI(r0, r1, i);
1961         else if ((i = encode_thumb_immediate(-i0)) != -1)
1962             T2_SBCSI(r0, r1, i);
1963         else if (r0 != r1) {
1964             movi(r0, i0);
1965             T2_ADCS(r0, r1, r0);
1966         }
1967         else {
1968             reg = jit_get_reg(jit_class_gpr);
1969             movi(rn(reg), i0);
1970             T2_ADCS(r0, r1, rn(reg));
1971             jit_unget_reg(reg);
1972         }
1973         jit_no_set_flags() = no_set_flags;
1974     }
1975     else {
1976         if ((i = encode_arm_immediate(i0)) != -1)
1977             ADCSI(r0, r1, i);
1978         else if ((i = encode_arm_immediate(-i0)) != -1)
1979             SBCSI(r0, r1, i);
1980         else if (r0 != r1) {
1981             movi(r0, i0);
1982             ADCS(r0, r1, r0);
1983         }
1984         else {
1985             reg = jit_get_reg(jit_class_gpr);
1986             movi(rn(reg), i0);
1987             ADCS(r0, r1, rn(reg));
1988             jit_unget_reg(reg);
1989         }
1990     }
1991 }
1992
1993 static void
1994 _subr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1995 {
1996     if (jit_thumb_p()) {
1997         if (!jit_no_set_flags() && (r0|r1|r2) < 8)
1998             T1_SUB(r0, r1, r2);
1999         else
2000             T2_SUB(r0, r1, r2);
2001     }
2002     else
2003         SUB(r0, r1, r2);
2004 }
2005
2006 static void
2007 _subi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2008 {
2009     int                 i;
2010     jit_int32_t         reg;
2011     if (jit_thumb_p()) {
2012         if (!jit_no_set_flags() && (r0|r1) < 8 && !(i0 & ~7))
2013             T1_SUBI3(r0, r1, i0);
2014         else if (!jit_no_set_flags() && (r0|r1) < 8 && !(-i0 & ~7))
2015             T1_ADDI3(r0, r1, -i0);
2016         else if (!jit_no_set_flags() && r0 < 8 && r0 == r1 && !(i0 & ~0xff))
2017             T1_SUBI8(r0, i0);
2018         else if (!jit_no_set_flags() && r0 < 8 && r0 == r1 && !(-i0 & ~0xff))
2019             T1_ADDI8(r0, -i0);
2020         else if ((i = encode_thumb_immediate(i0)) != -1)
2021             T2_SUBI(r0, r1, i);
2022         else if ((i = encode_thumb_immediate(-i0)) != -1)
2023             T2_ADDI(r0, r1, i);
2024         else if ((i = encode_thumb_word_immediate(i0)) != -1)
2025             T2_SUBWI(r0, r1, i);
2026         else if ((i = encode_thumb_word_immediate(-i0)) != -1)
2027             T2_ADDWI(r0, r1, i);
2028         else {
2029             reg = jit_get_reg(jit_class_gpr);
2030             movi(rn(reg), i0);
2031             T2_SUB(r0, r1, rn(reg));
2032             jit_unget_reg(reg);
2033         }
2034     }
2035     else {
2036         if ((i = encode_arm_immediate(i0)) != -1)
2037             SUBI(r0, r1, i);
2038         else if ((i = encode_arm_immediate(-i0)) != -1)
2039             ADDI(r0, r1, i);
2040         else if (r0 != r1) {
2041             movi(r0, i0);
2042             SUB(r0, r1, r0);
2043         }
2044         else {
2045             reg = jit_get_reg(jit_class_gpr);
2046             movi(rn(reg), i0);
2047             SUB(r0, r1, rn(reg));
2048             jit_unget_reg(reg);
2049         }
2050     }
2051 }
2052
2053 static void
2054 _subcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2055 {
2056     if (jit_thumb_p()) {
2057         /* thumb auto set carry if not inside IT block */
2058         if ((r0|r1|r2) < 8)
2059             T1_SUB(r0, r1, r2);
2060         else
2061             T2_SUBS(r0, r1, r2);
2062     }
2063     else
2064         SUBS(r0, r1, r2);
2065 }
2066
2067 static void
2068 _subci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2069 {
2070     int                 i;
2071     jit_int32_t         reg;
2072     if (jit_thumb_p()) {
2073         if ((r0|r1) < 8 && !(i0 & ~7))
2074             T1_SUBI3(r0, r1, i0);
2075         else if ((r0|r1) < 8 && !(-i0 & ~7))
2076             T1_ADDI3(r0, r1, -i0);
2077         else if (r0 < 8 && r0 == r1 && !(i0 & ~0xff))
2078             T1_SUBI8(r0, i0);
2079         else if (r0 < 8 && r0 == r1 && !(-i0 & ~0xff))
2080             T1_ADDI8(r0, -i0);
2081         else if ((i = encode_thumb_immediate(i0)) != -1)
2082             T2_SUBSI(r0, r1, i);
2083         else if ((i = encode_thumb_immediate(-i0)) != -1)
2084             T2_ADDSI(r0, r1, i);
2085         else {
2086             reg = jit_get_reg(jit_class_gpr);
2087             movi(rn(reg), i0);
2088             T2_SUBS(r0, r1, rn(reg));
2089             jit_unget_reg(reg);
2090         }
2091     }
2092     else {
2093         if ((i = encode_arm_immediate(i0)) != -1)
2094             SUBSI(r0, r1, i);
2095         else if ((i = encode_arm_immediate(-i0)) != -1)
2096             ADDSI(r0, r1, i);
2097         else if (r0 != r1) {
2098             movi(r0, i0);
2099             SUBS(r0, r1, r0);
2100         }
2101         else {
2102             reg = jit_get_reg(jit_class_gpr);
2103             movi(rn(reg), i0);
2104             SUBS(r0, r1, rn(reg));
2105             jit_unget_reg(reg);
2106         }
2107     }
2108 }
2109
2110 static void
2111 _subxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2112 {
2113     /* keep setting carry because don't know last SBC */
2114     if (jit_thumb_p()) {
2115         /* thumb auto set carry if not inside IT block */
2116         if ((r0|r1|r2) < 8 && r0 == r1)
2117             T1_SBC(r0, r2);
2118         else
2119             T2_SBCS(r0, r1, r2);
2120     }
2121     else
2122         SBCS(r0, r1, r2);
2123 }
2124
2125 static void
2126 _subxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2127 {
2128     int                 i;
2129     jit_int32_t         reg;
2130     int                 no_set_flags;
2131     if (jit_thumb_p()) {
2132         no_set_flags = jit_no_set_flags();
2133         jit_no_set_flags() = 1;
2134         if ((i = encode_arm_immediate(i0)) != -1)
2135             T2_SBCSI(r0, r1, i);
2136         else if ((i = encode_arm_immediate(-i0)) != -1)
2137             T2_ADCSI(r0, r1, i);
2138         else if (r0 != r1) {
2139             movi(r0, i0);
2140             T2_SBCS(r0, r1, r0);
2141         }
2142         else {
2143             reg = jit_get_reg(jit_class_gpr);
2144             movi(rn(reg), i0);
2145             SBCS(r0, r1, rn(reg));
2146             jit_unget_reg(reg);
2147         }
2148         jit_no_set_flags() = no_set_flags;
2149     }
2150     else {
2151         if ((i = encode_arm_immediate(i0)) != -1)
2152             SBCSI(r0, r1, i);
2153         else if ((i = encode_arm_immediate(-i0)) != -1)
2154             ADCSI(r0, r1, i);
2155         else if (r0 != r1) {
2156             movi(r0, i0);
2157             SBCS(r0, r1, r0);
2158         }
2159         else {
2160             reg = jit_get_reg(jit_class_gpr);
2161             movi(rn(reg), i0);
2162             SBCS(r0, r1, rn(reg));
2163             jit_unget_reg(reg);
2164         }
2165     }
2166 }
2167
static void
_rsbi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
{
    /* Reverse subtract with an immediate: compute r1 - i0 into r0,
     * then negate r0 in place, leaving i0 - r1. */
    subi(r0, r1, i0);
    negr(r0, r0);
}
2174
2175 static void
2176 _mulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2177 {
2178     jit_int32_t         reg;
2179     if (jit_thumb_p()) {
2180         if (!jit_no_set_flags() && r0 == r2 && (r0|r1) < 8)
2181             T1_MUL(r0, r1);
2182         else if (!jit_no_set_flags() && r0 == r1 && (r0|r2) < 8)
2183             T1_MUL(r0, r2);
2184         else
2185             T2_MUL(r0, r1, r2);
2186     }
2187     else {
2188         if (r0 == r1 && !jit_armv6_p()) {
2189             if (r0 != r2)
2190                 MUL(r0, r2, r1);
2191             else {
2192                 reg = jit_get_reg(jit_class_gpr);
2193                 MOV(rn(reg), r1);
2194                 MUL(r0, rn(reg), r2);
2195                 jit_unget_reg(reg);
2196             }
2197         }
2198         else
2199             MUL(r0, r1, r2);
2200     }
2201 }
2202
2203 static void
2204 _muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2205 {
2206     jit_int32_t         reg;
2207     reg = jit_get_reg(jit_class_gpr);
2208     movi(rn(reg), i0);
2209     mulr(r0, r1, rn(reg));
2210     jit_unget_reg(reg);
2211 }
2212
2213 static void
2214 _iqmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
2215         jit_int32_t r2, jit_int32_t r3, jit_bool_t sign)
2216 {
2217     jit_int32_t         reg;
2218     if (jit_thumb_p()) {
2219         if (r2 == r3) {
2220             reg = jit_get_reg(jit_class_gpr);
2221             movr(rn(reg), r2);
2222             if (sign)
2223                 T2_SMULL(r0, r1, rn(reg), r2);
2224             else
2225                 T2_UMULL(r0, r1, rn(reg), r2);
2226             jit_unget_reg(reg);
2227         }
2228         else if (r0 != r2 && r1 != r2) {
2229             if (sign)
2230                 T2_SMULL(r0, r1, r2, r3);
2231             else
2232                 T2_UMULL(r0, r1, r2, r3);
2233         }
2234         else {
2235             if (sign)
2236                 T2_SMULL(r0, r1, r3, r2);
2237             else
2238                 T2_UMULL(r0, r1, r3, r2);
2239         }
2240     }
2241     else {
2242         if (r2 == r3) {
2243             reg = jit_get_reg(jit_class_gpr);
2244             movr(rn(reg), r2);
2245             if (sign)
2246                 SMULL(r0, r1, rn(reg), r2);
2247             else
2248                 UMULL(r0, r1, rn(reg), r2);
2249             jit_unget_reg(reg);
2250         }
2251         else if (r0 != r2 && r1 != r2) {
2252             if (sign)
2253                 SMULL(r0, r1, r2, r3);
2254             else
2255                 UMULL(r0, r1, r2, r3);
2256         }
2257         else {
2258             if (sign)
2259                 SMULL(r0, r1, r3, r2);
2260             else
2261                 UMULL(r0, r1, r3, r2);
2262         }
2263     }
2264 }
2265
2266 static void
2267 _iqmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
2268         jit_int32_t r2, jit_word_t i0, jit_bool_t sign)
2269 {
2270     jit_int32_t         reg;
2271     reg = jit_get_reg(jit_class_gpr);
2272     movi(rn(reg), i0);
2273     iqmulr(r0, r1, r2, rn(reg), sign);
2274     jit_unget_reg(reg);
2275 }
2276
2277 static void
2278 _divrem(jit_state_t *_jit, int div, int sign,
2279         jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2280 {
2281     jit_word_t          d;
2282     jit_word_t          w;
2283     jit_get_reg_args();
2284     movr(_R0_REGNO, r1);
2285     movr(_R1_REGNO, r2);
2286     if (sign)                   w = (jit_word_t)__aeabi_idivmod;
2287     else                        w = (jit_word_t)__aeabi_uidivmod;
2288     if (!jit_exchange_p()) {
2289         if (jit_thumb_p())      d = ((w - _jit->pc.w) >> 1) - 2;
2290         else                    d = ((w - _jit->pc.w) >> 2) - 2;
2291         if (_s24P(d)) {
2292             if (jit_thumb_p())  T2_BLI(encode_thumb_jump(d));
2293             else                BLI(d & 0x00ffffff);
2294         }
2295         else                    goto fallback;
2296     }
2297     else {
2298     fallback:
2299         movi(_R2_REGNO, w);
2300         if (jit_thumb_p())      T1_BLX(_R2_REGNO);
2301         else                    BLX(_R2_REGNO);
2302     }
2303     if (div)                    movr(r0, _R0_REGNO);
2304     else                        movr(r0, _R1_REGNO);
2305     jit_unget_reg_args();
2306 }
2307
2308 static void
2309 _divr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2310 {
2311     if (jit_armv7r_p()) {
2312         if (jit_thumb_p())
2313             T2_SDIV(r0, r1, r2);
2314         else
2315             SDIV(r0, r1, r2);
2316     }
2317     else
2318         divrem(1, 1, r0, r1, r2);
2319 }
2320
2321 static void
2322 _divi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2323 {
2324     jit_int32_t         reg;
2325     reg = jit_get_reg(jit_class_gpr);
2326     movi(rn(reg), i0);
2327     divr(r0, r1, rn(reg));
2328     jit_unget_reg(reg);
2329 }
2330
2331 static void
2332 _divr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2333 {
2334     if (jit_armv7r_p()) {
2335         if (jit_thumb_p())
2336             T2_UDIV(r0, r1, r2);
2337         else
2338             UDIV(r0, r1, r2);
2339     }
2340     else
2341         divrem(1, 0, r0, r1, r2);
2342 }
2343
2344 static void
2345 _divi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2346 {
2347     jit_int32_t         reg;
2348     reg = jit_get_reg(jit_class_gpr);
2349     movi(rn(reg), i0);
2350     divr_u(r0, r1, rn(reg));
2351     jit_unget_reg(reg);
2352 }
2353
2354 static void
2355 _iqdivr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
2356         jit_int32_t r2, jit_int32_t r3, jit_bool_t sign)
2357 {
2358     jit_word_t          d;
2359     jit_word_t          w;
2360     jit_get_reg_args();
2361     movr(_R0_REGNO, r2);
2362     movr(_R1_REGNO, r3);
2363     if (sign)                   w = (jit_word_t)__aeabi_idivmod;
2364     else                        w = (jit_word_t)__aeabi_uidivmod;
2365     if (!jit_exchange_p()) {
2366         if (jit_thumb_p())      d = ((w - _jit->pc.w) >> 1) - 2;
2367         else                    d = ((w - _jit->pc.w) >> 2) - 2;
2368         if (_s24P(d)) {
2369             if (jit_thumb_p())  T2_BLI(encode_thumb_jump(d));
2370             else                BLI(d & 0x00ffffff);
2371         }
2372         else                    goto fallback;
2373     }
2374     else {
2375     fallback:
2376         movi(_R2_REGNO, w);
2377         if (jit_thumb_p())      T1_BLX(_R2_REGNO);
2378         else                    BLX(_R2_REGNO);
2379     }
2380     movr(r0, _R0_REGNO);
2381     movr(r1, _R1_REGNO);
2382     jit_unget_reg_args();
2383 }
2384
2385 static void
2386 _iqdivi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
2387         jit_int32_t r2, jit_word_t i0, jit_bool_t sign)
2388 {
2389     jit_int32_t         reg;
2390     reg = jit_get_reg(jit_class_gpr);
2391     movi(rn(reg), i0);
2392     iqdivr(r0, r1, r2, rn(reg), sign);
2393     jit_unget_reg(reg);
2394 }
2395
2396 static void
2397 _remr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2398 {
2399     if (jit_armv7r_p()) {
2400         jit_int32_t             reg;
2401         if (r0 == r1 || r0 == r2) {
2402             reg = jit_get_reg(jit_class_gpr);
2403             divr(rn(reg), r1, r2);
2404             mulr(rn(reg), r2, rn(reg));
2405             subr(r0, r1, rn(reg));
2406             jit_unget_reg(reg);
2407         }
2408         else {
2409             divr(r0, r1, r2);
2410             mulr(r0, r2, r0);
2411             subr(r0, r1, r0);
2412         }
2413     }
2414     else
2415         divrem(0, 1, r0, r1, r2);
2416 }
2417
2418 static void
2419 _remi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2420 {
2421     jit_int32_t         reg;
2422     reg = jit_get_reg(jit_class_gpr);
2423     movi(rn(reg), i0);
2424     remr(r0, r1, rn(reg));
2425     jit_unget_reg(reg);
2426 }
2427
2428 static void
2429 _remr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2430 {
2431     if (jit_armv7r_p()) {
2432         jit_int32_t             reg;
2433         if (r0 == r1 || r0 == r2) {
2434             reg = jit_get_reg(jit_class_gpr);
2435             divr_u(rn(reg), r1, r2);
2436             mulr(rn(reg), r2, rn(reg));
2437             subr(r0, r1, rn(reg));
2438             jit_unget_reg(reg);
2439         }
2440         else {
2441             divr_u(r0, r1, r2);
2442             mulr(r0, r2, r0);
2443             subr(r0, r1, r0);
2444         }
2445     }
2446     else
2447         divrem(0, 0, r0, r1, r2);
2448 }
2449
2450 static void
2451 _remi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2452 {
2453     jit_int32_t         reg;
2454     reg = jit_get_reg(jit_class_gpr);
2455     movi(rn(reg), i0);
2456     remr_u(r0, r1,rn(reg));
2457     jit_unget_reg(reg);
2458 }
2459
2460 static void
2461 _andr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2462 {
2463     if (jit_thumb_p()) {
2464         if (!jit_no_set_flags() && (r0|r1|r2) < 8 && (r0 == r1 || r0 == r2))
2465             T1_AND(r0, r0 == r1 ? r2 : r1);
2466         else
2467             T2_AND(r0, r1, r2);
2468     }
2469     else
2470         AND(r0, r1, r2);
2471 }
2472
2473 static void
2474 _andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2475 {
2476     int                 i;
2477     jit_int32_t         reg;
2478     if (jit_thumb_p()) {
2479         if ((i = encode_thumb_immediate(i0)) != -1)
2480             T2_ANDI(r0, r1, i);
2481         else if ((i = encode_thumb_immediate(~i0)) != -1)
2482             T2_BICI(r0, r1, i);
2483         else if (r0 != r1) {
2484             movi(r0, i0);
2485             T2_AND(r0, r1, r0);
2486         }
2487         else {
2488             reg = jit_get_reg(jit_class_gpr);
2489             movi(rn(reg), i0);
2490             T2_AND(r0, r1, rn(reg));
2491             jit_unget_reg(reg);
2492         }
2493     }
2494     else {
2495         if ((i = encode_arm_immediate(i0)) != -1)
2496             ANDI(r0, r1, i);
2497         else if ((i = encode_arm_immediate(~i0)) != -1)
2498             BICI(r0, r1, i);
2499         else if (r0 != r1) {
2500             movi(r0, i0);
2501             AND(r0, r1, r0);
2502         }
2503         else {
2504             reg = jit_get_reg(jit_class_gpr);
2505             movi(rn(reg), i0);
2506             AND(r0, r1, rn(reg));
2507             jit_unget_reg(reg);
2508         }
2509     }
2510 }
2511
2512 static void
2513 _orr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2514 {
2515     if (jit_thumb_p()) {
2516         if (!jit_no_set_flags() && (r0|r1|r2) < 8 && (r0 == r1 || r0 == r2))
2517             T1_ORR(r0, r0 == r1 ? r2 : r1);
2518         else
2519             T2_ORR(r0, r1, r2);
2520     }
2521     else
2522         ORR(r0, r1, r2);
2523 }
2524
2525 static void
2526 _ori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2527 {
2528     int                 i;
2529     jit_int32_t         reg;
2530     if (jit_thumb_p()) {
2531         if ((i = encode_thumb_immediate(i0)) != -1)
2532             T2_ORRI(r0, r1, i);
2533         else if (r0 != r1) {
2534             movi(r0, i0);
2535             T2_ORR(r0, r1, r0);
2536         }
2537         else {
2538             reg = jit_get_reg(jit_class_gpr);
2539             movi(rn(reg), i0);
2540             T2_ORR(r0, r1, rn(reg));
2541             jit_unget_reg(reg);
2542         }
2543     }
2544     else {
2545         if ((i = encode_arm_immediate(i0)) != -1)
2546             ORRI(r0, r1, i);
2547         else if (r0 != r1) {
2548             movi(r0, i0);
2549             ORR(r0, r1, r0);
2550         }
2551         else {
2552             reg = jit_get_reg(jit_class_gpr);
2553             movi(rn(reg), i0);
2554             ORR(r0, r1, rn(reg));
2555             jit_unget_reg(reg);
2556         }
2557     }
2558 }
2559
2560 static void
2561 _xorr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2562 {
2563     if (jit_thumb_p()) {
2564         if (!jit_no_set_flags() && (r0|r1|r2) < 8 && (r0 == r1 || r0 == r2))
2565             T1_EOR(r0, r0 == r1 ? r2 : r1);
2566         else
2567             T2_EOR(r0, r1, r2);
2568     }
2569     else
2570         EOR(r0, r1, r2);
2571 }
2572
2573 static void
2574 _xori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2575 {
2576     int                 i;
2577     jit_int32_t         reg;
2578     if (jit_thumb_p()) {
2579         if ((i = encode_thumb_immediate(i0)) != -1)
2580             T2_EORI(r0, r1, i);
2581         else if (r0 != r1) {
2582             movi(r0, i0);
2583             T2_EOR(r0, r1, r0);
2584         }
2585         else {
2586             reg = jit_get_reg(jit_class_gpr);
2587             movi(rn(reg), i0);
2588             T2_EOR(r0, r1, rn(reg));
2589             jit_unget_reg(reg);
2590         }
2591     }
2592     else {
2593         if ((i = encode_arm_immediate(i0)) != -1)
2594             EORI(r0, r1, i);
2595         else if (r0 != r1) {
2596             movi(r0, i0);
2597             EOR(r0, r1, r0);
2598         }
2599         else {
2600             reg = jit_get_reg(jit_class_gpr);
2601             movi(rn(reg), i0);
2602             EOR(r0, r1, rn(reg));
2603             jit_unget_reg(reg);
2604         }
2605     }
2606 }
2607
2608 static void
2609 _lshr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2610 {
2611     if (jit_thumb_p()) {
2612         if (!jit_no_set_flags() && (r0|r1|r2) < 8 && r0 == r1)
2613             T1_LSL(r0, r2);
2614         else
2615             T2_LSL(r0, r1, r2);
2616     }
2617     else
2618         LSL(r0, r1, r2);
2619 }
2620
2621 static void
2622 _lshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2623 {
2624     assert(i0 >= 0 && i0 <= 31);
2625     if (i0 == 0)
2626         movr(r0, r1);
2627     else if (jit_thumb_p()) {
2628         if (!jit_no_set_flags() && (r0|r1) < 8)
2629             T1_LSLI(r0, r1, i0);
2630         else
2631             T2_LSLI(r0, r1, i0);
2632     }
2633     else
2634         LSLI(r0, r1, i0);
2635 }
2636
2637 static void
2638 _rshr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2639 {
2640     if (jit_thumb_p()) {
2641         if (!jit_no_set_flags() && (r0|r1|r2) < 8 && r0 == r1)
2642             T1_ASR(r0, r2);
2643         else
2644             T2_ASR(r0, r1, r2);
2645     }
2646     else
2647         ASR(r0, r1, r2);
2648 }
2649
2650 static void
2651 _rshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2652 {
2653     assert(i0 >= 0 && i0 <= 31);
2654     if (i0 == 0)
2655         movr(r0, r1);
2656     else if (jit_thumb_p()) {
2657         if (!jit_no_set_flags() && (r0|r1) < 8)
2658             T1_ASRI(r0, r1, i0);
2659         else
2660             T2_ASRI(r0, r1, i0);
2661     }
2662     else
2663         ASRI(r0, r1, i0);
2664 }
2665
2666 static void
2667 _rshr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2668 {
2669     if (jit_thumb_p()) {
2670         if (!jit_no_set_flags() && (r0|r1|r2) < 8 && r0 == r1)
2671             T1_LSR(r0, r2);
2672         else
2673             T2_LSR(r0, r1, r2);
2674     }
2675     else
2676         LSR(r0, r1, r2);
2677 }
2678
2679 static void
2680 _rshi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2681 {
2682     assert(i0 >= 0 && i0 <= 31);
2683     if (i0 == 0)
2684         movr(r0, r1);
2685     else if (jit_thumb_p()) {
2686         if (!jit_no_set_flags() && (r0|r1) < 8)
2687             T1_LSRI(r0, r1, i0);
2688         else
2689             T2_LSRI(r0, r1, i0);
2690     }
2691     else
2692         LSRI(r0, r1, i0);
2693 }
2694
2695 static void
2696 _ccr(jit_state_t *_jit, int ct, int cf,
2697      jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2698 {
2699     if (jit_thumb_p()) {
2700         assert((ct ^ cf) >> 28 == 1);
2701         if ((r1|r2) < 8)
2702             T1_CMP(r1, r2);
2703         else if ((r1&r2) & 8)
2704             T1_CMPX(r1, r2);
2705         else
2706             T2_CMP(r1, r2);
2707         ITE(ct);
2708         if (r0 < 8) {
2709             T1_MOVI(r0, 1);
2710             T1_MOVI(r0, 0);
2711         }
2712         else {
2713             T2_MOVI(r0, 1);
2714             T2_MOVI(r0, 0);
2715         }
2716     }
2717     else {
2718         CMP(r1, r2);
2719         CC_MOVI(ct, r0, 1);
2720         CC_MOVI(cf, r0, 0);
2721     }
2722 }
2723
2724 static void
2725 _cci(jit_state_t *_jit, int ct, int cf,
2726      jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2727 {
2728     int                 i;
2729     jit_int32_t         reg;
2730     if (jit_thumb_p()) {
2731         if (r1 < 7 && !(i0 & 0xffffff00))
2732             T1_CMPI(r1, i0);
2733         else if ((i = encode_thumb_immediate(i0)) != -1)
2734             T2_CMPI(r1, i);
2735         else if ((i = encode_thumb_immediate(-i0)) != -1)
2736             T2_CMNI(r1, i);
2737         else {
2738             reg = jit_get_reg(jit_class_gpr);
2739             movi(rn(reg), i0);
2740             ccr(ct, cf, r0, r1, rn(reg));
2741             jit_unget_reg(reg);
2742             return;
2743         }
2744         ITE(ct);
2745         if (r0 < 8) {
2746             T1_MOVI(r0, 1);
2747             T1_MOVI(r0, 0);
2748         }
2749         else {
2750             T2_MOVI(r0, 1);
2751             T2_MOVI(r0, 0);
2752         }
2753     }
2754     else {
2755         if ((i = encode_arm_immediate(i0)) != -1)
2756             CMPI(r1, i);
2757         else if ((i = encode_arm_immediate(-i0)) != -1)
2758             CMNI(r1, i);
2759         else if (r0 != r1) {
2760             movi(r0, i0);
2761             CMP(r1, r0);
2762         }
2763         else {
2764             reg = jit_get_reg(jit_class_gpr);
2765             movi(rn(reg), i0);
2766             CMP(r1, rn(reg));
2767             jit_unget_reg(reg);
2768         }
2769         CC_MOVI(ct, r0, 1);
2770         CC_MOVI(cf, r0, 0);
2771     }
2772 }
2773
2774 static void
2775 _ner(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2776 {
2777     if (jit_thumb_p())
2778         ccr(ARM_CC_NE, ARM_CC_EQ, r0, r1, r2);
2779     else {
2780         SUBS(r0, r1, r2);
2781         CC_MOVI(ARM_CC_NE, r0, 1);
2782     }
2783 }
2784
2785 static void
2786 _nei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2787 {
2788     int                 i;
2789     jit_int32_t         reg;
2790     if (jit_thumb_p())
2791         cci(ARM_CC_NE, ARM_CC_EQ, r0, r1, i0);
2792     else {
2793         if ((i = encode_arm_immediate(i0)) != -1)
2794             SUBSI(r0, r1, i);
2795         else if ((i = encode_arm_immediate(-i0)) != -1)
2796             ADDSI(r0, r1, i);
2797         else if (r0 != r1) {
2798             movi(r0, i0);
2799             SUBS(r0, r1, r0);
2800         }
2801         else {
2802             reg = jit_get_reg(jit_class_gpr);
2803             movi(rn(reg), i0);
2804             SUBS(r0, r1, rn(reg));
2805             jit_unget_reg(reg);
2806         }
2807         CC_MOVI(ARM_CC_NE, r0, 1);
2808     }
2809 }
2810
2811 static void
2812 _jmpr(jit_state_t *_jit, jit_int32_t r0)
2813 {
2814     if (jit_thumb_p())
2815         T1_MOV(_R15_REGNO, r0);
2816     else
2817         MOV(_R15_REGNO, r0);
2818 }
2819
2820 static void
2821 _jmpi(jit_state_t *_jit, jit_word_t i0)
2822 {
2823     jit_word_t          w;
2824     jit_word_t          d;
2825     jit_int32_t         reg;
2826     w = _jit->pc.w;
2827     /* if thumb and in thumb mode */
2828     if (jit_thumb_p() && _jitc->thumb) {
2829         d = ((i0 - w) >> 1) - 2;
2830         if (d >= -1024 && d <= 1023)
2831             T1_B(d & 0x7ff);
2832         else if (_s24P(d))
2833             T2_B(encode_thumb_jump(d));
2834         else {
2835             reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
2836             movi(rn(reg), i0);
2837             jmpr(rn(reg));
2838             jit_unget_reg(reg);
2839         }
2840     }
2841     else {
2842         d = ((i0 - w) >> 2) - 2;
2843         if (_s24P(d))
2844             B(d & 0x00ffffff);
2845         else {
2846             reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
2847             movi(rn(reg), i0);
2848             jmpr(rn(reg));
2849             jit_unget_reg(reg);
2850         }
2851     }
2852 }
2853
2854 static jit_word_t
2855 _jmpi_p(jit_state_t *_jit, jit_word_t i0, jit_bool_t i1)
2856 {
2857     jit_word_t          w;
2858     jit_word_t          d;
2859     jit_int32_t         reg;
2860     /* i1 means jump is reachable in signed 24 bits  */
2861     if (i1) {
2862         w = _jit->pc.w;
2863         /* if thumb and in thumb mode */
2864         if (jit_thumb_p() && _jitc->thumb) {
2865             d = ((i0 - w) >> 1) - 2;
2866             assert(_s24P(d));
2867             T2_B(encode_thumb_jump(d));
2868         }
2869         else {
2870             d = ((i0 - w) >> 2) - 2;
2871             assert(_s24P(d));
2872             B(d & 0x00ffffff);
2873         }
2874     }
2875     else {
2876         reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
2877         w = movi_p(rn(reg), i0);
2878         jmpr(rn(reg));
2879         jit_unget_reg(reg);
2880     }
2881     return (w);
2882 }
2883
2884 static jit_word_t
2885 _bccr(jit_state_t *_jit, int cc, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2886 {
2887     jit_word_t          w;
2888     jit_word_t          d;
2889     if (jit_thumb_p()) {
2890         if ((r0|r1) < 8)
2891             T1_CMP(r0, r1);
2892         else if ((r0&r1) & 8)
2893             T1_CMPX(r0, r1);
2894         else
2895             T2_CMP(r0, r1);
2896         /* use only thumb2 conditional as does not know if will be patched */
2897         w = _jit->pc.w;
2898         d = ((i0 - w) >> 1) - 2;
2899         assert(_s20P(d));
2900         T2_CC_B(cc, encode_thumb_cc_jump(d));
2901     }
2902     else {
2903         CMP(r0, r1);
2904         w = _jit->pc.w;
2905         d = ((i0 - w) >> 2) - 2;
2906         assert(_s24P(d));
2907         CC_B(cc, d & 0x00ffffff);
2908     }
2909     return (w);
2910 }
2911
2912 static jit_word_t
2913 _bcci(jit_state_t *_jit, int cc, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
2914 {
2915     jit_word_t          w;
2916     jit_word_t          d;
2917     int                 i;
2918     jit_int32_t         reg;
2919     if (jit_thumb_p()) {
2920         if (r0 < 7 && !(i1 & 0xffffff00))
2921             T1_CMPI(r0, i1);
2922         else if ((i = encode_thumb_immediate(i1)) != -1)
2923             T2_CMPI(r0, i);
2924         else if ((i = encode_thumb_immediate(-i1)) != -1)
2925             T2_CMNI(r0, i);
2926         else {
2927             reg = jit_get_reg(jit_class_gpr);
2928             movi(rn(reg), i1);
2929             T2_CMP(r0, rn(reg));
2930             jit_unget_reg(reg);
2931         }
2932         /* use only thumb2 conditional as does not know if will be patched */
2933         w = _jit->pc.w;
2934         d = ((i0 - w) >> 1) - 2;
2935         assert(_s20P(d));
2936         T2_CC_B(cc, encode_thumb_cc_jump(d));
2937     }
2938     else {
2939         if ((i = encode_arm_immediate(i1)) != -1)
2940             CMPI(r0, i);
2941         else if ((i = encode_arm_immediate(-i1)) != -1)
2942             CMNI(r0, i);
2943         else {
2944             reg = jit_get_reg(jit_class_gpr);
2945             movi(rn(reg), i1);
2946             CMP(r0, rn(reg));
2947             jit_unget_reg(reg);
2948         }
2949         w = _jit->pc.w;
2950         d = ((i0 - w) >> 2) - 2;
2951         assert(_s24P(d));
2952         CC_B(cc, d & 0x00ffffff);
2953     }
2954     return (w);
2955 }
2956
2957 static jit_word_t
2958 _baddr(jit_state_t *_jit, int cc, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2959 {
2960     jit_word_t          w;
2961     jit_word_t          d;
2962     if (jit_thumb_p()) {
2963         if ((r0|r1) < 8)
2964             T1_ADD(r0, r0, r1);
2965         else
2966             T2_ADDS(r0, r0, r1);
2967         w = _jit->pc.w;
2968         d = ((i0 - w) >> 1) - 2;
2969         assert(_s20P(d));
2970         T2_CC_B(cc, encode_thumb_cc_jump(d));
2971     }
2972     else {
2973         ADDS(r0, r0, r1);
2974         w = _jit->pc.w;
2975         d = ((i0 - w) >> 2) - 2;
2976         assert(_s24P(d));
2977         CC_B(cc, d & 0x00ffffff);
2978     }
2979     return (w);
2980 }
2981
2982 static jit_word_t
2983 _baddi(jit_state_t *_jit, int cc, jit_word_t i0, jit_int32_t r0, int i1)
2984 {
2985     int                 i;
2986     jit_word_t          w;
2987     jit_word_t          d;
2988     jit_int32_t         reg;
2989     if (jit_thumb_p()) {
2990         if (r0 < 8 && !(i1 & ~7))
2991             T1_ADDI3(r0, r0, i1);
2992         else if (r0 < 8 && !(-i1 & ~7))
2993             T1_SUBI3(r0, r0, -i1);
2994         else if (r0 < 8 && !(i1 & ~0xff))
2995             T1_ADDI8(r0, i1);
2996         else if (r0 < 8 && !(-i1 & ~0xff))
2997             T1_SUBI8(r0, -i1);
2998         else if ((i = encode_thumb_immediate(i1)) != -1)
2999             T2_ADDSI(r0, r0, i);
3000         else if ((i = encode_thumb_immediate(-i1)) != -1)
3001             T2_SUBSI(r0, r0, i);
3002         else {
3003             reg = jit_get_reg(jit_class_gpr);
3004             movi(rn(reg), i1);
3005             T2_ADDS(r0, r0, rn(reg));
3006             jit_unget_reg(reg);
3007         }
3008         w = _jit->pc.w;
3009         d = ((i0 - w) >> 1) - 2;
3010         assert(_s20P(d));
3011         T2_CC_B(cc, encode_thumb_cc_jump(d));
3012     }
3013     else {
3014         if ((i = encode_arm_immediate(i1)) != -1)
3015             ADDSI(r0, r0, i);
3016         else if ((i = encode_arm_immediate(-i1)) != -1)
3017             SUBSI(r0, r0, i);
3018         else {
3019             reg = jit_get_reg(jit_class_gpr);
3020             movi(rn(reg), i1);
3021             ADDS(r0, r0, rn(reg));
3022             jit_unget_reg(reg);
3023         }
3024         w = _jit->pc.w;
3025         d = ((i0 - w) >> 2) - 2;
3026         assert(_s24P(d));
3027         CC_B(cc, d & 0x00ffffff);
3028     }
3029     return (w);
3030 }
3031
3032 static jit_word_t
3033 _bsubr(jit_state_t *_jit, int cc, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
3034 {
3035     jit_word_t          w;
3036     jit_word_t          d;
3037     if (jit_thumb_p()) {
3038         if ((r0|r1) < 8)
3039             T1_SUB(r0, r0, r1);
3040         else
3041             T2_SUBS(r0, r0, r1);
3042         w = _jit->pc.w;
3043         d = ((i0 - w) >> 1) - 2;
3044         assert(_s20P(d));
3045         T2_CC_B(cc, encode_thumb_cc_jump(d));
3046     }
3047     else {
3048         SUBS(r0, r0, r1);
3049         w = _jit->pc.w;
3050         d = ((i0 - w) >> 2) - 2;
3051         assert(_s24P(d));
3052         CC_B(cc, d & 0x00ffffff);
3053     }
3054     return (w);
3055 }
3056
3057 static jit_word_t
3058 _bsubi(jit_state_t *_jit, int cc, jit_word_t i0, jit_int32_t r0, int i1)
3059 {
3060     int                 i;
3061     jit_word_t          w;
3062     jit_word_t          d;
3063     jit_int32_t         reg;
3064     if (jit_thumb_p()) {
3065         if (r0 < 8 && !(i1 & ~7))
3066             T1_SUBI3(r0, r0, i1);
3067         else if (r0 < 8 && !(-i1 & ~7))
3068             T1_ADDI3(r0, r0, -i1);
3069         else if (r0 < 8 && !(i1 & ~0xff))
3070             T1_SUBI8(r0, i1);
3071         else if (r0 < 8 && !(-i1 & ~0xff))
3072             T1_ADDI8(r0, -i1);
3073         else if ((i = encode_thumb_immediate(i1)) != -1)
3074             T2_SUBSI(r0, r0, i);
3075         else if ((i = encode_thumb_immediate(-i1)) != -1)
3076             T2_SUBSI(r0, r0, i);
3077         else {
3078             reg = jit_get_reg(jit_class_gpr);
3079             movi(rn(reg), i1);
3080             T2_SUBS(r0, r0, rn(reg));
3081             jit_unget_reg(reg);
3082         }
3083         w = _jit->pc.w;
3084         d = ((i0 - w) >> 1) - 2;
3085         assert(_s20P(d));
3086         T2_CC_B(cc, encode_thumb_cc_jump(d));
3087     }
3088     else {
3089         if ((i = encode_arm_immediate(i1)) != -1)
3090             SUBSI(r0, r0, i);
3091         else if ((i = encode_arm_immediate(-i1)) != -1)
3092             ADDSI(r0, r0, i);
3093         else {
3094             reg = jit_get_reg(jit_class_gpr);
3095             movi(rn(reg), i1);
3096             SUBS(r0, r0, rn(reg));
3097             jit_unget_reg(reg);
3098         }
3099         w = _jit->pc.w;
3100         d = ((i0 - w) >> 2) - 2;
3101         assert(_s24P(d));
3102         CC_B(cc, d & 0x00ffffff);
3103     }
3104     return (w);
3105 }
3106
3107 static jit_word_t
3108 _bmxr(jit_state_t *_jit, int cc, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
3109 {
3110     jit_word_t          w;
3111     jit_word_t          d;
3112     jit_int32_t         reg;
3113     if (jit_thumb_p()) {
3114         if ((r0|r1) < 8)
3115             T1_TST(r0, r1);
3116         else
3117             T2_TST(r0, r1);
3118         w = _jit->pc.w;
3119         d = ((i0 - w) >> 1) - 2;
3120         assert(_s20P(d));
3121         T2_CC_B(cc, encode_thumb_cc_jump(d));
3122     }
3123     else {
3124         if (jit_armv5_p())
3125             TST(r0, r1);
3126         else {
3127             reg = jit_get_reg(jit_class_gpr);
3128             ANDS(rn(reg), r0, r1);
3129             jit_unget_reg(reg);
3130         }
3131         w = _jit->pc.w;
3132         d = ((i0 - w) >> 2) - 2;
3133         assert(_s24P(d));
3134         CC_B(cc, d & 0x00ffffff);
3135     }
3136     return (w);
3137 }
3138
3139 static jit_word_t
3140 _bmxi(jit_state_t *_jit, int cc, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
3141 {
3142     int                 i;
3143     jit_word_t          w;
3144     jit_word_t          d;
3145     jit_int32_t         reg;
3146     if (jit_thumb_p()) {
3147         if ((i = encode_thumb_immediate(i1)) != -1)
3148             T2_TSTI(r0, i);
3149         else {
3150             reg = jit_get_reg(jit_class_gpr);
3151             movi(rn(reg), i1);
3152             T2_TST(r0, rn(reg));
3153             jit_unget_reg(reg);
3154         }
3155         w = _jit->pc.w;
3156         d = ((i0 - w) >> 1) - 2;
3157         assert(_s20P(d));
3158         T2_CC_B(cc, encode_thumb_cc_jump(d));
3159     }
3160     else {
3161         if (jit_armv5_p()) {
3162             if ((i = encode_arm_immediate(i1)) != -1)
3163                 TSTI(r0, i);
3164             else {
3165                 reg = jit_get_reg(jit_class_gpr);
3166                 movi(rn(reg), i1);
3167                 TST(r0, rn(reg));
3168                 jit_unget_reg(reg);
3169             }
3170         }
3171         else {
3172             reg = jit_get_reg(jit_class_gpr);
3173             if ((i = encode_arm_immediate(i1)) != -1)
3174                 ANDSI(rn(reg), r0, i);
3175             else if ((i = encode_arm_immediate(~i1)) != -1)
3176                 BICSI(rn(reg), r0, i);
3177             else {
3178                 movi(rn(reg), i1);
3179                 ANDS(rn(reg), r0, rn(reg));
3180             }
3181             jit_unget_reg(reg);
3182         }
3183         w = _jit->pc.w;
3184         d = ((i0 - w) >> 2) - 2;
3185         assert(_s24P(d));
3186         CC_B(cc, d & 0x00ffffff);
3187     }
3188     return (w);
3189 }
3190
3191 static void
3192 _ldr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3193 {
3194     if (jit_thumb_p())
3195         T2_LDRSBI(r0, r1, 0);
3196     else
3197         LDRSBI(r0, r1, 0);
3198 }
3199
3200 static void
3201 _ldi_c(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
3202 {
3203     jit_int32_t         reg;
3204     reg = jit_get_reg(jit_class_gpr);
3205     movi(rn(reg), i0);
3206     if (jit_thumb_p())
3207         T2_LDRSBI(r0, rn(reg), 0);
3208     else
3209         LDRSBI(r0, rn(reg), 0);
3210     jit_unget_reg(reg);
3211 }
3212
3213 static void
3214 _ldxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3215 {
3216     if (jit_thumb_p()) {
3217         if ((r0|r1|r2) < 8)
3218             T1_LDRSB(r0, r1, r2);
3219         else
3220             T2_LDRSB(r0, r1, r2);
3221     }
3222     else
3223         LDRSB(r0, r1, r2);
3224 }
3225
3226 static void
3227 _ldxi_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
3228 {
3229     jit_int32_t         reg;
3230     if (jit_thumb_p()) {
3231         if (jit_ldrt_strt_p() && i0 >= 0 && i0 <= 255)
3232             T2_LDRSBI(r0, r1, i0);
3233         else if (i0 < 0 && i0 >= -255)
3234             T2_LDRSBIN(r0, r1, -i0);
3235         else if (i0 >= 0 && i0 <= 4095)
3236             T2_LDRSBWI(r0, r1, i0);
3237         else if (r0 != r1) {
3238             movi(r0, i0);
3239             if ((r0|r1) < 8)
3240                 T1_LDRSB(r0, r1, r0);
3241             else
3242                 T2_LDRSB(r0, r1, r0);
3243         }
3244         else {
3245             reg = jit_get_reg(jit_class_gpr);
3246             movi(rn(reg), i0);
3247             if ((r0|r1|rn(reg)) < 8)
3248                 T1_LDRSB(r0, r1, rn(reg));
3249             else
3250                 T2_LDRSB(r0, r1, rn(reg));
3251             jit_unget_reg(reg);
3252         }
3253     }
3254     else {
3255         if (i0 >= 0 && i0 <= 255)
3256             LDRSBI(r0, r1, i0);
3257         else if (i0 < 0 && i0 >= -255)
3258             LDRSBIN(r0, r1, -i0);
3259         else if (r0 != r1) {
3260             movi(r0, i0);
3261             LDRSB(r0, r1, r0);
3262         }
3263         else {
3264             reg = jit_get_reg(jit_class_gpr);
3265             movi(rn(reg), i0);
3266             LDRSB(r0, r1, rn(reg));
3267             jit_unget_reg(reg);
3268         }
3269     }
3270 }
3271
3272 static void
3273 _ldr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3274 {
3275     if (jit_thumb_p())
3276         T2_LDRBI(r0, r1, 0);
3277     else
3278         LDRBI(r0, r1, 0);
3279 }
3280
3281 static void
3282 _ldi_uc(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
3283 {
3284     jit_int32_t         reg;
3285     reg = jit_get_reg(jit_class_gpr);
3286     movi(rn(reg), i0);
3287     if (jit_thumb_p())
3288         T2_LDRBI(r0, rn(reg), 0);
3289     else
3290         LDRBI(r0, rn(reg), 0);
3291     jit_unget_reg(reg);
3292 }
3293
3294 static void
3295 _ldxr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3296 {
3297     if (jit_thumb_p()) {
3298         if ((r0|r1|r2) < 8)
3299             T1_LDRB(r0, r1, r2);
3300         else
3301             T2_LDRB(r0, r1, r2);
3302     }
3303     else
3304         LDRB(r0, r1, r2);
3305 }
3306
3307 static void
3308 _ldxi_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
3309 {
3310     jit_int32_t         reg;
3311     if (jit_thumb_p()) {
3312         if ((r0|r1) < 8 && i0 >= 0 && i0 < 0x20)
3313             T1_LDRBI(r0, r1, i0);
3314         else if (jit_ldrt_strt_p() && i0 >= 0 && i0 <= 255)
3315             T2_LDRBI(r0, r1, i0);
3316         else if (i0 < 0 && i0 >= -255)
3317             T2_LDRBIN(r0, r1, -i0);
3318         else if (i0 >= 0 && i0 <= 4095)
3319             T2_LDRBWI(r0, r1, i0);
3320         else if (r0 != r1) {
3321             movi(r0, i0);
3322             if ((r0|r1) < 8)
3323                 T1_LDRB(r0, r1, r0);
3324             else
3325                 T2_LDRB(r0, r1, r0);
3326         }
3327         else {
3328             reg = jit_get_reg(jit_class_gpr);
3329             movi(rn(reg), i0);
3330             if ((r0|r1|rn(reg)) < 8)
3331                 T1_LDRB(r0, r1, rn(reg));
3332             else
3333                 T2_LDRB(r0, r1, rn(reg));
3334             jit_unget_reg(reg);
3335         }
3336     }
3337     else {
3338         if (i0 >= 0 && i0 <= 4095)
3339             LDRBI(r0, r1, i0);
3340         else if (i0 < 0 && i0 >= -4095)
3341             LDRBIN(r0, r1, -i0);
3342         else if (r0 != r1) {
3343             movi(r0, i0);
3344             LDRB(r0, r1, r0);
3345         }
3346         else {
3347             reg = jit_get_reg(jit_class_gpr);
3348             movi(rn(reg), i0);
3349             LDRB(r0, r1, rn(reg));
3350             jit_unget_reg(reg);
3351         }
3352     }
3353 }
3354
3355 static void
3356 _ldr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3357 {
3358     if (jit_thumb_p())
3359         T2_LDRSHI(r0, r1, 0);
3360     else
3361         LDRSHI(r0, r1, 0);
3362 }
3363
3364 static void
3365 _ldi_s(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
3366 {
3367     jit_int32_t         reg;
3368     reg = jit_get_reg(jit_class_gpr);
3369     movi(rn(reg), i0);
3370     if (jit_thumb_p())
3371         T2_LDRSHI(r0, rn(reg), 0);
3372     else
3373         LDRSHI(r0, rn(reg), 0);
3374     jit_unget_reg(reg);
3375 }
3376
3377 static void
3378 _ldxr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3379 {
3380     if (jit_thumb_p()) {
3381         if ((r0|r1|r2) < 8)
3382             T1_LDRSH(r0, r1, r2);
3383         else
3384             T2_LDRSH(r0, r1, r2);
3385     }
3386     else
3387         LDRSH(r0, r1, r2);
3388 }
3389
3390 static void
3391 _ldxi_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
3392 {
3393     jit_int32_t         reg;
3394     if (jit_thumb_p()) {
3395         if (jit_ldrt_strt_p() && i0 >= 0 && i0 <= 255)
3396             T2_LDRSHI(r0, r1, i0);
3397         else if (i0 < 0 && i0 >= -255)
3398             T2_LDRSHIN(r0, r1, -i0);
3399         else if (i0 >= 0 && i0 <= 4095)
3400             T2_LDRSHWI(r0, r1, i0);
3401         else if (r0 != r1) {
3402             movi(r0, i0);
3403             if ((r0|r1) < 8)
3404                 T1_LDRSH(r0, r1, r0);
3405             else
3406                 T2_LDRSH(r0, r1, r0);
3407         }
3408         else {
3409             reg = jit_get_reg(jit_class_gpr);
3410             movi(rn(reg), i0);
3411             if ((r0|r1|rn(reg)) < 8)
3412                 T1_LDRSH(r0, r1, rn(reg));
3413             else
3414                 T2_LDRSH(r0, r1, rn(reg));
3415             jit_unget_reg(reg);
3416         }
3417     }
3418     else {
3419         if (i0 >= 0 && i0 <= 255)
3420             LDRSHI(r0, r1, i0);
3421         else if (i0 < 0 && i0 >= -255)
3422             LDRSHIN(r0, r1, -i0);
3423         else if (r0 != r1) {
3424             movi(r0, i0);
3425             LDRSH(r0, r1, r0);
3426         }
3427         else {
3428             reg = jit_get_reg(jit_class_gpr);
3429             movi(rn(reg), i0);
3430             LDRSH(r0, r1, rn(reg));
3431             jit_unget_reg(reg);
3432         }
3433     }
3434 }
3435
3436 static void
3437 _ldr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3438 {
3439     if (jit_thumb_p())
3440         T2_LDRHI(r0, r1, 0);
3441     else
3442         LDRHI(r0, r1, 0);
3443 }
3444
3445 static void
3446 _ldi_us(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
3447 {
3448     jit_int32_t         reg;
3449     reg = jit_get_reg(jit_class_gpr);
3450     movi(rn(reg), i0);
3451     if (jit_thumb_p())
3452         T2_LDRHI(r0, rn(reg), 0);
3453     else
3454         LDRHI(r0, rn(reg), 0);
3455     jit_unget_reg(reg);
3456 }
3457
3458 static void
3459 _ldxr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3460 {
3461     if (jit_thumb_p()) {
3462         if ((r0|r1|r2) < 8)
3463             T1_LDRH(r0, r1, r2);
3464         else
3465             T2_LDRH(r0, r1, r2);
3466     }
3467     else
3468         LDRH(r0, r1, r2);
3469 }
3470
3471 static void
3472 _ldxi_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
3473 {
3474     jit_int32_t         reg;
3475     if (jit_thumb_p()) {
3476         if ((r0|r1) < 8 && i0 >= 0 && !(i0 & 1) && (i0 >> 1) < 0x20)
3477             T1_LDRHI(r0, r1, i0 >> 1);
3478         else if (jit_ldrt_strt_p() && i0 >= 0 && i0 <= 255)
3479             T2_LDRHI(r0, r1, i0);
3480         else if (i0 < 0 && i0 >= -255)
3481             T2_LDRHIN(r0, r1, -i0);
3482         else if (i0 >= 0 && i0 <= 4095)
3483             T2_LDRHWI(r0, r1, i0);
3484         else if (r0 != r1) {
3485             movi(r0, i0);
3486             if ((r0|r1) < 8)
3487                 T1_LDRH(r0, r1, r0);
3488             else
3489                 T2_LDRH(r0, r1, r0);
3490         }
3491         else {
3492             reg = jit_get_reg(jit_class_gpr);
3493             movi(rn(reg), i0);
3494             if ((r0|r1|rn(reg)) < 8)
3495                 T1_LDRH(r0, r1, rn(reg));
3496             else
3497                 T2_LDRH(r0, r1, rn(reg));
3498             jit_unget_reg(reg);
3499         }
3500     }
3501     else {
3502         if (i0 >= 0 && i0 <= 255)
3503             LDRHI(r0, r1, i0);
3504         else if (i0 < 0 && i0 >= -255)
3505             LDRHIN(r0, r1, -i0);
3506         else if (r0 != r1) {
3507             movi(r0, i0);
3508             LDRH(r0, r1, r0);
3509         }
3510         else {
3511             reg = jit_get_reg(jit_class_gpr);
3512             movi(rn(reg), i0);
3513             LDRH(r0, r1, rn(reg));
3514             jit_unget_reg(reg);
3515         }
3516     }
3517 }
3518
3519 static void
3520 _ldr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3521 {
3522     if (jit_thumb_p())
3523         T2_LDRI(r0, r1, 0);
3524     else
3525         LDRI(r0, r1, 0);
3526 }
3527
3528 static void
3529 _ldi_i(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
3530 {
3531     jit_int32_t         reg;
3532     reg = jit_get_reg(jit_class_gpr);
3533     movi(rn(reg), i0);
3534     if (jit_thumb_p())
3535         T2_LDRI(r0, rn(reg), 0);
3536     else
3537         LDRI(r0, rn(reg), 0);
3538     jit_unget_reg(reg);
3539 }
3540
3541 static void
3542 _ldxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3543 {
3544     if (jit_thumb_p()) {
3545         if ((r0|r1|r2) < 8)
3546             T1_LDR(r0, r1, r2);
3547         else
3548             T2_LDR(r0, r1, r2);
3549     }
3550     else
3551         LDR(r0, r1, r2);
3552 }
3553
3554 static void
3555 _ldxi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
3556 {
3557     jit_int32_t         reg;
3558     if (jit_thumb_p()) {
3559         if ((r0|r1) < 8 && i0 >= 0 && !(i0 & 3) && (i0 >> 2) < 0x20)
3560             T1_LDRI(r0, r1, i0 >> 2);
3561         else if (r1 == _R13_REGNO && r0 < 8 &&
3562                  i0 >= 0 && !(i0 & 3) && (i0 >> 2) <= 255)
3563             T1_LDRISP(r0, i0 >> 2);
3564         else if (jit_ldrt_strt_p() && i0 >= 0 && i0 <= 255)
3565             T2_LDRI(r0, r1, i0);
3566         else if (i0 < 0 && i0 > -255)
3567             T2_LDRIN(r0, r1, -i0);
3568         else if (i0 >= 0 && i0 <= 4095)
3569             T2_LDRWI(r0, r1, i0);
3570         else if (r0 != r1) {
3571             movi(r0, i0);
3572             if ((r0|r1) < 8)
3573                 T1_LDR(r0, r1, r0);
3574             else
3575                 T2_LDR(r0, r1, r0);
3576         }
3577         else {
3578             reg = jit_get_reg(jit_class_gpr);
3579             movi(rn(reg), i0);
3580             if ((r0|r1|rn(reg)) < 8)
3581                 T1_LDR(r0, r1, rn(reg));
3582             else
3583                 T2_LDR(r0, r1, rn(reg));
3584             jit_unget_reg(reg);
3585         }
3586     }
3587     else {
3588         if (i0 >= 0 && i0 <= 4095)
3589             LDRI(r0, r1, i0);
3590         else if (i0 < 0 && i0 >= -4095)
3591             LDRIN(r0, r1, -i0);
3592         else if (r0 != r1) {
3593             movi(r0, i0);
3594             LDR(r0, r1, r0);
3595         }
3596         else {
3597             reg = jit_get_reg(jit_class_gpr);
3598             movi(rn(reg), i0);
3599             LDR(r0, r1, rn(reg));
3600             jit_unget_reg(reg);
3601         }
3602     }
3603 }
3604
3605 static void
3606 _str_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3607 {
3608     if (jit_thumb_p())
3609         T2_STRBI(r1, r0, 0);
3610     else
3611         STRBI(r1, r0, 0);
3612 }
3613
3614 static void
3615 _sti_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
3616 {
3617     jit_int32_t         reg;
3618     reg = jit_get_reg(jit_class_gpr);
3619     movi(rn(reg), i0);
3620     if (jit_thumb_p())
3621         T2_STRBI(r0, rn(reg), 0);
3622     else
3623         STRBI(r0, rn(reg), 0);
3624     jit_unget_reg(reg);
3625 }
3626
3627 static void
3628 _stxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3629 {
3630     if (jit_thumb_p()) {
3631         if ((r0|r1|r2) < 8)
3632             T1_STRB(r2, r1, r0);
3633         else
3634             T2_STRB(r2, r1, r0);
3635     }
3636     else
3637         STRB(r2, r1, r0);
3638 }
3639
3640 static void
3641 _stxi_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
3642 {
3643     jit_int32_t         reg;
3644     if (jit_thumb_p()) {
3645         if ((r0|r1) < 8 && i0 >= 0 && i0 < 0x20)
3646             T1_STRBI(r1, r0, i0);
3647         else if (jit_ldrt_strt_p() && i0 >= 0 && i0 <= 255)
3648             T2_STRBI(r1, r0, i0);
3649         else if (i0 < 0 && i0 >= -255)
3650             T2_STRBIN(r1, r0, -i0);
3651         else if (i0 >= 0 && i0 <= 4095)
3652             T2_STRBWI(r1, r0, i0);
3653         else {
3654             reg = jit_get_reg(jit_class_gpr);
3655             movi(rn(reg), i0);
3656             if ((r0|r1|rn(reg)) < 8)
3657                 T1_STRB(r1, r0, rn(reg));
3658             else
3659                 T2_STRB(r1, r0, rn(reg));
3660             jit_unget_reg(reg);
3661         }
3662     }
3663     else {
3664         if (i0 >= 0 && i0 <= 4095)
3665             STRBI(r1, r0, i0);
3666         else if (i0 < 0 && i0 >= -4095)
3667             STRBIN(r1, r0, -i0);
3668         else {
3669             reg = jit_get_reg(jit_class_gpr);
3670             movi(rn(reg), i0);
3671             STRB(r1, r0, rn(reg));
3672             jit_unget_reg(reg);
3673         }
3674     }
3675 }
3676
3677 static void
3678 _str_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3679 {
3680     if (jit_thumb_p())
3681         T2_STRHI(r1, r0, 0);
3682     else
3683         STRHI(r1, r0, 0);
3684 }
3685
3686 static void
3687 _sti_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
3688 {
3689     jit_int32_t         reg;
3690     reg = jit_get_reg(jit_class_gpr);
3691     movi(rn(reg), i0);
3692     if (jit_thumb_p())
3693         T2_STRHI(r0, rn(reg), 0);
3694     else
3695         STRHI(r0, rn(reg), 0);
3696     jit_unget_reg(reg);
3697 }
3698
3699 static void
3700 _stxr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3701 {
3702     if (jit_thumb_p()) {
3703         if ((r0|r1|r2) < 8)
3704             T1_STRH(r2, r1, r0);
3705         else
3706             T2_STRH(r2, r1, r0);
3707     }
3708     else
3709         STRH(r2, r1, r0);
3710 }
3711
3712 static void
3713 _stxi_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
3714 {
3715     jit_int32_t         reg;
3716     if (jit_thumb_p()) {
3717         if ((r0|r1) < 8 && i0 >= 0 && !(i0 & 1) && (i0 >> 1) < 0x20)
3718             T1_STRHI(r1, r0, i0 >> 1);
3719         else if (jit_ldrt_strt_p() && i0 >= 0 && i0 <= 255)
3720             T2_STRHI(r1, r0, i0);
3721         else if (i0 < 0 && i0 >= -255)
3722             T2_STRHIN(r1, r0, -i0);
3723         else if (i0 >= 0 && i0 <= 4095)
3724             T2_STRHWI(r1, r0, i0);
3725         else {
3726             reg = jit_get_reg(jit_class_gpr);
3727             movi(rn(reg), i0);
3728             if ((r0|r1|rn(reg)) < 8)
3729                 T1_STRH(r1, r0, rn(reg));
3730             else
3731                 T2_STRH(r1, r0, rn(reg));
3732             jit_unget_reg(reg);
3733         }
3734     }
3735     else {
3736         if (i0 >= 0 && i0 <= 255)
3737             STRHI(r1, r0, i0);
3738         else if (i0 < 0 && i0 >= -255)
3739             STRHIN(r1, r0, -i0);
3740         else {
3741             reg = jit_get_reg(jit_class_gpr);
3742             movi(rn(reg), i0);
3743             STRH(r1, r0, rn(reg));
3744             jit_unget_reg(reg);
3745         }
3746     }
3747 }
3748
3749 static void
3750 _str_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3751 {
3752     if (jit_thumb_p())
3753         T2_STRI(r1, r0, 0);
3754     else
3755         STRI(r1, r0, 0);
3756 }
3757
3758 static void
3759 _sti_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
3760 {
3761     jit_int32_t         reg;
3762     reg = jit_get_reg(jit_class_gpr);
3763     movi(rn(reg), i0);
3764     if (jit_thumb_p())
3765         T2_STRI(r0, rn(reg), 0);
3766     else
3767         STRI(r0, rn(reg), 0);
3768     jit_unget_reg(reg);
3769 }
3770
3771 static void
3772 _stxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3773 {
3774     if (jit_thumb_p()) {
3775         if ((r0|r1|r2) < 8)
3776             T1_STR(r2, r1, r0);
3777         else
3778             T2_STR(r2, r1, r0);
3779     }
3780     else
3781         STR(r2, r1, r0);
3782 }
3783
3784 static void
3785 _stxi_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
3786 {
3787     jit_int32_t         reg;
3788     if (jit_thumb_p()) {
3789         if ((r0|r1) < 8 && i0 >= 0 && !(i0 & 3) && (i0 >> 2) < 0x20)
3790             T1_STRI(r1, r0, i0 >> 2);
3791         else if (r0 == _R13_REGNO && r1 < 8 &&
3792                  i0 >= 0 && !(i0 & 3) && (i0 >> 2) <= 255)
3793             T1_STRISP(r1, i0 >> 2);
3794         else if (jit_ldrt_strt_p() && i0 >= 0 && i0 <= 255)
3795             T2_STRI(r1, r0, i0);
3796         else if (i0 < 0 && i0 >= -255)
3797             T2_STRIN(r1, r0, -i0);
3798         else if (i0 >= 0 && i0 <= 4095)
3799             T2_STRWI(r1, r0, i0);
3800         else {
3801             reg = jit_get_reg(jit_class_gpr);
3802             movi(rn(reg), i0);
3803             if ((r0|r1|rn(reg)) < 8)
3804                 T1_STR(r1, r0, rn(reg));
3805             else
3806                 T2_STR(r1, r0, rn(reg));
3807             jit_unget_reg(reg);
3808         }
3809     }
3810     else {
3811         if (i0 >= 0 && i0 <= 4095)
3812             STRI(r1, r0, i0);
3813         else if (i0 < 0 && i0 >= -4095)
3814             STRIN(r1, r0, -i0);
3815         else {
3816             reg = jit_get_reg(jit_class_gpr);
3817             movi(rn(reg), i0);
3818             STR(r1, r0, rn(reg));
3819             jit_unget_reg(reg);
3820         }
3821     }
3822 }
3823
3824 static void
3825 _bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3826 {
3827     if (jit_thumb_p()) {
3828         if ((r0|r1) < 8)
3829             T1_REV(r0, r1);
3830         else
3831             T2_REV(r0, r1);
3832         rshi_u(r0, r0, 16);
3833     }
3834     else {
3835         if (jit_armv6_p()) {
3836             REV(r0, r1);
3837             rshi_u(r0, r0, 16);
3838         }
3839         else {
3840                 generic_bswapr_us(_jit, r0, r1);
3841         }
3842     }
3843 }
3844
3845 /* inline glibc htonl (without register clobber) */
3846 static void
3847 _bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3848 {
3849     jit_int32_t         reg;
3850     if (jit_thumb_p()) {
3851         if ((r0|r1) < 8)
3852             T1_REV(r0, r1);
3853         else
3854             T2_REV(r0, r1);
3855     }
3856     else {
3857         if (jit_armv6_p())
3858             REV(r0, r1);
3859         else {
3860             reg = jit_get_reg(jit_class_gpr);
3861             EOR_SI(rn(reg), r1, r1, ARM_ROR, 16);
3862             LSRI(rn(reg), rn(reg), 8);
3863             BICI(rn(reg), rn(reg), encode_arm_immediate(0xff00));
3864             EOR_SI(r0, rn(reg), r1, ARM_ROR, 8);
3865             jit_unget_reg(reg);
3866         }
3867     }
3868 }
3869
3870 static void
3871 _extr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3872 {
3873     if (jit_thumb_p()) {
3874         if ((r0|r1) < 8)
3875             T1_SXTB(r0, r1);
3876         else
3877             T2_SXTB(r0, r1);
3878     }
3879     else {
3880         if (jit_armv6_p())
3881             SXTB(r0, r1);
3882         else {
3883             LSLI(r0, r1, 24);
3884             ASRI(r0, r0, 24);
3885         }
3886     }
3887 }
3888
3889 static void
3890 _extr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3891 {
3892     if (jit_thumb_p()) {
3893         if ((r0|r1) < 8)
3894             T1_UXTB(r0, r1);
3895         else
3896             T2_UXTB(r0, r1);
3897     }
3898     else {
3899         if (jit_armv6_p())
3900             UXTB(r0, r1);
3901         else
3902             ANDI(r0, r1, 0xff);
3903     }
3904 }
3905
3906 static void
3907 _extr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3908 {
3909     if (jit_thumb_p()) {
3910         if ((r0|r1) < 8)
3911             T1_SXTH(r0, r1);
3912         else
3913             T2_SXTH(r0, r1);
3914     }
3915     else {
3916         if (jit_armv6_p())
3917             SXTH(r0, r1);
3918         else {
3919             LSLI(r0, r1, 16);
3920             ASRI(r0, r0, 16);
3921         }
3922     }
3923 }
3924
3925 static void
3926 _extr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3927 {
3928     if (jit_thumb_p()) {
3929         if ((r0|r1) < 8)
3930             T1_UXTH(r0, r1);
3931         else
3932             T2_UXTH(r0, r1);
3933     }
3934     else {
3935         if (jit_armv6_p())
3936             UXTH(r0, r1);
3937         else {
3938             LSLI(r0, r1, 16);
3939             LSRI(r0, r0, 16);
3940         }
3941     }
3942 }
3943
3944 static void
3945 _callr(jit_state_t *_jit, jit_int32_t r0)
3946 {
3947     if (jit_thumb_p())
3948         T1_BLX(r0);
3949     else
3950         BLX(r0);
3951 }
3952
3953 static void
3954 _calli(jit_state_t *_jit, jit_word_t i0, jit_bool_t exchange_p)
3955 {
3956     jit_word_t          d;
3957     jit_int32_t         reg;
3958     if (!exchange_p) {
3959         if (jit_thumb_p()) {
3960             if (jit_exchange_p())
3961                 /* skip switch from  arm to thumb 
3962                  * exchange_p set to zero means a jit function
3963                  * call in the same jit code buffer */
3964                 d = ((i0 + 8 - _jit->pc.w) >> 1) - 2;
3965             else
3966                 d = ((i0 - _jit->pc.w) >> 1) - 2;
3967         }
3968         else                    d = ((i0 - _jit->pc.w) >> 2) - 2;
3969         if (_s24P(d)) {
3970             if (jit_thumb_p())  T2_BLI(encode_thumb_jump(d));
3971             else                BLI(d & 0x00ffffff);
3972         }
3973         else                    goto fallback;
3974     }
3975     else {
3976     fallback:
3977         reg = jit_get_reg(jit_class_gpr);
3978         movi(rn(reg), i0);
3979         if (jit_thumb_p())
3980             T1_BLX(rn(reg));
3981         else
3982             BLX(rn(reg));
3983         jit_unget_reg(reg);
3984     }
3985 }
3986
3987 static jit_word_t
3988 _calli_p(jit_state_t *_jit, jit_word_t i0, jit_bool_t i1)
3989 {
3990     jit_word_t          w;
3991     jit_word_t          d;
3992     jit_int32_t         reg;
3993     /* i1 means call is reachable in signed 24 bits  */
3994     if (i1) {
3995         w = _jit->pc.w;
3996         if (jit_thumb_p())      d = ((i0 - _jit->pc.w) >> 1) - 2;
3997         else                    d = ((i0 - _jit->pc.w) >> 2) - 2;
3998         assert(_s24P(d));
3999         if (jit_thumb_p())      T2_BLI(encode_thumb_jump(d));
4000         else                    BLI(d & 0x00ffffff);
4001     }
4002     else {
4003         reg = jit_get_reg(jit_class_gpr);
4004         w = _jit->pc.w;
4005         movi_p(rn(reg), i0);
4006         if (jit_thumb_p())
4007             T1_BLX(rn(reg));
4008         else
4009             BLX(rn(reg));
4010         jit_unget_reg(reg);
4011     }
4012     return (w);
4013 }
4014
4015 static void
4016 _prolog(jit_state_t *_jit, jit_node_t *node)
4017 {
4018     jit_int32_t         reg, mask, count;
4019     if (_jitc->function->define_frame || _jitc->function->assume_frame) {
4020         jit_int32_t     frame = -_jitc->function->frame;
4021         jit_check_frame();
4022         assert(_jitc->function->self.aoff >= frame);
4023         if (jit_swf_p())
4024             CHECK_SWF_OFFSET();
4025         CHECK_REG_ARGS();
4026         if (_jitc->function->assume_frame) {
4027             if (jit_thumb_p() && !_jitc->thumb)
4028                 _jitc->thumb = _jit->pc.w;
4029             return;
4030         }
4031         _jitc->function->self.aoff = frame;
4032     }
4033     if (_jitc->function->allocar)
4034         _jitc->function->self.aoff &= -8;
4035     _jitc->function->stack = ((_jitc->function->self.alen -
4036                               /* align stack at 8 bytes */
4037                               _jitc->function->self.aoff) + 7) & -8;
4038     /* If this jit_check_frame() succeeds, it actually is just a need_stack,
4039      * usually for arguments, so, allocai was not called, but pusharg*
4040      * was called increasing stack size, for negative access offsets.
4041      * This can be optimized for one less prolog instruction, that is,
4042      * do not create the frame pointer, and only add _jitc->function->stack
4043      * to sp, and on epilog, instead of moving fp to sp, just add negative
4044      * value of _jitc->function->stack. Since this condition requires a
4045      * large function body for excess arguments to called function, keep
4046      * things a bit simpler for now, as this is the only place need_stack
4047      * would be useful. */
4048     if (_jitc->function->stack)
4049         jit_check_frame();
4050
4051     for (reg = mask = count = 0; reg < jit_size(iregs); reg++) {
4052         if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
4053             mask |= 1 << rn(iregs[reg]);
4054             ++count;
4055         }
4056     }
4057     /* One extra register to keep stack 8 bytes aligned */
4058     if (count & 1) {
4059         for (reg = 4; reg < 10; reg++) {
4060             if (!(mask & (1 << reg))) {
4061                 mask |= 1 << reg;
4062                 break;
4063             }
4064         }
4065     }
4066     if (_jitc->function->need_frame || _jitc->function->need_return)
4067         mask |= (1 << _FP_REGNO) | (1 << _LR_REGNO);
4068     if (!jit_swf_p() && _jitc->function->save_reg_args &&
4069         !(_jitc->function->self.call & jit_call_varargs))
4070         mask |= 0xf;
4071
4072     if (jit_thumb_p()) {
4073         /*  switch to thumb mode (better approach would be to
4074          * ORR 1 address being called, but no clear distinction
4075          * of what is a pointer to a jit function, or if patching
4076          * a pointer to a jit function) */
4077         if (jit_exchange_p()) {
4078             ADDI(_R12_REGNO, _R15_REGNO, 1);
4079             BX(_R12_REGNO);
4080         }
4081         if (!_jitc->thumb)
4082             _jitc->thumb = _jit->pc.w;
4083         if (jit_swf_p() || (_jitc->function->save_reg_args &&
4084                             (_jitc->function->self.call & jit_call_varargs)))
4085             T2_PUSH(0xf);
4086         if (mask)
4087             T2_PUSH(mask);
4088     }
4089     else {
4090         if (jit_swf_p() || (_jitc->function->save_reg_args &&
4091                             (_jitc->function->self.call & jit_call_varargs)))
4092             PUSH(0xf);
4093         if (mask)
4094             PUSH(mask);
4095     }
4096     if (_jitc->function->need_frame)
4097         movr(_FP_REGNO, _SP_REGNO);
4098     if (_jitc->function->stack)
4099         subi(_SP_REGNO, _SP_REGNO, _jitc->function->stack);
4100     if (_jitc->function->allocar) {
4101         reg = jit_get_reg(jit_class_gpr);
4102         movi(rn(reg), _jitc->function->self.aoff);
4103         stxi_i(_jitc->function->aoffoff, _FP_REGNO, rn(reg));
4104         jit_unget_reg(reg);
4105     }
4106 }
4107
4108 static void
4109 _epilog(jit_state_t *_jit, jit_node_t *node)
4110 {
4111     jit_int32_t         reg, mask, count;
4112     if (_jitc->function->assume_frame)
4113         return;
4114
4115     for (reg = mask = count = 0; reg < jit_size(iregs); reg++) {
4116         if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
4117             mask |= 1 << rn(iregs[reg]);
4118             ++count;
4119         }
4120     }
4121     /* One extra register to keep stack 8 bytes aligned */
4122     if (count & 1) {
4123         for (reg = 4; reg < 10; reg++) {
4124             if (!(mask & (1 << reg))) {
4125                 mask |= 1 << reg;
4126                 break;
4127             }
4128         }
4129     }
4130     if (_jitc->function->need_frame || _jitc->function->need_return)
4131         mask |= (1 << _FP_REGNO) | (1 << _LR_REGNO);
4132     if (_jitc->function->need_frame)
4133         movr(_SP_REGNO, _FP_REGNO);
4134     if (!jit_swf_p() && _jitc->function->save_reg_args &&
4135         !(_jitc->function->self.call & jit_call_varargs))
4136         addi(_SP_REGNO, _SP_REGNO, 16);
4137     if (mask) {
4138         if (jit_thumb_p())
4139             T2_POP(mask);
4140         else
4141             POP(mask);
4142     }
4143     if (jit_swf_p() || (_jitc->function->save_reg_args &&
4144                         (_jitc->function->self.call & jit_call_varargs)))
4145         addi(_SP_REGNO, _SP_REGNO, 16);
4146     if (jit_thumb_p())
4147         T1_BX(_LR_REGNO);
4148     else
4149         BX(_LR_REGNO);
4150     if (jit_thumb_p() && (_jit->pc.w & 2))
4151         T1_NOP();
4152 }
4153
4154 static void
4155 _vastart(jit_state_t *_jit, jit_int32_t r0)
4156 {
4157     assert(_jitc->function->self.call & jit_call_varargs);
4158
4159     /* Initialize stack pointer to the first stack argument.
4160      * The -16 is to account for the 4 argument registers
4161      * always saved, and _jitc->function->vagp is to account
4162      * for declared arguments. */
4163     addi(r0, _FP_REGNO, jit_selfsize() - 16 + _jitc->function->vagp);
4164 }
4165
4166 static void
4167 _vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
4168 {
4169     assert(_jitc->function->self.call & jit_call_varargs);
4170
4171     /* Load argument. */
4172     ldr(r0, r1);
4173
4174     /* Update stack pointer. */
4175     addi(r1, r1, sizeof(jit_word_t));
4176 }
4177
4178 static void
4179 _patch_at(jit_state_t *_jit,
4180           jit_int32_t kind, jit_word_t instr, jit_word_t label)
4181 {
4182     jit_word_t           d;
4183     jit_thumb_t          thumb;
4184     union {
4185         jit_int16_t     *s;
4186         jit_int32_t     *i;
4187         jit_word_t       w;
4188     } u;
4189     u.w = instr;
4190     if (kind == arm_patch_call) {
4191         if (jit_thumb_p() && (jit_uword_t)instr >= _jitc->thumb) {
4192             code2thumb(thumb.s[0], thumb.s[1], u.s[0], u.s[1]);
4193             assert((thumb.i & THUMB2_BLI) == THUMB2_BLI);
4194             /* skip code to switch from arm to thumb mode */
4195             if (jit_exchange_p())
4196                 d = ((label + 8 - instr) >> 1) - 2;
4197             else
4198                 d = ((label - instr) >> 1) - 2;
4199             assert(_s24P(d));
4200             thumb.i = THUMB2_BLI | encode_thumb_jump(d);
4201             thumb2code(thumb.s[0], thumb.s[1], u.s[0], u.s[1]);
4202         }
4203         else {
4204             thumb.i = u.i[0];
4205             assert((thumb.i & 0x0f000000) == ARM_BLI);
4206             d = ((label - instr) >> 2) - 2;
4207             assert(_s24P(d));
4208             u.i[0] = (thumb.i & 0xff000000) | (d & 0x00ffffff);
4209         }
4210     }
4211     else if (kind == arm_patch_jump) {
4212         if (jit_thumb_p() && (jit_uword_t)instr >= _jitc->thumb) {
4213             code2thumb(thumb.s[0], thumb.s[1], u.s[0], u.s[1]);
4214             if ((thumb.i & THUMB2_B) == THUMB2_B) {
4215                 d = ((label - instr) >> 1) - 2;
4216                 assert(_s24P(d));
4217                 thumb.i = THUMB2_B | encode_thumb_jump(d);
4218                 thumb2code(thumb.s[0], thumb.s[1], u.s[0], u.s[1]);
4219             }
4220             else if ((thumb.i & THUMB2_B) == THUMB2_CC_B) {
4221                 d = ((label - instr) >> 1) - 2;
4222                 assert(_s20P(d));
4223                 thumb.i = THUMB2_CC_B | (thumb.i & 0x3c00000) |
4224                           encode_thumb_cc_jump(d);
4225                 thumb2code(thumb.s[0], thumb.s[1], u.s[0], u.s[1]);
4226             }
4227             else {
4228                 /* for the sake of simplicity in case choose to
4229                  * movw+movt+[bx|blx], e.g. if changing to instead
4230                  * of asserting target is reachable, load constant
4231                  * and do indirect jump if not reachable */
4232                 if ((thumb.i & 0xfbf00000) == THUMB2_MOVWI)
4233                     goto indirect_jump;
4234                 assert(!"handled branch opcode");
4235             }
4236         }
4237         else {
4238             thumb.i = u.i[0];
4239             /* 0x0e000000 because 0x01000000 is (branch&) link modifier */
4240             assert((thumb.i & 0x0e000000) == ARM_B);
4241             d = ((label - instr) >> 2) - 2;
4242             assert(_s24P(d));
4243             u.i[0] = (thumb.i & 0xff000000) | (d & 0x00ffffff);
4244         }
4245     }
4246     else if (kind == arm_patch_load) {
4247         /* offset may be negative for a forward patch because it
4248          * is relative to pc + 8, for example:
4249          *          ldr r0, [pc, #-4]
4250          *          bx r0               ;; [pc, #-8]
4251          *          .data ...           ;; [pc, #-4]
4252          *          ...                 ;; [pc]
4253          */
4254         assert(!jit_thumb_p());
4255         thumb.i = u.i[0];
4256         assert((thumb.i & 0x0f700000) == ARM_LDRI);
4257         d = label - (instr + 8);
4258         if (d < 0) {
4259             thumb.i &= ~ARM_P;
4260             d = -d;
4261         }
4262         else
4263             thumb.i |= ARM_P;
4264         assert(!(d & 0xfffff000));
4265         u.i[0] = (thumb.i & 0xfffff000) | d;
4266     }
4267     else if (kind == arm_patch_word) {
4268         if (jit_thumb_p()) {
4269             code2thumb(thumb.s[0], thumb.s[1], u.s[0], u.s[1]);
4270             assert((thumb.i & 0xfbf00000) == THUMB2_MOVWI);
4271         indirect_jump:
4272             thumb.i = ((thumb.i & 0xfbf00f00) |
4273                        ( (label & 0x0000f000) <<  4) |
4274                        ( (label & 0x00000800) << 15) |
4275                        ( (label & 0x00000700) <<  4) |
4276                        (  label & 0x000000ff));
4277             thumb2code(thumb.s[0], thumb.s[1], u.s[0], u.s[1]);
4278             label >>= 16;
4279             code2thumb(thumb.s[0], thumb.s[1], u.s[2], u.s[3]);
4280             assert((thumb.i & 0xfbf00000) == THUMB2_MOVTI);
4281             thumb.i = ((thumb.i & 0xfbf00f00) |
4282                        ( (label & 0x0000f000) <<  4) |
4283                        ( (label & 0x00000800) << 15) |
4284                        ( (label & 0x00000700) <<  4) |
4285                        (  label & 0x000000ff));
4286             thumb2code(thumb.s[0], thumb.s[1], u.s[2], u.s[3]);
4287         }
4288         else
4289             u.i[0] = label;
4290     }
4291     else
4292         assert(!"handled patch");
4293 }
4294 #endif