add a thp-based huge page alloc fallback
[pcsx_rearmed.git] / deps / lightning / lib / jit_fallback.c
CommitLineData
ba3814c1 1#if PROTO
ba86ff93
PC
2#define USE_BIT_TABLES 1
3#define USE_BITSWAP_UNROLLED 0
4#define USE_BITSWAP_LOOP 0
ba3814c1
PC
5#define fallback_save(r0) _fallback_save(_jit, r0)
6static void _fallback_save(jit_state_t*, jit_int32_t);
7#define fallback_load(r0) _fallback_load(_jit, r0)
8static void _fallback_load(jit_state_t*, jit_int32_t);
9#define fallback_save_regs(r0) _fallback_save_regs(_jit, r0)
10static void _fallback_save_regs(jit_state_t*, jit_int32_t);
11#define fallback_load_regs(r0) _fallback_load_regs(_jit, r0)
12static void _fallback_load_regs(jit_state_t*, jit_int32_t);
13#define fallback_calli(i0, i1) _fallback_calli(_jit, i0, i1)
14static void _fallback_calli(jit_state_t*, jit_word_t, jit_word_t);
15#define fallback_casx(r0,r1,r2,r3,im) _fallback_casx(_jit,r0,r1,r2,r3,im)
16static void _fallback_casx(jit_state_t *, jit_int32_t, jit_int32_t,
17 jit_int32_t, jit_int32_t, jit_word_t);
ba86ff93
PC
18#if !defined(__arm__)
19# ifndef movr_w_f
20# define movr_w_f(r0, r1) fallback_movr_w_f(r0, r1)
21# define fallback_movr_w_f(r0,r1) _fallback_movr_w_f(_jit,r0,r1)
22static void _fallback_movr_w_f(jit_state_t*, jit_int32_t, jit_int32_t);
23# endif
24# ifndef movr_f_w
25# define movr_f_w(r0, r1) fallback_movr_f_w(r0, r1)
26# define fallback_movr_f_w(r0,r1) _fallback_movr_f_w(_jit,r0,r1)
27static void _fallback_movr_f_w(jit_state_t*, jit_int32_t, jit_int32_t);
28# endif
29# if __WORDSIZE == 32
30# ifndef movr_ww_d
31# define movr_ww_d(r0, r1, r2) fallback_movr_ww_d(r0, r1, r2)
32# define fallback_movr_ww_d(r0,r1,r2) _fallback_movr_ww_d(_jit,r0,r1,r2)
33static void _fallback_movr_ww_d(jit_state_t*,
34 jit_int32_t,jit_int32_t,jit_int32_t);
35# endif
36# ifndef movr_d_ww
37# define fallback_movr_d_ww(r0,r1,r2) _fallback_movr_d_ww(_jit,r0,r1,r2)
38static void _fallback_movr_d_ww(jit_state_t*,
39 jit_int32_t,jit_int32_t,jit_int32_t);
40# define movr_d_ww(r0, r1, r2) fallback_movr_d_ww(r0, r1, r2)
41# endif
42# else
43# ifndef movr_w_d
44# define movr_w_d(r0, r1) fallback_movr_w_d(r0, r1);
45# define fallback_movr_w_d(r0,r1) _fallback_movr_w_d(_jit,r0,r1)
46static void _fallback_movr_w_d(jit_state_t*, jit_int32_t, jit_int32_t);
47# endif
48# ifndef movr_d_w
49# define movr_d_w(r0, r1) fallback_movr_d_w(r0, r1);
50# define fallback_movr_d_w(r0,r1) _fallback_movr_d_w(_jit,r0,r1)
51static void _fallback_movr_d_w(jit_state_t*, jit_int32_t, jit_int32_t);
52# endif
53# endif
54#endif
/*
 * Unaligned load/store entry points: unless the backend supplies its own
 * definition, each one maps onto the generic fallback of the same name.
 */
#if !defined(unldr)
#  define unldr(r0, r1, i0)		fallback_unldr(r0, r1, i0)
#endif
#if !defined(unldi)
#  define unldi(r0, i0, i1)		fallback_unldi(r0, i0, i1)
#endif
#if !defined(unstr)
#  define unstr(r0, r1, i0)		fallback_unstr(r0, r1, i0)
#endif
#if !defined(unsti)
#  define unsti(i0, r0, i1)		fallback_unsti(i0, r0, i1)
#endif
79bfeef6
PC
67#define fallback_clo(r0,r1) _fallback_clo(_jit,r0,r1)
68static void _fallback_clo(jit_state_t*, jit_int32_t, jit_int32_t);
69#define fallback_clz(r0,r1) _fallback_clz(_jit,r0,r1)
70static void _fallback_clz(jit_state_t*, jit_int32_t, jit_int32_t);
71#define fallback_cto(r0,r1) _fallback_cto(_jit,r0,r1)
72static void _fallback_cto(jit_state_t*, jit_int32_t, jit_int32_t);
73#define fallback_ctz(r0,r1) _fallback_ctz(_jit,r0,r1)
74static void _fallback_ctz(jit_state_t*, jit_int32_t, jit_int32_t);
ba86ff93
PC
75#define fallback_rbit(r0,r1) _fallback_rbit(_jit, r0, r1)
76static void _fallback_rbit(jit_state_t*, jit_int32_t, jit_int32_t);
77#define fallback_popcnt(r0,r1) _fallback_popcnt(_jit, r0, r1)
78static void _fallback_popcnt(jit_state_t*, jit_int32_t, jit_int32_t);
79#define fallback_lrotr(r0, r1, r2) _fallback_lrotr(_jit, r0, r1, r2)
80static void _fallback_lrotr(jit_state_t*, jit_int32_t,jit_int32_t,jit_int32_t);
81#define fallback_lroti(r0, r1, i0) _fallback_lroti(_jit, r0, r1, i0)
82static void _fallback_lroti(jit_state_t*, jit_int32_t,jit_int32_t,jit_word_t);
83#define fallback_rrotr(r0, r1, r2) _fallback_rrotr(_jit, r0, r1, r2)
84static void _fallback_rrotr(jit_state_t*, jit_int32_t,jit_int32_t,jit_int32_t);
85#define fallback_rroti(r0, r1, i0) _fallback_rroti(_jit, r0, r1, i0)
86static void _fallback_rroti(jit_state_t*, jit_int32_t,jit_int32_t,jit_word_t);
87#define fallback_ext(r0,r1,i0,i1) _fallback_ext(_jit,r0,r1,i0,i1)
88static void _fallback_ext(jit_state_t*,
89 jit_int32_t,jit_int32_t,jit_word_t,jit_word_t);
90#define fallback_ext_u(r0,r1,i0,i1) _fallback_ext_u(_jit,r0,r1,i0,i1)
91static void _fallback_ext_u(jit_state_t*,
92 jit_int32_t,jit_int32_t,jit_word_t,jit_word_t);
93#define fallback_dep(r0,r1,i0,i1) _fallback_dep(_jit,r0,r1,i0,i1)
94static void _fallback_dep(jit_state_t*,
95 jit_int32_t,jit_int32_t,jit_word_t,jit_word_t);
96#define fallback_qlshr(r0,r1,r2,r3) _fallback_qlshr(_jit,r0,r1,r2,r3)
97static void _fallback_qlshr(jit_state_t *_jit,
98 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
99#define fallback_qlshi(r0,r1,r2,i0) _fallback_qlshi(_jit,r0,r1,r2,i0)
100static void _fallback_qlshi(jit_state_t *_jit,
101 jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t);
102#define fallback_qlshr_u(r0,r1,r2,r3) _fallback_qlshr_u(_jit,r0,r1,r2,r3)
103static void _fallback_qlshr_u(jit_state_t *_jit,
104 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
105#define fallback_qlshi_u(r0,r1,r2,i0) _fallback_qlshi_u(_jit,r0,r1,r2,i0)
106static void _fallback_qlshi_u(jit_state_t *_jit,
107 jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t);
108#define fallback_qrshr(r0,r1,r2,r3) _fallback_qrshr(_jit,r0,r1,r2,r3)
109static void _fallback_qrshr(jit_state_t *_jit,
110 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
111#define fallback_qrshi(r0,r1,r2,i0) _fallback_qrshi(_jit,r0,r1,r2,i0)
112static void _fallback_qrshi(jit_state_t *_jit,
113 jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t);
114#define fallback_qrshr_u(r0,r1,r2,r3) _fallback_qrshr_u(_jit,r0,r1,r2,r3)
115static void _fallback_qrshr_u(jit_state_t *_jit,
116 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
117#define fallback_qrshi_u(r0,r1,r2,i0) _fallback_qrshi_u(_jit,r0,r1,r2,i0)
118static void _fallback_qrshi_u(jit_state_t *_jit,
119 jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t);
120#define unldr2(r0, r1, sign) _unldr2(_jit, r0, r1, sign)
121static void _unldr2(jit_state_t*,jit_int32_t,jit_int32_t,jit_bool_t);
122#define unldi2(r0, i1, sign) _unldi2(_jit, r0, i1, sign)
123static void _unldi2(jit_state_t*,jit_int32_t,jit_word_t,jit_bool_t);
124#define unldr3(r0, r1, sign) _unldr3(_jit, r0, r1, sign)
125static void _unldr3(jit_state_t*,jit_int32_t,jit_int32_t,jit_bool_t);
126#define unldi3(r0, i1, sign) _unldi3(_jit, r0, i1, sign)
127static void _unldi3(jit_state_t*,jit_int32_t,jit_word_t,jit_bool_t);
128#if __WORDSIZE == 32
129# define unldr4(r0, r1) _unldr4(_jit, r0, r1)
130static void _unldr4(jit_state_t*,jit_int32_t,jit_int32_t);
131# define unldi4(r0, i1) _unldi4(_jit, r0, i1)
132static void _unldi4(jit_state_t*,jit_int32_t,jit_word_t);
133#else
134# define unldr4(r0, r1, sign) _unldr4(_jit, r0, r1, sign)
135static void _unldr4(jit_state_t*,jit_int32_t,jit_int32_t,jit_bool_t);
136# define unldi4(r0, i1, sign) _unldi4(_jit, r0, i1, sign)
137static void _unldi4(jit_state_t*,jit_int32_t,jit_word_t,jit_bool_t);
138# define unldr5(r0, r1, sign) _unldr5(_jit, r0, r1, sign)
139static void _unldr5(jit_state_t*,jit_int32_t,jit_int32_t,jit_bool_t);
140# define unldi5(r0, i1, sign) _unldi5(_jit, r0, i1, sign)
141static void _unldi5(jit_state_t*,jit_int32_t,jit_word_t,jit_bool_t);
142# define unldr6(r0, r1, sign) _unldr6(_jit, r0, r1, sign)
143static void _unldr6(jit_state_t*,jit_int32_t,jit_int32_t,jit_bool_t);
144# define unldi6(r0, i1, sign) _unldi6(_jit, r0, i1, sign)
145static void _unldi6(jit_state_t*,jit_int32_t,jit_word_t,jit_bool_t);
146# define unldr7(r0, r1, sign) _unldr7(_jit, r0, r1, sign)
147static void _unldr7(jit_state_t*,jit_int32_t,jit_int32_t,jit_bool_t);
148# define unldi7(r0, i1, sign) _unldi7(_jit, r0, i1, sign)
149static void _unldi7(jit_state_t*,jit_int32_t,jit_word_t,jit_bool_t);
150# define unldr8(r0, r1) _unldr8(_jit, r0, r1)
151static void _unldr8(jit_state_t*,jit_int32_t,jit_int32_t);
152# define unldi8(r0, i1) _unldi8(_jit, r0, i1)
153static void _unldi8(jit_state_t*,jit_int32_t,jit_word_t);
154#endif
155#define unstr2(r0, r1) _unstr2(_jit, r0, r1)
156static void _unstr2(jit_state_t*,jit_int32_t,jit_int32_t);
157#define unsti2(r0, i0) _unsti2(_jit, r0, i0)
158static void _unsti2(jit_state_t*,jit_int32_t,jit_word_t);
159#define unstr3(r0, r1) _unstr3(_jit, r0, r1)
160static void _unstr3(jit_state_t*,jit_int32_t,jit_int32_t);
161#define unsti3(r0, i0) _unsti3(_jit, r0, i0)
162static void _unsti3(jit_state_t*,jit_int32_t,jit_word_t);
163#define unstr4(r0, r1) _unstr4(_jit, r0, r1)
164static void _unstr4(jit_state_t*,jit_int32_t,jit_int32_t);
165#define unsti4(r0, i0) _unsti4(_jit, r0, i0)
166static void _unsti4(jit_state_t*,jit_int32_t,jit_word_t);
167#if __WORDSIZE == 64
168# define unstr5(r0, r1) _unstr5(_jit, r0, r1)
169static void _unstr5(jit_state_t*,jit_int32_t,jit_int32_t);
170# define unsti5(r0, i0) _unsti5(_jit, r0, i0)
171static void _unsti5(jit_state_t*,jit_int32_t,jit_word_t);
172# define unstr6(r0, r1) _unstr6(_jit, r0, r1)
173static void _unstr6(jit_state_t*,jit_int32_t,jit_int32_t);
174# define unsti6(r0, i0) _unsti6(_jit, r0, i0)
175static void _unsti6(jit_state_t*,jit_int32_t,jit_word_t);
176# define unstr7(r0, r1) _unstr7(_jit, r0, r1)
177static void _unstr7(jit_state_t*,jit_int32_t,jit_int32_t);
178# define unsti7(r0, i0) _unsti7(_jit, r0, i0)
179static void _unsti7(jit_state_t*,jit_int32_t,jit_word_t);
180# define unstr8(r0, r1) _unstr8(_jit, r0, r1)
181static void _unstr8(jit_state_t*,jit_int32_t,jit_int32_t);
182# define unsti8(r0, i0) _unsti8(_jit, r0, i0)
183static void _unsti8(jit_state_t*,jit_int32_t,jit_word_t);
184#endif
185#define unldrw(r0, r1, i0) _unldrw(_jit, r0, r1, i0)
186static void _unldrw(jit_state_t*,jit_int32_t,jit_int32_t, jit_word_t);
187#define unldiw(r0, i0, i1) _unldiw(_jit, r0, i0, i1)
188static void _unldiw(jit_state_t*,jit_int32_t,jit_word_t, jit_word_t);
189#define unldx(r0, i0) _unldx(_jit, r0, i0)
190static void _unldx(jit_state_t*,jit_int32_t,jit_word_t);
191#define unldx_u(r0, i0) _unldx_u(_jit, r0, i0)
192static void _unldx_u(jit_state_t*,jit_int32_t,jit_word_t);
193#define fallback_unldr(r0, r1, i0) _fallback_unldr(_jit, r0, r1, i0)
194static void _fallback_unldr(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
195#define fallback_unldi(r0, i0, i1) _fallback_unldi(_jit, r0, i0, i1)
196static void _fallback_unldi(jit_state_t*,jit_int32_t,jit_word_t,jit_word_t);
197#define fallback_unldr_u(r0, r1, i0) _fallback_unldr_u(_jit, r0, r1, i0)
198static void _fallback_unldr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
199#define fallback_unldi_u(r0, i0, i1) _fallback_unldi_u(_jit, r0, i0, i1)
200static void _fallback_unldi_u(jit_state_t*,jit_int32_t,jit_word_t,jit_word_t);
201#define fallback_unstr(r0, r1, i0) _fallback_unstr(_jit, r0, r1, i0)
202static void _fallback_unstr(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
203#define fallback_unsti(i0, r0, i1) _fallback_unsti(_jit, i0, r0, i1)
204static void _fallback_unsti(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
205#if !defined(__arm__)
206# define fallback_unldr_x(r0, r1, i0) _fallback_unldr_x(_jit, r0, r1, i0)
207static void _fallback_unldr_x(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
208# define fallback_unldi_x(r0, i0, i1) _fallback_unldi_x(_jit, r0, i0, i1)
209static void _fallback_unldi_x(jit_state_t*,jit_int32_t,jit_word_t,jit_word_t);
210# define fallback_unstr_x(r0, r1, i0) _fallback_unstr_x(_jit, r0, r1, i0)
211static void _fallback_unstr_x(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
212# define fallback_unsti_x(i0, r0, i1) _fallback_unsti_x(_jit, i0, r0, i1)
213static void _fallback_unsti_x(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
214#endif
/*
 * Per-architecture spellings of three primitives used by the fallbacks.
 */

/* Scratch register allocation; s390 goes through jit_get_reg_but_zero(). */
#if defined(__s390__) || defined(__s390x__)
#  define fallback_jit_get_reg(flags)	jit_get_reg_but_zero(flags)
#else
#  define fallback_jit_get_reg(flags)	jit_get_reg(flags)
#endif

/*
 * Hook invoked around label/patch points: sync() on ia64, flush() on
 * mips, and nothing on every other target.
 */
#if defined(__ia64__)
#  define fallback_flush()		sync()
#elif defined(__mips__)
#  define fallback_flush()		flush()
#else
#  define fallback_flush()		/**/
#endif

/* Unconditional jump; several backends take an extra argument. */
#if defined(__mips__)
#  define fallback_jmpi(i0)		jmpi(i0, 1)
#elif defined(__arm__)
#  define fallback_jmpi(i0)		jmpi_p(i0, 1)
#elif defined(__s390__) || defined(__s390x__)
#  define fallback_jmpi(i0)		jmpi(i0, 1)
#else
#  define fallback_jmpi(i0)		jmpi(i0)
#endif
/*
 * Conditional branch wrappers.  s390 uses the `_p` variants of the
 * branch emitters; every other target uses the plain ones.  The first
 * argument is the branch target, followed by the two operands.
 */
#if defined(__s390__) || defined(__s390x__)
#  define fallback_beqr(i0,r0,r1)	beqr_p(i0,r0,r1)
#  define fallback_beqi(i0,r0,i1)	beqi_p(i0,r0,i1)
#  define fallback_bner(i0,r0,r1)	bner_p(i0,r0,r1)
#  define fallback_bnei(i0,r0,i1)	bnei_p(i0,r0,i1)
#  define fallback_blei(i0,r0,i1)	blei_p(i0,r0,i1)
#  define fallback_bmsr(i0,r0,r1)	bmsr_p(i0,r0,r1)
#  define fallback_bmsi(i0,r0,i1)	bmsi_p(i0,r0,i1)
#else
#  define fallback_beqr(i0,r0,r1)	beqr(i0,r0,r1)
#  define fallback_beqi(i0,r0,i1)	beqi(i0,r0,i1)
#  define fallback_bner(i0,r0,r1)	bner(i0,r0,r1)
#  define fallback_bnei(i0,r0,i1)	bnei(i0,r0,i1)
#  define fallback_blei(i0,r0,i1)	blei(i0,r0,i1)
#  define fallback_bmsr(i0,r0,r1)	bmsr(i0,r0,r1)
#  define fallback_bmsi(i0,r0,i1)	bmsi(i0,r0,i1)
#endif
/*
 * Branch patching wrappers: resolve the branch recorded in `inst` so it
 * targets `lbl`.  ia64 passes the opcode of the patched instruction to
 * patch_at(), arm passes arm_patch_jump, and the remaining targets call
 * the two-argument patch_at().
 */
#if defined(__ia64__)
#  define fallback_patch_jmpi(inst, lbl) patch_at(jit_code_jmpi, inst, lbl)
#  define fallback_patch_beqr(inst, lbl) patch_at(jit_code_beqr, inst, lbl)
#  define fallback_patch_beqi(inst, lbl) patch_at(jit_code_beqi, inst, lbl)
#  define fallback_patch_bner(inst, lbl) patch_at(jit_code_bner, inst, lbl)
#  define fallback_patch_bnei(inst, lbl) patch_at(jit_code_bnei, inst, lbl)
#  define fallback_patch_blei(inst, lbl) patch_at(jit_code_blei, inst, lbl)
#  define fallback_patch_bmsr(inst, lbl) patch_at(jit_code_bmsr, inst, lbl)
#  define fallback_patch_bmsi(inst, lbl) patch_at(jit_code_bmsi, inst, lbl)
#elif defined(__arm__)
#  define fallback_patch_jmpi(inst, lbl) patch_at(arm_patch_jump,inst, lbl)
#  define fallback_patch_beqr(inst, lbl) patch_at(arm_patch_jump,inst, lbl)
#  define fallback_patch_beqi(inst, lbl) patch_at(arm_patch_jump,inst, lbl)
#  define fallback_patch_bner(inst, lbl) patch_at(arm_patch_jump,inst, lbl)
#  define fallback_patch_bnei(inst, lbl) patch_at(arm_patch_jump,inst, lbl)
#  define fallback_patch_blei(inst, lbl) patch_at(arm_patch_jump,inst, lbl)
#  define fallback_patch_bmsr(inst, lbl) patch_at(arm_patch_jump,inst, lbl)
#  define fallback_patch_bmsi(inst, lbl) patch_at(arm_patch_jump,inst, lbl)
#else
#  define fallback_patch_jmpi(inst, lbl) patch_at(inst, lbl)
#  define fallback_patch_beqr(inst, lbl) patch_at(inst, lbl)
#  define fallback_patch_beqi(inst, lbl) patch_at(inst, lbl)
#  define fallback_patch_bner(inst, lbl) patch_at(inst, lbl)
#  define fallback_patch_bnei(inst, lbl) patch_at(inst, lbl)
#  define fallback_patch_blei(inst, lbl) patch_at(inst, lbl)
#  define fallback_patch_bmsr(inst, lbl) patch_at(inst, lbl)
#  define fallback_patch_bmsi(inst, lbl) patch_at(inst, lbl)
#endif
ba3814c1
PC
305#endif
306
307#if CODE
308static void
309_fallback_save(jit_state_t *_jit, jit_int32_t r0)
310{
311 jit_int32_t offset, regno, spec;
312 for (offset = 0; offset < JIT_R_NUM; offset++) {
313 spec = _rvs[offset].spec;
314 regno = jit_regno(spec);
315 if (regno == r0) {
316 if (!(spec & jit_class_sav))
c0c16242 317 stxi(_jitc->function->regoff[JIT_R(offset)], rn(JIT_FP), regno);
ba3814c1
PC
318 break;
319 }
320 }
321}
322
323static void
324_fallback_load(jit_state_t *_jit, jit_int32_t r0)
325{
326 jit_int32_t offset, regno, spec;
327 for (offset = 0; offset < JIT_R_NUM; offset++) {
328 spec = _rvs[offset].spec;
329 regno = jit_regno(spec);
330 if (regno == r0) {
331 if (!(spec & jit_class_sav))
c0c16242 332 ldxi(regno, rn(JIT_FP), _jitc->function->regoff[JIT_R(offset)]);
ba3814c1
PC
333 break;
334 }
335 }
336}
337
338static void
339_fallback_save_regs(jit_state_t *_jit, jit_int32_t r0)
340{
c0c16242
PC
341 jit_int32_t regno, spec;
342 for (regno = 0; regno < _jitc->reglen; regno++) {
ba3814c1 343 spec = _rvs[regno].spec;
c0c16242
PC
344 if ((jit_regset_tstbit(&_jitc->regarg, regno) ||
345 jit_regset_tstbit(&_jitc->reglive, regno)) &&
346 !(spec & jit_class_sav)) {
ba3814c1
PC
347 if (!_jitc->function->regoff[regno]) {
348 _jitc->function->regoff[regno] =
c0c16242
PC
349 jit_allocai(spec & jit_class_gpr ?
350 sizeof(jit_word_t) : sizeof(jit_float64_t));
ba3814c1
PC
351 _jitc->again = 1;
352 }
c0c16242
PC
353 if ((spec & jit_class_gpr) && rn(regno) == r0)
354 continue;
ba3814c1 355 jit_regset_setbit(&_jitc->regsav, regno);
c0c16242
PC
356 if (spec & jit_class_gpr)
357 emit_stxi(_jitc->function->regoff[regno], JIT_FP, regno);
358 else
359 emit_stxi_d(_jitc->function->regoff[regno], JIT_FP, regno);
ba3814c1
PC
360 }
361 }
362}
363
364static void
365_fallback_load_regs(jit_state_t *_jit, jit_int32_t r0)
366{
c0c16242
PC
367 jit_int32_t regno, spec;
368 for (regno = 0; regno < _jitc->reglen; regno++) {
ba3814c1 369 spec = _rvs[regno].spec;
c0c16242
PC
370 if ((jit_regset_tstbit(&_jitc->regarg, regno) ||
371 jit_regset_tstbit(&_jitc->reglive, regno)) &&
372 !(spec & jit_class_sav)) {
373 if ((spec & jit_class_gpr) && rn(regno) == r0)
374 continue;
375 jit_regset_setbit(&_jitc->regsav, regno);
376 if (spec & jit_class_gpr)
377 emit_ldxi(regno, JIT_FP, _jitc->function->regoff[regno]);
378 else
379 emit_ldxi_d(regno, JIT_FP, _jitc->function->regoff[regno]);
ba3814c1
PC
380 }
381 }
382}
383
384static void
385_fallback_calli(jit_state_t *_jit, jit_word_t i0, jit_word_t i1)
386{
c0c16242 387# if defined(__arm__)
ba3814c1 388 movi(rn(_R0), i1);
ba3814c1
PC
389# elif defined(__hppa__)
390 movi(_R26_REGNO, i1);
79bfeef6
PC
391# endif
392# if defined(__arm__)
393 calli(i0, jit_exchange_p());
394# elif defined(__mips__)
395 calli(i0, 0);
396# elif defined(__powerpc__) && _CALL_SYSV
397 calli(i0, 0);
398# elif defined(__s390__) || defined(__s390x__)
399 calli(i0, 0);
400# else
ba3814c1 401 calli(i0);
79bfeef6 402# endif
ba3814c1
PC
403}
404
#ifdef NEED_FALLBACK_CASX
/*
 * Emit a compare-and-swap emulated with one function-static pthread
 * mutex shared by all generated code.
 *
 *   r0  receives 1 when the word at the address equalled r2 (and r3 was
 *       stored there), 0 otherwise
 *   r1  register holding the address, or _NOREG to use immediate i0
 *   r2  expected value
 *   r3  replacement value
 *
 * Generated sequence: spill live registers, lock the mutex, compare the
 * loaded word with r2, store r3 on a match, unlock, then restore the
 * registers.  r1, r2, r3 and r0 are reloaded from their save slots after
 * each call because the call may clobber them.
 */
static void
_fallback_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
	       jit_int32_t r2, jit_int32_t r3, jit_word_t i0)
{
    /* XXX only attempts to fallback cas for lightning jit code */
    static pthread_mutex_t	cas_lock = PTHREAD_MUTEX_INITIALIZER;
    jit_int32_t			addr_reg, addr_is_imm;
    jit_word_t			mismatch, unlock_pc;

    addr_is_imm = (r1 == _NOREG);
    if (addr_is_imm) {
	/* Materialize the immediate address in a temporary register. */
	addr_reg = fallback_jit_get_reg(jit_class_gpr|jit_class_sav);
	r1 = rn(addr_reg);
	movi(r1, i0);
    }

    fallback_save_regs(r0);
    fallback_calli((jit_word_t)pthread_mutex_lock, (jit_word_t)&cas_lock);

    /* r0 = (*r1 == r2) */
    fallback_load(r1);
    ldr(r0, r1);
    fallback_load(r2);
    eqr(r0, r0, r2);
    /* Keep the result in r0's slot so it survives the unlock call. */
    fallback_save(r0);
    mismatch = fallback_bnei(_jit->pc.w, r0, 1);

    /* Values matched: store the replacement word. */
    fallback_load(r3);
#if __WORDSIZE == 32
    str_i(r1, r3);
#else
    str_l(r1, r3);
#endif

    /* done: */
    fallback_flush();
    unlock_pc = _jit->pc.w;
    fallback_calli((jit_word_t)pthread_mutex_unlock, (jit_word_t)&cas_lock);
    fallback_load(r0);
    fallback_flush();
    fallback_patch_bnei(mismatch, unlock_pc);

    fallback_load_regs(r0);
    if (addr_is_imm)
	jit_unget_reg(addr_reg);
}
#endif
445
#ifdef fallback_movr_w_f
/*
 * Emit a raw 32-bit move from integer register r1 to float register r0
 * by storing r1 to the function's conversion slot and loading it back
 * as a float.  The 8-byte slot is allocated on first use, which sets
 * _jitc->again.
 */
static void
_fallback_movr_w_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
    jit_word_t		slot;

    if (!_jitc->function->cvt_offset) {
	_jitc->function->cvt_offset = jit_allocai(8);
	_jitc->again = 1;
    }
    slot = _jitc->function->cvt_offset;

    stxi_i(slot, rn(JIT_FP), r1);
    /* Useful for special debug builds */
#if defined(__i386__) || defined(__x86_64__)
    if (jit_cpu.sse2)
	sse_ldxi_f(r0, rn(JIT_FP), slot);
    else
	x87_ldxi_f(r0, rn(JIT_FP), slot);
#else
    ldxi_f(r0, rn(JIT_FP), slot);
#endif
}
#endif
466
#ifdef fallback_movr_f_w
/*
 * Emit a raw 32-bit move from float register r1 to integer register r0
 * by storing r1 to the function's conversion slot and loading it back
 * as an int.  The 8-byte slot is allocated on first use, which sets
 * _jitc->again.
 */
static void
_fallback_movr_f_w(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
    jit_word_t		slot;

    if (!_jitc->function->cvt_offset) {
	_jitc->function->cvt_offset = jit_allocai(8);
	_jitc->again = 1;
    }
    slot = _jitc->function->cvt_offset;

#if defined(__i386__) || defined(__x86_64__)
    if (jit_cpu.sse2)
	sse_stxi_f(slot, rn(JIT_FP), r1);
    else
	x87_stxi_f(slot, rn(JIT_FP), r1);
#else
    stxi_f(slot, rn(JIT_FP), r1);
#endif
    ldxi_i(r0, rn(JIT_FP), slot);
}
#endif
486
487#if __WORDSIZE == 32
#  ifdef fallback_movr_ww_d
/*
 * Emit a raw move of the integer register pair (r1, r2) into double
 * register r0.  Both words are stored to the function's conversion slot
 * and the slot is loaded back as a double.  On little-endian hosts r1
 * goes to the lower address and r2 to the higher one; big-endian hosts
 * use the opposite order.
 */
static void
_fallback_movr_ww_d(jit_state_t *_jit,
		    jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
    jit_word_t		slot;
    jit_int32_t		at_low, at_high;

    if (!_jitc->function->cvt_offset) {
	_jitc->function->cvt_offset = jit_allocai(8);
	_jitc->again = 1;
    }
    slot = _jitc->function->cvt_offset;

    at_low  = (__BYTE_ORDER == __LITTLE_ENDIAN) ? r1 : r2;
    at_high = (__BYTE_ORDER == __LITTLE_ENDIAN) ? r2 : r1;
    stxi_i(slot, rn(JIT_FP), at_low);
    stxi_i(slot + 4, rn(JIT_FP), at_high);
#    if defined(__i386__) || defined(__x86_64__)
    if (jit_cpu.sse2)
	sse_ldxi_d(r0, rn(JIT_FP), slot);
    else
	x87_ldxi_d(r0, rn(JIT_FP), slot);
#    else
    ldxi_d(r0, rn(JIT_FP), slot);
#    endif
}
#  endif
511
#  ifdef fallback_movr_d_ww
/*
 * Emit a raw move of double register r2 into the integer register pair
 * (r0, r1).  The double is stored to the function's conversion slot and
 * the two words are loaded back.  On little-endian hosts r0 receives the
 * word at the lower address and r1 the one at the higher address;
 * big-endian hosts use the opposite order.
 */
static void
_fallback_movr_d_ww(jit_state_t *_jit,
		    jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
    jit_word_t		slot;
    jit_int32_t		from_low, from_high;

    if (!_jitc->function->cvt_offset) {
	_jitc->function->cvt_offset = jit_allocai(8);
	_jitc->again = 1;
    }
    slot = _jitc->function->cvt_offset;

#    if defined(__i386__) || defined(__x86_64__)
    if (jit_cpu.sse2)
	sse_stxi_d(slot, rn(JIT_FP), r2);
    else
	x87_stxi_d(slot, rn(JIT_FP), r2);
#    else
    stxi_d(slot, rn(JIT_FP), r2);
#    endif
    from_low  = (__BYTE_ORDER == __LITTLE_ENDIAN) ? r0 : r1;
    from_high = (__BYTE_ORDER == __LITTLE_ENDIAN) ? r1 : r0;
    ldxi_i(from_low, rn(JIT_FP), slot);
    ldxi_i(from_high, rn(JIT_FP), slot + 4);
}
#  endif
535
536#else
#  ifdef fallback_movr_w_d
/*
 * Emit a raw 64-bit move from integer register r1 to double register r0
 * by storing r1 to the function's conversion slot and loading it back
 * as a double.  The 8-byte slot is allocated on first use, which sets
 * _jitc->again.
 */
static void
_fallback_movr_w_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
    jit_word_t		slot;

    if (!_jitc->function->cvt_offset) {
	_jitc->function->cvt_offset = jit_allocai(8);
	_jitc->again = 1;
    }
    slot = _jitc->function->cvt_offset;

    stxi_l(slot, rn(JIT_FP), r1);
#    if defined(__i386__) || defined(__x86_64__)
    if (jit_cpu.sse2)
	sse_ldxi_d(r0, rn(JIT_FP), slot);
    else
	x87_ldxi_d(r0, rn(JIT_FP), slot);
#    else
    ldxi_d(r0, rn(JIT_FP), slot);
#    endif
}
#  endif
556
#  ifdef fallback_movr_d_w
/*
 * Emit a raw 64-bit move from double register r1 to integer register r0
 * by storing r1 to the function's conversion slot and loading it back
 * as a long.  The 8-byte slot is allocated on first use, which sets
 * _jitc->again.
 */
static void
_fallback_movr_d_w(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
    jit_word_t		slot;

    if (!_jitc->function->cvt_offset) {
	_jitc->function->cvt_offset = jit_allocai(8);
	_jitc->again = 1;
    }
    slot = _jitc->function->cvt_offset;

#    if defined(__i386__) || defined(__x86_64__)
    if (jit_cpu.sse2)
	sse_stxi_d(slot, rn(JIT_FP), r1);
    else
	x87_stxi_d(slot, rn(JIT_FP), r1);
#    else
    stxi_d(slot, rn(JIT_FP), r1);
#    endif
    ldxi_l(r0, rn(JIT_FP), slot);
}
#  endif
576#endif
79bfeef6
PC
577
578static void
579_fallback_clo(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
580{
581 jit_word_t clz, done;
582 comr(r0, r1);
583 clz = fallback_bnei(_jit->pc.w, r0, 0);
584 movi(r0, __WORDSIZE);
ba86ff93 585 fallback_flush();
79bfeef6 586 done = fallback_jmpi(_jit->pc.w);
ba86ff93
PC
587 fallback_flush();
588 fallback_patch_bnei(clz, _jit->pc.w);
79bfeef6 589 fallback_clz(r0, r0);
ba86ff93 590 fallback_flush();
79bfeef6
PC
591 fallback_patch_jmpi(done, _jit->pc.w);
592}
593
594static void
595_fallback_clz(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
596{
ba86ff93
PC
597# if USE_BIT_TABLES
598 /* t0 = __WORDSIZE - 8;
599 * loop:
600 * t1 = r1 >> t0;
601 * if (t1)
602 * goto done;
603 * t0 -= 8;
604 * if (t0)
605 * goto loop;
606 * t1 = r1;
607 * done:
608 * r0 = __WORDSIZE - 8 - t0 + clz_tab[t1]
609 */
610 /* Table below is count of leading zeros of 8 bit values. */
611 static const jit_uint8_t clz_tab[256] = {
612 8,7,6,6,5,5,5,5,4,4,4,4,4,4,4,4,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
613 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
614 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
615 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
616 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
617 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
618 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
619 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
620 };
621 jit_int32_t t0, t1;
622 jit_word_t loop, done;
623 t0 = fallback_jit_get_reg(jit_class_gpr);
624 t1 = fallback_jit_get_reg(jit_class_gpr);
625 movi(rn(t0), __WORDSIZE - 8);
626 fallback_flush();
627 loop = _jit->pc.w;
628 rshr_u(rn(t1), r1, rn(t0));
629 done = fallback_bnei(_jit->pc.w, rn(t1), 0);
630 subi(rn(t0), rn(t0), 8);
631 fallback_bnei(loop, rn(t0), 0);
632 movr(rn(t1), r1);
633 fallback_flush();
634 fallback_patch_bnei(done, _jit->pc.w);
635 rsbi(r0, rn(t0), __WORDSIZE - 8);
636 movi(rn(t0), (jit_word_t)clz_tab);
637 ldxr_uc(rn(t1), rn(t0), rn(t1));
638 addr(r0, r0, rn(t1));
639 jit_unget_reg(t1);
640 jit_unget_reg(t0);
641# else
79bfeef6
PC
642 jit_int32_t r1_reg, r2, r2_reg;
643 jit_word_t clz, l32, l16, l8, l4, l2, l1;
644 l32 = fallback_bnei(_jit->pc.w, r1, 0);
645 movi(r0, __WORDSIZE);
646 clz = fallback_jmpi(_jit->pc.w);
ba86ff93
PC
647 fallback_flush();
648 fallback_patch_bnei(l32, _jit->pc.w);
649 r2_reg = fallback_jit_get_reg(jit_class_gpr);
79bfeef6 650 r2 = rn(r2_reg);
ba86ff93 651 r1_reg = fallback_jit_get_reg(jit_class_gpr);
79bfeef6
PC
652 movr(rn(r1_reg), r1);
653 r1 = rn(r1_reg);
654 movi(r0, 0);
ba86ff93 655# if __WORDSIZE == 64
79bfeef6
PC
656 movi(r2, 0xffffffff00000000UL);
657 l32 = fallback_bmsr(_jit->pc.w, r1, r2);
658 lshi(r1, r1, 32);
659 addi(r0, r0, 32);
ba86ff93
PC
660 fallback_flush();
661 fallback_patch_bmsr(l32, _jit->pc.w);
79bfeef6 662 lshi(r2, r2, 16);
ba86ff93 663# else
79bfeef6 664 movi(r2, 0xffff0000UL);
ba86ff93 665# endif
79bfeef6
PC
666 l16 = fallback_bmsr(_jit->pc.w, r1, r2);
667 lshi(r1, r1, 16);
668 addi(r0, r0, 16);
ba86ff93
PC
669 fallback_flush();
670 fallback_patch_bmsr(l16, _jit->pc.w);
79bfeef6
PC
671 lshi(r2, r2, 8);
672 l8 = fallback_bmsr(_jit->pc.w, r1, r2);
673 lshi(r1, r1, 8);
674 addi(r0, r0, 8);
ba86ff93
PC
675 fallback_flush();
676 fallback_patch_bmsr(l8, _jit->pc.w);
79bfeef6
PC
677 lshi(r2, r2, 4);
678 l4 = fallback_bmsr(_jit->pc.w, r1, r2);
679 lshi(r1, r1, 4);
680 addi(r0, r0, 4);
ba86ff93
PC
681 fallback_flush();
682 fallback_patch_bmsr(l4, _jit->pc.w);
79bfeef6
PC
683 lshi(r2, r2, 2);
684 l2 = fallback_bmsr(_jit->pc.w, r1, r2);
685 lshi(r1, r1, 2);
686 addi(r0, r0, 2);
ba86ff93
PC
687 fallback_flush();
688 fallback_patch_bmsr(l2, _jit->pc.w);
79bfeef6
PC
689 lshi(r2, r2, 1);
690 l1 = fallback_bmsr(_jit->pc.w, r1, r2);
691 addi(r0, r0, 1);
ba86ff93
PC
692 fallback_flush();
693 fallback_patch_bmsr(l1, _jit->pc.w);
79bfeef6
PC
694 fallback_patch_jmpi(clz, _jit->pc.w);
695 jit_unget_reg(r2_reg);
696 jit_unget_reg(r1_reg);
ba86ff93 697# endif
79bfeef6
PC
698}
699
700static void
701_fallback_cto(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
702{
703 jit_word_t ctz, done;
704 comr(r0, r1);
705 ctz = fallback_bnei(_jit->pc.w, r0, 0);
706 movi(r0, __WORDSIZE);
707 done = fallback_jmpi(_jit->pc.w);
ba86ff93
PC
708 fallback_flush();
709 fallback_patch_bnei(ctz, _jit->pc.w);
79bfeef6 710 fallback_ctz(r0, r0);
ba86ff93 711 fallback_flush();
79bfeef6
PC
712 fallback_patch_jmpi(done, _jit->pc.w);
713}
714
715static void
716_fallback_ctz(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
717{
ba86ff93
PC
718# if USE_BIT_TABLES
719 /* Adapted from http://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightModLookup
720 * Table for 64 bits was recomputed choosing 67 as next prime number.
721 * The cost of the modulo might not compensate and could be better to
722 * use the alternate version (or rbit and use clz).
723 */
724 jit_int32_t t0;
725# if __WORDSIZE == 32
726 static const jit_uint8_t mod37[] = {
727 32, 0, 1, 26, 2, 23, 27, 0, 3, 16, 24, 30, 28, 11, 0, 13,
728 4, 7, 17, 0, 25, 22, 31, 15, 29, 10, 12, 6, 0, 21, 14, 9,
729 5, 20, 8, 19, 18
730 };
731 /* return mod37[(-r1 & r1) % 37]; */
732# else
733 static const jit_uint8_t mod67[] = {
734 64, 0, 1, 39, 2, 15, 40, 23, 3, 12, 16, 59, 41, 19, 24, 54,
735 4, 0, 13, 10, 17, 62, 60, 28, 42, 30, 20, 51, 25, 44, 55, 47,
736 5, 32, 0, 38, 14, 22, 11, 58, 18, 53, 63, 9, 61, 27, 29, 50,
737 43, 46, 31, 37, 21, 57, 52, 8, 26, 49, 45, 36, 56, 7, 48, 35,
738 6, 34, 33
739 };
740 /* return mod67[(-r1 & r1) % 67]; */
741# endif
742 t0 = fallback_jit_get_reg(jit_class_gpr);
743 if (r0 == r1) {
744 negr(rn(t0), r1);
745 andr(r0, rn(t0), r1);
746 }
747 else {
748 negr(r0, r1);
749 andr(r0, r0, r1);
750 }
751# if __WORDSIZE == 32
752 remi_u(r0, r0, 37);
753 movi(rn(t0), (jit_word_t)mod37);
754# else
755 remi_u(r0, r0, 67);
756 movi(rn(t0), (jit_word_t)mod67);
757# endif
758 ldxr_uc(r0, rn(t0), r0);
759 jit_unget_reg(t0);
760# else
79bfeef6
PC
761 jit_int32_t r1_reg, r2, r2_reg;
762 jit_word_t ctz, l32, l16, l8, l4, l2, l1;
763 l32 = fallback_bnei(_jit->pc.w, r1, 0);
764 movi(r0, __WORDSIZE);
765 ctz = fallback_jmpi(_jit->pc.w);
ba86ff93
PC
766 fallback_flush();
767 fallback_patch_bnei(l32, _jit->pc.w);
768 r2_reg = fallback_jit_get_reg(jit_class_gpr);
79bfeef6 769 r2 = rn(r2_reg);
ba86ff93 770 r1_reg = fallback_jit_get_reg(jit_class_gpr);
79bfeef6
PC
771 movr(rn(r1_reg), r1);
772 r1 = rn(r1_reg);
773 movi(r0, 0);
ba86ff93 774# if __WORDSIZE == 64
79bfeef6
PC
775 movi(r2, 0xffffffffUL);
776 l32 = fallback_bmsr(_jit->pc.w, r1, r2);
777 rshi_u(r1, r1, 32);
778 addi(r0, r0, 32);
ba86ff93
PC
779 fallback_flush();
780 fallback_patch_bmsr(l32, _jit->pc.w);
79bfeef6 781 rshi(r2, r2, 16);
ba86ff93 782# else
79bfeef6 783 movi(r2, 0xffffUL);
ba86ff93 784# endif
79bfeef6
PC
785 l16 = fallback_bmsr(_jit->pc.w, r1, r2);
786 rshi_u(r1, r1, 16);
787 addi(r0, r0, 16);
ba86ff93
PC
788 fallback_flush();
789 fallback_patch_bmsr(l16, _jit->pc.w);
79bfeef6
PC
790 rshi(r2, r2, 8);
791 l8 = fallback_bmsr(_jit->pc.w, r1, r2);
792 rshi_u(r1, r1, 8);
793 addi(r0, r0, 8);
ba86ff93
PC
794 fallback_flush();
795 fallback_patch_bmsr(l8, _jit->pc.w);
79bfeef6
PC
796 rshi(r2, r2, 4);
797 l4 = fallback_bmsr(_jit->pc.w, r1, r2);
798 rshi_u(r1, r1, 4);
799 addi(r0, r0, 4);
ba86ff93
PC
800 fallback_flush();
801 fallback_patch_bmsr(l4, _jit->pc.w);
79bfeef6
PC
802 rshi(r2, r2, 2);
803 l2 = fallback_bmsr(_jit->pc.w, r1, r2);
804 rshi_u(r1, r1, 2);
805 addi(r0, r0, 2);
ba86ff93
PC
806 fallback_flush();
807 fallback_patch_bmsr(l2, _jit->pc.w);
79bfeef6
PC
808 rshi(r2, r2, 1);
809 l1 = fallback_bmsr(_jit->pc.w, r1, r2);
810 addi(r0, r0, 1);
ba86ff93
PC
811 fallback_flush();
812 fallback_patch_bmsr(l1, _jit->pc.w);
79bfeef6
PC
813 fallback_patch_jmpi(ctz, _jit->pc.w);
814 jit_unget_reg(r2_reg);
815 jit_unget_reg(r1_reg);
ba86ff93
PC
816# endif
817}
818
819static void
820_fallback_rbit(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
821{
822# if USE_BIT_TABLES
823 /* t0 = r1;
824 * t1 = t0 & 0xff;
825 * t2 = swap_tab;
826 * r0 = t2[t1];
827 * t3 = 8;
828 * loop:
829 * t1 = t0 >> t3;
830 * t1 &= 0xff;
831 * r0 <<= 8;
832 * r0 |= t2[t1];
833 * t3 += 8;
834 * if (t3 < __WORDSIZE)
835 * goto loop;
836 */
837 jit_word_t loop;
838 jit_int32_t t0, r1_reg, t1, t2, t3;
839 static const unsigned char swap_tab[256] = {
840 0, 128, 64, 192, 32, 160, 96, 224,
841 16, 144, 80, 208, 48, 176, 112, 240,
842 8, 136, 72, 200, 40, 168, 104, 232,
843 24, 152, 88, 216 ,56, 184, 120, 248,
844 4, 132, 68, 196, 36, 164, 100, 228,
845 20, 148, 84, 212, 52, 180, 116, 244,
846 12, 140, 76, 204, 44, 172, 108, 236,
847 28, 156, 92, 220, 60, 188, 124, 252,
848 2, 130, 66, 194, 34, 162, 98, 226,
849 18, 146, 82, 210, 50, 178, 114, 242,
850 10, 138, 74, 202, 42, 170, 106, 234,
851 26, 154, 90, 218, 58, 186, 122, 250,
852 6, 134, 70, 198, 38, 166, 102, 230,
853 22, 150, 86, 214, 54, 182, 118, 246,
854 14, 142, 78, 206, 46, 174, 110, 238,
855 30, 158, 94, 222, 62, 190, 126, 254,
856 1, 129, 65, 193, 33, 161, 97, 225,
857 17, 145, 81, 209, 49, 177, 113, 241,
858 9, 137, 73, 201, 41, 169, 105, 233,
859 25, 153, 89, 217, 57, 185, 121, 249,
860 5, 133, 69, 197, 37, 165, 101, 229,
861 21, 149, 85, 213, 53, 181, 117, 245,
862 13, 141, 77, 205, 45, 173, 109, 237,
863 29, 157, 93, 221, 61, 189, 125, 253,
864 3, 131, 67, 195, 35, 163, 99, 227,
865 19, 147, 83, 211, 51, 179, 115, 243,
866 11, 139, 75, 203, 43, 171, 107, 235,
867 27, 155, 91, 219, 59, 187, 123, 251,
868 7, 135, 71, 199, 39, 167, 103, 231,
869 23, 151, 87, 215, 55, 183, 119, 247,
870 15, 143, 79, 207, 47, 175, 111, 239,
871 31, 159, 95, 223, 63, 191, 127, 255
872 };
873 if (r0 == r1) {
874 t0 = fallback_jit_get_reg(jit_class_gpr);
875 r1_reg = rn(t0);
876 }
877 else {
878 t0 = JIT_NOREG;
879 r1_reg = r1;
880 }
881 t1 = fallback_jit_get_reg(jit_class_gpr);
882 t2 = fallback_jit_get_reg(jit_class_gpr);
883 t3 = fallback_jit_get_reg(jit_class_gpr);
884 if (r0 == r1)
885 movr(rn(t0), r1);
886 extr_uc(rn(t1), r1_reg);
887 movi(rn(t2), (jit_word_t)swap_tab);
888 ldxr_uc(r0, rn(t2), rn(t1));
889 movi(rn(t3), 8);
890 fallback_flush();
891 loop = _jit->pc.w;
892 rshr(rn(t1), r1_reg, rn(t3));
893 extr_uc(rn(t1), rn(t1));
894 lshi(r0, r0, 8);
895 ldxr_uc(rn(t1), rn(t2), rn(t1));
896 orr(r0, r0, rn(t1));
897 addi(rn(t3), rn(t3), 8);
898 blti(loop, rn(t3), __WORDSIZE);
899 jit_unget_reg(t3);
900 jit_unget_reg(t2);
901 jit_unget_reg(t1);
902 if (t0 != JIT_NOREG)
903 jit_unget_reg(t0);
904# elif USE_BITSWAP_UNROLLED
905/* http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel */
906/*
907unsigned int v; // 32-bit word to reverse bit order
908
909// swap odd and even bits
910v = ((v >> 1) & 0x55555555) | ((v & 0x55555555) << 1);
911// swap consecutive pairs
912v = ((v >> 2) & 0x33333333) | ((v & 0x33333333) << 2);
913// swap nibbles ...
914v = ((v >> 4) & 0x0F0F0F0F) | ((v & 0x0F0F0F0F) << 4);
915// swap bytes
916v = ((v >> 8) & 0x00FF00FF) | ((v & 0x00FF00FF) << 8);
917// swap 2-byte long pairs
918v = ( v >> 16 ) | ( v << 16);
919 */
920 jit_int32_t t0, t1, t2, t3, t4;
921 movr(r0, r1);
922 t0 = fallback_jit_get_reg(jit_class_gpr);
923 t1 = fallback_jit_get_reg(jit_class_gpr);
924 t2 = fallback_jit_get_reg(jit_class_gpr);
925 movi(rn(t0), __WORDSIZE == 32 ? 0x55555555L : 0x5555555555555555L);
926 rshi_u(rn(t1), r0, 1); /* t1 = v >> 1 */
927 andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
928 andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/
929 lshi(rn(t2), rn(t2), 1); /* t2 <<= 1 */
930 orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
931 movi(rn(t0), __WORDSIZE == 32 ? 0x33333333L : 0x3333333333333333L);
932 rshi_u(rn(t1), r0, 2); /* t1 = v >> 2 */
933 andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
934 andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/
935 lshi(rn(t2), rn(t2), 2); /* t2 <<= 2 */
936 orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
937 movi(rn(t0), __WORDSIZE == 32 ? 0x0f0f0f0fL : 0x0f0f0f0f0f0f0f0fL);
938 rshi_u(rn(t1), r0, 4); /* t1 = v >> 4 */
939 andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
940 andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/
941 lshi(rn(t2), rn(t2), 4); /* t2 <<= 4 */
942 orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
943 movi(rn(t0), __WORDSIZE == 32 ? 0x00ff00ffL : 0x00ff00ff00ff00ffL);
944 rshi_u(rn(t1), r0, 8); /* t1 = v >> 8 */
945 andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
946 andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/
947 lshi(rn(t2), rn(t2), 8); /* t2 <<= 8 */
948 orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
949# if __WORDSIZE == 32
950 rshi_u(rn(t1), r0, 16); /* t1 = v >> 16 */
951 lshi(rn(t2), r0, 16); /* t2 = v << 16 */
952 orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
953# else
954 movi(rn(t0), 0x0000ffff0000ffffL);
955 rshi_u(rn(t1), r0, 16); /* t1 = v >> 16 */
956 andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
957 andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/
958 lshi(rn(t2), rn(t2), 16); /* t2 <<= 16 */
959 orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
960 rshi_u(rn(t1), r0, 32); /* t1 = v >> 32 */
961 lshi(rn(t2), r0, 32); /* t2 = v << 32 */
962 orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
963# endif
964 jit_unget_reg(t2);
965 jit_unget_reg(t1);
966 jit_unget_reg(t0);
967# elif USE_BITSWAP_LOOP
968/* http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel */
969/*
970unsigned int s = sizeof(v) * CHAR_BIT; // bit size; must be power of 2
971unsigned int mask = ~0;
972while ((s >>= 1) > 0)
973{
974 mask ^= (mask << s);
975 v = ((v >> s) & mask) | ((v << s) & ~mask);
976}
977*/
978 jit_int32_t s, mask;
979 jit_word_t loop, done, t0, t1;
980 movr(v, r1);
981 s = fallback_jit_get_reg(jit_class_gpr);
982 movi(rn(s), __WORDSIZE); /* s = sizeof(v) * CHAR_BIT; */
983 mask = fallback_jit_get_reg(jit_class_gpr);
984 movi(rn(mask), ~0L); /* mask = ~0; */
985 flush();
986 loop = _jit->pc.w; /* while ((s >>= 1) > 0) */
987 rshi(rn(s), rn(s), 1); /* (s >>= 1) */
988 done = blei(_jit->pc.w, rn(s), 0); /* no loop if s <= 0 */
989 t0 = fallback_jit_get_reg(jit_class_gpr);
990 lshr(rn(t0), rn(mask), rn(s)); /* t0 = (mask << s) */
991 xorr(rn(mask), rn(mask), rn(t0)); /* mask ^= t0 */
992 rshr(rn(t0), v, rn(s)); /* t0 = v >> s */
993 andr(rn(t0), rn(t0), rn(mask)); /* t0 = t0 & mask */
994 t1 = fallback_jit_get_reg(jit_class_gpr);
995 lshr(rn(t1), v, rn(s)); /* t1 = v << s */
996 comr(v, rn(mask)); /* v = ~mask */
997 andr(rn(t1), v, rn(t1)); /* t1 = t1 & v */
998 orr(v, rn(t0), rn(t1)); /* v = t0 | t1 */
999 jmpi(loop);
1000 flush();
1001 patch_at(done, _jit->pc.w);
1002 jit_unget_reg(t1);
1003 jit_unget_reg(t0);
1004 jit_unget_reg(mask);
1005 jit_unget_reg(s);
1006# endif
1007}
1008
1009static void
1010_fallback_popcnt(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1011{
1012 /* Same approach as rbitr */
1013 /* t0 = r1;
1014 * t1 = t0 & 0xff;
1015 * t2 = pop_tab;
1016 * r0 = t2[t1];
1017 * t3 = 8;
1018 * loop:
1019 * t1 = t0 >> t3;
1020 * t1 &= 0xff;
1021 * r0 <<= 8;
1022 * r0 |= t2[t1];
1023 * t3 += 8;
1024 * if (t3 < __WORDSIZE)
1025 * goto loop;
1026 */
1027 jit_word_t loop;
1028 jit_int32_t t0, r1_reg, t1, t2, t3;
1029 static const unsigned char pop_tab[256] = {
1030 0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,
1031 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
1032 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
1033 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
1034 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
1035 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
1036 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
1037 3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,4,5,5,6,5,6,6,7,5,6,6,7,6,7,7,8
1038 };
1039 if (r0 == r1) {
1040 t0 = fallback_jit_get_reg(jit_class_gpr);
1041 r1_reg = rn(t0);
1042 }
1043 else {
1044 t0 = JIT_NOREG;
1045 r1_reg = r1;
1046 }
1047 t1 = fallback_jit_get_reg(jit_class_gpr);
1048 t2 = fallback_jit_get_reg(jit_class_gpr);
1049 t3 = fallback_jit_get_reg(jit_class_gpr);
1050 if (r0 == r1)
1051 movr(rn(t0), r1);
1052 extr_uc(rn(t1), r1_reg);
1053 movi(rn(t2), (jit_word_t)pop_tab);
1054 ldxr_uc(r0, rn(t2), rn(t1));
1055 movi(rn(t3), 8);
1056 fallback_flush();
1057 loop = _jit->pc.w;
1058 rshr(rn(t1), r1_reg, rn(t3));
1059 extr_uc(rn(t1), rn(t1));
1060 ldxr_uc(rn(t1), rn(t2), rn(t1));
1061 addr(r0, r0, rn(t1));
1062 addi(rn(t3), rn(t3), 8);
1063 blti(loop, rn(t3), __WORDSIZE);
1064 jit_unget_reg(t3);
1065 jit_unget_reg(t2);
1066 jit_unget_reg(t1);
1067 if (t0 != JIT_NOREG)
1068 jit_unget_reg(t0);
1069}
1070
1071static void
1072_fallback_lrotr(jit_state_t *_jit,
1073 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1074{
1075 /* r0 = (r1 << r2) | (r1 >> (__WORDSIZE - r2)) */
1076 jit_int32_t t0, t1;
1077 t0 = fallback_jit_get_reg(jit_class_gpr);
1078 if (r0 == r1 || r0 == r2) {
1079 t1 = fallback_jit_get_reg(jit_class_gpr);
1080 lshr(rn(t0), r1, r2);
1081 rsbi(rn(t1), r2, __WORDSIZE);
1082 rshr_u(rn(t1), r1, rn(t1));
1083 orr(r0, rn(t0), rn(t1));
1084 jit_unget_reg(t1);
1085 }
1086 else {
1087 lshr(r0, r1, r2);
1088 rsbi(rn(t0), r2, __WORDSIZE);
1089 rshr_u(rn(t0), r1, rn(t0));
1090 orr(r0, r0, rn(t0));
1091 }
1092 jit_unget_reg(t0);
1093}
1094
1095static void
1096_fallback_lroti(jit_state_t *_jit,
1097 jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1098{
1099 jit_int32_t t0;
1100 t0 = fallback_jit_get_reg(jit_class_gpr);
1101 lshi(rn(t0), r1, i0);
1102 rshi_u(r0, r1, __WORDSIZE - i0);
1103 orr(r0, r0, rn(t0));
1104 jit_unget_reg(t0);
1105}
1106
1107static void
1108_fallback_rrotr(jit_state_t *_jit,
1109 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1110{
1111 /* r0 = (r1 >> r2) | (r1 << (__WORDSIZE - r2)) */
1112 jit_int32_t t0, t1;
1113 t0 = fallback_jit_get_reg(jit_class_gpr);
1114 if (r0 == r1 || r0 == r2) {
1115 t1 = fallback_jit_get_reg(jit_class_gpr);
1116 rshr_u(rn(t0), r1, r2);
1117 rsbi(rn(t1), r2, __WORDSIZE);
1118 lshr(rn(t1), r1, rn(t1));
1119 orr(r0, rn(t0), rn(t1));
1120 jit_unget_reg(t1);
1121 }
1122 else {
1123 rshr_u(r0, r1, r2);
1124 rsbi(rn(t0), r2, __WORDSIZE);
1125 lshr(rn(t0), r1, rn(t0));
1126 orr(r0, r0, rn(t0));
1127 }
1128 jit_unget_reg(t0);
1129}
1130
1131static void
1132_fallback_rroti(jit_state_t *_jit,
1133 jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1134{
1135 jit_int32_t t0;
1136 t0 = fallback_jit_get_reg(jit_class_gpr);
1137 rshi_u(rn(t0), r1, i0);
1138 lshi(r0, r1, __WORDSIZE - i0);
1139 orr(r0, r0, rn(t0));
1140 jit_unget_reg(t0);
1141}
1142
1143static void
1144_fallback_ext(jit_state_t *_jit,
1145 jit_int32_t r0, jit_int32_t r1, jit_word_t i0, jit_word_t i1)
1146{
1147 assert(i0 >= 0 && i1 >= 1 && i0 + i1 <= __WORDSIZE);
1148 if (i1 == __WORDSIZE)
1149 movr(r0, r1);
1150 else {
1151# if __BYTE_ORDER == __BIG_ENDIAN
1152 i0 = __WORDSIZE - (i0 + i1);
1153# endif
1154 if (__WORDSIZE - (i0 + i1)) {
1155 lshi(r0, r1, __WORDSIZE - (i0 + i1));
1156 rshi(r0, r0, __WORDSIZE - i1);
1157 }
1158 else
1159 rshi(r0, r1, __WORDSIZE - i1);
1160 }
1161}
1162
1163static void
1164_fallback_ext_u(jit_state_t *_jit,
1165 jit_int32_t r0, jit_int32_t r1, jit_word_t i0, jit_word_t i1)
1166{
1167 assert(i0 >= 0 && i1 >= 1 && i0 + i1 <= __WORDSIZE);
1168 if (i1 == __WORDSIZE)
1169 movr(r0, r1);
1170 else {
1171# if __BYTE_ORDER == __BIG_ENDIAN
1172 i0 = __WORDSIZE - (i0 + i1);
1173# endif
1174 if (i0)
1175 rshi_u(r0, r1, i0);
1176 andi(r0, r0, (1L << i1) - 1);
1177 }
1178}
1179
1180static void
1181_fallback_dep(jit_state_t *_jit,
1182 jit_int32_t r0, jit_int32_t r1, jit_word_t i0, jit_word_t i1)
1183{
1184 jit_int32_t t0;
1185 jit_word_t mask;
1186 assert(i0 >= 0 && i1 >= 1 && i0 + i1 <= __WORDSIZE);
1187 if (i1 == __WORDSIZE)
1188 movr(r0, r1);
1189 else {
1190# if __BYTE_ORDER == __BIG_ENDIAN
1191 i0 = __WORDSIZE - (i0 + i1);
1192# endif
1193 mask = (1L << i1) - 1;
1194 t0 = fallback_jit_get_reg(jit_class_gpr);
1195 andi(rn(t0), r1, mask);
1196 if (i0) {
1197 lshi(rn(t0), rn(t0), i0);
1198 mask <<= i0;
1199 }
1200 andi(r0, r0, ~mask);
1201 orr(r0, r0, rn(t0));
1202 jit_unget_reg(t0);
1203 }
1204}
1205
1206static void
1207_fallback_qlshr(jit_state_t *_jit,
1208 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
1209{
1210 /* r1 = __WORDSIZE - r3;
1211 * if (r1 != __WORDSIZE) {
1212 * r0 = r2 << r3;
1213 * if (r3 != 0)
1214 * r1 = r2 >> r1;
1215 * else
1216 * r1 = 0;
1217 * }
1218 * else {
1219 * r1 = r2;
1220 * r0 = 0;
1221 * }
1222 */
1223 jit_int32_t t0, s0, t2, s2, t3, s3;
1224 jit_word_t over, zero, done, done_over;
1225 s0 = fallback_jit_get_reg(jit_class_gpr);
1226 t0 = rn(s0);
1227 if (r0 == r2 || r1 == r2) {
1228 s2 = fallback_jit_get_reg(jit_class_gpr);
1229 t2 = rn(s2);
1230 movr(t2, r2);
1231 }
1232 else
1233 t2 = r2;
1234 if (r0 == r3 || r1 == r3) {
1235 s3 = fallback_jit_get_reg(jit_class_gpr);
1236 t3 = rn(s3);
1237 movr(t3, r3);
1238 }
1239 else
1240 t3 = r3;
1241 rsbi(t0, t3, __WORDSIZE);
1242 lshr(r0, t2, t3);
1243 rshr(r1, t2, t0);
1244 zero = fallback_beqi(_jit->pc.w, t3, 0);
1245 over = fallback_beqi(_jit->pc.w, t3, __WORDSIZE);
1246 done = fallback_jmpi(_jit->pc.w);
1247 fallback_flush();
1248 fallback_patch_jmpi(over, _jit->pc.w);
1249 /* overflow */
1250 movi(r0, 0);
1251 done_over = fallback_jmpi(_jit->pc.w);
1252 /* zero */
1253 fallback_flush();
1254 fallback_patch_beqi(zero, _jit->pc.w);
1255 rshi(r1, t2, __WORDSIZE - 1);
1256 fallback_flush();
1257 fallback_patch_jmpi(done, _jit->pc.w);
1258 fallback_patch_jmpi(done_over, _jit->pc.w);
1259 jit_unget_reg(s0);
1260 if (t2 != r2)
1261 jit_unget_reg(s2);
1262 if (t3 != r3)
1263 jit_unget_reg(s3);
1264}
1265
1266static void
1267_fallback_qlshi(jit_state_t *_jit,
1268 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
1269{
1270 assert((jit_uword_t)i0 <= __WORDSIZE);
1271 if (i0 == 0) {
1272 if (r0 != r2) {
1273 movr(r0, r2);
1274 rshi(r1, r2, __WORDSIZE - 1);
1275 }
1276 else
1277 rshi(r1, r2, __WORDSIZE - 1);
1278 }
1279 else if (i0 != __WORDSIZE) {
1280 rshi(r1, r2, __WORDSIZE - i0);
1281 lshi(r0, r2, i0);
1282 }
1283 else {
1284 movr(r1, r2);
1285 movi(r0, 0);
1286 }
1287}
1288
1289static void
1290_fallback_qlshr_u(jit_state_t *_jit, jit_int32_t r0,
1291 jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
1292{
1293 /* r1 = __WORDSIZE - r3;
1294 * if (r1 != __WORDSIZE) {
1295 * r0 = r2 << r3;
1296 * if (r3 != 0)
1297 * r1 = (unsigned)r2 >> r1;
1298 * else
1299 * r1 = 0;
1300 * }
1301 * else {
1302 * r1 = r2;
1303 * r0 = 0;
1304 * }
1305 */
1306 jit_int32_t t0, s0, t2, s2, t3, s3;
1307 jit_word_t over, zero, done, done_over;
1308 s0 = fallback_jit_get_reg(jit_class_gpr);
1309 t0 = rn(s0);
1310 if (r0 == r2 || r1 == r2) {
1311 s2 = fallback_jit_get_reg(jit_class_gpr);
1312 t2 = rn(s2);
1313 movr(t2, r2);
1314 }
1315 else
1316 t2 = r2;
1317 if (r0 == r3 || r1 == r3) {
1318 s3 = fallback_jit_get_reg(jit_class_gpr);
1319 t3 = rn(s3);
1320 movr(t3, r3);
1321 }
1322 else
1323 t3 = r3;
1324 rsbi(t0, t3, __WORDSIZE);
1325 lshr(r0, t2, t3);
1326 rshr_u(r1, t2, t0);
1327 zero = fallback_beqi(_jit->pc.w, t3, 0);
1328 over = fallback_beqi(_jit->pc.w, t3, __WORDSIZE);
1329 done = fallback_jmpi(_jit->pc.w);
1330 fallback_flush();
1331 fallback_patch_jmpi(over, _jit->pc.w);
1332 /* overflow */
1333 movi(r0, 0);
1334 done_over = fallback_jmpi(_jit->pc.w);
1335 /* zero */
1336 fallback_flush();
1337 fallback_patch_beqi(zero, _jit->pc.w);
1338 movi(r1, 0);
1339 fallback_flush();
1340 fallback_patch_jmpi(done, _jit->pc.w);
1341 fallback_patch_jmpi(done_over, _jit->pc.w);
1342 jit_unget_reg(s0);
1343 if (t2 != r2)
1344 jit_unget_reg(s2);
1345 if (t3 != r3)
1346 jit_unget_reg(s3);
1347}
1348
1349static void
1350_fallback_qlshi_u(jit_state_t *_jit, jit_int32_t r0,
1351 jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
1352{
1353 assert(i0 <= __WORDSIZE);
1354 if (i0 == 0) {
1355 movr(r0, r2);
1356 movi(r1, 0);
1357 }
1358 else if (i0 != __WORDSIZE) {
1359 rshi_u(r1, r2, __WORDSIZE - i0);
1360 lshi(r0, r2, i0);
1361 }
1362 else {
1363 movr(r1, r2);
1364 movi(r0, 0);
1365 }
1366}
1367
1368static void
1369_fallback_qrshr(jit_state_t *_jit, jit_int32_t r0,
1370 jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
1371{
1372 /* r1 = __WORDSIZE - r3;
1373 * if (r1 != __WORDSIZE) {
1374 * r0 = r2 >> r3;
1375 * if (r3 != 0)
1376 * r1 = r2 << r1;
1377 * else
1378 * r1 = r2 >> (__WORDSIZE - 1);
1379 * }
1380 * else {
1381 * r1 = r2;
1382 * r0 = r2 >> (__WORDSIZE - 1);
1383 * }
1384 */
1385 jit_int32_t t0, s0, t2, s2, t3, s3;
1386 jit_word_t over, zero, done, done_over;
1387 s0 = fallback_jit_get_reg(jit_class_gpr);
1388 t0 = rn(s0);
1389 if (r0 == r2 || r1 == r2) {
1390 s2 = fallback_jit_get_reg(jit_class_gpr);
1391 t2 = rn(s2);
1392 movr(t2, r2);
1393 }
1394 else
1395 t2 = r2;
1396 if (r0 == r3 || r1 == r3) {
1397 s3 = fallback_jit_get_reg(jit_class_gpr);
1398 t3 = rn(s3);
1399 movr(t3, r3);
1400 }
1401 else
1402 t3 = r3;
1403 rsbi(t0, t3, __WORDSIZE);
1404 rshr(r0, t2, t3);
1405 lshr(r1, t2, t0);
1406 zero = fallback_beqi(_jit->pc.w, t3, 0);
1407 over = fallback_beqi(_jit->pc.w, t3, __WORDSIZE);
1408 done = fallback_jmpi(_jit->pc.w);
1409 fallback_flush();
1410 fallback_patch_jmpi(over, _jit->pc.w);
1411 /* underflow */
1412 rshi(r0, t2, __WORDSIZE - 1);
1413 done_over = fallback_jmpi(_jit->pc.w);
1414 /* zero */
1415 fallback_flush();
1416 fallback_patch_beqi(zero, _jit->pc.w);
1417 rshi(r1, t2, __WORDSIZE - 1);
1418 fallback_flush();
1419 fallback_patch_jmpi(done, _jit->pc.w);
1420 fallback_patch_jmpi(done_over, _jit->pc.w);
1421 jit_unget_reg(s0);
1422 if (t2 != r2)
1423 jit_unget_reg(s2);
1424 if (t3 != r3)
1425 jit_unget_reg(s3);
1426}
1427
1428static void
1429_fallback_qrshi(jit_state_t *_jit, jit_int32_t r0,
1430 jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
1431{
1432 assert((jit_uword_t)i0 <= __WORDSIZE);
1433 if (i0 == 0) {
1434 if (r0 != r2) {
1435 movr(r0, r2);
1436 rshi(r1, r2, __WORDSIZE - 1);
1437 }
1438 else
1439 rshi(r1, r2, __WORDSIZE - 1);
1440 }
1441 else if (i0 != __WORDSIZE) {
1442 lshi(r1, r2, __WORDSIZE - i0);
1443 rshi(r0, r2, i0);
1444 }
1445 else {
1446 movr(r1, r2);
1447 rshi(r0, r2, __WORDSIZE - 1);
1448 }
1449}
1450
1451static void
1452_fallback_qrshr_u(jit_state_t *_jit, jit_int32_t r0,
1453 jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
1454{
1455 /* r1 = __WORDSIZE - r3;
1456 * if (r1 != __WORDSIZE) {
1457 * r0 = (unsigned)r2 >> r3;
1458 * if (r3 != 0)
1459 * r1 = r2 << r1;
1460 * else
1461 * r1 = 0;
1462 * }
1463 * else {
1464 * r1 = r2;
1465 * r0 = 0;
1466 * }
1467 */
1468 jit_int32_t t0, s0, t2, s2, t3, s3;
1469 jit_word_t over, zero, done, done_over;
1470 s0 = fallback_jit_get_reg(jit_class_gpr);
1471 t0 = rn(s0);
1472 if (r0 == r2 || r1 == r2) {
1473 s2 = fallback_jit_get_reg(jit_class_gpr);
1474 t2 = rn(s2);
1475 movr(t2, r2);
1476 }
1477 else
1478 t2 = r2;
1479 if (r0 == r3 || r1 == r3) {
1480 s3 = fallback_jit_get_reg(jit_class_gpr);
1481 t3 = rn(s3);
1482 movr(t3, r3);
1483 }
1484 else
1485 t3 = r3;
1486 rsbi(t0, t3, __WORDSIZE);
1487 rshr_u(r0, t2, t3);
1488 lshr(r1, t2, t0);
1489 zero = fallback_beqi(_jit->pc.w, t3, 0);
1490 over = fallback_beqi(_jit->pc.w, t3, __WORDSIZE);
1491 done = fallback_jmpi(_jit->pc.w);
1492 fallback_flush();
1493 fallback_patch_jmpi(over, _jit->pc.w);
1494 /* underflow */
1495 movi(r0, 0);
1496 done_over = fallback_jmpi(_jit->pc.w);
1497 /* zero */
1498 fallback_flush();
1499 fallback_patch_beqi(zero, _jit->pc.w);
1500 movi(r1, 0);
1501 fallback_flush();
1502 fallback_patch_jmpi(done, _jit->pc.w);
1503 fallback_patch_jmpi(done_over, _jit->pc.w);
1504 jit_unget_reg(s0);
1505 if (t2 != r2)
1506 jit_unget_reg(s2);
1507 if (t3 != r3)
1508 jit_unget_reg(s3);
1509}
1510
1511static void
1512_fallback_qrshi_u(jit_state_t *_jit, jit_int32_t r0,
1513 jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
1514{
1515 assert((jit_uword_t)i0 <= __WORDSIZE);
1516 if (i0 == 0) {
1517 movr(r0, r2);
1518 movi(r1, 0);
1519 }
1520 else if (i0 != __WORDSIZE) {
1521 lshi(r1, r2, __WORDSIZE - i0);
1522 rshi_u(r0, r2, i0);
1523 }
1524 else {
1525 movr(r1, r2);
1526 movi(r0, 0);
1527 }
1528}
1529
1530static void
1531_unldr2(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_bool_t sign)
1532{
1533 jit_int32_t t0, r2;
1534 jit_word_t un2, al;
1535 t0 = fallback_jit_get_reg(jit_class_gpr);
1536 r2 = rn(t0);
1537 andi(r2, r1, -2);
1538 un2 = fallback_bner(_jit->pc.w, r1, r2);
1539 if (sign)
1540 ldr_s(r0, r1);
1541 else
1542 ldr_us(r0, r1);
1543 al = fallback_jmpi(_jit->pc.w);
1544 fallback_flush();
1545 fallback_patch_bner(un2, _jit->pc.w);
1546# if __BYTE_ORDER == __LITTLE_ENDIAN
1547 ldr_uc(r0, r1);
1548 if (sign)
1549 ldxi_c(r2, r1, 1);
1550 else
1551 ldxi_uc(r2, r1, 1);
1552 lshi(r2, r2, 8);
1553# else
1554 if (sign)
1555 ldr_c(r0, r1);
1556 else
1557 ldr_uc(r0, r1);
1558 lshi(r0, r0, 8);
1559 ldxi_uc(r2, r1, 1);
1560# endif
1561 orr(r0, r0, r2);
1562 fallback_flush();
1563 fallback_patch_jmpi(al, _jit->pc.w);
1564 jit_unget_reg(t0);
1565}
1566
1567static void
1568_unldi2(jit_state_t *_jit, jit_int32_t r0, jit_word_t i1, jit_bool_t sign)
1569{
1570 jit_int32_t t0, r2;
1571 if ((i1 & -2) == i1) {
1572 if (sign)
1573 ldi_s(r0, i1);
1574 else
1575 ldi_us(r0, i1);
1576 }
1577 else {
1578 t0 = fallback_jit_get_reg(jit_class_gpr);
1579 r2 = rn(t0);
1580# if __BYTE_ORDER == __LITTLE_ENDIAN
1581 ldi_uc(r0, i1);
1582 if (sign)
1583 ldi_c(r2, i1 + 1);
1584 else
1585 ldi_uc(r2, i1 + 1);
1586 lshi(r2, r2, 8);
1587# else
1588 if (sign)
1589 ldi_c(r0, i1);
1590 else
1591 ldi_uc(r0, i1);
1592 lshi(r0, r0, 8);
1593 ldi_uc(r2, i1 + 1);
1594# endif
1595 orr(r0, r0, r2);
1596 jit_unget_reg(t0);
1597 }
1598}
1599
1600static void
1601_unldr3(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_bool_t sign)
1602{
1603 jit_int32_t t0, r2;
1604 jit_word_t un2, or;
1605 t0 = fallback_jit_get_reg(jit_class_gpr);
1606 r2 = rn(t0);
1607 andi(r2, r1, -2);
1608 un2 = fallback_bner(_jit->pc.w, r1, r2);
1609# if __BYTE_ORDER == __LITTLE_ENDIAN
1610 ldr_us(r0, r1);
1611 if (sign)
1612 ldxi_c(r2, r1, 2);
1613 else
1614 ldxi_uc(r2, r1, 2);
1615 lshi(r2, r2, 16);
1616# else
1617 if (sign)
1618 ldr_s(r0, r1);
1619 else
1620 ldr_us(r0, r1);
1621 lshi(r0, r0, 8);
1622 ldxi_uc(r2, r1, 2);
1623# endif
1624 or = fallback_jmpi(_jit->pc.w);
1625 fallback_flush();
1626 fallback_patch_bner(un2, _jit->pc.w);
1627# if __BYTE_ORDER == __LITTLE_ENDIAN
1628 ldr_uc(r0, r1);
1629 if (sign)
1630 ldxi_s(r2, r1, 1);
1631 else
1632 ldxi_us(r2, r1, 1);
1633 lshi(r2, r2, 8);
1634# else
1635 if (sign)
1636 ldr_c(r0, r1);
1637 else
1638 ldr_uc(r0, r1);
1639 lshi(r0, r0, 16);
1640 ldxi_us(r2, r1, 1);
1641# endif
1642 fallback_flush();
1643 fallback_patch_jmpi(or, _jit->pc.w);
1644 orr(r0, r0, r2);
1645 jit_unget_reg(t0);
1646}
1647
1648static void
1649_unldi3(jit_state_t *_jit, jit_int32_t r0, jit_word_t i1, jit_bool_t sign)
1650{
1651 jit_int32_t t0, r2;
1652 t0 = fallback_jit_get_reg(jit_class_gpr);
1653 r2 = rn(t0);
1654 if ((i1 & -2) == i1) {
1655# if __BYTE_ORDER == __LITTLE_ENDIAN
1656 ldi_us(r0, i1);
1657 if (sign)
1658 ldi_c(r2, i1 + 2);
1659 else
1660 ldi_uc(r2, i1 + 2);
1661 lshi(r2, r2, 16);
1662# else
1663 if (sign)
1664 ldi_s(r0, i1);
1665 else
1666 ldi_us(r0, i1);
1667 lshi(r0, r0, 8);
1668 ldi_uc(r2, i1 + 2);
1669# endif
1670 }
1671 else {
1672# if __BYTE_ORDER == __LITTLE_ENDIAN
1673 ldi_uc(r0, i1);
1674 if (sign)
1675 ldi_s(r2, i1 + 1);
1676 else
1677 ldi_us(r2, i1 + 1);
1678 lshi(r2, r2, 8);
1679# else
1680 if (sign)
1681 ldi_c(r0, i1);
1682 else
1683 ldi_uc(r0, i1);
1684 lshi(r0, r0, 16);
1685 ldi_us(r2, i1 + 1);
1686# endif
1687 }
1688 orr(r0, r0, r2);
1689 jit_unget_reg(t0);
79bfeef6 1690}
ba86ff93
PC
1691
1692static void
1693_unldr4(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1
1694# if __WORDSIZE == 64
1695 , jit_bool_t sign
1696# endif
1697 )
1698{
1699 jit_int32_t t0, r2;
1700 jit_word_t un4, un2, un3, al, or, or3;
1701 t0 = fallback_jit_get_reg(jit_class_gpr);
1702 r2 = rn(t0);
1703 andi(r2, r1, -4);
1704 un4 = fallback_bner(_jit->pc.w, r1, r2);
1705# if __WORDSIZE == 64
1706 if (sign)
1707# endif
1708 ldr_i(r0, r1);
1709# if __WORDSIZE == 64
1710 else
1711 ldr_ui(r0, r1);
1712# endif
1713 al = fallback_jmpi(_jit->pc.w);
1714 fallback_flush();
1715 fallback_patch_bner(un4, _jit->pc.w);
1716 andi(r2, r1, -2);
1717 un2 = fallback_bner(_jit->pc.w, r1, r2);
1718# if __BYTE_ORDER == __LITTLE_ENDIAN
1719 ldr_us(r0, r1);
1720# if __WORDSIZE == 64
1721 if (sign)
1722 ldxi_s(r2, r1, 2);
1723 else
1724# endif
1725 ldxi_us(r2, r1, 2);
1726 lshi(r2, r2, 16);
1727# else
1728# if __WORDSIZE == 64
1729 if (sign)
1730 ldr_s(r0, r1);
1731 else
1732# endif
1733 ldr_us(r0, r1);
1734 lshi(r0, r0, 16);
1735 ldxi_us(r2, r1, 2);
1736# endif
1737 or = fallback_jmpi(_jit->pc.w);
1738 fallback_flush();
1739 fallback_patch_bner(un2, _jit->pc.w);
1740 andi(r2, r1, 3);
1741 un3 = fallback_bnei(_jit->pc.w, r2, 3);
1742# if __BYTE_ORDER == __LITTLE_ENDIAN
1743 ldr_uc(r0, r1);
1744 ldxi_i(r2, r1, 1);
1745 lshi(r2, r2, 8);
1746# if __WORDSIZE == 64
1747 if (sign)
1748 extr_i(r2, r2);
1749 else
1750 extr_ui(r2, r2);
1751# endif
1752# else
1753# if __WORDSIZE == 64
1754 if (sign)
1755 ldr_c(r0, r1);
1756 else
1757# endif
1758 ldr_uc(r0, r1);
1759 lshi(r0, r0, 24);
1760# if __WORDSIZE == 32
1761 ldxi(r2, r1, 1);
1762# else
1763 ldxi_ui(r2, r1, 1);
1764# endif
1765 rshi_u(r2, r2, 8);
1766# endif
1767 or3 = fallback_jmpi(_jit->pc.w);
1768 fallback_flush();
1769 fallback_patch_bnei(un3, _jit->pc.w);
1770# if __BYTE_ORDER == __LITTLE_ENDIAN
1771# if __WORDSIZE == 32
1772 ldxi(r0, r1, -1);
1773# else
1774 ldxi_ui(r0, r1, -1);
1775# endif
1776 rshi_u(r0, r0, 8);
1777# if __WORDSIZE == 64
1778 if (sign)
1779 ldxi_c(r2, r1, 3);
1780 else
1781# endif
1782 ldxi_uc(r2, r1, 3);
1783 lshi(r2, r2, 24);
1784# else
1785 ldxi_i(r0, r1, -1);
1786 lshi(r0, r0, 8);
1787# if __WORDSIZE == 64
1788 if (sign)
1789 extr_i(r0, r0);
1790 else
1791 extr_ui(r0, r0);
1792# endif
1793 ldxi_uc(r2, r1, 3);
1794# endif
1795 fallback_flush();
1796 fallback_patch_jmpi(or, _jit->pc.w);
1797 fallback_patch_jmpi(or3, _jit->pc.w);
1798 orr(r0, r0, r2);
1799 fallback_flush();
1800 fallback_patch_jmpi(al, _jit->pc.w);
1801 jit_unget_reg(t0);
1802}
1803
1804static void
1805_unldi4(jit_state_t *_jit, jit_int32_t r0, jit_word_t i1
1806# if __WORDSIZE == 64
1807 , jit_bool_t sign
1808# endif
1809 )
1810{
1811 jit_int32_t t0, r2;
1812 if ((i1 & -4) == i1) {
1813# if __WORDSIZE == 64
1814 if (sign)
1815# endif
1816 ldi_i(r0, i1);
1817# if __WORDSIZE == 64
1818 else
1819 ldi_ui(r0, i1);
1820# endif
1821 }
1822 else {
1823 t0 = fallback_jit_get_reg(jit_class_gpr);
1824 r2 = rn(t0);
1825 if ((i1 & -2) == i1) {
1826# if __BYTE_ORDER == __LITTLE_ENDIAN
1827 ldi_us(r0, i1);
1828# if __WORDSIZE == 64
1829 if (sign)
1830 ldi_s(r2, i1 + 2);
1831 else
1832# endif
1833 ldi_us(r2, i1 + 2);
1834 lshi(r2, r2, 16);
1835# else
1836# if __WORDSIZE == 64
1837 if (sign)
1838 ldi_s(r0, i1);
1839 else
1840# endif
1841 ldi_us(r0, i1);
1842 lshi(r0, r0, 16);
1843 ldi_us(r2, i1 + 2);
1844# endif
1845 }
1846 else if ((i1 & 3) == 3) {
1847# if __BYTE_ORDER == __LITTLE_ENDIAN
1848 ldi_uc(r0, i1);
1849 ldi_i(r2, i1 + 1);
1850 lshi(r2, r2, 8);
1851# if __WORDSIZE == 64
1852 if (sign)
1853 extr_i(r2, r2);
1854 else
1855 extr_ui(r2, r2);
1856# endif
1857# else
1858# if __WORDSIZE == 64
1859 if (sign)
1860 ldi_c(r0, i1);
1861 else
1862# endif
1863 ldi_uc(r0, i1);
1864 lshi(r0, r0, 24);
1865# if __WORDSIZE == 32
1866 ldi(r2, i1 + 1);
1867# else
1868 ldi_ui(r2, i1 + 1);
1869# endif
1870 rshi_u(r2, r2, 8);
1871# endif
1872 }
1873 else {
1874# if __BYTE_ORDER == __LITTLE_ENDIAN
1875# if __WORDSIZE == 32
1876 ldi(r0, i1 - 1);
1877# else
1878 ldi_ui(r0, i1 - 1);
1879# endif
1880 rshi_u(r0, r0, 8);
1881# if __WORDSIZE == 64
1882 if (sign)
1883 ldi_c(r2, i1 + 3);
1884 else
1885# endif
1886 ldi_uc(r2, i1 + 3);
1887 lshi(r2, r2, 24);
1888# else
1889 ldi_i(r0, i1 - 1);
1890 lshi(r0, r0, 8);
1891# if __WORDSIZE == 64
1892 if (sign)
1893 extr_i(r0, r0);
1894 else
1895 extr_ui(r0, r0);
1896# endif
1897 ldi_uc(r2, i1 + 3);
1898# endif
1899 }
1900 orr(r0, r0, r2);
1901 jit_unget_reg(t0);
1902 }
1903}
1904
1905# if __WORDSIZE == 64
1906static void
1907_unldr5(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_bool_t sign)
1908{
1909 jit_int32_t t0, r2;
1910 jit_word_t un4, un2, un3, or, or4, or3;
1911 t0 = fallback_jit_get_reg(jit_class_gpr);
1912 r2 = rn(t0);
1913 andi(r2, r1, -4);
1914 un4 = fallback_bner(_jit->pc.w, r1, r2);
1915# if __BYTE_ORDER == __LITTLE_ENDIAN
1916 ldr_ui(r0, r1);
1917 if (sign)
1918 ldxi_c(r2, r1, 4);
1919 else
1920 ldxi_uc(r2, r1, 4);
1921 lshi(r2, r2, 32);
1922# else
1923 if (sign)
1924 ldr_i(r0, r1);
1925 else
1926 ldr_ui(r0, r1);
1927 lshi(r0, r0, 8);
1928 ldxi_uc(r2, r1, 4);
1929# endif
1930 or = fallback_jmpi(_jit->pc.w);
1931 fallback_flush();
1932 fallback_patch_bner(un4, _jit->pc.w);
1933 andi(r2, r1, -2);
1934 un2 = fallback_bner(_jit->pc.w, r1, r2);
1935# if __BYTE_ORDER == __LITTLE_ENDIAN
1936 ldr_us(r0, r1);
1937 ldxi_us(r2, r1, 2);
1938 lshi(r2, r2, 16);
1939 orr(r0, r0, r2);
1940 if (sign)
1941 ldxi_c(r2, r1, 4);
1942 else
1943 ldxi_uc(r2, r1, 4);
1944 lshi(r2, r2, 32);
1945# else
1946 if (sign)
1947 ldr_s(r0, r1);
1948 else
1949 ldr_us(r0, r1);
1950 lshi(r0, r0, 24);
1951 ldxi_us(r2, r1, 2);
1952 lshi(r2, r2, 8);
1953 orr(r0, r0, r2);
1954 ldxi_uc(r2, r1, 4);
1955# endif
1956 or4 = fallback_jmpi(_jit->pc.w);
1957 fallback_flush();
1958 fallback_patch_bner(un2, _jit->pc.w);
1959 andi(r2, r1, 3);
1960 un3 = fallback_bnei(_jit->pc.w, r2, 3);
1961# if __BYTE_ORDER == __LITTLE_ENDIAN
1962 ldr_uc(r0, r1);
1963 if (sign)
1964 ldxi_i(r2, r1, 1);
1965 else
1966 ldxi_ui(r2, r1, 1);
1967 lshi(r2, r2, 8);
1968# else
1969 if (sign)
1970 ldr_c(r0, r1);
1971 else
1972 ldr_uc(r0, r1);
1973 lshi(r0, r0, 32);
1974 ldxi_ui(r2, r1, 1);
1975# endif
1976 or3 = fallback_jmpi(_jit->pc.w);
1977 fallback_flush();
1978 fallback_patch_bnei(un3, _jit->pc.w);
1979# if __BYTE_ORDER == __LITTLE_ENDIAN
1980 ldr_uc(r0, r1);
1981 ldxi_us(r2, r1, 1);
1982 lshi(r2, r2, 8);
1983 orr(r0, r0, r2);
1984 if (sign)
1985 ldxi_s(r2, r1, 3);
1986 else
1987 ldxi_us(r2, r1, 3);
1988 lshi(r2, r2, 24);
1989# else
1990 if (sign)
1991 ldr_c(r0, r1);
1992 else
1993 ldr_uc(r0, r1);
1994 lshi(r0, r0, 32);
1995 ldxi_us(r2, r1, 1);
1996 lshi(r2, r2, 16);
1997 orr(r0, r0, r2);
1998 ldxi_us(r2, r1, 3);
1999# endif
2000 fallback_flush();
2001 fallback_patch_jmpi(or, _jit->pc.w);
2002 fallback_patch_jmpi(or4, _jit->pc.w);
2003 fallback_patch_jmpi(or3, _jit->pc.w);
2004 orr(r0, r0, r2);
2005 jit_unget_reg(t0);
2006}
2007
2008static void
2009_unldi5(jit_state_t *_jit, jit_int32_t r0, jit_word_t i1, jit_bool_t sign)
2010{
2011 jit_int32_t t0, r2;
2012 t0 = fallback_jit_get_reg(jit_class_gpr);
2013 r2 = rn(t0);
2014 if ((i1 & -4) == i1) {
2015# if __BYTE_ORDER == __LITTLE_ENDIAN
2016 ldi_ui(r0, i1);
2017 if (sign)
2018 ldi_c(r2, i1 + 4);
2019 else
2020 ldi_uc(r2, i1 + 4);
2021 lshi(r2, r2, 32);
2022# else
2023 if (sign)
2024 ldi_i(r0, i1);
2025 else
2026 ldi_ui(r0, i1);
2027 lshi(r0, r0, 8);
2028 ldi_uc(r2, i1 + 4);
2029# endif
2030 }
2031 else if ((i1 & -2) == i1) {
2032# if __BYTE_ORDER == __LITTLE_ENDIAN
2033 ldi_us(r0, i1);
2034 ldi_us(r2, i1 + 2);
2035 lshi(r2, r2, 16);
2036 orr(r0, r0, r2);
2037 if (sign)
2038 ldi_c(r2, i1 + 4);
2039 else
2040 ldi_uc(r2, i1 + 4);
2041 lshi(r2, r2, 32);
2042# else
2043 if (sign)
2044 ldi_s(r0, i1);
2045 else
2046 ldi_us(r0, i1);
2047 lshi(r0, r0, 24);
2048 ldi_us(r2, i1 + 2);
2049 lshi(r2, r2, 8);
2050 orr(r0, r0, r2);
2051 ldi_uc(r2, i1 + 4);
2052# endif
2053 }
2054 else if ((i1 & 3) == 3) {
2055# if __BYTE_ORDER == __LITTLE_ENDIAN
2056 ldi_uc(r0, i1);
2057 if (sign)
2058 ldi_i(r2, i1 + 1);
2059 else
2060 ldi_ui(r2, i1 + 1);
2061 lshi(r2, r2, 8);
2062# else
2063 if (sign)
2064 ldi_c(r0, i1);
2065 else
2066 ldi_uc(r0, i1);
2067 lshi(r0, r0, 32);
2068 ldi_ui(r2, i1 + 1);
2069# endif
2070 }
2071 else {
2072# if __BYTE_ORDER == __LITTLE_ENDIAN
2073 ldi_uc(r0, i1);
2074 ldi_us(r2, i1 + 1);
2075 lshi(r2, r2, 8);
2076 orr(r0, r0, r2);
2077 if (sign)
2078 ldi_s(r2, i1 + 3);
2079 else
2080 ldi_us(r2, i1 + 3);
2081 lshi(r2, r2, 24);
2082# else
2083 if (sign)
2084 ldi_c(r0, i1);
2085 else
2086 ldi_uc(r0, i1);
2087 lshi(r0, r0, 32);
2088 ldi_us(r2, i1 + 1);
2089 lshi(r2, r2, 16);
2090 orr(r0, r0, r2);
2091 ldi_us(r2, i1 + 3);
2092# endif
2093 }
2094 orr(r0, r0, r2);
2095 jit_unget_reg(t0);
2096}
2097
2098static void
2099_unldr6(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_bool_t sign)
2100{
2101 jit_int32_t t0, r2;
2102 jit_word_t un4, un2, un3, or, or2, or3;
2103 t0 = fallback_jit_get_reg(jit_class_gpr);
2104 r2 = rn(t0);
2105 andi(r2, r1, -4);
2106 un4 = fallback_bner(_jit->pc.w, r1, r2);
2107# if __BYTE_ORDER == __LITTLE_ENDIAN
2108 ldr_ui(r0, r1);
2109 if (sign)
2110 ldxi_s(r2, r1, 4);
2111 else
2112 ldxi_us(r2, r1, 4);
2113 lshi(r2, r2, 32);
2114# else
2115 if (sign)
2116 ldr_i(r0, r1);
2117 else
2118 ldr_ui(r0, r1);
2119 lshi(r0, r0, 16);
2120 ldxi_us(r2, r1, 4);
2121# endif
2122 or = fallback_jmpi(_jit->pc.w);
2123 fallback_flush();
2124 fallback_patch_bner(un4, _jit->pc.w);
2125 andi(r2, r1, -2);
2126 un2 = fallback_bner(_jit->pc.w, r1, r2);
2127# if __BYTE_ORDER == __LITTLE_ENDIAN
2128 ldr_us(r0, r1);
2129 if (sign)
2130 ldxi_i(r2, r1, 2);
2131 else
2132 ldxi_ui(r2, r1, 2);
2133 lshi(r2, r2, 16);
2134# else
2135 if (sign)
2136 ldr_s(r0, r1);
2137 else
2138 ldr_us(r0, r1);
2139 lshi(r0, r0, 32);
2140 ldxi_ui(r2, r1, 2);
2141# endif
2142 or2 = fallback_jmpi(_jit->pc.w);
2143 fallback_flush();
2144 fallback_patch_bner(un2, _jit->pc.w);
2145 andi(r2, r1, 3);
2146 un3 = fallback_bnei(_jit->pc.w, r2, 3);
2147# if __BYTE_ORDER == __LITTLE_ENDIAN
2148 ldr_uc(r0, r1);
2149 ldxi_ui(r2, r1, 1);
2150 lshi(r2, r2, 8);
2151 orr(r0, r0, r2);
2152 if (sign)
2153 ldxi_c(r2, r1, 5);
2154 else
2155 ldxi_uc(r2, r1, 5);
2156 lshi(r2, r2, 40);
2157# else
2158 if (sign)
2159 ldr_c(r0, r1);
2160 else
2161 ldr_uc(r0, r1);
2162 lshi(r0, r0, 40);
2163 ldxi_ui(r2, r1, 1);
2164 lshi(r2, r2, 8);
2165 orr(r0, r0, r2);
2166 ldxi_uc(r2, r1, 5);
2167# endif
2168 or3 = fallback_jmpi(_jit->pc.w);
2169 fallback_flush();
2170 fallback_patch_bner(un3, _jit->pc.w);
2171# if __BYTE_ORDER == __LITTLE_ENDIAN
2172 ldr_uc(r0, r1);
2173 ldxi_us(r2, r1, 1);
2174 lshi(r2, r2, 8);
2175 orr(r0, r0, r2);
2176 ldxi_us(r2, r1, 3);
2177 lshi(r2, r2, 24);
2178 orr(r0, r0, r2);
2179 if (sign)
2180 ldxi_c(r2, r1, 5);
2181 else
2182 ldxi_uc(r2, r1, 5);
2183 lshi(r2, r2, 40);
2184# else
2185 if (sign)
2186 ldr_c(r0, r1);
2187 else
2188 ldr_uc(r0, r1);
2189 lshi(r0, r0, 40);
2190 ldxi_us(r2, r1, 1);
2191 lshi(r2, r2, 24);
2192 orr(r0, r0, r2);
2193 ldxi_us(r2, r1, 3);
2194 lshi(r2, r2, 8);
2195 orr(r0, r0, r2);
2196 ldxi_uc(r2, r1, 5);
2197# endif
2198 fallback_flush();
2199 fallback_patch_jmpi(or, _jit->pc.w);
2200 fallback_patch_jmpi(or2, _jit->pc.w);
2201 fallback_patch_jmpi(or3, _jit->pc.w);
2202 orr(r0, r0, r2);
2203 jit_unget_reg(t0);
2204}
2205
2206static void
2207_unldi6(jit_state_t *_jit, jit_int32_t r0, jit_word_t i1, jit_bool_t sign)
2208{
2209 jit_int32_t t0, r2;
2210 t0 = fallback_jit_get_reg(jit_class_gpr);
2211 r2 = rn(t0);
2212 if ((i1 & -4) == i1) {
2213# if __BYTE_ORDER == __LITTLE_ENDIAN
2214 ldi_ui(r0, i1);
2215 if (sign)
2216 ldi_s(r2, i1 + 4);
2217 else
2218 ldi_us(r2, i1 + 4);
2219 lshi(r2, r2, 32);
2220# else
2221 if (sign)
2222 ldi_i(r0, i1);
2223 else
2224 ldi_ui(r0, i1);
2225 lshi(r0, r0, 16);
2226 ldi_us(r2, i1 + 4);
2227# endif
2228 }
2229 else if ((i1 & -2) == i1) {
2230# if __BYTE_ORDER == __LITTLE_ENDIAN
2231 ldi_us(r0, i1);
2232 if (sign)
2233 ldi_i(r2, i1 + 2);
2234 else
2235 ldi_ui(r2, i1 + 2);
2236 lshi(r2, r2, 16);
2237# else
2238 if (sign)
2239 ldi_s(r0, i1);
2240 else
2241 ldi_us(r0, i1);
2242 lshi(r0, r0, 32);
2243 ldi_ui(r2, i1 + 2);
2244# endif
2245 }
2246 else if ((i1 & 3) == 3) {
2247# if __BYTE_ORDER == __LITTLE_ENDIAN
2248 ldi_uc(r0, i1);
2249 ldi_ui(r2, i1 + 1);
2250 lshi(r2, r2, 8);
2251 orr(r0, r0, r2);
2252 if (sign)
2253 ldi_c(r2, i1 + 5);
2254 else
2255 ldi_uc(r2, i1 + 5);
2256 lshi(r2, r2, 40);
2257# else
2258 if (sign)
2259 ldi_c(r0, i1);
2260 else
2261 ldi_uc(r0, i1);
2262 lshi(r0, r0, 40);
2263 ldi_ui(r2, i1 + 1);
2264 lshi(r2, r2, 8);
2265 orr(r0, r0, r2);
2266 ldi_uc(r2, i1 + 5);
2267# endif
2268 }
2269 else {
2270# if __BYTE_ORDER == __LITTLE_ENDIAN
2271 ldi_uc(r0, i1);
2272 ldi_us(r2, i1 + 1);
2273 lshi(r2, r2, 8);
2274 orr(r0, r0, r2);
2275 ldi_us(r2, i1 + 3);
2276 lshi(r2, r2, 24);
2277 orr(r0, r0, r2);
2278 if (sign)
2279 ldi_c(r2, i1 + 5);
2280 else
2281 ldi_uc(r2, i1 + 5);
2282 lshi(r2, r2, 40);
2283# else
2284 if (sign)
2285 ldi_c(r0, i1);
2286 else
2287 ldi_uc(r0, i1);
2288 lshi(r0, r0, 40);
2289 ldi_us(r2, i1 + 1);
2290 lshi(r2, r2, 24);
2291 orr(r0, r0, r2);
2292 ldi_us(r2, i1 + 3);
2293 lshi(r2, r2, 8);
2294 orr(r0, r0, r2);
2295 ldi_uc(r2, i1 + 5);
2296# endif
2297 }
2298 orr(r0, r0, r2);
2299 jit_unget_reg(t0);
2300}
2301
2302static void
2303_unldr7(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_bool_t sign)
2304{
2305 jit_int32_t t0, r2;
2306 jit_word_t un4, un2, un3, or, or2, or3;
2307 t0 = fallback_jit_get_reg(jit_class_gpr);
2308 r2 = rn(t0);
2309 andi(r2, r1, -4);
2310 un4 = fallback_bner(_jit->pc.w, r1, r2);
2311# if __BYTE_ORDER == __LITTLE_ENDIAN
2312 ldr_ui(r0, r1);
2313 ldxi_us(r2, r1, 4);
2314 lshi(r2, r2, 32);
2315 orr(r0, r0, r2);
2316 if (sign)
2317 ldxi_c(r2, r1, 6);
2318 else
2319 ldxi_uc(r2, r1, 6);
2320 lshi(r2, r2, 48);
2321# else
2322 if (sign)
2323 ldr_i(r0, r1);
2324 else
2325 ldr_ui(r0, r1);
2326 lshi(r0, r0, 24);
2327 ldxi_us(r2, r1, 4);
2328 lshi(r2, r2, 8);
2329 orr(r0, r0, r2);
2330 ldxi_uc(r2, r1, 6);
2331# endif
2332 or = fallback_jmpi(_jit->pc.w);
2333 fallback_flush();
2334 fallback_patch_bner(un4, _jit->pc.w);
2335 andi(r2, r1, -2);
2336 un2 = fallback_bner(_jit->pc.w, r1, r2);
2337# if __BYTE_ORDER == __LITTLE_ENDIAN
2338 ldr_us(r0, r1);
2339 ldxi_ui(r2, r1, 2);
2340 lshi(r2, r2, 16);
2341 orr(r0, r0, r2);
2342 if (sign)
2343 ldxi_c(r2, r1, 6);
2344 else
2345 ldxi_uc(r2, r1, 6);
2346 lshi(r2, r2, 48);
2347# else
2348 if (sign)
2349 ldr_s(r0, r1);
2350 else
2351 ldr_us(r0, r1);
2352 lshi(r0, r0, 40);
2353 ldxi_ui(r2, r1, 2);
2354 lshi(r2, r2, 8);
2355 orr(r0, r0, r2);
2356 ldxi_uc(r2, r1, 6);
2357# endif
2358 or2 = fallback_jmpi(_jit->pc.w);
2359 fallback_flush();
2360 fallback_patch_bner(un2, _jit->pc.w);
2361 andi(r2, r1, 3);
2362 un3 = fallback_bnei(_jit->pc.w, r2, 3);
2363# if __BYTE_ORDER == __LITTLE_ENDIAN
2364 ldr_uc(r0, r1);
2365 ldxi_ui(r2, r1, 1);
2366 lshi(r2, r2, 8);
2367 orr(r0, r0, r2);
2368 if (sign)
2369 ldxi_s(r2, r1, 5);
2370 else
2371 ldxi_us(r2, r1, 5);
2372 lshi(r2, r2, 40);
2373# else
2374 if (sign)
2375 ldr_c(r0, r1);
2376 else
2377 ldr_uc(r0, r1);
2378 lshi(r0, r0, 48);
2379 ldxi_ui(r2, r1, 1);
2380 lshi(r2, r2, 16);
2381 orr(r0, r0, r2);
2382 ldxi_us(r2, r1, 5);
2383# endif
2384 or3 = fallback_jmpi(_jit->pc.w);
2385 fallback_flush();
2386 fallback_patch_bnei(un3, _jit->pc.w);
2387# if __BYTE_ORDER == __LITTLE_ENDIAN
2388 ldr_uc(r0, r1);
2389 ldxi_us(r2, r1, 1);
2390 lshi(r2, r2, 8);
2391 orr(r0, r0, r2);
2392 if (sign)
2393 ldxi_i(r2, r1, 3);
2394 else
2395 ldxi_ui(r2, r1, 3);
2396 lshi(r2, r2, 24);
2397# else
2398 if (sign)
2399 ldr_c(r0, r1);
2400 else
2401 ldr_uc(r0, r1);
2402 lshi(r0, r0, 48);
2403 ldxi_us(r2, r1, 1);
2404 lshi(r2, r2, 32);
2405 orr(r0, r0, r2);
2406 ldxi_ui(r2, r1, 3);
2407# endif
2408 fallback_flush();
2409 fallback_patch_jmpi(or, _jit->pc.w);
2410 fallback_patch_jmpi(or2, _jit->pc.w);
2411 fallback_patch_jmpi(or3, _jit->pc.w);
2412 orr(r0, r0, r2);
2413 jit_unget_reg(t0);
2414}
2415
2416static void
2417_unldi7(jit_state_t *_jit, jit_int32_t r0, jit_word_t i1, jit_bool_t sign)
2418{
2419 jit_int32_t t0, r2;
2420 t0 = fallback_jit_get_reg(jit_class_gpr);
2421 r2 = rn(t0);
2422 if ((i1 & -4) == i1) {
2423# if __BYTE_ORDER == __LITTLE_ENDIAN
2424 ldi_ui(r0, i1);
2425 ldi_us(r2, i1 + 4);
2426 lshi(r2, r2, 32);
2427 orr(r0, r0, r2);
2428 if (sign)
2429 ldi_c(r2, i1 + 6);
2430 else
2431 ldi_uc(r2, i1 + 6);
2432 lshi(r2, r2, 48);
2433# else
2434 if (sign)
2435 ldi_i(r0, i1);
2436 else
2437 ldi_ui(r0, i1);
2438 lshi(r0, r0, 24);
2439 ldi_us(r2, i1 + 4);
2440 lshi(r2, r2, 8);
2441 orr(r0, r0, r2);
2442 ldi_uc(r2, i1 + 6);
2443# endif
2444 }
2445 else if ((i1 & -2) == i1) {
2446# if __BYTE_ORDER == __LITTLE_ENDIAN
2447 ldi_us(r0, i1);
2448 ldi_ui(r2, i1 + 2);
2449 lshi(r2, r2, 16);
2450 orr(r0, r0, r2);
2451 if (sign)
2452 ldi_c(r2, i1 + 6);
2453 else
2454 ldi_uc(r2, i1 + 6);
2455 lshi(r2, r2, 48);
2456# else
2457 if (sign)
2458 ldi_s(r0, i1);
2459 else
2460 ldi_us(r0, i1);
2461 lshi(r0, r0, 40);
2462 ldi_ui(r2, i1 + 2);
2463 lshi(r2, r2, 8);
2464 orr(r0, r0, r2);
2465 ldi_uc(r2, i1 + 6);
2466# endif
2467 }
2468 else if ((i1 & 3) == 3) {
2469# if __BYTE_ORDER == __LITTLE_ENDIAN
2470 ldi_uc(r0, i1);
2471 ldi_ui(r2, i1 + 1);
2472 lshi(r2, r2, 8);
2473 orr(r0, r0, r2);
2474 if (sign)
2475 ldi_s(r2, i1 + 5);
2476 else
2477 ldi_us(r2, i1 + 5);
2478 lshi(r2, r2, 40);
2479# else
2480 if (sign)
2481 ldi_c(r0, i1);
2482 else
2483 ldi_uc(r0, i1);
2484 lshi(r0, r0, 48);
2485 ldi_ui(r2, i1 + 1);
2486 lshi(r2, r2, 16);
2487 orr(r0, r0, r2);
2488 ldi_us(r2, i1 + 5);
2489# endif
2490 }
2491 else {
2492# if __BYTE_ORDER == __LITTLE_ENDIAN
2493 ldi_uc(r0, i1);
2494 ldi_us(r2, i1 + 1);
2495 lshi(r2, r2, 8);
2496 orr(r0, r0, r2);
2497 if (sign)
2498 ldi_i(r2, i1 + 3);
2499 else
2500 ldi_ui(r2, i1 + 3);
2501 lshi(r2, r2, 24);
2502# else
2503 if (sign)
2504 ldi_c(r0, i1);
2505 else
2506 ldi_uc(r0, i1);
2507 lshi(r0, r0, 48);
2508 ldi_us(r2, i1 + 1);
2509 lshi(r2, r2, 32);
2510 orr(r0, r0, r2);
2511 ldi_ui(r2, i1 + 3);
2512# endif
2513 }
2514 orr(r0, r0, r2);
2515 jit_unget_reg(t0);
2516}
2517
2518static void
2519_unldr8(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2520{
2521 jit_int32_t t0, r2;
2522 jit_word_t un8, un4, un2, un7, un6, un5, un3, al,
2523 or, or2, or7, or6, or5, or3;
2524 t0 = fallback_jit_get_reg(jit_class_gpr);
2525 r2 = rn(t0);
2526 andi(r2, r1, -8);
2527 un8 = fallback_bner(_jit->pc.w, r1, r2);
2528 ldr_l(r0, r1);
2529 al = fallback_jmpi(_jit->pc.w);
2530 fallback_flush();
2531 fallback_patch_bner(un8, _jit->pc.w);
2532 andi(r2, r1, -4);
2533 un4 = fallback_bner(_jit->pc.w, r1, r2);
2534# if __BYTE_ORDER == __LITTLE_ENDIAN
2535 ldr_ui(r0, r1);
2536 ldxi_i(r2, r1, 4);
2537 lshi(r2, r2, 32);
2538# else
2539 ldr_i(r0, r1);
2540 ldxi_ui(r2, r1, 4);
2541 lshi(r0, r0, 32);
2542# endif
2543 or = fallback_jmpi(_jit->pc.w);
2544 fallback_flush();
2545 fallback_patch_bner(un4, _jit->pc.w);
2546 andi(r2, r1, -2);
2547 un2 = fallback_bner(_jit->pc.w, r1, r2);
2548# if __BYTE_ORDER == __LITTLE_ENDIAN
2549 ldr_us(r0, r1);
2550 ldxi_ui(r2, r1, 2);
2551 lshi(r2, r2, 16);
2552 orr(r0, r0, r2);
2553 ldxi_s(r2, r1, 6);
2554 lshi(r2, r2, 48);
2555# else
2556 ldr_s(r0, r1);
2557 lshi(r0, r0, 48);
2558 ldxi_ui(r2, r1, 2);
2559 lshi(r2, r2, 16);
2560 orr(r0, r0, r2);
2561 ldxi_us(r2, r1, 6);
2562# endif
2563 or2 = fallback_jmpi(_jit->pc.w);
2564 fallback_flush();
2565 fallback_patch_bner(un2, _jit->pc.w);
2566 andi(r2, r1, 7);
2567 un7 = fallback_bnei(_jit->pc.w, r2, 7);
2568# if __BYTE_ORDER == __LITTLE_ENDIAN
2569 ldr_uc(r0, r1);
2570 ldxi_l(r2, r1, 1);
2571 lshi(r2, r2, 8);
2572# else
2573 ldr_c(r0, r1);
2574 ldxi_l(r2, r1, 1);
2575 rshi_u(r2, r2, 8);
2576 lshi(r0, r0, 56);
2577# endif
2578 or7 = fallback_jmpi(_jit->pc.w);
2579 fallback_flush();
2580 fallback_patch_bnei(un7, _jit->pc.w);
2581 un6 = fallback_bnei(_jit->pc.w, r2, 6);
2582# if __BYTE_ORDER == __LITTLE_ENDIAN
2583 ldr_us(r0, r1);
2584 ldxi_l(r2, r1, 2);
2585 lshi(r2, r2, 16);
2586# else
2587 ldr_s(r0, r1);
2588 lshi(r0, r0, 48);
2589 ldxi_l(r2, r1, 2);
2590 rshi_u(r2, r2, 16);
2591# endif
2592 or6 = fallback_jmpi(_jit->pc.w);
2593 fallback_flush();
2594 fallback_patch_bnei(un6, _jit->pc.w);
2595 un5 = fallback_bnei(_jit->pc.w, r2, 5);
2596# if __BYTE_ORDER == __LITTLE_ENDIAN
2597 ldxi_ui(r0, r1, -1);
2598 rshi_u(r0, r0, 8);
2599 ldxi_ui(r2, r1, 3);
2600 lshi(r2, r2, 24);
2601 orr(r0, r0, r2);
2602 ldxi_c(r2, r1, 7);
2603 lshi(r2, r2, 56);
2604# else
2605 ldxi_i(r0, r1, -1);
2606 lshi(r0, r0, 40);
2607 ldxi_ui(r2, r1, 3);
2608 lshi(r2, r2, 8);
2609 orr(r0, r0, r2);
2610 ldxi_uc(r2, r1, 7);
2611# endif
2612 or5 = fallback_jmpi(_jit->pc.w);
2613 fallback_flush();
2614 fallback_patch_bnei(un5, _jit->pc.w);
2615 un3 = fallback_bnei(_jit->pc.w, r2, 3);
2616# if __BYTE_ORDER == __LITTLE_ENDIAN
2617 ldr_uc(r0, r1);
2618 ldxi_ui(r2, r1, 1);
2619 lshi(r2, r2, 8);
2620 orr(r0, r0, r2);
2621 ldxi_i(r2, r1, 5);
2622 lshi(r2, r2, 40);
2623# else
2624 ldr_c(r0, r1);
2625 lshi(r0, r0, 56);
2626 ldxi_ui(r2, r1, 1);
2627 lshi(r2, r2, 24);
2628 orr(r0, r0, r2);
2629 ldxi_ui(r2, r1, 5);
2630 rshi_u(r2, r2, 8);
2631# endif
2632 or3 = fallback_jmpi(_jit->pc.w);
2633 fallback_flush();
2634 fallback_patch_bnei(un3, _jit->pc.w);
2635# if __BYTE_ORDER == __LITTLE_ENDIAN
2636 ldr_uc(r0, r1);
2637 ldxi_us(r2, r1, 1);
2638 lshi(r2, r2, 8);
2639 orr(r0, r0, r2);
2640 ldxi_ui(r2, r1, 3);
2641 lshi(r2, r2, 24);
2642 orr(r0, r0, r2);
2643 ldxi_c(r2, r1, 7);
2644 lshi(r2, r2, 56);
2645# else
2646 ldr_c(r0, r1);
2647 lshi(r0, r0, 56);
2648 ldxi_us(r2, r1, 1);
2649 lshi(r2, r2, 40);
2650 orr(r0, r0, r2);
2651 ldxi_ui(r2, r1, 3);
2652 lshi(r2, r2, 8);
2653 orr(r0, r0, r2);
2654 ldxi_uc(r2, r1, 7);
2655# endif
2656 fallback_flush();
2657 fallback_patch_jmpi(or, _jit->pc.w);
2658 fallback_patch_jmpi(or2, _jit->pc.w);
2659 fallback_patch_jmpi(or7, _jit->pc.w);
2660 fallback_patch_jmpi(or6, _jit->pc.w);
2661 fallback_patch_jmpi(or5, _jit->pc.w);
2662 fallback_patch_jmpi(or3, _jit->pc.w);
2663 orr(r0, r0, r2);
2664 fallback_flush();
2665 fallback_patch_jmpi(al, _jit->pc.w);
2666 jit_unget_reg(t0);
2667}
2668
2669static void
2670_unldi8(jit_state_t *_jit, jit_int32_t r0, jit_word_t i1)
2671{
2672 jit_int32_t t0, r2;
2673 if ((i1 & -8) == i1)
2674 ldi_l(r0, i1);
2675 else {
2676 t0 = fallback_jit_get_reg(jit_class_gpr);
2677 r2 = rn(t0);
2678 if ((i1 & -4) == i1) {
2679# if __BYTE_ORDER == __LITTLE_ENDIAN
2680 ldi_ui(r0, i1);
2681 ldi_i(r2, i1 + 4);
2682 lshi(r2, r2, 32);
2683# else
2684 ldi_i(r0, i1);
2685 ldi_ui(r2, i1 + 4);
2686 lshi(r0, r0, 32);
2687# endif
2688 }
2689 else if ((i1 & -2) == i1) {
2690# if __BYTE_ORDER == __LITTLE_ENDIAN
2691 ldi_us(r0, i1);
2692 ldi_ui(r2, i1 + 2);
2693 lshi(r2, r2, 16);
2694 orr(r0, r0, r2);
2695 ldi_s(r2, i1 + 6);
2696 lshi(r2, r2, 48);
2697# else
2698 ldi_s(r0, i1);
2699 lshi(r0, r0, 48);
2700 ldi_ui(r2, i1 + 2);
2701 lshi(r2, r2, 16);
2702 orr(r0, r0, r2);
2703 ldi_us(r2, i1 + 6);
2704# endif
2705 }
2706 else if ((i1 & 7) == 7) {
2707# if __BYTE_ORDER == __LITTLE_ENDIAN
2708 ldi_uc(r0, i1);
2709 ldi_l(r2, i1 + 1);
2710 lshi(r2, r2, 8);
2711# else
2712 ldi_c(r0, i1);
2713 ldi_l(r2, i1 + 1);
2714 rshi_u(r2, r2, 8);
2715 lshi(r0, r0, 56);
2716# endif
2717 }
2718 else if ((i1 & 7) == 6) {
2719# if __BYTE_ORDER == __LITTLE_ENDIAN
2720 ldi_us(r0, i1);
2721 ldi_l(r2, i1 + 2);
2722 lshi(r2, r2, 16);
2723# else
2724 ldi_s(r0, i1);
2725 lshi(r0, r0, 48);
2726 ldi_l(r2, i1 + 2);
2727 rshi_u(r2, r2, 16);
2728# endif
2729 }
2730 else if ((i1 & 7) == 5) {
2731# if __BYTE_ORDER == __LITTLE_ENDIAN
2732 ldi_ui(r0, i1 - 1);
2733 rshi_u(r0, r0, 8);
2734 ldi_ui(r2, i1 + 3);
2735 lshi(r2, r2, 24);
2736 orr(r0, r0, r2);
2737 ldi_c(r2, i1 + 7);
2738 lshi(r2, r2, 56);
2739# else
2740 ldi_i(r0, i1 - 1);
2741 lshi(r0, r0, 40);
2742 ldi_ui(r2, i1 + 3);
2743 lshi(r2, r2, 8);
2744 orr(r0, r0, r2);
2745 ldi_uc(r2, i1 + 7);
2746# endif
2747 }
2748 else if ((i1 & 7) == 3) {
2749# if __BYTE_ORDER == __LITTLE_ENDIAN
2750 ldi_uc(r0, i1);
2751 ldi_ui(r2, i1 + 1);
2752 lshi(r2, r2, 8);
2753 orr(r0, r0, r2);
2754 ldi_i(r2, i1 + 5);
2755 lshi(r2, r2, 40);
2756# else
2757 ldi_c(r0, i1);
2758 lshi(r0, r0, 56);
2759 ldi_ui(r2, i1 + 1);
2760 lshi(r2, r2, 24);
2761 orr(r0, r0, r2);
2762 ldi_ui(r2, i1 + 5);
2763 rshi_u(r2, r2, 8);
2764# endif
2765 }
2766 else {
2767# if __BYTE_ORDER == __LITTLE_ENDIAN
2768 ldi_uc(r0, i1);
2769 ldi_us(r2, i1 + 1);
2770 lshi(r2, r2, 8);
2771 orr(r0, r0, r2);
2772 ldi_ui(r2, i1 + 3);
2773 lshi(r2, r2, 24);
2774 orr(r0, r0, r2);
2775 ldi_c(r2, i1 + 7);
2776 lshi(r2, r2, 56);
2777# else
2778 ldi_c(r0, i1);
2779 lshi(r0, r0, 56);
2780 ldi_us(r2, i1 + 1);
2781 lshi(r2, r2, 40);
2782 orr(r0, r0, r2);
2783 ldi_ui(r2, i1 + 3);
2784 lshi(r2, r2, 8);
2785 orr(r0, r0, r2);
2786 ldi_uc(r2, i1 + 7);
2787# endif
2788 }
2789 orr(r0, r0, r2);
2790 jit_unget_reg(t0);
2791 }
2792}
2793# endif
2794
2795static void
2796_unstr2(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2797{
2798 jit_int32_t t0, r2;
2799 jit_word_t un2, al;
2800 t0 = fallback_jit_get_reg(jit_class_gpr);
2801 r2 = rn(t0);
2802 andi(r2, r1, -2);
2803 un2 = fallback_bner(_jit->pc.w, r2, r1);
2804 str_s(r1, r0);
2805 al = fallback_jmpi(_jit->pc.w);
2806 fallback_flush();
2807 fallback_patch_bner(un2, _jit->pc.w);
2808#if __BYTE_ORDER == __LITTLE_ENDIAN
2809 str_c(r1, r0);
2810 rshi_u(r2, r0, 8);
2811 stxi_c(1, r1, r2);
2812#else
2813 stxi_c(1, r1, r0);
2814 rshi_u(r2, r0, 8);
2815 str_c(r1, r2);
2816#endif
2817 fallback_flush();
2818 fallback_patch_jmpi(al, _jit->pc.w);
2819 jit_unget_reg(t0);
2820}
2821
2822static void
2823_unsti2(jit_state_t *_jit, jit_int32_t r0, jit_word_t i1)
2824{
2825 jit_int32_t t0, r2;
2826 if ((i1 & -2) == i1)
2827 sti_s(i1, r0);
2828 else {
2829 t0 = fallback_jit_get_reg(jit_class_gpr);
2830 r2 = rn(t0);
2831#if __BYTE_ORDER == __LITTLE_ENDIAN
2832 sti_c(i1, r0);
2833 rshi_u(r2, r0, 8);
2834 sti_c(1 + i1, r2);
2835#else
2836 sti_c(1 + i1, r0);
2837 rshi_u(r2, r0, 8);
2838 sti_c(i1, r2);
2839#endif
2840 jit_unget_reg(t0);
2841 }
2842}
2843
2844static void
2845_unstr3(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2846{
2847 jit_int32_t t0, r2;
2848 jit_word_t un2, al;
2849 t0 = fallback_jit_get_reg(jit_class_gpr);
2850 r2 = rn(t0);
2851 andi(r2, r1, -2);
2852 un2 = fallback_bner(_jit->pc.w, r2, r1);
2853# if __BYTE_ORDER == __LITTLE_ENDIAN
2854 str_s(r1, r0);
2855 rshi(r2, r0, 16);
2856 stxi_c(2, r1, r2);
2857# else
2858 stxi_c(2, r1, r0);
2859 rshi(r2, r0, 8);
2860 str_s(r1, r2);
2861# endif
2862 al = fallback_jmpi(_jit->pc.w);
2863 fallback_flush();
2864 fallback_patch_bner(un2, _jit->pc.w);
2865# if __BYTE_ORDER == __LITTLE_ENDIAN
2866 str_c(r1, r0);
2867 rshi(r2, r0, 8);
2868 stxi_s(1, r1, r2);
2869# else
2870 stxi_s(1, r1, r0);
2871 rshi(r2, r0, 16);
2872 str_c(r1, r2);
2873# endif
2874 fallback_flush();
2875 fallback_patch_jmpi(al, _jit->pc.w);
2876 jit_unget_reg(t0);
2877}
2878
2879static void
2880_unsti3(jit_state_t *_jit, jit_int32_t r0, jit_word_t i1)
2881{
2882 jit_int32_t t0, r2;
2883 t0 = fallback_jit_get_reg(jit_class_gpr);
2884 r2 = rn(t0);
2885 if ((i1 & -2) == i1) {
2886# if __BYTE_ORDER == __LITTLE_ENDIAN
2887 sti_s(i1, r0);
2888 rshi(r2, r0, 16);
2889 sti_c(2 + i1, r2);
2890# else
2891 sti_c(2 + i1, r0);
2892 rshi(r2, r0, 8);
2893 sti_s(i1, r2);
2894# endif
2895 }
2896 else {
2897# if __BYTE_ORDER == __LITTLE_ENDIAN
2898 sti_c(i1, r0);
2899 rshi(r2, r0, 8);
2900 sti_s(1 + i1, r2);
2901# else
2902 sti_s(1 + i1, r0);
2903 rshi(r2, r0, 16);
2904 sti_c(i1, r2);
2905# endif
2906 }
2907 jit_unget_reg(t0);
2908}
2909
2910static void
2911_unstr4(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2912{
2913 jit_int32_t t0, r2;
2914 jit_word_t un4, un2, al, al2;
2915 t0 = fallback_jit_get_reg(jit_class_gpr);
2916 r2 = rn(t0);
2917 andi(r2, r1, -4);
2918 un4 = fallback_bner(_jit->pc.w, r2, r1);
2919 str_i(r1, r0);
2920 al = fallback_jmpi(_jit->pc.w);
2921 fallback_flush();
2922 fallback_patch_bner(un4, _jit->pc.w);
2923 andi(r2, r1, -2);
2924 un2 = fallback_bner(_jit->pc.w, r2, r1);
2925# if __BYTE_ORDER == __LITTLE_ENDIAN
2926 str_s(r1, r0);
2927 rshi(r2, r0, 16);
2928 stxi_s(2, r1, r2);
2929# else
2930 stxi_s(2, r1, r0);
2931 rshi(r2, r0, 16);
2932 str_s(r1, r2);
2933# endif
2934 al2 = fallback_jmpi(_jit->pc.w);
2935 fallback_flush();
2936 fallback_patch_bner(un2, _jit->pc.w);
2937# if __BYTE_ORDER == __LITTLE_ENDIAN
2938 str_c(r1, r0);
2939 rshi(r2, r0, 8);
2940 stxi_s(1, r1, r2);
2941 rshi(r2, r2, 16);
2942 stxi_c(3, r1, r2);
2943# else
2944 stxi_c(3, r1, r0);
2945 rshi(r2, r0, 8);
2946 stxi_s(1, r1, r2);
2947 rshi(r2, r2, 16);
2948 str_c(r1, r2);
2949# endif
2950 fallback_flush();
2951 fallback_patch_jmpi(al, _jit->pc.w);
2952 fallback_patch_jmpi(al2, _jit->pc.w);
2953 jit_unget_reg(t0);
2954}
2955
2956static void
2957_unsti4(jit_state_t *_jit, jit_int32_t r0, jit_word_t i1)
2958{
2959 jit_int32_t t0, r2;
2960 if ((i1 & -4) == i1)
2961 sti_i(i1, r0);
2962 else {
2963 t0 = fallback_jit_get_reg(jit_class_gpr);
2964 r2 = rn(t0);
2965 if ((i1 & -2) == i1) {
2966# if __BYTE_ORDER == __LITTLE_ENDIAN
2967 sti_s(i1, r0);
2968 rshi(r2, r0, 16);
2969 sti_s(2 + i1, r2);
2970# else
2971 sti_s(2 + i1, r0);
2972 rshi(r2, r0, 16);
2973 sti_s(i1, r2);
2974# endif
2975 }
2976 else {
2977# if __BYTE_ORDER == __LITTLE_ENDIAN
2978 sti_c(i1, r0);
2979 rshi(r2, r0, 8);
2980 sti_s(1 + i1, r2);
2981 rshi(r2, r2, 16);
2982 sti_c(3 + i1, r2);
2983# else
2984 sti_c(3 + i1, r0);
2985 rshi(r2, r0, 8);
2986 sti_s(1 + i1, r2);
2987 rshi(r2, r2, 16);
2988 sti_c(i1, r2);
2989# endif
2990 }
2991 jit_unget_reg(t0);
2992 }
2993}
2994
2995# if __WORDSIZE == 64
2996static void
2997_unstr5(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2998{
2999 jit_int32_t t0, r2;
3000 jit_word_t un3, un2, un1, al, al2, al1;
3001 t0 = fallback_jit_get_reg(jit_class_gpr);
3002 r2 = rn(t0);
3003 andi(r2, r1, 3);
3004 un3 = fallback_bnei(_jit->pc.w, r2, 3);
3005# if __BYTE_ORDER == __LITTLE_ENDIAN
3006 str_c(r1, r0);
3007 rshi(r2, r0, 8);
3008 stxi_i(1, r1, r2);
3009# else
3010 stxi_i(1, r1, r0);
3011 rshi(r2, r0, 32);
3012 str_c(r1, r2);
3013# endif
3014 al = fallback_jmpi(_jit->pc.w);
3015 fallback_flush();
3016 fallback_patch_bnei(un3, _jit->pc.w);
3017 un2 = fallback_bnei(_jit->pc.w, r2, 2);
3018# if __BYTE_ORDER == __LITTLE_ENDIAN
3019 str_s(r1, r0);
3020 rshi(r2, r0, 16);
3021 stxi_s(2, r1, r2);
3022 rshi(r2, r2, 16);
3023 stxi_c(4, r1, r2);
3024# else
3025 stxi_c(4, r1, r0);
3026 rshi(r2, r0, 8);
3027 stxi_s(2, r1, r2);
3028 rshi(r2, r2, 16);
3029 str_s(r1, r2);
3030# endif
3031 al2 = fallback_jmpi(_jit->pc.w);
3032 fallback_flush();
3033 fallback_patch_bnei(un2, _jit->pc.w);
3034 un1 = fallback_bnei(_jit->pc.w, r2, 1);
3035# if __BYTE_ORDER == __LITTLE_ENDIAN
3036 str_c(r1, r0);
3037 rshi(r2, r0, 8);
3038 stxi_s(1, r1, r2);
3039 rshi(r2, r2, 16);
3040 stxi_s(3, r1, r2);
3041# else
3042 stxi_s(3, r1, r0);
3043 rshi(r2, r0, 16);
3044 stxi_s(1, r1, r2);
3045 rshi(r2, r2, 16);
3046 str_c(r1, r2);
3047# endif
3048 al1 = fallback_jmpi(_jit->pc.w);
3049 fallback_flush();
3050 fallback_patch_bnei(un1, _jit->pc.w);
3051# if __BYTE_ORDER == __LITTLE_ENDIAN
3052 str_i(r1, r0);
3053 rshi(r2, r0, 32);
3054 stxi_c(4, r1, r2);
3055# else
3056 stxi_c(4, r1, r0);
3057 rshi(r2, r0, 8);
3058 str_i(r1, r2);
3059# endif
3060 fallback_flush();
3061 fallback_patch_jmpi(al, _jit->pc.w);
3062 fallback_patch_jmpi(al2, _jit->pc.w);
3063 fallback_patch_jmpi(al1, _jit->pc.w);
3064 jit_unget_reg(t0);
3065}
3066
3067static void
3068_unsti5(jit_state_t *_jit, jit_int32_t r0, jit_word_t i1)
3069{
3070 jit_int32_t t0, r2;
3071 t0 = fallback_jit_get_reg(jit_class_gpr);
3072 r2 = rn(t0);
3073 if ((i1 & 3) == 3) {
3074# if __BYTE_ORDER == __LITTLE_ENDIAN
3075 sti_c(i1, r0);
3076 rshi(r2, r0, 8);
3077 sti_i(1 + i1, r2);
3078# else
3079 sti_i(1 + i1, r0);
3080 rshi(r2, r0, 32);
3081 sti_c(i1, r2);
3082# endif
3083 }
3084 else if ((i1 & 3) == 2) {
3085# if __BYTE_ORDER == __LITTLE_ENDIAN
3086 sti_s(i1, r0);
3087 rshi(r2, r0, 16);
3088 sti_s(2 + i1, r2);
3089 rshi(r2, r2, 16);
3090 sti_c(4 + i1, r2);
3091# else
3092 sti_c(4 + i1, r0);
3093 rshi(r2, r0, 8);
3094 sti_s(2 + i1, r2);
3095 rshi(r2, r2, 16);
3096 sti_s(i1, r2);
3097# endif
3098 }
3099 else if ((i1 & 3) == 1) {
3100# if __BYTE_ORDER == __LITTLE_ENDIAN
3101 sti_c(i1, r0);
3102 rshi(r2, r0, 8);
3103 sti_s(1 + i1, r2);
3104 rshi(r2, r2, 16);
3105 sti_s(3 + i1, r2);
3106# else
3107 sti_s(3 + i1, r0);
3108 rshi(r2, r0, 16);
3109 sti_s(1 + i1, r2);
3110 rshi(r2, r2, 16);
3111 sti_c(i1, r2);
3112# endif
3113 }
3114 else {
3115# if __BYTE_ORDER == __LITTLE_ENDIAN
3116 sti_i(i1, r0);
3117 rshi(r2, r0, 32);
3118 sti_c(4 + i1, r2);
3119# else
3120 sti_c(4 + i1, r0);
3121 rshi(r2, r0, 8);
3122 sti_i(i1, r2);
3123# endif
3124 }
3125 jit_unget_reg(t0);
3126}
3127
3128static void
3129_unstr6(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3130{
3131 jit_int32_t t0, r2;
3132 jit_word_t un3, un2, un1, al, al2, al1;
3133 t0 = fallback_jit_get_reg(jit_class_gpr);
3134 r2 = rn(t0);
3135 andi(r2, r1, 3);
3136 un3 = fallback_bnei(_jit->pc.w, r2, 3);
3137# if __BYTE_ORDER == __LITTLE_ENDIAN
3138 str_c(r1, r0);
3139 rshi(r2, r0, 8);
3140 stxi_i(1, r1, r2);
3141 rshi(r2, r2, 32);
3142 stxi_c(5, r1, r2);
3143# else
3144 stxi_c(5, r1, r0);
3145 rshi(r2, r0, 8);
3146 stxi_i(1, r1, r2);
3147 rshi(r2, r2, 32);
3148 str_c(r1, r2);
3149# endif
3150 al = fallback_jmpi(_jit->pc.w);
3151 fallback_flush();
3152 fallback_patch_bnei(un3, _jit->pc.w);
3153 un2 = fallback_bnei(_jit->pc.w, r2, 2);
3154# if __BYTE_ORDER == __LITTLE_ENDIAN
3155 str_s(r1, r0);
3156 rshi(r2, r0, 16);
3157 stxi_s(2, r1, r2);
3158 rshi(r2, r2, 16);
3159 stxi_s(4, r1, r2);
3160# else
3161 stxi_s(4, r1, r0);
3162 rshi(r2, r0, 16);
3163 stxi_s(2, r1, r2);
3164 rshi(r2, r2, 16);
3165 str_s(r1, r2);
3166# endif
3167 al2 = fallback_jmpi(_jit->pc.w);
3168 fallback_flush();
3169 fallback_patch_bnei(un2, _jit->pc.w);
3170 un1 = fallback_bnei(_jit->pc.w, r2, 1);
3171# if __BYTE_ORDER == __LITTLE_ENDIAN
3172 str_c(r1, r0);
3173 rshi(r2, r0, 8);
3174 stxi_s(1, r1, r2);
3175 rshi(r2, r2, 16);
3176 stxi_s(3, r1, r2);
3177 rshi(r2, r2, 16);
3178 stxi_c(5, r1, r2);
3179# else
3180 stxi_c(5, r1, r0);
3181 rshi(r2, r0, 8);
3182 stxi_s(3, r1, r2);
3183 rshi(r2, r2, 16);
3184 stxi_s(1, r1, r2);
3185 rshi(r2, r2, 16);
3186 str_c(r1, r2);
3187# endif
3188 al1 = fallback_jmpi(_jit->pc.w);
3189 fallback_flush();
3190 fallback_patch_bnei(un1, _jit->pc.w);
3191# if __BYTE_ORDER == __LITTLE_ENDIAN
3192 str_i(r1, r0);
3193 rshi(r2, r0, 32);
3194 stxi_s(4, r1, r2);
3195# else
3196 stxi_s(4, r1, r0);
3197 rshi(r2, r0, 16);
3198 str_i(r1, r2);
3199# endif
3200 fallback_flush();
3201 fallback_patch_jmpi(al, _jit->pc.w);
3202 fallback_patch_jmpi(al2, _jit->pc.w);
3203 fallback_patch_jmpi(al1, _jit->pc.w);
3204 jit_unget_reg(t0);
3205}
3206
3207static void
3208_unsti6(jit_state_t *_jit, jit_int32_t r0, jit_word_t i1)
3209{
3210 jit_int32_t t0, r2;
3211 t0 = fallback_jit_get_reg(jit_class_gpr);
3212 r2 = rn(t0);
3213 if ((i1 & 3) == 3) {
3214# if __BYTE_ORDER == __LITTLE_ENDIAN
3215 sti_c(i1, r0);
3216 rshi(r2, r0, 8);
3217 sti_i(1 + i1, r2);
3218 rshi(r2, r2, 32);
3219 sti_c(5 + i1, r2);
3220# else
3221 sti_c(5 + i1, r0);
3222 rshi(r2, r0, 8);
3223 sti_i(1 + i1, r2);
3224 rshi(r2, r2, 32);
3225 sti_c(i1, r2);
3226# endif
3227 }
3228 else if ((i1 & 3) == 2) {
3229# if __BYTE_ORDER == __LITTLE_ENDIAN
3230 sti_s(i1, r0);
3231 rshi(r2, r0, 16);
3232 sti_s(2 + i1, r2);
3233 rshi(r2, r2, 16);
3234 sti_s(4 + i1, r2);
3235# else
3236 sti_s(4 + i1, r0);
3237 rshi(r2, r0, 16);
3238 sti_s(2 + i1, r2);
3239 rshi(r2, r2, 16);
3240 sti_s(i1, r2);
3241# endif
3242 }
3243 else if ((i1 & 3) == 1) {
3244# if __BYTE_ORDER == __LITTLE_ENDIAN
3245 sti_c(i1, r0);
3246 rshi(r2, r0, 8);
3247 sti_s(1 + i1, r2);
3248 rshi(r2, r2, 16);
3249 sti_s(3 + i1, r2);
3250 rshi(r2, r2, 16);
3251 sti_c(5 + i1, r2);
3252# else
3253 sti_c(5 + i1, r0);
3254 rshi(r2, r0, 8);
3255 sti_s(3 + i1, r2);
3256 rshi(r2, r2, 16);
3257 sti_s(1 + i1, r2);
3258 rshi(r2, r2, 16);
3259 sti_c(i1, r2);
3260# endif
3261 }
3262 else {
3263# if __BYTE_ORDER == __LITTLE_ENDIAN
3264 sti_i(i1, r0);
3265 rshi(r2, r0, 32);
3266 sti_s(4 + i1, r2);
3267# else
3268 sti_s(4 + i1, r0);
3269 rshi(r2, r0, 16);
3270 sti_i(i1, r2);
3271# endif
3272 }
3273 jit_unget_reg(t0);
3274}
3275
3276static void
3277_unstr7(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3278{
3279 jit_int32_t t0, r2;
3280 jit_word_t un3, un2, un1, al, al2, al1;
3281 t0 = fallback_jit_get_reg(jit_class_gpr);
3282 r2 = rn(t0);
3283 andi(r2, r1, 3);
3284 un3 = fallback_bnei(_jit->pc.w, r2, 3);
3285# if __BYTE_ORDER == __LITTLE_ENDIAN
3286 str_c(r1, r0);
3287 rshi(r2, r0, 8);
3288 stxi_i(1, r1, r2);
3289 rshi(r2, r2, 32);
3290 stxi_s(5, r1, r2);
3291# else
3292 stxi_s(5, r1, r0);
3293 rshi(r2, r0, 16);
3294 stxi_i(1, r1, r2);
3295 rshi(r2, r2, 32);
3296 str_c(r1, r2);
3297# endif
3298 al = fallback_jmpi(_jit->pc.w);
3299 fallback_flush();
3300 fallback_patch_bnei(un3, _jit->pc.w);
3301 un2 = fallback_bnei(_jit->pc.w, r2, 2);
3302# if __BYTE_ORDER == __LITTLE_ENDIAN
3303 str_s(r1, r0);
3304 rshi(r2, r0, 16);
3305 stxi_i(2, r1, r2);
3306 rshi(r2, r2, 32);
3307 stxi_c(6, r1, r2);
3308# else
3309 stxi_c(6, r1, r0);
3310 rshi(r2, r0, 8);
3311 stxi_i(2, r1, r2);
3312 rshi(r2, r2, 32);
3313 str_s(r1, r2);
3314# endif
3315 al2 = fallback_jmpi(_jit->pc.w);
3316 fallback_flush();
3317 fallback_patch_bnei(un2, _jit->pc.w);
3318 un1 = fallback_bnei(_jit->pc.w, r2, 1);
3319# if __BYTE_ORDER == __LITTLE_ENDIAN
3320 str_c(r1, r0);
3321 rshi(r2, r0, 8);
3322 stxi_s(1, r1, r2);
3323 rshi(r2, r2, 16);
3324 stxi_i(3, r1, r2);
3325# else
3326 stxi_i(3, r1, r0);
3327 rshi(r2, r0, 32);
3328 stxi_s(1, r1, r2);
3329 rshi(r2, r2, 16);
3330 str_c(r1, r2);
3331# endif
3332 al1 = fallback_jmpi(_jit->pc.w);
3333 fallback_flush();
3334 fallback_patch_bnei(un1, _jit->pc.w);
3335# if __BYTE_ORDER == __LITTLE_ENDIAN
3336 str_i(r1, r0);
3337 rshi(r2, r0, 32);
3338 stxi_s(4, r1, r2);
3339 rshi(r2, r2, 16);
3340 stxi_c(6, r1, r2);
3341# else
3342 stxi_c(6, r1, r0);
3343 rshi(r2, r0, 8);
3344 stxi_s(4, r1, r2);
3345 rshi(r2, r2, 16);
3346 str_i(r1, r2);
3347# endif
3348 fallback_flush();
3349 fallback_patch_jmpi(al, _jit->pc.w);
3350 fallback_patch_jmpi(al2, _jit->pc.w);
3351 fallback_patch_jmpi(al1, _jit->pc.w);
3352 jit_unget_reg(t0);
3353}
3354
3355static void
3356_unsti7(jit_state_t *_jit, jit_int32_t r0, jit_word_t i1)
3357{
3358 jit_int32_t t0, r2;
3359 t0 = fallback_jit_get_reg(jit_class_gpr);
3360 r2 = rn(t0);
3361 if ((i1 & 3) == 3) {
3362# if __BYTE_ORDER == __LITTLE_ENDIAN
3363 sti_c(i1, r0);
3364 rshi(r2, r0, 8);
3365 sti_i(1 + i1, r2);
3366 rshi(r2, r2, 32);
3367 sti_s(5 + i1, r2);
3368# else
3369 sti_s(5 + i1, r0);
3370 rshi(r2, r0, 16);
3371 sti_i(1 + i1, r2);
3372 rshi(r2, r2, 32);
3373 sti_c(i1, r2);
3374# endif
3375 }
3376 else if ((i1 & 3) == 2) {
3377# if __BYTE_ORDER == __LITTLE_ENDIAN
3378 sti_s(i1, r0);
3379 rshi(r2, r0, 16);
3380 sti_i(2 + i1, r2);
3381 rshi(r2, r2, 32);
3382 sti_c(6 + i1, r2);
3383# else
3384 sti_c(6 + i1, r0);
3385 rshi(r2, r0, 8);
3386 sti_i(2 + i1, r2);
3387 rshi(r2, r2, 32);
3388 sti_s(i1, r2);
3389# endif
3390 }
3391 else if ((i1 & 3) == 1) {
3392# if __BYTE_ORDER == __LITTLE_ENDIAN
3393 sti_c(i1, r0);
3394 rshi(r2, r0, 8);
3395 sti_s(1 + i1, r2);
3396 rshi(r2, r2, 16);
3397 sti_i(3 + i1, r2);
3398# else
3399 sti_i(3 + i1, r0);
3400 rshi(r2, r0, 32);
3401 sti_s(1 + i1, r2);
3402 rshi(r2, r2, 16);
3403 sti_c(i1, r2);
3404# endif
3405 }
3406 else {
3407# if __BYTE_ORDER == __LITTLE_ENDIAN
3408 sti_i(i1, r0);
3409 rshi(r2, r0, 32);
3410 sti_s(4 + i1, r2);
3411 rshi(r2, r2, 16);
3412 sti_c(6 + i1, r2);
3413# else
3414 sti_c(6 + i1, r0);
3415 rshi(r2, r0, 8);
3416 sti_s(4 + i1, r2);
3417 rshi(r2, r2, 16);
3418 sti_i(i1, r2);
3419# endif
3420 }
3421 jit_unget_reg(t0);
3422}
3423
3424static void
3425_unstr8(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3426{
3427 jit_int32_t t0, r2;
3428 jit_word_t un8, un4, un2, un3, al, al4, al2, al3;
3429 t0 = fallback_jit_get_reg(jit_class_gpr);
3430 r2 = rn(t0);
3431 andi(r2, r1, -8);
3432 un8 = fallback_bner(_jit->pc.w, r2, r1);
3433 str_l(r1, r0);
3434 al = fallback_jmpi(_jit->pc.w);
3435 fallback_flush();
3436 fallback_patch_bner(un8, _jit->pc.w);
3437 andi(r2, r1, -4);
3438 un4 = fallback_bner(_jit->pc.w, r2, r1);
3439# if __BYTE_ORDER == __LITTLE_ENDIAN
3440 str_i(r1, r0);
3441 rshi(r2, r0, 32);
3442 stxi_i(4, r1, r2);
3443# else
3444 stxi_i(4, r1, r0);
3445 rshi(r2, r0, 32);
3446 str_i(r1, r2);
3447# endif
3448 al4 = fallback_jmpi(_jit->pc.w);
3449 fallback_flush();
3450 fallback_patch_bner(un4, _jit->pc.w);
3451 andi(r2, r1, -2);
3452 un2 = fallback_bner(_jit->pc.w, r2, r1);
3453# if __BYTE_ORDER == __LITTLE_ENDIAN
3454 str_s(r1, r0);
3455 rshi(r2, r0, 16);
3456 stxi_i(2, r1, r2);
3457 rshi(r2, r2, 32);
3458 stxi_s(6, r1, r2);
3459# else
3460 stxi_s(6, r1, r0);
3461 rshi(r2, r0, 16);
3462 stxi_i(2, r1, r2);
3463 rshi(r2, r2, 32);
3464 str_s(r1, r2);
3465# endif
3466 al2 = fallback_jmpi(_jit->pc.w);
3467 fallback_flush();
3468 fallback_patch_bner(un2, _jit->pc.w);
3469 andi(r2, r1, 3);
3470 un3 = fallback_bnei(_jit->pc.w, r2, 3);
3471# if __BYTE_ORDER == __LITTLE_ENDIAN
3472 str_c(r1, r0);
3473 rshi(r2, r0, 8);
3474 stxi_i(1, r1, r2);
3475 rshi(r2, r2, 32);
3476 stxi_s(5, r1, r2);
3477 rshi(r2, r2, 16);
3478 stxi_c(7, r1, r2);
3479# else
3480 stxi_c(7, r1, r0);
3481 rshi(r2, r0, 8);
3482 stxi_s(5, r1, r2);
3483 rshi(r2, r2, 16);
3484 stxi_i(1, r1, r2);
3485 rshi(r2, r2, 32);
3486 str_c(r1, r2);
3487# endif
3488 al3 = fallback_jmpi(_jit->pc.w);
3489 fallback_flush();
3490 fallback_patch_bnei(un3, _jit->pc.w);
3491# if __BYTE_ORDER == __LITTLE_ENDIAN
3492 str_c(r1, r0);
3493 rshi(r2, r0, 8);
3494 stxi_s(1, r1, r2);
3495 rshi(r2, r2, 16);
3496 stxi_i(3, r1, r2);
3497 rshi(r2, r2, 32);
3498 stxi_c(7, r1, r2);
3499# else
3500 stxi_c(7, r1, r0);
3501 rshi(r2, r0, 8);
3502 stxi_i(3, r1, r2);
3503 rshi(r2, r2, 32);
3504 stxi_s(1, r1, r2);
3505 rshi(r2, r2, 16);
3506 str_c(r1, r2);
3507# endif
3508 fallback_flush();
3509 fallback_patch_jmpi(al, _jit->pc.w);
3510 fallback_patch_jmpi(al4, _jit->pc.w);
3511 fallback_patch_jmpi(al2, _jit->pc.w);
3512 fallback_patch_jmpi(al3, _jit->pc.w);
3513 jit_unget_reg(t0);
3514}
3515
3516static void
3517_unsti8(jit_state_t *_jit, jit_int32_t r0, jit_word_t i1)
3518{
3519 jit_int32_t t0, r2;
3520 if ((i1 & -8) == i1)
3521 sti_l(i1, r0);
3522 else {
3523 t0 = fallback_jit_get_reg(jit_class_gpr);
3524 r2 = rn(t0);
3525 if ((i1 & -4) == i1) {
3526# if __BYTE_ORDER == __LITTLE_ENDIAN
3527 sti_i(i1, r0);
3528 rshi(r2, r0, 32);
3529 sti_i(4 + i1, r2);
3530# else
3531 sti_i(4 + i1, r0);
3532 rshi(r2, r0, 32);
3533 sti_i(i1, r2);
3534# endif
3535 }
3536 else if ((i1 & -2) == i1) {
3537# if __BYTE_ORDER == __LITTLE_ENDIAN
3538 sti_s(i1, r0);
3539 rshi(r2, r0, 16);
3540 sti_i(2 + i1, r2);
3541 rshi(r2, r2, 32);
3542 sti_s(6 + i1, r2);
3543# else
3544 sti_s(6 + i1, r0);
3545 rshi(r2, r0, 16);
3546 sti_i(2 + i1, r2);
3547 rshi(r2, r2, 32);
3548 sti_s(i1, r2);
3549# endif
3550 }
3551 else if ((i1 & 3) == 3) {
3552# if __BYTE_ORDER == __LITTLE_ENDIAN
3553 sti_c(i1, r0);
3554 rshi(r2, r0, 8);
3555 sti_i(1 + i1, r2);
3556 rshi(r2, r2, 32);
3557 sti_s(5 + i1, r2);
3558 rshi(r2, r2, 16);
3559 sti_c(7 + i1, r2);
3560# else
3561 sti_c(7 + i1, r0);
3562 rshi(r2, r0, 8);
3563 sti_s(5 + i1, r2);
3564 rshi(r2, r2, 16);
3565 sti_i(1 + i1, r2);
3566 rshi(r2, r2, 32);
3567 sti_c(i1, r2);
3568# endif
3569 }
3570 else {
3571# if __BYTE_ORDER == __LITTLE_ENDIAN
3572 sti_c(i1, r0);
3573 rshi(r2, r0, 8);
3574 sti_s(1 + i1, r2);
3575 rshi(r2, r2, 16);
3576 sti_i(3 + i1, r2);
3577 rshi(r2, r2, 32);
3578 sti_c(7 + i1, r2);
3579# else
3580 sti_c(7 + i1, r0);
3581 rshi(r2, r0, 8);
3582 sti_i(3 + i1, r2);
3583 rshi(r2, r2, 32);
3584 sti_s(1 + i1, r2);
3585 rshi(r2, r2, 16);
3586 sti_c(i1, r2);
3587# endif
3588 }
3589 jit_unget_reg(t0);
3590 }
3591}
3592# endif
3593
3594
3595static void
3596_unldrw(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
3597{
3598 jit_word_t cross, done;
3599 jit_int32_t t0, t1, t2, t3;
3600 t0 = fallback_jit_get_reg(jit_class_gpr);
3601 if (r0 == r1) {
3602 t1 = fallback_jit_get_reg(jit_class_gpr);
3603 movr(rn(t1), r1);
3604 r1 = rn(t1);
3605 }
3606 else
3607 t1 = JIT_NOREG;
3608 andi(rn(t0), r1, -(jit_word_t)sizeof(jit_word_t));
3609 ldr(r0, rn(t0));
3610 done = fallback_beqr(_jit->pc.w, rn(t0), r1);
3611 t2 = fallback_jit_get_reg(jit_class_gpr);
3612 andi(rn(t2), r1, sizeof(jit_word_t) - 1);
3613 t3 = fallback_jit_get_reg(jit_class_gpr);
3614 addi(rn(t3), rn(t2), i0);
3615 cross = fallback_blei(_jit->pc.w, rn(t3), sizeof(jit_word_t));
3616 ldxi(rn(t0), rn(t0), sizeof(jit_word_t));
3617 fallback_flush();
3618 fallback_patch_blei(cross, _jit->pc.w);
3619 jit_unget_reg(t3);
3620 lshi(rn(t2), rn(t2), 3);
3621#if __BYTE_ORDER == __LITTLE_ENDIAN
3622 rshr_u(r0, r0, rn(t2));
3623 rsbi(rn(t2), rn(t2), __WORDSIZE);
3624 lshr(rn(t0), rn(t0), rn(t2));
3625#else
3626 lshr(r0, r0, rn(t2));
3627 rsbi(rn(t2), rn(t2), __WORDSIZE);
3628 rshr_u(rn(t0), rn(t0), rn(t2));
3629#endif
3630 jit_unget_reg(t2);
3631 orr(r0, r0, rn(t0));
3632 fallback_flush();
3633 fallback_patch_beqr(done, _jit->pc.w);
3634 jit_unget_reg(t0);
3635 if (t1 != JIT_NOREG)
3636 jit_unget_reg(t1);
3637}
3638
3639static void
3640_unldiw(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0, jit_word_t i1)
3641{
3642 jit_word_t addr;
3643 jit_int32_t t0, sh;
3644 addr = i0 & -(jit_word_t)sizeof(jit_word_t);
3645 ldi(r0, addr);
3646 if (i0 != addr) {
3647 sh = (i0 & (sizeof(jit_word_t) - 1)) << 3;
3648 if (sh + i1 > sizeof(jit_word_t)) {
3649 t0 = fallback_jit_get_reg(jit_class_gpr);
3650 ldi(rn(t0), addr + sizeof(jit_word_t));
3651#if __BYTE_ORDER == __LITTLE_ENDIAN
3652 rshi_u(r0, r0, sh);
3653 lshi(rn(t0), rn(t0), __WORDSIZE - sh);
3654#else
3655 lshi(r0, r0, sh);
3656 rshi_u(rn(t0), rn(t0), __WORDSIZE - sh);
3657#endif
3658 orr(r0, r0, rn(t0));
3659 jit_unget_reg(t0);
3660 }
3661 }
3662}
3663
3664static void
3665_unldx(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
3666{
3667 switch (i0) {
3668 case 2:
3669#if __BYTE_ORDER == __LITTLE_ENDIAN
3670 extr_s(r0, r0);
3671#else
3672 rshi(r0, r0, __WORDSIZE - 16);
3673#endif
3674 break;
3675 case 3:
3676#if __BYTE_ORDER == __LITTLE_ENDIAN
3677 lshi(r0, r0, __WORDSIZE - 24);
3678#endif
3679 rshi(r0, r0, __WORDSIZE - 24);
3680 break;
3681#if __WORDSIZE == 32
3682 default:
3683#else
3684 case 4:
3685# if __BYTE_ORDER == __LITTLE_ENDIAN
3686 extr_i(r0, r0);
3687# else
3688 rshi(r0, r0, __WORDSIZE - 32);
3689# endif
3690#endif
3691 break;
3692#if __WORDSIZE == 64
3693 case 5:
3694# if __BYTE_ORDER == __LITTLE_ENDIAN
3695 lshi(r0, r0, __WORDSIZE - 40);
3696# endif
3697 rshi(r0, r0, __WORDSIZE - 40);
3698 break;
3699 case 6:
3700# if __BYTE_ORDER == __LITTLE_ENDIAN
3701 lshi(r0, r0, __WORDSIZE - 48);
3702# endif
3703 rshi(r0, r0, __WORDSIZE - 48);
3704 break;
3705 case 7:
3706# if __BYTE_ORDER == __LITTLE_ENDIAN
3707 lshi(r0, r0, __WORDSIZE - 56);
3708# endif
3709 rshi(r0, r0, __WORDSIZE - 56);
3710 break;
3711 default:
3712 break;
3713#endif
3714 }
3715}
3716
3717static void
3718_unldx_u(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
3719{
3720 switch (i0) {
3721 case 2:
3722#if __BYTE_ORDER == __LITTLE_ENDIAN
3723 extr_us(r0, r0);
3724#else
3725 rshi_u(r0, r0, __WORDSIZE - 16);
3726#endif
3727 break;
3728 case 3:
3729#if __BYTE_ORDER == __LITTLE_ENDIAN
3730 lshi(r0, r0, __WORDSIZE - 24);
3731#endif
3732 rshi_u(r0, r0, __WORDSIZE - 24);
3733 break;
3734#if __WORDSIZE == 32
3735 default:
3736#else
3737 case 4:
3738# if __BYTE_ORDER == __LITTLE_ENDIAN
3739 extr_ui(r0, r0);
3740# else
3741 rshi_u(r0, r0, __WORDSIZE - 32);
3742# endif
3743#endif
3744 break;
3745#if __WORDSIZE == 64
3746 case 5:
3747# if __BYTE_ORDER == __LITTLE_ENDIAN
3748 lshi(r0, r0, __WORDSIZE - 40);
3749# endif
3750 rshi_u(r0, r0, __WORDSIZE - 40);
3751 break;
3752 case 6:
3753# if __BYTE_ORDER == __LITTLE_ENDIAN
3754 lshi(r0, r0, __WORDSIZE - 48);
3755# endif
3756 rshi_u(r0, r0, __WORDSIZE - 48);
3757 break;
3758 case 7:
3759# if __BYTE_ORDER == __LITTLE_ENDIAN
3760 lshi(r0, r0, __WORDSIZE - 56);
3761# endif
3762 rshi_u(r0, r0, __WORDSIZE - 56);
3763 break;
3764 default:
3765 break;
3766#endif
3767 }
3768}
3769
3770static void
3771_fallback_unldr(jit_state_t *_jit,
3772 jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
3773{
3774 jit_int32_t t0, r2;
3775 assert(i0 >= 1 && i0 <= (__WORDSIZE >> 3));
3776 if (i0 == 1)
3777 ldr_c(r0, r1);
3778 else if (_jitc->unld_algorithm != 0) {
3779 if (r0 == r1) {
3780 t0 = fallback_jit_get_reg(jit_class_gpr);
3781 r2 = rn(t0);
3782 movr(r2, r0);
3783 }
3784 else
3785 r2 = r1;
3786 switch (i0) {
3787 case 2: unldr2(r0, r2, 1); break;
3788 case 3: unldr3(r0, r2, 1); break;
3789# if __WORDSIZE == 32
3790 default: unldr4(r0, r2); break;
3791# else
3792 case 4: unldr4(r0, r2, 1); break;
3793 case 5: unldr5(r0, r2, 1); break;
3794 case 6: unldr6(r0, r2, 1); break;
3795 case 7: unldr7(r0, r2, 1); break;
3796 default: unldr8(r0, r2); break;
3797# endif
3798 }
3799 if (i0 > 1 && r0 == r1)
3800 jit_unget_reg(t0);
3801 }
3802 else {
3803 unldrw(r0, r1, i0);
3804 unldx(r0, i0);
3805 }
3806}
3807
3808static void
3809_fallback_unldi(jit_state_t *_jit,
3810 jit_int32_t r0, jit_word_t i0, jit_word_t i1)
3811{
3812 assert(i1 >= 1 && i1 <= (__WORDSIZE >> 3));
3813 if (i1 == 1)
3814 ldi_c(r0, i0);
3815 else if (_jitc->unld_algorithm != 0) {
3816 switch (i1) {
3817 case 2: unldi2(r0, i0, 1); break;
3818 case 3: unldi3(r0, i0, 1); break;
3819# if __WORDSIZE == 32
3820 default: unldi4(r0, i0); break;
3821# else
3822 case 4: unldi4(r0, i0, 1); break;
3823 case 5: unldi5(r0, i0, 1); break;
3824 case 6: unldi6(r0, i0, 1); break;
3825 case 7: unldi7(r0, i0, 1); break;
3826 default: unldi8(r0, i0); break;
3827# endif
3828 }
3829 }
3830 else {
3831 unldiw(r0, i0, i1);
3832 unldx(r0, i1);
3833 }
3834}
3835
3836static void
3837_fallback_unldr_u(jit_state_t *_jit,
3838 jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
3839{
3840 jit_int32_t t0, r2;
3841 assert(i0 >= 1 && i0 <= (__WORDSIZE >> 3));
3842 if (i0 == 1)
3843 ldr_uc(r0, r1);
3844 else if (_jitc->unld_algorithm != 0) {
3845 if (r0 == r1) {
3846 t0 = fallback_jit_get_reg(jit_class_gpr);
3847 r2 = rn(t0);
3848 movr(r2, r0);
3849 }
3850 else
3851 r2 = r1;
3852 switch (i0) {
3853 case 2: unldr2(r0, r2, 0); break;
3854 case 3: unldr3(r0, r2, 0); break;
3855# if __WORDSIZE == 32
3856 default: unldr4(r0, r2); break;
3857# else
3858 case 4: unldr4(r0, r2, 0); break;
3859 case 5: unldr5(r0, r2, 0); break;
3860 case 6: unldr6(r0, r2, 0); break;
3861 case 7: unldr7(r0, r2, 0); break;
3862 default: unldr8(r0, r2); break;
3863# endif
3864 }
3865 if (i0 > 1 && r0 == r1)
3866 jit_unget_reg(t0);
3867 }
3868 else {
3869 unldrw(r0, r1, i0);
3870 unldx_u(r0, i0);
3871 }
3872}
3873
3874static void
3875_fallback_unldi_u(jit_state_t *_jit,
3876 jit_int32_t r0, jit_word_t i0, jit_word_t i1)
3877{
3878 assert(i1 >= 1 && i1 <= (__WORDSIZE >> 3));
3879 if (i1 == 1)
3880 ldi_uc(r0, i0);
3881 else if (_jitc->unld_algorithm != 0) {
3882 switch (i1) {
3883 case 2: unldi2(r0, i0, 0); break;
3884 case 3: unldi3(r0, i0, 0); break;
3885# if __WORDSIZE == 32
3886 default: unldi4(r0, i0); break;
3887# else
3888 case 4: unldi4(r0, i0, 0); break;
3889 case 5: unldi5(r0, i0, 0); break;
3890 case 6: unldi6(r0, i0, 0); break;
3891 case 7: unldi7(r0, i0, 0); break;
3892 default: unldi8(r0, i0); break;
3893# endif
3894 }
3895
3896 }
3897 else {
3898 unldiw(r0, i0, i1);
3899 unldx_u(r0, i1);
3900 }
3901}
3902
3903static void
3904_fallback_unstr(jit_state_t *_jit,
3905 jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
3906{
3907 jit_word_t done;
3908 jit_int32_t t0, t1, t2, t3, t4, t5;
3909 assert(i0 > 0 && i0 <= (__WORDSIZE >> 3));
3910 if (i0 == 1)
3911 str_c(r0, r1);
3912 else if (_jitc->unst_algorithm == 0) {
3913 switch (i0) {
3914 case 2: unstr2(r1, r0); break;
3915 case 3: unstr3(r1, r0); break;
3916# if __WORDSIZE == 32
3917 default: unstr4(r1, r0); break;
3918# else
3919 case 4: unstr4(r1, r0); break;
3920 case 5: unstr5(r1, r0); break;
3921 case 6: unstr6(r1, r0); break;
3922 case 7: unstr7(r1, r0); break;
3923 default: unstr8(r1, r0); break;
3924# endif
3925 }
3926 }
3927 else {
3928 t0 = fallback_jit_get_reg(jit_class_gpr);
3929 t1 = fallback_jit_get_reg(jit_class_gpr);
3930 t2 = fallback_jit_get_reg(jit_class_gpr);
3931 t3 = fallback_jit_get_reg(jit_class_gpr);
3932 /* Zero out top bits and keep value to store in t0 */
3933 if (i0 != sizeof(jit_word_t)) {
3934 lshi(rn(t3), r1, (sizeof(jit_word_t) - i0) << 3);
3935#if __BYTE_ORDER == __LITTLE_ENDIAN
3936 rshi_u(rn(t3), rn(t3), (sizeof(jit_word_t) - i0) << 3);
3937#endif
3938 }
3939 else
3940 movr(rn(t3), r1);
3941 /* Check alignment */
3942 andi(rn(t2), r0, sizeof(jit_word_t) - 1);
3943 /* Multiply by 8 */
3944 lshi(rn(t2), rn(t2), 3);
3945 /* Split values to store (assume will need two stores) */
3946#if __BYTE_ORDER == __LITTLE_ENDIAN
3947 qlshr_u(rn(t0), rn(t1), rn(t3), rn(t2));
3948#else
3949 qrshr_u(rn(t0), rn(t1), rn(t3), rn(t2));
3950#endif
3951 /* Generate masks for values in memory */
3952 if (i0 == sizeof(jit_word_t))
3953 movi(rn(t3), -1);
3954 else {
3955#if __BYTE_ORDER == __BIG_ENDIAN
3956 movi(rn(t3), ((1L << (i0 << 3)) - 1) <<
3957 ((sizeof(jit_word_t) - i0) << 3));
3958#else
3959 movi(rn(t3), (1L << (i0 << 3)) - 1);
3960#endif
3961 }
3962#if __BYTE_ORDER == __LITTLE_ENDIAN
3963 qlshr_u(rn(t2), rn(t3), rn(t3), rn(t2));
3964#else
3965 qrshr_u(rn(t2), rn(t3), rn(t3), rn(t2));
3966#endif
3967 comr(rn(t2), rn(t2));
3968 comr(rn(t3), rn(t3));
3969 /* Allocate two extra registers later in case need temps for *q?shr_u */
3970 t4 = fallback_jit_get_reg(jit_class_gpr);
3971 t5 = fallback_jit_get_reg(jit_class_gpr);
3972 /* Store words */
3973 andi(rn(t4), r0, -(jit_word_t)sizeof(jit_word_t));
3974 ldr(rn(t5), rn(t4));
3975 andr(rn(t5), rn(t5), rn(t2));
3976 orr(rn(t0), rn(t0), rn(t5));
3977 str(rn(t4), rn(t0));
3978 /* Make sure to not read/write on possibly unmaped memory */
3979 addi(rn(t5), rn(t4), i0);
3980 done = fallback_blei(_jit->pc.w, rn(t5), sizeof(jit_word_t));
3981 /* Store second word if vlaue crosses a word boundary */
3982 ldxi(rn(t5), rn(t4), sizeof(jit_word_t));
3983 andr(rn(t5), rn(t5), rn(t3));
3984 orr(rn(t1), rn(t1), rn(t5));
3985 stxi(sizeof(jit_word_t), rn(t4), rn(t1));
3986 /* Finished */
3987 fallback_flush();
3988 fallback_patch_blei(done, _jit->pc.w);
3989 /* Generic/simple algorithm needs 6 temporaries, as it cannot
3990 * change any of the argument registers, might need to truncate
3991 * the value to store, and need a pair for values to store and
3992 * another for the masks. */
3993 jit_unget_reg(t5);
3994 jit_unget_reg(t4);
3995 jit_unget_reg(t3);
3996 jit_unget_reg(t2);
3997 jit_unget_reg(t1);
3998 jit_unget_reg(t0);
3999 }
4000}
4001
4002static void
4003_fallback_unsti(jit_state_t *_jit,
4004 jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4005{
4006 jit_word_t done, address;
4007 jit_int32_t t0, t1, t2, t3, t4;
4008 assert(i1 > 0 && i1 <= (__WORDSIZE >> 3));
4009 if (i1 == 1)
4010 sti_c(i0, r0);
4011 else if (_jitc->unst_algorithm == 0) {
4012 switch (i1) {
4013 case 1: sti_c(i0, r0); break;
4014 case 2: unsti2(r0, i0); break;
4015 case 3: unsti3(r0, i0); break;
4016# if __WORDSIZE == 32
4017 default: unsti4(r0, i0); break;
4018# else
4019 case 4: unsti4(r0, i0); break;
4020 case 5: unsti5(r0, i0); break;
4021 case 6: unsti6(r0, i0); break;
4022 case 7: unsti7(r0, i0); break;
4023 default: unsti8(r0, i0); break;
4024# endif
4025 }
4026 }
4027 else {
4028 t0 = fallback_jit_get_reg(jit_class_gpr);
4029 t1 = fallback_jit_get_reg(jit_class_gpr);
4030 t2 = fallback_jit_get_reg(jit_class_gpr);
4031 t3 = fallback_jit_get_reg(jit_class_gpr);
4032 t4 = fallback_jit_get_reg(jit_class_gpr);
4033 /* Zero out top bits and keep value to store in t0 */
4034 if (i1 != sizeof(jit_word_t)) {
4035 lshi(rn(t2), r0, (sizeof(jit_word_t) - i1) << 3);
4036#if __BYTE_ORDER == __LITTLE_ENDIAN
4037 rshi_u(rn(t2), rn(t2), (sizeof(jit_word_t) - i1) << 3);
4038 qlshi_u(rn(t0), rn(t1), rn(t2),
4039 (i0 & (sizeof(jit_word_t) - 1)) << 3);
4040#else
4041 qrshi_u(rn(t0), rn(t1), rn(t2),
4042 (i0 & (sizeof(jit_word_t)) - 1) << 3);
4043#endif
4044 }
4045 else {
4046#if __BYTE_ORDER == __LITTLE_ENDIAN
4047 qlshi_u(rn(t0), rn(t1), r0,
4048 (i0 & (sizeof(jit_word_t)) - 1) << 3);
4049#else
4050 qrshi_u(rn(t0), rn(t1), r0,
4051 (i0 & (sizeof(jit_word_t)) - 1) << 3);
4052#endif
4053 }
4054 /* Generate masks for values in memory */
4055 if (i1 == sizeof(jit_word_t))
4056 movi(rn(t2), -1);
4057 else {
4058#if __BYTE_ORDER == __BIG_ENDIAN
4059 movi(rn(t2), ((1L << (i1 << 3)) - 1) <<
4060 ((sizeof(jit_word_t) - i1) << 3));
4061#else
4062 movi(rn(t2), (1L << (i1 << 3)) - 1);
4063#endif
4064 }
4065#if __BYTE_ORDER == __LITTLE_ENDIAN
4066 qlshi_u(rn(t2), rn(t3), rn(t2), (i0 & (sizeof(jit_word_t)) - 1) << 3);
4067#else
4068 qrshi_u(rn(t2), rn(t3), rn(t2), (i0 & (sizeof(jit_word_t)) - 1) << 3);
4069#endif
4070 comr(rn(t2), rn(t2));
4071 comr(rn(t3), rn(t3));
4072 /* Store words */
4073 address = i0 & -(jit_word_t)sizeof(jit_word_t);
4074 ldi(rn(t4), address);
4075 andr(rn(t4), rn(t4), rn(t2));
4076 orr(rn(t0), rn(t0), rn(t4));
4077 sti(address, rn(t0));
4078 if (address + i1 > sizeof(jit_word_t)) {
4079 address += sizeof(jit_word_t);
4080 ldi(rn(t4), address);
4081 andr(rn(t4), rn(t4), rn(t3));
4082 orr(rn(t1), rn(t1), rn(t4));
4083 sti(address, rn(t1));
4084 }
4085 jit_unget_reg(t4);
4086 jit_unget_reg(t3);
4087 jit_unget_reg(t2);
4088 jit_unget_reg(t1);
4089 jit_unget_reg(t0);
4090 }
4091}
4092
4093# ifdef fallback_unldr_x
4094static void
4095_fallback_unldr_x(jit_state_t *_jit,
4096 jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
4097{
4098 jit_int32_t t0, r2;
4099# if __WORDSIZE == 32
4100 jit_int32_t t1, r3;
4101# endif
4102 assert(i0 == 4 || i0 == 8);
4103 t0 = fallback_jit_get_reg(jit_class_gpr);
4104 r2 = rn(t0);
4105 if (i0 == 4) {
4106 unldr(r2, r1, 4);
4107 movr_w_f(r0, r2);
4108 }
4109 else {
4110# if __WORDSIZE == 32
4111 t1 = fallback_jit_get_reg(jit_class_gpr);
4112 r3 = rn(t1);
4113# if __BYTE_ORDER == __LITTLE_ENDIAN
4114 unldr(r2, r1, 4);
4115 addi(r3, r1, 4);
4116 unldr(r3, r3, 4);
4117# else
4118 unldr(r3, r1, 4);
4119 addi(r2, r1, 4);
4120 unldr(r2, r2, 4);
4121# endif
4122 movr_ww_d(r0, r2, r3);
4123 jit_unget_reg(t1);
4124# else
4125 unldr(r2, r1, 8);
4126 movr_w_d(r0, r2);
4127# endif
4128 }
4129 jit_unget_reg(t0);
4130}
4131# endif
4132
4133# ifdef fallback_unldi_x
4134static void
4135_fallback_unldi_x(jit_state_t *_jit,
4136 jit_int32_t r0, jit_word_t i0, jit_word_t i1)
4137{
4138 jit_int32_t t0, r2;
4139# if __WORDSIZE == 32
4140 jit_int32_t t1, r3;
4141# endif
4142 assert(i1 == 4 || i1 == 8);
4143 t0 = fallback_jit_get_reg(jit_class_gpr);
4144 r2 = rn(t0);
4145 if (i1 == 4) {
4146 unldi(r2, i0, 4);
4147 movr_w_f(r0, r2);
4148 }
4149 else {
4150# if __WORDSIZE == 32
4151 t1 = fallback_jit_get_reg(jit_class_gpr);
4152 r3 = rn(t1);
4153# if __BYTE_ORDER == __LITTLE_ENDIAN
4154 unldi(r2, i0, 4);
4155 unldi(r3, i0 + 4, 4);
4156# else
4157 unldi(r3, i0, 4);
4158 unldi(r2, i0 + 4, 4);
4159# endif
4160 movr_ww_d(r0, r3, r2);
4161 jit_unget_reg(t1);
4162# else
4163 unldi(r2, i0, 8);
4164 movr_w_d(r0, r2);
4165# endif
4166 }
4167 jit_unget_reg(t0);
4168}
4169# endif
4170
4171# ifdef fallback_unstr_x
4172static void
4173_fallback_unstr_x(jit_state_t *_jit,
4174 jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
4175{
4176 jit_int32_t t0, r2;
4177# if __WORDSIZE == 32
4178 jit_int32_t t1, r3;
4179# endif
4180 assert(i0 == 4 || i0 == 8);
4181 t0 = fallback_jit_get_reg(jit_class_gpr);
4182 r2 = rn(t0);
4183 if (i0 == 4) {
4184 movr_f_w(r2, r1);
4185 unstr(r0, r2, 4);
4186 }
4187 else {
4188# if __WORDSIZE == 32
4189 t1 = fallback_jit_get_reg(jit_class_gpr);
4190 r3 = rn(t1);
4191 movr_d_ww(r2, r3, r1);
4192# if __BYTE_ORDER == __LITTLE_ENDIAN
4193 unstr(r0, r2, 4);
4194 addi(r2, r0, 4);
4195 unstr(r2, r3, 4);
4196# else
4197 unstr(r0, r3, 4);
4198 addi(r3, r0, 4);
4199 unstr(r3, r2, 4);
4200# endif
4201 jit_unget_reg(t1);
4202# else
4203 movr_d_w(r2, r1);
4204 unstr(r0, r2, 8);
4205# endif
4206 }
4207 jit_unget_reg(t0);
4208}
4209# endif
4210
4211# ifdef fallback_unsti_x
4212static void
4213_fallback_unsti_x(jit_state_t *_jit,
4214 jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4215{
4216 jit_int32_t t0, r2;
4217# if __WORDSIZE == 32
4218 jit_int32_t t1, r3;
4219# endif
4220 assert(i1 == 4 || i1 == 8);
4221 t0 = fallback_jit_get_reg(jit_class_gpr);
4222 r2 = rn(t0);
4223 if (i1 == 4) {
4224 movr_f_w(r2, r0);
4225 unsti(i0, r2, 4);
4226 }
4227 else {
4228# if __WORDSIZE == 32
4229 t1 = fallback_jit_get_reg(jit_class_gpr);
4230 r3 = rn(t1);
4231 movr_d_ww(r2, r3, r0);
4232# if __BYTE_ORDER == __LITTLE_ENDIAN
4233 unsti(i0, r3, 4);
4234 unsti(i0 + 4, r2, 4);
4235# else
4236 unsti(i0, r2, 4);
4237 unsti(i0 + 4, r3, 4);
4238# endif
4239 jit_unget_reg(t1);
4240# else
4241 movr_d_w(r2, r0);
4242 unsti(i0, r2, 8);
4243# endif
4244 }
4245 jit_unget_reg(t0);
4246}
4247# endif
ba3814c1 4248#endif