try a new github issue template
[pcsx_rearmed.git] / deps / lightning / lib / jit_fallback.c
CommitLineData
ba3814c1 1#if PROTO
ba86ff93
PC
2#define USE_BIT_TABLES 1
3#define USE_BITSWAP_UNROLLED 0
4#define USE_BITSWAP_LOOP 0
ba3814c1
PC
5#define fallback_save(r0) _fallback_save(_jit, r0)
6static void _fallback_save(jit_state_t*, jit_int32_t);
7#define fallback_load(r0) _fallback_load(_jit, r0)
8static void _fallback_load(jit_state_t*, jit_int32_t);
9#define fallback_save_regs(r0) _fallback_save_regs(_jit, r0)
10static void _fallback_save_regs(jit_state_t*, jit_int32_t);
11#define fallback_load_regs(r0) _fallback_load_regs(_jit, r0)
12static void _fallback_load_regs(jit_state_t*, jit_int32_t);
13#define fallback_calli(i0, i1) _fallback_calli(_jit, i0, i1)
14static void _fallback_calli(jit_state_t*, jit_word_t, jit_word_t);
15#define fallback_casx(r0,r1,r2,r3,im) _fallback_casx(_jit,r0,r1,r2,r3,im)
16static void _fallback_casx(jit_state_t *, jit_int32_t, jit_int32_t,
17 jit_int32_t, jit_int32_t, jit_word_t);
ba86ff93
PC
18#if !defined(__arm__)
19# ifndef movr_w_f
20# define movr_w_f(r0, r1) fallback_movr_w_f(r0, r1)
21# define fallback_movr_w_f(r0,r1) _fallback_movr_w_f(_jit,r0,r1)
22static void _fallback_movr_w_f(jit_state_t*, jit_int32_t, jit_int32_t);
23# endif
24# ifndef movr_f_w
25# define movr_f_w(r0, r1) fallback_movr_f_w(r0, r1)
26# define fallback_movr_f_w(r0,r1) _fallback_movr_f_w(_jit,r0,r1)
27static void _fallback_movr_f_w(jit_state_t*, jit_int32_t, jit_int32_t);
28# endif
29# if __WORDSIZE == 32
30# ifndef movr_ww_d
31# define movr_ww_d(r0, r1, r2) fallback_movr_ww_d(r0, r1, r2)
32# define fallback_movr_ww_d(r0,r1,r2) _fallback_movr_ww_d(_jit,r0,r1,r2)
33static void _fallback_movr_ww_d(jit_state_t*,
34 jit_int32_t,jit_int32_t,jit_int32_t);
35# endif
36# ifndef movr_d_ww
37# define fallback_movr_d_ww(r0,r1,r2) _fallback_movr_d_ww(_jit,r0,r1,r2)
38static void _fallback_movr_d_ww(jit_state_t*,
39 jit_int32_t,jit_int32_t,jit_int32_t);
40# define movr_d_ww(r0, r1, r2) fallback_movr_d_ww(r0, r1, r2)
41# endif
42# else
43# ifndef movr_w_d
44# define movr_w_d(r0, r1) fallback_movr_w_d(r0, r1);
45# define fallback_movr_w_d(r0,r1) _fallback_movr_w_d(_jit,r0,r1)
46static void _fallback_movr_w_d(jit_state_t*, jit_int32_t, jit_int32_t);
47# endif
48# ifndef movr_d_w
49# define movr_d_w(r0, r1) fallback_movr_d_w(r0, r1);
50# define fallback_movr_d_w(r0,r1) _fallback_movr_d_w(_jit,r0,r1)
51static void _fallback_movr_d_w(jit_state_t*, jit_int32_t, jit_int32_t);
52# endif
53# endif
54#endif
55#ifndef unldr
56# define unldr(r0, r1, i0) fallback_unldr(r0, r1, i0)
57#endif
58#ifndef unldi
59# define unldi(r0, i0, i1) fallback_unldi(r0, i0, i1)
60#endif
61#ifndef unstr
62# define unstr(r0, r1, i0) fallback_unstr(r0, r1, i0)
63#endif
64#ifndef unsti
65# define unsti(i0, r0, i1) fallback_unsti(i0, r0, i1)
66#endif
79bfeef6
PC
67#define fallback_clo(r0,r1) _fallback_clo(_jit,r0,r1)
68static void _fallback_clo(jit_state_t*, jit_int32_t, jit_int32_t);
69#define fallback_clz(r0,r1) _fallback_clz(_jit,r0,r1)
70static void _fallback_clz(jit_state_t*, jit_int32_t, jit_int32_t);
71#define fallback_cto(r0,r1) _fallback_cto(_jit,r0,r1)
72static void _fallback_cto(jit_state_t*, jit_int32_t, jit_int32_t);
73#define fallback_ctz(r0,r1) _fallback_ctz(_jit,r0,r1)
74static void _fallback_ctz(jit_state_t*, jit_int32_t, jit_int32_t);
ba86ff93
PC
75#define fallback_rbit(r0,r1) _fallback_rbit(_jit, r0, r1)
76static void _fallback_rbit(jit_state_t*, jit_int32_t, jit_int32_t);
77#define fallback_popcnt(r0,r1) _fallback_popcnt(_jit, r0, r1)
78static void _fallback_popcnt(jit_state_t*, jit_int32_t, jit_int32_t);
79#define fallback_lrotr(r0, r1, r2) _fallback_lrotr(_jit, r0, r1, r2)
80static void _fallback_lrotr(jit_state_t*, jit_int32_t,jit_int32_t,jit_int32_t);
81#define fallback_lroti(r0, r1, i0) _fallback_lroti(_jit, r0, r1, i0)
82static void _fallback_lroti(jit_state_t*, jit_int32_t,jit_int32_t,jit_word_t);
83#define fallback_rrotr(r0, r1, r2) _fallback_rrotr(_jit, r0, r1, r2)
84static void _fallback_rrotr(jit_state_t*, jit_int32_t,jit_int32_t,jit_int32_t);
85#define fallback_rroti(r0, r1, i0) _fallback_rroti(_jit, r0, r1, i0)
86static void _fallback_rroti(jit_state_t*, jit_int32_t,jit_int32_t,jit_word_t);
87#define fallback_ext(r0,r1,i0,i1) _fallback_ext(_jit,r0,r1,i0,i1)
88static void _fallback_ext(jit_state_t*,
89 jit_int32_t,jit_int32_t,jit_word_t,jit_word_t);
90#define fallback_ext_u(r0,r1,i0,i1) _fallback_ext_u(_jit,r0,r1,i0,i1)
91static void _fallback_ext_u(jit_state_t*,
92 jit_int32_t,jit_int32_t,jit_word_t,jit_word_t);
93#define fallback_dep(r0,r1,i0,i1) _fallback_dep(_jit,r0,r1,i0,i1)
94static void _fallback_dep(jit_state_t*,
95 jit_int32_t,jit_int32_t,jit_word_t,jit_word_t);
96#define fallback_qlshr(r0,r1,r2,r3) _fallback_qlshr(_jit,r0,r1,r2,r3)
97static void _fallback_qlshr(jit_state_t *_jit,
98 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
99#define fallback_qlshi(r0,r1,r2,i0) _fallback_qlshi(_jit,r0,r1,r2,i0)
100static void _fallback_qlshi(jit_state_t *_jit,
101 jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t);
102#define fallback_qlshr_u(r0,r1,r2,r3) _fallback_qlshr_u(_jit,r0,r1,r2,r3)
103static void _fallback_qlshr_u(jit_state_t *_jit,
104 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
105#define fallback_qlshi_u(r0,r1,r2,i0) _fallback_qlshi_u(_jit,r0,r1,r2,i0)
106static void _fallback_qlshi_u(jit_state_t *_jit,
107 jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t);
108#define fallback_qrshr(r0,r1,r2,r3) _fallback_qrshr(_jit,r0,r1,r2,r3)
109static void _fallback_qrshr(jit_state_t *_jit,
110 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
111#define fallback_qrshi(r0,r1,r2,i0) _fallback_qrshi(_jit,r0,r1,r2,i0)
112static void _fallback_qrshi(jit_state_t *_jit,
113 jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t);
114#define fallback_qrshr_u(r0,r1,r2,r3) _fallback_qrshr_u(_jit,r0,r1,r2,r3)
115static void _fallback_qrshr_u(jit_state_t *_jit,
116 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
117#define fallback_qrshi_u(r0,r1,r2,i0) _fallback_qrshi_u(_jit,r0,r1,r2,i0)
118static void _fallback_qrshi_u(jit_state_t *_jit,
119 jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t);
120#define unldr2(r0, r1, sign) _unldr2(_jit, r0, r1, sign)
121static void _unldr2(jit_state_t*,jit_int32_t,jit_int32_t,jit_bool_t);
122#define unldi2(r0, i1, sign) _unldi2(_jit, r0, i1, sign)
123static void _unldi2(jit_state_t*,jit_int32_t,jit_word_t,jit_bool_t);
124#define unldr3(r0, r1, sign) _unldr3(_jit, r0, r1, sign)
125static void _unldr3(jit_state_t*,jit_int32_t,jit_int32_t,jit_bool_t);
126#define unldi3(r0, i1, sign) _unldi3(_jit, r0, i1, sign)
127static void _unldi3(jit_state_t*,jit_int32_t,jit_word_t,jit_bool_t);
128#if __WORDSIZE == 32
129# define unldr4(r0, r1) _unldr4(_jit, r0, r1)
130static void _unldr4(jit_state_t*,jit_int32_t,jit_int32_t);
131# define unldi4(r0, i1) _unldi4(_jit, r0, i1)
132static void _unldi4(jit_state_t*,jit_int32_t,jit_word_t);
133#else
134# define unldr4(r0, r1, sign) _unldr4(_jit, r0, r1, sign)
135static void _unldr4(jit_state_t*,jit_int32_t,jit_int32_t,jit_bool_t);
136# define unldi4(r0, i1, sign) _unldi4(_jit, r0, i1, sign)
137static void _unldi4(jit_state_t*,jit_int32_t,jit_word_t,jit_bool_t);
138# define unldr5(r0, r1, sign) _unldr5(_jit, r0, r1, sign)
139static void _unldr5(jit_state_t*,jit_int32_t,jit_int32_t,jit_bool_t);
140# define unldi5(r0, i1, sign) _unldi5(_jit, r0, i1, sign)
141static void _unldi5(jit_state_t*,jit_int32_t,jit_word_t,jit_bool_t);
142# define unldr6(r0, r1, sign) _unldr6(_jit, r0, r1, sign)
143static void _unldr6(jit_state_t*,jit_int32_t,jit_int32_t,jit_bool_t);
144# define unldi6(r0, i1, sign) _unldi6(_jit, r0, i1, sign)
145static void _unldi6(jit_state_t*,jit_int32_t,jit_word_t,jit_bool_t);
146# define unldr7(r0, r1, sign) _unldr7(_jit, r0, r1, sign)
147static void _unldr7(jit_state_t*,jit_int32_t,jit_int32_t,jit_bool_t);
148# define unldi7(r0, i1, sign) _unldi7(_jit, r0, i1, sign)
149static void _unldi7(jit_state_t*,jit_int32_t,jit_word_t,jit_bool_t);
150# define unldr8(r0, r1) _unldr8(_jit, r0, r1)
151static void _unldr8(jit_state_t*,jit_int32_t,jit_int32_t);
152# define unldi8(r0, i1) _unldi8(_jit, r0, i1)
153static void _unldi8(jit_state_t*,jit_int32_t,jit_word_t);
154#endif
155#define unstr2(r0, r1) _unstr2(_jit, r0, r1)
156static void _unstr2(jit_state_t*,jit_int32_t,jit_int32_t);
157#define unsti2(r0, i0) _unsti2(_jit, r0, i0)
158static void _unsti2(jit_state_t*,jit_int32_t,jit_word_t);
159#define unstr3(r0, r1) _unstr3(_jit, r0, r1)
160static void _unstr3(jit_state_t*,jit_int32_t,jit_int32_t);
161#define unsti3(r0, i0) _unsti3(_jit, r0, i0)
162static void _unsti3(jit_state_t*,jit_int32_t,jit_word_t);
163#define unstr4(r0, r1) _unstr4(_jit, r0, r1)
164static void _unstr4(jit_state_t*,jit_int32_t,jit_int32_t);
165#define unsti4(r0, i0) _unsti4(_jit, r0, i0)
166static void _unsti4(jit_state_t*,jit_int32_t,jit_word_t);
167#if __WORDSIZE == 64
168# define unstr5(r0, r1) _unstr5(_jit, r0, r1)
169static void _unstr5(jit_state_t*,jit_int32_t,jit_int32_t);
170# define unsti5(r0, i0) _unsti5(_jit, r0, i0)
171static void _unsti5(jit_state_t*,jit_int32_t,jit_word_t);
172# define unstr6(r0, r1) _unstr6(_jit, r0, r1)
173static void _unstr6(jit_state_t*,jit_int32_t,jit_int32_t);
174# define unsti6(r0, i0) _unsti6(_jit, r0, i0)
175static void _unsti6(jit_state_t*,jit_int32_t,jit_word_t);
176# define unstr7(r0, r1) _unstr7(_jit, r0, r1)
177static void _unstr7(jit_state_t*,jit_int32_t,jit_int32_t);
178# define unsti7(r0, i0) _unsti7(_jit, r0, i0)
179static void _unsti7(jit_state_t*,jit_int32_t,jit_word_t);
180# define unstr8(r0, r1) _unstr8(_jit, r0, r1)
181static void _unstr8(jit_state_t*,jit_int32_t,jit_int32_t);
182# define unsti8(r0, i0) _unsti8(_jit, r0, i0)
183static void _unsti8(jit_state_t*,jit_int32_t,jit_word_t);
184#endif
185#define unldrw(r0, r1, i0) _unldrw(_jit, r0, r1, i0)
186static void _unldrw(jit_state_t*,jit_int32_t,jit_int32_t, jit_word_t);
187#define unldiw(r0, i0, i1) _unldiw(_jit, r0, i0, i1)
188static void _unldiw(jit_state_t*,jit_int32_t,jit_word_t, jit_word_t);
189#define unldx(r0, i0) _unldx(_jit, r0, i0)
190static void _unldx(jit_state_t*,jit_int32_t,jit_word_t);
191#define unldx_u(r0, i0) _unldx_u(_jit, r0, i0)
192static void _unldx_u(jit_state_t*,jit_int32_t,jit_word_t);
193#define fallback_unldr(r0, r1, i0) _fallback_unldr(_jit, r0, r1, i0)
194static void _fallback_unldr(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
195#define fallback_unldi(r0, i0, i1) _fallback_unldi(_jit, r0, i0, i1)
196static void _fallback_unldi(jit_state_t*,jit_int32_t,jit_word_t,jit_word_t);
197#define fallback_unldr_u(r0, r1, i0) _fallback_unldr_u(_jit, r0, r1, i0)
198static void _fallback_unldr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
199#define fallback_unldi_u(r0, i0, i1) _fallback_unldi_u(_jit, r0, i0, i1)
200static void _fallback_unldi_u(jit_state_t*,jit_int32_t,jit_word_t,jit_word_t);
201#define fallback_unstr(r0, r1, i0) _fallback_unstr(_jit, r0, r1, i0)
202static void _fallback_unstr(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
203#define fallback_unsti(i0, r0, i1) _fallback_unsti(_jit, i0, r0, i1)
204static void _fallback_unsti(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
205#if !defined(__arm__)
206# define fallback_unldr_x(r0, r1, i0) _fallback_unldr_x(_jit, r0, r1, i0)
207static void _fallback_unldr_x(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
208# define fallback_unldi_x(r0, i0, i1) _fallback_unldi_x(_jit, r0, i0, i1)
209static void _fallback_unldi_x(jit_state_t*,jit_int32_t,jit_word_t,jit_word_t);
210# define fallback_unstr_x(r0, r1, i0) _fallback_unstr_x(_jit, r0, r1, i0)
211static void _fallback_unstr_x(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
212# define fallback_unsti_x(i0, r0, i1) _fallback_unsti_x(_jit, i0, r0, i1)
213static void _fallback_unsti_x(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
214#endif
/* Scratch register allocation: s390 uses the jit_get_reg_but_zero() variant. */
#  if defined(__s390__) || defined(__s390x__)
#    define fallback_jit_get_reg(flags) jit_get_reg_but_zero(flags)
#  else
#    define fallback_jit_get_reg(flags) jit_get_reg(flags)
#  endif

/*
 * fallback_flush() is issued around places where _jit->pc.w is sampled
 * or patched; it expands to sync() on ia64, flush() on mips and to
 * nothing elsewhere.
 */
#  if defined(__ia64__)
#    define fallback_flush()            sync()
#  elif defined(__mips__)
#    define fallback_flush()            flush()
#  else
#    define fallback_flush()            /**/
#  endif

/* Unconditional jump; several backends take an extra argument. */
#  if defined(__mips__)
#    define fallback_jmpi(i0)           jmpi(i0, 1)
#  elif defined(__arm__)
#    define fallback_jmpi(i0)           jmpi_p(i0, 1)
#  elif defined(__s390__) || defined(__s390x__)
#    define fallback_jmpi(i0)           jmpi(i0, 1)
#  else
#    define fallback_jmpi(i0)           jmpi(i0)
#  endif
/*
 * Conditional branch wrappers.  s390 goes through the *_p variants of
 * the branch emitters, every other backend through the plain ones.
 */
#  if defined(__s390__) || defined(__s390x__)
#    define fallback_beqr(i0,r0,i1)     beqr_p(i0,r0,i1)
#    define fallback_beqi(i0,r0,i1)     beqi_p(i0,r0,i1)
#    define fallback_bner(i0,r0,r1)     bner_p(i0,r0,r1)
#    define fallback_bnei(i0,r0,i1)     bnei_p(i0,r0,i1)
#    define fallback_blei(i0,r0,i1)     blei_p(i0,r0,i1)
#    define fallback_bmsr(i0,r0,r1)     bmsr_p(i0,r0,r1)
#    define fallback_bmsi(i0,r0,i1)     bmsi_p(i0,r0,i1)
#  else
#    define fallback_beqr(i0,r0,r1)     beqr(i0,r0,r1)
#    define fallback_beqi(i0,r0,i1)     beqi(i0,r0,i1)
#    define fallback_bner(i0,r0,r1)     bner(i0,r0,r1)
#    define fallback_bnei(i0,r0,i1)     bnei(i0,r0,i1)
#    define fallback_blei(i0,r0,i1)     blei(i0,r0,i1)
#    define fallback_bmsr(i0,r0,r1)     bmsr(i0,r0,r1)
#    define fallback_bmsi(i0,r0,i1)     bmsi(i0,r0,i1)
#  endif
/*
 * Branch patching wrappers: fix up the instruction at `inst` so that it
 * targets `lbl`.  ia64 passes the jit code of the patched instruction as
 * an extra first argument to patch_at(), arm passes arm_patch_jump, and
 * every other backend calls patch_at(inst, lbl) directly.
 */
#  if defined(__ia64__)
#    define fallback_patch_jmpi(inst, lbl) patch_at(jit_code_jmpi, inst, lbl)
#    define fallback_patch_beqr(inst, lbl) patch_at(jit_code_beqr, inst, lbl)
#    define fallback_patch_beqi(inst, lbl) patch_at(jit_code_beqi, inst, lbl)
#    define fallback_patch_bner(inst, lbl) patch_at(jit_code_bner, inst, lbl)
#    define fallback_patch_bnei(inst, lbl) patch_at(jit_code_bnei, inst, lbl)
#    define fallback_patch_blei(inst, lbl) patch_at(jit_code_blei, inst, lbl)
#    define fallback_patch_bmsr(inst, lbl) patch_at(jit_code_bmsr, inst, lbl)
#    define fallback_patch_bmsi(inst, lbl) patch_at(jit_code_bmsi, inst, lbl)
#  elif defined(__arm__)
#    define fallback_patch_jmpi(inst, lbl) patch_at(arm_patch_jump,inst, lbl)
#    define fallback_patch_beqr(inst, lbl) patch_at(arm_patch_jump,inst, lbl)
#    define fallback_patch_beqi(inst, lbl) patch_at(arm_patch_jump,inst, lbl)
#    define fallback_patch_bner(inst, lbl) patch_at(arm_patch_jump,inst, lbl)
#    define fallback_patch_bnei(inst, lbl) patch_at(arm_patch_jump,inst, lbl)
#    define fallback_patch_blei(inst, lbl) patch_at(arm_patch_jump,inst, lbl)
#    define fallback_patch_bmsr(inst, lbl) patch_at(arm_patch_jump,inst, lbl)
#    define fallback_patch_bmsi(inst, lbl) patch_at(arm_patch_jump,inst, lbl)
#  else
#    define fallback_patch_jmpi(inst, lbl) patch_at(inst, lbl)
#    define fallback_patch_beqr(inst, lbl) patch_at(inst, lbl)
#    define fallback_patch_beqi(inst, lbl) patch_at(inst, lbl)
#    define fallback_patch_bner(inst, lbl) patch_at(inst, lbl)
#    define fallback_patch_bnei(inst, lbl) patch_at(inst, lbl)
#    define fallback_patch_blei(inst, lbl) patch_at(inst, lbl)
#    define fallback_patch_bmsr(inst, lbl) patch_at(inst, lbl)
#    define fallback_patch_bmsi(inst, lbl) patch_at(inst, lbl)
#  endif
ba3814c1
PC
305#endif
306
307#if CODE
308static void
309_fallback_save(jit_state_t *_jit, jit_int32_t r0)
310{
311 jit_int32_t offset, regno, spec;
312 for (offset = 0; offset < JIT_R_NUM; offset++) {
313 spec = _rvs[offset].spec;
314 regno = jit_regno(spec);
315 if (regno == r0) {
316 if (!(spec & jit_class_sav))
c0c16242 317 stxi(_jitc->function->regoff[JIT_R(offset)], rn(JIT_FP), regno);
ba3814c1
PC
318 break;
319 }
320 }
321}
322
323static void
324_fallback_load(jit_state_t *_jit, jit_int32_t r0)
325{
326 jit_int32_t offset, regno, spec;
327 for (offset = 0; offset < JIT_R_NUM; offset++) {
328 spec = _rvs[offset].spec;
329 regno = jit_regno(spec);
330 if (regno == r0) {
331 if (!(spec & jit_class_sav))
c0c16242 332 ldxi(regno, rn(JIT_FP), _jitc->function->regoff[JIT_R(offset)]);
ba3814c1
PC
333 break;
334 }
335 }
336}
337
338static void
339_fallback_save_regs(jit_state_t *_jit, jit_int32_t r0)
340{
c0c16242
PC
341 jit_int32_t regno, spec;
342 for (regno = 0; regno < _jitc->reglen; regno++) {
ba3814c1 343 spec = _rvs[regno].spec;
c0c16242
PC
344 if ((jit_regset_tstbit(&_jitc->regarg, regno) ||
345 jit_regset_tstbit(&_jitc->reglive, regno)) &&
346 !(spec & jit_class_sav)) {
ba3814c1
PC
347 if (!_jitc->function->regoff[regno]) {
348 _jitc->function->regoff[regno] =
c0c16242
PC
349 jit_allocai(spec & jit_class_gpr ?
350 sizeof(jit_word_t) : sizeof(jit_float64_t));
ba3814c1
PC
351 _jitc->again = 1;
352 }
c0c16242
PC
353 if ((spec & jit_class_gpr) && rn(regno) == r0)
354 continue;
ba3814c1 355 jit_regset_setbit(&_jitc->regsav, regno);
c0c16242
PC
356 if (spec & jit_class_gpr)
357 emit_stxi(_jitc->function->regoff[regno], JIT_FP, regno);
358 else
359 emit_stxi_d(_jitc->function->regoff[regno], JIT_FP, regno);
ba3814c1
PC
360 }
361 }
362}
363
364static void
365_fallback_load_regs(jit_state_t *_jit, jit_int32_t r0)
366{
c0c16242
PC
367 jit_int32_t regno, spec;
368 for (regno = 0; regno < _jitc->reglen; regno++) {
ba3814c1 369 spec = _rvs[regno].spec;
c0c16242
PC
370 if ((jit_regset_tstbit(&_jitc->regarg, regno) ||
371 jit_regset_tstbit(&_jitc->reglive, regno)) &&
372 !(spec & jit_class_sav)) {
373 if ((spec & jit_class_gpr) && rn(regno) == r0)
374 continue;
375 jit_regset_setbit(&_jitc->regsav, regno);
376 if (spec & jit_class_gpr)
377 emit_ldxi(regno, JIT_FP, _jitc->function->regoff[regno]);
378 else
379 emit_ldxi_d(regno, JIT_FP, _jitc->function->regoff[regno]);
ba3814c1
PC
380 }
381 }
382}
383
384static void
385_fallback_calli(jit_state_t *_jit, jit_word_t i0, jit_word_t i1)
386{
c0c16242 387# if defined(__arm__)
ba3814c1 388 movi(rn(_R0), i1);
ba3814c1
PC
389# elif defined(__hppa__)
390 movi(_R26_REGNO, i1);
79bfeef6
PC
391# endif
392# if defined(__arm__)
393 calli(i0, jit_exchange_p());
394# elif defined(__mips__)
395 calli(i0, 0);
396# elif defined(__powerpc__) && _CALL_SYSV
397 calli(i0, 0);
398# elif defined(__s390__) || defined(__s390x__)
399 calli(i0, 0);
400# else
ba3814c1 401 calli(i0);
79bfeef6 402# endif
ba3814c1
PC
403}
404
#ifdef NEED_FALLBACK_CASX
/*
 * Emit a compare-and-swap guarded by a process-wide pthread mutex.
 *
 * The emitted code locks the mutex, loads the word at address r1 (or at
 * the immediate i0 when r1 is _NOREG), compares it with r2 and, when
 * they are equal, stores r3 to that address.  It then unlocks the mutex.
 * r0 holds the result of the comparison, as produced by eqr().
 *
 * Live registers are saved before the lock call and reloaded at the
 * end.  The operands are reloaded individually after each call.
 */
static void
_fallback_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
               jit_int32_t r2, jit_int32_t r3, jit_word_t i0)
{
    /* XXX only attempts to fallback cas for lightning jit code */
    static pthread_mutex_t  mutex = PTHREAD_MUTEX_INITIALIZER;
    jit_int32_t             tmp_reg, iscasi;
    jit_word_t              mismatch, unlock_pc;

    /* Immediate form: put the address into a scratch register. */
    iscasi = (r1 == _NOREG);
    if (iscasi) {
        tmp_reg = fallback_jit_get_reg(jit_class_gpr|jit_class_sav);
        r1 = rn(tmp_reg);
        movi(r1, i0);
    }

    fallback_save_regs(r0);
    fallback_calli((jit_word_t)pthread_mutex_lock, (jit_word_t)&mutex);

    /* r0 = (*r1 == r2) */
    fallback_load(r1);
    ldr(r0, r1);
    fallback_load(r2);
    eqr(r0, r0, r2);
    fallback_save(r0);

    /* Skip the store when the comparison failed. */
    mismatch = fallback_bnei(_jit->pc.w, r0, 1);
    fallback_load(r3);
#  if __WORDSIZE == 32
    str_i(r1, r3);
#  else
    str_l(r1, r3);
#  endif

    /* done: */
    fallback_flush();
    unlock_pc = _jit->pc.w;
    fallback_calli((jit_word_t)pthread_mutex_unlock, (jit_word_t)&mutex);
    fallback_load(r0);
    fallback_flush();
    fallback_patch_bnei(mismatch, unlock_pc);

    fallback_load_regs(r0);
    if (iscasi)
        jit_unget_reg(tmp_reg);
}
#endif
445
#ifdef fallback_movr_w_f
/*
 * Move the 32 bit pattern in integer register r1 to float register r0
 * by way of the function's conversion slot on the stack.  The 8 byte
 * slot is allocated on first use, which sets _jitc->again.
 */
static void
_fallback_movr_w_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
    jit_word_t      slot;

    if (!_jitc->function->cvt_offset) {
        _jitc->function->cvt_offset = jit_allocai(8);
        _jitc->again = 1;
    }
    slot = _jitc->function->cvt_offset;

    stxi_i(slot, rn(JIT_FP), r1);
    /* Useful for special debug builds */
#  if defined(__i386__) || defined(__x86_64__)
    if (jit_cpu.sse2)
        sse_ldxi_f(r0, rn(JIT_FP), slot);
    else
        x87_ldxi_f(r0, rn(JIT_FP), slot);
#  else
    ldxi_f(r0, rn(JIT_FP), slot);
#  endif
}
#endif
466
#ifdef fallback_movr_f_w
/*
 * Move the bit pattern of float register r1 to integer register r0 by
 * way of the function's conversion slot on the stack.  The 8 byte slot
 * is allocated on first use, which sets _jitc->again.
 */
static void
_fallback_movr_f_w(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
    jit_word_t      slot;

    if (!_jitc->function->cvt_offset) {
        _jitc->function->cvt_offset = jit_allocai(8);
        _jitc->again = 1;
    }
    slot = _jitc->function->cvt_offset;

#  if defined(__i386__) || defined(__x86_64__)
    if (jit_cpu.sse2)
        sse_stxi_f(slot, rn(JIT_FP), r1);
    else
        x87_stxi_f(slot, rn(JIT_FP), r1);
#  else
    stxi_f(slot, rn(JIT_FP), r1);
#  endif
    ldxi_i(r0, rn(JIT_FP), slot);
}
#endif
486
487#if __WORDSIZE == 32
#  ifdef fallback_movr_ww_d
/*
 * Build the double in r0 from two 32 bit integer registers through the
 * conversion slot.  On little endian hosts r1 is stored at the lower
 * address and r2 four bytes above it; on big endian hosts the two are
 * swapped.
 */
static void
_fallback_movr_ww_d(jit_state_t *_jit,
                    jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
    jit_word_t      slot;
    jit_int32_t     first, second;      /* registers in memory order */

    if (!_jitc->function->cvt_offset) {
        _jitc->function->cvt_offset = jit_allocai(8);
        _jitc->again = 1;
    }
    slot = _jitc->function->cvt_offset;

    if (__BYTE_ORDER == __LITTLE_ENDIAN) {
        first = r1;
        second = r2;
    }
    else {
        first = r2;
        second = r1;
    }
    stxi_i(slot, rn(JIT_FP), first);
    stxi_i(slot + 4, rn(JIT_FP), second);
#    if defined(__i386__) || defined(__x86_64__)
    if (jit_cpu.sse2)
        sse_ldxi_d(r0, rn(JIT_FP), slot);
    else
        x87_ldxi_d(r0, rn(JIT_FP), slot);
#    else
    ldxi_d(r0, rn(JIT_FP), slot);
#    endif
}
#  endif
511
#  ifdef fallback_movr_d_ww
/*
 * Split the double in r2 into two 32 bit integer registers through the
 * conversion slot.  On little endian hosts r0 receives the word at the
 * lower address and r1 the word four bytes above it; on big endian
 * hosts the two are swapped.
 */
static void
_fallback_movr_d_ww(jit_state_t *_jit,
                    jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
    jit_word_t      slot;
    jit_int32_t     first, second;      /* registers in memory order */

    if (!_jitc->function->cvt_offset) {
        _jitc->function->cvt_offset = jit_allocai(8);
        _jitc->again = 1;
    }
    slot = _jitc->function->cvt_offset;

#    if defined(__i386__) || defined(__x86_64__)
    if (jit_cpu.sse2)
        sse_stxi_d(slot, rn(JIT_FP), r2);
    else
        x87_stxi_d(slot, rn(JIT_FP), r2);
#    else
    stxi_d(slot, rn(JIT_FP), r2);
#    endif
    if (__BYTE_ORDER == __LITTLE_ENDIAN) {
        first = r0;
        second = r1;
    }
    else {
        first = r1;
        second = r0;
    }
    ldxi_i(first, rn(JIT_FP), slot);
    ldxi_i(second, rn(JIT_FP), slot + 4);
}
#  endif
535
536#else
#  ifdef fallback_movr_w_d
/*
 * Move the 64 bit pattern in integer register r1 to double register r0
 * by way of the function's conversion slot on the stack.  The 8 byte
 * slot is allocated on first use, which sets _jitc->again.
 */
static void
_fallback_movr_w_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
    jit_word_t      slot;

    if (!_jitc->function->cvt_offset) {
        _jitc->function->cvt_offset = jit_allocai(8);
        _jitc->again = 1;
    }
    slot = _jitc->function->cvt_offset;

    stxi_l(slot, rn(JIT_FP), r1);
#    if defined(__i386__) || defined(__x86_64__)
    if (jit_cpu.sse2)
        sse_ldxi_d(r0, rn(JIT_FP), slot);
    else
        x87_ldxi_d(r0, rn(JIT_FP), slot);
#    else
    ldxi_d(r0, rn(JIT_FP), slot);
#    endif
}
#  endif
556
#  ifdef fallback_movr_d_w
/*
 * Move the bit pattern of double register r1 to integer register r0 by
 * way of the function's conversion slot on the stack.  The 8 byte slot
 * is allocated on first use, which sets _jitc->again.
 */
static void
_fallback_movr_d_w(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
    jit_word_t      slot;

    if (!_jitc->function->cvt_offset) {
        _jitc->function->cvt_offset = jit_allocai(8);
        _jitc->again = 1;
    }
    slot = _jitc->function->cvt_offset;

#    if defined(__i386__) || defined(__x86_64__)
    if (jit_cpu.sse2)
        sse_stxi_d(slot, rn(JIT_FP), r1);
    else
        x87_stxi_d(slot, rn(JIT_FP), r1);
#    else
    stxi_d(slot, rn(JIT_FP), r1);
#    endif
    ldxi_l(r0, rn(JIT_FP), slot);
}
#  endif
576#endif
79bfeef6
PC
577
578static void
579_fallback_clo(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
580{
581 jit_word_t clz, done;
582 comr(r0, r1);
583 clz = fallback_bnei(_jit->pc.w, r0, 0);
584 movi(r0, __WORDSIZE);
ba86ff93 585 fallback_flush();
79bfeef6 586 done = fallback_jmpi(_jit->pc.w);
ba86ff93
PC
587 fallback_flush();
588 fallback_patch_bnei(clz, _jit->pc.w);
79bfeef6 589 fallback_clz(r0, r0);
ba86ff93 590 fallback_flush();
79bfeef6
PC
591 fallback_patch_jmpi(done, _jit->pc.w);
592}
593
594static void
595_fallback_clz(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
596{
ba86ff93
PC
597# if USE_BIT_TABLES
598 /* t0 = __WORDSIZE - 8;
599 * loop:
600 * t1 = r1 >> t0;
601 * if (t1)
602 * goto done;
603 * t0 -= 8;
604 * if (t0)
605 * goto loop;
606 * t1 = r1;
607 * done:
608 * r0 = __WORDSIZE - 8 - t0 + clz_tab[t1]
609 */
610 /* Table below is count of leading zeros of 8 bit values. */
611 static const jit_uint8_t clz_tab[256] = {
612 8,7,6,6,5,5,5,5,4,4,4,4,4,4,4,4,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
613 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
614 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
615 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
616 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
617 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
618 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
619 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
620 };
621 jit_int32_t t0, t1;
622 jit_word_t loop, done;
623 t0 = fallback_jit_get_reg(jit_class_gpr);
624 t1 = fallback_jit_get_reg(jit_class_gpr);
625 movi(rn(t0), __WORDSIZE - 8);
626 fallback_flush();
627 loop = _jit->pc.w;
628 rshr_u(rn(t1), r1, rn(t0));
629 done = fallback_bnei(_jit->pc.w, rn(t1), 0);
630 subi(rn(t0), rn(t0), 8);
631 fallback_bnei(loop, rn(t0), 0);
632 movr(rn(t1), r1);
633 fallback_flush();
634 fallback_patch_bnei(done, _jit->pc.w);
635 rsbi(r0, rn(t0), __WORDSIZE - 8);
636 movi(rn(t0), (jit_word_t)clz_tab);
637 ldxr_uc(rn(t1), rn(t0), rn(t1));
638 addr(r0, r0, rn(t1));
639 jit_unget_reg(t1);
640 jit_unget_reg(t0);
641# else
79bfeef6
PC
642 jit_int32_t r1_reg, r2, r2_reg;
643 jit_word_t clz, l32, l16, l8, l4, l2, l1;
644 l32 = fallback_bnei(_jit->pc.w, r1, 0);
645 movi(r0, __WORDSIZE);
646 clz = fallback_jmpi(_jit->pc.w);
ba86ff93
PC
647 fallback_flush();
648 fallback_patch_bnei(l32, _jit->pc.w);
649 r2_reg = fallback_jit_get_reg(jit_class_gpr);
79bfeef6 650 r2 = rn(r2_reg);
ba86ff93 651 r1_reg = fallback_jit_get_reg(jit_class_gpr);
79bfeef6
PC
652 movr(rn(r1_reg), r1);
653 r1 = rn(r1_reg);
654 movi(r0, 0);
ba86ff93 655# if __WORDSIZE == 64
79bfeef6
PC
656 movi(r2, 0xffffffff00000000UL);
657 l32 = fallback_bmsr(_jit->pc.w, r1, r2);
658 lshi(r1, r1, 32);
659 addi(r0, r0, 32);
ba86ff93
PC
660 fallback_flush();
661 fallback_patch_bmsr(l32, _jit->pc.w);
79bfeef6 662 lshi(r2, r2, 16);
ba86ff93 663# else
79bfeef6 664 movi(r2, 0xffff0000UL);
ba86ff93 665# endif
79bfeef6
PC
666 l16 = fallback_bmsr(_jit->pc.w, r1, r2);
667 lshi(r1, r1, 16);
668 addi(r0, r0, 16);
ba86ff93
PC
669 fallback_flush();
670 fallback_patch_bmsr(l16, _jit->pc.w);
79bfeef6
PC
671 lshi(r2, r2, 8);
672 l8 = fallback_bmsr(_jit->pc.w, r1, r2);
673 lshi(r1, r1, 8);
674 addi(r0, r0, 8);
ba86ff93
PC
675 fallback_flush();
676 fallback_patch_bmsr(l8, _jit->pc.w);
79bfeef6
PC
677 lshi(r2, r2, 4);
678 l4 = fallback_bmsr(_jit->pc.w, r1, r2);
679 lshi(r1, r1, 4);
680 addi(r0, r0, 4);
ba86ff93
PC
681 fallback_flush();
682 fallback_patch_bmsr(l4, _jit->pc.w);
79bfeef6
PC
683 lshi(r2, r2, 2);
684 l2 = fallback_bmsr(_jit->pc.w, r1, r2);
685 lshi(r1, r1, 2);
686 addi(r0, r0, 2);
ba86ff93
PC
687 fallback_flush();
688 fallback_patch_bmsr(l2, _jit->pc.w);
79bfeef6
PC
689 lshi(r2, r2, 1);
690 l1 = fallback_bmsr(_jit->pc.w, r1, r2);
691 addi(r0, r0, 1);
ba86ff93
PC
692 fallback_flush();
693 fallback_patch_bmsr(l1, _jit->pc.w);
79bfeef6
PC
694 fallback_patch_jmpi(clz, _jit->pc.w);
695 jit_unget_reg(r2_reg);
696 jit_unget_reg(r1_reg);
ba86ff93 697# endif
79bfeef6
PC
698}
699
700static void
701_fallback_cto(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
702{
703 jit_word_t ctz, done;
704 comr(r0, r1);
705 ctz = fallback_bnei(_jit->pc.w, r0, 0);
706 movi(r0, __WORDSIZE);
707 done = fallback_jmpi(_jit->pc.w);
ba86ff93
PC
708 fallback_flush();
709 fallback_patch_bnei(ctz, _jit->pc.w);
79bfeef6 710 fallback_ctz(r0, r0);
ba86ff93 711 fallback_flush();
79bfeef6
PC
712 fallback_patch_jmpi(done, _jit->pc.w);
713}
714
715static void
716_fallback_ctz(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
717{
ba86ff93
PC
718# if USE_BIT_TABLES
719 /* Adapted from http://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightModLookup
720 * Table for 64 bits was recomputed choosing 67 as next prime number.
721 * The cost of the modulo might not compensate and could be better to
722 * use the alternate version (or rbit and use clz).
723 */
724 jit_int32_t t0;
725# if __WORDSIZE == 32
726 static const jit_uint8_t mod37[] = {
727 32, 0, 1, 26, 2, 23, 27, 0, 3, 16, 24, 30, 28, 11, 0, 13,
728 4, 7, 17, 0, 25, 22, 31, 15, 29, 10, 12, 6, 0, 21, 14, 9,
729 5, 20, 8, 19, 18
730 };
731 /* return mod37[(-r1 & r1) % 37]; */
732# else
733 static const jit_uint8_t mod67[] = {
734 64, 0, 1, 39, 2, 15, 40, 23, 3, 12, 16, 59, 41, 19, 24, 54,
735 4, 0, 13, 10, 17, 62, 60, 28, 42, 30, 20, 51, 25, 44, 55, 47,
736 5, 32, 0, 38, 14, 22, 11, 58, 18, 53, 63, 9, 61, 27, 29, 50,
737 43, 46, 31, 37, 21, 57, 52, 8, 26, 49, 45, 36, 56, 7, 48, 35,
738 6, 34, 33
739 };
740 /* return mod67[(-r1 & r1) % 67]; */
741# endif
742 t0 = fallback_jit_get_reg(jit_class_gpr);
743 if (r0 == r1) {
744 negr(rn(t0), r1);
745 andr(r0, rn(t0), r1);
746 }
747 else {
748 negr(r0, r1);
749 andr(r0, r0, r1);
750 }
751# if __WORDSIZE == 32
752 remi_u(r0, r0, 37);
753 movi(rn(t0), (jit_word_t)mod37);
754# else
755 remi_u(r0, r0, 67);
756 movi(rn(t0), (jit_word_t)mod67);
757# endif
758 ldxr_uc(r0, rn(t0), r0);
759 jit_unget_reg(t0);
760# else
79bfeef6
PC
761 jit_int32_t r1_reg, r2, r2_reg;
762 jit_word_t ctz, l32, l16, l8, l4, l2, l1;
763 l32 = fallback_bnei(_jit->pc.w, r1, 0);
764 movi(r0, __WORDSIZE);
765 ctz = fallback_jmpi(_jit->pc.w);
ba86ff93
PC
766 fallback_flush();
767 fallback_patch_bnei(l32, _jit->pc.w);
768 r2_reg = fallback_jit_get_reg(jit_class_gpr);
79bfeef6 769 r2 = rn(r2_reg);
ba86ff93 770 r1_reg = fallback_jit_get_reg(jit_class_gpr);
79bfeef6
PC
771 movr(rn(r1_reg), r1);
772 r1 = rn(r1_reg);
773 movi(r0, 0);
ba86ff93 774# if __WORDSIZE == 64
79bfeef6
PC
775 movi(r2, 0xffffffffUL);
776 l32 = fallback_bmsr(_jit->pc.w, r1, r2);
777 rshi_u(r1, r1, 32);
778 addi(r0, r0, 32);
ba86ff93
PC
779 fallback_flush();
780 fallback_patch_bmsr(l32, _jit->pc.w);
79bfeef6 781 rshi(r2, r2, 16);
ba86ff93 782# else
79bfeef6 783 movi(r2, 0xffffUL);
ba86ff93 784# endif
79bfeef6
PC
785 l16 = fallback_bmsr(_jit->pc.w, r1, r2);
786 rshi_u(r1, r1, 16);
787 addi(r0, r0, 16);
ba86ff93
PC
788 fallback_flush();
789 fallback_patch_bmsr(l16, _jit->pc.w);
79bfeef6
PC
790 rshi(r2, r2, 8);
791 l8 = fallback_bmsr(_jit->pc.w, r1, r2);
792 rshi_u(r1, r1, 8);
793 addi(r0, r0, 8);
ba86ff93
PC
794 fallback_flush();
795 fallback_patch_bmsr(l8, _jit->pc.w);
79bfeef6
PC
796 rshi(r2, r2, 4);
797 l4 = fallback_bmsr(_jit->pc.w, r1, r2);
798 rshi_u(r1, r1, 4);
799 addi(r0, r0, 4);
ba86ff93
PC
800 fallback_flush();
801 fallback_patch_bmsr(l4, _jit->pc.w);
79bfeef6
PC
802 rshi(r2, r2, 2);
803 l2 = fallback_bmsr(_jit->pc.w, r1, r2);
804 rshi_u(r1, r1, 2);
805 addi(r0, r0, 2);
ba86ff93
PC
806 fallback_flush();
807 fallback_patch_bmsr(l2, _jit->pc.w);
79bfeef6
PC
808 rshi(r2, r2, 1);
809 l1 = fallback_bmsr(_jit->pc.w, r1, r2);
810 addi(r0, r0, 1);
ba86ff93
PC
811 fallback_flush();
812 fallback_patch_bmsr(l1, _jit->pc.w);
79bfeef6
PC
813 fallback_patch_jmpi(ctz, _jit->pc.w);
814 jit_unget_reg(r2_reg);
815 jit_unget_reg(r1_reg);
ba86ff93
PC
816# endif
817}
818
819static void
820_fallback_rbit(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
821{
822# if USE_BIT_TABLES
823 /* t0 = r1;
824 * t1 = t0 & 0xff;
825 * t2 = swap_tab;
826 * r0 = t2[t1];
827 * t3 = 8;
828 * loop:
829 * t1 = t0 >> t3;
830 * t1 &= 0xff;
831 * r0 <<= 8;
832 * r0 |= t2[t1];
833 * t3 += 8;
834 * if (t3 < __WORDSIZE)
835 * goto loop;
836 */
837 jit_word_t loop;
838 jit_int32_t t0, r1_reg, t1, t2, t3;
839 static const unsigned char swap_tab[256] = {
840 0, 128, 64, 192, 32, 160, 96, 224,
841 16, 144, 80, 208, 48, 176, 112, 240,
842 8, 136, 72, 200, 40, 168, 104, 232,
843 24, 152, 88, 216 ,56, 184, 120, 248,
844 4, 132, 68, 196, 36, 164, 100, 228,
845 20, 148, 84, 212, 52, 180, 116, 244,
846 12, 140, 76, 204, 44, 172, 108, 236,
847 28, 156, 92, 220, 60, 188, 124, 252,
848 2, 130, 66, 194, 34, 162, 98, 226,
849 18, 146, 82, 210, 50, 178, 114, 242,
850 10, 138, 74, 202, 42, 170, 106, 234,
851 26, 154, 90, 218, 58, 186, 122, 250,
852 6, 134, 70, 198, 38, 166, 102, 230,
853 22, 150, 86, 214, 54, 182, 118, 246,
854 14, 142, 78, 206, 46, 174, 110, 238,
855 30, 158, 94, 222, 62, 190, 126, 254,
856 1, 129, 65, 193, 33, 161, 97, 225,
857 17, 145, 81, 209, 49, 177, 113, 241,
858 9, 137, 73, 201, 41, 169, 105, 233,
859 25, 153, 89, 217, 57, 185, 121, 249,
860 5, 133, 69, 197, 37, 165, 101, 229,
861 21, 149, 85, 213, 53, 181, 117, 245,
862 13, 141, 77, 205, 45, 173, 109, 237,
863 29, 157, 93, 221, 61, 189, 125, 253,
864 3, 131, 67, 195, 35, 163, 99, 227,
865 19, 147, 83, 211, 51, 179, 115, 243,
866 11, 139, 75, 203, 43, 171, 107, 235,
867 27, 155, 91, 219, 59, 187, 123, 251,
868 7, 135, 71, 199, 39, 167, 103, 231,
869 23, 151, 87, 215, 55, 183, 119, 247,
870 15, 143, 79, 207, 47, 175, 111, 239,
871 31, 159, 95, 223, 63, 191, 127, 255
872 };
873 if (r0 == r1) {
874 t0 = fallback_jit_get_reg(jit_class_gpr);
875 r1_reg = rn(t0);
876 }
877 else {
878 t0 = JIT_NOREG;
879 r1_reg = r1;
880 }
881 t1 = fallback_jit_get_reg(jit_class_gpr);
882 t2 = fallback_jit_get_reg(jit_class_gpr);
883 t3 = fallback_jit_get_reg(jit_class_gpr);
884 if (r0 == r1)
885 movr(rn(t0), r1);
886 extr_uc(rn(t1), r1_reg);
887 movi(rn(t2), (jit_word_t)swap_tab);
888 ldxr_uc(r0, rn(t2), rn(t1));
889 movi(rn(t3), 8);
890 fallback_flush();
891 loop = _jit->pc.w;
892 rshr(rn(t1), r1_reg, rn(t3));
893 extr_uc(rn(t1), rn(t1));
894 lshi(r0, r0, 8);
895 ldxr_uc(rn(t1), rn(t2), rn(t1));
896 orr(r0, r0, rn(t1));
897 addi(rn(t3), rn(t3), 8);
898 blti(loop, rn(t3), __WORDSIZE);
899 jit_unget_reg(t3);
900 jit_unget_reg(t2);
901 jit_unget_reg(t1);
902 if (t0 != JIT_NOREG)
903 jit_unget_reg(t0);
904# elif USE_BITSWAP_UNROLLED
905/* http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel */
906/*
907unsigned int v; // 32-bit word to reverse bit order
908
909// swap odd and even bits
910v = ((v >> 1) & 0x55555555) | ((v & 0x55555555) << 1);
911// swap consecutive pairs
912v = ((v >> 2) & 0x33333333) | ((v & 0x33333333) << 2);
913// swap nibbles ...
914v = ((v >> 4) & 0x0F0F0F0F) | ((v & 0x0F0F0F0F) << 4);
915// swap bytes
916v = ((v >> 8) & 0x00FF00FF) | ((v & 0x00FF00FF) << 8);
917// swap 2-byte long pairs
918v = ( v >> 16 ) | ( v << 16);
919 */
920 jit_int32_t t0, t1, t2, t3, t4;
921 movr(r0, r1);
922 t0 = fallback_jit_get_reg(jit_class_gpr);
923 t1 = fallback_jit_get_reg(jit_class_gpr);
924 t2 = fallback_jit_get_reg(jit_class_gpr);
925 movi(rn(t0), __WORDSIZE == 32 ? 0x55555555L : 0x5555555555555555L);
926 rshi_u(rn(t1), r0, 1); /* t1 = v >> 1 */
927 andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
928 andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/
929 lshi(rn(t2), rn(t2), 1); /* t2 <<= 1 */
930 orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
931 movi(rn(t0), __WORDSIZE == 32 ? 0x33333333L : 0x3333333333333333L);
932 rshi_u(rn(t1), r0, 2); /* t1 = v >> 2 */
933 andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
934 andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/
935 lshi(rn(t2), rn(t2), 2); /* t2 <<= 2 */
936 orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
937 movi(rn(t0), __WORDSIZE == 32 ? 0x0f0f0f0fL : 0x0f0f0f0f0f0f0f0fL);
938 rshi_u(rn(t1), r0, 4); /* t1 = v >> 4 */
939 andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
940 andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/
941 lshi(rn(t2), rn(t2), 4); /* t2 <<= 4 */
942 orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
943 movi(rn(t0), __WORDSIZE == 32 ? 0x00ff00ffL : 0x00ff00ff00ff00ffL);
944 rshi_u(rn(t1), r0, 8); /* t1 = v >> 8 */
945 andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
946 andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/
947 lshi(rn(t2), rn(t2), 8); /* t2 <<= 8 */
948 orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
949# if __WORDSIZE == 32
950 rshi_u(rn(t1), r0, 16); /* t1 = v >> 16 */
951 lshi(rn(t2), r0, 16); /* t2 = v << 16 */
952 orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
953# else
954 movi(rn(t0), 0x0000ffff0000ffffL);
955 rshi_u(rn(t1), r0, 16); /* t1 = v >> 16 */
956 andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
957 andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/
958 lshi(rn(t2), rn(t2), 16); /* t2 <<= 16 */
959 orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
960 rshi_u(rn(t1), r0, 32); /* t1 = v >> 32 */
961 lshi(rn(t2), r0, 32); /* t2 = v << 32 */
962 orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
963# endif
964 jit_unget_reg(t2);
965 jit_unget_reg(t1);
966 jit_unget_reg(t0);
967# elif USE_BITSWAP_LOOP
968/* http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel */
969/*
970unsigned int s = sizeof(v) * CHAR_BIT; // bit size; must be power of 2
971unsigned int mask = ~0;
972while ((s >>= 1) > 0)
973{
974 mask ^= (mask << s);
975 v = ((v >> s) & mask) | ((v << s) & ~mask);
976}
977*/
978 jit_int32_t s, mask;
979 jit_word_t loop, done, t0, t1;
980 movr(v, r1);
981 s = fallback_jit_get_reg(jit_class_gpr);
982 movi(rn(s), __WORDSIZE); /* s = sizeof(v) * CHAR_BIT; */
983 mask = fallback_jit_get_reg(jit_class_gpr);
984 movi(rn(mask), ~0L); /* mask = ~0; */
985 flush();
986 loop = _jit->pc.w; /* while ((s >>= 1) > 0) */
987 rshi(rn(s), rn(s), 1); /* (s >>= 1) */
988 done = blei(_jit->pc.w, rn(s), 0); /* no loop if s <= 0 */
989 t0 = fallback_jit_get_reg(jit_class_gpr);
990 lshr(rn(t0), rn(mask), rn(s)); /* t0 = (mask << s) */
991 xorr(rn(mask), rn(mask), rn(t0)); /* mask ^= t0 */
992 rshr(rn(t0), v, rn(s)); /* t0 = v >> s */
993 andr(rn(t0), rn(t0), rn(mask)); /* t0 = t0 & mask */
994 t1 = fallback_jit_get_reg(jit_class_gpr);
995 lshr(rn(t1), v, rn(s)); /* t1 = v << s */
996 comr(v, rn(mask)); /* v = ~mask */
997 andr(rn(t1), v, rn(t1)); /* t1 = t1 & v */
998 orr(v, rn(t0), rn(t1)); /* v = t0 | t1 */
999 jmpi(loop);
1000 flush();
1001 patch_at(done, _jit->pc.w);
1002 jit_unget_reg(t1);
1003 jit_unget_reg(t0);
1004 jit_unget_reg(mask);
1005 jit_unget_reg(s);
1006# endif
1007}
1008
1009static void
1010_fallback_popcnt(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1011{
1012 /* Same approach as rbitr */
1013 /* t0 = r1;
1014 * t1 = t0 & 0xff;
1015 * t2 = pop_tab;
1016 * r0 = t2[t1];
1017 * t3 = 8;
1018 * loop:
1019 * t1 = t0 >> t3;
1020 * t1 &= 0xff;
1021 * r0 <<= 8;
1022 * r0 |= t2[t1];
1023 * t3 += 8;
1024 * if (t3 < __WORDSIZE)
1025 * goto loop;
1026 */
1027 jit_word_t loop;
1028 jit_int32_t t0, r1_reg, t1, t2, t3;
1029 static const unsigned char pop_tab[256] = {
1030 0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,
1031 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
1032 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
1033 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
1034 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
1035 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
1036 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
1037 3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,4,5,5,6,5,6,6,7,5,6,6,7,6,7,7,8
1038 };
1039 if (r0 == r1) {
1040 t0 = fallback_jit_get_reg(jit_class_gpr);
1041 r1_reg = rn(t0);
1042 }
1043 else {
1044 t0 = JIT_NOREG;
1045 r1_reg = r1;
1046 }
1047 t1 = fallback_jit_get_reg(jit_class_gpr);
1048 t2 = fallback_jit_get_reg(jit_class_gpr);
1049 t3 = fallback_jit_get_reg(jit_class_gpr);
1050 if (r0 == r1)
1051 movr(rn(t0), r1);
1052 extr_uc(rn(t1), r1_reg);
1053 movi(rn(t2), (jit_word_t)pop_tab);
1054 ldxr_uc(r0, rn(t2), rn(t1));
1055 movi(rn(t3), 8);
1056 fallback_flush();
1057 loop = _jit->pc.w;
1058 rshr(rn(t1), r1_reg, rn(t3));
1059 extr_uc(rn(t1), rn(t1));
1060 ldxr_uc(rn(t1), rn(t2), rn(t1));
1061 addr(r0, r0, rn(t1));
1062 addi(rn(t3), rn(t3), 8);
1063 blti(loop, rn(t3), __WORDSIZE);
1064 jit_unget_reg(t3);
1065 jit_unget_reg(t2);
1066 jit_unget_reg(t1);
1067 if (t0 != JIT_NOREG)
1068 jit_unget_reg(t0);
1069}
1070
1071static void
1072_fallback_lrotr(jit_state_t *_jit,
1073 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1074{
1075 /* r0 = (r1 << r2) | (r1 >> (__WORDSIZE - r2)) */
1076 jit_int32_t t0, t1;
1077 t0 = fallback_jit_get_reg(jit_class_gpr);
1078 if (r0 == r1 || r0 == r2) {
1079 t1 = fallback_jit_get_reg(jit_class_gpr);
1080 lshr(rn(t0), r1, r2);
1081 rsbi(rn(t1), r2, __WORDSIZE);
1082 rshr_u(rn(t1), r1, rn(t1));
1083 orr(r0, rn(t0), rn(t1));
1084 jit_unget_reg(t1);
1085 }
1086 else {
1087 lshr(r0, r1, r2);
1088 rsbi(rn(t0), r2, __WORDSIZE);
1089 rshr_u(rn(t0), r1, rn(t0));
1090 orr(r0, r0, rn(t0));
1091 }
1092 jit_unget_reg(t0);
1093}
1094
1095static void
1096_fallback_lroti(jit_state_t *_jit,
1097 jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1098{
1099 jit_int32_t t0;
1100 t0 = fallback_jit_get_reg(jit_class_gpr);
1101 lshi(rn(t0), r1, i0);
1102 rshi_u(r0, r1, __WORDSIZE - i0);
1103 orr(r0, r0, rn(t0));
1104 jit_unget_reg(t0);
1105}
1106
1107static void
1108_fallback_rrotr(jit_state_t *_jit,
1109 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1110{
1111 /* r0 = (r1 >> r2) | (r1 << (__WORDSIZE - r2)) */
1112 jit_int32_t t0, t1;
1113 t0 = fallback_jit_get_reg(jit_class_gpr);
1114 if (r0 == r1 || r0 == r2) {
1115 t1 = fallback_jit_get_reg(jit_class_gpr);
1116 rshr_u(rn(t0), r1, r2);
1117 rsbi(rn(t1), r2, __WORDSIZE);
1118 lshr(rn(t1), r1, rn(t1));
1119 orr(r0, rn(t0), rn(t1));
1120 jit_unget_reg(t1);
1121 }
1122 else {
1123 rshr_u(r0, r1, r2);
1124 rsbi(rn(t0), r2, __WORDSIZE);
1125 lshr(rn(t0), r1, rn(t0));
1126 orr(r0, r0, rn(t0));
1127 }
1128 jit_unget_reg(t0);
1129}
1130
1131static void
1132_fallback_rroti(jit_state_t *_jit,
1133 jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1134{
1135 jit_int32_t t0;
1136 t0 = fallback_jit_get_reg(jit_class_gpr);
1137 rshi_u(rn(t0), r1, i0);
1138 lshi(r0, r1, __WORDSIZE - i0);
1139 orr(r0, r0, rn(t0));
1140 jit_unget_reg(t0);
1141}
1142
1143static void
1144_fallback_ext(jit_state_t *_jit,
1145 jit_int32_t r0, jit_int32_t r1, jit_word_t i0, jit_word_t i1)
1146{
1147 assert(i0 >= 0 && i1 >= 1 && i0 + i1 <= __WORDSIZE);
1148 if (i1 == __WORDSIZE)
1149 movr(r0, r1);
1150 else {
1151# if __BYTE_ORDER == __BIG_ENDIAN
1152 i0 = __WORDSIZE - (i0 + i1);
1153# endif
1154 if (__WORDSIZE - (i0 + i1)) {
1155 lshi(r0, r1, __WORDSIZE - (i0 + i1));
1156 rshi(r0, r0, __WORDSIZE - i1);
1157 }
1158 else
1159 rshi(r0, r1, __WORDSIZE - i1);
1160 }
1161}
1162
1163static void
1164_fallback_ext_u(jit_state_t *_jit,
1165 jit_int32_t r0, jit_int32_t r1, jit_word_t i0, jit_word_t i1)
1166{
1167 assert(i0 >= 0 && i1 >= 1 && i0 + i1 <= __WORDSIZE);
1168 if (i1 == __WORDSIZE)
1169 movr(r0, r1);
1170 else {
1171# if __BYTE_ORDER == __BIG_ENDIAN
1172 i0 = __WORDSIZE - (i0 + i1);
1173# endif
1174 if (i0)
1175 rshi_u(r0, r1, i0);
1176 andi(r0, r0, (1L << i1) - 1);
1177 }
1178}
1179
1180static void
1181_fallback_dep(jit_state_t *_jit,
1182 jit_int32_t r0, jit_int32_t r1, jit_word_t i0, jit_word_t i1)
1183{
1184 jit_int32_t t0;
1185 jit_word_t mask;
1186 assert(i0 >= 0 && i1 >= 1 && i0 + i1 <= __WORDSIZE);
1187 if (i1 == __WORDSIZE)
1188 movr(r0, r1);
1189 else {
1190# if __BYTE_ORDER == __BIG_ENDIAN
1191 i0 = __WORDSIZE - (i0 + i1);
1192# endif
1193 mask = (1L << i1) - 1;
1194 t0 = fallback_jit_get_reg(jit_class_gpr);
1195 andi(rn(t0), r1, mask);
1196 if (i0) {
1197 lshi(rn(t0), rn(t0), i0);
1198 mask <<= i0;
1199 }
1200 andi(r0, r0, ~mask);
1201 orr(r0, r0, rn(t0));
1202 jit_unget_reg(t0);
1203 }
1204}
1205
1206static void
1207_fallback_qlshr(jit_state_t *_jit,
1208 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
1209{
1210 /* r1 = __WORDSIZE - r3;
1211 * if (r1 != __WORDSIZE) {
1212 * r0 = r2 << r3;
1213 * if (r3 != 0)
1214 * r1 = r2 >> r1;
1215 * else
1216 * r1 = 0;
1217 * }
1218 * else {
1219 * r1 = r2;
1220 * r0 = 0;
1221 * }
1222 */
1223 jit_int32_t t0, s0, t2, s2, t3, s3;
1224 jit_word_t over, zero, done, done_over;
1225 s0 = fallback_jit_get_reg(jit_class_gpr);
1226 t0 = rn(s0);
1227 if (r0 == r2 || r1 == r2) {
1228 s2 = fallback_jit_get_reg(jit_class_gpr);
1229 t2 = rn(s2);
1230 movr(t2, r2);
1231 }
1232 else
1233 t2 = r2;
1234 if (r0 == r3 || r1 == r3) {
1235 s3 = fallback_jit_get_reg(jit_class_gpr);
1236 t3 = rn(s3);
1237 movr(t3, r3);
1238 }
1239 else
1240 t3 = r3;
1241 rsbi(t0, t3, __WORDSIZE);
1242 lshr(r0, t2, t3);
1243 rshr(r1, t2, t0);
1244 zero = fallback_beqi(_jit->pc.w, t3, 0);
1245 over = fallback_beqi(_jit->pc.w, t3, __WORDSIZE);
1246 done = fallback_jmpi(_jit->pc.w);
1247 fallback_flush();
1248 fallback_patch_jmpi(over, _jit->pc.w);
1249 /* overflow */
1250 movi(r0, 0);
1251 done_over = fallback_jmpi(_jit->pc.w);
1252 /* zero */
1253 fallback_flush();
1254 fallback_patch_beqi(zero, _jit->pc.w);
1255 rshi(r1, t2, __WORDSIZE - 1);
1256 fallback_flush();
1257 fallback_patch_jmpi(done, _jit->pc.w);
1258 fallback_patch_jmpi(done_over, _jit->pc.w);
1259 jit_unget_reg(s0);
1260 if (t2 != r2)
1261 jit_unget_reg(s2);
1262 if (t3 != r3)
1263 jit_unget_reg(s3);
1264}
1265
1266static void
1267_fallback_qlshi(jit_state_t *_jit,
1268 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
1269{
1270 assert((jit_uword_t)i0 <= __WORDSIZE);
1271 if (i0 == 0) {
1272 if (r0 != r2) {
1273 movr(r0, r2);
1274 rshi(r1, r2, __WORDSIZE - 1);
1275 }
1276 else
1277 rshi(r1, r2, __WORDSIZE - 1);
1278 }
1279 else if (i0 != __WORDSIZE) {
1280 rshi(r1, r2, __WORDSIZE - i0);
1281 lshi(r0, r2, i0);
1282 }
1283 else {
1284 movr(r1, r2);
1285 movi(r0, 0);
1286 }
1287}
1288
1289static void
1290_fallback_qlshr_u(jit_state_t *_jit, jit_int32_t r0,
1291 jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
1292{
1293 /* r1 = __WORDSIZE - r3;
1294 * if (r1 != __WORDSIZE) {
1295 * r0 = r2 << r3;
1296 * if (r3 != 0)
1297 * r1 = (unsigned)r2 >> r1;
1298 * else
1299 * r1 = 0;
1300 * }
1301 * else {
1302 * r1 = r2;
1303 * r0 = 0;
1304 * }
1305 */
1306 jit_int32_t t0, s0, t2, s2, t3, s3;
1307 jit_word_t over, zero, done, done_over;
1308 s0 = fallback_jit_get_reg(jit_class_gpr);
1309 t0 = rn(s0);
1310 if (r0 == r2 || r1 == r2) {
1311 s2 = fallback_jit_get_reg(jit_class_gpr);
1312 t2 = rn(s2);
1313 movr(t2, r2);
1314 }
1315 else
1316 t2 = r2;
1317 if (r0 == r3 || r1 == r3) {
1318 s3 = fallback_jit_get_reg(jit_class_gpr);
1319 t3 = rn(s3);
1320 movr(t3, r3);
1321 }
1322 else
1323 t3 = r3;
1324 rsbi(t0, t3, __WORDSIZE);
1325 lshr(r0, t2, t3);
1326 rshr_u(r1, t2, t0);
1327 zero = fallback_beqi(_jit->pc.w, t3, 0);
1328 over = fallback_beqi(_jit->pc.w, t3, __WORDSIZE);
1329 done = fallback_jmpi(_jit->pc.w);
1330 fallback_flush();
1331 fallback_patch_jmpi(over, _jit->pc.w);
1332 /* overflow */
1333 movi(r0, 0);
1334 done_over = fallback_jmpi(_jit->pc.w);
1335 /* zero */
1336 fallback_flush();
1337 fallback_patch_beqi(zero, _jit->pc.w);
1338 movi(r1, 0);
1339 fallback_flush();
1340 fallback_patch_jmpi(done, _jit->pc.w);
1341 fallback_patch_jmpi(done_over, _jit->pc.w);
1342 jit_unget_reg(s0);
1343 if (t2 != r2)
1344 jit_unget_reg(s2);
1345 if (t3 != r3)
1346 jit_unget_reg(s3);
1347}
1348
1349static void
1350_fallback_qlshi_u(jit_state_t *_jit, jit_int32_t r0,
1351 jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
1352{
1353 assert(i0 <= __WORDSIZE);
1354 if (i0 == 0) {
1355 movr(r0, r2);
1356 movi(r1, 0);
1357 }
1358 else if (i0 != __WORDSIZE) {
1359 rshi_u(r1, r2, __WORDSIZE - i0);
1360 lshi(r0, r2, i0);
1361 }
1362 else {
1363 movr(r1, r2);
1364 movi(r0, 0);
1365 }
1366}
1367
1368static void
1369_fallback_qrshr(jit_state_t *_jit, jit_int32_t r0,
1370 jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
1371{
1372 /* r1 = __WORDSIZE - r3;
1373 * if (r1 != __WORDSIZE) {
1374 * r0 = r2 >> r3;
1375 * if (r3 != 0)
1376 * r1 = r2 << r1;
1377 * else
1378 * r1 = r2 >> (__WORDSIZE - 1);
1379 * }
1380 * else {
1381 * r1 = r2;
1382 * r0 = r2 >> (__WORDSIZE - 1);
1383 * }
1384 */
1385 jit_int32_t t0, s0, t2, s2, t3, s3;
1386 jit_word_t over, zero, done, done_over;
1387 s0 = fallback_jit_get_reg(jit_class_gpr);
1388 t0 = rn(s0);
1389 if (r0 == r2 || r1 == r2) {
1390 s2 = fallback_jit_get_reg(jit_class_gpr);
1391 t2 = rn(s2);
1392 movr(t2, r2);
1393 }
1394 else
1395 t2 = r2;
1396 if (r0 == r3 || r1 == r3) {
1397 s3 = fallback_jit_get_reg(jit_class_gpr);
1398 t3 = rn(s3);
1399 movr(t3, r3);
1400 }
1401 else
1402 t3 = r3;
1403 rsbi(t0, t3, __WORDSIZE);
1404 rshr(r0, t2, t3);
1405 lshr(r1, t2, t0);
1406 zero = fallback_beqi(_jit->pc.w, t3, 0);
1407 over = fallback_beqi(_jit->pc.w, t3, __WORDSIZE);
1408 done = fallback_jmpi(_jit->pc.w);
1409 fallback_flush();
1410 fallback_patch_jmpi(over, _jit->pc.w);
1411 /* underflow */
1412 rshi(r0, t2, __WORDSIZE - 1);
1413 done_over = fallback_jmpi(_jit->pc.w);
1414 /* zero */
1415 fallback_flush();
1416 fallback_patch_beqi(zero, _jit->pc.w);
d481fb64 1417 movi(r1, 0);
ba86ff93
PC
1418 fallback_flush();
1419 fallback_patch_jmpi(done, _jit->pc.w);
1420 fallback_patch_jmpi(done_over, _jit->pc.w);
1421 jit_unget_reg(s0);
1422 if (t2 != r2)
1423 jit_unget_reg(s2);
1424 if (t3 != r3)
1425 jit_unget_reg(s3);
1426}
1427
1428static void
1429_fallback_qrshi(jit_state_t *_jit, jit_int32_t r0,
1430 jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
1431{
1432 assert((jit_uword_t)i0 <= __WORDSIZE);
1433 if (i0 == 0) {
d481fb64
PC
1434 movr(r0, r2);
1435 movi(r1, 0);
ba86ff93
PC
1436 }
1437 else if (i0 != __WORDSIZE) {
1438 lshi(r1, r2, __WORDSIZE - i0);
1439 rshi(r0, r2, i0);
1440 }
1441 else {
1442 movr(r1, r2);
1443 rshi(r0, r2, __WORDSIZE - 1);
1444 }
1445}
1446
1447static void
1448_fallback_qrshr_u(jit_state_t *_jit, jit_int32_t r0,
1449 jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
1450{
1451 /* r1 = __WORDSIZE - r3;
1452 * if (r1 != __WORDSIZE) {
1453 * r0 = (unsigned)r2 >> r3;
1454 * if (r3 != 0)
1455 * r1 = r2 << r1;
1456 * else
1457 * r1 = 0;
1458 * }
1459 * else {
1460 * r1 = r2;
1461 * r0 = 0;
1462 * }
1463 */
1464 jit_int32_t t0, s0, t2, s2, t3, s3;
1465 jit_word_t over, zero, done, done_over;
1466 s0 = fallback_jit_get_reg(jit_class_gpr);
1467 t0 = rn(s0);
1468 if (r0 == r2 || r1 == r2) {
1469 s2 = fallback_jit_get_reg(jit_class_gpr);
1470 t2 = rn(s2);
1471 movr(t2, r2);
1472 }
1473 else
1474 t2 = r2;
1475 if (r0 == r3 || r1 == r3) {
1476 s3 = fallback_jit_get_reg(jit_class_gpr);
1477 t3 = rn(s3);
1478 movr(t3, r3);
1479 }
1480 else
1481 t3 = r3;
1482 rsbi(t0, t3, __WORDSIZE);
1483 rshr_u(r0, t2, t3);
1484 lshr(r1, t2, t0);
1485 zero = fallback_beqi(_jit->pc.w, t3, 0);
1486 over = fallback_beqi(_jit->pc.w, t3, __WORDSIZE);
1487 done = fallback_jmpi(_jit->pc.w);
1488 fallback_flush();
1489 fallback_patch_jmpi(over, _jit->pc.w);
1490 /* underflow */
1491 movi(r0, 0);
1492 done_over = fallback_jmpi(_jit->pc.w);
1493 /* zero */
1494 fallback_flush();
1495 fallback_patch_beqi(zero, _jit->pc.w);
1496 movi(r1, 0);
1497 fallback_flush();
1498 fallback_patch_jmpi(done, _jit->pc.w);
1499 fallback_patch_jmpi(done_over, _jit->pc.w);
1500 jit_unget_reg(s0);
1501 if (t2 != r2)
1502 jit_unget_reg(s2);
1503 if (t3 != r3)
1504 jit_unget_reg(s3);
1505}
1506
1507static void
1508_fallback_qrshi_u(jit_state_t *_jit, jit_int32_t r0,
1509 jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
1510{
1511 assert((jit_uword_t)i0 <= __WORDSIZE);
1512 if (i0 == 0) {
1513 movr(r0, r2);
1514 movi(r1, 0);
1515 }
1516 else if (i0 != __WORDSIZE) {
1517 lshi(r1, r2, __WORDSIZE - i0);
1518 rshi_u(r0, r2, i0);
1519 }
1520 else {
1521 movr(r1, r2);
1522 movi(r0, 0);
1523 }
1524}
1525
1526static void
1527_unldr2(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_bool_t sign)
1528{
1529 jit_int32_t t0, r2;
1530 jit_word_t un2, al;
1531 t0 = fallback_jit_get_reg(jit_class_gpr);
1532 r2 = rn(t0);
1533 andi(r2, r1, -2);
1534 un2 = fallback_bner(_jit->pc.w, r1, r2);
1535 if (sign)
1536 ldr_s(r0, r1);
1537 else
1538 ldr_us(r0, r1);
1539 al = fallback_jmpi(_jit->pc.w);
1540 fallback_flush();
1541 fallback_patch_bner(un2, _jit->pc.w);
1542# if __BYTE_ORDER == __LITTLE_ENDIAN
1543 ldr_uc(r0, r1);
1544 if (sign)
1545 ldxi_c(r2, r1, 1);
1546 else
1547 ldxi_uc(r2, r1, 1);
1548 lshi(r2, r2, 8);
1549# else
1550 if (sign)
1551 ldr_c(r0, r1);
1552 else
1553 ldr_uc(r0, r1);
1554 lshi(r0, r0, 8);
1555 ldxi_uc(r2, r1, 1);
1556# endif
1557 orr(r0, r0, r2);
1558 fallback_flush();
1559 fallback_patch_jmpi(al, _jit->pc.w);
1560 jit_unget_reg(t0);
1561}
1562
1563static void
1564_unldi2(jit_state_t *_jit, jit_int32_t r0, jit_word_t i1, jit_bool_t sign)
1565{
1566 jit_int32_t t0, r2;
1567 if ((i1 & -2) == i1) {
1568 if (sign)
1569 ldi_s(r0, i1);
1570 else
1571 ldi_us(r0, i1);
1572 }
1573 else {
1574 t0 = fallback_jit_get_reg(jit_class_gpr);
1575 r2 = rn(t0);
1576# if __BYTE_ORDER == __LITTLE_ENDIAN
1577 ldi_uc(r0, i1);
1578 if (sign)
1579 ldi_c(r2, i1 + 1);
1580 else
1581 ldi_uc(r2, i1 + 1);
1582 lshi(r2, r2, 8);
1583# else
1584 if (sign)
1585 ldi_c(r0, i1);
1586 else
1587 ldi_uc(r0, i1);
1588 lshi(r0, r0, 8);
1589 ldi_uc(r2, i1 + 1);
1590# endif
1591 orr(r0, r0, r2);
1592 jit_unget_reg(t0);
1593 }
1594}
1595
1596static void
1597_unldr3(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_bool_t sign)
1598{
1599 jit_int32_t t0, r2;
1600 jit_word_t un2, or;
1601 t0 = fallback_jit_get_reg(jit_class_gpr);
1602 r2 = rn(t0);
1603 andi(r2, r1, -2);
1604 un2 = fallback_bner(_jit->pc.w, r1, r2);
1605# if __BYTE_ORDER == __LITTLE_ENDIAN
1606 ldr_us(r0, r1);
1607 if (sign)
1608 ldxi_c(r2, r1, 2);
1609 else
1610 ldxi_uc(r2, r1, 2);
1611 lshi(r2, r2, 16);
1612# else
1613 if (sign)
1614 ldr_s(r0, r1);
1615 else
1616 ldr_us(r0, r1);
1617 lshi(r0, r0, 8);
1618 ldxi_uc(r2, r1, 2);
1619# endif
1620 or = fallback_jmpi(_jit->pc.w);
1621 fallback_flush();
1622 fallback_patch_bner(un2, _jit->pc.w);
1623# if __BYTE_ORDER == __LITTLE_ENDIAN
1624 ldr_uc(r0, r1);
1625 if (sign)
1626 ldxi_s(r2, r1, 1);
1627 else
1628 ldxi_us(r2, r1, 1);
1629 lshi(r2, r2, 8);
1630# else
1631 if (sign)
1632 ldr_c(r0, r1);
1633 else
1634 ldr_uc(r0, r1);
1635 lshi(r0, r0, 16);
1636 ldxi_us(r2, r1, 1);
1637# endif
1638 fallback_flush();
1639 fallback_patch_jmpi(or, _jit->pc.w);
1640 orr(r0, r0, r2);
1641 jit_unget_reg(t0);
1642}
1643
1644static void
1645_unldi3(jit_state_t *_jit, jit_int32_t r0, jit_word_t i1, jit_bool_t sign)
1646{
1647 jit_int32_t t0, r2;
1648 t0 = fallback_jit_get_reg(jit_class_gpr);
1649 r2 = rn(t0);
1650 if ((i1 & -2) == i1) {
1651# if __BYTE_ORDER == __LITTLE_ENDIAN
1652 ldi_us(r0, i1);
1653 if (sign)
1654 ldi_c(r2, i1 + 2);
1655 else
1656 ldi_uc(r2, i1 + 2);
1657 lshi(r2, r2, 16);
1658# else
1659 if (sign)
1660 ldi_s(r0, i1);
1661 else
1662 ldi_us(r0, i1);
1663 lshi(r0, r0, 8);
1664 ldi_uc(r2, i1 + 2);
1665# endif
1666 }
1667 else {
1668# if __BYTE_ORDER == __LITTLE_ENDIAN
1669 ldi_uc(r0, i1);
1670 if (sign)
1671 ldi_s(r2, i1 + 1);
1672 else
1673 ldi_us(r2, i1 + 1);
1674 lshi(r2, r2, 8);
1675# else
1676 if (sign)
1677 ldi_c(r0, i1);
1678 else
1679 ldi_uc(r0, i1);
1680 lshi(r0, r0, 16);
1681 ldi_us(r2, i1 + 1);
1682# endif
1683 }
1684 orr(r0, r0, r2);
1685 jit_unget_reg(t0);
79bfeef6 1686}
ba86ff93
PC
1687
1688static void
1689_unldr4(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1
1690# if __WORDSIZE == 64
1691 , jit_bool_t sign
1692# endif
1693 )
1694{
1695 jit_int32_t t0, r2;
1696 jit_word_t un4, un2, un3, al, or, or3;
1697 t0 = fallback_jit_get_reg(jit_class_gpr);
1698 r2 = rn(t0);
1699 andi(r2, r1, -4);
1700 un4 = fallback_bner(_jit->pc.w, r1, r2);
1701# if __WORDSIZE == 64
1702 if (sign)
1703# endif
1704 ldr_i(r0, r1);
1705# if __WORDSIZE == 64
1706 else
1707 ldr_ui(r0, r1);
1708# endif
1709 al = fallback_jmpi(_jit->pc.w);
1710 fallback_flush();
1711 fallback_patch_bner(un4, _jit->pc.w);
1712 andi(r2, r1, -2);
1713 un2 = fallback_bner(_jit->pc.w, r1, r2);
1714# if __BYTE_ORDER == __LITTLE_ENDIAN
1715 ldr_us(r0, r1);
1716# if __WORDSIZE == 64
1717 if (sign)
1718 ldxi_s(r2, r1, 2);
1719 else
1720# endif
1721 ldxi_us(r2, r1, 2);
1722 lshi(r2, r2, 16);
1723# else
1724# if __WORDSIZE == 64
1725 if (sign)
1726 ldr_s(r0, r1);
1727 else
1728# endif
1729 ldr_us(r0, r1);
1730 lshi(r0, r0, 16);
1731 ldxi_us(r2, r1, 2);
1732# endif
1733 or = fallback_jmpi(_jit->pc.w);
1734 fallback_flush();
1735 fallback_patch_bner(un2, _jit->pc.w);
1736 andi(r2, r1, 3);
1737 un3 = fallback_bnei(_jit->pc.w, r2, 3);
1738# if __BYTE_ORDER == __LITTLE_ENDIAN
1739 ldr_uc(r0, r1);
1740 ldxi_i(r2, r1, 1);
1741 lshi(r2, r2, 8);
1742# if __WORDSIZE == 64
1743 if (sign)
1744 extr_i(r2, r2);
1745 else
1746 extr_ui(r2, r2);
1747# endif
1748# else
1749# if __WORDSIZE == 64
1750 if (sign)
1751 ldr_c(r0, r1);
1752 else
1753# endif
1754 ldr_uc(r0, r1);
1755 lshi(r0, r0, 24);
1756# if __WORDSIZE == 32
1757 ldxi(r2, r1, 1);
1758# else
1759 ldxi_ui(r2, r1, 1);
1760# endif
1761 rshi_u(r2, r2, 8);
1762# endif
1763 or3 = fallback_jmpi(_jit->pc.w);
1764 fallback_flush();
1765 fallback_patch_bnei(un3, _jit->pc.w);
1766# if __BYTE_ORDER == __LITTLE_ENDIAN
1767# if __WORDSIZE == 32
1768 ldxi(r0, r1, -1);
1769# else
1770 ldxi_ui(r0, r1, -1);
1771# endif
1772 rshi_u(r0, r0, 8);
1773# if __WORDSIZE == 64
1774 if (sign)
1775 ldxi_c(r2, r1, 3);
1776 else
1777# endif
1778 ldxi_uc(r2, r1, 3);
1779 lshi(r2, r2, 24);
1780# else
1781 ldxi_i(r0, r1, -1);
1782 lshi(r0, r0, 8);
1783# if __WORDSIZE == 64
1784 if (sign)
1785 extr_i(r0, r0);
1786 else
1787 extr_ui(r0, r0);
1788# endif
1789 ldxi_uc(r2, r1, 3);
1790# endif
1791 fallback_flush();
1792 fallback_patch_jmpi(or, _jit->pc.w);
1793 fallback_patch_jmpi(or3, _jit->pc.w);
1794 orr(r0, r0, r2);
1795 fallback_flush();
1796 fallback_patch_jmpi(al, _jit->pc.w);
1797 jit_unget_reg(t0);
1798}
1799
1800static void
1801_unldi4(jit_state_t *_jit, jit_int32_t r0, jit_word_t i1
1802# if __WORDSIZE == 64
1803 , jit_bool_t sign
1804# endif
1805 )
1806{
1807 jit_int32_t t0, r2;
1808 if ((i1 & -4) == i1) {
1809# if __WORDSIZE == 64
1810 if (sign)
1811# endif
1812 ldi_i(r0, i1);
1813# if __WORDSIZE == 64
1814 else
1815 ldi_ui(r0, i1);
1816# endif
1817 }
1818 else {
1819 t0 = fallback_jit_get_reg(jit_class_gpr);
1820 r2 = rn(t0);
1821 if ((i1 & -2) == i1) {
1822# if __BYTE_ORDER == __LITTLE_ENDIAN
1823 ldi_us(r0, i1);
1824# if __WORDSIZE == 64
1825 if (sign)
1826 ldi_s(r2, i1 + 2);
1827 else
1828# endif
1829 ldi_us(r2, i1 + 2);
1830 lshi(r2, r2, 16);
1831# else
1832# if __WORDSIZE == 64
1833 if (sign)
1834 ldi_s(r0, i1);
1835 else
1836# endif
1837 ldi_us(r0, i1);
1838 lshi(r0, r0, 16);
1839 ldi_us(r2, i1 + 2);
1840# endif
1841 }
1842 else if ((i1 & 3) == 3) {
1843# if __BYTE_ORDER == __LITTLE_ENDIAN
1844 ldi_uc(r0, i1);
1845 ldi_i(r2, i1 + 1);
1846 lshi(r2, r2, 8);
1847# if __WORDSIZE == 64
1848 if (sign)
1849 extr_i(r2, r2);
1850 else
1851 extr_ui(r2, r2);
1852# endif
1853# else
1854# if __WORDSIZE == 64
1855 if (sign)
1856 ldi_c(r0, i1);
1857 else
1858# endif
1859 ldi_uc(r0, i1);
1860 lshi(r0, r0, 24);
1861# if __WORDSIZE == 32
1862 ldi(r2, i1 + 1);
1863# else
1864 ldi_ui(r2, i1 + 1);
1865# endif
1866 rshi_u(r2, r2, 8);
1867# endif
1868 }
1869 else {
1870# if __BYTE_ORDER == __LITTLE_ENDIAN
1871# if __WORDSIZE == 32
1872 ldi(r0, i1 - 1);
1873# else
1874 ldi_ui(r0, i1 - 1);
1875# endif
1876 rshi_u(r0, r0, 8);
1877# if __WORDSIZE == 64
1878 if (sign)
1879 ldi_c(r2, i1 + 3);
1880 else
1881# endif
1882 ldi_uc(r2, i1 + 3);
1883 lshi(r2, r2, 24);
1884# else
1885 ldi_i(r0, i1 - 1);
1886 lshi(r0, r0, 8);
1887# if __WORDSIZE == 64
1888 if (sign)
1889 extr_i(r0, r0);
1890 else
1891 extr_ui(r0, r0);
1892# endif
1893 ldi_uc(r2, i1 + 3);
1894# endif
1895 }
1896 orr(r0, r0, r2);
1897 jit_unget_reg(t0);
1898 }
1899}
1900
1901# if __WORDSIZE == 64
1902static void
1903_unldr5(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_bool_t sign)
1904{
1905 jit_int32_t t0, r2;
1906 jit_word_t un4, un2, un3, or, or4, or3;
1907 t0 = fallback_jit_get_reg(jit_class_gpr);
1908 r2 = rn(t0);
1909 andi(r2, r1, -4);
1910 un4 = fallback_bner(_jit->pc.w, r1, r2);
1911# if __BYTE_ORDER == __LITTLE_ENDIAN
1912 ldr_ui(r0, r1);
1913 if (sign)
1914 ldxi_c(r2, r1, 4);
1915 else
1916 ldxi_uc(r2, r1, 4);
1917 lshi(r2, r2, 32);
1918# else
1919 if (sign)
1920 ldr_i(r0, r1);
1921 else
1922 ldr_ui(r0, r1);
1923 lshi(r0, r0, 8);
1924 ldxi_uc(r2, r1, 4);
1925# endif
1926 or = fallback_jmpi(_jit->pc.w);
1927 fallback_flush();
1928 fallback_patch_bner(un4, _jit->pc.w);
1929 andi(r2, r1, -2);
1930 un2 = fallback_bner(_jit->pc.w, r1, r2);
1931# if __BYTE_ORDER == __LITTLE_ENDIAN
1932 ldr_us(r0, r1);
1933 ldxi_us(r2, r1, 2);
1934 lshi(r2, r2, 16);
1935 orr(r0, r0, r2);
1936 if (sign)
1937 ldxi_c(r2, r1, 4);
1938 else
1939 ldxi_uc(r2, r1, 4);
1940 lshi(r2, r2, 32);
1941# else
1942 if (sign)
1943 ldr_s(r0, r1);
1944 else
1945 ldr_us(r0, r1);
1946 lshi(r0, r0, 24);
1947 ldxi_us(r2, r1, 2);
1948 lshi(r2, r2, 8);
1949 orr(r0, r0, r2);
1950 ldxi_uc(r2, r1, 4);
1951# endif
1952 or4 = fallback_jmpi(_jit->pc.w);
1953 fallback_flush();
1954 fallback_patch_bner(un2, _jit->pc.w);
1955 andi(r2, r1, 3);
1956 un3 = fallback_bnei(_jit->pc.w, r2, 3);
1957# if __BYTE_ORDER == __LITTLE_ENDIAN
1958 ldr_uc(r0, r1);
1959 if (sign)
1960 ldxi_i(r2, r1, 1);
1961 else
1962 ldxi_ui(r2, r1, 1);
1963 lshi(r2, r2, 8);
1964# else
1965 if (sign)
1966 ldr_c(r0, r1);
1967 else
1968 ldr_uc(r0, r1);
1969 lshi(r0, r0, 32);
1970 ldxi_ui(r2, r1, 1);
1971# endif
1972 or3 = fallback_jmpi(_jit->pc.w);
1973 fallback_flush();
1974 fallback_patch_bnei(un3, _jit->pc.w);
1975# if __BYTE_ORDER == __LITTLE_ENDIAN
1976 ldr_uc(r0, r1);
1977 ldxi_us(r2, r1, 1);
1978 lshi(r2, r2, 8);
1979 orr(r0, r0, r2);
1980 if (sign)
1981 ldxi_s(r2, r1, 3);
1982 else
1983 ldxi_us(r2, r1, 3);
1984 lshi(r2, r2, 24);
1985# else
1986 if (sign)
1987 ldr_c(r0, r1);
1988 else
1989 ldr_uc(r0, r1);
1990 lshi(r0, r0, 32);
1991 ldxi_us(r2, r1, 1);
1992 lshi(r2, r2, 16);
1993 orr(r0, r0, r2);
1994 ldxi_us(r2, r1, 3);
1995# endif
1996 fallback_flush();
1997 fallback_patch_jmpi(or, _jit->pc.w);
1998 fallback_patch_jmpi(or4, _jit->pc.w);
1999 fallback_patch_jmpi(or3, _jit->pc.w);
2000 orr(r0, r0, r2);
2001 jit_unget_reg(t0);
2002}
2003
2004static void
2005_unldi5(jit_state_t *_jit, jit_int32_t r0, jit_word_t i1, jit_bool_t sign)
2006{
2007 jit_int32_t t0, r2;
2008 t0 = fallback_jit_get_reg(jit_class_gpr);
2009 r2 = rn(t0);
2010 if ((i1 & -4) == i1) {
2011# if __BYTE_ORDER == __LITTLE_ENDIAN
2012 ldi_ui(r0, i1);
2013 if (sign)
2014 ldi_c(r2, i1 + 4);
2015 else
2016 ldi_uc(r2, i1 + 4);
2017 lshi(r2, r2, 32);
2018# else
2019 if (sign)
2020 ldi_i(r0, i1);
2021 else
2022 ldi_ui(r0, i1);
2023 lshi(r0, r0, 8);
2024 ldi_uc(r2, i1 + 4);
2025# endif
2026 }
2027 else if ((i1 & -2) == i1) {
2028# if __BYTE_ORDER == __LITTLE_ENDIAN
2029 ldi_us(r0, i1);
2030 ldi_us(r2, i1 + 2);
2031 lshi(r2, r2, 16);
2032 orr(r0, r0, r2);
2033 if (sign)
2034 ldi_c(r2, i1 + 4);
2035 else
2036 ldi_uc(r2, i1 + 4);
2037 lshi(r2, r2, 32);
2038# else
2039 if (sign)
2040 ldi_s(r0, i1);
2041 else
2042 ldi_us(r0, i1);
2043 lshi(r0, r0, 24);
2044 ldi_us(r2, i1 + 2);
2045 lshi(r2, r2, 8);
2046 orr(r0, r0, r2);
2047 ldi_uc(r2, i1 + 4);
2048# endif
2049 }
2050 else if ((i1 & 3) == 3) {
2051# if __BYTE_ORDER == __LITTLE_ENDIAN
2052 ldi_uc(r0, i1);
2053 if (sign)
2054 ldi_i(r2, i1 + 1);
2055 else
2056 ldi_ui(r2, i1 + 1);
2057 lshi(r2, r2, 8);
2058# else
2059 if (sign)
2060 ldi_c(r0, i1);
2061 else
2062 ldi_uc(r0, i1);
2063 lshi(r0, r0, 32);
2064 ldi_ui(r2, i1 + 1);
2065# endif
2066 }
2067 else {
2068# if __BYTE_ORDER == __LITTLE_ENDIAN
2069 ldi_uc(r0, i1);
2070 ldi_us(r2, i1 + 1);
2071 lshi(r2, r2, 8);
2072 orr(r0, r0, r2);
2073 if (sign)
2074 ldi_s(r2, i1 + 3);
2075 else
2076 ldi_us(r2, i1 + 3);
2077 lshi(r2, r2, 24);
2078# else
2079 if (sign)
2080 ldi_c(r0, i1);
2081 else
2082 ldi_uc(r0, i1);
2083 lshi(r0, r0, 32);
2084 ldi_us(r2, i1 + 1);
2085 lshi(r2, r2, 16);
2086 orr(r0, r0, r2);
2087 ldi_us(r2, i1 + 3);
2088# endif
2089 }
2090 orr(r0, r0, r2);
2091 jit_unget_reg(t0);
2092}
2093
2094static void
2095_unldr6(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_bool_t sign)
2096{
2097 jit_int32_t t0, r2;
2098 jit_word_t un4, un2, un3, or, or2, or3;
2099 t0 = fallback_jit_get_reg(jit_class_gpr);
2100 r2 = rn(t0);
2101 andi(r2, r1, -4);
2102 un4 = fallback_bner(_jit->pc.w, r1, r2);
2103# if __BYTE_ORDER == __LITTLE_ENDIAN
2104 ldr_ui(r0, r1);
2105 if (sign)
2106 ldxi_s(r2, r1, 4);
2107 else
2108 ldxi_us(r2, r1, 4);
2109 lshi(r2, r2, 32);
2110# else
2111 if (sign)
2112 ldr_i(r0, r1);
2113 else
2114 ldr_ui(r0, r1);
2115 lshi(r0, r0, 16);
2116 ldxi_us(r2, r1, 4);
2117# endif
2118 or = fallback_jmpi(_jit->pc.w);
2119 fallback_flush();
2120 fallback_patch_bner(un4, _jit->pc.w);
2121 andi(r2, r1, -2);
2122 un2 = fallback_bner(_jit->pc.w, r1, r2);
2123# if __BYTE_ORDER == __LITTLE_ENDIAN
2124 ldr_us(r0, r1);
2125 if (sign)
2126 ldxi_i(r2, r1, 2);
2127 else
2128 ldxi_ui(r2, r1, 2);
2129 lshi(r2, r2, 16);
2130# else
2131 if (sign)
2132 ldr_s(r0, r1);
2133 else
2134 ldr_us(r0, r1);
2135 lshi(r0, r0, 32);
2136 ldxi_ui(r2, r1, 2);
2137# endif
2138 or2 = fallback_jmpi(_jit->pc.w);
2139 fallback_flush();
2140 fallback_patch_bner(un2, _jit->pc.w);
2141 andi(r2, r1, 3);
2142 un3 = fallback_bnei(_jit->pc.w, r2, 3);
2143# if __BYTE_ORDER == __LITTLE_ENDIAN
2144 ldr_uc(r0, r1);
2145 ldxi_ui(r2, r1, 1);
2146 lshi(r2, r2, 8);
2147 orr(r0, r0, r2);
2148 if (sign)
2149 ldxi_c(r2, r1, 5);
2150 else
2151 ldxi_uc(r2, r1, 5);
2152 lshi(r2, r2, 40);
2153# else
2154 if (sign)
2155 ldr_c(r0, r1);
2156 else
2157 ldr_uc(r0, r1);
2158 lshi(r0, r0, 40);
2159 ldxi_ui(r2, r1, 1);
2160 lshi(r2, r2, 8);
2161 orr(r0, r0, r2);
2162 ldxi_uc(r2, r1, 5);
2163# endif
2164 or3 = fallback_jmpi(_jit->pc.w);
2165 fallback_flush();
2166 fallback_patch_bner(un3, _jit->pc.w);
2167# if __BYTE_ORDER == __LITTLE_ENDIAN
2168 ldr_uc(r0, r1);
2169 ldxi_us(r2, r1, 1);
2170 lshi(r2, r2, 8);
2171 orr(r0, r0, r2);
2172 ldxi_us(r2, r1, 3);
2173 lshi(r2, r2, 24);
2174 orr(r0, r0, r2);
2175 if (sign)
2176 ldxi_c(r2, r1, 5);
2177 else
2178 ldxi_uc(r2, r1, 5);
2179 lshi(r2, r2, 40);
2180# else
2181 if (sign)
2182 ldr_c(r0, r1);
2183 else
2184 ldr_uc(r0, r1);
2185 lshi(r0, r0, 40);
2186 ldxi_us(r2, r1, 1);
2187 lshi(r2, r2, 24);
2188 orr(r0, r0, r2);
2189 ldxi_us(r2, r1, 3);
2190 lshi(r2, r2, 8);
2191 orr(r0, r0, r2);
2192 ldxi_uc(r2, r1, 5);
2193# endif
2194 fallback_flush();
2195 fallback_patch_jmpi(or, _jit->pc.w);
2196 fallback_patch_jmpi(or2, _jit->pc.w);
2197 fallback_patch_jmpi(or3, _jit->pc.w);
2198 orr(r0, r0, r2);
2199 jit_unget_reg(t0);
2200}
2201
2202static void
2203_unldi6(jit_state_t *_jit, jit_int32_t r0, jit_word_t i1, jit_bool_t sign)
2204{
2205 jit_int32_t t0, r2;
2206 t0 = fallback_jit_get_reg(jit_class_gpr);
2207 r2 = rn(t0);
2208 if ((i1 & -4) == i1) {
2209# if __BYTE_ORDER == __LITTLE_ENDIAN
2210 ldi_ui(r0, i1);
2211 if (sign)
2212 ldi_s(r2, i1 + 4);
2213 else
2214 ldi_us(r2, i1 + 4);
2215 lshi(r2, r2, 32);
2216# else
2217 if (sign)
2218 ldi_i(r0, i1);
2219 else
2220 ldi_ui(r0, i1);
2221 lshi(r0, r0, 16);
2222 ldi_us(r2, i1 + 4);
2223# endif
2224 }
2225 else if ((i1 & -2) == i1) {
2226# if __BYTE_ORDER == __LITTLE_ENDIAN
2227 ldi_us(r0, i1);
2228 if (sign)
2229 ldi_i(r2, i1 + 2);
2230 else
2231 ldi_ui(r2, i1 + 2);
2232 lshi(r2, r2, 16);
2233# else
2234 if (sign)
2235 ldi_s(r0, i1);
2236 else
2237 ldi_us(r0, i1);
2238 lshi(r0, r0, 32);
2239 ldi_ui(r2, i1 + 2);
2240# endif
2241 }
2242 else if ((i1 & 3) == 3) {
2243# if __BYTE_ORDER == __LITTLE_ENDIAN
2244 ldi_uc(r0, i1);
2245 ldi_ui(r2, i1 + 1);
2246 lshi(r2, r2, 8);
2247 orr(r0, r0, r2);
2248 if (sign)
2249 ldi_c(r2, i1 + 5);
2250 else
2251 ldi_uc(r2, i1 + 5);
2252 lshi(r2, r2, 40);
2253# else
2254 if (sign)
2255 ldi_c(r0, i1);
2256 else
2257 ldi_uc(r0, i1);
2258 lshi(r0, r0, 40);
2259 ldi_ui(r2, i1 + 1);
2260 lshi(r2, r2, 8);
2261 orr(r0, r0, r2);
2262 ldi_uc(r2, i1 + 5);
2263# endif
2264 }
2265 else {
2266# if __BYTE_ORDER == __LITTLE_ENDIAN
2267 ldi_uc(r0, i1);
2268 ldi_us(r2, i1 + 1);
2269 lshi(r2, r2, 8);
2270 orr(r0, r0, r2);
2271 ldi_us(r2, i1 + 3);
2272 lshi(r2, r2, 24);
2273 orr(r0, r0, r2);
2274 if (sign)
2275 ldi_c(r2, i1 + 5);
2276 else
2277 ldi_uc(r2, i1 + 5);
2278 lshi(r2, r2, 40);
2279# else
2280 if (sign)
2281 ldi_c(r0, i1);
2282 else
2283 ldi_uc(r0, i1);
2284 lshi(r0, r0, 40);
2285 ldi_us(r2, i1 + 1);
2286 lshi(r2, r2, 24);
2287 orr(r0, r0, r2);
2288 ldi_us(r2, i1 + 3);
2289 lshi(r2, r2, 8);
2290 orr(r0, r0, r2);
2291 ldi_uc(r2, i1 + 5);
2292# endif
2293 }
2294 orr(r0, r0, r2);
2295 jit_unget_reg(t0);
2296}
2297
2298static void
2299_unldr7(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_bool_t sign)
2300{
2301 jit_int32_t t0, r2;
2302 jit_word_t un4, un2, un3, or, or2, or3;
2303 t0 = fallback_jit_get_reg(jit_class_gpr);
2304 r2 = rn(t0);
2305 andi(r2, r1, -4);
2306 un4 = fallback_bner(_jit->pc.w, r1, r2);
2307# if __BYTE_ORDER == __LITTLE_ENDIAN
2308 ldr_ui(r0, r1);
2309 ldxi_us(r2, r1, 4);
2310 lshi(r2, r2, 32);
2311 orr(r0, r0, r2);
2312 if (sign)
2313 ldxi_c(r2, r1, 6);
2314 else
2315 ldxi_uc(r2, r1, 6);
2316 lshi(r2, r2, 48);
2317# else
2318 if (sign)
2319 ldr_i(r0, r1);
2320 else
2321 ldr_ui(r0, r1);
2322 lshi(r0, r0, 24);
2323 ldxi_us(r2, r1, 4);
2324 lshi(r2, r2, 8);
2325 orr(r0, r0, r2);
2326 ldxi_uc(r2, r1, 6);
2327# endif
2328 or = fallback_jmpi(_jit->pc.w);
2329 fallback_flush();
2330 fallback_patch_bner(un4, _jit->pc.w);
2331 andi(r2, r1, -2);
2332 un2 = fallback_bner(_jit->pc.w, r1, r2);
2333# if __BYTE_ORDER == __LITTLE_ENDIAN
2334 ldr_us(r0, r1);
2335 ldxi_ui(r2, r1, 2);
2336 lshi(r2, r2, 16);
2337 orr(r0, r0, r2);
2338 if (sign)
2339 ldxi_c(r2, r1, 6);
2340 else
2341 ldxi_uc(r2, r1, 6);
2342 lshi(r2, r2, 48);
2343# else
2344 if (sign)
2345 ldr_s(r0, r1);
2346 else
2347 ldr_us(r0, r1);
2348 lshi(r0, r0, 40);
2349 ldxi_ui(r2, r1, 2);
2350 lshi(r2, r2, 8);
2351 orr(r0, r0, r2);
2352 ldxi_uc(r2, r1, 6);
2353# endif
2354 or2 = fallback_jmpi(_jit->pc.w);
2355 fallback_flush();
2356 fallback_patch_bner(un2, _jit->pc.w);
2357 andi(r2, r1, 3);
2358 un3 = fallback_bnei(_jit->pc.w, r2, 3);
2359# if __BYTE_ORDER == __LITTLE_ENDIAN
2360 ldr_uc(r0, r1);
2361 ldxi_ui(r2, r1, 1);
2362 lshi(r2, r2, 8);
2363 orr(r0, r0, r2);
2364 if (sign)
2365 ldxi_s(r2, r1, 5);
2366 else
2367 ldxi_us(r2, r1, 5);
2368 lshi(r2, r2, 40);
2369# else
2370 if (sign)
2371 ldr_c(r0, r1);
2372 else
2373 ldr_uc(r0, r1);
2374 lshi(r0, r0, 48);
2375 ldxi_ui(r2, r1, 1);
2376 lshi(r2, r2, 16);
2377 orr(r0, r0, r2);
2378 ldxi_us(r2, r1, 5);
2379# endif
2380 or3 = fallback_jmpi(_jit->pc.w);
2381 fallback_flush();
2382 fallback_patch_bnei(un3, _jit->pc.w);
2383# if __BYTE_ORDER == __LITTLE_ENDIAN
2384 ldr_uc(r0, r1);
2385 ldxi_us(r2, r1, 1);
2386 lshi(r2, r2, 8);
2387 orr(r0, r0, r2);
2388 if (sign)
2389 ldxi_i(r2, r1, 3);
2390 else
2391 ldxi_ui(r2, r1, 3);
2392 lshi(r2, r2, 24);
2393# else
2394 if (sign)
2395 ldr_c(r0, r1);
2396 else
2397 ldr_uc(r0, r1);
2398 lshi(r0, r0, 48);
2399 ldxi_us(r2, r1, 1);
2400 lshi(r2, r2, 32);
2401 orr(r0, r0, r2);
2402 ldxi_ui(r2, r1, 3);
2403# endif
2404 fallback_flush();
2405 fallback_patch_jmpi(or, _jit->pc.w);
2406 fallback_patch_jmpi(or2, _jit->pc.w);
2407 fallback_patch_jmpi(or3, _jit->pc.w);
2408 orr(r0, r0, r2);
2409 jit_unget_reg(t0);
2410}
2411
2412static void
2413_unldi7(jit_state_t *_jit, jit_int32_t r0, jit_word_t i1, jit_bool_t sign)
2414{
2415 jit_int32_t t0, r2;
2416 t0 = fallback_jit_get_reg(jit_class_gpr);
2417 r2 = rn(t0);
2418 if ((i1 & -4) == i1) {
2419# if __BYTE_ORDER == __LITTLE_ENDIAN
2420 ldi_ui(r0, i1);
2421 ldi_us(r2, i1 + 4);
2422 lshi(r2, r2, 32);
2423 orr(r0, r0, r2);
2424 if (sign)
2425 ldi_c(r2, i1 + 6);
2426 else
2427 ldi_uc(r2, i1 + 6);
2428 lshi(r2, r2, 48);
2429# else
2430 if (sign)
2431 ldi_i(r0, i1);
2432 else
2433 ldi_ui(r0, i1);
2434 lshi(r0, r0, 24);
2435 ldi_us(r2, i1 + 4);
2436 lshi(r2, r2, 8);
2437 orr(r0, r0, r2);
2438 ldi_uc(r2, i1 + 6);
2439# endif
2440 }
2441 else if ((i1 & -2) == i1) {
2442# if __BYTE_ORDER == __LITTLE_ENDIAN
2443 ldi_us(r0, i1);
2444 ldi_ui(r2, i1 + 2);
2445 lshi(r2, r2, 16);
2446 orr(r0, r0, r2);
2447 if (sign)
2448 ldi_c(r2, i1 + 6);
2449 else
2450 ldi_uc(r2, i1 + 6);
2451 lshi(r2, r2, 48);
2452# else
2453 if (sign)
2454 ldi_s(r0, i1);
2455 else
2456 ldi_us(r0, i1);
2457 lshi(r0, r0, 40);
2458 ldi_ui(r2, i1 + 2);
2459 lshi(r2, r2, 8);
2460 orr(r0, r0, r2);
2461 ldi_uc(r2, i1 + 6);
2462# endif
2463 }
2464 else if ((i1 & 3) == 3) {
2465# if __BYTE_ORDER == __LITTLE_ENDIAN
2466 ldi_uc(r0, i1);
2467 ldi_ui(r2, i1 + 1);
2468 lshi(r2, r2, 8);
2469 orr(r0, r0, r2);
2470 if (sign)
2471 ldi_s(r2, i1 + 5);
2472 else
2473 ldi_us(r2, i1 + 5);
2474 lshi(r2, r2, 40);
2475# else
2476 if (sign)
2477 ldi_c(r0, i1);
2478 else
2479 ldi_uc(r0, i1);
2480 lshi(r0, r0, 48);
2481 ldi_ui(r2, i1 + 1);
2482 lshi(r2, r2, 16);
2483 orr(r0, r0, r2);
2484 ldi_us(r2, i1 + 5);
2485# endif
2486 }
2487 else {
2488# if __BYTE_ORDER == __LITTLE_ENDIAN
2489 ldi_uc(r0, i1);
2490 ldi_us(r2, i1 + 1);
2491 lshi(r2, r2, 8);
2492 orr(r0, r0, r2);
2493 if (sign)
2494 ldi_i(r2, i1 + 3);
2495 else
2496 ldi_ui(r2, i1 + 3);
2497 lshi(r2, r2, 24);
2498# else
2499 if (sign)
2500 ldi_c(r0, i1);
2501 else
2502 ldi_uc(r0, i1);
2503 lshi(r0, r0, 48);
2504 ldi_us(r2, i1 + 1);
2505 lshi(r2, r2, 32);
2506 orr(r0, r0, r2);
2507 ldi_ui(r2, i1 + 3);
2508# endif
2509 }
2510 orr(r0, r0, r2);
2511 jit_unget_reg(t0);
2512}
2513
2514static void
2515_unldr8(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2516{
2517 jit_int32_t t0, r2;
2518 jit_word_t un8, un4, un2, un7, un6, un5, un3, al,
2519 or, or2, or7, or6, or5, or3;
2520 t0 = fallback_jit_get_reg(jit_class_gpr);
2521 r2 = rn(t0);
2522 andi(r2, r1, -8);
2523 un8 = fallback_bner(_jit->pc.w, r1, r2);
2524 ldr_l(r0, r1);
2525 al = fallback_jmpi(_jit->pc.w);
2526 fallback_flush();
2527 fallback_patch_bner(un8, _jit->pc.w);
2528 andi(r2, r1, -4);
2529 un4 = fallback_bner(_jit->pc.w, r1, r2);
2530# if __BYTE_ORDER == __LITTLE_ENDIAN
2531 ldr_ui(r0, r1);
2532 ldxi_i(r2, r1, 4);
2533 lshi(r2, r2, 32);
2534# else
2535 ldr_i(r0, r1);
2536 ldxi_ui(r2, r1, 4);
2537 lshi(r0, r0, 32);
2538# endif
2539 or = fallback_jmpi(_jit->pc.w);
2540 fallback_flush();
2541 fallback_patch_bner(un4, _jit->pc.w);
2542 andi(r2, r1, -2);
2543 un2 = fallback_bner(_jit->pc.w, r1, r2);
2544# if __BYTE_ORDER == __LITTLE_ENDIAN
2545 ldr_us(r0, r1);
2546 ldxi_ui(r2, r1, 2);
2547 lshi(r2, r2, 16);
2548 orr(r0, r0, r2);
2549 ldxi_s(r2, r1, 6);
2550 lshi(r2, r2, 48);
2551# else
2552 ldr_s(r0, r1);
2553 lshi(r0, r0, 48);
2554 ldxi_ui(r2, r1, 2);
2555 lshi(r2, r2, 16);
2556 orr(r0, r0, r2);
2557 ldxi_us(r2, r1, 6);
2558# endif
2559 or2 = fallback_jmpi(_jit->pc.w);
2560 fallback_flush();
2561 fallback_patch_bner(un2, _jit->pc.w);
2562 andi(r2, r1, 7);
2563 un7 = fallback_bnei(_jit->pc.w, r2, 7);
2564# if __BYTE_ORDER == __LITTLE_ENDIAN
2565 ldr_uc(r0, r1);
2566 ldxi_l(r2, r1, 1);
2567 lshi(r2, r2, 8);
2568# else
2569 ldr_c(r0, r1);
2570 ldxi_l(r2, r1, 1);
2571 rshi_u(r2, r2, 8);
2572 lshi(r0, r0, 56);
2573# endif
2574 or7 = fallback_jmpi(_jit->pc.w);
2575 fallback_flush();
2576 fallback_patch_bnei(un7, _jit->pc.w);
2577 un6 = fallback_bnei(_jit->pc.w, r2, 6);
2578# if __BYTE_ORDER == __LITTLE_ENDIAN
2579 ldr_us(r0, r1);
2580 ldxi_l(r2, r1, 2);
2581 lshi(r2, r2, 16);
2582# else
2583 ldr_s(r0, r1);
2584 lshi(r0, r0, 48);
2585 ldxi_l(r2, r1, 2);
2586 rshi_u(r2, r2, 16);
2587# endif
2588 or6 = fallback_jmpi(_jit->pc.w);
2589 fallback_flush();
2590 fallback_patch_bnei(un6, _jit->pc.w);
2591 un5 = fallback_bnei(_jit->pc.w, r2, 5);
2592# if __BYTE_ORDER == __LITTLE_ENDIAN
2593 ldxi_ui(r0, r1, -1);
2594 rshi_u(r0, r0, 8);
2595 ldxi_ui(r2, r1, 3);
2596 lshi(r2, r2, 24);
2597 orr(r0, r0, r2);
2598 ldxi_c(r2, r1, 7);
2599 lshi(r2, r2, 56);
2600# else
2601 ldxi_i(r0, r1, -1);
2602 lshi(r0, r0, 40);
2603 ldxi_ui(r2, r1, 3);
2604 lshi(r2, r2, 8);
2605 orr(r0, r0, r2);
2606 ldxi_uc(r2, r1, 7);
2607# endif
2608 or5 = fallback_jmpi(_jit->pc.w);
2609 fallback_flush();
2610 fallback_patch_bnei(un5, _jit->pc.w);
2611 un3 = fallback_bnei(_jit->pc.w, r2, 3);
2612# if __BYTE_ORDER == __LITTLE_ENDIAN
2613 ldr_uc(r0, r1);
2614 ldxi_ui(r2, r1, 1);
2615 lshi(r2, r2, 8);
2616 orr(r0, r0, r2);
2617 ldxi_i(r2, r1, 5);
2618 lshi(r2, r2, 40);
2619# else
2620 ldr_c(r0, r1);
2621 lshi(r0, r0, 56);
2622 ldxi_ui(r2, r1, 1);
2623 lshi(r2, r2, 24);
2624 orr(r0, r0, r2);
2625 ldxi_ui(r2, r1, 5);
2626 rshi_u(r2, r2, 8);
2627# endif
2628 or3 = fallback_jmpi(_jit->pc.w);
2629 fallback_flush();
2630 fallback_patch_bnei(un3, _jit->pc.w);
2631# if __BYTE_ORDER == __LITTLE_ENDIAN
2632 ldr_uc(r0, r1);
2633 ldxi_us(r2, r1, 1);
2634 lshi(r2, r2, 8);
2635 orr(r0, r0, r2);
2636 ldxi_ui(r2, r1, 3);
2637 lshi(r2, r2, 24);
2638 orr(r0, r0, r2);
2639 ldxi_c(r2, r1, 7);
2640 lshi(r2, r2, 56);
2641# else
2642 ldr_c(r0, r1);
2643 lshi(r0, r0, 56);
2644 ldxi_us(r2, r1, 1);
2645 lshi(r2, r2, 40);
2646 orr(r0, r0, r2);
2647 ldxi_ui(r2, r1, 3);
2648 lshi(r2, r2, 8);
2649 orr(r0, r0, r2);
2650 ldxi_uc(r2, r1, 7);
2651# endif
2652 fallback_flush();
2653 fallback_patch_jmpi(or, _jit->pc.w);
2654 fallback_patch_jmpi(or2, _jit->pc.w);
2655 fallback_patch_jmpi(or7, _jit->pc.w);
2656 fallback_patch_jmpi(or6, _jit->pc.w);
2657 fallback_patch_jmpi(or5, _jit->pc.w);
2658 fallback_patch_jmpi(or3, _jit->pc.w);
2659 orr(r0, r0, r2);
2660 fallback_flush();
2661 fallback_patch_jmpi(al, _jit->pc.w);
2662 jit_unget_reg(t0);
2663}
2664
2665static void
2666_unldi8(jit_state_t *_jit, jit_int32_t r0, jit_word_t i1)
2667{
2668 jit_int32_t t0, r2;
2669 if ((i1 & -8) == i1)
2670 ldi_l(r0, i1);
2671 else {
2672 t0 = fallback_jit_get_reg(jit_class_gpr);
2673 r2 = rn(t0);
2674 if ((i1 & -4) == i1) {
2675# if __BYTE_ORDER == __LITTLE_ENDIAN
2676 ldi_ui(r0, i1);
2677 ldi_i(r2, i1 + 4);
2678 lshi(r2, r2, 32);
2679# else
2680 ldi_i(r0, i1);
2681 ldi_ui(r2, i1 + 4);
2682 lshi(r0, r0, 32);
2683# endif
2684 }
2685 else if ((i1 & -2) == i1) {
2686# if __BYTE_ORDER == __LITTLE_ENDIAN
2687 ldi_us(r0, i1);
2688 ldi_ui(r2, i1 + 2);
2689 lshi(r2, r2, 16);
2690 orr(r0, r0, r2);
2691 ldi_s(r2, i1 + 6);
2692 lshi(r2, r2, 48);
2693# else
2694 ldi_s(r0, i1);
2695 lshi(r0, r0, 48);
2696 ldi_ui(r2, i1 + 2);
2697 lshi(r2, r2, 16);
2698 orr(r0, r0, r2);
2699 ldi_us(r2, i1 + 6);
2700# endif
2701 }
2702 else if ((i1 & 7) == 7) {
2703# if __BYTE_ORDER == __LITTLE_ENDIAN
2704 ldi_uc(r0, i1);
2705 ldi_l(r2, i1 + 1);
2706 lshi(r2, r2, 8);
2707# else
2708 ldi_c(r0, i1);
2709 ldi_l(r2, i1 + 1);
2710 rshi_u(r2, r2, 8);
2711 lshi(r0, r0, 56);
2712# endif
2713 }
2714 else if ((i1 & 7) == 6) {
2715# if __BYTE_ORDER == __LITTLE_ENDIAN
2716 ldi_us(r0, i1);
2717 ldi_l(r2, i1 + 2);
2718 lshi(r2, r2, 16);
2719# else
2720 ldi_s(r0, i1);
2721 lshi(r0, r0, 48);
2722 ldi_l(r2, i1 + 2);
2723 rshi_u(r2, r2, 16);
2724# endif
2725 }
2726 else if ((i1 & 7) == 5) {
2727# if __BYTE_ORDER == __LITTLE_ENDIAN
2728 ldi_ui(r0, i1 - 1);
2729 rshi_u(r0, r0, 8);
2730 ldi_ui(r2, i1 + 3);
2731 lshi(r2, r2, 24);
2732 orr(r0, r0, r2);
2733 ldi_c(r2, i1 + 7);
2734 lshi(r2, r2, 56);
2735# else
2736 ldi_i(r0, i1 - 1);
2737 lshi(r0, r0, 40);
2738 ldi_ui(r2, i1 + 3);
2739 lshi(r2, r2, 8);
2740 orr(r0, r0, r2);
2741 ldi_uc(r2, i1 + 7);
2742# endif
2743 }
2744 else if ((i1 & 7) == 3) {
2745# if __BYTE_ORDER == __LITTLE_ENDIAN
2746 ldi_uc(r0, i1);
2747 ldi_ui(r2, i1 + 1);
2748 lshi(r2, r2, 8);
2749 orr(r0, r0, r2);
2750 ldi_i(r2, i1 + 5);
2751 lshi(r2, r2, 40);
2752# else
2753 ldi_c(r0, i1);
2754 lshi(r0, r0, 56);
2755 ldi_ui(r2, i1 + 1);
2756 lshi(r2, r2, 24);
2757 orr(r0, r0, r2);
2758 ldi_ui(r2, i1 + 5);
2759 rshi_u(r2, r2, 8);
2760# endif
2761 }
2762 else {
2763# if __BYTE_ORDER == __LITTLE_ENDIAN
2764 ldi_uc(r0, i1);
2765 ldi_us(r2, i1 + 1);
2766 lshi(r2, r2, 8);
2767 orr(r0, r0, r2);
2768 ldi_ui(r2, i1 + 3);
2769 lshi(r2, r2, 24);
2770 orr(r0, r0, r2);
2771 ldi_c(r2, i1 + 7);
2772 lshi(r2, r2, 56);
2773# else
2774 ldi_c(r0, i1);
2775 lshi(r0, r0, 56);
2776 ldi_us(r2, i1 + 1);
2777 lshi(r2, r2, 40);
2778 orr(r0, r0, r2);
2779 ldi_ui(r2, i1 + 3);
2780 lshi(r2, r2, 8);
2781 orr(r0, r0, r2);
2782 ldi_uc(r2, i1 + 7);
2783# endif
2784 }
2785 orr(r0, r0, r2);
2786 jit_unget_reg(t0);
2787 }
2788}
2789# endif
2790
2791static void
2792_unstr2(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2793{
2794 jit_int32_t t0, r2;
2795 jit_word_t un2, al;
2796 t0 = fallback_jit_get_reg(jit_class_gpr);
2797 r2 = rn(t0);
2798 andi(r2, r1, -2);
2799 un2 = fallback_bner(_jit->pc.w, r2, r1);
2800 str_s(r1, r0);
2801 al = fallback_jmpi(_jit->pc.w);
2802 fallback_flush();
2803 fallback_patch_bner(un2, _jit->pc.w);
2804#if __BYTE_ORDER == __LITTLE_ENDIAN
2805 str_c(r1, r0);
2806 rshi_u(r2, r0, 8);
2807 stxi_c(1, r1, r2);
2808#else
2809 stxi_c(1, r1, r0);
2810 rshi_u(r2, r0, 8);
2811 str_c(r1, r2);
2812#endif
2813 fallback_flush();
2814 fallback_patch_jmpi(al, _jit->pc.w);
2815 jit_unget_reg(t0);
2816}
2817
2818static void
2819_unsti2(jit_state_t *_jit, jit_int32_t r0, jit_word_t i1)
2820{
2821 jit_int32_t t0, r2;
2822 if ((i1 & -2) == i1)
2823 sti_s(i1, r0);
2824 else {
2825 t0 = fallback_jit_get_reg(jit_class_gpr);
2826 r2 = rn(t0);
2827#if __BYTE_ORDER == __LITTLE_ENDIAN
2828 sti_c(i1, r0);
2829 rshi_u(r2, r0, 8);
2830 sti_c(1 + i1, r2);
2831#else
2832 sti_c(1 + i1, r0);
2833 rshi_u(r2, r0, 8);
2834 sti_c(i1, r2);
2835#endif
2836 jit_unget_reg(t0);
2837 }
2838}
2839
2840static void
2841_unstr3(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2842{
2843 jit_int32_t t0, r2;
2844 jit_word_t un2, al;
2845 t0 = fallback_jit_get_reg(jit_class_gpr);
2846 r2 = rn(t0);
2847 andi(r2, r1, -2);
2848 un2 = fallback_bner(_jit->pc.w, r2, r1);
2849# if __BYTE_ORDER == __LITTLE_ENDIAN
2850 str_s(r1, r0);
2851 rshi(r2, r0, 16);
2852 stxi_c(2, r1, r2);
2853# else
2854 stxi_c(2, r1, r0);
2855 rshi(r2, r0, 8);
2856 str_s(r1, r2);
2857# endif
2858 al = fallback_jmpi(_jit->pc.w);
2859 fallback_flush();
2860 fallback_patch_bner(un2, _jit->pc.w);
2861# if __BYTE_ORDER == __LITTLE_ENDIAN
2862 str_c(r1, r0);
2863 rshi(r2, r0, 8);
2864 stxi_s(1, r1, r2);
2865# else
2866 stxi_s(1, r1, r0);
2867 rshi(r2, r0, 16);
2868 str_c(r1, r2);
2869# endif
2870 fallback_flush();
2871 fallback_patch_jmpi(al, _jit->pc.w);
2872 jit_unget_reg(t0);
2873}
2874
2875static void
2876_unsti3(jit_state_t *_jit, jit_int32_t r0, jit_word_t i1)
2877{
2878 jit_int32_t t0, r2;
2879 t0 = fallback_jit_get_reg(jit_class_gpr);
2880 r2 = rn(t0);
2881 if ((i1 & -2) == i1) {
2882# if __BYTE_ORDER == __LITTLE_ENDIAN
2883 sti_s(i1, r0);
2884 rshi(r2, r0, 16);
2885 sti_c(2 + i1, r2);
2886# else
2887 sti_c(2 + i1, r0);
2888 rshi(r2, r0, 8);
2889 sti_s(i1, r2);
2890# endif
2891 }
2892 else {
2893# if __BYTE_ORDER == __LITTLE_ENDIAN
2894 sti_c(i1, r0);
2895 rshi(r2, r0, 8);
2896 sti_s(1 + i1, r2);
2897# else
2898 sti_s(1 + i1, r0);
2899 rshi(r2, r0, 16);
2900 sti_c(i1, r2);
2901# endif
2902 }
2903 jit_unget_reg(t0);
2904}
2905
2906static void
2907_unstr4(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2908{
2909 jit_int32_t t0, r2;
2910 jit_word_t un4, un2, al, al2;
2911 t0 = fallback_jit_get_reg(jit_class_gpr);
2912 r2 = rn(t0);
2913 andi(r2, r1, -4);
2914 un4 = fallback_bner(_jit->pc.w, r2, r1);
2915 str_i(r1, r0);
2916 al = fallback_jmpi(_jit->pc.w);
2917 fallback_flush();
2918 fallback_patch_bner(un4, _jit->pc.w);
2919 andi(r2, r1, -2);
2920 un2 = fallback_bner(_jit->pc.w, r2, r1);
2921# if __BYTE_ORDER == __LITTLE_ENDIAN
2922 str_s(r1, r0);
2923 rshi(r2, r0, 16);
2924 stxi_s(2, r1, r2);
2925# else
2926 stxi_s(2, r1, r0);
2927 rshi(r2, r0, 16);
2928 str_s(r1, r2);
2929# endif
2930 al2 = fallback_jmpi(_jit->pc.w);
2931 fallback_flush();
2932 fallback_patch_bner(un2, _jit->pc.w);
2933# if __BYTE_ORDER == __LITTLE_ENDIAN
2934 str_c(r1, r0);
2935 rshi(r2, r0, 8);
2936 stxi_s(1, r1, r2);
2937 rshi(r2, r2, 16);
2938 stxi_c(3, r1, r2);
2939# else
2940 stxi_c(3, r1, r0);
2941 rshi(r2, r0, 8);
2942 stxi_s(1, r1, r2);
2943 rshi(r2, r2, 16);
2944 str_c(r1, r2);
2945# endif
2946 fallback_flush();
2947 fallback_patch_jmpi(al, _jit->pc.w);
2948 fallback_patch_jmpi(al2, _jit->pc.w);
2949 jit_unget_reg(t0);
2950}
2951
2952static void
2953_unsti4(jit_state_t *_jit, jit_int32_t r0, jit_word_t i1)
2954{
2955 jit_int32_t t0, r2;
2956 if ((i1 & -4) == i1)
2957 sti_i(i1, r0);
2958 else {
2959 t0 = fallback_jit_get_reg(jit_class_gpr);
2960 r2 = rn(t0);
2961 if ((i1 & -2) == i1) {
2962# if __BYTE_ORDER == __LITTLE_ENDIAN
2963 sti_s(i1, r0);
2964 rshi(r2, r0, 16);
2965 sti_s(2 + i1, r2);
2966# else
2967 sti_s(2 + i1, r0);
2968 rshi(r2, r0, 16);
2969 sti_s(i1, r2);
2970# endif
2971 }
2972 else {
2973# if __BYTE_ORDER == __LITTLE_ENDIAN
2974 sti_c(i1, r0);
2975 rshi(r2, r0, 8);
2976 sti_s(1 + i1, r2);
2977 rshi(r2, r2, 16);
2978 sti_c(3 + i1, r2);
2979# else
2980 sti_c(3 + i1, r0);
2981 rshi(r2, r0, 8);
2982 sti_s(1 + i1, r2);
2983 rshi(r2, r2, 16);
2984 sti_c(i1, r2);
2985# endif
2986 }
2987 jit_unget_reg(t0);
2988 }
2989}
2990
2991# if __WORDSIZE == 64
2992static void
2993_unstr5(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2994{
2995 jit_int32_t t0, r2;
2996 jit_word_t un3, un2, un1, al, al2, al1;
2997 t0 = fallback_jit_get_reg(jit_class_gpr);
2998 r2 = rn(t0);
2999 andi(r2, r1, 3);
3000 un3 = fallback_bnei(_jit->pc.w, r2, 3);
3001# if __BYTE_ORDER == __LITTLE_ENDIAN
3002 str_c(r1, r0);
3003 rshi(r2, r0, 8);
3004 stxi_i(1, r1, r2);
3005# else
3006 stxi_i(1, r1, r0);
3007 rshi(r2, r0, 32);
3008 str_c(r1, r2);
3009# endif
3010 al = fallback_jmpi(_jit->pc.w);
3011 fallback_flush();
3012 fallback_patch_bnei(un3, _jit->pc.w);
3013 un2 = fallback_bnei(_jit->pc.w, r2, 2);
3014# if __BYTE_ORDER == __LITTLE_ENDIAN
3015 str_s(r1, r0);
3016 rshi(r2, r0, 16);
3017 stxi_s(2, r1, r2);
3018 rshi(r2, r2, 16);
3019 stxi_c(4, r1, r2);
3020# else
3021 stxi_c(4, r1, r0);
3022 rshi(r2, r0, 8);
3023 stxi_s(2, r1, r2);
3024 rshi(r2, r2, 16);
3025 str_s(r1, r2);
3026# endif
3027 al2 = fallback_jmpi(_jit->pc.w);
3028 fallback_flush();
3029 fallback_patch_bnei(un2, _jit->pc.w);
3030 un1 = fallback_bnei(_jit->pc.w, r2, 1);
3031# if __BYTE_ORDER == __LITTLE_ENDIAN
3032 str_c(r1, r0);
3033 rshi(r2, r0, 8);
3034 stxi_s(1, r1, r2);
3035 rshi(r2, r2, 16);
3036 stxi_s(3, r1, r2);
3037# else
3038 stxi_s(3, r1, r0);
3039 rshi(r2, r0, 16);
3040 stxi_s(1, r1, r2);
3041 rshi(r2, r2, 16);
3042 str_c(r1, r2);
3043# endif
3044 al1 = fallback_jmpi(_jit->pc.w);
3045 fallback_flush();
3046 fallback_patch_bnei(un1, _jit->pc.w);
3047# if __BYTE_ORDER == __LITTLE_ENDIAN
3048 str_i(r1, r0);
3049 rshi(r2, r0, 32);
3050 stxi_c(4, r1, r2);
3051# else
3052 stxi_c(4, r1, r0);
3053 rshi(r2, r0, 8);
3054 str_i(r1, r2);
3055# endif
3056 fallback_flush();
3057 fallback_patch_jmpi(al, _jit->pc.w);
3058 fallback_patch_jmpi(al2, _jit->pc.w);
3059 fallback_patch_jmpi(al1, _jit->pc.w);
3060 jit_unget_reg(t0);
3061}
3062
3063static void
3064_unsti5(jit_state_t *_jit, jit_int32_t r0, jit_word_t i1)
3065{
3066 jit_int32_t t0, r2;
3067 t0 = fallback_jit_get_reg(jit_class_gpr);
3068 r2 = rn(t0);
3069 if ((i1 & 3) == 3) {
3070# if __BYTE_ORDER == __LITTLE_ENDIAN
3071 sti_c(i1, r0);
3072 rshi(r2, r0, 8);
3073 sti_i(1 + i1, r2);
3074# else
3075 sti_i(1 + i1, r0);
3076 rshi(r2, r0, 32);
3077 sti_c(i1, r2);
3078# endif
3079 }
3080 else if ((i1 & 3) == 2) {
3081# if __BYTE_ORDER == __LITTLE_ENDIAN
3082 sti_s(i1, r0);
3083 rshi(r2, r0, 16);
3084 sti_s(2 + i1, r2);
3085 rshi(r2, r2, 16);
3086 sti_c(4 + i1, r2);
3087# else
3088 sti_c(4 + i1, r0);
3089 rshi(r2, r0, 8);
3090 sti_s(2 + i1, r2);
3091 rshi(r2, r2, 16);
3092 sti_s(i1, r2);
3093# endif
3094 }
3095 else if ((i1 & 3) == 1) {
3096# if __BYTE_ORDER == __LITTLE_ENDIAN
3097 sti_c(i1, r0);
3098 rshi(r2, r0, 8);
3099 sti_s(1 + i1, r2);
3100 rshi(r2, r2, 16);
3101 sti_s(3 + i1, r2);
3102# else
3103 sti_s(3 + i1, r0);
3104 rshi(r2, r0, 16);
3105 sti_s(1 + i1, r2);
3106 rshi(r2, r2, 16);
3107 sti_c(i1, r2);
3108# endif
3109 }
3110 else {
3111# if __BYTE_ORDER == __LITTLE_ENDIAN
3112 sti_i(i1, r0);
3113 rshi(r2, r0, 32);
3114 sti_c(4 + i1, r2);
3115# else
3116 sti_c(4 + i1, r0);
3117 rshi(r2, r0, 8);
3118 sti_i(i1, r2);
3119# endif
3120 }
3121 jit_unget_reg(t0);
3122}
3123
3124static void
3125_unstr6(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3126{
3127 jit_int32_t t0, r2;
3128 jit_word_t un3, un2, un1, al, al2, al1;
3129 t0 = fallback_jit_get_reg(jit_class_gpr);
3130 r2 = rn(t0);
3131 andi(r2, r1, 3);
3132 un3 = fallback_bnei(_jit->pc.w, r2, 3);
3133# if __BYTE_ORDER == __LITTLE_ENDIAN
3134 str_c(r1, r0);
3135 rshi(r2, r0, 8);
3136 stxi_i(1, r1, r2);
3137 rshi(r2, r2, 32);
3138 stxi_c(5, r1, r2);
3139# else
3140 stxi_c(5, r1, r0);
3141 rshi(r2, r0, 8);
3142 stxi_i(1, r1, r2);
3143 rshi(r2, r2, 32);
3144 str_c(r1, r2);
3145# endif
3146 al = fallback_jmpi(_jit->pc.w);
3147 fallback_flush();
3148 fallback_patch_bnei(un3, _jit->pc.w);
3149 un2 = fallback_bnei(_jit->pc.w, r2, 2);
3150# if __BYTE_ORDER == __LITTLE_ENDIAN
3151 str_s(r1, r0);
3152 rshi(r2, r0, 16);
3153 stxi_s(2, r1, r2);
3154 rshi(r2, r2, 16);
3155 stxi_s(4, r1, r2);
3156# else
3157 stxi_s(4, r1, r0);
3158 rshi(r2, r0, 16);
3159 stxi_s(2, r1, r2);
3160 rshi(r2, r2, 16);
3161 str_s(r1, r2);
3162# endif
3163 al2 = fallback_jmpi(_jit->pc.w);
3164 fallback_flush();
3165 fallback_patch_bnei(un2, _jit->pc.w);
3166 un1 = fallback_bnei(_jit->pc.w, r2, 1);
3167# if __BYTE_ORDER == __LITTLE_ENDIAN
3168 str_c(r1, r0);
3169 rshi(r2, r0, 8);
3170 stxi_s(1, r1, r2);
3171 rshi(r2, r2, 16);
3172 stxi_s(3, r1, r2);
3173 rshi(r2, r2, 16);
3174 stxi_c(5, r1, r2);
3175# else
3176 stxi_c(5, r1, r0);
3177 rshi(r2, r0, 8);
3178 stxi_s(3, r1, r2);
3179 rshi(r2, r2, 16);
3180 stxi_s(1, r1, r2);
3181 rshi(r2, r2, 16);
3182 str_c(r1, r2);
3183# endif
3184 al1 = fallback_jmpi(_jit->pc.w);
3185 fallback_flush();
3186 fallback_patch_bnei(un1, _jit->pc.w);
3187# if __BYTE_ORDER == __LITTLE_ENDIAN
3188 str_i(r1, r0);
3189 rshi(r2, r0, 32);
3190 stxi_s(4, r1, r2);
3191# else
3192 stxi_s(4, r1, r0);
3193 rshi(r2, r0, 16);
3194 str_i(r1, r2);
3195# endif
3196 fallback_flush();
3197 fallback_patch_jmpi(al, _jit->pc.w);
3198 fallback_patch_jmpi(al2, _jit->pc.w);
3199 fallback_patch_jmpi(al1, _jit->pc.w);
3200 jit_unget_reg(t0);
3201}
3202
3203static void
3204_unsti6(jit_state_t *_jit, jit_int32_t r0, jit_word_t i1)
3205{
3206 jit_int32_t t0, r2;
3207 t0 = fallback_jit_get_reg(jit_class_gpr);
3208 r2 = rn(t0);
3209 if ((i1 & 3) == 3) {
3210# if __BYTE_ORDER == __LITTLE_ENDIAN
3211 sti_c(i1, r0);
3212 rshi(r2, r0, 8);
3213 sti_i(1 + i1, r2);
3214 rshi(r2, r2, 32);
3215 sti_c(5 + i1, r2);
3216# else
3217 sti_c(5 + i1, r0);
3218 rshi(r2, r0, 8);
3219 sti_i(1 + i1, r2);
3220 rshi(r2, r2, 32);
3221 sti_c(i1, r2);
3222# endif
3223 }
3224 else if ((i1 & 3) == 2) {
3225# if __BYTE_ORDER == __LITTLE_ENDIAN
3226 sti_s(i1, r0);
3227 rshi(r2, r0, 16);
3228 sti_s(2 + i1, r2);
3229 rshi(r2, r2, 16);
3230 sti_s(4 + i1, r2);
3231# else
3232 sti_s(4 + i1, r0);
3233 rshi(r2, r0, 16);
3234 sti_s(2 + i1, r2);
3235 rshi(r2, r2, 16);
3236 sti_s(i1, r2);
3237# endif
3238 }
3239 else if ((i1 & 3) == 1) {
3240# if __BYTE_ORDER == __LITTLE_ENDIAN
3241 sti_c(i1, r0);
3242 rshi(r2, r0, 8);
3243 sti_s(1 + i1, r2);
3244 rshi(r2, r2, 16);
3245 sti_s(3 + i1, r2);
3246 rshi(r2, r2, 16);
3247 sti_c(5 + i1, r2);
3248# else
3249 sti_c(5 + i1, r0);
3250 rshi(r2, r0, 8);
3251 sti_s(3 + i1, r2);
3252 rshi(r2, r2, 16);
3253 sti_s(1 + i1, r2);
3254 rshi(r2, r2, 16);
3255 sti_c(i1, r2);
3256# endif
3257 }
3258 else {
3259# if __BYTE_ORDER == __LITTLE_ENDIAN
3260 sti_i(i1, r0);
3261 rshi(r2, r0, 32);
3262 sti_s(4 + i1, r2);
3263# else
3264 sti_s(4 + i1, r0);
3265 rshi(r2, r0, 16);
3266 sti_i(i1, r2);
3267# endif
3268 }
3269 jit_unget_reg(t0);
3270}
3271
3272static void
3273_unstr7(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3274{
3275 jit_int32_t t0, r2;
3276 jit_word_t un3, un2, un1, al, al2, al1;
3277 t0 = fallback_jit_get_reg(jit_class_gpr);
3278 r2 = rn(t0);
3279 andi(r2, r1, 3);
3280 un3 = fallback_bnei(_jit->pc.w, r2, 3);
3281# if __BYTE_ORDER == __LITTLE_ENDIAN
3282 str_c(r1, r0);
3283 rshi(r2, r0, 8);
3284 stxi_i(1, r1, r2);
3285 rshi(r2, r2, 32);
3286 stxi_s(5, r1, r2);
3287# else
3288 stxi_s(5, r1, r0);
3289 rshi(r2, r0, 16);
3290 stxi_i(1, r1, r2);
3291 rshi(r2, r2, 32);
3292 str_c(r1, r2);
3293# endif
3294 al = fallback_jmpi(_jit->pc.w);
3295 fallback_flush();
3296 fallback_patch_bnei(un3, _jit->pc.w);
3297 un2 = fallback_bnei(_jit->pc.w, r2, 2);
3298# if __BYTE_ORDER == __LITTLE_ENDIAN
3299 str_s(r1, r0);
3300 rshi(r2, r0, 16);
3301 stxi_i(2, r1, r2);
3302 rshi(r2, r2, 32);
3303 stxi_c(6, r1, r2);
3304# else
3305 stxi_c(6, r1, r0);
3306 rshi(r2, r0, 8);
3307 stxi_i(2, r1, r2);
3308 rshi(r2, r2, 32);
3309 str_s(r1, r2);
3310# endif
3311 al2 = fallback_jmpi(_jit->pc.w);
3312 fallback_flush();
3313 fallback_patch_bnei(un2, _jit->pc.w);
3314 un1 = fallback_bnei(_jit->pc.w, r2, 1);
3315# if __BYTE_ORDER == __LITTLE_ENDIAN
3316 str_c(r1, r0);
3317 rshi(r2, r0, 8);
3318 stxi_s(1, r1, r2);
3319 rshi(r2, r2, 16);
3320 stxi_i(3, r1, r2);
3321# else
3322 stxi_i(3, r1, r0);
3323 rshi(r2, r0, 32);
3324 stxi_s(1, r1, r2);
3325 rshi(r2, r2, 16);
3326 str_c(r1, r2);
3327# endif
3328 al1 = fallback_jmpi(_jit->pc.w);
3329 fallback_flush();
3330 fallback_patch_bnei(un1, _jit->pc.w);
3331# if __BYTE_ORDER == __LITTLE_ENDIAN
3332 str_i(r1, r0);
3333 rshi(r2, r0, 32);
3334 stxi_s(4, r1, r2);
3335 rshi(r2, r2, 16);
3336 stxi_c(6, r1, r2);
3337# else
3338 stxi_c(6, r1, r0);
3339 rshi(r2, r0, 8);
3340 stxi_s(4, r1, r2);
3341 rshi(r2, r2, 16);
3342 str_i(r1, r2);
3343# endif
3344 fallback_flush();
3345 fallback_patch_jmpi(al, _jit->pc.w);
3346 fallback_patch_jmpi(al2, _jit->pc.w);
3347 fallback_patch_jmpi(al1, _jit->pc.w);
3348 jit_unget_reg(t0);
3349}
3350
3351static void
3352_unsti7(jit_state_t *_jit, jit_int32_t r0, jit_word_t i1)
3353{
3354 jit_int32_t t0, r2;
3355 t0 = fallback_jit_get_reg(jit_class_gpr);
3356 r2 = rn(t0);
3357 if ((i1 & 3) == 3) {
3358# if __BYTE_ORDER == __LITTLE_ENDIAN
3359 sti_c(i1, r0);
3360 rshi(r2, r0, 8);
3361 sti_i(1 + i1, r2);
3362 rshi(r2, r2, 32);
3363 sti_s(5 + i1, r2);
3364# else
3365 sti_s(5 + i1, r0);
3366 rshi(r2, r0, 16);
3367 sti_i(1 + i1, r2);
3368 rshi(r2, r2, 32);
3369 sti_c(i1, r2);
3370# endif
3371 }
3372 else if ((i1 & 3) == 2) {
3373# if __BYTE_ORDER == __LITTLE_ENDIAN
3374 sti_s(i1, r0);
3375 rshi(r2, r0, 16);
3376 sti_i(2 + i1, r2);
3377 rshi(r2, r2, 32);
3378 sti_c(6 + i1, r2);
3379# else
3380 sti_c(6 + i1, r0);
3381 rshi(r2, r0, 8);
3382 sti_i(2 + i1, r2);
3383 rshi(r2, r2, 32);
3384 sti_s(i1, r2);
3385# endif
3386 }
3387 else if ((i1 & 3) == 1) {
3388# if __BYTE_ORDER == __LITTLE_ENDIAN
3389 sti_c(i1, r0);
3390 rshi(r2, r0, 8);
3391 sti_s(1 + i1, r2);
3392 rshi(r2, r2, 16);
3393 sti_i(3 + i1, r2);
3394# else
3395 sti_i(3 + i1, r0);
3396 rshi(r2, r0, 32);
3397 sti_s(1 + i1, r2);
3398 rshi(r2, r2, 16);
3399 sti_c(i1, r2);
3400# endif
3401 }
3402 else {
3403# if __BYTE_ORDER == __LITTLE_ENDIAN
3404 sti_i(i1, r0);
3405 rshi(r2, r0, 32);
3406 sti_s(4 + i1, r2);
3407 rshi(r2, r2, 16);
3408 sti_c(6 + i1, r2);
3409# else
3410 sti_c(6 + i1, r0);
3411 rshi(r2, r0, 8);
3412 sti_s(4 + i1, r2);
3413 rshi(r2, r2, 16);
3414 sti_i(i1, r2);
3415# endif
3416 }
3417 jit_unget_reg(t0);
3418}
3419
3420static void
3421_unstr8(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3422{
3423 jit_int32_t t0, r2;
3424 jit_word_t un8, un4, un2, un3, al, al4, al2, al3;
3425 t0 = fallback_jit_get_reg(jit_class_gpr);
3426 r2 = rn(t0);
3427 andi(r2, r1, -8);
3428 un8 = fallback_bner(_jit->pc.w, r2, r1);
3429 str_l(r1, r0);
3430 al = fallback_jmpi(_jit->pc.w);
3431 fallback_flush();
3432 fallback_patch_bner(un8, _jit->pc.w);
3433 andi(r2, r1, -4);
3434 un4 = fallback_bner(_jit->pc.w, r2, r1);
3435# if __BYTE_ORDER == __LITTLE_ENDIAN
3436 str_i(r1, r0);
3437 rshi(r2, r0, 32);
3438 stxi_i(4, r1, r2);
3439# else
3440 stxi_i(4, r1, r0);
3441 rshi(r2, r0, 32);
3442 str_i(r1, r2);
3443# endif
3444 al4 = fallback_jmpi(_jit->pc.w);
3445 fallback_flush();
3446 fallback_patch_bner(un4, _jit->pc.w);
3447 andi(r2, r1, -2);
3448 un2 = fallback_bner(_jit->pc.w, r2, r1);
3449# if __BYTE_ORDER == __LITTLE_ENDIAN
3450 str_s(r1, r0);
3451 rshi(r2, r0, 16);
3452 stxi_i(2, r1, r2);
3453 rshi(r2, r2, 32);
3454 stxi_s(6, r1, r2);
3455# else
3456 stxi_s(6, r1, r0);
3457 rshi(r2, r0, 16);
3458 stxi_i(2, r1, r2);
3459 rshi(r2, r2, 32);
3460 str_s(r1, r2);
3461# endif
3462 al2 = fallback_jmpi(_jit->pc.w);
3463 fallback_flush();
3464 fallback_patch_bner(un2, _jit->pc.w);
3465 andi(r2, r1, 3);
3466 un3 = fallback_bnei(_jit->pc.w, r2, 3);
3467# if __BYTE_ORDER == __LITTLE_ENDIAN
3468 str_c(r1, r0);
3469 rshi(r2, r0, 8);
3470 stxi_i(1, r1, r2);
3471 rshi(r2, r2, 32);
3472 stxi_s(5, r1, r2);
3473 rshi(r2, r2, 16);
3474 stxi_c(7, r1, r2);
3475# else
3476 stxi_c(7, r1, r0);
3477 rshi(r2, r0, 8);
3478 stxi_s(5, r1, r2);
3479 rshi(r2, r2, 16);
3480 stxi_i(1, r1, r2);
3481 rshi(r2, r2, 32);
3482 str_c(r1, r2);
3483# endif
3484 al3 = fallback_jmpi(_jit->pc.w);
3485 fallback_flush();
3486 fallback_patch_bnei(un3, _jit->pc.w);
3487# if __BYTE_ORDER == __LITTLE_ENDIAN
3488 str_c(r1, r0);
3489 rshi(r2, r0, 8);
3490 stxi_s(1, r1, r2);
3491 rshi(r2, r2, 16);
3492 stxi_i(3, r1, r2);
3493 rshi(r2, r2, 32);
3494 stxi_c(7, r1, r2);
3495# else
3496 stxi_c(7, r1, r0);
3497 rshi(r2, r0, 8);
3498 stxi_i(3, r1, r2);
3499 rshi(r2, r2, 32);
3500 stxi_s(1, r1, r2);
3501 rshi(r2, r2, 16);
3502 str_c(r1, r2);
3503# endif
3504 fallback_flush();
3505 fallback_patch_jmpi(al, _jit->pc.w);
3506 fallback_patch_jmpi(al4, _jit->pc.w);
3507 fallback_patch_jmpi(al2, _jit->pc.w);
3508 fallback_patch_jmpi(al3, _jit->pc.w);
3509 jit_unget_reg(t0);
3510}
3511
3512static void
3513_unsti8(jit_state_t *_jit, jit_int32_t r0, jit_word_t i1)
3514{
3515 jit_int32_t t0, r2;
3516 if ((i1 & -8) == i1)
3517 sti_l(i1, r0);
3518 else {
3519 t0 = fallback_jit_get_reg(jit_class_gpr);
3520 r2 = rn(t0);
3521 if ((i1 & -4) == i1) {
3522# if __BYTE_ORDER == __LITTLE_ENDIAN
3523 sti_i(i1, r0);
3524 rshi(r2, r0, 32);
3525 sti_i(4 + i1, r2);
3526# else
3527 sti_i(4 + i1, r0);
3528 rshi(r2, r0, 32);
3529 sti_i(i1, r2);
3530# endif
3531 }
3532 else if ((i1 & -2) == i1) {
3533# if __BYTE_ORDER == __LITTLE_ENDIAN
3534 sti_s(i1, r0);
3535 rshi(r2, r0, 16);
3536 sti_i(2 + i1, r2);
3537 rshi(r2, r2, 32);
3538 sti_s(6 + i1, r2);
3539# else
3540 sti_s(6 + i1, r0);
3541 rshi(r2, r0, 16);
3542 sti_i(2 + i1, r2);
3543 rshi(r2, r2, 32);
3544 sti_s(i1, r2);
3545# endif
3546 }
3547 else if ((i1 & 3) == 3) {
3548# if __BYTE_ORDER == __LITTLE_ENDIAN
3549 sti_c(i1, r0);
3550 rshi(r2, r0, 8);
3551 sti_i(1 + i1, r2);
3552 rshi(r2, r2, 32);
3553 sti_s(5 + i1, r2);
3554 rshi(r2, r2, 16);
3555 sti_c(7 + i1, r2);
3556# else
3557 sti_c(7 + i1, r0);
3558 rshi(r2, r0, 8);
3559 sti_s(5 + i1, r2);
3560 rshi(r2, r2, 16);
3561 sti_i(1 + i1, r2);
3562 rshi(r2, r2, 32);
3563 sti_c(i1, r2);
3564# endif
3565 }
3566 else {
3567# if __BYTE_ORDER == __LITTLE_ENDIAN
3568 sti_c(i1, r0);
3569 rshi(r2, r0, 8);
3570 sti_s(1 + i1, r2);
3571 rshi(r2, r2, 16);
3572 sti_i(3 + i1, r2);
3573 rshi(r2, r2, 32);
3574 sti_c(7 + i1, r2);
3575# else
3576 sti_c(7 + i1, r0);
3577 rshi(r2, r0, 8);
3578 sti_i(3 + i1, r2);
3579 rshi(r2, r2, 32);
3580 sti_s(1 + i1, r2);
3581 rshi(r2, r2, 16);
3582 sti_c(i1, r2);
3583# endif
3584 }
3585 jit_unget_reg(t0);
3586 }
3587}
3588# endif
3589
3590
3591static void
3592_unldrw(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
3593{
3594 jit_word_t cross, done;
3595 jit_int32_t t0, t1, t2, t3;
3596 t0 = fallback_jit_get_reg(jit_class_gpr);
3597 if (r0 == r1) {
3598 t1 = fallback_jit_get_reg(jit_class_gpr);
3599 movr(rn(t1), r1);
3600 r1 = rn(t1);
3601 }
3602 else
3603 t1 = JIT_NOREG;
3604 andi(rn(t0), r1, -(jit_word_t)sizeof(jit_word_t));
3605 ldr(r0, rn(t0));
3606 done = fallback_beqr(_jit->pc.w, rn(t0), r1);
3607 t2 = fallback_jit_get_reg(jit_class_gpr);
3608 andi(rn(t2), r1, sizeof(jit_word_t) - 1);
3609 t3 = fallback_jit_get_reg(jit_class_gpr);
3610 addi(rn(t3), rn(t2), i0);
3611 cross = fallback_blei(_jit->pc.w, rn(t3), sizeof(jit_word_t));
3612 ldxi(rn(t0), rn(t0), sizeof(jit_word_t));
3613 fallback_flush();
3614 fallback_patch_blei(cross, _jit->pc.w);
3615 jit_unget_reg(t3);
3616 lshi(rn(t2), rn(t2), 3);
3617#if __BYTE_ORDER == __LITTLE_ENDIAN
3618 rshr_u(r0, r0, rn(t2));
3619 rsbi(rn(t2), rn(t2), __WORDSIZE);
3620 lshr(rn(t0), rn(t0), rn(t2));
3621#else
3622 lshr(r0, r0, rn(t2));
3623 rsbi(rn(t2), rn(t2), __WORDSIZE);
3624 rshr_u(rn(t0), rn(t0), rn(t2));
3625#endif
3626 jit_unget_reg(t2);
3627 orr(r0, r0, rn(t0));
3628 fallback_flush();
3629 fallback_patch_beqr(done, _jit->pc.w);
3630 jit_unget_reg(t0);
3631 if (t1 != JIT_NOREG)
3632 jit_unget_reg(t1);
3633}
3634
3635static void
3636_unldiw(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0, jit_word_t i1)
3637{
3638 jit_word_t addr;
3639 jit_int32_t t0, sh;
3640 addr = i0 & -(jit_word_t)sizeof(jit_word_t);
3641 ldi(r0, addr);
3642 if (i0 != addr) {
3643 sh = (i0 & (sizeof(jit_word_t) - 1)) << 3;
3644 if (sh + i1 > sizeof(jit_word_t)) {
3645 t0 = fallback_jit_get_reg(jit_class_gpr);
3646 ldi(rn(t0), addr + sizeof(jit_word_t));
3647#if __BYTE_ORDER == __LITTLE_ENDIAN
3648 rshi_u(r0, r0, sh);
3649 lshi(rn(t0), rn(t0), __WORDSIZE - sh);
3650#else
3651 lshi(r0, r0, sh);
3652 rshi_u(rn(t0), rn(t0), __WORDSIZE - sh);
3653#endif
3654 orr(r0, r0, rn(t0));
3655 jit_unget_reg(t0);
3656 }
3657 }
3658}
3659
3660static void
3661_unldx(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
3662{
3663 switch (i0) {
3664 case 2:
3665#if __BYTE_ORDER == __LITTLE_ENDIAN
3666 extr_s(r0, r0);
3667#else
3668 rshi(r0, r0, __WORDSIZE - 16);
3669#endif
3670 break;
3671 case 3:
3672#if __BYTE_ORDER == __LITTLE_ENDIAN
3673 lshi(r0, r0, __WORDSIZE - 24);
3674#endif
3675 rshi(r0, r0, __WORDSIZE - 24);
3676 break;
3677#if __WORDSIZE == 32
3678 default:
3679#else
3680 case 4:
3681# if __BYTE_ORDER == __LITTLE_ENDIAN
3682 extr_i(r0, r0);
3683# else
3684 rshi(r0, r0, __WORDSIZE - 32);
3685# endif
3686#endif
3687 break;
3688#if __WORDSIZE == 64
3689 case 5:
3690# if __BYTE_ORDER == __LITTLE_ENDIAN
3691 lshi(r0, r0, __WORDSIZE - 40);
3692# endif
3693 rshi(r0, r0, __WORDSIZE - 40);
3694 break;
3695 case 6:
3696# if __BYTE_ORDER == __LITTLE_ENDIAN
3697 lshi(r0, r0, __WORDSIZE - 48);
3698# endif
3699 rshi(r0, r0, __WORDSIZE - 48);
3700 break;
3701 case 7:
3702# if __BYTE_ORDER == __LITTLE_ENDIAN
3703 lshi(r0, r0, __WORDSIZE - 56);
3704# endif
3705 rshi(r0, r0, __WORDSIZE - 56);
3706 break;
3707 default:
3708 break;
3709#endif
3710 }
3711}
3712
3713static void
3714_unldx_u(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
3715{
3716 switch (i0) {
3717 case 2:
3718#if __BYTE_ORDER == __LITTLE_ENDIAN
3719 extr_us(r0, r0);
3720#else
3721 rshi_u(r0, r0, __WORDSIZE - 16);
3722#endif
3723 break;
3724 case 3:
3725#if __BYTE_ORDER == __LITTLE_ENDIAN
3726 lshi(r0, r0, __WORDSIZE - 24);
3727#endif
3728 rshi_u(r0, r0, __WORDSIZE - 24);
3729 break;
3730#if __WORDSIZE == 32
3731 default:
3732#else
3733 case 4:
3734# if __BYTE_ORDER == __LITTLE_ENDIAN
3735 extr_ui(r0, r0);
3736# else
3737 rshi_u(r0, r0, __WORDSIZE - 32);
3738# endif
3739#endif
3740 break;
3741#if __WORDSIZE == 64
3742 case 5:
3743# if __BYTE_ORDER == __LITTLE_ENDIAN
3744 lshi(r0, r0, __WORDSIZE - 40);
3745# endif
3746 rshi_u(r0, r0, __WORDSIZE - 40);
3747 break;
3748 case 6:
3749# if __BYTE_ORDER == __LITTLE_ENDIAN
3750 lshi(r0, r0, __WORDSIZE - 48);
3751# endif
3752 rshi_u(r0, r0, __WORDSIZE - 48);
3753 break;
3754 case 7:
3755# if __BYTE_ORDER == __LITTLE_ENDIAN
3756 lshi(r0, r0, __WORDSIZE - 56);
3757# endif
3758 rshi_u(r0, r0, __WORDSIZE - 56);
3759 break;
3760 default:
3761 break;
3762#endif
3763 }
3764}
3765
3766static void
3767_fallback_unldr(jit_state_t *_jit,
3768 jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
3769{
3770 jit_int32_t t0, r2;
3771 assert(i0 >= 1 && i0 <= (__WORDSIZE >> 3));
3772 if (i0 == 1)
3773 ldr_c(r0, r1);
3774 else if (_jitc->unld_algorithm != 0) {
3775 if (r0 == r1) {
3776 t0 = fallback_jit_get_reg(jit_class_gpr);
3777 r2 = rn(t0);
3778 movr(r2, r0);
3779 }
3780 else
3781 r2 = r1;
3782 switch (i0) {
3783 case 2: unldr2(r0, r2, 1); break;
3784 case 3: unldr3(r0, r2, 1); break;
3785# if __WORDSIZE == 32
3786 default: unldr4(r0, r2); break;
3787# else
3788 case 4: unldr4(r0, r2, 1); break;
3789 case 5: unldr5(r0, r2, 1); break;
3790 case 6: unldr6(r0, r2, 1); break;
3791 case 7: unldr7(r0, r2, 1); break;
3792 default: unldr8(r0, r2); break;
3793# endif
3794 }
3795 if (i0 > 1 && r0 == r1)
3796 jit_unget_reg(t0);
3797 }
3798 else {
3799 unldrw(r0, r1, i0);
3800 unldx(r0, i0);
3801 }
3802}
3803
3804static void
3805_fallback_unldi(jit_state_t *_jit,
3806 jit_int32_t r0, jit_word_t i0, jit_word_t i1)
3807{
3808 assert(i1 >= 1 && i1 <= (__WORDSIZE >> 3));
3809 if (i1 == 1)
3810 ldi_c(r0, i0);
3811 else if (_jitc->unld_algorithm != 0) {
3812 switch (i1) {
3813 case 2: unldi2(r0, i0, 1); break;
3814 case 3: unldi3(r0, i0, 1); break;
3815# if __WORDSIZE == 32
3816 default: unldi4(r0, i0); break;
3817# else
3818 case 4: unldi4(r0, i0, 1); break;
3819 case 5: unldi5(r0, i0, 1); break;
3820 case 6: unldi6(r0, i0, 1); break;
3821 case 7: unldi7(r0, i0, 1); break;
3822 default: unldi8(r0, i0); break;
3823# endif
3824 }
3825 }
3826 else {
3827 unldiw(r0, i0, i1);
3828 unldx(r0, i1);
3829 }
3830}
3831
3832static void
3833_fallback_unldr_u(jit_state_t *_jit,
3834 jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
3835{
3836 jit_int32_t t0, r2;
3837 assert(i0 >= 1 && i0 <= (__WORDSIZE >> 3));
3838 if (i0 == 1)
3839 ldr_uc(r0, r1);
3840 else if (_jitc->unld_algorithm != 0) {
3841 if (r0 == r1) {
3842 t0 = fallback_jit_get_reg(jit_class_gpr);
3843 r2 = rn(t0);
3844 movr(r2, r0);
3845 }
3846 else
3847 r2 = r1;
3848 switch (i0) {
3849 case 2: unldr2(r0, r2, 0); break;
3850 case 3: unldr3(r0, r2, 0); break;
3851# if __WORDSIZE == 32
3852 default: unldr4(r0, r2); break;
3853# else
3854 case 4: unldr4(r0, r2, 0); break;
3855 case 5: unldr5(r0, r2, 0); break;
3856 case 6: unldr6(r0, r2, 0); break;
3857 case 7: unldr7(r0, r2, 0); break;
3858 default: unldr8(r0, r2); break;
3859# endif
3860 }
3861 if (i0 > 1 && r0 == r1)
3862 jit_unget_reg(t0);
3863 }
3864 else {
3865 unldrw(r0, r1, i0);
3866 unldx_u(r0, i0);
3867 }
3868}
3869
3870static void
3871_fallback_unldi_u(jit_state_t *_jit,
3872 jit_int32_t r0, jit_word_t i0, jit_word_t i1)
3873{
3874 assert(i1 >= 1 && i1 <= (__WORDSIZE >> 3));
3875 if (i1 == 1)
3876 ldi_uc(r0, i0);
3877 else if (_jitc->unld_algorithm != 0) {
3878 switch (i1) {
3879 case 2: unldi2(r0, i0, 0); break;
3880 case 3: unldi3(r0, i0, 0); break;
3881# if __WORDSIZE == 32
3882 default: unldi4(r0, i0); break;
3883# else
3884 case 4: unldi4(r0, i0, 0); break;
3885 case 5: unldi5(r0, i0, 0); break;
3886 case 6: unldi6(r0, i0, 0); break;
3887 case 7: unldi7(r0, i0, 0); break;
3888 default: unldi8(r0, i0); break;
3889# endif
3890 }
3891
3892 }
3893 else {
3894 unldiw(r0, i0, i1);
3895 unldx_u(r0, i1);
3896 }
3897}
3898
3899static void
3900_fallback_unstr(jit_state_t *_jit,
3901 jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
3902{
3903 jit_word_t done;
3904 jit_int32_t t0, t1, t2, t3, t4, t5;
3905 assert(i0 > 0 && i0 <= (__WORDSIZE >> 3));
3906 if (i0 == 1)
3907 str_c(r0, r1);
3908 else if (_jitc->unst_algorithm == 0) {
3909 switch (i0) {
3910 case 2: unstr2(r1, r0); break;
3911 case 3: unstr3(r1, r0); break;
3912# if __WORDSIZE == 32
3913 default: unstr4(r1, r0); break;
3914# else
3915 case 4: unstr4(r1, r0); break;
3916 case 5: unstr5(r1, r0); break;
3917 case 6: unstr6(r1, r0); break;
3918 case 7: unstr7(r1, r0); break;
3919 default: unstr8(r1, r0); break;
3920# endif
3921 }
3922 }
3923 else {
3924 t0 = fallback_jit_get_reg(jit_class_gpr);
3925 t1 = fallback_jit_get_reg(jit_class_gpr);
3926 t2 = fallback_jit_get_reg(jit_class_gpr);
3927 t3 = fallback_jit_get_reg(jit_class_gpr);
3928 /* Zero out top bits and keep value to store in t0 */
3929 if (i0 != sizeof(jit_word_t)) {
3930 lshi(rn(t3), r1, (sizeof(jit_word_t) - i0) << 3);
3931#if __BYTE_ORDER == __LITTLE_ENDIAN
3932 rshi_u(rn(t3), rn(t3), (sizeof(jit_word_t) - i0) << 3);
3933#endif
3934 }
3935 else
3936 movr(rn(t3), r1);
3937 /* Check alignment */
3938 andi(rn(t2), r0, sizeof(jit_word_t) - 1);
3939 /* Multiply by 8 */
3940 lshi(rn(t2), rn(t2), 3);
3941 /* Split values to store (assume will need two stores) */
3942#if __BYTE_ORDER == __LITTLE_ENDIAN
3943 qlshr_u(rn(t0), rn(t1), rn(t3), rn(t2));
3944#else
3945 qrshr_u(rn(t0), rn(t1), rn(t3), rn(t2));
3946#endif
3947 /* Generate masks for values in memory */
3948 if (i0 == sizeof(jit_word_t))
3949 movi(rn(t3), -1);
3950 else {
3951#if __BYTE_ORDER == __BIG_ENDIAN
3952 movi(rn(t3), ((1L << (i0 << 3)) - 1) <<
3953 ((sizeof(jit_word_t) - i0) << 3));
3954#else
3955 movi(rn(t3), (1L << (i0 << 3)) - 1);
3956#endif
3957 }
3958#if __BYTE_ORDER == __LITTLE_ENDIAN
3959 qlshr_u(rn(t2), rn(t3), rn(t3), rn(t2));
3960#else
3961 qrshr_u(rn(t2), rn(t3), rn(t3), rn(t2));
3962#endif
3963 comr(rn(t2), rn(t2));
3964 comr(rn(t3), rn(t3));
3965 /* Allocate two extra registers later in case need temps for *q?shr_u */
3966 t4 = fallback_jit_get_reg(jit_class_gpr);
3967 t5 = fallback_jit_get_reg(jit_class_gpr);
3968 /* Store words */
3969 andi(rn(t4), r0, -(jit_word_t)sizeof(jit_word_t));
3970 ldr(rn(t5), rn(t4));
3971 andr(rn(t5), rn(t5), rn(t2));
3972 orr(rn(t0), rn(t0), rn(t5));
3973 str(rn(t4), rn(t0));
3974 /* Make sure to not read/write on possibly unmaped memory */
3975 addi(rn(t5), rn(t4), i0);
3976 done = fallback_blei(_jit->pc.w, rn(t5), sizeof(jit_word_t));
3977 /* Store second word if vlaue crosses a word boundary */
3978 ldxi(rn(t5), rn(t4), sizeof(jit_word_t));
3979 andr(rn(t5), rn(t5), rn(t3));
3980 orr(rn(t1), rn(t1), rn(t5));
3981 stxi(sizeof(jit_word_t), rn(t4), rn(t1));
3982 /* Finished */
3983 fallback_flush();
3984 fallback_patch_blei(done, _jit->pc.w);
3985 /* Generic/simple algorithm needs 6 temporaries, as it cannot
3986 * change any of the argument registers, might need to truncate
3987 * the value to store, and need a pair for values to store and
3988 * another for the masks. */
3989 jit_unget_reg(t5);
3990 jit_unget_reg(t4);
3991 jit_unget_reg(t3);
3992 jit_unget_reg(t2);
3993 jit_unget_reg(t1);
3994 jit_unget_reg(t0);
3995 }
3996}
3997
3998static void
3999_fallback_unsti(jit_state_t *_jit,
4000 jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4001{
4002 jit_word_t done, address;
4003 jit_int32_t t0, t1, t2, t3, t4;
4004 assert(i1 > 0 && i1 <= (__WORDSIZE >> 3));
4005 if (i1 == 1)
4006 sti_c(i0, r0);
4007 else if (_jitc->unst_algorithm == 0) {
4008 switch (i1) {
4009 case 1: sti_c(i0, r0); break;
4010 case 2: unsti2(r0, i0); break;
4011 case 3: unsti3(r0, i0); break;
4012# if __WORDSIZE == 32
4013 default: unsti4(r0, i0); break;
4014# else
4015 case 4: unsti4(r0, i0); break;
4016 case 5: unsti5(r0, i0); break;
4017 case 6: unsti6(r0, i0); break;
4018 case 7: unsti7(r0, i0); break;
4019 default: unsti8(r0, i0); break;
4020# endif
4021 }
4022 }
4023 else {
4024 t0 = fallback_jit_get_reg(jit_class_gpr);
4025 t1 = fallback_jit_get_reg(jit_class_gpr);
4026 t2 = fallback_jit_get_reg(jit_class_gpr);
4027 t3 = fallback_jit_get_reg(jit_class_gpr);
4028 t4 = fallback_jit_get_reg(jit_class_gpr);
4029 /* Zero out top bits and keep value to store in t0 */
4030 if (i1 != sizeof(jit_word_t)) {
4031 lshi(rn(t2), r0, (sizeof(jit_word_t) - i1) << 3);
4032#if __BYTE_ORDER == __LITTLE_ENDIAN
4033 rshi_u(rn(t2), rn(t2), (sizeof(jit_word_t) - i1) << 3);
4034 qlshi_u(rn(t0), rn(t1), rn(t2),
4035 (i0 & (sizeof(jit_word_t) - 1)) << 3);
4036#else
4037 qrshi_u(rn(t0), rn(t1), rn(t2),
4038 (i0 & (sizeof(jit_word_t)) - 1) << 3);
4039#endif
4040 }
4041 else {
4042#if __BYTE_ORDER == __LITTLE_ENDIAN
4043 qlshi_u(rn(t0), rn(t1), r0,
4044 (i0 & (sizeof(jit_word_t)) - 1) << 3);
4045#else
4046 qrshi_u(rn(t0), rn(t1), r0,
4047 (i0 & (sizeof(jit_word_t)) - 1) << 3);
4048#endif
4049 }
4050 /* Generate masks for values in memory */
4051 if (i1 == sizeof(jit_word_t))
4052 movi(rn(t2), -1);
4053 else {
4054#if __BYTE_ORDER == __BIG_ENDIAN
4055 movi(rn(t2), ((1L << (i1 << 3)) - 1) <<
4056 ((sizeof(jit_word_t) - i1) << 3));
4057#else
4058 movi(rn(t2), (1L << (i1 << 3)) - 1);
4059#endif
4060 }
4061#if __BYTE_ORDER == __LITTLE_ENDIAN
4062 qlshi_u(rn(t2), rn(t3), rn(t2), (i0 & (sizeof(jit_word_t)) - 1) << 3);
4063#else
4064 qrshi_u(rn(t2), rn(t3), rn(t2), (i0 & (sizeof(jit_word_t)) - 1) << 3);
4065#endif
4066 comr(rn(t2), rn(t2));
4067 comr(rn(t3), rn(t3));
4068 /* Store words */
4069 address = i0 & -(jit_word_t)sizeof(jit_word_t);
4070 ldi(rn(t4), address);
4071 andr(rn(t4), rn(t4), rn(t2));
4072 orr(rn(t0), rn(t0), rn(t4));
4073 sti(address, rn(t0));
4074 if (address + i1 > sizeof(jit_word_t)) {
4075 address += sizeof(jit_word_t);
4076 ldi(rn(t4), address);
4077 andr(rn(t4), rn(t4), rn(t3));
4078 orr(rn(t1), rn(t1), rn(t4));
4079 sti(address, rn(t1));
4080 }
4081 jit_unget_reg(t4);
4082 jit_unget_reg(t3);
4083 jit_unget_reg(t2);
4084 jit_unget_reg(t1);
4085 jit_unget_reg(t0);
4086 }
4087}
4088
4089# ifdef fallback_unldr_x
4090static void
4091_fallback_unldr_x(jit_state_t *_jit,
4092 jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
4093{
4094 jit_int32_t t0, r2;
4095# if __WORDSIZE == 32
4096 jit_int32_t t1, r3;
4097# endif
4098 assert(i0 == 4 || i0 == 8);
4099 t0 = fallback_jit_get_reg(jit_class_gpr);
4100 r2 = rn(t0);
4101 if (i0 == 4) {
4102 unldr(r2, r1, 4);
4103 movr_w_f(r0, r2);
4104 }
4105 else {
4106# if __WORDSIZE == 32
4107 t1 = fallback_jit_get_reg(jit_class_gpr);
4108 r3 = rn(t1);
4109# if __BYTE_ORDER == __LITTLE_ENDIAN
4110 unldr(r2, r1, 4);
4111 addi(r3, r1, 4);
4112 unldr(r3, r3, 4);
4113# else
4114 unldr(r3, r1, 4);
4115 addi(r2, r1, 4);
4116 unldr(r2, r2, 4);
4117# endif
4118 movr_ww_d(r0, r2, r3);
4119 jit_unget_reg(t1);
4120# else
4121 unldr(r2, r1, 8);
4122 movr_w_d(r0, r2);
4123# endif
4124 }
4125 jit_unget_reg(t0);
4126}
4127# endif
4128
4129# ifdef fallback_unldi_x
4130static void
4131_fallback_unldi_x(jit_state_t *_jit,
4132 jit_int32_t r0, jit_word_t i0, jit_word_t i1)
4133{
4134 jit_int32_t t0, r2;
4135# if __WORDSIZE == 32
4136 jit_int32_t t1, r3;
4137# endif
4138 assert(i1 == 4 || i1 == 8);
4139 t0 = fallback_jit_get_reg(jit_class_gpr);
4140 r2 = rn(t0);
4141 if (i1 == 4) {
4142 unldi(r2, i0, 4);
4143 movr_w_f(r0, r2);
4144 }
4145 else {
4146# if __WORDSIZE == 32
4147 t1 = fallback_jit_get_reg(jit_class_gpr);
4148 r3 = rn(t1);
4149# if __BYTE_ORDER == __LITTLE_ENDIAN
4150 unldi(r2, i0, 4);
4151 unldi(r3, i0 + 4, 4);
4152# else
4153 unldi(r3, i0, 4);
4154 unldi(r2, i0 + 4, 4);
4155# endif
4156 movr_ww_d(r0, r3, r2);
4157 jit_unget_reg(t1);
4158# else
4159 unldi(r2, i0, 8);
4160 movr_w_d(r0, r2);
4161# endif
4162 }
4163 jit_unget_reg(t0);
4164}
4165# endif
4166
4167# ifdef fallback_unstr_x
4168static void
4169_fallback_unstr_x(jit_state_t *_jit,
4170 jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
4171{
4172 jit_int32_t t0, r2;
4173# if __WORDSIZE == 32
4174 jit_int32_t t1, r3;
4175# endif
4176 assert(i0 == 4 || i0 == 8);
4177 t0 = fallback_jit_get_reg(jit_class_gpr);
4178 r2 = rn(t0);
4179 if (i0 == 4) {
4180 movr_f_w(r2, r1);
4181 unstr(r0, r2, 4);
4182 }
4183 else {
4184# if __WORDSIZE == 32
4185 t1 = fallback_jit_get_reg(jit_class_gpr);
4186 r3 = rn(t1);
4187 movr_d_ww(r2, r3, r1);
4188# if __BYTE_ORDER == __LITTLE_ENDIAN
4189 unstr(r0, r2, 4);
4190 addi(r2, r0, 4);
4191 unstr(r2, r3, 4);
4192# else
4193 unstr(r0, r3, 4);
4194 addi(r3, r0, 4);
4195 unstr(r3, r2, 4);
4196# endif
4197 jit_unget_reg(t1);
4198# else
4199 movr_d_w(r2, r1);
4200 unstr(r0, r2, 8);
4201# endif
4202 }
4203 jit_unget_reg(t0);
4204}
4205# endif
4206
4207# ifdef fallback_unsti_x
4208static void
4209_fallback_unsti_x(jit_state_t *_jit,
4210 jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4211{
4212 jit_int32_t t0, r2;
4213# if __WORDSIZE == 32
4214 jit_int32_t t1, r3;
4215# endif
4216 assert(i1 == 4 || i1 == 8);
4217 t0 = fallback_jit_get_reg(jit_class_gpr);
4218 r2 = rn(t0);
4219 if (i1 == 4) {
4220 movr_f_w(r2, r0);
4221 unsti(i0, r2, 4);
4222 }
4223 else {
4224# if __WORDSIZE == 32
4225 t1 = fallback_jit_get_reg(jit_class_gpr);
4226 r3 = rn(t1);
4227 movr_d_ww(r2, r3, r0);
4228# if __BYTE_ORDER == __LITTLE_ENDIAN
4229 unsti(i0, r3, 4);
4230 unsti(i0 + 4, r2, 4);
4231# else
4232 unsti(i0, r2, 4);
4233 unsti(i0 + 4, r3, 4);
4234# endif
4235 jit_unget_reg(t1);
4236# else
4237 movr_d_w(r2, r0);
4238 unsti(i0, r2, 8);
4239# endif
4240 }
4241 jit_unget_reg(t0);
4242}
4243# endif
ba3814c1 4244#endif