1/*
2 * Basic macros to emit ARM instructions and some utils
3 * Copyright (C) 2008,2009,2010 notaz
4 * Copyright (C) 2019-2024 irixxxx
5 *
6 * This work is licensed under the terms of MAME license.
7 * See COPYING file in the top-level directory.
8 */
9#define HOST_REGS 16
10
11// OABI/EABI: params: r0-r3, return: r0-r1, temp: r12,r14, saved: r4-r8,r10,r11
12// SP,PC: r13,r15 must not be used. saved: r9 (for platform use, e.g. on ios)
13#define RET_REG 0
14#define PARAM_REGS { 0, 1, 2, 3 }
15#ifndef __MACH__
16#define PRESERVED_REGS { 4, 5, 6, 7, 8, 9, 10, 11 }
17#else
18#define PRESERVED_REGS { 4, 5, 6, 7, 8, 10, 11 } // no r9..
19#endif
20#define TEMPORARY_REGS { 12, 14 }
21
22#define CONTEXT_REG 11
23#define STATIC_SH2_REGS { SHR_SR,10 , SHR_R(0),8 , SHR_R(1),9 }
24
25// XXX: tcache_ptr type for SVP and SH2 compilers differs..
26#define EMIT_PTR(ptr, x) \
27 do { \
28 *(u32 *)ptr = x; \
29 ptr = (void *)((u8 *)ptr + sizeof(u32)); \
30 } while (0)
31
32// ARM special registers and peephole optimization flags
33#define SP 13 // stack pointer
34#define LR 14 // link (return address)
35#define PC 15 // program counter
36#define SR 16 // CPSR, status register
37#define MEM 17 // memory access (src=LDR, dst=STR)
38#define CYC1 20 // 1 cycle interlock (LDR, reg-cntrld shift)
39#define CYC2 (CYC1+1)// 2+ cycles interlock (LDR[BH], MUL/MLA etc)
40#define NO 32 // token for "no register"
41
42// bitmask builders
43#define M1(x) (u32)(1ULL<<(x)) // u32 to have NO evaluate to 0
44#define M2(x,y) (M1(x)|M1(y))
45#define M3(x,y,z) (M2(x,y)|M1(z))
46#define M4(x,y,z,a) (M3(x,y,z)|M1(a))
47#define M5(x,y,z,a,b) (M4(x,y,z,a)|M1(b))
48#define M6(x,y,z,a,b,c) (M5(x,y,z,a,b)|M1(c))
49#define M10(a,b,c,d,e,f,g,h,i,j) (M5(a,b,c,d,e)|M5(f,g,h,i,j))
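// illustrative: M2(0, LR) == 0x00004001, i.e. the bits for r0 and lr; since
// NO is 32, M1(NO) == (u32)(1ULL<<32) == 0, so "no register" operands simply
// drop out of the src/dst usage masks built with these below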
50
51// avoid a warning with clang
52static inline uintptr_t pabs(intptr_t v) { return labs(v); }
53
54// sys_cacheflush always flushes whole pages, and it's rather expensive on ARMs
55// hold a list of pending cache updates and merge new requests into it to reduce cacheflush calls
56static struct { void *base, *end; } pageflush[4];
57static unsigned pagesize = 4096;
58
59static void emith_update_cache(void)
60{
61 int i;
62
63 for (i = 0; i < 4 && pageflush[i].base; i++) {
64 cache_flush_d_inval_i(pageflush[i].base, pageflush[i].end + pagesize-1);
65 pageflush[i].base = NULL;
66 }
67}
68
69static inline void emith_update_add(void *base, void *end)
70{
71 void *p_base = (void *)((uintptr_t)(base) & ~(pagesize-1));
72 void *p_end = (void *)((uintptr_t)(end ) & ~(pagesize-1));
73 int i;
74
75 for (i = 0; i < 4 && pageflush[i].base; i++) {
76 if (p_base <= pageflush[i].end+pagesize && p_end >= pageflush[i].end) {
77 if (p_base < pageflush[i].base) pageflush[i].base = p_base;
78 pageflush[i].end = p_end;
79 return;
80 }
81 if (p_base <= pageflush[i].base && p_end >= pageflush[i].base-pagesize) {
82 if (p_end > pageflush[i].end) pageflush[i].end = p_end;
83 pageflush[i].base = p_base;
84 return;
85 }
86 }
87 if (i == 4) {
88 /* list full and not mergeable -> flush list */
89 emith_update_cache();
90 i = 0;
91 }
92 pageflush[i].base = p_base, pageflush[i].end = p_end;
93}
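// intended use (see host_instructions_updated below): every modified code
// range is registered with emith_update_add(), and a later call to
// emith_update_cache() flushes the merged page ranges, so several nearby
// updates usually end up in a single cacheflush call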
94
95// peephole optimizer. ATM it only tries to reduce interlocks
96#define EMIT_CACHE_SIZE 6
97struct emit_op {
98 u32 op;
99 u32 src, dst;
100};
101
102// peephole cache, last committed insn + cache + next insn = size+2
103static struct emit_op emit_cache[EMIT_CACHE_SIZE+2];
104static int emit_index;
105#define emith_insn_ptr() (u8 *)((u32 *)tcache_ptr-emit_index)
106
107static inline void emith_pool_adjust(int tcache_offs, int move_offs);
108
109static NOINLINE void EMIT(u32 op, u32 dst, u32 src)
110{
111 void * emit_ptr = (u32 *)tcache_ptr - emit_index;
112 struct emit_op *const ptr = emit_cache;
113 const int n = emit_index+1;
114 int i, bi, bd = 0;
115
116 // account for new insn in tcache
117 tcache_ptr = (void *)((u32 *)tcache_ptr + 1);
118 COUNT_OP;
119 // for conditional execution SR is always source
120 if (op < 0xe0000000 /*A_COND_AL << 28*/)
121 src |= M1(SR);
122 // put insn on back of queue // mask away the NO token
123 emit_cache[n] = (struct emit_op)
124 { .op=op, .src=src & ~M1(NO), .dst=dst & ~M1(NO) };
125 // check insns down the queue as long as permitted by dependencies
126 for (bd = bi = 0, i = emit_index; i > 1 && !(dst & M1(PC)); i--) {
127 int deps = 0;
128 // insns i and n must not be swapped if there is a dst dependency, since any
129 // dep other than [i].src & [n].src leads to changed semantics if swapped.
130 if ((ptr[i].dst & ptr[n].src) || (ptr[n].dst & ptr[i].src) ||
131 (ptr[i].dst & ptr[n].dst))
132 break;
133 // don't swap insns reading PC if it's not a word pool load
134 // (ptr[i].op&0xf700000) != EOP_C_AM2_IMM(0,0,0,1,0,0,0))
135 if ((ptr[i].src & M1(PC)) && (ptr[i].op&0xf700000) != 0x5100000)
136 break;
137
138 // calculate ARM920T interlock cycles (differences only)
139#define D2(x,y) ((ptr[x].dst & ptr[y].src)?((ptr[x].src >> CYC2) & 1):0)
140#define D1(x,y) ((ptr[x].dst & ptr[y].src)?((ptr[x].src >> CYC1) & 3):0)
141 // insn sequence: [..., i-2, i-1, i, i+1, ..., n-2, n-1, n]
142 deps -= D2(i-2,i)+D2(i-1,i+1)+D2(n-2,n ) + D1(i-1,i)+D1(n-1,n);
143 deps -= !!(ptr[n].src & M2(CYC1,CYC2));// favour moving LDR down
144 // insn sequence: [..., i-2, i-1, n, i, i+1, ..., n-2, n-1]
145 deps += D2(i-2,n)+D2(i-1,i )+D2(n ,i+1) + D1(i-1,n)+D1(n ,i);
146 deps += !!(ptr[i].src & M2(CYC1,CYC2));// penalize moving LDR up
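		// illustrative: for "ldr r0,[r4]; add r1,r1,r0" the add stalls one
		// cycle waiting for r0 (CYC1); moving an unrelated insn between the
		// two hides the stall, which is what this scoring tries to arrange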
147 // remember best match found
148 if (bd > deps)
149 bd = deps, bi = i;
150 }
151 // swap if fewer dependencies
152 if (bd < 0) {
153 // make room for new insn at bi
154 struct emit_op tmp = ptr[n];
155 for (i = n-1; i >= bi; i--) {
156 ptr[i+1] = ptr[i];
157 if (ptr[i].src & M1(PC))
158 emith_pool_adjust(n-i+1, 1);
159 }
160 // insert new insn at bi
161 ptr[bi] = tmp;
162 if (ptr[bi].src & M1(PC))
163 emith_pool_adjust(1, bi-n);
164 }
165 if (dst & M1(PC)) {
166 // commit everything if a branch insn is emitted
167 for (i = 1; i <= emit_index+1; i++)
168 EMIT_PTR(emit_ptr, emit_cache[i].op);
169 emit_index = 0;
170 } else if (emit_index < EMIT_CACHE_SIZE) {
171 // queue not yet full
172 emit_index++;
173 } else {
174 // commit oldest insn from cache
175 EMIT_PTR(emit_ptr, emit_cache[1].op);
176 for (i = 0; i <= emit_index; i++)
177 emit_cache[i] = emit_cache[i+1];
178 }
179}
180
181static void emith_flush(void)
182{
183 int i;
184 void *emit_ptr = tcache_ptr - emit_index*sizeof(u32);
185
186 for (i = 1; i <= emit_index; i++)
187 EMIT_PTR(emit_ptr, emit_cache[i].op);
188 emit_index = 0;
189}
190
191#define A_COND_AL 0xe
192#define A_COND_EQ 0x0
193#define A_COND_NE 0x1
194#define A_COND_HS 0x2
195#define A_COND_LO 0x3
196#define A_COND_MI 0x4
197#define A_COND_PL 0x5
198#define A_COND_VS 0x6
199#define A_COND_VC 0x7
200#define A_COND_HI 0x8
201#define A_COND_LS 0x9
202#define A_COND_GE 0xa
203#define A_COND_LT 0xb
204#define A_COND_GT 0xc
205#define A_COND_LE 0xd
206#define A_COND_CS A_COND_HS
207#define A_COND_CC A_COND_LO
208#define A_COND_NV 0xf // Not Valid (aka NeVer :-) - ATTN: not a real condition!
209
210/* unified conditions */
211#define DCOND_EQ A_COND_EQ
212#define DCOND_NE A_COND_NE
213#define DCOND_MI A_COND_MI
214#define DCOND_PL A_COND_PL
215#define DCOND_HI A_COND_HI
216#define DCOND_HS A_COND_HS
217#define DCOND_LO A_COND_LO
218#define DCOND_GE A_COND_GE
219#define DCOND_GT A_COND_GT
220#define DCOND_LT A_COND_LT
221#define DCOND_LS A_COND_LS
222#define DCOND_LE A_COND_LE
223#define DCOND_VS A_COND_VS
224#define DCOND_VC A_COND_VC
225
226#define DCOND_CS A_COND_HS
227#define DCOND_CC A_COND_LO
228
229/* addressing mode 1 */
230#define A_AM1_LSL 0
231#define A_AM1_LSR 1
232#define A_AM1_ASR 2
233#define A_AM1_ROR 3
234
235#define A_AM1_IMM(ror2,imm8) (((ror2)<<8) | (imm8) | 0x02000000)
236#define A_AM1_REG_XIMM(shift_imm,shift_op,rm) (((shift_imm)<<7) | ((shift_op)<<5) | (rm))
237#define A_AM1_REG_XREG(rs,shift_op,rm) (((rs)<<8) | ((shift_op)<<5) | 0x10 | (rm))
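// sketch of the AM1 immediate form: an 8-bit value rotated right by 2*ror2,
// e.g. A_AM1_IMM(12,0x1f) encodes 0x1f ror 24 == 0x00001f00; emith_op_imm2()
// below searches for a (ror2,imm8) pair covering the requested constant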
238
239/* data processing op */
240#define A_OP_AND 0x0
241#define A_OP_EOR 0x1
242#define A_OP_SUB 0x2
243#define A_OP_RSB 0x3
244#define A_OP_ADD 0x4
245#define A_OP_ADC 0x5
246#define A_OP_SBC 0x6
247#define A_OP_RSC 0x7
248#define A_OP_TST 0x8
249#define A_OP_TEQ 0x9
250#define A_OP_CMP 0xa
251#define A_OP_CMN 0xb
252#define A_OP_ORR 0xc
253#define A_OP_MOV 0xd
254#define A_OP_BIC 0xe
255#define A_OP_MVN 0xf
256
257// operation specific register usage in DOP
258#define A_Rn(op,rn) (((op)&0xd)!=0xd ? rn:NO) // no rn for MOV,MVN
259#define A_Rd(op,rd) (((op)&0xc)!=0x8 ? rd:NO) // no rd for TST,TEQ,CMP,CMN
260// CPSR is dst if S set, CPSR is src if op is ADC/SBC/RSC or shift is RRX
261#define A_Sd(s) ((s) ? SR:NO)
262#define A_Sr(op,sop) (((op)>=0x5 && (op)<=0x7) || (sop)>>4==A_AM1_ROR<<1 ? SR:NO)
263
264#define EOP_C_DOP_X(cond,op,s,rn,rd,sop,rm,rs) \
265 EMIT(((cond)<<28) | ((op)<< 21) | ((s)<<20) | ((rn)<<16) | ((rd)<<12) | (sop), \
266 M2(A_Rd(op,rd),A_Sd(s)), M5(A_Sr(op,sop),A_Rn(op,rn),rm,rs,rs==NO?NO:CYC1))
267
268#define EOP_C_DOP_IMM( cond,op,s,rn,rd,ror2,imm8) EOP_C_DOP_X(cond,op,s,rn,rd,A_AM1_IMM(ror2,imm8), NO, NO)
269#define EOP_C_DOP_REG_XIMM(cond,op,s,rn,rd,shift_imm,shift_op,rm) EOP_C_DOP_X(cond,op,s,rn,rd,A_AM1_REG_XIMM(shift_imm,shift_op,rm), rm, NO)
270#define EOP_C_DOP_REG_XREG(cond,op,s,rn,rd,rs, shift_op,rm) EOP_C_DOP_X(cond,op,s,rn,rd,A_AM1_REG_XREG(rs, shift_op,rm), rm, rs)
271
272#define EOP_MOV_IMM(rd, ror2,imm8) EOP_C_DOP_IMM(A_COND_AL,A_OP_MOV,0, 0,rd,ror2,imm8)
273#define EOP_MVN_IMM(rd, ror2,imm8) EOP_C_DOP_IMM(A_COND_AL,A_OP_MVN,0, 0,rd,ror2,imm8)
274#define EOP_ORR_IMM(rd,rn,ror2,imm8) EOP_C_DOP_IMM(A_COND_AL,A_OP_ORR,0,rn,rd,ror2,imm8)
275#define EOP_EOR_IMM(rd,rn,ror2,imm8) EOP_C_DOP_IMM(A_COND_AL,A_OP_EOR,0,rn,rd,ror2,imm8)
276#define EOP_ADD_IMM(rd,rn,ror2,imm8) EOP_C_DOP_IMM(A_COND_AL,A_OP_ADD,0,rn,rd,ror2,imm8)
277#define EOP_BIC_IMM(rd,rn,ror2,imm8) EOP_C_DOP_IMM(A_COND_AL,A_OP_BIC,0,rn,rd,ror2,imm8)
278#define EOP_AND_IMM(rd,rn,ror2,imm8) EOP_C_DOP_IMM(A_COND_AL,A_OP_AND,0,rn,rd,ror2,imm8)
279#define EOP_SUB_IMM(rd,rn,ror2,imm8) EOP_C_DOP_IMM(A_COND_AL,A_OP_SUB,0,rn,rd,ror2,imm8)
280#define EOP_TST_IMM( rn,ror2,imm8) EOP_C_DOP_IMM(A_COND_AL,A_OP_TST,1,rn, 0,ror2,imm8)
281#define EOP_CMP_IMM( rn,ror2,imm8) EOP_C_DOP_IMM(A_COND_AL,A_OP_CMP,1,rn, 0,ror2,imm8)
282#define EOP_RSB_IMM(rd,rn,ror2,imm8) EOP_C_DOP_IMM(A_COND_AL,A_OP_RSB,0,rn,rd,ror2,imm8)
283
284#define EOP_MOV_IMM_C(cond,rd, ror2,imm8) EOP_C_DOP_IMM(cond,A_OP_MOV,0, 0,rd,ror2,imm8)
285#define EOP_ORR_IMM_C(cond,rd,rn,ror2,imm8) EOP_C_DOP_IMM(cond,A_OP_ORR,0,rn,rd,ror2,imm8)
286#define EOP_RSB_IMM_C(cond,rd,rn,ror2,imm8) EOP_C_DOP_IMM(cond,A_OP_RSB,0,rn,rd,ror2,imm8)
287
288#define EOP_MOV_REG(cond,s,rd, rm,shift_op,shift_imm) EOP_C_DOP_REG_XIMM(cond,A_OP_MOV,s, 0,rd,shift_imm,shift_op,rm)
289#define EOP_MVN_REG(cond,s,rd, rm,shift_op,shift_imm) EOP_C_DOP_REG_XIMM(cond,A_OP_MVN,s, 0,rd,shift_imm,shift_op,rm)
290#define EOP_ORR_REG(cond,s,rd,rn,rm,shift_op,shift_imm) EOP_C_DOP_REG_XIMM(cond,A_OP_ORR,s,rn,rd,shift_imm,shift_op,rm)
291#define EOP_ADD_REG(cond,s,rd,rn,rm,shift_op,shift_imm) EOP_C_DOP_REG_XIMM(cond,A_OP_ADD,s,rn,rd,shift_imm,shift_op,rm)
292#define EOP_ADC_REG(cond,s,rd,rn,rm,shift_op,shift_imm) EOP_C_DOP_REG_XIMM(cond,A_OP_ADC,s,rn,rd,shift_imm,shift_op,rm)
293#define EOP_SUB_REG(cond,s,rd,rn,rm,shift_op,shift_imm) EOP_C_DOP_REG_XIMM(cond,A_OP_SUB,s,rn,rd,shift_imm,shift_op,rm)
294#define EOP_SBC_REG(cond,s,rd,rn,rm,shift_op,shift_imm) EOP_C_DOP_REG_XIMM(cond,A_OP_SBC,s,rn,rd,shift_imm,shift_op,rm)
295#define EOP_AND_REG(cond,s,rd,rn,rm,shift_op,shift_imm) EOP_C_DOP_REG_XIMM(cond,A_OP_AND,s,rn,rd,shift_imm,shift_op,rm)
296#define EOP_EOR_REG(cond,s,rd,rn,rm,shift_op,shift_imm) EOP_C_DOP_REG_XIMM(cond,A_OP_EOR,s,rn,rd,shift_imm,shift_op,rm)
297#define EOP_CMP_REG(cond, rn,rm,shift_op,shift_imm) EOP_C_DOP_REG_XIMM(cond,A_OP_CMP,1,rn, 0,shift_imm,shift_op,rm)
298#define EOP_TST_REG(cond, rn,rm,shift_op,shift_imm) EOP_C_DOP_REG_XIMM(cond,A_OP_TST,1,rn, 0,shift_imm,shift_op,rm)
299#define EOP_TEQ_REG(cond, rn,rm,shift_op,shift_imm) EOP_C_DOP_REG_XIMM(cond,A_OP_TEQ,1,rn, 0,shift_imm,shift_op,rm)
300
301#define EOP_MOV_REG2(s,rd, rm,shift_op,rs) EOP_C_DOP_REG_XREG(A_COND_AL,A_OP_MOV,s, 0,rd,rs,shift_op,rm)
302#define EOP_ADD_REG2(s,rd,rn,rm,shift_op,rs) EOP_C_DOP_REG_XREG(A_COND_AL,A_OP_ADD,s,rn,rd,rs,shift_op,rm)
303#define EOP_SUB_REG2(s,rd,rn,rm,shift_op,rs) EOP_C_DOP_REG_XREG(A_COND_AL,A_OP_SUB,s,rn,rd,rs,shift_op,rm)
304
305#define EOP_MOV_REG_SIMPLE(rd,rm) EOP_MOV_REG(A_COND_AL,0,rd,rm,A_AM1_LSL,0)
306#define EOP_MOV_REG_LSL(rd, rm,shift_imm) EOP_MOV_REG(A_COND_AL,0,rd,rm,A_AM1_LSL,shift_imm)
307#define EOP_MOV_REG_LSR(rd, rm,shift_imm) EOP_MOV_REG(A_COND_AL,0,rd,rm,A_AM1_LSR,shift_imm)
308#define EOP_MOV_REG_ASR(rd, rm,shift_imm) EOP_MOV_REG(A_COND_AL,0,rd,rm,A_AM1_ASR,shift_imm)
309#define EOP_MOV_REG_ROR(rd, rm,shift_imm) EOP_MOV_REG(A_COND_AL,0,rd,rm,A_AM1_ROR,shift_imm)
310
311#define EOP_ORR_REG_SIMPLE(rd,rm) EOP_ORR_REG(A_COND_AL,0,rd,rd,rm,A_AM1_LSL,0)
312#define EOP_ORR_REG_LSL(rd,rn,rm,shift_imm) EOP_ORR_REG(A_COND_AL,0,rd,rn,rm,A_AM1_LSL,shift_imm)
313#define EOP_ORR_REG_LSR(rd,rn,rm,shift_imm) EOP_ORR_REG(A_COND_AL,0,rd,rn,rm,A_AM1_LSR,shift_imm)
314#define EOP_ORR_REG_ASR(rd,rn,rm,shift_imm) EOP_ORR_REG(A_COND_AL,0,rd,rn,rm,A_AM1_ASR,shift_imm)
315#define EOP_ORR_REG_ROR(rd,rn,rm,shift_imm) EOP_ORR_REG(A_COND_AL,0,rd,rn,rm,A_AM1_ROR,shift_imm)
316
317#define EOP_ADD_REG_SIMPLE(rd,rm) EOP_ADD_REG(A_COND_AL,0,rd,rd,rm,A_AM1_LSL,0)
318#define EOP_ADD_REG_LSL(rd,rn,rm,shift_imm) EOP_ADD_REG(A_COND_AL,0,rd,rn,rm,A_AM1_LSL,shift_imm)
319#define EOP_ADD_REG_LSR(rd,rn,rm,shift_imm) EOP_ADD_REG(A_COND_AL,0,rd,rn,rm,A_AM1_LSR,shift_imm)
320
321#define EOP_TST_REG_SIMPLE(rn,rm) EOP_TST_REG(A_COND_AL, rn, 0,A_AM1_LSL,rm)
322
323#define EOP_MOV_REG2_LSL(rd, rm,rs) EOP_MOV_REG2(0,rd, rm,A_AM1_LSL,rs)
324#define EOP_MOV_REG2_ROR(rd, rm,rs) EOP_MOV_REG2(0,rd, rm,A_AM1_ROR,rs)
325#define EOP_ADD_REG2_LSL(rd,rn,rm,rs) EOP_ADD_REG2(0,rd,rn,rm,A_AM1_LSL,rs)
326#define EOP_SUB_REG2_LSL(rd,rn,rm,rs) EOP_SUB_REG2(0,rd,rn,rm,A_AM1_LSL,rs)
327
328/* addressing mode 2 */
329#define EOP_C_AM2_IMM(cond,u,b,l,rn,rd,offset_12) \
330 EMIT(((cond)<<28) | 0x05000000 | ((u)<<23) | ((b)<<22) | ((l)<<20) | ((rn)<<16) | ((rd)<<12) | \
331 ((offset_12) & 0xfff), M1(l?rd:MEM), M3(rn,l?MEM:rd,l?b?CYC2:CYC1:NO))
332
333#define EOP_C_AM2_REG(cond,u,b,l,rn,rd,shift_imm,shift_op,rm) \
334 EMIT(((cond)<<28) | 0x07000000 | ((u)<<23) | ((b)<<22) | ((l)<<20) | ((rn)<<16) | ((rd)<<12) | \
335 A_AM1_REG_XIMM(shift_imm, shift_op, rm), M1(l?rd:MEM), M4(rn,rm,l?MEM:rd,l?b?CYC2:CYC1:NO))
336
337/* addressing mode 3 */
338#define EOP_C_AM3(cond,u,r,l,rn,rd,s,h,immed_reg) \
339 EMIT(((cond)<<28) | 0x01000090 | ((u)<<23) | ((r)<<22) | ((l)<<20) | ((rn)<<16) | ((rd)<<12) | \
340 ((s)<<6) | ((h)<<5) | (immed_reg), M1(l?rd:MEM), M4(rn,r?NO:immed_reg,l?MEM:rd,l?CYC2:NO))
341
342#define EOP_C_AM3_IMM(cond,u,l,rn,rd,s,h,offset_8) EOP_C_AM3(cond,u,1,l,rn,rd,s,h,(((offset_8)&0xf0)<<4)|((offset_8)&0xf))
343
344#define EOP_C_AM3_REG(cond,u,l,rn,rd,s,h,rm) EOP_C_AM3(cond,u,0,l,rn,rd,s,h,rm)
345
346/* ldr and str */
347#define EOP_LDR_IMM2(cond,rd,rn,offset_12) EOP_C_AM2_IMM(cond,(offset_12) >= 0,0,1,rn,rd,pabs(offset_12))
348#define EOP_LDRB_IMM2(cond,rd,rn,offset_12) EOP_C_AM2_IMM(cond,(offset_12) >= 0,1,1,rn,rd,pabs(offset_12))
349#define EOP_STR_IMM2(cond,rd,rn,offset_12) EOP_C_AM2_IMM(cond,(offset_12) >= 0,0,0,rn,rd,pabs(offset_12))
350
351#define EOP_LDR_IMM( rd,rn,offset_12) EOP_C_AM2_IMM(A_COND_AL,(offset_12) >= 0,0,1,rn,rd,pabs(offset_12))
352#define EOP_LDR_SIMPLE(rd,rn) EOP_C_AM2_IMM(A_COND_AL,1,0,1,rn,rd,0)
353#define EOP_STR_IMM( rd,rn,offset_12) EOP_C_AM2_IMM(A_COND_AL,(offset_12) >= 0,0,0,rn,rd,pabs(offset_12))
354#define EOP_STR_SIMPLE(rd,rn) EOP_C_AM2_IMM(A_COND_AL,1,0,0,rn,rd,0)
355
356#define EOP_LDR_REG_LSL(cond,rd,rn,rm,shift_imm) EOP_C_AM2_REG(cond,1,0,1,rn,rd,shift_imm,A_AM1_LSL,rm)
357#define EOP_LDR_REG_LSL_WB(cond,rd,rn,rm,shift_imm) EOP_C_AM2_REG(cond,1,0,3,rn,rd,shift_imm,A_AM1_LSL,rm)
358#define EOP_LDRB_REG_LSL(cond,rd,rn,rm,shift_imm) EOP_C_AM2_REG(cond,1,1,1,rn,rd,shift_imm,A_AM1_LSL,rm)
359#define EOP_STR_REG_LSL_WB(cond,rd,rn,rm,shift_imm) EOP_C_AM2_REG(cond,1,0,2,rn,rd,shift_imm,A_AM1_LSL,rm)
360
361#define EOP_LDRH_IMM2(cond,rd,rn,offset_8) EOP_C_AM3_IMM(cond,(offset_8) >= 0,1,rn,rd,0,1,pabs(offset_8))
362#define EOP_LDRH_REG2(cond,rd,rn,rm) EOP_C_AM3_REG(cond,1,1,rn,rd,0,1,rm)
363
364#define EOP_LDRH_IMM( rd,rn,offset_8) EOP_C_AM3_IMM(A_COND_AL,(offset_8) >= 0,1,rn,rd,0,1,pabs(offset_8))
365#define EOP_LDRH_SIMPLE(rd,rn) EOP_C_AM3_IMM(A_COND_AL,1,1,rn,rd,0,1,0)
366#define EOP_LDRH_REG( rd,rn,rm) EOP_C_AM3_REG(A_COND_AL,1,1,rn,rd,0,1,rm)
367#define EOP_STRH_IMM( rd,rn,offset_8) EOP_C_AM3_IMM(A_COND_AL,(offset_8) >= 0,0,rn,rd,0,1,pabs(offset_8))
368#define EOP_STRH_SIMPLE(rd,rn) EOP_C_AM3_IMM(A_COND_AL,1,0,rn,rd,0,1,0)
369#define EOP_STRH_REG( rd,rn,rm) EOP_C_AM3_REG(A_COND_AL,1,0,rn,rd,0,1,rm)
370
371#define EOP_LDRSB_IMM2(cond,rd,rn,offset_8) EOP_C_AM3_IMM(cond,(offset_8) >= 0,1,rn,rd,1,0,pabs(offset_8))
372#define EOP_LDRSB_REG2(cond,rd,rn,rm) EOP_C_AM3_REG(cond,1,1,rn,rd,1,0,rm)
373#define EOP_LDRSH_IMM2(cond,rd,rn,offset_8) EOP_C_AM3_IMM(cond,(offset_8) >= 0,1,rn,rd,1,1,pabs(offset_8))
374#define EOP_LDRSH_REG2(cond,rd,rn,rm) EOP_C_AM3_REG(cond,1,1,rn,rd,1,1,rm)
375
376/* ldm and stm */
377#define EOP_XXM(cond,p,u,s,w,l,rn,list) \
378 EMIT(((cond)<<28) | (1<<27) | ((p)<<24) | ((u)<<23) | ((s)<<22) | ((w)<<21) | ((l)<<20) | ((rn)<<16) | (list), \
379 M2(rn,l?NO:MEM)|(l?list:0), M3(rn,l?MEM:NO,l?CYC2:NO)|(l?0:list))
380
381#define EOP_STMIA(rb,list) EOP_XXM(A_COND_AL,0,1,0,0,0,rb,list)
382#define EOP_LDMIA(rb,list) EOP_XXM(A_COND_AL,0,1,0,0,1,rb,list)
383
384#define EOP_STMFD_SP(list) EOP_XXM(A_COND_AL,1,0,0,1,0,SP,list)
385#define EOP_LDMFD_SP(list) EOP_XXM(A_COND_AL,0,1,0,1,1,SP,list)
386
387/* branches */
388#define EOP_C_BX(cond,rm) \
389 EMIT(((cond)<<28) | 0x012fff10 | (rm), M1(PC), M1(rm))
390
391#define EOP_C_B_PTR(ptr,cond,l,signed_immed_24) \
392 EMIT_PTR(ptr, ((cond)<<28) | 0x0a000000 | ((l)<<24) | (signed_immed_24))
393
394#define EOP_C_B(cond,l,signed_immed_24) \
395 EMIT(((cond)<<28) | 0x0a000000 | ((l)<<24) | (signed_immed_24), M2(PC,l?LR:NO), M1(PC))
396
397#define EOP_B( signed_immed_24) EOP_C_B(A_COND_AL,0,signed_immed_24)
398#define EOP_BL(signed_immed_24) EOP_C_B(A_COND_AL,1,signed_immed_24)
399
400/* misc */
401#define EOP_C_MUL(cond,s,rd,rs,rm) \
402 EMIT(((cond)<<28) | ((s)<<20) | ((rd)<<16) | ((rs)<<8) | 0x90 | (rm), M2(rd,s?SR:NO), M3(rs,rm,CYC2))
403
404#define EOP_C_UMULL(cond,s,rdhi,rdlo,rs,rm) \
405 EMIT(((cond)<<28) | 0x00800000 | ((s)<<20) | ((rdhi)<<16) | ((rdlo)<<12) | ((rs)<<8) | 0x90 | (rm), M3(rdhi,rdlo,s?SR:NO), M4(rs,rm,CYC1,CYC2))
406
407#define EOP_C_SMULL(cond,s,rdhi,rdlo,rs,rm) \
408 EMIT(((cond)<<28) | 0x00c00000 | ((s)<<20) | ((rdhi)<<16) | ((rdlo)<<12) | ((rs)<<8) | 0x90 | (rm), M3(rdhi,rdlo,s?SR:NO), M4(rs,rm,CYC1,CYC2))
409
410#define EOP_C_SMLAL(cond,s,rdhi,rdlo,rs,rm) \
411 EMIT(((cond)<<28) | 0x00e00000 | ((s)<<20) | ((rdhi)<<16) | ((rdlo)<<12) | ((rs)<<8) | 0x90 | (rm), M3(rdhi,rdlo,s?SR:NO), M6(rs,rm,rdlo,rdhi,CYC1,CYC2))
412
413#define EOP_MUL(rd,rm,rs) EOP_C_MUL(A_COND_AL,0,rd,rs,rm) // note: rd != rm
414
415#define EOP_C_MRS(cond,rd) \
416 EMIT(((cond)<<28) | 0x010f0000 | ((rd)<<12), M1(rd), M1(SR))
417
418#define EOP_C_MSR_IMM(cond,ror2,imm) \
419 EMIT(((cond)<<28) | 0x0328f000 | ((ror2)<<8) | (imm), M1(SR), 0) // cpsr_f
420
421#define EOP_C_MSR_REG(cond,rm) \
422 EMIT(((cond)<<28) | 0x0128f000 | (rm), M1(SR), M1(rm)) // cpsr_f
423
424#define EOP_MRS(rd) EOP_C_MRS(A_COND_AL,rd)
425#define EOP_MSR_IMM(ror2,imm) EOP_C_MSR_IMM(A_COND_AL,ror2,imm)
426#define EOP_MSR_REG(rm) EOP_C_MSR_REG(A_COND_AL,rm)
427
428#define EOP_MOVW(cond,rd,imm) \
429 EMIT(((cond)<<28) | 0x03000000 | ((rd)<<12) | ((imm)&0xfff) | (((imm)<<4)&0xf0000), M1(rd), NO)
430
431#define EOP_MOVT(cond,rd,imm) \
432 EMIT(((cond)<<28) | 0x03400000 | ((rd)<<12) | (((imm)>>16)&0xfff) | (((imm)>>12)&0xf0000), M1(rd), NO)
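// illustrative use, mirroring the HAVE_ARMV7 path in emith_op_imm2() below:
//	EOP_MOVW(A_COND_AL,0,0x12345678);	// movw r0, #0x5678
//	EOP_MOVT(A_COND_AL,0,0x12345678);	// movt r0, #0x1234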
433
434// host literal pool; must be significantly smaller than 1024 (max LDR offset = 4095)
435#define MAX_HOST_LITERALS 128
436static u32 literal_pool[MAX_HOST_LITERALS];
437static u32 *literal_insn[MAX_HOST_LITERALS];
438static int literal_pindex, literal_iindex;
439
440static inline int emith_pool_literal(u32 imm, int *offs)
441{
442 int idx = literal_pindex - 8; // max look behind in pool
443 // see if one of the last literals was the same (or close enough)
444 for (idx = (idx < 0 ? 0 : idx); idx < literal_pindex; idx++)
445 if (abs((int)(imm - literal_pool[idx])) <= 0xff)
446 break;
447 if (idx == literal_pindex) // store new literal
448 literal_pool[literal_pindex++] = imm;
449 *offs = imm - literal_pool[idx];
450 return idx;
451}
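// e.g. a request for 0x20004010 shortly after 0x20004000 reuses the existing
// pool slot and returns *offs == 0x10; the caller compensates with an add/sub
// after the pool load (see the literal load path in emith_op_imm2 below)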
452
453// XXX: RSB, *S will break if 1 insn is not enough
454static void emith_op_imm2(int cond, int s, int op, int rd, int rn, unsigned int imm)
455{
456 int ror2;
457 u32 v;
458 int i;
459
460 if (cond == A_COND_NV)
461 return;
462
463 do {
464 u32 u;
465 // try to get the topmost byte empty to possibly save an insn
466 for (v = imm, ror2 = 0; (v >> 24) && ror2 < 32/2; ror2++)
467 v = (v << 2) | (v >> 30);
468
469 switch (op) {
470 case A_OP_MOV:
471 case A_OP_MVN:
472 rn = 0;
473 // use MVN if the value has more 1 bits than 0 bits
474 if (count_bits(imm) > 16) {
475 imm = ~imm;
476 op = A_OP_MVN;
477 ror2 = -1;
478 break;
479 }
480 // count insns needed for mov/orr #imm
481#ifdef HAVE_ARMV7
482 for (i = 2, u = v; i > 0 && u; i--, u >>= 8)
483 while (u > 0xff && !(u & 3))
484 u >>= 2;
485 if (u) { // 3+ insns needed...
486 if (op == A_OP_MVN)
487 imm = ~imm;
488 // ...prefer movw/movt
489 EOP_MOVW(cond,rd, imm);
490 if (imm & 0xffff0000)
491 EOP_MOVT(cond,rd, imm);
492 return;
493 }
494#else
495 for (i = 2, u = v; i > 0 && u; i--, u >>= 8)
496 while (u > 0xff && !(u & 3))
497 u >>= 2;
498 if (u) { // 3+ insns needed...
499 if (op == A_OP_MVN)
500 imm = ~imm;
501 // ...emit literal load
502 int idx, o;
503 if (literal_iindex >= MAX_HOST_LITERALS) {
504 elprintf(EL_STATUS|EL_SVP|EL_ANOMALY,
505 "pool overflow");
506 exit(1);
507 }
508 idx = emith_pool_literal(imm, &o);
509 literal_insn[literal_iindex++] = (u32 *)tcache_ptr;
510 EOP_LDR_IMM2(cond, rd, PC, idx * sizeof(u32));
511 if (o > 0)
512 EOP_C_DOP_IMM(cond, A_OP_ADD, 0,rd,rd,0,o);
513 else if (o < 0)
514 EOP_C_DOP_IMM(cond, A_OP_SUB, 0,rd,rd,0,-o);
515 return;
516 }
517#endif
518 break;
519
520 case A_OP_AND:
521 // AND must fit into 1 insn. if not, use BIC
522 for (u = v; u > 0xff && !(u & 3); u >>= 2) ;
523 if (u >> 8) {
524 imm = ~imm;
525 op = A_OP_BIC;
526 ror2 = -1;
527 }
528 break;
529
530 case A_OP_SUB:
531 case A_OP_ADD:
532 // swap ADD and SUB if the value has more 1 bits than 0 bits
533 if (s == 0 && count_bits(imm) > 16) {
534 imm = -imm;
535 op ^= (A_OP_ADD^A_OP_SUB);
536 ror2 = -1;
537 }
538 case A_OP_EOR:
539 case A_OP_ORR:
540 case A_OP_BIC:
541 if (s == 0 && imm == 0 && rd == rn)
542 return;
543 break;
544 }
545 } while (ror2 < 0);
546
547 do {
548 // shift down to get 'best' ror2
549 while (v > 0xff && !(v & 3))
550 v >>= 2, ror2--;
551 EOP_C_DOP_IMM(cond, op, s, rn, rd, ror2 & 0xf, v & 0xff);
552
553 switch (op) {
554 case A_OP_MOV: op = A_OP_ORR; break;
555 case A_OP_MVN: op = A_OP_BIC; break;
556 case A_OP_ADC: op = A_OP_ADD; break;
557 case A_OP_SBC: op = A_OP_SUB; break;
558 }
559 rn = rd;
560
561 v >>= 8, ror2 -= 8/2;
562 if (v && s) {
563 elprintf(EL_STATUS|EL_SVP|EL_ANOMALY, "op+s %x value too big", op);
564 exit(1);
565 }
566 } while (v);
567}
568
569#define emith_op_imm(cond, s, op, r, imm) \
570 emith_op_imm2(cond, s, op, r, r, imm)
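// worked example: 0x1234 doesn't fit a single rotated 8-bit immediate, so
// emith_add_r_imm(r, 0x1234) is split by emith_op_imm2 into two insns:
//	add r, r, #0x234
//	add r, r, #0x1000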
571
572// test op
573#define emith_top_imm(cond, op, r, imm) do { \
574 u32 ror2, v; \
575 for (ror2 = 0, v = imm; v && !(v & 3); v >>= 2) \
576 ror2--; \
577 EOP_C_DOP_IMM(cond, op, 1, r, 0, ror2 & 0x0f, v & 0xff); \
578} while (0)
579
580#define is_offset_24(val) \
581 ((val) >= (int)0xff000000 && (val) <= 0x00ffffff)
582
583static int emith_xbranch(int cond, void *target, int is_call)
584{
585 int val = (u32 *)target - (u32 *)tcache_ptr - 2;
586 int direct = is_offset_24(val);
587 u32 *start_ptr = (u32 *)tcache_ptr;
588
589 if (cond == A_COND_NV)
590 return 0; // never taken
591
592 if (direct)
593 {
594 EOP_C_B(cond,is_call,val & 0xffffff); // b, bl target
595 }
596 else
597 {
598#ifdef __EPOC32__
599// elprintf(EL_SVP, "emitting indirect jmp %08x->%08x", tcache_ptr, target);
600 if (is_call)
601 EOP_ADD_IMM(LR,PC,0,8); // add lr,pc,#8
602 EOP_C_AM2_IMM(cond,1,0,1,PC,PC,0); // ldrcc pc,[pc]
603 EOP_MOV_REG_SIMPLE(PC,PC); // mov pc, pc
604 EMIT((u32)target,M1(PC),0);
605#else
606 // should never happen
607 elprintf(EL_STATUS|EL_SVP|EL_ANOMALY, "indirect jmp %8p->%8p", target, tcache_ptr);
608 exit(1);
609#endif
610 }
611
612 return (u32 *)tcache_ptr - start_ptr;
613}
614
615static void emith_pool_commit(int jumpover)
616{
617 int i, sz = literal_pindex * sizeof(u32);
618 u8 *pool = (u8 *)tcache_ptr;
619
620 // nothing to commit if pool is empty
621 if (sz == 0)
622 return;
623 // need branch over pool if not at block end
624 if (jumpover < 0 && sz == sizeof(u32)) {
625 // hack for SVP drc (patch logic detects distance 4)
626 sz += sizeof(u32);
627 } else if (jumpover) {
628 pool += sizeof(u32);
629 emith_xbranch(A_COND_AL, (u8 *)pool + sz, 0);
630 }
631 emith_flush();
632 // safety check - pool must be after insns and reachable
633 if ((u32)(pool - (u8 *)literal_insn[0] + 8) > 0xfff) {
634 elprintf(EL_STATUS|EL_SVP|EL_ANOMALY,
635 "pool offset out of range");
636 exit(1);
637 }
638 // copy pool and adjust addresses in insns accessing the pool
639 memcpy(pool, literal_pool, sz);
640 for (i = 0; i < literal_iindex; i++) {
641 *literal_insn[i] += (u8 *)pool - ((u8 *)literal_insn[i] + 8);
642 }
643 // count pool constants as insns for statistics
644 for (i = 0; i < literal_pindex; i++)
645 COUNT_OP;
646
647 tcache_ptr = (void *)((u8 *)pool + sz);
648 literal_pindex = literal_iindex = 0;
649}
650
651static inline void emith_pool_check(void)
652{
653 // check if pool must be committed
654 if (literal_iindex > MAX_HOST_LITERALS-4 || (literal_pindex &&
655 (u8 *)tcache_ptr - (u8 *)literal_insn[0] > 0xe00))
656 // pool full, or displacement is approaching the limit
657 emith_pool_commit(1);
658}
659
660static inline void emith_pool_adjust(int tcache_offs, int move_offs)
661{
662 u32 *ptr = (u32 *)tcache_ptr - tcache_offs;
663 int i;
664
665 for (i = literal_iindex-1; i >= 0 && literal_insn[i] >= ptr; i--)
666 if (literal_insn[i] == ptr)
667 literal_insn[i] += move_offs;
668}
669
670#define EMITH_HINT_COND(cond) /**/
671
672#define JMP_POS(ptr) { \
673 ptr = tcache_ptr; \
674 EMIT(0,M1(PC),0); \
675}
676
677#define JMP_EMIT(cond, ptr) { \
678 u32 val_ = (u32 *)tcache_ptr - (u32 *)(ptr) - 2; \
679 emith_flush(); /* NO insn swapping across jump targets */ \
680 EOP_C_B_PTR(ptr, cond, 0, val_ & 0xffffff); \
681}
682
683#define EMITH_JMP_START(cond) { \
684 void *cond_ptr; \
685 JMP_POS(cond_ptr)
686
687#define EMITH_JMP_END(cond) \
688 JMP_EMIT(cond, cond_ptr); \
689}
690
691// fake "simple" or "short" jump - using cond insns instead
692#define EMITH_NOTHING1(cond) \
693 (void)(cond)
694
695#define EMITH_SJMP_START(cond) EMITH_NOTHING1(cond)
696#define EMITH_SJMP_END(cond) EMITH_NOTHING1(cond)
697#define EMITH_SJMP2_START(cond) EMITH_NOTHING1(cond)
698#define EMITH_SJMP2_MID(cond) EMITH_JMP_START((cond)^1) // inverse cond
699#define EMITH_SJMP2_END(cond) EMITH_JMP_END((cond)^1)
700#define EMITH_SJMP3_START(cond) EMITH_NOTHING1(cond)
701#define EMITH_SJMP3_MID(cond) EMITH_NOTHING1(cond)
702#define EMITH_SJMP3_END()
703
704#define emith_move_r_r_c(cond, d, s) \
705 EOP_MOV_REG(cond,0,d,s,A_AM1_LSL,0)
706#define emith_move_r_r(d, s) \
707 emith_move_r_r_c(A_COND_AL, d, s)
708
709#define emith_move_r_r_ptr_c(cond, d, s) \
710 emith_move_r_r_c(cond, d, s)
711#define emith_move_r_r_ptr(d, s) \
712 emith_move_r_r(d, s)
713
714#define emith_mvn_r_r(d, s) \
715 EOP_MVN_REG(A_COND_AL,0,d,s,A_AM1_LSL,0)
716
717#define emith_add_r_r_r_lsl(d, s1, s2, lslimm) \
718 EOP_ADD_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSL,lslimm)
719#define emith_add_r_r_r_lsl_ptr(d, s1, s2, lslimm) \
720 emith_add_r_r_r_lsl(d, s1, s2, lslimm)
721
722#define emith_adc_r_r_r_lsl(d, s1, s2, lslimm) \
723 EOP_ADC_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSL,lslimm)
724
725#define emith_addf_r_r_r_lsl(d, s1, s2, lslimm) \
726 EOP_ADD_REG(A_COND_AL,1,d,s1,s2,A_AM1_LSL,lslimm)
727#define emith_addf_r_r_r_lsr(d, s1, s2, lslimm) \
728 EOP_ADD_REG(A_COND_AL,1,d,s1,s2,A_AM1_LSR,lslimm)
729
730#define emith_adcf_r_r_r_lsl(d, s1, s2, lslimm) \
731 EOP_ADC_REG(A_COND_AL,1,d,s1,s2,A_AM1_LSL,lslimm)
732
733#define emith_sub_r_r_r_lsl(d, s1, s2, lslimm) \
734 EOP_SUB_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSL,lslimm)
735
736#define emith_sbc_r_r_r_lsl(d, s1, s2, lslimm) \
737 EOP_SBC_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSL,lslimm)
738
739#define emith_subf_r_r_r_lsl(d, s1, s2, lslimm) \
740 EOP_SUB_REG(A_COND_AL,1,d,s1,s2,A_AM1_LSL,lslimm)
741
742#define emith_sbcf_r_r_r_lsl(d, s1, s2, lslimm) \
743 EOP_SBC_REG(A_COND_AL,1,d,s1,s2,A_AM1_LSL,lslimm)
744
745#define emith_or_r_r_r_lsl(d, s1, s2, lslimm) \
746 EOP_ORR_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSL,lslimm)
747#define emith_or_r_r_r_lsr(d, s1, s2, lsrimm) \
748 EOP_ORR_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSR,lsrimm)
749
750#define emith_eor_r_r_r_lsl(d, s1, s2, lslimm) \
751 EOP_EOR_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSL,lslimm)
752#define emith_eor_r_r_r_lsr(d, s1, s2, lsrimm) \
753 EOP_EOR_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSR,lsrimm)
754
755#define emith_and_r_r_r_lsl(d, s1, s2, lslimm) \
756 EOP_AND_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSL,lslimm)
757
758#define emith_or_r_r_lsl(d, s, lslimm) \
759 emith_or_r_r_r_lsl(d, d, s, lslimm)
760#define emith_or_r_r_lsr(d, s, lsrimm) \
761 emith_or_r_r_r_lsr(d, d, s, lsrimm)
762
763#define emith_eor_r_r_lsl(d, s, lslimm) \
764 emith_eor_r_r_r_lsl(d, d, s, lslimm)
765#define emith_eor_r_r_lsr(d, s, lsrimm) \
766 emith_eor_r_r_r_lsr(d, d, s, lsrimm)
767
768#define emith_add_r_r_r(d, s1, s2) \
769 emith_add_r_r_r_lsl(d, s1, s2, 0)
770
771#define emith_adc_r_r_r(d, s1, s2) \
772 emith_adc_r_r_r_lsl(d, s1, s2, 0)
773
774#define emith_addf_r_r_r(d, s1, s2) \
775 emith_addf_r_r_r_lsl(d, s1, s2, 0)
776
777#define emith_adcf_r_r_r(d, s1, s2) \
778 emith_adcf_r_r_r_lsl(d, s1, s2, 0)
779
780#define emith_sub_r_r_r(d, s1, s2) \
781 emith_sub_r_r_r_lsl(d, s1, s2, 0)
782
783#define emith_sbc_r_r_r(d, s1, s2) \
784 emith_sbc_r_r_r_lsl(d, s1, s2, 0)
785
786#define emith_subf_r_r_r(d, s1, s2) \
787 emith_subf_r_r_r_lsl(d, s1, s2, 0)
788
789#define emith_sbcf_r_r_r(d, s1, s2) \
790 emith_sbcf_r_r_r_lsl(d, s1, s2, 0)
791
792#define emith_or_r_r_r(d, s1, s2) \
793 emith_or_r_r_r_lsl(d, s1, s2, 0)
794
795#define emith_eor_r_r_r(d, s1, s2) \
796 emith_eor_r_r_r_lsl(d, s1, s2, 0)
797
798#define emith_and_r_r_r(d, s1, s2) \
799 emith_and_r_r_r_lsl(d, s1, s2, 0)
800
801#define emith_add_r_r(d, s) \
802 emith_add_r_r_r(d, d, s)
803
804#define emith_add_r_r_ptr(d, s) \
805 emith_add_r_r_r(d, d, s)
806
807#define emith_adc_r_r(d, s) \
808 emith_adc_r_r_r(d, d, s)
809
810#define emith_sub_r_r(d, s) \
811 emith_sub_r_r_r(d, d, s)
812
813#define emith_sbc_r_r(d, s) \
814 emith_sbc_r_r_r(d, d, s)
815
816#define emith_negc_r_r(d, s) \
817 EOP_C_DOP_IMM(A_COND_AL,A_OP_RSC,0,s,d,0,0)
818
819#define emith_and_r_r_c(cond, d, s) \
820 EOP_AND_REG(cond,0,d,d,s,A_AM1_LSL,0)
821#define emith_and_r_r(d, s) \
822 EOP_AND_REG(A_COND_AL,0,d,d,s,A_AM1_LSL,0)
823
824#define emith_or_r_r(d, s) \
825 emith_or_r_r_r(d, d, s)
826
827#define emith_eor_r_r(d, s) \
828 emith_eor_r_r_r(d, d, s)
829
830#define emith_tst_r_r(d, s) \
831 EOP_TST_REG(A_COND_AL,d,s,A_AM1_LSL,0)
832
833#define emith_tst_r_r_ptr(d, s) \
834 emith_tst_r_r(d, s)
835
836#define emith_teq_r_r(d, s) \
837 EOP_TEQ_REG(A_COND_AL,d,s,A_AM1_LSL,0)
838
839#define emith_cmp_r_r(d, s) \
840 EOP_CMP_REG(A_COND_AL,d,s,A_AM1_LSL,0)
841
842#define emith_addf_r_r(d, s) \
843 EOP_ADD_REG(A_COND_AL,1,d,d,s,A_AM1_LSL,0)
844
845#define emith_subf_r_r(d, s) \
846 EOP_SUB_REG(A_COND_AL,1,d,d,s,A_AM1_LSL,0)
847
848#define emith_adcf_r_r(d, s) \
849 EOP_ADC_REG(A_COND_AL,1,d,d,s,A_AM1_LSL,0)
850
851#define emith_sbcf_r_r(d, s) \
852 EOP_SBC_REG(A_COND_AL,1,d,d,s,A_AM1_LSL,0)
853
854#define emith_eorf_r_r(d, s) \
855 EOP_EOR_REG(A_COND_AL,1,d,d,s,A_AM1_LSL,0)
856
857#define emith_move_r_imm(r, imm) \
858 emith_op_imm(A_COND_AL, 0, A_OP_MOV, r, imm)
859
860#define emith_move_r_ptr_imm(r, imm) \
861 emith_move_r_imm(r, (u32)(imm))
862
863#define emith_add_r_imm(r, imm) \
864 emith_op_imm(A_COND_AL, 0, A_OP_ADD, r, imm)
865
866#define emith_adc_r_imm(r, imm) \
867 emith_op_imm(A_COND_AL, 0, A_OP_ADC, r, imm)
868
869#define emith_adcf_r_imm(r, imm) \
870 emith_op_imm(A_COND_AL, 1, A_OP_ADC, r, imm)
871
872#define emith_sub_r_imm(r, imm) \
873 emith_op_imm(A_COND_AL, 0, A_OP_SUB, r, imm)
874
875#define emith_bic_r_imm(r, imm) \
876 emith_op_imm(A_COND_AL, 0, A_OP_BIC, r, imm)
877
878#define emith_and_r_imm(r, imm) \
879 emith_op_imm(A_COND_AL, 0, A_OP_AND, r, imm)
880
881#define emith_or_r_imm(r, imm) \
882 emith_op_imm(A_COND_AL, 0, A_OP_ORR, r, imm)
883
884#define emith_eor_r_imm(r, imm) \
885 emith_op_imm(A_COND_AL, 0, A_OP_EOR, r, imm)
886
887#define emith_eor_r_imm_ptr(r, imm) \
888 emith_eor_r_imm(r, imm)
889
890// note: only use 8bit imm for these
891#define emith_tst_r_imm(r, imm) \
892 emith_top_imm(A_COND_AL, A_OP_TST, r, imm)
893
894#define emith_cmp_r_imm(r, imm) do { \
895 u32 op_ = A_OP_CMP, imm_ = (u8)imm; \
896 if ((s8)imm_ < 0) { \
897 imm_ = (u8)-imm_; \
898 op_ = A_OP_CMN; \
899 } \
900 emith_top_imm(A_COND_AL, op_, r, imm_); \
901} while (0)
902
903#define emith_subf_r_imm(r, imm) \
904 emith_op_imm(A_COND_AL, 1, A_OP_SUB, r, imm)
905
906#define emith_move_r_imm_c(cond, r, imm) \
907 emith_op_imm(cond, 0, A_OP_MOV, r, imm)
908
909#define emith_add_r_imm_c(cond, r, imm) \
910 emith_op_imm(cond, 0, A_OP_ADD, r, imm)
911
912#define emith_sub_r_imm_c(cond, r, imm) \
913 emith_op_imm(cond, 0, A_OP_SUB, r, imm)
914
915#define emith_or_r_imm_c(cond, r, imm) \
916 emith_op_imm(cond, 0, A_OP_ORR, r, imm)
917
918#define emith_eor_r_imm_c(cond, r, imm) \
919 emith_op_imm(cond, 0, A_OP_EOR, r, imm)
920
921#define emith_eor_r_imm_ptr_c(cond, r, imm) \
922 emith_eor_r_imm_c(cond, r, imm)
923
924#define emith_bic_r_imm_c(cond, r, imm) \
925 emith_op_imm(cond, 0, A_OP_BIC, r, imm)
926
927#define emith_tst_r_imm_c(cond, r, imm) \
928 emith_top_imm(cond, A_OP_TST, r, imm)
929
930#define emith_move_r_imm_s8_patchable(r, imm) do { \
931 emith_flush(); /* pin insn at current tcache_ptr for patching */ \
932 if ((s8)(imm) < 0) \
933 EOP_MVN_IMM(r, 0, (u8)~(imm)); \
934 else \
935 EOP_MOV_IMM(r, 0, (u8)(imm)); \
936} while (0)
937#define emith_move_r_imm_s8_patch(ptr, imm) do { \
938 u32 *ptr_ = (u32 *)ptr; u32 op_ = *ptr_ & 0xfe1ff000; \
939 if ((s8)(imm) < 0) \
940 EMIT_PTR(ptr_, op_ | (A_OP_MVN<<21) | (u8)~(imm));\
941 else \
942 EMIT_PTR(ptr_, op_ | (A_OP_MOV<<21) | (u8)(imm));\
943} while (0)
944
945#define emith_and_r_r_imm(d, s, imm) \
946 emith_op_imm2(A_COND_AL, 0, A_OP_AND, d, s, imm)
947
948#define emith_add_r_r_imm(d, s, imm) \
949 emith_op_imm2(A_COND_AL, 0, A_OP_ADD, d, s, imm)
950
951#define emith_add_r_r_ptr_imm(d, s, imm) \
952 emith_add_r_r_imm(d, s, imm)
953
954#define emith_sub_r_r_imm_c(cond, d, s, imm) \
955 emith_op_imm2(cond, 0, A_OP_SUB, d, s, (imm))
956
957#define emith_sub_r_r_imm(d, s, imm) \
958 emith_op_imm2(A_COND_AL, 0, A_OP_SUB, d, s, imm)
959
960#define emith_subf_r_r_imm(d, s, imm) \
961 emith_op_imm2(A_COND_AL, 1, A_OP_SUB, d, s, imm)
962
963#define emith_or_r_r_imm(d, s, imm) \
964 emith_op_imm2(A_COND_AL, 0, A_OP_ORR, d, s, imm)
965
966#define emith_eor_r_r_imm(d, s, imm) \
967 emith_op_imm2(A_COND_AL, 0, A_OP_EOR, d, s, imm)
968
969#define emith_neg_r_r(d, s) \
970 EOP_RSB_IMM(d, s, 0, 0)
971
972#define emith_lsl(d, s, cnt) \
973 EOP_MOV_REG(A_COND_AL,0,d,s,A_AM1_LSL,cnt)
974
975#define emith_lsr(d, s, cnt) \
976 EOP_MOV_REG(A_COND_AL,0,d,s,A_AM1_LSR,cnt)
977
978#define emith_asr(d, s, cnt) \
979 EOP_MOV_REG(A_COND_AL,0,d,s,A_AM1_ASR,cnt)
980
981#define emith_ror_c(cond, d, s, cnt) \
982 EOP_MOV_REG(cond,0,d,s,A_AM1_ROR,cnt)
983
984#define emith_ror(d, s, cnt) \
985 emith_ror_c(A_COND_AL, d, s, cnt)
986
987#define emith_rol(d, s, cnt) \
988 EOP_MOV_REG(A_COND_AL,0,d,s,A_AM1_ROR,32-(cnt)); \
989
990#define emith_lslf(d, s, cnt) \
991 EOP_MOV_REG(A_COND_AL,1,d,s,A_AM1_LSL,cnt)
992
993#define emith_lsrf(d, s, cnt) \
994 EOP_MOV_REG(A_COND_AL,1,d,s,A_AM1_LSR,cnt)
995
996#define emith_asrf(d, s, cnt) \
997 EOP_MOV_REG(A_COND_AL,1,d,s,A_AM1_ASR,cnt)
998
999// note: only C flag updated correctly
1000#define emith_rolf(d, s, cnt) do { \
1001 EOP_MOV_REG(A_COND_AL,1,d,s,A_AM1_ROR,32-(cnt)); \
1002 /* we don't have ROL so we shift to get the right carry */ \
1003 EOP_TST_REG(A_COND_AL,d,d,A_AM1_LSR,1); \
1004} while (0)
1005
1006#define emith_rorf(d, s, cnt) \
1007 EOP_MOV_REG(A_COND_AL,1,d,s,A_AM1_ROR,cnt)
1008
1009#define emith_rolcf(d) \
1010 emith_adcf_r_r(d, d)
1011#define emith_rolc(d) \
1012 emith_adc_r_r(d, d)
1013
1014#define emith_rorcf(d) \
1015 EOP_MOV_REG(A_COND_AL,1,d,d,A_AM1_ROR,0) /* ROR #0 -> RRX */
1016#define emith_rorc(d) \
1017 EOP_MOV_REG(A_COND_AL,0,d,d,A_AM1_ROR,0) /* ROR #0 -> RRX */
1018
1019#define emith_negcf_r_r(d, s) \
1020 EOP_C_DOP_IMM(A_COND_AL,A_OP_RSC,1,s,d,0,0)
1021
1022#define emith_mul(d, s1, s2) do { \
1023 if ((d) != (s1)) /* rd != rm limitation */ \
1024 EOP_MUL(d, s1, s2); \
1025 else \
1026 EOP_MUL(d, s2, s1); \
1027} while (0)
1028
1029#define emith_mul_u64(dlo, dhi, s1, s2) \
1030 EOP_C_UMULL(A_COND_AL,0,dhi,dlo,s1,s2)
1031
1032#define emith_mul_s64(dlo, dhi, s1, s2) \
1033 EOP_C_SMULL(A_COND_AL,0,dhi,dlo,s1,s2)
1034
1035#define emith_mula_s64_c(cond, dlo, dhi, s1, s2) \
1036 EOP_C_SMLAL(cond,0,dhi,dlo,s1,s2)
1037#define emith_mula_s64(dlo, dhi, s1, s2) \
1038 EOP_C_SMLAL(A_COND_AL,0,dhi,dlo,s1,s2)
1039
1040// misc
1041#define emith_read_r_r_offs_c(cond, r, rs, offs) \
1042 EOP_LDR_IMM2(cond, r, rs, offs)
1043#define emith_read_r_r_offs_ptr_c(cond, r, rs, offs) \
1044 emith_read_r_r_offs_c(cond, r, rs, offs)
1045#define emith_read_r_r_r_c(cond, r, rs, rm) \
1046 EOP_LDR_REG_LSL(cond, r, rs, rm, 0)
1047#define emith_read_r_r_offs(r, rs, offs) \
1048 emith_read_r_r_offs_c(A_COND_AL, r, rs, offs)
1049#define emith_read_r_r_offs_ptr(r, rs, offs) \
1050 emith_read_r_r_offs_c(A_COND_AL, r, rs, offs)
1051#define emith_read_r_r_r(r, rs, rm) \
1052 EOP_LDR_REG_LSL(A_COND_AL, r, rs, rm, 0)
1053
1054#define emith_read8_r_r_offs_c(cond, r, rs, offs) \
1055 EOP_LDRB_IMM2(cond, r, rs, offs)
1056#define emith_read8_r_r_r_c(cond, r, rs, rm) \
1057 EOP_LDRB_REG_LSL(cond, r, rs, rm, 0)
1058#define emith_read8_r_r_offs(r, rs, offs) \
1059 emith_read8_r_r_offs_c(A_COND_AL, r, rs, offs)
1060#define emith_read8_r_r_r(r, rs, rm) \
1061 emith_read8_r_r_r_c(A_COND_AL, r, rs, rm)
1062
1063#define emith_read16_r_r_offs_c(cond, r, rs, offs) \
1064 EOP_LDRH_IMM2(cond, r, rs, offs)
1065#define emith_read16_r_r_r_c(cond, r, rs, rm) \
1066 EOP_LDRH_REG2(cond, r, rs, rm)
1067#define emith_read16_r_r_offs(r, rs, offs) \
1068 emith_read16_r_r_offs_c(A_COND_AL, r, rs, offs)
1069#define emith_read16_r_r_r(r, rs, rm) \
1070 emith_read16_r_r_r_c(A_COND_AL, r, rs, rm)
1071
1072#define emith_read8s_r_r_offs_c(cond, r, rs, offs) \
1073 EOP_LDRSB_IMM2(cond, r, rs, offs)
1074#define emith_read8s_r_r_r_c(cond, r, rs, rm) \
1075 EOP_LDRSB_REG2(cond, r, rs, rm)
1076#define emith_read8s_r_r_offs(r, rs, offs) \
1077 emith_read8s_r_r_offs_c(A_COND_AL, r, rs, offs)
1078#define emith_read8s_r_r_r(r, rs, rm) \
1079 emith_read8s_r_r_r_c(A_COND_AL, r, rs, rm)
1080
1081#define emith_read16s_r_r_offs_c(cond, r, rs, offs) \
1082 EOP_LDRSH_IMM2(cond, r, rs, offs)
1083#define emith_read16s_r_r_r_c(cond, r, rs, rm) \
1084 EOP_LDRSH_REG2(cond, r, rs, rm)
1085#define emith_read16s_r_r_offs(r, rs, offs) \
1086 emith_read16s_r_r_offs_c(A_COND_AL, r, rs, offs)
1087#define emith_read16s_r_r_r(r, rs, rm) \
1088 emith_read16s_r_r_r_c(A_COND_AL, r, rs, rm)
1089
1090#define emith_write_r_r_offs_c(cond, r, rs, offs) \
1091 EOP_STR_IMM2(cond, r, rs, offs)
1092#define emith_write_r_r_offs_ptr_c(cond, r, rs, offs) \
1093 emith_write_r_r_offs_c(cond, r, rs, offs)
1094#define emith_write_r_r_offs(r, rs, offs) \
1095 emith_write_r_r_offs_c(A_COND_AL, r, rs, offs)
1096#define emith_write_r_r_offs_ptr(r, rs, offs) \
1097 emith_write_r_r_offs_c(A_COND_AL, r, rs, offs)
1098
1099#define emith_ctx_read_c(cond, r, offs) \
1100 emith_read_r_r_offs_c(cond, r, CONTEXT_REG, offs)
1101#define emith_ctx_read(r, offs) \
1102 emith_ctx_read_c(A_COND_AL, r, offs)
1103
1104#define emith_ctx_read_ptr(r, offs) \
1105 emith_ctx_read(r, offs)
1106
1107#define emith_ctx_write(r, offs) \
1108 EOP_STR_IMM(r, CONTEXT_REG, offs)
1109
1110#define emith_ctx_do_multiple(op, r, offs, count, tmpr) do { \
1111 int v_, r_ = r, c_ = count, b_ = CONTEXT_REG; \
1112 for (v_ = 0; c_; c_--, r_++) \
1113 v_ |= M1(r_); \
1114 if ((offs) != 0) { \
1115 EOP_ADD_IMM(tmpr,CONTEXT_REG,30/2,(offs)>>2);\
1116 b_ = tmpr; \
1117 } \
1118 op(b_,v_); \
1119} while (0)
1120
1121#define emith_ctx_read_multiple(r, offs, count, tmpr) \
1122 emith_ctx_do_multiple(EOP_LDMIA, r, offs, count, tmpr)
1123
1124#define emith_ctx_write_multiple(r, offs, count, tmpr) \
1125 emith_ctx_do_multiple(EOP_STMIA, r, offs, count, tmpr)
1126
1127#define emith_clear_msb_c(cond, d, s, count) do { \
1128 u32 t; \
1129 if ((count) <= 8) { \
1130 t = 8 - (count); \
1131 t = (0xff << t) & 0xff; \
1132 EOP_C_DOP_IMM(cond,A_OP_BIC,0,s,d,8/2,t); \
1133 } else if ((count) >= 24) { \
1134 t = (count) - 24; \
1135 t = 0xff >> t; \
1136 EOP_C_DOP_IMM(cond,A_OP_AND,0,s,d,0,t); \
1137 } else { \
1138 EOP_MOV_REG(cond,0,d,s,A_AM1_LSL,count); \
1139 EOP_MOV_REG(cond,0,d,d,A_AM1_LSR,count); \
1140 } \
1141} while (0)
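// e.g. emith_clear_msb(d, s, 8) emits "bic d, s, #0xff000000" and
// emith_clear_msb(d, s, 24) emits "and d, s, #0xff"; counts in between use
// the lsl/lsr pair instead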
1142
1143#define emith_clear_msb(d, s, count) \
1144 emith_clear_msb_c(A_COND_AL, d, s, count)
1145
1146#define emith_sext(d, s, bits) do { \
1147 EOP_MOV_REG_LSL(d,s,32 - (bits)); \
1148 EOP_MOV_REG_ASR(d,d,32 - (bits)); \
1149} while (0)
1150
1151#define emith_uext_ptr(r) /**/
1152
1153#define emith_do_caller_regs(mask, func) do { \
1154 u32 _reg_mask = (mask) & 0x500f; \
1155 if (_reg_mask) { \
1156 if (__builtin_parity(_reg_mask) == 1) \
1157 _reg_mask |= 0x10; /* eabi align */ \
1158 func(_reg_mask); \
1159 } \
1160} while (0)
1161
1162#define emith_save_caller_regs(mask) \
1163 emith_do_caller_regs(mask, EOP_STMFD_SP)
1164
1165#define emith_restore_caller_regs(mask) \
1166 emith_do_caller_regs(mask, EOP_LDMFD_SP)
1167
1168// up to 4 args
1169#define emith_pass_arg_r(arg, reg) \
1170 EOP_MOV_REG_SIMPLE(arg, reg)
1171
1172#define emith_pass_arg_imm(arg, imm) \
1173 emith_move_r_imm(arg, imm)
1174
1175#define emith_jump(target) \
1176 emith_jump_cond(A_COND_AL, target)
1177
1178#define emith_jump_patchable(target) \
1179 emith_jump(target)
1180
1181#define emith_jump_cond(cond, target) \
1182 emith_xbranch(cond, target, 0)
1183#define emith_jump_cond_inrange(target) !0
1184
1185#define emith_jump_cond_patchable(cond, target) \
1186 emith_jump_cond(cond, target)
1187
1188#define emith_jump_patch(ptr, target, pos) do { \
1189 u32 *ptr_ = (u32 *)ptr; \
1190 u32 val_ = (u32 *)(target) - ptr_ - 2; \
1191 *ptr_ = (*ptr_ & 0xff000000) | (val_ & 0x00ffffff); \
1192 if ((void *)(pos) != NULL) *(u8 **)(pos) = (u8 *)ptr; \
1193} while (0)
1194#define emith_jump_patch_inrange(ptr, target) !0
1195#define emith_jump_patch_size() 4
1196
1197#define emith_jump_at(ptr, target) do { \
1198 u32 *ptr_ = (u32 *)ptr; \
1199 u32 val_ = (u32 *)(target) - ptr_ - 2; \
1200 EOP_C_B_PTR(ptr_, A_COND_AL, 0, val_ & 0xffffff); \
1201} while (0)
1202#define emith_jump_at_size() 4
1203
1204#define emith_jump_reg_c(cond, r) \
1205 EOP_C_BX(cond, r)
1206
1207#define emith_jump_reg(r) \
1208 emith_jump_reg_c(A_COND_AL, r)
1209
1210#define emith_jump_ctx_c(cond, offs) \
1211 EOP_LDR_IMM2(cond,PC,CONTEXT_REG,offs)
1212
1213#define emith_jump_ctx(offs) \
1214 emith_jump_ctx_c(A_COND_AL, offs)
1215
1216#define emith_call_cond(cond, target) \
1217 emith_xbranch(cond, target, 1)
1218
1219#define emith_call(target) \
1220 emith_call_cond(A_COND_AL, target)
1221
1222#define emith_call_reg(r) do { \
1223 emith_move_r_r(LR, PC); \
1224 EOP_C_BX(A_COND_AL, r); \
1225} while (0)
1226
1227#define emith_abicall_ctx(offs) do { \
1228 emith_move_r_r(LR, PC); \
1229 emith_jump_ctx(offs); \
1230} while (0)
1231
1232#define emith_abijump_reg(r) \
1233 emith_jump_reg(r)
1234#define emith_abijump_reg_c(cond, r) \
1235 emith_jump_reg_c(cond, r)
1236#define emith_abicall(target) \
1237 emith_call(target)
1238#define emith_abicall_cond(cond, target) \
1239 emith_call_cond(cond, target)
1240#define emith_abicall_reg(r) \
1241 emith_call_reg(r)
1242
1243#define emith_call_cleanup() /**/
1244
1245#define emith_ret_c(cond) \
1246 emith_jump_reg_c(cond, LR)
1247
1248#define emith_ret() \
1249 emith_ret_c(A_COND_AL)
1250
1251#define emith_ret_to_ctx(offs) \
1252 emith_ctx_write(LR, offs)
1253
1254#define emith_add_r_ret(r) \
1255 emith_add_r_r_ptr(r, LR)
1256
1257/* pushes r12 for eabi alignment */
1258#define emith_push_ret(r) do { \
1259 int r_ = (r >= 0 ? r : 12); \
1260 EOP_STMFD_SP(M2(r_,LR)); \
1261} while (0)
1262
1263#define emith_pop_and_ret(r) do { \
1264 int r_ = (r >= 0 ? r : 12); \
1265 EOP_LDMFD_SP(M2(r_,PC)); \
1266} while (0)
1267
1268#define host_instructions_updated(base, end, force) \
1269 do { if (force) emith_update_add(base, end); } while (0)
1270
1271#define host_call(addr, args) \
1272 addr
1273
1274#define host_arg2reg(rd, arg) \
1275 rd = arg
1276
1277#define emith_rw_offs_max() 0x1ff // minimum of offset in AM2 and AM3
1278
1279/* SH2 drc specific */
1280/* pushes r12 for eabi alignment */
1281#define emith_sh2_drc_entry() \
1282 EOP_STMFD_SP(M10(4,5,6,7,8,9,10,11,12,LR))
1283
1284#define emith_sh2_drc_exit() \
1285 EOP_LDMFD_SP(M10(4,5,6,7,8,9,10,11,12,PC))
1286
1287// assumes a is in arg0, tab, func and mask are temp
1288#define emith_sh2_rcall(a, tab, func, mask) do { \
1289 emith_lsr(mask, a, SH2_READ_SHIFT); \
1290 EOP_ADD_REG_LSL(tab, tab, mask, 3); \
1291 if (func < mask) EOP_LDMIA(tab, M2(func,mask)); /* ldm if possible */ \
1292 else { emith_read_r_r_offs(func, tab, 0); \
1293 emith_read_r_r_offs(mask, tab, 4); } \
1294 emith_addf_r_r_r(func,func,func); \
1295} while (0)
1296
1297// assumes a, val are in arg0 and arg1, tab and func are temp
1298#define emith_sh2_wcall(a, val, tab, func) do { \
1299 emith_lsr(func, a, SH2_WRITE_SHIFT); \
1300 EOP_LDR_REG_LSL(A_COND_AL,func,tab,func,2); \
1301 emith_move_r_r(2, CONTEXT_REG); /* arg2 */ \
1302 emith_abijump_reg(func); \
1303} while (0)
1304
1305#define emith_sh2_dtbf_loop() do { \
1306 int cr, rn; \
1307 int tmp_ = rcache_get_tmp(); \
1308 cr = rcache_get_reg(SHR_SR, RC_GR_RMW); \
1309 rn = rcache_get_reg((op >> 8) & 0x0f, RC_GR_RMW); \
1310 emith_sub_r_imm(rn, 1); /* sub rn, #1 */ \
1311 emith_bic_r_imm(cr, 1); /* bic cr, #1 */ \
1312 emith_sub_r_imm(cr, (cycles+1) << 12); /* sub cr, #(cycles+1)<<12 */ \
1313 cycles = 0; \
1314 emith_asrf(tmp_, cr, 2+12); /* movs tmp_, cr, asr #2+12 */\
1315 EOP_MOV_IMM_C(A_COND_MI,tmp_,0,0); /* movmi tmp_, #0 */ \
1316 emith_lsl(cr, cr, 20); /* mov cr, cr, lsl #20 */ \
1317 emith_lsr(cr, cr, 20); /* mov cr, cr, lsr #20 */ \
1318 emith_subf_r_r(rn, tmp_); /* subs rn, tmp_ */ \
1319 EOP_RSB_IMM_C(A_COND_LS,tmp_,rn,0,0); /* rsbls tmp_, rn, #0 */ \
1320 EOP_ORR_REG(A_COND_LS,0,cr,cr,tmp_,A_AM1_LSL,12+2); /* orrls cr,tmp_,lsl #12+2 */\
1321 EOP_ORR_IMM_C(A_COND_LS,cr,cr,0,1); /* orrls cr, #1 */ \
1322 EOP_MOV_IMM_C(A_COND_LS,rn,0,0); /* movls rn, #0 */ \
1323 rcache_free_tmp(tmp_); \
1324} while (0)
1325
1326#define emith_sh2_delay_loop(cycles, reg) do { \
1327 int sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); \
1328 int t1 = rcache_get_tmp(); \
1329 int t2 = rcache_get_tmp(); \
1330 int t3 = rcache_get_tmp(); \
1331 /* if (sr < 0) return */ \
1332 emith_cmp_r_imm(sr, 0); \
1333 EMITH_JMP_START(DCOND_LE); \
1334 /* turns = sr.cycles / cycles */ \
1335 emith_asr(t2, sr, 12); \
1336 emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles))); \
1337 emith_mul_u64(t1, t2, t2, t3); /* multiply by 1/x */ \
1338 rcache_free_tmp(t3); \
1339 if (reg >= 0) { \
1340 /* if (reg <= turns) turns = reg-1 */ \
1341 t3 = rcache_get_reg(reg, RC_GR_RMW, NULL); \
1342 emith_cmp_r_r(t3, t2); \
1343 emith_sub_r_r_imm_c(DCOND_LS, t2, t3, 1); \
1344 /* if (reg <= 1) turns = 0 */ \
1345 emith_cmp_r_imm(t3, 1); \
1346 emith_move_r_imm_c(DCOND_LS, t2, 0); \
1347 /* reg -= turns */ \
1348 emith_sub_r_r(t3, t2); \
1349 } \
1350 /* sr.cycles -= turns * cycles; */ \
1351 emith_move_r_imm(t1, cycles); \
1352 emith_mul(t1, t2, t1); \
1353 emith_sub_r_r_r_lsl(sr, sr, t1, 12); \
1354 EMITH_JMP_END(DCOND_LE); \
1355 rcache_free_tmp(t1); \
1356 rcache_free_tmp(t2); \
1357} while (0)
1358
1359#define emith_write_sr(sr, srcr) do { \
1360 emith_lsr(sr, sr, 10); \
1361 emith_or_r_r_r_lsl(sr, sr, srcr, 22); \
1362 emith_ror(sr, sr, 22); \
1363} while (0)
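// net effect: sr = (sr & ~0x3ff) | (srcr & 0x3ff); the upper 22 bits of SR
// (drc-internal state such as the cycle count) are preserved, only the low
// 10 SH2 flag bits are taken from srcr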
1364
1365#define emith_carry_to_t(srr, is_sub) do { \
1366 emith_bic_r_imm(srr, 1); \
1367 if (is_sub) /* has inverted C on ARM */ \
1368 emith_or_r_imm_c(A_COND_CC, srr, 1); \
1369 else \
1370 emith_or_r_imm_c(A_COND_CS, srr, 1); \
1371} while (0)
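// (ARM sets C to "no borrow" after a subtraction, while the SH2 T bit holds
// the borrow itself, hence the inverted conditions in the is_sub case)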
1372
1373#define emith_t_to_carry(srr, is_sub) do { \
1374 if (is_sub) { \
1375 int t_ = rcache_get_tmp(); \
1376 emith_eor_r_r_imm(t_, srr, 1); \
1377 emith_rorf(t_, t_, 1); \
1378 rcache_free_tmp(t_); \
1379 } else { \
1380 emith_rorf(srr, srr, 1); \
1381 emith_rol(srr, srr, 1); \
1382 } \
1383} while (0)
1384
1385#define emith_tpop_carry(sr, is_sub) do { \
1386 if (is_sub) \
1387 emith_eor_r_imm(sr, 1); \
1388 emith_lsrf(sr, sr, 1); \
1389} while (0)
1390
1391#define emith_tpush_carry(sr, is_sub) do { \
1392 emith_adc_r_r(sr, sr); \
1393 if (is_sub) \
1394 emith_eor_r_imm(sr, 1); \
1395} while (0)
1396
1397/*
1398 * T = carry(Rn = (Rn << 1) | T)
1399 * if Q
1400 * T ^= !carry(Rn += Rm)
1401 * else
1402 * T ^= !carry(Rn -= Rm)
1403 */
1404#define emith_sh2_div1_step(rn, rm, sr) do { \
1405 void *jmp0, *jmp1; \
1406 emith_tpop_carry(sr, 0); /* Rn = 2*Rn+T */\
1407 emith_adcf_r_r_r(rn, rn, rn); \
1408 emith_tpush_carry(sr, 0); \
1409 emith_tst_r_imm(sr, Q); /* if (Q ^ M) */ \
1410 JMP_POS(jmp0); /* beq do_sub */ \
1411 emith_addf_r_r(rn, rm); /* Rn += Rm */ \
1412 emith_eor_r_imm_c(A_COND_CC, sr, T); \
1413 JMP_POS(jmp1); /* b done */ \
1414 JMP_EMIT(A_COND_EQ, jmp0); /* do_sub: */ \
1415 emith_subf_r_r(rn, rm); /* Rn -= Rm */ \
1416 emith_eor_r_imm_c(A_COND_CS, sr, T); \
1417 JMP_EMIT(A_COND_AL, jmp1); /* done: */ \
1418} while (0)
1419
1420/* mh:ml += rn*rm, does saturation if required by S bit. rn, rm must be TEMP */
1421#define emith_sh2_macl(ml, mh, rn, rm, sr) do { \
1422 emith_tst_r_imm(sr, S); \
1423 EMITH_SJMP2_START(DCOND_NE); \
1424 emith_mula_s64_c(DCOND_EQ, ml, mh, rn, rm); \
1425 EMITH_SJMP2_MID(DCOND_NE); \
1426 /* MACH top 16 bits unused if saturated. sign ext for overfl detect */ \
1427 emith_sext(mh, mh, 16); \
1428 emith_mula_s64(ml, mh, rn, rm); \
1429 /* overflow if top 17 bits of MACH aren't all 1 or 0 */ \
1430 /* to check: add MACH >> 31 to MACH >> 15. this is 0 if no overflow */ \
1431 emith_asr(rn, mh, 15); \
1432 emith_addf_r_r_r_lsr(rn, rn, mh, 31); \
1433 EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> -ovl */ \
1434 emith_move_r_imm_c(DCOND_NE, ml, 0x00000000); \
1435 emith_move_r_imm_c(DCOND_NE, mh, 0x00008000); \
1436 EMITH_SJMP_START(DCOND_MI); /* sum > 0 -> +ovl */ \
1437 emith_sub_r_imm_c(DCOND_PL, ml, 1); /* 0xffffffff */ \
1438 emith_sub_r_imm_c(DCOND_PL, mh, 1); /* 0x00007fff */ \
1439 EMITH_SJMP_END(DCOND_MI); \
1440 EMITH_SJMP_END(DCOND_EQ); \
1441 EMITH_SJMP2_END(DCOND_NE); \
1442} while (0)
1443
1444/* mh:ml += rn*rm, does saturation if required by S bit. rn, rm must be TEMP */
1445#define emith_sh2_macw(ml, mh, rn, rm, sr) do { \
1446 emith_tst_r_imm(sr, S); \
1447 EMITH_SJMP2_START(DCOND_NE); \
1448 emith_mula_s64_c(DCOND_EQ, ml, mh, rn, rm); \
1449 EMITH_SJMP2_MID(DCOND_NE); \
1450 /* XXX: MACH should be untouched when S is set? */ \
1451 emith_asr(mh, ml, 31); /* sign ext MACL to MACH for ovrfl check */ \
1452 emith_mula_s64(ml, mh, rn, rm); \
1453 /* overflow if top 33 bits of MACH:MACL aren't all 1 or 0 */ \
1454 /* to check: add MACL[31] to MACH. this is 0 if no overflow */ \
1455 emith_addf_r_r_r_lsr(mh, mh, ml, 31); /* sum = MACH + ((MACL>>31)&1) */\
1456 EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> overflow */ \
1457 /* XXX: LSB signalling only in SH1, or in SH2 too? */ \
1458 emith_move_r_imm_c(DCOND_NE, mh, 0x00000001); /* LSB of MACH */ \
1459 emith_move_r_imm_c(DCOND_NE, ml, 0x80000000); /* -ovrfl */ \
1460 EMITH_SJMP_START(DCOND_MI); /* sum > 0 -> +ovrfl */ \
1461 emith_sub_r_imm_c(DCOND_PL, ml, 1); /* 0x7fffffff */ \
1462 EMITH_SJMP_END(DCOND_MI); \
1463 EMITH_SJMP_END(DCOND_EQ); \
1464 EMITH_SJMP2_END(DCOND_NE); \
1465} while (0)
1466
1467#ifdef T
1468// T bit handling
1469static int tcond = -1;
1470
1471#define emith_invert_cond(cond) \
1472 ((cond) ^ 1)
1473
1474#define emith_clr_t_cond(sr) \
1475 (void)sr
1476
1477#define emith_set_t_cond(sr, cond) \
1478 tcond = cond
1479
1480#define emith_get_t_cond() \
1481 tcond
1482
1483#define emith_invalidate_t() \
1484 tcond = -1
1485
1486#define emith_set_t(sr, val) \
1487 tcond = ((val) ? A_COND_AL: A_COND_NV)
1488
1489static void emith_sync_t(int sr)
1490{
1491 if (tcond == A_COND_AL)
1492 emith_or_r_imm(sr, T);
1493 else if (tcond == A_COND_NV)
1494 emith_bic_r_imm(sr, T);
1495 else if (tcond >= 0) {
1496 emith_bic_r_imm(sr, T);
1497 emith_or_r_imm_c(tcond, sr, T);
1498 }
1499 tcond = -1;
1500}
1501
1502static int emith_tst_t(int sr, int tf)
1503{
1504 if (tcond < 0) {
1505 emith_tst_r_imm(sr, T);
1506 return tf ? DCOND_NE: DCOND_EQ;
1507 } else if (tcond >= A_COND_AL) {
1508 // MUST sync because A_COND_NV isn't a real condition
1509 emith_sync_t(sr);
1510 emith_tst_r_imm(sr, T);
1511 return tf ? DCOND_NE: DCOND_EQ;
1512 } else
1513 return tf ? tcond : emith_invert_cond(tcond);
1514}
1515#endif