cpu/drc/emit_arm.c (picodrive.git)
cff531af 1/*
2 * Basic macros to emit ARM instructions and some utils
3 * Copyright (C) 2008,2009,2010 notaz
7bf552b5 4 * Copyright (C) 2019-2024 irixxxx
cff531af 5 *
6 * This work is licensed under the terms of MAME license.
7 * See COPYING file in the top-level directory.
8 */
9bd6706d 9#define HOST_REGS 16
10
11// OABI/EABI: params: r0-r3, return: r0-r1, temp: r12,r14, saved: r4-r8,r10,r11
12// SP,PC: r13,r15 must not be used. saved: r9 (for platform use, e.g. on ios)
13#define RET_REG 0
14#define PARAM_REGS { 0, 1, 2, 3 }
15#ifndef __MACH__
16#define PRESERVED_REGS { 4, 5, 6, 7, 8, 9, 10, 11 }
17#else
18#define PRESERVED_REGS { 4, 5, 6, 7, 8, 10, 11 } // no r9..
19#endif
20#define TEMPORARY_REGS { 12, 14 }
21
22#define CONTEXT_REG 11
a5e51c16 23#define STATIC_SH2_REGS { SHR_SR,10 , SHR_R(0),8 , SHR_R(1),9 }
65c75cb0 24
25// XXX: tcache_ptr type for SVP and SH2 compilers differs..
26#define EMIT_PTR(ptr, x) \
27 do { \
28 *(u32 *)ptr = x; \
29 ptr = (void *)((u8 *)ptr + sizeof(u32)); \
30 } while (0)
31
e01deede 32// ARM special registers and peephole optimization flags
33#define SP 13 // stack pointer
34#define LR 14 // link (return address)
35#define PC 15 // program counter
36#define SR 16 // CPSR, status register
37#define MEM 17 // memory access (src=LDR, dst=STR)
38#define CYC1 20 // 1 cycle interlock (LDR, reg-cntrld shift)
d39eb595 39#define CYC2 (CYC1+1)// 2+ cycles interlock (LDR[BH], MUL/MLA etc)
e01deede 40#define NO 32 // token for "no register"
41
42// bitmask builders
43#define M1(x) (u32)(1ULL<<(x)) // u32 to have NO evaluate to 0
44#define M2(x,y) (M1(x)|M1(y))
45#define M3(x,y,z) (M2(x,y)|M1(z))
46#define M4(x,y,z,a) (M3(x,y,z)|M1(a))
47#define M5(x,y,z,a,b) (M4(x,y,z,a)|M1(b))
d39eb595 48#define M6(x,y,z,a,b,c) (M5(x,y,z,a,b)|M1(c))
e01deede 49#define M10(a,b,c,d,e,f,g,h,i,j) (M5(a,b,c,d,e)|M5(f,g,h,i,j))
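// illustrative note (not in the original source): with SR == 16,
// M3(0,1,SR) == 0x00010003, i.e. r0, r1 and the status flags in one mask.
// M1(NO) evaluates to 0 because 1ULL<<32 truncates to 0 when cast to u32,
// so a NO operand silently drops out of any src/dst mask built here.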
50
713e3a1c 51// avoid a warning with clang
52static inline uintptr_t pabs(intptr_t v) { return labs(v); }
53
f53e166c 54// sys_cacheflush always flushes whole pages, and it's rather expensive on ARMs
 55// hold a list of pending cache updates and merge requests to reduce cacheflush calls
56static struct { void *base, *end; } pageflush[4];
57static unsigned pagesize = 4096;
58
59static void emith_update_cache(void)
60{
61 int i;
62
63 for (i = 0; i < 4 && pageflush[i].base; i++) {
64 cache_flush_d_inval_i(pageflush[i].base, pageflush[i].end + pagesize-1);
65 pageflush[i].base = NULL;
66 }
67}
68
69static inline void emith_update_add(void *base, void *end)
70{
71 void *p_base = (void *)((uintptr_t)(base) & ~(pagesize-1));
72 void *p_end = (void *)((uintptr_t)(end ) & ~(pagesize-1));
73 int i;
74
75 for (i = 0; i < 4 && pageflush[i].base; i++) {
76 if (p_base <= pageflush[i].end+pagesize && p_end >= pageflush[i].end) {
77 if (p_base < pageflush[i].base) pageflush[i].base = p_base;
78 pageflush[i].end = p_end;
79 return;
80 }
81 if (p_base <= pageflush[i].base && p_end >= pageflush[i].base-pagesize) {
82 if (p_end > pageflush[i].end) pageflush[i].end = p_end;
83 pageflush[i].base = p_base;
84 return;
85 }
86 }
87 if (i == 4) {
88 /* list full and not mergeable -> flush list */
89 emith_update_cache();
90 i = 0;
91 }
92 pageflush[i].base = p_base, pageflush[i].end = p_end;
93}
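// worked example (illustrative, assuming 4 KB pages): adding the ranges
// 0x1000-0x10ff and then 0x1f00-0x20ff merges them into one list entry,
// so emith_update_cache() issues a single flush of 0x1000-0x2fff instead
// of two separate (and expensive) cacheflush calls.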
94
e01deede 95// peephole optimizer; for now it only tries to reduce interlocks
d39eb595 96#define EMIT_CACHE_SIZE 6
e01deede 97struct emit_op {
98 u32 op;
99 u32 src, dst;
100};
101
d39eb595 102// peephole cache, last committed insn + cache + next insn = size+2
103static struct emit_op emit_cache[EMIT_CACHE_SIZE+2];
e01deede 104static int emit_index;
105#define emith_insn_ptr() (u8 *)((u32 *)tcache_ptr-emit_index)
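// a sketch of the reordering EMIT() below may perform (illustrative only):
//	ldr  r0, [r2]		; result has 1-2 cycles latency on ARM920T
//	add  r1, r0, r0		; reads r0 immediately -> interlock stall
//	mov  r3, #1		; independent of r0
// the queue allows the mov to be swapped before the add, hiding the load
// latency: ldr r0,[r2] ; mov r3,#1 ; add r1,r0,r0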
106
d39eb595 107static inline void emith_pool_adjust(int tcache_offs, int move_offs);
e01deede 108
109static NOINLINE void EMIT(u32 op, u32 dst, u32 src)
110{
d39eb595 111 void * emit_ptr = (u32 *)tcache_ptr - emit_index;
112 struct emit_op *const ptr = emit_cache;
113 const int n = emit_index+1;
114 int i, bi, bd = 0;
5c129565 115
d39eb595 116 // account for new insn in tcache
117 tcache_ptr = (void *)((u32 *)tcache_ptr + 1);
e01deede 118 COUNT_OP;
119 // for conditional execution SR is always source
120 if (op < 0xe0000000 /*A_COND_AL << 28*/)
121 src |= M1(SR);
d39eb595 122	// put insn at the back of the queue // mask away the NO token
123 emit_cache[n] = (struct emit_op)
124 { .op=op, .src=src & ~M1(NO), .dst=dst & ~M1(NO) };
125 // check insns down the queue as long as permitted by dependencies
126 for (bd = bi = 0, i = emit_index; i > 1 && !(dst & M1(PC)); i--) {
e01deede 127 int deps = 0;
d39eb595 128	// deps via dst between i and n preclude swapping: any dep other
 129	// than [i].src & [n].src changes the semantics if swapped.
130 if ((ptr[i].dst & ptr[n].src) || (ptr[n].dst & ptr[i].src) ||
131 (ptr[i].dst & ptr[n].dst))
132 break;
133 // don't swap insns reading PC if it's not a word pool load
134 // (ptr[i].op&0xf700000) != EOP_C_AM2_IMM(0,0,0,1,0,0,0))
135 if ((ptr[i].src & M1(PC)) && (ptr[i].op&0xf700000) != 0x5100000)
136 break;
137
138 // calculate ARM920T interlock cycles (differences only)
139#define D2(x,y) ((ptr[x].dst & ptr[y].src)?((ptr[x].src >> CYC2) & 1):0)
140#define D1(x,y) ((ptr[x].dst & ptr[y].src)?((ptr[x].src >> CYC1) & 3):0)
141 // insn sequence: [..., i-2, i-1, i, i+1, ..., n-2, n-1, n]
142 deps -= D2(i-2,i)+D2(i-1,i+1)+D2(n-2,n ) + D1(i-1,i)+D1(n-1,n);
143 deps -= !!(ptr[n].src & M2(CYC1,CYC2));// favour moving LDR down
144 // insn sequence: [..., i-2, i-1, n, i, i+1, ..., n-2, n-1]
145 deps += D2(i-2,n)+D2(i-1,i )+D2(n ,i+1) + D1(i-1,n)+D1(n ,i);
146 deps += !!(ptr[i].src & M2(CYC1,CYC2));// penalize moving LDR up
147 // remember best match found
148 if (bd > deps)
149 bd = deps, bi = i;
150 }
151 // swap if fewer depencies
152 if (bd < 0) {
153 // make room for new insn at bi
154 struct emit_op tmp = ptr[n];
155 for (i = n-1; i >= bi; i--) {
156 ptr[i+1] = ptr[i];
157 if (ptr[i].src & M1(PC))
158 emith_pool_adjust(n-i+1, 1);
e01deede 159 }
d39eb595 160 // insert new insn at bi
161 ptr[bi] = tmp;
162 if (ptr[bi].src & M1(PC))
163 emith_pool_adjust(1, bi-n);
e01deede 164 }
a0f5ba40 165 if (dst & M1(PC)) {
166 // commit everything if a branch insn is emitted
167 for (i = 1; i <= emit_index+1; i++)
168 EMIT_PTR(emit_ptr, emit_cache[i].op);
169 emit_index = 0;
d39eb595 170 } else if (emit_index < EMIT_CACHE_SIZE) {
e01deede 171 // queue not yet full
172 emit_index++;
173 } else {
174 // commit oldest insn from cache
175 EMIT_PTR(emit_ptr, emit_cache[1].op);
176 for (i = 0; i <= emit_index; i++)
177 emit_cache[i] = emit_cache[i+1];
178 }
179}
39615f60 180
e01deede 181static void emith_flush(void)
182{
183 int i;
184 void *emit_ptr = tcache_ptr - emit_index*sizeof(u32);
185
186 for (i = 1; i <= emit_index; i++)
187 EMIT_PTR(emit_ptr, emit_cache[i].op);
188 emit_index = 0;
189}
5c129565 190
191#define A_COND_AL 0xe
b9c1d012 192#define A_COND_EQ 0x0
bad5731d 193#define A_COND_NE 0x1
3863edbd 194#define A_COND_HS 0x2
195#define A_COND_LO 0x3
bad5731d 196#define A_COND_MI 0x4
197#define A_COND_PL 0x5
3863edbd 198#define A_COND_VS 0x6
199#define A_COND_VC 0x7
200#define A_COND_HI 0x8
80599a42 201#define A_COND_LS 0x9
3863edbd 202#define A_COND_GE 0xa
203#define A_COND_LT 0xb
204#define A_COND_GT 0xc
45883918 205#define A_COND_LE 0xd
ed8cf79b 206#define A_COND_CS A_COND_HS
207#define A_COND_CC A_COND_LO
39615f60 208#define A_COND_NV 0xf // Not Valid (aka NeVer :-) - ATTN: not a real condition!
5c129565 209
80599a42 210/* unified conditions */
211#define DCOND_EQ A_COND_EQ
212#define DCOND_NE A_COND_NE
213#define DCOND_MI A_COND_MI
214#define DCOND_PL A_COND_PL
3863edbd 215#define DCOND_HI A_COND_HI
216#define DCOND_HS A_COND_HS
217#define DCOND_LO A_COND_LO
218#define DCOND_GE A_COND_GE
219#define DCOND_GT A_COND_GT
220#define DCOND_LT A_COND_LT
221#define DCOND_LS A_COND_LS
222#define DCOND_LE A_COND_LE
223#define DCOND_VS A_COND_VS
224#define DCOND_VC A_COND_VC
80599a42 225
6822ba9d 226#define DCOND_CS A_COND_HS
227#define DCOND_CC A_COND_LO
228
5c129565 229/* addressing mode 1 */
230#define A_AM1_LSL 0
231#define A_AM1_LSR 1
232#define A_AM1_ASR 2
233#define A_AM1_ROR 3
234
235#define A_AM1_IMM(ror2,imm8) (((ror2)<<8) | (imm8) | 0x02000000)
236#define A_AM1_REG_XIMM(shift_imm,shift_op,rm) (((shift_imm)<<7) | ((shift_op)<<5) | (rm))
89fea1e9 237#define A_AM1_REG_XREG(rs,shift_op,rm) (((rs)<<8) | ((shift_op)<<5) | 0x10 | (rm))
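// illustrative note: AM1 immediates are an 8 bit value rotated right by
// 2*ror2, so e.g. 0xff000000 is encodable as A_AM1_IMM(4,0xff) (0xff
// rotated right by 8 bits), while 0x101 cannot be encoded in one insn.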
5c129565 238
239/* data processing op */
5d817c91 240#define A_OP_AND 0x0
89fea1e9 241#define A_OP_EOR 0x1
5d817c91 242#define A_OP_SUB 0x2
89fea1e9 243#define A_OP_RSB 0x3
f48f5e3b 244#define A_OP_ADD 0x4
3863edbd 245#define A_OP_ADC 0x5
246#define A_OP_SBC 0x6
52d759c3 247#define A_OP_RSC 0x7
b9c1d012 248#define A_OP_TST 0x8
80599a42 249#define A_OP_TEQ 0x9
0e4d7ba5 250#define A_OP_CMP 0xa
2fa02d5a 251#define A_OP_CMN 0xb
5c129565 252#define A_OP_ORR 0xc
253#define A_OP_MOV 0xd
5d817c91 254#define A_OP_BIC 0xe
3863edbd 255#define A_OP_MVN 0xf
5c129565 256
e01deede 257// operation specific register usage in DOP
258#define A_Rn(op,rn) (((op)&0xd)!=0xd ? rn:NO) // no rn for MOV,MVN
259#define A_Rd(op,rd) (((op)&0xc)!=0x8 ? rd:NO) // no rd for TST,TEQ,CMP,CMN
 260// CPSR is dst if S set, CPSR is src if op is ADC/SBC/RSC or shift is RRX
261#define A_Sd(s) ((s) ? SR:NO)
262#define A_Sr(op,sop) (((op)>=0x5 && (op)<=0x7) || (sop)>>4==A_AM1_ROR<<1 ? SR:NO)
263
264#define EOP_C_DOP_X(cond,op,s,rn,rd,sop,rm,rs) \
265 EMIT(((cond)<<28) | ((op)<< 21) | ((s)<<20) | ((rn)<<16) | ((rd)<<12) | (sop), \
266 M2(A_Rd(op,rd),A_Sd(s)), M5(A_Sr(op,sop),A_Rn(op,rn),rm,rs,rs==NO?NO:CYC1))
5c129565 267
e01deede 268#define EOP_C_DOP_IMM( cond,op,s,rn,rd,ror2,imm8) EOP_C_DOP_X(cond,op,s,rn,rd,A_AM1_IMM(ror2,imm8), NO, NO)
269#define EOP_C_DOP_REG_XIMM(cond,op,s,rn,rd,shift_imm,shift_op,rm) EOP_C_DOP_X(cond,op,s,rn,rd,A_AM1_REG_XIMM(shift_imm,shift_op,rm), rm, NO)
270#define EOP_C_DOP_REG_XREG(cond,op,s,rn,rd,rs, shift_op,rm) EOP_C_DOP_X(cond,op,s,rn,rd,A_AM1_REG_XREG(rs, shift_op,rm), rm, rs)
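// worked example (illustrative): EOP_ADD_REG(A_COND_AL,0,0,1,2,A_AM1_LSL,0),
// defined below, expands to EMIT(0xe0810002,...), i.e. "add r0, r1, r2",
// with r0 recorded as dst and r1/r2 as srcs for the peephole dep tracking.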
5c129565 271
5d817c91 272#define EOP_MOV_IMM(rd, ror2,imm8) EOP_C_DOP_IMM(A_COND_AL,A_OP_MOV,0, 0,rd,ror2,imm8)
52d759c3 273#define EOP_MVN_IMM(rd, ror2,imm8) EOP_C_DOP_IMM(A_COND_AL,A_OP_MVN,0, 0,rd,ror2,imm8)
5d817c91 274#define EOP_ORR_IMM(rd,rn,ror2,imm8) EOP_C_DOP_IMM(A_COND_AL,A_OP_ORR,0,rn,rd,ror2,imm8)
3863edbd 275#define EOP_EOR_IMM(rd,rn,ror2,imm8) EOP_C_DOP_IMM(A_COND_AL,A_OP_EOR,0,rn,rd,ror2,imm8)
5d817c91 276#define EOP_ADD_IMM(rd,rn,ror2,imm8) EOP_C_DOP_IMM(A_COND_AL,A_OP_ADD,0,rn,rd,ror2,imm8)
277#define EOP_BIC_IMM(rd,rn,ror2,imm8) EOP_C_DOP_IMM(A_COND_AL,A_OP_BIC,0,rn,rd,ror2,imm8)
278#define EOP_AND_IMM(rd,rn,ror2,imm8) EOP_C_DOP_IMM(A_COND_AL,A_OP_AND,0,rn,rd,ror2,imm8)
d274c33b 279#define EOP_SUB_IMM(rd,rn,ror2,imm8) EOP_C_DOP_IMM(A_COND_AL,A_OP_SUB,0,rn,rd,ror2,imm8)
bad5731d 280#define EOP_TST_IMM( rn,ror2,imm8) EOP_C_DOP_IMM(A_COND_AL,A_OP_TST,1,rn, 0,ror2,imm8)
45883918 281#define EOP_CMP_IMM( rn,ror2,imm8) EOP_C_DOP_IMM(A_COND_AL,A_OP_CMP,1,rn, 0,ror2,imm8)
89fea1e9 282#define EOP_RSB_IMM(rd,rn,ror2,imm8) EOP_C_DOP_IMM(A_COND_AL,A_OP_RSB,0,rn,rd,ror2,imm8)
5c129565 283
80599a42 284#define EOP_MOV_IMM_C(cond,rd, ror2,imm8) EOP_C_DOP_IMM(cond,A_OP_MOV,0, 0,rd,ror2,imm8)
285#define EOP_ORR_IMM_C(cond,rd,rn,ror2,imm8) EOP_C_DOP_IMM(cond,A_OP_ORR,0,rn,rd,ror2,imm8)
286#define EOP_RSB_IMM_C(cond,rd,rn,ror2,imm8) EOP_C_DOP_IMM(cond,A_OP_RSB,0,rn,rd,ror2,imm8)
287
288#define EOP_MOV_REG(cond,s,rd, rm,shift_op,shift_imm) EOP_C_DOP_REG_XIMM(cond,A_OP_MOV,s, 0,rd,shift_imm,shift_op,rm)
52d759c3 289#define EOP_MVN_REG(cond,s,rd, rm,shift_op,shift_imm) EOP_C_DOP_REG_XIMM(cond,A_OP_MVN,s, 0,rd,shift_imm,shift_op,rm)
80599a42 290#define EOP_ORR_REG(cond,s,rd,rn,rm,shift_op,shift_imm) EOP_C_DOP_REG_XIMM(cond,A_OP_ORR,s,rn,rd,shift_imm,shift_op,rm)
291#define EOP_ADD_REG(cond,s,rd,rn,rm,shift_op,shift_imm) EOP_C_DOP_REG_XIMM(cond,A_OP_ADD,s,rn,rd,shift_imm,shift_op,rm)
3863edbd 292#define EOP_ADC_REG(cond,s,rd,rn,rm,shift_op,shift_imm) EOP_C_DOP_REG_XIMM(cond,A_OP_ADC,s,rn,rd,shift_imm,shift_op,rm)
80599a42 293#define EOP_SUB_REG(cond,s,rd,rn,rm,shift_op,shift_imm) EOP_C_DOP_REG_XIMM(cond,A_OP_SUB,s,rn,rd,shift_imm,shift_op,rm)
3863edbd 294#define EOP_SBC_REG(cond,s,rd,rn,rm,shift_op,shift_imm) EOP_C_DOP_REG_XIMM(cond,A_OP_SBC,s,rn,rd,shift_imm,shift_op,rm)
295#define EOP_AND_REG(cond,s,rd,rn,rm,shift_op,shift_imm) EOP_C_DOP_REG_XIMM(cond,A_OP_AND,s,rn,rd,shift_imm,shift_op,rm)
296#define EOP_EOR_REG(cond,s,rd,rn,rm,shift_op,shift_imm) EOP_C_DOP_REG_XIMM(cond,A_OP_EOR,s,rn,rd,shift_imm,shift_op,rm)
297#define EOP_CMP_REG(cond, rn,rm,shift_op,shift_imm) EOP_C_DOP_REG_XIMM(cond,A_OP_CMP,1,rn, 0,shift_imm,shift_op,rm)
80599a42 298#define EOP_TST_REG(cond, rn,rm,shift_op,shift_imm) EOP_C_DOP_REG_XIMM(cond,A_OP_TST,1,rn, 0,shift_imm,shift_op,rm)
299#define EOP_TEQ_REG(cond, rn,rm,shift_op,shift_imm) EOP_C_DOP_REG_XIMM(cond,A_OP_TEQ,1,rn, 0,shift_imm,shift_op,rm)
89fea1e9 300
80599a42 301#define EOP_MOV_REG2(s,rd, rm,shift_op,rs) EOP_C_DOP_REG_XREG(A_COND_AL,A_OP_MOV,s, 0,rd,rs,shift_op,rm)
302#define EOP_ADD_REG2(s,rd,rn,rm,shift_op,rs) EOP_C_DOP_REG_XREG(A_COND_AL,A_OP_ADD,s,rn,rd,rs,shift_op,rm)
303#define EOP_SUB_REG2(s,rd,rn,rm,shift_op,rs) EOP_C_DOP_REG_XREG(A_COND_AL,A_OP_SUB,s,rn,rd,rs,shift_op,rm)
5c129565 304
80599a42 305#define EOP_MOV_REG_SIMPLE(rd,rm) EOP_MOV_REG(A_COND_AL,0,rd,rm,A_AM1_LSL,0)
306#define EOP_MOV_REG_LSL(rd, rm,shift_imm) EOP_MOV_REG(A_COND_AL,0,rd,rm,A_AM1_LSL,shift_imm)
307#define EOP_MOV_REG_LSR(rd, rm,shift_imm) EOP_MOV_REG(A_COND_AL,0,rd,rm,A_AM1_LSR,shift_imm)
308#define EOP_MOV_REG_ASR(rd, rm,shift_imm) EOP_MOV_REG(A_COND_AL,0,rd,rm,A_AM1_ASR,shift_imm)
309#define EOP_MOV_REG_ROR(rd, rm,shift_imm) EOP_MOV_REG(A_COND_AL,0,rd,rm,A_AM1_ROR,shift_imm)
5c129565 310
80599a42 311#define EOP_ORR_REG_SIMPLE(rd,rm) EOP_ORR_REG(A_COND_AL,0,rd,rd,rm,A_AM1_LSL,0)
312#define EOP_ORR_REG_LSL(rd,rn,rm,shift_imm) EOP_ORR_REG(A_COND_AL,0,rd,rn,rm,A_AM1_LSL,shift_imm)
313#define EOP_ORR_REG_LSR(rd,rn,rm,shift_imm) EOP_ORR_REG(A_COND_AL,0,rd,rn,rm,A_AM1_LSR,shift_imm)
314#define EOP_ORR_REG_ASR(rd,rn,rm,shift_imm) EOP_ORR_REG(A_COND_AL,0,rd,rn,rm,A_AM1_ASR,shift_imm)
315#define EOP_ORR_REG_ROR(rd,rn,rm,shift_imm) EOP_ORR_REG(A_COND_AL,0,rd,rn,rm,A_AM1_ROR,shift_imm)
5d817c91 316
80599a42 317#define EOP_ADD_REG_SIMPLE(rd,rm) EOP_ADD_REG(A_COND_AL,0,rd,rd,rm,A_AM1_LSL,0)
318#define EOP_ADD_REG_LSL(rd,rn,rm,shift_imm) EOP_ADD_REG(A_COND_AL,0,rd,rn,rm,A_AM1_LSL,shift_imm)
319#define EOP_ADD_REG_LSR(rd,rn,rm,shift_imm) EOP_ADD_REG(A_COND_AL,0,rd,rn,rm,A_AM1_LSR,shift_imm)
f48f5e3b 320
80599a42 321#define EOP_TST_REG_SIMPLE(rn,rm) EOP_TST_REG(A_COND_AL, rn, 0,A_AM1_LSL,rm)
b9c1d012 322
80599a42 323#define EOP_MOV_REG2_LSL(rd, rm,rs) EOP_MOV_REG2(0,rd, rm,A_AM1_LSL,rs)
324#define EOP_MOV_REG2_ROR(rd, rm,rs) EOP_MOV_REG2(0,rd, rm,A_AM1_ROR,rs)
325#define EOP_ADD_REG2_LSL(rd,rn,rm,rs) EOP_ADD_REG2(0,rd,rn,rm,A_AM1_LSL,rs)
326#define EOP_SUB_REG2_LSL(rd,rn,rm,rs) EOP_SUB_REG2(0,rd,rn,rm,A_AM1_LSL,rs)
89fea1e9 327
f48f5e3b 328/* addressing mode 2 */
329#define EOP_C_AM2_IMM(cond,u,b,l,rn,rd,offset_12) \
e01deede 330 EMIT(((cond)<<28) | 0x05000000 | ((u)<<23) | ((b)<<22) | ((l)<<20) | ((rn)<<16) | ((rd)<<12) | \
331 ((offset_12) & 0xfff), M1(l?rd:MEM), M3(rn,l?MEM:rd,l?b?CYC2:CYC1:NO))
5c129565 332
e05b81fc 333#define EOP_C_AM2_REG(cond,u,b,l,rn,rd,shift_imm,shift_op,rm) \
334 EMIT(((cond)<<28) | 0x07000000 | ((u)<<23) | ((b)<<22) | ((l)<<20) | ((rn)<<16) | ((rd)<<12) | \
e01deede 335 A_AM1_REG_XIMM(shift_imm, shift_op, rm), M1(l?rd:MEM), M4(rn,rm,l?MEM:rd,l?b?CYC2:CYC1:NO))
e05b81fc 336
f48f5e3b 337/* addressing mode 3 */
ede7220f 338#define EOP_C_AM3(cond,u,r,l,rn,rd,s,h,immed_reg) \
339 EMIT(((cond)<<28) | 0x01000090 | ((u)<<23) | ((r)<<22) | ((l)<<20) | ((rn)<<16) | ((rd)<<12) | \
e01deede 340 ((s)<<6) | ((h)<<5) | (immed_reg), M1(l?rd:MEM), M4(rn,r?NO:immed_reg,l?MEM:rd,l?CYC2:NO))
ede7220f 341
342#define EOP_C_AM3_IMM(cond,u,l,rn,rd,s,h,offset_8) EOP_C_AM3(cond,u,1,l,rn,rd,s,h,(((offset_8)&0xf0)<<4)|((offset_8)&0xf))
343
344#define EOP_C_AM3_REG(cond,u,l,rn,rd,s,h,rm) EOP_C_AM3(cond,u,0,l,rn,rd,s,h,rm)
f48f5e3b 345
346/* ldr and str */
713e3a1c 347#define EOP_LDR_IMM2(cond,rd,rn,offset_12) EOP_C_AM2_IMM(cond,(offset_12) >= 0,0,1,rn,rd,pabs(offset_12))
348#define EOP_LDRB_IMM2(cond,rd,rn,offset_12) EOP_C_AM2_IMM(cond,(offset_12) >= 0,1,1,rn,rd,pabs(offset_12))
349#define EOP_STR_IMM2(cond,rd,rn,offset_12) EOP_C_AM2_IMM(cond,(offset_12) >= 0,0,0,rn,rd,pabs(offset_12))
e05b81fc 350
713e3a1c 351#define EOP_LDR_IMM( rd,rn,offset_12) EOP_C_AM2_IMM(A_COND_AL,(offset_12) >= 0,0,1,rn,rd,pabs(offset_12))
f48f5e3b 352#define EOP_LDR_SIMPLE(rd,rn) EOP_C_AM2_IMM(A_COND_AL,1,0,1,rn,rd,0)
713e3a1c 353#define EOP_STR_IMM( rd,rn,offset_12) EOP_C_AM2_IMM(A_COND_AL,(offset_12) >= 0,0,0,rn,rd,pabs(offset_12))
f48f5e3b 354#define EOP_STR_SIMPLE(rd,rn) EOP_C_AM2_IMM(A_COND_AL,1,0,0,rn,rd,0)
355
e05b81fc 356#define EOP_LDR_REG_LSL(cond,rd,rn,rm,shift_imm) EOP_C_AM2_REG(cond,1,0,1,rn,rd,shift_imm,A_AM1_LSL,rm)
9e36dd0e 357#define EOP_LDR_REG_LSL_WB(cond,rd,rn,rm,shift_imm) EOP_C_AM2_REG(cond,1,0,3,rn,rd,shift_imm,A_AM1_LSL,rm)
e7ee5010 358#define EOP_LDRB_REG_LSL(cond,rd,rn,rm,shift_imm) EOP_C_AM2_REG(cond,1,1,1,rn,rd,shift_imm,A_AM1_LSL,rm)
9e36dd0e 359#define EOP_STR_REG_LSL_WB(cond,rd,rn,rm,shift_imm) EOP_C_AM2_REG(cond,1,0,2,rn,rd,shift_imm,A_AM1_LSL,rm)
e05b81fc 360
713e3a1c 361#define EOP_LDRH_IMM2(cond,rd,rn,offset_8) EOP_C_AM3_IMM(cond,(offset_8) >= 0,1,rn,rd,0,1,pabs(offset_8))
6822ba9d 362#define EOP_LDRH_REG2(cond,rd,rn,rm) EOP_C_AM3_REG(cond,1,1,rn,rd,0,1,rm)
b081408f 363
713e3a1c 364#define EOP_LDRH_IMM( rd,rn,offset_8) EOP_C_AM3_IMM(A_COND_AL,(offset_8) >= 0,1,rn,rd,0,1,pabs(offset_8))
5d817c91 365#define EOP_LDRH_SIMPLE(rd,rn) EOP_C_AM3_IMM(A_COND_AL,1,1,rn,rd,0,1,0)
ede7220f 366#define EOP_LDRH_REG( rd,rn,rm) EOP_C_AM3_REG(A_COND_AL,1,1,rn,rd,0,1,rm)
713e3a1c 367#define EOP_STRH_IMM( rd,rn,offset_8) EOP_C_AM3_IMM(A_COND_AL,(offset_8) >= 0,0,rn,rd,0,1,pabs(offset_8))
5d817c91 368#define EOP_STRH_SIMPLE(rd,rn) EOP_C_AM3_IMM(A_COND_AL,1,0,rn,rd,0,1,0)
d5276282 369#define EOP_STRH_REG( rd,rn,rm) EOP_C_AM3_REG(A_COND_AL,1,0,rn,rd,0,1,rm)
5c129565 370
713e3a1c 371#define EOP_LDRSB_IMM2(cond,rd,rn,offset_8) EOP_C_AM3_IMM(cond,(offset_8) >= 0,1,rn,rd,1,0,pabs(offset_8))
1db36a7a 372#define EOP_LDRSB_REG2(cond,rd,rn,rm) EOP_C_AM3_REG(cond,1,1,rn,rd,1,0,rm)
713e3a1c 373#define EOP_LDRSH_IMM2(cond,rd,rn,offset_8) EOP_C_AM3_IMM(cond,(offset_8) >= 0,1,rn,rd,1,1,pabs(offset_8))
1db36a7a 374#define EOP_LDRSH_REG2(cond,rd,rn,rm) EOP_C_AM3_REG(cond,1,1,rn,rd,1,1,rm)
375
5c129565 376/* ldm and stm */
377#define EOP_XXM(cond,p,u,s,w,l,rn,list) \
e01deede 378 EMIT(((cond)<<28) | (1<<27) | ((p)<<24) | ((u)<<23) | ((s)<<22) | ((w)<<21) | ((l)<<20) | ((rn)<<16) | (list), \
379 M2(rn,l?NO:MEM)|(l?list:0), M3(rn,l?MEM:NO,l?CYC2:NO)|(l?0:list))
5c129565 380
8b4f38f4 381#define EOP_STMIA(rb,list) EOP_XXM(A_COND_AL,0,1,0,0,0,rb,list)
382#define EOP_LDMIA(rb,list) EOP_XXM(A_COND_AL,0,1,0,0,1,rb,list)
383
e01deede 384#define EOP_STMFD_SP(list) EOP_XXM(A_COND_AL,1,0,0,1,0,SP,list)
385#define EOP_LDMFD_SP(list) EOP_XXM(A_COND_AL,0,1,0,1,1,SP,list)
5c129565 386
387/* branches */
388#define EOP_C_BX(cond,rm) \
e01deede 389 EMIT(((cond)<<28) | 0x012fff10 | (rm), M1(PC), M1(rm))
5c129565 390
f0d7b1fa 391#define EOP_C_B_PTR(ptr,cond,l,signed_immed_24) \
392 EMIT_PTR(ptr, ((cond)<<28) | 0x0a000000 | ((l)<<24) | (signed_immed_24))
393
e807ac75 394#define EOP_C_B(cond,l,signed_immed_24) \
e01deede 395 EMIT(((cond)<<28) | 0x0a000000 | ((l)<<24) | (signed_immed_24), M2(PC,l?LR:NO), M1(PC))
e807ac75 396
397#define EOP_B( signed_immed_24) EOP_C_B(A_COND_AL,0,signed_immed_24)
398#define EOP_BL(signed_immed_24) EOP_C_B(A_COND_AL,1,signed_immed_24)
399
d274c33b 400/* misc */
401#define EOP_C_MUL(cond,s,rd,rs,rm) \
e01deede 402 EMIT(((cond)<<28) | ((s)<<20) | ((rd)<<16) | ((rs)<<8) | 0x90 | (rm), M2(rd,s?SR:NO), M3(rs,rm,CYC2))
d274c33b 403
3863edbd 404#define EOP_C_UMULL(cond,s,rdhi,rdlo,rs,rm) \
d39eb595 405 EMIT(((cond)<<28) | 0x00800000 | ((s)<<20) | ((rdhi)<<16) | ((rdlo)<<12) | ((rs)<<8) | 0x90 | (rm), M3(rdhi,rdlo,s?SR:NO), M4(rs,rm,CYC1,CYC2))
3863edbd 406
407#define EOP_C_SMULL(cond,s,rdhi,rdlo,rs,rm) \
d39eb595 408 EMIT(((cond)<<28) | 0x00c00000 | ((s)<<20) | ((rdhi)<<16) | ((rdlo)<<12) | ((rs)<<8) | 0x90 | (rm), M3(rdhi,rdlo,s?SR:NO), M4(rs,rm,CYC1,CYC2))
3863edbd 409
f0d7b1fa 410#define EOP_C_SMLAL(cond,s,rdhi,rdlo,rs,rm) \
d39eb595 411 EMIT(((cond)<<28) | 0x00e00000 | ((s)<<20) | ((rdhi)<<16) | ((rdlo)<<12) | ((rs)<<8) | 0x90 | (rm), M3(rdhi,rdlo,s?SR:NO), M6(rs,rm,rdlo,rdhi,CYC1,CYC2))
f0d7b1fa 412
d274c33b 413#define EOP_MUL(rd,rm,rs) EOP_C_MUL(A_COND_AL,0,rd,rs,rm) // note: rd != rm
414
bad5731d 415#define EOP_C_MRS(cond,rd) \
e01deede 416 EMIT(((cond)<<28) | 0x010f0000 | ((rd)<<12), M1(rd), M1(SR))
5c129565 417
6e39239f 418#define EOP_C_MSR_IMM(cond,ror2,imm) \
e01deede 419 EMIT(((cond)<<28) | 0x0328f000 | ((ror2)<<8) | (imm), M1(SR), 0) // cpsr_f
6e39239f 420
421#define EOP_C_MSR_REG(cond,rm) \
e01deede 422 EMIT(((cond)<<28) | 0x0128f000 | (rm), M1(SR), M1(rm)) // cpsr_f
6e39239f 423
424#define EOP_MRS(rd) EOP_C_MRS(A_COND_AL,rd)
425#define EOP_MSR_IMM(ror2,imm) EOP_C_MSR_IMM(A_COND_AL,ror2,imm)
426#define EOP_MSR_REG(rm) EOP_C_MSR_REG(A_COND_AL,rm)
bad5731d 427
65b37c5a 428#define EOP_MOVW(cond,rd,imm) \
429 EMIT(((cond)<<28) | 0x03000000 | ((rd)<<12) | ((imm)&0xfff) | (((imm)<<4)&0xf0000), M1(rd), NO)
98a3d79b 430
65b37c5a 431#define EOP_MOVT(cond,rd,imm) \
432 EMIT(((cond)<<28) | 0x03400000 | ((rd)<<12) | (((imm)>>16)&0xfff) | (((imm)>>12)&0xf0000), M1(rd), NO)
bad5731d 433
83bafe8e 434// host literal pool; must be significantly smaller than 1024 (max LDR offset is 4095)
435#define MAX_HOST_LITERALS 128
436static u32 literal_pool[MAX_HOST_LITERALS];
437static u32 *literal_insn[MAX_HOST_LITERALS];
438static int literal_pindex, literal_iindex;
439
b4c25401 440static inline int emith_pool_literal(u32 imm, int *offs)
83bafe8e 441{
442 int idx = literal_pindex - 8; // max look behind in pool
443 // see if one of the last literals was the same (or close enough)
444 for (idx = (idx < 0 ? 0 : idx); idx < literal_pindex; idx++)
445 if (abs((int)(imm - literal_pool[idx])) <= 0xff)
446 break;
447 if (idx == literal_pindex) // store new literal
448 literal_pool[literal_pindex++] = imm;
449 *offs = imm - literal_pool[idx];
450 return idx;
2fa02d5a 451}
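// example (illustrative): if 0x40004000 is already pooled, a request for
// 0x40004004 reuses that slot with *offs = 4; the caller then emits
// "ldr rd, [pc, #...]" plus "add rd, rd, #4" instead of a new pool entry.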
452
453// XXX: RSB, *S will break if 1 insn is not enough
52d759c3 454static void emith_op_imm2(int cond, int s, int op, int rd, int rn, unsigned int imm)
5c129565 455{
52d759c3 456 int ror2;
553c3eaa 457 u32 v;
83bafe8e 458 int i;
65c75cb0 459
39615f60 460 if (cond == A_COND_NV)
461 return;
462
e7ee5010 463 do {
464 u32 u;
465 // try to get the topmost byte empty to possibly save an insn
83bafe8e 466 for (v = imm, ror2 = 0; (v >> 24) && ror2 < 32/2; ror2++)
467 v = (v << 2) | (v >> 30);
e7ee5010 468
469 switch (op) {
470 case A_OP_MOV:
f2d19ddf 471 case A_OP_MVN:
e7ee5010 472 rn = 0;
 473		// use MVN if the imm has more 1 bits than 0 bits
474 if (count_bits(imm) > 16) {
748b8187 475 imm = ~imm;
e7ee5010 476 op = A_OP_MVN;
477 ror2 = -1;
478 break;
479 }
480 // count insns needed for mov/orr #imm
481#ifdef HAVE_ARMV7
a80b0b42 482 for (i = 2, u = v; i > 0 && u; i--, u >>= 8)
e7ee5010 483 while (u > 0xff && !(u & 3))
484 u >>= 2;
485 if (u) { // 3+ insns needed...
486 if (op == A_OP_MVN)
487 imm = ~imm;
488 // ...prefer movw/movt
65b37c5a 489 EOP_MOVW(cond,rd, imm);
e7ee5010 490 if (imm & 0xffff0000)
65b37c5a 491 EOP_MOVT(cond,rd, imm);
e7ee5010 492 return;
83bafe8e 493 }
e7ee5010 494#else
a80b0b42 495 for (i = 2, u = v; i > 0 && u; i--, u >>= 8)
e7ee5010 496 while (u > 0xff && !(u & 3))
497 u >>= 2;
d39eb595 498 if (u) { // 3+ insns needed...
e7ee5010 499 if (op == A_OP_MVN)
500 imm = ~imm;
501 // ...emit literal load
502 int idx, o;
503 if (literal_iindex >= MAX_HOST_LITERALS) {
504 elprintf(EL_STATUS|EL_SVP|EL_ANOMALY,
505 "pool overflow");
506 exit(1);
507 }
508 idx = emith_pool_literal(imm, &o);
509 literal_insn[literal_iindex++] = (u32 *)tcache_ptr;
510 EOP_LDR_IMM2(cond, rd, PC, idx * sizeof(u32));
511 if (o > 0)
512 EOP_C_DOP_IMM(cond, A_OP_ADD, 0,rd,rd,0,o);
513 else if (o < 0)
514 EOP_C_DOP_IMM(cond, A_OP_SUB, 0,rd,rd,0,-o);
65b37c5a 515 return;
e7ee5010 516 }
748b8187 517#endif
e7ee5010 518 break;
83bafe8e 519
e7ee5010 520 case A_OP_AND:
521 // AND must fit into 1 insn. if not, use BIC
522 for (u = v; u > 0xff && !(u & 3); u >>= 2) ;
523 if (u >> 8) {
524 imm = ~imm;
525 op = A_OP_BIC;
526 ror2 = -1;
527 }
528 break;
529
530 case A_OP_SUB:
531 case A_OP_ADD:
 532		// swap ADD and SUB if the imm has more 1 bits than 0 bits
533 if (s == 0 && count_bits(imm) > 16) {
534 imm = -imm;
535 op ^= (A_OP_ADD^A_OP_SUB);
536 ror2 = -1;
537 }
538 case A_OP_EOR:
539 case A_OP_ORR:
540 case A_OP_BIC:
541 if (s == 0 && imm == 0 && rd == rn)
542 return;
543 break;
d40a5af4 544 }
e7ee5010 545 } while (ror2 < 0);
65c75cb0 546
2fa02d5a 547 do {
 548		// shift down to get the 'best' ror2
549 while (v > 0xff && !(v & 3))
550 v >>= 2, ror2--;
2fa02d5a 551 EOP_C_DOP_IMM(cond, op, s, rn, rd, ror2 & 0xf, v & 0xff);
552
553 switch (op) {
554 case A_OP_MOV: op = A_OP_ORR; break;
555 case A_OP_MVN: op = A_OP_BIC; break;
556 case A_OP_ADC: op = A_OP_ADD; break;
557 case A_OP_SBC: op = A_OP_SUB; break;
558 }
e05b81fc 559 rn = rd;
2fa02d5a 560
561 v >>= 8, ror2 -= 8/2;
a80b0b42 562 if (v && s) {
563 elprintf(EL_STATUS|EL_SVP|EL_ANOMALY, "op+s %x value too big", op);
564 exit(1);
565 }
2fa02d5a 566 } while (v);
259ed0ea 567}
568
52d759c3 569#define emith_op_imm(cond, s, op, r, imm) \
570 emith_op_imm2(cond, s, op, r, r, imm)
571
ed8cf79b 572// test op
18b94127 573#define emith_top_imm(cond, op, r, imm) do { \
ed8cf79b 574 u32 ror2, v; \
575 for (ror2 = 0, v = imm; v && !(v & 3); v >>= 2) \
576 ror2--; \
577 EOP_C_DOP_IMM(cond, op, 1, r, 0, ror2 & 0x0f, v & 0xff); \
18b94127 578} while (0)
ed8cf79b 579
65c75cb0 580#define is_offset_24(val) \
581 ((val) >= (int)0xff000000 && (val) <= 0x00ffffff)
5c129565 582
65c75cb0 583static int emith_xbranch(int cond, void *target, int is_call)
5c129565 584{
65c75cb0 585 int val = (u32 *)target - (u32 *)tcache_ptr - 2;
f8af9634 586 int direct = is_offset_24(val);
65c75cb0 587 u32 *start_ptr = (u32 *)tcache_ptr;
259ed0ea 588
39615f60 589 if (cond == A_COND_NV)
590 return 0; // never taken
591
f8af9634 592 if (direct)
593 {
594 EOP_C_B(cond,is_call,val & 0xffffff); // b, bl target
595 }
596 else
597 {
598#ifdef __EPOC32__
599// elprintf(EL_SVP, "emitting indirect jmp %08x->%08x", tcache_ptr, target);
600 if (is_call)
e01deede 601 EOP_ADD_IMM(LR,PC,0,8); // add lr,pc,#8
602 EOP_C_AM2_IMM(cond,1,0,1,PC,PC,0); // ldrcc pc,[pc]
603 EOP_MOV_REG_SIMPLE(PC,PC); // mov pc, pc
604 EMIT((u32)target,M1(PC),0);
f8af9634 605#else
606 // should never happen
748b8187 607 elprintf(EL_STATUS|EL_SVP|EL_ANOMALY, "indirect jmp %8p->%8p", target, tcache_ptr);
f8af9634 608 exit(1);
609#endif
610 }
611
65c75cb0 612 return (u32 *)tcache_ptr - start_ptr;
5c129565 613}
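// note (illustrative): ARM branch targets are pc+8 + 4*signed_immed_24,
// which is where the "- 2" word correction in the offset calculation
// above comes from (2 words of pipeline prefetch).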
614
83bafe8e 615static void emith_pool_commit(int jumpover)
616{
617 int i, sz = literal_pindex * sizeof(u32);
618 u8 *pool = (u8 *)tcache_ptr;
619
620 // nothing to commit if pool is empty
621 if (sz == 0)
622 return;
623 // need branch over pool if not at block end
a80b0b42 624 if (jumpover < 0 && sz == sizeof(u32)) {
625 // hack for SVP drc (patch logic detects distance 4)
626 sz += sizeof(u32);
627 } else if (jumpover) {
83bafe8e 628 pool += sizeof(u32);
629 emith_xbranch(A_COND_AL, (u8 *)pool + sz, 0);
630 }
e01deede 631 emith_flush();
83bafe8e 632 // safety check - pool must be after insns and reachable
633 if ((u32)(pool - (u8 *)literal_insn[0] + 8) > 0xfff) {
634 elprintf(EL_STATUS|EL_SVP|EL_ANOMALY,
635 "pool offset out of range");
636 exit(1);
637 }
638 // copy pool and adjust addresses in insns accessing the pool
639 memcpy(pool, literal_pool, sz);
640 for (i = 0; i < literal_iindex; i++) {
641 *literal_insn[i] += (u8 *)pool - ((u8 *)literal_insn[i] + 8);
642 }
643 // count pool constants as insns for statistics
644 for (i = 0; i < literal_pindex; i++)
645 COUNT_OP;
646
647 tcache_ptr = (void *)((u8 *)pool + sz);
648 literal_pindex = literal_iindex = 0;
649}
650
651static inline void emith_pool_check(void)
652{
653 // check if pool must be committed
06bc3c06 654 if (literal_iindex > MAX_HOST_LITERALS-4 || (literal_pindex &&
655 (u8 *)tcache_ptr - (u8 *)literal_insn[0] > 0xe00))
83bafe8e 656 // pool full, or displacement is approaching the limit
657 emith_pool_commit(1);
658}
659
d39eb595 660static inline void emith_pool_adjust(int tcache_offs, int move_offs)
e01deede 661{
662 u32 *ptr = (u32 *)tcache_ptr - tcache_offs;
663 int i;
664
665 for (i = literal_iindex-1; i >= 0 && literal_insn[i] >= ptr; i--)
666 if (literal_insn[i] == ptr)
d39eb595 667 literal_insn[i] += move_offs;
e01deede 668}
669
aaea8e3e 670#define EMITH_HINT_COND(cond) /**/
671
a0f5ba40 672#define JMP_POS(ptr) { \
8796b7ee 673 ptr = tcache_ptr; \
a0f5ba40 674 EMIT(0,M1(PC),0); \
675}
8796b7ee 676
677#define JMP_EMIT(cond, ptr) { \
a2b8c5a5 678 u32 val_ = (u32 *)tcache_ptr - (u32 *)(ptr) - 2; \
a0f5ba40 679 emith_flush(); /* NO insn swapping across jump targets */ \
a2b8c5a5 680 EOP_C_B_PTR(ptr, cond, 0, val_ & 0xffffff); \
8796b7ee 681}
682
683#define EMITH_JMP_START(cond) { \
684 void *cond_ptr; \
685 JMP_POS(cond_ptr)
686
687#define EMITH_JMP_END(cond) \
688 JMP_EMIT(cond, cond_ptr); \
689}
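// usage sketch (illustrative): a forward conditional skip is emitted as
//	EMITH_JMP_START(DCOND_EQ);	// branch over the block if EQ
//	...emit the not-taken-case insns...
//	EMITH_JMP_END(DCOND_EQ);	// branch lands here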
5c129565 690
80599a42 691// fake "simple" or "short" jump - using cond insns instead
b081408f 692#define EMITH_NOTHING1(cond) \
80599a42 693 (void)(cond)
694
b081408f 695#define EMITH_SJMP_START(cond) EMITH_NOTHING1(cond)
696#define EMITH_SJMP_END(cond) EMITH_NOTHING1(cond)
1db36a7a 697#define EMITH_SJMP2_START(cond) EMITH_NOTHING1(cond)
698#define EMITH_SJMP2_MID(cond) EMITH_JMP_START((cond)^1) // inverse cond
699#define EMITH_SJMP2_END(cond) EMITH_JMP_END((cond)^1)
b081408f 700#define EMITH_SJMP3_START(cond) EMITH_NOTHING1(cond)
701#define EMITH_SJMP3_MID(cond) EMITH_NOTHING1(cond)
702#define EMITH_SJMP3_END()
80599a42 703
397ccdc6 704#define emith_move_r_r_c(cond, d, s) \
705 EOP_MOV_REG(cond,0,d,s,A_AM1_LSL,0)
80599a42 706#define emith_move_r_r(d, s) \
397ccdc6 707 emith_move_r_r_c(A_COND_AL, d, s)
80599a42 708
397ccdc6 709#define emith_move_r_r_ptr_c(cond, d, s) \
710 emith_move_r_r_c(cond, d, s)
898d51a7 711#define emith_move_r_r_ptr(d, s) \
712 emith_move_r_r(d, s)
713
52d759c3 714#define emith_mvn_r_r(d, s) \
715 EOP_MVN_REG(A_COND_AL,0,d,s,A_AM1_LSL,0)
716
bf092a36 717#define emith_add_r_r_r_lsl(d, s1, s2, lslimm) \
718 EOP_ADD_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSL,lslimm)
d40a5af4 719#define emith_add_r_r_r_lsl_ptr(d, s1, s2, lslimm) \
720 emith_add_r_r_r_lsl(d, s1, s2, lslimm)
bf092a36 721
aaea8e3e 722#define emith_adc_r_r_r_lsl(d, s1, s2, lslimm) \
723 EOP_ADC_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSL,lslimm)
724
4f4e9bf3 725#define emith_addf_r_r_r_lsl(d, s1, s2, lslimm) \
726 EOP_ADD_REG(A_COND_AL,1,d,s1,s2,A_AM1_LSL,lslimm)
1db36a7a 727#define emith_addf_r_r_r_lsr(d, s1, s2, lslimm) \
728 EOP_ADD_REG(A_COND_AL,1,d,s1,s2,A_AM1_LSR,lslimm)
729
4f4e9bf3 730#define emith_adcf_r_r_r_lsl(d, s1, s2, lslimm) \
731 EOP_ADC_REG(A_COND_AL,1,d,s1,s2,A_AM1_LSL,lslimm)
732
733#define emith_sub_r_r_r_lsl(d, s1, s2, lslimm) \
734 EOP_SUB_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSL,lslimm)
735
aaea8e3e 736#define emith_sbc_r_r_r_lsl(d, s1, s2, lslimm) \
737 EOP_SBC_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSL,lslimm)
738
4f4e9bf3 739#define emith_subf_r_r_r_lsl(d, s1, s2, lslimm) \
740 EOP_SUB_REG(A_COND_AL,1,d,s1,s2,A_AM1_LSL,lslimm)
741
742#define emith_sbcf_r_r_r_lsl(d, s1, s2, lslimm) \
743 EOP_SBC_REG(A_COND_AL,1,d,s1,s2,A_AM1_LSL,lslimm)
744
3863edbd 745#define emith_or_r_r_r_lsl(d, s1, s2, lslimm) \
746 EOP_ORR_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSL,lslimm)
aaea8e3e 747#define emith_or_r_r_r_lsr(d, s1, s2, lsrimm) \
748 EOP_ORR_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSR,lsrimm)
3863edbd 749
750#define emith_eor_r_r_r_lsl(d, s1, s2, lslimm) \
751 EOP_EOR_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSL,lslimm)
f0d7b1fa 752#define emith_eor_r_r_r_lsr(d, s1, s2, lsrimm) \
753 EOP_EOR_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSR,lsrimm)
754
4f4e9bf3 755#define emith_and_r_r_r_lsl(d, s1, s2, lslimm) \
756 EOP_AND_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSL,lslimm)
757
f0d7b1fa 758#define emith_or_r_r_lsl(d, s, lslimm) \
759 emith_or_r_r_r_lsl(d, d, s, lslimm)
aaea8e3e 760#define emith_or_r_r_lsr(d, s, lsrimm) \
761 emith_or_r_r_r_lsr(d, d, s, lsrimm)
f0d7b1fa 762
aaea8e3e 763#define emith_eor_r_r_lsl(d, s, lslimm) \
764 emith_eor_r_r_r_lsl(d, d, s, lslimm)
f0d7b1fa 765#define emith_eor_r_r_lsr(d, s, lsrimm) \
766 emith_eor_r_r_r_lsr(d, d, s, lsrimm)
767
bf092a36 768#define emith_add_r_r_r(d, s1, s2) \
769 emith_add_r_r_r_lsl(d, s1, s2, 0)
770
aaea8e3e 771#define emith_adc_r_r_r(d, s1, s2) \
772 emith_adc_r_r_r_lsl(d, s1, s2, 0)
773
4f4e9bf3 774#define emith_addf_r_r_r(d, s1, s2) \
775 emith_addf_r_r_r_lsl(d, s1, s2, 0)
776
777#define emith_adcf_r_r_r(d, s1, s2) \
778 emith_adcf_r_r_r_lsl(d, s1, s2, 0)
779
780#define emith_sub_r_r_r(d, s1, s2) \
781 emith_sub_r_r_r_lsl(d, s1, s2, 0)
782
aaea8e3e 783#define emith_sbc_r_r_r(d, s1, s2) \
784 emith_sbc_r_r_r_lsl(d, s1, s2, 0)
785
4f4e9bf3 786#define emith_subf_r_r_r(d, s1, s2) \
787 emith_subf_r_r_r_lsl(d, s1, s2, 0)
788
789#define emith_sbcf_r_r_r(d, s1, s2) \
790 emith_sbcf_r_r_r_lsl(d, s1, s2, 0)
791
3863edbd 792#define emith_or_r_r_r(d, s1, s2) \
793 emith_or_r_r_r_lsl(d, s1, s2, 0)
794
795#define emith_eor_r_r_r(d, s1, s2) \
796 emith_eor_r_r_r_lsl(d, s1, s2, 0)
797
4f4e9bf3 798#define emith_and_r_r_r(d, s1, s2) \
799 emith_and_r_r_r_lsl(d, s1, s2, 0)
800
80599a42 801#define emith_add_r_r(d, s) \
bf092a36 802 emith_add_r_r_r(d, d, s)
80599a42 803
d760c90f 804#define emith_add_r_r_ptr(d, s) \
805 emith_add_r_r_r(d, d, s)
806
aaea8e3e 807#define emith_adc_r_r(d, s) \
808 emith_adc_r_r_r(d, d, s)
809
80599a42 810#define emith_sub_r_r(d, s) \
d40a5af4 811 emith_sub_r_r_r(d, d, s)
80599a42 812
aaea8e3e 813#define emith_sbc_r_r(d, s) \
814 emith_sbc_r_r_r(d, d, s)
815
816#define emith_negc_r_r(d, s) \
817 EOP_C_DOP_IMM(A_COND_AL,A_OP_RSC,0,s,d,0,0)
8b4f38f4 818
6822ba9d 819#define emith_and_r_r_c(cond, d, s) \
820 EOP_AND_REG(cond,0,d,d,s,A_AM1_LSL,0)
3863edbd 821#define emith_and_r_r(d, s) \
822 EOP_AND_REG(A_COND_AL,0,d,d,s,A_AM1_LSL,0)
823
824#define emith_or_r_r(d, s) \
825 emith_or_r_r_r(d, d, s)
826
827#define emith_eor_r_r(d, s) \
828 emith_eor_r_r_r(d, d, s)
829
830#define emith_tst_r_r(d, s) \
831 EOP_TST_REG(A_COND_AL,d,s,A_AM1_LSL,0)
832
fda2f310 833#define emith_tst_r_r_ptr(d, s) \
834 emith_tst_r_r(d, s)
835
80599a42 836#define emith_teq_r_r(d, s) \
837 EOP_TEQ_REG(A_COND_AL,d,s,A_AM1_LSL,0)
838
3863edbd 839#define emith_cmp_r_r(d, s) \
840 EOP_CMP_REG(A_COND_AL,d,s,A_AM1_LSL,0)
841
842#define emith_addf_r_r(d, s) \
843 EOP_ADD_REG(A_COND_AL,1,d,d,s,A_AM1_LSL,0)
844
80599a42 845#define emith_subf_r_r(d, s) \
846 EOP_SUB_REG(A_COND_AL,1,d,d,s,A_AM1_LSL,0)
65c75cb0 847
3863edbd 848#define emith_adcf_r_r(d, s) \
849 EOP_ADC_REG(A_COND_AL,1,d,d,s,A_AM1_LSL,0)
850
851#define emith_sbcf_r_r(d, s) \
852 EOP_SBC_REG(A_COND_AL,1,d,d,s,A_AM1_LSL,0)
853
8796b7ee 854#define emith_eorf_r_r(d, s) \
855 EOP_EOR_REG(A_COND_AL,1,d,d,s,A_AM1_LSL,0)
856
65c75cb0 857#define emith_move_r_imm(r, imm) \
80599a42 858 emith_op_imm(A_COND_AL, 0, A_OP_MOV, r, imm)
65c75cb0 859
d40a5af4 860#define emith_move_r_ptr_imm(r, imm) \
861 emith_move_r_imm(r, (u32)(imm))
862
65c75cb0 863#define emith_add_r_imm(r, imm) \
80599a42 864 emith_op_imm(A_COND_AL, 0, A_OP_ADD, r, imm)
65c75cb0 865
5686d931 866#define emith_adc_r_imm(r, imm) \
867 emith_op_imm(A_COND_AL, 0, A_OP_ADC, r, imm)
868
1db36a7a 869#define emith_adcf_r_imm(r, imm) \
d40a5af4 870 emith_op_imm(A_COND_AL, 1, A_OP_ADC, r, imm)
1db36a7a 871
65c75cb0 872#define emith_sub_r_imm(r, imm) \
80599a42 873 emith_op_imm(A_COND_AL, 0, A_OP_SUB, r, imm)
874
875#define emith_bic_r_imm(r, imm) \
876 emith_op_imm(A_COND_AL, 0, A_OP_BIC, r, imm)
877
52d759c3 878#define emith_and_r_imm(r, imm) \
879 emith_op_imm(A_COND_AL, 0, A_OP_AND, r, imm)
880
80599a42 881#define emith_or_r_imm(r, imm) \
882 emith_op_imm(A_COND_AL, 0, A_OP_ORR, r, imm)
883
52d759c3 884#define emith_eor_r_imm(r, imm) \
885 emith_op_imm(A_COND_AL, 0, A_OP_EOR, r, imm)
886
748b8187 887#define emith_eor_r_imm_ptr(r, imm) \
888 emith_eor_r_imm(r, imm)
889
ed8cf79b 890// note: only use 8bit imm for these
80599a42 891#define emith_tst_r_imm(r, imm) \
ed8cf79b 892 emith_top_imm(A_COND_AL, A_OP_TST, r, imm)
893
e01deede 894#define emith_cmp_r_imm(r, imm) do { \
e267031a 895 u32 op_ = A_OP_CMP, imm_ = (u8)imm; \
896 if ((s8)imm_ < 0) { \
897 imm_ = (u8)-imm_; \
898 op_ = A_OP_CMN; \
8796b7ee 899 } \
e267031a 900 emith_top_imm(A_COND_AL, op_, r, imm_); \
e01deede 901} while (0)
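// example (illustrative): emith_cmp_r_imm(r, -1) flips the op and is
// emitted as "cmn r, #1", since small negative values have no valid
// 8-bit AM1 immediate for cmp.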
80599a42 902
903#define emith_subf_r_imm(r, imm) \
904 emith_op_imm(A_COND_AL, 1, A_OP_SUB, r, imm)
905
8796b7ee 906#define emith_move_r_imm_c(cond, r, imm) \
907 emith_op_imm(cond, 0, A_OP_MOV, r, imm)
908
80599a42 909#define emith_add_r_imm_c(cond, r, imm) \
910 emith_op_imm(cond, 0, A_OP_ADD, r, imm)
911
912#define emith_sub_r_imm_c(cond, r, imm) \
913 emith_op_imm(cond, 0, A_OP_SUB, r, imm)
914
915#define emith_or_r_imm_c(cond, r, imm) \
916 emith_op_imm(cond, 0, A_OP_ORR, r, imm)
917
f0d7b1fa 918#define emith_eor_r_imm_c(cond, r, imm) \
919 emith_op_imm(cond, 0, A_OP_EOR, r, imm)
920
748b8187 921#define emith_eor_r_imm_ptr_c(cond, r, imm) \
922 emith_eor_r_imm_c(cond, r, imm)
923
3863edbd 924#define emith_bic_r_imm_c(cond, r, imm) \
925 emith_op_imm(cond, 0, A_OP_BIC, r, imm)
926
4f4e9bf3 927#define emith_tst_r_imm_c(cond, r, imm) \
928 emith_top_imm(cond, A_OP_TST, r, imm)
929
06bc3c06 930#define emith_move_r_imm_s8_patchable(r, imm) do { \
d39eb595 931 emith_flush(); /* pin insn at current tcache_ptr for patching */ \
4f4e9bf3 932 if ((s8)(imm) < 0) \
06bc3c06 933 EOP_MVN_IMM(r, 0, (u8)~(imm)); \
52d759c3 934 else \
06bc3c06 935 EOP_MOV_IMM(r, 0, (u8)(imm)); \
936} while (0)
937#define emith_move_r_imm_s8_patch(ptr, imm) do { \
938 u32 *ptr_ = (u32 *)ptr; u32 op_ = *ptr_ & 0xfe1ff000; \
939 if ((s8)(imm) < 0) \
940 EMIT_PTR(ptr_, op_ | (A_OP_MVN<<21) | (u8)~(imm));\
941 else \
942 EMIT_PTR(ptr_, op_ | (A_OP_MOV<<21) | (u8)(imm));\
e01deede 943} while (0)
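// usage sketch (illustrative): the drc emits a placeholder first, e.g.
//	emith_move_r_imm_s8_patchable(r, 0);
// and later rewrites it in place (ptr = address of the placeholder insn):
//	emith_move_r_imm_s8_patch(ptr, -5);	// insn becomes "mvn r, #4"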
52d759c3 944
945#define emith_and_r_r_imm(d, s, imm) \
946 emith_op_imm2(A_COND_AL, 0, A_OP_AND, d, s, imm)
947
e05b81fc 948#define emith_add_r_r_imm(d, s, imm) \
949 emith_op_imm2(A_COND_AL, 0, A_OP_ADD, d, s, imm)
950
898d51a7 951#define emith_add_r_r_ptr_imm(d, s, imm) \
952 emith_add_r_r_imm(d, s, imm)
953
213b7f42 954#define emith_sub_r_r_imm_c(cond, d, s, imm) \
955 emith_op_imm2(cond, 0, A_OP_SUB, d, s, (imm))
956
e05b81fc 957#define emith_sub_r_r_imm(d, s, imm) \
958 emith_op_imm2(A_COND_AL, 0, A_OP_SUB, d, s, imm)
959
4f4e9bf3 960#define emith_subf_r_r_imm(d, s, imm) \
d40a5af4 961 emith_op_imm2(A_COND_AL, 1, A_OP_SUB, d, s, imm)
4f4e9bf3 962
963#define emith_or_r_r_imm(d, s, imm) \
d40a5af4 964 emith_op_imm2(A_COND_AL, 0, A_OP_ORR, d, s, imm)
4f4e9bf3 965
966#define emith_eor_r_r_imm(d, s, imm) \
d40a5af4 967 emith_op_imm2(A_COND_AL, 0, A_OP_EOR, d, s, imm)
4f4e9bf3 968
52d759c3 969#define emith_neg_r_r(d, s) \
970 EOP_RSB_IMM(d, s, 0, 0)
971
80599a42 972#define emith_lsl(d, s, cnt) \
973 EOP_MOV_REG(A_COND_AL,0,d,s,A_AM1_LSL,cnt)
974
975#define emith_lsr(d, s, cnt) \
976 EOP_MOV_REG(A_COND_AL,0,d,s,A_AM1_LSR,cnt)
977
8796b7ee 978#define emith_asr(d, s, cnt) \
979 EOP_MOV_REG(A_COND_AL,0,d,s,A_AM1_ASR,cnt)
980
b081408f 981#define emith_ror_c(cond, d, s, cnt) \
982 EOP_MOV_REG(cond,0,d,s,A_AM1_ROR,cnt)
983
ed8cf79b 984#define emith_ror(d, s, cnt) \
b081408f 985 emith_ror_c(A_COND_AL, d, s, cnt)
ed8cf79b 986
52d759c3 987#define emith_rol(d, s, cnt) \
988 EOP_MOV_REG(A_COND_AL,0,d,s,A_AM1_ROR,32-(cnt)); \
989
3863edbd 990#define emith_lslf(d, s, cnt) \
991 EOP_MOV_REG(A_COND_AL,1,d,s,A_AM1_LSL,cnt)
992
ed8cf79b 993#define emith_lsrf(d, s, cnt) \
994 EOP_MOV_REG(A_COND_AL,1,d,s,A_AM1_LSR,cnt)
995
80599a42 996#define emith_asrf(d, s, cnt) \
997 EOP_MOV_REG(A_COND_AL,1,d,s,A_AM1_ASR,cnt)
998
ed8cf79b 999// note: only C flag updated correctly
e01deede 1000#define emith_rolf(d, s, cnt) do { \
ed8cf79b 1001 EOP_MOV_REG(A_COND_AL,1,d,s,A_AM1_ROR,32-(cnt)); \
1002 /* we don't have ROL so we shift to get the right carry */ \
1003 EOP_TST_REG(A_COND_AL,d,d,A_AM1_LSR,1); \
e01deede 1004} while (0)
ed8cf79b 1005
1006#define emith_rorf(d, s, cnt) \
1007 EOP_MOV_REG(A_COND_AL,1,d,s,A_AM1_ROR,cnt)
1008
1009#define emith_rolcf(d) \
1010 emith_adcf_r_r(d, d)
aaea8e3e 1011#define emith_rolc(d) \
1012 emith_adc_r_r(d, d)
ed8cf79b 1013
1014#define emith_rorcf(d) \
1015 EOP_MOV_REG(A_COND_AL,1,d,d,A_AM1_ROR,0) /* ROR #0 -> RRX */
aaea8e3e 1016#define emith_rorc(d) \
1017 EOP_MOV_REG(A_COND_AL,0,d,d,A_AM1_ROR,0) /* ROR #0 -> RRX */
ed8cf79b 1018
52d759c3 1019#define emith_negcf_r_r(d, s) \
1020 EOP_C_DOP_IMM(A_COND_AL,A_OP_RSC,1,s,d,0,0)
1021
e01deede 1022#define emith_mul(d, s1, s2) do { \
80599a42 1023 if ((d) != (s1)) /* rd != rm limitation */ \
1024 EOP_MUL(d, s1, s2); \
1025 else \
1026 EOP_MUL(d, s2, s1); \
e01deede 1027} while (0)
65c75cb0 1028
3863edbd 1029#define emith_mul_u64(dlo, dhi, s1, s2) \
1030 EOP_C_UMULL(A_COND_AL,0,dhi,dlo,s1,s2)
1031
1032#define emith_mul_s64(dlo, dhi, s1, s2) \
1033 EOP_C_SMULL(A_COND_AL,0,dhi,dlo,s1,s2)
1034
1db36a7a 1035#define emith_mula_s64_c(cond, dlo, dhi, s1, s2) \
1036 EOP_C_SMLAL(cond,0,dhi,dlo,s1,s2)
f0d7b1fa 1037#define emith_mula_s64(dlo, dhi, s1, s2) \
1038 EOP_C_SMLAL(A_COND_AL,0,dhi,dlo,s1,s2)
1039
3863edbd 1040// misc
b081408f 1041#define emith_read_r_r_offs_c(cond, r, rs, offs) \
1042 EOP_LDR_IMM2(cond, r, rs, offs)
d760c90f 1043#define emith_read_r_r_offs_ptr_c(cond, r, rs, offs) \
1044 emith_read_r_r_offs_c(cond, r, rs, offs)
6822ba9d 1045#define emith_read_r_r_r_c(cond, r, rs, rm) \
1046 EOP_LDR_REG_LSL(cond, r, rs, rm, 0)
aa4c4cb9 1047#define emith_read_r_r_offs(r, rs, offs) \
1048 emith_read_r_r_offs_c(A_COND_AL, r, rs, offs)
9e36dd0e 1049#define emith_read_r_r_offs_ptr(r, rs, offs) \
1050 emith_read_r_r_offs_c(A_COND_AL, r, rs, offs)
6822ba9d 1051#define emith_read_r_r_r(r, rs, rm) \
1052 EOP_LDR_REG_LSL(A_COND_AL, r, rs, rm, 0)
b081408f 1053
1054#define emith_read8_r_r_offs_c(cond, r, rs, offs) \
1055 EOP_LDRB_IMM2(cond, r, rs, offs)
6822ba9d 1056#define emith_read8_r_r_r_c(cond, r, rs, rm) \
1057 EOP_LDRB_REG_LSL(cond, r, rs, rm, 0)
aa4c4cb9 1058#define emith_read8_r_r_offs(r, rs, offs) \
1059 emith_read8_r_r_offs_c(A_COND_AL, r, rs, offs)
6822ba9d 1060#define emith_read8_r_r_r(r, rs, rm) \
aa4c4cb9 1061 emith_read8_r_r_r_c(A_COND_AL, r, rs, rm)
b081408f 1062
1063#define emith_read16_r_r_offs_c(cond, r, rs, offs) \
1064 EOP_LDRH_IMM2(cond, r, rs, offs)
6822ba9d 1065#define emith_read16_r_r_r_c(cond, r, rs, rm) \
1066 EOP_LDRH_REG2(cond, r, rs, rm)
aa4c4cb9 1067#define emith_read16_r_r_offs(r, rs, offs) \
1068 emith_read16_r_r_offs_c(A_COND_AL, r, rs, offs)
6822ba9d 1069#define emith_read16_r_r_r(r, rs, rm) \
aa4c4cb9 1070 emith_read16_r_r_r_c(A_COND_AL, r, rs, rm)
b081408f 1071
aa4c4cb9 1072#define emith_read8s_r_r_offs_c(cond, r, rs, offs) \
1073 EOP_LDRSB_IMM2(cond, r, rs, offs)
1074#define emith_read8s_r_r_r_c(cond, r, rs, rm) \
1075 EOP_LDRSB_REG2(cond, r, rs, rm)
1db36a7a 1076#define emith_read8s_r_r_offs(r, rs, offs) \
aa4c4cb9 1077 emith_read8s_r_r_offs_c(A_COND_AL, r, rs, offs)
1078#define emith_read8s_r_r_r(r, rs, rm) \
1079 emith_read8s_r_r_r_c(A_COND_AL, r, rs, rm)
1080
1081#define emith_read16s_r_r_offs_c(cond, r, rs, offs) \
1082 EOP_LDRSH_IMM2(cond, r, rs, offs)
1083#define emith_read16s_r_r_r_c(cond, r, rs, rm) \
1084 EOP_LDRSH_REG2(cond, r, rs, rm)
1db36a7a 1085#define emith_read16s_r_r_offs(r, rs, offs) \
aa4c4cb9 1086 emith_read16s_r_r_offs_c(A_COND_AL, r, rs, offs)
1087#define emith_read16s_r_r_r(r, rs, rm) \
1088 emith_read16s_r_r_r_c(A_COND_AL, r, rs, rm)
b081408f 1089
d760c90f 1090#define emith_write_r_r_offs_c(cond, r, rs, offs) \
1091 EOP_STR_IMM2(cond, r, rs, offs)
1092#define emith_write_r_r_offs_ptr_c(cond, r, rs, offs) \
1093 emith_write_r_r_offs_c(cond, r, rs, offs)
9e36dd0e 1094#define emith_write_r_r_offs(r, rs, offs) \
1095 emith_write_r_r_offs_c(A_COND_AL, r, rs, offs)
1096#define emith_write_r_r_offs_ptr(r, rs, offs) \
1097 emith_write_r_r_offs_c(A_COND_AL, r, rs, offs)
d760c90f 1098
1099#define emith_ctx_read_c(cond, r, offs) \
1100 emith_read_r_r_offs_c(cond, r, CONTEXT_REG, offs)
65c75cb0 1101#define emith_ctx_read(r, offs) \
d760c90f 1102 emith_ctx_read_c(A_COND_AL, r, offs)
65c75cb0 1103
898d51a7 1104#define emith_ctx_read_ptr(r, offs) \
1105 emith_ctx_read(r, offs)
1106
65c75cb0 1107#define emith_ctx_write(r, offs) \
1108 EOP_STR_IMM(r, CONTEXT_REG, offs)
1109
8b4f38f4 1110#define emith_ctx_do_multiple(op, r, offs, count, tmpr) do { \
1111 int v_, r_ = r, c_ = count, b_ = CONTEXT_REG; \
1112 for (v_ = 0; c_; c_--, r_++) \
e01deede 1113 v_ |= M1(r_); \
8b4f38f4 1114 if ((offs) != 0) { \
1115 EOP_ADD_IMM(tmpr,CONTEXT_REG,30/2,(offs)>>2);\
1116 b_ = tmpr; \
1117 } \
1118 op(b_,v_); \
83bafe8e 1119} while (0)
8796b7ee 1120
8b4f38f4 1121#define emith_ctx_read_multiple(r, offs, count, tmpr) \
1122 emith_ctx_do_multiple(EOP_LDMIA, r, offs, count, tmpr)
1123
1124#define emith_ctx_write_multiple(r, offs, count, tmpr) \
1125 emith_ctx_do_multiple(EOP_STMIA, r, offs, count, tmpr)
1126
e01deede 1127#define emith_clear_msb_c(cond, d, s, count) do { \
80599a42 1128 u32 t; \
1129 if ((count) <= 8) { \
d40a5af4 1130 t = 8 - (count); \
80599a42 1131 t = (0xff << t) & 0xff; \
f0d7b1fa 1132 EOP_C_DOP_IMM(cond,A_OP_BIC,0,s,d,8/2,t); \
80599a42 1133 } else if ((count) >= 24) { \
1134 t = (count) - 24; \
1135 t = 0xff >> t; \
f0d7b1fa 1136 EOP_C_DOP_IMM(cond,A_OP_AND,0,s,d,0,t); \
80599a42 1137 } else { \
f0d7b1fa 1138 EOP_MOV_REG(cond,0,d,s,A_AM1_LSL,count); \
1139 EOP_MOV_REG(cond,0,d,d,A_AM1_LSR,count); \
80599a42 1140 } \
e01deede 1141} while (0)
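// example (illustrative): emith_clear_msb_c(A_COND_AL, d, s, 24) emits a
// single "and d, s, #0xff", while count == 16 takes the lsl #16 / lsr #16
// path because 0x0000ffff is not a valid AM1 immediate.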
80599a42 1142
f0d7b1fa 1143#define emith_clear_msb(d, s, count) \
1144 emith_clear_msb_c(A_COND_AL, d, s, count)
1145
e01deede 1146#define emith_sext(d, s, bits) do { \
80599a42 1147 EOP_MOV_REG_LSL(d,s,32 - (bits)); \
1148 EOP_MOV_REG_ASR(d,d,32 - (bits)); \
e01deede 1149} while (0)
80599a42 1150
8bb48947 1151#define emith_uext_ptr(r) /**/
1152
e01deede 1153#define emith_do_caller_regs(mask, func) do { \
6d797957 1154 u32 _reg_mask = (mask) & 0x500f; \
1155 if (_reg_mask) { \
1156 if (__builtin_parity(_reg_mask) == 1) \
1157 _reg_mask |= 0x10; /* eabi align */ \
1158 func(_reg_mask); \
1159 } \
e01deede 1160} while (0)
6d797957 1161
1162#define emith_save_caller_regs(mask) \
1163 emith_do_caller_regs(mask, EOP_STMFD_SP)
1164
1165#define emith_restore_caller_regs(mask) \
1166 emith_do_caller_regs(mask, EOP_LDMFD_SP)
1167
65c75cb0 1168// up to 4 args
1169#define emith_pass_arg_r(arg, reg) \
1170 EOP_MOV_REG_SIMPLE(arg, reg)
1171
1172#define emith_pass_arg_imm(arg, imm) \
1173 emith_move_r_imm(arg, imm)
1174
e05b81fc 1175#define emith_jump(target) \
1176 emith_jump_cond(A_COND_AL, target)
65c75cb0 1177
44e6452e 1178#define emith_jump_patchable(target) \
1179 emith_jump(target)
1180
65c75cb0 1181#define emith_jump_cond(cond, target) \
1182 emith_xbranch(cond, target, 0)
36614252 1183#define emith_jump_cond_inrange(target) !0
65c75cb0 1184
44e6452e 1185#define emith_jump_cond_patchable(cond, target) \
1186 emith_jump_cond(cond, target)
18b94127 1187
36614252 1188#define emith_jump_patch(ptr, target, pos) do { \
06bc3c06 1189 u32 *ptr_ = (u32 *)ptr; \
44e6452e 1190 u32 val_ = (u32 *)(target) - ptr_ - 2; \
1191 *ptr_ = (*ptr_ & 0xff000000) | (val_ & 0x00ffffff); \
36614252 1192 if ((void *)(pos) != NULL) *(u8 **)(pos) = (u8 *)ptr; \
1193} while (0)
06bc3c06 1194#define emith_jump_patch_inrange(ptr, target) !0
d80a5fd2 1195#define emith_jump_patch_size() 4
18b94127 1196
e01deede 1197#define emith_jump_at(ptr, target) do { \
f338a562 1198 u32 *ptr_ = (u32 *)ptr; \
31efd454 1199 u32 val_ = (u32 *)(target) - ptr_ - 2; \
f338a562 1200 EOP_C_B_PTR(ptr_, A_COND_AL, 0, val_ & 0xffffff); \
e01deede 1201} while (0)
06bc3c06 1202#define emith_jump_at_size() 4
a2b8c5a5 1203
e05b81fc 1204#define emith_jump_reg_c(cond, r) \
1205 EOP_C_BX(cond, r)
1206
8796b7ee 1207#define emith_jump_reg(r) \
e05b81fc 1208 emith_jump_reg_c(A_COND_AL, r)
1209
1210#define emith_jump_ctx_c(cond, offs) \
e01deede 1211 EOP_LDR_IMM2(cond,PC,CONTEXT_REG,offs)
e05b81fc 1212
1213#define emith_jump_ctx(offs) \
1214 emith_jump_ctx_c(A_COND_AL, offs)
1215
1216#define emith_call_cond(cond, target) \
1217 emith_xbranch(cond, target, 1)
1218
1219#define emith_call(target) \
1220 emith_call_cond(A_COND_AL, target)
1221
e01deede 1222#define emith_call_reg(r) do { \
1223 emith_move_r_r(LR, PC); \
aa4c4cb9 1224 EOP_C_BX(A_COND_AL, r); \
e01deede 1225} while (0)
aa4c4cb9 1226
0512a228 1227#define emith_abicall_ctx(offs) do { \
e01deede 1228 emith_move_r_r(LR, PC); \
e05b81fc 1229 emith_jump_ctx(offs); \
e01deede 1230} while (0)
e05b81fc 1231
fde25b40 1232#define emith_abijump_reg(r) \
1233 emith_jump_reg(r)
1234#define emith_abijump_reg_c(cond, r) \
1235 emith_jump_reg_c(cond, r)
1236#define emith_abicall(target) \
1237 emith_call(target)
1238#define emith_abicall_cond(cond, target) \
1239 emith_call_cond(cond, target)
1240#define emith_abicall_reg(r) \
1241 emith_call_reg(r)
1242
748b8187 1243#define emith_call_cleanup() /**/
1244
e05b81fc 1245#define emith_ret_c(cond) \
e01deede 1246 emith_jump_reg_c(cond, LR)
e05b81fc 1247
1248#define emith_ret() \
1249 emith_ret_c(A_COND_AL)
1250
1251#define emith_ret_to_ctx(offs) \
e01deede 1252 emith_ctx_write(LR, offs)
8796b7ee 1253
06bc3c06 1254#define emith_add_r_ret(r) \
1255 emith_add_r_r_ptr(r, LR)
36614252 1256
397ccdc6 1257/* pushes r12 for eabi alignment */
1258#define emith_push_ret(r) do { \
1259 int r_ = (r >= 0 ? r : 12); \
1260 EOP_STMFD_SP(M2(r_,LR)); \
1261} while (0)
a2b8c5a5 1262
397ccdc6 1263#define emith_pop_and_ret(r) do { \
1264 int r_ = (r >= 0 ? r : 12); \
1265 EOP_LDMFD_SP(M2(r_,PC)); \
1266} while (0)
a2b8c5a5 1267
4153006f 1268#define host_instructions_updated(base, end, force) \
69c22514 1269 do { if (force) emith_update_add(base, end); } while (0)
a2b8c5a5 1270
0512a228 1271#define host_call(addr, args) \
1272 addr
1273
a2b8c5a5 1274#define host_arg2reg(rd, arg) \
1275 rd = arg
1276
aa8a3b65 1277#define emith_rw_offs_max() 0x1ff // minimum of offset in AM2 and AM3
8284ab71 1278
65c75cb0 1279/* SH2 drc specific */
228ee974 1280/* pushes r12 for eabi alignment */
8796b7ee 1281#define emith_sh2_drc_entry() \
e01deede 1282 EOP_STMFD_SP(M10(4,5,6,7,8,9,10,11,12,LR))
8796b7ee 1283
1284#define emith_sh2_drc_exit() \
e01deede 1285 EOP_LDMFD_SP(M10(4,5,6,7,8,9,10,11,12,PC))
8796b7ee 1286
6822ba9d 1287// assumes a is in arg0; tab, func and mask are temp
e01deede 1288#define emith_sh2_rcall(a, tab, func, mask) do { \
6822ba9d 1289 emith_lsr(mask, a, SH2_READ_SHIFT); \
1290 EOP_ADD_REG_LSL(tab, tab, mask, 3); \
e01deede 1291 if (func < mask) EOP_LDMIA(tab, M2(func,mask)); /* ldm if possible */ \
d40a5af4 1292 else { emith_read_r_r_offs(func, tab, 0); \
1293 emith_read_r_r_offs(mask, tab, 4); } \
6822ba9d 1294 emith_addf_r_r_r(func,func,func); \
e01deede 1295} while (0)
6822ba9d 1296
 1297// assumes a, val are in arg0 and arg1; tab and func are temp
e01deede 1298#define emith_sh2_wcall(a, val, tab, func) do { \
6822ba9d 1299 emith_lsr(func, a, SH2_WRITE_SHIFT); \
1300 EOP_LDR_REG_LSL(A_COND_AL,func,tab,func,2); \
1301 emith_move_r_r(2, CONTEXT_REG); /* arg2 */ \
05138bbd 1302 emith_abijump_reg(func); \
e01deede 1303} while (0)
e05b81fc 1304
e01deede 1305#define emith_sh2_dtbf_loop() do { \
80599a42 1306 int cr, rn; \
52d759c3 1307 int tmp_ = rcache_get_tmp(); \
80599a42 1308 cr = rcache_get_reg(SHR_SR, RC_GR_RMW); \
1309 rn = rcache_get_reg((op >> 8) & 0x0f, RC_GR_RMW); \
1310 emith_sub_r_imm(rn, 1); /* sub rn, #1 */ \
1311 emith_bic_r_imm(cr, 1); /* bic cr, #1 */ \
1312 emith_sub_r_imm(cr, (cycles+1) << 12); /* sub cr, #(cycles+1)<<12 */ \
1313 cycles = 0; \
52d759c3 1314 emith_asrf(tmp_, cr, 2+12); /* movs tmp_, cr, asr #2+12 */\
1315 EOP_MOV_IMM_C(A_COND_MI,tmp_,0,0); /* movmi tmp_, #0 */ \
80599a42 1316 emith_lsl(cr, cr, 20); /* mov cr, cr, lsl #20 */ \
1317 emith_lsr(cr, cr, 20); /* mov cr, cr, lsr #20 */ \
52d759c3 1318 emith_subf_r_r(rn, tmp_); /* subs rn, tmp_ */ \
1319 EOP_RSB_IMM_C(A_COND_LS,tmp_,rn,0,0); /* rsbls tmp_, rn, #0 */ \
1320 EOP_ORR_REG(A_COND_LS,0,cr,cr,tmp_,A_AM1_LSL,12+2); /* orrls cr,tmp_,lsl #12+2 */\
80599a42 1321 EOP_ORR_IMM_C(A_COND_LS,cr,cr,0,1); /* orrls cr, #1 */ \
1322 EOP_MOV_IMM_C(A_COND_LS,rn,0,0); /* movls rn, #0 */ \
52d759c3 1323 rcache_free_tmp(tmp_); \
e01deede 1324} while (0)
65c75cb0 1325
213b7f42 1326#define emith_sh2_delay_loop(cycles, reg) do { \
1327 int sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); \
1328 int t1 = rcache_get_tmp(); \
1329 int t2 = rcache_get_tmp(); \
1330 int t3 = rcache_get_tmp(); \
1331 /* if (sr < 0) return */ \
68e50296 1332 emith_cmp_r_imm(sr, 0); \
213b7f42 1333 EMITH_JMP_START(DCOND_LE); \
1334 /* turns = sr.cycles / cycles */ \
68e50296 1335 emith_asr(t2, sr, 12); \
38fd3bd8 1336 emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles))); \
213b7f42 1337 emith_mul_u64(t1, t2, t2, t3); /* multiply by 1/x */ \
1338 rcache_free_tmp(t3); \
1339 if (reg >= 0) { \
1340 /* if (reg <= turns) turns = reg-1 */ \
1341 t3 = rcache_get_reg(reg, RC_GR_RMW, NULL); \
1342 emith_cmp_r_r(t3, t2); \
748b8187 1343 emith_sub_r_r_imm_c(DCOND_LS, t2, t3, 1); \
213b7f42 1344 /* if (reg <= 1) turns = 0 */ \
1345 emith_cmp_r_imm(t3, 1); \
748b8187 1346 emith_move_r_imm_c(DCOND_LS, t2, 0); \
213b7f42 1347 /* reg -= turns */ \
1348 emith_sub_r_r(t3, t2); \
1349 } \
1350 /* sr.cycles -= turns * cycles; */ \
1351 emith_move_r_imm(t1, cycles); \
1352 emith_mul(t1, t2, t1); \
1353 emith_sub_r_r_r_lsl(sr, sr, t1, 12); \
1354 EMITH_JMP_END(DCOND_LE); \
1355 rcache_free_tmp(t1); \
1356 rcache_free_tmp(t2); \
1357} while (0)
1358
e01deede 1359#define emith_write_sr(sr, srcr) do { \
18b94127 1360 emith_lsr(sr, sr, 10); \
1361 emith_or_r_r_r_lsl(sr, sr, srcr, 22); \
1362 emith_ror(sr, sr, 22); \
e01deede 1363} while (0)
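// worked example (illustrative): the sequence keeps sr[31:10] (cycle count
// etc.) and replaces sr[9:0] (the SH2 flags) with srcr[9:0]: the lsr drops
// the old flags, the orr parks the new ones in bits 31..22, and the final
// ror by 22 rotates both parts back into place.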
ed8cf79b 1364
e01deede 1365#define emith_carry_to_t(srr, is_sub) do { \
68e50296 1366 emith_bic_r_imm(srr, 1); \
1367 if (is_sub) /* has inverted C on ARM */ \
ed8cf79b 1368 emith_or_r_imm_c(A_COND_CC, srr, 1); \
68e50296 1369 else \
ed8cf79b 1370 emith_or_r_imm_c(A_COND_CS, srr, 1); \
e01deede 1371} while (0)
f0d7b1fa 1372
aaea8e3e 1373#define emith_t_to_carry(srr, is_sub) do { \
1374 if (is_sub) { \
1375 int t_ = rcache_get_tmp(); \
1376 emith_eor_r_r_imm(t_, srr, 1); \
1377 emith_rorf(t_, t_, 1); \
1378 rcache_free_tmp(t_); \
1379 } else { \
1380 emith_rorf(srr, srr, 1); \
1381 emith_rol(srr, srr, 1); \
1382 } \
1383} while (0)
1384
e01deede 1385#define emith_tpop_carry(sr, is_sub) do { \
8b4f38f4 1386 if (is_sub) \
1387 emith_eor_r_imm(sr, 1); \
1388 emith_lsrf(sr, sr, 1); \
e01deede 1389} while (0)
8b4f38f4 1390
e01deede 1391#define emith_tpush_carry(sr, is_sub) do { \
8b4f38f4 1392 emith_adc_r_r(sr, sr); \
1393 if (is_sub) \
1394 emith_eor_r_imm(sr, 1); \
e01deede 1395} while (0)
8b4f38f4 1396
f0d7b1fa 1397/*
f2d19ddf 1398 * T = carry(Rn = (Rn << 1) | T)
f0d7b1fa 1399 * if Q
f2d19ddf 1400 * T ^= !carry(Rn += Rm)
f0d7b1fa 1401 * else
f2d19ddf 1402 * T ^= !carry(Rn -= Rm)
f0d7b1fa 1403 */
e01deede 1404#define emith_sh2_div1_step(rn, rm, sr) do { \
f0d7b1fa 1405 void *jmp0, *jmp1; \
f2d19ddf 1406 emith_tpop_carry(sr, 0); /* Rn = 2*Rn+T */\
1407 emith_adcf_r_r_r(rn, rn, rn); \
1408 emith_tpush_carry(sr, 0); \
f0d7b1fa 1409 emith_tst_r_imm(sr, Q); /* if (Q ^ M) */ \
1410 JMP_POS(jmp0); /* beq do_sub */ \
f2d19ddf 1411 emith_addf_r_r(rn, rm); /* Rn += Rm */ \
1412 emith_eor_r_imm_c(A_COND_CC, sr, T); \
f0d7b1fa 1413 JMP_POS(jmp1); /* b done */ \
1414 JMP_EMIT(A_COND_EQ, jmp0); /* do_sub: */ \
f2d19ddf 1415 emith_subf_r_r(rn, rm); /* Rn -= Rm */ \
1416 emith_eor_r_imm_c(A_COND_CS, sr, T); \
f0d7b1fa 1417 JMP_EMIT(A_COND_AL, jmp1); /* done: */ \
e01deede 1418} while (0)
f0d7b1fa 1419
1db36a7a 1420/* mh:ml += rn*rm, does saturation if required by S bit. rn, rm must be TEMP */
e01deede 1421#define emith_sh2_macl(ml, mh, rn, rm, sr) do { \
1db36a7a 1422 emith_tst_r_imm(sr, S); \
1423 EMITH_SJMP2_START(DCOND_NE); \
1424 emith_mula_s64_c(DCOND_EQ, ml, mh, rn, rm); \
1425 EMITH_SJMP2_MID(DCOND_NE); \
1426 /* MACH top 16 bits unused if saturated. sign ext for overfl detect */ \
1427 emith_sext(mh, mh, 16); \
1428 emith_mula_s64(ml, mh, rn, rm); \
1429 /* overflow if top 17 bits of MACH aren't all 1 or 0 */ \
38fd3bd8 1430 /* to check: add MACH >> 31 to MACH >> 15. this is 0 if no overflow */ \
1431 emith_asr(rn, mh, 15); \
1432 emith_addf_r_r_r_lsr(rn, rn, mh, 31); \
1433 EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> -ovl */ \
1434 emith_move_r_imm_c(DCOND_NE, ml, 0x00000000); \
1435 emith_move_r_imm_c(DCOND_NE, mh, 0x00008000); \
1436 EMITH_SJMP_START(DCOND_MI); /* sum > 0 -> +ovl */ \
1437 emith_sub_r_imm_c(DCOND_PL, ml, 1); /* 0xffffffff */ \
1438 emith_sub_r_imm_c(DCOND_PL, mh, 1); /* 0x00007fff */ \
1439 EMITH_SJMP_END(DCOND_MI); \
1db36a7a 1440 EMITH_SJMP_END(DCOND_EQ); \
1441 EMITH_SJMP2_END(DCOND_NE); \
1442} while (0)
1443
1444/* mh:ml += rn*rm, does saturation if required by S bit. rn, rm must be TEMP */
aa4c4cb9 1445#define emith_sh2_macw(ml, mh, rn, rm, sr) do { \
1db36a7a 1446 emith_tst_r_imm(sr, S); \
1447 EMITH_SJMP2_START(DCOND_NE); \
1448 emith_mula_s64_c(DCOND_EQ, ml, mh, rn, rm); \
1449 EMITH_SJMP2_MID(DCOND_NE); \
1450 /* XXX: MACH should be untouched when S is set? */ \
1451 emith_asr(mh, ml, 31); /* sign ext MACL to MACH for ovrfl check */ \
1452 emith_mula_s64(ml, mh, rn, rm); \
1453 /* overflow if top 33 bits of MACH:MACL aren't all 1 or 0 */ \
1454 /* to check: add MACL[31] to MACH. this is 0 if no overflow */ \
1455 emith_addf_r_r_r_lsr(mh, mh, ml, 31); /* sum = MACH + ((MACL>>31)&1) */\
1456 EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> overflow */ \
1457 /* XXX: LSB signalling only in SH1, or in SH2 too? */ \
1458 emith_move_r_imm_c(DCOND_NE, mh, 0x00000001); /* LSB of MACH */ \
38fd3bd8 1459 emith_move_r_imm_c(DCOND_NE, ml, 0x80000000); /* -ovrfl */ \
1460 EMITH_SJMP_START(DCOND_MI); /* sum > 0 -> +ovrfl */ \
1461 emith_sub_r_imm_c(DCOND_PL, ml, 1); /* 0x7fffffff */ \
1462 EMITH_SJMP_END(DCOND_MI); \
1db36a7a 1463 EMITH_SJMP_END(DCOND_EQ); \
1464 EMITH_SJMP2_END(DCOND_NE); \
1465} while (0)
1466
39615f60 1467#ifdef T
1468// T bit handling
1469static int tcond = -1;
1470
1471#define emith_invert_cond(cond) \
1472 ((cond) ^ 1)
1473
1474#define emith_clr_t_cond(sr) \
1475 (void)sr
1476
1477#define emith_set_t_cond(sr, cond) \
1478 tcond = cond
1479
1480#define emith_get_t_cond() \
1481 tcond
1482
1483#define emith_invalidate_t() \
1484 tcond = -1
1485
1486#define emith_set_t(sr, val) \
1487 tcond = ((val) ? A_COND_AL: A_COND_NV)
1488
748b8187 1489static void emith_sync_t(int sr)
39615f60 1490{
1491 if (tcond == A_COND_AL)
1492 emith_or_r_imm(sr, T);
1493 else if (tcond == A_COND_NV)
1494 emith_bic_r_imm(sr, T);
1495 else if (tcond >= 0) {
68e50296 1496 emith_bic_r_imm(sr, T);
39615f60 1497 emith_or_r_imm_c(tcond, sr, T);
1498 }
1499 tcond = -1;
1500}
1501
1502static int emith_tst_t(int sr, int tf)
1503{
1504 if (tcond < 0) {
1505 emith_tst_r_imm(sr, T);
1506 return tf ? DCOND_NE: DCOND_EQ;
1507 } else if (tcond >= A_COND_AL) {
1508 // MUST sync because A_COND_NV isn't a real condition
1509 emith_sync_t(sr);
1510 emith_tst_r_imm(sr, T);
1511 return tf ? DCOND_NE: DCOND_EQ;
1512 } else
1513 return tf ? tcond : emith_invert_cond(tcond);
1514}
1515#endif