32x: drc: finish MAC, gen drc entry/exit (for statically alloced regs)
[picodrive.git] / cpu / drc / emit_x86.c
CommitLineData
/*
 * note about silly things like emith_eor_r_r_r:
 * these are here because the compiler was designed
 * for ARM as its primary target.
 */
679af8a3 6#include <stdarg.h>
7
/* x86 general purpose registers, numbered as in instruction encodings */
enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI };

/* host register that the context accessors address memory through */
#define CONTEXT_REG xBP
679af8a3 11
/* opcodes of the short (rel8) jump instructions */
#define IOP_JMP 0xeb
#define IOP_JO  0x70
#define IOP_JNO 0x71
#define IOP_JB  0x72
#define IOP_JAE 0x73
#define IOP_JE  0x74
#define IOP_JNE 0x75
#define IOP_JBE 0x76
#define IOP_JA  0x77
#define IOP_JS  0x78
#define IOP_JNS 0x79
#define IOP_JL  0x7c
#define IOP_JGE 0x7d
#define IOP_JLE 0x7e
#define IOP_JG  0x7f

// unified conditions (we just use rel8 jump instructions for x86)
#define DCOND_EQ IOP_JE
#define DCOND_NE IOP_JNE
#define DCOND_MI IOP_JS  // MInus
#define DCOND_PL IOP_JNS // PLus or zero
#define DCOND_HI IOP_JA  // higher (unsigned)
#define DCOND_HS IOP_JAE // higher || same (unsigned)
#define DCOND_LO IOP_JB  // lower (unsigned)
#define DCOND_LS IOP_JBE // lower || same (unsigned)
#define DCOND_GE IOP_JGE // greater || equal (signed)
#define DCOND_GT IOP_JG  // greater (signed)
#define DCOND_LE IOP_JLE // less || equal (signed)
#define DCOND_LT IOP_JL  // less (signed)
#define DCOND_VS IOP_JO  // oVerflow Set
#define DCOND_VC IOP_JNO // oVerflow Clear
80599a42 43
/*
 * Raw emit primitives. They write at tcache_ptr; tcache_ptr, COUNT_OP
 * and the u8/u32 types must be provided by the file including this one.
 */

/* store val as 'type' at ptr; nothing is advanced */
#define EMIT_PTR(ptr, val, type) \
  *(type *)(ptr) = (val)

/* store val as 'type' at tcache_ptr and step tcache_ptr past it */
#define EMIT(val, type) { \
  EMIT_PTR(tcache_ptr, val, type); \
  tcache_ptr += sizeof(type); \
}

/* emit a one byte opcode; COUNT_OP is evaluated once per opcode */
#define EMIT_OP(op) { \
  COUNT_OP; \
  EMIT(op, u8); \
}

/* ModRM byte: mod in bits 7-6, r in bits 5-3, rm in bits 2-0 */
#define EMIT_MODRM(mod,r,rm) \
  EMIT(((mod)<<6) | ((r)<<3) | (rm), u8)

/* SIB byte: scale in bits 7-6, index in bits 5-3, base in bits 2-0 */
#define EMIT_SIB(scale,index,base) \
  EMIT(((scale)<<6) | ((index)<<3) | (base), u8)

/* opcode byte followed by its ModRM byte */
#define EMIT_OP_MODRM(op,mod,r,rm) { \
  EMIT_OP(op); \
  EMIT_MODRM(mod, r, rm); \
}
67
/*
 * Reserve two bytes at tcache_ptr for a rel8 jump and remember their
 * address in ptr. Wrapped in do/while so the macro is one statement,
 * e.g. under an unbraced if.
 */
#define JMP8_POS(ptr) do { \
  (ptr) = tcache_ptr; \
  tcache_ptr += 2; \
} while (0)

/*
 * Fill in a jump reserved by JMP8_POS: opcode op, target = current
 * tcache_ptr. The displacement is stored as a single byte, so the jump
 * must stay within rel8 range; nothing checks that here.
 */
#define JMP8_EMIT(op, ptr) do { \
  EMIT_PTR(ptr, op, u8); \
  EMIT_PTR((ptr) + 1, (tcache_ptr - ((ptr) + 2)), u8); \
} while (0)
75
// _r_r
/* two operand register ops, always "d = d <op> s" (ARM-like order) */

/* MOV dst, src */
#define emith_move_r_r(dst, src) \
  EMIT_OP_MODRM(0x8b, 3, dst, src)

/* ADD d, s */
#define emith_add_r_r(d, s) \
  EMIT_OP_MODRM(0x01, 3, s, d)

/* SUB d, s */
#define emith_sub_r_r(d, s) \
  EMIT_OP_MODRM(0x29, 3, s, d)

/* ADC d, s */
#define emith_adc_r_r(d, s) \
  EMIT_OP_MODRM(0x11, 3, s, d)

#define emith_sbc_r_r(d, s) \
  EMIT_OP_MODRM(0x19, 3, s, d) /* SBB */

/* OR d, s */
#define emith_or_r_r(d, s) \
  EMIT_OP_MODRM(0x09, 3, s, d)

/* AND d, s */
#define emith_and_r_r(d, s) \
  EMIT_OP_MODRM(0x21, 3, s, d)

#define emith_eor_r_r(d, s) \
  EMIT_OP_MODRM(0x31, 3, s, d) /* XOR */

#define emith_tst_r_r(d, s) \
  EMIT_OP_MODRM(0x85, 3, s, d) /* TEST */

/* CMP d, s */
#define emith_cmp_r_r(d, s) \
  EMIT_OP_MODRM(0x39, 3, s, d)
80599a42 106
107// fake teq - test equivalence - get_flags(d ^ s)
108#define emith_teq_r_r(d, s) { \
109 emith_push(d); \
110 emith_eor_r_r(d, s); \
111 emith_pop(d); \
112}
113
52d759c3 114#define emith_mvn_r_r(d, s) { \
115 if (d != s) \
116 emith_move_r_r(d, s); \
117 EMIT_OP_MODRM(0xf7, 3, 2, d); /* NOT d */ \
118}
119
120#define emith_negc_r_r(d, s) { \
121 int tmp_ = rcache_get_tmp(); \
122 emith_move_r_imm(tmp_, 0); \
123 emith_sbc_r_r(tmp_, s); \
124 emith_move_r_r(d, tmp_); \
125 rcache_free_tmp(tmp_); \
126}
127
128#define emith_neg_r_r(d, s) { \
129 if (d != s) \
130 emith_move_r_r(d, s); \
131 EMIT_OP_MODRM(0xf7, 3, 3, d); /* NEG d */ \
132}
133
// _r_r_r
/* d = s1 ^ s2; uses the 2 operand form directly when d aliases a source */
#define emith_eor_r_r_r(d, s1, s2) { \
  if (d == s1) { \
    emith_eor_r_r(d, s2); \
  } else if (d == s2) { \
    emith_eor_r_r(d, s1); \
  } else { \
    emith_move_r_r(d, s1); \
    emith_eor_r_r(d, s2); \
  } \
}

// _r_r_shift
/* d |= s << lslimm; the shifted value is built in a temp register */
#define emith_or_r_r_lsl(d, s, lslimm) { \
  int tmp_ = rcache_get_tmp(); \
  emith_lsl(tmp_, s, lslimm); \
  emith_or_r_r(d, tmp_); \
  rcache_free_tmp(tmp_); \
}

// d != s
/* d ^= s >> lsrimm; s is shifted in place and restored from the stack */
#define emith_eor_r_r_lsr(d, s, lsrimm) { \
  emith_push(s); \
  emith_lsr(s, s, lsrimm); \
  emith_eor_r_r(d, s); \
  emith_pop(s); \
}
161
// _r_imm
/* MOV r, imm32 */
#define emith_move_r_imm(r, imm) { \
  EMIT_OP(0xb8 + (r)); \
  EMIT(imm, u32); \
}

/* load an 8 bit immediate, sign extended to 32 bits */
#define emith_move_r_imm_s8(r, imm) \
  emith_move_r_imm(r, (u32)(signed int)(signed char)(imm))

/* opcode 0x81 group with a 32 bit immediate; op selects the operation */
#define emith_arith_r_imm(op, r, imm) { \
  EMIT_OP_MODRM(0x81, 3, op, r); \
  EMIT(imm, u32); \
}

// 2 - adc, 3 - sbb
#define emith_add_r_imm(r, imm) \
  emith_arith_r_imm(0, r, imm)

#define emith_or_r_imm(r, imm) \
  emith_arith_r_imm(1, r, imm)

#define emith_and_r_imm(r, imm) \
  emith_arith_r_imm(4, r, imm)

#define emith_sub_r_imm(r, imm) \
  emith_arith_r_imm(5, r, imm)

#define emith_eor_r_imm(r, imm) \
  emith_arith_r_imm(6, r, imm)

#define emith_cmp_r_imm(r, imm) \
  emith_arith_r_imm(7, r, imm)

/* opcode 0xf7 /0 with a 32 bit immediate (TEST) */
#define emith_tst_r_imm(r, imm) { \
  EMIT_OP_MODRM(0xf7, 3, 0, r); \
  EMIT(imm, u32); \
}

// fake
/* bit clear: AND with the inverted immediate */
#define emith_bic_r_imm(r, imm) \
  emith_arith_r_imm(4, r, ~(imm))

// fake conditionals (using SJMP instead)
/* cond is evaluated and discarded; the plain instruction is emitted */
#define emith_move_r_imm_c(cond, r, imm) { \
  (void)(cond); \
  emith_move_r_imm(r, imm); \
}

#define emith_add_r_imm_c(cond, r, imm) { \
  (void)(cond); \
  emith_add_r_imm(r, imm); \
}

#define emith_or_r_imm_c(cond, r, imm) { \
  (void)(cond); \
  emith_or_r_imm(r, imm); \
}

#define emith_eor_r_imm_c(cond, r, imm) { \
  (void)(cond); \
  emith_eor_r_imm(r, imm); \
}

#define emith_sub_r_imm_c(cond, r, imm) { \
  (void)(cond); \
  emith_sub_r_imm(r, imm); \
}

#define emith_bic_r_imm_c(cond, r, imm) { \
  (void)(cond); \
  emith_bic_r_imm(r, imm); \
}
234
// _r_r_imm
/* d = s & imm */
#define emith_and_r_r_imm(d, s, imm) { \
  if ((d) != (s)) \
    emith_move_r_r(d, s); \
  emith_and_r_imm(d, imm); \
}
241
// shift
/* d = s shifted/rotated by an immediate count; op picks the 0xc1 group op */
#define emith_shift(op, d, s, cnt) { \
  if (d != s) \
    emith_move_r_r(d, s); \
  EMIT_OP_MODRM(0xc1, 3, op, d); \
  EMIT(cnt, u8); \
}

#define emith_lsl(d, s, cnt) \
  emith_shift(4, d, s, cnt)

#define emith_lsr(d, s, cnt) \
  emith_shift(5, d, s, cnt)

#define emith_asr(d, s, cnt) \
  emith_shift(7, d, s, cnt)

#define emith_rol(d, s, cnt) \
  emith_shift(0, d, s, cnt)

#define emith_ror(d, s, cnt) \
  emith_shift(1, d, s, cnt)

/* rotate left through carry by 1 (0xd1 /2) */
#define emith_rolc(r) \
  EMIT_OP_MODRM(0xd1, 3, 2, r)

/* rotate right through carry by 1 (0xd1 /3) */
#define emith_rorc(r) \
  EMIT_OP_MODRM(0xd1, 3, 3, r)

// misc
#define emith_push(r) \
  EMIT_OP(0x50 + (r))

#define emith_pop(r) \
  EMIT_OP(0x58 + (r))

/* in-place negate (0xf7 /3) */
#define emith_neg_r(r) \
  EMIT_OP_MODRM(0xf7, 3, 3, r)
280
281#define emith_clear_msb(d, s, count) { \
282 u32 t = (u32)-1; \
283 t >>= count; \
284 if (d != s) \
285 emith_move_r_r(d, s); \
286 emith_and_r_imm(d, t); \
287}
288
f0d7b1fa 289#define emith_clear_msb_c(cond, d, s, count) { \
290 (void)(cond); \
291 emith_clear_msb(d, s, count); \
292}
293
/* d = s sign extended from 'bits' bits: shift up, then arithmetic shift down */
#define emith_sext(d, s, bits) { \
  emith_lsl(d, s, 32 - (bits)); \
  emith_asr(d, d, 32 - (bits)); \
}

/* byte-sized set-on-carry; only the 8 bit register encoded by r is written */
#define emith_setc(r) { \
  EMIT_OP(0x0f); \
  EMIT(0x92, u8); \
  EMIT_MODRM(3, 0, r); /* SETC r */ \
}

// put bit0 of r0 to carry
#define emith_set_carry(r0) { \
  emith_tst_r_imm(r0, 1); /* clears C */ \
  EMITH_SJMP_START(DCOND_EQ); \
  EMIT_OP(0xf9); /* STC */ \
  EMITH_SJMP_END(DCOND_EQ); \
}

// put bit0 of r0 to carry (for subtraction)
#define emith_set_carry_sub emith_set_carry
315
// XXX: stupid mess
/*
 * Widening multiply through the 0xf7 group (op 4 = MUL, 5 = IMUL), which
 * always works on EAX and writes EDX:EAX.
 *  - EAX/EDX are saved around the sequence unless they are a destination;
 *  - one source is taken from (or moved to) EAX, the other is the operand;
 *  - the low result goes to dlo, the high result to dhi (dhi == -1: drop).
 */
#define emith_mul_(op, dlo, dhi, s1, s2) { \
  int rmr_; \
  if ((dlo) != xAX && (dhi) != xAX) \
    emith_push(xAX); \
  if ((dlo) != xDX && (dhi) != xDX) \
    emith_push(xDX); \
  if ((s1) == xAX) \
    rmr_ = s2; \
  else if ((s2) == xAX) \
    rmr_ = s1; \
  else { \
    emith_move_r_r(xAX, s1); \
    rmr_ = s2; \
  } \
  EMIT_OP_MODRM(0xf7, 3, op, rmr_); /* xMUL rmr */ \
  /* XXX: using push/pop for the case of edx->eax; eax->edx */ \
  if ((dhi) != xDX && (dhi) != -1) \
    emith_push(xDX); \
  if ((dlo) != xAX) \
    emith_move_r_r(dlo, xAX); \
  if ((dhi) != xDX && (dhi) != -1) \
    emith_pop(dhi); \
  if ((dlo) != xDX && (dhi) != xDX) \
    emith_pop(xDX); \
  if ((dlo) != xAX && (dhi) != xAX) \
    emith_pop(xAX); \
}

#define emith_mul_u64(dlo, dhi, s1, s2) \
  emith_mul_(4, dlo, dhi, s1, s2) /* MUL */

#define emith_mul_s64(dlo, dhi, s1, s2) \
  emith_mul_(5, dlo, dhi, s1, s2) /* IMUL */

/* 32 bit result only: the high half is discarded */
#define emith_mul(d, s1, s2) \
  emith_mul_(4, d, -1, s1, s2)
353
// (dlo,dhi) += signed(s1) * signed(s2)
/*
 * The old dlo/dhi are pushed first, the product is computed into
 * dlo/dhi, then the saved pair is added back straight from the stack
 * and the two stack slots are dropped.
 */
#define emith_mula_s64(dlo, dhi, s1, s2) { \
  emith_push(dhi); \
  emith_push(dlo); \
  emith_mul_(5, dlo, dhi, s1, s2); \
  EMIT_OP_MODRM(0x03, 0, dlo, 4); \
  EMIT_SIB(0, 4, 4); /* add dlo, [esp] */ \
  EMIT_OP_MODRM(0x13, 1, dhi, 4); \
  EMIT_SIB(0, 4, 4); \
  EMIT(4, u8); /* adc dhi, [esp+4] */ \
  emith_add_r_imm(xSP, 4*2); \
}
366
// "flag" instructions are the same
/* each flag-setting name is a plain alias of the non-flag emitter */
#define emith_subf_r_imm emith_sub_r_imm
#define emith_addf_r_r   emith_add_r_r
#define emith_subf_r_r   emith_sub_r_r
#define emith_adcf_r_r   emith_adc_r_r
#define emith_sbcf_r_r   emith_sbc_r_r
#define emith_eorf_r_r   emith_eor_r_r
#define emith_negcf_r_r  emith_negc_r_r

#define emith_lslf  emith_lsl
#define emith_lsrf  emith_lsr
#define emith_asrf  emith_asr
#define emith_rolf  emith_rol
#define emith_rorf  emith_ror
#define emith_rolcf emith_rolc
#define emith_rorcf emith_rorc
80599a42 383
// XXX: offs is 8bit only
/* load register r from the context at byte offset offs */
#define emith_ctx_read(r, offs) do { \
  EMIT_OP_MODRM(0x8b, 1, r, xBP); \
  EMIT(offs, u8); /* mov tmp, [ebp+#offs] */ \
} while (0)

/*
 * Load cnt consecutive registers, starting with r, from consecutive
 * 32 bit context slots starting at offs. tmpr is not used here.
 * The loop runs on the local copy cnt_, which is the one that is
 * decremented.
 */
#define emith_ctx_read_multiple(r, offs, cnt, tmpr) do { \
  int r_ = (r), offs_ = (offs), cnt_ = (cnt); \
  for (; cnt_ > 0; r_++, offs_ += 4, cnt_--) \
    emith_ctx_read(r_, offs_); \
} while (0)

/* store register r to the context at byte offset offs */
#define emith_ctx_write(r, offs) do { \
  EMIT_OP_MODRM(0x89, 1, r, xBP); \
  EMIT(offs, u8); /* mov [ebp+#offs], tmp */ \
} while (0)

/* store counterpart of emith_ctx_read_multiple; tmpr is not used here */
#define emith_ctx_write_multiple(r, offs, cnt, tmpr) do { \
  int r_ = (r), offs_ = (offs), cnt_ = (cnt); \
  for (; cnt_ > 0; r_++, offs_ += 4, cnt_--) \
    emith_ctx_write(r_, offs_); \
} while (0)
679af8a3 406
/*
 * jmp rel32 to ptr. The displacement is relative to the end of the
 * 5 byte instruction. Addresses are handled as u32 here.
 */
#define emith_jump(ptr) { \
  u32 disp_ = (u32)(ptr) - ((u32)tcache_ptr + 5); \
  EMIT_OP(0xe9); \
  EMIT(disp_, u32); \
}

/* call rel32 to ptr; displacement computed as in emith_jump */
#define emith_call(ptr) { \
  u32 disp_ = (u32)(ptr) - ((u32)tcache_ptr + 5); \
  EMIT_OP(0xe8); \
  EMIT(disp_, u32); \
}

/* cond is ignored here; an unconditional call is emitted */
#define emith_call_cond(cond, ptr) \
  emith_call(ptr)

/* indirect jump through register r (0xff /4) */
#define emith_jump_reg(r) \
  EMIT_OP_MODRM(0xff, 3, 4, r)
424
425#define EMITH_JMP_START(cond) { \
80599a42 426 u8 *cond_ptr; \
427 JMP8_POS(cond_ptr)
428
8796b7ee 429#define EMITH_JMP_END(cond) \
80599a42 430 JMP8_EMIT(cond, cond_ptr); \
679af8a3 431}
432
8796b7ee 433// "simple" jump (no more then a few insns)
434#define EMITH_SJMP_START EMITH_JMP_START
435#define EMITH_SJMP_END EMITH_JMP_END
436
/* map argument index 0..2 to its host register; rd is left alone otherwise */
#define host_arg2reg(rd, arg) \
  switch (arg) { \
  case 0: rd = xAX; break; \
  case 1: rd = xDX; break; \
  case 2: rd = xCX; break; \
  default: break; \
  }

/*
 * Move reg into the register used for argument number arg.
 * The local is rd_ (trailing underscore, like tmp_ elsewhere in this
 * file) so a caller variable named rd can be passed as reg.
 * For arg outside 0..2 register number 7 is used.
 */
#define emith_pass_arg_r(arg, reg) { \
  int rd_ = 7; \
  host_arg2reg(rd_, arg); \
  emith_move_r_r(rd_, reg); \
}

/* load imm into the register used for argument number arg */
#define emith_pass_arg_imm(arg, imm) { \
  int rd_ = 7; \
  host_arg2reg(rd_, arg); \
  emith_move_r_imm(rd_, imm); \
}
455
/* SH2 drc specific */
/*
 * Entry/exit sequences: EBX and EBP are pushed on entry and popped in
 * reverse order before the ret.
 * NOTE(review): ESI/EDI are not saved here - confirm that translated
 * code never uses them, or add them to both sequences.
 */
#define emith_sh2_drc_entry() { \
  emith_push(xBX); \
  emith_push(xBP); \
}

#define emith_sh2_drc_exit() { \
  emith_pop(xBP); \
  emith_pop(xBX); \
  EMIT_OP(0xc3); /* ret */\
}

/*
 * Test bit 0 of the host register holding SHR_SR with a byte-sized TEST.
 * NOTE(review): a byte op addresses AL/CL/DL/BL only for register
 * numbers 0-3 - confirm the register cache never returns others here.
 */
#define emith_sh2_test_t() { \
  int t = rcache_get_reg(SHR_SR, RC_GR_READ); \
  EMIT_OP_MODRM(0xf6, 3, 0, t); \
  EMIT(0x01, u8); /* test <reg>, byte 1 */ \
}
473
474#define emith_sh2_dtbf_loop() { \
475 u8 *jmp0; /* negative cycles check */ \
476 u8 *jmp1; /* unsinged overflow check */ \
477 int cr, rn; \
52d759c3 478 int tmp_ = rcache_get_tmp(); \
80599a42 479 cr = rcache_get_reg(SHR_SR, RC_GR_RMW); \
480 rn = rcache_get_reg((op >> 8) & 0x0f, RC_GR_RMW);\
481 emith_sub_r_imm(rn, 1); \
482 emith_sub_r_imm(cr, (cycles+1) << 12); \
483 cycles = 0; \
52d759c3 484 emith_asr(tmp_, cr, 2+12); \
80599a42 485 JMP8_POS(jmp0); /* no negative cycles */ \
52d759c3 486 emith_move_r_imm(tmp_, 0); \
80599a42 487 JMP8_EMIT(IOP_JNS, jmp0); \
488 emith_and_r_imm(cr, 0xffe); \
52d759c3 489 emith_subf_r_r(rn, tmp_); \
80599a42 490 JMP8_POS(jmp1); /* no overflow */ \
491 emith_neg_r(rn); /* count left */ \
492 emith_lsl(rn, rn, 2+12); \
493 emith_or_r_r(cr, rn); \
494 emith_or_r_imm(cr, 1); \
495 emith_move_r_imm(rn, 0); \
496 JMP8_EMIT(IOP_JA, jmp1); \
52d759c3 497 rcache_free_tmp(tmp_); \
65c75cb0 498}
499
/*
 * Replace the low 12 bits of the register holding SHR_SR with the low
 * 12 bits of srcr; the upper bits of SR are kept.
 */
#define emith_write_sr(srcr) { \
  int tmp_ = rcache_get_tmp(); \
  int srr = rcache_get_reg(SHR_SR, RC_GR_RMW); \
  emith_clear_msb(tmp_, srcr, 20); \
  emith_bic_r_imm(srr, 0xfff); \
  emith_or_r_r(srr, tmp_); \
  rcache_free_tmp(tmp_); \
}

/*
 * Copy the host carry flag into bit 0 of srr. The carry is captured
 * with SETC before the AND that clears bit 0. is_sub is not used here.
 */
#define emith_carry_to_t(srr, is_sub) { \
  int tmp_ = rcache_get_tmp(); \
  emith_setc(tmp_); \
  emith_bic_r_imm(srr, 1); \
  EMIT_OP_MODRM(0x08, 3, tmp_, srr); /* OR srrl, tmpl */ \
  rcache_free_tmp(tmp_); \
}
516
/*
 * if Q
 *   t = carry(Rn += Rm)
 * else
 *   t = carry(Rn -= Rm)
 * T ^= t
 */
/*
 * Emitted as: test sr,Q / je do_sub / add / jmp done / do_sub: sub /
 * done: setc tmp / xor (byte) sr,tmp. Both short jumps are reserved
 * with JMP8_POS and patched once their target is known, so the order
 * of statements must not change.
 */
#define emith_sh2_div1_step(rn, rm, sr) { \
  u8 *jmp0, *jmp1; \
  int tmp_ = rcache_get_tmp(); \
  emith_tst_r_imm(sr, Q); /* if (Q ^ M) */ \
  JMP8_POS(jmp0); /* je do_sub */ \
  emith_add_r_r(rn, rm); \
  JMP8_POS(jmp1); /* jmp done */ \
  JMP8_EMIT(IOP_JE, jmp0); /* do_sub: */ \
  emith_sub_r_r(rn, rm); \
  JMP8_EMIT(IOP_JMP, jmp1);/* done: */ \
  emith_setc(tmp_); \
  EMIT_OP_MODRM(0x30, 3, tmp_, sr); /* T = Q1 ^ Q2 (byte) */ \
  rcache_free_tmp(tmp_); \
}
538