[picodrive.git] / cpu / drc / emit_x86.c

/*
 * note about silly things like emith_eor_r_r_r_lsl:
 * these are here because the compiler was designed
 * for ARM as its primary target.
 */
#include <stdarg.h>

/*
 * Host register numbers (xAX = 0 ... xDI = 7).  The macros below place these
 * values directly into ModRM fields and add them to opcode bytes.
 */
enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI };

/* register used as context base; emith_ctx_read/write address relative to xBP */
#define CONTEXT_REG xBP

/*
 * Opcode bytes of the short conditional jumps.  JMP8_EMIT() stores one of
 * these as the first byte, followed by an 8-bit displacement.
 */
#define IOP_JO  0x70
#define IOP_JNO 0x71
#define IOP_JB  0x72
#define IOP_JAE 0x73
#define IOP_JE  0x74
#define IOP_JNE 0x75
#define IOP_JBE 0x76
#define IOP_JA  0x77
#define IOP_JS  0x78
#define IOP_JNS 0x79
#define IOP_JL  0x7c
#define IOP_JGE 0x7d
#define IOP_JLE 0x7e
#define IOP_JG  0x7f

// unified conditions (we just use rel8 jump instructions for x86)
// each DCOND_* name is an alias of one IOP_* jump opcode above;
// note that CS/CC map to the same opcodes as LO/HS
#define DCOND_EQ IOP_JE
#define DCOND_NE IOP_JNE
#define DCOND_MI IOP_JS      // MInus
#define DCOND_PL IOP_JNS     // PLus or zero
#define DCOND_HI IOP_JA      // higher (unsigned)
#define DCOND_HS IOP_JAE     // higher || same (unsigned)
#define DCOND_LO IOP_JB      // lower (unsigned)
#define DCOND_LS IOP_JBE     // lower || same (unsigned)
#define DCOND_GE IOP_JGE     // greater || equal (signed)
#define DCOND_GT IOP_JG      // greater (signed)
#define DCOND_LE IOP_JLE     // less || equal (signed)
#define DCOND_LT IOP_JL      // less (signed)
#define DCOND_VS IOP_JO      // oVerflow Set
#define DCOND_VC IOP_JNO     // oVerflow Clear
#define DCOND_CS IOP_JB	     // Carry Set
#define DCOND_CC IOP_JAE     // Carry Clear

/* store val at address ptr as a single object of the given type */
#define EMIT_PTR(ptr, val, type) \
	((type *)(ptr))[0] = val

/* append val (as the given type) at tcache_ptr and step tcache_ptr past it */
#define EMIT(val, type) { \
	*(type *)(tcache_ptr) = val; \
	tcache_ptr += sizeof(type); \
}

/* append a one-byte opcode; COUNT_OP is expanded once per opcode */
#define EMIT_OP(op) { COUNT_OP; EMIT(op, u8); }

/* append a ModRM byte: mod in bits 7-6, r in bits 5-3, rm in bits 2-0 */
#define EMIT_MODRM(mod,r,rm) \
	EMIT((rm) | ((r)<<3) | ((mod)<<6), u8)

/* append an opcode byte immediately followed by its ModRM byte */
#define EMIT_OP_MODRM(op,mod,r,rm) { EMIT_OP(op); EMIT_MODRM(mod, r, rm); }

/*
 * Remember the current output position in ptr and leave two bytes free
 * there; JMP8_EMIT() later fills them in with the jump itself.
 *
 * Wrapped in do/while(0) so that both statements stay together when the
 * macro is the body of an unbraced if/else.  A trailing semicolon at the
 * use site is required (as it already was).
 */
#define JMP8_POS(ptr) do { \
	(ptr) = tcache_ptr; \
	tcache_ptr += 2; \
} while (0)

/*
 * Fill the two bytes reserved at ptr (see JMP8_POS): first the opcode op,
 * then the distance from the end of those two bytes to the current
 * tcache_ptr, stored as a single byte.  No range check is done, so the code
 * emitted in between has to be short enough for an 8-bit displacement.
 *
 * Wrapped in do/while(0) so that both stores stay together when the macro
 * is the body of an unbraced if/else; ptr is parenthesized so that
 * expression arguments work.
 */
#define JMP8_EMIT(op, ptr) do { \
	EMIT_PTR(ptr, op, u8); \
	EMIT_PTR((ptr) + 1, (tcache_ptr - ((ptr) + 2)), u8); \
} while (0)

// _r_r: register/register forms, all emitted with ModRM mod = 3
#define emith_move_r_r(rd, rs) EMIT_OP_MODRM(0x8b, 3, rd, rs) /* MOV */

/* for the remaining ops the source goes in the ModRM reg field, the destination in r/m */
#define emith_add_r_r(rd, rs) EMIT_OP_MODRM(0x01, 3, rs, rd) /* ADD */

#define emith_sub_r_r(rd, rs) EMIT_OP_MODRM(0x29, 3, rs, rd) /* SUB */

#define emith_adc_r_r(rd, rs) EMIT_OP_MODRM(0x11, 3, rs, rd) /* ADC */

#define emith_sbc_r_r(rd, rs) EMIT_OP_MODRM(0x19, 3, rs, rd) /* SBB */

#define emith_or_r_r(rd, rs)  EMIT_OP_MODRM(0x09, 3, rs, rd) /* OR */

#define emith_and_r_r(rd, rs) EMIT_OP_MODRM(0x21, 3, rs, rd) /* AND */

#define emith_eor_r_r(rd, rs) EMIT_OP_MODRM(0x31, 3, rs, rd) /* XOR */

#define emith_tst_r_r(rd, rs) EMIT_OP_MODRM(0x85, 3, rs, rd) /* TEST */

#define emith_cmp_r_r(rd, rs) EMIT_OP_MODRM(0x39, 3, rs, rd) /* CMP */

// fake teq - test equivalence - get_flags(d ^ s)
// d is pushed before the XOR and popped afterwards, so its value is restored
#define emith_teq_r_r(d, s) { emith_push(d); emith_eor_r_r(d, s); emith_pop(d); }

// _r_r_r
/*
 * d = s1 ^ s2.
 *
 * When d aliases s2 (and not s1), copying s1 into d first would destroy s2
 * and the XOR would always yield 0; XOR is commutative, so in that case d is
 * XORed with s1 in place instead.
 */
#define emith_eor_r_r_r(d, s1, s2) { \
	if ((d) == (s1)) { \
		emith_eor_r_r(d, s2); \
	} else if ((d) == (s2)) { \
		emith_eor_r_r(d, s1); \
	} else { \
		emith_move_r_r(d, s1); \
		emith_eor_r_r(d, s2); \
	} \
}

/*
 * d = s1 | (s2 << lslimm); s1 and s2 keep their values unless they alias d.
 *
 * - d != s1: d can hold the shifted value directly.  This also covers
 *   d == s2, where emith_lsl shifts d in place and s1 is still intact.
 * - d == s1: the shift is done in a scratch register that is saved and
 *   restored with push/pop.  Normally that is s2 itself; when s2 is d too,
 *   a different register (xAX, or xCX if d is xAX) is borrowed so that the
 *   pop does not overwrite the result.
 */
#define emith_or_r_r_r_lsl(d, s1, s2, lslimm) { \
	if ((d) != (s1)) { \
		emith_lsl(d, s2, lslimm); \
		emith_or_r_r(d, s1); \
	} else { \
		int scratch_ = (s2); \
		if (scratch_ == (d)) \
			scratch_ = ((d) == xAX) ? xCX : xAX; \
		emith_push(scratch_); \
		emith_lsl(scratch_, s2, lslimm); \
		emith_or_r_r(d, scratch_); \
		emith_pop(scratch_); \
	} \
}

// _r_imm: register with 32-bit immediate
/* load imm into r: opcode 0xb8 + r followed by the 32-bit value */
#define emith_move_r_imm(r, imm) { EMIT_OP(0xb8 + (r)); EMIT(imm, u32); }

/* opcode 0x81 group; the operation is selected by the ModRM reg field `op` */
#define emith_arith_r_imm(op, r, imm) { EMIT_OP_MODRM(0x81, 3, op, r); EMIT(imm, u32); }

// group selectors used below: 0 - add, 1 - or, 4 - and, 5 - sub
// not wrapped here: 2 - adc, 3 - sbb, 6 - xor, 7 - cmp
#define emith_add_r_imm(r, imm) emith_arith_r_imm(0, r, imm)

#define emith_or_r_imm(r, imm)  emith_arith_r_imm(1, r, imm)

#define emith_and_r_imm(r, imm) emith_arith_r_imm(4, r, imm)

#define emith_sub_r_imm(r, imm) emith_arith_r_imm(5, r, imm)

/* opcode 0xf7 with ModRM reg field 0, followed by the 32-bit mask */
#define emith_tst_r_imm(r, imm) { EMIT_OP_MODRM(0xf7, 3, 0, r); EMIT(imm, u32); }

// fake: bit clear is emitted as an AND with the complemented immediate
#define emith_bic_r_imm(r, imm) emith_arith_r_imm(4, r, ~(imm))

// fake conditionals (using SJMP instead)
// `cond` is evaluated and discarded; the plain operation is always emitted
#define emith_add_r_imm_c(cond, r, imm) { (void)(cond); emith_add_r_imm(r, imm); }

#define emith_or_r_imm_c(cond, r, imm)  { (void)(cond); emith_or_r_imm(r, imm); }

#define emith_sub_r_imm_c(cond, r, imm) { (void)(cond); emith_sub_r_imm(r, imm); }

#define emith_bic_r_imm_c(cond, r, imm) { (void)(cond); emith_bic_r_imm(r, imm); }

// shift
/*
 * d = s shifted by the immediate cnt.  s is copied into d first when the
 * two differ, then d is shifted in place (opcode 0xc1, operation chosen by
 * the ModRM reg field `op`, count emitted as one byte).
 */
#define emith_shift(op, d, s, cnt) { \
	if (!(d == s)) { \
		emith_move_r_r(d, s); \
	} \
	EMIT_OP_MODRM(0xc1, 3, op, d); \
	EMIT(cnt, u8); \
}

/* logical shift left: selector 4 */
#define emith_lsl(d, s, cnt) emith_shift(4, d, s, cnt)

/* logical shift right: selector 5 */
#define emith_lsr(d, s, cnt) emith_shift(5, d, s, cnt)

/* arithmetic shift right: selector 7 */
#define emith_asr(d, s, cnt) emith_shift(7, d, s, cnt)

// misc
/* single-byte push: opcode 0x50 plus the register number */
#define emith_push(r) EMIT_OP((r) + 0x50)

/* single-byte pop: opcode 0x58 plus the register number */
#define emith_pop(r) EMIT_OP((r) + 0x58)

/* negate r in place: opcode 0xf7 with ModRM reg field 3 */
#define emith_neg_r(r) EMIT_OP_MODRM(0xf7, 3, 3, r)

/*
 * d = s with the top `count` bits cleared: s is copied to d when they
 * differ, then d is ANDed with a mask of (32 - count) low one-bits.
 *
 * The mask lives in a local with a reserved-looking name so that a caller
 * argument called `t` is not captured, and a count of 32 or more produces
 * an all-zero mask instead of an out-of-range shift.
 */
#define emith_clear_msb(d, s, count) { \
	u32 msb_mask_ = ((count) < 32) ? ((u32)-1 >> (count)) : 0; \
	if ((d) != (s)) \
		emith_move_r_r(d, s); \
	emith_and_r_imm(d, msb_mask_); \
}

/*
 * Sign-extend the low `bits` bits of s into d: shift left by (32 - bits)
 * into d, then arithmetic-shift d right by the same amount.
 */
#define emith_sext(d, s, bits) { \
	int sext_shift_ = 32 - (bits); \
	emith_lsl(d, s, sext_shift_); \
	emith_asr(d, d, sext_shift_); \
}

// put bit0 of r0 to carry
// emits: test r0, 1; a short DCOND_EQ jump over the next byte; STC
#define emith_set_carry(r0) { \
	emith_tst_r_imm(r0, 1);		/* clears C */ \
	EMITH_SJMP_START(DCOND_EQ); \
	EMIT_OP(0xf9);			/* STC */ \
	EMITH_SJMP_END(DCOND_EQ); \
}

// put bit0 of r0 to carry (for subtraction): same sequence as above
#define emith_set_carry_sub emith_set_carry

// XXX: stupid mess
/*
 * Multiply s1 by s2 with the one-operand 0xf7 group form (selector `op`),
 * which works on xAX and leaves its result in xDX:xAX.
 *   dlo - register receiving the xAX half
 *   dhi - register receiving the xDX half, or -1 if it is not wanted
 * xAX and xDX are pushed up front and popped at the end whenever they are
 * not one of the destinations.
 */
#define emith_mul_(op, dlo, dhi, s1, s2) { \
	const int save_ax_ = (dlo != xAX && dhi != xAX); \
	const int save_dx_ = (dlo != xDX && dhi != xDX); \
	const int move_hi_ = (dhi != xDX && dhi != -1); \
	int mul_src_; \
	if (save_ax_) { \
		emith_push(xAX); \
	} \
	if (save_dx_) { \
		emith_push(xDX); \
	} \
	/* one factor must sit in xAX, the other is the r/m operand */ \
	if ((s1) == xAX) { \
		mul_src_ = s2; \
	} else if ((s2) == xAX) { \
		mul_src_ = s1; \
	} else { \
		emith_move_r_r(xAX, s1); \
		mul_src_ = s2; \
	} \
	EMIT_OP_MODRM(0xf7, 3, op, mul_src_); /* xMUL */ \
	/* XXX: the high half travels via the stack so that */ \
	/* edx->eax; eax->edx swaps do not lose a value */ \
	if (move_hi_) { \
		emith_push(xDX); \
	} \
	if (dlo != xAX) { \
		emith_move_r_r(dlo, xAX); \
	} \
	if (move_hi_) { \
		emith_pop(dhi); \
	} \
	if (save_dx_) { \
		emith_pop(xDX); \
	} \
	if (save_ax_) { \
		emith_pop(xAX); \
	} \
}

/* full product, selector 4 */
#define emith_mul_u64(dlo, dhi, s1, s2) \
	emith_mul_(4, dlo, dhi, s1, s2) /* MUL */

/* full product, selector 5 */
#define emith_mul_s64(dlo, dhi, s1, s2) \
	emith_mul_(5, dlo, dhi, s1, s2) /* IMUL */

/* low half only: the high half is discarded (dhi = -1) */
#define emith_mul(d, s1, s2) \
	emith_mul_(4, d, -1, s1, s2)

// "flag" instructions are the same
// each *f name is a plain alias of the macro without the f suffix, so both
// names emit exactly the same bytes
#define emith_subf_r_imm emith_sub_r_imm
#define emith_addf_r_r   emith_add_r_r
#define emith_subf_r_r   emith_sub_r_r
#define emith_adcf_r_r   emith_adc_r_r
#define emith_sbcf_r_r   emith_sbc_r_r

// flag-named shift aliases
#define emith_lslf emith_lsl
#define emith_lsrf emith_lsr
#define emith_asrf emith_asr

/*
 * Context access relative to xBP.
 *
 * Offsets that fit a signed byte (-128..127) use the short disp8 form
 * (ModRM mod = 1), producing the same bytes as before.  Anything else is
 * emitted with a 32-bit displacement (ModRM mod = 2) instead of being
 * silently truncated to one byte.
 */
#define emith_ctx_read(r, offs) { \
	if ((int)(offs) >= -128 && (int)(offs) <= 127) { \
		EMIT_OP_MODRM(0x8b, 1, r, xBP); \
		EMIT(offs, u8); 	/* mov tmp, [ebp+#offs8] */ \
	} else { \
		EMIT_OP_MODRM(0x8b, 2, r, xBP); \
		EMIT(offs, u32); 	/* mov tmp, [ebp+#offs32] */ \
	} \
}

#define emith_ctx_write(r, offs) { \
	if ((int)(offs) >= -128 && (int)(offs) <= 127) { \
		EMIT_OP_MODRM(0x89, 1, r, xBP); \
		EMIT(offs, u8); 	/* mov [ebp+#offs8], tmp */ \
	} else { \
		EMIT_OP_MODRM(0x89, 2, r, xBP); \
		EMIT(offs, u32); 	/* mov [ebp+#offs32], tmp */ \
	} \
}

/*
 * Emit opcode 0xe9 followed by a 32-bit displacement to ptr.  The
 * displacement is measured from the end of the 5-byte instruction, hence
 * the "+ 5".  ptr is parenthesized so that an expression argument is cast
 * as a whole, and the local has a name that cannot capture a caller's
 * variable called `disp`.
 */
#define emith_jump(ptr) { \
	u32 jump_disp_ = (u32)(ptr) - ((u32)tcache_ptr + 5); \
	EMIT_OP(0xe9); \
	EMIT(jump_disp_, u32); \
}

/* Same as emith_jump, but with opcode 0xe8. */
#define emith_call(ptr) { \
	u32 call_disp_ = (u32)(ptr) - ((u32)tcache_ptr + 5); \
	EMIT_OP(0xe8); \
	EMIT(call_disp_, u32); \
}

// "simple" or "short" jump
/*
 * EMITH_SJMP_START opens a brace scope, declares cond_ptr and reserves two
 * bytes at the current output position (JMP8_POS).  EMITH_SJMP_END fills
 * those bytes with opcode `cond` plus the distance to the current position
 * (JMP8_EMIT) and closes the scope again.  The two macros must therefore be
 * used as a pair within the same block.
 *
 * The `cond` argument of START is not used; the opcode given to END is the
 * one written, so the code emitted between the pair is jumped over when
 * that condition holds.
 */
#define EMITH_SJMP_START(cond) { \
	u8 *cond_ptr; \
	JMP8_POS(cond_ptr)

#define EMITH_SJMP_END(cond) \
	JMP8_EMIT(cond, cond_ptr); \
}

/*
 * Map argument index 0/1/2 to xAX/xDX/xCX.  For any other index rd is left
 * untouched.
 */
#define host_arg2reg(rd, arg) \
	switch (arg) { \
	case 0: \
		rd = xAX; \
		break; \
	case 1: \
		rd = xDX; \
		break; \
	case 2: \
		rd = xCX; \
		break; \
	default: \
		break; \
	}

/* copy register `reg` into the register for argument `arg` (xDI if arg is not 0..2) */
#define emith_pass_arg_r(arg, reg) { \
	int rd = xDI; \
	host_arg2reg(rd, arg); \
	emith_move_r_r(rd, reg); \
}

/* load constant `imm` into the register for argument `arg` (xDI if arg is not 0..2) */
#define emith_pass_arg_imm(arg, imm) { \
	int rd = xDI; \
	host_arg2reg(rd, arg); \
	emith_move_r_imm(rd, imm); \
}

/* SH2 drc specific */
/*
 * Test bit 0 of the host register that caches SHR_SR.
 *
 * The 3-byte form (0xf6 /0, 8-bit immediate) is kept for xAX..xBX.  With
 * register numbers 4..7 that encoding selects AH/CH/DH/BH rather than the
 * low byte of the register, so for those the 32-bit test is emitted
 * instead.
 */
#define emith_sh2_test_t() { \
	int t = rcache_get_reg(SHR_SR, RC_GR_READ); \
	if (t <= xBX) { \
		EMIT_OP_MODRM(0xf6, 3, 0, t); \
		EMIT(0x01, u8); /* test <reg>, byte 1 */ \
	} else { \
		emith_tst_r_imm(t, 1); /* test <reg>, dword 1 */ \
	} \
}

/*
 * NOTE(review): presumably the shortcut for a DT/BF busy loop - confirm
 * against the caller.
 *
 * Uses `tmp`, `op` and `cycles` from the scope where the macro is expanded
 * (none of them is declared here) and resets `cycles` to 0.
 *
 * Emitted sequence, with cr = host reg of SHR_SR and rn = host reg of the
 * register selected by bits 8..11 of `op`:
 *   rn -= 1;  cr -= (cycles+1) << 12;
 *   tmp = cr >> 14 (arithmetic);  tmp is replaced by 0 unless the JNS
 *     patched in at jmp0 skips that move;
 *   cr &= 0xffe;  rn -= tmp (flag-setting);
 *   unless the JA patched in at jmp1 skips it:
 *     rn = -rn;  rn <<= 14;  cr |= rn;  cr |= 1;  rn = 0.
 * The jump bytes are reserved with JMP8_POS and filled in by JMP8_EMIT
 * after the code they skip has been emitted, so the statement order
 * matters.
 */
#define emith_sh2_dtbf_loop() { \
	u8 *jmp0; /* negative cycles check */            \
	u8 *jmp1; /* unsigned overflow check */          \
	int cr, rn;                                      \
	tmp = rcache_get_tmp();                          \
	cr = rcache_get_reg(SHR_SR, RC_GR_RMW);          \
	rn = rcache_get_reg((op >> 8) & 0x0f, RC_GR_RMW);\
	emith_sub_r_imm(rn, 1);                          \
	emith_sub_r_imm(cr, (cycles+1) << 12);           \
	cycles = 0;                                      \
	emith_asr(tmp, cr, 2+12);                        \
	JMP8_POS(jmp0); /* no negative cycles */         \
	emith_move_r_imm(tmp, 0);                        \
	JMP8_EMIT(IOP_JNS, jmp0);                        \
	emith_and_r_imm(cr, 0xffe);                      \
	emith_subf_r_r(rn, tmp);                         \
	JMP8_POS(jmp1); /* no overflow */                \
	emith_neg_r(rn); /* count left */                \
	emith_lsl(rn, rn, 2+12);                         \
	emith_or_r_r(cr, rn);                            \
	emith_or_r_imm(cr, 1);                           \
	emith_move_r_imm(rn, 0);                         \
	JMP8_EMIT(IOP_JA, jmp1);                         \
	rcache_free_tmp(tmp);                            \
}
Commit	Line	Data
	1	/*
	2	* note about silly things like emith_eor_r_r_r_lsl:
	3	* these are here because the compiler was designed
	4	* for ARM as it's primary target.
	5	*/
	6	#include <stdarg.h>
	7
	8	enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI };
	9
	10	#define CONTEXT_REG xBP
	11
	12	#define IOP_JO 0x70
	13	#define IOP_JNO 0x71
	14	#define IOP_JB 0x72
	15	#define IOP_JAE 0x73
	16	#define IOP_JE 0x74
	17	#define IOP_JNE 0x75
	18	#define IOP_JBE 0x76
	19	#define IOP_JA 0x77
	20	#define IOP_JS 0x78
	21	#define IOP_JNS 0x79
	22	#define IOP_JL 0x7c
	23	#define IOP_JGE 0x7d
	24	#define IOP_JLE 0x7e
	25	#define IOP_JG 0x7f
	26
	27	// unified conditions (we just use rel8 jump instructions for x86)
	28	#define DCOND_EQ IOP_JE
	29	#define DCOND_NE IOP_JNE
	30	#define DCOND_MI IOP_JS // MInus
	31	#define DCOND_PL IOP_JNS // PLus or zero
	32	#define DCOND_HI IOP_JA // higher (unsigned)
	33	#define DCOND_HS IOP_JAE // higher \|\| same (unsigned)
	34	#define DCOND_LO IOP_JB // lower (unsigned)
	35	#define DCOND_LS IOP_JBE // lower \|\| same (unsigned)
	36	#define DCOND_GE IOP_JGE // greater \|\| equal (signed)
	37	#define DCOND_GT IOP_JG // greater (signed)
	38	#define DCOND_LE IOP_JLE // less \|\| equal (signed)
	39	#define DCOND_LT IOP_JL // less (signed)
	40	#define DCOND_VS IOP_JO // oVerflow Set
	41	#define DCOND_VC IOP_JNO // oVerflow Clear
	42	#define DCOND_CS IOP_JB // Carry Set
	43	#define DCOND_CC IOP_JAE // Carry Clear
	44
	45	#define EMIT_PTR(ptr, val, type) \
	46	(type )(ptr) = val
	47
	48	#define EMIT(val, type) { \
	49	EMIT_PTR(tcache_ptr, val, type); \
	50	tcache_ptr += sizeof(type); \
	51	}
	52
	53	#define EMIT_OP(op) { \
	54	COUNT_OP; \
	55	EMIT(op, u8); \
	56	}
	57
	58	#define EMIT_MODRM(mod,r,rm) \
	59	EMIT(((mod)<<6) \| ((r)<<3) \| (rm), u8)
	60
	61	#define EMIT_OP_MODRM(op,mod,r,rm) { \
	62	EMIT_OP(op); \
	63	EMIT_MODRM(mod, r, rm); \
	64	}
	65
	66	#define JMP8_POS(ptr) \
	67	ptr = tcache_ptr; \
	68	tcache_ptr += 2
	69
	70	#define JMP8_EMIT(op, ptr) \
	71	EMIT_PTR(ptr, op, u8); \
	72	EMIT_PTR(ptr + 1, (tcache_ptr - (ptr+2)), u8)
	73
	74	// _r_r
	75	#define emith_move_r_r(dst, src) \
	76	EMIT_OP_MODRM(0x8b, 3, dst, src)
	77
	78	#define emith_add_r_r(d, s) \
	79	EMIT_OP_MODRM(0x01, 3, s, d)
	80
	81	#define emith_sub_r_r(d, s) \
	82	EMIT_OP_MODRM(0x29, 3, s, d)
	83
	84	#define emith_adc_r_r(d, s) \
	85	EMIT_OP_MODRM(0x11, 3, s, d)
	86
	87	#define emith_sbc_r_r(d, s) \
	88	EMIT_OP_MODRM(0x19, 3, s, d) /* SBB */
	89
	90	#define emith_or_r_r(d, s) \
	91	EMIT_OP_MODRM(0x09, 3, s, d)
	92
	93	#define emith_and_r_r(d, s) \
	94	EMIT_OP_MODRM(0x21, 3, s, d)
	95
	96	#define emith_eor_r_r(d, s) \
	97	EMIT_OP_MODRM(0x31, 3, s, d) /* XOR */
	98
	99	#define emith_tst_r_r(d, s) \
	100	EMIT_OP_MODRM(0x85, 3, s, d) /* TEST */
	101
	102	#define emith_cmp_r_r(d, s) \
	103	EMIT_OP_MODRM(0x39, 3, s, d)
	104
	105	// fake teq - test equivalence - get_flags(d ^ s)
	106	#define emith_teq_r_r(d, s) { \
	107	emith_push(d); \
	108	emith_eor_r_r(d, s); \
	109	emith_pop(d); \
	110	}
	111
	112	// _r_r_r
	113	#define emith_eor_r_r_r(d, s1, s2) { \
	114	if (d != s1) \
	115	emith_move_r_r(d, s1); \
	116	emith_eor_r_r(d, s2); \
	117	}
	118
	119	#define emith_or_r_r_r_lsl(d, s1, s2, lslimm) { \
	120	if (d != s2 && d != s1) { \
	121	emith_lsl(d, s2, lslimm); \
	122	emith_or_r_r(d, s1); \
	123	} else { \
	124	if (d != s1) \
	125	emith_move_r_r(d, s1); \
	126	emith_push(s2); \
	127	emith_lsl(s2, s2, lslimm); \
	128	emith_or_r_r(d, s2); \
	129	emith_pop(s2); \
	130	} \
	131	}
	132
	133	// _r_imm
	134	#define emith_move_r_imm(r, imm) { \
	135	EMIT_OP(0xb8 + (r)); \
	136	EMIT(imm, u32); \
	137	}
	138
	139	#define emith_arith_r_imm(op, r, imm) { \
	140	EMIT_OP_MODRM(0x81, 3, op, r); \
	141	EMIT(imm, u32); \
	142	}
	143
	144	// 2 - adc, 3 - sbb, 6 - xor, 7 - cmp
	145	#define emith_add_r_imm(r, imm) \
	146	emith_arith_r_imm(0, r, imm)
	147
	148	#define emith_or_r_imm(r, imm) \
	149	emith_arith_r_imm(1, r, imm)
	150
	151	#define emith_and_r_imm(r, imm) \
	152	emith_arith_r_imm(4, r, imm)
	153
	154	#define emith_sub_r_imm(r, imm) \
	155	emith_arith_r_imm(5, r, imm)
	156
	157	#define emith_tst_r_imm(r, imm) { \
	158	EMIT_OP_MODRM(0xf7, 3, 0, r); \
	159	EMIT(imm, u32); \
	160	}
	161
	162	// fake
	163	#define emith_bic_r_imm(r, imm) \
	164	emith_arith_r_imm(4, r, ~(imm))
	165
	166	// fake conditionals (using SJMP instead)
	167	#define emith_add_r_imm_c(cond, r, imm) { \
	168	(void)(cond); \
	169	emith_add_r_imm(r, imm); \
	170	}
	171
	172	#define emith_or_r_imm_c(cond, r, imm) { \
	173	(void)(cond); \
	174	emith_or_r_imm(r, imm); \
	175	}
	176
	177	#define emith_sub_r_imm_c(cond, r, imm) { \
	178	(void)(cond); \
	179	emith_sub_r_imm(r, imm); \
	180	}
	181
	182	#define emith_bic_r_imm_c(cond, r, imm) { \
	183	(void)(cond); \
	184	emith_bic_r_imm(r, imm); \
	185	}
	186
	187	// shift
	188	#define emith_shift(op, d, s, cnt) { \
	189	if (d != s) \
	190	emith_move_r_r(d, s); \
	191	EMIT_OP_MODRM(0xc1, 3, op, d); \
	192	EMIT(cnt, u8); \
	193	}
	194
	195	#define emith_lsl(d, s, cnt) \
	196	emith_shift(4, d, s, cnt)
	197
	198	#define emith_lsr(d, s, cnt) \
	199	emith_shift(5, d, s, cnt)
	200
	201	#define emith_asr(d, s, cnt) \
	202	emith_shift(7, d, s, cnt)
	203
	204	// misc
	205	#define emith_push(r) \
	206	EMIT_OP(0x50 + (r))
	207
	208	#define emith_pop(r) \
	209	EMIT_OP(0x58 + (r))
	210
	211	#define emith_neg_r(r) \
	212	EMIT_OP_MODRM(0xf7, 3, 3, r)
	213
	214	#define emith_clear_msb(d, s, count) { \
	215	u32 t = (u32)-1; \
	216	t >>= count; \
	217	if (d != s) \
	218	emith_move_r_r(d, s); \
	219	emith_and_r_imm(d, t); \
	220	}
	221
	222	#define emith_sext(d, s, bits) { \
	223	emith_lsl(d, s, 32 - (bits)); \
	224	emith_asr(d, d, 32 - (bits)); \
	225	}
	226
	227	// put bit0 of r0 to carry
	228	#define emith_set_carry(r0) { \
	229	emith_tst_r_imm(r0, 1); /* clears C */ \
	230	EMITH_SJMP_START(DCOND_EQ); \
	231	EMIT_OP(0xf9); /* STC */ \
	232	EMITH_SJMP_END(DCOND_EQ); \
	233	}
	234
	235	// put bit0 of r0 to carry (for subtraction)
	236	#define emith_set_carry_sub emith_set_carry
	237
	238	// XXX: stupid mess
	239	#define emith_mul_(op, dlo, dhi, s1, s2) { \
	240	int rmr; \
	241	if (dlo != xAX && dhi != xAX) \
	242	emith_push(xAX); \
	243	if (dlo != xDX && dhi != xDX) \
	244	emith_push(xDX); \
	245	if ((s1) == xAX) \
	246	rmr = s2; \
	247	else if ((s2) == xAX) \
	248	rmr = s1; \
	249	else { \
	250	emith_move_r_r(xAX, s1); \
	251	rmr = s2; \
	252	} \
	253	EMIT_OP_MODRM(0xf7, 3, op, rmr); /* xMUL rmr */ \
	254	/* XXX: using push/pop for the case of edx->eax; eax->edx */ \
	255	if (dhi != xDX && dhi != -1) \
	256	emith_push(xDX); \
	257	if (dlo != xAX) \
	258	emith_move_r_r(dlo, xAX); \
	259	if (dhi != xDX && dhi != -1) \
	260	emith_pop(dhi); \
	261	if (dlo != xDX && dhi != xDX) \
	262	emith_pop(xDX); \
	263	if (dlo != xAX && dhi != xAX) \
	264	emith_pop(xAX); \
	265	}
	266
	267	#define emith_mul_u64(dlo, dhi, s1, s2) \
	268	emith_mul_(4, dlo, dhi, s1, s2) /* MUL */
	269
	270	#define emith_mul_s64(dlo, dhi, s1, s2) \
	271	emith_mul_(5, dlo, dhi, s1, s2) /* IMUL */
	272
	273	#define emith_mul(d, s1, s2) \
	274	emith_mul_(4, d, -1, s1, s2)
	275
	276	// "flag" instructions are the same
	277	#define emith_subf_r_imm emith_sub_r_imm
	278	#define emith_addf_r_r emith_add_r_r
	279	#define emith_subf_r_r emith_sub_r_r
	280	#define emith_adcf_r_r emith_adc_r_r
	281	#define emith_sbcf_r_r emith_sbc_r_r
	282
	283	#define emith_lslf emith_lsl
	284	#define emith_lsrf emith_lsr
	285	#define emith_asrf emith_asr
	286
	287	// XXX: offs is 8bit only
	288	#define emith_ctx_read(r, offs) { \
	289	EMIT_OP_MODRM(0x8b, 1, r, xBP); \
	290	EMIT(offs, u8); /* mov tmp, [ebp+#offs] */ \
	291	}
	292
	293	#define emith_ctx_write(r, offs) { \
	294	EMIT_OP_MODRM(0x89, 1, r, xBP); \
	295	EMIT(offs, u8); /* mov [ebp+#offs], tmp */ \
	296	}
	297
	298	#define emith_jump(ptr) { \
	299	u32 disp = (u32)ptr - ((u32)tcache_ptr + 5); \
	300	EMIT_OP(0xe9); \
	301	EMIT(disp, u32); \
	302	}
	303
	304	#define emith_call(ptr) { \
	305	u32 disp = (u32)ptr - ((u32)tcache_ptr + 5); \
	306	EMIT_OP(0xe8); \
	307	EMIT(disp, u32); \
	308	}
	309
	310	// "simple" or "short" jump
	311	#define EMITH_SJMP_START(cond) { \
	312	u8 *cond_ptr; \
	313	JMP8_POS(cond_ptr)
	314
	315	#define EMITH_SJMP_END(cond) \
	316	JMP8_EMIT(cond, cond_ptr); \
	317	}
	318
	319	#define host_arg2reg(rd, arg) \
	320	switch (arg) { \
	321	case 0: rd = xAX; break; \
	322	case 1: rd = xDX; break; \
	323	case 2: rd = xCX; break; \
	324	}
	325
	326	#define emith_pass_arg_r(arg, reg) { \
	327	int rd = 7; \
	328	host_arg2reg(rd, arg); \
	329	emith_move_r_r(rd, reg); \
	330	}
	331
	332	#define emith_pass_arg_imm(arg, imm) { \
	333	int rd = 7; \
	334	host_arg2reg(rd, arg); \
	335	emith_move_r_imm(rd, imm); \
	336	}
	337
	338	/* SH2 drc specific */
	339	#define emith_sh2_test_t() { \
	340	int t = rcache_get_reg(SHR_SR, RC_GR_READ); \
	341	EMIT_OP_MODRM(0xf6, 3, 0, t); \
	342	EMIT(0x01, u8); /* test <reg>, byte 1 */ \
	343	}
	344
	345	#define emith_sh2_dtbf_loop() { \
	346	u8 jmp0; / negative cycles check */ \
	347	u8 jmp1; / unsinged overflow check */ \
	348	int cr, rn; \
	349	tmp = rcache_get_tmp(); \
	350	cr = rcache_get_reg(SHR_SR, RC_GR_RMW); \
	351	rn = rcache_get_reg((op >> 8) & 0x0f, RC_GR_RMW);\
	352	emith_sub_r_imm(rn, 1); \
	353	emith_sub_r_imm(cr, (cycles+1) << 12); \
	354	cycles = 0; \
	355	emith_asr(tmp, cr, 2+12); \
	356	JMP8_POS(jmp0); /* no negative cycles */ \
	357	emith_move_r_imm(tmp, 0); \
	358	JMP8_EMIT(IOP_JNS, jmp0); \
	359	emith_and_r_imm(cr, 0xffe); \
	360	emith_subf_r_r(rn, tmp); \
	361	JMP8_POS(jmp1); /* no overflow */ \
	362	emith_neg_r(rn); /* count left */ \
	363	emith_lsl(rn, rn, 2+12); \
	364	emith_or_r_r(cr, rn); \
	365	emith_or_r_imm(cr, 1); \
	366	emith_move_r_imm(rn, 0); \
	367	JMP8_EMIT(IOP_JA, jmp1); \
	368	rcache_free_tmp(tmp); \
	369	}
	370