From: notaz <notasas@gmail.com>
Date: Wed, 21 Oct 2009 19:37:41 +0000 (+0000)
Subject: 32x: drc: ARM implementation, start unification with SVP (untested)
X-Git-Tag: v1.85~229
X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=65c75cb07df9d27905dd166b876d5b6461cb656d;p=picodrive.git

32x: drc: ARM implementation, start unification with SVP (untested)

git-svn-id: file:///home/notaz/opt/svn/PicoDrive@821 be3aeb3a-fb24-0410-a615-afba39da0efa
---

diff --git a/pico/carthw/svp/gen_arm.c b/cpu/drc/emit_arm.c
similarity index 75%
rename from pico/carthw/svp/gen_arm.c
rename to cpu/drc/emit_arm.c
index 56018a76..5a1da7f9 100644
--- a/pico/carthw/svp/gen_arm.c
+++ b/cpu/drc/emit_arm.c
@@ -1,9 +1,18 @@
 // Basic macros to emit ARM instructions and some utils
 
-// (c) Copyright 2008, Grazvydas "notaz" Ignotas
+// (c) Copyright 2008-2009, Grazvydas "notaz" Ignotas
 // Free for non-commercial use.
 
-#define EMIT(x) *tcache_ptr++ = x
+#define CONTEXT_REG 7
+
+// store 32-bit opcode x at ptr, then advance ptr by 4 bytes (XXX: tcache_ptr type for SVP and SH2 compilers differs, hence the casts)
+#define EMIT_PTR(ptr, x) \
+	do { \
+		*(u32 *)(ptr) = (x); \
+		ptr = (void *)((u8 *)(ptr) + sizeof(u32)); \
+	} while (0)
+
+#define EMIT(x) EMIT_PTR(tcache_ptr, x)
 
 #define A_R4M  (1 << 4)
 #define A_R5M  (1 << 5)
@@ -159,36 +168,41 @@
 #define EOP_MSR_REG(rm)       EOP_C_MSR_REG(A_COND_AL,rm)
 
 
-static void emit_mov_const(int cond, int d, unsigned int val)
+static void emith_op_imm(int cond, int op, int r, unsigned int imm)
 {
-	int need_or = 0;
-	if (val & 0xff000000) {
-		EOP_C_DOP_IMM(cond, A_OP_MOV, 0, 0, d, 8/2, (val>>24)&0xff);
-		need_or = 1;
-	}
-	if (val & 0x00ff0000) {
-		EOP_C_DOP_IMM(cond, need_or ? A_OP_ORR : A_OP_MOV, 0, need_or ? d : 0, d, 16/2, (val>>16)&0xff);
-		need_or = 1;
-	}
-	if (val & 0x0000ff00) {
-		EOP_C_DOP_IMM(cond, need_or ? A_OP_ORR : A_OP_MOV, 0, need_or ? d : 0, d, 24/2, (val>>8)&0xff);
-		need_or = 1;
-	}
-	if ((val &0x000000ff) || !need_or)
-		EOP_C_DOP_IMM(cond, need_or ? A_OP_ORR : A_OP_MOV, 0, need_or ? d : 0, d, 0, val&0xff);
+	u32 v, ror2;
+	/* apply imm to r with 1-4 'op' instructions, one 8-bit chunk each; non-MOV ops emit nothing for imm == 0 */
+	if (imm == 0 && op != A_OP_MOV)
+		return;
+
+	/* drop trailing zero bit-pairs: the first chunk then needs rot2 = 16 - pairs (rotate-right field, in 2-bit steps) */
+	for (v = imm, ror2 = 0; v && !(v & 3); v >>= 2)
+		ror2++;
+	ror2 = 16 - ror2;
+	/* first chunk: MOV passes 0 as rn and is continued with ORR, other ops use r as rn */
+	EOP_C_DOP_IMM(cond, op, 0, op == A_OP_MOV ? 0 : r, r, ror2 & 0x0f, v & 0xff);
+	if (op == A_OP_MOV)
+		op = A_OP_ORR;
+	/* each further chunk sits 8 bits higher, so it needs 8/2 fewer rotate steps than the previous one */
+	v >>= 8;
+	if (v & 0xff)
+		EOP_C_DOP_IMM(cond, op, 0, r, r, (ror2 - 8/2) & 0x0f, v & 0xff);
+	v >>= 8;
+	if (v & 0xff)
+		EOP_C_DOP_IMM(cond, op, 0, r, r, (ror2 - 16/2) & 0x0f, v & 0xff);
+	v >>= 8;
+	if (v & 0xff)
+		EOP_C_DOP_IMM(cond, op, 0, r, r, (ror2 - 24/2) & 0x0f, v & 0xff);
 }
 
-static int is_offset_24(int val)
-{
-	if (val >= (int)0xff000000 && val <= 0x00ffffff) return 1;
-	return 0;
-}
+#define is_offset_24(val) /* does val fit the signed 24-bit word offset of B/BL? */ \
+	((val) >= -0x00800000 && (val) <= 0x007fffff)
 
-static int emit_xbranch(int cond, void *target, int is_call)
+static int emith_xbranch(int cond, void *target, int is_call)
 {
-	int val = (unsigned int *)target - tcache_ptr - 2;
+	int val = (u32 *)target - (u32 *)tcache_ptr - 2;
 	int direct = is_offset_24(val);
-	u32 *start_ptr = tcache_ptr;
+	u32 *start_ptr = (u32 *)tcache_ptr;
 
 	if (direct)
 	{
@@ -210,17 +224,7 @@ static int emit_xbranch(int cond, void *target, int is_call)
 #endif
 	}
 
-	return tcache_ptr - start_ptr;
-}
-
-static int emit_call(int cond, void *target)
-{
-	return emit_xbranch(cond, target, 1);
-}
-
-static int emit_jump(int cond, void *target)
-{
-	return emit_xbranch(cond, target, 0);
+	return (u32 *)tcache_ptr - start_ptr;
 }
 
 static void handle_caches(void)
@@ -232,3 +236,67 @@ static void handle_caches(void)
 }
 
 
+#define EMITH_CONDITIONAL(code, is_nonzero) { /* emit 'code' preceded by a conditional branch that jumps over it */ \
+	u32 val, cond, *ptr; \
+	cond = (is_nonzero) ? A_COND_NE : A_COND_EQ; /* condition under which 'code' is skipped */ \
+	ptr = (void *)tcache_ptr; \
+	tcache_ptr = (void *)(ptr + 1); /* leave one word free for the branch, filled in below */ \
+	code; \
+	val = (u32 *)tcache_ptr - (ptr + 2); /* word distance from (branch + 2 words) to the end of 'code' */ \
+	EMIT_PTR(ptr, ((cond)<<28) | 0x0a000000 | (val & 0xffffff)); /* write b<cond> into the reserved word */ \
+}
+
+#define emith_move_r_r(dst, src) \
+	EOP_MOV_REG_SIMPLE(dst, src)
+
+#define emith_move_r_imm(r, imm) \
+	emith_op_imm(A_COND_AL, A_OP_MOV, r, imm)
+
+#define emith_add_r_imm(r, imm) \
+	emith_op_imm(A_COND_AL, A_OP_ADD, r, imm)
+
+#define emith_sub_r_imm(r, imm) \
+	emith_op_imm(A_COND_AL, A_OP_SUB, r, imm)
+
+#define emith_ctx_read(r, offs) \
+	EOP_LDR_IMM(r, CONTEXT_REG, offs)
+
+#define emith_ctx_write(r, offs) \
+	EOP_STR_IMM(r, CONTEXT_REG, offs)
+
+#define emith_ctx_sub(val, offs) { /* subtract val from the context word at offs; uses host r0 as scratch */ \
+	emith_ctx_read(0, offs); \
+	emith_sub_r_imm(0, val); \
+	emith_ctx_write(0, offs); \
+}
+
+// up to 4 args
+#define emith_pass_arg_r(arg, reg) \
+	EOP_MOV_REG_SIMPLE(arg, reg)
+
+#define emith_pass_arg_imm(arg, imm) \
+	emith_move_r_imm(arg, imm)
+
+#define emith_call_cond(cond, target) \
+	emith_xbranch(cond, target, 1)
+
+#define emith_jump_cond(cond, target) \
+	emith_xbranch(cond, target, 0)
+
+#define emith_call(target) \
+	emith_call_cond(A_COND_AL, target)
+
+#define emith_jump(target) \
+	emith_jump_cond(A_COND_AL, target)
+
+/* SH2 drc specific: emit 'tst <SR>, #1' so a following conditional can act on SR bit 0 */
+#define emith_test_t() { \
+	int r = reg_map_g2h[SHR_SR]; \
+	if (r == -1) { /* SR has no host register: load it from the context into r0 */ \
+		emith_ctx_read(0, SHR_SR * 4); \
+		r = 0; \
+	} \
+	EOP_TST_IMM(r, 0, 1); \
+}
+
+
diff --git a/cpu/drc/emit_x86.c b/cpu/drc/emit_x86.c
index 3fe31047..e45d3365 100644
--- a/cpu/drc/emit_x86.c
+++ b/cpu/drc/emit_x86.c
@@ -9,16 +9,6 @@
 
 enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI };
 
-// TODO: move
-static int reg_map_g2h[] = {
-	-1, -1, -1, -1,
-	-1, -1, -1, -1,
-	-1, -1, -1, -1,
-	-1, -1, -1, -1,
-	-1, -1, -1, -1,
-	-1, -1, -1, -1,
-};
-
 #define CONTEXT_REG xBP
 
 #define EMIT_PTR(ptr, val, type) \
@@ -62,32 +52,21 @@ static int reg_map_g2h[] = {
 
 // XXX: offs is 8bit only
 #define emith_ctx_read(r, offs) { \
-	EMIT_OP_MODRM(0x8b, 1, r, 5); \
+	EMIT_OP_MODRM(0x8b, 1, r, xBP); \
 	EMIT(offs, u8); 	/* mov tmp, [ebp+#offs] */ \
 }
 
 #define emith_ctx_write(r, offs) { \
-	EMIT_OP_MODRM(0x89, 1, r, 5); \
+	EMIT_OP_MODRM(0x89, 1, r, xBP); \
 	EMIT(offs, u8); 	/* mov [ebp+#offs], tmp */ \
 }
 
 #define emith_ctx_sub(val, offs) { \
-	EMIT_OP_MODRM(0x81, 1, 5, 5); \
+	EMIT_OP_MODRM(0x81, 1, 5, xBP); /* the remaining 5 is the /5 (SUB) opcode extension, not a register */ \
 	EMIT(offs, u8); \
 	EMIT(val, u32); 	/* sub [ebp+#offs], dword val */ \
 }
 
-#define emith_test_t() { \
-	if (reg_map_g2h[SHR_SR] == -1) { \
-		EMIT_OP_MODRM(0xf6, 1, 0, 5); \
-		EMIT(SHR_SR * 4, u8); \
-		EMIT(0x01, u8); /* test [ebp+SHR_SR], byte 1 */ \
-	} else { \
-		EMIT_OP_MODRM(0xf7, 3, 0, reg_map_g2h[SHR_SR]); \
-		EMIT(0x01, u16); /* test <reg>, word 1 */ \
-	} \
-}
-
 #define emith_jump(ptr) { \
 	u32 disp = (u32)ptr - ((u32)tcache_ptr + 5); \
 	EMIT_OP(0xe9); \
@@ -100,7 +79,7 @@ static int reg_map_g2h[] = {
 	EMIT(disp, u32); \
 }
 
-#define EMIT_CONDITIONAL(code, is_nonzero) { \
+#define EMITH_CONDITIONAL(code, is_nonzero) { \
 	u8 *ptr = tcache_ptr; \
 	tcache_ptr = tcache_ptr + 2; \
 	code; \
@@ -127,3 +106,15 @@ static int reg_map_g2h[] = {
 	emith_move_r_imm(rd, imm); \
 }
 
+/* SH2 drc specific: emit a TEST of SR bit 0, from the context if SR has no host register */
+#define emith_test_t() { \
+	if (reg_map_g2h[SHR_SR] == -1) { \
+		EMIT_OP_MODRM(0xf6, 1, 0, xBP); \
+		EMIT(SHR_SR * 4, u8); \
+		EMIT(0x01, u8); /* test [ebp+SHR_SR], byte 1 */ \
+	} else { \
+		EMIT_OP_MODRM(0xf7, 3, 0, reg_map_g2h[SHR_SR]); \
+		EMIT(0x01, u32); /* test <reg>, dword 1 (F7 /0 without a 0x66 prefix takes imm32) */ \
+	} \
+}
+
diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c
index a243aca2..4dd74e8d 100644
--- a/cpu/sh2/compiler.c
+++ b/cpu/sh2/compiler.c
@@ -52,8 +52,32 @@ static u8 *tcache_ptrs[3];
 // ptr for code emiters
 static u8 *tcache_ptr;
 
+#ifdef ARM
+#include "../drc/emit_arm.c"
+
+static const int reg_map_g2h[] = { /* host register per SHR_* index; -1 = none, emith_test_t() then reads the context */
+	-1, -1, -1, -1,
+	-1, -1, -1, -1,
+	-1, -1, -1, -1,
+	-1, -1, -1, -1,
+	-1, -1, -1, -1,
+	-1, -1, -1, -1,
+};
+
+#else
 #include "../drc/emit_x86.c"
 
+static const int reg_map_g2h[] = { /* host register per SHR_* index; -1 = none, emith_test_t() then reads the context */
+	-1, -1, -1, -1,
+	-1, -1, -1, -1,
+	-1, -1, -1, -1,
+	-1, -1, -1, -1,
+	-1, -1, -1, -1,
+	-1, -1, -1, -1,
+};
+
+#endif
+
 typedef enum {
   SHR_R0 = 0, SHR_R15 = 15,
   SHR_PC,  SHR_PPC, SHR_PR,   SHR_SR,
@@ -364,7 +388,7 @@ static void *sh2_translate(SH2 *sh2, block_desc *other_block)
         tmp2 = delayed_op ? SHR_PPC : SHR_PC;
         emit_move_r_imm32(tmp2, pc + (delayed_op ? 2 : 0));
         emith_test_t();
-        EMIT_CONDITIONAL(emit_move_r_imm32(tmp2, pc + tmp + 2), (op & 0x0200) ? 1 : 0);
+        EMITH_CONDITIONAL(emit_move_r_imm32(tmp2, pc + tmp + 2), (op & 0x0200) ? 1 : 0);
         cycles += 2;
         if (!delayed_op)
           goto end_block;
diff --git a/cpu/sh2/stub_arm.s b/cpu/sh2/stub_arm.s
new file mode 100644
index 00000000..4a424475
--- /dev/null
+++ b/cpu/sh2/stub_arm.s
@@ -0,0 +1,16 @@
+@ vim:filetype=armasm
+.text
+
+.global sh2_drc_entry @ SH2 *sh2, void *block
+@ enters the code at 'block' with r7 = sh2; the {r7,lr} pushed here is popped by sh2_drc_exit
+sh2_drc_entry:
+    stmfd   sp!, {r7,lr}        @ save r7 and the return address
+    mov     r7, r0              @ r7 = first argument (sh2)
+    bx      r1                  @ jump to second argument (block)
+
+
+.global sh2_drc_exit
+@ counterpart of sh2_drc_entry: unwinds its stack frame
+sh2_drc_exit:
+    ldmfd   sp!, {r7,pc}        @ restore r7 and return to the address pushed as lr by sh2_drc_entry
+
diff --git a/pico/carthw/svp/compiler.c b/pico/carthw/svp/compiler.c
index cda7df0f..b47484f4 100644
--- a/pico/carthw/svp/compiler.c
+++ b/pico/carthw/svp/compiler.c
@@ -35,7 +35,7 @@ void ssp_drc_next_patch(void){}
 void ssp_drc_end(void){}
 #endif
 
-#include "gen_arm.c"
+#include "../../../cpu/drc/emit_arm.c"
 
 // -----------------------------------------------------
 
@@ -285,11 +285,11 @@ static void tr_flush_dirty_prs(void)
 	int i, ror = 0, reg;
 	int dirty = dirty_regb >> 8;
 	if ((dirty&7) == 7) {
-		emit_mov_const(A_COND_AL, 8, known_regs.r[0]|(known_regs.r[1]<<8)|(known_regs.r[2]<<16));
+		emith_move_r_imm(8, known_regs.r[0]|(known_regs.r[1]<<8)|(known_regs.r[2]<<16));
 		dirty &= ~7;
 	}
 	if ((dirty&0x70) == 0x70) {
-		emit_mov_const(A_COND_AL, 9, known_regs.r[4]|(known_regs.r[5]<<8)|(known_regs.r[6]<<16));
+		emith_move_r_imm(9, known_regs.r[4]|(known_regs.r[5]<<8)|(known_regs.r[6]<<16));
 		dirty &= ~0x70;
 	}
 	/* r0-r7 */
@@ -348,14 +348,14 @@ static void tr_make_dirty_ST(void)
 static void tr_mov16(int r, int val)
 {
 	if (hostreg_r[r] != val) {
-		emit_mov_const(A_COND_AL, r, val);
+		emith_move_r_imm(r, val);
 		hostreg_r[r] = val;
 	}
 }
 
 static void tr_mov16_cond(int cond, int r, int val)
 {
-	emit_mov_const(cond, r, val);
+	emith_op_imm(cond, A_OP_MOV, r, val);
 	hostreg_r[r] = -1;
 }
 
@@ -367,7 +367,7 @@ static void tr_flush_dirty_pmcrs(void)
 
 	if (dirty_regb & KRREG_PMC) {
 		val = known_regs.pmc.v;
-		emit_mov_const(A_COND_AL, 1, val);
+		emith_move_r_imm(1, val);
 		EOP_STR_IMM(1,7,0x400+SSP_PMC*4);
 
 		if (known_regs.emu_status & (SSP_PMC_SET|SSP_PMC_HAVE_ADDR)) {
@@ -380,14 +380,14 @@ static void tr_flush_dirty_pmcrs(void)
 		if (dirty_regb & (1 << (20+i))) {
 			if (val != known_regs.pmac_read[i]) {
 				val = known_regs.pmac_read[i];
-				emit_mov_const(A_COND_AL, 1, val);
+				emith_move_r_imm(1, val);
 			}
 			EOP_STR_IMM(1,7,0x454+i*4); // pmac_read
 		}
 		if (dirty_regb & (1 << (25+i))) {
 			if (val != known_regs.pmac_write[i]) {
 				val = known_regs.pmac_write[i];
-				emit_mov_const(A_COND_AL, 1, val);
+				emith_move_r_imm(1, val);
 			}
 			EOP_STR_IMM(1,7,0x46c+i*4); // pmac_write
 		}
@@ -792,7 +792,7 @@ static void tr_PMX_to_r0(int reg)
 		if      ((mode & 0xfff0) == 0x0800)
 		{
 			EOP_LDR_IMM(1,7,0x488);		// rom_ptr
-			emit_mov_const(A_COND_AL, 0, (pmcv&0xfffff)<<1);
+			emith_move_r_imm(0, (pmcv&0xfffff)<<1);
 			EOP_LDRH_REG(0,1,0);		// ldrh r0, [r1, r0]
 			known_regs.pmac_read[reg] += 1;
 		}
@@ -800,7 +800,7 @@ static void tr_PMX_to_r0(int reg)
 		{
 			int inc = get_inc(mode);
 			EOP_LDR_IMM(1,7,0x490);		// dram_ptr
-			emit_mov_const(A_COND_AL, 0, (pmcv&0xffff)<<1);
+			emith_move_r_imm(0, (pmcv&0xffff)<<1);
 			EOP_LDRH_REG(0,1,0);		// ldrh r0, [r1, r0]
 			if (reg == 4 && (pmcv == 0x187f03 || pmcv == 0x187f04)) // wait loop detection
 			{
@@ -835,7 +835,7 @@ static void tr_PMX_to_r0(int reg)
 	tr_flush_dirty_ST();
 	//tr_flush_dirty_pmcrs();
 	tr_mov16(0, reg);
-	emit_call(A_COND_AL, ssp_pm_read);
+	emith_call(ssp_pm_read);
 	hostreg_clear();
 }
 
@@ -1034,7 +1034,7 @@ static void tr_r0_to_PMX(int reg)
 			int inc = get_inc(mode);
 			if (mode & 0x0400) tr_unhandled();
 			EOP_LDR_IMM(1,7,0x490);		// dram_ptr
-			emit_mov_const(A_COND_AL, 2, addr<<1);
+			emith_move_r_imm(2, addr << 1);
 			EOP_STRH_REG(0,1,2);		// strh r0, [r1, r2]
 			known_regs.pmac_write[reg] += inc;
 		}
@@ -1042,7 +1042,7 @@ static void tr_r0_to_PMX(int reg)
 		{
 			if (mode & 0x0400) tr_unhandled();
 			EOP_LDR_IMM(1,7,0x490);		// dram_ptr
-			emit_mov_const(A_COND_AL, 2, addr<<1);
+			emith_move_r_imm(2, addr << 1);
 			EOP_STRH_REG(0,1,2);		// strh r0, [r1, r2]
 			known_regs.pmac_write[reg] += (addr&1) ? 31 : 1;
 		}
@@ -1050,7 +1050,7 @@ static void tr_r0_to_PMX(int reg)
 		{
 			int inc = get_inc(mode);
 			EOP_LDR_IMM(1,7,0x48c);		// iram_ptr
-			emit_mov_const(A_COND_AL, 2, (addr&0x3ff)<<1);
+			emith_move_r_imm(2, (addr&0x3ff) << 1);
 			EOP_STRH_REG(0,1,2);		// strh r0, [r1, r2]
 			EOP_MOV_IMM(1,0,1);
 			EOP_STR_IMM(1,7,0x494);		// iram_dirty
@@ -1076,7 +1076,7 @@ static void tr_r0_to_PMX(int reg)
 	tr_flush_dirty_ST();
 	//tr_flush_dirty_pmcrs();
 	tr_mov16(1, reg);
-	emit_call(A_COND_AL, ssp_pm_write);
+	emith_call(ssp_pm_write);
 	hostreg_clear();
 }
 
@@ -1117,7 +1117,7 @@ static void tr_r0_to_PMC(int const_val)
 	{
 		tr_flush_dirty_ST();
 		if (known_regb & KRREG_PMC) {
-			emit_mov_const(A_COND_AL, 1, known_regs.pmc.v);
+			emith_move_r_imm(1, known_regs.pmc.v);
 			EOP_STR_IMM(1,7,0x400+SSP_PMC*4);
 			known_regb &= ~KRREG_PMC;
 			dirty_regb &= ~KRREG_PMC;
@@ -1666,7 +1666,7 @@ static void emit_block_prologue(void)
 	// check if there are enough cycles..
 	// note: r0 must contain PC of current block
 	EOP_CMP_IMM(11,0,0);			// cmp r11, #0
-	emit_jump(A_COND_LE, ssp_drc_end);
+	emith_jump_cond(A_COND_LE, ssp_drc_end);
 }
 
 /* cond:
@@ -1680,16 +1680,16 @@ static void emit_block_epilogue(int cycles, int cond, int pc, int end_pc)
 
 	if (cond < 0 || (end_pc >= 0x400 && pc < 0x400)) {
 		// indirect jump, or rom -> iram jump, must use dispatcher
-		emit_jump(A_COND_AL, ssp_drc_next);
+		emith_jump(ssp_drc_next);
 	}
 	else if (cond == A_COND_AL) {
 		u32 *target = (pc < 0x400) ?
 			ssp_block_table_iram[ssp->drc.iram_context * SSP_BLOCKTAB_IRAM_ONE + pc] :
 			ssp_block_table[pc];
 		if (target != NULL)
-			emit_jump(A_COND_AL, target);
+			emith_jump(target);
 		else {
-			int ops = emit_jump(A_COND_AL, ssp_drc_next);
+			int ops = emith_jump(ssp_drc_next);
 			// cause the next block to be emitted over jump instruction
 			tcache_ptr -= ops;
 		}
@@ -1702,19 +1702,19 @@ static void emit_block_epilogue(int cycles, int cond, int pc, int end_pc)
 			ssp_block_table_iram[ssp->drc.iram_context * SSP_BLOCKTAB_IRAM_ONE + end_pc] :
 			ssp_block_table[end_pc];
 		if (target1 != NULL)
-		     emit_jump(cond, target1);
+		     emith_jump_cond(cond, target1);
 		if (target2 != NULL)
-		     emit_jump(tr_neg_cond(cond), target2); // neg_cond, to be able to swap jumps if needed
+		     emith_jump_cond(tr_neg_cond(cond), target2); // neg_cond, to be able to swap jumps if needed
 #ifndef __EPOC32__
 		// emit patchable branches
 		if (target1 == NULL)
-			emit_call(cond, ssp_drc_next_patch);
+			emith_call_cond(cond, ssp_drc_next_patch);
 		if (target2 == NULL)
-			emit_call(tr_neg_cond(cond), ssp_drc_next_patch);
+			emith_call_cond(tr_neg_cond(cond), ssp_drc_next_patch);
 #else
 		// won't patch indirect jumps
 		if (target1 == NULL || target2 == NULL)
-			emit_jump(A_COND_AL, ssp_drc_next);
+			emith_jump(ssp_drc_next);
 #endif
 	}
 }
@@ -1758,7 +1758,7 @@ void *ssp_translate_block(int pc)
 	if (ccount >= 100) {
 		end_cond = A_COND_AL;
 		jump_pc = pc;
-		emit_mov_const(A_COND_AL, 0, pc);
+		emith_move_r_imm(0, pc);
 	}
 
 	tr_flush_dirty_prs();
diff --git a/platform/linux/Makefile b/platform/linux/Makefile
index 1adb5da3..e2eaf467 100644
--- a/platform/linux/Makefile
+++ b/platform/linux/Makefile
@@ -9,6 +9,10 @@ drc_debug = 1
 
 -include Makefile.local
 
+ifndef ARCH
+ARCH = x86
+endif
+
 ifeq "$(profile)" "1"
 CFLAGS += -O3 -Wall
 CFLAGS += -ftracer -fstrength-reduce -funroll-loops -fomit-frame-pointer -fstrict-aliasing -ffast-math
@@ -18,7 +22,15 @@ CFLAGS += -ggdb -Wall -falign-functions=2
 endif
 DEFINES = _UNZIP_SUPPORT IO_STATS IN_EVDEV
 CFLAGS += -I../.. -I.
-LDFLAGS += -lX11 -lpthread
+LDFLAGS += -lpthread
+ifeq "$(ARCH)" "arm"
+CFLAGS += -mcpu=arm920t
+DEFINES += ARM
+else
+LDFLAGS += -lX11
+endif
+
+CC = $(CROSS)gcc
 
 # frontend
 OBJS += platform/gp2x/emu.o blit.o in_evdev.o plat.o sndout_oss.o gp2x.o log_io.o
@@ -50,6 +62,9 @@ OBJS += pico/sound/sound.o pico/sound/sn76496.o pico/sound/ym2612.o pico/sound/m
 # Pico - carthw
 OBJS += pico/carthw/carthw.o pico/carthw/svp/svp.o pico/carthw/svp/memory.o \
 	pico/carthw/svp/ssp16.o pico/carthw/svp/compiler.o
+ifeq "$(ARCH)" "arm"
+OBJS += pico/carthw/svp/stub_arm.o
+endif
 # zlib
 OBJS += zlib/gzio.o zlib/inffast.o zlib/inflate.o zlib/inftrees.o zlib/trees.o \
 	zlib/deflate.o zlib/crc32.o zlib/adler32.o zlib/zutil.o zlib/compress.o zlib/uncompr.o
@@ -79,7 +94,7 @@ ifeq "$(use_sh2drc)" "1"
 DEFINES += DRC_SH2 DRC_TMP
 OBJS += cpu/sh2/mame/sh2pico.o
 OBJS += cpu/sh2/compiler.o
-OBJS += cpu/sh2/stub_x86.o
+OBJS += cpu/sh2/stub_$(ARCH).o
 ifeq "$(drc_debug)" "1"
 DEFINES += DRC_DEBUG=1
 OBJS += cpu/sh2/mame/sh2dasm.o
@@ -100,6 +115,8 @@ endif
 CFLAGS += $(addprefix -D,$(DEFINES))
 
 vpath %.c = ../..
+vpath %.s = ../..
+vpath %.S = ../..
 vpath %.asm = ../..
 
 DIRS = platform platform/gp2x platform/common pico pico/cd pico/pico pico/sound pico/carthw/svp \
@@ -122,7 +139,7 @@ mkdirs:
 
 include ../common/revision.mak
 
-pico/carthw/svp/compiler.o : ../../pico/carthw/svp/gen_arm.c
+pico/carthw/svp/compiler.o : ../../cpu/drc/emit_arm.c
 pico/pico.o pico/cd/pico.o : ../../pico/pico_cmn.c ../../pico/pico_int.h
 pico/memory.o pico/cd/memory.o : ../../pico/pico_int.h ../../pico/memory.h
 
diff --git a/platform/linux/gp2x.c b/platform/linux/gp2x.c
index 4da98a53..6eb252b8 100644
--- a/platform/linux/gp2x.c
+++ b/platform/linux/gp2x.c
@@ -33,6 +33,9 @@ int crashed_940 = 0;
 int default_cpu_clock = 123;
 void *gp2x_memregs = NULL;
 
+/* ifndef is for qemu build without video out */
+#ifndef ARM
+
 /* faking GP2X pad */
 enum  { GP2X_UP=0x1,       GP2X_LEFT=0x4,       GP2X_DOWN=0x10,  GP2X_RIGHT=0x40,
         GP2X_START=1<<8,   GP2X_SELECT=1<<9,    GP2X_L=1<<10,    GP2X_R=1<<11,
@@ -252,6 +255,7 @@ static void xlib_init(void)
 	sem_wait(&xlib_sem);
 	sem_destroy(&xlib_sem);
 }
+#endif // !ARM
 
 /* --- */
 
@@ -272,6 +276,7 @@ static void realloc_screen(void)
 /* gp2x/emu.c stuff, most to be rm'd */
 static void gp2x_video_flip_(void)
 {
+#ifndef ARM
 	unsigned int *image;
 	int pixel_count, i;
 
@@ -311,6 +316,7 @@ static void gp2x_video_flip_(void)
 		realloc_screen();
 		ximage_realloc(xlib_display, DefaultVisual(xlib_display, 0));
 	}
+#endif
 }
 
 static void gp2x_video_changemode_ll_(int bpp)
@@ -405,7 +411,9 @@ void plat_init(void)
 	// snd
 	sndout_oss_init();
 
+#ifndef ARM
 	xlib_init();
+#endif
 }
 
 void plat_finish(void)
@@ -459,6 +467,10 @@ void mp3_update(int *buffer, int length, int stereo)
 {
 }
 
+void cache_flush_d_inval_i() /* empty stub: does nothing in this platform file */
+{
+}
+
 /* lprintf */
 void lprintf(const char *fmt, ...)
 {
diff --git a/platform/linux/host_dasm.c b/platform/linux/host_dasm.c
index 38a7e473..99476891 100644
--- a/platform/linux/host_dasm.c
+++ b/platform/linux/host_dasm.c
@@ -11,6 +11,20 @@ extern char **g_argv;
 
 static struct disassemble_info di;
 
+#ifdef ARM
+#define print_insn_func print_insn_little_arm
+#define BFD_ARCH bfd_arch_arm
+#define BFD_MACH bfd_mach_arm_4T
+#else
+#define print_insn_func print_insn_i386_intel
+#define BFD_ARCH bfd_arch_i386
+#define BFD_MACH bfd_mach_i386_i386_intel_syntax
+#endif
+
+/* hacks for ARM: dummy definitions, presumably to resolve symbols the ARM disassembler references (TODO confirm) */
+int floatformat_to_double;
+int floatformat_ieee_single_little;
+
 /* symbols */
 static asymbol **symbols;
 static long symcount;
@@ -141,8 +155,8 @@ static void host_dasm_init(void)
   di.print_address_func = dis_asm_print_address;
 //  di.symbol_at_address_func = dis_asm_symbol_at_address;
   di.read_memory_func = dis_asm_read_memory;
-  di.arch = bfd_arch_i386;
-  di.mach = bfd_mach_i386_i386_intel_syntax;
+  di.arch = BFD_ARCH;
+  di.mach = BFD_MACH;
   di.endian = BFD_ENDIAN_LITTLE;
   disassemble_init_for_target(&di);
 }
@@ -160,7 +174,7 @@ void host_dasm(void *addr, int len)
   vma_end = vma + len;
   while (vma < vma_end) {
     printf("  %p ", (void *)(long)vma);
-    vma += print_insn_i386_intel(vma, &di);
+    vma += print_insn_func(vma, &di);
     printf("\n");
   }
 }