+ int s,tl;
+ int ar;
+ int offset;
+ int memtarget=0,c=0;
+ void *jaddr2=NULL;
+ enum stub_type type;
+ int agr=AGEN1+(i&1);
+ int fastio_reg_override=-1;
+ u_int hr,reglist=0;
+ u_int copr=(source[i]>>16)&0x1f;
+ s=get_reg(i_regs->regmap,rs1[i]);
+ tl=get_reg(i_regs->regmap,FTEMP);
+ offset=imm[i];
+ assert(rs1[i]>0);
+ assert(tl>=0);
+
+ for(hr=0;hr<HOST_REGS;hr++) {
+ if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
+ }
+ if(i_regs->regmap[HOST_CCREG]==CCREG)
+ reglist&=~(1<<HOST_CCREG);
+
+ // get the address
+ if (opcode[i]==0x3a) { // SWC2
+ ar=get_reg(i_regs->regmap,agr);
+ if(ar<0) ar=get_reg(i_regs->regmap,-1);
+ reglist|=1<<ar;
+ } else { // LWC2
+ ar=tl;
+ }
+ if(s>=0) c=(i_regs->wasconst>>s)&1;
+ memtarget=c&&(((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE);
+ if (!offset&&!c&&s>=0) ar=s;
+ assert(ar>=0);
+
+ if (opcode[i]==0x3a) { // SWC2
+ cop2_get_dreg(copr,tl,-1);
+ type=STOREW_STUB;
+ }
+ else
+ type=LOADW_STUB;
+
+ if(c&&!memtarget) {
+ jaddr2=out;
+ emit_jmp(0); // inline_readstub/inline_writestub?
+ }
+ else {
+ if(!c) {
+ jaddr2=emit_fastpath_cmp_jump(i,ar,&fastio_reg_override);
+ }
+ else if(ram_offset&&memtarget) {
+ host_tempreg_acquire();
+ emit_addimm(ar,ram_offset,HOST_TEMPREG);
+ fastio_reg_override=HOST_TEMPREG;
+ }
+ if (opcode[i]==0x32) { // LWC2
+ int a=ar;
+ if(fastio_reg_override>=0) a=fastio_reg_override;
+ emit_readword_indexed(0,a,tl);
+ }
+ if (opcode[i]==0x3a) { // SWC2
+ #ifdef DESTRUCTIVE_SHIFT
+ if(!offset&&!c&&s>=0) emit_mov(s,ar);
+ #endif
+ int a=ar;
+ if(fastio_reg_override>=0) a=fastio_reg_override;
+ emit_writeword_indexed(tl,0,a);
+ }
+ }
+ if(fastio_reg_override==HOST_TEMPREG)
+ host_tempreg_release();
+ if(jaddr2)
+ add_stub_r(type,jaddr2,out,i,ar,i_regs,ccadj[i],reglist);
+ if(opcode[i]==0x3a) // SWC2
+ if(!(i_regs->waswritten&(1<<rs1[i])) && !HACK_ENABLED(NDHACK_NO_SMC_CHECK)) {
+#if defined(HOST_IMM8)
+ int ir=get_reg(i_regs->regmap,INVCP);
+ assert(ir>=0);
+ emit_cmpmem_indexedsr12_reg(ir,ar,1);
+#else
+ emit_cmpmem_indexedsr12_imm(invalid_code,ar,1);
+#endif
+ #if defined(HAVE_CONDITIONAL_CALL) && !defined(DESTRUCTIVE_SHIFT)
+ emit_callne(invalidate_addr_reg[ar]);
+ #else
+ void *jaddr3 = out;
+ emit_jne(0);
+ add_stub(INVCODE_STUB,jaddr3,out,reglist|(1<<HOST_CCREG),ar,0,0,0);
+ #endif
+ }
+ if (opcode[i]==0x32) { // LWC2
+ host_tempreg_acquire();
+ cop2_put_dreg(copr,tl,HOST_TEMPREG);
+ host_tempreg_release();
+ }
+}
+
+/*
+ * Assemble a COP2 register-move instruction, selected by opcode2[i]:
+ *   0 = MFC2, 4 = MTC2 (data registers, via cop2_get_dreg/cop2_put_dreg),
+ *   2 = CFC2, 6 = CTC2 (control registers, read/written in reg_cop2c[]).
+ *
+ * i      - index of the instruction in the block being compiled
+ * i_regs - register state whose regmap gives the host register assignments
+ *
+ * The coprocessor register number is taken from bits 11..15 of source[i].
+ * A scratch host register is looked up with get_reg(regmap,-1) and handed
+ * to the helpers as a temporary.
+ */
+static void cop2_assemble(int i,struct regstat *i_regs)
+{
+  u_int copr=(source[i]>>11)&0x1f;
+  signed char temp=get_reg(i_regs->regmap,-1);
+  if (opcode2[i]==0) { // MFC2
+    signed char tl=get_reg(i_regs->regmap,rt1[i]);
+    // nothing to emit if the target is r0 or is not mapped to a host register
+    if(tl>=0&&rt1[i]!=0)
+      cop2_get_dreg(copr,tl,temp);
+  }
+  else if (opcode2[i]==4) { // MTC2
+    signed char sl=get_reg(i_regs->regmap,rs1[i]);
+    cop2_put_dreg(copr,sl,temp);
+  }
+  else if (opcode2[i]==2) // CFC2
+  {
+    signed char tl=get_reg(i_regs->regmap,rt1[i]);
+    if(tl>=0&&rt1[i]!=0)
+      emit_readword(&reg_cop2c[copr],tl);
+  }
+  else if (opcode2[i]==6) // CTC2
+  {
+    signed char sl=get_reg(i_regs->regmap,rs1[i]);
+    // the source must be in a host register; check before it is first used
+    assert(sl>=0);
+    switch(copr) {
+      case 4:
+      case 12:
+      case 20:
+      case 26:
+      case 27:
+      case 29:
+      case 30:
+        // these control registers get the value passed through emit_signextend16
+        emit_signextend16(sl,temp);
+        break;
+      case 31:
+        // control register 31 has its own helper
+        c2op_ctc2_31_assemble(sl,temp);
+        break;
+      default:
+        // all others are stored unmodified, straight from the source register
+        temp=sl;
+        break;
+    }
+    emit_writeword(temp,&reg_cop2c[copr]);
+  }
+}
+/*
+ * Emit the out-of-line stub for an unaligned store (SWL/SWR only, see the
+ * assert below). Everything the stub needs is read from stubs[n]:
+ *   .addr    - location handed to set_jump_target() together with `out`
+ *   .a       - index of the instruction being compiled
+ *   .b       - host register holding the store address
+ *   .c       - pointer to the struct regstat for the instruction
+ *   .d       - cycle adjustment fed to CLOCK_ADJUST (as d+1)
+ *   .e       - list of host registers to save/restore around the call
+ *   .retaddr - where the stub jumps back to when it is done
+ * The "#if 1" branch is the one that is compiled: it calls jump_handle_swl
+ * or jump_handle_swr. The "#else" branch is a disabled inline variant.
+ */
+static void do_unalignedwritestub(int n)
+{
+ assem_debug("do_unalignedwritestub %x\n",start+stubs[n].a*4);
+ literal_pool(256);
+ set_jump_target(stubs[n].addr, out); // presumably points the recorded jump at the code emitted from here on
+
+ int i=stubs[n].a;
+ struct regstat *i_regs=(struct regstat *)stubs[n].c;
+ int addr=stubs[n].b;
+ u_int reglist=stubs[n].e;
+ signed char *i_regmap=i_regs->regmap;
+ int temp2=get_reg(i_regmap,FTEMP); // NOTE(review): not checked for <0 before being used as a shift count below
+ int rt;
+ rt=get_reg(i_regmap,rs2[i]); // host register for rs2[i]; passed to the handler together with addr
+ assert(rt>=0);
+ assert(addr>=0);
+ assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
+ reglist|=(1<<addr); // the address register is always saved/restored
+ reglist&=~(1<<temp2); // the FTEMP host register is left out of the save list
+
+#if 1
+ // don't bother with it and call write handler
+ save_regs(reglist);
+ pass_args(addr,rt);
+ int cc=get_reg(i_regmap,CCREG);
+ if(cc<0)
+ emit_loadreg(CCREG,2); // cycle count is not in a host register: load it into reg 2
+ emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2); // reg 2 = cycle count + adjustment
+ emit_far_call((opcode[i]==0x2a?jump_handle_swl:jump_handle_swr)); // 0x2a selects the SWL handler, otherwise SWR
+ emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d+1),cc<0?2:cc); // takes reg 0 minus the same adjustment; presumably the handler returns the cycle count in reg 0 - TODO confirm
+ if(cc<0)
+ emit_storereg(CCREG,2); // write the cycle count back when it has no host register
+ restore_regs(reglist);
+ emit_jmp(stubs[n].retaddr); // return address
+#else
+ emit_andimm(addr,0xfffffffc,temp2);
+ emit_writeword(temp2,&address);
+
+ save_regs(reglist);
+ emit_shrimm(addr,16,1);
+ int cc=get_reg(i_regmap,CCREG);
+ if(cc<0) {
+ emit_loadreg(CCREG,2);
+ }
+ emit_movimm((u_int)readmem,0);
+ emit_addimm(cc<0?2:cc,2*stubs[n].d+2,2);
+ emit_call((int)&indirect_jump_indexed);
+ restore_regs(reglist);
+
+ emit_readword(&readmem_dword,temp2);
+ int temp=addr; //hmh
+ emit_shlimm(addr,3,temp);
+ emit_andimm(temp,24,temp);
+ if (opcode[i]==0x2a) // SWL
+ emit_xorimm(temp,24,temp);
+ emit_movimm(-1,HOST_TEMPREG);
+ if (opcode[i]==0x2a) { // SWL
+ emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
+ emit_orrshr(rt,temp,temp2);
+ }else{
+ emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
+ emit_orrshl(rt,temp,temp2);
+ }
+ emit_readword(&address,addr);
+ emit_writeword(temp2,&word);
+ //save_regs(reglist); // don't need to, no state changes
+ emit_shrimm(addr,16,1);
+ emit_movimm((u_int)writemem,0);
+ //emit_call((int)&indirect_jump_indexed);
+ emit_mov(15,14);
+ emit_readword_dualindexedx4(0,1,15);
+ emit_readword(&Count,HOST_TEMPREG);
+ emit_readword(&next_interupt,2);
+ emit_addimm(HOST_TEMPREG,-2*stubs[n].d-2,HOST_TEMPREG);
+ emit_writeword(2,&last_count);
+ emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
+ if(cc<0) {
+ emit_storereg(CCREG,HOST_TEMPREG);
+ }
+ restore_regs(reglist);
+ emit_jmp(stubs[n].retaddr); // return address
+#endif
+}
+
+#ifndef multdiv_assemble
+/*
+ * Fallback compiled only when no macro named multdiv_assemble is defined.
+ * It cannot assemble anything: it prints a diagnostic to stdout and aborts.
+ * Both parameters are ignored.
+ */
+void multdiv_assemble(int i,struct regstat *i_regs)
+{
+  (void)i;
+  (void)i_regs;
+  fputs("Need multdiv_assemble for this architecture.\n",stdout);
+  abort();
+}
+#endif
+
+/*
+ * Assemble a plain register move from rs1[i] to rt1[i].
+ * (Notes left in the original mention opcode2 0x10/0x12 = MFHI/MFLO and
+ * 0x11/0x13 = MTHI/MTLO.)
+ *
+ * i      - index of the instruction in the block being compiled
+ * i_regs - register state whose regmap gives the host register assignments
+ *
+ * Nothing is emitted when the destination is register 0 or when the
+ * destination has no host register assigned.
+ */
+static void mov_assemble(int i,struct regstat *i_regs)
+{
+  signed char dst_hr,src_hr;
+
+  if(!rt1[i])
+    return;
+
+  dst_hr=get_reg(i_regs->regmap,rt1[i]);
+  if(dst_hr<0)
+    return;
+
+  src_hr=get_reg(i_regs->regmap,rs1[i]);
+  if(src_hr<0)
+    emit_loadreg(rs1[i],dst_hr); // source is not in a host register
+  else
+    emit_mov(src_hr,dst_hr);
+}
+
+/* call interpreter, exception handler, things that change pc/regs/cycles ...
+ *
+ * Emits code that writes `pc` to psxRegs.pc, writes an updated value to
+ * psxRegs.cycle, calls `func`, and then jumps to jump_to_new_pc.
+ *
+ * i      - instruction index; ccadj[i] supplies the cycle adjustment
+ * i_regs - register state; CCREG must be mapped to HOST_CCREG
+ * pc     - value stored to psxRegs.pc before the call
+ * func   - routine reached through emit_far_call
+ *
+ * Must not be used for an instruction in a delay slot (asserted).
+ */
+static void call_c_cpu_handler(int i, const struct regstat *i_regs, u_int pc, void *func)
+{
+ signed char ccreg=get_reg(i_regs->regmap,CCREG);
+ assert(ccreg==HOST_CCREG);
+ assert(!is_delayslot);
+ (void)ccreg; // keeps the variable referenced when assert() compiles to nothing
+
+ emit_movimm(pc,3); // Get PC into host reg 3
+ emit_readword(&last_count,2); // host reg 2 = last_count
+ emit_writeword(3,&psxRegs.pc);
+ emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // XXX (original marker): adds this instruction's cycle adjustment to the cycle counter register
+ emit_add(2,HOST_CCREG,2); // presumably reg 2 = last_count + cycle counter - confirm operand order of emit_add
+ emit_writeword(2,&psxRegs.cycle);
+ emit_far_call(func);
+ emit_far_jump(jump_to_new_pc);
+}
+
+/*
+ * Assemble SYSCALL: load 0x20 into host reg 0 and 0 into host reg 1, then
+ * hand over to psxException through call_c_cpu_handler with the address of
+ * this instruction as the PC.
+ */
+static void syscall_assemble(int i,struct regstat *i_regs)
+{
+  const u_int insn_pc=start+i*4; // address of the instruction itself
+
+  emit_movimm(0x20,0); // cause code
+  emit_movimm(0,1); // not in delay slot
+  call_c_cpu_handler(i,i_regs,insn_pc,psxException);
+}
+
+/*
+ * Assemble an HLE call. The low 26 bits of the instruction word index
+ * psxHLEt[]; an index outside the table selects psxNULL instead. The chosen
+ * routine is invoked through call_c_cpu_handler with the address of the
+ * following instruction as the PC.
+ */
+static void hlecall_assemble(int i,struct regstat *i_regs)
+{
+  const uint32_t hle_index = source[i] & 0x03ffffff;
+  void *handler;
+
+  if (hle_index >= ARRAY_SIZE(psxHLEt))
+    handler = psxNULL; // out of range: fall back to the null handler
+  else
+    handler = psxHLEt[hle_index];
+
+  call_c_cpu_handler(i,i_regs,start+i*4+4,handler);
+}
+
+/*
+ * Assemble a call to execI for instruction i, passing the address of the
+ * instruction itself as the PC.
+ */
+static void intcall_assemble(int i,struct regstat *i_regs)
+{
+  const u_int insn_pc=start+i*4;
+
+  call_c_cpu_handler(i,i_regs,insn_pc,execI);
+}
+
+/*
+ * Copy the speculated value of register rs to register rt in smrv[] and
+ * flag rt in smrv_strong_next. Does nothing when rt is register 0.
+ */
+static void speculate_mov(int rs,int rt)
+{
+  if(rt!=0) {
+    // unsigned constant: 1<<31 on a signed int is undefined behaviour
+    smrv_strong_next|=1u<<rt;
+    smrv[rt]=smrv[rs];
+  }
+}
+
+/*
+ * Copy the speculated value of register rs to register rt in smrv[] and
+ * flag rt in smrv_weak_next. Does nothing when rt is register 0.
+ */
+static void speculate_mov_weak(int rs,int rt)
+{
+  if(rt!=0) {
+    // unsigned constant: 1<<31 on a signed int is undefined behaviour
+    smrv_weak_next|=1u<<rt;
+    smrv[rt]=smrv[rs];
+  }
+}
+
+/*
+ * Update the speculated register values (smrv[]) and the strong/weak
+ * bitmasks for instruction i of the block being compiled.
+ *
+ * At i==0 the table is seeded from the current psxRegs.GPR.r contents, with
+ * r28-r30 marked strong and every other register marked weak. For each
+ * instruction the "next" masks computed so far become the current masks,
+ * and the next masks are then adjusted according to itype[i]: values are
+ * propagated through ALU/IMM16 operations, taken from known constants, or
+ * dropped when the destination is overwritten by something unpredictable.
+ *
+ * All mask arithmetic uses unsigned constants so that bit 31 can be
+ * set/cleared without shifting into the sign bit of an int.
+ */
+static void speculate_register_values(int i)
+{
+  if(i==0) {
+    memcpy(smrv,psxRegs.GPR.r,sizeof(smrv));
+    // r28-r30 (gp, sp, fp in the usual MIPS convention) are likely to stay
+    // the same throughout the block
+    smrv_strong_next=(1u<<28)|(1u<<29)|(1u<<30);
+    smrv_weak_next=~smrv_strong_next;
+    //printf(" llr %08x\n", smrv[4]);
+  }
+  smrv_strong=smrv_strong_next;
+  smrv_weak=smrv_weak_next;
+  switch(itype[i]) {
+    case ALU:
+      // prefer a strong source, then a weak one; otherwise forget the target
+      if     ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
+      else if((smrv_strong>>rs2[i])&1) speculate_mov(rs2[i],rt1[i]);
+      else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
+      else if((smrv_weak>>rs2[i])&1) speculate_mov_weak(rs2[i],rt1[i]);
+      else {
+        smrv_strong_next&=~(1u<<rt1[i]);
+        smrv_weak_next&=~(1u<<rt1[i]);
+      }
+      break;
+    case SHIFTIMM:
+      smrv_strong_next&=~(1u<<rt1[i]);
+      smrv_weak_next&=~(1u<<rt1[i]);
+      // fallthrough
+    case IMM16:
+      if(rt1[i]&&is_const(&regs[i],rt1[i])) {
+        // the result is a known constant: record it as a strong value
+        int value,hr=get_reg(regs[i].regmap,rt1[i]);
+        if(hr>=0) {
+          if(get_final_value(hr,i,&value))
+               smrv[rt1[i]]=value;
+          else smrv[rt1[i]]=constmap[i][hr];
+          smrv_strong_next|=1u<<rt1[i];
+        }
+      }
+      else {
+        if     ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
+        else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
+      }
+      break;
+    case LOAD:
+      if(start<0x2000&&(rt1[i]==26||(smrv[rt1[i]]>>24)==0xa0)) {
+        // special case for BIOS
+        smrv[rt1[i]]=0xa0000000;
+        smrv_strong_next|=1u<<rt1[i];
+        break;
+      }
+      // fallthrough
+    case SHIFT:
+    case LOADLR:
+    case MOV:
+      smrv_strong_next&=~(1u<<rt1[i]);
+      smrv_weak_next&=~(1u<<rt1[i]);
+      break;
+    case COP0:
+    case COP2:
+      if(opcode2[i]==0||opcode2[i]==2) { // MFC/CFC
+        smrv_strong_next&=~(1u<<rt1[i]);
+        smrv_weak_next&=~(1u<<rt1[i]);
+      }
+      break;
+    case C2LS:
+      if (opcode[i]==0x32) { // LWC2
+        smrv_strong_next&=~(1u<<rt1[i]);
+        smrv_weak_next&=~(1u<<rt1[i]);
+      }
+      break;
+    default:
+      // other instruction types leave the speculation state untouched
+      break;
+  }
+#if 0
+  int r=4;
+  printf("x %08x %08x %d %d c %08x %08x\n",smrv[r],start+i*4,
+    ((smrv_strong>>r)&1),(smrv_weak>>r)&1,regs[i].isconst,regs[i].wasconst);
+#endif
+}
+
+static void ds_assemble(int i,struct regstat *i_regs)
+{
+ speculate_register_values(i);
+ is_delayslot=1;
+ switch(itype[i]) {
+ case ALU:
+ alu_assemble(i,i_regs);break;
+ case IMM16:
+ imm16_assemble(i,i_regs);break;