+ int s,tl;
+ int ar;
+ int offset;
+ int memtarget=0,c=0;
+ void *jaddr2=NULL;
+ enum stub_type type;
+ int agr=AGEN1+(i&1);
+ int offset_reg = -1;
+ int fastio_reg_override = -1;
+ u_int reglist=get_host_reglist(i_regs->regmap);
+ u_int copr=(source[i]>>16)&0x1f;
+ s=get_reg(i_regs->regmap,dops[i].rs1);
+ tl=get_reg(i_regs->regmap,FTEMP);
+ offset=imm[i];
+ assert(dops[i].rs1>0);
+ assert(tl>=0);
+
+ if(i_regs->regmap[HOST_CCREG]==CCREG)
+ reglist&=~(1<<HOST_CCREG);
+
+ // get the address
+ if (dops[i].opcode==0x3a) { // SWC2
+ ar=get_reg(i_regs->regmap,agr);
+ if(ar<0) ar=get_reg(i_regs->regmap,-1);
+ reglist|=1<<ar;
+ } else { // LWC2
+ ar=tl;
+ }
+ if(s>=0) c=(i_regs->wasconst>>s)&1;
+ memtarget=c&&(((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE);
+ if (!offset&&!c&&s>=0) ar=s;
+ assert(ar>=0);
+
+ cop2_do_stall_check(0, i, i_regs, reglist);
+
+ if (dops[i].opcode==0x3a) { // SWC2
+ cop2_get_dreg(copr,tl,-1);
+ type=STOREW_STUB;
+ }
+ else
+ type=LOADW_STUB;
+
+ if(c&&!memtarget) {
+ jaddr2=out;
+ emit_jmp(0); // inline_readstub/inline_writestub?
+ }
+ else {
+ if(!c) {
+ jaddr2 = emit_fastpath_cmp_jump(i, i_regs, ar,
+ &offset_reg, &fastio_reg_override);
+ }
+ else if (ram_offset && memtarget) {
+ offset_reg = get_ro_reg(i_regs, 0);
+ }
+ switch (dops[i].opcode) {
+ case 0x32: { // LWC2
+ int a = ar;
+ if (fastio_reg_override >= 0)
+ a = fastio_reg_override;
+ do_load_word(a, tl, offset_reg);
+ break;
+ }
+ case 0x3a: { // SWC2
+ #ifdef DESTRUCTIVE_SHIFT
+ if(!offset&&!c&&s>=0) emit_mov(s,ar);
+ #endif
+ int a = ar;
+ if (fastio_reg_override >= 0)
+ a = fastio_reg_override;
+ do_store_word(a, 0, tl, offset_reg, 1);
+ break;
+ }
+ default:
+ assert(0);
+ }
+ }
+ if (fastio_reg_override == HOST_TEMPREG || offset_reg == HOST_TEMPREG)
+ host_tempreg_release();
+ if(jaddr2)
+ add_stub_r(type,jaddr2,out,i,ar,i_regs,ccadj_,reglist);
+ if(dops[i].opcode==0x3a) // SWC2
+ if(!(i_regs->waswritten&(1<<dops[i].rs1)) && !HACK_ENABLED(NDHACK_NO_SMC_CHECK)) {
+#if defined(HOST_IMM8)
+ int ir=get_reg(i_regs->regmap,INVCP);
+ assert(ir>=0);
+ emit_cmpmem_indexedsr12_reg(ir,ar,1);
+#else
+ emit_cmpmem_indexedsr12_imm(invalid_code,ar,1);
+#endif
+ #if defined(HAVE_CONDITIONAL_CALL) && !defined(DESTRUCTIVE_SHIFT)
+ emit_callne(invalidate_addr_reg[ar]);
+ #else
+ void *jaddr3 = out;
+ emit_jne(0);
+ add_stub(INVCODE_STUB,jaddr3,out,reglist|(1<<HOST_CCREG),ar,0,0,0);
+ #endif
+ }
+ if (dops[i].opcode==0x32) { // LWC2
+ host_tempreg_acquire();
+ cop2_put_dreg(copr,tl,HOST_TEMPREG);
+ host_tempreg_release();
+ }
+}
+
+// Assemble a COP2 (GTE) register move: MFC2/CFC2 (GTE -> guest GPR) or
+// MTC2/CTC2 (guest GPR -> GTE).  Data registers go through
+// cop2_get_dreg/cop2_put_dreg; control registers are accessed directly
+// in reg_cop2c[].
+static void cop2_assemble(int i, const struct regstat *i_regs)
+{
+  u_int copr = (source[i]>>11) & 0x1f;            // cop2 register number from the opcode
+  signed char temp = get_reg(i_regs->regmap, -1); // scratch host register
+
+  if (!HACK_ENABLED(NDHACK_NO_STALLS)) {
+    u_int reglist = reglist_exclude(get_host_reglist(i_regs->regmap), temp, -1);
+    if (dops[i].opcode2 == 0 || dops[i].opcode2 == 2) { // MFC2/CFC2
+      // destination is about to be overwritten, no need to preserve it
+      signed char tl = get_reg(i_regs->regmap, dops[i].rt1);
+      reglist = reglist_exclude(reglist, tl, -1);
+    }
+    cop2_do_stall_check(0, i, i_regs, reglist);
+  }
+  if (dops[i].opcode2==0) { // MFC2
+    signed char tl=get_reg(i_regs->regmap,dops[i].rt1);
+    if(tl>=0&&dops[i].rt1!=0) // skip if rt unallocated or target is $zero
+      cop2_get_dreg(copr,tl,temp);
+  }
+  else if (dops[i].opcode2==4) { // MTC2
+    signed char sl=get_reg(i_regs->regmap,dops[i].rs1);
+    cop2_put_dreg(copr,sl,temp);
+  }
+  else if (dops[i].opcode2==2) // CFC2
+  {
+    signed char tl=get_reg(i_regs->regmap,dops[i].rt1);
+    if(tl>=0&&dops[i].rt1!=0)
+      // fix: "&reg_cop2c" had been garbled to "®_cop2c" (mojibake of "&reg;")
+      emit_readword(&reg_cop2c[copr],tl);
+  }
+  else if (dops[i].opcode2==6) // CTC2
+  {
+    signed char sl=get_reg(i_regs->regmap,dops[i].rs1);
+    assert(sl>=0); // validate source reg BEFORE using it (was asserted after use)
+    switch(copr) {
+      // these control registers store sign-extended 16-bit values
+      case 4:
+      case 12:
+      case 20:
+      case 26:
+      case 27:
+      case 29:
+      case 30:
+        emit_signextend16(sl,temp);
+        break;
+      case 31:
+        // reg 31 (FLAG) has dedicated assembly — presumably derives the
+        // summary bit; see c2op_ctc2_31_assemble
+        c2op_ctc2_31_assemble(sl,temp);
+        break;
+      default:
+        temp=sl; // stored unmodified
+        break;
+    }
+    // fix: "&reg_cop2c" had been garbled to "®_cop2c" (mojibake of "&reg;")
+    emit_writeword(temp,&reg_cop2c[copr]);
+  }
+}
+
+// Out-of-line slow path for an unaligned store (SWL/SWR) whose inline fast
+// path was not taken.  Emitted at stub-resolution time: restores context
+// from stubs[n], calls the C write handler, and jumps back to compiled code.
+static void do_unalignedwritestub(int n)
+{
+ assem_debug("do_unalignedwritestub %x\n",start+stubs[n].a*4);
+ literal_pool(256);
+ set_jump_target(stubs[n].addr, out); // patch the fast-path jump to land here
+
+ int i=stubs[n].a;                    // instruction index within the block
+ struct regstat *i_regs=(struct regstat *)stubs[n].c;
+ int addr=stubs[n].b;                 // host reg holding the effective address
+ u_int reglist=stubs[n].e;
+ signed char *i_regmap=i_regs->regmap;
+ int temp2=get_reg(i_regmap,FTEMP);
+ int rt;
+ rt=get_reg(i_regmap,dops[i].rs2);    // host reg holding the store data
+ assert(rt>=0);
+ assert(addr>=0);
+ assert(dops[i].opcode==0x2a||dops[i].opcode==0x2e); // SWL/SWR only implemented
+ reglist|=(1<<addr);                  // addr must survive save/restore
+ reglist&=~(1<<temp2);                // FTEMP is scratch, no need to preserve
+
+ // don't bother with it and call write handler
+ save_regs(reglist);
+ pass_args(addr,rt);                  // handler args: address, data
+ int cc=get_reg(i_regmap,CCREG);
+ if(cc<0)
+ emit_loadreg(CCREG,2);               // cycle count not in a reg; load into r2
+ emit_addimm(cc<0?2:cc,(int)stubs[n].d+1,2); // charge cycles up to this insn
+ emit_far_call((dops[i].opcode==0x2a?jump_handle_swl:jump_handle_swr));
+ emit_addimm(0,-((int)stubs[n].d+1),cc<0?2:cc); // undo the cycle adjustment
+ if(cc<0)
+ emit_storereg(CCREG,2);
+ restore_regs(reglist);
+ emit_jmp(stubs[n].retaddr); // return address
+}
+
+#ifndef multdiv_assemble
+// Fallback stub: every target architecture is expected to provide its own
+// multdiv_assemble; reaching this at runtime means the port is incomplete.
+void multdiv_assemble(int i,struct regstat *i_regs)
+{
+  // Diagnostics belong on stderr, not stdout, so the message is not lost
+  // to redirection or buffering when abort() fires.
+  fprintf(stderr, "Need multdiv_assemble for this architecture.\n");
+  abort();
+}
+#endif
+
+// Assemble a register move (MFHI/MFLO/MTHI/MTLO per the commented checks).
+// Copies rs1 into rt1's host register, loading from memory if the source
+// has no register allocated; emits nothing when rt1 is $zero or unallocated.
+static void mov_assemble(int i, const struct regstat *i_regs)
+{
+ //if(dops[i].opcode2==0x10||dops[i].opcode2==0x12) { // MFHI/MFLO
+ //if(dops[i].opcode2==0x11||dops[i].opcode2==0x13) { // MTHI/MTLO
+ if(dops[i].rt1) {
+ signed char sl,tl;
+ tl=get_reg(i_regs->regmap,dops[i].rt1);
+ //assert(tl>=0);
+ if(tl>=0) {
+ sl=get_reg(i_regs->regmap,dops[i].rs1);
+ if(sl>=0) emit_mov(sl,tl);            // reg-to-reg copy
+ else emit_loadreg(dops[i].rs1,tl);    // source spilled: load from memory
+ }
+ }
+ // reading HI/LO may need to wait for an in-flight mult/div to finish
+ if (dops[i].rs1 == HIREG || dops[i].rs1 == LOREG) // MFHI/MFLO
+ multdiv_do_stall(i, i_regs);
+}
+
+// call interpreter, exception handler, things that change pc/regs/cycles ...
+// call interpreter, exception handler, things that change pc/regs/cycles ...
+// Flushes emulated pc and cycle count to psxRegs, calls func, then jumps to
+// the dispatcher to continue at whatever pc the handler set.
+//   ccadj_ - cycle-count adjustment for this instruction
+//   pc     - guest pc value to store before the call
+//   func   - C handler to invoke (must not return control inline)
+static void call_c_cpu_handler(int i, const struct regstat *i_regs, int ccadj_, u_int pc, void *func)
+{
+ signed char ccreg=get_reg(i_regs->regmap,CCREG);
+ assert(ccreg==HOST_CCREG); // cycle count must already live in its fixed reg
+ assert(!is_delayslot);
+ (void)ccreg;
+
+ emit_movimm(pc,3); // Get PC
+ emit_readword(&last_count,2);
+ emit_writeword(3,&psxRegs.pc);
+ emit_addimm(HOST_CCREG,ccadj_,HOST_CCREG);
+ emit_add(2,HOST_CCREG,2);             // total cycles = last_count + cc
+ emit_writeword(2,&psxRegs.cycle);
+ emit_far_call(func);
+ emit_far_jump(jump_to_new_pc);        // resume at the pc set by func
+}
+
+// Assemble SYSCALL (opcode2 == 0x0C) or BREAK: load the faulting pc and the
+// updated cycle count, then tail-jump to the matching exception helper
+// (delay-slot variant when needed).
+static void syscall_assemble(int i, const struct regstat *i_regs, int ccadj_)
+{
+ // 'break' tends to be littered around to catch things like
+ // division by 0 and is almost never executed, so don't emit much code here
+ void *func = (dops[i].opcode2 == 0x0C)
+ ? (is_delayslot ? jump_syscall_ds : jump_syscall)
+ : (is_delayslot ? jump_break_ds : jump_break);
+ signed char ccreg = get_reg(i_regs->regmap, CCREG);
+ assert(ccreg == HOST_CCREG); // cycle count must be in its fixed host reg
+ emit_movimm(start + i*4, 2); // pc
+ emit_addimm(HOST_CCREG, ccadj_ + CLOCK_ADJUST(1), HOST_CCREG);
+ emit_far_jump(func);
+}
+
+// Assemble an HLE BIOS call: the low 26 bits of the instruction select an
+// entry in psxHLEt.  Out-of-range codes fall back to psxNULL.  Control
+// resumes at the instruction after the call (pc = start + i*4 + 4).
+static void hlecall_assemble(int i, const struct regstat *i_regs, int ccadj_)
+{
+ void *hlefunc = psxNULL;
+ uint32_t hleCode = source[i] & 0x03ffffff;
+ if (hleCode < ARRAY_SIZE(psxHLEt)) // bounds-check before indexing the table
+ hlefunc = psxHLEt[hleCode];
+
+ call_c_cpu_handler(i, i_regs, ccadj_, start + i*4+4, hlefunc);
+}
+
+// Punt this instruction to the interpreter (execI), resuming at this pc.
+static void intcall_assemble(int i, const struct regstat *i_regs, int ccadj_)
+{
+ call_c_cpu_handler(i, i_regs, ccadj_, start + i*4, execI);
+}
+
+// Propagate a strongly-speculated value from guest reg rs to rt.
+// Writes to $zero (rt == 0) are ignored.
+static void speculate_mov(int rs,int rt)
+{
+  if(rt!=0) {
+    // 1u: rt may be 31, and 1<<31 on a signed int is undefined behavior
+    smrv_strong_next |= 1u << rt;
+    smrv[rt] = smrv[rs];
+  }
+}
+
+// Propagate a weakly-speculated value from guest reg rs to rt.
+// Writes to $zero (rt == 0) are ignored.
+static void speculate_mov_weak(int rs,int rt)
+{
+  if(rt!=0) {
+    // 1u: rt may be 31, and 1<<31 on a signed int is undefined behavior
+    smrv_weak_next |= 1u << rt;
+    smrv[rt] = smrv[rs];
+  }
+}
+
+// Maintain smrv[] — speculated values of the guest GPRs at instruction i —
+// together with the strong/weak confidence bitmasks.  Seeded from the real
+// register file at the start of the block; each instruction type then
+// propagates, pins, or invalidates the speculation for its destination reg.
+static void speculate_register_values(int i)
+{
+  if(i==0) {
+    memcpy(smrv,psxRegs.GPR.r,sizeof(smrv));
+    // gp,sp are likely to stay the same throughout the block
+    smrv_strong_next=(1<<28)|(1<<29)|(1<<30);
+    smrv_weak_next=~smrv_strong_next;
+    //printf(" llr %08x\n", smrv[4]);
+  }
+  smrv_strong=smrv_strong_next;
+  smrv_weak=smrv_weak_next;
+  switch(dops[i].itype) {
+    case ALU:
+      // result inherits speculation from whichever operand we trust most
+      if ((smrv_strong>>dops[i].rs1)&1) speculate_mov(dops[i].rs1,dops[i].rt1);
+      else if((smrv_strong>>dops[i].rs2)&1) speculate_mov(dops[i].rs2,dops[i].rt1);
+      else if((smrv_weak>>dops[i].rs1)&1) speculate_mov_weak(dops[i].rs1,dops[i].rt1);
+      else if((smrv_weak>>dops[i].rs2)&1) speculate_mov_weak(dops[i].rs2,dops[i].rt1);
+      else {
+        smrv_strong_next&=~(1<<dops[i].rt1);
+        smrv_weak_next&=~(1<<dops[i].rt1);
+      }
+      break;
+    case SHIFTIMM:
+      smrv_strong_next&=~(1<<dops[i].rt1);
+      smrv_weak_next&=~(1<<dops[i].rt1);
+      // fallthrough
+    case IMM16:
+      // fix: "&regs[i]" had been garbled to "®s[i]" (mojibake of "&reg;")
+      if(dops[i].rt1&&is_const(&regs[i],dops[i].rt1)) {
+        // constant-propagated result: speculation becomes exact and strong
+        int value,hr=get_reg(regs[i].regmap,dops[i].rt1);
+        if(hr>=0) {
+          if(get_final_value(hr,i,&value))
+            smrv[dops[i].rt1]=value;
+          else smrv[dops[i].rt1]=constmap[i][hr];
+          smrv_strong_next|=1<<dops[i].rt1;
+        }
+      }
+      else {
+        if ((smrv_strong>>dops[i].rs1)&1) speculate_mov(dops[i].rs1,dops[i].rt1);
+        else if((smrv_weak>>dops[i].rs1)&1) speculate_mov_weak(dops[i].rs1,dops[i].rt1);
+      }
+      break;
+    case LOAD:
+      if(start<0x2000&&(dops[i].rt1==26||(smrv[dops[i].rt1]>>24)==0xa0)) {
+        // special case for BIOS
+        smrv[dops[i].rt1]=0xa0000000;
+        smrv_strong_next|=1<<dops[i].rt1;
+        break;
+      }
+      // fallthrough
+    case SHIFT:
+    case LOADLR:
+    case MOV:
+      // destination value no longer predictable
+      smrv_strong_next&=~(1<<dops[i].rt1);
+      smrv_weak_next&=~(1<<dops[i].rt1);
+      break;
+    case COP0:
+    case COP2:
+      if(dops[i].opcode2==0||dops[i].opcode2==2) { // MFC/CFC
+        smrv_strong_next&=~(1<<dops[i].rt1);
+        smrv_weak_next&=~(1<<dops[i].rt1);
+      }
+      break;
+    case C2LS:
+      if (dops[i].opcode==0x32) { // LWC2
+        smrv_strong_next&=~(1<<dops[i].rt1);
+        smrv_weak_next&=~(1<<dops[i].rt1);
+      }
+      break;
+    default:
+      break; // other itypes leave speculation untouched
+  }
+#if 0
+  int r=4;
+  printf("x %08x %08x %d %d c %08x %08x\n",smrv[r],start+i*4,
+    ((smrv_strong>>r)&1),(smrv_weak>>r)&1,regs[i].isconst,regs[i].wasconst);
+#endif
+}
+
+static void ujump_assemble(int i, const struct regstat *i_regs);
+static void rjump_assemble(int i, const struct regstat *i_regs);
+static void cjump_assemble(int i, const struct regstat *i_regs);
+static void sjump_assemble(int i, const struct regstat *i_regs);
+static void pagespan_assemble(int i, const struct regstat *i_regs);
+
+static int assemble(int i, const struct regstat *i_regs, int ccadj_)
+{
+ int ds = 0;
+ switch (dops[i].itype) {
+ case ALU:
+ alu_assemble(i, i_regs);
+ break;
+ case IMM16:
+ imm16_assemble(i, i_regs);
+ break;
+ case SHIFT:
+ shift_assemble(i, i_regs);
+ break;
+ case SHIFTIMM:
+ shiftimm_assemble(i, i_regs);
+ break;
+ case LOAD:
+ load_assemble(i, i_regs, ccadj_);
+ break;
+ case LOADLR:
+ loadlr_assemble(i, i_regs, ccadj_);
+ break;
+ case STORE:
+ store_assemble(i, i_regs, ccadj_);
+ break;
+ case STORELR:
+ storelr_assemble(i, i_regs, ccadj_);
+ break;
+ case COP0:
+ cop0_assemble(i, i_regs, ccadj_);
+ break;
+ case COP1:
+ cop1_assemble(i, i_regs);
+ break;
+ case C1LS:
+ c1ls_assemble(i, i_regs);
+ break;
+ case COP2:
+ cop2_assemble(i, i_regs);
+ break;
+ case C2LS:
+ c2ls_assemble(i, i_regs, ccadj_);
+ break;
+ case C2OP:
+ c2op_assemble(i, i_regs);
+ break;
+ case MULTDIV:
+ multdiv_assemble(i, i_regs);
+ multdiv_prepare_stall(i, i_regs, ccadj_);
+ break;