RSP LLE plugin. Compiles and runs (slowly, using about 50% CPU) on the OpenPandora.
[mupen64plus-pandora.git] / source / mupen64plus-rsp-z64 / src / rsp_recomp.cpp
CommitLineData
fc5d46b4 1/*
2 * z64
3 *
4 * Copyright (C) 2007 ziggy
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20**/
21
22#include "rsp_recomp.h"
23#include <assert.h>
24
25#define GENDEBUG
26
27struct gen_t {
28 UINT32 crc;
29 int lbc;
30 rsp_bc_t * bc;
31#ifdef GENDEBUG
32 char name[32];
33#endif
34};
35
36struct opinfo_t {
37 int visit, labeled;
38 int label;
39
40 unsigned int nbgen;
41 unsigned int szgen;
42 gen_t * gentable;
43 gen_t * curgen;
44};
45
46struct branch_t {
47 int start, end;
48};
49
50static int curvisit;
51static opinfo_t opinfo[0x1000/4];
52static int jumps[0x1000];
53static unsigned int nb_branches;
54static branch_t branches[256];
55static unsigned int nb_labels;
56static int labels[256];
57
58#define OPI(pc) opinfo[(pc)>>2]
59/*inline*/ void SETLABEL(int pc) {
60 //printf("%x\n", pc);
61 //pc &= 0xfff;
62 assert(pc >= 0 && pc < 0x1000);
63 if (OPI(pc).labeled != curvisit) {
64 labels[nb_labels] = pc;
65 OPI(pc).label = nb_labels++;
66 assert(nb_labels < sizeof(labels)/sizeof(labels[0]));
67 OPI(pc).labeled = curvisit;
68 }
69}
70
71#define ABS(addr) (((addr) << 2) & 0xfff)
72#define REL(offset) ((pc + ((offset) << 2)) & 0xfff)
73
74static UINT32 prep_gen(int pc, UINT32 crc, int & len)
75{
76 UINT32 op;
77 int br = 0;
78
79 branches[nb_branches].start = pc;
80
81 while ( !br )
82 {
83 if (OPI(pc).visit == curvisit) {
84 SETLABEL((pc)&0xfff);
85 SETLABEL((pc+4)&0xfff);
86 break;
87 }
88
89 OPI(pc).visit = curvisit;
90
91 op = ROPCODE(pc);
92 crc = ((crc<<1)|(crc>>31))^op^pc;
93 pc = (pc+4)&0xfff;
94 len++;
95
96 switch (op >> 26)
97 {
98 case 0x00: /* SPECIAL */
99 {
100 switch (op & 0x3f)
101 {
102 case 0x08: /* JR */
103 br = 1;
104 break;
105 case 0x09: /* JALR */
106 //br = 1;
107 break;
108 case 0x0d: /* BREAK */
109 br = 1;
110 break;
111 }
112 break;
113 }
114
115 case 0x01: /* REGIMM */
116 {
117 switch (RTREG)
118 {
119 case 0x00: /* BLTZ */
120 case 0x01: /* BGEZ */
121 SETLABEL(REL(SIMM16));
122 break;
123 case 0x11: /* BGEZAL */
124 //br = 1;
125 break;
126 }
127 break;
128 }
129
130 case 0x02: /* J */
131 SETLABEL(ABS(UIMM26));
132 br = 1;
133 break;
134 case 0x04: /* BEQ */
135 case 0x05: /* BNE */
136 case 0x06: /* BLEZ */
137 case 0x07: /* BGTZ */
138 SETLABEL(REL(SIMM16));
139 break;
140 case 0x03: /* JAL */
141 //SETLABEL(ABS(UIMM26));
142 //br = 1;
143 break;
144 }
145
146 }
147
148 branches[nb_branches++].end = pc;
149 assert(nb_branches < sizeof(branches)/sizeof(branches[0]));
150
151 return crc;
152}
153
154static void rsp_gen(int pc)
155{
156 unsigned int i;
157
158 curvisit++;
159 if (!curvisit) {
160 // we looped, reset all visit counters
161 for (i=0; i<0x1000/4; i++) {
162 opinfo[i].visit = 0;
163 opinfo[i].labeled = 0;
164 }
165 curvisit++;
166 }
167
168 nb_branches = 0;
169 nb_labels = 0;
170
171 int len = 0;
172 UINT32 crc = prep_gen(pc, 0, len);
173
174 for (i=0; i<nb_labels; i++) {
175 if (OPI(labels[i]).visit != curvisit)
176 crc = prep_gen(labels[i], crc, len);
177 }
178
179 opinfo_t * opi = &OPI(pc);
180 if (opi->gentable) {
181 for (i=0; i<opi->nbgen; i++)
182 if (opi->gentable[i].crc == crc) {
183 opi->curgen = opi->gentable + i;
184 return;
185 }
186 }
187 if (opi->nbgen >= opi->szgen) {
188 if (opi->szgen)
189 opi->szgen *= 2;
190 else
191 opi->szgen = 4;
192 opi->gentable = (gen_t *) realloc(opi->gentable, sizeof(gen_t)*(opi->szgen));
193 }
194 gen_t * gen;
195 gen = opi->gentable + opi->nbgen++;
196 gen->crc = crc;
197 opi->curgen = gen;
198
199 // convert to bytecode
200 unsigned int lbc = 0;
201 static rsp_bc_t bc[0x1000*2+10];
202 for (i=0; i<nb_branches; i++) {
203 int pc;
204 int loopc;
205 int cont = 1;
206 rsp_opinfo_t delayed;
207 delayed.op = 0;
208 for (pc = branches[i].start; cont || delayed.op; pc = (pc+4)&0xfff) {
209 UINT32 op = ROPCODE(pc);
210
211 // int realpc = pc;
212 // char s[128];
213 // rsp_dasm_one(s, realpc, op);
214 // printf("%d %3x\t%s\n", lbc, realpc, s);
215
216 rsp_opinfo_t info;
217 rsp_get_opinfo(op, &info);
218 if ((info.flags & RSP_OPINFO_JUMP) && !cont)
219 info.flags = 0;
220 else {
221 int nop = 0;
222 switch (info.op2) {
223 case RSP_SLL:
224 case RSP_SRL:
225 case RSP_SRA:
226 if (RDREG == RTREG && SHIFT == 0)
227 nop = 1;
228 break;
229 }
230 if (cont)
231 jumps[pc] = lbc;
232 if (!nop) {
233 bc[lbc].op = op;
234 bc[lbc].op2 = info.op2;
235 bc[lbc].flags = info.flags | (((pc&0xffc)<<5)-2) | (!cont? (1<<15):0);
236 lbc++;
237 }
238 loopc = (pc+4)&0xfff;
239 }
240 if (delayed.op) {
241 int addop = 0;
242 const UINT32 op = delayed.op;
243 switch (delayed.op2) {
244 case RSP_BLTZ:
245 case RSP_BGEZ:
246 case RSP_BEQ:
247 case RSP_BNE:
248 case RSP_BLEZ:
249 case RSP_BGTZ:
250 addop = RSP_CONDJUMPLOCAL;
251 bc[lbc].flags = (pc + (SIMM16<<2))&0xfff; // address to be resolved later
252 break;
253 case RSP_J:
254 addop = RSP_JUMPLOCAL;
255 bc[lbc].flags = (UIMM26<<2)&0xfff; // address to be resolved later
256 break;
257 case RSP_BGEZAL:
258 addop = RSP_CONDJUMP;
259 break;
260 case RSP_JAL:
261 case RSP_JR:
262 case RSP_JALR:
263 addop = RSP_JUMP;
264 break;
265 }
266 bc[lbc].op = delayed.op;
267 bc[lbc].op2 = addop;
268 lbc++;
269 }
270 if (info.flags & RSP_OPINFO_JUMP) {
271 delayed = info;
272 } else
273 delayed.op = 0;
274 if (((pc + 4)&0xfff) == branches[i].end)
275 cont = 0;
276 }
277 if (bc[lbc-1].op2 != RSP_JUMP &&
278 bc[lbc-1].op2 != RSP_JUMPLOCAL &&
279 bc[lbc-1].op2 != RSP_BREAK &&
280 bc[lbc-1].op2 != RSP_STOP) {
281
282 bc[lbc].op = 0;
283 bc[lbc].op2 = RSP_LOOP;
284 bc[lbc].flags = loopc; // address to be resolved later
285 lbc++;
286 }
287 }
288
289 // resolve local jumps
290 for (i=0; i<lbc; i++) {
291 // printf("%d %x\n", i, bc[i].op2);
292 // if (bc[i].op2 < RSP_CONTROL_OFFS) {
293 // int realpc = (bc[i].flags>>3)&0xffc;
294 // char s[128];
295 // rsp_dasm_one(s, realpc, bc[i].op);
296 // printf("%3x\t%s\n", realpc, s);
297 // }
298 switch (bc[i].op2) {
299 case RSP_JUMPLOCAL:
300 case RSP_CONDJUMPLOCAL:
301 case RSP_LOOP:
302 {
303 // int pc;
304 // for (pc = 0; pc<lbc; pc++)
305 // if (bc[pc].op2 < RSP_CONTROL_OFFS &&
306 // !(bc[pc].flags & (1<<15)) &&
307 // ((bc[pc].flags>>5)<<2) == bc[i].flags)
308 // break;
309 // assert(pc < lbc);
310 // bc[i].flags = pc<<5;
311 bc[i].flags = jumps[bc[i].flags]<<5;
312 break;
313 }
314 }
315 }
316
317 gen->lbc = lbc;
318 gen->bc = (rsp_bc_t *) malloc(sizeof(rsp_bc_t)*lbc);
319 memcpy(gen->bc, bc, sizeof(rsp_bc_t)*lbc);
320}
321
322void rsp_invalidate(int begin, int len)
323{
324 //printf("invalidate %x %x\n", begin, len);
325 begin = 0; len = 0x1000;
326 assert(begin+len<=0x1000);
327 while (len > 0) {
328 OPI(begin).curgen = 0;
329 begin += 4;
330 len -= 4;
331 }
332 rsp.inval_gen = 1;
333}
334
335inline void rsp_execute_one(RSP_REGS & rsp, const UINT32 op)
336{
337 switch (op >> 26)
338 {
339 case 0x12: /* COP2 */
340 {
341 handle_vector_ops(op);
342 break;
343 }
344
345 case 0x32: /* LWC2 */ handle_lwc2(op); break;
346 case 0x3a: /* SWC2 */ handle_swc2(op); break;
347
348 default:
349 {
350 unimplemented_opcode(op);
351 break;
352 }
353 }
354}
355
356static int cond;
357static int run(RSP_REGS & rsp, gen_t * gen)
358{
359 int pc = 0;
360
361 cond = 0;
362 for ( ; ; ) {
363 const rsp_bc_t & bc = gen->bc[pc];
364 const UINT32 op = bc.op;
365 const int op2 = bc.op2;
366
367 // if (op2 < RSP_CONTROL_OFFS) {
368 // int realpc = (bc.flags>>3)&0xffc;
369 // char s[128];
370 // rsp_dasm_one(s, realpc, op);
371 // fprintf(stderr, "%3x\t%s\n", realpc, s);
372 // }
373
374 pc++;
375 switch (op2) {
376 case RSP_LOOP:
377 pc = bc.flags>>5;
378 break;
379 case RSP_JUMPLOCAL:
380 case RSP_CONDJUMPLOCAL:
381 if (cond) {
382 pc = bc.flags>>5;
383 cond = 0;
384 }
385 break;
386 case RSP_JUMP:
387 case RSP_CONDJUMP:
388 if (cond) {
389 return 0;
390 }
391 break;
392
393 #define _LINK(l) rsp.r[l] = ((bc.flags >>3)+8)&0xffc
394 #define _JUMP_PC(a) { cond=1; rsp.nextpc = ((a) & 0xfff); }
395 #define _JUMP_PC_L(a, l) { _LINK(l); _JUMP_PC(a); }
396 #define _JUMP_REL(a) _JUMP_PC(((bc.flags >>3)+4+(a<<2))&0xffc)
397 #define _JUMP_REL_L(a, l) _JUMP_PC_L(((bc.flags >>3)+4+(a<<2))&0xffc, l)
398
399 case RSP_SLL: if (RDREG) RDVAL = (UINT32)RTVAL << SHIFT; break;
400 case RSP_SRL: if (RDREG) RDVAL = (UINT32)RTVAL >> SHIFT; break;
401 case RSP_SRA: if (RDREG) RDVAL = (INT32)RTVAL >> SHIFT; break;
402 case RSP_SLLV: if (RDREG) RDVAL = (UINT32)RTVAL << (RSVAL & 0x1f); break;
403 case RSP_SRLV: if (RDREG) RDVAL = (UINT32)RTVAL >> (RSVAL & 0x1f); break;
404 case RSP_SRAV: if (RDREG) RDVAL = (INT32)RTVAL >> (RSVAL & 0x1f); break;
405 case RSP_JR: _JUMP_PC(RSVAL); break;
406 case RSP_JALR: _JUMP_PC_L(RSVAL, RDREG); break;
407 case RSP_BREAK:
408 {
409 *z64_rspinfo.SP_STATUS_REG |= (SP_STATUS_HALT | SP_STATUS_BROKE );
410 if ((*z64_rspinfo.SP_STATUS_REG & SP_STATUS_INTR_BREAK) != 0 ) {
411 *z64_rspinfo.MI_INTR_REG |= 1;
412 z64_rspinfo.CheckInterrupts();
413 }
414 return 1;
415 }
416 case RSP_ADD: if (RDREG) RDVAL = (INT32)(RSVAL + RTVAL); break;
417 case RSP_ADDU: if (RDREG) RDVAL = (INT32)(RSVAL + RTVAL); break;
418 case RSP_SUB: if (RDREG) RDVAL = (INT32)(RSVAL - RTVAL); break;
419 case RSP_SUBU: if (RDREG) RDVAL = (INT32)(RSVAL - RTVAL); break;
420 case RSP_AND: if (RDREG) RDVAL = RSVAL & RTVAL; break;
421 case RSP_OR: if (RDREG) RDVAL = RSVAL | RTVAL; break;
422 case RSP_XOR: if (RDREG) RDVAL = RSVAL ^ RTVAL; break;
423 case RSP_NOR: if (RDREG) RDVAL = ~(RSVAL | RTVAL); break;
424 case RSP_SLT: if (RDREG) RDVAL = (INT32)RSVAL < (INT32)RTVAL; break;
425 case RSP_SLTU: if (RDREG) RDVAL = (UINT32)RSVAL < (UINT32)RTVAL; break;
426 case RSP_BLTZ: if ((INT32)(RSVAL) < 0) cond = 1; break;
427 case RSP_BGEZ: if ((INT32)(RSVAL) >= 0) cond = 1; break;
428 case RSP_BGEZAL: _LINK(31); if ((INT32)(RSVAL) >= 0) _JUMP_REL(SIMM16); break;
429 case RSP_J: cond = 1; break;
430 case RSP_JAL: _JUMP_PC_L(UIMM26<<2, 31); break;
431 case RSP_BEQ: if (RSVAL == RTVAL) cond = 1; break;
432 case RSP_BNE: if (RSVAL != RTVAL) cond = 1; break;
433 case RSP_BLEZ: if ((INT32)RSVAL <= 0) cond = 1; break;
434 case RSP_BGTZ: if ((INT32)RSVAL > 0) cond = 1; break;
435 case RSP_ADDI: if (RTREG) RTVAL = (INT32)(RSVAL + SIMM16); break;
436 case RSP_ADDIU: if (RTREG) RTVAL = (INT32)(RSVAL + SIMM16); break;
437 case RSP_SLTI: if (RTREG) RTVAL = (INT32)(RSVAL) < ((INT32)SIMM16); break;
438 case RSP_SLTIU: if (RTREG) RTVAL = (UINT32)(RSVAL) < (UINT32)((INT32)SIMM16); break;
439 case RSP_ANDI: if (RTREG) RTVAL = RSVAL & UIMM16; break;
440 case RSP_ORI: if (RTREG) RTVAL = RSVAL | UIMM16; break;
441 case RSP_XORI: if (RTREG) RTVAL = RSVAL ^ UIMM16; break;
442 case RSP_LUI: if (RTREG) RTVAL = UIMM16 << 16; break;
443
444 case RSP_COP0:
445 {
446 switch ((op >> 21) & 0x1f)
447 {
448 case 0x00: /* MFC0 */
449 if (RTREG)
450 RTVAL = get_cop0_reg(RDREG);
451 break;
452 case 0x04: /* MTC0 */
453 set_cop0_reg(RDREG, RTVAL);
454 if (rsp.inval_gen) {
455 rsp.inval_gen = 0;
456 sp_pc = ((bc.flags >>3) + 4)&0xffc;
457 return 2;
458 }
459 break;
460 default:
461 log(M64MSG_WARNING, "unimplemented cop0 %x (%x)\n", (op >> 21) & 0x1f, op);
462 break;
463 }
464 break;
465 }
466
467 case RSP_MFC2:
468 {
469 // 31 25 20 15 10 6 0
470 // ---------------------------------------------------
471 // | 010010 | 00000 | TTTTT | DDDDD | IIII | 0000000 |
472 // ---------------------------------------------------
473 //
474
475 int el = (op >> 7) & 0xf;
476 UINT16 b1 = VREG_B(VS1REG, (el+0) & 0xf);
477 UINT16 b2 = VREG_B(VS1REG, (el+1) & 0xf);
478 if (RTREG) RTVAL = (INT32)(INT16)((b1 << 8) | (b2));
479 break;
480 }
481 case RSP_CFC2:
482 {
483 // 31 25 20 15 10 0
484 // ------------------------------------------------
485 // | 010010 | 00010 | TTTTT | DDDDD | 00000000000 |
486 // ------------------------------------------------
487 //
488
489 // VP to sign extend or to not sign extend ?
490 //if (RTREG) RTVAL = (INT16)rsp.flag[RDREG];
491 if (RTREG) RTVAL = rsp.flag[RDREG];
492 break;
493 }
494 case RSP_MTC2:
495 {
496 // 31 25 20 15 10 6 0
497 // ---------------------------------------------------
498 // | 010010 | 00100 | TTTTT | DDDDD | IIII | 0000000 |
499 // ---------------------------------------------------
500 //
501
502 int el = (op >> 7) & 0xf;
503 VREG_B(VS1REG, (el+0) & 0xf) = (RTVAL >> 8) & 0xff;
504 VREG_B(VS1REG, (el+1) & 0xf) = (RTVAL >> 0) & 0xff;
505 break;
506 }
507 case RSP_CTC2:
508 {
509 // 31 25 20 15 10 0
510 // ------------------------------------------------
511 // | 010010 | 00110 | TTTTT | DDDDD | 00000000000 |
512 // ------------------------------------------------
513 //
514
515 rsp.flag[RDREG] = RTVAL & 0xffff;
516 break;
517 }
518 case RSP_LB: if (RTREG) RTVAL = (INT32)(INT8)READ8(RSVAL + SIMM16); break;
519 case RSP_LH: if (RTREG) RTVAL = (INT32)(INT16)READ16(RSVAL + SIMM16); break;
520 case RSP_LW: if (RTREG) RTVAL = READ32(RSVAL + SIMM16); break;
521 case RSP_LBU: if (RTREG) RTVAL = (UINT8)READ8(RSVAL + SIMM16); break;
522 case RSP_LHU: if (RTREG) RTVAL = (UINT16)READ16(RSVAL + SIMM16); break;
523 case RSP_SB: WRITE8(RSVAL + SIMM16, RTVAL); break;
524 case RSP_SH: WRITE16(RSVAL + SIMM16, RTVAL); break;
525 case RSP_SW: WRITE32(RSVAL + SIMM16, RTVAL); break;
526
527 default:
528 switch (op >> 26)
529 {
530 case 0x12: /* COP2 */
531 handle_vector_ops(op);
532 break;
533 case 0x32: /* LWC2 */
534 handle_lwc2(op);
535 break;
536 case 0x3a: /* SWC2 */
537 handle_swc2(op);
538 break;
539 }
540 }
541 }
542}
543
544int rsp_gen_cache_hit;
545int rsp_gen_cache_miss;
546int rsp_jump(int pc)
547{
548 pc &= 0xfff;
549 sp_pc = pc;
550 rsp.nextpc = ~0;
551 opinfo_t * opi = &OPI(pc);
552 gen_t * gen = opi->curgen;
553 rsp_gen_cache_hit++;
554 if (!gen) {
555 rsp_gen_cache_miss++;
556 rsp_gen(pc);
557 }
558 gen = opi->curgen;
559 //fprintf(stderr, "rsp_jump %x (%s)\n", pc, gen->name);
560
561 int res = run(rsp, gen);
562
563 //fprintf(stderr, "r31 %x from %x nextpc %x pc %x res %d (%s)\n", rsp.r[31], pc, rsp.nextpc, sp_pc, res, gen->name);
564 if (rsp.nextpc != ~0U)
565 {
566 sp_pc = (rsp.nextpc & 0xfff);
567 rsp.nextpc = ~0U;
568 }
569 else
570 {
571 //sp_pc = ((sp_pc+4)&0xfff);
572 }
573 return res;
574}