4 * Copyright (C) 2007 ziggy
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License along
17 * with this program; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
23 Nintendo/SGI Reality Signal Processor (RSP) emulator
25 Written by Ville Linde
29 #include "rsp_opinfo.h"
30 #include <math.h> // sqrt
// Compile-time switch. When non-zero, the exec_output log stream further down
// is declared and rsp_init() opens "rsp_execute.txt" for it.
36 #define LOG_INSTRUCTION_EXECUTION 0
// Debug helper: prints vector register number x followed by its eight 16-bit
// elements (read through VREG_S, elements 0..7 in order) as four-digit hex
// values separated by '|'.
40 #define PRINT_VECREG(x) printf("V%d: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X\n", (x), \
41 (UINT16)VREG_S((x),0), (UINT16)VREG_S((x),1), \
42 (UINT16)VREG_S((x),2), (UINT16)VREG_S((x),3), \
43 (UINT16)VREG_S((x),4), (UINT16)VREG_S((x),5), \
44 (UINT16)VREG_S((x),6), (UINT16)VREG_S((x),7))
// Disassembler entry point defined outside this file; unimplemented_opcode()
// uses it to render one opcode at a given pc into the supplied buffer.
47 extern offs_t rsp_dasm_one(char *buffer, offs_t pc, UINT32 op);
49 #if LOG_INSTRUCTION_EXECUTION
// Log stream, only declared when LOG_INSTRUCTION_EXECUTION is non-zero.
50 static FILE *exec_output;
54 // INLINE void sp_set_status(UINT32 status)
60 // cpunum_set_input_line(1, INPUT_LINE_HALT, ASSERT_LINE);
61 // rsp_sp_status |= SP_STATUS_HALT;
65 // rsp_sp_status |= SP_STATUS_BROKE;
67 // if (rsp_sp_status & SP_STATUS_INTR_BREAK)
69 // signal_rcp_interrupt(SP_INTERRUPT);
// NOTE(review): no reader or writer of this array is visible in this part of
// the file; presumably per-opcode timing accumulators — confirm against users.
116 uint64_t rsptimings[512];
// Branch helpers. Each JUMP_* loads rsp.nextpc with 0x04001000 OR-ed with a
// target that has been masked to its low 12 bits (0xfff):
//   JUMP_ABS : target = addr << 2
//   JUMP_REL : target = sp_pc + (offset << 2)
//   JUMP_PC  : target = addr, used as-is
// The *_L forms additionally store sp_pc + 4 into rsp.r[l]; LINK(l) performs
// only that store. sp_pc is defined outside the visible part of the file.
// NOTE(review): the JUMP_* bodies are bare { } blocks rather than
// do { } while (0), so "JUMP_x(...);" directly before an `else` will not
// parse, and LINK expands to an unparenthesized assignment.
121 #define JUMP_ABS(addr) { rsp.nextpc = 0x04001000 | (((addr) << 2) & 0xfff); }
122 #define JUMP_ABS_L(addr,l) { rsp.nextpc = 0x04001000 | (((addr) << 2) & 0xfff); rsp.r[l] = sp_pc + 4; }
123 #define JUMP_REL(offset) { rsp.nextpc = 0x04001000 | ((sp_pc + ((offset) << 2)) & 0xfff); }
124 #define JUMP_REL_L(offset,l) { rsp.nextpc = 0x04001000 | ((sp_pc + ((offset) << 2)) & 0xfff); rsp.r[l] = sp_pc + 4; }
125 #define JUMP_PC(addr) { rsp.nextpc = 0x04001000 | ((addr) & 0xfff); }
126 #define JUMP_PC_L(addr,l) { rsp.nextpc = 0x04001000 | ((addr) & 0xfff); rsp.r[l] = sp_pc + 4; }
127 #define LINK(l) rsp.r[l] = sp_pc + 4
// Operand fields of a vector instruction, extracted from a variable named
// `op` that must be in scope at the point of use:
//   VDREG  = bits 10..6  (5 bits)
//   VS1REG = bits 15..11 (5 bits)
//   VS2REG = bits 20..16 (5 bits)
//   EL     = bits 24..21 (4 bits), used as the row index of the element tables
130 #define VDREG ((op >> 6) & 0x1f)
131 #define VS1REG ((op >> 11) & 0x1f)
132 #define VS2REG ((op >> 16) & 0x1f)
133 #define EL ((op >> 21) & 0xf)
// Bit-field access to a vector register stored as the 64-bit words
// rsp.v[reg].d[]. Suffix _B/_S/_L selects 8-, 16- or 32-bit fields.
//
// S_VREG_x(offset): shift, in bits, of field `offset` inside its 64-bit word.
// Offset 0 maps to the most significant field of its word, e.g.
// S_VREG_B(0) == 56, S_VREG_S(0) == 48, S_VREG_L(0) == 32.
// (assumes offset is within 0..15 / 0..7 / 0..3 respectively — TODO confirm
// that callers never pass anything else; the macros do not check.)
135 #define S_VREG_B(offset) (((15 - (offset)) & 0x07) << 3)
136 #define S_VREG_S(offset) (((7 - (offset)) & 0x03) << 4)
137 #define S_VREG_L(offset) (((3 - (offset)) & 0x01) << 5)
// M_VREG_x(offset): UINT64 mask covering that field inside its word.
139 #define M_VREG_B(offset) ((UINT64)0x00FF << S_VREG_B(offset))
140 #define M_VREG_S(offset) ((UINT64)0x0000FFFFul << S_VREG_S(offset))
141 #define M_VREG_L(offset) ((UINT64)0x00000000FFFFFFFFull << S_VREG_L(offset))
// R_VREG_x(reg, offset): read the field. The word index is the reversed offset
// divided by the number of fields per word, so the lower half of the offsets
// lives in d[1] and the upper half in d[0]. Only the _S form casts its result
// to a signed type (INT16); _B and _L yield the masked unsigned value.
143 #define R_VREG_B(reg, offset) ((rsp.v[(reg)].d[(15 - (offset)) >> 3] >> S_VREG_B(offset)) & 0x00FF)
144 #define R_VREG_S(reg, offset) (INT16)((rsp.v[(reg)].d[(7 - (offset)) >> 2] >> S_VREG_S(offset)) & 0x0000FFFFul)
145 #define R_VREG_L(reg, offset) ((rsp.v[(reg)].d[(3 - (offset)) >> 1] >> S_VREG_L(offset)) & 0x00000000FFFFFFFFull)
// W_VREG_x(reg, offset, val): read-modify-write of the field. val is widened
// to UINT64, shifted into place and masked, so any bits of val above the field
// width (including sign extension) are discarded. reg and offset are evaluated
// more than once — do not pass expressions with side effects.
147 #define W_VREG_B(reg, offset, val) (rsp.v[(reg)].d[(15 - (offset)) >> 3] = (rsp.v[(reg)].d[(15 - (offset)) >> 3] & ~M_VREG_B(offset)) | (M_VREG_B(offset) & ((UINT64)(val) << S_VREG_B(offset))))
148 #define W_VREG_S(reg, offset, val) (rsp.v[(reg)].d[(7 - (offset)) >> 2] = (rsp.v[(reg)].d[(7 - (offset)) >> 2] & ~M_VREG_S(offset)) | (M_VREG_S(offset) & ((UINT64)(val) << S_VREG_S(offset))))
149 #define W_VREG_L(reg, offset, val) (rsp.v[(reg)].d[(3 - (offset)) >> 1] = (rsp.v[(reg)].d[(3 - (offset)) >> 1] & ~M_VREG_L(offset)) | (M_VREG_L(offset) & ((UINT64)(val) << S_VREG_L(offset))))
// Element selection for vector operations; x is the element specifier and z
// the lane. VEC_EL_1 ignores x and yields z unchanged (it does not consult
// vector_elements_1). VEC_EL_2 looks the lane up in vector_elements_2[x][z].
152 #define VEC_EL_1(x,z) (z)
153 #define VEC_EL_2(x,z) (vector_elements_2[(x)][(z)])
/*
 * Accumulator slice helpers.
 *
 * ACCUM(x) names the 64-bit `q` view of accumulator lane x. Three 16-bit
 * slices are addressed inside that value:
 *   high   : bits 63..48  (S_ACCUM_H == 48)
 *   middle : bits 47..32  (S_ACCUM_M == 32)
 *   low    : bits 31..16  (S_ACCUM_L == 16)
 * Bits 15..0 are not touched by any macro in this group.
 *
 * M_ACCUM_x  INT64 mask covering one slice.
 * R_ACCUM_x  reads one slice and returns it as INT16.
 * W_ACCUM_x  replaces one slice with the low 16 bits of y and leaves the rest
 *            of the accumulator unchanged; x is evaluated more than once.
 *
 * All shifting is carried out on UINT64 and converted back afterwards:
 * left-shifting 0xFFFF or a negative y by 48 in a signed 64-bit type is
 * undefined behaviour in C, and right-shifting a negative value is
 * implementation-defined. The resulting bit patterns are the same as the
 * signed forms produce on two's-complement targets.
 */
#define ACCUM(x)        rsp.accum[((x))].q

#define S_ACCUM_H       (3 << 4)
#define S_ACCUM_M       (2 << 4)
#define S_ACCUM_L       (1 << 4)

#define M_ACCUM_H       ((INT64)((UINT64)0x0000FFFF << S_ACCUM_H))
#define M_ACCUM_M       ((INT64)((UINT64)0x0000FFFF << S_ACCUM_M))
#define M_ACCUM_L       ((INT64)((UINT64)0x0000FFFF << S_ACCUM_L))

#define R_ACCUM_H(x)    ((INT16)(((UINT64)ACCUM(x) >> S_ACCUM_H) & 0x00FFFF))
#define R_ACCUM_M(x)    ((INT16)(((UINT64)ACCUM(x) >> S_ACCUM_M) & 0x00FFFF))
#define R_ACCUM_L(x)    ((INT16)(((UINT64)ACCUM(x) >> S_ACCUM_L) & 0x00FFFF))

#define W_ACCUM_H(x, y) (ACCUM(x) = (ACCUM(x) & ~M_ACCUM_H) | (M_ACCUM_H & (INT64)((UINT64)(y) << S_ACCUM_H)))
#define W_ACCUM_M(x, y) (ACCUM(x) = (ACCUM(x) & ~M_ACCUM_M) | (M_ACCUM_M & (INT64)((UINT64)(y) << S_ACCUM_M)))
#define W_ACCUM_L(x, y) (ACCUM(x) = (ACCUM(x) & ~M_ACCUM_L) | (M_ACCUM_L & (INT64)((UINT64)(y) << S_ACCUM_L)))
// NOTE(review): no use of rsp_icount is visible in this part of the file.
176 static int rsp_icount;
// Lvalue aliases for SP registers: each one casts the corresponding pointer
// held in z64_rspinfo to UINT32* and dereferences it, so the alias can be both
// read and assigned.
179 #define rsp_sp_status (*(UINT32*)z64_rspinfo.SP_STATUS_REG)
180 #define sp_mem_addr (*(UINT32*)z64_rspinfo.SP_MEM_ADDR_REG)
181 #define sp_dram_addr (*(UINT32*)z64_rspinfo.SP_DRAM_ADDR_REG)
182 #define sp_semaphore (*(UINT32*)z64_rspinfo.SP_SEMAPHORE_REG)
184 #define sp_dma_rlength (*(UINT32*)z64_rspinfo.SP_RD_LEN_REG)
185 #define sp_dma_wlength (*(UINT32*)z64_rspinfo.SP_WR_LEN_REG)
189 /*****************************************************************************/
// Reads COP0 register `reg`.
//   reg 0..7  : forwarded to sp_read_reg(reg)
//   reg 8..15 : forwarded to n64_dp_reg_r(reg - 8, 0)
//   otherwise : the register number is logged at M64MSG_ERROR
// NOTE(review): the value returned for an out-of-range reg is not visible on
// the lines shown here — confirm the function does not fall off its end.
191 UINT32 get_cop0_reg(int reg)
193 if (reg >= 0 && reg < 8)
195 return sp_read_reg(reg);
197 else if (reg >= 8 && reg < 16)
199 return n64_dp_reg_r(reg - 8, 0x00000000);
// NOTE(review): unlike the message in set_cop0_reg, this format string has no
// trailing "\n".
203 log(M64MSG_ERROR, "RSP: get_cop0_reg: %d", reg);
// Writes `data` to COP0 register `reg`.
//   reg 0..7  : forwarded to sp_write_reg(reg, data)
//   reg 8..15 : forwarded to n64_dp_reg_w(reg - 8, data, 0)
//   otherwise : register number and data are logged at M64MSG_ERROR
208 void set_cop0_reg(int reg, UINT32 data)
210 if (reg >= 0 && reg < 8)
212 sp_write_reg(reg, data);
214 else if (reg >= 8 && reg < 16)
216 n64_dp_reg_w(reg - 8, data, 0x00000000);
220 log(M64MSG_ERROR, "RSP: set_cop0_reg: %d, %08X\n", reg, data);
// NOTE(review): got_unimp is not referenced on any line visible here.
224 static int got_unimp;
// Reports an opcode the interpreter does not handle.
// Visible steps: disassemble `op` at rsp.ppc and print it to stdout; write a
// disassembly of the 0x1000 bytes starting at 0x04001000 to "rsp_disasm.txt";
// write the 0x1000 bytes starting at 0x04000000 to "rsp_dmem.bin"; finally log
// the opcode at M64MSG_ERROR.
// NOTE(review): the lines between these steps are not visible, so it cannot be
// confirmed here whether the two dumps are conditionally compiled, whether the
// fopen() results are checked, or whether the files are closed.
225 void unimplemented_opcode(UINT32 op)
230 rsp_dasm_one(string, rsp.ppc, op);
231 printf("%08X: %s\n", rsp.ppc, string);
// Dump: one line per 32-bit word, "address: raw opcode  disassembly".
239 dasm = fopen("rsp_disasm.txt", "wt");
241 for (i=0; i < 0x1000; i+=4)
243 UINT32 opcode = ROPCODE(0x04001000 + i);
244 rsp_dasm_one(string, 0x04001000 + i, opcode);
245 fprintf(dasm, "%08X: %08X %s\n", 0x04001000 + i, opcode, string);
// Dump: raw bytes read one at a time through READ8.
254 dmem = fopen("rsp_dmem.bin", "wb");
256 for (i=0; i < 0x1000; i++)
258 fputc(READ8(0x04000000 + i), dmem);
// op >> 26 is the top six bits of the instruction word, shown in hex and decimal.
264 log(M64MSG_ERROR, "RSP: unknown opcode %02X (%d) (%08X) at %08X\n", op >> 26, op >> 26, op, rsp.ppc);
267 /*****************************************************************************/
// 16 x 8 lane table, one row per element specifier (row labels on the right).
// Every row is a permutation of 0..7. VEC_EL_1 does not read this table (it
// returns its lane argument unchanged), and no other reader is visible in this
// part of the file.
269 const int vector_elements_1[16][8] =
271 { 0, 1, 2, 3, 4, 5, 6, 7 }, // none
272 { 0, 1, 2, 3, 4, 5, 6 ,7 }, // ???
273 { 1, 3, 5, 7, 0, 2, 4, 6 }, // 0q
274 { 0, 2, 4, 6, 1, 3, 5, 7 }, // 1q
275 { 1, 2, 3, 5, 6, 7, 0, 4 }, // 0h
276 { 0, 2, 3, 4, 6, 7, 1, 5 }, // 1h
277 { 0, 1, 3, 4, 5, 7, 2, 6 }, // 2h
278 { 0, 1, 2, 4, 5, 6, 3, 7 }, // 3h
279 { 1, 2, 3, 4, 5, 6, 7, 0 }, // 0
280 { 0, 2, 3, 4, 5, 6, 7, 1 }, // 1
281 { 0, 1, 3, 4, 5, 6, 7, 2 }, // 2
282 { 0, 1, 2, 4, 5, 6, 7, 3 }, // 3
283 { 0, 1, 2, 3, 5, 6, 7, 4 }, // 4
284 { 0, 1, 2, 3, 4, 6, 7, 5 }, // 5
285 { 0, 1, 2, 3, 4, 5, 7, 6 }, // 6
286 { 0, 1, 2, 3, 4, 5, 6, 7 }, // 7
// Source-lane table read through VEC_EL_2(EL, lane): the row is the 4-bit
// element specifier of the instruction, the column is the destination lane,
// and the entry is the lane of the second source vector to use.
//   rows 0-1  : identity (each lane uses its own counterpart)
//   rows 2-3  : one lane of every pair is repeated across the pair
//   rows 4-7  : one lane of every group of four is repeated across the group
//   rows 8-15 : a single lane (0..7) is repeated across all eight lanes
289 const int vector_elements_2[16][8] =
291 { 0, 1, 2, 3, 4, 5, 6, 7 }, // none
292 { 0, 1, 2, 3, 4, 5, 6, 7 }, // ???
293 { 0, 0, 2, 2, 4, 4, 6, 6 }, // 0q
294 { 1, 1, 3, 3, 5, 5, 7, 7 }, // 1q
295 { 0, 0, 0, 0, 4, 4, 4, 4 }, // 0h
296 { 1, 1, 1, 1, 5, 5, 5, 5 }, // 1h
297 { 2, 2, 2, 2, 6, 6, 6, 6 }, // 2h
298 { 3, 3, 3, 3, 7, 7, 7, 7 }, // 3h
299 { 0, 0, 0, 0, 0, 0, 0, 0 }, // 0
300 { 1, 1, 1, 1, 1, 1, 1, 1 }, // 1
301 { 2, 2, 2, 2, 2, 2, 2, 2 }, // 2
302 { 3, 3, 3, 3, 3, 3, 3, 3 }, // 3
303 { 4, 4, 4, 4, 4, 4, 4, 4 }, // 4
304 { 5, 5, 5, 5, 5, 5, 5, 5 }, // 5
305 { 6, 6, 6, 6, 6, 6, 6, 6 }, // 6
306 { 7, 7, 7, 7, 7, 7, 7, 7 }, // 7
// Resets the emulator state: zero-fills the global `rsp` structure and sets
// sp_pc to 0. When LOG_INSTRUCTION_EXECUTION is non-zero it also opens
// "rsp_execute.txt" as the exec_output log stream.
// NOTE(review): `info` is not used on any line visible here.
309 void rsp_init(RSP_INFO info)
311 #if LOG_INSTRUCTION_EXECUTION
312 exec_output = fopen("rsp_execute.txt", "wt");
315 memset(&rsp, 0, sizeof(rsp));
318 sp_pc = 0; //0x4001000;
320 //rsp_invalidate(0, 0x1000);
// Executes one LWC2 (vector load) instruction word.
//
// Fields decoded from op:
//   dest   = bits 20..16, the vector register that receives the data
//   base   = bits 25..21, scalar register added to the offset (0 = no base)
//   index  = bits 10..7,  starting byte index inside the vector register
//   offset = bits 6..0
// Bits 15..11 select the load variant handled by the switch. Every variant
// forms its effective address `ea` from the offset scaled by the variant's
// access size, plus rsp.r[base] when base is non-zero, and then copies bytes
// from memory (READ8) into the register.
// unimplemented_opcode(op) is reached for encodings the switch does not
// handle (presumably its default branch — the label is not visible here).
329 void handle_lwc2(UINT32 op)
333 int dest = (op >> 16) & 0x1f;
334 int base = (op >> 21) & 0x1f;
335 int index = (op >> 7) & 0xf;
336 int offset = (op & 0x7f);
// Sets bit 6 and everything above it, i.e. turns the 7-bit field into a
// negative int. NOTE(review): the condition guarding this statement, if any,
// is on a line not visible here.
338 offset |= 0xffffffc0;
340 switch ((op >> 11) & 0x1f)
344 // 31 25 20 15 10 6 0
345 // --------------------------------------------------
346 // | 110010 | BBBBB | TTTTT | 00000 | IIII | Offset |
347 // --------------------------------------------------
349 // Load 1 byte to vector byte index
351 ea = (base) ? rsp.r[base] + offset : offset;
352 VREG_B(dest, index) = READ8(ea);
357 // 31 25 20 15 10 6 0
358 // --------------------------------------------------
359 // | 110010 | BBBBB | TTTTT | 00001 | IIII | Offset |
360 // --------------------------------------------------
362 // Loads 2 bytes starting from vector byte index
364 ea = (base) ? rsp.r[base] + (offset * 2) : (offset * 2);
368 // VP need mask i and ea ?
369 for (i=index; i < end; i++)
371 VREG_B(dest, i) = READ8(ea);
378 // 31 25 20 15 10 6 0
379 // --------------------------------------------------
380 // | 110010 | BBBBB | TTTTT | 00010 | IIII | Offset |
381 // --------------------------------------------------
383 // Loads 4 bytes starting from vector byte index
385 ea = (base) ? rsp.r[base] + (offset * 4) : (offset * 4);
389 // VP need mask i and ea ?
390 for (i=index; i < end; i++)
392 VREG_B(dest, i) = READ8(ea);
399 // 31 25 20 15 10 6 0
400 // --------------------------------------------------
401 // | 110010 | BBBBB | TTTTT | 00011 | IIII | Offset |
402 // --------------------------------------------------
404 // Loads 8 bytes starting from vector byte index
406 ea = (base) ? rsp.r[base] + (offset * 8) : (offset * 8);
410 // VP need mask i and ea ?
411 for (i=index; i < end; i++)
413 VREG_B(dest, i) = READ8(ea);
420 // 31 25 20 15 10 6 0
421 // --------------------------------------------------
422 // | 110010 | BBBBB | TTTTT | 00100 | IIII | Offset |
423 // --------------------------------------------------
425 // Loads up to 16 bytes starting from vector byte index
427 ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16);
// Stop at the next 16-byte boundary of ea, and never run past byte 15 of the register.
429 end = index + (16 - (ea & 0xf));
430 if (end > 16) end = 16;
431 for (i=index; i < end; i++)
433 VREG_B(dest, i) = READ8(ea);
440 // 31 25 20 15 10 6 0
441 // --------------------------------------------------
442 // | 110010 | BBBBB | TTTTT | 00101 | IIII | Offset |
443 // --------------------------------------------------
445 // Loads up to 16 bytes starting from right side until 16-byte boundary
447 ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16);
449 index = 16 - ((ea & 0xf) - index);
452 //assert(index == 0);
454 for (i=index; i < end; i++)
456 VREG_B(dest, i) = READ8(ea);
463 // 31 25 20 15 10 6 0
464 // --------------------------------------------------
465 // | 110010 | BBBBB | TTTTT | 00110 | IIII | Offset |
466 // --------------------------------------------------
468 // Loads a byte as the upper 8 bits of each element
470 ea = (base) ? rsp.r[base] + (offset * 8) : (offset * 8);
472 for (i=0; i < 8; i++)
// The source byte position wraps inside a 16-byte window (& 0xf).
474 VREG_S(dest, i) = READ8(ea + (((16-index) + i) & 0xf)) << 8;
480 // 31 25 20 15 10 6 0
481 // --------------------------------------------------
482 // | 110010 | BBBBB | TTTTT | 00111 | IIII | Offset |
483 // --------------------------------------------------
485 // Loads a byte as the bits 14-7 of each element
487 ea = (base) ? rsp.r[base] + (offset * 8) : (offset * 8);
489 for (i=0; i < 8; i++)
491 VREG_S(dest, i) = READ8(ea + (((16-index) + i) & 0xf)) << 7;
497 // 31 25 20 15 10 6 0
498 // --------------------------------------------------
499 // | 110010 | BBBBB | TTTTT | 01000 | IIII | Offset |
500 // --------------------------------------------------
502 // Loads a byte as the bits 14-7 of each element, with 2-byte stride
504 ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16);
506 for (i=0; i < 8; i++)
508 VREG_S(dest, i) = READ8(ea + (((16-index) + (i<<1)) & 0xf)) << 7;
514 // 31 25 20 15 10 6 0
515 // --------------------------------------------------
516 // | 110010 | BBBBB | TTTTT | 01001 | IIII | Offset |
517 // --------------------------------------------------
519 // Loads a byte as the bits 14-7 of upper or lower quad, with 4-byte stride
521 // fatalerror("RSP: LFV\n");
523 //if (index & 0x7) fatalerror("RSP: LFV: index = %d at %08X\n", index, rsp.ppc);
525 ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16);
527 // not sure what happens if 16-byte boundary is crossed...
528 //if ((ea & 0xf) > 0) fatalerror("RSP: LFV: 16-byte boundary crossing at %08X, recheck this!\n", rsp.ppc);
// Four consecutive 16-bit elements are written, starting at element index >> 1.
530 end = (index >> 1) + 4;
532 for (i=index >> 1; i < end; i++)
534 VREG_S(dest, i) = READ8(ea) << 7;
541 // 31 25 20 15 10 6 0
542 // --------------------------------------------------
543 // | 110010 | BBBBB | TTTTT | 01010 | IIII | Offset |
544 // --------------------------------------------------
546 // Loads the full 128-bit vector starting from vector byte index and wrapping to index 0
547 // after byte index 15
549 ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16);
551 // not sure what happens if 16-byte boundary is crossed...
552 //if ((ea & 0xf) > 0) fatalerror("RSP: LWV: 16-byte boundary crossing at %08X, recheck this!\n", rsp.ppc);
// Sixteen iterations; the register byte index wraps through "i & 0xf".
554 end = (16 - index) + 16;
556 for (i=(16 - index); i < end; i++)
558 VREG_B(dest, i & 0xf) = READ8(ea);
565 // 31 25 20 15 10 6 0
566 // --------------------------------------------------
567 // | 110010 | BBBBB | TTTTT | 01011 | IIII | Offset |
568 // --------------------------------------------------
570 // Loads one element to maximum of 8 vectors, while incrementing element index
572 // FIXME: has a small problem with odd indices
580 element = 7 - (index >> 1);
582 //if (index & 1) fatalerror("RSP: LTV: index = %d\n", index);
584 ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16);
// Rounds ea + 8 down to a 16-byte boundary, then adds the low bit of index.
586 ea = ((ea + 8) & ~0xf) + (index & 1);
// One 16-bit element (two bytes) is written into each register vs..ve-1; the
// target element advances by one per register and wraps within the register.
587 for (i=vs; i < ve; i++)
589 element = ((8 - (index >> 1) + (i-vs)) << 1);
590 VREG_B(i, (element & 0xf)) = READ8(ea);
591 VREG_B(i, ((element+1) & 0xf)) = READ8(ea+1);
600 unimplemented_opcode(op);
// Executes one SWC2 (vector store) instruction word.
//
// Fields decoded from op:
//   dest   = bits 20..16, the vector register whose contents are stored
//            (the name mirrors handle_lwc2; here it is the data source)
//   base   = bits 25..21, scalar register added to the offset (0 = no base)
//   index  = bits 10..7,  starting byte index inside the vector register
//   offset = bits 6..0
// Bits 15..11 select the store variant handled by the switch. Every variant
// forms its effective address `ea` from the offset scaled by the variant's
// access size, plus rsp.r[base] when base is non-zero, and then writes
// register bytes to memory (WRITE8 / WRITE16).
// unimplemented_opcode(op) is reached for encodings the switch does not
// handle (presumably its default branch — the label is not visible here).
606 void handle_swc2(UINT32 op)
611 int dest = (op >> 16) & 0x1f;
612 int base = (op >> 21) & 0x1f;
613 int index = (op >> 7) & 0xf;
614 int offset = (op & 0x7f);
// Sets bit 6 and everything above it, i.e. turns the 7-bit field into a
// negative int. NOTE(review): the condition guarding this statement, if any,
// is on a line not visible here.
616 offset |= 0xffffffc0;
618 switch ((op >> 11) & 0x1f)
622 // 31 25 20 15 10 6 0
623 // --------------------------------------------------
624 // | 111010 | BBBBB | TTTTT | 00000 | IIII | Offset |
625 // --------------------------------------------------
627 // Stores 1 byte from vector byte index
629 ea = (base) ? rsp.r[base] + offset : offset;
630 WRITE8(ea, VREG_B(dest, index));
635 // 31 25 20 15 10 6 0
636 // --------------------------------------------------
637 // | 111010 | BBBBB | TTTTT | 00001 | IIII | Offset |
638 // --------------------------------------------------
640 // Stores 2 bytes starting from vector byte index
642 ea = (base) ? rsp.r[base] + (offset * 2) : (offset * 2);
646 for (i=index; i < end; i++)
648 WRITE8(ea, VREG_B(dest, i));
655 // 31 25 20 15 10 6 0
656 // --------------------------------------------------
657 // | 111010 | BBBBB | TTTTT | 00010 | IIII | Offset |
658 // --------------------------------------------------
660 // Stores 4 bytes starting from vector byte index
662 ea = (base) ? rsp.r[base] + (offset * 4) : (offset * 4);
666 for (i=index; i < end; i++)
668 WRITE8(ea, VREG_B(dest, i));
675 // 31 25 20 15 10 6 0
676 // --------------------------------------------------
677 // | 111010 | BBBBB | TTTTT | 00011 | IIII | Offset |
678 // --------------------------------------------------
680 // Stores 8 bytes starting from vector byte index
682 ea = (base) ? rsp.r[base] + (offset * 8) : (offset * 8);
686 for (i=index; i < end; i++)
688 WRITE8(ea, VREG_B(dest, i));
695 // 31 25 20 15 10 6 0
696 // --------------------------------------------------
697 // | 111010 | BBBBB | TTTTT | 00100 | IIII | Offset |
698 // --------------------------------------------------
700 // Stores up to 16 bytes starting from vector byte index until 16-byte boundary
702 ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16);
// Byte count is the distance from ea to the next 16-byte boundary; the
// register byte index wraps through "i & 0xf".
704 end = index + (16 - (ea & 0xf));
706 // printf("SQV %d\n", end-index);
709 for (i=index; i < end; i++)
711 WRITE8(ea, VREG_B(dest, i & 0xf));
718 // 31 25 20 15 10 6 0
719 // --------------------------------------------------
720 // | 111010 | BBBBB | TTTTT | 00101 | IIII | Offset |
721 // --------------------------------------------------
723 // Stores up to 16 bytes starting from right side until 16-byte boundary
726 ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16);
// Byte count is ea's offset within its 16-byte line; `o` shifts the register
// byte index so that the bytes come from the tail of the register.
728 end = index + (ea & 0xf);
729 o = (16 - (ea & 0xf)) & 0xf;
732 // printf("SRV %d\n", end-index);
735 for (i=index; i < end; i++)
737 WRITE8(ea, VREG_B(dest, ((i + o) & 0xf)));
744 // 31 25 20 15 10 6 0
745 // --------------------------------------------------
746 // | 111010 | BBBBB | TTTTT | 00110 | IIII | Offset |
747 // --------------------------------------------------
749 // Stores upper 8 bits of each element
751 ea = (base) ? rsp.r[base] + (offset * 8) : (offset * 8);
754 for (i=index; i < end; i++)
// NOTE(review): the condition choosing between the two writes below is on a
// line not visible here.
758 WRITE8(ea, VREG_B(dest, ((i & 0xf) << 1)));
762 WRITE8(ea, VREG_S(dest, (i & 0x7)) >> 7);
770 // 31 25 20 15 10 6 0
771 // --------------------------------------------------
772 // | 111010 | BBBBB | TTTTT | 00111 | IIII | Offset |
773 // --------------------------------------------------
775 // Stores bits 14-7 of each element
777 ea = (base) ? rsp.r[base] + (offset * 8) : (offset * 8);
780 for (i=index; i < end; i++)
// NOTE(review): the condition choosing between the two writes below is on a
// line not visible here.
784 WRITE8(ea, VREG_S(dest, (i & 0x7)) >> 7);
788 WRITE8(ea, VREG_B(dest, ((i & 0x7) << 1)));
796 // 31 25 20 15 10 6 0
797 // --------------------------------------------------
798 // | 111010 | BBBBB | TTTTT | 01000 | IIII | Offset |
799 // --------------------------------------------------
801 // Stores bits 14-7 of each element, with 2-byte stride
803 ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16);
805 for (i=0; i < 8; i++)
// d = low 7 bits of the first byte shifted up by one, joined with the top bit
// of the following byte; both byte positions wrap within the register.
807 UINT8 d = ((VREG_B(dest, ((index + (i << 1) + 0) & 0xf))) << 1) |
808 ((VREG_B(dest, ((index + (i << 1) + 1) & 0xf))) >> 7);
817 // 31 25 20 15 10 6 0
818 // --------------------------------------------------
819 // | 111010 | BBBBB | TTTTT | 01001 | IIII | Offset |
820 // --------------------------------------------------
822 // Stores bits 14-7 of upper or lower quad, with 4-byte stride
824 // FIXME: only works for index 0 and index 8
827 log(M64MSG_WARNING, "SFV: index = %d at %08X\n", index, rsp.ppc);
829 ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16);
// Four consecutive 16-bit elements are stored, starting at element index >> 1.
834 end = (index >> 1) + 4;
836 for (i=index >> 1; i < end; i++)
838 WRITE8(ea + (eaoffset & 0xf), VREG_S(dest, i) >> 7);
845 // 31 25 20 15 10 6 0
846 // --------------------------------------------------
847 // | 111010 | BBBBB | TTTTT | 01010 | IIII | Offset |
848 // --------------------------------------------------
850 // Stores the full 128-bit vector starting from vector byte index and wrapping to index 0
851 // after byte index 15
853 ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16);
860 for (i=index; i < end; i++)
862 WRITE8(ea + (eaoffset & 0xf), VREG_B(dest, i & 0xf));
869 // 31 25 20 15 10 6 0
870 // --------------------------------------------------
871 // | 111010 | BBBBB | TTTTT | 01011 | IIII | Offset |
872 // --------------------------------------------------
874 // Stores one element from maximum of 8 vectors, while incrementing element index
876 int element, eaoffset;
882 element = 8 - (index >> 1);
883 //if (index & 0x1) fatalerror("RSP: STV: index = %d at %08X\n", index, rsp.ppc);
885 ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16);
887 //if (ea & 0x1) fatalerror("RSP: STV: ea = %08X at %08X\n", ea, rsp.ppc);
// Starting byte position inside the 16-byte line; wrapped with & 0xf at the write.
889 eaoffset = (ea & 0xf) + (element * 2);
892 for (i=vs; i < ve; i++)
894 WRITE16(ea + (eaoffset & 0xf), VREG_S(i, element & 0x7));
903 unimplemented_opcode(op);
// 16-bit range limits, written as bit patterns.
// NOTE(review): as a plain int constant S16MIN (0x8000) has the value +32768;
// it only denotes -32768 once narrowed to a 16-bit type, so comparing it
// against a wider signed value would not behave as a minimum.
909 #define U16MIN 0x0000
910 #define U16MAX 0xffff
912 #define S16MIN 0x8000
913 #define S16MAX 0x7fff
// Derives a 16-bit result for accumulator lane `accum` from its slices.
// The visible tests examine the sign of ACCUM_H, whether ACCUM_H is all ones
// (0xffff, negative side) or zero (non-negative side), and the sign of
// ACCUM_M; when none of them triggers, the low slice ACCUM_L is returned.
// The values returned on the clamping paths are on lines not visible here.
915 INLINE UINT16 SATURATE_ACCUM_U(int accum)
917 if ((INT16)ACCUM_H(accum) < 0)
919 if ((UINT16)(ACCUM_H(accum)) != 0xffff)
925 if ((INT16)ACCUM_M(accum) >= 0)
931 return ACCUM_L(accum);
937 if ((UINT16)(ACCUM_H(accum)) != 0)
943 if ((INT16)ACCUM_M(accum) < 0)
949 return ACCUM_L(accum);
// Derives a 16-bit result for accumulator lane `accum` from its slices.
// The visible tests are the same as in SATURATE_ACCUM_U (sign of ACCUM_H,
// ACCUM_H equal to 0xffff or to zero, sign of ACCUM_M), but the pass-through
// paths return the middle slice ACCUM_M instead of the low slice.
// The values returned on the clamping paths are on lines not visible here.
957 INLINE UINT16 SATURATE_ACCUM_S(int accum)
959 if ((INT16)ACCUM_H(accum) < 0)
961 if ((UINT16)(ACCUM_H(accum)) != 0xffff)
965 if ((INT16)ACCUM_M(accum) >= 0)
968 return ACCUM_M(accum);
973 if ((UINT16)(ACCUM_H(accum)) != 0)
977 if ((INT16)ACCUM_M(accum) < 0)
980 return ACCUM_M(accum);
// Copies vres[0..7] into the eight 16-bit elements of the destination vector
// register. Requires `vres` and, through VDREG, the instruction word `op` to
// be in scope where the macro is expanded. Results are staged in vres and
// written back in one go rather than element by element.
987 #define WRITEBACK_RESULT() \
989 VREG_S(VDREG, 0) = vres[0]; \
990 VREG_S(VDREG, 1) = vres[1]; \
991 VREG_S(VDREG, 2) = vres[2]; \
992 VREG_S(VDREG, 3) = vres[3]; \
993 VREG_S(VDREG, 4) = vres[4]; \
994 VREG_S(VDREG, 5) = vres[5]; \
995 VREG_S(VDREG, 6) = vres[6]; \
996 VREG_S(VDREG, 7) = vres[7]; \
1000 void handle_vector_ops(UINT32 op)
1006 // E = VS2 element type
1007 // S = VS1, Source vector 1
1008 // T = VS2, Source vector 2
1009 // D = Destination vector
1013 case 0x00: /* VMULF */
1015 // 31 25 24 20 15 10 5 0
1016 // ------------------------------------------------------
1017 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000000 |
1018 // ------------------------------------------------------
1020 // Multiplies signed integer by signed integer * 2
1022 for (i=0; i < 8; i++)
1024 int del = VEC_EL_1(EL, i);
1025 int sel = VEC_EL_2(EL, del);
1026 INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, del);
1027 INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel);
1028 if (s1 == -32768 && s2 == -32768)
1032 ACCUM_M(del) = -32768;
1033 ACCUM_L(del) = -32768;
1038 INT64 r = s1 * s2 * 2;
1039 r += 0x8000; // rounding ?
1040 ACCUM_H(del) = (r < 0) ? 0xffff : 0; // sign-extend to 48-bit
1041 ACCUM_M(del) = (INT16)(r >> 16);
1042 ACCUM_L(del) = (UINT16)(r);
1043 vres[del] = ACCUM_M(del);
1051 case 0x01: /* VMULU */
1053 // 31 25 24 20 15 10 5 0
1054 // ------------------------------------------------------
1055 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000001 |
1056 // ------------------------------------------------------
1059 for (i=0; i < 8; i++)
1061 int del = VEC_EL_1(EL, i);
1062 int sel = VEC_EL_2(EL, del);
1063 INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, del);
1064 INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel);
1065 INT64 r = s1 * s2 * 2;
1066 r += 0x8000; // rounding ?
1068 ACCUM_H(del) = (UINT16)(r >> 32);
1069 ACCUM_M(del) = (UINT16)(r >> 16);
1070 ACCUM_L(del) = (UINT16)(r);
1076 else if (((INT16)(ACCUM_H(del)) ^ (INT16)(ACCUM_M(del))) < 0)
1082 vres[del] = ACCUM_M(del);
1089 case 0x04: /* VMUDL */
1091 // 31 25 24 20 15 10 5 0
1092 // ------------------------------------------------------
1093 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000100 |
1094 // ------------------------------------------------------
1096 // Multiplies unsigned fraction by unsigned fraction
1097 // Stores the higher 16 bits of the 32-bit result to accumulator
1098 // The low slice of accumulator is stored into destination element
1100 for (i=0; i < 8; i++)
1102 int del = VEC_EL_1(EL, i);
1103 int sel = VEC_EL_2(EL, del);
1104 UINT32 s1 = (UINT32)(UINT16)VREG_S(VS1REG, del);
1105 UINT32 s2 = (UINT32)(UINT16)VREG_S(VS2REG, sel);
1110 ACCUM_L(del) = (UINT16)(r >> 16);
1112 vres[del] = ACCUM_L(del);
1118 case 0x05: /* VMUDM */
1120 // 31 25 24 20 15 10 5 0
1121 // ------------------------------------------------------
1122 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000101 |
1123 // ------------------------------------------------------
1125 // Multiplies signed integer by unsigned fraction
1126 // The result is stored into accumulator
1127 // The middle slice of accumulator is stored into destination element
1129 for (i=0; i < 8; i++)
1131 int del = VEC_EL_1(EL, i);
1132 int sel = VEC_EL_2(EL, del);
1133 INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, del);
1134 INT32 s2 = (UINT16)VREG_S(VS2REG, sel); // not sign-extended
1137 ACCUM_H(del) = (r < 0) ? 0xffff : 0; // sign-extend to 48-bit
1138 ACCUM_M(del) = (INT16)(r >> 16);
1139 ACCUM_L(del) = (UINT16)(r);
1141 vres[del] = ACCUM_M(del);
1148 case 0x06: /* VMUDN */
1150 // 31 25 24 20 15 10 5 0
1151 // ------------------------------------------------------
1152 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000110 |
1153 // ------------------------------------------------------
1155 // Multiplies unsigned fraction by signed integer
1156 // The result is stored into accumulator
1157 // The low slice of accumulator is stored into destination element
1159 for (i=0; i < 8; i++)
1161 int del = VEC_EL_1(EL, i);
1162 int sel = VEC_EL_2(EL, del);
1163 INT32 s1 = (UINT16)VREG_S(VS1REG, del); // not sign-extended
1164 INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel);
1167 ACCUM_H(del) = (r < 0) ? 0xffff : 0; // sign-extend to 48-bit
1168 ACCUM_M(del) = (INT16)(r >> 16);
1169 ACCUM_L(del) = (UINT16)(r);
1171 vres[del] = ACCUM_L(del);
1177 case 0x07: /* VMUDH */
1179 // 31 25 24 20 15 10 5 0
1180 // ------------------------------------------------------
1181 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000111 |
1182 // ------------------------------------------------------
1184 // Multiplies signed integer by signed integer
1185 // The result is stored into highest 32 bits of accumulator, the low slice is zero
1186 // The highest 32 bits of accumulator is saturated into destination element
1188 for (i=0; i < 8; i++)
1190 int del = VEC_EL_1(EL, i);
1191 int sel = VEC_EL_2(EL, del);
1192 INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, del);
1193 INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel);
1196 ACCUM_H(del) = (INT16)(r >> 16);
1197 ACCUM_M(del) = (UINT16)(r);
1200 if (r < -32768) r = -32768;
1201 if (r > 32767) r = 32767;
1202 vres[del] = (INT16)(r);
1208 case 0x08: /* VMACF */
1210 // 31 25 24 20 15 10 5 0
1211 // ------------------------------------------------------
1212 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001000 |
1213 // ------------------------------------------------------
1215 // Multiplies signed integer by signed integer * 2
1216 // The result is added to accumulator
1218 for (i=0; i < 8; i++)
1221 int del = VEC_EL_1(EL, i);
1222 int sel = VEC_EL_2(EL, del);
1223 INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, del);
1224 INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel);
1227 ACCUM(del) += (INT64)(r) << 17;
1228 res = SATURATE_ACCUM_S(del);
1236 case 0x09: /* VMACU */
1238 // 31 25 24 20 15 10 5 0
1239 // ------------------------------------------------------
1240 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001001 |
1241 // ------------------------------------------------------
1244 for (i=0; i < 8; i++)
1247 int del = VEC_EL_1(EL, i);
1248 int sel = VEC_EL_2(EL, del);
1249 INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, del);
1250 INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel);
1252 UINT32 r2 = (UINT16)ACCUM_L(del) + ((UINT16)(r1) * 2);
1253 UINT32 r3 = (UINT16)ACCUM_M(del) + (UINT16)((r1 >> 16) * 2) + (UINT16)(r2 >> 16);
1255 ACCUM_L(del) = (UINT16)(r2);
1256 ACCUM_M(del) = (UINT16)(r3);
1257 ACCUM_H(del) += (UINT16)(r3 >> 16) + (UINT16)(r1 >> 31);
1259 //res = SATURATE_ACCUM(del, 1, 0x0000, 0xffff);
1260 if ((INT16)ACCUM_H(del) < 0)
1266 if (ACCUM_H(del) != 0)
1272 if ((INT16)ACCUM_M(del) < 0)
1289 case 0x0c: /* VMADL */
1291 // 31 25 24 20 15 10 5 0
1292 // ------------------------------------------------------
1293 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001100 |
1294 // ------------------------------------------------------
1296 // Multiplies unsigned fraction by unsigned fraction
1297 // Adds the higher 16 bits of the 32-bit result to accumulator
1298 // The low slice of accumulator is stored into destination element
1300 for (i=0; i < 8; i++)
1303 int del = VEC_EL_1(EL, i);
1304 int sel = VEC_EL_2(EL, del);
1305 UINT32 s1 = (UINT32)(UINT16)VREG_S(VS1REG, del);
1306 UINT32 s2 = (UINT32)(UINT16)VREG_S(VS2REG, sel);
1307 UINT32 r1 = s1 * s2;
1308 UINT32 r2 = (UINT16)ACCUM_L(del) + (r1 >> 16);
1309 UINT32 r3 = (UINT16)ACCUM_M(del) + (r2 >> 16);
1311 ACCUM_L(del) = (UINT16)(r2);
1312 ACCUM_M(del) = (UINT16)(r3);
1313 ACCUM_H(del) += (INT16)(r3 >> 16);
1315 res = SATURATE_ACCUM_U(del);
1323 case 0x0d: /* VMADM */
1325 // 31 25 24 20 15 10 5 0
1326 // ------------------------------------------------------
1327 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001101 |
1328 // ------------------------------------------------------
1330 // Multiplies signed integer by unsigned fraction
1331 // The result is added into accumulator
1332 // The middle slice of accumulator is stored into destination element
1334 for (i=0; i < 8; i++)
1337 int del = VEC_EL_1(EL, i);
1338 int sel = VEC_EL_2(EL, del);
1339 UINT32 s1 = (INT32)(INT16)VREG_S(VS1REG, del);
1340 UINT32 s2 = (UINT16)VREG_S(VS2REG, sel); // not sign-extended
1341 UINT32 r1 = s1 * s2;
1342 UINT32 r2 = (UINT16)ACCUM_L(del) + (UINT16)(r1);
1343 UINT32 r3 = (UINT16)ACCUM_M(del) + (r1 >> 16) + (r2 >> 16);
1345 ACCUM_L(del) = (UINT16)(r2);
1346 ACCUM_M(del) = (UINT16)(r3);
1347 ACCUM_H(del) += (UINT16)(r3 >> 16);
1348 if ((INT32)(r1) < 0)
1351 res = SATURATE_ACCUM_S(del);
1359 case 0x0e: /* VMADN */
1361 // 31 25 24 20 15 10 5 0
1362 // ------------------------------------------------------
1363 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001110 |
1364 // ------------------------------------------------------
1366 // Multiplies unsigned fraction by signed integer
1367 // The result is added into accumulator
1368 // The low slice of accumulator is stored into destination element
1371 for (i=0; i < 8; i++)
1373 int del = VEC_EL_1(EL, i);
1374 int sel = VEC_EL_2(EL, del);
1375 INT32 s1 = (UINT16)VREG_S(VS1REG, del); // not sign-extended
1376 INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel);
1377 ACCUM(del) += (INT64)(s1*s2)<<16;
1380 for (i=0; i < 8; i++)
1383 res = SATURATE_ACCUM_U(i);
1386 VREG_S(VDREG, i) = res;
1389 for (i=0; i < 8; i++)
1392 int del = VEC_EL_1(EL, i);
1393 int sel = VEC_EL_2(EL, del);
1394 INT32 s1 = (UINT16)VREG_S(VS1REG, del); // not sign-extended
1395 INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel);
1396 UINT32 r1 = s1 * s2;
1397 UINT32 r2 = (UINT16)ACCUM_L(del) + (UINT16)(r1);
1398 UINT32 r3 = (UINT16)ACCUM_M(del) + (r1 >> 16) + (r2 >> 16);
1400 ACCUM_L(del) = (UINT16)(r2);
1401 ACCUM_M(del) = (UINT16)(r3);
1402 ACCUM_H(del) += (UINT16)(r3 >> 16);
1403 if ((INT32)(r1) < 0)
1406 res = SATURATE_ACCUM_U(del);
1415 case 0x0f: /* VMADH */
1417 // 31 25 24 20 15 10 5 0
1418 // ------------------------------------------------------
1419 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001111 |
1420 // ------------------------------------------------------
1422 // Multiplies signed integer by signed integer
1423 // The result is added into highest 32 bits of accumulator, the low slice is zero
1424 // The highest 32 bits of accumulator is saturated into destination element
1427 for (i=0; i < 8; i++)
1429 int del = VEC_EL_1(EL, i);
1430 int sel = VEC_EL_2(EL, del);
1431 INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, del);
1432 INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel);
1434 rsp.accum[del].l[1] += s1*s2;
1437 for (i=0; i < 8; i++)
1440 res = SATURATE_ACCUM_S(i);
1443 VREG_S(VDREG, i) = res;
1446 for (i=0; i < 8; i++)
1449 int del = VEC_EL_1(EL, i);
1450 int sel = VEC_EL_2(EL, del);
1451 INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, del);
1452 INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel);
1455 ACCUM(del) += (INT64)(r) << 32;
1457 res = SATURATE_ACCUM_S(del);
1466 case 0x10: /* VADD */
1468 // 31 25 24 20 15 10 5 0
1469 // ------------------------------------------------------
1470 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010000 |
1471 // ------------------------------------------------------
1473 // Adds two vector registers and carry flag, the result is saturated to 32767
1475 // TODO: check VS2REG == VDREG
1477 for (i=0; i < 8; i++)
1479 int del = VEC_EL_1(EL, i);
1480 int sel = VEC_EL_2(EL, del);
1481 INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, del);
1482 INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel);
1483 INT32 r = s1 + s2 + CARRY_FLAG(del);
1485 ACCUM_L(del) = (INT16)(r);
1487 if (r > 32767) r = 32767;
1488 if (r < -32768) r = -32768;
1489 vres[del] = (INT16)(r);
1492 CLEAR_CARRY_FLAGS();
1497 case 0x11: /* VSUB */
1499 // 31 25 24 20 15 10 5 0
1500 // ------------------------------------------------------
1501 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010001 |
1502 // ------------------------------------------------------
1504 // Subtracts two vector registers and carry flag, the result is saturated to -32768
1506 // TODO: check VS2REG == VDREG
1508 for (i=0; i < 8; i++)
1510 int del = VEC_EL_1(EL, i);
1511 int sel = VEC_EL_2(EL, del);
1512 INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, del);
1513 INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel);
1514 INT32 r = s1 - s2 - CARRY_FLAG(del);
1516 ACCUM_L(del) = (INT16)(r);
1518 if (r > 32767) r = 32767;
1519 if (r < -32768) r = -32768;
1521 vres[del] = (INT16)(r);
1524 CLEAR_CARRY_FLAGS();
1529 case 0x13: /* VABS */
1531 // 31 25 24 20 15 10 5 0
1532 // ------------------------------------------------------
1533 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010011 |
1534 // ------------------------------------------------------
1536 // Changes the sign of source register 2 if source register 1 is negative and stores
1537 // the result to destination register
1539 for (i=0; i < 8; i++)
1541 int del = VEC_EL_1(EL, i);
1542 int sel = VEC_EL_2(EL, del);
1543 INT16 s1 = (INT16)VREG_S(VS1REG, del);
1544 INT16 s2 = (INT16)VREG_S(VS2REG, sel);
1566 ACCUM_L(del) = vres[del];
1572 case 0x14: /* VADDC */
1574 // 31 25 24 20 15 10 5 0
1575 // ------------------------------------------------------
1576 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010100 |
1577 // ------------------------------------------------------
1579 // Adds two vector registers, the carry out is stored into carry register
1581 // TODO: check VS2REG = VDREG
1584 CLEAR_CARRY_FLAGS();
1586 for (i=0; i < 8; i++)
1588 int del = VEC_EL_1(EL, i);
1589 int sel = VEC_EL_2(EL, del);
1590 INT32 s1 = (UINT32)(UINT16)VREG_S(VS1REG, del);
1591 INT32 s2 = (UINT32)(UINT16)VREG_S(VS2REG, sel);
1594 vres[del] = (INT16)(r);
1595 ACCUM_L(del) = (INT16)(r);
1599 SET_CARRY_FLAG(del);
1606 case 0x15: /* VSUBC */
1608 // 31 25 24 20 15 10 5 0
1609 // ------------------------------------------------------
1610 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010101 |
1611 // ------------------------------------------------------
1613 // Subtracts two vector registers, the carry out is stored into carry register
1615 // TODO: check VS2REG = VDREG
1618 CLEAR_CARRY_FLAGS();
1620 for (i=0; i < 8; i++)
1622 int del = VEC_EL_1(EL, i);
1623 int sel = VEC_EL_2(EL, del);
1624 INT32 s1 = (UINT32)(UINT16)VREG_S(VS1REG, del);
1625 INT32 s2 = (UINT32)(UINT16)VREG_S(VS2REG, sel);
1628 vres[del] = (INT16)(r);
1629 ACCUM_L(del) = (UINT16)(r);
1631 if ((UINT16)(r) != 0)
1637 SET_CARRY_FLAG(del);
1644 case 0x1d: /* VSAW */
1646 // 31 25 24 20 15 10 5 0
1647 // ------------------------------------------------------
1648 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 011101 |
1649 // ------------------------------------------------------
1651 // Stores high, middle or low slice of accumulator to destination vector
1657 for (i=0; i < 8; i++)
1659 VREG_S(VDREG, i) = ACCUM_H(i);
1665 for (i=0; i < 8; i++)
1667 VREG_S(VDREG, i) = ACCUM_M(i);
1673 for (i=0; i < 8; i++)
1675 VREG_S(VDREG, i) = ACCUM_L(i);
1679 default: log(M64MSG_ERROR, "RSP: VSAW: el = %d\n", EL);
1684 case 0x20: /* VLT */
1686 // 31 25 24 20 15 10 5 0
1687 // ------------------------------------------------------
1688 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100000 |
1689 // ------------------------------------------------------
1691 // Sets compare flags if elements in VS1 are less than VS2
1692 // Moves the element in VS2 to destination vector
1696 for (i=0; i < 8; i++)
1698 int del = VEC_EL_1(EL, i);
1699 int sel = VEC_EL_2(EL, del);
1701 if (VREG_S(VS1REG, del) < VREG_S(VS2REG, sel))
1703 vres[del] = VREG_S(VS1REG, del);
1704 SET_COMPARE_FLAG(del);
1706 else if (VREG_S(VS1REG, del) == VREG_S(VS2REG, sel))
1708 vres[del] = VREG_S(VS1REG, del);
1709 if (ZERO_FLAG(del) != 0 && CARRY_FLAG(del) != 0)
1711 SET_COMPARE_FLAG(del);
1716 vres[del] = VREG_S(VS2REG, sel);
1719 ACCUM_L(del) = vres[del];
1723 CLEAR_CARRY_FLAGS();
1728 case 0x21: /* VEQ */
1730 // 31 25 24 20 15 10 5 0
1731 // ------------------------------------------------------
1732 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100001 |
1733 // ------------------------------------------------------
1735 // Sets compare flags if elements in VS1 are equal with VS2
1736 // Moves the element in VS2 to destination vector
1740 for (i=0; i < 8; i++)
1742 int del = VEC_EL_1(EL, i);
1743 int sel = VEC_EL_2(EL, del);
1745 vres[del] = VREG_S(VS2REG, sel);
1746 ACCUM_L(del) = vres[del];
1748 if (VREG_S(VS1REG, del) == VREG_S(VS2REG, sel))
1750 if (ZERO_FLAG(del) == 0)
1752 SET_COMPARE_FLAG(del);
1758 CLEAR_CARRY_FLAGS();
1763 case 0x22: /* VNE */
1765 // 31 25 24 20 15 10 5 0
1766 // ------------------------------------------------------
1767 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100010 |
1768 // ------------------------------------------------------
1770 // Sets compare flags if elements in VS1 are not equal with VS2
1771 // Moves the element in VS2 to destination vector
1775 for (i=0; i < 8; i++)
1777 int del = VEC_EL_1(EL, i);
1778 int sel = VEC_EL_2(EL, del);
1780 vres[del] = VREG_S(VS1REG, del);
1781 ACCUM_L(del) = vres[del];
1783 if (VREG_S(VS1REG, del) != VREG_S(VS2REG, sel))
1785 SET_COMPARE_FLAG(del);
1789 if (ZERO_FLAG(del) != 0)
1791 SET_COMPARE_FLAG(del);
1797 CLEAR_CARRY_FLAGS();
1802 case 0x23: /* VGE */
1804 // 31 25 24 20 15 10 5 0
1805 // ------------------------------------------------------
1806 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100011 |
1807 // ------------------------------------------------------
1809 // Sets compare flags if elements in VS1 are greater or equal with VS2
1810 // Moves the element in VS2 to destination vector
1814 for (i=0; i < 8; i++)
1816 int del = VEC_EL_1(EL, i);
1817 int sel = VEC_EL_2(EL, del);
1819 if (VREG_S(VS1REG, del) == VREG_S(VS2REG, sel))
1821 if (ZERO_FLAG(del) == 0 || CARRY_FLAG(del) == 0)
1823 SET_COMPARE_FLAG(del);
1826 else if (VREG_S(VS1REG, del) > VREG_S(VS2REG, sel))
1828 SET_COMPARE_FLAG(del);
1831 if (COMPARE_FLAG(del) != 0)
1833 vres[del] = VREG_S(VS1REG, del);
1837 vres[del] = VREG_S(VS2REG, sel);
1840 ACCUM_L(del) = vres[del];
1844 CLEAR_CARRY_FLAGS();
1849 case 0x24: /* VCL */
1851 // 31 25 24 20 15 10 5 0
1852 // ------------------------------------------------------
1853 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100100 |
1854 // ------------------------------------------------------
1858 for (i=0; i < 8; i++)
1860 int del = VEC_EL_1(EL, i);
1861 int sel = VEC_EL_2(EL, del);
1862 INT16 s1 = VREG_S(VS1REG, del);
1863 INT16 s2 = VREG_S(VS2REG, sel);
1865 if (CARRY_FLAG(del) != 0)
1867 if (ZERO_FLAG(del) != 0)
1869 if (COMPARE_FLAG(del) != 0)
1871 ACCUM_L(del) = -(UINT16)s2;
1880 if (rsp.flag[2] & (1 << (del)))
1882 if (((UINT32)(INT16)(s1) + (UINT32)(INT16)(s2)) > 0x10000)
1885 CLEAR_COMPARE_FLAG(del);
1889 ACCUM_L(del) = -((UINT16)s2);
1890 SET_COMPARE_FLAG(del);
1895 if (((UINT32)(INT16)(s1) + (UINT32)(INT16)(s2)) != 0)
1898 CLEAR_COMPARE_FLAG(del);
1902 ACCUM_L(del) = -((UINT16)s2);
1903 SET_COMPARE_FLAG(del);
1910 if (ZERO_FLAG(del) != 0)
1912 if (rsp.flag[1] & (1 << (8+del)))
1923 if (((INT32)(UINT16)s1 - (INT32)(UINT16)s2) >= 0)
1926 rsp.flag[1] |= (1 << (8+del));
1931 rsp.flag[1] &= ~(1 << (8+del));
1936 vres[del] = ACCUM_L(del);
1939 CLEAR_CARRY_FLAGS();
1945 case 0x25: /* VCH */
1947 // 31 25 24 20 15 10 5 0
1948 // ------------------------------------------------------
1949 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100101 |
1950 // ------------------------------------------------------
1955 CLEAR_CARRY_FLAGS();
1959 for (i=0; i < 8; i++)
1961 int del = VEC_EL_1(EL, i);
1962 int sel = VEC_EL_2(EL, del);
1963 INT16 s1 = VREG_S(VS1REG, del);
1964 INT16 s2 = VREG_S(VS2REG, sel);
1968 SET_CARRY_FLAG(del);
1971 rsp.flag[1] |= (1 << (8+del));
1978 rsp.flag[2] |= (1 << (del));
1980 SET_COMPARE_FLAG(del);
1981 vres[del] = -((UINT16)s2);
2000 SET_COMPARE_FLAG(del);
2004 rsp.flag[1] |= (1 << (8+del));
2021 ACCUM_L(del) = vres[del];
2027 case 0x26: /* VCR */
2029 // 31 25 24 20 15 10 5 0
2030 // ------------------------------------------------------
2031 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100110 |
2032 // ------------------------------------------------------
2034 // Vector clip reverse
2040 for (i=0; i < 8; i++)
2042 int del = VEC_EL_1(EL, i);
2043 int sel = VEC_EL_2(EL, del);
2044 INT16 s1 = VREG_S(VS1REG, del);
2045 INT16 s2 = VREG_S(VS2REG, sel);
2047 if ((INT16)(s1 ^ s2) < 0)
2051 rsp.flag[1] |= (1 << (8+del));
2055 ACCUM_L(del) = ~((UINT16)s2);
2056 SET_COMPARE_FLAG(del);
2067 SET_COMPARE_FLAG(del);
2072 rsp.flag[1] |= (1 << (8+del));
2080 vres[del] = ACCUM_L(del);
2086 case 0x27: /* VMRG */
2088 // 31 25 24 20 15 10 5 0
2089 // ------------------------------------------------------
2090 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100111 |
2091 // ------------------------------------------------------
2093 // Merges two vectors according to compare flags
2095 for (i=0; i < 8; i++)
2097 int del = VEC_EL_1(EL, i);
2098 int sel = VEC_EL_2(EL, del);
2099 if (COMPARE_FLAG(del) != 0)
2101 vres[del] = VREG_S(VS1REG, del);
2105 vres[del] = VREG_S(VS2REG, VEC_EL_2(EL, sel));
2108 ACCUM_L(del) = vres[del];
2113 case 0x28: /* VAND */
2115 // 31 25 24 20 15 10 5 0
2116 // ------------------------------------------------------
2117 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101000 |
2118 // ------------------------------------------------------
2120 // Bitwise AND of two vector registers
2122 for (i=0; i < 8; i++)
2124 int del = VEC_EL_1(EL, i);
2125 int sel = VEC_EL_2(EL, del);
2126 vres[del] = VREG_S(VS1REG, del) & VREG_S(VS2REG, sel);
2127 ACCUM_L(del) = vres[del];
2132 case 0x29: /* VNAND */
2134 // 31 25 24 20 15 10 5 0
2135 // ------------------------------------------------------
2136 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101001 |
2137 // ------------------------------------------------------
2139 // Bitwise NOT AND of two vector registers
2141 for (i=0; i < 8; i++)
2143 int del = VEC_EL_1(EL, i);
2144 int sel = VEC_EL_2(EL, del);
2145 vres[del] = ~((VREG_S(VS1REG, del) & VREG_S(VS2REG, sel)));
2146 ACCUM_L(del) = vres[del];
2151 case 0x2a: /* VOR */
2153 // 31 25 24 20 15 10 5 0
2154 // ------------------------------------------------------
2155 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101010 |
2156 // ------------------------------------------------------
2158 // Bitwise OR of two vector registers
2160 for (i=0; i < 8; i++)
2162 int del = VEC_EL_1(EL, i);
2163 int sel = VEC_EL_2(EL, del);
2164 vres[del] = VREG_S(VS1REG, del) | VREG_S(VS2REG, sel);
2165 ACCUM_L(del) = vres[del];
2170 case 0x2b: /* VNOR */
2172 // 31 25 24 20 15 10 5 0
2173 // ------------------------------------------------------
2174 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101011 |
2175 // ------------------------------------------------------
2177 // Bitwise NOT OR of two vector registers
2179 for (i=0; i < 8; i++)
2181 int del = VEC_EL_1(EL, i);
2182 int sel = VEC_EL_2(EL, del);
2183 vres[del] = ~((VREG_S(VS1REG, del) | VREG_S(VS2REG, sel)));
2184 ACCUM_L(del) = vres[del];
2189 case 0x2c: /* VXOR */
2191 // 31 25 24 20 15 10 5 0
2192 // ------------------------------------------------------
2193 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101100 |
2194 // ------------------------------------------------------
2196 // Bitwise XOR of two vector registers
2198 for (i=0; i < 8; i++)
2200 int del = VEC_EL_1(EL, i);
2201 int sel = VEC_EL_2(EL, del);
2202 vres[del] = VREG_S(VS1REG, del) ^ VREG_S(VS2REG, sel);
2203 ACCUM_L(del) = vres[del];
2208 case 0x2d: /* VNXOR */
2210 // 31 25 24 20 15 10 5 0
2211 // ------------------------------------------------------
2212 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101101 |
2213 // ------------------------------------------------------
2215 // Bitwise NOT XOR of two vector registers
2217 for (i=0; i < 8; i++)
2219 int del = VEC_EL_1(EL, i);
2220 int sel = VEC_EL_2(EL, del);
2221 vres[del] = ~((VREG_S(VS1REG, del) ^ VREG_S(VS2REG, sel)));
2222 ACCUM_L(del) = vres[del];
2228 case 0x30: /* VRCP */
2230 // 31 25 24 20 15 10 5 0
2231 // ------------------------------------------------------
2232 // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110000 |
2233 // ------------------------------------------------------
2235 // Calculates reciprocal
2236 int del = (VS1REG & 7);
2237 int sel = EL&7; //VEC_EL_2(EL, del);
2240 rec = (INT16)(VREG_S(VS2REG, sel));
2244 // divide by zero -> overflow
2255 for (i = 15; i > 0; i--)
2259 rec &= ((0xffc0) >> (15 - i));
2263 rec = (INT32)(0x7fffffff / (double)rec);
2264 for (i = 31; i > 0; i--)
2268 rec &= ((0xffff8000) >> (31 - i));
2278 for (i=0; i < 8; i++)
2280 int element = VEC_EL_2(EL, i);
2281 ACCUM_L(i) = VREG_S(VS2REG, element);
2284 rsp.reciprocal_res = rec;
2286 VREG_S(VDREG, del) = (UINT16)(rsp.reciprocal_res); // store low part
2290 case 0x31: /* VRCPL */
2292 // 31 25 24 20 15 10 5 0
2293 // ------------------------------------------------------
2294 // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110001 |
2295 // ------------------------------------------------------
2297 // Calculates reciprocal low part
2299 int del = (VS1REG & 7);
2300 int sel = VEC_EL_2(EL, del);
2303 rec = ((UINT16)(VREG_S(VS2REG, sel)) | ((UINT32)(rsp.reciprocal_high) << 16));
2307 // divide by zero -> overflow
2315 if (((UINT32)(rec & 0xffff0000) == 0xffff0000) && ((INT16)(rec & 0xffff) < 0))
2325 for (i = 31; i > 0; i--)
2329 rec &= ((0xffc00000) >> (31 - i));
2333 rec = (0x7fffffff / rec);
2334 for (i = 31; i > 0; i--)
2338 rec &= ((0xffff8000) >> (31 - i));
2348 for (i=0; i < 8; i++)
2350 int element = VEC_EL_2(EL, i);
2351 ACCUM_L(i) = VREG_S(VS2REG, element);
2354 rsp.reciprocal_res = rec;
2356 VREG_S(VDREG, del) = (UINT16)(rsp.reciprocal_res); // store low part
2360 case 0x32: /* VRCPH */
2362 // 31 25 24 20 15 10 5 0
2363 // ------------------------------------------------------
2364 // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110010 |
2365 // ------------------------------------------------------
2367 // Calculates reciprocal high part
2369 int del = (VS1REG & 7);
2370 int sel = VEC_EL_2(EL, del);
2372 rsp.reciprocal_high = VREG_S(VS2REG, sel);
2374 for (i=0; i < 8; i++)
2376 int element = VEC_EL_2(EL, i);
2377 ACCUM_L(i) = VREG_S(VS2REG, element); // perhaps accumulator is used to store the intermediate values ?
2380 VREG_S(VDREG, del) = (INT16)(rsp.reciprocal_res >> 16); // store high part
2384 case 0x33: /* VMOV */
2386 // 31 25 24 20 15 10 5 0
2387 // ------------------------------------------------------
2388 // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110011 |
2389 // ------------------------------------------------------
2391 // Moves element from vector to destination vector
2393 int element = VS1REG & 7;
2394 VREG_S(VDREG, element) = VREG_S(VS2REG, VEC_EL_2(EL, 7-element));
2398 case 0x35: /* VRSQL */
2400 // 31 25 24 20 15 10 5 0
2401 // ------------------------------------------------------
2402 // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110101 |
2403 // ------------------------------------------------------
2405 // Calculates reciprocal square-root low part
2407 int del = (VS1REG & 7);
2408 int sel = VEC_EL_2(EL, del);
2411 sqr = (UINT16)(VREG_S(VS2REG, sel)) | ((UINT32)(rsp.square_root_high) << 16);
2415 // square root on 0 -> overflow
2418 else if (sqr == 0xffff8000)
2426 if (sqr > 0x7fffffff)
2428 if (((UINT32)(sqr & 0xffff0000) == 0xffff0000) && ((INT16)(sqr & 0xffff) < 0))
2438 for (i = 31; i > 0; i--)
2442 sqr &= (0xff800000 >> (31 - i));
2446 sqr = (INT32)(0x7fffffff / sqrt(sqr));
2447 for (i = 31; i > 0; i--)
2451 sqr &= (0xffff8000 >> (31 - i));
2461 for (i=0; i < 8; i++)
2463 int element = VEC_EL_2(EL, i);
2464 ACCUM_L(i) = VREG_S(VS2REG, element);
2467 rsp.square_root_res = sqr;
2469 VREG_S(VDREG, del) = (UINT16)(rsp.square_root_res); // store low part
2473 case 0x36: /* VRSQH */
2475 // 31 25 24 20 15 10 5 0
2476 // ------------------------------------------------------
2477 // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110110 |
2478 // ------------------------------------------------------
2480 // Calculates reciprocal square-root high part
2482 int del = (VS1REG & 7);
2483 int sel = VEC_EL_2(EL, del);
2485 rsp.square_root_high = VREG_S(VS2REG, sel);
2487 for (i=0; i < 8; i++)
2489 int element = VEC_EL_2(EL, i);
2490 ACCUM_L(i) = VREG_S(VS2REG, element); // perhaps accumulator is used to store the intermediate values ?
2493 VREG_S(VDREG, del) = (INT16)(rsp.square_root_res >> 16); // store high part
2497 default: unimplemented_opcode(op); break;
// Interpreter entry point for the RSP: while rsp_icount is positive it fetches
// an opcode at sp_pc, advances sp_pc, dispatches on the opcode, and finally
// returns the ExecutedCycles counter.
// NOTE(review): this listing omits some lines (braces and short statements), so
// the comments below describe only what the visible lines establish.
2501 int rsp_execute(int cycles)
// The budget is hard-wired to 1; `cycles` only appears in the trailing comment.
2505 rsp_icount=1; //cycles;
2507 UINT32 ExecutedCycles=0;
2508 UINT32 BreakMarker=0;
// Cycle stamps used by the two "WDC hack" watchdogs near the end of the loop
// (0 means the watchdog is not armed).
2509 UINT32 WDCHackFlag1=0;
2510 UINT32 WDCHackFlag2=0;
// Keep only the low 12 bits of the program counter.
2512 sp_pc = /*0x4001000 | */(sp_pc & 0xfff);
// HALT or BROKE already set on entry: a warning is logged.
// NOTE(review): what happens after the log call is not visible here.
2513 if( rsp_sp_status & (SP_STATUS_HALT|SP_STATUS_BROKE))
2515 log(M64MSG_WARNING, "Quit due to SP halt/broke on start");
2520 while (rsp_icount > 0)
2529 op = ROPCODE(sp_pc);
2532 rsp_dasm_one(s, sp_pc, op);
2533 GENTRACE("%2x %3x\t%s\n", ((UINT8*)rsp_dmem)[0x1934], sp_pc, s);
// A pending branch target (nextpc != ~0U) becomes the new pc; the other
// assignment below steps to the next word. Both are wrapped to 12 bits.
2536 if (rsp.nextpc != ~0U)///DELAY SLOT USAGE
2538 sp_pc = /*0x4001000 | */(rsp.nextpc & 0xfff); //rsp.nextpc;
2543 sp_pc = /*0x4001000 | */((sp_pc+4)&0xfff);
// Primary opcode dispatch (the switch expressions are not among the lines shown).
// `if (RDREG)` / `if (RTREG)` skip the write when the destination index is 0
// (presumably register r0 - confirm against the macros).
2548 case 0x00: /* SPECIAL */
2552 case 0x00: /* SLL */ if (RDREG) RDVAL = (UINT32)RTVAL << SHIFT; break;
2553 case 0x02: /* SRL */ if (RDREG) RDVAL = (UINT32)RTVAL >> SHIFT; break;
2554 case 0x03: /* SRA */ if (RDREG) RDVAL = (INT32)RTVAL >> SHIFT; break;
2555 case 0x04: /* SLLV */ if (RDREG) RDVAL = (UINT32)RTVAL << (RSVAL & 0x1f); break;
2556 case 0x06: /* SRLV */ if (RDREG) RDVAL = (UINT32)RTVAL >> (RSVAL & 0x1f); break;
2557 case 0x07: /* SRAV */ if (RDREG) RDVAL = (INT32)RTVAL >> (RSVAL & 0x1f); break;
2558 case 0x08: /* JR */ JUMP_PC(RSVAL); break;
2559 case 0x09: /* JALR */ JUMP_PC_L(RSVAL, RDREG); break;
// BREAK: sets HALT and BROKE in the status register and, when interrupt-on-break
// is enabled, sets bit 0 of MI_INTR_REG and calls the CheckInterrupts callback.
2560 case 0x0d: /* BREAK */
2562 *z64_rspinfo.SP_STATUS_REG |= (SP_STATUS_HALT | SP_STATUS_BROKE );
2563 if ((*z64_rspinfo.SP_STATUS_REG & SP_STATUS_INTR_BREAK) != 0 ) {
2564 *z64_rspinfo.MI_INTR_REG |= 1;
2565 z64_rspinfo.CheckInterrupts();
2567 //sp_set_status(0x3);
2572 #if LOG_INSTRUCTION_EXECUTION
2573 fprintf(exec_output, "\n---------- break ----------\n\n");
// ADD/ADDU and SUB/SUBU use the same expressions; no overflow handling is visible.
2577 case 0x20: /* ADD */ if (RDREG) RDVAL = (INT32)(RSVAL + RTVAL); break;
2578 case 0x21: /* ADDU */ if (RDREG) RDVAL = (INT32)(RSVAL + RTVAL); break;
2579 case 0x22: /* SUB */ if (RDREG) RDVAL = (INT32)(RSVAL - RTVAL); break;
2580 case 0x23: /* SUBU */ if (RDREG) RDVAL = (INT32)(RSVAL - RTVAL); break;
2581 case 0x24: /* AND */ if (RDREG) RDVAL = RSVAL & RTVAL; break;
2582 case 0x25: /* OR */ if (RDREG) RDVAL = RSVAL | RTVAL; break;
2583 case 0x26: /* XOR */ if (RDREG) RDVAL = RSVAL ^ RTVAL; break;
2584 case 0x27: /* NOR */ if (RDREG) RDVAL = ~(RSVAL | RTVAL); break;
2585 case 0x2a: /* SLT */ if (RDREG) RDVAL = (INT32)RSVAL < (INT32)RTVAL; break;
2586 case 0x2b: /* SLTU */ if (RDREG) RDVAL = (UINT32)RSVAL < (UINT32)RTVAL; break;
2587 default: unimplemented_opcode(op); break;
2592 case 0x01: /* REGIMM */
2596 case 0x00: /* BLTZ */ if ((INT32)(RSVAL) < 0) JUMP_REL(SIMM16); break;
2597 case 0x01: /* BGEZ */ if ((INT32)(RSVAL) >= 0) JUMP_REL(SIMM16); break;
2598 // VP: according to the doc, the link is performed even when the condition fails;
2599 // this sounds odd, but let's try it that way
2600 case 0x11: /* BGEZAL */ LINK(31); if ((INT32)(RSVAL) >= 0) JUMP_REL(SIMM16); break;
2601 //case 0x11: /* BGEZAL */ if ((INT32)(RSVAL) >= 0) JUMP_REL_L(SIMM16, 31); break;
2602 default: unimplemented_opcode(op); break;
2607 case 0x02: /* J */ JUMP_ABS(UIMM26); break;
2608 case 0x03: /* JAL */ JUMP_ABS_L(UIMM26, 31); break;
2609 case 0x04: /* BEQ */ if (RSVAL == RTVAL) JUMP_REL(SIMM16); break;
2610 case 0x05: /* BNE */ if (RSVAL != RTVAL) JUMP_REL(SIMM16); break;
2611 case 0x06: /* BLEZ */ if ((INT32)RSVAL <= 0) JUMP_REL(SIMM16); break;
2612 case 0x07: /* BGTZ */ if ((INT32)RSVAL > 0) JUMP_REL(SIMM16); break;
2613 case 0x08: /* ADDI */ if (RTREG) RTVAL = (INT32)(RSVAL + SIMM16); break;
2614 case 0x09: /* ADDIU */ if (RTREG) RTVAL = (INT32)(RSVAL + SIMM16); break;
2615 case 0x0a: /* SLTI */ if (RTREG) RTVAL = (INT32)(RSVAL) < ((INT32)SIMM16); break;
2616 case 0x0b: /* SLTIU */ if (RTREG) RTVAL = (UINT32)(RSVAL) < (UINT32)((INT32)SIMM16); break;
2617 case 0x0c: /* ANDI */ if (RTREG) RTVAL = RSVAL & UIMM16; break;
2618 case 0x0d: /* ORI */ if (RTREG) RTVAL = RSVAL | UIMM16; break;
2619 case 0x0e: /* XORI */ if (RTREG) RTVAL = RSVAL ^ UIMM16; break;
2620 case 0x0f: /* LUI */ if (RTREG) RTVAL = UIMM16 << 16; break;
// COP0: the sub-operation is selected by bits 25..21 of the opcode.
2622 case 0x10: /* COP0 */
2624 switch ((op >> 21) & 0x1f)
2626 case 0x00: /* MFC0 */ if (RTREG) RTVAL = get_cop0_reg(RDREG); break;
2627 case 0x04: /* MTC0 */ set_cop0_reg(RDREG, RTVAL); break;
2629 log(M64MSG_WARNING, "unimplemented cop0 %x (%x)\n", (op >> 21) & 0x1f, op);
// COP2: the sub-operation is selected by bits 25..21; values 0x10..0x1f are
// forwarded to handle_vector_ops().
2635 case 0x12: /* COP2 */
2637 switch ((op >> 21) & 0x1f)
2639 case 0x00: /* MFC2 */
2641 // 31 25 20 15 10 6 0
2642 // ---------------------------------------------------
2643 // | 010010 | 00000 | TTTTT | DDDDD | IIII | 0000000 |
2644 // ---------------------------------------------------
// Reads two consecutive bytes of the vector register (byte index wraps at 16),
// combines them with b1 as the high byte, and sign-extends the 16-bit result.
2647 int el = (op >> 7) & 0xf;
2648 UINT16 b1 = VREG_B(VS1REG, (el+0) & 0xf);
2649 UINT16 b2 = VREG_B(VS1REG, (el+1) & 0xf);
2650 if (RTREG) RTVAL = (INT32)(INT16)((b1 << 8) | (b2));
2653 case 0x02: /* CFC2 */
2656 // ------------------------------------------------
2657 // | 010010 | 00010 | TTTTT | DDDDD | 00000000000 |
2658 // ------------------------------------------------
// NOTE(review): the condition choosing between the two reads below is not visible here.
2665 // Ancillary clipping flags
2666 RTVAL = rsp.flag[RDREG] & 0x00ff;
2670 // All other flags are 16 bits but sign-extended at retrieval
2671 RTVAL = (UINT32)rsp.flag[RDREG] | ( ( rsp.flag[RDREG] & 0x8000 ) ? 0xffff0000 : 0 );
2677 case 0x04: /* MTC2 */
2679 // 31 25 20 15 10 6 0
2680 // ---------------------------------------------------
2681 // | 010010 | 00100 | TTTTT | DDDDD | IIII | 0000000 |
2682 // ---------------------------------------------------
// Stores bits 15..8 of RTVAL at byte `el` and bits 7..0 at the following byte
// (byte index wraps at 16).
2685 int el = (op >> 7) & 0xf;
2686 VREG_B(VS1REG, (el+0) & 0xf) = (RTVAL >> 8) & 0xff;
2687 VREG_B(VS1REG, (el+1) & 0xf) = (RTVAL >> 0) & 0xff;
2690 case 0x06: /* CTC2 */
2693 // ------------------------------------------------
2694 // | 010010 | 00110 | TTTTT | DDDDD | 00000000000 |
2695 // ------------------------------------------------
// Only the low 16 bits of RTVAL are kept in the flag register.
2698 rsp.flag[RDREG] = RTVAL & 0xffff;
2702 case 0x10: case 0x11: case 0x12: case 0x13: case 0x14: case 0x15: case 0x16: case 0x17:
2703 case 0x18: case 0x19: case 0x1a: case 0x1b: case 0x1c: case 0x1d: case 0x1e: case 0x1f:
2705 handle_vector_ops(op);
2709 default: unimplemented_opcode(op); break;
// Scalar loads and stores: the effective address is RSVAL + SIMM16.
2714 case 0x20: /* LB */ if (RTREG) RTVAL = (INT32)(INT8)READ8(RSVAL + SIMM16); break;
2715 case 0x21: /* LH */ if (RTREG) RTVAL = (INT32)(INT16)READ16(RSVAL + SIMM16); break;
2716 case 0x23: /* LW */ if (RTREG) RTVAL = READ32(RSVAL + SIMM16); break;
2717 case 0x24: /* LBU */ if (RTREG) RTVAL = (UINT8)READ8(RSVAL + SIMM16); break;
2718 case 0x25: /* LHU */ if (RTREG) RTVAL = (UINT16)READ16(RSVAL + SIMM16); break;
2719 case 0x28: /* SB */ WRITE8(RSVAL + SIMM16, RTVAL); break;
2720 case 0x29: /* SH */ WRITE16(RSVAL + SIMM16, RTVAL); break;
2721 case 0x2b: /* SW */ WRITE32(RSVAL + SIMM16, RTVAL); break;
2722 case 0x32: /* LWC2 */ handle_lwc2(op); break;
2723 case 0x3a: /* SWC2 */ handle_swc2(op); break;
2727 unimplemented_opcode(op);
// Per-opcode profiling: adds the change in `lasttime` to rsptimings[] and bumps
// rspcounts[], both indexed by info.op2.
// NOTE(review): the update of `lasttime` between the snapshot and the subtraction,
// and any preprocessor guard around this section, are not visible here.
2733 uint64_t time = lasttime;
2736 rsp_get_opinfo(op, &info);
2737 rsptimings[info.op2] += lasttime - time;
2738 rspcounts[info.op2]++;
// Optional execution trace: prints the disassembly of the instruction at rsp.ppc,
// then every scalar and vector register whose value differs from the previous
// snapshot kept in the static prev_regs / prev_vecs arrays.
2741 #if LOG_INSTRUCTION_EXECUTION
2744 static UINT32 prev_regs[32];
2745 static VECTOR_REG prev_vecs[32];
2747 rsp_dasm_one(string, rsp.ppc, op);
2749 fprintf(exec_output, "%08X: %s", rsp.ppc, string);
2754 for (i=l; i < 36; i++)
2756 fprintf(exec_output, " ");
2760 fprintf(exec_output, "| ");
2762 for (i=0; i < 32; i++)
2764 if (rsp.r[i] != prev_regs[i])
2766 fprintf(exec_output, "R%d: %08X ", i, rsp.r[i]);
2768 prev_regs[i] = rsp.r[i];
2771 for (i=0; i < 32; i++)
2773 if (rsp.v[i].d[0] != prev_vecs[i].d[0] || rsp.v[i].d[1] != prev_vecs[i].d[1])
2775 fprintf(exec_output, "V%d: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X ", i,
2776 (UINT16)VREG_S(i,0), (UINT16)VREG_S(i,1), (UINT16)VREG_S(i,2), (UINT16)VREG_S(i,3), (UINT16)VREG_S(i,4), (UINT16)VREG_S(i,5), (UINT16)VREG_S(i,6), (UINT16)VREG_S(i,7));
2778 prev_vecs[i].d[0] = rsp.v[i].d[0];
2779 prev_vecs[i].d[1] = rsp.v[i].d[1];
2782 fprintf(exec_output, "\n");
// Single-step handling: consults rsp.step_count and can set BROKE.
// NOTE(review): the statements between these lines are not visible here.
2789 if( rsp_sp_status & SP_STATUS_SSTEP )
2791 if( rsp.step_count )
2797 rsp_sp_status |= SP_STATUS_BROKE;
// HALT or BROKE raised while executing: a warning is logged.
2801 if( rsp_sp_status & (SP_STATUS_HALT|SP_STATUS_BROKE))
2806 log(M64MSG_WARNING, "Quit due to SP halt/broke set by MTC0\n");
2809 ///WDC&SR64 hack:VERSION3:1.8x -2x FASTER & safer
// Watchdog 1: remembers the ExecutedCycles value at which rsp.ppc first fell
// inside the open range (0x137, 0x14D). Once at least 0x20 cycles have been
// counted while ppc is still inside that range, rsp_icount is zeroed so the
// loop ends. NOTE(review): the action taken when ppc leaves the range
// (presumably re-arming the flag) is on a line not shown.
2810 if((WDCHackFlag1==0)&&(rsp.ppc>0x137)&&(rsp.ppc<0x14D))
2811 WDCHackFlag1=ExecutedCycles;
2812 if ((WDCHackFlag1!=0)&&((rsp.ppc<=0x137)||(rsp.ppc>=0x14D)))
2814 if ((WDCHackFlag1!=0)&&((ExecutedCycles-WDCHackFlag1)>=0x20)&&(rsp.ppc>0x137)&&(rsp.ppc<0x14D))
2816 // printf("WDC hack quit 1\n");
2817 rsp_icount=0;//32 cycles should be enough
// Watchdog 2: same scheme for the open range (0xFCB, 0xFD5).
2819 if((WDCHackFlag2==0)&&(rsp.ppc>0xFCB)&&(rsp.ppc<0xFD5))
2820 WDCHackFlag2=ExecutedCycles;
2821 if ((WDCHackFlag2!=0)&&((rsp.ppc<=0xFCB)||(rsp.ppc>=0xFD5)))
2823 if ((WDCHackFlag2!=0)&&((ExecutedCycles-WDCHackFlag2)>=0x20)&&(rsp.ppc>0xFCB)&&(rsp.ppc<0xFD5))
2825 // printf("WDC hack quit 2\n");
2826 rsp_icount=0;//32 cycles should be enough
2833 return ExecutedCycles;
2836 /*****************************************************************************/
// Selects which way sp_dma() copies data.
2839 enum sp_dma_direction
// Copy from RDRAM into RSP instruction or data memory.
2841 SP_DMA_RDRAM_TO_IDMEM,
// Copy from RSP instruction or data memory out to RDRAM.
2842 SP_DMA_IDMEM_TO_RDRAM
// Performs an SP DMA transfer between RDRAM and RSP memory in the given
// direction, using sp_dma_length, sp_dram_addr and sp_mem_addr, then clears
// the DMA busy register and the DMABUSY status bit.
// NOTE(review): local declarations and braces are not among the lines shown.
2845 static void sp_dma(enum sp_dma_direction direction)
// Decode the packed length word:
//   bits 11..0  -> row length in bytes, rounded up to a multiple of 8 ((x | 7) + 1)
//   bits 19..12 -> number of rows minus one
//   bits 31..20 -> extra bytes added to the row length to form the RDRAM-side stride
2854 UINT32 l = sp_dma_length;
2855 length = ((l & 0xfff) | 7) + 1;
2856 skip = (l >> 20) + length;
2857 count = ((l >> 12) & 0xff) + 1;
2859 if (direction == SP_DMA_RDRAM_TO_IDMEM) // RDRAM -> I/DMEM
2861 //UINT32 src_address = sp_dram_addr & ~7;
2862 //UINT32 dst_address = (sp_mem_addr & 0x1000) ? 0x4001000 : 0x4000000;
// Both addresses are aligned down to 8 bytes. Bit 0x1000 of sp_mem_addr picks
// rsp_imem over rsp_dmem. The "/ 4" suggests these arrays hold 32-bit words
// (presumably - confirm against their declarations).
2863 src = (UINT8*)&rdram[(sp_dram_addr&~7) / 4];
2864 dst = (sp_mem_addr & 0x1000) ? (UINT8*)&rsp_imem[(sp_mem_addr & ~7 & 0xfff) / 4] : (UINT8*)&rsp_dmem[(sp_mem_addr & ~7 &0xfff) / 4];
2865 ///cpuintrf_push_context(0);
// Byte index is XORed with 7, i.e. bytes are addressed in reverse order within
// each 8-byte group.
2866 #define BYTE8_XOR_BE(a) ((a)^7)// JFG, Ocarina of Time
// Copy `count` rows of `length` bytes. Rows are packed back to back on the
// RSP-memory side and spaced `skip` bytes apart on the RDRAM side.
// NOTE(review): the 0xfff wrap applies to the index relative to `dst`, not to
// the bank base, so a non-zero start offset can index past the 4 KB bank - confirm.
2868 for (j=0; j < count; j++)
2870 for (i=0; i < length; i++)
2872 ///UINT8 b = program_read_byte_64be(src_address + i + (j*skip));
2873 ///program_write_byte_64be(dst_address + (((sp_mem_addr & ~7) + i + (j*length)) & 0xfff), b);
2874 dst[BYTE8_XOR_BE((i + j*length)&0xfff)] = src[BYTE8_XOR_BE(i + j*skip)];
2878 ///cpuintrf_pop_context();
// The transfer is finished by the time this function returns, so busy is cleared here.
2879 *z64_rspinfo.SP_DMA_BUSY_REG = 0;
2880 *z64_rspinfo.SP_STATUS_REG &= ~SP_STATUS_DMABUSY;
2882 else if (direction == SP_DMA_IDMEM_TO_RDRAM) // I/DMEM -> RDRAM
2884 //UINT32 dst_address = sp_dram_addr & ~7;
2885 //UINT32 src_address = (sp_mem_addr & 0x1000) ? 0x4001000 : 0x4000000;
// Mirror image of the branch above: RSP memory is the source, RDRAM the destination.
2887 dst = (UINT8*)&rdram[(sp_dram_addr&~7) / 4];
2888 src = (sp_mem_addr & 0x1000) ? (UINT8*)&rsp_imem[(sp_mem_addr & ~7 & 0xfff) / 4] : (UINT8*)&rsp_dmem[(sp_mem_addr & ~7 &0xfff) / 4];
2889 ///cpuintrf_push_context(0);
2891 for (j=0; j < count; j++)
2893 for (i=0; i < length; i++)
2895 ///UINT8 b = program_read_byte_64be(src_address + (((sp_mem_addr & ~7) + i + (j*length)) & 0xfff));
2896 ///program_write_byte_64be(dst_address + i + (j*skip), b);
2897 dst[BYTE8_XOR_BE(i + j*skip)] = src[BYTE8_XOR_BE((+i + j*length)&0xfff)];
2901 ///cpuintrf_pop_context();
2902 *z64_rspinfo.SP_DMA_BUSY_REG = 0;
2903 *z64_rspinfo.SP_STATUS_REG &= ~SP_STATUS_DMABUSY;
// Reads an SP register. The case labels are word indices (byte offset / 4).
// `dummy` is not used in the lines shown.
// NOTE(review): the switch expression and the return statements for
// SP_MEM_ADDR_REG, SP_DMA_FULL_REG and SP_DMA_BUSY_REG are not visible here.
// No case for 0x0c/4 (SP_WR_LEN_REG) is visible either.
2913 UINT32 n64_sp_reg_r(UINT32 offset, UINT32 dummy)
2917 case 0x00/4: // SP_MEM_ADDR_REG
2920 case 0x04/4: // SP_DRAM_ADDR_REG
2921 return sp_dram_addr;
2923 case 0x08/4: // SP_RD_LEN_REG
2924 return sp_dma_rlength;
2926 case 0x10/4: // SP_STATUS_REG
2927 return rsp_sp_status;
2929 case 0x14/4: // SP_DMA_FULL_REG
2932 case 0x18/4: // SP_DMA_BUSY_REG
2935 case 0x1c/4: // SP_SEMAPHORE_REG
2936 return sp_semaphore;
// Offsets that match no case are reported (presumably from a default label not shown).
2939 log(M64MSG_WARNING, "sp_reg_r: %08X\n", offset);
2946 //UINT32 n64_sp_reg_w(RSP_REGS & rsp, UINT32 offset, UINT32 data, UINT32 dummy)
// Writes an SP register. Offsets with bit 0x10000 clear select the main
// register bank; the second switch (presumably the else branch) handles the
// bank containing SP_PC_REG. Case labels are word indices (byte offset / 4).
// `dummy` is not used in the lines shown.
// NOTE(review): some statements (e.g. the SP_MEM_ADDR_REG store) are not visible here.
2947 void n64_sp_reg_w(UINT32 offset, UINT32 data, UINT32 dummy)
// Tested after the status bits are processed; the line that sets it to 1 is not
// visible (presumably the "set interrupt" branch).
2949 UINT32 InterruptPending=0;
2950 if ((offset & 0x10000) == 0)
2952 switch (offset & 0xffff)
2954 case 0x00/4: // SP_MEM_ADDR_REG
2958 case 0x04/4: // SP_DRAM_ADDR_REG
// Only the low 24 bits of the RDRAM address are kept.
2959 sp_dram_addr = data & 0xffffff;
2962 case 0x08/4: // SP_RD_LEN_REG
2963 // sp_dma_length = data & 0xfff;
2964 // sp_dma_count = (data >> 12) & 0xff;
2965 // sp_dma_skip = (data >> 20) & 0xfff;
// Writing the read-length register starts an RDRAM -> I/DMEM transfer.
2967 sp_dma(SP_DMA_RDRAM_TO_IDMEM);
2970 case 0x0c/4: // SP_WR_LEN_REG
2971 // sp_dma_length = data & 0xfff;
2972 // sp_dma_count = (data >> 12) & 0xff;
2973 // sp_dma_skip = (data >> 20) & 0xfff;
// Writing the write-length register starts an I/DMEM -> RDRAM transfer.
2975 sp_dma(SP_DMA_IDMEM_TO_RDRAM);
2978 case 0x10/4: // SP_STATUS_REG
// Status writes are command bits: each feature has one bit that clears it and
// a neighbouring bit that sets it. Requesting both at once is logged as an
// error for halt, interrupt and single-step.
2980 if((data&0x1)&&(data&0x2))
2981 log(M64MSG_ERROR, "Clear halt and set halt simultaneously\n");
2982 if((data&0x8)&&(data&0x10))
2983 log(M64MSG_ERROR, "Clear int and set int simultaneously\n");
2984 if((data&0x20)&&(data&0x40))
2985 log(M64MSG_ERROR, "Clear sstep and set sstep simultaneously\n");
2986 if (data & 0x00000001) // clear halt
2988 rsp_sp_status &= ~SP_STATUS_HALT;
2992 // cpu_spinuntil_trigger(6789);
2994 // cpunum_set_input_line(1, INPUT_LINE_HALT, CLEAR_LINE);
2995 // rsp_sp_status &= ~SP_STATUS_HALT;
3002 if (data & 0x00000002) // set halt
3004 // cpunum_set_input_line(1, INPUT_LINE_HALT, ASSERT_LINE);
3005 rsp_sp_status |= SP_STATUS_HALT;
3007 if (data & 0x00000004) rsp_sp_status &= ~SP_STATUS_BROKE; // clear broke
3008 if (data & 0x00000008) // clear interrupt
// Clears the SP interrupt bit in the MI interrupt register.
3010 *z64_rspinfo.MI_INTR_REG &= ~R4300i_SP_Intr;
3011 ///TEMPORARY COMMENTED FOR SPEED
3012 /// printf("sp_reg_w clear interrupt");
3013 //clear_rcp_interrupt(SP_INTERRUPT);
3015 if (data & 0x00000010) // set interrupt
3017 //signal_rcp_interrupt(SP_INTERRUPT);
3019 if (data & 0x00000020) rsp_sp_status &= ~SP_STATUS_SSTEP; // clear single step
3020 if (data & 0x00000040) {
3021 rsp_sp_status |= SP_STATUS_SSTEP; // set single step
3022 log(M64MSG_STATUS, "RSP STATUS REG: SSTEP set\n");
3024 if (data & 0x00000080) rsp_sp_status &= ~SP_STATUS_INTR_BREAK; // clear interrupt on break
3025 if (data & 0x00000100) rsp_sp_status |= SP_STATUS_INTR_BREAK; // set interrupt on break
3026 if (data & 0x00000200) rsp_sp_status &= ~SP_STATUS_SIGNAL0; // clear signal 0
3027 if (data & 0x00000400) rsp_sp_status |= SP_STATUS_SIGNAL0; // set signal 0
3028 if (data & 0x00000800) rsp_sp_status &= ~SP_STATUS_SIGNAL1; // clear signal 1
3029 if (data & 0x00001000) rsp_sp_status |= SP_STATUS_SIGNAL1; // set signal 1
3030 if (data & 0x00002000) rsp_sp_status &= ~SP_STATUS_SIGNAL2; // clear signal 2
3031 if (data & 0x00004000) rsp_sp_status |= SP_STATUS_SIGNAL2; // set signal 2
3032 if (data & 0x00008000) rsp_sp_status &= ~SP_STATUS_SIGNAL3; // clear signal 3
3033 if (data & 0x00010000) rsp_sp_status |= SP_STATUS_SIGNAL3; // set signal 3
3034 if (data & 0x00020000) rsp_sp_status &= ~SP_STATUS_SIGNAL4; // clear signal 4
3035 if (data & 0x00040000) rsp_sp_status |= SP_STATUS_SIGNAL4; // set signal 4
3036 if (data & 0x00080000) rsp_sp_status &= ~SP_STATUS_SIGNAL5; // clear signal 5
3037 if (data & 0x00100000) rsp_sp_status |= SP_STATUS_SIGNAL5; // set signal 5
3038 if (data & 0x00200000) rsp_sp_status &= ~SP_STATUS_SIGNAL6; // clear signal 6
3039 if (data & 0x00400000) rsp_sp_status |= SP_STATUS_SIGNAL6; // set signal 6
3040 if (data & 0x00800000) rsp_sp_status &= ~SP_STATUS_SIGNAL7; // clear signal 7
3041 if (data & 0x01000000) rsp_sp_status |= SP_STATUS_SIGNAL7; // set signal 7
// Raise the interrupt only after all status bits have been applied: sets bit 0
// of MI_INTR_REG and calls the CheckInterrupts callback.
3043 if(InterruptPending==1)
3045 *z64_rspinfo.MI_INTR_REG |= 1;
3046 z64_rspinfo.CheckInterrupts();
3052 case 0x1c/4: // SP_SEMAPHORE_REG
3053 sp_semaphore = data;
3054 // mame_printf_debug("sp_semaphore = %08X\n", sp_semaphore);
// Writes that match no case in this bank are reported.
3058 log(M64MSG_WARNING, "sp_reg_w: %08X, %08X\n", data, offset);
// Second register bank (reached when bit 0x10000 of the offset is set, presumably).
3064 switch (offset & 0xffff)
3066 case 0x00/4: // SP_PC_REG
3067 //cpunum_set_info_int(1, CPUINFO_INT_PC, 0x04001000 | (data & 0xfff));
3071 log(M64MSG_WARNING, "sp_reg_w: %08X, %08X\n", data, offset);
// Thin wrapper for reading an SP register: the only active case forwards
// `reg` to n64_sp_reg_r() with a zero dummy argument and returns its result.
3077 UINT32 sp_read_reg(UINT32 reg)
3081 //case 4: return rsp_sp_status;
3082 default: return n64_sp_reg_r(reg, 0x00000000);
3087 void sp_write_reg(UINT32 reg, UINT32 data)
3091 default: n64_sp_reg_w(reg, data, 0x00000000); break;