RSP LLE plugin. Compile and run (slowly, eat 50% CPU) on the OpenPandora
[mupen64plus-pandora.git] / source / mupen64plus-rsp-z64 / src / rsp.cpp
CommitLineData
fc5d46b4 1/*
2 * z64
3 *
4 * Copyright (C) 2007 ziggy
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20**/
21
22/*
23Nintendo/SGI Reality Signal Processor (RSP) emulator
24
25Written by Ville Linde
26*/
27// #include "z64.h"
28#include "rsp.h"
29#include "rsp_opinfo.h"
30#include <math.h> // sqrt
31#include <assert.h>
32#include <string.h>
33
34#define INLINE inline
35
36#define LOG_INSTRUCTION_EXECUTION 0
37#define SAVE_DISASM 0
38#define SAVE_DMEM 0
39
// Debug helper: prints vector register `x` as "V<x>: " followed by its eight
// 16-bit elements (0..7) in hexadecimal, separated by '|'.
#define PRINT_VECREG(x) printf("V%d: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X\n", (x), \
    (UINT16)VREG_S((x),0), (UINT16)VREG_S((x),1), \
    (UINT16)VREG_S((x),2), (UINT16)VREG_S((x),3), \
    (UINT16)VREG_S((x),4), (UINT16)VREG_S((x),5), \
    (UINT16)VREG_S((x),6), (UINT16)VREG_S((x),7))
45
46
47extern offs_t rsp_dasm_one(char *buffer, offs_t pc, UINT32 op);
48
49#if LOG_INSTRUCTION_EXECUTION
50static FILE *exec_output;
51#endif
52
53
54// INLINE void sp_set_status(UINT32 status)
55// {
56// if (status & 0x1)
57// {
58// cpu_trigger(6789);
59
60// cpunum_set_input_line(1, INPUT_LINE_HALT, ASSERT_LINE);
61// rsp_sp_status |= SP_STATUS_HALT;
62// }
63// if (status & 0x2)
64// {
65// rsp_sp_status |= SP_STATUS_BROKE;
66
67// if (rsp_sp_status & SP_STATUS_INTR_BREAK)
68// {
69// signal_rcp_interrupt(SP_INTERRUPT);
70// }
71// }
72// }
73
74
75#if 0
76enum
77{
78 RSP_PC = 1,
79 RSP_R0,
80 RSP_R1,
81 RSP_R2,
82 RSP_R3,
83 RSP_R4,
84 RSP_R5,
85 RSP_R6,
86 RSP_R7,
87 RSP_R8,
88 RSP_R9,
89 RSP_R10,
90 RSP_R11,
91 RSP_R12,
92 RSP_R13,
93 RSP_R14,
94 RSP_R15,
95 RSP_R16,
96 RSP_R17,
97 RSP_R18,
98 RSP_R19,
99 RSP_R20,
100 RSP_R21,
101 RSP_R22,
102 RSP_R23,
103 RSP_R24,
104 RSP_R25,
105 RSP_R26,
106 RSP_R27,
107 RSP_R28,
108 RSP_R29,
109 RSP_R30,
110 RSP_R31,
111};
112#endif
113
114
115#ifdef RSPTIMING
116uint64_t rsptimings[512];
117int rspcounts[512];
118#endif
119
120
// Branch/jump helpers. Each one sets rsp.nextpc to an address of the form
// 0x04001000 | (target & 0xfff), i.e. the target is wrapped to 12 bits.
//   JUMP_ABS  : target is the word address `addr` (shifted left by 2).
//   JUMP_REL  : target is sp_pc plus the word offset `offset` (shifted left by 2).
//   JUMP_PC   : target is the byte address `addr`.
// The *_L variants and LINK additionally store sp_pc + 4 in rsp.r[l].
// NOTE: the block macros expand to `{ ... }`, so `MACRO(x);` leaves an extra
// empty statement behind; be careful when using them in an unbraced if/else.
#define JUMP_ABS(addr) { rsp.nextpc = 0x04001000 | (((addr) << 2) & 0xfff); }
#define JUMP_ABS_L(addr,l) { rsp.nextpc = 0x04001000 | (((addr) << 2) & 0xfff); rsp.r[l] = sp_pc + 4; }
#define JUMP_REL(offset) { rsp.nextpc = 0x04001000 | ((sp_pc + ((offset) << 2)) & 0xfff); }
#define JUMP_REL_L(offset,l) { rsp.nextpc = 0x04001000 | ((sp_pc + ((offset) << 2)) & 0xfff); rsp.r[l] = sp_pc + 4; }
#define JUMP_PC(addr) { rsp.nextpc = 0x04001000 | ((addr) & 0xfff); }
#define JUMP_PC_L(addr,l) { rsp.nextpc = 0x04001000 | ((addr) & 0xfff); rsp.r[l] = sp_pc + 4; }
#define LINK(l) rsp.r[l] = sp_pc + 4
128
129
// Field extractors for vector instructions. They read a variable named `op`
// that must be in scope at the point of use.
#define VDREG ((op >> 6) & 0x1f)    // bits 10..6
#define VS1REG ((op >> 11) & 0x1f)  // bits 15..11
#define VS2REG ((op >> 16) & 0x1f)  // bits 20..16
#define EL ((op >> 21) & 0xf)       // bits 24..21

// S_VREG_*: bit shift of byte / 16-bit / 32-bit lane `offset` inside the
// 64-bit word that holds it (lane 0 ends up in the most significant position).
#define S_VREG_B(offset) (((15 - (offset)) & 0x07) << 3)
#define S_VREG_S(offset) (((7 - (offset)) & 0x03) << 4)
#define S_VREG_L(offset) (((3 - (offset)) & 0x01) << 5)

// M_VREG_*: 64-bit mask that selects lane `offset` at the shift above.
#define M_VREG_B(offset) ((UINT64)0x00FF << S_VREG_B(offset))
#define M_VREG_S(offset) ((UINT64)0x0000FFFFul << S_VREG_S(offset))
#define M_VREG_L(offset) ((UINT64)0x00000000FFFFFFFFull << S_VREG_L(offset))

// R_VREG_*: read lane `offset` of vector register `reg` from rsp.v[reg].d[].
// The 64-bit word index is (15 - offset) >> 3, (7 - offset) >> 2 or
// (3 - offset) >> 1 respectively. R_VREG_S returns the lane as a signed INT16.
#define R_VREG_B(reg, offset) ((rsp.v[(reg)].d[(15 - (offset)) >> 3] >> S_VREG_B(offset)) & 0x00FF)
#define R_VREG_S(reg, offset) (INT16)((rsp.v[(reg)].d[(7 - (offset)) >> 2] >> S_VREG_S(offset)) & 0x0000FFFFul)
#define R_VREG_L(reg, offset) ((rsp.v[(reg)].d[(3 - (offset)) >> 1] >> S_VREG_L(offset)) & 0x00000000FFFFFFFFull)

// W_VREG_*: read-modify-write of one lane: clear the lane with ~mask, then OR
// in `val` shifted into place (bits of `val` outside the lane are masked off).
#define W_VREG_B(reg, offset, val) (rsp.v[(reg)].d[(15 - (offset)) >> 3] = (rsp.v[(reg)].d[(15 - (offset)) >> 3] & ~M_VREG_B(offset)) | (M_VREG_B(offset) & ((UINT64)(val) << S_VREG_B(offset))))
#define W_VREG_S(reg, offset, val) (rsp.v[(reg)].d[(7 - (offset)) >> 2] = (rsp.v[(reg)].d[(7 - (offset)) >> 2] & ~M_VREG_S(offset)) | (M_VREG_S(offset) & ((UINT64)(val) << S_VREG_S(offset))))
#define W_VREG_L(reg, offset, val) (rsp.v[(reg)].d[(3 - (offset)) >> 1] = (rsp.v[(reg)].d[(3 - (offset)) >> 1] & ~M_VREG_L(offset)) | (M_VREG_L(offset) & ((UINT64)(val) << S_VREG_L(offset))))


// Lane selection for vector ALU operations.
// VEC_EL_1 ignores the element specifier `x` and returns lane `z` unchanged;
// VEC_EL_2 looks the lane up in vector_elements_2[x][z].
#define VEC_EL_1(x,z) (z)
#define VEC_EL_2(x,z) (vector_elements_2[(x)][(z)])
154
// Whole accumulator for lane `x`, accessed through the `.q` member.
#define ACCUM(x) rsp.accum[((x))].q

// Bit shifts of the high / middle / low 16-bit slices inside ACCUM(x):
// 48, 32 and 16 respectively.
#define S_ACCUM_H (3 << 4)
#define S_ACCUM_M (2 << 4)
#define S_ACCUM_L (1 << 4)

// 64-bit masks that select each 16-bit slice.
#define M_ACCUM_H (((INT64)0x0000FFFF) << S_ACCUM_H)
#define M_ACCUM_M (((INT64)0x0000FFFF) << S_ACCUM_M)
#define M_ACCUM_L (((INT64)0x0000FFFF) << S_ACCUM_L)

// Read one slice of lane `x` as a signed 16-bit value.
#define R_ACCUM_H(x) ((INT16)((ACCUM(x) >> S_ACCUM_H) & 0x00FFFF))
#define R_ACCUM_M(x) ((INT16)((ACCUM(x) >> S_ACCUM_M) & 0x00FFFF))
#define R_ACCUM_L(x) ((INT16)((ACCUM(x) >> S_ACCUM_L) & 0x00FFFF))

// Replace one slice of lane `x` with the low 16 bits of `y`, leaving the other
// bits of the accumulator untouched.
#define W_ACCUM_H(x, y) (ACCUM(x) = (ACCUM(x) & ~M_ACCUM_H) | (M_ACCUM_H & ((INT64)(y) << S_ACCUM_H)))
#define W_ACCUM_M(x, y) (ACCUM(x) = (ACCUM(x) & ~M_ACCUM_M) | (M_ACCUM_M & ((INT64)(y) << S_ACCUM_M)))
#define W_ACCUM_L(x, y) (ACCUM(x) = (ACCUM(x) & ~M_ACCUM_L) | (M_ACCUM_L & ((INT64)(y) << S_ACCUM_L)))
172
173
174
175RSP_REGS rsp;
176static int rsp_icount;
177// RSP Interface
178
// Lvalue accessors for SP registers: each one dereferences, as a UINT32, the
// corresponding pointer stored in z64_rspinfo.
#define rsp_sp_status (*(UINT32*)z64_rspinfo.SP_STATUS_REG)
#define sp_mem_addr (*(UINT32*)z64_rspinfo.SP_MEM_ADDR_REG)
#define sp_dram_addr (*(UINT32*)z64_rspinfo.SP_DRAM_ADDR_REG)
#define sp_semaphore (*(UINT32*)z64_rspinfo.SP_SEMAPHORE_REG)

// DMA length registers (read length / write length).
#define sp_dma_rlength (*(UINT32*)z64_rspinfo.SP_RD_LEN_REG)
#define sp_dma_wlength (*(UINT32*)z64_rspinfo.SP_WR_LEN_REG)
186
187INT32 sp_dma_length;
188
189/*****************************************************************************/
190
191UINT32 get_cop0_reg(int reg)
192{
193 if (reg >= 0 && reg < 8)
194 {
195 return sp_read_reg(reg);
196 }
197 else if (reg >= 8 && reg < 16)
198 {
199 return n64_dp_reg_r(reg - 8, 0x00000000);
200 }
201 else
202 {
203 log(M64MSG_ERROR, "RSP: get_cop0_reg: %d", reg);
204 return ~0;
205 }
206}
207
208void set_cop0_reg(int reg, UINT32 data)
209{
210 if (reg >= 0 && reg < 8)
211 {
212 sp_write_reg(reg, data);
213 }
214 else if (reg >= 8 && reg < 16)
215 {
216 n64_dp_reg_w(reg - 8, data, 0x00000000);
217 }
218 else
219 {
220 log(M64MSG_ERROR, "RSP: set_cop0_reg: %d, %08X\n", reg, data);
221 }
222}
223
224static int got_unimp;
225void unimplemented_opcode(UINT32 op)
226{
227 got_unimp = 1;
228#ifdef MAME_DEBUG
229 char string[200];
230 rsp_dasm_one(string, rsp.ppc, op);
231 printf("%08X: %s\n", rsp.ppc, string);
232#endif
233
234#if SAVE_DISASM
235 {
236 char string[200];
237 int i;
238 FILE *dasm;
239 dasm = fopen("rsp_disasm.txt", "wt");
240
241 for (i=0; i < 0x1000; i+=4)
242 {
243 UINT32 opcode = ROPCODE(0x04001000 + i);
244 rsp_dasm_one(string, 0x04001000 + i, opcode);
245 fprintf(dasm, "%08X: %08X %s\n", 0x04001000 + i, opcode, string);
246 }
247 fclose(dasm);
248 }
249#endif
250#if SAVE_DMEM
251 {
252 int i;
253 FILE *dmem;
254 dmem = fopen("rsp_dmem.bin", "wb");
255
256 for (i=0; i < 0x1000; i++)
257 {
258 fputc(READ8(0x04000000 + i), dmem);
259 }
260 fclose(dmem);
261 }
262#endif
263
264 log(M64MSG_ERROR, "RSP: unknown opcode %02X (%d) (%08X) at %08X\n", op >> 26, op >> 26, op, rsp.ppc);
265}
266
267/*****************************************************************************/
268
// Lane permutation table indexed as [element specifier][lane]; every row is a
// permutation of 0..7. The trailing comment on each row names the element
// specifier it belongs to.
// NOTE(review): VEC_EL_1 as defined in this file expands to its second
// argument and does not consult this table, so it appears to be unused by the
// macros visible here - confirm before removing.
const int vector_elements_1[16][8] =
{
    { 0, 1, 2, 3, 4, 5, 6, 7 }, // none
    { 0, 1, 2, 3, 4, 5, 6 ,7 }, // ???
    { 1, 3, 5, 7, 0, 2, 4, 6 }, // 0q
    { 0, 2, 4, 6, 1, 3, 5, 7 }, // 1q
    { 1, 2, 3, 5, 6, 7, 0, 4 }, // 0h
    { 0, 2, 3, 4, 6, 7, 1, 5 }, // 1h
    { 0, 1, 3, 4, 5, 7, 2, 6 }, // 2h
    { 0, 1, 2, 4, 5, 6, 3, 7 }, // 3h
    { 1, 2, 3, 4, 5, 6, 7, 0 }, // 0
    { 0, 2, 3, 4, 5, 6, 7, 1 }, // 1
    { 0, 1, 3, 4, 5, 6, 7, 2 }, // 2
    { 0, 1, 2, 4, 5, 6, 7, 3 }, // 3
    { 0, 1, 2, 3, 5, 6, 7, 4 }, // 4
    { 0, 1, 2, 3, 4, 6, 7, 5 }, // 5
    { 0, 1, 2, 3, 4, 5, 7, 6 }, // 6
    { 0, 1, 2, 3, 4, 5, 6, 7 }, // 7
};
288
// Source-lane table used through VEC_EL_2(EL, lane): row = 4-bit element
// specifier taken from the opcode, column = destination lane, value = lane of
// the second source vector (VS2) that feeds that destination lane.
// Rows 0-1 select each lane itself, rows 2-3 repeat one lane per pair
// ("q"), rows 4-7 repeat one lane per group of four ("h"), and rows 8-15
// broadcast a single lane to all eight positions.
const int vector_elements_2[16][8] =
{
    { 0, 1, 2, 3, 4, 5, 6, 7 }, // none
    { 0, 1, 2, 3, 4, 5, 6, 7 }, // ???
    { 0, 0, 2, 2, 4, 4, 6, 6 }, // 0q
    { 1, 1, 3, 3, 5, 5, 7, 7 }, // 1q
    { 0, 0, 0, 0, 4, 4, 4, 4 }, // 0h
    { 1, 1, 1, 1, 5, 5, 5, 5 }, // 1h
    { 2, 2, 2, 2, 6, 6, 6, 6 }, // 2h
    { 3, 3, 3, 3, 7, 7, 7, 7 }, // 3h
    { 0, 0, 0, 0, 0, 0, 0, 0 }, // 0
    { 1, 1, 1, 1, 1, 1, 1, 1 }, // 1
    { 2, 2, 2, 2, 2, 2, 2, 2 }, // 2
    { 3, 3, 3, 3, 3, 3, 3, 3 }, // 3
    { 4, 4, 4, 4, 4, 4, 4, 4 }, // 4
    { 5, 5, 5, 5, 5, 5, 5, 5 }, // 5
    { 6, 6, 6, 6, 6, 6, 6, 6 }, // 6
    { 7, 7, 7, 7, 7, 7, 7, 7 }, // 7
};
308
/*
 * Initialises the global RSP state.
 *
 * Zeroes the whole `rsp` structure, stores a copy of `info` in rsp.ext,
 * sets sp_pc to 0, sets rsp.nextpc to ~0U and clears rsp.step_count.
 * When LOG_INSTRUCTION_EXECUTION is enabled it also opens "rsp_execute.txt"
 * for the execution log.
 */
void rsp_init(RSP_INFO info)
{
#if LOG_INSTRUCTION_EXECUTION
    exec_output = fopen("rsp_execute.txt", "wt");
#endif

    memset(&rsp, 0, sizeof(rsp));
    // NOTE(review): sp_pc is a macro defined elsewhere; it may go through
    // rsp.ext, so keep this assignment before the sp_pc write - confirm.
    rsp.ext = info;

    sp_pc = 0; //0x4001000;
    // NOTE(review): ~0U presumably means "no jump pending" - confirm against
    // the execute loop.
    rsp.nextpc = ~0U;
    //rsp_invalidate(0, 0x1000);
    rsp.step_count=0;
}
323
/*
 * Resets the pending next-PC value to ~0U, the same value rsp_init() assigns.
 * No other RSP state is touched.
 */
void rsp_reset(void)
{
    rsp.nextpc = ~0U;
}
328
329void handle_lwc2(UINT32 op)
330{
331 int i, end;
332 UINT32 ea;
333 int dest = (op >> 16) & 0x1f;
334 int base = (op >> 21) & 0x1f;
335 int index = (op >> 7) & 0xf;
336 int offset = (op & 0x7f);
337 if (offset & 0x40)
338 offset |= 0xffffffc0;
339
340 switch ((op >> 11) & 0x1f)
341 {
342 case 0x00: /* LBV */
343 {
344 // 31 25 20 15 10 6 0
345 // --------------------------------------------------
346 // | 110010 | BBBBB | TTTTT | 00000 | IIII | Offset |
347 // --------------------------------------------------
348 //
349 // Load 1 byte to vector byte index
350
351 ea = (base) ? rsp.r[base] + offset : offset;
352 VREG_B(dest, index) = READ8(ea);
353 break;
354 }
355 case 0x01: /* LSV */
356 {
357 // 31 25 20 15 10 6 0
358 // --------------------------------------------------
359 // | 110010 | BBBBB | TTTTT | 00001 | IIII | Offset |
360 // --------------------------------------------------
361 //
362 // Loads 2 bytes starting from vector byte index
363
364 ea = (base) ? rsp.r[base] + (offset * 2) : (offset * 2);
365
366 end = index + 2;
367
368 // VP need mask i and ea ?
369 for (i=index; i < end; i++)
370 {
371 VREG_B(dest, i) = READ8(ea);
372 ea++;
373 }
374 break;
375 }
376 case 0x02: /* LLV */
377 {
378 // 31 25 20 15 10 6 0
379 // --------------------------------------------------
380 // | 110010 | BBBBB | TTTTT | 00010 | IIII | Offset |
381 // --------------------------------------------------
382 //
383 // Loads 4 bytes starting from vector byte index
384
385 ea = (base) ? rsp.r[base] + (offset * 4) : (offset * 4);
386
387 end = index + 4;
388
389 // VP need mask i and ea ?
390 for (i=index; i < end; i++)
391 {
392 VREG_B(dest, i) = READ8(ea);
393 ea++;
394 }
395 break;
396 }
397 case 0x03: /* LDV */
398 {
399 // 31 25 20 15 10 6 0
400 // --------------------------------------------------
401 // | 110010 | BBBBB | TTTTT | 00011 | IIII | Offset |
402 // --------------------------------------------------
403 //
404 // Loads 8 bytes starting from vector byte index
405
406 ea = (base) ? rsp.r[base] + (offset * 8) : (offset * 8);
407
408 end = index + 8;
409
410 // VP need mask i and ea ?
411 for (i=index; i < end; i++)
412 {
413 VREG_B(dest, i) = READ8(ea);
414 ea++;
415 }
416 break;
417 }
418 case 0x04: /* LQV */
419 {
420 // 31 25 20 15 10 6 0
421 // --------------------------------------------------
422 // | 110010 | BBBBB | TTTTT | 00100 | IIII | Offset |
423 // --------------------------------------------------
424 //
425 // Loads up to 16 bytes starting from vector byte index
426
427 ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16);
428
429 end = index + (16 - (ea & 0xf));
430 if (end > 16) end = 16;
431 for (i=index; i < end; i++)
432 {
433 VREG_B(dest, i) = READ8(ea);
434 ea++;
435 }
436 break;
437 }
438 case 0x05: /* LRV */
439 {
440 // 31 25 20 15 10 6 0
441 // --------------------------------------------------
442 // | 110010 | BBBBB | TTTTT | 00101 | IIII | Offset |
443 // --------------------------------------------------
444 //
445 // Stores up to 16 bytes starting from right side until 16-byte boundary
446
447 ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16);
448
449 index = 16 - ((ea & 0xf) - index);
450 end = 16;
451 ea &= ~0xf;
452 //assert(index == 0);
453
454 for (i=index; i < end; i++)
455 {
456 VREG_B(dest, i) = READ8(ea);
457 ea++;
458 }
459 break;
460 }
461 case 0x06: /* LPV */
462 {
463 // 31 25 20 15 10 6 0
464 // --------------------------------------------------
465 // | 110010 | BBBBB | TTTTT | 00110 | IIII | Offset |
466 // --------------------------------------------------
467 //
468 // Loads a byte as the upper 8 bits of each element
469
470 ea = (base) ? rsp.r[base] + (offset * 8) : (offset * 8);
471
472 for (i=0; i < 8; i++)
473 {
474 VREG_S(dest, i) = READ8(ea + (((16-index) + i) & 0xf)) << 8;
475 }
476 break;
477 }
478 case 0x07: /* LUV */
479 {
480 // 31 25 20 15 10 6 0
481 // --------------------------------------------------
482 // | 110010 | BBBBB | TTTTT | 00111 | IIII | Offset |
483 // --------------------------------------------------
484 //
485 // Loads a byte as the bits 14-7 of each element
486
487 ea = (base) ? rsp.r[base] + (offset * 8) : (offset * 8);
488
489 for (i=0; i < 8; i++)
490 {
491 VREG_S(dest, i) = READ8(ea + (((16-index) + i) & 0xf)) << 7;
492 }
493 break;
494 }
495 case 0x08: /* LHV */
496 {
497 // 31 25 20 15 10 6 0
498 // --------------------------------------------------
499 // | 110010 | BBBBB | TTTTT | 01000 | IIII | Offset |
500 // --------------------------------------------------
501 //
502 // Loads a byte as the bits 14-7 of each element, with 2-byte stride
503
504 ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16);
505
506 for (i=0; i < 8; i++)
507 {
508 VREG_S(dest, i) = READ8(ea + (((16-index) + (i<<1)) & 0xf)) << 7;
509 }
510 break;
511 }
512 case 0x09: /* LFV */
513 {
514 // 31 25 20 15 10 6 0
515 // --------------------------------------------------
516 // | 110010 | BBBBB | TTTTT | 01001 | IIII | Offset |
517 // --------------------------------------------------
518 //
519 // Loads a byte as the bits 14-7 of upper or lower quad, with 4-byte stride
520
521 // fatalerror("RSP: LFV\n");
522
523 //if (index & 0x7) fatalerror("RSP: LFV: index = %d at %08X\n", index, rsp.ppc);
524
525 ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16);
526
527 // not sure what happens if 16-byte boundary is crossed...
528 //if ((ea & 0xf) > 0) fatalerror("RSP: LFV: 16-byte boundary crossing at %08X, recheck this!\n", rsp.ppc);
529
530 end = (index >> 1) + 4;
531
532 for (i=index >> 1; i < end; i++)
533 {
534 VREG_S(dest, i) = READ8(ea) << 7;
535 ea += 4;
536 }
537 break;
538 }
539 case 0x0a: /* LWV */
540 {
541 // 31 25 20 15 10 6 0
542 // --------------------------------------------------
543 // | 110010 | BBBBB | TTTTT | 01010 | IIII | Offset |
544 // --------------------------------------------------
545 //
546 // Loads the full 128-bit vector starting from vector byte index and wrapping to index 0
547 // after byte index 15
548
549 ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16);
550
551 // not sure what happens if 16-byte boundary is crossed...
552 //if ((ea & 0xf) > 0) fatalerror("RSP: LWV: 16-byte boundary crossing at %08X, recheck this!\n", rsp.ppc);
553
554 end = (16 - index) + 16;
555
556 for (i=(16 - index); i < end; i++)
557 {
558 VREG_B(dest, i & 0xf) = READ8(ea);
559 ea += 4;
560 }
561 break;
562 }
563 case 0x0b: /* LTV */
564 {
565 // 31 25 20 15 10 6 0
566 // --------------------------------------------------
567 // | 110010 | BBBBB | TTTTT | 01011 | IIII | Offset |
568 // --------------------------------------------------
569 //
570 // Loads one element to maximum of 8 vectors, while incrementing element index
571
572 // FIXME: has a small problem with odd indices
573
574 int element;
575 int vs = dest;
576 int ve = dest + 8;
577 if (ve > 32)
578 ve = 32;
579
580 element = 7 - (index >> 1);
581
582 //if (index & 1) fatalerror("RSP: LTV: index = %d\n", index);
583
584 ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16);
585
586 ea = ((ea + 8) & ~0xf) + (index & 1);
587 for (i=vs; i < ve; i++)
588 {
589 element = ((8 - (index >> 1) + (i-vs)) << 1);
590 VREG_B(i, (element & 0xf)) = READ8(ea);
591 VREG_B(i, ((element+1) & 0xf)) = READ8(ea+1);
592
593 ea += 2;
594 }
595 break;
596 }
597
598 default:
599 {
600 unimplemented_opcode(op);
601 break;
602 }
603 }
604}
605
606void handle_swc2(UINT32 op)
607{
608 int i, end;
609 int eaoffset;
610 UINT32 ea;
611 int dest = (op >> 16) & 0x1f;
612 int base = (op >> 21) & 0x1f;
613 int index = (op >> 7) & 0xf;
614 int offset = (op & 0x7f);
615 if (offset & 0x40)
616 offset |= 0xffffffc0;
617
618 switch ((op >> 11) & 0x1f)
619 {
620 case 0x00: /* SBV */
621 {
622 // 31 25 20 15 10 6 0
623 // --------------------------------------------------
624 // | 111010 | BBBBB | TTTTT | 00000 | IIII | Offset |
625 // --------------------------------------------------
626 //
627 // Stores 1 byte from vector byte index
628
629 ea = (base) ? rsp.r[base] + offset : offset;
630 WRITE8(ea, VREG_B(dest, index));
631 break;
632 }
633 case 0x01: /* SSV */
634 {
635 // 31 25 20 15 10 6 0
636 // --------------------------------------------------
637 // | 111010 | BBBBB | TTTTT | 00001 | IIII | Offset |
638 // --------------------------------------------------
639 //
640 // Stores 2 bytes starting from vector byte index
641
642 ea = (base) ? rsp.r[base] + (offset * 2) : (offset * 2);
643
644 end = index + 2;
645
646 for (i=index; i < end; i++)
647 {
648 WRITE8(ea, VREG_B(dest, i));
649 ea++;
650 }
651 break;
652 }
653 case 0x02: /* SLV */
654 {
655 // 31 25 20 15 10 6 0
656 // --------------------------------------------------
657 // | 111010 | BBBBB | TTTTT | 00010 | IIII | Offset |
658 // --------------------------------------------------
659 //
660 // Stores 4 bytes starting from vector byte index
661
662 ea = (base) ? rsp.r[base] + (offset * 4) : (offset * 4);
663
664 end = index + 4;
665
666 for (i=index; i < end; i++)
667 {
668 WRITE8(ea, VREG_B(dest, i));
669 ea++;
670 }
671 break;
672 }
673 case 0x03: /* SDV */
674 {
675 // 31 25 20 15 10 6 0
676 // --------------------------------------------------
677 // | 111010 | BBBBB | TTTTT | 00011 | IIII | Offset |
678 // --------------------------------------------------
679 //
680 // Stores 8 bytes starting from vector byte index
681
682 ea = (base) ? rsp.r[base] + (offset * 8) : (offset * 8);
683
684 end = index + 8;
685
686 for (i=index; i < end; i++)
687 {
688 WRITE8(ea, VREG_B(dest, i));
689 ea++;
690 }
691 break;
692 }
693 case 0x04: /* SQV */
694 {
695 // 31 25 20 15 10 6 0
696 // --------------------------------------------------
697 // | 111010 | BBBBB | TTTTT | 00100 | IIII | Offset |
698 // --------------------------------------------------
699 //
700 // Stores up to 16 bytes starting from vector byte index until 16-byte boundary
701
702 ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16);
703
704 end = index + (16 - (ea & 0xf));
705 // if (end != 16)
706 // printf("SQV %d\n", end-index);
707 //assert(end == 16);
708
709 for (i=index; i < end; i++)
710 {
711 WRITE8(ea, VREG_B(dest, i & 0xf));
712 ea++;
713 }
714 break;
715 }
716 case 0x05: /* SRV */
717 {
718 // 31 25 20 15 10 6 0
719 // --------------------------------------------------
720 // | 111010 | BBBBB | TTTTT | 00101 | IIII | Offset |
721 // --------------------------------------------------
722 //
723 // Stores up to 16 bytes starting from right side until 16-byte boundary
724
725 int o;
726 ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16);
727
728 end = index + (ea & 0xf);
729 o = (16 - (ea & 0xf)) & 0xf;
730 ea &= ~0xf;
731 // if (end != 16)
732 // printf("SRV %d\n", end-index);
733 //assert(end == 16);
734
735 for (i=index; i < end; i++)
736 {
737 WRITE8(ea, VREG_B(dest, ((i + o) & 0xf)));
738 ea++;
739 }
740 break;
741 }
742 case 0x06: /* SPV */
743 {
744 // 31 25 20 15 10 6 0
745 // --------------------------------------------------
746 // | 111010 | BBBBB | TTTTT | 00110 | IIII | Offset |
747 // --------------------------------------------------
748 //
749 // Stores upper 8 bits of each element
750
751 ea = (base) ? rsp.r[base] + (offset * 8) : (offset * 8);
752 end = index + 8;
753
754 for (i=index; i < end; i++)
755 {
756 if ((i & 0xf) < 8)
757 {
758 WRITE8(ea, VREG_B(dest, ((i & 0xf) << 1)));
759 }
760 else
761 {
762 WRITE8(ea, VREG_S(dest, (i & 0x7)) >> 7);
763 }
764 ea++;
765 }
766 break;
767 }
768 case 0x07: /* SUV */
769 {
770 // 31 25 20 15 10 6 0
771 // --------------------------------------------------
772 // | 111010 | BBBBB | TTTTT | 00111 | IIII | Offset |
773 // --------------------------------------------------
774 //
775 // Stores bits 14-7 of each element
776
777 ea = (base) ? rsp.r[base] + (offset * 8) : (offset * 8);
778 end = index + 8;
779
780 for (i=index; i < end; i++)
781 {
782 if ((i & 0xf) < 8)
783 {
784 WRITE8(ea, VREG_S(dest, (i & 0x7)) >> 7);
785 }
786 else
787 {
788 WRITE8(ea, VREG_B(dest, ((i & 0x7) << 1)));
789 }
790 ea++;
791 }
792 break;
793 }
794 case 0x08: /* SHV */
795 {
796 // 31 25 20 15 10 6 0
797 // --------------------------------------------------
798 // | 111010 | BBBBB | TTTTT | 01000 | IIII | Offset |
799 // --------------------------------------------------
800 //
801 // Stores bits 14-7 of each element, with 2-byte stride
802
803 ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16);
804
805 for (i=0; i < 8; i++)
806 {
807 UINT8 d = ((VREG_B(dest, ((index + (i << 1) + 0) & 0xf))) << 1) |
808 ((VREG_B(dest, ((index + (i << 1) + 1) & 0xf))) >> 7);
809
810 WRITE8(ea, d);
811 ea += 2;
812 }
813 break;
814 }
815 case 0x09: /* SFV */
816 {
817 // 31 25 20 15 10 6 0
818 // --------------------------------------------------
819 // | 111010 | BBBBB | TTTTT | 01001 | IIII | Offset |
820 // --------------------------------------------------
821 //
822 // Stores bits 14-7 of upper or lower quad, with 4-byte stride
823
824 // FIXME: only works for index 0 and index 8
825
826 if (index & 0x7)
827 log(M64MSG_WARNING, "SFV: index = %d at %08X\n", index, rsp.ppc);
828
829 ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16);
830
831 eaoffset = ea & 0xf;
832 ea &= ~0xf;
833
834 end = (index >> 1) + 4;
835
836 for (i=index >> 1; i < end; i++)
837 {
838 WRITE8(ea + (eaoffset & 0xf), VREG_S(dest, i) >> 7);
839 eaoffset += 4;
840 }
841 break;
842 }
843 case 0x0a: /* SWV */
844 {
845 // 31 25 20 15 10 6 0
846 // --------------------------------------------------
847 // | 111010 | BBBBB | TTTTT | 01010 | IIII | Offset |
848 // --------------------------------------------------
849 //
850 // Stores the full 128-bit vector starting from vector byte index and wrapping to index 0
851 // after byte index 15
852
853 ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16);
854
855 eaoffset = ea & 0xf;
856 ea &= ~0xf;
857
858 end = index + 16;
859
860 for (i=index; i < end; i++)
861 {
862 WRITE8(ea + (eaoffset & 0xf), VREG_B(dest, i & 0xf));
863 eaoffset++;
864 }
865 break;
866 }
867 case 0x0b: /* STV */
868 {
869 // 31 25 20 15 10 6 0
870 // --------------------------------------------------
871 // | 111010 | BBBBB | TTTTT | 01011 | IIII | Offset |
872 // --------------------------------------------------
873 //
874 // Stores one element from maximum of 8 vectors, while incrementing element index
875
876 int element, eaoffset;
877 int vs = dest;
878 int ve = dest + 8;
879 if (ve > 32)
880 ve = 32;
881
882 element = 8 - (index >> 1);
883 //if (index & 0x1) fatalerror("RSP: STV: index = %d at %08X\n", index, rsp.ppc);
884
885 ea = (base) ? rsp.r[base] + (offset * 16) : (offset * 16);
886
887 //if (ea & 0x1) fatalerror("RSP: STV: ea = %08X at %08X\n", ea, rsp.ppc);
888
889 eaoffset = (ea & 0xf) + (element * 2);
890 ea &= ~0xf;
891
892 for (i=vs; i < ve; i++)
893 {
894 WRITE16(ea + (eaoffset & 0xf), VREG_S(i, element & 0x7));
895 eaoffset += 2;
896 element++;
897 }
898 break;
899 }
900
901 default:
902 {
903 unimplemented_opcode(op);
904 break;
905 }
906 }
907}
908
// Saturation limits returned by SATURATE_ACCUM_U.
#define U16MIN 0x0000
#define U16MAX 0xffff

// Saturation limits returned by SATURATE_ACCUM_S. S16MIN is the 16-bit
// pattern 0x8000 written as a positive int constant (32768), not -32768; it
// only reads as the most negative value after conversion to a 16-bit type.
#define S16MIN 0x8000
#define S16MAX 0x7fff
914
915INLINE UINT16 SATURATE_ACCUM_U(int accum)
916{
917 if ((INT16)ACCUM_H(accum) < 0)
918 {
919 if ((UINT16)(ACCUM_H(accum)) != 0xffff)
920 {
921 return U16MIN;
922 }
923 else
924 {
925 if ((INT16)ACCUM_M(accum) >= 0)
926 {
927 return U16MIN;
928 }
929 else
930 {
931 return ACCUM_L(accum);
932 }
933 }
934 }
935 else
936 {
937 if ((UINT16)(ACCUM_H(accum)) != 0)
938 {
939 return U16MAX;
940 }
941 else
942 {
943 if ((INT16)ACCUM_M(accum) < 0)
944 {
945 return U16MAX;
946 }
947 else
948 {
949 return ACCUM_L(accum);
950 }
951 }
952 }
953
954 return 0;
955}
956
957INLINE UINT16 SATURATE_ACCUM_S(int accum)
958{
959 if ((INT16)ACCUM_H(accum) < 0)
960 {
961 if ((UINT16)(ACCUM_H(accum)) != 0xffff)
962 return S16MIN;
963 else
964 {
965 if ((INT16)ACCUM_M(accum) >= 0)
966 return S16MIN;
967 else
968 return ACCUM_M(accum);
969 }
970 }
971 else
972 {
973 if ((UINT16)(ACCUM_H(accum)) != 0)
974 return S16MAX;
975 else
976 {
977 if ((INT16)ACCUM_M(accum) < 0)
978 return S16MAX;
979 else
980 return ACCUM_M(accum);
981 }
982 }
983
984 return 0;
985}
986
// Copies the eight results in the local array `vres` into elements 0..7 of
// the destination vector register. Relies on two names being in scope at the
// point of use: `vres` (the result array) and `op` (read by VDREG to pick the
// destination register). Wrapped in do { } while(0) so it behaves as a single
// statement.
#define WRITEBACK_RESULT() \
    do { \
        VREG_S(VDREG, 0) = vres[0]; \
        VREG_S(VDREG, 1) = vres[1]; \
        VREG_S(VDREG, 2) = vres[2]; \
        VREG_S(VDREG, 3) = vres[3]; \
        VREG_S(VDREG, 4) = vres[4]; \
        VREG_S(VDREG, 5) = vres[5]; \
        VREG_S(VDREG, 6) = vres[6]; \
        VREG_S(VDREG, 7) = vres[7]; \
    } while(0)
998
999
1000void handle_vector_ops(UINT32 op)
1001{
1002 int i;
1003 INT16 vres[8];
1004
1005 // Opcode legend:
1006 // E = VS2 element type
1007 // S = VS1, Source vector 1
1008 // T = VS2, Source vector 2
1009 // D = Destination vector
1010
1011 switch (op & 0x3f)
1012 {
1013 case 0x00: /* VMULF */
1014 {
1015 // 31 25 24 20 15 10 5 0
1016 // ------------------------------------------------------
1017 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000000 |
1018 // ------------------------------------------------------
1019 //
1020 // Multiplies signed integer by signed integer * 2
1021
1022 for (i=0; i < 8; i++)
1023 {
1024 int del = VEC_EL_1(EL, i);
1025 int sel = VEC_EL_2(EL, del);
1026 INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, del);
1027 INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel);
1028 if (s1 == -32768 && s2 == -32768)
1029 {
1030 // overflow
1031 ACCUM_H(del) = 0;
1032 ACCUM_M(del) = -32768;
1033 ACCUM_L(del) = -32768;
1034 vres[del] = 0x7fff;
1035 }
1036 else
1037 {
1038 INT64 r = s1 * s2 * 2;
1039 r += 0x8000; // rounding ?
1040 ACCUM_H(del) = (r < 0) ? 0xffff : 0; // sign-extend to 48-bit
1041 ACCUM_M(del) = (INT16)(r >> 16);
1042 ACCUM_L(del) = (UINT16)(r);
1043 vres[del] = ACCUM_M(del);
1044 }
1045 }
1046 WRITEBACK_RESULT();
1047
1048 break;
1049 }
1050
1051 case 0x01: /* VMULU */
1052 {
1053 // 31 25 24 20 15 10 5 0
1054 // ------------------------------------------------------
1055 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000001 |
1056 // ------------------------------------------------------
1057 //
1058
1059 for (i=0; i < 8; i++)
1060 {
1061 int del = VEC_EL_1(EL, i);
1062 int sel = VEC_EL_2(EL, del);
1063 INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, del);
1064 INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel);
1065 INT64 r = s1 * s2 * 2;
1066 r += 0x8000; // rounding ?
1067
1068 ACCUM_H(del) = (UINT16)(r >> 32);
1069 ACCUM_M(del) = (UINT16)(r >> 16);
1070 ACCUM_L(del) = (UINT16)(r);
1071
1072 if (r < 0)
1073 {
1074 vres[del] = 0;
1075 }
1076 else if (((INT16)(ACCUM_H(del)) ^ (INT16)(ACCUM_M(del))) < 0)
1077 {
1078 vres[del] = -1;
1079 }
1080 else
1081 {
1082 vres[del] = ACCUM_M(del);
1083 }
1084 }
1085 WRITEBACK_RESULT();
1086 break;
1087 }
1088
1089 case 0x04: /* VMUDL */
1090 {
1091 // 31 25 24 20 15 10 5 0
1092 // ------------------------------------------------------
1093 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000100 |
1094 // ------------------------------------------------------
1095 //
1096 // Multiplies unsigned fraction by unsigned fraction
1097 // Stores the higher 16 bits of the 32-bit result to accumulator
1098 // The low slice of accumulator is stored into destination element
1099
1100 for (i=0; i < 8; i++)
1101 {
1102 int del = VEC_EL_1(EL, i);
1103 int sel = VEC_EL_2(EL, del);
1104 UINT32 s1 = (UINT32)(UINT16)VREG_S(VS1REG, del);
1105 UINT32 s2 = (UINT32)(UINT16)VREG_S(VS2REG, sel);
1106 UINT32 r = s1 * s2;
1107
1108 ACCUM_H(del) = 0;
1109 ACCUM_M(del) = 0;
1110 ACCUM_L(del) = (UINT16)(r >> 16);
1111
1112 vres[del] = ACCUM_L(del);
1113 }
1114 WRITEBACK_RESULT();
1115 break;
1116 }
1117
1118 case 0x05: /* VMUDM */
1119 {
1120 // 31 25 24 20 15 10 5 0
1121 // ------------------------------------------------------
1122 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000101 |
1123 // ------------------------------------------------------
1124 //
1125 // Multiplies signed integer by unsigned fraction
1126 // The result is stored into accumulator
1127 // The middle slice of accumulator is stored into destination element
1128
1129 for (i=0; i < 8; i++)
1130 {
1131 int del = VEC_EL_1(EL, i);
1132 int sel = VEC_EL_2(EL, del);
1133 INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, del);
1134 INT32 s2 = (UINT16)VREG_S(VS2REG, sel); // not sign-extended
1135 INT32 r = s1 * s2;
1136
1137 ACCUM_H(del) = (r < 0) ? 0xffff : 0; // sign-extend to 48-bit
1138 ACCUM_M(del) = (INT16)(r >> 16);
1139 ACCUM_L(del) = (UINT16)(r);
1140
1141 vres[del] = ACCUM_M(del);
1142 }
1143 WRITEBACK_RESULT();
1144 break;
1145
1146 }
1147
1148 case 0x06: /* VMUDN */
1149 {
1150 // 31 25 24 20 15 10 5 0
1151 // ------------------------------------------------------
1152 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000110 |
1153 // ------------------------------------------------------
1154 //
1155 // Multiplies unsigned fraction by signed integer
1156 // The result is stored into accumulator
1157 // The low slice of accumulator is stored into destination element
1158
1159 for (i=0; i < 8; i++)
1160 {
1161 int del = VEC_EL_1(EL, i);
1162 int sel = VEC_EL_2(EL, del);
1163 INT32 s1 = (UINT16)VREG_S(VS1REG, del); // not sign-extended
1164 INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel);
1165 INT32 r = s1 * s2;
1166
1167 ACCUM_H(del) = (r < 0) ? 0xffff : 0; // sign-extend to 48-bit
1168 ACCUM_M(del) = (INT16)(r >> 16);
1169 ACCUM_L(del) = (UINT16)(r);
1170
1171 vres[del] = ACCUM_L(del);
1172 }
1173 WRITEBACK_RESULT();
1174 break;
1175 }
1176
1177 case 0x07: /* VMUDH */
1178 {
1179 // 31 25 24 20 15 10 5 0
1180 // ------------------------------------------------------
1181 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000111 |
1182 // ------------------------------------------------------
1183 //
1184 // Multiplies signed integer by signed integer
1185 // The result is stored into highest 32 bits of accumulator, the low slice is zero
1186 // The highest 32 bits of accumulator is saturated into destination element
1187
1188 for (i=0; i < 8; i++)
1189 {
1190 int del = VEC_EL_1(EL, i);
1191 int sel = VEC_EL_2(EL, del);
1192 INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, del);
1193 INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel);
1194 INT32 r = s1 * s2;
1195
1196 ACCUM_H(del) = (INT16)(r >> 16);
1197 ACCUM_M(del) = (UINT16)(r);
1198 ACCUM_L(del) = 0;
1199
1200 if (r < -32768) r = -32768;
1201 if (r > 32767) r = 32767;
1202 vres[del] = (INT16)(r);
1203 }
1204 WRITEBACK_RESULT();
1205 break;
1206 }
1207
1208 case 0x08: /* VMACF */
1209 {
1210 // 31 25 24 20 15 10 5 0
1211 // ------------------------------------------------------
1212 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001000 |
1213 // ------------------------------------------------------
1214 //
1215 // Multiplies signed integer by signed integer * 2
1216 // The result is added to accumulator
1217
1218 for (i=0; i < 8; i++)
1219 {
1220 UINT16 res;
1221 int del = VEC_EL_1(EL, i);
1222 int sel = VEC_EL_2(EL, del);
1223 INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, del);
1224 INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel);
1225 INT32 r = s1 * s2;
1226
1227 ACCUM(del) += (INT64)(r) << 17;
1228 res = SATURATE_ACCUM_S(del);
1229
1230 vres[del] = res;
1231 }
1232 WRITEBACK_RESULT();
1233 break;
1234 }
1235
1236 case 0x09: /* VMACU */
1237 {
1238 // 31 25 24 20 15 10 5 0
1239 // ------------------------------------------------------
1240 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001001 |
1241 // ------------------------------------------------------
1242 //
1243
1244 for (i=0; i < 8; i++)
1245 {
1246 UINT16 res;
1247 int del = VEC_EL_1(EL, i);
1248 int sel = VEC_EL_2(EL, del);
1249 INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, del);
1250 INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel);
1251 INT32 r1 = s1 * s2;
1252 UINT32 r2 = (UINT16)ACCUM_L(del) + ((UINT16)(r1) * 2);
1253 UINT32 r3 = (UINT16)ACCUM_M(del) + (UINT16)((r1 >> 16) * 2) + (UINT16)(r2 >> 16);
1254
1255 ACCUM_L(del) = (UINT16)(r2);
1256 ACCUM_M(del) = (UINT16)(r3);
1257 ACCUM_H(del) += (UINT16)(r3 >> 16) + (UINT16)(r1 >> 31);
1258
1259 //res = SATURATE_ACCUM(del, 1, 0x0000, 0xffff);
1260 if ((INT16)ACCUM_H(del) < 0)
1261 {
1262 res = 0;
1263 }
1264 else
1265 {
1266 if (ACCUM_H(del) != 0)
1267 {
1268 res = 0xffff;
1269 }
1270 else
1271 {
1272 if ((INT16)ACCUM_M(del) < 0)
1273 {
1274 res = 0xffff;
1275 }
1276 else
1277 {
1278 res = ACCUM_M(del);
1279 }
1280 }
1281 }
1282
1283 vres[del] = res;
1284 }
1285 WRITEBACK_RESULT();
1286 break;
1287 }
1288
1289 case 0x0c: /* VMADL */
1290 {
1291 // 31 25 24 20 15 10 5 0
1292 // ------------------------------------------------------
1293 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001100 |
1294 // ------------------------------------------------------
1295 //
1296 // Multiplies unsigned fraction by unsigned fraction
1297 // Adds the higher 16 bits of the 32-bit result to accumulator
1298 // The low slice of accumulator is stored into destination element
1299
1300 for (i=0; i < 8; i++)
1301 {
1302 UINT16 res;
1303 int del = VEC_EL_1(EL, i);
1304 int sel = VEC_EL_2(EL, del);
1305 UINT32 s1 = (UINT32)(UINT16)VREG_S(VS1REG, del);
1306 UINT32 s2 = (UINT32)(UINT16)VREG_S(VS2REG, sel);
1307 UINT32 r1 = s1 * s2;
1308 UINT32 r2 = (UINT16)ACCUM_L(del) + (r1 >> 16);
1309 UINT32 r3 = (UINT16)ACCUM_M(del) + (r2 >> 16);
1310
1311 ACCUM_L(del) = (UINT16)(r2);
1312 ACCUM_M(del) = (UINT16)(r3);
1313 ACCUM_H(del) += (INT16)(r3 >> 16);
1314
1315 res = SATURATE_ACCUM_U(del);
1316
1317 vres[del] = res;
1318 }
1319 WRITEBACK_RESULT();
1320 break;
1321 }
1322
1323 case 0x0d: /* VMADM */
1324 {
1325 // 31 25 24 20 15 10 5 0
1326 // ------------------------------------------------------
1327 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001101 |
1328 // ------------------------------------------------------
1329 //
1330 // Multiplies signed integer by unsigned fraction
1331 // The result is added into accumulator
1332 // The middle slice of accumulator is stored into destination element
1333
1334 for (i=0; i < 8; i++)
1335 {
1336 UINT16 res;
1337 int del = VEC_EL_1(EL, i);
1338 int sel = VEC_EL_2(EL, del);
1339 UINT32 s1 = (INT32)(INT16)VREG_S(VS1REG, del);
1340 UINT32 s2 = (UINT16)VREG_S(VS2REG, sel); // not sign-extended
1341 UINT32 r1 = s1 * s2;
1342 UINT32 r2 = (UINT16)ACCUM_L(del) + (UINT16)(r1);
1343 UINT32 r3 = (UINT16)ACCUM_M(del) + (r1 >> 16) + (r2 >> 16);
1344
1345 ACCUM_L(del) = (UINT16)(r2);
1346 ACCUM_M(del) = (UINT16)(r3);
1347 ACCUM_H(del) += (UINT16)(r3 >> 16);
1348 if ((INT32)(r1) < 0)
1349 ACCUM_H(del) -= 1;
1350
1351 res = SATURATE_ACCUM_S(del);
1352
1353 vres[del] = res;
1354 }
1355 WRITEBACK_RESULT();
1356 break;
1357 }
1358
1359 case 0x0e: /* VMADN */
1360 {
1361 // 31 25 24 20 15 10 5 0
1362 // ------------------------------------------------------
1363 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001110 |
1364 // ------------------------------------------------------
1365 //
1366 // Multiplies unsigned fraction by signed integer
1367 // The result is added into accumulator
1368 // The low slice of accumulator is stored into destination element
1369
1370#if 1
1371 for (i=0; i < 8; i++)
1372 {
1373 int del = VEC_EL_1(EL, i);
1374 int sel = VEC_EL_2(EL, del);
1375 INT32 s1 = (UINT16)VREG_S(VS1REG, del); // not sign-extended
1376 INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel);
1377 ACCUM(del) += (INT64)(s1*s2)<<16;
1378 }
1379
1380 for (i=0; i < 8; i++)
1381 {
1382 UINT16 res;
1383 res = SATURATE_ACCUM_U(i);
1384 //res = ACCUM_L(i);
1385
1386 VREG_S(VDREG, i) = res;
1387 }
1388#else
1389 for (i=0; i < 8; i++)
1390 {
1391 UINT16 res;
1392 int del = VEC_EL_1(EL, i);
1393 int sel = VEC_EL_2(EL, del);
1394 INT32 s1 = (UINT16)VREG_S(VS1REG, del); // not sign-extended
1395 INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel);
1396 UINT32 r1 = s1 * s2;
1397 UINT32 r2 = (UINT16)ACCUM_L(del) + (UINT16)(r1);
1398 UINT32 r3 = (UINT16)ACCUM_M(del) + (r1 >> 16) + (r2 >> 16);
1399
1400 ACCUM_L(del) = (UINT16)(r2);
1401 ACCUM_M(del) = (UINT16)(r3);
1402 ACCUM_H(del) += (UINT16)(r3 >> 16);
1403 if ((INT32)(r1) < 0)
1404 ACCUM_H(del) -= 1;
1405
1406 res = SATURATE_ACCUM_U(del);
1407
1408 vres[del] = res;
1409 }
1410 WRITEBACK_RESULT();
1411#endif
1412 break;
1413 }
1414
1415 case 0x0f: /* VMADH */
1416 {
1417 // 31 25 24 20 15 10 5 0
1418 // ------------------------------------------------------
1419 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001111 |
1420 // ------------------------------------------------------
1421 //
1422 // Multiplies signed integer by signed integer
1423 // The result is added into highest 32 bits of accumulator, the low slice is zero
1424 // The highest 32 bits of accumulator is saturated into destination element
1425
1426#if 1
1427 for (i=0; i < 8; i++)
1428 {
1429 int del = VEC_EL_1(EL, i);
1430 int sel = VEC_EL_2(EL, del);
1431 INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, del);
1432 INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel);
1433
1434 rsp.accum[del].l[1] += s1*s2;
1435
1436 }
1437 for (i=0; i < 8; i++)
1438 {
1439 UINT16 res;
1440 res = SATURATE_ACCUM_S(i);
1441 //res = ACCUM_M(i);
1442
1443 VREG_S(VDREG, i) = res;
1444 }
1445#else
1446 for (i=0; i < 8; i++)
1447 {
1448 UINT16 res;
1449 int del = VEC_EL_1(EL, i);
1450 int sel = VEC_EL_2(EL, del);
1451 INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, del);
1452 INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel);
1453 INT64 r = s1 * s2;
1454
1455 ACCUM(del) += (INT64)(r) << 32;
1456
1457 res = SATURATE_ACCUM_S(del);
1458
1459 vres[del] = res;
1460 }
1461 WRITEBACK_RESULT();
1462#endif
1463 break;
1464 }
1465
1466 case 0x10: /* VADD */
1467 {
1468 // 31 25 24 20 15 10 5 0
1469 // ------------------------------------------------------
1470 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010000 |
1471 // ------------------------------------------------------
1472 //
1473 // Adds two vector registers and carry flag, the result is saturated to the range -32768..32767
1474
1475 // TODO: check VS2REG == VDREG
1476
1477 for (i=0; i < 8; i++)
1478 {
1479 int del = VEC_EL_1(EL, i);
1480 int sel = VEC_EL_2(EL, del);
1481 INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, del);
1482 INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel);
1483 INT32 r = s1 + s2 + CARRY_FLAG(del);
1484
1485 ACCUM_L(del) = (INT16)(r);
1486
1487 if (r > 32767) r = 32767;
1488 if (r < -32768) r = -32768;
1489 vres[del] = (INT16)(r);
1490 }
1491 CLEAR_ZERO_FLAGS();
1492 CLEAR_CARRY_FLAGS();
1493 WRITEBACK_RESULT();
1494 break;
1495 }
1496
1497 case 0x11: /* VSUB */
1498 {
1499 // 31 25 24 20 15 10 5 0
1500 // ------------------------------------------------------
1501 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010001 |
1502 // ------------------------------------------------------
1503 //
1504 // Subtracts source register 2 and carry flag from source register 1, the result is saturated to the range -32768..32767
1505
1506 // TODO: check VS2REG == VDREG
1507
1508 for (i=0; i < 8; i++)
1509 {
1510 int del = VEC_EL_1(EL, i);
1511 int sel = VEC_EL_2(EL, del);
1512 INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, del);
1513 INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, sel);
1514 INT32 r = s1 - s2 - CARRY_FLAG(del);
1515
1516 ACCUM_L(del) = (INT16)(r);
1517
1518 if (r > 32767) r = 32767;
1519 if (r < -32768) r = -32768;
1520
1521 vres[del] = (INT16)(r);
1522 }
1523 CLEAR_ZERO_FLAGS();
1524 CLEAR_CARRY_FLAGS();
1525 WRITEBACK_RESULT();
1526 break;
1527 }
1528
1529 case 0x13: /* VABS */
1530 {
1531 // 31 25 24 20 15 10 5 0
1532 // ------------------------------------------------------
1533 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010011 |
1534 // ------------------------------------------------------
1535 //
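1536 // Negates source register 2 where source register 1 is negative (-32768 becomes 32767), copies it unchanged
1537 // where source register 1 is positive, stores zero where source register 1 is zero; result goes to destination register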
1538
1539 for (i=0; i < 8; i++)
1540 {
1541 int del = VEC_EL_1(EL, i);
1542 int sel = VEC_EL_2(EL, del);
1543 INT16 s1 = (INT16)VREG_S(VS1REG, del);
1544 INT16 s2 = (INT16)VREG_S(VS2REG, sel);
1545
1546 if (s1 < 0)
1547 {
1548 if (s2 == -32768)
1549 {
1550 vres[del] = 32767;
1551 }
1552 else
1553 {
1554 vres[del] = -s2;
1555 }
1556 }
1557 else if (s1 > 0)
1558 {
1559 vres[del] = s2;
1560 }
1561 else
1562 {
1563 vres[del] = 0;
1564 }
1565
1566 ACCUM_L(del) = vres[del];
1567 }
1568 WRITEBACK_RESULT();
1569 break;
1570 }
1571
1572 case 0x14: /* VADDC */
1573 {
1574 // 31 25 24 20 15 10 5 0
1575 // ------------------------------------------------------
1576 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010100 |
1577 // ------------------------------------------------------
1578 //
1579 // Adds two vector registers, the carry out is stored into carry register
1580
1581 // TODO: check VS2REG = VDREG
1582
1583 CLEAR_ZERO_FLAGS();
1584 CLEAR_CARRY_FLAGS();
1585
1586 for (i=0; i < 8; i++)
1587 {
1588 int del = VEC_EL_1(EL, i);
1589 int sel = VEC_EL_2(EL, del);
1590 INT32 s1 = (UINT32)(UINT16)VREG_S(VS1REG, del);
1591 INT32 s2 = (UINT32)(UINT16)VREG_S(VS2REG, sel);
1592 INT32 r = s1 + s2;
1593
1594 vres[del] = (INT16)(r);
1595 ACCUM_L(del) = (INT16)(r);
1596
1597 if (r & 0xffff0000)
1598 {
1599 SET_CARRY_FLAG(del);
1600 }
1601 }
1602 WRITEBACK_RESULT();
1603 break;
1604 }
1605
1606 case 0x15: /* VSUBC */
1607 {
1608 // 31 25 24 20 15 10 5 0
1609 // ------------------------------------------------------
1610 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010101 |
1611 // ------------------------------------------------------
1612 //
1613 // Subtracts two vector registers, the carry out is stored into carry register
1614
1615 // TODO: check VS2REG = VDREG
1616
1617 CLEAR_ZERO_FLAGS();
1618 CLEAR_CARRY_FLAGS();
1619
1620 for (i=0; i < 8; i++)
1621 {
1622 int del = VEC_EL_1(EL, i);
1623 int sel = VEC_EL_2(EL, del);
1624 INT32 s1 = (UINT32)(UINT16)VREG_S(VS1REG, del);
1625 INT32 s2 = (UINT32)(UINT16)VREG_S(VS2REG, sel);
1626 INT32 r = s1 - s2;
1627
1628 vres[del] = (INT16)(r);
1629 ACCUM_L(del) = (UINT16)(r);
1630
1631 if ((UINT16)(r) != 0)
1632 {
1633 SET_ZERO_FLAG(del);
1634 }
1635 if (r & 0xffff0000)
1636 {
1637 SET_CARRY_FLAG(del);
1638 }
1639 }
1640 WRITEBACK_RESULT();
1641 break;
1642 }
1643
1644 case 0x1d: /* VSAW */
1645 {
1646 // 31 25 24 20 15 10 5 0
1647 // ------------------------------------------------------
1648 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 011101 |
1649 // ------------------------------------------------------
1650 //
1651 // Stores high, middle or low slice of accumulator to destination vector
1652
1653 switch (EL)
1654 {
1655 case 0x08: // VSAWH
1656 {
1657 for (i=0; i < 8; i++)
1658 {
1659 VREG_S(VDREG, i) = ACCUM_H(i);
1660 }
1661 break;
1662 }
1663 case 0x09: // VSAWM
1664 {
1665 for (i=0; i < 8; i++)
1666 {
1667 VREG_S(VDREG, i) = ACCUM_M(i);
1668 }
1669 break;
1670 }
1671 case 0x0a: // VSAWL
1672 {
1673 for (i=0; i < 8; i++)
1674 {
1675 VREG_S(VDREG, i) = ACCUM_L(i);
1676 }
1677 break;
1678 }
1679 default: log(M64MSG_ERROR, "RSP: VSAW: el = %d\n", EL);
1680 }
1681 break;
1682 }
1683
1684 case 0x20: /* VLT */
1685 {
1686 // 31 25 24 20 15 10 5 0
1687 // ------------------------------------------------------
1688 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100000 |
1689 // ------------------------------------------------------
1690 //
1691 // Sets compare flags if elements in VS1 are less than VS2
1692 // Moves the VS1 element to destination vector when it is less than or equal to the VS2 element, otherwise the VS2 element
1693
1694 rsp.flag[1] = 0;
1695
1696 for (i=0; i < 8; i++)
1697 {
1698 int del = VEC_EL_1(EL, i);
1699 int sel = VEC_EL_2(EL, del);
1700
1701 if (VREG_S(VS1REG, del) < VREG_S(VS2REG, sel))
1702 {
1703 vres[del] = VREG_S(VS1REG, del);
1704 SET_COMPARE_FLAG(del);
1705 }
1706 else if (VREG_S(VS1REG, del) == VREG_S(VS2REG, sel))
1707 {
1708 vres[del] = VREG_S(VS1REG, del);
1709 if (ZERO_FLAG(del) != 0 && CARRY_FLAG(del) != 0)
1710 {
1711 SET_COMPARE_FLAG(del);
1712 }
1713 }
1714 else
1715 {
1716 vres[del] = VREG_S(VS2REG, sel);
1717 }
1718
1719 ACCUM_L(del) = vres[del];
1720 }
1721
1722 CLEAR_ZERO_FLAGS();
1723 CLEAR_CARRY_FLAGS();
1724 WRITEBACK_RESULT();
1725 break;
1726 }
1727
1728 case 0x21: /* VEQ */
1729 {
1730 // 31 25 24 20 15 10 5 0
1731 // ------------------------------------------------------
1732 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100001 |
1733 // ------------------------------------------------------
1734 //
1735 // Sets compare flags if elements in VS1 are equal with VS2
1736 // Moves the element in VS2 to destination vector
1737
1738 rsp.flag[1] = 0;
1739
1740 for (i=0; i < 8; i++)
1741 {
1742 int del = VEC_EL_1(EL, i);
1743 int sel = VEC_EL_2(EL, del);
1744
1745 vres[del] = VREG_S(VS2REG, sel);
1746 ACCUM_L(del) = vres[del];
1747
1748 if (VREG_S(VS1REG, del) == VREG_S(VS2REG, sel))
1749 {
1750 if (ZERO_FLAG(del) == 0)
1751 {
1752 SET_COMPARE_FLAG(del);
1753 }
1754 }
1755 }
1756
1757 CLEAR_ZERO_FLAGS();
1758 CLEAR_CARRY_FLAGS();
1759 WRITEBACK_RESULT();
1760 break;
1761 }
1762
1763 case 0x22: /* VNE */
1764 {
1765 // 31 25 24 20 15 10 5 0
1766 // ------------------------------------------------------
1767 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100010 |
1768 // ------------------------------------------------------
1769 //
1770 // Sets compare flags if elements in VS1 are not equal with VS2
1771 // Moves the element in VS1 to destination vector
1772
1773 rsp.flag[1] = 0;
1774
1775 for (i=0; i < 8; i++)
1776 {
1777 int del = VEC_EL_1(EL, i);
1778 int sel = VEC_EL_2(EL, del);
1779
1780 vres[del] = VREG_S(VS1REG, del);
1781 ACCUM_L(del) = vres[del];
1782
1783 if (VREG_S(VS1REG, del) != VREG_S(VS2REG, sel))
1784 {
1785 SET_COMPARE_FLAG(del);
1786 }
1787 else
1788 {
1789 if (ZERO_FLAG(del) != 0)
1790 {
1791 SET_COMPARE_FLAG(del);
1792 }
1793 }
1794 }
1795
1796 CLEAR_ZERO_FLAGS();
1797 CLEAR_CARRY_FLAGS();
1798 WRITEBACK_RESULT();
1799 break;
1800 }
1801
1802 case 0x23: /* VGE */
1803 {
1804 // 31 25 24 20 15 10 5 0
1805 // ------------------------------------------------------
1806 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100011 |
1807 // ------------------------------------------------------
1808 //
1809 // Sets compare flags if elements in VS1 are greater or equal with VS2
1810 // Moves the VS1 element to destination vector when the compare flag for that element is set, otherwise the VS2 element
1811
1812 rsp.flag[1] = 0;
1813
1814 for (i=0; i < 8; i++)
1815 {
1816 int del = VEC_EL_1(EL, i);
1817 int sel = VEC_EL_2(EL, del);
1818
1819 if (VREG_S(VS1REG, del) == VREG_S(VS2REG, sel))
1820 {
1821 if (ZERO_FLAG(del) == 0 || CARRY_FLAG(del) == 0)
1822 {
1823 SET_COMPARE_FLAG(del);
1824 }
1825 }
1826 else if (VREG_S(VS1REG, del) > VREG_S(VS2REG, sel))
1827 {
1828 SET_COMPARE_FLAG(del);
1829 }
1830
1831 if (COMPARE_FLAG(del) != 0)
1832 {
1833 vres[del] = VREG_S(VS1REG, del);
1834 }
1835 else
1836 {
1837 vres[del] = VREG_S(VS2REG, sel);
1838 }
1839
1840 ACCUM_L(del) = vres[del];
1841 }
1842
1843 CLEAR_ZERO_FLAGS();
1844 CLEAR_CARRY_FLAGS();
1845 WRITEBACK_RESULT();
1846 break;
1847 }
1848
1849 case 0x24: /* VCL */
1850 {
1851 // 31 25 24 20 15 10 5 0
1852 // ------------------------------------------------------
1853 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100100 |
1854 // ------------------------------------------------------
1855 //
1856 // Vector clip low
1857
1858 for (i=0; i < 8; i++)
1859 {
1860 int del = VEC_EL_1(EL, i);
1861 int sel = VEC_EL_2(EL, del);
1862 INT16 s1 = VREG_S(VS1REG, del);
1863 INT16 s2 = VREG_S(VS2REG, sel);
1864
1865 if (CARRY_FLAG(del) != 0)
1866 {
1867 if (ZERO_FLAG(del) != 0)
1868 {
1869 if (COMPARE_FLAG(del) != 0)
1870 {
1871 ACCUM_L(del) = -(UINT16)s2;
1872 }
1873 else
1874 {
1875 ACCUM_L(del) = s1;
1876 }
1877 }
1878 else
1879 {
1880 if (rsp.flag[2] & (1 << (del)))
1881 {
1882 if (((UINT32)(INT16)(s1) + (UINT32)(INT16)(s2)) > 0x10000)
1883 {
1884 ACCUM_L(del) = s1;
1885 CLEAR_COMPARE_FLAG(del);
1886 }
1887 else
1888 {
1889 ACCUM_L(del) = -((UINT16)s2);
1890 SET_COMPARE_FLAG(del);
1891 }
1892 }
1893 else
1894 {
1895 if (((UINT32)(INT16)(s1) + (UINT32)(INT16)(s2)) != 0)
1896 {
1897 ACCUM_L(del) = s1;
1898 CLEAR_COMPARE_FLAG(del);
1899 }
1900 else
1901 {
1902 ACCUM_L(del) = -((UINT16)s2);
1903 SET_COMPARE_FLAG(del);
1904 }
1905 }
1906 }
1907 }
1908 else
1909 {
1910 if (ZERO_FLAG(del) != 0)
1911 {
1912 if (rsp.flag[1] & (1 << (8+del)))
1913 {
1914 ACCUM_L(del) = s2;
1915 }
1916 else
1917 {
1918 ACCUM_L(del) = s1;
1919 }
1920 }
1921 else
1922 {
1923 if (((INT32)(UINT16)s1 - (INT32)(UINT16)s2) >= 0)
1924 {
1925 ACCUM_L(del) = s2;
1926 rsp.flag[1] |= (1 << (8+del));
1927 }
1928 else
1929 {
1930 ACCUM_L(del) = s1;
1931 rsp.flag[1] &= ~(1 << (8+del));
1932 }
1933 }
1934 }
1935
1936 vres[del] = ACCUM_L(del);
1937 }
1938 CLEAR_ZERO_FLAGS();
1939 CLEAR_CARRY_FLAGS();
1940 rsp.flag[2] = 0;
1941 WRITEBACK_RESULT();
1942 break;
1943 }
1944
1945 case 0x25: /* VCH */
1946 {
1947 // 31 25 24 20 15 10 5 0
1948 // ------------------------------------------------------
1949 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100101 |
1950 // ------------------------------------------------------
1951 //
1952 // Vector clip high
1953
1954 CLEAR_ZERO_FLAGS();
1955 CLEAR_CARRY_FLAGS();
1956 rsp.flag[1] = 0;
1957 rsp.flag[2] = 0;
1958
1959 for (i=0; i < 8; i++)
1960 {
1961 int del = VEC_EL_1(EL, i);
1962 int sel = VEC_EL_2(EL, del);
1963 INT16 s1 = VREG_S(VS1REG, del);
1964 INT16 s2 = VREG_S(VS2REG, sel);
1965
1966 if ((s1 ^ s2) < 0)
1967 {
1968 SET_CARRY_FLAG(del);
1969 if (s2 < 0)
1970 {
1971 rsp.flag[1] |= (1 << (8+del));
1972 }
1973
1974 if (s1 + s2 <= 0)
1975 {
1976 if (s1 + s2 == -1)
1977 {
1978 rsp.flag[2] |= (1 << (del));
1979 }
1980 SET_COMPARE_FLAG(del);
1981 vres[del] = -((UINT16)s2);
1982 }
1983 else
1984 {
1985 vres[del] = s1;
1986 }
1987
1988 if (s1 + s2 != 0)
1989 {
1990 if (s1 != ~s2)
1991 {
1992 SET_ZERO_FLAG(del);
1993 }
1994 }
1995 }
1996 else
1997 {
1998 if (s2 < 0)
1999 {
2000 SET_COMPARE_FLAG(del);
2001 }
2002 if (s1 - s2 >= 0)
2003 {
2004 rsp.flag[1] |= (1 << (8+del));
2005 vres[del] = s2;
2006 }
2007 else
2008 {
2009 vres[del] = s1;
2010 }
2011
2012 if ((s1 - s2) != 0)
2013 {
2014 if (s1 != ~s2)
2015 {
2016 SET_ZERO_FLAG(del);
2017 }
2018 }
2019 }
2020
2021 ACCUM_L(del) = vres[del];
2022 }
2023 WRITEBACK_RESULT();
2024 break;
2025 }
2026
2027 case 0x26: /* VCR */
2028 {
2029 // 31 25 24 20 15 10 5 0
2030 // ------------------------------------------------------
2031 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100110 |
2032 // ------------------------------------------------------
2033 //
2034 // Vector clip reverse
2035
2036 rsp.flag[0] = 0;
2037 rsp.flag[1] = 0;
2038 rsp.flag[2] = 0;
2039
2040 for (i=0; i < 8; i++)
2041 {
2042 int del = VEC_EL_1(EL, i);
2043 int sel = VEC_EL_2(EL, del);
2044 INT16 s1 = VREG_S(VS1REG, del);
2045 INT16 s2 = VREG_S(VS2REG, sel);
2046
2047 if ((INT16)(s1 ^ s2) < 0)
2048 {
2049 if (s2 < 0)
2050 {
2051 rsp.flag[1] |= (1 << (8+del));
2052 }
2053 if ((s1 + s2) <= 0)
2054 {
2055 ACCUM_L(del) = ~((UINT16)s2);
2056 SET_COMPARE_FLAG(del);
2057 }
2058 else
2059 {
2060 ACCUM_L(del) = s1;
2061 }
2062 }
2063 else
2064 {
2065 if (s2 < 0)
2066 {
2067 SET_COMPARE_FLAG(del);
2068 }
2069 if ((s1 - s2) >= 0)
2070 {
2071 ACCUM_L(del) = s2;
2072 rsp.flag[1] |= (1 << (8+del));
2073 }
2074 else
2075 {
2076 ACCUM_L(del) = s1;
2077 }
2078 }
2079
2080 vres[del] = ACCUM_L(del);
2081 }
2082 WRITEBACK_RESULT();
2083 break;
2084 }
2085
2086 case 0x27: /* VMRG */
2087 {
2088 // 31 25 24 20 15 10 5 0
2089 // ------------------------------------------------------
2090 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100111 |
2091 // ------------------------------------------------------
2092 //
2093 // Merges two vectors according to compare flags
2094
2095 for (i=0; i < 8; i++)
2096 {
2097 int del = VEC_EL_1(EL, i);
2098 int sel = VEC_EL_2(EL, del);
2099 if (COMPARE_FLAG(del) != 0)
2100 {
2101 vres[del] = VREG_S(VS1REG, del);
2102 }
2103 else
2104 {
2105 vres[del] = VREG_S(VS2REG, VEC_EL_2(EL, sel));
2106 }
2107
2108 ACCUM_L(del) = vres[del];
2109 }
2110 WRITEBACK_RESULT();
2111 break;
2112 }
2113 case 0x28: /* VAND */
2114 {
2115 // 31 25 24 20 15 10 5 0
2116 // ------------------------------------------------------
2117 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101000 |
2118 // ------------------------------------------------------
2119 //
2120 // Bitwise AND of two vector registers
2121
2122 for (i=0; i < 8; i++)
2123 {
2124 int del = VEC_EL_1(EL, i);
2125 int sel = VEC_EL_2(EL, del);
2126 vres[del] = VREG_S(VS1REG, del) & VREG_S(VS2REG, sel);
2127 ACCUM_L(del) = vres[del];
2128 }
2129 WRITEBACK_RESULT();
2130 break;
2131 }
2132 case 0x29: /* VNAND */
2133 {
2134 // 31 25 24 20 15 10 5 0
2135 // ------------------------------------------------------
2136 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101001 |
2137 // ------------------------------------------------------
2138 //
2139 // Bitwise NOT AND of two vector registers
2140
2141 for (i=0; i < 8; i++)
2142 {
2143 int del = VEC_EL_1(EL, i);
2144 int sel = VEC_EL_2(EL, del);
2145 vres[del] = ~((VREG_S(VS1REG, del) & VREG_S(VS2REG, sel)));
2146 ACCUM_L(del) = vres[del];
2147 }
2148 WRITEBACK_RESULT();
2149 break;
2150 }
2151 case 0x2a: /* VOR */
2152 {
2153 // 31 25 24 20 15 10 5 0
2154 // ------------------------------------------------------
2155 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101010 |
2156 // ------------------------------------------------------
2157 //
2158 // Bitwise OR of two vector registers
2159
2160 for (i=0; i < 8; i++)
2161 {
2162 int del = VEC_EL_1(EL, i);
2163 int sel = VEC_EL_2(EL, del);
2164 vres[del] = VREG_S(VS1REG, del) | VREG_S(VS2REG, sel);
2165 ACCUM_L(del) = vres[del];
2166 }
2167 WRITEBACK_RESULT();
2168 break;
2169 }
2170 case 0x2b: /* VNOR */
2171 {
2172 // 31 25 24 20 15 10 5 0
2173 // ------------------------------------------------------
2174 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101011 |
2175 // ------------------------------------------------------
2176 //
2177 // Bitwise NOT OR of two vector registers
2178
2179 for (i=0; i < 8; i++)
2180 {
2181 int del = VEC_EL_1(EL, i);
2182 int sel = VEC_EL_2(EL, del);
2183 vres[del] = ~((VREG_S(VS1REG, del) | VREG_S(VS2REG, sel)));
2184 ACCUM_L(del) = vres[del];
2185 }
2186 WRITEBACK_RESULT();
2187 break;
2188 }
2189 case 0x2c: /* VXOR */
2190 {
2191 // 31 25 24 20 15 10 5 0
2192 // ------------------------------------------------------
2193 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101100 |
2194 // ------------------------------------------------------
2195 //
2196 // Bitwise XOR of two vector registers
2197
2198 for (i=0; i < 8; i++)
2199 {
2200 int del = VEC_EL_1(EL, i);
2201 int sel = VEC_EL_2(EL, del);
2202 vres[del] = VREG_S(VS1REG, del) ^ VREG_S(VS2REG, sel);
2203 ACCUM_L(del) = vres[del];
2204 }
2205 WRITEBACK_RESULT();
2206 break;
2207 }
2208 case 0x2d: /* VNXOR */
2209 {
2210 // 31 25 24 20 15 10 5 0
2211 // ------------------------------------------------------
2212 // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101101 |
2213 // ------------------------------------------------------
2214 //
2215 // Bitwise NOT XOR of two vector registers
2216
2217 for (i=0; i < 8; i++)
2218 {
2219 int del = VEC_EL_1(EL, i);
2220 int sel = VEC_EL_2(EL, del);
2221 vres[del] = ~((VREG_S(VS1REG, del) ^ VREG_S(VS2REG, sel)));
2222 ACCUM_L(del) = vres[del];
2223 }
2224 WRITEBACK_RESULT();
2225 break;
2226 }
2227
2228 case 0x30: /* VRCP */
2229 {
2230 // 31 25 24 20 15 10 5 0
2231 // ------------------------------------------------------
2232 // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110000 |
2233 // ------------------------------------------------------
2234 //
2235 // Calculates reciprocal
2236 int del = (VS1REG & 7);
2237 int sel = EL&7; //VEC_EL_2(EL, del);
2238 INT32 rec;
2239
2240 rec = (INT16)(VREG_S(VS2REG, sel));
2241
2242 if (rec == 0)
2243 {
2244 // divide by zero -> overflow
2245 rec = 0x7fffffff;
2246 }
2247 else
2248 {
2249 int negative = 0;
2250 if (rec < 0)
2251 {
2252 rec = ~rec+1;
2253 negative = 1;
2254 }
2255 for (i = 15; i > 0; i--)
2256 {
2257 if (rec & (1 << i))
2258 {
2259 rec &= ((0xffc0) >> (15 - i));
2260 i = 0;
2261 }
2262 }
2263 rec = (INT32)(0x7fffffff / (double)rec);
2264 for (i = 31; i > 0; i--)
2265 {
2266 if (rec & (1 << i))
2267 {
2268 rec &= ((0xffff8000) >> (31 - i));
2269 i = 0;
2270 }
2271 }
2272 if (negative)
2273 {
2274 rec = ~rec;
2275 }
2276 }
2277
2278 for (i=0; i < 8; i++)
2279 {
2280 int element = VEC_EL_2(EL, i);
2281 ACCUM_L(i) = VREG_S(VS2REG, element);
2282 }
2283
2284 rsp.reciprocal_res = rec;
2285
2286 VREG_S(VDREG, del) = (UINT16)(rsp.reciprocal_res); // store low part
2287 break;
2288 }
2289
2290 case 0x31: /* VRCPL */
2291 {
2292 // 31 25 24 20 15 10 5 0
2293 // ------------------------------------------------------
2294 // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110001 |
2295 // ------------------------------------------------------
2296 //
2297 // Calculates reciprocal low part
2298
2299 int del = (VS1REG & 7);
2300 int sel = VEC_EL_2(EL, del);
2301 INT32 rec;
2302
2303 rec = ((UINT16)(VREG_S(VS2REG, sel)) | ((UINT32)(rsp.reciprocal_high) << 16));
2304
2305 if (rec == 0)
2306 {
2307 // divide by zero -> overflow
2308 rec = 0x7fffffff;
2309 }
2310 else
2311 {
2312 int negative = 0;
2313 if (rec < 0)
2314 {
2315 if (((UINT32)(rec & 0xffff0000) == 0xffff0000) && ((INT16)(rec & 0xffff) < 0))
2316 {
2317 rec = ~rec+1;
2318 }
2319 else
2320 {
2321 rec = ~rec;
2322 }
2323 negative = 1;
2324 }
2325 for (i = 31; i > 0; i--)
2326 {
2327 if (rec & (1 << i))
2328 {
2329 rec &= ((0xffc00000) >> (31 - i));
2330 i = 0;
2331 }
2332 }
2333 rec = (0x7fffffff / rec);
2334 for (i = 31; i > 0; i--)
2335 {
2336 if (rec & (1 << i))
2337 {
2338 rec &= ((0xffff8000) >> (31 - i));
2339 i = 0;
2340 }
2341 }
2342 if (negative)
2343 {
2344 rec = ~rec;
2345 }
2346 }
2347
2348 for (i=0; i < 8; i++)
2349 {
2350 int element = VEC_EL_2(EL, i);
2351 ACCUM_L(i) = VREG_S(VS2REG, element);
2352 }
2353
2354 rsp.reciprocal_res = rec;
2355
2356 VREG_S(VDREG, del) = (UINT16)(rsp.reciprocal_res); // store low part
2357 break;
2358 }
2359
2360 case 0x32: /* VRCPH */
2361 {
2362 // 31 25 24 20 15 10 5 0
2363 // ------------------------------------------------------
2364 // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110010 |
2365 // ------------------------------------------------------
2366 //
2367 // Calculates reciprocal high part
2368
2369 int del = (VS1REG & 7);
2370 int sel = VEC_EL_2(EL, del);
2371
2372 rsp.reciprocal_high = VREG_S(VS2REG, sel);
2373
2374 for (i=0; i < 8; i++)
2375 {
2376 int element = VEC_EL_2(EL, i);
2377 ACCUM_L(i) = VREG_S(VS2REG, element); // perhaps accumulator is used to store the intermediate values ?
2378 }
2379
2380 VREG_S(VDREG, del) = (INT16)(rsp.reciprocal_res >> 16); // store high part
2381 break;
2382 }
2383
2384 case 0x33: /* VMOV */
2385 {
2386 // 31 25 24 20 15 10 5 0
2387 // ------------------------------------------------------
2388 // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110011 |
2389 // ------------------------------------------------------
2390 //
2391 // Moves element from vector to destination vector
2392
2393 int element = VS1REG & 7;
2394 VREG_S(VDREG, element) = VREG_S(VS2REG, VEC_EL_2(EL, 7-element));
2395 break;
2396 }
2397
2398 case 0x35: /* VRSQL */
2399 {
2400 // 31 25 24 20 15 10 5 0
2401 // ------------------------------------------------------
2402 // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110101 |
2403 // ------------------------------------------------------
2404 //
2405 // Calculates reciprocal square-root low part
2406
2407 int del = (VS1REG & 7);
2408 int sel = VEC_EL_2(EL, del);
2409 UINT32 sqr;
2410
2411 sqr = (UINT16)(VREG_S(VS2REG, sel)) | ((UINT32)(rsp.square_root_high) << 16);
2412
2413 if (sqr == 0)
2414 {
2415 // square root on 0 -> overflow
2416 sqr = 0x7fffffff;
2417 }
2418 else if (sqr == 0xffff8000)
2419 {
2420 // overflow ?
2421 sqr = 0xffff8000;
2422 }
2423 else
2424 {
2425 int negative = 0;
2426 if (sqr > 0x7fffffff)
2427 {
2428 if (((UINT32)(sqr & 0xffff0000) == 0xffff0000) && ((INT16)(sqr & 0xffff) < 0))
2429 {
2430 sqr = ~sqr+1;
2431 }
2432 else
2433 {
2434 sqr = ~sqr;
2435 }
2436 negative = 1;
2437 }
2438 for (i = 31; i > 0; i--)
2439 {
2440 if (sqr & (1 << i))
2441 {
2442 sqr &= (0xff800000 >> (31 - i));
2443 i = 0;
2444 }
2445 }
2446 sqr = (INT32)(0x7fffffff / sqrt(sqr));
2447 for (i = 31; i > 0; i--)
2448 {
2449 if (sqr & (1 << i))
2450 {
2451 sqr &= (0xffff8000 >> (31 - i));
2452 i = 0;
2453 }
2454 }
2455 if (negative)
2456 {
2457 sqr = ~sqr;
2458 }
2459 }
2460
2461 for (i=0; i < 8; i++)
2462 {
2463 int element = VEC_EL_2(EL, i);
2464 ACCUM_L(i) = VREG_S(VS2REG, element);
2465 }
2466
2467 rsp.square_root_res = sqr;
2468
2469 VREG_S(VDREG, del) = (UINT16)(rsp.square_root_res); // store low part
2470 break;
2471 }
2472
2473 case 0x36: /* VRSQH */
2474 {
2475 // 31 25 24 20 15 10 5 0
2476 // ------------------------------------------------------
2477 // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110110 |
2478 // ------------------------------------------------------
2479 //
2480 // Calculates reciprocal square-root high part
2481
2482 int del = (VS1REG & 7);
2483 int sel = VEC_EL_2(EL, del);
2484
2485 rsp.square_root_high = VREG_S(VS2REG, sel);
2486
2487 for (i=0; i < 8; i++)
2488 {
2489 int element = VEC_EL_2(EL, i);
2490 ACCUM_L(i) = VREG_S(VS2REG, element); // perhaps accumulator is used to store the intermediate values ?
2491 }
2492
2493 VREG_S(VDREG, del) = (INT16)(rsp.square_root_res >> 16); // store high part
2494 break;
2495 }
2496
2497 default: unimplemented_opcode(op); break;
2498 }
2499}
2500
2501int rsp_execute(int cycles)
2502{
2503 UINT32 op;
2504
2505 rsp_icount=1; //cycles;
2506
2507 UINT32 ExecutedCycles=0;
2508 UINT32 BreakMarker=0;
2509 UINT32 WDCHackFlag1=0;
2510 UINT32 WDCHackFlag2=0;
2511
2512 sp_pc = /*0x4001000 | */(sp_pc & 0xfff);
2513 if( rsp_sp_status & (SP_STATUS_HALT|SP_STATUS_BROKE))
2514 {
2515 log(M64MSG_WARNING, "Quit due to SP halt/broke on start");
2516 rsp_icount = 0;
2517 }
2518
2519
2520 while (rsp_icount > 0)
2521 {
2522#ifdef RSPTIMING
2523 uint64_t lasttime;
2524 lasttime = RDTSC();
2525#endif
2526 rsp.ppc = sp_pc;
2527
2528
2529 op = ROPCODE(sp_pc);
2530#ifdef GENTRACE
2531 char s[128];
2532 rsp_dasm_one(s, sp_pc, op);
2533 GENTRACE("%2x %3x\t%s\n", ((UINT8*)rsp_dmem)[0x1934], sp_pc, s);
2534#endif
2535
2536 if (rsp.nextpc != ~0U)///DELAY SLOT USAGE
2537 {
2538 sp_pc = /*0x4001000 | */(rsp.nextpc & 0xfff); //rsp.nextpc;
2539 rsp.nextpc = ~0U;
2540 }
2541 else
2542 {
2543 sp_pc = /*0x4001000 | */((sp_pc+4)&0xfff);
2544 }
2545
2546 switch (op >> 26)
2547 {
2548 case 0x00: /* SPECIAL */
2549 {
2550 switch (op & 0x3f)
2551 {
2552 case 0x00: /* SLL */ if (RDREG) RDVAL = (UINT32)RTVAL << SHIFT; break;
2553 case 0x02: /* SRL */ if (RDREG) RDVAL = (UINT32)RTVAL >> SHIFT; break;
2554 case 0x03: /* SRA */ if (RDREG) RDVAL = (INT32)RTVAL >> SHIFT; break;
2555 case 0x04: /* SLLV */ if (RDREG) RDVAL = (UINT32)RTVAL << (RSVAL & 0x1f); break;
2556 case 0x06: /* SRLV */ if (RDREG) RDVAL = (UINT32)RTVAL >> (RSVAL & 0x1f); break;
2557 case 0x07: /* SRAV */ if (RDREG) RDVAL = (INT32)RTVAL >> (RSVAL & 0x1f); break;
2558 case 0x08: /* JR */ JUMP_PC(RSVAL); break;
2559 case 0x09: /* JALR */ JUMP_PC_L(RSVAL, RDREG); break;
2560 case 0x0d: /* BREAK */
2561 {
2562 *z64_rspinfo.SP_STATUS_REG |= (SP_STATUS_HALT | SP_STATUS_BROKE );
2563 if ((*z64_rspinfo.SP_STATUS_REG & SP_STATUS_INTR_BREAK) != 0 ) {
2564 *z64_rspinfo.MI_INTR_REG |= 1;
2565 z64_rspinfo.CheckInterrupts();
2566 }
2567 //sp_set_status(0x3);
2568 rsp_icount = 0;
2569
2570 BreakMarker=1;
2571
2572#if LOG_INSTRUCTION_EXECUTION
2573 fprintf(exec_output, "\n---------- break ----------\n\n");
2574#endif
2575 break;
2576 }
2577 case 0x20: /* ADD */ if (RDREG) RDVAL = (INT32)(RSVAL + RTVAL); break;
2578 case 0x21: /* ADDU */ if (RDREG) RDVAL = (INT32)(RSVAL + RTVAL); break;
2579 case 0x22: /* SUB */ if (RDREG) RDVAL = (INT32)(RSVAL - RTVAL); break;
2580 case 0x23: /* SUBU */ if (RDREG) RDVAL = (INT32)(RSVAL - RTVAL); break;
2581 case 0x24: /* AND */ if (RDREG) RDVAL = RSVAL & RTVAL; break;
2582 case 0x25: /* OR */ if (RDREG) RDVAL = RSVAL | RTVAL; break;
2583 case 0x26: /* XOR */ if (RDREG) RDVAL = RSVAL ^ RTVAL; break;
2584 case 0x27: /* NOR */ if (RDREG) RDVAL = ~(RSVAL | RTVAL); break;
2585 case 0x2a: /* SLT */ if (RDREG) RDVAL = (INT32)RSVAL < (INT32)RTVAL; break;
2586 case 0x2b: /* SLTU */ if (RDREG) RDVAL = (UINT32)RSVAL < (UINT32)RTVAL; break;
2587 default: unimplemented_opcode(op); break;
2588 }
2589 break;
2590 }
2591
2592 case 0x01: /* REGIMM */
2593 {
2594 switch (RTREG)
2595 {
2596 case 0x00: /* BLTZ */ if ((INT32)(RSVAL) < 0) JUMP_REL(SIMM16); break;
2597 case 0x01: /* BGEZ */ if ((INT32)(RSVAL) >= 0) JUMP_REL(SIMM16); break;
2598 // VP according to the doc, link is performed even when condition fails,
2599 // this sound pretty stupid but let's try it that way
2600 case 0x11: /* BGEZAL */ LINK(31); if ((INT32)(RSVAL) >= 0) JUMP_REL(SIMM16); break;
2601 //case 0x11: /* BGEZAL */ if ((INT32)(RSVAL) >= 0) JUMP_REL_L(SIMM16, 31); break;
2602 default: unimplemented_opcode(op); break;
2603 }
2604 break;
2605 }
2606
2607 case 0x02: /* J */ JUMP_ABS(UIMM26); break;
2608 case 0x03: /* JAL */ JUMP_ABS_L(UIMM26, 31); break;
2609 case 0x04: /* BEQ */ if (RSVAL == RTVAL) JUMP_REL(SIMM16); break;
2610 case 0x05: /* BNE */ if (RSVAL != RTVAL) JUMP_REL(SIMM16); break;
2611 case 0x06: /* BLEZ */ if ((INT32)RSVAL <= 0) JUMP_REL(SIMM16); break;
2612 case 0x07: /* BGTZ */ if ((INT32)RSVAL > 0) JUMP_REL(SIMM16); break;
2613 case 0x08: /* ADDI */ if (RTREG) RTVAL = (INT32)(RSVAL + SIMM16); break;
2614 case 0x09: /* ADDIU */ if (RTREG) RTVAL = (INT32)(RSVAL + SIMM16); break;
2615 case 0x0a: /* SLTI */ if (RTREG) RTVAL = (INT32)(RSVAL) < ((INT32)SIMM16); break;
2616 case 0x0b: /* SLTIU */ if (RTREG) RTVAL = (UINT32)(RSVAL) < (UINT32)((INT32)SIMM16); break;
2617 case 0x0c: /* ANDI */ if (RTREG) RTVAL = RSVAL & UIMM16; break;
2618 case 0x0d: /* ORI */ if (RTREG) RTVAL = RSVAL | UIMM16; break;
2619 case 0x0e: /* XORI */ if (RTREG) RTVAL = RSVAL ^ UIMM16; break;
2620 case 0x0f: /* LUI */ if (RTREG) RTVAL = UIMM16 << 16; break;
2621
2622 case 0x10: /* COP0 */
2623 {
2624 switch ((op >> 21) & 0x1f)
2625 {
2626 case 0x00: /* MFC0 */ if (RTREG) RTVAL = get_cop0_reg(RDREG); break;
2627 case 0x04: /* MTC0 */ set_cop0_reg(RDREG, RTVAL); break;
2628 default:
2629 log(M64MSG_WARNING, "unimplemented cop0 %x (%x)\n", (op >> 21) & 0x1f, op);
2630 break;
2631 }
2632 break;
2633 }
2634
2635 case 0x12: /* COP2 */
2636 {
2637 switch ((op >> 21) & 0x1f)
2638 {
2639 case 0x00: /* MFC2 */
2640 {
2641 // 31 25 20 15 10 6 0
2642 // ---------------------------------------------------
2643 // | 010010 | 00000 | TTTTT | DDDDD | IIII | 0000000 |
2644 // ---------------------------------------------------
2645 //
2646
2647 int el = (op >> 7) & 0xf;
2648 UINT16 b1 = VREG_B(VS1REG, (el+0) & 0xf);
2649 UINT16 b2 = VREG_B(VS1REG, (el+1) & 0xf);
2650 if (RTREG) RTVAL = (INT32)(INT16)((b1 << 8) | (b2));
2651 break;
2652 }
2653 case 0x02: /* CFC2 */
2654 {
2655 // 31 25 20 15 10 0
2656 // ------------------------------------------------
2657 // | 010010 | 00010 | TTTTT | DDDDD | 00000000000 |
2658 // ------------------------------------------------
2659 //
2660
2661 if (RTREG)
2662 {
2663 if (RDREG == 2)
2664 {
2665 // Anciliary clipping flags
2666 RTVAL = rsp.flag[RDREG] & 0x00ff;
2667 }
2668 else
2669 {
2670 // All other flags are 16 bits but sign-extended at retrieval
2671 RTVAL = (UINT32)rsp.flag[RDREG] | ( ( rsp.flag[RDREG] & 0x8000 ) ? 0xffff0000 : 0 );
2672 }
2673 }
2674 break;
2675
2676 }
2677 case 0x04: /* MTC2 */
2678 {
2679 // 31 25 20 15 10 6 0
2680 // ---------------------------------------------------
2681 // | 010010 | 00100 | TTTTT | DDDDD | IIII | 0000000 |
2682 // ---------------------------------------------------
2683 //
2684
2685 int el = (op >> 7) & 0xf;
2686 VREG_B(VS1REG, (el+0) & 0xf) = (RTVAL >> 8) & 0xff;
2687 VREG_B(VS1REG, (el+1) & 0xf) = (RTVAL >> 0) & 0xff;
2688 break;
2689 }
2690 case 0x06: /* CTC2 */
2691 {
2692 // 31 25 20 15 10 0
2693 // ------------------------------------------------
2694 // | 010010 | 00110 | TTTTT | DDDDD | 00000000000 |
2695 // ------------------------------------------------
2696 //
2697
2698 rsp.flag[RDREG] = RTVAL & 0xffff;
2699 break;
2700 }
2701
2702 case 0x10: case 0x11: case 0x12: case 0x13: case 0x14: case 0x15: case 0x16: case 0x17:
2703 case 0x18: case 0x19: case 0x1a: case 0x1b: case 0x1c: case 0x1d: case 0x1e: case 0x1f:
2704 {
2705 handle_vector_ops(op);
2706 break;
2707 }
2708
2709 default: unimplemented_opcode(op); break;
2710 }
2711 break;
2712 }
2713
2714 case 0x20: /* LB */ if (RTREG) RTVAL = (INT32)(INT8)READ8(RSVAL + SIMM16); break;
2715 case 0x21: /* LH */ if (RTREG) RTVAL = (INT32)(INT16)READ16(RSVAL + SIMM16); break;
2716 case 0x23: /* LW */ if (RTREG) RTVAL = READ32(RSVAL + SIMM16); break;
2717 case 0x24: /* LBU */ if (RTREG) RTVAL = (UINT8)READ8(RSVAL + SIMM16); break;
2718 case 0x25: /* LHU */ if (RTREG) RTVAL = (UINT16)READ16(RSVAL + SIMM16); break;
2719 case 0x28: /* SB */ WRITE8(RSVAL + SIMM16, RTVAL); break;
2720 case 0x29: /* SH */ WRITE16(RSVAL + SIMM16, RTVAL); break;
2721 case 0x2b: /* SW */ WRITE32(RSVAL + SIMM16, RTVAL); break;
2722 case 0x32: /* LWC2 */ handle_lwc2(op); break;
2723 case 0x3a: /* SWC2 */ handle_swc2(op); break;
2724
2725 default:
2726 {
2727 unimplemented_opcode(op);
2728 break;
2729 }
2730 }
2731
2732#ifdef RSPTIMING
2733 uint64_t time = lasttime;
2734 lasttime = RDTSC();
2735 rsp_opinfo_t info;
2736 rsp_get_opinfo(op, &info);
2737 rsptimings[info.op2] += lasttime - time;
2738 rspcounts[info.op2]++;
2739#endif
2740
2741#if LOG_INSTRUCTION_EXECUTION
2742 {
2743 int i, l;
2744 static UINT32 prev_regs[32];
2745 static VECTOR_REG prev_vecs[32];
2746 char string[200];
2747 rsp_dasm_one(string, rsp.ppc, op);
2748
2749 fprintf(exec_output, "%08X: %s", rsp.ppc, string);
2750
2751 l = strlen(string);
2752 if (l < 36)
2753 {
2754 for (i=l; i < 36; i++)
2755 {
2756 fprintf(exec_output, " ");
2757 }
2758 }
2759
2760 fprintf(exec_output, "| ");
2761
2762 for (i=0; i < 32; i++)
2763 {
2764 if (rsp.r[i] != prev_regs[i])
2765 {
2766 fprintf(exec_output, "R%d: %08X ", i, rsp.r[i]);
2767 }
2768 prev_regs[i] = rsp.r[i];
2769 }
2770
2771 for (i=0; i < 32; i++)
2772 {
2773 if (rsp.v[i].d[0] != prev_vecs[i].d[0] || rsp.v[i].d[1] != prev_vecs[i].d[1])
2774 {
2775 fprintf(exec_output, "V%d: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X ", i,
2776 (UINT16)VREG_S(i,0), (UINT16)VREG_S(i,1), (UINT16)VREG_S(i,2), (UINT16)VREG_S(i,3), (UINT16)VREG_S(i,4), (UINT16)VREG_S(i,5), (UINT16)VREG_S(i,6), (UINT16)VREG_S(i,7));
2777 }
2778 prev_vecs[i].d[0] = rsp.v[i].d[0];
2779 prev_vecs[i].d[1] = rsp.v[i].d[1];
2780 }
2781
2782 fprintf(exec_output, "\n");
2783
2784 }
2785#endif
2786 // --rsp_icount;
2787
2788 ExecutedCycles++;
2789 if( rsp_sp_status & SP_STATUS_SSTEP )
2790 {
2791 if( rsp.step_count )
2792 {
2793 rsp.step_count--;
2794 }
2795 else
2796 {
2797 rsp_sp_status |= SP_STATUS_BROKE;
2798 }
2799 }
2800
2801 if( rsp_sp_status & (SP_STATUS_HALT|SP_STATUS_BROKE))
2802 {
2803 rsp_icount = 0;
2804
2805 if(BreakMarker==0)
2806 log(M64MSG_WARNING, "Quit due to SP halt/broke set by MTC0\n");
2807 }
2808
2809 ///WDC&SR64 hack:VERSION3:1.8x -2x FASTER & safer
2810 if((WDCHackFlag1==0)&&(rsp.ppc>0x137)&&(rsp.ppc<0x14D))
2811 WDCHackFlag1=ExecutedCycles;
2812 if ((WDCHackFlag1!=0)&&((rsp.ppc<=0x137)||(rsp.ppc>=0x14D)))
2813 WDCHackFlag1=0;
2814 if ((WDCHackFlag1!=0)&&((ExecutedCycles-WDCHackFlag1)>=0x20)&&(rsp.ppc>0x137)&&(rsp.ppc<0x14D))
2815 {
2816 // printf("WDC hack quit 1\n");
2817 rsp_icount=0;//32 cycles should be enough
2818 }
2819 if((WDCHackFlag2==0)&&(rsp.ppc>0xFCB)&&(rsp.ppc<0xFD5))
2820 WDCHackFlag2=ExecutedCycles;
2821 if ((WDCHackFlag2!=0)&&((rsp.ppc<=0xFCB)||(rsp.ppc>=0xFD5)))
2822 WDCHackFlag2=0;
2823 if ((WDCHackFlag2!=0)&&((ExecutedCycles-WDCHackFlag2)>=0x20)&&(rsp.ppc>0xFCB)&&(rsp.ppc<0xFD5))
2824 {
2825 // printf("WDC hack quit 2\n");
2826 rsp_icount=0;//32 cycles should be enough
2827 }
2828
2829
2830 }
2831 //sp_pc -= 4;
2832
2833 return ExecutedCycles;
2834}
2835
2836/*****************************************************************************/
2837
2838
/* Direction of an SP DMA transfer, as passed to sp_dma(). */
enum sp_dma_direction
{
    SP_DMA_RDRAM_TO_IDMEM = 0,  /* copy from RDRAM into IMEM/DMEM */
    SP_DMA_IDMEM_TO_RDRAM = 1   /* copy from IMEM/DMEM out to RDRAM */
};
2844
2845static void sp_dma(enum sp_dma_direction direction)
2846{
2847 UINT8 *src, *dst;
2848 int i, j;
2849 int length;
2850 int count;
2851 int skip;
2852
2853
2854 UINT32 l = sp_dma_length;
2855 length = ((l & 0xfff) | 7) + 1;
2856 skip = (l >> 20) + length;
2857 count = ((l >> 12) & 0xff) + 1;
2858
2859 if (direction == SP_DMA_RDRAM_TO_IDMEM) // RDRAM -> I/DMEM
2860 {
2861 //UINT32 src_address = sp_dram_addr & ~7;
2862 //UINT32 dst_address = (sp_mem_addr & 0x1000) ? 0x4001000 : 0x4000000;
2863 src = (UINT8*)&rdram[(sp_dram_addr&~7) / 4];
2864 dst = (sp_mem_addr & 0x1000) ? (UINT8*)&rsp_imem[(sp_mem_addr & ~7 & 0xfff) / 4] : (UINT8*)&rsp_dmem[(sp_mem_addr & ~7 &0xfff) / 4];
2865 ///cpuintrf_push_context(0);
2866#define BYTE8_XOR_BE(a) ((a)^7)// JFG, Ocarina of Time
2867
2868 for (j=0; j < count; j++)
2869 {
2870 for (i=0; i < length; i++)
2871 {
2872 ///UINT8 b = program_read_byte_64be(src_address + i + (j*skip));
2873 ///program_write_byte_64be(dst_address + (((sp_mem_addr & ~7) + i + (j*length)) & 0xfff), b);
2874 dst[BYTE8_XOR_BE((i + j*length)&0xfff)] = src[BYTE8_XOR_BE(i + j*skip)];
2875 }
2876 }
2877
2878 ///cpuintrf_pop_context();
2879 *z64_rspinfo.SP_DMA_BUSY_REG = 0;
2880 *z64_rspinfo.SP_STATUS_REG &= ~SP_STATUS_DMABUSY;
2881 }
2882 else if (direction == SP_DMA_IDMEM_TO_RDRAM) // I/DMEM -> RDRAM
2883 {
2884 //UINT32 dst_address = sp_dram_addr & ~7;
2885 //UINT32 src_address = (sp_mem_addr & 0x1000) ? 0x4001000 : 0x4000000;
2886
2887 dst = (UINT8*)&rdram[(sp_dram_addr&~7) / 4];
2888 src = (sp_mem_addr & 0x1000) ? (UINT8*)&rsp_imem[(sp_mem_addr & ~7 & 0xfff) / 4] : (UINT8*)&rsp_dmem[(sp_mem_addr & ~7 &0xfff) / 4];
2889 ///cpuintrf_push_context(0);
2890
2891 for (j=0; j < count; j++)
2892 {
2893 for (i=0; i < length; i++)
2894 {
2895 ///UINT8 b = program_read_byte_64be(src_address + (((sp_mem_addr & ~7) + i + (j*length)) & 0xfff));
2896 ///program_write_byte_64be(dst_address + i + (j*skip), b);
2897 dst[BYTE8_XOR_BE(i + j*skip)] = src[BYTE8_XOR_BE((+i + j*length)&0xfff)];
2898 }
2899 }
2900
2901 ///cpuintrf_pop_context();
2902 *z64_rspinfo.SP_DMA_BUSY_REG = 0;
2903 *z64_rspinfo.SP_STATUS_REG &= ~SP_STATUS_DMABUSY;
2904 }
2905
2906
2907}
2908
2909
2910
2911
2912
2913UINT32 n64_sp_reg_r(UINT32 offset, UINT32 dummy)
2914{
2915 switch (offset)
2916 {
2917 case 0x00/4: // SP_MEM_ADDR_REG
2918 return sp_mem_addr;
2919
2920 case 0x04/4: // SP_DRAM_ADDR_REG
2921 return sp_dram_addr;
2922
2923 case 0x08/4: // SP_RD_LEN_REG
2924 return sp_dma_rlength;
2925
2926 case 0x10/4: // SP_STATUS_REG
2927 return rsp_sp_status;
2928
2929 case 0x14/4: // SP_DMA_FULL_REG
2930 return 0;
2931
2932 case 0x18/4: // SP_DMA_BUSY_REG
2933 return 0;
2934
2935 case 0x1c/4: // SP_SEMAPHORE_REG
2936 return sp_semaphore;
2937
2938 default:
2939 log(M64MSG_WARNING, "sp_reg_r: %08X\n", offset);
2940 break;
2941 }
2942
2943 return 0;
2944}
2945
2946//UINT32 n64_sp_reg_w(RSP_REGS & rsp, UINT32 offset, UINT32 data, UINT32 dummy)
2947void n64_sp_reg_w(UINT32 offset, UINT32 data, UINT32 dummy)
2948{
2949 UINT32 InterruptPending=0;
2950 if ((offset & 0x10000) == 0)
2951 {
2952 switch (offset & 0xffff)
2953 {
2954 case 0x00/4: // SP_MEM_ADDR_REG
2955 sp_mem_addr = data;
2956 break;
2957
2958 case 0x04/4: // SP_DRAM_ADDR_REG
2959 sp_dram_addr = data & 0xffffff;
2960 break;
2961
2962 case 0x08/4: // SP_RD_LEN_REG
2963 // sp_dma_length = data & 0xfff;
2964 // sp_dma_count = (data >> 12) & 0xff;
2965 // sp_dma_skip = (data >> 20) & 0xfff;
2966 sp_dma_length=data;
2967 sp_dma(SP_DMA_RDRAM_TO_IDMEM);
2968 break;
2969
2970 case 0x0c/4: // SP_WR_LEN_REG
2971 // sp_dma_length = data & 0xfff;
2972 // sp_dma_count = (data >> 12) & 0xff;
2973 // sp_dma_skip = (data >> 20) & 0xfff;
2974 sp_dma_length=data;
2975 sp_dma(SP_DMA_IDMEM_TO_RDRAM);
2976 break;
2977
2978 case 0x10/4: // SP_STATUS_REG
2979 {
2980 if((data&0x1)&&(data&0x2))
2981 log(M64MSG_ERROR, "Clear halt and set halt simultaneously\n");
2982 if((data&0x8)&&(data&0x10))
2983 log(M64MSG_ERROR, "Clear int and set int simultaneously\n");
2984 if((data&0x20)&&(data&0x40))
2985 log(M64MSG_ERROR, "Clear sstep and set sstep simultaneously\n");
2986 if (data & 0x00000001) // clear halt
2987 {
2988 rsp_sp_status &= ~SP_STATUS_HALT;
2989
2990 // if (first_rsp)
2991 // {
2992 // cpu_spinuntil_trigger(6789);
2993
2994 // cpunum_set_input_line(1, INPUT_LINE_HALT, CLEAR_LINE);
2995 // rsp_sp_status &= ~SP_STATUS_HALT;
2996 // }
2997 // else
2998 // {
2999 // first_rsp = 1;
3000 // }
3001 }
3002 if (data & 0x00000002) // set halt
3003 {
3004 // cpunum_set_input_line(1, INPUT_LINE_HALT, ASSERT_LINE);
3005 rsp_sp_status |= SP_STATUS_HALT;
3006 }
3007 if (data & 0x00000004) rsp_sp_status &= ~SP_STATUS_BROKE; // clear broke
3008 if (data & 0x00000008) // clear interrupt
3009 {
3010 *z64_rspinfo.MI_INTR_REG &= ~R4300i_SP_Intr;
3011 ///TEMPORARY COMMENTED FOR SPEED
3012 /// printf("sp_reg_w clear interrupt");
3013 //clear_rcp_interrupt(SP_INTERRUPT);
3014 }
3015 if (data & 0x00000010) // set interrupt
3016 {
3017 //signal_rcp_interrupt(SP_INTERRUPT);
3018 }
3019 if (data & 0x00000020) rsp_sp_status &= ~SP_STATUS_SSTEP; // clear single step
3020 if (data & 0x00000040) {
3021 rsp_sp_status |= SP_STATUS_SSTEP; // set single step
3022 log(M64MSG_STATUS, "RSP STATUS REG: SSTEP set\n");
3023 }
3024 if (data & 0x00000080) rsp_sp_status &= ~SP_STATUS_INTR_BREAK; // clear interrupt on break
3025 if (data & 0x00000100) rsp_sp_status |= SP_STATUS_INTR_BREAK; // set interrupt on break
3026 if (data & 0x00000200) rsp_sp_status &= ~SP_STATUS_SIGNAL0; // clear signal 0
3027 if (data & 0x00000400) rsp_sp_status |= SP_STATUS_SIGNAL0; // set signal 0
3028 if (data & 0x00000800) rsp_sp_status &= ~SP_STATUS_SIGNAL1; // clear signal 1
3029 if (data & 0x00001000) rsp_sp_status |= SP_STATUS_SIGNAL1; // set signal 1
3030 if (data & 0x00002000) rsp_sp_status &= ~SP_STATUS_SIGNAL2; // clear signal 2
3031 if (data & 0x00004000) rsp_sp_status |= SP_STATUS_SIGNAL2; // set signal 2
3032 if (data & 0x00008000) rsp_sp_status &= ~SP_STATUS_SIGNAL3; // clear signal 3
3033 if (data & 0x00010000) rsp_sp_status |= SP_STATUS_SIGNAL3; // set signal 3
3034 if (data & 0x00020000) rsp_sp_status &= ~SP_STATUS_SIGNAL4; // clear signal 4
3035 if (data & 0x00040000) rsp_sp_status |= SP_STATUS_SIGNAL4; // set signal 4
3036 if (data & 0x00080000) rsp_sp_status &= ~SP_STATUS_SIGNAL5; // clear signal 5
3037 if (data & 0x00100000) rsp_sp_status |= SP_STATUS_SIGNAL5; // set signal 5
3038 if (data & 0x00200000) rsp_sp_status &= ~SP_STATUS_SIGNAL6; // clear signal 6
3039 if (data & 0x00400000) rsp_sp_status |= SP_STATUS_SIGNAL6; // set signal 6
3040 if (data & 0x00800000) rsp_sp_status &= ~SP_STATUS_SIGNAL7; // clear signal 7
3041 if (data & 0x01000000) rsp_sp_status |= SP_STATUS_SIGNAL7; // set signal 7
3042
3043 if(InterruptPending==1)
3044 {
3045 *z64_rspinfo.MI_INTR_REG |= 1;
3046 z64_rspinfo.CheckInterrupts();
3047 InterruptPending=0;
3048 }
3049 break;
3050 }
3051
3052 case 0x1c/4: // SP_SEMAPHORE_REG
3053 sp_semaphore = data;
3054 // mame_printf_debug("sp_semaphore = %08X\n", sp_semaphore);
3055 break;
3056
3057 default:
3058 log(M64MSG_WARNING, "sp_reg_w: %08X, %08X\n", data, offset);
3059 break;
3060 }
3061 }
3062 else
3063 {
3064 switch (offset & 0xffff)
3065 {
3066 case 0x00/4: // SP_PC_REG
3067 //cpunum_set_info_int(1, CPUINFO_INT_PC, 0x04001000 | (data & 0xfff));
3068 //break;
3069
3070 default:
3071 log(M64MSG_WARNING, "sp_reg_w: %08X, %08X\n", data, offset);
3072 break;
3073 }
3074 }
3075}
3076
3077UINT32 sp_read_reg(UINT32 reg)
3078{
3079 switch (reg)
3080 {
3081 //case 4: return rsp_sp_status;
3082 default: return n64_sp_reg_r(reg, 0x00000000);
3083 }
3084}
3085
3086
3087void sp_write_reg(UINT32 reg, UINT32 data)
3088{
3089 switch (reg)
3090 {
3091 default: n64_sp_reg_w(reg, data, 0x00000000); break;
3092 }
3093}