/*
 * (C) Gražvydas "notaz" Ignotas, 2011
 *
 * This work is licensed under the terms of any of these licenses
 * (at your option):
 *  - GNU GPL, version 2 or later.
 *  - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.
 */

#include <stdio.h>
#include <string.h>
#include "gpu.h"

#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#define unlikely(x) __builtin_expect((x), 0)
#define noinline __attribute__((noinline))

#define gpu_log(fmt, ...) \
  printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)

//#define log_io gpu_log
#define log_io(...)
//#define log_anomaly gpu_log
#define log_anomaly(...)

struct psx_gpu gpu __attribute__((aligned(2048)));

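// return to the power-on state: GPUSTAT reads 0x14802000 (command/DMA
// ready flags set, display blanked), GP1(3) display-disable latched,
// 256x240 display mode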
static noinline void do_reset(void)
{
  memset(gpu.regs, 0, sizeof(gpu.regs));
  memset(gpu.ex_regs, 0, sizeof(gpu.ex_regs));
  gpu.status.reg = 0x14802000;
  gpu.gp0 = 0;
  gpu.regs[3] = 1;
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
}

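// GP1(6)/GP1(7) program the display range in GPU video clock ticks;
// the dot clock dividers of the 256/320/512/640 modes (10/8/5/4) all
// work out to 2560 ticks for a full-width line, hence the constant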
static noinline void update_width(void)
{
  int sw = gpu.screen.x2 - gpu.screen.x1;
  if (sw <= 0 || sw >= 2560)
    // full width
    gpu.screen.w = gpu.screen.hres;
  else
    gpu.screen.w = sw * gpu.screen.hres / 2560;
}

static noinline void update_height(void)
{
  int sh = gpu.screen.y2 - gpu.screen.y1;
  if (gpu.status.dheight)
    sh *= 2;
  if (sh <= 0)
    sh = gpu.screen.vres;

  gpu.screen.h = sh;
}

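// skip at most frameskip.set frames in a row; *advice is an external
// hint from the frontend that can switch skipping on early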
static noinline void decide_frameskip(void)
{
  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;
}

static noinline void decide_frameskip_allow(uint32_t cmd_e3)
{
  // no frameskip if it decides to draw to display area,
  // but not for interlace since it'll most likely always do that
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
  // one unsigned compare per axis: values below screen.x/y wrap to huge
  // numbers, so this tests "outside [start, start + size)" in one go
  gpu.frameskip.allow = gpu.status.interlace ||
    (uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
}

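// GP1(0x10) info queries: 02 = texture window (e2), 03/04 = draw area
// top-left/bottom-right (e3/e4), 05 = draw offset (e5), 06 apparently
// mirrors e5 here, 07 = GPU version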
static noinline void get_gpu_info(uint32_t data)
{
  switch (data & 0x0f) {
  case 0x02:
  case 0x03:
  case 0x04:
  case 0x05:
    gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
    break;
  case 0x06:
    gpu.gp0 = gpu.ex_regs[5] & 0xfffff;
    break;
  case 0x07:
    gpu.gp0 = 2;
    break;
  default:
    gpu.gp0 = 0;
    break;
  }
}

long GPUinit(void)
{
  int ret;
  ret = vout_init();
  ret |= renderer_init();

  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  do_reset();
  return ret;
}

long GPUshutdown(void)
{
  return vout_finish();
}

void GPUwriteStatus(uint32_t data)
{
  static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
  static const short vres[4] = { 240, 480, 256, 480 };
  uint32_t cmd = data >> 24;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    // filter out repeated writes, except for reset (0) and display start
    // (5), which have side effects even with an unchanged value
    if (cmd != 0 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
  case 0x00:
    do_reset();
    break;
  case 0x03:
    gpu.status.blanking = data & 1;
    break;
  case 0x04:
    gpu.status.dma = data & 3;
    break;
  case 0x05:
    // display start; also serves as the frame flip point for frameskip
    gpu.screen.x = data & 0x3ff;
    gpu.screen.y = (data >> 10) & 0x3ff;
    if (gpu.frameskip.set) {
      decide_frameskip_allow(gpu.ex_regs[3]);
      if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
        decide_frameskip();
        gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
      }
    }
    break;
  case 0x06:
    gpu.screen.x1 = data & 0xfff;
    gpu.screen.x2 = (data >> 12) & 0xfff;
    update_width();
    break;
  case 0x07:
    gpu.screen.y1 = data & 0x3ff;
    gpu.screen.y2 = (data >> 10) & 0x3ff;
    update_height();
    break;
  case 0x08:
    gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
    gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
    gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
    update_width();
    update_height();
    break;
  default:
    if ((cmd & 0xf0) == 0x10)
      get_gpu_info(data);
    break;
  }
}

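// number of parameter words following each GP0 command word;
// a complete packet is 1 + cmd_lengths[cmd] words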
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

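// VRAM is a single 1024x512 plane of 16bpp pixels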
#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]

static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (is_read)
    memcpy(mem, vram, l * 2);
  else
    memcpy(vram, mem, l * 2);
}

static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  if (gpu.dma.offset) {
    // finish a partially transferred line from the previous call
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read);
  }

  if (h > 0 && count > 0) {
    y &= 511;
    do_vram_line(x, y, sdata, count, is_read);
    o = count;
    count = 0;
  }
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}

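// GP0(0xa0)/GP0(0xc0): pos_word = x | (y << 16), size_word = w | (h << 16)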
static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  gpu.dma.w = size_word & 0x3ff;
  gpu.dma.h = (size_word >> 16) & 0x1ff;
  gpu.dma.offset = 0;

  renderer_flush_queues();
  if (is_read) {
    gpu.status.img = 1;
    // XXX: wrong for width 1
    memcpy(&gpu.gp0, VRAM_MEM_XY(gpu.dma.x, gpu.dma.y), 4);
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }
  else {
    renderer_invalidate_caches(gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
}

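// batch as long a run of contiguous draw commands as possible for the
// renderer; stop at VRAM i/o commands (0xa0/0xc0) and incomplete packets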
static int check_cmd(uint32_t *data, int count)
{
  int len, cmd, start, pos;
  int vram_dirty = 0;

  // process buffer
  for (start = pos = 0; pos < count; )
  {
    cmd = -1;
    len = 0;

    if (gpu.dma.h) {
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
      start = pos;
    }

    // do look-ahead pass to detect SR changes and VRAM i/o
    while (pos < count) {
      uint32_t *list = data + pos;
      cmd = list[0] >> 24;
      len = 1 + cmd_lengths[cmd];

      //printf("  %3d: %02x %d\n", pos, cmd, len);
      if ((cmd & 0xf4) == 0x24) {
        // flat textured prim
        gpu.ex_regs[1] &= ~0x1ff;
        gpu.ex_regs[1] |= list[4] & 0x1ff;
      }
      else if ((cmd & 0xf4) == 0x34) {
        // shaded textured prim
        gpu.ex_regs[1] &= ~0x1ff;
        gpu.ex_regs[1] |= list[5] & 0x1ff;
      }
      else if (cmd == 0xe3)
        decide_frameskip_allow(list[0]);

      if (2 <= cmd && cmd < 0xc0)
        vram_dirty = 1;
      else if ((cmd & 0xf8) == 0xe0)
        gpu.ex_regs[cmd & 7] = list[0];

      if (pos + len > count) {
        cmd = -1;
        break; // incomplete cmd
      }
      if (cmd == 0xa0 || cmd == 0xc0)
        break; // image i/o
      pos += len;
    }

    if (pos - start > 0) {
      if (!gpu.frameskip.active || !gpu.frameskip.allow)
        do_cmd_list(data + start, pos - start);
      start = pos;
    }

    if (cmd == 0xa0 || cmd == 0xc0) {
      // consume vram write/read cmd
      start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
      pos += len;
    }
    else if (cmd == -1)
      break;
  }

  // mirror the software-visible state into GPUSTAT: bits 0-10 come from
  // the e1 draw mode reg, bits 11-12 from the e6 mask settings
  gpu.status.reg &= ~0x1fff;
  gpu.status.reg |= gpu.ex_regs[1] & 0x7ff;
  gpu.status.reg |= (gpu.ex_regs[6] & 3) << 11;

  if (gpu.frameskip.active)
    renderer_sync_ecmds(gpu.ex_regs);
  gpu.state.fb_dirty |= vram_dirty;

  return count - pos;
}

static void flush_cmd_buffer(void)
{
  int left = check_cmd(gpu.cmd_buffer, gpu.cmd_len);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  gpu.cmd_len = left;
}

void GPUwriteDataMem(uint32_t *mem, int count)
{
  int left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = check_cmd(mem, count);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}

void GPUwriteData(uint32_t data)
{
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = data;
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer();
}

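// a DMA linked list node is (word_count << 24) | next_addr, followed by
// word_count words of GP0 data; next_addr == 0xffffff ends the list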
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
{
  uint32_t addr, *list;
  uint32_t *llist_entry = NULL;
  int len, left, count;
  long cpu_cycles = 0;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  // ff7 sends its main list twice, detect this and mark the head of the
  // previously sent list so that the duplicate walk terminates immediately
  if (*gpu.state.frame_count == gpu.state.last_list.frame &&
      *gpu.state.hcnt - gpu.state.last_list.hcnt <= 1 &&
      gpu.state.last_list.cycles > 2048)
  {
    llist_entry = rambase + (gpu.state.last_list.addr & 0x1fffff) / 4;
    *llist_entry |= 0x800000;
  }

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; addr != 0xffffff; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = list[0] >> 24;
    addr = list[0] & 0xffffff;
    cpu_cycles += 10;
    if (len > 0)
      cpu_cycles += 5 + len;

    log_io(".chain %08x #%d\n", (list - rambase) * 4, len);

    // loop detection marker
    // (bit23 set causes DMA error on real machine, so
    //  unlikely to be ever set by the game)
    list[0] |= 0x800000;

    if (len) {
      left = check_cmd(list + 1, len);
      if (left)
        log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
    }

    if (addr & 0x800000)
      break;
  }

  // remove loop detection markers
  addr = start_addr & 0x1fffff;
  while (count-- > 0) {
    list = rambase + addr / 4;
    addr = list[0] & 0x1fffff;
    list[0] &= ~0x800000;
  }
  if (llist_entry)
    *llist_entry &= ~0x800000;

  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;

  return cpu_cycles;
}

void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}

uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h)
    do_vram_io(&ret, 1, 1);

  log_io("gpu_read %08x\n", ret);
  return ret;
}

uint32_t GPUreadStatus(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.status.reg;
  log_io("gpu_read_status %08x\n", ret);
  return ret;
}

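// savestate block in the PSEmu Pro style shared by other GPU plugins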
struct GPUFreeze
{
  uint32_t ulFreezeVersion;           // should be always 1 for now (set by main emu)
  uint32_t ulStatus;                  // current gpu status
  uint32_t ulControl[256];            // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};

long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
  case 1: // save
    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
    memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
    memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
    freeze->ulStatus = gpu.status.reg;
    break;
  case 0: // load
    renderer_invalidate_caches(0, 0, 1024, 512);
    memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
    memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
    memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
    gpu.status.reg = freeze->ulStatus;
    for (i = 8; i > 0; i--) {
      // regs[i] was just flipped, so the real value written below always
      // differs and won't be filtered out as a duplicate
      gpu.regs[i] ^= 1; // avoid reg change detection
      GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
    }
    renderer_sync_ecmds(gpu.ex_regs);
    break;
  }

  return 1;
}

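// expected to be called by the core once per emulated vblank; presents
// the frame unless the display is blanked, unchanged, or being skipped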
void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

  if (gpu.status.blanking || !gpu.state.fb_dirty)
    return;

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      // no complete frame was rendered; keep waiting, but force one
      // through if ~9 frames have passed without a flip
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  gpu.state.fb_dirty = 0;
}

void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && gpu.status.interlace && gpu.status.dheight;
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}

#include "../../frontend/plugin_lib.h"

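// pcsx_rearmed-specific hook: the frontend hands in its counters and
// configuration instead of going through the standard plugin interface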
void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}

// vim:shiftwidth=2:expandtab