Merge pull request #749 from pcercuei/lightrec-allow-mem-override
plugins/gpulib/gpu.c
/*
 * (C) Gražvydas "notaz" Ignotas, 2011-2012
 *
 * This work is licensed under the terms of any of these licenses
 * (at your option):
 *  - GNU GPL, version 2 or later.
 *  - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "gpu.h"

#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#ifdef __GNUC__
#define unlikely(x) __builtin_expect((x), 0)
#define preload __builtin_prefetch
#define noinline __attribute__((noinline))
#else
#define unlikely(x) (x)
#define preload(...)
#define noinline
#endif

#define gpu_log(fmt, ...) \
  printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)

//#define log_io gpu_log
#define log_io(...)
//#define log_anomaly gpu_log
#define log_anomaly(...)

struct psx_gpu gpu;

static noinline int do_cmd_buffer(uint32_t *data, int count);
static void finish_vram_transfer(int is_read);

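// GP1(0x01) "reset command buffer": run whatever is still queued, then
// drop it, and terminate any unfinished VRAM transfer so the next command
// starts from a clean state.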
static noinline void do_cmd_reset(void)
{
  renderer_sync();

  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  gpu.cmd_len = 0;

  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}

static noinline void do_reset(void)
{
  unsigned int i;

  do_cmd_reset();

  memset(gpu.regs, 0, sizeof(gpu.regs));
  for (i = 0; i < ARRAY_SIZE(gpu.ex_regs); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
  gpu.status = 0x14802000;
  gpu.gp0 = 0;
  gpu.regs[3] = 1;
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
  gpu.screen.x = gpu.screen.y = 0;
}

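// Display width comes from GPUSTAT bits 16-18: bit 16 selects the odd
// 368-pixel mode, bits 17-18 pick 256/320/512/640. hdivs[] holds the
// matching dotclock dividers used to turn the x1..x2 display range
// (measured in GPU dotclocks) into pixels.
// Worked example (register values assumed for illustration): with the
// common NTSC defaults x1=0x260 (608) and x2=0xc60 (3168) in 320-wide
// mode, hdiv is 8, so sw = (3168 - 608) / 8 = 320 pixels at x = 0.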
static noinline void update_width(void)
{
  static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
  static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
  uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
  int hres = hres_all[(gpu.status >> 16) & 7];
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int sw = gpu.screen.x2 - gpu.screen.x1;
  int x = 0, x_auto;
  if (sw <= 0)
    /* nothing displayed? */;
  else {
    int s = pal ? 656 : 608; // or 600? pal is just a guess
    x = (gpu.screen.x1 - s) / hdiv;
    x = (x + 1) & ~1;   // blitter limitation
    sw /= hdiv;
    sw = (sw + 2) & ~3; // according to nocash
    switch (gpu.state.screen_centering_type) {
    case 1:
      break;
    case 2:
      x = gpu.state.screen_centering_x;
      break;
    default:
      // correct if slightly miscentered
      x_auto = (hres - sw) / 2 & ~3;
      if ((uint32_t)x_auto <= 8u && abs(x) < 24)
        x = x_auto;
    }
    if (x + sw > hres)
      sw = hres - x;
    // .x range check is done in vout_update()
  }
  // reduce the unpleasant right border that a few games have
  if (gpu.state.screen_centering_type == 0
      && x <= 4 && hres - (x + sw) >= 4)
    hres -= 4;
  gpu.screen.x = x;
  gpu.screen.w = sw;
  gpu.screen.hres = hres;
  gpu.state.dims_changed = 1;
  //printf("xx %d %d -> %2d, %d / %d\n",
  //  gpu.screen.x1, gpu.screen.x2, x, sw, hres);
}

static noinline void update_height(void)
{
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
  int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
  int sh = gpu.screen.y2 - gpu.screen.y1;
  int center_tol = 16;
  int vres = 240;

  if (pal && (sh > 240 || gpu.screen.vres == 256))
    vres = 256;
  if (dheight)
    y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
  if (sh <= 0)
    /* nothing displayed? */;
  else {
    switch (gpu.state.screen_centering_type) {
    case 1:
      break;
    case 2:
      y = gpu.state.screen_centering_y;
      break;
    default:
      // correct if slightly miscentered
      if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
        y = 0;
    }
    if (y + sh > vres)
      sh = vres - y;
  }
  gpu.screen.y = y;
  gpu.screen.h = sh;
  gpu.screen.vres = vres;
  gpu.state.dims_changed = 1;
  //printf("yy %d %d -> %d, %d / %d\n",
  //  gpu.screen.y1, gpu.screen.y2, y, sh, vres);
}

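// Frameskip policy: a frontend "force" or "advice" flag switches skipping
// on; otherwise gpu.frameskip.set acts as a skip-N-frames pattern driven
// by the cnt counter. A fill deferred by do_cmd_list_skip() is replayed
// here once skipping ends, so the next rendered frame starts from the
// correct background.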
static noinline void decide_frameskip(void)
{
  *gpu.frameskip.dirty = 1;

  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  if (*gpu.frameskip.force)
    gpu.frameskip.active = 1;
  else if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;

  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    int dummy;
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
  }
}

static noinline int decide_frameskip_allow(uint32_t cmd_e3)
{
  // no frameskip if it decides to draw to display area,
  // but not for interlace since it'll most likely always do that
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
  gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
    (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
  return gpu.frameskip.allow;
}

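// GP1(0x10) "get GPU info": sub-commands 2-4 read back the texture window
// and drawing area settings latched from GP0 0xe2-0xe4, 5 reads the
// drawing offset (0xe5), and 7 returns a version code of 2; everything
// else leaves GP0 unchanged.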
static noinline void get_gpu_info(uint32_t data)
{
  switch (data & 0x0f) {
  case 0x02:
  case 0x03:
  case 0x04:
    gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
    break;
  case 0x05:
    gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
    break;
  case 0x07:
    gpu.gp0 = 2;
    break;
  default:
    // gpu.gp0 unchanged
    break;
  }
}

// double, for overdraw guard
#define VRAM_SIZE ((1024 * 512 * 2 * 2) + 4096)

// Minimum 16-byte VRAM alignment needed by gpu_unai's pixel-skipping
// renderer/downscaler used in high-res modes:
#ifdef GCW_ZERO
  // On the GCW Zero (MIPS), align to 8192 bytes (one TLB entry) to reduce
  // the number of fills. (Will change this value if it ever gets large
  // page support.)
  #define VRAM_ALIGN 8192
#else
  #define VRAM_ALIGN 16
#endif

// vram ptr received from mmap/malloc/alloc (will deallocate using this)
static uint16_t *vram_ptr_orig = NULL;

#ifdef GPULIB_USE_MMAP
static int map_vram(void)
{
  gpu.vram = vram_ptr_orig = gpu.mmap(VRAM_SIZE + (VRAM_ALIGN-1));
  if (gpu.vram != NULL) {
    // 4kb guard in front
    gpu.vram += (4096 / 2);
    // Align
    gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
    return 0;
  }
  else {
    fprintf(stderr, "could not map vram, expect crashes\n");
    return -1;
  }
}
#else
static int allocate_vram(void)
{
  gpu.vram = vram_ptr_orig = (uint16_t*)calloc(VRAM_SIZE + (VRAM_ALIGN-1), 1);
  if (gpu.vram != NULL) {
    // 4kb guard in front
    gpu.vram += (4096 / 2);
    // Align
    gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
    return 0;
  } else {
    fprintf(stderr, "could not allocate vram, expect crashes\n");
    return -1;
  }
}

// non-mmap builds: the delayed-map path falls back to the same allocator
static int map_vram(void)
{
  return allocate_vram();
}
#endif

long GPUinit(void)
{
#ifndef GPULIB_USE_MMAP
  if (gpu.vram == NULL) {
    if (allocate_vram() != 0) {
      printf("ERROR: could not allocate VRAM, exiting..\n");
      exit(1);
    }
  }
#endif

  //extern uint32_t hSyncCount;    // in psxcounters.cpp
  //extern uint32_t frame_counter; // in psxcounters.cpp
  //gpu.state.hcnt = &hSyncCount;
  //gpu.state.frame_count = &frame_counter;

  int ret;
  ret = vout_init();
  ret |= renderer_init();

  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.frameskip.active = 0;
  gpu.cmd_len = 0;
  do_reset();

  /*if (gpu.mmap != NULL) {
    if (map_vram() != 0)
      ret = -1;
  }*/
  return ret;
}

long GPUshutdown(void)
{
  long ret;

  renderer_finish();
  ret = vout_finish();

  if (vram_ptr_orig != NULL) {
#ifdef GPULIB_USE_MMAP
    gpu.munmap(vram_ptr_orig, VRAM_SIZE);
#else
    free(vram_ptr_orig);
#endif
  }
  vram_ptr_orig = gpu.vram = NULL;

  return ret;
}

void GPUwriteStatus(uint32_t data)
{
  uint32_t cmd = data >> 24;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
  case 0x00:
    do_reset();
    break;
  case 0x01:
    do_cmd_reset();
    break;
  case 0x03:
    if (data & 1) {
      gpu.status |= PSX_GPU_STATUS_BLANKING;
      gpu.state.dims_changed = 1; // for hud clearing
    }
    else
      gpu.status &= ~PSX_GPU_STATUS_BLANKING;
    break;
  case 0x04:
    gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
    gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
    break;
  case 0x05:
    gpu.screen.src_x = data & 0x3ff;
    gpu.screen.src_y = (data >> 10) & 0x1ff;
    if (gpu.frameskip.set) {
      decide_frameskip_allow(gpu.ex_regs[3]);
      if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
        decide_frameskip();
        gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
      }
    }
    break;
  case 0x06:
    gpu.screen.x1 = data & 0xfff;
    gpu.screen.x2 = (data >> 12) & 0xfff;
    update_width();
    break;
  case 0x07:
    gpu.screen.y1 = data & 0x3ff;
    gpu.screen.y2 = (data >> 10) & 0x3ff;
    update_height();
    break;
  case 0x08:
    gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3f) << 17) | ((data & 0x40) << 10);
    update_width();
    update_height();
    renderer_notify_res_change();
    break;
  default:
    if ((cmd & 0xf0) == 0x10)
      get_gpu_info(data);
    break;
  }

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}

const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
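// cmd_lengths[] holds the number of parameter words following each GP0
// opcode, so a packet spans 1 + cmd_lengths[cmd] words. Polylines
// (0x48-0x4f, 0x58-0x5f) store only the minimum length here and are
// extended at parse time until the 0x5xxx5xxx terminator; vram transfers
// (0xa0/0xc0) get their true size from the two parameter words.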

#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]

static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (is_read)
    memcpy(mem, vram, l * 2);
  else
    memcpy(vram, mem, l * 2);
}

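// VRAM transfers arrive as a stream of 32-bit words (two 16bpp pixels per
// word) with no inherent line structure, so do_vram_io() reassembles the
// x/y/w/h rectangle in three phases: finish a partially transferred line,
// copy whole lines, then stash any trailing partial line in dma.offset
// for the next call.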
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  renderer_sync();

  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read);
  }

  if (h > 0) {
    if (count > 0) {
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}

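// Note the size encoding below: ((n - 1) & mask) + 1 maps a size of 0 to
// the maximum (1024 wide / 512 high), which is how the hardware treats a
// zero extent per the nocash docs.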
static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;

  renderer_flush_queues();
  if (is_read) {
    gpu.status |= PSX_GPU_STATUS_IMG;
    // XXX: wrong for width 1
    gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
      gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
}

static void finish_vram_transfer(int is_read)
{
  if (is_read)
    gpu.status &= ~PSX_GPU_STATUS_IMG;
  else
    renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
        gpu.dma_start.w, gpu.dma_start.h);
}

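// Parser used while frameskipping: nothing is drawn, but state that
// outlives the skipped frame is still tracked - 0xe1-0xe7 writes, the
// texpage word embedded in textured primitives, and 0x02 fills (large
// clears run immediately, smaller ones are parked in pending_fill and
// replayed when skipping ends). Parsing stops at image i/o, or as soon
// as an 0xe3 write points the drawing area back at the visible screen.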
static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy, v;
  int skip = 1;

  gpu.frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = LE32TOH(list[0]) >> 24;
    len = 1 + cmd_lengths[cmd];

    switch (cmd) {
    case 0x02:
      if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
        // clearing something large, don't skip
        do_cmd_list(list, 3, &dummy);
      else
        memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
      break;
    case 0x24 ... 0x27:
    case 0x2c ... 0x2f:
    case 0x34 ... 0x37:
    case 0x3c ... 0x3f:
      gpu.ex_regs[1] &= ~0x1ff;
      gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
      break;
    case 0x48 ... 0x4f:
      for (v = 3; pos + v < count; v++)
      {
        if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
          break;
      }
      len += v - 3;
      break;
    case 0x58 ... 0x5f:
      for (v = 4; pos + v < count; v += 2)
      {
        if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
          break;
      }
      len += v - 4;
      break;
    default:
      if (cmd == 0xe3)
        skip = decide_frameskip_allow(LE32TOH(list[0]));
      if ((cmd & 0xf8) == 0xe0)
        gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
      break;
    }

    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }
    if (0xa0 <= cmd && cmd <= 0xdf)
      break; // image i/o

    pos += len;
  }

  renderer_sync_ecmds(gpu.ex_regs);
  *last_cmd = cmd;
  return pos;
}

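// Main GP0 dispatcher. Interleaves queued VRAM i/o with command parsing
// and hands rendering off to do_cmd_list() (or the skip parser above).
// Returns the number of words it could not consume; callers keep those
// buffered until the rest of the packet arrives.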
static noinline int do_cmd_buffer(uint32_t *data, int count)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = LE32TOH(data[pos]) >> 24;
    if (0xa0 <= cmd && cmd <= 0xdf) {
      if (unlikely((pos+2) >= count)) {
        // incomplete vram write/read cmd, can't consume yet
        cmd = -1;
        break;
      }

      // consume vram write/read cmd
      start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
      pos += 3;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      // incomplete cmd
      break;
  }

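  // Mirror the latched draw-mode state into GPUSTAT: bits 0-10 come from
  // GP0(0xe1) and bits 11-12 from the mask settings in GP0(0xe6).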
  gpu.status &= ~0x1fff;
  gpu.status |= gpu.ex_regs[1] & 0x7ff;
  gpu.status |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

  return count - pos;
}

static void flush_cmd_buffer(void)
{
  int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  gpu.cmd_len = left;
}

void GPUwriteDataMem(uint32_t *mem, int count)
{
  int left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = do_cmd_buffer(mem, count);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}

void GPUwriteData(uint32_t data)
{
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer();
}

long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr)
{
  uint32_t addr, *list, ld_addr = 0;
  int len, left, count;
  long cpu_cycles = 0;

  preload(rambase + (start_addr & 0x1fffff) / 4);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; (addr & 0x800000) == 0; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = LE32TOH(list[0]) >> 24;
    addr = LE32TOH(list[0]) & 0xffffff;
    preload(rambase + (addr & 0x1fffff) / 4);

    cpu_cycles += 10;
    if (len > 0)
      cpu_cycles += 5 + len;

    log_io(".chain %08lx #%d+%d\n",
        (long)(list - rambase) * 4, len, gpu.cmd_len);
    if (unlikely(gpu.cmd_len > 0)) {
      memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
      gpu.cmd_len += len;
      flush_cmd_buffer();
      continue;
    }

    if (len) {
      left = do_cmd_buffer(list + 1, len);
      if (left) {
        memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
        gpu.cmd_len = left;
        log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
      }
    }

    if (progress_addr) {
      *progress_addr = addr;
      break;
    }
    #define LD_THRESHOLD (8*1024)
    if (count >= LD_THRESHOLD) {
      if (count == LD_THRESHOLD) {
        ld_addr = addr;
        continue;
      }

      // loop detection marker
      // (bit23 set causes DMA error on real machine, so
      //  unlikely to be ever set by the game)
      list[0] |= HTOLE32(0x800000);
    }
  }

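  // A chain longer than LD_THRESHOLD nodes is assumed to be looping, so
  // from that point on every visited node gets stamped with bit 23 - the
  // walk above then terminates on the first already-stamped node. The
  // markers are stripped again here by re-walking the same stretch of
  // the list.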
  if (ld_addr != 0) {
    // remove loop detection markers
    count -= LD_THRESHOLD + 2;
    addr = ld_addr & 0x1fffff;
    while (count-- > 0) {
      list = rambase + addr / 4;
      addr = LE32TOH(list[0]) & 0x1fffff;
      list[0] &= HTOLE32(~0x800000);
    }
  }

  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;

  return cpu_cycles;
}

void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}

uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h) {
    ret = HTOLE32(ret);
    do_vram_io(&ret, 1, 1);
    ret = LE32TOH(ret);
  }

  log_io("gpu_read %08x\n", ret);
  return ret;
}

uint32_t GPUreadStatus(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.status;
  log_io("gpu_read_status %08x\n", ret);
  return ret;
}

struct GPUFreeze
{
  uint32_t ulFreezeVersion;           // should always be 1 for now (set by main emu)
  uint32_t ulStatus;                  // current gpu status
  uint32_t ulControl[256];            // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};

long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
  case 1: // save
    if (gpu.cmd_len > 0)
      flush_cmd_buffer();

    renderer_sync();
    memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
    memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
    memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
    freeze->ulStatus = gpu.status;
    break;
  case 0: // load
    renderer_sync();
    memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
    memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
    memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
    gpu.status = freeze->ulStatus;
    gpu.cmd_len = 0;
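    // Replay the control registers through GPUwriteStatus() to rebuild
    // derived state; the xor makes the stored value differ from regs[i]
    // first, so the "same value" early-return at the top of
    // GPUwriteStatus() cannot swallow the write.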
    for (i = 8; i > 0; i--) {
      gpu.regs[i] ^= 1; // avoid reg change detection
      GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
    }
    renderer_sync_ecmds(gpu.ex_regs);
    renderer_update_caches(0, 0, 1024, 512);
    break;
  }

  return 1;
}

void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

  if (gpu.status & PSX_GPU_STATUS_BLANKING) {
    if (!gpu.state.blanked) {
      vout_blank();
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;
    }
    return;
  }

  renderer_notify_update_lace(0);

  if (!gpu.state.fb_dirty)
    return;

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
  renderer_notify_update_lace(1);
}

void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && (gpu.status & PSX_GPU_STATUS_INTERLACE)
    && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}

#include "../../frontend/plugin_lib.h"

void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.force = &cbs->fskip_force;
  gpu.frameskip.dirty = (void *)&cbs->fskip_dirty;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
  if (gpu.state.screen_centering_type != cbs->screen_centering_type
      || gpu.state.screen_centering_x != cbs->screen_centering_x
      || gpu.state.screen_centering_y != cbs->screen_centering_y) {
    gpu.state.screen_centering_type = cbs->screen_centering_type;
    gpu.state.screen_centering_x = cbs->screen_centering_x;
    gpu.state.screen_centering_y = cbs->screen_centering_y;
    update_width();
    update_height();
  }

  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;

  // delayed vram mmap
  if (gpu.vram == NULL)
    map_vram();

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}

// vim:shiftwidth=2:expandtab