libretro: improve retro_memory_map
[pcsx_rearmed.git] / plugins / gpulib / gpu.c
/*
 * (C) Gražvydas "notaz" Ignotas, 2011-2012
 *
 * This work is licensed under the terms of any of these licenses
 * (at your option):
 *  - GNU GPL, version 2 or later.
 *  - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "gpu.h"
#include "gpu_timing.h"
#include "../../libpcsxcore/gpu.h" // meh
#include "../../frontend/plugin_lib.h"

#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
#ifdef __GNUC__
#define unlikely(x) __builtin_expect((x), 0)
#define preload __builtin_prefetch
#define noinline __attribute__((noinline))
#else
#define unlikely(x) (x)
#define preload(...)
#define noinline
#endif

//#define log_io gpu_log
#define log_io(...)

struct psx_gpu gpu;

static noinline int do_cmd_buffer(uint32_t *data, int count,
    int *cycles_sum, int *cycles_last);
static void finish_vram_transfer(int is_read);

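// GP1(0x01) "reset command buffer": execute whatever is already
// buffered rather than dropping it, then terminate any unfinished
// VRAM transfer.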
static noinline void do_cmd_reset(void)
{
  int dummy = 0;
  renderer_sync();
  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy, &dummy);
  gpu.cmd_len = 0;

  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}

static noinline void do_reset(void)
{
  unsigned int i;

  do_cmd_reset();

  memset(gpu.regs, 0, sizeof(gpu.regs));
  for (i = 0; i < ARRAY_SIZE(gpu.ex_regs); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
  gpu.status = 0x14802000;
  gpu.gp0 = 0;
  gpu.regs[3] = 1;
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
  gpu.screen.x = gpu.screen.y = 0;
  renderer_sync_ecmds(gpu.ex_regs);
  renderer_notify_res_change();
}

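// Recompute the displayed width and x offset from the GP1(0x06)
// display range and the dot clock divider in status bits 16-18;
// hres_all[]/hdivs[] are indexed by those bits (odd entries are the
// 368-pixel mode selected by status bit 16).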
static noinline void update_width(void)
{
  static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
  static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
  uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
  int hres = hres_all[(gpu.status >> 16) & 7];
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int sw = gpu.screen.x2 - gpu.screen.x1;
  int type = gpu.state.screen_centering_type;
  int x = 0, x_auto;
  if (type == C_AUTO)
    type = gpu.state.screen_centering_type_default;
  if (sw <= 0)
    /* nothing displayed? */;
  else {
    int s = pal ? 656 : 608; // or 600? pal is just a guess
    x = (gpu.screen.x1 - s) / hdiv;
    x = (x + 1) & ~1;   // blitter limitation
    sw /= hdiv;
    sw = (sw + 2) & ~3; // according to nocash

    if (gpu.state.show_overscan == 2) // widescreen hack
      sw = (sw + 63) & ~63;
    if (gpu.state.show_overscan && sw >= hres)
      x = 0, hres = sw;
    switch (type) {
    case C_INGAME:
      break;
    case C_MANUAL:
      x = gpu.state.screen_centering_x;
      break;
    default:
      // correct if slightly miscentered
      x_auto = (hres - sw) / 2 & ~3;
      if ((uint32_t)x_auto <= 8u && abs(x) < 24)
        x = x_auto;
    }
    if (x + sw > hres)
      sw = hres - x;
    // .x range check is done in vout_update()
  }
  // reduce the unpleasant right border that a few games have
  if (gpu.state.screen_centering_type == 0
      && x <= 4 && hres - (x + sw) >= 4)
    hres -= 4;
  gpu.screen.x = x;
  gpu.screen.w = sw;
  gpu.screen.hres = hres;
  gpu.state.dims_changed = 1;
  //printf("xx %d %d (%d) -> %2d, %d / %d\n", gpu.screen.x1,
  //  gpu.screen.x2, gpu.screen.x2 - gpu.screen.x1, x, sw, hres);
}

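// Recompute the displayed height and y offset from the GP1(0x07)
// vertical range; the start line is biased by the first visible
// scanline (16 NTSC, 39 PAL here) and everything doubles when the
// double-height (480i) bit is set.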
static noinline void update_height(void)
{
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
  int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
  int sh = gpu.screen.y2 - gpu.screen.y1;
  int center_tol = 16;
  int vres = 240;

  if (pal && (sh > 240 || gpu.screen.vres == 256))
    vres = 256;
  if (dheight)
    y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
  if (sh <= 0)
    /* nothing displayed? */;
  else {
    switch (gpu.state.screen_centering_type) {
    case C_INGAME:
      break;
    case C_BORDERLESS:
      y = 0;
      break;
    case C_MANUAL:
      y = gpu.state.screen_centering_y;
      break;
    default:
      // correct if slightly miscentered
      if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
        y = 0;
    }
    if (y + sh > vres)
      sh = vres - y;
  }
  gpu.screen.y = y;
  gpu.screen.h = sh;
  gpu.screen.vres = vres;
  gpu.state.dims_changed = 1;
  //printf("yy %d %d -> %d, %d / %d\n",
  //  gpu.screen.y1, gpu.screen.y2, y, sh, vres);
}

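// Per-flip frameskip bookkeeping: count consecutive skipped frames,
// honor the force/advice inputs, and replay a fill that was deferred
// by do_cmd_list_skip() once skipping stops.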
static noinline void decide_frameskip(void)
{
  *gpu.frameskip.dirty = 1;

  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  if (*gpu.frameskip.force)
    gpu.frameskip.active = 1;
  else if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;

  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    int dummy = 0;
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy, &dummy, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
  }
}

static noinline int decide_frameskip_allow(uint32_t cmd_e3)
{
  // no frameskip if it decides to draw to display area,
  // but not for interlace since it'll most likely always do that
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
  gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
    (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
  return gpu.frameskip.allow;
}

static void flush_cmd_buffer(void);

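// GP1(0x10) GPU info request: latch the answer into gp0 where GPUREAD
// will pick it up (0x02 texture window, 0x03/0x04 draw area corners,
// 0x05 draw offset, 0x07 GPU version).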
static noinline void get_gpu_info(uint32_t data)
{
  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();
  switch (data & 0x0f) {
    case 0x02:
    case 0x03:
    case 0x04:
      gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
      break;
    case 0x05:
      gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
      break;
    case 0x07:
      gpu.gp0 = 2;
      break;
    default:
      // gpu.gp0 unchanged
      break;
  }
}

#ifndef max
#define max(a, b) (((a) > (b)) ? (a) : (b))
#endif

// Minimum 16-byte VRAM alignment needed by the pixel-skipping
// renderer/downscaler gpu_unai uses in high-res modes:
#ifdef GCW_ZERO
  // On the GCW Zero (MIPS), align to 8192 bytes (1 TLB entry) to reduce
  // the number of TLB fills. (Will change this value if it ever gets
  // large page support.)
  #define VRAM_ALIGN 8192
#else
  #define VRAM_ALIGN 16
#endif

// double, for overdraw guard + at least 1 page before
#define VRAM_SIZE ((1024 * 512 * 2 * 2) + max(VRAM_ALIGN, 4096))

// vram ptr received from mmap/calloc (will deallocate using this)
static uint16_t *vram_ptr_orig = NULL;

#ifndef GPULIB_USE_MMAP
# ifdef __linux__
#  define GPULIB_USE_MMAP 1
# else
#  define GPULIB_USE_MMAP 0
# endif
#endif
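// Allocate VRAM with a 4 KB guard in front and VRAM_ALIGN alignment;
// VRAM_SIZE (above) additionally doubles the buffer as an overdraw
// guard for renderers that can run past the 1024x512 area.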
static int map_vram(void)
{
#if GPULIB_USE_MMAP
  gpu.vram = vram_ptr_orig = gpu.mmap(VRAM_SIZE);
#else
  gpu.vram = vram_ptr_orig = calloc(VRAM_SIZE, 1);
#endif
  if (gpu.vram != NULL && gpu.vram != (void *)(intptr_t)-1) {
    // 4kb guard in front
    gpu.vram += (4096 / 2);
    // align
    gpu.vram = (uint16_t *)(((uintptr_t)gpu.vram + (VRAM_ALIGN - 1)) & ~(VRAM_ALIGN - 1));
    return 0;
  }
  else {
    fprintf(stderr, "could not map vram, expect crashes\n");
    return -1;
  }
}

long GPUinit(void)
{
  int ret;
  ret = vout_init();
  ret |= renderer_init();

  memset(&gpu.state, 0, sizeof(gpu.state));
  memset(&gpu.frameskip, 0, sizeof(gpu.frameskip));
  gpu.zero = 0;
  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.cmd_len = 0;
  do_reset();

  /*if (gpu.mmap != NULL) {
    if (map_vram() != 0)
      ret = -1;
  }*/
  return ret;
}

long GPUshutdown(void)
{
  long ret;

  renderer_finish();
  ret = vout_finish();

  if (vram_ptr_orig != NULL) {
#if GPULIB_USE_MMAP
    gpu.munmap(vram_ptr_orig, VRAM_SIZE);
#else
    free(vram_ptr_orig);
#endif
  }
  vram_ptr_orig = gpu.vram = NULL;

  return ret;
}

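// GP1 command port; the command number is in the top byte. Repeated
// identical writes are ignored, except for the reset and display
// address commands (0x00, 0x01, 0x05), which always take effect.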
void GPUwriteStatus(uint32_t data)
{
  uint32_t cmd = data >> 24;
  int src_x, src_y;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
    case 0x00:
      do_reset();
      break;
    case 0x01:
      do_cmd_reset();
      break;
    case 0x03:
      if (data & 1) {
        gpu.status |= PSX_GPU_STATUS_BLANKING;
        gpu.state.dims_changed = 1; // for hud clearing
      }
      else
        gpu.status &= ~PSX_GPU_STATUS_BLANKING;
      break;
    case 0x04:
      gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
      gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
      break;
    case 0x05:
      src_x = data & 0x3ff; src_y = (data >> 10) & 0x1ff;
      if (src_x != gpu.screen.src_x || src_y != gpu.screen.src_y) {
        gpu.screen.src_x = src_x;
        gpu.screen.src_y = src_y;
        renderer_notify_scanout_change(src_x, src_y);
        if (gpu.frameskip.set) {
          decide_frameskip_allow(gpu.ex_regs[3]);
          if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
            decide_frameskip();
            gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
          }
        }
      }
      break;
    case 0x06:
      gpu.screen.x1 = data & 0xfff;
      gpu.screen.x2 = (data >> 12) & 0xfff;
      update_width();
      break;
    case 0x07:
      gpu.screen.y1 = data & 0x3ff;
      gpu.screen.y2 = (data >> 10) & 0x3ff;
      update_height();
      break;
    case 0x08:
      gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3f) << 17) | ((data & 0x40) << 10);
      update_width();
      update_height();
      renderer_notify_res_change();
      break;
    default:
      if ((cmd & 0xf0) == 0x10)
        get_gpu_info(data);
      break;
  }

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}

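// Parameter words carried by each GP0 command in addition to the
// command word itself; variable-length polylines (0x48-0x5f) list only
// their fixed part and are extended by the parsers until the
// 0x5xxx5xxx terminator word.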
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 80
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // a0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]

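// When GP0(0xe6) bit 0 ("set mask while drawing") is active, CPU->VRAM
// stores must force bit 15 of every pixel; cpy_msb() is the memcpy
// variant that ORs that bit in.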
static void cpy_msb(uint16_t *dst, const uint16_t *src, int l, uint16_t msb)
{
  int i;
  for (i = 0; i < l; i++)
    dst[i] = src[i] | msb;
}

static inline void do_vram_line(int x, int y, uint16_t *mem, int l,
    int is_read, uint16_t msb)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (unlikely(is_read))
    memcpy(mem, vram, l * 2);
  else if (unlikely(msb))
    cpy_msb(vram, mem, l, msb);
  else
    memcpy(vram, mem, l * 2);
}

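// Pump pixels between the command stream and the VRAM rectangle set up
// by start_vram_transfer(). Transfers are resumable: a partially done
// row is tracked in gpu.dma.offset, whole rows are copied in bulk, and
// the return value is the number of 32-bit words consumed.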
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  renderer_sync();

  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read, msb);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read, msb);
  }

  if (h > 0) {
    if (count > 0) {
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read, msb);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}

static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;

  renderer_flush_queues();
  if (is_read) {
    gpu.status |= PSX_GPU_STATUS_IMG;
    // XXX: wrong for width 1
    gpu.gp0 = LE32TOH(*(uint32_t *)VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_START);
}

static void finish_vram_transfer(int is_read)
{
  if (is_read)
    gpu.status &= ~PSX_GPU_STATUS_IMG;
  else {
    gpu.state.fb_dirty = 1;
    renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
        gpu.dma_start.w, gpu.dma_start.h, 0);
  }
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_END);
}

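// GP0(0x80) VRAM-to-VRAM copy. The fast path memcpy()s whole rows; the
// line-buffered path handles horizontally overlapping source/dest
// ranges, wrap-around past x=1024, and forced mask-bit writes.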
static void do_vram_copy(const uint32_t *params, int *cpu_cycles)
{
  const uint32_t sx = LE32TOH(params[0]) & 0x3ff;
  const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1ff;
  const uint32_t dx = LE32TOH(params[1]) & 0x3ff;
  const uint32_t dy = (LE32TOH(params[1]) >> 16) & 0x1ff;
  uint32_t w = ((LE32TOH(params[2]) - 1) & 0x3ff) + 1;
  uint32_t h = (((LE32TOH(params[2]) >> 16) - 1) & 0x1ff) + 1;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t lbuf[128];
  uint32_t x, y;

  *cpu_cycles += gput_copy(w, h);
  if (sx == dx && sy == dy && msb == 0)
    return;

  renderer_flush_queues();

  if (unlikely((sx < dx && dx < sx + w) || sx + w > 1024 || dx + w > 1024 || msb))
  {
    for (y = 0; y < h; y++)
    {
      const uint16_t *src = VRAM_MEM_XY(0, (sy + y) & 0x1ff);
      uint16_t *dst = VRAM_MEM_XY(0, (dy + y) & 0x1ff);
      for (x = 0; x < w; x += ARRAY_SIZE(lbuf))
      {
        uint32_t x1, w1 = w - x;
        if (w1 > ARRAY_SIZE(lbuf))
          w1 = ARRAY_SIZE(lbuf);
        for (x1 = 0; x1 < w1; x1++)
          lbuf[x1] = src[(sx + x + x1) & 0x3ff];
        for (x1 = 0; x1 < w1; x1++)
          dst[(dx + x + x1) & 0x3ff] = lbuf[x1] | msb;
      }
    }
  }
  else
  {
    uint32_t sy1 = sy, dy1 = dy;
    for (y = 0; y < h; y++, sy1++, dy1++)
      memcpy(VRAM_MEM_XY(dx, dy1 & 0x1ff), VRAM_MEM_XY(sx, sy1 & 0x1ff), w * 2);
  }

  renderer_update_caches(dx, dy, w, h, 0);
}

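// Parse a command list while a frame is being skipped: keep the
// e-register/texture page state current, defer fills that fit the
// screen to pending_fill, and stop skipping as soon as GP0(0xe3)
// moves the draw area into the displayed region.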
static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy = 0, v;
  int skip = 1;

  gpu.frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = LE32TOH(list[0]) >> 24;
    len = 1 + cmd_lengths[cmd];
    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }

    switch (cmd) {
      case 0x02:
        if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w
            || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
          // clearing something large, don't skip
          do_cmd_list(list, 3, &dummy, &dummy, &dummy);
        else
          memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
        break;
      case 0x24 ... 0x27:
      case 0x2c ... 0x2f:
      case 0x34 ... 0x37:
      case 0x3c ... 0x3f:
        gpu.ex_regs[1] &= ~0x1ff;
        gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
        break;
      case 0x48 ... 0x4f:
        for (v = 3; pos + v < count; v++)
        {
          if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
            break;
        }
        len += v - 3;
        break;
      case 0x58 ... 0x5f:
        for (v = 4; pos + v < count; v += 2)
        {
          if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
            break;
        }
        len += v - 4;
        break;
      default:
        if (cmd == 0xe3)
          skip = decide_frameskip_allow(LE32TOH(list[0]));
        if ((cmd & 0xf8) == 0xe0)
          gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
        break;
    }
    if (0x80 <= cmd && cmd <= 0xdf)
      break; // image i/o

    pos += len;
  }

  renderer_sync_ecmds(gpu.ex_regs);
  *last_cmd = cmd;
  return pos;
}

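// Consume as many complete commands from data[] as possible, routing
// VRAM transfers, VRAM copies and drawing commands appropriately;
// returns how many words remain unconsumed (a trailing incomplete
// command stays in the buffer).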
static noinline int do_cmd_buffer(uint32_t *data, int count,
    int *cycles_sum, int *cycles_last)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = LE32TOH(data[pos]) >> 24;
    if (0xa0 <= cmd && cmd <= 0xdf) {
      if (unlikely((pos + 2) >= count)) {
        // incomplete vram write/read cmd, can't consume yet
        cmd = -1;
        break;
      }

      // consume vram write/read cmd
      start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]),
          (cmd & 0xe0) == 0xc0);
      pos += 3;
      continue;
    }
    else if ((cmd & 0xe0) == 0x80) {
      if (unlikely((pos + 3) >= count)) {
        cmd = -1; // incomplete cmd, can't consume yet
        break;
      }
      renderer_sync();
      *cycles_sum += *cycles_last;
      *cycles_last = 0;
      do_vram_copy(data + pos + 1, cycles_last);
      vram_dirty = 1;
      pos += 4;
      continue;
    }
    else if (cmd == 0x1f) {
      log_anomaly("irq1?\n");
      pos++;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active
        && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, cycles_sum, cycles_last, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      // incomplete cmd
      break;
  }

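  // mirror the latest e1 (texture page / draw mode) and e6 (mask)
  // settings into the low GPUSTAT bits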
  gpu.status &= ~0x1fff;
  gpu.status |= gpu.ex_regs[1] & 0x7ff;
  gpu.status |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

  return count - pos;
}

static noinline void flush_cmd_buffer(void)
{
  int dummy = 0, left;
  left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy, &dummy);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  if (left != gpu.cmd_len) {
    if (!gpu.dma.h && gpu.gpu_state_change)
      gpu.gpu_state_change(PGS_PRIMITIVE_START);
    gpu.cmd_len = left;
  }
}

void GPUwriteDataMem(uint32_t *mem, int count)
{
  int dummy = 0, left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = do_cmd_buffer(mem, count, &dummy, &dummy);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}

void GPUwriteData(uint32_t data)
{
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer();
}

long GPUdmaChain(uint32_t *rambase, uint32_t start_addr,
    uint32_t *progress_addr, int32_t *cycles_last_cmd)
{
  uint32_t addr, *list, ld_addr;
  int len, left, count, ld_count = 32;
  int cpu_cycles_sum = 0;
  int cpu_cycles_last = 0;

  preload(rambase + (start_addr & 0x1fffff) / 4);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  log_io("gpu_dma_chain\n");
  addr = ld_addr = start_addr & 0xffffff;
  for (count = 0; (addr & 0x800000) == 0; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = LE32TOH(list[0]) >> 24;
    addr = LE32TOH(list[0]) & 0xffffff;
    preload(rambase + (addr & 0x1fffff) / 4);

    cpu_cycles_sum += 10;
    if (len > 0)
      cpu_cycles_sum += 5 + len;

    log_io(".chain %08lx #%d+%d %u+%u\n",
      (long)(list - rambase) * 4, len, gpu.cmd_len, cpu_cycles_sum, cpu_cycles_last);
    if (unlikely(gpu.cmd_len > 0)) {
      if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) {
        log_anomaly("cmd_buffer overflow, likely garbage commands\n");
        gpu.cmd_len = 0;
      }
      memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
      gpu.cmd_len += len;
      flush_cmd_buffer();
      continue;
    }

    if (len) {
      left = do_cmd_buffer(list + 1, len, &cpu_cycles_sum, &cpu_cycles_last);
      if (left) {
        memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
        gpu.cmd_len = left;
        log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
      }
    }

    if (progress_addr) {
      *progress_addr = addr;
      break;
    }
    if (addr == ld_addr) {
      log_anomaly("GPUdmaChain: loop @ %08x, cnt=%u\n", addr, count);
      break;
    }
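    // loop watchdog: remember a checkpoint address and double the
    // checkpoint distance each time (as in Brent's cycle detection),
    // so a self-linked list is caught in bounded time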
    if (count == ld_count) {
      ld_addr = addr;
      ld_count *= 2;
    }
  }

  //printf(" -> %d %d\n", cpu_cycles_sum, cpu_cycles_last);
  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles_sum + cpu_cycles_last;
  gpu.state.last_list.addr = start_addr;

  *cycles_last_cmd = cpu_cycles_last;
  return cpu_cycles_sum;
}

void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}

uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h) {
    ret = HTOLE32(ret);
    do_vram_io(&ret, 1, 1);
    ret = LE32TOH(ret);
  }

  log_io("gpu_read %08x\n", ret);
  return ret;
}

uint32_t GPUreadStatus(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.status;
  log_io("gpu_read_status %08x\n", ret);
  return ret;
}

struct GPUFreeze
{
  uint32_t ulFreezeVersion;           // should always be 1 for now (set by main emu)
  uint32_t ulStatus;                  // current gpu status
  uint32_t ulControl[256];            // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};

long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
    case 1: // save
      if (gpu.cmd_len > 0)
        flush_cmd_buffer();

      renderer_sync();
      memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
      memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
      memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
      freeze->ulStatus = gpu.status;
      break;
    case 0: // load
      renderer_sync();
      memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
      memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
      memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
      gpu.status = freeze->ulStatus;
      gpu.cmd_len = 0;
      for (i = 8; i > 0; i--) {
        gpu.regs[i] ^= 1; // avoid reg change detection
        GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
      }
      renderer_sync_ecmds(gpu.ex_regs);
      renderer_update_caches(0, 0, 1024, 512, 0);
      break;
  }

  return 1;
}

void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

#ifndef RAW_FB_DISPLAY
  if (gpu.status & PSX_GPU_STATUS_BLANKING) {
    if (!gpu.state.blanked) {
      vout_blank();
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;
    }
    return;
  }

  renderer_notify_update_lace(0);

  if (!gpu.state.fb_dirty)
    return;
#endif

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
    renderer_update_caches(0, 0, 1024, 512, 1);
  gpu.state.enhancement_was_active = gpu.state.enhancement_active;
  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
  renderer_notify_update_lace(1);
}

void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && (gpu.status & PSX_GPU_STATUS_INTERLACE)
    && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}

void GPUgetScreenInfo(int *y, int *base_hres)
{
  *y = gpu.screen.y;
  *base_hres = gpu.screen.vres;
  if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
    *base_hres >>= 1;
}

void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.force = &cbs->fskip_force;
  gpu.frameskip.dirty = (void *)&cbs->fskip_dirty;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = (uint32_t *)cbs->gpu_hcnt;
  gpu.state.frame_count = (uint32_t *)cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
  gpu.state.screen_centering_type_default = cbs->screen_centering_type_default;
  if (gpu.state.screen_centering_type != cbs->screen_centering_type
      || gpu.state.screen_centering_x != cbs->screen_centering_x
      || gpu.state.screen_centering_y != cbs->screen_centering_y
      || gpu.state.show_overscan != cbs->show_overscan) {
    gpu.state.screen_centering_type = cbs->screen_centering_type;
    gpu.state.screen_centering_x = cbs->screen_centering_x;
    gpu.state.screen_centering_y = cbs->screen_centering_y;
    gpu.state.show_overscan = cbs->show_overscan;
    update_width();
    update_height();
  }

  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;
  gpu.gpu_state_change = cbs->gpu_state_change;

  // delayed vram mmap
  if (gpu.vram == NULL)
    map_vram();

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}

// vim:shiftwidth=2:expandtab