gpu: start doing some basic gpu timing
[pcsx_rearmed.git] / plugins / gpulib / gpu.c
/*
 * (C) Gražvydas "notaz" Ignotas, 2011-2012
 *
 * This work is licensed under the terms of any of these licenses
 * (at your option):
 *  - GNU GPL, version 2 or later.
 *  - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "gpu.h"
#include "gpu_timing.h"
#include "../../libpcsxcore/gpu.h" // meh
#include "../../frontend/plugin_lib.h"

#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
#ifdef __GNUC__
#define unlikely(x) __builtin_expect((x), 0)
#define preload __builtin_prefetch
#define noinline __attribute__((noinline))
#else
#define unlikely(x) (x)
#define preload(...)
#define noinline
#endif

//#define log_io gpu_log
#define log_io(...)

struct psx_gpu gpu;

static noinline int do_cmd_buffer(uint32_t *data, int count, int *cpu_cycles);
static void finish_vram_transfer(int is_read);

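// flush any queued GP0 data and cancel an in-progress vram
// transfer, so the reset starts from a clean state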
static noinline void do_cmd_reset(void)
{
  int dummy = 0;
  renderer_sync();
  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy);
  gpu.cmd_len = 0;

  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}

static noinline void do_reset(void)
{
  unsigned int i;

  do_cmd_reset();

  memset(gpu.regs, 0, sizeof(gpu.regs));
  for (i = 0; i < ARRAY_SIZE(gpu.ex_regs); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
  gpu.status = 0x14802000;
  gpu.gp0 = 0;
  gpu.regs[3] = 1;
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
  gpu.screen.x = gpu.screen.y = 0;
  renderer_sync_ecmds(gpu.ex_regs);
  renderer_notify_res_change();
}

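// derive the visible width and x offset from the GP1 display range
// (x1/x2) and the dotclock divider in status bits 16-18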
static noinline void update_width(void)
{
  static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
  static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
  uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
  int hres = hres_all[(gpu.status >> 16) & 7];
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int sw = gpu.screen.x2 - gpu.screen.x1;
  int type = gpu.state.screen_centering_type;
  int x = 0, x_auto;
  if (type == C_AUTO)
    type = gpu.state.screen_centering_type_default;
  if (sw <= 0)
    /* nothing displayed? */;
  else {
    int s = pal ? 656 : 608; // or 600? pal is just a guess
    x = (gpu.screen.x1 - s) / hdiv;
    x = (x + 1) & ~1;   // blitter limitation
    sw /= hdiv;
    sw = (sw + 2) & ~3; // according to nocash
    switch (type) {
    case C_INGAME:
      break;
    case C_MANUAL:
      x = gpu.state.screen_centering_x;
      break;
    default:
      // correct if slightly miscentered
      x_auto = (hres - sw) / 2 & ~3;
      if ((uint32_t)x_auto <= 8u && abs(x) < 24)
        x = x_auto;
    }
    if (x + sw > hres)
      sw = hres - x;
    // .x range check is done in vout_update()
  }
  // reduce the unpleasant right border that a few games have
  if (gpu.state.screen_centering_type == 0
      && x <= 4 && hres - (x + sw) >= 4)
    hres -= 4;
  gpu.screen.x = x;
  gpu.screen.w = sw;
  gpu.screen.hres = hres;
  gpu.state.dims_changed = 1;
  //printf("xx %d %d -> %2d, %d / %d\n",
  //  gpu.screen.x1, gpu.screen.x2, x, sw, hres);
}

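// derive the visible height and y offset from the GP1 display range
// (y1/y2), adjusting for PAL/NTSC and double height (interlace)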
static noinline void update_height(void)
{
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
  int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
  int sh = gpu.screen.y2 - gpu.screen.y1;
  int center_tol = 16;
  int vres = 240;

  if (pal && (sh > 240 || gpu.screen.vres == 256))
    vres = 256;
  if (dheight)
    y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
  if (sh <= 0)
    /* nothing displayed? */;
  else {
    switch (gpu.state.screen_centering_type) {
    case C_INGAME:
      break;
    case C_BORDERLESS:
      y = 0;
      break;
    case C_MANUAL:
      y = gpu.state.screen_centering_y;
      break;
    default:
      // correct if slightly miscentered
      if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
        y = 0;
    }
    if (y + sh > vres)
      sh = vres - y;
  }
  gpu.screen.y = y;
  gpu.screen.h = sh;
  gpu.screen.vres = vres;
  gpu.state.dims_changed = 1;
  //printf("yy %d %d -> %d, %d / %d\n",
  //  gpu.screen.y1, gpu.screen.y2, y, sh, vres);
}

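// update the frameskip state for the new frame: count skipped frames,
// honor the force/advice flags, and replay a pending fill if drawing resumes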
static noinline void decide_frameskip(void)
{
  *gpu.frameskip.dirty = 1;

  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  if (*gpu.frameskip.force)
    gpu.frameskip.active = 1;
  else if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;

  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    int dummy = 0;
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
  }
}

static noinline int decide_frameskip_allow(uint32_t cmd_e3)
{
  // no frameskip if it decides to draw to display area,
  // but not for interlace since it'll most likely always do that
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
  gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
    (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
  return gpu.frameskip.allow;
}

static void flush_cmd_buffer(void);

static noinline void get_gpu_info(uint32_t data)
{
  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();
  switch (data & 0x0f) {
  case 0x02:
  case 0x03:
  case 0x04:
    gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
    break;
  case 0x05:
    gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
    break;
  case 0x07:
    gpu.gp0 = 2;
    break;
  default:
    // gpu.gp0 unchanged
    break;
  }
}

#ifndef max
#define max(a, b) (((a) > (b)) ? (a) : (b))
#endif

// Minimum 16-byte VRAM alignment needed by gpu_unai's pixel-skipping
// renderer/downscaler used in high-res modes:
#ifdef GCW_ZERO
  // On GCW (MIPS), align to 8192 bytes (1 TLB entry) to reduce the number
  // of fills. (Will change this value if it ever gets large page support.)
  #define VRAM_ALIGN 8192
#else
  #define VRAM_ALIGN 16
#endif

// double, for overdraw guard + at least 1 page before
#define VRAM_SIZE ((1024 * 512 * 2 * 2) + max(VRAM_ALIGN, 4096))

// vram ptr received from mmap/malloc/alloc (will deallocate using this)
static uint16_t *vram_ptr_orig = NULL;

#ifndef GPULIB_USE_MMAP
# ifdef __linux__
#  define GPULIB_USE_MMAP 1
# else
#  define GPULIB_USE_MMAP 0
# endif
#endif
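// allocate vram with a 4k guard area in front and VRAM_ALIGN
// alignment; the buffer is doubled for overdraw (see VRAM_SIZE)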
static int map_vram(void)
{
#if GPULIB_USE_MMAP
  gpu.vram = vram_ptr_orig = gpu.mmap(VRAM_SIZE);
#else
  gpu.vram = vram_ptr_orig = calloc(VRAM_SIZE, 1);
#endif
  if (gpu.vram != NULL && gpu.vram != (void *)(intptr_t)-1) {
    // 4kb guard in front
    gpu.vram += (4096 / 2);
    // align
    gpu.vram = (uint16_t *)(((uintptr_t)gpu.vram + (VRAM_ALIGN - 1)) & ~(VRAM_ALIGN - 1));
    return 0;
  }
  else {
    fprintf(stderr, "could not map vram, expect crashes\n");
    return -1;
  }
}

long GPUinit(void)
{
  int ret;
  ret = vout_init();
  ret |= renderer_init();

  memset(&gpu.state, 0, sizeof(gpu.state));
  memset(&gpu.frameskip, 0, sizeof(gpu.frameskip));
  gpu.zero = 0;
  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.cmd_len = 0;
  do_reset();

  /*if (gpu.mmap != NULL) {
    if (map_vram() != 0)
      ret = -1;
  }*/
  return ret;
}

long GPUshutdown(void)
{
  long ret;

  renderer_finish();
  ret = vout_finish();

  if (vram_ptr_orig != NULL) {
#if GPULIB_USE_MMAP
    gpu.munmap(vram_ptr_orig, VRAM_SIZE);
#else
    free(vram_ptr_orig);
#endif
  }
  vram_ptr_orig = gpu.vram = NULL;

  return ret;
}

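// GP1 (control) port write: reset, display enable, dma mode,
// display position/range, video mode and gpu info queries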
void GPUwriteStatus(uint32_t data)
{
  uint32_t cmd = data >> 24;
  int src_x, src_y;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
  case 0x00:
    do_reset();
    break;
  case 0x01:
    do_cmd_reset();
    break;
  case 0x03:
    if (data & 1) {
      gpu.status |= PSX_GPU_STATUS_BLANKING;
      gpu.state.dims_changed = 1; // for hud clearing
    }
    else
      gpu.status &= ~PSX_GPU_STATUS_BLANKING;
    break;
  case 0x04:
    gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
    gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
    break;
  case 0x05:
    src_x = data & 0x3ff; src_y = (data >> 10) & 0x1ff;
    if (src_x != gpu.screen.src_x || src_y != gpu.screen.src_y) {
      gpu.screen.src_x = src_x;
      gpu.screen.src_y = src_y;
      renderer_notify_scanout_change(src_x, src_y);
      if (gpu.frameskip.set) {
        decide_frameskip_allow(gpu.ex_regs[3]);
        if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
          decide_frameskip();
          gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
        }
      }
    }
    break;
  case 0x06:
    gpu.screen.x1 = data & 0xfff;
    gpu.screen.x2 = (data >> 12) & 0xfff;
    update_width();
    break;
  case 0x07:
    gpu.screen.y1 = data & 0x3ff;
    gpu.screen.y2 = (data >> 10) & 0x3ff;
    update_height();
    break;
  case 0x08:
    gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
    update_width();
    update_height();
    renderer_notify_res_change();
    break;
  default:
    if ((cmd & 0xf0) == 0x10)
      get_gpu_info(data);
    break;
  }

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}

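// number of parameter words following each GP0 command word;
// variable-length line commands (0x48+) are terminated by a
// 0x5xxx5xxx marker word and handled separately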
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 80
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // a0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]

static void cpy_msb(uint16_t *dst, const uint16_t *src, int l, uint16_t msb)
{
  int i;
  for (i = 0; i < l; i++)
    dst[i] = src[i] | msb;
}

static inline void do_vram_line(int x, int y, uint16_t *mem, int l,
    int is_read, uint16_t msb)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (unlikely(is_read))
    memcpy(mem, vram, l * 2);
  else if (unlikely(msb))
    cpy_msb(vram, mem, l, msb);
  else
    memcpy(vram, mem, l * 2);
}

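// feed words of an ongoing GP0 vram transfer to/from the 1024x512
// vram array; returns the number of words consumed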
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  renderer_sync();

  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read, msb);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read, msb);
  }

  if (h > 0) {
    if (count > 0) {
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read, msb);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}

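// begin a GP0 0xa0/0xc0 vram write/read; for reads the first word
// is latched into gp0 so an immediate GPUREAD returns valid data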
static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;

  renderer_flush_queues();
  if (is_read) {
    gpu.status |= PSX_GPU_STATUS_IMG;
    // XXX: wrong for width 1
    gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_START);
}

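// a read leaves vram untouched; a write must flag the framebuffer
// dirty and invalidate the renderer's caches over the updated rect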
static void finish_vram_transfer(int is_read)
{
  if (is_read)
    gpu.status &= ~PSX_GPU_STATUS_IMG;
  else {
    gpu.state.fb_dirty = 1;
    renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
        gpu.dma_start.w, gpu.dma_start.h, 0);
  }
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_END);
}

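// GP0 0x80 vram-to-vram copy; overlap, x wraparound and mask-bit
// cases go through a line buffer, the rest is plain per-row memcpy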
static void do_vram_copy(const uint32_t *params, int *cpu_cycles)
{
  const uint32_t sx = LE32TOH(params[0]) & 0x3FF;
  const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1FF;
  const uint32_t dx = LE32TOH(params[1]) & 0x3FF;
  const uint32_t dy = (LE32TOH(params[1]) >> 16) & 0x1FF;
  uint32_t w = ((LE32TOH(params[2]) - 1) & 0x3FF) + 1;
  uint32_t h = (((LE32TOH(params[2]) >> 16) - 1) & 0x1FF) + 1;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t lbuf[128];
  uint32_t x, y;

  *cpu_cycles += gput_copy(w, h);
  if (sx == dx && sy == dy && msb == 0)
    return;

  renderer_flush_queues();

  if (unlikely((sx < dx && dx < sx + w) || sx + w > 1024 || dx + w > 1024 || msb))
  {
    for (y = 0; y < h; y++)
    {
      const uint16_t *src = VRAM_MEM_XY(0, (sy + y) & 0x1ff);
      uint16_t *dst = VRAM_MEM_XY(0, (dy + y) & 0x1ff);
      for (x = 0; x < w; x += ARRAY_SIZE(lbuf))
      {
        uint32_t x1, w1 = w - x;
        if (w1 > ARRAY_SIZE(lbuf))
          w1 = ARRAY_SIZE(lbuf);
        for (x1 = 0; x1 < w1; x1++)
          lbuf[x1] = src[(sx + x + x1) & 0x3ff];
        for (x1 = 0; x1 < w1; x1++)
          dst[(dx + x + x1) & 0x3ff] = lbuf[x1] | msb;
      }
    }
  }
  else
  {
    uint32_t sy1 = sy, dy1 = dy;
    for (y = 0; y < h; y++, sy1++, dy1++)
      memcpy(VRAM_MEM_XY(dx, dy1 & 0x1ff), VRAM_MEM_XY(sx, sy1 & 0x1ff), w * 2);
  }

  renderer_update_caches(dx, dy, w, h, 0);
}

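// parse commands without drawing while a frame is being skipped,
// still tracking texture page and other e-command state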
static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy = 0, v;
  int skip = 1;

  gpu.frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = LE32TOH(list[0]) >> 24;
    len = 1 + cmd_lengths[cmd];

    switch (cmd) {
      case 0x02:
        if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
          // clearing something large, don't skip
          do_cmd_list(list, 3, &dummy, &dummy);
        else
          memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
        break;
      case 0x24 ... 0x27:
      case 0x2c ... 0x2f:
      case 0x34 ... 0x37:
      case 0x3c ... 0x3f:
        gpu.ex_regs[1] &= ~0x1ff;
        gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
        break;
      case 0x48 ... 0x4F:
        for (v = 3; pos + v < count; v++)
        {
          if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
            break;
        }
        len += v - 3;
        break;
      case 0x58 ... 0x5F:
        for (v = 4; pos + v < count; v += 2)
        {
          if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
            break;
        }
        len += v - 4;
        break;
      default:
        if (cmd == 0xe3)
          skip = decide_frameskip_allow(LE32TOH(list[0]));
        if ((cmd & 0xf8) == 0xe0)
          gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
        break;
    }

    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }
    if (0x80 <= cmd && cmd <= 0xdf)
      break; // image i/o

    pos += len;
  }

  renderer_sync_ecmds(gpu.ex_regs);
  *last_cmd = cmd;
  return pos;
}

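// main GP0 dispatcher: feeds data to an active vram transfer, a vram
// copy or the renderer's list handler; returns words not yet consumed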
static noinline int do_cmd_buffer(uint32_t *data, int count, int *cpu_cycles)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = LE32TOH(data[pos]) >> 24;
    if (0xa0 <= cmd && cmd <= 0xdf) {
      if (unlikely((pos+2) >= count)) {
        // incomplete vram write/read cmd, can't consume yet
        cmd = -1;
        break;
      }

      // consume vram write/read cmd
      start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
      pos += 3;
      continue;
    }
    else if ((cmd & 0xe0) == 0x80) {
      if (unlikely((pos+3) >= count)) {
        cmd = -1; // incomplete cmd, can't consume yet
        break;
      }
      do_vram_copy(data + pos + 1, cpu_cycles);
      vram_dirty = 1;
      pos += 4;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, cpu_cycles, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      // incomplete cmd
      break;
  }

  gpu.status &= ~0x1fff;
  gpu.status |= gpu.ex_regs[1] & 0x7ff;
  gpu.status |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

  return count - pos;
}

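// process whatever accumulated in cmd_buffer, keeping any
// incomplete trailing command for later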
static noinline void flush_cmd_buffer(void)
{
  int dummy = 0, left;
  left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  if (left != gpu.cmd_len) {
    if (!gpu.dma.h && gpu.gpu_state_change)
      gpu.gpu_state_change(PGS_PRIMITIVE_START);
    gpu.cmd_len = left;
  }
}

void GPUwriteDataMem(uint32_t *mem, int count)
{
  int dummy = 0, left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = do_cmd_buffer(mem, count, &dummy);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}

void GPUwriteData(uint32_t data)
{
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer();
}

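// walk the linked packet list in psx ram, passing each packet to
// do_cmd_buffer; accumulates rough cycle costs for timing and marks
// visited packets past LD_THRESHOLD to detect endless lists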
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr)
{
  uint32_t addr, *list, ld_addr = 0;
  int len, left, count;
  int cpu_cycles = 0;

  preload(rambase + (start_addr & 0x1fffff) / 4);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; (addr & 0x800000) == 0; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = LE32TOH(list[0]) >> 24;
    addr = LE32TOH(list[0]) & 0xffffff;
    preload(rambase + (addr & 0x1fffff) / 4);

    cpu_cycles += 10;
    if (len > 0)
      cpu_cycles += 5 + len;

    log_io(".chain %08lx #%d+%d\n",
        (long)(list - rambase) * 4, len, gpu.cmd_len);
    if (unlikely(gpu.cmd_len > 0)) {
      if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) {
        log_anomaly("cmd_buffer overflow, likely garbage commands\n");
        gpu.cmd_len = 0;
      }
      memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
      gpu.cmd_len += len;
      flush_cmd_buffer();
      continue;
    }

    if (len) {
      left = do_cmd_buffer(list + 1, len, &cpu_cycles);
      if (left) {
        memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
        gpu.cmd_len = left;
        log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
      }
    }

    if (progress_addr) {
      *progress_addr = addr;
      break;
    }
    #define LD_THRESHOLD (8*1024)
    if (count >= LD_THRESHOLD) {
      if (count == LD_THRESHOLD) {
        ld_addr = addr;
        continue;
      }

      // loop detection marker
      // (bit23 set causes DMA error on real machine, so
      //  unlikely to be ever set by the game)
      list[0] |= HTOLE32(0x800000);
    }
  }

  if (ld_addr != 0) {
    // remove loop detection markers
    count -= LD_THRESHOLD + 2;
    addr = ld_addr & 0x1fffff;
    while (count-- > 0) {
      list = rambase + addr / 4;
      addr = LE32TOH(list[0]) & 0x1fffff;
      list[0] &= HTOLE32(~0x800000);
    }
  }

  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;

  return cpu_cycles;
}

void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}

uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h) {
    ret = HTOLE32(ret);
    do_vram_io(&ret, 1, 1);
    ret = LE32TOH(ret);
  }

  log_io("gpu_read %08x\n", ret);
  return ret;
}

uint32_t GPUreadStatus(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.status;
  log_io("gpu_read_status %08x\n", ret);
  return ret;
}

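// savestate block in the layout shared with PSEmu-style gpu plugins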
struct GPUFreeze
{
  uint32_t ulFreezeVersion;      // should be always 1 for now (set by main emu)
  uint32_t ulStatus;             // current gpu status
  uint32_t ulControl[256];       // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};

long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
    case 1: // save
      if (gpu.cmd_len > 0)
        flush_cmd_buffer();

      renderer_sync();
      memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
      memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
      memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
      freeze->ulStatus = gpu.status;
      break;
    case 0: // load
      renderer_sync();
      memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
      memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
      memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
      gpu.status = freeze->ulStatus;
      gpu.cmd_len = 0;
      for (i = 8; i > 0; i--) {
        gpu.regs[i] ^= 1; // avoid reg change detection
        GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
      }
      renderer_sync_ecmds(gpu.ex_regs);
      renderer_update_caches(0, 0, 1024, 512, 0);
      break;
  }

  return 1;
}

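// per-frame display update: handles blanking, frameskip bookkeeping
// and pushes the frame to the video out if the fb is dirty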
void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

#ifndef RAW_FB_DISPLAY
  if (gpu.status & PSX_GPU_STATUS_BLANKING) {
    if (!gpu.state.blanked) {
      vout_blank();
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;
    }
    return;
  }

  renderer_notify_update_lace(0);

  if (!gpu.state.fb_dirty)
    return;
#endif

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
    renderer_update_caches(0, 0, 1024, 512, 1);
  gpu.state.enhancement_was_active = gpu.state.enhancement_active;
  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
  renderer_notify_update_lace(1);
}

void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && (gpu.status & PSX_GPU_STATUS_INTERLACE)
    && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}

void GPUgetScreenInfo(int *y, int *base_hres)
{
  *y = gpu.screen.y;
  *base_hres = gpu.screen.vres;
  if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
    *base_hres >>= 1;
}

void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.force = &cbs->fskip_force;
  gpu.frameskip.dirty = (void *)&cbs->fskip_dirty;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
  gpu.state.screen_centering_type_default = cbs->screen_centering_type_default;
  if (gpu.state.screen_centering_type != cbs->screen_centering_type
      || gpu.state.screen_centering_x != cbs->screen_centering_x
      || gpu.state.screen_centering_y != cbs->screen_centering_y) {
    gpu.state.screen_centering_type = cbs->screen_centering_type;
    gpu.state.screen_centering_x = cbs->screen_centering_x;
    gpu.state.screen_centering_y = cbs->screen_centering_y;
    update_width();
    update_height();
  }

  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;
  gpu.gpu_state_change = cbs->gpu_state_change;

  // delayed vram mmap
  if (gpu.vram == NULL)
    map_vram();

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}

// vim:shiftwidth=2:expandtab