gpu: start doing some basic gpu timing
plugins/gpulib/gpu.c
/*
 * (C) Gražvydas "notaz" Ignotas, 2011-2012
 *
 * This work is licensed under the terms of any of these licenses
 * (at your option):
 *  - GNU GPL, version 2 or later.
 *  - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "gpu.h"
#include "gpu_timing.h"
#include "../../libpcsxcore/gpu.h" // meh
#include "../../frontend/plugin_lib.h"

#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
#ifdef __GNUC__
#define unlikely(x) __builtin_expect((x), 0)
#define preload __builtin_prefetch
#define noinline __attribute__((noinline))
#else
#define unlikely(x) (x)
#define preload(...)
#define noinline
#endif

//#define log_io gpu_log
#define log_io(...)

struct psx_gpu gpu;

static noinline int do_cmd_buffer(uint32_t *data, int count, int *cpu_cycles);
static void finish_vram_transfer(int is_read);

static noinline void do_cmd_reset(void)
{
  int dummy = 0;
  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy);
  gpu.cmd_len = 0;

  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}

static noinline void do_reset(void)
{
  unsigned int i;

  do_cmd_reset();

  memset(gpu.regs, 0, sizeof(gpu.regs));
  for (i = 0; i < ARRAY_SIZE(gpu.ex_regs); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
  gpu.status = 0x14802000;
  gpu.gp0 = 0;
  gpu.regs[3] = 1;
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
  gpu.screen.x = gpu.screen.y = 0;
  renderer_sync_ecmds(gpu.ex_regs);
  renderer_notify_res_change();
}

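// Display width depends on GPU status bits 16-18: bit 16 selects the
// 368-pixel mode, bits 17-18 pick 256/320/512/640. hdivs[] holds the
// matching dotclock dividers (e.g. index 2 -> 320 pixels wide, divider 8),
// used below to convert horizontal display-range units to pixels. The
// 608/656 start offsets are best-effort guesses, as the comments note.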
static noinline void update_width(void)
{
  static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
  static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
  uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
  int hres = hres_all[(gpu.status >> 16) & 7];
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int sw = gpu.screen.x2 - gpu.screen.x1;
  int type = gpu.state.screen_centering_type;
  int x = 0, x_auto;
  if (type == C_AUTO)
    type = gpu.state.screen_centering_type_default;
  if (sw <= 0)
    /* nothing displayed? */;
  else {
    int s = pal ? 656 : 608; // or 600? pal is just a guess
    x = (gpu.screen.x1 - s) / hdiv;
    x = (x + 1) & ~1;   // blitter limitation
    sw /= hdiv;
    sw = (sw + 2) & ~3; // according to nocash
    switch (type) {
    case C_INGAME:
      break;
    case C_MANUAL:
      x = gpu.state.screen_centering_x;
      break;
    default:
      // correct if slightly miscentered
      x_auto = (hres - sw) / 2 & ~3;
      if ((uint32_t)x_auto <= 8u && abs(x) < 24)
        x = x_auto;
    }
    if (x + sw > hres)
      sw = hres - x;
    // .x range check is done in vout_update()
  }
  // reduce the unpleasant right border that a few games have
  if (gpu.state.screen_centering_type == 0
      && x <= 4 && hres - (x + sw) >= 4)
    hres -= 4;
  gpu.screen.x = x;
  gpu.screen.w = sw;
  gpu.screen.hres = hres;
  gpu.state.dims_changed = 1;
  //printf("xx %d %d -> %2d, %d / %d\n",
  //  gpu.screen.x1, gpu.screen.x2, x, sw, hres);
}

static noinline void update_height(void)
{
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
  int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
  int sh = gpu.screen.y2 - gpu.screen.y1;
  int center_tol = 16;
  int vres = 240;

  if (pal && (sh > 240 || gpu.screen.vres == 256))
    vres = 256;
  if (dheight)
    y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
  if (sh <= 0)
    /* nothing displayed? */;
  else {
    switch (gpu.state.screen_centering_type) {
    case C_INGAME:
      break;
    case C_BORDERLESS:
      y = 0;
      break;
    case C_MANUAL:
      y = gpu.state.screen_centering_y;
      break;
    default:
      // correct if slightly miscentered
      if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
        y = 0;
    }
    if (y + sh > vres)
      sh = vres - y;
  }
  gpu.screen.y = y;
  gpu.screen.h = sh;
  gpu.screen.vres = vres;
  gpu.state.dims_changed = 1;
  //printf("yy %d %d -> %d, %d / %d\n",
  //  gpu.screen.y1, gpu.screen.y2, y, sh, vres);
}

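// Called when the display source changes (a "flip"). frameskip.set
// appears to hold the maximum number of consecutive frames to skip,
// and the frontend can also request skipping through *frameskip.advice.
// While a frame is being skipped, a large fill may be deferred into
// pending_fill and is replayed once rendering resumes.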
static noinline void decide_frameskip(void)
{
  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;

  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    int dummy = 0;
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
  }
}

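// The unsigned compares below are a combined two-sided range check:
// casting (x - src_x) to uint32_t makes values below src_x wrap to
// huge numbers, so a single >= test covers both x < src_x and
// x >= src_x + w. E.g. with src_x = 100, w = 320: x = 90 gives
// 0xfffffff6u, which is >= 320, so it lies outside the display area.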
static noinline int decide_frameskip_allow(uint32_t cmd_e3)
{
  // no frameskip if it decides to draw to display area,
  // but not for interlace since it'll most likely always do that
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
  gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
    (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
  return gpu.frameskip.allow;
}

static void flush_cmd_buffer(void);

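// GP1(10h) "get GPU info": per the nocash docs, index 2 returns the
// texture window (E2), 3/4 the drawing area corners (E3/E4), 5 the
// 22-bit drawing offset (E5), and 7 the GPU version, which is 2 here.
// Other indices leave the previously latched gp0 value unchanged.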
static noinline void get_gpu_info(uint32_t data)
{
  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();
  switch (data & 0x0f) {
  case 0x02:
  case 0x03:
  case 0x04:
    gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
    break;
  case 0x05:
    gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
    break;
  case 0x07:
    gpu.gp0 = 2;
    break;
  default:
    // gpu.gp0 unchanged
    break;
  }
}

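// VRAM proper is 1024x512 at 16bpp = 1MB; the mapping below is doubled
// so renderers that overdraw slightly past the end stay inside the
// allocation. map_vram() additionally offsets the pointer by 4096
// bytes, presumably so small negative accesses don't fault either.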
// double, for overdraw guard
#define VRAM_SIZE (1024 * 512 * 2 * 2)

static int map_vram(void)
{
  gpu.vram = gpu.mmap(VRAM_SIZE);
  if (gpu.vram != NULL) {
    gpu.vram += 4096 / 2;
    return 0;
  }
  else {
    fprintf(stderr, "could not map vram, expect crashes\n");
    return -1;
  }
}

long GPUinit(void)
{
  int ret;
  ret = vout_init();
  ret |= renderer_init();

  memset(&gpu.state, 0, sizeof(gpu.state));
  memset(&gpu.frameskip, 0, sizeof(gpu.frameskip));
  gpu.zero = 0;
  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.cmd_len = 0;
  do_reset();

  if (gpu.mmap != NULL) {
    if (map_vram() != 0)
      ret = -1;
  }
  return ret;
}

long GPUshutdown(void)
{
  long ret;

  renderer_finish();
  ret = vout_finish();
  if (gpu.vram != NULL) {
    gpu.vram -= 4096 / 2;
    gpu.munmap(gpu.vram, VRAM_SIZE);
  }
  gpu.vram = NULL;

  return ret;
}

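// GP1 "display control" writes: the command is in bits 24-31, the
// parameter in the low 24 bits. Values are cached in gpu.regs[] so a
// write that changes nothing returns early; commands 0x00 (reset),
// 0x01 (command buffer reset) and 0x05 (display address, which drives
// flip detection for frameskip) are always processed. For the display
// mode (case 0x08), parameter bits 0-5 land in status bits 17-22 and
// bit 6 (368-pixel mode) in status bit 16.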
void GPUwriteStatus(uint32_t data)
{
  uint32_t cmd = data >> 24;
  int src_x, src_y;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
  case 0x00:
    do_reset();
    break;
  case 0x01:
    do_cmd_reset();
    break;
  case 0x03:
    if (data & 1) {
      gpu.status |= PSX_GPU_STATUS_BLANKING;
      gpu.state.dims_changed = 1; // for hud clearing
    }
    else
      gpu.status &= ~PSX_GPU_STATUS_BLANKING;
    break;
  case 0x04:
    gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
    gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
    break;
  case 0x05:
    src_x = data & 0x3ff; src_y = (data >> 10) & 0x1ff;
    if (src_x != gpu.screen.src_x || src_y != gpu.screen.src_y) {
      gpu.screen.src_x = src_x;
      gpu.screen.src_y = src_y;
      renderer_notify_scanout_change(src_x, src_y);
      if (gpu.frameskip.set) {
        decide_frameskip_allow(gpu.ex_regs[3]);
        if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
          decide_frameskip();
          gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
        }
      }
    }
    break;
  case 0x06:
    gpu.screen.x1 = data & 0xfff;
    gpu.screen.x2 = (data >> 12) & 0xfff;
    update_width();
    break;
  case 0x07:
    gpu.screen.y1 = data & 0x3ff;
    gpu.screen.y2 = (data >> 10) & 0x3ff;
    update_height();
    break;
  case 0x08:
    gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
    update_width();
    update_height();
    renderer_notify_res_change();
    break;
  default:
    if ((cmd & 0xf0) == 0x10)
      get_gpu_info(data);
    break;
  }

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}

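// Number of parameter words each GP0 command carries after the command
// word itself (total length = 1 + cmd_lengths[cmd]). For example,
// GP0(28h), a monochrome four-point polygon, is the command/color word
// plus 4 vertex words. Variable-length polylines (0x48-0x5f) only list
// their minimum here and are extended by scanning for the terminator.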
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 80
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // a0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]

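// msb below is the "force mask bit" from GP0(E6h) bit 0, shifted into
// pixel bit 15 and OR'd into every halfword stored (the E6 bit 1
// mask-check setting is not applied on this path).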
static void cpy_msb(uint16_t *dst, const uint16_t *src, int l, uint16_t msb)
{
  int i;
  for (i = 0; i < l; i++)
    dst[i] = src[i] | msb;
}

static inline void do_vram_line(int x, int y, uint16_t *mem, int l,
    int is_read, uint16_t msb)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (unlikely(is_read))
    memcpy(mem, vram, l * 2);
  else if (unlikely(msb))
    cpy_msb(vram, mem, l, msb);
  else
    memcpy(vram, mem, l * 2);
}

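// Moves (count) 32-bit words between the incoming buffer and the VRAM
// transfer rectangle set up by start_vram_transfer(), in three phases:
// finish a partially transferred row (dma.offset), then whole rows,
// then a partial tail row. y wraps at 512 lines; the return value is
// how many of the input words were actually consumed.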
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read, msb);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read, msb);
  }

  if (h > 0) {
    if (count > 0) {
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read, msb);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}

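// GP0(A0h/C0h) position/size words pack x in bits 0-9 and y in bits
// 16-24; sizes wrap so that 0 means the maximum: ((0 - 1) & 0x3ff) + 1
// = 1024 wide, and likewise 512 high.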
static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;

  renderer_flush_queues();
  if (is_read) {
    gpu.status |= PSX_GPU_STATUS_IMG;
    // XXX: wrong for width 1
    gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_START);
}

static void finish_vram_transfer(int is_read)
{
  if (is_read)
    gpu.status &= ~PSX_GPU_STATUS_IMG;
  else {
    gpu.state.fb_dirty = 1;
    renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
        gpu.dma_start.w, gpu.dma_start.h, 0);
  }
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_END);
}

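// GP0(80h) VRAM-to-VRAM copy. A row-wise memcpy is enough when the
// rows don't overlap horizontally, nothing wraps past x=1024 and no
// mask bit has to be forced; otherwise each row is staged through lbuf
// in 128-pixel chunks so reads complete before writes, with x wrapped
// at 1024 and y at 512 like the hardware does.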
static void do_vram_copy(const uint32_t *params, int *cpu_cycles)
{
  const uint32_t sx =  LE32TOH(params[0]) & 0x3FF;
  const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1FF;
  const uint32_t dx =  LE32TOH(params[1]) & 0x3FF;
  const uint32_t dy = (LE32TOH(params[1]) >> 16) & 0x1FF;
  uint32_t w = ((LE32TOH(params[2]) - 1) & 0x3FF) + 1;
  uint32_t h = (((LE32TOH(params[2]) >> 16) - 1) & 0x1FF) + 1;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t lbuf[128];
  uint32_t x, y;

  *cpu_cycles += gput_copy(w, h);
  if (sx == dx && sy == dy && msb == 0)
    return;

  renderer_flush_queues();

  if (unlikely((sx < dx && dx < sx + w) || sx + w > 1024 || dx + w > 1024 || msb))
  {
    for (y = 0; y < h; y++)
    {
      const uint16_t *src = VRAM_MEM_XY(0, (sy + y) & 0x1ff);
      uint16_t *dst = VRAM_MEM_XY(0, (dy + y) & 0x1ff);
      for (x = 0; x < w; x += ARRAY_SIZE(lbuf))
      {
        uint32_t x1, w1 = w - x;
        if (w1 > ARRAY_SIZE(lbuf))
          w1 = ARRAY_SIZE(lbuf);
        for (x1 = 0; x1 < w1; x1++)
          lbuf[x1] = src[(sx + x + x1) & 0x3ff];
        for (x1 = 0; x1 < w1; x1++)
          dst[(dx + x + x1) & 0x3ff] = lbuf[x1] | msb;
      }
    }
  }
  else
  {
    uint32_t sy1 = sy, dy1 = dy;
    for (y = 0; y < h; y++, sy1++, dy1++)
      memcpy(VRAM_MEM_XY(dx, dy1 & 0x1ff), VRAM_MEM_XY(sx, sy1 & 0x1ff), w * 2);
  }

  renderer_update_caches(dx, dy, w, h, 0);
}

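// Consumes commands while a frame is being skipped: nothing is drawn,
// but state that outlives the frame is still tracked. Fills larger
// than the screen are executed anyway, smaller ones are parked in
// pending_fill; textured polygons update the texpage bits of E1; and
// polylines (0x48-0x5f) are measured by scanning for the terminator
// word ((word & 0xf000f000) == 0x50005000). The loop stops when an E3
// write lands in the display area (frameskip no longer allowed), on
// image i/o, or on an incomplete command.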
static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy = 0, v;
  int skip = 1;

  gpu.frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = LE32TOH(list[0]) >> 24;
    len = 1 + cmd_lengths[cmd];

    switch (cmd) {
      case 0x02:
        if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
          // clearing something large, don't skip
          do_cmd_list(list, 3, &dummy, &dummy);
        else
          memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
        break;
      case 0x24 ... 0x27:
      case 0x2c ... 0x2f:
      case 0x34 ... 0x37:
      case 0x3c ... 0x3f:
        gpu.ex_regs[1] &= ~0x1ff;
        gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
        break;
      case 0x48 ... 0x4F:
        for (v = 3; pos + v < count; v++)
        {
          if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
            break;
        }
        len += v - 3;
        break;
      case 0x58 ... 0x5F:
        for (v = 4; pos + v < count; v += 2)
        {
          if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
            break;
        }
        len += v - 4;
        break;
      default:
        if (cmd == 0xe3)
          skip = decide_frameskip_allow(LE32TOH(list[0]));
        if ((cmd & 0xf8) == 0xe0)
          gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
        break;
    }

    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }
    if (0x80 <= cmd && cmd <= 0xdf)
      break; // image i/o

    pos += len;
  }

  renderer_sync_ecmds(gpu.ex_regs);
  *last_cmd = cmd;
  return pos;
}

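// Main GP0 dispatcher: first feeds any in-progress VRAM transfer, then
// handles image write/read setup inline (0xa0-0xbf store to VRAM,
// 0xc0-0xdf read back, hence the (cmd & 0xe0) == 0xc0 is_read test) as
// well as VRAM copies (0x80-0x9f); everything else goes to the
// renderer's do_cmd_list(), or to do_cmd_list_skip() while a frame is
// being skipped. Returns the number of words not consumed (an
// incomplete command stays buffered until more data arrives).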
static noinline int do_cmd_buffer(uint32_t *data, int count, int *cpu_cycles)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = LE32TOH(data[pos]) >> 24;
    if (0xa0 <= cmd && cmd <= 0xdf) {
      if (unlikely((pos+2) >= count)) {
        // incomplete vram write/read cmd, can't consume yet
        cmd = -1;
        break;
      }

      // consume vram write/read cmd
      start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
      pos += 3;
      continue;
    }
    else if ((cmd & 0xe0) == 0x80) {
      if (unlikely((pos+3) >= count)) {
        cmd = -1; // incomplete cmd, can't consume yet
        break;
      }
      do_vram_copy(data + pos + 1, cpu_cycles);
      vram_dirty = 1;
      pos += 4;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, cpu_cycles, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      // incomplete cmd
      break;
  }

  gpu.status &= ~0x1fff;
  gpu.status |= gpu.ex_regs[1] & 0x7ff;
  gpu.status |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

  return count - pos;
}

static noinline void flush_cmd_buffer(void)
{
  int dummy = 0, left;
  left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  if (left != gpu.cmd_len) {
    if (!gpu.dma.h && gpu.gpu_state_change)
      gpu.gpu_state_change(PGS_PRIMITIVE_START);
    gpu.cmd_len = left;
  }
}

void GPUwriteDataMem(uint32_t *mem, int count)
{
  int dummy = 0, left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = do_cmd_buffer(mem, count, &dummy);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}

void GPUwriteData(uint32_t data)
{
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer();
}

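// Walks a GPU DMA linked list: each node is a header word holding the
// payload length in bits 24-31 and the next address in bits 0-23,
// followed by that many command words; an address with bit 23 set
// (e.g. the 0xffffff terminator) ends the list. The basic gpu timing
// is accumulated here too: rough per-node costs of 10 cycles plus
// 5 + len for a non-empty payload, with do_cmd_buffer() adding
// per-primitive costs on top.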
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr)
{
  uint32_t addr, *list, ld_addr = 0;
  int len, left, count;
  int cpu_cycles = 0;

  preload(rambase + (start_addr & 0x1fffff) / 4);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; (addr & 0x800000) == 0; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = LE32TOH(list[0]) >> 24;
    addr = LE32TOH(list[0]) & 0xffffff;
    preload(rambase + (addr & 0x1fffff) / 4);

    cpu_cycles += 10;
    if (len > 0)
      cpu_cycles += 5 + len;

    log_io(".chain %08lx #%d+%d\n",
      (long)(list - rambase) * 4, len, gpu.cmd_len);
    if (unlikely(gpu.cmd_len > 0)) {
      if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) {
        log_anomaly("cmd_buffer overflow, likely garbage commands\n");
        gpu.cmd_len = 0;
      }
      memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
      gpu.cmd_len += len;
      flush_cmd_buffer();
      continue;
    }

    if (len) {
      left = do_cmd_buffer(list + 1, len, &cpu_cycles);
      if (left) {
        memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
        gpu.cmd_len = left;
        log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
      }
    }

    if (progress_addr) {
      *progress_addr = addr;
      break;
    }
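    // Some games link their display list into a loop. After walking
    // LD_THRESHOLD nodes, every further node visited gets bit 23 of its
    // header set as a "seen" marker, which terminates the walk once the
    // list loops back onto a marked node; the markers are stripped
    // again below after the walk ends.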
    #define LD_THRESHOLD (8*1024)
    if (count >= LD_THRESHOLD) {
      if (count == LD_THRESHOLD) {
        ld_addr = addr;
        continue;
      }

      // loop detection marker
      // (bit23 set causes DMA error on real machine, so
      //  unlikely to be ever set by the game)
      list[0] |= HTOLE32(0x800000);
    }
  }

  if (ld_addr != 0) {
    // remove loop detection markers
    count -= LD_THRESHOLD + 2;
    addr = ld_addr & 0x1fffff;
    while (count-- > 0) {
      list = rambase + addr / 4;
      addr = LE32TOH(list[0]) & 0x1fffff;
      list[0] &= HTOLE32(~0x800000);
    }
  }

  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;

  return cpu_cycles;
}

void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}

uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h) {
    ret = HTOLE32(ret);
    do_vram_io(&ret, 1, 1);
    ret = LE32TOH(ret);
  }

  log_io("gpu_read %08x\n", ret);
  return ret;
}

uint32_t GPUreadStatus(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.status;
  log_io("gpu_read_status %08x\n", ret);
  return ret;
}

struct GPUFreeze
{
  uint32_t ulFreezeVersion;      // should be always 1 for now (set by main emu)
  uint32_t ulStatus;             // current gpu status
  uint32_t ulControl[256];       // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};

long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
  case 1: // save
    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
    memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
    memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
    freeze->ulStatus = gpu.status;
    break;
  case 0: // load
    memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
    memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
    memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
    gpu.status = freeze->ulStatus;
    gpu.cmd_len = 0;
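    // replay the control registers through GPUwriteStatus() to rebuild
    // derived state; regs[i] is flipped first so the "value unchanged"
    // early-out in GPUwriteStatus() can't swallow the replayed write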
    for (i = 8; i > 0; i--) {
      gpu.regs[i] ^= 1; // avoid reg change detection
      GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
    }
    renderer_sync_ecmds(gpu.ex_regs);
    renderer_update_caches(0, 0, 1024, 512, 0);
    break;
  }

  return 1;
}

void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

#ifndef RAW_FB_DISPLAY
  if (gpu.status & PSX_GPU_STATUS_BLANKING) {
    if (!gpu.state.blanked) {
      vout_blank();
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;
    }
    return;
  }

  if (!gpu.state.fb_dirty)
    return;
#endif

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
    renderer_update_caches(0, 0, 1024, 512, 1);
  gpu.state.enhancement_was_active = gpu.state.enhancement_active;
  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
}

void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && (gpu.status & PSX_GPU_STATUS_INTERLACE)
    && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}

void GPUgetScreenInfo(int *y, int *base_hres)
{
  *y = gpu.screen.y;
  *base_hres = gpu.screen.vres;
  if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
    *base_hres >>= 1;
}

void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
  gpu.state.screen_centering_type_default = cbs->screen_centering_type_default;
  if (gpu.state.screen_centering_type != cbs->screen_centering_type
      || gpu.state.screen_centering_x != cbs->screen_centering_x
      || gpu.state.screen_centering_y != cbs->screen_centering_y) {
    gpu.state.screen_centering_type = cbs->screen_centering_type;
    gpu.state.screen_centering_x = cbs->screen_centering_x;
    gpu.state.screen_centering_y = cbs->screen_centering_y;
    update_width();
    update_height();
  }

  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;
  gpu.gpu_state_change = cbs->gpu_state_change;

  // delayed vram mmap
  if (gpu.vram == NULL)
    map_vram();

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}

// vim:shiftwidth=2:expandtab