gpu: a bit better idle bit handling
[pcsx_rearmed.git] / plugins / gpulib / gpu.c
/*
 * (C) Gražvydas "notaz" Ignotas, 2011-2012
 *
 * This work is licensed under the terms of any of these licenses
 * (at your option):
 *  - GNU GPL, version 2 or later.
 *  - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "gpu.h"
#include "../../libpcsxcore/gpu.h" // meh

#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#ifdef __GNUC__
#define unlikely(x) __builtin_expect((x), 0)
#define preload __builtin_prefetch
#define noinline __attribute__((noinline))
#else
#define unlikely(x) (x)
#define preload(...)
#define noinline
#endif

//#define log_io gpu_log
#define log_io(...)

struct psx_gpu gpu;

static noinline int do_cmd_buffer(uint32_t *data, int count);
static void finish_vram_transfer(int is_read);

static noinline void do_cmd_reset(void)
{
  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  gpu.cmd_len = 0;

  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}

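// GP1(00h) full reset; 0x14802000 is presumably the power-on GPUSTAT
// value (display blanked, command/dma ready bits set)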
static noinline void do_reset(void)
{
  unsigned int i;

  do_cmd_reset();

  memset(gpu.regs, 0, sizeof(gpu.regs));
  for (i = 0; i < ARRAY_SIZE(gpu.ex_regs); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
  gpu.status = 0x14802000;
  gpu.gp0 = 0;
  gpu.regs[3] = 1;
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
  gpu.screen.x = gpu.screen.y = 0;
  renderer_sync_ecmds(gpu.ex_regs);
  renderer_notify_res_change();
}

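// derive the displayed width from GPUSTAT: bits 17-18 select
// 256/320/512/640 and bit 16 forces the 368 mode, with a matching
// dotclock divider used to scale the x1/x2 display range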
static noinline void update_width(void)
{
  static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
  static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
  uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
  int hres = hres_all[(gpu.status >> 16) & 7];
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int sw = gpu.screen.x2 - gpu.screen.x1;
  int x = 0, x_auto;
  if (sw <= 0)
    /* nothing displayed? */;
  else {
    int s = pal ? 656 : 608; // or 600? pal is just a guess
    x = (gpu.screen.x1 - s) / hdiv;
    x = (x + 1) & ~1;   // blitter limitation
    sw /= hdiv;
    sw = (sw + 2) & ~3; // according to nocash
    switch (gpu.state.screen_centering_type) {
    case 1:
      break;
    case 2:
      x = gpu.state.screen_centering_x;
      break;
    default:
      // correct if slightly miscentered
      x_auto = (hres - sw) / 2 & ~3;
      if ((uint32_t)x_auto <= 8u && abs(x) < 24)
        x = x_auto;
    }
    if (x + sw > hres)
      sw = hres - x;
    // .x range check is done in vout_update()
  }
  // reduce the unpleasant right border that a few games have
  if (gpu.state.screen_centering_type == 0
      && x <= 4 && hres - (x + sw) >= 4)
    hres -= 4;
  gpu.screen.x = x;
  gpu.screen.w = sw;
  gpu.screen.hres = hres;
  gpu.state.dims_changed = 1;
  //printf("xx %d %d -> %2d, %d / %d\n",
  //  gpu.screen.x1, gpu.screen.x2, x, sw, hres);
}

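// derive the displayed height from the y1/y2 display range; the y1
// bias differs between ntsc and pal, and "double height" interlace
// doubles everything including the centering tolerance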
static noinline void update_height(void)
{
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
  int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
  int sh = gpu.screen.y2 - gpu.screen.y1;
  int center_tol = 16;
  int vres = 240;

  if (pal && (sh > 240 || gpu.screen.vres == 256))
    vres = 256;
  if (dheight)
    y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
  if (sh <= 0)
    /* nothing displayed? */;
  else {
    switch (gpu.state.screen_centering_type) {
    case 1:
      break;
    case 2:
      y = gpu.state.screen_centering_y;
      break;
    default:
      // correct if slightly miscentered
      if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
        y = 0;
    }
    if (y + sh > vres)
      sh = vres - y;
  }
  gpu.screen.y = y;
  gpu.screen.h = sh;
  gpu.screen.vres = vres;
  gpu.state.dims_changed = 1;
  //printf("yy %d %d -> %d, %d / %d\n",
  //  gpu.screen.y1, gpu.screen.y2, y, sh, vres);
}

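// called on flip: advance the skip counter and decide whether the
// next frame is drawn or skipped; a deferred fill is replayed when
// skipping ends so the background isn't left stale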
static noinline void decide_frameskip(void)
{
  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;

  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    int dummy;
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
  }
}

static noinline int decide_frameskip_allow(uint32_t cmd_e3)
{
  // no frameskip if it decides to draw to display area,
  // but not for interlace since it'll most likely always do that
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
  gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
    (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
  return gpu.frameskip.allow;
}

static void flush_cmd_buffer(void);

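// GP1(10h) "get gpu info": returns latched E2-E5 state through
// GPUREAD; 07h is the gpu version, 2 per nocash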
static noinline void get_gpu_info(uint32_t data)
{
  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();
  switch (data & 0x0f) {
  case 0x02:
  case 0x03:
  case 0x04:
    gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
    break;
  case 0x05:
    gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
    break;
  case 0x07:
    gpu.gp0 = 2;
    break;
  default:
    // gpu.gp0 unchanged
    break;
  }
}

// double, for overdraw guard
#define VRAM_SIZE (1024 * 512 * 2 * 2)

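// the extra 4k skip presumably gives the renderer some guard space
// below the nominal vram start as well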
static int map_vram(void)
{
  gpu.vram = gpu.mmap(VRAM_SIZE);
  if (gpu.vram != NULL) {
    gpu.vram += 4096 / 2;
    return 0;
  }
  else {
    fprintf(stderr, "could not map vram, expect crashes\n");
    return -1;
  }
}

long GPUinit(void)
{
  int ret;
  ret = vout_init();
  ret |= renderer_init();

  memset(&gpu.state, 0, sizeof(gpu.state));
  memset(&gpu.frameskip, 0, sizeof(gpu.frameskip));
  gpu.zero = 0;
  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.cmd_len = 0;
  do_reset();

  if (gpu.mmap != NULL) {
    if (map_vram() != 0)
      ret = -1;
  }
  return ret;
}

long GPUshutdown(void)
{
  long ret;

  renderer_finish();
  ret = vout_finish();
  if (gpu.vram != NULL) {
    gpu.vram -= 4096 / 2;
    gpu.munmap(gpu.vram, VRAM_SIZE);
  }
  gpu.vram = NULL;

  return ret;
}

void GPUwriteStatus(uint32_t data)
{
  uint32_t cmd = data >> 24;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
  case 0x00:
    do_reset();
    break;
  case 0x01:
    do_cmd_reset();
    break;
  case 0x03:
    if (data & 1) {
      gpu.status |= PSX_GPU_STATUS_BLANKING;
      gpu.state.dims_changed = 1; // for hud clearing
    }
    else
      gpu.status &= ~PSX_GPU_STATUS_BLANKING;
    break;
  case 0x04:
    gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
    gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
    break;
  case 0x05:
    gpu.screen.src_x = data & 0x3ff;
    gpu.screen.src_y = (data >> 10) & 0x1ff;
    renderer_notify_scanout_x_change(gpu.screen.src_x, gpu.screen.hres);
    if (gpu.frameskip.set) {
      decide_frameskip_allow(gpu.ex_regs[3]);
      if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
        decide_frameskip();
        gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
      }
    }
    break;
  case 0x06:
    gpu.screen.x1 = data & 0xfff;
    gpu.screen.x2 = (data >> 12) & 0xfff;
    update_width();
    break;
  case 0x07:
    gpu.screen.y1 = data & 0x3ff;
    gpu.screen.y2 = (data >> 10) & 0x3ff;
    update_height();
    break;
  case 0x08:
    gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
    update_width();
    update_height();
    renderer_notify_res_change();
    break;
  default:
    if ((cmd & 0xf0) == 0x10)
      get_gpu_info(data);
    break;
  }

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}

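// number of parameter words following each GP0 command word;
// variable-length cases (polylines, image i/o) are special-cased
// by the parsers below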
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 80
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // a0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

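// vram is a 1024x512 grid of 16bpp pixels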
#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]

static void cpy_msb(uint16_t *dst, const uint16_t *src, int l, uint16_t msb)
{
  int i;
  for (i = 0; i < l; i++)
    dst[i] = src[i] | msb;
}

static inline void do_vram_line(int x, int y, uint16_t *mem, int l,
    int is_read, uint16_t msb)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (unlikely(is_read))
    memcpy(mem, vram, l * 2);
  else if (unlikely(msb))
    cpy_msb(vram, mem, l, msb);
  else
    memcpy(vram, mem, l * 2);
}

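// feed up to count words to the active vram transfer, one row at a
// time with the partial-row position carried in gpu.dma.offset;
// returns the number of words consumed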
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read, msb);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read, msb);
  }

  if (h > 0) {
    if (count > 0) {
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read, msb);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}

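// latch a GP0(A0h/C0h) image transfer; the rectangle wraps at the
// vram edges, and for reads the first word is prefetched into gp0
// so it's immediately visible through GPUREAD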
static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;

  renderer_flush_queues();
  if (is_read) {
    gpu.status |= PSX_GPU_STATUS_IMG;
    // XXX: wrong for width 1
    gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_START);
}

static void finish_vram_transfer(int is_read)
{
  if (is_read)
    gpu.status &= ~PSX_GPU_STATUS_IMG;
  else {
    gpu.state.fb_dirty = 1;
    renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
        gpu.dma_start.w, gpu.dma_start.h, 0);
  }
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_END);
}

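// GP0(80h) vram-to-vram blit: plain per-row memcpy when possible,
// otherwise a bounce buffer handles wrapping, overlap and the
// mask (msb) bit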
static void do_vram_copy(const uint32_t *params)
{
  const uint32_t sx = LE32TOH(params[0]) & 0x3FF;
  const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1FF;
  const uint32_t dx = LE32TOH(params[1]) & 0x3FF;
  const uint32_t dy = (LE32TOH(params[1]) >> 16) & 0x1FF;
  uint32_t w = ((LE32TOH(params[2]) - 1) & 0x3FF) + 1;
  uint32_t h = (((LE32TOH(params[2]) >> 16) - 1) & 0x1FF) + 1;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t lbuf[128];
  uint32_t x, y;

  if (sx == dx && sy == dy && msb == 0)
    return;

  renderer_flush_queues();

  if (unlikely((sx < dx && dx < sx + w) || sx + w > 1024 || dx + w > 1024 || msb))
  {
    for (y = 0; y < h; y++)
    {
      const uint16_t *src = VRAM_MEM_XY(0, (sy + y) & 0x1ff);
      uint16_t *dst = VRAM_MEM_XY(0, (dy + y) & 0x1ff);
      for (x = 0; x < w; x += ARRAY_SIZE(lbuf))
      {
        uint32_t x1, w1 = w - x;
        if (w1 > ARRAY_SIZE(lbuf))
          w1 = ARRAY_SIZE(lbuf);
        for (x1 = 0; x1 < w1; x1++)
          lbuf[x1] = src[(sx + x + x1) & 0x3ff];
        for (x1 = 0; x1 < w1; x1++)
          dst[(dx + x + x1) & 0x3ff] = lbuf[x1] | msb;
      }
    }
  }
  else
  {
    uint32_t sy1 = sy, dy1 = dy;
    for (y = 0; y < h; y++, sy1++, dy1++)
      memcpy(VRAM_MEM_XY(dx, dy1 & 0x1ff), VRAM_MEM_XY(sx, sy1 & 0x1ff), w * 2);
  }

  renderer_update_caches(dx, dy, w, h, 0);
}

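// parse the list without drawing: latch E1-E6 state and texpage bits
// of textured prims, defer small fills for replay when skipping ends,
// and scan polylines to their terminator to keep the stream in sync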
static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy, v;
  int skip = 1;

  gpu.frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = LE32TOH(list[0]) >> 24;
    len = 1 + cmd_lengths[cmd];

    switch (cmd) {
    case 0x02:
      if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
        // clearing something large, don't skip
        do_cmd_list(list, 3, &dummy);
      else
        memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
      break;
    case 0x24 ... 0x27:
    case 0x2c ... 0x2f:
    case 0x34 ... 0x37:
    case 0x3c ... 0x3f:
      gpu.ex_regs[1] &= ~0x1ff;
      gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
      break;
    case 0x48 ... 0x4F:
      for (v = 3; pos + v < count; v++)
      {
        if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
          break;
      }
      len += v - 3;
      break;
    case 0x58 ... 0x5F:
      for (v = 4; pos + v < count; v += 2)
      {
        if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
          break;
      }
      len += v - 4;
      break;
    default:
      if (cmd == 0xe3)
        skip = decide_frameskip_allow(LE32TOH(list[0]));
      if ((cmd & 0xf8) == 0xe0)
        gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
      break;
    }

    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }
    if (0x80 <= cmd && cmd <= 0xdf)
      break; // image i/o

    pos += len;
  }

  renderer_sync_ecmds(gpu.ex_regs);
  *last_cmd = cmd;
  return pos;
}

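// main GP0 dispatcher: feeds an unfinished vram write first, handles
// image i/o and vram copy commands here, passes the rest to the
// renderer (or the skip parser); returns the number of words that
// could not be consumed yet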
static noinline int do_cmd_buffer(uint32_t *data, int count)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = LE32TOH(data[pos]) >> 24;
    if (0xa0 <= cmd && cmd <= 0xdf) {
      if (unlikely((pos+2) >= count)) {
        // incomplete vram write/read cmd, can't consume yet
        cmd = -1;
        break;
      }

      // consume vram write/read cmd
      start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
      pos += 3;
      continue;
    }
    else if ((cmd & 0xe0) == 0x80) {
      if (unlikely((pos+3) >= count)) {
        cmd = -1; // incomplete cmd, can't consume yet
        break;
      }
      do_vram_copy(data + pos + 1);
      vram_dirty = 1;
      pos += 4;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      // incomplete cmd
      break;
  }

  gpu.status &= ~0x1fff;
  gpu.status |= gpu.ex_regs[1] & 0x7ff;
  gpu.status |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

  return count - pos;
}

static noinline void flush_cmd_buffer(void)
{
  int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  if (left != gpu.cmd_len) {
    if (!gpu.dma.h && gpu.gpu_state_change)
      gpu.gpu_state_change(PGS_PRIMITIVE_START);
    gpu.cmd_len = left;
  }
}

void GPUwriteDataMem(uint32_t *mem, int count)
{
  int left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = do_cmd_buffer(mem, count);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}

void GPUwriteData(uint32_t data)
{
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer();
}

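// walk the DMA linked list: each header word has the next address in
// the low 24 bits (end marker = bit23 set) and the payload length in
// the top 8; after LD_THRESHOLD nodes, visited entries get bit23 set
// as loop detection markers, which are removed when done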
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr)
{
  uint32_t addr, *list, ld_addr = 0;
  int len, left, count;
  long cpu_cycles = 0;

  preload(rambase + (start_addr & 0x1fffff) / 4);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; (addr & 0x800000) == 0; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = LE32TOH(list[0]) >> 24;
    addr = LE32TOH(list[0]) & 0xffffff;
    preload(rambase + (addr & 0x1fffff) / 4);

    cpu_cycles += 10;
    if (len > 0)
      cpu_cycles += 5 + len;

    log_io(".chain %08lx #%d+%d\n",
      (long)(list - rambase) * 4, len, gpu.cmd_len);
    if (unlikely(gpu.cmd_len > 0)) {
      if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) {
        log_anomaly("cmd_buffer overflow, likely garbage commands\n");
        gpu.cmd_len = 0;
      }
      memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
      gpu.cmd_len += len;
      flush_cmd_buffer();
      continue;
    }

    if (len) {
      left = do_cmd_buffer(list + 1, len);
      if (left) {
        memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
        gpu.cmd_len = left;
        log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
      }
    }

    if (progress_addr) {
      *progress_addr = addr;
      break;
    }
    #define LD_THRESHOLD (8*1024)
    if (count >= LD_THRESHOLD) {
      if (count == LD_THRESHOLD) {
        ld_addr = addr;
        continue;
      }

      // loop detection marker
      // (bit23 set causes DMA error on real machine, so
      //  unlikely to be ever set by the game)
      list[0] |= HTOLE32(0x800000);
    }
  }

  if (ld_addr != 0) {
    // remove loop detection markers
    count -= LD_THRESHOLD + 2;
    addr = ld_addr & 0x1fffff;
    while (count-- > 0) {
      list = rambase + addr / 4;
      addr = LE32TOH(list[0]) & 0x1fffff;
      list[0] &= HTOLE32(~0x800000);
    }
  }

  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;

  return cpu_cycles;
}

void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}

uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h) {
    ret = HTOLE32(ret);
    do_vram_io(&ret, 1, 1);
    ret = LE32TOH(ret);
  }

  log_io("gpu_read %08x\n", ret);
  return ret;
}

uint32_t GPUreadStatus(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.status;
  log_io("gpu_read_status %08x\n", ret);
  return ret;
}

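// savestate block, in the usual PSEmu Pro plugin layout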
struct GPUFreeze
{
  uint32_t ulFreezeVersion;      // should be always 1 for now (set by main emu)
  uint32_t ulStatus;             // current gpu status
  uint32_t ulControl[256];       // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};

long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
  case 1: // save
    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
    memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
    memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
    freeze->ulStatus = gpu.status;
    break;
  case 0: // load
    memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
    memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
    memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
    gpu.status = freeze->ulStatus;
    gpu.cmd_len = 0;
    for (i = 8; i > 0; i--) {
      gpu.regs[i] ^= 1; // avoid reg change detection
      GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
    }
    renderer_sync_ecmds(gpu.ex_regs);
    renderer_update_caches(0, 0, 1024, 512, 1);
    break;
  }

  return 1;
}

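// per-frame display update: flush pending work, then present unless
// blanked, unchanged, or dropped by frameskip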
void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

#ifndef RAW_FB_DISPLAY
  if (gpu.status & PSX_GPU_STATUS_BLANKING) {
    if (!gpu.state.blanked) {
      vout_blank();
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;
    }
    return;
  }

  if (!gpu.state.fb_dirty)
    return;
#endif

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
    renderer_update_caches(0, 0, 1024, 512, 1);
  gpu.state.enhancement_was_active = gpu.state.enhancement_active;
  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
}

void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && (gpu.status & PSX_GPU_STATUS_INTERLACE)
    && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}

void GPUgetScreenInfo(int *y, int *base_hres)
{
  *y = gpu.screen.y;
  *base_hres = gpu.screen.vres;
  if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
    *base_hres >>= 1;
}

#include "../../frontend/plugin_lib.h"

void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
  if (gpu.state.screen_centering_type != cbs->screen_centering_type
      || gpu.state.screen_centering_x != cbs->screen_centering_x
      || gpu.state.screen_centering_y != cbs->screen_centering_y) {
    gpu.state.screen_centering_type = cbs->screen_centering_type;
    gpu.state.screen_centering_x = cbs->screen_centering_x;
    gpu.state.screen_centering_y = cbs->screen_centering_y;
    update_width();
    update_height();
  }

  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;
  gpu.gpu_state_change = cbs->gpu_state_change;

  // delayed vram mmap
  if (gpu.vram == NULL)
    map_vram();

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}

// vim:shiftwidth=2:expandtab