[pcsx_rearmed.git] / plugins / gpulib / gpu.c
/*
 * (C) Gražvydas "notaz" Ignotas, 2011-2012
 *
 * This work is licensed under the terms of any of these licenses
 * (at your option):
 *  - GNU GPL, version 2 or later.
 *  - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "gpu.h"
#include "gpu_timing.h"
#include "../../libpcsxcore/gpu.h" // meh
#include "../../frontend/plugin_lib.h"

#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
#ifdef __GNUC__
#define unlikely(x) __builtin_expect((x), 0)
#define preload __builtin_prefetch
#define noinline __attribute__((noinline))
#else
#define unlikely(x) (x)
#define preload(...)
#define noinline
#endif

//#define log_io gpu_log
#define log_io(...)

struct psx_gpu gpu;

static noinline int do_cmd_buffer(uint32_t *data, int count,
    int *cycles_sum, int *cycles_last);
static void finish_vram_transfer(int is_read);

static noinline void do_cmd_reset(void)
{
  int dummy = 0;
  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy, &dummy);
  gpu.cmd_len = 0;

  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}

static noinline void do_reset(void)
{
  unsigned int i;

  do_cmd_reset();

  memset(gpu.regs, 0, sizeof(gpu.regs));
  for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
  gpu.status = 0x14802000;
  gpu.gp0 = 0;
  gpu.regs[3] = 1;
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
  gpu.screen.x = gpu.screen.y = 0;
  renderer_sync_ecmds(gpu.ex_regs);
  renderer_notify_res_change();
}

static noinline void update_width(void)
{
  static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
  static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
  uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
  int hres = hres_all[(gpu.status >> 16) & 7];
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int sw = gpu.screen.x2 - gpu.screen.x1;
  int type = gpu.state.screen_centering_type;
  int x = 0, x_auto;
  if (type == C_AUTO)
    type = gpu.state.screen_centering_type_default;
  if (sw <= 0)
    /* nothing displayed? */;
  else {
    int s = pal ? 656 : 608; // or 600? pal is just a guess
    x = (gpu.screen.x1 - s) / hdiv;
    x = (x + 1) & ~1;   // blitter limitation
    sw /= hdiv;
    sw = (sw + 2) & ~3; // according to nocash

    if (gpu.state.show_overscan == 2) // widescreen hack
      sw = (sw + 63) & ~63;
    if (gpu.state.show_overscan && sw >= hres)
      x = 0, hres = sw;
    switch (type) {
    case C_INGAME:
      break;
    case C_MANUAL:
      x = gpu.state.screen_centering_x;
      break;
    default:
      // correct if slightly miscentered
      x_auto = (hres - sw) / 2 & ~3;
      if ((uint32_t)x_auto <= 8u && abs(x) < 24)
        x = x_auto;
    }
    if (x + sw > hres)
      sw = hres - x;
    // .x range check is done in vout_update()
  }
  // reduce the unpleasant right border that a few games have
  if (gpu.state.screen_centering_type == 0
      && x <= 4 && hres - (x + sw) >= 4)
    hres -= 4;
  gpu.screen.x = x;
  gpu.screen.w = sw;
  gpu.screen.hres = hres;
  gpu.state.dims_changed = 1;
  //printf("xx %d %d (%d) -> %2d, %d / %d\n", gpu.screen.x1,
  //  gpu.screen.x2, gpu.screen.x2 - gpu.screen.x1, x, sw, hres);
}
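
/*
 * Worked example for the horizontal sizing math above (illustrative values,
 * assuming the commonly documented GP1(0x06) defaults rather than any
 * particular game): with x1 = 0x260 (608), x2 = 0xc60 (3168) and a
 * 256-pixel mode (hdiv = 10), sw = (3168 - 608) / 10 = 256 and
 * x = (608 - 608) / 10 = 0, so the full 256-pixel line is shown. A narrower
 * range of 2400 ticks would instead give sw = 240, and the default (auto)
 * centering path would re-center it if it is only a few pixels off.
 */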

static noinline void update_height(void)
{
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
  int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
  int sh = gpu.screen.y2 - gpu.screen.y1;
  int center_tol = 16;
  int vres = 240;

  if (pal && (sh > 240 || gpu.screen.vres == 256))
    vres = 256;
  if (dheight)
    y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
  if (sh <= 0)
    /* nothing displayed? */;
  else {
    switch (gpu.state.screen_centering_type) {
    case C_INGAME:
      break;
    case C_BORDERLESS:
      y = 0;
      break;
    case C_MANUAL:
      y = gpu.state.screen_centering_y;
      break;
    default:
      // correct if slightly miscentered
      if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
        y = 0;
    }
    if (y + sh > vres)
      sh = vres - y;
  }
  gpu.screen.y = y;
  gpu.screen.h = sh;
  gpu.screen.vres = vres;
  gpu.state.dims_changed = 1;
  //printf("yy %d %d -> %d, %d / %d\n",
  //  gpu.screen.y1, gpu.screen.y2, y, sh, vres);
}

static noinline void decide_frameskip(void)
{
  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;

  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    int dummy = 0;
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy, &dummy, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
  }
}

static noinline int decide_frameskip_allow(uint32_t cmd_e3)
{
  // no frameskip if it decides to draw to display area,
  // but not for interlace since it'll most likely always do that
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
  gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
    (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
  return gpu.frameskip.allow;
}

static void flush_cmd_buffer(void);

static noinline void get_gpu_info(uint32_t data)
{
  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();
  switch (data & 0x0f) {
  case 0x02:
  case 0x03:
  case 0x04:
    gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
    break;
  case 0x05:
    gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
    break;
  case 0x07:
    gpu.gp0 = 2;
    break;
  default:
    // gpu.gp0 unchanged
    break;
  }
}

// double, for overdraw guard
#define VRAM_SIZE (1024 * 512 * 2 * 2)

static int map_vram(void)
{
  gpu.vram = gpu.mmap(VRAM_SIZE);
  if (gpu.vram != NULL) {
    gpu.vram += 4096 / 2;
    return 0;
  }
  else {
    fprintf(stderr, "could not map vram, expect crashes\n");
    return -1;
  }
}
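
/*
 * Note on the mapping above: VRAM_SIZE is twice the real 1MB of VRAM so that
 * renderer overdraw past the end of the framebuffer stays inside the
 * allocation (the "overdraw guard"), and gpu.vram is then advanced by
 * 4096 bytes (2048 halfwords), presumably so a small guard/scratch area also
 * sits in front of the visible VRAM; GPUshutdown() undoes the same offset
 * before unmapping.
 */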

long GPUinit(void)
{
  int ret;
  ret  = vout_init();
  ret |= renderer_init();

  memset(&gpu.state, 0, sizeof(gpu.state));
  memset(&gpu.frameskip, 0, sizeof(gpu.frameskip));
  gpu.zero = 0;
  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.cmd_len = 0;
  do_reset();

  if (gpu.mmap != NULL) {
    if (map_vram() != 0)
      ret = -1;
  }
  return ret;
}

long GPUshutdown(void)
{
  long ret;

  renderer_finish();
  ret = vout_finish();
  if (gpu.vram != NULL) {
    gpu.vram -= 4096 / 2;
    gpu.munmap(gpu.vram, VRAM_SIZE);
  }
  gpu.vram = NULL;

  return ret;
}

void GPUwriteStatus(uint32_t data)
{
  uint32_t cmd = data >> 24;
  int src_x, src_y;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
  case 0x00:
    do_reset();
    break;
  case 0x01:
    do_cmd_reset();
    break;
  case 0x03:
    if (data & 1) {
      gpu.status |= PSX_GPU_STATUS_BLANKING;
      gpu.state.dims_changed = 1; // for hud clearing
    }
    else
      gpu.status &= ~PSX_GPU_STATUS_BLANKING;
    break;
  case 0x04:
    gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
    gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
    break;
  case 0x05:
    src_x = data & 0x3ff; src_y = (data >> 10) & 0x1ff;
    if (src_x != gpu.screen.src_x || src_y != gpu.screen.src_y) {
      gpu.screen.src_x = src_x;
      gpu.screen.src_y = src_y;
      renderer_notify_scanout_change(src_x, src_y);
      if (gpu.frameskip.set) {
        decide_frameskip_allow(gpu.ex_regs[3]);
        if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
          decide_frameskip();
          gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
        }
      }
    }
    break;
  case 0x06:
    gpu.screen.x1 = data & 0xfff;
    gpu.screen.x2 = (data >> 12) & 0xfff;
    update_width();
    break;
  case 0x07:
    gpu.screen.y1 = data & 0x3ff;
    gpu.screen.y2 = (data >> 10) & 0x3ff;
    update_height();
    break;
  case 0x08:
    gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
    update_width();
    update_height();
    renderer_notify_res_change();
    break;
  default:
    if ((cmd & 0xf0) == 0x10)
      get_gpu_info(data);
    break;
  }

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}
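
/*
 * For reference on the GP1(0x08) display mode case above: command bits 0-5
 * are placed into status bits 17-22 and command bit 6 into status bit 16,
 * i.e. the horizontal/vertical resolution, PAL, 24bpp and interlace flags of
 * the usual GPUSTAT layout, which update_width()/update_height() then decode
 * from gpu.status.
 */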

const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 80
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // a0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
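
/*
 * cmd_lengths[] holds, for each GP0 command byte, the number of parameter
 * words that follow the command word; the parsers below use
 * 1 + cmd_lengths[cmd] as the packet size. For example, a monochrome
 * four-point polygon (0x28) has cmd_lengths[0x28] = 4: one color/command
 * word plus four vertex words. Poly-lines (0x48-0x4f and 0x58-0x5f) are
 * terminated by a 0x5xxx5xxx word instead, so their real length is found
 * while parsing.
 */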

#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]

// this isn't very useful so should be rare
static void cpy_mask(uint16_t *dst, const uint16_t *src, int l, uint32_t r6)
{
  int i;
  if (r6 == 1) {
    for (i = 0; i < l; i++)
      dst[i] = src[i] | 0x8000;
  }
  else {
    uint16_t msb = r6 << 15;
    for (i = 0; i < l; i++) {
      uint16_t mask = (int16_t)dst[i] >> 15;
      dst[i] = (dst[i] & mask) | ((src[i] | msb) & ~mask);
    }
  }
}

static inline void do_vram_line(int x, int y, uint16_t *mem, int l,
    int is_read, uint32_t r6)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (unlikely(is_read))
    memcpy(mem, vram, l * 2);
  else if (unlikely(r6))
    cpy_mask(vram, mem, l, r6);
  else
    memcpy(vram, mem, l * 2);
}

static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint32_t r6 = gpu.ex_regs[6] & 3;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read, r6);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read, r6);
  }

  if (h > 0) {
    if (count > 0) {
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read, r6);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}
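
/*
 * Unit handling in do_vram_io(): 'count' arrives in 32-bit FIFO words, is
 * doubled to work in 16bpp pixels, and the return value converts back to
 * words. A partially completed row is carried over in gpu.dma.offset so the
 * next call resumes mid-row, and row addresses wrap at y & 511 because VRAM
 * is 1024x512 pixels.
 */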

static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;

  renderer_flush_queues();
  if (is_read) {
    gpu.status |= PSX_GPU_STATUS_IMG;
    // XXX: wrong for width 1
    gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_START);
}

static void finish_vram_transfer(int is_read)
{
  if (is_read)
    gpu.status &= ~PSX_GPU_STATUS_IMG;
  else {
    gpu.state.fb_dirty = 1;
    renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
        gpu.dma_start.w, gpu.dma_start.h, 0);
  }
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_END);
}

static void do_vram_copy(const uint32_t *params, int *cpu_cycles)
{
  const uint32_t sx =  LE32TOH(params[0]) & 0x3FF;
  const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1FF;
  const uint32_t dx =  LE32TOH(params[1]) & 0x3FF;
  const uint32_t dy = (LE32TOH(params[1]) >> 16) & 0x1FF;
  uint32_t w = ((LE32TOH(params[2]) - 1) & 0x3FF) + 1;
  uint32_t h = (((LE32TOH(params[2]) >> 16) - 1) & 0x1FF) + 1;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t lbuf[128];
  uint32_t x, y;

  *cpu_cycles += gput_copy(w, h);
  if (sx == dx && sy == dy && msb == 0)
    return;

  renderer_flush_queues();

  if (unlikely((sx < dx && dx < sx + w) || sx + w > 1024 || dx + w > 1024 || msb))
  {
    for (y = 0; y < h; y++)
    {
      const uint16_t *src = VRAM_MEM_XY(0, (sy + y) & 0x1ff);
      uint16_t *dst = VRAM_MEM_XY(0, (dy + y) & 0x1ff);
      for (x = 0; x < w; x += ARRAY_SIZE(lbuf))
      {
        uint32_t x1, w1 = w - x;
        if (w1 > ARRAY_SIZE(lbuf))
          w1 = ARRAY_SIZE(lbuf);
        for (x1 = 0; x1 < w1; x1++)
          lbuf[x1] = src[(sx + x + x1) & 0x3ff];
        for (x1 = 0; x1 < w1; x1++)
          dst[(dx + x + x1) & 0x3ff] = lbuf[x1] | msb;
      }
    }
  }
  else
  {
    uint32_t sy1 = sy, dy1 = dy;
    for (y = 0; y < h; y++, sy1++, dy1++)
      memcpy(VRAM_MEM_XY(dx, dy1 & 0x1ff), VRAM_MEM_XY(sx, sy1 & 0x1ff), w * 2);
  }

  renderer_update_caches(dx, dy, w, h, 0);
}
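
/*
 * Design note on do_vram_copy(): the fast path is a per-row memcpy(); the
 * slower staging path through lbuf[] is used when the source and destination
 * can overlap within a row, when either rectangle would wrap past x = 1024,
 * or when the mask-set bit in ex_regs[6] forces the MSB to be ORed into
 * every copied pixel.
 */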

static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy = 0, v;
  int skip = 1;

  gpu.frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = LE32TOH(list[0]) >> 24;
    len = 1 + cmd_lengths[cmd];
    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }

    switch (cmd) {
    case 0x02:
      if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
        // clearing something large, don't skip
        do_cmd_list(list, 3, &dummy, &dummy, &dummy);
      else
        memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
      break;
    case 0x24 ... 0x27:
    case 0x2c ... 0x2f:
    case 0x34 ... 0x37:
    case 0x3c ... 0x3f:
      gpu.ex_regs[1] &= ~0x1ff;
      gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
      break;
    case 0x48 ... 0x4F:
      for (v = 3; pos + v < count; v++)
      {
        if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
          break;
      }
      len += v - 3;
      break;
    case 0x58 ... 0x5F:
      for (v = 4; pos + v < count; v += 2)
      {
        if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
          break;
      }
      len += v - 4;
      break;
    default:
      if (cmd == 0xe3)
        skip = decide_frameskip_allow(LE32TOH(list[0]));
      if ((cmd & 0xf8) == 0xe0)
        gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
      break;
    }
    if (0x80 <= cmd && cmd <= 0xdf)
      break; // image i/o

    pos += len;
  }

  renderer_sync_ecmds(gpu.ex_regs);
  *last_cmd = cmd;
  return pos;
}

static noinline int do_cmd_buffer(uint32_t *data, int count,
    int *cycles_sum, int *cycles_last)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = LE32TOH(data[pos]) >> 24;
    if (0xa0 <= cmd && cmd <= 0xdf) {
      if (unlikely((pos+2) >= count)) {
        // incomplete vram write/read cmd, can't consume yet
        cmd = -1;
        break;
      }

      // consume vram write/read cmd
      start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
      pos += 3;
      continue;
    }
    else if ((cmd & 0xe0) == 0x80) {
      if (unlikely((pos+3) >= count)) {
        cmd = -1; // incomplete cmd, can't consume yet
        break;
      }
      *cycles_sum += *cycles_last;
      *cycles_last = 0;
      do_vram_copy(data + pos + 1, cycles_last);
      vram_dirty = 1;
      pos += 4;
      continue;
    }
    else if (cmd == 0x1f) {
      log_anomaly("irq1?\n");
      pos++;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, cycles_sum, cycles_last, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      // incomplete cmd
      break;
  }

  gpu.status &= ~0x1fff;
  gpu.status |= gpu.ex_regs[1] & 0x7ff;
  gpu.status |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

  return count - pos;
}
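
/*
 * After the parse loop above, the low GPUSTAT bits are rebuilt from the
 * shadow ex_regs[] copies: bits 0-10 from the last 0xe1 draw mode / texpage
 * word and bits 11-12 from the 0xe6 mask settings, so GPUreadStatus()
 * reflects whatever the command stream last set.
 */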

static noinline void flush_cmd_buffer(void)
{
  int dummy = 0, left;
  left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy, &dummy);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  if (left != gpu.cmd_len) {
    if (!gpu.dma.h && gpu.gpu_state_change)
      gpu.gpu_state_change(PGS_PRIMITIVE_START);
    gpu.cmd_len = left;
  }
}

void GPUwriteDataMem(uint32_t *mem, int count)
{
  int dummy = 0, left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = do_cmd_buffer(mem, count, &dummy, &dummy);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}

void GPUwriteData(uint32_t data)
{
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer();
}

long GPUdmaChain(uint32_t *rambase, uint32_t start_addr,
  uint32_t *progress_addr, int32_t *cycles_last_cmd)
{
  uint32_t addr, *list, ld_addr;
  int len, left, count, ld_count = 32;
  int cpu_cycles_sum = 0;
  int cpu_cycles_last = 0;

  preload(rambase + (start_addr & 0x1fffff) / 4);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  log_io("gpu_dma_chain\n");
  addr = ld_addr = start_addr & 0xffffff;
  for (count = 0; (addr & 0x800000) == 0; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = LE32TOH(list[0]) >> 24;
    addr = LE32TOH(list[0]) & 0xffffff;
    preload(rambase + (addr & 0x1fffff) / 4);

    cpu_cycles_sum += 10;
    if (len > 0)
      cpu_cycles_sum += 5 + len;

    log_io(".chain %08lx #%d+%d %u+%u\n",
      (long)(list - rambase) * 4, len, gpu.cmd_len, cpu_cycles_sum, cpu_cycles_last);
    if (unlikely(gpu.cmd_len > 0)) {
      if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) {
        log_anomaly("cmd_buffer overflow, likely garbage commands\n");
        gpu.cmd_len = 0;
      }
      memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
      gpu.cmd_len += len;
      flush_cmd_buffer();
      continue;
    }

    if (len) {
      left = do_cmd_buffer(list + 1, len, &cpu_cycles_sum, &cpu_cycles_last);
      if (left) {
        memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
        gpu.cmd_len = left;
        log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
      }
    }

    if (progress_addr) {
      *progress_addr = addr;
      break;
    }
    if (addr == ld_addr) {
      log_anomaly("GPUdmaChain: loop @ %08x, cnt=%u\n", addr, count);
      break;
    }
    if (count == ld_count) {
      ld_addr = addr;
      ld_count *= 2;
    }
  }

  //printf(" -> %d %d\n", cpu_cycles_sum, cpu_cycles_last);
  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles_sum + cpu_cycles_last;
  gpu.state.last_list.addr = start_addr;

  *cycles_last_cmd = cpu_cycles_last;
  return cpu_cycles_sum;
}
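
/*
 * Each node of the DMA linked list walked above is a header word followed by
 * its payload: header bits 0-23 hold the address of the next node (an
 * address with bit 23 set, conventionally 0xffffff, ends the list) and bits
 * 24-31 the number of command words that follow. The ld_addr/ld_count pair
 * appears to be a Brent-style cycle check, used to bail out of
 * self-referencing lists.
 */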

void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}

uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h) {
    ret = HTOLE32(ret);
    do_vram_io(&ret, 1, 1);
    ret = LE32TOH(ret);
  }

  log_io("gpu_read %08x\n", ret);
  return ret;
}

uint32_t GPUreadStatus(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.status;
  log_io("gpu_read_status %08x\n", ret);
  return ret;
}

struct GPUFreeze
{
  uint32_t ulFreezeVersion;      // should be always 1 for now (set by main emu)
  uint32_t ulStatus;             // current gpu status
  uint32_t ulControl[256];       // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};

long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
  case 1: // save
    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
    memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
    memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
    freeze->ulStatus = gpu.status;
    break;
  case 0: // load
    memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
    memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
    memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
    gpu.status = freeze->ulStatus;
    gpu.cmd_len = 0;
    for (i = 8; i > 0; i--) {
      gpu.regs[i] ^= 1; // avoid reg change detection
      GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
    }
    renderer_sync_ecmds(gpu.ex_regs);
    renderer_update_caches(0, 0, 1024, 512, 0);
    break;
  }

  return 1;
}

void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

#ifndef RAW_FB_DISPLAY
  if (gpu.status & PSX_GPU_STATUS_BLANKING) {
    if (!gpu.state.blanked) {
      vout_blank();
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;
    }
    return;
  }

  if (!gpu.state.fb_dirty)
    return;
#endif

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
    renderer_update_caches(0, 0, 1024, 512, 1);
  gpu.state.enhancement_was_active = gpu.state.enhancement_active;
  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
}

void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && (gpu.status & PSX_GPU_STATUS_INTERLACE)
    && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}

void GPUgetScreenInfo(int *y, int *base_hres)
{
  *y = gpu.screen.y;
  *base_hres = gpu.screen.vres;
  if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
    *base_hres >>= 1;
}

void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = (uint32_t *)cbs->gpu_hcnt;
  gpu.state.frame_count = (uint32_t *)cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
  gpu.state.screen_centering_type_default = cbs->screen_centering_type_default;
  if (gpu.state.screen_centering_type != cbs->screen_centering_type
      || gpu.state.screen_centering_x != cbs->screen_centering_x
      || gpu.state.screen_centering_y != cbs->screen_centering_y
      || gpu.state.show_overscan != cbs->show_overscan) {
    gpu.state.screen_centering_type = cbs->screen_centering_type;
    gpu.state.screen_centering_x = cbs->screen_centering_x;
    gpu.state.screen_centering_y = cbs->screen_centering_y;
    gpu.state.show_overscan = cbs->show_overscan;
    update_width();
    update_height();
  }

  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;
  gpu.gpu_state_change = cbs->gpu_state_change;

  // delayed vram mmap
  if (gpu.vram == NULL)
    map_vram();

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}

// vim:shiftwidth=2:expandtab