update gpulib_thread_if
[pcsx_rearmed.git] / plugins / gpulib / gpu.c
/*
 * (C) Gražvydas "notaz" Ignotas, 2011-2012
 *
 * This work is licensed under the terms of any of these licenses
 * (at your option):
 *  - GNU GPL, version 2 or later.
 *  - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.
 */

#include <stdio.h>
#include <stdlib.h> /* for calloc */
#include <string.h>

#include "gpu.h"
#include "gpu_timing.h"
#include "../../libpcsxcore/gpu.h" // meh
#include "../../frontend/plugin_lib.h"

#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
#ifdef __GNUC__
#define unlikely(x) __builtin_expect((x), 0)
#define preload __builtin_prefetch
#define noinline __attribute__((noinline))
#else
#define unlikely(x) (x)
#define preload(...)
#define noinline
#endif

//#define log_io gpu_log
#define log_io(...)

struct psx_gpu gpu;

static noinline int do_cmd_buffer(uint32_t *data, int count,
    int *cycles_sum, int *cycles_last);
static void finish_vram_transfer(int is_read);

static noinline void do_cmd_reset(void)
{
  int dummy = 0;
  renderer_sync();
  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy, &dummy);
  gpu.cmd_len = 0;

  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}

static noinline void do_reset(void)
{
  unsigned int i;

  do_cmd_reset();

  memset(gpu.regs, 0, sizeof(gpu.regs));
  for (i = 0; i < ARRAY_SIZE(gpu.ex_regs); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
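  // GPUSTAT reset value: per nocash docs this sets bits 13 (interlace
  // field), 23 (display disabled) and 26/28 (ready for cmd word / DMA)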
  gpu.status = 0x14802000;
  gpu.gp0 = 0;
  gpu.regs[3] = 1;
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
  gpu.screen.x = gpu.screen.y = 0;
  renderer_sync_ecmds(gpu.ex_regs);
  renderer_notify_res_change();
}

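// derive the visible width and x offset from the GP1(0x06) horizontal
// display range; e.g. with the typical NTSC range x1=0x260 (608) and
// x2=0xc60 (3168) at dot clock divider 8, this yields x=0, sw=320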
static noinline void update_width(void)
{
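  // horizontal resolution and dot clock divider for each value of
  // GPUSTAT bits 16-18; odd indexes are the "forced 368" modes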
  static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
  static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
  uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
  int hres = hres_all[(gpu.status >> 16) & 7];
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int sw = gpu.screen.x2 - gpu.screen.x1;
  int type = gpu.state.screen_centering_type;
  int x = 0, x_auto;
  if (type == C_AUTO)
    type = gpu.state.screen_centering_type_default;
  if (sw <= 0)
    /* nothing displayed? */;
  else {
    int s = pal ? 656 : 608; // or 600? pal is just a guess
    x = (gpu.screen.x1 - s) / hdiv;
    x = (x + 1) & ~1;   // blitter limitation
    sw /= hdiv;
    sw = (sw + 2) & ~3; // according to nocash
    switch (type) {
      case C_INGAME:
        break;
      case C_MANUAL:
        x = gpu.state.screen_centering_x;
        break;
      default:
        // correct if slightly miscentered
        x_auto = (hres - sw) / 2 & ~3;
        if ((uint32_t)x_auto <= 8u && abs(x) < 24)
          x = x_auto;
    }
    if (x + sw > hres)
      sw = hres - x;
    // .x range check is done in vout_update()
  }
  // reduce the unpleasant right border that a few games have
  if (gpu.state.screen_centering_type == 0
      && x <= 4 && hres - (x + sw) >= 4)
    hres -= 4;
  gpu.screen.x = x;
  gpu.screen.w = sw;
  gpu.screen.hres = hres;
  gpu.state.dims_changed = 1;
  //printf("xx %d %d -> %2d, %d / %d\n",
  //  gpu.screen.x1, gpu.screen.x2, x, sw, hres);
}

static noinline void update_height(void)
{
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
  int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
  int sh = gpu.screen.y2 - gpu.screen.y1;
  int center_tol = 16;
  int vres = 240;

  if (pal && (sh > 240 || gpu.screen.vres == 256))
    vres = 256;
  if (dheight)
    y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
  if (sh <= 0)
    /* nothing displayed? */;
  else {
    switch (gpu.state.screen_centering_type) {
      case C_INGAME:
        break;
      case C_BORDERLESS:
        y = 0;
        break;
      case C_MANUAL:
        y = gpu.state.screen_centering_y;
        break;
      default:
        // correct if slightly miscentered
        if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
          y = 0;
    }
    if (y + sh > vres)
      sh = vres - y;
  }
  gpu.screen.y = y;
  gpu.screen.h = sh;
  gpu.screen.vres = vres;
  gpu.state.dims_changed = 1;
  //printf("yy %d %d -> %d, %d / %d\n",
  //  gpu.screen.y1, gpu.screen.y2, y, sh, vres);
}

static noinline void decide_frameskip(void)
{
  *gpu.frameskip.dirty = 1;

  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  if (*gpu.frameskip.force)
    gpu.frameskip.active = 1;
  else if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;

  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    int dummy = 0;
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy, &dummy, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
  }
}

static noinline int decide_frameskip_allow(uint32_t cmd_e3)
{
  // no frameskip if it decides to draw to display area,
  // but not for interlace since it'll most likely always do that
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
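  // unsigned compare trick: x - src_x wraps to a huge value when the
  // draw offset is left of/above the scanout rect, so one test per
  // axis covers both out-of-range directions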
  gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
    (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
  return gpu.frameskip.allow;
}

static void flush_cmd_buffer(void);

static noinline void get_gpu_info(uint32_t data)
{
  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();
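  // GP1(0x10) info requests, per nocash: 2 = texture window, 3/4 = draw
  // area top-left/bottom-right, 5 = draw offset, 7 = GPU type (2 = new)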
  switch (data & 0x0f) {
    case 0x02:
    case 0x03:
    case 0x04:
      gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
      break;
    case 0x05:
      gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
      break;
    case 0x07:
      gpu.gp0 = 2;
      break;
    default:
      // gpu.gp0 unchanged
      break;
  }
}

#ifndef max
#define max(a, b) (((a) > (b)) ? (a) : (b))
#endif

// Minimum 16-byte VRAM alignment needed by the pixel-skipping
// renderer/downscaler gpu_unai uses in high-res modes:
#ifdef GCW_ZERO
  // On the GCW platform (MIPS), align to 8192 bytes (1 TLB entry) to reduce
  // the number of TLB fills. (Change this value if it ever gets large page
  // support.)
  #define VRAM_ALIGN 8192
#else
  #define VRAM_ALIGN 16
#endif

// double, for overdraw guard + at least 1 page before
#define VRAM_SIZE ((1024 * 512 * 2 * 2) + max(VRAM_ALIGN, 4096))

// vram ptr received from mmap/malloc/alloc (will deallocate using this)
static uint16_t *vram_ptr_orig = NULL;

#ifndef GPULIB_USE_MMAP
# ifdef __linux__
#  define GPULIB_USE_MMAP 1
# else
#  define GPULIB_USE_MMAP 0
# endif
#endif
static int map_vram(void)
{
#if GPULIB_USE_MMAP
  gpu.vram = vram_ptr_orig = gpu.mmap(VRAM_SIZE);
#else
  gpu.vram = vram_ptr_orig = calloc(VRAM_SIZE, 1);
#endif
  if (gpu.vram != NULL && gpu.vram != (void *)(intptr_t)-1) {
    // 4kb guard in front
    gpu.vram += (4096 / 2);
    // align
    gpu.vram = (uint16_t *)(((uintptr_t)gpu.vram + (VRAM_ALIGN - 1)) & ~(VRAM_ALIGN - 1));
    return 0;
  }
  else {
    fprintf(stderr, "could not map vram, expect crashes\n");
    gpu.vram = vram_ptr_orig = NULL; // don't munmap()/free() MAP_FAILED on shutdown
    return -1;
  }
}

long GPUinit(void)
{
  int ret;
  ret = vout_init();
  ret |= renderer_init();

  memset(&gpu.state, 0, sizeof(gpu.state));
  memset(&gpu.frameskip, 0, sizeof(gpu.frameskip));
  gpu.zero = 0;
  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.cmd_len = 0;
  do_reset();

  /*if (gpu.mmap != NULL) {
    if (map_vram() != 0)
      ret = -1;
  }*/
  return ret;
}

long GPUshutdown(void)
{
  long ret;

  renderer_finish();
  ret = vout_finish();

  if (vram_ptr_orig != NULL) {
#if GPULIB_USE_MMAP
    gpu.munmap(vram_ptr_orig, VRAM_SIZE);
#else
    free(vram_ptr_orig);
#endif
  }
  vram_ptr_orig = gpu.vram = NULL;

  return ret;
}

void GPUwriteStatus(uint32_t data)
{
  uint32_t cmd = data >> 24;
  int src_x, src_y;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
    case 0x00:
      do_reset();
      break;
    case 0x01:
      do_cmd_reset();
      break;
    case 0x03:
      if (data & 1) {
        gpu.status |= PSX_GPU_STATUS_BLANKING;
        gpu.state.dims_changed = 1; // for hud clearing
      }
      else
        gpu.status &= ~PSX_GPU_STATUS_BLANKING;
      break;
    case 0x04:
      gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
      gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
      break;
    case 0x05:
      src_x = data & 0x3ff; src_y = (data >> 10) & 0x1ff;
      if (src_x != gpu.screen.src_x || src_y != gpu.screen.src_y) {
        gpu.screen.src_x = src_x;
        gpu.screen.src_y = src_y;
        renderer_notify_scanout_change(src_x, src_y);
        if (gpu.frameskip.set) {
          decide_frameskip_allow(gpu.ex_regs[3]);
          if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
            decide_frameskip();
            gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
          }
        }
      }
      break;
    case 0x06:
      gpu.screen.x1 = data & 0xfff;
      gpu.screen.x2 = (data >> 12) & 0xfff;
      update_width();
      break;
    case 0x07:
      gpu.screen.y1 = data & 0x3ff;
      gpu.screen.y2 = (data >> 10) & 0x3ff;
      update_height();
      break;
    case 0x08:
      gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3f) << 17) | ((data & 0x40) << 10);
      update_width();
      update_height();
      renderer_notify_res_change();
      break;
    default:
      if ((cmd & 0xf0) == 0x10)
        get_gpu_info(data);
      break;
  }

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}

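// number of extra words that follow each GP0 command word; e.g. 0x20
// (flat triangle) is followed by 3 vertex words, and 0x30 (shaded
// triangle) by 5 more words (xy0, color1, xy1, color2, xy2)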
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 80
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // a0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]

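// GP0(0xe6) bit 0 forces the mask bit (bit 15) on every written pixel;
// cpy_msb is the copy variant that ORs that bit in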
static void cpy_msb(uint16_t *dst, const uint16_t *src, int l, uint16_t msb)
{
  int i;
  for (i = 0; i < l; i++)
    dst[i] = src[i] | msb;
}

static inline void do_vram_line(int x, int y, uint16_t *mem, int l,
    int is_read, uint16_t msb)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (unlikely(is_read))
    memcpy(mem, vram, l * 2);
  else if (unlikely(msb))
    cpy_msb(vram, mem, l, msb);
  else
    memcpy(vram, mem, l * 2);
}

static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  renderer_sync();

  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read, msb);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read, msb);
  }

  if (h > 0) {
    if (count > 0) {
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read, msb);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}

static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;

  renderer_flush_queues();
  if (is_read) {
    gpu.status |= PSX_GPU_STATUS_IMG;
    // XXX: wrong for width 1
    gpu.gp0 = LE32TOH(*(uint32_t *)VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_START);
}

static void finish_vram_transfer(int is_read)
{
  if (is_read)
    gpu.status &= ~PSX_GPU_STATUS_IMG;
  else {
    gpu.state.fb_dirty = 1;
    renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
        gpu.dma_start.w, gpu.dma_start.h, 0);
  }
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_END);
}

static void do_vram_copy(const uint32_t *params, int *cpu_cycles)
{
  const uint32_t sx = LE32TOH(params[0]) & 0x3ff;
  const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1ff;
  const uint32_t dx = LE32TOH(params[1]) & 0x3ff;
  const uint32_t dy = (LE32TOH(params[1]) >> 16) & 0x1ff;
  uint32_t w = ((LE32TOH(params[2]) - 1) & 0x3ff) + 1;
  uint32_t h = (((LE32TOH(params[2]) >> 16) - 1) & 0x1ff) + 1;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t lbuf[128];
  uint32_t x, y;

  *cpu_cycles += gput_copy(w, h);
  if (sx == dx && sy == dy && msb == 0)
    return;

  renderer_flush_queues();

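  // take the buffered slow path if the rects overlap in the copy
  // direction (a plain per-row memcpy could then read pixels it has
  // already written), if either rect wraps past x=1024, or if the
  // mask bit has to be ORed into every destination pixel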
  if (unlikely((sx < dx && dx < sx + w) || sx + w > 1024 || dx + w > 1024 || msb))
  {
    for (y = 0; y < h; y++)
    {
      const uint16_t *src = VRAM_MEM_XY(0, (sy + y) & 0x1ff);
      uint16_t *dst = VRAM_MEM_XY(0, (dy + y) & 0x1ff);
      for (x = 0; x < w; x += ARRAY_SIZE(lbuf))
      {
        uint32_t x1, w1 = w - x;
        if (w1 > ARRAY_SIZE(lbuf))
          w1 = ARRAY_SIZE(lbuf);
        for (x1 = 0; x1 < w1; x1++)
          lbuf[x1] = src[(sx + x + x1) & 0x3ff];
        for (x1 = 0; x1 < w1; x1++)
          dst[(dx + x + x1) & 0x3ff] = lbuf[x1] | msb;
      }
    }
  }
  else
  {
    uint32_t sy1 = sy, dy1 = dy;
    for (y = 0; y < h; y++, sy1++, dy1++)
      memcpy(VRAM_MEM_XY(dx, dy1 & 0x1ff), VRAM_MEM_XY(sx, sy1 & 0x1ff), w * 2);
  }

  renderer_update_caches(dx, dy, w, h, 0);
}

static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy = 0, v;
  int skip = 1;

  gpu.frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = LE32TOH(list[0]) >> 24;
    len = 1 + cmd_lengths[cmd];

    switch (cmd) {
      case 0x02:
        if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
          // clearing something large, don't skip
          do_cmd_list(list, 3, &dummy, &dummy, &dummy);
        else
          memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
        break;
      case 0x24 ... 0x27:
      case 0x2c ... 0x2f:
      case 0x34 ... 0x37:
      case 0x3c ... 0x3f:
        gpu.ex_regs[1] &= ~0x1ff;
        gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
        break;
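      // polylines are variable length; scan for the terminator word,
      // whose checked nibbles read 0x5xxx5xxx (0x55555555 in practice)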
      case 0x48 ... 0x4f:
        for (v = 3; pos + v < count; v++)
        {
          if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
            break;
        }
        len += v - 3;
        break;
      case 0x58 ... 0x5f:
        for (v = 4; pos + v < count; v += 2)
        {
          if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
            break;
        }
        len += v - 4;
        break;
      default:
        if (cmd == 0xe3)
          skip = decide_frameskip_allow(LE32TOH(list[0]));
        if ((cmd & 0xf8) == 0xe0)
          gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
        break;
    }

    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }
    if (0x80 <= cmd && cmd <= 0xdf)
      break; // image i/o

    pos += len;
  }

  renderer_sync_ecmds(gpu.ex_regs);
  *last_cmd = cmd;
  return pos;
}

static noinline int do_cmd_buffer(uint32_t *data, int count,
    int *cycles_sum, int *cycles_last)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = LE32TOH(data[pos]) >> 24;
    if (0xa0 <= cmd && cmd <= 0xdf) {
      if (unlikely((pos + 2) >= count)) {
        // incomplete vram write/read cmd, can't consume yet
        cmd = -1;
        break;
      }

      // consume vram write/read cmd
      start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
      pos += 3;
      continue;
    }
    else if ((cmd & 0xe0) == 0x80) {
      if (unlikely((pos + 3) >= count)) {
        cmd = -1; // incomplete cmd, can't consume yet
        break;
      }
      renderer_sync();
      *cycles_sum += *cycles_last;
      *cycles_last = 0;
      do_vram_copy(data + pos + 1, cycles_last);
      vram_dirty = 1;
      pos += 4;
      continue;
    }
    else if (cmd == 0x1f) {
      log_anomaly("irq1?\n");
      pos++;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, cycles_sum, cycles_last, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      // incomplete cmd
      break;
  }

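  // mirror the drawing settings into GPUSTAT: bits 0-10 come from the
  // texpage cmd (e1), bits 11-12 from the mask bit settings (e6)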
  gpu.status &= ~0x1fff;
  gpu.status |= gpu.ex_regs[1] & 0x7ff;
  gpu.status |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

  return count - pos;
}

static noinline void flush_cmd_buffer(void)
{
  int dummy = 0, left;
  left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy, &dummy);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  if (left != gpu.cmd_len) {
    if (!gpu.dma.h && gpu.gpu_state_change)
      gpu.gpu_state_change(PGS_PRIMITIVE_START);
    gpu.cmd_len = left;
  }
}

void GPUwriteDataMem(uint32_t *mem, int count)
{
  int dummy = 0, left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = do_cmd_buffer(mem, count, &dummy, &dummy);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}

void GPUwriteData(uint32_t data)
{
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer();
}

long GPUdmaChain(uint32_t *rambase, uint32_t start_addr,
    uint32_t *progress_addr, int32_t *cycles_last_cmd)
{
  uint32_t addr, *list, ld_addr = 0;
  int len, left, count;
  int cpu_cycles_sum = 0;
  int cpu_cycles_last = 0;

  preload(rambase + (start_addr & 0x1fffff) / 4);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; (addr & 0x800000) == 0; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = LE32TOH(list[0]) >> 24;
    addr = LE32TOH(list[0]) & 0xffffff;
    preload(rambase + (addr & 0x1fffff) / 4);

    cpu_cycles_sum += 10;
    if (len > 0)
      cpu_cycles_sum += 5 + len;

    log_io(".chain %08lx #%d+%d %u+%u\n",
      (long)(list - rambase) * 4, len, gpu.cmd_len, cpu_cycles_sum, cpu_cycles_last);
    if (unlikely(gpu.cmd_len > 0)) {
      if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) {
        log_anomaly("cmd_buffer overflow, likely garbage commands\n");
        gpu.cmd_len = 0;
      }
      memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
      gpu.cmd_len += len;
      flush_cmd_buffer();
      continue;
    }

    if (len) {
      left = do_cmd_buffer(list + 1, len, &cpu_cycles_sum, &cpu_cycles_last);
      if (left) {
        memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
        gpu.cmd_len = left;
        log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
      }
    }

    if (progress_addr) {
      *progress_addr = addr;
      break;
    }
    #define LD_THRESHOLD (8*1024)
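    // loop protection: past LD_THRESHOLD nodes, tag each visited node
    // with bit23 so that a revisited (tagged) node terminates the walk
    // above; the tags are stripped again below once the chain ends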
    if (count >= LD_THRESHOLD) {
      if (count == LD_THRESHOLD) {
        ld_addr = addr;
        continue;
      }

      // loop detection marker
      // (bit23 set causes DMA error on real machine, so
      //  unlikely to be ever set by the game)
      list[0] |= HTOLE32(0x800000);
    }
  }

  if (ld_addr != 0) {
    // remove loop detection markers
    count -= LD_THRESHOLD + 2;
    addr = ld_addr & 0x1fffff;
    while (count-- > 0) {
      list = rambase + addr / 4;
      addr = LE32TOH(list[0]) & 0x1fffff;
      list[0] &= HTOLE32(~0x800000);
    }
  }

  //printf(" -> %d %d\n", cpu_cycles_sum, cpu_cycles_last);
  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles_sum + cpu_cycles_last;
  gpu.state.last_list.addr = start_addr;

  *cycles_last_cmd = cpu_cycles_last;
  return cpu_cycles_sum;
}

void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}

uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h) {
    ret = HTOLE32(ret);
    do_vram_io(&ret, 1, 1);
    ret = LE32TOH(ret);
  }

  log_io("gpu_read %08x\n", ret);
  return ret;
}

uint32_t GPUreadStatus(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.status;
  log_io("gpu_read_status %08x\n", ret);
  return ret;
}

struct GPUFreeze
{
  uint32_t ulFreezeVersion;           // should always be 1 for now (set by main emu)
  uint32_t ulStatus;                  // current gpu status
  uint32_t ulControl[256];            // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};

long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
    case 1: // save
      if (gpu.cmd_len > 0)
        flush_cmd_buffer();

      renderer_sync();
      memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
      memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
      memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
      freeze->ulStatus = gpu.status;
      break;
    case 0: // load
      renderer_sync();
      memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
      memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
      memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
      gpu.status = freeze->ulStatus;
      gpu.cmd_len = 0;
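      // replay GP1 regs 1..8 so derived state (screen dims, dma mode,
      // display enable) is rebuilt; the xor defeats the same-value
      // write filter in GPUwriteStatus()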
      for (i = 8; i > 0; i--) {
        gpu.regs[i] ^= 1; // avoid reg change detection
        GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
      }
      renderer_sync_ecmds(gpu.ex_regs);
      renderer_update_caches(0, 0, 1024, 512, 0);
      break;
  }

  return 1;
}

void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

#ifndef RAW_FB_DISPLAY
  if (gpu.status & PSX_GPU_STATUS_BLANKING) {
    if (!gpu.state.blanked) {
      vout_blank();
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;
    }
    return;
  }

  renderer_notify_update_lace(0);

  if (!gpu.state.fb_dirty)
    return;
#endif

  if (gpu.frameskip.set) {
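    // if no finished frame has shown up for ~9 vsyncs, assume the
    // skipper overshot (or the game stopped flipping) and disable it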
    if (!gpu.frameskip.frame_ready) {
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
    renderer_update_caches(0, 0, 1024, 512, 1);
  gpu.state.enhancement_was_active = gpu.state.enhancement_active;
  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
  renderer_notify_update_lace(1);
}

void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && (gpu.status & PSX_GPU_STATUS_INTERLACE)
    && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}

void GPUgetScreenInfo(int *y, int *base_hres)
{
  *y = gpu.screen.y;
  *base_hres = gpu.screen.vres;
  if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
    *base_hres >>= 1;
}

void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.force = &cbs->fskip_force;
  gpu.frameskip.dirty = (void *)&cbs->fskip_dirty;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
  gpu.state.screen_centering_type_default = cbs->screen_centering_type_default;
  if (gpu.state.screen_centering_type != cbs->screen_centering_type
      || gpu.state.screen_centering_x != cbs->screen_centering_x
      || gpu.state.screen_centering_y != cbs->screen_centering_y) {
    gpu.state.screen_centering_type = cbs->screen_centering_type;
    gpu.state.screen_centering_x = cbs->screen_centering_x;
    gpu.state.screen_centering_y = cbs->screen_centering_y;
    update_width();
    update_height();
  }

  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;
  gpu.gpu_state_change = cbs->gpu_state_change;

  // delayed vram mmap
  if (gpu.vram == NULL)
    map_vram();

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}

// vim:shiftwidth=2:expandtab