/*
 * (C) Gražvydas "notaz" Ignotas, 2011-2012
 *
 * This work is licensed under the terms of any of these licenses
 * (at your option):
 *  - GNU GPL, version 2 or later.
 *  - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "gpu.h"
#include "gpu_timing.h"
#include "../../libpcsxcore/gpu.h" // meh
#include "../../frontend/plugin_lib.h"

#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
#ifdef __GNUC__
#define unlikely(x) __builtin_expect((x), 0)
#define preload __builtin_prefetch
#define noinline __attribute__((noinline))
#else
#define unlikely(x) (x) // must still yield the condition on non-GNU compilers
#define preload(...)
#define noinline
#endif

//#define log_io gpu_log
#define log_io(...)

struct psx_gpu gpu;

static noinline int do_cmd_buffer(uint32_t *data, int count,
    int *cycles_sum, int *cycles_last);
static void finish_vram_transfer(int is_read);

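// GP1(01h)-style command buffer reset: drain anything still buffered
// and abort an in-flight VRAM transfer, leaving the GPU idle.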
static noinline void do_cmd_reset(void)
{
  int dummy = 0;
  renderer_sync();
  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy, &dummy);
  gpu.cmd_len = 0;

  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}

static noinline void do_reset(void)
{
  unsigned int i;

  do_cmd_reset();

  memset(gpu.regs, 0, sizeof(gpu.regs));
  for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
  gpu.status = 0x14802000;
  gpu.gp0 = 0;
  gpu.regs[3] = 1;
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
  gpu.screen.x = gpu.screen.y = 0;
  renderer_sync_ecmds(gpu.ex_regs);
  renderer_notify_res_change();
}

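// Derive the visible width from the dot clock divider (status bits
// 16-18) and the GP1(06h) horizontal display range, then apply the
// selected centering mode and some heuristics for miscentered games.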
static noinline void update_width(void)
{
  static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
  static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
  uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
  int hres = hres_all[(gpu.status >> 16) & 7];
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int sw = gpu.screen.x2 - gpu.screen.x1;
  int type = gpu.state.screen_centering_type;
  int x = 0, x_auto;
  if (type == C_AUTO)
    type = gpu.state.screen_centering_type_default;
  if (sw <= 0)
    /* nothing displayed? */;
  else {
    int s = pal ? 656 : 608; // or 600? pal is just a guess
    x = (gpu.screen.x1 - s) / hdiv;
    x = (x + 1) & ~1;   // blitter limitation
    sw /= hdiv;
    sw = (sw + 2) & ~3; // according to nocash
    switch (type) {
    case C_INGAME:
      break;
    case C_MANUAL:
      x = gpu.state.screen_centering_x;
      break;
    default:
      // correct if slightly miscentered
      x_auto = (hres - sw) / 2 & ~3;
      if ((uint32_t)x_auto <= 8u && abs(x) < 24)
        x = x_auto;
    }
    if (x + sw > hres)
      sw = hres - x;
    // .x range check is done in vout_update()
  }
  // reduce the unpleasant right border that a few games have
  if (gpu.state.screen_centering_type == 0
      && x <= 4 && hres - (x + sw) >= 4)
    hres -= 4;
  gpu.screen.x = x;
  gpu.screen.w = sw;
  gpu.screen.hres = hres;
  gpu.state.dims_changed = 1;
  //printf("xx %d %d -> %2d, %d / %d\n",
  //  gpu.screen.x1, gpu.screen.x2, x, sw, hres);
}

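// Derive the visible height from the GP1(07h) vertical display range;
// everything is doubled when the 480-line "dheight" mode is active.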
static noinline void update_height(void)
{
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
  int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
  int sh = gpu.screen.y2 - gpu.screen.y1;
  int center_tol = 16;
  int vres = 240;

  if (pal && (sh > 240 || gpu.screen.vres == 256))
    vres = 256;
  if (dheight)
    y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
  if (sh <= 0)
    /* nothing displayed? */;
  else {
    switch (gpu.state.screen_centering_type) {
    case C_INGAME:
      break;
    case C_BORDERLESS:
      y = 0;
      break;
    case C_MANUAL:
      y = gpu.state.screen_centering_y;
      break;
    default:
      // correct if slightly miscentered
      if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
        y = 0;
    }
    if (y + sh > vres)
      sh = vres - y;
  }
  gpu.screen.y = y;
  gpu.screen.h = sh;
  gpu.screen.vres = vres;
  gpu.state.dims_changed = 1;
  //printf("yy %d %d -> %d, %d / %d\n",
  //  gpu.screen.y1, gpu.screen.y2, y, sh, vres);
}

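// Decide whether upcoming frames should be skipped, based on the
// forced setting, the frontend's advice and how many frames in a row
// were already skipped; when skipping ends, a deferred fill (GP0(02h))
// from the skipped frame is flushed to keep VRAM consistent.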
static noinline void decide_frameskip(void)
{
  *gpu.frameskip.dirty = 1;

  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  if (*gpu.frameskip.force)
    gpu.frameskip.active = 1;
  else if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;

  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    int dummy = 0;
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy, &dummy, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
  }
}

static noinline int decide_frameskip_allow(uint32_t cmd_e3)
{
  // no frameskip if it decides to draw to display area,
  // but not for interlace since it'll most likely always do that
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
  gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
    (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
  return gpu.frameskip.allow;
}

static void flush_cmd_buffer(void);

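// GP1(10h) GPU info query: returns draw-environment state (texture
// window, draw area, draw offset) or the GPU version through GP0.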
static noinline void get_gpu_info(uint32_t data)
{
  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();
  switch (data & 0x0f) {
  case 0x02:
  case 0x03:
  case 0x04:
    gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
    break;
  case 0x05:
    gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
    break;
  case 0x07:
    gpu.gp0 = 2;
    break;
  default:
    // gpu.gp0 unchanged
    break;
  }
}

#ifndef max
#define max(a, b) (((a) > (b)) ? (a) : (b))
#endif

// Minimum 16-byte VRAM alignment needed by gpu_unai's pixel-skipping
// renderer/downscaler it uses in high res modes:
#ifdef GCW_ZERO
  // On GCW platform (MIPS), align to 8192 bytes (1 TLB entry) to reduce # of
  // fills. (Will change this value if it ever gets large page support)
  #define VRAM_ALIGN 8192
#else
  #define VRAM_ALIGN 16
#endif

// double, for overdraw guard + at least 1 page before
#define VRAM_SIZE ((1024 * 512 * 2 * 2) + max(VRAM_ALIGN, 4096))

// vram ptr received from mmap/malloc/alloc (will deallocate using this)
static uint16_t *vram_ptr_orig = NULL;

#ifndef GPULIB_USE_MMAP
# ifdef __linux__
#  define GPULIB_USE_MMAP 1
# else
#  define GPULIB_USE_MMAP 0
# endif
#endif
static int map_vram(void)
{
#if GPULIB_USE_MMAP
  gpu.vram = vram_ptr_orig = gpu.mmap(VRAM_SIZE);
#else
  gpu.vram = vram_ptr_orig = calloc(VRAM_SIZE, 1);
#endif
  if (gpu.vram != NULL && gpu.vram != (void *)(intptr_t)-1) {
    // 4kb guard in front
    gpu.vram += (4096 / 2);
    // Align
    gpu.vram = (uint16_t *)(((uintptr_t)gpu.vram + (VRAM_ALIGN - 1)) & ~(VRAM_ALIGN - 1));
    return 0;
  }
  else {
    fprintf(stderr, "could not map vram, expect crashes\n");
    return -1;
  }
}

long GPUinit(void)
{
  int ret;
  ret = vout_init();
  ret |= renderer_init();

  memset(&gpu.state, 0, sizeof(gpu.state));
  memset(&gpu.frameskip, 0, sizeof(gpu.frameskip));
  gpu.zero = 0;
  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.cmd_len = 0;
  do_reset();

  /*if (gpu.mmap != NULL) {
    if (map_vram() != 0)
      ret = -1;
  }*/
  return ret;
}

long GPUshutdown(void)
{
  long ret;

  renderer_finish();
  ret = vout_finish();

  if (vram_ptr_orig != NULL) {
#if GPULIB_USE_MMAP
    gpu.munmap(vram_ptr_orig, VRAM_SIZE);
#else
    free(vram_ptr_orig);
#endif
  }
  vram_ptr_orig = gpu.vram = NULL;

  return ret;
}

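// GP1 control port write dispatcher. Values are latched into
// gpu.regs[] so that repeated writes of an unchanged value can be
// ignored, except for the reset and display-address commands which
// must always take effect.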
void GPUwriteStatus(uint32_t data)
{
  uint32_t cmd = data >> 24;
  int src_x, src_y;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
  case 0x00:
    do_reset();
    break;
  case 0x01:
    do_cmd_reset();
    break;
  case 0x03:
    if (data & 1) {
      gpu.status |= PSX_GPU_STATUS_BLANKING;
      gpu.state.dims_changed = 1; // for hud clearing
    }
    else
      gpu.status &= ~PSX_GPU_STATUS_BLANKING;
    break;
  case 0x04:
    gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
    gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
    break;
  case 0x05:
    src_x = data & 0x3ff; src_y = (data >> 10) & 0x1ff;
    if (src_x != gpu.screen.src_x || src_y != gpu.screen.src_y) {
      gpu.screen.src_x = src_x;
      gpu.screen.src_y = src_y;
      renderer_notify_scanout_change(src_x, src_y);
      if (gpu.frameskip.set) {
        decide_frameskip_allow(gpu.ex_regs[3]);
        if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
          decide_frameskip();
          gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
        }
      }
    }
    break;
  case 0x06:
    gpu.screen.x1 = data & 0xfff;
    gpu.screen.x2 = (data >> 12) & 0xfff;
    update_width();
    break;
  case 0x07:
    gpu.screen.y1 = data & 0x3ff;
    gpu.screen.y2 = (data >> 10) & 0x3ff;
    update_height();
    break;
  case 0x08:
    gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
    update_width();
    update_height();
    renderer_notify_res_change();
    break;
  default:
    if ((cmd & 0xf0) == 0x10)
      get_gpu_info(data);
    break;
  }

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}

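// Number of parameter words that follow each GP0 command word (the
// command word itself is not counted). Variable-length commands such
// as polylines (0x48-0x5f) and VRAM transfers (0xa0/0xc0) are
// special-cased by the parsers below.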
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 80
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // a0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]

static void cpy_msb(uint16_t *dst, const uint16_t *src, int l, uint16_t msb)
{
  int i;
  for (i = 0; i < l; i++)
    dst[i] = src[i] | msb;
}

static inline void do_vram_line(int x, int y, uint16_t *mem, int l,
    int is_read, uint16_t msb)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (unlikely(is_read))
    memcpy(mem, vram, l * 2);
  else if (unlikely(msb))
    cpy_msb(vram, mem, l, msb);
  else
    memcpy(vram, mem, l * 2);
}

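// Move buffered words into (or out of) the VRAM transfer set up by
// start_vram_transfer(), line by line, wrapping at the 512-line VRAM
// boundary. Returns the number of words consumed.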
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  renderer_sync();

  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read, msb);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read, msb);
  }

  if (h > 0) {
    if (count > 0) {
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read, msb);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}

static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;

  renderer_flush_queues();
  if (is_read) {
    gpu.status |= PSX_GPU_STATUS_IMG;
    // XXX: wrong for width 1
    gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_START);
}

static void finish_vram_transfer(int is_read)
{
  if (is_read)
    gpu.status &= ~PSX_GPU_STATUS_IMG;
  else {
    gpu.state.fb_dirty = 1;
    renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
        gpu.dma_start.w, gpu.dma_start.h, 0);
  }
  if (gpu.gpu_state_change)
    gpu.gpu_state_change(PGS_VRAM_TRANSFER_END);
}

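// GP0(0x80) VRAM-to-VRAM copy. The slow path goes through a small line
// buffer when the rectangles overlap, wrap around the 1024-pixel row,
// or the mask bit must be set; otherwise whole rows are memcpy'd.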
static void do_vram_copy(const uint32_t *params, int *cpu_cycles)
{
  const uint32_t sx = LE32TOH(params[0]) & 0x3FF;
  const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1FF;
  const uint32_t dx = LE32TOH(params[1]) & 0x3FF;
  const uint32_t dy = (LE32TOH(params[1]) >> 16) & 0x1FF;
  uint32_t w = ((LE32TOH(params[2]) - 1) & 0x3FF) + 1;
  uint32_t h = (((LE32TOH(params[2]) >> 16) - 1) & 0x1FF) + 1;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t lbuf[128];
  uint32_t x, y;

  *cpu_cycles += gput_copy(w, h);
  if (sx == dx && sy == dy && msb == 0)
    return;

  renderer_flush_queues();

  if (unlikely((sx < dx && dx < sx + w) || sx + w > 1024 || dx + w > 1024 || msb))
  {
    for (y = 0; y < h; y++)
    {
      const uint16_t *src = VRAM_MEM_XY(0, (sy + y) & 0x1ff);
      uint16_t *dst = VRAM_MEM_XY(0, (dy + y) & 0x1ff);
      for (x = 0; x < w; x += ARRAY_SIZE(lbuf))
      {
        uint32_t x1, w1 = w - x;
        if (w1 > ARRAY_SIZE(lbuf))
          w1 = ARRAY_SIZE(lbuf);
        for (x1 = 0; x1 < w1; x1++)
          lbuf[x1] = src[(sx + x + x1) & 0x3ff];
        for (x1 = 0; x1 < w1; x1++)
          dst[(dx + x + x1) & 0x3ff] = lbuf[x1] | msb;
      }
    }
  }
  else
  {
    uint32_t sy1 = sy, dy1 = dy;
    for (y = 0; y < h; y++, sy1++, dy1++)
      memcpy(VRAM_MEM_XY(dx, dy1 & 0x1ff), VRAM_MEM_XY(sx, sy1 & 0x1ff), w * 2);
  }

  renderer_update_caches(dx, dy, w, h, 0);
}

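// Parse a command stream while frameskip is active: drawing commands
// are discarded, but state that must survive the skipped frame
// (e0-e7 registers, large fills, variable polyline lengths) is still
// tracked. Returns the number of words consumed.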
static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy = 0, v;
  int skip = 1;

  gpu.frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = LE32TOH(list[0]) >> 24;
    len = 1 + cmd_lengths[cmd];
    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }

    switch (cmd) {
    case 0x02:
      if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
        // clearing something large, don't skip
        do_cmd_list(list, 3, &dummy, &dummy, &dummy);
      else
        memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
      break;
    case 0x24 ... 0x27:
    case 0x2c ... 0x2f:
    case 0x34 ... 0x37:
    case 0x3c ... 0x3f:
      gpu.ex_regs[1] &= ~0x1ff;
      gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
      break;
    case 0x48 ... 0x4F:
      for (v = 3; pos + v < count; v++)
      {
        if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
          break;
      }
      len += v - 3;
      break;
    case 0x58 ... 0x5F:
      for (v = 4; pos + v < count; v += 2)
      {
        if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
          break;
      }
      len += v - 4;
      break;
    default:
      if (cmd == 0xe3)
        skip = decide_frameskip_allow(LE32TOH(list[0]));
      if ((cmd & 0xf8) == 0xe0)
        gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
      break;
    }
    if (0x80 <= cmd && cmd <= 0xdf)
      break; // image i/o

    pos += len;
  }

  renderer_sync_ecmds(gpu.ex_regs);
  *last_cmd = cmd;
  return pos;
}

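// Main GP0 command processor: resumes a pending VRAM transfer if one
// is active, special-cases VRAM transfer setup (0xa0/0xc0), VRAM
// copies (0x80) and irq request (0x1f), and hands everything else to
// the renderer or to the skip parser. Returns the number of words
// left unprocessed (incomplete trailing command).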
static noinline int do_cmd_buffer(uint32_t *data, int count,
    int *cycles_sum, int *cycles_last)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = LE32TOH(data[pos]) >> 24;
    if (0xa0 <= cmd && cmd <= 0xdf) {
      if (unlikely((pos+2) >= count)) {
        // incomplete vram write/read cmd, can't consume yet
        cmd = -1;
        break;
      }

      // consume vram write/read cmd
      start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
      pos += 3;
      continue;
    }
    else if ((cmd & 0xe0) == 0x80) {
      if (unlikely((pos+3) >= count)) {
        cmd = -1; // incomplete cmd, can't consume yet
        break;
      }
      renderer_sync();
      *cycles_sum += *cycles_last;
      *cycles_last = 0;
      do_vram_copy(data + pos + 1, cycles_last);
      vram_dirty = 1;
      pos += 4;
      continue;
    }
    else if (cmd == 0x1f) {
      log_anomaly("irq1?\n");
      pos++;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, cycles_sum, cycles_last, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      // incomplete cmd
      break;
  }

  gpu.status &= ~0x1fff;
  gpu.status |= gpu.ex_regs[1] & 0x7ff;
  gpu.status |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

  return count - pos;
}

static noinline void flush_cmd_buffer(void)
{
  int dummy = 0, left;
  left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy, &dummy);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  if (left != gpu.cmd_len) {
    if (!gpu.dma.h && gpu.gpu_state_change)
      gpu.gpu_state_change(PGS_PRIMITIVE_START);
    gpu.cmd_len = left;
  }
}

void GPUwriteDataMem(uint32_t *mem, int count)
{
  int dummy = 0, left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = do_cmd_buffer(mem, count, &dummy, &dummy);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}

void GPUwriteData(uint32_t data)
{
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer();
}

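// Walk a DMA linked list: each packet starts with a header word
// holding the next address in its low 24 bits and the payload length
// in the top 8. Loop protection compares against a checkpoint address
// that is moved forward at power-of-two intervals, in the style of
// Brent's cycle detection.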
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr,
    uint32_t *progress_addr, int32_t *cycles_last_cmd)
{
  uint32_t addr, *list, ld_addr;
  int len, left, count, ld_count = 32;
  int cpu_cycles_sum = 0;
  int cpu_cycles_last = 0;

  preload(rambase + (start_addr & 0x1fffff) / 4);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  log_io("gpu_dma_chain\n");
  addr = ld_addr = start_addr & 0xffffff;
  for (count = 0; (addr & 0x800000) == 0; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = LE32TOH(list[0]) >> 24;
    addr = LE32TOH(list[0]) & 0xffffff;
    preload(rambase + (addr & 0x1fffff) / 4);

    cpu_cycles_sum += 10;
    if (len > 0)
      cpu_cycles_sum += 5 + len;

    log_io(".chain %08lx #%d+%d %u+%u\n",
      (long)(list - rambase) * 4, len, gpu.cmd_len, cpu_cycles_sum, cpu_cycles_last);
    if (unlikely(gpu.cmd_len > 0)) {
      if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) {
        log_anomaly("cmd_buffer overflow, likely garbage commands\n");
        gpu.cmd_len = 0;
      }
      memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
      gpu.cmd_len += len;
      flush_cmd_buffer();
      continue;
    }

    if (len) {
      left = do_cmd_buffer(list + 1, len, &cpu_cycles_sum, &cpu_cycles_last);
      if (left) {
        memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
        gpu.cmd_len = left;
        log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
      }
    }

    if (progress_addr) {
      *progress_addr = addr;
      break;
    }
    if (addr == ld_addr) {
      log_anomaly("GPUdmaChain: loop @ %08x, cnt=%u\n", addr, count);
      break;
    }
    if (count == ld_count) {
      ld_addr = addr;
      ld_count *= 2;
    }
  }

  //printf(" -> %d %d\n", cpu_cycles_sum, cpu_cycles_last);
  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles_sum + cpu_cycles_last;
  gpu.state.last_list.addr = start_addr;

  *cycles_last_cmd = cpu_cycles_last;
  return cpu_cycles_sum;
}

void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}

uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h) {
    ret = HTOLE32(ret);
    do_vram_io(&ret, 1, 1);
    ret = LE32TOH(ret);
  }

  log_io("gpu_read %08x\n", ret);
  return ret;
}

uint32_t GPUreadStatus(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.status;
  log_io("gpu_read_status %08x\n", ret);
  return ret;
}

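// Savestate interface. On load, the control registers are replayed
// through GPUwriteStatus() with a toggled stored bit so the
// "unchanged value" check cannot filter them out.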
struct GPUFreeze
{
  uint32_t ulFreezeVersion;      // should be always 1 for now (set by main emu)
  uint32_t ulStatus;             // current gpu status
  uint32_t ulControl[256];       // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};

long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
  case 1: // save
    if (gpu.cmd_len > 0)
      flush_cmd_buffer();

    renderer_sync();
    memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
    memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
    memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
    freeze->ulStatus = gpu.status;
    break;
  case 0: // load
    renderer_sync();
    memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
    memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
    memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
    gpu.status = freeze->ulStatus;
    gpu.cmd_len = 0;
    for (i = 8; i > 0; i--) {
      gpu.regs[i] ^= 1; // avoid reg change detection
      GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
    }
    renderer_sync_ecmds(gpu.ex_regs);
    renderer_update_caches(0, 0, 1024, 512, 0);
    break;
  }

  return 1;
}

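// Per-frame display update (called by the emu core at vsync): presents
// the frame unless the display is blanked, frameskip is holding it
// back, or nothing has changed since the last update.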
void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

#ifndef RAW_FB_DISPLAY
  if (gpu.status & PSX_GPU_STATUS_BLANKING) {
    if (!gpu.state.blanked) {
      vout_blank();
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;
    }
    return;
  }

  renderer_notify_update_lace(0);

  if (!gpu.state.fb_dirty)
    return;
#endif

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
    renderer_update_caches(0, 0, 1024, 512, 1);
  gpu.state.enhancement_was_active = gpu.state.enhancement_active;
  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
  renderer_notify_update_lace(1);
}

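// Track interlace state at vblank and tell the renderer which field
// to draw next; in "auto" mode (allow_interlace == 2) interlace
// emulation is dropped for games that haven't read VRAM recently.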
void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && (gpu.status & PSX_GPU_STATUS_INTERLACE)
    && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}

void GPUgetScreenInfo(int *y, int *base_hres)
{
  *y = gpu.screen.y;
  *base_hres = gpu.screen.vres;
  if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
    *base_hres >>= 1;
}

void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.force = &cbs->fskip_force;
  gpu.frameskip.dirty = (void *)&cbs->fskip_dirty;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
  gpu.state.screen_centering_type_default = cbs->screen_centering_type_default;
  if (gpu.state.screen_centering_type != cbs->screen_centering_type
      || gpu.state.screen_centering_x != cbs->screen_centering_x
      || gpu.state.screen_centering_y != cbs->screen_centering_y) {
    gpu.state.screen_centering_type = cbs->screen_centering_type;
    gpu.state.screen_centering_x = cbs->screen_centering_x;
    gpu.state.screen_centering_y = cbs->screen_centering_y;
    update_width();
    update_height();
  }

  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;
  gpu.gpu_state_change = cbs->gpu_state_change;

  // delayed vram mmap
  if (gpu.vram == NULL)
    map_vram();

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}

// vim:shiftwidth=2:expandtab