add a thp-based huge page alloc fallback
[pcsx_rearmed.git] / plugins / gpulib / gpu.c
/*
 * (C) Gražvydas "notaz" Ignotas, 2011-2012
 *
 * This work is licensed under the terms of any of these licenses
 * (at your option):
 *  - GNU GPL, version 2 or later.
 *  - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "gpu.h"
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#ifdef __GNUC__
#define unlikely(x) __builtin_expect((x), 0)
#define preload __builtin_prefetch
#define noinline __attribute__((noinline))
#else
#define unlikely(x) (x)
#define preload(...)
#define noinline
#endif

//#define log_io gpu_log
#define log_io(...)

struct psx_gpu gpu;

static noinline int do_cmd_buffer(uint32_t *data, int count);
static void finish_vram_transfer(int is_read);

static noinline void do_cmd_reset(void)
{
  renderer_sync();

  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  gpu.cmd_len = 0;

  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}

static noinline void do_reset(void)
{
  unsigned int i;

  do_cmd_reset();

  memset(gpu.regs, 0, sizeof(gpu.regs));
  for (i = 0; i < ARRAY_SIZE(gpu.ex_regs); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
  gpu.status = 0x14802000;
  gpu.gp0 = 0;
  gpu.regs[3] = 1;
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
  gpu.screen.x = gpu.screen.y = 0;
  renderer_sync_ecmds(gpu.ex_regs);
  renderer_notify_res_change();
}

static noinline void update_width(void)
{
  static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
  static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
  uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
  int hres = hres_all[(gpu.status >> 16) & 7];
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int sw = gpu.screen.x2 - gpu.screen.x1;
  int x = 0, x_auto;
  if (sw <= 0)
    /* nothing displayed? */;
  else {
    int s = pal ? 656 : 608; // or 600? pal is just a guess
    x = (gpu.screen.x1 - s) / hdiv;
    x = (x + 1) & ~1;   // blitter limitation
    sw /= hdiv;
    sw = (sw + 2) & ~3; // according to nocash
    switch (gpu.state.screen_centering_type) {
    case 1:
      break;
    case 2:
      x = gpu.state.screen_centering_x;
      break;
    default:
      // correct if slightly miscentered
      x_auto = (hres - sw) / 2 & ~3;
      if ((uint32_t)x_auto <= 8u && abs(x) < 24)
        x = x_auto;
    }
    if (x + sw > hres)
      sw = hres - x;
    // .x range check is done in vout_update()
  }
  // reduce the unpleasant right border that a few games have
  if (gpu.state.screen_centering_type == 0
      && x <= 4 && hres - (x + sw) >= 4)
    hres -= 4;
  gpu.screen.x = x;
  gpu.screen.w = sw;
  gpu.screen.hres = hres;
  gpu.state.dims_changed = 1;
  //printf("xx %d %d -> %2d, %d / %d\n",
  //  gpu.screen.x1, gpu.screen.x2, x, sw, hres);
}
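
// Worked example for update_width() above, assuming the typical NTSC
// 320-wide display range x1=0x260, x2=0xc60 (hdiv 8):
//   sw = (0xc60 - 0x260) / 8 = 320, x = (0x260 - 608) / 8 = 0,
// i.e. a centered 320-pixel-wide picture.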

static noinline void update_height(void)
{
  int pal = gpu.status & PSX_GPU_STATUS_PAL;
  int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
  int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
  int sh = gpu.screen.y2 - gpu.screen.y1;
  int center_tol = 16;
  int vres = 240;

  if (pal && (sh > 240 || gpu.screen.vres == 256))
    vres = 256;
  if (dheight)
    y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
  if (sh <= 0)
    /* nothing displayed? */;
  else {
    switch (gpu.state.screen_centering_type) {
    case 1:
      break;
    case 2:
      y = gpu.state.screen_centering_y;
      break;
    default:
      // correct if slightly miscentered
      if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol)
        y = 0;
    }
    if (y + sh > vres)
      sh = vres - y;
  }
  gpu.screen.y = y;
  gpu.screen.h = sh;
  gpu.screen.vres = vres;
  gpu.state.dims_changed = 1;
  //printf("yy %d %d -> %d, %d / %d\n",
  //  gpu.screen.y1, gpu.screen.y2, y, sh, vres);
}

static noinline void decide_frameskip(void)
{
  *gpu.frameskip.dirty = 1;

  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  if (*gpu.frameskip.force)
    gpu.frameskip.active = 1;
  else if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;

  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    int dummy;
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
  }
}

static noinline int decide_frameskip_allow(uint32_t cmd_e3)
{
  // no frameskip if it decides to draw to display area,
  // but not for interlace since it'll most likely always do that
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
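  // the unsigned compares below fold "0 <= v && v < limit" into one test:
  // values below src_x/src_y wrap around to large unsigned numbers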
  gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
    (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
  return gpu.frameskip.allow;
}

static void flush_cmd_buffer(void);

static noinline void get_gpu_info(uint32_t data)
{
  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();
  switch (data & 0x0f) {
  case 0x02:
  case 0x03:
  case 0x04:
    gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
    break;
  case 0x05:
    gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
    break;
  case 0x07:
    gpu.gp0 = 2;
    break;
  default:
    // gpu.gp0 unchanged
    break;
  }
}

// double, for overdraw guard
#define VRAM_SIZE ((1024 * 512 * 2 * 2) + 4096)

// Minimum 16-byte VRAM alignment needed by gpu_unai's pixel-skipping
// renderer/downscaler used in high-res modes:
#ifdef GCW_ZERO
  // On the GCW Zero platform (MIPS), align to 8192 bytes (1 TLB entry) to
  // reduce the number of TLB fills. (Change this value if it ever gets large
  // page support.)
  #define VRAM_ALIGN 8192
#else
  #define VRAM_ALIGN 16
#endif
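
// The map_vram() variants below over-allocate by VRAM_ALIGN-1 bytes so an
// aligned VRAM_SIZE region always fits, then round the pointer up:
//   aligned = (ptr + (VRAM_ALIGN - 1)) & ~(VRAM_ALIGN - 1)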

// vram ptr received from mmap/malloc/alloc (will deallocate using this)
static uint16_t *vram_ptr_orig = NULL;

#ifdef GPULIB_USE_MMAP
static int map_vram(void)
{
  gpu.vram = vram_ptr_orig = gpu.mmap(VRAM_SIZE + (VRAM_ALIGN-1));
  if (gpu.vram != NULL) {
    // 4kb guard in front
    gpu.vram += (4096 / 2);
    // Align
    gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
    return 0;
  }
  else {
    fprintf(stderr, "could not map vram, expect crashes\n");
    return -1;
  }
}
#else
static int map_vram(void)
{
  gpu.vram = vram_ptr_orig = (uint16_t*)calloc(VRAM_SIZE + (VRAM_ALIGN-1), 1);
  if (gpu.vram != NULL) {
    // 4kb guard in front
    gpu.vram += (4096 / 2);
    // Align
    gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
    return 0;
  } else {
    fprintf(stderr, "could not allocate vram, expect crashes\n");
    return -1;
  }
}
#endif
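
/*
 * The commit subject mentions a THP-based huge page alloc fallback. Below is
 * a minimal, illustrative sketch (not wired into the build) of how such a
 * fallback can work on Linux: over-align the buffer to 2 MB and ask the
 * kernel to back it with transparent huge pages. alloc_vram_thp() is a
 * hypothetical name, not part of this file's API.
 */
#if 0
#include <sys/mman.h>

static void *alloc_vram_thp(size_t size)
{
  void *ptr = NULL;
  // 2 MB alignment matches the typical huge page size on x86-64/arm64
  if (posix_memalign(&ptr, 2 * 1024 * 1024, size) != 0)
    return NULL;
  // advisory only: plain 4k pages are still used if THP is unavailable
  madvise(ptr, size, MADV_HUGEPAGE);
  memset(ptr, 0, size); // match calloc's zeroing, faulting the pages in
  return ptr;
}
#endif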

long GPUinit(void)
{
#ifndef GPULIB_USE_MMAP
  if (gpu.vram == NULL) {
    if (map_vram() != 0) {
      printf("ERROR: could not allocate VRAM, exiting..\n");
      exit(1);
    }
  }
#endif

  //extern uint32_t hSyncCount;         // in psxcounters.cpp
  //extern uint32_t frame_counter;      // in psxcounters.cpp
  //gpu.state.hcnt = &hSyncCount;
  //gpu.state.frame_count = &frame_counter;

  int ret;
  ret = vout_init();
  ret |= renderer_init();

  memset(&gpu.state, 0, sizeof(gpu.state));
  memset(&gpu.frameskip, 0, sizeof(gpu.frameskip));
  gpu.zero = 0;
  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.cmd_len = 0;
  do_reset();

  /*if (gpu.mmap != NULL) {
    if (map_vram() != 0)
      ret = -1;
  }*/
  return ret;
}

long GPUshutdown(void)
{
  long ret;

  renderer_finish();
  ret = vout_finish();

  if (vram_ptr_orig != NULL) {
#ifdef GPULIB_USE_MMAP
    gpu.munmap(vram_ptr_orig, VRAM_SIZE);
#else
    free(vram_ptr_orig);
#endif
  }
  vram_ptr_orig = gpu.vram = NULL;

  return ret;
}

void GPUwriteStatus(uint32_t data)
{
  uint32_t cmd = data >> 24;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
    case 0x00:
      do_reset();
      break;
    case 0x01:
      do_cmd_reset();
      break;
    case 0x03:
      if (data & 1) {
        gpu.status |= PSX_GPU_STATUS_BLANKING;
        gpu.state.dims_changed = 1; // for hud clearing
      }
      else
        gpu.status &= ~PSX_GPU_STATUS_BLANKING;
      break;
    case 0x04:
      gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
      gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
      break;
    case 0x05:
      gpu.screen.src_x = data & 0x3ff;
      gpu.screen.src_y = (data >> 10) & 0x1ff;
      renderer_notify_scanout_x_change(gpu.screen.src_x, gpu.screen.hres);
      if (gpu.frameskip.set) {
        decide_frameskip_allow(gpu.ex_regs[3]);
        if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
          decide_frameskip();
          gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
        }
      }
      break;
    case 0x06:
      gpu.screen.x1 = data & 0xfff;
      gpu.screen.x2 = (data >> 12) & 0xfff;
      update_width();
      break;
    case 0x07:
      gpu.screen.y1 = data & 0x3ff;
      gpu.screen.y2 = (data >> 10) & 0x3ff;
      update_height();
      break;
    case 0x08:
      // display mode: bits 0-5 -> status bits 17-22, bit 6 -> status bit 16
      gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
      update_width();
      update_height();
      renderer_notify_res_change();
      break;
    default:
      if ((cmd & 0xf0) == 0x10)
        get_gpu_info(data);
      break;
  }

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}

const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 80
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // a0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
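
// Entries are payload words after the command word, so the total packet size
// is 1 + cmd_lengths[cmd]. Example: a monochrome quad (cmd 0x28) has
// cmd_lengths[0x28] == 4, i.e. 5 words: command/color plus 4 vertices.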

#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]

// OR the mask bit (bit 15) into each copied pixel
static void cpy_msb(uint16_t *dst, const uint16_t *src, int l, uint16_t msb)
{
  int i;
  for (i = 0; i < l; i++)
    dst[i] = src[i] | msb;
}

static inline void do_vram_line(int x, int y, uint16_t *mem, int l,
    int is_read, uint16_t msb)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (unlikely(is_read))
    memcpy(mem, vram, l * 2);
  else if (unlikely(msb))
    cpy_msb(vram, mem, l, msb);
  else
    memcpy(vram, mem, l * 2);
}

static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t msb = gpu.ex_regs[6] << 15; // "set mask bit" flag -> pixel bit 15
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  renderer_sync();

  if (gpu.dma.offset) {
    // finish a line that was only partially transferred last time
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read, msb);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read, msb);
  }

  if (h > 0) {
    if (count > 0) {
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read, msb);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2; // words consumed
}

static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;

  renderer_flush_queues();
  if (is_read) {
    gpu.status |= PSX_GPU_STATUS_IMG;
    // XXX: wrong for width 1
    gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
}

static void finish_vram_transfer(int is_read)
{
  if (is_read)
    gpu.status &= ~PSX_GPU_STATUS_IMG;
  else
    renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
        gpu.dma_start.w, gpu.dma_start.h, 0);
}

static void do_vram_copy(const uint32_t *params)
{
  const uint32_t sx = LE32TOH(params[0]) & 0x3FF;
  const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1FF;
  const uint32_t dx = LE32TOH(params[1]) & 0x3FF;
  const uint32_t dy = (LE32TOH(params[1]) >> 16) & 0x1FF;
  uint32_t w = ((LE32TOH(params[2]) - 1) & 0x3FF) + 1;
  uint32_t h = (((LE32TOH(params[2]) >> 16) - 1) & 0x1FF) + 1;
  uint16_t msb = gpu.ex_regs[6] << 15;
  uint16_t lbuf[128];
  uint32_t x, y;

  if (sx == dx && sy == dy && msb == 0)
    return;

  renderer_flush_queues();

  // overlapping, wrapping or mask-bit copies go through a line buffer
  if (unlikely((sx < dx && dx < sx + w) || sx + w > 1024 || dx + w > 1024 || msb))
  {
    for (y = 0; y < h; y++)
    {
      const uint16_t *src = VRAM_MEM_XY(0, (sy + y) & 0x1ff);
      uint16_t *dst = VRAM_MEM_XY(0, (dy + y) & 0x1ff);
      for (x = 0; x < w; x += ARRAY_SIZE(lbuf))
      {
        uint32_t x1, w1 = w - x;
        if (w1 > ARRAY_SIZE(lbuf))
          w1 = ARRAY_SIZE(lbuf);
        for (x1 = 0; x1 < w1; x1++)
          lbuf[x1] = src[(sx + x + x1) & 0x3ff];
        for (x1 = 0; x1 < w1; x1++)
          dst[(dx + x + x1) & 0x3ff] = lbuf[x1] | msb;
      }
    }
  }
  else
  {
    uint32_t sy1 = sy, dy1 = dy;
    for (y = 0; y < h; y++, sy1++, dy1++)
      memcpy(VRAM_MEM_XY(dx, dy1 & 0x1ff), VRAM_MEM_XY(sx, sy1 & 0x1ff), w * 2);
  }

  renderer_update_caches(dx, dy, w, h, 0);
}

static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy, v;
  int skip = 1;

  gpu.frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = LE32TOH(list[0]) >> 24;
    len = 1 + cmd_lengths[cmd];

    switch (cmd) {
      case 0x02:
        if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
          // clearing something large, don't skip
          do_cmd_list(list, 3, &dummy);
        else
          memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
        break;
      case 0x24 ... 0x27:
      case 0x2c ... 0x2f:
      case 0x34 ... 0x37:
      case 0x3c ... 0x3f:
        gpu.ex_regs[1] &= ~0x1ff;
        gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
        break;
      case 0x48 ... 0x4F:
        // monochrome poly-line: variable length, 0x5xxx5xxx terminated
        for (v = 3; pos + v < count; v++)
        {
          if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
            break;
        }
        len += v - 3;
        break;
      case 0x58 ... 0x5F:
        // shaded poly-line: 2 words per vertex, 0x5xxx5xxx terminated
        for (v = 4; pos + v < count; v += 2)
        {
          if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
            break;
        }
        len += v - 4;
        break;
      default:
        if (cmd == 0xe3)
          skip = decide_frameskip_allow(LE32TOH(list[0]));
        if ((cmd & 0xf8) == 0xe0)
          gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
        break;
    }

    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }
    if (0x80 <= cmd && cmd <= 0xdf)
      break; // image i/o

    pos += len;
  }

  renderer_sync_ecmds(gpu.ex_regs);
  *last_cmd = cmd;
  return pos;
}

static noinline int do_cmd_buffer(uint32_t *data, int count)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = LE32TOH(data[pos]) >> 24;
    if (0xa0 <= cmd && cmd <= 0xdf) {
      if (unlikely((pos+2) >= count)) {
        // incomplete vram write/read cmd, can't consume yet
        cmd = -1;
        break;
      }

      // consume vram write/read cmd
      start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
      pos += 3;
      continue;
    }
    else if ((cmd & 0xe0) == 0x80) {
      if (unlikely((pos+3) >= count)) {
        cmd = -1; // incomplete cmd, can't consume yet
        break;
      }
      do_vram_copy(data + pos + 1);
      pos += 4;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      // incomplete cmd
      break;
  }

  gpu.status &= ~0x1fff;
  gpu.status |= gpu.ex_regs[1] & 0x7ff;
  gpu.status |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

  return count - pos;
}

static void flush_cmd_buffer(void)
{
  int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  gpu.cmd_len = left;
}

void GPUwriteDataMem(uint32_t *mem, int count)
{
  int left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = do_cmd_buffer(mem, count);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}

void GPUwriteData(uint32_t data)
{
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer();
}

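/*
 * DMA linked list layout (as consumed below): each node starts with one word
 * whose top byte is the payload length in words and whose low 24 bits hold
 * the address of the next node; the payload words follow. A set bit 23 in
 * the address (e.g. the 0xffffff terminator) ends the chain.
 */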
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr)
{
  uint32_t addr, *list, ld_addr = 0;
  int len, left, count;
  long cpu_cycles = 0;

  preload(rambase + (start_addr & 0x1fffff) / 4);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; (addr & 0x800000) == 0; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = LE32TOH(list[0]) >> 24;
    addr = LE32TOH(list[0]) & 0xffffff;
    preload(rambase + (addr & 0x1fffff) / 4);

    cpu_cycles += 10;
    if (len > 0)
      cpu_cycles += 5 + len;

    log_io(".chain %08lx #%d+%d\n",
      (long)(list - rambase) * 4, len, gpu.cmd_len);
    if (unlikely(gpu.cmd_len > 0)) {
      if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) {
        log_anomaly("cmd_buffer overflow, likely garbage commands\n");
        gpu.cmd_len = 0;
      }
      memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
      gpu.cmd_len += len;
      flush_cmd_buffer();
      continue;
    }

    if (len) {
      left = do_cmd_buffer(list + 1, len);
      if (left) {
        memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
        gpu.cmd_len = left;
        log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
      }
    }

    if (progress_addr) {
      *progress_addr = addr;
      break;
    }
    #define LD_THRESHOLD (8*1024)
    if (count >= LD_THRESHOLD) {
      if (count == LD_THRESHOLD) {
        ld_addr = addr;
        continue;
      }

      // loop detection marker
      // (bit23 set causes DMA error on real machine, so
      //  unlikely to be ever set by the game)
      list[0] |= HTOLE32(0x800000);
    }
  }

  if (ld_addr != 0) {
    // remove loop detection markers
    count -= LD_THRESHOLD + 2;
    addr = ld_addr & 0x1fffff;
    while (count-- > 0) {
      list = rambase + addr / 4;
      addr = LE32TOH(list[0]) & 0x1fffff;
      list[0] &= HTOLE32(~0x800000);
    }
  }

  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;

  return cpu_cycles;
}

void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}

uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h) {
    ret = HTOLE32(ret);
    do_vram_io(&ret, 1, 1);
    ret = LE32TOH(ret);
  }

  log_io("gpu_read %08x\n", ret);
  return ret;
}

uint32_t GPUreadStatus(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.status;
  log_io("gpu_read_status %08x\n", ret);
  return ret;
}

struct GPUFreeze
{
  uint32_t ulFreezeVersion;      // should always be 1 for now (set by main emu)
  uint32_t ulStatus;             // current gpu status
  uint32_t ulControl[256];       // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};

long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
    case 1: // save
      if (gpu.cmd_len > 0)
        flush_cmd_buffer();

      renderer_sync();
      memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
      memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
      memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
      freeze->ulStatus = gpu.status;
      break;
    case 0: // load
      renderer_sync();
      memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
      memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
      memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
      gpu.status = freeze->ulStatus;
      gpu.cmd_len = 0;
      for (i = 8; i > 0; i--) {
        gpu.regs[i] ^= 1; // avoid reg change detection
        GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
      }
      renderer_sync_ecmds(gpu.ex_regs);
      renderer_update_caches(0, 0, 1024, 512, 1);
      break;
  }

  return 1;
}

void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

#ifndef RAW_FB_DISPLAY
  if (gpu.status & PSX_GPU_STATUS_BLANKING) {
    if (!gpu.state.blanked) {
      vout_blank();
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;
    }
    return;
  }

  renderer_notify_update_lace(0);

  if (!gpu.state.fb_dirty)
    return;
#endif

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
    renderer_update_caches(0, 0, 1024, 512, 1);
  gpu.state.enhancement_was_active = gpu.state.enhancement_active;
  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
  renderer_notify_update_lace(1);
}

void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && (gpu.status & PSX_GPU_STATUS_INTERLACE)
    && (gpu.status & PSX_GPU_STATUS_DHEIGHT);
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}

void GPUgetScreenInfo(int *y, int *base_hres)
{
  *y = gpu.screen.y;
  *base_hres = gpu.screen.vres;
  if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
    *base_hres >>= 1;
}

#include "../../frontend/plugin_lib.h"

void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.force = &cbs->fskip_force;
  gpu.frameskip.dirty = (void *)&cbs->fskip_dirty;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
  if (gpu.state.screen_centering_type != cbs->screen_centering_type
      || gpu.state.screen_centering_x != cbs->screen_centering_x
      || gpu.state.screen_centering_y != cbs->screen_centering_y) {
    gpu.state.screen_centering_type = cbs->screen_centering_type;
    gpu.state.screen_centering_x = cbs->screen_centering_x;
    gpu.state.screen_centering_y = cbs->screen_centering_y;
    update_width();
    update_height();
  }

  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;

  // delayed vram mmap
  if (gpu.vram == NULL)
    map_vram();

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}

// vim:shiftwidth=2:expandtab