frontend: add minimize support
plugins/gpu_neon/gpu.c
/*
 * (C) Gražvydas "notaz" Ignotas, 2011
 *
 * This work is licensed under the terms of any of these licenses
 * (at your option):
 *  - GNU GPL, version 2 or later.
 *  - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.
 */

#include <stdio.h>
#include <string.h>
#include "gpu.h"

#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#define unlikely(x) __builtin_expect((x), 0)
#define noinline __attribute__((noinline))

#define gpu_log(fmt, ...) \
  printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)

//#define log_io gpu_log
#define log_io(...)
//#define log_anomaly gpu_log
#define log_anomaly(...)

struct psx_gpu gpu __attribute__((aligned(2048)));

static noinline int do_cmd_buffer(uint32_t *data, int count);

static noinline void do_cmd_reset(void)
{
  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);

  gpu.cmd_len = 0;
  gpu.dma.h = 0;
}

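// GP1(00h) style full reset: replay any pending commands, then restore
// the power-up state (status 0x14802000 = display disabled, ready bits
// set) and the default 256x240 screen mode.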
static noinline void do_reset(void)
{
  int i;

  do_cmd_reset();

  memset(gpu.regs, 0, sizeof(gpu.regs));
  for (i = 0; i < ARRAY_SIZE(gpu.ex_regs); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
  gpu.status.reg = 0x14802000;
  gpu.gp0 = 0;
  gpu.regs[3] = 1;
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
}

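// GP1(06h) gives the horizontal display range in GPU clock ticks, with
// 2560 ticks per scanline (e.g. the usual range 0x260..0xc60 spans
// exactly 2560), so the visible width is that fraction of hres;
// out-of-range values fall back to the full width.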
static noinline void update_width(void)
{
  int sw = gpu.screen.x2 - gpu.screen.x1;
  if (sw <= 0 || sw >= 2560)
    // full width
    gpu.screen.w = gpu.screen.hres;
  else
    gpu.screen.w = sw * gpu.screen.hres / 2560;
}

static noinline void update_height(void)
{
  int sh = gpu.screen.y2 - gpu.screen.y1;
  if (gpu.status.dheight)
    sh *= 2;
  if (sh <= 0)
    sh = gpu.screen.vres;

  gpu.screen.h = sh;
}

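// Decide whether to skip the next frame.  At most frameskip.set
// consecutive frames are skipped between rendered ones; the frontend
// can also request skipping through the shared *advice flag.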
static noinline void decide_frameskip(void)
{
  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;
}

static noinline void decide_frameskip_allow(uint32_t cmd_e3)
{
  // no frameskip if the game draws to the display area,
  // but not for interlace since it'll most likely always do that
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
  gpu.frameskip.allow = gpu.status.interlace ||
    (uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
}

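// GP1(10h) "get GPU info": latch the requested internal register into
// gpu.gp0 so it can be read back via GPUREAD.  02h-06h return the
// GP0(e2h-e5h) draw settings (06h mirrors 05h), 07h returns the GPU
// version (2), anything else zero.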
static noinline void get_gpu_info(uint32_t data)
{
  switch (data & 0x0f) {
    case 0x02:
    case 0x03:
    case 0x04:
    case 0x05:
      gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
      break;
    case 0x06:
      gpu.gp0 = gpu.ex_regs[5] & 0xfffff;
      break;
    case 0x07:
      gpu.gp0 = 2;
      break;
    default:
      gpu.gp0 = 0;
      break;
  }
}

long GPUinit(void)
{
  int ret;
  ret = vout_init();
  ret |= renderer_init();

  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.frameskip.active = 0;
  gpu.cmd_len = 0;
  do_reset();

  return ret;
}

long GPUshutdown(void)
{
  return vout_finish();
}

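// GP1 (control) port write: the top byte selects the command, the low
// 24 bits carry its parameter.  Writes that repeat the current value
// are dropped early, except for reset/ack (00h, 01h) and display start
// (05h), which must always take effect.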
void GPUwriteStatus(uint32_t data)
{
  static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
  static const short vres[4] = { 240, 480, 256, 480 };
  uint32_t cmd = data >> 24;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
    case 0x00:
      do_reset();
      break;
    case 0x01:
      do_cmd_reset();
      break;
    case 0x03:
      gpu.status.blanking = data & 1;
      break;
    case 0x04:
      gpu.status.dma = data & 3;
      break;
    case 0x05:
      gpu.screen.x = data & 0x3ff;
      gpu.screen.y = (data >> 10) & 0x3ff;
      if (gpu.frameskip.set) {
        decide_frameskip_allow(gpu.ex_regs[3]);
        if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
          decide_frameskip();
          gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
        }
      }
      break;
    case 0x06:
      gpu.screen.x1 = data & 0xfff;
      gpu.screen.x2 = (data >> 12) & 0xfff;
      update_width();
      break;
    case 0x07:
      gpu.screen.y1 = data & 0x3ff;
      gpu.screen.y2 = (data >> 10) & 0x3ff;
      update_height();
      break;
    case 0x08:
      gpu.status.reg = (gpu.status.reg & ~0x7f0000)
        | ((data & 0x3f) << 17) | ((data & 0x40) << 10);
      gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
      gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
      update_width();
      update_height();
      break;
    default:
      if ((cmd & 0xf0) == 0x10)
        get_gpu_info(data);
      break;
  }
}

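// number of 32bit parameter words that follow each GP0 command word
// (do_cmd_buffer computes len = 1 + cmd_lengths[cmd])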
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]

static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (is_read)
    memcpy(mem, vram, l * 2);
  else
    memcpy(vram, mem, l * 2);
}

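// Move up to (count) words between the command stream and VRAM for an
// active GP0(a0h/c0h) transfer.  The copy runs row by row, wrapping at
// VRAM line 511; gpu.dma.offset remembers a partially transferred row
// so the transfer can resume on the next DMA block.  Returns the
// number of words consumed.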
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read);
  }

  if (h > 0 && count > 0) {
    y &= 511;
    do_vram_line(x, y, sdata, count, is_read);
    o = count;
    count = 0;
  }
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}

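// Begin a GP0(a0h) VRAM write or GP0(c0h) VRAM read.  Position and
// size arrive packed as (y << 16) | x words; width wraps at 1024 and
// height at 512, so a size of 0 means the maximum.  For reads, the
// first pixels are latched into gpu.gp0 right away so an immediate
// GPUREAD returns valid data.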
static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;

  renderer_flush_queues();
  if (is_read) {
    gpu.status.img = 1;
    // XXX: wrong for width 1
    memcpy(&gpu.gp0, VRAM_MEM_XY(gpu.dma.x, gpu.dma.y), 4);
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }
  else {
    renderer_invalidate_caches(gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
}

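// Core GP0 command parser.  A look-ahead pass tracks status-affecting
// commands (e1h-e8h), texture page changes from textured primitives,
// and VRAM i/o; complete spans are then handed to the renderer unless
// the current frame is being skipped.  Returns the number of words
// left unprocessed, i.e. an incomplete trailing command the caller
// must keep buffered.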
static noinline int do_cmd_buffer(uint32_t *data, int count)
{
  int len, cmd, start, pos;
  int vram_dirty = 0;

  // process buffer
  for (start = pos = 0; pos < count; )
  {
    cmd = -1;
    len = 0;

    if (gpu.dma.h) {
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
      start = pos;
    }

    // do look-ahead pass to detect SR changes and VRAM i/o
    while (pos < count) {
      uint32_t *list = data + pos;
      cmd = list[0] >> 24;
      len = 1 + cmd_lengths[cmd];

      //printf("  %3d: %02x %d\n", pos, cmd, len);
      if ((cmd & 0xf4) == 0x24) {
        // flat textured prim
        gpu.ex_regs[1] &= ~0x1ff;
        gpu.ex_regs[1] |= list[4] & 0x1ff;
      }
      else if ((cmd & 0xf4) == 0x34) {
        // shaded textured prim
        gpu.ex_regs[1] &= ~0x1ff;
        gpu.ex_regs[1] |= list[5] & 0x1ff;
      }
      else if (cmd == 0xe3)
        decide_frameskip_allow(list[0]);

      if (2 <= cmd && cmd < 0xc0)
        vram_dirty = 1;
      else if ((cmd & 0xf8) == 0xe0)
        gpu.ex_regs[cmd & 7] = list[0];

      if (pos + len > count) {
        cmd = -1;
        break; // incomplete cmd
      }
      if (cmd == 0xa0 || cmd == 0xc0)
        break; // image i/o
      pos += len;
    }

    if (pos - start > 0) {
      if (!gpu.frameskip.active || !gpu.frameskip.allow)
        do_cmd_list(data + start, pos - start);
      start = pos;
    }

    if (cmd == 0xa0 || cmd == 0xc0) {
      // consume vram write/read cmd
      start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
      pos += len;
    }
    else if (cmd == -1)
      break;
  }

  gpu.status.reg &= ~0x1fff;
  gpu.status.reg |= gpu.ex_regs[1] & 0x7ff;
  gpu.status.reg |= (gpu.ex_regs[6] & 3) << 11;

  if (gpu.frameskip.active)
    renderer_sync_ecmds(gpu.ex_regs);
  gpu.state.fb_dirty |= vram_dirty;

  return count - pos;
}

static void flush_cmd_buffer(void)
{
  int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  gpu.cmd_len = left;
}

void GPUwriteDataMem(uint32_t *mem, int count)
{
  int left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = do_cmd_buffer(mem, count);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}

void GPUwriteData(uint32_t data)
{
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = data;
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer();
}

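// Walk a GP0 DMA linked list: each node starts with a header word of
// (payload_words << 24) | next_node_address, and the list terminates
// at address 0xffffff.  Bit 23 is set on visited headers to catch
// looped lists (games can't legitimately use that bit, as it faults
// DMA on real hardware) and cleared again afterwards.  Returns an
// approximate cycle cost for the emu to account.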
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
{
  uint32_t addr, *list;
  uint32_t *llist_entry = NULL;
  int len, left, count;
  long cpu_cycles = 0;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  // ff7 sends its main list twice, detect this
  if (*gpu.state.frame_count == gpu.state.last_list.frame &&
      *gpu.state.hcnt - gpu.state.last_list.hcnt <= 1 &&
      gpu.state.last_list.cycles > 2048)
  {
    llist_entry = rambase + (gpu.state.last_list.addr & 0x1fffff) / 4;
    *llist_entry |= 0x800000;
  }

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; addr != 0xffffff; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = list[0] >> 24;
    addr = list[0] & 0xffffff;
    cpu_cycles += 10;
    if (len > 0)
      cpu_cycles += 5 + len;

    log_io(".chain %08x #%d\n", (list - rambase) * 4, len);

    // loop detection marker
    // (bit23 set causes DMA error on real machine, so
    //  unlikely to ever be set by the game)
    list[0] |= 0x800000;

    if (len) {
      left = do_cmd_buffer(list + 1, len);
      if (left)
        log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
    }

    if (addr & 0x800000)
      break;
  }

  // remove loop detection markers
  addr = start_addr & 0x1fffff;
  while (count-- > 0) {
    list = rambase + addr / 4;
    addr = list[0] & 0x1fffff;
    list[0] &= ~0x800000;
  }
  if (llist_entry)
    *llist_entry &= ~0x800000;

  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;

  return cpu_cycles;
}

void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}

uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h)
    do_vram_io(&ret, 1, 1);

  log_io("gpu_read %08x\n", ret);
  return ret;
}

uint32_t GPUreadStatus(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.status.reg;
  log_io("gpu_read_status %08x\n", ret);
  return ret;
}

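// savestate block, laid out to match GPUFreeze_t from the PSEmu Pro
// plugin interface (presumably, judging by the field names)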
struct GPUFreeze
{
  uint32_t ulFreezeVersion;      // should always be 1 for now (set by main emu)
  uint32_t ulStatus;             // current gpu status
  uint32_t ulControl[256];       // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};

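// Save/restore GPU state.  On load, control registers 8..1 are
// replayed through GPUwriteStatus() to rebuild derived state; each
// stored value is first XORed by 1 in gpu.regs so the unchanged-value
// early-return in GPUwriteStatus() can't swallow the write.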
long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
    case 1: // save
      if (gpu.cmd_len > 0)
        flush_cmd_buffer();
      memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
      memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
      memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
      freeze->ulStatus = gpu.status.reg;
      break;
    case 0: // load
      renderer_invalidate_caches(0, 0, 1024, 512);
      memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
      memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
      memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
      gpu.status.reg = freeze->ulStatus;
      for (i = 8; i > 0; i--) {
        gpu.regs[i] ^= 1; // avoid reg change detection
        GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
      }
      renderer_sync_ecmds(gpu.ex_regs);
      break;
  }

  return 1;
}

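// Called once per emulated vblank: flush whatever is queued and,
// unless the display is blanked, the framebuffer is unchanged, or the
// frameskipper decided to drop this frame, push it to the video out.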
void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

  if (gpu.status.blanking || !gpu.state.fb_dirty)
    return;

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  gpu.state.fb_dirty = 0;
}

void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && gpu.status.interlace && gpu.status.dheight;
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}

#include "../../frontend/plugin_lib.h"

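// PCSX-ReARMed extension: the frontend hands over its shared state
// here (frameskip level and advice flag, hsync/frame counters,
// interlace preference) and, when supported, a hook for displaying
// raw VRAM directly.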
void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}

// vim:shiftwidth=2:expandtab