gpulib: make gpulib.a a dependency
[pcsx_rearmed.git] / plugins / gpulib / gpu.c
... / ...
CommitLineData
1/*
 * (C) Gražvydas "notaz" Ignotas, 2011-2012
3 *
4 * This work is licensed under the terms of any of these licenses
5 * (at your option):
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
9 */
10
11#include <stdio.h>
12#include <string.h>
13#include "gpu.h"
14
15#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
16#define unlikely(x) __builtin_expect((x), 0)
17#define noinline __attribute__((noinline))
18
19#define gpu_log(fmt, ...) \
20 printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)
21
22//#define log_io gpu_log
23#define log_io(...)
24//#define log_anomaly gpu_log
25#define log_anomaly(...)
26
27struct psx_gpu gpu __attribute__((aligned(2048)));
28
29static noinline int do_cmd_buffer(uint32_t *data, int count);
30static void finish_vram_transfer(int is_read);
31
32static noinline void do_cmd_reset(void)
33{
34 if (unlikely(gpu.cmd_len > 0))
35 do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
36 gpu.cmd_len = 0;
37
38 if (unlikely(gpu.dma.h > 0))
39 finish_vram_transfer(gpu.dma_start.is_read);
40 gpu.dma.h = 0;
41}
42
43static noinline void do_reset(void)
44{
45 unsigned int i;
46
47 do_cmd_reset();
48
49 memset(gpu.regs, 0, sizeof(gpu.regs));
50 for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
51 gpu.ex_regs[i] = (0xe0 + i) << 24;
52 gpu.status.reg = 0x14802000;
53 gpu.gp0 = 0;
54 gpu.regs[3] = 1;
55 gpu.screen.hres = gpu.screen.w = 256;
56 gpu.screen.vres = gpu.screen.h = 240;
57}
58
59static noinline void update_width(void)
60{
61 int sw = gpu.screen.x2 - gpu.screen.x1;
62 if (sw <= 0 || sw >= 2560)
63 // full width
64 gpu.screen.w = gpu.screen.hres;
65 else
66 gpu.screen.w = sw * gpu.screen.hres / 2560;
67}
68
69static noinline void update_height(void)
70{
71 int sh = gpu.screen.y2 - gpu.screen.y1;
72 if (gpu.status.dheight)
73 sh *= 2;
74 if (sh <= 0)
75 sh = gpu.screen.vres;
76
77 gpu.screen.h = sh;
78}
79
80static noinline void decide_frameskip(void)
81{
82 if (gpu.frameskip.active)
83 gpu.frameskip.cnt++;
84 else {
85 gpu.frameskip.cnt = 0;
86 gpu.frameskip.frame_ready = 1;
87 }
88
89 if (!gpu.frameskip.active && *gpu.frameskip.advice)
90 gpu.frameskip.active = 1;
91 else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
92 gpu.frameskip.active = 1;
93 else
94 gpu.frameskip.active = 0;
95}
96
97static noinline int decide_frameskip_allow(uint32_t cmd_e3)
98{
99 // no frameskip if it decides to draw to display area,
100 // but not for interlace since it'll most likely always do that
101 uint32_t x = cmd_e3 & 0x3ff;
102 uint32_t y = (cmd_e3 >> 10) & 0x3ff;
103 gpu.frameskip.allow = gpu.status.interlace ||
104 (uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
105 (uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
106 return gpu.frameskip.allow;
107}
108
109static noinline void get_gpu_info(uint32_t data)
110{
111 switch (data & 0x0f) {
112 case 0x02:
113 case 0x03:
114 case 0x04:
115 case 0x05:
116 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
117 break;
118 case 0x06:
119 gpu.gp0 = gpu.ex_regs[5] & 0xfffff;
120 break;
121 case 0x07:
122 gpu.gp0 = 2;
123 break;
124 default:
125 gpu.gp0 = 0;
126 break;
127 }
128}
129
130long GPUinit(void)
131{
132 int ret;
133 ret = vout_init();
134 ret |= renderer_init();
135
136 gpu.state.frame_count = &gpu.zero;
137 gpu.state.hcnt = &gpu.zero;
138 gpu.frameskip.active = 0;
139 gpu.cmd_len = 0;
140 do_reset();
141
142 return ret;
143}
144
// Plugin teardown; counterpart of GPUinit(). Only the video output
// has cleanup to do — the renderer has no finish hook here.
long GPUshutdown(void)
{
  return vout_finish();
}
149
// GP1 control port write: display setup, resets and info queries.
// The command number is in the top byte of `data`.
void GPUwriteStatus(uint32_t data)
{
  // horizontal resolutions selected by status bits 16-18,
  // vertical by bits 19-20
  static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
  static const short vres[4] = { 240, 480, 256, 480 };
  uint32_t cmd = data >> 24;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    // drop no-op rewrites, except for the resets (0, 1) and the
    // display address (5) which always have side effects
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
  case 0x00:
    do_reset();
    break;
  case 0x01:
    do_cmd_reset();
    break;
  case 0x03:
    gpu.status.blanking = data & 1;
    break;
  case 0x04:
    gpu.status.dma = data & 3;
    break;
  case 0x05:
    // display start position in vram
    gpu.screen.x = data & 0x3ff;
    gpu.screen.y = (data >> 10) & 0x3ff;
    if (gpu.frameskip.set) {
      decide_frameskip_allow(gpu.ex_regs[3]);
      // treat this as a "flip" and re-evaluate skipping, but at most
      // once per emulated frame
      if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
        decide_frameskip();
        gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
      }
    }
    break;
  case 0x06:
    // horizontal display range
    gpu.screen.x1 = data & 0xfff;
    gpu.screen.x2 = (data >> 12) & 0xfff;
    update_width();
    break;
  case 0x07:
    // vertical display range
    gpu.screen.y1 = data & 0x3ff;
    gpu.screen.y2 = (data >> 10) & 0x3ff;
    update_height();
    break;
  case 0x08:
    // display mode: remap GP1(08) bit layout into status bits 16-22
    gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
    gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
    gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
    update_width();
    update_height();
    break;
  default:
    // 0x10-0x1f: info queries answered via a later GPUreadData()
    if ((cmd & 0xf0) == 0x10)
      get_gpu_info(data);
    break;
  }

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}
215
// Number of parameter words that follow each GP0 command word
// (total packet size is 1 + cmd_lengths[cmd]).  Zero entries are
// commands with no parameters or unused opcodes.
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
235
236#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
237
/* Copy one horizontal span of l 16bpp pixels between vram (at x,y)
 * and the caller's buffer, in the direction selected by is_read. */
static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  void *dst = is_read ? (void *)mem : (void *)vram;
  const void *src = is_read ? (const void *)vram : (const void *)mem;

  memcpy(dst, src, l * 2);
}
246
// Move up to `count` 32-bit words between `data` and the vram
// rectangle described by gpu.dma, resuming a transfer that may have
// been interrupted mid-row.  Returns the number of words consumed.
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;  // pixels already done in the current row
  int l;
  count *= 2; // operate in 16bpp pixels

  // first, complete a partially transferred row from a previous call
  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read);

    if (o + l < w)
      o += l;
    else {
      // row finished; advance to the next one
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  // then bulk-copy whole rows
  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;  // vram wraps vertically at 512 lines
    do_vram_line(x, y, sdata, w, is_read);
  }

  if (h > 0) {
    // ran out of data mid-rectangle; stash the partial-row progress
    if (count > 0) {
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  // NOTE(review): a leftover odd pixel makes count/2 round down here,
  // slightly over-reporting consumed words — presumably benign; verify
  return count_initial - count / 2;
}
296
// Begin a cpu<->vram image transfer (GP0 0xa0 write / 0xc0 read).
// pos_word packs x|y<<16, size_word packs w|h<<16; the -1/+1 dance
// makes a size of 0 decode as the maximum (1024 or 512).
static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  // snapshot the initial rectangle; gpu.dma mutates as the transfer
  // progresses but finish_vram_transfer() needs the original bounds
  gpu.dma_start = gpu.dma;

  renderer_flush_queues();
  if (is_read) {
    gpu.status.img = 1;
    // pre-latch the first two pixels into gp0 for GPUreadData()
    // XXX: wrong for width 1
    memcpy(&gpu.gp0, VRAM_MEM_XY(gpu.dma.x, gpu.dma.y), 4);
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
}
321
322static void finish_vram_transfer(int is_read)
323{
324 if (is_read)
325 gpu.status.img = 0;
326 else
327 renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
328 gpu.dma_start.w, gpu.dma_start.h);
329}
330
331static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
332{
333 int cmd = 0, pos = 0, len, dummy;
334 int skip = 1;
335
336 while (pos < count && skip) {
337 uint32_t *list = data + pos;
338 cmd = list[0] >> 24;
339 len = 1 + cmd_lengths[cmd];
340
341 if (cmd == 0x02) {
342 if ((list[2] & 0x3ff) > gpu.screen.w || ((list[2] >> 16) & 0x1ff) > gpu.screen.h)
343 // clearing something large, don't skip
344 do_cmd_list(data + pos, 3, &dummy);
345 }
346 else if ((cmd & 0xf4) == 0x24) {
347 // flat textured prim
348 gpu.ex_regs[1] &= ~0x1ff;
349 gpu.ex_regs[1] |= list[4] & 0x1ff;
350 }
351 else if ((cmd & 0xf4) == 0x34) {
352 // shaded textured prim
353 gpu.ex_regs[1] &= ~0x1ff;
354 gpu.ex_regs[1] |= list[5] & 0x1ff;
355 }
356 else if (cmd == 0xe3)
357 skip = decide_frameskip_allow(list[0]);
358
359 if ((cmd & 0xf8) == 0xe0)
360 gpu.ex_regs[cmd & 7] = list[0];
361
362 if (pos + len > count) {
363 cmd = -1;
364 break; // incomplete cmd
365 }
366 if (cmd == 0xa0 || cmd == 0xc0)
367 break; // image i/o
368 pos += len;
369 }
370
371 renderer_sync_ecmds(gpu.ex_regs);
372 *last_cmd = cmd;
373 return pos;
374}
375
// Core GP0 word processor: dispatches vram i/o, image transfer setup
// and draw commands from `data`.  Returns the number of words NOT
// consumed (nonzero when the buffer ends inside a command).
static noinline int do_cmd_buffer(uint32_t *data, int count)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    // an active cpu->vram image transfer consumes the words first
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = data[pos] >> 24;
    if (cmd == 0xa0 || cmd == 0xc0) {
      // consume vram write/read cmd
      start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
      pos += 3;
      continue;
    }

    // when skipping, only track state; otherwise actually draw
    if (gpu.frameskip.active && gpu.frameskip.allow)
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      // incomplete cmd
      break;
  }

  // mirror e1 (texpage etc.) and e6 (mask) shadows into the status word
  gpu.status.reg &= ~0x1fff;
  gpu.status.reg |= gpu.ex_regs[1] & 0x7ff;
  gpu.status.reg |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  // a drawing-area change may flip the frameskip decision
  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

  return count - pos;
}
423
424static void flush_cmd_buffer(void)
425{
426 int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
427 if (left > 0)
428 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
429 gpu.cmd_len = left;
430}
431
432void GPUwriteDataMem(uint32_t *mem, int count)
433{
434 int left;
435
436 log_io("gpu_dma_write %p %d\n", mem, count);
437
438 if (unlikely(gpu.cmd_len > 0))
439 flush_cmd_buffer();
440
441 left = do_cmd_buffer(mem, count);
442 if (left)
443 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
444}
445
446void GPUwriteData(uint32_t data)
447{
448 log_io("gpu_write %08x\n", data);
449 gpu.cmd_buffer[gpu.cmd_len++] = data;
450 if (gpu.cmd_len >= CMD_BUFFER_LEN)
451 flush_cmd_buffer();
452}
453
// Walk and execute a GP0 linked list starting at start_addr in
// emulated ram.  Each node is len<<24 | next_addr followed by `len`
// command words.  Returns an approximate cpu cycle cost.
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
{
  uint32_t addr, *list;
  uint32_t *llist_entry = NULL;
  int len, left, count;
  long cpu_cycles = 0;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  // ff7 sends it's main list twice, detect this
  // (same frame, within one hsync tick, and the last list was big):
  // pre-mark the previous list's head so the walk below stops there
  if (*gpu.state.frame_count == gpu.state.last_list.frame &&
      *gpu.state.hcnt - gpu.state.last_list.hcnt <= 1 &&
      gpu.state.last_list.cycles > 2048)
  {
    llist_entry = rambase + (gpu.state.last_list.addr & 0x1fffff) / 4;
    *llist_entry |= 0x800000;
  }

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; addr != 0xffffff; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = list[0] >> 24;        // payload word count of this node
    addr = list[0] & 0xffffff;  // link to the next node
    cpu_cycles += 10;
    if (len > 0)
      cpu_cycles += 5 + len;

    log_io(".chain %08x #%d\n", (list - rambase) * 4, len);

    // loop detection marker
    // (bit23 set causes DMA error on real machine, so
    // unlikely to be ever set by the game)
    list[0] |= 0x800000;

    if (len) {
      left = do_cmd_buffer(list + 1, len);
      if (left)
        log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
    }

    // next link already carries the marker -> we have looped
    if (addr & 0x800000)
      break;
  }

  // remove loop detection markers
  addr = start_addr & 0x1fffff;
  while (count-- > 0) {
    list = rambase + addr / 4;
    addr = list[0] & 0x1fffff;
    list[0] &= ~0x800000;
  }
  if (llist_entry)
    *llist_entry &= ~0x800000;

  // remember this list for the double-send detection above
  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;

  return cpu_cycles;
}
518
519void GPUreadDataMem(uint32_t *mem, int count)
520{
521 log_io("gpu_dma_read %p %d\n", mem, count);
522
523 if (unlikely(gpu.cmd_len > 0))
524 flush_cmd_buffer();
525
526 if (gpu.dma.h)
527 do_vram_io(mem, count, 1);
528}
529
530uint32_t GPUreadData(void)
531{
532 uint32_t ret;
533
534 if (unlikely(gpu.cmd_len > 0))
535 flush_cmd_buffer();
536
537 ret = gpu.gp0;
538 if (gpu.dma.h)
539 do_vram_io(&ret, 1, 1);
540
541 log_io("gpu_read %08x\n", ret);
542 return ret;
543}
544
545uint32_t GPUreadStatus(void)
546{
547 uint32_t ret;
548
549 if (unlikely(gpu.cmd_len > 0))
550 flush_cmd_buffer();
551
552 ret = gpu.status.reg;
553 log_io("gpu_read_status %08x\n", ret);
554 return ret;
555}
556
// Savestate layout shared with the PCSX gpu plugin interface;
// field order and sizes must not change.
struct GPUFreeze
{
  uint32_t ulFreezeVersion; // should be always 1 for now (set by main emu)
  uint32_t ulStatus; // current gpu status
  uint32_t ulControl[256]; // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};
564
// Savestate entry point: type 1 saves GPU state into *freeze,
// type 0 restores it.  Always reports success (1).
long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
  case 1: // save
    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
    memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
    // ex_regs (e0-e7 shadows) ride in the upper part of ulControl
    memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
    freeze->ulStatus = gpu.status.reg;
    break;
  case 0: // load
    memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
    memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
    memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
    gpu.status.reg = freeze->ulStatus;
    // replay GP1 regs 1-8 through GPUwriteStatus() to rebuild derived
    // state; the xor defeats its same-value short-circuit so every
    // write actually takes effect
    for (i = 8; i > 0; i--) {
      gpu.regs[i] ^= 1; // avoid reg change detection
      GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
    }
    renderer_sync_ecmds(gpu.ex_regs);
    renderer_update_caches(0, 0, 1024, 512);
    break;
  }

  return 1;
}
594
// Per-frame display update: pushes the frame to the video output
// unless the display is blanked, nothing changed, or frameskip
// decided to drop it.
void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

  if (gpu.status.blanking || !gpu.state.fb_dirty)
    return;

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      // no complete frame yet; keep skipping, but give up and render
      // anyway if nothing flipped for ~9 frames (stuck skip guard)
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  gpu.state.fb_dirty = 0;
}
616
// Vblank hook from the emu core: decides whether the renderer should
// run in interlaced mode and tells it the current field via lcf.
void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && gpu.status.interlace && gpu.status.dheight;
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  // reconfigure while interlacing (field changes every vblank) or
  // when the interlace mode itself toggled
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}
637
638#include "../../frontend/plugin_lib.h"
639
// Accept runtime configuration and shared counter pointers from the
// pcsx_rearmed frontend, and forward the config to the backends.
void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  // replace the dummy counters set up in GPUinit()
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;

  // let the frontend display vram directly if it can
  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}
655
656// vim:shiftwidth=2:expandtab