gpu-gles: schtruck/fpse merge: remove windows code
[pcsx_rearmed.git] / plugins / gpulib / gpu.c
/*
 * (C) Gražvydas "notaz" Ignotas, 2011-2012
 *
 * This work is licensed under the terms of any of these licenses
 * (at your option):
 *  - GNU GPL, version 2 or later.
 *  - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.
 */

#include <stdio.h>
#include <string.h>
#include "gpu.h"

#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#define unlikely(x) __builtin_expect((x), 0)
#define noinline __attribute__((noinline))

#define gpu_log(fmt, ...) \
  printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)

//#define log_io gpu_log
#define log_io(...)
//#define log_anomaly gpu_log
#define log_anomaly(...)

struct psx_gpu gpu __attribute__((aligned(2048)));

static noinline int do_cmd_buffer(uint32_t *data, int count);
static void finish_vram_transfer(int is_read);

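// GP1(0x01)-style command reset: flush out any buffered command words
// and close an unfinished VRAM transfer before clearing the dma state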
static noinline void do_cmd_reset(void)
{
  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  gpu.cmd_len = 0;

  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}

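// full GP1(0x00)-style reset: clear the control regs, reinitialize the
// e0-e7 shadow regs and put status back to its power-on value
// (0x14802000 - apparently display-off plus the "ready" bits)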
static noinline void do_reset(void)
{
  unsigned int i;

  do_cmd_reset();

  memset(gpu.regs, 0, sizeof(gpu.regs));
  for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
  gpu.status.reg = 0x14802000;
  gpu.gp0 = 0;
  gpu.regs[3] = 1;
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
}

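// x1/x2 come from GP1(0x06) in GPU clock ticks; the code takes 2560
// ticks as the full-width display range and scales the visible part
// to the current horizontal resolution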
static noinline void update_width(void)
{
  int sw = gpu.screen.x2 - gpu.screen.x1;
  if (sw <= 0 || sw >= 2560)
    // full width
    gpu.screen.w = gpu.screen.hres;
  else
    gpu.screen.w = sw * gpu.screen.hres / 2560;
}

static noinline void update_height(void)
{
  int sh = gpu.screen.y2 - gpu.screen.y1;
  if (gpu.status.dheight)
    sh *= 2;
  if (sh <= 0)
    sh = gpu.screen.vres;

  gpu.screen.h = sh;
}

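// called on display flips: choose whether upcoming frames get skipped,
// either on the frontend's advice or in a fixed pattern (skip
// frameskip.set frames, render one), and mark a frame ready for output
// whenever skipping pauses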
static noinline void decide_frameskip(void)
{
  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;
}

static noinline void decide_frameskip_allow(uint32_t cmd_e3)
{
  // don't allow skipping when the game draws into the currently
  // displayed area, except in interlace mode, where it nearly
  // always does that anyway
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
  gpu.frameskip.allow = gpu.status.interlace ||
    (uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
}

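// GP1(0x10) "get GPU info": route the requested piece of internal
// state (texture window, draw area/offset from the e2-e5 shadow regs,
// or the GPU version) to the GPUREAD latch gp0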
static noinline void get_gpu_info(uint32_t data)
{
  switch (data & 0x0f) {
    case 0x02:
    case 0x03:
    case 0x04:
    case 0x05:
      gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
      break;
    case 0x06:
      gpu.gp0 = gpu.ex_regs[5] & 0xfffff;
      break;
    case 0x07:
      gpu.gp0 = 2;
      break;
    default:
      gpu.gp0 = 0;
      break;
  }
}

long GPUinit(void)
{
  int ret;
  ret = vout_init();
  ret |= renderer_init();

  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.frameskip.active = 0;
  gpu.cmd_len = 0;
  do_reset();

  return ret;
}

long GPUshutdown(void)
{
  return vout_finish();
}

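// GP1 (control) port write; note the early-out: repeated writes of an
// unchanged value are ignored for everything except 0x00/0x01 (resets)
// and 0x05 (display address, which also drives the frameskip logic)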
void GPUwriteStatus(uint32_t data)
{
  static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
  static const short vres[4] = { 240, 480, 256, 480 };
  uint32_t cmd = data >> 24;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
    case 0x00:
      do_reset();
      break;
    case 0x01:
      do_cmd_reset();
      break;
    case 0x03:
      gpu.status.blanking = data & 1;
      break;
    case 0x04:
      gpu.status.dma = data & 3;
      break;
    case 0x05:
      gpu.screen.x = data & 0x3ff;
      gpu.screen.y = (data >> 10) & 0x3ff;
      if (gpu.frameskip.set) {
        decide_frameskip_allow(gpu.ex_regs[3]);
        if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
          decide_frameskip();
          gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
        }
      }
      break;
    case 0x06:
      gpu.screen.x1 = data & 0xfff;
      gpu.screen.x2 = (data >> 12) & 0xfff;
      update_width();
      break;
    case 0x07:
      gpu.screen.y1 = data & 0x3ff;
      gpu.screen.y2 = (data >> 10) & 0x3ff;
      update_height();
      break;
    case 0x08:
      gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
      gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
      gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
      update_width();
      update_height();
      break;
    default:
      if ((cmd & 0xf0) == 0x10)
        get_gpu_info(data);
      break;
  }
}

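// number of argument words each GP0 command carries after the command
// word itself; for image load/store (0xa0/0xc0) only the setup words
// are listed here, the pixel data that follows goes through do_vram_io()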
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]

static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (is_read)
    memcpy(mem, vram, l * 2);
  else
    memcpy(vram, mem, l * 2);
}

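// shuttle up to 'count' words between the fifo data and VRAM: first
// finish a partially transferred row, then copy whole rows, then start
// a trailing partial row; returns the number of words consumed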
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read);
  }

  if (h > 0) {
    if (count > 0) {
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}

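// begin a GP0(0xa0) VRAM write or GP0(0xc0) read; pos_word packs the
// x/y destination, size_word the w/h extents. A hypothetical 16x16
// upload to (16,16) would arrive as:
//   0xa0000000  image load command
//   0x00100010  position: x=16, y=16
//   0x00100010  size: w=16, h=16
//   ...followed by 16*16/2 = 128 words of pixel data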
static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while previous one is unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;

  renderer_flush_queues();
  if (is_read) {
    gpu.status.img = 1;
    // XXX: wrong for width 1
    memcpy(&gpu.gp0, VRAM_MEM_XY(gpu.dma.x, gpu.dma.y), 4);
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
}

static void finish_vram_transfer(int is_read)
{
  if (is_read)
    gpu.status.img = 0;
  else
    renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
                           gpu.dma_start.w, gpu.dma_start.h);
}

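// the central GP0 parser: batches up consecutive rendering commands for
// a single do_cmd_list() call, shadowing e1-e6 state changes and the
// frameskip decision along the way, and drops out of the fast path for
// VRAM i/o (0xa0/0xc0); returns how many words were left unprocessed
// because the last command was incomplete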
static noinline int do_cmd_buffer(uint32_t *data, int count)
{
  int len, cmd, start, pos;
  int vram_dirty = 0;

  // process buffer
  for (start = pos = 0; pos < count; )
  {
    cmd = -1;
    len = 0;

    if (gpu.dma.h) {
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
      start = pos;
    }

    // do look-ahead pass to detect SR changes and VRAM i/o
    while (pos < count) {
      uint32_t *list = data + pos;
      cmd = list[0] >> 24;
      len = 1 + cmd_lengths[cmd];

      //printf(" %3d: %02x %d\n", pos, cmd, len);
      if ((cmd & 0xf4) == 0x24) {
        // flat textured prim
        gpu.ex_regs[1] &= ~0x1ff;
        gpu.ex_regs[1] |= list[4] & 0x1ff;
      }
      else if ((cmd & 0xf4) == 0x34) {
        // shaded textured prim
        gpu.ex_regs[1] &= ~0x1ff;
        gpu.ex_regs[1] |= list[5] & 0x1ff;
      }
      else if (cmd == 0xe3)
        decide_frameskip_allow(list[0]);

      if (2 <= cmd && cmd < 0xc0)
        vram_dirty = 1;
      else if ((cmd & 0xf8) == 0xe0)
        gpu.ex_regs[cmd & 7] = list[0];

      if (pos + len > count) {
        cmd = -1;
        break; // incomplete cmd
      }
      if (cmd == 0xa0 || cmd == 0xc0)
        break; // image i/o
      pos += len;
    }

    if (pos - start > 0) {
      if (!gpu.frameskip.active || !gpu.frameskip.allow)
        do_cmd_list(data + start, pos - start);
      start = pos;
    }

    if (cmd == 0xa0 || cmd == 0xc0) {
      // consume vram write/read cmd
      start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
      pos += len;
    }
    else if (cmd == -1)
      break;
  }

  gpu.status.reg &= ~0x1fff;
  gpu.status.reg |= gpu.ex_regs[1] & 0x7ff;
  gpu.status.reg |= (gpu.ex_regs[6] & 3) << 11;

  if (gpu.frameskip.active)
    renderer_sync_ecmds(gpu.ex_regs);
  gpu.state.fb_dirty |= vram_dirty;

  return count - pos;
}

static void flush_cmd_buffer(void)
{
  int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  gpu.cmd_len = left;
}

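// GP0 (data) port: DMA block transfers arrive here in one call;
// single-word writes (GPUwriteData below) are accumulated in
// cmd_buffer instead and flushed once it fills up or before any
// other GPU access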
void GPUwriteDataMem(uint32_t *mem, int count)
{
  int left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = do_cmd_buffer(mem, count);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}

void GPUwriteData(uint32_t data)
{
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = data;
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer();
}

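// walk a linked DMA list (ordering table): each node starts with a
// header word holding the payload length (bits 24-31) and the address
// of the next node (bits 0-23), and the chain ends at 0xffffff; bit23
// of visited headers serves as a loop-detection marker, see the
// comment in the loop body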
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
{
  uint32_t addr, *list;
  uint32_t *llist_entry = NULL;
  int len, left, count;
  long cpu_cycles = 0;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  // ff7 sends its main list twice, detect this
  if (*gpu.state.frame_count == gpu.state.last_list.frame &&
      *gpu.state.hcnt - gpu.state.last_list.hcnt <= 1 &&
      gpu.state.last_list.cycles > 2048)
  {
    llist_entry = rambase + (gpu.state.last_list.addr & 0x1fffff) / 4;
    *llist_entry |= 0x800000;
  }

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; addr != 0xffffff; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = list[0] >> 24;
    addr = list[0] & 0xffffff;
    cpu_cycles += 10;
    if (len > 0)
      cpu_cycles += 5 + len;

    log_io(".chain %08x #%d\n", (list - rambase) * 4, len);

    // loop detection marker
    // (bit23 set causes a DMA error on a real machine, so it's
    //  unlikely to ever be set by a game)
    list[0] |= 0x800000;

    if (len) {
      left = do_cmd_buffer(list + 1, len);
      if (left)
        log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
    }

    if (addr & 0x800000)
      break;
  }

  // remove loop detection markers
  addr = start_addr & 0x1fffff;
  while (count-- > 0) {
    list = rambase + addr / 4;
    addr = list[0] & 0x1fffff;
    list[0] &= ~0x800000;
  }
  if (llist_entry)
    *llist_entry &= ~0x800000;

  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;

  return cpu_cycles;
}

void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}

uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h)
    do_vram_io(&ret, 1, 1);

  log_io("gpu_read %08x\n", ret);
  return ret;
}

uint32_t GPUreadStatus(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.status.reg;
  log_io("gpu_read_status %08x\n", ret);
  return ret;
}

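// savestate block layout; this follows the long-established PSEmu-style
// GPU plugin freeze interface, hence the Hungarian names and the full
// 2MB VRAM snapshot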
struct GPUFreeze
{
  uint32_t ulFreezeVersion;      // should be always 1 for now (set by main emu)
  uint32_t ulStatus;             // current gpu status
  uint32_t ulControl[256];       // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};

long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
    case 1: // save
      if (gpu.cmd_len > 0)
        flush_cmd_buffer();
      memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
      memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
      memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
      freeze->ulStatus = gpu.status.reg;
      break;
    case 0: // load
      memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
      memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
      memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
      gpu.status.reg = freeze->ulStatus;
      for (i = 8; i > 0; i--) {
        gpu.regs[i] ^= 1; // avoid reg change detection
        GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
      }
      renderer_sync_ecmds(gpu.ex_regs);
      renderer_update_caches(0, 0, 1024, 512);
      break;
  }

  return 1;
}

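// per-frame vsync entry point: flush everything, then hand the frame
// to the video backend unless the display is blanked, nothing changed,
// or the frameskip logic decided to drop it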
void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

  if (gpu.status.blanking || !gpu.state.fb_dirty)
    return;

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  gpu.state.fb_dirty = 0;
}

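// vblank hook: decide whether interlaced rendering should be in effect
// (with an "auto" mode that turns it off for games that never read
// VRAM back) and tell the renderer which field (lcf) is current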
void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && gpu.status.interlace && gpu.status.dheight;
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}

#include "../../frontend/plugin_lib.h"

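// pcsx_rearmed-specific setup: the frontend passes its config plus
// pointers to the hcnt/frame_count counters that the frameskip and
// list-deduplication logic above read through gpu.state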
void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}

// vim:shiftwidth=2:expandtab