gpu-gles: remove scissor test disable on fills
[pcsx_rearmed.git] / plugins / gpulib / gpu.c
... / ...
CommitLineData
1/*
2 * (C) GraÅžvydas "notaz" Ignotas, 2011-2012
3 *
4 * This work is licensed under the terms of any of these licenses
5 * (at your option):
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
9 */
10
11#include <stdio.h>
12#include <string.h>
13#include "gpu.h"
14
15#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
16#define unlikely(x) __builtin_expect((x), 0)
17#define noinline __attribute__((noinline))
18
19#define gpu_log(fmt, ...) \
20 printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)
21
22//#define log_io gpu_log
23#define log_io(...)
24//#define log_anomaly gpu_log
25#define log_anomaly(...)
26
27struct psx_gpu gpu __attribute__((aligned(2048)));
28
29static noinline int do_cmd_buffer(uint32_t *data, int count);
30static void finish_vram_transfer(int is_read);
31
32static noinline void do_cmd_reset(void)
33{
34 if (unlikely(gpu.cmd_len > 0))
35 do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
36 gpu.cmd_len = 0;
37
38 if (unlikely(gpu.dma.h > 0))
39 finish_vram_transfer(gpu.dma_start.is_read);
40 gpu.dma.h = 0;
41}
42
43static noinline void do_reset(void)
44{
45 unsigned int i;
46
47 do_cmd_reset();
48
49 memset(gpu.regs, 0, sizeof(gpu.regs));
50 for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
51 gpu.ex_regs[i] = (0xe0 + i) << 24;
52 gpu.status.reg = 0x14802000;
53 gpu.gp0 = 0;
54 gpu.regs[3] = 1;
55 gpu.screen.hres = gpu.screen.w = 256;
56 gpu.screen.vres = gpu.screen.h = 240;
57}
58
59static noinline void update_width(void)
60{
61 int sw = gpu.screen.x2 - gpu.screen.x1;
62 if (sw <= 0 || sw >= 2560)
63 // full width
64 gpu.screen.w = gpu.screen.hres;
65 else
66 gpu.screen.w = sw * gpu.screen.hres / 2560;
67}
68
69static noinline void update_height(void)
70{
71 int sh = gpu.screen.y2 - gpu.screen.y1;
72 if (gpu.status.dheight)
73 sh *= 2;
74 if (sh <= 0)
75 sh = gpu.screen.vres;
76
77 gpu.screen.h = sh;
78}
79
80static noinline void decide_frameskip(void)
81{
82 if (gpu.frameskip.active)
83 gpu.frameskip.cnt++;
84 else {
85 gpu.frameskip.cnt = 0;
86 gpu.frameskip.frame_ready = 1;
87 }
88
89 if (!gpu.frameskip.active && *gpu.frameskip.advice)
90 gpu.frameskip.active = 1;
91 else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
92 gpu.frameskip.active = 1;
93 else
94 gpu.frameskip.active = 0;
95}
96
97static noinline void decide_frameskip_allow(uint32_t cmd_e3)
98{
99 // no frameskip if it decides to draw to display area,
100 // but not for interlace since it'll most likely always do that
101 uint32_t x = cmd_e3 & 0x3ff;
102 uint32_t y = (cmd_e3 >> 10) & 0x3ff;
103 gpu.frameskip.allow = gpu.status.interlace ||
104 (uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
105 (uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
106}
107
108static noinline void get_gpu_info(uint32_t data)
109{
110 switch (data & 0x0f) {
111 case 0x02:
112 case 0x03:
113 case 0x04:
114 case 0x05:
115 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
116 break;
117 case 0x06:
118 gpu.gp0 = gpu.ex_regs[5] & 0xfffff;
119 break;
120 case 0x07:
121 gpu.gp0 = 2;
122 break;
123 default:
124 gpu.gp0 = 0;
125 break;
126 }
127}
128
129long GPUinit(void)
130{
131 int ret;
132 ret = vout_init();
133 ret |= renderer_init();
134
135 gpu.state.frame_count = &gpu.zero;
136 gpu.state.hcnt = &gpu.zero;
137 gpu.frameskip.active = 0;
138 gpu.cmd_len = 0;
139 do_reset();
140
141 return ret;
142}
143
/* Plugin teardown: only the video output layer has cleanup here. */
long GPUshutdown(void)
{
  return vout_finish();
}
148
/* GP1 (control) register write.  The command is the top byte of
 * `data`; the rest is the argument.  Updates mirrored regs, status
 * bits and cached screen geometry as a side effect. */
void GPUwriteStatus(uint32_t data)
{
  // resolution tables indexed by status bits 16-18 (h) / 19-20 (v)
  static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
  static const short vres[4] = { 240, 480, 256, 480 };
  uint32_t cmd = data >> 24;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    // drop no-op rewrites, except cmds 0/1 (resets) and 5
    // (display address, needed for frameskip bookkeeping below)
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
  case 0x00:
    do_reset();
    break;
  case 0x01:
    do_cmd_reset();
    break;
  case 0x03:
    gpu.status.blanking = data & 1;
    break;
  case 0x04:
    gpu.status.dma = data & 3;
    break;
  case 0x05:
    // display start address in VRAM
    gpu.screen.x = data & 0x3ff;
    gpu.screen.y = (data >> 10) & 0x3ff;
    if (gpu.frameskip.set) {
      decide_frameskip_allow(gpu.ex_regs[3]);
      // treat a display address change as a frame flip,
      // but make the skip decision at most once per frame
      if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
        decide_frameskip();
        gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
      }
    }
    break;
  case 0x06:
    // horizontal display range
    gpu.screen.x1 = data & 0xfff;
    gpu.screen.x2 = (data >> 12) & 0xfff;
    update_width();
    break;
  case 0x07:
    // vertical display range
    gpu.screen.y1 = data & 0x3ff;
    gpu.screen.y2 = (data >> 10) & 0x3ff;
    update_height();
    break;
  case 0x08:
    // display mode: arg bits 0-5 -> status 17-22, bit 6 -> status 16
    gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
    gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
    gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
    update_width();
    update_height();
    break;
  default:
    if ((cmd & 0xf0) == 0x10)
      get_gpu_info(data);
    break;
  }

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}
214
// Extra argument words for each GP0 command byte (the command word
// itself is not counted: do_cmd_buffer() uses 1 + cmd_lengths[cmd]).
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
234
235#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
236
// Copy one span of `l` 16bpp pixels between VRAM at (x, y) and the
// buffer `mem`; `is_read` selects VRAM->mem, otherwise mem->VRAM.
static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  uint16_t *dst = is_read ? mem : vram;
  uint16_t *src = is_read ? vram : mem;

  memcpy(dst, src, l * 2);
}
245
// Move pixel data for an active image transfer (GP0 0xa0/0xc0)
// between the CPU buffer `data` and VRAM.  Resumes from the state
// saved in gpu.dma (current line y, remaining lines h, intra-line
// offset), so a transfer may span many calls; y wraps at 512 lines.
// Returns the number of 32-bit words consumed/produced.
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  // first complete the line left partially done by the previous call
  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read);

    if (o + l < w)
      o += l;       // still not a full line
    else {
      o = 0;        // line finished, advance to the next one
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  // bulk path: whole lines
  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read);
  }

  if (h > 0) {
    // transfer not finished; stash any leftover partial line
    if (count > 0) {
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);
  // persist resume state for the next call
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}
295
// Begin a VRAM image transfer (GP0 0xa0 write / 0xc0 read).
// pos_word packs x | y<<16, size_word packs w | h<<16; the
// ((v - 1) & mask) + 1 form maps a size of 0 to the maximum.
static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  // keep the initial rectangle: gpu.dma mutates as the transfer
  // progresses, but finish_vram_transfer() needs the full extent
  gpu.dma_start = gpu.dma;

  renderer_flush_queues();
  if (is_read) {
    gpu.status.img = 1;   // flag cleared again in finish_vram_transfer()
    // XXX: wrong for width 1
    memcpy(&gpu.gp0, VRAM_MEM_XY(gpu.dma.x, gpu.dma.y), 4);
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
}
320
321static void finish_vram_transfer(int is_read)
322{
323 if (is_read)
324 gpu.status.img = 0;
325 else
326 renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
327 gpu.dma_start.w, gpu.dma_start.h);
328}
329
// Core GP0 command stream processor.  Data for an active VRAM write
// is consumed first, then commands are scanned ahead so a whole run
// can be handed to the renderer in a single do_cmd_list() call.
// Returns the number of words left unprocessed (an incomplete
// trailing command the caller should buffer).
static noinline int do_cmd_buffer(uint32_t *data, int count)
{
  int len, cmd, start, pos;
  int vram_dirty = 0;

  // process buffer
  for (start = pos = 0; pos < count; )
  {
    cmd = -1;
    len = 0;

    // an in-progress VRAM write eats the stream before any commands
    if (gpu.dma.h) {
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
      start = pos;
    }

    // do look-ahead pass to detect SR changes and VRAM i/o
    while (pos < count) {
      uint32_t *list = data + pos;
      cmd = list[0] >> 24;
      len = 1 + cmd_lengths[cmd];

      //printf("  %3d: %02x %d\n", pos, cmd, len);
      if ((cmd & 0xf4) == 0x24) {
        // flat textured prim: mirror its texpage bits into e1 state
        gpu.ex_regs[1] &= ~0x1ff;
        gpu.ex_regs[1] |= list[4] & 0x1ff;
      }
      else if ((cmd & 0xf4) == 0x34) {
        // shaded textured prim
        gpu.ex_regs[1] &= ~0x1ff;
        gpu.ex_regs[1] |= list[5] & 0x1ff;
      }
      else if (cmd == 0xe3)
        // draw area change may affect whether skipping is safe
        decide_frameskip_allow(list[0]);

      if (2 <= cmd && cmd < 0xc0)
        vram_dirty = 1;           // any draw/fill/copy touches VRAM
      else if ((cmd & 0xf8) == 0xe0)
        gpu.ex_regs[cmd & 7] = list[0];  // latch e0-e7 state

      if (pos + len > count) {
        cmd = -1;
        break; // incomplete cmd
      }
      if (cmd == 0xa0 || cmd == 0xc0)
        break; // image i/o
      pos += len;
    }

    // hand the scanned run to the renderer, unless this frame is skipped
    if (pos - start > 0) {
      if (!gpu.frameskip.active || !gpu.frameskip.allow)
        do_cmd_list(data + start, pos - start);
      start = pos;
    }

    if (cmd == 0xa0 || cmd == 0xc0) {
      // consume vram write/read cmd
      start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
      pos += len;
    }
    else if (cmd == -1)
      break;
  }

  // mirror latched e1/e6 state into the visible status register bits
  gpu.status.reg &= ~0x1fff;
  gpu.status.reg |= gpu.ex_regs[1] & 0x7ff;
  gpu.status.reg |= (gpu.ex_regs[6] & 3) << 11;

  // while skipping, the renderer never saw the e-commands above,
  // so sync its state explicitly
  if (gpu.frameskip.active)
    renderer_sync_ecmds(gpu.ex_regs);
  gpu.state.fb_dirty |= vram_dirty;

  return count - pos;
}
407
408static void flush_cmd_buffer(void)
409{
410 int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
411 if (left > 0)
412 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
413 gpu.cmd_len = left;
414}
415
416void GPUwriteDataMem(uint32_t *mem, int count)
417{
418 int left;
419
420 log_io("gpu_dma_write %p %d\n", mem, count);
421
422 if (unlikely(gpu.cmd_len > 0))
423 flush_cmd_buffer();
424
425 left = do_cmd_buffer(mem, count);
426 if (left)
427 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
428}
429
430void GPUwriteData(uint32_t data)
431{
432 log_io("gpu_write %08x\n", data);
433 gpu.cmd_buffer[gpu.cmd_len++] = data;
434 if (gpu.cmd_len >= CMD_BUFFER_LEN)
435 flush_cmd_buffer();
436}
437
/* Walk a GPU DMA linked list starting at start_addr in PSX RAM and
 * feed each packet's payload to the command processor.  Bit 23 of
 * each visited link word is set as a loop-detection marker and
 * cleared again afterwards.  Returns an approximate cycle cost. */
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
{
  uint32_t addr, *list;
  uint32_t *llist_entry = NULL;
  int len, left, count;
  long cpu_cycles = 0;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  // ff7 sends its main list twice, detect this
  if (*gpu.state.frame_count == gpu.state.last_list.frame &&
      *gpu.state.hcnt - gpu.state.last_list.hcnt <= 1 &&
      gpu.state.last_list.cycles > 2048)
  {
    // pre-mark the previous list's head so the walk stops there
    llist_entry = rambase + (gpu.state.last_list.addr & 0x1fffff) / 4;
    *llist_entry |= 0x800000;
  }

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; addr != 0xffffff; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = list[0] >> 24;        // payload words in this packet
    addr = list[0] & 0xffffff;  // link to the next packet
    cpu_cycles += 10;
    if (len > 0)
      cpu_cycles += 5 + len;

    log_io(".chain %08x #%d\n", (list - rambase) * 4, len);

    // loop detection marker
    // (bit23 set causes DMA error on real machine, so
    //  unlikely to be ever set by the game)
    list[0] |= 0x800000;

    if (len) {
      left = do_cmd_buffer(list + 1, len);
      if (left)
        log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
    }

    // marker seen in the next link -> we've been here before, stop
    if (addr & 0x800000)
      break;
  }

  // remove loop detection markers
  addr = start_addr & 0x1fffff;
  while (count-- > 0) {
    list = rambase + addr / 4;
    addr = list[0] & 0x1fffff;
    list[0] &= ~0x800000;
  }
  if (llist_entry)
    *llist_entry &= ~0x800000;

  // remember this list for the double-send detection above
  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;

  return cpu_cycles;
}
502
503void GPUreadDataMem(uint32_t *mem, int count)
504{
505 log_io("gpu_dma_read %p %d\n", mem, count);
506
507 if (unlikely(gpu.cmd_len > 0))
508 flush_cmd_buffer();
509
510 if (gpu.dma.h)
511 do_vram_io(mem, count, 1);
512}
513
514uint32_t GPUreadData(void)
515{
516 uint32_t ret;
517
518 if (unlikely(gpu.cmd_len > 0))
519 flush_cmd_buffer();
520
521 ret = gpu.gp0;
522 if (gpu.dma.h)
523 do_vram_io(&ret, 1, 1);
524
525 log_io("gpu_read %08x\n", ret);
526 return ret;
527}
528
529uint32_t GPUreadStatus(void)
530{
531 uint32_t ret;
532
533 if (unlikely(gpu.cmd_len > 0))
534 flush_cmd_buffer();
535
536 ret = gpu.status.reg;
537 log_io("gpu_read_status %08x\n", ret);
538 return ret;
539}
540
// Savestate container exchanged with the emu core in GPUfreeze().
// NOTE(review): layout appears to match the classic PCSX GPU plugin
// freeze ABI — confirm before reordering or resizing any field.
struct GPUFreeze
{
  uint32_t ulFreezeVersion;      // should be always 1 for now (set by main emu)
  uint32_t ulStatus;             // current gpu status
  uint32_t ulControl[256];       // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};
548
/* Savestate hook: type 1 saves GPU state into *freeze, type 0
 * restores it.  Always returns 1. */
long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
  case 1: // save
    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
    memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
    // e0-e7 state is stashed at offset 0xe0 of the control array
    memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
    freeze->ulStatus = gpu.status.reg;
    break;
  case 0: // load
    memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
    memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
    memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
    gpu.status.reg = freeze->ulStatus;
    // replay control regs 8..1 to rebuild derived state; the xor
    // defeats GPUwriteStatus()'s same-value write filter
    for (i = 8; i > 0; i--) {
      gpu.regs[i] ^= 1; // avoid reg change detection
      GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
    }
    renderer_sync_ecmds(gpu.ex_regs);
    renderer_update_caches(0, 0, 1024, 512);
    break;
  }

  return 1;
}
578
/* Frontend per-frame (vblank) hook: flush pending work, then decide
 * whether to present this frame, honoring blanking and frameskip. */
void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

  // nothing to show while display is blanked or nothing was drawn
  if (gpu.status.blanking || !gpu.state.fb_dirty)
    return;

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      // no complete frame yet: keep waiting, but stop skipping if
      // none was produced for 9+ frames so the screen can't freeze
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  gpu.state.fb_dirty = 0;
}
600
601void GPUvBlank(int is_vblank, int lcf)
602{
603 int interlace = gpu.state.allow_interlace
604 && gpu.status.interlace && gpu.status.dheight;
605 // interlace doesn't look nice on progressive displays,
606 // so we have this "auto" mode here for games that don't read vram
607 if (gpu.state.allow_interlace == 2
608 && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
609 {
610 interlace = 0;
611 }
612 if (interlace || interlace != gpu.state.old_interlace) {
613 gpu.state.old_interlace = interlace;
614
615 if (gpu.cmd_len > 0)
616 flush_cmd_buffer();
617 renderer_flush_queues();
618 renderer_set_interlace(interlace, !lcf);
619 }
620}
621
622#include "../../frontend/plugin_lib.h"
623
/* Frontend configuration hook: adopt frameskip settings, counter
 * pointers and renderer/vout options from the rearmed callbacks. */
void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  // restart frameskip logic from a clean state
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  // replace the dummy counters installed by GPUinit()
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}
639
640// vim:shiftwidth=2:expandtab