gpu_neon: propagate configs to rasterizers
[pcsx_rearmed.git] / plugins / gpu_neon / gpu.c
... / ...
CommitLineData
1/*
 * (C) Gražvydas "notaz" Ignotas, 2011
3 *
4 * This work is licensed under the terms of any of these licenses
5 * (at your option):
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
9 */
10
11#include <stdio.h>
12#include <string.h>
13#include "gpu.h"
14
15#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
16#define unlikely(x) __builtin_expect((x), 0)
17#define noinline __attribute__((noinline))
18
19#define gpu_log(fmt, ...) \
20 printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)
21
22//#define log_io gpu_log
23#define log_io(...)
24//#define log_anomaly gpu_log
25#define log_anomaly(...)
26
27struct psx_gpu gpu __attribute__((aligned(64)));
28
29static noinline void do_reset(void)
30{
31 memset(gpu.regs, 0, sizeof(gpu.regs));
32 gpu.status.reg = 0x14802000;
33 gpu.gp0 = 0;
34 gpu.regs[3] = 1;
35 gpu.screen.hres = gpu.screen.w = 256;
36 gpu.screen.vres = gpu.screen.h = 240;
37}
38
39static noinline void update_width(void)
40{
41 int sw = gpu.screen.x2 - gpu.screen.x1;
42 if (sw <= 0 || sw >= 2560)
43 // full width
44 gpu.screen.w = gpu.screen.hres;
45 else
46 gpu.screen.w = sw * gpu.screen.hres / 2560;
47}
48
49static noinline void update_height(void)
50{
51 int sh = gpu.screen.y2 - gpu.screen.y1;
52 if (gpu.status.dheight)
53 sh *= 2;
54 if (sh <= 0)
55 sh = gpu.screen.vres;
56
57 gpu.screen.h = sh;
58}
59
60static noinline void decide_frameskip(void)
61{
62 gpu.frameskip.frame_ready = !gpu.frameskip.active;
63
64 if (!gpu.frameskip.active && (*gpu.frameskip.advice || gpu.frameskip.set == 1))
65 gpu.frameskip.active = 1;
66 else
67 gpu.frameskip.active = 0;
68}
69
70static noinline void get_gpu_info(uint32_t data)
71{
72 switch (data & 0x0f) {
73 case 0x02:
74 case 0x03:
75 case 0x04:
76 case 0x05:
77 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
78 break;
79 case 0x06:
80 gpu.gp0 = gpu.ex_regs[5] & 0xfffff;
81 break;
82 case 0x07:
83 gpu.gp0 = 2;
84 break;
85 default:
86 gpu.gp0 = 0;
87 break;
88 }
89}
90
91long GPUinit(void)
92{
93 int ret;
94 ret = vout_init();
95 ret |= renderer_init();
96
97 gpu.state.frame_count = &gpu.zero;
98 gpu.state.hcnt = &gpu.zero;
99 do_reset();
100 return ret;
101}
102
// Plugin teardown: only the video output layer has cleanup here;
// the renderer has no finish hook in this version.
long GPUshutdown(void)
{
  return vout_finish();
}
107
// GP1 control port write.  The command is the top byte of 'data';
// derived screen state and the status register are updated as needed.
void GPUwriteStatus(uint32_t data)
{
  // horizontal resolution lookup, indexed by status bits 16-18
  static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
  // vertical resolution lookup, indexed by status bits 19 (vres) and 22 (interlace)
  static const short vres[4] = { 240, 480, 256, 480 };
  uint32_t cmd = data >> 24;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    // skip no-change writes, except reset (0) and display address (5),
    // which must always take effect
    if (cmd != 0 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
    case 0x00:  // reset GPU
      do_reset();
      break;
    case 0x03:  // display enable/blanking
      gpu.status.blanking = data & 1;
      break;
    case 0x04:  // DMA direction
      gpu.status.dma = data & 3;
      break;
    case 0x05:  // start of display area in VRAM
      gpu.screen.x = data & 0x3ff;
      gpu.screen.y = (data >> 10) & 0x3ff;
      // a display address write marks a flip - at most one frameskip
      // decision per emulated frame
      if (gpu.frameskip.set && gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
        decide_frameskip();
        gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
      }
      break;
    case 0x06:  // horizontal display range
      gpu.screen.x1 = data & 0xfff;
      gpu.screen.x2 = (data >> 12) & 0xfff;
      update_width();
      break;
    case 0x07:  // vertical display range
      gpu.screen.y1 = data & 0x3ff;
      gpu.screen.y2 = (data >> 10) & 0x3ff;
      update_height();
      break;
    case 0x08:  // display mode
      // bits 0-5 of data -> status bits 17-22, bit 6 -> status bit 16
      gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
      gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
      gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
      update_width();
      update_height();
      break;
    default:
      // 0x10-0x1f: GPU info queries
      if ((cmd & 0xf0) == 0x10)
        get_gpu_info(data);
      break;
  }
}
163
// Number of EXTRA words each GP0 command carries after the command word
// itself (the parser adds 1 for the command word).  Indexed by the top
// byte of the command.  Entries of 0 cover both no-parameter commands
// and unused opcodes.
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20 - polygons
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, // 40 - lines
  3, 3, 3, 3, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 2, 2, 2, 2, // 60 - rects/sprites
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80 - vram-to-vram copy
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0 - cpu-to-vram write
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0 - vram-to-cpu read
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0 - env settings
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
183
184#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
185
// Copy one horizontal span of 'l' pixels between VRAM at (x, y)
// and the host buffer 'mem'; direction is selected by is_read.
static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
{
  uint16_t *row = VRAM_MEM_XY(x, y);

  if (is_read)
    memcpy(mem, row, l * 2);  // VRAM -> buffer
  else
    memcpy(row, mem, l * 2);  // buffer -> VRAM
}
194
// Move up to 'count' 32-bit words between 'data' and the active VRAM
// transfer rectangle (gpu.dma).  A transfer may span multiple calls;
// progress is kept in gpu.dma.{y,h,offset}.  Returns the number of
// words actually consumed/produced.
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  // first finish a row that a previous call left half-done
  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    // NOTE(review): y is not masked with 511 here, unlike the paths
    // below - presumably it can't exceed 511 at this point; confirm
    do_vram_line(x + o, y, sdata, l, is_read);

    if (o + l < w)
      o += l;   // row still incomplete
    else {
      // row finished - advance to the next one
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  // bulk transfer: whole rows while both data and rectangle remain
  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;  // wrap within VRAM height
    do_vram_line(x, y, sdata, w, is_read);
  }

  // trailing partial row - remember how far we got in 'o'
  if (h > 0 && count > 0) {
    y &= 511;
    do_vram_line(x, y, sdata, count, is_read);
    o = count;
    count = 0;
  }
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  // words consumed = initial words minus leftover pixels / 2
  return count_initial - count / 2;
}
240
241static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
242{
243 if (gpu.dma.h)
244 log_anomaly("start_vram_transfer while old unfinished\n");
245
246 gpu.dma.x = pos_word & 1023;
247 gpu.dma.y = (pos_word >> 16) & 511;
248 gpu.dma.w = size_word & 0xffff; // ?
249 gpu.dma.h = size_word >> 16;
250 gpu.dma.offset = 0;
251
252 if (is_read)
253 gpu.status.img = 1;
254 else
255 renderer_invalidate_caches(gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
256
257 log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
258 gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
259}
260
// Parse and dispatch a buffer of GP0 words: feed complete command runs
// to the renderer (do_cmd_list), route image i/o to do_vram_io, and
// shadow state-register writes into gpu.ex_regs.  Returns the number
// of trailing words that form an incomplete command (left unconsumed).
static int check_cmd(uint32_t *data, int count)
{
  int len, cmd, start, pos;
  int vram_dirty = 0;

  // process buffer
  for (start = pos = 0; pos < count; )
  {
    cmd = -1;
    len = 0;

    // an image transfer is still in progress - feed it first
    if (gpu.dma.h) {
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
      start = pos;
    }

    // do look-ahead pass to detect SR changes and VRAM i/o
    while (pos < count) {
      uint32_t *list = data + pos;
      cmd = list[0] >> 24;
      len = 1 + cmd_lengths[cmd];

      //printf("  %3d: %02x %d\n", pos, cmd, len);
      // NOTE(review): list[4]/list[5] below are read before the
      // pos + len bounds check - presumably prims always arrive whole;
      // confirm no over-read on a split command
      if ((cmd & 0xf4) == 0x24) {
        // flat textured prim: texpage word shadows into ex_regs[1]
        gpu.ex_regs[1] &= ~0x1ff;
        gpu.ex_regs[1] |= list[4] & 0x1ff;
      }
      else if ((cmd & 0xf4) == 0x34) {
        // shaded textured prim
        gpu.ex_regs[1] &= ~0x1ff;
        gpu.ex_regs[1] |= list[5] & 0x1ff;
      }
      else if (cmd == 0xe3)
      {
        // no frameskip if it decides to draw to display area,
        // but not for interlace since it'll most likely always do that
        uint32_t x = list[0] & 0x3ff;
        uint32_t y = (list[0] >> 10) & 0x3ff;
        gpu.frameskip.allow = gpu.status.interlace ||
          (uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
          (uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
      }
      if (2 <= cmd && cmd < 0xc0)
        vram_dirty = 1;          // any drawing/copy command touches VRAM
      else if ((cmd & 0xf8) == 0xe0)
        gpu.ex_regs[cmd & 7] = list[0];  // e1-e6 env settings

      if (pos + len > count) {
        cmd = -1;
        break; // incomplete cmd
      }
      if (cmd == 0xa0 || cmd == 0xc0)
        break; // image i/o
      pos += len;
    }

    // hand the scanned run [start, pos) to the renderer,
    // unless this frame is being skipped
    if (pos - start > 0) {
      if (!gpu.frameskip.active || !gpu.frameskip.allow)
        do_cmd_list(data + start, pos - start);
      start = pos;
    }

    if (cmd == 0xa0 || cmd == 0xc0) {
      // consume vram write/read cmd
      start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
      pos += len;
    }
    else if (cmd == -1)
      break;  // incomplete trailing command - caller keeps it buffered
  }

  // mirror the shadowed env registers into the status word
  gpu.status.reg &= ~0x1fff;
  gpu.status.reg |= gpu.ex_regs[1] & 0x7ff;
  gpu.status.reg |= (gpu.ex_regs[6] & 3) << 11;

  // while skipping, the renderer still needs the env state kept current
  if (gpu.frameskip.active)
    renderer_sync_ecmds(gpu.ex_regs);
  gpu.state.fb_dirty |= vram_dirty;

  return count - pos;
}
345
346static void flush_cmd_buffer(void)
347{
348 int left = check_cmd(gpu.cmd_buffer, gpu.cmd_len);
349 if (left > 0)
350 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
351 gpu.cmd_len = left;
352}
353
354void GPUwriteDataMem(uint32_t *mem, int count)
355{
356 int left;
357
358 log_io("gpu_dma_write %p %d\n", mem, count);
359
360 if (unlikely(gpu.cmd_len > 0))
361 flush_cmd_buffer();
362
363 left = check_cmd(mem, count);
364 if (left)
365 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
366}
367
368void GPUwriteData(uint32_t data)
369{
370 log_io("gpu_write %08x\n", data);
371 gpu.cmd_buffer[gpu.cmd_len++] = data;
372 if (gpu.cmd_len >= CMD_BUFFER_LEN)
373 flush_cmd_buffer();
374}
375
// Walk a GPU DMA linked list starting at start_addr in PSX RAM and feed
// each packet's payload to check_cmd().  Uses bit 23 of each header as
// a temporary loop-detection marker (removed afterwards).  Returns the
// total number of words traversed (headers + payloads).
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
{
  uint32_t addr, *list;
  uint32_t *llist_entry = NULL;
  int len, left, count;
  long dma_words = 0;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  // ff7 sends its main list twice, detect this: if the previous large
  // list was sent in the same frame within one hblank, pre-mark its
  // head so the walk below terminates on it immediately
  if (*gpu.state.frame_count == gpu.state.last_list.frame &&
      *gpu.state.hcnt - gpu.state.last_list.hcnt <= 1 &&
      gpu.state.last_list.words > 1024)
  {
    llist_entry = rambase + (gpu.state.last_list.addr & 0x1fffff) / 4;
    *llist_entry |= 0x800000;
  }

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; addr != 0xffffff; count++)  // 0xffffff = end-of-list
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = list[0] >> 24;         // payload word count
    addr = list[0] & 0xffffff;   // next packet address
    dma_words += 1 + len;

    log_io(".chain %08x #%d\n", (list - rambase) * 4, len);

    // loop detection marker
    // (bit23 set causes DMA error on real machine, so
    //  unlikely to be ever set by the game)
    list[0] |= 0x800000;

    if (len) {
      left = check_cmd(list + 1, len);
      if (left)
        log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
    }

    // marker seen in the next address -> we've been here before; stop
    if (addr & 0x800000)
      break;
  }

  // remove loop detection markers
  addr = start_addr & 0x1fffff;
  while (count-- > 0) {
    list = rambase + addr / 4;
    addr = list[0] & 0x1fffff;
    list[0] &= ~0x800000;
  }
  if (llist_entry)
    *llist_entry &= ~0x800000;

  // remember this list for the double-send detection above
  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.words = dma_words;
  gpu.state.last_list.addr = start_addr;

  return dma_words;
}
438
439void GPUreadDataMem(uint32_t *mem, int count)
440{
441 log_io("gpu_dma_read %p %d\n", mem, count);
442
443 if (unlikely(gpu.cmd_len > 0))
444 flush_cmd_buffer();
445
446 if (gpu.dma.h)
447 do_vram_io(mem, count, 1);
448}
449
450uint32_t GPUreadData(void)
451{
452 log_io("gpu_read\n");
453
454 if (unlikely(gpu.cmd_len > 0))
455 flush_cmd_buffer();
456
457 if (gpu.dma.h)
458 do_vram_io(&gpu.gp0, 1, 1);
459
460 return gpu.gp0;
461}
462
463uint32_t GPUreadStatus(void)
464{
465 uint32_t ret;
466
467 if (unlikely(gpu.cmd_len > 0))
468 flush_cmd_buffer();
469
470 ret = gpu.status.reg;
471 log_io("gpu_read_status %08x\n", ret);
472 return ret;
473}
474
// Savestate layout shared with the emulator core (PE-compatible ABI);
// field order and sizes must not change.
struct GPUFreeze
{
  uint32_t ulFreezeVersion;      // should be always 1 for now (set by main emu)
  uint32_t ulStatus;             // current gpu status
  uint32_t ulControl[256];       // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};
482
// Savestate entry point: type 1 = save into 'freeze', type 0 = load
// from it.  gpu.ex_regs are stashed in the unused upper part of
// ulControl (from index 0xe0).  Always returns 1 (success).
long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
    case 1: // save
      if (gpu.cmd_len > 0)
        flush_cmd_buffer();
      memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
      memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
      memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
      freeze->ulStatus = gpu.status.reg;
      break;
    case 0: // load
      // restored VRAM invalidates everything the renderer cached
      renderer_invalidate_caches(0, 0, 1024, 512);
      memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
      memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
      memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
      gpu.status.reg = freeze->ulStatus;
      // replay GP1 regs 8..1 (not 0 - that would reset the GPU) to
      // rebuild derived screen state; the xor makes the stored value
      // differ so GPUwriteStatus()'s no-change early-out can't skip it
      for (i = 8; i > 0; i--) {
        gpu.regs[i] ^= 1; // avoid reg change detection
        GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
      }
      renderer_sync_ecmds(gpu.ex_regs);
      break;
  }

  return 1;
}
512
513// vim:shiftwidth=2:expandtab