gpu_neon: fix test mode of unai renderer
[pcsx_rearmed.git] / plugins / gpu_neon / gpu.c
... / ...
CommitLineData
1/*
2 * (C) Gražvydas "notaz" Ignotas, 2011
3 *
4 * This work is licensed under the terms of any of these licenses
5 * (at your option):
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
9 */
10
11#include <stdio.h>
12#include <string.h>
13#include "gpu.h"
14
15#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
16#define unlikely(x) __builtin_expect((x), 0)
17#define noinline __attribute__((noinline))
18
19#define gpu_log(fmt, ...) \
20 printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)
21
22//#define log_io gpu_log
23#define log_io(...)
24//#define log_anomaly gpu_log
25#define log_anomaly(...)
26
// Global GPU state shared with the renderer/vout backends.
// aligned(2048) — presumably required by the renderer's assembly addressing; confirm.
struct psx_gpu gpu __attribute__((aligned(2048)));
28
29static noinline void do_reset(void)
30{
31 memset(gpu.regs, 0, sizeof(gpu.regs));
32 gpu.status.reg = 0x14802000;
33 gpu.gp0 = 0;
34 gpu.regs[3] = 1;
35 gpu.screen.hres = gpu.screen.w = 256;
36 gpu.screen.vres = gpu.screen.h = 240;
37}
38
39static noinline void update_width(void)
40{
41 int sw = gpu.screen.x2 - gpu.screen.x1;
42 if (sw <= 0 || sw >= 2560)
43 // full width
44 gpu.screen.w = gpu.screen.hres;
45 else
46 gpu.screen.w = sw * gpu.screen.hres / 2560;
47}
48
49static noinline void update_height(void)
50{
51 int sh = gpu.screen.y2 - gpu.screen.y1;
52 if (gpu.status.dheight)
53 sh *= 2;
54 if (sh <= 0)
55 sh = gpu.screen.vres;
56
57 gpu.screen.h = sh;
58}
59
60static noinline void decide_frameskip(void)
61{
62 if (gpu.frameskip.active)
63 gpu.frameskip.cnt++;
64 else {
65 gpu.frameskip.cnt = 0;
66 gpu.frameskip.frame_ready = 1;
67 }
68
69 if (!gpu.frameskip.active && *gpu.frameskip.advice)
70 gpu.frameskip.active = 1;
71 else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
72 gpu.frameskip.active = 1;
73 else
74 gpu.frameskip.active = 0;
75}
76
77static noinline void decide_frameskip_allow(uint32_t cmd_e3)
78{
79 // no frameskip if it decides to draw to display area,
80 // but not for interlace since it'll most likely always do that
81 uint32_t x = cmd_e3 & 0x3ff;
82 uint32_t y = (cmd_e3 >> 10) & 0x3ff;
83 gpu.frameskip.allow = gpu.status.interlace ||
84 (uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
85 (uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
86}
87
88static noinline void get_gpu_info(uint32_t data)
89{
90 switch (data & 0x0f) {
91 case 0x02:
92 case 0x03:
93 case 0x04:
94 case 0x05:
95 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
96 break;
97 case 0x06:
98 gpu.gp0 = gpu.ex_regs[5] & 0xfffff;
99 break;
100 case 0x07:
101 gpu.gp0 = 2;
102 break;
103 default:
104 gpu.gp0 = 0;
105 break;
106 }
107}
108
109long GPUinit(void)
110{
111 int ret;
112 ret = vout_init();
113 ret |= renderer_init();
114
115 gpu.state.frame_count = &gpu.zero;
116 gpu.state.hcnt = &gpu.zero;
117 do_reset();
118 return ret;
119}
120
// Plugin entry point: tear down the video output backend.
long GPUshutdown(void)
{
  long ret = vout_finish();
  return ret;
}
125
// GP1 (control/status) register write handler.
void GPUwriteStatus(uint32_t data)
{
  // horizontal res indexed by status bits 16-18, vertical by bits 19-20
  // (see the GP1(08h) display mode handling below)
  static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
  static const short vres[4] = { 240, 480, 256, 480 };
  uint32_t cmd = data >> 24;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    // filter out writes that change nothing; 0 (reset) and 5 (display
    // address) always have side effects so they are never filtered
    if (cmd != 0 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
    case 0x00:
      do_reset();
      break;
    case 0x03:
      // display enable/blanking
      gpu.status.blanking = data & 1;
      break;
    case 0x04:
      // DMA direction
      gpu.status.dma = data & 3;
      break;
    case 0x05:
      // start of displayed VRAM area; a write here is treated as a "flip"
      gpu.screen.x = data & 0x3ff;
      gpu.screen.y = (data >> 10) & 0x3ff;
      if (gpu.frameskip.set) {
        decide_frameskip_allow(gpu.ex_regs[3]);
        // only run the frameskip decision once per emulated frame
        if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
          decide_frameskip();
          gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
        }
      }
      break;
    case 0x06:
      // horizontal display range
      gpu.screen.x1 = data & 0xfff;
      gpu.screen.x2 = (data >> 12) & 0xfff;
      update_width();
      break;
    case 0x07:
      // vertical display range
      gpu.screen.y1 = data & 0x3ff;
      gpu.screen.y2 = (data >> 10) & 0x3ff;
      update_height();
      break;
    case 0x08:
      // display mode: data bits 0-5 -> status bits 17-22, bit 6 -> bit 16
      gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
      gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
      gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
      update_width();
      update_height();
      break;
    default:
      if ((cmd & 0xf0) == 0x10)
        get_gpu_info(data);
      break;
  }
}
184
// Number of parameter words following each GP0 command byte
// (0 for parameterless commands and unused opcodes); the actual
// command length is 1 + cmd_lengths[cmd].
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 2, 2, 2, 2, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
204
205#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
206
207static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
208{
209 uint16_t *vram = VRAM_MEM_XY(x, y);
210 if (is_read)
211 memcpy(mem, vram, l * 2);
212 else
213 memcpy(vram, mem, l * 2);
214}
215
// Pump up to 'count' words of an in-progress VRAM image transfer
// (set up by start_vram_transfer). The transfer rectangle state lives
// in gpu.dma and is written back here so the transfer can resume on
// the next call. Returns the number of 32-bit words consumed.
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  // first finish the line left partially done by the previous call
  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read);

    if (o + l < w)
      o += l;     // line still incomplete, remember the new offset
    else {
      o = 0;      // line finished, advance to the next row
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  // whole lines
  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;     // VRAM wraps vertically at 512 lines
    do_vram_line(x, y, sdata, w, is_read);
  }

  // trailing partial line; stash the offset for the next call
  if (h > 0 && count > 0) {
    y &= 511;
    do_vram_line(x, y, sdata, count, is_read);
    o = count;
    count = 0;
  }
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  // count is in halfwords at this point, convert back to words
  return count_initial - count / 2;
}
261
262static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
263{
264 if (gpu.dma.h)
265 log_anomaly("start_vram_transfer while old unfinished\n");
266
267 gpu.dma.x = pos_word & 1023;
268 gpu.dma.y = (pos_word >> 16) & 511;
269 gpu.dma.w = size_word & 0xffff; // ?
270 gpu.dma.h = size_word >> 16;
271 gpu.dma.offset = 0;
272
273 if (is_read)
274 gpu.status.img = 1;
275 else
276 renderer_invalidate_caches(gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
277
278 log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
279 gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
280}
281
// Parse a buffer of GP0 words: scan ahead to detect status-affecting
// E1-E7 commands and VRAM image i/o, feed drawing spans to the renderer
// (unless this frame is being skipped), and pump active image transfers.
// Returns the number of trailing words that form an incomplete command
// and were therefore left unprocessed.
static int check_cmd(uint32_t *data, int count)
{
  int len, cmd, start, pos;
  int vram_dirty = 0;

  // process buffer
  for (start = pos = 0; pos < count; )
  {
    cmd = -1;
    len = 0;

    // an image transfer is in progress - feed it first
    if (gpu.dma.h) {
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
      start = pos;
    }

    // do look-ahead pass to detect SR changes and VRAM i/o
    while (pos < count) {
      uint32_t *list = data + pos;
      cmd = list[0] >> 24;
      len = 1 + cmd_lengths[cmd];

      //printf(" %3d: %02x %d\n", pos, cmd, len);
      if ((cmd & 0xf4) == 0x24) {
        // flat textured prim: texpage word is the 5th
        gpu.ex_regs[1] &= ~0x1ff;
        gpu.ex_regs[1] |= list[4] & 0x1ff;
      }
      else if ((cmd & 0xf4) == 0x34) {
        // shaded textured prim: texpage word is the 6th
        gpu.ex_regs[1] &= ~0x1ff;
        gpu.ex_regs[1] |= list[5] & 0x1ff;
      }
      else if (cmd == 0xe3)
        decide_frameskip_allow(list[0]);

      if (2 <= cmd && cmd < 0xc0)
        vram_dirty = 1;                  // something will be drawn
      else if ((cmd & 0xf8) == 0xe0)
        gpu.ex_regs[cmd & 7] = list[0];  // latch E0-E7 state

      if (pos + len > count) {
        cmd = -1;
        break; // incomplete cmd
      }
      if (cmd == 0xa0 || cmd == 0xc0)
        break; // image i/o
      pos += len;
    }

    // hand everything scanned so far to the renderer in one batch
    if (pos - start > 0) {
      if (!gpu.frameskip.active || !gpu.frameskip.allow)
        do_cmd_list(data + start, pos - start);
      start = pos;
    }

    if (cmd == 0xa0 || cmd == 0xc0) {
      // consume vram write/read cmd
      start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
      pos += len;
    }
    else if (cmd == -1)
      break;
  }

  // mirror the latched E-command state into the status register
  gpu.status.reg &= ~0x1fff;
  gpu.status.reg |= gpu.ex_regs[1] & 0x7ff;
  gpu.status.reg |= (gpu.ex_regs[6] & 3) << 11;

  // when skipping, the renderer never saw the E-commands - sync it
  if (gpu.frameskip.active)
    renderer_sync_ecmds(gpu.ex_regs);
  gpu.state.fb_dirty |= vram_dirty;

  return count - pos;
}
359
360static void flush_cmd_buffer(void)
361{
362 int left = check_cmd(gpu.cmd_buffer, gpu.cmd_len);
363 if (left > 0)
364 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
365 gpu.cmd_len = left;
366}
367
368void GPUwriteDataMem(uint32_t *mem, int count)
369{
370 int left;
371
372 log_io("gpu_dma_write %p %d\n", mem, count);
373
374 if (unlikely(gpu.cmd_len > 0))
375 flush_cmd_buffer();
376
377 left = check_cmd(mem, count);
378 if (left)
379 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
380}
381
382void GPUwriteData(uint32_t data)
383{
384 log_io("gpu_write %08x\n", data);
385 gpu.cmd_buffer[gpu.cmd_len++] = data;
386 if (gpu.cmd_len >= CMD_BUFFER_LEN)
387 flush_cmd_buffer();
388}
389
// Walk and execute a GP0 linked-list DMA chain starting at start_addr.
// Uses bit 23 of each header word as a temporary loop-detection marker
// (cleared again before returning). Returns the number of words
// transferred, which the emu uses for DMA timing.
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
{
  uint32_t addr, *list;
  uint32_t *llist_entry = NULL;
  int len, left, count;
  long dma_words = 0;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  // ff7 sends it's main list twice, detect this
  if (*gpu.state.frame_count == gpu.state.last_list.frame &&
      *gpu.state.hcnt - gpu.state.last_list.hcnt <= 1 &&
      gpu.state.last_list.words > 1024)
  {
    // pre-mark the previous list's head so re-walking it stops immediately
    llist_entry = rambase + (gpu.state.last_list.addr & 0x1fffff) / 4;
    *llist_entry |= 0x800000;
  }

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; addr != 0xffffff; count++)
  {
    // header word: payload length in the top byte, next-entry address below
    list = rambase + (addr & 0x1fffff) / 4;
    len = list[0] >> 24;
    addr = list[0] & 0xffffff;
    dma_words += 1 + len;

    log_io(".chain %08x #%d\n", (list - rambase) * 4, len);

    // loop detection marker
    // (bit23 set causes DMA error on real machine, so
    // unlikely to be ever set by the game)
    list[0] |= 0x800000;

    if (len) {
      left = check_cmd(list + 1, len);
      if (left)
        log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
    }

    // a marked next-entry means we've been here before: loop, stop
    if (addr & 0x800000)
      break;
  }

  // remove loop detection markers
  addr = start_addr & 0x1fffff;
  while (count-- > 0) {
    list = rambase + addr / 4;
    addr = list[0] & 0x1fffff;
    list[0] &= ~0x800000;
  }
  if (llist_entry)
    *llist_entry &= ~0x800000;

  // remember this list so the next call can detect a resend (see above)
  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.words = dma_words;
  gpu.state.last_list.addr = start_addr;

  return dma_words;
}
452
453void GPUreadDataMem(uint32_t *mem, int count)
454{
455 log_io("gpu_dma_read %p %d\n", mem, count);
456
457 if (unlikely(gpu.cmd_len > 0))
458 flush_cmd_buffer();
459
460 if (gpu.dma.h)
461 do_vram_io(mem, count, 1);
462}
463
464uint32_t GPUreadData(void)
465{
466 log_io("gpu_read\n");
467
468 if (unlikely(gpu.cmd_len > 0))
469 flush_cmd_buffer();
470
471 if (gpu.dma.h)
472 do_vram_io(&gpu.gp0, 1, 1);
473
474 return gpu.gp0;
475}
476
477uint32_t GPUreadStatus(void)
478{
479 uint32_t ret;
480
481 if (unlikely(gpu.cmd_len > 0))
482 flush_cmd_buffer();
483
484 ret = gpu.status.reg;
485 log_io("gpu_read_status %08x\n", ret);
486 return ret;
487}
488
// Savestate layout defined by the PCSX GPU plugin API;
// field order and sizes must not change.
struct GPUFreeze
{
  uint32_t ulFreezeVersion;      // should be always 1 for now (set by main emu)
  uint32_t ulStatus;             // current gpu status
  uint32_t ulControl[256];       // latest control register values
  unsigned char psxVRam[1024*1024*2];    // current VRam image (full 2 MB for ZN)
};
496
// Savestate entry point: type 1 = save into *freeze, type 0 = load from it.
// Always returns 1 (success), per the plugin API convention.
long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
    case 1: // save
      if (gpu.cmd_len > 0)
        flush_cmd_buffer();
      memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
      memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
      // ex_regs (E1-E7 state) are stashed in the unused upper control slots
      memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
      freeze->ulStatus = gpu.status.reg;
      break;
    case 0: // load
      renderer_invalidate_caches(0, 0, 1024, 512);
      memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
      memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
      memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
      gpu.status.reg = freeze->ulStatus;
      // replay GP1 regs 8..1 to rebuild derived state (screen size etc.);
      // the stored copy is toggled with ^1 so GPUwriteStatus' same-value
      // filter can't skip the write, while the data actually passed
      // (gpu.regs[i] ^ 1) is the original value
      for (i = 8; i > 0; i--) {
        gpu.regs[i] ^= 1; // avoid reg change detection
        GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
      }
      renderer_sync_ecmds(gpu.ex_regs);
      break;
  }

  return 1;
}
526
527// vim:shiftwidth=2:expandtab