gpu_neon: keep texture bits in sync
[pcsx_rearmed.git] / plugins / gpu_neon / gpu.c
... / ...
CommitLineData
1/*
2 * (C) Gražvydas "notaz" Ignotas, 2011
3 *
4 * This work is licensed under the terms of any of these licenses
5 * (at your option):
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
9 */
10
11#include <stdio.h>
12#include <string.h>
13#include "gpu.h"
14
// element count of a real array (gives a wrong answer for pointers)
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
// tells the compiler that the condition is expected to be false
#define unlikely(x) __builtin_expect((x), 0)
// forbids inlining of the function it is attached to
#define noinline __attribute__((noinline))

// printf wrapper that prefixes the message with the values
// currently found behind gpu.state.frame_count and gpu.state.hcnt
#define gpu_log(fmt, ...) \
  printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)

// i/o tracing, compiled out by default;
// use the commented-out definition instead to route it to gpu_log
//#define log_io gpu_log
#define log_io(...)
// anomaly reports, compiled out by default;
// use the commented-out definition instead to route them to gpu_log
//#define log_anomaly gpu_log
#define log_anomaly(...)
26
27struct psx_gpu gpu __attribute__((aligned(64)));
28
29static noinline void do_reset(void)
30{
31 memset(gpu.regs, 0, sizeof(gpu.regs));
32 gpu.status.reg = 0x14802000;
33 gpu.gp0 = 0;
34 gpu.regs[3] = 1;
35 gpu.screen.hres = gpu.screen.w = 256;
36 gpu.screen.vres = gpu.screen.h = 240;
37}
38
39static noinline void update_width(void)
40{
41 int sw = gpu.screen.x2 - gpu.screen.x1;
42 if (sw <= 0 || sw >= 2560)
43 // full width
44 gpu.screen.w = gpu.screen.hres;
45 else
46 gpu.screen.w = sw * gpu.screen.hres / 2560;
47}
48
49static noinline void update_height(void)
50{
51 int sh = gpu.screen.y2 - gpu.screen.y1;
52 if (gpu.status.dheight)
53 sh *= 2;
54 if (sh <= 0)
55 sh = gpu.screen.vres;
56
57 gpu.screen.h = sh;
58}
59
60static noinline void decide_frameskip(void)
61{
62 gpu.frameskip.frame_ready = !gpu.frameskip.active;
63
64 if (!gpu.frameskip.active && (*gpu.frameskip.advice || gpu.frameskip.set == 1))
65 gpu.frameskip.active = 1;
66 else
67 gpu.frameskip.active = 0;
68}
69
70static noinline void get_gpu_info(uint32_t data)
71{
72 switch (data & 0x0f) {
73 case 0x02:
74 case 0x03:
75 case 0x04:
76 case 0x05:
77 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
78 break;
79 case 0x06:
80 gpu.gp0 = gpu.ex_regs[5] & 0xfffff;
81 break;
82 case 0x07:
83 gpu.gp0 = 2;
84 break;
85 default:
86 gpu.gp0 = 0;
87 break;
88 }
89}
90
91long GPUinit(void)
92{
93 int ret;
94 ret = vout_init();
95 ret |= renderer_init();
96
97 gpu.state.frame_count = &gpu.zero;
98 gpu.state.hcnt = &gpu.zero;
99 do_reset();
100 return ret;
101}
102
// Plugin entry point: shut down video output.
// Returns whatever vout_finish() reports.
long GPUshutdown(void)
{
  long ret = vout_finish();
  return ret;
}
107
108void GPUwriteStatus(uint32_t data)
109{
110 static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
111 static const short vres[4] = { 240, 480, 256, 480 };
112 uint32_t cmd = data >> 24;
113
114 if (cmd < ARRAY_SIZE(gpu.regs)) {
115 if (cmd != 0 && cmd != 5 && gpu.regs[cmd] == data)
116 return;
117 gpu.regs[cmd] = data;
118 }
119
120 gpu.state.fb_dirty = 1;
121
122 switch (cmd) {
123 case 0x00:
124 do_reset();
125 break;
126 case 0x03:
127 gpu.status.blanking = data & 1;
128 break;
129 case 0x04:
130 gpu.status.dma = data & 3;
131 break;
132 case 0x05:
133 gpu.screen.x = data & 0x3ff;
134 gpu.screen.y = (data >> 10) & 0x3ff;
135 if (gpu.frameskip.set)
136 decide_frameskip();
137 break;
138 case 0x06:
139 gpu.screen.x1 = data & 0xfff;
140 gpu.screen.x2 = (data >> 12) & 0xfff;
141 update_width();
142 break;
143 case 0x07:
144 gpu.screen.y1 = data & 0x3ff;
145 gpu.screen.y2 = (data >> 10) & 0x3ff;
146 update_height();
147 break;
148 case 0x08:
149 gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
150 gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
151 gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
152 update_width();
153 update_height();
154 break;
155 default:
156 if ((cmd & 0xf0) == 0x10)
157 get_gpu_info(data);
158 break;
159 }
160}
161
// Number of words that follow the command word, indexed by the command
// number (top byte of the first word). Entries not listed here are 0.
const unsigned char cmd_lengths[256] =
{
  [0x02] = 2,

  // 20
  [0x20 ... 0x23] = 3,  [0x24 ... 0x27] = 6,
  [0x28 ... 0x2b] = 4,  [0x2c ... 0x2f] = 8,
  [0x30 ... 0x33] = 5,  [0x34 ... 0x37] = 8,
  [0x38 ... 0x3b] = 7,  [0x3c ... 0x3f] = 11,

  // 40
  [0x40 ... 0x43] = 2,  [0x48 ... 0x4f] = 3,
  [0x50 ... 0x53] = 3,  [0x58 ... 0x5f] = 4,

  // 60
  [0x60 ... 0x63] = 2,  [0x64 ... 0x67] = 3,
  [0x68 ... 0x6b] = 1,  [0x6c ... 0x6f] = 2,
  [0x70 ... 0x73] = 1,  [0x74 ... 0x77] = 2,
  [0x78 ... 0x7b] = 1,  [0x7c ... 0x7f] = 2,

  [0x80] = 3,
  [0xa0] = 2,
  [0xc0] = 2,
};
181
182#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
183
184static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
185{
186 uint16_t *vram = VRAM_MEM_XY(x, y);
187 if (is_read)
188 memcpy(mem, vram, l * 2);
189 else
190 memcpy(vram, mem, l * 2);
191}
192
193static int do_vram_io(uint32_t *data, int count, int is_read)
194{
195 int count_initial = count;
196 uint16_t *sdata = (uint16_t *)data;
197 int x = gpu.dma.x, y = gpu.dma.y;
198 int w = gpu.dma.w, h = gpu.dma.h;
199 int o = gpu.dma.offset;
200 int l;
201 count *= 2; // operate in 16bpp pixels
202
203 if (gpu.dma.offset) {
204 l = w - gpu.dma.offset;
205 if (count < l)
206 l = count;
207
208 do_vram_line(x + o, y, sdata, l, is_read);
209
210 if (o + l < w)
211 o += l;
212 else {
213 o = 0;
214 y++;
215 h--;
216 }
217 sdata += l;
218 count -= l;
219 }
220
221 for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
222 y &= 511;
223 do_vram_line(x, y, sdata, w, is_read);
224 }
225
226 if (h > 0 && count > 0) {
227 y &= 511;
228 do_vram_line(x, y, sdata, count, is_read);
229 o = count;
230 count = 0;
231 }
232 gpu.dma.y = y;
233 gpu.dma.h = h;
234 gpu.dma.offset = o;
235
236 return count_initial - count / 2;
237}
238
239static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
240{
241 if (gpu.dma.h)
242 log_anomaly("start_vram_transfer while old unfinished\n");
243
244 gpu.dma.x = pos_word & 1023;
245 gpu.dma.y = (pos_word >> 16) & 511;
246 gpu.dma.w = size_word & 0xffff; // ?
247 gpu.dma.h = size_word >> 16;
248 gpu.dma.offset = 0;
249
250 if (is_read)
251 gpu.status.img = 1;
252 else
253 renderer_invalidate_caches(gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
254
255 log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
256 gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
257}
258
259static int check_cmd(uint32_t *data, int count)
260{
261 int len, cmd, start, pos;
262 int vram_dirty = 0;
263
264 // process buffer
265 for (start = pos = 0; pos < count; )
266 {
267 cmd = -1;
268 len = 0;
269
270 if (gpu.dma.h) {
271 pos += do_vram_io(data + pos, count - pos, 0);
272 if (pos == count)
273 break;
274 start = pos;
275 }
276
277 // do look-ahead pass to detect SR changes and VRAM i/o
278 while (pos < count) {
279 uint32_t *list = data + pos;
280 cmd = list[0] >> 24;
281 len = 1 + cmd_lengths[cmd];
282
283 //printf(" %3d: %02x %d\n", pos, cmd, len);
284 if ((cmd & 0xf4) == 0x24) {
285 // flat textured prim
286 gpu.ex_regs[1] &= ~0x1ff;
287 gpu.ex_regs[1] |= list[4] & 0x1ff;
288 }
289 else if ((cmd & 0xf4) == 0x34) {
290 // shaded textured prim
291 gpu.ex_regs[1] &= ~0x1ff;
292 gpu.ex_regs[1] |= list[5] & 0x1ff;
293 }
294 if (2 <= cmd && cmd < 0xc0)
295 vram_dirty = 1;
296 else if ((cmd & 0xf8) == 0xe0)
297 gpu.ex_regs[cmd & 7] = list[0];
298
299 if (pos + len > count) {
300 cmd = -1;
301 break; // incomplete cmd
302 }
303 if (cmd == 0xa0 || cmd == 0xc0)
304 break; // image i/o
305 pos += len;
306 }
307
308 if (pos - start > 0) {
309 if (!gpu.frameskip.active)
310 do_cmd_list(data + start, pos - start);
311 start = pos;
312 }
313
314 if (cmd == 0xa0 || cmd == 0xc0) {
315 // consume vram write/read cmd
316 start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
317 pos += len;
318 }
319 else if (cmd == -1)
320 break;
321 }
322
323 gpu.status.reg &= ~0x1fff;
324 gpu.status.reg |= gpu.ex_regs[1] & 0x7ff;
325 gpu.status.reg |= (gpu.ex_regs[6] & 3) << 11;
326
327 if (gpu.frameskip.active)
328 renderer_sync_ecmds(gpu.ex_regs);
329 gpu.state.fb_dirty |= vram_dirty;
330
331 return count - pos;
332}
333
334static void flush_cmd_buffer(void)
335{
336 int left = check_cmd(gpu.cmd_buffer, gpu.cmd_len);
337 if (left > 0)
338 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
339 gpu.cmd_len = left;
340}
341
342void GPUwriteDataMem(uint32_t *mem, int count)
343{
344 int left;
345
346 log_io("gpu_dma_write %p %d\n", mem, count);
347
348 if (unlikely(gpu.cmd_len > 0))
349 flush_cmd_buffer();
350
351 left = check_cmd(mem, count);
352 if (left)
353 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
354}
355
356void GPUwriteData(uint32_t data)
357{
358 log_io("gpu_write %08x\n", data);
359 gpu.cmd_buffer[gpu.cmd_len++] = data;
360 if (gpu.cmd_len >= CMD_BUFFER_LEN)
361 flush_cmd_buffer();
362}
363
364long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
365{
366 uint32_t addr, *list;
367 uint32_t *llist_entry = NULL;
368 int len, left, count;
369 long dma_words = 0;
370
371 if (unlikely(gpu.cmd_len > 0))
372 flush_cmd_buffer();
373
374 // ff7 sends it's main list twice, detect this
375 if (*gpu.state.frame_count == gpu.state.last_list.frame &&
376 *gpu.state.hcnt - gpu.state.last_list.hcnt <= 1 &&
377 gpu.state.last_list.words > 1024)
378 {
379 llist_entry = rambase + (gpu.state.last_list.addr & 0x1fffff) / 4;
380 *llist_entry |= 0x800000;
381 }
382
383 log_io("gpu_dma_chain\n");
384 addr = start_addr & 0xffffff;
385 for (count = 0; addr != 0xffffff; count++)
386 {
387 list = rambase + (addr & 0x1fffff) / 4;
388 len = list[0] >> 24;
389 addr = list[0] & 0xffffff;
390 dma_words += 1 + len;
391
392 log_io(".chain %08x #%d\n", (list - rambase) * 4, len);
393
394 // loop detection marker
395 // (bit23 set causes DMA error on real machine, so
396 // unlikely to be ever set by the game)
397 list[0] |= 0x800000;
398
399 if (len) {
400 left = check_cmd(list + 1, len);
401 if (left)
402 log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
403 }
404
405 if (addr & 0x800000)
406 break;
407 }
408
409 // remove loop detection markers
410 addr = start_addr & 0x1fffff;
411 while (count-- > 0) {
412 list = rambase + addr / 4;
413 addr = list[0] & 0x1fffff;
414 list[0] &= ~0x800000;
415 }
416 if (llist_entry)
417 *llist_entry &= ~0x800000;
418
419 gpu.state.last_list.frame = *gpu.state.frame_count;
420 gpu.state.last_list.hcnt = *gpu.state.hcnt;
421 gpu.state.last_list.words = dma_words;
422 gpu.state.last_list.addr = start_addr;
423
424 return dma_words;
425}
426
427void GPUreadDataMem(uint32_t *mem, int count)
428{
429 log_io("gpu_dma_read %p %d\n", mem, count);
430
431 if (unlikely(gpu.cmd_len > 0))
432 flush_cmd_buffer();
433
434 if (gpu.dma.h)
435 do_vram_io(mem, count, 1);
436}
437
438uint32_t GPUreadData(void)
439{
440 log_io("gpu_read\n");
441
442 if (unlikely(gpu.cmd_len > 0))
443 flush_cmd_buffer();
444
445 if (gpu.dma.h)
446 do_vram_io(&gpu.gp0, 1, 1);
447
448 return gpu.gp0;
449}
450
451uint32_t GPUreadStatus(void)
452{
453 uint32_t ret;
454
455 if (unlikely(gpu.cmd_len > 0))
456 flush_cmd_buffer();
457
458 ret = gpu.status.reg;
459 log_io("gpu_read_status %08x\n", ret);
460 return ret;
461}
462
// Savestate block exchanged with the main emulator through GPUfreeze().
typedef struct GPUFREEZETAG
{
  // should be always 1 for now (set by main emu)
  uint32_t ulFreezeVersion;
  // current gpu status
  uint32_t ulStatus;
  // latest control register values
  uint32_t ulControl[256];
  // current VRam image (full 2 MB for ZN)
  unsigned char psxVRam[1024*1024*2];
} GPUFreeze_t;
470
471long GPUfreeze(uint32_t type, GPUFreeze_t *freeze)
472{
473 int i;
474
475 switch (type) {
476 case 1: // save
477 if (gpu.cmd_len > 0)
478 flush_cmd_buffer();
479 memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
480 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
481 memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
482 freeze->ulStatus = gpu.status.reg;
483 break;
484 case 0: // load
485 renderer_invalidate_caches(0, 0, 1024, 512);
486 memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
487 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
488 memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
489 gpu.status.reg = freeze->ulStatus;
490 for (i = 8; i > 0; i--) {
491 gpu.regs[i] ^= 1; // avoid reg change detection
492 GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
493 }
494 renderer_sync_ecmds(gpu.ex_regs);
495 break;
496 }
497
498 return 1;
499}
500
501// vim:shiftwidth=2:expandtab