gpu_neon: keep texture bits in sync
[pcsx_rearmed.git] / plugins / gpu_neon / gpu.c
CommitLineData
1ab64c54
GI
1/*
2 * (C) Gražvydas "notaz" Ignotas, 2011
3 *
4 * This work is licensed under the terms of any of these licenses
5 * (at your option):
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
9 */
10
d30279e2 11#include <stdio.h>
1ab64c54 12#include <string.h>
56f08d83 13#include "gpu.h"
1ab64c54
GI
14
15#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
d30279e2 16#define unlikely(x) __builtin_expect((x), 0)
8dd855cd 17#define noinline __attribute__((noinline))
1ab64c54 18
deb18d24 19#define gpu_log(fmt, ...) \
3ece2f0c 20 printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)
deb18d24 21
22//#define log_io gpu_log
56f08d83 23#define log_io(...)
9394ada5 24//#define log_anomaly gpu_log
25#define log_anomaly(...)
56f08d83 26
27struct psx_gpu gpu __attribute__((aligned(64)));
1ab64c54 28
6e9bdaef 29static noinline void do_reset(void)
1ab64c54 30{
6e9bdaef 31 memset(gpu.regs, 0, sizeof(gpu.regs));
d30279e2 32 gpu.status.reg = 0x14802000;
6e9bdaef 33 gpu.gp0 = 0;
fc84f618 34 gpu.regs[3] = 1;
6e9bdaef 35 gpu.screen.hres = gpu.screen.w = 256;
fc84f618 36 gpu.screen.vres = gpu.screen.h = 240;
1ab64c54
GI
37}
38
8dd855cd 39static noinline void update_width(void)
40{
41 int sw = gpu.screen.x2 - gpu.screen.x1;
42 if (sw <= 0 || sw >= 2560)
43 // full width
44 gpu.screen.w = gpu.screen.hres;
45 else
46 gpu.screen.w = sw * gpu.screen.hres / 2560;
47}
48
49static noinline void update_height(void)
50{
51 int sh = gpu.screen.y2 - gpu.screen.y1;
52 if (gpu.status.dheight)
53 sh *= 2;
54 if (sh <= 0)
55 sh = gpu.screen.vres;
56
57 gpu.screen.h = sh;
58}
59
fc84f618 60static noinline void decide_frameskip(void)
61{
62 gpu.frameskip.frame_ready = !gpu.frameskip.active;
63
ea4a16e7 64 if (!gpu.frameskip.active && (*gpu.frameskip.advice || gpu.frameskip.set == 1))
fc84f618 65 gpu.frameskip.active = 1;
66 else
67 gpu.frameskip.active = 0;
68}
69
6e9bdaef 70static noinline void get_gpu_info(uint32_t data)
71{
72 switch (data & 0x0f) {
73 case 0x02:
74 case 0x03:
75 case 0x04:
76 case 0x05:
77 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
78 break;
79 case 0x06:
80 gpu.gp0 = gpu.ex_regs[5] & 0xfffff;
81 break;
82 case 0x07:
83 gpu.gp0 = 2;
84 break;
85 default:
86 gpu.gp0 = 0;
87 break;
88 }
89}
90
91long GPUinit(void)
92{
9394ada5 93 int ret;
94 ret = vout_init();
95 ret |= renderer_init();
96
3ece2f0c 97 gpu.state.frame_count = &gpu.zero;
deb18d24 98 gpu.state.hcnt = &gpu.zero;
9394ada5 99 do_reset();
6e9bdaef 100 return ret;
101}
102
/* Plugin entry point: shut down video output and return its result. */
long GPUshutdown(void)
{
  long result = vout_finish();

  return result;
}
107
1ab64c54
GI
108void GPUwriteStatus(uint32_t data)
109{
110 static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
111 static const short vres[4] = { 240, 480, 256, 480 };
112 uint32_t cmd = data >> 24;
113
fc84f618 114 if (cmd < ARRAY_SIZE(gpu.regs)) {
19e7cf87 115 if (cmd != 0 && cmd != 5 && gpu.regs[cmd] == data)
fc84f618 116 return;
8dd855cd 117 gpu.regs[cmd] = data;
fc84f618 118 }
119
120 gpu.state.fb_dirty = 1;
8dd855cd 121
122 switch (cmd) {
1ab64c54 123 case 0x00:
6e9bdaef 124 do_reset();
1ab64c54
GI
125 break;
126 case 0x03:
d30279e2 127 gpu.status.blanking = data & 1;
1ab64c54
GI
128 break;
129 case 0x04:
130 gpu.status.dma = data & 3;
131 break;
132 case 0x05:
133 gpu.screen.x = data & 0x3ff;
134 gpu.screen.y = (data >> 10) & 0x3ff;
ea4a16e7 135 if (gpu.frameskip.set)
fc84f618 136 decide_frameskip();
1ab64c54 137 break;
8dd855cd 138 case 0x06:
139 gpu.screen.x1 = data & 0xfff;
140 gpu.screen.x2 = (data >> 12) & 0xfff;
141 update_width();
142 break;
1ab64c54
GI
143 case 0x07:
144 gpu.screen.y1 = data & 0x3ff;
145 gpu.screen.y2 = (data >> 10) & 0x3ff;
8dd855cd 146 update_height();
1ab64c54
GI
147 break;
148 case 0x08:
149 gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
8dd855cd 150 gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
151 gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
152 update_width();
153 update_height();
1ab64c54 154 break;
deb18d24 155 default:
156 if ((cmd & 0xf0) == 0x10)
157 get_gpu_info(data);
6e9bdaef 158 break;
1ab64c54 159 }
1ab64c54
GI
160}
161
/*
 * Number of parameter words that follow each command word, indexed by
 * the command byte (the parser uses 1 + cmd_lengths[cmd] as the total
 * command length).  Entries not listed here are zero.
 */
const unsigned char cmd_lengths[256] =
{
  [0x02] = 2,
  [0x20] = 3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8,
  [0x30] = 5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  [0x40] = 2, 2, 2, 2, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
  [0x50] = 3, 3, 3, 3, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4,
  [0x60] = 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 2, 2, 2, 2,
  [0x70] = 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  [0x80] = 3,
  [0xa0] = 2,
  [0xc0] = 2,
};
181
d30279e2
GI
182#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
183
184static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
1ab64c54 185{
d30279e2
GI
186 uint16_t *vram = VRAM_MEM_XY(x, y);
187 if (is_read)
188 memcpy(mem, vram, l * 2);
189 else
190 memcpy(vram, mem, l * 2);
191}
192
193static int do_vram_io(uint32_t *data, int count, int is_read)
194{
195 int count_initial = count;
196 uint16_t *sdata = (uint16_t *)data;
197 int x = gpu.dma.x, y = gpu.dma.y;
198 int w = gpu.dma.w, h = gpu.dma.h;
ddd56f6e 199 int o = gpu.dma.offset;
d30279e2
GI
200 int l;
201 count *= 2; // operate in 16bpp pixels
202
203 if (gpu.dma.offset) {
204 l = w - gpu.dma.offset;
ddd56f6e 205 if (count < l)
d30279e2 206 l = count;
ddd56f6e 207
208 do_vram_line(x + o, y, sdata, l, is_read);
209
210 if (o + l < w)
211 o += l;
212 else {
213 o = 0;
214 y++;
215 h--;
216 }
d30279e2
GI
217 sdata += l;
218 count -= l;
d30279e2
GI
219 }
220
221 for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
222 y &= 511;
223 do_vram_line(x, y, sdata, w, is_read);
224 }
225
226 if (h > 0 && count > 0) {
227 y &= 511;
228 do_vram_line(x, y, sdata, count, is_read);
ddd56f6e 229 o = count;
d30279e2
GI
230 count = 0;
231 }
d30279e2
GI
232 gpu.dma.y = y;
233 gpu.dma.h = h;
ddd56f6e 234 gpu.dma.offset = o;
d30279e2 235
6e9bdaef 236 return count_initial - count / 2;
d30279e2
GI
237}
238
239static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
240{
ddd56f6e 241 if (gpu.dma.h)
242 log_anomaly("start_vram_transfer while old unfinished\n");
243
d30279e2
GI
244 gpu.dma.x = pos_word & 1023;
245 gpu.dma.y = (pos_word >> 16) & 511;
246 gpu.dma.w = size_word & 0xffff; // ?
247 gpu.dma.h = size_word >> 16;
248 gpu.dma.offset = 0;
249
250 if (is_read)
251 gpu.status.img = 1;
9394ada5 252 else
253 renderer_invalidate_caches(gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
d30279e2 254
6e9bdaef 255 log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
256 gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
d30279e2
GI
257}
258
259static int check_cmd(uint32_t *data, int count)
260{
261 int len, cmd, start, pos;
fc84f618 262 int vram_dirty = 0;
d30279e2 263
d30279e2 264 // process buffer
ddd56f6e 265 for (start = pos = 0; pos < count; )
d30279e2
GI
266 {
267 cmd = -1;
268 len = 0;
269
270 if (gpu.dma.h) {
271 pos += do_vram_io(data + pos, count - pos, 0);
ddd56f6e 272 if (pos == count)
273 break;
d30279e2
GI
274 start = pos;
275 }
276
ddd56f6e 277 // do look-ahead pass to detect SR changes and VRAM i/o
d30279e2 278 while (pos < count) {
56f08d83 279 uint32_t *list = data + pos;
280 cmd = list[0] >> 24;
d30279e2 281 len = 1 + cmd_lengths[cmd];
56f08d83 282
d30279e2 283 //printf(" %3d: %02x %d\n", pos, cmd, len);
56f08d83 284 if ((cmd & 0xf4) == 0x24) {
285 // flat textured prim
a3a9f519 286 gpu.ex_regs[1] &= ~0x1ff;
287 gpu.ex_regs[1] |= list[4] & 0x1ff;
56f08d83 288 }
289 else if ((cmd & 0xf4) == 0x34) {
290 // shaded textured prim
a3a9f519 291 gpu.ex_regs[1] &= ~0x1ff;
292 gpu.ex_regs[1] |= list[5] & 0x1ff;
56f08d83 293 }
fc84f618 294 if (2 <= cmd && cmd < 0xc0)
295 vram_dirty = 1;
6e9bdaef 296 else if ((cmd & 0xf8) == 0xe0)
297 gpu.ex_regs[cmd & 7] = list[0];
56f08d83 298
d30279e2
GI
299 if (pos + len > count) {
300 cmd = -1;
301 break; // incomplete cmd
302 }
303 if (cmd == 0xa0 || cmd == 0xc0)
304 break; // image i/o
305 pos += len;
306 }
307
308 if (pos - start > 0) {
fc84f618 309 if (!gpu.frameskip.active)
310 do_cmd_list(data + start, pos - start);
d30279e2
GI
311 start = pos;
312 }
313
314 if (cmd == 0xa0 || cmd == 0xc0) {
315 // consume vram write/read cmd
316 start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
317 pos += len;
318 }
5b745e5b 319 else if (cmd == -1)
ddd56f6e 320 break;
d30279e2 321 }
ddd56f6e 322
a3a9f519 323 gpu.status.reg &= ~0x1fff;
324 gpu.status.reg |= gpu.ex_regs[1] & 0x7ff;
325 gpu.status.reg |= (gpu.ex_regs[6] & 3) << 11;
326
5b745e5b 327 if (gpu.frameskip.active)
328 renderer_sync_ecmds(gpu.ex_regs);
fc84f618 329 gpu.state.fb_dirty |= vram_dirty;
330
ddd56f6e 331 return count - pos;
d30279e2
GI
332}
333
334static void flush_cmd_buffer(void)
335{
336 int left = check_cmd(gpu.cmd_buffer, gpu.cmd_len);
337 if (left > 0)
338 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
339 gpu.cmd_len = left;
1ab64c54
GI
340}
341
342void GPUwriteDataMem(uint32_t *mem, int count)
343{
d30279e2
GI
344 int left;
345
56f08d83 346 log_io("gpu_dma_write %p %d\n", mem, count);
347
d30279e2
GI
348 if (unlikely(gpu.cmd_len > 0))
349 flush_cmd_buffer();
56f08d83 350
d30279e2
GI
351 left = check_cmd(mem, count);
352 if (left)
56f08d83 353 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
1ab64c54
GI
354}
355
d30279e2 356void GPUwriteData(uint32_t data)
1ab64c54 357{
56f08d83 358 log_io("gpu_write %08x\n", data);
d30279e2
GI
359 gpu.cmd_buffer[gpu.cmd_len++] = data;
360 if (gpu.cmd_len >= CMD_BUFFER_LEN)
361 flush_cmd_buffer();
1ab64c54
GI
362}
363
ddd56f6e 364long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
1ab64c54 365{
ddd56f6e 366 uint32_t addr, *list;
deb18d24 367 uint32_t *llist_entry = NULL;
ddd56f6e 368 int len, left, count;
deb18d24 369 long dma_words = 0;
d30279e2
GI
370
371 if (unlikely(gpu.cmd_len > 0))
372 flush_cmd_buffer();
373
deb18d24 374 // ff7 sends it's main list twice, detect this
3ece2f0c 375 if (*gpu.state.frame_count == gpu.state.last_list.frame &&
376 *gpu.state.hcnt - gpu.state.last_list.hcnt <= 1 &&
377 gpu.state.last_list.words > 1024)
deb18d24 378 {
379 llist_entry = rambase + (gpu.state.last_list.addr & 0x1fffff) / 4;
380 *llist_entry |= 0x800000;
381 }
382
56f08d83 383 log_io("gpu_dma_chain\n");
ddd56f6e 384 addr = start_addr & 0xffffff;
385 for (count = 0; addr != 0xffffff; count++)
386 {
ddd56f6e 387 list = rambase + (addr & 0x1fffff) / 4;
d30279e2
GI
388 len = list[0] >> 24;
389 addr = list[0] & 0xffffff;
deb18d24 390 dma_words += 1 + len;
391
392 log_io(".chain %08x #%d\n", (list - rambase) * 4, len);
ddd56f6e 393
394 // loop detection marker
395 // (bit23 set causes DMA error on real machine, so
396 // unlikely to be ever set by the game)
397 list[0] |= 0x800000;
398
56f08d83 399 if (len) {
400 left = check_cmd(list + 1, len);
401 if (left)
deb18d24 402 log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
56f08d83 403 }
ddd56f6e 404
405 if (addr & 0x800000)
406 break;
407 }
408
409 // remove loop detection markers
410 addr = start_addr & 0x1fffff;
411 while (count-- > 0) {
412 list = rambase + addr / 4;
413 addr = list[0] & 0x1fffff;
414 list[0] &= ~0x800000;
d30279e2 415 }
deb18d24 416 if (llist_entry)
417 *llist_entry &= ~0x800000;
d30279e2 418
3ece2f0c 419 gpu.state.last_list.frame = *gpu.state.frame_count;
deb18d24 420 gpu.state.last_list.hcnt = *gpu.state.hcnt;
421 gpu.state.last_list.words = dma_words;
422 gpu.state.last_list.addr = start_addr;
423
424 return dma_words;
1ab64c54
GI
425}
426
d30279e2
GI
427void GPUreadDataMem(uint32_t *mem, int count)
428{
56f08d83 429 log_io("gpu_dma_read %p %d\n", mem, count);
430
d30279e2
GI
431 if (unlikely(gpu.cmd_len > 0))
432 flush_cmd_buffer();
56f08d83 433
d30279e2
GI
434 if (gpu.dma.h)
435 do_vram_io(mem, count, 1);
436}
437
438uint32_t GPUreadData(void)
439{
56f08d83 440 log_io("gpu_read\n");
441
442 if (unlikely(gpu.cmd_len > 0))
443 flush_cmd_buffer();
444
445 if (gpu.dma.h)
6e9bdaef 446 do_vram_io(&gpu.gp0, 1, 1);
56f08d83 447
6e9bdaef 448 return gpu.gp0;
d30279e2
GI
449}
450
451uint32_t GPUreadStatus(void)
452{
ddd56f6e 453 uint32_t ret;
56f08d83 454
d30279e2
GI
455 if (unlikely(gpu.cmd_len > 0))
456 flush_cmd_buffer();
457
24de2dd4 458 ret = gpu.status.reg;
ddd56f6e 459 log_io("gpu_read_status %08x\n", ret);
460 return ret;
d30279e2
GI
461}
462
1ab64c54
GI
/* Save-state record exchanged with the main emulator through GPUfreeze(). */
typedef struct GPUFREEZETAG
{
  /* should be always 1 for now (set by main emu) */
  uint32_t ulFreezeVersion;
  /* current gpu status */
  uint32_t ulStatus;
  /* latest control register values */
  uint32_t ulControl[256];
  /* current VRam image (full 2 MB for ZN) */
  unsigned char psxVRam[2 * 1024 * 1024];
} GPUFreeze_t;
470
471long GPUfreeze(uint32_t type, GPUFreeze_t *freeze)
472{
fc84f618 473 int i;
474
1ab64c54
GI
475 switch (type) {
476 case 1: // save
d30279e2
GI
477 if (gpu.cmd_len > 0)
478 flush_cmd_buffer();
1ab64c54
GI
479 memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
480 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
6e9bdaef 481 memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
1ab64c54 482 freeze->ulStatus = gpu.status.reg;
1ab64c54
GI
483 break;
484 case 0: // load
9394ada5 485 renderer_invalidate_caches(0, 0, 1024, 512);
1ab64c54
GI
486 memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
487 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
6e9bdaef 488 memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
1ab64c54 489 gpu.status.reg = freeze->ulStatus;
fc84f618 490 for (i = 8; i > 0; i--) {
491 gpu.regs[i] ^= 1; // avoid reg change detection
492 GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
493 }
5b745e5b 494 renderer_sync_ecmds(gpu.ex_regs);
1ab64c54
GI
495 break;
496 }
497
498 return 1;
499}
500
1ab64c54 501// vim:shiftwidth=2:expandtab