fix some minor frontend issues
[pcsx_rearmed.git] / plugins / gpu_neon / gpu.c
CommitLineData
1ab64c54
GI
1/*
2 * (C) Gražvydas "notaz" Ignotas, 2011
3 *
4 * This work is licensed under the terms of any of these licenses
5 * (at your option):
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
9 */
10
d30279e2 11#include <stdio.h>
1ab64c54 12#include <string.h>
56f08d83 13#include "gpu.h"
1ab64c54
GI
14
15#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
d30279e2 16#define unlikely(x) __builtin_expect((x), 0)
8dd855cd 17#define noinline __attribute__((noinline))
1ab64c54 18
deb18d24 19#define gpu_log(fmt, ...) \
20 printf("%d:%03d: " fmt, gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)
21
22//#define log_io gpu_log
56f08d83 23#define log_io(...)
9394ada5 24//#define log_anomaly gpu_log
25#define log_anomaly(...)
56f08d83 26
27struct psx_gpu gpu __attribute__((aligned(64)));
1ab64c54 28
6e9bdaef 29static noinline void do_reset(void)
1ab64c54 30{
6e9bdaef 31 memset(gpu.regs, 0, sizeof(gpu.regs));
d30279e2 32 gpu.status.reg = 0x14802000;
6e9bdaef 33 gpu.gp0 = 0;
fc84f618 34 gpu.regs[3] = 1;
6e9bdaef 35 gpu.screen.hres = gpu.screen.w = 256;
fc84f618 36 gpu.screen.vres = gpu.screen.h = 240;
1ab64c54
GI
37}
38
8dd855cd 39static noinline void update_width(void)
40{
41 int sw = gpu.screen.x2 - gpu.screen.x1;
42 if (sw <= 0 || sw >= 2560)
43 // full width
44 gpu.screen.w = gpu.screen.hres;
45 else
46 gpu.screen.w = sw * gpu.screen.hres / 2560;
47}
48
49static noinline void update_height(void)
50{
51 int sh = gpu.screen.y2 - gpu.screen.y1;
52 if (gpu.status.dheight)
53 sh *= 2;
54 if (sh <= 0)
55 sh = gpu.screen.vres;
56
57 gpu.screen.h = sh;
58}
59
fc84f618 60static noinline void decide_frameskip(void)
61{
62 gpu.frameskip.frame_ready = !gpu.frameskip.active;
63
64 if (!gpu.frameskip.active && *gpu.frameskip.advice)
65 gpu.frameskip.active = 1;
66 else
67 gpu.frameskip.active = 0;
68}
69
6e9bdaef 70static noinline void get_gpu_info(uint32_t data)
71{
72 switch (data & 0x0f) {
73 case 0x02:
74 case 0x03:
75 case 0x04:
76 case 0x05:
77 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
78 break;
79 case 0x06:
80 gpu.gp0 = gpu.ex_regs[5] & 0xfffff;
81 break;
82 case 0x07:
83 gpu.gp0 = 2;
84 break;
85 default:
86 gpu.gp0 = 0;
87 break;
88 }
89}
90
91long GPUinit(void)
92{
9394ada5 93 int ret;
94 ret = vout_init();
95 ret |= renderer_init();
96
6e9bdaef 97 gpu.lcf_hc = &gpu.zero;
deb18d24 98 gpu.state.frame_count = 0;
99 gpu.state.hcnt = &gpu.zero;
9394ada5 100 do_reset();
6e9bdaef 101 return ret;
102}
103
/* Shut down video output and hand its result back to the caller. */
long GPUshutdown(void)
{
  long result = vout_finish();
  return result;
}
108
1ab64c54
GI
109void GPUwriteStatus(uint32_t data)
110{
111 static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
112 static const short vres[4] = { 240, 480, 256, 480 };
113 uint32_t cmd = data >> 24;
114
fc84f618 115 if (cmd < ARRAY_SIZE(gpu.regs)) {
19e7cf87 116 if (cmd != 0 && cmd != 5 && gpu.regs[cmd] == data)
fc84f618 117 return;
8dd855cd 118 gpu.regs[cmd] = data;
fc84f618 119 }
120
121 gpu.state.fb_dirty = 1;
8dd855cd 122
123 switch (cmd) {
1ab64c54 124 case 0x00:
6e9bdaef 125 do_reset();
1ab64c54
GI
126 break;
127 case 0x03:
d30279e2 128 gpu.status.blanking = data & 1;
1ab64c54
GI
129 break;
130 case 0x04:
131 gpu.status.dma = data & 3;
132 break;
133 case 0x05:
134 gpu.screen.x = data & 0x3ff;
135 gpu.screen.y = (data >> 10) & 0x3ff;
fc84f618 136 if (gpu.frameskip.enabled)
137 decide_frameskip();
1ab64c54 138 break;
8dd855cd 139 case 0x06:
140 gpu.screen.x1 = data & 0xfff;
141 gpu.screen.x2 = (data >> 12) & 0xfff;
142 update_width();
143 break;
1ab64c54
GI
144 case 0x07:
145 gpu.screen.y1 = data & 0x3ff;
146 gpu.screen.y2 = (data >> 10) & 0x3ff;
8dd855cd 147 update_height();
1ab64c54
GI
148 break;
149 case 0x08:
150 gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
8dd855cd 151 gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
152 gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
153 update_width();
154 update_height();
1ab64c54 155 break;
deb18d24 156 default:
157 if ((cmd & 0xf0) == 0x10)
158 get_gpu_info(data);
6e9bdaef 159 break;
1ab64c54 160 }
1ab64c54
GI
161}
162
/*
 * Number of parameter words that follow each command word, indexed by the
 * command byte (top 8 bits of the first word). The full command length
 * in words is 1 + cmd_lengths[cmd].
 */
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,         // 00
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,         // 10
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8,         // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,     // 30
  2, 2, 2, 2, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,         // 40
  3, 3, 3, 3, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4,         // 50
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 2, 2, 2, 2,         // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,         // 70
  3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,         // 80
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,         // 90
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,         // a0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,         // b0
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,         // c0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,         // d0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,         // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0          // f0
};
182
d30279e2
GI
183#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
184
185static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
1ab64c54 186{
d30279e2
GI
187 uint16_t *vram = VRAM_MEM_XY(x, y);
188 if (is_read)
189 memcpy(mem, vram, l * 2);
190 else
191 memcpy(vram, mem, l * 2);
192}
193
194static int do_vram_io(uint32_t *data, int count, int is_read)
195{
196 int count_initial = count;
197 uint16_t *sdata = (uint16_t *)data;
198 int x = gpu.dma.x, y = gpu.dma.y;
199 int w = gpu.dma.w, h = gpu.dma.h;
ddd56f6e 200 int o = gpu.dma.offset;
d30279e2
GI
201 int l;
202 count *= 2; // operate in 16bpp pixels
203
204 if (gpu.dma.offset) {
205 l = w - gpu.dma.offset;
ddd56f6e 206 if (count < l)
d30279e2 207 l = count;
ddd56f6e 208
209 do_vram_line(x + o, y, sdata, l, is_read);
210
211 if (o + l < w)
212 o += l;
213 else {
214 o = 0;
215 y++;
216 h--;
217 }
d30279e2
GI
218 sdata += l;
219 count -= l;
d30279e2
GI
220 }
221
222 for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
223 y &= 511;
224 do_vram_line(x, y, sdata, w, is_read);
225 }
226
227 if (h > 0 && count > 0) {
228 y &= 511;
229 do_vram_line(x, y, sdata, count, is_read);
ddd56f6e 230 o = count;
d30279e2
GI
231 count = 0;
232 }
d30279e2
GI
233 gpu.dma.y = y;
234 gpu.dma.h = h;
ddd56f6e 235 gpu.dma.offset = o;
d30279e2 236
6e9bdaef 237 return count_initial - count / 2;
d30279e2
GI
238}
239
240static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
241{
ddd56f6e 242 if (gpu.dma.h)
243 log_anomaly("start_vram_transfer while old unfinished\n");
244
d30279e2
GI
245 gpu.dma.x = pos_word & 1023;
246 gpu.dma.y = (pos_word >> 16) & 511;
247 gpu.dma.w = size_word & 0xffff; // ?
248 gpu.dma.h = size_word >> 16;
249 gpu.dma.offset = 0;
250
251 if (is_read)
252 gpu.status.img = 1;
9394ada5 253 else
254 renderer_invalidate_caches(gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
d30279e2 255
6e9bdaef 256 log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
257 gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
d30279e2
GI
258}
259
260static int check_cmd(uint32_t *data, int count)
261{
262 int len, cmd, start, pos;
fc84f618 263 int vram_dirty = 0;
d30279e2 264
d30279e2 265 // process buffer
ddd56f6e 266 for (start = pos = 0; pos < count; )
d30279e2
GI
267 {
268 cmd = -1;
269 len = 0;
270
271 if (gpu.dma.h) {
272 pos += do_vram_io(data + pos, count - pos, 0);
ddd56f6e 273 if (pos == count)
274 break;
d30279e2
GI
275 start = pos;
276 }
277
ddd56f6e 278 // do look-ahead pass to detect SR changes and VRAM i/o
d30279e2 279 while (pos < count) {
56f08d83 280 uint32_t *list = data + pos;
281 cmd = list[0] >> 24;
d30279e2 282 len = 1 + cmd_lengths[cmd];
56f08d83 283
d30279e2 284 //printf(" %3d: %02x %d\n", pos, cmd, len);
56f08d83 285 if ((cmd & 0xf4) == 0x24) {
286 // flat textured prim
287 gpu.status.reg &= ~0x1ff;
288 gpu.status.reg |= list[4] & 0x1ff;
289 }
290 else if ((cmd & 0xf4) == 0x34) {
291 // shaded textured prim
292 gpu.status.reg &= ~0x1ff;
293 gpu.status.reg |= list[5] & 0x1ff;
294 }
295 else switch (cmd)
296 {
297 case 0xe1:
298 gpu.status.reg &= ~0x7ff;
299 gpu.status.reg |= list[0] & 0x7ff;
300 break;
301 case 0xe6:
302 gpu.status.reg &= ~0x1800;
303 gpu.status.reg |= (list[0] & 3) << 11;
304 break;
305 }
fc84f618 306 if (2 <= cmd && cmd < 0xc0)
307 vram_dirty = 1;
6e9bdaef 308 else if ((cmd & 0xf8) == 0xe0)
309 gpu.ex_regs[cmd & 7] = list[0];
56f08d83 310
d30279e2
GI
311 if (pos + len > count) {
312 cmd = -1;
313 break; // incomplete cmd
314 }
315 if (cmd == 0xa0 || cmd == 0xc0)
316 break; // image i/o
317 pos += len;
318 }
319
320 if (pos - start > 0) {
fc84f618 321 if (!gpu.frameskip.active)
322 do_cmd_list(data + start, pos - start);
d30279e2
GI
323 start = pos;
324 }
325
326 if (cmd == 0xa0 || cmd == 0xc0) {
327 // consume vram write/read cmd
328 start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
329 pos += len;
330 }
5b745e5b 331 else if (cmd == -1)
ddd56f6e 332 break;
d30279e2 333 }
ddd56f6e 334
5b745e5b 335 if (gpu.frameskip.active)
336 renderer_sync_ecmds(gpu.ex_regs);
fc84f618 337 gpu.state.fb_dirty |= vram_dirty;
338
ddd56f6e 339 return count - pos;
d30279e2
GI
340}
341
342static void flush_cmd_buffer(void)
343{
344 int left = check_cmd(gpu.cmd_buffer, gpu.cmd_len);
345 if (left > 0)
346 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
347 gpu.cmd_len = left;
1ab64c54
GI
348}
349
350void GPUwriteDataMem(uint32_t *mem, int count)
351{
d30279e2
GI
352 int left;
353
56f08d83 354 log_io("gpu_dma_write %p %d\n", mem, count);
355
d30279e2
GI
356 if (unlikely(gpu.cmd_len > 0))
357 flush_cmd_buffer();
56f08d83 358
d30279e2
GI
359 left = check_cmd(mem, count);
360 if (left)
56f08d83 361 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
1ab64c54
GI
362}
363
d30279e2 364void GPUwriteData(uint32_t data)
1ab64c54 365{
56f08d83 366 log_io("gpu_write %08x\n", data);
d30279e2
GI
367 gpu.cmd_buffer[gpu.cmd_len++] = data;
368 if (gpu.cmd_len >= CMD_BUFFER_LEN)
369 flush_cmd_buffer();
1ab64c54
GI
370}
371
ddd56f6e 372long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
1ab64c54 373{
ddd56f6e 374 uint32_t addr, *list;
deb18d24 375 uint32_t *llist_entry = NULL;
ddd56f6e 376 int len, left, count;
deb18d24 377 long dma_words = 0;
d30279e2
GI
378
379 if (unlikely(gpu.cmd_len > 0))
380 flush_cmd_buffer();
381
deb18d24 382 // ff7 sends it's main list twice, detect this
383 if (gpu.state.frame_count == gpu.state.last_list.frame &&
384 *gpu.state.hcnt - gpu.state.last_list.hcnt <= 1 &&
385 gpu.state.last_list.words > 1024)
386 {
387 llist_entry = rambase + (gpu.state.last_list.addr & 0x1fffff) / 4;
388 *llist_entry |= 0x800000;
389 }
390
56f08d83 391 log_io("gpu_dma_chain\n");
ddd56f6e 392 addr = start_addr & 0xffffff;
393 for (count = 0; addr != 0xffffff; count++)
394 {
ddd56f6e 395 list = rambase + (addr & 0x1fffff) / 4;
d30279e2
GI
396 len = list[0] >> 24;
397 addr = list[0] & 0xffffff;
deb18d24 398 dma_words += 1 + len;
399
400 log_io(".chain %08x #%d\n", (list - rambase) * 4, len);
ddd56f6e 401
402 // loop detection marker
403 // (bit23 set causes DMA error on real machine, so
404 // unlikely to be ever set by the game)
405 list[0] |= 0x800000;
406
56f08d83 407 if (len) {
408 left = check_cmd(list + 1, len);
409 if (left)
deb18d24 410 log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
56f08d83 411 }
ddd56f6e 412
413 if (addr & 0x800000)
414 break;
415 }
416
417 // remove loop detection markers
418 addr = start_addr & 0x1fffff;
419 while (count-- > 0) {
420 list = rambase + addr / 4;
421 addr = list[0] & 0x1fffff;
422 list[0] &= ~0x800000;
d30279e2 423 }
deb18d24 424 if (llist_entry)
425 *llist_entry &= ~0x800000;
d30279e2 426
deb18d24 427 gpu.state.last_list.frame = gpu.state.frame_count;
428 gpu.state.last_list.hcnt = *gpu.state.hcnt;
429 gpu.state.last_list.words = dma_words;
430 gpu.state.last_list.addr = start_addr;
431
432 return dma_words;
1ab64c54
GI
433}
434
d30279e2
GI
435void GPUreadDataMem(uint32_t *mem, int count)
436{
56f08d83 437 log_io("gpu_dma_read %p %d\n", mem, count);
438
d30279e2
GI
439 if (unlikely(gpu.cmd_len > 0))
440 flush_cmd_buffer();
56f08d83 441
d30279e2
GI
442 if (gpu.dma.h)
443 do_vram_io(mem, count, 1);
444}
445
446uint32_t GPUreadData(void)
447{
56f08d83 448 log_io("gpu_read\n");
449
450 if (unlikely(gpu.cmd_len > 0))
451 flush_cmd_buffer();
452
453 if (gpu.dma.h)
6e9bdaef 454 do_vram_io(&gpu.gp0, 1, 1);
56f08d83 455
6e9bdaef 456 return gpu.gp0;
d30279e2
GI
457}
458
459uint32_t GPUreadStatus(void)
460{
ddd56f6e 461 uint32_t ret;
56f08d83 462
d30279e2
GI
463 if (unlikely(gpu.cmd_len > 0))
464 flush_cmd_buffer();
465
ddd56f6e 466 ret = gpu.status.reg | (*gpu.lcf_hc << 31);
467 log_io("gpu_read_status %08x\n", ret);
468 return ret;
d30279e2
GI
469}
470
1ab64c54
GI
/* Save-state layout exchanged with the main emulator through GPUfreeze(). */
typedef struct GPUFREEZETAG
{
  uint32_t ulFreezeVersion;             // should be always 1 for now (set by main emu)
  uint32_t ulStatus;                    // current gpu status
  uint32_t ulControl[256];              // latest control register values
  unsigned char psxVRam[2 * 1024 * 1024]; // current VRam image (full 2 MB for ZN)
} GPUFreeze_t;
478
479long GPUfreeze(uint32_t type, GPUFreeze_t *freeze)
480{
fc84f618 481 int i;
482
1ab64c54
GI
483 switch (type) {
484 case 1: // save
d30279e2
GI
485 if (gpu.cmd_len > 0)
486 flush_cmd_buffer();
1ab64c54
GI
487 memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
488 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
6e9bdaef 489 memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
1ab64c54 490 freeze->ulStatus = gpu.status.reg;
1ab64c54
GI
491 break;
492 case 0: // load
9394ada5 493 renderer_invalidate_caches(0, 0, 1024, 512);
1ab64c54
GI
494 memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
495 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
6e9bdaef 496 memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
1ab64c54 497 gpu.status.reg = freeze->ulStatus;
fc84f618 498 for (i = 8; i > 0; i--) {
499 gpu.regs[i] ^= 1; // avoid reg change detection
500 GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
501 }
5b745e5b 502 renderer_sync_ecmds(gpu.ex_regs);
1ab64c54
GI
503 break;
504 }
505
506 return 1;
507}
508
d30279e2 509void GPUvBlank(int val, uint32_t *hcnt)
1ab64c54 510{
d30279e2
GI
511 gpu.lcf_hc = &gpu.zero;
512 if (gpu.status.interlace) {
513 if (val)
514 gpu.status.lcf ^= 1;
515 }
516 else {
517 gpu.status.lcf = 0;
518 if (!val)
519 gpu.lcf_hc = hcnt;
520 }
deb18d24 521 if (!val)
522 gpu.state.frame_count++;
523
524 gpu.state.hcnt = hcnt;
1ab64c54
GI
525}
526
1ab64c54 527// vim:shiftwidth=2:expandtab