remove gpu vblank callbacks
[pcsx_rearmed.git] / plugins / gpu_neon / gpu.c
CommitLineData
1ab64c54
GI
1/*
2 * (C) Gražvydas "notaz" Ignotas, 2011
3 *
4 * This work is licensed under the terms of any of these licenses
5 * (at your option):
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
9 */
10
d30279e2 11#include <stdio.h>
1ab64c54 12#include <string.h>
56f08d83 13#include "gpu.h"
1ab64c54
GI
14
15#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
d30279e2 16#define unlikely(x) __builtin_expect((x), 0)
8dd855cd 17#define noinline __attribute__((noinline))
1ab64c54 18
deb18d24 19#define gpu_log(fmt, ...) \
20 printf("%d:%03d: " fmt, gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)
21
22//#define log_io gpu_log
56f08d83 23#define log_io(...)
9394ada5 24//#define log_anomaly gpu_log
25#define log_anomaly(...)
56f08d83 26
27struct psx_gpu gpu __attribute__((aligned(64)));
1ab64c54 28
6e9bdaef 29static noinline void do_reset(void)
1ab64c54 30{
6e9bdaef 31 memset(gpu.regs, 0, sizeof(gpu.regs));
d30279e2 32 gpu.status.reg = 0x14802000;
6e9bdaef 33 gpu.gp0 = 0;
fc84f618 34 gpu.regs[3] = 1;
6e9bdaef 35 gpu.screen.hres = gpu.screen.w = 256;
fc84f618 36 gpu.screen.vres = gpu.screen.h = 240;
1ab64c54
GI
37}
38
8dd855cd 39static noinline void update_width(void)
40{
41 int sw = gpu.screen.x2 - gpu.screen.x1;
42 if (sw <= 0 || sw >= 2560)
43 // full width
44 gpu.screen.w = gpu.screen.hres;
45 else
46 gpu.screen.w = sw * gpu.screen.hres / 2560;
47}
48
49static noinline void update_height(void)
50{
51 int sh = gpu.screen.y2 - gpu.screen.y1;
52 if (gpu.status.dheight)
53 sh *= 2;
54 if (sh <= 0)
55 sh = gpu.screen.vres;
56
57 gpu.screen.h = sh;
58}
59
fc84f618 60static noinline void decide_frameskip(void)
61{
62 gpu.frameskip.frame_ready = !gpu.frameskip.active;
63
ea4a16e7 64 if (!gpu.frameskip.active && (*gpu.frameskip.advice || gpu.frameskip.set == 1))
fc84f618 65 gpu.frameskip.active = 1;
66 else
67 gpu.frameskip.active = 0;
68}
69
6e9bdaef 70static noinline void get_gpu_info(uint32_t data)
71{
72 switch (data & 0x0f) {
73 case 0x02:
74 case 0x03:
75 case 0x04:
76 case 0x05:
77 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
78 break;
79 case 0x06:
80 gpu.gp0 = gpu.ex_regs[5] & 0xfffff;
81 break;
82 case 0x07:
83 gpu.gp0 = 2;
84 break;
85 default:
86 gpu.gp0 = 0;
87 break;
88 }
89}
90
91long GPUinit(void)
92{
9394ada5 93 int ret;
94 ret = vout_init();
95 ret |= renderer_init();
96
deb18d24 97 gpu.state.frame_count = 0;
98 gpu.state.hcnt = &gpu.zero;
9394ada5 99 do_reset();
6e9bdaef 100 return ret;
101}
102
/*
 * Plugin entry point: shut down video output.
 * Returns whatever vout_finish() reports.
 */
long GPUshutdown(void)
{
  long result = vout_finish();

  return result;
}
107
1ab64c54
GI
108void GPUwriteStatus(uint32_t data)
109{
110 static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
111 static const short vres[4] = { 240, 480, 256, 480 };
112 uint32_t cmd = data >> 24;
113
fc84f618 114 if (cmd < ARRAY_SIZE(gpu.regs)) {
19e7cf87 115 if (cmd != 0 && cmd != 5 && gpu.regs[cmd] == data)
fc84f618 116 return;
8dd855cd 117 gpu.regs[cmd] = data;
fc84f618 118 }
119
120 gpu.state.fb_dirty = 1;
8dd855cd 121
122 switch (cmd) {
1ab64c54 123 case 0x00:
6e9bdaef 124 do_reset();
1ab64c54
GI
125 break;
126 case 0x03:
d30279e2 127 gpu.status.blanking = data & 1;
1ab64c54
GI
128 break;
129 case 0x04:
130 gpu.status.dma = data & 3;
131 break;
132 case 0x05:
133 gpu.screen.x = data & 0x3ff;
134 gpu.screen.y = (data >> 10) & 0x3ff;
ea4a16e7 135 if (gpu.frameskip.set)
fc84f618 136 decide_frameskip();
1ab64c54 137 break;
8dd855cd 138 case 0x06:
139 gpu.screen.x1 = data & 0xfff;
140 gpu.screen.x2 = (data >> 12) & 0xfff;
141 update_width();
142 break;
1ab64c54
GI
143 case 0x07:
144 gpu.screen.y1 = data & 0x3ff;
145 gpu.screen.y2 = (data >> 10) & 0x3ff;
8dd855cd 146 update_height();
1ab64c54
GI
147 break;
148 case 0x08:
149 gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
8dd855cd 150 gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
151 gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
152 update_width();
153 update_height();
1ab64c54 154 break;
deb18d24 155 default:
156 if ((cmd & 0xf0) == 0x10)
157 get_gpu_info(data);
6e9bdaef 158 break;
1ab64c54 159 }
1ab64c54
GI
160}
161
/*
 * Number of words that follow the command word, indexed by the command byte
 * (top 8 bits of the first word). check_cmd() treats a command as
 * 1 + cmd_lengths[cmd] words long.
 */
const unsigned char cmd_lengths[256] =
{
  /* 00 */ 0, 0, 2, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
  /* 10 */ 0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
  /* 20 */ 3, 3, 3, 3,  6, 6, 6, 6,  4, 4, 4, 4,  8, 8, 8, 8,
  /* 30 */ 5, 5, 5, 5,  8, 8, 8, 8,  7, 7, 7, 7,  11, 11, 11, 11,
  /* 40 */ 2, 2, 2, 2,  0, 0, 0, 0,  3, 3, 3, 3,  3, 3, 3, 3,
  /* 50 */ 3, 3, 3, 3,  0, 0, 0, 0,  4, 4, 4, 4,  4, 4, 4, 4,
  /* 60 */ 2, 2, 2, 2,  3, 3, 3, 3,  1, 1, 1, 1,  2, 2, 2, 2,
  /* 70 */ 1, 1, 1, 1,  2, 2, 2, 2,  1, 1, 1, 1,  2, 2, 2, 2,
  /* 80 */ 3, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
  /* 90 */ 0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
  /* a0 */ 2, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
  /* b0 */ 0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
  /* c0 */ 2, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
  /* d0 */ 0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
  /* e0 */ 0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
  /* f0 */ 0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0
};
181
d30279e2
GI
182#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
183
184static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
1ab64c54 185{
d30279e2
GI
186 uint16_t *vram = VRAM_MEM_XY(x, y);
187 if (is_read)
188 memcpy(mem, vram, l * 2);
189 else
190 memcpy(vram, mem, l * 2);
191}
192
193static int do_vram_io(uint32_t *data, int count, int is_read)
194{
195 int count_initial = count;
196 uint16_t *sdata = (uint16_t *)data;
197 int x = gpu.dma.x, y = gpu.dma.y;
198 int w = gpu.dma.w, h = gpu.dma.h;
ddd56f6e 199 int o = gpu.dma.offset;
d30279e2
GI
200 int l;
201 count *= 2; // operate in 16bpp pixels
202
203 if (gpu.dma.offset) {
204 l = w - gpu.dma.offset;
ddd56f6e 205 if (count < l)
d30279e2 206 l = count;
ddd56f6e 207
208 do_vram_line(x + o, y, sdata, l, is_read);
209
210 if (o + l < w)
211 o += l;
212 else {
213 o = 0;
214 y++;
215 h--;
216 }
d30279e2
GI
217 sdata += l;
218 count -= l;
d30279e2
GI
219 }
220
221 for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
222 y &= 511;
223 do_vram_line(x, y, sdata, w, is_read);
224 }
225
226 if (h > 0 && count > 0) {
227 y &= 511;
228 do_vram_line(x, y, sdata, count, is_read);
ddd56f6e 229 o = count;
d30279e2
GI
230 count = 0;
231 }
d30279e2
GI
232 gpu.dma.y = y;
233 gpu.dma.h = h;
ddd56f6e 234 gpu.dma.offset = o;
d30279e2 235
6e9bdaef 236 return count_initial - count / 2;
d30279e2
GI
237}
238
239static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
240{
ddd56f6e 241 if (gpu.dma.h)
242 log_anomaly("start_vram_transfer while old unfinished\n");
243
d30279e2
GI
244 gpu.dma.x = pos_word & 1023;
245 gpu.dma.y = (pos_word >> 16) & 511;
246 gpu.dma.w = size_word & 0xffff; // ?
247 gpu.dma.h = size_word >> 16;
248 gpu.dma.offset = 0;
249
250 if (is_read)
251 gpu.status.img = 1;
9394ada5 252 else
253 renderer_invalidate_caches(gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
d30279e2 254
6e9bdaef 255 log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
256 gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
d30279e2
GI
257}
258
259static int check_cmd(uint32_t *data, int count)
260{
261 int len, cmd, start, pos;
fc84f618 262 int vram_dirty = 0;
d30279e2 263
d30279e2 264 // process buffer
ddd56f6e 265 for (start = pos = 0; pos < count; )
d30279e2
GI
266 {
267 cmd = -1;
268 len = 0;
269
270 if (gpu.dma.h) {
271 pos += do_vram_io(data + pos, count - pos, 0);
ddd56f6e 272 if (pos == count)
273 break;
d30279e2
GI
274 start = pos;
275 }
276
ddd56f6e 277 // do look-ahead pass to detect SR changes and VRAM i/o
d30279e2 278 while (pos < count) {
56f08d83 279 uint32_t *list = data + pos;
280 cmd = list[0] >> 24;
d30279e2 281 len = 1 + cmd_lengths[cmd];
56f08d83 282
d30279e2 283 //printf(" %3d: %02x %d\n", pos, cmd, len);
56f08d83 284 if ((cmd & 0xf4) == 0x24) {
285 // flat textured prim
286 gpu.status.reg &= ~0x1ff;
287 gpu.status.reg |= list[4] & 0x1ff;
288 }
289 else if ((cmd & 0xf4) == 0x34) {
290 // shaded textured prim
291 gpu.status.reg &= ~0x1ff;
292 gpu.status.reg |= list[5] & 0x1ff;
293 }
294 else switch (cmd)
295 {
296 case 0xe1:
297 gpu.status.reg &= ~0x7ff;
298 gpu.status.reg |= list[0] & 0x7ff;
299 break;
300 case 0xe6:
301 gpu.status.reg &= ~0x1800;
302 gpu.status.reg |= (list[0] & 3) << 11;
303 break;
304 }
fc84f618 305 if (2 <= cmd && cmd < 0xc0)
306 vram_dirty = 1;
6e9bdaef 307 else if ((cmd & 0xf8) == 0xe0)
308 gpu.ex_regs[cmd & 7] = list[0];
56f08d83 309
d30279e2
GI
310 if (pos + len > count) {
311 cmd = -1;
312 break; // incomplete cmd
313 }
314 if (cmd == 0xa0 || cmd == 0xc0)
315 break; // image i/o
316 pos += len;
317 }
318
319 if (pos - start > 0) {
fc84f618 320 if (!gpu.frameskip.active)
321 do_cmd_list(data + start, pos - start);
d30279e2
GI
322 start = pos;
323 }
324
325 if (cmd == 0xa0 || cmd == 0xc0) {
326 // consume vram write/read cmd
327 start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
328 pos += len;
329 }
5b745e5b 330 else if (cmd == -1)
ddd56f6e 331 break;
d30279e2 332 }
ddd56f6e 333
5b745e5b 334 if (gpu.frameskip.active)
335 renderer_sync_ecmds(gpu.ex_regs);
fc84f618 336 gpu.state.fb_dirty |= vram_dirty;
337
ddd56f6e 338 return count - pos;
d30279e2
GI
339}
340
341static void flush_cmd_buffer(void)
342{
343 int left = check_cmd(gpu.cmd_buffer, gpu.cmd_len);
344 if (left > 0)
345 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
346 gpu.cmd_len = left;
1ab64c54
GI
347}
348
349void GPUwriteDataMem(uint32_t *mem, int count)
350{
d30279e2
GI
351 int left;
352
56f08d83 353 log_io("gpu_dma_write %p %d\n", mem, count);
354
d30279e2
GI
355 if (unlikely(gpu.cmd_len > 0))
356 flush_cmd_buffer();
56f08d83 357
d30279e2
GI
358 left = check_cmd(mem, count);
359 if (left)
56f08d83 360 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
1ab64c54
GI
361}
362
d30279e2 363void GPUwriteData(uint32_t data)
1ab64c54 364{
56f08d83 365 log_io("gpu_write %08x\n", data);
d30279e2
GI
366 gpu.cmd_buffer[gpu.cmd_len++] = data;
367 if (gpu.cmd_len >= CMD_BUFFER_LEN)
368 flush_cmd_buffer();
1ab64c54
GI
369}
370
ddd56f6e 371long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
1ab64c54 372{
ddd56f6e 373 uint32_t addr, *list;
deb18d24 374 uint32_t *llist_entry = NULL;
ddd56f6e 375 int len, left, count;
deb18d24 376 long dma_words = 0;
d30279e2
GI
377
378 if (unlikely(gpu.cmd_len > 0))
379 flush_cmd_buffer();
380
deb18d24 381 // ff7 sends it's main list twice, detect this
382 if (gpu.state.frame_count == gpu.state.last_list.frame &&
383 *gpu.state.hcnt - gpu.state.last_list.hcnt <= 1 &&
384 gpu.state.last_list.words > 1024)
385 {
386 llist_entry = rambase + (gpu.state.last_list.addr & 0x1fffff) / 4;
387 *llist_entry |= 0x800000;
388 }
389
56f08d83 390 log_io("gpu_dma_chain\n");
ddd56f6e 391 addr = start_addr & 0xffffff;
392 for (count = 0; addr != 0xffffff; count++)
393 {
ddd56f6e 394 list = rambase + (addr & 0x1fffff) / 4;
d30279e2
GI
395 len = list[0] >> 24;
396 addr = list[0] & 0xffffff;
deb18d24 397 dma_words += 1 + len;
398
399 log_io(".chain %08x #%d\n", (list - rambase) * 4, len);
ddd56f6e 400
401 // loop detection marker
402 // (bit23 set causes DMA error on real machine, so
403 // unlikely to be ever set by the game)
404 list[0] |= 0x800000;
405
56f08d83 406 if (len) {
407 left = check_cmd(list + 1, len);
408 if (left)
deb18d24 409 log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
56f08d83 410 }
ddd56f6e 411
412 if (addr & 0x800000)
413 break;
414 }
415
416 // remove loop detection markers
417 addr = start_addr & 0x1fffff;
418 while (count-- > 0) {
419 list = rambase + addr / 4;
420 addr = list[0] & 0x1fffff;
421 list[0] &= ~0x800000;
d30279e2 422 }
deb18d24 423 if (llist_entry)
424 *llist_entry &= ~0x800000;
d30279e2 425
deb18d24 426 gpu.state.last_list.frame = gpu.state.frame_count;
427 gpu.state.last_list.hcnt = *gpu.state.hcnt;
428 gpu.state.last_list.words = dma_words;
429 gpu.state.last_list.addr = start_addr;
430
431 return dma_words;
1ab64c54
GI
432}
433
d30279e2
GI
434void GPUreadDataMem(uint32_t *mem, int count)
435{
56f08d83 436 log_io("gpu_dma_read %p %d\n", mem, count);
437
d30279e2
GI
438 if (unlikely(gpu.cmd_len > 0))
439 flush_cmd_buffer();
56f08d83 440
d30279e2
GI
441 if (gpu.dma.h)
442 do_vram_io(mem, count, 1);
443}
444
445uint32_t GPUreadData(void)
446{
56f08d83 447 log_io("gpu_read\n");
448
449 if (unlikely(gpu.cmd_len > 0))
450 flush_cmd_buffer();
451
452 if (gpu.dma.h)
6e9bdaef 453 do_vram_io(&gpu.gp0, 1, 1);
56f08d83 454
6e9bdaef 455 return gpu.gp0;
d30279e2
GI
456}
457
458uint32_t GPUreadStatus(void)
459{
ddd56f6e 460 uint32_t ret;
56f08d83 461
d30279e2
GI
462 if (unlikely(gpu.cmd_len > 0))
463 flush_cmd_buffer();
464
24de2dd4 465 ret = gpu.status.reg;
ddd56f6e 466 log_io("gpu_read_status %08x\n", ret);
467 return ret;
d30279e2
GI
468}
469
1ab64c54
GI
/* Save-state image exchanged with the main emulator through GPUfreeze(). */
typedef struct GPUFREEZETAG
{
  /* should be always 1 for now (set by main emu) */
  uint32_t ulFreezeVersion;
  /* current gpu status */
  uint32_t ulStatus;
  /* latest control register values */
  uint32_t ulControl[256];
  /* current VRam image (full 2 MB for ZN) */
  unsigned char psxVRam[1024 * 1024 * 2];
} GPUFreeze_t;
477
478long GPUfreeze(uint32_t type, GPUFreeze_t *freeze)
479{
fc84f618 480 int i;
481
1ab64c54
GI
482 switch (type) {
483 case 1: // save
d30279e2
GI
484 if (gpu.cmd_len > 0)
485 flush_cmd_buffer();
1ab64c54
GI
486 memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
487 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
6e9bdaef 488 memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
1ab64c54 489 freeze->ulStatus = gpu.status.reg;
1ab64c54
GI
490 break;
491 case 0: // load
9394ada5 492 renderer_invalidate_caches(0, 0, 1024, 512);
1ab64c54
GI
493 memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
494 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
6e9bdaef 495 memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
1ab64c54 496 gpu.status.reg = freeze->ulStatus;
fc84f618 497 for (i = 8; i > 0; i--) {
498 gpu.regs[i] ^= 1; // avoid reg change detection
499 GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
500 }
5b745e5b 501 renderer_sync_ecmds(gpu.ex_regs);
1ab64c54
GI
502 break;
503 }
504
505 return 1;
506}
507
1ab64c54 508// vim:shiftwidth=2:expandtab