gpu_neon: support caching renderers, update rearmed if
[pcsx_rearmed.git] / plugins / gpu_neon / gpu.c
CommitLineData
1ab64c54
GI
1/*
2 * (C) Gražvydas "notaz" Ignotas, 2011
3 *
4 * This work is licensed under the terms of any of these licenses
5 * (at your option):
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
9 */
10
d30279e2 11#include <stdio.h>
1ab64c54 12#include <string.h>
56f08d83 13#include "gpu.h"
1ab64c54
GI
14
15#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
d30279e2 16#define unlikely(x) __builtin_expect((x), 0)
8dd855cd 17#define noinline __attribute__((noinline))
1ab64c54 18
deb18d24 19#define gpu_log(fmt, ...) \
20 printf("%d:%03d: " fmt, gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)
21
22//#define log_io gpu_log
56f08d83 23#define log_io(...)
9394ada5 24//#define log_anomaly gpu_log
25#define log_anomaly(...)
56f08d83 26
27struct psx_gpu gpu __attribute__((aligned(64)));
1ab64c54 28
6e9bdaef 29static noinline void do_reset(void)
1ab64c54 30{
6e9bdaef 31 memset(gpu.regs, 0, sizeof(gpu.regs));
d30279e2 32 gpu.status.reg = 0x14802000;
6e9bdaef 33 gpu.gp0 = 0;
fc84f618 34 gpu.regs[3] = 1;
6e9bdaef 35 gpu.screen.hres = gpu.screen.w = 256;
fc84f618 36 gpu.screen.vres = gpu.screen.h = 240;
1ab64c54
GI
37}
38
8dd855cd 39static noinline void update_width(void)
40{
41 int sw = gpu.screen.x2 - gpu.screen.x1;
42 if (sw <= 0 || sw >= 2560)
43 // full width
44 gpu.screen.w = gpu.screen.hres;
45 else
46 gpu.screen.w = sw * gpu.screen.hres / 2560;
47}
48
49static noinline void update_height(void)
50{
51 int sh = gpu.screen.y2 - gpu.screen.y1;
52 if (gpu.status.dheight)
53 sh *= 2;
54 if (sh <= 0)
55 sh = gpu.screen.vres;
56
57 gpu.screen.h = sh;
58}
59
fc84f618 60static noinline void decide_frameskip(void)
61{
62 gpu.frameskip.frame_ready = !gpu.frameskip.active;
63
64 if (!gpu.frameskip.active && *gpu.frameskip.advice)
65 gpu.frameskip.active = 1;
66 else
67 gpu.frameskip.active = 0;
68}
69
6e9bdaef 70static noinline void get_gpu_info(uint32_t data)
71{
72 switch (data & 0x0f) {
73 case 0x02:
74 case 0x03:
75 case 0x04:
76 case 0x05:
77 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
78 break;
79 case 0x06:
80 gpu.gp0 = gpu.ex_regs[5] & 0xfffff;
81 break;
82 case 0x07:
83 gpu.gp0 = 2;
84 break;
85 default:
86 gpu.gp0 = 0;
87 break;
88 }
89}
90
91long GPUinit(void)
92{
9394ada5 93 int ret;
94 ret = vout_init();
95 ret |= renderer_init();
96
6e9bdaef 97 gpu.lcf_hc = &gpu.zero;
deb18d24 98 gpu.state.frame_count = 0;
99 gpu.state.hcnt = &gpu.zero;
9394ada5 100 do_reset();
6e9bdaef 101 return ret;
102}
103
// Plugin entry point: shut down video output.
// Returns whatever vout_finish() returns.
long GPUshutdown(void)
{
  long ret = vout_finish();

  return ret;
}
108
1ab64c54
GI
109void GPUwriteStatus(uint32_t data)
110{
111 static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
112 static const short vres[4] = { 240, 480, 256, 480 };
113 uint32_t cmd = data >> 24;
114
fc84f618 115 if (cmd < ARRAY_SIZE(gpu.regs)) {
116 if (cmd != 0 && gpu.regs[cmd] == data)
117 return;
8dd855cd 118 gpu.regs[cmd] = data;
fc84f618 119 }
120
121 gpu.state.fb_dirty = 1;
8dd855cd 122
123 switch (cmd) {
1ab64c54 124 case 0x00:
6e9bdaef 125 do_reset();
1ab64c54
GI
126 break;
127 case 0x03:
d30279e2 128 gpu.status.blanking = data & 1;
1ab64c54
GI
129 break;
130 case 0x04:
131 gpu.status.dma = data & 3;
132 break;
133 case 0x05:
134 gpu.screen.x = data & 0x3ff;
135 gpu.screen.y = (data >> 10) & 0x3ff;
fc84f618 136 if (gpu.frameskip.enabled)
137 decide_frameskip();
1ab64c54 138 break;
8dd855cd 139 case 0x06:
140 gpu.screen.x1 = data & 0xfff;
141 gpu.screen.x2 = (data >> 12) & 0xfff;
142 update_width();
143 break;
1ab64c54
GI
144 case 0x07:
145 gpu.screen.y1 = data & 0x3ff;
146 gpu.screen.y2 = (data >> 10) & 0x3ff;
8dd855cd 147 update_height();
1ab64c54
GI
148 break;
149 case 0x08:
150 gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
8dd855cd 151 gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
152 gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
153 update_width();
154 update_height();
1ab64c54 155 break;
deb18d24 156 default:
157 if ((cmd & 0xf0) == 0x10)
158 get_gpu_info(data);
6e9bdaef 159 break;
1ab64c54 160 }
1ab64c54
GI
161}
162
// Number of parameter words that follow each command word, indexed by
// the command byte (top 8 bits of the first word). check_cmd() adds 1
// for the command word itself. Each row covers 16 command codes; the
// trailing comment gives the code of the row's first entry.
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,         // 00
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,         // 10
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8,         // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,     // 30
  2, 2, 2, 2, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,         // 40
  3, 3, 3, 3, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4,         // 50
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 2, 2, 2, 2,         // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,         // 70
  3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,         // 80
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,         // 90
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,         // a0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,         // b0
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,         // c0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,         // d0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,         // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0          // f0
};
182
d30279e2
GI
183#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
184
185static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
1ab64c54 186{
d30279e2
GI
187 uint16_t *vram = VRAM_MEM_XY(x, y);
188 if (is_read)
189 memcpy(mem, vram, l * 2);
190 else
191 memcpy(vram, mem, l * 2);
192}
193
194static int do_vram_io(uint32_t *data, int count, int is_read)
195{
196 int count_initial = count;
197 uint16_t *sdata = (uint16_t *)data;
198 int x = gpu.dma.x, y = gpu.dma.y;
199 int w = gpu.dma.w, h = gpu.dma.h;
ddd56f6e 200 int o = gpu.dma.offset;
d30279e2
GI
201 int l;
202 count *= 2; // operate in 16bpp pixels
203
204 if (gpu.dma.offset) {
205 l = w - gpu.dma.offset;
ddd56f6e 206 if (count < l)
d30279e2 207 l = count;
ddd56f6e 208
209 do_vram_line(x + o, y, sdata, l, is_read);
210
211 if (o + l < w)
212 o += l;
213 else {
214 o = 0;
215 y++;
216 h--;
217 }
d30279e2
GI
218 sdata += l;
219 count -= l;
d30279e2
GI
220 }
221
222 for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
223 y &= 511;
224 do_vram_line(x, y, sdata, w, is_read);
225 }
226
227 if (h > 0 && count > 0) {
228 y &= 511;
229 do_vram_line(x, y, sdata, count, is_read);
ddd56f6e 230 o = count;
d30279e2
GI
231 count = 0;
232 }
d30279e2
GI
233 gpu.dma.y = y;
234 gpu.dma.h = h;
ddd56f6e 235 gpu.dma.offset = o;
d30279e2 236
6e9bdaef 237 return count_initial - count / 2;
d30279e2
GI
238}
239
240static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
241{
ddd56f6e 242 if (gpu.dma.h)
243 log_anomaly("start_vram_transfer while old unfinished\n");
244
d30279e2
GI
245 gpu.dma.x = pos_word & 1023;
246 gpu.dma.y = (pos_word >> 16) & 511;
247 gpu.dma.w = size_word & 0xffff; // ?
248 gpu.dma.h = size_word >> 16;
249 gpu.dma.offset = 0;
250
251 if (is_read)
252 gpu.status.img = 1;
9394ada5 253 else
254 renderer_invalidate_caches(gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
d30279e2 255
6e9bdaef 256 log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
257 gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
d30279e2
GI
258}
259
260static int check_cmd(uint32_t *data, int count)
261{
262 int len, cmd, start, pos;
fc84f618 263 int vram_dirty = 0;
d30279e2 264
d30279e2 265 // process buffer
ddd56f6e 266 for (start = pos = 0; pos < count; )
d30279e2
GI
267 {
268 cmd = -1;
269 len = 0;
270
271 if (gpu.dma.h) {
272 pos += do_vram_io(data + pos, count - pos, 0);
ddd56f6e 273 if (pos == count)
274 break;
d30279e2
GI
275 start = pos;
276 }
277
ddd56f6e 278 // do look-ahead pass to detect SR changes and VRAM i/o
d30279e2 279 while (pos < count) {
56f08d83 280 uint32_t *list = data + pos;
281 cmd = list[0] >> 24;
d30279e2 282 len = 1 + cmd_lengths[cmd];
56f08d83 283
d30279e2 284 //printf(" %3d: %02x %d\n", pos, cmd, len);
56f08d83 285 if ((cmd & 0xf4) == 0x24) {
286 // flat textured prim
287 gpu.status.reg &= ~0x1ff;
288 gpu.status.reg |= list[4] & 0x1ff;
289 }
290 else if ((cmd & 0xf4) == 0x34) {
291 // shaded textured prim
292 gpu.status.reg &= ~0x1ff;
293 gpu.status.reg |= list[5] & 0x1ff;
294 }
295 else switch (cmd)
296 {
297 case 0xe1:
298 gpu.status.reg &= ~0x7ff;
299 gpu.status.reg |= list[0] & 0x7ff;
300 break;
301 case 0xe6:
302 gpu.status.reg &= ~0x1800;
303 gpu.status.reg |= (list[0] & 3) << 11;
304 break;
305 }
fc84f618 306 if (2 <= cmd && cmd < 0xc0)
307 vram_dirty = 1;
6e9bdaef 308 else if ((cmd & 0xf8) == 0xe0)
309 gpu.ex_regs[cmd & 7] = list[0];
56f08d83 310
d30279e2
GI
311 if (pos + len > count) {
312 cmd = -1;
313 break; // incomplete cmd
314 }
315 if (cmd == 0xa0 || cmd == 0xc0)
316 break; // image i/o
317 pos += len;
318 }
319
320 if (pos - start > 0) {
fc84f618 321 if (!gpu.frameskip.active)
322 do_cmd_list(data + start, pos - start);
d30279e2
GI
323 start = pos;
324 }
325
326 if (cmd == 0xa0 || cmd == 0xc0) {
327 // consume vram write/read cmd
328 start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
329 pos += len;
330 }
331
ddd56f6e 332 if (cmd == -1)
333 break;
d30279e2 334 }
ddd56f6e 335
fc84f618 336 gpu.state.fb_dirty |= vram_dirty;
337
ddd56f6e 338 return count - pos;
d30279e2
GI
339}
340
341static void flush_cmd_buffer(void)
342{
343 int left = check_cmd(gpu.cmd_buffer, gpu.cmd_len);
344 if (left > 0)
345 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
346 gpu.cmd_len = left;
1ab64c54
GI
347}
348
349void GPUwriteDataMem(uint32_t *mem, int count)
350{
d30279e2
GI
351 int left;
352
56f08d83 353 log_io("gpu_dma_write %p %d\n", mem, count);
354
d30279e2
GI
355 if (unlikely(gpu.cmd_len > 0))
356 flush_cmd_buffer();
56f08d83 357
d30279e2
GI
358 left = check_cmd(mem, count);
359 if (left)
56f08d83 360 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
1ab64c54
GI
361}
362
d30279e2 363void GPUwriteData(uint32_t data)
1ab64c54 364{
56f08d83 365 log_io("gpu_write %08x\n", data);
d30279e2
GI
366 gpu.cmd_buffer[gpu.cmd_len++] = data;
367 if (gpu.cmd_len >= CMD_BUFFER_LEN)
368 flush_cmd_buffer();
1ab64c54
GI
369}
370
ddd56f6e 371long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
1ab64c54 372{
ddd56f6e 373 uint32_t addr, *list;
deb18d24 374 uint32_t *llist_entry = NULL;
ddd56f6e 375 int len, left, count;
deb18d24 376 long dma_words = 0;
d30279e2
GI
377
378 if (unlikely(gpu.cmd_len > 0))
379 flush_cmd_buffer();
380
deb18d24 381 // ff7 sends it's main list twice, detect this
382 if (gpu.state.frame_count == gpu.state.last_list.frame &&
383 *gpu.state.hcnt - gpu.state.last_list.hcnt <= 1 &&
384 gpu.state.last_list.words > 1024)
385 {
386 llist_entry = rambase + (gpu.state.last_list.addr & 0x1fffff) / 4;
387 *llist_entry |= 0x800000;
388 }
389
56f08d83 390 log_io("gpu_dma_chain\n");
ddd56f6e 391 addr = start_addr & 0xffffff;
392 for (count = 0; addr != 0xffffff; count++)
393 {
ddd56f6e 394 list = rambase + (addr & 0x1fffff) / 4;
d30279e2
GI
395 len = list[0] >> 24;
396 addr = list[0] & 0xffffff;
deb18d24 397 dma_words += 1 + len;
398
399 log_io(".chain %08x #%d\n", (list - rambase) * 4, len);
ddd56f6e 400
401 // loop detection marker
402 // (bit23 set causes DMA error on real machine, so
403 // unlikely to be ever set by the game)
404 list[0] |= 0x800000;
405
56f08d83 406 if (len) {
407 left = check_cmd(list + 1, len);
408 if (left)
deb18d24 409 log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
56f08d83 410 }
ddd56f6e 411
412 if (addr & 0x800000)
413 break;
414 }
415
416 // remove loop detection markers
417 addr = start_addr & 0x1fffff;
418 while (count-- > 0) {
419 list = rambase + addr / 4;
420 addr = list[0] & 0x1fffff;
421 list[0] &= ~0x800000;
d30279e2 422 }
deb18d24 423 if (llist_entry)
424 *llist_entry &= ~0x800000;
d30279e2 425
deb18d24 426 gpu.state.last_list.frame = gpu.state.frame_count;
427 gpu.state.last_list.hcnt = *gpu.state.hcnt;
428 gpu.state.last_list.words = dma_words;
429 gpu.state.last_list.addr = start_addr;
430
431 return dma_words;
1ab64c54
GI
432}
433
d30279e2
GI
434void GPUreadDataMem(uint32_t *mem, int count)
435{
56f08d83 436 log_io("gpu_dma_read %p %d\n", mem, count);
437
d30279e2
GI
438 if (unlikely(gpu.cmd_len > 0))
439 flush_cmd_buffer();
56f08d83 440
d30279e2
GI
441 if (gpu.dma.h)
442 do_vram_io(mem, count, 1);
443}
444
445uint32_t GPUreadData(void)
446{
56f08d83 447 log_io("gpu_read\n");
448
449 if (unlikely(gpu.cmd_len > 0))
450 flush_cmd_buffer();
451
452 if (gpu.dma.h)
6e9bdaef 453 do_vram_io(&gpu.gp0, 1, 1);
56f08d83 454
6e9bdaef 455 return gpu.gp0;
d30279e2
GI
456}
457
458uint32_t GPUreadStatus(void)
459{
ddd56f6e 460 uint32_t ret;
56f08d83 461
d30279e2
GI
462 if (unlikely(gpu.cmd_len > 0))
463 flush_cmd_buffer();
464
ddd56f6e 465 ret = gpu.status.reg | (*gpu.lcf_hc << 31);
466 log_io("gpu_read_status %08x\n", ret);
467 return ret;
d30279e2
GI
468}
469
1ab64c54
GI
// Save-state image exchanged with the main emulator through GPUfreeze().
typedef struct GPUFREEZETAG
{
  uint32_t ulFreezeVersion;               // should be always 1 for now (set by main emu)
  uint32_t ulStatus;                      // current gpu status
  uint32_t ulControl[256];                // latest control register values
  unsigned char psxVRam[2 * 1024 * 1024]; // current VRam image (full 2 MB for ZN)
} GPUFreeze_t;
477
478long GPUfreeze(uint32_t type, GPUFreeze_t *freeze)
479{
fc84f618 480 int i;
481
1ab64c54
GI
482 switch (type) {
483 case 1: // save
d30279e2
GI
484 if (gpu.cmd_len > 0)
485 flush_cmd_buffer();
1ab64c54
GI
486 memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
487 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
6e9bdaef 488 memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
1ab64c54 489 freeze->ulStatus = gpu.status.reg;
1ab64c54
GI
490 break;
491 case 0: // load
9394ada5 492 renderer_invalidate_caches(0, 0, 1024, 512);
1ab64c54
GI
493 memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
494 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
6e9bdaef 495 memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
1ab64c54 496 gpu.status.reg = freeze->ulStatus;
fc84f618 497 for (i = 8; i > 0; i--) {
498 gpu.regs[i] ^= 1; // avoid reg change detection
499 GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
500 }
1ab64c54
GI
501 break;
502 }
503
504 return 1;
505}
506
d30279e2 507void GPUvBlank(int val, uint32_t *hcnt)
1ab64c54 508{
d30279e2
GI
509 gpu.lcf_hc = &gpu.zero;
510 if (gpu.status.interlace) {
511 if (val)
512 gpu.status.lcf ^= 1;
513 }
514 else {
515 gpu.status.lcf = 0;
516 if (!val)
517 gpu.lcf_hc = hcnt;
518 }
deb18d24 519 if (!val)
520 gpu.state.frame_count++;
521
522 gpu.state.hcnt = hcnt;
1ab64c54
GI
523}
524
1ab64c54 525// vim:shiftwidth=2:expandtab