gpu_neon: support caching renderers, update rearmed if
[pcsx_rearmed.git] plugins/gpu_neon/gpu.c
/*
 * (C) Gražvydas "notaz" Ignotas, 2011
 *
 * This work is licensed under the terms of any of these licenses
 * (at your option):
 *  - GNU GPL, version 2 or later.
 *  - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.
 */

#include <stdio.h>
#include <string.h>
#include "gpu.h"

#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#define unlikely(x) __builtin_expect((x), 0)
#define noinline __attribute__((noinline))

#define gpu_log(fmt, ...) \
  printf("%d:%03d: " fmt, gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)

//#define log_io gpu_log
#define log_io(...)
//#define log_anomaly gpu_log
#define log_anomaly(...)
struct psx_gpu gpu __attribute__((aligned(64)));

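// GP1(0x00) soft reset: clear the control register mirrors and restore the
// power-on status value; regs[3] = 1 records display blanking (GP1(0x03))
// as enabled, matching the freshly reset status word.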
static noinline void do_reset(void)
{
  memset(gpu.regs, 0, sizeof(gpu.regs));
  gpu.status.reg = 0x14802000;
  gpu.gp0 = 0;
  gpu.regs[3] = 1;
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
}

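// Display X range (GP1(0x06)) is given in GPU clock ticks; a span of 2560
// ticks is treated as the full horizontal resolution, so the visible width
// is scaled from that.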
static noinline void update_width(void)
{
  int sw = gpu.screen.x2 - gpu.screen.x1;
  if (sw <= 0 || sw >= 2560)
    // full width
    gpu.screen.w = gpu.screen.hres;
  else
    gpu.screen.w = sw * gpu.screen.hres / 2560;
}

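// Display Y range (GP1(0x07)) is counted in scanlines per field; it is
// doubled when the double-height bit is set, and falls back to the full
// vertical resolution if the range is empty.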
static noinline void update_height(void)
{
  int sh = gpu.screen.y2 - gpu.screen.y1;
  if (gpu.status.dheight)
    sh *= 2;
  if (sh <= 0)
    sh = gpu.screen.vres;

  gpu.screen.h = sh;
}

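// Skip at most every other frame: a frame that was skipped always makes the
// next one ready, and skipping is only re-armed while the emulator core's
// frameskip advice flag is set.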
static noinline void decide_frameskip(void)
{
  gpu.frameskip.frame_ready = !gpu.frameskip.active;

  if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;
}

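// GP1(0x10) "get GPU info": 0x02-0x05 return the mirrored GP0(0xe2-0xe5)
// settings (texture window, draw area, draw offset) through GPUREAD,
// 0x07 returns the GPU version.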
static noinline void get_gpu_info(uint32_t data)
{
  switch (data & 0x0f) {
    case 0x02:
    case 0x03:
    case 0x04:
    case 0x05:
      gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
      break;
    case 0x06:
      gpu.gp0 = gpu.ex_regs[5] & 0xfffff;
      break;
    case 0x07:
      gpu.gp0 = 2;
      break;
    default:
      gpu.gp0 = 0;
      break;
  }
}

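// PCSX GPU plugin entry points start here.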
long GPUinit(void)
{
  int ret;
  ret  = vout_init();
  ret |= renderer_init();

  gpu.lcf_hc = &gpu.zero;
  gpu.state.frame_count = 0;
  gpu.state.hcnt = &gpu.zero;
  do_reset();
  return ret;
}

long GPUshutdown(void)
{
  return vout_finish();
}

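// GP1 (control) port. The command is in the top byte of the written word;
// writing a value a command already holds is ignored for everything except
// 0x00 (reset), so redundant writes are dropped early.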
void GPUwriteStatus(uint32_t data)
{
  static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
  static const short vres[4] = { 240, 480, 256, 480 };
  uint32_t cmd = data >> 24;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    if (cmd != 0 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
    case 0x00:
      do_reset();
      break;
    case 0x03:
      gpu.status.blanking = data & 1;
      break;
    case 0x04:
      gpu.status.dma = data & 3;
      break;
    case 0x05:
      gpu.screen.x = data & 0x3ff;
      gpu.screen.y = (data >> 10) & 0x3ff;
      if (gpu.frameskip.enabled)
        decide_frameskip();
      break;
    case 0x06:
      gpu.screen.x1 = data & 0xfff;
      gpu.screen.x2 = (data >> 12) & 0xfff;
      update_width();
      break;
    case 0x07:
      gpu.screen.y1 = data & 0x3ff;
      gpu.screen.y2 = (data >> 10) & 0x3ff;
      update_height();
      break;
    case 0x08:
      gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
      gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
      gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
      update_width();
      update_height();
      break;
    default:
      if ((cmd & 0xf0) == 0x10)
        get_gpu_info(data);
      break;
  }
}

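// Number of parameter words that follow each GP0 command word; the total
// packet length used below is 1 + cmd_lengths[cmd]. The hex comments mark
// the first command value of the row they sit on.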
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 2, 2, 2, 2, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

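// VRAM is a 1024x512 array of 16-bit pixels; do_vram_line copies one
// horizontal span of it either to or from the caller's buffer.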
#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]

static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (is_read)
    memcpy(mem, vram, l * 2);
  else
    memcpy(vram, mem, l * 2);
}

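// Feed an active VRAM transfer (set up by start_vram_transfer) from/to the
// given word buffer: finish a previously interrupted line first, copy as many
// whole lines as fit, then stash any remainder as the new partial line.
// Returns the number of 32-bit words consumed.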
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read);
  }

  if (h > 0 && count > 0) {
    y &= 511;
    do_vram_line(x, y, sdata, count, is_read);
    o = count;
    count = 0;
  }
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}

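// Begin a GP0(0xa0) image write or GP0(0xc0) image read to/from VRAM. For
// writes, a caching renderer is told to invalidate whatever it holds for the
// destination rectangle before new pixels land there.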
static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 1023;
  gpu.dma.y = (pos_word >> 16) & 511;
  gpu.dma.w = size_word & 0xffff; // ?
  gpu.dma.h = size_word >> 16;
  gpu.dma.offset = 0;

  if (is_read)
    gpu.status.img = 1;
  else
    renderer_invalidate_caches(gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
}

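// Parse a buffer of GP0 words: a look-ahead pass mirrors status-register
// changes and spots VRAM image i/o, complete primitive runs are handed to
// the renderer via do_cmd_list() unless the frame is being skipped, and
// parsing stops on an incomplete packet. Returns the number of words left
// unprocessed.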
static int check_cmd(uint32_t *data, int count)
{
  int len, cmd, start, pos;
  int vram_dirty = 0;

  // process buffer
  for (start = pos = 0; pos < count; )
  {
    cmd = -1;
    len = 0;

    if (gpu.dma.h) {
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
      start = pos;
    }

    // do look-ahead pass to detect SR changes and VRAM i/o
    while (pos < count) {
      uint32_t *list = data + pos;
      cmd = list[0] >> 24;
      len = 1 + cmd_lengths[cmd];

      //printf(" %3d: %02x %d\n", pos, cmd, len);
      if ((cmd & 0xf4) == 0x24) {
        // flat textured prim
        gpu.status.reg &= ~0x1ff;
        gpu.status.reg |= list[4] & 0x1ff;
      }
      else if ((cmd & 0xf4) == 0x34) {
        // shaded textured prim
        gpu.status.reg &= ~0x1ff;
        gpu.status.reg |= list[5] & 0x1ff;
      }
      else switch (cmd)
      {
        case 0xe1:
          gpu.status.reg &= ~0x7ff;
          gpu.status.reg |= list[0] & 0x7ff;
          break;
        case 0xe6:
          gpu.status.reg &= ~0x1800;
          gpu.status.reg |= (list[0] & 3) << 11;
          break;
      }
      if (2 <= cmd && cmd < 0xc0)
        vram_dirty = 1;
      else if ((cmd & 0xf8) == 0xe0)
        gpu.ex_regs[cmd & 7] = list[0];

      if (pos + len > count) {
        cmd = -1;
        break; // incomplete cmd
      }
      if (cmd == 0xa0 || cmd == 0xc0)
        break; // image i/o
      pos += len;
    }

    if (pos - start > 0) {
      if (!gpu.frameskip.active)
        do_cmd_list(data + start, pos - start);
      start = pos;
    }

    if (cmd == 0xa0 || cmd == 0xc0) {
      // consume vram write/read cmd
      start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
      pos += len;
    }

    if (cmd == -1)
      break;
  }

  gpu.state.fb_dirty |= vram_dirty;

  return count - pos;
}

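// Drain whatever has been buffered through GPUwriteData(); if the buffer ends
// in an incomplete command, the leftover words are moved to the front to wait
// for the rest.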
static void flush_cmd_buffer(void)
{
  int left = check_cmd(gpu.cmd_buffer, gpu.cmd_len);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  gpu.cmd_len = left;
}

void GPUwriteDataMem(uint32_t *mem, int count)
{
  int left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = check_cmd(mem, count);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}

void GPUwriteData(uint32_t data)
{
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = data;
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer();
}

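// Walk a GP0 linked list in PSX RAM: bits 24-31 of each node's first word
// hold the payload word count and bits 0-23 the address of the next node,
// with 0xffffff terminating the chain. Bit 23 is temporarily set on visited
// nodes as a loop-detection marker and cleared again afterwards.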
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
{
  uint32_t addr, *list;
  uint32_t *llist_entry = NULL;
  int len, left, count;
  long dma_words = 0;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  // ff7 sends its main list twice, detect this
  if (gpu.state.frame_count == gpu.state.last_list.frame &&
      *gpu.state.hcnt - gpu.state.last_list.hcnt <= 1 &&
      gpu.state.last_list.words > 1024)
  {
    llist_entry = rambase + (gpu.state.last_list.addr & 0x1fffff) / 4;
    *llist_entry |= 0x800000;
  }

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; addr != 0xffffff; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = list[0] >> 24;
    addr = list[0] & 0xffffff;
    dma_words += 1 + len;

    log_io(".chain %08x #%d\n", (list - rambase) * 4, len);

    // loop detection marker
    // (bit23 set causes DMA error on real machine, so
    //  unlikely to be ever set by the game)
    list[0] |= 0x800000;

    if (len) {
      left = check_cmd(list + 1, len);
      if (left)
        log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
    }

    if (addr & 0x800000)
      break;
  }

  // remove loop detection markers
  addr = start_addr & 0x1fffff;
  while (count-- > 0) {
    list = rambase + addr / 4;
    addr = list[0] & 0x1fffff;
    list[0] &= ~0x800000;
  }
  if (llist_entry)
    *llist_entry &= ~0x800000;

  gpu.state.last_list.frame = gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.words = dma_words;
  gpu.state.last_list.addr = start_addr;

  return dma_words;
}

void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}

uint32_t GPUreadData(void)
{
  log_io("gpu_read\n");

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(&gpu.gp0, 1, 1);

  return gpu.gp0;
}

uint32_t GPUreadStatus(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.status.reg | (*gpu.lcf_hc << 31);
  log_io("gpu_read_status %08x\n", ret);
  return ret;
}

typedef struct GPUFREEZETAG
{
  uint32_t ulFreezeVersion;           // should be always 1 for now (set by main emu)
  uint32_t ulStatus;                  // current gpu status
  uint32_t ulControl[256];            // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
} GPUFreeze_t;

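// PCSX GPU plugin save-state hook. On load, the control registers are
// replayed through GPUwriteStatus(); flipping a bit in the stored copy first
// defeats the "same value already written" early-out so every write really
// takes effect.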
long GPUfreeze(uint32_t type, GPUFreeze_t *freeze)
{
  int i;

  switch (type) {
    case 1: // save
      if (gpu.cmd_len > 0)
        flush_cmd_buffer();
      memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
      memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
      memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
      freeze->ulStatus = gpu.status.reg;
      break;
    case 0: // load
      renderer_invalidate_caches(0, 0, 1024, 512);
      memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
      memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
      memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
      gpu.status.reg = freeze->ulStatus;
      for (i = 8; i > 0; i--) {
        gpu.regs[i] ^= 1; // avoid reg change detection
        GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
      }
      break;
  }

  return 1;
}

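// vblank notification from the emulator core: in interlaced mode the field
// bit (lcf) toggles when val is set; in progressive mode lcf stays 0 and,
// while val is clear, lcf_hc points at the hsync counter so GPUreadStatus
// bit 31 follows the current line instead. Frames are counted on val == 0.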
void GPUvBlank(int val, uint32_t *hcnt)
{
  gpu.lcf_hc = &gpu.zero;
  if (gpu.status.interlace) {
    if (val)
      gpu.status.lcf ^= 1;
  }
  else {
    gpu.status.lcf = 0;
    if (!val)
      gpu.lcf_hc = hcnt;
  }
  if (!val)
    gpu.state.frame_count++;

  gpu.state.hcnt = hcnt;
}

// vim:shiftwidth=2:expandtab