gpu_neon: basic frameskip, change check, minor stuff
[pcsx_rearmed.git] / plugins / gpu_neon / gpu.c
CommitLineData
1ab64c54
GI
1/*
2 * (C) Gražvydas "notaz" Ignotas, 2011
3 *
4 * This work is licensed under the terms of any of these licenses
5 * (at your option):
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
9 */
10
d30279e2 11#include <stdio.h>
1ab64c54 12#include <string.h>
56f08d83 13#include "gpu.h"
1ab64c54
GI
14
15#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
d30279e2 16#define unlikely(x) __builtin_expect((x), 0)
8dd855cd 17#define noinline __attribute__((noinline))
1ab64c54 18
56f08d83 19//#define log_io printf
20#define log_io(...)
21#define log_anomaly printf
22
23struct psx_gpu gpu __attribute__((aligned(64)));
1ab64c54
GI
24
25long GPUinit(void)
26{
56f08d83 27 int ret = vout_init();
d30279e2 28 gpu.status.reg = 0x14802000;
fc84f618 29 gpu.status.blanking = 1;
30 gpu.regs[3] = 1;
31 gpu.screen.hres = gpu.screen.w = 320;
32 gpu.screen.vres = gpu.screen.h = 240;
56f08d83 33 gpu.lcf_hc = &gpu.zero;
34 return ret;
1ab64c54
GI
35}
36
/*
 * Stop video output.  Returns the value reported by vout_finish().
 */
long GPUshutdown(void)
{
  long vout_result = vout_finish();

  return vout_result;
}
41
8dd855cd 42static noinline void update_width(void)
43{
44 int sw = gpu.screen.x2 - gpu.screen.x1;
45 if (sw <= 0 || sw >= 2560)
46 // full width
47 gpu.screen.w = gpu.screen.hres;
48 else
49 gpu.screen.w = sw * gpu.screen.hres / 2560;
50}
51
52static noinline void update_height(void)
53{
54 int sh = gpu.screen.y2 - gpu.screen.y1;
55 if (gpu.status.dheight)
56 sh *= 2;
57 if (sh <= 0)
58 sh = gpu.screen.vres;
59
60 gpu.screen.h = sh;
61}
62
fc84f618 63static noinline void decide_frameskip(void)
64{
65 gpu.frameskip.frame_ready = !gpu.frameskip.active;
66
67 if (!gpu.frameskip.active && *gpu.frameskip.advice)
68 gpu.frameskip.active = 1;
69 else
70 gpu.frameskip.active = 0;
71}
72
1ab64c54
GI
73void GPUwriteStatus(uint32_t data)
74{
75 static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
76 static const short vres[4] = { 240, 480, 256, 480 };
77 uint32_t cmd = data >> 24;
78
fc84f618 79 if (cmd < ARRAY_SIZE(gpu.regs)) {
80 if (cmd != 0 && gpu.regs[cmd] == data)
81 return;
8dd855cd 82 gpu.regs[cmd] = data;
fc84f618 83 }
84
85 gpu.state.fb_dirty = 1;
8dd855cd 86
87 switch (cmd) {
1ab64c54 88 case 0x00:
d30279e2 89 gpu.status.reg = 0x14802000;
8dd855cd 90 gpu.status.blanking = 1;
1ab64c54
GI
91 break;
92 case 0x03:
d30279e2 93 gpu.status.blanking = data & 1;
1ab64c54
GI
94 break;
95 case 0x04:
96 gpu.status.dma = data & 3;
97 break;
98 case 0x05:
99 gpu.screen.x = data & 0x3ff;
100 gpu.screen.y = (data >> 10) & 0x3ff;
fc84f618 101 if (gpu.frameskip.enabled)
102 decide_frameskip();
1ab64c54 103 break;
8dd855cd 104 case 0x06:
105 gpu.screen.x1 = data & 0xfff;
106 gpu.screen.x2 = (data >> 12) & 0xfff;
107 update_width();
108 break;
1ab64c54
GI
109 case 0x07:
110 gpu.screen.y1 = data & 0x3ff;
111 gpu.screen.y2 = (data >> 10) & 0x3ff;
8dd855cd 112 update_height();
1ab64c54
GI
113 break;
114 case 0x08:
115 gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
8dd855cd 116 gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
117 gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
118 update_width();
119 update_height();
1ab64c54
GI
120 break;
121 }
1ab64c54
GI
122}
123
/*
 * Extra words that follow each command word, indexed by the command byte
 * (top 8 bits of the word).  check_cmd() computes a command's total size as
 * 1 + cmd_lengths[cmd].
 */
const unsigned char cmd_lengths[256] =
{
  /* 00 */ 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 20 */ 3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8,
  /* 30 */ 5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  /* 40 */ 2, 2, 2, 2, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
  /* 50 */ 3, 3, 3, 3, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4,
  /* 60 */ 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 2, 2, 2, 2,
  /* 70 */ 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  /* 80 */ 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* a0 */ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* b0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* c0 */ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* d0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* e0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* f0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
143
d30279e2
GI
144#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
145
146static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
1ab64c54 147{
d30279e2
GI
148 uint16_t *vram = VRAM_MEM_XY(x, y);
149 if (is_read)
150 memcpy(mem, vram, l * 2);
151 else
152 memcpy(vram, mem, l * 2);
153}
154
155static int do_vram_io(uint32_t *data, int count, int is_read)
156{
157 int count_initial = count;
158 uint16_t *sdata = (uint16_t *)data;
159 int x = gpu.dma.x, y = gpu.dma.y;
160 int w = gpu.dma.w, h = gpu.dma.h;
ddd56f6e 161 int o = gpu.dma.offset;
d30279e2
GI
162 int l;
163 count *= 2; // operate in 16bpp pixels
164
165 if (gpu.dma.offset) {
166 l = w - gpu.dma.offset;
ddd56f6e 167 if (count < l)
d30279e2 168 l = count;
ddd56f6e 169
170 do_vram_line(x + o, y, sdata, l, is_read);
171
172 if (o + l < w)
173 o += l;
174 else {
175 o = 0;
176 y++;
177 h--;
178 }
d30279e2
GI
179 sdata += l;
180 count -= l;
d30279e2
GI
181 }
182
183 for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
184 y &= 511;
185 do_vram_line(x, y, sdata, w, is_read);
186 }
187
188 if (h > 0 && count > 0) {
189 y &= 511;
190 do_vram_line(x, y, sdata, count, is_read);
ddd56f6e 191 o = count;
d30279e2
GI
192 count = 0;
193 }
d30279e2
GI
194 gpu.dma.y = y;
195 gpu.dma.h = h;
ddd56f6e 196 gpu.dma.offset = o;
d30279e2
GI
197
198 return count_initial - (count + 1) / 2;
199}
200
201static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
202{
ddd56f6e 203 if (gpu.dma.h)
204 log_anomaly("start_vram_transfer while old unfinished\n");
205
d30279e2
GI
206 gpu.dma.x = pos_word & 1023;
207 gpu.dma.y = (pos_word >> 16) & 511;
208 gpu.dma.w = size_word & 0xffff; // ?
209 gpu.dma.h = size_word >> 16;
210 gpu.dma.offset = 0;
211
212 if (is_read)
213 gpu.status.img = 1;
214
215 //printf("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
216 // gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
217}
218
219static int check_cmd(uint32_t *data, int count)
220{
221 int len, cmd, start, pos;
fc84f618 222 int vram_dirty = 0;
d30279e2 223
d30279e2 224 // process buffer
ddd56f6e 225 for (start = pos = 0; pos < count; )
d30279e2
GI
226 {
227 cmd = -1;
228 len = 0;
229
230 if (gpu.dma.h) {
231 pos += do_vram_io(data + pos, count - pos, 0);
ddd56f6e 232 if (pos == count)
233 break;
d30279e2
GI
234 start = pos;
235 }
236
ddd56f6e 237 // do look-ahead pass to detect SR changes and VRAM i/o
d30279e2 238 while (pos < count) {
56f08d83 239 uint32_t *list = data + pos;
240 cmd = list[0] >> 24;
d30279e2 241 len = 1 + cmd_lengths[cmd];
56f08d83 242
d30279e2 243 //printf(" %3d: %02x %d\n", pos, cmd, len);
56f08d83 244 if ((cmd & 0xf4) == 0x24) {
245 // flat textured prim
246 gpu.status.reg &= ~0x1ff;
247 gpu.status.reg |= list[4] & 0x1ff;
248 }
249 else if ((cmd & 0xf4) == 0x34) {
250 // shaded textured prim
251 gpu.status.reg &= ~0x1ff;
252 gpu.status.reg |= list[5] & 0x1ff;
253 }
254 else switch (cmd)
255 {
256 case 0xe1:
257 gpu.status.reg &= ~0x7ff;
258 gpu.status.reg |= list[0] & 0x7ff;
259 break;
260 case 0xe6:
261 gpu.status.reg &= ~0x1800;
262 gpu.status.reg |= (list[0] & 3) << 11;
263 break;
264 }
fc84f618 265 if (2 <= cmd && cmd < 0xc0)
266 vram_dirty = 1;
56f08d83 267
d30279e2
GI
268 if (pos + len > count) {
269 cmd = -1;
270 break; // incomplete cmd
271 }
272 if (cmd == 0xa0 || cmd == 0xc0)
273 break; // image i/o
274 pos += len;
275 }
276
277 if (pos - start > 0) {
fc84f618 278 if (!gpu.frameskip.active)
279 do_cmd_list(data + start, pos - start);
d30279e2
GI
280 start = pos;
281 }
282
283 if (cmd == 0xa0 || cmd == 0xc0) {
284 // consume vram write/read cmd
285 start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
286 pos += len;
287 }
288
ddd56f6e 289 if (cmd == -1)
290 break;
d30279e2 291 }
ddd56f6e 292
fc84f618 293 gpu.state.fb_dirty |= vram_dirty;
294
ddd56f6e 295 return count - pos;
d30279e2
GI
296}
297
298static void flush_cmd_buffer(void)
299{
300 int left = check_cmd(gpu.cmd_buffer, gpu.cmd_len);
301 if (left > 0)
302 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
303 gpu.cmd_len = left;
1ab64c54
GI
304}
305
306void GPUwriteDataMem(uint32_t *mem, int count)
307{
d30279e2
GI
308 int left;
309
56f08d83 310 log_io("gpu_dma_write %p %d\n", mem, count);
311
d30279e2
GI
312 if (unlikely(gpu.cmd_len > 0))
313 flush_cmd_buffer();
56f08d83 314
d30279e2
GI
315 left = check_cmd(mem, count);
316 if (left)
56f08d83 317 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
1ab64c54
GI
318}
319
d30279e2 320void GPUwriteData(uint32_t data)
1ab64c54 321{
56f08d83 322 log_io("gpu_write %08x\n", data);
d30279e2
GI
323 gpu.cmd_buffer[gpu.cmd_len++] = data;
324 if (gpu.cmd_len >= CMD_BUFFER_LEN)
325 flush_cmd_buffer();
1ab64c54
GI
326}
327
ddd56f6e 328long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
1ab64c54 329{
ddd56f6e 330 uint32_t addr, *list;
331 int len, left, count;
d30279e2
GI
332
333 if (unlikely(gpu.cmd_len > 0))
334 flush_cmd_buffer();
335
56f08d83 336 log_io("gpu_dma_chain\n");
ddd56f6e 337 addr = start_addr & 0xffffff;
338 for (count = 0; addr != 0xffffff; count++)
339 {
56f08d83 340 log_io(".chain %08x\n", addr);
341
ddd56f6e 342 list = rambase + (addr & 0x1fffff) / 4;
d30279e2
GI
343 len = list[0] >> 24;
344 addr = list[0] & 0xffffff;
ddd56f6e 345
346 // loop detection marker
347 // (bit23 set causes DMA error on real machine, so
348 // unlikely to be ever set by the game)
349 list[0] |= 0x800000;
350
56f08d83 351 if (len) {
352 left = check_cmd(list + 1, len);
353 if (left)
354 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, len);
355 }
ddd56f6e 356
357 if (addr & 0x800000)
358 break;
359 }
360
361 // remove loop detection markers
362 addr = start_addr & 0x1fffff;
363 while (count-- > 0) {
364 list = rambase + addr / 4;
365 addr = list[0] & 0x1fffff;
366 list[0] &= ~0x800000;
d30279e2
GI
367 }
368
1ab64c54
GI
369 return 0;
370}
371
d30279e2
GI
372void GPUreadDataMem(uint32_t *mem, int count)
373{
56f08d83 374 log_io("gpu_dma_read %p %d\n", mem, count);
375
d30279e2
GI
376 if (unlikely(gpu.cmd_len > 0))
377 flush_cmd_buffer();
56f08d83 378
d30279e2
GI
379 if (gpu.dma.h)
380 do_vram_io(mem, count, 1);
381}
382
383uint32_t GPUreadData(void)
384{
385 uint32_t v = 0;
56f08d83 386
387 log_io("gpu_read\n");
388
389 if (unlikely(gpu.cmd_len > 0))
390 flush_cmd_buffer();
391
392 if (gpu.dma.h)
393 do_vram_io(&v, 1, 1);
394
d30279e2
GI
395 return v;
396}
397
398uint32_t GPUreadStatus(void)
399{
ddd56f6e 400 uint32_t ret;
56f08d83 401
d30279e2
GI
402 if (unlikely(gpu.cmd_len > 0))
403 flush_cmd_buffer();
404
ddd56f6e 405 ret = gpu.status.reg | (*gpu.lcf_hc << 31);
406 log_io("gpu_read_status %08x\n", ret);
407 return ret;
d30279e2
GI
408}
409
1ab64c54
GI
/*
 * Save-state record exchanged with the main emulator through GPUfreeze().
 */
typedef struct GPUFREEZETAG
{
  uint32_t ulFreezeVersion;             // should be always 1 for now (set by main emu)
  uint32_t ulStatus;                    // current gpu status
  uint32_t ulControl[256];              // latest control register values
  unsigned char psxVRam[2 * 1024 * 1024]; // current VRam image (full 2 MB for ZN)
} GPUFreeze_t;
417
418long GPUfreeze(uint32_t type, GPUFreeze_t *freeze)
419{
fc84f618 420 int i;
421
1ab64c54
GI
422 switch (type) {
423 case 1: // save
d30279e2
GI
424 if (gpu.cmd_len > 0)
425 flush_cmd_buffer();
1ab64c54
GI
426 memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
427 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
428 freeze->ulStatus = gpu.status.reg;
1ab64c54
GI
429 break;
430 case 0: // load
431 memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
432 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
433 gpu.status.reg = freeze->ulStatus;
fc84f618 434 for (i = 8; i > 0; i--) {
435 gpu.regs[i] ^= 1; // avoid reg change detection
436 GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
437 }
1ab64c54
GI
438 break;
439 }
440
441 return 1;
442}
443
d30279e2 444void GPUvBlank(int val, uint32_t *hcnt)
1ab64c54 445{
d30279e2
GI
446 gpu.lcf_hc = &gpu.zero;
447 if (gpu.status.interlace) {
448 if (val)
449 gpu.status.lcf ^= 1;
450 }
451 else {
452 gpu.status.lcf = 0;
453 if (!val)
454 gpu.lcf_hc = hcnt;
455 }
1ab64c54
GI
456}
457
1ab64c54 458// vim:shiftwidth=2:expandtab