gpu_neon: basic frameskip, change check, minor stuff
[pcsx_rearmed.git] / plugins / gpu_neon / gpu.c
... / ...
CommitLineData
1/*
2 * (C) Gražvydas "notaz" Ignotas, 2011
3 *
4 * This work is licensed under the terms of any of these licenses
5 * (at your option):
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
9 */
10
11#include <stdio.h>
12#include <string.h>
13#include "gpu.h"
14
// element count of a real array (gives a wrong answer for pointers)
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
// branch hint: the condition is expected to be false.
// The operand is normalized with !! so any scalar expression (including
// pointers) can be passed and the macro always evaluates to 0 or 1.
#define unlikely(x) __builtin_expect(!!(x), 0)
#define noinline __attribute__((noinline))

// I/O tracing is compiled out; swap the two log_io lines to enable it
//#define log_io printf
#define log_io(...)
#define log_anomaly printf
22
23struct psx_gpu gpu __attribute__((aligned(64)));
24
25long GPUinit(void)
26{
27 int ret = vout_init();
28 gpu.status.reg = 0x14802000;
29 gpu.status.blanking = 1;
30 gpu.regs[3] = 1;
31 gpu.screen.hres = gpu.screen.w = 320;
32 gpu.screen.vres = gpu.screen.h = 240;
33 gpu.lcf_hc = &gpu.zero;
34 return ret;
35}
36
/*
 * Tear down video output; the result of vout_finish() is passed through.
 */
long GPUshutdown(void)
{
  long vout_result = vout_finish();

  return vout_result;
}
41
42static noinline void update_width(void)
43{
44 int sw = gpu.screen.x2 - gpu.screen.x1;
45 if (sw <= 0 || sw >= 2560)
46 // full width
47 gpu.screen.w = gpu.screen.hres;
48 else
49 gpu.screen.w = sw * gpu.screen.hres / 2560;
50}
51
52static noinline void update_height(void)
53{
54 int sh = gpu.screen.y2 - gpu.screen.y1;
55 if (gpu.status.dheight)
56 sh *= 2;
57 if (sh <= 0)
58 sh = gpu.screen.vres;
59
60 gpu.screen.h = sh;
61}
62
63static noinline void decide_frameskip(void)
64{
65 gpu.frameskip.frame_ready = !gpu.frameskip.active;
66
67 if (!gpu.frameskip.active && *gpu.frameskip.advice)
68 gpu.frameskip.active = 1;
69 else
70 gpu.frameskip.active = 0;
71}
72
73void GPUwriteStatus(uint32_t data)
74{
75 static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
76 static const short vres[4] = { 240, 480, 256, 480 };
77 uint32_t cmd = data >> 24;
78
79 if (cmd < ARRAY_SIZE(gpu.regs)) {
80 if (cmd != 0 && gpu.regs[cmd] == data)
81 return;
82 gpu.regs[cmd] = data;
83 }
84
85 gpu.state.fb_dirty = 1;
86
87 switch (cmd) {
88 case 0x00:
89 gpu.status.reg = 0x14802000;
90 gpu.status.blanking = 1;
91 break;
92 case 0x03:
93 gpu.status.blanking = data & 1;
94 break;
95 case 0x04:
96 gpu.status.dma = data & 3;
97 break;
98 case 0x05:
99 gpu.screen.x = data & 0x3ff;
100 gpu.screen.y = (data >> 10) & 0x3ff;
101 if (gpu.frameskip.enabled)
102 decide_frameskip();
103 break;
104 case 0x06:
105 gpu.screen.x1 = data & 0xfff;
106 gpu.screen.x2 = (data >> 12) & 0xfff;
107 update_width();
108 break;
109 case 0x07:
110 gpu.screen.y1 = data & 0x3ff;
111 gpu.screen.y2 = (data >> 10) & 0x3ff;
112 update_height();
113 break;
114 case 0x08:
115 gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
116 gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
117 gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
118 update_width();
119 update_height();
120 break;
121 }
122}
123
/*
 * Number of parameter words that follow each command word, indexed by
 * the command byte (top 8 bits of the first word). check_cmd() adds 1
 * for the command word itself. Entries not listed are 0.
 */
const unsigned char cmd_lengths[256] =
{
  [0x02] = 2,
  [0x20] = 3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8,
  [0x30] = 5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  [0x40] = 2, 2, 2, 2, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
  [0x50] = 3, 3, 3, 3, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4,
  [0x60] = 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 2, 2, 2, 2,
  [0x70] = 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  [0x80] = 3,
  [0xa0] = 2, // image write header (position, size)
  [0xc0] = 2, // image read header (position, size)
};
143
144#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
145
146static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
147{
148 uint16_t *vram = VRAM_MEM_XY(x, y);
149 if (is_read)
150 memcpy(mem, vram, l * 2);
151 else
152 memcpy(vram, mem, l * 2);
153}
154
155static int do_vram_io(uint32_t *data, int count, int is_read)
156{
157 int count_initial = count;
158 uint16_t *sdata = (uint16_t *)data;
159 int x = gpu.dma.x, y = gpu.dma.y;
160 int w = gpu.dma.w, h = gpu.dma.h;
161 int o = gpu.dma.offset;
162 int l;
163 count *= 2; // operate in 16bpp pixels
164
165 if (gpu.dma.offset) {
166 l = w - gpu.dma.offset;
167 if (count < l)
168 l = count;
169
170 do_vram_line(x + o, y, sdata, l, is_read);
171
172 if (o + l < w)
173 o += l;
174 else {
175 o = 0;
176 y++;
177 h--;
178 }
179 sdata += l;
180 count -= l;
181 }
182
183 for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
184 y &= 511;
185 do_vram_line(x, y, sdata, w, is_read);
186 }
187
188 if (h > 0 && count > 0) {
189 y &= 511;
190 do_vram_line(x, y, sdata, count, is_read);
191 o = count;
192 count = 0;
193 }
194 gpu.dma.y = y;
195 gpu.dma.h = h;
196 gpu.dma.offset = o;
197
198 return count_initial - (count + 1) / 2;
199}
200
201static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
202{
203 if (gpu.dma.h)
204 log_anomaly("start_vram_transfer while old unfinished\n");
205
206 gpu.dma.x = pos_word & 1023;
207 gpu.dma.y = (pos_word >> 16) & 511;
208 gpu.dma.w = size_word & 0xffff; // ?
209 gpu.dma.h = size_word >> 16;
210 gpu.dma.offset = 0;
211
212 if (is_read)
213 gpu.status.img = 1;
214
215 //printf("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
216 // gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
217}
218
219static int check_cmd(uint32_t *data, int count)
220{
221 int len, cmd, start, pos;
222 int vram_dirty = 0;
223
224 // process buffer
225 for (start = pos = 0; pos < count; )
226 {
227 cmd = -1;
228 len = 0;
229
230 if (gpu.dma.h) {
231 pos += do_vram_io(data + pos, count - pos, 0);
232 if (pos == count)
233 break;
234 start = pos;
235 }
236
237 // do look-ahead pass to detect SR changes and VRAM i/o
238 while (pos < count) {
239 uint32_t *list = data + pos;
240 cmd = list[0] >> 24;
241 len = 1 + cmd_lengths[cmd];
242
243 //printf(" %3d: %02x %d\n", pos, cmd, len);
244 if ((cmd & 0xf4) == 0x24) {
245 // flat textured prim
246 gpu.status.reg &= ~0x1ff;
247 gpu.status.reg |= list[4] & 0x1ff;
248 }
249 else if ((cmd & 0xf4) == 0x34) {
250 // shaded textured prim
251 gpu.status.reg &= ~0x1ff;
252 gpu.status.reg |= list[5] & 0x1ff;
253 }
254 else switch (cmd)
255 {
256 case 0xe1:
257 gpu.status.reg &= ~0x7ff;
258 gpu.status.reg |= list[0] & 0x7ff;
259 break;
260 case 0xe6:
261 gpu.status.reg &= ~0x1800;
262 gpu.status.reg |= (list[0] & 3) << 11;
263 break;
264 }
265 if (2 <= cmd && cmd < 0xc0)
266 vram_dirty = 1;
267
268 if (pos + len > count) {
269 cmd = -1;
270 break; // incomplete cmd
271 }
272 if (cmd == 0xa0 || cmd == 0xc0)
273 break; // image i/o
274 pos += len;
275 }
276
277 if (pos - start > 0) {
278 if (!gpu.frameskip.active)
279 do_cmd_list(data + start, pos - start);
280 start = pos;
281 }
282
283 if (cmd == 0xa0 || cmd == 0xc0) {
284 // consume vram write/read cmd
285 start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
286 pos += len;
287 }
288
289 if (cmd == -1)
290 break;
291 }
292
293 gpu.state.fb_dirty |= vram_dirty;
294
295 return count - pos;
296}
297
298static void flush_cmd_buffer(void)
299{
300 int left = check_cmd(gpu.cmd_buffer, gpu.cmd_len);
301 if (left > 0)
302 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
303 gpu.cmd_len = left;
304}
305
306void GPUwriteDataMem(uint32_t *mem, int count)
307{
308 int left;
309
310 log_io("gpu_dma_write %p %d\n", mem, count);
311
312 if (unlikely(gpu.cmd_len > 0))
313 flush_cmd_buffer();
314
315 left = check_cmd(mem, count);
316 if (left)
317 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
318}
319
320void GPUwriteData(uint32_t data)
321{
322 log_io("gpu_write %08x\n", data);
323 gpu.cmd_buffer[gpu.cmd_len++] = data;
324 if (gpu.cmd_len >= CMD_BUFFER_LEN)
325 flush_cmd_buffer();
326}
327
328long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
329{
330 uint32_t addr, *list;
331 int len, left, count;
332
333 if (unlikely(gpu.cmd_len > 0))
334 flush_cmd_buffer();
335
336 log_io("gpu_dma_chain\n");
337 addr = start_addr & 0xffffff;
338 for (count = 0; addr != 0xffffff; count++)
339 {
340 log_io(".chain %08x\n", addr);
341
342 list = rambase + (addr & 0x1fffff) / 4;
343 len = list[0] >> 24;
344 addr = list[0] & 0xffffff;
345
346 // loop detection marker
347 // (bit23 set causes DMA error on real machine, so
348 // unlikely to be ever set by the game)
349 list[0] |= 0x800000;
350
351 if (len) {
352 left = check_cmd(list + 1, len);
353 if (left)
354 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, len);
355 }
356
357 if (addr & 0x800000)
358 break;
359 }
360
361 // remove loop detection markers
362 addr = start_addr & 0x1fffff;
363 while (count-- > 0) {
364 list = rambase + addr / 4;
365 addr = list[0] & 0x1fffff;
366 list[0] &= ~0x800000;
367 }
368
369 return 0;
370}
371
372void GPUreadDataMem(uint32_t *mem, int count)
373{
374 log_io("gpu_dma_read %p %d\n", mem, count);
375
376 if (unlikely(gpu.cmd_len > 0))
377 flush_cmd_buffer();
378
379 if (gpu.dma.h)
380 do_vram_io(mem, count, 1);
381}
382
383uint32_t GPUreadData(void)
384{
385 uint32_t v = 0;
386
387 log_io("gpu_read\n");
388
389 if (unlikely(gpu.cmd_len > 0))
390 flush_cmd_buffer();
391
392 if (gpu.dma.h)
393 do_vram_io(&v, 1, 1);
394
395 return v;
396}
397
398uint32_t GPUreadStatus(void)
399{
400 uint32_t ret;
401
402 if (unlikely(gpu.cmd_len > 0))
403 flush_cmd_buffer();
404
405 ret = gpu.status.reg | (*gpu.lcf_hc << 31);
406 log_io("gpu_read_status %08x\n", ret);
407 return ret;
408}
409
/*
 * Savestate record exchanged with the main emulator through GPUfreeze().
 */
typedef struct GPUFREEZETAG
{
  // should be always 1 for now (set by main emu)
  uint32_t ulFreezeVersion;
  // current gpu status
  uint32_t ulStatus;
  // latest control register values
  uint32_t ulControl[256];
  // current VRam image (full 2 MB for ZN)
  unsigned char psxVRam[2 * 1024 * 1024];
} GPUFreeze_t;
417
418long GPUfreeze(uint32_t type, GPUFreeze_t *freeze)
419{
420 int i;
421
422 switch (type) {
423 case 1: // save
424 if (gpu.cmd_len > 0)
425 flush_cmd_buffer();
426 memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
427 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
428 freeze->ulStatus = gpu.status.reg;
429 break;
430 case 0: // load
431 memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
432 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
433 gpu.status.reg = freeze->ulStatus;
434 for (i = 8; i > 0; i--) {
435 gpu.regs[i] ^= 1; // avoid reg change detection
436 GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
437 }
438 break;
439 }
440
441 return 1;
442}
443
444void GPUvBlank(int val, uint32_t *hcnt)
445{
446 gpu.lcf_hc = &gpu.zero;
447 if (gpu.status.interlace) {
448 if (val)
449 gpu.status.lcf ^= 1;
450 }
451 else {
452 gpu.status.lcf = 0;
453 if (!val)
454 gpu.lcf_hc = hcnt;
455 }
456}
457
458// vim:shiftwidth=2:expandtab