gpu_neon: gpu info reads, some bugfixes
[pcsx_rearmed.git] / plugins / gpu_neon / gpu.c
... / ...
CommitLineData
1/*
2 * (C) Gražvydas "notaz" Ignotas, 2011
3 *
4 * This work is licensed under the terms of any of these licenses
5 * (at your option):
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
9 */
10
11#include <stdio.h>
12#include <string.h>
13#include "gpu.h"
14
15#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
16#define unlikely(x) __builtin_expect((x), 0)
17#define noinline __attribute__((noinline))
18
19//#define log_io printf
20#define log_io(...)
21#define log_anomaly printf
22
23struct psx_gpu gpu __attribute__((aligned(64)));
24
25static noinline void do_reset(void)
26{
27 memset(gpu.regs, 0, sizeof(gpu.regs));
28 gpu.status.reg = 0x14802000;
29 gpu.gp0 = 0;
30 gpu.regs[3] = 1;
31 gpu.screen.hres = gpu.screen.w = 256;
32 gpu.screen.vres = gpu.screen.h = 240;
33}
34
35static noinline void update_width(void)
36{
37 int sw = gpu.screen.x2 - gpu.screen.x1;
38 if (sw <= 0 || sw >= 2560)
39 // full width
40 gpu.screen.w = gpu.screen.hres;
41 else
42 gpu.screen.w = sw * gpu.screen.hres / 2560;
43}
44
45static noinline void update_height(void)
46{
47 int sh = gpu.screen.y2 - gpu.screen.y1;
48 if (gpu.status.dheight)
49 sh *= 2;
50 if (sh <= 0)
51 sh = gpu.screen.vres;
52
53 gpu.screen.h = sh;
54}
55
56static noinline void decide_frameskip(void)
57{
58 gpu.frameskip.frame_ready = !gpu.frameskip.active;
59
60 if (!gpu.frameskip.active && *gpu.frameskip.advice)
61 gpu.frameskip.active = 1;
62 else
63 gpu.frameskip.active = 0;
64}
65
66static noinline void get_gpu_info(uint32_t data)
67{
68 switch (data & 0x0f) {
69 case 0x02:
70 case 0x03:
71 case 0x04:
72 case 0x05:
73 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
74 break;
75 case 0x06:
76 gpu.gp0 = gpu.ex_regs[5] & 0xfffff;
77 break;
78 case 0x07:
79 gpu.gp0 = 2;
80 break;
81 default:
82 gpu.gp0 = 0;
83 break;
84 }
85}
86
87long GPUinit(void)
88{
89 int ret = vout_init();
90 do_reset();
91 gpu.lcf_hc = &gpu.zero;
92 return ret;
93}
94
/* Plugin entry point: shut down video output and pass its result through. */
long GPUshutdown(void)
{
  long ret = vout_finish();

  return ret;
}
99
100void GPUwriteStatus(uint32_t data)
101{
102 static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
103 static const short vres[4] = { 240, 480, 256, 480 };
104 uint32_t cmd = data >> 24;
105
106 if (cmd < ARRAY_SIZE(gpu.regs)) {
107 if (cmd != 0 && gpu.regs[cmd] == data)
108 return;
109 gpu.regs[cmd] = data;
110 }
111
112 gpu.state.fb_dirty = 1;
113
114 switch (cmd) {
115 case 0x00:
116 do_reset();
117 break;
118 case 0x03:
119 gpu.status.blanking = data & 1;
120 break;
121 case 0x04:
122 gpu.status.dma = data & 3;
123 break;
124 case 0x05:
125 gpu.screen.x = data & 0x3ff;
126 gpu.screen.y = (data >> 10) & 0x3ff;
127 if (gpu.frameskip.enabled)
128 decide_frameskip();
129 break;
130 case 0x06:
131 gpu.screen.x1 = data & 0xfff;
132 gpu.screen.x2 = (data >> 12) & 0xfff;
133 update_width();
134 break;
135 case 0x07:
136 gpu.screen.y1 = data & 0x3ff;
137 gpu.screen.y2 = (data >> 10) & 0x3ff;
138 update_height();
139 break;
140 case 0x08:
141 gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
142 gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
143 gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
144 update_width();
145 update_height();
146 break;
147 case 0x10 ... 0x1f:
148 get_gpu_info(data);
149 break;
150 }
151}
152
/*
 * Number of extra words that follow the command word, indexed by the command
 * byte (top 8 bits of the first word). check_cmd() uses 1 + cmd_lengths[cmd]
 * as the total length of a command.
 */
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 00
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 10
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11, // 30
  2, 2, 2, 2, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, // 50
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 2, 2, 2, 2, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2, // 70
  3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 90
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // b0
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // d0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0  // f0
};
172
173#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
174
175static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
176{
177 uint16_t *vram = VRAM_MEM_XY(x, y);
178 if (is_read)
179 memcpy(mem, vram, l * 2);
180 else
181 memcpy(vram, mem, l * 2);
182}
183
184static int do_vram_io(uint32_t *data, int count, int is_read)
185{
186 int count_initial = count;
187 uint16_t *sdata = (uint16_t *)data;
188 int x = gpu.dma.x, y = gpu.dma.y;
189 int w = gpu.dma.w, h = gpu.dma.h;
190 int o = gpu.dma.offset;
191 int l;
192 count *= 2; // operate in 16bpp pixels
193
194 if (gpu.dma.offset) {
195 l = w - gpu.dma.offset;
196 if (count < l)
197 l = count;
198
199 do_vram_line(x + o, y, sdata, l, is_read);
200
201 if (o + l < w)
202 o += l;
203 else {
204 o = 0;
205 y++;
206 h--;
207 }
208 sdata += l;
209 count -= l;
210 }
211
212 for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
213 y &= 511;
214 do_vram_line(x, y, sdata, w, is_read);
215 }
216
217 if (h > 0 && count > 0) {
218 y &= 511;
219 do_vram_line(x, y, sdata, count, is_read);
220 o = count;
221 count = 0;
222 }
223 gpu.dma.y = y;
224 gpu.dma.h = h;
225 gpu.dma.offset = o;
226
227 return count_initial - count / 2;
228}
229
230static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
231{
232 if (gpu.dma.h)
233 log_anomaly("start_vram_transfer while old unfinished\n");
234
235 gpu.dma.x = pos_word & 1023;
236 gpu.dma.y = (pos_word >> 16) & 511;
237 gpu.dma.w = size_word & 0xffff; // ?
238 gpu.dma.h = size_word >> 16;
239 gpu.dma.offset = 0;
240
241 if (is_read)
242 gpu.status.img = 1;
243
244 log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
245 gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
246}
247
248static int check_cmd(uint32_t *data, int count)
249{
250 int len, cmd, start, pos;
251 int vram_dirty = 0;
252
253 // process buffer
254 for (start = pos = 0; pos < count; )
255 {
256 cmd = -1;
257 len = 0;
258
259 if (gpu.dma.h) {
260 pos += do_vram_io(data + pos, count - pos, 0);
261 if (pos == count)
262 break;
263 start = pos;
264 }
265
266 // do look-ahead pass to detect SR changes and VRAM i/o
267 while (pos < count) {
268 uint32_t *list = data + pos;
269 cmd = list[0] >> 24;
270 len = 1 + cmd_lengths[cmd];
271
272 //printf(" %3d: %02x %d\n", pos, cmd, len);
273 if ((cmd & 0xf4) == 0x24) {
274 // flat textured prim
275 gpu.status.reg &= ~0x1ff;
276 gpu.status.reg |= list[4] & 0x1ff;
277 }
278 else if ((cmd & 0xf4) == 0x34) {
279 // shaded textured prim
280 gpu.status.reg &= ~0x1ff;
281 gpu.status.reg |= list[5] & 0x1ff;
282 }
283 else switch (cmd)
284 {
285 case 0xe1:
286 gpu.status.reg &= ~0x7ff;
287 gpu.status.reg |= list[0] & 0x7ff;
288 break;
289 case 0xe6:
290 gpu.status.reg &= ~0x1800;
291 gpu.status.reg |= (list[0] & 3) << 11;
292 break;
293 }
294 if (2 <= cmd && cmd < 0xc0)
295 vram_dirty = 1;
296 else if ((cmd & 0xf8) == 0xe0)
297 gpu.ex_regs[cmd & 7] = list[0];
298
299 if (pos + len > count) {
300 cmd = -1;
301 break; // incomplete cmd
302 }
303 if (cmd == 0xa0 || cmd == 0xc0)
304 break; // image i/o
305 pos += len;
306 }
307
308 if (pos - start > 0) {
309 if (!gpu.frameskip.active)
310 do_cmd_list(data + start, pos - start);
311 start = pos;
312 }
313
314 if (cmd == 0xa0 || cmd == 0xc0) {
315 // consume vram write/read cmd
316 start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
317 pos += len;
318 }
319
320 if (cmd == -1)
321 break;
322 }
323
324 gpu.state.fb_dirty |= vram_dirty;
325
326 return count - pos;
327}
328
329static void flush_cmd_buffer(void)
330{
331 int left = check_cmd(gpu.cmd_buffer, gpu.cmd_len);
332 if (left > 0)
333 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
334 gpu.cmd_len = left;
335}
336
337void GPUwriteDataMem(uint32_t *mem, int count)
338{
339 int left;
340
341 log_io("gpu_dma_write %p %d\n", mem, count);
342
343 if (unlikely(gpu.cmd_len > 0))
344 flush_cmd_buffer();
345
346 left = check_cmd(mem, count);
347 if (left)
348 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
349}
350
351void GPUwriteData(uint32_t data)
352{
353 log_io("gpu_write %08x\n", data);
354 gpu.cmd_buffer[gpu.cmd_len++] = data;
355 if (gpu.cmd_len >= CMD_BUFFER_LEN)
356 flush_cmd_buffer();
357}
358
359long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
360{
361 uint32_t addr, *list;
362 int len, left, count;
363
364 if (unlikely(gpu.cmd_len > 0))
365 flush_cmd_buffer();
366
367 log_io("gpu_dma_chain\n");
368 addr = start_addr & 0xffffff;
369 for (count = 0; addr != 0xffffff; count++)
370 {
371 log_io(".chain %08x\n", addr);
372
373 list = rambase + (addr & 0x1fffff) / 4;
374 len = list[0] >> 24;
375 addr = list[0] & 0xffffff;
376
377 // loop detection marker
378 // (bit23 set causes DMA error on real machine, so
379 // unlikely to be ever set by the game)
380 list[0] |= 0x800000;
381
382 if (len) {
383 left = check_cmd(list + 1, len);
384 if (left)
385 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, len);
386 }
387
388 if (addr & 0x800000)
389 break;
390 }
391
392 // remove loop detection markers
393 addr = start_addr & 0x1fffff;
394 while (count-- > 0) {
395 list = rambase + addr / 4;
396 addr = list[0] & 0x1fffff;
397 list[0] &= ~0x800000;
398 }
399
400 return 0;
401}
402
403void GPUreadDataMem(uint32_t *mem, int count)
404{
405 log_io("gpu_dma_read %p %d\n", mem, count);
406
407 if (unlikely(gpu.cmd_len > 0))
408 flush_cmd_buffer();
409
410 if (gpu.dma.h)
411 do_vram_io(mem, count, 1);
412}
413
414uint32_t GPUreadData(void)
415{
416 log_io("gpu_read\n");
417
418 if (unlikely(gpu.cmd_len > 0))
419 flush_cmd_buffer();
420
421 if (gpu.dma.h)
422 do_vram_io(&gpu.gp0, 1, 1);
423
424 return gpu.gp0;
425}
426
427uint32_t GPUreadStatus(void)
428{
429 uint32_t ret;
430
431 if (unlikely(gpu.cmd_len > 0))
432 flush_cmd_buffer();
433
434 ret = gpu.status.reg | (*gpu.lcf_hc << 31);
435 log_io("gpu_read_status %08x\n", ret);
436 return ret;
437}
438
439typedef struct GPUFREEZETAG
440{
441 uint32_t ulFreezeVersion; // should be always 1 for now (set by main emu)
442 uint32_t ulStatus; // current gpu status
443 uint32_t ulControl[256]; // latest control register values
444 unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
445} GPUFreeze_t;
446
447long GPUfreeze(uint32_t type, GPUFreeze_t *freeze)
448{
449 int i;
450
451 switch (type) {
452 case 1: // save
453 if (gpu.cmd_len > 0)
454 flush_cmd_buffer();
455 memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
456 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
457 memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
458 freeze->ulStatus = gpu.status.reg;
459 break;
460 case 0: // load
461 memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
462 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
463 memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
464 gpu.status.reg = freeze->ulStatus;
465 for (i = 8; i > 0; i--) {
466 gpu.regs[i] ^= 1; // avoid reg change detection
467 GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
468 }
469 break;
470 }
471
472 return 1;
473}
474
475void GPUvBlank(int val, uint32_t *hcnt)
476{
477 gpu.lcf_hc = &gpu.zero;
478 if (gpu.status.interlace) {
479 if (val)
480 gpu.status.lcf ^= 1;
481 }
482 else {
483 gpu.status.lcf = 0;
484 if (!val)
485 gpu.lcf_hc = hcnt;
486 }
487}
488
489// vim:shiftwidth=2:expandtab