gpu_neon: gpu info reads, some bugfixes
[pcsx_rearmed.git] / plugins / gpu_neon / gpu.c
CommitLineData
1ab64c54
GI
1/*
2 * (C) Gražvydas "notaz" Ignotas, 2011
3 *
4 * This work is licensed under the terms of any of these licenses
5 * (at your option):
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
9 */
10
d30279e2 11#include <stdio.h>
1ab64c54 12#include <string.h>
56f08d83 13#include "gpu.h"
1ab64c54
GI
14
15#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
d30279e2 16#define unlikely(x) __builtin_expect((x), 0)
8dd855cd 17#define noinline __attribute__((noinline))
1ab64c54 18
56f08d83 19//#define log_io printf
20#define log_io(...)
21#define log_anomaly printf
22
23struct psx_gpu gpu __attribute__((aligned(64)));
1ab64c54 24
6e9bdaef 25static noinline void do_reset(void)
1ab64c54 26{
6e9bdaef 27 memset(gpu.regs, 0, sizeof(gpu.regs));
d30279e2 28 gpu.status.reg = 0x14802000;
6e9bdaef 29 gpu.gp0 = 0;
fc84f618 30 gpu.regs[3] = 1;
6e9bdaef 31 gpu.screen.hres = gpu.screen.w = 256;
fc84f618 32 gpu.screen.vres = gpu.screen.h = 240;
1ab64c54
GI
33}
34
8dd855cd 35static noinline void update_width(void)
36{
37 int sw = gpu.screen.x2 - gpu.screen.x1;
38 if (sw <= 0 || sw >= 2560)
39 // full width
40 gpu.screen.w = gpu.screen.hres;
41 else
42 gpu.screen.w = sw * gpu.screen.hres / 2560;
43}
44
45static noinline void update_height(void)
46{
47 int sh = gpu.screen.y2 - gpu.screen.y1;
48 if (gpu.status.dheight)
49 sh *= 2;
50 if (sh <= 0)
51 sh = gpu.screen.vres;
52
53 gpu.screen.h = sh;
54}
55
fc84f618 56static noinline void decide_frameskip(void)
57{
58 gpu.frameskip.frame_ready = !gpu.frameskip.active;
59
60 if (!gpu.frameskip.active && *gpu.frameskip.advice)
61 gpu.frameskip.active = 1;
62 else
63 gpu.frameskip.active = 0;
64}
65
6e9bdaef 66static noinline void get_gpu_info(uint32_t data)
67{
68 switch (data & 0x0f) {
69 case 0x02:
70 case 0x03:
71 case 0x04:
72 case 0x05:
73 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
74 break;
75 case 0x06:
76 gpu.gp0 = gpu.ex_regs[5] & 0xfffff;
77 break;
78 case 0x07:
79 gpu.gp0 = 2;
80 break;
81 default:
82 gpu.gp0 = 0;
83 break;
84 }
85}
86
87long GPUinit(void)
88{
89 int ret = vout_init();
90 do_reset();
91 gpu.lcf_hc = &gpu.zero;
92 return ret;
93}
94
// Plugin entry point: shut down video output.
// Returns whatever vout_finish() returned.
long GPUshutdown(void)
{
  long result = vout_finish();

  return result;
}
99
1ab64c54
GI
100void GPUwriteStatus(uint32_t data)
101{
102 static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
103 static const short vres[4] = { 240, 480, 256, 480 };
104 uint32_t cmd = data >> 24;
105
fc84f618 106 if (cmd < ARRAY_SIZE(gpu.regs)) {
107 if (cmd != 0 && gpu.regs[cmd] == data)
108 return;
8dd855cd 109 gpu.regs[cmd] = data;
fc84f618 110 }
111
112 gpu.state.fb_dirty = 1;
8dd855cd 113
114 switch (cmd) {
1ab64c54 115 case 0x00:
6e9bdaef 116 do_reset();
1ab64c54
GI
117 break;
118 case 0x03:
d30279e2 119 gpu.status.blanking = data & 1;
1ab64c54
GI
120 break;
121 case 0x04:
122 gpu.status.dma = data & 3;
123 break;
124 case 0x05:
125 gpu.screen.x = data & 0x3ff;
126 gpu.screen.y = (data >> 10) & 0x3ff;
fc84f618 127 if (gpu.frameskip.enabled)
128 decide_frameskip();
1ab64c54 129 break;
8dd855cd 130 case 0x06:
131 gpu.screen.x1 = data & 0xfff;
132 gpu.screen.x2 = (data >> 12) & 0xfff;
133 update_width();
134 break;
1ab64c54
GI
135 case 0x07:
136 gpu.screen.y1 = data & 0x3ff;
137 gpu.screen.y2 = (data >> 10) & 0x3ff;
8dd855cd 138 update_height();
1ab64c54
GI
139 break;
140 case 0x08:
141 gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
8dd855cd 142 gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
143 gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
144 update_width();
145 update_height();
1ab64c54 146 break;
6e9bdaef 147 case 0x10 ... 0x1f:
148 get_gpu_info(data);
149 break;
1ab64c54 150 }
1ab64c54
GI
151}
152
// Number of parameter words that follow each command word, indexed by
// the command byte; a full command occupies 1 + cmd_lengths[cmd] words.
// Entries not listed here are 0.
const unsigned char cmd_lengths[256] =
{
  [0x02] = 2,
  [0x20] = 3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8,
  [0x30] = 5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  [0x40] = 2, 2, 2, 2, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
  [0x50] = 3, 3, 3, 3, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4,
  [0x60] = 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 2, 2, 2, 2,
  [0x70] = 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  [0x80] = 3,
  [0xa0] = 2,
  [0xc0] = 2,
};
172
d30279e2
GI
173#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
174
175static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
1ab64c54 176{
d30279e2
GI
177 uint16_t *vram = VRAM_MEM_XY(x, y);
178 if (is_read)
179 memcpy(mem, vram, l * 2);
180 else
181 memcpy(vram, mem, l * 2);
182}
183
184static int do_vram_io(uint32_t *data, int count, int is_read)
185{
186 int count_initial = count;
187 uint16_t *sdata = (uint16_t *)data;
188 int x = gpu.dma.x, y = gpu.dma.y;
189 int w = gpu.dma.w, h = gpu.dma.h;
ddd56f6e 190 int o = gpu.dma.offset;
d30279e2
GI
191 int l;
192 count *= 2; // operate in 16bpp pixels
193
194 if (gpu.dma.offset) {
195 l = w - gpu.dma.offset;
ddd56f6e 196 if (count < l)
d30279e2 197 l = count;
ddd56f6e 198
199 do_vram_line(x + o, y, sdata, l, is_read);
200
201 if (o + l < w)
202 o += l;
203 else {
204 o = 0;
205 y++;
206 h--;
207 }
d30279e2
GI
208 sdata += l;
209 count -= l;
d30279e2
GI
210 }
211
212 for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
213 y &= 511;
214 do_vram_line(x, y, sdata, w, is_read);
215 }
216
217 if (h > 0 && count > 0) {
218 y &= 511;
219 do_vram_line(x, y, sdata, count, is_read);
ddd56f6e 220 o = count;
d30279e2
GI
221 count = 0;
222 }
d30279e2
GI
223 gpu.dma.y = y;
224 gpu.dma.h = h;
ddd56f6e 225 gpu.dma.offset = o;
d30279e2 226
6e9bdaef 227 return count_initial - count / 2;
d30279e2
GI
228}
229
230static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
231{
ddd56f6e 232 if (gpu.dma.h)
233 log_anomaly("start_vram_transfer while old unfinished\n");
234
d30279e2
GI
235 gpu.dma.x = pos_word & 1023;
236 gpu.dma.y = (pos_word >> 16) & 511;
237 gpu.dma.w = size_word & 0xffff; // ?
238 gpu.dma.h = size_word >> 16;
239 gpu.dma.offset = 0;
240
241 if (is_read)
242 gpu.status.img = 1;
243
6e9bdaef 244 log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
245 gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
d30279e2
GI
246}
247
248static int check_cmd(uint32_t *data, int count)
249{
250 int len, cmd, start, pos;
fc84f618 251 int vram_dirty = 0;
d30279e2 252
d30279e2 253 // process buffer
ddd56f6e 254 for (start = pos = 0; pos < count; )
d30279e2
GI
255 {
256 cmd = -1;
257 len = 0;
258
259 if (gpu.dma.h) {
260 pos += do_vram_io(data + pos, count - pos, 0);
ddd56f6e 261 if (pos == count)
262 break;
d30279e2
GI
263 start = pos;
264 }
265
ddd56f6e 266 // do look-ahead pass to detect SR changes and VRAM i/o
d30279e2 267 while (pos < count) {
56f08d83 268 uint32_t *list = data + pos;
269 cmd = list[0] >> 24;
d30279e2 270 len = 1 + cmd_lengths[cmd];
56f08d83 271
d30279e2 272 //printf(" %3d: %02x %d\n", pos, cmd, len);
56f08d83 273 if ((cmd & 0xf4) == 0x24) {
274 // flat textured prim
275 gpu.status.reg &= ~0x1ff;
276 gpu.status.reg |= list[4] & 0x1ff;
277 }
278 else if ((cmd & 0xf4) == 0x34) {
279 // shaded textured prim
280 gpu.status.reg &= ~0x1ff;
281 gpu.status.reg |= list[5] & 0x1ff;
282 }
283 else switch (cmd)
284 {
285 case 0xe1:
286 gpu.status.reg &= ~0x7ff;
287 gpu.status.reg |= list[0] & 0x7ff;
288 break;
289 case 0xe6:
290 gpu.status.reg &= ~0x1800;
291 gpu.status.reg |= (list[0] & 3) << 11;
292 break;
293 }
fc84f618 294 if (2 <= cmd && cmd < 0xc0)
295 vram_dirty = 1;
6e9bdaef 296 else if ((cmd & 0xf8) == 0xe0)
297 gpu.ex_regs[cmd & 7] = list[0];
56f08d83 298
d30279e2
GI
299 if (pos + len > count) {
300 cmd = -1;
301 break; // incomplete cmd
302 }
303 if (cmd == 0xa0 || cmd == 0xc0)
304 break; // image i/o
305 pos += len;
306 }
307
308 if (pos - start > 0) {
fc84f618 309 if (!gpu.frameskip.active)
310 do_cmd_list(data + start, pos - start);
d30279e2
GI
311 start = pos;
312 }
313
314 if (cmd == 0xa0 || cmd == 0xc0) {
315 // consume vram write/read cmd
316 start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
317 pos += len;
318 }
319
ddd56f6e 320 if (cmd == -1)
321 break;
d30279e2 322 }
ddd56f6e 323
fc84f618 324 gpu.state.fb_dirty |= vram_dirty;
325
ddd56f6e 326 return count - pos;
d30279e2
GI
327}
328
329static void flush_cmd_buffer(void)
330{
331 int left = check_cmd(gpu.cmd_buffer, gpu.cmd_len);
332 if (left > 0)
333 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
334 gpu.cmd_len = left;
1ab64c54
GI
335}
336
337void GPUwriteDataMem(uint32_t *mem, int count)
338{
d30279e2
GI
339 int left;
340
56f08d83 341 log_io("gpu_dma_write %p %d\n", mem, count);
342
d30279e2
GI
343 if (unlikely(gpu.cmd_len > 0))
344 flush_cmd_buffer();
56f08d83 345
d30279e2
GI
346 left = check_cmd(mem, count);
347 if (left)
56f08d83 348 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
1ab64c54
GI
349}
350
d30279e2 351void GPUwriteData(uint32_t data)
1ab64c54 352{
56f08d83 353 log_io("gpu_write %08x\n", data);
d30279e2
GI
354 gpu.cmd_buffer[gpu.cmd_len++] = data;
355 if (gpu.cmd_len >= CMD_BUFFER_LEN)
356 flush_cmd_buffer();
1ab64c54
GI
357}
358
ddd56f6e 359long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
1ab64c54 360{
ddd56f6e 361 uint32_t addr, *list;
362 int len, left, count;
d30279e2
GI
363
364 if (unlikely(gpu.cmd_len > 0))
365 flush_cmd_buffer();
366
56f08d83 367 log_io("gpu_dma_chain\n");
ddd56f6e 368 addr = start_addr & 0xffffff;
369 for (count = 0; addr != 0xffffff; count++)
370 {
56f08d83 371 log_io(".chain %08x\n", addr);
372
ddd56f6e 373 list = rambase + (addr & 0x1fffff) / 4;
d30279e2
GI
374 len = list[0] >> 24;
375 addr = list[0] & 0xffffff;
ddd56f6e 376
377 // loop detection marker
378 // (bit23 set causes DMA error on real machine, so
379 // unlikely to be ever set by the game)
380 list[0] |= 0x800000;
381
56f08d83 382 if (len) {
383 left = check_cmd(list + 1, len);
384 if (left)
385 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, len);
386 }
ddd56f6e 387
388 if (addr & 0x800000)
389 break;
390 }
391
392 // remove loop detection markers
393 addr = start_addr & 0x1fffff;
394 while (count-- > 0) {
395 list = rambase + addr / 4;
396 addr = list[0] & 0x1fffff;
397 list[0] &= ~0x800000;
d30279e2
GI
398 }
399
1ab64c54
GI
400 return 0;
401}
402
d30279e2
GI
403void GPUreadDataMem(uint32_t *mem, int count)
404{
56f08d83 405 log_io("gpu_dma_read %p %d\n", mem, count);
406
d30279e2
GI
407 if (unlikely(gpu.cmd_len > 0))
408 flush_cmd_buffer();
56f08d83 409
d30279e2
GI
410 if (gpu.dma.h)
411 do_vram_io(mem, count, 1);
412}
413
414uint32_t GPUreadData(void)
415{
56f08d83 416 log_io("gpu_read\n");
417
418 if (unlikely(gpu.cmd_len > 0))
419 flush_cmd_buffer();
420
421 if (gpu.dma.h)
6e9bdaef 422 do_vram_io(&gpu.gp0, 1, 1);
56f08d83 423
6e9bdaef 424 return gpu.gp0;
d30279e2
GI
425}
426
427uint32_t GPUreadStatus(void)
428{
ddd56f6e 429 uint32_t ret;
56f08d83 430
d30279e2
GI
431 if (unlikely(gpu.cmd_len > 0))
432 flush_cmd_buffer();
433
ddd56f6e 434 ret = gpu.status.reg | (*gpu.lcf_hc << 31);
435 log_io("gpu_read_status %08x\n", ret);
436 return ret;
d30279e2
GI
437}
438
1ab64c54
GI
// Savestate layout exchanged with the main emulator via GPUfreeze().
typedef struct GPUFREEZETAG
{
  // format version; the main emulator sets it, always 1 for now
  uint32_t ulFreezeVersion;
  // GPU status register at the time of the save
  uint32_t ulStatus;
  // last value written to each control register
  uint32_t ulControl[256];
  // VRAM contents (a full 2 MB, as needed for ZN)
  unsigned char psxVRam[1024*1024*2];
} GPUFreeze_t;
446
447long GPUfreeze(uint32_t type, GPUFreeze_t *freeze)
448{
fc84f618 449 int i;
450
1ab64c54
GI
451 switch (type) {
452 case 1: // save
d30279e2
GI
453 if (gpu.cmd_len > 0)
454 flush_cmd_buffer();
1ab64c54
GI
455 memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
456 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
6e9bdaef 457 memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
1ab64c54 458 freeze->ulStatus = gpu.status.reg;
1ab64c54
GI
459 break;
460 case 0: // load
461 memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
462 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
6e9bdaef 463 memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
1ab64c54 464 gpu.status.reg = freeze->ulStatus;
fc84f618 465 for (i = 8; i > 0; i--) {
466 gpu.regs[i] ^= 1; // avoid reg change detection
467 GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
468 }
1ab64c54
GI
469 break;
470 }
471
472 return 1;
473}
474
d30279e2 475void GPUvBlank(int val, uint32_t *hcnt)
1ab64c54 476{
d30279e2
GI
477 gpu.lcf_hc = &gpu.zero;
478 if (gpu.status.interlace) {
479 if (val)
480 gpu.status.lcf ^= 1;
481 }
482 else {
483 gpu.status.lcf = 0;
484 if (!val)
485 gpu.lcf_hc = hcnt;
486 }
1ab64c54
GI
487}
488
1ab64c54 489// vim:shiftwidth=2:expandtab