gpu_neon: fix linkage for c++ code (una-i)
[pcsx_rearmed.git] / plugins / gpu_neon / gpu.c
CommitLineData
1ab64c54
GI
1/*
2 * (C) Gražvydas "notaz" Ignotas, 2011
3 *
4 * This work is licensed under the terms of any of these licenses
5 * (at your option):
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
9 */
10
d30279e2 11#include <stdio.h>
1ab64c54 12#include <string.h>
56f08d83 13#include "gpu.h"
1ab64c54
GI
14
15#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
d30279e2 16#define unlikely(x) __builtin_expect((x), 0)
8dd855cd 17#define noinline __attribute__((noinline))
1ab64c54 18
deb18d24 19#define gpu_log(fmt, ...) \
3ece2f0c 20 printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)
deb18d24 21
22//#define log_io gpu_log
56f08d83 23#define log_io(...)
9394ada5 24//#define log_anomaly gpu_log
25#define log_anomaly(...)
56f08d83 26
27struct psx_gpu gpu __attribute__((aligned(64)));
1ab64c54 28
6e9bdaef 29static noinline void do_reset(void)
1ab64c54 30{
6e9bdaef 31 memset(gpu.regs, 0, sizeof(gpu.regs));
d30279e2 32 gpu.status.reg = 0x14802000;
6e9bdaef 33 gpu.gp0 = 0;
fc84f618 34 gpu.regs[3] = 1;
6e9bdaef 35 gpu.screen.hres = gpu.screen.w = 256;
fc84f618 36 gpu.screen.vres = gpu.screen.h = 240;
1ab64c54
GI
37}
38
8dd855cd 39static noinline void update_width(void)
40{
41 int sw = gpu.screen.x2 - gpu.screen.x1;
42 if (sw <= 0 || sw >= 2560)
43 // full width
44 gpu.screen.w = gpu.screen.hres;
45 else
46 gpu.screen.w = sw * gpu.screen.hres / 2560;
47}
48
49static noinline void update_height(void)
50{
51 int sh = gpu.screen.y2 - gpu.screen.y1;
52 if (gpu.status.dheight)
53 sh *= 2;
54 if (sh <= 0)
55 sh = gpu.screen.vres;
56
57 gpu.screen.h = sh;
58}
59
fc84f618 60static noinline void decide_frameskip(void)
61{
62 gpu.frameskip.frame_ready = !gpu.frameskip.active;
63
ea4a16e7 64 if (!gpu.frameskip.active && (*gpu.frameskip.advice || gpu.frameskip.set == 1))
fc84f618 65 gpu.frameskip.active = 1;
66 else
67 gpu.frameskip.active = 0;
68}
69
6e9bdaef 70static noinline void get_gpu_info(uint32_t data)
71{
72 switch (data & 0x0f) {
73 case 0x02:
74 case 0x03:
75 case 0x04:
76 case 0x05:
77 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
78 break;
79 case 0x06:
80 gpu.gp0 = gpu.ex_regs[5] & 0xfffff;
81 break;
82 case 0x07:
83 gpu.gp0 = 2;
84 break;
85 default:
86 gpu.gp0 = 0;
87 break;
88 }
89}
90
91long GPUinit(void)
92{
9394ada5 93 int ret;
94 ret = vout_init();
95 ret |= renderer_init();
96
3ece2f0c 97 gpu.state.frame_count = &gpu.zero;
deb18d24 98 gpu.state.hcnt = &gpu.zero;
9394ada5 99 do_reset();
6e9bdaef 100 return ret;
101}
102
/*
 * Plugin entry point: shut down video output.
 * Returns whatever vout_finish() reports.
 */
long GPUshutdown(void)
{
  long ret = vout_finish();

  return ret;
}
107
1ab64c54
GI
108void GPUwriteStatus(uint32_t data)
109{
110 static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
111 static const short vres[4] = { 240, 480, 256, 480 };
112 uint32_t cmd = data >> 24;
113
fc84f618 114 if (cmd < ARRAY_SIZE(gpu.regs)) {
19e7cf87 115 if (cmd != 0 && cmd != 5 && gpu.regs[cmd] == data)
fc84f618 116 return;
8dd855cd 117 gpu.regs[cmd] = data;
fc84f618 118 }
119
120 gpu.state.fb_dirty = 1;
8dd855cd 121
122 switch (cmd) {
1ab64c54 123 case 0x00:
6e9bdaef 124 do_reset();
1ab64c54
GI
125 break;
126 case 0x03:
d30279e2 127 gpu.status.blanking = data & 1;
1ab64c54
GI
128 break;
129 case 0x04:
130 gpu.status.dma = data & 3;
131 break;
132 case 0x05:
133 gpu.screen.x = data & 0x3ff;
134 gpu.screen.y = (data >> 10) & 0x3ff;
fb4c6fba 135 if (gpu.frameskip.set && gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
fc84f618 136 decide_frameskip();
fb4c6fba 137 gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
138 }
1ab64c54 139 break;
8dd855cd 140 case 0x06:
141 gpu.screen.x1 = data & 0xfff;
142 gpu.screen.x2 = (data >> 12) & 0xfff;
143 update_width();
144 break;
1ab64c54
GI
145 case 0x07:
146 gpu.screen.y1 = data & 0x3ff;
147 gpu.screen.y2 = (data >> 10) & 0x3ff;
8dd855cd 148 update_height();
1ab64c54
GI
149 break;
150 case 0x08:
151 gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
8dd855cd 152 gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
153 gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
154 update_width();
155 update_height();
1ab64c54 156 break;
deb18d24 157 default:
158 if ((cmd & 0xf0) == 0x10)
159 get_gpu_info(data);
6e9bdaef 160 break;
1ab64c54 161 }
1ab64c54
GI
162}
163
/*
 * Number of parameter words that follow each command word, indexed by
 * the command byte (top 8 bits of the command word). A complete
 * command therefore occupies 1 + cmd_lengths[cmd] words.
 */
const unsigned char cmd_lengths[256] =
{
  /* 00 */ 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 20 */ 3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8,
  /* 30 */ 5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  /* 40 */ 2, 2, 2, 2, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
  /* 50 */ 3, 3, 3, 3, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4,
  /* 60 */ 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 2, 2, 2, 2,
  /* 70 */ 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  /* 80 */ 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* a0 */ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* b0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* c0 */ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* d0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* e0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* f0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
183
d30279e2
GI
184#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
185
186static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
1ab64c54 187{
d30279e2
GI
188 uint16_t *vram = VRAM_MEM_XY(x, y);
189 if (is_read)
190 memcpy(mem, vram, l * 2);
191 else
192 memcpy(vram, mem, l * 2);
193}
194
195static int do_vram_io(uint32_t *data, int count, int is_read)
196{
197 int count_initial = count;
198 uint16_t *sdata = (uint16_t *)data;
199 int x = gpu.dma.x, y = gpu.dma.y;
200 int w = gpu.dma.w, h = gpu.dma.h;
ddd56f6e 201 int o = gpu.dma.offset;
d30279e2
GI
202 int l;
203 count *= 2; // operate in 16bpp pixels
204
205 if (gpu.dma.offset) {
206 l = w - gpu.dma.offset;
ddd56f6e 207 if (count < l)
d30279e2 208 l = count;
ddd56f6e 209
210 do_vram_line(x + o, y, sdata, l, is_read);
211
212 if (o + l < w)
213 o += l;
214 else {
215 o = 0;
216 y++;
217 h--;
218 }
d30279e2
GI
219 sdata += l;
220 count -= l;
d30279e2
GI
221 }
222
223 for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
224 y &= 511;
225 do_vram_line(x, y, sdata, w, is_read);
226 }
227
228 if (h > 0 && count > 0) {
229 y &= 511;
230 do_vram_line(x, y, sdata, count, is_read);
ddd56f6e 231 o = count;
d30279e2
GI
232 count = 0;
233 }
d30279e2
GI
234 gpu.dma.y = y;
235 gpu.dma.h = h;
ddd56f6e 236 gpu.dma.offset = o;
d30279e2 237
6e9bdaef 238 return count_initial - count / 2;
d30279e2
GI
239}
240
241static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
242{
ddd56f6e 243 if (gpu.dma.h)
244 log_anomaly("start_vram_transfer while old unfinished\n");
245
d30279e2
GI
246 gpu.dma.x = pos_word & 1023;
247 gpu.dma.y = (pos_word >> 16) & 511;
248 gpu.dma.w = size_word & 0xffff; // ?
249 gpu.dma.h = size_word >> 16;
250 gpu.dma.offset = 0;
251
252 if (is_read)
253 gpu.status.img = 1;
9394ada5 254 else
255 renderer_invalidate_caches(gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
d30279e2 256
6e9bdaef 257 log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
258 gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
d30279e2
GI
259}
260
261static int check_cmd(uint32_t *data, int count)
262{
263 int len, cmd, start, pos;
fc84f618 264 int vram_dirty = 0;
d30279e2 265
d30279e2 266 // process buffer
ddd56f6e 267 for (start = pos = 0; pos < count; )
d30279e2
GI
268 {
269 cmd = -1;
270 len = 0;
271
272 if (gpu.dma.h) {
273 pos += do_vram_io(data + pos, count - pos, 0);
ddd56f6e 274 if (pos == count)
275 break;
d30279e2
GI
276 start = pos;
277 }
278
ddd56f6e 279 // do look-ahead pass to detect SR changes and VRAM i/o
d30279e2 280 while (pos < count) {
56f08d83 281 uint32_t *list = data + pos;
282 cmd = list[0] >> 24;
d30279e2 283 len = 1 + cmd_lengths[cmd];
56f08d83 284
d30279e2 285 //printf(" %3d: %02x %d\n", pos, cmd, len);
56f08d83 286 if ((cmd & 0xf4) == 0x24) {
287 // flat textured prim
a3a9f519 288 gpu.ex_regs[1] &= ~0x1ff;
289 gpu.ex_regs[1] |= list[4] & 0x1ff;
56f08d83 290 }
291 else if ((cmd & 0xf4) == 0x34) {
292 // shaded textured prim
a3a9f519 293 gpu.ex_regs[1] &= ~0x1ff;
294 gpu.ex_regs[1] |= list[5] & 0x1ff;
56f08d83 295 }
fb4c6fba 296 else if (cmd == 0xe3)
297 {
298 // no frameskip if it decides to draw to display area,
299 // but not for interlace since it'll most likely always do that
300 uint32_t x = list[0] & 0x3ff;
301 uint32_t y = (list[0] >> 10) & 0x3ff;
302 gpu.frameskip.allow = gpu.status.interlace ||
303 (uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
304 (uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
305 }
fc84f618 306 if (2 <= cmd && cmd < 0xc0)
307 vram_dirty = 1;
6e9bdaef 308 else if ((cmd & 0xf8) == 0xe0)
309 gpu.ex_regs[cmd & 7] = list[0];
56f08d83 310
d30279e2
GI
311 if (pos + len > count) {
312 cmd = -1;
313 break; // incomplete cmd
314 }
315 if (cmd == 0xa0 || cmd == 0xc0)
316 break; // image i/o
317 pos += len;
318 }
319
320 if (pos - start > 0) {
fb4c6fba 321 if (!gpu.frameskip.active || !gpu.frameskip.allow)
fc84f618 322 do_cmd_list(data + start, pos - start);
d30279e2
GI
323 start = pos;
324 }
325
326 if (cmd == 0xa0 || cmd == 0xc0) {
327 // consume vram write/read cmd
328 start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
329 pos += len;
330 }
5b745e5b 331 else if (cmd == -1)
ddd56f6e 332 break;
d30279e2 333 }
ddd56f6e 334
a3a9f519 335 gpu.status.reg &= ~0x1fff;
336 gpu.status.reg |= gpu.ex_regs[1] & 0x7ff;
337 gpu.status.reg |= (gpu.ex_regs[6] & 3) << 11;
338
5b745e5b 339 if (gpu.frameskip.active)
340 renderer_sync_ecmds(gpu.ex_regs);
fc84f618 341 gpu.state.fb_dirty |= vram_dirty;
342
ddd56f6e 343 return count - pos;
d30279e2
GI
344}
345
346static void flush_cmd_buffer(void)
347{
348 int left = check_cmd(gpu.cmd_buffer, gpu.cmd_len);
349 if (left > 0)
350 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
351 gpu.cmd_len = left;
1ab64c54
GI
352}
353
354void GPUwriteDataMem(uint32_t *mem, int count)
355{
d30279e2
GI
356 int left;
357
56f08d83 358 log_io("gpu_dma_write %p %d\n", mem, count);
359
d30279e2
GI
360 if (unlikely(gpu.cmd_len > 0))
361 flush_cmd_buffer();
56f08d83 362
d30279e2
GI
363 left = check_cmd(mem, count);
364 if (left)
56f08d83 365 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
1ab64c54
GI
366}
367
d30279e2 368void GPUwriteData(uint32_t data)
1ab64c54 369{
56f08d83 370 log_io("gpu_write %08x\n", data);
d30279e2
GI
371 gpu.cmd_buffer[gpu.cmd_len++] = data;
372 if (gpu.cmd_len >= CMD_BUFFER_LEN)
373 flush_cmd_buffer();
1ab64c54
GI
374}
375
ddd56f6e 376long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
1ab64c54 377{
ddd56f6e 378 uint32_t addr, *list;
deb18d24 379 uint32_t *llist_entry = NULL;
ddd56f6e 380 int len, left, count;
deb18d24 381 long dma_words = 0;
d30279e2
GI
382
383 if (unlikely(gpu.cmd_len > 0))
384 flush_cmd_buffer();
385
deb18d24 386 // ff7 sends it's main list twice, detect this
3ece2f0c 387 if (*gpu.state.frame_count == gpu.state.last_list.frame &&
388 *gpu.state.hcnt - gpu.state.last_list.hcnt <= 1 &&
389 gpu.state.last_list.words > 1024)
deb18d24 390 {
391 llist_entry = rambase + (gpu.state.last_list.addr & 0x1fffff) / 4;
392 *llist_entry |= 0x800000;
393 }
394
56f08d83 395 log_io("gpu_dma_chain\n");
ddd56f6e 396 addr = start_addr & 0xffffff;
397 for (count = 0; addr != 0xffffff; count++)
398 {
ddd56f6e 399 list = rambase + (addr & 0x1fffff) / 4;
d30279e2
GI
400 len = list[0] >> 24;
401 addr = list[0] & 0xffffff;
deb18d24 402 dma_words += 1 + len;
403
404 log_io(".chain %08x #%d\n", (list - rambase) * 4, len);
ddd56f6e 405
406 // loop detection marker
407 // (bit23 set causes DMA error on real machine, so
408 // unlikely to be ever set by the game)
409 list[0] |= 0x800000;
410
56f08d83 411 if (len) {
412 left = check_cmd(list + 1, len);
413 if (left)
deb18d24 414 log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
56f08d83 415 }
ddd56f6e 416
417 if (addr & 0x800000)
418 break;
419 }
420
421 // remove loop detection markers
422 addr = start_addr & 0x1fffff;
423 while (count-- > 0) {
424 list = rambase + addr / 4;
425 addr = list[0] & 0x1fffff;
426 list[0] &= ~0x800000;
d30279e2 427 }
deb18d24 428 if (llist_entry)
429 *llist_entry &= ~0x800000;
d30279e2 430
3ece2f0c 431 gpu.state.last_list.frame = *gpu.state.frame_count;
deb18d24 432 gpu.state.last_list.hcnt = *gpu.state.hcnt;
433 gpu.state.last_list.words = dma_words;
434 gpu.state.last_list.addr = start_addr;
435
436 return dma_words;
1ab64c54
GI
437}
438
d30279e2
GI
439void GPUreadDataMem(uint32_t *mem, int count)
440{
56f08d83 441 log_io("gpu_dma_read %p %d\n", mem, count);
442
d30279e2
GI
443 if (unlikely(gpu.cmd_len > 0))
444 flush_cmd_buffer();
56f08d83 445
d30279e2
GI
446 if (gpu.dma.h)
447 do_vram_io(mem, count, 1);
448}
449
450uint32_t GPUreadData(void)
451{
56f08d83 452 log_io("gpu_read\n");
453
454 if (unlikely(gpu.cmd_len > 0))
455 flush_cmd_buffer();
456
457 if (gpu.dma.h)
6e9bdaef 458 do_vram_io(&gpu.gp0, 1, 1);
56f08d83 459
6e9bdaef 460 return gpu.gp0;
d30279e2
GI
461}
462
463uint32_t GPUreadStatus(void)
464{
ddd56f6e 465 uint32_t ret;
56f08d83 466
d30279e2
GI
467 if (unlikely(gpu.cmd_len > 0))
468 flush_cmd_buffer();
469
24de2dd4 470 ret = gpu.status.reg;
ddd56f6e 471 log_io("gpu_read_status %08x\n", ret);
472 return ret;
d30279e2
GI
473}
474
/*
 * Save-state container exchanged with the main emulator.
 * Field order and sizes are part of the interface and must not change.
 */
struct GPUFreeze
{
  /* should be always 1 for now (set by main emu) */
  uint32_t ulFreezeVersion;
  /* current gpu status */
  uint32_t ulStatus;
  /* latest control register values */
  uint32_t ulControl[256];
  /* current VRam image (full 2 MB for ZN) */
  unsigned char psxVRam[2 * 1024 * 1024];
};
1ab64c54 482
096ec49b 483long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
1ab64c54 484{
fc84f618 485 int i;
486
1ab64c54
GI
487 switch (type) {
488 case 1: // save
d30279e2
GI
489 if (gpu.cmd_len > 0)
490 flush_cmd_buffer();
1ab64c54
GI
491 memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
492 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
6e9bdaef 493 memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
1ab64c54 494 freeze->ulStatus = gpu.status.reg;
1ab64c54
GI
495 break;
496 case 0: // load
9394ada5 497 renderer_invalidate_caches(0, 0, 1024, 512);
1ab64c54
GI
498 memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
499 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
6e9bdaef 500 memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
1ab64c54 501 gpu.status.reg = freeze->ulStatus;
fc84f618 502 for (i = 8; i > 0; i--) {
503 gpu.regs[i] ^= 1; // avoid reg change detection
504 GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
505 }
5b745e5b 506 renderer_sync_ecmds(gpu.ex_regs);
1ab64c54
GI
507 break;
508 }
509
510 return 1;
511}
512
1ab64c54 513// vim:shiftwidth=2:expandtab