gpu_neon: fix x86 build
[pcsx_rearmed.git] / plugins / gpu_neon / gpu.c
CommitLineData
1ab64c54
GI
1/*
2 * (C) Gražvydas "notaz" Ignotas, 2011
3 *
4 * This work is licensed under the terms of any of these licenses
5 * (at your option):
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
9 */
10
d30279e2 11#include <stdio.h>
1ab64c54 12#include <string.h>
56f08d83 13#include "gpu.h"
1ab64c54
GI
14
15#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
d30279e2 16#define unlikely(x) __builtin_expect((x), 0)
8dd855cd 17#define noinline __attribute__((noinline))
1ab64c54 18
deb18d24 19#define gpu_log(fmt, ...) \
3ece2f0c 20 printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)
deb18d24 21
22//#define log_io gpu_log
56f08d83 23#define log_io(...)
9394ada5 24//#define log_anomaly gpu_log
25#define log_anomaly(...)
56f08d83 26
7d993ee2 27struct psx_gpu gpu __attribute__((aligned(2048)));
1ab64c54 28
6e9bdaef 29static noinline void do_reset(void)
1ab64c54 30{
6e9bdaef 31 memset(gpu.regs, 0, sizeof(gpu.regs));
d30279e2 32 gpu.status.reg = 0x14802000;
6e9bdaef 33 gpu.gp0 = 0;
fc84f618 34 gpu.regs[3] = 1;
6e9bdaef 35 gpu.screen.hres = gpu.screen.w = 256;
fc84f618 36 gpu.screen.vres = gpu.screen.h = 240;
1ab64c54
GI
37}
38
8dd855cd 39static noinline void update_width(void)
40{
41 int sw = gpu.screen.x2 - gpu.screen.x1;
42 if (sw <= 0 || sw >= 2560)
43 // full width
44 gpu.screen.w = gpu.screen.hres;
45 else
46 gpu.screen.w = sw * gpu.screen.hres / 2560;
47}
48
49static noinline void update_height(void)
50{
51 int sh = gpu.screen.y2 - gpu.screen.y1;
52 if (gpu.status.dheight)
53 sh *= 2;
54 if (sh <= 0)
55 sh = gpu.screen.vres;
56
57 gpu.screen.h = sh;
58}
59
fc84f618 60static noinline void decide_frameskip(void)
61{
9fe27e25 62 if (gpu.frameskip.active)
63 gpu.frameskip.cnt++;
64 else {
65 gpu.frameskip.cnt = 0;
66 gpu.frameskip.frame_ready = 1;
67 }
fc84f618 68
9fe27e25 69 if (!gpu.frameskip.active && *gpu.frameskip.advice)
70 gpu.frameskip.active = 1;
71 else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
fc84f618 72 gpu.frameskip.active = 1;
73 else
74 gpu.frameskip.active = 0;
75}
76
9fe27e25 77static noinline void decide_frameskip_allow(uint32_t cmd_e3)
78{
79 // no frameskip if it decides to draw to display area,
80 // but not for interlace since it'll most likely always do that
81 uint32_t x = cmd_e3 & 0x3ff;
82 uint32_t y = (cmd_e3 >> 10) & 0x3ff;
83 gpu.frameskip.allow = gpu.status.interlace ||
84 (uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
85 (uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
86}
87
6e9bdaef 88static noinline void get_gpu_info(uint32_t data)
89{
90 switch (data & 0x0f) {
91 case 0x02:
92 case 0x03:
93 case 0x04:
94 case 0x05:
95 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
96 break;
97 case 0x06:
98 gpu.gp0 = gpu.ex_regs[5] & 0xfffff;
99 break;
100 case 0x07:
101 gpu.gp0 = 2;
102 break;
103 default:
104 gpu.gp0 = 0;
105 break;
106 }
107}
108
109long GPUinit(void)
110{
9394ada5 111 int ret;
112 ret = vout_init();
113 ret |= renderer_init();
114
3ece2f0c 115 gpu.state.frame_count = &gpu.zero;
deb18d24 116 gpu.state.hcnt = &gpu.zero;
9394ada5 117 do_reset();
6e9bdaef 118 return ret;
119}
120
// Plugin teardown; only the video output needs explicit cleanup here.
long GPUshutdown(void)
{
  return vout_finish();
}
125
1ab64c54
GI
126void GPUwriteStatus(uint32_t data)
127{
128 static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
129 static const short vres[4] = { 240, 480, 256, 480 };
130 uint32_t cmd = data >> 24;
131
fc84f618 132 if (cmd < ARRAY_SIZE(gpu.regs)) {
19e7cf87 133 if (cmd != 0 && cmd != 5 && gpu.regs[cmd] == data)
fc84f618 134 return;
8dd855cd 135 gpu.regs[cmd] = data;
fc84f618 136 }
137
138 gpu.state.fb_dirty = 1;
8dd855cd 139
140 switch (cmd) {
1ab64c54 141 case 0x00:
6e9bdaef 142 do_reset();
1ab64c54
GI
143 break;
144 case 0x03:
d30279e2 145 gpu.status.blanking = data & 1;
1ab64c54
GI
146 break;
147 case 0x04:
148 gpu.status.dma = data & 3;
149 break;
150 case 0x05:
151 gpu.screen.x = data & 0x3ff;
152 gpu.screen.y = (data >> 10) & 0x3ff;
9fe27e25 153 if (gpu.frameskip.set) {
154 decide_frameskip_allow(gpu.ex_regs[3]);
155 if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
156 decide_frameskip();
157 gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
158 }
fb4c6fba 159 }
1ab64c54 160 break;
8dd855cd 161 case 0x06:
162 gpu.screen.x1 = data & 0xfff;
163 gpu.screen.x2 = (data >> 12) & 0xfff;
164 update_width();
165 break;
1ab64c54
GI
166 case 0x07:
167 gpu.screen.y1 = data & 0x3ff;
168 gpu.screen.y2 = (data >> 10) & 0x3ff;
8dd855cd 169 update_height();
1ab64c54
GI
170 break;
171 case 0x08:
172 gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
8dd855cd 173 gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
174 gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
175 update_width();
176 update_height();
1ab64c54 177 break;
deb18d24 178 default:
179 if ((cmd & 0xf0) == 0x10)
180 get_gpu_info(data);
6e9bdaef 181 break;
1ab64c54 182 }
1ab64c54
GI
183}
184
// Parameter word counts for GP0 commands: check_cmd() consumes
// 1 + cmd_lengths[cmd] words per command.  The image transfer
// commands 0xa0/0xc0 carry extra pixel data and are special-cased
// by the parser.
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 2, 2, 2, 2, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
204
d30279e2
GI
205#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
206
207static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
1ab64c54 208{
d30279e2
GI
209 uint16_t *vram = VRAM_MEM_XY(x, y);
210 if (is_read)
211 memcpy(mem, vram, l * 2);
212 else
213 memcpy(vram, mem, l * 2);
214}
215
// Transfer up to 'count' 32bit words between 'data' and the VRAM
// rectangle described by gpu.dma (set up by start_vram_transfer).
// Updates gpu.dma so an interrupted transfer can resume on a later
// call.  Returns the number of words actually consumed/produced.
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;  // halfword offset into the current line
  int l;
  count *= 2; // operate in 16bpp pixels

  // finish a previously interrupted partial line first
  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read);

    if (o + l < w)
      o += l;  // the line is still not complete
    else {
      // line done - advance to the next one
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  // bulk of the transfer: whole lines
  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;  // VRAM is 512 lines high, wrap around
    do_vram_line(x, y, sdata, w, is_read);
  }

  // trailing data that doesn't fill a whole line - remember the offset
  if (h > 0 && count > 0) {
    y &= 511;
    do_vram_line(x, y, sdata, count, is_read);
    o = count;
    count = 0;
  }

  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  // convert leftover halfwords back to consumed 32bit words
  return count_initial - count / 2;
}
261
262static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
263{
ddd56f6e 264 if (gpu.dma.h)
265 log_anomaly("start_vram_transfer while old unfinished\n");
266
d30279e2
GI
267 gpu.dma.x = pos_word & 1023;
268 gpu.dma.y = (pos_word >> 16) & 511;
269 gpu.dma.w = size_word & 0xffff; // ?
270 gpu.dma.h = size_word >> 16;
271 gpu.dma.offset = 0;
272
9e146206 273 renderer_flush_queues();
274 if (is_read) {
d30279e2 275 gpu.status.img = 1;
9e146206 276 // XXX: wrong for width 1
277 memcpy(&gpu.gp0, VRAM_MEM_XY(gpu.dma.x, gpu.dma.y), 4);
278 }
c64af26f 279 else {
9394ada5 280 renderer_invalidate_caches(gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
c64af26f 281 }
d30279e2 282
6e9bdaef 283 log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
284 gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
d30279e2
GI
285}
286
// Parse and execute up to 'count' words of GP0 command stream from
// 'data'.  Returns the number of trailing words left unprocessed
// (an incomplete command at the end of the buffer).
static int check_cmd(uint32_t *data, int count)
{
  int len, cmd, start, pos;
  int vram_dirty = 0;

  // process buffer
  for (start = pos = 0; pos < count; )
  {
    cmd = -1;
    len = 0;

    // an image transfer is in progress - feed it before parsing more
    if (gpu.dma.h) {
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
      start = pos;
    }

    // do look-ahead pass to detect SR changes and VRAM i/o
    while (pos < count) {
      uint32_t *list = data + pos;
      cmd = list[0] >> 24;
      len = 1 + cmd_lengths[cmd];

      //printf("  %3d: %02x %d\n", pos, cmd, len);
      if ((cmd & 0xf4) == 0x24) {
        // flat textured prim - texpage bits are in the 5th word
        gpu.ex_regs[1] &= ~0x1ff;
        gpu.ex_regs[1] |= list[4] & 0x1ff;
      }
      else if ((cmd & 0xf4) == 0x34) {
        // shaded textured prim - texpage bits are in the 6th word
        gpu.ex_regs[1] &= ~0x1ff;
        gpu.ex_regs[1] |= list[5] & 0x1ff;
      }
      else if (cmd == 0xe3)
        // draw area change may (dis)allow frameskip
        decide_frameskip_allow(list[0]);

      if (2 <= cmd && cmd < 0xc0)
        vram_dirty = 1;  // drawing or vram-write command
      else if ((cmd & 0xf8) == 0xe0)
        gpu.ex_regs[cmd & 7] = list[0];  // remember E0-E7 state cmds

      if (pos + len > count) {
        cmd = -1;
        break; // incomplete cmd
      }
      if (cmd == 0xa0 || cmd == 0xc0)
        break; // image i/o
      pos += len;
    }

    // hand the scanned range to the renderer, unless this frame is skipped
    if (pos - start > 0) {
      if (!gpu.frameskip.active || !gpu.frameskip.allow)
        do_cmd_list(data + start, pos - start);
      start = pos;
    }

    if (cmd == 0xa0 || cmd == 0xc0) {
      // consume vram write/read cmd
      start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
      pos += len;
    }
    else if (cmd == -1)
      break;
  }

  // mirror the latched draw-mode state into the status register
  gpu.status.reg &= ~0x1fff;
  gpu.status.reg |= gpu.ex_regs[1] & 0x7ff;
  gpu.status.reg |= (gpu.ex_regs[6] & 3) << 11;

  // renderer did not see the E-commands while frames were skipped
  if (gpu.frameskip.active)
    renderer_sync_ecmds(gpu.ex_regs);
  gpu.state.fb_dirty |= vram_dirty;

  return count - pos;
}
364
c7945666 365void flush_cmd_buffer(void)
d30279e2
GI
366{
367 int left = check_cmd(gpu.cmd_buffer, gpu.cmd_len);
368 if (left > 0)
369 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
370 gpu.cmd_len = left;
1ab64c54
GI
371}
372
373void GPUwriteDataMem(uint32_t *mem, int count)
374{
d30279e2
GI
375 int left;
376
56f08d83 377 log_io("gpu_dma_write %p %d\n", mem, count);
378
d30279e2
GI
379 if (unlikely(gpu.cmd_len > 0))
380 flush_cmd_buffer();
56f08d83 381
d30279e2
GI
382 left = check_cmd(mem, count);
383 if (left)
56f08d83 384 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
1ab64c54
GI
385}
386
d30279e2 387void GPUwriteData(uint32_t data)
1ab64c54 388{
56f08d83 389 log_io("gpu_write %08x\n", data);
d30279e2
GI
390 gpu.cmd_buffer[gpu.cmd_len++] = data;
391 if (gpu.cmd_len >= CMD_BUFFER_LEN)
392 flush_cmd_buffer();
1ab64c54
GI
393}
394
// Walk a GP0 linked-list DMA chain in PSX RAM starting at start_addr,
// executing each packet's payload.  Returns an approximate cycle cost
// for DMA timing.
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
{
  uint32_t addr, *list;
  uint32_t *llist_entry = NULL;
  int len, left, count;
  long cpu_cycles = 0;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  // ff7 sends its main list twice, detect this:
  // same frame, nearly the same hsync count, and the last list was big
  if (*gpu.state.frame_count == gpu.state.last_list.frame &&
      *gpu.state.hcnt - gpu.state.last_list.hcnt <= 1 &&
      gpu.state.last_list.cycles > 2048)
  {
    // pre-mark the previous list's head so the walk below stops there
    llist_entry = rambase + (gpu.state.last_list.addr & 0x1fffff) / 4;
    *llist_entry |= 0x800000;
  }

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; addr != 0xffffff; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = list[0] >> 24;        // payload word count of this packet
    addr = list[0] & 0xffffff;  // link to the next packet
    cpu_cycles += 10;
    if (len > 0)
      cpu_cycles += 5 + len;

    log_io(".chain %08x #%d\n", (list - rambase) * 4, len);

    // loop detection marker
    // (bit23 set causes DMA error on real machine, so
    // unlikely to be ever set by the game)
    list[0] |= 0x800000;

    if (len) {
      left = check_cmd(list + 1, len);
      if (left)
        log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
    }

    // hit a marker: either a loop or the ff7 resend case
    if (addr & 0x800000)
      break;
  }

  // remove loop detection markers
  addr = start_addr & 0x1fffff;
  while (count-- > 0) {
    list = rambase + addr / 4;
    addr = list[0] & 0x1fffff;
    list[0] &= ~0x800000;
  }
  if (llist_entry)
    *llist_entry &= ~0x800000;

  // remember this list so a resend can be detected next time
  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;

  return cpu_cycles;
}
459
d30279e2
GI
460void GPUreadDataMem(uint32_t *mem, int count)
461{
56f08d83 462 log_io("gpu_dma_read %p %d\n", mem, count);
463
d30279e2
GI
464 if (unlikely(gpu.cmd_len > 0))
465 flush_cmd_buffer();
56f08d83 466
d30279e2
GI
467 if (gpu.dma.h)
468 do_vram_io(mem, count, 1);
469}
470
471uint32_t GPUreadData(void)
472{
9e146206 473 uint32_t ret;
56f08d83 474
475 if (unlikely(gpu.cmd_len > 0))
476 flush_cmd_buffer();
477
9e146206 478 ret = gpu.gp0;
56f08d83 479 if (gpu.dma.h)
9e146206 480 do_vram_io(&ret, 1, 1);
56f08d83 481
9e146206 482 log_io("gpu_read %08x\n", ret);
483 return ret;
d30279e2
GI
484}
485
486uint32_t GPUreadStatus(void)
487{
ddd56f6e 488 uint32_t ret;
56f08d83 489
d30279e2
GI
490 if (unlikely(gpu.cmd_len > 0))
491 flush_cmd_buffer();
492
24de2dd4 493 ret = gpu.status.reg;
ddd56f6e 494 log_io("gpu_read_status %08x\n", ret);
495 return ret;
d30279e2
GI
496}
497
// Savestate container exchanged with the main emulator via GPUfreeze().
struct GPUFreeze
{
  uint32_t ulFreezeVersion;      // should be always 1 for now (set by main emu)
  uint32_t ulStatus;             // current gpu status
  uint32_t ulControl[256];       // latest control register values
                                 // (slots 0xe0+ hold gpu.ex_regs, see GPUfreeze)
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};
1ab64c54 505
096ec49b 506long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
1ab64c54 507{
fc84f618 508 int i;
509
1ab64c54
GI
510 switch (type) {
511 case 1: // save
d30279e2
GI
512 if (gpu.cmd_len > 0)
513 flush_cmd_buffer();
1ab64c54
GI
514 memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
515 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
6e9bdaef 516 memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
1ab64c54 517 freeze->ulStatus = gpu.status.reg;
1ab64c54
GI
518 break;
519 case 0: // load
9394ada5 520 renderer_invalidate_caches(0, 0, 1024, 512);
1ab64c54
GI
521 memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
522 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
6e9bdaef 523 memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
1ab64c54 524 gpu.status.reg = freeze->ulStatus;
fc84f618 525 for (i = 8; i > 0; i--) {
526 gpu.regs[i] ^= 1; // avoid reg change detection
527 GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
528 }
5b745e5b 529 renderer_sync_ecmds(gpu.ex_regs);
1ab64c54
GI
530 break;
531 }
532
533 return 1;
534}
535
1ab64c54 536// vim:shiftwidth=2:expandtab