update build for OABI
pcsx_rearmed.git: plugins/gpu_neon/gpu.c
/*
 * (C) Gražvydas "notaz" Ignotas, 2011
 *
 * This work is licensed under the terms of any of these licenses
 * (at your option):
 *  - GNU GPL, version 2 or later.
 *  - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.
 */

#include <stdio.h>
#include <string.h>
#include "gpu.h"

#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#define unlikely(x) __builtin_expect((x), 0)
#define noinline __attribute__((noinline))

#define gpu_log(fmt, ...) \
  printf("%d:%03d: " fmt, gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)

//#define log_io gpu_log
#define log_io(...)
//#define log_anomaly gpu_log
#define log_anomaly(...)

struct psx_gpu gpu __attribute__((aligned(64)));

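// Reset the GPU to its power-on defaults (GP1(0x00)): clear the control
// register mirror, set the status word to 0x14802000 and default the
// display to 256x240.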
static noinline void do_reset(void)
{
  memset(gpu.regs, 0, sizeof(gpu.regs));
  gpu.status.reg = 0x14802000;
  gpu.gp0 = 0;
  gpu.regs[3] = 1;
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
}

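// Derive the visible width in pixels from the horizontal display range
// (x1..x2, in GPU clocks), scaling against 2560 clocks per scanline;
// out-of-range values fall back to the full hres of the current mode.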
static noinline void update_width(void)
{
  int sw = gpu.screen.x2 - gpu.screen.x1;
  if (sw <= 0 || sw >= 2560)
    // full width
    gpu.screen.w = gpu.screen.hres;
  else
    gpu.screen.w = sw * gpu.screen.hres / 2560;
}

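// Derive the visible height from the vertical display range (y1..y2),
// doubled when the double-height (interlaced 480-line) bit is set;
// falls back to the full vres if the range is invalid.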
static noinline void update_height(void)
{
  int sh = gpu.screen.y2 - gpu.screen.y1;
  if (gpu.status.dheight)
    sh *= 2;
  if (sh <= 0)
    sh = gpu.screen.vres;

  gpu.screen.h = sh;
}

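// Frameskip decision, made once per displayed frame: a frame is only
// marked ready when the previous one was not skipped, and skipping is
// (re)enabled based on external advice or the forced setting (set == 1),
// so two frames in a row are never dropped.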
static noinline void decide_frameskip(void)
{
  gpu.frameskip.frame_ready = !gpu.frameskip.active;

  if (!gpu.frameskip.active && (*gpu.frameskip.advice || gpu.frameskip.set == 1))
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;
}

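// GP1(0x10) "get GPU info" requests: return internal state (texture
// window, drawing area/offset) latched in ex_regs through the GPUREAD
// port (gp0); subfunction 0x07 reports the GPU version.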
static noinline void get_gpu_info(uint32_t data)
{
  switch (data & 0x0f) {
  case 0x02:
  case 0x03:
  case 0x04:
  case 0x05:
    gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
    break;
  case 0x06:
    gpu.gp0 = gpu.ex_regs[5] & 0xfffff;
    break;
  case 0x07:
    gpu.gp0 = 2;
    break;
  default:
    gpu.gp0 = 0;
    break;
  }
}

long GPUinit(void)
{
  int ret;
  ret = vout_init();
  ret |= renderer_init();

  gpu.lcf_hc = &gpu.zero;
  gpu.state.frame_count = 0;
  gpu.state.hcnt = &gpu.zero;
  do_reset();
  return ret;
}

long GPUshutdown(void)
{
  return vout_finish();
}

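// GP1 (control port) write: 0x00 resets, 0x03 toggles display blanking,
// 0x04 sets the DMA direction, 0x05-0x07 set display position and range,
// 0x08 sets the video mode (resolution tables below), 0x10-0x1f are info
// requests. Unchanged writes (other than 0x00 and 0x05) are filtered out
// to avoid redundant work.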
void GPUwriteStatus(uint32_t data)
{
  static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
  static const short vres[4] = { 240, 480, 256, 480 };
  uint32_t cmd = data >> 24;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    if (cmd != 0 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
  case 0x00:
    do_reset();
    break;
  case 0x03:
    gpu.status.blanking = data & 1;
    break;
  case 0x04:
    gpu.status.dma = data & 3;
    break;
  case 0x05:
    gpu.screen.x = data & 0x3ff;
    gpu.screen.y = (data >> 10) & 0x3ff;
    if (gpu.frameskip.set)
      decide_frameskip();
    break;
  case 0x06:
    gpu.screen.x1 = data & 0xfff;
    gpu.screen.x2 = (data >> 12) & 0xfff;
    update_width();
    break;
  case 0x07:
    gpu.screen.y1 = data & 0x3ff;
    gpu.screen.y2 = (data >> 10) & 0x3ff;
    update_height();
    break;
  case 0x08:
    gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
    gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
    gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
    update_width();
    update_height();
    break;
  default:
    if ((cmd & 0xf0) == 0x10)
      get_gpu_info(data);
    break;
  }
}

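// Number of extra data words each GP0 command carries after the command
// word itself (the parser uses 1 + cmd_lengths[cmd]). The image transfer
// commands (0xa0/0xc0) only count their fixed header words; the pixel
// data that follows is handled by the VRAM i/o path.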
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 2, 2, 2, 2, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]

static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (is_read)
    memcpy(mem, vram, l * 2);
  else
    memcpy(vram, mem, l * 2);
}

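// Move pixel data between the command stream and VRAM for an active
// image transfer: handles a partial first line (dma.offset), whole
// lines, and a partial trailing line, wrapping y within the 512-line
// VRAM. Returns how many 32-bit words were consumed.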
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read);
  }

  if (h > 0 && count > 0) {
    y &= 511;
    do_vram_line(x, y, sdata, count, is_read);
    o = count;
    count = 0;
  }
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}

static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 1023;
  gpu.dma.y = (pos_word >> 16) & 511;
  gpu.dma.w = size_word & 0xffff; // ?
  gpu.dma.h = size_word >> 16;
  gpu.dma.offset = 0;

  if (is_read)
    gpu.status.img = 1;
  else
    renderer_invalidate_caches(gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
         gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
}

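// Scan a block of GP0 command words: route data to an active VRAM
// transfer, do a look-ahead pass that mirrors status-affecting bits
// (texpage from 0xe1 and textured primitives, mask bits from 0xe6) and
// latches drawing-environment commands (0xe0-0xe7) into ex_regs, then
// hand complete primitives to the renderer unless a frame is being
// skipped. Returns the number of leftover words of an incomplete
// command so the caller can buffer them.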
static int check_cmd(uint32_t *data, int count)
{
  int len, cmd, start, pos;
  int vram_dirty = 0;

  // process buffer
  for (start = pos = 0; pos < count; )
  {
    cmd = -1;
    len = 0;

    if (gpu.dma.h) {
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
      start = pos;
    }

    // do look-ahead pass to detect SR changes and VRAM i/o
    while (pos < count) {
      uint32_t *list = data + pos;
      cmd = list[0] >> 24;
      len = 1 + cmd_lengths[cmd];

      //printf(" %3d: %02x %d\n", pos, cmd, len);
      if ((cmd & 0xf4) == 0x24) {
        // flat textured prim
        gpu.status.reg &= ~0x1ff;
        gpu.status.reg |= list[4] & 0x1ff;
      }
      else if ((cmd & 0xf4) == 0x34) {
        // shaded textured prim
        gpu.status.reg &= ~0x1ff;
        gpu.status.reg |= list[5] & 0x1ff;
      }
      else switch (cmd)
      {
        case 0xe1:
          gpu.status.reg &= ~0x7ff;
          gpu.status.reg |= list[0] & 0x7ff;
          break;
        case 0xe6:
          gpu.status.reg &= ~0x1800;
          gpu.status.reg |= (list[0] & 3) << 11;
          break;
      }
      if (2 <= cmd && cmd < 0xc0)
        vram_dirty = 1;
      else if ((cmd & 0xf8) == 0xe0)
        gpu.ex_regs[cmd & 7] = list[0];

      if (pos + len > count) {
        cmd = -1;
        break; // incomplete cmd
      }
      if (cmd == 0xa0 || cmd == 0xc0)
        break; // image i/o
      pos += len;
    }

    if (pos - start > 0) {
      if (!gpu.frameskip.active)
        do_cmd_list(data + start, pos - start);
      start = pos;
    }

    if (cmd == 0xa0 || cmd == 0xc0) {
      // consume vram write/read cmd
      start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
      pos += len;
    }
    else if (cmd == -1)
      break;
  }

  if (gpu.frameskip.active)
    renderer_sync_ecmds(gpu.ex_regs);
  gpu.state.fb_dirty |= vram_dirty;

  return count - pos;
}

static void flush_cmd_buffer(void)
{
  int left = check_cmd(gpu.cmd_buffer, gpu.cmd_len);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  gpu.cmd_len = left;
}

void GPUwriteDataMem(uint32_t *mem, int count)
{
  int left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = check_cmd(mem, count);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}

void GPUwriteData(uint32_t data)
{
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = data;
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer();
}

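// Walk a GPU DMA linked list: each node starts with a header word whose
// top byte is the payload length in words and whose low 24 bits point to
// the next node (0xffffff terminates). Bit 23 of visited headers serves
// as a loop-detection marker and is cleared again afterwards; the FF7
// workaround below additionally marks the head of the previously sent
// list so an immediate resend of the same large list terminates early.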
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
{
  uint32_t addr, *list;
  uint32_t *llist_entry = NULL;
  int len, left, count;
  long dma_words = 0;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  // ff7 sends its main list twice, detect this
  if (gpu.state.frame_count == gpu.state.last_list.frame &&
      *gpu.state.hcnt - gpu.state.last_list.hcnt <= 1 &&
      gpu.state.last_list.words > 1024)
  {
    llist_entry = rambase + (gpu.state.last_list.addr & 0x1fffff) / 4;
    *llist_entry |= 0x800000;
  }

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; addr != 0xffffff; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = list[0] >> 24;
    addr = list[0] & 0xffffff;
    dma_words += 1 + len;

    log_io(".chain %08x #%d\n", (list - rambase) * 4, len);

    // loop detection marker
    // (bit23 set causes DMA error on real machine, so
    //  unlikely to ever be set by the game)
    list[0] |= 0x800000;

    if (len) {
      left = check_cmd(list + 1, len);
      if (left)
        log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
    }

    if (addr & 0x800000)
      break;
  }

  // remove loop detection markers
  addr = start_addr & 0x1fffff;
  while (count-- > 0) {
    list = rambase + addr / 4;
    addr = list[0] & 0x1fffff;
    list[0] &= ~0x800000;
  }
  if (llist_entry)
    *llist_entry &= ~0x800000;

  gpu.state.last_list.frame = gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.words = dma_words;
  gpu.state.last_list.addr = start_addr;

  return dma_words;
}

void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}

uint32_t GPUreadData(void)
{
  log_io("gpu_read\n");

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(&gpu.gp0, 1, 1);

  return gpu.gp0;
}

uint32_t GPUreadStatus(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.status.reg | (*gpu.lcf_hc << 31);
  log_io("gpu_read_status %08x\n", ret);
  return ret;
}

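// Savestate block: the full 1024x512x16bpp VRAM image plus the latched
// control register values and the status word. The layout appears to
// follow the common PSEmu/P.E.Op.S.-style GPU plugin freeze format.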
typedef struct GPUFREEZETAG
{
  uint32_t ulFreezeVersion; // should always be 1 for now (set by main emu)
  uint32_t ulStatus; // current gpu status
  uint32_t ulControl[256]; // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
} GPUFreeze_t;

long GPUfreeze(uint32_t type, GPUFreeze_t *freeze)
{
  int i;

  switch (type) {
  case 1: // save
    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
    memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
    memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
    freeze->ulStatus = gpu.status.reg;
    break;
  case 0: // load
    renderer_invalidate_caches(0, 0, 1024, 512);
    memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
    memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
    memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
    gpu.status.reg = freeze->ulStatus;
    for (i = 8; i > 0; i--) {
      gpu.regs[i] ^= 1; // avoid reg change detection
      GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
    }
    renderer_sync_ecmds(gpu.ex_regs);
    break;
  }

  return 1;
}

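// Called by the emu core around vblank (val presumably flags entering
// vs. leaving it) with a pointer to its hclock counter. Tracks the
// interlace odd/even field in status.lcf, selects what GPUreadStatus
// reports in bit 31 via lcf_hc, and counts frames when val is 0.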
void GPUvBlank(int val, uint32_t *hcnt)
{
  gpu.lcf_hc = &gpu.zero;
  if (gpu.status.interlace) {
    if (val)
      gpu.status.lcf ^= 1;
  }
  else {
    gpu.status.lcf = 0;
    if (!val)
      gpu.lcf_hc = hcnt;
  }
  if (!val)
    gpu.state.frame_count++;

  gpu.state.hcnt = hcnt;
}

// vim:shiftwidth=2:expandtab