remove gpu vblank callbacks
[pcsx_rearmed.git] / plugins / gpu_neon / gpu.c
... / ...
CommitLineData
1/*
2 * (C) Gražvydas "notaz" Ignotas, 2011
3 *
4 * This work is licensed under the terms of any of these licenses
5 * (at your option):
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
9 */
10
11#include <stdio.h>
12#include <string.h>
13#include "gpu.h"
14
/* Number of elements in a true array (meaningless on a pointer). */
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(*(arr)))
/* Branch hint: tells the compiler the condition is expected to be 0. */
#define unlikely(cond) __builtin_expect((cond), 0)
/* Forbids the compiler from inlining the annotated function. */
#define noinline __attribute__((noinline))

/* printf-style trace, prefixed with "<frame_count>:<hcnt>: ". */
#define gpu_log(fmt, ...) \
  printf("%d:%03d: " fmt, gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)

/* I/O tracing: compiled out; switch to the gpu_log alias below to enable. */
//#define log_io gpu_log
#define log_io(...)
/* Anomaly reporting: compiled out; switch to the gpu_log alias to enable. */
//#define log_anomaly gpu_log
#define log_anomaly(...)
26
// The single global GPU state object used by every function in this file;
// 64-byte alignment is requested from the compiler.
struct psx_gpu gpu __attribute__((aligned(64)));
28
29static noinline void do_reset(void)
30{
31 memset(gpu.regs, 0, sizeof(gpu.regs));
32 gpu.status.reg = 0x14802000;
33 gpu.gp0 = 0;
34 gpu.regs[3] = 1;
35 gpu.screen.hres = gpu.screen.w = 256;
36 gpu.screen.vres = gpu.screen.h = 240;
37}
38
39static noinline void update_width(void)
40{
41 int sw = gpu.screen.x2 - gpu.screen.x1;
42 if (sw <= 0 || sw >= 2560)
43 // full width
44 gpu.screen.w = gpu.screen.hres;
45 else
46 gpu.screen.w = sw * gpu.screen.hres / 2560;
47}
48
49static noinline void update_height(void)
50{
51 int sh = gpu.screen.y2 - gpu.screen.y1;
52 if (gpu.status.dheight)
53 sh *= 2;
54 if (sh <= 0)
55 sh = gpu.screen.vres;
56
57 gpu.screen.h = sh;
58}
59
60static noinline void decide_frameskip(void)
61{
62 gpu.frameskip.frame_ready = !gpu.frameskip.active;
63
64 if (!gpu.frameskip.active && (*gpu.frameskip.advice || gpu.frameskip.set == 1))
65 gpu.frameskip.active = 1;
66 else
67 gpu.frameskip.active = 0;
68}
69
70static noinline void get_gpu_info(uint32_t data)
71{
72 switch (data & 0x0f) {
73 case 0x02:
74 case 0x03:
75 case 0x04:
76 case 0x05:
77 gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
78 break;
79 case 0x06:
80 gpu.gp0 = gpu.ex_regs[5] & 0xfffff;
81 break;
82 case 0x07:
83 gpu.gp0 = 2;
84 break;
85 default:
86 gpu.gp0 = 0;
87 break;
88 }
89}
90
91long GPUinit(void)
92{
93 int ret;
94 ret = vout_init();
95 ret |= renderer_init();
96
97 gpu.state.frame_count = 0;
98 gpu.state.hcnt = &gpu.zero;
99 do_reset();
100 return ret;
101}
102
/*
 * Plugin entry point: shut down video output.
 *
 * Returns whatever vout_finish() reports.
 */
long GPUshutdown(void)
{
  long result = vout_finish();

  return result;
}
107
108void GPUwriteStatus(uint32_t data)
109{
110 static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
111 static const short vres[4] = { 240, 480, 256, 480 };
112 uint32_t cmd = data >> 24;
113
114 if (cmd < ARRAY_SIZE(gpu.regs)) {
115 if (cmd != 0 && cmd != 5 && gpu.regs[cmd] == data)
116 return;
117 gpu.regs[cmd] = data;
118 }
119
120 gpu.state.fb_dirty = 1;
121
122 switch (cmd) {
123 case 0x00:
124 do_reset();
125 break;
126 case 0x03:
127 gpu.status.blanking = data & 1;
128 break;
129 case 0x04:
130 gpu.status.dma = data & 3;
131 break;
132 case 0x05:
133 gpu.screen.x = data & 0x3ff;
134 gpu.screen.y = (data >> 10) & 0x3ff;
135 if (gpu.frameskip.set)
136 decide_frameskip();
137 break;
138 case 0x06:
139 gpu.screen.x1 = data & 0xfff;
140 gpu.screen.x2 = (data >> 12) & 0xfff;
141 update_width();
142 break;
143 case 0x07:
144 gpu.screen.y1 = data & 0x3ff;
145 gpu.screen.y2 = (data >> 10) & 0x3ff;
146 update_height();
147 break;
148 case 0x08:
149 gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
150 gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
151 gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
152 update_width();
153 update_height();
154 break;
155 default:
156 if ((cmd & 0xf0) == 0x10)
157 get_gpu_info(data);
158 break;
159 }
160}
161
/*
 * Number of parameter words that follow each command word, indexed by the
 * command byte (top 8 bits of the first word). The total command length
 * in words is 1 + cmd_lengths[cmd]. Entries not listed are 0.
 *
 * Each designator sets the first entry of a run; the plain values after
 * it fill the following consecutive indexes.
 */
const unsigned char cmd_lengths[256] =
{
  [0x02] = 2,

  //        x0-x3        x4-x7        x8-xb        xc-xf
  [0x20] = 3, 3, 3, 3,  6, 6, 6, 6,  4, 4, 4, 4,  8, 8, 8, 8,
  [0x30] = 5, 5, 5, 5,  8, 8, 8, 8,  7, 7, 7, 7,  11, 11, 11, 11,
  [0x40] = 2, 2, 2, 2,  0, 0, 0, 0,  3, 3, 3, 3,  3, 3, 3, 3,
  [0x50] = 3, 3, 3, 3,  0, 0, 0, 0,  4, 4, 4, 4,  4, 4, 4, 4,
  [0x60] = 2, 2, 2, 2,  3, 3, 3, 3,  1, 1, 1, 1,  2, 2, 2, 2,
  [0x70] = 1, 1, 1, 1,  2, 2, 2, 2,  1, 1, 1, 1,  2, 2, 2, 2,

  [0x80] = 3,
  [0xa0] = 2,
  [0xc0] = 2,
};
181
182#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
183
184static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
185{
186 uint16_t *vram = VRAM_MEM_XY(x, y);
187 if (is_read)
188 memcpy(mem, vram, l * 2);
189 else
190 memcpy(vram, mem, l * 2);
191}
192
193static int do_vram_io(uint32_t *data, int count, int is_read)
194{
195 int count_initial = count;
196 uint16_t *sdata = (uint16_t *)data;
197 int x = gpu.dma.x, y = gpu.dma.y;
198 int w = gpu.dma.w, h = gpu.dma.h;
199 int o = gpu.dma.offset;
200 int l;
201 count *= 2; // operate in 16bpp pixels
202
203 if (gpu.dma.offset) {
204 l = w - gpu.dma.offset;
205 if (count < l)
206 l = count;
207
208 do_vram_line(x + o, y, sdata, l, is_read);
209
210 if (o + l < w)
211 o += l;
212 else {
213 o = 0;
214 y++;
215 h--;
216 }
217 sdata += l;
218 count -= l;
219 }
220
221 for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
222 y &= 511;
223 do_vram_line(x, y, sdata, w, is_read);
224 }
225
226 if (h > 0 && count > 0) {
227 y &= 511;
228 do_vram_line(x, y, sdata, count, is_read);
229 o = count;
230 count = 0;
231 }
232 gpu.dma.y = y;
233 gpu.dma.h = h;
234 gpu.dma.offset = o;
235
236 return count_initial - count / 2;
237}
238
239static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
240{
241 if (gpu.dma.h)
242 log_anomaly("start_vram_transfer while old unfinished\n");
243
244 gpu.dma.x = pos_word & 1023;
245 gpu.dma.y = (pos_word >> 16) & 511;
246 gpu.dma.w = size_word & 0xffff; // ?
247 gpu.dma.h = size_word >> 16;
248 gpu.dma.offset = 0;
249
250 if (is_read)
251 gpu.status.img = 1;
252 else
253 renderer_invalidate_caches(gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
254
255 log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
256 gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
257}
258
259static int check_cmd(uint32_t *data, int count)
260{
261 int len, cmd, start, pos;
262 int vram_dirty = 0;
263
264 // process buffer
265 for (start = pos = 0; pos < count; )
266 {
267 cmd = -1;
268 len = 0;
269
270 if (gpu.dma.h) {
271 pos += do_vram_io(data + pos, count - pos, 0);
272 if (pos == count)
273 break;
274 start = pos;
275 }
276
277 // do look-ahead pass to detect SR changes and VRAM i/o
278 while (pos < count) {
279 uint32_t *list = data + pos;
280 cmd = list[0] >> 24;
281 len = 1 + cmd_lengths[cmd];
282
283 //printf(" %3d: %02x %d\n", pos, cmd, len);
284 if ((cmd & 0xf4) == 0x24) {
285 // flat textured prim
286 gpu.status.reg &= ~0x1ff;
287 gpu.status.reg |= list[4] & 0x1ff;
288 }
289 else if ((cmd & 0xf4) == 0x34) {
290 // shaded textured prim
291 gpu.status.reg &= ~0x1ff;
292 gpu.status.reg |= list[5] & 0x1ff;
293 }
294 else switch (cmd)
295 {
296 case 0xe1:
297 gpu.status.reg &= ~0x7ff;
298 gpu.status.reg |= list[0] & 0x7ff;
299 break;
300 case 0xe6:
301 gpu.status.reg &= ~0x1800;
302 gpu.status.reg |= (list[0] & 3) << 11;
303 break;
304 }
305 if (2 <= cmd && cmd < 0xc0)
306 vram_dirty = 1;
307 else if ((cmd & 0xf8) == 0xe0)
308 gpu.ex_regs[cmd & 7] = list[0];
309
310 if (pos + len > count) {
311 cmd = -1;
312 break; // incomplete cmd
313 }
314 if (cmd == 0xa0 || cmd == 0xc0)
315 break; // image i/o
316 pos += len;
317 }
318
319 if (pos - start > 0) {
320 if (!gpu.frameskip.active)
321 do_cmd_list(data + start, pos - start);
322 start = pos;
323 }
324
325 if (cmd == 0xa0 || cmd == 0xc0) {
326 // consume vram write/read cmd
327 start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
328 pos += len;
329 }
330 else if (cmd == -1)
331 break;
332 }
333
334 if (gpu.frameskip.active)
335 renderer_sync_ecmds(gpu.ex_regs);
336 gpu.state.fb_dirty |= vram_dirty;
337
338 return count - pos;
339}
340
341static void flush_cmd_buffer(void)
342{
343 int left = check_cmd(gpu.cmd_buffer, gpu.cmd_len);
344 if (left > 0)
345 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
346 gpu.cmd_len = left;
347}
348
349void GPUwriteDataMem(uint32_t *mem, int count)
350{
351 int left;
352
353 log_io("gpu_dma_write %p %d\n", mem, count);
354
355 if (unlikely(gpu.cmd_len > 0))
356 flush_cmd_buffer();
357
358 left = check_cmd(mem, count);
359 if (left)
360 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
361}
362
363void GPUwriteData(uint32_t data)
364{
365 log_io("gpu_write %08x\n", data);
366 gpu.cmd_buffer[gpu.cmd_len++] = data;
367 if (gpu.cmd_len >= CMD_BUFFER_LEN)
368 flush_cmd_buffer();
369}
370
371long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
372{
373 uint32_t addr, *list;
374 uint32_t *llist_entry = NULL;
375 int len, left, count;
376 long dma_words = 0;
377
378 if (unlikely(gpu.cmd_len > 0))
379 flush_cmd_buffer();
380
381 // ff7 sends it's main list twice, detect this
382 if (gpu.state.frame_count == gpu.state.last_list.frame &&
383 *gpu.state.hcnt - gpu.state.last_list.hcnt <= 1 &&
384 gpu.state.last_list.words > 1024)
385 {
386 llist_entry = rambase + (gpu.state.last_list.addr & 0x1fffff) / 4;
387 *llist_entry |= 0x800000;
388 }
389
390 log_io("gpu_dma_chain\n");
391 addr = start_addr & 0xffffff;
392 for (count = 0; addr != 0xffffff; count++)
393 {
394 list = rambase + (addr & 0x1fffff) / 4;
395 len = list[0] >> 24;
396 addr = list[0] & 0xffffff;
397 dma_words += 1 + len;
398
399 log_io(".chain %08x #%d\n", (list - rambase) * 4, len);
400
401 // loop detection marker
402 // (bit23 set causes DMA error on real machine, so
403 // unlikely to be ever set by the game)
404 list[0] |= 0x800000;
405
406 if (len) {
407 left = check_cmd(list + 1, len);
408 if (left)
409 log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
410 }
411
412 if (addr & 0x800000)
413 break;
414 }
415
416 // remove loop detection markers
417 addr = start_addr & 0x1fffff;
418 while (count-- > 0) {
419 list = rambase + addr / 4;
420 addr = list[0] & 0x1fffff;
421 list[0] &= ~0x800000;
422 }
423 if (llist_entry)
424 *llist_entry &= ~0x800000;
425
426 gpu.state.last_list.frame = gpu.state.frame_count;
427 gpu.state.last_list.hcnt = *gpu.state.hcnt;
428 gpu.state.last_list.words = dma_words;
429 gpu.state.last_list.addr = start_addr;
430
431 return dma_words;
432}
433
434void GPUreadDataMem(uint32_t *mem, int count)
435{
436 log_io("gpu_dma_read %p %d\n", mem, count);
437
438 if (unlikely(gpu.cmd_len > 0))
439 flush_cmd_buffer();
440
441 if (gpu.dma.h)
442 do_vram_io(mem, count, 1);
443}
444
445uint32_t GPUreadData(void)
446{
447 log_io("gpu_read\n");
448
449 if (unlikely(gpu.cmd_len > 0))
450 flush_cmd_buffer();
451
452 if (gpu.dma.h)
453 do_vram_io(&gpu.gp0, 1, 1);
454
455 return gpu.gp0;
456}
457
458uint32_t GPUreadStatus(void)
459{
460 uint32_t ret;
461
462 if (unlikely(gpu.cmd_len > 0))
463 flush_cmd_buffer();
464
465 ret = gpu.status.reg;
466 log_io("gpu_read_status %08x\n", ret);
467 return ret;
468}
469
/*
 * Savestate record exchanged with the main emulator through GPUfreeze().
 * Field order and sizes define the on-disk/in-memory layout and must not
 * change.
 */
typedef struct GPUFREEZETAG
{
  /* should be always 1 for now (set by main emu) */
  uint32_t ulFreezeVersion;
  /* current gpu status */
  uint32_t ulStatus;
  /* latest control register values */
  uint32_t ulControl[256];
  /* current VRam image (full 2 MB for ZN) */
  unsigned char psxVRam[2 * 1024 * 1024];
} GPUFreeze_t;
477
478long GPUfreeze(uint32_t type, GPUFreeze_t *freeze)
479{
480 int i;
481
482 switch (type) {
483 case 1: // save
484 if (gpu.cmd_len > 0)
485 flush_cmd_buffer();
486 memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
487 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
488 memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
489 freeze->ulStatus = gpu.status.reg;
490 break;
491 case 0: // load
492 renderer_invalidate_caches(0, 0, 1024, 512);
493 memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
494 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
495 memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
496 gpu.status.reg = freeze->ulStatus;
497 for (i = 8; i > 0; i--) {
498 gpu.regs[i] ^= 1; // avoid reg change detection
499 GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
500 }
501 renderer_sync_ecmds(gpu.ex_regs);
502 break;
503 }
504
505 return 1;
506}
507
508// vim:shiftwidth=2:expandtab