psx_gpu: partial interlace mode support
plugins/gpu_neon/gpu.c
/*
 * (C) Gražvydas "notaz" Ignotas, 2011
 *
 * This work is licensed under the terms of any of these licenses
 * (at your option):
 *  - GNU GPL, version 2 or later.
 *  - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.
 */

#include <stdio.h>
#include <string.h>
#include "gpu.h"

#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#define unlikely(x) __builtin_expect((x), 0)
#define noinline __attribute__((noinline))

#define gpu_log(fmt, ...) \
  printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)

//#define log_io gpu_log
#define log_io(...)
//#define log_anomaly gpu_log
#define log_anomaly(...)

struct psx_gpu gpu __attribute__((aligned(2048)));

static noinline void do_reset(void)
{
  memset(gpu.regs, 0, sizeof(gpu.regs));
  gpu.status.reg = 0x14802000; // reset value: display off, ready flags set
  gpu.gp0 = 0;
  gpu.regs[3] = 1; // GP1(0x03): display blanking on
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
}

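// the x1/x2 display range is measured in GPU video clock units, of which
// there are roughly 2560 per scanline; scaling by that gives the visible
// width in pixels at the current horizontal resolution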
static noinline void update_width(void)
{
  int sw = gpu.screen.x2 - gpu.screen.x1;
  if (sw <= 0 || sw >= 2560)
    // full width
    gpu.screen.w = gpu.screen.hres;
  else
    gpu.screen.w = sw * gpu.screen.hres / 2560;
}

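// in the 480-line (dheight) interlaced modes the y1/y2 range covers both
// fields, so each counted line stands for two output lines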
static noinline void update_height(void)
{
  int sh = gpu.screen.y2 - gpu.screen.y1;
  if (gpu.status.dheight)
    sh *= 2;
  if (sh <= 0)
    sh = gpu.screen.vres;

  gpu.screen.h = sh;
}

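// runs once per flip: decides whether upcoming frames should be skipped,
// either on external advice or for up to frameskip.set frames in a row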
static noinline void decide_frameskip(void)
{
  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;
}

static noinline void decide_frameskip_allow(uint32_t cmd_e3)
{
  // don't allow frameskip when the game draws into the display area;
  // this check is skipped for interlace, which will most likely always do that
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
  gpu.frameskip.allow = gpu.status.interlace ||
    (uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
}

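// GP1(0x10) "get GPU info": 0x02-0x05 return the latched e2-e5 state
// (texture window, draw area corners, draw offset), 0x06 mirrors e5,
// and 0x07 reports GPU version 2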
static noinline void get_gpu_info(uint32_t data)
{
  switch (data & 0x0f) {
    case 0x02:
    case 0x03:
    case 0x04:
    case 0x05:
      gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
      break;
    case 0x06:
      gpu.gp0 = gpu.ex_regs[5] & 0xfffff;
      break;
    case 0x07:
      gpu.gp0 = 2;
      break;
    default:
      gpu.gp0 = 0;
      break;
  }
}

long GPUinit(void)
{
  int ret;
  ret  = vout_init();
  ret |= renderer_init();

  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  do_reset();
  return ret;
}

long GPUshutdown(void)
{
  return vout_finish();
}

void GPUwriteStatus(uint32_t data)
{
  static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
  static const short vres[4] = { 240, 480, 256, 480 };
  uint32_t cmd = data >> 24;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    if (cmd != 0 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
    case 0x00:
      do_reset();
      break;
    case 0x03:
      gpu.status.blanking = data & 1;
      break;
    case 0x04:
      gpu.status.dma = data & 3;
      break;
    case 0x05:
      gpu.screen.x = data & 0x3ff;
      gpu.screen.y = (data >> 10) & 0x3ff;
      if (gpu.frameskip.set) {
        decide_frameskip_allow(gpu.ex_regs[3]);
        if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
          decide_frameskip();
          gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
        }
      }
      break;
    case 0x06:
      gpu.screen.x1 = data & 0xfff;
      gpu.screen.x2 = (data >> 12) & 0xfff;
      update_width();
      break;
    case 0x07:
      gpu.screen.y1 = data & 0x3ff;
      gpu.screen.y2 = (data >> 10) & 0x3ff;
      update_height();
      break;
    case 0x08:
      gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
      gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
      gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
      update_width();
      update_height();
      break;
    default:
      if ((cmd & 0xf0) == 0x10)
        get_gpu_info(data);
      break;
  }
}

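// number of parameter words each GP0 command carries after the command
// word itself (the scanner below uses len = 1 + cmd_lengths[cmd]).
// For example, a flat monochrome triangle (0x20) has 3 parameter words;
// a caller would feed it like this (hypothetical coordinates):
//   GPUwriteData(0x200000ff);      // command + color (00BBGGRR, here red)
//   GPUwriteData((y0 << 16) | x0); // vertex 1
//   GPUwriteData((y1 << 16) | x1); // vertex 2
//   GPUwriteData((y2 << 16) | x2); // vertex 3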
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 2, 2, 2, 2, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]

static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (is_read)
    memcpy(mem, vram, l * 2);
  else
    memcpy(vram, mem, l * 2);
}

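// advance an ongoing VRAM read/write: data is streamed line by line into
// the 1024x512 halfword VRAM, wrapping vertically (y & 511); returns the
// number of 32-bit words consumed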
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read);
  }

  if (h > 0 && count > 0) {
    y &= 511;
    do_vram_line(x, y, sdata, count, is_read);
    o = count;
    count = 0;
  }
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}

static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 1023;
  gpu.dma.y = (pos_word >> 16) & 511;
  gpu.dma.w = size_word & 0xffff; // ?
  gpu.dma.h = size_word >> 16;
  gpu.dma.offset = 0;

  renderer_flush_queues();
  if (is_read) {
    gpu.status.img = 1;
    // XXX: wrong for width 1
    memcpy(&gpu.gp0, VRAM_MEM_XY(gpu.dma.x, gpu.dma.y), 4);
  }
  else {
    renderer_invalidate_caches(gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
}

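/*
 * Scan a buffer of GP0 words: a look-ahead pass tracks status-affecting
 * commands (0xe0-0xe7) and spots VRAM i/o, then whole runs of drawing
 * commands are flushed to the renderer with a single do_cmd_list() call,
 * or dropped when a frameskip is in progress. Returns how many words were
 * left unprocessed (an incomplete command at the end of the buffer).
 */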
static int check_cmd(uint32_t *data, int count)
{
  int len, cmd, start, pos;
  int vram_dirty = 0;

  // process buffer
  for (start = pos = 0; pos < count; )
  {
    cmd = -1;
    len = 0;

    if (gpu.dma.h) {
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
      start = pos;
    }

    // do a look-ahead pass to detect status register changes and VRAM i/o
    while (pos < count) {
      uint32_t *list = data + pos;
      cmd = list[0] >> 24;
      len = 1 + cmd_lengths[cmd];

      //printf(" %3d: %02x %d\n", pos, cmd, len);
      if ((cmd & 0xf4) == 0x24) {
        // flat textured prim
        gpu.ex_regs[1] &= ~0x1ff;
        gpu.ex_regs[1] |= list[4] & 0x1ff;
      }
      else if ((cmd & 0xf4) == 0x34) {
        // shaded textured prim
        gpu.ex_regs[1] &= ~0x1ff;
        gpu.ex_regs[1] |= list[5] & 0x1ff;
      }
      else if (cmd == 0xe3)
        decide_frameskip_allow(list[0]);

      if (2 <= cmd && cmd < 0xc0)
        vram_dirty = 1;
      else if ((cmd & 0xf8) == 0xe0)
        gpu.ex_regs[cmd & 7] = list[0];

      if (pos + len > count) {
        cmd = -1;
        break; // incomplete cmd
      }
      if (cmd == 0xa0 || cmd == 0xc0)
        break; // image i/o
      pos += len;
    }

    if (pos - start > 0) {
      if (!gpu.frameskip.active || !gpu.frameskip.allow)
        do_cmd_list(data + start, pos - start);
      start = pos;
    }

    if (cmd == 0xa0 || cmd == 0xc0) {
      // consume vram write/read cmd
      start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
      pos += len;
    }
    else if (cmd == -1)
      break;
  }

  gpu.status.reg &= ~0x1fff;
  gpu.status.reg |= gpu.ex_regs[1] & 0x7ff;
  gpu.status.reg |= (gpu.ex_regs[6] & 3) << 11;

  if (gpu.frameskip.active)
    renderer_sync_ecmds(gpu.ex_regs);
  gpu.state.fb_dirty |= vram_dirty;

  return count - pos;
}

void flush_cmd_buffer(void)
{
  int left = check_cmd(gpu.cmd_buffer, gpu.cmd_len);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  gpu.cmd_len = left;
}

void GPUwriteDataMem(uint32_t *mem, int count)
{
  int left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = check_cmd(mem, count);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}

void GPUwriteData(uint32_t data)
{
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = data;
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer();
}

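/*
 * GP0 DMA linked-list walk: each entry begins with a header word holding
 * the payload word count in the top byte and the address of the next
 * entry in the low 24 bits; an address of 0xffffff terminates the list.
 */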
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
{
  uint32_t addr, *list;
  uint32_t *llist_entry = NULL;
  int len, left, count;
  long cpu_cycles = 0;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  // ff7 sends its main list twice, detect this
  if (*gpu.state.frame_count == gpu.state.last_list.frame &&
      *gpu.state.hcnt - gpu.state.last_list.hcnt <= 1 &&
      gpu.state.last_list.cycles > 2048)
  {
    llist_entry = rambase + (gpu.state.last_list.addr & 0x1fffff) / 4;
    *llist_entry |= 0x800000;
  }

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; addr != 0xffffff; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = list[0] >> 24;
    addr = list[0] & 0xffffff;
    cpu_cycles += 10;
    if (len > 0)
      cpu_cycles += 5 + len;

    log_io(".chain %08x #%d\n", (list - rambase) * 4, len);

    // loop detection marker
    // (bit23 set causes DMA error on real machine, so
    //  unlikely to be ever set by the game)
    list[0] |= 0x800000;

    if (len) {
      left = check_cmd(list + 1, len);
      if (left)
        log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
    }

    if (addr & 0x800000)
      break;
  }

  // remove loop detection markers
  addr = start_addr & 0x1fffff;
  while (count-- > 0) {
    list = rambase + addr / 4;
    addr = list[0] & 0x1fffff;
    list[0] &= ~0x800000;
  }
  if (llist_entry)
    *llist_entry &= ~0x800000;

  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;

  return cpu_cycles;
}

void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}

uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h)
    do_vram_io(&ret, 1, 1);

  log_io("gpu_read %08x\n", ret);
  return ret;
}

uint32_t GPUreadStatus(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.status.reg;
  log_io("gpu_read_status %08x\n", ret);
  return ret;
}

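// savestate block, kept layout-compatible with the other PCSX gpu plugins;
// ulControl holds the GP1 register file, with the e0-e7 state at offset 0xe0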
struct GPUFreeze
{
  uint32_t ulFreezeVersion;           // should always be 1 for now (set by main emu)
  uint32_t ulStatus;                  // current gpu status
  uint32_t ulControl[256];            // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};

long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
    case 1: // save
      if (gpu.cmd_len > 0)
        flush_cmd_buffer();
      memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
      memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
      memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
      freeze->ulStatus = gpu.status.reg;
      break;
    case 0: // load
      renderer_invalidate_caches(0, 0, 1024, 512);
      memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
      memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
      memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
      gpu.status.reg = freeze->ulStatus;
      for (i = 8; i > 0; i--) {
        gpu.regs[i] ^= 1; // avoid reg change detection
        GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
      }
      renderer_sync_ecmds(gpu.ex_regs);
      break;
  }

  return 1;
}

void GPUvBlank(int is_vblank, int lcf)
{
}

// vim:shiftwidth=2:expandtab