psx_gpu: improve fills
pcsx_rearmed.git: plugins/gpu_neon/gpu.c
/*
 * (C) Gražvydas "notaz" Ignotas, 2011
 *
 * This work is licensed under the terms of any of these licenses
 * (at your option):
 *  - GNU GPL, version 2 or later.
 *  - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.
 */

#include <stdio.h>
#include <string.h>
#include "gpu.h"

#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#define unlikely(x) __builtin_expect((x), 0)
#define noinline __attribute__((noinline))

#define gpu_log(fmt, ...) \
  printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)

//#define log_io gpu_log
#define log_io(...)
//#define log_anomaly gpu_log
#define log_anomaly(...)

struct psx_gpu gpu __attribute__((aligned(2048)));

static noinline void do_reset(void)
{
  memset(gpu.regs, 0, sizeof(gpu.regs));
  gpu.status.reg = 0x14802000;
  gpu.gp0 = 0;
  gpu.regs[3] = 1;
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
}

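// Display range registers are in GPU clock ticks; a span of 2560 ticks is
// treated as the full scanline here, so the visible width is that fraction
// of the current hres (e.g. hres=320, x2-x1=1280 -> w = 1280*320/2560 = 160).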
static noinline void update_width(void)
{
  int sw = gpu.screen.x2 - gpu.screen.x1;
  if (sw <= 0 || sw >= 2560)
    // full width
    gpu.screen.w = gpu.screen.hres;
  else
    gpu.screen.w = sw * gpu.screen.hres / 2560;
}

static noinline void update_height(void)
{
  int sh = gpu.screen.y2 - gpu.screen.y1;
  if (gpu.status.dheight)
    sh *= 2;
  if (sh <= 0)
    sh = gpu.screen.vres;

  gpu.screen.h = sh;
}

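// Frameskip policy: while skipping, count consecutive skipped frames;
// otherwise mark the frame ready for display. Skipping starts on external
// advice and is kept up until frameskip.set frames have been skipped.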
static noinline void decide_frameskip(void)
{
  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;
}

static noinline void decide_frameskip_allow(uint32_t cmd_e3)
{
  // no frameskip if it decides to draw to display area,
  // but not for interlace since it'll most likely always do that
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
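  // unsigned compares double as range checks: if x < screen.x the
  // subtraction wraps to a huge value, so one test covers both bounds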
  gpu.frameskip.allow = gpu.status.interlace ||
    (uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
}

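// GP1(0x10) "get GPU info" queries: indices 2-5 return the latest e2-e5
// state (texture window, draw area, draw offset) through GPUREAD; index 7
// reads back as 2, the GPU version reported by this implementation.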
static noinline void get_gpu_info(uint32_t data)
{
  switch (data & 0x0f) {
  case 0x02:
  case 0x03:
  case 0x04:
  case 0x05:
    gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
    break;
  case 0x06:
    gpu.gp0 = gpu.ex_regs[5] & 0xfffff;
    break;
  case 0x07:
    gpu.gp0 = 2;
    break;
  default:
    gpu.gp0 = 0;
    break;
  }
}

long GPUinit(void)
{
  int ret;
  ret = vout_init();
  ret |= renderer_init();

  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  do_reset();
  return ret;
}

long GPUshutdown(void)
{
  return vout_finish();
}

void GPUwriteStatus(uint32_t data)
{
  static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
  static const short vres[4] = { 240, 480, 256, 480 };
  uint32_t cmd = data >> 24;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    if (cmd != 0 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
  case 0x00:
    do_reset();
    break;
  case 0x03:
    gpu.status.blanking = data & 1;
    break;
  case 0x04:
    gpu.status.dma = data & 3;
    break;
  case 0x05:
    gpu.screen.x = data & 0x3ff;
    gpu.screen.y = (data >> 10) & 0x3ff;
    if (gpu.frameskip.set) {
      decide_frameskip_allow(gpu.ex_regs[3]);
      if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
        decide_frameskip();
        gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
      }
    }
    break;
  case 0x06:
    gpu.screen.x1 = data & 0xfff;
    gpu.screen.x2 = (data >> 12) & 0xfff;
    update_width();
    break;
  case 0x07:
    gpu.screen.y1 = data & 0x3ff;
    gpu.screen.y2 = (data >> 10) & 0x3ff;
    update_height();
    break;
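  // display mode: data bits 0-5 go to status bits 17-22, bit 6
  // (368-pixel wide mode) to status bit 16; hres/vres are then
  // looked up from those status bits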
  case 0x08:
    gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
    gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
    gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
    update_width();
    update_height();
    break;
  default:
    if ((cmd & 0xf0) == 0x10)
      get_gpu_info(data);
    break;
  }
}

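// Data words each GP0 command carries after the command word itself
// (packet length = 1 + cmd_lengths[cmd]). For example, 0x28 (monochrome
// quad) takes 4 vertex words, 5 words total. The 0xa0/0xc0 image
// transfers are variable-length and handled separately.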
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 2, 2, 2, 2, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]

static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (is_read)
    memcpy(mem, vram, l * 2);
  else
    memcpy(vram, mem, l * 2);
}

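// Move (count) words between the command stream and the VRAM rectangle set
// up by the last image transfer command. The transfer is resumable:
// dma.offset remembers how far into the current row the previous call got,
// so a transfer split across several DMA blocks continues seamlessly.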
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  if (gpu.dma.offset) {
    // finish the partially transferred row first
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  // whole rows
  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read);
  }

  // trailing partial row; remember the position for the next call
  if (h > 0 && count > 0) {
    y &= 511;
    do_vram_line(x, y, sdata, count, is_read);
    o = count;
    count = 0;
  }
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2; // words consumed
}

static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 1023;
  gpu.dma.y = (pos_word >> 16) & 511;
  gpu.dma.w = size_word & 0xffff; // ?
  gpu.dma.h = size_word >> 16;
  gpu.dma.offset = 0;

  if (is_read)
    gpu.status.img = 1;
  else {
    renderer_flush_queues();
    renderer_invalidate_caches(gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
         gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
}

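// Scan the command stream: batch plain drawing commands for the renderer,
// track status/ex-register side effects, and stop at VRAM image transfers
// (0xa0/0xc0) or at a command whose data has not fully arrived yet.
// Returns the number of unconsumed words left in the buffer.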
static int check_cmd(uint32_t *data, int count)
{
  int len, cmd, start, pos;
  int vram_dirty = 0;

  // process buffer
  for (start = pos = 0; pos < count; )
  {
    cmd = -1;
    len = 0;

    if (gpu.dma.h) {
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
      start = pos;
    }

    // do look-ahead pass to detect SR changes and VRAM i/o
    while (pos < count) {
      uint32_t *list = data + pos;
      cmd = list[0] >> 24;
      len = 1 + cmd_lengths[cmd];

      //printf(" %3d: %02x %d\n", pos, cmd, len);
      if ((cmd & 0xf4) == 0x24) {
        // flat textured prim
        gpu.ex_regs[1] &= ~0x1ff;
        gpu.ex_regs[1] |= list[4] & 0x1ff;
      }
      else if ((cmd & 0xf4) == 0x34) {
        // shaded textured prim
        gpu.ex_regs[1] &= ~0x1ff;
        gpu.ex_regs[1] |= list[5] & 0x1ff;
      }
      else if (cmd == 0xe3)
        decide_frameskip_allow(list[0]);

      if (2 <= cmd && cmd < 0xc0)
        vram_dirty = 1;
      else if ((cmd & 0xf8) == 0xe0)
        gpu.ex_regs[cmd & 7] = list[0];

      if (pos + len > count) {
        cmd = -1;
        break; // incomplete cmd
      }
      if (cmd == 0xa0 || cmd == 0xc0)
        break; // image i/o
      pos += len;
    }

    if (pos - start > 0) {
      if (!gpu.frameskip.active || !gpu.frameskip.allow)
        do_cmd_list(data + start, pos - start);
      start = pos;
    }

    if (cmd == 0xa0 || cmd == 0xc0) {
      // consume vram write/read cmd
      start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
      pos += len;
    }
    else if (cmd == -1)
      break;
  }

  gpu.status.reg &= ~0x1fff;
  gpu.status.reg |= gpu.ex_regs[1] & 0x7ff;
  gpu.status.reg |= (gpu.ex_regs[6] & 3) << 11;

  if (gpu.frameskip.active)
    renderer_sync_ecmds(gpu.ex_regs);
  gpu.state.fb_dirty |= vram_dirty;

  return count - pos;
}

void flush_cmd_buffer(void)
{
  int left = check_cmd(gpu.cmd_buffer, gpu.cmd_len);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  gpu.cmd_len = left;
}

void GPUwriteDataMem(uint32_t *mem, int count)
{
  int left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = check_cmd(mem, count);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}

void GPUwriteData(uint32_t data)
{
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = data;
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer();
}

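// Walk a GPU DMA linked list; each node is a header word followed by its
// payload:
//   header = (len << 24) | next_addr;  // next_addr 0xffffff terminates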
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
{
  uint32_t addr, *list;
  uint32_t *llist_entry = NULL;
  int len, left, count;
  long cpu_cycles = 0;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  // ff7 sends its main list twice; detect this and pre-mark the previous
  // list's head so the walk below terminates early on the repeat
  if (*gpu.state.frame_count == gpu.state.last_list.frame &&
      *gpu.state.hcnt - gpu.state.last_list.hcnt <= 1 &&
      gpu.state.last_list.cycles > 2048)
  {
    llist_entry = rambase + (gpu.state.last_list.addr & 0x1fffff) / 4;
    *llist_entry |= 0x800000;
  }

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; addr != 0xffffff; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = list[0] >> 24;
    addr = list[0] & 0xffffff;
    cpu_cycles += 10;
    if (len > 0)
      cpu_cycles += 5 + len;

    log_io(".chain %08x #%d\n", (list - rambase) * 4, len);

    // loop detection marker
    // (bit23 set causes DMA error on real machine, so
    //  unlikely to be ever set by the game)
    list[0] |= 0x800000;

    if (len) {
      left = check_cmd(list + 1, len);
      if (left)
        log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
    }

    if (addr & 0x800000)
      break;
  }

  // remove loop detection markers
  addr = start_addr & 0x1fffff;
  while (count-- > 0) {
    list = rambase + addr / 4;
    addr = list[0] & 0x1fffff;
    list[0] &= ~0x800000;
  }
  if (llist_entry)
    *llist_entry &= ~0x800000;

  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;

  return cpu_cycles;
}

void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}

uint32_t GPUreadData(void)
{
  log_io("gpu_read\n");

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(&gpu.gp0, 1, 1);

  return gpu.gp0;
}

uint32_t GPUreadStatus(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.status.reg;
  log_io("gpu_read_status %08x\n", ret);
  return ret;
}

struct GPUFreeze
{
  uint32_t ulFreezeVersion;      // should be always 1 for now (set by main emu)
  uint32_t ulStatus;             // current gpu status
  uint32_t ulControl[256];       // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};

long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
  case 1: // save
    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
    memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
    memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
    freeze->ulStatus = gpu.status.reg;
    break;
  case 0: // load
    renderer_invalidate_caches(0, 0, 1024, 512);
    memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
    memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
    memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
    gpu.status.reg = freeze->ulStatus;
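    // replay control regs 8..1 to rebuild derived state; the xor makes
    // the stored value differ so GPUwriteStatus skips its no-change
    // early return, then the write restores the original value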
    for (i = 8; i > 0; i--) {
      gpu.regs[i] ^= 1; // avoid reg change detection
      GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
    }
    renderer_sync_ecmds(gpu.ex_regs);
    break;
  }

  return 1;
}

// vim:shiftwidth=2:expandtab