gpu_neon: some cmd/vram io and dma handling
[pcsx_rearmed.git] / plugins / gpu_neon / gpu.c
... / ...
CommitLineData
1/*
2 * (C) Gražvydas "notaz" Ignotas, 2011
3 *
4 * This work is licensed under the terms of any of these licenses
5 * (at your option):
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
9 */
10
11#include <stdio.h>
12#include <stdint.h>
13#include <string.h>
14
// Number of elements in a true array (do not use on pointers).
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))

// Branch hint: the condition is expected to be false.
// The operand is collapsed to 0/1 with !! first, because __builtin_expect
// takes a long and would otherwise truncate wider (or pointer) conditions.
#define unlikely(x) __builtin_expect(!!(x), 0)

// Number of 32-bit words in gpu.cmd_buffer.
#define CMD_BUFFER_LEN 1024
19
20static struct __attribute__((aligned(64))) {
21 uint16_t vram[1024 * 512];
22 uint16_t guard[1024 * 512]; // overdraw guard
23 uint32_t cmd_buffer[CMD_BUFFER_LEN];
24 uint32_t regs[16];
25 union {
26 uint32_t reg;
27 struct {
28 uint32_t tx:4; // 0 texture page
29 uint32_t ty:1;
30 uint32_t abr:2;
31 uint32_t tp:2; // 7 t.p. mode (4,8,15bpp)
32 uint32_t dtd:1; // 9 dither
33 uint32_t dfe:1;
34 uint32_t md:1; // 11 set mask bit when drawing
35 uint32_t me:1; // 12 no draw on mask
36 uint32_t unkn:3;
37 uint32_t width1:1; // 16
38 uint32_t width0:2;
39 uint32_t dheight:1; // 19 double height
40 uint32_t video:1; // 20 NTSC,PAL
41 uint32_t rgb24:1;
42 uint32_t interlace:1; // 22 interlace on
43 uint32_t blanking:1; // 23 display not enabled
44 uint32_t unkn2:2;
45 uint32_t busy:1; // 26 !busy drawing
46 uint32_t img:1; // 27 ready to DMA image data
47 uint32_t com:1; // 28 ready for commands
48 uint32_t dma:2; // 29 off, ?, to vram, from vram
49 uint32_t lcf:1; // 31
50 };
51 } status;
52 struct {
53 int x, y, w, h;
54 int y1, y2;
55 } screen;
56 struct {
57 int x, y, w, h;
58 int offset;
59 } dma;
60 int cmd_len;
61 const uint32_t *lcf_hc;
62 uint32_t zero;
63} gpu;
64
65long GPUinit(void)
66{
67 gpu.status.reg = 0x14802000;
68 return 0;
69}
70
/* Nothing to tear down here; always reports 0. */
long GPUshutdown(void)
{
  return 0L;
}
75
76void GPUwriteStatus(uint32_t data)
77{
78 static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
79 static const short vres[4] = { 240, 480, 256, 480 };
80 uint32_t cmd = data >> 24;
81
82 switch (data >> 24) {
83 case 0x00:
84 gpu.status.reg = 0x14802000;
85 break;
86 case 0x03:
87 gpu.status.blanking = data & 1;
88 break;
89 case 0x04:
90 gpu.status.dma = data & 3;
91 break;
92 case 0x05:
93 gpu.screen.x = data & 0x3ff;
94 gpu.screen.y = (data >> 10) & 0x3ff;
95 break;
96 case 0x07:
97 gpu.screen.y1 = data & 0x3ff;
98 gpu.screen.y2 = (data >> 10) & 0x3ff;
99 break;
100 case 0x08:
101 gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
102 gpu.screen.w = hres[(gpu.status.reg >> 16) & 7];
103 gpu.screen.h = vres[(gpu.status.reg >> 19) & 3];
104 break;
105 }
106
107 if (cmd < ARRAY_SIZE(gpu.regs))
108 gpu.regs[cmd] = data;
109}
110
// Number of parameter words that follow the command word, indexed by the
// command byte (top 8 bits of the first word).  A whole packet is therefore
// 1 + cmd_lengths[cmd] words long.
static const unsigned char cmd_lengths[256] =
{
  /* 00 */ 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 20 */ 3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8,
  /* 30 */ 5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  /* 40 */ 2, 2, 2, 2, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
  /* 50 */ 3, 3, 3, 3, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4,
  /* 60 */ 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 2, 2, 2, 2,
  /* 70 */ 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  /* 80 */ 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* a0 */ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* b0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* c0 */ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* d0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* e0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* f0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
130
131void do_cmd(uint32_t *list, int count)
132{
133 uint32_t *list_end = list + count;
134 int cmd;
135 //printf("do_cmd %p, %d\n", data, count);
136
137 for (; list < list_end; list += 1 + cmd_lengths[cmd])
138 {
139 cmd = list[0] >> 24;
140 switch (cmd)
141 {
142 case 0xe1:
143 gpu.status.reg &= ~0x7ff;
144 gpu.status.reg |= list[0] & 0x7ff;
145 break;
146 case 0xe6:
147 gpu.status.reg &= ~0x1800;
148 gpu.status.reg |= (list[0] & 3) << 11;
149 break;
150 }
151 if ((cmd & 0xf4) == 0x24) {
152 // flat textured prim
153 gpu.status.reg &= ~0x1ff;
154 gpu.status.reg |= list[4] & 0x1ff;
155 }
156 else if ((cmd & 0xf4) == 0x34) {
157 // shaded textured prim
158 gpu.status.reg &= ~0x1ff;
159 gpu.status.reg |= list[5] & 0x1ff;
160 }
161 }
162}
163
164#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
165
166static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
167{
168 uint16_t *vram = VRAM_MEM_XY(x, y);
169 if (is_read)
170 memcpy(mem, vram, l * 2);
171 else
172 memcpy(vram, mem, l * 2);
173}
174
175static int do_vram_io(uint32_t *data, int count, int is_read)
176{
177 int count_initial = count;
178 uint16_t *sdata = (uint16_t *)data;
179 int x = gpu.dma.x, y = gpu.dma.y;
180 int w = gpu.dma.w, h = gpu.dma.h;
181 int l;
182 count *= 2; // operate in 16bpp pixels
183
184 if (gpu.dma.offset) {
185 l = w - gpu.dma.offset;
186 if (l > count)
187 l = count;
188 do_vram_line(x + gpu.dma.offset, y, sdata, l, is_read);
189 sdata += l;
190 count -= l;
191 y++;
192 h--;
193 }
194
195 for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
196 y &= 511;
197 do_vram_line(x, y, sdata, w, is_read);
198 }
199
200 if (h > 0 && count > 0) {
201 y &= 511;
202 do_vram_line(x, y, sdata, count, is_read);
203 gpu.dma.offset = count;
204 count = 0;
205 }
206 else
207 gpu.dma.offset = 0;
208 gpu.dma.y = y;
209 gpu.dma.h = h;
210
211 return count_initial - (count + 1) / 2;
212}
213
214static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
215{
216 gpu.dma.x = pos_word & 1023;
217 gpu.dma.y = (pos_word >> 16) & 511;
218 gpu.dma.w = size_word & 0xffff; // ?
219 gpu.dma.h = size_word >> 16;
220 gpu.dma.offset = 0;
221
222 if (is_read)
223 gpu.status.img = 1;
224
225 //printf("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
226 // gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
227}
228
229static int check_cmd(uint32_t *data, int count)
230{
231 int len, cmd, start, pos;
232
233 //printf("check_cmd %p, %d\n", data, count);
234
235 // process buffer
236 for (start = pos = 0;; )
237 {
238 cmd = -1;
239 len = 0;
240
241 if (gpu.dma.h) {
242 pos += do_vram_io(data + pos, count - pos, 0);
243 start = pos;
244 }
245
246 while (pos < count) {
247 cmd = data[pos] >> 24;
248 len = 1 + cmd_lengths[cmd];
249 //printf(" %3d: %02x %d\n", pos, cmd, len);
250 if (pos + len > count) {
251 cmd = -1;
252 break; // incomplete cmd
253 }
254 if (cmd == 0xa0 || cmd == 0xc0)
255 break; // image i/o
256 pos += len;
257 }
258
259 if (pos - start > 0) {
260 do_cmd(data + start, pos - start);
261 start = pos;
262 }
263
264 if (cmd == 0xa0 || cmd == 0xc0) {
265 // consume vram write/read cmd
266 start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
267 pos += len;
268 }
269
270 if (pos == count)
271 return 0;
272
273 if (pos + len > count) {
274 //printf("discarding %d words\n", pos + len - count);
275 return pos + len - count;
276 }
277 }
278}
279
280static void flush_cmd_buffer(void)
281{
282 int left = check_cmd(gpu.cmd_buffer, gpu.cmd_len);
283 if (left > 0)
284 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
285 gpu.cmd_len = left;
286}
287
288void GPUwriteDataMem(uint32_t *mem, int count)
289{
290 int left;
291
292 if (unlikely(gpu.cmd_len > 0))
293 flush_cmd_buffer();
294 left = check_cmd(mem, count);
295 if (left)
296 printf("GPUwriteDataMem: discarded %d/%d words\n", left, count);
297}
298
299void GPUwriteData(uint32_t data)
300{
301 gpu.cmd_buffer[gpu.cmd_len++] = data;
302 if (gpu.cmd_len >= CMD_BUFFER_LEN)
303 flush_cmd_buffer();
304}
305
306long GPUdmaChain(uint32_t *base, uint32_t addr)
307{
308 uint32_t *list;
309 int len;
310
311 if (unlikely(gpu.cmd_len > 0))
312 flush_cmd_buffer();
313
314 while (addr != 0xffffff) {
315 list = base + (addr & 0x1fffff) / 4;
316 len = list[0] >> 24;
317 addr = list[0] & 0xffffff;
318 if (len)
319 GPUwriteDataMem(list + 1, len);
320 }
321
322 return 0;
323}
324
325void GPUreadDataMem(uint32_t *mem, int count)
326{
327 if (unlikely(gpu.cmd_len > 0))
328 flush_cmd_buffer();
329 if (gpu.dma.h)
330 do_vram_io(mem, count, 1);
331}
332
/* Single-word form of GPUreadDataMem(); yields 0 when nothing is read. */
uint32_t GPUreadData(void)
{
  uint32_t word = 0;

  GPUreadDataMem(&word, 1);
  return word;
}
339
340uint32_t GPUreadStatus(void)
341{
342 if (unlikely(gpu.cmd_len > 0))
343 flush_cmd_buffer();
344
345 return gpu.status.reg | (*gpu.lcf_hc << 31);
346}
347
/* Save-state record exchanged with the caller of GPUfreeze(). */
typedef struct GPUFREEZETAG
{
  uint32_t      ulFreezeVersion;          // should be always 1 for now (set by main emu)
  uint32_t      ulStatus;                 // current gpu status
  uint32_t      ulControl[256];           // latest control register values
  unsigned char psxVRam[1024*1024*2];     // current VRam image (full 2 MB for ZN)
} GPUFreeze_t;
355
356long GPUfreeze(uint32_t type, GPUFreeze_t *freeze)
357{
358 switch (type) {
359 case 1: // save
360 if (gpu.cmd_len > 0)
361 flush_cmd_buffer();
362 memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
363 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
364 freeze->ulStatus = gpu.status.reg;
365 break;
366 case 0: // load
367 memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
368 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
369 gpu.status.reg = freeze->ulStatus;
370 GPUwriteStatus((5 << 24) | gpu.regs[5]);
371 GPUwriteStatus((7 << 24) | gpu.regs[7]);
372 GPUwriteStatus((8 << 24) | gpu.regs[8]);
373 break;
374 }
375
376 return 1;
377}
378
379void GPUvBlank(int val, uint32_t *hcnt)
380{
381 gpu.lcf_hc = &gpu.zero;
382 if (gpu.status.interlace) {
383 if (val)
384 gpu.status.lcf ^= 1;
385 }
386 else {
387 gpu.status.lcf = 0;
388 if (!val)
389 gpu.lcf_hc = hcnt;
390 }
391}
392
393// rearmed specific
394
395#include "../../frontend/plugin_lib.h"
396#include "../../frontend/arm_utils.h"
397
// frontend callbacks, installed by GPUrearmedCallbacks()
static const struct rearmed_cbs *cbs = NULL;
// buffer the next frame is converted into, handed out by the frontend
static void *screen_buf = NULL;
400
401static void blit(void)
402{
403 static uint32_t old_status, old_h;
404 int x = gpu.screen.x & ~3; // alignment needed by blitter
405 int y = gpu.screen.y;
406 int w = gpu.screen.w;
407 int h;
408 uint16_t *srcs;
409 uint8_t *dest;
410
411 srcs = &gpu.vram[y * 1024 + x];
412
413 h = gpu.screen.y2 - gpu.screen.y1;
414 if (gpu.status.dheight)
415 h *= 2;
416
417 if (h <= 0)
418 return;
419
420 if ((gpu.status.reg ^ old_status) & ((7<<16)|(1<<21)) || h != old_h) // width|rgb24 change?
421 {
422 old_status = gpu.status.reg;
423 old_h = h;
424 screen_buf = cbs->pl_fbdev_set_mode(w, h, gpu.status.rgb24 ? 24 : 16);
425 }
426 dest = screen_buf;
427
428 if (gpu.status.rgb24)
429 {
430#ifndef MAEMO
431 for (; h-- > 0; dest += w * 3, srcs += 1024)
432 {
433 bgr888_to_rgb888(dest, srcs, w * 3);
434 }
435#else
436 for (; h-- > 0; dest += w * 2, srcs += 1024)
437 {
438 bgr888_to_rgb565(dest, srcs, w * 3);
439 }
440#endif
441 }
442 else
443 {
444 for (; h-- > 0; dest += w * 2, srcs += 1024)
445 {
446 bgr555_to_rgb565(dest, srcs, w * 2);
447 }
448 }
449
450 screen_buf = cbs->pl_fbdev_flip();
451}
452
453void GPUupdateLace(void)
454{
455 if (!gpu.status.blanking)
456 blit();
457}
458
459long GPUopen(void)
460{
461 cbs->pl_fbdev_open();
462 screen_buf = cbs->pl_fbdev_flip();
463 return 0;
464}
465
466long GPUclose(void)
467{
468 cbs->pl_fbdev_close();
469 return 0;
470}
471
472void GPUrearmedCallbacks(const struct rearmed_cbs *cbs_)
473{
474 cbs = cbs_;
475}
476
477// vim:shiftwidth=2:expandtab