gpu_neon: some cmd/vram io and dma handling
[pcsx_rearmed.git] / plugins / gpu_neon / gpu.c
CommitLineData
1ab64c54
GI
/*
 * (C) Gražvydas "notaz" Ignotas, 2011
 *
 * This work is licensed under the terms of any of these licenses
 * (at your option):
 *  - GNU GPL, version 2 or later.
 *  - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.
 */
10
d30279e2 11#include <stdio.h>
1ab64c54
GI
12#include <stdint.h>
13#include <string.h>
14
15#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
d30279e2 16#define unlikely(x) __builtin_expect((x), 0)
1ab64c54 17
d30279e2
GI
#define CMD_BUFFER_LEN 1024

/*
 * All plugin state lives in this single 64-byte aligned static object.
 */
static struct __attribute__((aligned(64))) {
  uint16_t vram[1024 * 512];            // video memory, indexed as y * 1024 + x
  uint16_t guard[1024 * 512];           // overdraw guard
  uint32_t cmd_buffer[CMD_BUFFER_LEN];  // words queued by GPUwriteData()
  uint32_t regs[16];                    // last word seen per GPUwriteStatus() command
  union {
    uint32_t reg;                       // whole status word
    struct {
      uint32_t tx:4;        //  0 texture page
      uint32_t ty:1;
      uint32_t abr:2;
      uint32_t tp:2;        //  7 t.p. mode (4,8,15bpp)
      uint32_t dtd:1;       //  9 dither
      uint32_t dfe:1;
      uint32_t md:1;        // 11 set mask bit when drawing
      uint32_t me:1;        // 12 no draw on mask
      uint32_t unkn:3;
      uint32_t width1:1;    // 16
      uint32_t width0:2;
      uint32_t dheight:1;   // 19 double height
      uint32_t video:1;     // 20 NTSC,PAL
      uint32_t rgb24:1;
      uint32_t interlace:1; // 22 interlace on
      uint32_t blanking:1;  // 23 display not enabled
      uint32_t unkn2:2;
      uint32_t busy:1;      // 26 !busy drawing
      uint32_t img:1;       // 27 ready to DMA image data
      uint32_t com:1;       // 28 ready for commands
      uint32_t dma:2;       // 29 off, ?, to vram, from vram
      uint32_t lcf:1;       // 31
    };
  } status;
  struct {
    int x, y, w, h;         // display start (x, y) and output size (w, h)
    int y1, y2;             // values of control command 0x07
  } screen;
  struct {
    int x, y, w, h;         // vram transfer rectangle; h counts rows still to do
    int offset;             // pixels already done in the current row
  } dma;
  int cmd_len;              // number of valid words in cmd_buffer
  const uint32_t *lcf_hc;   // word whose value is shifted into status bit 31 on read
  uint32_t zero;            // constant 0 that lcf_hc can point at
} gpu;
64
65long GPUinit(void)
66{
d30279e2 67 gpu.status.reg = 0x14802000;
1ab64c54
GI
68 return 0;
69}
70
/* Plugin shutdown hook: nothing to release, always reports 0. */
long GPUshutdown(void)
{
  const long result = 0;

  return result;
}
75
1ab64c54
GI
76void GPUwriteStatus(uint32_t data)
77{
78 static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
79 static const short vres[4] = { 240, 480, 256, 480 };
80 uint32_t cmd = data >> 24;
81
82 switch (data >> 24) {
83 case 0x00:
d30279e2 84 gpu.status.reg = 0x14802000;
1ab64c54
GI
85 break;
86 case 0x03:
d30279e2 87 gpu.status.blanking = data & 1;
1ab64c54
GI
88 break;
89 case 0x04:
90 gpu.status.dma = data & 3;
91 break;
92 case 0x05:
93 gpu.screen.x = data & 0x3ff;
94 gpu.screen.y = (data >> 10) & 0x3ff;
95 break;
96 case 0x07:
97 gpu.screen.y1 = data & 0x3ff;
98 gpu.screen.y2 = (data >> 10) & 0x3ff;
99 break;
100 case 0x08:
101 gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
102 gpu.screen.w = hres[(gpu.status.reg >> 16) & 7];
103 gpu.screen.h = vres[(gpu.status.reg >> 19) & 3];
104 break;
105 }
106
107 if (cmd < ARRAY_SIZE(gpu.regs))
108 gpu.regs[cmd] = data;
109}
110
/*
 * Number of parameter words that follow the command word, indexed by the
 * command byte (top 8 bits of the first word). One row per 16 commands.
 */
static const unsigned char cmd_lengths[256] =
{
  /* 00 */ 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 20 */ 3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8,
  /* 30 */ 5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  /* 40 */ 2, 2, 2, 2, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
  /* 50 */ 3, 3, 3, 3, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4,
  /* 60 */ 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 2, 2, 2, 2,
  /* 70 */ 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  /* 80 */ 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* a0 */ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* b0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* c0 */ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* d0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* e0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* f0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
130
131void do_cmd(uint32_t *list, int count)
132{
133 uint32_t *list_end = list + count;
134 int cmd;
135 //printf("do_cmd %p, %d\n", data, count);
136
137 for (; list < list_end; list += 1 + cmd_lengths[cmd])
138 {
139 cmd = list[0] >> 24;
140 switch (cmd)
141 {
142 case 0xe1:
143 gpu.status.reg &= ~0x7ff;
144 gpu.status.reg |= list[0] & 0x7ff;
145 break;
146 case 0xe6:
147 gpu.status.reg &= ~0x1800;
148 gpu.status.reg |= (list[0] & 3) << 11;
149 break;
150 }
151 if ((cmd & 0xf4) == 0x24) {
152 // flat textured prim
153 gpu.status.reg &= ~0x1ff;
154 gpu.status.reg |= list[4] & 0x1ff;
155 }
156 else if ((cmd & 0xf4) == 0x34) {
157 // shaded textured prim
158 gpu.status.reg &= ~0x1ff;
159 gpu.status.reg |= list[5] & 0x1ff;
160 }
161 }
1ab64c54
GI
162}
163
d30279e2
GI
164#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
165
166static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
1ab64c54 167{
d30279e2
GI
168 uint16_t *vram = VRAM_MEM_XY(x, y);
169 if (is_read)
170 memcpy(mem, vram, l * 2);
171 else
172 memcpy(vram, mem, l * 2);
173}
174
175static int do_vram_io(uint32_t *data, int count, int is_read)
176{
177 int count_initial = count;
178 uint16_t *sdata = (uint16_t *)data;
179 int x = gpu.dma.x, y = gpu.dma.y;
180 int w = gpu.dma.w, h = gpu.dma.h;
181 int l;
182 count *= 2; // operate in 16bpp pixels
183
184 if (gpu.dma.offset) {
185 l = w - gpu.dma.offset;
186 if (l > count)
187 l = count;
188 do_vram_line(x + gpu.dma.offset, y, sdata, l, is_read);
189 sdata += l;
190 count -= l;
191 y++;
192 h--;
193 }
194
195 for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
196 y &= 511;
197 do_vram_line(x, y, sdata, w, is_read);
198 }
199
200 if (h > 0 && count > 0) {
201 y &= 511;
202 do_vram_line(x, y, sdata, count, is_read);
203 gpu.dma.offset = count;
204 count = 0;
205 }
206 else
207 gpu.dma.offset = 0;
208 gpu.dma.y = y;
209 gpu.dma.h = h;
210
211 return count_initial - (count + 1) / 2;
212}
213
214static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
215{
216 gpu.dma.x = pos_word & 1023;
217 gpu.dma.y = (pos_word >> 16) & 511;
218 gpu.dma.w = size_word & 0xffff; // ?
219 gpu.dma.h = size_word >> 16;
220 gpu.dma.offset = 0;
221
222 if (is_read)
223 gpu.status.img = 1;
224
225 //printf("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
226 // gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
227}
228
229static int check_cmd(uint32_t *data, int count)
230{
231 int len, cmd, start, pos;
232
233 //printf("check_cmd %p, %d\n", data, count);
234
235 // process buffer
236 for (start = pos = 0;; )
237 {
238 cmd = -1;
239 len = 0;
240
241 if (gpu.dma.h) {
242 pos += do_vram_io(data + pos, count - pos, 0);
243 start = pos;
244 }
245
246 while (pos < count) {
247 cmd = data[pos] >> 24;
248 len = 1 + cmd_lengths[cmd];
249 //printf(" %3d: %02x %d\n", pos, cmd, len);
250 if (pos + len > count) {
251 cmd = -1;
252 break; // incomplete cmd
253 }
254 if (cmd == 0xa0 || cmd == 0xc0)
255 break; // image i/o
256 pos += len;
257 }
258
259 if (pos - start > 0) {
260 do_cmd(data + start, pos - start);
261 start = pos;
262 }
263
264 if (cmd == 0xa0 || cmd == 0xc0) {
265 // consume vram write/read cmd
266 start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
267 pos += len;
268 }
269
270 if (pos == count)
271 return 0;
272
273 if (pos + len > count) {
274 //printf("discarding %d words\n", pos + len - count);
275 return pos + len - count;
276 }
277 }
278}
279
280static void flush_cmd_buffer(void)
281{
282 int left = check_cmd(gpu.cmd_buffer, gpu.cmd_len);
283 if (left > 0)
284 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
285 gpu.cmd_len = left;
1ab64c54
GI
286}
287
288void GPUwriteDataMem(uint32_t *mem, int count)
289{
d30279e2
GI
290 int left;
291
292 if (unlikely(gpu.cmd_len > 0))
293 flush_cmd_buffer();
294 left = check_cmd(mem, count);
295 if (left)
296 printf("GPUwriteDataMem: discarded %d/%d words\n", left, count);
1ab64c54
GI
297}
298
d30279e2 299void GPUwriteData(uint32_t data)
1ab64c54 300{
d30279e2
GI
301 gpu.cmd_buffer[gpu.cmd_len++] = data;
302 if (gpu.cmd_len >= CMD_BUFFER_LEN)
303 flush_cmd_buffer();
1ab64c54
GI
304}
305
306long GPUdmaChain(uint32_t *base, uint32_t addr)
307{
d30279e2
GI
308 uint32_t *list;
309 int len;
310
311 if (unlikely(gpu.cmd_len > 0))
312 flush_cmd_buffer();
313
314 while (addr != 0xffffff) {
315 list = base + (addr & 0x1fffff) / 4;
316 len = list[0] >> 24;
317 addr = list[0] & 0xffffff;
318 if (len)
319 GPUwriteDataMem(list + 1, len);
320 }
321
1ab64c54
GI
322 return 0;
323}
324
d30279e2
GI
325void GPUreadDataMem(uint32_t *mem, int count)
326{
327 if (unlikely(gpu.cmd_len > 0))
328 flush_cmd_buffer();
329 if (gpu.dma.h)
330 do_vram_io(mem, count, 1);
331}
332
/*
 * Single-word variant of GPUreadDataMem().
 * Returns 0 when GPUreadDataMem() stores nothing.
 */
uint32_t GPUreadData(void)
{
  uint32_t word = 0;

  GPUreadDataMem(&word, 1);

  return word;
}
339
340uint32_t GPUreadStatus(void)
341{
342 if (unlikely(gpu.cmd_len > 0))
343 flush_cmd_buffer();
344
345 return gpu.status.reg | (*gpu.lcf_hc << 31);
346}
347
1ab64c54
GI
/* Save-state record exchanged with the main emulator through GPUfreeze(). */
typedef struct GPUFREEZETAG
{
  uint32_t ulFreezeVersion;             // should be always 1 for now (set by main emu)
  uint32_t ulStatus;                    // current gpu status
  uint32_t ulControl[256];              // latest control register values
  unsigned char psxVRam[1024*1024*2];   // current VRam image (full 2 MB for ZN)
} GPUFreeze_t;
355
356long GPUfreeze(uint32_t type, GPUFreeze_t *freeze)
357{
358 switch (type) {
359 case 1: // save
d30279e2
GI
360 if (gpu.cmd_len > 0)
361 flush_cmd_buffer();
1ab64c54
GI
362 memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
363 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
364 freeze->ulStatus = gpu.status.reg;
1ab64c54
GI
365 break;
366 case 0: // load
367 memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
368 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
369 gpu.status.reg = freeze->ulStatus;
1ab64c54
GI
370 GPUwriteStatus((5 << 24) | gpu.regs[5]);
371 GPUwriteStatus((7 << 24) | gpu.regs[7]);
372 GPUwriteStatus((8 << 24) | gpu.regs[8]);
373 break;
374 }
375
376 return 1;
377}
378
d30279e2 379void GPUvBlank(int val, uint32_t *hcnt)
1ab64c54 380{
d30279e2
GI
381 gpu.lcf_hc = &gpu.zero;
382 if (gpu.status.interlace) {
383 if (val)
384 gpu.status.lcf ^= 1;
385 }
386 else {
387 gpu.status.lcf = 0;
388 if (!val)
389 gpu.lcf_hc = hcnt;
390 }
1ab64c54
GI
391}
392
393// rearmed specific
394
395#include "../../frontend/plugin_lib.h"
396#include "../../frontend/arm_utils.h"
397
// frontend callback table, installed by GPUrearmedCallbacks()
static const struct rearmed_cbs *cbs;
// buffer the next blit() writes into, as handed out by the frontend
static void *screen_buf;
400
401static void blit(void)
402{
403 static uint32_t old_status, old_h;
404 int x = gpu.screen.x & ~3; // alignment needed by blitter
405 int y = gpu.screen.y;
406 int w = gpu.screen.w;
407 int h;
408 uint16_t *srcs;
409 uint8_t *dest;
410
411 srcs = &gpu.vram[y * 1024 + x];
412
413 h = gpu.screen.y2 - gpu.screen.y1;
d30279e2
GI
414 if (gpu.status.dheight)
415 h *= 2;
1ab64c54
GI
416
417 if (h <= 0)
418 return;
419
420 if ((gpu.status.reg ^ old_status) & ((7<<16)|(1<<21)) || h != old_h) // width|rgb24 change?
421 {
422 old_status = gpu.status.reg;
423 old_h = h;
424 screen_buf = cbs->pl_fbdev_set_mode(w, h, gpu.status.rgb24 ? 24 : 16);
425 }
426 dest = screen_buf;
427
428 if (gpu.status.rgb24)
429 {
430#ifndef MAEMO
431 for (; h-- > 0; dest += w * 3, srcs += 1024)
432 {
433 bgr888_to_rgb888(dest, srcs, w * 3);
434 }
435#else
436 for (; h-- > 0; dest += w * 2, srcs += 1024)
437 {
438 bgr888_to_rgb565(dest, srcs, w * 3);
439 }
440#endif
441 }
442 else
443 {
444 for (; h-- > 0; dest += w * 2, srcs += 1024)
445 {
446 bgr555_to_rgb565(dest, srcs, w * 2);
447 }
448 }
449
450 screen_buf = cbs->pl_fbdev_flip();
451}
452
453void GPUupdateLace(void)
454{
d30279e2
GI
455 if (!gpu.status.blanking)
456 blit();
1ab64c54
GI
457}
458
459long GPUopen(void)
460{
461 cbs->pl_fbdev_open();
462 screen_buf = cbs->pl_fbdev_flip();
463 return 0;
464}
465
466long GPUclose(void)
467{
468 cbs->pl_fbdev_close();
469 return 0;
470}
471
472void GPUrearmedCallbacks(const struct rearmed_cbs *cbs_)
473{
474 cbs = cbs_;
475}
476
477// vim:shiftwidth=2:expandtab