gpu_neon: partially support range regs
[pcsx_rearmed.git] / plugins / gpu_neon / gpu.c
CommitLineData
1ab64c54
GI
1/*
2 * (C) Gražvydas "notaz" Ignotas, 2011
3 *
4 * This work is licensed under the terms of any of these licenses
5 * (at your option):
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
9 */
10
d30279e2 11#include <stdio.h>
1ab64c54 12#include <string.h>
56f08d83 13#include "gpu.h"
1ab64c54
GI
14
15#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
d30279e2 16#define unlikely(x) __builtin_expect((x), 0)
8dd855cd 17#define noinline __attribute__((noinline))
1ab64c54 18
56f08d83 19//#define log_io printf
20#define log_io(...)
21#define log_anomaly printf
22
23struct psx_gpu gpu __attribute__((aligned(64)));
1ab64c54
GI
24
25long GPUinit(void)
26{
56f08d83 27 int ret = vout_init();
d30279e2 28 gpu.status.reg = 0x14802000;
56f08d83 29 gpu.lcf_hc = &gpu.zero;
30 return ret;
1ab64c54
GI
31}
32
// Plugin entry point: shut the video output backend down and hand its
// result back to the caller.
long GPUshutdown(void)
{
  long result = vout_finish();

  return result;
}
37
8dd855cd 38static noinline void update_width(void)
39{
40 int sw = gpu.screen.x2 - gpu.screen.x1;
41 if (sw <= 0 || sw >= 2560)
42 // full width
43 gpu.screen.w = gpu.screen.hres;
44 else
45 gpu.screen.w = sw * gpu.screen.hres / 2560;
46}
47
48static noinline void update_height(void)
49{
50 int sh = gpu.screen.y2 - gpu.screen.y1;
51 if (gpu.status.dheight)
52 sh *= 2;
53 if (sh <= 0)
54 sh = gpu.screen.vres;
55
56 gpu.screen.h = sh;
57}
58
1ab64c54
GI
59void GPUwriteStatus(uint32_t data)
60{
61 static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
62 static const short vres[4] = { 240, 480, 256, 480 };
63 uint32_t cmd = data >> 24;
64
8dd855cd 65 if (cmd < ARRAY_SIZE(gpu.regs))
66 gpu.regs[cmd] = data;
67
68 switch (cmd) {
1ab64c54 69 case 0x00:
d30279e2 70 gpu.status.reg = 0x14802000;
8dd855cd 71 gpu.status.blanking = 1;
1ab64c54
GI
72 break;
73 case 0x03:
d30279e2 74 gpu.status.blanking = data & 1;
1ab64c54
GI
75 break;
76 case 0x04:
77 gpu.status.dma = data & 3;
78 break;
79 case 0x05:
80 gpu.screen.x = data & 0x3ff;
81 gpu.screen.y = (data >> 10) & 0x3ff;
82 break;
8dd855cd 83 case 0x06:
84 gpu.screen.x1 = data & 0xfff;
85 gpu.screen.x2 = (data >> 12) & 0xfff;
86 update_width();
87 break;
1ab64c54
GI
88 case 0x07:
89 gpu.screen.y1 = data & 0x3ff;
90 gpu.screen.y2 = (data >> 10) & 0x3ff;
8dd855cd 91 update_height();
1ab64c54
GI
92 break;
93 case 0x08:
94 gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
8dd855cd 95 gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
96 gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
97 update_width();
98 update_height();
1ab64c54
GI
99 break;
100 }
1ab64c54
GI
101}
102
// Number of extra words that follow the first word of each command,
// indexed by the command byte (top 8 bits of the first word).
// One row per high nibble.
const unsigned char cmd_lengths[256] =
{
  /* 00 */ 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 20 */ 3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8,
  /* 30 */ 5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  /* 40 */ 2, 2, 2, 2, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
  /* 50 */ 3, 3, 3, 3, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4,
  /* 60 */ 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 2, 2, 2, 2,
  /* 70 */ 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  /* 80 */ 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* a0 */ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* b0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* c0 */ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* d0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* e0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* f0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
122
d30279e2
GI
123#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
124
125static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
1ab64c54 126{
d30279e2
GI
127 uint16_t *vram = VRAM_MEM_XY(x, y);
128 if (is_read)
129 memcpy(mem, vram, l * 2);
130 else
131 memcpy(vram, mem, l * 2);
132}
133
134static int do_vram_io(uint32_t *data, int count, int is_read)
135{
136 int count_initial = count;
137 uint16_t *sdata = (uint16_t *)data;
138 int x = gpu.dma.x, y = gpu.dma.y;
139 int w = gpu.dma.w, h = gpu.dma.h;
ddd56f6e 140 int o = gpu.dma.offset;
d30279e2
GI
141 int l;
142 count *= 2; // operate in 16bpp pixels
143
144 if (gpu.dma.offset) {
145 l = w - gpu.dma.offset;
ddd56f6e 146 if (count < l)
d30279e2 147 l = count;
ddd56f6e 148
149 do_vram_line(x + o, y, sdata, l, is_read);
150
151 if (o + l < w)
152 o += l;
153 else {
154 o = 0;
155 y++;
156 h--;
157 }
d30279e2
GI
158 sdata += l;
159 count -= l;
d30279e2
GI
160 }
161
162 for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
163 y &= 511;
164 do_vram_line(x, y, sdata, w, is_read);
165 }
166
167 if (h > 0 && count > 0) {
168 y &= 511;
169 do_vram_line(x, y, sdata, count, is_read);
ddd56f6e 170 o = count;
d30279e2
GI
171 count = 0;
172 }
d30279e2
GI
173 gpu.dma.y = y;
174 gpu.dma.h = h;
ddd56f6e 175 gpu.dma.offset = o;
d30279e2
GI
176
177 return count_initial - (count + 1) / 2;
178}
179
180static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
181{
ddd56f6e 182 if (gpu.dma.h)
183 log_anomaly("start_vram_transfer while old unfinished\n");
184
d30279e2
GI
185 gpu.dma.x = pos_word & 1023;
186 gpu.dma.y = (pos_word >> 16) & 511;
187 gpu.dma.w = size_word & 0xffff; // ?
188 gpu.dma.h = size_word >> 16;
189 gpu.dma.offset = 0;
190
191 if (is_read)
192 gpu.status.img = 1;
193
194 //printf("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
195 // gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
196}
197
198static int check_cmd(uint32_t *data, int count)
199{
200 int len, cmd, start, pos;
201
d30279e2 202 // process buffer
ddd56f6e 203 for (start = pos = 0; pos < count; )
d30279e2
GI
204 {
205 cmd = -1;
206 len = 0;
207
208 if (gpu.dma.h) {
209 pos += do_vram_io(data + pos, count - pos, 0);
ddd56f6e 210 if (pos == count)
211 break;
d30279e2
GI
212 start = pos;
213 }
214
ddd56f6e 215 // do look-ahead pass to detect SR changes and VRAM i/o
d30279e2 216 while (pos < count) {
56f08d83 217 uint32_t *list = data + pos;
218 cmd = list[0] >> 24;
d30279e2 219 len = 1 + cmd_lengths[cmd];
56f08d83 220
d30279e2 221 //printf(" %3d: %02x %d\n", pos, cmd, len);
56f08d83 222 if ((cmd & 0xf4) == 0x24) {
223 // flat textured prim
224 gpu.status.reg &= ~0x1ff;
225 gpu.status.reg |= list[4] & 0x1ff;
226 }
227 else if ((cmd & 0xf4) == 0x34) {
228 // shaded textured prim
229 gpu.status.reg &= ~0x1ff;
230 gpu.status.reg |= list[5] & 0x1ff;
231 }
232 else switch (cmd)
233 {
234 case 0xe1:
235 gpu.status.reg &= ~0x7ff;
236 gpu.status.reg |= list[0] & 0x7ff;
237 break;
238 case 0xe6:
239 gpu.status.reg &= ~0x1800;
240 gpu.status.reg |= (list[0] & 3) << 11;
241 break;
242 }
243
d30279e2
GI
244 if (pos + len > count) {
245 cmd = -1;
246 break; // incomplete cmd
247 }
248 if (cmd == 0xa0 || cmd == 0xc0)
249 break; // image i/o
250 pos += len;
251 }
252
253 if (pos - start > 0) {
56f08d83 254 do_cmd_list(data + start, pos - start);
d30279e2
GI
255 start = pos;
256 }
257
258 if (cmd == 0xa0 || cmd == 0xc0) {
259 // consume vram write/read cmd
260 start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
261 pos += len;
262 }
263
ddd56f6e 264 if (cmd == -1)
265 break;
d30279e2 266 }
ddd56f6e 267
268 return count - pos;
d30279e2
GI
269}
270
271static void flush_cmd_buffer(void)
272{
273 int left = check_cmd(gpu.cmd_buffer, gpu.cmd_len);
274 if (left > 0)
275 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
276 gpu.cmd_len = left;
1ab64c54
GI
277}
278
279void GPUwriteDataMem(uint32_t *mem, int count)
280{
d30279e2
GI
281 int left;
282
56f08d83 283 log_io("gpu_dma_write %p %d\n", mem, count);
284
d30279e2
GI
285 if (unlikely(gpu.cmd_len > 0))
286 flush_cmd_buffer();
56f08d83 287
d30279e2
GI
288 left = check_cmd(mem, count);
289 if (left)
56f08d83 290 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
1ab64c54
GI
291}
292
d30279e2 293void GPUwriteData(uint32_t data)
1ab64c54 294{
56f08d83 295 log_io("gpu_write %08x\n", data);
d30279e2
GI
296 gpu.cmd_buffer[gpu.cmd_len++] = data;
297 if (gpu.cmd_len >= CMD_BUFFER_LEN)
298 flush_cmd_buffer();
1ab64c54
GI
299}
300
ddd56f6e 301long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
1ab64c54 302{
ddd56f6e 303 uint32_t addr, *list;
304 int len, left, count;
d30279e2
GI
305
306 if (unlikely(gpu.cmd_len > 0))
307 flush_cmd_buffer();
308
56f08d83 309 log_io("gpu_dma_chain\n");
ddd56f6e 310 addr = start_addr & 0xffffff;
311 for (count = 0; addr != 0xffffff; count++)
312 {
56f08d83 313 log_io(".chain %08x\n", addr);
314
ddd56f6e 315 list = rambase + (addr & 0x1fffff) / 4;
d30279e2
GI
316 len = list[0] >> 24;
317 addr = list[0] & 0xffffff;
ddd56f6e 318
319 // loop detection marker
320 // (bit23 set causes DMA error on real machine, so
321 // unlikely to be ever set by the game)
322 list[0] |= 0x800000;
323
56f08d83 324 if (len) {
325 left = check_cmd(list + 1, len);
326 if (left)
327 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, len);
328 }
ddd56f6e 329
330 if (addr & 0x800000)
331 break;
332 }
333
334 // remove loop detection markers
335 addr = start_addr & 0x1fffff;
336 while (count-- > 0) {
337 list = rambase + addr / 4;
338 addr = list[0] & 0x1fffff;
339 list[0] &= ~0x800000;
d30279e2
GI
340 }
341
1ab64c54
GI
342 return 0;
343}
344
d30279e2
GI
345void GPUreadDataMem(uint32_t *mem, int count)
346{
56f08d83 347 log_io("gpu_dma_read %p %d\n", mem, count);
348
d30279e2
GI
349 if (unlikely(gpu.cmd_len > 0))
350 flush_cmd_buffer();
56f08d83 351
d30279e2
GI
352 if (gpu.dma.h)
353 do_vram_io(mem, count, 1);
354}
355
356uint32_t GPUreadData(void)
357{
358 uint32_t v = 0;
56f08d83 359
360 log_io("gpu_read\n");
361
362 if (unlikely(gpu.cmd_len > 0))
363 flush_cmd_buffer();
364
365 if (gpu.dma.h)
366 do_vram_io(&v, 1, 1);
367
d30279e2
GI
368 return v;
369}
370
371uint32_t GPUreadStatus(void)
372{
ddd56f6e 373 uint32_t ret;
56f08d83 374
d30279e2
GI
375 if (unlikely(gpu.cmd_len > 0))
376 flush_cmd_buffer();
377
ddd56f6e 378 ret = gpu.status.reg | (*gpu.lcf_hc << 31);
379 log_io("gpu_read_status %08x\n", ret);
380 return ret;
d30279e2
GI
381}
382
1ab64c54
GI
// Save-state exchange structure passed to GPUfreeze().
typedef struct GPUFREEZETAG
{
  // should be always 1 for now (set by main emu)
  uint32_t ulFreezeVersion;
  // current gpu status
  uint32_t ulStatus;
  // latest control register values
  uint32_t ulControl[256];
  // current VRam image (full 2 MB for ZN)
  unsigned char psxVRam[1024*1024*2];
} GPUFreeze_t;
390
391long GPUfreeze(uint32_t type, GPUFreeze_t *freeze)
392{
393 switch (type) {
394 case 1: // save
d30279e2
GI
395 if (gpu.cmd_len > 0)
396 flush_cmd_buffer();
1ab64c54
GI
397 memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
398 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
399 freeze->ulStatus = gpu.status.reg;
1ab64c54
GI
400 break;
401 case 0: // load
402 memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
403 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
404 gpu.status.reg = freeze->ulStatus;
1ab64c54
GI
405 GPUwriteStatus((5 << 24) | gpu.regs[5]);
406 GPUwriteStatus((7 << 24) | gpu.regs[7]);
407 GPUwriteStatus((8 << 24) | gpu.regs[8]);
408 break;
409 }
410
411 return 1;
412}
413
d30279e2 414void GPUvBlank(int val, uint32_t *hcnt)
1ab64c54 415{
d30279e2
GI
416 gpu.lcf_hc = &gpu.zero;
417 if (gpu.status.interlace) {
418 if (val)
419 gpu.status.lcf ^= 1;
420 }
421 else {
422 gpu.status.lcf = 0;
423 if (!val)
424 gpu.lcf_hc = hcnt;
425 }
1ab64c54
GI
426}
427
1ab64c54 428// vim:shiftwidth=2:expandtab