gpu_neon: partially support range regs
[pcsx_rearmed.git] / plugins / gpu_neon / gpu.c
... / ...
CommitLineData
/*
 * (C) Gražvydas "notaz" Ignotas, 2011
 *
 * This work is licensed under the terms of any of these licenses
 * (at your option):
 *  - GNU GPL, version 2 or later.
 *  - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.
 */
10
11#include <stdio.h>
12#include <string.h>
13#include "gpu.h"
14
15#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
16#define unlikely(x) __builtin_expect((x), 0)
17#define noinline __attribute__((noinline))
18
19//#define log_io printf
20#define log_io(...)
21#define log_anomaly printf
22
23struct psx_gpu gpu __attribute__((aligned(64)));
24
25long GPUinit(void)
26{
27 int ret = vout_init();
28 gpu.status.reg = 0x14802000;
29 gpu.lcf_hc = &gpu.zero;
30 return ret;
31}
32
// Stop the video output layer; the result of vout_finish() is passed through.
long GPUshutdown(void)
{
  long vout_status = vout_finish();

  return vout_status;
}
37
38static noinline void update_width(void)
39{
40 int sw = gpu.screen.x2 - gpu.screen.x1;
41 if (sw <= 0 || sw >= 2560)
42 // full width
43 gpu.screen.w = gpu.screen.hres;
44 else
45 gpu.screen.w = sw * gpu.screen.hres / 2560;
46}
47
48static noinline void update_height(void)
49{
50 int sh = gpu.screen.y2 - gpu.screen.y1;
51 if (gpu.status.dheight)
52 sh *= 2;
53 if (sh <= 0)
54 sh = gpu.screen.vres;
55
56 gpu.screen.h = sh;
57}
58
59void GPUwriteStatus(uint32_t data)
60{
61 static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
62 static const short vres[4] = { 240, 480, 256, 480 };
63 uint32_t cmd = data >> 24;
64
65 if (cmd < ARRAY_SIZE(gpu.regs))
66 gpu.regs[cmd] = data;
67
68 switch (cmd) {
69 case 0x00:
70 gpu.status.reg = 0x14802000;
71 gpu.status.blanking = 1;
72 break;
73 case 0x03:
74 gpu.status.blanking = data & 1;
75 break;
76 case 0x04:
77 gpu.status.dma = data & 3;
78 break;
79 case 0x05:
80 gpu.screen.x = data & 0x3ff;
81 gpu.screen.y = (data >> 10) & 0x3ff;
82 break;
83 case 0x06:
84 gpu.screen.x1 = data & 0xfff;
85 gpu.screen.x2 = (data >> 12) & 0xfff;
86 update_width();
87 break;
88 case 0x07:
89 gpu.screen.y1 = data & 0x3ff;
90 gpu.screen.y2 = (data >> 10) & 0x3ff;
91 update_height();
92 break;
93 case 0x08:
94 gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
95 gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
96 gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
97 update_width();
98 update_height();
99 break;
100 }
101}
102
/*
 * Per-command operand count, indexed by the top byte of a command word.
 * check_cmd() treats a command as occupying 1 + cmd_lengths[cmd] words.
 * Each row covers 16 command codes; the trailing comment is the code of
 * the row's first entry.
 */
const unsigned char cmd_lengths[256] =
{
  0,  0,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, // 00
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, // 10
  3,  3,  3,  3,  6,  6,  6,  6,  4,  4,  4,  4,  8,  8,  8,  8, // 20
  5,  5,  5,  5,  8,  8,  8,  8,  7,  7,  7,  7, 11, 11, 11, 11, // 30
  2,  2,  2,  2,  0,  0,  0,  0,  3,  3,  3,  3,  3,  3,  3,  3, // 40
  3,  3,  3,  3,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4, // 50
  2,  2,  2,  2,  3,  3,  3,  3,  1,  1,  1,  1,  2,  2,  2,  2, // 60
  1,  1,  1,  1,  2,  2,  2,  2,  1,  1,  1,  1,  2,  2,  2,  2, // 70
  3,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, // 80
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, // 90
  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, // a0
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, // b0
  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, // c0
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, // d0
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, // e0
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0  // f0
};
122
123#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
124
/*
 * Copy l 16-bit pixels between mem and VRAM position (x, y).
 * is_read selects the direction: nonzero copies VRAM -> mem,
 * zero copies mem -> VRAM.
 */
static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
{
  uint16_t *vram_pos = VRAM_MEM_XY(x, y);
  uint16_t *dst = is_read ? mem : vram_pos;
  const uint16_t *src = is_read ? vram_pos : mem;

  memcpy(dst, src, l * 2); // 2 bytes per pixel
}
133
134static int do_vram_io(uint32_t *data, int count, int is_read)
135{
136 int count_initial = count;
137 uint16_t *sdata = (uint16_t *)data;
138 int x = gpu.dma.x, y = gpu.dma.y;
139 int w = gpu.dma.w, h = gpu.dma.h;
140 int o = gpu.dma.offset;
141 int l;
142 count *= 2; // operate in 16bpp pixels
143
144 if (gpu.dma.offset) {
145 l = w - gpu.dma.offset;
146 if (count < l)
147 l = count;
148
149 do_vram_line(x + o, y, sdata, l, is_read);
150
151 if (o + l < w)
152 o += l;
153 else {
154 o = 0;
155 y++;
156 h--;
157 }
158 sdata += l;
159 count -= l;
160 }
161
162 for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
163 y &= 511;
164 do_vram_line(x, y, sdata, w, is_read);
165 }
166
167 if (h > 0 && count > 0) {
168 y &= 511;
169 do_vram_line(x, y, sdata, count, is_read);
170 o = count;
171 count = 0;
172 }
173 gpu.dma.y = y;
174 gpu.dma.h = h;
175 gpu.dma.offset = o;
176
177 return count_initial - (count + 1) / 2;
178}
179
180static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
181{
182 if (gpu.dma.h)
183 log_anomaly("start_vram_transfer while old unfinished\n");
184
185 gpu.dma.x = pos_word & 1023;
186 gpu.dma.y = (pos_word >> 16) & 511;
187 gpu.dma.w = size_word & 0xffff; // ?
188 gpu.dma.h = size_word >> 16;
189 gpu.dma.offset = 0;
190
191 if (is_read)
192 gpu.status.img = 1;
193
194 //printf("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
195 // gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
196}
197
198static int check_cmd(uint32_t *data, int count)
199{
200 int len, cmd, start, pos;
201
202 // process buffer
203 for (start = pos = 0; pos < count; )
204 {
205 cmd = -1;
206 len = 0;
207
208 if (gpu.dma.h) {
209 pos += do_vram_io(data + pos, count - pos, 0);
210 if (pos == count)
211 break;
212 start = pos;
213 }
214
215 // do look-ahead pass to detect SR changes and VRAM i/o
216 while (pos < count) {
217 uint32_t *list = data + pos;
218 cmd = list[0] >> 24;
219 len = 1 + cmd_lengths[cmd];
220
221 //printf(" %3d: %02x %d\n", pos, cmd, len);
222 if ((cmd & 0xf4) == 0x24) {
223 // flat textured prim
224 gpu.status.reg &= ~0x1ff;
225 gpu.status.reg |= list[4] & 0x1ff;
226 }
227 else if ((cmd & 0xf4) == 0x34) {
228 // shaded textured prim
229 gpu.status.reg &= ~0x1ff;
230 gpu.status.reg |= list[5] & 0x1ff;
231 }
232 else switch (cmd)
233 {
234 case 0xe1:
235 gpu.status.reg &= ~0x7ff;
236 gpu.status.reg |= list[0] & 0x7ff;
237 break;
238 case 0xe6:
239 gpu.status.reg &= ~0x1800;
240 gpu.status.reg |= (list[0] & 3) << 11;
241 break;
242 }
243
244 if (pos + len > count) {
245 cmd = -1;
246 break; // incomplete cmd
247 }
248 if (cmd == 0xa0 || cmd == 0xc0)
249 break; // image i/o
250 pos += len;
251 }
252
253 if (pos - start > 0) {
254 do_cmd_list(data + start, pos - start);
255 start = pos;
256 }
257
258 if (cmd == 0xa0 || cmd == 0xc0) {
259 // consume vram write/read cmd
260 start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
261 pos += len;
262 }
263
264 if (cmd == -1)
265 break;
266 }
267
268 return count - pos;
269}
270
271static void flush_cmd_buffer(void)
272{
273 int left = check_cmd(gpu.cmd_buffer, gpu.cmd_len);
274 if (left > 0)
275 memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
276 gpu.cmd_len = left;
277}
278
279void GPUwriteDataMem(uint32_t *mem, int count)
280{
281 int left;
282
283 log_io("gpu_dma_write %p %d\n", mem, count);
284
285 if (unlikely(gpu.cmd_len > 0))
286 flush_cmd_buffer();
287
288 left = check_cmd(mem, count);
289 if (left)
290 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
291}
292
293void GPUwriteData(uint32_t data)
294{
295 log_io("gpu_write %08x\n", data);
296 gpu.cmd_buffer[gpu.cmd_len++] = data;
297 if (gpu.cmd_len >= CMD_BUFFER_LEN)
298 flush_cmd_buffer();
299}
300
301long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
302{
303 uint32_t addr, *list;
304 int len, left, count;
305
306 if (unlikely(gpu.cmd_len > 0))
307 flush_cmd_buffer();
308
309 log_io("gpu_dma_chain\n");
310 addr = start_addr & 0xffffff;
311 for (count = 0; addr != 0xffffff; count++)
312 {
313 log_io(".chain %08x\n", addr);
314
315 list = rambase + (addr & 0x1fffff) / 4;
316 len = list[0] >> 24;
317 addr = list[0] & 0xffffff;
318
319 // loop detection marker
320 // (bit23 set causes DMA error on real machine, so
321 // unlikely to be ever set by the game)
322 list[0] |= 0x800000;
323
324 if (len) {
325 left = check_cmd(list + 1, len);
326 if (left)
327 log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, len);
328 }
329
330 if (addr & 0x800000)
331 break;
332 }
333
334 // remove loop detection markers
335 addr = start_addr & 0x1fffff;
336 while (count-- > 0) {
337 list = rambase + addr / 4;
338 addr = list[0] & 0x1fffff;
339 list[0] &= ~0x800000;
340 }
341
342 return 0;
343}
344
345void GPUreadDataMem(uint32_t *mem, int count)
346{
347 log_io("gpu_dma_read %p %d\n", mem, count);
348
349 if (unlikely(gpu.cmd_len > 0))
350 flush_cmd_buffer();
351
352 if (gpu.dma.h)
353 do_vram_io(mem, count, 1);
354}
355
356uint32_t GPUreadData(void)
357{
358 uint32_t v = 0;
359
360 log_io("gpu_read\n");
361
362 if (unlikely(gpu.cmd_len > 0))
363 flush_cmd_buffer();
364
365 if (gpu.dma.h)
366 do_vram_io(&v, 1, 1);
367
368 return v;
369}
370
371uint32_t GPUreadStatus(void)
372{
373 uint32_t ret;
374
375 if (unlikely(gpu.cmd_len > 0))
376 flush_cmd_buffer();
377
378 ret = gpu.status.reg | (*gpu.lcf_hc << 31);
379 log_io("gpu_read_status %08x\n", ret);
380 return ret;
381}
382
// Save-state record exchanged with the main emulator through GPUfreeze().
typedef struct GPUFREEZETAG
{
  // should be always 1 for now (set by main emu)
  uint32_t ulFreezeVersion;
  // current gpu status
  uint32_t ulStatus;
  // latest control register values
  uint32_t ulControl[256];
  // current VRam image (full 2 MB for ZN)
  unsigned char psxVRam[1024*1024*2];
} GPUFreeze_t;
390
391long GPUfreeze(uint32_t type, GPUFreeze_t *freeze)
392{
393 switch (type) {
394 case 1: // save
395 if (gpu.cmd_len > 0)
396 flush_cmd_buffer();
397 memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
398 memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
399 freeze->ulStatus = gpu.status.reg;
400 break;
401 case 0: // load
402 memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
403 memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
404 gpu.status.reg = freeze->ulStatus;
405 GPUwriteStatus((5 << 24) | gpu.regs[5]);
406 GPUwriteStatus((7 << 24) | gpu.regs[7]);
407 GPUwriteStatus((8 << 24) | gpu.regs[8]);
408 break;
409 }
410
411 return 1;
412}
413
414void GPUvBlank(int val, uint32_t *hcnt)
415{
416 gpu.lcf_hc = &gpu.zero;
417 if (gpu.status.interlace) {
418 if (val)
419 gpu.status.lcf ^= 1;
420 }
421 else {
422 gpu.status.lcf = 0;
423 if (!val)
424 gpu.lcf_hc = hcnt;
425 }
426}
427
428// vim:shiftwidth=2:expandtab