gpu_neon: enforce alignment required for asm
[pcsx_rearmed.git] / plugins / gpu_neon / psx_gpu_if.c
... / ...
CommitLineData
1/*
2 * (C) Gražvydas "notaz" Ignotas, 2011
3 *
4 * This work is licensed under the terms of any of these licenses
5 * (at your option):
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
9 */
10
11#include <stdio.h>
12#include <assert.h>
13
// Element count of a true array (not valid for pointers).
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(*(arr)))

// Fallback min/max helpers, only provided when nothing defined them earlier.
// Both evaluate their arguments more than once, so pass side-effect-free
// expressions only.
#if !defined(min)
#define min(lhs, rhs) ((lhs) < (rhs) ? (lhs) : (rhs))
#endif
#if !defined(max)
#define max(lhs, rhs) ((lhs) > (rhs) ? (lhs) : (rhs))
#endif
21
// Command length table defined in another translation unit; the sources
// included below refer to it as command_lengths.
extern const unsigned char cmd_lengths[256];
#define command_lengths cmd_lengths

// Points at gpu.ex_regs once renderer_init() has run.
static unsigned int *ex_regs;
// Non-zero after initialize_psx_gpu() has been called on egpu.
static int initialized;

#define PCSX
// Store v into ex_regs[r].  Arguments and the whole expansion are
// parenthesized so the macro behaves as a single expression wherever
// it is used.
#define SET_Ex(r, v) \
  (ex_regs[(r)] = (v))

// Forward declaration: defined further down in this file, declared here
// ahead of the psx_gpu sources that are #included below.
static __attribute__((noinline)) void
sync_enhancement_buffers(int x, int y, int w, int h);
34
35#include "../gpulib/gpu.h"
36#include "psx_gpu/psx_gpu.c"
37#include "psx_gpu/psx_gpu_parse.c"
38
39static psx_gpu_struct egpu __attribute__((aligned(256)));
40
41int do_cmd_list(uint32_t *list, int count, int *last_cmd)
42{
43 int ret;
44
45#if defined(__arm__) && defined(NEON_BUILD) && !defined(SIMD_BUILD)
46 // the asm doesn't bother to save callee-save vector regs, so do it here
47 __asm__ __volatile__("":::"q4","q5","q6","q7");
48#endif
49
50 if (gpu.state.enhancement_active)
51 ret = gpu_parse_enhanced(&egpu, list, count * 4, (u32 *)last_cmd);
52 else
53 ret = gpu_parse(&egpu, list, count * 4, (u32 *)last_cmd);
54
55#if defined(__arm__) && defined(NEON_BUILD) && !defined(SIMD_BUILD)
56 __asm__ __volatile__("":::"q4","q5","q6","q7");
57#endif
58
59 ex_regs[1] &= ~0x1ff;
60 ex_regs[1] |= egpu.texture_settings & 0x1ff;
61 return ret;
62}
63
// Bytes requested from gpu.mmap for the enhancement buffers.
// NOTE(review): the first term presumably covers four 1024x1024 buffers of
// 2-byte pixels and the second is padding; map_enhancement_buffer() skips
// 4096 bytes at the start of the mapping - confirm the intent of the rest.
#define ENHANCEMENT_BUF_SIZE ((1024 * 1024 * 2 * 4) + (4096 * 2))
65
66static void *get_enhancement_bufer(int *x, int *y, int *w, int *h,
67 int *vram_h)
68{
69 uint16_t *ret = select_enhancement_buf_ptr(&egpu, *x, *y);
70 if (ret == NULL)
71 return NULL;
72
73 *x *= 2;
74 *y *= 2;
75 *w = *w * 2;
76 *h = *h * 2;
77 *vram_h = 1024;
78 return ret;
79}
80
81static void map_enhancement_buffer(void)
82{
83 // currently we use 4x 1024*1024 buffers instead of single 2048*1024
84 // to be able to reuse 1024-width code better (triangle setup,
85 // dithering phase, lines).
86 egpu.enhancement_buf_ptr = gpu.mmap(ENHANCEMENT_BUF_SIZE);
87 if (egpu.enhancement_buf_ptr == NULL) {
88 fprintf(stderr, "failed to map enhancement buffer\n");
89 gpu.get_enhancement_bufer = NULL;
90 }
91 else {
92 egpu.enhancement_buf_ptr += 4096 / 2;
93 gpu.get_enhancement_bufer = get_enhancement_bufer;
94 }
95}
96
97int renderer_init(void)
98{
99 if (gpu.vram != NULL) {
100 initialize_psx_gpu(&egpu, gpu.vram);
101 initialized = 1;
102 }
103
104 if (gpu.mmap != NULL && egpu.enhancement_buf_ptr == NULL)
105 map_enhancement_buffer();
106
107 ex_regs = gpu.ex_regs;
108 return 0;
109}
110
111void renderer_finish(void)
112{
113 if (egpu.enhancement_buf_ptr != NULL) {
114 egpu.enhancement_buf_ptr -= 4096 / 2;
115 gpu.munmap(egpu.enhancement_buf_ptr, ENHANCEMENT_BUF_SIZE);
116 }
117 egpu.enhancement_buf_ptr = NULL;
118 egpu.enhancement_current_buf_ptr = NULL;
119 initialized = 0;
120}
121
122static __attribute__((noinline)) void
123sync_enhancement_buffers(int x, int y, int w, int h)
124{
125 int i, right = x + w, bottom = y + h;
126 const u16 *src = gpu.vram;
127 // use these because the scanout struct may hold reduced w, h
128 // due to intersection stuff, see the update_enhancement_buf_scanouts() mess
129 int s_w = max(gpu.screen.hres, gpu.screen.w);
130 int s_h = gpu.screen.vres;
131 s_w = min(s_w, 512);
132 for (i = 0; i < ARRAY_SIZE(egpu.enhancement_scanouts); i++) {
133 const struct psx_gpu_scanout *s = &egpu.enhancement_scanouts[i];
134 u16 *dst = select_enhancement_buf_by_index(&egpu, i);
135 int x1, x2, y1, y2;
136 if (s->w == 0) continue;
137 if (s->x >= right) continue;
138 if (s->x + s_w <= x) continue;
139 if (s->y >= bottom) continue;
140 if (s->y + s_h <= y) continue;
141 x1 = max(x, s->x);
142 x2 = min(right, s->x + s_w);
143 y1 = max(y, s->y);
144 y2 = min(bottom, s->y + s_h);
145 // 16-byte align for the asm version
146 x2 += x1 & 7;
147 x1 &= ~7;
148 scale2x_tiles8(dst + y1 * 1024*2 + x1 * 2,
149 src + y1 * 1024 + x1, (x2 - x1 + 7) / 8u, y2 - y1);
150 }
151}
152
153void renderer_sync_ecmds(uint32_t *ecmds)
154{
155 gpu_parse(&egpu, ecmds + 1, 6 * 4, NULL);
156}
157
158void renderer_update_caches(int x, int y, int w, int h, int state_changed)
159{
160 update_texture_cache_region(&egpu, x, y, x + w - 1, y + h - 1);
161
162 if (gpu.state.enhancement_active) {
163 if (state_changed) {
164 memset(egpu.enhancement_scanouts, 0, sizeof(egpu.enhancement_scanouts));
165 egpu.enhancement_scanout_eselect = 0;
166 update_enhancement_buf_scanouts(&egpu,
167 gpu.screen.src_x, gpu.screen.src_y, gpu.screen.hres, gpu.screen.vres);
168 return;
169 }
170 sync_enhancement_buffers(x, y, w, h);
171 }
172}
173
174void renderer_flush_queues(void)
175{
176 flush_render_block_buffer(&egpu);
177}
178
179void renderer_set_interlace(int enable, int is_odd)
180{
181 egpu.render_mode &= ~(RENDER_INTERLACE_ENABLED|RENDER_INTERLACE_ODD);
182 if (enable)
183 egpu.render_mode |= RENDER_INTERLACE_ENABLED;
184 if (is_odd)
185 egpu.render_mode |= RENDER_INTERLACE_ODD;
186}
187
188void renderer_notify_res_change(void)
189{
190 renderer_notify_scanout_change(gpu.screen.src_x, gpu.screen.src_y);
191}
192
193void renderer_notify_scanout_change(int x, int y)
194{
195 if (!gpu.state.enhancement_active || !egpu.enhancement_buf_ptr)
196 return;
197
198 update_enhancement_buf_scanouts(&egpu, x, y, gpu.screen.hres, gpu.screen.vres);
199}
200
201#include "../../frontend/plugin_lib.h"
202
203void renderer_set_config(const struct rearmed_cbs *cbs)
204{
205 if (!initialized) {
206 initialize_psx_gpu(&egpu, gpu.vram);
207 initialized = 1;
208 }
209 if (cbs->pl_set_gpu_caps)
210 cbs->pl_set_gpu_caps(GPU_CAP_SUPPORTS_2X);
211
212 egpu.use_dithering = cbs->gpu_neon.allow_dithering;
213 if(!egpu.use_dithering) {
214 egpu.dither_table[0] = dither_table_row(0, 0, 0, 0);
215 egpu.dither_table[1] = dither_table_row(0, 0, 0, 0);
216 egpu.dither_table[2] = dither_table_row(0, 0, 0, 0);
217 egpu.dither_table[3] = dither_table_row(0, 0, 0, 0);
218 } else {
219 egpu.dither_table[0] = dither_table_row(-4, 0, -3, 1);
220 egpu.dither_table[1] = dither_table_row(2, -2, 3, -1);
221 egpu.dither_table[2] = dither_table_row(-3, 1, -4, 0);
222 egpu.dither_table[3] = dither_table_row(3, -1, 2, -2);
223 }
224
225 disable_main_render = cbs->gpu_neon.enhancement_no_main;
226 if (gpu.state.enhancement_enable) {
227 if (gpu.mmap != NULL && egpu.enhancement_buf_ptr == NULL)
228 map_enhancement_buffer();
229 }
230}
231
/* Intentionally empty: this renderer has nothing to do here. */
void renderer_sync(void)
{
  return;
}
235
/* Intentionally empty: the argument is ignored by this renderer. */
void renderer_notify_update_lace(int updated)
{
  (void)updated;
}
239
240// vim:ts=2:sw=2:expandtab