gpu_neon: rework buffer selection
[pcsx_rearmed.git] / plugins / gpu_neon / psx_gpu / psx_gpu_4x.c
CommitLineData
/*
 * select_enhancement_buf_index(psx_gpu, x)
 *   Reads one entry of psx_gpu->enhancement_buf_by_x16. The entry index is
 *   x (converted to u32) divided by (1024 / sizeof(the whole table)), so the
 *   table as a whole spans x values 0..1023 when its elements are one byte
 *   wide (NOTE(review): element size is not visible here - confirm).
 *
 * select_enhancement_buf_ptr(psx_gpu, x)
 *   Offsets psx_gpu->enhancement_buf_ptr by (index << 20) elements, where
 *   index is the value produced by select_enhancement_buf_index().
 */
#define select_enhancement_buf_index(psx_gpu, x) \
  ((psx_gpu)->enhancement_buf_by_x16[ \
    (u32)(x) / (1024u / sizeof((psx_gpu)->enhancement_buf_by_x16))])

#define select_enhancement_buf_ptr(psx_gpu, x) \
  ((psx_gpu)->enhancement_buf_ptr + \
    (select_enhancement_buf_index(psx_gpu, x) << 20))
7956599f 8\r
a2cb152a 9#if !defined(NEON_BUILD) || defined(SIMD_BUILD)\r
10\r
#ifndef zip_4x32b

/* Fallback used when no other definition of zip_4x32b is in effect:
 * vector_cast() ignores the requested type and yields the source as-is. */
#define vector_cast(vec_to, source) source

/*
 * Interleave the first four elements of source_a and source_b into dest:
 *   dest.e[0] = a.e[0], dest.e[1] = b.e[0], dest.e[2] = a.e[1], ...
 * All three arguments must expose an indexable member named `e`; dest needs
 * at least eight elements.
 *
 * Wrapped in do { } while(0) so that `zip_4x32b(...);` is a single statement
 * and stays valid as the body of an unbraced if/else.
 */
#define zip_4x32b(dest, source_a, source_b) do { \
  u32 _i; \
  for(_i = 0; _i < 4; _i++) { \
    (dest).e[_i * 2 + 0] = (source_a).e[_i]; \
    (dest).e[_i * 2 + 1] = (source_b).e[_i]; \
  } \
} while(0)

#endif
23\r
05e2e0c6
E
24void setup_sprite_16bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,\r
25 s32 v, s32 width, s32 height, u32 color)\r
26{\r
27 u32 left_offset = u & 0x7;\r
28 u32 width_rounded = width + left_offset + 7;\r
29\r
fc6cef7d 30 u16 *fb_ptr = psx_gpu->vram_out_ptr + (y * 1024) + (s32)(x - left_offset * 2);\r
05e2e0c6
E
31 u32 right_width = width_rounded & 0x7;\r
32 u32 block_width = width_rounded / 8;\r
fc6cef7d 33 u32 fb_ptr_pitch = (2048 + 16) - (block_width * 16);\r
05e2e0c6
E
34\r
35 u32 left_mask_bits = ~(0xFFFF << (left_offset * 2));\r
fc6cef7d 36 u32 right_mask_bits = 0xFFFC << (right_width * 2);\r
05e2e0c6
E
37\r
38 u32 texture_offset_base = u + (v * 1024);\r
39 u32 texture_mask =\r
40 psx_gpu->texture_mask_width | (psx_gpu->texture_mask_height * 1024);\r
41\r
42 u32 blocks_remaining;\r
43 u32 num_blocks = psx_gpu->num_blocks;\r
44 block_struct *block = psx_gpu->blocks + num_blocks;\r
45\r
46 u16 *texture_page_ptr = psx_gpu->texture_page_ptr;\r
47 u16 *texture_block_ptr;\r
48\r
49 texture_offset_base &= ~0x7;\r
50\r
51 sprites_16bpp++;\r
52\r
53 if(block_width == 1)\r
54 {\r
55 u32 mask_bits = left_mask_bits | right_mask_bits;\r
56 u32 mask_bits_a = mask_bits & 0xFF;\r
57 u32 mask_bits_b = mask_bits >> 8;\r
58 \r
59 vec_8x16u texels;\r
60 vec_8x16u texels_wide;\r
61\r
62 while(height)\r
63 {\r
64 num_blocks += 4;\r
65 sprite_blocks += 4;\r
66\r
67 if(num_blocks > MAX_BLOCKS)\r
68 {\r
69 flush_render_block_buffer(psx_gpu);\r
70 num_blocks = 4;\r
71 block = psx_gpu->blocks;\r
72 }\r
73 \r
74 texture_block_ptr =\r
75 texture_page_ptr + (texture_offset_base & texture_mask);\r
76\r
a2cb152a 77 //load_128b(texels, texture_block_ptr);\r
78 texels = *(vec_8x16u *)texture_block_ptr;\r
05e2e0c6 79 \r
fc6cef7d 80 zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.low, texels.low);\r
05e2e0c6
E
81 block->texels = texels_wide;\r
82 block->draw_mask_bits = mask_bits_a;\r
83 block->fb_ptr = fb_ptr; \r
84 block++;\r
85 \r
86 block->texels = texels_wide;\r
87 block->draw_mask_bits = mask_bits_a;\r
88 block->fb_ptr = fb_ptr + 1024; \r
89 block++;\r
90 \r
fc6cef7d 91 zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.high, texels.high);\r
05e2e0c6
E
92 block->texels = texels_wide;\r
93 block->draw_mask_bits = mask_bits_b;\r
94 block->fb_ptr = fb_ptr + 8;\r
95 block++;\r
96 \r
97 block->texels = texels_wide;\r
98 block->draw_mask_bits = mask_bits_b;\r
99 block->fb_ptr = fb_ptr + 8 + 1024; \r
100 block++; \r
101\r
102 texture_offset_base += 1024;\r
103 fb_ptr += 2048;\r
104\r
105 height--;\r
106 psx_gpu->num_blocks = num_blocks;\r
107 }\r
108 }\r
109 else\r
110 {\r
111 u32 texture_offset;\r
112 \r
113 u32 left_mask_bits_a = left_mask_bits & 0xFF;\r
114 u32 left_mask_bits_b = left_mask_bits >> 8;\r
115 u32 right_mask_bits_a = right_mask_bits & 0xFF;\r
116 u32 right_mask_bits_b = right_mask_bits >> 8;\r
117 \r
118 vec_8x16u texels;\r
119 vec_8x16u texels_wide; \r
120\r
121 while(height)\r
122 {\r
123 blocks_remaining = block_width - 2;\r
124 num_blocks += block_width * 4;\r
125 sprite_blocks += block_width * 4;\r
126\r
127 if(num_blocks > MAX_BLOCKS)\r
128 {\r
129 flush_render_block_buffer(psx_gpu);\r
fc6cef7d 130 num_blocks = block_width * 4;\r
05e2e0c6
E
131 block = psx_gpu->blocks;\r
132 }\r
133\r
134 texture_offset = texture_offset_base;\r
135 texture_offset_base += 1024;\r
136\r
137 texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask);\r
138 \r
a2cb152a 139 //load_128b(texels, texture_block_ptr);\r
140 texels = *(vec_8x16u *)texture_block_ptr;\r
05e2e0c6 141\r
fc6cef7d 142 zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.low, texels.low);\r
05e2e0c6
E
143 block->texels = texels_wide;\r
144 block->draw_mask_bits = left_mask_bits_a;\r
145 block->fb_ptr = fb_ptr;\r
146 block++;\r
147 \r
148 block->texels = texels_wide;\r
149 block->draw_mask_bits = left_mask_bits_a;\r
150 block->fb_ptr = fb_ptr + 1024;\r
151 block++; \r
152\r
fc6cef7d 153 zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.high, texels.high);\r
05e2e0c6
E
154 block->texels = texels_wide;\r
155 block->draw_mask_bits = left_mask_bits_b;\r
156 block->fb_ptr = fb_ptr + 8;\r
157 block++; \r
158 \r
159 block->texels = texels_wide;\r
160 block->draw_mask_bits = left_mask_bits_b;\r
161 block->fb_ptr = fb_ptr + 8 + 1024;\r
162 block++; \r
163 \r
164 texture_offset += 8;\r
165 fb_ptr += 16;\r
166\r
167 while(blocks_remaining)\r
168 {\r
169 texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask);\r
a2cb152a 170 //load_128b(texels, texture_block_ptr);\r
171 texels = *(vec_8x16u *)texture_block_ptr;\r
05e2e0c6 172\r
fc6cef7d 173 zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.low, texels.low);\r
05e2e0c6
E
174 block->texels = texels_wide;\r
175 block->draw_mask_bits = 0;\r
176 block->fb_ptr = fb_ptr;\r
177 block++;\r
178 \r
179 block->texels = texels_wide;\r
180 block->draw_mask_bits = 0;\r
181 block->fb_ptr = fb_ptr + 1024;\r
182 block++; \r
183\r
fc6cef7d 184 zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.high, texels.high);\r
05e2e0c6
E
185 block->texels = texels_wide;\r
186 block->draw_mask_bits = 0;\r
187 block->fb_ptr = fb_ptr + 8;\r
188 block++;\r
189 \r
190 block->texels = texels_wide;\r
191 block->draw_mask_bits = 0;\r
192 block->fb_ptr = fb_ptr + 8 + 1024;\r
193 block++;\r
194 \r
195 texture_offset += 8;\r
fc6cef7d 196 fb_ptr += 16;\r
05e2e0c6
E
197\r
198 blocks_remaining--;\r
199 }\r
200\r
201 texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask);\r
a2cb152a 202 //load_128b(texels, texture_block_ptr);\r
203 texels = *(vec_8x16u *)texture_block_ptr;\r
05e2e0c6 204 \r
fc6cef7d 205 zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.low, texels.low);\r
05e2e0c6
E
206 block->texels = texels_wide;\r
207 block->draw_mask_bits = right_mask_bits_a;\r
208 block->fb_ptr = fb_ptr;\r
209 block++;\r
210 \r
211 block->texels = texels_wide;\r
212 block->draw_mask_bits = right_mask_bits_a;\r
213 block->fb_ptr = fb_ptr + 1024;\r
214 block++; \r
215\r
fc6cef7d 216 zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.high, texels.high);\r
05e2e0c6
E
217 block->texels = texels_wide;\r
218 block->draw_mask_bits = right_mask_bits_b;\r
219 block->fb_ptr = fb_ptr + 8;\r
220 block++;\r
221\r
222 block->texels = texels_wide;\r
223 block->draw_mask_bits = right_mask_bits_b;\r
224 block->fb_ptr = fb_ptr + 8 + 1024; \r
225 block++;\r
226\r
227 fb_ptr += fb_ptr_pitch;\r
228\r
229 height--;\r
230 psx_gpu->num_blocks = num_blocks;\r
231 }\r
232 }\r
233}\r
234\r
235#endif\r
236\r
fc6cef7d 237static void setup_sprite_untextured_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y,\r
238 s32 u, s32 v, s32 width, s32 height, u32 color)\r
239{\r
240 setup_sprite_untextured(psx_gpu, x, y, u, v, width * 2, height * 2, color);\r
241}\r
242\r
05e2e0c6
E
/*
 * Name selection for the 4x sprite setup routine.
 *
 *   setup_sprite_blocks_switch_4x(textured, MODE)   -> setup_sprite_MODE_4x
 *   setup_sprite_blocks_switch_4x(untextured, MODE) -> setup_sprite_untextured_4x
 *
 * The first argument is pasted into the name of one of the two helper macros
 * below; the untextured variant ignores the texture mode.
 */
#define setup_sprite_blocks_switch_textured_4x(mode) \
  setup_sprite_##mode##_4x

#define setup_sprite_blocks_switch_untextured_4x(mode) \
  setup_sprite_untextured_4x

#define setup_sprite_blocks_switch_4x(tex, mode) \
  setup_sprite_blocks_switch_##tex##_4x(mode)
252 \r
/*
 * Generator macros for the 4x sprite handler table.
 *
 * render_sprite_blocks_switch_block_4x() expands to a comma-separated list of
 * brace initializers. Each macro level below doubles (or, for blend mode and
 * texture mode, quadruples) the list by expanding the next level once per
 * option, so the resulting order is, from slowest- to fastest-varying:
 *
 *   texture mode (4bpp, 8bpp, 16bpp, 16bpp) > blend mode (average, add,
 *   subtract, add_fourth) > mask evaluate (off, on) > shading (unshaded,
 *   shaded) > dithering (undithered, dithered) > texturing (untextured,
 *   textured) > blending (unblended, blended) > modulation (modulated,
 *   unmodulated)
 *
 * Every initializer holds four items: the 4x setup routine, the texturing
 * routine, the shading routine and the blending routine. The shading routine
 * is always requested as unshaded/undithered regardless of the shading and
 * dithering options of the slot.
 */

/* Innermost level: one initializer for a fully specified combination. */
#define render_sprite_blocks_switch_block_modulation_4x(tex_mode, bl_mode, \
 mask_eval, shade, dither, tex, blend, modul) \
{ \
  setup_sprite_blocks_switch_4x(tex, tex_mode), \
  texture_sprite_blocks_switch_##tex(tex_mode), \
  shade_blocks_switch(unshaded, tex, modul, undithered, blend, mask_eval), \
  blend_blocks_switch(tex, blend, bl_mode, mask_eval) \
}

/* modulated, then unmodulated */
#define render_sprite_blocks_switch_block_blending_4x(tex_mode, bl_mode, \
 mask_eval, shade, dither, tex, blend) \
  render_sprite_blocks_switch_block_modulation_4x(tex_mode, bl_mode, \
   mask_eval, shade, dither, tex, blend, modulated), \
  render_sprite_blocks_switch_block_modulation_4x(tex_mode, bl_mode, \
   mask_eval, shade, dither, tex, blend, unmodulated)

/* unblended, then blended */
#define render_sprite_blocks_switch_block_texturing_4x(tex_mode, bl_mode, \
 mask_eval, shade, dither, tex) \
  render_sprite_blocks_switch_block_blending_4x(tex_mode, bl_mode, \
   mask_eval, shade, dither, tex, unblended), \
  render_sprite_blocks_switch_block_blending_4x(tex_mode, bl_mode, \
   mask_eval, shade, dither, tex, blended)

/* untextured, then textured */
#define render_sprite_blocks_switch_block_dithering_4x(tex_mode, bl_mode, \
 mask_eval, shade, dither) \
  render_sprite_blocks_switch_block_texturing_4x(tex_mode, bl_mode, \
   mask_eval, shade, dither, untextured), \
  render_sprite_blocks_switch_block_texturing_4x(tex_mode, bl_mode, \
   mask_eval, shade, dither, textured)

/* undithered, then dithered */
#define render_sprite_blocks_switch_block_shading_4x(tex_mode, bl_mode, \
 mask_eval, shade) \
  render_sprite_blocks_switch_block_dithering_4x(tex_mode, bl_mode, \
   mask_eval, shade, undithered), \
  render_sprite_blocks_switch_block_dithering_4x(tex_mode, bl_mode, \
   mask_eval, shade, dithered)

/* unshaded, then shaded */
#define render_sprite_blocks_switch_block_mask_evaluate_4x(tex_mode, bl_mode, \
 mask_eval) \
  render_sprite_blocks_switch_block_shading_4x(tex_mode, bl_mode, \
   mask_eval, unshaded), \
  render_sprite_blocks_switch_block_shading_4x(tex_mode, bl_mode, \
   mask_eval, shaded)

/* mask evaluation off, then on */
#define render_sprite_blocks_switch_block_blend_mode_4x(tex_mode, bl_mode) \
  render_sprite_blocks_switch_block_mask_evaluate_4x(tex_mode, bl_mode, off), \
  render_sprite_blocks_switch_block_mask_evaluate_4x(tex_mode, bl_mode, on)

/* average, add, subtract, add_fourth */
#define render_sprite_blocks_switch_block_texture_mode_4x(tex_mode) \
  render_sprite_blocks_switch_block_blend_mode_4x(tex_mode, average), \
  render_sprite_blocks_switch_block_blend_mode_4x(tex_mode, add), \
  render_sprite_blocks_switch_block_blend_mode_4x(tex_mode, subtract), \
  render_sprite_blocks_switch_block_blend_mode_4x(tex_mode, add_fourth)

/* Outermost level; the fourth texture-mode slot reuses the 16bpp handlers. */
#define render_sprite_blocks_switch_block_4x() \
  render_sprite_blocks_switch_block_texture_mode_4x(4bpp), \
  render_sprite_blocks_switch_block_texture_mode_4x(8bpp), \
  render_sprite_blocks_switch_block_texture_mode_4x(16bpp), \
  render_sprite_blocks_switch_block_texture_mode_4x(16bpp)
05e2e0c6
E
317\r
318\r
319render_block_handler_struct render_sprite_block_handlers_4x[] =\r
320{\r
321 render_sprite_blocks_switch_block_4x()\r
322};\r
323\r
324\r
325void render_sprite_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v,\r
326 s32 width, s32 height, u32 flags, u32 color)\r
327{\r
05e2e0c6
E
328 s32 x_right = x + width - 1;\r
329 s32 y_bottom = y + height - 1;\r
330\r
331#ifdef PROFILE\r
332 sprites++;\r
333#endif\r
334\r
335 if(x < psx_gpu->viewport_start_x)\r
336 {\r
337 u32 clip = psx_gpu->viewport_start_x - x;\r
338 x += clip;\r
339 u += clip;\r
340 width -= clip;\r
341 }\r
342\r
343 if(y < psx_gpu->viewport_start_y)\r
344 {\r
345 s32 clip = psx_gpu->viewport_start_y - y;\r
346 y += clip;\r
347 v += clip;\r
348 height -= clip;\r
349 }\r
350\r
351 if(x_right > psx_gpu->viewport_end_x)\r
352 width -= x_right - psx_gpu->viewport_end_x;\r
353\r
354 if(y_bottom > psx_gpu->viewport_end_y)\r
355 height -= y_bottom - psx_gpu->viewport_end_y;\r
356\r
357 if((width <= 0) || (height <= 0))\r
358 return;\r
359\r
7956599f 360 psx_gpu->vram_out_ptr = select_enhancement_buf_ptr(psx_gpu, x);\r
361\r
fc6cef7d 362 x *= 2;\r
363 y *= 2;\r
364\r
05e2e0c6
E
365#ifdef PROFILE\r
366 span_pixels += width * height;\r
367 spans += height;\r
368#endif\r
369\r
370 u32 render_state = flags &\r
371 (RENDER_FLAGS_MODULATE_TEXELS | RENDER_FLAGS_BLEND |\r
372 RENDER_FLAGS_TEXTURE_MAP);\r
373 render_state |=\r
374 (psx_gpu->render_state_base & ~RENDER_STATE_DITHER);\r
375\r
376 if((psx_gpu->render_state != render_state) ||\r
377 (psx_gpu->primitive_type != PRIMITIVE_TYPE_SPRITE))\r
378 {\r
379 psx_gpu->render_state = render_state;\r
380 flush_render_block_buffer(psx_gpu);\r
381#ifdef PROFILE\r
382 state_changes++;\r
383#endif\r
384 }\r
385\r
386 psx_gpu->primitive_type = PRIMITIVE_TYPE_SPRITE;\r
387\r
388 color &= 0xFFFFFF;\r
389\r
390 if(psx_gpu->triangle_color != color)\r
391 {\r
392 flush_render_block_buffer(psx_gpu);\r
393 psx_gpu->triangle_color = color;\r
394 }\r
395\r
396 if(color == 0x808080)\r
397 render_state |= RENDER_FLAGS_MODULATE_TEXELS;\r
398\r
399 render_block_handler_struct *render_block_handler =\r
400 &(render_sprite_block_handlers_4x[render_state]);\r
401 psx_gpu->render_block_handler = render_block_handler;\r
402\r
403 ((setup_sprite_function_type *)render_block_handler->setup_blocks)\r
404 (psx_gpu, x, y, u, v, width, height, color);\r
405}\r
406\r