gpu_neon: new intrinsics-only implementation
[pcsx_rearmed.git] / plugins / gpu_neon / psx_gpu / psx_gpu_4x.c
CommitLineData
/*
 * Select the enhancement buffer that covers native column `x`.
 *
 * Looks up the buffer index for the 16-pixel-wide column group containing
 * `x` (enhancement_buf_by_x16[x / 16]) and returns enhancement_buf_ptr
 * advanced by (index << 20) elements.
 *
 * Note: `psx_gpu` is evaluated twice, so pass a side-effect-free expression.
 */
#define select_enhancement_buf_ptr(psx_gpu, x) \
  ((psx_gpu)->enhancement_buf_ptr + \
   ((psx_gpu)->enhancement_buf_by_x16[(x) / 16] << 20))
4\r
a2cb152a 5#if !defined(NEON_BUILD) || defined(SIMD_BUILD)\r
6\r
#ifndef zip_4x32b

/*
 * Portable fallbacks, used only when no other definition of zip_4x32b is
 * already in scope.
 */

/* In the fallback there is nothing to reinterpret: yield `source` as is. */
#define vector_cast(vec_to, source) source

/*
 * Interleave the first four elements of source_a and source_b into dest:
 *   dest.e[0] = a.e[0], dest.e[1] = b.e[0], dest.e[2] = a.e[1], ...
 * Passing the same vector for both sources therefore duplicates every
 * element. All three arguments are evaluated several times.
 *
 * Wrapped in do { } while(0) so that `zip_4x32b(...);` behaves as a single
 * statement, including inside an unbraced if/else.
 */
#define zip_4x32b(dest, source_a, source_b) \
  do \
  { \
    u32 _i; \
    for(_i = 0; _i < 4; _i++) \
    { \
      (dest).e[_i * 2 + 0] = (source_a).e[_i]; \
      (dest).e[_i * 2 + 1] = (source_b).e[_i]; \
    } \
  } while(0)

#endif
19\r
05e2e0c6
E
20void setup_sprite_16bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,\r
21 s32 v, s32 width, s32 height, u32 color)\r
22{\r
23 u32 left_offset = u & 0x7;\r
24 u32 width_rounded = width + left_offset + 7;\r
25\r
fc6cef7d 26 u16 *fb_ptr = psx_gpu->vram_out_ptr + (y * 1024) + (s32)(x - left_offset * 2);\r
05e2e0c6
E
27 u32 right_width = width_rounded & 0x7;\r
28 u32 block_width = width_rounded / 8;\r
fc6cef7d 29 u32 fb_ptr_pitch = (2048 + 16) - (block_width * 16);\r
05e2e0c6
E
30\r
31 u32 left_mask_bits = ~(0xFFFF << (left_offset * 2));\r
fc6cef7d 32 u32 right_mask_bits = 0xFFFC << (right_width * 2);\r
05e2e0c6
E
33\r
34 u32 texture_offset_base = u + (v * 1024);\r
35 u32 texture_mask =\r
36 psx_gpu->texture_mask_width | (psx_gpu->texture_mask_height * 1024);\r
37\r
38 u32 blocks_remaining;\r
39 u32 num_blocks = psx_gpu->num_blocks;\r
40 block_struct *block = psx_gpu->blocks + num_blocks;\r
41\r
42 u16 *texture_page_ptr = psx_gpu->texture_page_ptr;\r
43 u16 *texture_block_ptr;\r
44\r
45 texture_offset_base &= ~0x7;\r
46\r
47 sprites_16bpp++;\r
48\r
49 if(block_width == 1)\r
50 {\r
51 u32 mask_bits = left_mask_bits | right_mask_bits;\r
52 u32 mask_bits_a = mask_bits & 0xFF;\r
53 u32 mask_bits_b = mask_bits >> 8;\r
54 \r
55 vec_8x16u texels;\r
56 vec_8x16u texels_wide;\r
57\r
58 while(height)\r
59 {\r
60 num_blocks += 4;\r
61 sprite_blocks += 4;\r
62\r
63 if(num_blocks > MAX_BLOCKS)\r
64 {\r
65 flush_render_block_buffer(psx_gpu);\r
66 num_blocks = 4;\r
67 block = psx_gpu->blocks;\r
68 }\r
69 \r
70 texture_block_ptr =\r
71 texture_page_ptr + (texture_offset_base & texture_mask);\r
72\r
a2cb152a 73 //load_128b(texels, texture_block_ptr);\r
74 texels = *(vec_8x16u *)texture_block_ptr;\r
05e2e0c6 75 \r
fc6cef7d 76 zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.low, texels.low);\r
05e2e0c6
E
77 block->texels = texels_wide;\r
78 block->draw_mask_bits = mask_bits_a;\r
79 block->fb_ptr = fb_ptr; \r
80 block++;\r
81 \r
82 block->texels = texels_wide;\r
83 block->draw_mask_bits = mask_bits_a;\r
84 block->fb_ptr = fb_ptr + 1024; \r
85 block++;\r
86 \r
fc6cef7d 87 zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.high, texels.high);\r
05e2e0c6
E
88 block->texels = texels_wide;\r
89 block->draw_mask_bits = mask_bits_b;\r
90 block->fb_ptr = fb_ptr + 8;\r
91 block++;\r
92 \r
93 block->texels = texels_wide;\r
94 block->draw_mask_bits = mask_bits_b;\r
95 block->fb_ptr = fb_ptr + 8 + 1024; \r
96 block++; \r
97\r
98 texture_offset_base += 1024;\r
99 fb_ptr += 2048;\r
100\r
101 height--;\r
102 psx_gpu->num_blocks = num_blocks;\r
103 }\r
104 }\r
105 else\r
106 {\r
107 u32 texture_offset;\r
108 \r
109 u32 left_mask_bits_a = left_mask_bits & 0xFF;\r
110 u32 left_mask_bits_b = left_mask_bits >> 8;\r
111 u32 right_mask_bits_a = right_mask_bits & 0xFF;\r
112 u32 right_mask_bits_b = right_mask_bits >> 8;\r
113 \r
114 vec_8x16u texels;\r
115 vec_8x16u texels_wide; \r
116\r
117 while(height)\r
118 {\r
119 blocks_remaining = block_width - 2;\r
120 num_blocks += block_width * 4;\r
121 sprite_blocks += block_width * 4;\r
122\r
123 if(num_blocks > MAX_BLOCKS)\r
124 {\r
125 flush_render_block_buffer(psx_gpu);\r
fc6cef7d 126 num_blocks = block_width * 4;\r
05e2e0c6
E
127 block = psx_gpu->blocks;\r
128 }\r
129\r
130 texture_offset = texture_offset_base;\r
131 texture_offset_base += 1024;\r
132\r
133 texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask);\r
134 \r
a2cb152a 135 //load_128b(texels, texture_block_ptr);\r
136 texels = *(vec_8x16u *)texture_block_ptr;\r
05e2e0c6 137\r
fc6cef7d 138 zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.low, texels.low);\r
05e2e0c6
E
139 block->texels = texels_wide;\r
140 block->draw_mask_bits = left_mask_bits_a;\r
141 block->fb_ptr = fb_ptr;\r
142 block++;\r
143 \r
144 block->texels = texels_wide;\r
145 block->draw_mask_bits = left_mask_bits_a;\r
146 block->fb_ptr = fb_ptr + 1024;\r
147 block++; \r
148\r
fc6cef7d 149 zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.high, texels.high);\r
05e2e0c6
E
150 block->texels = texels_wide;\r
151 block->draw_mask_bits = left_mask_bits_b;\r
152 block->fb_ptr = fb_ptr + 8;\r
153 block++; \r
154 \r
155 block->texels = texels_wide;\r
156 block->draw_mask_bits = left_mask_bits_b;\r
157 block->fb_ptr = fb_ptr + 8 + 1024;\r
158 block++; \r
159 \r
160 texture_offset += 8;\r
161 fb_ptr += 16;\r
162\r
163 while(blocks_remaining)\r
164 {\r
165 texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask);\r
a2cb152a 166 //load_128b(texels, texture_block_ptr);\r
167 texels = *(vec_8x16u *)texture_block_ptr;\r
05e2e0c6 168\r
fc6cef7d 169 zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.low, texels.low);\r
05e2e0c6
E
170 block->texels = texels_wide;\r
171 block->draw_mask_bits = 0;\r
172 block->fb_ptr = fb_ptr;\r
173 block++;\r
174 \r
175 block->texels = texels_wide;\r
176 block->draw_mask_bits = 0;\r
177 block->fb_ptr = fb_ptr + 1024;\r
178 block++; \r
179\r
fc6cef7d 180 zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.high, texels.high);\r
05e2e0c6
E
181 block->texels = texels_wide;\r
182 block->draw_mask_bits = 0;\r
183 block->fb_ptr = fb_ptr + 8;\r
184 block++;\r
185 \r
186 block->texels = texels_wide;\r
187 block->draw_mask_bits = 0;\r
188 block->fb_ptr = fb_ptr + 8 + 1024;\r
189 block++;\r
190 \r
191 texture_offset += 8;\r
fc6cef7d 192 fb_ptr += 16;\r
05e2e0c6
E
193\r
194 blocks_remaining--;\r
195 }\r
196\r
197 texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask);\r
a2cb152a 198 //load_128b(texels, texture_block_ptr);\r
199 texels = *(vec_8x16u *)texture_block_ptr;\r
05e2e0c6 200 \r
fc6cef7d 201 zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.low, texels.low);\r
05e2e0c6
E
202 block->texels = texels_wide;\r
203 block->draw_mask_bits = right_mask_bits_a;\r
204 block->fb_ptr = fb_ptr;\r
205 block++;\r
206 \r
207 block->texels = texels_wide;\r
208 block->draw_mask_bits = right_mask_bits_a;\r
209 block->fb_ptr = fb_ptr + 1024;\r
210 block++; \r
211\r
fc6cef7d 212 zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.high, texels.high);\r
05e2e0c6
E
213 block->texels = texels_wide;\r
214 block->draw_mask_bits = right_mask_bits_b;\r
215 block->fb_ptr = fb_ptr + 8;\r
216 block++;\r
217\r
218 block->texels = texels_wide;\r
219 block->draw_mask_bits = right_mask_bits_b;\r
220 block->fb_ptr = fb_ptr + 8 + 1024; \r
221 block++;\r
222\r
223 fb_ptr += fb_ptr_pitch;\r
224\r
225 height--;\r
226 psx_gpu->num_blocks = num_blocks;\r
227 }\r
228 }\r
229}\r
230\r
231#endif\r
232\r
fc6cef7d 233static void setup_sprite_untextured_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y,\r
234 s32 u, s32 v, s32 width, s32 height, u32 color)\r
235{\r
236 setup_sprite_untextured(psx_gpu, x, y, u, v, width * 2, height * 2, color);\r
237}\r
238\r
05e2e0c6
E
/*
 * Name of the 4x setup function for one handler-table entry.
 *
 *   textured   -> setup_sprite_<texture_mode>_4x
 *   untextured -> setup_sprite_untextured_4x (texture_mode is ignored)
 *
 * setup_sprite_blocks_switch_4x() dispatches on `texturing` by token pasting,
 * so `texturing` must be exactly `textured` or `untextured`.
 */
#define setup_sprite_blocks_switch_textured_4x(texture_mode) \
  setup_sprite_##texture_mode##_4x

#define setup_sprite_blocks_switch_untextured_4x(texture_mode) \
  setup_sprite_untextured_4x

#define setup_sprite_blocks_switch_4x(texturing, texture_mode) \
  setup_sprite_blocks_switch_##texturing##_4x(texture_mode)
248 \r
/*
 * Expand to one brace-enclosed initializer for a handler-table entry, in the
 * order: setup, texture, shade, blend.
 *
 * `shading` and `dithering` are accepted so the parameter list lines up with
 * the surrounding cascade, but they are not used: the shade handler is always
 * selected with `unshaded` and `undithered`.
 */
#define render_sprite_blocks_switch_block_modulation_4x(texture_mode, \
 blend_mode, mask_evaluate, shading, dithering, texturing, blending, \
 modulation) \
{ \
  setup_sprite_blocks_switch_4x(texturing, texture_mode), \
  texture_sprite_blocks_switch_##texturing(texture_mode), \
  shade_blocks_switch(unshaded, texturing, modulation, undithered, blending, \
   mask_evaluate), \
  blend_blocks_switch(texturing, blending, blend_mode, mask_evaluate) \
}
/*
 * Cascade that expands to the full comma-separated list of handler entries.
 *
 * Each level fixes one more option and expands the level below once per
 * value, in the order listed. The innermost option varies fastest, so entry
 * order is, from fastest- to slowest-changing:
 *
 *   modulation    modulated, unmodulated
 *   blending      unblended, blended
 *   texturing     untextured, textured
 *   dithering     undithered, dithered
 *   shading       unshaded, shaded
 *   mask_evaluate off, on
 *   blend_mode    average, add, subtract, add_fourth
 *   texture_mode  4bpp, 8bpp, 16bpp, 16bpp
 *
 * That is 2*2*2*2*2*2*4*4 = 1024 entries. The fourth texture mode slot
 * reuses the 16bpp handlers.
 */
#define render_sprite_blocks_switch_block_blending_4x(texture_mode, \
 blend_mode, mask_evaluate, shading, dithering, texturing, blending) \
  render_sprite_blocks_switch_block_modulation_4x(texture_mode, blend_mode, \
   mask_evaluate, shading, dithering, texturing, blending, modulated), \
  render_sprite_blocks_switch_block_modulation_4x(texture_mode, blend_mode, \
   mask_evaluate, shading, dithering, texturing, blending, unmodulated)

#define render_sprite_blocks_switch_block_texturing_4x(texture_mode, \
 blend_mode, mask_evaluate, shading, dithering, texturing) \
  render_sprite_blocks_switch_block_blending_4x(texture_mode, blend_mode, \
   mask_evaluate, shading, dithering, texturing, unblended), \
  render_sprite_blocks_switch_block_blending_4x(texture_mode, blend_mode, \
   mask_evaluate, shading, dithering, texturing, blended)

#define render_sprite_blocks_switch_block_dithering_4x(texture_mode, \
 blend_mode, mask_evaluate, shading, dithering) \
  render_sprite_blocks_switch_block_texturing_4x(texture_mode, blend_mode, \
   mask_evaluate, shading, dithering, untextured), \
  render_sprite_blocks_switch_block_texturing_4x(texture_mode, blend_mode, \
   mask_evaluate, shading, dithering, textured)

#define render_sprite_blocks_switch_block_shading_4x(texture_mode, blend_mode, \
 mask_evaluate, shading) \
  render_sprite_blocks_switch_block_dithering_4x(texture_mode, blend_mode, \
   mask_evaluate, shading, undithered), \
  render_sprite_blocks_switch_block_dithering_4x(texture_mode, blend_mode, \
   mask_evaluate, shading, dithered)

#define render_sprite_blocks_switch_block_mask_evaluate_4x(texture_mode, \
 blend_mode, mask_evaluate) \
  render_sprite_blocks_switch_block_shading_4x(texture_mode, blend_mode, \
   mask_evaluate, unshaded), \
  render_sprite_blocks_switch_block_shading_4x(texture_mode, blend_mode, \
   mask_evaluate, shaded)

#define render_sprite_blocks_switch_block_blend_mode_4x(texture_mode, \
 blend_mode) \
  render_sprite_blocks_switch_block_mask_evaluate_4x(texture_mode, blend_mode, \
   off), \
  render_sprite_blocks_switch_block_mask_evaluate_4x(texture_mode, blend_mode, \
   on)

#define render_sprite_blocks_switch_block_texture_mode_4x(texture_mode) \
  render_sprite_blocks_switch_block_blend_mode_4x(texture_mode, average), \
  render_sprite_blocks_switch_block_blend_mode_4x(texture_mode, add), \
  render_sprite_blocks_switch_block_blend_mode_4x(texture_mode, subtract), \
  render_sprite_blocks_switch_block_blend_mode_4x(texture_mode, add_fourth)

#define render_sprite_blocks_switch_block_4x() \
  render_sprite_blocks_switch_block_texture_mode_4x(4bpp), \
  render_sprite_blocks_switch_block_texture_mode_4x(8bpp), \
  render_sprite_blocks_switch_block_texture_mode_4x(16bpp), \
  render_sprite_blocks_switch_block_texture_mode_4x(16bpp)
313\r
314\r
315render_block_handler_struct render_sprite_block_handlers_4x[] =\r
316{\r
317 render_sprite_blocks_switch_block_4x()\r
318};\r
319\r
320\r
321void render_sprite_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v,\r
322 s32 width, s32 height, u32 flags, u32 color)\r
323{\r
05e2e0c6
E
324 s32 x_right = x + width - 1;\r
325 s32 y_bottom = y + height - 1;\r
326\r
327#ifdef PROFILE\r
328 sprites++;\r
329#endif\r
330\r
331 if(x < psx_gpu->viewport_start_x)\r
332 {\r
333 u32 clip = psx_gpu->viewport_start_x - x;\r
334 x += clip;\r
335 u += clip;\r
336 width -= clip;\r
337 }\r
338\r
339 if(y < psx_gpu->viewport_start_y)\r
340 {\r
341 s32 clip = psx_gpu->viewport_start_y - y;\r
342 y += clip;\r
343 v += clip;\r
344 height -= clip;\r
345 }\r
346\r
347 if(x_right > psx_gpu->viewport_end_x)\r
348 width -= x_right - psx_gpu->viewport_end_x;\r
349\r
350 if(y_bottom > psx_gpu->viewport_end_y)\r
351 height -= y_bottom - psx_gpu->viewport_end_y;\r
352\r
353 if((width <= 0) || (height <= 0))\r
354 return;\r
355\r
7956599f 356 psx_gpu->vram_out_ptr = select_enhancement_buf_ptr(psx_gpu, x);\r
357\r
fc6cef7d 358 x *= 2;\r
359 y *= 2;\r
360\r
05e2e0c6
E
361#ifdef PROFILE\r
362 span_pixels += width * height;\r
363 spans += height;\r
364#endif\r
365\r
366 u32 render_state = flags &\r
367 (RENDER_FLAGS_MODULATE_TEXELS | RENDER_FLAGS_BLEND |\r
368 RENDER_FLAGS_TEXTURE_MAP);\r
369 render_state |=\r
370 (psx_gpu->render_state_base & ~RENDER_STATE_DITHER);\r
371\r
372 if((psx_gpu->render_state != render_state) ||\r
373 (psx_gpu->primitive_type != PRIMITIVE_TYPE_SPRITE))\r
374 {\r
375 psx_gpu->render_state = render_state;\r
376 flush_render_block_buffer(psx_gpu);\r
377#ifdef PROFILE\r
378 state_changes++;\r
379#endif\r
380 }\r
381\r
382 psx_gpu->primitive_type = PRIMITIVE_TYPE_SPRITE;\r
383\r
384 color &= 0xFFFFFF;\r
385\r
386 if(psx_gpu->triangle_color != color)\r
387 {\r
388 flush_render_block_buffer(psx_gpu);\r
389 psx_gpu->triangle_color = color;\r
390 }\r
391\r
392 if(color == 0x808080)\r
393 render_state |= RENDER_FLAGS_MODULATE_TEXELS;\r
394\r
395 render_block_handler_struct *render_block_handler =\r
396 &(render_sprite_block_handlers_4x[render_state]);\r
397 psx_gpu->render_block_handler = render_block_handler;\r
398\r
399 ((setup_sprite_function_type *)render_block_handler->setup_blocks)\r
400 (psx_gpu, x, y, u, v, width, height, color);\r
401}\r
402\r