gpu_neon: don't crash on large primitives in enhancement mode
[pcsx_rearmed.git] / plugins / gpu_neon / psx_gpu / psx_gpu_4x.c
/*
 * Look up the enhancement buffer index for a (pre-scaling) x coordinate.
 *
 * x is converted to u32 and divided by (1024 / sizeof(table)), so the table
 * partitions the range 0..1023 into equally wide columns, one table entry
 * each.  sizeof() is used as the entry count, which presumes one-byte
 * entries - TODO confirm against the struct declaration.  There is no range
 * check: x values of 1024 or more (or negative ones, after the u32
 * conversion) index past the end of the table.
 */
#define select_enhancement_buf_index(psx_gpu, x)                               \
  ((psx_gpu)->enhancement_buf_by_x16[                                          \
    (u32)(x) / (1024u / sizeof((psx_gpu)->enhancement_buf_by_x16))])

/*
 * Base pointer of the enhancement buffer that covers x: consecutive buffers
 * are spaced (1 << 20) elements of enhancement_buf_ptr's pointee type apart.
 */
#define select_enhancement_buf_ptr(psx_gpu, x)                                 \
  ((psx_gpu)->enhancement_buf_ptr +                                            \
    (select_enhancement_buf_index(psx_gpu, x) << 20))
8 \r
9 #if !defined(NEON_BUILD) || defined(SIMD_BUILD)\r
10 \r
#ifndef zip_4x32b

/* Portable fallbacks, used only when zip_4x32b has not been defined yet. */

/* Identity in this fallback: vec_to is ignored and source is used as-is. */
#define vector_cast(vec_to, source) source

/*
 * Interleave the first four elements of source_a and source_b into dest:
 *   dest.e[0] = a.e[0], dest.e[1] = b.e[0], dest.e[2] = a.e[1], ...
 * Passing the same vector for both sources therefore writes every source
 * element twice in a row.  Expands to a brace block, so it can be used with
 * or without a trailing semicolon.
 */
#define zip_4x32b(dest, source_a, source_b)                                    \
{                                                                              \
  u32 _lane;                                                                   \
  for(_lane = 0; _lane < 4; _lane++)                                           \
  {                                                                            \
    (dest).e[(_lane * 2) + 0] = (source_a).e[_lane];                           \
    (dest).e[(_lane * 2) + 1] = (source_b).e[_lane];                           \
  }                                                                            \
}

#endif
23 \r
24 void setup_sprite_16bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,\r
25  s32 v, s32 width, s32 height, u32 color)\r
26 {\r
27   u32 left_offset = u & 0x7;\r
28   u32 width_rounded = width + left_offset + 7;\r
29 \r
30   u16 *fb_ptr = psx_gpu->vram_out_ptr + (y * 1024) + (s32)(x - left_offset * 2);\r
31   u32 right_width = width_rounded & 0x7;\r
32   u32 block_width = width_rounded / 8;\r
33   u32 fb_ptr_pitch = (2048 + 16) - (block_width * 16);\r
34 \r
35   u32 left_mask_bits = ~(0xFFFF << (left_offset * 2));\r
36   u32 right_mask_bits = 0xFFFC << (right_width * 2);\r
37 \r
38   u32 texture_offset_base = u + (v * 1024);\r
39   u32 texture_mask =\r
40    psx_gpu->texture_mask_width | (psx_gpu->texture_mask_height * 1024);\r
41 \r
42   u32 blocks_remaining;\r
43   u32 num_blocks = psx_gpu->num_blocks;\r
44   block_struct *block = psx_gpu->blocks + num_blocks;\r
45 \r
46   u16 *texture_page_ptr = psx_gpu->texture_page_ptr;\r
47   u16 *texture_block_ptr;\r
48 \r
49   texture_offset_base &= ~0x7;\r
50 \r
51   sprites_16bpp++;\r
52 \r
53   if(block_width == 1)\r
54   {\r
55     u32 mask_bits = left_mask_bits | right_mask_bits;\r
56     u32 mask_bits_a = mask_bits & 0xFF;\r
57     u32 mask_bits_b = mask_bits >> 8;\r
58     \r
59     vec_8x16u texels;\r
60     vec_8x16u texels_wide;\r
61 \r
62     while(height)\r
63     {\r
64       num_blocks += 4;\r
65       sprite_blocks += 4;\r
66 \r
67       if(num_blocks > MAX_BLOCKS)\r
68       {\r
69         flush_render_block_buffer(psx_gpu);\r
70         num_blocks = 4;\r
71         block = psx_gpu->blocks;\r
72       }\r
73       \r
74       texture_block_ptr =\r
75        texture_page_ptr + (texture_offset_base & texture_mask);\r
76 \r
77       //load_128b(texels, texture_block_ptr);\r
78       texels = *(vec_8x16u *)texture_block_ptr;\r
79       \r
80       zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.low, texels.low);\r
81       block->texels = texels_wide;\r
82       block->draw_mask_bits = mask_bits_a;\r
83       block->fb_ptr = fb_ptr;          \r
84       block++;\r
85       \r
86       block->texels = texels_wide;\r
87       block->draw_mask_bits = mask_bits_a;\r
88       block->fb_ptr = fb_ptr + 1024;          \r
89       block++;\r
90       \r
91       zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.high, texels.high);\r
92       block->texels = texels_wide;\r
93       block->draw_mask_bits = mask_bits_b;\r
94       block->fb_ptr = fb_ptr + 8;\r
95       block++;\r
96       \r
97       block->texels = texels_wide;\r
98       block->draw_mask_bits = mask_bits_b;\r
99       block->fb_ptr = fb_ptr + 8 + 1024;          \r
100       block++;      \r
101 \r
102       texture_offset_base += 1024;\r
103       fb_ptr += 2048;\r
104 \r
105       height--;\r
106       psx_gpu->num_blocks = num_blocks;\r
107     }\r
108   }\r
109   else\r
110   {\r
111     u32 texture_offset;\r
112     \r
113     u32 left_mask_bits_a = left_mask_bits & 0xFF;\r
114     u32 left_mask_bits_b = left_mask_bits >> 8;\r
115     u32 right_mask_bits_a = right_mask_bits & 0xFF;\r
116     u32 right_mask_bits_b = right_mask_bits >> 8;\r
117     \r
118     vec_8x16u texels;\r
119     vec_8x16u texels_wide;    \r
120 \r
121     while(height)\r
122     {\r
123       blocks_remaining = block_width - 2;\r
124       num_blocks += block_width * 4;\r
125       sprite_blocks += block_width * 4;\r
126 \r
127       if(num_blocks > MAX_BLOCKS)\r
128       {\r
129         flush_render_block_buffer(psx_gpu);\r
130         num_blocks = block_width * 4;\r
131         block = psx_gpu->blocks;\r
132       }\r
133 \r
134       texture_offset = texture_offset_base;\r
135       texture_offset_base += 1024;\r
136 \r
137       texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask);\r
138       \r
139       //load_128b(texels, texture_block_ptr);\r
140       texels = *(vec_8x16u *)texture_block_ptr;\r
141 \r
142       zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.low, texels.low);\r
143       block->texels = texels_wide;\r
144       block->draw_mask_bits = left_mask_bits_a;\r
145       block->fb_ptr = fb_ptr;\r
146       block++;\r
147       \r
148       block->texels = texels_wide;\r
149       block->draw_mask_bits = left_mask_bits_a;\r
150       block->fb_ptr = fb_ptr + 1024;\r
151       block++;      \r
152 \r
153       zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.high, texels.high);\r
154       block->texels = texels_wide;\r
155       block->draw_mask_bits = left_mask_bits_b;\r
156       block->fb_ptr = fb_ptr + 8;\r
157       block++;  \r
158       \r
159       block->texels = texels_wide;\r
160       block->draw_mask_bits = left_mask_bits_b;\r
161       block->fb_ptr = fb_ptr + 8 + 1024;\r
162       block++;  \r
163       \r
164       texture_offset += 8;\r
165       fb_ptr += 16;\r
166 \r
167       while(blocks_remaining)\r
168       {\r
169         texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask);\r
170         //load_128b(texels, texture_block_ptr);\r
171         texels = *(vec_8x16u *)texture_block_ptr;\r
172 \r
173         zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.low, texels.low);\r
174         block->texels = texels_wide;\r
175         block->draw_mask_bits = 0;\r
176         block->fb_ptr = fb_ptr;\r
177         block++;\r
178         \r
179         block->texels = texels_wide;\r
180         block->draw_mask_bits = 0;\r
181         block->fb_ptr = fb_ptr + 1024;\r
182         block++;      \r
183 \r
184         zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.high, texels.high);\r
185         block->texels = texels_wide;\r
186         block->draw_mask_bits = 0;\r
187         block->fb_ptr = fb_ptr + 8;\r
188         block++;\r
189         \r
190         block->texels = texels_wide;\r
191         block->draw_mask_bits = 0;\r
192         block->fb_ptr = fb_ptr + 8 + 1024;\r
193         block++;\r
194         \r
195         texture_offset += 8;\r
196         fb_ptr += 16;\r
197 \r
198         blocks_remaining--;\r
199       }\r
200 \r
201       texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask);\r
202       //load_128b(texels, texture_block_ptr);\r
203       texels = *(vec_8x16u *)texture_block_ptr;\r
204       \r
205       zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.low, texels.low);\r
206       block->texels = texels_wide;\r
207       block->draw_mask_bits = right_mask_bits_a;\r
208       block->fb_ptr = fb_ptr;\r
209       block++;\r
210       \r
211       block->texels = texels_wide;\r
212       block->draw_mask_bits = right_mask_bits_a;\r
213       block->fb_ptr = fb_ptr + 1024;\r
214       block++;      \r
215 \r
216       zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.high, texels.high);\r
217       block->texels = texels_wide;\r
218       block->draw_mask_bits = right_mask_bits_b;\r
219       block->fb_ptr = fb_ptr + 8;\r
220       block++;\r
221 \r
222       block->texels = texels_wide;\r
223       block->draw_mask_bits = right_mask_bits_b;\r
224       block->fb_ptr = fb_ptr + 8 + 1024;      \r
225       block++;\r
226 \r
227       fb_ptr += fb_ptr_pitch;\r
228 \r
229       height--;\r
230       psx_gpu->num_blocks = num_blocks;\r
231     }\r
232   }\r
233 }\r
234 \r
235 #endif\r
236 \r
237 static void setup_sprite_untextured_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y,\r
238  s32 u, s32 v, s32 width, s32 height, u32 color)\r
239 {\r
240   width *= 2;\r
241   height *= 2;\r
242   if (width > 1024)\r
243     width = 1024;\r
244   setup_sprite_untextured(psx_gpu, x, y, u, v, width, height, color);\r
245 }\r
246 \r
/*
 * Macro chain that expands to the initializer list of the enhancement-mode
 * sprite handler table.
 *
 * Each level below emits its argument list twice (or four times), once per
 * value of the option it adds, so render_sprite_blocks_switch_block_4x()
 * yields 4 * 4 * 2 * 2 * 2 * 2 * 2 * 2 = 1024 brace-enclosed entries.  The
 * innermost option varies fastest, which fixes the meaning of each bit of
 * the table index:
 *   bit 0     modulated (0) / unmodulated (1)
 *   bit 1     unblended / blended
 *   bit 2     untextured / textured
 *   bit 3     undithered / dithered
 *   bit 4     unshaded / shaded
 *   bit 5     mask evaluate off / on
 *   bits 6-7  blend mode: average, add, subtract, add_fourth
 *   bits 8-9  texture mode: 4bpp, 8bpp, 16bpp, 16bpp
 */

/* Setup function for a textured sprite: setup_sprite_<texture_mode>_4x. */
#define setup_sprite_blocks_switch_textured_4x(texture_mode) \
  setup_sprite_##texture_mode##_4x

/* Setup function for an untextured sprite; texture_mode is ignored. */
#define setup_sprite_blocks_switch_untextured_4x(texture_mode) \
  setup_sprite_untextured_4x

/* Dispatch on texturing (textured / untextured) to one of the two above. */
#define setup_sprite_blocks_switch_4x(texturing, texture_mode) \
  setup_sprite_blocks_switch_##texturing##_4x(texture_mode)

/*
 * One table entry: setup, texture, shade and blend handler, in that order.
 * shading and dithering are accepted for index layout only; the shade
 * handler is always selected with unshaded / undithered, so the entries for
 * the shaded and dithered index bits repeat the unshaded / undithered ones.
 */
#define render_sprite_blocks_switch_block_modulation_4x( \
 texture_mode, blend_mode, mask_evaluate, shading, dithering, \
 texturing, blending, modulation) \
{ \
  setup_sprite_blocks_switch_4x(texturing, texture_mode), \
  texture_sprite_blocks_switch_##texturing(texture_mode), \
  shade_blocks_switch(unshaded, texturing, modulation, undithered, \
   blending, mask_evaluate), \
  blend_blocks_switch(texturing, blending, blend_mode, mask_evaluate) \
}

/* Adds the modulation option: modulated, then unmodulated. */
#define render_sprite_blocks_switch_block_blending_4x( \
 texture_mode, blend_mode, mask_evaluate, shading, dithering, \
 texturing, blending) \
  render_sprite_blocks_switch_block_modulation_4x( \
   texture_mode, blend_mode, mask_evaluate, shading, dithering, \
   texturing, blending, modulated), \
  render_sprite_blocks_switch_block_modulation_4x( \
   texture_mode, blend_mode, mask_evaluate, shading, dithering, \
   texturing, blending, unmodulated)

/* Adds the blending option: unblended, then blended. */
#define render_sprite_blocks_switch_block_texturing_4x( \
 texture_mode, blend_mode, mask_evaluate, shading, dithering, texturing) \
  render_sprite_blocks_switch_block_blending_4x( \
   texture_mode, blend_mode, mask_evaluate, shading, dithering, \
   texturing, unblended), \
  render_sprite_blocks_switch_block_blending_4x( \
   texture_mode, blend_mode, mask_evaluate, shading, dithering, \
   texturing, blended)

/* Adds the texturing option: untextured, then textured. */
#define render_sprite_blocks_switch_block_dithering_4x( \
 texture_mode, blend_mode, mask_evaluate, shading, dithering) \
  render_sprite_blocks_switch_block_texturing_4x( \
   texture_mode, blend_mode, mask_evaluate, shading, dithering, \
   untextured), \
  render_sprite_blocks_switch_block_texturing_4x( \
   texture_mode, blend_mode, mask_evaluate, shading, dithering, \
   textured)

/* Adds the dithering option: undithered, then dithered. */
#define render_sprite_blocks_switch_block_shading_4x( \
 texture_mode, blend_mode, mask_evaluate, shading) \
  render_sprite_blocks_switch_block_dithering_4x( \
   texture_mode, blend_mode, mask_evaluate, shading, undithered), \
  render_sprite_blocks_switch_block_dithering_4x( \
   texture_mode, blend_mode, mask_evaluate, shading, dithered)

/* Adds the shading option: unshaded, then shaded. */
#define render_sprite_blocks_switch_block_mask_evaluate_4x( \
 texture_mode, blend_mode, mask_evaluate) \
  render_sprite_blocks_switch_block_shading_4x( \
   texture_mode, blend_mode, mask_evaluate, unshaded), \
  render_sprite_blocks_switch_block_shading_4x( \
   texture_mode, blend_mode, mask_evaluate, shaded)

/* Adds the mask evaluation option: off, then on. */
#define render_sprite_blocks_switch_block_blend_mode_4x( \
 texture_mode, blend_mode) \
  render_sprite_blocks_switch_block_mask_evaluate_4x( \
   texture_mode, blend_mode, off), \
  render_sprite_blocks_switch_block_mask_evaluate_4x( \
   texture_mode, blend_mode, on)

/* Adds the blend mode: average, add, subtract, add_fourth. */
#define render_sprite_blocks_switch_block_texture_mode_4x(texture_mode) \
  render_sprite_blocks_switch_block_blend_mode_4x(texture_mode, average), \
  render_sprite_blocks_switch_block_blend_mode_4x(texture_mode, add), \
  render_sprite_blocks_switch_block_blend_mode_4x(texture_mode, subtract), \
  render_sprite_blocks_switch_block_blend_mode_4x(texture_mode, add_fourth)

/* Full table body; the fourth texture-mode slot reuses 16bpp. */
#define render_sprite_blocks_switch_block_4x() \
  render_sprite_blocks_switch_block_texture_mode_4x(4bpp), \
  render_sprite_blocks_switch_block_texture_mode_4x(8bpp), \
  render_sprite_blocks_switch_block_texture_mode_4x(16bpp), \
  render_sprite_blocks_switch_block_texture_mode_4x(16bpp)
321 \r
322 \r
323 render_block_handler_struct render_sprite_block_handlers_4x[] =\r
324 {\r
325   render_sprite_blocks_switch_block_4x()\r
326 };\r
327 \r
328 \r
329 void render_sprite_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v,\r
330  s32 width, s32 height, u32 flags, u32 color)\r
331 {\r
332   s32 x_right = x + width - 1;\r
333   s32 y_bottom = y + height - 1;\r
334 \r
335 #ifdef PROFILE\r
336   sprites++;\r
337 #endif\r
338 \r
339   if(x < psx_gpu->viewport_start_x)\r
340   {\r
341     u32 clip = psx_gpu->viewport_start_x - x;\r
342     x += clip;\r
343     u += clip;\r
344     width -= clip;\r
345   }\r
346 \r
347   if(y < psx_gpu->viewport_start_y)\r
348   {\r
349     s32 clip = psx_gpu->viewport_start_y - y;\r
350     y += clip;\r
351     v += clip;\r
352     height -= clip;\r
353   }\r
354 \r
355   if(x_right > psx_gpu->viewport_end_x)\r
356     width -= x_right - psx_gpu->viewport_end_x;\r
357 \r
358   if(y_bottom > psx_gpu->viewport_end_y)\r
359     height -= y_bottom - psx_gpu->viewport_end_y;\r
360 \r
361   if((width <= 0) || (height <= 0))\r
362     return;\r
363 \r
364   psx_gpu->vram_out_ptr = select_enhancement_buf_ptr(psx_gpu, x);\r
365 \r
366   x *= 2;\r
367   y *= 2;\r
368 \r
369 #ifdef PROFILE\r
370   span_pixels += width * height;\r
371   spans += height;\r
372 #endif\r
373 \r
374   u32 render_state = flags &\r
375    (RENDER_FLAGS_MODULATE_TEXELS | RENDER_FLAGS_BLEND |\r
376    RENDER_FLAGS_TEXTURE_MAP);\r
377   render_state |=\r
378    (psx_gpu->render_state_base & ~RENDER_STATE_DITHER);\r
379 \r
380   if((psx_gpu->render_state != render_state) ||\r
381    (psx_gpu->primitive_type != PRIMITIVE_TYPE_SPRITE))\r
382   {\r
383     psx_gpu->render_state = render_state;\r
384     flush_render_block_buffer(psx_gpu);\r
385 #ifdef PROFILE\r
386     state_changes++;\r
387 #endif\r
388   }\r
389 \r
390   psx_gpu->primitive_type = PRIMITIVE_TYPE_SPRITE;\r
391 \r
392   color &= 0xFFFFFF;\r
393 \r
394   if(psx_gpu->triangle_color != color)\r
395   {\r
396     flush_render_block_buffer(psx_gpu);\r
397     psx_gpu->triangle_color = color;\r
398   }\r
399 \r
400   if(color == 0x808080)\r
401     render_state |= RENDER_FLAGS_MODULATE_TEXELS;\r
402 \r
403   render_block_handler_struct *render_block_handler =\r
404    &(render_sprite_block_handlers_4x[render_state]);\r
405   psx_gpu->render_block_handler = render_block_handler;\r
406 \r
407   ((setup_sprite_function_type *)render_block_handler->setup_blocks)\r
408    (psx_gpu, x, y, u, v, width, height, color);\r
409 }\r
410 \r