psx_gpu: more limit checking
[pcsx_rearmed.git] / plugins / gpu_neon / psx_gpu / psx_gpu.c
CommitLineData
75e28f62
E
/*
 * Copyright (C) 2011 Gilead Kutnick "Exophase" <exophase@gmail.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of
 * the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
14
15#include <stdio.h>
16#include <stdlib.h>
17#include <string.h>
18
19#include "common.h"
20
// Global statistics counters, all starting at zero. In the code visible in
// this file, texture_cache_loads and clipped_triangles are bumped
// unconditionally, while span_pixel_blocks and render_buffer_flushes are only
// bumped inside #ifdef PROFILE sections.
u32 span_pixels = 0;
u32 span_pixel_blocks = 0;
u32 spans = 0;
u32 triangles = 0;
u32 sprites = 0;
u32 sprites_4bpp = 0;
u32 sprites_8bpp = 0;
u32 sprites_16bpp = 0;
u32 sprite_blocks = 0;
u32 sprites_untextured = 0;
u32 lines = 0;
u32 trivial_rejects = 0;
u32 texels_4bpp = 0;
u32 texels_8bpp = 0;
u32 texels_16bpp = 0;
u32 texel_blocks_4bpp = 0;
u32 texel_blocks_8bpp = 0;
u32 texel_blocks_16bpp = 0;
u32 texel_blocks_untextured = 0;
u32 blend_blocks = 0;
u32 render_buffer_flushes = 0;
u32 state_changes = 0;
u32 left_split_triangles = 0;
u32 flat_triangles = 0;
u32 clipped_triangles = 0;
u32 zero_block_spans = 0;
u32 texture_cache_loads = 0;
u32 false_modulated_blocks = 0;

// Lookup table indexed by an edge height in compute_edge_delta_x2/x3. Those
// macros use (entry >> 12) as the height reciprocal and the low bits as a
// shift amount (the scalar path masks with 0x1F).
// NOTE(review): the table is not filled in the part of the file seen here;
// presumably it is initialized elsewhere before the span setup code runs.
u32 reciprocal_table[512];
51
52
// Scalar type holding a fixed-point value with FIXED_BITS fractional bits.
typedef s32 fixed_type;

#define EDGE_STEP_BITS 32
#define FIXED_BITS 12

// Integer -> fixed point, placed at the center of the unit (adds one half).
#define fixed_center(value)                                                    \
  ((((fixed_type)(value)) << FIXED_BITS) + (1 << (FIXED_BITS - 1)))

// Integer -> fixed point.
#define int_to_fixed(value)                                                    \
  (((fixed_type)(value)) << FIXED_BITS)

// Fixed point -> integer, dropping the fractional bits with a right shift.
#define fixed_to_int(value)                                                    \
  ((value) >> FIXED_BITS)

// Fixed point -> double.
#define fixed_to_double(value)                                                 \
  ((value) / (double)(1 << FIXED_BITS))

// Double -> fixed point (the conversion truncates toward zero). The whole
// expansion is parenthesized so it composes safely inside larger expressions.
#define double_to_fixed(value)                                                 \
  ((fixed_type)((value) * (double)(1 << FIXED_BITS)))
73typedef void (setup_blocks_function_type)(psx_gpu_struct *psx_gpu);
74typedef void (texture_blocks_function_type)(psx_gpu_struct *psx_gpu);
75typedef void (shade_blocks_function_type)(psx_gpu_struct *psx_gpu);
76typedef void (blend_blocks_function_type)(psx_gpu_struct *psx_gpu);
77
78typedef void (setup_sprite_function_type)(psx_gpu_struct *psx_gpu, s32 x,
79 s32 y, s32 u, s32 v, s32 width, s32 height, u32 color);
80
81struct render_block_handler_struct
82{
83 void *setup_blocks;
84 texture_blocks_function_type *texture_blocks;
85 shade_blocks_function_type *shade_blocks;
86 blend_blocks_function_type *blend_blocks;
87};
88
2bbbb7af 89#ifndef NEON_BUILD
75e28f62
E
90
// Computes a normalized fixed-point reciprocal using a double precision
// divide.
//
// denominator: value to invert; must be non-zero (__builtin_clz(0) is
//              undefined).
// _shift:      receives 62 minus the number of leading zero bits of
//              denominator.
//
// Returns (2^62 + n) / n truncated to 32 bits, where n is the denominator
// shifted left until bit 31 is set. The result is clamped so that it never
// exceeds 0x80000000.
u32 fixed_reciprocal(u32 denominator, u32 *_shift)
{
  u32 shift = __builtin_clz(denominator);
  u32 denominator_normalized = denominator << shift;

  double numerator = (1ULL << 62) + denominator_normalized;

  // The normalized denominator has bit 31 set and fits exactly in a double,
  // so a plain conversion yields the same value the original code assembled
  // bit by bit through type-punned pointers (which violated strict aliasing).
  double reciprocal_dp = numerator / (double)denominator_normalized;
  u32 reciprocal = reciprocal_dp;

  // A denominator that is an exact power of two gives 2^31 + 1; clamp it.
  if(reciprocal == 0x80000001)
    reciprocal = 0x80000000;

  *_shift = 62 - shift;
  return reciprocal;
}
124
// Coarse reciprocal estimate in double precision: the input is quantized
// down to units of 1/512, the reciprocal of the middle of that step is taken,
// and the result is rounded to the nearest 1/256.
double reciprocal_estimate(double a)
{
  /* a in units of 1/512 rounded down */
  int q = (int)(a * 512.0);

  /* reciprocal r */
  double r = 1.0 / (((double)q + 0.5) / 512.0);

  /* r in units of 1/256 rounded to nearest */
  int s = (int)(256.0 * r + 0.5);

  return (double)s / 256.0;
}

// Integer front end for reciprocal_estimate().
//
// Values with bit 31 clear return 0xFFFFFFFF. Otherwise the low 31 bits of
// value become the top mantissa bits of a double in [0.5, 1.0), the estimate
// is computed, and the top 31 mantissa bits of the result are returned with
// bit 31 forced on.
u32 reciprocal_estimate_u32(u32 value)
{
  u64 dp_value_u64;
  double dp_value;

  if((value >> 31) == 0)
    return 0xFFFFFFFF;

  dp_value_u64 = (0x3FEULL << (31 + 21)) | ((u64)(value & 0x7FFFFFFF) << 21);

  // memcpy is the well-defined way to reinterpret the bit pattern; the
  // original went through a volatile u64 pointer aliasing a double.
  memcpy(&dp_value, &dp_value_u64, sizeof(dp_value));

  dp_value = reciprocal_estimate(dp_value);

  memcpy(&dp_value_u64, &dp_value, sizeof(dp_value_u64));

  return (0x80000000 | ((dp_value_u64 >> 21) & 0x7FFFFFFF));
}
158
159u32 fixed_reciprocal_nr(u32 value, u32 *_shift)
160{
161 u32 shift = __builtin_clz(value);
162 u32 value_normalized = value << shift;
163
164 *_shift = 62 - shift;
165
166 value_normalized -= 2;
167
168 u32 reciprocal_normalized = reciprocal_estimate_u32(value_normalized) >> 1;
169
170 u32 temp = -(((u64)value_normalized * (u32)reciprocal_normalized) >> 31);
171 reciprocal_normalized = (((u64)reciprocal_normalized * temp) >> 31);
172 temp = -(((u64)value_normalized * (u32)reciprocal_normalized) >> 31);
173 reciprocal_normalized = (((u64)reciprocal_normalized * temp) >> 31);
174 temp = -(((u64)value_normalized * (u32)reciprocal_normalized) >> 31);
175 reciprocal_normalized = (((u64)reciprocal_normalized * temp) >> 31);
176
177 return reciprocal_normalized;
178}
179
180#endif
181
182
// Returns twice the signed area of the triangle (x0,y0), (x1,y1), (x2,y2),
// computed as the cross product of the edges 0->1 and 1->2. The sign depends
// on the winding order of the vertices; collinear vertices give 0.
s32 triangle_signed_area_x2(s32 x0, s32 y0, s32 x1, s32 y1, s32 x2, s32 y2)
{
  s32 edge_01_x = x1 - x0;
  s32 edge_01_y = y1 - y0;
  s32 edge_12_x = x2 - x1;
  s32 edge_12_y = y2 - y1;

  return (edge_01_x * edge_12_y) - (edge_12_x * edge_01_y);
}
187
// Builds a 32-bit mask with one bit per 64x256 region touched by the
// rectangle (x1, y1)-(x2, y2). Bits 0-15 are the 16 columns of the first row
// (y < 256), bits 16-31 the same columns of the second row.
//
// The mask is the intersection of "everything at or right of / below the top
// left corner" and "everything at or left of / above the bottom right
// corner". Coordinates outside the 16x2 grid are clamped instead of being
// fed to a shift: the original shifted by 32 when x2 >> 6 clamped to 31, and
// by a negative or >= 32 count for out-of-range x1, both undefined in C.
u32 texture_region_mask(s32 x1, s32 y1, s32 x2, s32 y2)
{
  s32 coverage_x, coverage_y;

  u32 mask_up_left;
  u32 mask_down_right;

  coverage_x = x2 >> 6;
  coverage_y = y2 >> 8;

  if(coverage_x < 0)
    coverage_x = 0;

  // Only 16 columns survive the & 0xFFFF below, so column 15 and anything
  // beyond it give the same mask. Clamping here keeps the shift count <= 16.
  if(coverage_x > 15)
    coverage_x = 15;

  mask_down_right = ~(0xFFFFFFFFu << (coverage_x + 1)) & 0xFFFF;

  if(coverage_y >= 1)
    mask_down_right |= mask_down_right << 16;

  coverage_x = x1 >> 6;

  if(coverage_x < 0)
    mask_up_left = 0xFFFF0000;
  else if(coverage_x >= 16)
    mask_up_left = 0;  // every column bit is shifted out
  else
    mask_up_left = 0xFFFF0000u << coverage_x;

  coverage_y = y1 >> 8;
  if(coverage_y <= 0)
    mask_up_left |= mask_up_left >> 16;

  return mask_up_left & mask_down_right;
}
221
222u32 invalidate_texture_cache_region(psx_gpu_struct *psx_gpu, u32 x1, u32 y1,
223 u32 x2, u32 y2)
224{
225 u32 mask = texture_region_mask(x1, y1, x2, y2);
226
227 psx_gpu->dirty_textures_4bpp_mask |= mask;
228 psx_gpu->dirty_textures_8bpp_mask |= mask;
229 psx_gpu->dirty_textures_8bpp_alternate_mask |= mask;
230
231 return mask;
232}
233
234u32 invalidate_texture_cache_region_viewport(psx_gpu_struct *psx_gpu, u32 x1,
235 u32 y1, u32 x2, u32 y2)
236{
237 u32 mask = texture_region_mask(x1, y1, x2, y2) &
238 psx_gpu->viewport_mask;
3867c6ef 239
75e28f62
E
240 psx_gpu->dirty_textures_4bpp_mask |= mask;
241 psx_gpu->dirty_textures_8bpp_mask |= mask;
242 psx_gpu->dirty_textures_8bpp_alternate_mask |= mask;
243
244 return mask;
245}
246
05740673 247void update_texture_cache_region(psx_gpu_struct *psx_gpu, u32 x1, u32 y1,
248 u32 x2, u32 y2)
249{
250 u32 mask = texture_region_mask(x1, y1, x2, y2);
251 u32 texture_page;
252 u8 *texture_page_ptr;
253 u16 *vram_ptr;
254 u32 texel_block;
255 u32 sub_x, sub_y;
256
257 psx_gpu->dirty_textures_8bpp_mask |= mask;
258 psx_gpu->dirty_textures_8bpp_alternate_mask |= mask;
259
260 if ((psx_gpu->dirty_textures_4bpp_mask & mask) == 0 &&
261 (x1 & 3) == 0 && (y1 & 15) == 0 && x2 - x1 < 4 && y2 - y1 < 16)
262 {
263 texture_page = ((x1 / 64) & 15) + (y1 / 256) * 16;
264 texture_page_ptr = psx_gpu->texture_4bpp_cache[texture_page];
265 texture_page_ptr += (x1 / 4 & 15) * 16*16 + (y1 / 16 & 15) * 16*16*16;
266 vram_ptr = psx_gpu->vram_ptr + x1 + y1 * 1024;
267 sub_x = 4;
268 sub_y = 16;
269
270 while(sub_y)
271 {
272 while(sub_x)
273 {
274 texel_block = *vram_ptr;
275
276 texture_page_ptr[0] = texel_block & 0xF;
277 texture_page_ptr[1] = (texel_block >> 4) & 0xF;
278 texture_page_ptr[2] = (texel_block >> 8) & 0xF;
279 texture_page_ptr[3] = texel_block >> 12;
280
281 vram_ptr++;
282 texture_page_ptr += 4;
283
284 sub_x--;
285 }
286
287 vram_ptr -= 4;
288 sub_x = 4;
289
290 sub_y--;
291 vram_ptr += 1024;
292 }
293 }
294 else
295 {
296 psx_gpu->dirty_textures_4bpp_mask |= mask;
297 }
298}
75e28f62
E
299
300void update_texture_8bpp_cache_slice(psx_gpu_struct *psx_gpu,
301 u32 texture_page);
302
2bbbb7af 303#ifndef NEON_BUILD
75e28f62
E
304
305void update_texture_4bpp_cache(psx_gpu_struct *psx_gpu)
306{
307 u32 current_texture_page = psx_gpu->current_texture_page;
3867c6ef 308 u8 *texture_page_ptr = psx_gpu->texture_page_base;
75e28f62
E
309 u16 *vram_ptr = psx_gpu->vram_ptr;
310
311 u32 texel_block;
312 u32 tile_x, tile_y;
313 u32 sub_x, sub_y;
314
315 vram_ptr += (current_texture_page >> 4) * 256 * 1024;
316 vram_ptr += (current_texture_page & 0xF) * 64;
317
318 texture_cache_loads++;
319
320 tile_y = 16;
321 tile_x = 16;
322 sub_x = 4;
323 sub_y = 16;
324
325 psx_gpu->dirty_textures_4bpp_mask &= ~(psx_gpu->current_texture_mask);
326
327 while(tile_y)
328 {
329 while(tile_x)
330 {
331 while(sub_y)
332 {
333 while(sub_x)
334 {
335 texel_block = *vram_ptr;
b7ed0632 336
75e28f62
E
337 texture_page_ptr[0] = texel_block & 0xF;
338 texture_page_ptr[1] = (texel_block >> 4) & 0xF;
339 texture_page_ptr[2] = (texel_block >> 8) & 0xF;
340 texture_page_ptr[3] = texel_block >> 12;
341
342 vram_ptr++;
343 texture_page_ptr += 4;
344
345 sub_x--;
346 }
347
348 vram_ptr -= 4;
349 sub_x = 4;
350
351 sub_y--;
352 vram_ptr += 1024;
353 }
354
355 sub_y = 16;
356
357 vram_ptr -= (1024 * 16) - 4;
358 tile_x--;
359 }
360
361 tile_x = 16;
362
363 vram_ptr += (16 * 1024) - (4 * 16);
364 tile_y--;
365 }
366}
367
368void update_texture_8bpp_cache_slice(psx_gpu_struct *psx_gpu,
369 u32 texture_page)
370{
3867c6ef 371 u16 *texture_page_ptr = psx_gpu->texture_page_base;
75e28f62
E
372 u16 *vram_ptr = psx_gpu->vram_ptr;
373
374 u32 tile_x, tile_y;
375 u32 sub_y;
376
377 vec_8x16u texels;
378
379 texture_cache_loads++;
380
381 vram_ptr += (texture_page >> 4) * 256 * 1024;
382 vram_ptr += (texture_page & 0xF) * 64;
383
384 if((texture_page ^ psx_gpu->current_texture_page) & 0x1)
385 texture_page_ptr += (8 * 16) * 8;
386
387 tile_x = 8;
388 tile_y = 16;
389
390 sub_y = 16;
391
392 while(tile_y)
393 {
394 while(tile_x)
395 {
396 while(sub_y)
397 {
398 load_128b(texels, vram_ptr);
399 store_128b(texels, texture_page_ptr);
400
401 texture_page_ptr += 8;
402 vram_ptr += 1024;
403
404 sub_y--;
405 }
406
407 sub_y = 16;
408
409 vram_ptr -= (1024 * 16);
410 vram_ptr += 8;
411
412 tile_x--;
413 }
414
415 tile_x = 8;
416
417 vram_ptr -= (8 * 8);
418 vram_ptr += (16 * 1024);
419
420 texture_page_ptr += (8 * 16) * 8;
421 tile_y--;
422 }
423}
424
425#endif
426
427
428void update_texture_8bpp_cache(psx_gpu_struct *psx_gpu)
429{
430 u32 current_texture_page = psx_gpu->current_texture_page;
431 u32 update_textures =
432 psx_gpu->dirty_textures_8bpp_mask & psx_gpu->current_texture_mask;
433
434 psx_gpu->dirty_textures_8bpp_mask &= ~update_textures;
435
436 if(update_textures & (1 << current_texture_page))
437 {
438 update_texture_8bpp_cache_slice(psx_gpu, current_texture_page);
439 update_textures &= ~(1 << current_texture_page);
440 }
441
442 if(update_textures)
443 {
444 u32 adjacent_texture_page = ((current_texture_page + 1) & 0xF) |
445 (current_texture_page & 0x10);
446
447 update_texture_8bpp_cache_slice(psx_gpu, adjacent_texture_page);
448 }
449}
450
451void setup_blocks_shaded_untextured_undithered_unswizzled_indirect(
452 psx_gpu_struct *psx_gpu);
453
454void flush_render_block_buffer(psx_gpu_struct *psx_gpu)
455{
69b09c0d
E
456 if((psx_gpu->interlace_mode & RENDER_INTERLACE_ENABLED) &&
457 (psx_gpu->primitive_type == PRIMITIVE_TYPE_SPRITE))
458 {
459 u32 num_blocks_dest = 0;
460 block_struct *block_src = psx_gpu->blocks;
461 block_struct *block_dest = psx_gpu->blocks;
462
463 u16 *vram_ptr = psx_gpu->vram_ptr;
464 u32 i;
465
466 if(psx_gpu->interlace_mode & RENDER_INTERLACE_ODD)
467 {
468 for(i = 0; i < psx_gpu->num_blocks; i++)
469 {
470 u32 fb_offset = (u32)((u8 *)block_src->fb_ptr - (u8 *)vram_ptr);
471 if(fb_offset & (1 << 11))
472 {
473 *block_dest = *block_src;
474 num_blocks_dest++;
475 block_dest++;
476 }
477 block_src++;
478 }
479 }
480 else
481 {
482 for(i = 0; i < psx_gpu->num_blocks; i++)
483 {
484 u32 fb_offset = (u32)((u8 *)block_src->fb_ptr - (u8 *)vram_ptr);
485 if((fb_offset & (1 << 11)) == 0)
486 {
487 *block_dest = *block_src;
488 num_blocks_dest++;
489 block_dest++;
490 }
491 block_src++;
492 }
493 }
494
495 psx_gpu->num_blocks = num_blocks_dest;
496 }
497
75e28f62
E
498 if(psx_gpu->num_blocks)
499 {
500 render_block_handler_struct *render_block_handler =
501 psx_gpu->render_block_handler;
502
503 render_block_handler->texture_blocks(psx_gpu);
504 render_block_handler->shade_blocks(psx_gpu);
505 render_block_handler->blend_blocks(psx_gpu);
506
3867c6ef 507#ifdef PROFILE
75e28f62
E
508 span_pixel_blocks += psx_gpu->num_blocks;
509 render_buffer_flushes++;
3867c6ef 510#endif
75e28f62
E
511
512 psx_gpu->num_blocks = 0;
513 }
514}
515
516
517void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a,
518 vertex_struct *b, vertex_struct *c);
519
2bbbb7af 520#ifndef NEON_BUILD
75e28f62
E
521
/* Loads one vertex into the input vectors of gradient set 0, 1 or 2 used by
   compute_all_gradients(). Expects the x<set>_*, y<set>_* vectors to be
   declared in the enclosing scope. */
#define setup_gradient_calculation_input(set, vertex)                          \
  /* First type is:  uvrg bxxx xxxx */                                         \
  /* Second type is: yyyy ybyy uvrg */                                         \
  /* Since x_a and y_c are the same the same variable is used for both. */     \
  x##set##_a_y##set##_c.e[0] = vertex->u;                                      \
  x##set##_a_y##set##_c.e[1] = vertex->v;                                      \
  x##set##_a_y##set##_c.e[2] = vertex->r;                                      \
  x##set##_a_y##set##_c.e[3] = vertex->g;                                      \
  dup_4x16b(x##set##_b, vertex->x);                                            \
  dup_4x16b(x##set##_c, vertex->x);                                            \
  dup_4x16b(y##set##_a, vertex->y);                                            \
  dup_4x16b(y##set##_b, vertex->y);                                            \
  x##set##_b.e[0] = vertex->b;                                                 \
  y##set##_b.e[1] = vertex->b
537
538void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a,
539 vertex_struct *b, vertex_struct *c)
540{
541 u32 triangle_area = psx_gpu->triangle_area;
542 u32 winding_mask_scalar;
543
544 u32 triangle_area_shift;
545 u64 triangle_area_reciprocal =
546 fixed_reciprocal(triangle_area, &triangle_area_shift);
547 triangle_area_shift = -(triangle_area_shift - FIXED_BITS);
548
549 // ((x1 - x0) * (y2 - y1)) - ((x2 - x1) * (y1 - y0)) =
550 // ( d0 * d1 ) - ( d2 * d3 ) =
551 // ( m0 ) - ( m1 ) = gradient
552
553 // This is split to do 12 elements at a time over three sets: a, b, and c.
554 // Technically we only need to do 10 elements (uvrgb_x and uvrgb_y), so
555 // two of the slots are unused.
556
557 // Inputs are all 16-bit signed. The m0/m1 results are 32-bit signed, as
558 // is g.
559
560 vec_4x16s x0_a_y0_c, x0_b, x0_c;
561 vec_4x16s y0_a, y0_b;
562 vec_4x16s x1_a_y1_c, x1_b, x1_c;
563 vec_4x16s y1_a, y1_b;
564 vec_4x16s x2_a_y2_c, x2_b, x2_c;
565 vec_4x16s y2_a, y2_b;
566
567 vec_4x32u uvrg_base;
568 vec_4x32u b_base;
569 vec_4x32u const_0x8000;
570
571 vec_4x16s d0_a_d3_c, d0_b, d0_c;
572 vec_4x16s d1_a, d1_b, d1_c_d2_a;
573 vec_4x16s d2_b, d2_c;
574 vec_4x16s d3_a, d3_b;
575
576 vec_4x32s m0_a, m0_b, m0_c;
577 vec_4x32s m1_a, m1_b, m1_c;
578
579 vec_4x32u gradient_area_a, gradient_area_c;
580 vec_2x32u gradient_area_b;
581
582 vec_4x32u gradient_area_sign_a, gradient_area_sign_c;
583 vec_2x32u gradient_area_sign_b;
584 vec_4x32u winding_mask;
585
586 vec_2x64u gradient_wide_a0, gradient_wide_a1;
587 vec_2x64u gradient_wide_c0, gradient_wide_c1;
588 vec_2x64u gradient_wide_b;
589
590 vec_4x32u gradient_a, gradient_c;
591 vec_2x32u gradient_b;
592 vec_16x8s gradient_shift;
593
594 setup_gradient_calculation_input(0, a);
595 setup_gradient_calculation_input(1, b);
596 setup_gradient_calculation_input(2, c);
597
598 dup_4x32b(const_0x8000, 0x8000);
599 shl_long_4x16b(uvrg_base, x0_a_y0_c, 16);
600 shl_long_4x16b(b_base, x0_b, 16);
601
602 add_4x32b(uvrg_base, uvrg_base, const_0x8000);
603 add_4x32b(b_base, b_base, const_0x8000);
604
605 // Can probably pair these, but it'll require careful register allocation
606 sub_4x16b(d0_a_d3_c, x1_a_y1_c, x0_a_y0_c);
607 sub_4x16b(d1_c_d2_a, x2_a_y2_c, x1_a_y1_c);
608
609 sub_4x16b(d0_b, x1_b, x0_b);
610 sub_4x16b(d0_c, x1_c, x0_c);
611
612 sub_4x16b(d1_a, y2_a, y1_a);
613 sub_4x16b(d1_b, y2_b, y1_b);
614
615 sub_4x16b(d2_b, x2_b, x1_b);
616 sub_4x16b(d2_c, x2_c, x1_c);
617
618 sub_4x16b(d3_a, y1_a, y0_a);
619 sub_4x16b(d3_b, y1_b, y0_b);
620
621 mul_long_4x16b(m0_a, d0_a_d3_c, d1_a);
622 mul_long_4x16b(m0_b, d0_b, d1_b);
623 mul_long_4x16b(m0_c, d0_c, d1_c_d2_a);
624
625 mul_long_4x16b(m1_a, d1_c_d2_a, d3_a);
626 mul_long_4x16b(m1_b, d2_b, d3_b);
627 mul_long_4x16b(m1_c, d2_c, d0_a_d3_c);
628
629 sub_4x32b(gradient_area_a, m0_a, m1_a);
630 sub_2x32b(gradient_area_b, m0_b.low, m1_b.low);
631 sub_4x32b(gradient_area_c, m0_c, m1_c);
632
633 cmpltz_4x32b(gradient_area_sign_a, gradient_area_a);
634 cmpltz_2x32b(gradient_area_sign_b, gradient_area_b);
635 cmpltz_4x32b(gradient_area_sign_c, gradient_area_c);
636
637 abs_4x32b(gradient_area_a, gradient_area_a);
638 abs_2x32b(gradient_area_b, gradient_area_b);
639 abs_4x32b(gradient_area_c, gradient_area_c);
640
641 winding_mask_scalar = -psx_gpu->triangle_winding;
642
643 dup_4x32b(winding_mask, winding_mask_scalar);
644 eor_4x32b(gradient_area_sign_a, gradient_area_sign_a, winding_mask);
645 eor_2x32b(gradient_area_sign_b, gradient_area_sign_b, winding_mask);
646 eor_4x32b(gradient_area_sign_c, gradient_area_sign_c, winding_mask);
647
648 mul_scalar_long_2x32b(gradient_wide_a0,
649 vector_cast(vec_2x32s, gradient_area_a.low),
650 (s64)triangle_area_reciprocal);
651 mul_scalar_long_2x32b(gradient_wide_a1,
652 vector_cast(vec_2x32s, gradient_area_a.high),
653 (s64)triangle_area_reciprocal);
654 mul_scalar_long_2x32b(gradient_wide_b,
655 vector_cast(vec_2x32s, gradient_area_b),
656 (s64)triangle_area_reciprocal);
657 mul_scalar_long_2x32b(gradient_wide_c0,
658 vector_cast(vec_2x32s, gradient_area_c.low),
659 (s64)triangle_area_reciprocal);
660 mul_scalar_long_2x32b(gradient_wide_c1,
661 vector_cast(vec_2x32s, gradient_area_c.high),
662 (s64)triangle_area_reciprocal);
663
664 dup_16x8b(gradient_shift, triangle_area_shift);
665 shl_reg_2x64b(gradient_wide_a0, gradient_wide_a0,
666 vector_cast(vec_2x64u, gradient_shift));
667 shl_reg_2x64b(gradient_wide_a1, gradient_wide_a1,
668 vector_cast(vec_2x64u, gradient_shift));
669 shl_reg_2x64b(gradient_wide_b, gradient_wide_b,
670 vector_cast(vec_2x64u, gradient_shift));
671 shl_reg_2x64b(gradient_wide_c0, gradient_wide_c0,
672 vector_cast(vec_2x64u, gradient_shift));
673 shl_reg_2x64b(gradient_wide_c1, gradient_wide_c1,
674 vector_cast(vec_2x64u, gradient_shift));
675
676 mov_narrow_2x64b(gradient_a.low, gradient_wide_a0);
677 mov_narrow_2x64b(gradient_a.high, gradient_wide_a1);
678 mov_narrow_2x64b(gradient_b, gradient_wide_b);
679 mov_narrow_2x64b(gradient_c.low, gradient_wide_c0);
680 mov_narrow_2x64b(gradient_c.high, gradient_wide_c1);
681
682 shl_4x32b(gradient_a, gradient_a, 4);
683 shl_2x32b(gradient_b, gradient_b, 4);
684 shl_4x32b(gradient_c, gradient_c, 4);
685
686 eor_4x32b(gradient_a, gradient_a, gradient_area_sign_a);
687 eor_2x32b(gradient_b, gradient_b, gradient_area_sign_b);
688 eor_4x32b(gradient_c, gradient_c, gradient_area_sign_c);
689
690 sub_4x32b(gradient_a, gradient_a, gradient_area_sign_a);
691 sub_2x32b(gradient_b, gradient_b, gradient_area_sign_b);
692 sub_4x32b(gradient_c, gradient_c, gradient_area_sign_c);
693
694 u32 left_adjust = a->x;
695 mls_scalar_4x32b(uvrg_base, gradient_a, left_adjust);
696 mls_scalar_2x32b(b_base.low, gradient_b, left_adjust);
697
698 vec_4x32u uvrg_dx2;
699 vec_2x32u b_dx2;
700
701 vec_4x32u uvrg_dx3;
702 vec_2x32u b_dx3;
703
704 vec_4x32u zero;
705
706 eor_4x32b(zero, zero, zero);
707 add_4x32b(uvrg_dx2, gradient_a, gradient_a);
708 add_2x32b(b_dx2, gradient_b, gradient_b);
709 add_4x32b(uvrg_dx3, gradient_a, uvrg_dx2);
710 add_2x32b(b_dx3, gradient_b, b_dx2);
711
712 // Can be done with vst4, assuming that the zero, dx, dx2, and dx3 are
713 // lined up properly
714 psx_gpu->u_block_span.e[0] = zero.e[0];
715 psx_gpu->u_block_span.e[1] = gradient_a.e[0];
716 psx_gpu->u_block_span.e[2] = uvrg_dx2.e[0];
717 psx_gpu->u_block_span.e[3] = uvrg_dx3.e[0];
718
719 psx_gpu->v_block_span.e[0] = zero.e[1];
720 psx_gpu->v_block_span.e[1] = gradient_a.e[1];
721 psx_gpu->v_block_span.e[2] = uvrg_dx2.e[1];
722 psx_gpu->v_block_span.e[3] = uvrg_dx3.e[1];
723
724 psx_gpu->r_block_span.e[0] = zero.e[2];
725 psx_gpu->r_block_span.e[1] = gradient_a.e[2];
726 psx_gpu->r_block_span.e[2] = uvrg_dx2.e[2];
727 psx_gpu->r_block_span.e[3] = uvrg_dx3.e[2];
728
729 psx_gpu->g_block_span.e[0] = zero.e[3];
730 psx_gpu->g_block_span.e[1] = gradient_a.e[3];
731 psx_gpu->g_block_span.e[2] = uvrg_dx2.e[3];
732 psx_gpu->g_block_span.e[3] = uvrg_dx3.e[3];
733
734 psx_gpu->b_block_span.e[0] = zero.e[0];
735 psx_gpu->b_block_span.e[1] = gradient_b.e[0];
736 psx_gpu->b_block_span.e[2] = b_dx2.e[0];
737 psx_gpu->b_block_span.e[3] = b_dx3.e[0];
738
739 psx_gpu->uvrg = uvrg_base;
740 psx_gpu->b = b_base.e[0];
741
742 psx_gpu->uvrg_dx = gradient_a;
743 psx_gpu->uvrg_dy = gradient_c;
744 psx_gpu->b_dy = gradient_b.e[1];
745}
746#endif
747
/* Debug helper: prints both vectors in hex when their bytes differ. Vectors
   of 8 bytes are printed as two elements, all others as four. Both arguments
   are evaluated more than once. */
#define vector_check(_a, _b)                                                   \
  if(memcmp(&_a, &_b, sizeof(_b)))                                             \
  {                                                                            \
    if(sizeof(_b) == 8)                                                        \
    {                                                                          \
      printf("mismatch on %s vs %s: (%x %x) vs (%x %x)\n",                     \
       #_a, #_b, _a.e[0], _a.e[1], _b.e[0], _b.e[1]);                          \
    }                                                                          \
    else                                                                       \
    {                                                                          \
      printf("mismatch on %s vs %s: (%x %x %x %x) vs (%x %x %x %x)\n",         \
       #_a, #_b, _a.e[0], _a.e[1], _a.e[2], _a.e[3], _b.e[0], _b.e[1],         \
       _b.e[2], _b.e[3]);                                                      \
    }                                                                          \
  }                                                                            \

/* Debug helper: prints both scalars in hex when they differ. The caller
   supplies the terminating semicolon. */
#define scalar_check(_a, _b)                                                   \
  if(_a != _b)                                                                 \
    printf("mismatch on %s %s: %x vs %x\n", #_a, #_b, _a, _b)
768
/* Extra locals for span setup with a third ("alternate") edge. Expects v_b
   in the enclosing scope. */
#define setup_spans_prologue_alternate_yes()                                   \
  vec_2x64s alternate_x;                                                       \
  vec_2x64s alternate_dx_dy;                                                   \
  vec_4x32s alternate_x_32;                                                    \
  vec_2x32s alternate_x_16;                                                    \
                                                                               \
  vec_4x16u alternate_select;                                                  \
  vec_4x16s y_mid_point;                                                       \
                                                                               \
  s32 y_b = v_b->y;                                                            \
  s64 edge_alt;                                                                \
  s32 edge_dx_dy_alt;                                                          \
  u32 edge_shift_alt

/* No alternate edge: nothing extra to declare. */
#define setup_spans_prologue_alternate_no()

/* Declares the locals shared by the span setup macros and loads the vertex
   coordinates and interpolant state. Expects psx_gpu, v_a, v_b and v_c in
   the enclosing scope. */
#define setup_spans_prologue(alternate_active)                                 \
  edge_data_struct *span_edge_data;                                            \
  vec_4x32u *span_uvrg_offset;                                                 \
  u32 *span_b_offset;                                                          \
                                                                               \
  s32 clip;                                                                    \
                                                                               \
  vec_2x64s edges_xy;                                                          \
  vec_2x32s edges_dx_dy;                                                       \
  vec_2x32u edge_shifts;                                                       \
                                                                               \
  vec_2x64s left_x, right_x;                                                   \
  vec_2x64s left_dx_dy, right_dx_dy;                                           \
  vec_4x32s left_x_32, right_x_32;                                             \
  vec_8x16s left_right_x_16;                                                   \
  vec_4x16s y_x4;                                                              \
  vec_8x16s left_edge;                                                         \
  vec_8x16s right_edge;                                                        \
  vec_4x16u span_shift;                                                        \
                                                                               \
  vec_2x32u c_0x01;                                                            \
  vec_4x16u c_0x04;                                                            \
  vec_4x16u c_0xFFFE;                                                          \
  vec_4x16u c_0x07;                                                            \
                                                                               \
  vec_2x32s x_starts;                                                          \
  vec_2x32s x_ends;                                                            \
                                                                               \
  s32 x_a = v_a->x;                                                            \
  s32 x_b = v_b->x;                                                            \
  s32 x_c = v_c->x;                                                            \
  s32 y_a = v_a->y;                                                            \
  s32 y_c = v_c->y;                                                            \
                                                                               \
  vec_4x32u uvrg = psx_gpu->uvrg;                                              \
  vec_4x32u uvrg_dy = psx_gpu->uvrg_dy;                                        \
  u32 b = psx_gpu->b;                                                          \
  u32 b_dy = psx_gpu->b_dy;                                                    \
                                                                               \
  dup_2x32b(c_0x01, 0x01);                                                     \
  setup_spans_prologue_alternate_##alternate_active()

/* Second prologue stage: points the output cursors at the span buffers of
   psx_gpu and fills the constant vectors. right_edge is viewport_end_x + 1. */
#define setup_spans_prologue_b()                                               \
  span_edge_data = psx_gpu->span_edge_data;                                    \
  span_uvrg_offset = psx_gpu->span_uvrg_offset;                                \
  span_b_offset = psx_gpu->span_b_offset;                                      \
                                                                               \
  vec_8x16u c_0x0001;                                                          \
                                                                               \
  dup_8x16b(c_0x0001, 0x0001);                                                 \
  dup_8x16b(left_edge, psx_gpu->viewport_start_x);                             \
  dup_8x16b(right_edge, psx_gpu->viewport_end_x);                              \
  add_8x16b(right_edge, right_edge, c_0x0001);                                 \
  dup_4x16b(c_0x04, 0x04);                                                     \
  dup_4x16b(c_0x07, 0x07);                                                     \
  dup_4x16b(c_0xFFFE, 0xFFFE);
842
/* Computes start positions (edges_xy) and per-line steps (edges_dx_dy) for
   two edges that share one height. Expects height, x_starts, x_ends,
   edge_shifts, edges_xy, edges_dx_dy and c_0x01 in the enclosing scope.
   The reciprocal_table entry is used as (entry >> 12) for the reciprocal;
   bits 0xE0 are then cleared in each 16-bit lane of edge_shifts. */
#define compute_edge_delta_x2()                                                \
{                                                                              \
  vec_2x32s heights;                                                           \
  vec_2x32s height_reciprocals;                                                \
  vec_2x32s heights_b;                                                         \
  vec_4x32u widths;                                                            \
                                                                               \
  u32 edge_shift = reciprocal_table[height];                                   \
                                                                               \
  dup_2x32b(heights, height);                                                  \
  sub_2x32b(widths, x_ends, x_starts);                                         \
                                                                               \
  dup_2x32b(edge_shifts, edge_shift);                                          \
  sub_2x32b(heights_b, heights, c_0x01);                                       \
  shr_2x32b(height_reciprocals, edge_shifts, 12);                              \
                                                                               \
  mla_2x32b(heights_b, x_starts, heights);                                     \
  bic_immediate_4x16b(vector_cast(vec_4x16u, edge_shifts), 0xE0);              \
  mul_2x32b(edges_dx_dy, widths, height_reciprocals);                          \
  mul_long_2x32b(edges_xy, heights_b, height_reciprocals);                     \
}
/* Three-edge variant of compute_edge_delta_x2: two edges with heights
   height_a and height_b are handled in the vectors, the third (alternate)
   edge in scalars. The alternate edge runs from start_c to x_c and uses
   height_minor_b, which is taken from the enclosing scope and is not a macro
   parameter. */
#define compute_edge_delta_x3(start_c, height_a, height_b)                     \
{                                                                              \
  vec_2x32s heights;                                                           \
  vec_2x32s height_reciprocals;                                                \
  vec_2x32s heights_b;                                                         \
  vec_2x32u widths;                                                            \
                                                                               \
  u32 width_alt;                                                               \
  s32 height_b_alt;                                                            \
  u32 height_reciprocal_alt;                                                   \
                                                                               \
  heights.e[0] = height_a;                                                     \
  heights.e[1] = height_b;                                                     \
                                                                               \
  edge_shifts.e[0] = reciprocal_table[height_a];                               \
  edge_shifts.e[1] = reciprocal_table[height_b];                               \
  edge_shift_alt = reciprocal_table[height_minor_b];                           \
                                                                               \
  sub_2x32b(widths, x_ends, x_starts);                                         \
  width_alt = x_c - start_c;                                                   \
                                                                               \
  shr_2x32b(height_reciprocals, edge_shifts, 12);                              \
  height_reciprocal_alt = edge_shift_alt >> 12;                                \
                                                                               \
  bic_immediate_4x16b(vector_cast(vec_4x16u, edge_shifts), 0xE0);              \
  edge_shift_alt &= 0x1F;                                                      \
                                                                               \
  sub_2x32b(heights_b, heights, c_0x01);                                       \
  height_b_alt = height_minor_b - 1;                                           \
                                                                               \
  mla_2x32b(heights_b, x_starts, heights);                                     \
  height_b_alt += height_minor_b * start_c;                                    \
                                                                               \
  mul_long_2x32b(edges_xy, heights_b, height_reciprocals);                     \
  edge_alt = (s64)height_b_alt * height_reciprocal_alt;                        \
                                                                               \
  mul_2x32b(edges_dx_dy, widths, height_reciprocals);                          \
  edge_dx_dy_alt = width_alt * height_reciprocal_alt;                          \
}
905
/* Step the four packed y values by 4 lines up or down. */
#define setup_spans_adjust_y_up()                                              \
  sub_4x32b(y_x4, y_x4, c_0x04)

#define setup_spans_adjust_y_down()                                            \
  add_4x32b(y_x4, y_x4, c_0x04)

/* Step the interpolants by one line up or down. */
#define setup_spans_adjust_interpolants_up()                                   \
  sub_4x32b(uvrg, uvrg, uvrg_dy);                                              \
  b -= b_dy

#define setup_spans_adjust_interpolants_down()                                 \
  add_4x32b(uvrg, uvrg, uvrg_dy);                                              \
  b += b_dy

/* Step the interpolants by clip lines at once. */
#define setup_spans_clip_interpolants_increment()                              \
  mla_scalar_4x32b(uvrg, uvrg_dy, clip);                                       \
  b += b_dy * clip

#define setup_spans_clip_interpolants_decrement()                              \
  mls_scalar_4x32b(uvrg, uvrg_dy, clip);                                       \
  b -= b_dy * clip

/* Advance the alternate edge by clip lines (nothing when there is none). */
#define setup_spans_clip_alternate_yes()                                       \
  edge_alt += edge_dx_dy_alt * (s64)(clip)

#define setup_spans_clip_alternate_no()

/* Skips clip lines: advances both main edges, the alternate edge if active,
   and the interpolants, and counts the triangle as clipped. */
#define setup_spans_clip(direction, alternate_active)                          \
{                                                                              \
  clipped_triangles++;                                                         \
  mla_scalar_long_2x32b(edges_xy, edges_dx_dy, (s64)clip);                     \
  setup_spans_clip_alternate_##alternate_active();                             \
  setup_spans_clip_interpolants_##direction();                                 \
}
942
/* Shifts the edge positions and steps by their per-edge shift amounts and
   arranges them for two lines per vector: left_x/right_x hold the values for
   lines n and n + 1, and the dx_dy vectors are doubled so one add advances
   both lanes by two lines. left_index/right_index select which of the two
   computed edges is the left and which the right one. */
#define setup_spans_adjust_edges_alternate_no(left_index, right_index)         \
{                                                                              \
  vec_2x64u edge_shifts_64;                                                    \
  vec_2x64s edges_dx_dy_64;                                                    \
                                                                               \
  mov_wide_2x32b(edge_shifts_64, edge_shifts);                                 \
  shl_variable_2x64b(edges_xy, edges_xy, edge_shifts_64);                      \
                                                                               \
  mov_wide_2x32b(edges_dx_dy_64, edges_dx_dy);                                 \
  shl_variable_2x64b(edges_dx_dy_64, edges_dx_dy_64, edge_shifts_64);          \
                                                                               \
  left_x.e[0] = edges_xy.e[left_index];                                        \
  right_x.e[0] = edges_xy.e[right_index];                                      \
                                                                               \
  left_dx_dy.e[0] = edges_dx_dy_64.e[left_index];                              \
  left_dx_dy.e[1] = edges_dx_dy_64.e[left_index];                              \
  right_dx_dy.e[0] = edges_dx_dy_64.e[right_index];                            \
  right_dx_dy.e[1] = edges_dx_dy_64.e[right_index];                            \
                                                                               \
  add_1x64b(left_x.high, left_x.low, left_dx_dy.low);                          \
  add_1x64b(right_x.high, right_x.low, right_dx_dy.low);                       \
                                                                               \
  add_2x64b(left_dx_dy, left_dx_dy, left_dx_dy);                               \
  add_2x64b(right_dx_dy, right_dx_dy, right_dx_dy);                            \
}

/* Same as the _no variant, and additionally prepares the alternate edge the
   same way and loads y_mid_point with y_b. */
#define setup_spans_adjust_edges_alternate_yes(left_index, right_index)        \
{                                                                              \
  setup_spans_adjust_edges_alternate_no(left_index, right_index);              \
  s64 edge_dx_dy_alt_64;                                                       \
                                                                               \
  dup_4x16b(y_mid_point, y_b);                                                 \
                                                                               \
  edge_alt <<= edge_shift_alt;                                                 \
  edge_dx_dy_alt_64 = (s64)edge_dx_dy_alt << edge_shift_alt;                   \
                                                                               \
  alternate_x.e[0] = edge_alt;                                                 \
  alternate_dx_dy.e[0] = edge_dx_dy_alt_64;                                    \
  alternate_dx_dy.e[1] = edge_dx_dy_alt_64;                                    \
                                                                               \
  add_1x64b(alternate_x.high, alternate_x.low, alternate_dx_dy.low);           \
  add_2x64b(alternate_dx_dy, alternate_dx_dy, alternate_dx_dy);                \
}
987
/* Build the per-line mask of lines past the middle vertex (below y_mid_point
   when going up, above it when going down). */
#define setup_spans_y_select_up()                                              \
  cmplt_4x16b(alternate_select, y_x4, y_mid_point)

#define setup_spans_y_select_down()                                            \
  cmpgt_4x16b(alternate_select, y_x4, y_mid_point)

#define setup_spans_y_select_alternate_yes(direction)                          \
  setup_spans_y_select_##direction()

#define setup_spans_y_select_alternate_no(direction)

/* On the selected lines, replace the left or right x with the alternate
   edge's x. */
#define setup_spans_alternate_select_left()                                    \
  bit_4x16b(left_right_x_16.low, alternate_x_16, alternate_select)

#define setup_spans_alternate_select_right()                                   \
  bit_4x16b(left_right_x_16.high, alternate_x_16, alternate_select)

#define setup_spans_alternate_select_none()

/* Extract the alternate edge's x for four lines (upper 32 bits of each
   64-bit position, narrowed) and advance it by four lines. */
#define setup_spans_increment_alternate_yes()                                  \
  shr_narrow_2x64b(alternate_x_32.low, alternate_x, 32);                       \
  add_2x64b(alternate_x, alternate_x, alternate_dx_dy);                        \
  shr_narrow_2x64b(alternate_x_32.high, alternate_x, 32);                      \
  add_2x64b(alternate_x, alternate_x, alternate_dx_dy);                        \
  mov_narrow_4x32b(alternate_x_16, alternate_x_32)

#define setup_spans_increment_alternate_no()
/* Emits four consecutive spans.
   - Stores the interpolant start values of four lines, stepping them by one
     line after each store.
   - Extracts left/right x for four lines and advances the edges.
   - Optionally substitutes the alternate edge on the selected lines.
   - Clamps x to [left_edge, right_edge], then derives from the span width
     the number of 8-pixel blocks ((width + 7) >> 3) and a right mask
     (0xFFFE shifted left by (width + 7) & 7).
   - Writes the four span_edge_data entries and steps y by four lines. */
#define setup_spans_set_x4(alternate, direction, alternate_active)             \
{                                                                              \
  span_uvrg_offset[0] = uvrg;                                                  \
  span_b_offset[0] = b;                                                        \
  setup_spans_adjust_interpolants_##direction();                               \
                                                                               \
  span_uvrg_offset[1] = uvrg;                                                  \
  span_b_offset[1] = b;                                                        \
  setup_spans_adjust_interpolants_##direction();                               \
                                                                               \
  span_uvrg_offset[2] = uvrg;                                                  \
  span_b_offset[2] = b;                                                        \
  setup_spans_adjust_interpolants_##direction();                               \
                                                                               \
  span_uvrg_offset[3] = uvrg;                                                  \
  span_b_offset[3] = b;                                                        \
  setup_spans_adjust_interpolants_##direction();                               \
                                                                               \
  span_uvrg_offset += 4;                                                       \
  span_b_offset += 4;                                                          \
                                                                               \
  shr_narrow_2x64b(left_x_32.low, left_x, 32);                                 \
  shr_narrow_2x64b(right_x_32.low, right_x, 32);                               \
                                                                               \
  add_2x64b(left_x, left_x, left_dx_dy);                                       \
  add_2x64b(right_x, right_x, right_dx_dy);                                    \
                                                                               \
  shr_narrow_2x64b(left_x_32.high, left_x, 32);                                \
  shr_narrow_2x64b(right_x_32.high, right_x, 32);                              \
                                                                               \
  add_2x64b(left_x, left_x, left_dx_dy);                                       \
  add_2x64b(right_x, right_x, right_dx_dy);                                    \
                                                                               \
  mov_narrow_4x32b(left_right_x_16.low, left_x_32);                            \
  mov_narrow_4x32b(left_right_x_16.high, right_x_32);                          \
                                                                               \
  setup_spans_increment_alternate_##alternate_active();                        \
  setup_spans_y_select_alternate_##alternate_active(direction);                \
  setup_spans_alternate_select_##alternate();                                  \
                                                                               \
  max_8x16b(left_right_x_16, left_right_x_16, left_edge);                      \
  min_8x16b(left_right_x_16, left_right_x_16, right_edge);                     \
                                                                               \
  sub_4x16b(left_right_x_16.high, left_right_x_16.high, left_right_x_16.low);  \
  add_4x16b(left_right_x_16.high, left_right_x_16.high, c_0x07);               \
  and_4x16b(span_shift, left_right_x_16.high, c_0x07);                         \
  shl_variable_4x16b(span_shift, c_0xFFFE, span_shift);                        \
  shr_4x16b(left_right_x_16.high, left_right_x_16.high, 3);                    \
                                                                               \
  u32 i;                                                                       \
  for(i = 0; i < 4; i++)                                                       \
  {                                                                            \
    span_edge_data[i].left_x = left_right_x_16.low.e[i];                       \
    span_edge_data[i].num_blocks = left_right_x_16.high.e[i];                  \
    span_edge_data[i].right_mask = span_shift.e[i];                            \
    span_edge_data[i].y = y_x4.e[i];                                           \
  }                                                                            \
                                                                               \
  span_edge_data += 4;                                                         \
                                                                               \
  setup_spans_adjust_y_##direction();                                          \
}
1079
/* Moves the alternate edge back by height_minor_a lines (height_minor_a
   comes from the enclosing scope). */
#define setup_spans_alternate_adjust_yes()                                     \
  edge_alt -= edge_dx_dy_alt * (s64)height_minor_a

#define setup_spans_alternate_adjust_no()

/* Generates spans going down from y_a.
   - Lines below viewport_end_y are removed from height.
   - Lines above viewport_start_y are skipped with setup_spans_clip().
   - If anything is left, spans are emitted four at a time and
     psx_gpu->num_spans is set to the clipped height.
   Expects height and the setup_spans_prologue() locals in scope. */
#define setup_spans_down(left_index, right_index, alternate, alternate_active) \
  setup_spans_alternate_adjust_##alternate_active();                           \
  if(y_c > psx_gpu->viewport_end_y)                                            \
    height -= y_c - psx_gpu->viewport_end_y - 1;                               \
                                                                               \
  clip = psx_gpu->viewport_start_y - y_a;                                      \
  if(clip > 0)                                                                 \
  {                                                                            \
    height -= clip;                                                            \
    y_a += clip;                                                               \
    setup_spans_clip(increment, alternate_active);                             \
  }                                                                            \
                                                                               \
  setup_spans_prologue_b();                                                    \
                                                                               \
  if(height > 0)                                                               \
  {                                                                            \
    y_x4.e[0] = y_a;                                                           \
    y_x4.e[1] = y_a + 1;                                                       \
    y_x4.e[2] = y_a + 2;                                                       \
    y_x4.e[3] = y_a + 3;                                                       \
    setup_spans_adjust_edges_alternate_##alternate_active(left_index,          \
     right_index);                                                             \
                                                                               \
    psx_gpu->num_spans = height;                                               \
    do                                                                         \
    {                                                                          \
      setup_spans_set_x4(alternate, down, alternate_active);                   \
      height -= 4;                                                             \
    } while(height > 0);                                                       \
  }
1118
/* alternate_active == yes: advance edge_alt by one edge_dx_dy_alt step. */
1119#define setup_spans_alternate_pre_increment_yes() \
1120 edge_alt += edge_dx_dy_alt \
1121
/* alternate_active == no variant of the pre-increment hook. */
1122#define setup_spans_alternate_pre_increment_no() \
1123
/* alternate_active == yes: the upward setup drops one row from height. */
1124#define setup_spans_up_decrement_height_yes() \
1125 height-- \
1126
/* alternate_active == no: empty compound statement so the "else" branch in
 * setup_spans_up still has a body. */
1127#define setup_spans_up_decrement_height_no() \
1128 {} \
1129
/* Emits spans for rows of decreasing y, starting one row above y_a.
 * Works on locals supplied by the expanding function (height, clip, y_a, y_c,
 * y_x4, edges_xy, edges_dx_dy, ...):
 *  - applies the alternate-edge adjustment, then pre-decrements y_a;
 *  - shortens height when y_c is above psx_gpu->viewport_start_y, otherwise
 *    runs the alternate_active-specific height decrement;
 *  - when y_a is below psx_gpu->viewport_end_y, drops those rows and hands
 *    the skipped amount (clip) to setup_spans_clip;
 *  - if rows remain, steps the edges once, adjusts the interpolants for the
 *    upward direction, stores the count in psx_gpu->num_spans and calls
 *    setup_spans_set_x4 once per group of four rows. */
1130#define setup_spans_up(left_index, right_index, alternate, alternate_active) \
1131 setup_spans_alternate_adjust_##alternate_active(); \
1132 y_a--; \
1133 \
1134 if(y_c < psx_gpu->viewport_start_y) \
1135 height -= psx_gpu->viewport_start_y - y_c; \
1136 else \
1137 setup_spans_up_decrement_height_##alternate_active(); \
1138 \
1139 clip = y_a - psx_gpu->viewport_end_y; \
1140 if(clip > 0) \
1141 { \
1142 height -= clip; \
1143 y_a -= clip; \
1144 setup_spans_clip(decrement, alternate_active); \
1145 } \
1146 \
1147 setup_spans_prologue_b(); \
1148 \
1149 if(height > 0) \
1150 { \
1151 y_x4.e[0] = y_a; \
1152 y_x4.e[1] = y_a - 1; \
1153 y_x4.e[2] = y_a - 2; \
1154 y_x4.e[3] = y_a - 3; \
1155 add_wide_2x32b(edges_xy, edges_xy, edges_dx_dy); \
1156 setup_spans_alternate_pre_increment_##alternate_active(); \
1157 setup_spans_adjust_edges_alternate_##alternate_active(left_index, \
1158 right_index); \
1159 setup_spans_adjust_interpolants_up(); \
1160 \
1161 psx_gpu->num_spans = height; /* exact row count, not rounded to 4 */ \
1162 while(height > 0) \
1163 { \
1164 setup_spans_set_x4(alternate, up, alternate_active); \
1165 height -= 4; \
1166 } \
1167 } \
1168
/* Lane numbers produced by the index_##major / index_##minor pastes in the
 * setup_spans_* macros and passed on as left_index / right_index. */
1169#define index_left 0
1170#define index_right 1
1171
/* Body shared by setup_spans_up_left/right: a three-edge setup walked upward.
 * Heights are measured from y_a towards y_c (y_a - y_b, y_b - y_c, y_a - y_c).
 * Both edges start at x_a; lane 0 ends at x_c and lane 1 at x_b. The third
 * ("alternate") edge is set up by compute_edge_delta_x3 and the spans are
 * emitted by setup_spans_up with the alternate edge active. */
1172#define setup_spans_up_up(minor, major) \
1173 setup_spans_prologue(yes); \
1174 s32 height_minor_a = y_a - y_b; \
1175 s32 height_minor_b = y_b - y_c; \
1176 s32 height = y_a - y_c; \
1177 \
1178 dup_2x32b(x_starts, x_a); \
1179 x_ends.e[0] = x_c; \
1180 x_ends.e[1] = x_b; \
1181 \
1182 compute_edge_delta_x3(x_b, height, height_minor_a); \
1183 setup_spans_up(index_##major, index_##minor, minor, yes) \
1184
1185
/* Prototypes of the span setup entry points. All take the GPU state plus the
 * three triangle vertices. The C definitions that follow in this file are
 * compiled only when NEON_BUILD is not defined. */
1186void setup_spans_up_left(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
1187 vertex_struct *v_b, vertex_struct *v_c);
1188void setup_spans_up_right(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
1189 vertex_struct *v_b, vertex_struct *v_c);
1190void setup_spans_down_left(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
1191 vertex_struct *v_b, vertex_struct *v_c);
1192void setup_spans_down_right(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
1193 vertex_struct *v_b, vertex_struct *v_c);
1194void setup_spans_up_a(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
1195 vertex_struct *v_b, vertex_struct *v_c);
1196void setup_spans_up_b(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
1197 vertex_struct *v_b, vertex_struct *v_c);
1198void setup_spans_down_a(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
1199 vertex_struct *v_b, vertex_struct *v_c);
1200void setup_spans_down_b(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
1201 vertex_struct *v_b, vertex_struct *v_c);
1202void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
1203 vertex_struct *v_b, vertex_struct *v_c);
1204
1205
2bbbb7af 1206#ifndef NEON_BUILD
75e28f62
E
1207
/* Upward three-edge span setup, expanded with minor = left, major = right.
 * The whole body comes from setup_spans_up_up, which reads psx_gpu and the
 * vertices through the names of these parameters. */
1208void setup_spans_up_left(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
1209 vertex_struct *v_b, vertex_struct *v_c)
1210{
1211 setup_spans_up_up(left, right);
1212}
1213
/* Upward three-edge span setup, expanded with minor = right, major = left.
 * The whole body comes from setup_spans_up_up, which reads psx_gpu and the
 * vertices through the names of these parameters. */
1214void setup_spans_up_right(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
1215 vertex_struct *v_b, vertex_struct *v_c)
1216{
1217 setup_spans_up_up(right, left);
1218}
1219
/* Body shared by setup_spans_down_left/right: a three-edge setup walked
 * downward. Heights are measured from y_a towards y_c (y_b - y_a, y_c - y_b,
 * y_c - y_a). Both edges start at x_a; lane 0 ends at x_c and lane 1 at x_b.
 * The third ("alternate") edge is set up by compute_edge_delta_x3 and the
 * spans are emitted by setup_spans_down with the alternate edge active. */
1220#define setup_spans_down_down(minor, major) \
1221 setup_spans_prologue(yes); \
1222 s32 height_minor_a = y_b - y_a; \
1223 s32 height_minor_b = y_c - y_b; \
1224 s32 height = y_c - y_a; \
1225 \
1226 dup_2x32b(x_starts, x_a); \
1227 x_ends.e[0] = x_c; \
1228 x_ends.e[1] = x_b; \
1229 \
1230 compute_edge_delta_x3(x_b, height, height_minor_a); \
1231 setup_spans_down(index_##major, index_##minor, minor, yes) \
1232
/* Downward three-edge span setup, expanded with minor = left, major = right.
 * The whole body comes from setup_spans_down_down. */
1233void setup_spans_down_left(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
1234 vertex_struct *v_b, vertex_struct *v_c)
1235{
1236 setup_spans_down_down(left, right);
1237}
1238
/* Downward three-edge span setup, expanded with minor = right, major = left.
 * The whole body comes from setup_spans_down_down. */
1239void setup_spans_down_right(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
1240 vertex_struct *v_b, vertex_struct *v_c)
1241{
1242 setup_spans_down_down(right, left);
1243}
1244
/* Two-edge upward setup used by setup_spans_up_a/b: height is y_a - y_c, the
 * flat_triangles statistic is bumped, edge deltas come from
 * compute_edge_delta_x2 and spans are emitted with no alternate edge. */
1245#define setup_spans_up_flat() \
1246 s32 height = y_a - y_c; \
1247 \
1248 flat_triangles++; \
1249 compute_edge_delta_x2(); \
1250 setup_spans_up(index_left, index_right, none, no) \
1251
/* Upward two-edge setup where the edges start at different x (x_a for lane 0,
 * x_b for lane 1) and both end at x_c. */
1252void setup_spans_up_a(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
1253 vertex_struct *v_b, vertex_struct *v_c)
1254{
1255 setup_spans_prologue(no);
1256 x_starts.e[0] = x_a;
1257 x_starts.e[1] = x_b;
1258 dup_2x32b(x_ends, x_c);
1259
1260 setup_spans_up_flat();
1261}
1262
/* Upward two-edge setup where both edges start at x_a and end at different x
 * (x_b for lane 0, x_c for lane 1). */
1263void setup_spans_up_b(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
1264 vertex_struct *v_b, vertex_struct *v_c)
1265{
1266 setup_spans_prologue(no);
1267 dup_2x32b(x_starts, x_a);
1268 x_ends.e[0] = x_b;
1269 x_ends.e[1] = x_c;
1270
1271 setup_spans_up_flat();
1272}
1273
/* Two-edge downward setup used by setup_spans_down_a/b: height is y_c - y_a,
 * the flat_triangles statistic is bumped, edge deltas come from
 * compute_edge_delta_x2 and spans are emitted with no alternate edge. */
1274#define setup_spans_down_flat() \
1275 s32 height = y_c - y_a; \
1276 \
1277 flat_triangles++; \
1278 compute_edge_delta_x2(); \
1279 setup_spans_down(index_left, index_right, none, no) \
1280
/* Downward two-edge setup where the edges start at different x (x_a for lane
 * 0, x_b for lane 1) and both end at x_c. */
1281void setup_spans_down_a(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
1282 vertex_struct *v_b, vertex_struct *v_c)
1283{
1284 setup_spans_prologue(no);
1285 x_starts.e[0] = x_a;
1286 x_starts.e[1] = x_b;
1287 dup_2x32b(x_ends, x_c);
1288
1289 setup_spans_down_flat();
1290}
1291
/* Downward two-edge setup where both edges start at x_a and end at different
 * x (x_b for lane 0, x_c for lane 1). */
1292void setup_spans_down_b(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
1293 vertex_struct *v_b, vertex_struct *v_c)
1294{
1295 setup_spans_prologue(no);
1296 dup_2x32b(x_starts, x_a);
1297 x_ends.e[0] = x_b;
1298 x_ends.e[1] = x_c;
1299
1300 setup_spans_down_flat();
1301}
1302
/* Span setup for a triangle processed as two halves that meet at row y_a:
 *  1. rows going up from y_a - 1, height_minor_a = y_a - y_b of them;
 *  2. rows going down from y_a, height_minor_b = y_c - y_a of them.
 * The edge state for the second half (edges_xy_b, edges_dx_dy_b,
 * edge_shifts_b) is computed up front, before the first half modifies
 * edges_xy. Each half is clipped against psx_gpu->viewport_start_y /
 * viewport_end_y on its own. psx_gpu->num_spans ends up as the sum of the
 * rows emitted by both halves. Bumps the left_split_triangles statistic. */
1303void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
1304 vertex_struct *v_b, vertex_struct *v_c)
1305{
1306 setup_spans_prologue(no);
1307
1308 s32 y_b = v_b->y;
1309 s64 edge_alt;
1310 s32 edge_dx_dy_alt;
1311 u32 edge_shift_alt;
1312
1313 s32 middle_y = y_a;
1314 s32 height_minor_a = y_a - y_b;
1315 s32 height_minor_b = y_c - y_a;
1316 s32 height_major = y_c - y_b;
1317
1318 vec_2x64s edges_xy_b;
1319 vec_2x32s edges_dx_dy_b;
1320 vec_2x32u edge_shifts_b;
1321
1322 vec_2x32s height_increment;
1323
1324 x_starts.e[0] = x_a;
1325 x_starts.e[1] = x_c;
1326 dup_2x32b(x_ends, x_b);
1327
1328 compute_edge_delta_x3(x_a, height_minor_a, height_major);
1329
/* Only lane 1 gets a non-zero multiplier (height_minor_b); lane 0 uses 0.
 * NOTE(review): mla_long_2x32b presumably does
 * edges_xy += edges_dx_dy * height_increment per lane - confirm. */
1330 height_increment.e[0] = 0;
1331 height_increment.e[1] = height_minor_b;
1332
1333 mla_long_2x32b(edges_xy, edges_dx_dy, height_increment);
1334
/* Second-half edge state: lane 0 takes the alternate edge values, lane 1
 * keeps the current lane-1 values with the step negated. */
1335 edges_xy_b.e[0] = edge_alt;
1336 edges_xy_b.e[1] = edges_xy.e[1];
1337
1338 edge_shifts_b = edge_shifts;
1339 edge_shifts_b.e[0] = edge_shift_alt;
1340
1341 neg_2x32b(edges_dx_dy_b, edges_dx_dy);
1342 edges_dx_dy_b.e[0] = edge_dx_dy_alt;
1343
/* ---- first half: upward from the row above y_a ---- */
1344 y_a--;
1345
1346 if(y_b < psx_gpu->viewport_start_y)
1347 height_minor_a -= psx_gpu->viewport_start_y - y_b;
1348
1349 clip = y_a - psx_gpu->viewport_end_y;
1350 if(clip > 0)
1351 {
1352 height_minor_a -= clip;
1353 y_a -= clip;
1354 setup_spans_clip(decrement, no);
1355 }
1356
1357 setup_spans_prologue_b();
1358
1359 if(height_minor_a > 0)
1360 {
1361 y_x4.e[0] = y_a;
1362 y_x4.e[1] = y_a - 1;
1363 y_x4.e[2] = y_a - 2;
1364 y_x4.e[3] = y_a - 3;
1365 add_wide_2x32b(edges_xy, edges_xy, edges_dx_dy);
1366 setup_spans_adjust_edges_alternate_no(index_left, index_right);
1367 setup_spans_adjust_interpolants_up();
1368
1369 psx_gpu->num_spans = height_minor_a;
1370 while(height_minor_a > 0)
1371 {
1372 setup_spans_set_x4(none, up, no);
1373 height_minor_a -= 4;
1374 }
1375
/* height_minor_a is <= 0 here, so these additions step the output pointers
 * back by the amount the loop overshot the row count.
 * NOTE(review): presumably because setup_spans_set_x4 always advances the
 * pointers by four entries - confirm against its definition. */
1376 span_edge_data += height_minor_a;
1377 span_uvrg_offset += height_minor_a;
1378 span_b_offset += height_minor_a;
1379 }
1380
/* ---- second half: switch to the precomputed edge state, reload the
 * interpolant start values from psx_gpu and restart at the shared row ---- */
1381 edges_xy = edges_xy_b;
1382 edges_dx_dy = edges_dx_dy_b;
1383 edge_shifts = edge_shifts_b;
1384
1385 uvrg = psx_gpu->uvrg;
1386 b = psx_gpu->b;
1387
1388 y_a = middle_y;
1389
1390 if(y_c > psx_gpu->viewport_end_y)
1391 height_minor_b -= y_c - psx_gpu->viewport_end_y - 1;
1392
1393 clip = psx_gpu->viewport_start_y - y_a;
1394 if(clip > 0)
1395 {
1396 height_minor_b -= clip;
1397 y_a += clip;
1398 setup_spans_clip(increment, no);
1399 }
1400
1401 if(height_minor_b > 0)
1402 {
1403 y_x4.e[0] = y_a;
1404 y_x4.e[1] = y_a + 1;
1405 y_x4.e[2] = y_a + 2;
1406 y_x4.e[3] = y_a + 3;
1407 setup_spans_adjust_edges_alternate_no(index_left, index_right);
1408
/* Accumulates on top of whatever num_spans holds after the first half. */
1409 psx_gpu->num_spans += height_minor_b;
1410 do
1411 {
1412 setup_spans_set_x4(none, down, no);
1413 height_minor_b -= 4;
1414 } while(height_minor_b > 0);
1415 }
1416
1417 left_split_triangles++;
1418}
1419
1420#endif
1421
1422
/* Identity mapping: yields its argument unchanged (parenthesised). */
1423#define dither_table_entry_normal(value) \
1424 (value) \
1425
1426
/* target == indirect variant of the msb_mask loader. */
1427#define setup_blocks_load_msb_mask_indirect() \
1428
/* target == direct: declares msb_mask with psx_gpu->mask_msb replicated into
 * every 16-bit lane. */
1429#define setup_blocks_load_msb_mask_direct() \
1430 vec_8x16u msb_mask; \
1431 dup_8x16b(msb_mask, psx_gpu->mask_msb); \
1432
1433
/* Locals for the shaded + textured block builder: per-channel block
 * accumulators (u, v, r, g, b), the per-pixel deltas from psx_gpu->uvrg_dx and
 * psx_gpu->b_block_span.e[1] together with their x4 and x8 multiples, and a
 * texture mask whose low half holds texture_mask_width and whose high half
 * holds texture_mask_height. The target argument is not used. */
1434#define setup_blocks_variables_shaded_textured(target) \
1435 vec_4x32u u_block; \
1436 vec_4x32u v_block; \
1437 vec_4x32u r_block; \
1438 vec_4x32u g_block; \
1439 vec_4x32u b_block; \
1440 vec_4x32u uvrg_dx = psx_gpu->uvrg_dx; \
1441 vec_4x32u uvrg_dx4; \
1442 vec_4x32u uvrg_dx8; \
1443 vec_4x32u uvrg; \
1444 u32 b_dx = psx_gpu->b_block_span.e[1]; \
1445 u32 b_dx4 = b_dx << 2; \
1446 u32 b_dx8 = b_dx << 3; \
1447 u32 b; \
1448 \
1449 vec_16x8u texture_mask; \
1450 shl_4x32b(uvrg_dx4, uvrg_dx, 2); \
1451 shl_4x32b(uvrg_dx8, uvrg_dx, 3); \
1452 dup_8x8b(texture_mask.low, psx_gpu->texture_mask_width); \
1453 dup_8x8b(texture_mask.high, psx_gpu->texture_mask_height) \
1454
/* Locals for the shaded + untextured block builder: r/g/b block accumulators,
 * byte constants (0x07, 1, 4, 128) and the colour deltas rgb_dx with their x4
 * and x8 multiples. rgb_dx lanes 0-1 come from the high half of
 * psx_gpu->uvrg_dx and lane 2 from psx_gpu->b_block_span.e[1].
 * NOTE(review): lane 3 of rgb_dx is never assigned before the shifts read
 * it; the visible store macro only consumes lanes 0-2. The target argument is
 * not used. */
1455#define setup_blocks_variables_shaded_untextured(target) \
1456 vec_4x32u r_block; \
1457 vec_4x32u g_block; \
1458 vec_4x32u b_block; \
1459 vec_4x32u rgb_dx; \
1460 vec_4x32u rgb_dx4; \
1461 vec_4x32u rgb_dx8; \
1462 vec_4x32u rgb; \
1463 \
1464 vec_8x8u d64_0x07; \
1465 vec_8x8u d64_1; \
1466 vec_8x8u d64_4; \
1467 vec_8x8u d64_128; \
1468 \
1469 dup_8x8b(d64_0x07, 0x07); \
1470 dup_8x8b(d64_1, 1); \
1471 dup_8x8b(d64_4, 4); \
1472 dup_8x8b(d64_128, 128); \
1473 \
1474 rgb_dx.low = psx_gpu->uvrg_dx.high; \
1475 rgb_dx.e[2] = psx_gpu->b_block_span.e[1]; \
1476 shl_4x32b(rgb_dx4, rgb_dx, 2); \
1477 shl_4x32b(rgb_dx8, rgb_dx, 3) \
1478
/* Locals for the unshaded + textured block builder: u/v block accumulators,
 * the uv deltas from the low half of psx_gpu->uvrg_dx with their x4 and x8
 * multiples, the uv start from the low half of psx_gpu->uvrg, and a texture
 * mask (low half = texture_mask_width, high half = texture_mask_height).
 * The target argument is not used. */
1479#define setup_blocks_variables_unshaded_textured(target) \
1480 vec_4x32u u_block; \
1481 vec_4x32u v_block; \
1482 vec_2x32u uv_dx = psx_gpu->uvrg_dx.low; \
1483 vec_2x32u uv_dx4; \
1484 vec_2x32u uv_dx8; \
1485 vec_2x32u uv = psx_gpu->uvrg.low; \
1486 \
1487 vec_16x8u texture_mask; \
1488 shl_2x32b(uv_dx4, uv_dx, 2); \
1489 shl_2x32b(uv_dx8, uv_dx, 3); \
1490 dup_8x8b(texture_mask.low, psx_gpu->texture_mask_width); \
1491 dup_8x8b(texture_mask.high, psx_gpu->texture_mask_height) \
1492
1493
/* target == direct: ORs msb_mask into the replicated flat colour. */
1494#define setup_blocks_variables_unshaded_untextured_direct() \
1495 or_8x16b(colors, colors, msb_mask) \
1496
/* target == indirect variant of the hook above. */
1497#define setup_blocks_variables_unshaded_untextured_indirect() \
1498
/* Locals for the unshaded + untextured block builder. Splits
 * psx_gpu->triangle_color into 8-bit r (bits 0-7), g (bits 8-15) and
 * b (bits 16-23), keeps the top five bits of each and repacks them as
 * r | g << 5 | b << 10, then replicates that value into every lane of
 * colors and applies the target-specific hook. */
1499#define setup_blocks_variables_unshaded_untextured(target) \
1500 u32 color = psx_gpu->triangle_color; \
1501 vec_8x16u colors; \
1502 \
1503 u32 color_r = color & 0xFF; \
1504 u32 color_g = (color >> 8) & 0xFF; \
1505 u32 color_b = (color >> 16) & 0xFF; \
1506 \
1507 color = (color_r >> 3) | ((color_g >> 3) << 5) | \
1508 ((color_b >> 3) << 10); \
1509 dup_8x16b(colors, color); \
1510 setup_blocks_variables_unshaded_untextured_##target() \
1511
/* texturing == textured: declares 16-bit dither_offsets as
 * dither_offsets_short widened and shifted left by 4. */
1512#define setup_blocks_span_initialize_dithered_textured() \
1513 vec_8x16u dither_offsets; \
1514 shl_long_8x8b(dither_offsets, dither_offsets_short, 4) \
1515
/* texturing == untextured: declares 8-bit dither_offsets as
 * dither_offsets_short plus the constant vector d64_4. */
1516#define setup_blocks_span_initialize_dithered_untextured() \
1517 vec_8x8u dither_offsets; \
1518 add_8x8b(dither_offsets, dither_offsets_short, d64_4) \
1519
/* Per-span dither setup. Picks the dither table row for scanline y (y & 3),
 * rotates it right by 8 bits per pixel of horizontal offset
 * (span_edge_data->left_x & 3), replicates the rotated 32-bit row into both
 * halves of dither_offsets_short and then runs the texturing-specific
 * conversion, which declares dither_offsets.
 * Expects psx_gpu, y and span_edge_data to be in scope at the expansion
 * site. */
#define setup_blocks_span_initialize_dithered(texturing)                       \
  u32 dither_row = psx_gpu->dither_table[y & 0x3];                             \
  u32 dither_shift = (span_edge_data->left_x & 0x3) * 8;                       \
  vec_8x8s dither_offsets_short;                                               \
                                                                               \
  /* Rotate right. The left-shift count is masked so that a rotation of 0 */  \
  /* does not shift a 32-bit value by 32, which C leaves undefined.       */  \
  dither_row = (dither_row >> dither_shift) |                                  \
   (dither_row << ((32 - dither_shift) & 31));                                 \
  dup_2x32b(vector_cast(vec_2x32u, dither_offsets_short), dither_row);         \
  setup_blocks_span_initialize_dithered_##texturing()
1529
/* dithering == undithered variant of the per-span dither setup. */
1530#define setup_blocks_span_initialize_undithered(texturing) \
1531
1532
/* Per-span start values for shaded + textured blocks. Loads the span's uvrg
 * and b values, advances them by left_x pixels using the per-pixel deltas,
 * broadcasts each channel into its *_block vector and adds the matching
 * psx_gpu->*_block_span vector to each. */
1533#define setup_blocks_span_initialize_shaded_textured() \
1534{ \
1535 vec_4x32u block_span; \
1536 u32 offset = span_edge_data->left_x; \
1537 \
1538 uvrg = *span_uvrg_offset; \
1539 mla_scalar_4x32b(uvrg, uvrg_dx, offset); \
1540 b = *span_b_offset; \
1541 b += b_dx * offset; \
1542 \
1543 dup_4x32b(u_block, uvrg.e[0]); \
1544 dup_4x32b(v_block, uvrg.e[1]); \
1545 dup_4x32b(r_block, uvrg.e[2]); \
1546 dup_4x32b(g_block, uvrg.e[3]); \
1547 dup_4x32b(b_block, b); \
1548 \
1549 block_span = psx_gpu->u_block_span; \
1550 add_4x32b(u_block, u_block, block_span); \
1551 block_span = psx_gpu->v_block_span; \
1552 add_4x32b(v_block, v_block, block_span); \
1553 block_span = psx_gpu->r_block_span; \
1554 add_4x32b(r_block, r_block, block_span); \
1555 block_span = psx_gpu->g_block_span; \
1556 add_4x32b(g_block, g_block, block_span); \
1557 block_span = psx_gpu->b_block_span; \
1558 add_4x32b(b_block, b_block, block_span); \
1559}
1560
/* Per-span start values for shaded + untextured blocks. Packs r and g (high
 * half of the span's uvrg entry) and b into rgb lanes 0-2, advances them by
 * left_x pixels using rgb_dx, broadcasts each channel into its *_block vector
 * and adds the matching psx_gpu->*_block_span vector to each. */
1561#define setup_blocks_span_initialize_shaded_untextured() \
1562{ \
1563 vec_4x32u block_span; \
1564 u32 offset = span_edge_data->left_x; \
1565 \
1566 rgb.low = span_uvrg_offset->high; \
1567 rgb.high.e[0] = *span_b_offset; \
1568 mla_scalar_4x32b(rgb, rgb_dx, offset); \
1569 \
1570 dup_4x32b(r_block, rgb.e[0]); \
1571 dup_4x32b(g_block, rgb.e[1]); \
1572 dup_4x32b(b_block, rgb.e[2]); \
1573 \
1574 block_span = psx_gpu->r_block_span; \
1575 add_4x32b(r_block, r_block, block_span); \
1576 block_span = psx_gpu->g_block_span; \
1577 add_4x32b(g_block, g_block, block_span); \
1578 block_span = psx_gpu->b_block_span; \
1579 add_4x32b(b_block, b_block, block_span); \
1580} \
1581
/* Per-span start values for unshaded + textured blocks. Loads u and v from
 * the low half of the span's uvrg entry, advances them by left_x pixels using
 * uv_dx, broadcasts each into its *_block vector and adds
 * psx_gpu->u_block_span / v_block_span. */
1582#define setup_blocks_span_initialize_unshaded_textured() \
1583{ \
1584 vec_4x32u block_span; \
1585 u32 offset = span_edge_data->left_x; \
1586 \
1587 uv = span_uvrg_offset->low; \
1588 mla_scalar_2x32b(uv, uv_dx, offset); \
1589 \
1590 dup_4x32b(u_block, uv.e[0]); \
1591 dup_4x32b(v_block, uv.e[1]); \
1592 \
1593 block_span = psx_gpu->u_block_span; \
1594 add_4x32b(u_block, u_block, block_span); \
1595 block_span = psx_gpu->v_block_span; \
1596 add_4x32b(v_block, v_block, block_span); \
1597} \
1598
/* Unshaded + untextured variant of the per-span initializer. */
1599#define setup_blocks_span_initialize_unshaded_untextured() \
1600
1601
/* swizzling == swizzled: mixes the u and v bytes using 4-bit shift-insert
 * operations. u is saved first because the sli step overwrites it before the
 * sri step needs the original value.
 * NOTE(review): presumably this leaves u = (v << 4) | (u & 0x0F) and
 * v = (v & 0xF0) | (u_saved >> 4) - confirm against sli_8x8b / sri_8x8b. */
1602#define setup_blocks_texture_swizzled() \
1603{ \
1604 vec_8x8u u_saved = u; \
1605 sli_8x8b(u, v, 4); \
1606 sri_8x8b(v, u_saved, 4); \
1607} \
1608
/* swizzling == unswizzled variant: u and v are left as they are. */
1609#define setup_blocks_texture_unswizzled() \
1610
/* Fills one block for the shaded + textured path.
 * For each of u, v, r, g, b: takes the upper 16 bits of the four accumulator
 * lanes for pixels 0-3, the upper 16 bits of (accumulator + 4 * delta) for
 * pixels 4-7, narrows the eight results to bytes, then advances the
 * accumulator by 8 * delta for the next block. u and v are masked with
 * texture_mask, optionally swizzled, and interleaved into block->uv. r, g, b,
 * the dither offsets and fb_ptr are stored alongside.
 * The dithering, target and edge_type arguments are not used here. */
1611#define setup_blocks_store_shaded_textured(swizzling, dithering, target, \
1612 edge_type) \
1613{ \
1614 vec_8x16u u_whole; \
1615 vec_8x16u v_whole; \
1616 vec_8x16u r_whole; \
1617 vec_8x16u g_whole; \
1618 vec_8x16u b_whole; \
1619 \
1620 vec_8x8u u; \
1621 vec_8x8u v; \
1622 vec_8x8u r; \
1623 vec_8x8u g; \
1624 vec_8x8u b; \
1625 vec_8x16u uv; \
1626 \
1627 vec_4x32u dx4; \
1628 vec_4x32u dx8; \
1629 \
1630 shr_narrow_4x32b(u_whole.low, u_block, 16); \
1631 shr_narrow_4x32b(v_whole.low, v_block, 16); \
1632 shr_narrow_4x32b(r_whole.low, r_block, 16); \
1633 shr_narrow_4x32b(g_whole.low, g_block, 16); \
1634 shr_narrow_4x32b(b_whole.low, b_block, 16); \
1635 \
1636 dup_4x32b(dx4, uvrg_dx4.e[0]); \
1637 add_high_narrow_4x32b(u_whole.high, u_block, dx4); \
1638 dup_4x32b(dx4, uvrg_dx4.e[1]); \
1639 add_high_narrow_4x32b(v_whole.high, v_block, dx4); \
1640 dup_4x32b(dx4, uvrg_dx4.e[2]); \
1641 add_high_narrow_4x32b(r_whole.high, r_block, dx4); \
1642 dup_4x32b(dx4, uvrg_dx4.e[3]); \
1643 add_high_narrow_4x32b(g_whole.high, g_block, dx4); \
1644 dup_4x32b(dx4, b_dx4); \
1645 add_high_narrow_4x32b(b_whole.high, b_block, dx4); \
1646 \
1647 mov_narrow_8x16b(u, u_whole); \
1648 mov_narrow_8x16b(v, v_whole); \
1649 mov_narrow_8x16b(r, r_whole); \
1650 mov_narrow_8x16b(g, g_whole); \
1651 mov_narrow_8x16b(b, b_whole); \
1652 \
1653 dup_4x32b(dx8, uvrg_dx8.e[0]); \
1654 add_4x32b(u_block, u_block, dx8); \
1655 dup_4x32b(dx8, uvrg_dx8.e[1]); \
1656 add_4x32b(v_block, v_block, dx8); \
1657 dup_4x32b(dx8, uvrg_dx8.e[2]); \
1658 add_4x32b(r_block, r_block, dx8); \
1659 dup_4x32b(dx8, uvrg_dx8.e[3]); \
1660 add_4x32b(g_block, g_block, dx8); \
1661 dup_4x32b(dx8, b_dx8); \
1662 add_4x32b(b_block, b_block, dx8); \
1663 \
1664 and_8x8b(u, u, texture_mask.low); \
1665 and_8x8b(v, v, texture_mask.high); \
1666 setup_blocks_texture_##swizzling(); \
1667 \
1668 zip_8x16b(uv, u, v); \
1669 block->uv = uv; \
1670 block->r = r; \
1671 block->g = g; \
1672 block->b = b; \
1673 block->dither_offsets = vector_cast(vec_8x16u, dither_offsets); \
1674 block->fb_ptr = fb_ptr; \
1675} \
1676
/* Fills one block for the unshaded + textured path.
 * For u and v: takes the upper 16 bits of the four accumulator lanes for
 * pixels 0-3, the upper 16 bits of (accumulator + 4 * delta) for pixels 4-7,
 * narrows the eight results to bytes, then advances the accumulator by
 * 8 * delta. u and v are masked with texture_mask, optionally swizzled, and
 * interleaved into block->uv; the dither offsets and fb_ptr are stored too.
 * The dithering, target and edge_type arguments are not used here. */
1677#define setup_blocks_store_unshaded_textured(swizzling, dithering, target, \
1678 edge_type) \
1679{ \
1680 vec_8x16u u_whole; \
1681 vec_8x16u v_whole; \
1682 \
1683 vec_8x8u u; \
1684 vec_8x8u v; \
1685 vec_8x16u uv; \
1686 \
1687 vec_4x32u dx4; \
1688 vec_4x32u dx8; \
1689 \
1690 shr_narrow_4x32b(u_whole.low, u_block, 16); \
1691 shr_narrow_4x32b(v_whole.low, v_block, 16); \
1692 \
1693 dup_4x32b(dx4, uv_dx4.e[0]); \
1694 add_high_narrow_4x32b(u_whole.high, u_block, dx4); \
1695 dup_4x32b(dx4, uv_dx4.e[1]); \
1696 add_high_narrow_4x32b(v_whole.high, v_block, dx4); \
1697 \
1698 mov_narrow_8x16b(u, u_whole); \
1699 mov_narrow_8x16b(v, v_whole); \
1700 \
1701 dup_4x32b(dx8, uv_dx8.e[0]); \
1702 add_4x32b(u_block, u_block, dx8); \
1703 dup_4x32b(dx8, uv_dx8.e[1]); \
1704 add_4x32b(v_block, v_block, dx8); \
1705 \
1706 and_8x8b(u, u, texture_mask.low); \
1707 and_8x8b(v, v, texture_mask.high); \
1708 setup_blocks_texture_##swizzling(); \
1709 \
1710 zip_8x16b(uv, u, v); \
1711 block->uv = uv; \
1712 block->dither_offsets = vector_cast(vec_8x16u, dither_offsets); \
1713 block->fb_ptr = fb_ptr; \
1714} \
1715
/* dithering == dithered: adds dither_offsets to r, g and b, then subtracts
 * the constant vector d64_4 from each.
 * NOTE(review): addq/subq are presumably saturating byte operations, which
 * would make the +offset / -4 pair clamp at both ends - confirm. */
1716#define setup_blocks_store_shaded_untextured_dithered() \
1717 addq_8x8b(r, r, dither_offsets); \
1718 addq_8x8b(g, g, dither_offsets); \
1719 addq_8x8b(b, b, dither_offsets); \
1720 \
1721 subq_8x8b(r, r, d64_4); \
1722 subq_8x8b(g, g, d64_4); \
1723 subq_8x8b(b, b, d64_4) \
1724
/* dithering == undithered variant: colours are left untouched. */
1725#define setup_blocks_store_shaded_untextured_undithered() \
1726
1727
/* indirect target, full block: queue the pixels and the framebuffer pointer
 * in the current block entry. */
1728#define setup_blocks_store_untextured_pixels_indirect_full(_pixels) \
1729 block->pixels = _pixels; \
1730 block->fb_ptr = fb_ptr \
1731
/* indirect target, edge block: same stores as the full variant. */
1732#define setup_blocks_store_untextured_pixels_indirect_edge(_pixels) \
1733 block->pixels = _pixels; \
1734 block->fb_ptr = fb_ptr \
1735
/* indirect target: starts the packed pixel as r * 1 (widened to 16 bits). */
1736#define setup_blocks_store_shaded_untextured_seed_pixels_indirect() \
1737 mul_long_8x8b(pixels, r, d64_1) \
1738
1739
/* direct target, full block: write all eight pixels straight to fb_ptr. */
1740#define setup_blocks_store_untextured_pixels_direct_full(_pixels) \
1741 store_8x16b(_pixels, fb_ptr) \
1742
/* direct target, edge block: read the eight framebuffer pixels, build a lane
 * mask from the span's right_mask bits tested against psx_gpu->test_mask, and
 * merge _pixels into the framebuffer pixels under that mask before writing
 * them back.
 * NOTE(review): bif presumably keeps the framebuffer value in lanes where
 * the mask is set - confirm against bif_8x16b. */
1743#define setup_blocks_store_untextured_pixels_direct_edge(_pixels) \
1744{ \
1745 vec_8x16u fb_pixels; \
1746 vec_8x16u draw_mask; \
1747 vec_8x16u test_mask = psx_gpu->test_mask; \
1748 \
1749 load_8x16b(fb_pixels, fb_ptr); \
1750 dup_8x16b(draw_mask, span_edge_data->right_mask); \
1751 tst_8x16b(draw_mask, draw_mask, test_mask); \
1752 bif_8x16b(fb_pixels, _pixels, draw_mask); \
1753 store_8x16b(fb_pixels, fb_ptr); \
1754} \
1755
/* direct target: starts the packed pixel from msb_mask and accumulates
 * r * 1 on top of it. */
1756#define setup_blocks_store_shaded_untextured_seed_pixels_direct() \
1757 pixels = msb_mask; \
1758 mla_long_8x8b(pixels, r, d64_1) \
1759
1760
/* Fills (indirect target) or draws (direct target) one block of eight
 * gouraud-shaded, untextured pixels.
 *
 * For each of r, g, b: takes the upper 16 bits of the four accumulator lanes
 * for pixels 0-3, the upper 16 bits of (accumulator + 4 * delta) for pixels
 * 4-7, narrows the eight results to bytes and advances the accumulator by
 * 8 * delta for the next block. After the optional dither step the channels
 * are reduced to five significant bits each and combined into 16-bit pixels
 * as r + g * 4 + b * 128 (g and b keep their value in bits 3-7, so the
 * multiplications place them at bits 5-9 and 10-14).
 *
 * The swizzling argument is not used. Every statement is explicitly
 * terminated, so the expansion does not depend on the vector helpers being
 * compound statements. */
#define setup_blocks_store_shaded_untextured(swizzling, dithering, target,     \
 edge_type)                                                                    \
{                                                                              \
  vec_8x16u r_whole;                                                           \
  vec_8x16u g_whole;                                                           \
  vec_8x16u b_whole;                                                           \
                                                                               \
  vec_8x8u r;                                                                  \
  vec_8x8u g;                                                                  \
  vec_8x8u b;                                                                  \
                                                                               \
  vec_4x32u dx4;                                                               \
  vec_4x32u dx8;                                                               \
                                                                               \
  vec_8x16u pixels;                                                            \
                                                                               \
  /* pixels 0-3 */                                                             \
  shr_narrow_4x32b(r_whole.low, r_block, 16);                                  \
  shr_narrow_4x32b(g_whole.low, g_block, 16);                                  \
  shr_narrow_4x32b(b_whole.low, b_block, 16);                                  \
                                                                               \
  /* pixels 4-7 */                                                             \
  dup_4x32b(dx4, rgb_dx4.e[0]);                                                \
  add_high_narrow_4x32b(r_whole.high, r_block, dx4);                           \
  dup_4x32b(dx4, rgb_dx4.e[1]);                                                \
  add_high_narrow_4x32b(g_whole.high, g_block, dx4);                           \
  dup_4x32b(dx4, rgb_dx4.e[2]);                                                \
  add_high_narrow_4x32b(b_whole.high, b_block, dx4);                           \
                                                                               \
  mov_narrow_8x16b(r, r_whole);                                                \
  mov_narrow_8x16b(g, g_whole);                                                \
  mov_narrow_8x16b(b, b_whole);                                                \
                                                                               \
  /* advance the accumulators to the next block */                            \
  dup_4x32b(dx8, rgb_dx8.e[0]);                                                \
  add_4x32b(r_block, r_block, dx8);                                            \
  dup_4x32b(dx8, rgb_dx8.e[1]);                                                \
  add_4x32b(g_block, g_block, dx8);                                            \
  dup_4x32b(dx8, rgb_dx8.e[2]);                                                \
  add_4x32b(b_block, b_block, dx8);                                            \
                                                                               \
  setup_blocks_store_shaded_untextured_##dithering();                          \
                                                                               \
  shr_8x8b(r, r, 3);                                                           \
  bic_8x8b(g, g, d64_0x07);                                                    \
  bic_8x8b(b, b, d64_0x07);                                                    \
                                                                               \
  setup_blocks_store_shaded_untextured_seed_pixels_##target();                 \
  mla_long_8x8b(pixels, g, d64_4);                                             \
  mla_long_8x8b(pixels, b, d64_128);                                           \
                                                                               \
  setup_blocks_store_untextured_pixels_##target##_##edge_type(pixels);         \
}
1811
/* Flat-colour block: forwards the precomputed colors vector to the
 * target / edge_type specific pixel store. swizzling and dithering are not
 * used. */
1812#define setup_blocks_store_unshaded_untextured(swizzling, dithering, target, \
1813 edge_type) \
1814 setup_blocks_store_untextured_pixels_##target##_##edge_type(colors) \
1815
1816
/* textured + indirect: record the raw mask bits in the block. */
1817#define setup_blocks_store_draw_mask_textured_indirect(_block, bits) \
1818 (_block)->draw_mask_bits = bits \
1819
/* untextured + indirect: replicate the bits into every lane, test them
 * against psx_gpu->test_mask and store the resulting per-lane mask in the
 * block. */
1820#define setup_blocks_store_draw_mask_untextured_indirect(_block, bits) \
1821{ \
1822 vec_8x16u bits_mask; \
1823 vec_8x16u test_mask = psx_gpu->test_mask; \
1824 dup_8x16b(bits_mask, bits); \
1825 tst_8x16b(bits_mask, bits_mask, test_mask); \
1826 (_block)->draw_mask = bits_mask; \
1827} \
1828
/* untextured + direct variant: nothing is recorded in the block. */
1829#define setup_blocks_store_draw_mask_untextured_direct(_block, bits) \
1830
1831
/* indirect target: reserve span_num_blocks entries in the block buffer. If
 * that would exceed MAX_BLOCKS, the blocks queued before this span are
 * committed (psx_gpu->num_blocks is set to the count without this span),
 * flush_render_block_buffer is called, and the span starts over at the
 * beginning of psx_gpu->blocks. */
1832#define setup_blocks_add_blocks_indirect() \
1833 num_blocks += span_num_blocks; \
1834 \
1835 if(num_blocks > MAX_BLOCKS) \
1836 { \
1837 psx_gpu->num_blocks = num_blocks - span_num_blocks; \
1838 flush_render_block_buffer(psx_gpu); \
1839 num_blocks = span_num_blocks; \
1840 block = psx_gpu->blocks; \
1841 } \
1842
/* direct target: only statistics counters are updated; num_blocks is left
 * alone. */
1843#define setup_blocks_add_blocks_direct() \
3867c6ef
E
1844 texel_blocks_untextured += span_num_blocks; \
1845 span_pixel_blocks += span_num_blocks \
75e28f62
E
1846
1847
/* Generates setup_blocks_<shading>_<texturing>_<dithering>_<sw>_<target>().
 *
 * The generated function walks psx_gpu->num_spans entries of
 * psx_gpu->span_edge_data, span_uvrg_offset and span_b_offset in lockstep.
 * For a span with a non-zero num_blocks it:
 *  - points fb_ptr at (left_x, y) in psx_gpu->vram_ptr, using 1024 entries
 *    per row;
 *  - runs the per-span interpolant and dither initializers;
 *  - accounts for the span's blocks (target-specific; the indirect variant
 *    may flush the block buffer and reset block);
 *  - adds the span's pixel count to the span_pixels statistic;
 *  - emits num_blocks - 1 "full" blocks with a draw mask of 0x00, stepping
 *    fb_ptr by 8 each time, then one "edge" block with the span's right_mask.
 * Spans with num_blocks == 0 only bump zero_block_spans.
 * On exit the local num_blocks is written back to psx_gpu->num_blocks. */
1848#define setup_blocks_builder(shading, texturing, dithering, sw, target) \
1849void setup_blocks_##shading##_##texturing##_##dithering##_##sw##_##target( \
1850 psx_gpu_struct *psx_gpu) \
1851{ \
1852 setup_blocks_load_msb_mask_##target(); \
1853 setup_blocks_variables_##shading##_##texturing(target); \
1854 \
1855 edge_data_struct *span_edge_data = psx_gpu->span_edge_data; \
1856 vec_4x32u *span_uvrg_offset = psx_gpu->span_uvrg_offset; \
1857 u32 *span_b_offset = psx_gpu->span_b_offset; \
1858 \
1859 block_struct *block = psx_gpu->blocks + psx_gpu->num_blocks; \
1860 \
1861 u32 num_spans = psx_gpu->num_spans; \
1862 \
1863 u16 *fb_ptr; \
1864 u32 y; \
1865 \
1866 u32 num_blocks = psx_gpu->num_blocks; \
1867 u32 span_num_blocks; \
1868 \
1869 while(num_spans) \
1870 { \
1871 span_num_blocks = span_edge_data->num_blocks; \
1872 if(span_num_blocks) \
1873 { \
1874 y = span_edge_data->y; \
1875 fb_ptr = psx_gpu->vram_ptr + span_edge_data->left_x + (y * 1024); \
1876 \
1877 setup_blocks_span_initialize_##shading##_##texturing(); \
1878 setup_blocks_span_initialize_##dithering(texturing); \
1879 \
1880 setup_blocks_add_blocks_##target(); \
1881 \
1882 s32 pixel_span = span_num_blocks * 8; \
1883 pixel_span -= __builtin_popcount(span_edge_data->right_mask & 0xFF); \
1884 span_pixels += pixel_span; \
75e28f62
E
1885 \
1886 span_num_blocks--; /* the last block is emitted separately below */ \
1887 while(span_num_blocks) \
1888 { \
1889 setup_blocks_store_##shading##_##texturing(sw, dithering, target, \
1890 full); \
1891 setup_blocks_store_draw_mask_##texturing##_##target(block, 0x00); \
1892 \
1893 fb_ptr += 8; \
1894 block++; \
1895 span_num_blocks--; \
1896 } \
1897 \
1898 setup_blocks_store_##shading##_##texturing(sw, dithering, target, edge); \
1899 setup_blocks_store_draw_mask_##texturing##_##target(block, \
1900 span_edge_data->right_mask); \
1901 \
1902 block++; \
1903 } \
1904 else \
1905 { \
1906 zero_block_spans++; \
1907 } \
1908 \
1909 num_spans--; \
1910 span_edge_data++; \
1911 span_uvrg_offset++; \
1912 span_b_offset++; \
1913 } \
1914 \
1915 psx_gpu->num_blocks = num_blocks; \
1916} \
1917
/* Prototypes for the block setup variants; the names follow the pattern
 * setup_blocks_<shading>_<texturing>_<dithering>_<swizzling>_<target>. */
1918void setup_blocks_shaded_textured_dithered_unswizzled_indirect(psx_gpu_struct
1919 *psx_gpu);
1920
1921void setup_blocks_shaded_untextured_dithered_unswizzled_indirect(psx_gpu_struct
1922 *psx_gpu);
1923void setup_blocks_shaded_untextured_undithered_unswizzled_indirect(
1924 psx_gpu_struct *psx_gpu);
1925void setup_blocks_shaded_untextured_dithered_unswizzled_direct(psx_gpu_struct
1926 *psx_gpu);
1927void setup_blocks_shaded_untextured_undithered_unswizzled_direct(
1928 psx_gpu_struct *psx_gpu);
1929
1930void setup_blocks_unshaded_textured_dithered_unswizzled_indirect(psx_gpu_struct
1931 *psx_gpu);
1932void setup_blocks_unshaded_untextured_undithered_unswizzled_indirect(
1933 psx_gpu_struct *psx_gpu);
1934void setup_blocks_unshaded_untextured_undithered_unswizzled_direct(
1935 psx_gpu_struct *psx_gpu);
1936
1937void setup_blocks_shaded_textured_dithered_swizzled_indirect(psx_gpu_struct
1938 *psx_gpu);
1939void setup_blocks_unshaded_textured_dithered_swizzled_indirect(psx_gpu_struct
1940 *psx_gpu);
1941
1942
1943//setup_blocks_builder(unshaded, untextured, undithered, unswizzled, direct);
1944
2bbbb7af 1945#ifndef NEON_BUILD
75e28f62
E
1946
/* C implementations of the block setup variants, generated from
 * setup_blocks_builder. Textured variants are instantiated for the indirect
 * target only; untextured variants for both indirect and direct. */
1947setup_blocks_builder(shaded, textured, dithered, swizzled, indirect);
1948setup_blocks_builder(shaded, textured, dithered, unswizzled, indirect);
1949
1950setup_blocks_builder(unshaded, textured, dithered, unswizzled, indirect);
1951setup_blocks_builder(unshaded, textured, dithered, swizzled, indirect);
1952
1953setup_blocks_builder(shaded, untextured, undithered, unswizzled, indirect);
1954setup_blocks_builder(shaded, untextured, dithered, unswizzled, indirect);
1955setup_blocks_builder(shaded, untextured, undithered, unswizzled, direct);
1956setup_blocks_builder(shaded, untextured, dithered, unswizzled, direct);
1957
1958setup_blocks_builder(unshaded, untextured, undithered, unswizzled, indirect);
1959setup_blocks_builder(unshaded, untextured, undithered, unswizzled, direct);
1960
1961#endif
1962
/* Prototypes for the texel fetch stage, one per texture format plus the
 * untextured case. */
1963void texture_blocks_untextured(psx_gpu_struct *psx_gpu);
1964void texture_blocks_4bpp(psx_gpu_struct *psx_gpu);
1965void texture_blocks_8bpp(psx_gpu_struct *psx_gpu);
1966void texture_blocks_16bpp(psx_gpu_struct *psx_gpu);
1967
2bbbb7af 1968#ifndef NEON_BUILD
75e28f62
E
1969
1970void texture_blocks_untextured(psx_gpu_struct *psx_gpu)
1971{
1972 if(psx_gpu->primitive_type != PRIMITIVE_TYPE_SPRITE)
1973 texel_blocks_untextured += psx_gpu->num_blocks;
1974}
1975
1976void texture_blocks_4bpp(psx_gpu_struct *psx_gpu)
1977{
1978 block_struct *block = psx_gpu->blocks;
1979 u32 num_blocks = psx_gpu->num_blocks;
1980 texel_blocks_4bpp += num_blocks;
1981
1982 vec_8x8u texels_low;
1983 vec_8x8u texels_high;
1984 vec_8x8u texels;
1985 vec_8x16u pixels;
1986
1987 vec_8x16u clut_a;
1988 vec_8x16u clut_b;
1989 vec_16x8u clut_low;
1990 vec_16x8u clut_high;
1991
1992 u8 *texture_ptr_8bpp = psx_gpu->texture_page_ptr;
1993 u16 *clut_ptr = psx_gpu->clut_ptr;
1994
1995 // Can be done with one deinterleaving load on NEON
1996 load_8x16b(clut_a, clut_ptr);
1997 load_8x16b(clut_b, clut_ptr + 8);
1998 unzip_16x8b(clut_low, clut_high, clut_a, clut_b);
1999
2000 if(psx_gpu->current_texture_mask & psx_gpu->dirty_textures_4bpp_mask)
2001 update_texture_4bpp_cache(psx_gpu);
2002
2003 while(num_blocks)
2004 {
2005 texels.e[0] = texture_ptr_8bpp[block->uv.e[0]];
2006 texels.e[1] = texture_ptr_8bpp[block->uv.e[1]];
2007 texels.e[2] = texture_ptr_8bpp[block->uv.e[2]];
2008 texels.e[3] = texture_ptr_8bpp[block->uv.e[3]];
2009 texels.e[4] = texture_ptr_8bpp[block->uv.e[4]];
2010 texels.e[5] = texture_ptr_8bpp[block->uv.e[5]];
2011 texels.e[6] = texture_ptr_8bpp[block->uv.e[6]];
2012 texels.e[7] = texture_ptr_8bpp[block->uv.e[7]];
2013
2014 tbl_16(texels_low, texels, clut_low);
2015 tbl_16(texels_high, texels, clut_high);
2016
2017 // Can be done with an interleaving store on NEON
2018 zip_8x16b(pixels, texels_low, texels_high);
2019
2020 block->texels = pixels;
2021
2022 num_blocks--;
2023 block++;
2024 }
2025}
2026
2027void texture_blocks_8bpp(psx_gpu_struct *psx_gpu)
2028{
2029 block_struct *block = psx_gpu->blocks;
2030 u32 num_blocks = psx_gpu->num_blocks;
2031
2032 texel_blocks_8bpp += num_blocks;
2033
2034 if(psx_gpu->current_texture_mask & psx_gpu->dirty_textures_8bpp_mask)
2035 update_texture_8bpp_cache(psx_gpu);
2036
2037 vec_8x16u texels;
2038 u8 *texture_ptr_8bpp = psx_gpu->texture_page_ptr;
2039
2040 u32 texel;
2041 u32 offset;
2042 u32 i;
2043
2044 while(num_blocks)
2045 {
2046 for(i = 0; i < 8; i++)
2047 {
2048 offset = block->uv.e[i];
2049
2050 texel = texture_ptr_8bpp[offset];
2051 texels.e[i] = psx_gpu->clut_ptr[texel];
2052 }
2053
2054 block->texels = texels;
2055
2056 num_blocks--;
2057 block++;
2058 }
2059}
2060
2061void texture_blocks_16bpp(psx_gpu_struct *psx_gpu)
2062{
2063 block_struct *block = psx_gpu->blocks;
2064 u32 num_blocks = psx_gpu->num_blocks;
2065
2066 texel_blocks_16bpp += num_blocks;
2067
2068 vec_8x16u texels;
2069
2070 u16 *texture_ptr_16bpp = psx_gpu->texture_page_ptr;
2071 u32 offset;
2072 u32 i;
2073
2074 while(num_blocks)
2075 {
2076 for(i = 0; i < 8; i++)
2077 {
2078 offset = block->uv.e[i];
2079 offset += ((offset & 0xFF00) * 3);
2080
2081 texels.e[i] = texture_ptr_16bpp[offset];
2082 }
2083
2084 block->texels = texels;
2085
2086 num_blocks--;
2087 block++;
2088 }
2089}
2090
2091#endif
2092
2093
/* target == indirect variant of the msb_mask loader. */
2094#define shade_blocks_load_msb_mask_indirect() \
2095
/* target == direct: declares msb_mask with psx_gpu->mask_msb replicated into
 * every 16-bit lane. */
2096#define shade_blocks_load_msb_mask_direct() \
2097 vec_8x16u msb_mask; \
2098 dup_8x16b(msb_mask, psx_gpu->mask_msb); \
2099
/* indirect target: keep the mask and pixels in the block for a later stage. */
2100#define shade_blocks_store_indirect(_draw_mask, _pixels) \
2101 block->draw_mask = _draw_mask; \
2102 block->pixels = _pixels \
2103
/* direct target: OR msb_mask into the pixels (modifying _pixels), read the
 * eight framebuffer pixels at block->fb_ptr, merge under _draw_mask and write
 * the result back.
 * NOTE(review): bif presumably keeps the framebuffer value in lanes where
 * the mask is set - confirm against bif_8x16b. */
2104#define shade_blocks_store_direct(_draw_mask, _pixels) \
2105{ \
2106 vec_8x16u fb_pixels; \
2107 or_8x16b(_pixels, _pixels, msb_mask); \
2108 load_8x16b(fb_pixels, block->fb_ptr); \
2109 bif_8x16b(fb_pixels, _pixels, _draw_mask); \
2110 store_8x16b(fb_pixels, block->fb_ptr); \
2111} \
2112
2113
/* dithered variant: when the flat colour is 0x808080 only the
 * false_modulated_blocks statistic is updated; processing continues. */
3867c6ef 2114#define shade_blocks_textured_false_modulated_check_dithered(target) \
b7ed0632
E
2115 if(psx_gpu->triangle_color == 0x808080) \
2116 { \
2117 false_modulated_blocks += num_blocks; \
2118 } \
3867c6ef
E
2119
/* undithered variant: when the flat colour is 0x808080 the work is handed to
 * the unmodulated shader for the same target, the statistic is updated and
 * the enclosing function returns early. */
2120#define shade_blocks_textured_false_modulated_check_undithered(target) \
2121 if(psx_gpu->triangle_color == 0x808080) \
2122 { \
2123 \
2124 shade_blocks_textured_unmodulated_##target(psx_gpu); \
2125 false_modulated_blocks += num_blocks; \
2126 return; \
2127 } \
2128
2129
/* shading == shaded: no per-primitive colour setup. */
2130#define shade_blocks_textured_modulated_shaded_primitive_load(dithering, \
2131 target) \
75e28f62 2132
3867c6ef
E
/* shading == unshaded: replicates the flat colour into colors_r / colors_g /
 * colors_b (from color, color >> 8 and color >> 16 of
 * psx_gpu->triangle_color), then runs the false-modulation check for the
 * given dithering mode. */
2133#define shade_blocks_textured_modulated_unshaded_primitive_load(dithering, \
2134 target) \
75e28f62
E
2135{ \
2136 u32 color = psx_gpu->triangle_color; \
2137 dup_8x8b(colors_r, color); \
2138 dup_8x8b(colors_g, color >> 8); \
2139 dup_8x8b(colors_b, color >> 16); \
3867c6ef 2140 shade_blocks_textured_false_modulated_check_##dithering(target); \
75e28f62
E
2141} \
2142
/* shading == shaded: per-block colours come from the block itself. */
2143#define shade_blocks_textured_modulated_shaded_block_load() \
2144 colors_r = block->r; \
2145 colors_g = block->g; \
2146 colors_b = block->b \
2147
/* shading == unshaded variant: no per-block colour load. */
2148#define shade_blocks_textured_modulated_unshaded_block_load() \
2149
/* dithered: pixels_<c> starts from the block's dither offsets and
 * accumulates texels_<c> * colors_<c> on top. */
2150#define shade_blocks_textured_modulate_dithered(component) \
2151 pixels_##component = block->dither_offsets; \
2152 mla_long_8x8b(pixels_##component, texels_##component, colors_##component) \
2153
/* undithered: pixels_<c> is the plain widening product
 * texels_<c> * colors_<c>. */
2154#define shade_blocks_textured_modulate_undithered(component) \
2155 mul_long_8x8b(pixels_##component, texels_##component, colors_##component) \
2156
// Generates
//   void shade_blocks_<shading>_textured_modulated_<dithering>_<target>(
//     psx_gpu_struct *psx_gpu)
// which walks psx_gpu->num_blocks blocks starting at psx_gpu->blocks.
//
// Per block:
//  - draw_mask is built from block->draw_mask_bits tested against
//    psx_gpu->test_mask;
//  - block->texels is split into three 8-bit channels: r = low byte & 0x1F,
//    g = (texels >> 5) & 0x1F, b = narrow(texels >> 7) >> 3;
//  - each channel is modulated by colors_r/g/b through
//    shade_blocks_textured_modulate_<dithering>;
//  - the products are narrowed (shrq_narrow_signed_8x16b by 4), reduced to
//    5-bit fields and accumulated onto (texels & 0x8000) with multipliers
//    1, 4 and 128;
//  - the result is stored with shade_blocks_store_<target>, using a mask that
//    is draw_mask OR the lanes whose texel compared equal to zero.
//
// The per-primitive load for unshaded/undithered may return before the loop
// (see shade_blocks_textured_false_modulated_check_undithered).
// NOTE(review): exact saturation/rounding of the vector helpers is defined
// elsewhere - confirm before relying on the bit layout described here.
2157#define shade_blocks_textured_modulated_builder(shading, dithering, target) \
2158void shade_blocks_##shading##_textured_modulated_##dithering##_##target( \
2159 psx_gpu_struct *psx_gpu) \
2160{ \
2161 block_struct *block = psx_gpu->blocks; \
2162 u32 num_blocks = psx_gpu->num_blocks; \
2163 vec_8x16u texels; \
2164 \
2165 vec_8x8u texels_r; \
2166 vec_8x8u texels_g; \
2167 vec_8x8u texels_b; \
2168 \
2169 vec_8x8u colors_r; \
2170 vec_8x8u colors_g; \
2171 vec_8x8u colors_b; \
2172 \
2173 vec_8x8u pixels_r_low; \
2174 vec_8x8u pixels_g_low; \
2175 vec_8x8u pixels_b_low; \
2176 vec_8x16u pixels; \
2177 \
2178 vec_8x16u pixels_r; \
2179 vec_8x16u pixels_g; \
2180 vec_8x16u pixels_b; \
2181 \
2182 vec_8x16u draw_mask; \
2183 vec_8x16u zero_mask; \
2184 \
2185 vec_8x8u d64_0x07; \
2186 vec_8x8u d64_0x1F; \
2187 vec_8x8u d64_1; \
2188 vec_8x8u d64_4; \
2189 vec_8x8u d64_128; \
2190 \
2191 vec_8x16u d128_0x8000; \
2192 \
2193 vec_8x16u test_mask = psx_gpu->test_mask; \
2194 u32 draw_mask_bits; \
2195 shade_blocks_load_msb_mask_##target(); \
2196 \
2197 dup_8x8b(d64_0x07, 0x07); \
2198 dup_8x8b(d64_0x1F, 0x1F); \
2199 dup_8x8b(d64_1, 1); \
2200 dup_8x8b(d64_4, 4); \
2201 dup_8x8b(d64_128, 128); \
2202 \
2203 dup_8x16b(d128_0x8000, 0x8000); \
2204 \
3867c6ef
E
2205 shade_blocks_textured_modulated_##shading##_primitive_load(dithering, \
2206 target); \
75e28f62
E
2207 \
2208 while(num_blocks) \
2209 { \
2210 draw_mask_bits = block->draw_mask_bits; \
2211 dup_8x16b(draw_mask, draw_mask_bits); \
2212 tst_8x16b(draw_mask, draw_mask, test_mask); \
2213 \
2214 shade_blocks_textured_modulated_##shading##_block_load(); \
2215 \
2216 texels = block->texels; \
2217 \
2218 mov_narrow_8x16b(texels_r, texels); \
2219 shr_narrow_8x16b(texels_g, texels, 5); \
2220 shr_narrow_8x16b(texels_b, texels, 7); \
2221 \
2222 and_8x8b(texels_r, texels_r, d64_0x1F); \
2223 and_8x8b(texels_g, texels_g, d64_0x1F); \
2224 shr_8x8b(texels_b, texels_b, 3); \
2225 \
2226 shade_blocks_textured_modulate_##dithering(r); \
2227 shade_blocks_textured_modulate_##dithering(g); \
2228 shade_blocks_textured_modulate_##dithering(b); \
2229 \
2230 cmpeqz_8x16b(zero_mask, texels); \
2231 and_8x16b(pixels, texels, d128_0x8000); \
2232 \
2233 shrq_narrow_signed_8x16b(pixels_r_low, pixels_r, 4); \
2234 shrq_narrow_signed_8x16b(pixels_g_low, pixels_g, 4); \
2235 shrq_narrow_signed_8x16b(pixels_b_low, pixels_b, 4); \
2236 \
2237 or_8x16b(zero_mask, draw_mask, zero_mask); \
2238 \
2239 shr_8x8b(pixels_r_low, pixels_r_low, 3); \
2240 bic_8x8b(pixels_g_low, pixels_g_low, d64_0x07); \
2241 bic_8x8b(pixels_b_low, pixels_b_low, d64_0x07); \
2242 \
2243 mla_long_8x8b(pixels, pixels_r_low, d64_1); \
2244 mla_long_8x8b(pixels, pixels_g_low, d64_4); \
2245 mla_long_8x8b(pixels, pixels_b_low, d64_128); \
2246 \
2247 shade_blocks_store_##target(zero_mask, pixels); \
2248 \
2249 num_blocks--; \
2250 block++; \
2251 } \
2252} \
2253
2254void shade_blocks_shaded_textured_modulated_dithered_direct(psx_gpu_struct
2255 *psx_gpu);
2256void shade_blocks_shaded_textured_modulated_undithered_direct(psx_gpu_struct
2257 *psx_gpu);
2258void shade_blocks_unshaded_textured_modulated_dithered_direct(psx_gpu_struct
2259 *psx_gpu);
2260void shade_blocks_unshaded_textured_modulated_undithered_direct(psx_gpu_struct
2261 *psx_gpu);
2262
2263void shade_blocks_shaded_textured_modulated_dithered_indirect(psx_gpu_struct
2264 *psx_gpu);
2265void shade_blocks_shaded_textured_modulated_undithered_indirect(psx_gpu_struct
2266 *psx_gpu);
2267void shade_blocks_unshaded_textured_modulated_dithered_indirect(psx_gpu_struct
2268 *psx_gpu);
2269void shade_blocks_unshaded_textured_modulated_undithered_indirect(psx_gpu_struct
2270 *psx_gpu);
2271
3867c6ef
E
2272void shade_blocks_textured_unmodulated_indirect(psx_gpu_struct *psx_gpu);
2273void shade_blocks_textured_unmodulated_direct(psx_gpu_struct *psx_gpu);
2274
2bbbb7af 2275#ifndef NEON_BUILD
75e28f62
E
2276
2277shade_blocks_textured_modulated_builder(shaded, dithered, direct);
2278shade_blocks_textured_modulated_builder(shaded, undithered, direct);
2279shade_blocks_textured_modulated_builder(unshaded, dithered, direct);
2280shade_blocks_textured_modulated_builder(unshaded, undithered, direct);
2281
2282shade_blocks_textured_modulated_builder(shaded, dithered, indirect);
2283shade_blocks_textured_modulated_builder(shaded, undithered, indirect);
2284shade_blocks_textured_modulated_builder(unshaded, dithered, indirect);
2285shade_blocks_textured_modulated_builder(unshaded, undithered, indirect);
2286
2287#endif
2288
2289
// Generates void shade_blocks_textured_unmodulated_<target>(psx_gpu_struct *).
// For each of psx_gpu->num_blocks blocks the texels are used as the output
// pixels unchanged. The store mask is the block's draw mask (draw_mask_bits
// tested against psx_gpu->test_mask) OR the lanes whose texel compared equal
// to zero. The result goes through shade_blocks_store_<target>.
2290#define shade_blocks_textured_unmodulated_builder(target) \
2291void shade_blocks_textured_unmodulated_##target(psx_gpu_struct *psx_gpu) \
2292{ \
2293 block_struct *block = psx_gpu->blocks; \
2294 u32 num_blocks = psx_gpu->num_blocks; \
2295 vec_8x16u draw_mask; \
2296 vec_8x16u test_mask = psx_gpu->test_mask; \
2297 u32 draw_mask_bits; \
2298 \
2299 vec_8x16u pixels; \
2300 shade_blocks_load_msb_mask_##target(); \
2301 \
2302 while(num_blocks) \
2303 { \
2304 vec_8x16u zero_mask; \
2305 \
2306 draw_mask_bits = block->draw_mask_bits; \
2307 dup_8x16b(draw_mask, draw_mask_bits); \
2308 tst_8x16b(draw_mask, draw_mask, test_mask); \
2309 \
2310 pixels = block->texels; \
2311 \
2312 cmpeqz_8x16b(zero_mask, pixels); \
2313 or_8x16b(zero_mask, draw_mask, zero_mask); \
2314 \
2315 shade_blocks_store_##target(zero_mask, pixels); \
2316 \
2317 num_blocks--; \
2318 block++; \
2319 } \
2320} \
2321
3867c6ef
E
// Generates
//   void shade_blocks_textured_unmodulated_dithered_<target>(psx_gpu_struct *).
// NOTE(review): the generated body is statement-for-statement the same as the
// one produced by shade_blocks_textured_unmodulated_builder; it never reads
// block->dither_offsets, so no dithering is applied here. No instantiation of
// this builder is visible in this part of the file - confirm whether it is
// still needed.
2322#define shade_blocks_textured_unmodulated_dithered_builder(target) \
2323void shade_blocks_textured_unmodulated_dithered_##target(psx_gpu_struct \
2324 *psx_gpu) \
2325{ \
2326 block_struct *block = psx_gpu->blocks; \
2327 u32 num_blocks = psx_gpu->num_blocks; \
2328 vec_8x16u draw_mask; \
2329 vec_8x16u test_mask = psx_gpu->test_mask; \
2330 u32 draw_mask_bits; \
2331 \
2332 vec_8x16u pixels; \
2333 shade_blocks_load_msb_mask_##target(); \
2334 \
2335 while(num_blocks) \
2336 { \
2337 vec_8x16u zero_mask; \
2338 \
2339 draw_mask_bits = block->draw_mask_bits; \
2340 dup_8x16b(draw_mask, draw_mask_bits); \
2341 tst_8x16b(draw_mask, draw_mask, test_mask); \
2342 \
2343 pixels = block->texels; \
2344 \
2345 cmpeqz_8x16b(zero_mask, pixels); \
2346 or_8x16b(zero_mask, draw_mask, zero_mask); \
2347 \
2348 shade_blocks_store_##target(zero_mask, pixels); \
2349 \
2350 num_blocks--; \
2351 block++; \
2352 } \
2353} \
75e28f62 2354
2bbbb7af 2355#ifndef NEON_BUILD
75e28f62
E
2356
2357shade_blocks_textured_unmodulated_builder(indirect)
2358shade_blocks_textured_unmodulated_builder(direct)
2359
2360#endif
2361
2362
2363void shade_blocks_unshaded_untextured_indirect(psx_gpu_struct *psx_gpu);
2364void shade_blocks_unshaded_untextured_direct(psx_gpu_struct *psx_gpu);
2365
2bbbb7af 2366#ifndef NEON_BUILD
f9248bbf 2367
75e28f62
E
2368void shade_blocks_unshaded_untextured_indirect(psx_gpu_struct *psx_gpu)
2369{
2370}
2371
2372void shade_blocks_unshaded_untextured_direct(psx_gpu_struct *psx_gpu)
2373{
2374 block_struct *block = psx_gpu->blocks;
2375 u32 num_blocks = psx_gpu->num_blocks;
2376
2377 vec_8x16u pixels = block->pixels;
2378 shade_blocks_load_msb_mask_direct();
2379
2380 while(num_blocks)
2381 {
2382 shade_blocks_store_direct(block->draw_mask, pixels);
2383
2384 num_blocks--;
2385 block++;
2386 }
2387}
2388
2389#endif
2390
2391void shade_blocks_shaded_untextured(psx_gpu_struct *psx_gpu)
2392{
2393}
2394
2395
// Mask evaluation step of blend_blocks_builder.
// "on": ORs into draw_mask the lanes selected by cmpltz_8x16b on the
// framebuffer pixels.
// NOTE(review): cmpltz presumably flags lanes whose top bit (0x8000) is set -
// confirm against the vector helper definitions.
// "off": expands to nothing.
2396#define blend_blocks_mask_evaluate_on() \
2397 vec_8x16u mask_pixels; \
2398 cmpltz_8x16b(mask_pixels, framebuffer_pixels); \
2399 or_8x16b(draw_mask, draw_mask, mask_pixels) \
2400
2401#define blend_blocks_mask_evaluate_off() \
2402
// Average blend: writes to blend_pixels the average of `pixels` and
// `framebuffer_pixels`, both with bit 0x8000 cleared first.
// Before averaging, (pixels ^ framebuffer_pixels) & 0x0421 is subtracted from
// the source. 0x0421 is bits 0, 5 and 10, i.e. the lowest bit of each 5-bit
// field.
// NOTE(review): presumably this stops a rounding carry from crossing field
// boundaries inside average_8x16b - confirm.
2403#define blend_blocks_average() \
2404{ \
2405 vec_8x16u pixels_no_msb; \
2406 vec_8x16u fb_pixels_no_msb; \
2407 \
2408 vec_8x16u d128_0x0421; \
2409 vec_8x16u d128_0x8000; \
2410 \
2411 dup_8x16b(d128_0x0421, 0x0421); \
2412 dup_8x16b(d128_0x8000, 0x8000); \
2413 \
2414 eor_8x16b(blend_pixels, pixels, framebuffer_pixels); \
2415 bic_8x16b(pixels_no_msb, pixels, d128_0x8000); \
2416 and_8x16b(blend_pixels, blend_pixels, d128_0x0421); \
2417 sub_8x16b(blend_pixels, pixels_no_msb, blend_pixels); \
2418 bic_8x16b(fb_pixels_no_msb, framebuffer_pixels, d128_0x8000); \
2419 average_8x16b(blend_pixels, fb_pixels_no_msb, blend_pixels); \
2420} \
2421
// Additive blend: blend_pixels = framebuffer_pixels + pixels, per field.
// The two outer 5-bit fields (mask 0x7C1F) and the middle field (mask 0x03E0)
// are added separately. The outer pair is then limited byte-wise against
// 0x7C1F and the middle field against 0x03E0 before the halves are ORed
// together again.
// NOTE(review): min_16x8b / min_8x16b are presumably unsigned lane minimums -
// confirm.
2422#define blend_blocks_add() \
2423{ \
2424 vec_8x16u pixels_rb, pixels_g; \
2425 vec_8x16u fb_rb, fb_g; \
2426 \
2427 vec_8x16u d128_0x7C1F; \
2428 vec_8x16u d128_0x03E0; \
2429 \
2430 dup_8x16b(d128_0x7C1F, 0x7C1F); \
2431 dup_8x16b(d128_0x03E0, 0x03E0); \
2432 \
2433 and_8x16b(pixels_rb, pixels, d128_0x7C1F); \
2434 and_8x16b(pixels_g, pixels, d128_0x03E0); \
2435 \
2436 and_8x16b(fb_rb, framebuffer_pixels, d128_0x7C1F); \
2437 and_8x16b(fb_g, framebuffer_pixels, d128_0x03E0); \
2438 \
2439 add_8x16b(fb_rb, fb_rb, pixels_rb); \
2440 add_8x16b(fb_g, fb_g, pixels_g); \
2441 \
2442 min_16x8b(vector_cast(vec_16x8u, fb_rb), vector_cast(vec_16x8u, fb_rb), \
2443 vector_cast(vec_16x8u, d128_0x7C1F)); \
2444 min_8x16b(fb_g, fb_g, d128_0x03E0); \
2445 \
2446 or_8x16b(blend_pixels, fb_rb, fb_g); \
2447} \
2448
// Subtractive blend: blend_pixels = framebuffer_pixels - pixels, per field.
// The outer 5-bit fields (mask 0x7C1F) are subtracted byte-wise with
// subs_16x8b and the middle field (mask 0x03E0) with subs_8x16b; the two
// halves are then ORed together.
// NOTE(review): subs_* are presumably saturating subtractions that clamp at
// zero - confirm.
2449#define blend_blocks_subtract() \
2450{ \
2451 vec_8x16u pixels_rb, pixels_g; \
2452 vec_8x16u fb_rb, fb_g; \
2453 \
2454 vec_8x16u d128_0x7C1F; \
2455 vec_8x16u d128_0x03E0; \
2456 \
2457 dup_8x16b(d128_0x7C1F, 0x7C1F); \
2458 dup_8x16b(d128_0x03E0, 0x03E0); \
2459 \
2460 and_8x16b(pixels_rb, pixels, d128_0x7C1F); \
2461 and_8x16b(pixels_g, pixels, d128_0x03E0); \
2462 \
2463 and_8x16b(fb_rb, framebuffer_pixels, d128_0x7C1F); \
2464 and_8x16b(fb_g, framebuffer_pixels, d128_0x03E0); \
2465 \
2466 subs_16x8b(vector_cast(vec_16x8u, fb_rb), \
2467 vector_cast(vec_16x8u, fb_rb), vector_cast(vec_16x8u, pixels_rb)); \
2468 subs_8x16b(fb_g, fb_g, pixels_g); \
2469 \
2470 or_8x16b(blend_pixels, fb_rb, fb_g); \
2471} \
2472
// Quarter-additive blend: blend_pixels = framebuffer_pixels + pixels / 4,
// per field.
// `pixels` is shifted right by two; masks 0x1C07 and 0x00E0 then keep only
// the three bits of each field that remain inside that field after the shift.
// The sums are limited in the same way as in blend_blocks_add (byte-wise
// against 0x7C1F, and against 0x03E0).
2473#define blend_blocks_add_fourth() \
2474{ \
2475 vec_8x16u pixels_rb, pixels_g; \
2476 vec_8x16u pixels_fourth; \
2477 vec_8x16u fb_rb, fb_g; \
2478 \
2479 vec_8x16u d128_0x7C1F; \
2480 vec_8x16u d128_0x1C07; \
2481 vec_8x16u d128_0x03E0; \
2482 vec_8x16u d128_0x00E0; \
2483 \
2484 dup_8x16b(d128_0x7C1F, 0x7C1F); \
2485 dup_8x16b(d128_0x1C07, 0x1C07); \
2486 dup_8x16b(d128_0x03E0, 0x03E0); \
2487 dup_8x16b(d128_0x00E0, 0x00E0); \
2488 \
2489 shr_8x16b(pixels_fourth, vector_cast(vec_8x16s, pixels), 2); \
2490 \
2491 and_8x16b(fb_rb, framebuffer_pixels, d128_0x7C1F); \
2492 and_8x16b(fb_g, framebuffer_pixels, d128_0x03E0); \
2493 \
2494 and_8x16b(pixels_rb, pixels_fourth, d128_0x1C07); \
2495 and_8x16b(pixels_g, pixels_fourth, d128_0x00E0); \
2496 \
2497 add_8x16b(fb_rb, fb_rb, pixels_rb); \
2498 add_8x16b(fb_g, fb_g, pixels_g); \
2499 \
2500 min_16x8b(vector_cast(vec_16x8u, fb_rb), vector_cast(vec_16x8u, fb_rb), \
2501 vector_cast(vec_16x8u, d128_0x7C1F)); \
2502 min_8x16b(fb_g, fb_g, d128_0x03E0); \
2503 \
2504 or_8x16b(blend_pixels, fb_rb, fb_g); \
2505} \
2506
// Final selection after a blend operation.
// Textured: sets bit 0x8000 in blend_pixels, then uses a mask derived from
// cmpltz_8x16b(pixels) to choose per lane between the blended value and the
// original source pixel.
// NOTE(review): presumably lanes whose source pixel has bit 0x8000 clear keep
// the unblended source pixel - confirm against bif_8x16b / cmpltz_8x16b.
// Untextured: expands to nothing, so the blended value is always used.
2507#define blend_blocks_blended_combine_textured() \
2508{ \
2509 vec_8x16u blend_mask; \
2510 cmpltz_8x16b(blend_mask, pixels); \
2511 \
2512 or_immediate_8x16b(blend_pixels, blend_pixels, 0x8000); \
2513 bif_8x16b(blend_pixels, pixels, blend_mask); \
2514} \
2515
2516#define blend_blocks_blended_combine_untextured() \
2517
2518
// blend_blocks_body_<blend_mode>(texturing) dispatch used by
// blend_blocks_builder. The four real blend modes run blend_blocks_<mode>()
// followed by blend_blocks_blended_combine_<texturing>(). "unblended" just
// copies pixels into blend_pixels and ignores its texturing argument.
2519#define blend_blocks_body_blend(blend_mode, texturing) \
2520{ \
2521 blend_blocks_##blend_mode(); \
2522 blend_blocks_blended_combine_##texturing(); \
2523} \
2524
2525#define blend_blocks_body_average(texturing) \
2526 blend_blocks_body_blend(average, texturing) \
2527
2528#define blend_blocks_body_add(texturing) \
2529 blend_blocks_body_blend(add, texturing) \
2530
2531#define blend_blocks_body_subtract(texturing) \
2532 blend_blocks_body_blend(subtract, texturing) \
2533
2534#define blend_blocks_body_add_fourth(texturing) \
2535 blend_blocks_body_blend(add_fourth, texturing) \
2536
2537#define blend_blocks_body_unblended(texturing) \
2538 blend_pixels = pixels \
2539
2540
// Generates
//   void blend_blocks_<texturing>_<blend_mode>_<mask_evaluate>(
//     psx_gpu_struct *psx_gpu)
// For each of psx_gpu->num_blocks blocks it:
//  - reads the block's pixels, draw_mask and fb_ptr and loads the current
//    framebuffer pixels from fb_ptr;
//  - optionally extends draw_mask (blend_blocks_mask_evaluate_<mask_evaluate>);
//  - computes blend_pixels (blend_blocks_body_<blend_mode>);
//  - ORs psx_gpu->mask_msb into the result, merges it with the framebuffer
//    pixels under draw_mask and stores it back to fb_ptr.
// The global counter blend_blocks is incremented once per block; unlike the
// counters in render_triangle, this increment is not guarded by PROFILE.
2541#define blend_blocks_builder(texturing, blend_mode, mask_evaluate) \
2542void \
2543 blend_blocks_##texturing##_##blend_mode##_##mask_evaluate(psx_gpu_struct \
2544 *psx_gpu) \
2545{ \
2546 block_struct *block = psx_gpu->blocks; \
2547 u32 num_blocks = psx_gpu->num_blocks; \
2548 vec_8x16u draw_mask; \
2549 vec_8x16u pixels; \
2550 vec_8x16u blend_pixels; \
2551 vec_8x16u framebuffer_pixels; \
2552 vec_8x16u msb_mask; \
2553 \
2554 u16 *fb_ptr; \
2555 \
2556 dup_8x16b(msb_mask, psx_gpu->mask_msb); \
2557 \
2558 while(num_blocks) \
2559 { \
2560 pixels = block->pixels; \
2561 draw_mask = block->draw_mask; \
2562 fb_ptr = block->fb_ptr; \
2563 \
2564 load_8x16b(framebuffer_pixels, fb_ptr); \
2565 \
2566 blend_blocks_mask_evaluate_##mask_evaluate(); \
2567 blend_blocks_body_##blend_mode(texturing); \
2568 \
2569 or_8x16b(blend_pixels, blend_pixels, msb_mask); \
2570 bif_8x16b(framebuffer_pixels, blend_pixels, draw_mask); \
2571 store_8x16b(framebuffer_pixels, fb_ptr); \
2572 \
2573 blend_blocks++; \
2574 num_blocks--; \
2575 block++; \
2576 } \
2577} \
2578
2579void blend_blocks_textured_average_off(psx_gpu_struct *psx_gpu);
2580void blend_blocks_textured_average_on(psx_gpu_struct *psx_gpu);
2581void blend_blocks_textured_add_off(psx_gpu_struct *psx_gpu);
2582void blend_blocks_textured_add_on(psx_gpu_struct *psx_gpu);
2583void blend_blocks_textured_subtract_off(psx_gpu_struct *psx_gpu);
2584void blend_blocks_textured_subtract_on(psx_gpu_struct *psx_gpu);
2585void blend_blocks_textured_add_fourth_off(psx_gpu_struct *psx_gpu);
2586void blend_blocks_textured_add_fourth_on(psx_gpu_struct *psx_gpu);
2587
2588void blend_blocks_untextured_average_off(psx_gpu_struct *psx_gpu);
2589void blend_blocks_untextured_average_on(psx_gpu_struct *psx_gpu);
2590void blend_blocks_untextured_add_off(psx_gpu_struct *psx_gpu);
2591void blend_blocks_untextured_add_on(psx_gpu_struct *psx_gpu);
2592void blend_blocks_untextured_subtract_off(psx_gpu_struct *psx_gpu);
2593void blend_blocks_untextured_subtract_on(psx_gpu_struct *psx_gpu);
2594void blend_blocks_untextured_add_fourth_off(psx_gpu_struct *psx_gpu);
2595void blend_blocks_untextured_add_fourth_on(psx_gpu_struct *psx_gpu);
2596
2597void blend_blocks_textured_unblended_off(psx_gpu_struct *psx_gpu);
2598void blend_blocks_textured_unblended_on(psx_gpu_struct *psx_gpu);
2599
2bbbb7af 2600#ifndef NEON_BUILD
75e28f62
E
2601
2602void blend_blocks_textured_unblended_off(psx_gpu_struct *psx_gpu)
2603{
2604}
2605
2606blend_blocks_builder(textured, average, off);
2607blend_blocks_builder(textured, average, on);
2608blend_blocks_builder(textured, add, off);
2609blend_blocks_builder(textured, add, on);
2610blend_blocks_builder(textured, subtract, off);
2611blend_blocks_builder(textured, subtract, on);
2612blend_blocks_builder(textured, add_fourth, off);
2613blend_blocks_builder(textured, add_fourth, on);
2614
2615blend_blocks_builder(untextured, average, off);
2616blend_blocks_builder(untextured, average, on);
2617blend_blocks_builder(untextured, add, off);
2618blend_blocks_builder(untextured, add, on);
2619blend_blocks_builder(untextured, subtract, off);
2620blend_blocks_builder(untextured, subtract, on);
2621blend_blocks_builder(untextured, add_fourth, off);
2622blend_blocks_builder(untextured, add_fourth, on);
2623
2624blend_blocks_builder(textured, unblended, on);
2625
2626#endif
2627
2628
// Exchanges the two vertex_struct pointers _a and _b and flips bit 0 of
// `triangle_winding`, which must be a variable in the caller's scope.
// Expands to a braced block, so `vertex_swap(a, b);` is a block followed by
// an empty statement.
2629#define vertex_swap(_a, _b) \
2630{ \
2631 vertex_struct *temp_vertex = _a; \
2632 _a = _b; \
2633 _b = temp_vertex; \
2634 triangle_winding ^= 1; \
2635} \
2636
2637
2638// Setup blocks parametric-variables:
2639// SHADE TEXTURE_MAP SWIZZLING
2640// 0 0 x
2641// 0 1 0
2642// 0 1 1
2643// 1 0 x
2644// 1 1 0
2645// 1 1 1
2646// 8 inputs, 6 combinations
2647
// setup_blocks_switch(...) expands to the name of the setup_blocks_* routine
// for one combination of render flags. As written below:
//  - untextured + unshaded always picks the undithered, unswizzled routine
//    (the dithering argument is ignored);
//  - textured primitives always pick the dithered, indirect routine (the
//    dithering and target arguments are ignored): swizzled for 4bpp and 8bpp,
//    unswizzled for 16bpp;
//  - blended primitives use the indirect target; unblended ones use indirect
//    when mask evaluation is on and direct when it is off.
2648#define setup_blocks_switch_untextured_unshaded(dithering, target) \
2649 setup_blocks_unshaded_untextured_undithered_unswizzled_##target \
2650
2651#define setup_blocks_switch_untextured_shaded(dithering, target) \
2652 setup_blocks_shaded_untextured_##dithering##_unswizzled_##target \
2653
2654#define setup_blocks_switch_untextured(shading, texture_mode, dithering, \
2655 target) \
2656 setup_blocks_switch_untextured_##shading(dithering, target) \
2657
2658#define setup_blocks_switch_texture_mode_4bpp(shading) \
2659 setup_blocks_##shading##_textured_dithered_swizzled_indirect \
2660
2661#define setup_blocks_switch_texture_mode_8bpp(shading) \
2662 setup_blocks_##shading##_textured_dithered_swizzled_indirect \
2663
2664#define setup_blocks_switch_texture_mode_16bpp(shading) \
2665 setup_blocks_##shading##_textured_dithered_unswizzled_indirect \
2666
2667#define setup_blocks_switch_textured(shading, texture_mode, dithering, target) \
2668 setup_blocks_switch_texture_mode_##texture_mode(shading) \
2669
2670#define setup_blocks_switch_blended(shading, texturing, texture_mode, \
2671 dithering, mask_evaluate) \
2672 setup_blocks_switch_##texturing(shading, texture_mode, dithering, indirect) \
2673
2674#define setup_blocks_switch_unblended_on(shading, texturing, texture_mode, \
2675 dithering) \
2676 setup_blocks_switch_##texturing(shading, texture_mode, dithering, indirect) \
2677
2678#define setup_blocks_switch_unblended_off(shading, texturing, texture_mode, \
2679 dithering) \
2680 setup_blocks_switch_##texturing(shading, texture_mode, dithering, direct) \
2681
2682#define setup_blocks_switch_unblended(shading, texturing, texture_mode, \
2683 dithering, mask_evaluate) \
2684 setup_blocks_switch_unblended_##mask_evaluate(shading, texturing, \
2685 texture_mode, dithering) \
2686
2687#define setup_blocks_switch(shading, texturing, texture_mode, dithering, \
2688 blending, mask_evaluate) \
2689 setup_blocks_switch_##blending(shading, texturing, texture_mode, \
2690 dithering, mask_evaluate) \
2691
2692
2693// Texture blocks:
2694
// texture_blocks_switch(texturing, texture_mode) expands to
// texture_blocks_untextured for untextured primitives (texture_mode is
// ignored) and to texture_blocks_<texture_mode> for textured ones.
2695#define texture_blocks_switch_untextured(texture_mode) \
2696 texture_blocks_untextured \
2697
2698#define texture_blocks_switch_textured(texture_mode) \
2699 texture_blocks_##texture_mode \
2700
2701#define texture_blocks_switch(texturing, texture_mode) \
2702 texture_blocks_switch_##texturing(texture_mode) \
2703
2704
2705// Shade blocks parametric-variables:
2706// SHADE TEXTURE_MAP MODULATE_TEXELS dither_mode
2707// 0 0 x x
2708// 0 1 0 0
2709// 0 1 0 1
2710// x 1 1 x
2711// 1 0 x 0
2712// 1 0 x 1
2713// 1 1 0 0
2714// 1 1 0 1
2715// 16 inputs, 8 combinations
2716
// shade_blocks_switch(...) expands to the name of the shade_blocks_* routine
// for one combination of render flags. As written below:
//  - unshaded + untextured picks shade_blocks_unshaded_untextured_<target>;
//  - shaded + untextured always picks shade_blocks_shaded_untextured;
//  - textured + unmodulated picks shade_blocks_textured_unmodulated_<target>
//    regardless of shading or dithering;
//  - textured + modulated picks
//    shade_blocks_<shading>_textured_modulated_<dithering>_<target>;
//  - the target is indirect for blended primitives (mask_evaluate is ignored)
//    and for unblended primitives with mask evaluation on; it is direct for
//    unblended primitives with mask evaluation off.
2717#define shade_blocks_switch_unshaded_untextured(modulation, dithering, target) \
2718 shade_blocks_unshaded_untextured_##target \
2719
2720#define shade_blocks_switch_unshaded_textured_unmodulated(dithering, target) \
2721 shade_blocks_textured_unmodulated_##target \
2722
2723#define shade_blocks_switch_unshaded_textured_modulated(dithering, target) \
2724 shade_blocks_unshaded_textured_modulated_##dithering##_##target \
2725
2726#define shade_blocks_switch_unshaded_textured(modulation, dithering, target) \
2727 shade_blocks_switch_unshaded_textured_##modulation(dithering, target) \
2728
2729#define shade_blocks_switch_unshaded(texturing, modulation, dithering, target) \
2730 shade_blocks_switch_unshaded_##texturing(modulation, dithering, target) \
2731
2732#define shade_blocks_switch_shaded_untextured(modulation, dithering, target) \
2733 shade_blocks_shaded_untextured \
2734
2735#define shade_blocks_switch_shaded_textured_unmodulated(dithering, target) \
2736 shade_blocks_textured_unmodulated_##target \
2737
2738#define shade_blocks_switch_shaded_textured_modulated(dithering, target) \
2739 shade_blocks_shaded_textured_modulated_##dithering##_##target \
2740
2741#define shade_blocks_switch_shaded_textured(modulation, dithering, target) \
2742 shade_blocks_switch_shaded_textured_##modulation(dithering, target) \
2743
2744#define shade_blocks_switch_shaded(texturing, modulation, dithering, target) \
2745 shade_blocks_switch_shaded_##texturing(modulation, dithering, target) \
2746
2747#define shade_blocks_switch_mask_off(shading, texturing, modulation, \
2748 dithering) \
2749 shade_blocks_switch_##shading(texturing, modulation, dithering, direct) \
2750
2751#define shade_blocks_switch_mask_on(shading, texturing, modulation, \
2752 dithering) \
2753 shade_blocks_switch_##shading(texturing, modulation, dithering, indirect) \
2754
2755#define shade_blocks_switch_blended(shading, texturing, modulation, dithering, \
2756 mask_evaluate) \
2757 shade_blocks_switch_##shading(texturing, modulation, dithering, indirect) \
2758
2759#define shade_blocks_switch_unblended(shading, texturing, modulation, \
2760 dithering, mask_evaluate) \
2761 shade_blocks_switch_mask_##mask_evaluate(shading, texturing, modulation, \
2762 dithering) \
2763
2764#define shade_blocks_switch(shading, texturing, modulation, dithering, \
2765 blending, mask_evaluate) \
2766 shade_blocks_switch_##blending(shading, texturing, modulation, dithering, \
2767 mask_evaluate) \
2768
2769
2770// Blend blocks parametric-variables:
2771// TEXTURE_MAP BLEND BM_A BM_B mask_evaluate
2772// x 0 x x 0
2773// x 0 x x 1
2774// 0 1 0 0 0
2775// 0 1 0 0 1
2776// 0 1 0 1 0
2777// 0 1 0 1 1
2778// 0 1 1 0 0
2779// 0 1 1 0 1
2780// 0 1 1 1 0
2781// 0 1 1 1 1
2782// 1 1 0 0 0
2783// 1 1 0 0 1
2784// 1 1 0 1 0
2785// 1 1 0 1 1
2786// 1 1 1 0 0
2787// 1 1 1 0 1
2788// 1 1 1 1 0
2789// 1 1 1 1 1
2790// 32 inputs, 18 combinations
2791
// blend_blocks_switch(texturing, blending, blend_mode, mask_evaluate) expands
// to the name of the blend routine. Unblended primitives always use
// blend_blocks_textured_unblended_<mask_evaluate> (texturing and blend_mode
// are ignored). Blended ones use
// blend_blocks_<texturing>_<blend_mode>_<mask_evaluate>.
2792#define blend_blocks_switch_unblended(texturing, blend_mode, mask_evaluate) \
2793 blend_blocks_textured_unblended_##mask_evaluate \
2794
2795#define blend_blocks_switch_blended(texturing, blend_mode, mask_evaluate) \
2796 blend_blocks_##texturing##_##blend_mode##_##mask_evaluate \
2797
2798#define blend_blocks_switch(texturing, blending, blend_mode, mask_evaluate) \
2799 blend_blocks_switch_##blending(texturing, blend_mode, mask_evaluate) \
2800
2801
// render_blocks_switch_block() expands to a comma-separated list of table
// entries, one per combination of render flags. Each entry is
// { setup routine, texture routine, shade routine, blend routine }.
//
// Nesting order, fastest-varying first (first alternative listed first):
//   modulation   : modulated, unmodulated
//   blending     : unblended, blended
//   texturing    : untextured, textured
//   dithering    : undithered, dithered
//   shading      : unshaded, shaded
//   mask_evaluate: off, on
//   blend_mode   : average, add, subtract, add_fourth
//   texture_mode : 4bpp, 8bpp, 16bpp, 4bpp
// That is 2*2*2*2*2*2*4*4 = 1024 entries. The fourth texture-mode slot reuses
// the 4bpp routines.
2802#define render_blocks_switch_block_modulation(texture_mode, blend_mode, \
2803 mask_evaluate, shading, dithering, texturing, blending, modulation) \
2804{ \
2805 setup_blocks_switch(shading, texturing, texture_mode, dithering, blending, \
2806 mask_evaluate), \
2807 texture_blocks_switch(texturing, texture_mode), \
2808 shade_blocks_switch(shading, texturing, modulation, dithering, blending, \
2809 mask_evaluate), \
2810 blend_blocks_switch(texturing, blending, blend_mode, mask_evaluate) \
2811} \
2812
2813#define render_blocks_switch_block_blending(texture_mode, blend_mode, \
2814 mask_evaluate, shading, dithering, texturing, blending) \
2815 render_blocks_switch_block_modulation(texture_mode, blend_mode, \
2816 mask_evaluate, shading, dithering, texturing, blending, modulated), \
2817 render_blocks_switch_block_modulation(texture_mode, blend_mode, \
2818 mask_evaluate, shading, dithering, texturing, blending, unmodulated) \
2819
2820#define render_blocks_switch_block_texturing(texture_mode, blend_mode, \
2821 mask_evaluate, shading, dithering, texturing) \
2822 render_blocks_switch_block_blending(texture_mode, blend_mode, \
2823 mask_evaluate, shading, dithering, texturing, unblended), \
2824 render_blocks_switch_block_blending(texture_mode, blend_mode, \
2825 mask_evaluate, shading, dithering, texturing, blended) \
2826
2827#define render_blocks_switch_block_dithering(texture_mode, blend_mode, \
2828 mask_evaluate, shading, dithering) \
2829 render_blocks_switch_block_texturing(texture_mode, blend_mode, \
2830 mask_evaluate, shading, dithering, untextured), \
2831 render_blocks_switch_block_texturing(texture_mode, blend_mode, \
2832 mask_evaluate, shading, dithering, textured) \
2833
2834#define render_blocks_switch_block_shading(texture_mode, blend_mode, \
2835 mask_evaluate, shading) \
2836 render_blocks_switch_block_dithering(texture_mode, blend_mode, \
2837 mask_evaluate, shading, undithered), \
2838 render_blocks_switch_block_dithering(texture_mode, blend_mode, \
2839 mask_evaluate, shading, dithered) \
2840
2841#define render_blocks_switch_block_mask_evaluate(texture_mode, blend_mode, \
2842 mask_evaluate) \
2843 render_blocks_switch_block_shading(texture_mode, blend_mode, mask_evaluate, \
2844 unshaded), \
2845 render_blocks_switch_block_shading(texture_mode, blend_mode, mask_evaluate, \
2846 shaded) \
2847
2848#define render_blocks_switch_block_blend_mode(texture_mode, blend_mode) \
2849 render_blocks_switch_block_mask_evaluate(texture_mode, blend_mode, off), \
2850 render_blocks_switch_block_mask_evaluate(texture_mode, blend_mode, on) \
2851
2852#define render_blocks_switch_block_texture_mode(texture_mode) \
2853 render_blocks_switch_block_blend_mode(texture_mode, average), \
2854 render_blocks_switch_block_blend_mode(texture_mode, add), \
2855 render_blocks_switch_block_blend_mode(texture_mode, subtract), \
2856 render_blocks_switch_block_blend_mode(texture_mode, add_fourth) \
2857
2858#define render_blocks_switch_block() \
2859 render_blocks_switch_block_texture_mode(4bpp), \
2860 render_blocks_switch_block_texture_mode(8bpp), \
2861 render_blocks_switch_block_texture_mode(16bpp), \
2862 render_blocks_switch_block_texture_mode(4bpp) \
2863
2864
// Table of handler sets generated by render_blocks_switch_block().
// render_triangle indexes it with the combined render_state value.
2865render_block_handler_struct render_triangle_block_handlers[] =
2866{
2867 render_blocks_switch_block()
2868};
2869
// Redefines the innermost table macro so that the same
// render_blocks_switch_block() expansion now yields one string literal per
// table entry instead of a handler set. Each string lists the flag names of
// that entry, so render_block_flag_strings[] is index-compatible with
// render_triangle_block_handlers[].
2870#undef render_blocks_switch_block_modulation
2871
2872#define render_blocks_switch_block_modulation(texture_mode, blend_mode, \
2873 mask_evaluate, shading, dithering, texturing, blending, modulation) \
2874 "render flags:\n" \
2875 "texture mode: " #texture_mode "\n" \
2876 "blend mode: " #blend_mode "\n" \
2877 "mask evaluation: " #mask_evaluate "\n" \
2878 #shading "\n" \
2879 #dithering "\n" \
2880 #texturing "\n" \
2881 #blending "\n" \
2882 #modulation "\n" \
2883
2884char *render_block_flag_strings[] =
2885{
2886 render_blocks_switch_block()
2887};
2888
2889
// Direction code of one triangle edge, derived from its y delta:
// negative delta -> up (1), zero -> flat (2), positive -> down (0).
#define triangle_y_direction_up 1
#define triangle_y_direction_flat 2
#define triangle_y_direction_down 0

// Winding codes as stored in bit 6 of the setup switch selector.
#define triangle_winding_positive 0
#define triangle_winding_negative 1

// Declares `u32 direction_variable` and sets it to the direction code of
// `value`: the sign bit of value (1 when negative, 0 when positive), or
// triangle_y_direction_flat when value is zero.
// `value` is parenthesized at every use so that an expression argument
// (for example `y & 2`) is compared against zero as a whole.
#define triangle_set_direction(direction_variable, value) \
  u32 direction_variable = (u32)(value) >> 31; \
  if((value) == 0) \
    direction_variable = triangle_y_direction_flat

// Builds the `case` label for one combination of three edge directions and a
// winding: direction_a in bits 0-1, direction_b in bits 2-3, direction_c in
// bits 4-5 and the winding in bit 6.
#define triangle_case(direction_a, direction_b, direction_c, winding) \
  case (triangle_y_direction_##direction_a | \
   (triangle_y_direction_##direction_b << 2) | \
   (triangle_y_direction_##direction_c << 4) | \
   (triangle_winding_##winding << 6))
2907
75e28f62
E
// Sets up one triangle for rendering.
//   psx_gpu  - GPU state; its span list, triangle_area, triangle_winding,
//              render_state, primitive_type and render_block_handler are
//              updated here.
//   vertexes - array from which elements 0, 1 and 2 are read.
//   flags    - render flags; only RENDER_FLAGS_MODULATE_TEXELS, _BLEND,
//              _TEXTURE_MAP and _SHADE are taken from it.
// Returns early, without setting up any spans, when
//   - triangle_signed_area_x2() yields 0,
//   - the y extent is 512 or more,
//   - (c->x - psx_gpu->offset_x) or the x extent is 1024 or more, or
//   - invalidate_texture_cache_region_viewport() returns 0.
// With PROFILE defined, each of these rejections bumps trivial_rejects.
2908void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,
2909 u32 flags)
2910{
2911 s32 y_top, y_bottom;
2912 s32 triangle_area;
2913 u32 triangle_winding = 0;
2914
2915 vertex_struct *a = &(vertexes[0]);
2916 vertex_struct *b = &(vertexes[1]);
2917 vertex_struct *c = &(vertexes[2]);
2918
2919 triangle_area = triangle_signed_area_x2(a->x, a->y, b->x, b->y, c->x, c->y);
2920
3867c6ef 2921#ifdef PROFILE
75e28f62 2922 triangles++;
3867c6ef 2923#endif
75e28f62
E
2924
2925 if(triangle_area == 0)
2926 {
3867c6ef 2927#ifdef PROFILE
75e28f62 2928 trivial_rejects++;
3867c6ef 2929#endif
75e28f62
E
2930 return;
2931 }
2932
 // Order a, b, c by ascending y. Every vertex_swap also flips
 // triangle_winding.
2933 if(b->y < a->y)
2934 vertex_swap(a, b);
2935
2936 if(c->y < b->y)
2937 {
2938 vertex_swap(b, c);
2939
2940 if(b->y < a->y)
2941 vertex_swap(a, b);
2942 }
2943
2944 y_bottom = c->y;
2945 y_top = a->y;
2946
2947 if((y_bottom - y_top) >= 512)
2948 {
3867c6ef 2949#ifdef PROFILE
75e28f62 2950 trivial_rejects++;
3867c6ef 2951#endif
75e28f62
E
2952 return;
2953 }
2954
 // Make the area positive. The explicit flip and the flip done inside
 // vertex_swap cancel out, so triangle_winding is unchanged by this block.
2955 if(triangle_area < 0)
2956 {
2957 triangle_area = -triangle_area;
2958 triangle_winding ^= 1;
2959 vertex_swap(a, c);
2960 }
2961
 // Re-order a, b, c by ascending x (again flipping triangle_winding on each
 // swap). y_top / y_bottom keep the values captured above.
2962 if(b->x < a->x)
2963 vertex_swap(a, b);
2964
2965 if(c->x < b->x)
2966 {
2967 vertex_swap(b, c);
2968
2969 if(b->x < a->x)
2970 vertex_swap(a, b);
2971 }
2972
af044cbf 2973 if((c->x - psx_gpu->offset_x) >= 1024 || (c->x - a->x) >= 1024)
75e28f62 2974 {
3867c6ef 2975#ifdef PROFILE
75e28f62 2976 trivial_rejects++;
3867c6ef 2977#endif
75e28f62
E
2978 return;
2979 }
2980
2981 if(invalidate_texture_cache_region_viewport(psx_gpu, a->x, y_top, c->x,
2982 y_bottom) == 0)
2983 {
3867c6ef 2984#ifdef PROFILE
75e28f62 2985 trivial_rejects++;
3867c6ef 2986#endif
75e28f62
E
2987 return;
2988 }
2989
2990 psx_gpu->num_spans = 0;
2991 psx_gpu->triangle_area = triangle_area;
2992 psx_gpu->triangle_winding = triangle_winding;
2993
2994 s32 y_delta_a = b->y - a->y;
2995 s32 y_delta_b = c->y - b->y;
2996 s32 y_delta_c = c->y - a->y;
2997
 // Direction code per edge: 1 for a negative delta, 2 for zero, 0 for a
 // positive delta (see triangle_set_direction).
2998 triangle_set_direction(y_direction_a, y_delta_a);
2999 triangle_set_direction(y_direction_b, y_delta_b);
3000 triangle_set_direction(y_direction_c, y_delta_c);
3001
3002 compute_all_gradients(psx_gpu, a, b, c);
3003
 // Pick the span setup routine from the three edge directions and the
 // winding. Combinations without a case label call no setup routine.
3004 switch(y_direction_a | (y_direction_b << 2) | (y_direction_c << 4) |
3005 (triangle_winding << 6))
3006 {
3007 triangle_case(up, up, up, negative):
3008 triangle_case(up, up, flat, negative):
3009 triangle_case(up, up, down, negative):
3010 setup_spans_up_right(psx_gpu, a, b, c);
3011 break;
3012
3013 triangle_case(flat, up, up, negative):
3014 triangle_case(flat, up, flat, negative):
3015 triangle_case(flat, up, down, negative):
3016 setup_spans_up_a(psx_gpu, a, b, c);
3017 break;
3018
3019 triangle_case(down, up, up, negative):
3020 setup_spans_up_down(psx_gpu, a, c, b);
3021 break;
3022
3023 triangle_case(down, up, flat, negative):
3024 setup_spans_down_a(psx_gpu, a, c, b);
3025 break;
3026
3027 triangle_case(down, up, down, negative):
3028 setup_spans_down_right(psx_gpu, a, c, b);
3029 break;
3030
3031 triangle_case(down, flat, up, negative):
3032 triangle_case(down, flat, flat, negative):
3033 triangle_case(down, flat, down, negative):
3034 setup_spans_down_b(psx_gpu, a, b, c);
3035 break;
3036
3037 triangle_case(down, down, up, negative):
3038 triangle_case(down, down, flat, negative):
3039 triangle_case(down, down, down, negative):
3040 setup_spans_down_left(psx_gpu, a, b, c);
3041 break;
3042
3043 triangle_case(up, up, up, positive):
3044 triangle_case(up, up, flat, positive):
3045 triangle_case(up, up, down, positive):
3046 setup_spans_up_left(psx_gpu, a, b, c);
3047 break;
3048
3049 triangle_case(up, flat, up, positive):
3050 triangle_case(up, flat, flat, positive):
3051 triangle_case(up, flat, down, positive):
3052 setup_spans_up_b(psx_gpu, a, b, c);
3053 break;
3054
3055 triangle_case(up, down, up, positive):
3056 setup_spans_up_right(psx_gpu, a, c, b);
3057 break;
3058
3059 triangle_case(up, down, flat, positive):
3060 setup_spans_up_a(psx_gpu, a, c, b);
3061 break;
3062
3063 triangle_case(up, down, down, positive):
3064 setup_spans_up_down(psx_gpu, a, b, c);
3065 break;
3066
3067 triangle_case(flat, down, up, positive):
3068 triangle_case(flat, down, flat, positive):
3069 triangle_case(flat, down, down, positive):
3070 setup_spans_down_a(psx_gpu, a, b, c);
3071 break;
3072
3073 triangle_case(down, down, up, positive):
3074 triangle_case(down, down, flat, positive):
3075 triangle_case(down, down, down, positive):
3076 setup_spans_down_right(psx_gpu, a, b, c);
3077 break;
3078 }
3079
3867c6ef 3080#ifdef PROFILE
75e28f62 3081 spans += psx_gpu->num_spans;
3867c6ef 3082#endif
75e28f62 3083
69b09c0d
E
 // Interlaced rendering: drop every other line by zeroing num_blocks of the
 // affected spans. Spans with an even y are dropped when
 // RENDER_INTERLACE_ODD is set, spans with an odd y otherwise.
3084 if(psx_gpu->interlace_mode & RENDER_INTERLACE_ENABLED)
3085 {
3086 u32 i;
3087
3088 if(psx_gpu->interlace_mode & RENDER_INTERLACE_ODD)
3089 {
3090 for(i = 0; i < psx_gpu->num_spans; i++)
3091 {
3092 if((psx_gpu->span_edge_data[i].y & 1) == 0)
3093 psx_gpu->span_edge_data[i].num_blocks = 0;
3094 }
3095 }
3096 else
3097 {
3098 for(i = 0; i < psx_gpu->num_spans; i++)
3099 {
3100 if(psx_gpu->span_edge_data[i].y & 1)
3101 psx_gpu->span_edge_data[i].num_blocks = 0;
3102 }
3103 }
3104 }
3105
75e28f62
E
 // Combine the per-primitive flags with the GPU-wide base state. On a state
 // change, or when the previous primitive was not a triangle, the new state
 // is stored and the pending block buffer is flushed.
3106 u32 render_state = flags &
3107 (RENDER_FLAGS_MODULATE_TEXELS | RENDER_FLAGS_BLEND |
3108 RENDER_FLAGS_TEXTURE_MAP | RENDER_FLAGS_SHADE);
3109 render_state |= psx_gpu->render_state_base;
3110
3111 if((psx_gpu->render_state != render_state) ||
3112 (psx_gpu->primitive_type != PRIMITIVE_TYPE_TRIANGLE))
3113 {
3114 psx_gpu->render_state = render_state;
3115 flush_render_block_buffer(psx_gpu);
3867c6ef 3116#ifdef PROFILE
75e28f62 3117 state_changes++;
3867c6ef 3118#endif
75e28f62
E
3119 }
3120
3121 psx_gpu->primitive_type = PRIMITIVE_TYPE_TRIANGLE;
3122
 // Select the handler set for this state and run its setup_blocks stage.
3123 psx_gpu->render_block_handler =
3124 &(render_triangle_block_handlers[render_state]);
3125 ((setup_blocks_function_type *)psx_gpu->render_block_handler->setup_blocks)
3126 (psx_gpu);
3127}
3128
3129
3130void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu);
3131
2bbbb7af 3132#ifndef NEON_BUILD
75e28f62
E
3133
3134void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu)
3135{
3136 block_struct *block = psx_gpu->blocks;
3137 u32 num_blocks = psx_gpu->num_blocks;
3138
3139 vec_8x16u texels;
3140 vec_8x8u texel_indexes;
3141
3142 u16 *clut_ptr = psx_gpu->clut_ptr;
3143 u32 i;
3144
3145 while(num_blocks)
3146 {
3147 texel_indexes = block->r;
3148
3149 for(i = 0; i < 8; i++)
3150 {
3151 texels.e[i] = clut_ptr[texel_indexes.e[i]];
3152 }
3153
3154 block->texels = texels;
3155
3156 num_blocks--;
3157 block++;
3158 }
3159}
3160
3161#endif
3162
3163
/*
 * Prologue for the 4bpp tiled-sprite path. Expands to declarations, so it
 * must be used where declarations are allowed, and expects `psx_gpu` in scope.
 *
 * Introduces into the caller's scope:
 *   clut_ptr           - psx_gpu->clut_ptr
 *   clut_a, clut_b     - the first and second group of eight CLUT entries
 *   clut_low/clut_high - tables produced from clut_a/clut_b by unzip_16x8b
 *                        (presumably the low and high bytes of each entry -
 *                        confirm against unzip_16x8b)
 *
 * Finally refreshes the 4bpp texture cache when the current texture mask
 * overlaps the dirty 4bpp mask. The caller supplies the closing ';'.
 */
#define setup_sprite_tiled_initialize_4bpp()                                   \
  u16 *clut_ptr = psx_gpu->clut_ptr;                                           \
  vec_8x16u clut_a;                                                            \
  vec_8x16u clut_b;                                                            \
  vec_16x8u clut_low;                                                          \
  vec_16x8u clut_high;                                                         \
                                                                               \
  load_8x16b(clut_a, clut_ptr);                                                \
  load_8x16b(clut_b, clut_ptr + 8);                                            \
  unzip_16x8b(clut_low, clut_high, clut_a, clut_b);                            \
                                                                               \
  if((psx_gpu->current_texture_mask &                                          \
   psx_gpu->dirty_textures_4bpp_mask) != 0)                                    \
    update_texture_4bpp_cache(psx_gpu)
3175
/*
 * Prologue for the 8bpp tiled-sprite path: refreshes the 8bpp texture cache
 * when the current texture mask overlaps the dirty 8bpp mask. Expects
 * `psx_gpu` in scope; the caller supplies the closing ';'.
 */
#define setup_sprite_tiled_initialize_8bpp()                                   \
  if((psx_gpu->current_texture_mask &                                          \
   psx_gpu->dirty_textures_8bpp_mask) != 0)                                    \
    update_texture_8bpp_cache(psx_gpu)
3179
3180
/*
 * Loads one 64-bit group of texel data into `texels`.
 *
 * The source address is psx_gpu->texture_page_ptr plus
 * (texture_offset + offset) masked with texture_mask; the computed address is
 * left in texture_block_ptr. Expects psx_gpu, texture_block_ptr,
 * texture_offset, texture_mask and texels in the caller's scope, and the
 * caller supplies the closing ';'.
 *
 * `offset` is parenthesized so that an argument containing an operator of
 * lower precedence than '+' (e.g. `flag ? 8 : 0`) is added as a whole instead
 * of being re-grouped with texture_offset.
 */
#define setup_sprite_tile_fetch_texel_block_8bpp(offset)                       \
  texture_block_ptr = psx_gpu->texture_page_ptr +                              \
   ((texture_offset + (offset)) & texture_mask);                               \
                                                                               \
  load_64b(texels, texture_block_ptr)
3186
3187
/*
 * Per-block setup hooks for the tiled sprite path. Both variants expand to
 * nothing; their arguments are discarded.
 */
#define setup_sprite_tile_setup_block_yes(side, offset, texture_mode)

#define setup_sprite_tile_setup_block_no(side, offset, texture_mode)
3191
/*
 * Reserves room for tile_num_blocks more blocks.
 *
 * Adds tile_num_blocks to the caller's running `num_blocks` and to the
 * `sprite_blocks` counter. If the running count then exceeds MAX_BLOCKS, the
 * block buffer is flushed, `num_blocks` restarts at tile_num_blocks and
 * `block` is rewound to psx_gpu->blocks.
 *
 * Expects psx_gpu, num_blocks and block in the caller's scope.
 * tile_num_blocks is evaluated more than once, so it must be free of side
 * effects.
 */
#define setup_sprite_tile_add_blocks(tile_num_blocks)                          \
  num_blocks += (tile_num_blocks);                                             \
  sprite_blocks += (tile_num_blocks);                                          \
                                                                               \
  if(MAX_BLOCKS < num_blocks)                                                  \
  {                                                                            \
    flush_render_block_buffer(psx_gpu);                                        \
    num_blocks = (tile_num_blocks);                                            \
    block = psx_gpu->blocks;                                                   \
  }
3202
/*
 * Emits a full-width 4bpp tile column: two blocks per row for
 * sub_tile_height rows.
 *
 * Per row, texel data fetched at texture_offset + 0 and texture_offset + 8 is
 * run through the clut_low/clut_high tables and zipped into 16-bit texels.
 * The first block gets left_mask_bits and fb_ptr, the second right_mask_bits
 * and fb_ptr + 8. After each row fb_ptr advances by 1024 and texture_offset
 * by 0x10; after the last row texture_offset advances by a further 0xF00 and
 * the running block count is written back to psx_gpu->num_blocks.
 *
 * sub_tile_height is consumed (left at zero). The `edge` argument is unused.
 */
#define setup_sprite_tile_full_4bpp(edge)                                      \
{                                                                              \
  vec_8x8u lookup_low, lookup_high;                                            \
  vec_8x16u texel_colors;                                                      \
  setup_sprite_tile_add_blocks(sub_tile_height * 2);                           \
                                                                               \
  for(; sub_tile_height; sub_tile_height--)                                    \
  {                                                                            \
    setup_sprite_tile_fetch_texel_block_8bpp(0);                               \
    tbl_16(lookup_low, texels, clut_low);                                      \
    tbl_16(lookup_high, texels, clut_high);                                    \
    zip_8x16b(texel_colors, lookup_low, lookup_high);                          \
                                                                               \
    block[0].texels = texel_colors;                                            \
    block[0].draw_mask_bits = left_mask_bits;                                  \
    block[0].fb_ptr = fb_ptr;                                                  \
                                                                               \
    setup_sprite_tile_fetch_texel_block_8bpp(8);                               \
    tbl_16(lookup_low, texels, clut_low);                                      \
    tbl_16(lookup_high, texels, clut_high);                                    \
    zip_8x16b(texel_colors, lookup_low, lookup_high);                          \
                                                                               \
    block[1].texels = texel_colors;                                            \
    block[1].draw_mask_bits = right_mask_bits;                                 \
    block[1].fb_ptr = fb_ptr + 8;                                              \
    block += 2;                                                                \
                                                                               \
    fb_ptr += 1024;                                                            \
    texture_offset += 0x10;                                                    \
  }                                                                            \
  texture_offset += 0xF00;                                                     \
  psx_gpu->num_blocks = num_blocks;                                            \
}
3238
/*
 * Emits a half-width 4bpp tile column: one block per row for
 * sub_tile_height rows.
 *
 * Per row, texel data fetched at texture_offset is run through the
 * clut_low/clut_high tables and zipped into 16-bit texels; the block gets
 * edge##_mask_bits (so `edge` selects e.g. left_mask_bits or
 * right_mask_bits) and fb_ptr. After each row fb_ptr advances by 1024 and
 * texture_offset by 0x10; after the last row texture_offset advances by a
 * further 0xF00 and the running block count is written back to
 * psx_gpu->num_blocks.
 *
 * sub_tile_height is consumed (left at zero).
 */
#define setup_sprite_tile_half_4bpp(edge)                                      \
{                                                                              \
  vec_8x8u lookup_low, lookup_high;                                            \
  vec_8x16u texel_colors;                                                      \
  setup_sprite_tile_add_blocks(sub_tile_height);                               \
                                                                               \
  for(; sub_tile_height; sub_tile_height--)                                    \
  {                                                                            \
    setup_sprite_tile_fetch_texel_block_8bpp(0);                               \
    tbl_16(lookup_low, texels, clut_low);                                      \
    tbl_16(lookup_high, texels, clut_high);                                    \
    zip_8x16b(texel_colors, lookup_low, lookup_high);                          \
                                                                               \
    block[0].texels = texel_colors;                                            \
    block[0].draw_mask_bits = edge##_mask_bits;                                \
    block[0].fb_ptr = fb_ptr;                                                  \
    block += 1;                                                                \
                                                                               \
    fb_ptr += 1024;                                                            \
    texture_offset += 0x10;                                                    \
  }                                                                            \
  texture_offset += 0xF00;                                                     \
  psx_gpu->num_blocks = num_blocks;                                            \
}
3264
3265
/*
 * Emits a full-width 8bpp tile column: two blocks per row for
 * sub_tile_height rows.
 *
 * Per row, the texel data fetched at texture_offset + 0 and
 * texture_offset + 8 is stored unmodified in block->r. The first block gets
 * left_mask_bits and fb_ptr, the second right_mask_bits and fb_ptr + 8.
 * After each row fb_ptr advances by 1024 and texture_offset by 0x10; after
 * the last row texture_offset advances by a further 0xF00 and the running
 * block count is written back to psx_gpu->num_blocks.
 *
 * sub_tile_height is consumed (left at zero). The `edge` argument is unused.
 */
#define setup_sprite_tile_full_8bpp(edge)                                      \
{                                                                              \
  setup_sprite_tile_add_blocks(sub_tile_height * 2);                           \
                                                                               \
  for(; sub_tile_height; sub_tile_height--)                                    \
  {                                                                            \
    setup_sprite_tile_fetch_texel_block_8bpp(0);                               \
    block[0].r = texels;                                                       \
    block[0].draw_mask_bits = left_mask_bits;                                  \
    block[0].fb_ptr = fb_ptr;                                                  \
                                                                               \
    setup_sprite_tile_fetch_texel_block_8bpp(8);                               \
    block[1].r = texels;                                                       \
    block[1].draw_mask_bits = right_mask_bits;                                 \
    block[1].fb_ptr = fb_ptr + 8;                                              \
    block += 2;                                                                \
                                                                               \
    fb_ptr += 1024;                                                            \
    texture_offset += 0x10;                                                    \
  }                                                                            \
  texture_offset += 0xF00;                                                     \
  psx_gpu->num_blocks = num_blocks;                                            \
}
3291
/*
 * Emits a half-width 8bpp tile column: one block per row for
 * sub_tile_height rows.
 *
 * Per row, the texel data fetched at texture_offset is stored unmodified in
 * block->r; the block gets edge##_mask_bits (so `edge` selects e.g.
 * left_mask_bits or right_mask_bits) and fb_ptr. After each row fb_ptr
 * advances by 1024 and texture_offset by 0x10; after the last row
 * texture_offset advances by a further 0xF00 and the running block count is
 * written back to psx_gpu->num_blocks.
 *
 * sub_tile_height is consumed (left at zero).
 *
 * Exactly sub_tile_height blocks are reserved, matching the one block written
 * per row (as in setup_sprite_tile_half_4bpp). Reserving twice that amount
 * would let num_blocks run ahead of the `block` write pointer, so blocks that
 * were never filled in would be counted and the MAX_BLOCKS flush would
 * trigger early.
 */
#define setup_sprite_tile_half_8bpp(edge)                                      \
{                                                                              \
  setup_sprite_tile_add_blocks(sub_tile_height);                               \
                                                                               \
  while(sub_tile_height)                                                       \
  {                                                                            \
    setup_sprite_tile_fetch_texel_block_8bpp(0);                               \
    block->r = texels;                                                         \
    block->draw_mask_bits = edge##_mask_bits;                                  \
    block->fb_ptr = fb_ptr;                                                    \
    block++;                                                                   \
                                                                               \
    fb_ptr += 1024;                                                            \
    texture_offset += 0x10;                                                    \
    sub_tile_height--;                                                         \
  }                                                                            \
  texture_offset += 0xF00;                                                     \
  psx_gpu->num_blocks = num_blocks;                                            \
}
3311
3312
3313#define setup_sprite_tile_column_edge_pr