frontend: update libpicofe, fix missed callbacks
[pcsx_rearmed.git] / plugins / gpu_neon / psx_gpu / psx_gpu.c
CommitLineData
75e28f62
E
1/*
2 * Copyright (C) 2011 Gilead Kutnick "Exophase" <exophase@gmail.com>
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License as
6 * published by the Free Software Foundation; either version 2 of
7 * the License, or (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 */
14
15#include <stdio.h>
16#include <stdlib.h>
77e1e479 17#include <stdint.h>
2da2fc76 18#include <stddef.h>
75e28f62 19#include <string.h>
2d658c89 20#include <assert.h>
75e28f62
E
21
22#include "common.h"
a4021361 23#ifndef NEON_BUILD
24#include "vector_ops.h"
25#endif
37725e8c 26#include "psx_gpu_simd.h"
2da2fc76 27#include "psx_gpu_offsets.h"
75e28f62 28
aafce833 29#if 0
30void dump_r_d(const char *name, void *dump);
31void dump_r_q(const char *name, void *dump);
32#define dumprd(n) dump_r_d(#n, n.e)
33#define dumprq(n) dump_r_q(#n, n.e)
34#endif
35
75e28f62
E
/*
 * Global statistics counters, all starting at zero.
 * Within the visible part of this file: span_pixel_blocks and
 * render_buffer_flushes are updated by flush_render_block_buffer() when
 * PROFILE is defined, texture_cache_loads by the texture cache update
 * routines, and clipped_triangles by the setup_spans_clip() macro.
 */
36u32 span_pixels = 0;
37u32 span_pixel_blocks = 0;
75e28f62
E
38u32 spans = 0;
39u32 triangles = 0;
40u32 sprites = 0;
41u32 sprites_4bpp = 0;
42u32 sprites_8bpp = 0;
43u32 sprites_16bpp = 0;
44u32 sprite_blocks = 0;
45u32 sprites_untextured = 0;
46u32 lines = 0;
47u32 trivial_rejects = 0;
48u32 texels_4bpp = 0;
49u32 texels_8bpp = 0;
50u32 texels_16bpp = 0;
51u32 texel_blocks_4bpp = 0;
52u32 texel_blocks_8bpp = 0;
53u32 texel_blocks_16bpp = 0;
54u32 texel_blocks_untextured = 0;
55u32 blend_blocks = 0;
75e28f62
E
56u32 render_buffer_flushes = 0;
57u32 state_changes = 0;
58u32 left_split_triangles = 0;
59u32 flat_triangles = 0;
60u32 clipped_triangles = 0;
61u32 zero_block_spans = 0;
62u32 texture_cache_loads = 0;
3867c6ef 63u32 false_modulated_blocks = 0;
75e28f62 64
/* Statistics hook; expands to nothing because the update is commented out. */
a2cb152a 65#define stats_add(stat, count) // stat += count
66
/*
 * Lookup table indexed by an edge height (see compute_edge_delta_x2/x3).
 * Those macros use the entry shifted right by 10 as the height reciprocal
 * and the low 5 bits as a shift count.
 */
c1817bd9 67/* double size for enhancement */
68u32 reciprocal_table[512 * 2];
75e28f62
E
69
70
/* Signed 32-bit fixed-point value with FIXED_BITS fractional bits. */
71typedef s32 fixed_type;
72
73#define EDGE_STEP_BITS 32
74#define FIXED_BITS 12
75
/* Integer to fixed point, plus one half (1 << (FIXED_BITS - 1)). */
76#define fixed_center(value) \
77 ((((fixed_type)(value)) << FIXED_BITS) + (1 << (FIXED_BITS - 1))) \
78
/* Integer to fixed point. */
79#define int_to_fixed(value) \
80 (((fixed_type)(value)) << FIXED_BITS) \
81
/* Fixed point to integer by right shift (fraction bits are discarded). */
82#define fixed_to_int(value) \
83 ((value) >> FIXED_BITS) \
84
/* Fixed point to double. */
85#define fixed_to_double(value) \
86 ((value) / (double)(1 << FIXED_BITS)) \
87
/* Double to fixed point; the cast truncates toward zero. */
88#define double_to_fixed(value) \
89 (fixed_type)(((value) * (double)(1 << FIXED_BITS))) \
90
/*
 * Function types for the render pipeline stages. Each block stage takes
 * only the GPU state pointer.
 */
91typedef void (setup_blocks_function_type)(psx_gpu_struct *psx_gpu);
92typedef void (texture_blocks_function_type)(psx_gpu_struct *psx_gpu);
93typedef void (shade_blocks_function_type)(psx_gpu_struct *psx_gpu);
94typedef void (blend_blocks_function_type)(psx_gpu_struct *psx_gpu);
95
/* Sprite setup: position (x, y), texture origin (u, v), size and color. */
96typedef void (setup_sprite_function_type)(psx_gpu_struct *psx_gpu, s32 x,
97 s32 y, s32 u, s32 v, s32 width, s32 height, u32 color);
98
/*
 * Set of stage callbacks for one render mode. flush_render_block_buffer()
 * calls texture_blocks, shade_blocks and blend_blocks in that order.
 * setup_blocks is stored as an untyped pointer.
 */
99struct render_block_handler_struct
100{
101 void *setup_blocks;
102 texture_blocks_function_type *texture_blocks;
103 shade_blocks_function_type *shade_blocks;
104 blend_blocks_function_type *blend_blocks;
105};
106
2bbbb7af 107#ifndef NEON_BUILD
75e28f62
E
108
109u32 fixed_reciprocal(u32 denominator, u32 *_shift)
110{
111 u32 shift = __builtin_clz(denominator);
112 u32 denominator_normalized = denominator << shift;
113
114 double numerator = (1ULL << 62) + denominator_normalized;
115 double numerator_b;
116
117 double denominator_normalized_dp_b;
118 u64 denominator_normalized_dp_u64;
119
120 u32 reciprocal;
121 double reciprocal_dp;
122
123 u64 numerator_u64 = (denominator_normalized >> 10) |
124 ((u64)(62 + 1023) << 52);
125 *((u64 *)(&numerator_b)) = numerator_u64;
126
127 denominator_normalized_dp_u64 =
128 (u64)(denominator_normalized << 21) |
129 ((u64)((denominator_normalized >> 11) + ((1022 + 31) << 20)) << 32);
130 *((u64 *)(&denominator_normalized_dp_b)) = denominator_normalized_dp_u64;
131
132 // Implement with a DP divide
133 reciprocal_dp = numerator / denominator_normalized_dp_b;
134 reciprocal = reciprocal_dp;
135
136 if(reciprocal == 0x80000001)
137 reciprocal = 0x80000000;
138
139 *_shift = 62 - shift;
140 return reciprocal;
141}
142
/*
 * Table-style reciprocal estimate: the input is quantized down to units of
 * 1/512, the reciprocal of the bucket's midpoint is taken, and the result
 * is rounded to the nearest 1/256.
 */
double reciprocal_estimate(double a)
{
  /* a in units of 1/512, rounded down */
  int bucket = (int)(a * 512.0);

  /* reciprocal of the middle of that bucket */
  double inverse = 1.0 / (((double)bucket + 0.5) / 512.0);

  /* reciprocal in units of 1/256, rounded to nearest */
  int quantized = (int)(256.0 * inverse + 0.5);

  return (double)quantized / 256.0;
}
157
158u32 reciprocal_estimate_u32(u32 value)
159{
160 u64 dp_value_u64;
161 volatile double dp_value;
162 volatile u64 *dp_value_ptr = (volatile u64 *)&dp_value;
163
164 if((value >> 31) == 0)
165 return 0xFFFFFFFF;
166
167 dp_value_u64 = (0x3FEULL << (31 + 21)) | ((u64)(value & 0x7FFFFFFF) << 21);
168
169 *dp_value_ptr = dp_value_u64;
170
171 dp_value = reciprocal_estimate(dp_value);
172 dp_value_u64 = *dp_value_ptr;
173
174 return (0x80000000 | ((dp_value_u64 >> 21) & 0x7FFFFFFF));
175}
176
177u32 fixed_reciprocal_nr(u32 value, u32 *_shift)
178{
179 u32 shift = __builtin_clz(value);
180 u32 value_normalized = value << shift;
181
182 *_shift = 62 - shift;
183
184 value_normalized -= 2;
185
186 u32 reciprocal_normalized = reciprocal_estimate_u32(value_normalized) >> 1;
187
188 u32 temp = -(((u64)value_normalized * (u32)reciprocal_normalized) >> 31);
189 reciprocal_normalized = (((u64)reciprocal_normalized * temp) >> 31);
190 temp = -(((u64)value_normalized * (u32)reciprocal_normalized) >> 31);
191 reciprocal_normalized = (((u64)reciprocal_normalized * temp) >> 31);
192 temp = -(((u64)value_normalized * (u32)reciprocal_normalized) >> 31);
193 reciprocal_normalized = (((u64)reciprocal_normalized * temp) >> 31);
194
195 return reciprocal_normalized;
196}
197
198#endif
199
200
201s32 triangle_signed_area_x2(s32 x0, s32 y0, s32 x1, s32 y1, s32 x2, s32 y2)
202{
203 return ((x1 - x0) * (y2 - y1)) - ((x2 - x1) * (y1 - y0));
204}
205
206u32 texture_region_mask(s32 x1, s32 y1, s32 x2, s32 y2)
207{
208 s32 coverage_x, coverage_y;
209
210 u32 mask_up_left;
211 u32 mask_down_right;
212
213 coverage_x = x2 >> 6;
214 coverage_y = y2 >> 8;
215
216 if(coverage_x < 0)
217 coverage_x = 0;
218
219 if(coverage_x > 31)
220 coverage_x = 31;
221
222 mask_down_right = ~(0xFFFFFFFF << (coverage_x + 1)) & 0xFFFF;
223
224 if(coverage_y >= 1)
225 mask_down_right |= mask_down_right << 16;
226
227 coverage_x = x1 >> 6;
228
f707f14b 229 mask_up_left = 0xFFFF0000;
230 if(coverage_x > 0)
231 mask_up_left <<= coverage_x;
75e28f62
E
232
233 coverage_y = y1 >> 8;
234 if(coverage_y <= 0)
235 mask_up_left |= mask_up_left >> 16;
236
237 return mask_up_left & mask_down_right;
238}
239
240u32 invalidate_texture_cache_region(psx_gpu_struct *psx_gpu, u32 x1, u32 y1,
241 u32 x2, u32 y2)
242{
243 u32 mask = texture_region_mask(x1, y1, x2, y2);
244
245 psx_gpu->dirty_textures_4bpp_mask |= mask;
246 psx_gpu->dirty_textures_8bpp_mask |= mask;
247 psx_gpu->dirty_textures_8bpp_alternate_mask |= mask;
248
249 return mask;
250}
251
252u32 invalidate_texture_cache_region_viewport(psx_gpu_struct *psx_gpu, u32 x1,
253 u32 y1, u32 x2, u32 y2)
254{
255 u32 mask = texture_region_mask(x1, y1, x2, y2) &
256 psx_gpu->viewport_mask;
3867c6ef 257
75e28f62
E
258 psx_gpu->dirty_textures_4bpp_mask |= mask;
259 psx_gpu->dirty_textures_8bpp_mask |= mask;
260 psx_gpu->dirty_textures_8bpp_alternate_mask |= mask;
261
262 return mask;
263}
264
47c15995 265static void update_texture_cache_region_(psx_gpu_struct *psx_gpu,
266 u32 x1, u32 y1, u32 x2, u32 y2)
05740673 267{
268 u32 mask = texture_region_mask(x1, y1, x2, y2);
269 u32 texture_page;
270 u8 *texture_page_ptr;
271 u16 *vram_ptr;
272 u32 texel_block;
273 u32 sub_x, sub_y;
274
275 psx_gpu->dirty_textures_8bpp_mask |= mask;
276 psx_gpu->dirty_textures_8bpp_alternate_mask |= mask;
277
278 if ((psx_gpu->dirty_textures_4bpp_mask & mask) == 0 &&
279 (x1 & 3) == 0 && (y1 & 15) == 0 && x2 - x1 < 4 && y2 - y1 < 16)
280 {
281 texture_page = ((x1 / 64) & 15) + (y1 / 256) * 16;
282 texture_page_ptr = psx_gpu->texture_4bpp_cache[texture_page];
283 texture_page_ptr += (x1 / 4 & 15) * 16*16 + (y1 / 16 & 15) * 16*16*16;
284 vram_ptr = psx_gpu->vram_ptr + x1 + y1 * 1024;
285 sub_x = 4;
286 sub_y = 16;
287
288 while(sub_y)
289 {
290 while(sub_x)
291 {
292 texel_block = *vram_ptr;
293
294 texture_page_ptr[0] = texel_block & 0xF;
295 texture_page_ptr[1] = (texel_block >> 4) & 0xF;
296 texture_page_ptr[2] = (texel_block >> 8) & 0xF;
297 texture_page_ptr[3] = texel_block >> 12;
298
299 vram_ptr++;
300 texture_page_ptr += 4;
301
302 sub_x--;
303 }
304
305 vram_ptr -= 4;
306 sub_x = 4;
307
308 sub_y--;
309 vram_ptr += 1024;
310 }
311 }
312 else
313 {
314 psx_gpu->dirty_textures_4bpp_mask |= mask;
315 }
316}
75e28f62 317
47c15995 318void update_texture_cache_region(psx_gpu_struct *psx_gpu, u32 x1, u32 y1,
319 u32 x2, u32 y2)
320{
321 s32 w = x2 - x1;
322 do
323 {
324 x2 = x1 + w;
325 if (x2 > 1023)
326 x2 = 1023;
327 update_texture_cache_region_(psx_gpu, x1, y1, x2, y2);
328 w -= x2 - x1;
329 x1 = 0;
330 }
331 while (unlikely(w > 0));
332}
333
2bbbb7af 334#ifndef NEON_BUILD
75e28f62
E
335
336void update_texture_4bpp_cache(psx_gpu_struct *psx_gpu)
337{
338 u32 current_texture_page = psx_gpu->current_texture_page;
3867c6ef 339 u8 *texture_page_ptr = psx_gpu->texture_page_base;
75e28f62
E
340 u16 *vram_ptr = psx_gpu->vram_ptr;
341
342 u32 texel_block;
343 u32 tile_x, tile_y;
344 u32 sub_x, sub_y;
345
346 vram_ptr += (current_texture_page >> 4) * 256 * 1024;
347 vram_ptr += (current_texture_page & 0xF) * 64;
348
349 texture_cache_loads++;
350
351 tile_y = 16;
352 tile_x = 16;
353 sub_x = 4;
354 sub_y = 16;
355
356 psx_gpu->dirty_textures_4bpp_mask &= ~(psx_gpu->current_texture_mask);
357
358 while(tile_y)
359 {
360 while(tile_x)
361 {
362 while(sub_y)
363 {
364 while(sub_x)
365 {
366 texel_block = *vram_ptr;
b7ed0632 367
75e28f62
E
368 texture_page_ptr[0] = texel_block & 0xF;
369 texture_page_ptr[1] = (texel_block >> 4) & 0xF;
370 texture_page_ptr[2] = (texel_block >> 8) & 0xF;
371 texture_page_ptr[3] = texel_block >> 12;
372
373 vram_ptr++;
374 texture_page_ptr += 4;
375
376 sub_x--;
377 }
378
379 vram_ptr -= 4;
380 sub_x = 4;
381
382 sub_y--;
383 vram_ptr += 1024;
384 }
385
386 sub_y = 16;
387
388 vram_ptr -= (1024 * 16) - 4;
389 tile_x--;
390 }
391
392 tile_x = 16;
393
394 vram_ptr += (16 * 1024) - (4 * 16);
395 tile_y--;
396 }
397}
398
399void update_texture_8bpp_cache_slice(psx_gpu_struct *psx_gpu,
400 u32 texture_page)
401{
3867c6ef 402 u16 *texture_page_ptr = psx_gpu->texture_page_base;
75e28f62
E
403 u16 *vram_ptr = psx_gpu->vram_ptr;
404
405 u32 tile_x, tile_y;
406 u32 sub_y;
407
408 vec_8x16u texels;
409
410 texture_cache_loads++;
411
412 vram_ptr += (texture_page >> 4) * 256 * 1024;
413 vram_ptr += (texture_page & 0xF) * 64;
414
415 if((texture_page ^ psx_gpu->current_texture_page) & 0x1)
416 texture_page_ptr += (8 * 16) * 8;
417
418 tile_x = 8;
419 tile_y = 16;
420
421 sub_y = 16;
422
423 while(tile_y)
424 {
425 while(tile_x)
426 {
427 while(sub_y)
428 {
429 load_128b(texels, vram_ptr);
430 store_128b(texels, texture_page_ptr);
431
432 texture_page_ptr += 8;
433 vram_ptr += 1024;
434
435 sub_y--;
436 }
437
438 sub_y = 16;
439
440 vram_ptr -= (1024 * 16);
441 vram_ptr += 8;
442
443 tile_x--;
444 }
445
446 tile_x = 8;
447
448 vram_ptr -= (8 * 8);
449 vram_ptr += (16 * 1024);
450
451 texture_page_ptr += (8 * 16) * 8;
452 tile_y--;
453 }
454}
455
456#endif
457
458
459void update_texture_8bpp_cache(psx_gpu_struct *psx_gpu)
460{
461 u32 current_texture_page = psx_gpu->current_texture_page;
462 u32 update_textures =
463 psx_gpu->dirty_textures_8bpp_mask & psx_gpu->current_texture_mask;
464
465 psx_gpu->dirty_textures_8bpp_mask &= ~update_textures;
466
467 if(update_textures & (1 << current_texture_page))
468 {
469 update_texture_8bpp_cache_slice(psx_gpu, current_texture_page);
470 update_textures &= ~(1 << current_texture_page);
471 }
472
473 if(update_textures)
474 {
475 u32 adjacent_texture_page = ((current_texture_page + 1) & 0xF) |
476 (current_texture_page & 0x10);
477
478 update_texture_8bpp_cache_slice(psx_gpu, adjacent_texture_page);
479 }
480}
481
75e28f62
E
482void flush_render_block_buffer(psx_gpu_struct *psx_gpu)
483{
f1359c57 484 if((psx_gpu->render_mode & RENDER_INTERLACE_ENABLED) &&
69b09c0d
E
485 (psx_gpu->primitive_type == PRIMITIVE_TYPE_SPRITE))
486 {
487 u32 num_blocks_dest = 0;
488 block_struct *block_src = psx_gpu->blocks;
489 block_struct *block_dest = psx_gpu->blocks;
490
491 u16 *vram_ptr = psx_gpu->vram_ptr;
492 u32 i;
493
f1359c57 494 if(psx_gpu->render_mode & RENDER_INTERLACE_ODD)
69b09c0d
E
495 {
496 for(i = 0; i < psx_gpu->num_blocks; i++)
497 {
498 u32 fb_offset = (u32)((u8 *)block_src->fb_ptr - (u8 *)vram_ptr);
499 if(fb_offset & (1 << 11))
500 {
501 *block_dest = *block_src;
502 num_blocks_dest++;
503 block_dest++;
504 }
505 block_src++;
506 }
507 }
508 else
509 {
510 for(i = 0; i < psx_gpu->num_blocks; i++)
511 {
512 u32 fb_offset = (u32)((u8 *)block_src->fb_ptr - (u8 *)vram_ptr);
513 if((fb_offset & (1 << 11)) == 0)
514 {
515 *block_dest = *block_src;
516 num_blocks_dest++;
517 block_dest++;
518 }
519 block_src++;
520 }
521 }
522
523 psx_gpu->num_blocks = num_blocks_dest;
524 }
525
75e28f62
E
526 if(psx_gpu->num_blocks)
527 {
528 render_block_handler_struct *render_block_handler =
529 psx_gpu->render_block_handler;
530
db2804fb 531#if defined(__arm__) && defined(NEON_BUILD) && !defined(SIMD_BUILD)
532 // the asm doesn't bother to save callee-save vector regs, so do it here
533 __asm__ __volatile__("":::"q4","q5","q6","q7");
534#endif
535
75e28f62
E
536 render_block_handler->texture_blocks(psx_gpu);
537 render_block_handler->shade_blocks(psx_gpu);
538 render_block_handler->blend_blocks(psx_gpu);
539
3867c6ef 540#ifdef PROFILE
75e28f62
E
541 span_pixel_blocks += psx_gpu->num_blocks;
542 render_buffer_flushes++;
3867c6ef 543#endif
75e28f62
E
544
545 psx_gpu->num_blocks = 0;
db2804fb 546#if defined(__arm__) && defined(NEON_BUILD) && !defined(SIMD_BUILD)
547 __asm__ __volatile__("":::"q4","q5","q6","q7");
548#endif
75e28f62
E
549 }
550}
551
552
2bbbb7af 553#ifndef NEON_BUILD
75e28f62
E
554
/*
 * Loads one vertex into the x<set>/y<set> input vectors used by
 * compute_all_gradients(). The attribute vector receives u, v, r, g; the
 * position vectors are filled with the vertex x or y, and element 0 of
 * x<set>_b and element 1 of y<set>_b are then overwritten with b.
 * Expects those vectors to be declared at the expansion site.
 */
555#define setup_gradient_calculation_input(set, vertex) \
556 /* First type is: uvrg bxxx xxxx */\
557 /* Second type is: yyyy ybyy uvrg */\
558 /* Since x_a and y_c are the same the same variable is used for both. */\
559 x##set##_a_y##set##_c.e[0] = vertex->u; \
560 x##set##_a_y##set##_c.e[1] = vertex->v; \
561 x##set##_a_y##set##_c.e[2] = vertex->r; \
562 x##set##_a_y##set##_c.e[3] = vertex->g; \
563 dup_4x16b(x##set##_b, vertex->x); \
564 dup_4x16b(x##set##_c, vertex->x); \
565 dup_4x16b(y##set##_a, vertex->y); \
566 dup_4x16b(y##set##_b, vertex->y); \
567 x##set##_b.e[0] = vertex->b; \
568 y##set##_b.e[1] = vertex->b \
569
570
/*
 * Computes the u, v, r, g, b interpolation gradients of triangle (a, b, c)
 * and stores them in psx_gpu.
 *
 * Inputs read from psx_gpu: triangle_area (inverted with
 * fixed_reciprocal()), triangle_winding (used to flip the gradient signs)
 * and uvrgb_phase (added to the base values).
 *
 * Outputs written to psx_gpu:
 *   uvrg, b            - base values taken from vertex a, adjusted by a->x
 *   uvrg_dx            - per-x gradients of u, v, r, g
 *   uvrg_dy, b_dy      - per-y gradients
 *   {u,v,r,g,b}_block_span - 0, dx, 2*dx and 3*dx for each attribute
 */
ee060c58 571void compute_all_gradients(psx_gpu_struct * __restrict__ psx_gpu,
572 const vertex_struct * __restrict__ a, const vertex_struct * __restrict__ b,
573 const vertex_struct * __restrict__ c)
75e28f62
E
574{
575 u32 triangle_area = psx_gpu->triangle_area;
576 u32 winding_mask_scalar;
577
578 u32 triangle_area_shift;
579 u64 triangle_area_reciprocal =
580 fixed_reciprocal(triangle_area, &triangle_area_shift);
581 triangle_area_shift = -(triangle_area_shift - FIXED_BITS);
582
583 // ((x1 - x0) * (y2 - y1)) - ((x2 - x1) * (y1 - y0)) =
584 // ( d0 * d1 ) - ( d2 * d3 ) =
585 // ( m0 ) - ( m1 ) = gradient
586
587 // This is split to do 12 elements at a time over three sets: a, b, and c.
588 // Technically we only need to do 10 elements (uvrgb_x and uvrgb_y), so
589 // two of the slots are unused.
590
591 // Inputs are all 16-bit signed. The m0/m1 results are 32-bit signed, as
592 // is g.
593
594 vec_4x16s x0_a_y0_c, x0_b, x0_c;
595 vec_4x16s y0_a, y0_b;
596 vec_4x16s x1_a_y1_c, x1_b, x1_c;
597 vec_4x16s y1_a, y1_b;
598 vec_4x16s x2_a_y2_c, x2_b, x2_c;
599 vec_4x16s y2_a, y2_b;
600
601 vec_4x32u uvrg_base;
602 vec_4x32u b_base;
c6063f89 603 vec_4x32u uvrgb_phase;
75e28f62
E
604
605 vec_4x16s d0_a_d3_c, d0_b, d0_c;
606 vec_4x16s d1_a, d1_b, d1_c_d2_a;
607 vec_4x16s d2_b, d2_c;
608 vec_4x16s d3_a, d3_b;
609
610 vec_4x32s m0_a, m0_b, m0_c;
611 vec_4x32s m1_a, m1_b, m1_c;
612
613 vec_4x32u gradient_area_a, gradient_area_c;
614 vec_2x32u gradient_area_b;
615
616 vec_4x32u gradient_area_sign_a, gradient_area_sign_c;
617 vec_2x32u gradient_area_sign_b;
618 vec_4x32u winding_mask;
619
620 vec_2x64u gradient_wide_a0, gradient_wide_a1;
621 vec_2x64u gradient_wide_c0, gradient_wide_c1;
622 vec_2x64u gradient_wide_b;
623
624 vec_4x32u gradient_a, gradient_c;
625 vec_2x32u gradient_b;
626 vec_16x8s gradient_shift;
627
628 setup_gradient_calculation_input(0, a);
629 setup_gradient_calculation_input(1, b);
630 setup_gradient_calculation_input(2, c);
631
 // Base values: vertex a's attributes in the upper 16 bits, plus the phase.
c6063f89 632 dup_4x32b(uvrgb_phase, psx_gpu->uvrgb_phase);
75e28f62
E
633 shl_long_4x16b(uvrg_base, x0_a_y0_c, 16);
634 shl_long_4x16b(b_base, x0_b, 16);
635
c6063f89 636 add_4x32b(uvrg_base, uvrg_base, uvrgb_phase);
637 add_4x32b(b_base, b_base, uvrgb_phase);
75e28f62
E
638
639 // Can probably pair these, but it'll require careful register allocation
640 sub_4x16b(d0_a_d3_c, x1_a_y1_c, x0_a_y0_c);
641 sub_4x16b(d1_c_d2_a, x2_a_y2_c, x1_a_y1_c);
642
643 sub_4x16b(d0_b, x1_b, x0_b);
644 sub_4x16b(d0_c, x1_c, x0_c);
645
646 sub_4x16b(d1_a, y2_a, y1_a);
647 sub_4x16b(d1_b, y2_b, y1_b);
648
649 sub_4x16b(d2_b, x2_b, x1_b);
650 sub_4x16b(d2_c, x2_c, x1_c);
651
652 sub_4x16b(d3_a, y1_a, y0_a);
653 sub_4x16b(d3_b, y1_b, y0_b);
654
655 mul_long_4x16b(m0_a, d0_a_d3_c, d1_a);
656 mul_long_4x16b(m0_b, d0_b, d1_b);
657 mul_long_4x16b(m0_c, d0_c, d1_c_d2_a);
658
659 mul_long_4x16b(m1_a, d1_c_d2_a, d3_a);
660 mul_long_4x16b(m1_b, d2_b, d3_b);
661 mul_long_4x16b(m1_c, d2_c, d0_a_d3_c);
662
663 sub_4x32b(gradient_area_a, m0_a, m1_a);
664 sub_2x32b(gradient_area_b, m0_b.low, m1_b.low);
665 sub_4x32b(gradient_area_c, m0_c, m1_c);
666
 // Split into sign and magnitude; the sign is re-applied after scaling.
667 cmpltz_4x32b(gradient_area_sign_a, gradient_area_a);
668 cmpltz_2x32b(gradient_area_sign_b, gradient_area_b);
669 cmpltz_4x32b(gradient_area_sign_c, gradient_area_c);
670
671 abs_4x32b(gradient_area_a, gradient_area_a);
672 abs_2x32b(gradient_area_b, gradient_area_b);
673 abs_4x32b(gradient_area_c, gradient_area_c);
674
675 winding_mask_scalar = -psx_gpu->triangle_winding;
676
677 dup_4x32b(winding_mask, winding_mask_scalar);
678 eor_4x32b(gradient_area_sign_a, gradient_area_sign_a, winding_mask);
679 eor_2x32b(gradient_area_sign_b, gradient_area_sign_b, winding_mask);
680 eor_4x32b(gradient_area_sign_c, gradient_area_sign_c, winding_mask);
681
 // Multiply the magnitudes by the area reciprocal in 64 bits.
682 mul_scalar_long_2x32b(gradient_wide_a0,
683 vector_cast(vec_2x32s, gradient_area_a.low),
684 (s64)triangle_area_reciprocal);
685 mul_scalar_long_2x32b(gradient_wide_a1,
686 vector_cast(vec_2x32s, gradient_area_a.high),
687 (s64)triangle_area_reciprocal);
688 mul_scalar_long_2x32b(gradient_wide_b,
689 vector_cast(vec_2x32s, gradient_area_b),
690 (s64)triangle_area_reciprocal);
691 mul_scalar_long_2x32b(gradient_wide_c0,
692 vector_cast(vec_2x32s, gradient_area_c.low),
693 (s64)triangle_area_reciprocal);
694 mul_scalar_long_2x32b(gradient_wide_c1,
695 vector_cast(vec_2x32s, gradient_area_c.high),
696 (s64)triangle_area_reciprocal);
697
698 dup_16x8b(gradient_shift, triangle_area_shift);
699 shl_reg_2x64b(gradient_wide_a0, gradient_wide_a0,
700 vector_cast(vec_2x64u, gradient_shift));
701 shl_reg_2x64b(gradient_wide_a1, gradient_wide_a1,
702 vector_cast(vec_2x64u, gradient_shift));
703 shl_reg_2x64b(gradient_wide_b, gradient_wide_b,
704 vector_cast(vec_2x64u, gradient_shift));
705 shl_reg_2x64b(gradient_wide_c0, gradient_wide_c0,
706 vector_cast(vec_2x64u, gradient_shift));
707 shl_reg_2x64b(gradient_wide_c1, gradient_wide_c1,
708 vector_cast(vec_2x64u, gradient_shift));
709
710 mov_narrow_2x64b(gradient_a.low, gradient_wide_a0);
711 mov_narrow_2x64b(gradient_a.high, gradient_wide_a1);
712 mov_narrow_2x64b(gradient_b, gradient_wide_b);
713 mov_narrow_2x64b(gradient_c.low, gradient_wide_c0);
714 mov_narrow_2x64b(gradient_c.high, gradient_wide_c1);
715
716 shl_4x32b(gradient_a, gradient_a, 4);
717 shl_2x32b(gradient_b, gradient_b, 4);
718 shl_4x32b(gradient_c, gradient_c, 4);
719
 // (value ^ sign) - sign negates the value where sign is all ones.
720 eor_4x32b(gradient_a, gradient_a, gradient_area_sign_a);
721 eor_2x32b(gradient_b, gradient_b, gradient_area_sign_b);
722 eor_4x32b(gradient_c, gradient_c, gradient_area_sign_c);
723
724 sub_4x32b(gradient_a, gradient_a, gradient_area_sign_a);
725 sub_2x32b(gradient_b, gradient_b, gradient_area_sign_b);
726 sub_4x32b(gradient_c, gradient_c, gradient_area_sign_c);
727
728 u32 left_adjust = a->x;
729 mls_scalar_4x32b(uvrg_base, gradient_a, left_adjust);
730 mls_scalar_2x32b(b_base.low, gradient_b, left_adjust);
731
732 vec_4x32u uvrg_dx2;
733 vec_2x32u b_dx2;
734
735 vec_4x32u uvrg_dx3;
736 vec_2x32u b_dx3;
737
738 vec_4x32u zero;
739
 // NOTE(review): zero has no initializer before being XORed with itself;
 // confirm the eor_4x32b implementation makes this well defined.
740 eor_4x32b(zero, zero, zero);
741 add_4x32b(uvrg_dx2, gradient_a, gradient_a);
742 add_2x32b(b_dx2, gradient_b, gradient_b);
743 add_4x32b(uvrg_dx3, gradient_a, uvrg_dx2);
744 add_2x32b(b_dx3, gradient_b, b_dx2);
745
746 // Can be done with vst4, assuming that the zero, dx, dx2, and dx3 are
747 // lined up properly
748 psx_gpu->u_block_span.e[0] = zero.e[0];
749 psx_gpu->u_block_span.e[1] = gradient_a.e[0];
750 psx_gpu->u_block_span.e[2] = uvrg_dx2.e[0];
751 psx_gpu->u_block_span.e[3] = uvrg_dx3.e[0];
752
753 psx_gpu->v_block_span.e[0] = zero.e[1];
754 psx_gpu->v_block_span.e[1] = gradient_a.e[1];
755 psx_gpu->v_block_span.e[2] = uvrg_dx2.e[1];
756 psx_gpu->v_block_span.e[3] = uvrg_dx3.e[1];
757
758 psx_gpu->r_block_span.e[0] = zero.e[2];
759 psx_gpu->r_block_span.e[1] = gradient_a.e[2];
760 psx_gpu->r_block_span.e[2] = uvrg_dx2.e[2];
761 psx_gpu->r_block_span.e[3] = uvrg_dx3.e[2];
762
763 psx_gpu->g_block_span.e[0] = zero.e[3];
764 psx_gpu->g_block_span.e[1] = gradient_a.e[3];
765 psx_gpu->g_block_span.e[2] = uvrg_dx2.e[3];
766 psx_gpu->g_block_span.e[3] = uvrg_dx3.e[3];
767
768 psx_gpu->b_block_span.e[0] = zero.e[0];
769 psx_gpu->b_block_span.e[1] = gradient_b.e[0];
770 psx_gpu->b_block_span.e[2] = b_dx2.e[0];
771 psx_gpu->b_block_span.e[3] = b_dx3.e[0];
772
773 psx_gpu->uvrg = uvrg_base;
774 psx_gpu->b = b_base.e[0];
775
 // gradient_b element 0 is b's x gradient, element 1 its y gradient.
776 psx_gpu->uvrg_dx = gradient_a;
777 psx_gpu->uvrg_dy = gradient_c;
778 psx_gpu->b_dy = gradient_b.e[1];
779}
780#endif
781
/*
 * Debug helper: prints both vectors in hex when _a and _b differ over
 * sizeof(_b) bytes. Two elements are printed when _b is 8 bytes, four
 * otherwise. Expands to a bare if statement, so take care when using it
 * directly before an else.
 */
782#define vector_check(_a, _b) \
783 if(memcmp(&_a, &_b, sizeof(_b))) \
784 { \
785 if(sizeof(_b) == 8) \
786 { \
787 printf("mismatch on %s vs %s: (%x %x) vs (%x %x)\n", \
788 #_a, #_b, _a.e[0], _a.e[1], _b.e[0], _b.e[1]); \
789 } \
790 else \
791 { \
792 printf("mismatch on %s vs %s: (%x %x %x %x) vs (%x %x %x %x)\n", \
793 #_a, #_b, _a.e[0], _a.e[1], _a.e[2], _a.e[3], _b.e[0], _b.e[1], \
794 _b.e[2], _b.e[3]); \
795 } \
796 } \
797
/*
 * Debug helper: prints both scalars in hex when they differ. The arguments
 * are evaluated more than once and are not parenthesized.
 */
798#define scalar_check(_a, _b) \
799 if(_a != _b) \
800 printf("mismatch on %s %s: %x vs %x\n", #_a, #_b, _a, _b) \
801
802
/*
 * Sanity check for a span entry, compiled only when neither NEON_BUILD nor
 * NDEBUG is defined; otherwise it expands to nothing.
 * A failed check deliberately writes to address 0, storing a code:
 *   1 - the entry index exceeds MAX_SPANS
 *   2 - num_blocks exceeds MAX_BLOCKS_PER_ROW
 *   3 - y is 2048 or more
 * Codes 2 and 3 are only checked for entries below psx_gpu->num_spans.
 */
2d658c89 803#if !defined(NEON_BUILD) && !defined(NDEBUG)
804static void setup_spans_debug_check(psx_gpu_struct *psx_gpu,
805 edge_data_struct *span_edge_data_element)
806{
807 u32 _num_spans = span_edge_data_element - psx_gpu->span_edge_data;
808 if (_num_spans > MAX_SPANS)
809 *(volatile int *)0 = 1;
810 if (_num_spans < psx_gpu->num_spans)
811 {
812 if(span_edge_data_element->num_blocks > MAX_BLOCKS_PER_ROW)
813 *(volatile int *)0 = 2;
814 if(span_edge_data_element->y >= 2048)
815 *(volatile int *)0 = 3;
816 }
817}
c111e8f8 818#else
2d658c89 819#define setup_spans_debug_check(psx_gpu, span_edge_data_element)
c111e8f8 820#endif
821
75e28f62
E
/*
 * Extra locals for span setup when a third ("alternate") edge is tracked.
 * Reads v_b from the expansion site. The _no variant declares nothing.
 */
822#define setup_spans_prologue_alternate_yes() \
823 vec_2x64s alternate_x; \
824 vec_2x64s alternate_dx_dy; \
825 vec_4x32s alternate_x_32; \
aafce833 826 vec_4x16u alternate_x_16; \
75e28f62
E
827 \
828 vec_4x16u alternate_select; \
829 vec_4x16s y_mid_point; \
830 \
831 s32 y_b = v_b->y; \
832 s64 edge_alt; \
833 s32 edge_dx_dy_alt; \
834 u32 edge_shift_alt \
835
836#define setup_spans_prologue_alternate_no() \
837
/*
 * Declares the locals shared by the span setup code and loads the vertex
 * coordinates and interpolant state. Expects psx_gpu, v_a, v_b and v_c at
 * the expansion site. alternate_active (yes/no) selects whether the
 * alternate-edge locals are declared as well.
 */
838#define setup_spans_prologue(alternate_active) \
839 edge_data_struct *span_edge_data; \
840 vec_4x32u *span_uvrg_offset; \
841 u32 *span_b_offset; \
842 \
843 s32 clip; \
844 \
845 vec_2x64s edges_xy; \
846 vec_2x32s edges_dx_dy; \
847 vec_2x32u edge_shifts; \
848 \
849 vec_2x64s left_x, right_x; \
850 vec_2x64s left_dx_dy, right_dx_dy; \
851 vec_4x32s left_x_32, right_x_32; \
852 vec_8x16s left_right_x_16; \
853 vec_4x16s y_x4; \
854 vec_8x16s left_edge; \
855 vec_8x16s right_edge; \
856 vec_4x16u span_shift; \
857 \
858 vec_2x32u c_0x01; \
859 vec_4x16u c_0x04; \
860 vec_4x16u c_0xFFFE; \
861 vec_4x16u c_0x07; \
862 \
863 vec_2x32s x_starts; \
864 vec_2x32s x_ends; \
865 \
866 s32 x_a = v_a->x; \
867 s32 x_b = v_b->x; \
868 s32 x_c = v_c->x; \
869 s32 y_a = v_a->y; \
870 s32 y_c = v_c->y; \
871 \
872 vec_4x32u uvrg = psx_gpu->uvrg; \
873 vec_4x32u uvrg_dy = psx_gpu->uvrg_dy; \
874 u32 b = psx_gpu->b; \
875 u32 b_dy = psx_gpu->b_dy; \
876 \
877 dup_2x32b(c_0x01, 0x01); \
878 setup_spans_prologue_alternate_##alternate_active() \
879
/*
 * Second half of the span prologue: points the output cursors at the span
 * arrays in psx_gpu and fills the constant vectors. left_edge is the
 * viewport start x, right_edge is the viewport end x plus one.
 */
880#define setup_spans_prologue_b() \
881 span_edge_data = psx_gpu->span_edge_data; \
882 span_uvrg_offset = psx_gpu->span_uvrg_offset; \
883 span_b_offset = psx_gpu->span_b_offset; \
884 \
885 vec_8x16u c_0x0001; \
2d658c89 886 vec_4x16u c_max_blocks_per_row; \
75e28f62
E
887 \
888 dup_8x16b(c_0x0001, 0x0001); \
889 dup_8x16b(left_edge, psx_gpu->viewport_start_x); \
890 dup_8x16b(right_edge, psx_gpu->viewport_end_x); \
891 add_8x16b(right_edge, right_edge, c_0x0001); \
892 dup_4x16b(c_0x04, 0x04); \
893 dup_4x16b(c_0x07, 0x07); \
894 dup_4x16b(c_0xFFFE, 0xFFFE); \
2d658c89 895 dup_4x16b(c_max_blocks_per_row, MAX_BLOCKS_PER_ROW); \
75e28f62
E
896
897
/*
 * Computes the start value (edges_xy) and per-row step (edges_dx_dy) for
 * two edges that share one height. Reads height, x_starts, x_ends and
 * c_0x01 from the expansion site. The reciprocal_table entry for height
 * supplies the reciprocal (entry >> 10); edge_shifts keeps the entry with
 * bits 0xE0 cleared in each 16-bit lane.
 * NOTE(review): widths is declared vec_4x32u here but vec_2x32u in
 * compute_edge_delta_x3, although both only use 2x32 operations - confirm
 * whether the wider type is intentional.
 */
898#define compute_edge_delta_x2() \
899{ \
900 vec_2x32s heights; \
901 vec_2x32s height_reciprocals; \
902 vec_2x32s heights_b; \
903 vec_4x32u widths; \
904 \
905 u32 edge_shift = reciprocal_table[height]; \
906 \
907 dup_2x32b(heights, height); \
908 sub_2x32b(widths, x_ends, x_starts); \
909 \
910 dup_2x32b(edge_shifts, edge_shift); \
911 sub_2x32b(heights_b, heights, c_0x01); \
7d5140f5 912 shr_2x32b(height_reciprocals, edge_shifts, 10); \
75e28f62
E
913 \
914 mla_2x32b(heights_b, x_starts, heights); \
915 bic_immediate_4x16b(vector_cast(vec_4x16u, edge_shifts), 0xE0); \
916 mul_2x32b(edges_dx_dy, widths, height_reciprocals); \
917 mul_long_2x32b(edges_xy, heights_b, height_reciprocals); \
918} \
919
/*
 * Three-edge variant of compute_edge_delta_x2: two edges with heights
 * height_a and height_b go through the vector path, and a third
 * ("alternate") edge is computed in scalar code, starting at start_c and
 * ending at x_c. Besides its parameters it reads height_minor_b, x_c,
 * x_starts, x_ends and c_0x01 from the expansion site and writes
 * edge_shifts, edges_xy, edges_dx_dy, edge_shift_alt, edge_alt and
 * edge_dx_dy_alt.
 */
920#define compute_edge_delta_x3(start_c, height_a, height_b) \
921{ \
922 vec_2x32s heights; \
923 vec_2x32s height_reciprocals; \
924 vec_2x32s heights_b; \
925 vec_2x32u widths; \
926 \
927 u32 width_alt; \
928 s32 height_b_alt; \
929 u32 height_reciprocal_alt; \
930 \
931 heights.e[0] = height_a; \
932 heights.e[1] = height_b; \
933 \
934 edge_shifts.e[0] = reciprocal_table[height_a]; \
935 edge_shifts.e[1] = reciprocal_table[height_b]; \
936 edge_shift_alt = reciprocal_table[height_minor_b]; \
937 \
938 sub_2x32b(widths, x_ends, x_starts); \
939 width_alt = x_c - start_c; \
940 \
7d5140f5
E
941 shr_2x32b(height_reciprocals, edge_shifts, 10); \
942 height_reciprocal_alt = edge_shift_alt >> 10; \
75e28f62
E
943 \
944 bic_immediate_4x16b(vector_cast(vec_4x16u, edge_shifts), 0xE0); \
945 edge_shift_alt &= 0x1F; \
946 \
947 sub_2x32b(heights_b, heights, c_0x01); \
948 height_b_alt = height_minor_b - 1; \
949 \
950 mla_2x32b(heights_b, x_starts, heights); \
951 height_b_alt += height_minor_b * start_c; \
952 \
953 mul_long_2x32b(edges_xy, heights_b, height_reciprocals); \
954 edge_alt = (s64)height_b_alt * height_reciprocal_alt; \
955 \
956 mul_2x32b(edges_dx_dy, widths, height_reciprocals); \
957 edge_dx_dy_alt = width_alt * height_reciprocal_alt; \
958} \
959
960
/*
 * Step the group of four row numbers in y_x4 by four, up (subtract) or
 * down (add).
 * NOTE(review): these use the 4x32 add/sub on y_x4 and c_0x04, which the
 * prologue declares as 4x16 vectors - confirm this is intended.
 */
961#define setup_spans_adjust_y_up() \
962 sub_4x32b(y_x4, y_x4, c_0x04) \
963
964#define setup_spans_adjust_y_down() \
965 add_4x32b(y_x4, y_x4, c_0x04) \
966
/* Step the uvrg and b interpolants by one row, up (subtract) or down (add). */
967#define setup_spans_adjust_interpolants_up() \
968 sub_4x32b(uvrg, uvrg, uvrg_dy); \
969 b -= b_dy \
970
971#define setup_spans_adjust_interpolants_down() \
972 add_4x32b(uvrg, uvrg, uvrg_dy); \
973 b += b_dy \
974
975
/* Advance the interpolants by clip rows (increment) or back (decrement). */
976#define setup_spans_clip_interpolants_increment() \
977 mla_scalar_4x32b(uvrg, uvrg_dy, clip); \
978 b += b_dy * clip \
979
980#define setup_spans_clip_interpolants_decrement() \
981 mls_scalar_4x32b(uvrg, uvrg_dy, clip); \
982 b -= b_dy * clip \
983
/* Advance the alternate edge by clip rows; the _no variant is empty. */
984#define setup_spans_clip_alternate_yes() \
985 edge_alt += edge_dx_dy_alt * (s64)(clip) \
986
987#define setup_spans_clip_alternate_no() \
988
/*
 * Skips clip rows: advances both edges, the alternate edge when active,
 * and the interpolants in the given direction (increment or decrement).
 * Also counts the triangle in clipped_triangles, unconditionally.
 */
989#define setup_spans_clip(direction, alternate_active) \
990{ \
991 clipped_triangles++; \
992 mla_scalar_long_2x32b(edges_xy, edges_dx_dy, (s64)clip); \
993 setup_spans_clip_alternate_##alternate_active(); \
994 setup_spans_clip_interpolants_##direction(); \
995} \
996
997
/*
 * Applies the per-edge shifts to the edge positions and steps, then seeds
 * the two-row left/right state: element 0 holds the current row's x,
 * element 1 the next row's x, and the step vectors are doubled so each
 * add advances two rows. left_index / right_index select which of the two
 * computed edges is the left one and which the right one.
 */
998#define setup_spans_adjust_edges_alternate_no(left_index, right_index) \
999{ \
1000 vec_2x64u edge_shifts_64; \
1001 vec_2x64s edges_dx_dy_64; \
1002 \
1003 mov_wide_2x32b(edge_shifts_64, edge_shifts); \
1004 shl_variable_2x64b(edges_xy, edges_xy, edge_shifts_64); \
1005 \
1006 mov_wide_2x32b(edges_dx_dy_64, edges_dx_dy); \
1007 shl_variable_2x64b(edges_dx_dy_64, edges_dx_dy_64, edge_shifts_64); \
1008 \
1009 left_x.e[0] = edges_xy.e[left_index]; \
1010 right_x.e[0] = edges_xy.e[right_index]; \
1011 \
1012 left_dx_dy.e[0] = edges_dx_dy_64.e[left_index]; \
1013 left_dx_dy.e[1] = edges_dx_dy_64.e[left_index]; \
1014 right_dx_dy.e[0] = edges_dx_dy_64.e[right_index]; \
1015 right_dx_dy.e[1] = edges_dx_dy_64.e[right_index]; \
1016 \
1017 add_1x64b(left_x.high, left_x.low, left_dx_dy.low); \
1018 add_1x64b(right_x.high, right_x.low, right_dx_dy.low); \
1019 \
1020 add_2x64b(left_dx_dy, left_dx_dy, left_dx_dy); \
1021 add_2x64b(right_dx_dy, right_dx_dy, right_dx_dy); \
1022} \
1023
/*
 * Same as the _no variant, and additionally prepares the alternate edge in
 * the same two-row form and fills y_mid_point with y_b.
 */
1024#define setup_spans_adjust_edges_alternate_yes(left_index, right_index) \
1025{ \
1026 setup_spans_adjust_edges_alternate_no(left_index, right_index); \
1027 s64 edge_dx_dy_alt_64; \
1028 \
1029 dup_4x16b(y_mid_point, y_b); \
1030 \
1031 edge_alt <<= edge_shift_alt; \
1032 edge_dx_dy_alt_64 = (s64)edge_dx_dy_alt << edge_shift_alt; \
1033 \
1034 alternate_x.e[0] = edge_alt; \
1035 alternate_dx_dy.e[0] = edge_dx_dy_alt_64; \
1036 alternate_dx_dy.e[1] = edge_dx_dy_alt_64; \
1037 \
1038 add_1x64b(alternate_x.high, alternate_x.low, alternate_dx_dy.low); \
1039 add_2x64b(alternate_dx_dy, alternate_dx_dy, alternate_dx_dy); \
1040} \
1041
1042
/*
 * Per-row selection mask for the alternate edge: rows with y below the
 * mid point when going up, above it when going down.
 */
1043#define setup_spans_y_select_up() \
1044 cmplt_4x16b(alternate_select, y_x4, y_mid_point) \
1045
1046#define setup_spans_y_select_down() \
1047 cmpgt_4x16b(alternate_select, y_x4, y_mid_point) \
1048
/* Dispatch on alternate_active; the _no variant is empty. */
1049#define setup_spans_y_select_alternate_yes(direction) \
1050 setup_spans_y_select_##direction() \
1051
1052#define setup_spans_y_select_alternate_no(direction) \
1053
/*
 * Apply the selection mask to the left x values (.low) or the right x
 * values (.high), taking alternate_x_16 as the source; the _none variant
 * is empty.
 */
1054#define setup_spans_alternate_select_left() \
1055 bit_4x16b(left_right_x_16.low, alternate_x_16, alternate_select) \
1056
1057#define setup_spans_alternate_select_right() \
1058 bit_4x16b(left_right_x_16.high, alternate_x_16, alternate_select) \
1059
1060#define setup_spans_alternate_select_none() \
1061
/*
 * Produce four rows of 16-bit alternate-edge x values (the upper 32 bits
 * of each 64-bit position) and advance the edge by four rows.
 */
1062#define setup_spans_increment_alternate_yes() \
1063 shr_narrow_2x64b(alternate_x_32.low, alternate_x, 32); \
1064 add_2x64b(alternate_x, alternate_x, alternate_dx_dy); \
1065 shr_narrow_2x64b(alternate_x_32.high, alternate_x, 32); \
1066 add_2x64b(alternate_x, alternate_x, alternate_dx_dy); \
1067 mov_narrow_4x32b(alternate_x_16, alternate_x_32) \
1068
1069#define setup_spans_increment_alternate_no() \
1070
/*
 * Emit four consecutive spans from the current edge/interpolant state.
 *
 *  1. Store uvrg/b for each of the four rows, stepping the interpolants in
 *     `direction` after every store.
 *  2. Sample the left/right x positions (upper 32 bits of the 64-bit
 *     accumulators) for four rows and narrow them to 16 bits.
 *  3. Optionally substitute the alternate edge on the selected rows.
 *  4. Clamp to left_edge/right_edge, then derive per row the block count
 *     ((width + 7) >> 3, capped at c_max_blocks_per_row) and the right-edge
 *     mask (c_0xFFFE << ((width + 7) & 7)).
 *  5. Write the four edge_data entries and step y in `direction`.
 */
#define setup_spans_set_x4(alternate, direction, alternate_active) \
{ \
  span_uvrg_offset[0] = uvrg; \
  span_b_offset[0] = b; \
  setup_spans_adjust_interpolants_##direction(); \
  \
  span_uvrg_offset[1] = uvrg; \
  span_b_offset[1] = b; \
  setup_spans_adjust_interpolants_##direction(); \
  \
  span_uvrg_offset[2] = uvrg; \
  span_b_offset[2] = b; \
  setup_spans_adjust_interpolants_##direction(); \
  \
  span_uvrg_offset[3] = uvrg; \
  span_b_offset[3] = b; \
  setup_spans_adjust_interpolants_##direction(); \
  \
  span_uvrg_offset += 4; \
  span_b_offset += 4; \
  \
  shr_narrow_2x64b(left_x_32.low, left_x, 32); \
  shr_narrow_2x64b(right_x_32.low, right_x, 32); \
  \
  add_2x64b(left_x, left_x, left_dx_dy); \
  add_2x64b(right_x, right_x, right_dx_dy); \
  \
  shr_narrow_2x64b(left_x_32.high, left_x, 32); \
  shr_narrow_2x64b(right_x_32.high, right_x, 32); \
  \
  add_2x64b(left_x, left_x, left_dx_dy); \
  add_2x64b(right_x, right_x, right_dx_dy); \
  \
  mov_narrow_4x32b(left_right_x_16.low, left_x_32); \
  mov_narrow_4x32b(left_right_x_16.high, right_x_32); \
  \
  setup_spans_increment_alternate_##alternate_active(); \
  setup_spans_y_select_alternate_##alternate_active(direction); \
  setup_spans_alternate_select_##alternate(); \
  \
  max_8x16b(left_right_x_16, left_right_x_16, left_edge); \
  min_8x16b(left_right_x_16, left_right_x_16, right_edge); \
  \
  sub_4x16b(left_right_x_16.high, left_right_x_16.high, left_right_x_16.low); \
  add_4x16b(left_right_x_16.high, left_right_x_16.high, c_0x07); \
  and_4x16b(span_shift, left_right_x_16.high, c_0x07); \
  shl_variable_4x16b(span_shift, c_0xFFFE, span_shift); \
  shr_4x16b(left_right_x_16.high, left_right_x_16.high, 3); \
  min_4x16b(left_right_x_16.high, left_right_x_16.high, c_max_blocks_per_row); \
  \
  u32 lane; \
  for(lane = 0; lane < 4; lane++) \
  { \
    span_edge_data[lane].left_x = left_right_x_16.low.e[lane]; \
    span_edge_data[lane].num_blocks = left_right_x_16.high.e[lane]; \
    span_edge_data[lane].right_mask = span_shift.e[lane]; \
    span_edge_data[lane].y = y_x4.e[lane]; \
    setup_spans_debug_check(psx_gpu, &span_edge_data[lane]); \
  } \
  \
  span_edge_data += 4; \
  \
  setup_spans_adjust_y_##direction(); \
}

1136
/*
 * Rewind the alternate edge by height_minor_a steps of its slope; expands to
 * nothing when there is no alternate edge.
 */
#define setup_spans_alternate_adjust_yes() \
  edge_alt -= edge_dx_dy_alt * (s64)height_minor_a

#define setup_spans_alternate_adjust_no()
1142
/*
 * Generate spans walking downwards (increasing y) from y_a.
 *
 * The height is trimmed against viewport_end_y, the start is clipped
 * against viewport_start_y (advancing the edges via setup_spans_clip), and
 * the result is capped at 512 rows. Spans are emitted four at a time, so
 * `height` ends at 0 or below and the span pointers overshoot by -height;
 * the hack branch relies on that to index the last two real spans with
 * [height - 1] and [height - 2].
 */
#define setup_spans_down(left_index, right_index, alternate, alternate_active) \
  setup_spans_alternate_adjust_##alternate_active(); \
  if(y_c > psx_gpu->viewport_end_y) \
  { \
    height -= y_c - psx_gpu->viewport_end_y - 1; \
  } \
  \
  clip = psx_gpu->viewport_start_y - y_a; \
  if(clip > 0) \
  { \
    height -= clip; \
    y_a += clip; \
    setup_spans_clip(increment, alternate_active); \
  } \
  \
  setup_spans_prologue_b(); \
  \
  if(height > 512) \
  { \
    height = 512; \
  } \
  if(height > 0) \
  { \
    y_x4.e[0] = y_a; \
    y_x4.e[1] = y_a + 1; \
    y_x4.e[2] = y_a + 2; \
    y_x4.e[3] = y_a + 3; \
    setup_spans_adjust_edges_alternate_##alternate_active(left_index, \
     right_index); \
    \
    psx_gpu->num_spans = height; \
    do \
    { \
      setup_spans_set_x4(alternate, down, alternate_active); \
      height -= 4; \
    } while(height > 0); \
    if(psx_gpu->hacks_active & (AHACK_TEXTURE_ADJ_U | AHACK_TEXTURE_ADJ_V)) \
    { \
      span_uvrg_offset[height - 1].low = span_uvrg_offset[height - 2].low; \
    } \
  }

1179
/* Advance the alternate edge by one step before emitting upward spans. */
#define setup_spans_alternate_pre_increment_yes() \
  edge_alt += edge_dx_dy_alt

#define setup_spans_alternate_pre_increment_no()

/*
 * Used as the else-branch in setup_spans_up: with an alternate edge the
 * height drops by one row, without one it is an empty block.
 */
#define setup_spans_up_decrement_height_yes() \
  height--

#define setup_spans_up_decrement_height_no() \
  {}

/*
 * Generate spans walking upwards (decreasing y), starting one row above y_a.
 *
 * The height is trimmed against viewport_start_y (or decremented by the
 * alternate-specific helper otherwise), the start is clipped against
 * viewport_end_y, and the result is capped at 512 rows. Before the first
 * group of four spans the edges and interpolants are stepped once. With
 * AHACK_TEXTURE_ADJ_V active, the first stored span's u/v pair is
 * overwritten with the second's.
 */
#define setup_spans_up(left_index, right_index, alternate, alternate_active) \
  setup_spans_alternate_adjust_##alternate_active(); \
  y_a--; \
  \
  if(y_c < psx_gpu->viewport_start_y) \
    height -= psx_gpu->viewport_start_y - y_c; \
  else \
    setup_spans_up_decrement_height_##alternate_active(); \
  \
  clip = y_a - psx_gpu->viewport_end_y; \
  if(clip > 0) \
  { \
    height -= clip; \
    y_a -= clip; \
    setup_spans_clip(decrement, alternate_active); \
  } \
  \
  setup_spans_prologue_b(); \
  \
  if(height > 512) \
  { \
    height = 512; \
  } \
  if(height > 0) \
  { \
    y_x4.e[0] = y_a; \
    y_x4.e[1] = y_a - 1; \
    y_x4.e[2] = y_a - 2; \
    y_x4.e[3] = y_a - 3; \
    add_wide_2x32b(edges_xy, edges_xy, edges_dx_dy); \
    setup_spans_alternate_pre_increment_##alternate_active(); \
    setup_spans_adjust_edges_alternate_##alternate_active(left_index, \
     right_index); \
    setup_spans_adjust_interpolants_up(); \
    \
    psx_gpu->num_spans = height; \
    while(height > 0) \
    { \
      setup_spans_set_x4(alternate, up, alternate_active); \
      height -= 4; \
    } \
    if(psx_gpu->hacks_active & AHACK_TEXTURE_ADJ_V) \
    { \
      psx_gpu->span_uvrg_offset[0].low = psx_gpu->span_uvrg_offset[1].low; \
    } \
  }

/* Lane indices used to pick the left/right edge out of the edge vectors. */
#define index_left 0
#define index_right 1

/*
 * Body shared by the two "up" triangle variants that have an alternate
 * edge: both edges start at x_a, one ends at x_c and the other at x_b.
 * `minor` names the side the alternate edge is merged into, `major` the
 * other side. The height_* locals keep their names because the edge-delta
 * helper is invoked in this scope and may refer to them.
 */
#define setup_spans_up_up(minor, major) \
  setup_spans_prologue(yes); \
  s32 height_minor_a = y_a - y_b; \
  s32 height_minor_b = y_b - y_c; \
  s32 height = y_a - y_c; \
  \
  dup_2x32b(x_starts, x_a); \
  x_ends.e[0] = x_c; \
  x_ends.e[1] = x_b; \
  \
  compute_edge_delta_x3(x_b, height, height_minor_a); \
  setup_spans_up(index_##major, index_##minor, minor, yes)

2bbbb7af 1251#ifndef NEON_BUILD
75e28f62
E
1252
1253void setup_spans_up_left(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
1254 vertex_struct *v_b, vertex_struct *v_c)
1255{
1256 setup_spans_up_up(left, right);
1257}
1258
1259void setup_spans_up_right(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
1260 vertex_struct *v_b, vertex_struct *v_c)
1261{
1262 setup_spans_up_up(right, left);
1263}
1264
1265#define setup_spans_down_down(minor, major) \
1266 setup_spans_prologue(yes); \
1267 s32 height_minor_a = y_b - y_a; \
1268 s32 height_minor_b = y_c - y_b; \
1269 s32 height = y_c - y_a; \
1270 \
1271 dup_2x32b(x_starts, x_a); \
1272 x_ends.e[0] = x_c; \
1273 x_ends.e[1] = x_b; \
1274 \
1275 compute_edge_delta_x3(x_b, height, height_minor_a); \
1276 setup_spans_down(index_##major, index_##minor, minor, yes) \
1277
1278void setup_spans_down_left(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
1279 vertex_struct *v_b, vertex_struct *v_c)
1280{
1281 setup_spans_down_down(left, right);
1282}
1283
1284void setup_spans_down_right(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
1285 vertex_struct *v_b, vertex_struct *v_c)
1286{
1287 setup_spans_down_down(right, left);
1288}
1289
1290#define setup_spans_up_flat() \
1291 s32 height = y_a - y_c; \
1292 \
1293 flat_triangles++; \
1294 compute_edge_delta_x2(); \
1295 setup_spans_up(index_left, index_right, none, no) \
1296
1297void setup_spans_up_a(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
1298 vertex_struct *v_b, vertex_struct *v_c)
1299{
1300 setup_spans_prologue(no);
1301 x_starts.e[0] = x_a;
1302 x_starts.e[1] = x_b;
1303 dup_2x32b(x_ends, x_c);
1304
1305 setup_spans_up_flat();
1306}
1307
1308void setup_spans_up_b(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
1309 vertex_struct *v_b, vertex_struct *v_c)
1310{
1311 setup_spans_prologue(no);
1312 dup_2x32b(x_starts, x_a);
1313 x_ends.e[0] = x_b;
1314 x_ends.e[1] = x_c;
1315
1316 setup_spans_up_flat();
1317}
1318
1319#define setup_spans_down_flat() \
1320 s32 height = y_c - y_a; \
1321 \
1322 flat_triangles++; \
1323 compute_edge_delta_x2(); \
1324 setup_spans_down(index_left, index_right, none, no) \
1325
1326void setup_spans_down_a(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
1327 vertex_struct *v_b, vertex_struct *v_c)
1328{
1329 setup_spans_prologue(no);
1330 x_starts.e[0] = x_a;
1331 x_starts.e[1] = x_b;
1332 dup_2x32b(x_ends, x_c);
1333
1334 setup_spans_down_flat();
1335}
1336
1337void setup_spans_down_b(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
1338 vertex_struct *v_b, vertex_struct *v_c)
1339{
1340 setup_spans_prologue(no);
1341 dup_2x32b(x_starts, x_a);
1342 x_ends.e[0] = x_b;
1343 x_ends.e[1] = x_c;
1344
1345 setup_spans_down_flat();
1346}
1347
1348void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
1349 vertex_struct *v_b, vertex_struct *v_c)
1350{
1351 setup_spans_prologue(no);
1352
1353 s32 y_b = v_b->y;
1354 s64 edge_alt;
1355 s32 edge_dx_dy_alt;
1356 u32 edge_shift_alt;
1357
1358 s32 middle_y = y_a;
1359 s32 height_minor_a = y_a - y_b;
1360 s32 height_minor_b = y_c - y_a;
1361 s32 height_major = y_c - y_b;
1362
1363 vec_2x64s edges_xy_b;
1364 vec_2x32s edges_dx_dy_b;
1365 vec_2x32u edge_shifts_b;
1366
1367 vec_2x32s height_increment;
1368
1369 x_starts.e[0] = x_a;
1370 x_starts.e[1] = x_c;
1371 dup_2x32b(x_ends, x_b);
1372
1373 compute_edge_delta_x3(x_a, height_minor_a, height_major);
1374
1375 height_increment.e[0] = 0;
1376 height_increment.e[1] = height_minor_b;
1377
1378 mla_long_2x32b(edges_xy, edges_dx_dy, height_increment);
1379
1380 edges_xy_b.e[0] = edge_alt;
1381 edges_xy_b.e[1] = edges_xy.e[1];
1382
1383 edge_shifts_b = edge_shifts;
1384 edge_shifts_b.e[0] = edge_shift_alt;
1385
1386 neg_2x32b(edges_dx_dy_b, edges_dx_dy);
1387 edges_dx_dy_b.e[0] = edge_dx_dy_alt;
1388
1389 y_a--;
1390
1391 if(y_b < psx_gpu->viewport_start_y)
1392 height_minor_a -= psx_gpu->viewport_start_y - y_b;
1393
1394 clip = y_a - psx_gpu->viewport_end_y;
1395 if(clip > 0)
1396 {
1397 height_minor_a -= clip;
1398 y_a -= clip;
1399 setup_spans_clip(decrement, no);
1400 }
1401
1402 setup_spans_prologue_b();
1403
2d658c89 1404 if (height_minor_a > 512)
1405 height_minor_a = 512;
1406 if (height_minor_a > 0)
75e28f62
E
1407 {
1408 y_x4.e[0] = y_a;
1409 y_x4.e[1] = y_a - 1;
1410 y_x4.e[2] = y_a - 2;
1411 y_x4.e[3] = y_a - 3;
1412 add_wide_2x32b(edges_xy, edges_xy, edges_dx_dy);
1413 setup_spans_adjust_edges_alternate_no(index_left, index_right);
1414 setup_spans_adjust_interpolants_up();
1415
1416 psx_gpu->num_spans = height_minor_a;
1417 while(height_minor_a > 0)
1418 {
1419 setup_spans_set_x4(none, up, no);
1420 height_minor_a -= 4;
1421 }
1422
1423 span_edge_data += height_minor_a;
1424 span_uvrg_offset += height_minor_a;
1425 span_b_offset += height_minor_a;
1426 }
1427
1428 edges_xy = edges_xy_b;
1429 edges_dx_dy = edges_dx_dy_b;
1430 edge_shifts = edge_shifts_b;
1431
1432 uvrg = psx_gpu->uvrg;
1433 b = psx_gpu->b;
1434
1435 y_a = middle_y;
1436
1437 if(y_c > psx_gpu->viewport_end_y)
1438 height_minor_b -= y_c - psx_gpu->viewport_end_y - 1;
1439
1440 clip = psx_gpu->viewport_start_y - y_a;
1441 if(clip > 0)
1442 {
1443 height_minor_b -= clip;
1444 y_a += clip;
1445 setup_spans_clip(increment, no);
1446 }
1447
2d658c89 1448 if (height_minor_b > 512)
1449 height_minor_b = 512;
1450 if (height_minor_b > 0)
75e28f62
E
1451 {
1452 y_x4.e[0] = y_a;
1453 y_x4.e[1] = y_a + 1;
1454 y_x4.e[2] = y_a + 2;
1455 y_x4.e[3] = y_a + 3;
1456 setup_spans_adjust_edges_alternate_no(index_left, index_right);
1457
b7569147 1458 // FIXME: overflow corner case
1459 if(psx_gpu->num_spans + height_minor_b == MAX_SPANS)
1460 height_minor_b &= ~3;
1461
75e28f62 1462 psx_gpu->num_spans += height_minor_b;
b7569147 1463 while(height_minor_b > 0)
75e28f62
E
1464 {
1465 setup_spans_set_x4(none, down, no);
1466 height_minor_b -= 4;
b7569147 1467 }
ee060c58 1468 if (psx_gpu->hacks_active & (AHACK_TEXTURE_ADJ_U | AHACK_TEXTURE_ADJ_V))
1469 {
1470 span_uvrg_offset[height_minor_b - 1].low =
1471 span_uvrg_offset[height_minor_b - 2].low;
1472 }
75e28f62
E
1473 }
1474
1475 left_split_triangles++;
1476}
1477
1478#endif
1479
ee060c58 1480// this is some hacky mess, can this be improved somehow?
1481// ideally change things to not have to do this hack at all
1482void __attribute__((noinline))
1483setup_blocks_uv_adj_hack(psx_gpu_struct *psx_gpu, block_struct *block,
1484 edge_data_struct *span_edge_data, vec_4x32u *span_uvrg_offset)
1485{
1486 size_t span_i = span_uvrg_offset - psx_gpu->span_uvrg_offset;
1487 if (span_i != 0 && span_i != psx_gpu->num_spans - 1
1488 && !(psx_gpu->hacks_active & AHACK_TEXTURE_ADJ_U))
1489 return;
1490 u32 num_blocks = span_edge_data->num_blocks - 1;
1491 s32 offset = __builtin_ctz(span_edge_data->right_mask | 0x100) - 1;
1492 s32 toffset = 8 * num_blocks + offset - 1;
1493 if (toffset < 0 && !(psx_gpu->hacks_active & AHACK_TEXTURE_ADJ_U))
1494 return;
1495
1496 toffset += span_edge_data->left_x;
1497 s32 u_dx = psx_gpu->uvrg_dx.low.e[0];
1498 s32 v_dx = psx_gpu->uvrg_dx.low.e[1];
1499 u32 u = span_uvrg_offset->low.e[0];
1500 u32 v = span_uvrg_offset->low.e[1];
1501 u += u_dx * toffset;
1502 v += v_dx * toffset;
1503 u = (u >> 16) & psx_gpu->texture_mask_width;
1504 v = (v >> 16) & psx_gpu->texture_mask_height;
1505 if (!(psx_gpu->render_state_base & (TEXTURE_MODE_16BPP << 8))) {
1506 // 4bpp 8bpp are swizzled
1507 u32 u_ = u;
1508 u = (u & 0x0f) | ((v & 0x0f) << 4);
1509 v = (v & 0xf0) | (u_ >> 4);
1510 }
1511 assert(offset >= 0);
1512 //assert(block->uv.e[offset] == ((v << 8) | u));
1513 block->uv.e[offset] = (v << 8) | u;
1514}
75e28f62
E
1515
/* Identity mapping for a dither table entry. */
#define dither_table_entry_normal(value) \
  (value)


/*
 * Only the "direct" target needs msb_mask: it declares the vector and fills
 * every lane with psx_gpu->mask_msb. The "indirect" variant is empty.
 */
#define setup_blocks_load_msb_mask_indirect()

#define setup_blocks_load_msb_mask_direct() \
  vec_8x16u msb_mask; \
  dup_8x16b(msb_mask, psx_gpu->mask_msb);

1526
/*
 * Function-scope variables for the shaded + textured block builders:
 * per-lane u/v/r/g/b accumulators, the per-pixel deltas with their x4 and
 * x8 multiples, and a texture mask holding the width mask in the low half
 * and the height mask in the high half. Names are referenced by the other
 * setup_blocks_* macros.
 */
#define setup_blocks_variables_shaded_textured(target) \
  vec_4x32u u_block; \
  vec_4x32u v_block; \
  vec_4x32u r_block; \
  vec_4x32u g_block; \
  vec_4x32u b_block; \
  vec_4x32u uvrg_dx = psx_gpu->uvrg_dx; \
  vec_4x32u uvrg_dx4; \
  vec_4x32u uvrg_dx8; \
  vec_4x32u uvrg; \
  u32 b_dx = psx_gpu->b_block_span.e[1]; \
  u32 b_dx4 = b_dx << 2; \
  u32 b_dx8 = b_dx << 3; \
  u32 b; \
  \
  vec_16x8u texture_mask; \
  shl_4x32b(uvrg_dx4, uvrg_dx, 2); \
  shl_4x32b(uvrg_dx8, uvrg_dx, 3); \
  dup_8x8b(texture_mask.low, psx_gpu->texture_mask_width); \
  dup_8x8b(texture_mask.high, psx_gpu->texture_mask_height)

/*
 * Function-scope variables for the shaded + untextured block builders:
 * r/g/b accumulators, the rgb deltas (r/g from uvrg_dx.high, b from
 * b_block_span.e[1]) with their x4 and x8 multiples, and the byte
 * constants 0x07, 1, 4 and 128 used when packing pixels.
 */
#define setup_blocks_variables_shaded_untextured(target) \
  vec_4x32u r_block; \
  vec_4x32u g_block; \
  vec_4x32u b_block; \
  vec_4x32u rgb_dx; \
  vec_4x32u rgb_dx4; \
  vec_4x32u rgb_dx8; \
  vec_4x32u rgb; \
  \
  vec_8x8u d64_0x07; \
  vec_8x8u d64_1; \
  vec_8x8u d64_4; \
  vec_8x8u d64_128; \
  \
  dup_8x8b(d64_0x07, 0x07); \
  dup_8x8b(d64_1, 1); \
  dup_8x8b(d64_4, 4); \
  dup_8x8b(d64_128, 128); \
  \
  rgb_dx.low = psx_gpu->uvrg_dx.high; \
  rgb_dx.e[2] = psx_gpu->b_block_span.e[1]; \
  shl_4x32b(rgb_dx4, rgb_dx, 2); \
  shl_4x32b(rgb_dx8, rgb_dx, 3)

/*
 * Function-scope variables for the unshaded + textured block builders:
 * u/v accumulators, the uv deltas with their x4 and x8 multiples, and the
 * texture mask (width mask in the low half, height mask in the high half).
 */
#define setup_blocks_variables_unshaded_textured(target) \
  vec_4x32u u_block; \
  vec_4x32u v_block; \
  vec_2x32u uv_dx = psx_gpu->uvrg_dx.low; \
  vec_2x32u uv_dx4; \
  vec_2x32u uv_dx8; \
  vec_2x32u uv = psx_gpu->uvrg.low; \
  \
  vec_16x8u texture_mask; \
  shl_2x32b(uv_dx4, uv_dx, 2); \
  shl_2x32b(uv_dx8, uv_dx, 3); \
  dup_8x8b(texture_mask.low, psx_gpu->texture_mask_width); \
  dup_8x8b(texture_mask.high, psx_gpu->texture_mask_height)

1586
/* Direct target only: OR msb_mask into the flat color vector. */
#define setup_blocks_variables_unshaded_untextured_direct() \
  or_8x16b(colors, colors, msb_mask)

#define setup_blocks_variables_unshaded_untextured_indirect()

/*
 * Packs psx_gpu->triangle_color (8 bits per channel, red in the low byte)
 * into a 15-bit value with 5 bits per channel (red lowest) and replicates
 * it into `colors`.
 */
#define setup_blocks_variables_unshaded_untextured(target) \
  u32 color = psx_gpu->triangle_color; \
  vec_8x16u colors; \
  \
  u32 color_r = color & 0xFF; \
  u32 color_g = (color >> 8) & 0xFF; \
  u32 color_b = (color >> 16) & 0xFF; \
  \
  color = (color_r >> 3) | ((color_g >> 3) << 5) | \
   ((color_b >> 3) << 10); \
  dup_8x16b(colors, color); \
  setup_blocks_variables_unshaded_untextured_##target()

/* Textured: widen the 8 dither bytes to 16 bits, shifted left by 4. */
#define setup_blocks_span_initialize_dithered_textured() \
  vec_8x16u dither_offsets; \
  shl_long_8x8b(dither_offsets, dither_offsets_short, 4)

/* Untextured: add 4 to every dither byte. */
#define setup_blocks_span_initialize_dithered_untextured() \
  vec_8x8u dither_offsets; \
  add_8x8b(dither_offsets, dither_offsets_short, d64_4)

/*
 * Picks the dither row for this scanline (y & 3) and rotates it right by
 * (left_x & 3) bytes. The u64 cast keeps the left shift defined when the
 * rotate amount is 0, i.e. a shift by 32. The rotated word is duplicated
 * into both halves of dither_offsets_short.
 */
#define setup_blocks_span_initialize_dithered(texturing) \
  u32 dither_row = psx_gpu->dither_table[y & 0x3]; \
  u32 dither_shift = (span_edge_data->left_x & 0x3) * 8; \
  vec_8x8s dither_offsets_short; \
  \
  dither_row = \
   (dither_row >> dither_shift) | ((u64)dither_row << (32 - dither_shift)); \
  dup_2x32b(vector_cast(vec_2x32u, dither_offsets_short), dither_row); \
  setup_blocks_span_initialize_dithered_##texturing()

#define setup_blocks_span_initialize_undithered(texturing)

1625
/*
 * Per-span start for shaded + textured: advance the stored uvrg and b
 * values by left_x pixels, broadcast each channel to four lanes, then add
 * that channel's *_block_span vector from psx_gpu.
 */
#define setup_blocks_span_initialize_shaded_textured() \
{ \
  vec_4x32u lane_steps; \
  u32 x_start = span_edge_data->left_x; \
  \
  uvrg = *span_uvrg_offset; \
  mla_scalar_4x32b(uvrg, uvrg_dx, x_start); \
  b = *span_b_offset; \
  b += b_dx * x_start; \
  \
  dup_4x32b(u_block, uvrg.e[0]); \
  dup_4x32b(v_block, uvrg.e[1]); \
  dup_4x32b(r_block, uvrg.e[2]); \
  dup_4x32b(g_block, uvrg.e[3]); \
  dup_4x32b(b_block, b); \
  \
  lane_steps = psx_gpu->u_block_span; \
  add_4x32b(u_block, u_block, lane_steps); \
  lane_steps = psx_gpu->v_block_span; \
  add_4x32b(v_block, v_block, lane_steps); \
  lane_steps = psx_gpu->r_block_span; \
  add_4x32b(r_block, r_block, lane_steps); \
  lane_steps = psx_gpu->g_block_span; \
  add_4x32b(g_block, g_block, lane_steps); \
  lane_steps = psx_gpu->b_block_span; \
  add_4x32b(b_block, b_block, lane_steps); \
}
1653
/*
 * Per-span start for shaded + untextured: gather r/g from the high half of
 * the stored uvrg and b from span_b_offset, advance by left_x pixels,
 * broadcast each channel and add its *_block_span vector.
 */
#define setup_blocks_span_initialize_shaded_untextured() \
{ \
  vec_4x32u lane_steps; \
  u32 x_start = span_edge_data->left_x; \
  \
  rgb.low = span_uvrg_offset->high; \
  rgb.high.e[0] = *span_b_offset; \
  mla_scalar_4x32b(rgb, rgb_dx, x_start); \
  \
  dup_4x32b(r_block, rgb.e[0]); \
  dup_4x32b(g_block, rgb.e[1]); \
  dup_4x32b(b_block, rgb.e[2]); \
  \
  lane_steps = psx_gpu->r_block_span; \
  add_4x32b(r_block, r_block, lane_steps); \
  lane_steps = psx_gpu->g_block_span; \
  add_4x32b(g_block, g_block, lane_steps); \
  lane_steps = psx_gpu->b_block_span; \
  add_4x32b(b_block, b_block, lane_steps); \
}

/*
 * Per-span start for unshaded + textured: advance the stored u/v pair by
 * left_x pixels, broadcast each and add its *_block_span vector.
 */
#define setup_blocks_span_initialize_unshaded_textured() \
{ \
  vec_4x32u lane_steps; \
  u32 x_start = span_edge_data->left_x; \
  \
  uv = span_uvrg_offset->low; \
  mla_scalar_2x32b(uv, uv_dx, x_start); \
  \
  dup_4x32b(u_block, uv.e[0]); \
  dup_4x32b(v_block, uv.e[1]); \
  \
  lane_steps = psx_gpu->u_block_span; \
  add_4x32b(u_block, u_block, lane_steps); \
  lane_steps = psx_gpu->v_block_span; \
  add_4x32b(v_block, v_block, lane_steps); \
}

/* Flat, untextured spans need no per-span interpolant setup. */
#define setup_blocks_span_initialize_unshaded_untextured()

1694
/*
 * Swizzled texture addressing: sli inserts v (shifted left by 4) into u,
 * sri inserts the saved original u (shifted right by 4) into v. Operates
 * on the caller's `u` and `v` byte vectors in place.
 */
#define setup_blocks_texture_swizzled() \
{ \
  vec_8x8u u_before = u; \
  sli_8x8b(u, v, 4); \
  sri_8x8b(v, u_before, 4); \
}

#define setup_blocks_texture_unswizzled()

/*
 * Produce one 8-pixel block for shaded + textured rendering.
 *
 * For each channel the accumulator supplies pixels 0-3 (>> 16) and, after
 * adding the x4 delta, pixels 4-7; both halves are narrowed to bytes and
 * the accumulator is advanced by the x8 delta for the next block. u/v are
 * masked to the texture size, optionally swizzled, then zipped into
 * block->uv. The local `b` byte vector intentionally shadows the scalar
 * `b` of the enclosing function.
 */
#define setup_blocks_store_shaded_textured(swizzling, dithering, target, \
 edge_type) \
{ \
  vec_8x16u u_whole; \
  vec_8x16u v_whole; \
  vec_8x16u r_whole; \
  vec_8x16u g_whole; \
  vec_8x16u b_whole; \
  \
  vec_8x8u u; \
  vec_8x8u v; \
  vec_8x8u r; \
  vec_8x8u g; \
  vec_8x8u b; \
  vec_8x16u uv; \
  \
  vec_4x32u step4; \
  vec_4x32u step8; \
  \
  shr_narrow_4x32b(u_whole.low, u_block, 16); \
  dup_4x32b(step4, uvrg_dx4.e[0]); \
  add_high_narrow_4x32b(u_whole.high, u_block, step4); \
  mov_narrow_8x16b(u, u_whole); \
  dup_4x32b(step8, uvrg_dx8.e[0]); \
  add_4x32b(u_block, u_block, step8); \
  \
  shr_narrow_4x32b(v_whole.low, v_block, 16); \
  dup_4x32b(step4, uvrg_dx4.e[1]); \
  add_high_narrow_4x32b(v_whole.high, v_block, step4); \
  mov_narrow_8x16b(v, v_whole); \
  dup_4x32b(step8, uvrg_dx8.e[1]); \
  add_4x32b(v_block, v_block, step8); \
  \
  shr_narrow_4x32b(r_whole.low, r_block, 16); \
  dup_4x32b(step4, uvrg_dx4.e[2]); \
  add_high_narrow_4x32b(r_whole.high, r_block, step4); \
  mov_narrow_8x16b(r, r_whole); \
  dup_4x32b(step8, uvrg_dx8.e[2]); \
  add_4x32b(r_block, r_block, step8); \
  \
  shr_narrow_4x32b(g_whole.low, g_block, 16); \
  dup_4x32b(step4, uvrg_dx4.e[3]); \
  add_high_narrow_4x32b(g_whole.high, g_block, step4); \
  mov_narrow_8x16b(g, g_whole); \
  dup_4x32b(step8, uvrg_dx8.e[3]); \
  add_4x32b(g_block, g_block, step8); \
  \
  shr_narrow_4x32b(b_whole.low, b_block, 16); \
  dup_4x32b(step4, b_dx4); \
  add_high_narrow_4x32b(b_whole.high, b_block, step4); \
  mov_narrow_8x16b(b, b_whole); \
  dup_4x32b(step8, b_dx8); \
  add_4x32b(b_block, b_block, step8); \
  \
  and_8x8b(u, u, texture_mask.low); \
  and_8x8b(v, v, texture_mask.high); \
  setup_blocks_texture_##swizzling(); \
  \
  zip_8x16b(uv, u, v); \
  block->uv = uv; \
  block->r = r; \
  block->g = g; \
  block->b = b; \
  block->dither_offsets = vector_cast(vec_8x16u, dither_offsets); \
  block->fb_ptr = fb_ptr; \
}

/*
 * Produce one 8-pixel block for unshaded + textured rendering: same u/v
 * handling as the shaded variant, without the color channels. The local
 * `uv` vector shadows the function-scope `uv` pair.
 */
#define setup_blocks_store_unshaded_textured(swizzling, dithering, target, \
 edge_type) \
{ \
  vec_8x16u u_whole; \
  vec_8x16u v_whole; \
  \
  vec_8x8u u; \
  vec_8x8u v; \
  vec_8x16u uv; \
  \
  vec_4x32u step4; \
  vec_4x32u step8; \
  \
  shr_narrow_4x32b(u_whole.low, u_block, 16); \
  dup_4x32b(step4, uv_dx4.e[0]); \
  add_high_narrow_4x32b(u_whole.high, u_block, step4); \
  mov_narrow_8x16b(u, u_whole); \
  dup_4x32b(step8, uv_dx8.e[0]); \
  add_4x32b(u_block, u_block, step8); \
  \
  shr_narrow_4x32b(v_whole.low, v_block, 16); \
  dup_4x32b(step4, uv_dx4.e[1]); \
  add_high_narrow_4x32b(v_whole.high, v_block, step4); \
  mov_narrow_8x16b(v, v_whole); \
  dup_4x32b(step8, uv_dx8.e[1]); \
  add_4x32b(v_block, v_block, step8); \
  \
  and_8x8b(u, u, texture_mask.low); \
  and_8x8b(v, v, texture_mask.high); \
  setup_blocks_texture_##swizzling(); \
  \
  zip_8x16b(uv, u, v); \
  block->uv = uv; \
  block->dither_offsets = vector_cast(vec_8x16u, dither_offsets); \
  block->fb_ptr = fb_ptr; \
}

/*
 * Dithering for untextured shaded pixels: add the (pre-biased by 4) dither
 * offsets with addq, then remove the bias again with subq.
 */
#define setup_blocks_store_shaded_untextured_dithered() \
  addq_8x8b(r, r, dither_offsets); \
  addq_8x8b(g, g, dither_offsets); \
  addq_8x8b(b, b, dither_offsets); \
  \
  subq_8x8b(r, r, d64_4); \
  subq_8x8b(g, g, d64_4); \
  subq_8x8b(b, b, d64_4)

#define setup_blocks_store_shaded_untextured_undithered()


/* Indirect target: stash pixels and destination in the block (full/edge). */
#define setup_blocks_store_untextured_pixels_indirect_full(_pixels) \
  block->pixels = _pixels; \
  block->fb_ptr = fb_ptr

#define setup_blocks_store_untextured_pixels_indirect_edge(_pixels) \
  block->pixels = _pixels; \
  block->fb_ptr = fb_ptr

/* Indirect target: start the packed pixel with r * 1. */
#define setup_blocks_store_shaded_untextured_seed_pixels_indirect() \
  mul_long_8x8b(pixels, r, d64_1)


/* Direct target, full block: write all 8 pixels straight to fb_ptr. */
#define setup_blocks_store_untextured_pixels_direct_full(_pixels) \
  store_8x16b(_pixels, fb_ptr)

/*
 * Direct target, edge block: load the existing framebuffer pixels, expand
 * right_mask into a per-lane mask via test_mask, merge the new pixels with
 * bif under that mask and store the result back.
 */
#define setup_blocks_store_untextured_pixels_direct_edge(_pixels) \
{ \
  vec_8x16u fb_pixels; \
  vec_8x16u edge_mask; \
  vec_8x16u test_mask = psx_gpu->test_mask; \
  \
  load_8x16b(fb_pixels, fb_ptr); \
  dup_8x16b(edge_mask, span_edge_data->right_mask); \
  tst_8x16b(edge_mask, edge_mask, test_mask); \
  bif_8x16b(fb_pixels, _pixels, edge_mask); \
  store_8x16b(fb_pixels, fb_ptr); \
}

/* Direct target: start the packed pixel with msb_mask + r * 1. */
#define setup_blocks_store_shaded_untextured_seed_pixels_direct() \
  pixels = msb_mask; \
  mla_long_8x8b(pixels, r, d64_1)

1853
/*
 * Produce one 8-pixel block for shaded + untextured rendering.
 *
 * For each of r/g/b the accumulator supplies pixels 0-3 (>> 16) and, after
 * adding the x4 delta, pixels 4-7; the halves are narrowed to bytes and the
 * accumulator is advanced by the x8 delta. After optional dithering the
 * channels are reduced to 5 significant bits and packed as
 * r + g * 4 + b * 128 (plus the target-specific seed), then stored through
 * the target/edge_type specific store macro.
 *
 * Every statement is terminated with ';'. The final mla_long_8x8b used to
 * lack one, which only compiled while that helper expanded to a braced
 * block rather than an expression.
 */
#define setup_blocks_store_shaded_untextured(swizzling, dithering, target, \
 edge_type) \
{ \
  vec_8x16u r_whole; \
  vec_8x16u g_whole; \
  vec_8x16u b_whole; \
  \
  vec_8x8u r; \
  vec_8x8u g; \
  vec_8x8u b; \
  \
  vec_4x32u dx4; \
  vec_4x32u dx8; \
  \
  vec_8x16u pixels; \
  \
  shr_narrow_4x32b(r_whole.low, r_block, 16); \
  shr_narrow_4x32b(g_whole.low, g_block, 16); \
  shr_narrow_4x32b(b_whole.low, b_block, 16); \
  \
  dup_4x32b(dx4, rgb_dx4.e[0]); \
  add_high_narrow_4x32b(r_whole.high, r_block, dx4); \
  dup_4x32b(dx4, rgb_dx4.e[1]); \
  add_high_narrow_4x32b(g_whole.high, g_block, dx4); \
  dup_4x32b(dx4, rgb_dx4.e[2]); \
  add_high_narrow_4x32b(b_whole.high, b_block, dx4); \
  \
  mov_narrow_8x16b(r, r_whole); \
  mov_narrow_8x16b(g, g_whole); \
  mov_narrow_8x16b(b, b_whole); \
  \
  dup_4x32b(dx8, rgb_dx8.e[0]); \
  add_4x32b(r_block, r_block, dx8); \
  dup_4x32b(dx8, rgb_dx8.e[1]); \
  add_4x32b(g_block, g_block, dx8); \
  dup_4x32b(dx8, rgb_dx8.e[2]); \
  add_4x32b(b_block, b_block, dx8); \
  \
  setup_blocks_store_shaded_untextured_##dithering(); \
  \
  shr_8x8b(r, r, 3); \
  bic_8x8b(g, g, d64_0x07); \
  bic_8x8b(b, b, d64_0x07); \
  \
  setup_blocks_store_shaded_untextured_seed_pixels_##target(); \
  mla_long_8x8b(pixels, g, d64_4); \
  mla_long_8x8b(pixels, b, d64_128); \
  \
  setup_blocks_store_untextured_pixels_##target##_##edge_type(pixels); \
}

/* Flat untextured block: store the precomputed `colors` vector. */
#define setup_blocks_store_unshaded_untextured(swizzling, dithering, target, \
 edge_type) \
  setup_blocks_store_untextured_pixels_##target##_##edge_type(colors)


/* Textured, indirect: keep the raw mask bits in the block. */
#define setup_blocks_store_draw_mask_textured_indirect(_block, bits) \
  (_block)->draw_mask_bits = bits

/*
 * Untextured, indirect: expand the mask bits to a per-lane vector via
 * psx_gpu->test_mask and store it as the block's draw mask.
 */
#define setup_blocks_store_draw_mask_untextured_indirect(_block, bits) \
{ \
  vec_8x16u lane_mask; \
  vec_8x16u test_mask = psx_gpu->test_mask; \
  dup_8x16b(lane_mask, bits); \
  tst_8x16b(lane_mask, lane_mask, test_mask); \
  (_block)->draw_mask = lane_mask; \
}

/* Untextured, direct: nothing to store. */
#define setup_blocks_store_draw_mask_untextured_direct(_block, bits)

/* The UV adjust hack only applies to textured blocks. */
#define setup_blocks_uv_adj_hack_untextured(_block, edge_data, uvrg_offset)

/* Call the out-of-line hack only when one of the U/V adjust flags is set. */
#define setup_blocks_uv_adj_hack_textured(_block, edge_data, uvrg_offset) \
{ \
  u32 adj_flags_ = AHACK_TEXTURE_ADJ_U | AHACK_TEXTURE_ADJ_V; \
  if (unlikely(psx_gpu->hacks_active & adj_flags_)) \
    setup_blocks_uv_adj_hack(psx_gpu, _block, edge_data, uvrg_offset); \
}

/*
 * Indirect target: account for the span's blocks; if the block buffer
 * would exceed MAX_BLOCKS, flush what was queued before this span and
 * restart at the beginning of psx_gpu->blocks.
 */
#define setup_blocks_add_blocks_indirect() \
  num_blocks += span_num_blocks; \
  \
  if(num_blocks > MAX_BLOCKS) \
  { \
    psx_gpu->num_blocks = num_blocks - span_num_blocks; \
    flush_render_block_buffer(psx_gpu); \
    num_blocks = span_num_blocks; \
    block = psx_gpu->blocks; \
  }

/* Direct target: nothing is queued, only statistics are updated. */
#define setup_blocks_add_blocks_direct() \
  stats_add(texel_blocks_untextured, span_num_blocks); \
  stats_add(span_pixel_blocks, span_num_blocks);

1948
/*
 * Defines setup_blocks_<shading>_<texturing>_<dithering>_<sw>_<target>().
 *
 * The generated function walks psx_gpu->num_spans spans. For every span
 * with a non-zero block count it computes the framebuffer pointer,
 * initializes the interpolants and dither state, accounts for the blocks,
 * emits (num_blocks - 1) full blocks with an empty draw mask and a final
 * edge block carrying the span's right_mask. Spans with no blocks only
 * bump zero_block_spans. The local names are referenced by the nested
 * setup_blocks_* macros and must not change.
 */
#define setup_blocks_builder(shading, texturing, dithering, sw, target) \
void setup_blocks_##shading##_##texturing##_##dithering##_##sw##_##target( \
 psx_gpu_struct *psx_gpu) \
{ \
  setup_blocks_load_msb_mask_##target(); \
  setup_blocks_variables_##shading##_##texturing(target); \
  \
  edge_data_struct *span_edge_data = psx_gpu->span_edge_data; \
  vec_4x32u *span_uvrg_offset = psx_gpu->span_uvrg_offset; \
  u32 *span_b_offset = psx_gpu->span_b_offset; \
  \
  block_struct *block = psx_gpu->blocks + psx_gpu->num_blocks; \
  \
  u32 num_spans = psx_gpu->num_spans; \
  \
  u16 *fb_ptr; \
  u32 y; \
  \
  u32 num_blocks = psx_gpu->num_blocks; \
  u32 span_num_blocks; \
  \
  for(; num_spans; num_spans--, span_edge_data++, span_uvrg_offset++, \
   span_b_offset++) \
  { \
    span_num_blocks = span_edge_data->num_blocks; \
    if(span_num_blocks) \
    { \
      y = span_edge_data->y; \
      fb_ptr = psx_gpu->vram_out_ptr + span_edge_data->left_x + (y * 1024); \
      \
      setup_blocks_span_initialize_##shading##_##texturing(); \
      setup_blocks_span_initialize_##dithering(texturing); \
      \
      setup_blocks_add_blocks_##target(); \
      \
      s32 drawn_pixels = span_num_blocks * 8; \
      drawn_pixels -= __builtin_popcount(span_edge_data->right_mask & 0xFF); \
      span_pixels += drawn_pixels; \
      \
      for(span_num_blocks--; span_num_blocks; span_num_blocks--) \
      { \
        setup_blocks_store_##shading##_##texturing(sw, dithering, target, \
         full); \
        setup_blocks_store_draw_mask_##texturing##_##target(block, 0x00); \
        \
        fb_ptr += 8; \
        block++; \
      } \
      \
      setup_blocks_store_##shading##_##texturing(sw, dithering, target, edge); \
      setup_blocks_store_draw_mask_##texturing##_##target(block, \
       span_edge_data->right_mask); \
      setup_blocks_uv_adj_hack_##texturing(block, span_edge_data, \
       span_uvrg_offset); \
      \
      block++; \
    } \
    else \
    { \
      zero_block_spans++; \
    } \
  } \
  \
  psx_gpu->num_blocks = num_blocks; \
}

75e28f62
E
2021
2022//setup_blocks_builder(unshaded, untextured, undithered, unswizzled, direct);
2023
2bbbb7af 2024#ifndef NEON_BUILD
75e28f62
E
2025
/* C implementations of the block setup functions (non-NEON builds). */

/* Gouraud-shaded, textured. */
setup_blocks_builder(shaded, textured, dithered, swizzled, indirect);
setup_blocks_builder(shaded, textured, dithered, unswizzled, indirect);

/* Flat, textured. */
setup_blocks_builder(unshaded, textured, dithered, unswizzled, indirect);
setup_blocks_builder(unshaded, textured, dithered, swizzled, indirect);

/* Gouraud-shaded, untextured. */
setup_blocks_builder(shaded, untextured, undithered, unswizzled, indirect);
setup_blocks_builder(shaded, untextured, dithered, unswizzled, indirect);
setup_blocks_builder(shaded, untextured, undithered, unswizzled, direct);
setup_blocks_builder(shaded, untextured, dithered, unswizzled, direct);

/* Flat, untextured. */
setup_blocks_builder(unshaded, untextured, undithered, unswizzled, indirect);
setup_blocks_builder(unshaded, untextured, undithered, unswizzled, direct);
2039
75e28f62
E
2040void texture_blocks_untextured(psx_gpu_struct *psx_gpu)
2041{
2042 if(psx_gpu->primitive_type != PRIMITIVE_TYPE_SPRITE)
a2cb152a 2043 stats_add(texel_blocks_untextured, psx_gpu->num_blocks);
75e28f62
E
2044}
2045
2046void texture_blocks_4bpp(psx_gpu_struct *psx_gpu)
2047{
2048 block_struct *block = psx_gpu->blocks;
2049 u32 num_blocks = psx_gpu->num_blocks;
a2cb152a 2050 stats_add(texel_blocks_4bpp, num_blocks);
75e28f62
E
2051
2052 vec_8x8u texels_low;
2053 vec_8x8u texels_high;
2054 vec_8x8u texels;
2055 vec_8x16u pixels;
2056
2057 vec_8x16u clut_a;
2058 vec_8x16u clut_b;
2059 vec_16x8u clut_low;
2060 vec_16x8u clut_high;
2061
2062 u8 *texture_ptr_8bpp = psx_gpu->texture_page_ptr;
2063 u16 *clut_ptr = psx_gpu->clut_ptr;
2064
2065 // Can be done with one deinterleaving load on NEON
2066 load_8x16b(clut_a, clut_ptr);
2067 load_8x16b(clut_b, clut_ptr + 8);
2068 unzip_16x8b(clut_low, clut_high, clut_a, clut_b);
2069
2070 if(psx_gpu->current_texture_mask & psx_gpu->dirty_textures_4bpp_mask)
2071 update_texture_4bpp_cache(psx_gpu);
2072
2073 while(num_blocks)
2074 {
2075 texels.e[0] = texture_ptr_8bpp[block->uv.e[0]];
2076 texels.e[1] = texture_ptr_8bpp[block->uv.e[1]];
2077 texels.e[2] = texture_ptr_8bpp[block->uv.e[2]];
2078 texels.e[3] = texture_ptr_8bpp[block->uv.e[3]];
2079 texels.e[4] = texture_ptr_8bpp[block->uv.e[4]];
2080 texels.e[5] = texture_ptr_8bpp[block->uv.e[5]];
2081 texels.e[6] = texture_ptr_8bpp[block->uv.e[6]];
2082 texels.e[7] = texture_ptr_8bpp[block->uv.e[7]];
2083
2084 tbl_16(texels_low, texels, clut_low);
2085 tbl_16(texels_high, texels, clut_high);
2086
2087 // Can be done with an interleaving store on NEON
2088 zip_8x16b(pixels, texels_low, texels_high);
2089
2090 block->texels = pixels;
2091
2092 num_blocks--;
2093 block++;
2094 }
2095}
2096
2097void texture_blocks_8bpp(psx_gpu_struct *psx_gpu)
2098{
2099 block_struct *block = psx_gpu->blocks;
2100 u32 num_blocks = psx_gpu->num_blocks;
2101
a2cb152a 2102 stats_add(texel_blocks_8bpp, num_blocks);
75e28f62
E
2103
2104 if(psx_gpu->current_texture_mask & psx_gpu->dirty_textures_8bpp_mask)
2105 update_texture_8bpp_cache(psx_gpu);
2106
2107 vec_8x16u texels;
2108 u8 *texture_ptr_8bpp = psx_gpu->texture_page_ptr;
2109
2110 u32 texel;
2111 u32 offset;
2112 u32 i;
2113
2114 while(num_blocks)
2115 {
2116 for(i = 0; i < 8; i++)
2117 {
2118 offset = block->uv.e[i];
2119
2120 texel = texture_ptr_8bpp[offset];
2121 texels.e[i] = psx_gpu->clut_ptr[texel];
2122 }
2123
2124 block->texels = texels;
2125
2126 num_blocks--;
2127 block++;
2128 }
2129}
2130
2131void texture_blocks_16bpp(psx_gpu_struct *psx_gpu)
2132{
2133 block_struct *block = psx_gpu->blocks;
2134 u32 num_blocks = psx_gpu->num_blocks;
2135
a2cb152a 2136 stats_add(texel_blocks_16bpp, num_blocks);
75e28f62
E
2137
2138 vec_8x16u texels;
2139
2140 u16 *texture_ptr_16bpp = psx_gpu->texture_page_ptr;
2141 u32 offset;
2142 u32 i;
2143
2144 while(num_blocks)
2145 {
2146 for(i = 0; i < 8; i++)
2147 {
2148 offset = block->uv.e[i];
2149 offset += ((offset & 0xFF00) * 3);
2150
2151 texels.e[i] = texture_ptr_16bpp[offset];
2152 }
2153
2154 block->texels = texels;
2155
2156 num_blocks--;
2157 block++;
2158 }
2159}
2160
2161#endif
2162
2163
// Helpers shared by the shade_blocks_* builders. "indirect" targets leave
// their result in the block structure; "direct" targets merge it into the
// framebuffer through block->fb_ptr.

// Indirect targets need no mask-bit vector.
#define shade_blocks_load_msb_mask_indirect()

// Declares msb_mask in the enclosing scope with psx_gpu->mask_msb replicated
// into every 16-bit lane.
#define shade_blocks_load_msb_mask_direct() \
  vec_8x16u msb_mask; \
  dup_8x16b(msb_mask, psx_gpu->mask_msb);

// Records the draw mask and pixels in the current block.
#define shade_blocks_store_indirect(_draw_mask, _pixels) \
  block->draw_mask = _draw_mask; \
  block->pixels = _pixels

// ORs msb_mask into _pixels (modifying the argument), then merges them into
// the framebuffer pixels at block->fb_ptr under control of _draw_mask.
#define shade_blocks_store_direct(_draw_mask, _pixels) \
{ \
  vec_8x16u fb_pixels; \
  or_8x16b(_pixels, _pixels, msb_mask); \
  load_8x16b(fb_pixels, block->fb_ptr); \
  bif_8x16b(fb_pixels, _pixels, _draw_mask); \
  store_8x16b(fb_pixels, block->fb_ptr); \
}
2182
2183
// Checks run when an unshaded primitive is drawn with a triangle color of
// exactly 0x808080. Both variants expect psx_gpu and num_blocks to be in
// scope and add num_blocks to false_modulated_blocks.

// Dithered: only counts the blocks; modulation still runs.
#define shade_blocks_textured_false_modulated_check_dithered(target) \
  if(psx_gpu->triangle_color == 0x808080) \
  { \
    false_modulated_blocks += num_blocks; \
  }

// Undithered: hands the whole batch to the unmodulated shader for the same
// target and returns from the enclosing function.
#define shade_blocks_textured_false_modulated_check_undithered(target) \
  if(psx_gpu->triangle_color == 0x808080) \
  { \
    shade_blocks_textured_unmodulated_##target(psx_gpu); \
    false_modulated_blocks += num_blocks; \
    return; \
  }
2198
2199
// Pieces plugged into shade_blocks_textured_modulated_builder. They rely on
// the builder's locals (block, psx_gpu, colors_*, texels_*, pixels_*).

// Shaded primitives have no per-primitive color to load.
#define shade_blocks_textured_modulated_shaded_primitive_load(dithering, \
 target)

// Unshaded primitives: broadcast the three bytes of triangle_color into
// colors_r/g/b once, then run the false-modulation check for this dithering
// mode (which may return from the enclosing function).
#define shade_blocks_textured_modulated_unshaded_primitive_load(dithering, \
 target) \
{ \
  u32 color = psx_gpu->triangle_color; \
  dup_8x8b(colors_r, color); \
  dup_8x8b(colors_g, color >> 8); \
  dup_8x8b(colors_b, color >> 16); \
  shade_blocks_textured_false_modulated_check_##dithering(target); \
}

// Shaded primitives take their colors from each block.
#define shade_blocks_textured_modulated_shaded_block_load() \
  colors_r = block->r; \
  colors_g = block->g; \
  colors_b = block->b

// Unshaded primitives keep the colors loaded once per primitive.
#define shade_blocks_textured_modulated_unshaded_block_load()

// Dithered: start from the block's dither offsets and accumulate
// texel * color on top of them.
#define shade_blocks_textured_modulate_dithered(component) \
  pixels_##component = block->dither_offsets; \
  mla_long_8x8b(pixels_##component, texels_##component, colors_##component)

// Undithered: plain widening texel * color.
#define shade_blocks_textured_modulate_undithered(component) \
  mul_long_8x8b(pixels_##component, texels_##component, colors_##component)
2226
// Generates shade_blocks_<shading>_textured_modulated_<dithering>_<target>().
// For every queued block the generated function:
//   - builds the draw mask from block->draw_mask_bits and psx_gpu->test_mask,
//   - splits block->texels into 5-bit r/g/b components,
//   - multiplies each component by the matching color (optionally on top of
//     the block's dither offsets),
//   - narrows the products, repacks them into 16-bit pixels while keeping
//     bit 15 of the texel, and
//   - stores the result through shade_blocks_store_<target>, with lanes whose
//     texel is zero added to the mask.
#define shade_blocks_textured_modulated_builder(shading, dithering, target) \
void shade_blocks_##shading##_textured_modulated_##dithering##_##target( \
 psx_gpu_struct *psx_gpu) \
{ \
  block_struct *block = psx_gpu->blocks; \
  u32 num_blocks = psx_gpu->num_blocks; \
  vec_8x16u texels; \
  \
  vec_8x8u texels_r, texels_g, texels_b; \
  vec_8x8u colors_r, colors_g, colors_b; \
  vec_8x8u pixels_r_low, pixels_g_low, pixels_b_low; \
  \
  vec_8x16u pixels; \
  vec_8x16u pixels_r, pixels_g, pixels_b; \
  \
  vec_8x16u draw_mask; \
  vec_8x16u zero_mask; \
  \
  vec_8x8u d64_0x07, d64_0x1F, d64_1, d64_4, d64_128; \
  vec_8x16u d128_0x8000; \
  \
  vec_8x16u test_mask = psx_gpu->test_mask; \
  u32 draw_mask_bits; \
  shade_blocks_load_msb_mask_##target(); \
  \
  dup_8x8b(d64_0x07, 0x07); \
  dup_8x8b(d64_0x1F, 0x1F); \
  dup_8x8b(d64_1, 1); \
  dup_8x8b(d64_4, 4); \
  dup_8x8b(d64_128, 128); \
  \
  dup_8x16b(d128_0x8000, 0x8000); \
  \
  shade_blocks_textured_modulated_##shading##_primitive_load(dithering, \
   target); \
  \
  for(; num_blocks != 0; num_blocks--, block++) \
  { \
    draw_mask_bits = block->draw_mask_bits; \
    dup_8x16b(draw_mask, draw_mask_bits); \
    tst_8x16b(draw_mask, draw_mask, test_mask); \
    \
    shade_blocks_textured_modulated_##shading##_block_load(); \
    \
    texels = block->texels; \
    \
    /* Extract the three 5-bit components of each texel. */ \
    mov_narrow_8x16b(texels_r, texels); \
    shr_narrow_8x16b(texels_g, texels, 5); \
    shr_narrow_8x16b(texels_b, texels, 7); \
    \
    and_8x8b(texels_r, texels_r, d64_0x1F); \
    and_8x8b(texels_g, texels_g, d64_0x1F); \
    shr_8x8b(texels_b, texels_b, 3); \
    \
    shade_blocks_textured_modulate_##dithering(r); \
    shade_blocks_textured_modulate_##dithering(g); \
    shade_blocks_textured_modulate_##dithering(b); \
    \
    cmpeqz_8x16b(zero_mask, texels); \
    and_8x16b(pixels, texels, d128_0x8000); \
    \
    shrq_narrow_signed_8x16b(pixels_r_low, pixels_r, 4); \
    shrq_narrow_signed_8x16b(pixels_g_low, pixels_g, 4); \
    shrq_narrow_signed_8x16b(pixels_b_low, pixels_b, 4); \
    \
    or_8x16b(zero_mask, draw_mask, zero_mask); \
    \
    shr_8x8b(pixels_r_low, pixels_r_low, 3); \
    bic_8x8b(pixels_g_low, pixels_g_low, d64_0x07); \
    bic_8x8b(pixels_b_low, pixels_b_low, d64_0x07); \
    \
    /* Multiplying by 1, 4 and 128 places r, g and b in the 16-bit pixel. */ \
    mla_long_8x8b(pixels, pixels_r_low, d64_1); \
    mla_long_8x8b(pixels, pixels_g_low, d64_4); \
    mla_long_8x8b(pixels, pixels_b_low, d64_128); \
    \
    shade_blocks_store_##target(zero_mask, pixels); \
  } \
}
2323
2bbbb7af 2324#ifndef NEON_BUILD
75e28f62
E
2325
2326shade_blocks_textured_modulated_builder(shaded, dithered, direct);
2327shade_blocks_textured_modulated_builder(shaded, undithered, direct);
2328shade_blocks_textured_modulated_builder(unshaded, dithered, direct);
2329shade_blocks_textured_modulated_builder(unshaded, undithered, direct);
2330
2331shade_blocks_textured_modulated_builder(shaded, dithered, indirect);
2332shade_blocks_textured_modulated_builder(shaded, undithered, indirect);
2333shade_blocks_textured_modulated_builder(unshaded, dithered, indirect);
2334shade_blocks_textured_modulated_builder(unshaded, undithered, indirect);
2335
2336#endif
2337
2338
// Generates shade_blocks_textured_unmodulated_<target>(): texels are used as
// pixels unchanged. Lanes whose texel is zero are added to the draw mask
// built from block->draw_mask_bits and psx_gpu->test_mask, and the result is
// stored through shade_blocks_store_<target>.
#define shade_blocks_textured_unmodulated_builder(target) \
void shade_blocks_textured_unmodulated_##target(psx_gpu_struct *psx_gpu) \
{ \
  block_struct *block = psx_gpu->blocks; \
  u32 num_blocks = psx_gpu->num_blocks; \
  vec_8x16u test_mask = psx_gpu->test_mask; \
  vec_8x16u draw_mask; \
  vec_8x16u pixels; \
  u32 draw_mask_bits; \
  shade_blocks_load_msb_mask_##target(); \
  \
  for(; num_blocks != 0; num_blocks--, block++) \
  { \
    vec_8x16u zero_mask; \
    \
    draw_mask_bits = block->draw_mask_bits; \
    dup_8x16b(draw_mask, draw_mask_bits); \
    tst_8x16b(draw_mask, draw_mask, test_mask); \
    \
    pixels = block->texels; \
    \
    cmpeqz_8x16b(zero_mask, pixels); \
    or_8x16b(zero_mask, draw_mask, zero_mask); \
    \
    shade_blocks_store_##target(zero_mask, pixels); \
  } \
}
2370
3867c6ef
E
// Generates shade_blocks_textured_unmodulated_dithered_<target>(). The
// generated body performs the same steps as the non-dithered unmodulated
// builder: texels become pixels unchanged, zero texels are masked out, and
// the result goes through shade_blocks_store_<target>.
#define shade_blocks_textured_unmodulated_dithered_builder(target) \
void shade_blocks_textured_unmodulated_dithered_##target(psx_gpu_struct \
 *psx_gpu) \
{ \
  block_struct *block = psx_gpu->blocks; \
  u32 num_blocks = psx_gpu->num_blocks; \
  vec_8x16u test_mask = psx_gpu->test_mask; \
  vec_8x16u draw_mask; \
  vec_8x16u pixels; \
  u32 draw_mask_bits; \
  shade_blocks_load_msb_mask_##target(); \
  \
  for(; num_blocks != 0; num_blocks--, block++) \
  { \
    vec_8x16u zero_mask; \
    \
    draw_mask_bits = block->draw_mask_bits; \
    dup_8x16b(draw_mask, draw_mask_bits); \
    tst_8x16b(draw_mask, draw_mask, test_mask); \
    \
    pixels = block->texels; \
    \
    cmpeqz_8x16b(zero_mask, pixels); \
    or_8x16b(zero_mask, draw_mask, zero_mask); \
    \
    shade_blocks_store_##target(zero_mask, pixels); \
  } \
}
75e28f62 2403
2bbbb7af 2404#ifndef NEON_BUILD
75e28f62
E
2405
2406shade_blocks_textured_unmodulated_builder(indirect)
2407shade_blocks_textured_unmodulated_builder(direct)
2408
75e28f62
E
2409void shade_blocks_unshaded_untextured_indirect(psx_gpu_struct *psx_gpu)
2410{
2411}
2412
2413void shade_blocks_unshaded_untextured_direct(psx_gpu_struct *psx_gpu)
2414{
2415 block_struct *block = psx_gpu->blocks;
2416 u32 num_blocks = psx_gpu->num_blocks;
2417
2418 vec_8x16u pixels = block->pixels;
2419 shade_blocks_load_msb_mask_direct();
2420
2421 while(num_blocks)
2422 {
2423 shade_blocks_store_direct(block->draw_mask, pixels);
2424
2425 num_blocks--;
2426 block++;
2427 }
2428}
2429
2430#endif
2431
2432void shade_blocks_shaded_untextured(psx_gpu_struct *psx_gpu)
2433{
2434}
2435
2436
// Mask evaluation on: lanes whose framebuffer pixel is negative as a signed
// 16-bit value (bit 15 set) are added to draw_mask.
#define blend_blocks_mask_evaluate_on() \
  vec_8x16u mask_pixels; \
  cmpltz_8x16b(mask_pixels, framebuffer_pixels); \
  or_8x16b(draw_mask, draw_mask, mask_pixels)

// Mask evaluation off: nothing to do.
#define blend_blocks_mask_evaluate_off()
2443
// Average blend: blend_pixels = average of framebuffer_pixels and pixels,
// computed on whole 16-bit lanes with bit 15 cleared from both inputs. The
// bits where the two inputs differ in 0x0421 are subtracted from the source
// before averaging.
#define blend_blocks_average() \
{ \
  vec_8x16u pixels_no_msb; \
  vec_8x16u fb_pixels_no_msb; \
  \
  vec_8x16u d128_0x0421; \
  vec_8x16u d128_0x8000; \
  \
  dup_8x16b(d128_0x0421, 0x0421); \
  dup_8x16b(d128_0x8000, 0x8000); \
  \
  eor_8x16b(blend_pixels, pixels, framebuffer_pixels); \
  bic_8x16b(pixels_no_msb, pixels, d128_0x8000); \
  and_8x16b(blend_pixels, blend_pixels, d128_0x0421); \
  sub_8x16b(blend_pixels, pixels_no_msb, blend_pixels); \
  bic_8x16b(fb_pixels_no_msb, framebuffer_pixels, d128_0x8000); \
  average_8x16b(blend_pixels, fb_pixels_no_msb, blend_pixels); \
}
2462
// Additive blend: the 0x7C1F and 0x03E0 bit groups of pixels and
// framebuffer_pixels are added separately and each sum is clamped with min
// against its own mask (bytewise for the 0x7C1F group) before the two groups
// are ORed back together into blend_pixels.
#define blend_blocks_add() \
{ \
  vec_8x16u pixels_rb, pixels_g; \
  vec_8x16u fb_rb, fb_g; \
  \
  vec_8x16u d128_0x7C1F; \
  vec_8x16u d128_0x03E0; \
  \
  dup_8x16b(d128_0x7C1F, 0x7C1F); \
  dup_8x16b(d128_0x03E0, 0x03E0); \
  \
  and_8x16b(pixels_rb, pixels, d128_0x7C1F); \
  and_8x16b(pixels_g, pixels, d128_0x03E0); \
  \
  and_8x16b(fb_rb, framebuffer_pixels, d128_0x7C1F); \
  and_8x16b(fb_g, framebuffer_pixels, d128_0x03E0); \
  \
  add_8x16b(fb_rb, fb_rb, pixels_rb); \
  add_8x16b(fb_g, fb_g, pixels_g); \
  \
  min_16x8b(vector_cast(vec_16x8u, fb_rb), vector_cast(vec_16x8u, fb_rb), \
   vector_cast(vec_16x8u, d128_0x7C1F)); \
  min_8x16b(fb_g, fb_g, d128_0x03E0); \
  \
  or_8x16b(blend_pixels, fb_rb, fb_g); \
}
2489
// Subtractive blend: the 0x7C1F and 0x03E0 bit groups of pixels are
// subtracted from the same groups of framebuffer_pixels using the subs
// operations (bytewise for the 0x7C1F group), then ORed into blend_pixels.
#define blend_blocks_subtract() \
{ \
  vec_8x16u pixels_rb, pixels_g; \
  vec_8x16u fb_rb, fb_g; \
  \
  vec_8x16u d128_0x7C1F; \
  vec_8x16u d128_0x03E0; \
  \
  dup_8x16b(d128_0x7C1F, 0x7C1F); \
  dup_8x16b(d128_0x03E0, 0x03E0); \
  \
  and_8x16b(pixels_rb, pixels, d128_0x7C1F); \
  and_8x16b(pixels_g, pixels, d128_0x03E0); \
  \
  and_8x16b(fb_rb, framebuffer_pixels, d128_0x7C1F); \
  and_8x16b(fb_g, framebuffer_pixels, d128_0x03E0); \
  \
  subs_16x8b(vector_cast(vec_16x8u, fb_rb), \
   vector_cast(vec_16x8u, fb_rb), vector_cast(vec_16x8u, pixels_rb)); \
  subs_8x16b(fb_g, fb_g, pixels_g); \
  \
  or_8x16b(blend_pixels, fb_rb, fb_g); \
}
2513
// Quarter-additive blend: same structure as blend_blocks_add, but the source
// pixels are first shifted right by two and masked with 0x1C07 / 0x00E0, so
// only a quarter of each source component is added to the framebuffer value.
#define blend_blocks_add_fourth() \
{ \
  vec_8x16u pixels_rb, pixels_g; \
  vec_8x16u pixels_fourth; \
  vec_8x16u fb_rb, fb_g; \
  \
  vec_8x16u d128_0x7C1F; \
  vec_8x16u d128_0x1C07; \
  vec_8x16u d128_0x03E0; \
  vec_8x16u d128_0x00E0; \
  \
  dup_8x16b(d128_0x7C1F, 0x7C1F); \
  dup_8x16b(d128_0x1C07, 0x1C07); \
  dup_8x16b(d128_0x03E0, 0x03E0); \
  dup_8x16b(d128_0x00E0, 0x00E0); \
  \
  shr_8x16b(pixels_fourth, vector_cast(vec_8x16s, pixels), 2); \
  \
  and_8x16b(fb_rb, framebuffer_pixels, d128_0x7C1F); \
  and_8x16b(fb_g, framebuffer_pixels, d128_0x03E0); \
  \
  and_8x16b(pixels_rb, pixels_fourth, d128_0x1C07); \
  and_8x16b(pixels_g, pixels_fourth, d128_0x00E0); \
  \
  add_8x16b(fb_rb, fb_rb, pixels_rb); \
  add_8x16b(fb_g, fb_g, pixels_g); \
  \
  min_16x8b(vector_cast(vec_16x8u, fb_rb), vector_cast(vec_16x8u, fb_rb), \
   vector_cast(vec_16x8u, d128_0x7C1F)); \
  min_8x16b(fb_g, fb_g, d128_0x03E0); \
  \
  or_8x16b(blend_pixels, fb_rb, fb_g); \
}
2547
// Textured: set bit 15 on the blended result, then use the blend result only
// for lanes whose source pixel is negative as a signed 16-bit value (bit 15
// set); the other lanes take the unblended source pixel.
#define blend_blocks_blended_combine_textured() \
{ \
  vec_8x16u blend_mask; \
  cmpltz_8x16b(blend_mask, pixels); \
  \
  or_immediate_8x16b(blend_pixels, blend_pixels, 0x8000); \
  bif_8x16b(blend_pixels, pixels, blend_mask); \
}

// Untextured: every lane uses the blended result as is.
#define blend_blocks_blended_combine_untextured()
2558
2559
// Loop-body selectors for blend_blocks_builder: one macro per blend mode,
// each running the blend followed by the texturing-specific combine step.
#define blend_blocks_body_blend(blend_mode, texturing) \
{ \
  blend_blocks_##blend_mode(); \
  blend_blocks_blended_combine_##texturing(); \
}

#define blend_blocks_body_average(texturing) \
  blend_blocks_body_blend(average, texturing)

#define blend_blocks_body_add(texturing) \
  blend_blocks_body_blend(add, texturing)

#define blend_blocks_body_subtract(texturing) \
  blend_blocks_body_blend(subtract, texturing)

#define blend_blocks_body_add_fourth(texturing) \
  blend_blocks_body_blend(add_fourth, texturing)

// Unblended: the source pixels are written unchanged.
#define blend_blocks_body_unblended(texturing) \
  blend_pixels = pixels
2580
2581
// Generates blend_blocks_<texturing>_<blend_mode>_<mask_evaluate>(). For
// every queued block the generated function loads the framebuffer pixels at
// block->fb_ptr, optionally extends the draw mask from them, blends
// block->pixels against them, ORs in psx_gpu->mask_msb and writes the result
// back under control of the draw mask. The blend_blocks counter is
// incremented once per block.
#define blend_blocks_builder(texturing, blend_mode, mask_evaluate) \
void \
 blend_blocks_##texturing##_##blend_mode##_##mask_evaluate(psx_gpu_struct \
 *psx_gpu) \
{ \
  block_struct *block = psx_gpu->blocks; \
  u32 num_blocks = psx_gpu->num_blocks; \
  vec_8x16u draw_mask; \
  vec_8x16u pixels; \
  vec_8x16u blend_pixels; \
  vec_8x16u framebuffer_pixels; \
  vec_8x16u msb_mask; \
  u16 *fb_ptr; \
  \
  dup_8x16b(msb_mask, psx_gpu->mask_msb); \
  \
  for(; num_blocks != 0; num_blocks--, block++) \
  { \
    pixels = block->pixels; \
    draw_mask = block->draw_mask; \
    fb_ptr = block->fb_ptr; \
    \
    load_8x16b(framebuffer_pixels, fb_ptr); \
    \
    blend_blocks_mask_evaluate_##mask_evaluate(); \
    blend_blocks_body_##blend_mode(texturing); \
    \
    or_8x16b(blend_pixels, blend_pixels, msb_mask); \
    bif_8x16b(framebuffer_pixels, blend_pixels, draw_mask); \
    store_8x16b(framebuffer_pixels, fb_ptr); \
    \
    blend_blocks++; \
  } \
}
2619
2bbbb7af 2620#ifndef NEON_BUILD
75e28f62
E
2621
2622void blend_blocks_textured_unblended_off(psx_gpu_struct *psx_gpu)
2623{
2624}
2625
2626blend_blocks_builder(textured, average, off);
2627blend_blocks_builder(textured, average, on);
2628blend_blocks_builder(textured, add, off);
2629blend_blocks_builder(textured, add, on);
2630blend_blocks_builder(textured, subtract, off);
2631blend_blocks_builder(textured, subtract, on);
2632blend_blocks_builder(textured, add_fourth, off);
2633blend_blocks_builder(textured, add_fourth, on);
2634
2635blend_blocks_builder(untextured, average, off);
2636blend_blocks_builder(untextured, average, on);
2637blend_blocks_builder(untextured, add, off);
2638blend_blocks_builder(untextured, add, on);
2639blend_blocks_builder(untextured, subtract, off);
2640blend_blocks_builder(untextured, subtract, on);
2641blend_blocks_builder(untextured, add_fourth, off);
2642blend_blocks_builder(untextured, add_fourth, on);
2643
2644blend_blocks_builder(textured, unblended, on);
2645
2646#endif
2647
2648
// Exchanges two vertex_struct pointers and flips the low bit of the
// triangle_winding variable, which must be in scope at the point of use.
// _a and _b must be modifiable lvalues. Wrapped in do { } while(0) so that
// "vertex_swap(a, b);" behaves as a single statement, including in an
// if/else without braces.
#define vertex_swap(_a, _b) \
do \
{ \
  vertex_struct *temp_vertex = (_a); \
  (_a) = (_b); \
  (_b) = temp_vertex; \
  triangle_winding ^= 1; \
} while(0)
2656
2657
2658// Setup blocks parametric-variables:
2659// SHADE TEXTURE_MAP SWIZZLING
2660// 0 0 x
2661// 0 1 0
2662// 0 1 1
2663// 1 0 x
2664// 1 1 0
2665// 1 1 1
2666// 8 inputs, 6 combinations
2667
// Name-resolution macros: map a combination of render flags to the name of
// the setup_blocks_* function that handles it.

// Untextured: unshaded primitives always use the undithered variant, shaded
// ones honor the dithering flag.
#define setup_blocks_switch_untextured_unshaded(dithering, target) \
  setup_blocks_unshaded_untextured_undithered_unswizzled_##target

#define setup_blocks_switch_untextured_shaded(dithering, target) \
  setup_blocks_shaded_untextured_##dithering##_unswizzled_##target

#define setup_blocks_switch_untextured(shading, texture_mode, dithering, \
 target) \
  setup_blocks_switch_untextured_##shading(dithering, target)

// Textured: always the dithered, indirect variant; 4bpp and 8bpp use the
// swizzled form, 16bpp the unswizzled one.
#define setup_blocks_switch_texture_mode_4bpp(shading) \
  setup_blocks_##shading##_textured_dithered_swizzled_indirect

#define setup_blocks_switch_texture_mode_8bpp(shading) \
  setup_blocks_##shading##_textured_dithered_swizzled_indirect

#define setup_blocks_switch_texture_mode_16bpp(shading) \
  setup_blocks_##shading##_textured_dithered_unswizzled_indirect

#define setup_blocks_switch_textured(shading, texture_mode, dithering, target) \
  setup_blocks_switch_texture_mode_##texture_mode(shading)

// Blended primitives always go through the indirect target.
#define setup_blocks_switch_blended(shading, texturing, texture_mode, \
 dithering, mask_evaluate) \
  setup_blocks_switch_##texturing(shading, texture_mode, dithering, indirect)

// Unblended primitives go indirect when mask evaluation is on and direct
// when it is off.
#define setup_blocks_switch_unblended_on(shading, texturing, texture_mode, \
 dithering) \
  setup_blocks_switch_##texturing(shading, texture_mode, dithering, indirect)

#define setup_blocks_switch_unblended_off(shading, texturing, texture_mode, \
 dithering) \
  setup_blocks_switch_##texturing(shading, texture_mode, dithering, direct)

#define setup_blocks_switch_unblended(shading, texturing, texture_mode, \
 dithering, mask_evaluate) \
  setup_blocks_switch_unblended_##mask_evaluate(shading, texturing, \
   texture_mode, dithering)

// Entry point: dispatch on the blending flag.
#define setup_blocks_switch(shading, texturing, texture_mode, dithering, \
 blending, mask_evaluate) \
  setup_blocks_switch_##blending(shading, texturing, texture_mode, \
   dithering, mask_evaluate)
2711
2712
2713// Texture blocks:
2714
// Resolve the texture_blocks_* function name: untextured primitives ignore
// the texture mode, textured ones select by it.
#define texture_blocks_switch_untextured(texture_mode) \
  texture_blocks_untextured

#define texture_blocks_switch_textured(texture_mode) \
  texture_blocks_##texture_mode

#define texture_blocks_switch(texturing, texture_mode) \
  texture_blocks_switch_##texturing(texture_mode)
2723
2724
2725// Shade blocks parametric-variables:
2726// SHADE TEXTURE_MAP MODULATE_TEXELS dither_mode
2727// 0 0 x x
2728// 0 1 0 0
2729// 0 1 0 1
2730// x 1 1 x
2731// 1 0 x 0
2732// 1 0 x 1
2733// 1 1 0 0
2734// 1 1 0 1
2735// 16 inputs, 8 combinations
2736
// Name-resolution macros: map a combination of render flags to the name of
// the shade_blocks_* function that handles it.

// Unshaded primitives.
#define shade_blocks_switch_unshaded_untextured(modulation, dithering, target) \
  shade_blocks_unshaded_untextured_##target

#define shade_blocks_switch_unshaded_textured_unmodulated(dithering, target) \
  shade_blocks_textured_unmodulated_##target

#define shade_blocks_switch_unshaded_textured_modulated(dithering, target) \
  shade_blocks_unshaded_textured_modulated_##dithering##_##target

#define shade_blocks_switch_unshaded_textured(modulation, dithering, target) \
  shade_blocks_switch_unshaded_textured_##modulation(dithering, target)

#define shade_blocks_switch_unshaded(texturing, modulation, dithering, target) \
  shade_blocks_switch_unshaded_##texturing(modulation, dithering, target)

// Shaded primitives. The untextured case has a single handler regardless of
// target; unmodulated textures share the handler used by unshaded ones.
#define shade_blocks_switch_shaded_untextured(modulation, dithering, target) \
  shade_blocks_shaded_untextured

#define shade_blocks_switch_shaded_textured_unmodulated(dithering, target) \
  shade_blocks_textured_unmodulated_##target

#define shade_blocks_switch_shaded_textured_modulated(dithering, target) \
  shade_blocks_shaded_textured_modulated_##dithering##_##target

#define shade_blocks_switch_shaded_textured(modulation, dithering, target) \
  shade_blocks_switch_shaded_textured_##modulation(dithering, target)

#define shade_blocks_switch_shaded(texturing, modulation, dithering, target) \
  shade_blocks_switch_shaded_##texturing(modulation, dithering, target)

// Target selection: unblended primitives write directly when mask evaluation
// is off and indirectly when it is on; blended primitives are always
// indirect.
#define shade_blocks_switch_mask_off(shading, texturing, modulation, \
 dithering) \
  shade_blocks_switch_##shading(texturing, modulation, dithering, direct)

#define shade_blocks_switch_mask_on(shading, texturing, modulation, \
 dithering) \
  shade_blocks_switch_##shading(texturing, modulation, dithering, indirect)

#define shade_blocks_switch_blended(shading, texturing, modulation, dithering, \
 mask_evaluate) \
  shade_blocks_switch_##shading(texturing, modulation, dithering, indirect)

#define shade_blocks_switch_unblended(shading, texturing, modulation, \
 dithering, mask_evaluate) \
  shade_blocks_switch_mask_##mask_evaluate(shading, texturing, modulation, \
   dithering)

// Entry point: dispatch on the blending flag.
#define shade_blocks_switch(shading, texturing, modulation, dithering, \
 blending, mask_evaluate) \
  shade_blocks_switch_##blending(shading, texturing, modulation, dithering, \
   mask_evaluate)
2788
2789
2790// Blend blocks parametric-variables:
2791// TEXTURE_MAP BLEND BM_A BM_B mask_evaluate
2792// x 0 x x 0
2793// x 0 x x 1
2794// 0 1 0 0 0
2795// 0 1 0 0 1
2796// 0 1 0 1 0
2797// 0 1 0 1 1
2798// 0 1 1 0 0
2799// 0 1 1 0 1
2800// 0 1 1 1 0
2801// 0 1 1 1 1
2802// 1 1 0 0 0
2803// 1 1 0 0 1
2804// 1 1 0 1 0
2805// 1 1 0 1 1
2806// 1 1 1 0 0
2807// 1 1 1 0 1
2808// 1 1 1 1 0
2809// 1 1 1 1 1
2810// 32 inputs, 18 combinations
2811
// Resolve the blend_blocks_* function name. Unblended primitives use the
// textured_unblended handlers whatever the texturing flag is; blended ones
// select by texturing, blend mode and mask evaluation.
#define blend_blocks_switch_unblended(texturing, blend_mode, mask_evaluate) \
  blend_blocks_textured_unblended_##mask_evaluate

#define blend_blocks_switch_blended(texturing, blend_mode, mask_evaluate) \
  blend_blocks_##texturing##_##blend_mode##_##mask_evaluate

#define blend_blocks_switch(texturing, blending, blend_mode, mask_evaluate) \
  blend_blocks_switch_##blending(texturing, blend_mode, mask_evaluate)
2820
2821
// Table generator. render_blocks_switch_block() expands to one entry per
// combination of render flags. Each level below doubles (or quadruples) the
// entries of the level it wraps, so the flag that varies fastest is
// modulation, followed by blending, texturing, dithering, shading, mask
// evaluation, blend mode and finally texture mode.

// Innermost level: one entry naming the setup, texture, shade and blend
// handlers for a fully specified flag combination.
#define render_blocks_switch_block_modulation(texture_mode, blend_mode, \
 mask_evaluate, shading, dithering, texturing, blending, modulation) \
{ \
  setup_blocks_switch(shading, texturing, texture_mode, dithering, blending, \
   mask_evaluate), \
  texture_blocks_switch(texturing, texture_mode), \
  shade_blocks_switch(shading, texturing, modulation, dithering, blending, \
   mask_evaluate), \
  blend_blocks_switch(texturing, blending, blend_mode, mask_evaluate) \
}

// modulated, then unmodulated
#define render_blocks_switch_block_blending(texture_mode, blend_mode, \
 mask_evaluate, shading, dithering, texturing, blending) \
  render_blocks_switch_block_modulation(texture_mode, blend_mode, \
   mask_evaluate, shading, dithering, texturing, blending, modulated), \
  render_blocks_switch_block_modulation(texture_mode, blend_mode, \
   mask_evaluate, shading, dithering, texturing, blending, unmodulated)

// unblended, then blended
#define render_blocks_switch_block_texturing(texture_mode, blend_mode, \
 mask_evaluate, shading, dithering, texturing) \
  render_blocks_switch_block_blending(texture_mode, blend_mode, \
   mask_evaluate, shading, dithering, texturing, unblended), \
  render_blocks_switch_block_blending(texture_mode, blend_mode, \
   mask_evaluate, shading, dithering, texturing, blended)

// untextured, then textured
#define render_blocks_switch_block_dithering(texture_mode, blend_mode, \
 mask_evaluate, shading, dithering) \
  render_blocks_switch_block_texturing(texture_mode, blend_mode, \
   mask_evaluate, shading, dithering, untextured), \
  render_blocks_switch_block_texturing(texture_mode, blend_mode, \
   mask_evaluate, shading, dithering, textured)

// undithered, then dithered
#define render_blocks_switch_block_shading(texture_mode, blend_mode, \
 mask_evaluate, shading) \
  render_blocks_switch_block_dithering(texture_mode, blend_mode, \
   mask_evaluate, shading, undithered), \
  render_blocks_switch_block_dithering(texture_mode, blend_mode, \
   mask_evaluate, shading, dithered)

// unshaded, then shaded
#define render_blocks_switch_block_mask_evaluate(texture_mode, blend_mode, \
 mask_evaluate) \
  render_blocks_switch_block_shading(texture_mode, blend_mode, mask_evaluate, \
   unshaded), \
  render_blocks_switch_block_shading(texture_mode, blend_mode, mask_evaluate, \
   shaded)

// mask evaluation off, then on
#define render_blocks_switch_block_blend_mode(texture_mode, blend_mode) \
  render_blocks_switch_block_mask_evaluate(texture_mode, blend_mode, off), \
  render_blocks_switch_block_mask_evaluate(texture_mode, blend_mode, on)

// average, add, subtract, add_fourth
#define render_blocks_switch_block_texture_mode(texture_mode) \
  render_blocks_switch_block_blend_mode(texture_mode, average), \
  render_blocks_switch_block_blend_mode(texture_mode, add), \
  render_blocks_switch_block_blend_mode(texture_mode, subtract), \
  render_blocks_switch_block_blend_mode(texture_mode, add_fourth)

// Four texture mode slots; the last one repeats the 16bpp handlers.
#define render_blocks_switch_block() \
  render_blocks_switch_block_texture_mode(4bpp), \
  render_blocks_switch_block_texture_mode(8bpp), \
  render_blocks_switch_block_texture_mode(16bpp), \
  render_blocks_switch_block_texture_mode(16bpp)
75e28f62
E
2883
2884
2885render_block_handler_struct render_triangle_block_handlers[] =
2886{
2887 render_blocks_switch_block()
2888};
2889
// Redefine the innermost table-generator level so that the same
// render_blocks_switch_block() expansion yields, for every flag combination,
// a single string literal describing it instead of a handler entry.
#undef render_blocks_switch_block_modulation

#define render_blocks_switch_block_modulation(texture_mode, blend_mode, \
 mask_evaluate, shading, dithering, texturing, blending, modulation) \
  "render flags:\n" \
  "texture mode: " #texture_mode "\n" \
  "blend mode: " #blend_mode "\n" \
  "mask evaluation: " #mask_evaluate "\n" \
  #shading "\n" \
  #dithering "\n" \
  #texturing "\n" \
  #blending "\n" \
  #modulation "\n"
2903
2904char *render_block_flag_strings[] =
2905{
2906 render_blocks_switch_block()
2907};
2908
2909
// Direction codes for one triangle edge, derived from the sign of its
// y delta: negative -> up, zero -> flat, positive -> down.
#define triangle_y_direction_up 1
#define triangle_y_direction_flat 2
#define triangle_y_direction_down 0

#define triangle_winding_positive 0
#define triangle_winding_negative 1

// Declares the u32 direction_variable and sets it to the direction code of
// value: the sign bit of value (1 when negative, 0 otherwise), overridden
// with 2 when value is zero. value is evaluated twice, so it must have no
// side effects; it is parenthesized so that any expression can be passed.
// Expands to a declaration followed by an if statement, so it can only be
// used where both are allowed.
#define triangle_set_direction(direction_variable, value) \
  u32 direction_variable = (u32)(value) >> 31; \
  if((value) == 0) \
    direction_variable = 2

// Builds the case label for a combination of three edge directions (two
// bits each, starting at bit 0) and the winding (bit 6).
#define triangle_case(direction_a, direction_b, direction_c, winding) \
  case (triangle_y_direction_##direction_a | \
   (triangle_y_direction_##direction_b << 2) | \
   (triangle_y_direction_##direction_c << 4) | \
   (triangle_winding_##winding << 6))
2927
c1817bd9 2928static int prepare_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,
f4ce97e1 2929 prepared_triangle *triangle_out)
75e28f62 2930{
f4ce97e1 2931 s32 y_top, y_bottom, offset_x, offset_y, i;
75e28f62
E
2932 s32 triangle_area;
2933 u32 triangle_winding = 0;
2934
2935 vertex_struct *a = &(vertexes[0]);
2936 vertex_struct *b = &(vertexes[1]);
2937 vertex_struct *c = &(vertexes[2]);
2938
2939 triangle_area = triangle_signed_area_x2(a->x, a->y, b->x, b->y, c->x, c->y);
2940
3867c6ef 2941#ifdef PROFILE
75e28f62 2942 triangles++;
3867c6ef 2943#endif
75e28f62
E
2944
2945 if(triangle_area == 0)
2946 {
3867c6ef 2947#ifdef PROFILE
75e28f62 2948 trivial_rejects++;
3867c6ef 2949#endif
c1817bd9 2950 return 0;
75e28f62
E
2951 }
2952
2953 if(b->y < a->y)
2954 vertex_swap(a, b);
2955
2956 if(c->y < b->y)
2957 {
2958 vertex_swap(b, c);
2959
2960 if(b->y < a->y)
2961 vertex_swap(a, b);
2962 }
2963
2964 y_bottom = c->y;
2965 y_top = a->y;
f4ce97e1 2966 offset_y = sign_extend_11bit(y_top + psx_gpu->offset_y) - y_top;
75e28f62
E
2967
2968 if((y_bottom - y_top) >= 512)
2969 {
3867c6ef 2970#ifdef PROFILE
75e28f62 2971 trivial_rejects++;
3867c6ef 2972#endif
c1817bd9 2973 return 0;
75e28f62
E
2974 }
2975
2976 if(triangle_area < 0)
2977 {
2978 triangle_area = -triangle_area;
2979 triangle_winding ^= 1;
2980 vertex_swap(a, c);
2981 }
2982
2983 if(b->x < a->x)
2984 vertex_swap(a, b);
2985
2986 if(c->x < b->x)
2987 {
2988 vertex_swap(b, c);
2989
2990 if(b->x < a->x)
2991 vertex_swap(a, b);
2992 }
2993
f4ce97e1 2994 if(c->x - a->x >= 1024)
75e28f62 2995 {
3867c6ef 2996#ifdef PROFILE
75e28f62 2997 trivial_rejects++;
3867c6ef 2998#endif
c1817bd9 2999 return 0;
75e28f62
E
3000 }
3001
f4ce97e1 3002 offset_x = sign_extend_11bit(a->x + psx_gpu->offset_x) - a->x;
3003 if(invalidate_texture_cache_region_viewport(psx_gpu,
3004 a->x + offset_x, y_top + offset_y,
3005 c->x + offset_x, y_bottom + offset_y) == 0)
75e28f62 3006 {
3867c6ef 3007#ifdef PROFILE
75e28f62 3008 trivial_rejects++;
3867c6ef 3009#endif
c1817bd9 3010 return 0;
75e28f62
E
3011 }
3012
f4ce97e1 3013 for (i = 0; i < 3; i++)
3014 {
3015 vertexes[i].x += offset_x;
3016 vertexes[i].y += offset_y;
3017 }
3018
75e28f62
E
3019 psx_gpu->triangle_area = triangle_area;
3020 psx_gpu->triangle_winding = triangle_winding;
3021
f4ce97e1 3022 triangle_out->vertexes[0] = a;
3023 triangle_out->vertexes[1] = b;
3024 triangle_out->vertexes[2] = c;
3025 triangle_out->offset_x = offset_x;
3026 triangle_out->offset_y = offset_y;
c1817bd9 3027
3028 return 1;
3029}
3030
/*
 * Rasterizes one triangle: classifies its three edges by vertical direction,
 * builds the span list with the matching setup_spans_* routine, then runs the
 * setup_blocks handler selected by the render state.
 *
 * psx_gpu     - GPU context; num_spans is reset on entry, and render_state,
 *               primitive_type and render_block_handler are updated here.
 * vertex_ptrs - the three vertices, referred to below as a, b and c.
 * flags       - RENDER_FLAGS_* bits; only MODULATE_TEXELS, BLEND, TEXTURE_MAP
 *               and SHADE are folded into the render state.
 */
3031static void render_triangle_p(psx_gpu_struct *psx_gpu,
3032 vertex_struct *vertex_ptrs[3], u32 flags)
3033{
3034 psx_gpu->num_spans = 0;
3035
3036 vertex_struct *a = vertex_ptrs[0];
3037 vertex_struct *b = vertex_ptrs[1];
3038 vertex_struct *c = vertex_ptrs[2];
3039
75e28f62
E
/* Vertical deltas of the edges a->b, b->c and a->c. */
3040 s32 y_delta_a = b->y - a->y;
3041 s32 y_delta_b = c->y - b->y;
3042 s32 y_delta_c = c->y - a->y;
3043
/*
 * triangle_set_direction() is a macro defined outside this view.
 * NOTE(review): it presumably declares y_direction_a/b/c from the sign of
 * each delta (they first appear here and feed the switch below) - confirm.
 */
3044 triangle_set_direction(y_direction_a, y_delta_a);
3045 triangle_set_direction(y_direction_b, y_delta_b);
3046 triangle_set_direction(y_direction_c, y_delta_c);
3047
db2804fb 3048#if defined(__arm__) && defined(NEON_BUILD) && !defined(SIMD_BUILD)
3049 // the asm doesn't bother to save callee-save vector regs, so do it here
3050 __asm__ __volatile__("vstmia %0, {q4-q7}" :: "r"(psx_gpu->saved_q4_q7) : "memory");
3051#endif
3052
75e28f62
E
3053 compute_all_gradients(psx_gpu, a, b, c);
3054
/*
 * Dispatch key: the three edge directions (two bits each) with
 * psx_gpu->triangle_winding shifted in from bit 6. triangle_case() (defined
 * outside this view) produces the matching case labels. There is no default
 * label, so a combination that is not listed calls no setup_spans_* routine.
 * Some cases pass the vertices as (a, c, b) instead of (a, b, c).
 */
3055 switch(y_direction_a | (y_direction_b << 2) | (y_direction_c << 4) |
c1817bd9 3056 (psx_gpu->triangle_winding << 6))
75e28f62
E
3057 {
3058 triangle_case(up, up, up, negative):
3059 triangle_case(up, up, flat, negative):
3060 triangle_case(up, up, down, negative):
3061 setup_spans_up_right(psx_gpu, a, b, c);
3062 break;
3063
3064 triangle_case(flat, up, up, negative):
3065 triangle_case(flat, up, flat, negative):
3066 triangle_case(flat, up, down, negative):
3067 setup_spans_up_a(psx_gpu, a, b, c);
3068 break;
3069
3070 triangle_case(down, up, up, negative):
3071 setup_spans_up_down(psx_gpu, a, c, b);
3072 break;
3073
3074 triangle_case(down, up, flat, negative):
3075 setup_spans_down_a(psx_gpu, a, c, b);
3076 break;
3077
3078 triangle_case(down, up, down, negative):
3079 setup_spans_down_right(psx_gpu, a, c, b);
3080 break;
3081
3082 triangle_case(down, flat, up, negative):
3083 triangle_case(down, flat, flat, negative):
3084 triangle_case(down, flat, down, negative):
3085 setup_spans_down_b(psx_gpu, a, b, c);
3086 break;
3087
3088 triangle_case(down, down, up, negative):
3089 triangle_case(down, down, flat, negative):
3090 triangle_case(down, down, down, negative):
3091 setup_spans_down_left(psx_gpu, a, b, c);
3092 break;
3093
3094 triangle_case(up, up, up, positive):
3095 triangle_case(up, up, flat, positive):
3096 triangle_case(up, up, down, positive):
3097 setup_spans_up_left(psx_gpu, a, b, c);
3098 break;
3099
3100 triangle_case(up, flat, up, positive):
3101 triangle_case(up, flat, flat, positive):
3102 triangle_case(up, flat, down, positive):
3103 setup_spans_up_b(psx_gpu, a, b, c);
3104 break;
3105
3106 triangle_case(up, down, up, positive):
3107 setup_spans_up_right(psx_gpu, a, c, b);
3108 break;
3109
3110 triangle_case(up, down, flat, positive):
3111 setup_spans_up_a(psx_gpu, a, c, b);
3112 break;
3113
3114 triangle_case(up, down, down, positive):
3115 setup_spans_up_down(psx_gpu, a, b, c);
3116 break;
3117
3118 triangle_case(flat, down, up, positive):
3119 triangle_case(flat, down, flat, positive):
3120 triangle_case(flat, down, down, positive):
3121 setup_spans_down_a(psx_gpu, a, b, c);
3122 break;
3123
3124 triangle_case(down, down, up, positive):
3125 triangle_case(down, down, flat, positive):
3126 triangle_case(down, down, down, positive):
3127 setup_spans_down_right(psx_gpu, a, b, c);
3128 break;
3129 }
3130
3867c6ef 3131#ifdef PROFILE
75e28f62 3132 spans += psx_gpu->num_spans;
3867c6ef 3133#endif
75e28f62 3134
/*
 * Interlaced rendering: spans on the lines to be skipped get num_blocks = 0.
 * With RENDER_INTERLACE_ODD set the even-y spans are blanked, otherwise the
 * odd-y spans are.
 */
f1359c57 3135 if(unlikely(psx_gpu->render_mode & RENDER_INTERLACE_ENABLED))
69b09c0d
E
3136 {
3137 u32 i;
3138
f1359c57 3139 if(psx_gpu->render_mode & RENDER_INTERLACE_ODD)
69b09c0d
E
3140 {
3141 for(i = 0; i < psx_gpu->num_spans; i++)
3142 {
3143 if((psx_gpu->span_edge_data[i].y & 1) == 0)
3144 psx_gpu->span_edge_data[i].num_blocks = 0;
3145 }
3146 }
3147 else
3148 {
3149 for(i = 0; i < psx_gpu->num_spans; i++)
3150 {
3151 if(psx_gpu->span_edge_data[i].y & 1)
3152 psx_gpu->span_edge_data[i].num_blocks = 0;
3153 }
3154 }
3155 }
/*
 * NOTE(review): entry 0 is checked even when num_spans is 0 - confirm that
 * span_edge_data[0].y always holds a value below 1024 in that case.
 */
2d658c89 3156 assert(psx_gpu->span_edge_data[0].y < 1024u);
69b09c0d 3157
75e28f62
E
/* Render state = the four per-primitive flag bits plus render_state_base. */
3158 u32 render_state = flags &
3159 (RENDER_FLAGS_MODULATE_TEXELS | RENDER_FLAGS_BLEND |
3160 RENDER_FLAGS_TEXTURE_MAP | RENDER_FLAGS_SHADE);
3161 render_state |= psx_gpu->render_state_base;
3162
/*
 * A different render state, or a previous primitive that was not a triangle,
 * stores the new state and flushes the render block buffer.
 */
3163 if((psx_gpu->render_state != render_state) ||
3164 (psx_gpu->primitive_type != PRIMITIVE_TYPE_TRIANGLE))
3165 {
3166 psx_gpu->render_state = render_state;
3167 flush_render_block_buffer(psx_gpu);
3867c6ef 3168#ifdef PROFILE
75e28f62 3169 state_changes++;
3867c6ef 3170#endif
75e28f62
E
3171 }
3172
3173 psx_gpu->primitive_type = PRIMITIVE_TYPE_TRIANGLE;
3174
/* Select the handler entry indexed by render_state and run its setup_blocks. */
3175 psx_gpu->render_block_handler =
3176 &(render_triangle_block_handlers[render_state]);
3177 ((setup_blocks_function_type *)psx_gpu->render_block_handler->setup_blocks)
3178 (psx_gpu);
db2804fb 3179
3180#if defined(__arm__) && defined(NEON_BUILD) && !defined(SIMD_BUILD)
/* Reload q4-q7 from the buffer they were stored to at the top. */
3181 __asm__ __volatile__("vldmia %0, {q4-q7}" :: "r"(psx_gpu->saved_q4_q7));
3182#endif
75e28f62
E
3183}
3184
c1817bd9 3185void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,
3186 u32 flags)
3187{
f4ce97e1 3188 prepared_triangle triangle;
3189 if (prepare_triangle(psx_gpu, vertexes, &triangle))
3190 render_triangle_p(psx_gpu, triangle.vertexes, flags);
c1817bd9 3191}
3192
a2cb152a 3193#if !defined(NEON_BUILD) || defined(SIMD_BUILD)
75e28f62
E
3194
3195void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu)
3196{
3197 block_struct *block = psx_gpu->blocks;
3198 u32 num_blocks = psx_gpu->num_blocks;
3199
3200 vec_8x16u texels;
3201 vec_8x8u texel_indexes;
3202
3203 u16 *clut_ptr = psx_gpu->clut_ptr;
3204 u32 i;
3205
3206 while(num_blocks)
3207 {
3208 texel_indexes = block->r;
3209
3210 for(i = 0; i < 8; i++)
3211 {
3212 texels.e[i] = clut_ptr[texel_indexes.e[i]];
3213 }
3214
3215 block->texels = texels;
3216
3217 num_blocks--;
3218 block++;
3219 }
3220}
3221
3222#endif
3223
3224
/*
 * Declares clut_ptr, clut_a, clut_b, clut_low and clut_high in the expanding
 * scope, loads 16 CLUT entries (two 8x16-bit loads from psx_gpu->clut_ptr)
 * and passes them through unzip_16x8b() to fill clut_low / clut_high.
 * NOTE(review): presumably this separates the low and high bytes of the
 * entries for the tbl_16() lookups of the 4bpp tile macros; the helper is
 * defined outside this view - confirm.
 */
59d15d23 3225#define setup_sprite_tiled_initialize_4bpp_clut() \
75e28f62
E
3226 u16 *clut_ptr = psx_gpu->clut_ptr; \
3227 vec_8x16u clut_a, clut_b; \
3228 vec_16x8u clut_low, clut_high; \
3229 \
3230 load_8x16b(clut_a, clut_ptr); \
3231 load_8x16b(clut_b, clut_ptr + 8); \
59d15d23 3232 unzip_16x8b(clut_low, clut_high, clut_a, clut_b) \
3233
/*
 * 4bpp sprite prologue: CLUT table setup, then update_texture_4bpp_cache()
 * when current_texture_mask overlaps dirty_textures_4bpp_mask.
 */
3234#define setup_sprite_tiled_initialize_4bpp() \
3235 setup_sprite_tiled_initialize_4bpp_clut(); \
75e28f62
E
3236 \
3237 if(psx_gpu->current_texture_mask & psx_gpu->dirty_textures_4bpp_mask) \
3238 update_texture_4bpp_cache(psx_gpu) \
3239
/*
 * 8bpp sprite prologue: update_texture_8bpp_cache() when current_texture_mask
 * overlaps dirty_textures_8bpp_mask. No CLUT tables are built here.
 */
3240#define setup_sprite_tiled_initialize_8bpp() \
3241 if(psx_gpu->current_texture_mask & psx_gpu->dirty_textures_8bpp_mask) \
3242 update_texture_8bpp_cache(psx_gpu) \
3243
3244
/*
 * Points texture_block_ptr at byte ((texture_offset + offset) & texture_mask)
 * of psx_gpu->texture_page_ptr and loads 64 bits from there into texels.
 * texture_offset, texture_mask, texture_block_ptr and texels come from the
 * expanding scope.
 */
3245#define setup_sprite_tile_fetch_texel_block_8bpp(offset) \
df740cdc 3246 texture_block_ptr = (u8 *)psx_gpu->texture_page_ptr + \
75e28f62
E
3247 ((texture_offset + offset) & texture_mask); \
3248 \
3249 load_64b(texels, texture_block_ptr) \
3250
3251
75e28f62
E
/*
 * Accounts for tile_num_blocks new entries in the block buffer: adds them to
 * num_blocks and to the sprite_blocks counter. If num_blocks then exceeds
 * MAX_BLOCKS, the buffer is flushed, num_blocks restarts at tile_num_blocks
 * and block is rewound to psx_gpu->blocks.
 * The argument is expanded several times, so it must be free of side effects.
 */
3252#define setup_sprite_tile_add_blocks(tile_num_blocks) \
3253 num_blocks += tile_num_blocks; \
3254 sprite_blocks += tile_num_blocks; \
3255 \
3256 if(num_blocks > MAX_BLOCKS) \
3257 { \
3258 flush_render_block_buffer(psx_gpu); \
3259 num_blocks = tile_num_blocks; \
3260 block = psx_gpu->blocks; \
3261 } \
3262
/*
 * Queues sub_tile_height rows of a full 4bpp tile column, two blocks per row:
 * the first uses left_mask_bits at fb_ptr, the second right_mask_bits at
 * fb_ptr + 8. For each block 8 bytes are fetched (offsets 0 and 8), looked up
 * through clut_low / clut_high with tbl_16() and zipped into 16-bit texels.
 * Per row fb_ptr advances by 1024 and texture_offset by 0x10; afterwards
 * texture_offset gets a further 0xF00 and num_blocks is written back to
 * psx_gpu. sub_tile_height is counted down to 0. The edge argument is unused.
 */
3263#define setup_sprite_tile_full_4bpp(edge) \
3264{ \
3265 vec_8x8u texels_low, texels_high; \
3266 vec_8x16u pixels; \
3267 setup_sprite_tile_add_blocks(sub_tile_height * 2); \
3268 \
3269 while(sub_tile_height) \
3270 { \
3271 setup_sprite_tile_fetch_texel_block_8bpp(0); \
3272 tbl_16(texels_low, texels, clut_low); \
3273 tbl_16(texels_high, texels, clut_high); \
3274 zip_8x16b(pixels, texels_low, texels_high); \
3275 \
3276 block->texels = pixels; \
3277 block->draw_mask_bits = left_mask_bits; \
3278 block->fb_ptr = fb_ptr; \
3279 block++; \
3280 \
3281 setup_sprite_tile_fetch_texel_block_8bpp(8); \
3282 tbl_16(texels_low, texels, clut_low); \
3283 tbl_16(texels_high, texels, clut_high); \
3284 zip_8x16b(pixels, texels_low, texels_high); \
3285 \
3286 block->texels = pixels; \
3287 block->draw_mask_bits = right_mask_bits; \
3288 block->fb_ptr = fb_ptr + 8; \
3289 block++; \
3290 \
3291 fb_ptr += 1024; \
3292 texture_offset += 0x10; \
3293 sub_tile_height--; \
3294 } \
3295 texture_offset += 0xF00; \
3296 psx_gpu->num_blocks = num_blocks; \
3297} \
3298
/*
 * Half-column variant of the macro above: one block per row, fetched at
 * offset 0 and masked with edge##_mask_bits (left_mask_bits or
 * right_mask_bits depending on the edge argument). Row stepping and the final
 * texture_offset / num_blocks updates are the same as in the full variant.
 */
3299#define setup_sprite_tile_half_4bpp(edge) \
3300{ \
3301 vec_8x8u texels_low, texels_high; \
3302 vec_8x16u pixels; \
3303 setup_sprite_tile_add_blocks(sub_tile_height); \
3304 \
3305 while(sub_tile_height) \
3306 { \
3307 setup_sprite_tile_fetch_texel_block_8bpp(0); \
3308 tbl_16(texels_low, texels, clut_low); \
3309 tbl_16(texels_high, texels, clut_high); \
3310 zip_8x16b(pixels, texels_low, texels_high); \
3311 \
3312 block->texels = pixels; \
3313 block->draw_mask_bits = edge##_mask_bits; \
3314 block->fb_ptr = fb_ptr; \
3315 block++; \
3316 \
3317 fb_ptr += 1024; \
3318 texture_offset += 0x10; \
3319 sub_tile_height--; \
3320 } \
3321 texture_offset += 0xF00; \
3322 psx_gpu->num_blocks = num_blocks; \
3323} \
3324
3325
/*
 * Queues sub_tile_height rows of a full 8bpp tile column, two blocks per row
 * (left_mask_bits at fb_ptr, right_mask_bits at fb_ptr + 8). The 8 fetched
 * bytes are stored unconverted in block->r; no CLUT lookup happens in this
 * macro. Per row fb_ptr advances by 1024 and texture_offset by 0x10;
 * afterwards texture_offset gets a further 0xF00 and num_blocks is written
 * back to psx_gpu. The edge argument is unused.
 */
3326#define setup_sprite_tile_full_8bpp(edge) \
3327{ \
3328 setup_sprite_tile_add_blocks(sub_tile_height * 2); \
3329 \
3330 while(sub_tile_height) \
3331 { \
3332 setup_sprite_tile_fetch_texel_block_8bpp(0); \
3333 block->r = texels; \
3334 block->draw_mask_bits = left_mask_bits; \
3335 block->fb_ptr = fb_ptr; \
3336 block++; \
3337 \
3338 setup_sprite_tile_fetch_texel_block_8bpp(8); \
3339 block->r = texels; \
3340 block->draw_mask_bits = right_mask_bits; \
3341 block->fb_ptr = fb_ptr + 8; \
3342 block++; \
3343 \
3344 fb_ptr += 1024; \
3345 texture_offset += 0x10; \
3346 sub_tile_height--; \
3347 } \
3348 texture_offset += 0xF00; \
3349 psx_gpu->num_blocks = num_blocks; \
3350} \
3351
/*
 * Half-column 8bpp variant: one block per row, fetched at offset 0 and masked
 * with edge##_mask_bits.
 */
3352#define setup_sprite_tile_half_8bpp(edge) \
3353{ \
df740cdc 3354 setup_sprite_tile_add_blocks(sub_tile_height); \
75e28f62
E
3355 \
3356 while(sub_tile_height) \
3357 { \
3358 setup_sprite_tile_fetch_texel_block_8bpp(0); \
3359 block->r = texels; \
3360 block->draw_mask_bits = edge##_mask_bits; \
3361 block->fb_ptr = fb_ptr; \
3362 block++; \
3363 \
3364 fb_ptr += 1024; \
3365 texture_offset += 0x10; \
3366 sub_tile_height--; \
3367 } \
3368 texture_offset += 0xF00; \
3369 psx_gpu->num_blocks = num_blocks; \
3370} \
3371
3372
/*
 * Per-column pre/post adjustment hooks, selected by token pasting on the
 * edge mode (half / full) and, for half columns, on the edge (left / right).
 *
 * Pre-adjust: every variant resets texture_offset to texture_offset_base. A
 * right half column additionally starts 8 bytes further into the texture row
 * and moves fb_ptr 8 pixels to the right.
 * Post-adjust: the right half column moves fb_ptr back by 8; all other
 * variants expand to nothing.
 */
3373#define setup_sprite_tile_column_edge_pre_adjust_half_right() \
3374 texture_offset = texture_offset_base + 8; \
3375 fb_ptr += 8 \
3376
3377#define setup_sprite_tile_column_edge_pre_adjust_half_left() \
3378 texture_offset = texture_offset_base \
3379
3380#define setup_sprite_tile_column_edge_pre_adjust_half(edge) \
3381 setup_sprite_tile_column_edge_pre_adjust_half_##edge() \
3382
3383#define setup_sprite_tile_column_edge_pre_adjust_full(edge) \
3384 texture_offset = texture_offset_base \
3385
3386#define setup_sprite_tile_column_edge_post_adjust_half_right() \
3387 fb_ptr -= 8 \
3388
3389#define setup_sprite_tile_column_edge_post_adjust_half_left() \
3390
3391#define setup_sprite_tile_column_edge_post_adjust_half(edge) \
3392 setup_sprite_tile_column_edge_post_adjust_half_##edge() \
3393
3394#define setup_sprite_tile_column_edge_post_adjust_full(edge) \
3395
3396
/*
 * Emits one tile column for a sprite that is a single tile high:
 * sub_tile_height is the whole column_data value, and the tile macro chosen
 * by edge_mode / texture_mode / x4mode runs once between the pre- and
 * post-adjust hooks.
 */
59d15d23 3397#define setup_sprite_tile_column_height_single(edge_mode, edge, texture_mode, \
3398 x4mode) \
75e28f62
E
3399do \
3400{ \
3401 sub_tile_height = column_data; \
59d15d23 3402 setup_sprite_tile_column_edge_pre_adjust_##edge_mode##x4mode(edge); \
3403 setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \
3404 setup_sprite_tile_column_edge_post_adjust_##edge_mode##x4mode(edge); \
75e28f62
E
3405} while(0) \
3406
/*
 * Emits one tile column for a sprite spanning several tile rows. column_data
 * is unpacked as: bits 0-7 = rows in the first tile, bits 8-15 = rows in the
 * last tile, bits 16 and up = initial tiles_remaining. The first tile is
 * emitted, tiles_remaining is decremented, then 16-row tiles are emitted
 * until it reaches 0, and finally the last tile.
 */
59d15d23 3407#define setup_sprite_tile_column_height_multi(edge_mode, edge, texture_mode, \
3408 x4mode) \
75e28f62
E
3409do \
3410{ \
3411 u32 tiles_remaining = column_data >> 16; \
3412 sub_tile_height = column_data & 0xFF; \
59d15d23 3413 setup_sprite_tile_column_edge_pre_adjust_##edge_mode##x4mode(edge); \
3414 setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \
75e28f62
E
3415 tiles_remaining -= 1; \
3416 \
3417 while(tiles_remaining) \
3418 { \
3419 sub_tile_height = 16; \
59d15d23 3420 setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \
75e28f62
E
3421 tiles_remaining--; \
3422 } \
3423 \
3424 sub_tile_height = (column_data >> 8) & 0xFF; \
59d15d23 3425 setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \
3426 setup_sprite_tile_column_edge_post_adjust_##edge_mode##x4mode(edge); \
75e28f62
E
3427} while(0) \
3428
3429
/*
 * column_data encodings consumed by the column_height macros.
 * single: the sprite height itself.
 * multi:  bits 0-7   = 16 - offset_v,
 *         bits 8-15  = (height_rounded & 0xF) + 1,
 *         bits 16-.. = tile_height - 1.
 */
3430#define setup_sprite_column_data_single() \
3431 column_data = height \
3432
3433#define setup_sprite_column_data_multi() \
3434 column_data = 16 - offset_v; \
3435 column_data |= ((height_rounded & 0xF) + 1) << 8; \
3436 column_data |= (tile_height - 1) << 16 \
3437
3438
/*
 * Right shift that derives right_mask_bits from left_mask_bits; the suffix
 * pasted from x4mode (empty or _4x) picks 8 or 16.
 */
59d15d23 3439#define RIGHT_MASK_BIT_SHIFT 8
3440#define RIGHT_MASK_BIT_SHIFT_4x 16
3441
/*
 * Sprite that is one tile column wide: left_block_mask and right_block_mask
 * are OR-ed into left_mask_bits, right_mask_bits is that value shifted down
 * by RIGHT_MASK_BIT_SHIFT<x4mode>, and a single column is emitted with the
 * given edge_mode / edge.
 */
75e28f62 3442#define setup_sprite_tile_column_width_single(texture_mode, multi_height, \
59d15d23 3443 edge_mode, edge, x4mode) \
75e28f62
E
3444{ \
3445 setup_sprite_column_data_##multi_height(); \
3446 left_mask_bits = left_block_mask | right_block_mask; \
59d15d23 3447 right_mask_bits = left_mask_bits >> RIGHT_MASK_BIT_SHIFT##x4mode; \
75e28f62
E
3448 \
3449 setup_sprite_tile_column_height_##multi_height(edge_mode, edge, \
59d15d23 3450 texture_mode, x4mode); \
75e28f62
E
3451} \
3452
/*
 * Steps texture_offset_base to the next tile column (+0x100). When bits 8-11
 * become 0 after the addition, 0x100 + 0xF00 (0x1000 in total) is subtracted.
 */
3453#define setup_sprite_tiled_advance_column() \
3454 texture_offset_base += 0x100; \
3455 if((texture_offset_base & 0xF00) == 0) \
3456 texture_offset_base -= (0x100 + 0xF00) \
3457
/*
 * Scale applied to the per-column framebuffer step; the suffix pasted from
 * x4mode (empty or _4x) picks 1 or 2.
 */
59d15d23 3458#define FB_PTR_MULTIPLIER 1
3459#define FB_PTR_MULTIPLIER_4x 2
3460
/*
 * Sprite spanning two or more tile columns:
 *  - leftmost column, masked with left_block_mask, emitted with left_mode
 *    (when that is "half", the "right" half variant is used);
 *  - tile_width - 2 middle columns, emitted as full tiles with both mask
 *    values cleared to 0;
 *  - rightmost column, masked with right_block_mask, emitted with right_mode
 *    (the "left" half variant when half).
 * After the left and each middle column fb_ptr moves by
 * fb_ptr_advance_column = (16 - 1024 * height) * FB_PTR_MULTIPLIER<x4mode>.
 * tile_width is consumed (counted down to 0) by this macro.
 */
75e28f62 3461#define setup_sprite_tile_column_width_multi(texture_mode, multi_height, \
59d15d23 3462 left_mode, right_mode, x4mode) \
75e28f62
E
3463{ \
3464 setup_sprite_column_data_##multi_height(); \
59d15d23 3465 s32 fb_ptr_advance_column = (16 - (1024 * height)) \
3466 * FB_PTR_MULTIPLIER##x4mode; \
75e28f62
E
3467 \
3468 tile_width -= 2; \
3469 left_mask_bits = left_block_mask; \
59d15d23 3470 right_mask_bits = left_mask_bits >> RIGHT_MASK_BIT_SHIFT##x4mode; \
75e28f62
E
3471 \
3472 setup_sprite_tile_column_height_##multi_height(left_mode, right, \
59d15d23 3473 texture_mode, x4mode); \
75e28f62
E
3474 fb_ptr += fb_ptr_advance_column; \
3475 \
3476 left_mask_bits = 0x00; \
3477 right_mask_bits = 0x00; \
3478 \
3479 while(tile_width) \
3480 { \
3481 setup_sprite_tiled_advance_column(); \
59d15d23 3482 setup_sprite_tile_column_height_##multi_height(full, none, \
3483 texture_mode, x4mode); \
75e28f62
E
3484 fb_ptr += fb_ptr_advance_column; \
3485 tile_width--; \
3486 } \
3487 \
3488 left_mask_bits = right_block_mask; \
59d15d23 3489 right_mask_bits = left_mask_bits >> RIGHT_MASK_BIT_SHIFT##x4mode; \
75e28f62
E
3490 \
3491 setup_sprite_tiled_advance_column(); \
3492 setup_sprite_tile_column_height_##multi_height(right_mode, left, \
59d15d23 3493 texture_mode, x4mode); \
3494} \
3495
3496
3497/* 4x stuff */
/*
 * Prologues of the _4x sprite builders: the 4bpp one only builds the CLUT
 * tables and the 8bpp one expands to nothing. Neither performs the
 * texture-cache update that the non-4x prologues do.
 */
3498#define setup_sprite_tiled_initialize_4bpp_4x() \
3499 setup_sprite_tiled_initialize_4bpp_clut() \
3500
3501#define setup_sprite_tiled_initialize_8bpp_4x() \
3502
3503
/*
 * 4x variant of the full 4bpp tile column: every source row queues 8 blocks.
 * Each group of 8 looked-up pixels is widened in two steps with
 * zip_4x32b(pixels.low, pixels.low) and zip_4x32b(pixels.high, pixels.high),
 * and each widened block is queued twice, at fb_ptr + n and at
 * fb_ptr + 1024 + n (n = 0, 8, 16, 24 across the row). The masks are split
 * into an _a part (low 8 bits) and a _b part (bits 8 and up) for the two
 * widened halves. fb_ptr advances by 2048 per source row.
 * NOTE(review): the zip of a vector with itself presumably duplicates each
 * pixel horizontally; zip_4x32b is defined outside this view - confirm.
 * The edge argument is unused.
 */
3504#define setup_sprite_tile_full_4bpp_4x(edge) \
3505{ \
3506 vec_8x8u texels_low, texels_high; \
3507 vec_8x16u pixels, pixels_wide; \
3508 setup_sprite_tile_add_blocks(sub_tile_height * 2 * 4); \
3509 u32 left_mask_bits_a = left_mask_bits & 0xFF; \
3510 u32 left_mask_bits_b = left_mask_bits >> 8; \
3511 u32 right_mask_bits_a = right_mask_bits & 0xFF; \
3512 u32 right_mask_bits_b = right_mask_bits >> 8; \
3513 \
3514 while(sub_tile_height) \
3515 { \
3516 setup_sprite_tile_fetch_texel_block_8bpp(0); \
3517 tbl_16(texels_low, texels, clut_low); \
3518 tbl_16(texels_high, texels, clut_high); \
3519 zip_8x16b(pixels, texels_low, texels_high); \
3520 \
3521 zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.low, pixels.low); \
3522 block->texels = pixels_wide; \
3523 block->draw_mask_bits = left_mask_bits_a; \
3524 block->fb_ptr = fb_ptr; \
3525 block++; \
3526 \
3527 block->texels = pixels_wide; \
3528 block->draw_mask_bits = left_mask_bits_a; \
3529 block->fb_ptr = fb_ptr + 1024; \
3530 block++; \
3531 \
3532 zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.high, pixels.high); \
3533 block->texels = pixels_wide; \
3534 block->draw_mask_bits = left_mask_bits_b; \
3535 block->fb_ptr = fb_ptr + 8; \
3536 block++; \
3537 \
3538 block->texels = pixels_wide; \
3539 block->draw_mask_bits = left_mask_bits_b; \
3540 block->fb_ptr = fb_ptr + 1024 + 8; \
3541 block++; \
3542 \
3543 setup_sprite_tile_fetch_texel_block_8bpp(8); \
3544 tbl_16(texels_low, texels, clut_low); \
3545 tbl_16(texels_high, texels, clut_high); \
3546 zip_8x16b(pixels, texels_low, texels_high); \
3547 \
3548 zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.low, pixels.low); \
3549 block->texels = pixels_wide; \
3550 block->draw_mask_bits = right_mask_bits_a; \
3551 block->fb_ptr = fb_ptr + 16; \
3552 block++; \
3553 \
3554 block->texels = pixels_wide; \
3555 block->draw_mask_bits = right_mask_bits_a; \
3556 block->fb_ptr = fb_ptr + 1024 + 16; \
3557 block++; \
3558 \
3559 zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.high, pixels.high); \
3560 block->texels = pixels_wide; \
3561 block->draw_mask_bits = right_mask_bits_b; \
3562 block->fb_ptr = fb_ptr + 24; \
3563 block++; \
3564 \
3565 block->texels = pixels_wide; \
3566 block->draw_mask_bits = right_mask_bits_b; \
3567 block->fb_ptr = fb_ptr + 1024 + 24; \
3568 block++; \
3569 \
3570 fb_ptr += 2048; \
3571 texture_offset += 0x10; \
3572 sub_tile_height--; \
3573 } \
3574 texture_offset += 0xF00; \
3575 psx_gpu->num_blocks = num_blocks; \
75e28f62
E
3576} \
3577
/*
 * 4x variant of the half 4bpp tile column: one 8-byte fetch per source row
 * yields 4 blocks (two widened halves, each queued at fb_ptr + n and
 * fb_ptr + 1024 + n, n = 0, 8), masked with the _a / _b parts of
 * edge##_mask_bits.
 */
59d15d23 3578#define setup_sprite_tile_half_4bpp_4x(edge) \
3579{ \
3580 vec_8x8u texels_low, texels_high; \
3581 vec_8x16u pixels, pixels_wide; \
3582 setup_sprite_tile_add_blocks(sub_tile_height * 4); \
3583 u32 edge##_mask_bits_a = edge##_mask_bits & 0xFF; \
3584 u32 edge##_mask_bits_b = edge##_mask_bits >> 8; \
3585 \
3586 while(sub_tile_height) \
3587 { \
3588 setup_sprite_tile_fetch_texel_block_8bpp(0); \
3589 tbl_16(texels_low, texels, clut_low); \
3590 tbl_16(texels_high, texels, clut_high); \
3591 zip_8x16b(pixels, texels_low, texels_high); \
3592 \
3593 zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.low, pixels.low); \
3594 block->texels = pixels_wide; \
3595 block->draw_mask_bits = edge##_mask_bits_a; \
3596 block->fb_ptr = fb_ptr; \
3597 block++; \
3598 \
3599 block->texels = pixels_wide; \
3600 block->draw_mask_bits = edge##_mask_bits_a; \
3601 block->fb_ptr = fb_ptr + 1024; \
3602 block++; \
3603 \
3604 zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.high, pixels.high); \
3605 block->texels = pixels_wide; \
3606 block->draw_mask_bits = edge##_mask_bits_b; \
3607 block->fb_ptr = fb_ptr + 8; \
3608 block++; \
3609 \
3610 block->texels = pixels_wide; \
3611 block->draw_mask_bits = edge##_mask_bits_b; \
3612 block->fb_ptr = fb_ptr + 1024 + 8; \
3613 block++; \
3614 \
3615 fb_ptr += 2048; \
3616 texture_offset += 0x10; \
3617 sub_tile_height--; \
3618 } \
3619 texture_offset += 0xF00; \
3620 psx_gpu->num_blocks = num_blocks; \
3621} \
75e28f62 3622
59d15d23 3623
/*
 * 4x variant of the full 8bpp tile column: every source row queues 8 blocks.
 * The 8 fetched index bytes are zipped with themselves into texels_wide, and
 * its low / high halves are stored in block->r, each queued at fb_ptr + n and
 * fb_ptr + 1024 + n (n = 0, 8 for the first fetch and 16, 24 for the second).
 * The masks are split into _a (low 8 bits) and _b (bits 8 and up) parts.
 * fb_ptr advances by 2048 per source row. The edge argument is unused.
 */
3624#define setup_sprite_tile_full_8bpp_4x(edge) \
3625{ \
3626 setup_sprite_tile_add_blocks(sub_tile_height * 2 * 4); \
3627 vec_16x8u texels_wide; \
3628 u32 left_mask_bits_a = left_mask_bits & 0xFF; \
3629 u32 left_mask_bits_b = left_mask_bits >> 8; \
3630 u32 right_mask_bits_a = right_mask_bits & 0xFF; \
3631 u32 right_mask_bits_b = right_mask_bits >> 8; \
3632 \
3633 while(sub_tile_height) \
3634 { \
3635 setup_sprite_tile_fetch_texel_block_8bpp(0); \
3636 zip_8x16b(vector_cast(vec_8x16u, texels_wide), texels, texels); \
3637 block->r = texels_wide.low; \
3638 block->draw_mask_bits = left_mask_bits_a; \
3639 block->fb_ptr = fb_ptr; \
3640 block++; \
3641 \
3642 block->r = texels_wide.low; \
3643 block->draw_mask_bits = left_mask_bits_a; \
3644 block->fb_ptr = fb_ptr + 1024; \
3645 block++; \
3646 \
3647 block->r = texels_wide.high; \
3648 block->draw_mask_bits = left_mask_bits_b; \
3649 block->fb_ptr = fb_ptr + 8; \
3650 block++; \
3651 \
3652 block->r = texels_wide.high; \
3653 block->draw_mask_bits = left_mask_bits_b; \
3654 block->fb_ptr = fb_ptr + 1024 + 8; \
3655 block++; \
3656 \
3657 setup_sprite_tile_fetch_texel_block_8bpp(8); \
3658 zip_8x16b(vector_cast(vec_8x16u, texels_wide), texels, texels); \
3659 block->r = texels_wide.low; \
3660 block->draw_mask_bits = right_mask_bits_a; \
3661 block->fb_ptr = fb_ptr + 16; \
3662 block++; \
3663 \
3664 block->r = texels_wide.low; \
3665 block->draw_mask_bits = right_mask_bits_a; \
3666 block->fb_ptr = fb_ptr + 1024 + 16; \
3667 block++; \
3668 \
3669 block->r = texels_wide.high; \
3670 block->draw_mask_bits = right_mask_bits_b; \
3671 block->fb_ptr = fb_ptr + 24; \
3672 block++; \
3673 \
3674 block->r = texels_wide.high; \
3675 block->draw_mask_bits = right_mask_bits_b; \
3676 block->fb_ptr = fb_ptr + 24 + 1024; \
3677 block++; \
3678 \
3679 fb_ptr += 2048; \
3680 texture_offset += 0x10; \
3681 sub_tile_height--; \
3682 } \
3683 texture_offset += 0xF00; \
3684 psx_gpu->num_blocks = num_blocks; \
3685} \
3686
/*
 * 4x variant of the half 8bpp tile column: one fetch per source row yields 4
 * blocks (texels_wide.low and .high, each queued on two consecutive lines),
 * masked with the _a / _b parts of edge##_mask_bits.
 */
3687#define setup_sprite_tile_half_8bpp_4x(edge) \
3688{ \
3689 setup_sprite_tile_add_blocks(sub_tile_height * 4); \
3690 vec_16x8u texels_wide; \
3691 u32 edge##_mask_bits_a = edge##_mask_bits & 0xFF; \
3692 u32 edge##_mask_bits_b = edge##_mask_bits >> 8; \
3693 \
3694 while(sub_tile_height) \
3695 { \
3696 setup_sprite_tile_fetch_texel_block_8bpp(0); \
3697 zip_8x16b(vector_cast(vec_8x16u, texels_wide), texels, texels); \
3698 block->r = texels_wide.low; \
3699 block->draw_mask_bits = edge##_mask_bits_a; \
3700 block->fb_ptr = fb_ptr; \
3701 block++; \
3702 \
3703 block->r = texels_wide.low; \
3704 block->draw_mask_bits = edge##_mask_bits_a; \
3705 block->fb_ptr = fb_ptr + 1024; \
3706 block++; \
3707 \
3708 block->r = texels_wide.high; \
3709 block->draw_mask_bits = edge##_mask_bits_b; \
3710 block->fb_ptr = fb_ptr + 8; \
3711 block++; \
3712 \
3713 block->r = texels_wide.high; \
3714 block->draw_mask_bits = edge##_mask_bits_b; \
3715 block->fb_ptr = fb_ptr + 8 + 1024; \
3716 block++; \
3717 \
3718 fb_ptr += 2048; \
3719 texture_offset += 0x10; \
3720 sub_tile_height--; \
3721 } \
3722 texture_offset += 0xF00; \
3723 psx_gpu->num_blocks = num_blocks; \
3724} \
3725
3726
/*
 * _4x versions of the per-column pre/post adjustment hooks. They match the
 * non-4x hooks except that the right half column shifts fb_ptr by 16 instead
 * of 8 (the texture offset still starts at texture_offset_base + 8).
 */
3727#define setup_sprite_tile_column_edge_pre_adjust_half_right_4x() \
3728 texture_offset = texture_offset_base + 8; \
3729 fb_ptr += 16 \
3730
3731#define setup_sprite_tile_column_edge_pre_adjust_half_left_4x() \
3732 texture_offset = texture_offset_base \
3733
3734#define setup_sprite_tile_column_edge_pre_adjust_half_4x(edge) \
3735 setup_sprite_tile_column_edge_pre_adjust_half_##edge##_4x() \
3736
3737#define setup_sprite_tile_column_edge_pre_adjust_full_4x(edge) \
3738 texture_offset = texture_offset_base \
3739
3740#define setup_sprite_tile_column_edge_post_adjust_half_right_4x() \
3741 fb_ptr -= 16 \
3742
3743#define setup_sprite_tile_column_edge_post_adjust_half_left_4x() \
3744
3745#define setup_sprite_tile_column_edge_post_adjust_half_4x(edge) \
3746 setup_sprite_tile_column_edge_post_adjust_half_##edge##_4x() \
3747
3748#define setup_sprite_tile_column_edge_post_adjust_full_4x(edge) \
3749
3750
/*
 * Helpers pasted into setup_sprite_tiled_builder by x4mode suffix.
 *
 * offset_u_adjust: nothing in normal mode; in 4x mode offset_u is doubled and
 * offset_u_right becomes offset_u_right * 2 + 1.
 * comapre_left_block_mask (identifier spelled this way in the source): true
 * when the low 8 bits (16 in 4x mode) of left_block_mask are all set.
 * comapre_right_block_mask: true when bits 8-15 (16-31 in 4x mode) of
 * right_block_mask are all set.
 * The builder uses the two comparisons as bits 2 and 3 of its control_mask.
 */
3751#define setup_sprite_offset_u_adjust() \
3752
3753#define setup_sprite_comapre_left_block_mask() \
3754 ((left_block_mask & 0xFF) == 0xFF) \
3755
3756#define setup_sprite_comapre_right_block_mask() \
3757 (((right_block_mask >> 8) & 0xFF) == 0xFF) \
3758
3759
3760#define setup_sprite_offset_u_adjust_4x() \
3761 offset_u *= 2; \
3762 offset_u_right = offset_u_right * 2 + 1 \
3763
3764#define setup_sprite_comapre_left_block_mask_4x() \
3765 ((left_block_mask & 0xFFFF) == 0xFFFF) \
3766
3767#define setup_sprite_comapre_right_block_mask_4x() \
3768 (((right_block_mask >> 16) & 0xFFFF) == 0xFFFF) \
3769
3770
/*
 * Generates setup_sprite_<texture_mode><x4mode>(psx_gpu, x, y, u, v, width,
 * height, color), which queues render blocks for a paletted sprite by walking
 * the texture in 16x16 tiles. The color parameter is not referenced in the
 * generated body.
 *
 * Derived values:
 *  - offset_u / offset_v: position of (u, v) inside its tile (low 4 bits);
 *  - tile_width / tile_height: (offset + size + 15) / 16;
 *  - left_block_mask / right_block_mask: bit masks built from offset_u and
 *    offset_u_right (after the x4mode adjustment);
 *  - texture_mask / texture_offset: bit-interleaved forms of the texture mask
 *    width/height and of (u, v), as assembled by the shifts below;
 *  - fb_ptr: vram_out_ptr + y * 1024 + (x - offset_u).
 *
 * control_mask selects the column macro:
 *  bit 0 = tile_width == 1, bit 1 = tile_height == 1,
 *  bit 2 = left block-mask compare, bit 3 = right block-mask compare.
 * Values 0xD and 0xF have no case of their own and take the shared
 * default / 0x0 path.
 * The sprites_<texture_mode> counter is incremented once per call.
 */
3771#define setup_sprite_tiled_builder(texture_mode, x4mode) \
3772void setup_sprite_##texture_mode##x4mode(psx_gpu_struct *psx_gpu, s32 x, s32 y,\
75e28f62
E
3773 s32 u, s32 v, s32 width, s32 height, u32 color) \
3774{ \
3775 s32 offset_u = u & 0xF; \
3776 s32 offset_v = v & 0xF; \
3777 \
3778 s32 width_rounded = offset_u + width + 15; \
3779 s32 height_rounded = offset_v + height + 15; \
3780 s32 tile_height = height_rounded / 16; \
3781 s32 tile_width = width_rounded / 16; \
3782 u32 offset_u_right = width_rounded & 0xF; \
3783 \
59d15d23 3784 setup_sprite_offset_u_adjust##x4mode(); \
3785 \
3786 u32 left_block_mask = ~(0xFFFFFFFF << offset_u); \
3787 u32 right_block_mask = 0xFFFFFFFE << offset_u_right; \
75e28f62
E
3788 \
3789 u32 left_mask_bits; \
3790 u32 right_mask_bits; \
3791 \
3792 u32 sub_tile_height; \
3793 u32 column_data; \
3794 \
3795 u32 texture_mask = (psx_gpu->texture_mask_width & 0xF) | \
3796 ((psx_gpu->texture_mask_height & 0xF) << 4) | \
3797 ((psx_gpu->texture_mask_width >> 4) << 8) | \
3798 ((psx_gpu->texture_mask_height >> 4) << 12); \
3799 u32 texture_offset = ((v & 0xF) << 4) | ((u & 0xF0) << 4) | \
3800 ((v & 0xF0) << 8); \
3801 u32 texture_offset_base = texture_offset; \
3802 u32 control_mask; \
3803 \
59d15d23 3804 u16 *fb_ptr = psx_gpu->vram_out_ptr + (y * 1024) + (x - offset_u); \
75e28f62
E
3805 u32 num_blocks = psx_gpu->num_blocks; \
3806 block_struct *block = psx_gpu->blocks + num_blocks; \
3807 \
df740cdc 3808 u8 *texture_block_ptr; \
75e28f62
E
3809 vec_8x8u texels; \
3810 \
59d15d23 3811 setup_sprite_tiled_initialize_##texture_mode##x4mode(); \
75e28f62
E
3812 \
3813 control_mask = tile_width == 1; \
3814 control_mask |= (tile_height == 1) << 1; \
59d15d23 3815 control_mask |= setup_sprite_comapre_left_block_mask##x4mode() << 2; \
3816 control_mask |= setup_sprite_comapre_right_block_mask##x4mode() << 3; \
75e28f62
E
3817 \
3818 sprites_##texture_mode++; \
3819 \
3820 switch(control_mask) \
3821 { \
3822 default: \
3823 case 0x0: \
59d15d23 3824 setup_sprite_tile_column_width_multi(texture_mode, multi, full, full, \
3825 x4mode); \
75e28f62
E
3826 break; \
3827 \
3828 case 0x1: \
59d15d23 3829 setup_sprite_tile_column_width_single(texture_mode, multi, full, none, \
3830 x4mode); \
75e28f62
E
3831 break; \
3832 \
3833 case 0x2: \
59d15d23 3834 setup_sprite_tile_column_width_multi(texture_mode, single, full, full, \
3835 x4mode); \
75e28f62
E
3836 break; \
3837 \
3838 case 0x3: \
59d15d23 3839 setup_sprite_tile_column_width_single(texture_mode, single, full, none, \
3840 x4mode); \
75e28f62
E
3841 break; \
3842 \
3843 case 0x4: \
59d15d23 3844 setup_sprite_tile_column_width_multi(texture_mode, multi, half, full, \
3845 x4mode); \
75e28f62
E
3846 break; \
3847 \
3848 case 0x5: \
59d15d23 3849 setup_sprite_tile_column_width_single(texture_mode, multi, half, right, \
3850 x4mode); \
75e28f62
E
3851 break; \
3852 \
3853 case 0x6: \
59d15d23 3854 setup_sprite_tile_column_width_multi(texture_mode, single, half, full, \
3855 x4mode); \
75e28f62
E
3856 break; \
3857 \
3858 case 0x7: \
59d15d23 3859 setup_sprite_tile_column_width_single(texture_mode, single, half, right, \
3860 x4mode); \
75e28f62
E
3861 break; \
3862 \
3863 case 0x8: \
59d15d23 3864 setup_sprite_tile_column_width_multi(texture_mode, multi, full, half, \
3865 x4mode); \
75e28f62
E
3866 break; \
3867 \
3868 case 0x9: \
59d15d23 3869 setup_sprite_tile_column_width_single(texture_mode, multi, half, left, \
3870 x4mode); \
75e28f62
E
3871 break; \
3872 \
3873 case 0xA: \
59d15d23 3874 setup_sprite_tile_column_width_multi(texture_mode, single, full, half, \
3875 x4mode); \
75e28f62
E
3876 break; \
3877 \
3878 case 0xB: \
59d15d23 3879 setup_sprite_tile_column_width_single(texture_mode, single, half, left, \
3880 x4mode); \
75e28f62
E
3881 break; \
3882 \
3883 case 0xC: \
59d15d23 3884 setup_sprite_tile_column_width_multi(texture_mode, multi, half, half, \
3885 x4mode); \
75e28f62
E
3886 break; \
3887 \
3888 case 0xE: \
59d15d23 3889 setup_sprite_tile_column_width_multi(texture_mode, single, half, half, \
3890 x4mode); \
75e28f62
E
3891 break; \
3892 } \
3893} \
3894
/*
 * C implementations of setup_sprite_4bpp, setup_sprite_8bpp,
 * setup_sprite_4bpp_4x and setup_sprite_8bpp_4x, compiled only when
 * NEON_BUILD is not defined.
 */
2bbbb7af 3895#ifndef NEON_BUILD
59d15d23 3896setup_sprite_tiled_builder(4bpp,);
3897setup_sprite_tiled_builder(8bpp,);
3898
3899setup_sprite_tiled_builder(4bpp,_4x);
3900setup_sprite_tiled_builder(8bpp,_4x);
a2cb152a 3901#endif
3902
3903#if !defined(NEON_BUILD) || defined(SIMD_BUILD)
75e28f62
E
3904
3905void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
3906 s32 v, s32 width, s32 height, u32 color)
3907{
3908 u32 left_offset = u & 0x7;
3909 u32 width_rounded = width + left_offset + 7;
3910
fc6cef7d 3911 u16 *fb_ptr = psx_gpu->vram_out_ptr + (y * 1024) + (s32)(x - left_offset);
75e28f62
E
3912 u32 right_width = width_rounded & 0x7;
3913 u32 block_width = width_rounded / 8;
3914 u32 fb_ptr_pitch = (1024 + 8) - (block_width * 8);
3915
3916 u32 left_mask_bits = ~(0xFF << left_offset);
3917 u32 right_mask_bits = 0xFE << right_width;
3918
3919 u32 texture_offset_base = u + (v * 1024);
3920 u32 texture_mask =
3921 psx_gpu->texture_mask_width | (psx_gpu->texture_mask_height * 1024);
3922
3923 u32 blocks_remaining;
3924 u32 num_blocks = psx_gpu->num_blocks;
3925 block_struct *block = psx_gpu->blocks + num_blocks;
3926
3927 u16 *texture_page_ptr = psx_gpu->texture_page_ptr;
3928 u16 *texture_block_ptr;
3929
3930 texture_offset_base &= ~0x7;
3931
a2cb152a 3932 stats_add(sprites_16bpp, 1);
75e28f62
E
3933
3934 if(block_width == 1)
3935 {
3936 u32 mask_bits = left_mask_bits | right_mask_bits;
3937
3938 while(height)
3939 {
3940 num_blocks++;
3941 sprite_blocks++;
3942
3943 if(num_blocks > MAX_BLOCKS)
3944 {
3945 flush_render_block_buffer(psx_gpu);
3946 num_blocks = 1;
3947 block = psx_gpu->blocks;
3948 }
3949
3950 texture_block_ptr =
3951 texture_page_ptr + (texture_offset_base & texture_mask);
3952
a2cb152a 3953 block->texels = *(vec_8x16u *)texture_block_ptr;
75e28f62
E
3954 block->draw_mask_bits = mask_bits;
3955 block->fb_ptr = fb_ptr;
3956
3957 block++;
3958
3959 texture_offset_base += 1024;
3960 fb_ptr += 1024;
3961
3962 height--;
3963 psx_gpu->num_blocks = num_blocks;
3964 }
3965 }
3966 else
3967 {
3968 u32 texture_offset;
3969
3970 while(height)
3971 {
3972 blocks_remaining = block_width - 2;
3973 num_blocks += block_width;
3974 sprite_blocks += block_width;
3975
3976 if(num_blocks > MAX_BLOCKS)
3977 {
3978 flush_render_block_buffer(psx_gpu);
3979 num_blocks = block_width;
3980 block = psx_gpu->blocks;
3981 }
3982
3983 texture_offset = texture_offset_base;
3984 texture_offset_base += 1024;
3985
3986 texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask);
a2cb152a 3987 block->texels = *(vec_8x16u *)texture_block_ptr;
75e28f62
E
3988
3989 block->draw_mask_bits = left_mask_bits;
3990 block->fb_ptr = fb_ptr;
3991
3992 texture_offset += 8;
3993 fb_ptr += 8;
3994 block++;
3995
3996 while(blocks_remaining)
3997 {
3998 texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask);
a2cb152a 3999 block->texels = *(vec_8x16u *)texture_block_ptr;
75e28f62
E
4000
4001 block->draw_mask_bits = 0;
4002 block->fb_ptr = fb_ptr;
4003
4004 texture_offset += 8;
4005 fb_ptr += 8;
4006 block++;
4007
4008 blocks_remaining--;
4009 }
4010
4011 texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask);
a2cb152a 4012 block->texels = *(vec_8x16u *)texture_block_ptr;
75e28f62
E
4013
4014 block->draw_mask_bits = right_mask_bits;
4015 block->fb_ptr = fb_ptr;
4016
4017 fb_ptr += fb_ptr_pitch;
4018 block++;
4019
4020 height--;
4021 psx_gpu->num_blocks = num_blocks;
4022 }
4023 }
4024}
4025
a2cb152a 4026#endif
4027
4028#ifndef NEON_BUILD
4029
2d658c89 4030void setup_sprite_untextured_512(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
75e28f62
E
4031 s32 v, s32 width, s32 height, u32 color)
4032{
4033 u32 right_width = ((width - 1) & 0x7) + 1;
4034 u32 right_mask_bits = (0xFF << right_width);
fc6cef7d 4035 u16 *fb_ptr = psx_gpu->vram_out_ptr + (y * 1024) + x;
75e28f62
E
4036 u32 block_width = (width + 7) / 8;
4037 u32 fb_ptr_pitch = 1024 - ((block_width - 1) * 8);
4038 u32 blocks_remaining;
4039 u32 num_blocks = psx_gpu->num_blocks;
4040 block_struct *block = psx_gpu->blocks + num_blocks;
4041
4042 u32 color_r = color & 0xFF;
4043 u32 color_g = (color >> 8) & 0xFF;
4044 u32 color_b = (color >> 16) & 0xFF;
4045 vec_8x16u colors;
4046 vec_8x16u right_mask;
4047 vec_8x16u test_mask = psx_gpu->test_mask;
4048 vec_8x16u zero_mask;
4049
4050 sprites_untextured++;
4051
4052 color = (color_r >> 3) | ((color_g >> 3) << 5) | ((color_b >> 3) << 10);
4053
4054 dup_8x16b(colors, color);
4055 dup_8x16b(zero_mask, 0x00);
4056 dup_8x16b(right_mask, right_mask_bits);
4057 tst_8x16b(right_mask, right_mask, test_mask);
4058
4059 while(height)
4060 {
4061 blocks_remaining = block_width - 1;
4062 num_blocks += block_width;
3867c6ef
E
4063
4064#ifdef PROFILE
75e28f62 4065 sprite_blocks += block_width;
3867c6ef 4066#endif
75e28f62
E
4067
4068 if(num_blocks > MAX_BLOCKS)
4069 {
4070 flush_render_block_buffer(psx_gpu);
4071 num_blocks = block_width;
4072 block = psx_gpu->blocks;
4073 }
4074
4075 while(blocks_remaining)
4076 {
4077 block->pixels = colors;
4078 block->draw_mask = zero_mask;
4079 block->fb_ptr = fb_ptr;
4080
4081 fb_ptr += 8;
4082 block++;
4083 blocks_remaining--;
4084 }
4085
4086 block->pixels = colors;
4087 block->draw_mask = right_mask;
4088 block->fb_ptr = fb_ptr;
4089
4090 block++;
4091 fb_ptr += fb_ptr_pitch;
4092
4093 height--;
4094 psx_gpu->num_blocks = num_blocks;
4095 }
4096}
4097
f0931e56 4098#endif
4099
2d658c89 4100static void __attribute__((noinline))
4101setup_sprite_untextured_simple(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
4102 s32 v, s32 width, s32 height, u32 color)
f0931e56 4103{
4104 u32 r = color & 0xFF;
4105 u32 g = (color >> 8) & 0xFF;
4106 u32 b = (color >> 16) & 0xFF;
4107 u32 color_16bpp = (r >> 3) | ((g >> 3) << 5) | ((b >> 3) << 10) |
4108 psx_gpu->mask_msb;
4109 u32 color_32bpp = color_16bpp | (color_16bpp << 16);
4110
4111 u16 *vram_ptr16 = psx_gpu->vram_out_ptr + x + (y * 1024);
4112 u32 *vram_ptr;
4113
4114 u32 num_width;
4115
2d658c89 4116 if(psx_gpu->num_blocks)
f0931e56 4117 {
4118 flush_render_block_buffer(psx_gpu);
4119 }
4120
4121 while(height)
4122 {
4123 num_width = width;
4124
4125 vram_ptr = (void *)vram_ptr16;
77e1e479 4126 if((uintptr_t)vram_ptr16 & 2)
f0931e56 4127 {
4128 *vram_ptr16 = color_32bpp;
4129 vram_ptr = (void *)(vram_ptr16 + 1);
4130 num_width--;
4131 }
4132
4133 while(num_width >= 4 * 2)
4134 {
4135 vram_ptr[0] = color_32bpp;
4136 vram_ptr[1] = color_32bpp;
4137 vram_ptr[2] = color_32bpp;
4138 vram_ptr[3] = color_32bpp;
4139
4140 vram_ptr += 4;
4141 num_width -= 4 * 2;
4142 }
4143
4144 while(num_width >= 2)
4145 {
4146 *vram_ptr++ = color_32bpp;
4147 num_width -= 2;
4148 }
4149
4150 if(num_width > 0)
4151 {
4152 *(u16 *)vram_ptr = color_32bpp;
4153 }
4154
4155 vram_ptr16 += 1024;
4156 height--;
4157 }
2d658c89 4158}
4159
4160void setup_sprite_untextured_512(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
4161 s32 v, s32 width, s32 height, u32 color);
4162
4163void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
4164 s32 v, s32 width, s32 height, u32 color)
4165{
4166 if((psx_gpu->render_state & (RENDER_STATE_MASK_EVALUATE |
4167 RENDER_FLAGS_MODULATE_TEXELS | RENDER_FLAGS_BLEND)) == 0 &&
4168 (psx_gpu->render_mode & RENDER_INTERLACE_ENABLED) == 0)
4169 {
4170 setup_sprite_untextured_simple(psx_gpu, x, y, u, v, width, height, color);
4171 return;
4172 }
4173
4174 while (width > 0)
4175 {
4176 s32 w1 = width > 512 ? 512 : width;
4177 setup_sprite_untextured_512(psx_gpu, x, y, 0, 0, w1, height, color);
4178 x += 512;
4179 width -= 512;
4180 }
f0931e56 4181}
75e28f62
E
4182
4183
4184#define setup_sprite_blocks_switch_textured(texture_mode) \
4185 setup_sprite_##texture_mode \
4186
4187#define setup_sprite_blocks_switch_untextured(texture_mode) \
4188 setup_sprite_untextured \
4189
4190#define setup_sprite_blocks_switch(texturing, texture_mode) \
4191 setup_sprite_blocks_switch_##texturing(texture_mode) \
4192
4193
4194#define texture_sprite_blocks_switch_4bpp() \
4195 texture_blocks_untextured \
4196
4197#define texture_sprite_blocks_switch_8bpp() \
4198 texture_sprite_blocks_8bpp \
4199
4200#define texture_sprite_blocks_switch_16bpp() \
4201 texture_blocks_untextured \
4202
4203#define texture_sprite_blocks_switch_untextured(texture_mode) \
4204 texture_blocks_untextured \
4205
4206#define texture_sprite_blocks_switch_textured(texture_mode) \
4207 texture_sprite_blocks_switch_##texture_mode() \
4208
4209#define render_sprite_blocks_switch_block_modulation(texture_mode, blend_mode, \
4210 mask_evaluate, shading, dithering, texturing, blending, modulation) \
4211{ \
4212 setup_sprite_blocks_switch(texturing, texture_mode), \
4213 texture_sprite_blocks_switch_##texturing(texture_mode), \
4214 shade_blocks_switch(unshaded, texturing, modulation, undithered, blending, \
4215 mask_evaluate), \
4216 blend_blocks_switch(texturing, blending, blend_mode, mask_evaluate) \
4217} \
4218
4219#define render_sprite_blocks_switch_block_blending(texture_mode, blend_mode, \
4220 mask_evaluate, shading, dithering, texturing, blending) \
4221 render_sprite_blocks_switch_block_modulation(texture_mode, blend_mode, \
4222 mask_evaluate, shading, dithering, texturing, blending, modulated), \
4223 render_sprite_blocks_switch_block_modulation(texture_mode, blend_mode, \
4224 mask_evaluate, shading, dithering, texturing, blending, unmodulated) \
4225
4226#define render_sprite_blocks_switch_block_texturing(texture_mode, blend_mode, \
4227 mask_evaluate, shading, dithering, texturing) \
4228 render_sprite_blocks_switch_block_blending(texture_mode, blend_mode, \
4229 mask_evaluate, shading, dithering, texturing, unblended), \
4230 render_sprite_blocks_switch_block_blending(texture_mode, blend_mode, \
4231 mask_evaluate, shading, dithering, texturing, blended) \
4232
4233#define render_sprite_blocks_switch_block_dithering(texture_mode, blend_mode, \
4234 mask_evaluate, shading, dithering) \
4235 render_sprite_blocks_switch_block_texturing(texture_mode, blend_mode, \
4236 mask_evaluate, shading, dithering, untextured), \
4237 render_sprite_blocks_switch_block_texturing(texture_mode, blend_mode, \
4238 mask_evaluate, shading, dithering, textured) \
4239
4240#define render_sprite_blocks_switch_block_shading(texture_mode, blend_mode, \
4241 mask_evaluate, shading) \
4242 render_sprite_blocks_switch_block_dithering(texture_mode, blend_mode, \
4243 mask_evaluate, shading, undithered), \
4244 render_sprite_blocks_switch_block_dithering(texture_mode, blend_mode, \
4245 mask_evaluate, shading, dithered) \
4246
4247#define render_sprite_blocks_switch_block_mask_evaluate(texture_mode, \
4248 blend_mode, mask_evaluate) \
4249 render_sprite_blocks_switch_block_shading(texture_mode, blend_mode, \
4250 mask_evaluate, unshaded), \
4251 render_sprite_blocks_switch_block_shading(texture_mode, blend_mode, \
4252 mask_evaluate, shaded) \
4253
4254#define render_sprite_blocks_switch_block_blend_mode(texture_mode, blend_mode) \
4255 render_sprite_blocks_switch_block_mask_evaluate(texture_mode, blend_mode, \
4256 off), \
4257 render_sprite_blocks_switch_block_mask_evaluate(texture_mode, blend_mode, \
4258 on) \
4259
4260#define render_sprite_blocks_switch_block_texture_mode(texture_mode) \
4261 render_sprite_blocks_switch_block_blend_mode(texture_mode, average), \
4262 render_sprite_blocks_switch_block_blend_mode(texture_mode, add), \
4263 render_sprite_blocks_switch_block_blend_mode(texture_mode, subtract), \
4264 render_sprite_blocks_switch_block_blend_mode(texture_mode, add_fourth) \
4265
4266#define render_sprite_blocks_switch_block() \
4267 render_sprite_blocks_switch_block_texture_mode(4bpp), \
4268 render_sprite_blocks_switch_block_texture_mode(8bpp), \
4269 render_sprite_blocks_switch_block_texture_mode(16bpp), \
b7f5c059 4270 render_sprite_blocks_switch_block_texture_mode(16bpp) \
75e28f62
E
4271
4272
4273render_block_handler_struct render_sprite_block_handlers[] =
4274{
4275 render_sprite_blocks_switch_block()
4276};
4277
4278
4279void render_sprite(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v,
1cec4719 4280 s32 *width, s32 *height, u32 flags, u32 color)
75e28f62 4281{
1cec4719 4282 s32 x_right = x + *width - 1;
4283 s32 y_bottom = y + *height - 1;
75e28f62 4284
3867c6ef
E
4285#ifdef PROFILE
4286 sprites++;
4287#endif
4288
75e28f62
E
4289 if(invalidate_texture_cache_region_viewport(psx_gpu, x, y, x_right,
4290 y_bottom) == 0)
4291 {
1cec4719 4292 *width = *height = 0;
75e28f62
E
4293 return;
4294 }
4295
4296 if(x < psx_gpu->viewport_start_x)
4297 {
4298 u32 clip = psx_gpu->viewport_start_x - x;
4299 x += clip;
4300 u += clip;
1cec4719 4301 *width -= clip;
75e28f62
E
4302 }
4303
4304 if(y < psx_gpu->viewport_start_y)
4305 {
4306 s32 clip = psx_gpu->viewport_start_y - y;
4307 y += clip;
4308 v += clip;
1cec4719 4309 *height -= clip;
75e28f62
E
4310 }
4311
4312 if(x_right > psx_gpu->viewport_end_x)
1cec4719 4313 *width -= x_right - psx_gpu->viewport_end_x;
75e28f62
E
4314
4315 if(y_bottom > psx_gpu->viewport_end_y)
1cec4719 4316 *height -= y_bottom - psx_gpu->viewport_end_y;
75e28f62 4317
1cec4719 4318 if((*width <= 0) || (*height <= 0))
4319 {
4320 *width = *height = 0;
75e28f62 4321 return;
1cec4719 4322 }
75e28f62 4323
3867c6ef 4324#ifdef PROFILE
1cec4719 4325 span_pixels += *width * *height;
4326 spans += *height;
3867c6ef 4327#endif
75e28f62
E
4328
4329 u32 render_state = flags &
4330 (RENDER_FLAGS_MODULATE_TEXELS | RENDER_FLAGS_BLEND |
4331 RENDER_FLAGS_TEXTURE_MAP);
4332 render_state |=
4333 (psx_gpu->render_state_base & ~RENDER_STATE_DITHER);
1f88961f 4334
75e28f62
E
4335 if((psx_gpu->render_state != render_state) ||
4336 (psx_gpu->primitive_type != PRIMITIVE_TYPE_SPRITE))
4337 {
4338 psx_gpu->render_state = render_state;
4339 flush_render_block_buffer(psx_gpu);
3867c6ef 4340#ifdef PROFILE
75e28f62 4341 state_changes++;
3867c6ef 4342#endif
75e28f62
E
4343 }
4344
4345 psx_gpu->primitive_type = PRIMITIVE_TYPE_SPRITE;
4346
4347 color &= 0xFFFFFF;
4348
4349 if(psx_gpu->triangle_color != color)
4350 {
4351 flush_render_block_buffer(psx_gpu);
4352 psx_gpu->triangle_color = color;
4353 }
4354
4355 if(color == 0x808080)
4356 render_state |= RENDER_FLAGS_MODULATE_TEXELS;
4357
4358 render_block_handler_struct *render_block_handler =
4359 &(render_sprite_block_handlers[render_state]);
4360 psx_gpu->render_block_handler = render_block_handler;
4361
4362 ((setup_sprite_function_type *)render_block_handler->setup_blocks)
1cec4719 4363 (psx_gpu, x, y, u, v, *width, *height, color);
75e28f62
E
4364}
4365
/*
 * Building blocks for render_line(). All of these macros expand inside
 * render_line() and use its locals by name: psx_gpu, vram_ptr, color,
 * color_r/g/b, current_r/g/b, gradient_r/g/b, vertex_a/b, x_a/x_b, y_a/y_b,
 * delta_x/delta_y, current_x/current_y, error, error_step and error_wrap.
 */

/* Mask evaluation: "yes" skips pixels whose bit 15 is already set. */
#define draw_pixel_line_mask_evaluate_yes() \
  if((*vram_ptr & 0x8000) == 0)

#define draw_pixel_line_mask_evaluate_no()


/* Shaded: take the interpolated colour, then step the interpolators. */
#define draw_pixel_line_shaded() \
{ \
  color_r = fixed_to_int(current_r); \
  color_g = fixed_to_int(current_g); \
  color_b = fixed_to_int(current_b); \
  \
  current_r += gradient_r; \
  current_g += gradient_g; \
  current_b += gradient_b; \
}

/* Unshaded: every pixel uses the flat 24-bit colour. */
#define draw_pixel_line_unshaded() \
{ \
  color_r = color & 0xFF; \
  color_g = (color >> 8) & 0xFF; \
  color_b = (color >> 16) & 0xFF; \
}


/*
 * Dithered: derive an offset in the range -4..3 from the low two bits of
 * the pixel position, add it to each 8-bit channel and clamp to 0..255.
 */
#define draw_pixel_line_dithered(px, py) \
{ \
  u32 dither_xor = px ^ py; \
  s32 dither_offset = (dither_xor >> 1) & 0x1; \
  dither_offset |= (py & 0x1) << 1; \
  dither_offset |= (dither_xor & 0x1) << 2; \
  dither_offset -= 4; \
  \
  color_r += dither_offset; \
  color_g += dither_offset; \
  color_b += dither_offset; \
  \
  if(color_r < 0) \
    color_r = 0; \
  if(color_r > 255) \
    color_r = 255; \
  \
  if(color_g < 0) \
    color_g = 0; \
  if(color_g > 255) \
    color_g = 255; \
  \
  if(color_b < 0) \
    color_b = 0; \
  if(color_b > 255) \
    color_b = 255; \
}

#define draw_pixel_line_undithered(px, py)


/*
 * Blend modes. These operate on 5-bit channels: color_* is the new pixel,
 * fb_* the pixel already in VRAM. Each expansion deliberately ends without
 * a semicolon; the invocation site supplies it.
 */
#define draw_pixel_line_average() \
  color_r = (color_r + fb_r) / 2; \
  color_g = (color_g + fb_g) / 2; \
  color_b = (color_b + fb_b) / 2

#define draw_pixel_line_add() \
  color_r += fb_r; \
  color_g += fb_g; \
  color_b += fb_b; \
  \
  if(color_r > 31) \
    color_r = 31; \
  \
  if(color_g > 31) \
    color_g = 31; \
  \
  if(color_b > 31) \
    color_b = 31

#define draw_pixel_line_subtract() \
  color_r = fb_r - color_r; \
  color_g = fb_g - color_g; \
  color_b = fb_b - color_b; \
  \
  if(color_r < 0) \
    color_r = 0; \
  \
  if(color_g < 0) \
    color_g = 0; \
  \
  if(color_b < 0) \
    color_b = 0

#define draw_pixel_line_add_fourth() \
  color_r = fb_r + (color_r / 4); \
  color_g = fb_g + (color_g / 4); \
  color_b = fb_b + (color_b / 4); \
  \
  if(color_r > 31) \
    color_r = 31; \
  \
  if(color_g > 31) \
    color_g = 31; \
  \
  if(color_b > 31) \
    color_b = 31


/* Blended: unpack the framebuffer pixel, then apply the chosen mode. */
#define draw_pixel_line_blended(blend_mode) \
  s32 fb_pixel = *vram_ptr; \
  s32 fb_r = fb_pixel & 0x1F; \
  s32 fb_g = (fb_pixel >> 5) & 0x1F; \
  s32 fb_b = (fb_pixel >> 10) & 0x1F; \
  \
  draw_pixel_line_##blend_mode()

#define draw_pixel_line_unblended(blend_mode)


/*
 * Plot one pixel at (px, py) through vram_ptr if it lies inside the
 * viewport (bounds inclusive): shade, dither, reduce to 5 bits per channel,
 * blend, then store with psx_gpu->mask_msb OR-ed in.
 */
#define draw_pixel_line(px, py, shading, blending, dithering, mask_evaluate, \
 blend_mode) \
  if((px >= psx_gpu->viewport_start_x) && (py >= psx_gpu->viewport_start_y) && \
   (px <= psx_gpu->viewport_end_x) && (py <= psx_gpu->viewport_end_y)) \
  { \
    draw_pixel_line_mask_evaluate_##mask_evaluate() \
    { \
      draw_pixel_line_##shading(); \
      draw_pixel_line_##dithering(px, py); \
      \
      color_r >>= 3; \
      color_g >>= 3; \
      color_b >>= 3; \
      \
      draw_pixel_line_##blending(blend_mode); \
      \
      *vram_ptr = color_r | (color_g << 5) | (color_b << 10) | \
       psx_gpu->mask_msb; \
    } \
  }

/* Direction helpers: "increment" walks down the screen, "decrement" up. */
#define update_increment(value) \
  value++

#define update_decrement(value) \
  value--

#define update_vram_row_increment(value) \
  vram_ptr += 1024

#define update_vram_row_decrement(value) \
  vram_ptr -= 1024

#define compare_increment(a, b) \
  (a <= b)

#define compare_decrement(a, b) \
  (a >= b)

/*
 * Set the per-pixel colour gradients along the minor axis named by `minor`
 * (x or y) and start the interpolators at vertex_a's colour. A zero-length
 * axis yields zero gradients rather than a division by zero.
 */
#define set_line_gradients(minor) \
{ \
  s32 gradient_divisor = delta_##minor; \
  \
  gradient_r = 0; \
  gradient_g = 0; \
  gradient_b = 0; \
  \
  if(gradient_divisor != 0) \
  { \
    gradient_r = int_to_fixed(vertex_b->r - vertex_a->r) / gradient_divisor; \
    gradient_g = int_to_fixed(vertex_b->g - vertex_a->g) / gradient_divisor; \
    gradient_b = int_to_fixed(vertex_b->b - vertex_a->b) / gradient_divisor; \
  } \
  \
  current_r = fixed_center(vertex_a->r); \
  current_g = fixed_center(vertex_a->g); \
  current_b = fixed_center(vertex_a->b); \
}

/*
 * Mostly-horizontal line: one pixel per column from x_a to x_b, moving one
 * row in `direction` each time the error term reaches error_wrap.
 */
#define draw_line_span_horizontal(direction, shading, blending, dithering, \
 mask_evaluate, blend_mode) \
do \
{ \
  error_step = delta_y * 2; \
  error_wrap = delta_x * 2; \
  error = delta_x; \
  \
  current_y = y_a; \
  set_line_gradients(x); \
  \
  for(current_x = x_a; current_x <= x_b; current_x++) \
  { \
    draw_pixel_line(current_x, current_y, shading, blending, dithering, \
     mask_evaluate, blend_mode); \
    error += error_step; \
    vram_ptr++; \
    \
    if(error >= error_wrap) \
    { \
      update_##direction(current_y); \
      update_vram_row_##direction(); \
      error -= error_wrap; \
    } \
  } \
} while(0)

/*
 * Mostly-vertical line: one pixel per row from y_a to y_b in `direction`,
 * moving one column right each time the error term exceeds error_wrap
 * (strictly greater, unlike the horizontal variant).
 */
#define draw_line_span_vertical(direction, shading, blending, dithering, \
 mask_evaluate, blend_mode) \
do \
{ \
  error_step = delta_x * 2; \
  error_wrap = delta_y * 2; \
  error = delta_y; \
  \
  current_x = x_a; \
  set_line_gradients(y); \
  \
  for(current_y = y_a; compare_##direction(current_y, y_b); \
   update_##direction(current_y)) \
  { \
    draw_pixel_line(current_x, current_y, shading, blending, dithering, \
     mask_evaluate, blend_mode); \
    error += error_step; \
    update_vram_row_##direction(); \
    \
    if(error > error_wrap) \
    { \
      vram_ptr++; \
      current_x++; \
      error -= error_wrap; \
    } \
  } \
} while(0)


/*
 * Pick the span routine for the line's octant. delta_y is made
 * non-negative first; delta_x is already non-negative because render_line()
 * orders the vertices left to right.
 */
#define render_line_body(shading, blending, dithering, mask_evaluate, \
 blend_mode) \
  if(delta_y < 0) \
  { \
    delta_y = -delta_y; \
    \
    if(delta_x > delta_y) \
    { \
      draw_line_span_horizontal(decrement, shading, blending, dithering, \
       mask_evaluate, blend_mode); \
    } \
    else \
    { \
      draw_line_span_vertical(decrement, shading, blending, dithering, \
       mask_evaluate, blend_mode); \
    } \
  } \
  else \
  { \
    if(delta_x > delta_y) \
    { \
      draw_line_span_horizontal(increment, shading, blending, dithering, \
       mask_evaluate, blend_mode); \
    } \
    else \
    { \
      draw_line_span_vertical(increment, shading, blending, dithering, \
       mask_evaluate, blend_mode); \
    } \
  }

4631
4632void render_line(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, u32 flags,
3b3dee71 4633 u32 color, int double_resolution)
75e28f62
E
4634{
4635 s32 color_r, color_g, color_b;
4636 u32 triangle_winding = 0;
4637
4638 fixed_type gradient_r = 0;
4639 fixed_type gradient_g = 0;
4640 fixed_type gradient_b = 0;
4641 fixed_type current_r = 0;
4642 fixed_type current_g = 0;
4643 fixed_type current_b = 0;
4644
4645 s32 y_a, y_b;
4646 s32 x_a, x_b;
4647
4648 s32 delta_x, delta_y;
4649
4650 s32 current_x;
4651 s32 current_y;
4652
4653 u32 error_step;
4654 u32 error;
4655 u32 error_wrap;
4656
4657 u16 *vram_ptr;
4658
4659 flush_render_block_buffer(psx_gpu);
4660 psx_gpu->primitive_type = PRIMITIVE_TYPE_LINE;
4661
4662 vertex_struct *vertex_a = &(vertexes[0]);
4663 vertex_struct *vertex_b = &(vertexes[1]);
4664
4665 u32 control_mask;
4666
3867c6ef 4667#ifdef PROFILE
75e28f62 4668 lines++;
3867c6ef 4669#endif
75e28f62
E
4670
4671 if(vertex_a->x >= vertex_b->x)
4672 {
4673 vertex_swap(vertex_a, vertex_b);
c57af5e6 4674 (void)triangle_winding;
75e28f62
E
4675 }
4676
4677 x_a = vertex_a->x;
4678 x_b = vertex_b->x;
4679
4680 y_a = vertex_a->y;
4681 y_b = vertex_b->y;
4682
4683 delta_x = x_b - x_a;
4684 delta_y = y_b - y_a;
4685
3b3dee71 4686 if(delta_x >= 1024 || delta_y >= 512 || delta_y <= -512)
75e28f62
E
4687 return;
4688
3b3dee71 4689 if(double_resolution)
4690 {
4691 x_a *= 2;
4692 x_b *= 2;
4693 y_a *= 2;
4694 y_b *= 2;
4695 delta_x *= 2;
4696 delta_y *= 2;
4697 }
4698
75e28f62
E
4699 flags &= ~RENDER_FLAGS_TEXTURE_MAP;
4700
c1817bd9 4701 vram_ptr = psx_gpu->vram_out_ptr + (y_a * 1024) + x_a;
75e28f62
E
4702
4703 control_mask = 0x0;
4704
4705 if(flags & RENDER_FLAGS_SHADE)
4706 control_mask |= 0x1;
4707
4708 if(flags & RENDER_FLAGS_BLEND)
4709 {
4710 control_mask |= 0x2;
4711 control_mask |= ((psx_gpu->render_state_base >> 6) & 0x3) << 4;
4712 }
4713
4714 if(psx_gpu->render_state_base & RENDER_STATE_DITHER)
4715 control_mask |= 0x4;
4716
4717 if(psx_gpu->render_state_base & RENDER_STATE_MASK_EVALUATE)
4718 control_mask |= 0x8;
4719
4720 switch(control_mask)
4721 {
4722 case 0x0:
4723 render_line_body(unshaded, unblended, undithered, no, none);
4724 break;
4725
4726 case 0x1:
4727 render_line_body(shaded, unblended, undithered, no, none);
4728 break;
4729
4730 case 0x2:
4731 render_line_body(unshaded, blended, undithered, no, average);
4732 break;
4733
4734 case 0x3:
4735 render_line_body(shaded, blended, undithered, no, average);
4736 break;
4737
4738 case 0x4:
4739 render_line_body(unshaded, unblended, dithered, no, none);
4740 break;
4741
4742 case 0x5:
4743 render_line_body(shaded, unblended, dithered, no, none);
4744 break;
4745
4746 case 0x6:
4747 render_line_body(unshaded, blended, dithered, no, average);
4748 break;
4749
4750 case 0x7:
4751 render_line_body(shaded, blended, dithered, no, average);
4752 break;
4753
4754 case 0x8:
4755 render_line_body(unshaded, unblended, undithered, yes, none);
4756 break;
4757
4758 case 0x9:
4759 render_line_body(shaded, unblended, undithered, yes, none);
4760 break;
4761
4762 case 0xA:
4763 render_line_body(unshaded, blended, undithered, yes, average);
4764 break;
4765
4766 case 0xB:
4767 render_line_body(shaded, blended, undithered, yes, average);
4768 break;
4769
4770 case 0xC:
4771 render_line_body(unshaded, unblended, dithered, yes, none);
4772 break;
4773
4774 case 0xD:
4775 render_line_body(shaded, unblended, dithered, yes, none);
4776 break;
4777
4778 case 0xE:
4779 render_line_body(unshaded, blended, dithered, yes, average);
4780 break;
4781
4782 case 0xF:
4783 render_line_body(shaded, blended, dithered, yes, average);
4784 break;
4785
4786 case 0x12:
4787 render_line_body(unshaded, blended, undithered, no, add);
4788 break;
4789
4790 case 0x13:
4791 render_line_body(shaded, blended, undithered, no, add);
4792 break;
4793
4794 case 0x16:
4795 render_line_body(unshaded, blended, dithered, no, add);
4796 break;
4797
4798 case 0x17:
4799 render_line_body(shaded, blended, dithered, no, add);
4800 break;
4801
4802 case 0x1A:
4803 render_line_body(unshaded, blended, undithered, yes, add);
4804 break;
4805
4806 case 0x1B:
4807 render_line_body(shaded, blended, undithered, yes, add);
4808 break;
4809
4810 case 0x1E:
4811 render_line_body(unshaded, blended, dithered, yes, add);
4812 break;
4813
4814 case 0x1F:
4815 render_line_body(shaded, blended, dithered, yes, add);
4816 break;
4817
4818 case 0x22:
4819 render_line_body(unshaded, blended, undithered, no, subtract);
4820 break;
4821
4822 case 0x23:
4823 render_line_body(shaded, blended, undithered, no, subtract);
4824 break;
4825
4826 case 0x26:
4827 render_line_body(unshaded, blended, dithered, no, subtract);
4828 break;
4829
4830 case 0x27:
4831 render_line_body(shaded, blended, dithered, no, subtract);
4832 break;
4833
4834 case 0x2A:
4835 render_line_body(unshaded, blended, undithered, yes, subtract);
4836 break;
4837
4838 case 0x2B:
4839 render_line_body(shaded, blended, undithered, yes, subtract);
4840 break;
4841
4842 case 0x2E:
4843 render_line_body(unshaded, blended, dithered, yes, subtract);
4844 break;
4845
4846 case 0x2F:
4847 render_line_body(shaded, blended, dithered, yes, subtract);
4848 break;
4849
4850 case 0x32:
4851 render_line_body(unshaded, blended, undithered, no, add_fourth);
4852 break;
4853
4854 case 0x33:
4855 render_line_body(shaded, blended, undithered, no, add_fourth);
4856 break;
4857
4858 case 0x36:
4859 render_line_body(unshaded, blended, dithered, no, add_fourth);
4860 break;
4861
4862 case 0x37:
4863 render_line_body(shaded, blended, dithered, no, add_fourth);
4864 break;
4865
4866 case 0x3A:
4867 render_line_body(unshaded, blended, undithered, yes, add_fourth);
4868 break;
4869
4870 case 0x3B:
4871 render_line_body(shaded, blended, undithered, yes, add_fourth);
4872 break;
4873
4874 case 0x3E:
4875 render_line_body(unshaded, blended, dithered, yes, add_fourth);
4876 break;
4877
4878 case 0x3F:
4879 render_line_body(shaded, blended, dithered, yes, add_fourth);
4880 break;
4881 }
4882}
4883
4884
4885void render_block_fill(psx_gpu_struct *psx_gpu, u32 color, u32 x, u32 y,
4886 u32 width, u32 height)
4887{
1f88961f
E
4888 if((width == 0) || (height == 0))
4889 return;
4890
75e28f62
E
4891 invalidate_texture_cache_region(psx_gpu, x, y, x + width - 1, y + height - 1);
4892
75e28f62
E
4893 u32 r = color & 0xFF;
4894 u32 g = (color >> 8) & 0xFF;
4895 u32 b = (color >> 16) & 0xFF;
03eb3b69 4896 u32 color_16bpp = (r >> 3) | ((g >> 3) << 5) | ((b >> 3) << 10);
6c4a10c4
E
4897 u32 color_32bpp = color_16bpp | (color_16bpp << 16);
4898
c1817bd9 4899 u32 *vram_ptr = (u32 *)(psx_gpu->vram_out_ptr + x + (y * 1024));
6c4a10c4
E
4900
4901 u32 pitch = 512 - (width / 2);
4902 u32 num_width;
75e28f62 4903
f1359c57 4904 if(psx_gpu->render_mode & RENDER_INTERLACE_ENABLED)
6c4a10c4
E
4905 {
4906 pitch += 512;
4907 height /= 2;
4908
f1359c57 4909 if(psx_gpu->render_mode & RENDER_INTERLACE_ODD)
6c4a10c4
E
4910 vram_ptr += 512;
4911 }
75e28f62 4912
6c4a10c4 4913 while(height)
75e28f62 4914 {
6c4a10c4
E
4915 num_width = width;
4916 while(num_width)
75e28f62 4917 {
6c4a10c4
E
4918 vram_ptr[0] = color_32bpp;
4919 vram_ptr[1] = color_32bpp;
4920 vram_ptr[2] = color_32bpp;
4921 vram_ptr[3] = color_32bpp;
4922 vram_ptr[4] = color_32bpp;
4923 vram_ptr[5] = color_32bpp;
4924 vram_ptr[6] = color_32bpp;
4925 vram_ptr[7] = color_32bpp;
4926
4927 vram_ptr += 8;
4928 num_width -= 16;
75e28f62
E
4929 }
4930
6c4a10c4
E
4931 vram_ptr += pitch;
4932 height--;
75e28f62 4933 }
75e28f62
E
4934}
4935
f1359c57 4936void render_block_fill_enh(psx_gpu_struct *psx_gpu, u32 color, u32 x, u32 y,
4937 u32 width, u32 height)
4938{
4939 if((width == 0) || (height == 0))
4940 return;
4941
e929dec5 4942 if(width > 1024)
4943 width = 1024;
4944
f1359c57 4945 u32 r = color & 0xFF;
4946 u32 g = (color >> 8) & 0xFF;
4947 u32 b = (color >> 16) & 0xFF;
03eb3b69 4948 u32 color_16bpp = (r >> 3) | ((g >> 3) << 5) | ((b >> 3) << 10);
f1359c57 4949 u32 color_32bpp = color_16bpp | (color_16bpp << 16);
4950
e929dec5 4951 u32 *vram_ptr = (u32 *)(psx_gpu->vram_out_ptr + x + (y * 1024));
f1359c57 4952
e929dec5 4953 u32 pitch = 1024 / 2 - (width / 2);
f1359c57 4954 u32 num_width;
4955
4956 while(height)
4957 {
4958 num_width = width;
4959 while(num_width)
4960 {
4961 vram_ptr[0] = color_32bpp;
4962 vram_ptr[1] = color_32bpp;
4963 vram_ptr[2] = color_32bpp;
4964 vram_ptr[3] = color_32bpp;
4965 vram_ptr[4] = color_32bpp;
4966 vram_ptr[5] = color_32bpp;
4967 vram_ptr[6] = color_32bpp;
4968 vram_ptr[7] = color_32bpp;
4969
4970 vram_ptr += 8;
4971 num_width -= 16;
4972 }
4973
4974 vram_ptr += pitch;
4975 height--;
4976 }
4977}
4978
72583812 4979#ifndef PCSX
75e28f62
E
4980void render_block_copy(psx_gpu_struct *psx_gpu, u16 *source, u32 x, u32 y,
4981 u32 width, u32 height, u32 pitch)
4982{
4983 u16 *vram_ptr = psx_gpu->vram_ptr + x + (y * 1024);
4984 u32 draw_x, draw_y;
87c45ad1 4985 u32 mask_msb = psx_gpu->mask_msb;
75e28f62 4986
1f88961f
E
4987 if((width == 0) || (height == 0))
4988 return;
4989
cec398c0 4990 flush_render_block_buffer(psx_gpu);
75e28f62
E
4991 invalidate_texture_cache_region(psx_gpu, x, y, x + width - 1, y + height - 1);
4992
75e28f62
E
4993 for(draw_y = 0; draw_y < height; draw_y++)
4994 {
4995 for(draw_x = 0; draw_x < width; draw_x++)
4996 {
24b9bacc 4997 vram_ptr[draw_x] = source[draw_x] | mask_msb;
75e28f62
E
4998 }
4999
5000 source += pitch;
5001 vram_ptr += 1024;
5002 }
5003}
5004
5005void render_block_move(psx_gpu_struct *psx_gpu, u32 source_x, u32 source_y,
5006 u32 dest_x, u32 dest_y, u32 width, u32 height)
5007{
5008 render_block_copy(psx_gpu, psx_gpu->vram_ptr + source_x + (source_y * 1024),
5009 dest_x, dest_y, width, height, 1024);
5010}
72583812 5011#endif
75e28f62
E
5012
5013void initialize_reciprocal_table(void)
5014{
5015 u32 height;
5016 u32 height_normalized;
5017 u32 height_reciprocal;
5018 s32 shift;
5019
c1817bd9 5020 for(height = 1; height < sizeof(reciprocal_table)
5021 / sizeof(reciprocal_table[0]); height++)
75e28f62
E
5022 {
5023 shift = __builtin_clz(height);
5024 height_normalized = height << shift;
c111e8f8 5025 height_reciprocal = ((1ULL << 51) + (height_normalized - 1)) /
75e28f62
E
5026 height_normalized;
5027
c111e8f8 5028 shift = 32 - (51 - shift);
75e28f62 5029
7d5140f5 5030 reciprocal_table[height] = (height_reciprocal << 10) | shift;
75e28f62
E
5031 }
5032}
5033
5034
/*
 * Pack four signed dither offsets into one 32-bit word, one byte each:
 * a in bits 0-7, b in 8-15, c in 16-23, d in 24-31. Each value is reduced
 * to its low 8 bits (two's complement for negatives). Arguments are
 * parenthesized so that expressions such as `x | y` are masked as a whole.
 */
#define dither_table_row(a, b, c, d) \
 (((a) & 0xFF) | (((b) & 0xFF) << 8) | (((c) & 0xFF) << 16) | \
  ((u32)((d) & 0xFF) << 24))

e8c0e0bb 5038void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram)
75e28f62
E
5039{
5040 vec_8x16u test_mask =
5041 { { { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80 } } };
5042
5043 psx_gpu->test_mask = test_mask;
5044
75e28f62
E
5045 psx_gpu->dirty_textures_4bpp_mask = 0xFFFFFFFF;
5046 psx_gpu->dirty_textures_8bpp_mask = 0xFFFFFFFF;
5047 psx_gpu->dirty_textures_8bpp_alternate_mask = 0xFFFFFFFF;
5048 psx_gpu->viewport_mask = 0;
5049 psx_gpu->current_texture_page = 0;
5050 psx_gpu->current_texture_mask = 0;
5051 psx_gpu->last_8bpp_texture_page = 0;
5052
5053 psx_gpu->clut_settings = 0;
5054 psx_gpu->texture_settings = 0;
5055 psx_gpu->render_state = 0;
5056 psx_gpu->render_state_base = 0;
5057 psx_gpu->num_blocks = 0;
c6063f89 5058 psx_gpu->uvrgb_phase = 0x8000;
75e28f62 5059
e8c0e0bb 5060 psx_gpu->vram_ptr = vram;
c1817bd9 5061 psx_gpu->vram_out_ptr = vram;
75e28f62 5062
3867c6ef 5063 psx_gpu->texture_page_base = psx_gpu->vram_ptr;
75e28f62
E
5064 psx_gpu->texture_page_ptr = psx_gpu->vram_ptr;
5065 psx_gpu->clut_ptr = psx_gpu->vram_ptr;
5066
5fe1a2b1 5067 psx_gpu->viewport_start_x = psx_gpu->viewport_start_y = 0;
5068 psx_gpu->viewport_end_x = psx_gpu->viewport_end_y = 0;
75e28f62
E
5069 psx_gpu->mask_msb = 0;
5070
f9248bbf
E
5071 psx_gpu->texture_window_x = 0;
5072 psx_gpu->texture_window_y = 0;
5073 psx_gpu->texture_mask_width = 0xFF;
5074 psx_gpu->texture_mask_height = 0xFF;
5075
f1359c57 5076 psx_gpu->render_mode = 0;
69b09c0d 5077
75e28f62
E
5078 memset(psx_gpu->vram_ptr, 0, sizeof(u16) * 1024 * 512);
5079
5080 initialize_reciprocal_table();
ed0fd81d 5081 psx_gpu->reciprocal_table_ptr = reciprocal_table;
75e28f62
E
5082
5083 // 00 01 10 11
5084 // 00 0 4 1 5
5085 // 01 6 2 7 3
5086 // 10 1 5 0 4
5087 // 11 7 3 6 2
5088 // (minus ones(4) * 4)
5089
5090 // d0: (1 3 5 7): x1 ^ y1
5091 // d1: (2 3 6 7): y0
5092 // d2: (4 5 6 7): x0 ^ y0
5093
75e28f62
E
5094 psx_gpu->dither_table[0] = dither_table_row(-4, 0, -3, 1);
5095 psx_gpu->dither_table[1] = dither_table_row(2, -2, 3, -1);
5096 psx_gpu->dither_table[2] = dither_table_row(-3, 1, -4, 0);
5097 psx_gpu->dither_table[3] = dither_table_row(3, -1, 2, -2);
df05223d 5098 psx_gpu->allow_dithering = 1;
75e28f62
E
5099
5100 psx_gpu->primitive_type = PRIMITIVE_TYPE_UNKNOWN;
e929dec5 5101
0b4038f8 5102 psx_gpu->saved_hres = 256;
ee060c58 5103 psx_gpu->hacks_active = 0;
2da2fc76 5104
ee060c58 5105 // check some offsets, asm relies on these
5106 psx_gpu->reserved_a[(offsetof(psx_gpu_struct, test_mask) == 0) - 1] = 0;
2da2fc76 5107 psx_gpu->reserved_a[(offsetof(psx_gpu_struct, blocks) == psx_gpu_blocks_offset) - 1] = 0;
75e28f62
E
5108}
5109
5110u64 get_us(void)
5111{
5112 struct timeval tv;
5113 gettimeofday(&tv, NULL);
5114
5115 return (tv.tv_sec * 1000000ULL) + tv.tv_usec;
5116}
5117
#if 0 //def NEON_BUILD

/*
 * Disabled benchmarking helpers. They access ARM coprocessor 15 registers
 * through inline assembly, so they only build for ARM targets.
 */

/* Read the counter register at c9, c13, 0. */
u32 get_counter(void)
{
  u32 counter;
  __asm__ volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(counter));

  return counter;
}

/* Reset and enable the cycle counter. */
void init_counter(void)
{
  u32 value;
  __asm__ volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(value));
  value |= 5; // master enable, ccnt reset
  value &= ~8; // ccnt divider 0
  __asm__ volatile("mcr p15, 0, %0, c9, c12, 0" :: "r"(value));
  // enable cycle counter; unsigned constant, since 1 << 31 overflows int
  __asm__ volatile("mcr p15, 0, %0, c9, c12, 1" :: "r"(1u << 31));
}

/*
 * Time texture_sprite_blocks_8bpp() over 64 zeroed blocks and print the
 * average number of counter ticks per block.
 */
void triangle_benchmark(psx_gpu_struct *psx_gpu)
{
  u32 i;

  u32 ticks;
  u32 ticks_elapsed;

  const u32 iterations = 500000;

  psx_gpu->num_blocks = 64;
  psx_gpu->clut_ptr = psx_gpu->vram_ptr;

  for(i = 0; i < 64; i++)
  {
    memset(&(psx_gpu->blocks[i].r), 0, 16);
  }

  init_counter();

  ticks = get_counter();

  for(i = 0; i < iterations; i++)
  {
    texture_sprite_blocks_8bpp(psx_gpu);
  }

  ticks_elapsed = get_counter() - ticks;

  printf("benchmark: %lf cycles\n", (double)ticks_elapsed / (iterations * 64));
}

#endif
5171
fc6cef7d 5172#include "psx_gpu_4x.c"
47c15995 5173
5174// vim:ts=2:sw=2:expandtab