u32 width, u32 height, u32 pitch)
{
u16 *vram_ptr = psx_gpu->vram_ptr + x + (y * 1024);
- u32 mask_msb = psx_gpu->mask_msb;
u32 draw_x, draw_y;
+ u32 mask_msb = psx_gpu->mask_msb;
if((width == 0) || (height == 0))
return;
#define num_unaligned r2
#define num_width r6
-#undef colors
+#undef colors_a
+#undef colors_b
-#define colors q0
+#define colors_a q0
+#define colors_b q1
.align 3
orr color, color_r, color_g, lsl #5
orr color, color, color_b, lsl #10
- add left_unaligned, x, #0x7
+ vdup.u16 colors_a, color
- bic left_unaligned, left_unaligned, #0x7
- vdup.u16 colors, color
-
- sub left_unaligned, left_unaligned, x
+ vmov colors_b, colors_a
mov pitch, #2048
-
sub pitch, pitch, width, lsl #1
- sub width, width, left_unaligned
-
- and right_unaligned, width, #0x7
- bic width, width, #0x7
0:
- mov num_width, width, lsr #3
-
- movs num_unaligned, left_unaligned
- beq 2f
+ mov num_width, width, lsr #4
1:
- strh color, [ vram_ptr ], #2
-
- subs num_unaligned, num_unaligned, #1
- bne 1b
+ vst1.u32 { colors_a, colors_b }, [ vram_ptr, :128 ]!
- 2:
- vst1.u32 { colors }, [ vram_ptr, :128 ]!
subs num_width, num_width, #1
- bne 2b
-
- movs num_unaligned, right_unaligned
- beq 4f
-
- 3:
- strh color, [ vram_ptr ], #2
-
- subs num_unaligned, num_unaligned, #1
- bne 3b
+ bne 1b
- 4:
add vram_ptr, vram_ptr, pitch
subs height, height, #1
bne 0b
-
+
+ 1:
ldmia sp!, { r4 - r6, pc }
MAP_SHARED | 0xA0000000, fbdev_handle, 0));
vram_ptr += 64;
- initialize_psx_gpu(psx_gpu, vram_ptr + 64);
+ initialize_psx_gpu(psx_gpu, vram_ptr);
#else
initialize_psx_gpu(psx_gpu, _vram + 64);
#endif
gpu_parse(psx_gpu, list, size);
flush_render_block_buffer(psx_gpu);
- printf("%-64s: ", argv[1]);
#ifdef NEON_BUILD
u32 cycles_elapsed = get_counter() - cycles;
- printf("%d\n", cycles_elapsed);
+ printf("%-64s: %d\n", argv[1], cycles_elapsed);
+#else
+ printf("%-64s: ", argv[1]);
#endif
-#if 0
+#if 1
u32 i;
for(i = 0; i < 1024 * 512; i++)
}
#endif
-#if 1
+#if 0
printf("\n");
printf(" %d pixels, %d pixel blocks, %d spans\n"
" (%lf pixels per block, %lf pixels per span),\n"
s16 *list_s16 = (void *)list;
current_command = *list >> 24;
command_length = command_lengths[current_command];
-
+
switch(current_command)
{
case 0x00:
break;
case 0x02:
- render_block_fill(psx_gpu, list[0] & 0xFFFFFF, list_s16[2] & 0x3FF,
- list_s16[3] & 0x1FF, list_s16[4] & 0x3FF, list_s16[5] & 0x1FF);
+ { /* Fill command: decode the rectangle, then split it wherever it runs past column 1024 or row 512. */
+ u32 x = list_s16[2] & 0x3FF; /* 10-bit x from the third 16-bit word */
+ u32 y = list_s16[3] & 0x1FF; /* 9-bit y from the fourth 16-bit word */
+ u32 width = list_s16[4] & 0x3FF; /* 10-bit width */
+ u32 height = list_s16[5] & 0x1FF; /* 9-bit height */
+ u32 color = list[0] & 0xFFFFFF; /* low 24 bits of the command word */
+
+ x &= ~0xF; /* round x down to a multiple of 16 */
+ width = ((width + 0xF) & ~0xF); /* round width up to a multiple of 16 (NOTE(review): presumably so the fill can work in 16-pixel units - confirm) */
+
+ if((x + width) > 1024) /* rectangle runs past column 1024 */
+ {
+ u32 width_a = 1024 - x; /* columns from x up to column 1024 */
+ u32 width_b = width - width_a; /* leftover columns, drawn starting at column 0 */
+
+ if((y + height) > 512) /* also runs past row 512: four pieces */
+ {
+ u32 height_a = 512 - y; /* rows from y up to row 512 */
+ u32 height_b = height - height_a; /* leftover rows, drawn starting at row 0 */
+
+ render_block_fill(psx_gpu, color, x, y, width_a, height_a); /* piece at (x, y) */
+ render_block_fill(psx_gpu, color, 0, y, width_b, height_a); /* column overflow, same rows */
+ render_block_fill(psx_gpu, color, x, 0, width_a, height_b); /* row overflow, same columns */
+ render_block_fill(psx_gpu, color, 0, 0, width_b, height_b); /* overflow in both directions */
+ }
+ else
+ {
+ render_block_fill(psx_gpu, color, x, y, width_a, height); /* piece at (x, y) */
+ render_block_fill(psx_gpu, color, 0, y, width_b, height); /* column overflow from column 0 */
+ }
+ }
+ else
+ {
+ if((y + height) > 512) /* only runs past row 512: two pieces */
+ {
+ u32 height_a = 512 - y; /* rows from y up to row 512 */
+ u32 height_b = height - height_a; /* leftover rows, drawn starting at row 0 */
+
+ render_block_fill(psx_gpu, color, x, y, width, height_a); /* piece at (x, y) */
+ render_block_fill(psx_gpu, color, x, 0, width, height_b); /* row overflow from row 0 */
+ }
+ else
+ {
+ render_block_fill(psx_gpu, color, x, y, width, height); /* no overflow: single fill */
+ }
+ }
break;
+ }
case 0x20 ... 0x23:
{
case 0xE1:
set_texture(psx_gpu, list[0] & 0x1FF);
+
if(list[0] & (1 << 9))
psx_gpu->render_state_base |= RENDER_STATE_DITHER;
else