notaz.gp2x.de
/
pcsx_rearmed.git
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (from parent 1:
cd23069
)
psx_gpu: convert to UAL, load everything from context
author
notaz
<notasas@gmail.com>
Mon, 1 Apr 2013 00:03:52 +0000
(
03:03
+0300)
committer
notaz
<notasas@gmail.com>
Mon, 1 Apr 2013 15:33:15 +0000
(18:33 +0300)
plugins/gpu_neon/psx_gpu/psx_gpu.c
patch
|
blob
|
blame
|
history
plugins/gpu_neon/psx_gpu/psx_gpu.h
patch
|
blob
|
blame
|
history
plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S
patch
|
blob
|
blame
|
history
plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h
patch
|
blob
|
blame
|
history
plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c
patch
|
blob
|
blame
|
history
diff --git
a/plugins/gpu_neon/psx_gpu/psx_gpu.c
b/plugins/gpu_neon/psx_gpu/psx_gpu.c
index
f52e842
..
e113f06
100644
(file)
--- a/
plugins/gpu_neon/psx_gpu/psx_gpu.c
+++ b/
plugins/gpu_neon/psx_gpu/psx_gpu.c
@@
-5056,6
+5056,7
@@
void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram)
memset(psx_gpu->vram_ptr, 0, sizeof(u16) * 1024 * 512);
initialize_reciprocal_table();
memset(psx_gpu->vram_ptr, 0, sizeof(u16) * 1024 * 512);
initialize_reciprocal_table();
+ psx_gpu->reciprocal_table_ptr = reciprocal_table;
// 00 01 10 11
// 00 0 4 1 5
// 00 01 10 11
// 00 0 4 1 5
diff --git
a/plugins/gpu_neon/psx_gpu/psx_gpu.h
b/plugins/gpu_neon/psx_gpu/psx_gpu.h
index
846658c
..
1eaa99a
100644
(file)
--- a/
plugins/gpu_neon/psx_gpu/psx_gpu.h
+++ b/
plugins/gpu_neon/psx_gpu/psx_gpu.h
@@
-180,6
+180,8
@@
typedef struct
u16 clut_settings;
u16 texture_settings;
u16 clut_settings;
u16 texture_settings;
+ u32 *reciprocal_table_ptr;
+
// enhancement stuff
u16 *enhancement_buf_ptr;
u16 *enhancement_current_buf_ptr;
// enhancement stuff
u16 *enhancement_buf_ptr;
u16 *enhancement_current_buf_ptr;
@@
-192,7
+194,7
@@
typedef struct
// Align up to 64 byte boundary to keep the upcoming buffers cache line
// aligned, also make reachable with single immediate addition
// Align up to 64 byte boundary to keep the upcoming buffers cache line
// aligned, also make reachable with single immediate addition
- u8 reserved_a[164];
+ u8 reserved_a[160];
// 8KB
block_struct blocks[MAX_BLOCKS_PER_ROW];
// 8KB
block_struct blocks[MAX_BLOCKS_PER_ROW];
diff --git
a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S
b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S
index
d8fb153
..
8df7aca
100644
(file)
--- a/
plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S
+++ b/
plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S
@@
-31,6
+31,8
@@
#define edge_data_right_mask_offset 4
#define edge_data_y_offset 6
#define edge_data_right_mask_offset 4
#define edge_data_y_offset 6
+.syntax unified
+.text
#define psx_gpu r0
#define v_a r1
#define psx_gpu r0
#define v_a r1
@@
-192,18
+194,6
@@
.align 4
.align 4
-/* FIXME: users of this should be in psx_gpu instead */
-#ifndef __PIC__
-#define load_pointer(register, pointer) \
- movw register, :lower16:pointer; \
- movt register, :upper16:pointer; \
-
-#else
-#define load_pointer(register, pointer) \
- ldr register, =pointer \
-
-#endif
-
#define function(name) \
.global name; \
name: \
#define function(name) \
.global name; \
name: \
@@
-576,7
+566,7
@@
function(compute_all_gradients)
vld1.32 { uvrg }, [ temp ]; \
add temp, psx_gpu, #psx_gpu_uvrg_dy_offset; \
vld1.32 { uvrg_dy }, [ temp ]; \
vld1.32 { uvrg }, [ temp ]; \
add temp, psx_gpu, #psx_gpu_uvrg_dy_offset; \
vld1.32 { uvrg_dy }, [ temp ]; \
- load_pointer(reciprocal_table_ptr, reciprocal_table); \
+ ldr reciprocal_table_ptr, [ psx_gpu, #psx_gpu_reciprocal_table_ptr_offset ]; \
\
vmov.u32 c_0x01, #0x01 \
\
vmov.u32 c_0x01, #0x01 \
@@
-624,7
+614,7
@@
function(compute_all_gradients)
#define height_b_alt r12
#define compute_edge_delta_x3(start_c, height_a, height_b) \
#define height_b_alt r12
#define compute_edge_delta_x3(start_c, height_a, height_b) \
- vmov.u32 heights, height_a, height_b; \
+ vmov heights, height_a, height_b; \
ldr temp, [ reciprocal_table_ptr, height_a, lsl #2 ]; \
vmov.u32 edge_shifts[0], temp; \
ldr temp, [ reciprocal_table_ptr, height_b, lsl #2 ]; \
ldr temp, [ reciprocal_table_ptr, height_a, lsl #2 ]; \
vmov.u32 edge_shifts[0], temp; \
ldr temp, [ reciprocal_table_ptr, height_b, lsl #2 ]; \
@@
-884,7
+874,7
@@
function(compute_all_gradients)
add temp, temp, #(1 << 16); \
add y_a, temp, #2; \
add y_a, y_a, #(2 << 16); \
add temp, temp, #(1 << 16); \
add y_a, temp, #2; \
add y_a, y_a, #(2 << 16); \
- vmov.u32 y_x4, temp, y_a; \
+ vmov y_x4, temp, y_a; \
\
setup_spans_adjust_edges_alternate_##alternate_active(left_index, \
right_index); \
\
setup_spans_adjust_edges_alternate_##alternate_active(left_index, \
right_index); \
@@
-939,7
+929,7
@@
function(compute_all_gradients)
sub temp, temp, #(1 << 16); \
sub y_a, temp, #2; \
sub y_a, y_a, #(2 << 16); \
sub temp, temp, #(1 << 16); \
sub y_a, temp, #2; \
sub y_a, y_a, #(2 << 16); \
- vmov.u32 y_x4, temp, y_a; \
+ vmov y_x4, temp, y_a; \
\
vaddw.s32 edges_xy, edges_xy, edges_dx_dy; \
\
\
vaddw.s32 edges_xy, edges_xy, edges_dx_dy; \
\
@@
-970,7
+960,7
@@
function(compute_all_gradients)
sub height, y_a, y_c; \
\
vdup.u32 x_starts, x_a; \
sub height, y_a, y_c; \
\
vdup.u32 x_starts, x_a; \
- vmov.u32 x_ends, x_c, x_b; \
+ vmov x_ends, x_c, x_b; \
\
compute_edge_delta_x3(x_b, height_major, height_minor_a); \
setup_spans_up(major, minor, minor, yes); \
\
compute_edge_delta_x3(x_b, height_major, height_minor_a); \
setup_spans_up(major, minor, minor, yes); \
@@
-982,8
+972,6
@@
function(setup_spans_up_left)
function(setup_spans_up_right)
setup_spans_up_up(right, left)
function(setup_spans_up_right)
setup_spans_up_up(right, left)
-.pool
-
#define setup_spans_down_down(minor, major) \
setup_spans_prologue(); \
sub height_minor_a, y_b, y_a; \
#define setup_spans_down_down(minor, major) \
setup_spans_prologue(); \
sub height_minor_a, y_b, y_a; \
@@
-991,7
+979,7
@@
function(setup_spans_up_right)
sub height, y_c, y_a; \
\
vdup.u32 x_starts, x_a; \
sub height, y_c, y_a; \
\
vdup.u32 x_starts, x_a; \
- vmov.u32 x_ends, x_c, x_b; \
+ vmov x_ends, x_c, x_b; \
\
compute_edge_delta_x3(x_b, height_major, height_minor_a); \
setup_spans_down(major, minor, minor, yes); \
\
compute_edge_delta_x3(x_b, height_major, height_minor_a); \
setup_spans_down(major, minor, minor, yes); \
@@
-1014,7
+1002,7
@@
function(setup_spans_down_right)
function(setup_spans_up_a)
setup_spans_prologue()
function(setup_spans_up_a)
setup_spans_prologue()
- vmov.u32 x_starts, x_a, x_b
+ vmov x_starts, x_a, x_b
vdup.u32 x_ends, x_c
setup_spans_up_flat()
vdup.u32 x_ends, x_c
setup_spans_up_flat()
@@
-1023,7
+1011,7
@@
function(setup_spans_up_b)
setup_spans_prologue()
vdup.u32 x_starts, x_a
setup_spans_prologue()
vdup.u32 x_starts, x_a
- vmov.u32 x_ends, x_b, x_c
+ vmov x_ends, x_b, x_c
setup_spans_up_flat()
setup_spans_up_flat()
@@
-1037,7
+1025,7
@@
function(setup_spans_up_b)
function(setup_spans_down_a)
setup_spans_prologue()
function(setup_spans_down_a)
setup_spans_prologue()
- vmov.u32 x_starts, x_a, x_b
+ vmov x_starts, x_a, x_b
vdup.u32 x_ends, x_c
setup_spans_down_flat()
vdup.u32 x_ends, x_c
setup_spans_down_flat()
@@
-1046,7
+1034,7
@@
function(setup_spans_down_b)
setup_spans_prologue()
vdup.u32 x_starts, x_a
setup_spans_prologue()
vdup.u32 x_starts, x_a
- vmov.u32 x_ends, x_b, x_c
+ vmov x_ends, x_b, x_c
setup_spans_down_flat()
setup_spans_down_flat()
@@
-1077,13
+1065,13
@@
function(setup_spans_up_down)
sub height_minor_b, y_c, y_a
sub height_major, y_c, y_b
sub height_minor_b, y_c, y_a
sub height_major, y_c, y_b
- vmov.u32 x_starts, x_a, x_c
+ vmov x_starts, x_a, x_c
vdup.u32 x_ends, x_b
compute_edge_delta_x3(x_a, height_minor_a, height_major)
mov temp, #0
vdup.u32 x_ends, x_b
compute_edge_delta_x3(x_a, height_minor_a, height_major)
mov temp, #0
- vmov.u32 height_increment, temp, height_minor_b
+ vmov height_increment, temp, height_minor_b
vmlal.s32 edges_xy, edges_dx_dy, height_increment
vmov edges_xy_b_left, edge_alt_low, edge_alt_high
vmlal.s32 edges_xy, edges_dx_dy, height_increment
vmov edges_xy_b_left, edge_alt_low, edge_alt_high
@@
-1120,7
+1108,7
@@
function(setup_spans_up_down)
sub temp, temp, #(1 << 16)
sub y_a, temp, #2
sub y_a, y_a, #(2 << 16)
sub temp, temp, #(1 << 16)
sub y_a, temp, #2
sub y_a, y_a, #(2 << 16)
- vmov.u32 y_x4, temp, y_a
+ vmov y_x4, temp, y_a
vaddw.s32 edges_xy, edges_xy, edges_dx_dy
vaddw.s32 edges_xy, edges_xy, edges_dx_dy
@@
-1170,7
+1158,7
@@
function(setup_spans_up_down)
add temp, temp, #(1 << 16)
add y_a, temp, #2
add y_a, y_a, #(2 << 16)
add temp, temp, #(1 << 16)
add y_a, temp, #2
add y_a, y_a, #(2 << 16)
- vmov.u32 y_x4, temp, y_a
+ vmov y_x4, temp, y_a
setup_spans_adjust_edges_alternate_no(left, right)
setup_spans_adjust_edges_alternate_no(left, right)
@@
-1204,8
+1192,6
@@
function(setup_spans_up_down)
bne 2b
bal 1b
bne 2b
bal 1b
-.pool
-
#undef span_uvrg_offset
#undef span_edge_data
#undef span_b_offset
#undef span_uvrg_offset
#undef span_edge_data
#undef span_b_offset
@@
-1936,7
+1922,7
@@
function(setup_blocks_unshaded_untextured_undithered_unswizzled_direct)
vdup.u16 colors, color
add span_edge_data, psx_gpu, #psx_gpu_span_edge_data_offset
vdup.u16 colors, color
add span_edge_data, psx_gpu, #psx_gpu_span_edge_data_offset
- orr color, color, lsl #16
+ orr color, color, color, lsl #16
0:
0:
@@
-1978,7
+1964,7
@@
function(setup_blocks_unshaded_untextured_undithered_unswizzled_direct)
moveq right_mask, right_mask, lsr #2
tst right_mask, #0x1
moveq right_mask, right_mask, lsr #2
tst right_mask, #0x1
- streqh color, [ fb_ptr ]
+ strheq color, [ fb_ptr ]
1:
add span_edge_data, span_edge_data, #8
1:
add span_edge_data, span_edge_data, #8
@@
-2690,7
+2676,7
@@
function(texture_blocks_4bpp)
orr pixels_a, pixels_a, pixel_3, lsl #24
orr pixels_b, pixels_b, pixel_7, lsl #24
orr pixels_a, pixels_a, pixel_3, lsl #24
orr pixels_b, pixels_b, pixel_7, lsl #24
- vmov.u32 texels, pixels_a, pixels_b
+ vmov texels, pixels_a, pixels_b
vtbl.8 texels_low, { clut_low_a, clut_low_b }, texels
vtbl.8 texels_high, { clut_high_a, clut_high_b }, texels
vtbl.8 texels_low, { clut_low_a, clut_low_b }, texels
vtbl.8 texels_high, { clut_high_a, clut_high_b }, texels
@@
-4751,7
+4737,7
@@
setup_sprite_update_texture_8bpp_cache:
mov fb_ptr_advance_column, #32; \
vdup.u8 draw_mask_fb_ptr_left, block_masks[0]; \
\
mov fb_ptr_advance_column, #32; \
vdup.u8 draw_mask_fb_ptr_left, block_masks[0]; \
\
- sub fb_ptr_advance_column, height, lsl #11; \
+ sub fb_ptr_advance_column, fb_ptr_advance_column, height, lsl #11; \
vdup.u8 draw_mask_fb_ptr_right, block_masks[1] \
#define setup_sprite_setup_right_draw_mask_fb_ptr() \
vdup.u8 draw_mask_fb_ptr_right, block_masks[1] \
#define setup_sprite_setup_right_draw_mask_fb_ptr() \
@@
-5095,7
+5081,7
@@
setup_sprite_update_texture_8bpp_cache:
mov fb_ptr_advance_column, #32 * 2; \
vdup.u8 draw_mask_fb_ptr_left_a, block_masks[0]; \
vdup.u8 draw_mask_fb_ptr_left_b, block_masks[1]; \
mov fb_ptr_advance_column, #32 * 2; \
vdup.u8 draw_mask_fb_ptr_left_a, block_masks[0]; \
vdup.u8 draw_mask_fb_ptr_left_b, block_masks[1]; \
- sub fb_ptr_advance_column, height, lsl #11 + 1; \
+ sub fb_ptr_advance_column, fb_ptr_advance_column, height, lsl #11 + 1; \
vdup.u8 draw_mask_fb_ptr_right_a, block_masks[2]; \
vdup.u8 draw_mask_fb_ptr_right_b, block_masks[3] \
vdup.u8 draw_mask_fb_ptr_right_a, block_masks[2]; \
vdup.u8 draw_mask_fb_ptr_right_b, block_masks[3] \
@@
-5428,7
+5414,7
@@
function(setup_sprite_16bpp)
add texture_offset_base, u, u
add width_rounded, width, #7
add texture_offset_base, u, u
add width_rounded, width, #7
- add texture_offset_base, v, lsl #11
+ add texture_offset_base, texture_offset_base, v, lsl #11
mov left_mask_bits, #0xFF
ldrb texture_mask_width, [ psx_gpu, #psx_gpu_texture_mask_width_offset ]
mov left_mask_bits, #0xFF
ldrb texture_mask_width, [ psx_gpu, #psx_gpu_texture_mask_width_offset ]
@@
-5443,7
+5429,7
@@
function(setup_sprite_16bpp)
and right_width, width_rounded, #0x7
mvn left_mask_bits, left_mask_bits, lsl left_offset
and right_width, width_rounded, #0x7
mvn left_mask_bits, left_mask_bits, lsl left_offset
- add texture_mask, texture_mask_height, lsl #11
+ add texture_mask, texture_mask, texture_mask_height, lsl #11
mov block_width, width_rounded, lsr #3
mov right_mask_bits, right_mask_bits, lsl right_width
mov block_width, width_rounded, lsr #3
mov right_mask_bits, right_mask_bits, lsl right_width
@@
-5590,7
+5576,7
@@
function(setup_sprite_16bpp_4x)
add texture_offset_base, u, u
add width_rounded, width, #7
add texture_offset_base, u, u
add width_rounded, width, #7
- add texture_offset_base, v, lsl #11
+ add texture_offset_base, texture_offset_base, v, lsl #11
movw left_mask_bits, #0xFFFF
ldrb texture_mask_width, [ psx_gpu, #psx_gpu_texture_mask_width_offset ]
movw left_mask_bits, #0xFFFF
ldrb texture_mask_width, [ psx_gpu, #psx_gpu_texture_mask_width_offset ]
@@
-5609,7
+5595,7
@@
function(setup_sprite_16bpp_4x)
lsl right_width, #1
lsl right_width, #1
- add texture_mask, texture_mask_height, lsl #11
+ add texture_mask, texture_mask, texture_mask_height, lsl #11
mov block_width, width_rounded, lsr #3
mov right_mask_bits, right_mask_bits, lsl right_width
mov block_width, width_rounded, lsr #3
mov right_mask_bits, right_mask_bits, lsl right_width
@@
-5760,7
+5746,7
@@
function(setup_sprite_untextured)
ldrh r12, [ psx_gpu, #psx_gpu_render_state_offset ]
tst r12, #(RENDER_STATE_MASK_EVALUATE | RENDER_FLAGS_MODULATE_TEXELS \
| RENDER_FLAGS_BLEND)
ldrh r12, [ psx_gpu, #psx_gpu_render_state_offset ]
tst r12, #(RENDER_STATE_MASK_EVALUATE | RENDER_FLAGS_MODULATE_TEXELS \
| RENDER_FLAGS_BLEND)
- ldreqb r12, [ psx_gpu, #psx_gpu_render_mode_offset ]
+ ldrbeq r12, [ psx_gpu, #psx_gpu_render_mode_offset ]
tsteq r12, #RENDER_INTERLACE_ENABLED
beq setup_sprite_untextured_simple
tsteq r12, #RENDER_INTERLACE_ENABLED
beq setup_sprite_untextured_simple
@@
-6081,7
+6067,7
@@
function(scale2x_tiles8)
mov r14, r2
add r0, #1024*2*2
add r4, #1024*2
mov r14, r2
add r0, #1024*2*2
add r4, #1024*2
- sub r0, r2, lsl #4+1
+ sub r0, r0, r2, lsl #4+1
mov r1, r4
add r12, r0, #1024*2
bgt 0b
mov r1, r4
add r12, r0, #1024*2
bgt 0b
diff --git
a/plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h
b/plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h
index
1307891
..
5460e40
100644
(file)
--- a/
plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h
+++ b/
plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h
@@
-48,6
+48,7
@@
#define psx_gpu_offset_y_offset 0x102
#define psx_gpu_clut_settings_offset 0x104
#define psx_gpu_texture_settings_offset 0x106
#define psx_gpu_offset_y_offset 0x102
#define psx_gpu_clut_settings_offset 0x104
#define psx_gpu_texture_settings_offset 0x106
+#define psx_gpu_reciprocal_table_ptr_offset 0x108
#define psx_gpu_blocks_offset 0x200
#define psx_gpu_span_uvrg_offset_offset 0x2200
#define psx_gpu_span_edge_data_offset 0x4200
#define psx_gpu_blocks_offset 0x200
#define psx_gpu_span_uvrg_offset_offset 0x2200
#define psx_gpu_span_edge_data_offset 0x4200
diff --git
a/plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c
b/plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c
index
5adfb75
..
b1de121
100644
(file)
--- a/
plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c
+++ b/
plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c
@@
-73,6
+73,7
@@
int main()
WRITE_OFFSET(f, offset_y);
WRITE_OFFSET(f, clut_settings);
WRITE_OFFSET(f, texture_settings);
WRITE_OFFSET(f, offset_y);
WRITE_OFFSET(f, clut_settings);
WRITE_OFFSET(f, texture_settings);
+ WRITE_OFFSET(f, reciprocal_table_ptr);
WRITE_OFFSET(f, blocks);
WRITE_OFFSET(f, span_uvrg_offset);
WRITE_OFFSET(f, span_edge_data);
WRITE_OFFSET(f, blocks);
WRITE_OFFSET(f, span_uvrg_offset);
WRITE_OFFSET(f, span_edge_data);