summary |
shortlog |
log |
commit | commitdiff |
tree
raw |
patch |
inline | side by side (from parent 1: 32631e6)
No measurable performance improvement was seen from the :64 alignment hint alone
on either Cortex-A8 or Cortex-A72, and Psybadek uses an unaligned VRAM location.
umull r12,r2, r3, r2
0:
pld [r1, #48*3]
umull r12,r2, r3, r2
0:
pld [r1, #48*3]
- vld3.8 {d0-d2}, [r1, :64]!
- vld3.8 {d3-d5}, [r1, :64]!
+ vld3.8 {d0-d2}, [r1]!
+ vld3.8 {d3-d5}, [r1]!
vswp d0, d2
vswp d3, d5
vst3.8 {d0-d2}, [r0, :64]!
vswp d0, d2
vswp d3, d5
vst3.8 {d0-d2}, [r0, :64]!
vdup.16 q15, r3
0:
pld [r1, #48*3]
vdup.16 q15, r3
0:
pld [r1, #48*3]
- vld3.8 {d1-d3}, [r1, :64]!
- vld3.8 {d5-d7}, [r1, :64]!
+ vld3.8 {d1-d3}, [r1]!
+ vld3.8 {d5-d7}, [r1]!
vshll.u8 q8, d2, #3 @ g
vshll.u8 q9, d6, #3
vshll.u8 q8, d2, #3 @ g
vshll.u8 q9, d6, #3
int i; \
\
vram += psx_offset_y * 1024 + psx_offset_x; \
int i; \
\
vram += psx_offset_y * 1024 + psx_offset_x; \
+ vram = (void *)((long)vram & ~3); \
for (i = psx_src_height; i > 0; i--, vram += psx_step * 1024, dst += dst_stride)\
blitfunc(dst, vram, len); \
}
for (i = psx_src_height; i > 0; i--, vram += psx_step * 1024, dst += dst_stride)\
blitfunc(dst, vram, len); \
}
+ src = (void *)((uintptr_t)src & ~3); // align for the blitter
+
for (; h1-- > 0; dest += dstride * 2, src += stride)
{
bgr555_to_rgb565(dest, src, w * 2);
for (; h1-- > 0; dest += dstride * 2, src += stride)
{
bgr555_to_rgb565(dest, src, w * 2);
- int x = gpu.screen.x & ~1; // alignment needed by blitter
int y = gpu.screen.y;
int w = gpu.screen.w;
int h = gpu.screen.h;
int y = gpu.screen.y;
int w = gpu.screen.w;
int h = gpu.screen.h;