no measurable perf improvement was seen from :64 alignment alone on either
Cortex-A8 or Cortex-A72, and Psybadek uses an unaligned VRAM location.
umull r12,r2, r3, r2
0:
pld [r1, #48*3]
- vld3.8 {d0-d2}, [r1, :64]!
- vld3.8 {d3-d5}, [r1, :64]!
+ vld3.8 {d0-d2}, [r1]!
+ vld3.8 {d3-d5}, [r1]!
vswp d0, d2
vswp d3, d5
vst3.8 {d0-d2}, [r0, :64]!
vdup.16 q15, r3
0:
pld [r1, #48*3]
- vld3.8 {d1-d3}, [r1, :64]!
- vld3.8 {d5-d7}, [r1, :64]!
+ vld3.8 {d1-d3}, [r1]!
+ vld3.8 {d5-d7}, [r1]!
vshll.u8 q8, d2, #3 @ g
vshll.u8 q9, d6, #3
int i; \
\
vram += psx_offset_y * 1024 + psx_offset_x; \
+ vram = (void *)((long)vram & ~3); \
for (i = psx_src_height; i > 0; i--, vram += psx_step * 1024, dst += dst_stride)\
blitfunc(dst, vram, len); \
}
#endif
else
{
+ src = (void *)((uintptr_t)src & ~3); // align for the blitter
+
for (; h1-- > 0; dest += dstride * 2, src += stride)
{
bgr555_to_rgb565(dest, src, w * 2);
void vout_update(void)
{
- int x = gpu.screen.x & ~1; // alignment needed by blitter
+ int x = gpu.screen.x;
int y = gpu.screen.y;
int w = gpu.screen.w;
int h = gpu.screen.h;