X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=common%2Farm_utils.s;h=b93b99f0ad33cc96e1793d9386465681f5464638;hb=2f3ca01638b740bc8ca5530dac8f3e2d2e069cb1;hp=9d516493901b2020c66997b90d9fd35e4621f316;hpb=095a240bd91548be4b9770d28d5d6c3e97637094;p=libpicofe.git diff --git a/common/arm_utils.s b/common/arm_utils.s index 9d51649..b93b99f 100644 --- a/common/arm_utils.s +++ b/common/arm_utils.s @@ -137,69 +137,201 @@ vidConvCpyRGB32hi: @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -@ mode2 blitter for 40 cols -.global vidCpyM2_40col @ void *dest, void *src - -vidCpyM2_40col: +@ mode2 blitter +.global vidcpy_m2 @ void *dest, void *src, int m32col, int with_32c_border +vidcpy_m2: stmfd sp!, {r4-r6,lr} mov r12, #224 @ lines + add r0, r0, #320*8 add r1, r1, #8 + mov lr, #0 + + tst r2, r2 + movne lr, #64 + tstne r3, r3 + addne r0, r0, #32 -vidCpyM2_40_loop_out: +vidCpyM2_loop_out: mov r6, #10 -vidCpyM2_40_loop: + sub r6, r6, lr, lsr #5 @ -= 2 in 32col mode +vidCpyM2_loop: subs r6, r6, #1 - ldmia r1!, {r2-r5} - stmia r0!, {r2-r5} - ldmia r1!, {r2-r5} - stmia r0!, {r2-r5} - bne vidCpyM2_40_loop + ldmia r1!, {r2-r5} + stmia r0!, {r2-r5} + ldmia r1!, {r2-r5} + stmia r0!, {r2-r5} + bne vidCpyM2_loop + subs r12,r12,#1 + add r0, r0, lr add r1, r1, #8 - bne vidCpyM2_40_loop_out - - ldmfd sp!, {r4-r6,lr} - bx lr + add r1, r1, lr + bne vidCpyM2_loop_out + ldmfd sp!, {r4-r6,pc} -@ mode2 blitter for 32 cols -.global vidCpyM2_32col @ void *dest, void *src -vidCpyM2_32col: - stmfd sp!, {r4-r6,lr} - - mov r12, #224 @ lines +.global vidcpy_m2_rot @ void *dest, void *src, int m32col, int with_32c_border +vidcpy_m2_rot: + stmfd sp!,{r4-r8,lr} add r1, r1, #8 - add r0, r0, #32 - -vidCpyM2_32_loop_out: - mov r6, #8 -vidCpyM2_32_loop: - subs r6, r6, #1 - ldmia r1!, {r2-r5} - stmia r0!, {r2-r5} - ldmia r1!, {r2-r5} - stmia r0!, {r2-r5} - bne vidCpyM2_32_loop - subs r12,r12,#1 - add r0, r0, #64 - add r1, r1, #8+64 - bne vidCpyM2_32_loop_out - - ldmfd sp!, {r4-r6,lr} - bx lr - - -@ mode2 blitter for 32 cols with no borders -.global vidCpyM2_32col_nobord @ void *dest, void *src + tst r2, r2 + subne r1, r1, #32 @ adjust + + mov r4, r0 + mov r5, r1 + mov r6, r2 + mov r7, #8+4 + +vidcpy_m2_rot_loop: + @ a bit lame but oh well.. + mov r0, r4 + mov r1, r5 + mov r2, r7 + mov r3, r6 + mov r8, #328 + adr lr, after_rot_blit8 + stmfd sp!,{r4-r8,lr} + b rotated_blit8_2 + +after_rot_blit8: + add r5, r5, #328*4 + add r7, r7, #4 + cmp r7, #224+8+4 + ldmgefd sp!,{r4-r8,pc} + b vidcpy_m2_rot_loop + + +.global rotated_blit8 @ void *dst, void *linesx4, u32 y, int is_32col +rotated_blit8: + stmfd sp!,{r4-r8,lr} + mov r8, #320 + +rotated_blit8_2: + add r0, r0, #(240*320) + sub r0, r0, #(240+4) @ y starts from 4 + add r0, r0, r2 + + tst r3, r3 + subne r0, r0, #(240*32) + addne r1, r1, #32 + movne lr, #256/4 + moveq lr, #320/4 + +rotated_blit_loop8: + mov r6, r1 + ldr r2, [r6], r8 + ldr r3, [r6], r8 + ldr r4, [r6], r8 + ldr r5, [r6], r8 + + mov r6, r2, lsl #24 + mov r6, r6, lsr #8 + orr r6, r6, r3, lsl #24 + mov r6, r6, lsr #8 + orr r6, r6, r4, lsl #24 + mov r6, r6, lsr #8 + orr r6, r6, r5, lsl #24 + str r6, [r0], #-240 + + and r6, r3, #0xff00 + and r7, r2, #0xff00 + orr r6, r6, r7, lsr #8 + and r7, r4, #0xff00 + orr r6, r6, r7, lsl #8 + and r7, r5, #0xff00 + orr r6, r6, r7, lsl #16 + str r6, [r0], #-240 + + and r6, r4, #0xff0000 + and r7, r2, #0xff0000 + orr r6, r6, r7, lsr #16 + and r7, r3, #0xff0000 + orr r6, r6, r7, lsr #8 + and r7, r5, #0xff0000 + orr r6, r6, r7, lsl #8 + str r6, [r0], #-240 + + mov r6, r5, lsr #24 + mov r6, r6, lsl #8 + orr r6, r6, r4, lsr #24 + mov r6, r6, lsl #8 + orr r6, r6, r3, lsr #24 + mov r6, r6, lsl #8 + orr r6, r6, r2, lsr #24 + str r6, [r0], #-240 + + subs lr, lr, #1 + add r1, r1, #4 + bne rotated_blit_loop8 + + ldmfd sp!,{r4-r8,pc} + + +@ input: r2-r5 +@ output: r7,r8 +@ trash: r6 +.macro rb_line_low + mov r6, r2, lsl #16 + mov r7, r3, lsl #16 + orr r7, r7, r6, lsr #16 + mov r6, r4, lsl #16 + mov r8, r5, lsl #16 + orr r8, r8, r6, lsr #16 +.endm -vidCpyM2_32col_nobord: - stmfd sp!, {r4-r6,lr} +.macro rb_line_hi + mov r6, r2, lsr #16 + mov r7, r3, lsr #16 + orr r7, r6, r7, lsl #16 + mov r6, r4, lsr #16 + mov r8, r5, lsr #16 + orr r8, r6, r8, lsl #16 +.endm - mov r12, #224 @ lines +.global rotated_blit16 @ void *dst, void *linesx4, u32 y, int is_32col +rotated_blit16: + stmfd sp!,{r4-r8,lr} + + add r0, r0, #(240*320)*2 + sub r0, r0, #(240+4)*2 @ y starts from 4 + add r0, r0, r2, lsl #1 + + tst r3, r3 + subne r0, r0, #(240*32)*2 + addne r1, r1, #32*2 + movne lr, #256/4 + moveq lr, #320/4 + +rotated_blit_loop16: + ldr r2, [r1, #320*0*2] + ldr r3, [r1, #320*1*2] + ldr r4, [r1, #320*2*2] + ldr r5, [r1, #320*3*2] + rb_line_low + stmia r0, {r7,r8} + sub r0, r0, #240*2 + rb_line_hi + stmia r0, {r7,r8} + sub r0, r0, #240*2 + + ldr r2, [r1, #320*0*2+4] + ldr r3, [r1, #320*1*2+4] + ldr r4, [r1, #320*2*2+4] + ldr r5, [r1, #320*3*2+4] + rb_line_low + stmia r0, {r7,r8} + sub r0, r0, #240*2 + rb_line_hi + stmia r0, {r7,r8} + sub r0, r0, #240*2 + + subs lr, lr, #1 add r1, r1, #8 - b vidCpyM2_32_loop_out + bne rotated_blit_loop16 + + ldmfd sp!,{r4-r8,pc} .global spend_cycles @ c @@ -213,4 +345,3 @@ spend_cycles: bx lr -