X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=common%2Farm_utils.s;h=fd13cac6bf109070b2c7648f9feb844d718761cc;hb=1eb704b6332072581bf7c3bc411babc66f8bda8a;hp=9d516493901b2020c66997b90d9fd35e4621f316;hpb=095a240bd91548be4b9770d28d5d6c3e97637094;p=libpicofe.git diff --git a/common/arm_utils.s b/common/arm_utils.s index 9d51649..fd13cac 100644 --- a/common/arm_utils.s +++ b/common/arm_utils.s @@ -13,7 +13,6 @@ @ to 00000000 rrr00000 ggg00000 bbb00000 ... @ lr = 0x00e000e0, out: r3=lower_pix, r2=higher_pix; trashes rin -@ if sh==2, r8=0x00404040 (sh!=0 destroys flags!) .macro convRGB32_2 rin sh=0 and r2, lr, \rin, lsr #4 @ blue and r3, \rin, lr @@ -59,16 +58,19 @@ .endif orr r2, r2, r2, lsr #3 +.if \sh == 1 + str r2, [r0, #0x40*2*4] +.endif str r2, [r0], #4 .endm -.global vidConvCpyRGB32 @ void *to, void *from, int pixels +.global bgr444_to_rgb32 @ void *to, void *from -vidConvCpyRGB32: +bgr444_to_rgb32: stmfd sp!, {r4-r7,lr} - mov r12, r2, lsr #3 @ repeats + mov r12, #0x40>>3 @ repeats mov lr, #0x00e00000 orr lr, lr, #0x00e0 @@ -80,19 +82,18 @@ vidConvCpyRGB32: convRGB32_2 r5 convRGB32_2 r6 convRGB32_2 r7 - bgt .loopRGB32 - ldmfd sp!, {r4-r7,lr} - bx lr + ldmfd sp!, {r4-r7,pc} -.global vidConvCpyRGB32sh @ void *to, void *from, int pixels +.global bgr444_to_rgb32_sh @ void *to, void *from -vidConvCpyRGB32sh: +bgr444_to_rgb32_sh: stmfd sp!, {r4-r7,lr} - mov r12, r2, lsr #3 @ repeats + mov r12, #0x40>>3 @ repeats + add r0, r0, #0x40*4 mov lr, #0x00e00000 orr lr, lr, #0x00e0 @@ -104,21 +105,10 @@ vidConvCpyRGB32sh: convRGB32_2 r5, 1 convRGB32_2 r6, 1 convRGB32_2 r7, 1 - bgt .loopRGB32sh - ldmfd sp!, {r4-r7,lr} - bx lr - - -.global vidConvCpyRGB32hi @ void *to, void *from, int pixels - -vidConvCpyRGB32hi: - stmfd sp!, {r4-r7,lr} - - mov r12, r2, lsr #3 @ repeats - mov lr, #0x00e00000 - orr lr, lr, #0x00e0 + mov r12, #0x40>>3 @ repeats + sub r1, r1, #0x40*2 .loopRGB32hi: ldmia r1!, {r4-r7} @@ -137,69 +127,201 @@ vidConvCpyRGB32hi: @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -@ mode2 blitter for 40 cols -.global vidCpyM2_40col @ void *dest, void *src - -vidCpyM2_40col: +@ mode2 blitter +.global vidcpy_m2 @ void *dest, void *src, int m32col, int with_32c_border +vidcpy_m2: stmfd sp!, {r4-r6,lr} mov r12, #224 @ lines + add r0, r0, #320*8 add r1, r1, #8 + mov lr, #0 -vidCpyM2_40_loop_out: + tst r2, r2 + movne lr, #64 + tstne r3, r3 + addne r0, r0, #32 + +vidCpyM2_loop_out: mov r6, #10 -vidCpyM2_40_loop: + sub r6, r6, lr, lsr #5 @ -= 2 in 32col mode +vidCpyM2_loop: subs r6, r6, #1 - ldmia r1!, {r2-r5} - stmia r0!, {r2-r5} - ldmia r1!, {r2-r5} - stmia r0!, {r2-r5} - bne vidCpyM2_40_loop + ldmia r1!, {r2-r5} + stmia r0!, {r2-r5} + ldmia r1!, {r2-r5} + stmia r0!, {r2-r5} + bne vidCpyM2_loop + subs r12,r12,#1 + add r0, r0, lr add r1, r1, #8 - bne vidCpyM2_40_loop_out - - ldmfd sp!, {r4-r6,lr} - bx lr + add r1, r1, lr + bne vidCpyM2_loop_out + ldmfd sp!, {r4-r6,pc} -@ mode2 blitter for 32 cols -.global vidCpyM2_32col @ void *dest, void *src -vidCpyM2_32col: - stmfd sp!, {r4-r6,lr} - - mov r12, #224 @ lines +.global vidcpy_m2_rot @ void *dest, void *src, int m32col, int with_32c_border +vidcpy_m2_rot: + stmfd sp!,{r4-r8,lr} add r1, r1, #8 - add r0, r0, #32 - -vidCpyM2_32_loop_out: - mov r6, #8 -vidCpyM2_32_loop: - subs r6, r6, #1 - ldmia r1!, {r2-r5} - stmia r0!, {r2-r5} - ldmia r1!, {r2-r5} - stmia r0!, {r2-r5} - bne vidCpyM2_32_loop - subs r12,r12,#1 - add r0, r0, #64 - add r1, r1, #8+64 - bne vidCpyM2_32_loop_out - - ldmfd sp!, {r4-r6,lr} - bx lr - - -@ mode2 blitter for 32 cols with no borders -.global vidCpyM2_32col_nobord @ void *dest, void *src + tst r2, r2 + subne r1, r1, #32 @ adjust + + mov r4, r0 + mov r5, r1 + mov r6, r2 + mov r7, #8+4 + +vidcpy_m2_rot_loop: + @ a bit lame but oh well.. + mov r0, r4 + mov r1, r5 + mov r2, r7 + mov r3, r6 + mov r8, #328 + adr lr, after_rot_blit8 + stmfd sp!,{r4-r8,lr} + b rotated_blit8_2 + +after_rot_blit8: + add r5, r5, #328*4 + add r7, r7, #4 + cmp r7, #224+8+4 + ldmgefd sp!,{r4-r8,pc} + b vidcpy_m2_rot_loop + + +.global rotated_blit8 @ void *dst, void *linesx4, u32 y, int is_32col +rotated_blit8: + stmfd sp!,{r4-r8,lr} + mov r8, #320 + +rotated_blit8_2: + add r0, r0, #(240*320) + sub r0, r0, #(240+4) @ y starts from 4 + add r0, r0, r2 + + tst r3, r3 + subne r0, r0, #(240*32) + addne r1, r1, #32 + movne lr, #256/4 + moveq lr, #320/4 + +rotated_blit_loop8: + mov r6, r1 + ldr r2, [r6], r8 + ldr r3, [r6], r8 + ldr r4, [r6], r8 + ldr r5, [r6], r8 + + mov r6, r2, lsl #24 + mov r6, r6, lsr #8 + orr r6, r6, r3, lsl #24 + mov r6, r6, lsr #8 + orr r6, r6, r4, lsl #24 + mov r6, r6, lsr #8 + orr r6, r6, r5, lsl #24 + str r6, [r0], #-240 + + and r6, r3, #0xff00 + and r7, r2, #0xff00 + orr r6, r6, r7, lsr #8 + and r7, r4, #0xff00 + orr r6, r6, r7, lsl #8 + and r7, r5, #0xff00 + orr r6, r6, r7, lsl #16 + str r6, [r0], #-240 + + and r6, r4, #0xff0000 + and r7, r2, #0xff0000 + orr r6, r6, r7, lsr #16 + and r7, r3, #0xff0000 + orr r6, r6, r7, lsr #8 + and r7, r5, #0xff0000 + orr r6, r6, r7, lsl #8 + str r6, [r0], #-240 + + mov r6, r5, lsr #24 + mov r6, r6, lsl #8 + orr r6, r6, r4, lsr #24 + mov r6, r6, lsl #8 + orr r6, r6, r3, lsr #24 + mov r6, r6, lsl #8 + orr r6, r6, r2, lsr #24 + str r6, [r0], #-240 + + subs lr, lr, #1 + add r1, r1, #4 + bne rotated_blit_loop8 + + ldmfd sp!,{r4-r8,pc} + + +@ input: r2-r5 +@ output: r7,r8 +@ trash: r6 +.macro rb_line_low + mov r6, r2, lsl #16 + mov r7, r3, lsl #16 + orr r7, r7, r6, lsr #16 + mov r6, r4, lsl #16 + mov r8, r5, lsl #16 + orr r8, r8, r6, lsr #16 +.endm -vidCpyM2_32col_nobord: - stmfd sp!, {r4-r6,lr} +.macro rb_line_hi + mov r6, r2, lsr #16 + mov r7, r3, lsr #16 + orr r7, r6, r7, lsl #16 + mov r6, r4, lsr #16 + mov r8, r5, lsr #16 + orr r8, r6, r8, lsl #16 +.endm - mov r12, #224 @ lines +.global rotated_blit16 @ void *dst, void *linesx4, u32 y, int is_32col +rotated_blit16: + stmfd sp!,{r4-r8,lr} + + add r0, r0, #(240*320)*2 + sub r0, r0, #(240+4)*2 @ y starts from 4 + add r0, r0, r2, lsl #1 + + tst r3, r3 + subne r0, r0, #(240*32)*2 + addne r1, r1, #32*2 + movne lr, #256/4 + moveq lr, #320/4 + +rotated_blit_loop16: + ldr r2, [r1, #320*0*2] + ldr r3, [r1, #320*1*2] + ldr r4, [r1, #320*2*2] + ldr r5, [r1, #320*3*2] + rb_line_low + stmia r0, {r7,r8} + sub r0, r0, #240*2 + rb_line_hi + stmia r0, {r7,r8} + sub r0, r0, #240*2 + + ldr r2, [r1, #320*0*2+4] + ldr r3, [r1, #320*1*2+4] + ldr r4, [r1, #320*2*2+4] + ldr r5, [r1, #320*3*2+4] + rb_line_low + stmia r0, {r7,r8} + sub r0, r0, #240*2 + rb_line_hi + stmia r0, {r7,r8} + sub r0, r0, #240*2 + + subs lr, lr, #1 add r1, r1, #8 - b vidCpyM2_32_loop_out + bne rotated_blit_loop16 + + ldmfd sp!,{r4-r8,pc} .global spend_cycles @ c @@ -213,4 +335,3 @@ spend_cycles: bx lr -