@ to 00000000 rrr00000 ggg00000 bbb00000 ...\r
\r
@ lr = 0x00e000e0, out: r3=lower_pix, r2=higher_pix; trashes rin\r
-@ if sh==2, r8=0x00404040 (sh!=0 destroys flags!)\r
.macro convRGB32_2 rin sh=0\r
and r2, lr, \rin, lsr #4 @ blue\r
and r3, \rin, lr\r
.endif\r
\r
orr r2, r2, r2, lsr #3\r
+.if \sh == 1\r
+ str r2, [r0, #0x40*2*4]\r
+.endif\r
str r2, [r0], #4\r
.endm\r
\r
\r
-.global vidConvCpyRGB32 @ void *to, void *from, int pixels\r
+.global bgr444_to_rgb32 @ void *to, void *from\r
\r
-vidConvCpyRGB32:\r
+bgr444_to_rgb32:\r
stmfd sp!, {r4-r7,lr}\r
\r
- mov r12, r2, lsr #3 @ repeats\r
+ mov r12, #0x40>>3 @ repeats\r
mov lr, #0x00e00000\r
orr lr, lr, #0x00e0\r
\r
convRGB32_2 r5\r
convRGB32_2 r6\r
convRGB32_2 r7\r
-\r
bgt .loopRGB32\r
\r
- ldmfd sp!, {r4-r7,lr}\r
- bx lr\r
+ ldmfd sp!, {r4-r7,pc}\r
\r
\r
-.global vidConvCpyRGB32sh @ void *to, void *from, int pixels\r
+.global bgr444_to_rgb32_sh @ void *to, void *from\r
\r
-vidConvCpyRGB32sh:\r
+bgr444_to_rgb32_sh:\r
stmfd sp!, {r4-r7,lr}\r
\r
- mov r12, r2, lsr #3 @ repeats\r
+ mov r12, #0x40>>3 @ repeats\r
+ add r0, r0, #0x40*4\r
mov lr, #0x00e00000\r
orr lr, lr, #0x00e0\r
\r
convRGB32_2 r5, 1\r
convRGB32_2 r6, 1\r
convRGB32_2 r7, 1\r
-\r
bgt .loopRGB32sh\r
\r
- ldmfd sp!, {r4-r7,lr}\r
- bx lr\r
-\r
-\r
-.global vidConvCpyRGB32hi @ void *to, void *from, int pixels\r
-\r
-vidConvCpyRGB32hi:\r
- stmfd sp!, {r4-r7,lr}\r
-\r
- mov r12, r2, lsr #3 @ repeats\r
- mov lr, #0x00e00000\r
- orr lr, lr, #0x00e0\r
+ mov r12, #0x40>>3 @ repeats\r
+ sub r1, r1, #0x40*2\r
\r
.loopRGB32hi:\r
ldmia r1!, {r4-r7}\r
@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\r
\r
\r
-@ mode2 blitter for 40 cols\r
-.global vidCpyM2_40col @ void *dest, void *src\r
-\r
-vidCpyM2_40col:\r
+@ mode2 blitter: copies 224 lines, 320 bytes each (256 bytes each when m32col != 0)
+.global vidcpy_m2 @ void *dest, void *src, int m32col, int with_32c_border
+vidcpy_m2:
stmfd sp!, {r4-r6,lr}

mov r12, #224 @ lines
+ add r0, r0, #320*8 @ dest starts 320*8 bytes in
add r1, r1, #8
+ mov lr, #0 @ lr = bytes skipped per line in dest and src (0 in 40col mode)

-vidCpyM2_40_loop_out:
+ tst r2, r2 @ m32col?
+ movne lr, #64 @ 32col: 256 bytes copied, 64 skipped per line
+ tstne r3, r3 @ 32col only: with_32c_border?
+ addne r0, r0, #32 @ both set: shift dest start by 32 bytes
+
+vidCpyM2_loop_out:
mov r6, #10
-vidCpyM2_40_loop:
+ sub r6, r6, lr, lsr #5 @ -= 2 in 32col mode
+vidCpyM2_loop:
subs r6, r6, #1
- ldmia r1!, {r2-r5}
- stmia r0!, {r2-r5}
- ldmia r1!, {r2-r5}
- stmia r0!, {r2-r5}
- bne vidCpyM2_40_loop
+ ldmia r1!, {r2-r5} @ 2 x 16 bytes per pass; ldm/stm leave the subs flags intact
+ stmia r0!, {r2-r5}
+ ldmia r1!, {r2-r5}
+ stmia r0!, {r2-r5}
+ bne vidCpyM2_loop @ until r6 == 0
+
subs r12,r12,#1
+ add r0, r0, lr @ dest: skip lr bytes (add without s keeps the flags)
add r1, r1, #8
- bne vidCpyM2_40_loop_out
-
- ldmfd sp!, {r4-r6,lr}
- bx lr
+ add r1, r1, lr @ src: skip lr bytes on top of the 8 above
+ bne vidCpyM2_loop_out @ until all 224 lines are done

+ ldmfd sp!, {r4-r6,pc} @ restore r4-r6 and return
\r
-@ mode2 blitter for 32 cols\r
-.global vidCpyM2_32col @ void *dest, void *src\r
\r
-vidCpyM2_32col:\r
- stmfd sp!, {r4-r6,lr}\r
-\r
- mov r12, #224 @ lines\r
+.global vidcpy_m2_rot @ void *dest, void *src, int m32col, int with_32c_border
+vidcpy_m2_rot: @ blits 224 source lines through rotated_blit8_2, 4 lines per call
+ stmfd sp!,{r4-r8,lr}
add r1, r1, #8
- add r0, r0, #32
-
-vidCpyM2_32_loop_out:
- mov r6, #8
-vidCpyM2_32_loop:
- subs r6, r6, #1
- ldmia r1!, {r2-r5}
- stmia r0!, {r2-r5}
- ldmia r1!, {r2-r5}
- stmia r0!, {r2-r5}
- bne vidCpyM2_32_loop
- subs r12,r12,#1
- add r0, r0, #64
- add r1, r1, #8+64
- bne vidCpyM2_32_loop_out
-
- ldmfd sp!, {r4-r6,lr}
- bx lr
-
-
-@ mode2 blitter for 32 cols with no borders
-.global vidCpyM2_32col_nobord @ void *dest, void *src
+ tst r2, r2 @ m32col?
+ subne r1, r1, #32 @ adjust: rotated_blit8_2 adds 32 to src when is_32col is set
+
+ mov r4, r0 @ r4 = dest (same for every call)
+ mov r5, r1 @ r5 = src pointer for the current group of 4 lines
+ mov r6, r2 @ r6 = m32col
+ mov r7, #8+4 @ r7 = y passed to the blitter, starts at 8+4
+
+vidcpy_m2_rot_loop:
+ @ a bit lame but oh well.. (re-load the argument registers and call the blitter once per 4 lines)
+ mov r0, r4 @ r0 = dst
+ mov r1, r5 @ r1 = linesx4
+ mov r2, r7 @ r2 = y
+ mov r3, r6 @ r3 = is_32col (the with_32c_border argument is not used here)
+ mov r8, #328 @ r8 = source line stride for rotated_blit8_2 (rotated_blit8 uses 320)
+ adr lr, after_rot_blit8 @ address the blitter's final ldmfd loads into pc
+ stmfd sp!,{r4-r8,lr} @ push the frame that the blitter's ldmfd {r4-r8,pc} pops
+ b rotated_blit8_2 @ plain branch: the "return" happens through the pushed lr
+
+after_rot_blit8:
+ add r5, r5, #328*4 @ advance src by 4 lines of 328 bytes
+ add r7, r7, #4 @ y += 4
+ cmp r7, #224+8+4 @ 224 lines done?
+ ldmgefd sp!,{r4-r8,pc} @ yes: restore r4-r8 and return
+ b vidcpy_m2_rot_loop
+\r
+\r
+.global rotated_blit8 @ void *dst, void *linesx4, u32 y, int is_32col
+rotated_blit8: @ transposes 4 source lines of bytes: each source column becomes one 4-byte word in dst
+ stmfd sp!,{r4-r8,lr}
+ mov r8, #320 @ r8 = byte distance between consecutive source lines
+
+rotated_blit8_2: @ second entry: expects {r4-r8,lr} already pushed and r8 = source line stride
+ add r0, r0, #(240*320) @ r0 = dst + 240*320 - (240+4) + y
+ sub r0, r0, #(240+4) @ y starts from 4
+ add r0, r0, r2
+
+ tst r3, r3 @ is_32col?
+ subne r0, r0, #(240*32) @ 32col: start 32 steps of 240 bytes lower in dst
+ addne r1, r1, #32 @ 32col: start 32 bytes into each source line
+ movne lr, #256/4 @ lr = loop count; each pass consumes 4 bytes of every line
+ moveq lr, #320/4
+
+rotated_blit_loop8:
+ mov r6, r1
+ ldr r2, [r6], r8 @ r2..r5 = word at the same offset in lines 0..3
+ ldr r3, [r6], r8
+ ldr r4, [r6], r8
+ ldr r5, [r6], r8
+
+ mov r6, r2, lsl #24 @ gather byte 0 of r2..r5 into r6 (r2's byte lowest, r5's highest)
+ mov r6, r6, lsr #8
+ orr r6, r6, r3, lsl #24
+ mov r6, r6, lsr #8
+ orr r6, r6, r4, lsl #24
+ mov r6, r6, lsr #8
+ orr r6, r6, r5, lsl #24
+ str r6, [r0], #-240 @ store, then step dst back by 240 bytes
+
+ and r6, r3, #0xff00 @ gather byte 1 of r2..r5, same byte order
+ and r7, r2, #0xff00
+ orr r6, r6, r7, lsr #8
+ and r7, r4, #0xff00
+ orr r6, r6, r7, lsl #8
+ and r7, r5, #0xff00
+ orr r6, r6, r7, lsl #16
+ str r6, [r0], #-240
+
+ and r6, r4, #0xff0000 @ gather byte 2 of r2..r5
+ and r7, r2, #0xff0000
+ orr r6, r6, r7, lsr #16
+ and r7, r3, #0xff0000
+ orr r6, r6, r7, lsr #8
+ and r7, r5, #0xff0000
+ orr r6, r6, r7, lsl #8
+ str r6, [r0], #-240
+
+ mov r6, r5, lsr #24 @ gather byte 3 of r2..r5
+ mov r6, r6, lsl #8
+ orr r6, r6, r4, lsr #24
+ mov r6, r6, lsl #8
+ orr r6, r6, r3, lsr #24
+ mov r6, r6, lsl #8
+ orr r6, r6, r2, lsr #24
+ str r6, [r0], #-240
+
+ subs lr, lr, #1 @ one pass done; flags are consumed by the bne below
+ add r1, r1, #4 @ next 4 source bytes (add without s keeps the flags)
+ bne rotated_blit_loop8
+
+ ldmfd sp!,{r4-r8,pc} @ restore r4-r8 and return to the pushed lr
+\r
+\r
+@ input: r2-r5
+@ output: r7,r8
+@ trash: r6
+.macro rb_line_low @ packs the low halfwords: r7 = r2.lo | r3.lo<<16, r8 = r4.lo | r5.lo<<16
+ mov r6, r2, lsl #16 @ r6 = r2.lo in the top half
+ mov r7, r3, lsl #16 @ r7 = r3.lo in the top half
+ orr r7, r7, r6, lsr #16 @ r7 |= r2.lo in the bottom half
+ mov r6, r4, lsl #16 @ same for r4/r5 into r8
+ mov r8, r5, lsl #16
+ orr r8, r8, r6, lsr #16
+.endm
\r
-vidCpyM2_32col_nobord:\r
- stmfd sp!, {r4-r6,lr}\r
+.macro rb_line_hi @ in: r2-r5, trash: r6; out: r7 = r2.hi | r3.hi<<16, r8 = r4.hi | r5.hi<<16
+ mov r6, r2, lsr #16 @ r6 = r2.hi in the bottom half
+ mov r7, r3, lsr #16 @ r7 = r3.hi in the bottom half
+ orr r7, r6, r7, lsl #16 @ r7 = r2.hi | r3.hi<<16
+ mov r6, r4, lsr #16 @ same for r4/r5 into r8
+ mov r8, r5, lsr #16
+ orr r8, r6, r8, lsl #16
+.endm
\r
- mov r12, #224 @ lines\r
+.global rotated_blit16 @ void *dst, void *linesx4, u32 y, int is_32col
+rotated_blit16: @ transposes 4 source lines of halfwords: each source column becomes 2 words in dst
+ stmfd sp!,{r4-r8,lr}
+
+ add r0, r0, #(240*320)*2 @ r0 = dst + (240*320 - (240+4) + y) * 2
+ sub r0, r0, #(240+4)*2 @ y starts from 4
+ add r0, r0, r2, lsl #1
+
+ tst r3, r3 @ is_32col?
+ subne r0, r0, #(240*32)*2 @ 32col: start 32 steps of 240*2 bytes lower in dst
+ addne r1, r1, #32*2 @ 32col: start 32 halfwords into each source line
+ movne lr, #256/4 @ lr = loop count; each pass consumes 4 halfwords of every line
+ moveq lr, #320/4
+
+rotated_blit_loop16:
+ ldr r2, [r1, #320*0*2] @ r2..r5 = word at the same offset in lines 0..3 (320*2 bytes apart)
+ ldr r3, [r1, #320*1*2]
+ ldr r4, [r1, #320*2*2]
+ ldr r5, [r1, #320*3*2]
+ rb_line_low
+ stmia r0, {r7,r8} @ store the low halfwords of lines 0..3 (no writeback)
+ sub r0, r0, #240*2 @ step dst back by 240 halfwords
+ rb_line_hi
+ stmia r0, {r7,r8} @ store the high halfwords of lines 0..3
+ sub r0, r0, #240*2
+
+ ldr r2, [r1, #320*0*2+4] @ same again for the next word of each line
+ ldr r3, [r1, #320*1*2+4]
+ ldr r4, [r1, #320*2*2+4]
+ ldr r5, [r1, #320*3*2+4]
+ rb_line_low
+ stmia r0, {r7,r8}
+ sub r0, r0, #240*2
+ rb_line_hi
+ stmia r0, {r7,r8}
+ sub r0, r0, #240*2
+
+ subs lr, lr, #1 @ one pass done; the following add keeps these flags
add r1, r1, #8
- b vidCpyM2_32_loop_out
+ bne rotated_blit_loop16 @ until lr == 0
+
+ ldmfd sp!,{r4-r8,pc} @ restore r4-r8 and return
\r
\r
.global spend_cycles @ c\r
\r
bx lr\r
\r
-\r