@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\r
\r
\r
-@ mode2 blitter for 40 cols\r
-.global vidCpyM2_40col @ void *dest, void *src\r
-\r
-vidCpyM2_40col:\r
+@ mode2 blitter\r
+@ Copies 224 lines from src (r1) to dest (r0), 32 bytes per inner-loop pass.\r
+@ r2 = m32col, r3 = with_32c_border (see the prototype comment below).\r
+@ Setup: dest += 320*8, src += 8. lr = 64 when r2 != 0, otherwise 0;\r
+@ dest gets an extra +32 only when r2 and r3 are both non-zero.\r
+@ Per line: 10 passes (320 bytes), or 8 passes (256 bytes) when lr = 64;\r
+@ afterwards dest += lr and src += 8 + lr.\r
+@ r2-r5 are reused as copy scratch once the flags above have been consumed;\r
+@ r6 and r12 are the pass and line counters. r4-r6 are restored on exit.\r
+.global vidcpy_m2 @ void *dest, void *src, int m32col, int with_32c_border\r
+vidcpy_m2:\r
stmfd sp!, {r4-r6,lr}\r
\r
mov r12, #224 @ lines\r
+ add r0, r0, #320*8\r
add r1, r1, #8\r
+ mov lr, #0\r
+\r
+@ tstne/addne only execute when r2 != 0, so the +32 needs both flags set\r
+ tst r2, r2\r
+ movne lr, #64\r
+ tstne r3, r3\r
+ addne r0, r0, #32\r
\r
-vidCpyM2_40_loop_out:\r
+vidCpyM2_loop_out:\r
mov r6, #10\r
-vidCpyM2_40_loop:\r
+ sub r6, r6, lr, lsr #5 @ -= 2 in 32col mode\r
+vidCpyM2_loop:\r
subs r6, r6, #1\r
- ldmia r1!, {r2-r5}\r
- stmia r0!, {r2-r5}\r
- ldmia r1!, {r2-r5}\r
- stmia r0!, {r2-r5}\r
- bne vidCpyM2_40_loop\r
+ ldmia r1!, {r2-r5}\r
+ stmia r0!, {r2-r5}\r
+ ldmia r1!, {r2-r5}\r
+ stmia r0!, {r2-r5}\r
+ bne vidCpyM2_loop\r
+\r
+@ subs sets the flags for the bne below; the plain adds in between keep them\r
subs r12,r12,#1\r
+ add r0, r0, lr\r
add r1, r1, #8\r
- bne vidCpyM2_40_loop_out\r
-\r
- ldmfd sp!, {r4-r6,lr}\r
- bx lr\r
+ add r1, r1, lr\r
+ bne vidCpyM2_loop_out\r
\r
+@ return by popping the saved lr straight into pc\r
+ ldmfd sp!, {r4-r6,pc}\r
\r
-@ mode2 blitter for 32 cols\r
-.global vidCpyM2_32col @ void *dest, void *src\r
\r
-vidCpyM2_32col:\r
- stmfd sp!, {r4-r6,lr}\r
-\r
- mov r12, #224 @ lines\r
+@ Drives rotated_blit8_2 once per group of 4 source lines.\r
+@ r0 = dest, r1 = src, r2 = m32col. r3 (with_32c_border) is overwritten in\r
+@ the loop before it is ever read, so the 4th argument is unused here.\r
+@ Loop state: r4 = dest, r5 = src pointer for the current 4 lines,\r
+@ r6 = m32col flag, r7 = y value handed to the blit (8+4 up to 224+8, step 4).\r
+.global vidcpy_m2_rot @ void *dest, void *src, int m32col, int with_32c_border\r
+vidcpy_m2_rot:\r
+ stmfd sp!,{r4-r8,lr}\r
add r1, r1, #8\r
- add r0, r0, #32\r
-\r
-vidCpyM2_32_loop_out:\r
- mov r6, #8\r
-vidCpyM2_32_loop:\r
- subs r6, r6, #1\r
- ldmia r1!, {r2-r5}\r
- stmia r0!, {r2-r5}\r
- ldmia r1!, {r2-r5}\r
- stmia r0!, {r2-r5}\r
- bne vidCpyM2_32_loop\r
- subs r12,r12,#1\r
- add r0, r0, #64\r
- add r1, r1, #8+64\r
- bne vidCpyM2_32_loop_out\r
-\r
- ldmfd sp!, {r4-r6,lr}\r
- bx lr\r
-\r
-\r
-@ mode2 blitter for 32 cols with no borders\r
-.global vidCpyM2_32col_nobord @ void *dest, void *src\r
+@ rotated_blit8_2 adds 32 to its source pointer when its r3 is non-zero;\r
+@ the -32 here cancels that\r
+ tst r2, r2\r
+ subne r1, r1, #32 @ adjust\r
+\r
+ mov r4, r0\r
+ mov r5, r1\r
+ mov r6, r2\r
+ mov r7, #8+4\r
+\r
+vidcpy_m2_rot_loop:\r
+ @ a bit lame but oh well..\r
+@ set up r0-r3 as rotated_blit8's arguments (dst, linesx4, y, is_32col) and\r
+@ r8 = 328 as the byte stride between source lines\r
+ mov r0, r4\r
+ mov r1, r5\r
+ mov r2, r7\r
+ mov r3, r6\r
+ mov r8, #328\r
+@ hand-made call: push the frame that rotated_blit8_2's closing\r
+@ ldmfd sp!,{r4-r8,pc} will pop, with after_rot_blit8 in the pc slot\r
+ adr lr, after_rot_blit8\r
+ stmfd sp!,{r4-r8,lr}\r
+ b rotated_blit8_2\r
+\r
+after_rot_blit8:\r
+@ r4-r8 were reloaded by the blit's pop; advance 4 source lines of 328 bytes\r
+ add r5, r5, #328*4\r
+ add r7, r7, #4\r
+ cmp r7, #224+8+4\r
+@ when done, pop the frame saved at entry and return\r
+ ldmgefd sp!,{r4-r8,pc}\r
+ b vidcpy_m2_rot_loop\r
+\r
+\r
+.global rotated_blit8 @ void *dst, void *linesx4, u32 y, int is_32col\r
+@ Reads 4 consecutive source lines (r1, byte stride r8) and writes them out\r
+@ transposed: each output word holds the same byte column of all 4 lines,\r
+@ with line 0 in the least significant byte.\r
+@ r0 = dst, r1 = linesx4, r2 = y, r3 = is_32col.\r
+@ Start address: dst + 240*320 - (240+4) + y; when r3 != 0 a further\r
+@ 240*32 is subtracted from dst and 32 is added to the source pointer.\r
+@ lr counts source words per line: 256/4 when r3 != 0, else 320/4.\r
+@ rotated_blit8 uses a 320-byte source stride. rotated_blit8_2 is a second\r
+@ entry point that expects the caller to have pushed {r4-r8,lr} already and\r
+@ to have loaded r8 with the stride.\r
+rotated_blit8:\r
+ stmfd sp!,{r4-r8,lr}\r
+ mov r8, #320\r
+\r
+rotated_blit8_2:\r
+ add r0, r0, #(240*320)\r
+ sub r0, r0, #(240+4) @ y starts from 4\r
+ add r0, r0, r2\r
+\r
+ tst r3, r3\r
+ subne r0, r0, #(240*32)\r
+ addne r1, r1, #32\r
+ movne lr, #256/4\r
+ moveq lr, #320/4\r
+\r
+rotated_blit_loop8:\r
+@ r2-r5 = one word from the same column of lines 0-3 (r6 walks down by r8)\r
+ mov r6, r1\r
+ ldr r2, [r6], r8\r
+ ldr r3, [r6], r8\r
+ ldr r4, [r6], r8\r
+ ldr r5, [r6], r8\r
+\r
+@ byte 0 of r2,r3,r4,r5 -> one word; every store steps dst back 240 bytes\r
+ mov r6, r2, lsl #24\r
+ mov r6, r6, lsr #8\r
+ orr r6, r6, r3, lsl #24\r
+ mov r6, r6, lsr #8\r
+ orr r6, r6, r4, lsl #24\r
+ mov r6, r6, lsr #8\r
+ orr r6, r6, r5, lsl #24\r
+ str r6, [r0], #-240\r
+\r
+@ byte 1 of r2,r3,r4,r5 -> one word\r
+ and r6, r3, #0xff00\r
+ and r7, r2, #0xff00\r
+ orr r6, r6, r7, lsr #8\r
+ and r7, r4, #0xff00\r
+ orr r6, r6, r7, lsl #8\r
+ and r7, r5, #0xff00\r
+ orr r6, r6, r7, lsl #16\r
+ str r6, [r0], #-240\r
+\r
+@ byte 2 of r2,r3,r4,r5 -> one word\r
+ and r6, r4, #0xff0000\r
+ and r7, r2, #0xff0000\r
+ orr r6, r6, r7, lsr #16\r
+ and r7, r3, #0xff0000\r
+ orr r6, r6, r7, lsr #8\r
+ and r7, r5, #0xff0000\r
+ orr r6, r6, r7, lsl #8\r
+ str r6, [r0], #-240\r
+\r
+@ byte 3 of r2,r3,r4,r5 -> one word\r
+ mov r6, r5, lsr #24\r
+ mov r6, r6, lsl #8\r
+ orr r6, r6, r4, lsr #24\r
+ mov r6, r6, lsl #8\r
+ orr r6, r6, r3, lsr #24\r
+ mov r6, r6, lsl #8\r
+ orr r6, r6, r2, lsr #24\r
+ str r6, [r0], #-240\r
+\r
+@ next source word; subs sets the flags for bne, the add leaves them alone\r
+ subs lr, lr, #1\r
+ add r1, r1, #4\r
+ bne rotated_blit_loop8\r
+\r
+ ldmfd sp!,{r4-r8,pc}\r
+\r
+\r
+@ input: r2-r5\r
+@ output: r7,r8\r
+@ trash: r6\r
+@ rb_line_low packs the low halfwords:\r
+@ r7 = (r3 << 16) | (r2 & 0xffff), r8 = (r5 << 16) | (r4 & 0xffff)\r
+.macro rb_line_low\r
+ mov r6, r2, lsl #16\r
+ mov r7, r3, lsl #16\r
+ orr r7, r7, r6, lsr #16\r
+ mov r6, r4, lsl #16\r
+ mov r8, r5, lsl #16\r
+ orr r8, r8, r6, lsr #16\r
+.endm\r
\r
-vidCpyM2_32col_nobord:\r
- stmfd sp!, {r4-r6,lr}\r
+@ rb_line_hi: same inputs, outputs and scratch as rb_line_low, but packs the\r
+@ high halfwords:\r
+@ r7 = (r3 & 0xffff0000) | (r2 >> 16), r8 = (r5 & 0xffff0000) | (r4 >> 16)\r
+.macro rb_line_hi\r
+ mov r6, r2, lsr #16\r
+ mov r7, r3, lsr #16\r
+ orr r7, r6, r7, lsl #16\r
+ mov r6, r4, lsr #16\r
+ mov r8, r5, lsr #16\r
+ orr r8, r6, r8, lsl #16\r
+.endm\r
\r
- mov r12, #224 @ lines\r
+.global rotated_blit16 @ void *dst, void *linesx4, u32 y, int is_32col\r
+@ 16-bit counterpart of rotated_blit8: each output pair r7,r8 holds the same\r
+@ halfword column of 4 consecutive source lines.\r
+@ r0 = dst, r1 = linesx4, r2 = y, r3 = is_32col.\r
+@ Start address: dst + (240*320)*2 - (240+4)*2 + y*2; when r3 != 0 a further\r
+@ (240*32)*2 is subtracted from dst and 32*2 is added to the source pointer.\r
+@ The source line stride is fixed at 320*2 bytes (immediate load offsets).\r
+@ lr counts loop passes: 256/4 when r3 != 0, else 320/4. Each pass consumes\r
+@ 8 bytes from every one of the 4 lines.\r
+rotated_blit16:\r
+ stmfd sp!,{r4-r8,lr}\r
+\r
+ add r0, r0, #(240*320)*2\r
+ sub r0, r0, #(240+4)*2 @ y starts from 4\r
+ add r0, r0, r2, lsl #1\r
+\r
+ tst r3, r3\r
+ subne r0, r0, #(240*32)*2\r
+ addne r1, r1, #32*2\r
+ movne lr, #256/4\r
+ moveq lr, #320/4\r
+\r
+rotated_blit_loop16:\r
+@ first word of lines 0-3: store the low halfwords, then the high halfwords,\r
+@ stepping dst back 240*2 bytes after each 8-byte store\r
+ ldr r2, [r1, #320*0*2]\r
+ ldr r3, [r1, #320*1*2]\r
+ ldr r4, [r1, #320*2*2]\r
+ ldr r5, [r1, #320*3*2]\r
+ rb_line_low\r
+ stmia r0, {r7,r8}\r
+ sub r0, r0, #240*2\r
+ rb_line_hi\r
+ stmia r0, {r7,r8}\r
+ sub r0, r0, #240*2\r
+\r
+@ same again for the next word (+4 bytes) of each line\r
+ ldr r2, [r1, #320*0*2+4]\r
+ ldr r3, [r1, #320*1*2+4]\r
+ ldr r4, [r1, #320*2*2+4]\r
+ ldr r5, [r1, #320*3*2+4]\r
+ rb_line_low\r
+ stmia r0, {r7,r8}\r
+ sub r0, r0, #240*2\r
+ rb_line_hi\r
+ stmia r0, {r7,r8}\r
+ sub r0, r0, #240*2\r
+\r
+@ subs sets the flags for bne; the add that follows does not touch them\r
+ subs lr, lr, #1\r
add r1, r1, #8\r
- b vidCpyM2_32_loop_out\r
+ bne rotated_blit_loop16\r
+\r
+ ldmfd sp!,{r4-r8,pc}\r
\r
\r
.global spend_cycles @ c\r
\r
bx lr\r
\r
-\r