@ vim:filetype=armasm
| 2 | |
@ input: r2-r5  = one 32-bit word (two 16bpp pixels) from each of 4
@                 consecutive source lines
@ output: r7,r8 = the LOW (even-column) pixel of each line, packed:
@                 r7 = low16(r2) | low16(r3)<<16
@                 r8 = low16(r4) | low16(r5)<<16
@ trash: r6
@ i.e. one source column (4 pixels, one per line) becomes a 4-pixel
@ run of a rotated destination row.
.macro rb_line_low
    mov     r6, r2, lsl #16             @ r6 = low16(r2) << 16
    mov     r7, r3, lsl #16             @ r7 = low16(r3) << 16
    orr     r7, r7, r6, lsr #16         @ r7 = low16(r2) | low16(r3)<<16
    mov     r6, r4, lsl #16             @ same for the second pair
    mov     r8, r5, lsl #16
    orr     r8, r8, r6, lsr #16         @ r8 = low16(r4) | low16(r5)<<16
.endm
| 14 | |
@ Companion of rb_line_low: packs the HIGH (odd-column) pixel of each
@ of the 4 lines instead.
@ input: r2-r5, output: r7 = high16(r2) | high16(r3)<<16,
@                       r8 = high16(r4) | high16(r5)<<16, trash: r6
.macro rb_line_hi
    mov     r6, r2, lsr #16             @ r6 = high16(r2)
    mov     r7, r3, lsr #16             @ r7 = high16(r3)
    orr     r7, r6, r7, lsl #16         @ r7 = high16(r2) | high16(r3)<<16
    mov     r6, r4, lsr #16             @ same for the second pair
    mov     r8, r5, lsr #16
    orr     r8, r6, r8, lsl #16         @ r8 = high16(r4) | high16(r5)<<16
.endm
| 23 | |
@-----------------------------------------------------------------------
@ void rotated_blit16(void *dst, void *linesx4)
@ 90-degree rotation blit, 16bpp: reads a 320x240 source starting at
@ r1 and writes a 240px-wide, 320-row destination at r0 such that
@   dst[319 - x][y] = src[y][x]
@ (r0 starts at the LAST dst row and steps back one row per source
@ column — see address math below).
@ In:    r0 = dst, r1 = source (processed 4 lines at a time)
@ Clobb: r2-r8, lr (r4-r8,lr saved/restored), flags
@ lr doubles as a packed loop counter:
@   high halfword = inner count (320/4 column groups per line set)
@   low  halfword = outer count (240/4 four-line groups)
@-----------------------------------------------------------------------
.global rotated_blit16 @ void *dst, void *linesx4

rotated_blit16:
    stmfd   sp!,{r4-r8,lr}

    sub     r0, r0, #240*2              @ adjust: cancels the first row of
                                        @ the outer loop's frame-size add
    mov     lr, #240/4                  @ outer count: 60 groups of 4 lines

rotated_blit_loop16_o:
    orr     lr, lr, #((320/4)-1) << 16  @ reload inner count: 80 iterations
    add     r0, r0, #(240*320)*2        @ r0 -> start of last dst row

rotated_blit_loop16:
    ldr     r2, [r1, #320*0*2]          @ pixels x,x+1 of lines y..y+3
    ldr     r3, [r1, #320*1*2]          @ (offsets = whole 320px lines)
    ldr     r4, [r1, #320*2*2]
    ldr     r5, [r1, #320*3*2]
    rb_line_low                         @ pack column x      -> r7,r8
    stmia   r0, {r7,r8}                 @ dst row 319-x, cols y..y+3
    sub     r0, r0, #240*2              @ up one dst row (= next src column)
    rb_line_hi                          @ pack column x+1    -> r7,r8
    stmia   r0, {r7,r8}
    sub     r0, r0, #240*2

    ldr     r2, [r1, #320*0*2+4]        @ pixels x+2,x+3 of the same lines
    ldr     r3, [r1, #320*1*2+4]
    ldr     r4, [r1, #320*2*2+4]
    ldr     r5, [r1, #320*3*2+4]
    rb_line_low                         @ column x+2
    stmia   r0, {r7,r8}
    sub     r0, r0, #240*2
    rb_line_hi                          @ column x+3
    stmia   r0, {r7,r8}
    sub     r0, r0, #240*2

    subs    lr, lr, #1<<16              @ dec inner count; borrows into the
    add     r1, r1, #8                  @ low half (goes negative) when done
    bpl     rotated_blit_loop16         @ inner loop: 80 x 8 bytes = 1 line

    add     lr, lr, #1<<16              @ undo the borrow from the last subs
    subs    lr, lr, #1                  @ dec outer count

    add     r0, r0, #4*2                @ next 4 dst columns (= 4 src lines)
    add     r1, r1, #(320*3)*2          @ skip the 3 lines already blitted
    bgt     rotated_blit_loop16_o

    ldmfd   sp!,{r4-r8,pc}
| 70 | |
| 71 | |
@-----------------------------------------------------------------------
@ void rotated_blit8(void *dst, void *linesx4)
@ 8bpp variant of rotated_blit16: same 90-degree rotation of a 320x240
@ source into a 240px-wide, 320-row destination,
@   dst[319 - x][y] = src[y][x]
@ Works on 4x4 pixel tiles: loads one word (4 pixels) from each of 4
@ lines, transposes the 4x4 byte block with shifts/masks, and stores
@ the 4 resulting words to 4 consecutive dst rows (walking upward via
@ the #-240 post-decrement).
@ In:    r0 = dst, r1 = source
@ Clobb: r2-r8, lr (r4-r8,lr saved/restored), flags
@ r8 holds the constant source line stride (320 bytes); lr is the same
@ packed inner/outer counter as in rotated_blit16.
@-----------------------------------------------------------------------
.global rotated_blit8 @ void *dst, void *linesx4

rotated_blit8:
    stmfd   sp!,{r4-r8,lr}

    mov     r8, #320                    @ source line stride in bytes
    sub     r0, r0, #240                @ adjust (see rotated_blit16)
    mov     lr, #240/4                  @ outer count: 60 groups of 4 lines

rotated_blit8_loop_o:
    orr     lr, lr, #((320/4)-1) << 16  @ reload inner count: 80 iterations
    add     r0, r0, #(240*320)          @ r0 -> start of last dst row

rotated_blit8_loop:
    mov     r6, r1
    ldr     r2, [r6], r8                @ pixels x..x+3 of line y
    ldr     r3, [r6], r8                @ ... line y+1
    ldr     r4, [r6], r8                @ ... line y+2
    ldr     r5, [r6], r8                @ ... line y+3

    @ dst word 0 = byte0 of r2..r5 (column x of the 4 lines):
    @ build from the top, shifting right to make room for each new byte
    mov     r6, r2, lsl #24
    mov     r6, r6, lsr #8
    orr     r6, r6, r3, lsl #24
    mov     r6, r6, lsr #8
    orr     r6, r6, r4, lsl #24
    mov     r6, r6, lsr #8
    orr     r6, r6, r5, lsl #24         @ r6 = b0(r2)|b0(r3)<<8|b0(r4)<<16|b0(r5)<<24
    str     r6, [r0], #-240             @ store, then up one dst row

    @ dst word 1 = byte1 of r2..r5 (column x+1): mask and shift into place
    and     r6, r3, #0xff00             @ b1(r3) already at bits 8-15
    and     r7, r2, #0xff00
    orr     r6, r6, r7, lsr #8          @ b1(r2) -> bits 0-7
    and     r7, r4, #0xff00
    orr     r6, r6, r7, lsl #8          @ b1(r4) -> bits 16-23
    and     r7, r5, #0xff00
    orr     r6, r6, r7, lsl #16         @ b1(r5) -> bits 24-31
    str     r6, [r0], #-240

    @ dst word 2 = byte2 of r2..r5 (column x+2)
    and     r6, r4, #0xff0000           @ b2(r4) already at bits 16-23
    and     r7, r2, #0xff0000
    orr     r6, r6, r7, lsr #16         @ b2(r2) -> bits 0-7
    and     r7, r3, #0xff0000
    orr     r6, r6, r7, lsr #8          @ b2(r3) -> bits 8-15
    and     r7, r5, #0xff0000
    orr     r6, r6, r7, lsl #8          @ b2(r5) -> bits 24-31
    str     r6, [r0], #-240

    @ dst word 3 = byte3 of r2..r5 (column x+3):
    @ build from the bottom, shifting left to make room
    mov     r6, r5, lsr #24
    mov     r6, r6, lsl #8
    orr     r6, r6, r4, lsr #24
    mov     r6, r6, lsl #8
    orr     r6, r6, r3, lsr #24
    mov     r6, r6, lsl #8
    orr     r6, r6, r2, lsr #24         @ r6 = b3(r2)|b3(r3)<<8|b3(r4)<<16|b3(r5)<<24
    str     r6, [r0], #-240

    subs    lr, lr, #1<<16              @ dec inner count (high halfword);
    add     r1, r1, #4                  @ borrows negative when done
    bpl     rotated_blit8_loop          @ inner loop: 80 x 4 bytes = 1 line

    add     lr, lr, #1<<16              @ undo the borrow from the last subs
    subs    lr, lr, #1                  @ dec outer count

    add     r0, r0, #4                  @ next 4 dst columns (= 4 src lines)
    add     r1, r1, #320*3              @ skip the 3 lines already blitted
    bgt     rotated_blit8_loop_o

    ldmfd   sp!,{r4-r8,pc}
| 139 | |