-/* buggy and slow, probably because function call overhead
-@ renderer helper, based on bitbank's method
-.global draw8pix @ uint8 *P, uint8 *C, uint8 *PALRAM @ dest, src, pal
-
-draw8pix:
- stmfd sp!, {r4,r5}
-
- ldrb r3, [r1] @ get bit 0 pixels
- mov r12,#1
- orr r12,r12,r12,lsl #8
- orr r12,r12,r12,lsl #16
- ldrb r1, [r1, #8] @ get bit 1 pixels
- orr r3, r3, r3, lsl #9 @ shift them over 1 byte + 1 bit
- orr r3, r3, r3, lsl #18 @ now 4 pixels take up 4 bytes
- and r4, r12,r3, lsr #7 @ mask off the upper nibble pixels we want
- and r5, r12,r3, lsr #3 @ mask off the lower nibble pixels we want
- ldr r2, [r2]
-
- orr r1, r1, r1, lsl #9 @ process the bit 1 pixels
- orr r1, r1, r1, lsl #18
- and r3, r12,r1, lsr #7 @ mask off the upper nibble pixels we want
- and r1, r12,r1, lsr #3 @ mask off the lower nibble
- orr r4, r4, r3, lsl #1
- orr r5, r5, r1, lsl #5
-
- @ can this be avoided?
- mov r4, r4, lsl #3 @ *8
- mov r3, r2, ror r4
- strb r3, [r0], #1
- mov r4, r4, lsr #8
- mov r3, r2, ror r4
- strb r3, [r0], #1
- mov r4, r4, lsr #8
- mov r3, r2, ror r4
- strb r3, [r0], #1
- mov r4, r4, lsr #8
- mov r3, r2, ror r4
- strb r3, [r0], #1
-
- mov r5, r5, lsl #3 @ *8
- mov r3, r2, ror r5
- strb r3, [r0], #1
- mov r5, r5, lsr #8
- mov r3, r2, ror r5
- strb r3, [r0], #1
- mov r5, r5, lsr #8
- mov r3, r2, ror r5
- strb r3, [r0], #1
- mov r5, r5, lsr #8
- mov r3, r2, ror r5
- strb r3, [r0], #1
-
- ldmfd sp!, {r4,r5}
- bx lr
-*/
+@ void convert2RGB555(unsigned short *dst, unsigned char *src, unsigned short *pal, int count);
+@
+@ Expands 8-bit pixel values into 16-bit palette entries, 8 pixels per
+@ loop iteration. With little-endian word loads/stores this produces
+@ dst[i] = pal[src[i]].
+@
+@ In:    r0 = dst   (written 16 bytes at a time with stmia)
+@        r1 = src   (read 8 bytes at a time with ldmia)
+@        r2 = pal   (halfword table, indexed by pixel value 0..255)
+@        r3 = count (number of pixels)
+@ Out:   none
+@ Clobb: r0-r3, r12, flags (r4-r8 and lr are saved and restored)
+@
+@ count is divided by 8 with a logical shift, so it is effectively treated
+@ as unsigned and any remainder (count & 7) is left unconverted.
+@ When count < 8 the function returns without reading or writing anything.
+@ src and dst are accessed with word transfers (ldmia/stmia), so both
+@ should be 4-byte aligned.
+
+.global convert2RGB555
+
+convert2RGB555:
+ movs r3, r3, lsr #3 @ r3 = number of 8-pixel groups, Z set if none
+ bxeq lr @ nothing to do; without this the loop counter would wrap past zero
+
+ stmfd sp!,{r4-r8,lr}
+ mov lr, #0xff
+ mov lr, lr, lsl #1 @ lr = 0x1fe (not encodable as a single ARM immediate)
+ @ lr masks (pixel * 2), i.e. the byte offset of a halfword in pal
+
+convert2RGB555_loop:
+ ldmia r1!,{r4,r5} @ r4 = pixels 0-3, r5 = pixels 4-7, src += 8
+
+ and r6, lr, r4, lsl #1 @ pixel 0 * 2
+ ldrh r6, [r2, r6]
+ and r7, lr, r4, lsr #7 @ pixel 1 * 2
+ ldrh r7, [r2, r7]
+ and r8, lr, r4, lsr #15 @ pixel 2 * 2
+ ldrh r8, [r2, r8]
+ and r4, lr, r4, lsr #23 @ pixel 3 * 2
+ ldrh r4, [r2, r4]
+
+ orr r6, r6, r7, lsl #16 @ r6 = output word 0 (pixels 0,1)
+ and r12,lr, r5, lsl #1 @ pixel 4 * 2
+ ldrh r12, [r2, r12]
+ orr r7, r8, r4, lsl #16 @ r7 = output word 1 (pixels 2,3)
+ and r8, lr, r5, lsr #7 @ pixel 5 * 2
+ ldrh r8, [r2, r8]
+ and r4, lr, r5, lsr #15 @ pixel 6 * 2
+ ldrh r4, [r2, r4]
+ and r5, lr, r5, lsr #23 @ pixel 7 * 2
+ ldrh r5, [r2, r5]
+ orr r8, r12,r8, lsl #16 @ r8 = output word 2 (pixels 4,5)
+ orr r12,r4, r5, lsl #16 @ r12 = output word 3 (pixels 6,7)
+
+ stmia r0!,{r6,r7,r8,r12} @ store 8 converted pixels, dst += 16 bytes
+ subs r3, r3, #1
+ bne convert2RGB555_loop
+
+ ldmfd sp!,{r4-r8,lr}
+ bx lr