X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=drivers%2Fgp2x%2Fasmutils.s;h=4a2e1a3b99d47623aa2b5d12d9c211adb7ff3756;hb=989672f4f2b4b4b2b373271374e6546546091c10;hp=3db4e303e352270eec9b3a052ea44fd8ec3d6d97;hpb=b547bda76671cf9c99973dd3faad97804949ced3;p=fceu.git diff --git a/drivers/gp2x/asmutils.s b/drivers/gp2x/asmutils.s index 3db4e30..4a2e1a3 100644 --- a/drivers/gp2x/asmutils.s +++ b/drivers/gp2x/asmutils.s @@ -66,6 +66,88 @@ spend_cycles: bx lr +.global soft_scale @ void *dst, unsigned short *pal, int offs, int lines + +soft_scale: + stmfd sp!,{r4-r11,lr} + mov lr, #0xff + mov lr, lr, lsl #1 + mov r9, #0x3900 @ f800 07e0 001f | e000 0780 001c | 3800 01e0 0007 + orr r9, r9, #0x00e7 + + mov r11,r3 @ r11= line counter + mov r3, r1 @ r3 = pal base + + mov r12,#320 + mul r2, r12,r2 + add r4, r0, r2, lsl #1 @ r4 = dst_start + add r5, r0, r2 @ r5 = src_start + mul r12,r11,r12 + add r0, r4, r12,lsl #1 @ r0 = dst_end + add r1, r5, r12 @ r1 = src_end + +soft_scale_loop: + sub r1, r1, #64 @ skip borders + mov r2, #256/8 + +soft_scale_loop_line: + ldr r12, [r1, #-8]! + ldr r7, [r1, #4] + + and r4, lr, r12,lsl #1 + ldrh r4, [r3, r4] + and r5, lr, r12,lsr #7 + ldrh r5, [r3, r5] + and r4, r4, r9, lsl #2 + orr r4, r4, r4, lsl #14 @ r4[31:16] = 1/4 pix_s 0 + and r5, r5, r9, lsl #2 + sub r6, r5, r5, lsr #2 @ r6 = 3/4 pix_s 1 + add r4, r4, r6, lsl #16 @ pix_d 0, 1 + and r6, lr, r12,lsr #15 + ldrh r6, [r3, r6] + and r12,lr, r12,lsr #23 + ldrh r12,[r3, r12] + and r6, r6, r9, lsl #2 + add r5, r5, r6 + mov r5, r5, lsr #1 + sub r6, r6, r6, lsr #2 @ r6 = 3/4 pix_s 2 + orr r5, r5, r6, lsl #16 + + and r6, lr, r7, lsl #1 + ldrh r6, [r3, r6] + and r12,r12,r9, lsl #2 + add r5, r5, r12,lsl #14 @ pix_d 2, 3 + and r6, r6, r9, lsl #2 + orr r6, r12,r6, lsl #16 @ pix_d 4, 5 + + and r12,lr, r7, lsr #7 + ldrh r12,[r3, r12] + and r10,lr, r7, lsr #15 + ldrh r10,[r3, r10] + and r12,r12,r9, lsl #2 + sub r8, r12,r12,lsr #2 @ r8 = 3/4 pix_s 1 + add r8, r8, r6, lsr #18 + and r7, lr, r7, lsr #23 + ldrh r7, [r3, r7] + and r10,r10,r9, lsl #2 + orr r8, r8, r10,lsl #15 + add r8, r8, r12,lsl #15 @ pix_d 6, 7 + sub r10,r10,r10,lsr #2 @ r10= 3/4 pix_s 2 + and r7, r7, r9, lsl #2 + add r10,r10,r7, lsr #2 @ += 1/4 pix_s 3 + orr r10,r10,r7, lsl #16 @ pix_d 8, 9 + + subs r2, r2, #1 + + stmdb r0!, {r4,r5,r6,r8,r10} + bne soft_scale_loop_line + + subs r11,r11,#1 + bne soft_scale_loop + + ldmfd sp!,{r4-r11,lr} + bx lr + /* buggy and slow, probably because function call overhead @ renderer helper, based on bitbank's method