@ ----------------------------------------------------------------------
@ asmutils.s -- small ARM assembly helpers for the GP2X port of FCE Ultra
@
@ NOTE(review): this file was recovered from a gitweb "blobdiff_plain"
@ dump (drivers/gp2x/asmutils.s, c87a3ab..3db4e30, hunk @@ -7,3 +7,119 @@);
@ the non-assembly diff metadata has been folded into this header comment
@ so the file assembles again.  Nothing below was removed.
@
@ Syntax: GNU as (ARM).  '@' starts a comment.
@ Target: the GP2X handheld's ARM9 core -- TODO confirm exact core/arch
@ against the build flags (BX is used, so ARMv4T or later is assumed).
@ ----------------------------------------------------------------------

@ flushcache -- these two instructions are hunk-context lines; the
@ ".global flushcache" directive sits above the visible diff window,
@ so confirm it against the full file.
flushcache:
        swi     #0x9f0002               @ presumably ARM Linux sys_cacheflush
                                        @ (OABI __ARM_NR_cacheflush) -- TODO confirm
        mov     pc, lr


@ ----------------------------------------------------------------------
@ void block_or(void *src, size_t n, int pat)
@ OR a byte pattern into every byte of an n-byte block.
@ In:    r0 = src    (accessed with ldm/stm; assumed 4-byte aligned --
@                     TODO confirm at callers)
@        r1 = n      (bytes; processed 16 bytes/iteration, and the loop
@                     body runs before the first counter test, so n must
@                     be a nonzero multiple of 16 or the counter wraps)
@        r2 = pat    (low byte replicated into all four byte lanes)
@ Out:   none.  Clobbers r0-r3, r12, flags; r4-r5 saved/restored.
@ ----------------------------------------------------------------------
.global block_or @ void *src, size_t n, int pat

block_or:
        stmfd   sp!, {r4-r5}
        orr     r2, r2, r2, lsl #8      @ replicate pattern byte ...
        orr     r2, r2, r2, lsl #16     @ ... into all 4 bytes of r2
        mov     r1, r1, lsr #4          @ r1 = number of 16-byte chunks
block_loop_or:
        ldmia   r0, {r3-r5,r12}         @ load 16 bytes, no writeback
        subs    r1, r1, #1
        orr     r3, r3, r2
        orr     r4, r4, r2
        orr     r5, r5, r2
        orr     r12,r12,r2
        stmia   r0!, {r3-r5,r12}        @ store back and advance r0
        bne     block_loop_or
        ldmfd   sp!, {r4-r5}
        bx      lr


@ ----------------------------------------------------------------------
@ void block_andor(void *src, size_t n, int andpat, int orpat)
@ For every byte b of the n-byte block: b = (b & andpat) | orpat.
@ In:    r0 = src     (same alignment assumption as block_or)
@        r1 = n       (bytes; nonzero multiple of 16 -- see block_or)
@        r2 = andpat  (low byte replicated into all four byte lanes)
@        r3 = orpat   (low byte replicated into all four byte lanes)
@ Out:   none.  Clobbers r0-r3, r12, flags; r4-r6 saved/restored.
@ ----------------------------------------------------------------------
.global block_andor @ void *src, size_t n, int andpat, int orpat

block_andor:
        stmfd   sp!, {r4-r6}
        orr     r2, r2, r2, lsl #8      @ replicate AND pattern byte
        orr     r2, r2, r2, lsl #16
        orr     r3, r3, r3, lsl #8      @ replicate OR pattern byte
        orr     r3, r3, r3, lsl #16
        mov     r1, r1, lsr #4          @ r1 = number of 16-byte chunks
block_loop_andor:
        ldmia   r0, {r4-r6,r12}         @ load 16 bytes, no writeback
        subs    r1, r1, #1
        and     r4, r4, r2              @ word = (word & andpat) | orpat
        orr     r4, r4, r3
        and     r5, r5, r2
        orr     r5, r5, r3
        and     r6, r6, r2
        orr     r6, r6, r3
        and     r12,r12,r2
        orr     r12,r12,r3
        stmia   r0!, {r4-r6,r12}        @ store back and advance r0
        bne     block_loop_andor
        ldmfd   sp!, {r4-r6}
        bx      lr


@ ----------------------------------------------------------------------
@ void spend_cycles(int c)
@ Busy-wait for roughly c CPU cycles (calibration per the comments
@ below; very small c underflows the counter but the bpl loop still
@ terminates after the first pass).
@ In:    r0 = c.  Clobbers r0, flags.
@ ----------------------------------------------------------------------
.global spend_cycles @ c

spend_cycles:
        mov     r0, r0, lsr #2          @ 4 cycles/iteration
        sub     r0, r0, #2              @ entry/exit/init overhead
.sc_loop:
        subs    r0, r0, #1
        bpl     .sc_loop

        bx      lr


/* buggy and slow, probably because function call overhead
@ renderer helper, based on bitbank's method
.global draw8pix @ uint8 *P, uint8 *C, uint8 *PALRAM @ dest, src, pal

draw8pix:
    stmfd sp!, {r4,r5}

    ldrb r3, [r1]           @ get bit 0 pixels
    mov  r12,#1
    orr  r12,r12,r12,lsl #8
    orr  r12,r12,r12,lsl #16
    ldrb r1, [r1, #8]       @ get bit 1 pixels
    orr  r3, r3, r3, lsl #9 @ shift them over 1 byte + 1 bit
    orr  r3, r3, r3, lsl #18 @ now 4 pixels take up 4 bytes
    and  r4, r12,r3, lsr #7 @ mask off the upper nibble pixels we want
    and  r5, r12,r3, lsr #3 @ mask off the lower nibble pixels we want
    ldr  r2, [r2]

    orr  r1, r1, r1, lsl #9 @ process the bit 1 pixels
    orr  r1, r1, r1, lsl #18
    and  r3, r12,r1, lsr #7 @ mask off the upper nibble pixels we want
    and  r1, r12,r1, lsr #3 @ mask off the lower nibble
    orr  r4, r4, r3, lsl #1
    orr  r5, r5, r1, lsl #5

    @ can this be avoided?
    mov  r4, r4, lsl #3     @ *8
    mov  r3, r2, ror r4
    strb r3, [r0], #1
    mov  r4, r4, lsr #8
    mov  r3, r2, ror r4
    strb r3, [r0], #1
    mov  r4, r4, lsr #8
    mov  r3, r2, ror r4
    strb r3, [r0], #1
    mov  r4, r4, lsr #8
    mov  r3, r2, ror r4
    strb r3, [r0], #1

    mov  r5, r5, lsl #3     @ *8
    mov  r3, r2, ror r5
    strb r3, [r0], #1
    mov  r5, r5, lsr #8
    mov  r3, r2, ror r5
    strb r3, [r0], #1
    mov  r5, r5, lsr #8
    mov  r3, r2, ror r5
    strb r3, [r0], #1
    mov  r5, r5, lsr #8
    mov  r3, r2, ror r5
    strb r3, [r0], #1

    ldmfd sp!, {r4,r5}
    bx lr
*/