@ flushcache(beginning_addr, end_addr, flags):
@ NOTE(review): only the export declaration is visible in this chunk; the
@ implementation (presumably a cache-maintenance OS call or CP15 sequence)
@ is elsewhere in the file - confirm against the full source.
4 .global flushcache @ beginning_addr, end_addr, flags
@ block_or(src, n, pat):
@ Presumably ORs the byte pattern 'pat' into a memory block at src
@ (name + signature comment; the load/or loop body is not visible here).
11 .global block_or @ void *src, size_t n, int pat
@ Replicate the low byte of r2 (pat) into all four byte lanes of the word,
@ so whole words can be processed at once:
15 orr r2, r2, r2, lsl #8
16 orr r2, r2, r2, lsl #16
@ ...loop body not visible in this chunk...
@ Store 16 bytes per iteration, post-incrementing the destination pointer.
25 stmia r0!, {r3-r5,r12}
@ block_and(src, n, andpat):
@ Presumably ANDs the byte pattern 'andpat' over a memory block at src
@ (name + signature comment; the load/and loop body is not visible here).
31 .global block_and @ void *src, size_t n, int andpat
@ Replicate the low byte of r2 (andpat) into all four byte lanes of the word:
35 orr r2, r2, r2, lsl #8
36 orr r2, r2, r2, lsl #16
@ ...loop body not visible in this chunk...
@ Store 16 bytes per iteration, post-incrementing the destination pointer.
45 stmia r0!, {r3-r5,r12}
@ block_andor(src, n, andpat, orpat):
@ Presumably applies (word AND andpat) OR orpat across a memory block
@ (name + signature comment; the load/and/or loop body is not visible here).
51 .global block_andor @ void *src, size_t n, int andpat, int orpat
@ Replicate the low bytes of r2 (andpat) and r3 (orpat) across all four
@ byte lanes of their respective words:
55 orr r2, r2, r2, lsl #8
56 orr r2, r2, r2, lsl #16
57 orr r3, r3, r3, lsl #8
58 orr r3, r3, r3, lsl #16
@ ...loop body not visible in this chunk...
@ Store 16 bytes per iteration, post-incrementing the destination pointer.
71 stmia r0!, {r4-r6,r12}
@ spend_cycles(c): busy-wait delay for roughly c CPU cycles.
@ The count is scaled to loop iterations, then adjusted for fixed overhead
@ (the delay loop itself is not visible in this chunk).
77 .global spend_cycles @ c
80 mov r0, r0, lsr #2 @ 4 cycles/iteration
81 sub r0, r0, #2 @ entry/exit/init
@ soft_scale(dst, pal, offs, lines):
@ Software upscaler: each loop iteration blends source pixels through
@ fractional weights (1/4 and 3/4, per the pix_s/pix_d comments) and emits
@ 10 16-bit output pixels (5 words) with a single stmdb.
@ NOTE(review): the prologue, per-line setup and the loads that fill lr/r12/r7
@ with packed source pixels are not visible in this chunk; register roles
@ below are taken from the surviving end-of-line comments - confirm against
@ the full source.
@   r9  = 0x3900 mask constant (used as r9,lsl #2 to mask RGB components)
@   r11 = line counter, r3 = palette base, r0/r1 = dst/src cursors
89 .global soft_scale @ void *dst, unsigned short *pal, int offs, int lines
95 mov r9, #0x3900 @ f800 07e0 001f | e000 0780 001c | 3800 01e0 0007
98 mov r11,r3 @ r11= line counter
99 mov r3, r1 @ r3 = pal base
103 add r4, r0, r2, lsl #1 @ r4 = dst_start
104 add r5, r0, r2 @ r5 = src_start
106 add r0, r4, r12,lsl #1 @ r0 = dst_end
107 add r1, r5, r12 @ r1 = src_end
110 sub r1, r1, #64 @ skip borders
@ Inner loop: runs once per group of output pixels; the terminating
@ condition is set by code not visible in this chunk (see bne below).
113 soft_scale_loop_line:
@ Extract and weight source pixels 0/1 from the packed word in r12
@ (lr presumably holds an extraction mask - not visible here).
117 and r4, lr, r12,lsl #1
119 and r5, lr, r12,lsr #7
121 and r4, r4, r9, lsl #2
122 orr r4, r4, r4, lsl #14 @ r4[31:16] = 1/4 pix_s 0
123 and r5, r5, r9, lsl #2
@ x - x>>2 computes 3/4 of the masked component value:
124 sub r6, r5, r5, lsr #2 @ r6 = 3/4 pix_s 1
125 add r4, r4, r6, lsl #16 @ pix_d 0, 1
126 and r6, lr, r12,lsr #15
128 and r12,lr, r12,lsr #23
130 and r6, r6, r9, lsl #2
133 sub r6, r6, r6, lsr #2 @ r6 = 3/4 pix_s 2
134 orr r5, r5, r6, lsl #16
@ Second packed source word (r7) enters the blend here:
136 and r6, lr, r7, lsl #1
138 and r12,r12,r9, lsl #2
139 add r5, r5, r12,lsl #14 @ pix_d 2, 3
140 and r6, r6, r9, lsl #2
141 orr r6, r12,r6, lsl #16 @ pix_d 4, 5
143 and r12,lr, r7, lsr #7
145 and r10,lr, r7, lsr #15
147 and r12,r12,r9, lsl #2
148 sub r8, r12,r12,lsr #2 @ r8 = 3/4 pix_s 1
149 add r8, r8, r6, lsr #18
150 and r7, lr, r7, lsr #23
152 and r10,r10,r9, lsl #2
153 orr r8, r8, r10,lsl #15
154 add r8, r8, r12,lsl #15 @ pix_d 6, 7
155 sub r10,r10,r10,lsr #2 @ r10= 3/4 pix_s 2
156 and r7, r7, r9, lsl #2
157 add r10,r10,r7, lsr #2 @ += 1/4 pix_s 3
158 orr r10,r10,r7, lsl #16 @ pix_d 8, 9
@ Write 5 words (10 output pixels) descending from dst_end.
162 stmdb r0!, {r4,r5,r6,r8,r10}
163 bne soft_scale_loop_line
@ Epilogue: restore callee-saved registers; the return instruction is
@ outside this chunk.
168 ldmfd sp!,{r4-r11,lr}
172 /* buggy and slow, probably because function call overhead
173 @ renderer helper, based on bitbank's method
174 .global draw8pix @ uint8 *P, uint8 *C, uint8 *PALRAM @ dest, src, pal
179 ldrb r3, [r1] @ get bit 0 pixels
181 orr r12,r12,r12,lsl #8
182 orr r12,r12,r12,lsl #16
183 ldrb r1, [r1, #8] @ get bit 1 pixels
184 orr r3, r3, r3, lsl #9 @ shift them over 1 byte + 1 bit
185 orr r3, r3, r3, lsl #18 @ now 4 pixels take up 4 bytes
186 and r4, r12,r3, lsr #7 @ mask off the upper nibble pixels we want
187 and r5, r12,r3, lsr #3 @ mask off the lower nibble pixels we want
190 orr r1, r1, r1, lsl #9 @ process the bit 1 pixels
191 orr r1, r1, r1, lsl #18
192 and r3, r12,r1, lsr #7 @ mask off the upper nibble pixels we want
193 and r1, r12,r1, lsr #3 @ mask off the lower nibble
194 orr r4, r4, r3, lsl #1
195 orr r5, r5, r1, lsl #5
197 @ can this be avoided?
198 mov r4, r4, lsl #3 @ *8
211 mov r5, r5, lsl #3 @ *8