drivers/gp2x/asmutils.s

   1 @ vim:filetype=armasm
   2
   3 @ test
   4 .global flushcache @ beginning_addr, end_addr, flags
   5
     @ flushcache(beginning_addr, end_addr, flags)
     @ Raises software interrupt 0x9f0002 without touching r0-r2, so the three
     @ arguments reach the handler exactly as the caller passed them; r0 on
     @ return is whatever the handler left there.
     @ NOTE(review): 0x9f0002 looks like the ARM Linux OABI cacheflush call
     @ (0x900000 + 0x0f0002) - confirm against the target kernel headers.
   6 flushcache:
   7     swi #0x9f0002
     @ Return with bx like every other routine in this file, so the routine
     @ also works when called from Thumb code (was: mov pc, lr).
   8     bx      lr
   9
  10
  11 .global block_or @ void *src, size_t n, int pat
  12
     @ block_or(void *src, size_t n, int pat)
     @ ORs a byte pattern into a buffer in place, 16 bytes per iteration.
     @   r0 = src  buffer, read and written with ldmia/stmia and advanced by
     @             16 each pass (NOTE(review): presumably has to be
     @             word-aligned - confirm against callers)
     @   r1 = n    size in bytes; only the n / 16 whole chunks are processed,
     @             the trailing n % 16 bytes are left untouched
     @   r2 = pat  spread over the word by the two orr/lsl steps, so a value
     @             that fits in 8 bits ends up in all four bytes
     @ Scratch: r3, r12. r4-r5 are saved and restored on the stack.
  13 block_or:
     @ With fewer than 16 bytes the chunk count below would be 0 and the
     @ first "subs" would wrap it to 0xffffffff, so leave before touching
     @ the stack or the buffer.
         cmp     r1, #16
         bxlo    lr
  14     stmfd   sp!, {r4-r5}
  15     orr     r2, r2, r2, lsl #8  @ pat in bytes 0-1
  16     orr     r2, r2, r2, lsl #16 @ pat in bytes 0-3
  17     mov     r1, r1, lsr #4      @ r1 = number of 16-byte chunks (>= 1)
  18 block_loop_or:
  19     ldmia   r0, {r3-r5,r12}     @ fetch one chunk, r0 not advanced yet
  20     subs    r1, r1, #1          @ sets Z for the bne below; nothing in between alters flags
  21     orr     r3, r3, r2
  22     orr     r4, r4, r2
  23     orr     r5, r5, r2
  24     orr     r12,r12,r2
  25     stmia   r0!, {r3-r5,r12}    @ write the chunk back, r0 += 16
  26     bne     block_loop_or
  27     ldmfd   sp!, {r4-r5}
  28     bx      lr
  29
  30
  31 .global block_andor @ void *src, size_t n, int andpat, int orpat
  32
     @ block_andor(void *src, size_t n, int andpat, int orpat)
     @ Rewrites a buffer in place as (word & andpat) | orpat, 16 bytes per
     @ iteration.
     @   r0 = src     buffer, read and written with ldmia/stmia and advanced
     @                by 16 each pass (NOTE(review): presumably has to be
     @                word-aligned - confirm against callers)
     @   r1 = n       size in bytes; only the n / 16 whole chunks are
     @                processed, the trailing n % 16 bytes are left untouched
     @   r2 = andpat  \ each spread over the word by two orr/lsl steps, so a
     @   r3 = orpat   / value that fits in 8 bits ends up in all four bytes
     @ Scratch: r12. r4-r6 are saved and restored on the stack.
  33 block_andor:
     @ With fewer than 16 bytes the chunk count below would be 0 and the
     @ first "subs" would wrap it to 0xffffffff, so leave before touching
     @ the stack or the buffer.
         cmp     r1, #16
         bxlo    lr
  34     stmfd   sp!, {r4-r6}
  35     orr     r2, r2, r2, lsl #8  @ andpat in bytes 0-1
  36     orr     r2, r2, r2, lsl #16 @ andpat in bytes 0-3
  37     orr     r3, r3, r3, lsl #8  @ orpat in bytes 0-1
  38     orr     r3, r3, r3, lsl #16 @ orpat in bytes 0-3
  39     mov     r1, r1, lsr #4      @ r1 = number of 16-byte chunks (>= 1)
  40 block_loop_andor:
  41     ldmia   r0, {r4-r6,r12}     @ fetch one chunk, r0 not advanced yet
  42     subs    r1, r1, #1          @ sets Z for the bne below; nothing in between alters flags
  43     and     r4, r4, r2
  44     orr     r4, r4, r3
  45     and     r5, r5, r2
  46     orr     r5, r5, r3
  47     and     r6, r6, r2
  48     orr     r6, r6, r3
  49     and     r12,r12,r2
  50     orr     r12,r12,r3
  51     stmia   r0!, {r4-r6,r12}    @ write the chunk back, r0 += 16
  52     bne     block_loop_andor
  53     ldmfd   sp!, {r4-r6}
  54     bx      lr
  55
  56
  57 .global spend_cycles @ c
  58
     @ spend_cycles(c): busy-wait delay loop.
     @   r0 = c, the requested cycle count. It is divided by 4 (the loop is
     @        budgeted at 4 cycles per iteration) and reduced by 2 as an
     @        allowance for entry/exit overhead, then counted down until the
     @        result goes negative.
     @ For c < 8 the adjusted count already starts below zero, so the loop
     @ body still executes once. Only r0 and the flags are modified.
  59 spend_cycles:
  60     mov     r0, r0, lsr #2  @ 4 cycles/iteration
  61     sub     r0, r0, #2      @ entry/exit/init
  62 .sc_loop:
  63     subs    r0, r0, #1      @ N flag set once the count drops below zero
  64     bpl     .sc_loop        @ keep spinning while the count is still >= 0
  65
  66     bx      lr
  67
  68
  69 .global soft_scale @ void *dst, unsigned short *pal, int offs, int lines
  70
     @ soft_scale(void *dst, unsigned short *pal, int offs, int lines)
     @ In-place palette expansion with a 4:5 horizontal stretch: 8-bit palette
     @ indices are read from the buffer and 16-bit colours are written back
     @ into the same buffer, every 8 source pixels becoming 10 output pixels
     @ (256 -> 320 per line).
     @   r0 = dst    buffer base; output line pitch is 640 bytes, source line
     @               pitch is 320 bytes, both measured from dst
     @   r1 = pal    halfword table indexed by source byte
     @   r2 = offs   index of the first line to process
     @   r3 = lines  number of lines; <= 0 returns immediately
     @ Both pointers start at the END of their regions and walk downwards.
     @ The output is wider than the source, so this looks like what keeps
     @ writes from landing on source bytes that have not been read yet.
     @ Per line the last 64 source bytes are skipped and the 256 bytes in
     @ front of them are consumed.
     @ Every colour is masked with 0xe79c before use (the low two bits of each
     @ field are dropped; the masks match an RGB565 layout), so quarters and
     @ halves can be taken with plain shifts without bleeding between fields.
     @ With s0..s7 the 8 source pixels (s0 = lowest byte of the first word)
     @ the 10 outputs are:
     @   d0 = s0             d1 = 1/4 s0 + 3/4 s1    d2 = (s1 + s2) / 2
     @   d3 = 3/4 s2 + 1/4 s3                        d4 = s3
     @   d5 = s4             d6 = 1/4 s4 + 3/4 s5    d7 = (s5 + s6) / 2
     @   d8 = 3/4 s6 + 1/4 s7                        d9 = s7
  71 soft_scale:
     @ A non-positive line count would never bring the outer "subs r11" to
     @ zero on the first pass and the loop would run away through memory,
     @ so bail out before anything is pushed.
         cmp     r3, #0
         bxle    lr
  72     stmfd   sp!,{r4-r11,lr}
  73     mov     lr, #0xff
  74     mov     lr, lr, lsl #1     @ lr = 0x1fe: mask that yields (index byte * 2)
  75     mov     r9, #0x3900        @ f800 07e0 001f | e000 0780 001c | 3800 01e0 0007
  76     orr     r9, r9, #0x00e7    @ r9 = 0x39e7, always used as r9 lsl #2 = 0xe79c
  77
  78     mov     r11,r3             @ r11= line counter
  79     mov     r3, r1             @ r3 = pal base
  80
  81     mov     r12,#320
  82     mul     r2, r12,r2         @ r2 = offs * 320
  83     add     r4, r0, r2, lsl #1 @ r4 = dst_start
  84     add     r5, r0, r2         @ r5 = src_start
  85     mul     r12,r11,r12        @ r12 = lines * 320
  86     add     r0, r4, r12,lsl #1 @ r0 = dst_end
  87     add     r1, r5, r12        @ r1 = src_end
     @ r4/r5 were only needed to derive the end pointers; scratch from here on.
  88
  89 soft_scale_loop:
  90     sub     r1, r1, #64        @ skip borders
  91     mov     r2, #256/8         @ r2 = groups of 8 source pixels left in this line
  92
  93 soft_scale_loop_line:
  94     ldr     r12, [r1, #-8]!    @ step back 8 bytes; r12 = s0..s3
  95     ldr     r7,  [r1, #4]      @ r7 = s4..s7
  96
  97     and     r4, lr, r12,lsl #1
  98     ldrh    r4, [r3, r4]       @ r4 = pal[s0]
  99     and     r5, lr, r12,lsr #7
 100     ldrh    r5, [r3, r5]       @ r5 = pal[s1]
 101     and     r4, r4, r9, lsl #2
 102     orr     r4, r4, r4, lsl #14       @ r4[31:16] = 1/4 pix_s 0
 103     and     r5, r5, r9, lsl #2
 104     sub     r6, r5, r5, lsr #2        @ r6 = 3/4 pix_s 1
 105     add     r4, r4, r6, lsl #16       @ pix_d 0, 1
 106     and     r6, lr, r12,lsr #15
 107     ldrh    r6, [r3, r6]       @ r6 = pal[s2]
 108     and     r12,lr, r12,lsr #23
 109     ldrh    r12,[r3, r12]      @ r12 = pal[s3]
 110     and     r6, r6, r9, lsl #2
 111     add     r5, r5, r6
 112     mov     r5, r5, lsr #1            @ r5 = (pix_s 1 + pix_s 2) / 2
 113     sub     r6, r6, r6, lsr #2        @ r6 = 3/4 pix_s 2
 114     orr     r5, r5, r6, lsl #16
 115
 116     and     r6, lr, r7, lsl #1
 117     ldrh    r6, [r3, r6]       @ r6 = pal[s4]
 118     and     r12,r12,r9, lsl #2
 119     add     r5, r5, r12,lsl #14       @ pix_d 2, 3
 120     and     r6, r6, r9, lsl #2
 121     orr     r6, r12,r6, lsl #16       @ pix_d 4, 5
 122
 123     and     r12,lr, r7, lsr #7
 124     ldrh    r12,[r3, r12]      @ r12 = pal[s5]
 125     and     r10,lr, r7, lsr #15
 126     ldrh    r10,[r3, r10]      @ r10 = pal[s6]
 127     and     r12,r12,r9, lsl #2
 128     sub     r8, r12,r12,lsr #2        @ r8 = 3/4 pix_s 5
 129     add     r8, r8, r6, lsr #18       @ += 1/4 pix_s 4 (upper half of r6)
 130     and     r7, lr, r7, lsr #23
 131     ldrh    r7, [r3, r7]       @ r7 = pal[s7]
 132     and     r10,r10,r9, lsl #2
 133     orr     r8, r8, r10,lsl #15       @ upper half = 1/2 pix_s 6
 134     add     r8, r8, r12,lsl #15       @ pix_d 6, 7
 135     sub     r10,r10,r10,lsr #2        @ r10= 3/4 pix_s 6
 136     and     r7, r7, r9, lsl #2
 137     add     r10,r10,r7, lsr #2        @ += 1/4 pix_s 7
 138     orr     r10,r10,r7, lsl #16       @ pix_d 8, 9
 139
 140     subs    r2, r2, #1         @ flags survive the stmdb and feed the bne
 141
 142     stmdb   r0!, {r4,r5,r6,r8,r10}    @ r0 -= 20; 10 output pixels, pix_d 0 in the lowest word
 143     bne     soft_scale_loop_line
 144
 145     subs    r11,r11,#1
 146     bne     soft_scale_loop
 147
 148     ldmfd   sp!,{r4-r11,lr}
 149     bx      lr
 150
 151
 152 /* buggy and slow, probably because of function call overhead
 153 @ renderer helper, based on bitbank's method
 154 .global draw8pix @ uint8 *P, uint8 *C, uint8 *PALRAM @ dest, src, pal
 155
 156 draw8pix:
 157     stmfd sp!, {r4,r5}
 158
 159     ldrb  r3, [r1]            @ get bit 0 pixels
 160     mov   r12,#1
 161     orr   r12,r12,r12,lsl #8
 162     orr   r12,r12,r12,lsl #16
 163     ldrb  r1, [r1, #8]        @ get bit 1 pixels
 164     orr   r3, r3, r3, lsl #9  @ shift them over 1 byte + 1 bit
 165     orr   r3, r3, r3, lsl #18 @ now 4 pixels take up 4 bytes
 166     and   r4, r12,r3, lsr #7  @ mask off the upper nibble pixels we want
 167     and   r5, r12,r3, lsr #3  @ mask off the lower nibble pixels we want
 168     ldr   r2, [r2]
 169
 170     orr   r1, r1, r1, lsl #9  @ process the bit 1 pixels
 171     orr   r1, r1, r1, lsl #18
 172     and   r3, r12,r1, lsr #7  @ mask off the upper nibble pixels we want
 173     and   r1, r12,r1, lsr #3  @ mask off the lower nibble
 174     orr   r4, r4, r3, lsl #1
 175     orr   r5, r5, r1, lsl #5
 176
 177     @ can this be avoided?
 178     mov   r4, r4, lsl #3      @ *8
 179     mov   r3, r2, ror r4
 180     strb  r3, [r0], #1
 181     mov   r4, r4, lsr #8
 182     mov   r3, r2, ror r4
 183     strb  r3, [r0], #1
 184     mov   r4, r4, lsr #8
 185     mov   r3, r2, ror r4
 186     strb  r3, [r0], #1
 187     mov   r4, r4, lsr #8
 188     mov   r3, r2, ror r4
 189     strb  r3, [r0], #1
 190
 191     mov   r5, r5, lsl #3      @ *8
 192     mov   r3, r2, ror r5
 193     strb  r3, [r0], #1
 194     mov   r5, r5, lsr #8
 195     mov   r3, r2, ror r5
 196     strb  r3, [r0], #1
 197     mov   r5, r5, lsr #8
 198     mov   r3, r2, ror r5
 199     strb  r3, [r0], #1
 200     mov   r5, r5, lsr #8
 201     mov   r3, r2, ror r5
 202     strb  r3, [r0], #1
 203
 204     ldmfd sp!, {r4,r5}
 205     bx    lr
 206 */
 207