drivers/gp2x/asmutils.s

   1 @ vim:filetype=armasm
   2
   3 @ Assembly optimized routines for gpfce - FCE Ultra port
   4 @ (c) Copyright 2007, Grazvydas "notaz" Ignotas
   5
   6 @ test
   7 .global flushcache @ beginning_addr, end_addr, flags
   8
   9 flushcache:
  10     swi #0x9f0002
  11     mov pc, lr
  12
  13
  14 .global block_or @ void *src, size_t n, int pat
  15
  16 block_or:
  17     stmfd   sp!, {r4-r5}
  18     orr     r2, r2, r2, lsl #8
  19     orr     r2, r2, r2, lsl #16
  20     mov     r1, r1, lsr #4
  21 block_loop_or:
  22     ldmia   r0, {r3-r5,r12}
  23     subs    r1, r1, #1
  24     orr     r3, r3, r2
  25     orr     r4, r4, r2
  26     orr     r5, r5, r2
  27     orr     r12,r12,r2
  28     stmia   r0!, {r3-r5,r12}
  29     bne     block_loop_or
  30     ldmfd   sp!, {r4-r5}
  31     bx      lr
  32
  33
  34 .global block_and @ void *src, size_t n, int andpat
  35
  36 block_and:
  37     stmfd   sp!, {r4-r5}
  38     orr     r2, r2, r2, lsl #8
  39     orr     r2, r2, r2, lsl #16
  40     mov     r1, r1, lsr #4
  41 block_loop_and:
  42     ldmia   r0, {r3-r5,r12}
  43     subs    r1, r1, #1
  44     and     r3, r3, r2
  45     and     r4, r4, r2
  46     and     r5, r5, r2
  47     and     r12,r12,r2
  48     stmia   r0!, {r3-r5,r12}
  49     bne     block_loop_and
  50     ldmfd   sp!, {r4-r5}
  51     bx      lr
  52
  53
  54 .global block_andor @ void *src, size_t n, int andpat, int orpat
  55
  56 block_andor:
  57     stmfd   sp!, {r4-r6}
  58     orr     r2, r2, r2, lsl #8
  59     orr     r2, r2, r2, lsl #16
  60     orr     r3, r3, r3, lsl #8
  61     orr     r3, r3, r3, lsl #16
  62     mov     r1, r1, lsr #4
  63 block_loop_andor:
  64     ldmia   r0, {r4-r6,r12}
  65     subs    r1, r1, #1
  66     and     r4, r4, r2
  67     orr     r4, r4, r3
  68     and     r5, r5, r2
  69     orr     r5, r5, r3
  70     and     r6, r6, r2
  71     orr     r6, r6, r3
  72     and     r12,r12,r2
  73     orr     r12,r12,r3
  74     stmia   r0!, {r4-r6,r12}
  75     bne     block_loop_andor
  76     ldmfd   sp!, {r4-r6}
  77     bx      lr
  78
  79
  80 .global spend_cycles @ c
  81
  82 spend_cycles:
  83     mov     r0, r0, lsr #2  @ 4 cycles/iteration
  84     sub     r0, r0, #2      @ entry/exit/init
  85 .sc_loop:
  86     subs    r0, r0, #1
  87     bpl     .sc_loop
  88
  89     bx      lr
  90
  91
  92 .global memset32 @ int *dest, int c, int count
  93
  94 memset32:
  95     stmfd   sp!, {lr}
  96
  97     mov     r3, r1
  98     subs    r2, r2, #4
  99     bmi     mst32_fin
 100
 101     mov     r12,r1
 102     mov     lr, r1
 103
 104 mst32_loop:
 105     subs    r2, r2, #4
 106     stmia   r0!, {r1,r3,r12,lr}
 107     bpl     mst32_loop
 108
 109 mst32_fin:
 110     tst     r2, #1
 111     strne   r1, [r0], #4
 112
 113     tst     r2, #2
 114     stmneia r0!, {r1,r3}
 115
 116     ldmfd   sp!, {lr}
 117     bx      lr
 118
 119
 120
 121 .global soft_scale @ void *dst, unsigned short *pal, int line_offs, int lines
 122
 123 soft_scale:
 124     stmfd   sp!,{r4-r11,lr}
 125     mov     lr, #0xff
 126     mov     lr, lr, lsl #1
 127     mov     r9, #0x3900        @ f800 07e0 001f | e000 0780 001c | 3800 01e0 0007
 128     orr     r9, r9, #0x00e7
 129
 130     mov     r11,r3             @ r11= line counter
 131     mov     r3, r1             @ r3 = pal base
 132
 133     mov     r12,#320
 134     mul     r2, r12,r2
 135     add     r4, r0, r2, lsl #1 @ r4 = dst_start
 136     add     r5, r0, r2         @ r5 = src_start
 137     mul     r12,r11,r12
 138     add     r0, r4, r12,lsl #1 @ r0 = dst_end
 139     add     r1, r5, r12        @ r1 = src_end
 140
 141     mov     r2, r11
 142
 143 soft_scale_loop:
 144     sub     r1, r1, #64        @ skip borders
 145     orr     r2, r2, #(256/8-1)<<24
 146
 147 soft_scale_loop_line:
 148     ldr     r12, [r1, #-8]!
 149     ldr     r7,  [r1, #4]
 150
 151     and     r4, lr, r12,lsl #1
 152     ldrh    r4, [r3, r4]
 153     and     r5, lr, r12,lsr #7
 154     ldrh    r5, [r3, r5]
 155     and     r11,r4, r9, lsl #2
 156     orr     r4, r4, r11,lsl #14       @ r4[31:16] = 1/4 pix_s 0
 157     and     r11,r5, r9, lsl #2
 158     sub     r6, r5, r11,lsr #2        @ r6 = 3/4 pix_s 1
 159     add     r4, r4, r6, lsl #16       @ pix_d 0, 1
 160     and     r6, lr, r12,lsr #15
 161     ldrh    r6, [r3, r6]
 162     and     r12,lr, r12,lsr #23
 163     ldrh    r12,[r3, r12]
 164     bic     r11,r6, #0x0820
 165     bic     r5, r5, #0x0820
 166     add     r5, r5, r11
 167     mov     r5, r5, lsr #1
 168     and     r11,r6, r9, lsl #2
 169     sub     r6, r6, r11,lsr #2        @ r6 = 3/4 pix_s 2
 170     orr     r5, r5, r6, lsl #16
 171
 172     and     r6, lr, r7, lsl #1
 173     ldrh    r6, [r3, r6]
 174     and     r11,r12,r9, lsl #2
 175     add     r5, r5, r11,lsl #14       @ pix_d 2, 3
 176     orr     r6, r12,r6, lsl #16       @ pix_d 4, 5
 177
 178     and     r12,lr, r7, lsr #7
 179     ldrh    r12,[r3, r12]
 180     and     r10,lr, r7, lsr #15
 181     ldrh    r10,[r3, r10]
 182     and     r11,r12,r9, lsl #2
 183     sub     r8, r12,r11,lsr #2        @ r8 = 3/4 pix_s 1
 184     and     r11,r6, r9, lsl #18
 185     add     r8, r8, r11,lsr #18
 186     mov     r8, r8, lsl #16
 187     and     r7, lr, r7, lsr #23
 188     ldrh    r7, [r3, r7]
 189     bic     r11,r10,#0x0820
 190     bic     r12,r12,#0x0820
 191     add     r12,r12,r11
 192     add     r8, r8, r12,lsr #1        @ pix_d 6, 7
 193     mov     r8, r8, ror #16
 194     and     r11,r10,r9, lsl #2
 195     sub     r10,r10,r11,lsr #2        @ r10= 3/4 pix_s 2
 196     and     r11,r7, r9, lsl #2
 197     add     r10,r10,r11,lsr #2        @ += 1/4 pix_s 3
 198     orr     r10,r10,r7, lsl #16       @ pix_d 8, 9
 199
 200     subs    r2, r2, #1<<24
 201
 202     stmdb   r0!, {r4,r5,r6,r8,r10}
 203     bpl     soft_scale_loop_line
 204
 205     add     r2, r2, #1<<24
 206     subs    r2, r2, #1
 207     bne     soft_scale_loop
 208
 209     ldmfd   sp!,{r4-r11,lr}
 210     bx      lr
 211
 212
 213 @ void convert2RGB555(unsigned short *dst, unsigned char *src, unsigned short *pal, int count);
 214
 215 .global convert2RGB555
 216
 217 convert2RGB555:
 218     stmfd   sp!,{r4-r8,lr}
 219     mov     lr, #0xff
 220     mov     lr, lr, lsl #1
 221
 222     mov     r3, r3, lsr #3
 223
 224 convert2RGB555_loop:
 225     ldmia   r1!,{r4,r5}
 226
 227     and     r6, lr, r4, lsl #1
 228     ldrh    r6, [r2, r6]
 229     and     r7, lr, r4, lsr #7
 230     ldrh    r7, [r2, r7]
 231     and     r8, lr, r4, lsr #15
 232     ldrh    r8, [r2, r8]
 233     and     r4, lr, r4, lsr #23
 234     ldrh    r4, [r2, r4]
 235
 236     orr     r6, r6, r7, lsl #16
 237     and     r12,lr, r5, lsl #1
 238     ldrh    r12, [r2, r12]
 239     orr     r7, r8, r4, lsl #16
 240     and     r8, lr, r5, lsr #7
 241     ldrh    r8, [r2, r8]
 242     and     r4, lr, r5, lsr #15
 243     ldrh    r4, [r2, r4]
 244     and     r5, lr, r5, lsr #23
 245     ldrh    r5, [r2, r5]
 246     orr     r8, r12,r8, lsl #16
 247     orr     r12,r4, r5, lsl #16
 248
 249     stmia   r0!,{r6,r7,r8,r12}
 250     subs    r3, r3, #1
 251     bne     convert2RGB555_loop
 252
 253     ldmfd   sp!,{r4-r8,lr}
 254     bx      lr
 255