# vim:filetype=mips

# Some miscellaneous routines for Allegrex MIPS
# (c) Copyright 2007, Grazvydas "notaz" Ignotas
# All Rights Reserved
| 6 | |
# Assembler setup for the hand-scheduled routines in this file.
.set noreorder                          # the assembler must not move instructions or fill
                                        # branch delay slots; every slot is written by hand
.set noat                               # no implicit use of $at by assembler macros

.text
.align 4                                # power-of-two alignment on MIPS gas: 2^4 = 16 bytes
| 12 | |
.globl memset32 # int *dest, int c, int count

#-----------------------------------------------------------------------
# void memset32(int *dest, int c, int count)
#
# Store the 32-bit value `c` into `count` consecutive words at `dest`.
#
#   In:    $a0 = dest  (expected to be 4-byte aligned: only `sw` is used)
#          $a1 = c
#          $a2 = count, in words; count <= 0 stores nothing
#   Out:   none
#   Clobb: $t0, $a0, $a2
#
# Three phases:
#   aloop - single words until dest is 64-byte aligned
#   bloop - 16 words (64 bytes) per iteration, after `cache 0x18`
#           on the destination line
#   cloop - the remaining (count & 15) words
#
# The file is assembled with .set noreorder, so the instruction that
# follows each branch is its delay slot and always executes.
#-----------------------------------------------------------------------
memset32:
    blez    $a2, ms32_return            # nothing to store; without this check the
    nop                                 # alignment loop would store before testing count

ms32_aloop:
    andi    $t0, $a0, 0x3f              # t0 = dest offset inside a 64-byte line
    beqz    $t0, ms32_bloop_prep        # already aligned -> block loop
    nop
    sw      $a1, 0($a0)
    addiu   $a2, $a2, -1
    beqz    $a2, ms32_return            # ran out of words before reaching alignment
    addiu   $a0, $a0, 4                 # (delay slot) dest += 4
    j       ms32_aloop
    nop

ms32_bloop_prep:
    srl     $t0, $a2, 4                 # we will do 64 bytes per iteration (cache line)
    beqz    $t0, ms32_bloop_end
    # The delay slot is the addiu at ms32_bloop; when the branch is taken
    # $t0 is not read again, so the extra decrement is harmless.

ms32_bloop:
    addiu   $t0, $t0, -1
    cache   0x18, ($a0)                 # create dirty exclusive
    sw      $a1, 0x00($a0)
    sw      $a1, 0x04($a0)
    sw      $a1, 0x08($a0)
    sw      $a1, 0x0c($a0)
    sw      $a1, 0x10($a0)
    sw      $a1, 0x14($a0)
    sw      $a1, 0x18($a0)
    sw      $a1, 0x1c($a0)
    sw      $a1, 0x20($a0)
    sw      $a1, 0x24($a0)
    sw      $a1, 0x28($a0)
    sw      $a1, 0x2c($a0)
    sw      $a1, 0x30($a0)
    sw      $a1, 0x34($a0)
    sw      $a1, 0x38($a0)
    sw      $a1, 0x3c($a0)
    bnez    $t0, ms32_bloop
    addiu   $a0, $a0, 0x40              # (delay slot) dest += 64

ms32_bloop_end:
    andi    $a2, $a2, 0x0f              # words left over after the 16-word blocks
    beqz    $a2, ms32_return
    # The delay slot is the addiu at ms32_cloop; when the branch is taken
    # $a2 is dead, so the extra decrement is harmless.

ms32_cloop:
    addiu   $a2, $a2, -1
    sw      $a1, 0($a0)
    bnez    $a2, ms32_cloop
    addiu   $a0, $a0, 4                 # (delay slot) dest += 4

ms32_return:
    jr      $ra
    nop
| 66 | |
| 67 | |
.globl memset32_uncached # int *dest, int c, int count

#-----------------------------------------------------------------------
# void memset32_uncached(int *dest, int c, int count)
#
# Store the 32-bit value `c` into `count` consecutive words at `dest`.
# Unlike memset32 this variant has no alignment prologue and issues no
# `cache` instruction; it uses plain word stores only.
#
#   In:    $a0 = dest  (expected to be 4-byte aligned: only `sw` is used)
#          $a1 = c
#          $a2 = count, in words; must be >= 0 (it is shifted with a
#                logical `srl`, so a negative value becomes a huge count)
#   Out:   none
#   Clobb: $t0, $a0, $a2
#
# The file is assembled with .set noreorder, so the instruction that
# follows each branch is its delay slot and always executes.
#-----------------------------------------------------------------------
memset32_uncached:
    srl     $t0, $a2, 3                 # we will do 32 bytes per iteration
    beqz    $t0, ms32u_bloop_end
    # The delay slot is the addiu at ms32u_bloop; when the branch is taken
    # $t0 is not read again, so the extra decrement is harmless.

ms32u_bloop:
    addiu   $t0, $t0, -1
    sw      $a1, 0x00($a0)
    sw      $a1, 0x04($a0)
    sw      $a1, 0x08($a0)
    sw      $a1, 0x0c($a0)
    sw      $a1, 0x10($a0)
    sw      $a1, 0x14($a0)
    sw      $a1, 0x18($a0)
    sw      $a1, 0x1c($a0)
    bnez    $t0, ms32u_bloop
    addiu   $a0, $a0, 0x20              # (delay slot) dest += 32

ms32u_bloop_end:
    andi    $a2, $a2, 0x07              # words left over after the 8-word blocks; the mask
                                        # has to match the `srl 3` above or the tail loop
                                        # rewrites words past the end of the request
    beqz    $a2, ms32u_return
    # The delay slot is the addiu at ms32u_cloop; when the branch is taken
    # $a2 is dead, so the extra decrement is harmless.

ms32u_cloop:
    addiu   $a2, $a2, -1
    sw      $a1, 0($a0)
    bnez    $a2, ms32u_cloop
    addiu   $a0, $a0, 4                 # (delay slot) dest += 4

ms32u_return:
    jr      $ra
    nop
| 100 | |
| 101 | |
.globl memcpy32 # int *dest, int *src, int count

#-----------------------------------------------------------------------
# void memcpy32(int *dest, int *src, int count)
#
# Copy `count` 32-bit words from `src` to `dest`.
#
#   In:    $a0 = dest  (expected to be 4-byte aligned: only `sw` is used)
#          $a1 = src   (expected to be 4-byte aligned: only `lw` is used)
#          $a2 = count, in words; count <= 0 copies nothing
#   Out:   none
#   Clobb: $t0-$t9, $a0, $a1, $a2
#
# Three phases:
#   aloop - single words until dest is 64-byte aligned
#   bloop - 16 words (64 bytes) per iteration, after `cache 0x18`
#           on the destination line
#   cloop - the remaining (count & 15) words
#
# NOTE(review): the block loop issues `cache 0x18` on the destination
# line before reading the source, so src and dest presumably must not
# share a cache line - confirm against the Allegrex cache documentation.
#
# The file is assembled with .set noreorder, so the instruction that
# follows each branch is its delay slot and always executes.
#-----------------------------------------------------------------------
memcpy32:
    blez    $a2, mc32_return            # nothing to copy; without this check the
    nop                                 # alignment loop would copy before testing count

mc32_aloop:
    andi    $t0, $a0, 0x3f              # t0 = dest offset inside a 64-byte line
    beqz    $t0, mc32_bloop_prep        # already aligned -> block loop
    nop
    lw      $t1, 0($a1)
    addiu   $a2, $a2, -1
    sw      $t1, 0($a0)
    beqz    $a2, mc32_return            # ran out of words before reaching alignment
    addiu   $a0, $a0, 4                 # (delay slot) dest += 4
    j       mc32_aloop
    addiu   $a1, $a1, 4                 # (delay slot) src += 4

mc32_bloop_prep:
    srl     $t0, $a2, 4                 # we will do 64 bytes per iteration (cache line)
    beqz    $t0, mc32_bloop_end
    # The delay slot is the addiu at mc32_bloop; when the branch is taken
    # $t0 is not read again, so the extra decrement is harmless.

mc32_bloop:
    addiu   $t0, $t0, -1
    cache   0x18, ($a0)                 # create dirty exclusive
    lw      $t2, 0x00($a1)              # first half of the line: 8 loads, then 8 stores
    lw      $t3, 0x04($a1)
    lw      $t4, 0x08($a1)
    lw      $t5, 0x0c($a1)
    lw      $t6, 0x10($a1)
    lw      $t7, 0x14($a1)
    lw      $t8, 0x18($a1)
    lw      $t9, 0x1c($a1)
    sw      $t2, 0x00($a0)
    sw      $t3, 0x04($a0)
    sw      $t4, 0x08($a0)
    sw      $t5, 0x0c($a0)
    sw      $t6, 0x10($a0)
    sw      $t7, 0x14($a0)
    sw      $t8, 0x18($a0)
    sw      $t9, 0x1c($a0)
    lw      $t2, 0x20($a1)              # second half of the line
    lw      $t3, 0x24($a1)
    lw      $t4, 0x28($a1)
    lw      $t5, 0x2c($a1)
    lw      $t6, 0x30($a1)
    lw      $t7, 0x34($a1)
    lw      $t8, 0x38($a1)
    lw      $t9, 0x3c($a1)
    sw      $t2, 0x20($a0)
    sw      $t3, 0x24($a0)
    sw      $t4, 0x28($a0)
    sw      $t5, 0x2c($a0)
    sw      $t6, 0x30($a0)
    sw      $t7, 0x34($a0)
    sw      $t8, 0x38($a0)
    sw      $t9, 0x3c($a0)
    addiu   $a0, $a0, 0x40              # dest += 64
    bnez    $t0, mc32_bloop
    addiu   $a1, $a1, 0x40              # (delay slot) src += 64

mc32_bloop_end:
    andi    $a2, $a2, 0x0f              # words left over after the 16-word blocks
    beqz    $a2, mc32_return
    nop                                 # explicit delay slot: falling into the `lw` below
                                        # would read one word past the end of src

mc32_cloop:
    lw      $t1, 0($a1)
    addiu   $a2, $a2, -1
    addiu   $a1, $a1, 4                 # src += 4
    sw      $t1, 0($a0)
    bnez    $a2, mc32_cloop
    addiu   $a0, $a0, 4                 # (delay slot) dest += 4

mc32_return:
    jr      $ra
    nop
| 175 | |