b542be46 |
1 | # vim:filetype=mips |
2 | |
8b99ab90 |
3 | # Some misc routines for Allegrex MIPS |
4 | # (c) Copyright 2007, Grazvydas "notaz" Ignotas |
5 | # All Rights Reserved |
6 | |
b542be46 |
7 | .set noreorder |
8 | .set noat |
9 | |
10 | .text |
11 | .align 4 |
12 | |
.globl memset32 # int *dest, int c, int count

#-----------------------------------------------------------------------
# void memset32(int *dest, int c, int count)
#
# Stores the 32-bit value c into count consecutive words at dest.
#
# In:    $a0 = dest  (advanced in 4-byte steps; assumed word-aligned)
#        $a1 = c     (word value to store)
#        $a2 = count (number of words; <= 0 stores nothing)
# Out:   nothing ($v0 is not written)
# Clobb: $t0, $a0, $a2
#
# Three phases:
#   1. head: single-word stores until dest is 64-byte aligned
#   2. bulk: 16 words (64 bytes) per iteration
#   3. tail: remaining (count & 15) words, one at a time
#
# The file is assembled with .set noreorder, so the instruction after
# every branch/jump is its delay slot and always executes.
#-----------------------------------------------------------------------
memset32:
    blez    $a2, ms32_return        # nothing to do; without this guard a zero
    nop                             # count with unaligned dest underflows $a2

ms32_aloop:
    andi    $t0, $a0, 0x3f          # t0 = dest offset within a 64-byte line
    beqz    $t0, ms32_bloop_prep    # aligned: switch to the bulk loop
    nop
    sw      $a1, 0($a0)
    addiu   $a2, -1
    beqz    $a2, ms32_return        # ran out of words before reaching alignment
    addiu   $a0, 4                  # (delay slot) dest++
    j       ms32_aloop
    nop

ms32_bloop_prep:
    srl     $t0, $a2, 4     # we will do 64 bytes per iteration (cache line)
    beqz    $t0, ms32_bloop_end
                            # delay slot is the addiu below: when the branch is
                            # taken $t0 becomes -1, but it is not read again

ms32_bloop:
    addiu   $t0, -1                 # one fewer 64-byte chunk to go
    cache   0x18, ($a0)     # create dirty exclusive
                            # NOTE(review): presumably allocates the line without
                            # fetching it, since all 64 bytes are overwritten below
    sw      $a1, 0x00($a0)
    sw      $a1, 0x04($a0)
    sw      $a1, 0x08($a0)
    sw      $a1, 0x0c($a0)
    sw      $a1, 0x10($a0)
    sw      $a1, 0x14($a0)
    sw      $a1, 0x18($a0)
    sw      $a1, 0x1c($a0)
    sw      $a1, 0x20($a0)
    sw      $a1, 0x24($a0)
    sw      $a1, 0x28($a0)
    sw      $a1, 0x2c($a0)
    sw      $a1, 0x30($a0)
    sw      $a1, 0x34($a0)
    sw      $a1, 0x38($a0)
    sw      $a1, 0x3c($a0)
    bnez    $t0, ms32_bloop
    addiu   $a0, 0x40               # (delay slot) dest += 64 bytes

ms32_bloop_end:
    andi    $a2, $a2, 0x0f          # words left over after the 16-word chunks
    beqz    $a2, ms32_return
                            # delay slot is the addiu below: harmless on the
                            # taken path because $a2 is dead at ms32_return

ms32_cloop:
    addiu   $a2, -1
    sw      $a1, 0($a0)
    bnez    $a2, ms32_cloop
    addiu   $a0, 4                  # (delay slot) dest++

ms32_return:
    jr      $ra
    nop
66 | |
67 | |
.globl memset32_uncached # int *dest, int c, int count

#-----------------------------------------------------------------------
# void memset32_uncached(int *dest, int c, int count)
#
# Stores the 32-bit value c into count consecutive words at dest using
# plain word stores only (no cache instruction, no alignment phase).
#
# In:    $a0 = dest  (advanced in 4-byte steps; assumed word-aligned)
#        $a1 = c     (word value to store)
#        $a2 = count (number of words)
# Out:   nothing ($v0 is not written)
# Clobb: $t0, $a0, $a2
#
# Two phases: 8 words (32 bytes) per bulk iteration, then the remaining
# (count & 7) words one at a time.
#
# The file is assembled with .set noreorder, so the instruction after
# every branch is its delay slot and always executes.
#-----------------------------------------------------------------------
memset32_uncached:
    srl     $t0, $a2, 3     # we will do 32 bytes per iteration
    beqz    $t0, ms32u_bloop_end
                            # delay slot is the addiu below: when the branch is
                            # taken $t0 becomes -1, but it is not read again

ms32u_bloop:
    addiu   $t0, -1                 # one fewer 32-byte chunk to go
    sw      $a1, 0x00($a0)
    sw      $a1, 0x04($a0)
    sw      $a1, 0x08($a0)
    sw      $a1, 0x0c($a0)
    sw      $a1, 0x10($a0)
    sw      $a1, 0x14($a0)
    sw      $a1, 0x18($a0)
    sw      $a1, 0x1c($a0)
    bnez    $t0, ms32u_bloop
    addiu   $a0, 0x20               # (delay slot) dest += 32 bytes

ms32u_bloop_end:
    andi    $a2, $a2, 0x07          # leftover after 8-word chunks; the mask must
                                    # match the srl by 3 above (0x0f would store
                                    # bit 3 of count twice and overrun the buffer)
    beqz    $a2, ms32u_return
                            # delay slot is the addiu below: harmless on the
                            # taken path because $a2 is dead at ms32u_return

ms32u_cloop:
    addiu   $a2, -1
    sw      $a1, 0($a0)
    bnez    $a2, ms32u_cloop
    addiu   $a0, 4                  # (delay slot) dest++

ms32u_return:
    jr      $ra
    nop
100 | |
101 | |
.globl memcpy32 # int *dest, int *src, int count

#-----------------------------------------------------------------------
# void memcpy32(int *dest, int *src, int count)
#
# Copies count 32-bit words from src to dest.
#
# In:    $a0 = dest  (advanced in 4-byte steps; assumed word-aligned)
#        $a1 = src   (advanced in 4-byte steps; assumed word-aligned)
#        $a2 = count (number of words; <= 0 copies nothing)
# Out:   nothing ($v0 is not written)
# Clobb: $t0-$t9, $a0, $a1, $a2
#
# Three phases:
#   1. head: single-word copies until dest is 64-byte aligned
#      (only dest alignment is checked, src is not)
#   2. bulk: 16 words (64 bytes) per iteration, as two 8-word bursts
#   3. tail: remaining (count & 15) words, one at a time
#
# NOTE(review): the bulk loop issues the cache op on the destination
# line before the source words are loaded; this looks unsafe if src and
# dest share a cache line - confirm callers never pass overlapping
# buffers.
#
# The file is assembled with .set noreorder, so the instruction after
# every branch/jump is its delay slot and always executes.
#-----------------------------------------------------------------------
memcpy32:
    blez    $a2, mc32_return        # nothing to do; without this guard a zero
    nop                             # count with unaligned dest underflows $a2

mc32_aloop:
    andi    $t0, $a0, 0x3f          # t0 = dest offset within a 64-byte line
    beqz    $t0, mc32_bloop_prep    # aligned: switch to the bulk loop
    nop
    lw      $t1, 0($a1)
    addiu   $a2, -1
    sw      $t1, 0($a0)
    beqz    $a2, mc32_return        # ran out of words before reaching alignment
    addiu   $a0, 4                  # (delay slot) dest++
    j       mc32_aloop
    addiu   $a1, 4                  # (delay slot) src++

mc32_bloop_prep:
    srl     $t0, $a2, 4     # we will do 64 bytes per iteration (cache line)
    beqz    $t0, mc32_bloop_end
                            # delay slot is the addiu below: when the branch is
                            # taken $t0 becomes -1, but it is not read again

mc32_bloop:
    addiu   $t0, -1                 # one fewer 64-byte chunk to go
    cache   0x18, ($a0)     # create dirty exclusive
    lw      $t2, 0x00($a1)          # first half: words 0-7
    lw      $t3, 0x04($a1)
    lw      $t4, 0x08($a1)
    lw      $t5, 0x0c($a1)
    lw      $t6, 0x10($a1)
    lw      $t7, 0x14($a1)
    lw      $t8, 0x18($a1)
    lw      $t9, 0x1c($a1)
    sw      $t2, 0x00($a0)
    sw      $t3, 0x04($a0)
    sw      $t4, 0x08($a0)
    sw      $t5, 0x0c($a0)
    sw      $t6, 0x10($a0)
    sw      $t7, 0x14($a0)
    sw      $t8, 0x18($a0)
    sw      $t9, 0x1c($a0)
    lw      $t2, 0x20($a1)          # second half: words 8-15
    lw      $t3, 0x24($a1)
    lw      $t4, 0x28($a1)
    lw      $t5, 0x2c($a1)
    lw      $t6, 0x30($a1)
    lw      $t7, 0x34($a1)
    lw      $t8, 0x38($a1)
    lw      $t9, 0x3c($a1)
    sw      $t2, 0x20($a0)
    sw      $t3, 0x24($a0)
    sw      $t4, 0x28($a0)
    sw      $t5, 0x2c($a0)
    sw      $t6, 0x30($a0)
    sw      $t7, 0x34($a0)
    sw      $t8, 0x38($a0)
    sw      $t9, 0x3c($a0)
    addiu   $a0, 0x40               # dest += 64 bytes
    bnez    $t0, mc32_bloop
    addiu   $a1, 0x40               # (delay slot) src += 64 bytes

mc32_bloop_end:
    andi    $a2, $a2, 0x0f          # words left over after the 16-word chunks
    beqz    $a2, mc32_return
                            # delay slot is the lw below: on the taken path it
                            # reads one word at src into $t1, which is then unused

mc32_cloop:
    lw      $t1, 0($a1)
    addiu   $a2, -1
    addiu   $a1, 4                  # src++
    sw      $t1, 0($a0)
    bnez    $a2, mc32_cloop
    addiu   $a0, 4                  # (delay slot) dest++

mc32_return:
    jr      $ra
    nop
175 | |