b542be46 |
# vim:filetype=mips
#
# Word-granular memory fill / copy routines, MIPS assembly (GNU as syntax).

# Everything below is code; `.align 4` requests a 2^4 = 16-byte boundary
# (GNU as treats the .align operand as a power of two on MIPS).
.text
.align 4

# Assembler modes used by every routine in this file:
#   noat      - the assembler may not use $at behind our back.
#   noreorder - the assembler does not reorder instructions or fill delay
#               slots; the instruction written after each branch/jump IS its
#               delay slot and always executes.
.set noat
.set noreorder
8 | |
.globl memset32 # int *dest, int c, int count

#-----------------------------------------------------------------------
# void memset32(int *dest, int c, int count)
#
# Store the 32-bit value `c` into `count` consecutive words at `dest`.
#
# In:      $a0 = dest   (must be word-aligned: all stores are `sw`)
#          $a1 = c      (word value to store)
#          $a2 = count  (number of words, signed; count <= 0 stores nothing)
# Out:     nothing ($v0/$v1 are not written)
# Clobber: $t0, $a0, $a2
#
# Phases:
#   1. ms32_aloop - single-word stores until dest is 64-byte aligned
#                   (or count runs out).
#   2. ms32_bloop - 16 words (64 bytes) per iteration, preceded by
#                   `cache 0x18` on the destination line.
#   3. ms32_cloop - the remaining (count & 15) words, one at a time.
#
# The file is assembled with `.set noreorder`: the instruction following
# each branch/jump is its delay slot and executes whether or not the
# branch is taken.
#-----------------------------------------------------------------------
memset32:
    # Guard: without this, count == 0 with a dest that is not 64-byte aligned
    # would store a word, drop count to -1, miss the "== 0" exit test below
    # and then run the block loop with a huge (count >> 4) iteration count.
    blez    $a2, ms32_return
    nop

ms32_aloop:
    andi    $t0, $a0, 0x3f              # $t0 = dest offset inside a 64-byte line
    beqz    $t0, ms32_bloop_prep        # already aligned -> block phase
    nop
    sw      $a1, 0($a0)
    addiu   $a2, $a2, -1
    beqz    $a2, ms32_return            # ran out of words while aligning
    addiu   $a0, $a0, 4                 # (delay slot) dest++
    j       ms32_aloop
    nop

ms32_bloop_prep:
    srl     $t0, $a2, 4                 # $t0 = number of full 16-word (64-byte) blocks
    beqz    $t0, ms32_bloop_end         # delay slot is the addiu at ms32_bloop;
                                        # harmless when taken ($t0 is dead there),
                                        # and it is the first iteration's decrement
                                        # when not taken

ms32_bloop:
    addiu   $t0, $t0, -1                # one block fewer to go
    # Original comment: "create dirty exclusive". The whole 64-byte line is
    # overwritten right below. NOTE(review): the meaning of cache op 0x18 is
    # CPU-specific - confirm against the target core's manual.
    cache   0x18, 0($a0)
    sw      $a1, 0x00($a0)
    sw      $a1, 0x04($a0)
    sw      $a1, 0x08($a0)
    sw      $a1, 0x0c($a0)
    sw      $a1, 0x10($a0)
    sw      $a1, 0x14($a0)
    sw      $a1, 0x18($a0)
    sw      $a1, 0x1c($a0)
    sw      $a1, 0x20($a0)
    sw      $a1, 0x24($a0)
    sw      $a1, 0x28($a0)
    sw      $a1, 0x2c($a0)
    sw      $a1, 0x30($a0)
    sw      $a1, 0x34($a0)
    sw      $a1, 0x38($a0)
    sw      $a1, 0x3c($a0)
    bnez    $t0, ms32_bloop
    addiu   $a0, $a0, 0x40              # (delay slot) dest += 64 bytes

ms32_bloop_end:
    andi    $a2, $a2, 0x0f              # words left after the 16-word blocks
    beqz    $a2, ms32_return            # delay slot is the addiu at ms32_cloop
                                        # (same trick as for ms32_bloop)

ms32_cloop:
    addiu   $a2, $a2, -1
    sw      $a1, 0($a0)
    bnez    $a2, ms32_cloop
    addiu   $a0, $a0, 4                 # (delay slot) dest++

ms32_return:
    jr      $ra
    nop
62 | |
63 | |
.globl memset32_uncached # int *dest, int c, int count

#-----------------------------------------------------------------------
# void memset32_uncached(int *dest, int c, int count)
#
# Store the 32-bit value `c` into `count` consecutive words at `dest`,
# using plain stores only (no cache instruction, no alignment phase).
# NOTE(review): going by the name, this is presumably meant for buffers
# accessed through an uncached mapping - confirm with callers.
#
# In:      $a0 = dest   (must be word-aligned: all stores are `sw`)
#          $a1 = c      (word value to store)
#          $a2 = count  (number of words, signed; count <= 0 stores nothing)
# Out:     nothing ($v0/$v1 are not written)
# Clobber: $t0, $a0, $a2
#
# Phases:
#   1. ms32u_bloop - 8 words (32 bytes) per iteration.
#   2. ms32u_cloop - the remaining (count & 7) words, one at a time.
#
# The file is assembled with `.set noreorder`: the instruction following
# each branch is its delay slot and executes whether or not the branch is
# taken.
#-----------------------------------------------------------------------
memset32_uncached:
    blez    $a2, ms32u_return           # a negative count would otherwise turn
    nop                                 # into a huge block count via srl

    srl     $t0, $a2, 3                 # $t0 = number of full 8-word (32-byte) blocks
    beqz    $t0, ms32u_bloop_end        # delay slot is the addiu at ms32u_bloop;
                                        # harmless when taken, first decrement
                                        # when not taken

ms32u_bloop:
    addiu   $t0, $t0, -1                # one block fewer to go
    sw      $a1, 0x00($a0)
    sw      $a1, 0x04($a0)
    sw      $a1, 0x08($a0)
    sw      $a1, 0x0c($a0)
    sw      $a1, 0x10($a0)
    sw      $a1, 0x14($a0)
    sw      $a1, 0x18($a0)
    sw      $a1, 0x1c($a0)
    bnez    $t0, ms32u_bloop
    addiu   $a0, $a0, 0x20              # (delay slot) dest += 32 bytes

ms32u_bloop_end:
    # The block loop consumed (count >> 3) * 8 words, so the remainder is
    # count & 7. (The mask used to be 0x0f, which re-stored 8 extra words
    # past the end whenever bit 3 of count was set.)
    andi    $a2, $a2, 0x07
    beqz    $a2, ms32u_return           # delay slot is the addiu at ms32u_cloop

ms32u_cloop:
    addiu   $a2, $a2, -1
    sw      $a1, 0($a0)
    bnez    $a2, ms32u_cloop
    addiu   $a0, $a0, 4                 # (delay slot) dest++

ms32u_return:
    jr      $ra
    nop
96 | |
97 | |
.globl memcpy32 # int *dest, int *src, int count

#-----------------------------------------------------------------------
# void memcpy32(int *dest, int *src, int count)
#
# Copy `count` 32-bit words from `src` to `dest`.
#
# In:      $a0 = dest   (must be word-aligned: all stores are `sw`)
#          $a1 = src    (must be word-aligned: all loads are `lw`)
#          $a2 = count  (number of words, signed; count <= 0 copies nothing)
# Out:     nothing ($v0/$v1 are not written)
# Clobber: $t0-$t9, $a0, $a1, $a2
#
# Phases:
#   1. mc32_aloop - single-word copies until dest is 64-byte aligned
#                   (or count runs out). src alignment is not considered.
#   2. mc32_bloop - 16 words (64 bytes) per iteration as two 8-word
#                   load/store bursts, preceded by `cache 0x18` on the
#                   destination line.
#   3. mc32_cloop - the remaining (count & 15) words, one at a time.
#
# NOTE(review): the cache op on the destination line is issued before the
# source words are loaded; if it behaves as the original comment says
# ("create dirty exclusive"), src and dest must not share a cache line /
# overlap - confirm against the target core's manual.
#
# The file is assembled with `.set noreorder`: the instruction following
# each branch/jump is its delay slot and executes whether or not the
# branch is taken.
#-----------------------------------------------------------------------
memcpy32:
    # Guard: without this, count == 0 with a dest that is not 64-byte aligned
    # would copy a word, drop count to -1, miss the "== 0" exit test below
    # and then run the block loop with a huge (count >> 4) iteration count.
    blez    $a2, mc32_return
    nop

mc32_aloop:
    andi    $t0, $a0, 0x3f              # $t0 = dest offset inside a 64-byte line
    beqz    $t0, mc32_bloop_prep        # already aligned -> block phase
    nop
    lw      $t1, 0($a1)
    addiu   $a2, $a2, -1
    sw      $t1, 0($a0)
    beqz    $a2, mc32_return            # ran out of words while aligning
    addiu   $a0, $a0, 4                 # (delay slot) dest++
    j       mc32_aloop
    addiu   $a1, $a1, 4                 # (delay slot) src++

mc32_bloop_prep:
    srl     $t0, $a2, 4                 # $t0 = number of full 16-word (64-byte) blocks
    beqz    $t0, mc32_bloop_end         # delay slot is the addiu at mc32_bloop;
                                        # harmless when taken ($t0 is dead there),
                                        # and it is the first iteration's decrement
                                        # when not taken

mc32_bloop:
    addiu   $t0, $t0, -1                # one block fewer to go
    cache   0x18, 0($a0)                # original comment: "create dirty exclusive"

    # first half of the line: words 0..7
    lw      $t2, 0x00($a1)
    lw      $t3, 0x04($a1)
    lw      $t4, 0x08($a1)
    lw      $t5, 0x0c($a1)
    lw      $t6, 0x10($a1)
    lw      $t7, 0x14($a1)
    lw      $t8, 0x18($a1)
    lw      $t9, 0x1c($a1)
    sw      $t2, 0x00($a0)
    sw      $t3, 0x04($a0)
    sw      $t4, 0x08($a0)
    sw      $t5, 0x0c($a0)
    sw      $t6, 0x10($a0)
    sw      $t7, 0x14($a0)
    sw      $t8, 0x18($a0)
    sw      $t9, 0x1c($a0)

    # second half of the line: words 8..15
    lw      $t2, 0x20($a1)
    lw      $t3, 0x24($a1)
    lw      $t4, 0x28($a1)
    lw      $t5, 0x2c($a1)
    lw      $t6, 0x30($a1)
    lw      $t7, 0x34($a1)
    lw      $t8, 0x38($a1)
    lw      $t9, 0x3c($a1)
    sw      $t2, 0x20($a0)
    sw      $t3, 0x24($a0)
    sw      $t4, 0x28($a0)
    sw      $t5, 0x2c($a0)
    sw      $t6, 0x30($a0)
    sw      $t7, 0x34($a0)
    sw      $t8, 0x38($a0)
    sw      $t9, 0x3c($a0)

    addiu   $a0, $a0, 0x40              # dest += 64 bytes
    bnez    $t0, mc32_bloop
    addiu   $a1, $a1, 0x40              # (delay slot) src += 64 bytes

mc32_bloop_end:
    andi    $a2, $a2, 0x0f              # words left after the 16-word blocks
    beqz    $a2, mc32_return            # delay slot is the lw at mc32_cloop;
                                        # when taken it performs one extra,
                                        # unused word load from src

mc32_cloop:
    lw      $t1, 0($a1)
    addiu   $a2, $a2, -1
    addiu   $a1, $a1, 4                 # src++
    sw      $t1, 0($a0)
    bnez    $a2, mc32_cloop
    addiu   $a0, $a0, 4                 # (delay slot) dest++

mc32_return:
    jr      $ra
    nop
171 | |