Commit | Line | Data |
---|---|---|
75e28f62 E |
1 | /* |
2 | * Copyright (C) 2011 Gilead Kutnick "Exophase" <exophase@gmail.com> | |
3 | * | |
4 | * This program is free software; you can redistribute it and/or | |
5 | * modify it under the terms of the GNU General Public License as | |
6 | * published by the Free Software Foundation; either version 2 of | |
7 | * the License, or (at your option) any later version. | |
8 | * | |
9 | * This program is distributed in the hope that it will be useful, | |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
12 | * General Public License for more details. | |
13 | */ | |
14 | ||
15 | #ifndef VECTOR_OPS | |
16 | #define VECTOR_OPS | |
17 | ||
a4021361 | 18 | #include "vector_types.h" |
75e28f62 E |
19 | |
20 | ||
/*
 * Runs `operation` once for every index in [0, iterations).
 *
 * The loop counter is a u32 named `_i`, declared inside the expansion so
 * that `operation` can refer to it (e.g. `(dest).e[_i] = ...`).
 *
 * `iterations` is parenthesized so that an expression argument such as
 * `wide ? 8 : 4` is compared as a whole instead of being split apart by
 * the precedence of `<`.
 *
 * The expansion is a bare compound statement, so a trailing semicolon at
 * the call site is optional.
 */
#define foreach_element(iterations, operation) \
{ \
  u32 _i; \
  for(_i = 0; _i < (iterations); _i++) \
  { \
    operation; \
  } \
}
29 | ||
/*
 * Copies 8 bytes from `source` into (dest).e using one u64 access.
 * Both sides are reinterpreted through u64 pointers.
 * NOTE(review): presumably relies on suitably aligned operands and a build
 * that tolerates this type punning - confirm against the compiler flags.
 */
#define load_64b(dest, source) \
  *((u64 *)(dest).e) = *((u64 *)(source))

/*
 * Copies 16 bytes from `source` into (dest).e as two u64 accesses.
 *
 * The two stores are wrapped in do { } while(0) so that they stay together
 * when the macro is the body of an unbraced if/else. The call site supplies
 * the terminating semicolon, exactly as it already had to.
 */
#define load_128b(dest, source) \
  do \
  { \
    *((u64 *)(dest).e) = *((u64 *)(source)); \
    *((u64 *)(dest).e + 1) = *(((u64 *)(source)) + 1); \
  } while(0)

/* Fills (dest).e[0..7] from `source`, read as an array of u16. */
#define load_8x16b(dest, source) \
  foreach_element(8, (dest).e[_i] = ((u16 *)(source))[_i])
39 | ||
/*
 * Copies 8 bytes from (source).e to `dest` using one u64 access.
 * NOTE(review): presumably relies on suitably aligned operands and a build
 * that tolerates this type punning - confirm against the compiler flags.
 */
#define store_64b(source, dest) \
  *((u64 *)(dest)) = *((u64 *)(source).e)

/*
 * Copies 16 bytes from (source).e to `dest` as two u64 accesses.
 *
 * Wrapped in do { } while(0) so that both stores stay together when the
 * macro is the body of an unbraced if/else. The call site supplies the
 * terminating semicolon, exactly as it already had to.
 */
#define store_128b(source, dest) \
  do \
  { \
    *((u64 *)(dest)) = *((u64 *)(source).e); \
    *(((u64 *)(dest)) + 1) = *((u64 *)(source).e + 1); \
  } while(0)

/*
 * Writes (source).e[0..7] to `dest`, addressed as an array of u16.
 * `dest` is parenthesized before the cast so that a pointer expression
 * such as `base + offset` is evaluated first and then reinterpreted.
 */
#define store_8x16b(source, dest) \
  foreach_element(8, ((u16 *)(dest))[_i] = (source).e[_i])
49 | ||
50 | ||
/*
 * For each of 8 source elements, stores the element in (dest).e[2 * i]
 * and the element shifted right by 8 in (dest).e[2 * i + 1].
 */
#define split_8x16b(dest, source) \
  foreach_element(8, \
  { \
    (dest).e[(_i * 2) + 0] = (source).e[_i]; \
    (dest).e[(_i * 2) + 1] = ((source).e[_i]) >> 8; \
  })

/*
 * Inverse pairing: (dest).e[i] is built from (source).e[2 * i] OR-ed with
 * (source).e[2 * i + 1] shifted left by 8, for 8 destination elements.
 */
#define merge_16x8b(dest, source) \
  foreach_element(8, \
   (dest).e[_i] = \
    ((source).e[(_i * 2) + 1] << 8) | (source).e[_i * 2])
61 | ||
/*
 * Reinterprets the object `source` as an lvalue of type `vec_to` by casting
 * its address; the access goes through a volatile-qualified pointer.
 */
#define vector_cast(vec_to, source) \
  (*((volatile vec_to *)(&(source))))

/*
 * Like vector_cast, but addresses the second half of (source).e: the
 * pointer is offset by sizeof((source).e) / 2 bytes before the cast.
 * `source` is parenthesized so that arguments such as `*ptr` or `arr[n]`
 * bind correctly to the `.e` member access.
 */
#define vector_cast_high(vec_to, source) \
  (*((volatile vec_to *)((u8 *)(source).e + (sizeof((source).e) / 2))))
67 | ||
68 | ||
/*
 * dup_<N>x<B>b: assigns `value` to each of the first N elements of
 * (dest).e. The macros differ only in the element count they iterate.
 */
#define dup_8x8b(dest, value) \
  foreach_element(8, \
   (dest).e[_i] = (value))

#define dup_16x8b(dest, value) \
  foreach_element(16, \
   (dest).e[_i] = (value))

#define dup_4x16b(dest, value) \
  foreach_element(4, \
   (dest).e[_i] = (value))

#define dup_8x16b(dest, value) \
  foreach_element(8, \
   (dest).e[_i] = (value))

#define dup_2x32b(dest, value) \
  foreach_element(2, \
   (dest).e[_i] = (value))

#define dup_4x32b(dest, value) \
  foreach_element(4, \
   (dest).e[_i] = (value))
86 | ||
/* 8 elements: (dest).e[i] = (source).e[i] cast to u16, shifted right. */
#define shr_narrow_8x16b(dest, source, shift) \
  foreach_element(8, \
   (dest).e[_i] = ((u16)(source).e[_i]) >> (shift))

/* 2 elements: (dest).e[i] = (source).e[i] shifted right (no cast). */
#define shr_narrow_2x64b(dest, source, shift) \
  foreach_element(2, \
   (dest).e[_i] = ((source).e[_i]) >> (shift))

/* 8 elements: (dest).e[i] = (source).e[i] cast to u8, shifted right. */
#define shr_8x8b(dest, source, shift) \
  foreach_element(8, \
   (dest).e[_i] = ((u8)(source).e[_i]) >> (shift))

/* 8 elements: (dest).e[i] = (source).e[i] shifted left. */
#define shl_8x8b(dest, source, shift) \
  foreach_element(8, \
   (dest).e[_i] = ((source).e[_i]) << (shift))
98 | ||
/* 8 elements: right shift of (source).e[i] after a cast to u16. */
#define shr_8x16b(dest, source, shift) \
  foreach_element(8, \
   (dest).e[_i] = ((u16)(source).e[_i]) >> (shift))

/* 2 elements: right shift of (source).e[i] after a cast to u32. */
#define shr_2x32b(dest, source, shift) \
  foreach_element(2, \
   (dest).e[_i] = ((u32)(source).e[_i]) >> (shift))

/* 4 elements: right shift of (source).e[i] after a cast to u16. */
#define shr_4x16b(dest, source, shift) \
  foreach_element(4, \
   (dest).e[_i] = ((u16)(source).e[_i]) >> (shift))

/* 4 elements: left shift of (source).e[i] after a cast to u32. */
#define shl_4x16b(dest, source, shift) \
  foreach_element(4, \
   (dest).e[_i] = ((u32)(source).e[_i]) << (shift))

/* 4 elements: right shift of (source).e[i] after a cast to u32. */
#define shr_4x32b(dest, source, shift) \
  foreach_element(4, \
   (dest).e[_i] = ((u32)(source).e[_i]) >> (shift))

/* Same expression as shr_4x32b; the result is stored into (dest).e[i]. */
#define shr_narrow_4x32b(dest, source, shift) \
  foreach_element(4, \
   (dest).e[_i] = ((u32)(source).e[_i]) >> (shift))
116 | ||
/*
 * Left shifts by a common amount: (dest).e[i] = (source).e[i] << shift.
 * The numeric prefix in each name is the number of elements processed.
 */
#define shl_8x16b(dest, source, shift) \
  foreach_element(8, \
   (dest).e[_i] = ((source).e[_i]) << (shift))

#define shl_4x32b(dest, source, shift) \
  foreach_element(4, \
   (dest).e[_i] = ((source).e[_i]) << (shift))

#define shl_2x32b(dest, source, shift) \
  foreach_element(2, \
   (dest).e[_i] = ((source).e[_i]) << (shift))

/* Single-element form; expands to one parenthesized assignment. */
#define shl_1x64b(dest, source, shift) \
  ((dest).e[0] = ((source).e[0]) << (shift))

#define shl_2x64b(dest, source, shift) \
  foreach_element(2, \
   (dest).e[_i] = ((source).e[_i]) << (shift))
131 | ||
/*
 * Per-element left shift: (source_a).e[i] is shifted by the low 8 bits of
 * (source_b).e[i] and stored in (dest).e[i]. The numeric prefix in each
 * name is the number of elements processed.
 */
#define shl_variable_2x64b(dest, source_a, source_b) \
  foreach_element(2, \
   (dest).e[_i] = \
    ((source_a).e[_i]) << (((source_b).e[_i]) & 0xFF))

#define shl_variable_8x16b(dest, source_a, source_b) \
  foreach_element(8, \
   (dest).e[_i] = \
    ((source_a).e[_i]) << (((source_b).e[_i]) & 0xFF))

#define shl_variable_4x16b(dest, source_a, source_b) \
  foreach_element(4, \
   (dest).e[_i] = \
    ((source_a).e[_i]) << (((source_b).e[_i]) & 0xFF))
143 | ||
/* Single-element right shift; expands to one parenthesized assignment. */
#define shr_1x64b(dest, source, shift) \
  ((dest).e[0] = ((source).e[0]) >> (shift))

/* 8 elements: (dest).e[i] = (source).e[i] << shift. */
#define shl_long_8x8b(dest, source, shift) \
  foreach_element(8, \
   (dest).e[_i] = ((source).e[_i]) << (shift))

/* 4 elements: (dest).e[i] = (source).e[i] << shift. */
#define shl_long_4x16b(dest, source, shift) \
  foreach_element(4, \
   (dest).e[_i] = ((source).e[_i]) << (shift))
152 | ||
/*
 * For each of 8 elements: reads (source).e[i] as s16, shifts it right by
 * `shift`, clamps the result to the range [0, 0xFF] and stores it in
 * (dest).e[i].
 *
 * `shift` is parenthesized so that expression arguments are applied as a
 * whole, and the temporary is named `_result` so that it cannot hide a
 * caller variable called `result` used in one of the arguments.
 */
#define shrq_narrow_signed_8x16b(dest, source, shift) \
  foreach_element(8, \
  { \
    s32 _result = ((s16)(source).e[_i]) >> (shift); \
    if(_result < 0) \
      _result = 0; \
    if(_result > 0xFF) \
      _result = 0xFF; \
    (dest).e[_i] = _result; \
  })
163 | ||
/*
 * shl_reg_*: per-element shift whose direction depends on the sign of the
 * shift amount. (source_b).e[i] is read as an s8; a negative value shifts
 * (source_a).e[i] right by its magnitude, otherwise the element is shifted
 * left. The result is stored in (dest).e[i].
 *
 * `dest` is parenthesized like the other operands, and the temporary is
 * named `_shift` so that it cannot hide a caller variable called `shift`
 * that appears in one of the arguments.
 */
#define shl_reg_4x32b(dest, source_a, source_b) \
  foreach_element(4, \
  { \
    s8 _shift = (source_b).e[_i]; \
    if(_shift < 0) \
      (dest).e[_i] = (source_a).e[_i] >> (-_shift); \
    else \
      (dest).e[_i] = (source_a).e[_i] << _shift; \
  })

#define shl_reg_2x32b(dest, source_a, source_b) \
  foreach_element(2, \
  { \
    s8 _shift = (source_b).e[_i]; \
    if(_shift < 0) \
      (dest).e[_i] = (source_a).e[_i] >> (-_shift); \
    else \
      (dest).e[_i] = (source_a).e[_i] << _shift; \
  })

#define shl_reg_2x64b(dest, source_a, source_b) \
  foreach_element(2, \
  { \
    s8 _shift = (source_b).e[_i]; \
    if(_shift < 0) \
      (dest).e[_i] = (source_a).e[_i] >> (-_shift); \
    else \
      (dest).e[_i] = (source_a).e[_i] << _shift; \
  })
193 | ||
194 | ||
/*
 * Shift-right-and-insert on 8 elements: (source).e[i], cast to u8 and
 * shifted right by `shift`, is OR-ed into (dest).e[i] after the bits
 * covered by (0xFF >> shift) have been cleared from the destination.
 */
#define sri_8x8b(dest, source, shift) \
  foreach_element(8, \
   (dest).e[_i] = \
    ((u8)(source).e[_i] >> (shift)) | \
    ((dest).e[_i] & ~(0xFF >> (shift))))

/*
 * Shift-left-and-insert on 8 elements: (source).e[i] shifted left by
 * `shift` is OR-ed into (dest).e[i] after the bits covered by
 * (0xFF << shift) have been cleared from the destination.
 */
#define sli_8x8b(dest, source, shift) \
  foreach_element(8, \
   (dest).e[_i] = \
    ((source).e[_i] << (shift)) | \
    ((dest).e[_i] & ~(0xFF << (shift))))
202 | ||
203 | ||
204 | ||
/*
 * mov_narrow_* / mov_wide_*: element-wise copy from (source).e to
 * (dest).e. Any change of width comes solely from the assignment between
 * the two element types; the macros themselves apply no cast.
 */
#define mov_narrow_8x16b(dest, source) \
  foreach_element(8, \
   (dest).e[_i] = ((source).e[_i]))

#define mov_narrow_4x32b(dest, source) \
  foreach_element(4, \
   (dest).e[_i] = ((source).e[_i]))

#define mov_narrow_2x64b(dest, source) \
  foreach_element(2, \
   (dest).e[_i] = ((source).e[_i]))

#define mov_wide_8x8b(dest, source) \
  foreach_element(8, \
   (dest).e[_i] = ((source).e[_i]))

#define mov_wide_2x32b(dest, source) \
  foreach_element(2, \
   (dest).e[_i] = ((source).e[_i]))

/* 4 elements: (dest).e[i] = bitwise complement of (source).e[i]. */
#define mvn_4x16b(dest, source) \
  foreach_element(4, \
   (dest).e[_i] = ~((source).e[_i]))
222 | ||
/*
 * add_<N>x<B>b: element-wise sum, (dest).e[i] = a.e[i] + b.e[i], over the
 * first N elements. Wrap-around, if any, comes from storing the sum back
 * into the destination element type.
 */
#define add_4x16b(dest, source_a, source_b) \
  foreach_element(4, \
   (dest).e[_i] = ((source_a).e[_i] + (source_b).e[_i]))

#define add_4x32b(dest, source_a, source_b) \
  foreach_element(4, \
   (dest).e[_i] = ((source_a).e[_i] + (source_b).e[_i]))

#define add_2x32b(dest, source_a, source_b) \
  foreach_element(2, \
   (dest).e[_i] = ((source_a).e[_i] + (source_b).e[_i]))

#define add_8x16b(dest, source_a, source_b) \
  foreach_element(8, \
   (dest).e[_i] = ((source_a).e[_i] + (source_b).e[_i]))

#define add_16x8b(dest, source_a, source_b) \
  foreach_element(16, \
   (dest).e[_i] = ((source_a).e[_i] + (source_b).e[_i]))

#define add_8x8b(dest, source_a, source_b) \
  foreach_element(8, \
   (dest).e[_i] = ((source_a).e[_i] + (source_b).e[_i]))

/* Single-element form: a plain assignment to (dest).e[0], no loop. */
#define add_1x64b(dest, source_a, source_b) \
  (dest).e[0] = ((source_a).e[0] + (source_b).e[0])

#define add_2x64b(dest, source_a, source_b) \
  foreach_element(2, \
   (dest).e[_i] = ((source_a).e[_i] + (source_b).e[_i]))
246 | ||
/*
 * Adds two elements and keeps the upper half of the sum.
 *
 * 2x64b: (dest).e[i] = (a.e[i] + b.e[i]) >> 32, for 2 elements.
 * 4x32b: (dest).e[i] = (a.e[i] + b.e[i]) >> 16, for 4 elements.
 *
 * The 2x64b form previously applied `>> 32` to the value of the
 * assignment expression instead of to the sum, so the destination received
 * the unshifted sum and the shifted value was discarded. The shift is now
 * applied before the store, matching the 4x32b form.
 */
#define add_high_narrow_2x64b(dest, source_a, source_b) \
  foreach_element(2, \
   ((dest).e[_i] = ((source_a).e[_i] + (source_b).e[_i]) >> 32))

#define add_high_narrow_4x32b(dest, source_a, source_b) \
  foreach_element(4, \
   ((dest).e[_i] = ((source_a).e[_i] + (source_b).e[_i]) >> 16))
254 | ||
/*
 * Element-wise difference, (dest).e[i] = a.e[i] - b.e[i]. The numeric
 * prefix in each name is the number of elements processed.
 */
#define sub_4x16b(dest, source_a, source_b) \
  foreach_element(4, \
   (dest).e[_i] = ((source_a).e[_i] - (source_b).e[_i]))

#define sub_4x32b(dest, source_a, source_b) \
  foreach_element(4, \
   (dest).e[_i] = ((source_a).e[_i] - (source_b).e[_i]))

#define sub_2x32b(dest, source_a, source_b) \
  foreach_element(2, \
   (dest).e[_i] = ((source_a).e[_i] - (source_b).e[_i]))

#define sub_wide_8x8b(dest, source_a, source_b) \
  foreach_element(8, \
   (dest).e[_i] = ((source_a).e[_i] - (source_b).e[_i]))

/*
 * Element-wise sum, (dest).e[i] = a.e[i] + b.e[i]; no cast is applied to
 * either operand before the addition.
 */
#define add_wide_8x8b(dest, source_a, source_b) \
  foreach_element(8, \
   (dest).e[_i] = ((source_a).e[_i] + (source_b).e[_i]))

#define add_wide_2x32b(dest, source_a, source_b) \
  foreach_element(2, \
   (dest).e[_i] = ((source_a).e[_i] + (source_b).e[_i]))
272 | ||
/*
 * Saturating add on 8 elements: the sum is held in a u32 and replaced by
 * 0xFF when it exceeds 0xFF before being stored in (dest).e[i].
 */
#define addq_8x8b(dest, source_a, source_b) \
  foreach_element(8, \
  { \
    u32 _sum = (source_a).e[_i] + (source_b).e[_i]; \
    if(_sum > 0xFF) \
    { \
      _sum = 0xFF; \
    } \
    (dest).e[_i] = _sum; \
  })

/*
 * Saturating subtract on 8 elements: the difference is held in a u32 and
 * replaced by 0 when that u32 exceeds 0xFF before being stored in
 * (dest).e[i].
 */
#define subq_8x8b(dest, source_a, source_b) \
  foreach_element(8, \
  { \
    u32 _diff = (source_a).e[_i] - (source_b).e[_i]; \
    if(_diff > 0xFF) \
    { \
      _diff = 0; \
    } \
    (dest).e[_i] = _diff; \
  })
290 | ||
/*
 * Forwards all three arguments to subs_8x8b.
 * NOTE(review): subs_8x8b is not defined in this header - confirm that it
 * is provided elsewhere before using this macro.
 */
#define subs_long_8x8b(dest, source_a, source_b) \
  subs_8x8b(dest, source_a, source_b)

/*
 * 16 elements: the difference a.e[i] - b.e[i] is held in a u32 and
 * replaced by 0 when that u32 exceeds 0xFF, then stored in (dest).e[i].
 */
#define subs_16x8b(dest, source_a, source_b) \
  foreach_element(16, \
  { \
    u32 _diff = (source_a).e[_i] - (source_b).e[_i]; \
    if(_diff > 0xFF) \
    { \
      _diff = 0; \
    } \
    (dest).e[_i] = _diff; \
  })

/*
 * 8 elements: the difference a.e[i] - b.e[i] is held in an s32 and
 * replaced by 0 when negative, then stored in (dest).e[i].
 */
#define subs_8x16b(dest, source_a, source_b) \
  foreach_element(8, \
  { \
    s32 _diff = (source_a).e[_i] - (source_b).e[_i]; \
    if(_diff < 0) \
    { \
      _diff = 0; \
    } \
    (dest).e[_i] = _diff; \
  })
312 | ||
/* 8 elements: (dest).e[i] = a.e[i] - b.e[i]. */
#define sub_8x16b(dest, source_a, source_b) \
  foreach_element(8, \
   (dest).e[_i] = ((source_a).e[_i] - (source_b).e[_i]))

/* 16 elements: (dest).e[i] = a.e[i] - b.e[i]. */
#define sub_16x8b(dest, source_a, source_b) \
  foreach_element(16, \
   (dest).e[_i] = ((source_a).e[_i] - (source_b).e[_i]))
318 | ||
/* 8 elements: (dest).e[i] = a.e[i] | ~b.e[i]. */
#define orn_8x16b(dest, source_a, source_b) \
  foreach_element(8, \
   (dest).e[_i] = ((source_a).e[_i] | ~((source_b).e[_i])))

/*
 * and_<N>x<B>b: element-wise bitwise AND over the first N elements,
 * (dest).e[i] = a.e[i] & b.e[i].
 */
#define and_4x16b(dest, source_a, source_b) \
  foreach_element(4, \
   (dest).e[_i] = ((source_a).e[_i] & (source_b).e[_i]))

#define and_8x16b(dest, source_a, source_b) \
  foreach_element(8, \
   (dest).e[_i] = ((source_a).e[_i] & (source_b).e[_i]))

#define and_4x32b(dest, source_a, source_b) \
  foreach_element(4, \
   (dest).e[_i] = ((source_a).e[_i] & (source_b).e[_i]))

#define and_16x8b(dest, source_a, source_b) \
  foreach_element(16, \
   (dest).e[_i] = ((source_a).e[_i] & (source_b).e[_i]))

#define and_8x8b(dest, source_a, source_b) \
  foreach_element(8, \
   (dest).e[_i] = ((source_a).e[_i] & (source_b).e[_i]))

#define and_2x32b(dest, source_a, source_b) \
  foreach_element(2, \
   (dest).e[_i] = ((source_a).e[_i] & (source_b).e[_i]))

/* bic_*: bit clear, (dest).e[i] = a.e[i] & ~b.e[i], over 8 elements. */
#define bic_8x8b(dest, source_a, source_b) \
  foreach_element(8, \
   (dest).e[_i] = ((source_a).e[_i] & ~((source_b).e[_i])))

#define bic_8x16b(dest, source_a, source_b) \
  foreach_element(8, \
   (dest).e[_i] = ((source_a).e[_i] & ~((source_b).e[_i])))

/*
 * bic_immediate_*: clears the bits of `value` in place,
 * (dest).e[i] = (dest).e[i] & ~value.
 */
#define bic_immediate_4x16b(dest, value) \
  foreach_element(4, \
   (dest).e[_i] = ((dest).e[_i] & ~(value)))

#define bic_immediate_8x16b(dest, value) \
  foreach_element(8, \
   (dest).e[_i] = ((dest).e[_i] & ~(value)))
351 | ||
/* 8 elements: (dest).e[i] = a.e[i] | b.e[i]. */
#define or_8x16b(dest, source_a, source_b) \
  foreach_element(8, \
   (dest).e[_i] = ((source_a).e[_i] | (source_b).e[_i]))

/* 8 elements: (dest).e[i] = a.e[i] | value, with one shared `value`. */
#define or_immediate_8x16b(dest, source_a, value) \
  foreach_element(8, \
   (dest).e[_i] = ((source_a).e[_i] | (value)))

/*
 * eor_<N>x<B>b: element-wise exclusive OR over the first N elements,
 * (dest).e[i] = a.e[i] ^ b.e[i].
 */
#define eor_8x16b(dest, source_a, source_b) \
  foreach_element(8, \
   (dest).e[_i] = ((source_a).e[_i] ^ (source_b).e[_i]))

#define eor_4x32b(dest, source_a, source_b) \
  foreach_element(4, \
   (dest).e[_i] = ((source_a).e[_i] ^ (source_b).e[_i]))

#define eor_2x32b(dest, source_a, source_b) \
  foreach_element(2, \
   (dest).e[_i] = ((source_a).e[_i] ^ (source_b).e[_i]))
366 | ||
/*
 * zip_*: packs one narrowed element from each source into a single
 * destination element; source_a supplies the low half and source_b the
 * high half.
 *
 * 8x16b: low 8 bits of a.e[i]  | low 8 bits of b.e[i]  << 8
 * 4x32b: low 16 bits of a.e[i] | low 16 bits of b.e[i] << 16
 * 2x64b: a.e[i] as u64         | b.e[i] as u64         << 32
 */
#define zip_8x16b(dest, source_a, source_b) \
  foreach_element(8, (dest).e[_i] = \
   (u8)(source_a).e[_i] | ((u8)(source_b).e[_i] << 8))

/*
 * The high half is widened to u32 before shifting: a u16 is promoted to
 * int, and shifting a value >= 0x8000 left by 16 would overflow a 32-bit
 * signed int.
 */
#define zip_4x32b(dest, source_a, source_b) \
  foreach_element(4, (dest).e[_i] = \
   (u16)(source_a).e[_i] | ((u32)(u16)(source_b).e[_i] << 16))

#define zip_2x64b(dest, source_a, source_b) \
  foreach_element(2, (dest).e[_i] = \
   (u64)(source_a).e[_i] | ((u64)(source_b).e[_i] << 32))
378 | ||
/*
 * For each of 8 source elements, stores the element in (dest_a).e[i] and
 * the element shifted right by 8 in (dest_b).e[i].
 */
#define unzip_8x8b(dest_a, dest_b, source) \
  foreach_element(8, \
  { \
    (dest_a).e[_i] = (source).e[_i]; \
    (dest_b).e[_i] = ((source).e[_i]) >> 8; \
  })

/*
 * Two-source form: elements 0..7 of both destinations are produced from
 * source_a and elements 8..15 from source_b, using the same split as
 * unzip_8x8b.
 *
 * Both loops are enclosed in one compound statement so that the macro
 * behaves as a single statement when it is the body of an unbraced if or
 * loop; previously only the first loop was governed by the condition.
 */
#define unzip_16x8b(dest_a, dest_b, source_a, source_b) \
{ \
  foreach_element(8, \
  { \
    (dest_a).e[_i] = (source_a).e[_i]; \
    (dest_b).e[_i] = (source_a).e[_i] >> 8; \
  }); \
  foreach_element(8, \
  { \
    (dest_a).e[_i + 8] = (source_b).e[_i]; \
    (dest_b).e[_i + 8] = (source_b).e[_i] >> 8; \
  }); \
}
397 | ||
/*
 * Table lookup over 8 elements: (indexes).e[i] selects an entry of
 * (table).e. Indexes below 16 copy the selected entry into (dest).e[i];
 * any other index stores 0.
 *
 * `indexes` and `table` are parenthesized like `dest`, and the temporary
 * is named `_index` so that it cannot hide a caller identifier called
 * `index` used in one of the arguments.
 */
#define tbl_16(dest, indexes, table) \
  foreach_element(8, \
  { \
    u32 _index = (indexes).e[_i]; \
    if(_index < 16) \
      (dest).e[_i] = (table).e[_index]; \
    else \
      (dest).e[_i] = 0; \
  })
407 | ||
/*
 * 8 elements: (dest).e[i] receives -1 (all bits set) when (source).e[i]
 * equals 0, and 0 otherwise.
 */
#define cmpeqz_8x16b(dest, source) \
  foreach_element(8, \
   (dest).e[_i] = -((source).e[_i] == 0))

/*
 * cmpltz_*: (source).e[i] is cast to a signed type (s16 or s32) and
 * shifted right by that type's width minus one; the result is stored in
 * (dest).e[i].
 */
#define cmpltz_8x16b(dest, source) \
  foreach_element(8, \
   (dest).e[_i] = (((s16)(source).e[_i]) >> 15))

#define cmpltz_4x32b(dest, source) \
  foreach_element(4, \
   (dest).e[_i] = (((s32)(source).e[_i]) >> 31))

#define cmpltz_2x32b(dest, source) \
  foreach_element(2, \
   (dest).e[_i] = (((s32)(source).e[_i]) >> 31))
419 | ||
/*
 * Element-wise comparisons producing a mask: (dest).e[i] receives
 * ~(cond - 1), i.e. all bits set when the condition holds and 0 otherwise.
 *
 * cmplte: a.e[i] <= b.e[i]      cmplt: a.e[i] < b.e[i]
 * cmpgt:  a.e[i] >  b.e[i]      tst:   (a.e[i] & b.e[i]) != 0
 *
 * The source arguments are parenthesized before the `.e` member access so
 * that arguments such as `*ptr` bind as intended.
 */
#define cmplte_4x16b(dest, source_a, source_b) \
  foreach_element(4, \
   (dest).e[_i] = ~(((source_a).e[_i] <= (source_b).e[_i]) - 1))

#define cmplt_4x16b(dest, source_a, source_b) \
  foreach_element(4, \
   (dest).e[_i] = ~(((source_a).e[_i] < (source_b).e[_i]) - 1))

#define cmpgt_4x16b(dest, source_a, source_b) \
  foreach_element(4, \
   (dest).e[_i] = ~(((source_a).e[_i] > (source_b).e[_i]) - 1))

#define tst_8x16b(dest, source_a, source_b) \
  foreach_element(8, \
   (dest).e[_i] = ~((((source_a).e[_i] & (source_b).e[_i]) != 0) - 1))
432 | ||
/*
 * 8 elements: (dest).e[i] = a.e[i] & value, with one shared `value`.
 * `value` is parenthesized so that an expression argument such as
 * `MASK_A | MASK_B` is combined before the AND is applied.
 */
#define andi_8x8b(dest, source_a, value) \
  foreach_element(8, (dest).e[_i] = (source_a).e[_i] & (value))

/* 8 elements: (dest).e[i] = (a.e[i] + b.e[i]) >> 1. */
#define average_8x16b(dest, source_a, source_b) \
  foreach_element(8, \
   (dest).e[_i] = ((source_a).e[_i] + (source_b).e[_i]) >> 1)
439 | ||
440 | ||
/*
 * mul_<N>x<B>b / mul_long_<N>x<B>b: element-wise product,
 * (dest).e[i] = a.e[i] * b.e[i], over the first N elements. No cast is
 * applied to the operands except in mul_long_2x32b below.
 */
#define mul_8x8b(dest, source_a, source_b) \
  foreach_element(8, \
   (dest).e[_i] = ((source_a).e[_i] * (source_b).e[_i]))

#define mul_8x16b(dest, source_a, source_b) \
  foreach_element(8, \
   (dest).e[_i] = ((source_a).e[_i] * (source_b).e[_i]))

#define mul_2x32b(dest, source_a, source_b) \
  foreach_element(2, \
   (dest).e[_i] = ((source_a).e[_i] * (source_b).e[_i]))

#define mul_4x32b(dest, source_a, source_b) \
  foreach_element(4, \
   (dest).e[_i] = ((source_a).e[_i] * (source_b).e[_i]))

#define mul_long_8x8b(dest, source_a, source_b) \
  foreach_element(8, \
   (dest).e[_i] = ((source_a).e[_i] * (source_b).e[_i]))

#define mul_long_4x16b(dest, source_a, source_b) \
  foreach_element(4, \
   (dest).e[_i] = ((source_a).e[_i] * (source_b).e[_i]))

/* The second operand is cast to s64 before the multiplication. */
#define mul_long_2x32b(dest, source_a, source_b) \
  foreach_element(2, \
   (dest).e[_i] = ((source_a).e[_i] * (s64)((source_b).e[_i])))
462 | ||
/*
 * mul_scalar_*: multiplies every element by one shared `value`,
 * (dest).e[i] = (source).e[i] * value.
 *
 * `value` is parenthesized so that an expression argument such as
 * `base + step` is evaluated before the multiplication.
 *
 * NOTE(review): unlike mul_long_2x32b, the *_long_* forms here apply no
 * widening cast, so the product is computed in whatever type the operands
 * promote to - confirm callers pass a suitably wide `value`.
 */
#define mul_scalar_2x32b(dest, source, value) \
  foreach_element(2, (dest).e[_i] = (source).e[_i] * (value))

#define mul_scalar_long_8x16b(dest, source, value) \
  foreach_element(8, (dest).e[_i] = (source).e[_i] * (value))

#define mul_scalar_long_2x32b(dest, source, value) \
  foreach_element(2, (dest).e[_i] = (source).e[_i] * (value))
471 | ||
/*
 * Multiply-accumulate family.
 *
 * mla_*        : (dest).e[i] += a.e[i] * b.e[i]
 * mla_scalar_* : (dest).e[i] += (source).e[i] * value
 * mls_scalar_* : (dest).e[i] -= (source).e[i] * value
 *
 * The numeric prefix in each name is the number of elements processed.
 * In the scalar forms `value` is parenthesized so that an expression
 * argument such as `x0 - x1` is evaluated before the multiplication.
 */
#define mla_2x32b(dest, source_a, source_b) \
  foreach_element(2, (dest).e[_i] += (source_a).e[_i] * (source_b).e[_i])

#define mla_4x32b(dest, source_a, source_b) \
  foreach_element(4, (dest).e[_i] += (source_a).e[_i] * (source_b).e[_i])

#define mla_scalar_long_2x32b(dest, source, value) \
  foreach_element(2, (dest).e[_i] += (source).e[_i] * (value))

#define mla_long_8x8b(dest, source_a, source_b) \
  foreach_element(8, (dest).e[_i] += (source_a).e[_i] * (source_b).e[_i])

/* The second operand is cast to s64 before the multiplication. */
#define mla_long_2x32b(dest, source_a, source_b) \
  foreach_element(2, \
   (dest).e[_i] += (source_a).e[_i] * (s64)(source_b).e[_i])

#define mla_scalar_4x32b(dest, source, value) \
  foreach_element(4, (dest).e[_i] += (source).e[_i] * (value))

#define mla_scalar_2x32b(dest, source, value) \
  foreach_element(2, (dest).e[_i] += (source).e[_i] * (value))

#define mls_scalar_4x32b(dest, source, value) \
  foreach_element(4, (dest).e[_i] -= (source).e[_i] * (value))

#define mls_scalar_2x32b(dest, source, value) \
  foreach_element(2, (dest).e[_i] -= (source).e[_i] * (value))

#define mls_scalar_long_2x32b(dest, source, value) \
  foreach_element(2, (dest).e[_i] -= (source).e[_i] * (value))
501 | ||
/*
 * Swaps the two elements: (dest).e[0] = (source).e[1] and
 * (dest).e[1] = (source).e[0]. The first value is held in a temporary, so
 * `dest` and `source` may be the same object.
 *
 * `source` is parenthesized like `dest`, and the temporary is named `_tmp`
 * so that it cannot hide a caller variable called `tmp`.
 */
#define rev_2x32b(dest, source) \
{ \
  u32 _tmp = (source).e[1]; \
  (dest).e[1] = (source).e[0]; \
  (dest).e[0] = _tmp; \
}

/*
 * abs_*: (dest).e[i] = abs((source).e[i]).
 * NOTE(review): abs() is declared in <stdlib.h>, which this header does
 * not include itself - presumably the including file provides it.
 */
#define abs_4x32b(dest, source) \
  foreach_element(4, (dest).e[_i] = abs((source).e[_i]))

#define abs_2x32b(dest, source) \
  foreach_element(2, (dest).e[_i] = abs((source).e[_i]))

/* 2 elements: (dest).e[i] = -(source).e[i]. */
#define neg_2x32b(dest, source) \
  foreach_element(2, (dest).e[_i] = -((source).e[_i]))
517 | ||
518 | ||
/*
 * For each of 8 elements: shifts (source).e[i] right by `shift`, holds the
 * result in a u32, replaces it by 0xFF when it exceeds 0xFF and stores it
 * in (dest).e[i].
 *
 * `shift` is parenthesized so that expression arguments are applied as a
 * whole, and the temporary is named `_result` so that it cannot hide a
 * caller variable called `result` used in one of the arguments.
 */
#define shrq_narrow_8x16b(dest, source, shift) \
  foreach_element(8, \
  { \
    u32 _result = ((source).e[_i]) >> (shift); \
    if(_result > 0xFF) \
      _result = 0xFF; \
    (dest).e[_i] = _result; \
  })
527 | ||
/*
 * min_* / max_*: element-wise minimum or maximum. The candidate starts as
 * a.e[i], held in a temporary (s32 for the 8x16b forms, u32 for the 8-bit
 * forms), and is replaced by b.e[i] when b.e[i] compares below (min) or
 * above (max) the temporary. The temporary is then stored in (dest).e[i].
 */
#define min_8x16b(dest, source_a, source_b) \
  foreach_element(8, \
  { \
    s32 _pick = (source_a).e[_i]; \
    if((source_b).e[_i] < _pick) \
    { \
      _pick = (source_b).e[_i]; \
    } \
    (dest).e[_i] = _pick; \
  })

#define min_8x8b(dest, source_a, source_b) \
  foreach_element(8, \
  { \
    u32 _pick = (source_a).e[_i]; \
    if((source_b).e[_i] < _pick) \
    { \
      _pick = (source_b).e[_i]; \
    } \
    (dest).e[_i] = _pick; \
  })

#define min_16x8b(dest, source_a, source_b) \
  foreach_element(16, \
  { \
    u32 _pick = (source_a).e[_i]; \
    if((source_b).e[_i] < _pick) \
    { \
      _pick = (source_b).e[_i]; \
    } \
    (dest).e[_i] = _pick; \
  })

#define max_8x16b(dest, source_a, source_b) \
  foreach_element(8, \
  { \
    s32 _pick = (source_a).e[_i]; \
    if((source_b).e[_i] > _pick) \
    { \
      _pick = (source_b).e[_i]; \
    } \
    (dest).e[_i] = _pick; \
  })
563 | ||
/*
 * Bitwise select family. Every macro combines two inputs bit by bit under
 * a mask and writes the result over one of its own operands.
 *
 * bsl: the mask lives in dest_mask and is overwritten:
 *        dest_mask = (a & dest_mask) | (b & ~dest_mask)
 * bif: dest = (source & ~mask) | (dest & mask)
 * bit: dest = (source & mask)  | (dest & ~mask)
 *
 * All operands are parenthesized before the `.e` member access so that
 * arguments such as `*ptr` bind as intended.
 */
#define bsl_8x16b(dest_mask, source_a, source_b) \
  foreach_element(8, \
   (dest_mask).e[_i] = ((source_a).e[_i] & (dest_mask).e[_i]) | \
                       ((source_b).e[_i] & ~((dest_mask).e[_i])))

#define bif_8x16b(dest, source, mask) \
  foreach_element(8, \
   (dest).e[_i] = ((source).e[_i] & ~((mask).e[_i])) | \
                  ((dest).e[_i] & (mask).e[_i]))

#define bsl_4x32b(dest_mask, source_a, source_b) \
  foreach_element(4, \
   (dest_mask).e[_i] = ((source_a).e[_i] & (dest_mask).e[_i]) | \
                       ((source_b).e[_i] & ~((dest_mask).e[_i])))

#define bit_4x16b(dest, source, mask) \
  foreach_element(4, \
   (dest).e[_i] = ((source).e[_i] & (mask).e[_i]) | \
                  ((dest).e[_i] & ~((mask).e[_i])))
579 | ||
580 | #endif |