cdrom: change pause timing again
[pcsx_rearmed.git] / plugins / gpu_neon / psx_gpu / vector_ops.h
CommitLineData
75e28f62
E
/*
 * Copyright (C) 2011 Gilead Kutnick "Exophase" <exophase@gmail.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of
 * the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
14
15#ifndef VECTOR_OPS
16#define VECTOR_OPS
17
a4021361 18#include "vector_types.h"
75e28f62
E
19
20
21#define foreach_element(iterations, operation) \
22{ \
23 u32 _i; \
24 for(_i = 0; _i < iterations; _i++) \
25 { \
26 operation; \
27 } \
28} \
29
30#define load_64b(dest, source) \
31 *((u64 *)(dest).e) = *((u64 *)(source)) \
32
33#define load_128b(dest, source) \
34 *((u64 *)(dest).e) = *((u64 *)(source)); \
35 *((u64 *)(dest).e + 1) = *(((u64 *)(source)) + 1) \
36
37#define load_8x16b(dest, source) \
38 foreach_element(8, (dest).e[_i] = ((u16 *)(source))[_i]) \
39
40#define store_64b(source, dest) \
41 *((u64 *)(dest)) = *((u64 *)(source).e) \
42
43#define store_128b(source, dest) \
44 *((u64 *)(dest)) = *((u64 *)(source).e); \
45 *(((u64 *)(dest)) + 1) = *((u64 *)(source).e + 1) \
46
47#define store_8x16b(source, dest) \
48 foreach_element(8, ((u16 *)dest)[_i] = (source).e[_i]) \
49
50
51#define split_8x16b(dest, source) \
52 foreach_element(8, \
53 { \
54 (dest).e[_i * 2] = (source).e[_i]; \
55 (dest).e[(_i * 2) + 1] = (source).e[_i] >> 8; \
56 }) \
57
58#define merge_16x8b(dest, source) \
59 foreach_element(8, \
60 (dest).e[_i] = (source).e[_i * 2] | ((source).e[(_i * 2) + 1] << 8)) \
61
62#define vector_cast(vec_to, source) \
63 (*((volatile vec_to *)(&(source)))) \
64
65#define vector_cast_high(vec_to, source) \
66 (*((volatile vec_to *)((u8 *)source.e + (sizeof(source.e) / 2)))) \
67
68
69#define dup_8x8b(dest, value) \
70 foreach_element(8, (dest).e[_i] = value) \
71
72#define dup_16x8b(dest, value) \
73 foreach_element(16, (dest).e[_i] = value) \
74
75#define dup_4x16b(dest, value) \
76 foreach_element(4, (dest).e[_i] = value) \
77
78#define dup_8x16b(dest, value) \
79 foreach_element(8, (dest).e[_i] = value) \
80
81#define dup_2x32b(dest, value) \
82 foreach_element(2, (dest).e[_i] = value) \
83
84#define dup_4x32b(dest, value) \
85 foreach_element(4, (dest).e[_i] = value) \
86
87#define shr_narrow_8x16b(dest, source, shift) \
88 foreach_element(8, (dest).e[_i] = (u16)(source).e[_i] >> (shift)) \
89
90#define shr_narrow_2x64b(dest, source, shift) \
91 foreach_element(2, (dest).e[_i] = (source).e[_i] >> (shift)) \
92
93#define shr_8x8b(dest, source, shift) \
94 foreach_element(8, (dest).e[_i] = (u8)(source).e[_i] >> (shift)) \
95
96#define shl_8x8b(dest, source, shift) \
97 foreach_element(8, (dest).e[_i] = (source).e[_i] << (shift)) \
98
99#define shr_8x16b(dest, source, shift) \
100 foreach_element(8, (dest).e[_i] = (u16)(source).e[_i] >> (shift)) \
101
102#define shr_2x32b(dest, source, shift) \
103 foreach_element(2, (dest).e[_i] = (u32)(source).e[_i] >> (shift)) \
104
105#define shr_4x16b(dest, source, shift) \
aafce833 106 foreach_element(4, (dest).e[_i] = (u16)(source).e[_i] >> (shift)) \
75e28f62
E
107
108#define shl_4x16b(dest, source, shift) \
109 foreach_element(4, (dest).e[_i] = (u32)(source).e[_i] << (shift)) \
110
111#define shr_4x32b(dest, source, shift) \
112 foreach_element(4, (dest).e[_i] = (u32)(source).e[_i] >> (shift)) \
113
114#define shr_narrow_4x32b(dest, source, shift) \
115 foreach_element(4, (dest).e[_i] = (u32)(source).e[_i] >> (shift)) \
116
117#define shl_8x16b(dest, source, shift) \
118 foreach_element(8, (dest).e[_i] = (source).e[_i] << (shift)) \
119
120#define shl_4x32b(dest, source, shift) \
121 foreach_element(4, (dest).e[_i] = (source).e[_i] << (shift)) \
122
123#define shl_2x32b(dest, source, shift) \
124 foreach_element(2, (dest).e[_i] = (source).e[_i] << (shift)) \
125
126#define shl_1x64b(dest, source, shift) \
127 ((dest).e[0] = (source).e[0] << (shift)) \
128
129#define shl_2x64b(dest, source, shift) \
130 foreach_element(2, (dest).e[_i] = (source).e[_i] << (shift)) \
131
132#define shl_variable_2x64b(dest, source_a, source_b) \
133 foreach_element(2, \
134 (dest).e[_i] = (source_a).e[_i] << ((source_b).e[_i] & 0xFF)) \
135
136#define shl_variable_8x16b(dest, source_a, source_b) \
137 foreach_element(8, \
138 (dest).e[_i] = (source_a).e[_i] << ((source_b).e[_i] & 0xFF)) \
139
140#define shl_variable_4x16b(dest, source_a, source_b) \
141 foreach_element(4, \
142 (dest).e[_i] = (source_a).e[_i] << ((source_b).e[_i] & 0xFF)) \
143
144#define shr_1x64b(dest, source, shift) \
145 ((dest).e[0] = (source).e[0] >> (shift)) \
146
147#define shl_long_8x8b(dest, source, shift) \
148 foreach_element(8, (dest).e[_i] = (source).e[_i] << (shift)) \
149
150#define shl_long_4x16b(dest, source, shift) \
151 foreach_element(4, (dest).e[_i] = (source).e[_i] << (shift)) \
152
153#define shrq_narrow_signed_8x16b(dest, source, shift) \
154 foreach_element(8, \
155 { \
156 s32 result = ((s16)(source).e[_i]) >> shift; \
157 if(result < 0) \
158 result = 0; \
159 if(result > 0xFF) \
160 result = 0xFF; \
161 (dest).e[_i] = result; \
162 }) \
163
164#define shl_reg_4x32b(dest, source_a, source_b) \
165 foreach_element(4, \
166 { \
167 s8 shift = (source_b).e[_i]; \
168 if(shift < 0) \
169 dest.e[_i] = (source_a).e[_i] >> (-shift); \
170 else \
171 dest.e[_i] = (source_a).e[_i] << shift; \
172 }) \
173
174#define shl_reg_2x32b(dest, source_a, source_b) \
175 foreach_element(2, \
176 { \
177 s8 shift = (source_b).e[_i]; \
178 if(shift < 0) \
179 dest.e[_i] = (source_a).e[_i] >> (-shift); \
180 else \
181 dest.e[_i] = (source_a).e[_i] << shift; \
182 }) \
183
184#define shl_reg_2x64b(dest, source_a, source_b) \
185 foreach_element(2, \
186 { \
187 s8 shift = (source_b).e[_i]; \
188 if(shift < 0) \
189 dest.e[_i] = (source_a).e[_i] >> (-shift); \
190 else \
191 dest.e[_i] = (source_a).e[_i] << shift; \
192 }) \
193
194
195#define sri_8x8b(dest, source, shift) \
196 foreach_element(8, (dest).e[_i] = ((dest).e[_i] & ~(0xFF >> (shift))) | \
197 ((u8)(source).e[_i] >> (shift))) \
198
199#define sli_8x8b(dest, source, shift) \
200 foreach_element(8, (dest).e[_i] = ((dest).e[_i] & ~(0xFF << (shift))) | \
201 ((source).e[_i] << (shift))) \
202
203
204
205#define mov_narrow_8x16b(dest, source) \
206 foreach_element(8, (dest).e[_i] = (source).e[_i]) \
207
208#define mov_narrow_4x32b(dest, source) \
209 foreach_element(4, (dest).e[_i] = (source).e[_i]) \
210
211#define mov_narrow_2x64b(dest, source) \
212 foreach_element(2, (dest).e[_i] = (source).e[_i]) \
213
214#define mov_wide_8x8b(dest, source) \
215 foreach_element(8, (dest).e[_i] = (source).e[_i]) \
216
217#define mov_wide_2x32b(dest, source) \
218 foreach_element(2, (dest).e[_i] = (source).e[_i]) \
219
220#define mvn_4x16b(dest, source) \
221 foreach_element(4, (dest).e[_i] = ~((source).e[_i])) \
222
223#define add_4x16b(dest, source_a, source_b) \
224 foreach_element(4, (dest).e[_i] = (source_a).e[_i] + (source_b).e[_i]) \
225
226#define add_4x32b(dest, source_a, source_b) \
227 foreach_element(4, (dest).e[_i] = (source_a).e[_i] + (source_b).e[_i]) \
228
229#define add_2x32b(dest, source_a, source_b) \
230 foreach_element(2, (dest).e[_i] = (source_a).e[_i] + (source_b).e[_i]) \
231
232#define add_8x16b(dest, source_a, source_b) \
233 foreach_element(8, (dest).e[_i] = (source_a).e[_i] + (source_b).e[_i]) \
234
235#define add_16x8b(dest, source_a, source_b) \
236 foreach_element(16, (dest).e[_i] = (source_a).e[_i] + (source_b).e[_i]) \
237
238#define add_8x8b(dest, source_a, source_b) \
239 foreach_element(8, (dest).e[_i] = (source_a).e[_i] + (source_b).e[_i]) \
240
241#define add_1x64b(dest, source_a, source_b) \
242 (dest).e[0] = (source_a).e[0] + (source_b).e[0] \
243
244#define add_2x64b(dest, source_a, source_b) \
245 foreach_element(2, (dest).e[_i] = (source_a).e[_i] + (source_b).e[_i]) \
246
247#define add_high_narrow_2x64b(dest, source_a, source_b) \
248 foreach_element(2, \
249 ((dest).e[_i] = (source_a).e[_i] + (source_b).e[_i]) >> 32) \
250
251#define add_high_narrow_4x32b(dest, source_a, source_b) \
252 foreach_element(4, \
253 ((dest).e[_i] = ((source_a).e[_i] + (source_b).e[_i]) >> 16)) \
254
255#define sub_4x16b(dest, source_a, source_b) \
256 foreach_element(4, (dest).e[_i] = (source_a).e[_i] - (source_b).e[_i]) \
257
258#define sub_4x32b(dest, source_a, source_b) \
259 foreach_element(4, (dest).e[_i] = (source_a).e[_i] - (source_b).e[_i]) \
260
261#define sub_2x32b(dest, source_a, source_b) \
262 foreach_element(2, (dest).e[_i] = (source_a).e[_i] - (source_b).e[_i]) \
263
264#define sub_wide_8x8b(dest, source_a, source_b) \
265 foreach_element(8, (dest).e[_i] = (source_a).e[_i] - (source_b).e[_i]) \
266
267#define add_wide_8x8b(dest, source_a, source_b) \
268 foreach_element(8, (dest).e[_i] = (source_a).e[_i] + (source_b).e[_i]) \
269
270#define add_wide_2x32b(dest, source_a, source_b) \
271 foreach_element(2, (dest).e[_i] = (source_a).e[_i] + (source_b).e[_i]) \
272
273#define addq_8x8b(dest, source_a, source_b) \
274 foreach_element(8, \
275 { \
276 u32 result = (source_a).e[_i] + (source_b).e[_i]; \
277 if(result > 0xFF) \
278 result = 0xFF; \
279 (dest).e[_i] = result; \
280 }) \
281
282#define subq_8x8b(dest, source_a, source_b) \
283 foreach_element(8, \
284 { \
285 u32 result = (source_a).e[_i] - (source_b).e[_i]; \
286 if(result > 0xFF) \
287 result = 0; \
288 (dest).e[_i] = result; \
289 }) \
290
291#define subs_long_8x8b(dest, source_a, source_b) \
292 subs_8x8b(dest, source_a, source_b) \
293
294#define subs_16x8b(dest, source_a, source_b) \
295 foreach_element(16, \
296 { \
297 u32 result = (source_a).e[_i] - (source_b).e[_i]; \
298 if(result > 0xFF) \
299 result = 0; \
300 (dest).e[_i] = result; \
301 }) \
302
303#define subs_8x16b(dest, source_a, source_b) \
304 foreach_element(8, \
305 { \
306 s32 result = (source_a).e[_i] - (source_b).e[_i]; \
307 if(result < 0) \
308 result = 0; \
309 \
310 (dest).e[_i] = result; \
311 }) \
312
313#define sub_8x16b(dest, source_a, source_b) \
314 foreach_element(8, (dest).e[_i] = (source_a).e[_i] - (source_b).e[_i]) \
315
316#define sub_16x8b(dest, source_a, source_b) \
317 foreach_element(16, (dest).e[_i] = (source_a).e[_i] - (source_b).e[_i]) \
318
319#define orn_8x16b(dest, source_a, source_b) \
320 foreach_element(8, (dest).e[_i] = (source_a).e[_i] | ~((source_b).e[_i])) \
321
322#define and_4x16b(dest, source_a, source_b) \
323 foreach_element(4, (dest).e[_i] = (source_a).e[_i] & (source_b).e[_i]) \
324
325#define and_8x16b(dest, source_a, source_b) \
326 foreach_element(8, (dest).e[_i] = (source_a).e[_i] & (source_b).e[_i]) \
327
328#define and_4x32b(dest, source_a, source_b) \
329 foreach_element(4, (dest).e[_i] = (source_a).e[_i] & (source_b).e[_i]) \
330
331#define and_16x8b(dest, source_a, source_b) \
332 foreach_element(16, (dest).e[_i] = (source_a).e[_i] & (source_b).e[_i]) \
333
334#define and_8x8b(dest, source_a, source_b) \
335 foreach_element(8, (dest).e[_i] = (source_a).e[_i] & (source_b).e[_i]) \
336
337#define and_2x32b(dest, source_a, source_b) \
338 foreach_element(2, (dest).e[_i] = (source_a).e[_i] & (source_b).e[_i]) \
339
340#define bic_8x8b(dest, source_a, source_b) \
341 foreach_element(8, (dest).e[_i] = (source_a).e[_i] & ~((source_b).e[_i])) \
342
343#define bic_8x16b(dest, source_a, source_b) \
344 foreach_element(8, (dest).e[_i] = (source_a).e[_i] & ~((source_b).e[_i])) \
345
346#define bic_immediate_4x16b(dest, value) \
347 foreach_element(4, (dest).e[_i] = (dest).e[_i] & ~(value)) \
348
349#define bic_immediate_8x16b(dest, value) \
350 foreach_element(8, (dest).e[_i] = (dest).e[_i] & ~(value)) \
351
352#define or_8x16b(dest, source_a, source_b) \
353 foreach_element(8, (dest).e[_i] = (source_a).e[_i] | (source_b).e[_i]) \
354
355#define or_immediate_8x16b(dest, source_a, value) \
356 foreach_element(8, (dest).e[_i] = (source_a).e[_i] | (value)) \
357
358#define eor_8x16b(dest, source_a, source_b) \
359 foreach_element(8, (dest).e[_i] = (source_a).e[_i] ^ (source_b).e[_i]) \
360
361#define eor_4x32b(dest, source_a, source_b) \
362 foreach_element(4, (dest).e[_i] = (source_a).e[_i] ^ (source_b).e[_i]) \
363
364#define eor_2x32b(dest, source_a, source_b) \
365 foreach_element(2, (dest).e[_i] = (source_a).e[_i] ^ (source_b).e[_i]) \
366
367#define zip_8x16b(dest, source_a, source_b) \
368 foreach_element(8, (dest).e[_i] = \
369 (u8)(source_a).e[_i] | ((u8)(source_b).e[_i] << 8)) \
370
05e2e0c6
E
371#define zip_4x32b(dest, source_a, source_b) \
372 foreach_element(4, (dest).e[_i] = \
fc6cef7d 373 (u16)(source_a).e[_i] | ((u16)(source_b).e[_i] << 16)) \
05e2e0c6 374
75e28f62
E
375#define zip_2x64b(dest, source_a, source_b) \
376 foreach_element(2, (dest).e[_i] = \
377 (u64)(source_a).e[_i] | ((u64)(source_b).e[_i] << 32)) \
378
379#define unzip_8x8b(dest_a, dest_b, source) \
380 foreach_element(8, \
381 { \
382 (dest_a).e[_i] = (source).e[_i]; \
383 (dest_b).e[_i] = ((source).e[_i]) >> 8; \
384 }) \
385
386#define unzip_16x8b(dest_a, dest_b, source_a, source_b) \
387 foreach_element(8, \
388 { \
389 (dest_a).e[_i] = (source_a).e[_i]; \
390 (dest_b).e[_i] = (source_a).e[_i] >> 8; \
391 }); \
392 foreach_element(8, \
393 { \
394 (dest_a).e[_i + 8] = (source_b).e[_i]; \
395 (dest_b).e[_i + 8] = (source_b).e[_i] >> 8; \
396 }) \
397
398#define tbl_16(dest, indexes, table) \
399 foreach_element(8, \
400 { \
401 u32 index = indexes.e[_i]; \
402 if(index < 16) \
403 (dest).e[_i] = table.e[index]; \
404 else \
405 (dest).e[_i] = 0; \
406 }) \
407
408#define cmpeqz_8x16b(dest, source) \
409 foreach_element(8, (dest).e[_i] = ~(((source).e[_i] == 0) - 1)) \
410
411#define cmpltz_8x16b(dest, source) \
412 foreach_element(8, (dest).e[_i] = ((s16)(source).e[_i] >> 15)) \
413
414#define cmpltz_4x32b(dest, source) \
415 foreach_element(4, (dest).e[_i] = ((s32)(source).e[_i] >> 31)) \
416
417#define cmpltz_2x32b(dest, source) \
418 foreach_element(2, (dest).e[_i] = ((s32)(source).e[_i] >> 31)) \
419
420#define cmplte_4x16b(dest, source_a, source_b) \
421 foreach_element(4, (dest).e[_i] = ~((source_a.e[_i] <= source_b.e[_i]) - 1)) \
422
423#define cmplt_4x16b(dest, source_a, source_b) \
424 foreach_element(4, (dest).e[_i] = ~((source_a.e[_i] < source_b.e[_i]) - 1)) \
425
426#define cmpgt_4x16b(dest, source_a, source_b) \
427 foreach_element(4, (dest).e[_i] = ~((source_a.e[_i] > source_b.e[_i]) - 1)) \
428
429#define tst_8x16b(dest, source_a, source_b) \
430 foreach_element(8, \
431 (dest).e[_i] = ~(((source_a.e[_i] & source_b.e[_i]) != 0) - 1)) \
432
433#define andi_8x8b(dest, source_a, value) \
434 foreach_element(8, (dest).e[_i] = (source_a).e[_i] & value) \
435
436#define average_8x16b(dest, source_a, source_b) \
437 foreach_element(8, \
438 (dest).e[_i] = ((source_a).e[_i] + (source_b).e[_i]) >> 1) \
439
440
441#define mul_8x8b(dest, source_a, source_b) \
442 foreach_element(8, (dest).e[_i] = (source_a).e[_i] * (source_b).e[_i]) \
443
444#define mul_8x16b(dest, source_a, source_b) \
445 foreach_element(8, (dest).e[_i] = (source_a).e[_i] * (source_b).e[_i]) \
446
447#define mul_2x32b(dest, source_a, source_b) \
448 foreach_element(2, (dest).e[_i] = (source_a).e[_i] * (source_b).e[_i]) \
449
450#define mul_4x32b(dest, source_a, source_b) \
451 foreach_element(4, (dest).e[_i] = (source_a).e[_i] * (source_b).e[_i]) \
452
453#define mul_long_8x8b(dest, source_a, source_b) \
454 foreach_element(8, (dest).e[_i] = (source_a).e[_i] * (source_b).e[_i]) \
455
456#define mul_long_4x16b(dest, source_a, source_b) \
457 foreach_element(4, (dest).e[_i] = (source_a).e[_i] * (source_b).e[_i]) \
458
459#define mul_long_2x32b(dest, source_a, source_b) \
460 foreach_element(2, \
461 (dest).e[_i] = (source_a).e[_i] * (s64)((source_b).e[_i])) \
462
463#define mul_scalar_2x32b(dest, source, value) \
464 foreach_element(2, (dest).e[_i] = (source).e[_i] * value) \
465
466#define mul_scalar_long_8x16b(dest, source, value) \
467 foreach_element(8, (dest).e[_i] = (source).e[_i] * value) \
468
469#define mul_scalar_long_2x32b(dest, source, value) \
470 foreach_element(2, (dest).e[_i] = (source).e[_i] * value) \
471
472#define mla_2x32b(dest, source_a, source_b) \
473 foreach_element(2, (dest).e[_i] += (source_a).e[_i] * (source_b).e[_i]) \
474
475#define mla_4x32b(dest, source_a, source_b) \
476 foreach_element(4, (dest).e[_i] += (source_a).e[_i] * (source_b).e[_i]) \
477
478#define mla_scalar_long_2x32b(dest, source, value) \
479 foreach_element(2, (dest).e[_i] += (source).e[_i] * value) \
480
481#define mla_long_8x8b(dest, source_a, source_b) \
482 foreach_element(8, (dest).e[_i] += (source_a).e[_i] * (source_b).e[_i]) \
483
484#define mla_long_2x32b(dest, source_a, source_b) \
485 foreach_element(2, (dest).e[_i] += (source_a).e[_i] * (s64)(source_b).e[_i]) \
486
487#define mla_scalar_4x32b(dest, source, value) \
488 foreach_element(4, (dest).e[_i] += (source).e[_i] * value) \
489
490#define mla_scalar_2x32b(dest, source, value) \
491 foreach_element(2, (dest).e[_i] += (source).e[_i] * value) \
492
493#define mls_scalar_4x32b(dest, source, value) \
494 foreach_element(4, (dest).e[_i] -= (source).e[_i] * value) \
495
496#define mls_scalar_2x32b(dest, source, value) \
497 foreach_element(2, (dest).e[_i] -= (source).e[_i] * value) \
498
499#define mls_scalar_long_2x32b(dest, source, value) \
500 foreach_element(2, (dest).e[_i] -= (source).e[_i] * value) \
501
502#define rev_2x32b(dest, source) \
503{ \
504 u32 tmp = source.e[1]; \
505 (dest).e[1] = source.e[0]; \
506 (dest).e[0] = tmp; \
507} \
508
509#define abs_4x32b(dest, source) \
510 foreach_element(4, (dest).e[_i] = abs(source.e[_i])) \
511
512#define abs_2x32b(dest, source) \
513 foreach_element(2, (dest).e[_i] = abs(source.e[_i])) \
514
515#define neg_2x32b(dest, source) \
516 foreach_element(2, (dest).e[_i] = -((source).e[_i])) \
517
518
519#define shrq_narrow_8x16b(dest, source, shift) \
520 foreach_element(8, \
521 { \
522 u32 result = ((source).e[_i]) >> shift; \
523 if(result > 0xFF) \
524 result = 0xFF; \
525 (dest).e[_i] = result; \
526 }) \
527
2d658c89 528#define min_4x16b(dest, source_a, source_b) \
529 foreach_element(4, \
530 { \
531 s32 result = (source_a).e[_i]; \
532 if((source_b).e[_i] < result) \
533 result = (source_b).e[_i]; \
534 (dest).e[_i] = result; \
535 }) \
536
75e28f62
E
537#define min_8x16b(dest, source_a, source_b) \
538 foreach_element(8, \
539 { \
540 s32 result = (source_a).e[_i]; \
541 if((source_b).e[_i] < result) \
542 result = (source_b).e[_i]; \
543 (dest).e[_i] = result; \
544 }) \
545
546#define min_8x8b(dest, source_a, source_b) \
547 foreach_element(8, \
548 { \
549 u32 result = (source_a).e[_i]; \
550 if((source_b).e[_i] < result) \
551 result = (source_b).e[_i]; \
552 (dest).e[_i] = result; \
553 }) \
554
555#define min_16x8b(dest, source_a, source_b) \
556 foreach_element(16, \
557 { \
558 u32 result = (source_a).e[_i]; \
559 if((source_b).e[_i] < result) \
560 result = (source_b).e[_i]; \
561 (dest).e[_i] = result; \
562 }) \
563
564#define max_8x16b(dest, source_a, source_b) \
565 foreach_element(8, \
566 { \
567 s32 result = (source_a).e[_i]; \
568 if((source_b).e[_i] > result) \
569 result = (source_b).e[_i]; \
570 (dest).e[_i] = result; \
571 }) \
572
573#define bsl_8x16b(dest_mask, source_a, source_b) \
574 foreach_element(8, dest_mask.e[_i] = ((source_a).e[_i] & dest_mask.e[_i]) | \
575 ((source_b).e[_i] & ~(dest_mask.e[_i]))) \
576
577#define bif_8x16b(dest, source, mask) \
578 foreach_element(8, dest.e[_i] = ((source).e[_i] & ~(mask.e[_i])) | \
579 ((dest).e[_i] & mask.e[_i])) \
580
581#define bsl_4x32b(dest_mask, source_a, source_b) \
582 foreach_element(4, dest_mask.e[_i] = ((source_a).e[_i] & dest_mask.e[_i]) | \
583 ((source_b).e[_i] & ~(dest_mask.e[_i]))) \
584
585#define bit_4x16b(dest, source, mask) \
586 foreach_element(4, dest.e[_i] = ((source).e[_i] & mask.e[_i]) | \
587 ((dest).e[_i] & ~(mask.e[_i]))) \
588
589#endif