/*
 * pcsx_rearmed: plugins/gpu_neon/psx_gpu/vector_ops.h
 * Scalar C emulation of NEON-style vector operations.
 */
/*
 * Copyright (C) 2011 Gilead Kutnick "Exophase" <exophase@gmail.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of
 * the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */

#ifndef VECTOR_OPS
#define VECTOR_OPS

/* Generates a pair of vector structs for one element type:
     vec_<count>x<size><sign>    - e[count] lanes of type <sign><size>
     vec_<count_x2>x<size><sign> - double-count variant whose e[] array
                                   overlays anonymous .low/.high halves
   Uses anonymous struct/union members (GNU C / C11 extension).
   <sign>##<size> pastes to u8/s8/u16/... which must be typedef'd by
   the includer - they are not defined in this header. */
#define build_vector_type_pair(sign, size, count, count_x2) \
typedef struct \
{ \
  sign##size e[count]; \
} vec_##count##x##size##sign; \
 \
typedef struct \
{ \
  union \
  { \
    sign##size e[count_x2]; \
    struct \
    { \
      vec_##count##x##size##sign low; \
      vec_##count##x##size##sign high; \
    }; \
  }; \
} vec_##count_x2##x##size##sign

/* Instantiates the 8/16/32/64-bit lane widths for one signedness. */
#define build_vector_types(sign) \
  build_vector_type_pair(sign, 8, 8, 16); \
  build_vector_type_pair(sign, 16, 4, 8); \
  build_vector_type_pair(sign, 32, 2, 4); \
  build_vector_type_pair(sign, 64, 1, 2)

build_vector_types(u);
build_vector_types(s);

/* Runs `operation` once per element; the loop index is available to the
   operation as `_i`.
   Fixed to expand to a do/while(0) so that an invocation followed by
   ';' forms exactly one statement: the original bare { } block left a
   stray ';' behind, which is a syntax error in an unbraced if/else.
   NOTE: `iterations` and `operation` are macro arguments and are
   evaluated on every pass - pass side-effect-free expressions. */
#define foreach_element(iterations, operation) \
do \
{ \
  u32 _i; \
  for(_i = 0; _i < iterations; _i++) \
  { \
    operation; \
  } \
} while(0)

/* Raw 64/128-bit copies between a vector's element array and memory,
   plus element-wise 8x16b load/store.
   NOTE(review): the u64 * casts assume `source`/`dest` are suitably
   aligned for 64-bit access and bypass strict aliasing - this matches
   the original code's usage; do not pass unaligned buffers.
   load_128b/store_128b were two bare statements; under an unbraced
   `if` only the first half executed.  They are now wrapped in
   do/while(0) so each invocation is a single statement. */
#define load_64b(dest, source) \
  *((u64 *)(dest).e) = *((u64 *)(source))

#define load_128b(dest, source) \
do \
{ \
  *((u64 *)(dest).e) = *((u64 *)(source)); \
  *((u64 *)(dest).e + 1) = *(((u64 *)(source)) + 1); \
} while(0)

#define load_8x16b(dest, source) \
  foreach_element(8, (dest).e[_i] = ((u16 *)(source))[_i])

#define store_64b(source, dest) \
  *((u64 *)(dest)) = *((u64 *)(source).e)

#define store_128b(source, dest) \
do \
{ \
  *((u64 *)(dest)) = *((u64 *)(source).e); \
  *(((u64 *)(dest)) + 1) = *((u64 *)(source).e + 1); \
} while(0)

#define store_8x16b(source, dest) \
  foreach_element(8, ((u16 *)dest)[_i] = (source).e[_i])

/* Splits each 16-bit lane of `source` into two byte lanes of `dest`,
   low byte first (little-endian lane order). */
#define split_8x16b(dest, source) \
  foreach_element(8, \
  { \
    (dest).e[_i * 2] = (source).e[_i]; \
    (dest).e[(_i * 2) + 1] = (source).e[_i] >> 8; \
  })

/* Inverse of split_8x16b: fuses adjacent byte-lane pairs back into
   16-bit lanes. */
#define merge_16x8b(dest, source) \
  foreach_element(8, \
   (dest).e[_i] = (source).e[_i * 2] | ((source).e[(_i * 2) + 1] << 8))

/* Reinterprets `source` in place as vector type `vec_to` (no copy).
   Deliberately type-puns; the volatile qualifier discourages the
   compiler from optimizing across the aliasing violation. */
#define vector_cast(vec_to, source) \
  (*((volatile vec_to *)(&(source))))

/* As vector_cast, but reinterprets only the upper half of `source`.
   NOTE(review): `source` is not parenthesized here - pass a plain
   lvalue, not a compound expression. */
#define vector_cast_high(vec_to, source) \
  (*((volatile vec_to *)((u8 *)source.e + (sizeof(source.e) / 2))))

/* dup_<count>x<width>b: broadcasts the scalar `value` into every lane
   of `dest` (NEON vdup equivalent).  `value` is re-evaluated per lane,
   so pass a side-effect-free expression. */
#define dup_8x8b(dest, value) \
  foreach_element(8, (dest).e[_i] = value)

#define dup_16x8b(dest, value) \
  foreach_element(16, (dest).e[_i] = value)

#define dup_4x16b(dest, value) \
  foreach_element(4, (dest).e[_i] = value)

#define dup_8x16b(dest, value) \
  foreach_element(8, (dest).e[_i] = value)

#define dup_2x32b(dest, value) \
  foreach_element(2, (dest).e[_i] = value)

#define dup_4x32b(dest, value) \
  foreach_element(4, (dest).e[_i] = value)

/* Immediate-shift family.  Naming follows NEON: shr/shl shift each
   lane by a constant; _narrow stores into a narrower-lane dest;
   _long stores into a wider-lane dest; _variable takes per-lane shift
   counts from a second vector (only the low byte of each count lane is
   used).  The (u8)/(u16)/(u32) casts force a logical (zero-fill) right
   shift even when the source lanes are signed; right shifts without a
   cast sign-extend for signed sources. */
#define shr_narrow_8x16b(dest, source, shift) \
  foreach_element(8, (dest).e[_i] = (u16)(source).e[_i] >> (shift))

#define shr_narrow_2x64b(dest, source, shift) \
  foreach_element(2, (dest).e[_i] = (source).e[_i] >> (shift))

#define shr_8x8b(dest, source, shift) \
  foreach_element(8, (dest).e[_i] = (u8)(source).e[_i] >> (shift))

#define shl_8x8b(dest, source, shift) \
  foreach_element(8, (dest).e[_i] = (source).e[_i] << (shift))

#define shr_8x16b(dest, source, shift) \
  foreach_element(8, (dest).e[_i] = (u16)(source).e[_i] >> (shift))

#define shr_2x32b(dest, source, shift) \
  foreach_element(2, (dest).e[_i] = (u32)(source).e[_i] >> (shift))

/* No cast: arithmetic shift for signed 16-bit sources. */
#define shr_4x16b(dest, source, shift) \
  foreach_element(4, (dest).e[_i] = (source).e[_i] >> (shift))

/* (u32) cast keeps the left shift in unsigned arithmetic (avoids
   shifting into the sign bit of the promoted int). */
#define shl_4x16b(dest, source, shift) \
  foreach_element(4, (dest).e[_i] = (u32)(source).e[_i] << (shift))

#define shr_4x32b(dest, source, shift) \
  foreach_element(4, (dest).e[_i] = (u32)(source).e[_i] >> (shift))

#define shr_narrow_4x32b(dest, source, shift) \
  foreach_element(4, (dest).e[_i] = (u32)(source).e[_i] >> (shift))

#define shl_8x16b(dest, source, shift) \
  foreach_element(8, (dest).e[_i] = (source).e[_i] << (shift))

#define shl_4x32b(dest, source, shift) \
  foreach_element(4, (dest).e[_i] = (source).e[_i] << (shift))

#define shl_2x32b(dest, source, shift) \
  foreach_element(2, (dest).e[_i] = (source).e[_i] << (shift))

#define shl_1x64b(dest, source, shift) \
  ((dest).e[0] = (source).e[0] << (shift))

#define shl_2x64b(dest, source, shift) \
  foreach_element(2, (dest).e[_i] = (source).e[_i] << (shift))

#define shl_variable_2x64b(dest, source_a, source_b) \
  foreach_element(2, \
   (dest).e[_i] = (source_a).e[_i] << ((source_b).e[_i] & 0xFF))

#define shl_variable_8x16b(dest, source_a, source_b) \
  foreach_element(8, \
   (dest).e[_i] = (source_a).e[_i] << ((source_b).e[_i] & 0xFF))

#define shl_variable_4x16b(dest, source_a, source_b) \
  foreach_element(4, \
   (dest).e[_i] = (source_a).e[_i] << ((source_b).e[_i] & 0xFF))

#define shr_1x64b(dest, source, shift) \
  ((dest).e[0] = (source).e[0] >> (shift))

#define shl_long_8x8b(dest, source, shift) \
  foreach_element(8, (dest).e[_i] = (source).e[_i] << (shift))

#define shl_long_4x16b(dest, source, shift) \
  foreach_element(4, (dest).e[_i] = (source).e[_i] << (shift))

/* Arithmetic right shift of each signed 16-bit lane, then saturate to
   the unsigned byte range 0..0xFF (NEON vqshrun-style signed-to-u8
   saturating narrow). */
#define shrq_narrow_signed_8x16b(dest, source, shift) \
  foreach_element(8, \
  { \
    s32 result = ((s16)(source).e[_i]) >> shift; \
    if(result < 0) \
      result = 0; \
    if(result > 0xFF) \
      result = 0xFF; \
    (dest).e[_i] = result; \
  })

/* shl_reg_*: per-lane shift by a signed register amount, following
   NEON vshl semantics - a negative count shifts right by its
   magnitude.  Only the low byte of each count lane is read (as s8).
   NOTE(review): `dest` is used unparenthesized in these three macros -
   pass a plain lvalue. */
#define shl_reg_4x32b(dest, source_a, source_b) \
  foreach_element(4, \
  { \
    s8 shift = (source_b).e[_i]; \
    if(shift < 0) \
      dest.e[_i] = (source_a).e[_i] >> (-shift); \
    else \
      dest.e[_i] = (source_a).e[_i] << shift; \
  })

#define shl_reg_2x32b(dest, source_a, source_b) \
  foreach_element(2, \
  { \
    s8 shift = (source_b).e[_i]; \
    if(shift < 0) \
      dest.e[_i] = (source_a).e[_i] >> (-shift); \
    else \
      dest.e[_i] = (source_a).e[_i] << shift; \
  })

#define shl_reg_2x64b(dest, source_a, source_b) \
  foreach_element(2, \
  { \
    s8 shift = (source_b).e[_i]; \
    if(shift < 0) \
      dest.e[_i] = (source_a).e[_i] >> (-shift); \
    else \
      dest.e[_i] = (source_a).e[_i] << shift; \
  })

/* Shift-right insert (NEON vsri): shifts each source byte right by
   `shift` and merges it into dest, preserving dest's top `shift` bits. */
#define sri_8x8b(dest, source, shift) \
  foreach_element(8, (dest).e[_i] = ((dest).e[_i] & ~(0xFF >> (shift))) | \
   ((u8)(source).e[_i] >> (shift)))

/* Shift-left insert (NEON vsli): preserves dest's low `shift` bits. */
#define sli_8x8b(dest, source, shift) \
  foreach_element(8, (dest).e[_i] = ((dest).e[_i] & ~(0xFF << (shift))) | \
   ((source).e[_i] << (shift)))

/* mov_narrow_*: copies lanes into a narrower-lane dest (truncating);
   mov_wide_*: copies lanes into a wider-lane dest. */
#define mov_narrow_8x16b(dest, source) \
  foreach_element(8, (dest).e[_i] = (source).e[_i])

#define mov_narrow_4x32b(dest, source) \
  foreach_element(4, (dest).e[_i] = (source).e[_i])

#define mov_narrow_2x64b(dest, source) \
  foreach_element(2, (dest).e[_i] = (source).e[_i])

#define mov_wide_8x8b(dest, source) \
  foreach_element(8, (dest).e[_i] = (source).e[_i])

#define mov_wide_2x32b(dest, source) \
  foreach_element(2, (dest).e[_i] = (source).e[_i])

/* Bitwise NOT of each lane. */
#define mvn_4x16b(dest, source) \
  foreach_element(4, (dest).e[_i] = ~((source).e[_i]))

/* add_*: lane-wise addition (wrapping, no saturation). */
#define add_4x16b(dest, source_a, source_b) \
  foreach_element(4, (dest).e[_i] = (source_a).e[_i] + (source_b).e[_i])

#define add_4x32b(dest, source_a, source_b) \
  foreach_element(4, (dest).e[_i] = (source_a).e[_i] + (source_b).e[_i])

#define add_2x32b(dest, source_a, source_b) \
  foreach_element(2, (dest).e[_i] = (source_a).e[_i] + (source_b).e[_i])

#define add_8x16b(dest, source_a, source_b) \
  foreach_element(8, (dest).e[_i] = (source_a).e[_i] + (source_b).e[_i])

#define add_16x8b(dest, source_a, source_b) \
  foreach_element(16, (dest).e[_i] = (source_a).e[_i] + (source_b).e[_i])

#define add_8x8b(dest, source_a, source_b) \
  foreach_element(8, (dest).e[_i] = (source_a).e[_i] + (source_b).e[_i])

#define add_1x64b(dest, source_a, source_b) \
  (dest).e[0] = (source_a).e[0] + (source_b).e[0]

#define add_2x64b(dest, source_a, source_b) \
  foreach_element(2, (dest).e[_i] = (source_a).e[_i] + (source_b).e[_i])

/* Adds 2x64b lanes and narrows each sum to its high 32 bits (NEON
   vaddhn.i64 equivalent).
   Fix: the original applied >> 32 *outside* the assignment -
   `((dest).e[_i] = a + b) >> 32` - so the low half of the sum was
   stored and the shift result discarded.  The shift now applies to the
   sum before assignment, matching the 4x32b variant below. */
#define add_high_narrow_2x64b(dest, source_a, source_b) \
  foreach_element(2, \
   ((dest).e[_i] = ((source_a).e[_i] + (source_b).e[_i]) >> 32))

/* Adds 4x32b lanes and narrows each sum to its high 16 bits (NEON
   vaddhn.i32 equivalent). */
#define add_high_narrow_4x32b(dest, source_a, source_b) \
  foreach_element(4, \
   ((dest).e[_i] = ((source_a).e[_i] + (source_b).e[_i]) >> 16))

/* sub_*: lane-wise subtraction (wrapping); the _wide variants read
   narrow-lane sources into a wider-lane dest. */
#define sub_4x16b(dest, source_a, source_b) \
  foreach_element(4, (dest).e[_i] = (source_a).e[_i] - (source_b).e[_i])

#define sub_4x32b(dest, source_a, source_b) \
  foreach_element(4, (dest).e[_i] = (source_a).e[_i] - (source_b).e[_i])

#define sub_2x32b(dest, source_a, source_b) \
  foreach_element(2, (dest).e[_i] = (source_a).e[_i] - (source_b).e[_i])

#define sub_wide_8x8b(dest, source_a, source_b) \
  foreach_element(8, (dest).e[_i] = (source_a).e[_i] - (source_b).e[_i])

#define add_wide_8x8b(dest, source_a, source_b) \
  foreach_element(8, (dest).e[_i] = (source_a).e[_i] + (source_b).e[_i])

#define add_wide_2x32b(dest, source_a, source_b) \
  foreach_element(2, (dest).e[_i] = (source_a).e[_i] + (source_b).e[_i])

/* Saturating u8 add: clamps each sum to 0xFF. */
#define addq_8x8b(dest, source_a, source_b) \
  foreach_element(8, \
  { \
    u32 result = (source_a).e[_i] + (source_b).e[_i]; \
    if(result > 0xFF) \
      result = 0xFF; \
    (dest).e[_i] = result; \
  }) \

/* Saturating u8 subtract: underflow wraps the u32 result above 0xFF,
   which is then clamped to 0 (assumes byte-range operands). */
#define subq_8x8b(dest, source_a, source_b) \
  foreach_element(8, \
  { \
    u32 result = (source_a).e[_i] - (source_b).e[_i]; \
    if(result > 0xFF) \
      result = 0; \
    (dest).e[_i] = result; \
  }) \

/* Forwards to subs_8x8b.
   NOTE(review): subs_8x8b is not defined in this header - presumably
   provided elsewhere; verify against the callers. */
#define subs_long_8x8b(dest, source_a, source_b) \
  subs_8x8b(dest, source_a, source_b) \

/* 16-lane version of the wrap-and-clamp subtract above. */
#define subs_16x8b(dest, source_a, source_b) \
  foreach_element(16, \
  { \
    u32 result = (source_a).e[_i] - (source_b).e[_i]; \
    if(result > 0xFF) \
      result = 0; \
    (dest).e[_i] = result; \
  }) \

/* 16-bit subtract with floor at 0 (no upper clamp). */
#define subs_8x16b(dest, source_a, source_b) \
  foreach_element(8, \
  { \
    s32 result = (source_a).e[_i] - (source_b).e[_i]; \
    if(result < 0) \
      result = 0; \
 \
    (dest).e[_i] = result; \
  }) \

#define sub_8x16b(dest, source_a, source_b) \
  foreach_element(8, (dest).e[_i] = (source_a).e[_i] - (source_b).e[_i]) \

#define sub_16x8b(dest, source_a, source_b) \
  foreach_element(16, (dest).e[_i] = (source_a).e[_i] - (source_b).e[_i])

/* Lane-wise bitwise operations: orn = a | ~b, bic = a & ~b (NEON
   naming); _immediate variants operate on dest in place with a
   constant; eor = exclusive or. */
#define orn_8x16b(dest, source_a, source_b) \
  foreach_element(8, (dest).e[_i] = (source_a).e[_i] | ~((source_b).e[_i]))

#define and_4x16b(dest, source_a, source_b) \
  foreach_element(4, (dest).e[_i] = (source_a).e[_i] & (source_b).e[_i])

#define and_8x16b(dest, source_a, source_b) \
  foreach_element(8, (dest).e[_i] = (source_a).e[_i] & (source_b).e[_i])

#define and_4x32b(dest, source_a, source_b) \
  foreach_element(4, (dest).e[_i] = (source_a).e[_i] & (source_b).e[_i])

#define and_16x8b(dest, source_a, source_b) \
  foreach_element(16, (dest).e[_i] = (source_a).e[_i] & (source_b).e[_i])

#define and_8x8b(dest, source_a, source_b) \
  foreach_element(8, (dest).e[_i] = (source_a).e[_i] & (source_b).e[_i])

#define and_2x32b(dest, source_a, source_b) \
  foreach_element(2, (dest).e[_i] = (source_a).e[_i] & (source_b).e[_i])

#define bic_8x8b(dest, source_a, source_b) \
  foreach_element(8, (dest).e[_i] = (source_a).e[_i] & ~((source_b).e[_i]))

#define bic_8x16b(dest, source_a, source_b) \
  foreach_element(8, (dest).e[_i] = (source_a).e[_i] & ~((source_b).e[_i]))

#define bic_immediate_4x16b(dest, value) \
  foreach_element(4, (dest).e[_i] = (dest).e[_i] & ~(value))

#define bic_immediate_8x16b(dest, value) \
  foreach_element(8, (dest).e[_i] = (dest).e[_i] & ~(value))

#define or_8x16b(dest, source_a, source_b) \
  foreach_element(8, (dest).e[_i] = (source_a).e[_i] | (source_b).e[_i])

#define or_immediate_8x16b(dest, source_a, value) \
  foreach_element(8, (dest).e[_i] = (source_a).e[_i] | (value))

#define eor_8x16b(dest, source_a, source_b) \
  foreach_element(8, (dest).e[_i] = (source_a).e[_i] ^ (source_b).e[_i])

#define eor_4x32b(dest, source_a, source_b) \
  foreach_element(4, (dest).e[_i] = (source_a).e[_i] ^ (source_b).e[_i])

#define eor_2x32b(dest, source_a, source_b) \
  foreach_element(2, (dest).e[_i] = (source_a).e[_i] ^ (source_b).e[_i])

/* zip_*: interleaves two narrow-lane vectors into double-width lanes,
   source_a in the low half of each lane.  The (u8)/(u16)/(u64) casts
   truncate each source lane to its nominal width before combining. */
#define zip_8x16b(dest, source_a, source_b) \
  foreach_element(8, (dest).e[_i] = \
   (u8)(source_a).e[_i] | ((u8)(source_b).e[_i] << 8))

#define zip_4x32b(dest, source_a, source_b) \
  foreach_element(4, (dest).e[_i] = \
   (u16)(source_a).e[_i] | ((u16)(source_b).e[_i] << 16))

#define zip_2x64b(dest, source_a, source_b) \
  foreach_element(2, (dest).e[_i] = \
   (u64)(source_a).e[_i] | ((u64)(source_b).e[_i] << 32))

/* De-interleaves 16-bit lanes: low bytes go to dest_a, high bytes to
   dest_b. */
#define unzip_8x8b(dest_a, dest_b, source) \
  foreach_element(8, \
  { \
    (dest_a).e[_i] = (source).e[_i]; \
    (dest_b).e[_i] = ((source).e[_i]) >> 8; \
  }) \

/* 16-lane variant over two 8-lane sources; source_b fills lanes 8..15.
   NOTE(review): expands to two statements - do not invoke inside an
   unbraced if/else. */
#define unzip_16x8b(dest_a, dest_b, source_a, source_b) \
  foreach_element(8, \
  { \
    (dest_a).e[_i] = (source_a).e[_i]; \
    (dest_b).e[_i] = (source_a).e[_i] >> 8; \
  }); \
  foreach_element(8, \
  { \
    (dest_a).e[_i + 8] = (source_b).e[_i]; \
    (dest_b).e[_i + 8] = (source_b).e[_i] >> 8; \
  }) \

/* Byte table lookup (NEON vtbl-style): out-of-range indexes (>= 16)
   produce 0.
   NOTE(review): `indexes` and `table` are unparenthesized - pass plain
   lvalues. */
#define tbl_16(dest, indexes, table) \
  foreach_element(8, \
  { \
    u32 index = indexes.e[_i]; \
    if(index < 16) \
      (dest).e[_i] = table.e[index]; \
    else \
      (dest).e[_i] = 0; \
  })

/* Lane-wise predicates producing all-ones (true) / all-zero (false)
   masks, like NEON compares.  cmpltz_* broadcasts the sign bit with an
   arithmetic right shift.
   NOTE(review): cmplte/cmplt/cmpgt/tst leave their source arguments
   unparenthesized - pass plain lvalues. */
#define cmpeqz_8x16b(dest, source) \
  foreach_element(8, (dest).e[_i] = ~(((source).e[_i] == 0) - 1))

#define cmpltz_8x16b(dest, source) \
  foreach_element(8, (dest).e[_i] = ((s16)(source).e[_i] >> 15))

#define cmpltz_4x32b(dest, source) \
  foreach_element(4, (dest).e[_i] = ((s32)(source).e[_i] >> 31))

#define cmpltz_2x32b(dest, source) \
  foreach_element(2, (dest).e[_i] = ((s32)(source).e[_i] >> 31))

#define cmplte_4x16b(dest, source_a, source_b) \
  foreach_element(4, (dest).e[_i] = ~((source_a.e[_i] <= source_b.e[_i]) - 1))

#define cmplt_4x16b(dest, source_a, source_b) \
  foreach_element(4, (dest).e[_i] = ~((source_a.e[_i] < source_b.e[_i]) - 1))

#define cmpgt_4x16b(dest, source_a, source_b) \
  foreach_element(4, (dest).e[_i] = ~((source_a.e[_i] > source_b.e[_i]) - 1))

/* Mask of lanes where (a & b) has any bit set (NEON vtst). */
#define tst_8x16b(dest, source_a, source_b) \
  foreach_element(8, \
   (dest).e[_i] = ~(((source_a.e[_i] & source_b.e[_i]) != 0) - 1))

#define andi_8x8b(dest, source_a, value) \
  foreach_element(8, (dest).e[_i] = (source_a).e[_i] & value)

/* Halving add: (a + b) >> 1 per 16-bit lane. */
#define average_8x16b(dest, source_a, source_b) \
  foreach_element(8, \
   (dest).e[_i] = ((source_a).e[_i] + (source_b).e[_i]) >> 1)

/* Multiply family: mul_* is lane-wise multiply; _long variants write
   into wider-lane dests; _scalar variants multiply every lane by one
   value; mla/mls accumulate the product into / subtract it from dest. */
#define mul_8x8b(dest, source_a, source_b) \
  foreach_element(8, (dest).e[_i] = (source_a).e[_i] * (source_b).e[_i])

#define mul_8x16b(dest, source_a, source_b) \
  foreach_element(8, (dest).e[_i] = (source_a).e[_i] * (source_b).e[_i])

#define mul_2x32b(dest, source_a, source_b) \
  foreach_element(2, (dest).e[_i] = (source_a).e[_i] * (source_b).e[_i])

#define mul_4x32b(dest, source_a, source_b) \
  foreach_element(4, (dest).e[_i] = (source_a).e[_i] * (source_b).e[_i])

#define mul_long_8x8b(dest, source_a, source_b) \
  foreach_element(8, (dest).e[_i] = (source_a).e[_i] * (source_b).e[_i])

#define mul_long_4x16b(dest, source_a, source_b) \
  foreach_element(4, (dest).e[_i] = (source_a).e[_i] * (source_b).e[_i])

/* (s64) cast widens the multiply so the full 64-bit product is kept. */
#define mul_long_2x32b(dest, source_a, source_b) \
  foreach_element(2, \
   (dest).e[_i] = (source_a).e[_i] * (s64)((source_b).e[_i]))

#define mul_scalar_2x32b(dest, source, value) \
  foreach_element(2, (dest).e[_i] = (source).e[_i] * value)

#define mul_scalar_long_8x16b(dest, source, value) \
  foreach_element(8, (dest).e[_i] = (source).e[_i] * value)

/* NOTE(review): unlike mul_long_2x32b there is no (s64) cast here, so
   the product width depends on the type of `value` at the call site -
   confirm callers pass a 64-bit value when a full 64-bit product is
   expected. */
#define mul_scalar_long_2x32b(dest, source, value) \
  foreach_element(2, (dest).e[_i] = (source).e[_i] * value)

#define mla_2x32b(dest, source_a, source_b) \
  foreach_element(2, (dest).e[_i] += (source_a).e[_i] * (source_b).e[_i])

#define mla_4x32b(dest, source_a, source_b) \
  foreach_element(4, (dest).e[_i] += (source_a).e[_i] * (source_b).e[_i])

/* NOTE(review): same missing widening cast as mul_scalar_long_2x32b. */
#define mla_scalar_long_2x32b(dest, source, value) \
  foreach_element(2, (dest).e[_i] += (source).e[_i] * value)

#define mla_long_8x8b(dest, source_a, source_b) \
  foreach_element(8, (dest).e[_i] += (source_a).e[_i] * (source_b).e[_i])

#define mla_long_2x32b(dest, source_a, source_b) \
  foreach_element(2, (dest).e[_i] += (source_a).e[_i] * (s64)(source_b).e[_i])

#define mla_scalar_4x32b(dest, source, value) \
  foreach_element(4, (dest).e[_i] += (source).e[_i] * value)

#define mla_scalar_2x32b(dest, source, value) \
  foreach_element(2, (dest).e[_i] += (source).e[_i] * value)

#define mls_scalar_4x32b(dest, source, value) \
  foreach_element(4, (dest).e[_i] -= (source).e[_i] * value)

#define mls_scalar_2x32b(dest, source, value) \
  foreach_element(2, (dest).e[_i] -= (source).e[_i] * value)

/* NOTE(review): same missing widening cast as mul_scalar_long_2x32b. */
#define mls_scalar_long_2x32b(dest, source, value) \
  foreach_element(2, (dest).e[_i] -= (source).e[_i] * value)

/* Swaps the two 32-bit lanes.
   NOTE(review): bare braced-block macro (leaves a stray ';' - unsafe
   in unbraced if/else) and `source` is unparenthesized. */
#define rev_2x32b(dest, source) \
{ \
  u32 tmp = source.e[1]; \
  (dest).e[1] = source.e[0]; \
  (dest).e[0] = tmp; \
} \

/* Lane-wise absolute value via the C library abs() - the includer must
   provide its declaration (<stdlib.h> is not included here). */
#define abs_4x32b(dest, source) \
  foreach_element(4, (dest).e[_i] = abs(source.e[_i]))

#define abs_2x32b(dest, source) \
  foreach_element(2, (dest).e[_i] = abs(source.e[_i]))

/* Lane-wise negation. */
#define neg_2x32b(dest, source) \
  foreach_element(2, (dest).e[_i] = -((source).e[_i]))

/* Unsigned right shift of each 16-bit lane, saturated to 0..0xFF
   (unsigned narrowing counterpart of shrq_narrow_signed_8x16b). */
#define shrq_narrow_8x16b(dest, source, shift) \
  foreach_element(8, \
  { \
    u32 result = ((source).e[_i]) >> shift; \
    if(result > 0xFF) \
      result = 0xFF; \
    (dest).e[_i] = result; \
  })

/* Lane-wise min/max.  The 16-bit variants compare through s32
   (signed); the 8-bit variants compare through u32 (unsigned). */
#define min_8x16b(dest, source_a, source_b) \
  foreach_element(8, \
  { \
    s32 result = (source_a).e[_i]; \
    if((source_b).e[_i] < result) \
      result = (source_b).e[_i]; \
    (dest).e[_i] = result; \
  }) \

#define min_8x8b(dest, source_a, source_b) \
  foreach_element(8, \
  { \
    u32 result = (source_a).e[_i]; \
    if((source_b).e[_i] < result) \
      result = (source_b).e[_i]; \
    (dest).e[_i] = result; \
  }) \

#define min_16x8b(dest, source_a, source_b) \
  foreach_element(16, \
  { \
    u32 result = (source_a).e[_i]; \
    if((source_b).e[_i] < result) \
      result = (source_b).e[_i]; \
    (dest).e[_i] = result; \
  }) \

#define max_8x16b(dest, source_a, source_b) \
  foreach_element(8, \
  { \
    s32 result = (source_a).e[_i]; \
    if((source_b).e[_i] > result) \
      result = (source_b).e[_i]; \
    (dest).e[_i] = result; \
  })

/* Bit-select family (NEON vbsl/vbif/vbit): for every bit position,
   a set mask bit selects the first operand, a clear bit the second.
   bsl_* overwrites the mask operand with the result.
   NOTE(review): dest/mask arguments are unparenthesized in places -
   pass plain lvalues. */
#define bsl_8x16b(dest_mask, source_a, source_b) \
  foreach_element(8, dest_mask.e[_i] = ((source_a).e[_i] & dest_mask.e[_i]) | \
   ((source_b).e[_i] & ~(dest_mask.e[_i]))) \

/* Insert-if-false: keeps dest bits where mask is set, takes source
   bits where mask is clear. */
#define bif_8x16b(dest, source, mask) \
  foreach_element(8, dest.e[_i] = ((source).e[_i] & ~(mask.e[_i])) | \
   ((dest).e[_i] & mask.e[_i])) \

#define bsl_4x32b(dest_mask, source_a, source_b) \
  foreach_element(4, dest_mask.e[_i] = ((source_a).e[_i] & dest_mask.e[_i]) | \
   ((source_b).e[_i] & ~(dest_mask.e[_i]))) \

/* Insert-if-true: takes source bits where mask is set. */
#define bit_4x16b(dest, source, mask) \
  foreach_element(4, dest.e[_i] = ((source).e[_i] & mask.e[_i]) | \
   ((dest).e[_i] & ~(mask.e[_i])))

#endif