psx_gpu: fix line cmd length and move w/h masking
[pcsx_rearmed.git] / plugins / gpu_neon / psx_gpu / vector_ops.h
CommitLineData
75e28f62
E
1/*
2 * Copyright (C) 2011 Gilead Kutnick "Exophase" <exophase@gmail.com>
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License as
6 * published by the Free Software Foundation; either version 2 of
7 * the License, or (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 */
14
#ifndef VECTOR_OPS
#define VECTOR_OPS

/* Plain-C emulation of NEON-style vector registers and operations.
   Relies on integer typedefs u8/u16/u32/u64 and s8/s16/s32/s64 being
   provided by the including translation unit (project-wide types). */

/* Generates a pair of vector container types:
     vec_<count>x<size><sign>    - <count> elements of <sign><size>
     vec_<count_x2>x<size><sign> - double-width variant, also viewable as
                                   a {low, high} pair of the narrow type
                                   via the anonymous union/struct. */
#define build_vector_type_pair(sign, size, count, count_x2) \
typedef struct \
{ \
  sign##size e[count]; \
} vec_##count##x##size##sign; \
 \
typedef struct \
{ \
  union \
  { \
    sign##size e[count_x2]; \
    struct \
    { \
      vec_##count##x##size##sign low; \
      vec_##count##x##size##sign high; \
    }; \
  }; \
} vec_##count_x2##x##size##sign \

/* Instantiates all 64-bit/128-bit type pairs for one signedness:
   8x8/16x8, 4x16/8x16, 2x32/4x32 and 1x64/2x64. */
#define build_vector_types(sign) \
  build_vector_type_pair(sign, 8, 8, 16); \
  build_vector_type_pair(sign, 16, 4, 8); \
  build_vector_type_pair(sign, 32, 2, 4); \
  build_vector_type_pair(sign, 64, 1, 2) \

build_vector_types(u);
build_vector_types(s);
45
46
/* Runs 'operation' once per element; the element index is visible to
   'operation' as _i.  Expands to a brace block, so call sites may follow
   it with a semicolon. */
#define foreach_element(iterations, operation) \
{ \
  u32 _i; \
  for(_i = 0; _i < iterations; _i++) \
  { \
    operation; \
  } \
} \

/* Raw 64-bit load from memory into a vector's element storage.
   NOTE(review): these load/store macros type-pun through u64 * and assume
   adequate alignment of both operands — presumably acceptable for the
   targets this emulation path serves; confirm for new platforms. */
#define load_64b(dest, source) \
  *((u64 *)(dest).e) = *((u64 *)(source)) \

/* Raw 128-bit load, performed as two 64-bit copies. */
#define load_128b(dest, source) \
  *((u64 *)(dest).e) = *((u64 *)(source)); \
  *((u64 *)(dest).e + 1) = *(((u64 *)(source)) + 1) \

/* Element-wise load of 8 u16 values (dest elements may be wider). */
#define load_8x16b(dest, source) \
  foreach_element(8, (dest).e[_i] = ((u16 *)(source))[_i]) \

/* Raw 64-bit store of a vector's element storage to memory. */
#define store_64b(source, dest) \
  *((u64 *)(dest)) = *((u64 *)(source).e) \

/* Raw 128-bit store, performed as two 64-bit copies. */
#define store_128b(source, dest) \
  *((u64 *)(dest)) = *((u64 *)(source).e); \
  *(((u64 *)(dest)) + 1) = *((u64 *)(source).e + 1) \

/* Element-wise store of 8 elements as u16 values (truncating if wider). */
#define store_8x16b(source, dest) \
  foreach_element(8, ((u16 *)dest)[_i] = (source).e[_i]) \
75
76
/* Splits each 16-bit element into two bytes of 'dest':
   even index gets the low byte, odd index the high byte. */
#define split_8x16b(dest, source) \
  foreach_element(8, \
  { \
    (dest).e[_i * 2] = (source).e[_i]; \
    (dest).e[(_i * 2) + 1] = (source).e[_i] >> 8; \
  }) \

/* Inverse of split_8x16b: combines adjacent byte pairs (low, high) back
   into 16-bit elements. */
#define merge_16x8b(dest, source) \
  foreach_element(8, \
   (dest).e[_i] = (source).e[_i * 2] | ((source).e[(_i * 2) + 1] << 8)) \

/* Reinterprets 'source' as vector type 'vec_to' with no data conversion
   (the volatile qualifier discourages the compiler from caching across
   the type pun). */
#define vector_cast(vec_to, source) \
  (*((volatile vec_to *)(&(source)))) \

/* Reinterprets only the upper half of 'source' as vector type 'vec_to'. */
#define vector_cast_high(vec_to, source) \
  (*((volatile vec_to *)((u8 *)source.e + (sizeof(source.e) / 2)))) \
93
94
/* dup_NxSb: broadcasts scalar 'value' into every element of 'dest'
   (vector duplicate). */
#define dup_8x8b(dest, value) \
  foreach_element(8, (dest).e[_i] = value) \

#define dup_16x8b(dest, value) \
  foreach_element(16, (dest).e[_i] = value) \

#define dup_4x16b(dest, value) \
  foreach_element(4, (dest).e[_i] = value) \

#define dup_8x16b(dest, value) \
  foreach_element(8, (dest).e[_i] = value) \

#define dup_2x32b(dest, value) \
  foreach_element(2, (dest).e[_i] = value) \

#define dup_4x32b(dest, value) \
  foreach_element(4, (dest).e[_i] = value) \
112
/* Shift family:
     shr_*          - per-element logical right shift by a constant; the
                      source element is cast to its unsigned width first
                      where the element type could be signed.
     shr_narrow_*   - right shift into a dest with narrower elements
                      (upper bits dropped by the assignment).
     shl_*          - per-element left shift by a constant.
     shl_long_*     - left shift into a dest with wider elements.
     shl_variable_* - per-element shift amount taken from source_b
                      (masked to 8 bits). */
#define shr_narrow_8x16b(dest, source, shift) \
  foreach_element(8, (dest).e[_i] = (u16)(source).e[_i] >> (shift)) \

#define shr_narrow_2x64b(dest, source, shift) \
  foreach_element(2, (dest).e[_i] = (source).e[_i] >> (shift)) \

#define shr_8x8b(dest, source, shift) \
  foreach_element(8, (dest).e[_i] = (u8)(source).e[_i] >> (shift)) \

#define shl_8x8b(dest, source, shift) \
  foreach_element(8, (dest).e[_i] = (source).e[_i] << (shift)) \

#define shr_8x16b(dest, source, shift) \
  foreach_element(8, (dest).e[_i] = (u16)(source).e[_i] >> (shift)) \

#define shr_2x32b(dest, source, shift) \
  foreach_element(2, (dest).e[_i] = (u32)(source).e[_i] >> (shift)) \

#define shr_4x16b(dest, source, shift) \
  foreach_element(4, (dest).e[_i] = (source).e[_i] >> (shift)) \

#define shl_4x16b(dest, source, shift) \
  foreach_element(4, (dest).e[_i] = (u32)(source).e[_i] << (shift)) \

#define shr_4x32b(dest, source, shift) \
  foreach_element(4, (dest).e[_i] = (u32)(source).e[_i] >> (shift)) \

#define shr_narrow_4x32b(dest, source, shift) \
  foreach_element(4, (dest).e[_i] = (u32)(source).e[_i] >> (shift)) \

#define shl_8x16b(dest, source, shift) \
  foreach_element(8, (dest).e[_i] = (source).e[_i] << (shift)) \

#define shl_4x32b(dest, source, shift) \
  foreach_element(4, (dest).e[_i] = (source).e[_i] << (shift)) \

#define shl_2x32b(dest, source, shift) \
  foreach_element(2, (dest).e[_i] = (source).e[_i] << (shift)) \

#define shl_1x64b(dest, source, shift) \
  ((dest).e[0] = (source).e[0] << (shift)) \

#define shl_2x64b(dest, source, shift) \
  foreach_element(2, (dest).e[_i] = (source).e[_i] << (shift)) \

#define shl_variable_2x64b(dest, source_a, source_b) \
  foreach_element(2, \
   (dest).e[_i] = (source_a).e[_i] << ((source_b).e[_i] & 0xFF)) \

#define shl_variable_8x16b(dest, source_a, source_b) \
  foreach_element(8, \
   (dest).e[_i] = (source_a).e[_i] << ((source_b).e[_i] & 0xFF)) \

#define shl_variable_4x16b(dest, source_a, source_b) \
  foreach_element(4, \
   (dest).e[_i] = (source_a).e[_i] << ((source_b).e[_i] & 0xFF)) \

#define shr_1x64b(dest, source, shift) \
  ((dest).e[0] = (source).e[0] >> (shift)) \

#define shl_long_8x8b(dest, source, shift) \
  foreach_element(8, (dest).e[_i] = (source).e[_i] << (shift)) \

#define shl_long_4x16b(dest, source, shift) \
  foreach_element(4, (dest).e[_i] = (source).e[_i] << (shift)) \
178
/* Signed arithmetic right shift, then saturate the result into the
   unsigned byte range [0, 0xFF] before narrowing into dest. */
#define shrq_narrow_signed_8x16b(dest, source, shift) \
  foreach_element(8, \
  { \
    s32 result = ((s16)(source).e[_i]) >> shift; \
    if(result < 0) \
      result = 0; \
    if(result > 0xFF) \
      result = 0xFF; \
    (dest).e[_i] = result; \
  }) \

/* shl_reg_*: per-element shift by a signed amount from source_b; a
   negative amount means shift right by its magnitude (register-shift
   semantics). */
#define shl_reg_4x32b(dest, source_a, source_b) \
  foreach_element(4, \
  { \
    s8 shift = (source_b).e[_i]; \
    if(shift < 0) \
      dest.e[_i] = (source_a).e[_i] >> (-shift); \
    else \
      dest.e[_i] = (source_a).e[_i] << shift; \
  }) \

#define shl_reg_2x32b(dest, source_a, source_b) \
  foreach_element(2, \
  { \
    s8 shift = (source_b).e[_i]; \
    if(shift < 0) \
      dest.e[_i] = (source_a).e[_i] >> (-shift); \
    else \
      dest.e[_i] = (source_a).e[_i] << shift; \
  }) \

#define shl_reg_2x64b(dest, source_a, source_b) \
  foreach_element(2, \
  { \
    s8 shift = (source_b).e[_i]; \
    if(shift < 0) \
      dest.e[_i] = (source_a).e[_i] >> (-shift); \
    else \
      dest.e[_i] = (source_a).e[_i] << shift; \
  }) \


/* Shift right and insert: shifted source bits replace the low
   (8 - shift) bits of each dest byte; dest's top 'shift' bits survive. */
#define sri_8x8b(dest, source, shift) \
  foreach_element(8, (dest).e[_i] = ((dest).e[_i] & ~(0xFF >> (shift))) | \
   ((u8)(source).e[_i] >> (shift))) \

/* Shift left and insert: shifted source bits replace the high bits of
   each dest byte; dest's low 'shift' bits survive. */
#define sli_8x8b(dest, source, shift) \
  foreach_element(8, (dest).e[_i] = ((dest).e[_i] & ~(0xFF << (shift))) | \
   ((source).e[_i] << (shift))) \
228
229
230
/* mov_narrow_*: element-wise copy into a narrower element type
   (truncation happens on assignment).
   mov_wide_*: element-wise copy into a wider element type. */
#define mov_narrow_8x16b(dest, source) \
  foreach_element(8, (dest).e[_i] = (source).e[_i]) \

#define mov_narrow_4x32b(dest, source) \
  foreach_element(4, (dest).e[_i] = (source).e[_i]) \

#define mov_narrow_2x64b(dest, source) \
  foreach_element(2, (dest).e[_i] = (source).e[_i]) \

#define mov_wide_8x8b(dest, source) \
  foreach_element(8, (dest).e[_i] = (source).e[_i]) \

#define mov_wide_2x32b(dest, source) \
  foreach_element(2, (dest).e[_i] = (source).e[_i]) \

/* Bitwise NOT of each element. */
#define mvn_4x16b(dest, source) \
  foreach_element(4, (dest).e[_i] = ~((source).e[_i])) \

/* add_*: element-wise addition (wrapping, no saturation). */
#define add_4x16b(dest, source_a, source_b) \
  foreach_element(4, (dest).e[_i] = (source_a).e[_i] + (source_b).e[_i]) \

#define add_4x32b(dest, source_a, source_b) \
  foreach_element(4, (dest).e[_i] = (source_a).e[_i] + (source_b).e[_i]) \

#define add_2x32b(dest, source_a, source_b) \
  foreach_element(2, (dest).e[_i] = (source_a).e[_i] + (source_b).e[_i]) \

#define add_8x16b(dest, source_a, source_b) \
  foreach_element(8, (dest).e[_i] = (source_a).e[_i] + (source_b).e[_i]) \

#define add_16x8b(dest, source_a, source_b) \
  foreach_element(16, (dest).e[_i] = (source_a).e[_i] + (source_b).e[_i]) \

#define add_8x8b(dest, source_a, source_b) \
  foreach_element(8, (dest).e[_i] = (source_a).e[_i] + (source_b).e[_i]) \

#define add_1x64b(dest, source_a, source_b) \
  (dest).e[0] = (source_a).e[0] + (source_b).e[0] \

#define add_2x64b(dest, source_a, source_b) \
  foreach_element(2, (dest).e[_i] = (source_a).e[_i] + (source_b).e[_i]) \
272
/* Narrowing high-half add: dest.e[i] = top 32 bits of (a.e[i] + b.e[i]),
   written into a vector of 32-bit elements (vaddhn-style semantics).
   Fix: previously the ">> 32" was applied to the value of the assignment
   expression and discarded, so dest received the full unshifted sum. */
#define add_high_narrow_2x64b(dest, source_a, source_b) \
  foreach_element(2, \
   (dest).e[_i] = ((source_a).e[_i] + (source_b).e[_i]) >> 32) \

/* Narrowing high-half add: dest.e[i] = top 16 bits of (a.e[i] + b.e[i]),
   written into a vector of 16-bit elements. */
#define add_high_narrow_4x32b(dest, source_a, source_b) \
  foreach_element(4, \
   ((dest).e[_i] = ((source_a).e[_i] + (source_b).e[_i]) >> 16)) \
280
/* sub_*: element-wise subtraction (wrapping).
   *_wide_*: operands/dest differ in element width; widening/truncation
   happens on assignment. */
#define sub_4x16b(dest, source_a, source_b) \
  foreach_element(4, (dest).e[_i] = (source_a).e[_i] - (source_b).e[_i]) \

#define sub_4x32b(dest, source_a, source_b) \
  foreach_element(4, (dest).e[_i] = (source_a).e[_i] - (source_b).e[_i]) \

#define sub_2x32b(dest, source_a, source_b) \
  foreach_element(2, (dest).e[_i] = (source_a).e[_i] - (source_b).e[_i]) \

#define sub_wide_8x8b(dest, source_a, source_b) \
  foreach_element(8, (dest).e[_i] = (source_a).e[_i] - (source_b).e[_i]) \

#define add_wide_8x8b(dest, source_a, source_b) \
  foreach_element(8, (dest).e[_i] = (source_a).e[_i] + (source_b).e[_i]) \

#define add_wide_2x32b(dest, source_a, source_b) \
  foreach_element(2, (dest).e[_i] = (source_a).e[_i] + (source_b).e[_i]) \

/* Unsigned saturating byte add: clamps each result to 0xFF. */
#define addq_8x8b(dest, source_a, source_b) \
  foreach_element(8, \
  { \
    u32 result = (source_a).e[_i] + (source_b).e[_i]; \
    if(result > 0xFF) \
      result = 0xFF; \
    (dest).e[_i] = result; \
  }) \

/* Unsigned saturating byte subtract: underflow wraps the u32 result
   above 0xFF, which is detected and clamped to 0. */
#define subq_8x8b(dest, source_a, source_b) \
  foreach_element(8, \
  { \
    u32 result = (source_a).e[_i] - (source_b).e[_i]; \
    if(result > 0xFF) \
      result = 0; \
    (dest).e[_i] = result; \
  }) \

/* NOTE(review): subs_8x8b is not defined in this header; this forwarder
   only works if subs_8x8b exists elsewhere or the macro is never
   expanded — confirm before relying on it. */
#define subs_long_8x8b(dest, source_a, source_b) \
  subs_8x8b(dest, source_a, source_b) \

/* Unsigned saturating subtract over 16 bytes (floor at 0, via the same
   wraparound trick as subq_8x8b). */
#define subs_16x8b(dest, source_a, source_b) \
  foreach_element(16, \
  { \
    u32 result = (source_a).e[_i] - (source_b).e[_i]; \
    if(result > 0xFF) \
      result = 0; \
    (dest).e[_i] = result; \
  }) \

/* Saturating subtract of 16-bit elements: negative results clamp to 0. */
#define subs_8x16b(dest, source_a, source_b) \
  foreach_element(8, \
  { \
    s32 result = (source_a).e[_i] - (source_b).e[_i]; \
    if(result < 0) \
      result = 0; \
    \
    (dest).e[_i] = result; \
  }) \

#define sub_8x16b(dest, source_a, source_b) \
  foreach_element(8, (dest).e[_i] = (source_a).e[_i] - (source_b).e[_i]) \

#define sub_16x8b(dest, source_a, source_b) \
  foreach_element(16, (dest).e[_i] = (source_a).e[_i] - (source_b).e[_i]) \
344
/* Bitwise element-wise operations:
     orn  - a | ~b          bic           - a & ~b
     and  - a & b           bic_immediate - dest &= ~value
     or   - a | b           or_immediate  - a | value
     eor  - a ^ b (exclusive or) */
#define orn_8x16b(dest, source_a, source_b) \
  foreach_element(8, (dest).e[_i] = (source_a).e[_i] | ~((source_b).e[_i])) \

#define and_4x16b(dest, source_a, source_b) \
  foreach_element(4, (dest).e[_i] = (source_a).e[_i] & (source_b).e[_i]) \

#define and_8x16b(dest, source_a, source_b) \
  foreach_element(8, (dest).e[_i] = (source_a).e[_i] & (source_b).e[_i]) \

#define and_4x32b(dest, source_a, source_b) \
  foreach_element(4, (dest).e[_i] = (source_a).e[_i] & (source_b).e[_i]) \

#define and_16x8b(dest, source_a, source_b) \
  foreach_element(16, (dest).e[_i] = (source_a).e[_i] & (source_b).e[_i]) \

#define and_8x8b(dest, source_a, source_b) \
  foreach_element(8, (dest).e[_i] = (source_a).e[_i] & (source_b).e[_i]) \

#define and_2x32b(dest, source_a, source_b) \
  foreach_element(2, (dest).e[_i] = (source_a).e[_i] & (source_b).e[_i]) \

#define bic_8x8b(dest, source_a, source_b) \
  foreach_element(8, (dest).e[_i] = (source_a).e[_i] & ~((source_b).e[_i])) \

#define bic_8x16b(dest, source_a, source_b) \
  foreach_element(8, (dest).e[_i] = (source_a).e[_i] & ~((source_b).e[_i])) \

#define bic_immediate_4x16b(dest, value) \
  foreach_element(4, (dest).e[_i] = (dest).e[_i] & ~(value)) \

#define bic_immediate_8x16b(dest, value) \
  foreach_element(8, (dest).e[_i] = (dest).e[_i] & ~(value)) \

#define or_8x16b(dest, source_a, source_b) \
  foreach_element(8, (dest).e[_i] = (source_a).e[_i] | (source_b).e[_i]) \

#define or_immediate_8x16b(dest, source_a, value) \
  foreach_element(8, (dest).e[_i] = (source_a).e[_i] | (value)) \

#define eor_8x16b(dest, source_a, source_b) \
  foreach_element(8, (dest).e[_i] = (source_a).e[_i] ^ (source_b).e[_i]) \

#define eor_4x32b(dest, source_a, source_b) \
  foreach_element(4, (dest).e[_i] = (source_a).e[_i] ^ (source_b).e[_i]) \

#define eor_2x32b(dest, source_a, source_b) \
  foreach_element(2, (dest).e[_i] = (source_a).e[_i] ^ (source_b).e[_i]) \
392
/* Packs byte pairs: each dest element = low byte of a | (low byte of
   b << 8). */
#define zip_8x16b(dest, source_a, source_b) \
  foreach_element(8, (dest).e[_i] = \
   (u8)(source_a).e[_i] | ((u8)(source_b).e[_i] << 8)) \

/* Packs 32-bit pairs: each dest element = a (low 32) | (b << 32). */
#define zip_2x64b(dest, source_a, source_b) \
  foreach_element(2, (dest).e[_i] = \
   (u64)(source_a).e[_i] | ((u64)(source_b).e[_i] << 32)) \

/* Splits each source element into dest_a (low byte, via truncating
   assignment) and dest_b (bits 8+). */
#define unzip_8x8b(dest_a, dest_b, source) \
  foreach_element(8, \
  { \
    (dest_a).e[_i] = (source).e[_i]; \
    (dest_b).e[_i] = ((source).e[_i]) >> 8; \
  }) \

/* Same low/high byte split across two 8-element sources, filling 16
   elements of dest_a/dest_b. */
#define unzip_16x8b(dest_a, dest_b, source_a, source_b) \
  foreach_element(8, \
  { \
    (dest_a).e[_i] = (source_a).e[_i]; \
    (dest_b).e[_i] = (source_a).e[_i] >> 8; \
  }); \
  foreach_element(8, \
  { \
    (dest_a).e[_i + 8] = (source_b).e[_i]; \
    (dest_b).e[_i + 8] = (source_b).e[_i] >> 8; \
  }) \

/* Table lookup across a 16-entry table; out-of-range indexes produce 0
   (vtbl-style semantics). */
#define tbl_16(dest, indexes, table) \
  foreach_element(8, \
  { \
    u32 index = indexes.e[_i]; \
    if(index < 16) \
      (dest).e[_i] = table.e[index]; \
    else \
      (dest).e[_i] = 0; \
  }) \
429
/* Comparison macros produce per-element masks: all bits set when the
   condition holds, all clear otherwise ((cond) - 1 is 0 or -1, then
   inverted). */
#define cmpeqz_8x16b(dest, source) \
  foreach_element(8, (dest).e[_i] = ~(((source).e[_i] == 0) - 1)) \

/* cmpltz_*: broadcasts the sign bit via arithmetic right shift —
   all ones when the element is negative. */
#define cmpltz_8x16b(dest, source) \
  foreach_element(8, (dest).e[_i] = ((s16)(source).e[_i] >> 15)) \

#define cmpltz_4x32b(dest, source) \
  foreach_element(4, (dest).e[_i] = ((s32)(source).e[_i] >> 31)) \

#define cmpltz_2x32b(dest, source) \
  foreach_element(2, (dest).e[_i] = ((s32)(source).e[_i] >> 31)) \

#define cmplte_4x16b(dest, source_a, source_b) \
  foreach_element(4, (dest).e[_i] = ~((source_a.e[_i] <= source_b.e[_i]) - 1)) \

#define cmplt_4x16b(dest, source_a, source_b) \
  foreach_element(4, (dest).e[_i] = ~((source_a.e[_i] < source_b.e[_i]) - 1)) \

#define cmpgt_4x16b(dest, source_a, source_b) \
  foreach_element(4, (dest).e[_i] = ~((source_a.e[_i] > source_b.e[_i]) - 1)) \

/* Mask is all ones where (a & b) is non-zero. */
#define tst_8x16b(dest, source_a, source_b) \
  foreach_element(8, \
   (dest).e[_i] = ~(((source_a.e[_i] & source_b.e[_i]) != 0) - 1)) \

/* AND each element with a scalar immediate. */
#define andi_8x8b(dest, source_a, value) \
  foreach_element(8, (dest).e[_i] = (source_a).e[_i] & value) \

/* Element-wise mean, truncating (halving add). */
#define average_8x16b(dest, source_a, source_b) \
  foreach_element(8, \
   (dest).e[_i] = ((source_a).e[_i] + (source_b).e[_i]) >> 1) \
461
462
/* Multiply family:
     mul_*        - element-wise product.
     mul_long_*   - product into wider dest elements (widening happens on
                    assignment; the 2x32b variant widens an operand to s64
                    so the multiply itself is 64-bit).
     mul_scalar_* - each element times one scalar.
     mla_*        - multiply-accumulate: dest += product.
     mls_*        - multiply-subtract: dest -= product. */
#define mul_8x8b(dest, source_a, source_b) \
  foreach_element(8, (dest).e[_i] = (source_a).e[_i] * (source_b).e[_i]) \

#define mul_8x16b(dest, source_a, source_b) \
  foreach_element(8, (dest).e[_i] = (source_a).e[_i] * (source_b).e[_i]) \

#define mul_2x32b(dest, source_a, source_b) \
  foreach_element(2, (dest).e[_i] = (source_a).e[_i] * (source_b).e[_i]) \

#define mul_4x32b(dest, source_a, source_b) \
  foreach_element(4, (dest).e[_i] = (source_a).e[_i] * (source_b).e[_i]) \

#define mul_long_8x8b(dest, source_a, source_b) \
  foreach_element(8, (dest).e[_i] = (source_a).e[_i] * (source_b).e[_i]) \

#define mul_long_4x16b(dest, source_a, source_b) \
  foreach_element(4, (dest).e[_i] = (source_a).e[_i] * (source_b).e[_i]) \

#define mul_long_2x32b(dest, source_a, source_b) \
  foreach_element(2, \
   (dest).e[_i] = (source_a).e[_i] * (s64)((source_b).e[_i])) \

#define mul_scalar_2x32b(dest, source, value) \
  foreach_element(2, (dest).e[_i] = (source).e[_i] * value) \

#define mul_scalar_long_8x16b(dest, source, value) \
  foreach_element(8, (dest).e[_i] = (source).e[_i] * value) \

#define mul_scalar_long_2x32b(dest, source, value) \
  foreach_element(2, (dest).e[_i] = (source).e[_i] * value) \

#define mla_2x32b(dest, source_a, source_b) \
  foreach_element(2, (dest).e[_i] += (source_a).e[_i] * (source_b).e[_i]) \

#define mla_4x32b(dest, source_a, source_b) \
  foreach_element(4, (dest).e[_i] += (source_a).e[_i] * (source_b).e[_i]) \

#define mla_scalar_long_2x32b(dest, source, value) \
  foreach_element(2, (dest).e[_i] += (source).e[_i] * value) \

#define mla_long_8x8b(dest, source_a, source_b) \
  foreach_element(8, (dest).e[_i] += (source_a).e[_i] * (source_b).e[_i]) \

#define mla_long_2x32b(dest, source_a, source_b) \
  foreach_element(2, (dest).e[_i] += (source_a).e[_i] * (s64)(source_b).e[_i]) \

#define mla_scalar_4x32b(dest, source, value) \
  foreach_element(4, (dest).e[_i] += (source).e[_i] * value) \

#define mla_scalar_2x32b(dest, source, value) \
  foreach_element(2, (dest).e[_i] += (source).e[_i] * value) \

#define mls_scalar_4x32b(dest, source, value) \
  foreach_element(4, (dest).e[_i] -= (source).e[_i] * value) \

#define mls_scalar_2x32b(dest, source, value) \
  foreach_element(2, (dest).e[_i] -= (source).e[_i] * value) \

#define mls_scalar_long_2x32b(dest, source, value) \
  foreach_element(2, (dest).e[_i] -= (source).e[_i] * value) \
523
/* Swaps the two 32-bit elements (works even when dest and source are the
   same vector, hence the temporary). */
#define rev_2x32b(dest, source) \
{ \
  u32 tmp = source.e[1]; \
  (dest).e[1] = source.e[0]; \
  (dest).e[0] = tmp; \
} \

/* Element-wise absolute value (uses abs() from the C library). */
#define abs_4x32b(dest, source) \
  foreach_element(4, (dest).e[_i] = abs(source.e[_i])) \

#define abs_2x32b(dest, source) \
  foreach_element(2, (dest).e[_i] = abs(source.e[_i])) \

/* Element-wise negation. */
#define neg_2x32b(dest, source) \
  foreach_element(2, (dest).e[_i] = -((source).e[_i])) \
539
540
/* Unsigned right shift, saturating the narrowed result at 0xFF. */
#define shrq_narrow_8x16b(dest, source, shift) \
  foreach_element(8, \
  { \
    u32 result = ((source).e[_i]) >> shift; \
    if(result > 0xFF) \
      result = 0xFF; \
    (dest).e[_i] = result; \
  }) \

/* Element-wise minimum; comparison is done through an s32 temporary
   (signed semantics for the 16-bit variant). */
#define min_8x16b(dest, source_a, source_b) \
  foreach_element(8, \
  { \
    s32 result = (source_a).e[_i]; \
    if((source_b).e[_i] < result) \
      result = (source_b).e[_i]; \
    (dest).e[_i] = result; \
  }) \

/* Element-wise minimum with unsigned comparison. */
#define min_8x8b(dest, source_a, source_b) \
  foreach_element(8, \
  { \
    u32 result = (source_a).e[_i]; \
    if((source_b).e[_i] < result) \
      result = (source_b).e[_i]; \
    (dest).e[_i] = result; \
  }) \

#define min_16x8b(dest, source_a, source_b) \
  foreach_element(16, \
  { \
    u32 result = (source_a).e[_i]; \
    if((source_b).e[_i] < result) \
      result = (source_b).e[_i]; \
    (dest).e[_i] = result; \
  }) \

/* Element-wise maximum (signed comparison via s32 temporary). */
#define max_8x16b(dest, source_a, source_b) \
  foreach_element(8, \
  { \
    s32 result = (source_a).e[_i]; \
    if((source_b).e[_i] > result) \
      result = (source_b).e[_i]; \
    (dest).e[_i] = result; \
  }) \
585
/* Bitwise select: for each bit, take source_a where the mask bit is set,
   source_b where it is clear; the result overwrites the mask operand. */
#define bsl_8x16b(dest_mask, source_a, source_b) \
  foreach_element(8, dest_mask.e[_i] = ((source_a).e[_i] & dest_mask.e[_i]) | \
   ((source_b).e[_i] & ~(dest_mask.e[_i]))) \

/* Bitwise insert-if-false: copies source bits into dest where the mask
   bit is clear; dest bits survive where the mask is set. */
#define bif_8x16b(dest, source, mask) \
  foreach_element(8, dest.e[_i] = ((source).e[_i] & ~(mask.e[_i])) | \
   ((dest).e[_i] & mask.e[_i])) \

#define bsl_4x32b(dest_mask, source_a, source_b) \
  foreach_element(4, dest_mask.e[_i] = ((source_a).e[_i] & dest_mask.e[_i]) | \
   ((source_b).e[_i] & ~(dest_mask.e[_i]))) \

/* Bitwise insert-if-true: copies source bits into dest where the mask
   bit is set; dest bits survive where the mask is clear. */
#define bit_4x16b(dest, source, mask) \
  foreach_element(4, dest.e[_i] = ((source).e[_i] & mask.e[_i]) | \
   ((dest).e[_i] & ~(mask.e[_i]))) \

#endif