e14743d1 |
1 | ; |
2 | ; x86 format converters for HERMES |
3 | ; Some routines Copyright (c) 1998 Christian Nentwich (brn@eleet.mcb.at) |
4 | ; This source code is licensed under the GNU LGPL |
5 | ; |
6 | ; Please refer to the file COPYING.LIB contained in the distribution for |
7 | ; licensing conditions |
8 | ; |
9 | ; Most routines are (c) Glenn Fiedler (ptc@gaffer.org), used with permission |
10 | ; |
11 | |
12 | BITS 32 |
13 | |
14 | %include "common.inc" |
15 | |
16 | SDL_FUNC _ConvertX86p32_32BGR888 |
17 | SDL_FUNC _ConvertX86p32_32RGBA888 |
18 | SDL_FUNC _ConvertX86p32_32BGRA888 |
19 | SDL_FUNC _ConvertX86p32_24RGB888 |
20 | SDL_FUNC _ConvertX86p32_24BGR888 |
21 | SDL_FUNC _ConvertX86p32_16RGB565 |
22 | SDL_FUNC _ConvertX86p32_16BGR565 |
23 | SDL_FUNC _ConvertX86p32_16RGB555 |
24 | SDL_FUNC _ConvertX86p32_16BGR555 |
25 | SDL_FUNC _ConvertX86p32_8RGB332 |
26 | |
27 | SECTION .text |
28 | |
29 | ;; _Convert_* |
30 | ;; Parameters: |
31 | ;; ESI = source |
32 | ;; EDI = dest |
33 | ;; ECX = amount (NOT 0!!! (the _ConvertX86 routine checks for that though)) |
34 | ;; Destroys: |
35 | ;; EAX, EBX, ECX, EDX (ESI and EDI are advanced past the converted data) |
36 | |
37 | |
;-----------------------------------------------------------------------
; _ConvertX86p32_32BGR888
; 32-bit -> 32-bit. Every dword goes through bswap followed by ror 8,
; so the bytes in bits 0-7 and bits 16-23 trade places while bits 8-15
; and bits 24-31 stay where they are.
;
; In:    esi = source, edi = dest, ecx = pixel count (must not be 0)
; Out:   esi / edi advanced by 4 bytes per pixel
; Clobb: eax, ebx, ecx, edx, flags; ebp is pushed/popped on the long path
;-----------------------------------------------------------------------
_ConvertX86p32_32BGR888:
        cmp     ecx, byte 32
        ja      .many                   ; more than 32 pixels: unrolled path

.few:                                   ; up to 32 pixels: one dword per pass
        mov     edx, [esi]
        add     esi, byte 4
        bswap   edx
        ror     edx, 8
        mov     [edi], edx
        add     edi, byte 4
        dec     ecx
        jnz     .few
        ret

.many:
        push    ebp                     ; ebp is borrowed as the group counter
        mov     ebp, ecx
        push    ecx                     ; full count, needed again for the tail
        shr     ebp, 2                  ; ebp = number of 4-pixel groups

.group:                                 ; four dwords per pass
        mov     eax, [esi]
        mov     ebx, [esi+4]
        mov     ecx, [esi+8]
        mov     edx, [esi+12]
        add     esi, byte 16

        bswap   eax
        bswap   ebx
        bswap   ecx
        bswap   edx

        ror     eax, 8
        ror     ebx, 8
        ror     ecx, 8
        ror     edx, 8

        mov     [edi], eax
        mov     [edi+4], ebx
        mov     [edi+8], ecx
        mov     [edi+12], edx
        add     edi, byte 16

        dec     ebp
        jnz     .group

        pop     ecx
        and     ecx, byte 3             ; 0..3 pixels are left over
        jz      .done

.rest:                                  ; left-over pixels, one at a time
        mov     edx, [esi]
        add     esi, byte 4
        bswap   edx
        ror     edx, 8
        mov     [edi], edx
        add     edi, byte 4
        dec     ecx
        jnz     .rest

.done:
        pop     ebp
        ret
115 | |
116 | |
117 | |
118 | |
;-----------------------------------------------------------------------
; _ConvertX86p32_32RGBA888
; 32-bit -> 32-bit. Every dword is rotated left by 8 bits: the top byte
; wraps around to bits 0-7 and the lower three bytes move up one place.
;
; In:    esi = source, edi = dest, ecx = pixel count (must not be 0)
; Out:   esi / edi advanced by 4 bytes per pixel
; Clobb: eax, ebx, ecx, edx, flags; ebp is pushed/popped on the long path
;-----------------------------------------------------------------------
_ConvertX86p32_32RGBA888:
        cmp     ecx, byte 32
        ja      .many                   ; more than 32 pixels: unrolled path

.few:                                   ; up to 32 pixels: one dword per pass
        mov     edx, [esi]
        add     esi, byte 4
        rol     edx, 8
        mov     [edi], edx
        add     edi, byte 4
        dec     ecx
        jnz     .few
        ret

.many:
        push    ebp                     ; ebp is borrowed as the group counter
        mov     ebp, ecx
        push    ecx                     ; full count, needed again for the tail
        shr     ebp, 2                  ; ebp = number of 4-pixel groups

.group:                                 ; four dwords per pass
        mov     eax, [esi]
        mov     ebx, [esi+4]
        mov     ecx, [esi+8]
        mov     edx, [esi+12]
        add     esi, byte 16

        rol     eax, 8
        rol     ebx, 8
        rol     ecx, 8
        rol     edx, 8

        mov     [edi], eax
        mov     [edi+4], ebx
        mov     [edi+8], ecx
        mov     [edi+12], edx
        add     edi, byte 16

        dec     ebp
        jnz     .group

        pop     ecx
        and     ecx, byte 3             ; 0..3 pixels are left over
        jz      .done

.rest:                                  ; left-over pixels, one at a time
        mov     edx, [esi]
        add     esi, byte 4
        rol     edx, 8
        mov     [edi], edx
        add     edi, byte 4
        dec     ecx
        jnz     .rest

.done:
        pop     ebp
        ret
186 | |
187 | |
188 | |
189 | |
;-----------------------------------------------------------------------
; _ConvertX86p32_32BGRA888
; 32-bit -> 32-bit. Every dword is byte-reversed with bswap.
;
; In:    esi = source, edi = dest, ecx = pixel count (must not be 0)
; Out:   esi / edi advanced by 4 bytes per pixel
; Clobb: eax, ebx, ecx, edx, flags; ebp is pushed/popped on the long path
;-----------------------------------------------------------------------
_ConvertX86p32_32BGRA888:
        cmp     ecx, byte 32
        ja      .many                   ; more than 32 pixels: unrolled path

.few:                                   ; up to 32 pixels: one dword per pass
        mov     edx, [esi]
        add     esi, byte 4
        bswap   edx
        mov     [edi], edx
        add     edi, byte 4
        dec     ecx
        jnz     .few
        ret

.many:
        push    ebp                     ; ebp is borrowed as the group counter
        mov     ebp, ecx
        push    ecx                     ; full count, needed again for the tail
        shr     ebp, 2                  ; ebp = number of 4-pixel groups

.group:                                 ; four dwords per pass
        mov     eax, [esi]
        mov     ebx, [esi+4]
        mov     ecx, [esi+8]
        mov     edx, [esi+12]
        add     esi, byte 16

        bswap   eax
        bswap   ebx
        bswap   ecx
        bswap   edx

        mov     [edi], eax
        mov     [edi+4], ebx
        mov     [edi+8], ecx
        mov     [edi+12], edx
        add     edi, byte 16

        dec     ebp
        jnz     .group

        pop     ecx
        and     ecx, byte 3             ; 0..3 pixels are left over
        jz      .done

.rest:                                  ; left-over pixels, one at a time
        mov     edx, [esi]
        add     esi, byte 4
        bswap   edx
        mov     [edi], edx
        add     edi, byte 4
        dec     ecx
        jnz     .rest

.done:
        pop     ebp
        ret
259 | |
260 | |
261 | |
262 | |
;-----------------------------------------------------------------------
; _ConvertX86p32_24RGB888
; 32 bit RGB 888 to 24 bit RGB 888: the first three bytes of every
; source dword are copied in the same order, the fourth byte is dropped.
;
; In:    esi = source, edi = dest, ecx = pixel count (must not be 0)
; Out:   esi advanced 4 bytes and edi advanced 3 bytes per pixel
; Clobb: eax, ebx, ecx, edx, flags; ebp is pushed/popped on the long path
;-----------------------------------------------------------------------

; Copy one pixel (bytes 0..2 of the dword at esi) and step both pointers.
%macro X86P32_COPY_RGB24_PIXEL 0
        mov     al, [esi]
        mov     bl, [esi+1]
        mov     dl, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, byte 4
        add     edi, byte 3
%endmacro

_ConvertX86p32_24RGB888:
        cmp     ecx, byte 32
        ja      .align_dest             ; more than 32 pixels: long path

.few:                                   ; up to 32 pixels: byte-wise copy
        X86P32_COPY_RGB24_PIXEL
        dec     ecx
        jnz     .few
        ret

.align_dest:                            ; single pixels until edi is a multiple of 4
        mov     edx, edi
        and     edx, byte 3
        jz      .aligned
        X86P32_COPY_RGB24_PIXEL
        dec     ecx
        jmp     short .align_dest

.aligned:
        push    ebp                     ; ebp is borrowed as the group counter
        mov     ebp, ecx
        shr     ebp, 2                  ; ebp = number of 4-pixel groups
        push    ecx                     ; remaining count, needed for the tail

        ; Four source pixels P0..P3 (16 bytes) become three dwords (12 bytes).
        ; Register pictures are written most-significant byte first.
.group:
        mov     eax, [esi]              ; eax = [A0][R0][G0][B0]
        mov     ebx, [esi+4]            ; ebx = [A1][R1][G1][B1]

        shl     eax, 8                  ; eax = [R0][G0][B0][..]
        mov     ecx, [esi+12]           ; ecx = [A3][R3][G3][B3]  (fourth pixel)

        shl     ebx, 8                  ; ebx = [R1][G1][B1][..]
        mov     al, [esi+4]             ; eax = [R0][G0][B0][B1]

        ror     eax, 8                  ; eax = [B1][R0][G0][B0]  -> output bytes 0..3
        mov     bh, [esi+9]             ; ebx = [R1][G1][G2][..]

        mov     [edi], eax
        add     edi, byte 12            ; edi already points at the next group

        shl     ecx, 8                  ; ecx = [R3][G3][B3][..]
        mov     bl, [esi+8]             ; ebx = [R1][G1][G2][B2]

        rol     ebx, 16                 ; ebx = [G2][B2][R1][G1]  -> output bytes 4..7
        mov     cl, [esi+10]            ; ecx = [R3][G3][B3][R2]  -> output bytes 8..11

        mov     [edi-8], ebx
        add     esi, byte 16

        mov     [edi-4], ecx
        dec     ebp

        jnz     .group

        pop     ecx
        and     ecx, byte 3             ; 0..3 pixels are left over
        jz      .done

.rest:                                  ; left-over pixels, byte-wise
        X86P32_COPY_RGB24_PIXEL
        dec     ecx
        jnz     .rest

.done:
        pop     ebp
        ret
356 | |
357 | |
358 | |
359 | |
;-----------------------------------------------------------------------
; _ConvertX86p32_24BGR888
; 32 bit RGB 888 to 24 bit BGR 888: the first three bytes of every
; source dword are written in reverse order, the fourth byte is dropped.
;
; In:    esi = source, edi = dest, ecx = pixel count (must not be 0)
; Out:   esi advanced 4 bytes and edi advanced 3 bytes per pixel
; Clobb: eax, ebx, ecx, edx, flags; ebp is pushed/popped on the long path
;-----------------------------------------------------------------------

; Copy one pixel with bytes 0 and 2 exchanged, then step both pointers.
%macro X86P32_COPY_BGR24_PIXEL 0
        mov     dl, [esi]
        mov     bl, [esi+1]
        mov     al, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, byte 4
        add     edi, byte 3
%endmacro

_ConvertX86p32_24BGR888:
        cmp     ecx, byte 32
        ja      .align_dest             ; more than 32 pixels: long path

.few:                                   ; up to 32 pixels: byte-wise copy
        X86P32_COPY_BGR24_PIXEL
        dec     ecx
        jnz     .few
        ret

.align_dest:                            ; single pixels until edi is a multiple of 4
        mov     edx, edi
        and     edx, byte 3
        jz      .aligned
        X86P32_COPY_BGR24_PIXEL
        dec     ecx
        jmp     short .align_dest

.aligned:
        push    ebp                     ; ebp is borrowed as the group counter
        mov     ebp, ecx
        shr     ebp, 2                  ; ebp = number of 4-pixel groups
        push    ecx                     ; remaining count, needed for the tail

        ; Four source pixels P0..P3 (16 bytes) become three dwords (12 bytes).
        ; Register pictures are written most-significant byte first.
.group:
        mov     eax, [esi]              ; eax = [A0][R0][G0][B0]
        mov     ebx, [esi+4]            ; ebx = [A1][R1][G1][B1]

        bswap   eax                     ; eax = [B0][G0][R0][A0]

        bswap   ebx                     ; ebx = [B1][G1][R1][A1]

        mov     al, [esi+6]             ; eax = [B0][G0][R0][R1]
        mov     bh, [esi+9]             ; ebx = [B1][G1][G2][A1]

        ror     eax, 8                  ; eax = [R1][B0][G0][R0]  -> output bytes 0..3
        mov     bl, [esi+10]            ; ebx = [B1][G1][G2][R2]

        ror     ebx, 16                 ; ebx = [G2][R2][B1][G1]  -> output bytes 4..7
        mov     [edi], eax

        mov     [edi+4], ebx
        mov     ecx, [esi+12]           ; ecx = [A3][R3][G3][B3]  (fourth pixel)

        bswap   ecx                     ; ecx = [B3][G3][R3][A3]

        mov     cl, [esi+8]             ; ecx = [B3][G3][R3][B2]  -> output bytes 8..11
        add     esi, byte 16

        mov     [edi+8], ecx
        add     edi, byte 12

        dec     ebp
        jnz     .group

        pop     ecx
        and     ecx, byte 3             ; 0..3 pixels are left over
        jz      .done

.rest:                                  ; left-over pixels, byte-wise
        X86P32_COPY_BGR24_PIXEL
        dec     ecx
        jnz     .rest

.done:
        pop     ebp
        ret
457 | |
458 | |
459 | |
460 | |
;-----------------------------------------------------------------------
; _ConvertX86p32_16RGB565
; 32 bit RGB 888 to 16 bit RGB 565. Each output word is
;     (red >> 3) << 11  |  (green >> 2) << 5  |  (blue >> 3)
; with blue = source byte 0, green = byte 1, red = byte 2.
;
; In:    esi = source, edi = dest, ecx = pixel count (must not be 0)
; Out:   esi advanced 4 bytes and edi advanced 2 bytes per pixel
; Clobb: eax, ebx, ecx, edx, flags
;-----------------------------------------------------------------------

; Convert the single pixel at esi, store it at edi, step both pointers.
%macro X86P32_PIXEL_TO_RGB565 0
        mov     bl, [esi]               ; blue
        mov     al, [esi+1]             ; green
        mov     ah, [esi+2]             ; red
        shr     ah, 3                   ; red   -> 5 bits
        and     al, 11111100b           ; green -> top 6 bits
        shl     eax, 3                  ; ax = red5 << 11 | green6 << 5
        shr     bl, 3                   ; blue  -> 5 bits
        add     al, bl
        mov     [edi], al
        mov     [edi+1], ah
        add     esi, byte 4
        add     edi, byte 2
%endmacro

_ConvertX86p32_16RGB565:
        cmp     ecx, byte 16
        ja      .align_dest             ; more than 16 pixels: long path

.few:                                   ; up to 16 pixels, one per pass
        X86P32_PIXEL_TO_RGB565
        dec     ecx
        jnz     .few
        ret

.align_dest:                            ; edi not a multiple of 4: do one pixel first
        mov     ebx, edi
        and     ebx, byte 3
        jz      .aligned
        X86P32_PIXEL_TO_RGB565
        dec     ecx

.aligned:
        push    ecx                     ; remaining count, low bit used by the tail

        shr     ecx, 1                  ; ecx = number of pixel pairs

        lea     esi, [esi+ecx*8]        ; both pointers now sit at the end of
        lea     edi, [edi+ecx*4]        ; the paired region ...

        neg     ecx                     ; ... and ecx counts up from -pairs to 0
        jmp     short .pair

.store:                                 ; write the dword built by the previous pass
        mov     [edi+ecx*4-4], eax
.pair:
        mov     eax, [esi+ecx*8]        ; first pixel of the pair

        shr     ah, 2                   ; its green -> 6 bits
        mov     ebx, [esi+ecx*8+4]      ; second pixel of the pair

        shr     eax, 3                  ; blue5 in bits 0-4, green6 in bits 5-10
        mov     edx, [esi+ecx*8+4]      ; second pixel again, for its red byte

        shr     bh, 2                   ; second green -> 6 bits
        mov     dl, [esi+ecx*8+2]       ; dl = red of the first pixel

        shl     ebx, 13                 ; second blue5/green6 -> bits 16-26
        and     eax, 000007FFh

        shl     edx, 8                  ; reds now in bits 8-15 and 24-31
        and     ebx, 07FF0000h

        and     edx, 0F800F800h         ; keep the top 5 bits of each red
        add     eax, ebx

        add     eax, edx                ; eax = two finished 565 pixels
        inc     ecx

        jnz     .store

        mov     [edi+ecx*4-4], eax      ; last pair (ecx is 0 here)

        pop     ecx
        test    cl, 1                   ; odd count: one pixel is still pending
        jz      .done

        X86P32_PIXEL_TO_RGB565

.done:
        ret
572 | |
573 | |
574 | |
575 | |
;-----------------------------------------------------------------------
; _ConvertX86p32_16BGR565
; 32 bit RGB 888 to 16 bit BGR 565. Each output word is
;     (blue >> 3) << 11  |  (green >> 2) << 5  |  (red >> 3)
; with blue = source byte 0, green = byte 1, red = byte 2.
;
; In:    esi = source, edi = dest, ecx = pixel count (must not be 0)
; Out:   esi advanced 4 bytes and edi advanced 2 bytes per pixel
; Clobb: eax, ebx, ecx, edx, flags
;-----------------------------------------------------------------------

; Convert the single pixel at esi, store it at edi, step both pointers.
%macro X86P32_PIXEL_TO_BGR565 0
        mov     ah, [esi]               ; blue
        mov     al, [esi+1]             ; green
        mov     bl, [esi+2]             ; red
        shr     ah, 3                   ; blue  -> 5 bits
        and     al, 11111100b           ; green -> top 6 bits
        shl     eax, 3                  ; ax = blue5 << 11 | green6 << 5
        shr     bl, 3                   ; red   -> 5 bits
        add     al, bl
        mov     [edi], al
        mov     [edi+1], ah
        add     esi, byte 4
        add     edi, byte 2
%endmacro

_ConvertX86p32_16BGR565:
        cmp     ecx, byte 16
        ja      .align_dest             ; more than 16 pixels: long path

.few:                                   ; up to 16 pixels, one per pass
        X86P32_PIXEL_TO_BGR565
        dec     ecx
        jnz     .few
        ret

.align_dest:                            ; edi not a multiple of 4: do one pixel first
        mov     ebx, edi
        and     ebx, byte 3
        jz      .aligned
        X86P32_PIXEL_TO_BGR565
        dec     ecx

.aligned:
        push    ecx                     ; remaining count, low bit used by the tail

        shr     ecx, 1                  ; ecx = number of pixel pairs

        lea     esi, [esi+ecx*8]        ; both pointers now sit at the end of
        lea     edi, [edi+ecx*4]        ; the paired region ...

        neg     ecx                     ; ... and ecx counts up from -pairs to 0
        jmp     short .pair

.store:                                 ; write the dword built by the previous pass
        mov     [edi+ecx*4-4], eax
.pair:
        mov     edx, [esi+ecx*8+4]      ; second pixel, wanted for its red byte

        mov     bh, [esi+ecx*8+4]       ; bh = blue of the second pixel
        mov     ah, [esi+ecx*8]         ; ah = blue of the first pixel

        shr     bh, 3                   ; second blue -> 5 bits
        mov     al, [esi+ecx*8+1]       ; al = green of the first pixel

        shr     ah, 3                   ; first blue -> 5 bits
        mov     bl, [esi+ecx*8+5]       ; bl = green of the second pixel

        shl     eax, 3                  ; ax = blue5 << 11 | green << 3
        mov     dl, [esi+ecx*8+2]       ; dl = red of the first pixel

        shl     ebx, 19                 ; same layout, moved into the upper word
        and     eax, 0000FFE0h          ; drop the two low green bits

        shr     edx, 3                  ; red5 of each pixel -> bits 0-4 / 16-20
        and     ebx, 0FFE00000h

        and     edx, 001F001Fh
        add     eax, ebx

        add     eax, edx                ; eax = two finished 565 pixels
        inc     ecx

        jnz     .store

        mov     [edi+ecx*4-4], eax      ; last pair (ecx is 0 here)

        pop     ecx
        and     ecx, byte 1             ; odd count: one pixel is still pending
        jz      .done

        X86P32_PIXEL_TO_BGR565

.done:
        ret
686 | |
687 | |
688 | |
689 | |
;-----------------------------------------------------------------------
; _ConvertX86p32_16RGB555
; 32 bit RGB 888 to 16 bit RGB 555. Each output word is
;     (red >> 3) << 10  |  (green >> 3) << 5  |  (blue >> 3)
; with blue = source byte 0, green = byte 1, red = byte 2.
;
; In:    esi = source, edi = dest, ecx = pixel count (must not be 0)
; Out:   esi advanced 4 bytes and edi advanced 2 bytes per pixel
; Clobb: eax, ebx, ecx, edx, flags
;-----------------------------------------------------------------------

; Convert the single pixel at esi, store it at edi, step both pointers.
%macro X86P32_PIXEL_TO_RGB555 0
        mov     bl, [esi]               ; blue
        mov     al, [esi+1]             ; green
        mov     ah, [esi+2]             ; red
        shr     ah, 3                   ; red   -> 5 bits
        and     al, 11111000b           ; green -> top 5 bits
        shl     eax, 2                  ; ax = red5 << 10 | green5 << 5
        shr     bl, 3                   ; blue  -> 5 bits
        add     al, bl
        mov     [edi], al
        mov     [edi+1], ah
        add     esi, byte 4
        add     edi, byte 2
%endmacro

_ConvertX86p32_16RGB555:
        cmp     ecx, byte 16
        ja      .align_dest             ; more than 16 pixels: long path

.few:                                   ; up to 16 pixels, one per pass
        X86P32_PIXEL_TO_RGB555
        dec     ecx
        jnz     .few
        ret

.align_dest:                            ; edi not a multiple of 4: do one pixel first
        mov     ebx, edi
        and     ebx, byte 3
        jz      .aligned
        X86P32_PIXEL_TO_RGB555
        dec     ecx

.aligned:
        push    ecx                     ; remaining count, low bit used by the tail

        shr     ecx, 1                  ; ecx = number of pixel pairs

        lea     esi, [esi+ecx*8]        ; both pointers now sit at the end of
        lea     edi, [edi+ecx*4]        ; the paired region ...

        neg     ecx                     ; ... and ecx counts up from -pairs to 0
        jmp     short .pair

.store:                                 ; write the dword built by the previous pass
        mov     [edi+ecx*4-4], eax
.pair:
        mov     eax, [esi+ecx*8]        ; first pixel of the pair

        shr     ah, 3                   ; its green -> 5 bits
        mov     ebx, [esi+ecx*8+4]      ; second pixel of the pair

        shr     eax, 3                  ; blue5 in bits 0-4, green5 in bits 5-9
        mov     edx, [esi+ecx*8+4]      ; second pixel again, for its red byte

        shr     bh, 3                   ; second green -> 5 bits
        mov     dl, [esi+ecx*8+2]       ; dl = red of the first pixel

        shl     ebx, 13                 ; second blue5/green5 -> bits 16-25
        and     eax, 000007FFh

        shl     edx, 7                  ; reds now in bits 7-14 and 23-30
        and     ebx, 07FF0000h

        and     edx, 07C007C00h         ; keep the top 5 bits of each red
        add     eax, ebx

        add     eax, edx                ; eax = two finished 555 pixels
        inc     ecx

        jnz     .store

        mov     [edi+ecx*4-4], eax      ; last pair (ecx is 0 here)

        pop     ecx
        and     ecx, byte 1             ; odd count: one pixel is still pending
        jz      .done

        X86P32_PIXEL_TO_RGB555

.done:
        ret
797 | |
798 | |
799 | |
800 | |
;-----------------------------------------------------------------------
; _ConvertX86p32_16BGR555
; 32 bit RGB 888 to 16 bit BGR 555. Each output word is
;     (blue >> 3) << 10  |  (green >> 3) << 5  |  (red >> 3)
; with blue = source byte 0, green = byte 1, red = byte 2.
;
; In:    esi = source, edi = dest, ecx = pixel count (must not be 0)
; Out:   esi advanced 4 bytes and edi advanced 2 bytes per pixel
; Clobb: eax, ebx, ecx, edx, flags
;-----------------------------------------------------------------------

; Convert the single pixel at esi, store it at edi, step both pointers.
%macro X86P32_PIXEL_TO_BGR555 0
        mov     ah, [esi]               ; blue
        mov     al, [esi+1]             ; green
        mov     bl, [esi+2]             ; red
        shr     ah, 3                   ; blue  -> 5 bits
        and     al, 11111000b           ; green -> top 5 bits
        shl     eax, 2                  ; ax = blue5 << 10 | green5 << 5
        shr     bl, 3                   ; red   -> 5 bits
        add     al, bl
        mov     [edi], al
        mov     [edi+1], ah
        add     esi, byte 4
        add     edi, byte 2
%endmacro

_ConvertX86p32_16BGR555:
        cmp     ecx, byte 16
        ja      .align_dest             ; more than 16 pixels: long path

.few:                                   ; up to 16 pixels, one per pass
        X86P32_PIXEL_TO_BGR555
        dec     ecx
        jnz     .few
        ret

.align_dest:                            ; edi not a multiple of 4: do one pixel first
        mov     ebx, edi
        and     ebx, byte 3
        jz      .aligned
        X86P32_PIXEL_TO_BGR555
        dec     ecx

.aligned:
        push    ecx                     ; remaining count, low bit used by the tail

        shr     ecx, 1                  ; ecx = number of pixel pairs

        lea     esi, [esi+ecx*8]        ; both pointers now sit at the end of
        lea     edi, [edi+ecx*4]        ; the paired region ...

        neg     ecx                     ; ... and ecx counts up from -pairs to 0
        jmp     short .pair

.store:                                 ; write the dword built by the previous pass
        mov     [edi+ecx*4-4], eax
.pair:
        mov     edx, [esi+ecx*8+4]      ; second pixel, wanted for its red byte

        mov     bh, [esi+ecx*8+4]       ; bh = blue of the second pixel
        mov     ah, [esi+ecx*8]         ; ah = blue of the first pixel

        shr     bh, 3                   ; second blue -> 5 bits
        mov     al, [esi+ecx*8+1]       ; al = green of the first pixel

        shr     ah, 3                   ; first blue -> 5 bits
        mov     bl, [esi+ecx*8+5]       ; bl = green of the second pixel

        shl     eax, 2                  ; ax = blue5 << 10 | green << 2
        mov     dl, [esi+ecx*8+2]       ; dl = red of the first pixel

        shl     ebx, 18                 ; same layout, moved into the upper word
        and     eax, 00007FE0h          ; drop the three low green bits

        shr     edx, 3                  ; red5 of each pixel -> bits 0-4 / 16-20
        and     ebx, 07FE00000h

        and     edx, 001F001Fh
        add     eax, ebx

        add     eax, edx                ; eax = two finished 555 pixels
        inc     ecx

        jnz     .store

        mov     [edi+ecx*4-4], eax      ; last pair (ecx is 0 here)

        pop     ecx
        and     ecx, byte 1             ; odd count: one pixel is still pending
        jz      .done

        X86P32_PIXEL_TO_BGR555

.done:
        ret
912 | |
913 | |
914 | |
915 | |
916 | |
;-----------------------------------------------------------------------
; _ConvertX86p32_8RGB332
; From 32 bit RGB to 8 bit RGB 3:3:2 (rrrgggbb). Each output byte is
;     (red & 0E0h)  |  ((green & 0E0h) >> 3)  |  (blue >> 6)
; with blue = bits 0-7, green = bits 8-15, red = bits 16-23 of the
; source dword.
; Four pixels are packed into one dword and stored at once; afterwards
; up to three trailing pixels are written one byte at a time.
;
; In:    esi = source, edi = dest, ecx = pixel count
;        (a count of 0 falls straight through both loops)
; Out:   esi advanced 4 bytes and edi advanced 1 byte per pixel
; Clobb: eax, ebx, ecx, edx, flags
;-----------------------------------------------------------------------
_ConvertX86p32_8RGB332:
        push    ecx                     ; full count, needed for the tail

        shr     ecx, 2                  ; ecx = number of 4-pixel groups
        jz      near .tail              ; target is beyond short-jump range

.quad:
        mov     eax, [esi]              ; pixel 0
        mov     edx, [esi+4]            ; pixel 1

        shr     dl, 6                   ; p1 blue  -> bits 0-1
        mov     ebx, eax

        shr     al, 6                   ; p0 blue  -> bits 0-1
        and     ah, 0e0h                ; p0 green: top 3 bits

        shr     ebx, 16                 ; bl = p0 red
        and     dh, 0e0h                ; p1 green: top 3 bits

        shr     ah, 3                   ; p0 green -> bits 2-4
        and     bl, 0e0h                ; p0 red stays in bits 5-7

        shr     dh, 3                   ; p1 green -> bits 2-4

        or      al, bl

        mov     ebx, edx
        or      al, ah                  ; al = packed pixel 0

        shr     ebx, 16                 ; bl = p1 red
        or      dl, dh

        and     bl, 0e0h

        or      dl, bl                  ; dl = packed pixel 1

        mov     ah, dl                  ; ax = [p1][p0]

        mov     ebx, [esi+8]            ; pixel 2

        mov     edx, ebx
        and     bh, 0e0h                ; p2 green: top 3 bits

        shr     bl, 6                   ; p2 blue  -> bits 0-1
        and     edx, 0e00000h           ; p2 red: top 3 bits, still in bits 21-23

        shr     edx, 16                 ; dl = p2 red bits

        shr     bh, 3                   ; p2 green -> bits 2-4

        ror     eax, 16                 ; park [p1][p0] in the upper half of eax
        or      bl, dl

        mov     edx, [esi+12]           ; pixel 3
        or      bl, bh

        mov     al, bl                  ; al = packed pixel 2

        mov     ebx, edx
        and     dh, 0e0h                ; p3 green: top 3 bits

        shr     dl, 6                   ; p3 blue  -> bits 0-1
        and     ebx, 0e00000h           ; p3 red: top 3 bits

        shr     dh, 3                   ; p3 green -> bits 2-4
        mov     ah, dl

        shr     ebx, 16                 ; bl = p3 red bits
        or      ah, dh

        or      ah, bl                  ; ah = packed pixel 3

        rol     eax, 16                 ; eax = [p3][p2][p1][p0]
        add     esi, byte 16

        mov     [edi], eax
        add     edi, byte 4

        dec     ecx
        jnz     near .quad              ; one near branch instead of jz + jmp

.tail:
        pop     ecx
        and     ecx, byte 3             ; 0..3 trailing pixels

        jz      .done                   ; nothing left to do

.single:
        mov     eax, [esi]              ; single pixel conversion

        mov     ebx, eax

        shr     al, 6                   ; blue  -> bits 0-1
        and     ah, 0e0h                ; green: top 3 bits

        shr     ebx, 16                 ; bl = red

        shr     ah, 3                   ; green -> bits 2-4
        and     bl, 0e0h                ; red stays in bits 5-7

        or      al, ah
        or      al, bl

        mov     [edi], al

        inc     edi
        add     esi, byte 4

        dec     ecx
        jnz     .single

.done:
        ret
1042 | |
1043 | %ifidn __OUTPUT_FORMAT__,elf |
1044 | section .note.GNU-stack noalloc noexec nowrite progbits |
1045 | %endif |