handle src buffer underflow corner cases
[sdl_omap.git] / src / hermes / x86p_16.asm
CommitLineData
;
; x86 format converters for HERMES
; Copyright (c) 1998 Glenn Fielder (gaffer@gaffer.org)
; This source code is licensed under the GNU LGPL
;
; Please refer to the file COPYING.LIB contained in the distribution for
; licensing conditions
;
; Routines adjusted for Hermes by Christian Nentwich (brn@eleet.mcb.at)
; Used with permission.
;
12
BITS 32                                 ; everything below is 32-bit code

%include "common.inc"                   ; presumably where SDL_FUNC is defined (not visible here)

; Entry points implemented in this file.  SDL_FUNC is not defined in this
; file; it presumably declares/exports the symbol - confirm in common.inc.
SDL_FUNC _ConvertX86p16_16BGR565
SDL_FUNC _ConvertX86p16_16RGB555
SDL_FUNC _ConvertX86p16_16BGR555
SDL_FUNC _ConvertX86p16_8RGB332

EXTERN _ConvertX86                      ; declared but not referenced by the code in this file

SECTION .text
25
; ---------------------------------------------------------------------------
; PIX16_TO_16BGR565
;
; Converts the single 16-bit pixel at [esi] and stores it at [edi], then
; advances both pointers by two bytes.  Bits 0-4 and bits 11-15 trade
; places; bits 5-10 are left where they are.
; Clobbers eax, ebx, edx and the flags.
; ---------------------------------------------------------------------------
%macro PIX16_TO_16BGR565 0
        mov     al,[esi]
        mov     ah,[esi+1]
        mov     ebx,eax
        mov     edx,eax
        shr     eax,11
        and     eax,BYTE 11111b         ; former bits 11-15, now in bits 0-4
        and     ebx,11111100000b        ; bits 5-10 stay in place
        shl     edx,11                  ; former bits 0-4, now in bits 11-15
        add     eax,ebx
        add     eax,edx                 ; garbage above bit 15 is never stored
        mov     [edi],al
        mov     [edi+1],ah
        add     esi,BYTE 2
        add     edi,BYTE 2
%endmacro

; ---------------------------------------------------------------------------
; _ConvertX86p16_16BGR565
;
; Converts a run of 16-bit pixels to 16-bit pixels, swapping the 5-bit field
; in bits 0-4 with the 5-bit field in bits 11-15 (going by the routine name
; this is RGB565 -> BGR565).
;
; In:    esi = source pixels
;        edi = destination pixels
;        ecx = pixel count (0 converts nothing)
; Out:   esi / edi point just past the last pixel read / written
; Clobb: eax, ebx, ecx, edx, flags
; ---------------------------------------------------------------------------
_ConvertX86p16_16BGR565:

        ; more than 16 pixels: take the two-pixels-per-dword path
        cmp     ecx,BYTE 16
        ja      .L3

        ; an empty run must not enter the dec/jnz loop below, otherwise
        ; ecx wraps around and the loop runs for 2^32 iterations
        test    ecx,ecx
        jz      .L2

.L1:    ; short loop, one pixel per iteration
        PIX16_TO_16BGR565
        dec     ecx
        jnz     .L1
.L2:
        retn

.L3:    ; head: convert one pixel first unless edi is already a multiple of 4
        mov     eax,edi
        and     eax,BYTE 11b
        jz      .L4
        PIX16_TO_16BGR565
        dec     ecx

.L4:    ; save count (its low bit decides whether a tail pixel is left)
        push    ecx

        ; unroll twice: ecx = number of dwords (pixel pairs), at least 7 here
        shr     ecx,1

        ; point arrays to the end of the dword region
        lea     esi,[esi+ecx*4]
        lea     edi,[edi+ecx*4]

        ; negative counter that runs up to zero
        neg     ecx
        jmp     SHORT .L6

        ; The store of each converted dword is deferred to the top of the
        ; next iteration; ecx has already been incremented by then, hence
        ; the -4 displacement.
.L5:
        mov     [edi+ecx*4-4],eax
.L6:
        mov     eax,[esi+ecx*4]

        mov     ebx,[esi+ecx*4]
        and     eax,07E007E0h           ; bits 5-10 of both pixels

        mov     edx,[esi+ecx*4]
        and     ebx,0F800F800h          ; bits 11-15 of both pixels

        shr     ebx,11
        and     edx,001F001Fh           ; bits 0-4 of both pixels

        shl     edx,11
        add     eax,ebx

        add     eax,edx
        inc     ecx

        jnz     .L5

        ; store the last converted dword (ecx is 0 here)
        mov     [edi+ecx*4-4],eax

        ; tail: one pixel is left over when the saved count was odd
        pop     ecx
        and     ecx,BYTE 1
        jz      .L7
        PIX16_TO_16BGR565

.L7:
        retn
130
131
132
133
134
135
; ---------------------------------------------------------------------------
; PIX16_TO_16RGB555
;
; Converts the single 16-bit pixel at [esi] and stores it at [edi], then
; advances both pointers by two bytes.  Bits 0-4 are kept, bits 6-15 move
; down one position to bits 5-14, bit 5 is dropped and bit 15 ends up clear.
; Clobbers eax, ebx and the flags.
; ---------------------------------------------------------------------------
%macro PIX16_TO_16RGB555 0
        mov     al,[esi]
        mov     ah,[esi+1]
        mov     ebx,eax
        shr     ebx,1
        and     ebx,0111111111100000b   ; former bits 6-15, now bits 5-14
        and     eax,BYTE 0000000000011111b ; bits 0-4 unchanged
        add     eax,ebx
        mov     [edi],al
        mov     [edi+1],ah
        add     esi,BYTE 2
        add     edi,BYTE 2
%endmacro

; ---------------------------------------------------------------------------
; _ConvertX86p16_16RGB555
;
; Converts a run of 16-bit pixels to 16-bit pixels by squeezing out bit 5 of
; every pixel (going by the routine name this is RGB565 -> RGB555).
;
; In:    esi = source pixels
;        edi = destination pixels
;        ecx = pixel count (0 converts nothing)
; Out:   esi / edi point just past the last pixel read / written
; Clobb: eax, ebx, ecx, edx, flags (ebp is used but saved and restored)
; ---------------------------------------------------------------------------
_ConvertX86p16_16RGB555:

        ; more than 32 pixels: take the four-pixels-per-iteration path
        cmp     ecx,BYTE 32
        ja      .L3

        ; an empty run must not enter the dec/jnz loop below, otherwise
        ; ecx wraps around and the loop runs for 2^32 iterations
        test    ecx,ecx
        jz      .L2

.L1:    ; short loop, one pixel per iteration
        PIX16_TO_16RGB555
        dec     ecx
        jnz     .L1
.L2:
        retn

.L3:    ; head: convert one pixel first unless edi is already a multiple of 4
        mov     eax,edi
        and     eax,BYTE 11b
        jz      .L4
        PIX16_TO_16RGB555
        dec     ecx

.L4:    ; save ebp, it serves as the loop counter below
        push    ebp

        ; save count (its low two bits give the number of tail pixels)
        push    ecx

        ; unroll four times: ecx = number of 8-byte groups, at least 8 here
        shr     ecx,2

        ; point arrays to the end of the unrolled region
        lea     esi,[esi+ecx*8]
        lea     edi,[edi+ecx*8]

        ; negative counter that runs up to zero: ebp = -ecx
        xor     ebp,ebp
        sub     ebp,ecx

.L5:    ; two dwords (four pixels) per iteration; ecx is free as scratch
        ; because the count was pushed above.  The original author flagged
        ; the first load with "agi?".
        mov     eax,[esi+ebp*8]
        mov     ecx,[esi+ebp*8+4]

        mov     ebx,eax
        mov     edx,ecx

        and     eax,0FFC0FFC0h          ; bits 6-15 of both pixels
        and     ecx,0FFC0FFC0h

        shr     eax,1                   ; ... moved down to bits 5-14
        and     ebx,001F001Fh           ; bits 0-4 of both pixels

        shr     ecx,1
        and     edx,001F001Fh

        add     eax,ebx
        add     ecx,edx

        mov     [edi+ebp*8],eax
        mov     [edi+ebp*8+4],ecx

        inc     ebp
        jnz     .L5

        ; tail: up to three pixels remain (saved count modulo 4)
        pop     ecx
.L6:
        and     ecx,BYTE 11b
        jz      .L7
        PIX16_TO_16RGB555
        dec     ecx
        jmp     SHORT .L6

.L7:
        pop     ebp
        retn
238
239
240
241
242
243
; ---------------------------------------------------------------------------
; PIX16_TO_16BGR555
;
; Converts the single 16-bit pixel at [esi] and stores it at [edi], then
; advances both pointers by two bytes.  Bits 11-15 move to bits 0-4,
; bits 6-10 move to bits 5-9, bits 0-4 move to bits 10-14; bit 5 is dropped
; and bit 15 ends up clear.
; Clobbers eax, ebx, edx and the flags.
; ---------------------------------------------------------------------------
%macro PIX16_TO_16BGR555 0
        mov     al,[esi]
        mov     ah,[esi+1]
        mov     ebx,eax
        mov     edx,eax
        shr     eax,11
        and     eax,BYTE 11111b         ; former bits 11-15, now bits 0-4
        shr     ebx,1
        and     ebx,1111100000b         ; former bits 6-10, now bits 5-9
        shl     edx,10
        and     edx,0111110000000000b   ; former bits 0-4, now bits 10-14
        add     eax,ebx
        add     eax,edx
        mov     [edi],al
        mov     [edi+1],ah
        add     esi,BYTE 2
        add     edi,BYTE 2
%endmacro

; ---------------------------------------------------------------------------
; _ConvertX86p16_16BGR555
;
; Converts a run of 16-bit pixels to 16-bit pixels, swapping the outer 5-bit
; fields and shortening the middle field from 6 to 5 bits (going by the
; routine name this is RGB565 -> BGR555).
;
; In:    esi = source pixels
;        edi = destination pixels
;        ecx = pixel count (0 converts nothing)
; Out:   esi / edi point just past the last pixel read / written
; Clobb: eax, ebx, ecx, edx, flags
; ---------------------------------------------------------------------------
_ConvertX86p16_16BGR555:

        ; more than 16 pixels: take the two-pixels-per-dword path
        cmp     ecx,BYTE 16
        ja      .L3

        ; an empty run must not enter the dec/jnz loop below, otherwise
        ; ecx wraps around and the loop runs for 2^32 iterations
        test    ecx,ecx
        jz      .L2

.L1:    ; short loop, one pixel per iteration
        PIX16_TO_16BGR555
        dec     ecx
        jnz     .L1
.L2:
        retn

.L3:    ; head: convert one pixel first unless edi is already a multiple of 4
        mov     eax,edi
        and     eax,BYTE 11b
        jz      .L4
        PIX16_TO_16BGR555
        dec     ecx

.L4:    ; save count (its low bit decides whether a tail pixel is left)
        push    ecx

        ; unroll twice: ecx = number of dwords (pixel pairs), at least 7 here
        shr     ecx,1

        ; point arrays to the end of the dword region
        lea     esi,[esi+ecx*4]
        lea     edi,[edi+ecx*4]

        ; negative counter that runs up to zero
        neg     ecx
        jmp     SHORT .L6

        ; The store of each converted dword is deferred to the top of the
        ; next iteration; ecx has already been incremented by then, hence
        ; the -4 displacement.
.L5:
        mov     [edi+ecx*4-4],eax
.L6:
        mov     eax,[esi+ecx*4]

        shr     eax,1
        mov     ebx,[esi+ecx*4]

        and     eax,03E003E0h           ; former bits 6-10 of both pixels
        mov     edx,[esi+ecx*4]

        and     ebx,0F800F800h          ; bits 11-15 of both pixels

        shr     ebx,11
        and     edx,001F001Fh           ; bits 0-4 of both pixels

        shl     edx,10
        add     eax,ebx

        add     eax,edx
        inc     ecx

        jnz     .L5

        ; store the last converted dword (ecx is 0 here)
        mov     [edi+ecx*4-4],eax

        ; tail: one pixel is left over when the saved count was odd
        pop     ecx
        and     ecx,BYTE 1
        jz      .L7
        PIX16_TO_16BGR555

.L7:
        retn
356
357
358
359
360
361
; ---------------------------------------------------------------------------
; PIX16_TO_8RGB332
;
; Converts the single 16-bit pixel at [esi] into one byte at [edi], then
; advances esi by two and edi by one.  Output bits 0-1 come from source
; bits 3-4 (blue), output bits 2-4 from source bits 8-10 (green) and output
; bits 5-7 from source bits 13-15 (red).
; Clobbers eax, ebx, edx and the flags.
; ---------------------------------------------------------------------------
%macro PIX16_TO_8RGB332 0
        mov     al,[esi+0]
        mov     ah,[esi+1]
        mov     ebx,eax
        mov     edx,eax
        and     eax,BYTE 11000b         ; blue
        shr     eax,3
        and     ebx,11100000000b        ; green
        shr     ebx,6
        and     edx,1110000000000000b   ; red
        shr     edx,8
        add     eax,ebx
        add     eax,edx
        mov     [edi],al
        add     esi,BYTE 2
        inc     edi
%endmacro

; ---------------------------------------------------------------------------
; _ConvertX86p16_8RGB332
;
; Converts a run of 16-bit pixels to 8-bit pixels, keeping the top two bits
; of the low 5-bit field and the top three bits of the other two fields
; (going by the routine name this is RGB565 -> RGB332).
;
; In:    esi = source pixels (2 bytes each)
;        edi = destination pixels (1 byte each)
;        ecx = pixel count (0 converts nothing)
; Out:   esi / edi point just past the last pixel read / written
; Clobb: eax, ebx, ecx, edx, flags
;
; Changes from the earlier version of this routine:
;   * the four-pixel loop loads all of its source bytes at the top of the
;     iteration; it used to preload [esi+0..2] for the following group even
;     after the final one, reading up to three bytes past the source
;   * ebp is no longer pushed/popped: the routine never modifies it
; ---------------------------------------------------------------------------
_ConvertX86p16_8RGB332:

        ; more than 16 pixels: take the four-pixels-per-dword path
        cmp     ecx,BYTE 16
        ja      .L3

        ; an empty run must not enter the dec/jnz loop below, otherwise
        ; ecx wraps around and the loop runs for 2^32 iterations
        test    ecx,ecx
        jz      .L2

.L1:    ; short loop, one pixel per iteration
        PIX16_TO_8RGB332
        dec     ecx
        jnz     .L1
.L2:
        retn

.L3:    ; head: convert single pixels until edi is a multiple of 4
        ; (at most three, so more than 12 pixels remain afterwards)
        mov     eax,edi
        and     eax,BYTE 11b
        jz      .L4
        PIX16_TO_8RGB332
        dec     ecx
        jmp     SHORT .L3

.L4:    ; save count (its low two bits give the number of tail pixels)
        push    ecx

        ; unroll 4 times: ecx = number of four-pixel groups, at least 3 here
        shr     ecx,2

.L5:    ; Gather the low bytes of the four pixels in edx and the high bytes
        ; in ebx.  After the shifts the byte order is pixel 2,3,0,1; the
        ; rotates below put it back into pixel order 0,1,2,3.
        mov     dl,[esi+0]
        mov     bl,[esi+1]
        mov     dh,[esi+2]
        mov     bh,[esi+3]

        shl     edx,16
        shl     ebx,16

        mov     dl,[esi+4]
        mov     bl,[esi+5]
        mov     dh,[esi+6]
        mov     bh,[esi+7]

        and     edx,00011000000110000001100000011000b ; blues
        mov     eax,ebx                 ; setup eax for reds

        ror     edx,16+3                ; pixel order, blue in bits 0-1
        and     ebx,00000111000001110000011100000111b ; greens

        and     eax,11100000111000001110000011100000b ; reds
        ror     ebx,16-2                ; pixel order, green in bits 2-4

        ror     eax,16                  ; pixel order, red stays in bits 5-7
        add     eax,ebx

        add     eax,edx
        mov     [edi],eax

        add     esi,BYTE 8
        add     edi,BYTE 4

        dec     ecx
        jnz     .L5

        ; check tail: up to three pixels remain (saved count modulo 4)
        pop     ecx
        and     ecx,BYTE 11b
        jz      .L7

.L6:    ; tail, one pixel per iteration
        PIX16_TO_8RGB332
        dec     ecx
        jnz     .L6

.L7:
        retn
487
; For ELF output, emit an empty .note.GNU-stack section (flagged noexec) so
; that GNU toolchains do not mark the stack executable because of this object.
%ifidn __OUTPUT_FORMAT__,elf
section .note.GNU-stack noalloc noexec nowrite progbits
%endif