/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 * linkage_arm.s for PCSX                                                  *
 * Copyright (C) 2009-2011 Ari64                                           *
 * Copyright (C) 2010-2013 Gražvydas "notaz" Ignotas                       *
 *                                                                         *
 * This program is free software; you can redistribute it and/or modify    *
 * it under the terms of the GNU General Public License as published by    *
 * the Free Software Foundation; either version 2 of the License, or       *
 * (at your option) any later version.                                     *
 *                                                                         *
 * This program is distributed in the hope that it will be useful,         *
 * but WITHOUT ANY WARRANTY; without even the implied warranty of          *
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the            *
 * GNU General Public License for more details.                            *
 *                                                                         *
 * You should have received a copy of the GNU General Public License       *
 * along with this program; if not, write to the                           *
 * Free Software Foundation, Inc.,                                         *
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.            *
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

#include "arm_features.h"
#include "new_dynarec_config.h"
#include "linkage_offsets.h"


#ifdef __MACH__
#define dynarec_local ESYM(dynarec_local)
#define ndrc_add_jump_out ESYM(ndrc_add_jump_out)
#define ndrc_try_restore_block ESYM(ndrc_try_restore_block)
#define ndrc_get_addr_ht ESYM(ndrc_get_addr_ht)
#define ndrc_get_addr_ht_param ESYM(ndrc_get_addr_ht_param)
#define ndrc_write_invalidate_one ESYM(ndrc_write_invalidate_one)
#define gen_interupt ESYM(gen_interupt)
#define gteCheckStallRaw ESYM(gteCheckStallRaw)
#define psxException ESYM(psxException)
#endif

        .bss
        .align 4
        .global dynarec_local
        .type dynarec_local, %object
        .size dynarec_local, LO_dynarec_local_size
dynarec_local:
        .space LO_dynarec_local_size

#define DRC_VAR_(name, vname, size_) \
        vname = dynarec_local + LO_##name; \
        .global vname; \
        .type vname, %object; \
        .size vname, size_

#define DRC_VAR(name, size_) \
        DRC_VAR_(name, ESYM(name), size_)

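/* DRC_VAR carves C-visible symbols out of the single dynarec_local block
 * above, at the LO_* offsets from linkage_offsets.h.  Keeping all emulator
 * state in one block lets compiled code reach any of it with a single
 * fp-relative load or store (fp holds &dynarec_local while running). */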
DRC_VAR(next_interupt, 4)
DRC_VAR(cycle_count, 4)
DRC_VAR(last_count, 4)
DRC_VAR(pending_exception, 4)
DRC_VAR(stop, 4)
DRC_VAR(branch_target, 4)
DRC_VAR(address, 4)
DRC_VAR(hack_addr, 4)
DRC_VAR(psxRegs, LO_psxRegs_end - LO_psxRegs)

/* psxRegs */
@DRC_VAR(reg, 128)
DRC_VAR(lo, 4)
DRC_VAR(hi, 4)
DRC_VAR(reg_cop0, 128)
DRC_VAR(reg_cop2d, 128)
DRC_VAR(reg_cop2c, 128)
DRC_VAR(pcaddr, 4)
@DRC_VAR(code, 4)
@DRC_VAR(cycle, 4)
@DRC_VAR(interrupt, 4)
@DRC_VAR(intCycle, 256)

DRC_VAR(rcnts, 7*4*4)
DRC_VAR(inv_code_start, 4)
DRC_VAR(inv_code_end, 4)
DRC_VAR(mem_rtab, 4)
DRC_VAR(mem_wtab, 4)
DRC_VAR(psxH_ptr, 4)
DRC_VAR(zeromem_ptr, 4)
DRC_VAR(invc_ptr, 4)
DRC_VAR(scratch_buf_ptr, 4)
DRC_VAR(ram_offset, 4)
DRC_VAR(mini_ht, 256)


        .syntax unified
        .text
        .align 2

#ifndef HAVE_ARMV5
.macro blx rd
        mov lr, pc
        bx \rd
.endm
#endif

.macro load_varadr reg var
#if defined(HAVE_ARMV7) && defined(TEXRELS_FORBIDDEN)
        movw \reg, #:lower16:(\var-(1678f+8))
        movt \reg, #:upper16:(\var-(1678f+8))
1678:
        add \reg, pc
#elif defined(HAVE_ARMV7) && !defined(__PIC__)
        movw \reg, #:lower16:\var
        movt \reg, #:upper16:\var
#else
        ldr \reg, =\var
#endif
.endm

.macro load_varadr_ext reg var
#if defined(HAVE_ARMV7) && defined(TEXRELS_FORBIDDEN)
        movw \reg, #:lower16:(ptr_\var-(1678f+8))
        movt \reg, #:upper16:(ptr_\var-(1678f+8))
1678:
        ldr \reg, [pc, \reg]
#else
        load_varadr \reg \var
#endif
.endm
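
/* PIC helpers: where text relocations are forbidden, load_varadr forms the
 * address pc-relative (in ARM state pc reads as '.'+8, hence the +8
 * correction), while load_varadr_ext instead loads it from a ptr_<var>
 * indirection cell placed within pc-relative reach. */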

.macro mov_16 reg imm
#ifdef HAVE_ARMV7
        movw \reg, #\imm
#else
        mov \reg, #(\imm & 0x00ff)
        orr \reg, #(\imm & 0xff00)
#endif
.endm

.macro mov_24 reg imm
#ifdef HAVE_ARMV7
        movw \reg, #(\imm & 0xffff)
        movt \reg, #(\imm >> 16)
#else
        mov \reg, #(\imm & 0x0000ff)
        orr \reg, #(\imm & 0x00ff00)
        orr \reg, #(\imm & 0xff0000)
#endif
.endm
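
/* Pre-ARMv7 ARM immediates are an 8-bit value rotated by an even amount,
 * so wider constants are assembled one byte at a time with mov+orr;
 * ARMv7's movw/movt loads 16 bits at a time directly. */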

FUNCTION(dyna_linker):
        /* r0 = virtual target address */
        /* r1 = pointer to an instruction to patch */
#ifndef NO_WRITE_EXEC
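        /* The word at the patch site is an ARM B/BL: cond|101x|imm24, with
         * target = site + 8 + sign_extend(imm24)*4.  Below, (imm24+2)<<8
         * later shifted asr #6 reconstructs sign_extend(imm24)*4 + 8,
         * i.e. the old target as an offset from the patch site. */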
        ldr r7, [r1]
        mov r4, r0
        add r6, r7, #2
        mov r5, r1
        lsl r6, r6, #8
        /* must not compile - that might expire the caller block */
        mov r1, #0
        bl ndrc_get_addr_ht_param

        movs r8, r0
        beq 0f
        add r6, r5, r6, asr #6 /* old target */
        teq r0, r6
        moveq pc, r0 /* Stale i-cache */
        mov r0, r4
        mov r1, r6
        bl ndrc_add_jump_out

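        /* Re-point the branch at the freshly looked-up block: keep the
         * original cond/opcode byte of the old instruction word and set
         * imm24 = (new_target - site - 8) >> 2. */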
        sub r2, r8, r5
        and r1, r7, #0xff000000
        lsl r2, r2, #6
        sub r1, r1, #2
        add r1, r1, r2, lsr #8
        str r1, [r5]
        mov pc, r8
0:
        mov r0, r4
#else
        /* XXX: should be able to do better than this... */
#endif
        bl ndrc_get_addr_ht
        mov pc, r0
        .size dyna_linker, .-dyna_linker

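/* Indirect-jump entry stubs: compiled code keeps the target virtual
 * address in whatever register was handy; each stub moves it to r0 and
 * falls into the hash-table lookup in jump_vaddr_r0. */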
        .align 2
FUNCTION(jump_vaddr_r1):
        mov r0, r1
        b jump_vaddr_r0
        .size jump_vaddr_r1, .-jump_vaddr_r1
FUNCTION(jump_vaddr_r2):
        mov r0, r2
        b jump_vaddr_r0
        .size jump_vaddr_r2, .-jump_vaddr_r2
FUNCTION(jump_vaddr_r3):
        mov r0, r3
        b jump_vaddr_r0
        .size jump_vaddr_r3, .-jump_vaddr_r3
FUNCTION(jump_vaddr_r4):
        mov r0, r4
        b jump_vaddr_r0
        .size jump_vaddr_r4, .-jump_vaddr_r4
FUNCTION(jump_vaddr_r5):
        mov r0, r5
        b jump_vaddr_r0
        .size jump_vaddr_r5, .-jump_vaddr_r5
FUNCTION(jump_vaddr_r6):
        mov r0, r6
        b jump_vaddr_r0
        .size jump_vaddr_r6, .-jump_vaddr_r6
FUNCTION(jump_vaddr_r8):
        mov r0, r8
        b jump_vaddr_r0
        .size jump_vaddr_r8, .-jump_vaddr_r8
FUNCTION(jump_vaddr_r9):
        mov r0, r9
        b jump_vaddr_r0
        .size jump_vaddr_r9, .-jump_vaddr_r9
FUNCTION(jump_vaddr_r10):
        mov r0, r10
        b jump_vaddr_r0
        .size jump_vaddr_r10, .-jump_vaddr_r10
FUNCTION(jump_vaddr_r12):
        mov r0, r12
        b jump_vaddr_r0
        .size jump_vaddr_r12, .-jump_vaddr_r12
FUNCTION(jump_vaddr_r7):
        add r0, r7, #0
        .size jump_vaddr_r7, .-jump_vaddr_r7
FUNCTION(jump_vaddr_r0):
        bl ndrc_get_addr_ht
        mov pc, r0
        .size jump_vaddr_r0, .-jump_vaddr_r0

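/* Cycle-limit trap: compiled code branches here when the cycle counter
 * (r10, kept relative to last_count) runs out.  Flush the count to
 * psxRegs.cycle, run the interrupt/event code, then either return to the
 * compiled block, exit to C if stop was set, or re-resolve the PC if an
 * exception became pending. */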
        .align 2
FUNCTION(cc_interrupt):
        ldr r0, [fp, #LO_last_count]
        mov r1, #0
        add r10, r0, r10
        str r1, [fp, #LO_pending_exception]
        str r10, [fp, #LO_cycle] /* PCSX cycles */
        mov r10, lr

        add r0, fp, #LO_reg_cop0 /* CP0 */
        bl gen_interupt
        mov lr, r10
        ldr r10, [fp, #LO_cycle]
        ldr r0, [fp, #LO_next_interupt]
        ldr r1, [fp, #LO_pending_exception]
        ldr r2, [fp, #LO_stop]
        str r0, [fp, #LO_last_count]
        sub r10, r10, r0
        tst r2, r2
        ldmfdne sp!, {r4, r5, r6, r7, r8, r9, sl, fp, ip, pc}
        tst r1, r1
        moveq pc, lr
        ldr r0, [fp, #LO_pcaddr]
        bl ndrc_get_addr_ht
        mov pc, r0
        .size cc_interrupt, .-cc_interrupt

        .align 2
FUNCTION(jump_addrerror_ds): /* R3000E_AdEL / R3000E_AdES in r0 */
        str r1, [fp, #(LO_psxRegs + (34+8)*4)] /* BadVaddr */
        mov r1, #1
        b call_psxException
FUNCTION(jump_addrerror):
        str r1, [fp, #(LO_psxRegs + (34+8)*4)] /* BadVaddr */
        mov r1, #0
        b call_psxException
FUNCTION(jump_overflow_ds):
        mov r0, #(12<<2) /* R3000E_Ov */
        mov r1, #1
        b call_psxException
FUNCTION(jump_overflow):
        mov r0, #(12<<2)
        mov r1, #0
        b call_psxException
FUNCTION(jump_break_ds):
        mov r0, #(9<<2) /* R3000E_Bp */
        mov r1, #1
        b call_psxException
FUNCTION(jump_break):
        mov r0, #(9<<2)
        mov r1, #0
        b call_psxException
FUNCTION(jump_syscall_ds):
        mov r0, #(8<<2) /* R3000E_Syscall */
        mov r1, #2
        b call_psxException
FUNCTION(jump_syscall):
        mov r0, #(8<<2)
        mov r1, #0

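/* Common exception entry: r0 = Cause exception code (already <<2),
 * r1 = branch-delay flag passed to psxException (the 2 used by
 * jump_syscall_ds presumably marks a syscall in a delay slot),
 * r2 = PC to report, saved to pcaddr.  Cycles are flushed to
 * psxRegs.cycle before calling into C. */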
call_psxException:
        ldr r3, [fp, #LO_last_count]
        str r2, [fp, #LO_pcaddr]
        add r10, r3, r10
        str r10, [fp, #LO_cycle] /* PCSX cycles */
        add r2, fp, #LO_reg_cop0 /* CP0 */
        bl psxException

        /* note: psxException may recursively invoke the recompiler from its
         * HLE code, so be ready for that */
FUNCTION(jump_to_new_pc):
        ldr r2, [fp, #LO_stop]
        ldr r1, [fp, #LO_next_interupt]
        ldr r10, [fp, #LO_cycle]
        ldr r0, [fp, #LO_pcaddr]
        tst r2, r2
        str r1, [fp, #LO_last_count]
        sub r10, r10, r1
        bne new_dyna_leave
        bl ndrc_get_addr_ht
        mov pc, r0
        .size jump_to_new_pc, .-jump_to_new_pc

        .align 2
FUNCTION(new_dyna_leave):
        ldr r0, [fp, #LO_last_count]
        add r12, fp, #28
        add r10, r0, r10
        str r10, [fp, #LO_cycle]
        ldmfd sp!, {r4, r5, r6, r7, r8, r9, sl, fp, ip, pc}
        .size new_dyna_leave, .-new_dyna_leave

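/* Write-invalidate stubs: spill the caller-saved registers into scratch
 * space at the start of dynarec_local, then check the written address
 * against [inv_code_start, inv_code_end], a range already known to hold
 * no translated code; only writes outside it go through
 * ndrc_write_invalidate_one. */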
        .align 2
FUNCTION(invalidate_addr_r0):
        stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr}
        b invalidate_addr_call
        .size invalidate_addr_r0, .-invalidate_addr_r0
        .align 2
FUNCTION(invalidate_addr_r1):
        stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr}
        mov r0, r1
        b invalidate_addr_call
        .size invalidate_addr_r1, .-invalidate_addr_r1
        .align 2
FUNCTION(invalidate_addr_r2):
        stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr}
        mov r0, r2
        b invalidate_addr_call
        .size invalidate_addr_r2, .-invalidate_addr_r2
        .align 2
FUNCTION(invalidate_addr_r3):
        stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr}
        mov r0, r3
        b invalidate_addr_call
        .size invalidate_addr_r3, .-invalidate_addr_r3
        .align 2
FUNCTION(invalidate_addr_r4):
        stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr}
        mov r0, r4
        b invalidate_addr_call
        .size invalidate_addr_r4, .-invalidate_addr_r4
        .align 2
FUNCTION(invalidate_addr_r5):
        stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr}
        mov r0, r5
        b invalidate_addr_call
        .size invalidate_addr_r5, .-invalidate_addr_r5
        .align 2
FUNCTION(invalidate_addr_r6):
        stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr}
        mov r0, r6
        b invalidate_addr_call
        .size invalidate_addr_r6, .-invalidate_addr_r6
        .align 2
FUNCTION(invalidate_addr_r7):
        stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr}
        mov r0, r7
        b invalidate_addr_call
        .size invalidate_addr_r7, .-invalidate_addr_r7
        .align 2
FUNCTION(invalidate_addr_r8):
        stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr}
        mov r0, r8
        b invalidate_addr_call
        .size invalidate_addr_r8, .-invalidate_addr_r8
        .align 2
FUNCTION(invalidate_addr_r9):
        stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr}
        mov r0, r9
        b invalidate_addr_call
        .size invalidate_addr_r9, .-invalidate_addr_r9
        .align 2
FUNCTION(invalidate_addr_r10):
        stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr}
        mov r0, r10
        b invalidate_addr_call
        .size invalidate_addr_r10, .-invalidate_addr_r10
        .align 2
FUNCTION(invalidate_addr_r12):
        stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr}
        mov r0, r12
        .size invalidate_addr_r12, .-invalidate_addr_r12
        .align 2
invalidate_addr_call:
        ldr r12, [fp, #LO_inv_code_start]
        ldr lr, [fp, #LO_inv_code_end]
        cmp r0, r12
        cmpcs lr, r0
        blcc ndrc_write_invalidate_one
        ldmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, pc}
        .size invalidate_addr_call, .-invalidate_addr_call

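/* C entry point: r0 = &dynarec_local, kept in fp for the whole run so
 * every piece of emulator state is one fp-relative access away. */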
        .align 2
FUNCTION(new_dyna_start):
        /* ip is saved only to keep the stack 8-byte aligned, per the EABI */
        stmfd sp!, {r4, r5, r6, r7, r8, r9, sl, fp, ip, lr}
        mov fp, r0 /* dynarec_local */
        ldr r0, [fp, #LO_pcaddr]
        bl ndrc_get_addr_ht
        ldr r1, [fp, #LO_next_interupt]
        ldr r10, [fp, #LO_cycle]
        str r1, [fp, #LO_last_count]
        sub r10, r10, r1
        mov pc, r0
        .size new_dyna_start, .-new_dyna_start

/* --------------------------------------- */

.align 2

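/* I/O read handlers.  The handler table passed in r1 has one entry per
 * aligned 32-bit, 16-bit and 8-bit location of the page, the three parts
 * in that order.  Each entry is a pointer shifted right by one with bit 31
 * flagging a handler: "lsls #1" restores the pointer and moves the flag
 * into the carry, so carry clear means a direct host-memory base (the
 * conditional load) and carry set means a handler address to jump to. */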
.macro pcsx_read_mem readop tab_shift
        /* r0 = address, r1 = handler_tab, r2 = cycles */
        lsl r3, r0, #20
        lsr r3, #(20+\tab_shift)
        ldr r12, [fp, #LO_last_count]
        ldr r1, [r1, r3, lsl #2]
        add r2, r2, r12
        lsls r1, #1
.if \tab_shift == 1
        lsl r3, #1
        \readop r0, [r1, r3]
.else
        \readop r0, [r1, r3, lsl #\tab_shift]
.endif
        movcc pc, lr
        str r2, [fp, #LO_cycle]
        bx r1
.endm

FUNCTION(jump_handler_read8):
        add r1, #0x1000/4*4 + 0x1000/2*4 @ skip to the 8-bit (read8) part of the table
        pcsx_read_mem ldrbcc, 0

FUNCTION(jump_handler_read16):
        add r1, #0x1000/4*4              @ skip to the 16-bit (read16) part of the table
        pcsx_read_mem ldrhcc, 1

FUNCTION(jump_handler_read32):
        pcsx_read_mem ldrcc, 2


.macro memhandler_post
        ldr r0, [fp, #LO_next_interupt]
        ldr r2, [fp, #LO_cycle] @ memhandlers can modify cc, like dma
        str r0, [fp, #LO_last_count]
        sub r0, r2, r0
.endm

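/* Write handlers: same table-entry encoding as the read path.  On a
 * direct store the cycle count is returned unchanged in r0; when a
 * handler is called, cycles are flushed first and r0 is recomputed by
 * memhandler_post, since handlers (e.g. DMA) can reschedule events. */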
.macro pcsx_write_mem wrtop tab_shift
        /* r0 = address, r1 = data, r2 = cycles, r3 = handler_tab */
        lsl r12, r0, #20
        lsr r12, #(20+\tab_shift)
        ldr r3, [r3, r12, lsl #2]
        str r0, [fp, #LO_address] @ some handlers still need it..
        lsls r3, #1
        mov r0, r2 @ cycle return in case of direct store
.if \tab_shift == 1
        lsl r12, #1
        \wrtop r1, [r3, r12]
.else
        \wrtop r1, [r3, r12, lsl #\tab_shift]
.endif
        movcc pc, lr
        ldr r12, [fp, #LO_last_count]
        mov r0, r1
        add r2, r2, r12
        str r2, [fp, #LO_cycle]

        str lr, [fp, #LO_saved_lr]
        blx r3
        ldr lr, [fp, #LO_saved_lr]

        memhandler_post
        bx lr
.endm

FUNCTION(jump_handler_write8):
        add r3, #0x1000/4*4 + 0x1000/2*4 @ skip to the 8-bit (write8) part of the table
        pcsx_write_mem strbcc, 0

FUNCTION(jump_handler_write16):
        add r3, #0x1000/4*4              @ skip to the 16-bit (write16) part of the table
        pcsx_write_mem strhcc, 1

FUNCTION(jump_handler_write32):
        pcsx_write_mem strcc, 2

FUNCTION(jump_handler_write_h):
        /* r0 = address, r1 = data, r2 = cycles, r3 = handler */
        ldr r12, [fp, #LO_last_count]
        str r0, [fp, #LO_address] @ some handlers still need it..
        add r2, r2, r12
        mov r0, r1
        str r2, [fp, #LO_cycle]

        str lr, [fp, #LO_saved_lr]
        blx r3
        ldr lr, [fp, #LO_saved_lr]

        memhandler_post
        bx lr

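/* MIPS SWL (store word left): store the high-order bytes of the register
 * down to the unaligned address; addr&3 picks the byte count (0 -> 1,
 * 1 -> 2, 2 -> 3, 3 -> whole word).  These helpers look up mem_wtab
 * directly (one entry per 4KB page) and punt to the interpreter when the
 * page has a handler instead of direct memory. */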
FUNCTION(jump_handle_swl):
        /* r0 = address, r1 = data, r2 = cycles */
        ldr r3, [fp, #LO_mem_wtab]
        mov r12, r0, lsr #12
        ldr r3, [r3, r12, lsl #2]
        lsls r3, #1
        bcs jump_handle_swx_interp
        add r3, r0, r3
        mov r0, r2
        tst r3, #2
        beq 101f
        tst r3, #1
        beq 2f
3:
        str r1, [r3, #-3]
        bx lr
2:
        lsr r2, r1, #8
        lsr r1, #24
        strh r2, [r3, #-2]
        strb r1, [r3]
        bx lr
101:
        tst r3, #1
        lsrne r1, #16 @ 1
        lsreq r12, r1, #24 @ 0
        strhne r1, [r3, #-1]
        strbeq r12, [r3]
        bx lr

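/* MIPS SWR (store word right): the mirror image - store the low-order
 * bytes of the register from the unaligned address upward; addr&3 == 0
 * stores the whole word, 1 -> 3 bytes, 2 -> 2, 3 -> 1. */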
FUNCTION(jump_handle_swr):
        /* r0 = address, r1 = data, r2 = cycles */
        ldr r3, [fp, #LO_mem_wtab]
        mov r12, r0, lsr #12
        ldr r3, [r3, r12, lsl #2]
        lsls r3, #1
        bcs jump_handle_swx_interp
        add r3, r0, r3
        and r12, r3, #3
        mov r0, r2
        cmp r12, #2
        strbgt r1, [r3] @ 3
        strheq r1, [r3] @ 2
        cmp r12, #1
        strlt r1, [r3] @ 0
        bxne lr
        lsr r2, r1, #8 @ 1
        strb r1, [r3]
        strh r2, [r3, #1]
        bx lr

jump_handle_swx_interp: /* almost never happens */
        ldr r3, [fp, #LO_last_count]
        add r0, fp, #LO_psxRegs
        add r2, r3, r2
        str r2, [fp, #LO_cycle] /* PCSX cycles */
        bl execI
        b jump_to_new_pc

.macro rcntx_read_mode0 num
        /* r0 = address, r2 = cycles */
        ldr r3, [fp, #LO_rcnts+6*4+7*4*\num] @ cycleStart
        mov r0, r2, lsl #16
        sub r0, r0, r3, lsl #16
        lsr r0, #16
        bx lr
.endm

FUNCTION(rcnt0_read_count_m0):
        rcntx_read_mode0 0

FUNCTION(rcnt1_read_count_m0):
        rcntx_read_mode0 1

FUNCTION(rcnt2_read_count_m0):
        rcntx_read_mode0 2

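/* Mode-1 root counters tick at a fraction of the CPU clock.  The ARM
 * targets here have no integer divide instruction, so the divisions below
 * use fixed-point reciprocal multiplies: 0x3334 ~= 2^16/5 for counter 0's
 * divide-by-5 rate, 0x1e6cde ~= 2^32/hsync_cycles for counter 1; counter 2
 * divides by 8 with plain shifts. */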
FUNCTION(rcnt0_read_count_m1):
        /* r0 = address, r2 = cycles */
        ldr r3, [fp, #LO_rcnts+6*4+7*4*0] @ cycleStart
        mov_16 r1, 0x3334
        sub r2, r2, r3
        mul r0, r1, r2 @ /= 5
        lsr r0, #16
        bx lr

FUNCTION(rcnt1_read_count_m1):
        /* r0 = address, r2 = cycles */
        ldr r3, [fp, #LO_rcnts+6*4+7*4*1]
        mov_24 r1, 0x1e6cde
        sub r2, r2, r3
        umull r3, r0, r1, r2 @ ~ /= hsync_cycles, max ~0x1e6cdd
        bx lr

FUNCTION(rcnt2_read_count_m1):
        /* r0 = address, r2 = cycles */
        ldr r3, [fp, #LO_rcnts+6*4+7*4*2]
        mov r0, r2, lsl #16-3
        sub r0, r0, r3, lsl #16-3
        lsr r0, #16 @ /= 8
        bx lr

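/* Account for GTE result latency: flush the cycle count, ask
 * gteCheckStallRaw how long the current op would stall waiting on the
 * previous GTE command, and add the returned cycles to the counter (r10). */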
FUNCTION(call_gteStall):
        /* r0 = op_cycles, r1 = cycles */
        ldr r2, [fp, #LO_last_count]
        str lr, [fp, #LO_saved_lr]
        add r1, r1, r2
        str r1, [fp, #LO_cycle]
        add r1, fp, #LO_psxRegs
        bl gteCheckStallRaw
        ldr lr, [fp, #LO_saved_lr]
        add r10, r10, r0
        bx lr

#ifdef HAVE_ARMV6

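/* get_reg(regmap, r): find which of the 13 allocation slots currently
 * holds guest register r, returning the slot index or -1.  The byte
 * search is vectorized with ARMv6 SIMD: XOR zeroes the matching bytes,
 * uadd8 with ~0 sets the GE flags only for non-zero (non-matching) bytes,
 * and sel/clz turn the match position into an index.  Byte 12 is compared
 * separately up front, and slot 11 is forced to mismatch (EXCLUDE_REG). */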
FUNCTION(get_reg):
        ldr r12, [r0]
        and r1, r1, #0xff
        ldr r2, [r0, #4]
        orr r1, r1, r1, lsl #8
        ldr r3, [r0, #8]
        orr r1, r1, r1, lsl #16 @ searched char in every byte
        ldrb r0, [r0, #12] @ last byte
        eor r12, r12, r1
        eor r2, r2, r1
        eor r3, r3, r1
        cmp r0, r1, lsr #24
        mov r0, #12
        mvn r1, #0 @ r1=~0
        bxeq lr
        orr r3, r3, #0xff000000 @ EXCLUDE_REG
        uadd8 r0, r12, r1 @ add ~0 per byte; GE set for non-zero (non-matching) bytes
        mov r12, #0
        sel r0, r12, r1 @ 0 if no match, else ff in some byte
        uadd8 r2, r2, r1
        sel r2, r12, r1
        uadd8 r3, r3, r1
        sel r3, r12, r1
        mov r12, #3
        clz r0, r0 @ 0, 8, 16, 24 or 32
        clz r2, r2
        clz r3, r3
        sub r0, r12, r0, lsr #3 @ 3, 2, 1, 0 or -1
        sub r2, r12, r2, lsr #3
        sub r3, r12, r3, lsr #3
        orr r2, r2, #4
        orr r3, r3, #8
        and r0, r0, r2
        and r0, r0, r3
        bx lr

#endif /* HAVE_ARMV6 */

@ vim:filetype=armasm