platform ps2, handle audio similar to psp
[picodrive.git] / platform / gp2x / code940 / memcpy.s
... / ...
CommitLineData
1/* $NetBSD: memcpy.S,v 1.3 1997/11/22 03:27:12 mark Exp $ */
2
3/*-
4* Copyright (c) 1997 The NetBSD Foundation, Inc.
5* All rights reserved.
6*
7* This code is derived from software contributed to The NetBSD Foundation
8* by Neil A. Carson and Mark Brinicombe
9*
10* Redistribution and use in source and binary forms, with or without
11* modification, are permitted provided that the following conditions
12* are met:
13* 1. Redistributions of source code must retain the above copyright
14* notice, this list of conditions and the following disclaimer.
15* 2. Redistributions in binary form must reproduce the above copyright
16* notice, this list of conditions and the following disclaimer in the
17* documentation and/or other materials provided with the distribution.
18* 3. All advertising materials mentioning features or use of this software
19* must display the following acknowledgement:
20* This product includes software developed by the NetBSD
21* Foundation, Inc. and its contributors.
22* 4. Neither the name of The NetBSD Foundation nor the names of its
23* contributors may be used to endorse or promote products derived
24* from this software without specific prior written permission.
25*
26* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36* POSSIBILITY OF SUCH DAMAGE.
37*/
38
39/* #include <machine/asm.h>*/
40
/*
 * memcpy stub.
 * r0, r1 and r2 are passed to _memcpy untouched.  The incoming r0 is
 * pushed first and popped back into r0 on the way out, so the caller
 * receives the r0 it passed in whatever _memcpy leaves in that register.
 */
	.global	memcpy
	.global	_memcpy
memcpy:
	stmfd	sp!, {r0, lr}		/* keep caller's r0 and the return address */
	bl	_memcpy
	ldmfd	sp!, {r0, pc}		/* reload r0; the saved lr goes straight into pc */
48
49
/*
 * memmove stub.
 * Same sequence as the memcpy stub: save r0/lr, call _memcpy, then pop
 * the saved r0 and return.  _memcpy picks the copy direction itself from
 * the two addresses.
 */
	.global	memmove
memmove:
	stmfd	sp!, {r0, lr}		/* keep caller's r0 and the return address */
	bl	_memcpy
	ldmfd	sp!, {r0, pc}		/* reload r0; the saved lr goes straight into pc */
56
57
58
59/*
60* This is one fun bit of code ...
61* Some easy listening music is suggested while trying to understand this
62* code e.g. Iron Maiden
63*
64* For anyone attempting to understand it :
65*
66* The core code is implemented here (_memcpy) with simple stubs for memcpy()
67* and memmove(). (bcopy() was listed here too, but this file shows no bcopy stub.)
68*
69* All local labels are prefixed with Lmemcpy_
70* Following the prefix a label starting f is used in the forward copy code
71* while a label using b is used in the backwards copy code
72* The source and destination addresses determine whether a forward or
73* backward copy is performed.
74* Separate bits of code are used to deal with the following situations
75* for both the forward and backwards copy.
76* unaligned source address
77* unaligned destination address
78* Separate copy routines are used to produce an optimised result for each
79* of these cases.
80* The copy code will use LDM/STM instructions to copy up to 32 bytes at
81* a time where possible.
82*
83* Note: r12 (aka ip) can be trashed during the function along with
84* r0-r3 although r0-r2 have defined uses i.e. dest, src, len throughout.
85* Additional registers are preserved prior to use i.e. r4, r5, r7-r10 & lr
86*
87* Apologies for the state of the comments;-)
88*/
89
90
91
/*
 * _memcpy: copy core called by the memcpy and memmove stubs.
 *   r0 = destination, r1 = source, r2 = byte count
 * A source below the destination branches to Lmemcpy_backwards; otherwise
 * the copy runs upwards from here.  The forward path pushes {r0, lr} and
 * finishes by popping them into {r0, pc}; r3, r12 and lr serve as scratch,
 * r4 and r7-r10 are stacked around the 32-byte loop.
 */
_memcpy:
	/* Determine copy direction */
	cmp	r1, r0
	blo	Lmemcpy_backwards	/* src < dest (unsigned compare) */

	/*
	 * Z is still the result of the cmp above, so this pair fires when
	 * src == dest (it does not test the length).  r0 is zeroed here; the
	 * memcpy/memmove stubs reload their own saved r0 afterwards.
	 */
	moveq	r0, #0
	moveq	pc, lr

	stmfd	sp!, {r0, lr}		/* saved dest is popped back into r0 on exit */
	subs	r2, r2, #4
	blt	Lmemcpy_fl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	Lmemcpy_fdestul		/* destination not word aligned */
	ands	r12, r1, #3
	bne	Lmemcpy_fsrcul		/* source not word aligned */

Lmemcpy_ft8:
	/* Source and destination are both word aligned */
	subs	r2, r2, #8
	blt	Lmemcpy_fl12		/* less than 12 bytes (4 taken off above) */
	subs	r2, r2, #20
	blt	Lmemcpy_fl32		/* less than 32 bytes (12 taken off above) */
	stmfd	sp!, {r4, r7-r10}	/* borrow r4 and r7-r10 for the wide loop */

	/* 32 bytes per pass */
	/* XXX for really big copies perhaps we should use more registers */
Lmemcpy_floop32:
	ldmia	r1!, {r3, r4, r7-r10, r12, lr}
	stmia	r0!, {r3, r4, r7-r10, r12, lr}
	subs	r2, r2, #32
	bge	Lmemcpy_floop32

	cmn	r2, #16			/* at least 16 bytes still to go? */
	ldmgeia	r1!, {r3, r4, r12, lr}	/* then move one 16-byte chunk */
	stmgeia	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #16
	ldmfd	sp!, {r4, r7-r10}	/* give the borrowed registers back */

Lmemcpy_fl32:
	adds	r2, r2, #20		/* r2 = bytes left - 12, flags set for the loop */

	/* 12 bytes per pass; every instruction is skipped once r2 went negative */
Lmemcpy_floop12:
	ldmgeia	r1!, {r3, r12, lr}
	stmgeia	r0!, {r3, r12, lr}
	subges	r2, r2, #12
	bge	Lmemcpy_floop12

Lmemcpy_fl12:
	adds	r2, r2, #8		/* r2 = bytes left - 4 */
	blt	Lmemcpy_fl4

	subs	r2, r2, #4
	ldrlt	r3, [r1], #4		/* 4..7 bytes left: move one word */
	strlt	r3, [r0], #4
	ldmgeia	r1!, {r3, r12}		/* 8 or more left: move two words */
	stmgeia	r0!, {r3, r12}
	subge	r2, r2, #4

Lmemcpy_fl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4		/* r2 = exact number of bytes left */
	ldmeqfd	sp!, {r0, pc}		/* none: done */

	/* byte tail: always one byte, a second if r2 >= 2, a third if r2 > 2 */
	cmp	r2, #2
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
	ldmfd	sp!, {r0, pc}
166
/*
 * Forward copy, destination not word aligned.
 * On entry r12 = dest & 3 (non-zero) and r2 = length - 4 (>= 0), so the
 * up to three byte copies below never run past the requested length.
 */
Lmemcpy_fdestul:
	rsb	r12, r12, #4		/* r12 = bytes needed to reach a word boundary */
	cmp	r12, #2

	/* align the destination: one byte, a second if r12 >= 2, a third if r12 > 2 */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
	subs	r2, r2, r12
	blt	Lmemcpy_fl4		/* less than 4 bytes left */

	ands	r12, r1, #3
	beq	Lmemcpy_ft8		/* source is word aligned as well */
	/* otherwise fall through with r12 = src & 3 */
184
/*
 * Forward copy, source not word aligned (destination is aligned here).
 * This is where it gets nasty ...
 *
 * On entry r12 = src & 3 (1, 2 or 3) and r2 = bytes left - 4.  The source
 * pointer is rounded down to a word and whole words are loaded; each
 * output word is built from the tail of one source word (lsr) and the head
 * of the next (lsl).  The shift directions correspond to a little-endian
 * byte layout.  lr always carries the most recently loaded source word.
 * Each variant ends by moving r1 back to the true byte position and
 * handing the last 0..3 bytes to Lmemcpy_fl4.
 */
Lmemcpy_fsrcul:
	bic	r1, r1, #3		/* round source down to a word boundary */
	ldr	lr, [r1], #4		/* first, partly wanted, source word */
	cmp	r12, #2
	bgt	Lmemcpy_fsrcul3		/* src & 3 == 3 */
	beq	Lmemcpy_fsrcul2		/* src & 3 == 2 */

	/* src & 3 == 1: three bytes of each source word belong to the current output word */
	cmp	r2, #12
	blt	Lmemcpy_fsrcul1loop4	/* too short for the 16-byte loop */
	sub	r2, r2, #12
	stmfd	sp!, {r4, r5}

	/* 16 bytes per pass */
Lmemcpy_fsrcul1loop16:
	mov	r3, lr, lsr #8
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #16
	bge	Lmemcpy_fsrcul1loop16
	ldmfd	sp!, {r4, r5}
	adds	r2, r2, #12
	blt	Lmemcpy_fsrcul1l4

	/* one word per pass */
Lmemcpy_fsrcul1loop4:
	mov	r12, lr, lsr #8
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #24
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	Lmemcpy_fsrcul1loop4

Lmemcpy_fsrcul1l4:
	sub	r1, r1, #3		/* step back to the first byte not yet copied */
	b	Lmemcpy_fl4

	/* src & 3 == 2: two bytes of each source word belong to the current output word */
Lmemcpy_fsrcul2:
	cmp	r2, #12
	blt	Lmemcpy_fsrcul2loop4	/* too short for the 16-byte loop */
	sub	r2, r2, #12
	stmfd	sp!, {r4, r5}

	/* 16 bytes per pass */
Lmemcpy_fsrcul2loop16:
	mov	r3, lr, lsr #16
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #16
	bge	Lmemcpy_fsrcul2loop16
	ldmfd	sp!, {r4, r5}
	adds	r2, r2, #12
	blt	Lmemcpy_fsrcul2l4

	/* one word per pass */
Lmemcpy_fsrcul2loop4:
	mov	r12, lr, lsr #16
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #16
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	Lmemcpy_fsrcul2loop4

Lmemcpy_fsrcul2l4:
	sub	r1, r1, #2		/* step back to the first byte not yet copied */
	b	Lmemcpy_fl4

	/* src & 3 == 3: one byte of each source word belongs to the current output word */
Lmemcpy_fsrcul3:
	cmp	r2, #12
	blt	Lmemcpy_fsrcul3loop4	/* too short for the 16-byte loop */
	sub	r2, r2, #12
	stmfd	sp!, {r4, r5}

	/* 16 bytes per pass */
Lmemcpy_fsrcul3loop16:
	mov	r3, lr, lsr #24
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #16
	bge	Lmemcpy_fsrcul3loop16
	ldmfd	sp!, {r4, r5}
	adds	r2, r2, #12
	blt	Lmemcpy_fsrcul3l4

	/* one word per pass */
Lmemcpy_fsrcul3loop4:
	mov	r12, lr, lsr #24
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #8
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	Lmemcpy_fsrcul3loop4

Lmemcpy_fsrcul3l4:
	sub	r1, r1, #1		/* step back to the first byte not yet copied */
	b	Lmemcpy_fl4
296
/*
 * Backward copy, entered from _memcpy when the source lies below the
 * destination.  Both pointers are moved to one past the end and every
 * access pre-decrements.  Nothing has been pushed on the way here, so this
 * path stacks lr itself wherever lr is borrowed as scratch, and it returns
 * through "mov pc, lr".
 */
Lmemcpy_backwards:
	add	r1, r1, r2		/* r1 = one past the last source byte */
	add	r0, r0, r2		/* r0 = one past the last destination byte */
	subs	r2, r2, #4
	blt	Lmemcpy_bl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	Lmemcpy_bdestul		/* destination end not word aligned */
	ands	r12, r1, #3
	bne	Lmemcpy_bsrcul		/* source end not word aligned */

Lmemcpy_bt8:
	/* Source and destination are both word aligned */
	subs	r2, r2, #8
	blt	Lmemcpy_bl12		/* less than 12 bytes (4 taken off above) */
	stmfd	sp!, {r4, r7-r10, lr}	/* scratch for the wide copies below */
	subs	r2, r2, #20		/* less than 32 bytes (12 taken off above)? */
	blt	Lmemcpy_bl32

	/* 32 bytes per pass */
	/* XXX for really big copies perhaps we should use more registers */
Lmemcpy_bloop32:
	ldmdb	r1!, {r3, r4, r7-r10, r12, lr}
	stmdb	r0!, {r3, r4, r7-r10, r12, lr}
	subs	r2, r2, #32
	bge	Lmemcpy_bloop32

Lmemcpy_bl32:
	cmn	r2, #16			/* at least 16 bytes still to go? */
	ldmgedb	r1!, {r3, r4, r12, lr}	/* then move one 16-byte chunk */
	stmgedb	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #16
	adds	r2, r2, #20		/* r2 = bytes left - 12 */
	ldmgedb	r1!, {r3, r12, lr}	/* at least 12 left: move one 12-byte chunk */
	stmgedb	r0!, {r3, r12, lr}
	subge	r2, r2, #12
	ldmfd	sp!, {r4, r7-r10, lr}	/* restore scratch and the return address */

Lmemcpy_bl12:
	adds	r2, r2, #8		/* r2 = bytes left - 4 */
	blt	Lmemcpy_bl4
	subs	r2, r2, #4
	ldrlt	r3, [r1, #-4]!		/* 4..7 bytes left: move one word */
	strlt	r3, [r0, #-4]!
	ldmgedb	r1!, {r3, r12}		/* 8 or more left: move two words */
	stmgedb	r0!, {r3, r12}
	subge	r2, r2, #4

Lmemcpy_bl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4		/* r2 = exact number of bytes left */
	moveq	pc, lr			/* none: done */

	/* byte tail: always one byte, a second if r2 >= 2, a third if r2 > 2 */
	cmp	r2, #2
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrgeb	r3, [r1, #-1]!
	strgeb	r3, [r0, #-1]!
	ldrgtb	r3, [r1, #-1]!
	strgtb	r3, [r0, #-1]!
	mov	pc, lr
358
/*
 * Backward copy, destination end not word aligned.
 * On entry r12 = r0 & 3 (non-zero), which is exactly the number of bytes
 * lying above the next lower word boundary, and r2 = length - 4 (>= 0).
 */
Lmemcpy_bdestul:
	cmp	r12, #2

	/* align the destination: one byte, a second if r12 >= 2, a third if r12 > 2 */
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrgeb	r3, [r1, #-1]!
	strgeb	r3, [r0, #-1]!
	ldrgtb	r3, [r1, #-1]!
	strgtb	r3, [r0, #-1]!
	subs	r2, r2, r12
	blt	Lmemcpy_bl4		/* less than 4 bytes to go */
	ands	r12, r1, #3
	beq	Lmemcpy_bt8		/* source is word aligned as well */
	/* otherwise fall through with r12 = src & 3 */
374
/*
 * Backward copy, source end not word aligned (destination is aligned here).
 * This is where it gets nasty ...
 *
 * On entry r12 = r1 & 3 (1, 2 or 3) and r2 = bytes left - 4.  The source
 * pointer is rounded down to a word and whole words are loaded going
 * downwards; each output word is built from the head of one source word
 * (lsl) and the tail of the next lower one (lsr).  The shift directions
 * correspond to a little-endian byte layout.  r3 always carries the most
 * recently loaded source word.  lr is live as the return address on this
 * path, so the 16-byte loops stack it together with r4/r5 before using it.
 * Each variant ends by moving r1 back to the true byte position and handing
 * the last 0..3 bytes to Lmemcpy_bl4.
 */
Lmemcpy_bsrcul:
	bic	r1, r1, #3		/* round source down to a word boundary */
	ldr	r3, [r1]		/* topmost, partly wanted, source word */
	cmp	r12, #2
	blt	Lmemcpy_bsrcul1		/* src & 3 == 1 */
	beq	Lmemcpy_bsrcul2		/* src & 3 == 2 */

	/* src & 3 == 3: three bytes of each source word belong to the current output word */
	cmp	r2, #12
	blt	Lmemcpy_bsrcul3loop4	/* too short for the 16-byte loop */
	sub	r2, r2, #12
	stmfd	sp!, {r4, r5, lr}

	/* 16 bytes per pass */
Lmemcpy_bsrcul3loop16:
	mov	lr, r3, lsl #8
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r3, lsr #24
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #16
	bge	Lmemcpy_bsrcul3loop16
	ldmfd	sp!, {r4, r5, lr}
	adds	r2, r2, #12
	blt	Lmemcpy_bsrcul3l4

	/* one word per pass */
Lmemcpy_bsrcul3loop4:
	mov	r12, r3, lsl #8
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #24
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	Lmemcpy_bsrcul3loop4

Lmemcpy_bsrcul3l4:
	add	r1, r1, #3		/* step forward to one past the last byte not yet copied */
	b	Lmemcpy_bl4

	/* src & 3 == 2: two bytes of each source word belong to the current output word */
Lmemcpy_bsrcul2:
	cmp	r2, #12
	blt	Lmemcpy_bsrcul2loop4	/* too short for the 16-byte loop */
	sub	r2, r2, #12
	stmfd	sp!, {r4, r5, lr}

	/* 16 bytes per pass */
Lmemcpy_bsrcul2loop16:
	mov	lr, r3, lsl #16
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r3, lsr #16
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #16
	bge	Lmemcpy_bsrcul2loop16
	ldmfd	sp!, {r4, r5, lr}
	adds	r2, r2, #12
	blt	Lmemcpy_bsrcul2l4

	/* one word per pass */
Lmemcpy_bsrcul2loop4:
	mov	r12, r3, lsl #16
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #16
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	Lmemcpy_bsrcul2loop4

Lmemcpy_bsrcul2l4:
	add	r1, r1, #2		/* step forward to one past the last byte not yet copied */
	b	Lmemcpy_bl4

	/* src & 3 == 1: one byte of each source word belongs to the current output word */
Lmemcpy_bsrcul1:
	cmp	r2, #12
	blt	Lmemcpy_bsrcul1loop4	/* too short for the 16-byte loop */
	sub	r2, r2, #12
	stmfd	sp!, {r4, r5, lr}

	/* 16 bytes per pass, like the other two variants */
Lmemcpy_bsrcul1loop16:
	mov	lr, r3, lsl #24
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r3, lsr #8
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #16
	bge	Lmemcpy_bsrcul1loop16
	ldmfd	sp!, {r4, r5, lr}
	adds	r2, r2, #12
	blt	Lmemcpy_bsrcul1l4

	/* one word per pass */
Lmemcpy_bsrcul1loop4:
	mov	r12, r3, lsl #24
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #8
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	Lmemcpy_bsrcul1loop4

Lmemcpy_bsrcul1l4:
	add	r1, r1, #1		/* step forward to one past the last byte not yet copied */
	b	Lmemcpy_bl4