#include "../arm_features.h"
-@ very simple adaption YM2612 output rate to sample rate (~1M cycles @44100)
-//#define INTERPOL
+@ very simple YM2612 output rate to sample rate adaption (~500k cycles @44100)
+#define INTERPOL
.equiv SLOT1, 0
.equiv SLOT2, 2
@ update_eg_phase_slot: EG update for the slot addressed by r5
@ r5=slot, r1=eg_cnt, trashes: r0,r2,r3
@ writes output to routp, but only if vol_out changes
@ NOTE(review): only part of the macro body is visible in this excerpt
@ the new form takes no slot argument and no longer packs vol_out into r6/r7
-.macro update_eg_phase_slot slot
+.macro update_eg_phase_slot
#if defined(INTERPOL)
ldrh r0, [r5,#0x34] @ r0 = vol_out halfword at slot+0x34 (INTERPOL builds only)
#endif
ldrh r3, [r5,#0x18] @ r3 = tl halfword at slot+0x18
add r0, r0, r3 @ volume += tl
strh r0, [r5,#0x34] @ store the sum back as vol_out of this slot
-.if \slot == SLOT1
- mov r6, r6, lsr #16
- orr r6, r0, r6, lsl #16
-.elseif \slot == SLOT2
- mov r6, r6, lsl #16
- mov r0, r0, lsl #16
- orr r6, r0, r6, lsr #16
-.elseif \slot == SLOT3
- mov r7, r7, lsr #16
- orr r7, r0, r7, lsl #16
-.elseif \slot == SLOT4
- mov r7, r7, lsl #16
- mov r0, r0, lsl #16
- orr r7, r0, r7, lsr #16
-.endif
0: @ EG_OFF (numeric local label; the branch that targets it is not visible here)
.endm
@ loop setup: load the per-call state from the block addressed by lr
@ NOTE(review): the start of this routine is outside the visible excerpt
mov r11, r1 @ r11 = copy of r1; used further down as a post-incremented word pointer
and r0, r0, #7 @ keep the low 3 bits of r0 (algo, per the next comment)
orr r4, r4, r0 @ (length<<8)|algo
- add r0, lr, #0x44
- ldmia r0, {r8,r9} @ eg_timer, eg_timer_add
+ ldr r8, [lr, #0x44] @ r8 = eg_timer
+ ldr r9, [lr, #0x48] @ r9 = eg_timer_add
ldr r10, [lr, #0x54] @ op1_out
-@ ldmia lr, {r6,r7} @ load volumes
- ldr r5, [lr, #0x40] @ CH
- ldrh r6, [r5, #0x34] @ vol_out values for all slots
- ldrh r2, [r5, #0x34+SLOT_STRUCT_SIZE*2]
- ldrh r7, [r5, #0x34+SLOT_STRUCT_SIZE]
- ldrh r3, [r5, #0x34+SLOT_STRUCT_SIZE*3]
- orr r6, r6, r2, lsl #16
- orr r7, r7, r3, lsl #16
tst r12, #8 @ lfo?
beq crl_loop @ bit 3 of r12 clear: enter at crl_loop instead of crl_loop_lfo
crl_loop_lfo: @ loop entry used when bit 3 of r12 is set
- add r0, lr, #0x30
- ldmia r0, {r1,r2} @ lfo_cnt, lfo_inc
+ ldr r1, [lr, #0x30] @ r1 = lfo_cnt
+ ldr r2, [lr, #0x34] @ r2 = lfo_inc
subs r4, r4, #0x100 @ decrement the length field kept in r4 from bit 8 up
bmi crl_loop_end @ result negative: leave the loop
bmi crl_loop_end @ NOTE(review): same branch with unchanged flags, looks redundant in this excerpt - confirm against the full file
@ -- SSG --
@ runs update_ssg_eg once for each of the 4 slots, then rewinds r5
- add r0, lr, #0x3c
- ldmia r0, {r1,r5} @ eg_cnt, CH
+ ldr r5, [lr, #0x40] @ r5 = CH (address of the first slot)
@ r5=slot, trashes: r0,r2,r3
+ mov r6, #4 @ r6 = slots still to process
+ssg_upd_loop:
update_ssg_eg
- add r5, r5, #SLOT_STRUCT_SIZE*2 @ SLOT2 (2)
- update_ssg_eg
- sub r5, r5, #SLOT_STRUCT_SIZE @ SLOT3 (1)
- update_ssg_eg
- add r5, r5, #SLOT_STRUCT_SIZE*2 @ SLOT4 (3)
+#if 0
+ subs r6, r6, #1 @ disabled variant: one slot per pass, memory order 0,1,2,3
+ addne r5, r5, #SLOT_STRUCT_SIZE
+#else
+ add r5, r5, #SLOT_STRUCT_SIZE*2 @ active variant: two slots per pass, order 0,2 then 1,3
update_ssg_eg
+ subs r6, r6, #2 @ two slots done in this pass
+ subne r5, r5, #SLOT_STRUCT_SIZE @ more to do: step back from index 2 to index 1
+#endif
+ bne ssg_upd_loop @ flags come from the subs above
sub r5, r5, #SLOT_STRUCT_SIZE*3 @ r5 is at slot index 3 here; rewind to index 0
@ -- EG --
@ advance eg_timer; for every overflow step bump eg_cnt and update all 4 slots
@ NOTE(review): the eg_done label and any branch back to eg_loop are not visible in this excerpt
add r8, r8, r9 @ eg_timer += eg_timer_add
cmp r8, #EG_TIMER_OVERFLOW
bcc eg_done @ timer still below the overflow threshold: nothing to update
+ ldr r1, [lr, #0x3c] @ eg_cnt, now loaded only when an update is needed
eg_loop:
sub r8, r8, #EG_TIMER_OVERFLOW @ consume one overflow period
add r1, r1, #1 @ eg_cnt++
cmp r1, #4096
movge r1, #1 @ eg_cnt wraps from 4096 back to 1
- @ SLOT1 (0)
- @ r5=slot, r1=eg_cnt, trashes: r0,r2,r3
- update_eg_phase_slot SLOT1
- add r5, r5, #SLOT_STRUCT_SIZE*2 @ SLOT2 (2)
- update_eg_phase_slot SLOT2
- sub r5, r5, #SLOT_STRUCT_SIZE @ SLOT3 (1)
- update_eg_phase_slot SLOT3
- add r5, r5, #SLOT_STRUCT_SIZE*2 @ SLOT4 (3)
- update_eg_phase_slot SLOT4
+
+ mov r6, #4 @ r6 = slots still to process
+eg_upd_loop:
+ update_eg_phase_slot
+#if 1
+ subs r6, r6, #1 @ active variant: one slot per pass, memory order 0,1,2,3
+ addne r5, r5, #SLOT_STRUCT_SIZE @ more to do: step to the next slot
+#else
+ add r5, r5, #SLOT_STRUCT_SIZE*2 @ disabled variant: two slots per pass, order 0,2 then 1,3
+ update_eg_phase_slot
+ subs r6, r6, #2
+ subne r5, r5, #SLOT_STRUCT_SIZE
+#endif
+ bne eg_upd_loop @ flags come from the subs above
cmp r8, #EG_TIMER_OVERFLOW
sub r5, r5, #SLOT_STRUCT_SIZE*3 @ rewind r5 from slot index 3 to index 0 (does not touch flags)
beq crl_loop @ NOTE(review): taken only when r8 equals EG_TIMER_OVERFLOW exactly - confirm against the full file
@ output interpolation
@ picks, per slot, vol_out (+0x34), vol_ipol (+0x36) or their average, then
@ packs the four results as halfword pairs into r6 and r7
-#if 0 // too expensive on slow platforms
+#if defined(INTERPOL)
+#if 1 // possibly too expensive for slow platforms?
@ basic interpolator, interpolate in middle region, else use closer value
mov r3, r8, lsr #EG_SH @ eg_timer, [0..3<<EG_SH) after loop
cmp r3, #(EG_TIMER_OVERFLOW>>EG_SH)/2
- bgt 0f @ mix is vol_out
-
- ldrh r0, [r5,#0x36] @ SLOT1 vol_ipol
- lsleq r2, r6, #16
- addeq r0, r0, r2, lsr #16
- lsreq r0, r0, #1
- mov r6, r6, lsr #16
- orr r6, r0, r6, lsl #16
-
- ldrh r0, [r5,#0x36+SLOT_STRUCT_SIZE*2] @ SLOT2 vol_ipol
- addeq r0, r0, r6, lsr #16
- lsreq r0, r0, #1
- mov r6, r6, lsl #16
- orr r6, r6, r0
- ror r6, r6, #16
-
- ldrh r0, [r5,#0x36+SLOT_STRUCT_SIZE] @ SLOT3 vol_ipol
- lsleq r2, r7, #16
- addeq r0, r0, r2, lsr #16
- lsreq r0, r0, #1
- mov r7, r7, lsr #16
- orr r7, r0, r7, lsl #16
-
- ldrh r0, [r5,#0x36+SLOT_STRUCT_SIZE*3] @ SLOT4 vol_ipol
- addeq r0, r0, r7, lsr #16
- lsreq r0, r0, #1
- mov r7, r7, lsl #16
- orr r7, r7, r0
- ror r7, r7, #16
-#elif defined(INTERPOL)
+ bne 0f @ not the middle region: select vol_out or vol_ipol at 0:
+
+ ldr r6, [r5, #0x34] @ one word load fetches both vol_out and vol_ipol of the slot
+ ldr r2, [r5, #0x34+SLOT_STRUCT_SIZE*2]
+ ldr r7, [r5, #0x34+SLOT_STRUCT_SIZE]
+ ldr r3, [r5, #0x34+SLOT_STRUCT_SIZE*3]
+ add r6, r6, r6, lsl #16 @ upper halfword = low halfword + high halfword
+ lsr r6, r6, #17 @ r6 = (vol_out + vol_ipol) / 2
+ add r2, r2, r2, lsl #16 @ same averaging for the slot at index 2
+ lsr r2, r2, #17
+ add r7, r7, r7, lsl #16 @ same averaging for the slot at index 1
+ lsr r7, r7, #17
+ add r3, r3, r3, lsl #16 @ same averaging for the slot at index 3
+ lsr r3, r3, #17
+ b 1f @ averages ready: skip the select code and go pack them
+#else
@ super-basic... just take value closest to sample point
mov r3, r8, lsr #EG_SH-1 @ eg_timer, [0..3<<EG_SH) after loop
cmp r3, #(EG_TIMER_OVERFLOW>>EG_SH)
- bge 0f @ mix is vol_out
-
- ldrh r0, [r5,#0x36] @ SLOT1 vol_ipol
- mov r6, r6, lsr #16
- orr r6, r0, r6, lsl #16
-
- ldrh r0, [r5,#0x36+SLOT_STRUCT_SIZE*2] @ SLOT2 vol_ipol
- mov r6, r6, lsl #16
- orr r6, r6, r0
- ror r6, r6, #16
+#endif
- ldrh r0, [r5,#0x36+SLOT_STRUCT_SIZE] @ SLOT3 vol_ipol
- mov r7, r7, lsr #16
- orr r7, r0, r7, lsl #16
+0: ldrgeh r6, [r5, #0x34] @ ge (from the cmp above): take vol_out for every slot
+ ldrlth r6, [r5, #0x36] @ lt (from the cmp above): take vol_ipol for every slot
+ ldrgeh r2, [r5, #0x34+SLOT_STRUCT_SIZE*2]
+ ldrlth r2, [r5, #0x36+SLOT_STRUCT_SIZE*2]
+ ldrgeh r7, [r5, #0x34+SLOT_STRUCT_SIZE]
+ ldrlth r7, [r5, #0x36+SLOT_STRUCT_SIZE]
+ ldrgeh r3, [r5, #0x34+SLOT_STRUCT_SIZE*3]
+ ldrlth r3, [r5, #0x36+SLOT_STRUCT_SIZE*3]
- ldrh r0, [r5,#0x36+SLOT_STRUCT_SIZE*3] @ SLOT4 vol_ipol
- mov r7, r7, lsl #16
- orr r7, r7, r0
- ror r7, r7, #16
+#else
+ ldrh r6, [r5, #0x34] @ no INTERPOL: plain vol_out values for all slots
+ ldrh r2, [r5, #0x34+SLOT_STRUCT_SIZE*2]
+ ldrh r7, [r5, #0x34+SLOT_STRUCT_SIZE]
+ ldrh r3, [r5, #0x34+SLOT_STRUCT_SIZE*3]
#endif
-0:
+1: orr r6, r6, r2, lsl #16 @ r6 = slot index 0 value (low half) | slot index 2 value (high half)
+ orr r7, r7, r3, lsl #16 @ r7 = slot index 1 value (low half) | slot index 3 value (high half)
@ -- SLOT1 --
@ NOTE(review): the lines between PIC_LDR and strne are not visible in this excerpt
PIC_LDR(r3, r2, ym_tl_tab)
strne r1, [r11], #4 @ conditional word store through r11, post-incremented by 4
b crl_do_phase
-ctl_sample_skip:
- and r1, r12, #1
- add r1, r1, #1
- add r11,r11, r1, lsl #2
- b crl_do_phase
-
ctl_sample_mono: @ add r0 into the word at r11 and step to the next word
ldr r1, [r11] @ r1 = current word at r11
add r1, r0, r1 @ accumulate r0 into it
str r1, [r11], #4 @ write back, r11 += 4
+ b crl_do_phase @ explicit branch needed now that ctl_sample_skip sits in between
+
+ctl_sample_skip: @ write nothing, only advance r11
+ and r1, r12, #1 @ r1 = bit 0 of r12
+ add r1, r1, #1 @ r1 = 1 or 2 words to skip
+ add r11,r11, r1, lsl #2 @ r11 += r1 * 4, then fall through to crl_do_phase
crl_do_phase:
@ -- PHASE UPDATE --
@ adds the four words at lr+0x20..0x2c to the four words at lr+0x10..0x1c
@ and stores the sums back at lr+0x10..0x1c; r6 and r7 are used as scratch
add r5, lr, #0x10 @ r5 = address of the first accumulator word
- ldmia r5, {r0-r1}
- add r5, lr, #0x20
- ldmia r5, {r2-r3}
- add r5, lr, #0x10
- add r0, r0, r2
- add r1, r1, r3
- stmia r5!,{r0-r1}
- ldmia r5, {r0-r1}
- add r5, lr, #0x28
- ldmia r5, {r2-r3}
- add r5, lr, #0x18
- add r0, r0, r2
- add r1, r1, r3
- stmia r5, {r0-r1}
+ ldmia r5, {r0-r3,r6-r7} @ r0-r3 = words at lr+0x10..0x1c, r6-r7 = words at lr+0x20 and lr+0x24
+ add r0, r0, r6 @ first accumulator += first addend
+ add r1, r1, r7 @ second accumulator += second addend
+ ldr r6, [r5, #0x18] @ r6 = word at lr+0x28 (third addend)
+ ldr r7, [r5, #0x1c] @ r7 = word at lr+0x2c (fourth addend)
+ add r2, r2, r6
+ add r3, r3, r7
+ stmia r5, {r0-r3} @ write all four sums back in one store, r5 unchanged
tst r12, #8 @ same LFO bit as tested before the loop
bne crl_loop_lfo @ bit set: next iteration via the LFO entry
crl_loop_end: @ loop exit: write the live register state back to the block at lr
-@ stmia lr, {r6,r7} @ save volumes (for debug)
str r8, [lr, #0x44] @ eg_timer (same offset it was loaded from)
str r12, [lr, #0x4c] @ pack (for lfo_ampm)
str r4, [lr, #0x50] @ was_update