sound, ym2612 optimizations and fixes
author kub <derkub@gmail.com>
Tue, 28 Dec 2021 16:43:25 +0000 (17:43 +0100)
committer kub <derkub@gmail.com>
Tue, 28 Dec 2021 16:43:25 +0000 (17:43 +0100)
pico/sound/ym2612.c
pico/sound/ym2612_arm.S

index ccdce77..418c1b3 100644 (file)
@@ -1067,10 +1067,10 @@ static int update_algo_channel(chan_rend_context *ct, unsigned int eg_out, unsig
                        /*    +----C1----+     */\r
                        /* M1-+-MEM---M2-+-OUT */\r
                        /*    +----C2----+     */\r
-                       if (ct->eg_timer >= (1<<EG_SH)) break;\r
-\r
                        m2 = ct->mem;\r
                        ct->mem = c1 = c2 = ct->op1_out>>16;\r
+                       if (ct->eg_timer >= (1<<EG_SH)) break;\r
+\r
                        if( eg_out < ENV_QUIET ) {              /* SLOT 3 */\r
                                smp = op_calc(ct->phase3, eg_out, m2);\r
                        }\r
@@ -1138,13 +1138,24 @@ static void chan_render_loop(chan_rend_context *ct, int *buffer, int length)
                unsigned int eg_out, eg_out2, eg_out4;\r
 \r
                ct->eg_timer += ct->eg_timer_add;\r
-               while (ct->eg_timer >= 1<<EG_SH) {\r
-                       ct->eg_timer -= 1<<EG_SH;\r
 \r
+               if (ct->eg_timer >= 3<<EG_SH && !(ct->pack&0xf000)) {\r
+                       int cnt = (ct->eg_timer>>EG_SH)-2;\r
                        if (ct->pack & 8) { /* LFO enabled ? (test Earthworm Jim in between demo 1 and 2) */\r
-                               ct->pack = (ct->pack&0xffff) | (advance_lfo(ct->pack >> 16, ct->lfo_cnt, ct->lfo_cnt + ct->lfo_inc) << 16);\r
-                               ct->lfo_cnt += ct->lfo_inc;\r
+                               int inc = cnt*ct->lfo_inc;\r
+                               ct->pack = (ct->pack&0xffff) | (advance_lfo(ct->pack >> 16, ct->lfo_cnt, ct->lfo_cnt + inc) << 16);\r
+                               ct->lfo_cnt += inc;\r
                        }\r
+\r
+                       ct->phase1 += cnt*ct->incr1;\r
+                       ct->phase2 += cnt*ct->incr2;\r
+                       ct->phase3 += cnt*ct->incr3;\r
+                       ct->phase4 += cnt*ct->incr4;\r
+               }\r
+\r
+               while (ct->eg_timer >= 1<<EG_SH) {\r
+                       ct->eg_timer -= 1<<EG_SH;\r
+\r
                        if (ct->pack & 2)\r
                                update_ssg_eg_channel(ct);\r
 \r
@@ -1163,10 +1174,15 @@ static void chan_render_loop(chan_rend_context *ct, int *buffer, int length)
                        ct->vol_out3 =  ct->CH->SLOT[SLOT3].vol_out;\r
                        ct->vol_out4 =  ct->CH->SLOT[SLOT4].vol_out;\r
 \r
-                       if (ct->pack & 4) goto disabled; /* output disabled */\r
-\r
-                       /* calculate channel sample */\r
                        if (ct->eg_timer < (2<<EG_SH) || (ct->pack&0xf000)) {\r
+                               if (ct->pack & 4) goto disabled; /* output disabled */\r
+\r
+                               if (ct->pack & 8) { /* LFO enabled ? (test Earthworm Jim in between demo 1 and 2) */\r
+                                       ct->pack = (ct->pack&0xffff) | (advance_lfo(ct->pack >> 16, ct->lfo_cnt, ct->lfo_cnt + ct->lfo_inc) << 16);\r
+                                       ct->lfo_cnt += ct->lfo_inc;\r
+                               }\r
+\r
+                               /* calculate channel sample */\r
                                eg_out = ct->vol_out1;\r
                                if ( (ct->pack & 8) && (ct->pack&(1<<(SLOT1+8))) )\r
                                        eg_out += ct->pack >> (((ct->pack&0xc0)>>6)+24);\r
@@ -1175,36 +1191,37 @@ static void chan_render_loop(chan_rend_context *ct, int *buffer, int length)
                                {\r
                                        int out = 0;\r
 \r
-                                       if (ct->pack&0xf000) out = ((ct->op1_out>>16) + ((ct->op1_out<<16)>>16)) << ((ct->pack&0xf000)>>12); /* op1_out0 + op1_out1 */\r
+                                       if (ct->pack&0xf000) out = ((ct->op1_out + (ct->op1_out<<16))>>16) << ((ct->pack&0xf000)>>12); /* op1_out0 + op1_out1 */\r
                                        ct->op1_out <<= 16;\r
                                        ct->op1_out |= (unsigned short)op_calc1(ct->phase1, eg_out, out);\r
                                } else {\r
                                        ct->op1_out <<= 16; /* op1_out0 = op1_out1; op1_out1 = 0; */\r
                                }\r
-                       }\r
 \r
-                       if (ct->eg_timer < (2<<EG_SH)) {\r
-                               eg_out  = ct->vol_out3; // volume_calc(&CH->SLOT[SLOT3]);\r
-                               eg_out2 = ct->vol_out2; // volume_calc(&CH->SLOT[SLOT2]);\r
-                               eg_out4 = ct->vol_out4; // volume_calc(&CH->SLOT[SLOT4]);\r
+                               if (ct->eg_timer < (2<<EG_SH)) {\r
+                                       eg_out  = ct->vol_out3; // volume_calc(&CH->SLOT[SLOT3]);\r
+                                       eg_out2 = ct->vol_out2; // volume_calc(&CH->SLOT[SLOT2]);\r
+                                       eg_out4 = ct->vol_out4; // volume_calc(&CH->SLOT[SLOT4]);\r
+\r
+                                       if (ct->pack & 8) {\r
+                                               unsigned int add = ct->pack >> (((ct->pack&0xc0)>>6)+24);\r
+                                               if (ct->pack & (1<<(SLOT3+8))) eg_out  += add;\r
+                                               if (ct->pack & (1<<(SLOT2+8))) eg_out2 += add;\r
+                                               if (ct->pack & (1<<(SLOT4+8))) eg_out4 += add;\r
+                                       }\r
 \r
-                               if (ct->pack & 8) {\r
-                                       unsigned int add = ct->pack >> (((ct->pack&0xc0)>>6)+24);\r
-                                       if (ct->pack & (1<<(SLOT3+8))) eg_out  += add;\r
-                                       if (ct->pack & (1<<(SLOT2+8))) eg_out2 += add;\r
-                                       if (ct->pack & (1<<(SLOT4+8))) eg_out4 += add;\r
+                                       smp = update_algo_channel(ct, eg_out, eg_out2, eg_out4);\r
                                }\r
+                               /* done calculating channel sample */\r
 \r
-                               smp = update_algo_channel(ct, eg_out, eg_out2, eg_out4);\r
+disabled:\r
+                               /* update phase counters AFTER output calculations */\r
+                               ct->phase1 += ct->incr1;\r
+                               ct->phase2 += ct->incr2;\r
+                               ct->phase3 += ct->incr3;\r
+                               ct->phase4 += ct->incr4;\r
                        }\r
-                       /* done calculating channel sample */\r
 \r
-disabled:\r
-                       /* update phase counters AFTER output calculations */\r
-                       ct->phase1 += ct->incr1;\r
-                       ct->phase2 += ct->incr2;\r
-                       ct->phase3 += ct->incr3;\r
-                       ct->phase4 += ct->incr4;\r
                }\r
 \r
                /* mix sample to output buffer */\r
index 52611fc..16b3833 100644 (file)
@@ -80,8 +80,7 @@
     cmp     r2, #2
     mov     r2, #1
     mov     r2, r2, lsl r3
-    mov     r2, r2, lsr #1       @ eg_inc_val
-    add     r0, r0, r2
+    add     r0, r0, r2, lsr #1   @ volume += eg_inc_val
     blt     1f                   @ EG_REL
     beq     2f                   @ EG_SUS
 
     movlt   r3, r0, lsl r3
     ldrlth  r0, [r5,#0x1a]       @ volume, unsigned (0-1023)
     movlt   r3, r3, lsr #1       @ eg_inc_val
-    addlt   r0, r0, r3, lsl #2
+    addlt   r0, r0, r3, lsl #2   @ ...*4
 
     cmp     r2, #2
     blt     1f                   @ EG_REL
     orrne   r0, r0, #0x400                @ ssgn = 4
     strneh  r0, [r5,#0x30]
 
-    eor     r0, r0, #0x4                  @ if ( !(ssg&0x04) )
-    tst     r0, #0x4
-    cmpne   r2, #EG_ATT                   @ if ( state != EG_ATT )
+    tst     r0, #0x4                      @ if ( !(ssg&0x04) )
+    bne     9f
+    cmp     r2, #EG_ATT                   @ if ( state != EG_ATT )
     movne   r3, #0x400
     subne   r3, r3, #1
     strneh  r3, [r5,#0x1a]                @ volume = MAX_ATT
     cmp     r2, r1, lsr #LFO_SH
     beq     0f
     and     r3, r2, #0x3f
-    cmp     r2, #0x40
-    eorlt   r3, r3, #0x3f
+    tst     r2, #0x40
+    eoreq   r3, r3, #0x3f
     bic     r12,r12, #0xff000000          @ lfo_ampm &= 0xff
     orr     r12,r12, r3, lsl #1+24
 
     mov     r2, r2, lsr #2
     cmp     r2, r1, lsr #LFO_SH+2
+    andne   r3, r2, #0x1f
     bicne   r12,r12, #0xff0000
-    orrne   r12,r12, r2, lsl #16
+    orrne   r12,r12, r3, lsl #16
 
 0:
 .endm
     movne   r2, r2,  lsr #6
     addne   r2, r2,  #24
     addne   r1, r1,  r12, lsr r2
-    bic     r1, r1,  #1
 .endm
 
 
     eorne   \r, \r, #0xff   @ if (sin & 0x100) sin = 0xff - (sin&0xff);
     tst     \r, #0x200
     and     \r, \r, #0xff
+    bic     r1, r1, #1
     orr     \r, \r, r1, lsl #7
     mov     \r, \r, lsl #1
     ldrh    \r, [r3, \r]    @ 2ci if ne
     ldr     r2, [lr, #0x18]
     ldr     r0, [lr, #0x38] @ mem (signed)
     mov     r2, r2, lsr #16
-    add     r0, r2, r0, lsr #1
+    add     r0, r2, r0, asr #1
     lookup_tl r0                  @ r0=c2
 
 0:
-
     @ SLOT4
     make_eg_out SLOT4
     cmp     r1, #ENV_QUIET
     movcs   r0, #0
     bcs     1f
     ldr     r2, [lr, #0x1c]
-    mov     r0, r0, lsr #1
+    mov     r0, r0, asr #1
     add     r0, r0, r2, lsr #16
     lookup_tl r0                  @ r0=output smp
 
     movcs   r2, #0
     bcs     2f
     ldr     r2, [lr, #0x14]       @ 1ci
-    mov     r5, r10, lsr #17
+    mov     r5, r10, asr #17
     add     r2, r5, r2, lsr #16
     lookup_tl r2                  @ r2=mem
 
     ldr     r2, [lr, #0x18]
     ldr     r0, [lr, #0x38] @ mem (signed)
     mov     r2, r2, lsr #16
-    add     r0, r2, r0, lsr #1
+    add     r0, r2, r0, asr #1
     lookup_tl r0                 @ r0=c2
 
 0:
     movcs   r0, #0
     bcs     1f
     ldr     r2, [lr, #0x1c]
-    mov     r0, r0, lsr #1
+    mov     r0, r0, asr #1
     add     r0, r0, r2, lsr #16
     lookup_tl r0                 @ r0=output smp
 
     ldr     r2, [lr, #0x18]
     ldr     r0, [lr, #0x38] @ mem (signed)
     mov     r2, r2, lsr #16
-    add     r0, r2, r0, lsr #1
+    add     r0, r2, r0, asr #1
     lookup_tl r0                 @ r0=c2
 
 0:
     movcs   r0, #0
     bcs     1f
     ldr     r2, [lr, #0x1c]
-    mov     r0, r0, lsr #1
+    mov     r0, r0, asr #1
     add     r0, r0, r2, lsr #16
     lookup_tl r0                 @ r0=output smp
 
     movcs   r0, #0
     bcs     1f
     ldr     r2, [lr, #0x1c]
-    mov     r0, r0, lsr #1
+    mov     r0, r0, asr #1
     add     r0, r0, r2, lsr #16
     lookup_tl r0                 @ r0=output smp
 
     movcs   r2, #0
     bcs     2f
     ldr     r2, [lr, #0x14]      @ phase2
-    mov     r5, r10, lsr #17
+    mov     r5, r10, asr #17
     add     r2, r5, r2, lsr #16
     lookup_tl r2                 @ r2=mem
 
     movcs   r0, #0
     bcs     1f
     ldr     r2, [lr, #0x1c]
-    mov     r0, r0, lsr #1
+    mov     r0, r0, asr #1
     add     r0, r0, r2, lsr #16
     lookup_tl r0                 @ r0=output smp
 
     cmp     r1, #ENV_QUIET
     bcs     2f
     ldr     r2, [lr, #0x14]
-    mov     r5, r10, lsr #17
+    mov     r5, r10, asr #17
     add     r2, r5, r2, lsr #16
     lookup_tl r2
     add     r0, r0, r2            @ add to smp
     ldr     r2, [lr, #0x18]
     ldr     r0, [lr, #0x38] @ mem (signed)
     mov     r2, r2, lsr #16
-    add     r0, r2, r0, lsr #1
+    add     r0, r2, r0, asr #1
     lookup_tl r0                 @ r0=output smp
 
 0:
     cmp     r1, #ENV_QUIET
     bcs     1f
     ldr     r2, [lr, #0x1c]
-    mov     r5, r10, lsr #17
+    mov     r5, r10, asr #17
     add     r2, r5, r2, lsr #16
     lookup_tl r2
     add     r0, r0, r2           @ add to smp
     cmp     r1, #ENV_QUIET
     bcs     2f
     ldr     r2, [lr, #0x14]
-    mov     r5, r10, lsr #17
+    mov     r5, r10, asr #17
     add     r2, r5, r2, lsr #16
     lookup_tl r2
     add     r0, r0, r2           @ add to smp
     cmp     r1, #ENV_QUIET
     bcs     2f
     ldr     r2, [lr, #0x14]
-    mov     r5, r10, lsr #17
+    mov     r5, r10, asr #17
     add     r2, r5, r2, lsr #16
     lookup_tl r2
     add     r0, r0, r2           @ add to smp
@@ -712,12 +711,20 @@ crl_loop:
     subs    r8, r8, #(1<<EG_SH)
     blt     crl_smp_loop_end
 
-crl_smp_loop:
+    cmp     r8, #(2<<EG_SH)      @ calculate only for operator memory, sample,
+    tstge   r12, #0xf000         @ ...feedback
+    bne     crl_smp_loop
+
+    @ -- LFO+PHASE UPDATE, FF --
+    mov     r0, r8, lsr #EG_SH
+    sub     r0, r0, #1
+
     tst     r12, #8              @ lfo?
-    beq     lfo_done
+    beq     lfo_done_ff
 
     ldr     r2, [lr, #0x34]      @ lfo_inc
     ldr     r1, [lr, #0x30]      @ lfo_cnt
+    mul     r2, r0, r2
 
     add     r2, r2, r1
     str     r2, [lr, #0x30]
@@ -725,7 +732,23 @@ crl_smp_loop:
     @ r12=lfo_ampm[31:16], r1=lfo_cnt_old, r2=lfo_cnt
     advance_lfo_m
 
-lfo_done:
+lfo_done_ff:
+    add     lr, lr, #0x10
+    ldmia   lr, {r1-r3,r5-r7}
+    mul     r6, r0, r6
+    mul     r7, r0, r7
+    add     r1, r1, r6
+    add     r2, r2, r7
+    ldr     r6, [lr, #0x18]
+    ldr     r7, [lr, #0x1c]
+    mul     r6, r0, r6
+    mul     r7, r0, r7
+    add     r3, r3, r6
+    add     r5, r5, r7
+    stmia   lr, {r1-r3,r5}
+    sub     lr, lr, #0x10
+
+crl_smp_loop:
     ldr     r5, [lr, #0x40]      @ CH
 #if defined(SSG_EG)
     tst     r12, #0x02              @ ssg_enabled?
@@ -758,7 +781,8 @@ ssg_done:
 
     @ -- EG --
     tst     r4, #0x30
-    subnes  r4, r4, #0x10
+    subne   r4, r4, #0x10
+    tst     r4, #0x30
     bne     eg_done
     orr     r4, r4, #0x30
 
@@ -784,15 +808,28 @@ eg_upd_loop:
     sub     r5, r5, #SLOT_STRUCT_SIZE*3
 
 eg_done:
+    cmp     r8, #(2<<EG_SH)      @ calculate only for operator memory, sample,
+    tstge   r12, #0xf000         @ ...feedback
+    beq     crl_ff
+
     @ -- disabled? --
-    tst     r12, #0x4
     mov     r0, #0
+    tst     r12, #0x4
     bne     crl_algo_done
 
-    cmp     r8, #(2<<EG_SH)      @ calculate only for operator memory, sample,
-    tstge   r12, #0xf000         @ ...feedback
-    beq     crl_algo_done
+    tst     r12, #8              @ lfo?
+    beq     lfo_done
 
+    ldr     r2, [lr, #0x34]      @ lfo_inc
+    ldr     r1, [lr, #0x30]      @ lfo_cnt
+
+    add     r2, r2, r1
+    str     r2, [lr, #0x30]
+
+    @ r12=lfo_ampm[31:16], r1=lfo_cnt_old, r2=lfo_cnt
+    advance_lfo_m
+
+lfo_done:
     ldrh    r6, [r5, #0x34]      @ vol_out values for all slots
     ldrh    r2, [r5, #0x34+SLOT_STRUCT_SIZE*2]
     ldrh    r7, [r5, #0x34+SLOT_STRUCT_SIZE]
@@ -878,6 +915,7 @@ crl_algo_done:
     stmia   lr, {r1-r3,r5}
     sub     lr, lr, #0x10
 
+crl_ff:
     subs    r8, r8, #(1<<EG_SH)
     bge     crl_smp_loop