platform ps2, handle audio similar to psp
[picodrive.git] / pico / sound / ym2612.c
index 5c52785..ee84da2 100644 (file)
@@ -5,6 +5,9 @@
 **\r
 ** SSG-EG was also removed, because it's rarely used, Sega2.doc even does not\r
 ** document it ("proprietary") and tells to write 0 to SSG-EG control register.\r
+**\r
+** updated with fixes from mame 0.216 (file version 1.5.1) (kub)\r
+** SSG-EG readded from GenPlus (kub)\r
 */\r
 \r
 /*\r
 #include <string.h>\r
 #include <math.h>\r
 \r
+#include "../pico_int.h"\r
 #include "ym2612.h"\r
 \r
 #ifndef EXTERNAL_YM2612\r
@@ -124,7 +128,7 @@ extern YM2612 *ym2612_940;
 \r
 #endif\r
 \r
-void memset32(int *dest, int c, int count);\r
+void memset32(void *dest, int c, int count);\r
 \r
 \r
 #ifndef __GNUC__\r
@@ -136,7 +140,7 @@ void memset32(int *dest, int c, int count);
 #endif\r
 \r
 #ifndef INLINE\r
-#define INLINE static __inline\r
+#define INLINE __inline\r
 #endif\r
 \r
 #ifndef M_PI\r
@@ -148,7 +152,7 @@ void memset32(int *dest, int c, int count);
 \r
 #define FREQ_SH                        16  /* 16.16 fixed point (frequency calculations) */\r
 #define EG_SH                  16  /* 16.16 fixed point (envelope generator timing) */\r
-#define LFO_SH                 25  /*  7.25 fixed point (LFO calculations)       */\r
+#define LFO_SH                 24  /*  8.24 fixed point (LFO calculations)       */\r
 #define TIMER_SH               16  /* 16.16 fixed point (timers calculations)    */\r
 \r
 #define ENV_BITS               10\r
@@ -172,16 +176,6 @@ void memset32(int *dest, int c, int count);
 \r
 #define EG_TIMER_OVERFLOW (3*(1<<EG_SH)) /* envelope generator timer overflows every 3 samples (on real chip) */\r
 \r
-#define MAXOUT         (+32767)\r
-#define MINOUT         (-32768)\r
-\r
-/* limitter */\r
-#define Limit(val, max,min) { \\r
-       if ( val > max )      val = max; \\r
-       else if ( val < min ) val = min; \\r
-}\r
-\r
-\r
 /*  TL_TAB_LEN is calculated as:\r
 *   13 - sinus amplitude bits     (Y axis)\r
 *   2  - sinus sign bit           (Y axis)\r
@@ -199,6 +193,8 @@ UINT16 ym_tl_tab2[13*TL_RES_LEN];
 /* sin waveform table in 'decibel' scale (use only period/4 values) */\r
 static UINT16 ym_sin_tab[256];\r
 \r
+static int ym_init_tab;\r
+\r
 /* sustain level table (3dB per step) */\r
 /* bit0, bit1, bit2, bit3, bit4, bit5, bit6 */\r
 /* 1,    2,    4,    8,    16,   32,   64   (value)*/\r
@@ -287,7 +283,7 @@ O(18),O(18),O(18),O(18),O(18),O(18),O(18),O(18),
 O(18),O(18),O(18),O(18),O(18),O(18),O(18),O(18),\r
 \r
 /* rates 00-11 */\r
-O( 0),O( 1),O( 2),O( 3),\r
+O(18),O(18),O( 2),O( 3),\r
 O( 0),O( 1),O( 2),O( 3),\r
 O( 0),O( 1),O( 2),O( 3),\r
 O( 0),O( 1),O( 2),O( 3),\r
@@ -328,10 +324,10 @@ O(16),O(16),O(16),O(16),O(16),O(16),O(16),O(16)
 #define O(a) (a*1)\r
 static const UINT8 eg_rate_shift[32+64+32]={   /* Envelope Generator counter shifts (32 + 64 rates + 32 RKS) */\r
 /* 32 infinite time rates */\r
-O(0),O(0),O(0),O(0),O(0),O(0),O(0),O(0),\r
-O(0),O(0),O(0),O(0),O(0),O(0),O(0),O(0),\r
-O(0),O(0),O(0),O(0),O(0),O(0),O(0),O(0),\r
-O(0),O(0),O(0),O(0),O(0),O(0),O(0),O(0),\r
+O(11),O(11),O(11),O(11),O(11),O(11),O(11),O(11),\r
+O(11),O(11),O(11),O(11),O(11),O(11),O(11),O(11),\r
+O(11),O(11),O(11),O(11),O(11),O(11),O(11),O(11),\r
+O(11),O(11),O(11),O(11),O(11),O(11),O(11),O(11),\r
 \r
 /* rates 00-11 */\r
 O(11),O(11),O(11),O(11),\r
@@ -517,7 +513,7 @@ static INT32 lfo_pm_table[128*8*32]; /* 128 combinations of 7 bits meaningful (o
        but LFO works with one more bit of a precision so we really need 4096 elements */\r
 static UINT32 fn_table[4096];  /* fnumber->increment counter */\r
 \r
-static int g_lfo_ampm = 0;\r
+static int g_lfo_ampm;\r
 \r
 /* register number to channel number , slot offset */\r
 #define OPN_CHAN(N) (N&3)\r
@@ -531,7 +527,7 @@ static int g_lfo_ampm = 0;
 \r
 \r
 /* OPN Mode Register Write */\r
-INLINE void set_timers( int v )\r
+static INLINE void set_timers( int v )\r
 {\r
        /* b7 = CSM MODE */\r
        /* b6 = 3 slot mode */\r
@@ -552,33 +548,58 @@ INLINE void set_timers( int v )
                ym2612.OPN.ST.status &= ~1;\r
 }\r
 \r
+static INLINE void recalc_volout(FM_SLOT *SLOT) /* refresh SLOT->vol_out from SLOT->volume, SLOT->ssg and SLOT->tl */\r
+{\r
+       INT16 vol_out = SLOT->volume;\r
+       if ((SLOT->ssg&0x0c) == 0x0c) /* ssg bits 0x08 and 0x04 both set: use the level mirrored around 0x200 */\r
+               vol_out = (0x200 - vol_out) & MAX_ATT_INDEX;\r
+       SLOT->vol_out = vol_out + SLOT->tl; /* total level is added on top of the envelope level */\r
+}\r
 \r
-INLINE void FM_KEYON(int c , int s )\r
+static INLINE void FM_KEYON(int c , int s )\r
 {\r
        FM_SLOT *SLOT = &ym2612.CH[c].SLOT[s];\r
        if( !SLOT->key )\r
        {\r
                SLOT->key = 1;\r
                SLOT->phase = 0;                /* restart Phase Generator */\r
-               SLOT->state = EG_ATT;   /* phase -> Attack */\r
+               SLOT->ssg ^= SLOT->ssgn;\r
+               SLOT->ssgn = 0;\r
+               SLOT->state = (SLOT->sl == MIN_ATT_INDEX) ? EG_SUS : EG_DEC;\r
+               if (SLOT->ar_ksr < 32+62) {\r
+                       if (SLOT->volume > MIN_ATT_INDEX) SLOT->state = EG_ATT;\r
+               } else {\r
+                       SLOT->volume = MIN_ATT_INDEX;\r
+               }\r
+               recalc_volout(SLOT);\r
                ym2612.slot_mask |= (1<<s) << (c*4);\r
        }\r
 }\r
 \r
-INLINE void FM_KEYOFF(int c , int s )\r
+static INLINE void FM_KEYOFF(int c , int s )\r
 {\r
        FM_SLOT *SLOT = &ym2612.CH[c].SLOT[s];\r
        if( SLOT->key )\r
        {\r
                SLOT->key = 0;\r
-               if (SLOT->state>EG_REL)\r
+               if (SLOT->state>EG_REL) {\r
                        SLOT->state = EG_REL;/* phase -> Release */\r
+                       if (SLOT->ssg&0x08) {\r
+                               if (SLOT->ssg&0x04)\r
+                                       SLOT->volume = (0x200 - SLOT->volume);\r
+                               if (SLOT->volume >= 0x200) {\r
+                                       SLOT->volume = MAX_ATT_INDEX;\r
+                                       SLOT->state  = EG_OFF;\r
+                               }\r
+                       }\r
+               }\r
+               SLOT->vol_out = SLOT->volume + SLOT->tl;\r
        }\r
 }\r
 \r
 \r
 /* set detune & multiple */\r
-INLINE void set_det_mul(FM_CH *CH, FM_SLOT *SLOT, int v)\r
+static INLINE void set_det_mul(FM_CH *CH, FM_SLOT *SLOT, int v)\r
 {\r
        SLOT->mul = (v&0x0f)? (v&0x0f)*2 : 1;\r
        SLOT->DT  = ym2612.OPN.ST.dt_tab[(v>>4)&7];\r
@@ -586,45 +607,45 @@ INLINE void set_det_mul(FM_CH *CH, FM_SLOT *SLOT, int v)
 }\r
 \r
 /* set total level */\r
-INLINE void set_tl(FM_SLOT *SLOT, int v)\r
+static INLINE void set_tl(FM_SLOT *SLOT, int v)\r
 {\r
        SLOT->tl = (v&0x7f)<<(ENV_BITS-7); /* 7bit TL */\r
+       if (SLOT->state > EG_REL)\r
+               recalc_volout(SLOT);\r
 }\r
 \r
 /* set attack rate & key scale  */\r
-INLINE void set_ar_ksr(FM_CH *CH, FM_SLOT *SLOT, int v)\r
+static INLINE void set_ar_ksr(FM_CH *CH, FM_SLOT *SLOT, int v)\r
 {\r
        UINT8 old_KSR = SLOT->KSR;\r
+       int eg_sh_ar, eg_sel_ar;\r
 \r
        SLOT->ar = (v&0x1f) ? 32 + ((v&0x1f)<<1) : 0;\r
+       SLOT->ar_ksr = SLOT->ar + SLOT->ksr;\r
 \r
        SLOT->KSR = 3-(v>>6);\r
        if (SLOT->KSR != old_KSR)\r
        {\r
                CH->SLOT[SLOT1].Incr=-1;\r
        }\r
+\r
+       /* refresh Attack rate */\r
+       if ((SLOT->ar_ksr) < 32+62)\r
+       {\r
+               eg_sh_ar  = eg_rate_shift [SLOT->ar_ksr];\r
+               eg_sel_ar = eg_rate_select[SLOT->ar_ksr];\r
+       }\r
        else\r
        {\r
-               int eg_sh_ar, eg_sel_ar;\r
-\r
-               /* refresh Attack rate */\r
-               if ((SLOT->ar + SLOT->ksr) < 32+62)\r
-               {\r
-                       eg_sh_ar  = eg_rate_shift [SLOT->ar  + SLOT->ksr ];\r
-                       eg_sel_ar = eg_rate_select[SLOT->ar  + SLOT->ksr ];\r
-               }\r
-               else\r
-               {\r
-                       eg_sh_ar  = 0;\r
-                       eg_sel_ar = 17;\r
-               }\r
-\r
-               SLOT->eg_pack_ar = eg_inc_pack[eg_sel_ar] | (eg_sh_ar<<24);\r
+               eg_sh_ar  = 0;\r
+               eg_sel_ar = 18;\r
        }\r
+\r
+       SLOT->eg_pack_ar = eg_inc_pack[eg_sel_ar] | (eg_sh_ar<<24);\r
 }\r
 \r
 /* set decay rate */\r
-INLINE void set_dr(FM_SLOT *SLOT, int v)\r
+static INLINE void set_dr(FM_SLOT *SLOT, int v)\r
 {\r
        int eg_sh_d1r, eg_sel_d1r;\r
 \r
@@ -637,7 +658,7 @@ INLINE void set_dr(FM_SLOT *SLOT, int v)
 }\r
 \r
 /* set sustain rate */\r
-INLINE void set_sr(FM_SLOT *SLOT, int v)\r
+static INLINE void set_sr(FM_SLOT *SLOT, int v)\r
 {\r
        int eg_sh_d2r, eg_sel_d2r;\r
 \r
@@ -650,12 +671,15 @@ INLINE void set_sr(FM_SLOT *SLOT, int v)
 }\r
 \r
 /* set release rate */\r
-INLINE void set_sl_rr(FM_SLOT *SLOT, int v)\r
+static INLINE void set_sl_rr(FM_SLOT *SLOT, int v)\r
 {\r
        int eg_sh_rr, eg_sel_rr;\r
 \r
        SLOT->sl = sl_table[ v>>4 ];\r
 \r
+       if (SLOT->state == EG_DEC && (SLOT->volume >= (INT32)(SLOT->sl)))\r
+               SLOT->state = EG_SUS;\r
+\r
        SLOT->rr  = 34 + ((v&0x0f)<<2);\r
 \r
        eg_sh_rr  = eg_rate_shift [SLOT->rr  + SLOT->ksr];\r
@@ -666,7 +690,7 @@ INLINE void set_sl_rr(FM_SLOT *SLOT, int v)
 \r
 \r
 \r
-INLINE signed int op_calc(UINT32 phase, unsigned int env, signed int pm)\r
+static INLINE signed int op_calc(UINT32 phase, unsigned int env, signed int pm)\r
 {\r
        int ret, sin = (phase>>16) + (pm>>1);\r
        int neg = sin & 0x200;\r
@@ -683,7 +707,7 @@ INLINE signed int op_calc(UINT32 phase, unsigned int env, signed int pm)
        return neg ? -ret : ret;\r
 }\r
 \r
-INLINE signed int op_calc1(UINT32 phase, unsigned int env, signed int pm)\r
+static INLINE signed int op_calc1(UINT32 phase, unsigned int env, signed int pm)\r
 {\r
        int ret, sin = (phase+pm)>>16;\r
        int neg = sin & 0x200;\r
@@ -701,7 +725,7 @@ INLINE signed int op_calc1(UINT32 phase, unsigned int env, signed int pm)
 \r
 #if !defined(_ASM_YM2612_C) || defined(EXTERNAL_YM2612)\r
 /* advance LFO to next sample */\r
-INLINE int advance_lfo(int lfo_ampm, UINT32 lfo_cnt_old, UINT32 lfo_cnt)\r
+static INLINE int advance_lfo(int lfo_ampm, UINT32 lfo_cnt_old, UINT32 lfo_cnt)\r
 {\r
        UINT8 pos;\r
        UINT8 prev_pos;\r
@@ -715,12 +739,12 @@ INLINE int advance_lfo(int lfo_ampm, UINT32 lfo_cnt_old, UINT32 lfo_cnt)
        if (prev_pos != pos)\r
        {\r
                lfo_ampm &= 0xff;\r
-               /* triangle */\r
+               /* triangle (inverted) */\r
                /* AM: 0 to 126 step +2, 126 to 0 step -2 */\r
                if (pos<64)\r
-                       lfo_ampm |= ((pos&63) * 2) << 8;           /* 0 - 126 */\r
+                       lfo_ampm |= ((pos^63) * 2) << 8;           /* 0 - 126 */\r
                else\r
-                       lfo_ampm |= (126 - (pos&63)*2) << 8;\r
+                       lfo_ampm |= ((pos&63) * 2) << 8;\r
        }\r
        else\r
        {\r
@@ -739,7 +763,7 @@ INLINE int advance_lfo(int lfo_ampm, UINT32 lfo_cnt_old, UINT32 lfo_cnt)
        return lfo_ampm;\r
 }\r
 \r
-INLINE void update_eg_phase(UINT16 *vol_out, FM_SLOT *SLOT, UINT32 eg_cnt)\r
+static INLINE void update_eg_phase(FM_SLOT *SLOT, UINT32 eg_cnt, UINT32 ssg_en)\r
 {\r
        INT32 volume = SLOT->volume;\r
        UINT32 pack = SLOT->eg_pack[SLOT->state - 1];\r
@@ -752,44 +776,114 @@ INLINE void update_eg_phase(UINT16 *vol_out, FM_SLOT *SLOT, UINT32 eg_cnt)
        eg_inc_val = pack >> ((eg_cnt >> shift) & 7) * 3;\r
        eg_inc_val = (1 << (eg_inc_val & 7)) >> 1;\r
 \r
-       switch (SLOT->state)\r
-       {\r
-       case EG_ATT:            /* attack phase */\r
-               volume += ( ~volume * eg_inc_val ) >> 4;\r
-               if ( volume <= MIN_ATT_INDEX )\r
+       if ((SLOT->ssg&0x08) && ssg_en) {\r
+               switch (SLOT->state)\r
                {\r
-                       volume = MIN_ATT_INDEX;\r
-                       SLOT->state = EG_DEC;\r
-               }\r
-               break;\r
+               case EG_ATT:    /* attack phase */\r
+                       volume += ( ~volume * eg_inc_val ) >> 4;\r
+                       if ( volume <= MIN_ATT_INDEX )\r
+                       {\r
+                               volume = MIN_ATT_INDEX;\r
+                               SLOT->state = (SLOT->sl == MIN_ATT_INDEX) ? EG_SUS: EG_DEC;\r
+                       }\r
+                       break;\r
 \r
-       case EG_DEC:    /* decay phase */\r
-               volume += eg_inc_val;\r
-               if ( volume >= (INT32) SLOT->sl )\r
-                       SLOT->state = EG_SUS;\r
-               break;\r
+               case EG_DEC:    /* decay phase */\r
+                       if (volume < 0x200)\r
+                               volume += 4*eg_inc_val;\r
+                       if ( volume >= (INT32) SLOT->sl )\r
+                               SLOT->state = EG_SUS;\r
+                       break;\r
 \r
-       case EG_SUS:    /* sustain phase */\r
-               volume += eg_inc_val;\r
-               if ( volume >= MAX_ATT_INDEX )\r
-               {\r
-                       volume = MAX_ATT_INDEX;\r
-                       /* do not change SLOT->state (verified on real chip) */\r
+               case EG_SUS:    /* sustain phase */\r
+                       if (volume < 0x200)\r
+                               volume += 4*eg_inc_val;\r
+                       break;\r
+\r
+               case EG_REL:    /* release phase */\r
+                       if (volume < 0x200)\r
+                               volume += 4*eg_inc_val;\r
+                       if ( volume >= 0x200 )\r
+                       {\r
+                               volume = MAX_ATT_INDEX;\r
+                               SLOT->state = EG_OFF;\r
+                       }\r
+                       break;\r
                }\r
-               break;\r
 \r
-       case EG_REL:    /* release phase */\r
-               volume += eg_inc_val;\r
-               if ( volume >= MAX_ATT_INDEX )\r
+               SLOT->vol_out = volume + SLOT->tl;\r
+               if ((SLOT->ssg&0x04) && (SLOT->state > EG_REL))\r
+                       SLOT->vol_out = ((0x200 - volume) & MAX_ATT_INDEX) + SLOT->tl;\r
+       } else {\r
+               switch (SLOT->state)\r
                {\r
-                       volume = MAX_ATT_INDEX;\r
-                       SLOT->state = EG_OFF;\r
+               case EG_ATT:            /* attack phase */\r
+                       volume += ( ~volume * eg_inc_val ) >> 4;\r
+                       if ( volume <= MIN_ATT_INDEX )\r
+                       {\r
+                               volume = MIN_ATT_INDEX;\r
+                               SLOT->state = (SLOT->sl == MIN_ATT_INDEX) ? EG_SUS: EG_DEC;\r
+                       }\r
+                       break;\r
+\r
+               case EG_DEC:    /* decay phase */\r
+                       volume += eg_inc_val;\r
+                       if ( volume >= (INT32) SLOT->sl )\r
+                               SLOT->state = EG_SUS;\r
+                       break;\r
+\r
+               case EG_SUS:    /* sustain phase */\r
+                       volume += eg_inc_val;\r
+                       if ( volume >= MAX_ATT_INDEX )\r
+                       {\r
+                               volume = MAX_ATT_INDEX;\r
+                               /* do not change SLOT->state (verified on real chip) */\r
+                       }\r
+                       break;\r
+\r
+               case EG_REL:    /* release phase */\r
+                       volume += eg_inc_val;\r
+                       if ( volume >= MAX_ATT_INDEX )\r
+                       {\r
+                               volume = MAX_ATT_INDEX;\r
+                               SLOT->state = EG_OFF;\r
+                       }\r
+                       break;\r
                }\r
-               break;\r
-       }\r
 \r
+               SLOT->vol_out = volume + SLOT->tl;\r
+       }\r
        SLOT->volume = volume;\r
-       *vol_out = SLOT->tl + volume; /* tl is 7bit<<3, volume 0-1023 (0-2039 total) */\r
+}\r
+\r
+static INLINE UINT32 update_ssg_eg_phase(FM_SLOT *SLOT, UINT32 phase) /* SSG-EG step for one slot; returns the phase the caller should store (either the input or 0) */\r
+{\r
+       if (SLOT->ssg&0x01) { /* bit 0x01 set: the envelope state is not restarted in this branch */\r
+               if (SLOT->ssg&0x02) {\r
+                       SLOT->ssg ^= SLOT->ssgn ^ 4; /* with ssgn == 0 this flips ssg bit 0x04; with ssgn == 4 it leaves ssg unchanged */\r
+                       SLOT->ssgn = 4; /* FM_KEYON XORs ssgn back into ssg and clears it */\r
+               }\r
+\r
+               if (SLOT->state != EG_ATT && !(SLOT->ssg&0x04))\r
+                       SLOT->volume  = MAX_ATT_INDEX; /* outside attack and with bit 0x04 clear: force maximum attenuation */\r
+       } else {\r
+               if (SLOT->ssg&0x02) {\r
+                       SLOT->ssg ^= 4; /* toggle bit 0x04 on every call ... */\r
+                       SLOT->ssgn ^= 4; /* ... and mirror the toggle in ssgn */\r
+               } else\r
+                       phase = 0; /* bit 0x02 clear: the returned phase is reset */\r
+\r
+               if (SLOT->state != EG_ATT) { /* same state selection as in FM_KEYON */\r
+                       SLOT->state = (SLOT->sl == MIN_ATT_INDEX) ? EG_SUS : EG_DEC;\r
+                       if (SLOT->ar_ksr < 32+62) {\r
+                               if (SLOT->volume > MIN_ATT_INDEX) SLOT->state = EG_ATT;\r
+                       } else {\r
+                               SLOT->volume = MIN_ATT_INDEX; /* ar_ksr >= 32+62: no attack state, volume jumps to MIN_ATT_INDEX */\r
+                       }\r
+               }\r
+       }\r
+       recalc_volout(SLOT); /* vol_out must reflect the possibly changed volume / ssg bits */\r
+       return phase;\r
 }\r
 #endif\r
 \r
@@ -800,7 +894,8 @@ typedef struct
        UINT16 vol_out2;\r
        UINT16 vol_out3;\r
        UINT16 vol_out4;\r
-       UINT32 pad[2];\r
+       UINT32 lfo_init_sft16;\r
+       UINT32 pad;\r
        UINT32 phase1;   /* 10 */\r
        UINT32 phase2;\r
        UINT32 phase3;\r
@@ -817,7 +912,7 @@ typedef struct
        UINT32 eg_timer;\r
        UINT32 eg_timer_add;\r
        UINT32 pack;     // 4c: stereo, lastchan, disabled, lfo_enabled | pan_r, pan_l, ams[2] | AMmasks[4] | FB[4] | lfo_ampm[16]\r
-       UINT32 algo;     /* 50: algo[3], was_update */\r
+       UINT32 algo;     /* 50: algo[3], was_update, unused, upd_cnt[2], dac */\r
        INT32  op1_out;\r
 #ifdef _MIPS_ARCH_ALLEGREX\r
        UINT32 pad1[3+8];\r
@@ -826,7 +921,209 @@ typedef struct
 \r
 \r
 #if !defined(_ASM_YM2612_C) || defined(EXTERNAL_YM2612)\r
-static void chan_render_loop(chan_rend_context *ct, int *buffer, int length)\r
+#include <limits.h>\r
+static int clip(int n) /* saturate n to the signed range that fits in b = 14 bits, using shifts instead of two compares */\r
+{\r
+       unsigned b = 14, s = n < 0; /* s is 1 for negative n, otherwise 0 */\r
+       int m = s + INT_MAX; /* INT_MAX for n >= 0; NOTE(review): for n < 0 the unsigned sum is INT_MAX+1u and its conversion to int is implementation-defined (presumably INT_MIN) - confirm per target */\r
+       if (s + (n>>(b-1))) n = m >> (8*sizeof(int)-b); /* sum is nonzero when n does not fit in b bits; then n becomes m shifted down to b bits. NOTE(review): assumes >> on negative ints is an arithmetic shift */\r
+       return n;\r
+}\r
+\r
+static void update_ssg_eg_channel(chan_rend_context *ct) /* SSG-EG check for the four slots of ct->CH; may replace ct->phase1..4 */\r
+{\r
+       FM_SLOT *SLOT;\r
+\r
+       SLOT = &ct->CH->SLOT[SLOT1];\r
+       if ((SLOT->ssg&0x08) && SLOT->state > EG_REL && SLOT->volume >= 0x200) /* ssg bit 0x08 set, state above EG_REL, volume at or past 0x200 */\r
+               ct->phase1 = update_ssg_eg_phase(SLOT, ct->phase1); /* the helper returns the phase to keep */\r
+       SLOT = &ct->CH->SLOT[SLOT2];\r
+       if ((SLOT->ssg&0x08) && SLOT->state > EG_REL && SLOT->volume >= 0x200) /* same test for SLOT2 */\r
+               ct->phase2 = update_ssg_eg_phase(SLOT, ct->phase2);\r
+       SLOT = &ct->CH->SLOT[SLOT3];\r
+       if ((SLOT->ssg&0x08) && SLOT->state > EG_REL && SLOT->volume >= 0x200) /* same test for SLOT3 */\r
+               ct->phase3 = update_ssg_eg_phase(SLOT, ct->phase3);\r
+       SLOT = &ct->CH->SLOT[SLOT4];\r
+       if ((SLOT->ssg&0x08) && SLOT->state > EG_REL && SLOT->volume >= 0x200) /* same test for SLOT4 */\r
+               ct->phase4 = update_ssg_eg_phase(SLOT, ct->phase4);\r
+}\r
+\r
+static void update_eg_phase_channel(chan_rend_context *ct) /* one update_eg_phase call for each slot of ct->CH that is not EG_OFF */\r
+{\r
+       FM_SLOT *SLOT;\r
+\r
+       SLOT = &ct->CH->SLOT[SLOT1];\r
+       if (SLOT->state != EG_OFF) update_eg_phase(SLOT, ct->eg_cnt, ct->pack & 2); /* ct->pack & 2 is passed as the ssg_en argument */\r
+       SLOT = &ct->CH->SLOT[SLOT2];\r
+       if (SLOT->state != EG_OFF) update_eg_phase(SLOT, ct->eg_cnt, ct->pack & 2); /* same for SLOT2 */\r
+       SLOT = &ct->CH->SLOT[SLOT3];\r
+       if (SLOT->state != EG_OFF) update_eg_phase(SLOT, ct->eg_cnt, ct->pack & 2); /* same for SLOT3 */\r
+       SLOT = &ct->CH->SLOT[SLOT4];\r
+       if (SLOT->state != EG_OFF) update_eg_phase(SLOT, ct->eg_cnt, ct->pack & 2); /* same for SLOT4 */\r
+}\r
+\r
+static int update_algo_channel(chan_rend_context *ct, unsigned int eg_out, unsigned int eg_out2, unsigned int eg_out4) /* combine the operators for one sample: eg_out is used with phase3, eg_out2 with phase2, eg_out4 with phase4; may update ct->mem; returns the sample */\r
+{\r
+       int m2,c1,c2=0; /* Phase Modulation input for operators 2,3,4 */\r
+       int smp = 0; /* stays 0 unless the selected case assigns it */\r
+\r
+       switch( ct->algo&0x7 ) /* low three bits of ct->algo pick the routing; operators with a level >= ENV_QUIET are not evaluated */\r
+       {\r
+               case 0:\r
+               {\r
+                       /* M1---C1---MEM---M2---C2---OUT */\r
+                       m2 = ct->mem;\r
+                       c1 = ct->op1_out>>16; /* operator 1 output is read from the upper 16 bits of op1_out */\r
+                       if( eg_out2 < ENV_QUIET ) {             /* SLOT 2 */\r
+                               ct->mem = op_calc(ct->phase2, eg_out2, c1);\r
+                       }\r
+                       else ct->mem = 0;\r
+\r
+                       if( eg_out  < ENV_QUIET ) {             /* SLOT 3 */\r
+                               c2  = op_calc(ct->phase3, eg_out,  m2);\r
+                       }\r
+                       if( eg_out4 < ENV_QUIET ) {             /* SLOT 4 */\r
+                               smp = op_calc(ct->phase4, eg_out4, c2);\r
+                       }\r
+                       break;\r
+               }\r
+               case 1:\r
+               {\r
+                       /* M1------+-MEM---M2---C2---OUT */\r
+                       /*      C1-+                     */\r
+                       m2 = ct->mem;\r
+                       ct->mem = ct->op1_out>>16;\r
+                       if( eg_out2 < ENV_QUIET ) {             /* SLOT 2 */\r
+                               ct->mem+= op_calc(ct->phase2, eg_out2, 0);\r
+                       }\r
+\r
+                       if( eg_out  < ENV_QUIET ) {             /* SLOT 3 */\r
+                               c2  = op_calc(ct->phase3, eg_out,  m2);\r
+                       }\r
+                       if( eg_out4 < ENV_QUIET ) {             /* SLOT 4 */\r
+                               smp = op_calc(ct->phase4, eg_out4, c2);\r
+                       }\r
+                       break;\r
+               }\r
+               case 2:\r
+               {\r
+                       /* M1-----------------+-C2---OUT */\r
+                       /*      C1---MEM---M2-+          */\r
+                       m2 = ct->mem;\r
+                       c2 = ct->op1_out>>16;\r
+                       if( eg_out2 < ENV_QUIET ) {             /* SLOT 2 */\r
+                               ct->mem = op_calc(ct->phase2, eg_out2, 0);\r
+                       }\r
+                       else ct->mem = 0;\r
+\r
+                       if( eg_out  < ENV_QUIET ) {             /* SLOT 3 */\r
+                               c2 += op_calc(ct->phase3, eg_out,  m2);\r
+                       }\r
+                       if( eg_out4 < ENV_QUIET ) {             /* SLOT 4 */\r
+                               smp = op_calc(ct->phase4, eg_out4, c2);\r
+                       }\r
+                       break;\r
+               }\r
+               case 3:\r
+               {\r
+                       /* M1---C1---MEM------+-C2---OUT */\r
+                       /*                 M2-+          */\r
+                       c2 = ct->mem;\r
+                       c1 = ct->op1_out>>16;\r
+                       if( eg_out2 < ENV_QUIET ) {             /* SLOT 2 */\r
+                               ct->mem = op_calc(ct->phase2, eg_out2, c1);\r
+                       }\r
+                       else ct->mem = 0;\r
+\r
+                       if( eg_out  < ENV_QUIET ) {             /* SLOT 3 */\r
+                               c2 += op_calc(ct->phase3, eg_out,  0);\r
+                       }\r
+                       if( eg_out4 < ENV_QUIET ) {             /* SLOT 4 */\r
+                               smp = op_calc(ct->phase4, eg_out4, c2);\r
+                       }\r
+                       break;\r
+               }\r
+               case 4:\r
+               {\r
+                       /* M1---C1-+-OUT */\r
+                       /* M2---C2-+     */\r
+                       /* MEM: not used */\r
+\r
+                       c1 = ct->op1_out>>16;\r
+                       if( eg_out  < ENV_QUIET ) {             /* SLOT 3 */\r
+                               c2  = op_calc(ct->phase3, eg_out,  0);\r
+                       }\r
+                       if( eg_out2 < ENV_QUIET ) {             /* SLOT 2 */\r
+                               smp = op_calc(ct->phase2, eg_out2, c1);\r
+                       }\r
+                       if( eg_out4 < ENV_QUIET ) {             /* SLOT 4 */\r
+                               smp+= op_calc(ct->phase4, eg_out4, c2);\r
+                       }\r
+                       break;\r
+               }\r
+               case 5:\r
+               {\r
+                       /*    +----C1----+     */\r
+                       /* M1-+-MEM---M2-+-OUT */\r
+                       /*    +----C2----+     */\r
+                       m2 = ct->mem;\r
+                       ct->mem = c1 = c2 = ct->op1_out>>16;\r
+\r
+                       if( eg_out < ENV_QUIET ) {              /* SLOT 3 */\r
+                               smp = op_calc(ct->phase3, eg_out, m2);\r
+                       }\r
+                       if( eg_out2 < ENV_QUIET ) {             /* SLOT 2 */\r
+                               smp+= op_calc(ct->phase2, eg_out2, c1);\r
+                       }\r
+                       if( eg_out4 < ENV_QUIET ) {             /* SLOT 4 */\r
+                               smp+= op_calc(ct->phase4, eg_out4, c2);\r
+                       }\r
+                       break;\r
+               }\r
+               case 6:\r
+               {\r
+                       /* M1---C1-+     */\r
+                       /*      M2-+-OUT */\r
+                       /*      C2-+     */\r
+                       /* MEM: not used */\r
+\r
+                       c1 = ct->op1_out>>16;\r
+                       if( eg_out < ENV_QUIET ) {              /* SLOT 3 */\r
+                               smp = op_calc(ct->phase3, eg_out,  0);\r
+                       }\r
+                       if( eg_out2 < ENV_QUIET ) {             /* SLOT 2 */\r
+                               smp+= op_calc(ct->phase2, eg_out2, c1);\r
+                       }\r
+                       if( eg_out4 < ENV_QUIET ) {             /* SLOT 4 */\r
+                               smp+= op_calc(ct->phase4, eg_out4, 0);\r
+                       }\r
+                       break;\r
+               }\r
+               case 7:\r
+               {\r
+                       /* M1-+     */\r
+                       /* C1-+-OUT */\r
+                       /* M2-+     */\r
+                       /* C2-+     */\r
+                       /* MEM: not used*/\r
+\r
+                       smp = ct->op1_out>>16; /* operator 1 feeds the output directly in this routing */\r
+                       if( eg_out < ENV_QUIET ) {              /* SLOT 3 */\r
+                               smp += op_calc(ct->phase3, eg_out,  0);\r
+                       }\r
+                       if( eg_out2 < ENV_QUIET ) {             /* SLOT 2 */\r
+                               smp += op_calc(ct->phase2, eg_out2, 0);\r
+                       }\r
+                       if( eg_out4 < ENV_QUIET ) {             /* SLOT 4 */\r
+                               smp += op_calc(ct->phase4, eg_out4, 0);\r
+                       }\r
+                       break;\r
+               }\r
+       }\r
+       return smp; /* all eight values of ct->algo&0x7 are handled above, so no default case is needed */\r
+}\r
+\r
+static void chan_render_loop(chan_rend_context *ct, s32 *buffer, int length)\r
 {\r
        int scounter;                                   /* sample counter */\r
 \r
@@ -836,34 +1133,47 @@ static void chan_render_loop(chan_rend_context *ct, int *buffer, int length)
                int smp = 0;            /* produced sample */\r
                unsigned int eg_out, eg_out2, eg_out4;\r
 \r
-               if (ct->pack & 8) { /* LFO enabled ? (test Earthworm Jim in between demo 1 and 2) */\r
-                       ct->pack = (ct->pack&0xffff) | (advance_lfo(ct->pack >> 16, ct->lfo_cnt, ct->lfo_cnt + ct->lfo_inc) << 16);\r
-                       ct->lfo_cnt += ct->lfo_inc;\r
-               }\r
-\r
                ct->eg_timer += ct->eg_timer_add;\r
-               while (ct->eg_timer >= EG_TIMER_OVERFLOW)\r
-               {\r
-                       ct->eg_timer -= EG_TIMER_OVERFLOW;\r
-                       ct->eg_cnt++;\r
 \r
-                       if (ct->CH->SLOT[SLOT1].state != EG_OFF) update_eg_phase(&ct->vol_out1, &ct->CH->SLOT[SLOT1], ct->eg_cnt);\r
-                       if (ct->CH->SLOT[SLOT2].state != EG_OFF) update_eg_phase(&ct->vol_out2, &ct->CH->SLOT[SLOT2], ct->eg_cnt);\r
-                       if (ct->CH->SLOT[SLOT3].state != EG_OFF) update_eg_phase(&ct->vol_out3, &ct->CH->SLOT[SLOT3], ct->eg_cnt);\r
-                       if (ct->CH->SLOT[SLOT4].state != EG_OFF) update_eg_phase(&ct->vol_out4, &ct->CH->SLOT[SLOT4], ct->eg_cnt);\r
+               while (ct->eg_timer >= 1<<EG_SH) {\r
+                       ct->eg_timer -= 1<<EG_SH;\r
+\r
+                       if (ct->pack & 2)\r
+                               update_ssg_eg_channel(ct);\r
+\r
+                       if (ct->algo & 0x30)\r
+                               ct->algo -= 0x10;\r
+                       if (!(ct->algo & 0x30)) {\r
+                               ct->algo |= 0x30;\r
+                               ct->eg_cnt++;\r
+                               if (ct->eg_cnt >= 4096) ct->eg_cnt = 1;\r
+\r
+                               update_eg_phase_channel(ct);\r
+                       }\r
                }\r
 \r
-               if (ct->pack & 4) continue; /* output disabled */\r
+               ct->vol_out1 =  ct->CH->SLOT[SLOT1].vol_out;\r
+               ct->vol_out2 =  ct->CH->SLOT[SLOT2].vol_out;\r
+               ct->vol_out3 =  ct->CH->SLOT[SLOT3].vol_out;\r
+               ct->vol_out4 =  ct->CH->SLOT[SLOT4].vol_out;\r
+\r
+               if (ct->pack & 4) goto disabled; /* output disabled */\r
+\r
+               if (ct->pack & 8) { /* LFO enabled ? (test Earthworm Jim in between demo 1 and 2) */\r
+                       ct->pack = (ct->pack&0xffff) | (advance_lfo(ct->pack >> 16, ct->lfo_cnt, ct->lfo_cnt + ct->lfo_inc) << 16);\r
+                       ct->lfo_cnt += ct->lfo_inc;\r
+               }\r
 \r
                /* calculate channel sample */\r
                eg_out = ct->vol_out1;\r
-               if ( (ct->pack & 8) && (ct->pack&(1<<(SLOT1+8))) ) eg_out += ct->pack >> (((ct->pack&0xc0)>>6)+24);\r
+               if ( (ct->pack & 8) && (ct->pack&(1<<(SLOT1+8))) )\r
+                       eg_out += ct->pack >> (((ct->pack&0xc0)>>6)+24);\r
 \r
                if( eg_out < ENV_QUIET )        /* SLOT 1 */\r
                {\r
                        int out = 0;\r
 \r
-                       if (ct->pack&0xf000) out = ((ct->op1_out>>16) + ((ct->op1_out<<16)>>16)) << ((ct->pack&0xf000)>>12); /* op1_out0 + op1_out1 */\r
+                       if (ct->pack&0xf000) out = ((ct->op1_out + (ct->op1_out<<16))>>16) << ((ct->pack&0xf000)>>12); /* op1_out0 + op1_out1 */\r
                        ct->op1_out <<= 16;\r
                        ct->op1_out |= (unsigned short)op_calc1(ct->phase1, eg_out, out);\r
                } else {\r
@@ -881,162 +1191,22 @@ static void chan_render_loop(chan_rend_context *ct, int *buffer, int length)
                        if (ct->pack & (1<<(SLOT4+8))) eg_out4 += add;\r
                }\r
 \r
-               switch( ct->CH->ALGO )\r
-               {\r
-                       case 0:\r
-                       {\r
-                               /* M1---C1---MEM---M2---C2---OUT */\r
-                               int m2,c1,c2=0; /* Phase Modulation input for operators 2,3,4 */\r
-                               m2 = ct->mem;\r
-                               c1 = ct->op1_out>>16;\r
-                               if( eg_out  < ENV_QUIET ) {             /* SLOT 3 */\r
-                                       c2  = op_calc(ct->phase3, eg_out,  m2);\r
-                               }\r
-                               if( eg_out2 < ENV_QUIET ) {             /* SLOT 2 */\r
-                                       ct->mem = op_calc(ct->phase2, eg_out2, c1);\r
-                               }\r
-                               else ct->mem = 0;\r
-                               if( eg_out4 < ENV_QUIET ) {             /* SLOT 4 */\r
-                                       smp = op_calc(ct->phase4, eg_out4, c2);\r
-                               }\r
-                               break;\r
-                       }\r
-                       case 1:\r
-                       {\r
-                               /* M1------+-MEM---M2---C2---OUT */\r
-                               /*      C1-+                     */\r
-                               int m2,c2=0;\r
-                               m2 = ct->mem;\r
-                               ct->mem = ct->op1_out>>16;\r
-                               if( eg_out  < ENV_QUIET ) {             /* SLOT 3 */\r
-                                       c2  = op_calc(ct->phase3, eg_out,  m2);\r
-                               }\r
-                               if( eg_out2 < ENV_QUIET ) {             /* SLOT 2 */\r
-                                       ct->mem+= op_calc(ct->phase2, eg_out2, 0);\r
-                               }\r
-                               if( eg_out4 < ENV_QUIET ) {             /* SLOT 4 */\r
-                                       smp = op_calc(ct->phase4, eg_out4, c2);\r
-                               }\r
-                               break;\r
-                       }\r
-                       case 2:\r
-                       {\r
-                               /* M1-----------------+-C2---OUT */\r
-                               /*      C1---MEM---M2-+          */\r
-                               int m2,c2;\r
-                               m2 = ct->mem;\r
-                               c2 = ct->op1_out>>16;\r
-                               if( eg_out  < ENV_QUIET ) {             /* SLOT 3 */\r
-                                       c2 += op_calc(ct->phase3, eg_out,  m2);\r
-                               }\r
-                               if( eg_out2 < ENV_QUIET ) {             /* SLOT 2 */\r
-                                       ct->mem = op_calc(ct->phase2, eg_out2, 0);\r
-                               }\r
-                               else ct->mem = 0;\r
-                               if( eg_out4 < ENV_QUIET ) {             /* SLOT 4 */\r
-                                       smp = op_calc(ct->phase4, eg_out4, c2);\r
-                               }\r
-                               break;\r
-                       }\r
-                       case 3:\r
-                       {\r
-                               /* M1---C1---MEM------+-C2---OUT */\r
-                               /*                 M2-+          */\r
-                               int c1,c2;\r
-                               c2 = ct->mem;\r
-                               c1 = ct->op1_out>>16;\r
-                               if( eg_out  < ENV_QUIET ) {             /* SLOT 3 */\r
-                                       c2 += op_calc(ct->phase3, eg_out,  0);\r
-                               }\r
-                               if( eg_out2 < ENV_QUIET ) {             /* SLOT 2 */\r
-                                       ct->mem = op_calc(ct->phase2, eg_out2, c1);\r
-                               }\r
-                               else ct->mem = 0;\r
-                               if( eg_out4 < ENV_QUIET ) {             /* SLOT 4 */\r
-                                       smp = op_calc(ct->phase4, eg_out4, c2);\r
-                               }\r
-                               break;\r
-                       }\r
-                       case 4:\r
-                       {\r
-                               /* M1---C1-+-OUT */\r
-                               /* M2---C2-+     */\r
-                               /* MEM: not used */\r
-                               int c1,c2=0;\r
-                               c1 = ct->op1_out>>16;\r
-                               if( eg_out  < ENV_QUIET ) {             /* SLOT 3 */\r
-                                       c2  = op_calc(ct->phase3, eg_out,  0);\r
-                               }\r
-                               if( eg_out2 < ENV_QUIET ) {             /* SLOT 2 */\r
-                                       smp = op_calc(ct->phase2, eg_out2, c1);\r
-                               }\r
-                               if( eg_out4 < ENV_QUIET ) {             /* SLOT 4 */\r
-                                       smp+= op_calc(ct->phase4, eg_out4, c2);\r
-                               }\r
-                               break;\r
-                       }\r
-                       case 5:\r
-                       {\r
-                               /*    +----C1----+     */\r
-                               /* M1-+-MEM---M2-+-OUT */\r
-                               /*    +----C2----+     */\r
-                               int m2,c1,c2;\r
-                               m2 = ct->mem;\r
-                               ct->mem = c1 = c2 = ct->op1_out>>16;\r
-                               if( eg_out < ENV_QUIET ) {              /* SLOT 3 */\r
-                                       smp = op_calc(ct->phase3, eg_out, m2);\r
-                               }\r
-                               if( eg_out2 < ENV_QUIET ) {             /* SLOT 2 */\r
-                                       smp+= op_calc(ct->phase2, eg_out2, c1);\r
-                               }\r
-                               if( eg_out4 < ENV_QUIET ) {             /* SLOT 4 */\r
-                                       smp+= op_calc(ct->phase4, eg_out4, c2);\r
-                               }\r
-                               break;\r
-                       }\r
-                       case 6:\r
-                       {\r
-                               /* M1---C1-+     */\r
-                               /*      M2-+-OUT */\r
-                               /*      C2-+     */\r
-                               /* MEM: not used */\r
-                               int c1;\r
-                               c1 = ct->op1_out>>16;\r
-                               if( eg_out < ENV_QUIET ) {              /* SLOT 3 */\r
-                                       smp = op_calc(ct->phase3, eg_out,  0);\r
-                               }\r
-                               if( eg_out2 < ENV_QUIET ) {             /* SLOT 2 */\r
-                                       smp+= op_calc(ct->phase2, eg_out2, c1);\r
-                               }\r
-                               if( eg_out4 < ENV_QUIET ) {             /* SLOT 4 */\r
-                                       smp+= op_calc(ct->phase4, eg_out4, 0);\r
-                               }\r
-                               break;\r
-                       }\r
-                       case 7:\r
-                       {\r
-                               /* M1-+     */\r
-                               /* C1-+-OUT */\r
-                               /* M2-+     */\r
-                               /* C2-+     */\r
-                               /* MEM: not used*/\r
-                               smp = ct->op1_out>>16;\r
-                               if( eg_out < ENV_QUIET ) {              /* SLOT 3 */\r
-                                       smp += op_calc(ct->phase3, eg_out,  0);\r
-                               }\r
-                               if( eg_out2 < ENV_QUIET ) {             /* SLOT 2 */\r
-                                       smp += op_calc(ct->phase2, eg_out2, 0);\r
-                               }\r
-                               if( eg_out4 < ENV_QUIET ) {             /* SLOT 4 */\r
-                                       smp += op_calc(ct->phase4, eg_out4, 0);\r
-                               }\r
-                               break;\r
-                       }\r
-               }\r
+               smp = update_algo_channel(ct, eg_out, eg_out2, eg_out4);\r
                /* done calculating channel sample */\r
+disabled:\r
+               /* update phase counters AFTER output calculations */\r
+               ct->phase1 += ct->incr1;\r
+               ct->phase2 += ct->incr2;\r
+               ct->phase3 += ct->incr3;\r
+               ct->phase4 += ct->incr4;\r
 \r
                /* mix sample to output buffer */\r
                if (smp) {\r
+                       smp = clip(smp); /* saturate to 14 bit */\r
+                       if (ct->algo & 0x80) {\r
+                               smp &= ~0x1f; /* drop bits (DAC has 9 bits) */\r
+                               smp -= (smp < 0 ? 7:0) << 5; /* discontinuity */\r
+                       }\r
                        if (ct->pack & 1) { /* stereo */\r
                                if (ct->pack & 0x20) /* L */ /* TODO: check correctness */\r
                                        buffer[scounter*2] += smp;\r
@@ -1047,16 +1217,10 @@ static void chan_render_loop(chan_rend_context *ct, int *buffer, int length)
                        }\r
                        ct->algo |= 8;\r
                }\r
-\r
-               /* update phase counters AFTER output calculations */\r
-               ct->phase1 += ct->incr1;\r
-               ct->phase2 += ct->incr2;\r
-               ct->phase3 += ct->incr3;\r
-               ct->phase4 += ct->incr4;\r
        }\r
 }\r
 #else\r
-void chan_render_loop(chan_rend_context *ct, int *buffer, unsigned short length);\r
+void chan_render_loop(chan_rend_context *ct, s32 *buffer, unsigned short length);\r
 #endif\r
 \r
 static chan_rend_context crct;\r
@@ -1064,28 +1228,55 @@ static chan_rend_context crct;
 static void chan_render_prep(void) /* copy the OPN timing/LFO values that chan_render() reads into the shared render context crct */\r
 {\r
        crct.eg_timer_add = ym2612.OPN.eg_timer_add;\r
+       crct.lfo_init_sft16 = g_lfo_ampm << 16; /* LFO AM/PM state pre-shifted into bits 16+ so chan_render() can OR it straight into its flags word */\r
        crct.lfo_inc = ym2612.OPN.lfo_inc;\r
 }\r
 \r
-static void chan_render_finish(void)\r
+static void chan_render_finish(s32 *buffer, int length, int active_chans) /* write render-context EG state back to the chip; buffer and active_chans are not used by this body */\r
 {\r
        ym2612.OPN.eg_cnt = crct.eg_cnt;\r
        ym2612.OPN.eg_timer = crct.eg_timer;\r
-       g_lfo_ampm = crct.pack >> 16; // need_save\r
-       ym2612.OPN.lfo_cnt = crct.lfo_cnt;\r
+       ym2612.OPN.lfo_cnt += ym2612.OPN.lfo_inc * length; /* advance the chip LFO counter by length steps of lfo_inc rather than copying back crct.lfo_cnt */\r
 }\r
 \r
-static int chan_render(int *buffer, int length, int c, UINT32 flags) // flags: stereo, ?, disabled, ?, pan_r, pan_l\r
+static UINT32 update_lfo_phase(const FM_SLOT *SLOT, UINT32 block_fnum) /* returns the phase increment for SLOT with LFO phase modulation applied to block_fnum, or SLOT->Incr when the table offset is zero; reads crct.CH and crct.pack */\r
+{\r
+       UINT32 fnum_lfo;\r
+       INT32  lfo_fn_table_index_offset;\r
+       UINT8  blk;\r
+       UINT32 fn;\r
+       int fc,fdt;\r
+\r
+       fnum_lfo   = ((block_fnum & 0x7f0) >> 4) * 32 * 8; /* bits 4..10 of block_fnum select a group of 32*8 entries in lfo_pm_table */\r
+       lfo_fn_table_index_offset = lfo_pm_table[ fnum_lfo + crct.CH->pms + ((crct.pack>>16)&0xff) ]; /* low byte of pack>>16 is presumably the current LFO PM step - TODO confirm */\r
+       if (lfo_fn_table_index_offset)  /* LFO phase modulation active */\r
+       {\r
+               block_fnum = block_fnum*2 + lfo_fn_table_index_offset; /* doubled, so block sits in bits 12..14 and fnum in bits 0..11 below */\r
+               blk = (block_fnum&0x7000) >> 12;\r
+               fn  = block_fnum & 0xfff;\r
+\r
+               /* phase increment counter */\r
+               fc = (fn_table[fn]>>(7-blk));\r
+\r
+               fdt = fc + SLOT->DT[crct.CH->kcode]; /* add detune; NOTE(review): the channel kcode is used even when block_fnum comes from SL3 (3 slot mode) - confirm intended */\r
+               if (fdt < 0) fdt += fn_table[0x7ff*2] >> 2; /* negative result after detune is wrapped by adding a value derived from the last fn_table entry */\r
+\r
+               return (fdt * SLOT->mul) >> 1;\r
+       } else\r
+               return SLOT->Incr; /* no modulation: use the slot's stored increment */\r
+}\r
+\r
+static int chan_render(s32 *buffer, int length, int c, UINT32 flags) // flags: stereo, ?, disabled, ?, pan_r, pan_l\r
 {\r
        crct.CH = &ym2612.CH[c];\r
        crct.mem = crct.CH->mem_value;          /* one sample delay memory */\r
        crct.lfo_cnt = ym2612.OPN.lfo_cnt;\r
 \r
-       flags &= 0x35;\r
+       flags &= 0x37;\r
 \r
        if (crct.lfo_inc) {\r
                flags |= 8;\r
-               flags |= g_lfo_ampm << 16;\r
+               flags |= crct.lfo_init_sft16;\r
                flags |= crct.CH->AMmasks << 8;\r
                if (crct.CH->ams == 8) // no ams\r
                     flags &= ~0xf00;\r
@@ -1103,51 +1294,25 @@ static int chan_render(int *buffer, int length, int c, UINT32 flags) // flags: s
        crct.phase3 = crct.CH->SLOT[SLOT3].phase;\r
        crct.phase4 = crct.CH->SLOT[SLOT4].phase;\r
 \r
-       /* current output from EG circuit (without AM from LFO) */\r
-       crct.vol_out1 = crct.CH->SLOT[SLOT1].tl + ((UINT32)crct.CH->SLOT[SLOT1].volume);\r
-       crct.vol_out2 = crct.CH->SLOT[SLOT2].tl + ((UINT32)crct.CH->SLOT[SLOT2].volume);\r
-       crct.vol_out3 = crct.CH->SLOT[SLOT3].tl + ((UINT32)crct.CH->SLOT[SLOT3].volume);\r
-       crct.vol_out4 = crct.CH->SLOT[SLOT4].tl + ((UINT32)crct.CH->SLOT[SLOT4].volume);\r
-\r
        crct.op1_out = crct.CH->op1_out;\r
        crct.algo = crct.CH->ALGO & 7;\r
-\r
-       if(crct.CH->pms)\r
+       crct.algo |= crct.CH->upd_cnt << 4;\r
+       if (ym2612.OPN.ST.flags & ST_DAC)\r
+               crct.algo |= 0x80;\r
+\r
+       if(crct.CH->pms && (ym2612.OPN.ST.mode & 0xC0) && c == 2) {\r
+               /* 3 slot mode */\r
+               crct.incr1 = update_lfo_phase(&crct.CH->SLOT[SLOT1], ym2612.OPN.SL3.block_fnum[1]);\r
+               crct.incr2 = update_lfo_phase(&crct.CH->SLOT[SLOT2], ym2612.OPN.SL3.block_fnum[2]);\r
+               crct.incr3 = update_lfo_phase(&crct.CH->SLOT[SLOT3], ym2612.OPN.SL3.block_fnum[0]);\r
+               crct.incr4 = update_lfo_phase(&crct.CH->SLOT[SLOT4], crct.CH->block_fnum);\r
+       }\r
+       else if(crct.CH->pms)\r
        {\r
-               /* add support for 3 slot mode */\r
-               UINT32 block_fnum = crct.CH->block_fnum;\r
-\r
-               UINT32 fnum_lfo   = ((block_fnum & 0x7f0) >> 4) * 32 * 8;\r
-               INT32  lfo_fn_table_index_offset = lfo_pm_table[ fnum_lfo + crct.CH->pms + ((crct.pack>>16)&0xff) ];\r
-\r
-               if (lfo_fn_table_index_offset)  /* LFO phase modulation active */\r
-               {\r
-                       UINT8  blk;\r
-                       UINT32 fn;\r
-                       int kc,fc;\r
-\r
-                       blk = block_fnum >> 11;\r
-                       block_fnum = block_fnum*2 + lfo_fn_table_index_offset;\r
-\r
-                       fn  = block_fnum & 0xfff;\r
-\r
-                       /* keyscale code */\r
-                       kc = (blk<<2) | opn_fktable[fn >> 8];\r
-                       /* phase increment counter */\r
-                       fc = fn_table[fn]>>(7-blk);\r
-\r
-                       crct.incr1 = ((fc+crct.CH->SLOT[SLOT1].DT[kc])*crct.CH->SLOT[SLOT1].mul) >> 1;\r
-                       crct.incr2 = ((fc+crct.CH->SLOT[SLOT2].DT[kc])*crct.CH->SLOT[SLOT2].mul) >> 1;\r
-                       crct.incr3 = ((fc+crct.CH->SLOT[SLOT3].DT[kc])*crct.CH->SLOT[SLOT3].mul) >> 1;\r
-                       crct.incr4 = ((fc+crct.CH->SLOT[SLOT4].DT[kc])*crct.CH->SLOT[SLOT4].mul) >> 1;\r
-               }\r
-               else    /* LFO phase modulation  = zero */\r
-               {\r
-                       crct.incr1 = crct.CH->SLOT[SLOT1].Incr;\r
-                       crct.incr2 = crct.CH->SLOT[SLOT2].Incr;\r
-                       crct.incr3 = crct.CH->SLOT[SLOT3].Incr;\r
-                       crct.incr4 = crct.CH->SLOT[SLOT4].Incr;\r
-               }\r
+               crct.incr1 = update_lfo_phase(&crct.CH->SLOT[SLOT1], crct.CH->block_fnum);\r
+               crct.incr2 = update_lfo_phase(&crct.CH->SLOT[SLOT2], crct.CH->block_fnum);\r
+               crct.incr3 = update_lfo_phase(&crct.CH->SLOT[SLOT3], crct.CH->block_fnum);\r
+               crct.incr4 = update_lfo_phase(&crct.CH->SLOT[SLOT4], crct.CH->block_fnum);\r
        }\r
        else    /* no LFO phase modulation */\r
        {\r
@@ -1170,12 +1335,13 @@ static int chan_render(int *buffer, int length, int c, UINT32 flags) // flags: s
        }\r
        else\r
                ym2612.slot_mask &= ~(0xf << (c*4));\r
+       crct.CH->upd_cnt = (crct.algo >> 4) & 0x7;\r
 \r
        return (crct.algo & 8) >> 3; // had output\r
 }\r
 \r
 /* update phase increment and envelope generator */\r
-INLINE void refresh_fc_eg_slot(FM_SLOT *SLOT, int fc, int kc)\r
+static INLINE void refresh_fc_eg_slot(FM_SLOT *SLOT, int fc, int kc)\r
 {\r
        int ksr, fdt;\r
 \r
@@ -1191,17 +1357,18 @@ INLINE void refresh_fc_eg_slot(FM_SLOT *SLOT, int fc, int kc)
        {\r
                int eg_sh, eg_sel;\r
                SLOT->ksr = ksr;\r
+               SLOT->ar_ksr = SLOT->ar + ksr;\r
 \r
                /* calculate envelope generator rates */\r
-               if ((SLOT->ar + ksr) < 32+62)\r
+               if ((SLOT->ar_ksr) < 32+62)\r
                {\r
-                       eg_sh  = eg_rate_shift [SLOT->ar  + ksr ];\r
-                       eg_sel = eg_rate_select[SLOT->ar  + ksr ];\r
+                       eg_sh  = eg_rate_shift [SLOT->ar_ksr];\r
+                       eg_sel = eg_rate_select[SLOT->ar_ksr];\r
                }\r
                else\r
                {\r
                        eg_sh  = 0;\r
-                       eg_sel = 17;\r
+                       eg_sel = 18;\r
                }\r
 \r
                SLOT->eg_pack_ar = eg_inc_pack[eg_sel] | (eg_sh<<24);\r
@@ -1224,7 +1391,7 @@ INLINE void refresh_fc_eg_slot(FM_SLOT *SLOT, int fc, int kc)
 }\r
 \r
 /* update phase increment counters */\r
-INLINE void refresh_fc_eg_chan(FM_CH *CH)\r
+static INLINE void refresh_fc_eg_chan(FM_CH *CH)\r
 {\r
        if( CH->SLOT[SLOT1].Incr==-1){\r
                int fc = CH->fc;\r
@@ -1236,7 +1403,7 @@ INLINE void refresh_fc_eg_chan(FM_CH *CH)
        }\r
 }\r
 \r
-INLINE void refresh_fc_eg_chan_sl3(void)\r
+static INLINE void refresh_fc_eg_chan_sl3(void)\r
 {\r
        if( ym2612.CH[2].SLOT[SLOT1].Incr==-1)\r
        {\r
@@ -1256,7 +1423,7 @@ static void init_timetables(const UINT8 *dttable)
        /* DeTune table */\r
        for (d = 0;d <= 3;d++){\r
                for (i = 0;i <= 31;i++){\r
-                       rate = ((double)dttable[d*32 + i]) * SIN_LEN  * ym2612.OPN.ST.freqbase  * (1<<FREQ_SH) / ((double)(1<<20));\r
+                       rate = ((double)dttable[d*32 + i]) * ym2612.OPN.ST.freqbase  * (1<<(FREQ_SH-10));\r
                        ym2612.OPN.ST.dt_tab[d][i]   = (INT32) rate;\r
                        ym2612.OPN.ST.dt_tab[d+4][i] = -ym2612.OPN.ST.dt_tab[d][i];\r
                }\r
@@ -1279,12 +1446,20 @@ static void reset_channels(FM_CH *CH)
                CH[c].fc = 0;\r
                for(s = 0 ; s < 4 ; s++ )\r
                {\r
+                       CH[c].SLOT[s].Incr = -1;\r
+                       CH[c].SLOT[s].key = 0;\r
+                       CH[c].SLOT[s].phase = 0;\r
+                       CH[c].SLOT[s].ar = CH[c].SLOT[s].ksr = 0;\r
+                       CH[c].SLOT[s].ar_ksr = 0;\r
+                       CH[c].SLOT[s].ssg = CH[c].SLOT[s].ssgn = 0;\r
                        CH[c].SLOT[s].state= EG_OFF;\r
                        CH[c].SLOT[s].volume = MAX_ATT_INDEX;\r
+                       CH[c].SLOT[s].vol_out = MAX_ATT_INDEX;\r
                }\r
                CH[c].mem_value = CH[c].op1_out = 0;\r
        }\r
        ym2612.slot_mask = 0;\r
+       ym2612.ssg_mask = 0;\r
 }\r
 \r
 /* initialize generic tables */\r
@@ -1294,6 +1469,9 @@ static void init_tables(void)
        signed int n;\r
        double o,m;\r
 \r
+       if (ym_init_tab) return;\r
+       ym_init_tab = 1;\r
+\r
        for (i=0; i < 256; i++)\r
        {\r
                /* non-standard sinus */\r
@@ -1413,9 +1591,10 @@ static void OPNSetPres(int pres)
        int i;\r
 \r
        /* frequency base */\r
-       ym2612.OPN.ST.freqbase = (ym2612.OPN.ST.rate) ? ((double)ym2612.OPN.ST.clock / ym2612.OPN.ST.rate) / pres : 0;\r
+       double freqbase = (ym2612.OPN.ST.rate) ? ((double)ym2612.OPN.ST.clock / ym2612.OPN.ST.rate) / pres : 0;\r
 \r
-       ym2612.OPN.eg_timer_add  = (1<<EG_SH) * ym2612.OPN.ST.freqbase;\r
+       ym2612.OPN.eg_timer_add  = (1<<EG_SH) * freqbase;\r
+       ym2612.OPN.ST.freqbase = freqbase;\r
 \r
        /* make time tables */\r
        init_timetables( dt_tab );\r
@@ -1485,16 +1664,20 @@ static int OPNWriteReg(int r, int v)
                break;\r
 \r
        case 0x90:      /* SSG-EG */\r
-               // removed.\r
-               ret = 0;\r
+               SLOT->ssg =  v&0x0f;\r
+               SLOT->ssg ^= SLOT->ssgn;\r
+               if (v&0x08) ym2612.ssg_mask |=   1<<(OPN_SLOT(r) + c*4);\r
+               else        ym2612.ssg_mask &= ~(1<<(OPN_SLOT(r) + c*4));\r
+               if (SLOT->state > EG_REL)\r
+                       recalc_volout(SLOT);\r
                break;\r
 \r
        case 0xa0:\r
                switch( OPN_SLOT(r) ){\r
                case 0:         /* 0xa0-0xa2 : FNUM1 | depends on fn_h (below) */\r
                        {\r
-                               UINT32 fn = (((UINT32)( (CH->fn_h)&7))<<8) + v;\r
-                               UINT8 blk = CH->fn_h>>3;\r
+                               UINT32 fn = ((UINT32)(ym2612.OPN.ST.fn_h & 7) << 8) | v;\r
+                               UINT8 blk = ym2612.OPN.ST.fn_h >> 3;\r
                                /* keyscale code */\r
                                CH->kcode = (blk<<2) | opn_fktable[fn >> 7];\r
                                /* phase increment counter */\r
@@ -1507,7 +1690,7 @@ static int OPNWriteReg(int r, int v)
                        }\r
                        break;\r
                case 1:         /* 0xa4-0xa6 : FNUM2,BLK */\r
-                       CH->fn_h = v&0x3f;\r
+                       ym2612.OPN.ST.fn_h = v & 0x3f;\r
                        ret = 0;\r
                        break;\r
                case 2:         /* 0xa8-0xaa : 3CH FNUM1 */\r
@@ -1577,10 +1760,11 @@ static int OPNWriteReg(int r, int v)
 /*******************************************************************************/\r
 \r
 /* Generate samples for YM2612 */\r
-int YM2612UpdateOne_(int *buffer, int length, int stereo, int is_buf_empty)\r
+int YM2612UpdateOne_(s32 *buffer, int length, int stereo, int is_buf_empty)\r
 {\r
        int pan;\r
        int active_chs = 0;\r
+       int flags = stereo ? 1:0;\r
 \r
        // if !is_buf_empty, it means it has valid samples to mix with, else it may contain trash\r
        if (is_buf_empty) memset32(buffer, 0, length<<stereo);\r
@@ -1616,31 +1800,40 @@ int YM2612UpdateOne_(int *buffer, int length, int stereo, int is_buf_empty)
        refresh_fc_eg_chan( &ym2612.CH[5] );\r
 \r
        pan = ym2612.OPN.pan;\r
-       if (stereo) stereo = 1;\r
 \r
        /* mix to 32bit dest */\r
-       // flags: stereo, ?, disabled, ?, pan_r, pan_l\r
+       // flags: stereo, ssg_enabled, disabled, _, pan_r, pan_l\r
        chan_render_prep();\r
-       if (ym2612.slot_mask & 0x00000f) active_chs |= chan_render(buffer, length, 0, stereo|((pan&0x003)<<4)) << 0;\r
-       if (ym2612.slot_mask & 0x0000f0) active_chs |= chan_render(buffer, length, 1, stereo|((pan&0x00c)<<2)) << 1;\r
-       if (ym2612.slot_mask & 0x000f00) active_chs |= chan_render(buffer, length, 2, stereo|((pan&0x030)   )) << 2;\r
-       if (ym2612.slot_mask & 0x00f000) active_chs |= chan_render(buffer, length, 3, stereo|((pan&0x0c0)>>2)) << 3;\r
-       if (ym2612.slot_mask & 0x0f0000) active_chs |= chan_render(buffer, length, 4, stereo|((pan&0x300)>>4)) << 4;\r
-       if (ym2612.slot_mask & 0xf00000) active_chs |= chan_render(buffer, length, 5, stereo|((pan&0xc00)>>6)|(ym2612.dacen<<2)) << 5;\r
-       chan_render_finish();\r
+#define        BIT_IF(v,b,c)   { v &= ~(1<<(b)); if (c) v |= 1<<(b); }\r
+       BIT_IF(flags, 1, (ym2612.ssg_mask & 0x00000f) && (ym2612.OPN.ST.flags & 1));\r
+       if (ym2612.slot_mask & 0x00000f) active_chs |= chan_render(buffer, length, 0, flags|((pan&0x003)<<4)) << 0;\r
+       BIT_IF(flags, 1, (ym2612.ssg_mask & 0x0000f0) && (ym2612.OPN.ST.flags & 1));\r
+       if (ym2612.slot_mask & 0x0000f0) active_chs |= chan_render(buffer, length, 1, flags|((pan&0x00c)<<2)) << 1;\r
+       BIT_IF(flags, 1, (ym2612.ssg_mask & 0x000f00) && (ym2612.OPN.ST.flags & 1));\r
+       if (ym2612.slot_mask & 0x000f00) active_chs |= chan_render(buffer, length, 2, flags|((pan&0x030)   )) << 2;\r
+       BIT_IF(flags, 1, (ym2612.ssg_mask & 0x00f000) && (ym2612.OPN.ST.flags & 1));\r
+       if (ym2612.slot_mask & 0x00f000) active_chs |= chan_render(buffer, length, 3, flags|((pan&0x0c0)>>2)) << 3;\r
+       BIT_IF(flags, 1, (ym2612.ssg_mask & 0x0f0000) && (ym2612.OPN.ST.flags & 1));\r
+       if (ym2612.slot_mask & 0x0f0000) active_chs |= chan_render(buffer, length, 4, flags|((pan&0x300)>>4)) << 4;\r
+       g_lfo_ampm = crct.pack >> 16; // need_save; now because ch5 might skip updating it\r
+       BIT_IF(flags, 1, (ym2612.ssg_mask & 0xf00000) && (ym2612.OPN.ST.flags & 1));\r
+       if (ym2612.slot_mask & 0xf00000) active_chs |= chan_render(buffer, length, 5, flags|((pan&0xc00)>>6)|(!!ym2612.dacen<<2)) << 5;\r
+#undef BIT_IF\r
+       chan_render_finish(buffer, length, active_chs);\r
 \r
        return active_chs; // 1 if buffer updated\r
 }\r
 \r
 \r
 /* initialize YM2612 emulator */\r
-void YM2612Init_(int clock, int rate)\r
+void YM2612Init_(int clock, int rate, int flags)\r
 {\r
        memset(&ym2612, 0, sizeof(ym2612));\r
        init_tables();\r
 \r
        ym2612.OPN.ST.clock = clock;\r
        ym2612.OPN.ST.rate = rate;\r
+       ym2612.OPN.ST.flags = flags;\r
 \r
        OPNSetPres( 6*24 );\r
 \r
@@ -1661,6 +1854,9 @@ void YM2612ResetChip_(void)
 \r
        ym2612.OPN.eg_timer = 0;\r
        ym2612.OPN.eg_cnt   = 0;\r
+       ym2612.OPN.lfo_inc = 0;\r
+       ym2612.OPN.lfo_cnt = 0;\r
+       g_lfo_ampm = 126 << 8;\r
        ym2612.OPN.ST.status = 0;\r
 \r
        reset_channels( &ym2612.CH[0] );\r
@@ -1679,6 +1875,7 @@ void YM2612ResetChip_(void)
        for(i = 0x26 ; i >= 0x20 ; i-- ) OPNWriteReg(i,0);\r
        /* DAC mode clear */\r
        ym2612.dacen = 0;\r
+       ym2612.dacout = 0;\r
        ym2612.addr_A1 = 0;\r
 }\r
 \r
@@ -1693,22 +1890,20 @@ int YM2612Write_(unsigned int a, unsigned int v)
 \r
        v &= 0xff;      /* adjust to 8 bit bus */\r
 \r
-       switch( a&3){\r
+       switch( a & 3 ){\r
        case 0: /* address port 0 */\r
+       case 2: /* address port 1 */\r
+               /* reminder: this is not used, see ym2612_write_local() */\r
                ym2612.OPN.ST.address = v;\r
-               ym2612.addr_A1 = 0;\r
-               ret=0;\r
+               ym2612.addr_A1 = (a & 2) >> 1;\r
+               ret = 0;\r
                break;\r
 \r
-       case 1: /* data port 0    */\r
-               if (ym2612.addr_A1 != 0) {\r
-                       ret=0;\r
-                       break;  /* verified on real YM2608 */\r
-               }\r
-\r
-               addr = ym2612.OPN.ST.address;\r
+       case 1:\r
+       case 3: /* data port */\r
+               addr = ym2612.OPN.ST.address | ((int)ym2612.addr_A1 << 8);\r
 \r
-               switch( addr & 0xf0 )\r
+               switch( addr & 0x1f0 )\r
                {\r
                case 0x20:      /* 0x20-0x2f Mode */\r
                        switch( addr )\r
@@ -1721,6 +1916,8 @@ int YM2612Write_(unsigned int a, unsigned int v)
                                else\r
                                {\r
                                        ym2612.OPN.lfo_inc = 0;\r
+                                       ym2612.OPN.lfo_cnt = 0;\r
+                                       g_lfo_ampm = 126 << 8;\r
                                }\r
                                break;\r
 #if 0 // handled elsewhere\r
@@ -1790,23 +1987,6 @@ int YM2612Write_(unsigned int a, unsigned int v)
                        ret = OPNWriteReg(addr,v);\r
                }\r
                break;\r
-\r
-       case 2: /* address port 1 */\r
-               ym2612.OPN.ST.address = v;\r
-               ym2612.addr_A1 = 1;\r
-               ret=0;\r
-               break;\r
-\r
-       case 3: /* data port 1    */\r
-               if (ym2612.addr_A1 != 1) {\r
-                       ret=0;\r
-                       break;  /* verified on real YM2608 */\r
-               }\r
-\r
-               addr = ym2612.OPN.ST.address | 0x100;\r
-\r
-               ret = OPNWriteReg(addr, v);\r
-               break;\r
        }\r
 \r
        return ret;\r
@@ -1869,7 +2049,7 @@ typedef struct
        UINT32  eg_timer;\r
        UINT32  lfo_cnt;\r
        UINT16  lfo_ampm;\r
-       UINT16  unused2;\r
+       INT16   busy_timer;\r
        UINT32  keyon_field;    // 20\r
        UINT32  kcode_fc_sl3_3;\r
        UINT32  reserved[2];\r
@@ -1883,7 +2063,7 @@ typedef struct
 } ym_save_addon2;\r
 \r
 \r
-void YM2612PicoStateSave2(int tat, int tbt)\r
+void YM2612PicoStateSave2(int tat, int tbt, int busy)\r
 {\r
        ym_save_addon_slot ss;\r
        ym_save_addon2 sa2;\r
@@ -1941,10 +2121,11 @@ void YM2612PicoStateSave2(int tat, int tbt)
        sa.eg_timer = ym2612.OPN.eg_timer;\r
        sa.lfo_cnt  = ym2612.OPN.lfo_cnt;\r
        sa.lfo_ampm = g_lfo_ampm;\r
+       sa.busy_timer = busy;\r
        memcpy(ptr, &sa, sizeof(sa)); // 0x30 max\r
 }\r
 \r
-int YM2612PicoStateLoad2(int *tat, int *tbt)\r
+int YM2612PicoStateLoad2(int *tat, int *tbt, int *busy)\r
 {\r
        ym_save_addon_slot ss;\r
        ym_save_addon2 sa2;\r
@@ -1970,6 +2151,7 @@ int YM2612PicoStateLoad2(int *tat, int *tbt)
        g_lfo_ampm = sa.lfo_ampm;\r
        if (tat != NULL) *tat = sa.TAT;\r
        if (tbt != NULL) *tbt = sa.TBT;\r
+       if (busy != NULL) *busy = sa.busy_timer;\r
 \r
        // chans 1,2,3\r
        ptr = &ym2612.REGS[0x0b8];\r
@@ -1990,6 +2172,7 @@ int YM2612PicoStateLoad2(int *tat, int *tbt)
                blk = ym2612.CH[c].block_fnum >> 11;\r
                ym2612.CH[c].kcode= (blk<<2) | opn_fktable[fn >> 7];\r
                ym2612.CH[c].fc = fn_table[fn*2]>>(7-blk);\r
+               refresh_fc_eg_chan( &ym2612.CH[c] );\r
        }\r
        // chans 4,5,6\r
        ptr = &ym2612.REGS[0x1b8];\r
@@ -2010,6 +2193,7 @@ int YM2612PicoStateLoad2(int *tat, int *tbt)
                blk = ym2612.CH[c].block_fnum >> 11;\r
                ym2612.CH[c].kcode= (blk<<2) | opn_fktable[fn >> 7];\r
                ym2612.CH[c].fc = fn_table[fn*2]>>(7-blk);\r
+               refresh_fc_eg_chan( &ym2612.CH[c] );\r
        }\r
        for (c = 0; c < 3; c++)\r
        {\r