+#endif
+#endif
+ // Set u,v increments and packed Gouraud increment for inner driver
+ gpu_unai.u_inc = du4;
+ gpu_unai.v_inc = dv4;
+ gpu_unai.gInc = gpuPackGouraudColInc(dr4, dg4, db4);
+
+ for (s32 loop0 = 2; loop0; loop0--) {
+ if (loop0 == 2) {
+ ya = y0; yb = y1;
+ x3 = x4 = i2x(x0);
+ u3 = i2x(u0); v3 = i2x(v0);
+ r3 = i2x(r0); g3 = i2x(g0); b3 = i2x(b0);
+ if (dx < 0) {
+#ifdef GPU_UNAI_USE_FLOATMATH
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+ if ((y2 - y0) != 0) {
+ float finv = FloatInv(y2 - y0);
+ dx3 = (fixed)(((x2 - x0) << FIXED_BITS) * finv);
+ du3 = (fixed)(((u2 - u0) << FIXED_BITS) * finv);
+ dv3 = (fixed)(((v2 - v0) << FIXED_BITS) * finv);
+ dr3 = (fixed)(((r2 - r0) << FIXED_BITS) * finv);
+ dg3 = (fixed)(((g2 - g0) << FIXED_BITS) * finv);
+ db3 = (fixed)(((b2 - b0) << FIXED_BITS) * finv);
+ } else {
+ dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
+ }
+ dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
+#else
+ if ((y2 - y0) != 0) {
+ float fdiv = y2 - y0;
+ dx3 = (fixed)(((x2 - x0) << FIXED_BITS) / fdiv);
+ du3 = (fixed)(((u2 - u0) << FIXED_BITS) / fdiv);
+ dv3 = (fixed)(((v2 - v0) << FIXED_BITS) / fdiv);
+ dr3 = (fixed)(((r2 - r0) << FIXED_BITS) / fdiv);
+ dg3 = (fixed)(((g2 - g0) << FIXED_BITS) / fdiv);
+ db3 = (fixed)(((b2 - b0) << FIXED_BITS) / fdiv);
+ } else {
+ dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
+ }
+ dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0;
+#endif
+#else // Integer Division:
+#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
+ if ((y2 - y0) != 0) {
+ int iF, iS;
+ xInv((y2 - y0), iF, iS);
+ dx3 = xInvMulx((x2 - x0), iF, iS);
+ du3 = xInvMulx((u2 - u0), iF, iS);
+ dv3 = xInvMulx((v2 - v0), iF, iS);
+ dr3 = xInvMulx((r2 - r0), iF, iS);
+ dg3 = xInvMulx((g2 - g0), iF, iS);
+ db3 = xInvMulx((b2 - b0), iF, iS);
+ } else {
+ dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
+ }
+ dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0;
+#else
+ if ((y2 - y0) != 0) {
+ dx3 = GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0));
+ du3 = GPU_FAST_DIV((u2 - u0) << FIXED_BITS, (y2 - y0));
+ dv3 = GPU_FAST_DIV((v2 - v0) << FIXED_BITS, (y2 - y0));
+ dr3 = GPU_FAST_DIV((r2 - r0) << FIXED_BITS, (y2 - y0));
+ dg3 = GPU_FAST_DIV((g2 - g0) << FIXED_BITS, (y2 - y0));
+ db3 = GPU_FAST_DIV((b2 - b0) << FIXED_BITS, (y2 - y0));
+ } else {
+ dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
+ }
+ dx4 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0;
+#endif
+#endif
+ } else {
+#ifdef GPU_UNAI_USE_FLOATMATH
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+ if ((y1 - y0) != 0) {
+ float finv = FloatInv(y1 - y0);
+ dx3 = (fixed)(((x1 - x0) << FIXED_BITS) * finv);
+ du3 = (fixed)(((u1 - u0) << FIXED_BITS) * finv);
+ dv3 = (fixed)(((v1 - v0) << FIXED_BITS) * finv);
+ dr3 = (fixed)(((r1 - r0) << FIXED_BITS) * finv);
+ dg3 = (fixed)(((g1 - g0) << FIXED_BITS) * finv);
+ db3 = (fixed)(((b1 - b0) << FIXED_BITS) * finv);
+ } else {
+ dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
+ }
+ dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
+#else
+ if ((y1 - y0) != 0) {
+ float fdiv = y1 - y0;
+ dx3 = (fixed)(((x1 - x0) << FIXED_BITS) / fdiv);
+ du3 = (fixed)(((u1 - u0) << FIXED_BITS) / fdiv);
+ dv3 = (fixed)(((v1 - v0) << FIXED_BITS) / fdiv);
+ dr3 = (fixed)(((r1 - r0) << FIXED_BITS) / fdiv);
+ dg3 = (fixed)(((g1 - g0) << FIXED_BITS) / fdiv);
+ db3 = (fixed)(((b1 - b0) << FIXED_BITS) / fdiv);
+ } else {
+ dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
+ }
+ dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / float(y2 - y0)) : 0;
+#endif
+#else // Integer Division:
+#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
+ if ((y1 - y0) != 0) {
+ int iF, iS;
+ xInv((y1 - y0), iF, iS);
+ dx3 = xInvMulx((x1 - x0), iF, iS);
+ du3 = xInvMulx((u1 - u0), iF, iS);
+ dv3 = xInvMulx((v1 - v0), iF, iS);
+ dr3 = xInvMulx((r1 - r0), iF, iS);
+ dg3 = xInvMulx((g1 - g0), iF, iS);
+ db3 = xInvMulx((b1 - b0), iF, iS);
+ } else {
+ dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
+ }
+ dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0;
+#else
+ if ((y1 - y0) != 0) {
+ dx3 = GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0));
+ du3 = GPU_FAST_DIV((u1 - u0) << FIXED_BITS, (y1 - y0));
+ dv3 = GPU_FAST_DIV((v1 - v0) << FIXED_BITS, (y1 - y0));
+ dr3 = GPU_FAST_DIV((r1 - r0) << FIXED_BITS, (y1 - y0));
+ dg3 = GPU_FAST_DIV((g1 - g0) << FIXED_BITS, (y1 - y0));
+ db3 = GPU_FAST_DIV((b1 - b0) << FIXED_BITS, (y1 - y0));
+ } else {
+ dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
+ }
+ dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0;
+#endif
+#endif
+ }
+ } else {
+ //senquack - skip the second (final) pass of this loop when y1 == y2,
+ // as there is nothing to be drawn. The first pass must always run,
+ // because it sets up dx3/dx4, which the second pass reuses.
+ if (y1 == y2) break;
+
+ ya = y1; yb = y2;
+
+ if (dx < 0) {
+ x3 = i2x(x0); x4 = i2x(x1);
+ u3 = i2x(u0); v3 = i2x(v0);
+ r3 = i2x(r0); g3 = i2x(g0); b3 = i2x(b0);
+
+ if ((y1 - y0) != 0) {
+ x3 += (dx3 * (y1 - y0));
+ u3 += (du3 * (y1 - y0));
+ v3 += (dv3 * (y1 - y0));
+ r3 += (dr3 * (y1 - y0));
+ g3 += (dg3 * (y1 - y0));
+ b3 += (db3 * (y1 - y0));
+ }
+
+#ifdef GPU_UNAI_USE_FLOATMATH
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+ dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
+#else
+ dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
+#endif
+#else // Integer Division:
+#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
+ dx4 = ((y2 - y1) != 0) ? xLoDivx((x2 - x1), (y2 - y1)) : 0;
+#else
+ dx4 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0;
+#endif
+#endif
+ } else {
+ x3 = i2x(x1);
+ x4 = i2x(x0) + (dx4 * (y1 - y0));
+
+ u3 = i2x(u1); v3 = i2x(v1);
+ r3 = i2x(r1); g3 = i2x(g1); b3 = i2x(b1);
+#ifdef GPU_UNAI_USE_FLOATMATH
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+ if ((y2 - y1) != 0) {
+ float finv = FloatInv(y2 - y1);
+ dx3 = (fixed)(((x2 - x1) << FIXED_BITS) * finv);
+ du3 = (fixed)(((u2 - u1) << FIXED_BITS) * finv);
+ dv3 = (fixed)(((v2 - v1) << FIXED_BITS) * finv);
+ dr3 = (fixed)(((r2 - r1) << FIXED_BITS) * finv);
+ dg3 = (fixed)(((g2 - g1) << FIXED_BITS) * finv);
+ db3 = (fixed)(((b2 - b1) << FIXED_BITS) * finv);
+ } else {
+ dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
+ }
+#else
+ if ((y2 - y1) != 0) {
+ float fdiv = y2 - y1;
+ dx3 = (fixed)(((x2 - x1) << FIXED_BITS) / fdiv);
+ du3 = (fixed)(((u2 - u1) << FIXED_BITS) / fdiv);
+ dv3 = (fixed)(((v2 - v1) << FIXED_BITS) / fdiv);
+ dr3 = (fixed)(((r2 - r1) << FIXED_BITS) / fdiv);
+ dg3 = (fixed)(((g2 - g1) << FIXED_BITS) / fdiv);
+ db3 = (fixed)(((b2 - b1) << FIXED_BITS) / fdiv);
+ } else {
+ dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
+ }
+#endif
+#else // Integer Division:
+#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
+ if ((y2 - y1) != 0) {
+ int iF, iS;
+ xInv((y2 - y1), iF, iS);
+ dx3 = xInvMulx((x2 - x1), iF, iS);
+ du3 = xInvMulx((u2 - u1), iF, iS);
+ dv3 = xInvMulx((v2 - v1), iF, iS);
+ dr3 = xInvMulx((r2 - r1), iF, iS);
+ dg3 = xInvMulx((g2 - g1), iF, iS);
+ db3 = xInvMulx((b2 - b1), iF, iS);
+ } else {
+ dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
+ }
+#else
+ if ((y2 - y1) != 0) {
+ dx3 = GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1));
+ du3 = GPU_FAST_DIV((u2 - u1) << FIXED_BITS, (y2 - y1));
+ dv3 = GPU_FAST_DIV((v2 - v1) << FIXED_BITS, (y2 - y1));
+ dr3 = GPU_FAST_DIV((r2 - r1) << FIXED_BITS, (y2 - y1));
+ dg3 = GPU_FAST_DIV((g2 - g1) << FIXED_BITS, (y2 - y1));
+ db3 = GPU_FAST_DIV((b2 - b1) << FIXED_BITS, (y2 - y1));
+ } else {
+ dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0;
+ }
+#endif
+#endif
+ }