git subrepo pull (merge) --force deps/lightning
[pcsx_rearmed.git] / deps / lightning / lib / jit_arm.c
index 64a70f9..25aa7cb 100644 (file)
@@ -240,6 +240,11 @@ jit_get_cpu(void)
     /* do not generate hardware integer division by default */
     if (jit_cpu.version == 7)
        jit_cpu.extend = 0;
+
+    /* By default generate extra instructions for unaligned load/store. */
+    jit_cpu.unaligned = 1;
+    /* Linux should only not handle unaligned vfp load/store */
+    jit_cpu.vfp_unaligned = 1;
 }
 
 void
@@ -1337,6 +1342,16 @@ _emit_code(jit_state_t *_jit)
                name##r##type(rn(node->u.w),                            \
                              rn(node->v.w), rn(node->w.w));            \
                break
+#define case_rqr(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               if (jit_swf_p())                                        \
+                   swf_##name##r##type(rn(node->u.w), rn(node->v.q.l), \
+                                       rn(node->v.q.h), rn(node->w.w));\
+               else                                                    \
+                   vfp_##name##r##type(rn(node->u.w), rn(node->v.q.l), \
+                                       rn(node->v.q.h), rn(node->w.w));\
+           case jit_code_##name##i##type:                              \
+               break
 #define case_rrrr(name, type)                                          \
            case jit_code_##name##r##type:                              \
                name##r##type(rn(node->u.q.l), rn(node->u.q.h),         \
@@ -1558,6 +1573,10 @@ _emit_code(jit_state_t *_jit)
                case_rrw(rsb,);
                case_rrr(mul,);
                case_rrw(mul,);
+               case_rrr(hmul,);
+               case_rrw(hmul,);
+               case_rrr(hmul, _u);
+               case_rrw(hmul, _u);
                case_rrrr(qmul,);
                case_rrrw(qmul,);
                case_rrrr(qmul, _u);
@@ -1576,16 +1595,38 @@ _emit_code(jit_state_t *_jit)
                case_rrw(rem, _u);
                case_rrr(lsh,);
                case_rrw(lsh,);
+#define qlshr(r0, r1, r2, r3)  fallback_qlshr(r0, r1, r2, r3)
+#define qlshi(r0, r1, r2, i0)  fallback_qlshi(r0, r1, r2, i0)
+#define qlshr_u(r0, r1, r2, r3)        fallback_qlshr_u(r0, r1, r2, r3)
+#define qlshi_u(r0, r1, r2, i0)        fallback_qlshi_u(r0, r1, r2, i0)
+               case_rrrr(qlsh,);
+               case_rrrw(qlsh,);
+               case_rrrr(qlsh, _u);
+               case_rrrw(qlsh, _u);
                case_rrr(rsh,);
                case_rrw(rsh,);
                case_rrr(rsh, _u);
                case_rrw(rsh, _u);
+#define qrshr(r0, r1, r2, r3)  fallback_qrshr(r0, r1, r2, r3)
+#define qrshi(r0, r1, r2, i0)  fallback_qrshi(r0, r1, r2, i0)
+#define qrshr_u(r0, r1, r2, r3)        fallback_qrshr_u(r0, r1, r2, r3)
+#define qrshi_u(r0, r1, r2, i0)        fallback_qrshi_u(r0, r1, r2, i0)
+               case_rrrr(qrsh,);
+               case_rrrw(qrsh,);
+               case_rrrr(qrsh, _u);
+               case_rrrw(qrsh, _u);
+               case_rrr(lrot,);
+               case_rrw(lrot,);
+               case_rrr(rrot,);
+               case_rrw(rrot,);
                case_rr(neg,);
                case_rr(com,);
                case_rr(clo,);
                case_rr(clz,);
                case_rr(cto,);
                case_rr(ctz,);
+               case_rr(rbit,);
+               case_rr(popcnt,);
                case_rrr(and,);
                case_rrw(and,);
                case_rrr(or,);
@@ -1614,6 +1655,18 @@ _emit_code(jit_state_t *_jit)
                case_rrw(ldx, _us);
                case_rrr(ldx, _i);
                case_rrw(ldx, _i);
+           case jit_code_unldr:
+               unldr(rn(node->u.w), rn(node->v.w), node->w.w);
+               break;
+           case jit_code_unldi:
+               unldi(rn(node->u.w), node->v.w, node->w.w);
+               break;
+           case jit_code_unldr_u:
+               unldr_u(rn(node->u.w), rn(node->v.w), node->w.w);
+               break;
+           case jit_code_unldi_u:
+               unldi_u(rn(node->u.w), node->v.w, node->w.w);
+               break;
                case_rr(st, _c);
                case_wr(st, _c);
                case_rr(st, _s);
@@ -1626,10 +1679,28 @@ _emit_code(jit_state_t *_jit)
                case_wrr(stx, _s);
                case_rrr(stx, _i);
                case_wrr(stx, _i);
+           case jit_code_unstr:
+               unstr(rn(node->u.w), rn(node->v.w), node->w.w);
+               break;
+           case jit_code_unsti:
+               unsti(node->u.w, rn(node->v.w), node->w.w);
+               break;
                case_rr(hton, _us);
                case_rr(hton, _ui);
                case_rr(bswap, _us);
                case_rr(bswap, _ui);
+           case jit_code_extr:
+               extr(rn(node->u.w), rn(node->v.w), node->w.q.l, node->w.q.h);
+               break;
+           case jit_code_extr_u:
+               extr_u(rn(node->u.w), rn(node->v.w), node->w.q.l, node->w.q.h);
+               break;
+           case jit_code_depr:
+               depr(rn(node->u.w), rn(node->v.w), node->w.q.l, node->w.q.h);
+               break;
+           case jit_code_depi:
+               depi(rn(node->u.w), node->v.w, node->w.q.l, node->w.q.h);
+               break;
                case_rr(ext, _c);
                case_rr(ext, _uc);
                case_rr(ext, _s);
@@ -1734,15 +1805,43 @@ _emit_code(jit_state_t *_jit)
                case_vv(abs, _f);
                case_vv(neg, _f);
                case_vv(sqrt, _f);
+               case_rqr(fma, _f);
+               case_rqr(fms, _f);
+               case_rqr(fnma, _f);
+               case_rqr(fnms, _f);
                case_vv(ext, _f);
                case_vv(ld, _f);
                case_vw(ld, _f);
                case_vvv(ldx, _f);
                case_vvw(ldx, _f);
+           case jit_code_unldr_x:
+               if (jit_swf_p())
+                   swf_unldr_x(rn(node->u.w), rn(node->v.w), node->w.w);
+               else
+                   vfp_unldr_x(rn(node->u.w), rn(node->v.w), node->w.w);
+               break;
+           case jit_code_unldi_x:
+               if (jit_swf_p())
+                   swf_unldi_x(rn(node->u.w), node->v.w, node->w.w);
+               else
+                   vfp_unldi_x(rn(node->u.w), node->v.w, node->w.w);
+               break;
                case_vv(st, _f);
                case_wv(st, _f);
                case_vvv(stx, _f);
                case_wvv(stx, _f);
+           case jit_code_unstr_x:
+               if (jit_swf_p())
+                   swf_unstr_x(rn(node->u.w), rn(node->v.w), node->w.w);
+               else
+                   vfp_unstr_x(rn(node->u.w), rn(node->v.w), node->w.w);
+               break;
+           case jit_code_unsti_x:
+               if (jit_swf_p())
+                   swf_unsti_x(node->u.w, rn(node->v.w), node->w.w);
+               else
+                   vfp_unsti_x(node->u.w, rn(node->v.w), node->w.w);
+               break;
                case_vv(mov, _f);
            case jit_code_movi_f:
                assert_data(node);
@@ -1820,6 +1919,10 @@ _emit_code(jit_state_t *_jit)
                case_vv(abs, _d);
                case_vv(neg, _d);
                case_vv(sqrt, _d);
+               case_rqr(fma, _d);
+               case_rqr(fms, _d);
+               case_rqr(fnma, _d);
+               case_rqr(fnms, _d);
                case_vv(ext, _d);
                case_vv(ld, _d);
                case_vw(ld, _d);
@@ -2028,41 +2131,46 @@ _emit_code(jit_state_t *_jit)
                break;
            case jit_code_movr_w_f:
                if (jit_swf_p())
-                   swf_movr_f(rn(node->u.w), rn(node->v.w));
+                   swf_movr_w_f(rn(node->u.w), rn(node->v.w));
                else
-                   vfp_movr_f(rn(node->u.w), rn(node->v.w));
+                   vfp_movr_w_f(rn(node->u.w), rn(node->v.w));
                break;
            case jit_code_movr_f_w:
                if (jit_swf_p())
-                   swf_movr_f(rn(node->u.w), rn(node->v.w));
+                   swf_movr_f_w(rn(node->u.w), rn(node->v.w));
                else
-                   vfp_movr_f(rn(node->u.w), rn(node->v.w));
+                   vfp_movr_f_w(rn(node->u.w), rn(node->v.w));
                break;
            case jit_code_movi_f_w:
                assert_data(node);
+               movi_f_w(rn(node->u.w), node->v.f);
+               break;
+           case jit_code_movi_w_f:
                if (jit_swf_p())
-                   swf_movi_f(rn(node->u.w), node->v.f);
+                   swf_movi_w_f(rn(node->u.w), node->v.w);
                else
-                   vfp_movi_f(rn(node->u.w), node->v.f);
+                   vfp_movi_w_f(rn(node->u.w), node->v.w);
                break;
            case jit_code_movr_ww_d:
                if (jit_swf_p())
-                   swf_movr_d(rn(node->u.w), rn(node->v.w));
+                   swf_movr_ww_d(rn(node->u.w), rn(node->v.w), rn(node->w.w));
                else
-                   vfp_movr_d(rn(node->u.w), rn(node->v.w));
+                   vfp_movr_ww_d(rn(node->u.w), rn(node->v.w), rn(node->w.w));
                break;
            case jit_code_movr_d_ww:
                if (jit_swf_p())
-                   swf_movr_d(rn(node->u.w), rn(node->w.w));
+                   swf_movr_d_ww(rn(node->u.w), rn(node->v.w), rn(node->w.w));
                else
-                   vfp_movr_d(rn(node->u.w), rn(node->w.w));
+                   vfp_movr_d_ww(rn(node->u.w), rn(node->v.w), rn(node->w.w));
                break;
            case jit_code_movi_d_ww:
-               assert_data(node);
+               movi_d_ww(rn(node->u.w), rn(node->v.w), node->w.d);
+               break;
+           case jit_code_movi_ww_d:
                if (jit_swf_p())
-                   swf_movi_d(rn(node->u.w), node->w.d);
+                   swf_movi_ww_d(rn(node->u.w), node->v.w, node->w.w);
                else
-                   vfp_movi_d(rn(node->u.w), node->w.d);
+                   vfp_movi_ww_d(rn(node->u.w), node->v.w, node->w.w);
                break;
            case jit_code_va_start:
                vastart(rn(node->u.w));
@@ -2115,6 +2223,63 @@ _emit_code(jit_state_t *_jit)
            case jit_code_retval_f:             case jit_code_retval_d:
            case jit_code_prepare:
            case jit_code_finishr:              case jit_code_finishi:
+           case jit_code_negi_f:               case jit_code_absi_f:
+           case jit_code_sqrti_f:              case jit_code_negi_d:
+           case jit_code_absi_d:               case jit_code_sqrti_d:
+               break;
+           case jit_code_negi:
+               negi(rn(node->u.w), node->v.w);
+               break;
+           case jit_code_comi:
+               comi(rn(node->u.w), node->v.w);
+               break;
+           case jit_code_exti_c:
+               exti_c(rn(node->u.w), node->v.w);
+               break;
+           case jit_code_exti_uc:
+               exti_uc(rn(node->u.w), node->v.w);
+               break;
+           case jit_code_exti_s:
+               exti_s(rn(node->u.w), node->v.w);
+               break;
+           case jit_code_exti_us:
+               exti_us(rn(node->u.w), node->v.w);
+               break;
+           case jit_code_bswapi_us:
+               bswapi_us(rn(node->u.w), node->v.w);
+               break;
+           case jit_code_bswapi_ui:
+               bswapi_ui(rn(node->u.w), node->v.w);
+               break;
+           case jit_code_htoni_us:
+               htoni_us(rn(node->u.w), node->v.w);
+               break;
+           case jit_code_htoni_ui:
+               htoni_ui(rn(node->u.w), node->v.w);
+               break;
+           case jit_code_cloi:
+               cloi(rn(node->u.w), node->v.w);
+               break;
+           case jit_code_clzi:
+               clzi(rn(node->u.w), node->v.w);
+               break;
+           case jit_code_ctoi:
+               ctoi(rn(node->u.w), node->v.w);
+               break;
+           case jit_code_ctzi:
+               ctzi(rn(node->u.w), node->v.w);
+               break;
+           case jit_code_rbiti:
+               rbiti(rn(node->u.w), node->v.w);
+               break;
+           case jit_code_popcnti:
+               popcnti(rn(node->u.w), node->v.w);
+               break;
+           case jit_code_exti:
+               exti(rn(node->u.w), node->v.w, node->w.q.l, node->w.q.h);
+               break;
+           case jit_code_exti_u:
+               exti_u(rn(node->u.w), node->v.w, node->w.q.l, node->w.q.h);
                break;
            default:
                abort();
@@ -2124,12 +2289,19 @@ _emit_code(jit_state_t *_jit)
        /* update register live state */
        jit_reglive(node);
 
+#if defined JIT_INSTR_MAX
+       word = 4096 - JIT_INSTR_MAX;
+#else
+       word = 3968;
+#endif
+       /* longest sequence should be 64 bytes, but preventively
+        * do not let it go past 256 remaining bytes before a flush */
+       if (word > 3968)
+           word = 3968;
        if (_jitc->consts.length &&
-           (_jit->pc.uc - _jitc->consts.data >= 3968 ||
+           (_jit->pc.uc - _jitc->consts.data >= word ||
             (jit_uword_t)_jit->pc.uc -
-            (jit_uword_t)_jitc->consts.patches[0] >= 3968)) {
-           /* longest sequence should be 64 bytes, but preventively
-            * do not let it go past 128 remaining bytes before a flush */
+            (jit_uword_t)_jitc->consts.patches[0] >= word)) {
            if (node->next &&
                node->next->code != jit_code_jmpi &&
                node->next->code != jit_code_jmpr &&