+#define multdiv_assemble multdiv_assemble_arm64
+
+// Emit code that jumps to the recompiled block for the virtual address held
+// in host register rs: move rs into x0 (the first argument register) unless
+// it is already there, call get_addr_ht to resolve the host code pointer,
+// then branch to the result (returned in x0).
+static void do_jump_vaddr(u_int rs)
+{
+ if (rs != 0)
+ emit_mov(rs, 0);
+ emit_far_call(get_addr_ht);
+ emit_jmpreg(0);
+}
+
+// No-op on ARM64. On x86 this preloads the hash-mask constant 0xf8 into a
+// register; on ARM the mask fits in a single AND-immediate instruction (see
+// do_rhash), so nothing needs to be emitted here.
+static void do_preload_rhash(u_int r) {
+ (void)r; // intentionally unused on this backend; silences -Wunused-parameter
+}
+
+// Load the address of the mini hash table into host register ht by adding
+// the compile-time offset of mini_ht within dynarec_local to FP.
+// NOTE(review): assumes FP holds &dynarec_local at runtime — confirm against
+// the backend's register conventions.
+static void do_preload_rhtbl(u_int ht) {
+ emit_addimm64(FP, (u_char *)&mini_ht - (u_char *)&dynarec_local, ht);
+}
+
+// Hash virtual address rs into rh for the mini hash table: keep bits 3..7
+// (rs & 0xf8). The result is used directly as a byte offset into mini_ht by
+// do_miniht_load. On ARM this is a single AND-immediate instruction.
+static void do_rhash(u_int rs,u_int rh) {
+ emit_andimm(rs, 0xf8, rh);
+}
+
+// Fetch a mini_ht entry: advance ht by the hash offset in rh, then load the
+// entry's stored virtual address at [ht] into rh so do_miniht_jump can
+// compare it against the jump target.
+// NOTE(review): emit_ldst(0, 0, ...) presumably encodes a plain load with
+// zero offset — confirm against emit_ldst's definition.
+static void do_miniht_load(int ht, u_int rh) {
+ emit_add64(ht, rh, ht);
+ emit_ldst(0, 0, rh, ht, 0);
+}
+
+// Emit the mini-hash-table dispatch: compare the cached virtual address (rh,
+// loaded by do_miniht_load) with the jump target (rs). On a miss, fall back
+// to the full lookup via do_jump_vaddr; on a hit, load the cached host code
+// pointer from [ht,#8] and branch to it.
+static void do_miniht_jump(u_int rs, u_int rh, u_int ht) {
+ emit_cmp(rh, rs);
+ void *jaddr = out;
+ emit_jeq(0); // branch target patched just below to skip the slow path
+ do_jump_vaddr(rs);
+
+ set_jump_target(jaddr, out);
+ // Raw encoding of "ldr Xht, [Xht, #8]": 0xf9400000 is 64-bit LDR with
+ // unsigned immediate; the offset is scaled by 8, hence (8 >> 3).
+ assem_debug("ldr %s,[%s,#8]\n",regname64[ht], regname64[ht]);
+ output_w32(0xf9400000 | imm12_rn_rd(8 >> 3, ht, ht));
+ emit_jmpreg(ht);
+}
+
+// parsed by set_jump_target?
+// Emit code that inserts a mini_ht entry for return_address: materialize the
+// 32-bit virtual address into rt (movz+movk), register a linker record so the
+// host address of the target block can be patched in later, take the current
+// emit position via ADR into temp, then store both halves of the entry —
+// [1] = host code pointer (64-bit), [0] = virtual address (32-bit).
+static void do_miniht_insert(u_int return_address,u_int rt,int temp) {
+ emit_movz_lsl16((return_address>>16)&0xffff,rt);
+ emit_movk(return_address&0xffff,rt);
+ add_to_linker(out,return_address,1);
+ emit_adr(out,temp);
+ emit_writedword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
+ emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
+}
+
+static void clear_cache_arm64(char *start, char *end)
+{
+ // Avoid GCC's __clear_cache: it memoizes the icache/dcache line sizes,
+ // which can differ between cores on big.LITTLE systems.
+ static size_t icache_line_size = 0xffff, dcache_line_size = 0xffff;
+ uint64_t ctr, line;
+ size_t isz, dsz;
+
+ __asm__ volatile("mrs %0, ctr_el0" : "=r"(ctr));
+ isz = 4 << (ctr & 0xf);
+ dsz = 4 << ((ctr >> 16) & 0xf);
+
+ // Track the smallest line size observed on any core and always use that.
+ if (isz < icache_line_size)
+   icache_line_size = isz;
+ isz = icache_line_size;
+ if (dsz < dcache_line_size)
+   dcache_line_size = dsz;
+ dsz = dcache_line_size;
+
+ /* CTR_EL0.IDC set means data cache clean to the Point of Unification is
+    not required for instruction/data coherence; otherwise clean each line. */
+ if (!(ctr & (1u << 28))) {
+   // "civac" rather than "cvau": suggested workaround for Cortex-A53
+   // errata 819472, 826319, 827319 and 824069.
+   for (line = (uint64_t)start & ~(uint64_t)(dsz - 1); line < (uint64_t)end; line += dsz)
+     __asm__ volatile("dc civac, %0" : : "r"(line) : "memory");
+ }
+ __asm__ volatile("dsb ish" : : : "memory");
+
+ /* CTR_EL0.DIC set means instruction cache invalidation to the Point of
+    Unification is not required for instruction/data coherence. */
+ if (!(ctr & (1u << 29))) {
+   for (line = (uint64_t)start & ~(uint64_t)(isz - 1); line < (uint64_t)end; line += isz)
+     __asm__ volatile("ic ivau, %0" : : "r"(line) : "memory");
+
+   __asm__ volatile("dsb ish" : : : "memory");
+ }
+
+ __asm__ volatile("isb" : : : "memory");
+}