X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=deps%2Flightning%2Flib%2Fjit_ppc-cpu.c;h=8ea8e62cd39da32aeffac75af7ce3722e5153a00;hb=d481fb64f2aac7a36532142cda11fa43f5ca792f;hp=c4397add6cafb5b02b1df964b4276b5579ced008;hpb=70575e81838e2c8d842dd28c3fc7fbb91b395061;p=pcsx_rearmed.git diff --git a/deps/lightning/lib/jit_ppc-cpu.c b/deps/lightning/lib/jit_ppc-cpu.c index c4397add..8ea8e62c 100644 --- a/deps/lightning/lib/jit_ppc-cpu.c +++ b/deps/lightning/lib/jit_ppc-cpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2019 Free Software Foundation, Inc. + * Copyright (C) 2012-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -61,14 +61,20 @@ # define _FP_REGNO 31 # if __WORDSIZE == 32 # define ldr(r0,r1) ldr_i(r0,r1) +# define ldi(r0,i0) ldi_i(r0,i0) # define ldxi(r0,r1,i0) ldxi_i(r0,r1,i0) # define ldxr(r0,r1,r2) ldxr_i(r0,r1,r2) +# define str(r0,r1) str_i(r0,r1) +# define sti(i0,r0) sti_i(i0,r0) # define stxi(i0,r0,r1) stxi_i(i0,r0,r1) # define stxr(r0,r1,r2) stxr_i(r0,r1,r2) # else # define ldr(r0,r1) ldr_l(r0,r1) +# define ldi(r0,i0) ldi_l(r0,i0) # define ldxi(r0,r1,i0) ldxi_l(r0,r1,i0) # define ldxr(r0,r1,r2) ldxr_l(r0,r1,r2) +# define str(r0,r1) str_l(r0,r1) +# define sti(i0,r0) sti_l(i0,r0) # define stxi(i0,r0,r1) stxi_l(i0,r0,r1) # define stxr(r0,r1,r2) stxr_l(r0,r1,r2) # endif @@ -202,8 +208,21 @@ static void _FXS(jit_state_t*,int,int,int,int,int,int,int); # define XCMPLI(cr,l,a,u) FCI(10,cr,l,a,u) # define CMPLDI(a,s) XCMPLI(0,1,a,s) # define CMPLWI(a,s) XCMPLI(0,0,a,s) +# if __WORDSIZE == 32 +# define CMPX(a,b) CMPW(a,b) +# define CMPXI(a,s) CMPWI(a,s) +# define CMPLX(a,b) CMPLW(a,b) +# define CMPLXI(a,s) CMPLWI(a,s) +# else +# define CMPX(a,b) CMPD(a,b) +# define CMPXI(a,s) CMPDI(a,s) +# define CMPLX(a,b) CMPLD(a,b) +# define CMPLXI(a,s) CMPLDI(a,s) +# endif # define CNTLZW(a,s) FX(31,s,a,0,26) # define CNTLZW_(a,s) FX_(31,s,a,0,26) +# define CNTLZD(a,s) FX(31,s,a,0,58) +# define CNTLZD_(a,s) FX_(31,s,a,0,58) # define CRAND(d,a,b) FX(19,d,a,b,257) # define CRANDC(d,a,b) FX(19,d,a,b,129) # define CREQV(d,a,b) FX(19,d,a,b,289) @@ -260,7 +279,7 @@ static void _FXS(jit_state_t*,int,int,int,int,int,int,int); # define LHAU(d,a,s) FDs(43,d,a,s) # define LHAUX(d,a,b) FX(31,d,a,b,375) # define LHAX(d,a,b) FX(31,d,a,b,343) -# define LHRBX(d,a,b) FX(31,d,a,b,790) +# define LHBRX(d,a,b) FX(31,d,a,b,790) # define LHZ(d,a,s) FDs(40,d,a,s) # define LHZU(d,a,s) FDs(41,d,a,s) # define LHZUX(d,a,b) FX(31,d,a,b,311) @@ -271,6 +290,7 @@ static void _FXS(jit_state_t*,int,int,int,int,int,int,int); # define LSWI(d,a,n) FX(31,d,a,n,597) # define LSWX(d,a,b) FX(31,d,a,b,533) # define LWARX(d,a,b) FX(31,d,a,b,20) +# define LDARX(d,a,b) FX(31,d,a,b,84) # define LWBRX(d,a,b) FX(31,d,a,b,534) # define LWA(d,a,s) FDs(58,d,a,s|2) # define LWAUX(d,a,b) FX(31,d,a,b,373) @@ -281,7 +301,7 @@ static void _FXS(jit_state_t*,int,int,int,int,int,int,int); # define LWZX(d,a,b) FX(31,d,a,b,23) # define LD(d,a,s) FDs(58,d,a,s) # define LDX(d,a,b) FX(31,d,a,b,21) -# define MCRF(d,s) FXL(19,d<<2,(s)<<2,0) +# define MCRF(d,s) FXL(19,((d)<<2),((s)<<2),0) # if DEBUG /* In case instruction is emulated, check the kernel can handle it. Will only generate it if DEBUG is enabled. @@ -313,31 +333,31 @@ instruction will cause the system illegal instruction error handler to be invoked """ */ -# define MCRXR(d) FX(31,d<<2,0,0,512) +# define MCRXR(d) FX(31,((d)<<2),0,0,512) # else # define MCRXR(cr) _MCRXR(_jit,cr); static void _MCRXR(jit_state_t*, jit_int32_t); # endif # define MFCR(d) FX(31,d,0,0,19) # define MFMSR(d) FX(31,d,0,0,83) -# define MFSPR(d,s) FXFX(31,d,s<<5,339) +# define MFSPR(d,s) FXFX(31,d,((s)<<5),339) # define MFXER(d) MFSPR(d,1) # define MFLR(d) MFSPR(d,8) # define MFCTR(d) MFSPR(d,9) # define MFSR(d,s) FX(31,d,s,0,595) # define MFSRIN(d,b) FX(31,d,0,b,659) -# define MFTB(d,x,y) FXFX(31,d,(x)|((y)<<5),371) +# define MFTB(d,x,y) FXFX(31,d,((x)|((y)<<5)),371) # define MFTBL(d) MFTB(d,8,12) # define MFTBU(d) MFTB(d,8,13) -# define MTCRF(c,s) FXFX(31,s,c<<1,144) +# define MTCRF(c,s) FXFX(31,s,((c)<<1),144) # define MTCR(s) MTCRF(0xff,s) # define MTMSR(s) FX(31,s,0,0,146) -# define MTSPR(d,s) FXFX(31,d,s<<5,467) +# define MTSPR(d,s) FXFX(31,d,((s)<<5),467) # define MTXER(d) MTSPR(d,1) # define MTLR(d) MTSPR(d,8) # define MTCTR(d) MTSPR(d,9) -# define MTSR(r,s) FX(31,s<<1,r,0,210) -# define MTSRIN(r,b) FX(31,r<<1,0,b,242) +# define MTSR(r,s) FX(31,((s)<<1),r,0,210) +# define MTSRIN(r,b) FX(31,((r)<<1),0,b,242) # define MULLI(d,a,s) FDs(07,d,a,s) # define MULHW(d,a,b) FXO(31,d,a,b,0,75) # define MULHW_(d,a,b) FXO_(31,d,a,b,0,75) @@ -372,22 +392,23 @@ static void _MCRXR(jit_state_t*, jit_int32_t); # define ORI(d,a,u) FDu(24,a,d,u) # define NOP() ORI(0,0,0) # define ORIS(d,a,u) FDu(25,a,d,u) +# define POPCNTB(a,s) FX(31,s,a,0,122) # define RFI() FXL(19,0,0,50) # define RLWIMI(d,s,h,b,e) FM(20,s,d,h,b,e,0) # define RLWIMI_(d,s,h,b,e) FM(20,s,d,h,b,e,1) -# define INSLWI(a,s,n,b) RLWIMI(a,s,32-b,b,b+n-1) -# define INSRWI(a,s,n,b) RLWIMI(a,s,32-(b+n),b,(b+n)-1) +# define INSLWI(a,s,n,b) RLWIMI(a,s,(32-(b)),b,(((b)+(n))-1)) +# define INSRWI(a,s,n,b) RLWIMI(a,s,(32-((b)+(n))),b,(((b)+(n))-1)) # define RLWINM(a,s,h,b,e) FM(21,s,a,h,b,e,0) # define RLWINM_(a,s,h,b,e) FM(21,s,a,h,b,e,1) -# define EXTLWI(a,s,n,b) RLWINM(a,s,b,0,n-1) -# define EXTRWI(a,s,n,b) RLWINM(a,s,b+n,32-n,31) +# define EXTLWI(a,s,n,b) RLWINM(a,s,b,0,((n)-1)) +# define EXTRWI(a,s,n,b) RLWINM(a,s,((b)+(n)),(32-(n)),31) # define ROTLWI(a,s,n) RLWINM(a,s,n,0,31) -# define ROTRWI(a,s,n) RLWINM(a,s,32-n,0,31) -# define SLWI(a,s,n) RLWINM(a,s,n,0,31-n) -# define SRWI(a,s,n) RLWINM(a,s,32-n,n,31) +# define ROTRWI(a,s,n) RLWINM(a,s,(32-(n)),0,31) +# define SLWI(a,s,n) RLWINM(a,s,n,0,(31-(n))) +# define SRWI(a,s,n) RLWINM(a,s,(32-(n)),n,31) # define CLRLWI(a,s,n) RLWINM(a,s,0,n,31) -# define CLRRWI(a,s,n) RLWINM(a,s,0,0,31-n) -# define CLRLSWI(a,s,b,n) RLWINM(a,s,n,b-n,31-n) +# define CLRRWI(a,s,n) RLWINM(a,s,0,0,(31-(n))) +# define CLRLSWI(a,s,b,n) RLWINM(a,s,n,((b)-(n)),(31-(n))) # define RLWNM(a,s,b,m,e) FM(23,s,a,b,m,e,0) # define RLWNM_(a,s,b,m,e) FM(23,s,a,b,m,e,1) # define ROTLW(a,s,b) RLWNM(a,s,b,0,31) @@ -401,33 +422,34 @@ static void _MCRXR(jit_state_t*, jit_int32_t); # define SRW(a,s,b) FX(31,s,a,b,536) # define SRW_(a,s,b) FX_(31,s,a,b,536) # if __WORDSIZE == 64 -# define RLDICL(a,s,h,b) FMD(30,s,a,h&~32,b,0,h>>5) -# define RLDICL_(a,s,h,b) FMD_(30,s,a,h&~32,b,0,h>>5) -# define EXTRDI(x,y,n,b) RLDICL(x,y,(b+n),(64-n)) -# define SRDI(x,y,n) RLDICL(x,y,(64-n),n) +# define RLDICL(a,s,h,b) FMD(30,s,a,((h)&~32),b,0,((h)>>5)) +# define RLDICL_(a,s,h,b) FMD_(30,s,a,((h)&~32),b,0,((h)>>5)) +# define EXTRDI(x,y,n,b) RLDICL(x,y,((b)+(n)),(64-(n))) +# define SRDI(x,y,n) RLDICL(x,y,(64-(n)),n) # define CLRLDI(x,y,n) RLDICL(x,y,0,n) -# define RLDICR(a,s,h,e) FMD(30,s,a,h&~32,e,1,h>>5) -# define RLDICR_(a,s,h,e) FMD_(30,s,a,h&~32,e,1,h>>5) -# define EXTRLI(x,y,n,b) RLDICR(x,y,b,(n-1)) -# define SLDI(x,y,n) RLDICR(x,y,n,(63-n)) -# define CLRRDI(x,y,n) RLDICR(x,y,0,(63-n)) -# define RLDIC(a,s,h,b) FMD(30,s,a,h&~32,b,2,h>>5) -# define RLDIC_(a,s,h,b) FMD_(30,s,a,h&~32,b,2,h>>5) -# define CLRLSLDI(x,y,b,n) RLDIC(x,y,n,(b-n)) +# define RLDICR(a,s,h,e) FMD(30,s,a,((h)&~32),e,1,((h)>>5)) +# define RLDICR_(a,s,h,e) FMD_(30,s,a,((h)&~32),e,1,((h)>>5)) +# define EXTLDI(x,y,n,b) RLDICR(x,y,b,((n)-1)) +# define SLDI(x,y,n) RLDICR(x,y,n,(63-(n))) +# define CLRRDI(x,y,n) RLDICR(x,y,0,(63-(n))) +# define RLDIC(a,s,h,b) FMD(30,s,a,((h)&~32),b,2,((h)>>5)) +# define RLDIC_(a,s,h,b) FMD_(30,s,a,((h)&~32),b,2,((h)>>5)) +# define CLRLSLDI(x,y,b,n) RLDIC(x,y,n,((b)-(n))) # define RLDCL(a,s,h,b) FMDS(30,s,a,h,b,8) # define RLDCL_(a,s,h,b) FMDS_(30,s,a,h,b,8) # define ROTLD(x,y,z) RLDCL(x,y,z,0) # define RLDCR(a,s,b,e) FMDS(30,s,a,b,e,0) # define RLDCR_(a,s,b,e) FMDS_(30,s,a,b,e,0) -# define RLDIMI(a,s,h,b) FMD(30,s,a,h&~32,b,3,h>>5) -# define RLDIMI_(a,s,h,b) FMD_(30,s,a,h&~32,b,3,h>>5) -# define INSRDI(x,y,n,b) RLDIMI(x,y,(64-(b+n)),b) +# define RLDIMI(a,s,h,b) FMD(30,s,a,((h)&~32),b,3,((h)>>5)) +# define RLDIMI_(a,s,h,b) FMD_(30,s,a,((h)&~32),b,3,((h)>>5)) +# define INSLDI(x,y,n,b) RLDIMI(x,y,(64-(b)),(((b)+(n))-1)) +# define INSRDI(x,y,n,b) RLDIMI(x,y,(64-((b)+(n))),b) # define SLD(a,s,b) FX(31,s,a,b,27) # define SLD_(a,s,b) FX_(31,s,a,b,27) # define SRD(a,s,b) FX(31,s,a,b,539) # define SRD_(a,s,b) FX_(31,s,a,b,539) -# define SRADI(a,s,h) FXS(31,s,a,h&~32,413,h>>5) -# define SRADI_(a,s,h) FXS_(31,s,a,h&~32,413,h>>5) +# define SRADI(a,s,h) FXS(31,s,a,((h)&~32),413,((h)>>5)) +# define SRADI_(a,s,h) FXS_(31,s,a,((h)&~32),413,((h)>>5)) # define SRAD(a,s,b) FX(31,s,a,b,794) # define SRAD_(a,s,b) FX_(31,s,a,b,794) # endif @@ -446,12 +468,13 @@ static void _MCRXR(jit_state_t*, jit_int32_t); # define STW(s,a,d) FDs(36,s,a,d) # define STWBRX(s,a,b) FX(31,s,a,b,662) # define STWCX_(s,a,b) FX_(31,s,a,b,150) +# define STDCX_(s,a,b) FX_(31,s,a,b,214) # define STWU(s,a,d) FDs(37,s,a,d) # define STWUX(s,a,b) FX(31,s,a,b,183) # define STWX(s,a,b) FX(31,s,a,b,151) # define STD(s,a,d) FDs(62,s,a,d) # define STDX(s,a,b) FX(31,s,a,b,149) -# define STDU(s,a,d) FDs(62,s,a,d|1) +# define STDU(s,a,d) FDs(62,s,a,((d)|1)) # define STDUX(s,a,b) FX(31,s,a,b,181) # define SUBF(d,a,b) FXO(31,d,a,b,0,40) # define SUBF_(d,a,b) FXO_(31,d,a,b,0,40) @@ -461,15 +484,15 @@ static void _MCRXR(jit_state_t*, jit_int32_t); # define SUB_(d,a,b) SUBF_(d,b,a) # define SUBO(d,a,b) SUBFO(d,b,a) # define SUBO_(d,a,b) SUBFO_(d,b,a) -# define SUBI(d,a,s) ADDI(d,a,-s) -# define SUBIS(d,a,s) ADDIS(d,a,-s) +# define SUBI(d,a,s) ADDI(d,a,-(s)) +# define SUBIS(d,a,s) ADDIS(d,a,-(s)) # define SUBFC(d,a,b) FXO(31,d,a,b,0,8) # define SUBFC_(d,a,b) FXO_(31,d,a,b,0,8) # define SUBFCO(d,a,b) FXO(31,d,a,b,1,8) # define SUBFCO_(d,a,b) FXO_(31,d,a,b,1,8) # define SUBC(d,a,b) SUBFC(d,b,a) -# define SUBIC(d,a,s) ADDIC(d,a,-s) -# define SUBIC_(d,a,s) ADDIC_(d,a,-s) +# define SUBIC(d,a,s) ADDIC(d,a,-(s)) +# define SUBIC_(d,a,s) ADDIC_(d,a,-(s)) # define SUBFE(d,a,b) FXO(31,d,a,b,0,136) # define SUBFE_(d,a,b) FXO_(31,d,a,b,0,136) # define SUBFEO(d,a,b) FXO(31,d,a,b,1,136) @@ -505,10 +528,38 @@ static void _nop(jit_state_t*,jit_int32_t); static void _movr(jit_state_t*,jit_int32_t,jit_int32_t); # define movi(r0,i0) _movi(_jit,r0,i0) static void _movi(jit_state_t*,jit_int32_t,jit_word_t); +# define movnr(r0,r1,r2) _movnr(_jit,r0,r1,r2) +static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2) +static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define movi_p(r0,i0) _movi_p(_jit,r0,i0) static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t); +# define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0) +static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t,jit_word_t); +#define casr(r0, r1, r2, r3) casx(r0, r1, r2, r3, 0) +#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0) # define negr(r0,r1) NEG(r0,r1) # define comr(r0,r1) NOT(r0,r1) +# define clor(r0, r1) _clor(_jit, r0, r1) +static void _clor(jit_state_t*, jit_int32_t, jit_int32_t); +# if __WORDSIZE == 32 +# define clzr(r0, r1) CNTLZW(r0, r1) +# else +# define clzr(r0, r1) CNTLZD(r0, r1) +# endif +# define ctor(r0, r1) _ctor(_jit, r0, r1) +static void _ctor(jit_state_t*, jit_int32_t, jit_int32_t); +# define ctzr(r0, r1) _ctzr(_jit, r0, r1) +static void _ctzr(jit_state_t*, jit_int32_t, jit_int32_t); +# define popcntr(r0, r1) _popcntr(_jit, r0, r1) +static void _popcntr(jit_state_t*, jit_int32_t, jit_int32_t); +# define extr(r0,r1,i0,i1) _extr(_jit,r0,r1,i0,i1) +static void _extr(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t,jit_word_t); +# define extr_u(r0,r1,i0,i1) _extr_u(_jit,r0,r1,i0,i1) +static void _extr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t,jit_word_t); +# define depr(r0,r1,i0,i1) _depr(_jit,r0,r1,i0,i1) +static void _depr(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t,jit_word_t); # define extr_c(r0,r1) EXTSB(r0,r1) # define extr_uc(r0,r1) ANDI_(r0,r1,0xff) # define extr_s(r0,r1) EXTSH(r0,r1) @@ -517,23 +568,14 @@ static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t); # define extr_i(r0,r1) EXTSW(r0,r1) # define extr_ui(r0,r1) CLRLDI(r0,r1,32) # endif -# if __BYTE_ORDER == __BIG_ENDIAN -# define htonr_us(r0,r1) extr_us(r0,r1) -# if __WORDSIZE == 32 -# define htonr_ui(r0,r1) movr(r0,r1) -# else -# define htonr_ui(r0,r1) extr_ui(r0,r1) -# define htonr_ul(r0,r1) movr(r0,r1) -# endif -# else -# define htonr_us(r0,r1) _htonr_us(_jit,r0,r1) -static void _htonr_us(jit_state_t*,jit_int32_t,jit_int32_t); -# define htonr_ui(r0,r1) _htonr_ui(_jit,r0,r1) -static void _htonr_ui(jit_state_t*,jit_int32_t,jit_int32_t); -# if __WORDSIZE == 64 -# define htonr_ul(r0,r1) _htonr_ul(_jit,r0,r1) -static void _htonr_ul(jit_state_t*,jit_int32_t,jit_int32_t); -# endif +# define bswapr_us_lh(r0,r1,no_flag) _bswapr_us(_jit,r0,r1,no_flag) +# define bswapr_us(r0,r1) _bswapr_us(_jit,r0,r1,0) +static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_bool_t); +# define bswapr_ui_lw(r0,r1,no_flag) _bswapr_ui(_jit,r0,r1,no_flag) +# define bswapr_ui(r0,r1) _bswapr_ui(_jit,r0,r1,0) +static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t,jit_bool_t); +# if __WORDSIZE == 64 +# define bswapr_ul(r0,r1) generic_bswapr_ul(_jit,r0,r1) # endif # define addr(r0,r1,r2) ADD(r0,r1,r2) # define addi(r0,r1,i0) _addi(_jit,r0,r1,i0) @@ -558,16 +600,20 @@ static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # if __WORDSIZE == 32 # define mulr(r0,r1,r2) MULLW(r0,r1,r2) # define mullr(r0,r1,r2) MULLW(r0,r1,r2) -# define mulhr(r0,r1,r2) MULHW(r0,r1,r2) -# define mulhr_u(r0,r1,r2) MULHWU(r0,r1,r2) +# define hmulr(r0,r1,r2) MULHW(r0,r1,r2) +# define hmulr_u(r0,r1,r2) MULHWU(r0,r1,r2) # else # define mulr(r0,r1,r2) MULLD(r0,r1,r2) # define mullr(r0,r1,r2) MULLD(r0,r1,r2) -# define mulhr(r0,r1,r2) MULHD(r0,r1,r2) -# define mulhr_u(r0,r1,r2) MULHDU(r0,r1,r2) +# define hmulr(r0,r1,r2) MULHD(r0,r1,r2) +# define hmulr_u(r0,r1,r2) MULHDU(r0,r1,r2) # endif # define muli(r0,r1,i0) _muli(_jit,r0,r1,i0) static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define hmuli(r0,r1,i0) _hmuli(_jit,r0,r1,i0) +static void _hmuli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define hmuli_u(r0,r1,i0) _hmuli_u(_jit,r0,r1,i0) +static void _hmuli_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # define qmulr(r0,r1,r2,r3) iqmulr(r0,r1,r2,r3,1) # define qmulr_u(r0,r1,r2,r3) iqmulr(r0,r1,r2,r3,0) # define iqmulr(r0,r1,r2,r3,cc) _iqmulr(_jit,r0,r1,r2,r3,cc) @@ -624,8 +670,18 @@ static void _xori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # else # define lshr(r0,r1,r2) SLD(r0,r1,r2) # endif +#define qlshr(r0,r1,r2,r3) xlshr(1,r0,r1,r2,r3) +#define xlshr(s,r0,r1,r2,r3) _xlshr(_jit,s,r0,r1,r2,r3) +static void +_xlshr(jit_state_t*,jit_bool_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); # define lshi(r0,r1,i0) _lshi(_jit,r0,r1,i0) static void _lshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define qlshi(r0, r1, r2, i0) xlshi(1, r0, r1, r2, i0) +# define xlshi(s, r0, r1, r2, i0) _xlshi(_jit, s, r0, r1, r2, i0) +static void +_xlshi(jit_state_t*,jit_bool_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t); +# define qlshr_u(r0, r1, r2, r3) xlshr(0, r0, r1, r2, r3) +# define qlshi_u(r0, r1, r2, i0) xlshi(0, r0, r1, r2, i0) # if __WORDSIZE == 32 # define rshr(r0,r1,r2) SRAW(r0,r1,r2) # else @@ -640,6 +696,27 @@ static void _rshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # endif # define rshi_u(r0,r1,i0) _rshi_u(_jit,r0,r1,i0) static void _rshi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define qrshr(r0, r1, r2, r3) xrshr(1, r0, r1, r2, r3) +# define qrshr_u(r0, r1, r2, r3) xrshr(0, r0, r1, r2, r3) +# define xrshr(s, r0, r1, r2, r3) _xrshr(_jit, s, r0, r1, r2, r3) +static void +_xrshr(jit_state_t*,jit_bool_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define qrshi(r0, r1, r2, i0) xrshi(1, r0, r1, r2, i0) +# define qrshi_u(r0, r1, r2, i0) xrshi(0, r0, r1, r2, i0) +# define xrshi(s, r0, r1, r2, i0) _xrshi(_jit, s, r0, r1, r2, i0) +static void +_xrshi(jit_state_t*,jit_bool_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t); +# if __WORDSIZE == 32 +# define lrotr(r0,r1,r2) ROTLW(r0,r1,r2) +# else +# define lrotr(r0,r1,r2) ROTLD(r0,r1,r2) +# endif +# define lroti(r0,r1,i0) _lroti(_jit,r0,r1,i0) +static void _lroti(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define rrotr(r0,r1,r2) _rrotr(_jit,r0,r1,r2) +static void _rrotr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define rroti(r0,r1,i0) _rroti(_jit,r0,r1,i0) +static void _rroti(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # define ltr(r0,r1,r2) _ltr(_jit,r0,r1,r2) static void _ltr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define lti(r0,r1,i0) _lti(_jit,r0,r1,i0) @@ -856,14 +933,14 @@ static jit_word_t _jmpi_p(jit_state_t*,jit_word_t) maybe_unused; # define callr(r0,i0) _callr(_jit,r0,i0) static void _callr(jit_state_t*,jit_int32_t,jit_int32_t); # define calli(i0,i1) _calli(_jit,i0,i1) -static void _calli(jit_state_t*,jit_word_t,jit_int32_t); +static jit_word_t _calli(jit_state_t*,jit_word_t,jit_int32_t); # define calli_p(i0,i1) _calli_p(_jit,i0,i1) static jit_word_t _calli_p(jit_state_t*,jit_word_t,jit_int32_t); # else # define callr(r0) _callr(_jit,r0) static void _callr(jit_state_t*,jit_int32_t); # define calli(i0) _calli(_jit,i0) -static void _calli(jit_state_t*,jit_word_t); +static jit_word_t _calli(jit_state_t*,jit_word_t); # define calli_p(i0) _calli_p(_jit,i0) static jit_word_t _calli_p(jit_state_t*,jit_word_t); #endif @@ -1120,6 +1197,22 @@ _movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) } } +static void +_movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + CMPXI(r2, 0); + BEQ(8); + MR(r0, r1); +} + +static void +_movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + CMPXI(r2, 0); + BNE(8); + MR(r0, r1); +} + static jit_word_t _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) { @@ -1138,47 +1231,225 @@ _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) return (word); } -# if __BYTE_ORDER == __LITTLE_ENDIAN static void -_htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3, jit_word_t i0) +{ + jit_int32_t r1_reg, iscasi; + jit_word_t retry, done, jump0, jump1; + if ((iscasi = (r1 == _NOREG))) { + r1_reg = jit_get_reg(jit_class_gpr); + r1 = rn(r1_reg); + movi(r1, i0); + } + SYNC(); + /* retry: */ + retry = _jit->pc.w; +# if __WORDSIZE == 32 + LWARX(r0, _R0_REGNO, r1); +# else + LDARX(r0, _R0_REGNO, r1); +# endif + jump0 = bner(_jit->pc.w, r0, r2); /* bne done r0 r2 */ +# if __WORDSIZE == 32 + STWCX_(r3, _R0_REGNO, r1); +# else + STDCX_(r3, _R0_REGNO, r1); +# endif + jump1 = _jit->pc.w; + BNE(0); /* BNE retry */ + /* done: */ + done = _jit->pc.w; + ISYNC(); + MFCR(r0); + EXTRWI(r0, r0, 1, CR_EQ); + patch_at(jump0, done); + patch_at(jump1, retry); + if (iscasi) + jit_unget_reg(r1_reg); +} + +static void +_clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + comr(r0, r1); + clzr(r0, r0); +} + +static void +_ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + comr(r0, r1); + ctzr(r0, r0); +} + +static void +_ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { - jit_int32_t t0; + jit_int32_t t0, t1; t0 = jit_get_reg(jit_class_gpr); - rshi(rn(t0), r1, 8); - andi(r0, r1, 0xff); - andi(rn(t0), rn(t0), 0xff); - lshi(r0, r0, 8); - orr(r0, r0, rn(t0)); + t1 = jit_get_reg(jit_class_gpr); + negr(rn(t0), r1); + andr(rn(t0), rn(t0), r1); + clzr(r0, rn(t0)); + xori(rn(t1), r0, __WORDSIZE - 1); + movnr(r0, rn(t1), rn(t0)); jit_unget_reg(t0); + jit_unget_reg(t1); } static void -_htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +_popcntr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { jit_int32_t reg; reg = jit_get_reg(jit_class_gpr); - ROTLWI(rn(reg), r1, 8); - RLWIMI(rn(reg), r1, 24, 0, 7); - RLWIMI(rn(reg), r1, 24, 16, 23); - CLRLDI(r0, rn(reg), 32); + POPCNTB(r0, r1); +#if __WORDSIZE == 32 + movi(rn(reg), 0x01010101); +#else + movi(rn(reg), 0x0101010101010101); +#endif + mullr(r0, r0, rn(reg)); + rshi_u(r0, r0, __WORDSIZE - 8); jit_unget_reg(reg); } -# if __WORDSIZE == 64 static void -_htonr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +_extr(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_word_t i0 ,jit_word_t i1) { - jit_int32_t reg; + assert(i0 >= 0 && i1 >= 1 && i0 + i1 <= __WORDSIZE); + if ( i1 == __WORDSIZE) + movr(r0, r1); + else { +# if __BYTE_ORDER == __BIG_ENDIAN + i0 = __WORDSIZE - (i0 + i1); +# endif + if (__WORDSIZE - (i0 + i1)) { + lshi(r0, r1, __WORDSIZE - (i0 + i1)); + rshi(r0, r0, __WORDSIZE - i1); + } + else + rshi(r0, r1, __WORDSIZE - i1); + } +} + +static void +_extr_u(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_word_t i0 ,jit_word_t i1) +{ + assert(i0 >= 0 && i1 >= 1 && i0 + i1 <= __WORDSIZE); + if (i1 == __WORDSIZE) + movr(r0, r1); + else { +# if __BYTE_ORDER == __BIG_ENDIAN + i0 = __WORDSIZE - (i0 + i1); +# endif +# if __WORDSIZE == 32 + RLWINM(r0, r1, (32 - i0) & 0x1f, 32 - i1, 31); +# else + RLDICL(r0, r1, (64 - i0) & 0x3f, 64 - i1); +# endif + } +} + +static void +_depr(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_word_t i0 ,jit_word_t i1) +{ + assert(i0 >= 0 && i1 >= 1 && i0 + i1 <= __WORDSIZE); + if (i1 == __WORDSIZE) + movr(r0, r1); + else { +# if __BYTE_ORDER == __BIG_ENDIAN + i0 = __WORDSIZE - (i0 + i1); +# endif +#if __WORDSIZE == 32 + RLWIMI(r0, r1, i0, 32 - (i0 + i1), 31 - i0); +#else + RLDIMI(r0, r1, i0, 64 - (i0 + i1)); +#endif + } +} + +static void +_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_bool_t no_flag) +{ + jit_int32_t reg, addr_reg; + + /* Convert load followed by bswap to a single instruction */ + /* FIXME r0 and r1 do not need to be the same, only must check if + * r1 was loaded in previous instruction */ + if (no_flag && r0 == r1) { + if ((*(_jit->pc.ui - 1) & 0xffe007ff) == (0x7c00022e | r0 << 21)) { + /* Convert LHZX to LHBRX */ + _jit->pc.ui--; + LHBRX(r0, (*_jit->pc.ui >> 16) & 0x1f, (*_jit->pc.ui >> 11) & 0x1f); + return; + } + + if ((*(_jit->pc.ui - 1) & 0xffe00000) == (0xa0000000 | r0 << 21)) { + /* Convert LHZ to LHBRX */ + _jit->pc.ui--; + addr_reg = (*_jit->pc.ui >> 16) & 0x1f; + + reg = jit_get_reg(jit_class_gpr); + LI(rn(reg), (short)*_jit->pc.ui); + LHBRX(r0, rn(reg), addr_reg); + jit_unget_reg(reg); + return; + } + } + + if (r0 == r1) { + RLWIMI(r0, r0, 16, 8, 15); + RLWINM(r0, r0, 24, 16, 31); + } else { + RLWINM(r0, r1, 8, 16, 23); + RLWIMI(r0, r1, 24, 24, 31); + } +} + +static void +_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_bool_t no_flag) +{ + jit_int32_t reg, addr_reg; + + /* Convert load followed by bswap to a single instruction */ + /* FIXME r0 and r1 do not need to be the same, only must check if + * r1 was loaded in previous instruction */ + if (no_flag && r0 == r1) { + if ((*(_jit->pc.ui - 1) & 0xffe007ff) == (0x7c00002e | r0 << 21)) { + /* Convert LWZX to LWBRX */ + _jit->pc.ui--; + LWBRX(r0, (*_jit->pc.ui >> 16) & 0x1f, (*_jit->pc.ui >> 11) & 0x1f); + return; + } + + if ((*(_jit->pc.ui - 1) & 0xffe00000) == (0x80000000 | r0 << 21)) { + /* Convert LWZ to LWBRX */ + _jit->pc.ui--; + addr_reg = (*_jit->pc.ui >> 16) & 0x1f; + + reg = jit_get_reg(jit_class_gpr); + LI(rn(reg), (short)*_jit->pc.ui); + LWBRX(r0, rn(reg), addr_reg); + jit_unget_reg(reg); + return; + } + } + reg = jit_get_reg(jit_class_gpr); - rshi_u(rn(reg), r1, 32); - htonr_ui(r0, r1); - htonr_ui(rn(reg), rn(reg)); - lshi(r0, r0, 32); - orr(r0, r0, rn(reg)); + ROTLWI(rn(reg), r1, 8); + RLWIMI(rn(reg), r1, 24, 0, 7); + RLWIMI(rn(reg), r1, 24, 16, 23); +# if __WORDSIZE == 64 + CLRLDI(r0, rn(reg), 32); +# else + MR(r0,rn(reg)); +# endif jit_unget_reg(reg); } -# endif -# endif static void _addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) @@ -1268,6 +1539,9 @@ static void _muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; + /* NOTE verified and overflow is correctly computed. + * No need to check for __WORDSIZE == 32. + * Documented as a 32 bit instruction. */ if (can_sign_extend_short_p(i0)) MULLI(r0, r1, i0); else { @@ -1278,6 +1552,26 @@ _muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } } +static void +_hmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + hmulr(r0, r1, rn(reg)); + jit_unget_reg(reg); +} + +static void +_hmuli_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + hmulr_u(r0, r1, rn(reg)); + jit_unget_reg(reg); +} + static void _iqmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3, jit_bool_t sign) @@ -1290,9 +1584,9 @@ _iqmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, else mullr(r0, r2, r3); if (sign) - mulhr(r1, r2, r3); + hmulr(r1, r2, r3); else - mulhr_u(r1, r2, r3); + hmulr_u(r1, r2, r3); if (r0 == r2 || r0 == r3) { movr(r0, rn(reg)); jit_unget_reg(reg); @@ -1433,15 +1727,23 @@ _remi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) jit_unget_reg(reg); } +# define is_mask(im) ((im) ? (__builtin_popcountl((im) + (1 << __builtin_ctzl(im))) <= 1) : 0) + static void _andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { - jit_int32_t reg; + jit_int32_t reg, offt; if (can_zero_extend_short_p(i0)) ANDI_(r0, r1, i0); else if (can_zero_extend_int_p(i0) && !(i0 & 0x0000ffff)) ANDIS_(r0, r1, (jit_uword_t)i0 >> 16); - else { + else if (__WORDSIZE == 32 && is_mask(i0)) { + offt = __builtin_ctzl(i0); + RLWINM(r0, r1, 0, 32 - offt - __builtin_popcountl(i0), 31 - offt); + } else if (__WORDSIZE == 32 && is_mask(~i0)) { + offt = __builtin_ctzl(~i0); + RLWINM(r0, r1, 0, 32 - offt, 31 - offt - __builtin_popcountl(~i0)); + } else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i0); AND(r0, r1, rn(reg)); @@ -1495,6 +1797,81 @@ _lshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } } +static void +_xlshr(jit_state_t *_jit, jit_bool_t sign, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) +{ + jit_int32_t t0, s0, t2, s2, t3, s3; + jit_word_t over, zero, done, done_over; + s0 = jit_get_reg(jit_class_gpr); + t0 = rn(s0); + if (r0 == r2 || r1 == r2) { + s2 = jit_get_reg(jit_class_gpr); + t2 = rn(s2); + movr(t2, r2); + } + else + t2 = r2; + if (r0 == r3 || r1 == r3) { + s3 = jit_get_reg(jit_class_gpr); + t3 = rn(s3); + movr(t3, r3); + } + else + t3 = r3; + rsbi(t0, t3, __WORDSIZE); + lshr(r0, t2, t3); + if (sign) + rshr(r1, t2, t0); + else + rshr_u(r1, t2, t0); + zero = beqi(_jit->pc.w, t3, 0); + over = beqi(_jit->pc.w, t3, __WORDSIZE); + done = fallback_jmpi(_jit->pc.w); + patch_at(over, _jit->pc.w); + /* overflow */ + movi(r0, 0); + done_over = fallback_jmpi(_jit->pc.w); + /* zero */ + patch_at(zero, _jit->pc.w); + if (sign) + rshi(r1, t2, __WORDSIZE - 1); + else + movi(r1, 0); + patch_at(done, _jit->pc.w); + patch_at(done_over, _jit->pc.w); + jit_unget_reg(s0); + if (t2 != r2) + jit_unget_reg(s2); + if (t3 != r3) + jit_unget_reg(s3); +} + +static void +_xlshi(jit_state_t *_jit, jit_bool_t sign, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_word_t i0) +{ + if (i0 == 0) { + movr(r0, r2); + if (sign) + rshi(r1, r2, __WORDSIZE - 1); + else + movi(r1, 0); + } + else if (i0 == __WORDSIZE) { + movr(r1, r2); + movi(r0, 0); + } + else { + assert((jit_uword_t)i0 <= __WORDSIZE); + if (sign) + rshi(r1, r2, __WORDSIZE - i0); + else + rshi_u(r1, r2, __WORDSIZE - i0); + lshi(r0, r2, i0); + } +} + static void _rshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { @@ -1523,10 +1900,129 @@ _rshi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } } +static void +_xrshr(jit_state_t *_jit, jit_bool_t sign, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) +{ + jit_int32_t t0, s0, t2, s2, t3, s3; + jit_word_t over, zero, done, done_over; + s0 = jit_get_reg(jit_class_gpr); + t0 = rn(s0); + if (r0 == r2 || r1 == r2) { + s2 = jit_get_reg(jit_class_gpr); + t2 = rn(s2); + movr(t2, r2); + } + else + t2 = r2; + if (r0 == r3 || r1 == r3) { + s3 = jit_get_reg(jit_class_gpr); + t3 = rn(s3); + movr(t3, r3); + } + else + t3 = r3; + rsbi(t0, t3, __WORDSIZE); + if (sign) + rshr(r0, t2, t3); + else + rshr_u(r0, t2, t3); + lshr(r1, t2, t0); + zero = beqi(_jit->pc.w, t3, 0); + over = beqi(_jit->pc.w, t3, __WORDSIZE); + done = jmpi(_jit->pc.w); + patch_at(over, _jit->pc.w); + /* underflow */ + if (sign) + rshi(r0, t2, __WORDSIZE - 1); + else + movi(r0, 0); + done_over = jmpi(_jit->pc.w); + /* zero */ + patch_at(zero, _jit->pc.w); + movi(r1, 0); + patch_at(done, _jit->pc.w); + patch_at(done_over, _jit->pc.w); + jit_unget_reg(s0); + if (t2 != r2) + jit_unget_reg(s2); + if (t3 != r3) + jit_unget_reg(s3); +} + +static void +_xrshi(jit_state_t *_jit, jit_bool_t sign, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_word_t i0) +{ + if (i0 == 0) { + movr(r0, r2); + movi(r1, 0); + } + else if (i0 == __WORDSIZE) { + movr(r1, r2); + if (sign) + rshi(r0, r2, __WORDSIZE - 1); + else + movi(r0, 0); + } + else { + assert((jit_uword_t)i0 <= __WORDSIZE); + lshi(r1, r2, __WORDSIZE - i0); + if (sign) + rshi(r0, r2, i0); + else + rshi_u(r0, r2, i0); + } +} + +static void +_lroti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + if (i0 == 0) + movr(r0, r1); + else { +# if __WORDSIZE == 32 + ROTLWI(r0, r1, i0); +# else + RLDICL(r0, r1, i0, 0); +# endif + } +} + +static void +_rrotr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_int32_t reg; + if (r0 != r1 && r0 != r2) { + rsbi(r0, r2, __WORDSIZE); + lrotr(r0, r1, r0); + } + else { + reg = jit_get_reg(jit_class_gpr); + rsbi(rn(reg), r2, __WORDSIZE); + lrotr(r0, r1, rn(reg)); + jit_unget_reg(reg); + } +} + +static void +_rroti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + if (i0 == 0) + movr(r0, r1); + else { +# if __WORDSIZE == 32 + ROTRWI(r0, r1, i0); +# else + RLDICL(r0, r1, 64 - i0, 0); +# endif + } +} + static void _ltr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { - CMPW(r1, r2); + CMPX(r1, r2); MFCR(r0); EXTRWI(r0, r0, 1, CR_LT); } @@ -1536,11 +2032,11 @@ _lti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; if (can_sign_extend_short_p(i0)) - CMPWI(r1, i0); + CMPXI(r1, i0); else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i0); - CMPW(r1, rn(reg)); + CMPX(r1, rn(reg)); jit_unget_reg(reg); } MFCR(r0); @@ -1574,7 +2070,7 @@ _lti_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) static void _ler(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { - CMPW(r1, r2); + CMPX(r1, r2); CRNOT(CR_GT, CR_GT); MFCR(r0); EXTRWI(r0, r0, 1, CR_GT); @@ -1585,11 +2081,11 @@ _lei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; if (can_sign_extend_short_p(i0)) - CMPWI(r1, i0); + CMPXI(r1, i0); else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i0); - CMPW(r1, rn(reg)); + CMPX(r1, rn(reg)); jit_unget_reg(reg); } CRNOT(CR_GT, CR_GT); @@ -1626,7 +2122,7 @@ _lei_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) static void _eqr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { - CMPW(r1, r2); + CMPX(r1, r2); MFCR(r0); EXTRWI(r0, r0, 1, CR_EQ); } @@ -1636,13 +2132,13 @@ _eqi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; if (can_sign_extend_short_p(i0)) - CMPWI(r1, i0); + CMPXI(r1, i0); else if (can_zero_extend_short_p(i0)) CMPLWI(r1, i0); else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i0); - CMPW(r1, rn(reg)); + CMPX(r1, rn(reg)); jit_unget_reg(reg); } MFCR(r0); @@ -1652,7 +2148,7 @@ _eqi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) static void _ger(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { - CMPW(r1, r2); + CMPX(r1, r2); CRNOT(CR_LT, CR_LT); MFCR(r0); EXTRWI(r0, r0, 1, CR_LT); @@ -1663,11 +2159,11 @@ _gei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; if (can_sign_extend_short_p(i0)) - CMPWI(r1, i0); + CMPXI(r1, i0); else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i0); - CMPW(r1, rn(reg)); + CMPX(r1, rn(reg)); jit_unget_reg(reg); } CRNOT(CR_LT, CR_LT); @@ -1704,7 +2200,7 @@ _gei_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) static void _gtr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { - CMPW(r1, r2); + CMPX(r1, r2); MFCR(r0); EXTRWI(r0, r0, 1, CR_GT); } @@ -1714,11 +2210,11 @@ _gti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; if (can_sign_extend_short_p(i0)) - CMPWI(r1, i0); + CMPXI(r1, i0); else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i0); - CMPW(r1, rn(reg)); + CMPX(r1, rn(reg)); jit_unget_reg(reg); } MFCR(r0); @@ -1752,7 +2248,7 @@ _gti_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) static void _ner(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { - CMPW(r1, r2); + CMPX(r1, r2); CRNOT(CR_EQ, CR_EQ); MFCR(r0); EXTRWI(r0, r0, 1, CR_EQ); @@ -1763,13 +2259,13 @@ _nei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; if (can_sign_extend_short_p(i0)) - CMPWI(r1, i0); + CMPXI(r1, i0); else if (can_zero_extend_short_p(i0)) CMPLWI(r1, i0); else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i0); - CMPW(r1, rn(reg)); + CMPX(r1, rn(reg)); jit_unget_reg(reg); } CRNOT(CR_EQ, CR_EQ); @@ -1781,7 +2277,7 @@ static jit_word_t _bltr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { jit_word_t d, w; - CMPW(r0, r1); + CMPX(r0, r1); w = _jit->pc.w; d = (i0 - w) & ~3; BLT(d); @@ -1794,11 +2290,11 @@ _blti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) jit_int32_t reg; jit_word_t d, w; if (can_sign_extend_short_p(i1)) - CMPWI(r0, i1); + CMPXI(r0, i1); else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i1); - CMPW(r0, rn(reg)); + CMPX(r0, rn(reg)); jit_unget_reg(reg); } w = _jit->pc.w; @@ -1841,7 +2337,7 @@ static jit_word_t _bler(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { jit_word_t d, w; - CMPW(r0, r1); + CMPX(r0, r1); w = _jit->pc.w; d = (i0 - w) & ~3; BLE(d); @@ -1854,11 +2350,11 @@ _blei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) jit_int32_t reg; jit_word_t d, w; if (can_sign_extend_short_p(i1)) - CMPWI(r0, i1); + CMPXI(r0, i1); else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i1); - CMPW(r0, rn(reg)); + CMPX(r0, rn(reg)); jit_unget_reg(reg); } w = _jit->pc.w; @@ -1901,7 +2397,7 @@ static jit_word_t _beqr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { jit_word_t d, w; - CMPW(r0, r1); + CMPX(r0, r1); w = _jit->pc.w; d = (i0 - w) & ~3; BEQ(d); @@ -1914,13 +2410,13 @@ _beqi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) jit_int32_t reg; jit_word_t d, w; if (can_sign_extend_short_p(i1)) - CMPWI(r0, i1); + CMPXI(r0, i1); else if (can_zero_extend_short_p(i1)) CMPLWI(r0, i1); else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i1); - CMPW(r0, rn(reg)); + CMPX(r0, rn(reg)); jit_unget_reg(reg); } w = _jit->pc.w; @@ -1933,7 +2429,7 @@ static jit_word_t _bger(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { jit_word_t d, w; - CMPW(r0, r1); + CMPX(r0, r1); w = _jit->pc.w; d = (i0 - w) & ~3; BGE(d); @@ -1946,11 +2442,11 @@ _bgei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) jit_int32_t reg; jit_word_t d, w; if (can_sign_extend_short_p(i1)) - CMPWI(r0, i1); + CMPXI(r0, i1); else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i1); - CMPW(r0, rn(reg)); + CMPX(r0, rn(reg)); jit_unget_reg(reg); } w = _jit->pc.w; @@ -1993,7 +2489,7 @@ static jit_word_t _bgtr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { jit_word_t d, w; - CMPW(r0, r1); + CMPX(r0, r1); w = _jit->pc.w; d = (i0 - w) & ~3; BGT(d); @@ -2006,11 +2502,11 @@ _bgti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) jit_int32_t reg; jit_word_t d, w; if (can_sign_extend_short_p(i1)) - CMPWI(r0, i1); + CMPXI(r0, i1); else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i1); - CMPW(r0, rn(reg)); + CMPX(r0, rn(reg)); jit_unget_reg(reg); } w = _jit->pc.w; @@ -2053,7 +2549,7 @@ static jit_word_t _bner(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { jit_word_t d, w; - CMPW(r0, r1); + CMPX(r0, r1); w = _jit->pc.w; d = (i0 - w) & ~3; BNE(d); @@ -2066,13 +2562,13 @@ _bnei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) jit_int32_t reg; jit_word_t d, w; if (can_sign_extend_short_p(i1)) - CMPWI(r0, i1); + CMPXI(r0, i1); else if (can_zero_extend_short_p(i1)) CMPLWI(r0, i1); else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i1); - CMPW(r0, rn(reg)); + CMPX(r0, rn(reg)); jit_unget_reg(reg); } w = _jit->pc.w; @@ -2645,9 +3141,9 @@ _ldi_i(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) jit_bool_t inv; jit_int32_t reg; jit_word_t lo, hi; - if (can_sign_extend_short_p(i0)) + if (can_sign_extend_short_p(i0) && !(i0 & 3)) LWA(r0, _R0_REGNO, i0); - else if (can_sign_extend_int_p(i0)) { + else if (can_sign_extend_int_p(i0) && !(i0 & 3)) { hi = (jit_int16_t)((i0 >> 16) + ((jit_uint16_t)i0 >> 15)); lo = (jit_int16_t)(i0 - (hi << 16)); reg = jit_get_reg(jit_class_gpr); @@ -2671,7 +3167,7 @@ _ldxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) jit_int32_t reg; if (r1 == _R0_REGNO) { if (r2 != _R0_REGNO) - LWZX(r0, r2, r1); + LWAX(r0, r2, r1); else { reg = jit_get_reg(jit_class_gpr); movr(rn(reg), r1); @@ -2680,7 +3176,7 @@ _ldxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) } } else - LWZX(r0, r1, r2); + LWAX(r0, r1, r2); } static void @@ -2689,7 +3185,7 @@ _ldxi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) jit_int32_t reg; if (i0 == 0) ldr_i(r0, r1); - else if (can_sign_extend_short_p(i0)) { + else if (can_sign_extend_short_p(i0) && !(i0 & 3)) { if (r1 == _R0_REGNO) { reg = jit_get_reg(jit_class_gpr); movr(rn(reg), r1); @@ -2781,9 +3277,9 @@ _ldi_l(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) jit_bool_t inv; jit_int32_t reg; jit_word_t lo, hi; - if (can_sign_extend_short_p(i0)) + if (can_sign_extend_short_p(i0) && !(i0 & 3)) LD(r0, _R0_REGNO, i0); - else if (can_sign_extend_int_p(i0)) { + else if (can_sign_extend_int_p(i0) && !(i0 & 3)) { hi = (jit_int16_t)((i0 >> 16) + ((jit_uint16_t)i0 >> 15)); lo = (jit_int16_t)(i0 - (hi << 16)); reg = jit_get_reg(jit_class_gpr); @@ -2825,7 +3321,7 @@ _ldxi_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) jit_int32_t reg; if (i0 == 0) ldr_l(r0, r1); - else if (can_sign_extend_short_p(i0)) { + else if (can_sign_extend_short_p(i0) && !(i0 & 3)) { if (r1 == _R0_REGNO) { reg = jit_get_reg(jit_class_gpr); movr(rn(reg), r1); @@ -3055,9 +3551,9 @@ _sti_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0) jit_bool_t inv; jit_int32_t reg; jit_word_t lo, hi; - if (can_sign_extend_short_p(i0)) + if (can_sign_extend_short_p(i0) && !(i0 & 3)) STD(r0, _R0_REGNO, i0); - else if (can_sign_extend_int_p(i0)) { + else if (can_sign_extend_int_p(i0) && !(i0 & 3)) { hi = (jit_int16_t)((i0 >> 16) + ((jit_uint16_t)i0 >> 15)); lo = (jit_int16_t)(i0 - (hi << 16)); reg = jit_get_reg(jit_class_gpr); @@ -3099,7 +3595,7 @@ _stxi_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) jit_int32_t reg; if (i0 == 0) str_l(r0, r1); - else if (can_sign_extend_short_p(i0)) { + else if (can_sign_extend_short_p(i0) && !(i0 & 3)) { if (r0 == _R0_REGNO) { reg = jit_get_reg(jit_class_gpr); movr(rn(reg), i0); @@ -3200,21 +3696,28 @@ _callr(jit_state_t *_jit, jit_int32_t r0 } /* assume fixed address or reachable address */ -static void +static jit_word_t _calli(jit_state_t *_jit, jit_word_t i0 # if _CALL_SYSV , jit_int32_t varargs # endif ) { + jit_word_t w; # if _CALL_SYSV jit_word_t d; - d = (i0 - _jit->pc.w) & ~3; - if (can_sign_extend_jump_p(d)) + d = (i0 - _jit->pc.w - !!varargs * 4) & ~3; + if (can_sign_extend_jump_p(d)) { + /* Tell double arguments were passed in registers. */ + if (varargs) + CREQV(6, 6, 6); + w = _jit->pc.w; BL(d); + } else # endif { + w = _jit->pc.w; movi(_R12_REGNO, i0); callr(_R12_REGNO # if _CALL_SYSV @@ -3222,6 +3725,7 @@ _calli(jit_state_t *_jit, jit_word_t i0 # endif ); } + return (w); } /* absolute jump */ @@ -3242,20 +3746,15 @@ _calli_p(jit_state_t *_jit, jit_word_t i0 return (w); } -/* order is not guaranteed to be sequential */ -static jit_int32_t save[] = { - _R14, _R15, _R16, _R17, _R18, _R19, _R20, _R21, _R22, - _R23, _R24, _R25, _R26, _R27, _R28, _R29, _R30, _R31, -}; - static void _prolog(jit_state_t *_jit, jit_node_t *node) { - unsigned long regno; + jit_int32_t regno; jit_word_t offset; if (_jitc->function->define_frame || _jitc->function->assume_frame) { jit_int32_t frame = -_jitc->function->frame; + jit_check_frame(); assert(_jitc->function->self.aoff >= frame); if (_jitc->function->assume_frame) return; @@ -3269,36 +3768,66 @@ _prolog(jit_state_t *_jit, jit_node_t *node) _jitc->function->self.size - _jitc->function->self.aoff) + 15) & -16; - /* return address */ - MFLR(_R0_REGNO); + if (_jitc->function->need_frame) + _jitc->function->need_stack = _jitc->function->need_return = 1; + + if (!_jitc->function->need_stack) { + for (regno = 0; regno < jit_size(iregs); regno++) { + if (jit_regset_tstbit(&_jitc->function->regset, iregs[regno])) { + _jitc->function->need_stack = + _jitc->function->need_return = 1; + break; + } + } + if (!_jitc->function->need_stack) { + for (offset = 0; offset < jit_size(fregs); offset++) { + if (jit_regset_tstbit(&_jitc->function->regset, fregs[offset])) { + _jitc->function->need_stack = + _jitc->function->need_return = 1; + break; + } + } + } + } /* params >= %r31+params_offset+(8*sizeof(jit_word_t)) * alloca < %r31-80 */ + /* return address */ + if (_jitc->function->need_return) { + MFLR(_R0_REGNO); #if _CALL_SYSV - stxi(sizeof(jit_word_t), _SP_REGNO, _R0_REGNO); + stxi(sizeof(jit_word_t), _SP_REGNO, _R0_REGNO); #else - stxi(sizeof(void*) * 2, _SP_REGNO, _R0_REGNO); + stxi(sizeof(void*) * 2, _SP_REGNO, _R0_REGNO); #endif - offset = -gpr_save_area; - for (regno = 0; regno < jit_size(save); regno++, offset += sizeof(void*)) { - if (jit_regset_tstbit(&_jitc->function->regset, save[regno])) - stxi(offset, _SP_REGNO, rn(save[regno])); - } - for (offset = 0; offset < 8; offset++) { - if (jit_regset_tstbit(&_jitc->function->regset, _F14 + offset)) - stxi_d(-(gpr_save_area + 8 + offset * 8), - _SP_REGNO, rn(_F14 + offset)); } - stxi(-(sizeof(void*)), _SP_REGNO, _FP_REGNO); + if (_jitc->function->need_stack) { + offset = -gpr_save_area; + for (regno = 0; regno < jit_size(iregs); + regno++, offset += sizeof(void*)) { + if (jit_regset_tstbit(&_jitc->function->regset, iregs[regno])) + stxi(offset, _SP_REGNO, rn(iregs[regno])); + } + for (offset = 0; offset < jit_size(fregs); offset++) { + if (jit_regset_tstbit(&_jitc->function->regset, fregs[offset])) + stxi_d(-(gpr_save_area + 8 + offset * 8), + _SP_REGNO, rn(fregs[offset])); + } + } - movr(_FP_REGNO, _SP_REGNO); + if (_jitc->function->need_frame) { + stxi(-(sizeof(void*)), _SP_REGNO, _FP_REGNO); + movr(_FP_REGNO, _SP_REGNO); + } + if (_jitc->function->need_stack) { #if __WORDSIZE == 32 - STWU(_SP_REGNO, _SP_REGNO, -_jitc->function->stack); + STWU(_SP_REGNO, _SP_REGNO, -_jitc->function->stack); #else - STDU(_SP_REGNO, _SP_REGNO, -_jitc->function->stack); + STDU(_SP_REGNO, _SP_REGNO, -_jitc->function->stack); #endif + } if (_jitc->function->allocar) { regno = jit_get_reg(jit_class_gpr); @@ -3329,33 +3858,44 @@ _prolog(jit_state_t *_jit, jit_node_t *node) static void _epilog(jit_state_t *_jit, jit_node_t *node) { - unsigned long regno; + jit_int32_t regno; jit_word_t offset; if (_jitc->function->assume_frame) return; - if (_jitc->function->allocar) - ldr(_SP_REGNO, _SP_REGNO); - else - addi(_SP_REGNO, _SP_REGNO, _jitc->function->stack); + if (_jitc->function->need_stack) { + if (_jitc->function->allocar) + ldr(_SP_REGNO, _SP_REGNO); + else + addi(_SP_REGNO, _SP_REGNO, _jitc->function->stack); + } + + if (_jitc->function->need_return) { #if _CALL_SYSV - ldxi(_R0_REGNO, _SP_REGNO, sizeof(jit_word_t)); + ldxi(_R0_REGNO, _SP_REGNO, sizeof(jit_word_t)); #else - ldxi(_R0_REGNO, _SP_REGNO, sizeof(void*) * 2); + ldxi(_R0_REGNO, _SP_REGNO, sizeof(void*) * 2); #endif - offset = -gpr_save_area; - for (regno = 0; regno < jit_size(save); regno++, offset += sizeof(void*)) { - if (jit_regset_tstbit(&_jitc->function->regset, save[regno])) - ldxi(rn(save[regno]), _SP_REGNO, offset); } - for (offset = 0; offset < 8; offset++) { - if (jit_regset_tstbit(&_jitc->function->regset, _F14 + offset)) - ldxi_d(rn(_F14 + offset), _SP_REGNO, - -(gpr_save_area + 8 + offset * 8)); + + if (_jitc->function->need_stack) { + offset = -gpr_save_area; + for (regno = 0; regno < jit_size(iregs); + regno++, offset += sizeof(void*)) { + if (jit_regset_tstbit(&_jitc->function->regset, iregs[regno])) + ldxi(rn(iregs[regno]), _SP_REGNO, offset); + } + for (offset = 0; offset < 8; offset++) { + if (jit_regset_tstbit(&_jitc->function->regset, fregs[offset])) + ldxi_d(rn(fregs[offset]), _SP_REGNO, + -(gpr_save_area + 8 + offset * 8)); + } } - MTLR(_R0_REGNO); - ldxi(_FP_REGNO, _SP_REGNO, -(sizeof(void*))); + if (_jitc->function->need_return) + MTLR(_R0_REGNO); + if (_jitc->function->need_frame) + ldxi(_FP_REGNO, _SP_REGNO, -(sizeof(void*))); BLR(); } @@ -3545,7 +4085,7 @@ _patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label) if (!can_sign_extend_short_p(d)) { /* use absolute address */ assert(can_sign_extend_short_p(label)); - d |= 2; + d = label | 2; } u.i[0] = (u.i[0] & ~0xfffd) | (d & 0xfffe); break; @@ -3573,9 +4113,9 @@ _patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label) if (!can_sign_extend_jump_p(d)) { /* use absolute address */ assert(can_sign_extend_jump_p(label)); - d |= 2; + d = label | 2; } - u.i[0] = (u.i[0] & ~0x3fffffd) | (d & 0x3fffffe); + u.i[0] = (u.i[0] & ~0x3fffffc) | (d & 0x3fffffd); break; case 15: /* LI */ #if __WORDSIZE == 32