From: notaz Date: Sat, 30 Jun 2007 22:24:39 +0000 (+0000) Subject: some optimizations X-Git-Tag: v1.85~715 X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=b637c56aadc5b854ca28a8a111cf07324bc0e2ad;p=picodrive.git some optimizations git-svn-id: file:///home/notaz/opt/svn/PicoDrive@178 be3aeb3a-fb24-0410-a615-afba39da0efa --- diff --git a/cpu/Cyclone/Cyclone.h b/cpu/Cyclone/Cyclone.h index 67704fcb..596cb99f 100644 --- a/cpu/Cyclone/Cyclone.h +++ b/cpu/Cyclone/Cyclone.h @@ -24,7 +24,7 @@ struct Cyclone unsigned char irq; // [r7,#0x47] IRQ level unsigned int osp; // [r7,#0x48] Other Stack Pointer (USP/SSP) unsigned int vector; // [r7,#0x4c] IRQ vector (temporary) - unsigned int prev_pc;// [r7,#0x50] set to start address of currently executed opcode (if enabled in config.h) + unsigned int prev_pc;// [r7,#0x50] set to start address of currently executed opcode + 2 (if enabled in config.h) unsigned int unused; // [r7,#0x54] Unused int stopped; // [r7,#0x58] 1 == processor is in stopped state int cycles; // [r7,#0x5c] diff --git a/cpu/Cyclone/Ea.cpp b/cpu/Cyclone/Ea.cpp index 39ed8a25..22e59395 100644 --- a/cpu/Cyclone/Ea.cpp +++ b/cpu/Cyclone/Ea.cpp @@ -123,7 +123,7 @@ static int EaCalcReg(int r,int ea,int mask,int forceor,int shift,int noshift=0) // EaCalc - ARM Register 'a' = Effective Address // Trashes r0,r2 and r3 // size values 0, 1, 2 ~ byte, word, long -int EaCalc(int a,int mask,int ea,int size,int top) +int EaCalc(int a,int mask,int ea,int size,int top,int sign_extend) { char text[32]=""; int func=0; @@ -134,7 +134,7 @@ int EaCalc(int a,int mask,int ea,int size,int top) if (ea<0x10) { int noshift=0; - if (size>=2||(size==0&&top)) noshift=1; // Saves one opcode + if (size>=2||(size==0&&(top||!sign_extend))) noshift=1; // Saves one opcode ot(";@ EaCalc : Get register index into r%d:\n",a); @@ -180,8 +180,8 @@ int EaCalc(int a,int mask,int ea,int size,int top) if (ea<0x30) // ($nn,An) (di) { - EaCalcReg(2,8,mask,0,0); ot(" ldrsh r0,[r4],#2 ;@ Fetch offset\n"); pc_dirty=1; + EaCalcReg(2,8,mask,0,0); ot(" ldr r2,[r7,r2,lsl #2]\n"); ot(" add r%d,r0,r2 ;@ Add on offset\n",a); Cycles+=size<2 ? 8:12; // Extra cycles @@ -277,9 +277,10 @@ int EaCalc(int a,int mask,int ea,int size,int top) // 'a' and 'v' can be anything but 0 is generally best (for both) // If (ea<0x10) nothing is trashed, else r0-r3 is trashed // If 'top' is given, the ARM register v shifted to the top, e.g. 0xc000 -> 0xc0000000 -// Otherwise the ARM register v is sign extended, e.g. 0xc000 -> 0xffffc000 +// If top is 0 and sign_extend is not, then ARM register v is sign extended, +// e.g. 0xc000 -> 0xffffc000 (else it may or may not be sign extended) -int EaRead(int a,int v,int ea,int size,int mask,int top) +int EaRead(int a,int v,int ea,int size,int mask,int top,int sign_extend) { char text[32]=""; int shift=0; @@ -291,7 +292,7 @@ int EaRead(int a,int v,int ea,int size,int mask,int top) if (ea<0x10) { int lsl=0,low=0,i; - if (size>=2||(size==0&&top)) { + if (size>=2||(size==0&&(top||!sign_extend))) { if(mask) for (i=mask|0x8000; (i&1)==0; i>>=1) low++; // Find out how high up the EA mask is lsl=2-low; // Having a lsl #2 here saves one opcode @@ -316,18 +317,35 @@ int EaRead(int a,int v,int ea,int size,int mask,int top) if (top) asl=shift; - if (v!=a || asl) ot(" mov r%d,r%d,asl #%d\n",v,a,asl); + if (asl) ot(" mov r%d,r%d,asl #%d\n",v,a,asl); + else if (v!=a) ot(" mov r%d,r%d\n",v,a); ot("\n"); return 0; } if (ea>=0x3a && ea<=0x3b) MemHandler(2,size,a); // Fetch else MemHandler(0,size,a); // Read - if (v!=0 || shift) { - if (shift) ot(" mov r%d,r0,asl #%d\n",v,shift); - else ot(" mov r%d,r0\n",v); + if (sign_extend) + { + int d_reg=0; + if (shift) { + ot(" mov r%d,r%d,asl #%d\n",v,d_reg,shift); + d_reg=v; + } + if (!top && shift) { + ot(" mov r%d,r%d,asr #%d\n",v,d_reg,shift); + d_reg=v; + } + if (d_reg != v) + ot(" mov r%d,r%d\n",v,d_reg); + } + else + { + if (top && shift) + ot(" mov r%d,r0,asl #%d\n",v,shift); + else if (v!=0) + ot(" mov r%d,r0\n",v); } - if (top==0 && shift) ot(" mov r%d,r%d,asr #%d\n",v,v,shift); ot("\n"); return 0; } @@ -352,7 +370,7 @@ int EaCanRead(int ea,int size) // Write effective address (ARM Register 'a') with ARM register 'v' // Trashes r0-r3,r12,lr; 'a' can be 0 or 2+, 'v' can be 1 or higher // If a==0 and v==1 it's faster though. -int EaWrite(int a,int v,int ea,int size,int mask,int top) +int EaWrite(int a,int v,int ea,int size,int mask,int top,int sign_extend_ea) { char text[32]=""; int shift=0; @@ -366,7 +384,7 @@ int EaWrite(int a,int v,int ea,int size,int mask,int top) if (ea<0x10) { int lsl=0,low=0,i; - if (size>=2||(size==0&&top)) { + if (size>=2||(size==0&&(top||!sign_extend_ea))) { if(mask) for (i=mask|0x8000; (i&1)==0; i>>=1) low++; // Find out how high up the EA mask is lsl=2-low; // Having a lsl #x here saves one opcode @@ -386,7 +404,8 @@ int EaWrite(int a,int v,int ea,int size,int mask,int top) if (ea==0x3c) { ot("Error! Write EA=0x%x\n\n",ea); return 1; } - if (v!=1 || shift) ot(" mov r1,r%d,asr #%d\n",v,shift); + if (shift) ot(" mov r1,r%d,asr #%d\n",v,shift); + else if (v!=1) ot(" mov r1,r%d\n",v); MemHandler(1,size,a); // Call write handler diff --git a/cpu/Cyclone/Main.cpp b/cpu/Cyclone/Main.cpp index 80bbfd8e..fce0c3d4 100644 --- a/cpu/Cyclone/Main.cpp +++ b/cpu/Cyclone/Main.cpp @@ -10,6 +10,7 @@ char *Narm[4]={ "b", "h","",""}; // Normal ARM Extensions for operand sizes 0,1, char *Sarm[4]={"sb","sh","",""}; // Sign-extend ARM Extensions for operand sizes 0,1,2 int Cycles; // Current cycles for opcode int pc_dirty; // something changed PC during processing +static int arm_op_count; void ot(const char *format, ...) @@ -22,6 +23,9 @@ void ot(const char *format, ...) for(i=0, len=strlen(format); i < len && format[i] != '\n'; i++); if(i < len-1 && format[len-1] != '\n') printf("\nWARNING: possible improper newline placement:\n%s\n", format); + if (format[0] == ' ' && format[1] == ' ' && format[2] != ' ' && format[2] != '.') + arm_op_count++; + va_start(valist,format); if (AsmFile) vfprintf(AsmFile,format,valist); va_end(valist); @@ -441,7 +445,7 @@ int MemHandler(int type,int size,int addrreg) static void PrintOpcodes() { int op=0; - + printf("Creating Opcodes: ["); ot(";@ ---------------------------- Opcodes ---------------------------\n"); @@ -713,7 +717,9 @@ static int CycloneMake() ot("\n"); PrintFramework(); + arm_op_count = 0; PrintOpcodes(); + printf("~%i ARM instructions used for opcode handlers\n", arm_op_count); PrintJumpTable(); if (ms) ot(" END\n"); diff --git a/cpu/Cyclone/OpAny.cpp b/cpu/Cyclone/OpAny.cpp index c1f71102..3a401668 100644 --- a/cpu/Cyclone/OpAny.cpp +++ b/cpu/Cyclone/OpAny.cpp @@ -33,8 +33,7 @@ void OpStart(int op, int sea, int tea) #if (MEMHANDLERS_NEED_PREV_PC || MEMHANDLERS_NEED_CYCLES) if ((sea >= 0x10 && sea != 0x3c) || (tea >= 0x10 && tea != 0x3c)) { #if MEMHANDLERS_NEED_PREV_PC - ot(" sub r0,r4,#2\n"); - ot(" str r0,[r7,#0x50] ;@ Save prev PC\n"); + ot(" str r4,[r7,#0x50] ;@ Save prev PC + 2\n"); #endif #if MEMHANDLERS_NEED_CYCLES ot(" str r5,[r7,#0x5c] ;@ Save Cycles\n"); diff --git a/cpu/Cyclone/OpArith.cpp b/cpu/Cyclone/OpArith.cpp index c6f0bc83..9e956c04 100644 --- a/cpu/Cyclone/OpArith.cpp +++ b/cpu/Cyclone/OpArith.cpp @@ -8,6 +8,7 @@ int OpArith(int op) int type=0,size=0; int sea=0,tea=0; int use=0; + char *shiftstr=""; // Get source and target EA type=(op>>9)&7; if (type==4 || type>=7) return 1; @@ -26,17 +27,20 @@ int OpArith(int op) EaCalc(10,0x0000, sea,size,1); EaCalc(11,0x003f, tea,size,1); - EaRead(10, 10, sea,size,0,1); + EaRead(10, 10, sea,size,0,0,0); EaRead(11, 0, tea,size,0x003f,1); + if (size==0) shiftstr=",asl #24"; + else if (size==1) shiftstr=",asl #16"; + ot(";@ Do arithmetic:\n"); - if (type==0) ot(" orr r1,r0,r10\n"); - if (type==1) ot(" and r1,r0,r10\n"); - if (type==2) ot(" subs r1,r0,r10 ;@ Defines NZCV\n"); - if (type==3) ot(" adds r1,r0,r10 ;@ Defines NZCV\n"); - if (type==5) ot(" eor r1,r0,r10\n"); - if (type==6) ot(" cmp r0,r10 ;@ Defines NZCV\n"); + if (type==0) ot(" orr r1,r0,r10%s\n",shiftstr); + if (type==1) ot(" and r1,r0,r10%s\n",shiftstr); + if (type==2) ot(" subs r1,r0,r10%s ;@ Defines NZCV\n",shiftstr); + if (type==3) ot(" adds r1,r0,r10%s ;@ Defines NZCV\n",shiftstr); + if (type==5) ot(" eor r1,r0,r10%s\n",shiftstr); + if (type==6) ot(" cmp r0,r10%s ;@ Defines NZCV\n",shiftstr); if (type<2 || type==5) ot(" adds r1,r1,#0 ;@ Defines NZ, clears CV\n"); // 0,1,5 @@ -523,19 +527,20 @@ int OpAritha(int op) if(type==1) Cycles=6; - EaCalc ( 0,0x003f, sea,size); - EaRead ( 0, 10, sea,size,0x003f); + // must calculate reg EA first, because of situations like: suba.w (A0)+, A0 + EaCalc (10,0x0e00, dea,2,1); + EaRead (10, 11, dea,2,0x0e00); - EaCalc ( 0,0x0e00, dea,2,1); - EaRead ( 0, 1, dea,2,0x0e00); + EaCalc ( 0,0x003f, sea,size); + EaRead ( 0, 0, sea,size,0x003f); - if (type==0) ot(" sub r1,r1,r10\n"); - if (type==1) ot(" cmp r1,r10 ;@ Defines NZCV\n"); + if (type==0) ot(" sub r11,r11,r0\n"); + if (type==1) ot(" cmp r11,r0 ;@ Defines NZCV\n"); if (type==1) OpGetFlags(1,0); // Get Cmp flags - if (type==2) ot(" add r1,r1,r10\n"); + if (type==2) ot(" add r11,r11,r0\n"); ot("\n"); - if (type!=1) EaWrite( 0, 1, dea,2,0x0e00,1); + if (type!=1) EaWrite(10, 11, dea,2,0x0e00,1); OpEnd(sea); diff --git a/cpu/Cyclone/OpLogic.cpp b/cpu/Cyclone/OpLogic.cpp index 49ed29f8..207ff456 100644 --- a/cpu/Cyclone/OpLogic.cpp +++ b/cpu/Cyclone/OpLogic.cpp @@ -36,15 +36,19 @@ int OpBtstReg(int op) if(size>=2) Cycles+=2; } - EaCalc (0,0x0e00,sea,0); - EaRead (0, 0,sea,0,0x0e00); + EaCalc (10,0x0e00,sea,0,0,0); + EaRead (10, 10,sea,0,0x0e00,0,0); + + EaCalc ( 0,0x003f,tea,size,0,0); + if (type>0) + ot(" mov r11,r0\n"); + EaRead ( 0, 0,tea,size,0x003f,0,0); + if (tea>=0x10) - ot(" and r10,r0,#7 ;@ mem - do mod 8\n"); - else ot(" and r10,r0,#31 ;@ reg - do mod 32\n"); + ot(" and r10,r10,#7 ;@ mem - do mod 8\n"); // size always 0 + else ot(" and r10,r10,#31 ;@ reg - do mod 32\n"); // size always 2 ot("\n"); - EaCalc(11,0x003f,tea,size); - EaRead(11, 0,tea,size,0x003f); ot(" mov r1,#1\n"); ot(" tst r0,r1,lsl r10 ;@ Do arithmetic\n"); ot(" bicne r9,r9,#0x40000000\n"); @@ -57,7 +61,7 @@ int OpBtstReg(int op) if (type==2) ot(" bic r1,r0,r1,lsl r10 ;@ Clear bit\n"); if (type==3) ot(" orr r1,r0,r1,lsl r10 ;@ Set bit\n"); ot("\n"); - EaWrite(11, 1,tea,size,0x003f); + EaWrite(11, 1,tea,size,0x003f,0,0); } OpEnd(tea); @@ -90,14 +94,14 @@ int OpBtstImm(int op) OpStart(op,sea,tea); - ot(" mov r10,#1\n"); ot("\n"); - EaCalc ( 0,0x0000,sea,0); - EaRead ( 0, 0,sea,0,0); + EaCalc ( 0,0x0000,sea,0,0,0); + EaRead ( 0, 0,sea,0,0,0,0); + ot(" mov r10,#1\n"); ot(" bic r9,r9,#0x40000000 ;@ Blank Z flag\n"); if (tea>=0x10) - ot(" and r0,r0,#7 ;@ mem - do mod 8\n"); - else ot(" and r0,r0,#0x1F ;@ reg - do mod 32\n"); + ot(" and r0,r0,#7 ;@ mem - do mod 8\n"); // size always 0 + else ot(" and r0,r0,#0x1F ;@ reg - do mod 32\n"); // size always 2 ot(" mov r10,r10,lsl r0 ;@ Make bit mask\n"); ot("\n"); @@ -108,8 +112,8 @@ int OpBtstImm(int op) if(size>=2) Cycles+=2; } - EaCalc (11,0x003f,tea,size); - EaRead (11, 0,tea,size,0x003f); + EaCalc (11,0x003f,tea,size,0,0); + EaRead (11, 0,tea,size,0x003f,0,0); ot(" tst r0,r10 ;@ Do arithmetic\n"); ot(" orreq r9,r9,#0x40000000 ;@ Get Z flag\n"); ot("\n"); @@ -120,7 +124,7 @@ int OpBtstImm(int op) if (type==2) ot(" bic r1,r0,r10 ;@ Clear bit\n"); if (type==3) ot(" orr r1,r0,r10 ;@ Set bit\n"); ot("\n"); - EaWrite(11, 1,tea,size,0x003f); + EaWrite(11, 1,tea,size,0x003f,0,0); } OpEnd(sea,tea); @@ -156,16 +160,16 @@ int OpNeg(int op) #endif } - EaCalc (10,0x003f,ea,size); + EaCalc (10,0x003f,ea,size,0,0); - if (type!=1) EaRead (10,0,ea,size,0x003f); // Don't need to read for 'clr' + if (type!=1) EaRead (10,0,ea,size,0x003f,0,0); // Don't need to read for 'clr' (or do we, for dummy read?) if (type==1) ot("\n"); if (type==0) { ot(";@ Negx:\n"); GetXBit(1); - if(size!=2) ot(" mov r0,r0,lsl #%i\n",size?16:24); + if(size!=2) ot(" mov r0,r0,asl #%i\n",size?16:24); ot(" rscs r1,r0,#0 ;@ do arithmetic\n"); ot(" orr r3,r9,#0xb0000000 ;@ for old Z\n"); OpGetFlags(1,1,0); @@ -188,7 +192,7 @@ int OpNeg(int op) if (type==2) { ot(";@ Neg:\n"); - if(size!=2) ot(" mov r0,r0,lsl #%i\n",size?16:24); + if(size!=2) ot(" mov r0,r0,asl #%i\n",size?16:24); ot(" rsbs r1,r0,#0\n"); OpGetFlags(1,1); if(size!=2) ot(" mov r1,r1,asr #%i\n",size?16:24); @@ -198,13 +202,18 @@ int OpNeg(int op) if (type==3) { ot(";@ Not:\n"); - ot(" mvn r1,r0\n"); + if(size!=2) { + ot(" mov r0,r0,asl #%i\n",size?16:24); + ot(" mvn r1,r0,asr #%i\n",size?16:24); + } + else + ot(" mvn r1,r0\n"); ot(" adds r1,r1,#0 ;@ Defines NZ, clears CV\n"); OpGetFlags(0,0); ot("\n"); } - EaWrite(10, 1,ea,size,0x003f); + EaWrite(10, 1,ea,size,0x003f,0,0); OpEnd(ea); @@ -284,8 +293,8 @@ int OpExt(int op) OpStart(op); Cycles=4; - EaCalc (10,0x0007,ea,size+1); - EaRead (10, 0,ea,size+1,0x0007); + EaCalc (10,0x0007,ea,size+1,0,0); + EaRead (10, 0,ea,size+1,0x0007,0,0); ot(" mov r0,r0,asl #%d\n",shift); ot(" adds r0,r0,#0 ;@ Defines NZ, clears CV\n"); @@ -293,7 +302,7 @@ int OpExt(int op) ot(" mov r1,r0,asr #%d\n",shift); ot("\n"); - EaWrite(10, 1,ea,size+1,0x0007); + EaWrite(10, 1,ea,size+1,0x0007,0,0); OpEnd(); return 0; @@ -325,22 +334,39 @@ int OpSet(int op) OpStart(op,ea); Cycles=8; if (ea<8) Cycles=4; - ot(" mov r1,#0\n"); + if (cc) + ot(" mov r1,#0\n"); - if (cc!=1) + switch (cc) { - ot(";@ Is the condition true?\n"); - if ((cc&~1)==2) ot(" eor r9,r9,#0x20000000 ;@ Invert carry for hi/ls\n"); - ot(" msr cpsr_flg,r9 ;@ ARM flags = 68000 flags\n"); - if ((cc&~1)==2) ot(" eor r9,r9,#0x20000000 ;@ Invert carry for hi/ls\n"); - ot(" mvn%s r1,r1\n",cond[cc]); + case 0: // T + ot(" mvn r1,#0\n"); + if (ea<8) Cycles+=2; + break; + case 1: // F + break; + case 2: // hi + ot(" ands r0,r9,#0x60000000 ;@ hi: !C && !Z\n"); + ot(" mvneq r1,r1\n"); + if (ea<8) ot(" subeq r5,r5,#2 ;@ Extra cycles\n"); + break; + case 3: // ls + ot(" tst r9,#0x60000000 ;@ ls: C || Z\n"); + ot(" mvnne r1,r1\n"); + if (ea<8) ot(" subne r5,r5,#2 ;@ Extra cycles\n"); + break; + default: + ot(";@ Is the condition true?\n"); + ot(" msr cpsr_flg,r9 ;@ ARM flags = 68000 flags\n"); + ot(" mvn%s r1,r1\n",cond[cc]); + if (ea<8) ot(" sub%s r5,r5,#2 ;@ Extra cycles\n",cond[cc]); + break; } - if (cc!=1 && ea<8) ot(" sub%s r5,r5,#2 ;@ Extra cycles\n",cond[cc]); ot("\n"); - EaCalc (0,0x003f, ea,size); - EaWrite(0, 1, ea,size,0x003f); + EaCalc (0,0x003f, ea,size,0,0); + EaWrite(0, 1, ea,size,0x003f,0,0); OpEnd(ea); return 0; diff --git a/cpu/Cyclone/OpMove.cpp b/cpu/Cyclone/OpMove.cpp index 95e30278..8dc5be69 100644 --- a/cpu/Cyclone/OpMove.cpp +++ b/cpu/Cyclone/OpMove.cpp @@ -130,19 +130,19 @@ int OpMove(int op) if (movea) size=2; // movea always expands to 32-bits - EaCalc (0,0x0e00,tea,size); + EaCalc (0,0x0e00,tea,size,0,0); #if SPLIT_MOVEL_PD if ((tea&0x38)==0x20 && size==2) { // -(An) ot(" mov r10,r0\n"); ot(" mov r11,r1\n"); ot(" add r0,r0,#2\n"); - EaWrite(0, 1,tea,1,0x0e00); + EaWrite(0, 1,tea,1,0x0e00,0,0); EaWrite(10, 11,tea,1,0x0e00,1); } else { - EaWrite(0, 1,tea,size,0x0e00); + EaWrite(0, 1,tea,size,0x0e00,0,0); } #else - EaWrite(0, 1,tea,size,0x0e00); + EaWrite(0, 1,tea,size,0x0e00,0,0); #endif #if CYCLONE_FOR_GENESIS && !MEMHANDLERS_CHANGE_CYCLES @@ -224,14 +224,14 @@ int OpMoveSr(int op) if (type==0 || type==1) { OpFlagsToReg(type==0); - EaCalc (0,0x003f,ea,size); - EaWrite(0, 1,ea,size,0x003f); + EaCalc (0,0x003f,ea,size,0,0); + EaWrite(0, 1,ea,size,0x003f,0,0); } if (type==2 || type==3) { - EaCalc(0,0x003f,ea,size); - EaRead(0, 0,ea,size,0x003f); + EaCalc(0,0x003f,ea,size,0,0); + EaRead(0, 0,ea,size,0x003f,0,0); OpRegToFlags(type==3); if (type==3) { SuperChange(op); @@ -264,8 +264,8 @@ int OpArithSr(int op) if (size) SuperCheck(op); - EaCalc(0,0x003f,ea,size); - EaRead(0, 10,ea,size,0x003f); + EaCalc(10,0x003f,ea,size); + EaRead(10, 10,ea,size,0x003f); OpFlagsToReg(size); if (type==0) ot(" orr r0,r1,r10\n"); @@ -341,57 +341,58 @@ int OpMovem(int op) OpStart(op,ea); - ot(" stmdb sp!,{r9} ;@ Push r9\n"); // can't just use r12 or lr here, because memhandlers touch them ot(" ldrh r11,[r4],#2 ;@ r11=register mask\n"); - ot("\n"); - ot(";@ Get the address into r9:\n"); - EaCalc(9,0x003f,cea,size); - ot(";@ r10=Register Index*4:\n"); - if (decr) ot(" mov r10,#0x3c ;@ order reversed for -(An)\n"); - else ot(" mov r10,#0\n"); + if (decr) ot(" mov r10,#0x40 ;@ order reversed for -(An)\n"); + else ot(" mov r10,#-4\n"); ot("\n"); - ot("MoreReg%.4x%s\n",op, ms?"":":"); + ot(";@ Get the address into r6:\n"); + EaCalc(6,0x003f,cea,size); - ot(" tst r11,#1\n"); - ot(" beq SkipReg%.4x\n",op); ot("\n"); + ot(" tst r11,r11\n"); // sanity check + ot(" beq NoRegs%.4x\n",op); - if (decr) ot(" sub r9,r9,#%d ;@ Pre-decrement address\n",1<