| 1 | ; 7zCrcOpt.asm -- CRC32 calculation : optimized version\r |
| 2 | ; 2021-02-07 : Igor Pavlov : Public domain\r |
| 3 | \r |
| 4 | include 7zAsm.asm\r |
| 5 | \r |
MY_ASM_START

; ---------------------------------------------------------------------------
; Register roles used by every macro and routine in this file.
; The rN / xN / xN_L / xN_H register names are not defined here; they are
; presumably provided by 7zAsm.asm.
;
;   rD  data position: a byte pointer while CRC1b runs, and a negative byte
;       offset relative to rN inside the main loops (see SRCDAT)
;   rN  number of bytes left; inside the main loops it is reused as the
;       fixed base address that rD is relative to
;   rT  base address of the lookup tables (see the CRC macro)
; ---------------------------------------------------------------------------
rD          equ r2
rN          equ r7
rT          equ r5

; ---------------------------------------------------------------------------
; Locations of the size and table arguments.
;
;   x64: num_VAR / table_VAR name r8 / r9.
;   x86: both are stack slots addressed from r4. The first stack argument is
;        at REG_SIZE * 5 (presumably the return address plus four saved
;        registers -- confirm against MY_PUSH_4_REGS in 7zAsm.asm).
;        With IS_CDECL the stack holds crc, data, size, table in that order;
;        otherwise only size and table are on the stack.
;
; MY_PROLOG also overwrites num_VAR with the end-of-data address, and
; MY_EPILOG reads it back.
; ---------------------------------------------------------------------------
ifdef x64
    num_VAR     equ r8
    table_VAR   equ r9
else
  if (IS_CDECL gt 0)
    crc_OFFS    equ (REG_SIZE * 5)
    data_OFFS   equ (crc_OFFS + REG_SIZE)
    size_OFFS   equ (data_OFFS + REG_SIZE)
  else
    size_OFFS   equ (REG_SIZE * 5)
  endif
    table_OFFS  equ (size_OFFS + REG_SIZE)
    num_VAR     equ [r4 + size_OFFS]
    table_VAR   equ [r4 + table_OFFS]
endif

; Address-expression prefix: [SRCDAT k] expands to [rD + rN * 1 + 4 * k],
; i.e. the k-th dword counted from the current data position.
; The definition deliberately ends in '*'; do not append anything to it.
SRCDAT equ rD + rN * 1 + 4 *
| 29 | \r |
; CRC insn, dst, idx, tbl
;   Emits one instruction:
;       insn dst, DWORD PTR [rT + idx * 4 + 0400h * tbl]
;   That is, `insn` is applied to dst with 32-bit entry `idx` of lookup
;   table number `tbl`. Consecutive tables are 0400h bytes apart
;   (256 entries of 4 bytes each).
;   idx has to be a register that is valid as a scaled index.
CRC macro insn:req, dst:req, idx:req, tbl:req
        insn    dst, DWORD PTR [rT + idx * 4 + 0400h * tbl]
endm
| 33 | \r |
; CRC_XOR dst, idx, tbl
;   dst = dst XOR (entry idx of lookup table tbl); see the CRC macro for the
;   address that is formed.
CRC_XOR macro dst:req, idx:req, tbl:req
        CRC     xor, dst, idx, tbl
endm
| 37 | \r |
; CRC_MOV dst, idx, tbl
;   dst = entry idx of lookup table tbl; see the CRC macro for the address
;   that is formed.
CRC_MOV macro dst:req, idx:req, tbl:req
        CRC     mov, dst, idx, tbl
endm
| 41 | \r |
; CRC1b
;   Folds one input byte into the CRC held in x0:
;       x0 = (x0 shr 8) XOR table0[(x0 and 0FFh) XOR byte at rD]
;   then advances rD by one and decrements rN.
;   Clobbers x3 and x6.
;   The final `dec rN` is the last flag-writing instruction, so ZF is set
;   exactly when rN has reached zero; callers may branch on it directly.
CRC1b macro
        movzx   x3, x0_L                ; low byte of the running CRC
        movzx   x6, BYTE PTR [rD]       ; next input byte
        inc     rD
        shr     x0, 8
        xor     x6, x3                  ; table index
        CRC_XOR x0, r6, 0
        dec     rN
endm
| 51 | \r |
; MY_PROLOG tail_label
;   Shared entry sequence of the CRC routines.
;     1. Saves registers and loads the arguments for the active ABI into
;        x0 (crc), rD (data), rN (size) and rT (table).
;     2. Branches to tail_label when size is zero.
;     3. Consumes single bytes with CRC1b until rD is a multiple of 8 or
;        the input is exhausted.
;     4. Branches to tail_label when fewer than 16 bytes remain.
;     5. Otherwise prepares the main loop:
;          num_VAR = end-of-data address
;          rN      = (end - 8) rounded down to a multiple of 8
;          rD      = data - rN  (a negative multiple of 8)
;          x0      = x0 XOR first dword of the data
;   tail_label must be defined by the caller (MY_EPILOG does so).
MY_PROLOG macro tail_label:req
        LOCAL   align_next, head_done

    ifdef x64
      if (IS_LINUX gt 0)
        MY_PUSH_2_REGS
        ; Keep this order: each source register is read before a later
        ; move overwrites it.
        mov     x0, REG_ABI_PARAM_0_x   ; x0 = x7
        mov     rT, REG_ABI_PARAM_3     ; r5 = r1
        mov     rN, REG_ABI_PARAM_2     ; r7 = r2
        mov     rD, REG_ABI_PARAM_1     ; r2 = r6
      else
        MY_PUSH_4_REGS
        mov     x0, REG_ABI_PARAM_0_x   ; x0 = x1
        mov     rT, REG_ABI_PARAM_3     ; r5 = r9
        mov     rN, REG_ABI_PARAM_2     ; r7 = r8
        ; No move is needed for rD: REG_ABI_PARAM_1 is already r2.
      endif
    else
        MY_PUSH_4_REGS
      if (IS_CDECL gt 0)
        mov     x0, [r4 + crc_OFFS]
        mov     rD, [r4 + data_OFFS]
      else
        ; rD is not loaded on this path; the data argument is presumably
        ; already in r2 -- confirm against the calling convention in use.
        mov     x0, REG_ABI_PARAM_0_x
      endif
        mov     rN, num_VAR
        mov     rT, table_VAR
    endif

        test    rN, rN
        jz      tail_label              ; nothing to process

align_next:
        test    rD, 7
        jz      head_done               ; rD is 8-byte aligned
        CRC1b
        jnz     align_next              ; ZF comes from dec rN inside CRC1b

head_done:
        cmp     rN, 16
        jb      tail_label              ; too short for the main loop
        add     rN, rD                  ; rN = end of data
        mov     num_VAR, rN
        sub     rN, 8
        and     rN, NOT 7               ; rN = aligned loop base
        sub     rD, rN                  ; rD = negative offset from rN
        xor     x0, [SRCDAT 0]          ; fold in the first data dword
endm
| 97 | \r |
; MY_EPILOG tail_label
;   Shared exit sequence of the CRC routines.
;   Fall-through entry (main loop finished):
;     - XORs x0 with the dword at [SRCDAT 0] again, cancelling the dword the
;       main loop had already folded in ahead of time;
;     - turns rD back into a byte pointer (copied from rN) and recomputes
;       rN = num_VAR - rD, the number of bytes still to process.
;   tail_label (defined here) is where MY_PROLOG jumps for short inputs.
;   From tail_label on, the remaining rN bytes are consumed one at a time
;   with CRC1b and the registers saved by MY_PROLOG are restored.
MY_EPILOG macro tail_label:req
        LOCAL   all_done

        xor     x0, [SRCDAT 0]
        mov     rD, rN                  ; rD = first unprocessed byte
        mov     rN, num_VAR             ; end-of-data address saved by MY_PROLOG
        sub     rN, rD                  ; rN = bytes left

tail_label:
        test    rN, rN
        jz      all_done
        CRC1b
        jmp     tail_label

all_done:
    if (IS_X64 gt 0) and (IS_LINUX gt 0)
        MY_POP_2_REGS
    else
        MY_POP_4_REGS
    endif
endm
| 115 | \r |
; CrcUpdateT8
;   CRC32 update that consumes 8 bytes per main-loop iteration and uses
;   lookup tables 0..7 addressed through rT.
;   Declared with 4 arguments; MY_PROLOG loads them as crc, data, size and
;   table. The running CRC lives in x0 and is still there when MY_EPILOG
;   finishes (presumably the return register -- confirm in 7zAsm.asm).
;
;   State at the top of every iteration:
;     x0 = CRC XOR dword 0 of the current 8-byte group
;     x1 = dword 1 of the current group
;   Each iteration builds the next x0 in x6, which is seeded with dword 0 of
;   the following group, and reloads x1 with dword 1 of the following group.
;   rD counts up by 8 towards zero; the loop ends when it reaches zero.
;   Clobbers x1, x3 and x6.
;   The instruction order inside the loop is presumably hand-scheduled;
;   keep it when editing.
MY_PROC CrcUpdateT8, 4
        MY_PROLOG crc8_tail
        mov     x1, [SRCDAT 1]          ; dword 1 of the first group
        align   16
crc8_loop:
        mov     x6, [SRCDAT 2]          ; seed: dword 0 of the next group

        ; Bytes 0, 1, 2, 3 of x1 select entries in tables 3, 2, 1, 0.
        movzx   x3, x1_L
        CRC_XOR x6, r3, 3
        movzx   x3, x1_H
        CRC_XOR x6, r3, 2
        shr     x1, 16
        movzx   x3, x1_L
        movzx   x1, x1_H
        CRC_XOR x6, r3, 1
        movzx   x3, x0_L                ; byte 0 of x0, used for table 7 below
        CRC_XOR x6, r1, 0

        ; Bytes 0, 1, 2, 3 of x0 select entries in tables 7, 6, 5, 4.
        mov     x1, [SRCDAT 3]          ; dword 1 of the next group
        CRC_XOR x6, r3, 7
        movzx   x3, x0_H
        shr     x0, 16
        CRC_XOR x6, r3, 6
        movzx   x3, x0_L
        CRC_XOR x6, r3, 5
        movzx   x3, x0_H
        CRC_MOV x0, r3, 4
        xor     x0, x6                  ; x0 = state for the next iteration
        add     rD, 8
        jnz     crc8_loop

        MY_EPILOG crc8_tail
MY_ENDP
| 148 | \r |
; CrcUpdateT4
;   CRC32 update that uses lookup tables 0..3 addressed through rT.
;   Each main-loop iteration consumes 8 bytes as two 4-byte steps.
;   Declared with 4 arguments; MY_PROLOG loads them as crc, data, size and
;   table. The running CRC lives in x0 and is still there when MY_EPILOG
;   finishes (presumably the return register -- confirm in 7zAsm.asm).
;
;   State at the top of every iteration:
;     x0 = CRC XOR dword 0 of the current 8-byte group
;   Step 1 moves the state from x0 to x1 and folds in dword 1 of the group;
;   step 2 moves it back to x0 and folds in dword 0 of the following group.
;   In both steps bytes 0, 1, 2, 3 of the old state select entries in
;   tables 3, 2, 1, 0.
;   rD counts up by 8 towards zero; the loop ends when it reaches zero.
;   Clobbers x1, x3 and x6.
;   The instruction order inside the loop is presumably hand-scheduled;
;   keep it when editing.
MY_PROC CrcUpdateT4, 4
        MY_PROLOG crc4_tail
        align   16
crc4_loop:
        ; Step 1: split x0 into its four bytes (x1, x3, x0, x6 = bytes 0, 1, 2, 3).
        movzx   x1, x0_L
        movzx   x3, x0_H
        shr     x0, 16
        movzx   x6, x0_H
        and     x0, 0FFh
        CRC_MOV x1, r1, 3
        xor     x1, [SRCDAT 1]          ; fold in dword 1 of this group
        CRC_XOR x1, r3, 2
        CRC_XOR x1, r6, 0
        CRC_XOR x1, r0, 1

        ; Step 2: split x1 into its four bytes (x0, x3, x1, x6 = bytes 0, 1, 2, 3).
        movzx   x0, x1_L
        movzx   x3, x1_H
        shr     x1, 16
        movzx   x6, x1_H
        and     x1, 0FFh
        CRC_MOV x0, r0, 3
        xor     x0, [SRCDAT 2]          ; fold in dword 0 of the next group
        CRC_XOR x0, r3, 2
        CRC_XOR x0, r6, 0
        CRC_XOR x0, r1, 1
        add     rD, 8
        jnz     crc4_loop

        MY_EPILOG crc4_tail
MY_ENDP
| 179 | \r |
| 180 | end\r |