| 1 | ; XzCrc64Opt.asm -- CRC64 calculation : optimized version\r |
| 2 | ; 2021-02-06 : Igor Pavlov : Public domain\r |
| 3 | \r |
| 4 | include 7zAsm.asm\r |
| 5 | \r |
| 6 | MY_ASM_START\r |
| 7 | \r |
| 8 | ifdef x64\r |
| 9 | \r |
; Register roles in the x64 build. The r5 / r8 / r9 / r10 names are used as-is;
; they are not defined in this file.
;   rT      - base address of the lookup table
;   rD      - data pointer; during the 4-byte loop it is an offset added to rN
;   rN      - byte count; during the 4-byte loop it is the base rD is added to
;   num_VAR - keeps the end-of-data address while the 4-byte loop runs
rT      equ  r5
rD      equ  r9
rN      equ  r10
num_VAR equ  r8

; 32-bit source operand located at rD + rN
SRCDAT4 equ  dword ptr [rD + rN * 1]
| 16 | \r |
; CRC_XOR dest, src, t   (x64 build)
;   dest = dest XOR the 8-byte table entry at rT + src * 8 + t * 0800h.
;   src is a register holding the entry index (8 bytes per entry);
;   each step of t moves 0800h bytes (256 such entries) further into the table.
CRC_XOR macro dest:req, src:req, t:req
    xor     dest, qword ptr [rT + src * 8 + t * 0800h]
endm
| 20 | \r |
; CRC1b   (x64 build) - fold one input byte into the CRC kept in r0.
;   Reads the byte at rD, advances rD by one and decrements rN.
;   x3 and x6 are overwritten.
;   x0_L is presumably the low byte of r0 (alias defined outside this file).
;   The final dec rN leaves ZF set exactly when rN reached zero; MY_PROLOG
;   branches on that flag right after expanding this macro.
CRC1b macro
    movzx   x3, x0_L
    movzx   x6, BYTE PTR [rD]
    xor     x6, x3                      ; table index = input byte XOR x0_L
    inc     rD
    shr     r0, 8
    CRC_XOR r0, r6, 0
    dec     rN                          ; keep last: its ZF is consumed by the caller
endm
| 30 | \r |
; MY_PROLOG crc_end   (x64 build)
;   Entry sequence of the procedure:
;     1. saves registers (2 with ABI_LINUX, otherwise 4) and copies the four
;        ABI parameters into r0, rD, rN and rT;
;     2. branches to crc_end when rN is 0;
;     3. expands CRC1b in a loop until rD is a multiple of 4 or rN hits 0;
;     4. branches to crc_end when fewer than 8 bytes are left;
;     5. otherwise sets up the 4-byte loop:
;          num_VAR = end address (rD + rN)
;          rN      = ((end - 4) AND NOT 3) + 4
;          rD      = current address - (rN - 4)
;        and XORs the first source dword into r0.
;   crc_end: label to branch to whenever the 4-byte loop is skipped.
MY_PROLOG macro crc_end:req
    local   head_byte, head_done
  ifdef ABI_LINUX
    MY_PUSH_2_REGS
  else
    MY_PUSH_4_REGS
  endif
    ; NOTE(review): keep these four moves in this order - the destination
    ; aliases may overlap the ABI parameter registers (defined outside this file).
    mov     r0, REG_ABI_PARAM_0
    mov     rN, REG_ABI_PARAM_2
    mov     rT, REG_ABI_PARAM_3
    mov     rD, REG_ABI_PARAM_1

    test    rN, rN
    jz      crc_end

    ; byte-wise head: runs while the low two bits of rD are not zero
  head_byte:
    test    rD, 3
    jz      head_done
    CRC1b
    jnz     head_byte
  head_done:
    cmp     rN, 8
    jb      crc_end

    add     rN, rD                      ; rN = end address
    mov     num_VAR, rN
    sub     rN, 4
    and     rN, NOT 3
    sub     rD, rN                      ; rD + rN still equals the current address
    ; x1 is presumably the 32-bit view of r1 - confirm in 7zAsm.asm
    mov     x1, SRCDAT4
    xor     r0, r1
    add     rN, 4                       ; SRCDAT4 now addresses the following dword
endm
| 60 | \r |
; MY_EPILOG crc_end   (x64 build)
;   Exit sequence placed after the 4-byte loop of XzCrc64UpdateT4:
;     1. steps rN back by 4 and XORs SRCDAT4 into r0 once more; with rD = 0 on
;        loop exit this is the same dword the last loop pass XORed in, so that
;        XOR is cancelled;
;     2. rD = rN (address reached), rN = num_VAR - rD (bytes still to do);
;     3. crc_end: expands CRC1b in a loop until rN is 0;
;     4. restores the registers saved by MY_PROLOG.
;   crc_end: name of the label defined at the start of the byte-wise tail.
MY_EPILOG macro crc_end:req
    local   tail_done
    sub     rN, 4
    mov     x1, SRCDAT4
    xor     r0, r1

    mov     rD, rN
    mov     rN, num_VAR
    sub     rN, rD

  crc_end:
    test    rN, rN
    jz      tail_done
    CRC1b
    jmp     crc_end
  tail_done:
  ifdef ABI_LINUX
    MY_POP_2_REGS
  else
    MY_POP_4_REGS
  endif
endm
| 80 | \r |
;-----------------------------------------------------------------------
; XzCrc64UpdateT4   (x64 build, 4 parameters)
;   Parameters arrive in REG_ABI_PARAM_0..3 and are copied by MY_PROLOG
;   into r0 (CRC value), rD (data), rN (size) and rT (table).
;   The CRC is kept in r0 throughout.
;   NOTE(review): r0 is presumably the ABI return register - confirm in
;   7zAsm.asm; nothing in this file moves the result anywhere else.
;
;   Main loop, per pass (4 input bytes):
;     - r0 already has the current source dword XORed into its low part;
;     - its four low bytes are split into x2, x3, x6, x7 while r0 is
;       shifted right by 32 in two steps of 16;
;     - the four bytes index sub-tables 3, 2, 1, 0 in that order;
;     - the next source dword, loaded into x1 first, is folded in as well.
;   The instruction order of the loop is left exactly as it was.
;-----------------------------------------------------------------------
MY_PROC XzCrc64UpdateT4, 4
    MY_PROLOG crc_end_4
    align   16
  main_loop_4:
    mov     x1, SRCDAT4                 ; next source dword
    movzx   x2, x0_L
    movzx   x3, x0_H
    shr     r0, 16
    movzx   x6, x0_L
    movzx   x7, x0_H
    shr     r0, 16
    ; two accumulators (r1 and r0) are used alternately and merged afterwards
    CRC_XOR r1, r2, 3
    CRC_XOR r0, r3, 2
    CRC_XOR r1, r6, 1
    CRC_XOR r0, r7, 0
    xor     r0, r1

    add     rD, 4                       ; loop ends when rD becomes 0
    jnz     main_loop_4

    MY_EPILOG crc_end_4
MY_ENDP
| 103 | \r |
| 104 | else\r |
| 105 | ; x86 (32-bit)\r |
| 106 | \r |
; Register roles in the x86 build. The r1 / r5 / r7 names are used as-is;
; they are not defined in this file.
;   rD - data pointer; during the 4-byte loop it is an offset added to rN
;   rN - byte count; during the 4-byte loop it is the base rD is added to
;   rT - base address of the lookup table
rD      equ  r1
rN      equ  r7
rT      equ  r5

; Distance from r4 to the crc argument at the point where MY_PROLOG reads it,
; i.e. after MY_PUSH_4_REGS: five REG_SIZE slots
; (presumably four saved registers plus the return address).
crc_OFFS    equ  (REG_SIZE * 5)

if (IS_CDECL gt 0) or (IS_LINUX gt 0)
    ; cdecl or (GNU fastcall) - every argument is on the stack:
    ;   (UInt32 *) table
    ;   size_t     size
    ;   void *     data
    ;   (UInt64)   crc
    ;   ret-ip  <-(r4)
    data_OFFS   equ  (8 + crc_OFFS)
    size_OFFS   equ  (REG_SIZE + data_OFFS)
    table_OFFS  equ  (REG_SIZE + size_OFFS)
    ; num_VAR lives in the stack slot of the size argument
    table_VAR   equ  [r4 + table_OFFS]
    num_VAR     equ  [r4 + size_OFFS]
else
    ; Windows fastcall:
    ;   r1 = data, r2 = size
    ; stack:
    ;   (UInt32 *) table
    ;   (UInt64)   crc
    ;   ret-ip  <-(r4)
    table_OFFS  equ  (8 + crc_OFFS)
    table_VAR   equ  [r4 + table_OFFS]
    ; num_VAR shares the stack slot of the table argument; MY_PROLOG copies
    ; the table pointer into rT before it stores anything to num_VAR
    num_VAR     equ  table_VAR
endif

; 32-bit source operand located at rD + rN
SRCDAT4 equ  dword ptr [rD + rN * 1]
| 138 | \r |
; CRC op0, op1, dest0, dest1, src, t   (x86 build)
;   Works on the 8-byte table entry at rT + src * 8 + t * 0800h:
;     op0 dest0, first dword of the entry  (offset 0)
;     op1 dest1, second dword of the entry (offset 4)
;   op0 / op1 are instruction mnemonics (this file passes xor and mov).
CRC macro op0:req, op1:req, dest0:req, dest1:req, src:req, t:req
    op0     dest0, dword ptr [rT + src * 8 + t * 0800h]
    op1     dest1, dword ptr [rT + src * 8 + t * 0800h + 4]
endm
| 143 | \r |
; CRC_XOR dest0, dest1, src, t   (x86 build)
;   XORs both dwords of table entry (src, t) into the pair dest0 / dest1;
;   shorthand for the CRC macro with xor as both operations.
CRC_XOR macro dest0:req, dest1:req, src:req, t:req
    CRC     xor, xor, dest0, dest1, src, t
endm
| 147 | \r |
| 148 | \r |
; CRC1b   (x86 build) - fold one input byte into the 64-bit CRC kept in the
;   register pair r2:r0 (r0 = dword loaded from crc_OFFS, r2 = dword at +4).
;   Reads the byte at rD, advances rD by one and decrements rN.
;   x3 and x6 are overwritten.
;   x0_L is presumably the low byte of r0 (alias defined outside this file).
;   The final dec rN leaves ZF set exactly when rN reached zero; MY_PROLOG
;   branches on that flag right after expanding this macro.
CRC1b macro
    movzx   x3, x0_L
    movzx   x6, BYTE PTR [rD]
    xor     x6, x3                      ; table index = input byte XOR x0_L
    inc     rD
    ; 64-bit shift right by 8 of the pair r2:r0
    shrd    r0, r2, 8
    shr     r2, 8
    CRC_XOR r0, r2, r6, 0
    dec     rN                          ; keep last: its ZF is consumed by the caller
endm
| 159 | \r |
; MY_PROLOG crc_end   (x86 build)
;   Entry sequence of the procedure:
;     1. saves four registers;
;     2. fetches size and data from the stack (cdecl / Linux) or takes size
;        from r2 (Windows fastcall, where r1 already is rD);
;     3. loads the crc argument into x0 (dword at crc_OFFS) and x2 (dword at
;        crc_OFFS + 4) and the table pointer into rT;
;     4. branches to crc_end when rN is 0;
;     5. expands CRC1b in a loop until rD is a multiple of 4 or rN hits 0;
;     6. branches to crc_end when fewer than 8 bytes are left;
;     7. otherwise sets up the 4-byte loop:
;          num_VAR = end address (rD + rN)
;          rN      = ((end - 4) AND NOT 3) + 4
;          rD      = current address - (rN - 4)
;        and XORs the first source dword into r0.
;   crc_end: label to branch to whenever the 4-byte loop is skipped.
MY_PROLOG macro crc_end:req
    local   head_byte, head_done
    MY_PUSH_4_REGS

  if (IS_CDECL gt 0) or (IS_LINUX gt 0)
    proc_numParams = proc_numParams + 2 ; for ABI_LINUX
    mov     rN, [r4 + size_OFFS]
    mov     rD, [r4 + data_OFFS]
  else
    ; size must be taken out of r2 before x2 is loaded below
    mov     rN, r2
  endif

    mov     x0, [r4 + crc_OFFS]
    mov     x2, [r4 + crc_OFFS + 4]
    ; the table pointer is read before num_VAR is written further down
    mov     rT, table_VAR

    test    rN, rN
    jz      crc_end

    ; byte-wise head: runs while the low two bits of rD are not zero
  head_byte:
    test    rD, 3
    jz      head_done
    CRC1b
    jnz     head_byte
  head_done:
    cmp     rN, 8
    jb      crc_end

    add     rN, rD                      ; rN = end address
    mov     num_VAR, rN
    sub     rN, 4
    and     rN, NOT 3
    sub     rD, rN                      ; rD + rN still equals the current address
    xor     r0, SRCDAT4
    add     rN, 4                       ; SRCDAT4 now addresses the following dword
endm
| 194 | \r |
; MY_EPILOG crc_end   (x86 build)
;   Exit sequence placed after the 4-byte loop of XzCrc64UpdateT4:
;     1. steps rN back by 4 and XORs SRCDAT4 into r0 once more; with rD = 0 on
;        loop exit this is the same dword the last loop pass folded in, so it
;        is cancelled;
;     2. rD = rN (address reached), rN = num_VAR - rD (bytes still to do);
;     3. crc_end: expands CRC1b in a loop until rN is 0;
;     4. restores the four registers saved by MY_PROLOG.
;   crc_end: name of the label defined at the start of the byte-wise tail.
MY_EPILOG macro crc_end:req
    local   tail_done
    sub     rN, 4
    xor     r0, SRCDAT4

    mov     rD, rN
    mov     rN, num_VAR
    sub     rN, rD

  crc_end:
    test    rN, rN
    jz      tail_done
    CRC1b
    jmp     crc_end
  tail_done:
    MY_POP_4_REGS
endm
| 210 | \r |
;-----------------------------------------------------------------------
; XzCrc64UpdateT4   (x86 build, declared with 5 parameter slots)
;   Arguments: 64-bit crc, data pointer, size, table pointer; where each one
;   is found (stack or r1 / r2) depends on the calling convention, see the
;   offset equates earlier in this file.
;   The CRC is kept in the pair r2:r0 (r0 = dword from crc_OFFS, r2 = +4).
;   NOTE(review): presumably returned in that same pair - confirm against
;   the C prototype; nothing in this file moves it anywhere else.
;
;   Main loop, per pass (4 input bytes):
;     - r0 already has the current source dword XORed in; its four bytes
;       index sub-tables 3, 2, 1, 0 in that order;
;     - r3 collects the new first dword: next source dword XOR old r2 XOR
;       the first dwords of the four entries;
;     - r2 collects the new second dword: the first lookup stores into it
;       (mov), the other three XOR into it;
;     - x6 is loaded with the index of the first lookup one pass ahead.
;   The instruction order of the loop is left exactly as it was.
;-----------------------------------------------------------------------
MY_PROC XzCrc64UpdateT4, 5
    MY_PROLOG crc_end_4
    movzx   x6, x0_L                    ; index of the first lookup in pass one
    align   16
  main_loop_4:
    mov     r3, SRCDAT4                 ; next source dword
    xor     r3, r2                      ; use old r2 before it is overwritten

    CRC     xor, mov, r3, r2, r6, 3
    movzx   x6, x0_H
    shr     r0, 16
    CRC_XOR r3, r2, r6, 2

    movzx   x6, x0_L
    movzx   x0, x0_H
    CRC_XOR r3, r2, r6, 1
    CRC_XOR r3, r2, r0, 0
    movzx   x6, x3_L                    ; index of the first lookup in the next pass
    mov     r0, r3

    add     rD, 4                       ; loop ends when rD becomes 0
    jnz     main_loop_4

    MY_EPILOG crc_end_4
MY_ENDP
| 236 | \r |
| 237 | endif ; ! x64\r |
| 238 | \r |
| 239 | end\r |