1 #if defined(ED25519_GCC_64BIT_SSE_CHOOSE) 3 #define HAVE_GE25519_SCALARMULT_BASE_CHOOSE_NIELS 12 __asm__ __volatile__ (
15 "movd %%rax, %%xmm14 ;\n" 16 "pshufd $0x00, %%xmm14, %%xmm14 ;\n" 17 "pxor %%xmm0, %%xmm0 ;\n" 18 "pxor %%xmm1, %%xmm1 ;\n" 19 "pxor %%xmm2, %%xmm2 ;\n" 20 "pxor %%xmm3, %%xmm3 ;\n" 21 "pxor %%xmm4, %%xmm4 ;\n" 22 "pxor %%xmm5, %%xmm5 ;\n" 26 "movd %%rax, %%xmm15 ;\n" 27 "pshufd $0x00, %%xmm15, %%xmm15 ;\n" 28 "pcmpeqd %%xmm14, %%xmm15 ;\n" 30 "movd %%rax, %%xmm6 ;\n" 31 "pxor %%xmm7, %%xmm7 ;\n" 32 "pand %%xmm15, %%xmm6 ;\n" 33 "pand %%xmm15, %%xmm7 ;\n" 34 "por %%xmm6, %%xmm0 ;\n" 35 "por %%xmm7, %%xmm1 ;\n" 36 "por %%xmm6, %%xmm2 ;\n" 37 "por %%xmm7, %%xmm3 ;\n" 41 "movd %%rax, %%xmm15 ;\n" 42 "pshufd $0x00, %%xmm15, %%xmm15 ;\n" 43 "pcmpeqd %%xmm14, %%xmm15 ;\n" 44 "movdqa 0(%1), %%xmm6 ;\n" 45 "movdqa 16(%1), %%xmm7 ;\n" 46 "movdqa 32(%1), %%xmm8 ;\n" 47 "movdqa 48(%1), %%xmm9 ;\n" 48 "movdqa 64(%1), %%xmm10 ;\n" 49 "movdqa 80(%1), %%xmm11 ;\n" 50 "pand %%xmm15, %%xmm6 ;\n" 51 "pand %%xmm15, %%xmm7 ;\n" 52 "pand %%xmm15, %%xmm8 ;\n" 53 "pand %%xmm15, %%xmm9 ;\n" 54 "pand %%xmm15, %%xmm10 ;\n" 55 "pand %%xmm15, %%xmm11 ;\n" 56 "por %%xmm6, %%xmm0 ;\n" 57 "por %%xmm7, %%xmm1 ;\n" 58 "por %%xmm8, %%xmm2 ;\n" 59 "por %%xmm9, %%xmm3 ;\n" 60 "por %%xmm10, %%xmm4 ;\n" 61 "por %%xmm11, %%xmm5 ;\n" 65 "movd %%rax, %%xmm15 ;\n" 66 "pshufd $0x00, %%xmm15, %%xmm15 ;\n" 67 "pcmpeqd %%xmm14, %%xmm15 ;\n" 68 "movdqa 96(%1), %%xmm6 ;\n" 69 "movdqa 112(%1), %%xmm7 ;\n" 70 "movdqa 128(%1), %%xmm8 ;\n" 71 "movdqa 144(%1), %%xmm9 ;\n" 72 "movdqa 160(%1), %%xmm10 ;\n" 73 "movdqa 176(%1), %%xmm11 ;\n" 74 "pand %%xmm15, %%xmm6 ;\n" 75 "pand %%xmm15, %%xmm7 ;\n" 76 "pand %%xmm15, %%xmm8 ;\n" 77 "pand %%xmm15, %%xmm9 ;\n" 78 "pand %%xmm15, %%xmm10 ;\n" 79 "pand %%xmm15, %%xmm11 ;\n" 80 "por %%xmm6, %%xmm0 ;\n" 81 "por %%xmm7, %%xmm1 ;\n" 82 "por %%xmm8, %%xmm2 ;\n" 83 "por %%xmm9, %%xmm3 ;\n" 84 "por %%xmm10, %%xmm4 ;\n" 85 "por %%xmm11, %%xmm5 ;\n" 89 "movd %%rax, %%xmm15 ;\n" 90 "pshufd $0x00, %%xmm15, %%xmm15 ;\n" 91 "pcmpeqd %%xmm14, %%xmm15 ;\n" 92 "movdqa 192(%1), %%xmm6 ;\n" 93 "movdqa 208(%1), %%xmm7 ;\n" 94 "movdqa 224(%1), %%xmm8 ;\n" 95 "movdqa 240(%1), %%xmm9 ;\n" 96 "movdqa 256(%1), %%xmm10 ;\n" 97 "movdqa 272(%1), %%xmm11 ;\n" 98 "pand %%xmm15, %%xmm6 ;\n" 99 "pand %%xmm15, %%xmm7 ;\n" 100 "pand %%xmm15, %%xmm8 ;\n" 101 "pand %%xmm15, %%xmm9 ;\n" 102 "pand %%xmm15, %%xmm10 ;\n" 103 "pand %%xmm15, %%xmm11 ;\n" 104 "por %%xmm6, %%xmm0 ;\n" 105 "por %%xmm7, %%xmm1 ;\n" 106 "por %%xmm8, %%xmm2 ;\n" 107 "por %%xmm9, %%xmm3 ;\n" 108 "por %%xmm10, %%xmm4 ;\n" 109 "por %%xmm11, %%xmm5 ;\n" 113 "movd %%rax, %%xmm15 ;\n" 114 "pshufd $0x00, %%xmm15, %%xmm15 ;\n" 115 "pcmpeqd %%xmm14, %%xmm15 ;\n" 116 "movdqa 288(%1), %%xmm6 ;\n" 117 "movdqa 304(%1), %%xmm7 ;\n" 118 "movdqa 320(%1), %%xmm8 ;\n" 119 "movdqa 336(%1), %%xmm9 ;\n" 120 "movdqa 352(%1), %%xmm10 ;\n" 121 "movdqa 368(%1), %%xmm11 ;\n" 122 "pand %%xmm15, %%xmm6 ;\n" 123 "pand %%xmm15, %%xmm7 ;\n" 124 "pand %%xmm15, %%xmm8 ;\n" 125 "pand %%xmm15, %%xmm9 ;\n" 126 "pand %%xmm15, %%xmm10 ;\n" 127 "pand %%xmm15, %%xmm11 ;\n" 128 "por %%xmm6, %%xmm0 ;\n" 129 "por %%xmm7, %%xmm1 ;\n" 130 "por %%xmm8, %%xmm2 ;\n" 131 "por %%xmm9, %%xmm3 ;\n" 132 "por %%xmm10, %%xmm4 ;\n" 133 "por %%xmm11, %%xmm5 ;\n" 137 "movd %%rax, %%xmm15 ;\n" 138 "pshufd $0x00, %%xmm15, %%xmm15 ;\n" 139 "pcmpeqd %%xmm14, %%xmm15 ;\n" 140 "movdqa 384(%1), %%xmm6 ;\n" 141 "movdqa 400(%1), %%xmm7 ;\n" 142 "movdqa 416(%1), %%xmm8 ;\n" 143 "movdqa 432(%1), %%xmm9 ;\n" 144 "movdqa 448(%1), %%xmm10 ;\n" 145 "movdqa 464(%1), %%xmm11 ;\n" 146 "pand %%xmm15, %%xmm6 ;\n" 147 "pand %%xmm15, %%xmm7 ;\n" 148 "pand %%xmm15, %%xmm8 ;\n" 149 "pand %%xmm15, %%xmm9 ;\n" 150 "pand %%xmm15, %%xmm10 ;\n" 151 "pand %%xmm15, %%xmm11 ;\n" 152 "por %%xmm6, %%xmm0 ;\n" 153 "por %%xmm7, %%xmm1 ;\n" 154 "por %%xmm8, %%xmm2 ;\n" 155 "por %%xmm9, %%xmm3 ;\n" 156 "por %%xmm10, %%xmm4 ;\n" 157 "por %%xmm11, %%xmm5 ;\n" 161 "movd %%rax, %%xmm15 ;\n" 162 "pshufd $0x00, %%xmm15, %%xmm15 ;\n" 163 "pcmpeqd %%xmm14, %%xmm15 ;\n" 164 "movdqa 480(%1), %%xmm6 ;\n" 165 "movdqa 496(%1), %%xmm7 ;\n" 166 "movdqa 512(%1), %%xmm8 ;\n" 167 "movdqa 528(%1), %%xmm9 ;\n" 168 "movdqa 544(%1), %%xmm10 ;\n" 169 "movdqa 560(%1), %%xmm11 ;\n" 170 "pand %%xmm15, %%xmm6 ;\n" 171 "pand %%xmm15, %%xmm7 ;\n" 172 "pand %%xmm15, %%xmm8 ;\n" 173 "pand %%xmm15, %%xmm9 ;\n" 174 "pand %%xmm15, %%xmm10 ;\n" 175 "pand %%xmm15, %%xmm11 ;\n" 176 "por %%xmm6, %%xmm0 ;\n" 177 "por %%xmm7, %%xmm1 ;\n" 178 "por %%xmm8, %%xmm2 ;\n" 179 "por %%xmm9, %%xmm3 ;\n" 180 "por %%xmm10, %%xmm4 ;\n" 181 "por %%xmm11, %%xmm5 ;\n" 185 "movd %%rax, %%xmm15 ;\n" 186 "pshufd $0x00, %%xmm15, %%xmm15 ;\n" 187 "pcmpeqd %%xmm14, %%xmm15 ;\n" 188 "movdqa 576(%1), %%xmm6 ;\n" 189 "movdqa 592(%1), %%xmm7 ;\n" 190 "movdqa 608(%1), %%xmm8 ;\n" 191 "movdqa 624(%1), %%xmm9 ;\n" 192 "movdqa 640(%1), %%xmm10 ;\n" 193 "movdqa 656(%1), %%xmm11 ;\n" 194 "pand %%xmm15, %%xmm6 ;\n" 195 "pand %%xmm15, %%xmm7 ;\n" 196 "pand %%xmm15, %%xmm8 ;\n" 197 "pand %%xmm15, %%xmm9 ;\n" 198 "pand %%xmm15, %%xmm10 ;\n" 199 "pand %%xmm15, %%xmm11 ;\n" 200 "por %%xmm6, %%xmm0 ;\n" 201 "por %%xmm7, %%xmm1 ;\n" 202 "por %%xmm8, %%xmm2 ;\n" 203 "por %%xmm9, %%xmm3 ;\n" 204 "por %%xmm10, %%xmm4 ;\n" 205 "por %%xmm11, %%xmm5 ;\n" 209 "movd %%rax, %%xmm15 ;\n" 210 "pshufd $0x00, %%xmm15, %%xmm15 ;\n" 211 "pcmpeqd %%xmm14, %%xmm15 ;\n" 212 "movdqa 672(%1), %%xmm6 ;\n" 213 "movdqa 688(%1), %%xmm7 ;\n" 214 "movdqa 704(%1), %%xmm8 ;\n" 215 "movdqa 720(%1), %%xmm9 ;\n" 216 "movdqa 736(%1), %%xmm10 ;\n" 217 "movdqa 752(%1), %%xmm11 ;\n" 218 "pand %%xmm15, %%xmm6 ;\n" 219 "pand %%xmm15, %%xmm7 ;\n" 220 "pand %%xmm15, %%xmm8 ;\n" 221 "pand %%xmm15, %%xmm9 ;\n" 222 "pand %%xmm15, %%xmm10 ;\n" 223 "pand %%xmm15, %%xmm11 ;\n" 224 "por %%xmm6, %%xmm0 ;\n" 225 "por %%xmm7, %%xmm1 ;\n" 226 "por %%xmm8, %%xmm2 ;\n" 227 "por %%xmm9, %%xmm3 ;\n" 228 "por %%xmm10, %%xmm4 ;\n" 229 "por %%xmm11, %%xmm5 ;\n" 234 "movd %%rax, %%xmm14 ;\n" 235 "pxor %%xmm15, %%xmm15 ;\n" 236 "pshufd $0x00, %%xmm14, %%xmm14 ;\n" 237 "pxor %%xmm0, %%xmm2 ;\n" 238 "pxor %%xmm1, %%xmm3 ;\n" 239 "pcmpeqd %%xmm14, %%xmm15 ;\n" 240 "movdqa %%xmm2, %%xmm6 ;\n" 241 "movdqa %%xmm3, %%xmm7 ;\n" 242 "pand %%xmm15, %%xmm6 ;\n" 243 "pand %%xmm15, %%xmm7 ;\n" 244 "pxor %%xmm6, %%xmm0 ;\n" 245 "pxor %%xmm7, %%xmm1 ;\n" 246 "pxor %%xmm0, %%xmm2 ;\n" 247 "pxor %%xmm1, %%xmm3 ;\n" 250 "xorq %%rax, %%rax ;\n" 251 "movd %%xmm0, %%rcx ;\n" 252 "movd %%xmm0, %%r8 ;\n" 253 "movd %%xmm1, %%rsi ;\n" 254 "pshufd $0xee, %%xmm0, %%xmm0 ;\n" 255 "pshufd $0xee, %%xmm1, %%xmm1 ;\n" 256 "movd %%xmm0, %%rdx ;\n" 257 "movd %%xmm1, %%rdi ;\n" 258 "shrdq $51, %%rdx, %%r8 ;\n" 259 "shrdq $38, %%rsi, %%rdx ;\n" 260 "shrdq $25, %%rdi, %%rsi ;\n" 261 "shrq $12, %%rdi ;\n" 262 "movq %%rcx, %%r9 ;\n" 263 "movq %%r8, %%r10 ;\n" 264 "movq %%rdx, %%r11 ;\n" 265 "movq %%rsi, %%r12 ;\n" 266 "movq %%rdi, %%r13 ;\n" 268 "shrq $26, %%r10 ;\n" 269 "shrq $26, %%r11 ;\n" 270 "shrq $26, %%r12 ;\n" 271 "shrq $26, %%r13 ;\n" 272 "andl $0x3ffffff, %%ecx ;\n" 273 "andl $0x1ffffff, %%r9d ;\n" 274 "andl $0x3ffffff, %%r8d ;\n" 275 "andl $0x1ffffff, %%r10d ;\n" 276 "andl $0x3ffffff, %%edx ;\n" 277 "andl $0x1ffffff, %%r11d ;\n" 278 "andl $0x3ffffff, %%esi ;\n" 279 "andl $0x1ffffff, %%r12d ;\n" 280 "andl $0x3ffffff, %%edi ;\n" 281 "andl $0x1ffffff, %%r13d ;\n" 282 "movl %%ecx, 0(%2) ;\n" 283 "movl %%r9d, 4(%2) ;\n" 284 "movl %%r8d, 8(%2) ;\n" 285 "movl %%r10d, 12(%2) ;\n" 286 "movl %%edx, 16(%2) ;\n" 287 "movl %%r11d, 20(%2) ;\n" 288 "movl %%esi, 24(%2) ;\n" 289 "movl %%r12d, 28(%2) ;\n" 290 "movl %%edi, 32(%2) ;\n" 291 "movl %%r13d, 36(%2) ;\n" 292 "movq %%rax, 40(%2) ;\n" 295 "movd %%xmm2, %%rcx ;\n" 296 "movd %%xmm2, %%r8 ;\n" 297 "movd %%xmm3, %%rsi ;\n" 298 "pshufd $0xee, %%xmm2, %%xmm2 ;\n" 299 "pshufd $0xee, %%xmm3, %%xmm3 ;\n" 300 "movd %%xmm2, %%rdx ;\n" 301 "movd %%xmm3, %%rdi ;\n" 302 "shrdq $51, %%rdx, %%r8 ;\n" 303 "shrdq $38, %%rsi, %%rdx ;\n" 304 "shrdq $25, %%rdi, %%rsi ;\n" 305 "shrq $12, %%rdi ;\n" 306 "movq %%rcx, %%r9 ;\n" 307 "movq %%r8, %%r10 ;\n" 308 "movq %%rdx, %%r11 ;\n" 309 "movq %%rsi, %%r12 ;\n" 310 "movq %%rdi, %%r13 ;\n" 312 "shrq $26, %%r10 ;\n" 313 "shrq $26, %%r11 ;\n" 314 "shrq $26, %%r12 ;\n" 315 "shrq $26, %%r13 ;\n" 316 "andl $0x3ffffff, %%ecx ;\n" 317 "andl $0x1ffffff, %%r9d ;\n" 318 "andl $0x3ffffff, %%r8d ;\n" 319 "andl $0x1ffffff, %%r10d ;\n" 320 "andl $0x3ffffff, %%edx ;\n" 321 "andl $0x1ffffff, %%r11d ;\n" 322 "andl $0x3ffffff, %%esi ;\n" 323 "andl $0x1ffffff, %%r12d ;\n" 324 "andl $0x3ffffff, %%edi ;\n" 325 "andl $0x1ffffff, %%r13d ;\n" 326 "movl %%ecx, 48(%2) ;\n" 327 "movl %%r9d, 52(%2) ;\n" 328 "movl %%r8d, 56(%2) ;\n" 329 "movl %%r10d, 60(%2) ;\n" 330 "movl %%edx, 64(%2) ;\n" 331 "movl %%r11d, 68(%2) ;\n" 332 "movl %%esi, 72(%2) ;\n" 333 "movl %%r12d, 76(%2) ;\n" 334 "movl %%edi, 80(%2) ;\n" 335 "movl %%r13d, 84(%2) ;\n" 336 "movq %%rax, 88(%2) ;\n" 339 "xorq %%rax, %%rax ;\n" 340 "movd %%xmm4, %%rcx ;\n" 341 "movd %%xmm4, %%r8 ;\n" 342 "movd %%xmm5, %%rsi ;\n" 343 "pshufd $0xee, %%xmm4, %%xmm4 ;\n" 344 "pshufd $0xee, %%xmm5, %%xmm5 ;\n" 345 "movd %%xmm4, %%rdx ;\n" 346 "movd %%xmm5, %%rdi ;\n" 347 "shrdq $51, %%rdx, %%r8 ;\n" 348 "shrdq $38, %%rsi, %%rdx ;\n" 349 "shrdq $25, %%rdi, %%rsi ;\n" 350 "shrq $12, %%rdi ;\n" 351 "movq %%rcx, %%r9 ;\n" 352 "movq %%r8, %%r10 ;\n" 353 "movq %%rdx, %%r11 ;\n" 354 "movq %%rsi, %%r12 ;\n" 355 "movq %%rdi, %%r13 ;\n" 357 "shrq $26, %%r10 ;\n" 358 "shrq $26, %%r11 ;\n" 359 "shrq $26, %%r12 ;\n" 360 "shrq $26, %%r13 ;\n" 361 "andl $0x3ffffff, %%ecx ;\n" 362 "andl $0x1ffffff, %%r9d ;\n" 363 "andl $0x3ffffff, %%r8d ;\n" 364 "andl $0x1ffffff, %%r10d ;\n" 365 "andl $0x3ffffff, %%edx ;\n" 366 "andl $0x1ffffff, %%r11d ;\n" 367 "andl $0x3ffffff, %%esi ;\n" 368 "andl $0x1ffffff, %%r12d ;\n" 369 "andl $0x3ffffff, %%edi ;\n" 370 "andl $0x1ffffff, %%r13d ;\n" 371 "movd %%ecx, %%xmm0 ;\n" 372 "movd %%r9d, %%xmm4 ;\n" 373 "movd %%r8d, %%xmm8 ;\n" 374 "movd %%r10d, %%xmm3 ;\n" 375 "movd %%edx, %%xmm1 ;\n" 376 "movd %%r11d, %%xmm5 ;\n" 377 "movd %%esi, %%xmm6 ;\n" 378 "movd %%r12d, %%xmm7 ;\n" 379 "movd %%edi, %%xmm2 ;\n" 380 "movd %%r13d, %%xmm9 ;\n" 381 "punpckldq %%xmm4, %%xmm0 ;\n" 382 "punpckldq %%xmm3, %%xmm8 ;\n" 383 "punpckldq %%xmm5, %%xmm1 ;\n" 384 "punpckldq %%xmm7, %%xmm6 ;\n" 385 "punpckldq %%xmm9, %%xmm2 ;\n" 386 "punpcklqdq %%xmm8, %%xmm0 ;\n" 387 "punpcklqdq %%xmm6, %%xmm1 ;\n" 390 "movl $0x7ffffda, %%ecx ;\n" 391 "movl $0x3fffffe, %%edx ;\n" 392 "movl $0x7fffffe, %%eax ;\n" 393 "movd %%ecx, %%xmm3 ;\n" 394 "movd %%edx, %%xmm5 ;\n" 395 "movd %%eax, %%xmm4 ;\n" 396 "punpckldq %%xmm5, %%xmm3 ;\n" 397 "punpckldq %%xmm5, %%xmm4 ;\n" 398 "punpcklqdq %%xmm4, %%xmm3 ;\n" 399 "movdqa %%xmm4, %%xmm5 ;\n" 400 "punpcklqdq %%xmm4, %%xmm4 ;\n" 405 "movd %%ecx, %%xmm6 ;\n" 406 "pshufd $0x00, %%xmm6, %%xmm6 ;\n" 407 "movdqa %%xmm6, %%xmm7 ;\n" 408 "psubd %%xmm0, %%xmm3 ;\n" 409 "psubd %%xmm1, %%xmm4 ;\n" 410 "psubd %%xmm2, %%xmm5 ;\n" 411 "pand %%xmm6, %%xmm0 ;\n" 412 "pand %%xmm6, %%xmm1 ;\n" 413 "pand %%xmm6, %%xmm2 ;\n" 414 "pandn %%xmm3, %%xmm6 ;\n" 415 "movdqa %%xmm7, %%xmm3 ;\n" 416 "pandn %%xmm4, %%xmm7 ;\n" 417 "pandn %%xmm5, %%xmm3 ;\n" 418 "por %%xmm6, %%xmm0 ;\n" 419 "por %%xmm7, %%xmm1 ;\n" 420 "por %%xmm3, %%xmm2 ;\n" 423 "movdqa %%xmm0, 96(%2) ;\n" 424 "movdqa %%xmm1, 112(%2) ;\n" 425 "movdqa %%xmm2, 128(%2) ;\n" 427 :
"m"(u),
"r"(&
table[pos * 8]),
"r"(t),
"m"(sign)
429 "%rax",
"%rcx",
"%rdx",
"%rdi",
"%rsi",
"%r8",
"%r9",
"%r10",
"%r11",
"%r12",
"%r13",
430 "%xmm0",
"%xmm1",
"%xmm2",
"%xmm3",
"%xmm4",
"%xmm5",
"%xmm6",
"%xmm7",
"%xmm8",
"%xmm9",
"%xmm10",
"%xmm11",
"%xmm14",
"%xmm14",
std::vector< std::vector< _variant_t > > table
unsigned __int64 uint64_t