Bitcoin ABC 0.33.3
P2P Digital Currency
field_5x52_asm_impl.h
Go to the documentation of this file.
1/***********************************************************************
2 * Copyright (c) 2013-2014 Diederik Huys, Pieter Wuille *
3 * Distributed under the MIT software license, see the accompanying *
4 * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
5 ***********************************************************************/
6
14#ifndef SECP256K1_FIELD_INNER5X52_IMPL_H
15#define SECP256K1_FIELD_INNER5X52_IMPL_H
16
17#include "util.h"
18
SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint64_t *r, const uint64_t *a, const uint64_t * SECP256K1_RESTRICT b) {
/**
 * Computes r = a * b (mod p) for secp256k1 field elements in 5x52
 * representation: five 64-bit limbs of 52 bits each, low limb first,
 * with limb mask M = 0xFFFFFFFFFFFFF (2^52 - 1).
 *
 * The schedule interleaves the ten 64x64->128 partial products into two
 * 128-bit accumulators, c (low part of the result) and d (high part),
 * folding overflow above 2^260 back into the low limbs by multiplying
 * with the constant 0x1000003D10 (called R in the comments below;
 * 0x1000003D1 = R >> 4 — presumably derived from 2^256 mod p, as in the
 * portable 5x52 field implementation; confirm against field_5x52_impl.h).
 *
 * Register assignment:
 *   rdx:rax = mulq result
 *   r9:r8   = c
 *   r15:rcx = d
 *   r10-r14 = a0-a4 (limbs of a)
 *   rbx     = b (pointer)
 *   rdi     = r (pointer)
 *   rsi     = a (pointer), later reused as scratch for t3/t4/u0
 *
 * tmp1/tmp2/tmp3 spill the intermediate limbs t3, t4 and tx to the stack
 * while the registers holding them are reused for partial products.
 */
    uint64_t tmp1, tmp2, tmp3;
__asm__ __volatile__(
    /* Load the five limbs of a into r10..r14. */
    "movq 0(%%rsi),%%r10\n"
    "movq 8(%%rsi),%%r11\n"
    "movq 16(%%rsi),%%r12\n"
    "movq 24(%%rsi),%%r13\n"
    "movq 32(%%rsi),%%r14\n"

    /* d = a3 * b0 */
    "movq 0(%%rbx),%%rax\n"
    "mulq %%r13\n"
    "movq %%rax,%%rcx\n"
    "movq %%rdx,%%r15\n"
    /* d += a2 * b1 */
    "movq 8(%%rbx),%%rax\n"
    "mulq %%r12\n"
    "addq %%rax,%%rcx\n"
    "adcq %%rdx,%%r15\n"
    /* d += a1 * b2 */
    "movq 16(%%rbx),%%rax\n"
    "mulq %%r11\n"
    "addq %%rax,%%rcx\n"
    "adcq %%rdx,%%r15\n"
    /* d += a0 * b3 */
    "movq 24(%%rbx),%%rax\n"
    "mulq %%r10\n"
    "addq %%rax,%%rcx\n"
    "adcq %%rdx,%%r15\n"
    /* c = a4 * b4 */
    "movq 32(%%rbx),%%rax\n"
    "mulq %%r14\n"
    "movq %%rax,%%r8\n"
    "movq %%rdx,%%r9\n"
    /* d += (c & M) * R */
    "movq $0xfffffffffffff,%%rdx\n"
    "andq %%rdx,%%rax\n"
    "movq $0x1000003d10,%%rdx\n"
    "mulq %%rdx\n"
    "addq %%rax,%%rcx\n"
    "adcq %%rdx,%%r15\n"
    /* c >>= 52 (%%r8 only; high half %%r9 is dead after this) */
    "shrdq $52,%%r9,%%r8\n"
    /* t3 (tmp1) = d & M */
    "movq %%rcx,%%rsi\n"
    "movq $0xfffffffffffff,%%rdx\n"
    "andq %%rdx,%%rsi\n"
    "movq %%rsi,%q1\n"
    /* d >>= 52 */
    "shrdq $52,%%r15,%%rcx\n"
    "xorq %%r15,%%r15\n"
    /* d += a4 * b0 */
    "movq 0(%%rbx),%%rax\n"
    "mulq %%r14\n"
    "addq %%rax,%%rcx\n"
    "adcq %%rdx,%%r15\n"
    /* d += a3 * b1 */
    "movq 8(%%rbx),%%rax\n"
    "mulq %%r13\n"
    "addq %%rax,%%rcx\n"
    "adcq %%rdx,%%r15\n"
    /* d += a2 * b2 */
    "movq 16(%%rbx),%%rax\n"
    "mulq %%r12\n"
    "addq %%rax,%%rcx\n"
    "adcq %%rdx,%%r15\n"
    /* d += a1 * b3 */
    "movq 24(%%rbx),%%rax\n"
    "mulq %%r11\n"
    "addq %%rax,%%rcx\n"
    "adcq %%rdx,%%r15\n"
    /* d += a0 * b4 */
    "movq 32(%%rbx),%%rax\n"
    "mulq %%r10\n"
    "addq %%rax,%%rcx\n"
    "adcq %%rdx,%%r15\n"
    /* d += c * R */
    "movq %%r8,%%rax\n"
    "movq $0x1000003d10,%%rdx\n"
    "mulq %%rdx\n"
    "addq %%rax,%%rcx\n"
    "adcq %%rdx,%%r15\n"
    /* t4 = d & M (%%rsi) */
    "movq %%rcx,%%rsi\n"
    "movq $0xfffffffffffff,%%rdx\n"
    "andq %%rdx,%%rsi\n"
    /* d >>= 52 */
    "shrdq $52,%%r15,%%rcx\n"
    "xorq %%r15,%%r15\n"
    /* tx = t4 >> 48 (tmp3): top 4 bits of t4, recombined into u0 below */
    "movq %%rsi,%%rax\n"
    "shrq $48,%%rax\n"
    "movq %%rax,%q3\n"
    /* t4 &= (M >> 4) (tmp2) */
    "movq $0xffffffffffff,%%rax\n"
    "andq %%rax,%%rsi\n"
    "movq %%rsi,%q2\n"
    /* c = a0 * b0 */
    "movq 0(%%rbx),%%rax\n"
    "mulq %%r10\n"
    "movq %%rax,%%r8\n"
    "movq %%rdx,%%r9\n"
    /* d += a4 * b1 */
    "movq 8(%%rbx),%%rax\n"
    "mulq %%r14\n"
    "addq %%rax,%%rcx\n"
    "adcq %%rdx,%%r15\n"
    /* d += a3 * b2 */
    "movq 16(%%rbx),%%rax\n"
    "mulq %%r13\n"
    "addq %%rax,%%rcx\n"
    "adcq %%rdx,%%r15\n"
    /* d += a2 * b3 */
    "movq 24(%%rbx),%%rax\n"
    "mulq %%r12\n"
    "addq %%rax,%%rcx\n"
    "adcq %%rdx,%%r15\n"
    /* d += a1 * b4 */
    "movq 32(%%rbx),%%rax\n"
    "mulq %%r11\n"
    "addq %%rax,%%rcx\n"
    "adcq %%rdx,%%r15\n"
    /* u0 = d & M (%%rsi) */
    "movq %%rcx,%%rsi\n"
    "movq $0xfffffffffffff,%%rdx\n"
    "andq %%rdx,%%rsi\n"
    /* d >>= 52 */
    "shrdq $52,%%r15,%%rcx\n"
    "xorq %%r15,%%r15\n"
    /* u0 = (u0 << 4) | tx (%%rsi) */
    "shlq $4,%%rsi\n"
    "movq %q3,%%rax\n"
    "orq %%rax,%%rsi\n"
    /* c += u0 * (R >> 4) */
    "movq $0x1000003d1,%%rax\n"
    "mulq %%rsi\n"
    "addq %%rax,%%r8\n"
    "adcq %%rdx,%%r9\n"
    /* r[0] = c & M */
    "movq %%r8,%%rax\n"
    "movq $0xfffffffffffff,%%rdx\n"
    "andq %%rdx,%%rax\n"
    "movq %%rax,0(%%rdi)\n"
    /* c >>= 52 */
    "shrdq $52,%%r9,%%r8\n"
    "xorq %%r9,%%r9\n"
    /* c += a1 * b0 */
    "movq 0(%%rbx),%%rax\n"
    "mulq %%r11\n"
    "addq %%rax,%%r8\n"
    "adcq %%rdx,%%r9\n"
    /* c += a0 * b1 */
    "movq 8(%%rbx),%%rax\n"
    "mulq %%r10\n"
    "addq %%rax,%%r8\n"
    "adcq %%rdx,%%r9\n"
    /* d += a4 * b2 */
    "movq 16(%%rbx),%%rax\n"
    "mulq %%r14\n"
    "addq %%rax,%%rcx\n"
    "adcq %%rdx,%%r15\n"
    /* d += a3 * b3 */
    "movq 24(%%rbx),%%rax\n"
    "mulq %%r13\n"
    "addq %%rax,%%rcx\n"
    "adcq %%rdx,%%r15\n"
    /* d += a2 * b4 */
    "movq 32(%%rbx),%%rax\n"
    "mulq %%r12\n"
    "addq %%rax,%%rcx\n"
    "adcq %%rdx,%%r15\n"
    /* c += (d & M) * R */
    "movq %%rcx,%%rax\n"
    "movq $0xfffffffffffff,%%rdx\n"
    "andq %%rdx,%%rax\n"
    "movq $0x1000003d10,%%rdx\n"
    "mulq %%rdx\n"
    "addq %%rax,%%r8\n"
    "adcq %%rdx,%%r9\n"
    /* d >>= 52 */
    "shrdq $52,%%r15,%%rcx\n"
    "xorq %%r15,%%r15\n"
    /* r[1] = c & M */
    "movq %%r8,%%rax\n"
    "movq $0xfffffffffffff,%%rdx\n"
    "andq %%rdx,%%rax\n"
    "movq %%rax,8(%%rdi)\n"
    /* c >>= 52 */
    "shrdq $52,%%r9,%%r8\n"
    "xorq %%r9,%%r9\n"
    /* c += a2 * b0 */
    "movq 0(%%rbx),%%rax\n"
    "mulq %%r12\n"
    "addq %%rax,%%r8\n"
    "adcq %%rdx,%%r9\n"
    /* c += a1 * b1 */
    "movq 8(%%rbx),%%rax\n"
    "mulq %%r11\n"
    "addq %%rax,%%r8\n"
    "adcq %%rdx,%%r9\n"
    /* c += a0 * b2 (last use of %%r10 = a0) */
    "movq 16(%%rbx),%%rax\n"
    "mulq %%r10\n"
    "addq %%rax,%%r8\n"
    "adcq %%rdx,%%r9\n"
    /* fetch t3 (%%r10, overwrites a0), t4 (%%rsi) from the stack spills */
    "movq %q2,%%rsi\n"
    "movq %q1,%%r10\n"
    /* d += a4 * b3 */
    "movq 24(%%rbx),%%rax\n"
    "mulq %%r14\n"
    "addq %%rax,%%rcx\n"
    "adcq %%rdx,%%r15\n"
    /* d += a3 * b4 */
    "movq 32(%%rbx),%%rax\n"
    "mulq %%r13\n"
    "addq %%rax,%%rcx\n"
    "adcq %%rdx,%%r15\n"
    /* c += (d & M) * R */
    "movq %%rcx,%%rax\n"
    "movq $0xfffffffffffff,%%rdx\n"
    "andq %%rdx,%%rax\n"
    "movq $0x1000003d10,%%rdx\n"
    "mulq %%rdx\n"
    "addq %%rax,%%r8\n"
    "adcq %%rdx,%%r9\n"
    /* d >>= 52 (%%rcx only; %%r15 is dead after this) */
    "shrdq $52,%%r15,%%rcx\n"
    /* r[2] = c & M */
    "movq %%r8,%%rax\n"
    "movq $0xfffffffffffff,%%rdx\n"
    "andq %%rdx,%%rax\n"
    "movq %%rax,16(%%rdi)\n"
    /* c >>= 52 */
    "shrdq $52,%%r9,%%r8\n"
    "xorq %%r9,%%r9\n"
    /* c += t3 */
    "addq %%r10,%%r8\n"
    /* c += d * R */
    "movq %%rcx,%%rax\n"
    "movq $0x1000003d10,%%rdx\n"
    "mulq %%rdx\n"
    "addq %%rax,%%r8\n"
    "adcq %%rdx,%%r9\n"
    /* r[3] = c & M */
    "movq %%r8,%%rax\n"
    "movq $0xfffffffffffff,%%rdx\n"
    "andq %%rdx,%%rax\n"
    "movq %%rax,24(%%rdi)\n"
    /* c >>= 52 (%%r8 only) */
    "shrdq $52,%%r9,%%r8\n"
    /* c += t4 (%%r8 only) */
    "addq %%rsi,%%r8\n"
    /* r[4] = c (top limb may exceed 52 bits; normalization happens elsewhere) */
    "movq %%r8,32(%%rdi)\n"
/* "+S"(a) marks rsi as read-write since the asm reuses it as scratch. */
: "+S"(a), "=&m"(tmp1), "=&m"(tmp2), "=&m"(tmp3)
: "b"(b), "D"(r)
: "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "cc", "memory"
);
}
288
SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint64_t *r, const uint64_t *a) {
/**
 * Computes r = a * a (mod p) for a secp256k1 field element in 5x52
 * representation (five 64-bit limbs of 52 bits each, low limb first,
 * limb mask M = 0xFFFFFFFFFFFFF).
 *
 * Same accumulation scheme as secp256k1_fe_mul_inner, but cross products
 * a_i * a_j (i != j) are computed once with a doubled operand
 * (leaq (%reg,%reg,1) or addq %reg,%reg) instead of twice. Overflow above
 * 2^260 is folded back in via the constant 0x1000003D10 (R in the
 * comments; 0x1000003D1 = R >> 4 — presumably derived from 2^256 mod p;
 * confirm against field_5x52_impl.h).
 *
 * Register assignment:
 *   rdx:rax = mulq result
 *   r9:r8   = c (low accumulator)
 *   rcx:rbx = d (high accumulator)
 *   r10-r14 = a0-a4 (limbs of a; r10 and r14 are doubled in place mid-way)
 *   r15     = M (kept in a register here, unlike the mul routine)
 *   rdi     = r (pointer)
 *   rsi     = a (pointer), later reused as scratch for t3/t4/u0
 *
 * tmp1/tmp2/tmp3 spill the intermediate limbs t3, t4 and tx to the stack.
 */
    uint64_t tmp1, tmp2, tmp3;
__asm__ __volatile__(
    /* Load the five limbs of a into r10..r14 and cache M in r15. */
    "movq 0(%%rsi),%%r10\n"
    "movq 8(%%rsi),%%r11\n"
    "movq 16(%%rsi),%%r12\n"
    "movq 24(%%rsi),%%r13\n"
    "movq 32(%%rsi),%%r14\n"
    "movq $0xfffffffffffff,%%r15\n"

    /* d = (a0*2) * a3 */
    "leaq (%%r10,%%r10,1),%%rax\n"
    "mulq %%r13\n"
    "movq %%rax,%%rbx\n"
    "movq %%rdx,%%rcx\n"
    /* d += (a1*2) * a2 */
    "leaq (%%r11,%%r11,1),%%rax\n"
    "mulq %%r12\n"
    "addq %%rax,%%rbx\n"
    "adcq %%rdx,%%rcx\n"
    /* c = a4 * a4 */
    "movq %%r14,%%rax\n"
    "mulq %%r14\n"
    "movq %%rax,%%r8\n"
    "movq %%rdx,%%r9\n"
    /* d += (c & M) * R */
    "andq %%r15,%%rax\n"
    "movq $0x1000003d10,%%rdx\n"
    "mulq %%rdx\n"
    "addq %%rax,%%rbx\n"
    "adcq %%rdx,%%rcx\n"
    /* c >>= 52 (%%r8 only; high half %%r9 is dead after this) */
    "shrdq $52,%%r9,%%r8\n"
    /* t3 (tmp1) = d & M */
    "movq %%rbx,%%rsi\n"
    "andq %%r15,%%rsi\n"
    "movq %%rsi,%q1\n"
    /* d >>= 52 */
    "shrdq $52,%%rcx,%%rbx\n"
    "xorq %%rcx,%%rcx\n"
    /* a4 *= 2 (a4's remaining uses are all cross products) */
    "addq %%r14,%%r14\n"
    /* d += a0 * a4 */
    "movq %%r10,%%rax\n"
    "mulq %%r14\n"
    "addq %%rax,%%rbx\n"
    "adcq %%rdx,%%rcx\n"
    /* d += (a1*2) * a3 */
    "leaq (%%r11,%%r11,1),%%rax\n"
    "mulq %%r13\n"
    "addq %%rax,%%rbx\n"
    "adcq %%rdx,%%rcx\n"
    /* d += a2 * a2 */
    "movq %%r12,%%rax\n"
    "mulq %%r12\n"
    "addq %%rax,%%rbx\n"
    "adcq %%rdx,%%rcx\n"
    /* d += c * R */
    "movq %%r8,%%rax\n"
    "movq $0x1000003d10,%%rdx\n"
    "mulq %%rdx\n"
    "addq %%rax,%%rbx\n"
    "adcq %%rdx,%%rcx\n"
    /* t4 = d & M (%%rsi) */
    "movq %%rbx,%%rsi\n"
    "andq %%r15,%%rsi\n"
    /* d >>= 52 */
    "shrdq $52,%%rcx,%%rbx\n"
    "xorq %%rcx,%%rcx\n"
    /* tx = t4 >> 48 (tmp3): top 4 bits of t4, recombined into u0 below */
    "movq %%rsi,%%rax\n"
    "shrq $48,%%rax\n"
    "movq %%rax,%q3\n"
    /* t4 &= (M >> 4) (tmp2) */
    "movq $0xffffffffffff,%%rax\n"
    "andq %%rax,%%rsi\n"
    "movq %%rsi,%q2\n"
    /* c = a0 * a0 */
    "movq %%r10,%%rax\n"
    "mulq %%r10\n"
    "movq %%rax,%%r8\n"
    "movq %%rdx,%%r9\n"
    /* d += a1 * a4 (a4 already doubled) */
    "movq %%r11,%%rax\n"
    "mulq %%r14\n"
    "addq %%rax,%%rbx\n"
    "adcq %%rdx,%%rcx\n"
    /* d += (a2*2) * a3 */
    "leaq (%%r12,%%r12,1),%%rax\n"
    "mulq %%r13\n"
    "addq %%rax,%%rbx\n"
    "adcq %%rdx,%%rcx\n"
    /* u0 = d & M (%%rsi) */
    "movq %%rbx,%%rsi\n"
    "andq %%r15,%%rsi\n"
    /* d >>= 52 */
    "shrdq $52,%%rcx,%%rbx\n"
    "xorq %%rcx,%%rcx\n"
    /* u0 = (u0 << 4) | tx (%%rsi) */
    "shlq $4,%%rsi\n"
    "movq %q3,%%rax\n"
    "orq %%rax,%%rsi\n"
    /* c += u0 * (R >> 4) */
    "movq $0x1000003d1,%%rax\n"
    "mulq %%rsi\n"
    "addq %%rax,%%r8\n"
    "adcq %%rdx,%%r9\n"
    /* r[0] = c & M */
    "movq %%r8,%%rax\n"
    "andq %%r15,%%rax\n"
    "movq %%rax,0(%%rdi)\n"
    /* c >>= 52 */
    "shrdq $52,%%r9,%%r8\n"
    "xorq %%r9,%%r9\n"
    /* a0 *= 2 (a0's remaining uses are all cross products) */
    "addq %%r10,%%r10\n"
    /* c += a0 * a1 */
    "movq %%r10,%%rax\n"
    "mulq %%r11\n"
    "addq %%rax,%%r8\n"
    "adcq %%rdx,%%r9\n"
    /* d += a2 * a4 */
    "movq %%r12,%%rax\n"
    "mulq %%r14\n"
    "addq %%rax,%%rbx\n"
    "adcq %%rdx,%%rcx\n"
    /* d += a3 * a3 */
    "movq %%r13,%%rax\n"
    "mulq %%r13\n"
    "addq %%rax,%%rbx\n"
    "adcq %%rdx,%%rcx\n"
    /* c += (d & M) * R */
    "movq %%rbx,%%rax\n"
    "andq %%r15,%%rax\n"
    "movq $0x1000003d10,%%rdx\n"
    "mulq %%rdx\n"
    "addq %%rax,%%r8\n"
    "adcq %%rdx,%%r9\n"
    /* d >>= 52 */
    "shrdq $52,%%rcx,%%rbx\n"
    "xorq %%rcx,%%rcx\n"
    /* r[1] = c & M */
    "movq %%r8,%%rax\n"
    "andq %%r15,%%rax\n"
    "movq %%rax,8(%%rdi)\n"
    /* c >>= 52 */
    "shrdq $52,%%r9,%%r8\n"
    "xorq %%r9,%%r9\n"
    /* c += a0 * a2 (last use of %%r10) */
    "movq %%r10,%%rax\n"
    "mulq %%r12\n"
    "addq %%rax,%%r8\n"
    "adcq %%rdx,%%r9\n"
    /* fetch t3 (%%r10, overwrites a0), t4 (%%rsi) from the stack spills */
    "movq %q2,%%rsi\n"
    "movq %q1,%%r10\n"
    /* c += a1 * a1 */
    "movq %%r11,%%rax\n"
    "mulq %%r11\n"
    "addq %%rax,%%r8\n"
    "adcq %%rdx,%%r9\n"
    /* d += a3 * a4 */
    "movq %%r13,%%rax\n"
    "mulq %%r14\n"
    "addq %%rax,%%rbx\n"
    "adcq %%rdx,%%rcx\n"
    /* c += (d & M) * R */
    "movq %%rbx,%%rax\n"
    "andq %%r15,%%rax\n"
    "movq $0x1000003d10,%%rdx\n"
    "mulq %%rdx\n"
    "addq %%rax,%%r8\n"
    "adcq %%rdx,%%r9\n"
    /* d >>= 52 (%%rbx only; %%rcx is dead after this) */
    "shrdq $52,%%rcx,%%rbx\n"
    /* r[2] = c & M */
    "movq %%r8,%%rax\n"
    "andq %%r15,%%rax\n"
    "movq %%rax,16(%%rdi)\n"
    /* c >>= 52 */
    "shrdq $52,%%r9,%%r8\n"
    "xorq %%r9,%%r9\n"
    /* c += t3 */
    "addq %%r10,%%r8\n"
    /* c += d * R */
    "movq %%rbx,%%rax\n"
    "movq $0x1000003d10,%%rdx\n"
    "mulq %%rdx\n"
    "addq %%rax,%%r8\n"
    "adcq %%rdx,%%r9\n"
    /* r[3] = c & M */
    "movq %%r8,%%rax\n"
    "andq %%r15,%%rax\n"
    "movq %%rax,24(%%rdi)\n"
    /* c >>= 52 (%%r8 only) */
    "shrdq $52,%%r9,%%r8\n"
    /* c += t4 (%%r8 only) */
    "addq %%rsi,%%r8\n"
    /* r[4] = c (top limb may exceed 52 bits; normalization happens elsewhere) */
    "movq %%r8,32(%%rdi)\n"
/* "+S"(a) marks rsi as read-write since the asm reuses it as scratch. */
: "+S"(a), "=&m"(tmp1), "=&m"(tmp2), "=&m"(tmp3)
: "D"(r)
: "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "cc", "memory"
);
}
503
504#endif /* SECP256K1_FIELD_INNER5X52_IMPL_H */
Cross-references:
secp256k1_fe_mul_inner — static SECP256K1_INLINE void secp256k1_fe_mul_inner(uint64_t *r, const uint64_t *a, const uint64_t *SECP256K1_RESTRICT b)
secp256k1_fe_sqr_inner — static SECP256K1_INLINE void secp256k1_fe_sqr_inner(uint64_t *r, const uint64_t *a)
SECP256K1_INLINE — #define SECP256K1_INLINE, definition: util.h:48
SECP256K1_RESTRICT — #define SECP256K1_RESTRICT, definition: util.h:171