field_10x26_impl.h
1 /**********************************************************************
2  * Copyright (c) 2013, 2014 Pieter Wuille *
5  **********************************************************************/
6
7 #ifndef SECP256K1_FIELD_REPR_IMPL_H
8 #define SECP256K1_FIELD_REPR_IMPL_H
9
10 #include "util.h"
11 #include "field.h"
12
13 #ifdef VERIFY
14 static void secp256k1_fe_verify(const secp256k1_fe *a) {
15  const uint32_t *d = a->n;
16  int m = a->normalized ? 1 : 2 * a->magnitude, r = 1;
17  r &= (d[0] <= 0x3FFFFFFUL * m);
18  r &= (d[1] <= 0x3FFFFFFUL * m);
19  r &= (d[2] <= 0x3FFFFFFUL * m);
20  r &= (d[3] <= 0x3FFFFFFUL * m);
21  r &= (d[4] <= 0x3FFFFFFUL * m);
22  r &= (d[5] <= 0x3FFFFFFUL * m);
23  r &= (d[6] <= 0x3FFFFFFUL * m);
24  r &= (d[7] <= 0x3FFFFFFUL * m);
25  r &= (d[8] <= 0x3FFFFFFUL * m);
26  r &= (d[9] <= 0x03FFFFFUL * m);
27  r &= (a->magnitude >= 0);
28  r &= (a->magnitude <= 32);
29  if (a->normalized) {
30  r &= (a->magnitude <= 1);
31  if (r && (d[9] == 0x03FFFFFUL)) {
32  uint32_t mid = d[8] & d[7] & d[6] & d[5] & d[4] & d[3] & d[2];
33  if (mid == 0x3FFFFFFUL) {
34  r &= ((d[1] + 0x40UL + ((d[0] + 0x3D1UL) >> 26)) <= 0x3FFFFFFUL);
35  }
36  }
37  }
38  VERIFY_CHECK(r == 1);
39 }
40 #endif
41
/* NOTE(review): the line that opens this definition (return type, name, parameter list)
 * is not present in this listing; the body below works on the ten limbs r->n[0..9].
 *
 * Full normalization without data-dependent branches: the excess bits of limb 9 are folded
 * into limbs 0 and 1, carries are propagated so limbs 0..8 hold 26 bits, and one further
 * reduction is applied unconditionally with multiplier x (0 or 1). Under VERIFY the result
 * is tagged magnitude 1 / normalized and re-checked. */
43  uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
44  t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];
45
46  /* Reduce t9 at the start so there will be at most a single carry from the first pass */
47  uint32_t m;
/* x = everything in limb 9 above its 22 payload bits */
48  uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;
49
50  /* The first pass ensures the magnitude is 1, ... */
/* m accumulates the AND of limbs 2..8, so m == 0x3FFFFFF iff all of them are all-ones */
51  t0 += x * 0x3D1UL; t1 += (x << 6);
52  t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
53  t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
54  t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; m = t2;
55  t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; m &= t3;
56  t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; m &= t4;
57  t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; m &= t5;
58  t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; m &= t6;
59  t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; m &= t7;
60  t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; m &= t8;
61
62  /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
63  VERIFY_CHECK(t9 >> 23 == 0);
64
65  /* At most a single final reduction is needed; check if the value is >= the field characteristic */
/* x becomes 1 if bit 22 of t9 is set, or if limbs 2..9 are all-ones and limbs 0/1 would overflow */
66  x = (t9 >> 22) | ((t9 == 0x03FFFFFUL) & (m == 0x3FFFFFFUL)
67  & ((t1 + 0x40UL + ((t0 + 0x3D1UL) >> 26)) > 0x3FFFFFFUL));
68
69  /* Apply the final reduction (for constant-time behaviour, we do it always) */
70  t0 += x * 0x3D1UL; t1 += (x << 6);
71  t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
72  t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
73  t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL;
74  t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL;
75  t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL;
76  t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL;
77  t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL;
78  t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL;
79  t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL;
80
81  /* If t9 didn't carry to bit 22 already, then it should have after any final reduction */
82  VERIFY_CHECK(t9 >> 22 == x);
83
84  /* Mask off the possible multiple of 2^256 from the final reduction */
85  t9 &= 0x03FFFFFUL;
86
87  r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4;
88  r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9;
89
90 #ifdef VERIFY
91  r->magnitude = 1;
92  r->normalized = 1;
93  secp256k1_fe_verify(r);
94 #endif
95 }
96
/* NOTE(review): the line that opens this definition (return type, name, parameter list)
 * is not present in this listing; the body below works on the ten limbs r->n[0..9].
 *
 * Weak normalization: performs only the first folding/carry pass. No comparison against
 * the field characteristic is made and bit 22 of limb 9 is left in place if it was set,
 * so under VERIFY only the magnitude is set to 1; the normalized flag is not written here. */
98  uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
99  t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];
100
101  /* Reduce t9 at the start so there will be at most a single carry from the first pass */
102  uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;
103
104  /* The first pass ensures the magnitude is 1, ... */
105  t0 += x * 0x3D1UL; t1 += (x << 6);
106  t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
107  t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
108  t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL;
109  t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL;
110  t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL;
111  t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL;
112  t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL;
113  t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL;
114  t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL;
115
116  /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
117  VERIFY_CHECK(t9 >> 23 == 0);
118
119  r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4;
120  r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9;
121
122 #ifdef VERIFY
123  r->magnitude = 1;
124  secp256k1_fe_verify(r);
125 #endif
126 }
127
/* NOTE(review): the line that opens this definition (return type, name, parameter list)
 * is not present in this listing; the body below works on the ten limbs r->n[0..9].
 *
 * Full normalization with a data-dependent branch: the first pass is the same
 * fold-and-carry sequence, but the final reduction is executed only when x is non-zero,
 * so running time depends on the value being normalized. Under VERIFY the result is
 * tagged magnitude 1 / normalized and re-checked. */
129  uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
130  t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];
131
132  /* Reduce t9 at the start so there will be at most a single carry from the first pass */
133  uint32_t m;
134  uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;
135
136  /* The first pass ensures the magnitude is 1, ... */
/* m accumulates the AND of limbs 2..8 for the all-ones test further down */
137  t0 += x * 0x3D1UL; t1 += (x << 6);
138  t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
139  t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
140  t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; m = t2;
141  t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; m &= t3;
142  t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; m &= t4;
143  t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; m &= t5;
144  t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; m &= t6;
145  t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; m &= t7;
146  t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; m &= t8;
147
148  /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
149  VERIFY_CHECK(t9 >> 23 == 0);
150
151  /* At most a single final reduction is needed; check if the value is >= the field characteristic */
152  x = (t9 >> 22) | ((t9 == 0x03FFFFFUL) & (m == 0x3FFFFFFUL)
153  & ((t1 + 0x40UL + ((t0 + 0x3D1UL) >> 26)) > 0x3FFFFFFUL));
154
/* Variable-time part: the reduction is skipped entirely when x == 0. Inside the branch
 * x is 1 (VERIFY_CHECK above bounds t9 to 23 bits), so adding 0x3D1 equals adding x * 0x3D1. */
155  if (x) {
156  t0 += 0x3D1UL; t1 += (x << 6);
157  t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
158  t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
159  t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL;
160  t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL;
161  t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL;
162  t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL;
163  t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL;
164  t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL;
165  t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL;
166
167  /* If t9 didn't carry to bit 22 already, then it should have after any final reduction */
168  VERIFY_CHECK(t9 >> 22 == x);
169
170  /* Mask off the possible multiple of 2^256 from the final reduction */
171  t9 &= 0x03FFFFFUL;
172  }
173
174  r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4;
175  r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9;
176
177 #ifdef VERIFY
178  r->magnitude = 1;
179  r->normalized = 1;
180  secp256k1_fe_verify(r);
181 #endif
182 }
183
/* NOTE(review): the line that opens this definition (return type, name, parameter list)
 * is not present in this listing; the body reads r->n[0..9] and never writes back to r.
 *
 * Runs the first fold-and-carry pass on local copies of the limbs and returns non-zero
 * when the result is either all-zero (z0 == 0) or matches the pattern tracked by z1.
 * There is no early exit, so every limb is always processed. */
185  uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
186  t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];
187
188  /* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */
189  uint32_t z0, z1;
190
191  /* Reduce t9 at the start so there will be at most a single carry from the first pass */
192  uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;
193
194  /* The first pass ensures the magnitude is 1, ... */
/* z0 ORs all limbs together; z1 ANDs the limbs after XOR-ing limbs 0, 1 and 9 with
 * 0x3D0, 0x40 and 0x3C00000, so z1 ends as 0x3FFFFFF only for one specific limb pattern. */
195  t0 += x * 0x3D1UL; t1 += (x << 6);
196  t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL; z0 = t0; z1 = t0 ^ 0x3D0UL;
197  t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; z0 |= t1; z1 &= t1 ^ 0x40UL;
198  t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; z0 |= t2; z1 &= t2;
199  t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; z0 |= t3; z1 &= t3;
200  t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; z0 |= t4; z1 &= t4;
201  t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; z0 |= t5; z1 &= t5;
202  t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; z0 |= t6; z1 &= t6;
203  t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; z0 |= t7; z1 &= t7;
204  t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; z0 |= t8; z1 &= t8;
205  z0 |= t9; z1 &= t9 ^ 0x3C00000UL;
206
207  /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
208  VERIFY_CHECK(t9 >> 23 == 0);
209
/* Non-short-circuit '|' keeps both comparisons unconditional */
210  return (z0 == 0) | (z1 == 0x3FFFFFFUL);
211 }
212
/* NOTE(review): the line that opens this definition (return type, name, parameter list)
 * is not present in this listing; the body reads r->n[0..9] and never writes back to r.
 *
 * Variable-time test for "reduces to zero": limb 0 (after folding in the excess of limb 9)
 * is examined first and the function returns 0 immediately when it already rules out both
 * candidate patterns. Only otherwise are the remaining limbs loaded and carry-propagated. */
214  uint32_t t0, t1, t2, t3, t4, t5, t6, t7, t8, t9;
215  uint32_t z0, z1;
216  uint32_t x;
217
218  t0 = r->n[0];
219  t9 = r->n[9];
220
221  /* Reduce t9 at the start so there will be at most a single carry from the first pass */
222  x = t9 >> 22;
223
224  /* The first pass ensures the magnitude is 1, ... */
225  t0 += x * 0x3D1UL;
226
227  /* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */
228  z0 = t0 & 0x3FFFFFFUL;
229  z1 = z0 ^ 0x3D0UL;
230
231  /* Fast return path should catch the majority of cases */
232  if ((z0 != 0UL) & (z1 != 0x3FFFFFFUL)) {
233  return 0;
234  }
235
236  t1 = r->n[1];
237  t2 = r->n[2];
238  t3 = r->n[3];
239  t4 = r->n[4];
240  t5 = r->n[5];
241  t6 = r->n[6];
242  t7 = r->n[7];
243  t8 = r->n[8];
244
245  t9 &= 0x03FFFFFUL;
246  t1 += (x << 6);
247
/* t0 is deliberately left unmasked here: only its carry (t0 >> 26) is still needed,
 * its low 26 bits were already captured in z0/z1 above. */
248  t1 += (t0 >> 26);
249  t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; z0 |= t1; z1 &= t1 ^ 0x40UL;
250  t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; z0 |= t2; z1 &= t2;
251  t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; z0 |= t3; z1 &= t3;
252  t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; z0 |= t4; z1 &= t4;
253  t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; z0 |= t5; z1 &= t5;
254  t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; z0 |= t6; z1 &= t6;
255  t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; z0 |= t7; z1 &= t7;
256  t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; z0 |= t8; z1 &= t8;
257  z0 |= t9; z1 &= t9 ^ 0x3C00000UL;
258
259  /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
260  VERIFY_CHECK(t9 >> 23 == 0);
261
262  return (z0 == 0) | (z1 == 0x3FFFFFFUL);
263 }
264
/* NOTE(review): the line that opens this definition (return type, name, parameter list)
 * is not present in this listing, so the type and permitted range of `a` cannot be
 * confirmed from here.
 *
 * Stores `a` in limb 0 and zeroes limbs 1..9; under VERIFY the element is tagged
 * magnitude 1 / normalized and checked, which requires limb 0 to be at most 0x3FFFFFF. */
266  r->n[0] = a;
267  r->n[1] = r->n[2] = r->n[3] = r->n[4] = r->n[5] = r->n[6] = r->n[7] = r->n[8] = r->n[9] = 0;
268 #ifdef VERIFY
269  r->magnitude = 1;
270  r->normalized = 1;
271  secp256k1_fe_verify(r);
272 #endif
273 }
274
/* NOTE(review): the line that opens this definition (return type, name, parameter list)
 * is not present in this listing.
 *
 * Returns 1 when all ten limbs of `a` are zero, 0 otherwise. VERIFY builds assert that
 * `a` is flagged normalized before the limbs are inspected. */
276  const uint32_t *t = a->n;
277 #ifdef VERIFY
278  VERIFY_CHECK(a->normalized);
279  secp256k1_fe_verify(a);
280 #endif
/* OR-ing every limb gives a single branch-free comparison */
281  return (t[0] | t[1] | t[2] | t[3] | t[4] | t[5] | t[6] | t[7] | t[8] | t[9]) == 0;
282 }
283
/* NOTE(review): the line that opens this definition (return type, name, parameter list)
 * is not present in this listing.
 *
 * Returns the least significant bit of limb 0 of `a`. VERIFY builds assert that `a` is
 * flagged normalized first. */
285 #ifdef VERIFY
286  VERIFY_CHECK(a->normalized);
287  secp256k1_fe_verify(a);
288 #endif
289  return a->n[0] & 1;
290 }
291
/* NOTE(review): the line that opens this definition (return type, name, parameter list)
 * is not present in this listing.
 *
 * Overwrites all ten limbs of `a` with zero. Under VERIFY the element is tagged
 * magnitude 0 / normalized; no verification call is made here. */
293  int i;
294 #ifdef VERIFY
295  a->magnitude = 0;
296  a->normalized = 1;
297 #endif
298  for (i=0; i<10; i++) {
299  a->n[i] = 0;
300  }
301 }
302
303 static int secp256k1_fe_cmp_var(const secp256k1_fe *a, const secp256k1_fe *b) {
304  int i;
305 #ifdef VERIFY
306  VERIFY_CHECK(a->normalized);
307  VERIFY_CHECK(b->normalized);
308  secp256k1_fe_verify(a);
309  secp256k1_fe_verify(b);
310 #endif
311  for (i = 9; i >= 0; i--) {
312  if (a->n[i] > b->n[i]) {
313  return 1;
314  }
315  if (a->n[i] < b->n[i]) {
316  return -1;
317  }
318  }
319  return 0;
320 }
321
322 static int secp256k1_fe_set_b32(secp256k1_fe *r, const unsigned char *a) {
323  int ret;
324  r->n[0] = (uint32_t)a[31] | ((uint32_t)a[30] << 8) | ((uint32_t)a[29] << 16) | ((uint32_t)(a[28] & 0x3) << 24);
325  r->n[1] = (uint32_t)((a[28] >> 2) & 0x3f) | ((uint32_t)a[27] << 6) | ((uint32_t)a[26] << 14) | ((uint32_t)(a[25] & 0xf) << 22);
326  r->n[2] = (uint32_t)((a[25] >> 4) & 0xf) | ((uint32_t)a[24] << 4) | ((uint32_t)a[23] << 12) | ((uint32_t)(a[22] & 0x3f) << 20);
327  r->n[3] = (uint32_t)((a[22] >> 6) & 0x3) | ((uint32_t)a[21] << 2) | ((uint32_t)a[20] << 10) | ((uint32_t)a[19] << 18);
328  r->n[4] = (uint32_t)a[18] | ((uint32_t)a[17] << 8) | ((uint32_t)a[16] << 16) | ((uint32_t)(a[15] & 0x3) << 24);
329  r->n[5] = (uint32_t)((a[15] >> 2) & 0x3f) | ((uint32_t)a[14] << 6) | ((uint32_t)a[13] << 14) | ((uint32_t)(a[12] & 0xf) << 22);
330  r->n[6] = (uint32_t)((a[12] >> 4) & 0xf) | ((uint32_t)a[11] << 4) | ((uint32_t)a[10] << 12) | ((uint32_t)(a[9] & 0x3f) << 20);
331  r->n[7] = (uint32_t)((a[9] >> 6) & 0x3) | ((uint32_t)a[8] << 2) | ((uint32_t)a[7] << 10) | ((uint32_t)a[6] << 18);
332  r->n[8] = (uint32_t)a[5] | ((uint32_t)a[4] << 8) | ((uint32_t)a[3] << 16) | ((uint32_t)(a[2] & 0x3) << 24);
333  r->n[9] = (uint32_t)((a[2] >> 2) & 0x3f) | ((uint32_t)a[1] << 6) | ((uint32_t)a[0] << 14);
334
335  ret = !((r->n[9] == 0x3FFFFFUL) & ((r->n[8] & r->n[7] & r->n[6] & r->n[5] & r->n[4] & r->n[3] & r->n[2]) == 0x3FFFFFFUL) & ((r->n[1] + 0x40UL + ((r->n[0] + 0x3D1UL) >> 26)) > 0x3FFFFFFUL));
336 #ifdef VERIFY
337  r->magnitude = 1;
338  if (ret) {
339  r->normalized = 1;
340  secp256k1_fe_verify(r);
341  } else {
342  r->normalized = 0;
343  }
344 #endif
345  return ret;
346 }
347
349 static void secp256k1_fe_get_b32(unsigned char *r, const secp256k1_fe *a) {
350 #ifdef VERIFY
351  VERIFY_CHECK(a->normalized);
352  secp256k1_fe_verify(a);
353 #endif
354  r[0] = (a->n[9] >> 14) & 0xff;
355  r[1] = (a->n[9] >> 6) & 0xff;
356  r[2] = ((a->n[9] & 0x3F) << 2) | ((a->n[8] >> 24) & 0x3);
357  r[3] = (a->n[8] >> 16) & 0xff;
358  r[4] = (a->n[8] >> 8) & 0xff;
359  r[5] = a->n[8] & 0xff;
360  r[6] = (a->n[7] >> 18) & 0xff;
361  r[7] = (a->n[7] >> 10) & 0xff;
362  r[8] = (a->n[7] >> 2) & 0xff;
363  r[9] = ((a->n[7] & 0x3) << 6) | ((a->n[6] >> 20) & 0x3f);
364  r[10] = (a->n[6] >> 12) & 0xff;
365  r[11] = (a->n[6] >> 4) & 0xff;
366  r[12] = ((a->n[6] & 0xf) << 4) | ((a->n[5] >> 22) & 0xf);
367  r[13] = (a->n[5] >> 14) & 0xff;
368  r[14] = (a->n[5] >> 6) & 0xff;
369  r[15] = ((a->n[5] & 0x3f) << 2) | ((a->n[4] >> 24) & 0x3);
370  r[16] = (a->n[4] >> 16) & 0xff;
371  r[17] = (a->n[4] >> 8) & 0xff;
372  r[18] = a->n[4] & 0xff;
373  r[19] = (a->n[3] >> 18) & 0xff;
374  r[20] = (a->n[3] >> 10) & 0xff;
375  r[21] = (a->n[3] >> 2) & 0xff;
376  r[22] = ((a->n[3] & 0x3) << 6) | ((a->n[2] >> 20) & 0x3f);
377  r[23] = (a->n[2] >> 12) & 0xff;
378  r[24] = (a->n[2] >> 4) & 0xff;
379  r[25] = ((a->n[2] & 0xf) << 4) | ((a->n[1] >> 22) & 0xf);
380  r[26] = (a->n[1] >> 14) & 0xff;
381  r[27] = (a->n[1] >> 6) & 0xff;
382  r[28] = ((a->n[1] & 0x3f) << 2) | ((a->n[0] >> 24) & 0x3);
383  r[29] = (a->n[0] >> 16) & 0xff;
384  r[30] = (a->n[0] >> 8) & 0xff;
385  r[31] = a->n[0] & 0xff;
386 }
387
/* NOTE(review): the line that opens this definition (return type, name, parameter list)
 * is not present in this listing; the body uses r (output), a (input) and m.
 *
 * Limb-wise negation: each output limb is 2 * (m + 1) times a fixed per-limb constant
 * minus the matching input limb. VERIFY builds require a->magnitude <= m on entry and
 * tag the result with magnitude m + 1, not normalized.
 * NOTE(review): the constants 0x3FFFC2F / 0x3FFFFBF / 0x3FFFFFF / 0x03FFFFF look like the
 * limbs of the field prime, so the result would be a multiple of the prime minus a;
 * confirm against the field definition. */
389 #ifdef VERIFY
390  VERIFY_CHECK(a->magnitude <= m);
391  secp256k1_fe_verify(a);
392 #endif
393  r->n[0] = 0x3FFFC2FUL * 2 * (m + 1) - a->n[0];
394  r->n[1] = 0x3FFFFBFUL * 2 * (m + 1) - a->n[1];
395  r->n[2] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[2];
396  r->n[3] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[3];
397  r->n[4] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[4];
398  r->n[5] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[5];
399  r->n[6] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[6];
400  r->n[7] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[7];
401  r->n[8] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[8];
402  r->n[9] = 0x03FFFFFUL * 2 * (m + 1) - a->n[9];
403 #ifdef VERIFY
404  r->magnitude = m + 1;
405  r->normalized = 0;
406  secp256k1_fe_verify(r);
407 #endif
408 }
409
/* NOTE(review): the line that opens this definition (return type, name, parameter list)
 * is not present in this listing, so the type of `a` cannot be confirmed from here.
 *
 * Multiplies every limb of r by the scalar `a` in place, with no carry propagation or
 * reduction. Under VERIFY the magnitude is multiplied by `a`, the normalized flag is
 * cleared, and the result is checked against the magnitude bounds. */
411  r->n[0] *= a;
412  r->n[1] *= a;
413  r->n[2] *= a;
414  r->n[3] *= a;
415  r->n[4] *= a;
416  r->n[5] *= a;
417  r->n[6] *= a;
418  r->n[7] *= a;
419  r->n[8] *= a;
420  r->n[9] *= a;
421 #ifdef VERIFY
422  r->magnitude *= a;
423  r->normalized = 0;
424  secp256k1_fe_verify(r);
425 #endif
426 }
427
/* NOTE(review): the line that opens this definition (return type, name, parameter list)
 * is not present in this listing; the body uses r (accumulator) and a (addend).
 *
 * Adds each limb of a to the matching limb of r in place, with no carry propagation or
 * reduction. Under VERIFY the magnitudes are summed, the normalized flag is cleared,
 * and the result is checked against the magnitude bounds. */
429 #ifdef VERIFY
430  secp256k1_fe_verify(a);
431 #endif
432  r->n[0] += a->n[0];
433  r->n[1] += a->n[1];
434  r->n[2] += a->n[2];
435  r->n[3] += a->n[3];
436  r->n[4] += a->n[4];
437  r->n[5] += a->n[5];
438  r->n[6] += a->n[6];
439  r->n[7] += a->n[7];
440  r->n[8] += a->n[8];
441  r->n[9] += a->n[9];
442 #ifdef VERIFY
443  r->magnitude += a->magnitude;
444  r->normalized = 0;
445  secp256k1_fe_verify(r);
446 #endif
447 }
448
449 #if defined(USE_EXTERNAL_ASM)
450
451 /* External assembler implementation */
/* Prototypes only: in the USE_EXTERNAL_ASM configuration no C bodies are compiled for
 * these two routines (the C versions live in the #else branch below and have the same
 * parameter lists), so the definitions must be supplied from outside this file. */
452 void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t *a, const uint32_t * SECP256K1_RESTRICT b);
453 void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t *a);
454
455 #else
456
/* VERIFY_BITS(x, n): in VERIFY builds, asserts that x fits in n bits (x >> n == 0).
 * In other builds it expands to an empty statement, so x and n are not evaluated.
 * n must be smaller than the bit width of x's (promoted) type for the shift to be defined. */
457 #ifdef VERIFY
458 #define VERIFY_BITS(x, n) VERIFY_CHECK(((x) >> (n)) == 0)
459 #else
460 #define VERIFY_BITS(x, n) do { } while(0)
461 #endif
462
/* Multiply two field elements held as ten 32-bit limbs in radix 2^26 and store the
 * partially reduced product in r.
 *
 * Inputs:  limbs 0..8 of a and b must fit in 30 bits and limb 9 in 26 bits (asserted by
 *          VERIFY_BITS in VERIFY builds).
 * Output:  r[0], r[1] and r[3]..r[8] fit in 26 bits, r[2] in 27 bits and r[9] in 22 bits
 *          (see the VERIFY_BITS calls next to each store).
 *
 * Two 64-bit accumulators are used: d collects the high product columns (p9..p18) and c the
 * low ones (p0..p8). Each 26-bit chunk u_i taken from d is folded into c as u_i * R0 in the
 * current column and u_i * R1 in the next one. Low-column results are kept in locals
 * t0..t7/t9, and the first store to r happens only after the last read of a[] and b[]. */
463 SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t *a, const uint32_t * SECP256K1_RESTRICT b) {
464  uint64_t c, d;
465  uint64_t u0, u1, u2, u3, u4, u5, u6, u7, u8;
466  uint32_t t9, t1, t0, t2, t3, t4, t5, t6, t7;
/* M masks one 26-bit limb; R0 and R1 are the fold-down multipliers for a chunk that sits
 * ten limbs up (16 * 0x3D1 and 16 * 0x40, the constants used by the normalization code). */
467  const uint32_t M = 0x3FFFFFFUL, R0 = 0x3D10UL, R1 = 0x400UL;
468
469  VERIFY_BITS(a[0], 30);
470  VERIFY_BITS(a[1], 30);
471  VERIFY_BITS(a[2], 30);
472  VERIFY_BITS(a[3], 30);
473  VERIFY_BITS(a[4], 30);
474  VERIFY_BITS(a[5], 30);
475  VERIFY_BITS(a[6], 30);
476  VERIFY_BITS(a[7], 30);
477  VERIFY_BITS(a[8], 30);
478  VERIFY_BITS(a[9], 26);
479  VERIFY_BITS(b[0], 30);
480  VERIFY_BITS(b[1], 30);
481  VERIFY_BITS(b[2], 30);
482  VERIFY_BITS(b[3], 30);
483  VERIFY_BITS(b[4], 30);
484  VERIFY_BITS(b[5], 30);
485  VERIFY_BITS(b[6], 30);
486  VERIFY_BITS(b[7], 30);
487  VERIFY_BITS(b[8], 30);
488  VERIFY_BITS(b[9], 26);
489
/* Notation used in the bracket comments below:
 *   [xk ... x1 x0] stands for the sum of x_i * 2^(26*i), taken in the field;
 *   px stands for the sum of a[i] * b[x-i] over all valid i (product column x);
 *   a value x ten positions up folds down as [x 0 0 0 0 0 0 0 0 0 0] = [x*R1 x*R0].
 * Each comment states the invariant that holds right after the preceding statement. */
496  d = (uint64_t)a[0] * b[9]
497  + (uint64_t)a[1] * b[8]
498  + (uint64_t)a[2] * b[7]
499  + (uint64_t)a[3] * b[6]
500  + (uint64_t)a[4] * b[5]
501  + (uint64_t)a[5] * b[4]
502  + (uint64_t)a[6] * b[3]
503  + (uint64_t)a[7] * b[2]
504  + (uint64_t)a[8] * b[1]
505  + (uint64_t)a[9] * b[0];
506  /* VERIFY_BITS(d, 64); */
507  /* [d 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */
508  t9 = d & M; d >>= 26;
509  VERIFY_BITS(t9, 26);
510  VERIFY_BITS(d, 38);
511  /* [d t9 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */
512
513  c = (uint64_t)a[0] * b[0];
514  VERIFY_BITS(c, 60);
515  /* [d t9 0 0 0 0 0 0 0 0 c] = [p9 0 0 0 0 0 0 0 0 p0] */
516  d += (uint64_t)a[1] * b[9]
517  + (uint64_t)a[2] * b[8]
518  + (uint64_t)a[3] * b[7]
519  + (uint64_t)a[4] * b[6]
520  + (uint64_t)a[5] * b[5]
521  + (uint64_t)a[6] * b[4]
522  + (uint64_t)a[7] * b[3]
523  + (uint64_t)a[8] * b[2]
524  + (uint64_t)a[9] * b[1];
525  VERIFY_BITS(d, 63);
526  /* [d t9 0 0 0 0 0 0 0 0 c] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
527  u0 = d & M; d >>= 26; c += u0 * R0;
528  VERIFY_BITS(u0, 26);
529  VERIFY_BITS(d, 37);
530  VERIFY_BITS(c, 61);
531  /* [d u0 t9 0 0 0 0 0 0 0 0 c-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
532  t0 = c & M; c >>= 26; c += u0 * R1;
533  VERIFY_BITS(t0, 26);
534  VERIFY_BITS(c, 37);
535  /* [d u0 t9 0 0 0 0 0 0 0 c-u0*R1 t0-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
536  /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
537
538  c += (uint64_t)a[0] * b[1]
539  + (uint64_t)a[1] * b[0];
540  VERIFY_BITS(c, 62);
541  /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 p1 p0] */
542  d += (uint64_t)a[2] * b[9]
543  + (uint64_t)a[3] * b[8]
544  + (uint64_t)a[4] * b[7]
545  + (uint64_t)a[5] * b[6]
546  + (uint64_t)a[6] * b[5]
547  + (uint64_t)a[7] * b[4]
548  + (uint64_t)a[8] * b[3]
549  + (uint64_t)a[9] * b[2];
550  VERIFY_BITS(d, 63);
551  /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
552  u1 = d & M; d >>= 26; c += u1 * R0;
553  VERIFY_BITS(u1, 26);
554  VERIFY_BITS(d, 37);
555  VERIFY_BITS(c, 63);
556  /* [d u1 0 t9 0 0 0 0 0 0 0 c-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
557  t1 = c & M; c >>= 26; c += u1 * R1;
558  VERIFY_BITS(t1, 26);
559  VERIFY_BITS(c, 38);
560  /* [d u1 0 t9 0 0 0 0 0 0 c-u1*R1 t1-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
561  /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
562
563  c += (uint64_t)a[0] * b[2]
564  + (uint64_t)a[1] * b[1]
565  + (uint64_t)a[2] * b[0];
566  VERIFY_BITS(c, 62);
567  /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
568  d += (uint64_t)a[3] * b[9]
569  + (uint64_t)a[4] * b[8]
570  + (uint64_t)a[5] * b[7]
571  + (uint64_t)a[6] * b[6]
572  + (uint64_t)a[7] * b[5]
573  + (uint64_t)a[8] * b[4]
574  + (uint64_t)a[9] * b[3];
575  VERIFY_BITS(d, 63);
576  /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
577  u2 = d & M; d >>= 26; c += u2 * R0;
578  VERIFY_BITS(u2, 26);
579  VERIFY_BITS(d, 37);
580  VERIFY_BITS(c, 63);
581  /* [d u2 0 0 t9 0 0 0 0 0 0 c-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
582  t2 = c & M; c >>= 26; c += u2 * R1;
583  VERIFY_BITS(t2, 26);
584  VERIFY_BITS(c, 38);
585  /* [d u2 0 0 t9 0 0 0 0 0 c-u2*R1 t2-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
586  /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
587
588  c += (uint64_t)a[0] * b[3]
589  + (uint64_t)a[1] * b[2]
590  + (uint64_t)a[2] * b[1]
591  + (uint64_t)a[3] * b[0];
592  VERIFY_BITS(c, 63);
593  /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
594  d += (uint64_t)a[4] * b[9]
595  + (uint64_t)a[5] * b[8]
596  + (uint64_t)a[6] * b[7]
597  + (uint64_t)a[7] * b[6]
598  + (uint64_t)a[8] * b[5]
599  + (uint64_t)a[9] * b[4];
600  VERIFY_BITS(d, 63);
601  /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
602  u3 = d & M; d >>= 26; c += u3 * R0;
603  VERIFY_BITS(u3, 26);
604  VERIFY_BITS(d, 37);
605  /* VERIFY_BITS(c, 64); */
606  /* [d u3 0 0 0 t9 0 0 0 0 0 c-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
607  t3 = c & M; c >>= 26; c += u3 * R1;
608  VERIFY_BITS(t3, 26);
609  VERIFY_BITS(c, 39);
610  /* [d u3 0 0 0 t9 0 0 0 0 c-u3*R1 t3-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
611  /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
612
613  c += (uint64_t)a[0] * b[4]
614  + (uint64_t)a[1] * b[3]
615  + (uint64_t)a[2] * b[2]
616  + (uint64_t)a[3] * b[1]
617  + (uint64_t)a[4] * b[0];
618  VERIFY_BITS(c, 63);
619  /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
620  d += (uint64_t)a[5] * b[9]
621  + (uint64_t)a[6] * b[8]
622  + (uint64_t)a[7] * b[7]
623  + (uint64_t)a[8] * b[6]
624  + (uint64_t)a[9] * b[5];
625  VERIFY_BITS(d, 62);
626  /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
627  u4 = d & M; d >>= 26; c += u4 * R0;
628  VERIFY_BITS(u4, 26);
629  VERIFY_BITS(d, 36);
630  /* VERIFY_BITS(c, 64); */
631  /* [d u4 0 0 0 0 t9 0 0 0 0 c-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
632  t4 = c & M; c >>= 26; c += u4 * R1;
633  VERIFY_BITS(t4, 26);
634  VERIFY_BITS(c, 39);
635  /* [d u4 0 0 0 0 t9 0 0 0 c-u4*R1 t4-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
636  /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
637
638  c += (uint64_t)a[0] * b[5]
639  + (uint64_t)a[1] * b[4]
640  + (uint64_t)a[2] * b[3]
641  + (uint64_t)a[3] * b[2]
642  + (uint64_t)a[4] * b[1]
643  + (uint64_t)a[5] * b[0];
644  VERIFY_BITS(c, 63);
645  /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
646  d += (uint64_t)a[6] * b[9]
647  + (uint64_t)a[7] * b[8]
648  + (uint64_t)a[8] * b[7]
649  + (uint64_t)a[9] * b[6];
650  VERIFY_BITS(d, 62);
651  /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
652  u5 = d & M; d >>= 26; c += u5 * R0;
653  VERIFY_BITS(u5, 26);
654  VERIFY_BITS(d, 36);
655  /* VERIFY_BITS(c, 64); */
656  /* [d u5 0 0 0 0 0 t9 0 0 0 c-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
657  t5 = c & M; c >>= 26; c += u5 * R1;
658  VERIFY_BITS(t5, 26);
659  VERIFY_BITS(c, 39);
660  /* [d u5 0 0 0 0 0 t9 0 0 c-u5*R1 t5-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
661  /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
662
663  c += (uint64_t)a[0] * b[6]
664  + (uint64_t)a[1] * b[5]
665  + (uint64_t)a[2] * b[4]
666  + (uint64_t)a[3] * b[3]
667  + (uint64_t)a[4] * b[2]
668  + (uint64_t)a[5] * b[1]
669  + (uint64_t)a[6] * b[0];
670  VERIFY_BITS(c, 63);
671  /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
672  d += (uint64_t)a[7] * b[9]
673  + (uint64_t)a[8] * b[8]
674  + (uint64_t)a[9] * b[7];
675  VERIFY_BITS(d, 61);
676  /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
677  u6 = d & M; d >>= 26; c += u6 * R0;
678  VERIFY_BITS(u6, 26);
679  VERIFY_BITS(d, 35);
680  /* VERIFY_BITS(c, 64); */
681  /* [d u6 0 0 0 0 0 0 t9 0 0 c-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
682  t6 = c & M; c >>= 26; c += u6 * R1;
683  VERIFY_BITS(t6, 26);
684  VERIFY_BITS(c, 39);
685  /* [d u6 0 0 0 0 0 0 t9 0 c-u6*R1 t6-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
686  /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
687
688  c += (uint64_t)a[0] * b[7]
689  + (uint64_t)a[1] * b[6]
690  + (uint64_t)a[2] * b[5]
691  + (uint64_t)a[3] * b[4]
692  + (uint64_t)a[4] * b[3]
693  + (uint64_t)a[5] * b[2]
694  + (uint64_t)a[6] * b[1]
695  + (uint64_t)a[7] * b[0];
696  /* VERIFY_BITS(c, 64); */
/* From here on c may use all 64 bits, so explicit upper bounds replace VERIFY_BITS */
697  VERIFY_CHECK(c <= 0x8000007C00000007ULL);
698  /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
699  d += (uint64_t)a[8] * b[9]
700  + (uint64_t)a[9] * b[8];
701  VERIFY_BITS(d, 58);
702  /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
703  u7 = d & M; d >>= 26; c += u7 * R0;
704  VERIFY_BITS(u7, 26);
705  VERIFY_BITS(d, 32);
706  /* VERIFY_BITS(c, 64); */
707  VERIFY_CHECK(c <= 0x800001703FFFC2F7ULL);
708  /* [d u7 0 0 0 0 0 0 0 t9 0 c-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
709  t7 = c & M; c >>= 26; c += u7 * R1;
710  VERIFY_BITS(t7, 26);
711  VERIFY_BITS(c, 38);
712  /* [d u7 0 0 0 0 0 0 0 t9 c-u7*R1 t7-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
713  /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
714
715  c += (uint64_t)a[0] * b[8]
716  + (uint64_t)a[1] * b[7]
717  + (uint64_t)a[2] * b[6]
718  + (uint64_t)a[3] * b[5]
719  + (uint64_t)a[4] * b[4]
720  + (uint64_t)a[5] * b[3]
721  + (uint64_t)a[6] * b[2]
722  + (uint64_t)a[7] * b[1]
723  + (uint64_t)a[8] * b[0];
724  /* VERIFY_BITS(c, 64); */
725  VERIFY_CHECK(c <= 0x9000007B80000008ULL);
726  /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
727  d += (uint64_t)a[9] * b[9];
728  VERIFY_BITS(d, 57);
729  /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
730  u8 = d & M; d >>= 26; c += u8 * R0;
731  VERIFY_BITS(u8, 26);
732  VERIFY_BITS(d, 31);
733  /* VERIFY_BITS(c, 64); */
734  VERIFY_CHECK(c <= 0x9000016FBFFFC2F8ULL);
735  /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
736
/* All products have been accumulated; a[] and b[] are not read again, so r can be written */
737  r[3] = t3;
738  VERIFY_BITS(r[3], 26);
739  /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
740  r[4] = t4;
741  VERIFY_BITS(r[4], 26);
742  /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
743  r[5] = t5;
744  VERIFY_BITS(r[5], 26);
745  /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
746  r[6] = t6;
747  VERIFY_BITS(r[6], 26);
748  /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
749  r[7] = t7;
750  VERIFY_BITS(r[7], 26);
751  /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
752
753  r[8] = c & M; c >>= 26; c += u8 * R1;
754  VERIFY_BITS(r[8], 26);
755  VERIFY_BITS(c, 39);
756  /* [d u8 0 0 0 0 0 0 0 0 t9+c-u8*R1 r8-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
757  /* [d 0 0 0 0 0 0 0 0 0 t9+c r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
758  c += d * R0 + t9;
759  VERIFY_BITS(c, 45);
760  /* [d 0 0 0 0 0 0 0 0 0 c-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
/* The top limb keeps only 22 bits, hence the M >> 4 mask and the 4-bit adjusted multipliers */
761  r[9] = c & (M >> 4); c >>= 22; c += d * (R1 << 4);
762  VERIFY_BITS(r[9], 22);
763  VERIFY_BITS(c, 46);
764  /* [d 0 0 0 0 0 0 0 0 r9+((c-d*R1<<4)<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
765  /* [d 0 0 0 0 0 0 0 -d*R1 r9+(c<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
766  /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
767
/* Fold what is left in c back into limbs 0 and 1, then ripple the carry into limb 2 */
768  d = c * (R0 >> 4) + t0;
769  VERIFY_BITS(d, 56);
770  /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 d-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
771  r[0] = d & M; d >>= 26;
772  VERIFY_BITS(r[0], 26);
773  VERIFY_BITS(d, 30);
774  /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1+d r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
775  d += c * (R1 >> 4) + t1;
776  VERIFY_BITS(d, 53);
777  VERIFY_CHECK(d <= 0x10000003FFFFBFULL);
778  /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 d-c*R1>>4 r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
779  /* [r9 r8 r7 r6 r5 r4 r3 t2 d r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
780  r[1] = d & M; d >>= 26;
781  VERIFY_BITS(r[1], 26);
782  VERIFY_BITS(d, 27);
783  VERIFY_CHECK(d <= 0x4000000ULL);
784  /* [r9 r8 r7 r6 r5 r4 r3 t2+d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
785  d += t2;
786  VERIFY_BITS(d, 27);
787  /* [r9 r8 r7 r6 r5 r4 r3 d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
/* r[2] is stored unmasked and may therefore occupy 27 bits */
788  r[2] = d;
789  VERIFY_BITS(r[2], 27);
790  /* [r9 r8 r7 r6 r5 r4 r3 r2 r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
791 }
792
/**
 * Square a field element given as ten limbs of 26 bits each and write the
 * result, reduced back to ten limbs but not normalized.
 *
 * r  Output limbs r[0..9]; every limb is written. The VERIFY_BITS checks at
 *    the stores bound r[0], r[1] and r[3..8] to 26 bits, r[2] to 27 bits and
 *    r[9] to 22 bits.
 * a  Input limbs a[0..9]. The VERIFY_BITS checks at the top require a[0..8]
 *    to fit in 30 bits and a[9] to fit in 26 bits.
 *
 * Every read of a[] happens before the first store to r[], so r and a may
 * point to the same array.
 *
 * d collects the upper product columns (p9 and above) and c collects the
 * lower ones. Each 26-bit slice taken off d is multiplied by R0 and R1 and
 * added into c, so the upper half of the square is folded into the lower
 * half while the columns are being produced.
 */
793 SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t *a) {
794  uint64_t c, d;
795  uint64_t u0, u1, u2, u3, u4, u5, u6, u7, u8;
796  uint32_t t9, t0, t1, t2, t3, t4, t5, t6, t7;
 /* M masks one 26-bit limb. R0 and R1 are the two limbs of 2^260 reduced
  * modulo the field prime: 2^260 = R1*2^26 + R0 (mod p). */
797  const uint32_t M = 0x3FFFFFFUL, R0 = 0x3D10UL, R1 = 0x400UL;
798
 /* Input bounds. Doubling a 30-bit limb as (a[i]*2) below therefore still
  * fits in 32 bits before the widening cast to uint64_t. */
799  VERIFY_BITS(a[0], 30);
800  VERIFY_BITS(a[1], 30);
801  VERIFY_BITS(a[2], 30);
802  VERIFY_BITS(a[3], 30);
803  VERIFY_BITS(a[4], 30);
804  VERIFY_BITS(a[5], 30);
805  VERIFY_BITS(a[6], 30);
806  VERIFY_BITS(a[7], 30);
807  VERIFY_BITS(a[8], 30);
808  VERIFY_BITS(a[9], 26);
809
 /* Notation used in the bracketed comments below:
  *  [... a b c] stands for ... + a<<52 + b<<26 + c<<0, taken modulo the field prime.
  *  px stands for the sum over i of a[i]*a[x-i], i.e. the product column of weight 2^(26*x).
  *  A value x at limb position 10 satisfies [x 0 0 0 0 0 0 0 0 0 0] = [x*R1 x*R0].
  * Each bracketed comment states an invariant that holds at that point. */
815  d = (uint64_t)(a[0]*2) * a[9]
816  + (uint64_t)(a[1]*2) * a[8]
817  + (uint64_t)(a[2]*2) * a[7]
818  + (uint64_t)(a[3]*2) * a[6]
819  + (uint64_t)(a[4]*2) * a[5];
820  /* VERIFY_BITS(d, 64); */
821  /* [d 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */
 /* The low 26 bits of column 9 are parked in t9 until limb 9 is assembled at the end. */
822  t9 = d & M; d >>= 26;
823  VERIFY_BITS(t9, 26);
824  VERIFY_BITS(d, 38);
825  /* [d t9 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */
826
827  c = (uint64_t)a[0] * a[0];
828  VERIFY_BITS(c, 60);
829  /* [d t9 0 0 0 0 0 0 0 0 c] = [p9 0 0 0 0 0 0 0 0 p0] */
830  d += (uint64_t)(a[1]*2) * a[9]
831  + (uint64_t)(a[2]*2) * a[8]
832  + (uint64_t)(a[3]*2) * a[7]
833  + (uint64_t)(a[4]*2) * a[6]
834  + (uint64_t)a[5] * a[5];
835  VERIFY_BITS(d, 63);
836  /* [d t9 0 0 0 0 0 0 0 0 c] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
837  u0 = d & M; d >>= 26; c += u0 * R0;
838  VERIFY_BITS(u0, 26);
839  VERIFY_BITS(d, 37);
840  VERIFY_BITS(c, 61);
841  /* [d u0 t9 0 0 0 0 0 0 0 0 c-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
842  t0 = c & M; c >>= 26; c += u0 * R1;
843  VERIFY_BITS(t0, 26);
844  VERIFY_BITS(c, 37);
845  /* [d u0 t9 0 0 0 0 0 0 0 c-u0*R1 t0-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
846  /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
847
848  c += (uint64_t)(a[0]*2) * a[1];
849  VERIFY_BITS(c, 62);
850  /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 p1 p0] */
851  d += (uint64_t)(a[2]*2) * a[9]
852  + (uint64_t)(a[3]*2) * a[8]
853  + (uint64_t)(a[4]*2) * a[7]
854  + (uint64_t)(a[5]*2) * a[6];
855  VERIFY_BITS(d, 63);
856  /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
857  u1 = d & M; d >>= 26; c += u1 * R0;
858  VERIFY_BITS(u1, 26);
859  VERIFY_BITS(d, 37);
860  VERIFY_BITS(c, 63);
861  /* [d u1 0 t9 0 0 0 0 0 0 0 c-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
862  t1 = c & M; c >>= 26; c += u1 * R1;
863  VERIFY_BITS(t1, 26);
864  VERIFY_BITS(c, 38);
865  /* [d u1 0 t9 0 0 0 0 0 0 c-u1*R1 t1-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
866  /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
867
868  c += (uint64_t)(a[0]*2) * a[2]
869  + (uint64_t)a[1] * a[1];
870  VERIFY_BITS(c, 62);
871  /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
872  d += (uint64_t)(a[3]*2) * a[9]
873  + (uint64_t)(a[4]*2) * a[8]
874  + (uint64_t)(a[5]*2) * a[7]
875  + (uint64_t)a[6] * a[6];
876  VERIFY_BITS(d, 63);
877  /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
878  u2 = d & M; d >>= 26; c += u2 * R0;
879  VERIFY_BITS(u2, 26);
880  VERIFY_BITS(d, 37);
881  VERIFY_BITS(c, 63);
882  /* [d u2 0 0 t9 0 0 0 0 0 0 c-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
883  t2 = c & M; c >>= 26; c += u2 * R1;
884  VERIFY_BITS(t2, 26);
885  VERIFY_BITS(c, 38);
886  /* [d u2 0 0 t9 0 0 0 0 0 c-u2*R1 t2-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
887  /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
888
889  c += (uint64_t)(a[0]*2) * a[3]
890  + (uint64_t)(a[1]*2) * a[2];
891  VERIFY_BITS(c, 63);
892  /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
893  d += (uint64_t)(a[4]*2) * a[9]
894  + (uint64_t)(a[5]*2) * a[8]
895  + (uint64_t)(a[6]*2) * a[7];
896  VERIFY_BITS(d, 63);
897  /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
898  u3 = d & M; d >>= 26; c += u3 * R0;
899  VERIFY_BITS(u3, 26);
900  VERIFY_BITS(d, 37);
901  /* VERIFY_BITS(c, 64); */
902  /* [d u3 0 0 0 t9 0 0 0 0 0 c-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
903  t3 = c & M; c >>= 26; c += u3 * R1;
904  VERIFY_BITS(t3, 26);
905  VERIFY_BITS(c, 39);
906  /* [d u3 0 0 0 t9 0 0 0 0 c-u3*R1 t3-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
907  /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
908
909  c += (uint64_t)(a[0]*2) * a[4]
910  + (uint64_t)(a[1]*2) * a[3]
911  + (uint64_t)a[2] * a[2];
912  VERIFY_BITS(c, 63);
913  /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
914  d += (uint64_t)(a[5]*2) * a[9]
915  + (uint64_t)(a[6]*2) * a[8]
916  + (uint64_t)a[7] * a[7];
917  VERIFY_BITS(d, 62);
918  /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
919  u4 = d & M; d >>= 26; c += u4 * R0;
920  VERIFY_BITS(u4, 26);
921  VERIFY_BITS(d, 36);
922  /* VERIFY_BITS(c, 64); */
923  /* [d u4 0 0 0 0 t9 0 0 0 0 c-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
924  t4 = c & M; c >>= 26; c += u4 * R1;
925  VERIFY_BITS(t4, 26);
926  VERIFY_BITS(c, 39);
927  /* [d u4 0 0 0 0 t9 0 0 0 c-u4*R1 t4-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
928  /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
929
930  c += (uint64_t)(a[0]*2) * a[5]
931  + (uint64_t)(a[1]*2) * a[4]
932  + (uint64_t)(a[2]*2) * a[3];
933  VERIFY_BITS(c, 63);
934  /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
935  d += (uint64_t)(a[6]*2) * a[9]
936  + (uint64_t)(a[7]*2) * a[8];
937  VERIFY_BITS(d, 62);
938  /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
939  u5 = d & M; d >>= 26; c += u5 * R0;
940  VERIFY_BITS(u5, 26);
941  VERIFY_BITS(d, 36);
942  /* VERIFY_BITS(c, 64); */
943  /* [d u5 0 0 0 0 0 t9 0 0 0 c-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
944  t5 = c & M; c >>= 26; c += u5 * R1;
945  VERIFY_BITS(t5, 26);
946  VERIFY_BITS(c, 39);
947  /* [d u5 0 0 0 0 0 t9 0 0 c-u5*R1 t5-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
948  /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
949
950  c += (uint64_t)(a[0]*2) * a[6]
951  + (uint64_t)(a[1]*2) * a[5]
952  + (uint64_t)(a[2]*2) * a[4]
953  + (uint64_t)a[3] * a[3];
954  VERIFY_BITS(c, 63);
955  /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
956  d += (uint64_t)(a[7]*2) * a[9]
957  + (uint64_t)a[8] * a[8];
958  VERIFY_BITS(d, 61);
959  /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
960  u6 = d & M; d >>= 26; c += u6 * R0;
961  VERIFY_BITS(u6, 26);
962  VERIFY_BITS(d, 35);
963  /* VERIFY_BITS(c, 64); */
964  /* [d u6 0 0 0 0 0 0 t9 0 0 c-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
965  t6 = c & M; c >>= 26; c += u6 * R1;
966  VERIFY_BITS(t6, 26);
967  VERIFY_BITS(c, 39);
968  /* [d u6 0 0 0 0 0 0 t9 0 c-u6*R1 t6-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
969  /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
970
971  c += (uint64_t)(a[0]*2) * a[7]
972  + (uint64_t)(a[1]*2) * a[6]
973  + (uint64_t)(a[2]*2) * a[5]
974  + (uint64_t)(a[3]*2) * a[4];
975  /* VERIFY_BITS(c, 64); */
 /* From here on c may use all 64 bits, so explicit upper bounds replace the bit-count checks. */
976  VERIFY_CHECK(c <= 0x8000007C00000007ULL);
977  /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
978  d += (uint64_t)(a[8]*2) * a[9];
979  VERIFY_BITS(d, 58);
980  /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
981  u7 = d & M; d >>= 26; c += u7 * R0;
982  VERIFY_BITS(u7, 26);
983  VERIFY_BITS(d, 32);
984  /* VERIFY_BITS(c, 64); */
985  VERIFY_CHECK(c <= 0x800001703FFFC2F7ULL);
986  /* [d u7 0 0 0 0 0 0 0 t9 0 c-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
987  t7 = c & M; c >>= 26; c += u7 * R1;
988  VERIFY_BITS(t7, 26);
989  VERIFY_BITS(c, 38);
990  /* [d u7 0 0 0 0 0 0 0 t9 c-u7*R1 t7-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
991  /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
992
993  c += (uint64_t)(a[0]*2) * a[8]
994  + (uint64_t)(a[1]*2) * a[7]
995  + (uint64_t)(a[2]*2) * a[6]
996  + (uint64_t)(a[3]*2) * a[5]
997  + (uint64_t)a[4] * a[4];
998  /* VERIFY_BITS(c, 64); */
999  VERIFY_CHECK(c <= 0x9000007B80000008ULL);
1000  /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
 /* Last read of a[]: everything below works on temporaries only. */
1001  d += (uint64_t)a[9] * a[9];
1002  VERIFY_BITS(d, 57);
1003  /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
1004  u8 = d & M; d >>= 26; c += u8 * R0;
1005  VERIFY_BITS(u8, 26);
1006  VERIFY_BITS(d, 31);
1007  /* VERIFY_BITS(c, 64); */
1008  VERIFY_CHECK(c <= 0x9000016FBFFFC2F8ULL);
1009  /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
1010
 /* Limbs 3..7 are final; store them. t0..t2 stay in temporaries because the
  * last carry out of limb 9 is still to be folded into limbs 0..2. */
1011  r[3] = t3;
1012  VERIFY_BITS(r[3], 26);
1013  /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
1014  r[4] = t4;
1015  VERIFY_BITS(r[4], 26);
1016  /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
1017  r[5] = t5;
1018  VERIFY_BITS(r[5], 26);
1019  /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
1020  r[6] = t6;
1021  VERIFY_BITS(r[6], 26);
1022  /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
1023  r[7] = t7;
1024  VERIFY_BITS(r[7], 26);
1025  /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
1026
1027  r[8] = c & M; c >>= 26; c += u8 * R1;
1028  VERIFY_BITS(r[8], 26);
1029  VERIFY_BITS(c, 39);
1030  /* [d u8 0 0 0 0 0 0 0 0 t9+c-u8*R1 r8-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
1031  /* [d 0 0 0 0 0 0 0 0 0 t9+c r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
 /* Bring back the parked low part of column 9 and fold what is left of d. */
1032  c += d * R0 + t9;
1033  VERIFY_BITS(c, 45);
1034  /* [d 0 0 0 0 0 0 0 0 0 c-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
 /* The top limb keeps only 22 bits (mask M >> 4, shift by 22), so the
  * constants are shifted by 4 to match: R1 << 4 here, R0 >> 4 and R1 >> 4 below. */
1035  r[9] = c & (M >> 4); c >>= 22; c += d * (R1 << 4);
1036  VERIFY_BITS(r[9], 22);
1037  VERIFY_BITS(c, 46);
1038  /* [d 0 0 0 0 0 0 0 0 r9+((c-d*R1<<4)<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
1039  /* [d 0 0 0 0 0 0 0 -d*R1 r9+(c<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
1040  /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
1041
 /* d is reused from here as the carry accumulator for limbs 0..2. */
1042  d = c * (R0 >> 4) + t0;
1043  VERIFY_BITS(d, 56);
1044  /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 d-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
1045  r[0] = d & M; d >>= 26;
1046  VERIFY_BITS(r[0], 26);
1047  VERIFY_BITS(d, 30);
1048  /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1+d r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
1049  d += c * (R1 >> 4) + t1;
1050  VERIFY_BITS(d, 53);
1051  VERIFY_CHECK(d <= 0x10000003FFFFBFULL);
1052  /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 d-c*R1>>4 r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
1053  /* [r9 r8 r7 r6 r5 r4 r3 t2 d r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
1054  r[1] = d & M; d >>= 26;
1055  VERIFY_BITS(r[1], 26);
1056  VERIFY_BITS(d, 27);
1057  VERIFY_CHECK(d <= 0x4000000ULL);
1058  /* [r9 r8 r7 r6 r5 r4 r3 t2+d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
 /* The final carry is added into limb 2 without masking, so r[2] may occupy 27 bits. */
1059  d += t2;
1060  VERIFY_BITS(d, 27);
1061  /* [r9 r8 r7 r6 r5 r4 r3 d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
1062  r[2] = d;
1063  VERIFY_BITS(r[2], 27);
1064  /* [r9 r8 r7 r6 r5 r4 r3 r2 r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
1065 }
1066 #endif
1067
/* NOTE(review): the line carrying this function's signature is not present in
 * this listing. The body uses r as the output and a, b as the operands; it is
 * presumably secp256k1_fe_mul, the multiplication counterpart of
 * secp256k1_fe_sqr -- confirm against the full file. */
1069 #ifdef VERIFY
 /* Input contract, checked in VERIFY builds only: both operands have
  * magnitude at most 8 and pass secp256k1_fe_verify. */
1070  VERIFY_CHECK(a->magnitude <= 8);
1071  VERIFY_CHECK(b->magnitude <= 8);
1072  secp256k1_fe_verify(a);
1073  secp256k1_fe_verify(b);
 /* b must be a different object from both the output and the other operand. */
1074  VERIFY_CHECK(r != b);
1075  VERIFY_CHECK(a != b);
1076 #endif
 /* The limb-level product is delegated to the inner routine. */
1077  secp256k1_fe_mul_inner(r->n, a->n, b->n);
1078 #ifdef VERIFY
 /* Record the result as magnitude 1 and not normalized, then re-validate it. */
1079  r->magnitude = 1;
1080  r->normalized = 0;
1081  secp256k1_fe_verify(r);
1082 #endif
1083 }
1084
1085 static void secp256k1_fe_sqr(secp256k1_fe *r, const secp256k1_fe *a) {
1086 #ifdef VERIFY
1087  VERIFY_CHECK(a->magnitude <= 8);
1088  secp256k1_fe_verify(a);
1089 #endif
1090  secp256k1_fe_sqr_inner(r->n, a->n);
1091 #ifdef VERIFY
1092  r->magnitude = 1;
1093  r->normalized = 0;
1094  secp256k1_fe_verify(r);
1095 #endif
1096 }
1097
1098 static SECP256K1_INLINE void secp256k1_fe_cmov(secp256k1_fe *r, const secp256k1_fe *a, int flag) {
1100  VG_CHECK_VERIFY(r->n, sizeof(r->n));
1101  mask0 = flag + ~((uint32_t)0);
1113 #ifdef VERIFY
1114  if (flag) {
1115  r->magnitude = a->magnitude;
1116  r->normalized = a->normalized;
1117  }
1118 #endif
1119 }
1120
/* NOTE(review): only a remnant of this function is present in the listing.
 * Its signature, the declaration of mask0 and the statements between the mask
 * computation and the closing brace are absent. The two visible statements
 * match the opening of secp256k1_fe_cmov, so this is presumably the
 * conditional move for the packed storage form -- confirm against the full file. */
1123  VG_CHECK_VERIFY(r->n, sizeof(r->n));
 /* flag == 0 -> mask0 is all ones; flag == 1 -> the sum wraps to 0. */
1124  mask0 = flag + ~((uint32_t)0);
1134 }
1135
/* NOTE(review): the signature line of this function is absent from the
 * listing; presumably this is the field-element to storage conversion --
 * confirm against the full file.
 *
 * Packs the ten 26-bit limbs a->n[0..9] (22 bits in the top limb) into the
 * eight words r->n[0..7], least significant bits first. Each output word is
 * built from the leftover high bits of one limb and the low bits of the
 * next. VERIFY builds require a to be normalized. */
1137 #ifdef VERIFY
1138  VERIFY_CHECK(a->normalized);
1139 #endif
 /* word 0: limb 0 (26 bits) + low 6 bits of limb 1 */
1140  r->n[0] = a->n[0] | a->n[1] << 26;
 /* word 1: high 20 bits of limb 1 + low 12 bits of limb 2 */
1141  r->n[1] = a->n[1] >> 6 | a->n[2] << 20;
 /* word 2: high 14 bits of limb 2 + low 18 bits of limb 3 */
1142  r->n[2] = a->n[2] >> 12 | a->n[3] << 14;
 /* word 3: high 8 bits of limb 3 + low 24 bits of limb 4 */
1143  r->n[3] = a->n[3] >> 18 | a->n[4] << 8;
 /* word 4: high 2 bits of limb 4 + all of limb 5 + low 4 bits of limb 6 */
1144  r->n[4] = a->n[4] >> 24 | a->n[5] << 2 | a->n[6] << 28;
 /* word 5: high 22 bits of limb 6 + low 10 bits of limb 7 */
1145  r->n[5] = a->n[6] >> 4 | a->n[7] << 22;
 /* word 6: high 16 bits of limb 7 + low 16 bits of limb 8 */
1146  r->n[6] = a->n[7] >> 10 | a->n[8] << 16;
 /* word 7: high 10 bits of limb 8 + the 22 bits of limb 9 */
1147  r->n[7] = a->n[8] >> 16 | a->n[9] << 10;
1148 }
1149
/* NOTE(review): the signature line of this function is absent from the
 * listing; presumably this is the storage to field-element conversion --
 * confirm against the full file.
 *
 * Splits the eight words a->n[0..7] into the ten limbs r->n[0..9], 26 bits
 * per limb with the remaining 22 bits in the top limb. The shift amounts
 * assume a->n holds 32-bit words; the element type is declared elsewhere. */
1151  r->n[0] = a->n[0] & 0x3FFFFFFUL;
1152  r->n[1] = a->n[0] >> 26 | ((a->n[1] << 6) & 0x3FFFFFFUL);
1153  r->n[2] = a->n[1] >> 20 | ((a->n[2] << 12) & 0x3FFFFFFUL);
1154  r->n[3] = a->n[2] >> 14 | ((a->n[3] << 18) & 0x3FFFFFFUL);
1155  r->n[4] = a->n[3] >> 8 | ((a->n[4] << 24) & 0x3FFFFFFUL);
 /* Limb 5 lies entirely inside word 4 (bits 2..27). */
1156  r->n[5] = (a->n[4] >> 2) & 0x3FFFFFFUL;
1157  r->n[6] = a->n[4] >> 28 | ((a->n[5] << 4) & 0x3FFFFFFUL);
1158  r->n[7] = a->n[5] >> 22 | ((a->n[6] << 10) & 0x3FFFFFFUL);
1159  r->n[8] = a->n[6] >> 16 | ((a->n[7] << 16) & 0x3FFFFFFUL);
 /* Top limb: the remaining 22 bits of word 7. */
1160  r->n[9] = a->n[7] >> 10;
1161 #ifdef VERIFY
 /* The unpacked element is tagged as magnitude 1 and normalized; no range
  * check is performed here. */
1162  r->magnitude = 1;
1163  r->normalized = 1;
1164 #endif
1165 }
1166
1167 #endif /* SECP256K1_FIELD_REPR_IMPL_H */
