Bitcoin ABC 0.32.4
P2P Digital Currency
modinv32_impl.h
Go to the documentation of this file.
1/***********************************************************************
2 * Copyright (c) 2020 Peter Dettman *
3 * Distributed under the MIT software license, see the accompanying *
4 * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
5 **********************************************************************/
6
7#ifndef SECP256K1_MODINV32_IMPL_H
8#define SECP256K1_MODINV32_IMPL_H
9
10#include "modinv32.h"
11
12#include "util.h"
13
14#include <stdlib.h>
15
16/* This file implements modular inversion based on the paper "Fast constant-time gcd computation and
17 * modular inversion" by Daniel J. Bernstein and Bo-Yin Yang.
18 *
19 * For an explanation of the algorithm, see doc/safegcd_implementation.md. This file contains an
20 * implementation for N=30, using 30-bit signed limbs represented as int32_t.
21 */
22
23#ifdef VERIFY
24static const secp256k1_modinv32_signed30 SECP256K1_SIGNED30_ONE = {{1}};
25
26/* Compute a*factor and put it in r. All but the top limb in r will be in range [0,2^30). */
27static void secp256k1_modinv32_mul_30(secp256k1_modinv32_signed30 *r, const secp256k1_modinv32_signed30 *a, int alen, int32_t factor) {
28 const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
29 int64_t c = 0;
30 int i;
31 for (i = 0; i < 8; ++i) {
32 if (i < alen) c += (int64_t)a->v[i] * factor;
33 r->v[i] = (int32_t)c & M30; c >>= 30;
34 }
35 if (8 < alen) c += (int64_t)a->v[8] * factor;
36 VERIFY_CHECK(c == (int32_t)c);
37 r->v[8] = (int32_t)c;
38}
39
40/* Return -1 for a<b*factor, 0 for a==b*factor, 1 for a>b*factor. A consists of alen limbs; b has 9. */
41static int secp256k1_modinv32_mul_cmp_30(const secp256k1_modinv32_signed30 *a, int alen, const secp256k1_modinv32_signed30 *b, int32_t factor) {
42 int i;
44 secp256k1_modinv32_mul_30(&am, a, alen, 1); /* Normalize all but the top limb of a. */
45 secp256k1_modinv32_mul_30(&bm, b, 9, factor);
46 for (i = 0; i < 8; ++i) {
47 /* Verify that all but the top limb of a and b are normalized. */
48 VERIFY_CHECK(am.v[i] >> 30 == 0);
49 VERIFY_CHECK(bm.v[i] >> 30 == 0);
50 }
51 for (i = 8; i >= 0; --i) {
52 if (am.v[i] < bm.v[i]) return -1;
53 if (am.v[i] > bm.v[i]) return 1;
54 }
55 return 0;
56}
57#endif
58
59/* Take as input a signed30 number in range (-2*modulus,modulus), and add a multiple of the modulus
60 * to it to bring it to range [0,modulus). If sign < 0, the input will also be negated in the
61 * process. The input must have limbs in range (-2^30,2^30). The output will have limbs in range
62 * [0,2^30). */
64 const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
65 int32_t r0 = r->v[0], r1 = r->v[1], r2 = r->v[2], r3 = r->v[3], r4 = r->v[4],
66 r5 = r->v[5], r6 = r->v[6], r7 = r->v[7], r8 = r->v[8];
67 volatile int32_t cond_add, cond_negate;
68
69#ifdef VERIFY
70 /* Verify that all limbs are in range (-2^30,2^30). */
71 int i;
72 for (i = 0; i < 9; ++i) {
73 VERIFY_CHECK(r->v[i] >= -M30);
74 VERIFY_CHECK(r->v[i] <= M30);
75 }
76 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, -2) > 0); /* r > -2*modulus */
77 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, 1) < 0); /* r < modulus */
78#endif
79
80 /* In a first step, add the modulus if the input is negative, and then negate if requested.
81 * This brings r from range (-2*modulus,modulus) to range (-modulus,modulus). As all input
82 * limbs are in range (-2^30,2^30), this cannot overflow an int32_t. Note that the right
83 * shifts below are signed sign-extending shifts (see assumptions.h for tests that that is
84 * indeed the behavior of the right shift operator). */
85 cond_add = r8 >> 31;
86 r0 += modinfo->modulus.v[0] & cond_add;
87 r1 += modinfo->modulus.v[1] & cond_add;
88 r2 += modinfo->modulus.v[2] & cond_add;
89 r3 += modinfo->modulus.v[3] & cond_add;
90 r4 += modinfo->modulus.v[4] & cond_add;
91 r5 += modinfo->modulus.v[5] & cond_add;
92 r6 += modinfo->modulus.v[6] & cond_add;
93 r7 += modinfo->modulus.v[7] & cond_add;
94 r8 += modinfo->modulus.v[8] & cond_add;
95 cond_negate = sign >> 31;
96 r0 = (r0 ^ cond_negate) - cond_negate;
97 r1 = (r1 ^ cond_negate) - cond_negate;
98 r2 = (r2 ^ cond_negate) - cond_negate;
99 r3 = (r3 ^ cond_negate) - cond_negate;
100 r4 = (r4 ^ cond_negate) - cond_negate;
101 r5 = (r5 ^ cond_negate) - cond_negate;
102 r6 = (r6 ^ cond_negate) - cond_negate;
103 r7 = (r7 ^ cond_negate) - cond_negate;
104 r8 = (r8 ^ cond_negate) - cond_negate;
105 /* Propagate the top bits, to bring limbs back to range (-2^30,2^30). */
106 r1 += r0 >> 30; r0 &= M30;
107 r2 += r1 >> 30; r1 &= M30;
108 r3 += r2 >> 30; r2 &= M30;
109 r4 += r3 >> 30; r3 &= M30;
110 r5 += r4 >> 30; r4 &= M30;
111 r6 += r5 >> 30; r5 &= M30;
112 r7 += r6 >> 30; r6 &= M30;
113 r8 += r7 >> 30; r7 &= M30;
114
115 /* In a second step add the modulus again if the result is still negative, bringing r to range
116 * [0,modulus). */
117 cond_add = r8 >> 31;
118 r0 += modinfo->modulus.v[0] & cond_add;
119 r1 += modinfo->modulus.v[1] & cond_add;
120 r2 += modinfo->modulus.v[2] & cond_add;
121 r3 += modinfo->modulus.v[3] & cond_add;
122 r4 += modinfo->modulus.v[4] & cond_add;
123 r5 += modinfo->modulus.v[5] & cond_add;
124 r6 += modinfo->modulus.v[6] & cond_add;
125 r7 += modinfo->modulus.v[7] & cond_add;
126 r8 += modinfo->modulus.v[8] & cond_add;
127 /* And propagate again. */
128 r1 += r0 >> 30; r0 &= M30;
129 r2 += r1 >> 30; r1 &= M30;
130 r3 += r2 >> 30; r2 &= M30;
131 r4 += r3 >> 30; r3 &= M30;
132 r5 += r4 >> 30; r4 &= M30;
133 r6 += r5 >> 30; r5 &= M30;
134 r7 += r6 >> 30; r6 &= M30;
135 r8 += r7 >> 30; r7 &= M30;
136
137 r->v[0] = r0;
138 r->v[1] = r1;
139 r->v[2] = r2;
140 r->v[3] = r3;
141 r->v[4] = r4;
142 r->v[5] = r5;
143 r->v[6] = r6;
144 r->v[7] = r7;
145 r->v[8] = r8;
146
147#ifdef VERIFY
148 VERIFY_CHECK(r0 >> 30 == 0);
149 VERIFY_CHECK(r1 >> 30 == 0);
150 VERIFY_CHECK(r2 >> 30 == 0);
151 VERIFY_CHECK(r3 >> 30 == 0);
152 VERIFY_CHECK(r4 >> 30 == 0);
153 VERIFY_CHECK(r5 >> 30 == 0);
154 VERIFY_CHECK(r6 >> 30 == 0);
155 VERIFY_CHECK(r7 >> 30 == 0);
156 VERIFY_CHECK(r8 >> 30 == 0);
157 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, 0) >= 0); /* r >= 0 */
158 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, 1) < 0); /* r < modulus */
159#endif
160}
161
162/* Data type for transition matrices (see section 3 of explanation).
163 *
164 * t = [ u v ]
165 * [ q r ]
166 */
167typedef struct {
168 int32_t u, v, q, r;
170
171/* Compute the transition matrix and zeta for 30 divsteps.
172 *
173 * Input: zeta: initial zeta
174 * f0: bottom limb of initial f
175 * g0: bottom limb of initial g
176 * Output: t: transition matrix
177 * Return: final zeta
178 *
179 * Implements the divsteps_n_matrix function from the explanation.
180 */
181static int32_t secp256k1_modinv32_divsteps_30(int32_t zeta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t) {
182 /* u,v,q,r are the elements of the transformation matrix being built up,
183 * starting with the identity matrix. Semantically they are signed integers
184 * in range [-2^30,2^30], but here represented as unsigned mod 2^32. This
185 * permits left shifting (which is UB for negative numbers). The range
186 * being inside [-2^31,2^31) means that casting to signed works correctly.
187 */
188 uint32_t u = 1, v = 0, q = 0, r = 1;
189 volatile uint32_t c1, c2;
190 uint32_t mask1, mask2, f = f0, g = g0, x, y, z;
191 int i;
192
193 for (i = 0; i < 30; ++i) {
194 VERIFY_CHECK((f & 1) == 1); /* f must always be odd */
195 VERIFY_CHECK((u * f0 + v * g0) == f << i);
196 VERIFY_CHECK((q * f0 + r * g0) == g << i);
197 /* Compute conditional masks for (zeta < 0) and for (g & 1). */
198 c1 = zeta >> 31;
199 mask1 = c1;
200 c2 = g & 1;
201 mask2 = -c2;
202 /* Compute x,y,z, conditionally negated versions of f,u,v. */
203 x = (f ^ mask1) - mask1;
204 y = (u ^ mask1) - mask1;
205 z = (v ^ mask1) - mask1;
206 /* Conditionally add x,y,z to g,q,r. */
207 g += x & mask2;
208 q += y & mask2;
209 r += z & mask2;
210 /* In what follows, mask1 is a condition mask for (zeta < 0) and (g & 1). */
211 mask1 &= mask2;
212 /* Conditionally change zeta into -zeta-2 or zeta-1. */
213 zeta = (zeta ^ mask1) - 1;
214 /* Conditionally add g,q,r to f,u,v. */
215 f += g & mask1;
216 u += q & mask1;
217 v += r & mask1;
218 /* Shifts */
219 g >>= 1;
220 u <<= 1;
221 v <<= 1;
222 /* Bounds on zeta that follow from the bounds on iteration count (max 20*30 divsteps). */
223 VERIFY_CHECK(zeta >= -601 && zeta <= 601);
224 }
225 /* Return data in t and return value. */
226 t->u = (int32_t)u;
227 t->v = (int32_t)v;
228 t->q = (int32_t)q;
229 t->r = (int32_t)r;
230 /* The determinant of t must be a power of two. This guarantees that multiplication with t
231 * does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which
232 * will be divided out again). As each divstep's individual matrix has determinant 2, the
233 * aggregate of 30 of them will have determinant 2^30. */
234 VERIFY_CHECK((int64_t)t->u * t->r - (int64_t)t->v * t->q == ((int64_t)1) << 30);
235 return zeta;
236}
237
238/* Compute the transition matrix and eta for 30 divsteps (variable time).
239 *
240 * Input: eta: initial eta
241 * f0: bottom limb of initial f
242 * g0: bottom limb of initial g
243 * Output: t: transition matrix
244 * Return: final eta
245 *
246 * Implements the divsteps_n_matrix_var function from the explanation.
247 */
248static int32_t secp256k1_modinv32_divsteps_30_var(int32_t eta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t) {
249 /* inv256[i] = -(2*i+1)^-1 (mod 256) */
250 static const uint8_t inv256[128] = {
251 0xFF, 0x55, 0x33, 0x49, 0xC7, 0x5D, 0x3B, 0x11, 0x0F, 0xE5, 0xC3, 0x59,
252 0xD7, 0xED, 0xCB, 0x21, 0x1F, 0x75, 0x53, 0x69, 0xE7, 0x7D, 0x5B, 0x31,
253 0x2F, 0x05, 0xE3, 0x79, 0xF7, 0x0D, 0xEB, 0x41, 0x3F, 0x95, 0x73, 0x89,
254 0x07, 0x9D, 0x7B, 0x51, 0x4F, 0x25, 0x03, 0x99, 0x17, 0x2D, 0x0B, 0x61,
255 0x5F, 0xB5, 0x93, 0xA9, 0x27, 0xBD, 0x9B, 0x71, 0x6F, 0x45, 0x23, 0xB9,
256 0x37, 0x4D, 0x2B, 0x81, 0x7F, 0xD5, 0xB3, 0xC9, 0x47, 0xDD, 0xBB, 0x91,
257 0x8F, 0x65, 0x43, 0xD9, 0x57, 0x6D, 0x4B, 0xA1, 0x9F, 0xF5, 0xD3, 0xE9,
258 0x67, 0xFD, 0xDB, 0xB1, 0xAF, 0x85, 0x63, 0xF9, 0x77, 0x8D, 0x6B, 0xC1,
259 0xBF, 0x15, 0xF3, 0x09, 0x87, 0x1D, 0xFB, 0xD1, 0xCF, 0xA5, 0x83, 0x19,
260 0x97, 0xAD, 0x8B, 0xE1, 0xDF, 0x35, 0x13, 0x29, 0xA7, 0x3D, 0x1B, 0xF1,
261 0xEF, 0xC5, 0xA3, 0x39, 0xB7, 0xCD, 0xAB, 0x01
262 };
263
264 /* Transformation matrix; see comments in secp256k1_modinv32_divsteps_30. */
265 uint32_t u = 1, v = 0, q = 0, r = 1;
266 uint32_t f = f0, g = g0, m;
267 uint16_t w;
268 int i = 30, limit, zeros;
269
270 for (;;) {
271 /* Use a sentinel bit to count zeros only up to i. */
272 zeros = secp256k1_ctz32_var(g | (UINT32_MAX << i));
273 /* Perform zeros divsteps at once; they all just divide g by two. */
274 g >>= zeros;
275 u <<= zeros;
276 v <<= zeros;
277 eta -= zeros;
278 i -= zeros;
279 /* We're done once we've done 30 divsteps. */
280 if (i == 0) break;
281 VERIFY_CHECK((f & 1) == 1);
282 VERIFY_CHECK((g & 1) == 1);
283 VERIFY_CHECK((u * f0 + v * g0) == f << (30 - i));
284 VERIFY_CHECK((q * f0 + r * g0) == g << (30 - i));
285 /* Bounds on eta that follow from the bounds on iteration count (max 25*30 divsteps). */
286 VERIFY_CHECK(eta >= -751 && eta <= 751);
287 /* If eta is negative, negate it and replace f,g with g,-f. */
288 if (eta < 0) {
289 uint32_t tmp;
290 eta = -eta;
291 tmp = f; f = g; g = -tmp;
292 tmp = u; u = q; q = -tmp;
293 tmp = v; v = r; r = -tmp;
294 }
295 /* eta is now >= 0. In what follows we're going to cancel out the bottom bits of g. No more
296 * than i can be cancelled out (as we'd be done before that point), and no more than eta+1
297 * can be done as its sign will flip once that happens. */
298 limit = ((int)eta + 1) > i ? i : ((int)eta + 1);
299 /* m is a mask for the bottom min(limit, 8) bits (our table only supports 8 bits). */
300 VERIFY_CHECK(limit > 0 && limit <= 30);
301 m = (UINT32_MAX >> (32 - limit)) & 255U;
302 /* Find what multiple of f must be added to g to cancel its bottom min(limit, 8) bits. */
303 w = (g * inv256[(f >> 1) & 127]) & m;
304 /* Do so. */
305 g += f * w;
306 q += u * w;
307 r += v * w;
308 VERIFY_CHECK((g & m) == 0);
309 }
310 /* Return data in t and return value. */
311 t->u = (int32_t)u;
312 t->v = (int32_t)v;
313 t->q = (int32_t)q;
314 t->r = (int32_t)r;
315 /* The determinant of t must be a power of two. This guarantees that multiplication with t
316 * does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which
317 * will be divided out again). As each divstep's individual matrix has determinant 2, the
318 * aggregate of 30 of them will have determinant 2^30. */
319 VERIFY_CHECK((int64_t)t->u * t->r - (int64_t)t->v * t->q == ((int64_t)1) << 30);
320 return eta;
321}
322
323/* Compute (t/2^30) * [d, e] mod modulus, where t is a transition matrix for 30 divsteps.
324 *
325 * On input and output, d and e are in range (-2*modulus,modulus). All output limbs will be in range
326 * (-2^30,2^30).
327 *
328 * This implements the update_de function from the explanation.
329 */
331 const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
332 const int32_t u = t->u, v = t->v, q = t->q, r = t->r;
333 int32_t di, ei, md, me, sd, se;
334 int64_t cd, ce;
335 int i;
336#ifdef VERIFY
337 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, -2) > 0); /* d > -2*modulus */
338 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, 1) < 0); /* d < modulus */
339 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, -2) > 0); /* e > -2*modulus */
340 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, 1) < 0); /* e < modulus */
341 VERIFY_CHECK((labs(u) + labs(v)) >= 0); /* |u|+|v| doesn't overflow */
342 VERIFY_CHECK((labs(q) + labs(r)) >= 0); /* |q|+|r| doesn't overflow */
343 VERIFY_CHECK((labs(u) + labs(v)) <= M30 + 1); /* |u|+|v| <= 2^30 */
344 VERIFY_CHECK((labs(q) + labs(r)) <= M30 + 1); /* |q|+|r| <= 2^30 */
345#endif
346 /* [md,me] start as zero; plus [u,q] if d is negative; plus [v,r] if e is negative. */
347 sd = d->v[8] >> 31;
348 se = e->v[8] >> 31;
349 md = (u & sd) + (v & se);
350 me = (q & sd) + (r & se);
351 /* Begin computing t*[d,e]. */
352 di = d->v[0];
353 ei = e->v[0];
354 cd = (int64_t)u * di + (int64_t)v * ei;
355 ce = (int64_t)q * di + (int64_t)r * ei;
356 /* Correct md,me so that t*[d,e]+modulus*[md,me] has 30 zero bottom bits. */
357 md -= (modinfo->modulus_inv30 * (uint32_t)cd + md) & M30;
358 me -= (modinfo->modulus_inv30 * (uint32_t)ce + me) & M30;
359 /* Update the beginning of computation for t*[d,e]+modulus*[md,me] now md,me are known. */
360 cd += (int64_t)modinfo->modulus.v[0] * md;
361 ce += (int64_t)modinfo->modulus.v[0] * me;
362 /* Verify that the low 30 bits of the computation are indeed zero, and then throw them away. */
363 VERIFY_CHECK(((int32_t)cd & M30) == 0); cd >>= 30;
364 VERIFY_CHECK(((int32_t)ce & M30) == 0); ce >>= 30;
365 /* Now iteratively compute limb i=1..8 of t*[d,e]+modulus*[md,me], and store them in output
366 * limb i-1 (shifting down by 30 bits). */
367 for (i = 1; i < 9; ++i) {
368 di = d->v[i];
369 ei = e->v[i];
370 cd += (int64_t)u * di + (int64_t)v * ei;
371 ce += (int64_t)q * di + (int64_t)r * ei;
372 cd += (int64_t)modinfo->modulus.v[i] * md;
373 ce += (int64_t)modinfo->modulus.v[i] * me;
374 d->v[i - 1] = (int32_t)cd & M30; cd >>= 30;
375 e->v[i - 1] = (int32_t)ce & M30; ce >>= 30;
376 }
377 /* What remains is limb 9 of t*[d,e]+modulus*[md,me]; store it as output limb 8. */
378 d->v[8] = (int32_t)cd;
379 e->v[8] = (int32_t)ce;
380#ifdef VERIFY
381 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, -2) > 0); /* d > -2*modulus */
382 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, 1) < 0); /* d < modulus */
383 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, -2) > 0); /* e > -2*modulus */
384 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, 1) < 0); /* e < modulus */
385#endif
386}
387
388/* Compute (t/2^30) * [f, g], where t is a transition matrix for 30 divsteps.
389 *
390 * This implements the update_fg function from the explanation.
391 */
393 const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
394 const int32_t u = t->u, v = t->v, q = t->q, r = t->r;
395 int32_t fi, gi;
396 int64_t cf, cg;
397 int i;
398 /* Start computing t*[f,g]. */
399 fi = f->v[0];
400 gi = g->v[0];
401 cf = (int64_t)u * fi + (int64_t)v * gi;
402 cg = (int64_t)q * fi + (int64_t)r * gi;
403 /* Verify that the bottom 30 bits of the result are zero, and then throw them away. */
404 VERIFY_CHECK(((int32_t)cf & M30) == 0); cf >>= 30;
405 VERIFY_CHECK(((int32_t)cg & M30) == 0); cg >>= 30;
406 /* Now iteratively compute limb i=1..8 of t*[f,g], and store them in output limb i-1 (shifting
407 * down by 30 bits). */
408 for (i = 1; i < 9; ++i) {
409 fi = f->v[i];
410 gi = g->v[i];
411 cf += (int64_t)u * fi + (int64_t)v * gi;
412 cg += (int64_t)q * fi + (int64_t)r * gi;
413 f->v[i - 1] = (int32_t)cf & M30; cf >>= 30;
414 g->v[i - 1] = (int32_t)cg & M30; cg >>= 30;
415 }
416 /* What remains is limb 9 of t*[f,g]; store it as output limb 8. */
417 f->v[8] = (int32_t)cf;
418 g->v[8] = (int32_t)cg;
419}
420
421/* Compute (t/2^30) * [f, g], where t is a transition matrix for 30 divsteps.
422 *
423 * Version that operates on a variable number of limbs in f and g.
424 *
425 * This implements the update_fg function from the explanation in modinv64_impl.h.
426 */
428 const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
429 const int32_t u = t->u, v = t->v, q = t->q, r = t->r;
430 int32_t fi, gi;
431 int64_t cf, cg;
432 int i;
433 VERIFY_CHECK(len > 0);
434 /* Start computing t*[f,g]. */
435 fi = f->v[0];
436 gi = g->v[0];
437 cf = (int64_t)u * fi + (int64_t)v * gi;
438 cg = (int64_t)q * fi + (int64_t)r * gi;
439 /* Verify that the bottom 62 bits of the result are zero, and then throw them away. */
440 VERIFY_CHECK(((int32_t)cf & M30) == 0); cf >>= 30;
441 VERIFY_CHECK(((int32_t)cg & M30) == 0); cg >>= 30;
442 /* Now iteratively compute limb i=1..len of t*[f,g], and store them in output limb i-1 (shifting
443 * down by 30 bits). */
444 for (i = 1; i < len; ++i) {
445 fi = f->v[i];
446 gi = g->v[i];
447 cf += (int64_t)u * fi + (int64_t)v * gi;
448 cg += (int64_t)q * fi + (int64_t)r * gi;
449 f->v[i - 1] = (int32_t)cf & M30; cf >>= 30;
450 g->v[i - 1] = (int32_t)cg & M30; cg >>= 30;
451 }
452 /* What remains is limb (len) of t*[f,g]; store it as output limb (len-1). */
453 f->v[len - 1] = (int32_t)cf;
454 g->v[len - 1] = (int32_t)cg;
455}
456
457/* Compute the inverse of x modulo modinfo->modulus, and replace x with it (constant time in x). */
459 /* Start with d=0, e=1, f=modulus, g=x, zeta=-1. */
464 int i;
465 int32_t zeta = -1; /* zeta = -(delta+1/2); delta is initially 1/2. */
466
467 /* Do 20 iterations of 30 divsteps each = 600 divsteps. 590 suffices for 256-bit inputs. */
468 for (i = 0; i < 20; ++i) {
469 /* Compute transition matrix and new zeta after 30 divsteps. */
471 zeta = secp256k1_modinv32_divsteps_30(zeta, f.v[0], g.v[0], &t);
472 /* Update d,e using that transition matrix. */
473 secp256k1_modinv32_update_de_30(&d, &e, &t, modinfo);
474 /* Update f,g using that transition matrix. */
475#ifdef VERIFY
476 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, -1) > 0); /* f > -modulus */
477 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, 1) <= 0); /* f <= modulus */
478 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, -1) > 0); /* g > -modulus */
479 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, 1) < 0); /* g < modulus */
480#endif
482#ifdef VERIFY
483 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, -1) > 0); /* f > -modulus */
484 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, 1) <= 0); /* f <= modulus */
485 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, -1) > 0); /* g > -modulus */
486 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, 1) < 0); /* g < modulus */
487#endif
488 }
489
490 /* At this point sufficient iterations have been performed that g must have reached 0
491 * and (if g was not originally 0) f must now equal +/- GCD of the initial f, g
492 * values i.e. +/- 1, and d now contains +/- the modular inverse. */
493#ifdef VERIFY
494 /* g == 0 */
495 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &SECP256K1_SIGNED30_ONE, 0) == 0);
496 /* |f| == 1, or (x == 0 and d == 0 and |f|=modulus) */
497 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &SECP256K1_SIGNED30_ONE, -1) == 0 ||
498 secp256k1_modinv32_mul_cmp_30(&f, 9, &SECP256K1_SIGNED30_ONE, 1) == 0 ||
499 (secp256k1_modinv32_mul_cmp_30(x, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
500 secp256k1_modinv32_mul_cmp_30(&d, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
501 (secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, 1) == 0 ||
502 secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, -1) == 0)));
503#endif
504
505 /* Optionally negate d, normalize to [0,modulus), and return it. */
506 secp256k1_modinv32_normalize_30(&d, f.v[8], modinfo);
507 *x = d;
508}
509
510/* Compute the inverse of x modulo modinfo->modulus, and replace x with it (variable time). */
512 /* Start with d=0, e=1, f=modulus, g=x, eta=-1. */
513 secp256k1_modinv32_signed30 d = {{0, 0, 0, 0, 0, 0, 0, 0, 0}};
514 secp256k1_modinv32_signed30 e = {{1, 0, 0, 0, 0, 0, 0, 0, 0}};
517#ifdef VERIFY
518 int i = 0;
519#endif
520 int j, len = 9;
521 int32_t eta = -1; /* eta = -delta; delta is initially 1 (faster for the variable-time code) */
522 int32_t cond, fn, gn;
523
524 /* Do iterations of 30 divsteps each until g=0. */
525 while (1) {
526 /* Compute transition matrix and new eta after 30 divsteps. */
528 eta = secp256k1_modinv32_divsteps_30_var(eta, f.v[0], g.v[0], &t);
529 /* Update d,e using that transition matrix. */
530 secp256k1_modinv32_update_de_30(&d, &e, &t, modinfo);
531 /* Update f,g using that transition matrix. */
532#ifdef VERIFY
533 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, -1) > 0); /* f > -modulus */
534 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
535 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, -1) > 0); /* g > -modulus */
536 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0); /* g < modulus */
537#endif
539 /* If the bottom limb of g is 0, there is a chance g=0. */
540 if (g.v[0] == 0) {
541 cond = 0;
542 /* Check if all other limbs are also 0. */
543 for (j = 1; j < len; ++j) {
544 cond |= g.v[j];
545 }
546 /* If so, we're done. */
547 if (cond == 0) break;
548 }
549
550 /* Determine if len>1 and limb (len-1) of both f and g is 0 or -1. */
551 fn = f.v[len - 1];
552 gn = g.v[len - 1];
553 cond = ((int32_t)len - 2) >> 31;
554 cond |= fn ^ (fn >> 31);
555 cond |= gn ^ (gn >> 31);
556 /* If so, reduce length, propagating the sign of f and g's top limb into the one below. */
557 if (cond == 0) {
558 f.v[len - 2] |= (uint32_t)fn << 30;
559 g.v[len - 2] |= (uint32_t)gn << 30;
560 --len;
561 }
562#ifdef VERIFY
563 VERIFY_CHECK(++i < 25); /* We should never need more than 25*30 = 750 divsteps */
564 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, -1) > 0); /* f > -modulus */
565 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
566 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, -1) > 0); /* g > -modulus */
567 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0); /* g < modulus */
568#endif
569 }
570
571 /* At this point g is 0 and (if g was not originally 0) f must now equal +/- GCD of
572 * the initial f, g values i.e. +/- 1, and d now contains +/- the modular inverse. */
573#ifdef VERIFY
574 /* g == 0 */
575 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &SECP256K1_SIGNED30_ONE, 0) == 0);
576 /* |f| == 1, or (x == 0 and d == 0 and |f|=modulus) */
577 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &SECP256K1_SIGNED30_ONE, -1) == 0 ||
578 secp256k1_modinv32_mul_cmp_30(&f, len, &SECP256K1_SIGNED30_ONE, 1) == 0 ||
579 (secp256k1_modinv32_mul_cmp_30(x, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
580 secp256k1_modinv32_mul_cmp_30(&d, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
581 (secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) == 0 ||
582 secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, -1) == 0)));
583#endif
584
585 /* Optionally negate d, normalize to [0,modulus), and return it. */
586 secp256k1_modinv32_normalize_30(&d, f.v[len - 1], modinfo);
587 *x = d;
588}
589
590#endif /* SECP256K1_MODINV32_IMPL_H */
static void secp256k1_modinv32_update_fg_30_var(int len, secp256k1_modinv32_signed30 *f, secp256k1_modinv32_signed30 *g, const secp256k1_modinv32_trans2x2 *t)
static void secp256k1_modinv32_var(secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo)
static int32_t secp256k1_modinv32_divsteps_30_var(int32_t eta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t)
static void secp256k1_modinv32_normalize_30(secp256k1_modinv32_signed30 *r, int32_t sign, const secp256k1_modinv32_modinfo *modinfo)
Definition: modinv32_impl.h:63
static void secp256k1_modinv32(secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo)
static int32_t secp256k1_modinv32_divsteps_30(int32_t zeta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t)
static void secp256k1_modinv32_update_fg_30(secp256k1_modinv32_signed30 *f, secp256k1_modinv32_signed30 *g, const secp256k1_modinv32_trans2x2 *t)
static void secp256k1_modinv32_update_de_30(secp256k1_modinv32_signed30 *d, secp256k1_modinv32_signed30 *e, const secp256k1_modinv32_trans2x2 *t, const secp256k1_modinv32_modinfo *modinfo)
static SECP256K1_INLINE int secp256k1_ctz32_var(uint32_t x)
Definition: util.h:309
#define VERIFY_CHECK(cond)
Definition: util.h:68
secp256k1_modinv32_signed30 modulus
Definition: modinv32.h:25