26constexpr int FINAL_LIMB_POSITION = 3072 / SIGNED_LIMB_SIZE;
27constexpr int FINAL_LIMB_MODULUS_BITS = 3072 % SIGNED_LIMB_SIZE;
28constexpr limb_t MAX_LIMB = (limb_t)(-1);
29constexpr limb_t MAX_SIGNED_LIMB = (((limb_t)1) << SIGNED_LIMB_SIZE) - 1;
34constexpr limb_t MAX_PRIME_DIFF = 1103717;
38constexpr limb_t MODULUS_INVERSE = limb_t(0x70a1421da087d93);
44inline void extract3(limb_t &c0, limb_t &c1, limb_t &c2, limb_t &n) {
52inline void mul(limb_t &c0, limb_t &c1,
const limb_t &a,
const limb_t &b) {
53 double_limb_t t = (double_limb_t)a * b;
59inline void mulnadd3(limb_t &c0, limb_t &c1, limb_t &c2, limb_t &d0, limb_t &d1,
60 limb_t &d2,
const limb_t &n) {
61 double_limb_t t = (double_limb_t)d0 * n + c0;
64 t += (double_limb_t)d1 * n + c1;
71inline void muln2(limb_t &c0, limb_t &c1,
const limb_t &n) {
72 double_limb_t t = (double_limb_t)c0 * n;
75 t += (double_limb_t)c1 * n;
80inline void muladd3(limb_t &c0, limb_t &c1, limb_t &c2,
const limb_t &a,
82 double_limb_t t = (double_limb_t)a * b;
83 limb_t th = t >> LIMB_SIZE;
87 th += (c0 < tl) ? 1 : 0;
89 c2 += (c1 < th) ? 1 : 0;
96inline void addnextract2(limb_t &c0, limb_t &c1,
const limb_t &a, limb_t &n) {
120 if (this->
limbs[0] <= std::numeric_limits<limb_t>::max() - MAX_PRIME_DIFF) {
123 for (
int i = 1; i <
LIMBS; ++i) {
124 if (this->
limbs[i] != std::numeric_limits<limb_t>::max()) {
132 limb_t c0 = MAX_PRIME_DIFF;
134 for (
int i = 0; i <
LIMBS; ++i) {
135 addnextract2(c0, c1, this->
limbs[i], this->
limbs[i]);
141struct Num3072Signed {
147 signed_limb_t limbs[SIGNED_LIMBS];
150 Num3072Signed() { memset(limbs, 0,
sizeof(limbs)); }
156 void FromNum3072(
const Num3072 &in) {
158 int b = 0, outpos = 0;
159 for (
int i = 0; i < LIMBS; ++i) {
160 c += double_limb_t{in.
limbs[i]} << b;
162 while (b >= SIGNED_LIMB_SIZE) {
163 limbs[outpos++] = limb_t(c) & MAX_SIGNED_LIMB;
164 c >>= SIGNED_LIMB_SIZE;
165 b -= SIGNED_LIMB_SIZE;
168 Assume(outpos == SIGNED_LIMBS - 1);
169 limbs[SIGNED_LIMBS - 1] = c;
170 c >>= SIGNED_LIMB_SIZE;
178 void ToNum3072(
Num3072 &out)
const {
180 int b = 0, outpos = 0;
181 for (
int i = 0; i < SIGNED_LIMBS; ++i) {
182 c += double_limb_t(limbs[i]) << b;
183 b += SIGNED_LIMB_SIZE;
184 if (b >= LIMB_SIZE) {
185 out.
limbs[outpos++] = c;
200 void Normalize(
bool negate) {
203 signed_limb_t cond_add = limbs[SIGNED_LIMBS - 1] >> (LIMB_SIZE - 1);
204 limbs[0] += signed_limb_t(-MAX_PRIME_DIFF) & cond_add;
205 limbs[FINAL_LIMB_POSITION] +=
206 (signed_limb_t(1) << FINAL_LIMB_MODULUS_BITS) & cond_add;
209 signed_limb_t cond_negate = -signed_limb_t(negate);
210 for (
int i = 0; i < SIGNED_LIMBS; ++i) {
211 limbs[i] = (limbs[i] ^ cond_negate) - cond_negate;
215 for (
int i = 0; i < SIGNED_LIMBS - 1; ++i) {
216 limbs[i + 1] += limbs[i] >> SIGNED_LIMB_SIZE;
217 limbs[i] &= MAX_SIGNED_LIMB;
221 cond_add = limbs[SIGNED_LIMBS - 1] >> (LIMB_SIZE - 1);
222 limbs[0] += signed_limb_t(-MAX_PRIME_DIFF) & cond_add;
223 limbs[FINAL_LIMB_POSITION] +=
224 (signed_limb_t(1) << FINAL_LIMB_MODULUS_BITS) & cond_add;
227 for (
int i = 0; i < SIGNED_LIMBS - 1; ++i) {
228 limbs[i + 1] += limbs[i] >> SIGNED_LIMB_SIZE;
229 limbs[i] &= MAX_SIGNED_LIMB;
236 signed_limb_t u, v, q, r;
243inline int CountTrailingZeroes(limb_t v) {
244#ifdef HAVE_DECL___BUILTIN_CTZ
246 if constexpr (std::numeric_limits<limb_t>::max() <=
247 std::numeric_limits<unsigned>::max()) {
248 return __builtin_ctz(v);
251#ifdef HAVE_DECL___BUILTIN_CTZL
254 if constexpr (std::numeric_limits<limb_t>::max() <=
255 std::numeric_limits<unsigned long>::max()) {
256 return __builtin_ctzl(v);
259#ifdef HAVE_DECL___BUILTIN_CTZLL
262 if constexpr (std::numeric_limits<limb_t>::max() <=
263 std::numeric_limits<unsigned long long>::max()) {
264 return __builtin_ctzll(v);
268 if constexpr (LIMB_SIZE <= 32) {
269 static constexpr uint8_t debruijn[32] = {
270 0, 1, 2, 24, 3, 19, 6, 25, 26, 4, 20, 10, 16, 7, 12, 26,
271 31, 23, 18, 5, 21, 9, 15, 11, 30, 17, 8, 14, 29, 13, 28, 27};
272 return debruijn[((v & -v) * 0x04D7651F) >> 27];
274 static_assert(LIMB_SIZE <= 64);
275 static constexpr uint8_t debruijn[64] = {
276 0, 1, 2, 53, 3, 7, 54, 27, 4, 38, 41, 8, 34, 55, 48, 28,
277 62, 5, 39, 46, 44, 42, 22, 9, 24, 35, 59, 56, 49, 18, 29, 11,
278 63, 52, 6, 26, 37, 40, 33, 47, 61, 45, 43, 21, 23, 58, 17, 10,
279 51, 25, 36, 32, 60, 20, 57, 16, 50, 31, 19, 15, 30, 14, 13, 12};
280 return debruijn[((v & -v) * 0x022FDD63CC95386D) >> 58];
293inline limb_t ComputeDivstepMatrix(signed_limb_t eta, limb_t f, limb_t g,
296 static const uint8_t NEGINV256[128] = {
297 0xFF, 0x55, 0x33, 0x49, 0xC7, 0x5D, 0x3B, 0x11, 0x0F, 0xE5, 0xC3, 0x59,
298 0xD7, 0xED, 0xCB, 0x21, 0x1F, 0x75, 0x53, 0x69, 0xE7, 0x7D, 0x5B, 0x31,
299 0x2F, 0x05, 0xE3, 0x79, 0xF7, 0x0D, 0xEB, 0x41, 0x3F, 0x95, 0x73, 0x89,
300 0x07, 0x9D, 0x7B, 0x51, 0x4F, 0x25, 0x03, 0x99, 0x17, 0x2D, 0x0B, 0x61,
301 0x5F, 0xB5, 0x93, 0xA9, 0x27, 0xBD, 0x9B, 0x71, 0x6F, 0x45, 0x23, 0xB9,
302 0x37, 0x4D, 0x2B, 0x81, 0x7F, 0xD5, 0xB3, 0xC9, 0x47, 0xDD, 0xBB, 0x91,
303 0x8F, 0x65, 0x43, 0xD9, 0x57, 0x6D, 0x4B, 0xA1, 0x9F, 0xF5, 0xD3, 0xE9,
304 0x67, 0xFD, 0xDB, 0xB1, 0xAF, 0x85, 0x63, 0xF9, 0x77, 0x8D, 0x6B, 0xC1,
305 0xBF, 0x15, 0xF3, 0x09, 0x87, 0x1D, 0xFB, 0xD1, 0xCF, 0xA5, 0x83, 0x19,
306 0x97, 0xAD, 0x8B, 0xE1, 0xDF, 0x35, 0x13, 0x29, 0xA7, 0x3D, 0x1B, 0xF1,
307 0xEF, 0xC5, 0xA3, 0x39, 0xB7, 0xCD, 0xAB, 0x01};
309 limb_t u = 1, v = 0, q = 0, r = 1;
311 int i = SIGNED_LIMB_SIZE;
314 int zeros = CountTrailingZeroes(g | (MAX_LIMB << i));
344 int limit = ((int)eta + 1) > i ? i : ((int)eta + 1);
349 limb_t m = (MAX_LIMB >> (LIMB_SIZE - limit)) & 255U;
354 limb_t w = (g * NEGINV256[(f >> 1) & 127]) & m;
360 out.u = (signed_limb_t)u;
361 out.v = (signed_limb_t)v;
362 out.q = (signed_limb_t)q;
363 out.r = (signed_limb_t)r;
372inline void UpdateDE(Num3072Signed &d, Num3072Signed &e,
373 const SignedMatrix &t) {
374 const signed_limb_t u = t.u, v = t.v, q = t.q, r = t.r;
380 signed_limb_t sd = d.limbs[SIGNED_LIMBS - 1] >> (LIMB_SIZE - 1);
381 signed_limb_t se = e.limbs[SIGNED_LIMBS - 1] >> (LIMB_SIZE - 1);
382 signed_limb_t md = (u & sd) + (v & se);
383 signed_limb_t me = (q & sd) + (r & se);
385 signed_limb_t di = d.limbs[0], ei = e.limbs[0];
386 signed_double_limb_t cd =
387 (signed_double_limb_t)u * di + (signed_double_limb_t)v * ei;
388 signed_double_limb_t ce =
389 (signed_double_limb_t)q * di + (signed_double_limb_t)r * ei;
394 md -= (MODULUS_INVERSE * limb_t(cd) + md) & MAX_SIGNED_LIMB;
395 me -= (MODULUS_INVERSE * limb_t(ce) + me) & MAX_SIGNED_LIMB;
400 cd -= (signed_double_limb_t)1103717 * md;
401 ce -= (signed_double_limb_t)1103717 * me;
406 Assume((cd & MAX_SIGNED_LIMB) == 0);
407 Assume((ce & MAX_SIGNED_LIMB) == 0);
408 cd >>= SIGNED_LIMB_SIZE;
409 ce >>= SIGNED_LIMB_SIZE;
416 for (
int i = 1; i < SIGNED_LIMBS - 1; ++i) {
419 cd += (signed_double_limb_t)u * di + (signed_double_limb_t)v * ei;
420 ce += (signed_double_limb_t)q * di + (signed_double_limb_t)r * ei;
421 d.limbs[i - 1] = (signed_limb_t)cd & MAX_SIGNED_LIMB;
422 cd >>= SIGNED_LIMB_SIZE;
423 e.limbs[i - 1] = (signed_limb_t)ce & MAX_SIGNED_LIMB;
424 ce >>= SIGNED_LIMB_SIZE;
430 di = d.limbs[SIGNED_LIMBS - 1];
431 ei = e.limbs[SIGNED_LIMBS - 1];
432 cd += (signed_double_limb_t)u * di + (signed_double_limb_t)v * ei;
433 ce += (signed_double_limb_t)q * di + (signed_double_limb_t)r * ei;
434 cd += (signed_double_limb_t)md << FINAL_LIMB_MODULUS_BITS;
435 ce += (signed_double_limb_t)me << FINAL_LIMB_MODULUS_BITS;
436 d.limbs[SIGNED_LIMBS - 2] = (signed_limb_t)cd & MAX_SIGNED_LIMB;
437 cd >>= SIGNED_LIMB_SIZE;
438 e.limbs[SIGNED_LIMBS - 2] = (signed_limb_t)ce & MAX_SIGNED_LIMB;
439 ce >>= SIGNED_LIMB_SIZE;
441 d.limbs[SIGNED_LIMBS - 1] = (signed_limb_t)cd;
442 e.limbs[SIGNED_LIMBS - 1] = (signed_limb_t)ce;
452inline void UpdateFG(Num3072Signed &f, Num3072Signed &g,
const SignedMatrix &t,
454 const signed_limb_t u = t.u, v = t.v, q = t.q, r = t.r;
456 signed_limb_t fi, gi;
457 signed_double_limb_t cf, cg;
461 cf = (signed_double_limb_t)u * fi + (signed_double_limb_t)v * gi;
462 cg = (signed_double_limb_t)q * fi + (signed_double_limb_t)r * gi;
467 Assume((cf & MAX_SIGNED_LIMB) == 0);
468 Assume((cg & MAX_SIGNED_LIMB) == 0);
469 cf >>= SIGNED_LIMB_SIZE;
470 cg >>= SIGNED_LIMB_SIZE;
475 for (
int i = 1; i < len; ++i) {
478 cf += (signed_double_limb_t)u * fi + (signed_double_limb_t)v * gi;
479 cg += (signed_double_limb_t)q * fi + (signed_double_limb_t)r * gi;
480 f.limbs[i - 1] = (signed_limb_t)cf & MAX_SIGNED_LIMB;
481 cf >>= SIGNED_LIMB_SIZE;
482 g.limbs[i - 1] = (signed_limb_t)cg & MAX_SIGNED_LIMB;
483 cg >>= SIGNED_LIMB_SIZE;
489 f.limbs[len - 1] = (signed_limb_t)cf;
490 g.limbs[len - 1] = (signed_limb_t)cg;
510 Num3072Signed d, e, f, g;
514 f.limbs[0] = -MAX_PRIME_DIFF;
515 f.limbs[FINAL_LIMB_POSITION] = ((
limb_t)1) << FINAL_LIMB_MODULUS_BITS;
516 g.FromNum3072(*
this);
526 eta = ComputeDivstepMatrix(eta, f.limbs[0], g.limbs[0], t);
528 UpdateFG(f, g, t, len);
534 if (g.limbs[0] == 0) {
536 for (
int j = 1; j < len; ++j) {
561 Assume((f.limbs[0] & MAX_SIGNED_LIMB) == 1 ||
562 (f.limbs[0] & MAX_SIGNED_LIMB) == MAX_SIGNED_LIMB);
567 d.Normalize(f.limbs[len - 1] >> (
LIMB_SIZE - 1));
574 limb_t c0 = 0, c1 = 0, c2 = 0;
578 for (
int j = 0; j <
LIMBS - 1; ++j) {
579 limb_t d0 = 0, d1 = 0, d2 = 0;
580 mul(d0, d1, this->limbs[1 + j], a.
limbs[
LIMBS + j - (1 + j)]);
581 for (
int i = 2 + j; i <
LIMBS; ++i) {
582 muladd3(d0, d1, d2, this->limbs[i], a.
limbs[
LIMBS + j - i]);
584 mulnadd3(c0, c1, c2, d0, d1, d2, MAX_PRIME_DIFF);
585 for (
int i = 0; i < j + 1; ++i) {
586 muladd3(c0, c1, c2, this->limbs[i], a.
limbs[j - i]);
588 extract3(c0, c1, c2, tmp.
limbs[j]);
593 for (
int i = 0; i <
LIMBS; ++i) {
594 muladd3(c0, c1, c2, this->limbs[i], a.
limbs[
LIMBS - 1 - i]);
599 muln2(c0, c1, MAX_PRIME_DIFF);
600 for (
int j = 0; j <
LIMBS; ++j) {
601 addnextract2(c0, c1, tmp.
limbs[j], this->limbs[j]);
605 assert(c0 == 0 || c0 == 1);
622 for (
int i = 1; i <
LIMBS; ++i) {
648 for (
int i = 0; i <
LIMBS; ++i) {
649 if (
sizeof(
limb_t) == 4) {
650 this->limbs[i] =
ReadLE32(data + 4 * i);
651 }
else if (
sizeof(
limb_t) == 8) {
652 this->limbs[i] =
ReadLE64(data + 8 * i);
658 for (
int i = 0; i <
LIMBS; ++i) {
659 if (
sizeof(
limb_t) == 4) {
661 }
else if (
sizeof(
limb_t) == 8) {
680 m_numerator = ToNum3072(in);
684 m_numerator.Divide(m_denominator);
686 m_denominator.SetToOne();
689 m_numerator.ToBytes(data);
695 m_numerator.Multiply(mul.m_numerator);
696 m_denominator.Multiply(mul.m_denominator);
701 m_numerator.Multiply(div.m_denominator);
702 m_denominator.Multiply(div.m_numerator);
707 m_numerator.Multiply(ToNum3072(in));
712 m_denominator.Multiply(ToNum3072(in));
#define Assume(val)
Assume is the identity function.
ChaCha20 cipher that only operates on multiples of 64 bytes.
static constexpr unsigned BLOCKLEN
Block size (inputs/outputs to Keystream / Crypt should be multiples of this).
A writer stream (for serialization) that computes a 256-bit hash.
A class representing MuHash sets.
Num3072 ToNum3072(Span< const uint8_t > in)
MuHash3072 & Remove(Span< const uint8_t > in) noexcept
void Finalize(uint256 &out) noexcept
MuHash3072 & operator/=(const MuHash3072 &div) noexcept
MuHash3072 & Insert(Span< const uint8_t > in) noexcept
MuHash3072 & operator*=(const MuHash3072 &mul) noexcept
Num3072 GetInverse() const
static constexpr int LIMBS
int64_t signed_double_limb_t
static constexpr size_t BYTE_SIZE
bool IsOverflow() const
Indicates whether d is larger than the modulus.
void ToBytes(uint8_t(&out)[BYTE_SIZE])
static constexpr int SIGNED_LIMBS
static constexpr int LIMB_SIZE
void Divide(const Num3072 &a)
static constexpr int SIGNED_LIMB_SIZE
void Multiply(const Num3072 &a)
static void WriteLE32(uint8_t *ptr, uint32_t x)
static uint64_t ReadLE64(const uint8_t *ptr)
static uint32_t ReadLE32(const uint8_t *ptr)
static void WriteLE64(uint8_t *ptr, uint64_t x)
Span< const std::byte > MakeByteSpan(V &&v) noexcept
Span< std::byte > MakeWritableByteSpan(V &&v) noexcept