#define bignum256modm_bits_per_limb 56
#define bignum256modm_limb_size 5

	/* t = r - m, limb by limb: lt_modm yields the borrow out of each 56-bit
	   limb, and (b << 56) re-adds 2^56 whenever the subtraction wrapped
	   (the top limb only carries 32 bits, hence the << 32). */
	pb += modm_m[0]; b = lt_modm(r[0], pb); t[0] = (r[0] - pb + (b << 56)); pb = b;
	pb += modm_m[1]; b = lt_modm(r[1], pb); t[1] = (r[1] - pb + (b << 56)); pb = b;
	pb += modm_m[2]; b = lt_modm(r[2], pb); t[2] = (r[2] - pb + (b << 56)); pb = b;
	pb += modm_m[3]; b = lt_modm(r[3], pb); t[3] = (r[3] - pb + (b << 56)); pb = b;
	pb += modm_m[4]; b = lt_modm(r[4], pb); t[4] = (r[4] - pb + (b << 32));

	/* constant-time select: mask (derived from the final borrow, not shown in
	   this excerpt) is all ones exactly when r >= m, so the reduced value t is
	   taken; otherwise it is zero and r is kept unchanged. */
	r[0] ^= mask & (r[0] ^ t[0]);
	r[1] ^= mask & (r[1] ^ t[1]);
	r[2] ^= mask & (r[2] ^ t[2]);
	r[3] ^= mask & (r[3] ^ t[3]);
	r[4] ^= mask & (r[4] ^ t[4]);
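
/*
 * Hedged illustration (a standalone toy program, not part of the original
 * file; the names below are demo names only): the masked-select idiom used
 * above.  With mask all ones the candidate t is taken, with mask zero r is
 * kept, and no data-dependent branch is executed either way.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t ct_select(uint64_t r, uint64_t t, uint64_t mask) {
	return r ^ (mask & (r ^ t));   /* mask == 0 keeps r, mask == ~0 takes t */
}

int main(void) {
	printf("%llu %llu\n",
	       (unsigned long long)ct_select(5, 9, 0),              /* -> 5 */
	       (unsigned long long)ct_select(5, 9, ~(uint64_t)0));  /* -> 9 */
	return 0;
}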
	/* Barrett step: q2 = mu * q1, computed only from the columns that can
	   still influence bits 264 and above, then q3 = q2 >> 264.  Since
	   264 = 4*56 + 40, each q3 limb is stitched from a 16-bit high piece of
	   one column sum and a 40-bit low piece of the next. */
	mul64x64_128(c, modm_mu[0], q1[3]) mul64x64_128(mul, modm_mu[3], q1[0]) add128(c, mul) mul64x64_128(mul, modm_mu[1], q1[2]) add128(c, mul) mul64x64_128(mul, modm_mu[2], q1[1]) add128(c, mul) shr128(f, c, 56);
	mul64x64_128(c, modm_mu[0], q1[4]) add128_64(c, f) mul64x64_128(mul, modm_mu[4], q1[0]) add128(c, mul) mul64x64_128(mul, modm_mu[3], q1[1]) add128(c, mul) mul64x64_128(mul, modm_mu[1], q1[3]) add128(c, mul) mul64x64_128(mul, modm_mu[2], q1[2]) add128(c, mul)
	f = lo128(c); q3[0] = (f >> 40) & 0xffff; shr128(f, c, 56);
	mul64x64_128(c, modm_mu[4], q1[1]) add128_64(c, f) mul64x64_128(mul, modm_mu[1], q1[4]) add128(c, mul) mul64x64_128(mul, modm_mu[2], q1[3]) add128(c, mul) mul64x64_128(mul, modm_mu[3], q1[2]) add128(c, mul)
	f = lo128(c); q3[0] |= (f << 16) & 0xffffffffffffff; q3[1] = (f >> 40) & 0xffff; shr128(f, c, 56);
	mul64x64_128(c, modm_mu[4], q1[2]) add128_64(c, f) mul64x64_128(mul, modm_mu[2], q1[4]) add128(c, mul) mul64x64_128(mul, modm_mu[3], q1[3]) add128(c, mul)
	f = lo128(c); q3[1] |= (f << 16) & 0xffffffffffffff; q3[2] = (f >> 40) & 0xffff; shr128(f, c, 56);
	mul64x64_128(c, modm_mu[4], q1[3]) add128_64(c, f) mul64x64_128(mul, modm_mu[3], q1[4]) add128(c, mul)
	f = lo128(c); q3[2] |= (f << 16) & 0xffffffffffffff; q3[3] = (f >> 40) & 0xffff; shr128(f, c, 56);
	mul64x64_128(c, modm_mu[4], q1[4]) add128_64(c, f)
	f = lo128(c); q3[3] |= (f << 16) & 0xffffffffffffff; q3[4] = (f >> 40) & 0xffff; shr128(f, c, 56);

	/* r2 = (q3 * m) mod 2^264, keeping only the low five limbs */
	mul64x64_128(c, modm_m[0], q3[0])
	r2[0] = lo128(c) & 0xffffffffffffff; shr128(f, c, 56);
	mul64x64_128(c, modm_m[0], q3[1]) add128_64(c, f) mul64x64_128(mul, modm_m[1], q3[0]) add128(c, mul)
	r2[1] = lo128(c) & 0xffffffffffffff; shr128(f, c, 56);
	mul64x64_128(c, modm_m[0], q3[2]) add128_64(c, f) mul64x64_128(mul, modm_m[2], q3[0]) add128(c, mul) mul64x64_128(mul, modm_m[1], q3[1]) add128(c, mul)
	r2[2] = lo128(c) & 0xffffffffffffff; shr128(f, c, 56);
	mul64x64_128(c, modm_m[0], q3[3]) add128_64(c, f) mul64x64_128(mul, modm_m[3], q3[0]) add128(c, mul) mul64x64_128(mul, modm_m[1], q3[2]) add128(c, mul) mul64x64_128(mul, modm_m[2], q3[1]) add128(c, mul)
	r2[3] = lo128(c) & 0xffffffffffffff; shr128(f, c, 56);
	mul64x64_128(c, modm_m[0], q3[4]) add128_64(c, f) mul64x64_128(mul, modm_m[4], q3[0]) add128(c, mul) mul64x64_128(mul, modm_m[3], q3[1]) add128(c, mul) mul64x64_128(mul, modm_m[1], q3[3]) add128(c, mul) mul64x64_128(mul, modm_m[2], q3[2]) add128(c, mul)
	r2[4] = lo128(c) & 0x0000ffffffffff;

	/* r = (r1 - r2) mod 2^264: the usual borrow chain, re-adding 2^56
	   (2^40 for the 40-bit top limb) whenever a limb wraps */
	pb += r2[0]; b = lt_modm(r1[0], pb); r[0] = (r1[0] - pb + (b << 56)); pb = b;
	pb += r2[1]; b = lt_modm(r1[1], pb); r[1] = (r1[1] - pb + (b << 56)); pb = b;
	pb += r2[2]; b = lt_modm(r1[2], pb); r[2] = (r1[2] - pb + (b << 56)); pb = b;
	pb += r2[3]; b = lt_modm(r1[3], pb); r[3] = (r1[3] - pb + (b << 56)); pb = b;
	pb += r2[4]; b = lt_modm(r1[4], pb); r[4] = (r1[4] - pb + (b << 40));
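
/*
 * Hedged, standalone sketch (an illustration, not code from this file): the
 * same Barrett idea on small numbers.  mu = floor(2^32 / m) is a small-scale
 * analogue of the precomputed modm_mu constant above (an assumption about
 * its role, stated only for illustration).  The truncated products give an
 * estimate q of x / m that never overshoots, so x - q*m is congruent to
 * x mod m and only a few trailing subtractions of m are needed, just like
 * the r = r1 - r2 step above.
 */
#include <stdint.h>
#include <stdio.h>

int main(void) {
	const uint32_t m  = 40961;                    /* toy modulus, 2^15 <= m < 2^16 */
	const uint64_t mu = ((uint64_t)1 << 32) / m;  /* precomputed reciprocal */
	const uint64_t x  = 123456789u;               /* value to reduce, x < m^2 */

	uint64_t q = ((x >> 15) * mu) >> 17;          /* underestimate of x / m */
	uint64_t r = x - q * m;                       /* congruent to x mod m */
	while (r >= m)                                /* a small, bounded number of fixups */
		r -= m;

	printf("%llu %llu\n", (unsigned long long)r, (unsigned long long)(x % m));
	return 0;
}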
	/* addition with ripple carry across 56-bit limbs */
	c  = x[0] + y[0]; r[0] = c & 0xffffffffffffff; c >>= 56;
	c += x[1] + y[1]; r[1] = c & 0xffffffffffffff; c >>= 56;
	c += x[2] + y[2]; r[2] = c & 0xffffffffffffff; c >>= 56;
	c += x[3] + y[3]; r[3] = c & 0xffffffffffffff; c >>= 56;
	c += x[4] + y[4]; r[4] = c;
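
/*
 * Hedged illustration (standalone toy, demo names only): the same ripple-carry
 * pattern on two 56-bit limbs.  Each limb leaves 8 spare bits in a uint64_t,
 * so the carry simply rides along in the top bits and is recovered by the
 * shift, exactly as in the chain above.
 */
#include <stdint.h>
#include <stdio.h>

int main(void) {
	const uint64_t LIMB_MASK = 0xffffffffffffffULL;  /* low 56 bits */
	uint64_t x[2] = { LIMB_MASK, 1 };
	uint64_t y[2] = { 1, 0 };
	uint64_t r[2], c;

	c  = x[0] + y[0]; r[0] = c & LIMB_MASK; c >>= 56;
	c += x[1] + y[1]; r[1] = c;

	/* prints "2 0": the carry out of limb 0 landed in limb 1 */
	printf("%llx %llx\n", (unsigned long long)r[1], (unsigned long long)r[0]);
	return 0;
}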
	/* schoolbook multiply into 56-bit limbs: r1 holds the low 264 bits of the
	   product and q1 the bits from 248 upward (the two overlap by 16 bits),
	   ready for the Barrett reduction */
	mul64x64_128(c, x[0], y[0])
	f = lo128(c); r1[0] = f & 0xffffffffffffff; shr128(f, c, 56);
	mul64x64_128(c, x[0], y[1]) add128_64(c, f) mul64x64_128(mul, x[1], y[0]) add128(c, mul)
	f = lo128(c); r1[1] = f & 0xffffffffffffff; shr128(f, c, 56);
	mul64x64_128(c, x[0], y[2]) add128_64(c, f) mul64x64_128(mul, x[2], y[0]) add128(c, mul) mul64x64_128(mul, x[1], y[1]) add128(c, mul)
	f = lo128(c); r1[2] = f & 0xffffffffffffff; shr128(f, c, 56);
	mul64x64_128(c, x[0], y[3]) add128_64(c, f) mul64x64_128(mul, x[3], y[0]) add128(c, mul) mul64x64_128(mul, x[1], y[2]) add128(c, mul) mul64x64_128(mul, x[2], y[1]) add128(c, mul)
	f = lo128(c); r1[3] = f & 0xffffffffffffff; shr128(f, c, 56);
	mul64x64_128(c, x[0], y[4]) add128_64(c, f) mul64x64_128(mul, x[4], y[0]) add128(c, mul) mul64x64_128(mul, x[3], y[1]) add128(c, mul) mul64x64_128(mul, x[1], y[3]) add128(c, mul) mul64x64_128(mul, x[2], y[2]) add128(c, mul)
	f = lo128(c); r1[4] = f & 0x0000ffffffffff; q1[0] = (f >> 24) & 0xffffffff; shr128(f, c, 56);
	mul64x64_128(c, x[4], y[1]) add128_64(c, f) mul64x64_128(mul, x[1], y[4]) add128(c, mul) mul64x64_128(mul, x[2], y[3]) add128(c, mul) mul64x64_128(mul, x[3], y[2]) add128(c, mul)
	f = lo128(c); q1[0] |= (f << 32) & 0xffffffffffffff; q1[1] = (f >> 24) & 0xffffffff; shr128(f, c, 56);
	mul64x64_128(c, x[4], y[2]) add128_64(c, f) mul64x64_128(mul, x[2], y[4]) add128(c, mul) mul64x64_128(mul, x[3], y[3]) add128(c, mul)
	f = lo128(c); q1[1] |= (f << 32) & 0xffffffffffffff; q1[2] = (f >> 24) & 0xffffffff; shr128(f, c, 56);
	mul64x64_128(c, x[4], y[3]) add128_64(c, f) mul64x64_128(mul, x[3], y[4]) add128(c, mul)
	f = lo128(c); q1[2] |= (f << 32) & 0xffffffffffffff; q1[3] = (f >> 24) & 0xffffffff; shr128(f, c, 56);
	mul64x64_128(c, x[4], y[4]) add128_64(c, f)
	f = lo128(c); q1[3] |= (f << 32) & 0xffffffffffffff; q1[4] = (f >> 24) & 0xffffffff; shr128(f, c, 56);

	barrett_reduce256_modm(r, q1, r1);
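
/*
 * Hedged sketch (standalone toy; assumes a compiler with the GCC/Clang
 * unsigned __int128 extension -- the library's own mul64x64_128 / lo128 /
 * shr128 macros are defined elsewhere and may use other mechanisms): what a
 * single multiply-accumulate step above boils down to -- a full 64x64->128
 * product, the low 56 bits kept as the output limb and the rest carried.
 */
#include <stdint.h>
#include <stdio.h>

int main(void) {
	uint64_t a = 0x00ffffffffffffffULL;              /* a full 56-bit limb */
	uint64_t b = 0x00ffffffffffffffULL;
	unsigned __int128 c = (unsigned __int128)a * b;  /* like mul64x64_128(c, a, b) */

	uint64_t limb  = (uint64_t)c & 0xffffffffffffffULL;  /* like lo128 + mask */
	uint64_t carry = (uint64_t)(c >> 56);                /* like shr128(f, c, 56) */

	printf("%llx %llx\n", (unsigned long long)limb, (unsigned long long)carry);
	return 0;
}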
static void
expand256_modm(bignum256modm out, const unsigned char *in, size_t len) {
	unsigned char work[64] = {0};

	/* (local declarations and the copy of the input into work are elided in
	   this excerpt) load the buffer as eight little-endian 64-bit words */
	x[0] = U8TO64_LE(work +  0);
	x[1] = U8TO64_LE(work +  8);
	x[2] = U8TO64_LE(work + 16);
	x[3] = U8TO64_LE(work + 24);
	x[4] = U8TO64_LE(work + 32);
	x[5] = U8TO64_LE(work + 40);
	x[6] = U8TO64_LE(work + 48);
	x[7] = U8TO64_LE(work + 56);

	/* out = x mod 2^264, repacked into five 56-bit limbs */
	out[0] = (                         x[0]) & 0xffffffffffffff;
	out[1] = ((x[ 0] >> 56) | (x[ 1] <<  8)) & 0xffffffffffffff;
	out[2] = ((x[ 1] >> 48) | (x[ 2] << 16)) & 0xffffffffffffff;
	out[3] = ((x[ 2] >> 40) | (x[ 3] << 24)) & 0xffffffffffffff;
	out[4] = ((x[ 3] >> 32) | (x[ 4] << 32)) & 0x0000ffffffffff;

	/* q1 = x >> 248, also in 56-bit limbs; a Barrett pass then reduces the
	   whole input modulo m */
	q1[0] = ((x[ 3] >> 56) | (x[ 4] <<  8)) & 0xffffffffffffff;
	q1[1] = ((x[ 4] >> 48) | (x[ 5] << 16)) & 0xffffffffffffff;
	q1[2] = ((x[ 5] >> 40) | (x[ 6] << 24)) & 0xffffffffffffff;
	q1[3] = ((x[ 6] >> 32) | (x[ 7] << 32)) & 0xffffffffffffff;
	q1[4] = ((x[ 7] >> 24));

	barrett_reduce256_modm(out, q1, out);
}
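
/*
 * Hedged helper sketch (standalone toy): U8TO64_LE is defined elsewhere in
 * this library; the portable loop below only illustrates what such a
 * little-endian 8-byte load computes (byte i lands at bits 8*i .. 8*i+7).
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t load64_le(const unsigned char *p) {
	uint64_t v = 0;
	int i;
	for (i = 0; i < 8; i++)
		v |= (uint64_t)p[i] << (8 * i);
	return v;
}

int main(void) {
	unsigned char buf[8] = { 0xef, 0xcd, 0xab, 0x89, 0x67, 0x45, 0x23, 0x01 };
	printf("%llx\n", (unsigned long long)load64_le(buf));   /* 123456789abcdef */
	return 0;
}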
static void
expand_raw256_modm(bignum256modm out, const unsigned char in[32]) {
	bignum256modm_element_t x[4];

	x[0] = U8TO64_LE(in +  0);
	x[1] = U8TO64_LE(in +  8);
	x[2] = U8TO64_LE(in + 16);
	x[3] = U8TO64_LE(in + 24);

	/* repack the four 64-bit words into five 56-bit limbs, without reducing */
	out[0] = (                         x[0]) & 0xffffffffffffff;
	out[1] = ((x[ 0] >> 56) | (x[ 1] <<  8)) & 0xffffffffffffff;
	out[2] = ((x[ 1] >> 48) | (x[ 2] << 16)) & 0xffffffffffffff;
	out[3] = ((x[ 2] >> 40) | (x[ 3] << 24)) & 0xffffffffffffff;
	out[4] = ((x[ 3] >> 32)                ) & 0x000000ffffffff;
}
static void
contract256_modm(unsigned char out[32], const bignum256modm in) {
	/* merge the five 56-bit limbs back into four little-endian 64-bit words */
	U64TO8_LE(out +  0, (in[0]      ) | (in[1] << 56));
	U64TO8_LE(out +  8, (in[1] >>  8) | (in[2] << 48));
	U64TO8_LE(out + 16, (in[2] >> 16) | (in[3] << 40));
	U64TO8_LE(out + 24, (in[3] >> 24) | (in[4] << 32));
}
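
/*
 * Hedged round-trip sketch (standalone toy with local copies of the shift
 * patterns above, not calls into this file): four 64-bit words are split into
 * five 56-bit limbs and reassembled, showing that the expand_raw / contract
 * style repacking loses nothing for values below 2^256.
 */
#include <stdint.h>
#include <stdio.h>

int main(void) {
	uint64_t x[4] = { 0x0123456789abcdefULL, 0xfedcba9876543210ULL,
	                  0x0f1e2d3c4b5a6978ULL, 0x1122334455667788ULL };
	uint64_t limb[5], y[4];
	const uint64_t M56 = 0xffffffffffffffULL;

	/* split: same shifts as expand_raw256_modm */
	limb[0] = ( x[0]                       ) & M56;
	limb[1] = ((x[0] >> 56) | (x[1] <<  8)) & M56;
	limb[2] = ((x[1] >> 48) | (x[2] << 16)) & M56;
	limb[3] = ((x[2] >> 40) | (x[3] << 24)) & M56;
	limb[4] = ( x[3] >> 32                 );

	/* merge: same shifts as contract256_modm */
	y[0] = (limb[0]      ) | (limb[1] << 56);
	y[1] = (limb[1] >>  8) | (limb[2] << 48);
	y[2] = (limb[2] >> 16) | (limb[3] << 40);
	y[3] = (limb[3] >> 24) | (limb[4] << 32);

	/* prints 1: the round trip is exact */
	printf("%d\n", (x[0] == y[0]) && (x[1] == y[1]) && (x[2] == y[2]) && (x[3] == y[3]));
	return 0;
}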
static void
contract256_window4_modm(signed char r[64], const bignum256modm in) {
	signed char *quads = r;

	/* split each limb into 4-bit digits, low nibble first */
	for (i = 0; i < 5; i++) {
		v = in[i];
		m = (i == 4) ? 8 : 14;   /* the top limb contributes only 8 nibbles */
		for (j = 0; j < m; j++) { *quads++ = (v & 15); v >>= 4; }
	}

	/* recode the digits from 0..15 into -8..7, pushing a carry upward */
	for (i = 0; i < 63; i++) {
		r[i] += carry;
		r[i+1] += (r[i] >> 4);
		r[i] &= 15;
		carry = (r[i] >> 3);
		r[i] -= (carry << 4);
	}
	r[63] += carry;
}
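
/*
 * Hedged sketch (standalone toy with hypothetical names): the digit recoding
 * above, applied to a small value.  Plain base-16 digits in 0..15 are
 * rewritten into the range -8..7 by pushing a carry into the next digit; the
 * digits, times powers of 16, still sum to the original value.
 */
#include <stdio.h>

int main(void) {
	unsigned int v = 0x3e9d;
	signed char d[5] = {0};
	signed char carry = 0;
	long acc = 0;
	int i;

	for (i = 0; i < 4; i++)                    /* unsigned base-16 digits */
		d[i] = (signed char)((v >> (4 * i)) & 15);

	for (i = 0; i < 4; i++) {                  /* same carry rule as above */
		d[i] += carry;
		d[i + 1] += (d[i] >> 4);
		d[i] &= 15;
		carry = (signed char)(d[i] >> 3);
		d[i] -= (signed char)(carry << 4);
	}
	d[4] += carry;

	for (i = 4; i >= 0; i--)                   /* reassemble: acc == 0x3e9d */
		acc = acc * 16 + d[i];
	printf("%ld = %d %d %d %d %d\n", acc, d[4], d[3], d[2], d[1], d[0]);
	return 0;
}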
static void
contract256_slidingwindow_modm(signed char r[256], const bignum256modm s, int windowsize) {
	int m = (1 << (windowsize - 1)) - 1, soplen = 256;
	signed char *bits = r;

	/* first put the binary expansion into r */
	for (i = 0; i < 4; i++) {
		v = s[i];
		for (j = 0; j < 56; j++, v >>= 1)
			*bits++ = (v & 1);
	}
	v = s[4];
	for (j = 0; j < 32; j++, v >>= 1)
		*bits++ = (v & 1);

	/* greedily combine nearby bits into signed digits of absolute value at most m */
	for (j = 0; j < soplen; j++) {
		if (!r[j])
			continue;

		for (b = 1; (b < (soplen - j)) && (b <= 6); b++) {
			if ((r[j] + (r[j + b] << b)) <= m) {
				r[j] += r[j + b] << b;
				r[j + b] = 0;
			} else if ((r[j] - (r[j + b] << b)) >= -m) {
				r[j] -= r[j + b] << b;
				for (k = j + b; k < soplen; k++) {
					if (!r[k]) {
						r[k] = 1;
						break;
					}
					r[k] = 0;
				}
			} else if (r[j + b]) {
				break;
			}
		}
	}
}
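
/*
 * Hedged sketch (standalone toy): the same sliding-window pass on an 8-bit
 * value with windowsize 4, so m = 7.  Set bits are greedily merged into odd
 * signed digits of absolute value at most 7, and the digits, times powers of
 * two, still sum to the original value.
 */
#include <stdio.h>

int main(void) {
	signed char r[8];
	int v = 0xb9, j, b, k, m = 7, soplen = 8;
	long acc = 0;

	for (j = 0; j < soplen; j++)               /* binary expansion, LSB first */
		r[j] = (signed char)((v >> j) & 1);

	for (j = 0; j < soplen; j++) {             /* same combining rules as above */
		if (!r[j])
			continue;
		for (b = 1; (b < (soplen - j)) && (b <= 6); b++) {
			if ((r[j] + (r[j + b] << b)) <= m) {
				r[j] += r[j + b] << b;
				r[j + b] = 0;
			} else if ((r[j] - (r[j + b] << b)) >= -m) {
				r[j] -= r[j + b] << b;
				for (k = j + b; k < soplen; k++) {
					if (!r[k]) { r[k] = 1; break; }
					r[k] = 0;
				}
			} else if (r[j + b]) {
				break;
			}
		}
	}

	for (j = soplen - 1; j >= 0; j--)          /* reassemble: acc == 0xb9 == 185 */
		acc = acc * 2 + r[j];
	printf("%ld = %d %d %d %d %d %d %d %d\n", acc,
	       r[7], r[6], r[5], r[4], r[3], r[2], r[1], r[0]);
	return 0;
}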
	/* subtract the low limbs: the switch falls through, so starting at case
	   `limbsize` processes limbs 0..limbsize with the borrow rippling upward
	   ((out[i] >> 63) is the borrow, since limbs hold at most 56 bits) */
	switch (limbsize) {
		case 4: out[i] = (a[i] - b[i])        ; carry = (out[i] >> 63); out[i] &= 0xffffffffffffff; i++;
		case 3: out[i] = (a[i] - b[i]) - carry; carry = (out[i] >> 63); out[i] &= 0xffffffffffffff; i++;
		case 2: out[i] = (a[i] - b[i]) - carry; carry = (out[i] >> 63); out[i] &= 0xffffffffffffff; i++;
		case 1: out[i] = (a[i] - b[i]) - carry; carry = (out[i] >> 63); out[i] &= 0xffffffffffffff; i++;
		case 0:
		default: out[i] = (a[i] - b[i]) - carry;
	}
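
/*
 * Hedged illustration (standalone toy, demo names only): how the
 * (out[i] >> 63) borrow extraction above works.  Limbs are at most 56 bits,
 * so a - b can only go "negative" by wrapping into the top bits of the
 * uint64_t, and bit 63 then acts as the borrow flag for the next limb.
 */
#include <stdint.h>
#include <stdio.h>

int main(void) {
	uint64_t a = 5, b = 9;
	uint64_t t = a - b;                      /* wraps: 0xfffffffffffffffc */
	uint64_t borrow = t >> 63;               /* 1 when a < b, 0 otherwise */
	uint64_t limb = t & 0xffffffffffffffULL; /* keep only the low 56 bits */

	printf("%llu %llx\n", (unsigned long long)borrow, (unsigned long long)limb);
	return 0;
}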
	/* compare by subtraction: only the running borrow is kept, and the final
	   borrow is 1 exactly when a < b over the limbs examined */
	switch (limbsize) {
		case 4: t = (a[i] - b[i])        ; carry = (t >> 63); i++;
		case 3: t = (a[i] - b[i]) - carry; carry = (t >> 63); i++;
		case 2: t = (a[i] - b[i]) - carry; carry = (t >> 63); i++;
		case 1: t = (a[i] - b[i]) - carry; carry = (t >> 63); i++;
		case 0: t = (a[i] - b[i]) - carry; carry = (t >> 63);
	}
	/* same chain with the operands swapped: the final borrow is set exactly
	   when b < a, so it is clear exactly when a <= b */
	switch (limbsize) {
		case 4: t = (b[i] - a[i])        ; carry = (t >> 63); i++;
		case 3: t = (b[i] - a[i]) - carry; carry = (t >> 63); i++;
		case 2: t = (b[i] - a[i]) - carry; carry = (t >> 63); i++;
		case 1: t = (b[i] - a[i]) - carry; carry = (t >> 63); i++;
		case 0: t = (b[i] - a[i]) - carry; carry = (t >> 63);
	}
	/* zero check: all five limbs must be 0 */
	for (i = 0; i < 5; i++)
		if (a[i]) return 0;

	/* one check: limb 0 must be 1 and every higher limb 0 */
	for (i = 0; i < 5; i++)
		if (a[i] != ((i) ? 0 : 1)) return 0;
358 (
a[2] & 0xffffffffff0000));
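
/*
 * Hedged sketch (standalone toy): the same "does it fit in N bits" trick on
 * two 56-bit limbs.  OR together every bit at position N or above -- whole
 * limbs where possible, a masked limb where the cut falls mid-limb -- and
 * compare the result with zero.
 */
#include <stdint.h>
#include <stdio.h>

int main(void) {
	/* value = hi * 2^56 + lo; ask whether it fits in 64 bits */
	uint64_t lo = 0xffffffffffffffULL;   /* bits 0..55 of the value */
	uint64_t hi = 0x00000000000100ULL;   /* bits 56..111 of the value */
	uint64_t over = hi & ~0xffULL;       /* bits 64 and above live in hi >> 8 */

	printf("%s\n", (over == 0) ? "fits in 64 bits" : "needs more than 64 bits");
	return 0;
}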