#define bignum256modm_bits_per_limb 30
#define bignum256modm_limb_size 9

/* modm_m: the group order m = 2^252 + 27742317777372353535851937790883648493 (low limbs shown) */
    0x1cf5d3ed, 0x20498c69, 0x2f79cd65, 0x37be77a8,
    0x00000014, 0x00000000, 0x00000000, 0x00000000,

/* modm_mu: mu = floor(2^512 / m), the Barrett reduction constant (low limbs shown) */
    0x0a2c131b, 0x3673968c, 0x06329a7e, 0x01885742,
    0x3fffeb21, 0x3fffffff, 0x3fffffff, 0x3fffffff,
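/*
   Limb layout: a bignum256modm stores a scalar in 9 limbs of 30 bits (8*30 = 240
   bits plus a 16-bit top limb for fully reduced values).  modm_m above is the low
   part of the group order and modm_mu the low part of the Barrett constant used
   further down.

   Hypothetical sketch (not part of the original file, which never needs it): a
   plain, variable-time check that an expanded scalar is already below the group
   order, comparing limbs from most significant to least.  It assumes the complete
   modm_m array declared above.
*/
static int
is_reduced_sketch(const bignum256modm a) {
    int i;
    for (i = 8; i >= 0; i--) {
        if (a[i] > modm_m[i]) return 0;  /* a > m: not reduced  */
        if (a[i] < modm_m[i]) return 1;  /* a < m: reduced      */
    }
    return 0;                            /* a == m: not reduced */
}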
    /* t = r - m: borrow-propagating subtraction over 30-bit limbs */
    pb += modm_m[0]; b = lt_modm(r[0], pb); t[0] = (r[0] - pb + (b << 30)); pb = b;
    pb += modm_m[1]; b = lt_modm(r[1], pb); t[1] = (r[1] - pb + (b << 30)); pb = b;
    pb += modm_m[2]; b = lt_modm(r[2], pb); t[2] = (r[2] - pb + (b << 30)); pb = b;
    pb += modm_m[3]; b = lt_modm(r[3], pb); t[3] = (r[3] - pb + (b << 30)); pb = b;
    pb += modm_m[4]; b = lt_modm(r[4], pb); t[4] = (r[4] - pb + (b << 30)); pb = b;
    pb += modm_m[5]; b = lt_modm(r[5], pb); t[5] = (r[5] - pb + (b << 30)); pb = b;
    pb += modm_m[6]; b = lt_modm(r[6], pb); t[6] = (r[6] - pb + (b << 30)); pb = b;
    pb += modm_m[7]; b = lt_modm(r[7], pb); t[7] = (r[7] - pb + (b << 30)); pb = b;
    pb += modm_m[8]; b = lt_modm(r[8], pb); t[8] = (r[8] - pb + (b << 16));
    /* keep r if it was already below m; mask (set from the final borrow on an
       omitted line) is all-one exactly when t = r - m should replace r */
    r[0] ^= mask & (r[0] ^ t[0]);
    r[1] ^= mask & (r[1] ^ t[1]);
    r[2] ^= mask & (r[2] ^ t[2]);
    r[3] ^= mask & (r[3] ^ t[3]);
    r[4] ^= mask & (r[4] ^ t[4]);
    r[5] ^= mask & (r[5] ^ t[5]);
    r[6] ^= mask & (r[6] ^ t[6]);
    r[7] ^= mask & (r[7] ^ t[7]);
    r[8] ^= mask & (r[8] ^ t[8]);
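/*
   The block above is a branch-free conditional assignment: the final borrow b of
   the t = r - m subtraction is 1 exactly when r < m, and the mask derived from it
   either keeps r untouched or copies t into r, with no data-dependent branch.

   Hypothetical standalone illustration (not in the original file): the same select
   trick on a single word, returning x when flag == 1 and y when flag == 0.
*/
static uint32_t
ct_select_u32_sketch(uint32_t flag, uint32_t x, uint32_t y) {
    uint32_t mask = (uint32_t)0 - flag;   /* 0xffffffff if flag == 1, 0 if flag == 0 */
    return y ^ (mask & (x ^ y));          /* y unless mask is all-one, then x        */
}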
    /* r = r1 - r2; a negative difference wraps modulo 2^264 (note the b << 24 on the top limb) */
    pb += r2[0]; b = lt_modm(r1[0], pb); r[0] = (r1[0] - pb + (b << 30)); pb = b;
    pb += r2[1]; b = lt_modm(r1[1], pb); r[1] = (r1[1] - pb + (b << 30)); pb = b;
    pb += r2[2]; b = lt_modm(r1[2], pb); r[2] = (r1[2] - pb + (b << 30)); pb = b;
    pb += r2[3]; b = lt_modm(r1[3], pb); r[3] = (r1[3] - pb + (b << 30)); pb = b;
    pb += r2[4]; b = lt_modm(r1[4], pb); r[4] = (r1[4] - pb + (b << 30)); pb = b;
    pb += r2[5]; b = lt_modm(r1[5], pb); r[5] = (r1[5] - pb + (b << 30)); pb = b;
    pb += r2[6]; b = lt_modm(r1[6], pb); r[6] = (r1[6] - pb + (b << 30)); pb = b;
    pb += r2[7]; b = lt_modm(r1[7], pb); r[7] = (r1[7] - pb + (b << 30)); pb = b;
    pb += r2[8]; b = lt_modm(r1[8], pb); r[8] = (r1[8] - pb + (b << 24));
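/*
   Context (a summary of the standard Barrett reduction, HAC Algorithm 14.42, which
   this fragment appears to implement): the caller supplies q1 = x >> 248 and
   r1 = x mod 2^264; the omitted lines compute q3 = (q1 * mu) >> 264 and
   r2 = (q3 * m) mod 2^264, the subtraction above forms r = r1 - r2, and at most
   two trailing conditional subtractions of m bring r into [0, m).
*/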
    /* r = x + y: carry-propagating addition over 30-bit limbs */
    c  = x[0] + y[0]; r[0] = c & 0x3fffffff; c >>= 30;
    c += x[1] + y[1]; r[1] = c & 0x3fffffff; c >>= 30;
    c += x[2] + y[2]; r[2] = c & 0x3fffffff; c >>= 30;
    c += x[3] + y[3]; r[3] = c & 0x3fffffff; c >>= 30;
    c += x[4] + y[4]; r[4] = c & 0x3fffffff; c >>= 30;
    c += x[5] + y[5]; r[5] = c & 0x3fffffff; c >>= 30;
    c += x[6] + y[6]; r[6] = c & 0x3fffffff; c >>= 30;
    c += x[7] + y[7]; r[7] = c & 0x3fffffff; c >>= 30;
    c += x[8] + y[8]; r[8] = c;
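/*
   Each limb is at most 30 bits, so x[i] + y[i] plus the incoming carry always fits
   in a 32-bit word: & 0x3fffffff keeps the low 30 bits and >> 30 carries the rest
   into the next limb.  The sum can still exceed the group order; in ed25519-donna
   it is folded back with a follow-up reduce256_modm call that is not part of this
   fragment.
*/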
    f = (bignum256modm_element_t)c; q1[0] = (q1[0] | (f << 22)) & 0x3fffffff; q1[1] = (f >> 8) & 0x3fffff; c >>= 30;
    f = (bignum256modm_element_t)c; q1[1] = (q1[1] | (f << 22)) & 0x3fffffff; q1[2] = (f >> 8) & 0x3fffff; c >>= 30;
    f = (bignum256modm_element_t)c; q1[2] = (q1[2] | (f << 22)) & 0x3fffffff; q1[3] = (f >> 8) & 0x3fffff; c >>= 30;
    f = (bignum256modm_element_t)c; q1[3] = (q1[3] | (f << 22)) & 0x3fffffff; q1[4] = (f >> 8) & 0x3fffff; c >>= 30;
    f = (bignum256modm_element_t)c; q1[4] = (q1[4] | (f << 22)) & 0x3fffffff; q1[5] = (f >> 8) & 0x3fffff; c >>= 30;
    f = (bignum256modm_element_t)c; q1[5] = (q1[5] | (f << 22)) & 0x3fffffff; q1[6] = (f >> 8) & 0x3fffff; c >>= 30;
    f = (bignum256modm_element_t)c; q1[6] = (q1[6] | (f << 22)) & 0x3fffffff; q1[7] = (f >> 8) & 0x3fffff; c >>= 30;

    barrett_reduce256_modm(r, q1, r1);
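/*
   The q1 lines above splice successive 30-bit chunks of the running product in with
   an 8-bit offset: the low 8 bits of each chunk top off one q1 limb (f << 22) and
   the remaining bits start the next (f >> 8), which matches shifting the product
   right by 248 bits (248 = 8*30 + 8).  Presumably together with the low half r1
   accumulated on the omitted lines, q1 is handed to barrett_reduce256_modm, which
   returns the product reduced modulo the group order.
*/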
expand256_modm(bignum256modm out, const unsigned char *in, size_t len) {
    unsigned char work[64] = {0};
    x[0]  = U8TO32_LE(work +  0);
    x[1]  = U8TO32_LE(work +  4);
    x[2]  = U8TO32_LE(work +  8);
    x[3]  = U8TO32_LE(work + 12);
    x[4]  = U8TO32_LE(work + 16);
    x[5]  = U8TO32_LE(work + 20);
    x[6]  = U8TO32_LE(work + 24);
    x[7]  = U8TO32_LE(work + 28);
    x[8]  = U8TO32_LE(work + 32);
    x[9]  = U8TO32_LE(work + 36);
    x[10] = U8TO32_LE(work + 40);
    x[11] = U8TO32_LE(work + 44);
    x[12] = U8TO32_LE(work + 48);
    x[13] = U8TO32_LE(work + 52);
    x[14] = U8TO32_LE(work + 56);
    x[15] = U8TO32_LE(work + 60);
    /* out = x mod 2^264, repacked from 32-bit words into 30-bit limbs */
    out[0] = (                       x[0]) & 0x3fffffff;
    out[1] = ((x[ 0] >> 30) | (x[ 1] <<  2)) & 0x3fffffff;
    out[2] = ((x[ 1] >> 28) | (x[ 2] <<  4)) & 0x3fffffff;
    out[3] = ((x[ 2] >> 26) | (x[ 3] <<  6)) & 0x3fffffff;
    out[4] = ((x[ 3] >> 24) | (x[ 4] <<  8)) & 0x3fffffff;
    out[5] = ((x[ 4] >> 22) | (x[ 5] << 10)) & 0x3fffffff;
    out[6] = ((x[ 5] >> 20) | (x[ 6] << 12)) & 0x3fffffff;
    out[7] = ((x[ 6] >> 18) | (x[ 7] << 14)) & 0x3fffffff;
    out[8] = ((x[ 7] >> 16) | (x[ 8] << 16)) & 0x00ffffff;
    /* q1 = x >> 248, also packed into 30-bit limbs */
    q1[0] = ((x[ 7] >> 24) | (x[ 8] <<  8)) & 0x3fffffff;
    q1[1] = ((x[ 8] >> 22) | (x[ 9] << 10)) & 0x3fffffff;
    q1[2] = ((x[ 9] >> 20) | (x[10] << 12)) & 0x3fffffff;
    q1[3] = ((x[10] >> 18) | (x[11] << 14)) & 0x3fffffff;
    q1[4] = ((x[11] >> 16) | (x[12] << 16)) & 0x3fffffff;
    q1[5] = ((x[12] >> 14) | (x[13] << 18)) & 0x3fffffff;
    q1[6] = ((x[13] >> 12) | (x[14] << 20)) & 0x3fffffff;
    q1[7] = ((x[14] >> 10) | (x[15] << 22)) & 0x3fffffff;
    q1[8] = ((x[15] >>  8));

    barrett_reduce256_modm(out, q1, out);
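/*
   Hypothetical usage sketch (the wrapper itself is not part of the original file):
   expand256_modm is how a 64-byte hash output becomes a scalar modulo the group
   order, e.g. Ed25519's H(R, A, M) value.
*/
static void
hash_to_scalar_sketch(bignum256modm out, const unsigned char hash[64]) {
    expand256_modm(out, hash, 64);   /* load 512 bits and Barrett-reduce them mod m */
}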
expand_raw256_modm(bignum256modm out, const unsigned char in[32]) {
    x[0] = U8TO32_LE(in +  0);
    x[1] = U8TO32_LE(in +  4);
    x[2] = U8TO32_LE(in +  8);
    x[3] = U8TO32_LE(in + 12);
    x[4] = U8TO32_LE(in + 16);
    x[5] = U8TO32_LE(in + 20);
    x[6] = U8TO32_LE(in + 24);
    x[7] = U8TO32_LE(in + 28);
    /* repack eight 32-bit words into nine 30-bit limbs; the top limb keeps only 16 bits */
    out[0] = (                       x[0]) & 0x3fffffff;
    out[1] = ((x[ 0] >> 30) | (x[ 1] <<  2)) & 0x3fffffff;
    out[2] = ((x[ 1] >> 28) | (x[ 2] <<  4)) & 0x3fffffff;
    out[3] = ((x[ 2] >> 26) | (x[ 3] <<  6)) & 0x3fffffff;
    out[4] = ((x[ 3] >> 24) | (x[ 4] <<  8)) & 0x3fffffff;
    out[5] = ((x[ 4] >> 22) | (x[ 5] << 10)) & 0x3fffffff;
    out[6] = ((x[ 5] >> 20) | (x[ 6] << 12)) & 0x3fffffff;
    out[7] = ((x[ 6] >> 18) | (x[ 7] << 14)) & 0x3fffffff;
    out[8] = ((x[ 7] >> 16)               ) & 0x0000ffff;
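/*
   Unlike expand256_modm, this raw variant only unpacks 32 little-endian bytes into
   limbs (note the 0x0000ffff mask on the 16-bit top limb); it performs no reduction,
   so the caller must already know the value is below the group order or reduce it
   separately.
*/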
contract256_modm(unsigned char out[32], const bignum256modm in) {
    /* pack nine 30-bit limbs back into 32 little-endian bytes */
    U32TO8_LE(out +  0, (in[0]      ) | (in[1] << 30));
    U32TO8_LE(out +  4, (in[1] >>  2) | (in[2] << 28));
    U32TO8_LE(out +  8, (in[2] >>  4) | (in[3] << 26));
    U32TO8_LE(out + 12, (in[3] >>  6) | (in[4] << 24));
    U32TO8_LE(out + 16, (in[4] >>  8) | (in[5] << 22));
    U32TO8_LE(out + 20, (in[5] >> 10) | (in[6] << 20));
    U32TO8_LE(out + 24, (in[6] >> 12) | (in[7] << 18));
    U32TO8_LE(out + 28, (in[7] >> 14) | (in[8] << 16));
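/*
   Hypothetical round-trip sketch (not in the original file): contract256_modm
   undoes expand_raw256_modm for any 256-bit value.
*/
static void
expand_contract_roundtrip_sketch(unsigned char out32[32], const unsigned char in32[32]) {
    bignum256modm t;
    expand_raw256_modm(t, in32);   /* bytes -> 30-bit limbs, no reduction */
    contract256_modm(out32, t);    /* 30-bit limbs -> bytes               */
}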
contract256_window4_modm(signed char r[64], const bignum256modm in) {
    signed char *quads = r;

    for (i = 0; i < 8; i += 2) {
        for (j = 0; j < 7; j++) {
        for (j = 0; j < 8; j++) {

    *quads++ = (v & 15); v >>= 4;
    *quads++ = (v & 15); v >>= 4;
    *quads++ = (v & 15); v >>= 4;
    *quads++ = (v & 15); v >>= 4;

    for (i = 0; i < 63; i++) {
        r[i+1] += (r[i] >> 4);
        r[i] -= (carry << 4);
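/*
   contract256_window4_modm writes the scalar as 64 radix-16 digits (4 bits each,
   least significant first) and then recodes them into the signed range [-8, 7]:
   whenever a digit exceeds 7 it is reduced by 16 and a carry is pushed into the
   next digit.  This signed-nibble form is what fixed-window scalar multiplication
   typically consumes.
*/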
contract256_slidingwindow_modm(signed char r[256], const bignum256modm s, int windowsize) {
    int m = (1 << (windowsize - 1)) - 1, soplen = 256;
    signed char *bits = r;

    for (i = 0; i < 8; i++) {
        for (j = 0; j < 30; j++, v >>= 1)
        for (j = 0; j < 16; j++, v >>= 1)

    for (j = 0; j < soplen; j++) {
        for (b = 1; (b < (soplen - j)) && (b <= 6); b++) {
            if ((r[j] + (r[j + b] << b)) <= m) {
                r[j] += r[j + b] << b;
            } else if ((r[j] - (r[j + b] << b)) >= -m) {
                r[j] -= r[j + b] << b;
                for (k = j + b; k < soplen; k++) {
            } else if (r[j + b]) {
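/*
   contract256_slidingwindow_modm first writes the scalar out as 256 individual bits
   (30 from each full limb, 16 from the top one), then greedily merges nearby bits
   into signed digits no larger than m = (1 << (windowsize - 1)) - 1, zeroing the
   bits it absorbs and propagating a carry when a subtraction would overflow.  The
   result is the sparse signed representation used by sliding-window scalar
   multiplication.
*/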
        case 8: out[i] = (a[i] - b[i]) - carry; carry = (out[i] >> 31); out[i] &= 0x3fffffff; i++;
        case 7: out[i] = (a[i] - b[i]) - carry; carry = (out[i] >> 31); out[i] &= 0x3fffffff; i++;
        case 6: out[i] = (a[i] - b[i]) - carry; carry = (out[i] >> 31); out[i] &= 0x3fffffff; i++;
        case 5: out[i] = (a[i] - b[i]) - carry; carry = (out[i] >> 31); out[i] &= 0x3fffffff; i++;
        case 4: out[i] = (a[i] - b[i]) - carry; carry = (out[i] >> 31); out[i] &= 0x3fffffff; i++;
        case 3: out[i] = (a[i] - b[i]) - carry; carry = (out[i] >> 31); out[i] &= 0x3fffffff; i++;
        case 2: out[i] = (a[i] - b[i]) - carry; carry = (out[i] >> 31); out[i] &= 0x3fffffff; i++;
        case 1: out[i] = (a[i] - b[i]) - carry; carry = (out[i] >> 31); out[i] &= 0x3fffffff; i++;
        default: out[i] = (a[i] - b[i]) - carry;
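/*
   The switch above is a deliberate fall-through chain: its operand (a limb count)
   decides how many limbs participate, and each case subtracts one limb with the
   borrow from the previous one.  (out[i] >> 31) is 1 exactly when the 30-bit
   subtraction went negative, & 0x3fffffff masks the result back to a limb, and the
   default case leaves the final difference unmasked.
*/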
        case 8: if (a[8] > b[8]) return 0; if (a[8] < b[8]) return 1;
        case 7: if (a[7] > b[7]) return 0; if (a[7] < b[7]) return 1;
        case 6: if (a[6] > b[6]) return 0; if (a[6] < b[6]) return 1;
        case 5: if (a[5] > b[5]) return 0; if (a[5] < b[5]) return 1;
        case 4: if (a[4] > b[4]) return 0; if (a[4] < b[4]) return 1;
        case 3: if (a[3] > b[3]) return 0; if (a[3] < b[3]) return 1;
        case 2: if (a[2] > b[2]) return 0; if (a[2] < b[2]) return 1;
        case 1: if (a[1] > b[1]) return 0; if (a[1] < b[1]) return 1;
        case 0: if (a[0] > b[0]) return 0; if (a[0] < b[0]) return 1;
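/*
   This comparison chain also falls through: starting from the most significant limb
   selected by the switch operand, it returns 0 as soon as a limb of a is larger,
   returns 1 as soon as it is smaller, and drops to the next case when the limbs are
   equal, giving a lexicographic "a < b" test over a limb prefix.
*/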
        case 8: if (a[8] > b[8]) return 0; if (a[8] < b[8]) return 1;
        case 7: if (a[7] > b[7]) return 0; if (a[7] < b[7]) return 1;
        case 6: if (a[6] > b[6]) return 0; if (a[6] < b[6]) return 1;
        case 5: if (a[5] > b[5]) return 0; if (a[5] < b[5]) return 1;
        case 4: if (a[4] > b[4]) return 0; if (a[4] < b[4]) return 1;
        case 3: if (a[3] > b[3]) return 0; if (a[3] < b[3]) return 1;
        case 2: if (a[2] > b[2]) return 0; if (a[2] < b[2]) return 1;
        case 1: if (a[1] > b[1]) return 0; if (a[1] < b[1]) return 1;
        case 0: if (a[0] > b[0]) return 0; if (a[0] < b[0]) return 1;
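/*
   The second chain above is limb-for-limb identical to the first; in ed25519-donna
   the two correspond to the strict and non-strict comparison helpers
   (lt256_modm_batch and lte256_modm_batch), which differ only in what they return
   when every inspected limb compares equal.  That trailing return is not part of
   this fragment.
*/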
    for (i = 0; i < 9; i++)

    for (i = 1; i < 9; i++)
        (a[4] & 0x3fffff00));
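/*
   The mask above covers bit 128 upward within limb 4: limbs 0-3 hold bits 0-119,
   limb 4 holds bits 120-149, and 0x3fffff00 selects its bits 8-29, i.e. scalar bits
   128-149.  OR-ed with the higher limbs on the omitted lines, a zero result means
   the value fits in 128 bits, which is what the surrounding predicate tests.
*/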
/* Supporting definitions used by this file (full definitions live elsewhere):
   #define mul32x32_64(a, b)           widening 32x32 -> 64-bit multiply (compiler-specific body)
   typedef unsigned __int64 uint64_t;  MSVC fallback when <stdint.h> is unavailable */
/* typedef uint32_t bignum256modm_element_t;           one 30-bit limb, stored in a uint32_t
   typedef bignum256modm_element_t bignum256modm[9];   nine limbs holding a 256-bit scalar */