Skip to content

Commit ba13656

Browse files
committed
Add generic optimizations
1 parent 03fb60a commit ba13656

File tree

6 files changed

+235
-210
lines changed

6 files changed

+235
-210
lines changed

out

Whitespace-only changes.

src/field_10x26_impl.h

Lines changed: 45 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -38,16 +38,20 @@ static void secp256k1_fe_impl_verify(const secp256k1_fe *a) {
3838
#endif
3939

4040
static void secp256k1_fe_impl_get_bounds(secp256k1_fe *r, int m) {
41-
r->n[0] = 0x3FFFFFFUL * 2 * m;
42-
r->n[1] = 0x3FFFFFFUL * 2 * m;
43-
r->n[2] = 0x3FFFFFFUL * 2 * m;
44-
r->n[3] = 0x3FFFFFFUL * 2 * m;
45-
r->n[4] = 0x3FFFFFFUL * 2 * m;
46-
r->n[5] = 0x3FFFFFFUL * 2 * m;
47-
r->n[6] = 0x3FFFFFFUL * 2 * m;
48-
r->n[7] = 0x3FFFFFFUL * 2 * m;
49-
r->n[8] = 0x3FFFFFFUL * 2 * m;
50-
r->n[9] = 0x03FFFFFUL * 2 * m;
41+
const uint64_t two_m = 2 * m;
42+
const uint64_t bound1 = 0x3FFFFFFUL * two_m;
43+
const uint64_t bound2 = 0x03FFFFFUL * two_m;
44+
45+
r->n[0] = bound1;
46+
r->n[1] = bound1;
47+
r->n[2] = bound1;
48+
r->n[3] = bound1;
49+
r->n[4] = bound1;
50+
r->n[5] = bound1;
51+
r->n[6] = bound1;
52+
r->n[7] = bound1;
53+
r->n[8] = bound1;
54+
r->n[9] = bound2;
5155
}
5256

5357
static void secp256k1_fe_impl_normalize(secp256k1_fe *r) {
@@ -257,8 +261,8 @@ static int secp256k1_fe_impl_normalizes_to_zero_var(const secp256k1_fe *r) {
257261
}
258262

259263
SECP256K1_INLINE static void secp256k1_fe_impl_set_int(secp256k1_fe *r, int a) {
264+
memset(r->n, 0, sizeof(r->n));
260265
r->n[0] = a;
261-
r->n[1] = r->n[2] = r->n[3] = r->n[4] = r->n[5] = r->n[6] = r->n[7] = r->n[8] = r->n[9] = 0;
262266
}
263267

264268
SECP256K1_INLINE static int secp256k1_fe_impl_is_zero(const secp256k1_fe *a) {
@@ -272,12 +276,11 @@ SECP256K1_INLINE static int secp256k1_fe_impl_is_odd(const secp256k1_fe *a) {
272276

273277
static int secp256k1_fe_impl_cmp_var(const secp256k1_fe *a, const secp256k1_fe *b) {
274278
int i;
279+
int diff;
275280
for (i = 9; i >= 0; i--) {
276-
if (a->n[i] > b->n[i]) {
277-
return 1;
278-
}
279-
if (a->n[i] < b->n[i]) {
280-
return -1;
281+
diff = (a->n[i] > b->n[i]) - (a->n[i] < b->n[i]);
282+
if (diff != 0) {
283+
return diff;
281284
}
282285
}
283286
return 0;
@@ -338,24 +341,30 @@ static void secp256k1_fe_impl_get_b32(unsigned char *r, const secp256k1_fe *a) {
338341
}
339342

340343
SECP256K1_INLINE static void secp256k1_fe_impl_negate_unchecked(secp256k1_fe *r, const secp256k1_fe *a, int m) {
344+
const uint32_t two_m1 = 2 * (m + 1);
345+
const uint32_t bound1 = 0x3FFFC2FUL * two_m1;
346+
const uint32_t bound2 = 0x3FFFFBFUL * two_m1;
347+
const uint32_t bound3 = 0x3FFFFFFUL * two_m1;
348+
const uint32_t bound4 = 0x03FFFFFUL * two_m1;
349+
341350
/* For all legal values of m (0..31), the following properties hold: */
342-
VERIFY_CHECK(0x3FFFC2FUL * 2 * (m + 1) >= 0x3FFFFFFUL * 2 * m);
343-
VERIFY_CHECK(0x3FFFFBFUL * 2 * (m + 1) >= 0x3FFFFFFUL * 2 * m);
344-
VERIFY_CHECK(0x3FFFFFFUL * 2 * (m + 1) >= 0x3FFFFFFUL * 2 * m);
345-
VERIFY_CHECK(0x03FFFFFUL * 2 * (m + 1) >= 0x03FFFFFUL * 2 * m);
351+
VERIFY_CHECK(bound1 >= 0x3FFFFFFUL * 2 * m);
352+
VERIFY_CHECK(bound2 >= 0x3FFFFFFUL * 2 * m);
353+
VERIFY_CHECK(bound3 >= 0x3FFFFFFUL * 2 * m);
354+
VERIFY_CHECK(bound4 >= 0x03FFFFFUL * 2 * m);
346355

347356
/* Due to the properties above, the left hand in the subtractions below is never less than
348357
* the right hand. */
349-
r->n[0] = 0x3FFFC2FUL * 2 * (m + 1) - a->n[0];
350-
r->n[1] = 0x3FFFFBFUL * 2 * (m + 1) - a->n[1];
351-
r->n[2] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[2];
352-
r->n[3] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[3];
353-
r->n[4] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[4];
354-
r->n[5] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[5];
355-
r->n[6] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[6];
356-
r->n[7] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[7];
357-
r->n[8] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[8];
358-
r->n[9] = 0x03FFFFFUL * 2 * (m + 1) - a->n[9];
358+
r->n[0] = bound1 - a->n[0];
359+
r->n[1] = bound2 - a->n[1];
360+
r->n[2] = bound3 - a->n[2];
361+
r->n[3] = bound3 - a->n[3];
362+
r->n[4] = bound3 - a->n[4];
363+
r->n[5] = bound3 - a->n[5];
364+
r->n[6] = bound3 - a->n[6];
365+
r->n[7] = bound3 - a->n[7];
366+
r->n[8] = bound3 - a->n[8];
367+
r->n[9] = bound4 - a->n[9];
359368
}
360369

361370
SECP256K1_INLINE static void secp256k1_fe_impl_mul_int_unchecked(secp256k1_fe *r, int a) {
@@ -1111,24 +1120,24 @@ static SECP256K1_INLINE void secp256k1_fe_storage_cmov(secp256k1_fe_storage *r,
11111120
}
11121121

11131122
static void secp256k1_fe_impl_to_storage(secp256k1_fe_storage *r, const secp256k1_fe *a) {
1114-
r->n[0] = a->n[0] | a->n[1] << 26;
1115-
r->n[1] = a->n[1] >> 6 | a->n[2] << 20;
1123+
r->n[0] = a->n[0] | a->n[1] << 26;
1124+
r->n[1] = a->n[1] >> 6 | a->n[2] << 20;
11161125
r->n[2] = a->n[2] >> 12 | a->n[3] << 14;
11171126
r->n[3] = a->n[3] >> 18 | a->n[4] << 8;
11181127
r->n[4] = a->n[4] >> 24 | a->n[5] << 2 | a->n[6] << 28;
1119-
r->n[5] = a->n[6] >> 4 | a->n[7] << 22;
1128+
r->n[5] = a->n[6] >> 4 | a->n[7] << 22;
11201129
r->n[6] = a->n[7] >> 10 | a->n[8] << 16;
11211130
r->n[7] = a->n[8] >> 16 | a->n[9] << 10;
11221131
}
11231132

11241133
static SECP256K1_INLINE void secp256k1_fe_impl_from_storage(secp256k1_fe *r, const secp256k1_fe_storage *a) {
11251134
r->n[0] = a->n[0] & 0x3FFFFFFUL;
1126-
r->n[1] = a->n[0] >> 26 | ((a->n[1] << 6) & 0x3FFFFFFUL);
1135+
r->n[1] = a->n[0] >> 26 | ((a->n[1] << 6) & 0x3FFFFFFUL);
11271136
r->n[2] = a->n[1] >> 20 | ((a->n[2] << 12) & 0x3FFFFFFUL);
11281137
r->n[3] = a->n[2] >> 14 | ((a->n[3] << 18) & 0x3FFFFFFUL);
1129-
r->n[4] = a->n[3] >> 8 | ((a->n[4] << 24) & 0x3FFFFFFUL);
1138+
r->n[4] = a->n[3] >> 8 | ((a->n[4] << 24) & 0x3FFFFFFUL);
11301139
r->n[5] = (a->n[4] >> 2) & 0x3FFFFFFUL;
1131-
r->n[6] = a->n[4] >> 28 | ((a->n[5] << 4) & 0x3FFFFFFUL);
1140+
r->n[6] = a->n[4] >> 28 | ((a->n[5] << 4) & 0x3FFFFFFUL);
11321141
r->n[7] = a->n[5] >> 22 | ((a->n[6] << 10) & 0x3FFFFFFUL);
11331142
r->n[8] = a->n[6] >> 16 | ((a->n[7] << 16) & 0x3FFFFFFUL);
11341143
r->n[9] = a->n[7] >> 10;

src/field_5x52_impl.h

Lines changed: 85 additions & 91 deletions
Original file line number | Diff line number | Diff line change
@@ -33,11 +33,15 @@ static void secp256k1_fe_impl_verify(const secp256k1_fe *a) {
3333
#endif
3434

3535
static void secp256k1_fe_impl_get_bounds(secp256k1_fe *r, int m) {
36-
r->n[0] = 0xFFFFFFFFFFFFFULL * 2 * m;
37-
r->n[1] = 0xFFFFFFFFFFFFFULL * 2 * m;
38-
r->n[2] = 0xFFFFFFFFFFFFFULL * 2 * m;
39-
r->n[3] = 0xFFFFFFFFFFFFFULL * 2 * m;
40-
r->n[4] = 0x0FFFFFFFFFFFFULL * 2 * m;
36+
const uint64_t two_m = 2 * m;
37+
const uint64_t bound1 = 0xFFFFFFFFFFFFFULL * two_m;
38+
const uint64_t bound2 = 0x0FFFFFFFFFFFFULL * two_m;
39+
40+
r->n[0] = bound1;
41+
r->n[1] = bound1;
42+
r->n[2] = bound1;
43+
r->n[3] = bound1;
44+
r->n[4] = bound2;
4145
}
4246

4347
static void secp256k1_fe_impl_normalize(secp256k1_fe *r) {
@@ -199,8 +203,8 @@ static int secp256k1_fe_impl_normalizes_to_zero_var(const secp256k1_fe *r) {
199203
}
200204

201205
SECP256K1_INLINE static void secp256k1_fe_impl_set_int(secp256k1_fe *r, int a) {
206+
memset(r->n, 0, sizeof(r->n));
202207
r->n[0] = a;
203-
r->n[1] = r->n[2] = r->n[3] = r->n[4] = 0;
204208
}
205209

206210
SECP256K1_INLINE static int secp256k1_fe_impl_is_zero(const secp256k1_fe *a) {
@@ -214,52 +218,33 @@ SECP256K1_INLINE static int secp256k1_fe_impl_is_odd(const secp256k1_fe *a) {
214218

215219
static int secp256k1_fe_impl_cmp_var(const secp256k1_fe *a, const secp256k1_fe *b) {
216220
int i;
221+
int8_t diff;
217222
for (i = 4; i >= 0; i--) {
218-
if (a->n[i] > b->n[i]) {
219-
return 1;
220-
}
221-
if (a->n[i] < b->n[i]) {
222-
return -1;
223+
diff = (a->n[i] > b->n[i]) - (a->n[i] < b->n[i]);
224+
if (diff != 0) {
225+
return diff;
223226
}
224227
}
225228
return 0;
226229
}
227230

228231
static void secp256k1_fe_impl_set_b32_mod(secp256k1_fe *r, const unsigned char *a) {
229-
r->n[0] = (uint64_t)a[31]
230-
| ((uint64_t)a[30] << 8)
231-
| ((uint64_t)a[29] << 16)
232-
| ((uint64_t)a[28] << 24)
233-
| ((uint64_t)a[27] << 32)
234-
| ((uint64_t)a[26] << 40)
235-
| ((uint64_t)(a[25] & 0xF) << 48);
236-
r->n[1] = (uint64_t)((a[25] >> 4) & 0xF)
237-
| ((uint64_t)a[24] << 4)
238-
| ((uint64_t)a[23] << 12)
239-
| ((uint64_t)a[22] << 20)
240-
| ((uint64_t)a[21] << 28)
241-
| ((uint64_t)a[20] << 36)
242-
| ((uint64_t)a[19] << 44);
243-
r->n[2] = (uint64_t)a[18]
244-
| ((uint64_t)a[17] << 8)
245-
| ((uint64_t)a[16] << 16)
246-
| ((uint64_t)a[15] << 24)
247-
| ((uint64_t)a[14] << 32)
248-
| ((uint64_t)a[13] << 40)
249-
| ((uint64_t)(a[12] & 0xF) << 48);
250-
r->n[3] = (uint64_t)((a[12] >> 4) & 0xF)
251-
| ((uint64_t)a[11] << 4)
252-
| ((uint64_t)a[10] << 12)
253-
| ((uint64_t)a[9] << 20)
254-
| ((uint64_t)a[8] << 28)
255-
| ((uint64_t)a[7] << 36)
256-
| ((uint64_t)a[6] << 44);
257-
r->n[4] = (uint64_t)a[5]
258-
| ((uint64_t)a[4] << 8)
259-
| ((uint64_t)a[3] << 16)
260-
| ((uint64_t)a[2] << 24)
261-
| ((uint64_t)a[1] << 32)
262-
| ((uint64_t)a[0] << 40);
232+
uint64_t limbs[4];
233+
memcpy(limbs, a, 32);
234+
235+
#ifdef SECP256K1_LITTLE_ENDIAN
236+
limbs[0] = BYTESWAP_64(limbs[0]);
237+
limbs[1] = BYTESWAP_64(limbs[1]);
238+
limbs[2] = BYTESWAP_64(limbs[2]);
239+
limbs[3] = BYTESWAP_64(limbs[3]);
240+
#endif
241+
242+
r->n[0] = (limbs[3] & 0xFFFFFFFFFFFFFULL);
243+
r->n[1] = (limbs[3] >> 52) | ((limbs[2] & 0xFFFFFFFFFFULL) << 12);
244+
r->n[2] = (limbs[2] >> 40) | ((limbs[1] & 0xFFFFFFFULL) << 24);
245+
r->n[3] = (limbs[1] >> 28) | ((limbs[0] & 0xFFFFULL) << 36);
246+
247+
r->n[4] = (limbs[0] >> 16) & 0xFFFFFFFFFFFFULL;
263248
}
264249

265250
static int secp256k1_fe_impl_set_b32_limit(secp256k1_fe *r, const unsigned char *a) {
@@ -269,53 +254,60 @@ static int secp256k1_fe_impl_set_b32_limit(secp256k1_fe *r, const unsigned char
269254

270255
/** Convert a field element to a 32-byte big endian value. Requires the input to be normalized */
271256
static void secp256k1_fe_impl_get_b32(unsigned char *r, const secp256k1_fe *a) {
272-
r[0] = (a->n[4] >> 40) & 0xFF;
273-
r[1] = (a->n[4] >> 32) & 0xFF;
274-
r[2] = (a->n[4] >> 24) & 0xFF;
275-
r[3] = (a->n[4] >> 16) & 0xFF;
276-
r[4] = (a->n[4] >> 8) & 0xFF;
277-
r[5] = a->n[4] & 0xFF;
278-
r[6] = (a->n[3] >> 44) & 0xFF;
279-
r[7] = (a->n[3] >> 36) & 0xFF;
280-
r[8] = (a->n[3] >> 28) & 0xFF;
281-
r[9] = (a->n[3] >> 20) & 0xFF;
282-
r[10] = (a->n[3] >> 12) & 0xFF;
283-
r[11] = (a->n[3] >> 4) & 0xFF;
284-
r[12] = ((a->n[2] >> 48) & 0xF) | ((a->n[3] & 0xF) << 4);
285-
r[13] = (a->n[2] >> 40) & 0xFF;
286-
r[14] = (a->n[2] >> 32) & 0xFF;
287-
r[15] = (a->n[2] >> 24) & 0xFF;
288-
r[16] = (a->n[2] >> 16) & 0xFF;
289-
r[17] = (a->n[2] >> 8) & 0xFF;
290-
r[18] = a->n[2] & 0xFF;
291-
r[19] = (a->n[1] >> 44) & 0xFF;
292-
r[20] = (a->n[1] >> 36) & 0xFF;
293-
r[21] = (a->n[1] >> 28) & 0xFF;
294-
r[22] = (a->n[1] >> 20) & 0xFF;
295-
r[23] = (a->n[1] >> 12) & 0xFF;
296-
r[24] = (a->n[1] >> 4) & 0xFF;
297-
r[25] = ((a->n[0] >> 48) & 0xF) | ((a->n[1] & 0xF) << 4);
298-
r[26] = (a->n[0] >> 40) & 0xFF;
299-
r[27] = (a->n[0] >> 32) & 0xFF;
300-
r[28] = (a->n[0] >> 24) & 0xFF;
301-
r[29] = (a->n[0] >> 16) & 0xFF;
302-
r[30] = (a->n[0] >> 8) & 0xFF;
303-
r[31] = a->n[0] & 0xFF;
257+
const uint64_t a0 = a->n[0], a1 = a->n[1], a2 = a->n[2], a3 = a->n[3], a4 = a->n[4];
258+
259+
r[0] = (a4 >> 40) & 0xFF;
260+
r[1] = (a4 >> 32) & 0xFF;
261+
r[2] = (a4 >> 24) & 0xFF;
262+
r[3] = (a4 >> 16) & 0xFF;
263+
r[4] = (a4 >> 8) & 0xFF;
264+
r[5] = a4 & 0xFF;
265+
r[6] = (a3 >> 44) & 0xFF;
266+
r[7] = (a3 >> 36) & 0xFF;
267+
r[8] = (a3 >> 28) & 0xFF;
268+
r[9] = (a3 >> 20) & 0xFF;
269+
r[10] = (a3 >> 12) & 0xFF;
270+
r[11] = (a3 >> 4) & 0xFF;
271+
r[12] = ((a2 >> 48) & 0xF) | ((a3 & 0xF) << 4);
272+
r[13] = (a2 >> 40) & 0xFF;
273+
r[14] = (a2 >> 32) & 0xFF;
274+
r[15] = (a2 >> 24) & 0xFF;
275+
r[16] = (a2 >> 16) & 0xFF;
276+
r[17] = (a2 >> 8) & 0xFF;
277+
r[18] = a2 & 0xFF;
278+
r[19] = (a1 >> 44) & 0xFF;
279+
r[20] = (a1 >> 36) & 0xFF;
280+
r[21] = (a1 >> 28) & 0xFF;
281+
r[22] = (a1 >> 20) & 0xFF;
282+
r[23] = (a1 >> 12) & 0xFF;
283+
r[24] = (a1 >> 4) & 0xFF;
284+
r[25] = ((a0 >> 48) & 0xF) | ((a1 & 0xF) << 4);
285+
r[26] = (a0 >> 40) & 0xFF;
286+
r[27] = (a0 >> 32) & 0xFF;
287+
r[28] = (a0 >> 24) & 0xFF;
288+
r[29] = (a0 >> 16) & 0xFF;
289+
r[30] = (a0 >> 8) & 0xFF;
290+
r[31] = a0 & 0xFF;
304291
}
305292

306293
SECP256K1_INLINE static void secp256k1_fe_impl_negate_unchecked(secp256k1_fe *r, const secp256k1_fe *a, int m) {
294+
const uint32_t two_m1 = 2 * (m + 1);
295+
const uint64_t bound1 = 0xFFFFEFFFFFC2FULL * two_m1;
296+
const uint64_t bound2 = 0xFFFFFFFFFFFFFULL * two_m1;
297+
const uint64_t bound3 = 0x0FFFFFFFFFFFFULL * two_m1;
298+
307299
/* For all legal values of m (0..31), the following properties hold: */
308-
VERIFY_CHECK(0xFFFFEFFFFFC2FULL * 2 * (m + 1) >= 0xFFFFFFFFFFFFFULL * 2 * m);
309-
VERIFY_CHECK(0xFFFFFFFFFFFFFULL * 2 * (m + 1) >= 0xFFFFFFFFFFFFFULL * 2 * m);
310-
VERIFY_CHECK(0x0FFFFFFFFFFFFULL * 2 * (m + 1) >= 0x0FFFFFFFFFFFFULL * 2 * m);
300+
VERIFY_CHECK(bound1 >= 0xFFFFFFFFFFFFFULL * 2 * m);
301+
VERIFY_CHECK(bound2 >= 0xFFFFFFFFFFFFFULL * 2 * m);
302+
VERIFY_CHECK(bound3 >= 0x0FFFFFFFFFFFFULL * 2 * m);
311303

312304
/* Due to the properties above, the left hand in the subtractions below is never less than
313305
* the right hand. */
314-
r->n[0] = 0xFFFFEFFFFFC2FULL * 2 * (m + 1) - a->n[0];
315-
r->n[1] = 0xFFFFFFFFFFFFFULL * 2 * (m + 1) - a->n[1];
316-
r->n[2] = 0xFFFFFFFFFFFFFULL * 2 * (m + 1) - a->n[2];
317-
r->n[3] = 0xFFFFFFFFFFFFFULL * 2 * (m + 1) - a->n[3];
318-
r->n[4] = 0x0FFFFFFFFFFFFULL * 2 * (m + 1) - a->n[4];
306+
r->n[0] = bound1 - a->n[0];
307+
r->n[1] = bound2 - a->n[1];
308+
r->n[2] = bound2 - a->n[2];
309+
r->n[3] = bound2 - a->n[3];
310+
r->n[4] = bound3 - a->n[4];
319311
}
320312

321313
SECP256K1_INLINE static void secp256k1_fe_impl_mul_int_unchecked(secp256k1_fe *r, int a) {
@@ -426,18 +418,20 @@ static SECP256K1_INLINE void secp256k1_fe_storage_cmov(secp256k1_fe_storage *r,
426418
}
427419

428420
static void secp256k1_fe_impl_to_storage(secp256k1_fe_storage *r, const secp256k1_fe *a) {
429-
r->n[0] = a->n[0] | a->n[1] << 52;
421+
r->n[0] = a->n[0] | a->n[1] << 52;
430422
r->n[1] = a->n[1] >> 12 | a->n[2] << 40;
431423
r->n[2] = a->n[2] >> 24 | a->n[3] << 28;
432424
r->n[3] = a->n[3] >> 36 | a->n[4] << 16;
433425
}
434426

435427
static SECP256K1_INLINE void secp256k1_fe_impl_from_storage(secp256k1_fe *r, const secp256k1_fe_storage *a) {
436-
r->n[0] = a->n[0] & 0xFFFFFFFFFFFFFULL;
437-
r->n[1] = a->n[0] >> 52 | ((a->n[1] << 12) & 0xFFFFFFFFFFFFFULL);
438-
r->n[2] = a->n[1] >> 40 | ((a->n[2] << 24) & 0xFFFFFFFFFFFFFULL);
439-
r->n[3] = a->n[2] >> 28 | ((a->n[3] << 36) & 0xFFFFFFFFFFFFFULL);
440-
r->n[4] = a->n[3] >> 16;
428+
const uint64_t a0 = a->n[0], a1 = a->n[1], a2 = a->n[2], a3 = a->n[3];
429+
430+
r->n[0] = a0 & 0xFFFFFFFFFFFFFULL;
431+
r->n[1] = a0 >> 52 | ((a1 << 12) & 0xFFFFFFFFFFFFFULL);
432+
r->n[2] = a1 >> 40 | ((a2 << 24) & 0xFFFFFFFFFFFFFULL);
433+
r->n[3] = a2 >> 28 | ((a3 << 36) & 0xFFFFFFFFFFFFFULL);
434+
r->n[4] = a3 >> 16;
441435
}
442436

443437
static void secp256k1_fe_from_signed62(secp256k1_fe *r, const secp256k1_modinv64_signed62 *a) {

0 commit comments

Comments
 (0)