From e3460ad06b4e0cb8d1af0d52b52106674942a9d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Kn=C3=A1pek?= Date: Sat, 29 Nov 2025 10:52:17 +0100 Subject: [PATCH 1/2] Smaller stack usage for SHA-1, SHA-256 and SHA-512. --- src/hashes/sha1.c | 65 +++++++++++--------- src/hashes/sha2/sha256.c | 129 ++++++++++++++++++++------------------- src/hashes/sha2/sha512.c | 46 ++++++++++---- 3 files changed, 134 insertions(+), 106 deletions(-) diff --git a/src/hashes/sha1.c b/src/hashes/sha1.c index 13d913c43..52b35a2bc 100644 --- a/src/hashes/sha1.c +++ b/src/hashes/sha1.c @@ -39,7 +39,7 @@ static int ss_sha1_compress(hash_state *md, const unsigned char *buf) static int s_sha1_compress(hash_state *md, const unsigned char *buf) #endif { - ulong32 a,b,c,d,e,W[80],i; + ulong32 a,b,c,d,e,W[16],i; #ifdef LTC_SMALL_CODE ulong32 t; #endif @@ -48,6 +48,7 @@ static int s_sha1_compress(hash_state *md, const unsigned char *buf) for (i = 0; i < 16; i++) { LOAD32H(W[i], buf + (4*i)); } + #define Wi(i) W[(i) % 16] = ROL(W[((i) - 3) % 16] ^ W[((i) - 8) % 16] ^ W[((i) - 14) % 16] ^ W[((i) - 16) % 16], 1); /* copy state */ a = md->sha1.state[0]; @@ -56,71 +57,74 @@ static int s_sha1_compress(hash_state *md, const unsigned char *buf) d = md->sha1.state[3]; e = md->sha1.state[4]; - /* expand it */ - for (i = 16; i < 80; i++) { - W[i] = ROL(W[i-3] ^ W[i-8] ^ W[i-14] ^ W[i-16], 1); - } - /* compress */ /* round one */ - #define FF0(a,b,c,d,e,i) e = (ROLc(a, 5) + F0(b,c,d) + e + W[i] + 0x5a827999UL); b = ROLc(b, 30); - #define FF1(a,b,c,d,e,i) e = (ROLc(a, 5) + F1(b,c,d) + e + W[i] + 0x6ed9eba1UL); b = ROLc(b, 30); - #define FF2(a,b,c,d,e,i) e = (ROLc(a, 5) + F2(b,c,d) + e + W[i] + 0x8f1bbcdcUL); b = ROLc(b, 30); - #define FF3(a,b,c,d,e,i) e = (ROLc(a, 5) + F3(b,c,d) + e + W[i] + 0xca62c1d6UL); b = ROLc(b, 30); + #define FF0(a,b,c,d,e,i) e = (ROLc(a, 5) + F0(b,c,d) + e + W[(i) % 16] + 0x5a827999UL); b = ROLc(b, 30); + #define FF1(a,b,c,d,e,i) e = (ROLc(a, 5) + F1(b,c,d) + e + W[(i) % 16] + 
0x6ed9eba1UL); b = ROLc(b, 30); + #define FF2(a,b,c,d,e,i) e = (ROLc(a, 5) + F2(b,c,d) + e + W[(i) % 16] + 0x8f1bbcdcUL); b = ROLc(b, 30); + #define FF3(a,b,c,d,e,i) e = (ROLc(a, 5) + F3(b,c,d) + e + W[(i) % 16] + 0xca62c1d6UL); b = ROLc(b, 30); #ifdef LTC_SMALL_CODE - for (i = 0; i < 20; ) { + for (i = 0; i < 16; ) { FF0(a,b,c,d,e,i++); t = e; e = d; d = c; c = b; b = a; a = t; } + for (; i < 20; ) { + Wi(i); FF0(a,b,c,d,e,i++); t = e; e = d; d = c; c = b; b = a; a = t; + } for (; i < 40; ) { - FF1(a,b,c,d,e,i++); t = e; e = d; d = c; c = b; b = a; a = t; + Wi(i); FF1(a,b,c,d,e,i++); t = e; e = d; d = c; c = b; b = a; a = t; } for (; i < 60; ) { - FF2(a,b,c,d,e,i++); t = e; e = d; d = c; c = b; b = a; a = t; + Wi(i); FF2(a,b,c,d,e,i++); t = e; e = d; d = c; c = b; b = a; a = t; } for (; i < 80; ) { - FF3(a,b,c,d,e,i++); t = e; e = d; d = c; c = b; b = a; a = t; + Wi(i); FF3(a,b,c,d,e,i++); t = e; e = d; d = c; c = b; b = a; a = t; } #else - for (i = 0; i < 20; ) { + for (i = 0; i < 15; ) { FF0(a,b,c,d,e,i++); FF0(e,a,b,c,d,i++); FF0(d,e,a,b,c,i++); FF0(c,d,e,a,b,i++); FF0(b,c,d,e,a,i++); } + FF0(a,b,c,d,e,i++); + Wi(i); FF0(e,a,b,c,d,i++); + Wi(i); FF0(d,e,a,b,c,i++); + Wi(i); FF0(c,d,e,a,b,i++); + Wi(i); FF0(b,c,d,e,a,i++); /* round two */ for (; i < 40; ) { - FF1(a,b,c,d,e,i++); - FF1(e,a,b,c,d,i++); - FF1(d,e,a,b,c,i++); - FF1(c,d,e,a,b,i++); - FF1(b,c,d,e,a,i++); + Wi(i); FF1(a,b,c,d,e,i++); + Wi(i); FF1(e,a,b,c,d,i++); + Wi(i); FF1(d,e,a,b,c,i++); + Wi(i); FF1(c,d,e,a,b,i++); + Wi(i); FF1(b,c,d,e,a,i++); } /* round three */ for (; i < 60; ) { - FF2(a,b,c,d,e,i++); - FF2(e,a,b,c,d,i++); - FF2(d,e,a,b,c,i++); - FF2(c,d,e,a,b,i++); - FF2(b,c,d,e,a,i++); + Wi(i); FF2(a,b,c,d,e,i++); + Wi(i); FF2(e,a,b,c,d,i++); + Wi(i); FF2(d,e,a,b,c,i++); + Wi(i); FF2(c,d,e,a,b,i++); + Wi(i); FF2(b,c,d,e,a,i++); } /* round four */ for (; i < 80; ) { - FF3(a,b,c,d,e,i++); - FF3(e,a,b,c,d,i++); - FF3(d,e,a,b,c,i++); - FF3(c,d,e,a,b,i++); - FF3(b,c,d,e,a,i++); + Wi(i); 
FF3(a,b,c,d,e,i++); + Wi(i); FF3(e,a,b,c,d,i++); + Wi(i); FF3(d,e,a,b,c,i++); + Wi(i); FF3(c,d,e,a,b,i++); + Wi(i); FF3(b,c,d,e,a,i++); } #endif @@ -128,6 +132,7 @@ static int s_sha1_compress(hash_state *md, const unsigned char *buf) #undef FF1 #undef FF2 #undef FF3 + #undef Wi /* store */ md->sha1.state[0] = md->sha1.state[0] + a; diff --git a/src/hashes/sha2/sha256.c b/src/hashes/sha2/sha256.c index 1f4432823..f8f0825b0 100644 --- a/src/hashes/sha2/sha256.c +++ b/src/hashes/sha2/sha256.c @@ -63,7 +63,7 @@ static int ss_sha256_compress(hash_state * md, const unsigned char *buf) static int s_sha256_compress(hash_state * md, const unsigned char *buf) #endif { - ulong32 S[8], W[64], t0, t1; + ulong32 S[8], W[16], t0, t1; #ifdef LTC_SMALL_CODE ulong32 t; #endif @@ -78,30 +78,32 @@ static int s_sha256_compress(hash_state * md, const unsigned char *buf) for (i = 0; i < 16; i++) { LOAD32H(W[i], buf + (4*i)); } - - /* fill W[16..63] */ - for (i = 16; i < 64; i++) { - W[i] = Gamma1(W[i - 2]) + W[i - 7] + Gamma0(W[i - 15]) + W[i - 16]; - } + #define Wi(i) W[(i) % 16] = Gamma1(W[((i) - 2) % 16]) + W[((i) - 7) % 16] + Gamma0(W[((i) - 15) % 16]) + W[((i) - 16) % 16] /* Compress */ #ifdef LTC_SMALL_CODE -#define RND(a,b,c,d,e,f,g,h,i) \ - t0 = h + Sigma1(e) + Ch(e, f, g) + K[i] + W[i]; \ - t1 = Sigma0(a) + Maj(a, b, c); \ - d += t0; \ +#define RND(a,b,c,d,e,f,g,h,i) \ + t0 = h + Sigma1(e) + Ch(e, f, g) + K[i] + W[(i) % 16]; \ + t1 = Sigma0(a) + Maj(a, b, c); \ + d += t0; \ h = t0 + t1; - for (i = 0; i < 64; ++i) { + for (i = 0; i < 16; ++i) { + RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],i); + t = S[7]; S[7] = S[6]; S[6] = S[5]; S[5] = S[4]; + S[4] = S[3]; S[3] = S[2]; S[2] = S[1]; S[1] = S[0]; S[0] = t; + } + for (; i < 64; ++i) { + Wi(i); RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],i); t = S[7]; S[7] = S[6]; S[6] = S[5]; S[5] = S[4]; S[4] = S[3]; S[3] = S[2]; S[2] = S[1]; S[1] = S[0]; S[0] = t; } #else -#define RND(a,b,c,d,e,f,g,h,i,ki) \ - t0 = h + Sigma1(e) + Ch(e, f, g) + 
ki + W[i]; \ - t1 = Sigma0(a) + Maj(a, b, c); \ - d += t0; \ +#define RND(a,b,c,d,e,f,g,h,i,ki) \ + t0 = h + Sigma1(e) + Ch(e, f, g) + ki + W[(i) % 16]; \ + t1 = Sigma0(a) + Maj(a, b, c); \ + d += t0; \ h = t0 + t1; RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],0,0x428a2f98); @@ -120,56 +122,57 @@ static int s_sha256_compress(hash_state * md, const unsigned char *buf) RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],13,0x80deb1fe); RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],14,0x9bdc06a7); RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],15,0xc19bf174); - RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],16,0xe49b69c1); - RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],17,0xefbe4786); - RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],18,0x0fc19dc6); - RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],19,0x240ca1cc); - RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],20,0x2de92c6f); - RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],21,0x4a7484aa); - RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],22,0x5cb0a9dc); - RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],23,0x76f988da); - RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],24,0x983e5152); - RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],25,0xa831c66d); - RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],26,0xb00327c8); - RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],27,0xbf597fc7); - RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],28,0xc6e00bf3); - RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],29,0xd5a79147); - RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],30,0x06ca6351); - RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],31,0x14292967); - RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],32,0x27b70a85); - RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],33,0x2e1b2138); - RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],34,0x4d2c6dfc); - RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],35,0x53380d13); - RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],36,0x650a7354); - RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],37,0x766a0abb); - RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],38,0x81c2c92e); - 
RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],39,0x92722c85); - RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],40,0xa2bfe8a1); - RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],41,0xa81a664b); - RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],42,0xc24b8b70); - RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],43,0xc76c51a3); - RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],44,0xd192e819); - RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],45,0xd6990624); - RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],46,0xf40e3585); - RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],47,0x106aa070); - RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],48,0x19a4c116); - RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],49,0x1e376c08); - RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],50,0x2748774c); - RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],51,0x34b0bcb5); - RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],52,0x391c0cb3); - RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],53,0x4ed8aa4a); - RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],54,0x5b9cca4f); - RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],55,0x682e6ff3); - RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],56,0x748f82ee); - RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],57,0x78a5636f); - RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],58,0x84c87814); - RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],59,0x8cc70208); - RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],60,0x90befffa); - RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],61,0xa4506ceb); - RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],62,0xbef9a3f7); - RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],63,0xc67178f2); + Wi(16); RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],16,0xe49b69c1); + Wi(17); RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],17,0xefbe4786); + Wi(18); RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],18,0x0fc19dc6); + Wi(19); RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],19,0x240ca1cc); + Wi(20); RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],20,0x2de92c6f); + Wi(21); RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],21,0x4a7484aa); + Wi(22); 
RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],22,0x5cb0a9dc); + Wi(23); RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],23,0x76f988da); + Wi(24); RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],24,0x983e5152); + Wi(25); RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],25,0xa831c66d); + Wi(26); RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],26,0xb00327c8); + Wi(27); RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],27,0xbf597fc7); + Wi(28); RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],28,0xc6e00bf3); + Wi(29); RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],29,0xd5a79147); + Wi(30); RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],30,0x06ca6351); + Wi(31); RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],31,0x14292967); + Wi(32); RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],32,0x27b70a85); + Wi(33); RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],33,0x2e1b2138); + Wi(34); RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],34,0x4d2c6dfc); + Wi(35); RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],35,0x53380d13); + Wi(36); RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],36,0x650a7354); + Wi(37); RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],37,0x766a0abb); + Wi(38); RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],38,0x81c2c92e); + Wi(39); RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],39,0x92722c85); + Wi(40); RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],40,0xa2bfe8a1); + Wi(41); RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],41,0xa81a664b); + Wi(42); RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],42,0xc24b8b70); + Wi(43); RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],43,0xc76c51a3); + Wi(44); RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],44,0xd192e819); + Wi(45); RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],45,0xd6990624); + Wi(46); RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],46,0xf40e3585); + Wi(47); RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],47,0x106aa070); + Wi(48); RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],48,0x19a4c116); + Wi(49); RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],49,0x1e376c08); + Wi(50); 
RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],50,0x2748774c); + Wi(51); RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],51,0x34b0bcb5); + Wi(52); RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],52,0x391c0cb3); + Wi(53); RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],53,0x4ed8aa4a); + Wi(54); RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],54,0x5b9cca4f); + Wi(55); RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],55,0x682e6ff3); + Wi(56); RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],56,0x748f82ee); + Wi(57); RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],57,0x78a5636f); + Wi(58); RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],58,0x84c87814); + Wi(59); RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],59,0x8cc70208); + Wi(60); RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],60,0x90befffa); + Wi(61); RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],61,0xa4506ceb); + Wi(62); RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],62,0xbef9a3f7); + Wi(63); RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],63,0xc67178f2); #endif #undef RND +#undef Wi /* feedback */ for (i = 0; i < 8; i++) { diff --git a/src/hashes/sha2/sha512.c b/src/hashes/sha2/sha512.c index d6337af12..326ee90ba 100644 --- a/src/hashes/sha2/sha512.c +++ b/src/hashes/sha2/sha512.c @@ -88,7 +88,7 @@ static int ss_sha512_compress(hash_state * md, const unsigned char *buf) static int s_sha512_compress(hash_state * md, const unsigned char *buf) #endif { - ulong64 S[8], W[80], t0, t1; + ulong64 S[8], W[16], t0, t1; int i; /* copy state into S */ @@ -100,16 +100,25 @@ static int s_sha512_compress(hash_state * md, const unsigned char *buf) for (i = 0; i < 16; i++) { LOAD64H(W[i], buf + (8*i)); } - - /* fill W[16..79] */ - for (i = 16; i < 80; i++) { - W[i] = Gamma1(W[i - 2]) + W[i - 7] + Gamma0(W[i - 15]) + W[i - 16]; - } + #define Wi(i) W[(i) % 16] = Gamma1(W[((i) - 2) % 16]) + W[((i) - 7) % 16] + Gamma0(W[((i) - 15) % 16]) + W[((i) - 16) % 16]; /* Compress */ #ifdef LTC_SMALL_CODE - for (i = 0; i < 80; i++) { - t0 = S[7] + Sigma1(S[4]) + Ch(S[4], S[5], S[6]) + K[i] + 
W[i]; + for (i = 0; i < 16; i++) { + t0 = S[7] + Sigma1(S[4]) + Ch(S[4], S[5], S[6]) + K[i] + W[i % 16]; + t1 = Sigma0(S[0]) + Maj(S[0], S[1], S[2]); + S[7] = S[6]; + S[6] = S[5]; + S[5] = S[4]; + S[4] = S[3] + t0; + S[3] = S[2]; + S[2] = S[1]; + S[1] = S[0]; + S[0] = t0 + t1; + } + for (; i < 80; i++) { + Wi(i); + t0 = S[7] + Sigma1(S[4]) + Ch(S[4], S[5], S[6]) + K[i] + W[i % 16]; t1 = Sigma0(S[0]) + Maj(S[0], S[1], S[2]); S[7] = S[6]; S[6] = S[5]; @@ -121,13 +130,13 @@ static int s_sha512_compress(hash_state * md, const unsigned char *buf) S[0] = t0 + t1; } #else -#define RND(a,b,c,d,e,f,g,h,i) \ - t0 = h + Sigma1(e) + Ch(e, f, g) + K[i] + W[i]; \ - t1 = Sigma0(a) + Maj(a, b, c); \ - d += t0; \ +#define RND(a,b,c,d,e,f,g,h,i) \ + t0 = h + Sigma1(e) + Ch(e, f, g) + K[i] + W[(i) % 16]; \ + t1 = Sigma0(a) + Maj(a, b, c); \ + d += t0; \ h = t0 + t1; - for (i = 0; i < 80; i += 8) { + for (i = 0; i < 16; i += 8) { RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],i+0); RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],i+1); RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],i+2); @@ -137,7 +146,18 @@ static int s_sha512_compress(hash_state * md, const unsigned char *buf) RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],i+6); RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],i+7); } + for (; i < 80; i += 8) { + Wi(i+0); RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],i+0); + Wi(i+1); RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],i+1); + Wi(i+2); RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],i+2); + Wi(i+3); RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],i+3); + Wi(i+4); RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],i+4); + Wi(i+5); RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],i+5); + Wi(i+6); RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],i+6); + Wi(i+7); RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],i+7); + } #endif +#undef Wi /* feedback */ From 2a5c99df9c28b2628f07a4420cd8304d7b518387 Mon Sep 17 00:00:00 2001 From: Steffen Jaeckel Date: Tue, 2 Dec 2025 09:19:55 +0100 Subject: [PATCH 2/2] Slightly improve timing 
demo. * Add the option to only run for a subset of algos. * Improve `hash` to show something meaningful. Signed-off-by: Steffen Jaeckel --- demos/timing.c | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/demos/timing.c b/demos/timing.c index 572756dda..97af034c6 100644 --- a/demos/timing.c +++ b/demos/timing.c @@ -14,6 +14,8 @@ static prng_state yarrow_prng; #define KTIMES 25 #define TIMES 100000 +static const char *filter_arg; + static struct list { int id; ulong64 spd1, spd2, avg; @@ -56,7 +58,7 @@ static void tally_results(int type) } /* RDTSC from Scott Duplichan */ -static ulong64 rdtsc (void) +static LTC_INLINE ulong64 rdtsc (void) { #if defined __GNUC__ && !defined(LTC_NO_ASM) #if defined(__i386__) || defined(__x86_64__) @@ -111,12 +113,12 @@ static ulong64 rdtsc (void) static ulong64 timer, skew = 0; -static void t_start(void) +static LTC_INLINE void t_start(void) { timer = rdtsc(); } -static ulong64 t_read(void) +static LTC_INLINE ulong64 t_read(void) { return rdtsc() - timer; } @@ -470,20 +472,27 @@ static void time_cipher_lrw(void) { fprintf(stderr, "NO LRW\n"); } static void time_hash(void) { - unsigned long x, y1, len; + unsigned long x, y1, len = 1024; ulong64 t1, t2, c1, c2; hash_state md; int (*func)(hash_state *, const unsigned char *, unsigned long), err; - unsigned char pt[MAXBLOCKSIZE] = { 0 }; - + unsigned char *pt = XMALLOC(len); + if (pt == NULL) { + fprintf(stderr, "\n\nout of heap yo\n\n"); + exit(EXIT_FAILURE); + } fprintf(stderr, "\n\nHASH Time Trials for:\n"); no_results = 0; for (x = 0; hash_descriptor[x].name != NULL; x++) { + if (filter_arg && strstr(hash_descriptor[x].name, filter_arg) == NULL) + continue; + /* sanity check on hash */ if ((err = hash_descriptor[x].test()) != CRYPT_OK) { fprintf(stderr, "\n\nERROR: Hash %s failed self-test %s\n", hash_descriptor[x].name, error_to_string(err)); + XFREE(pt); exit(EXIT_FAILURE); } @@ -493,7 +502,6 @@ static void time_hash(void) 
#define DO2 DO1 DO1 func = hash_descriptor[x].process; - len = hash_descriptor[x].blocksize; c1 = c2 = (ulong64)-1; for (y1 = 0; y1 < TIMES; y1++) { @@ -515,6 +523,7 @@ static void time_hash(void) #undef DO1 } tally_results(2); + XFREE(pt); } /*#warning you need an mp_rand!!!*/ @@ -1368,12 +1377,15 @@ static void LTC_NORETURN die(int status) { FILE* o = status == EXIT_SUCCESS ? stdout : stderr; fprintf(o, - "Usage: timing [<-h|-l|alg>] [mpi]\n\n" + "Usage: timing [<-h|-l|alg>] [mpi] [filter]\n\n" "Run timing tests of all built-in algorithms, or only the one given in <alg>.\n\n" "\talg\tThe algorithms to test. Use the '-l' option to check for valid values.\n" "\tmpi\tThe MPI provider to use.\n" + "\tfilter\tFilter within the algorithm class (currently only for 'hash'es).\n" "\t-l\tList all built-in algorithms that can be timed.\n" - "\t-h\tThe help you're looking at.\n" + "\t-h\tThe help you're looking at.\n\n" + "Examples:\n" + "\ttiming hash sha\t\tWill run the timing demo for all hashes containing 'sha' in their name\n" ); exit(status); } @@ -1440,6 +1452,9 @@ register_all_prngs(); if (crypt_mp_init(mpi_provider) != CRYPT_OK) { fprintf(stderr, "Init of MPI provider \"%s\" failed\n", mpi_provider ? mpi_provider : "(null)"); + filter_arg = mpi_provider; + } else if (argc > 3){ + filter_arg = argv[3]; } if ((err = rng_make_prng(128, find_prng("yarrow"), &yarrow_prng, NULL)) != CRYPT_OK) {