mirror of https://git.ffmpeg.org/ffmpeg.git
lavu/sha: Fully unroll the transform function loops
crypto_bench SHA-1 and SHA-256 results using an AMD Athlon X2 7750+, mingw32-w64 GCC 4.7.3 x86_64

Before:
lavu SHA-1 size: 1048576 runs: 1024 time: 9.012 +- 0.162
lavu SHA-256 size: 1048576 runs: 1024 time: 19.625 +- 0.173

After:
lavu SHA-1 size: 1048576 runs: 1024 time: 7.948 +- 0.154
lavu SHA-256 size: 1048576 runs: 1024 time: 17.841 +- 0.170

Signed-off-by: James Almer <jamrial@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
parent
a8e00cf926
commit
7e4fe5162a
115
libavutil/sha.c
115
libavutil/sha.c
|
@ -98,39 +98,53 @@ static void sha1_transform(uint32_t state[5], const uint8_t buffer[64])
|
|||
a = t;
|
||||
}
|
||||
#else
|
||||
for (i = 0; i < 15; i += 5) {
|
||||
R0(a, b, c, d, e, 0 + i);
|
||||
R0(e, a, b, c, d, 1 + i);
|
||||
R0(d, e, a, b, c, 2 + i);
|
||||
R0(c, d, e, a, b, 3 + i);
|
||||
R0(b, c, d, e, a, 4 + i);
|
||||
}
|
||||
|
||||
#define R1_0 \
|
||||
R0(a, b, c, d, e, 0 + i); \
|
||||
R0(e, a, b, c, d, 1 + i); \
|
||||
R0(d, e, a, b, c, 2 + i); \
|
||||
R0(c, d, e, a, b, 3 + i); \
|
||||
R0(b, c, d, e, a, 4 + i); \
|
||||
i += 5
|
||||
|
||||
i = 0;
|
||||
R1_0; R1_0; R1_0;
|
||||
R0(a, b, c, d, e, 15);
|
||||
R1(e, a, b, c, d, 16);
|
||||
R1(d, e, a, b, c, 17);
|
||||
R1(c, d, e, a, b, 18);
|
||||
R1(b, c, d, e, a, 19);
|
||||
for (i = 20; i < 40; i += 5) {
|
||||
R2(a, b, c, d, e, 0 + i);
|
||||
R2(e, a, b, c, d, 1 + i);
|
||||
R2(d, e, a, b, c, 2 + i);
|
||||
R2(c, d, e, a, b, 3 + i);
|
||||
R2(b, c, d, e, a, 4 + i);
|
||||
}
|
||||
for (; i < 60; i += 5) {
|
||||
R3(a, b, c, d, e, 0 + i);
|
||||
R3(e, a, b, c, d, 1 + i);
|
||||
R3(d, e, a, b, c, 2 + i);
|
||||
R3(c, d, e, a, b, 3 + i);
|
||||
R3(b, c, d, e, a, 4 + i);
|
||||
}
|
||||
for (; i < 80; i += 5) {
|
||||
R4(a, b, c, d, e, 0 + i);
|
||||
R4(e, a, b, c, d, 1 + i);
|
||||
R4(d, e, a, b, c, 2 + i);
|
||||
R4(c, d, e, a, b, 3 + i);
|
||||
R4(b, c, d, e, a, 4 + i);
|
||||
}
|
||||
|
||||
#define R1_20 \
|
||||
R2(a, b, c, d, e, 0 + i); \
|
||||
R2(e, a, b, c, d, 1 + i); \
|
||||
R2(d, e, a, b, c, 2 + i); \
|
||||
R2(c, d, e, a, b, 3 + i); \
|
||||
R2(b, c, d, e, a, 4 + i); \
|
||||
i += 5
|
||||
|
||||
i = 20;
|
||||
R1_20; R1_20; R1_20; R1_20;
|
||||
|
||||
#define R1_40 \
|
||||
R3(a, b, c, d, e, 0 + i); \
|
||||
R3(e, a, b, c, d, 1 + i); \
|
||||
R3(d, e, a, b, c, 2 + i); \
|
||||
R3(c, d, e, a, b, 3 + i); \
|
||||
R3(b, c, d, e, a, 4 + i); \
|
||||
i += 5
|
||||
|
||||
R1_40; R1_40; R1_40; R1_40;
|
||||
|
||||
#define R1_60 \
|
||||
R4(a, b, c, d, e, 0 + i); \
|
||||
R4(e, a, b, c, d, 1 + i); \
|
||||
R4(d, e, a, b, c, 2 + i); \
|
||||
R4(c, d, e, a, b, 3 + i); \
|
||||
R4(b, c, d, e, a, 4 + i); \
|
||||
i += 5
|
||||
|
||||
R1_60; R1_60; R1_60; R1_60;
|
||||
#endif
|
||||
state[0] += a;
|
||||
state[1] += b;
|
||||
|
@ -218,27 +232,32 @@ static void sha256_transform(uint32_t *state, const uint8_t buffer[64])
|
|||
a = T1 + T2;
|
||||
}
|
||||
#else
|
||||
for (i = 0; i < 16 - 7;) {
|
||||
ROUND256_0_TO_15(a, b, c, d, e, f, g, h);
|
||||
ROUND256_0_TO_15(h, a, b, c, d, e, f, g);
|
||||
ROUND256_0_TO_15(g, h, a, b, c, d, e, f);
|
||||
ROUND256_0_TO_15(f, g, h, a, b, c, d, e);
|
||||
ROUND256_0_TO_15(e, f, g, h, a, b, c, d);
|
||||
ROUND256_0_TO_15(d, e, f, g, h, a, b, c);
|
||||
ROUND256_0_TO_15(c, d, e, f, g, h, a, b);
|
||||
ROUND256_0_TO_15(b, c, d, e, f, g, h, a);
|
||||
}
|
||||
|
||||
for (; i < 64 - 7;) {
|
||||
ROUND256_16_TO_63(a, b, c, d, e, f, g, h);
|
||||
ROUND256_16_TO_63(h, a, b, c, d, e, f, g);
|
||||
ROUND256_16_TO_63(g, h, a, b, c, d, e, f);
|
||||
ROUND256_16_TO_63(f, g, h, a, b, c, d, e);
|
||||
ROUND256_16_TO_63(e, f, g, h, a, b, c, d);
|
||||
ROUND256_16_TO_63(d, e, f, g, h, a, b, c);
|
||||
ROUND256_16_TO_63(c, d, e, f, g, h, a, b);
|
||||
ROUND256_16_TO_63(b, c, d, e, f, g, h, a);
|
||||
}
|
||||
i = 0;
|
||||
#define R256_0 \
|
||||
ROUND256_0_TO_15(a, b, c, d, e, f, g, h); \
|
||||
ROUND256_0_TO_15(h, a, b, c, d, e, f, g); \
|
||||
ROUND256_0_TO_15(g, h, a, b, c, d, e, f); \
|
||||
ROUND256_0_TO_15(f, g, h, a, b, c, d, e); \
|
||||
ROUND256_0_TO_15(e, f, g, h, a, b, c, d); \
|
||||
ROUND256_0_TO_15(d, e, f, g, h, a, b, c); \
|
||||
ROUND256_0_TO_15(c, d, e, f, g, h, a, b); \
|
||||
ROUND256_0_TO_15(b, c, d, e, f, g, h, a)
|
||||
|
||||
R256_0; R256_0;
|
||||
|
||||
#define R256_16 \
|
||||
ROUND256_16_TO_63(a, b, c, d, e, f, g, h); \
|
||||
ROUND256_16_TO_63(h, a, b, c, d, e, f, g); \
|
||||
ROUND256_16_TO_63(g, h, a, b, c, d, e, f); \
|
||||
ROUND256_16_TO_63(f, g, h, a, b, c, d, e); \
|
||||
ROUND256_16_TO_63(e, f, g, h, a, b, c, d); \
|
||||
ROUND256_16_TO_63(d, e, f, g, h, a, b, c); \
|
||||
ROUND256_16_TO_63(c, d, e, f, g, h, a, b); \
|
||||
ROUND256_16_TO_63(b, c, d, e, f, g, h, a)
|
||||
|
||||
R256_16; R256_16; R256_16;
|
||||
R256_16; R256_16; R256_16;
|
||||
#endif
|
||||
state[0] += a;
|
||||
state[1] += b;
|
||||
|
|
Loading…
Reference in New Issue