v3.18.2

JayDDee committed Oct 20, 2021
1 parent 47cc5dc commit 1a234cb
Showing 18 changed files with 475 additions and 190 deletions.
1 change: 1 addition & 0 deletions Makefile.am
@@ -171,6 +171,7 @@ cpuminer_SOURCES = \
algo/sha/hmac-sha256-hash-4way.c \
algo/sha/sha256d.c \
algo/sha/sha2.c \
algo/sha/sha256d-4way.c \
algo/sha/sha256t-gate.c \
algo/sha/sha256t-4way.c \
algo/sha/sha256t.c \
14 changes: 14 additions & 0 deletions RELEASE_NOTES
@@ -65,6 +65,20 @@ If not what makes it happen or not happen?
Change Log
----------

v3.18.2

Issue #342, fixed Groestl AES on Windows, broken in v3.18.0.

AVX512 for sha256d.
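
For reference, sha256d is simply SHA-256 applied twice: hash the input,
then hash the resulting 32 byte digest. A minimal scalar sketch of the
idea (sha256_full is a hypothetical one-shot helper, not necessarily the
real API; the AVX512 code in algo/sha/sha256d-4way.c computes many such
hashes in parallel, one candidate per 32-bit lane of a wide register):

#include <stdint.h>
#include <stddef.h>

// Assumed one-shot SHA-256 primitive provided elsewhere.
void sha256_full( uint8_t *out, const uint8_t *in, size_t len );

// Double SHA-256: the second pass hashes the digest of the first.
void sha256d( uint8_t out[32], const uint8_t *in, size_t len )
{
   uint8_t t[32];
   sha256_full( t, in, len );   // first SHA-256 pass
   sha256_full( out, t, 32 );   // second pass over the 32 byte digest
}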

SSE42 and AVX may now be displayed as mining features at startup.
This is hard coded for each algo, and is currently implemented only for
scrypt, as it is the only algo where those features make a significant
performance difference.
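
A minimal sketch of how such a startup display could work (illustrative
names only, not the actual gate code): each algo carries a hard coded
mask of the vector extensions that matter for it, and startup prints
only those the CPU also supports.

#include <stdio.h>

enum { FEAT_SSE2 = 1, FEAT_SSE42 = 2, FEAT_AVX = 4, FEAT_AVX2 = 8,
       FEAT_AVX512 = 16 };

// Hypothetical per-algo mask: scrypt is currently the only algo that
// would list SSE42 and AVX, since only there do they affect speed.
static const unsigned scrypt_features =
   FEAT_SSE2 | FEAT_SSE42 | FEAT_AVX | FEAT_AVX2 | FEAT_AVX512;

static void print_mining_features( unsigned algo_feats,
                                   unsigned cpu_feats )
{
   unsigned usable = algo_feats & cpu_feats;
   printf( "Mining features:" );
   if ( usable & FEAT_SSE2   ) printf( " SSE2" );
   if ( usable & FEAT_SSE42  ) printf( " SSE42" );
   if ( usable & FEAT_AVX    ) printf( " AVX" );
   if ( usable & FEAT_AVX2   ) printf( " AVX2" );
   if ( usable & FEAT_AVX512 ) printf( " AVX512" );
   printf( "\n" );
}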

Fixed an issue where a high hashrate algo could cause excessive invalid hash
rate log reports when starting up in benchmark mode.

v3.18.1

More speed for scrypt:
26 changes: 13 additions & 13 deletions algo/scrypt/scrypt-core-4way.c
@@ -337,42 +337,42 @@ do{ \
XC2 = XOR( XC2, TC ); \
\
TA = ADD32( XA2, XA1 ); \
XA1 = ROL_1X32( XA1 ); \
TB = ADD32( XB2, XB1 ); \
TC = ADD32( XC2, XC1 ); \
TA = ROL32( TA, 13 ); \
XA1 = ROL_1X32( XA1 ); \
XB1 = ROL_1X32( XB1 ); \
XC1 = ROL_1X32( XC1 ); \
TA = ROL32( TA, 13 ); \
XA3 = XOR( XA3, TA ); \
XC1 = ROL_1X32( XC1 ); \
TB = ROL32( TB, 13 ); \
XB3 = XOR( XB3, TB ); \
TC = ROL32( TC, 13 ); \
XC3 = XOR( XC3, TC ); \
\
TA = ADD32( XA3, XA2 ); \
XA2 = SWAP_64( XA2 ); \
TB = ADD32( XB3, XB2 ); \
TC = ADD32( XC3, XC2 ); \
TA = ROL32( TA, 18 ); \
XA2 = SWAP_64( XA2 ); \
XB2 = SWAP_64( XB2 ); \
XC2 = SWAP_64( XC2 ); \
XA0 = XOR( XA0, TA ); \
TB = ROL32( TB, 18 ); \
XB0 = XOR( XB0, TB ); \
XC2 = SWAP_64( XC2 ); \
TC = ROL32( TC, 18 ); \
XC0 = XOR( XC0, TC ); \
\
TA = ADD32( XA0, XA1 ); \
XA3 = ROR_1X32( XA3 ); \
TB = ADD32( XB0, XB1 ); \
TC = ADD32( XC0, XC1 ); \
TA = ROL32( TA, 7 ); \
XA3 = ROR_1X32( XA3 ); \
XB3 = ROR_1X32( XB3 ); \
XA3 = XOR( XA3, TA ); \
TB = ROL32( TB, 7 ); \
XB3 = ROR_1X32( XB3 ); \
XC3 = ROR_1X32( XC3 ); \
XB3 = XOR( XB3, TB ); \
TC = ROL32( TC, 7 ); \
XC3 = ROR_1X32( XC3 ); \
XC3 = XOR( XC3, TC ); \
\
TA = ADD32( XA3, XA0 ); \
@@ -399,24 +399,24 @@ do{ \
XC1 = XOR( XC1, TC ); \
\
TA = ADD32( XA1, XA2 ); \
XA2 = SWAP_64( XA2 ); \
TB = ADD32( XB1, XB2 ); \
XB2 = SWAP_64( XB2 ); \
TA = ROL32( TA, 18); \
TC = ADD32( XC1, XC2 ); \
XA2 = SWAP_64( XA2 ); \
XC2 = SWAP_64( XC2 ); \
TB = ROL32( TB, 18); \
XA0 = XOR( XA0, TA ); \
XB2 = SWAP_64( XB2 ); \
XA1 = ROR_1X32( XA1 ); \
TC = ROL32( TC, 18); \
XB0 = XOR( XB0, TB ); \
XC2 = SWAP_64( XC2 ); \
XA1 = ROR_1X32( XA1 ); \
XB1 = ROR_1X32( XB1 ); \
XC0 = XOR( XC0, TC ); \
XC1 = ROR_1X32( XC1 ); \
} while (0);
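
For reference, the macro above is the standard Salsa20 add-rotate-xor
mixing step with rotation constants 7, 9, 13 and 18, interleaved across
three buffers (A, B, C) so independent instructions can overlap and hide
latency. A scalar sketch of one quarter-round, the pattern each
T = ADD32 / ROL32 / XOR triple implements (the ROL_1X32, SWAP_64 and
ROR_1X32 shuffles stand in for Salsa20's row rotations when the state is
held as four 128-bit vectors):

#include <stdint.h>

static inline uint32_t rol32( uint32_t x, int c )
{
   return ( x << c ) | ( x >> ( 32 - c ) );
}

// One Salsa20 quarter-round: the same chain the macro applies to whole
// 128-bit rows of three buffers at once.
static inline void salsa_quarter( uint32_t *a, uint32_t *b,
                                  uint32_t *c, uint32_t *d )
{
   *b ^= rol32( *a + *d,  7 );
   *c ^= rol32( *b + *a,  9 );
   *d ^= rol32( *c + *b, 13 );
   *a ^= rol32( *d + *c, 18 );
}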


// slow rol, an attempt to optimize non-avx512 bit rotations
// slow rot, an attempt to optimize non-avx512 bit rotations
// Contains target specific instructions, only for use with 128 bit vectors
#define SALSA_2ROUNDS_SIMD128_3BUF_SLOROT \
do{ \
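
For context, the "slow rot" refers to rotate emulation: SSE2 through
AVX2 have no 32-bit vector rotate instruction, so ROL32 must be built
from two shifts and an OR, while AVX512VL provides a single-instruction
rotate (VPROLD). A hedged sketch of the two paths (standard Intel
intrinsics; the macro names are illustrative, not the ones used in this
file):

#include <immintrin.h>

// Generic SSE2 rotate-left: two shifts plus an OR, three instructions
// and an extra temporary register, hence "slow".
#define ROL32_SSE2( x, c ) \
   _mm_or_si128( _mm_slli_epi32( x, c ), _mm_srli_epi32( x, 32-(c) ) )

#if defined(__AVX512VL__)
// With AVX512VL the same rotate is one VPROLD instruction.
#define ROL32_AVX512( x, c )  _mm_rol_epi32( x, c )
#endif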
