Skip to content

Commit

Permalink
v3.8.2
Browse files Browse the repository at this point in the history
  • Loading branch information
JayDDee committed Feb 15, 2018
1 parent e4265a6 commit d60a268
Show file tree
Hide file tree
Showing 57 changed files with 3,471 additions and 2,137 deletions.
13 changes: 12 additions & 1 deletion Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@ cpuminer_SOURCES = \
algo/gost/sph_gost.c \
algo/groestl/sph_groestl.c \
algo/groestl/groestl.c \
algo/groestl/myrgr-gate.c \
algo/groestl/myrgr-4way.c \
algo/groestl/myr-groestl.c \
algo/groestl/aes_ni/hash-groestl.c \
algo/groestl/aes_ni/hash-groestl256.c \
Expand Down Expand Up @@ -97,7 +99,6 @@ cpuminer_SOURCES = \
algo/keccak/keccak-4way.c\
algo/keccak/keccak-gate.c \
algo/keccak/sse2/keccak.c \
algo/lbry.c \
algo/luffa/sph_luffa.c \
algo/luffa/luffa.c \
algo/luffa/luffa_for_sse2.c \
Expand All @@ -115,6 +116,9 @@ cpuminer_SOURCES = \
algo/lyra2/lyra2h-gate.c \
algo/lyra2/lyra2h.c \
algo/lyra2/lyra2h-4way.c \
algo/lyra2/allium-gate.c \
algo/lyra2/allium-4way.c \
algo/lyra2/allium.c \
algo/m7m.c \
algo/neoscrypt/neoscrypt.c \
algo/nist5/nist5-gate.c \
Expand All @@ -135,6 +139,10 @@ cpuminer_SOURCES = \
algo/qubit/deep-2way.c \
algo/qubit/deep.c \
algo/ripemd/sph_ripemd.c \
algo/ripemd/ripemd-hash-4way.c \
algo/ripemd/lbry-gate.c \
algo/ripemd/lbry.c \
algo/ripemd/lbry-4way.c \
algo/scrypt.c \
algo/scryptjane/scrypt-jane.c \
algo/sha/sph_sha2.c \
Expand Down Expand Up @@ -190,6 +198,9 @@ cpuminer_SOURCES = \
algo/x11/x11evo.c \
algo/x11/x11evo-4way.c \
algo/x11/x11evo-gate.c \
algo/x12/x12-gate.c \
algo/x12/x12.c \
algo/x12/x12-4way.c \
algo/x13/x13-gate.c \
algo/x13/x13.c \
algo/x13/x13-4way.c \
Expand Down
47 changes: 24 additions & 23 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,29 @@ mailto://[email protected]

See file RELEASE_NOTES for change log and compile instructions.

Requirements
------------

1. An x86_64 architecture CPU with a minimum of SSE2 support. This includes
Intel Core2 and newer and AMD equivalents. In order to take advantage of AES_NI
optimizations a CPU with AES_NI is required. This includes Intel Westmere
and newer and AMD equivalents. Further optimizations are available on some
algorithms for CPUs with AVX and AVX2, Sandybridge and Haswell respectively.

Older CPUs are supported by cpuminer-multi by TPruvot but at reduced
performance.

ARM CPUs are not supported.

2. 64-bit Linux OS. Ubuntu and Fedora based distributions, including Mint and
CentOS, are known to work and have all dependencies in their repositories.
Others may work but may require more effort.
64-bit Windows OS is supported with mingw_w64 and msys or pre-built binaries.

macOS (OS X) is not supported.

3. Stratum pool. Some algos may work for wallet mining using getwork.

Supported Algorithms
--------------------

Expand Down Expand Up @@ -75,6 +98,7 @@ Supported Algorithms
x11 Dash
x11evo Revolvercoin
x11gost sib (SibCoin)
x12 Galaxie Cash (GCH)
x13 X13
x13sm3 hsr (Hshare)
x14 X14
Expand All @@ -87,29 +111,6 @@ Supported Algorithms
yescryptr16 Yenten (YTN)
zr5 Ziftr

Requirements
------------

1. An x86_64 architecture CPU with a minimum of SSE2 support. This includes
Intel Core2 and newer and AMD equivalents. In order to take advantage of AES_NI
optimizations a CPU with AES_NI is required. This includes Intel Westmere
and newer and AMD equivalents. Further optimizations are available on some
algorithms for CPUs with AVX and AVX2, Sandybridge and Haswell respectively.

Older CPUs are supported by cpuminer-multi by TPruvot but at reduced
performance.

ARM CPUs are not supported.

2. 64-bit Linux OS. Ubuntu and Fedora based distributions, including Mint and
CentOS, are known to work and have all dependencies in their repositories.
Others may work but may require more effort.
64-bit Windows OS is supported with mingw_w64 and msys or pre-built binaries.

macOS (OS X) is not supported.

3. Stratum pool. Some algos may work for wallet mining using getwork.

Errata
------

Expand Down
7 changes: 7 additions & 0 deletions RELEASE_NOTES
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,13 @@ Support for even older x86_64 without AES_NI or SSE2 is not available.
Change Log
----------

v3.8.2

Fixed and faster myr-gr.
Added x12 algo (Galaxie Cash), allium algo (Garlicoin).
Faster lyra2rev2, lbry, skein.
Large reduction in compiler warnings.

v3.8.1.1

Fixed Windows AVX2 crash.
Expand Down
3 changes: 3 additions & 0 deletions algo-gate-api.c
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@ bool register_algo_gate( int algo, algo_gate_t *gate )

switch (algo)
{
case ALGO_ALLIUM: register_allium_algo ( gate ); break;
case ALGO_ANIME: register_anime_algo ( gate ); break;
case ALGO_ARGON2: register_argon2_algo ( gate ); break;
case ALGO_AXIOM: register_axiom_algo ( gate ); break;
Expand Down Expand Up @@ -213,6 +214,7 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
case ALGO_X11: register_x11_algo ( gate ); break;
case ALGO_X11EVO: register_x11evo_algo ( gate ); break;
case ALGO_X11GOST: register_x11gost_algo ( gate ); break;
case ALGO_X12: register_x12_algo ( gate ); break;
case ALGO_X13: register_x13_algo ( gate ); break;
case ALGO_X13SM3: register_x13sm3_algo ( gate ); break;
case ALGO_X14: register_x14_algo ( gate ); break;
Expand Down Expand Up @@ -298,6 +300,7 @@ const char* const algo_alias_map[][2] =
{ "lyra2", "lyra2re" },
{ "lyra2v2", "lyra2rev2" },
{ "lyra2zoin", "lyra2z330" },
{ "myrgr", "myr-gr" },
{ "myriad", "myr-gr" },
{ "neo", "neoscrypt" },
{ "phi", "phi1612" },
Expand Down
2 changes: 1 addition & 1 deletion algo/blake/blakecoin-4way.c
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ int scanhash_blakecoin_4way( int thr_id, struct work *work, uint32_t max_nonce,
if ( ( n >= max_nonce ) && ( *hashes_done < 10 ) )
{
*hashes_done = 0;
sleep(1);
// sleep(1);
}

return num_found;
Expand Down
10 changes: 5 additions & 5 deletions algo/blake/decred-4way.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@ static __thread blake256_4way_context blake_mid;
void decred_hash_4way( void *state, const void *input )
{
uint32_t vhash[8*4] __attribute__ ((aligned (64)));
uint32_t hash0[8] __attribute__ ((aligned (32)));
uint32_t hash1[8] __attribute__ ((aligned (32)));
uint32_t hash2[8] __attribute__ ((aligned (32)));
uint32_t hash3[8] __attribute__ ((aligned (32)));
void *tail = input + ( DECRED_MIDSTATE_LEN << 2 );
// uint32_t hash0[8] __attribute__ ((aligned (32)));
// uint32_t hash1[8] __attribute__ ((aligned (32)));
// uint32_t hash2[8] __attribute__ ((aligned (32)));
// uint32_t hash3[8] __attribute__ ((aligned (32)));
const void *tail = input + ( DECRED_MIDSTATE_LEN << 2 );
int tail_len = 180 - DECRED_MIDSTATE_LEN;
blake256_4way_context ctx __attribute__ ((aligned (64)));

Expand Down
95 changes: 43 additions & 52 deletions algo/bmw/bmw-hash-4way.c
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,6 @@ extern "C"{

// BMW256

// BMW small has a bug not present in big. Lanes 0 & 2 produce valid hash
// while lanes 1 & 3 produce invalid hash. The cause is not known.
// Some things that could cause it are: using epi64 instead of epi32,
// a memory write that is the wrong size, an attempt to index a vector
// like an array (only works for 64 bit elements).


static const sph_u32 IV256[] = {
SPH_C32(0x40414243), SPH_C32(0x44454647),
SPH_C32(0x48494A4B), SPH_C32(0x4C4D4E4F),
Expand Down Expand Up @@ -123,16 +116,14 @@ static const sph_u64 IV512[] = {
mm_rotl_32( M[ ( (j) + (off) ) & 0xF ] , \
( ( (j) + (off) ) & 0xF ) + 1 )

// The multiplication in this macro is a possible cause of the lane
// corruption but a vectorized mullo did not help.
#define add_elt_s( M, H, j ) \
_mm_xor_si128( \
_mm_add_epi32( \
_mm_sub_epi32( _mm_add_epi32( rol_off_32( M, j, 0 ), \
rol_off_32( M, j, 3 ) ), \
rol_off_32( M, j, 10 ) ), \
_mm_set1_epi32( ( (j) + 16 ) * 0x05555555UL ) \
), H[ ( (j)+7 ) & 0xF ] )
_mm_add_epi32( \
_mm_sub_epi32( _mm_add_epi32( rol_off_32( M, j, 0 ), \
rol_off_32( M, j, 3 ) ), \
rol_off_32( M, j, 10 ) ), \
_mm_set1_epi32( ( (j)+16 ) * SPH_C32(0x05555555UL) ) ), \
H[ ( (j)+7 ) & 0xF ] )


#define expand1s( qt, M, H, i ) \
Expand Down Expand Up @@ -449,22 +440,22 @@ void compress_small( const __m128i *M, const __m128i H[16], __m128i dH[16] )
{
__m128i qt[32], xl, xh; \

qt[ 0] = ss0( Ws0 ) + H[ 1];
qt[ 1] = ss1( Ws1 ) + H[ 2];
qt[ 2] = ss2( Ws2 ) + H[ 3];
qt[ 3] = ss3( Ws3 ) + H[ 4];
qt[ 4] = ss4( Ws4 ) + H[ 5];
qt[ 5] = ss0( Ws5 ) + H[ 6];
qt[ 6] = ss1( Ws6 ) + H[ 7];
qt[ 7] = ss2( Ws7 ) + H[ 8];
qt[ 8] = ss3( Ws8 ) + H[ 9];
qt[ 9] = ss4( Ws9 ) + H[10];
qt[10] = ss0( Ws10) + H[11];
qt[11] = ss1( Ws11) + H[12];
qt[12] = ss2( Ws12) + H[13];
qt[13] = ss3( Ws13) + H[14];
qt[14] = ss4( Ws14) + H[15];
qt[15] = ss0( Ws15) + H[ 0];
qt[ 0] = _mm_add_epi32( ss0( Ws0 ), H[ 1] );
qt[ 1] = _mm_add_epi32( ss1( Ws1 ), H[ 2] );
qt[ 2] = _mm_add_epi32( ss2( Ws2 ), H[ 3] );
qt[ 3] = _mm_add_epi32( ss3( Ws3 ), H[ 4] );
qt[ 4] = _mm_add_epi32( ss4( Ws4 ), H[ 5] );
qt[ 5] = _mm_add_epi32( ss0( Ws5 ), H[ 6] );
qt[ 6] = _mm_add_epi32( ss1( Ws6 ), H[ 7] );
qt[ 7] = _mm_add_epi32( ss2( Ws7 ), H[ 8] );
qt[ 8] = _mm_add_epi32( ss3( Ws8 ), H[ 9] );
qt[ 9] = _mm_add_epi32( ss4( Ws9 ), H[10] );
qt[10] = _mm_add_epi32( ss0( Ws10), H[11] );
qt[11] = _mm_add_epi32( ss1( Ws11), H[12] );
qt[12] = _mm_add_epi32( ss2( Ws12), H[13] );
qt[13] = _mm_add_epi32( ss3( Ws13), H[14] );
qt[14] = _mm_add_epi32( ss4( Ws14), H[15] );
qt[15] = _mm_add_epi32( ss0( Ws15), H[ 0] );
qt[16] = expand1s( qt, M, H, 16 );
qt[17] = expand1s( qt, M, H, 17 );
qt[18] = expand2s( qt, M, H, 18 );
Expand Down Expand Up @@ -740,24 +731,24 @@ void compress_small( const __m128i *M, const __m128i H[16], __m128i dH[16] )

void compress_big( const __m256i *M, const __m256i H[16], __m256i dH[16] )
{
__m256i qt[32], xl, xh; \

qt[ 0] = sb0( Wb0 ) + H[ 1];
qt[ 1] = sb1( Wb1 ) + H[ 2];
qt[ 2] = sb2( Wb2 ) + H[ 3];
qt[ 3] = sb3( Wb3 ) + H[ 4];
qt[ 4] = sb4( Wb4 ) + H[ 5];
qt[ 5] = sb0( Wb5 ) + H[ 6];
qt[ 6] = sb1( Wb6 ) + H[ 7];
qt[ 7] = sb2( Wb7 ) + H[ 8];
qt[ 8] = sb3( Wb8 ) + H[ 9];
qt[ 9] = sb4( Wb9 ) + H[10];
qt[10] = sb0( Wb10) + H[11];
qt[11] = sb1( Wb11) + H[12];
qt[12] = sb2( Wb12) + H[13];
qt[13] = sb3( Wb13) + H[14];
qt[14] = sb4( Wb14) + H[15];
qt[15] = sb0( Wb15) + H[ 0];
__m256i qt[32], xl, xh;

qt[ 0] = _mm256_add_epi64( sb0( Wb0 ), H[ 1] );
qt[ 1] = _mm256_add_epi64( sb1( Wb1 ), H[ 2] );
qt[ 2] = _mm256_add_epi64( sb2( Wb2 ), H[ 3] );
qt[ 3] = _mm256_add_epi64( sb3( Wb3 ), H[ 4] );
qt[ 4] = _mm256_add_epi64( sb4( Wb4 ), H[ 5] );
qt[ 5] = _mm256_add_epi64( sb0( Wb5 ), H[ 6] );
qt[ 6] = _mm256_add_epi64( sb1( Wb6 ), H[ 7] );
qt[ 7] = _mm256_add_epi64( sb2( Wb7 ), H[ 8] );
qt[ 8] = _mm256_add_epi64( sb3( Wb8 ), H[ 9] );
qt[ 9] = _mm256_add_epi64( sb4( Wb9 ), H[10] );
qt[10] = _mm256_add_epi64( sb0( Wb10), H[11] );
qt[11] = _mm256_add_epi64( sb1( Wb11), H[12] );
qt[12] = _mm256_add_epi64( sb2( Wb12), H[13] );
qt[13] = _mm256_add_epi64( sb3( Wb13), H[14] );
qt[14] = _mm256_add_epi64( sb4( Wb14), H[15] );
qt[15] = _mm256_add_epi64( sb0( Wb15), H[ 0] );
qt[16] = expand1b( qt, M, H, 16 );
qt[17] = expand1b( qt, M, H, 17 );
qt[18] = expand2b( qt, M, H, 18 );
Expand Down Expand Up @@ -870,7 +861,7 @@ void compress_big( const __m256i *M, const __m256i H[16], __m256i dH[16] )
}

// BMW256
/*

static const uint32_t final_s[16][4] =
{
{ 0xaaaaaaa0, 0xaaaaaaa0, 0xaaaaaaa0, 0xaaaaaaa0 },
Expand All @@ -890,7 +881,7 @@ static const uint32_t final_s[16][4] =
{ 0xaaaaaaae, 0xaaaaaaae, 0xaaaaaaae, 0xaaaaaaae },
{ 0xaaaaaaaf, 0xaaaaaaaf, 0xaaaaaaaf, 0xaaaaaaaf }
};
*/
/*
static const __m128i final_s[16] =
{
{ 0xaaaaaaa0aaaaaaa0, 0xaaaaaaa0aaaaaaa0 },
Expand All @@ -910,7 +901,7 @@ static const __m128i final_s[16] =
{ 0xaaaaaaaeaaaaaaae, 0xaaaaaaaeaaaaaaae },
{ 0xaaaaaaafaaaaaaaf, 0xaaaaaaafaaaaaaaf }
};

*/
static void
bmw32_4way_init(bmw_4way_small_context *sc, const sph_u32 *iv)
{
Expand Down
Loading

0 comments on commit d60a268

Please sign in to comment.