v3.8.2

JayDDee · Feb 15, 2018 · d60a268 · d60a268
1 parent e4265a6
commit d60a268
Show file tree

Hide file tree

Showing 57 changed files with 3,471 additions and 2,137 deletions.
diff --git a/Makefile.am b/Makefile.am
@@ -70,6 +70,8 @@ cpuminer_SOURCES = \
   algo/gost/sph_gost.c \
   algo/groestl/sph_groestl.c \
   algo/groestl/groestl.c \
+  algo/groestl/myrgr-gate.c \
+  algo/groestl/myrgr-4way.c \
   algo/groestl/myr-groestl.c \
   algo/groestl/aes_ni/hash-groestl.c \
   algo/groestl/aes_ni/hash-groestl256.c \
@@ -97,7 +99,6 @@ cpuminer_SOURCES = \
   algo/keccak/keccak-4way.c\
   algo/keccak/keccak-gate.c \
   algo/keccak/sse2/keccak.c \
-  algo/lbry.c \
   algo/luffa/sph_luffa.c \
   algo/luffa/luffa.c \
   algo/luffa/luffa_for_sse2.c \
@@ -115,6 +116,9 @@ cpuminer_SOURCES = \
   algo/lyra2/lyra2h-gate.c \
   algo/lyra2/lyra2h.c \
   algo/lyra2/lyra2h-4way.c \
+  algo/lyra2/allium-gate.c \
+  algo/lyra2/allium-4way.c \
+  algo/lyra2/allium.c \
   algo/m7m.c \
   algo/neoscrypt/neoscrypt.c \
   algo/nist5/nist5-gate.c \
@@ -135,6 +139,10 @@ cpuminer_SOURCES = \
   algo/qubit/deep-2way.c \
   algo/qubit/deep.c \
   algo/ripemd/sph_ripemd.c \
+  algo/ripemd/ripemd-hash-4way.c \
+  algo/ripemd/lbry-gate.c \
+  algo/ripemd/lbry.c \
+  algo/ripemd/lbry-4way.c \
   algo/scrypt.c \
   algo/scryptjane/scrypt-jane.c \
   algo/sha/sph_sha2.c \
@@ -190,6 +198,9 @@ cpuminer_SOURCES = \
   algo/x11/x11evo.c \
   algo/x11/x11evo-4way.c \
   algo/x11/x11evo-gate.c \
+  algo/x12/x12-gate.c \
+  algo/x12/x12.c \
+  algo/x12/x12-4way.c \
   algo/x13/x13-gate.c \
   algo/x13/x13.c \
   algo/x13/x13-4way.c \

diff --git a/README.md b/README.md
@@ -13,6 +13,29 @@ mailto://[email protected]
 
 See file RELEASE_NOTES for change log and compile instructions.
 
+Requirements
+------------
+
+1. An x86_64 architecture CPU with a minimum of SSE2 support. This includes
+Intel Core2 and newer and AMD equivalents. In order to take advantage of AES_NI
+optimizations a CPU with AES_NI is required. This includes Intel Westmere
+and newer and AMD equivalents. Further optimizations are available on some
+algorithms for CPUs with AVX and AVX2, Sandybridge and Haswell respectively.
+
+Older CPUs are supported by cpuminer-multi by TPruvot but at reduced
+performance.
+
+ARM CPUs are not supported.
+
+2. 64 bit Linux OS. Ubuntu and Fedora based distributions, including Mint and
+CentOS, are known to work and have all dependencies in their repositories.
+Others may work but may require more effort.
+64 bit Windows OS is supported with mingw_w64 and msys or pre-built binaries.
+
+macOS (OS X) is not supported.
+
+3. Stratum pool. Some algos may work for wallet mining using getwork.
+
 Supported Algorithms
 --------------------
 
@@ -75,6 +98,7 @@ Supported Algorithms
                           x11          Dash
                           x11evo       Revolvercoin
                           x11gost      sib (SibCoin)
+                          x12          Galaxie Cash (GCH)
                           x13          X13
                           x13sm3       hsr (Hshare)
                           x14          X14
@@ -87,29 +111,6 @@ Supported Algorithms
                           yescryptr16  Yenten (YTN)
                           zr5          Ziftr
 
-Requirements
-------------
-
-1. A x86_64 architecture CPU with a minimum of SSE2 support. This includes
-Intel Core2 and newer and AMD equivalents. In order to take advantage of AES_NI
-optimizations a CPU with AES_NI is required. This includes Intel Westbridge
-and newer and AMD equivalents. Further optimizations are available on some
-algoritms for CPUs with AVX and AVX2, Sandybridge and Haswell respectively.
-
-Older CPUs are supported by cpuminer-multi by TPruvot but at reduced
-performance.
-
-ARM CPUs are not supported.
-
-2. 64 bit Linux OS. Ubuntu and Fedora based distributions, including Mint and
-Centos are known to work and have all dependencies in their repositories.
-Others may work but may require more effort.
-64 bit Windows OS is supported with mingw_w64 and msys or pre-built binaries.
-
-MacOS, OSx is not supported.
-
-3. Stratum pool. Some algos may work wallet mining using getwork.
-
 Errata
 ------
 

diff --git a/RELEASE_NOTES b/RELEASE_NOTES
@@ -159,6 +159,13 @@ Support for even older x86_64 without AES_NI or SSE2 is not availble.
 Change Log
 ----------
 
+v3.8.2
+
+Fixed and faster myr-gr.
+Added x12 algo (Galaxie Cash), allium algo (Garlicoin).
+Faster lyra2rev2, lbry, skein.
+Large reduction in compiler warnings.
+
 v3.8.1.1
 
 Fixed Windows AVX2 crash.

diff --git a/algo-gate-api.c b/algo-gate-api.c
@@ -155,6 +155,7 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
 
    switch (algo)
    {
+     case ALGO_ALLIUM:       register_allium_algo      ( gate ); break;
      case ALGO_ANIME:        register_anime_algo       ( gate ); break;
      case ALGO_ARGON2:       register_argon2_algo      ( gate ); break;
      case ALGO_AXIOM:        register_axiom_algo       ( gate ); break;
@@ -213,6 +214,7 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
      case ALGO_X11:          register_x11_algo         ( gate ); break;
      case ALGO_X11EVO:       register_x11evo_algo      ( gate ); break;
      case ALGO_X11GOST:      register_x11gost_algo     ( gate ); break;
+     case ALGO_X12:          register_x12_algo         ( gate ); break;
      case ALGO_X13:          register_x13_algo         ( gate ); break;
      case ALGO_X13SM3:       register_x13sm3_algo      ( gate ); break;
      case ALGO_X14:          register_x14_algo         ( gate ); break;
@@ -298,6 +300,7 @@ const char* const algo_alias_map[][2] =
   { "lyra2",             "lyra2re"      },
   { "lyra2v2",           "lyra2rev2"    },
   { "lyra2zoin",         "lyra2z330"    },
+  { "myrgr",             "myr-gr"       },
   { "myriad",            "myr-gr"       },
   { "neo",               "neoscrypt"    },
   { "phi",               "phi1612"      },

diff --git a/algo/blake/blakecoin-4way.c b/algo/blake/blakecoin-4way.c
@@ -96,7 +96,7 @@ int scanhash_blakecoin_4way( int thr_id, struct work *work, uint32_t max_nonce,
    if ( ( n >= max_nonce ) && ( *hashes_done < 10 ) )
    {
       *hashes_done = 0;
-      sleep(1);
+//      sleep(1);
    }
 
    return num_found;

diff --git a/algo/blake/decred-4way.c b/algo/blake/decred-4way.c
@@ -12,11 +12,11 @@ static __thread blake256_4way_context blake_mid;
 void decred_hash_4way( void *state, const void *input )
 {
      uint32_t vhash[8*4] __attribute__ ((aligned (64)));
-     uint32_t hash0[8] __attribute__ ((aligned (32)));
-     uint32_t hash1[8] __attribute__ ((aligned (32)));
-     uint32_t hash2[8] __attribute__ ((aligned (32)));
-     uint32_t hash3[8] __attribute__ ((aligned (32)));
-     void *tail = input + ( DECRED_MIDSTATE_LEN << 2 );
+//     uint32_t hash0[8] __attribute__ ((aligned (32)));
+//     uint32_t hash1[8] __attribute__ ((aligned (32)));
+//     uint32_t hash2[8] __attribute__ ((aligned (32)));
+//     uint32_t hash3[8] __attribute__ ((aligned (32)));
+     const void *tail = input + ( DECRED_MIDSTATE_LEN << 2 );
      int tail_len = 180 - DECRED_MIDSTATE_LEN; 
      blake256_4way_context ctx __attribute__ ((aligned (64)));
 

diff --git a/algo/bmw/bmw-hash-4way.c b/algo/bmw/bmw-hash-4way.c
@@ -49,13 +49,6 @@ extern "C"{
 
 // BMW256
 
-// BMW small has a bug not present in big. Lanes 0 & 2 produce valid hash
-// while lanes 1 & 3 produce invalid hash. The cause is not known.
-// Some things that could cause it are: using epi64 instead of epi32,
-// a memory write that is the wrong size, an attempt to index a vector
-// like an array (only works for 64 bit elements).  
-
-
 static const sph_u32 IV256[] = {
 	SPH_C32(0x40414243), SPH_C32(0x44454647),
 	SPH_C32(0x48494A4B), SPH_C32(0x4C4D4E4F),
@@ -123,16 +116,14 @@ static const sph_u64 IV512[] = {
    mm_rotl_32( M[ ( (j) + (off) ) & 0xF ] , \
                 ( ( (j) + (off) ) & 0xF ) + 1 )
 
-// The multiplication in this macro is a possible cause of the lane
-// corruption but a vectorized mullo did not help.
 #define add_elt_s( M, H, j ) \
    _mm_xor_si128( \
-      _mm_add_epi32( \
-            _mm_sub_epi32( _mm_add_epi32( rol_off_32( M, j, 0 ), \
-                                          rol_off_32( M, j, 3 ) ), \
-                           rol_off_32( M, j, 10 ) ), \
-            _mm_set1_epi32( ( (j) + 16 ) * 0x05555555UL ) \
-                   ), H[ ( (j)+7 ) & 0xF ] )
+       _mm_add_epi32( \
+             _mm_sub_epi32( _mm_add_epi32( rol_off_32( M, j, 0 ), \
+                                           rol_off_32( M, j, 3 ) ), \
+                            rol_off_32( M, j, 10 ) ), \
+       _mm_set1_epi32( ( (j)+16 ) * SPH_C32(0x05555555UL) ) ), \
+   H[ ( (j)+7 ) & 0xF ] )
 
 
 #define expand1s( qt, M, H, i ) \
@@ -449,22 +440,22 @@ void compress_small( const __m128i *M, const __m128i H[16], __m128i dH[16] )
 {
    __m128i qt[32], xl, xh; \
 
-   qt[ 0] = ss0( Ws0 ) + H[ 1];
-   qt[ 1] = ss1( Ws1 ) + H[ 2];
-   qt[ 2] = ss2( Ws2 ) + H[ 3];
-   qt[ 3] = ss3( Ws3 ) + H[ 4];
-   qt[ 4] = ss4( Ws4 ) + H[ 5];
-   qt[ 5] = ss0( Ws5 ) + H[ 6];
-   qt[ 6] = ss1( Ws6 ) + H[ 7];
-   qt[ 7] = ss2( Ws7 ) + H[ 8];
-   qt[ 8] = ss3( Ws8 ) + H[ 9];
-   qt[ 9] = ss4( Ws9 ) + H[10];
-   qt[10] = ss0( Ws10) + H[11];
-   qt[11] = ss1( Ws11) + H[12];
-   qt[12] = ss2( Ws12) + H[13];
-   qt[13] = ss3( Ws13) + H[14];
-   qt[14] = ss4( Ws14) + H[15];
-   qt[15] = ss0( Ws15) + H[ 0];
+   qt[ 0] = _mm_add_epi32( ss0( Ws0 ), H[ 1] );
+   qt[ 1] = _mm_add_epi32( ss1( Ws1 ), H[ 2] );
+   qt[ 2] = _mm_add_epi32( ss2( Ws2 ), H[ 3] );
+   qt[ 3] = _mm_add_epi32( ss3( Ws3 ), H[ 4] );
+   qt[ 4] = _mm_add_epi32( ss4( Ws4 ), H[ 5] );
+   qt[ 5] = _mm_add_epi32( ss0( Ws5 ), H[ 6] );
+   qt[ 6] = _mm_add_epi32( ss1( Ws6 ), H[ 7] );
+   qt[ 7] = _mm_add_epi32( ss2( Ws7 ), H[ 8] );
+   qt[ 8] = _mm_add_epi32( ss3( Ws8 ), H[ 9] );
+   qt[ 9] = _mm_add_epi32( ss4( Ws9 ), H[10] );
+   qt[10] = _mm_add_epi32( ss0( Ws10), H[11] );
+   qt[11] = _mm_add_epi32( ss1( Ws11), H[12] );
+   qt[12] = _mm_add_epi32( ss2( Ws12), H[13] );
+   qt[13] = _mm_add_epi32( ss3( Ws13), H[14] );
+   qt[14] = _mm_add_epi32( ss4( Ws14), H[15] );
+   qt[15] = _mm_add_epi32( ss0( Ws15), H[ 0] );
    qt[16] = expand1s( qt, M, H, 16 );
    qt[17] = expand1s( qt, M, H, 17 );
    qt[18] = expand2s( qt, M, H, 18 );
@@ -740,24 +731,24 @@ void compress_small( const __m128i *M, const __m128i H[16], __m128i dH[16] )
 
 void compress_big( const __m256i *M, const __m256i H[16], __m256i dH[16] )
 {
-   __m256i qt[32], xl, xh; \
-
-   qt[ 0] = sb0( Wb0 ) + H[ 1]; 
-   qt[ 1] = sb1( Wb1 ) + H[ 2]; 
-   qt[ 2] = sb2( Wb2 ) + H[ 3]; 
-   qt[ 3] = sb3( Wb3 ) + H[ 4]; 
-   qt[ 4] = sb4( Wb4 ) + H[ 5]; 
-   qt[ 5] = sb0( Wb5 ) + H[ 6]; 
-   qt[ 6] = sb1( Wb6 ) + H[ 7]; 
-   qt[ 7] = sb2( Wb7 ) + H[ 8]; 
-   qt[ 8] = sb3( Wb8 ) + H[ 9]; 
-   qt[ 9] = sb4( Wb9 ) + H[10]; 
-   qt[10] = sb0( Wb10) + H[11]; 
-   qt[11] = sb1( Wb11) + H[12]; 
-   qt[12] = sb2( Wb12) + H[13]; 
-   qt[13] = sb3( Wb13) + H[14];
-   qt[14] = sb4( Wb14) + H[15]; 
-   qt[15] = sb0( Wb15) + H[ 0]; 
+   __m256i qt[32], xl, xh;
+
+   qt[ 0] = _mm256_add_epi64( sb0( Wb0 ), H[ 1] ); 
+   qt[ 1] = _mm256_add_epi64( sb1( Wb1 ), H[ 2] ); 
+   qt[ 2] = _mm256_add_epi64( sb2( Wb2 ), H[ 3] ); 
+   qt[ 3] = _mm256_add_epi64( sb3( Wb3 ), H[ 4] ); 
+   qt[ 4] = _mm256_add_epi64( sb4( Wb4 ), H[ 5] ); 
+   qt[ 5] = _mm256_add_epi64( sb0( Wb5 ), H[ 6] ); 
+   qt[ 6] = _mm256_add_epi64( sb1( Wb6 ), H[ 7] ); 
+   qt[ 7] = _mm256_add_epi64( sb2( Wb7 ), H[ 8] ); 
+   qt[ 8] = _mm256_add_epi64( sb3( Wb8 ), H[ 9] ); 
+   qt[ 9] = _mm256_add_epi64( sb4( Wb9 ), H[10] ); 
+   qt[10] = _mm256_add_epi64( sb0( Wb10), H[11] ); 
+   qt[11] = _mm256_add_epi64( sb1( Wb11), H[12] ); 
+   qt[12] = _mm256_add_epi64( sb2( Wb12), H[13] ); 
+   qt[13] = _mm256_add_epi64( sb3( Wb13), H[14] );
+   qt[14] = _mm256_add_epi64( sb4( Wb14), H[15] ); 
+   qt[15] = _mm256_add_epi64( sb0( Wb15), H[ 0] ); 
    qt[16] = expand1b( qt, M, H, 16 ); 
    qt[17] = expand1b( qt, M, H, 17 ); 
    qt[18] = expand2b( qt, M, H, 18 ); 
@@ -870,7 +861,7 @@ void compress_big( const __m256i *M, const __m256i H[16], __m256i dH[16] )
 } 
 
 // BMW256
-/*
+
 static const uint32_t final_s[16][4] =
 {
    { 0xaaaaaaa0, 0xaaaaaaa0, 0xaaaaaaa0, 0xaaaaaaa0 },
@@ -890,7 +881,7 @@ static const uint32_t final_s[16][4] =
    { 0xaaaaaaae, 0xaaaaaaae, 0xaaaaaaae, 0xaaaaaaae },
    { 0xaaaaaaaf, 0xaaaaaaaf, 0xaaaaaaaf, 0xaaaaaaaf }
 };
-*/
+/*
 static const __m128i final_s[16] =
 {
    { 0xaaaaaaa0aaaaaaa0, 0xaaaaaaa0aaaaaaa0 },
@@ -910,7 +901,7 @@ static const __m128i final_s[16] =
    { 0xaaaaaaaeaaaaaaae, 0xaaaaaaaeaaaaaaae },
    { 0xaaaaaaafaaaaaaaf, 0xaaaaaaafaaaaaaaf }
 };
-
+*/
 static void
 bmw32_4way_init(bmw_4way_small_context *sc, const sph_u32 *iv)
 {