Skip to content

Commit

Permalink
v3.7.6
Browse files Browse the repository at this point in the history
  • Loading branch information
JayDDee committed Dec 14, 2017
1 parent af1c940 commit 7a13899
Show file tree
Hide file tree
Showing 31 changed files with 1,279 additions and 371 deletions.
5 changes: 4 additions & 1 deletion Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ cpuminer_SOURCES = \
algo/lyra2/lyra2z.c \
algo/lyra2/lyra2z-4way.c \
algo/lyra2/lyra2z330.c \
algo/lyra2/lyra2h.c \
algo/m7m.c \
algo/neoscrypt.c \
algo/nist5/nist5-gate.c \
Expand All @@ -128,6 +129,7 @@ cpuminer_SOURCES = \
algo/sha/sha256t.c \
algo/shabal/sph_shabal.c \
algo/shavite/sph_shavite.c \
algo/shavite/sph-shavite-aesni.c \
algo/shavite/shavite.c \
algo/simd/sph_simd.c \
algo/simd/sse2/nist.c \
Expand Down Expand Up @@ -155,11 +157,12 @@ cpuminer_SOURCES = \
algo/whirlpool/whirlpool-4way.c \
algo/whirlpool/whirlpool.c \
algo/whirlpool/whirlpoolx.c \
algo/x11/phi1612.c \
algo/x11/x11-gate.c \
algo/x11/x11.c \
algo/x11/x11evo.c \
algo/x11/x11gost.c \
algo/x11/c11.c \
algo/x11/phi1612.c \
algo/x13/x13.c \
algo/x13/x13sm3.c \
algo/x14/x14.c \
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ Supported Algorithms
keccakc Creative coin
lbry LBC, LBRY Credits
luffa Luffa
lyra2h Hppcoin
lyra2re lyra2
lyra2rev2 lyra2v2, Vertcoin
lyra2z Zcoin (XZC)
Expand Down
6 changes: 6 additions & 0 deletions RELEASE_NOTES
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,12 @@ Support for even older x86_64 without AES_NI or SSE2 is not available.
Change Log
----------

v3.7.6

Added lyra2h algo for Hppcoin.
Added support for more than 64 CPUs.
Optimized shavite512 with AES, improves x11 etc.

v3.7.5

New algo keccakc for Creative coin with 4way optimizations
Expand Down
17 changes: 8 additions & 9 deletions algo-gate-api.c
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,10 @@ void init_algo_gate( algo_gate_t* gate )
gate->work_cmp_size = STD_WORK_CMP_SIZE;
}

// Ignore warnings for not yet defined register functions
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wimplicit-function-declaration"

// called by each thread that uses the gate
bool register_algo_gate( int algo, algo_gate_t *gate )
{
Expand All @@ -151,11 +155,6 @@ bool register_algo_gate( int algo, algo_gate_t *gate )

switch (algo)
{

// Ignore warnings for not yet defined register fucntions
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wimplicit-function-declaration"

case ALGO_ARGON2: register_argon2_algo ( gate ); break;
case ALGO_AXIOM: register_axiom_algo ( gate ); break;
case ALGO_BASTION: register_bastion_algo ( gate ); break;
Expand All @@ -180,6 +179,7 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
case ALGO_KECCAKC: register_keccakc_algo ( gate ); break;
case ALGO_LBRY: register_lbry_algo ( gate ); break;
case ALGO_LUFFA: register_luffa_algo ( gate ); break;
case ALGO_LYRA2H: register_lyra2h_algo ( gate ); break;
case ALGO_LYRA2RE: register_lyra2re_algo ( gate ); break;
case ALGO_LYRA2REV2: register_lyra2rev2_algo ( gate ); break;
case ALGO_LYRA2Z: register_lyra2z_algo ( gate ); break;
Expand Down Expand Up @@ -221,10 +221,6 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
case ALGO_YESCRYPT: register_yescrypt_algo ( gate ); break;
case ALGO_YESCRYPTR16: register_yescryptr16_algo ( gate ); break;
case ALGO_ZR5: register_zr5_algo ( gate ); break;

// restore warnings
#pragma GCC diagnostic pop

default:
applog(LOG_ERR,"FAIL: algo_gate registration failed, unknown algo %s.\n", algo_names[opt_algo] );
return false;
Expand All @@ -239,6 +235,9 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
return true;
}

// restore warnings
#pragma GCC diagnostic pop

// override std defaults with jr2 defaults
bool register_json_rpc2( algo_gate_t *gate )
{
Expand Down
24 changes: 11 additions & 13 deletions algo/blake/blake-4way.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,12 @@ int scanhash_blake_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done )
{
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t hash[4*4] __attribute__ ((aligned (32)));
uint32_t hash[8*4] __attribute__ ((aligned (32)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
// uint32_t HTarget = ptarget[7];
uint32_t _ALIGN(32) endiandata[20];
uint32_t _ALIGN(32) edata[20];
uint32_t n = first_nonce;
uint32_t *nonces = work->nonces;
bool *found = work->nfound;
Expand All @@ -47,18 +47,17 @@ int scanhash_blake_4way( int thr_id, struct work *work, uint32_t max_nonce,
// HTarget = 0x7f;

// we need big endian data...
swab32_array( endiandata, pdata, 20 );
swab32_array( edata, pdata, 20 );

mm_interleave_4x32( vdata, endiandata, endiandata, endiandata,
endiandata, 640 );
mm_interleave_4x32( vdata, edata, edata, edata, edata, 640 );

uint32_t *noncep = vdata + 76; // 19*4
do {
found[0] = found[1] = found[2] = found[3] = false;
be32enc( noncep, n );
be32enc( noncep +2, n+1 );
be32enc( noncep +4, n+2 );
be32enc( noncep +6, n+3 );
be32enc( noncep +1, n+1 );
be32enc( noncep +2, n+2 );
be32enc( noncep +3, n+3 );

blakehash_4way( hash, vdata );

Expand All @@ -74,7 +73,7 @@ int scanhash_blake_4way( int thr_id, struct work *work, uint32_t max_nonce,
}
if ( (hash+8)[7] == 0 )
{
if ( fulltest( hash, ptarget ) )
if ( fulltest( hash+8, ptarget ) )
{
found[1] = true;
num_found++;
Expand All @@ -83,7 +82,7 @@ int scanhash_blake_4way( int thr_id, struct work *work, uint32_t max_nonce,
}
if ( (hash+16)[7] == 0 )
{
if ( fulltest( hash, ptarget ) )
if ( fulltest( hash+8, ptarget ) )
{
found[2] = true;
num_found++;
Expand All @@ -92,15 +91,14 @@ int scanhash_blake_4way( int thr_id, struct work *work, uint32_t max_nonce,
}
if ( (hash+24)[7] == 0 )
{
if ( fulltest( hash, ptarget ) )
if ( fulltest( hash+8, ptarget ) )
{
found[3] = true;
num_found++;
nonces[3] = n+3;
}
}

n += 4;
n += 4;
*hashes_done = n - first_nonce + 1;

} while ( (num_found == 0) && (n < max_nonce)
Expand Down
1 change: 0 additions & 1 deletion algo/blake/blake-gate.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ bool register_blake_algo( algo_gate_t* gate )
gate->optimizations = FOUR_WAY_OPT;
gate->scanhash = (void*)&scanhash_blake_4way;
gate->hash = (void*)&blakehash_4way;
four_way_not_tested();
#else
gate->scanhash = (void*)&scanhash_blake;
gate->hash = (void*)&blakehash;
Expand Down
63 changes: 33 additions & 30 deletions algo/blake/blake-hash-4way.c
Original file line number Diff line number Diff line change
Expand Up @@ -524,18 +524,18 @@ do { \
V5 = H5; \
V6 = H6; \
V7 = H7; \
V8 = _mm_xor_si128( s0, _mmset_epi32( CS0, CS0, CS0, CS0 ) ); \
V9 = _mm_xor_si128( s1, _mmset_epi32( CS1, CS1, CS1, CS1 ) ); \
VA = _mm_xor_si128( s2, _mmset_epi32( CS2, CS2, CS2, CS2 ) ); \
VB = _mm_xor_si128( s3, _mmset_epi32( CS3, CS3, CS3, CS3 ) ); \
VC = _mm_xor_si128( _mmset_epi32( T0, T0, T0, T0 ), \
_mmset_epi32( CS4, CS4, CS4, CS4 ) ); \
VD = _mm_xor_si128( _mmset_epi32( T0, T0, T0, T0 ), \
_mmset_epi32( CS5, CS5, CS5, CS5 ) ); \
VE = _mm_xor_si128( _mmset_epi32( T1, T1, T1, T1 ) \
, _mmset_epi32( CS6, CS6, CS6, CS6 ) ); \
VF = _mm_xor_si128( _mmset_epi32( T1, T1, T1, T1 ), \
_mmset_epi32( CS7, CS7, CS7, CS7 ) ); \
V8 = _mm_xor_si128( S0, _mm_set_epi32( CS0, CS0, CS0, CS0 ) ); \
V9 = _mm_xor_si128( S1, _mm_set_epi32( CS1, CS1, CS1, CS1 ) ); \
VA = _mm_xor_si128( S2, _mm_set_epi32( CS2, CS2, CS2, CS2 ) ); \
VB = _mm_xor_si128( S3, _mm_set_epi32( CS3, CS3, CS3, CS3 ) ); \
VC = _mm_xor_si128( _mm_set_epi32( T0, T0, T0, T0 ), \
_mm_set_epi32( CS4, CS4, CS4, CS4 ) ); \
VD = _mm_xor_si128( _mm_set_epi32( T0, T0, T0, T0 ), \
_mm_set_epi32( CS5, CS5, CS5, CS5 ) ); \
VE = _mm_xor_si128( _mm_set_epi32( T1, T1, T1, T1 ) \
, _mm_set_epi32( CS6, CS6, CS6, CS6 ) ); \
VF = _mm_xor_si128( _mm_set_epi32( T1, T1, T1, T1 ), \
_mm_set_epi32( CS7, CS7, CS7, CS7 ) ); \
M[0x0] = mm_byteswap_32( *(buf + 0) ); \
M[0x1] = mm_byteswap_32( *(buf + 1) ); \
M[0x2] = mm_byteswap_32( *(buf + 2) ); \
Expand Down Expand Up @@ -710,18 +710,18 @@ do { \
V5 = H5; \
V6 = H6; \
V7 = H7; \
V8 = _mm256_xor_si256( S0, _mm256_set_epi64( CB0, CB0, CB0, CB0 ) ); \
V9 = _mm256_xor_si256( S1, _mm256_set_epi64( CB1, CB1, CB1, CB1 ) ); \
VA = _mm256_xor_si256( S2, _mm256_set_epi64( CB2, CB2, CB2, CB2 ) ); \
VB = _mm256_xor_si256( S3, _mm256_set_epi64( CB3, CB3, CB3, CB3 ) ); \
VC = _mm256_xor_si128( _mm256_set_epi64( T0, T0, T0, T0 ), \
_mm256_set_epi64( CB4, CB4, CB4, CB4 ) ); \
VD = _mm256_xor_si256( _mm256_set_epi64( T0, T0, T0, T0 ), \
_mm256_set_epi64( CB5, CB5, CB5, CB5 ) ); \
VE = _mm256_xor_si256( _mm256_set_epi64( T1, T1, T1, T1 ), \
_mm256_set256_epi64( CB6, CB6, CB6, CB6 ) ); \
VF = _mm256_xor_si256( _mm256_set_epi64( T1, T1, T1, T1 ), \
_mm256_set256_epi64( CB7, CB7, CB7, CB7 ) ); \
V8 = _mm256_xor_si256( S0, _mm256_set_epi64x( CB0, CB0, CB0, CB0 ) ); \
V9 = _mm256_xor_si256( S1, _mm256_set_epi64x( CB1, CB1, CB1, CB1 ) ); \
VA = _mm256_xor_si256( S2, _mm256_set_epi64x( CB2, CB2, CB2, CB2 ) ); \
VB = _mm256_xor_si256( S3, _mm256_set_epi64x( CB3, CB3, CB3, CB3 ) ); \
VC = _mm256_xor_si256( _mm256_set_epi64x( T0, T0, T0, T0 ), \
_mm256_set_epi64x( CB4, CB4, CB4, CB4 ) ); \
VD = _mm256_xor_si256( _mm256_set_epi64x( T0, T0, T0, T0 ), \
_mm256_set_epi64x( CB5, CB5, CB5, CB5 ) ); \
VE = _mm256_xor_si256( _mm256_set_epi64x( T1, T1, T1, T1 ), \
_mm256_set_epi64x( CB6, CB6, CB6, CB6 ) ); \
VF = _mm256_xor_si256( _mm256_set_epi64x( T1, T1, T1, T1 ), \
_mm256_set_epi64x( CB7, CB7, CB7, CB7 ) ); \
M[0x0] = mm256_byteswap_64( *(buf+0) ); \
M[0x1] = mm256_byteswap_64( *(buf+1) ); \
M[0x2] = mm256_byteswap_64( *(buf+2) ); \
Expand Down Expand Up @@ -867,7 +867,6 @@ blake32_4way( blake_4way_small_context *sc, const void *data, size_t len )

buf = sc->buf;
ptr = sc->ptr;

if ( len < buf_size - ptr )
{
memcpy_128( buf + (ptr>>2), vdata, len>>2 );
Expand Down Expand Up @@ -915,9 +914,10 @@ blake32_4way_close( blake_4way_small_context *sc, unsigned ub, unsigned n,

ptr = sc->ptr;
bit_len = ((unsigned)ptr << 3);
unsigned z = 0x80 >> n;
unsigned zz = ((ub & -z) | z) & 0xFF;
u.buf[ptr>>2] = _mm_set_epi32( zz, zz, zz, zz );
// unsigned z = 0x80 >> n;
// unsigned zz = ((ub & -z) | z) & 0xFF;
// u.buf[ptr>>2] = _mm_set_epi32( zz, zz, zz, zz );
u.buf[ptr>>2] = _mm_set1_epi32( 0x80 );
tl = sc->T0 + bit_len;
th = sc->T1;

Expand All @@ -934,9 +934,11 @@ blake32_4way_close( blake_4way_small_context *sc, unsigned ub, unsigned n,
else
sc->T0 -= 512 - bit_len;

if ( ptr <= 48 )
// if ( ptr <= 48 )
if ( ptr <= 52 )
{
memset_zero_128( u.buf + (ptr>>2) + 1, (48 - ptr) >> 2 );
memset_zero_128( u.buf + (ptr>>2) + 1, (52 - ptr) >> 2 );
// memset_zero_128( u.buf + (ptr>>2) + 1, (48 - ptr) >> 2 );
if (out_size_w32 == 8)
u.buf[52>>2] = _mm_or_si128( u.buf[52>>2],
_mm_set_epi32( 0x010000000, 0x01000000,
Expand All @@ -962,6 +964,7 @@ blake32_4way_close( blake_4way_small_context *sc, unsigned ub, unsigned n,
out = (__m128i*)dst;
for ( k = 0; k < out_size_w32; k++ )
out[k] = mm_byteswap_32( sc->H[k] );
// out[k] = sc->H[k];
}

#if defined (__AVX2__)
Expand Down
Loading

0 comments on commit 7a13899

Please sign in to comment.