Skip to content

Commit

Permalink
v3.8.3
Browse files Browse the repository at this point in the history
  • Loading branch information
JayDDee committed Feb 23, 2018
1 parent 502ed0b commit 3c02653
Show file tree
Hide file tree
Showing 70 changed files with 3,876 additions and 1,853 deletions.
3 changes: 3 additions & 0 deletions Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,10 @@ cpuminer_SOURCES = \
algo/blake/sph_blake2b.c \
algo/blake/blake2b.c \
algo/blake/sph-blake2s.c \
algo/blake/blake2s-hash-4way.c \
algo/blake/blake2s.c \
algo/blake/blake2s-gate.c \
algo/blake/blake2s-4way.c \
algo/blake/blakecoin-gate.c \
algo/blake/mod_blakecoin.c \
algo/blake/blakecoin.c \
Expand Down
10 changes: 9 additions & 1 deletion RELEASE_NOTES
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ cd /c/path/to/cpuminer-opt
Run build.sh to build on Windows or execute the following commands.

./autogen.sh
CFLAGS="-O3 -march=native -Wall" CXXFLAGS="$CFLAGS -std=gnu++11 -fpermissive" ./configure --with-curl
CFLAGS="-O3 -march=native -Wall" ./configure --with-curl
make

Start mining
Expand All @@ -159,6 +159,14 @@ Support for even older x86_64 without AES_NI or SSE2 is not available.
Change Log
----------

v3.8.3

More restoration of lost lyra2 hash.
8 way AVX2 and 4way AVX optimization for blakecoin, vanilla & blake2s.
8 way AVX2 for lbry.
Scaled hashrate for API output.
A couple of GBT fixes.

v3.8.2.1

Fixed low difficulty rejects with allium.
Expand Down
122 changes: 84 additions & 38 deletions algo/blake/blake-4way.c
Original file line number Diff line number Diff line change
@@ -1,19 +1,18 @@
#include "blake-gate.h"

#if defined (BLAKE_4WAY)

#include "blake-hash-4way.h"
#include <string.h>
#include <stdint.h>
#include <memory.h>

blake256r14_4way_context blake_ctx;
#if defined (BLAKE_4WAY)

blake256r14_4way_context blake_4w_ctx;

void blakehash_4way(void *state, const void *input)
{
uint32_t vhash[8*4] __attribute__ ((aligned (64)));
blake256r14_4way_context ctx;
memcpy( &ctx, &blake_ctx, sizeof ctx );
memcpy( &ctx, &blake_4w_ctx, sizeof ctx );
blake256r14_4way( &ctx, input + (64<<2), 16 );
blake256r14_4way_close( &ctx, vhash );
mm_deinterleave_4x32( state, state+32, state+64, state+96, vhash, 256 );
Expand All @@ -31,58 +30,31 @@ int scanhash_blake_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint32_t _ALIGN(32) edata[20];
uint32_t n = first_nonce;
uint32_t *nonces = work->nonces;
bool *found = work->nfound;
int num_found = 0;

if (opt_benchmark)
HTarget = 0x7f;

// we need big endian data...
swab32_array( edata, pdata, 20 );

mm_interleave_4x32( vdata, edata, edata, edata, edata, 640 );

blake256r14_4way_init( &blake_ctx );
blake256r14_4way( &blake_ctx, vdata, 64 );
blake256r14_4way_init( &blake_4w_ctx );
blake256r14_4way( &blake_4w_ctx, vdata, 64 );

uint32_t *noncep = vdata + 76; // 19*4
do {
found[0] = found[1] = found[2] = found[3] = false;
be32enc( noncep, n );
be32enc( noncep +1, n+1 );
be32enc( noncep +2, n+2 );
be32enc( noncep +3, n+3 );

blakehash_4way( hash, vdata );

if ( hash[7] <= HTarget && fulltest( hash, ptarget ) )
{
found[0] = true;
num_found++;
nonces[0] = n;
pdata[19] = n;
work_set_target_ratio( work, hash );
}
if ( (hash+8)[7] <= HTarget && fulltest( hash+8, ptarget ) )
{
found[1] = true;
num_found++;
nonces[1] = n+1;
work_set_target_ratio( work, hash+8 );
}
if ( (hash+16)[7] <= HTarget && fulltest( hash+16, ptarget ) )
{
found[2] = true;
num_found++;
nonces[2] = n+2;
work_set_target_ratio( work, hash+16 );
}
if ( (hash+24)[7] <= HTarget && fulltest( hash+24, ptarget ) )
for ( int i = 0; i < 4; i++ )
if ( (hash+(i<<3))[7] <= HTarget && fulltest( hash+(i<<3), ptarget ) )
{
found[3] = true;
num_found++;
nonces[3] = n+3;
work_set_target_ratio( work, hash+24 );
nonces[ num_found++ ] = n+i;
work_set_target_ratio( work, hash+(i<<3) );
}
n += 4;

Expand All @@ -95,3 +67,77 @@ int scanhash_blake_4way( int thr_id, struct work *work, uint32_t max_nonce,

#endif

#if defined(BLAKE_8WAY)

blake256r14_8way_context blake_8w_ctx;

// Finish an 8-lane Blake-256 (14 round) hash of interleaved input.
//
// state - output buffer; receives eight results, each written at a 32 byte
//         stride (state, state+32, ... state+224).
// input - 8-way interleaved data. The first 64<<3 bytes are skipped here
//         because scanhash_blake_8way() already absorbed 64 bytes into the
//         shared blake_8w_ctx; only the following 16 are hashed.
//         NOTE(review): lengths appear to be per-lane byte counts - confirm
//         against blake256r14_8way().
void blakehash_8way( void *state, const void *input )
{
   uint32_t interleaved[8*8] __attribute__ ((aligned (64)));
   blake256r14_8way_context work_ctx;
   const void *tail = input + (64<<3);

   // Start from a private copy of the shared context so it can be reused
   // for the next call.
   memcpy( &work_ctx, &blake_8w_ctx, sizeof work_ctx );

   blake256r14_8way( &work_ctx, tail, 16 );
   blake256r14_8way_close( &work_ctx, interleaved );

   // Split the interleaved result into one contiguous hash per lane.
   // NOTE(review): 256 is presumably the bit length of each lane - confirm.
   mm256_deinterleave_8x32( state,       state+ 32,
                            state+ 64,   state+ 96,
                            state+128,   state+160,
                            state+192,   state+224,
                            interleaved, 256 );
}

// Scan a nonce range for Blake-256 (14 round) hashes that meet the target,
// testing 8 consecutive nonces per pass.
//
// thr_id      - index into work_restart[]; scanning stops when that entry's
//               restart flag is set.
// work        - supplies work->data and work->target; found nonces are
//               written to work->nonces and work is passed to
//               work_set_target_ratio() for each hit.
// max_nonce   - scanning stops once the next starting nonce is >= max_nonce.
// hashes_done - out: set to (n - first_nonce + 1) on exit.
//
// Returns the number of nonces found. They are stored compactly in
// work->nonces[0 .. return-1], matching scanhash_blake_4way().
int scanhash_blake_8way( int thr_id, struct work *work, uint32_t max_nonce,
                         uint64_t *hashes_done )
{
   uint32_t vdata[20*8] __attribute__ ((aligned (64)));
   uint32_t hash[8*8] __attribute__ ((aligned (32)));
   uint32_t *pdata = work->data;
   uint32_t *ptarget = work->target;
   const uint32_t first_nonce = pdata[19];
   uint32_t HTarget = ptarget[7];
   uint32_t _ALIGN(32) edata[20];
   uint32_t n = first_nonce;
   uint32_t *nonces = work->nonces;
   int num_found = 0;

   if (opt_benchmark)
      HTarget = 0x7f;

   // we need big endian data...
   swab32_array( edata, pdata, 20 );

   // Replicate the same 20 words into all 8 lanes; only the nonce word
   // differs per lane and is filled in inside the loop.
   mm256_interleave_8x32( vdata, edata, edata, edata, edata,
                                 edata, edata, edata, edata, 640 );

   // Absorb the first 64 bytes once; blakehash_8way() copies this shared
   // context and hashes only the remainder for every nonce group.
   blake256r14_8way_init( &blake_8w_ctx );
   blake256r14_8way( &blake_8w_ctx, vdata, 64 );

   uint32_t *noncep = vdata + 152;   // 19*8
   do {
      // Word 19 of lane i gets nonce n+i.
      for ( int i = 0; i < 8; i++ )
         be32enc( noncep + i, n + i );
      pdata[19] = n;

      blakehash_8way( hash, vdata );

      for ( int i = 0; i < 8; i++ )
      {
         // blakehash_8way() writes each lane's result 32 bytes apart, so
         // lane i starts 8 words (not 1 word) after lane i-1.
         uint32_t *lane_hash = hash + (i<<3);

         if ( lane_hash[7] <= HTarget && fulltest( lane_hash, ptarget ) )
         {
            nonces[ num_found++ ] = n + i;
            work_set_target_ratio( work, lane_hash );
         }
      }
      n += 8;

   } while ( (num_found == 0) && (n < max_nonce)
             && !work_restart[thr_id].restart );

   *hashes_done = n - first_nonce + 1;
   return num_found;
}

#endif
Loading

0 comments on commit 3c02653

Please sign in to comment.