Skip to content

Commit

Permalink
v3.19.2
Browse files Browse the repository at this point in the history
  • Loading branch information
JayDDee committed Dec 30, 2021
1 parent 7d2ef79 commit 0e3945d
Show file tree
Hide file tree
Showing 8 changed files with 154 additions and 48 deletions.
13 changes: 13 additions & 0 deletions RELEASE_NOTES
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,19 @@ If not what makes it happen or not happen?
Change Log
----------

v3.19.2

Fixed log displaying incorrect memory usage for scrypt, broken in v3.19.1.

Reduce log noise when replies to submitted shares are lost due to stratum errors.

Fugue prehash optimization for X16r family AVX2 & AVX512.

Small speed improvement for Hamsi AVX2 & AVX512.

Win: With CPU groups enabled the number of CPUs displayed in the ASCII art
affinity map is the number of CPUs in a CPU group, was number of CPUs up to 64.

v3.19.1

Changes to Windows binaries package:
Expand Down
11 changes: 11 additions & 0 deletions algo/fugue/fugue-aesni.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,23 @@ typedef struct

} hashState_fugue __attribute__ ((aligned (64)));


// These functions are deprecated, use the lower case macro aliases that use
// the standard interface. This will be cleaned up at a later date.
HashReturn fugue512_Init(hashState_fugue *state, int hashbitlen);

HashReturn fugue512_Update(hashState_fugue *state, const void *data, DataLength databitlen);

HashReturn fugue512_Final(hashState_fugue *state, void *hashval);

#define fugue512_init( state ) \
fugue512_Init( state, 512 )
#define fugue512_update( state, data, len ) \
fugue512_Update( state, data, (len)<<3 )
#define fugue512_final \
fugue512_Final


HashReturn fugue512_full(hashState_fugue *hs, void *hashval, const void *data, DataLength databitlen);

#endif // AES
Expand Down
37 changes: 29 additions & 8 deletions algo/hamsi/hamsi-hash-4way.c
Original file line number Diff line number Diff line change
Expand Up @@ -550,16 +550,38 @@ static const sph_u32 T512[64][16] = {

// Hamsi 8 way AVX512

// Tested on i9-9940x movepi64_mask is slow, cmple_epi64_mask with zero
// produces the same result but is faster.
#define INPUT_BIG8 \
do { \
__m512i db = _mm512_ror_epi64( *buf, 1 ); \
const uint64_t *tp = (const uint64_t*)T512; \
m0 = m1 = m2 = m3 = m4 = m5 = m6 = m7 = m512_zero; \
for ( int u = 0; u < 64; u++ ) \
{ \
__mmask8 dm = _mm512_cmplt_epi64_mask( db, m512_zero ); \
m0 = _mm512_mask_xor_epi64( m0, dm, m0, m512_const1_64( tp[0] ) ); \
m1 = _mm512_mask_xor_epi64( m1, dm, m1, m512_const1_64( tp[1] ) ); \
m2 = _mm512_mask_xor_epi64( m2, dm, m2, m512_const1_64( tp[2] ) ); \
m3 = _mm512_mask_xor_epi64( m3, dm, m3, m512_const1_64( tp[3] ) ); \
m4 = _mm512_mask_xor_epi64( m4, dm, m4, m512_const1_64( tp[4] ) ); \
m5 = _mm512_mask_xor_epi64( m5, dm, m5, m512_const1_64( tp[5] ) ); \
m6 = _mm512_mask_xor_epi64( m6, dm, m6, m512_const1_64( tp[6] ) ); \
m7 = _mm512_mask_xor_epi64( m7, dm, m7, m512_const1_64( tp[7] ) ); \
db = _mm512_ror_epi64( db, 1 ); \
tp += 8; \
} \
} while (0)

/*
#define INPUT_BIG8 \
do { \
__m512i db = *buf; \
const uint64_t *tp = (uint64_t*)&T512[0][0]; \
const uint64_t *tp = (const uint64_t*)T512; \
m0 = m1 = m2 = m3 = m4 = m5 = m6 = m7 = m512_zero; \
for ( int u = 0; u < 64; u++ ) \
{ \
__m512i dm = _mm512_and_si512( db, m512_one_64 ) ; \
dm = mm512_negate_32( _mm512_or_si512( dm, \
_mm512_slli_epi64( dm, 32 ) ) ); \
__m512i dm = mm512_negate_64( _mm512_and_si512( db, m512_one_64 ) ); \
m0 = mm512_xorand( m0, dm, m512_const1_64( tp[0] ) ); \
m1 = mm512_xorand( m1, dm, m512_const1_64( tp[1] ) ); \
m2 = mm512_xorand( m2, dm, m512_const1_64( tp[2] ) ); \
Expand All @@ -572,6 +594,7 @@ do { \
db = _mm512_srli_epi64( db, 1 ); \
} \
} while (0)
*/

#define SBOX8( a, b, c, d ) \
do { \
Expand Down Expand Up @@ -888,13 +911,11 @@ void hamsi512_8way_close( hamsi_8way_big_context *sc, void *dst )
#define INPUT_BIG \
do { \
__m256i db = *buf; \
const uint64_t *tp = (uint64_t*)&T512[0][0]; \
const uint64_t *tp = (const uint64_t*)T512; \
m0 = m1 = m2 = m3 = m4 = m5 = m6 = m7 = m256_zero; \
for ( int u = 0; u < 64; u++ ) \
{ \
__m256i dm = _mm256_and_si256( db, m256_one_64 ) ; \
dm = mm256_negate_32( _mm256_or_si256( dm, \
_mm256_slli_epi64( dm, 32 ) ) ); \
__m256i dm = mm256_negate_64( _mm256_and_si256( db, m256_one_64 ) ); \
m0 = _mm256_xor_si256( m0, _mm256_and_si256( dm, \
m256_const1_64( tp[0] ) ) ); \
m1 = _mm256_xor_si256( m1, _mm256_and_si256( dm, \
Expand Down
1 change: 0 additions & 1 deletion algo/scrypt/scrypt.c
Original file line number Diff line number Diff line change
Expand Up @@ -1544,7 +1544,6 @@ bool register_scrypt_algo( algo_gate_t* gate )

format_number_si( &t_size, t_units );
format_number_si( &d_size, d_units );

applog( LOG_INFO,"Throughput %d/thr, Buffer %.0f %siB/thr, Total %.0f %siB\n",
SCRYPT_THROUGHPUT, t_size, t_units, d_size, d_units );

Expand Down
105 changes: 82 additions & 23 deletions algo/x16/x16r-4way.c
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,14 @@ void x16r_8way_prehash( void *vdata, void *pdata )
case HAMSI:
mm512_bswap32_intrlv80_8x64( vdata, pdata );
hamsi512_8way_init( &x16r_ctx.hamsi );
hamsi512_8way_update( &x16r_ctx.hamsi, vdata, 64 );
hamsi512_8way_update( &x16r_ctx.hamsi, vdata, 72 );
break;
case FUGUE:
mm128_bswap32_80( edata, pdata );
fugue512_init( &x16r_ctx.fugue );
fugue512_update( &x16r_ctx.fugue, edata, 76 );
intrlv_8x64( vdata, edata, edata, edata, edata,
edata, edata, edata, edata, 640 );
break;
case SHABAL:
mm256_bswap32_intrlv80_8x32( vdata2, pdata );
Expand Down Expand Up @@ -306,7 +313,7 @@ int x16r_8way_hash_generic( void* output, const void* input, int thrid )
break;
case HAMSI:
if ( i == 0 )
hamsi512_8way_update( &ctx.hamsi, input + (64<<3), 16 );
hamsi512_8way_update( &ctx.hamsi, input + (72<<3), 8 );
else
{
intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
Expand All @@ -319,14 +326,43 @@ int x16r_8way_hash_generic( void* output, const void* input, int thrid )
hash7, vhash );
break;
case FUGUE:
fugue512_full( &ctx.fugue, hash0, in0, size );
fugue512_full( &ctx.fugue, hash1, in1, size );
fugue512_full( &ctx.fugue, hash2, in2, size );
fugue512_full( &ctx.fugue, hash3, in3, size );
fugue512_full( &ctx.fugue, hash4, in4, size );
fugue512_full( &ctx.fugue, hash5, in5, size );
fugue512_full( &ctx.fugue, hash6, in6, size );
fugue512_full( &ctx.fugue, hash7, in7, size );
if ( i == 0 )
{
fugue512_update( &ctx.fugue, in0 + 76, 4 );
fugue512_final( &ctx.fugue, hash0 );
memcpy( &ctx, &x16r_ctx, sizeof(hashState_fugue) );
fugue512_update( &ctx.fugue, in1 + 76, 4 );
fugue512_final( &ctx.fugue, hash1 );
memcpy( &ctx, &x16r_ctx, sizeof(hashState_fugue) );
fugue512_update( &ctx.fugue, in2 + 76, 4 );
fugue512_final( &ctx.fugue, hash2 );
memcpy( &ctx, &x16r_ctx, sizeof(hashState_fugue) );
fugue512_update( &ctx.fugue, in3 + 76, 4 );
fugue512_final( &ctx.fugue, hash3 );
memcpy( &ctx, &x16r_ctx, sizeof(hashState_fugue) );
fugue512_update( &ctx.fugue, in4 + 76, 4 );
fugue512_final( &ctx.fugue, hash4 );
memcpy( &ctx, &x16r_ctx, sizeof(hashState_fugue) );
fugue512_update( &ctx.fugue, in5 + 76, 4 );
fugue512_final( &ctx.fugue, hash5 );
memcpy( &ctx, &x16r_ctx, sizeof(hashState_fugue) );
fugue512_update( &ctx.fugue, in6 + 76, 4 );
fugue512_final( &ctx.fugue, hash6 );
memcpy( &ctx, &x16r_ctx, sizeof(hashState_fugue) );
fugue512_update( &ctx.fugue, in7 + 76, 4 );
fugue512_final( &ctx.fugue, hash7 );
}
else
{
fugue512_full( &ctx.fugue, hash0, in0, size );
fugue512_full( &ctx.fugue, hash1, in1, size );
fugue512_full( &ctx.fugue, hash2, in2, size );
fugue512_full( &ctx.fugue, hash3, in3, size );
fugue512_full( &ctx.fugue, hash4, in4, size );
fugue512_full( &ctx.fugue, hash5, in5, size );
fugue512_full( &ctx.fugue, hash6, in6, size );
fugue512_full( &ctx.fugue, hash7, in7, size );
}
break;
case SHABAL:
intrlv_8x32( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
Expand All @@ -347,25 +383,25 @@ int x16r_8way_hash_generic( void* output, const void* input, int thrid )
{
sph_whirlpool( &ctx.whirlpool, in0 + 64, 16 );
sph_whirlpool_close( &ctx.whirlpool, hash0 );
memcpy( &ctx, &x16r_ctx, sizeof(ctx) );
memcpy( &ctx, &x16r_ctx, sizeof(sph_whirlpool_context) );
sph_whirlpool( &ctx.whirlpool, in1 + 64, 16 );
sph_whirlpool_close( &ctx.whirlpool, hash1 );
memcpy( &ctx, &x16r_ctx, sizeof(ctx) );
memcpy( &ctx, &x16r_ctx, sizeof(sph_whirlpool_context) );
sph_whirlpool( &ctx.whirlpool, in2 + 64, 16 );
sph_whirlpool_close( &ctx.whirlpool, hash2 );
memcpy( &ctx, &x16r_ctx, sizeof(ctx) );
memcpy( &ctx, &x16r_ctx, sizeof(sph_whirlpool_context) );
sph_whirlpool( &ctx.whirlpool, in3 + 64, 16 );
sph_whirlpool_close( &ctx.whirlpool, hash3 );
memcpy( &ctx, &x16r_ctx, sizeof(ctx) );
memcpy( &ctx, &x16r_ctx, sizeof(sph_whirlpool_context) );
sph_whirlpool( &ctx.whirlpool, in4 + 64, 16 );
sph_whirlpool_close( &ctx.whirlpool, hash4 );
memcpy( &ctx, &x16r_ctx, sizeof(ctx) );
memcpy( &ctx, &x16r_ctx, sizeof(sph_whirlpool_context) );
sph_whirlpool( &ctx.whirlpool, in5 + 64, 16 );
sph_whirlpool_close( &ctx.whirlpool, hash5 );
memcpy( &ctx, &x16r_ctx, sizeof(ctx) );
memcpy( &ctx, &x16r_ctx, sizeof(sph_whirlpool_context) );
sph_whirlpool( &ctx.whirlpool, in6 + 64, 16 );
sph_whirlpool_close( &ctx.whirlpool, hash6 );
memcpy( &ctx, &x16r_ctx, sizeof(ctx) );
memcpy( &ctx, &x16r_ctx, sizeof(sph_whirlpool_context) );
sph_whirlpool( &ctx.whirlpool, in7 + 64, 16 );
sph_whirlpool_close( &ctx.whirlpool, hash7 );
}
Expand Down Expand Up @@ -532,7 +568,13 @@ void x16r_4way_prehash( void *vdata, void *pdata )
case HAMSI:
mm256_bswap32_intrlv80_4x64( vdata, pdata );
hamsi512_4way_init( &x16r_ctx.hamsi );
hamsi512_4way_update( &x16r_ctx.hamsi, vdata, 64 );
hamsi512_4way_update( &x16r_ctx.hamsi, vdata, 72 );
break;
case FUGUE:
mm128_bswap32_80( edata, pdata );
fugue512_init( &x16r_ctx.fugue );
fugue512_update( &x16r_ctx.fugue, edata, 76 );
intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
break;
case SHABAL:
mm128_bswap32_intrlv80_4x32( vdata2, pdata );
Expand Down Expand Up @@ -734,7 +776,7 @@ int x16r_4way_hash_generic( void* output, const void* input, int thrid )
break;
case HAMSI:
if ( i == 0 )
hamsi512_4way_update( &ctx.hamsi, input + (64<<2), 16 );
hamsi512_4way_update( &ctx.hamsi, input + (72<<2), 8 );
else
{
intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
Expand All @@ -745,10 +787,27 @@ int x16r_4way_hash_generic( void* output, const void* input, int thrid )
dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
break;
case FUGUE:
fugue512_full( &ctx.fugue, hash0, in0, size );
fugue512_full( &ctx.fugue, hash1, in1, size );
fugue512_full( &ctx.fugue, hash2, in2, size );
fugue512_full( &ctx.fugue, hash3, in3, size );
if ( i == 0 )
{
fugue512_update( &ctx.fugue, in0 + 76, 4 );
fugue512_final( &ctx.fugue, hash0 );
memcpy( &ctx, &x16r_ctx, sizeof(hashState_fugue) );
fugue512_update( &ctx.fugue, in1 + 76, 4 );
fugue512_final( &ctx.fugue, hash1 );
memcpy( &ctx, &x16r_ctx, sizeof(hashState_fugue) );
fugue512_update( &ctx.fugue, in2 + 76, 4 );
fugue512_final( &ctx.fugue, hash2 );
memcpy( &ctx, &x16r_ctx, sizeof(hashState_fugue) );
fugue512_update( &ctx.fugue, in3 + 76, 4 );
fugue512_final( &ctx.fugue, hash3 );
}
else
{
fugue512_full( &ctx.fugue, hash0, in0, size );
fugue512_full( &ctx.fugue, hash1, in1, size );
fugue512_full( &ctx.fugue, hash2, in2, size );
fugue512_full( &ctx.fugue, hash3, in3, size );
}
break;
case SHABAL:
intrlv_4x32( vhash, in0, in1, in2, in3, size<<3 );
Expand Down
20 changes: 10 additions & 10 deletions configure
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.19.1.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.19.2.
#
#
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
Expand Down Expand Up @@ -577,8 +577,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='cpuminer-opt'
PACKAGE_TARNAME='cpuminer-opt'
PACKAGE_VERSION='3.19.1'
PACKAGE_STRING='cpuminer-opt 3.19.1'
PACKAGE_VERSION='3.19.2'
PACKAGE_STRING='cpuminer-opt 3.19.2'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''

Expand Down Expand Up @@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures cpuminer-opt 3.19.1 to adapt to many kinds of systems.
\`configure' configures cpuminer-opt 3.19.2 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
Expand Down Expand Up @@ -1404,7 +1404,7 @@ fi

if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of cpuminer-opt 3.19.1:";;
short | recursive ) echo "Configuration of cpuminer-opt 3.19.2:";;
esac
cat <<\_ACEOF
Expand Down Expand Up @@ -1509,7 +1509,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
cpuminer-opt configure 3.19.1
cpuminer-opt configure 3.19.2
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
Expand Down Expand Up @@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by cpuminer-opt $as_me 3.19.1, which was
It was created by cpuminer-opt $as_me 3.19.2, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
Expand Down Expand Up @@ -2993,7 +2993,7 @@ fi
# Define the identity of the package.
PACKAGE='cpuminer-opt'
VERSION='3.19.1'
VERSION='3.19.2'
cat >>confdefs.h <<_ACEOF
Expand Down Expand Up @@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by cpuminer-opt $as_me 3.19.1, which was
This file was extended by cpuminer-opt $as_me 3.19.2, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
Expand Down Expand Up @@ -6756,7 +6756,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
cpuminer-opt config.status 3.19.1
cpuminer-opt config.status 3.19.2
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"
Expand Down
2 changes: 1 addition & 1 deletion configure.ac
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
AC_INIT([cpuminer-opt], [3.19.1])
AC_INIT([cpuminer-opt], [3.19.2])

AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM
Expand Down
Loading

0 comments on commit 0e3945d

Please sign in to comment.