Skip to content

Commit

Permalink
v3.21.5
Browse files Browse the repository at this point in the history
  • Loading branch information
JayDDee committed Mar 15, 2023
1 parent 7a91c41 commit cae1ce2
Show file tree
Hide file tree
Showing 13 changed files with 143 additions and 138 deletions.
12 changes: 12 additions & 0 deletions RELEASE_NOTES
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,18 @@ If not what makes it happen or not happen?
Change Log
----------

v3.21.5

All issues with v3.21.3 & v3.21.4 should be resolved.
Changes since v3.21.2:
#392 #379 #389 Fixed misaligned address segfault when solo mining.
#392 Fixed stats for myr-gr algo, and a few others, for CPUs without AVX2.
#392 Fixed conditional mining.
#392 Fixed CPU affinity on Ryzen CPUs using Windows binaries.
Windows binaries no longer support CPU groups.
Windows binaries support CPUs with up to 64 threads.
Small optimizations to serialized vectoring.

v3.21.4

Reapply selected changes from v3.21.3.
Expand Down
16 changes: 8 additions & 8 deletions algo/blake/sph_blake2b.c
Original file line number Diff line number Diff line change
Expand Up @@ -103,16 +103,16 @@
const uint8_t *sigmaR = sigma[R]; \
BLAKE2B_G( V[0], V[2], V[4], V[6], 0, 1, 2, 3 ); \
BLAKE2B_G( V[1], V[3], V[5], V[7], 4, 5, 6, 7 ); \
V2 = mm128_alignr_64( V[3], V[2] ); \
V3 = mm128_alignr_64( V[2], V[3] ); \
V6 = mm128_alignr_64( V[6], V[7] ); \
V7 = mm128_alignr_64( V[7], V[6] ); \
V2 = mm128_alignr_64( V[3], V[2], 1 ); \
V3 = mm128_alignr_64( V[2], V[3], 1 ); \
V6 = mm128_alignr_64( V[6], V[7], 1 ); \
V7 = mm128_alignr_64( V[7], V[6], 1 ); \
BLAKE2B_G( V[0], V2, V[5], V6, 8, 9, 10, 11 ); \
BLAKE2B_G( V[1], V3, V[4], V7, 12, 13, 14, 15 ); \
V[2] = mm128_alignr_64( V2, V3 ); \
V[3] = mm128_alignr_64( V3, V2 ); \
V[6] = mm128_alignr_64( V7, V6 ); \
V[7] = mm128_alignr_64( V6, V7 ); \
V[2] = mm128_alignr_64( V2, V3, 1 ); \
V[3] = mm128_alignr_64( V3, V2, 1 ); \
V[6] = mm128_alignr_64( V7, V6, 1 ); \
V[7] = mm128_alignr_64( V6, V7, 1 ); \
}

#else
Expand Down
6 changes: 3 additions & 3 deletions algo/groestl/myr-groestl.c
Original file line number Diff line number Diff line change
Expand Up @@ -73,11 +73,11 @@ int scanhash_myriad( struct work *work, uint32_t max_nonce,
be32enc(&endiandata[19], nonce);
myriad_hash(hash, endiandata);

if (hash[7] <= Htarg && fulltest(hash, ptarget))
if (hash[7] <= Htarg )
if ( fulltest(hash, ptarget) && !opt_benchmark )
{
pdata[19] = nonce;
*hashes_done = pdata[19] - first_nonce;
return 1;
submit_solution( work, hash, mythr );
}
nonce++;

Expand Down
22 changes: 15 additions & 7 deletions algo/luffa/luffa_for_sse2.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,26 +19,34 @@
*/

#include <string.h>
#include <emmintrin.h>
#include "simd-utils.h"
#include "luffa_for_sse2.h"

#if defined(__SSE4_1__)
#if defined(__AVX512VL__)

#define MULT2( a0, a1 ) \
{ \
__m128i b = _mm_xor_si128( a0, _mm_maskz_shuffle_epi32( 0xb, a1, 0x10 ) ); \
a0 = _mm_alignr_epi32( a1, b, 1 ); \
a1 = _mm_alignr_epi32( b, a1, 1 ); \
}

#elif defined(__SSE4_1__)

#define MULT2( a0, a1 ) do \
{ \
__m128i b = _mm_xor_si128( a0, _mm_shuffle_epi32( mm128_mask_32( a1, 0xe ), 0x10 ) ); \
a0 = _mm_or_si128( _mm_srli_si128( b, 4 ), _mm_slli_si128( a1, 12 ) ); \
a1 = _mm_or_si128( _mm_srli_si128( a1, 4 ), _mm_slli_si128( b, 12 ) ); \
__m128i b = _mm_xor_si128( a0, _mm_shuffle_epi32( mm128_mask_32( a1, 0xe ), 0x10 ) ); \
a0 = _mm_alignr_epi8( a1, b, 4 ); \
a1 = _mm_alignr_epi8( b, a1, 4 ); \
} while(0)

#else

#define MULT2( a0, a1 ) do \
{ \
__m128i b = _mm_xor_si128( a0, _mm_shuffle_epi32( _mm_and_si128( a1, MASK ), 16 ) ); \
__m128i b = _mm_xor_si128( a0, _mm_shuffle_epi32( _mm_and_si128( a1, MASK ), 0x10 ) ); \
a0 = _mm_or_si128( _mm_srli_si128( b, 4 ), _mm_slli_si128( a1, 12 ) ); \
a1 = _mm_or_si128( _mm_srli_si128( a1, 4 ), _mm_slli_si128( b, 12 ) ); \
a1 = _mm_or_si128( _mm_srli_si128( a1, 4 ), _mm_slli_si128( b, 12 ) ); \
} while(0)

#endif
Expand Down
19 changes: 15 additions & 4 deletions algo/lyra2/sponge.h
Original file line number Diff line number Diff line change
Expand Up @@ -146,14 +146,25 @@ static inline uint64_t rotr64( const uint64_t w, const unsigned c ){
b = mm128_ror_64( _mm_xor_si128( b, c ), 63 );

#define LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
{ \
__m128i t; \
G_2X64( s0, s2, s4, s6 ); \
G_2X64( s1, s3, s5, s7 ); \
mm128_vrol256_64( s6, s7 ); \
mm128_vror256_64( s2, s3 ); \
t = mm128_alignr_64( s7, s6, 1 ); \
s6 = mm128_alignr_64( s6, s7, 1 ); \
s7 = t; \
t = mm128_alignr_64( s2, s3, 1 ); \
s2 = mm128_alignr_64( s3, s2, 1 ); \
s3 = t; \
G_2X64( s0, s2, s5, s6 ); \
G_2X64( s1, s3, s4, s7 ); \
mm128_vror256_64( s6, s7 ); \
mm128_vrol256_64( s2, s3 );
t = mm128_alignr_64( s6, s7, 1 ); \
s6 = mm128_alignr_64( s7, s6, 1 ); \
s7 = t; \
t = mm128_alignr_64( s3, s2, 1 ); \
s2 = mm128_alignr_64( s2, s3, 1 ); \
s3 = t; \
}

#define LYRA_12_ROUNDS_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
LYRA_ROUND_AVX(s0,s1,s2,s3,s4,s5,s6,s7) \
Expand Down
13 changes: 7 additions & 6 deletions algo/skein/skein.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,18 +31,19 @@ int scanhash_skein( struct work *work, uint32_t max_nonce,
const uint32_t Htarg = ptarget[7];
const uint32_t first_nonce = pdata[19];
uint32_t n = first_nonce;
int thr_id = mythr->id; // thr_id arg is deprecated
int thr_id = mythr->id;

swab32_array( endiandata, pdata, 20 );

do {
be32enc(&endiandata[19], n);
skeinhash(hash64, endiandata);
if (hash64[7] < Htarg && fulltest(hash64, ptarget)) {
*hashes_done = n - first_nonce + 1;
pdata[19] = n;
return true;
}
if (hash64[7] <= Htarg )
if ( fulltest(hash64, ptarget) && !opt_benchmark )
{
pdata[19] = n;
submit_solution( work, hash64, mythr );
}
n++;

} while (n < max_nonce && !work_restart[thr_id].restart);
Expand Down
20 changes: 10 additions & 10 deletions algo/skein/skein2.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,31 +34,31 @@ void skein2hash(void *output, const void *input)
sph_skein512_close(&ctx_skein, hash);

memcpy(output, hash, 32);

}

int scanhash_skein2( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t hash64[8] __attribute__ ((aligned (64)));
uint32_t endiandata[20] __attribute__ ((aligned (64)));
const uint32_t Htarg = ptarget[7];
const uint32_t first_nonce = pdata[19];
uint32_t n = first_nonce;
int thr_id = mythr->id; // thr_id arg is deprecated
int thr_id = mythr->id;

swab32_array( endiandata, pdata, 20 );
swab32_array( endiandata, pdata, 20 );

do {
be32enc(&endiandata[19], n);
skein2hash(hash64, endiandata);
if (hash64[7] < Htarg && fulltest(hash64, ptarget)) {
*hashes_done = n - first_nonce + 1;
pdata[19] = n;
return true;
}
if (hash64[7] <= Htarg )
if ( fulltest(hash64, ptarget) && !opt_benchmark )
{
pdata[19] = n;
submit_solution( work, hash64, mythr );
}
n++;

} while (n < max_nonce && !work_restart[thr_id].restart);
Expand Down
20 changes: 10 additions & 10 deletions configure
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.71 for cpuminer-opt 3.21.4.
# Generated by GNU Autoconf 2.71 for cpuminer-opt 3.21.5.
#
#
# Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation,
Expand Down Expand Up @@ -608,8 +608,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='cpuminer-opt'
PACKAGE_TARNAME='cpuminer-opt'
PACKAGE_VERSION='3.21.4'
PACKAGE_STRING='cpuminer-opt 3.21.4'
PACKAGE_VERSION='3.21.5'
PACKAGE_STRING='cpuminer-opt 3.21.5'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''

Expand Down Expand Up @@ -1360,7 +1360,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures cpuminer-opt 3.21.4 to adapt to many kinds of systems.
\`configure' configures cpuminer-opt 3.21.5 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
Expand Down Expand Up @@ -1432,7 +1432,7 @@ fi

if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of cpuminer-opt 3.21.4:";;
short | recursive ) echo "Configuration of cpuminer-opt 3.21.5:";;
esac
cat <<\_ACEOF
Expand Down Expand Up @@ -1538,7 +1538,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
cpuminer-opt configure 3.21.4
cpuminer-opt configure 3.21.5
generated by GNU Autoconf 2.71
Copyright (C) 2021 Free Software Foundation, Inc.
Expand Down Expand Up @@ -1985,7 +1985,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by cpuminer-opt $as_me 3.21.4, which was
It was created by cpuminer-opt $as_me 3.21.5, which was
generated by GNU Autoconf 2.71. Invocation command line was
$ $0$ac_configure_args_raw
Expand Down Expand Up @@ -3593,7 +3593,7 @@ fi

# Define the identity of the package.
PACKAGE='cpuminer-opt'
VERSION='3.21.4'
VERSION='3.21.5'


printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h
Expand Down Expand Up @@ -7508,7 +7508,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by cpuminer-opt $as_me 3.21.4, which was
This file was extended by cpuminer-opt $as_me 3.21.5, which was
generated by GNU Autoconf 2.71. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
Expand Down Expand Up @@ -7576,7 +7576,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config='$ac_cs_config_escaped'
ac_cs_version="\\
cpuminer-opt config.status 3.21.4
cpuminer-opt config.status 3.21.5
configured by $0, generated by GNU Autoconf 2.71,
with options \\"\$ac_cs_config\\"
Expand Down
2 changes: 1 addition & 1 deletion configure.ac
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
AC_INIT([cpuminer-opt], [3.21.4])
AC_INIT([cpuminer-opt], [3.21.5])

AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM
Expand Down
49 changes: 18 additions & 31 deletions cpu-miner.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
#include <curl/curl.h>
#include <jansson.h>
#include <openssl/sha.h>
#include <mm_malloc.h>
//#include <mm_malloc.h>
#include "sysinfos.c"
#include "algo/sha/sha256d.h"

Expand Down Expand Up @@ -900,21 +900,11 @@ static bool gbt_work_decode( const json_t *val, struct work *work )
goto out;
}

// See git issue https://github.com/JayDDee/cpuminer-opt/issues/379
#if defined(__AVX2__)
if ( opt_debug )
{
if ( (uint64_t)target % 32 )
applog( LOG_ERR, "Misaligned target %p", target );
if ( (uint64_t)(work->target) % 32 )
applog( LOG_ERR, "Misaligned work->target %p", work->target );
}
#endif

for ( i = 0; i < 8; i++ )
work->target[7 - i] = be32dec( target + i );
// reverse the bytes in target
casti_m128i( work->target, 0 ) = mm128_bswap_128( casti_m128i( target, 1 ) );
casti_m128i( work->target, 1 ) = mm128_bswap_128( casti_m128i( target, 0 ) );
net_diff = work->targetdiff = hash_to_diff( work->target );

tmp = json_object_get( val, "workid" );
if ( tmp )
{
Expand Down Expand Up @@ -1724,20 +1714,19 @@ static void workio_cmd_free(struct workio_cmd *wc)

static bool workio_get_work( struct workio_cmd *wc, CURL *curl )
{
struct work *ret_work;
struct work *work_heap;
int failures = 0;

ret_work = (struct work*) _mm_malloc( sizeof(*ret_work), 32 );
if ( !ret_work ) return false;
memset( ret_work, 0, sizeof(*ret_work) );
work_heap = calloc( 1, sizeof(struct work) );
if ( !work_heap ) return false;

/* obtain new work from bitcoin via JSON-RPC */
while ( !get_upstream_work( curl, ret_work ) )
while ( !get_upstream_work( curl, work_heap ) )
{
if ( unlikely( ( opt_retries >= 0 ) && ( ++failures > opt_retries ) ) )
{
applog( LOG_ERR, "json_rpc_call failed, terminating workio thread" );
free( ret_work );
free( work_heap );
return false;
}

Expand All @@ -1748,8 +1737,8 @@ static bool workio_get_work( struct workio_cmd *wc, CURL *curl )
}

/* send work to requesting thread */
if ( !tq_push(wc->thr->q, ret_work ) )
free( ret_work );
if ( !tq_push(wc->thr->q, work_heap ) )
free( work_heap );

return true;
}
Expand Down Expand Up @@ -1825,7 +1814,7 @@ static void *workio_thread(void *userdata)
static bool get_work(struct thr_info *thr, struct work *work)
{
struct workio_cmd *wc;
struct work *work_heap;
struct work *work_heap;

if unlikely( opt_benchmark )
{
Expand All @@ -1850,17 +1839,16 @@ static bool get_work(struct thr_info *thr, struct work *work)
wc->thr = thr;
/* send work request to workio thread */
if (!tq_push(thr_info[work_thr_id].q, wc))
{
{
workio_cmd_free(wc);
return false;
}
/* wait for response, a unit of work */
work_heap = (struct work*) tq_pop(thr->q, NULL);
if (!work_heap)
return false;
/* copy returned work into storage provided by caller */
memcpy(work, work_heap, sizeof(*work));
free(work_heap);
if ( !work_heap ) return false;
/* copy returned work into storage provided by caller */
memcpy( work, work_heap, sizeof(*work) );
free( work_heap );
return true;
}

Expand Down Expand Up @@ -3738,7 +3726,6 @@ int main(int argc, char *argv[])
if ( opt_time_limit )
time_limit_stop = (unsigned int)time(NULL) + opt_time_limit;


// need to register to get algo optimizations for cpu capabilities
// but that causes registration logs before cpu capabilities is output.
// Would need to split register function into 2 parts. First part sets algo
Expand Down
Loading

0 comments on commit cae1ce2

Please sign in to comment.