Skip to content

Commit

Permalink
v23.11
Browse files Browse the repository at this point in the history
  • Loading branch information
JayDDee committed Nov 17, 2023
1 parent f3fde95 commit 8f94d02
Show file tree
Hide file tree
Showing 35 changed files with 1,718 additions and 536 deletions.
28 changes: 18 additions & 10 deletions RELEASE_NOTES
Original file line number Diff line number Diff line change
Expand Up @@ -27,17 +27,19 @@ See INSTALL_LINUX or INSTALL_WINDOWS for compile instructions
Requirements
------------

Intel Core2 or newer, or AMD Steamroller or newer CPU. ARM CPUs are not
supported.
- An x86_64 architecture CPU with a minimum of SSE2 support. This includes Intel Core2 and newer, and AMD equivalents.
- An Arm CPU supporting AArch64 and NEON.

64 bit Linux or Windows operating system. Apple, Android and Raspberry Pi
are not supported. FreeBSD YMMV.
32 bit CPUs are not supported.

ARM requirements (Beta):
Older CPUs are supported by the open source cpuminer-multi by TPruvot, but at reduced performance.

CPU: Armv8 and NEON, SHA2 & AES are optional
OS: Linux distribution built for AArch64.
Packages: source code only.
Mining on mobile devices that meet the requirements is not recommended due to the risk of
overheating and damaging the battery. Mining has unlimited demand: it will push any device
to or beyond its limits. There is also a fire risk with overheated lithium batteries.

Beware of apps claiming "mobile only mining". There is no such thing; they aren't miners.
If a mobile CPU can mine it, any CPU can.

See wiki for details.

Expand Down Expand Up @@ -73,12 +75,18 @@ If not what makes it happen or not happen?
Change Log
----------

v23.11

This is a release candidate for full AArch64 support, marking the end of the Beta phase.
Fixed hmq1725 & x25x algos, SSE2 & NEON, broken in v3.23.4.
Most CPU-mineable SHA3 algos (X*) upgraded to 2-way SSE2 & NEON.

v23.10

x86_64: Fixed scrypt, scryptn2 algos SSE2.
Fixed sha512d256d algo AVX2, SSE2, NEON.
Fixed sha512256d algo AVX2, SSE2, NEON.
Fixed a bug in Skein N-way that reduced performance.
ARM: Skein algo optimized for NEON & SHA2.
ARM: Skein optimized for NEON, SHA2 & SSE2.
Skein2 algo 2-way optimized for NEON & SSE2.

v23.9
Expand Down
2 changes: 1 addition & 1 deletion algo-gate-api.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ typedef uint32_t set_t;
// Feature flag bits. Each replacement list is parenthesized so the macro
// expands to a single value in any expression: without the parentheses
// something like `AES_OPT + 1` would parse as `1 << (7 + 1)`.
// Plain OR-combinations of flags expand the same way as before.
#define AES_OPT (1 << 7) // Intel Westmere, AArch64
#define VAES_OPT (1 << 8) // Icelake, Zen3
#define SHA_OPT (1 << 9) // Zen1, Icelake, AArch64
#define SHA512_OPT (1 << 10) // AArch64
#define SHA512_OPT (1 << 10) // Intel Arrow Lake, AArch64
#define NEON_OPT (1 << 11) // AArch64

// AVX10 does not have explicit algo features:
Expand Down
72 changes: 60 additions & 12 deletions algo/bmw/bmw512-4way.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,11 @@
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
//#include "sph_keccak.h"
#include "bmw-hash-4way.h"

#if defined(BMW512_8WAY)

void bmw512hash_8way(void *state, const void *input)
void bmw512hash_8way( void *state, const void *input )
{
bmw512_8way_context ctx;
bmw512_8way_init( &ctx );
Expand All @@ -27,9 +26,9 @@ int scanhash_bmw512_8way( struct work *work, uint32_t max_nonce,
uint32_t n = pdata[19];
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 8;
__m512i *noncev = (__m512i*)vdata + 9; // aligned
__m512i *noncev = (__m512i*)vdata + 9;
const uint32_t Htarg = ptarget[7];
int thr_id = mythr->id;
const int thr_id = mythr->id;

mm512_bswap32_intrlv80_8x64( vdata, pdata );
do {
Expand All @@ -43,7 +42,7 @@ int scanhash_bmw512_8way( struct work *work, uint32_t max_nonce,
if ( unlikely( hash7[ lane<<1 ] <= Htarg ) )
{
extr_lane_8x64( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) )
if ( likely( valid_hash( lane_hash, ptarget ) && !opt_benchmark ))
{
pdata[19] = n + lane;
submit_solution( work, lane_hash, mythr );
Expand All @@ -59,9 +58,7 @@ int scanhash_bmw512_8way( struct work *work, uint32_t max_nonce,

#elif defined(BMW512_4WAY)

//#ifdef BMW512_4WAY

void bmw512hash_4way(void *state, const void *input)
void bmw512hash_4way( void *state, const void *input )
{
bmw512_4way_context ctx;
bmw512_4way_init( &ctx );
Expand All @@ -80,10 +77,10 @@ int scanhash_bmw512_4way( struct work *work, uint32_t max_nonce,
uint32_t *ptarget = work->target;
uint32_t n = pdata[19];
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 4;
__m256i *noncev = (__m256i*)vdata + 9; // aligned
const uint32_t last_nonce = max_nonce - 4;
__m256i *noncev = (__m256i*)vdata + 9;
const uint32_t Htarg = ptarget[7];
int thr_id = mythr->id; // thr_id arg is deprecated
const int thr_id = mythr->id;

mm256_bswap32_intrlv80_4x64( vdata, pdata );
do {
Expand All @@ -96,7 +93,7 @@ int scanhash_bmw512_4way( struct work *work, uint32_t max_nonce,
if ( unlikely( hash7[ lane<<1 ] <= Htarg ) )
{
extr_lane_4x64( lane_hash, hash, lane, 256 );
if ( fulltest( lane_hash, ptarget ) )
if ( likely( valid_hash( lane_hash, ptarget ) && !opt_benchmark ))
{
pdata[19] = n + lane;
submit_solution( work, lane_hash, mythr );
Expand All @@ -110,4 +107,55 @@ int scanhash_bmw512_4way( struct work *work, uint32_t max_nonce,
return 0;
}

#elif defined(BMW512_2WAY)

// One-shot BMW-512 over a 2x64 input: a fresh context is initialised,
// fed `input` with a length argument of 80, and closed into `state`.
// The context lives on the stack, so nothing persists between calls.
void bmw512hash_2x64( void *state, const void *input )
{
   bmw512_2x64_context hasher;

   bmw512_2x64_init( &hasher );
   bmw512_2x64_update( &hasher, input, 80 );
   bmw512_2x64_close( &hasher, state );
}

int scanhash_bmw512_2x64( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t vdata[24*2] __attribute__ ((aligned (64)));
uint32_t hash[16*2] __attribute__ ((aligned (32)));
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *hash7 = &(hash[13]); // 3*4+1
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t n = pdata[19];
const uint32_t first_nonce = pdata[19];
const uint32_t last_nonce = max_nonce - 2;
v128_t *noncev = (v128_t*)vdata + 9;
const uint32_t Htarg = ptarget[7];
const int thr_id = mythr->id;

v128_bswap32_intrlv80_2x64( vdata, pdata );
do {
*noncev = v128_intrlv_blend_32( v128_bswap32(
v128_set32( n+1, 0, n, 0 ) ), *noncev );

bmw512hash_2x64( hash, vdata );

for ( int lane = 0; lane < 2; lane++ )
if ( unlikely( hash7[ lane<<1 ] <= Htarg ) )
{
extr_lane_2x64( lane_hash, hash, lane, 256 );
if ( likely( valid_hash( lane_hash, ptarget ) && !opt_benchmark ))
{
pdata[19] = n + lane;
submit_solution( work, lane_hash, mythr );
}
}
n += 2;

} while ( likely( (n < last_nonce) && !work_restart[thr_id].restart ) );

*hashes_done = n - first_nonce;
return 0;
}

#endif
5 changes: 4 additions & 1 deletion algo/bmw/bmw512-gate.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,17 @@

bool register_bmw512_algo( algo_gate_t* gate )
{
gate->optimizations = AVX2_OPT | AVX512_OPT;
gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT | NEON_OPT;
opt_target_factor = 256.0;
#if defined (BMW512_8WAY)
gate->scanhash = (void*)&scanhash_bmw512_8way;
gate->hash = (void*)&bmw512hash_8way;
#elif defined (BMW512_4WAY)
gate->scanhash = (void*)&scanhash_bmw512_4way;
gate->hash = (void*)&bmw512hash_4way;
#elif defined (BMW512_2WAY)
gate->scanhash = (void*)&scanhash_bmw512_2x64;
gate->hash = (void*)&bmw512hash_2x64;
#else
gate->scanhash = (void*)&scanhash_bmw512;
gate->hash = (void*)&bmw512hash;
Expand Down
12 changes: 10 additions & 2 deletions algo/bmw/bmw512-gate.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,27 @@
#define BMW512_8WAY 1
#elif defined(__AVX2__)
#define BMW512_4WAY 1
#elif defined(__SSE2__) || defined(__ARM_NEON)
#define BMW512_2WAY 1
#endif

#if defined(BMW512_8WAY)

void bmw512hash_8way( void *state, const void *input );
int scanhash_bmw512_8way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
uint64_t *hashes_done, struct thr_info *mythr );

#elif defined(BMW512_4WAY)

void bmw512hash_4way( void *state, const void *input );
int scanhash_bmw512_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
uint64_t *hashes_done, struct thr_info *mythr );

#elif defined(BMW512_2WAY)

// 2-way (2x64) BMW-512, selected when BMW512_2WAY is defined.
// bmw512hash_2x64: runs init/update/close over `input` and writes the
// result to `state`.
void bmw512hash_2x64( void *state, const void *input );
// scanhash_bmw512_2x64: scans nonces two at a time starting from
// work->data[19], stores the count of nonces covered in *hashes_done
// and returns 0.
int scanhash_bmw512_2x64( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );

#else

Expand Down
24 changes: 12 additions & 12 deletions algo/echo/aes_ni/hash.c
Original file line number Diff line number Diff line change
Expand Up @@ -236,9 +236,7 @@ void Compress(hashState_echo *ctx, const unsigned char *pmsg, unsigned int uBloc

}



HashReturn init_echo(hashState_echo *ctx, int nHashSize)
HashReturn init_echo( hashState_echo *ctx, int nHashSize )
{
int i, j;

Expand Down Expand Up @@ -280,7 +278,8 @@ HashReturn init_echo(hashState_echo *ctx, int nHashSize)
return SUCCESS;
}

HashReturn update_echo(hashState_echo *state, const BitSequence *data, DataLength databitlen)
HashReturn update_echo( hashState_echo *state, const void *data,
uint32_t databitlen )
{
unsigned int uByteLength, uBlockCount, uRemainingBytes;

Expand Down Expand Up @@ -330,7 +329,7 @@ HashReturn update_echo(hashState_echo *state, const BitSequence *data, DataLengt
return SUCCESS;
}

HashReturn final_echo(hashState_echo *state, BitSequence *hashval)
HashReturn final_echo( hashState_echo *state, void *hashval)
{
v128_t remainingbits;

Expand Down Expand Up @@ -407,8 +406,8 @@ HashReturn final_echo(hashState_echo *state, BitSequence *hashval)
return SUCCESS;
}

HashReturn update_final_echo( hashState_echo *state, BitSequence *hashval,
const BitSequence *data, DataLength databitlen )
HashReturn update_final_echo( hashState_echo *state, void *hashval,
const void *data, uint32_t databitlen )
{
unsigned int uByteLength, uBlockCount, uRemainingBytes;

Expand Down Expand Up @@ -530,8 +529,8 @@ HashReturn update_final_echo( hashState_echo *state, BitSequence *hashval,
return SUCCESS;
}

HashReturn echo_full( hashState_echo *state, BitSequence *hashval,
int nHashSize, const BitSequence *data, DataLength datalen )
HashReturn echo_full( hashState_echo *state, void *hashval,
int nHashSize, const void *data, uint32_t datalen )
{
int i, j;

Expand Down Expand Up @@ -578,7 +577,7 @@ HashReturn echo_full( hashState_echo *state, BitSequence *hashval,
{
// Fill the buffer
memcpy( state->buffer + state->uBufferBytes,
(void*)data, state->uBlockLength - state->uBufferBytes );
data, state->uBlockLength - state->uBufferBytes );

// Process buffer
Compress( state, state->buffer, 1 );
Expand All @@ -601,7 +600,7 @@ HashReturn echo_full( hashState_echo *state, BitSequence *hashval,
}

if( uRemainingBytes > 0 )
memcpy(state->buffer, (void*)data, uRemainingBytes);
memcpy(state->buffer, data, uRemainingBytes);

state->uBufferBytes = uRemainingBytes;
}
Expand Down Expand Up @@ -689,7 +688,7 @@ HashReturn echo_full( hashState_echo *state, BitSequence *hashval,
}



#if 0
HashReturn hash_echo(int hashbitlen, const BitSequence *data, DataLength databitlen, BitSequence *hashval)
{
HashReturn hRet;
Expand Down Expand Up @@ -746,5 +745,6 @@ HashReturn hash_echo(int hashbitlen, const BitSequence *data, DataLength databit

return SUCCESS;
}
#endif

#endif
14 changes: 7 additions & 7 deletions algo/echo/aes_ni/hash_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,16 +47,16 @@ HashReturn init_echo(hashState_echo *state, int hashbitlen);

HashReturn reinit_echo(hashState_echo *state);

HashReturn update_echo(hashState_echo *state, const BitSequence *data, DataLength databitlen);
HashReturn update_echo(hashState_echo *state, const void *data, uint32_t databitlen);

HashReturn final_echo(hashState_echo *state, BitSequence *hashval);
HashReturn final_echo(hashState_echo *state, void *hashval);

HashReturn hash_echo(int hashbitlen, const BitSequence *data, DataLength databitlen, BitSequence *hashval);
HashReturn hash_echo(int hashbitlen, const void *data, uint32_t databitlen, void *hashval);

HashReturn update_final_echo( hashState_echo *state, BitSequence *hashval,
const BitSequence *data, DataLength databitlen );
HashReturn echo_full( hashState_echo *state, BitSequence *hashval,
int nHashSize, const BitSequence *data, DataLength databitlen );
HashReturn update_final_echo( hashState_echo *state, void *hashval,
const void *data, uint32_t databitlen );
HashReturn echo_full( hashState_echo *state, void *hashval,
int nHashSize, const void *data, uint32_t databitlen );

#endif // HASH_API_H

2 changes: 0 additions & 2 deletions algo/echo/sph_echo.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@

#include "sph_echo.h"

#if !defined(__AES__)

#ifdef __cplusplus
extern "C"{
Expand Down Expand Up @@ -1031,4 +1030,3 @@ sph_echo512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
#ifdef __cplusplus
}
#endif
#endif // !AES
3 changes: 0 additions & 3 deletions algo/echo/sph_echo.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,6 @@
#ifndef SPH_ECHO_H__
#define SPH_ECHO_H__

#if !defined(__AES__)

#ifdef __cplusplus
extern "C"{
#endif
Expand Down Expand Up @@ -318,5 +316,4 @@ void sph_echo512_addbits_and_close(
#ifdef __cplusplus
}
#endif
#endif // !AES
#endif
3 changes: 0 additions & 3 deletions algo/groestl/sph_groestl.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,6 @@

#include "sph_groestl.h"

#if !defined(__AES__)

#ifdef __cplusplus
extern "C"{
#endif
Expand Down Expand Up @@ -3119,5 +3117,4 @@ sph_groestl512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
#ifdef __cplusplus
}

#endif // !AES
#endif
2 changes: 0 additions & 2 deletions algo/groestl/sph_groestl.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ extern "C"{
#include <stddef.h>
#include "compat/sph_types.h"

#if !defined(__AES__)
/**
* Output size (in bits) for Groestl-224.
*/
Expand Down Expand Up @@ -327,5 +326,4 @@ void sph_groestl512_addbits_and_close(
}
#endif

#endif // !AES
#endif
Loading

0 comments on commit 8f94d02

Please sign in to comment.