diff --git a/RELEASE_NOTES b/RELEASE_NOTES
index e22c2baa..a8a5e1a8 100644
--- a/RELEASE_NOTES
+++ b/RELEASE_NOTES
@@ -65,6 +65,19 @@ If not what makes it happen or not happen?
 Change Log
 ----------
 
+v3.19.2
+
+Fixed log displaying incorrect memory usage for scrypt, broken in v3.19.1.
+
+Reduced log noise when replies to submitted shares are lost due to stratum errors.
+
+Fugue prehash optimization for X16r family AVX2 & AVX512.
+
+Small speed improvement for Hamsi AVX2 & AVX512.
+
+Win: With CPU groups enabled, the ASCII art affinity map now displays the
+number of CPUs in a CPU group rather than the total number of CPUs (up to 64).
+
 v3.19.1
 
 Changes to Windows binaries package:
diff --git a/algo/fugue/fugue-aesni.h b/algo/fugue/fugue-aesni.h
index d1536641..13fd8f87 100644
--- a/algo/fugue/fugue-aesni.h
+++ b/algo/fugue/fugue-aesni.h
@@ -37,12 +37,23 @@ typedef struct
 } hashState_fugue __attribute__ ((aligned (64)));
 
+
+// These functions are deprecated, use the lower case macro aliases that use
+// the standard interface. This will be cleaned up at a later date.
 HashReturn fugue512_Init(hashState_fugue *state, int hashbitlen);
 
 HashReturn fugue512_Update(hashState_fugue *state, const void *data,
                            DataLength databitlen);
 
 HashReturn fugue512_Final(hashState_fugue *state, void *hashval);
 
+#define fugue512_init( state ) \
+   fugue512_Init( state, 512 )
+#define fugue512_update( state, data, len ) \
+   fugue512_Update( state, data, (len)<<3 )
+#define fugue512_final \
+   fugue512_Final
+
+
 HashReturn fugue512_full(hashState_fugue *hs, void *hashval,
                          const void *data, DataLength databitlen);
 
 #endif // AES
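The lower case aliases above take the data length in bytes and convert it to bits
for the legacy interface, which is what the X16r prehash code further down expects.
A minimal usage sketch of the midstate pattern they enable, assuming this header's
declarations; the helper names and the edata/midstate variables are illustrative
and not taken from the source:

#include <stdint.h>
#include <string.h>
#include "fugue-aesni.h"   /* hashState_fugue plus the fugue512_* macros above */

/* Build the nonce-independent midstate once per work unit: the first 76 of the
   80 block header bytes do not contain the nonce. */
static void fugue512_prehash76( hashState_fugue *midstate, const uint8_t *edata )
{
   fugue512_init( midstate );               /* expands to fugue512_Init( midstate, 512 ) */
   fugue512_update( midstate, edata, 76 );  /* length in bytes, converted to bits by the macro */
}

/* Per nonce: clone the midstate, absorb the last 4 bytes, finalize. */
static void fugue512_hash80( uint8_t *hash, const hashState_fugue *midstate,
                             const uint8_t *edata )
{
   hashState_fugue ctx;
   memcpy( &ctx, midstate, sizeof ctx );
   fugue512_update( &ctx, edata + 76, 4 );
   fugue512_final( &ctx, hash );
}
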
diff --git a/algo/hamsi/hamsi-hash-4way.c b/algo/hamsi/hamsi-hash-4way.c
index 26e133c9..b7b7c705 100644
--- a/algo/hamsi/hamsi-hash-4way.c
+++ b/algo/hamsi/hamsi-hash-4way.c
@@ -550,16 +550,38 @@ static const sph_u32 T512[64][16] = {
 
 // Hamsi 8 way AVX512
 
+// Tested on i9-9940x: movepi64_mask is slow, cmplt_epi64_mask with zero
+// produces the same result but is faster.
+#define INPUT_BIG8 \
+do { \
+  __m512i db = _mm512_ror_epi64( *buf, 1 ); \
+  const uint64_t *tp = (const uint64_t*)T512; \
+  m0 = m1 = m2 = m3 = m4 = m5 = m6 = m7 = m512_zero; \
+  for ( int u = 0; u < 64; u++ ) \
+  { \
+    __mmask8 dm = _mm512_cmplt_epi64_mask( db, m512_zero ); \
+    m0 = _mm512_mask_xor_epi64( m0, dm, m0, m512_const1_64( tp[0] ) ); \
+    m1 = _mm512_mask_xor_epi64( m1, dm, m1, m512_const1_64( tp[1] ) ); \
+    m2 = _mm512_mask_xor_epi64( m2, dm, m2, m512_const1_64( tp[2] ) ); \
+    m3 = _mm512_mask_xor_epi64( m3, dm, m3, m512_const1_64( tp[3] ) ); \
+    m4 = _mm512_mask_xor_epi64( m4, dm, m4, m512_const1_64( tp[4] ) ); \
+    m5 = _mm512_mask_xor_epi64( m5, dm, m5, m512_const1_64( tp[5] ) ); \
+    m6 = _mm512_mask_xor_epi64( m6, dm, m6, m512_const1_64( tp[6] ) ); \
+    m7 = _mm512_mask_xor_epi64( m7, dm, m7, m512_const1_64( tp[7] ) ); \
+    db = _mm512_ror_epi64( db, 1 ); \
+    tp += 8; \
+  } \
+} while (0)
+
+/*
 #define INPUT_BIG8 \
 do { \
   __m512i db = *buf; \
-  const uint64_t *tp = (uint64_t*)&T512[0][0]; \
+  const uint64_t *tp = (const uint64_t*)T512; \
   m0 = m1 = m2 = m3 = m4 = m5 = m6 = m7 = m512_zero; \
   for ( int u = 0; u < 64; u++ ) \
   { \
-    __m512i dm = _mm512_and_si512( db, m512_one_64 ) ; \
-    dm = mm512_negate_32( _mm512_or_si512( dm, \
-                          _mm512_slli_epi64( dm, 32 ) ) ); \
+    __m512i dm = mm512_negate_64( _mm512_and_si512( db, m512_one_64 ) ); \
     m0 = mm512_xorand( m0, dm, m512_const1_64( tp[0] ) ); \
     m1 = mm512_xorand( m1, dm, m512_const1_64( tp[1] ) ); \
     m2 = mm512_xorand( m2, dm, m512_const1_64( tp[2] ) ); \
@@ -572,6 +594,7 @@ do { \
     db = _mm512_srli_epi64( db, 1 ); \
   } \
 } while (0)
+*/
 
 #define SBOX8( a, b, c, d ) \
 do { \
@@ -888,13 +911,11 @@ void hamsi512_8way_close( hamsi_8way_big_context *sc, void *dst )
 #define INPUT_BIG \
 do { \
   __m256i db = *buf; \
-  const uint64_t *tp = (uint64_t*)&T512[0][0]; \
+  const uint64_t *tp = (const uint64_t*)T512; \
   m0 = m1 = m2 = m3 = m4 = m5 = m6 = m7 = m256_zero; \
   for ( int u = 0; u < 64; u++ ) \
   { \
-    __m256i dm = _mm256_and_si256( db, m256_one_64 ) ; \
-    dm = mm256_negate_32( _mm256_or_si256( dm, \
-                          _mm256_slli_epi64( dm, 32 ) ) ); \
+    __m256i dm = mm256_negate_64( _mm256_and_si256( db, m256_one_64 ) ); \
     m0 = _mm256_xor_si256( m0, _mm256_and_si256( dm, \
                                m256_const1_64( tp[0] ) ) ); \
     m1 = _mm256_xor_si256( m1, _mm256_and_si256( dm, \
diff --git a/algo/scrypt/scrypt.c b/algo/scrypt/scrypt.c
index 95639691..5557ca33 100644
--- a/algo/scrypt/scrypt.c
+++ b/algo/scrypt/scrypt.c
@@ -1544,7 +1544,6 @@ bool register_scrypt_algo( algo_gate_t* gate )
    format_number_si( &t_size, t_units );
    format_number_si( &d_size, d_units );
-
    applog( LOG_INFO,"Throughput %d/thr, Buffer %.0f %siB/thr, Total %.0f %siB\n",
            SCRYPT_THROUGHPUT, t_size, t_units, d_size, d_units );
diff --git a/algo/x16/x16r-4way.c b/algo/x16/x16r-4way.c
index 8d4fb058..39efd257 100644
--- a/algo/x16/x16r-4way.c
+++ b/algo/x16/x16r-4way.c
@@ -60,7 +60,14 @@ void x16r_8way_prehash( void *vdata, void *pdata )
      case HAMSI:
         mm512_bswap32_intrlv80_8x64( vdata, pdata );
         hamsi512_8way_init( &x16r_ctx.hamsi );
-        hamsi512_8way_update( &x16r_ctx.hamsi, vdata, 64 );
+        hamsi512_8way_update( &x16r_ctx.hamsi, vdata, 72 );
+     break;
+     case FUGUE:
+        mm128_bswap32_80( edata, pdata );
+        fugue512_init( &x16r_ctx.fugue );
+        fugue512_update( &x16r_ctx.fugue, edata, 76 );
+        intrlv_8x64( vdata, edata, edata, edata, edata,
+                     edata, edata, edata, edata, 640 );
      break;
      case SHABAL:
        mm256_bswap32_intrlv80_8x32( vdata2, pdata );
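The prehash lengths above follow from where the nonce sits in the 80-byte block
header and from each algorithm's input granularity: Hamsi-512 absorbs 8-byte
blocks, so the first 72 bytes are nonce-independent, while Fugue-512 absorbs
4 bytes at a time, so 76 bytes can be hashed once per work unit. A minimal
sketch of that arithmetic; the constant and function names are illustrative,
not from the source:

#include <assert.h>
#include <stddef.h>

enum {
   HEADER_LEN   = 80,  /* x16r block header size in bytes     */
   NONCE_OFFSET = 76,  /* the nonce occupies the last 4 bytes */
   HAMSI_BLOCK  =  8,  /* Hamsi-512 input block size in bytes */
   FUGUE_WORD   =  4   /* Fugue-512 absorbs 32 bits per round */
};

/* Longest prefix that can be hashed once and reused for every nonce. */
static size_t prehash_len( size_t granularity )
{
   return ( NONCE_OFFSET / granularity ) * granularity;
}

int main( void )
{
   assert( prehash_len( HAMSI_BLOCK ) == 72 );  /* remaining 8 bytes hashed per nonce */
   assert( prehash_len( FUGUE_WORD )  == 76 );  /* remaining 4 bytes hashed per nonce */
   assert( HEADER_LEN - NONCE_OFFSET  ==  4 );  /* only the nonce changes per attempt */
   return 0;
}
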
@@ -306,7 +313,7 @@ int x16r_8way_hash_generic( void* output, const void* input, int thrid )
      break;
     case HAMSI:
        if ( i == 0 )
-          hamsi512_8way_update( &ctx.hamsi, input + (64<<3), 16 );
+          hamsi512_8way_update( &ctx.hamsi, input + (72<<3), 8 );
        else
        {
           intrlv_8x64( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
@@ -319,14 +326,43 @@ int x16r_8way_hash_generic( void* output, const void* input, int thrid )
                      hash7, vhash );
     break;
     case FUGUE:
-       fugue512_full( &ctx.fugue, hash0, in0, size );
-       fugue512_full( &ctx.fugue, hash1, in1, size );
-       fugue512_full( &ctx.fugue, hash2, in2, size );
-       fugue512_full( &ctx.fugue, hash3, in3, size );
-       fugue512_full( &ctx.fugue, hash4, in4, size );
-       fugue512_full( &ctx.fugue, hash5, in5, size );
-       fugue512_full( &ctx.fugue, hash6, in6, size );
-       fugue512_full( &ctx.fugue, hash7, in7, size );
+       if ( i == 0 )
+       {
+          fugue512_update( &ctx.fugue, in0 + 76, 4 );
+          fugue512_final( &ctx.fugue, hash0 );
+          memcpy( &ctx, &x16r_ctx, sizeof(hashState_fugue) );
+          fugue512_update( &ctx.fugue, in1 + 76, 4 );
+          fugue512_final( &ctx.fugue, hash1 );
+          memcpy( &ctx, &x16r_ctx, sizeof(hashState_fugue) );
+          fugue512_update( &ctx.fugue, in2 + 76, 4 );
+          fugue512_final( &ctx.fugue, hash2 );
+          memcpy( &ctx, &x16r_ctx, sizeof(hashState_fugue) );
+          fugue512_update( &ctx.fugue, in3 + 76, 4 );
+          fugue512_final( &ctx.fugue, hash3 );
+          memcpy( &ctx, &x16r_ctx, sizeof(hashState_fugue) );
+          fugue512_update( &ctx.fugue, in4 + 76, 4 );
+          fugue512_final( &ctx.fugue, hash4 );
+          memcpy( &ctx, &x16r_ctx, sizeof(hashState_fugue) );
+          fugue512_update( &ctx.fugue, in5 + 76, 4 );
+          fugue512_final( &ctx.fugue, hash5 );
+          memcpy( &ctx, &x16r_ctx, sizeof(hashState_fugue) );
+          fugue512_update( &ctx.fugue, in6 + 76, 4 );
+          fugue512_final( &ctx.fugue, hash6 );
+          memcpy( &ctx, &x16r_ctx, sizeof(hashState_fugue) );
+          fugue512_update( &ctx.fugue, in7 + 76, 4 );
+          fugue512_final( &ctx.fugue, hash7 );
+       }
+       else
+       {
+          fugue512_full( &ctx.fugue, hash0, in0, size );
+          fugue512_full( &ctx.fugue, hash1, in1, size );
+          fugue512_full( &ctx.fugue, hash2, in2, size );
+          fugue512_full( &ctx.fugue, hash3, in3, size );
+          fugue512_full( &ctx.fugue, hash4, in4, size );
+          fugue512_full( &ctx.fugue, hash5, in5, size );
+          fugue512_full( &ctx.fugue, hash6, in6, size );
+          fugue512_full( &ctx.fugue, hash7, in7, size );
+       }
     break;
     case SHABAL:
        intrlv_8x32( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
@@ -347,25 +383,25 @@ int x16r_8way_hash_generic( void* output, const void* input, int thrid )
        {
           sph_whirlpool( &ctx.whirlpool, in0 + 64, 16 );
           sph_whirlpool_close( &ctx.whirlpool, hash0 );
-          memcpy( &ctx, &x16r_ctx, sizeof(ctx) );
+          memcpy( &ctx, &x16r_ctx, sizeof(sph_whirlpool_context) );
           sph_whirlpool( &ctx.whirlpool, in1 + 64, 16 );
           sph_whirlpool_close( &ctx.whirlpool, hash1 );
-          memcpy( &ctx, &x16r_ctx, sizeof(ctx) );
+          memcpy( &ctx, &x16r_ctx, sizeof(sph_whirlpool_context) );
           sph_whirlpool( &ctx.whirlpool, in2 + 64, 16 );
           sph_whirlpool_close( &ctx.whirlpool, hash2 );
-          memcpy( &ctx, &x16r_ctx, sizeof(ctx) );
+          memcpy( &ctx, &x16r_ctx, sizeof(sph_whirlpool_context) );
           sph_whirlpool( &ctx.whirlpool, in3 + 64, 16 );
           sph_whirlpool_close( &ctx.whirlpool, hash3 );
-          memcpy( &ctx, &x16r_ctx, sizeof(ctx) );
+          memcpy( &ctx, &x16r_ctx, sizeof(sph_whirlpool_context) );
           sph_whirlpool( &ctx.whirlpool, in4 + 64, 16 );
           sph_whirlpool_close( &ctx.whirlpool, hash4 );
-          memcpy( &ctx, &x16r_ctx, sizeof(ctx) );
+          memcpy( &ctx, &x16r_ctx, sizeof(sph_whirlpool_context) );
           sph_whirlpool( &ctx.whirlpool, in5 + 64, 16 );
           sph_whirlpool_close( &ctx.whirlpool, hash5 );
-          memcpy( &ctx, &x16r_ctx, sizeof(ctx) );
+          memcpy( &ctx, &x16r_ctx, sizeof(sph_whirlpool_context) );
           sph_whirlpool( &ctx.whirlpool, in6 + 64, 16 );
           sph_whirlpool_close( &ctx.whirlpool, hash6 );
-          memcpy( &ctx, &x16r_ctx, sizeof(ctx) );
+          memcpy( &ctx, &x16r_ctx, sizeof(sph_whirlpool_context) );
           sph_whirlpool( &ctx.whirlpool, in7 + 64, 16 );
           sph_whirlpool_close( &ctx.whirlpool, hash7 );
        }
@@ -532,7 +568,13 @@ void x16r_4way_prehash( void *vdata, void *pdata )
      case HAMSI:
        mm256_bswap32_intrlv80_4x64( vdata, pdata );
        hamsi512_4way_init( &x16r_ctx.hamsi );
-       hamsi512_4way_update( &x16r_ctx.hamsi, vdata, 64 );
+       hamsi512_4way_update( &x16r_ctx.hamsi, vdata, 72 );
+     break;
+     case FUGUE:
+       mm128_bswap32_80( edata, pdata );
+       fugue512_init( &x16r_ctx.fugue );
+       fugue512_update( &x16r_ctx.fugue, edata, 76 );
+       intrlv_4x64( vdata, edata, edata, edata, edata, 640 );
      break;
      case SHABAL:
        mm128_bswap32_intrlv80_4x32( vdata2, pdata );
@@ -734,7 +776,7 @@ int x16r_4way_hash_generic( void* output, const void* input, int thrid )
     break;
     case HAMSI:
        if ( i == 0 )
-          hamsi512_4way_update( &ctx.hamsi, input + (64<<2), 16 );
+          hamsi512_4way_update( &ctx.hamsi, input + (72<<2), 8 );
        else
        {
           intrlv_4x64( vhash, in0, in1, in2, in3, size<<3 );
@@ -745,10 +787,27 @@ int x16r_4way_hash_generic( void* output, const void* input, int thrid )
        dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
     break;
     case FUGUE:
-       fugue512_full( &ctx.fugue, hash0, in0, size );
-       fugue512_full( &ctx.fugue, hash1, in1, size );
-       fugue512_full( &ctx.fugue, hash2, in2, size );
-       fugue512_full( &ctx.fugue, hash3, in3, size );
+       if ( i == 0 )
+       {
+          fugue512_update( &ctx.fugue, in0 + 76, 4 );
+          fugue512_final( &ctx.fugue, hash0 );
+          memcpy( &ctx, &x16r_ctx, sizeof(hashState_fugue) );
+          fugue512_update( &ctx.fugue, in1 + 76, 4 );
+          fugue512_final( &ctx.fugue, hash1 );
+          memcpy( &ctx, &x16r_ctx, sizeof(hashState_fugue) );
+          fugue512_update( &ctx.fugue, in2 + 76, 4 );
+          fugue512_final( &ctx.fugue, hash2 );
+          memcpy( &ctx, &x16r_ctx, sizeof(hashState_fugue) );
+          fugue512_update( &ctx.fugue, in3 + 76, 4 );
+          fugue512_final( &ctx.fugue, hash3 );
+       }
+       else
+       {
+          fugue512_full( &ctx.fugue, hash0, in0, size );
+          fugue512_full( &ctx.fugue, hash1, in1, size );
+          fugue512_full( &ctx.fugue, hash2, in2, size );
+          fugue512_full( &ctx.fugue, hash3, in3, size );
+       }
     break;
     case SHABAL:
        intrlv_4x32( vhash, in0, in1, in2, in3, size<<3 );
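For the Hamsi speedup noted above (algo/hamsi/hamsi-hash-4way.c), the rewritten
INPUT_BIG8 replaces the negate-and-AND select with an AVX-512 mask register: the
selector bit is rotated into the sign position, a signed compare against zero
turns it into a k-mask, and the table constant is XORed only into the selected
lanes. A minimal standalone sketch of that pattern, with an illustrative
function name; it assumes an AVX-512F capable compiler:

#include <immintrin.h>
#include <stdint.h>

/* For each 64-bit lane of 'bits' whose least significant bit is set, XOR
   'value' into the corresponding lane of 'acc'; other lanes pass through. */
static inline __m512i xor_if_lsb_set( __m512i acc, __m512i bits, uint64_t value )
{
   /* Rotate the LSB into the sign bit so a signed "< 0" compare selects it. */
   __m512i  rotated = _mm512_ror_epi64( bits, 1 );
   __mmask8 lanes   = _mm512_cmplt_epi64_mask( rotated, _mm512_setzero_si512() );
   /* Masked XOR: lanes outside the mask are copied from the first operand. */
   return _mm512_mask_xor_epi64( acc, lanes, acc,
                                 _mm512_set1_epi64( (long long)value ) );
}
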
diff --git a/configure b/configure
index eca6ff1f..f678bda4 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.19.1.
+# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.19.2.
 #
 #
 # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
@@ -577,8 +577,8 @@ MAKEFLAGS=
 # Identity of this package.
 PACKAGE_NAME='cpuminer-opt'
 PACKAGE_TARNAME='cpuminer-opt'
-PACKAGE_VERSION='3.19.1'
-PACKAGE_STRING='cpuminer-opt 3.19.1'
+PACKAGE_VERSION='3.19.2'
+PACKAGE_STRING='cpuminer-opt 3.19.2'
 PACKAGE_BUGREPORT=''
 PACKAGE_URL=''
@@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures cpuminer-opt 3.19.1 to adapt to many kinds of systems.
+\`configure' configures cpuminer-opt 3.19.2 to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1404,7 +1404,7 @@ fi
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of cpuminer-opt 3.19.1:";;
+     short | recursive ) echo "Configuration of cpuminer-opt 3.19.2:";;
   esac
   cat <<\_ACEOF
@@ -1509,7 +1509,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-cpuminer-opt configure 3.19.1
+cpuminer-opt configure 3.19.2
 generated by GNU Autoconf 2.69
 
 Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by cpuminer-opt $as_me 3.19.1, which was
+It was created by cpuminer-opt $as_me 3.19.2, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   $ $0 $@
@@ -2993,7 +2993,7 @@ fi
 # Define the identity of the package.
  PACKAGE='cpuminer-opt'
- VERSION='3.19.1'
+ VERSION='3.19.2'
 
 cat >>confdefs.h <<_ACEOF
@@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by cpuminer-opt $as_me 3.19.1, which was
+This file was extended by cpuminer-opt $as_me 3.19.2, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   CONFIG_FILES = $CONFIG_FILES
@@ -6756,7 +6756,7 @@ _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
 ac_cs_version="\\
-cpuminer-opt config.status 3.19.1
+cpuminer-opt config.status 3.19.2
 configured by $0, generated by GNU Autoconf 2.69,
   with options \\"\$ac_cs_config\\"
diff --git a/configure.ac b/configure.ac
index 11d4e595..314b0d5b 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,4 +1,4 @@
-AC_INIT([cpuminer-opt], [3.19.1])
+AC_INIT([cpuminer-opt], [3.19.2])
 
 AC_PREREQ([2.59c])
 AC_CANONICAL_SYSTEM
diff --git a/cpu-miner.c b/cpu-miner.c
index ee31ae58..7a70f9f1 100644
--- a/cpu-miner.c
+++ b/cpu-miner.c
@@ -224,7 +224,11 @@ static uint8_t thread_affinity_map[ max_cpus ];
 // display affinity mask graphically
 static void format_affinity_mask( char *mask_str, uint64_t mask )
 {
+#if defined(WINDOWS_CPU_GROUPS_ENABLED)
+   int n = num_cpus / num_cpugroups;
+#else
    int n = num_cpus < 64 ? num_cpus : 64;
+#endif
    int i;
    for ( i = 0; i < n; i++ )
    {
@@ -2164,7 +2168,7 @@ static void stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
       }  // !quiet
    }  // new diff/block
 
-   if ( new_job && !opt_quiet )
+   if ( new_job && !( opt_quiet || stratum_errors ) )
    {
       int mismatch = submitted_share_count
                    - ( accepted_share_count + stale_share_count
@@ -3609,7 +3613,9 @@ int main(int argc, char *argv[])
    num_cpus = 1;
 #endif
 
-   if ( num_cpus < 1 ) num_cpus = 1;
+   if ( num_cpus < 1 )
+      num_cpus = 1;
+
+   opt_n_threads = num_cpus;
 
    parse_cmdline( argc, argv );
 
@@ -3745,9 +3751,6 @@ int main(int argc, char *argv[])
    }
 #endif
 
-   if ( ( opt_n_threads == 0 ) || ( opt_n_threads > num_cpus ) )
-      opt_n_threads = num_cpus;
-
    if ( opt_affinity && num_cpus > max_cpus )
    {
       applog( LOG_WARNING, "More than %d CPUs, CPU affinity is disabled",