diff --git a/README.md b/README.md
index 30b00584..f0105262 100644
--- a/README.md
+++ b/README.md
@@ -19,7 +19,7 @@ Feature | | Mlucas | Prime95/MPrime
 \- | P-1 | ✔️ | ✔️
 \- | P+1 | | ✔️
 \- | ECM | | ✔️
-\- | Pépin | ✔️ | 
+\- | Pépin | ✔️ | ✔️
 **PRP** | Proofs | | ✔️
 \- | Certs | | ✔️
 **Error Checking** | Jacobi | | ✔️
@@ -93,7 +93,7 @@ This README is still in progress. For now, see the original [Mlucas README](http
 
 ## Help
 
-See the [help.txt](help.txt) file for usage information.
+The [help.txt](help.txt) file includes a variety of usage information not covered in the original [README](https://mersenneforum.org/mayer/README.html), concentrating largely on the Mlucas command line options. A separate documentation page covers [Fermat numbers](docs/Fermat-testing.md).
 
 ## Contributing
 
diff --git a/config-fermat.sh b/config-fermat.sh
index b6ec7169..b9d33c7b 100644
--- a/config-fermat.sh
+++ b/config-fermat.sh
@@ -27,12 +27,11 @@
 # Mlucas
 MLUCAS=./Mlucas
 
-# Number of iterations
-# use 100, 1000, or 10000 to match pre-computed values
+# Number of iterations (use 100, 1000, or 10000 to match pre-computed values)
 ITERS=100
 
-# Minimum Fermat number (14 or greater)
-MIN=14
+# Minimum Fermat number (15 or greater)
+MIN=15
 
 # Maximum Fermat number (33 or less)
 MAX=29
@@ -40,13 +39,12 @@ MAX=29
 # Mlucas arguments
 ARGS=(
 	"$@"
-	# Add desired -cpu or -core settings here, or as following arguments, e.g. ../config-fermat.sh -cpu 0:3
-
+	# Add desired -cpu or -core settings here, or as following arguments, e.g. bash ../config-fermat.sh -cpu 0:3
 )
 
-# First, tiny FFT lengths for F14 to F17;
-FFTS=([1]=14 [2]=15 [4]=16 [7]=17 [8]=17)
-# Then, from small up to egregious FFTs for F18 to F33.
+# First, tiny FFT lengths for F15 to F17 (note 4K is the smallest workable length without fiddly radix settings);
+FFTS=([2]=15 [4]=16 [7]=17 [8]=17)
+# Then, from small up to egregiously large FFTs for F18 to F33.
 # The largest FFT reached is 512M, if MAX is set to 33.
 # Note that large FFTs require considerable runtime at 10000 iterations.
 for ((n = 0; n < 16; ++n)); do
@@ -60,12 +58,11 @@ for ((n = 0; n < 16; ++n)); do
 		# k = 15, 16 should both be supported up to at least F32.
 		FFTS[k * m]=$f
 		if [[ $k -eq 15 && $n -gt 5 ]]; then
-			# k = 63 is supported for F24 and above (1008K).
+			# k = 63 is mostly supported for F24 (1008K) and above.
 			FFTS[63 * m >> 2]=$f
 		fi
 	done
 done
-
 for fft in "${!FFTS[@]}"; do
 	f=${FFTS[fft]}
 	if [[ -n $MIN && $f -lt $MIN ]]; then
@@ -75,6 +72,10 @@ for fft in "${!FFTS[@]}"; do
 	fi
 	printf '\n\tTesting F%s (2^%s + 1),\tFFT length: %sK\n\n' "$f" $((1 << f)) "$fft"
 	args=("${ARGS[@]}")
+	# First we test the very fiddly F15 and then loop over F16 up to maximum
+	if [[ $f -eq 15 ]]; then
+		args+=(-radset 8,8,16)
+	fi
 	if [[ $f -le 17 || $f -ge 32 ]]; then
 		args+=(-shift 0)
 	fi
diff --git a/docs/Fermat-testing.md b/docs/Fermat-testing.md
new file mode 100644
index 00000000..747517a1
--- /dev/null
+++ b/docs/Fermat-testing.md
@@ -0,0 +1,189 @@
+# Introduction to Fermat number testing with Mlucas
+
+Mlucas is a powerful Mersenne primality testing and factoring package which supports Lucas–Lehmer 
+testing, Fermat probable primality testing, and Pollard _p_–1 factorisation; justly, most applications of the 
+software have concentrated on examination of the Mersenne numbers, however Mlucas is also capable of 
+testing the primality of Fermat numbers, testing the compositeness of Fermat cofactors, and running _p_–1
+factorisation on Fermat numbers. This document is aimed at filling in several lacunae in the Mlucas 
+documentation with respect to Fermat number testing.
+
+## Mlucas build notes
+
+The Fermat modular squaring code is architecture-specific, ideally requiring Mlucas to be built with one of 
+the SIMD modes (AVX, AVX2, SSE2, etc). This should be handled automatically by the `makemake.sh` script 
+for most hardware, and with the exception of 64-bit ARM (for which workarounds may be available) the default
+build should be capable of testing Fermat numbers.
+
+For example, Apple Silicon is ARM which supports ASIMD, meaning a native Mlucas build cannot test Fermat numbers. One 
+possible answer would be to build an SSE2 Mlucas on an Intel-based Mac, which should be capable of running under 
+Rosetta emulation on Apple Silicon, at the penalty of a significant performance hit for larger exponents.
+
+(When compiling on Intel with the intention of building a compatible version for Apple Silicon, the Makefile 
+requires static linking of the GMP library, by the following alteration to the linker flags:
+`LDLIBS = -Bstatic ${LD_ARGS[@]} -Bdynamic`
+Static linking of libraries is usually not recommended, and this may not be a solution in all possible cases.)
+
+## Fermat self-testing and populating `fermat.cfg`
+
+Assuming you have a working build, Mlucas allows any Fermat number _F<sub>m</sub>_ = 2<sup>2<sup>_m_</sup></sup> + 1 with exponent _m_ in 
+the range [13,63] to be entered as an argument for self-testing, by using the `-f` flag like so:
+
+`./Mlucas -f <exponent> -iters <number> [-fft <fft_length>]`
+
+However, the software only provides fast Fourier transforms (FFTs) up to 512M in length, which further 
+limits the largest Fermat number that may be tested, up to _F_<sub>33</sub>.
+
+The `-iters` flag is mandatory and should be followed by a number less than a million; the recommended 
+values for `-iters` to be set to are 100, 1000, or 10000, since the self-testing code has correct residues 
+for all Fermat numbers [14,33] saved, and any error in the test result will be immediately discovered.
+
+Mlucas does not support Fermat testing for all possible FFT lengths. Generally, FFTs for testing Mersenne 
+numbers are available for any length _k_·2<sup>_n_</sup> in kilobytes, where _k_ = [8,15], _n_ ≥ 0. 
+In the case of Fermat numbers however, only the subset _k_ = {2, 7, 15} supports Fermat testing, along with
+_k_ = 63 when _n_ ≥ 4.
+
+When self-testing Mlucas for Mersenne numbers, a command such as `./Mlucas -s all` configures 
+Mlucas for the majority of FFT lengths and saves the results in `mlucas.cfg`. A similar process must be 
+carried out for the Fermat numbers, using the `./Mlucas -f` command above, which saves results in the 
+file `fermat.cfg`.
+
+A script [`config-fermat.sh`](https://github.com/primesearch/Mlucas/blob/main/config-fermat.sh) has been written to automate the generation of the `fermat.cfg` file using 
+the set of possible FFT lengths. After the license, the next few lines declare some variables which may 
+be customised as desired (for example, 10000 iterations will require significant runtime at the largest 
+possible FFT sizes):
+```bash
+# Mlucas
+MLUCAS=./Mlucas
+
+# Number of iterations (use 100, 1000, or 10000 to match pre-computed values)
+ITERS=100
+
+# Minimum Fermat number (15 or greater)
+MIN=15
+
+# Maximum Fermat number (33 or less)
+MAX=29
+```
+The script should be invoked from the build directory, typically `obj`, with the following command, which 
+may include as parameters any cpu-specific options, such as `-core` or `-cpu`:
+```
+bash ../config-fermat.sh -cpu 0:3
+```
+Once self-testing has occurred, production work on Fermat numbers may be carried out using a `worktodo.txt` 
+file (in previous versions of Mlucas this file was named `worktodo.ini`) with entries in one of the several 
+formats below.
+
+Only assignments for ECM factoring of Fermat numbers are distributed by Primenet, so work assignments 
+cannot be obtained for Fermat numbers using the `primenet.py` script. However _p_–1 results for Fermat 
+numbers up to _F_<sub>29</sub> may be submitted to [mersenne.org](https://www.mersenne.org/manual_result/) as Manual results.
+
+## Primality testing: Pépin’s test
+
+For the Pépin test, the `worktodo` format is simply:
+```
+Fermat,Test=<exponent>
+```
+
+Fermat numbers in the range [13,22] may select a non-optimal FFT length by default in production mode.
+If this is the case, the FFT may be overridden using the command:
+```
+./Mlucas -fft FFT_LENGTH -shift 0
+```
+The optimal FFT lengths vary from 2K up to 512M, as shown in the table below under [testable Fermat numbers](#the-testable-fermat-numbers-fm--22m--1).
+
+Currently, all Fermat numbers up to _F_<sub>30</sub> have received a [Pépin test](https://www.mersenneforum.org/showthread.php?t=18748), and moreover _F_<sub>31</sub> and _F_<sub>32</sub> are known 
+to be composite. However, the character of their cofactors is unknown so a Pépin test would be a 
+necessary prerequisite for testing them; and as for _F_<sub>33</sub>, this is yet to be 
+tested for primality.
+
+The Pépin test uses Gerbicz error checking (GEC) to assure the reliability of the computation, however 
+residue shifting is not implemented for Fermat modular squaring with GEC. Thus when invoking Mlucas the 
+flag `-shift 0` _must_ be added to the Mlucas command line:
+```
+./Mlucas -shift 0
+```
+If non-zero residue shifting is used, the Pépin test will not be able to progress beyond the millionth 
+iteration (the default interval for GEC) as error checking will be unable to confirm whether or not the 
+computation is correct.
+
+## Cofactor compositeness testing: Suyama’s test
+Suyama’s test is a Fermat probable primality test on the cofactor of a Fermat number. As a prerequisite, 
+you _must_ already have run a Pépin test, which should have saved a final residue as a file named e.g. `f23` 
+for a Pépin test of _F_<sub>23</sub>. Do **not** try to run a Suyama test as a single work type incorporating the preceding 
+Pépin test.
+
+The `worktodo` format for the Suyama test is:
+```
+PRP=N/A,1,2,<2^m>,+1,99,0,3,5,"known_factor_1[,known_factor_2,...]"
+```
+
+Note that this work format uses the numeric value of 2<sup>_m_</sup> of a Fermat number _F<sub>m</sub>_, rather than just the 
+exponent _m_. At least one known prime factor must be supplied; composite factors are disallowed.
+
+Prior to testing, Mlucas tries an “integrity check” on each known factor, which has the effect of testing 
+whether the Pépin residue _R_ has been correctly calculated. This calculation can be performed at any point 
+up to the final iteration. More information is available [here](https://github.com/primesearch/Mlucas/issues/4).
+
+## Pollard _p_–1 factoring
+
+Work entries for _p_–1 factoring have two variations, however only the `Pminus1` format is supported for 
+Fermat numbers:
+```
+Pminus1=N/A,1,2,<2^m>,+1,B1,B2[,trial_factoring_bits][,B2_start][,"known_factors"]
+```
+
+The same note as regards the exponent for the Suyama test applies here also; however supplying known, prime factors
+is optional. The `-shift 0` flag is again a necessary addition to the Mlucas command line.
+
+The `B2_start` variable supports breaking up stage 2 of the algorithm among multiple instances of Mlucas.
+
+## Fermat number `worktodo.txt` examples
+
+An example of each of the work types, Pépin and Suyama tests, and Pollard _p_–1 factoring:
+```
+Fermat,Test=23
+PRP=N/A,1,2,1073741824,+1,99,0,3,5,"640126220763137,1095981164658689"
+Pminus1=N/A,1,2,8589934592,+1,10000000,10000000
+```
+Note the _p_–1 test of _F_<sub>33</sub> (as actually [performed](https://www.mersenneforum.org/showthread.php?t=29183) by Ernst Mayer) 
+used the same _B1_ and _B2_ bounds to run only the first stage of the algorithm.
+
+## The testable Fermat numbers _F<sub>m</sub>_ = 2<sup>2<sup>_m_</sup></sup> + 1
+
+The following table lists the default FFT lengths selected by Mlucas for the Fermat numbers in the range 
+[13,33] and the known factors, required for some of the test types above.
+
+ _m_|       2<sup>_m_</sup>|Default FFT |Optimal FFTs     |known_factor(s)
+--|---------:|-----------:|----------------:|--------------------------------------------------------------------
+13|      8192|        1K\*|               2K|`"2710954639361,2663848877152141313,3603109844542291969,319546020820551643220672513"`
+14|     16384|        1K\*|           2K, 4K|`"116928085873074369829035993834596371340386703423373313"`
+15|     32768|        2K  |           2K, 4K|`"1214251009,2327042503868417,168768817029516972383024127016961"`
+16|     65536|        3K\*|               4K|`"825753601,188981757975021318420037633"`
+17|    131072|  6K\*, 7K  |           7K, 8K|`"31065037602817,7751061099802522589358967058392886922693580423169"`
+18|    262144|       13K\*|    14K, 15K, 16K|`"13631489,81274690703860512587777"`
+19|    524288|       26K\*|    28K, 30K, 32K|`"70525124609,646730219521,37590055514133754286524446080499713"`
+20|   1048576|       52K\*|    56K, 60K, 64K|
+21|   2097152|      104K\*| 112K, 120K, 128K|`"4485296422913"`
+22|   4194304|      208K\*| 224K, 240K, 256K|`"64658705994591851009055774868504577"`
+23|   8388608|      448K  | 448K, 480K, 512K|`"167772161"`
+24|  16777216|      896K  | 896K, 960K, 1008K, 1M|
+25|  33554432|     1792K  | 1792K, 1920K, 2M|`"25991531462657,204393464266227713,2170072644496392193"`
+26|  67108864|     3584K  | 3584K, 3840K, 4032K, 4M|`"76861124116481"`
+27| 134217728|  7M, 7.5M  | 7M, 7.5M, 7.875M, 8M|`"151413703311361,231292694251438081"`
+28| 268435456|       15M  | 14M, 15M, 15.75M, 16M|`"1766730974551267606529"`
+29| 536870912|       30M  |  30M, 31.5M, 32M|`"2405286912458753"`
+30|1073741824|       60M  |    60M, 63M, 64M|`"640126220763137,1095981164658689"`
+31|2147483648|      120M  | 120M, 126M, 128M|`"46931635677864055013377"`
+32|4294967296|      256M  | 240M, 252M, 256M|`"25409026523137"`
+33|8589934592|      512M  |       504M, 512M|
+
+\* These default FFTs selected by Mlucas have radices that cannot be used for Fermat modular squaring;
+thus _F_<sub>13</sub> to _F_<sub>22</sub> cannot be tested with Mlucas without overriding the default FFT selected, _e.g._:
+```
+./Mlucas -fft 224K -shift 0
+```
+As mentioned above at [Mlucas build notes](#mlucas-build-notes), building Mlucas is highly architecture-specific, and some 
+build types will not support all of the optimal FFT lengths.
+
+Finally, 2K appears to be the smallest usable FFT for the smallest testable Fermat number _F_<sub>13</sub>;
+the 4K FFT seems to be more reliable on a wider range of systems for the next larger Fermat numbers _F_<sub>14</sub> to _F_<sub>16</sub>.
diff --git a/src/Mlucas.c b/src/Mlucas.c
index 732a443f..ea0746c0 100644
--- a/src/Mlucas.c
+++ b/src/Mlucas.c
@@ -695,7 +695,7 @@ with the default #threads = 1 and affinity set to logical core 0, unless user ov
 				TEST_TYPE = TEST_TYPE_PRP;
 			} else {	// PRP double-check:
 				// NB: Hit a gcc compiler bug (which left i = 0 for e.g. char_addr = ", 3 ,...") using -O0 here ... clang compiled correctly, as did gcc -O1:
-				i = (int)strtol(char_addr+1, &cptr, 10);	ASSERT(HERE, i == 3,"PRP-test base must be 3!");
+				i = (int)strtol(char_addr+1, &cptr, 10); // PRP bases other than 3 allowed; see https://github.com/primesearch/Mlucas/issues/18 //	ASSERT(HERE, i == 3,"PRP-test base must be 3!");
 				PRP_BASE = i;
 				ASSERT(HERE, (char_addr = strstr(cptr, ",")) != 0x0,"Expected ',' not found in assignment-specifying line!");
 				i = (int)strtol(char_addr+1, &cptr, 10); ASSERT(HERE, i == 1 || i == 5,"Only PRP-tests of type 1 (PRP-only) and type 5 (PRP and subsequent cofactor-PRP check) supported!");
@@ -705,6 +705,7 @@ with the default #threads = 1 and affinity set to logical core 0, unless user ov
 				// Use 0-or-not-ness of KNOWN_FACTORS[0] to differentiate between PRP-only and PRP-CF:
 				if(KNOWN_FACTORS[0] != 0ull) {
 					ASSERT(HERE, i == 5,"Only PRP-CF tests of type 5 supported!");
+					if (MODULUS_TYPE == MODULUS_TYPE_FERMAT) ASSERT(HERE, PRP_BASE == 3, "PRP-CF test base for Fermat numbers must be 3!");
 				}
 			}
 			goto GET_EXPO;
@@ -1201,7 +1202,7 @@ with the default #threads = 1 and affinity set to logical core 0, unless user ov
 	#ifdef USE_ARM_V8_SIMD
 		ASSERT(HERE, 0, "ARMv8 SIMD builds do not support Fermat-number testing!");
 	#endif
-		ASSERT(HERE,findex >= 14 && findex < 64, "Fermat number index must be in range [14,64]!\n");
+		ASSERT(HERE,findex >= 13 && findex < 64, "Fermat number index must be in range [13,63]!\n");
 		// This takes care of the number-to-char conversion and leading-whitespace-removal
 		// in one step - use PSTRING for temporary storage here:
 		strcpy(ESTRING, &PSTRING[convert_uint64_base10_char(PSTRING, (uint64)findex)]);
@@ -2432,7 +2433,7 @@ with the default #threads = 1 and affinity set to logical core 0, unless user ov
 			// by base and check that remainder 0. Note that we do want the quotient now, as that is our reside/base:
 			mi64_div(c_uint64_ptr, &itmp64, j+1,1, arrtmp,&rmodb);	ASSERT(HERE, rmodb == 0ull,"After short-div, R != 0 (mod B)");
 			// And recompute the S-H residues:
-			res_SH(arrtmp,i,&Res64,&Res35m1,&Res36m1);
+			res_SH(arrtmp,j,&Res64,&Res35m1,&Res36m1);
 			// Now that residue is standard Fermat-PRP-test one, check if == 1:
 			isprime = (arrtmp[0] == 1ull);
 			if(isprime) { // Check the arrtmp[] array for non-zero elements; see https://github.com/primesearch/Mlucas/issues/15
@@ -2478,8 +2479,8 @@ with the default #threads = 1 and affinity set to logical core 0, unless user ov
 			gm_time = gmtime(&calendar_time);
 			if(!gm_time)	// If UTC not available for some reason, just substitute the local time:
 				gm_time = localtime(&calendar_time);
-			// Want 'UTC' instead of 'GMT', so include that in lieu of the %Z format specifier
-			strftime(timebuffer,SIZE,"%Y-%m-%d %H:%M:%S UTC",gm_time);
+			// Want 'UTC' instead of 'GMT', so include that in lieu of the %Z format specifier (but also see https://www.mersenneforum.org/showthread.php?p=653672#post653672 as UTC is actually redundant)
+			strftime(timebuffer,SIZE,"%Y-%m-%d %H:%M:%S",gm_time);
 			// Trio of p-1 fields all 0; cstr holds the formatted output line here. Need to differentiate
 			// between this PRP-CF result and the preceding PRP; set the otherwise-unused s2_partial flag:
 			generate_JSON_report(isprime,p,n,Res64,Res2048,timebuffer, 0,0ull,0x0,s2_partial, cstr);
@@ -3313,6 +3314,8 @@ uint32 Suyama_CF_PRP(uint64 p, uint64*Res64, uint32 nfac, double a[], double b[]
 /*A*/	ierr = func_mod_square(a, (int*)ci, n, ilo,ihi, 0ull, p, scrnFlag, tdiff, TRUE, 0x0);
 		convert_res_FP_bytewise(a, (uint8*)ci, n, p, Res64, &Res35m1, &Res36m1);	// Overwrite passed-in Pepin-Res64 with Fermat-PRP one
 		snprintf_nowarn(cbuf,STR_MAX_LEN,"MaxErr = %10.9f\n",MME); mlucas_fprint(cbuf,1);
+	} else if (MODULUS_TYPE == MODULUS_TYPE_MERSENNE) {	// Mersenne PRP-CF doesn't have the Res35m1 or Res36m1 values passed in,
+		res_SH(ci,n,&itmp64,&Res35m1,&Res36m1);			// so we refresh these; see https://github.com/primesearch/Mlucas/issues/27
 	}
 	if(ierr) {
 		snprintf_nowarn(cbuf,STR_MAX_LEN,"Error of type[%u] = %s in mod-squaring ... aborting\n",ierr,returnMlucasErrCode(ierr));
@@ -3839,6 +3842,7 @@ struct testFerm FermVec[numFerm+1] =
 /* FFTlen(K) Fidx           Res64           mod 2^35-1      mod 2^36-1               Res64           mod 2^35-1      mod 2^36-1     */
 /*   ------- ----      ----------------     -----------     -----------         ----------------     -----------     -----------    */
 	/*                                    [%34359738367  ][%68719476735  ]                         [%34359738367  ][%68719476735  ] */
+//	{     2,  13u, { {0xC8FC67EA3A1AC788ull, 29592689237ull, 35156594447ull}, {0xBE489C2CF00D582Aull,  3108135315ull, 47125592449ull}, {0x0ull, 0ull, 0ull} } },
 	{     1,  14u, { {0xDB9AC520C403CB21ull,   342168579ull, 59244817440ull}, {0xF111F12732CCCB0Full, 24848612524ull, 66609820796ull}, {0x78738D068D641C2Cull, 12664273769ull, 29297626750ull} } },
 	{     2,  15u, { {0x3B21A6E55ED13454ull, 28379302213ull, 15546218647ull}, {0x4784657F2A36BE74ull,   617376037ull, 44891093359ull}, {0x589BFE53458FFC14ull, 12200390606ull, 46971422957ull} } },
 	{     4,  16u, { {0xAAE76C15C2B37465ull, 20013824731ull,  2261076122ull}, {0x42CC2CBE97C728E6ull, 30814966349ull, 44505312792ull}, {0xEED00D8AE6886440ull, 19057922301ull, 53800020279ull} } },
@@ -4305,8 +4309,8 @@ just below the upper limit for each FFT lengh in some subrange of the self-tests
 			ASSERT(HERE, !(i64arg>>32), "Fermat-number-index argument must be < 2^32 ... halting.");
 			findex = (uint32)i64arg;
 			/* Make sure the Fermat number index is in range: */
-			if(findex < 14 || findex > 63) {
-				fprintf(stderr, " Fermat number index must be in the range [14,63].\n");
+			if(findex < 13 || findex > 63) {
+				fprintf(stderr, " Fermat number index must be in the range [13,63].\n");
 				return ERR_EXPONENT_ILLEGAL;
 			}
 			userSetExponent = 1;
@@ -5265,10 +5269,13 @@ int read_ppm1_savefiles(const char*fname, uint64 p, uint32*kblocks, FILE*fp, uin
 				exp[0] = ui256.d0; exp[1] = ui256.d1; exp[2] = ui256.d2; exp[3] = ui256.d3;
 			} else
 				ASSERT(HERE, 0, "Only known-factors < 2^256 supported!");
-			// Raise 3 to the just-computed power; result in 4-limb local-array pow[]:
-			mi64_scalar_modpow_lr(3ull, exp, KNOWN_FACTORS+i, j, pow);
+			// Raise PRP base (usually but not always 3) to the just-computed power; result in 4-limb local-array pow[]:
+			mi64_scalar_modpow_lr(PRP_BASE, exp, KNOWN_FACTORS+i, j, pow);
 			sprintf(cstr,"\tB: R == %s (mod q)\n",&cbuf[convert_mi64_base10_char(cbuf, pow, j, 0)] ); mlucas_fprint(cstr,1);
-			ASSERT(HERE, mi64_getlen(pow,4) == k && mi64_cmp_eq(pow,rem,k), "Full-residue == 3^nsquares (mod q) check fails!");
+			if (mi64_getlen(pow,4) != k || !mi64_cmp_eq(pow,rem,k)) {
+				snprintf_nowarn(cbuf,STR_MAX_LEN,"Full-residue == %u^nsquares (mod q) check fails!", PRP_BASE); mlucas_fprint(cbuf,0);
+				ASSERT(HERE, 0, cbuf);
+			}
 		}
 	}
 #if 0
diff --git a/src/mi64.c b/src/mi64.c
index d0b26e3b..08bb5c88 100755
--- a/src/mi64.c
+++ b/src/mi64.c
@@ -6902,6 +6902,10 @@ printf("\n");
 			rem64 = rem64 + q*(uint64)fquo;
 		} else {
 			fquo = rem64*fqinv;
+			if(fquo >= 0.99999999999999 && fquo < 1.0) { // CXC: Give fquo a tiny push if we are in the 2nd pass, to ensure a 0.9999999999999 value actually gets to 1.0;
+				fprintf(stderr,"WARNING: floating point round-off error, %llu * %1.16f = %1.16f (should be an integer)\n", rem64, fqinv, fquo);
+				fquo += 0.00000000000001; // see https://github.com/primesearch/Mlucas/issues/18
+			}
 			itmp64 += (uint64)fquo;
 			rem64 = rem64 - q*(uint64)fquo;
 		}