diff --git a/README.md b/README.md index 30b00584..f0105262 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ Feature | | Mlucas | Prime95/MPrime \- | P-1 | ✔️ | ✔️ \- | P+1 | | ✔️ \- | ECM | | ✔️ -\- | Pépin | ✔️ | +\- | Pépin | ✔️ | ✔️ **PRP** | Proofs | | ✔️ \- | Certs | | ✔️ **Error Checking** | Jacobi | | ✔️ @@ -93,7 +93,7 @@ This README is still in progress. For now, see the original [Mlucas README](http ## Help -See the [help.txt](help.txt) file for usage information. +The [help.txt](help.txt) file includes a variety of usage information not covered in the original [README](https://mersenneforum.org/mayer/README.html), concentrating largely on the Mlucas command line options. A separate documentation page covers [Fermat numbers](docs/Fermat-testing.md). ## Contributing diff --git a/config-fermat.sh b/config-fermat.sh index b6ec7169..b9d33c7b 100644 --- a/config-fermat.sh +++ b/config-fermat.sh @@ -27,12 +27,11 @@ # Mlucas MLUCAS=./Mlucas -# Number of iterations -# use 100, 1000, or 10000 to match pre-computed values +# Number of iterations (use 100, 1000, or 10000 to match pre-computed values) ITERS=100 -# Minimum Fermat number (14 or greater) -MIN=14 +# Minimum Fermat number (15 or greater) +MIN=15 # Maximum Fermat number (33 or less) MAX=29 @@ -40,13 +39,12 @@ MAX=29 # Mlucas arguments ARGS=( "$@" - # Add desired -cpu or -core settings here, or as following arguments, e.g. ../config-fermat.sh -cpu 0:3 - + # Add desired -cpu or -core settings here, or as following arguments, e.g. bash ../config-fermat.sh -cpu 0:3 ) -# First, tiny FFT lengths for F14 to F17; -FFTS=([1]=14 [2]=15 [4]=16 [7]=17 [8]=17) -# Then, from small up to egregious FFTs for F18 to F33. +# First, tiny FFT lengths for F15 to F17 (note 4K is the smallest workable length without fiddly radix settings); +FFTS=([2]=15 [4]=16 [7]=17 [8]=17) +# Then, from small up to egregiously large FFTs for F18 to F33. # The largest FFT reached is 512M, if MAX is set to 33. # Note that large FFTs require considerable runtime at 10000 iterations. for ((n = 0; n < 16; ++n)); do @@ -60,12 +58,11 @@ for ((n = 0; n < 16; ++n)); do # k = 15, 16 should both be supported up to at least F32. FFTS[k * m]=$f if [[ $k -eq 15 && $n -gt 5 ]]; then - # k = 63 is supported for F24 and above (1008K). + # k = 63 is mostly supported for F24 (1008K) and above. FFTS[63 * m >> 2]=$f fi done done - for fft in "${!FFTS[@]}"; do f=${FFTS[fft]} if [[ -n $MIN && $f -lt $MIN ]]; then @@ -75,6 +72,10 @@ for fft in "${!FFTS[@]}"; do fi printf '\n\tTesting F%s (2^%s + 1),\tFFT length: %sK\n\n' "$f" $((1 << f)) "$fft" args=("${ARGS[@]}") + # First we test the very fiddly F15 and then loop over F16 up to maximum + if [[ $f -eq 15 ]]; then + args+=(-radset 8,8,16) + fi if [[ $f -le 17 || $f -ge 32 ]]; then args+=(-shift 0) fi diff --git a/docs/Fermat-testing.md b/docs/Fermat-testing.md new file mode 100644 index 00000000..747517a1 --- /dev/null +++ b/docs/Fermat-testing.md @@ -0,0 +1,189 @@ +# Introduction to Fermat number testing with Mlucas + +Mlucas is a powerful Mersenne primality testing and factoring package which supports Lucas–Lehmer +testing, Fermat probable primality testing, and Pollard _p_–1 factorisation; justly, most applications of the +software have concentrated on examination of the Mersenne numbers, however Mlucas is also capable of +testing the primality of Fermat numbers, testing the compositeness of Fermat cofactors, and running _p_–1 +factorisation on Fermat numbers. This document is aimed at filling in several lacunae in the Mlucas +documentation with respect to Fermat number testing. + +## Mlucas build notes + +The Fermat modular squaring code is architecture-specific, ideally requiring Mlucas to be built with one of +the SIMD modes (AVX, AVX2, SSE2, etc). This should be handled automatically by the `makemake.sh` script +for most hardware, and with the exception of 64-bit ARM (for which workarounds may be available) the default +build should be capable of testing Fermat numbers. + +For example, Apple Silicon is ARM which supports ASIMD, meaning a native Mlucas build cannot test Fermat numbers. One +possible answer would be to build an SSE2 Mlucas on an Intel-based Mac, which should be capable of running under +Rosetta emulation on Apple Silicon, at the penalty of a significant performance hit for larger exponents. + +(When compiling on Intel with the intention of building a compatible version for Apple Silicon, the Makefile +requires static linking of the GMP library, by the following alteration to the linker flags: +`LDLIBS = -Bstatic ${LD_ARGS[@]} -Bdynamic` +Static linking of libraries is usually not recommended, and this may not be a solution in all possible cases.) + +## Fermat self-testing and populating `fermat.cfg` + +Assuming you have a working build, Mlucas allows any Fermat number _Fm_ = 22_m_ + 1 with exponent _m_ in +the range [13,63] to be entered as an argument for self-testing, by using the `-f` flag like so: + +`./Mlucas -f -iters [-fft ]` + +However, the software only provides fast Fourier transforms (FFTs) up to 512M in length, which further +limits the largest Fermat number that may be tested, up to _F_33. + +The `-iters` flag is mandatory and should be followed by a number less than a million; the recommended +values for `-iters` to be set to are 100, 1000, or 10000, since the self-testing code has correct residues +for all Fermat numbers [14,33] saved, and any error in the test result will be immediately discovered. + +Mlucas does not support Fermat testing for all possible FFT lengths. Generally, FFTs for testing Mersenne +numbers are available for any length _k_·2_n_ in kilobytes, where _k_ = [8,15], _n_ ≥ 0. +In the case of Fermat numbers however, only the subset _k_ = {2, 7, 15} supports Fermat testing, along with +_k_ = 63 when _n_ ≥ 4. + +When self-testing Mlucas for Mersenne numbers, a command such as `./Mlucas -s all` configures +Mlucas for the majority of FFT lengths and saves the results in `mlucas.cfg`. A similar process must be +carried out for the Fermat numbers, using the `./Mlucas -f` command above, which saves results in the +file `fermat.cfg`. + +A script [`config-fermat.sh`](https://github.com/primesearch/Mlucas/blob/main/config-fermat.sh) has been written to automate the generation of the `fermat.cfg` file using +the set of possible FFT lengths. After the license, the next few lines declare some variables which may +be customised as desired (for example, 10000 iterations will require significant runtime at the largest +possible FFT sizes): +```bash +# Mlucas +MLUCAS=./Mlucas + +# Number of iterations (use 100, 1000, or 10000 to match pre-computed values) +ITERS=100 + +# Minimum Fermat number (15 or greater) +MIN=15 + +# Maximum Fermat number (33 or less) +MAX=29 +``` +The script should be invoked from the build directory, typically `obj`, with the following command, which +may include as parameters any cpu-specific options, such as `-core` or `-cpu`: +``` +bash ../config-fermat.sh -cpu 0:3 +``` +Once self-testing has occurred, production work on Fermat numbers may be carried out using a `worktodo.txt` +file (in previous versions of Mlucas this file was named `worktodo.ini`) with entries in one of the several +formats below. + +Only assignments for ECM factoring of Fermat numbers are distributed by Primenet, so work assignments +cannot be obtained for Fermat numbers using the `primenet.py` script. However _p_–1 results for Fermat +numbers up to _F_29 may be submitted to [mersenne.org](https://www.mersenne.org/manual_result/) as Manual results. + +## Primality testing: Pépin’s test + +For the Pépin test, the `worktodo` format is simply: +``` +Fermat,Test= +``` + +Fermat numbers in the range [13,22] may select a non-optimal FFT length by default in production mode. +If this is the case, the FFT may be overridden using the command: +``` +./Mlucas -fft FFT_LENGTH -shift 0 +``` +The optimal FFT lengths vary from 2K up to 512M, as shown in the table below under [testable Fermat numbers](#the-testable-fermat-numbers-fm--22m--1). + +Currently, all Fermat numbers up to _F_30 have received a [Pépin test](https://www.mersenneforum.org/showthread.php?t=18748), and moreover _F_31 and _F_32 are known +to be composite. However, the character of their cofactors is unknown so a Pépin test would be a +necessary prerequisite for testing them; and as for _F_33, this is yet to be +tested for primality. + +The Pépin test uses Gerbicz error checking (GEC) to assure the reliability of the computation, however +residue shifting is not implemented for Fermat modular squaring with GEC. Thus when invoking Mlucas the +flag `-shift 0` _must_ be added to the Mlucas command line: +``` +./Mlucas -shift 0 +``` +If non-zero residue shifting is used, the Pépin test will not be able to progress beyond the millionth +iteration (the default interval for GEC) as error checking will be unable to confirm whether or not the +computation is correct. + +## Cofactor compositeness testing: Suyama’s test +Suyama’s test is a Fermat probable primality test on the cofactor of a Fermat number. As a prerequisite, +you _must_ already have run a Pépin test, which should have saved a final residue as a file named e.g. `f23` +for a Pépin test of _F_23. Do **not** try to run a Suyama test as a single work type incorporating the preceding +Pépin test. + +The `worktodo` format for the Suyama test is: +``` +PRP=N/A,1,2,<2^m>,+1,99,0,3,5,"known_factor_1[,known_factor_2,...]" +``` + +Note that this work format uses the numeric value of 2_m_ of a Fermat number _Fm_, rather than just the +exponent _m_. At least one known prime factor must be supplied; composite factors are disallowed. + +Prior to testing, Mlucas tries an “integrity check” on each known factor, which has the effect of testing +whether the Pépin residue _R_ has been correctly calculated. This calculation can be performed at any point +up to the final iteration. More information is available [here](https://github.com/primesearch/Mlucas/issues/4). + +## Pollard _p_–1 factoring + +Work entries for _p_–1 factoring have two variations, however only the `Pminus1` format is supported for +Fermat numbers: +``` +Pminus1=N/A,1,2,<2^m>,+1,B1,B2[,trial_factoring_bits][,B2_start][,"known_factors"] +``` + +The same note as regards the exponent for the Suyama test applies here also; however supplying known, prime factors +is optional. The `-shift 0` flag is again a necessary addition to the Mlucas command line. + +The `B2_start` variable supports breaking up stage 2 of the algorithm among multiple instances of Mlucas. + +## Fermat number `worktodo.txt` examples + +An example of each of the work types, Pépin and Suyama tests, and Pollard _p_–1 factoring: +``` +Fermat,Test=23 +PRP=N/A,1,2,1073741824,+1,99,0,3,5,"640126220763137,1095981164658689" +Pminus1=N/A,1,2,8589934592,+1,10000000,10000000 +``` +Note the _p_–1 test of _F_33 (as actually [performed](https://www.mersenneforum.org/showthread.php?t=29183) by Ernst Mayer) +used the same _B1_ and _B2_ bounds to run only the first stage of the algorithm. + +## The testable Fermat numbers _Fm_ = 22_m_ + 1 + +The following table lists the default FFT lengths selected by Mlucas for the Fermat numbers in the range +[13,33] and the known factors, required for some of the test types above. + + _m_| 2_m_|Default FFT |Optimal FFTs |known_factor(s) +--|---------:|-----------:|----------------:|-------------------------------------------------------------------- +13| 8192| 1K\*| 2K|`"2710954639361,2663848877152141313,3603109844542291969,319546020820551643220672513"` +14| 16384| 1K\*| 2K, 4K|`"116928085873074369829035993834596371340386703423373313"` +15| 32768| 2K | 2K, 4K|`"1214251009,2327042503868417,168768817029516972383024127016961"` +16| 65536| 3K\*| 4K|`"825753601,188981757975021318420037633"` +17| 131072| 6K\*, 7K | 7K, 8K|`"31065037602817,7751061099802522589358967058392886922693580423169"` +18| 262144| 13K\*| 14K, 15K, 16K|`"13631489,81274690703860512587777"` +19| 524288| 26K\*| 28K, 30K, 32K|`"70525124609,646730219521,37590055514133754286524446080499713"` +20| 1048576| 52K\*| 56K, 60K, 64K| +21| 2097152| 104K\*| 112K, 120K, 128K|`"4485296422913"` +22| 4194304| 208K\*| 224K, 240K, 256K|`"64658705994591851009055774868504577"` +23| 8388608| 448K | 448K, 480K, 512K|`"167772161"` +24| 16777216| 896K | 896K, 960K, 1008K, 1M| +25| 33554432| 1792K | 1792K, 1920K, 2M|`"25991531462657,204393464266227713,2170072644496392193"` +26| 67108864| 3584K | 3584K, 3840K, 4032K, 4M|`"76861124116481"` +27| 134217728| 7M, 7.5M | 7M, 7.5M, 7.875M, 8M|`"151413703311361,231292694251438081"` +28| 268435456| 15M | 14M, 15M, 15.75M, 16M|`"1766730974551267606529"` +29| 536870912| 30M | 30M, 31.5M, 32M|`"2405286912458753"` +30|1073741824| 60M | 60M, 63M, 64M|`"640126220763137,1095981164658689"` +31|2147483648| 120M | 120M, 126M, 128M|`"46931635677864055013377"` +32|4294967296| 256M | 240M, 252M, 256M|`"25409026523137"` +33|8589934592| 512M | 504M, 512M| + +\* These default FFTs selected by Mlucas have radices that cannot be used for Fermat modular squaring; +thus _F_13 to _F_22 cannot be tested with Mlucas without overriding the default FFT selected, _e.g._: +``` +./Mlucas -fft 224K -shift 0 +``` +As mentioned above at [Mlucas build notes](#mlucas-build-notes), building Mlucas is highly architecture-specific, and some +build types will not support all of the optimal FFT lengths. + +Finally, 2K appears to be the smallest usable FFT for the smallest testable Fermat number _F_13; +the 4K FFT seems to be more reliable on a wider range of systems for the next larger Fermat numbers _F_14 to _F_16. diff --git a/src/Mlucas.c b/src/Mlucas.c index 732a443f..ea0746c0 100644 --- a/src/Mlucas.c +++ b/src/Mlucas.c @@ -695,7 +695,7 @@ with the default #threads = 1 and affinity set to logical core 0, unless user ov TEST_TYPE = TEST_TYPE_PRP; } else { // PRP double-check: // NB: Hit a gcc compiler bug (which left i = 0 for e.g. char_addr = ", 3 ,...") using -O0 here ... clang compiled correctly, as did gcc -O1: - i = (int)strtol(char_addr+1, &cptr, 10); ASSERT(HERE, i == 3,"PRP-test base must be 3!"); + i = (int)strtol(char_addr+1, &cptr, 10); // PRP bases other than 3 allowed; see https://github.com/primesearch/Mlucas/issues/18 // ASSERT(HERE, i == 3,"PRP-test base must be 3!"); PRP_BASE = i; ASSERT(HERE, (char_addr = strstr(cptr, ",")) != 0x0,"Expected ',' not found in assignment-specifying line!"); i = (int)strtol(char_addr+1, &cptr, 10); ASSERT(HERE, i == 1 || i == 5,"Only PRP-tests of type 1 (PRP-only) and type 5 (PRP and subsequent cofactor-PRP check) supported!"); @@ -705,6 +705,7 @@ with the default #threads = 1 and affinity set to logical core 0, unless user ov // Use 0-or-not-ness of KNOWN_FACTORS[0] to differentiate between PRP-only and PRP-CF: if(KNOWN_FACTORS[0] != 0ull) { ASSERT(HERE, i == 5,"Only PRP-CF tests of type 5 supported!"); + if (MODULUS_TYPE == MODULUS_TYPE_FERMAT) ASSERT(HERE, PRP_BASE == 3, "PRP-CF test base for Fermat numbers must be 3!"); } } goto GET_EXPO; @@ -1201,7 +1202,7 @@ with the default #threads = 1 and affinity set to logical core 0, unless user ov #ifdef USE_ARM_V8_SIMD ASSERT(HERE, 0, "ARMv8 SIMD builds do not support Fermat-number testing!"); #endif - ASSERT(HERE,findex >= 14 && findex < 64, "Fermat number index must be in range [14,64]!\n"); + ASSERT(HERE,findex >= 13 && findex < 64, "Fermat number index must be in range [13,63]!\n"); // This takes care of the number-to-char conversion and leading-whitespace-removal // in one step - use PSTRING for temporary storage here: strcpy(ESTRING, &PSTRING[convert_uint64_base10_char(PSTRING, (uint64)findex)]); @@ -2432,7 +2433,7 @@ with the default #threads = 1 and affinity set to logical core 0, unless user ov // by base and check that remainder 0. Note that we do want the quotient now, as that is our reside/base: mi64_div(c_uint64_ptr, &itmp64, j+1,1, arrtmp,&rmodb); ASSERT(HERE, rmodb == 0ull,"After short-div, R != 0 (mod B)"); // And recompute the S-H residues: - res_SH(arrtmp,i,&Res64,&Res35m1,&Res36m1); + res_SH(arrtmp,j,&Res64,&Res35m1,&Res36m1); // Now that residue is standard Fermat-PRP-test one, check if == 1: isprime = (arrtmp[0] == 1ull); if(isprime) { // Check the arrtmp[] array for non-zero elements; see https://github.com/primesearch/Mlucas/issues/15 @@ -2478,8 +2479,8 @@ with the default #threads = 1 and affinity set to logical core 0, unless user ov gm_time = gmtime(&calendar_time); if(!gm_time) // If UTC not available for some reason, just substitute the local time: gm_time = localtime(&calendar_time); - // Want 'UTC' instead of 'GMT', so include that in lieu of the %Z format specifier - strftime(timebuffer,SIZE,"%Y-%m-%d %H:%M:%S UTC",gm_time); + // Want 'UTC' instead of 'GMT', so include that in lieu of the %Z format specifier (but also see https://www.mersenneforum.org/showthread.php?p=653672#post653672 as UTC is actually redundant) + strftime(timebuffer,SIZE,"%Y-%m-%d %H:%M:%S",gm_time); // Trio of p-1 fields all 0; cstr holds the formatted output line here. Need to differentiate // between this PRP-CF result and the preceding PRP; set the otherwise-unused s2_partial flag: generate_JSON_report(isprime,p,n,Res64,Res2048,timebuffer, 0,0ull,0x0,s2_partial, cstr); @@ -3313,6 +3314,8 @@ uint32 Suyama_CF_PRP(uint64 p, uint64*Res64, uint32 nfac, double a[], double b[] /*A*/ ierr = func_mod_square(a, (int*)ci, n, ilo,ihi, 0ull, p, scrnFlag, tdiff, TRUE, 0x0); convert_res_FP_bytewise(a, (uint8*)ci, n, p, Res64, &Res35m1, &Res36m1); // Overwrite passed-in Pepin-Res64 with Fermat-PRP one snprintf_nowarn(cbuf,STR_MAX_LEN,"MaxErr = %10.9f\n",MME); mlucas_fprint(cbuf,1); + } else if (MODULUS_TYPE == MODULUS_TYPE_MERSENNE) { // Mersenne PRP-CF doesn't have the Res35m1 or Res36m1 values passed in, + res_SH(ci,n,&itmp64,&Res35m1,&Res36m1); // so we refresh these; see https://github.com/primesearch/Mlucas/issues/27 } if(ierr) { snprintf_nowarn(cbuf,STR_MAX_LEN,"Error of type[%u] = %s in mod-squaring ... aborting\n",ierr,returnMlucasErrCode(ierr)); @@ -3839,6 +3842,7 @@ struct testFerm FermVec[numFerm+1] = /* FFTlen(K) Fidx Res64 mod 2^35-1 mod 2^36-1 Res64 mod 2^35-1 mod 2^36-1 */ /* ------- ---- ---------------- ----------- ----------- ---------------- ----------- ----------- */ /* [%34359738367 ][%68719476735 ] [%34359738367 ][%68719476735 ] */ +// { 2, 13u, { {0xC8FC67EA3A1AC788ull, 29592689237ull, 35156594447ull}, {0xBE489C2CF00D582Aull, 3108135315ull, 47125592449ull}, {0x0ull, 0ull, 0ull} } }, { 1, 14u, { {0xDB9AC520C403CB21ull, 342168579ull, 59244817440ull}, {0xF111F12732CCCB0Full, 24848612524ull, 66609820796ull}, {0x78738D068D641C2Cull, 12664273769ull, 29297626750ull} } }, { 2, 15u, { {0x3B21A6E55ED13454ull, 28379302213ull, 15546218647ull}, {0x4784657F2A36BE74ull, 617376037ull, 44891093359ull}, {0x589BFE53458FFC14ull, 12200390606ull, 46971422957ull} } }, { 4, 16u, { {0xAAE76C15C2B37465ull, 20013824731ull, 2261076122ull}, {0x42CC2CBE97C728E6ull, 30814966349ull, 44505312792ull}, {0xEED00D8AE6886440ull, 19057922301ull, 53800020279ull} } }, @@ -4305,8 +4309,8 @@ just below the upper limit for each FFT lengh in some subrange of the self-tests ASSERT(HERE, !(i64arg>>32), "Fermat-number-index argument must be < 2^32 ... halting."); findex = (uint32)i64arg; /* Make sure the Fermat number index is in range: */ - if(findex < 14 || findex > 63) { - fprintf(stderr, " Fermat number index must be in the range [14,63].\n"); + if(findex < 13 || findex > 63) { + fprintf(stderr, " Fermat number index must be in the range [13,63].\n"); return ERR_EXPONENT_ILLEGAL; } userSetExponent = 1; @@ -5265,10 +5269,13 @@ int read_ppm1_savefiles(const char*fname, uint64 p, uint32*kblocks, FILE*fp, uin exp[0] = ui256.d0; exp[1] = ui256.d1; exp[2] = ui256.d2; exp[3] = ui256.d3; } else ASSERT(HERE, 0, "Only known-factors < 2^256 supported!"); - // Raise 3 to the just-computed power; result in 4-limb local-array pow[]: - mi64_scalar_modpow_lr(3ull, exp, KNOWN_FACTORS+i, j, pow); + // Raise PRP base (usually but not always 3) to the just-computed power; result in 4-limb local-array pow[]: + mi64_scalar_modpow_lr(PRP_BASE, exp, KNOWN_FACTORS+i, j, pow); sprintf(cstr,"\tB: R == %s (mod q)\n",&cbuf[convert_mi64_base10_char(cbuf, pow, j, 0)] ); mlucas_fprint(cstr,1); - ASSERT(HERE, mi64_getlen(pow,4) == k && mi64_cmp_eq(pow,rem,k), "Full-residue == 3^nsquares (mod q) check fails!"); + if (mi64_getlen(pow,4) != k || !mi64_cmp_eq(pow,rem,k)) { + snprintf_nowarn(cbuf,STR_MAX_LEN,"Full-residue == %u^nsquares (mod q) check fails!", PRP_BASE); mlucas_fprint(cbuf,0); + ASSERT(HERE, 0, cbuf); + } } } #if 0 diff --git a/src/mi64.c b/src/mi64.c index d0b26e3b..08bb5c88 100755 --- a/src/mi64.c +++ b/src/mi64.c @@ -6902,6 +6902,10 @@ printf("\n"); rem64 = rem64 + q*(uint64)fquo; } else { fquo = rem64*fqinv; + if(fquo >= 0.99999999999999 && fquo < 1.0) { // CXC: Give fquo a tiny push if we are in the 2nd pass, to ensure a 0.9999999999999 value actually gets to 1.0; + fprintf(stderr,"WARNING: floating point round-off error, %llu * %1.16f = %1.16f (should be an integer)\n", rem64, fqinv, fquo); + fquo += 0.00000000000001; // see https://github.com/primesearch/Mlucas/issues/18 + } itmp64 += (uint64)fquo; rem64 = rem64 - q*(uint64)fquo; }