Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed assorted compiler warnings #22

Merged
merged 5 commits into from
Aug 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/Mdata.h
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ extern char PSTRING[STR_MAX_LEN]; /* Number being tested in string form, typical
#endif

extern const int hex_chars[16];
extern char cbuf[STR_MAX_LEN], cstr[STR_MAX_LEN];
extern char cbuf[STR_MAX_LEN*2], cstr[STR_MAX_LEN];
extern char in_line[STR_MAX_LEN];
extern char *char_addr;
extern int char_offset;
Expand Down
898 changes: 449 additions & 449 deletions src/Mlucas.c

Large diffs are not rendered by default.

Empty file modified src/Mlucas.h
100755 → 100644
Empty file.
8 changes: 4 additions & 4 deletions src/br.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ void print_pow2_twiddles(const uint32 n, const uint32 p, const uint32 q)
const char csigns[2] = {'+','-'};
const char re_im[2] = {'c','s'};
char prefix[3]; // 0-slot for overall sign; 1 for complex operator * [Re / Im interchange], 2 for ~ [complex conjugation].
ASSERT(HERE, n == (1<<lgn), "n not a power of 2!");
ASSERT(HERE, n == p*q, "n != p*q!");
ASSERT(n == (1<<lgn), "n not a power of 2!");
ASSERT(n == p*q, "n != p*q!");
printf("Fundamental-root powers for %d x %d impl of radix-%d DFT:\n",p,q,n);
for(i = 1; i < p; i++) { // Skip 0-row, since those roots = 1
ir = reverse(i,lgp);
Expand Down Expand Up @@ -231,7 +231,7 @@ void bit_reverse_int(int vec[], int n, int nradices, int radix[], int incr, int*
/* If no scratch-space array provided, create one locally: */
if(arr_scratch) {
/* Don't allow reuse of main array for inits at this time: */
ASSERT(HERE, &vec[0] != &arr_scratch[0], "Array re-use not currently supported!");
ASSERT(&vec[0] != &arr_scratch[0], "Array re-use not currently supported!");
tmp = arr_scratch;
} else {
tmp = (int *)malloc(n*sizeof(int));
Expand All @@ -254,7 +254,7 @@ void bit_reverse_int(int vec[], int n, int nradices, int radix[], int incr, int*
i += incr;
}
printf("] != vector length [%u] in BIT_REVERSE_INT\n",n);
ASSERT(HERE,0,"Exiting.");
ASSERT(0,"Exiting.");
}

/*...We don't use the final radix for the bit reversal, we simply need it for array bounds checking. */
Expand Down
20 changes: 10 additions & 10 deletions src/carry_dbg.h
Original file line number Diff line number Diff line change
Expand Up @@ -470,7 +470,7 @@ printf("WARN: frac = %10.8f occurred in Re(a[%2u]) at j = %10d\n",frac,j,0);\
cy = (temp*baseinv[i] + RND_A) - RND_B;\
check_nint(cy, temp*baseinv[i]);\
x = (temp-cy*base[i]);\
ASSERT(HERE, fabs(x+x) <= base[i], "X-output out of range!");\
ASSERT(fabs(x+x) <= base[i], "X-output out of range!");\
x *= wt;\
\
bjmodn -= sw;\
Expand All @@ -496,7 +496,7 @@ printf("WARN: frac = %10.8f occurred in Im(a[%2u]) at j = %10d\n",frac,j,0);\
cy = (temp*baseinv[i] + RND_A) - RND_B;\
check_nint(cy, temp*baseinv[i]);\
y = (temp-cy*base[i]);\
ASSERT(HERE, fabs(y+y) <= base[i], "Y-output out of range!");\
ASSERT(fabs(y+y) <= base[i], "Y-output out of range!");\
y *= wt;\
\
bjmodn -= sw;\
Expand Down Expand Up @@ -528,7 +528,7 @@ printf("WARN: frac = %10.8f occurred in Re(a[%2u]) at j = %10d\n",frac,j,set);\
cy = (temp*baseinv[i] + RND_A) - RND_B;\
check_nint(cy, temp*baseinv[i]);\
x = (temp-cy*base[i]);\
ASSERT(HERE, fabs(x+x) <= base[i], "X-output out of range!");\
ASSERT(fabs(x+x) <= base[i], "X-output out of range!");\
x *= wt;\
\
bjmodn -= sw;\
Expand All @@ -554,7 +554,7 @@ printf("WARN: frac = %10.8f occurred in Im(a[%2u]) at j = %10d\n",frac,j,set);\
cy = (temp*baseinv[i] + RND_A) - RND_B;\
check_nint(cy, temp*baseinv[i]);\
y = (temp-cy*base[i]);\
ASSERT(HERE, fabs(y+y) <= base[i], "Y-output out of range!");\
ASSERT(fabs(y+y) <= base[i], "Y-output out of range!");\
y *= wt;\
\
bjmodn -= sw;\
Expand Down Expand Up @@ -721,7 +721,7 @@ sign = ix & himask;\
mant = ix & mmask;\
dexp = (ix-sign)>>52;\
shift = 1074 - dexp;\
/*if(j1==0)printf("0xmant,shift,bits = %20llX %10d %10u\n",mant,shift,bits);*/\
/*if(j1==0)printf("0xmant,shift,bits = %20" PRIX64 " %10d %10u\n",mant,shift,bits);*/\
if(shift<0)printf("WARN: j1 = %10d %20.15e gives negative shift count = %10d\n",j1,x,shift);\
if(shift < 52)\
{\
Expand All @@ -730,15 +730,15 @@ ifrac = mant << (63-shift);\
if(ifrac > ifracmax) ifracmax=ifrac;\
mant += ((uint64)1)<<shift;\
mant = (mant+two52)>>(shift+1);\
/*if(j1==0)printf("A: 0xmant = %20llX\n",mant);*/\
/*if(j1==0)printf("A: 0xmant = %20" PRIX64 "\n",mant);*/\
mant -= (mant & sign)<<1;\
/*if(j1==0)printf("B: 0xmant = %20llX\n",mant);*/\
/*if(j1==0)printf("B: 0xmant = %20" PRIX64 "\n",mant);*/\
word = mant & (~(ones << bits));\
/*if(j1==0)printf("C: 0xword = %20llX\n",word);*/\
/*if(j1==0)printf("C: 0xword = %20" PRIX64 "\n",word);*/\
topbit= word >> (bits - 1);\
/*if(j1==0)printf("D: 0xtbit = %20llX\n",topbit);*/\
/*if(j1==0)printf("D: 0xtbit = %20" PRIX64 "\n",topbit);*/\
word -= topbit << bits;\
/*if(j1==0)printf("E: 0xword = %20llX\n",word);*/\
/*if(j1==0)printf("E: 0xword = %20" PRIX64 "\n",word);*/\
x = wt*(double)word;\
cy = (double)( (mant >> bits) + topbit );\
/*if(j1==0)printf("%20.4f %20.4f\n",x,cy);*/\
Expand Down
58 changes: 29 additions & 29 deletions src/dft_macro.c
Original file line number Diff line number Diff line change
Expand Up @@ -3396,18 +3396,18 @@ in the same order here as DIF, but the in-and-output-index offsets are BRed: j1
if(thr_id == -1) // Value of init stores #threads
{
if(init <= max_threads) { // Previously inited with sufficient #threads
ASSERT(HERE, sc_arr != 0, "This function requires an initial Init-consts-mode call (in 1-thread mode only) before use!");
ASSERT(sc_arr != 0, "This function requires an initial Init-consts-mode call (in 1-thread mode only) before use!");
return;
}
max_threads = init;
#ifndef COMPILER_TYPE_GCC
ASSERT(HERE, NTHREADS == 1, "Multithreading currently only supported for GCC builds!");
ASSERT(NTHREADS == 1, "Multithreading currently only supported for GCC builds!");
#endif
if(sc_arr) { free((void *)sc_arr); }
// 126 slots for DFT-63 data, 22 for DFT-7,9 consts and DFT-7 pads, 4 to allow for alignment = 152:
sc_arr = ALLOC_VEC_DBL(sc_arr, 152*max_threads); if(!sc_arr){ sprintf(cbuf, "ERROR: unable to allocate sc_arr!.\n"); fprintf(stderr,"%s", cbuf); ASSERT(HERE, 0,cbuf); }
sc_arr = ALLOC_VEC_DBL(sc_arr, 152*max_threads); if(!sc_arr){ sprintf(cbuf, "ERROR: unable to allocate sc_arr!.\n"); fprintf(stderr,"%s", cbuf); ASSERT(0,cbuf); }
sc_ptr = ALIGN_VEC_DBL(sc_arr);
ASSERT(HERE, ((intptr_t)sc_ptr & 0x3f) == 0, "sc_ptr not 64-byte aligned!");
ASSERT(((intptr_t)sc_ptr & 0x3f) == 0, "sc_ptr not 64-byte aligned!");

#ifdef MULTITHREAD
__r0 = tdat = sc_ptr; tmp = tdat + 126;
Expand Down Expand Up @@ -3491,12 +3491,12 @@ in the same order here as DIF, but the in-and-output-index offsets are BRed: j1
#endif
return;
} else {
ASSERT(HERE, sc_arr != 0, "This function requires an initial Init-consts-mode call (in 1-thread mode only) before use!");
ASSERT(sc_arr != 0, "This function requires an initial Init-consts-mode call (in 1-thread mode only) before use!");
} /* end of inits */

/* If multithreaded, set the local-store pointers needed for the current thread; */
#ifdef MULTITHREAD
ASSERT(HERE, (uint32)thr_id < (uint32)max_threads, "Bad thread ID!");
ASSERT((uint32)thr_id < (uint32)max_threads, "Bad thread ID!");
tdat = __r0 + thr_id*152; tmp = tdat + 126;
two = tmp + 0x0;
one = tmp + 0x1;
Expand Down Expand Up @@ -3645,18 +3645,18 @@ in the same order here as DIF, but the in-and-output-index offsets are BRed: j1
if(thr_id == -1) // Value of init stores #threads
{
if(init <= max_threads) { // Previously inited with sufficient #threads
ASSERT(HERE, sc_arr != 0, "This function requires an initial Init-consts-mode call (in 1-thread mode only) before use!");
ASSERT(sc_arr != 0, "This function requires an initial Init-consts-mode call (in 1-thread mode only) before use!");
return;
}
max_threads = init;
#ifndef COMPILER_TYPE_GCC
ASSERT(HERE, NTHREADS == 1, "Multithreading currently only supported for GCC builds!");
ASSERT(NTHREADS == 1, "Multithreading currently only supported for GCC builds!");
#endif
if(sc_arr) { free((void *)sc_arr); }
// 126 slots for DFT-63 data, 22 for DFT-7,9 consts and DFT-7 pads, 4 to allow for alignment = 152:
sc_arr = ALLOC_VEC_DBL(sc_arr, 152*max_threads); if(!sc_arr){ sprintf(cbuf, "ERROR: unable to allocate sc_arr!.\n"); fprintf(stderr,"%s", cbuf); ASSERT(HERE, 0,cbuf); }
sc_arr = ALLOC_VEC_DBL(sc_arr, 152*max_threads); if(!sc_arr){ sprintf(cbuf, "ERROR: unable to allocate sc_arr!.\n"); fprintf(stderr,"%s", cbuf); ASSERT(0,cbuf); }
sc_ptr = ALIGN_VEC_DBL(sc_arr);
ASSERT(HERE, ((intptr_t)sc_ptr & 0x3f) == 0, "sc_ptr not 64-byte aligned!");
ASSERT(((intptr_t)sc_ptr & 0x3f) == 0, "sc_ptr not 64-byte aligned!");

#ifdef MULTITHREAD
__r0 = tdat = sc_ptr; tmp = tdat + 126;
Expand Down Expand Up @@ -3740,12 +3740,12 @@ in the same order here as DIF, but the in-and-output-index offsets are BRed: j1
#endif
return;
} else {
ASSERT(HERE, sc_arr != 0, "This function requires an initial Init-consts-mode call (in 1-thread mode only) before use!");
ASSERT(sc_arr != 0, "This function requires an initial Init-consts-mode call (in 1-thread mode only) before use!");
} /* end of inits */

/* If multithreaded, set the local-store pointers needed for the current thread; */
#ifdef MULTITHREAD
ASSERT(HERE, (uint32)thr_id < (uint32)max_threads, "Bad thread ID!");
ASSERT((uint32)thr_id < (uint32)max_threads, "Bad thread ID!");
tdat = __r0 + thr_id*152; tmp = tdat + 126;
two = tmp + 0x0;
one = tmp + 0x1;
Expand Down Expand Up @@ -3877,17 +3877,17 @@ in the same order here as DIF, but the in-and-output-index offsets are BRed: j1
if(thr_id == -1) // Value of init stores #threads
{
if(init <= max_threads) { // Previously inited with sufficient #threads
ASSERT(HERE, sc_arr != 0, "This function requires an initial Init-consts-mode call (in 1-thread mode only) before use!");
ASSERT(sc_arr != 0, "This function requires an initial Init-consts-mode call (in 1-thread mode only) before use!");
return;
}
max_threads = init;
#ifndef COMPILER_TYPE_GCC
ASSERT(HERE, NTHREADS == 1, "Multithreading currently only supported for GCC builds!");
ASSERT(NTHREADS == 1, "Multithreading currently only supported for GCC builds!");
#endif
if(sc_arr) { free((void *)sc_arr); }
sc_arr = ALLOC_VEC_DBL(sc_arr, 0x32*max_threads); if(!sc_arr){ sprintf(cbuf, "ERROR: unable to allocate sc_arr!.\n"); fprintf(stderr,"%s", cbuf); ASSERT(HERE, 0,cbuf); }
sc_arr = ALLOC_VEC_DBL(sc_arr, 0x32*max_threads); if(!sc_arr){ sprintf(cbuf, "ERROR: unable to allocate sc_arr!.\n"); fprintf(stderr,"%s", cbuf); ASSERT(0,cbuf); }
sc_ptr = ALIGN_VEC_DBL(sc_arr);
ASSERT(HERE, ((intptr_t)sc_ptr & 0x3f) == 0, "sc_ptr not 64-byte aligned!");
ASSERT(((intptr_t)sc_ptr & 0x3f) == 0, "sc_ptr not 64-byte aligned!");

#ifdef MULTITHREAD
__r0 = tmp = sc_ptr;
Expand Down Expand Up @@ -3925,7 +3925,7 @@ in the same order here as DIF, but the in-and-output-index offsets are BRed: j1
// VEC_DBL_INIT(sqrt2, SQRT2); VEC_DBL_INIT(tmp, ISRT2);
VEC_DBL_INIT(nisrt2,-dtmp);
VEC_DBL_INIT( isrt2, dtmp); // Copies of +ISRT2 needed for 30-asm-macro-operand-GCC-limit workaround:
VEC_DBL_INIT( cc0, 1.0); VEC_DBL_INIT( ss0, 0.0); // tmp = cc0-1; ASSERT(HERE, tmp->d0 == ISRT2 && tmp->d1 == ISRT2, "tmp->d0,1 != ISRT2"); Disable to allow "round down" variant
VEC_DBL_INIT( cc0, 1.0); VEC_DBL_INIT( ss0, 0.0); // tmp = cc0-1; ASSERT(tmp->d0 == ISRT2 && tmp->d1 == ISRT2, "tmp->d0,1 != ISRT2"); Disable to allow "round down" variant
VEC_DBL_INIT( cc1, c64_1); VEC_DBL_INIT( ss1, s64_1); tmp = cc1-1; VEC_DBL_INIT(tmp, dtmp);
VEC_DBL_INIT( cc2, c32_1); VEC_DBL_INIT( ss2, s32_1); tmp = cc2-1; VEC_DBL_INIT(tmp, dtmp);
VEC_DBL_INIT( cc3, c64_3); VEC_DBL_INIT( ss3, s64_3); tmp = cc3-1; VEC_DBL_INIT(tmp, dtmp);
Expand Down Expand Up @@ -4013,7 +4013,7 @@ in the same order here as DIF, but the in-and-output-index offsets are BRed: j1
// VEC_DBL_INIT(sqrt2, SQRT2); VEC_DBL_INIT(tmp, ISRT2);
VEC_DBL_INIT(nisrt2,-dtmp);
VEC_DBL_INIT( isrt2, dtmp); // Copies of +ISRT2 needed for 30-asm-macro-operand-GCC-limit workaround:
VEC_DBL_INIT( cc0, 1.0); VEC_DBL_INIT( ss0, 0.0); // tmp = cc0-1; ASSERT(HERE, tmp->d0 == ISRT2 && tmp->d1 == ISRT2, "tmp->d0,1 != ISRT2"); Disable to allow "round down" variant
VEC_DBL_INIT( cc0, 1.0); VEC_DBL_INIT( ss0, 0.0); // tmp = cc0-1; ASSERT(tmp->d0 == ISRT2 && tmp->d1 == ISRT2, "tmp->d0,1 != ISRT2"); Disable to allow "round down" variant
VEC_DBL_INIT( cc1, c64_1); VEC_DBL_INIT( ss1, s64_1); tmp = cc1-1; VEC_DBL_INIT(tmp, dtmp);
VEC_DBL_INIT( cc2, c32_1); VEC_DBL_INIT( ss2, s32_1); tmp = cc2-1; VEC_DBL_INIT(tmp, dtmp);
VEC_DBL_INIT( cc3, c64_3); VEC_DBL_INIT( ss3, s64_3); tmp = cc3-1; VEC_DBL_INIT(tmp, dtmp);
Expand All @@ -4032,12 +4032,12 @@ in the same order here as DIF, but the in-and-output-index offsets are BRed: j1
// fprintf(stderr, "Init SSE2_RADIX_64_DIF with max_threads = %d\n",max_threads);
return;
} else {
ASSERT(HERE, sc_arr != 0, "This function requires an initial Init-consts-mode call (in 1-thread mode only) before use!");
ASSERT(sc_arr != 0, "This function requires an initial Init-consts-mode call (in 1-thread mode only) before use!");
} /* end of inits */

/* If multithreaded, set the local-store pointers needed for the current thread; */
#ifdef MULTITHREAD
ASSERT(HERE, (uint32)thr_id < (uint32)max_threads, "Bad thread ID!");
ASSERT((uint32)thr_id < (uint32)max_threads, "Bad thread ID!");
tmp = __r0 + thr_id*0x32;
// To support FMA versions of the radix-8 macros used to build radix-64 we insert a standalone copy of the [2,1,sqrt2,isrt2] quartet:
two = tmp + 0; // AVX+ versions of various DFT macros assume consts 2.0,1.0,isrt2 laid out thusly
Expand Down Expand Up @@ -4280,17 +4280,17 @@ in the same order here as DIF, but the in-and-output-index offsets are BRed: j1
if(thr_id == -1) // Value of init stores #threads
{
if(init <= max_threads) { // Previously inited with sufficient #threads
ASSERT(HERE, sc_arr != 0, "This function requires an initial Init-consts-mode call (in 1-thread mode only) before use!");
ASSERT(sc_arr != 0, "This function requires an initial Init-consts-mode call (in 1-thread mode only) before use!");
return;
}
max_threads = init;
#ifndef COMPILER_TYPE_GCC
ASSERT(HERE, NTHREADS == 1, "Multithreading currently only supported for GCC builds!");
ASSERT(NTHREADS == 1, "Multithreading currently only supported for GCC builds!");
#endif
if(sc_arr) { free((void *)sc_arr); }
sc_arr = ALLOC_VEC_DBL(sc_arr, 0x32*max_threads); if(!sc_arr){ sprintf(cbuf, "ERROR: unable to allocate sc_arr!.\n"); fprintf(stderr,"%s", cbuf); ASSERT(HERE, 0,cbuf); }
sc_arr = ALLOC_VEC_DBL(sc_arr, 0x32*max_threads); if(!sc_arr){ sprintf(cbuf, "ERROR: unable to allocate sc_arr!.\n"); fprintf(stderr,"%s", cbuf); ASSERT(0,cbuf); }
sc_ptr = ALIGN_VEC_DBL(sc_arr);
ASSERT(HERE, ((intptr_t)sc_ptr & 0x3f) == 0, "sc_ptr not 64-byte aligned!");
ASSERT(((intptr_t)sc_ptr & 0x3f) == 0, "sc_ptr not 64-byte aligned!");

#ifdef MULTITHREAD
__r0 = tmp = sc_ptr;
Expand Down Expand Up @@ -4328,7 +4328,7 @@ in the same order here as DIF, but the in-and-output-index offsets are BRed: j1
// VEC_DBL_INIT(sqrt2, SQRT2); VEC_DBL_INIT(tmp, ISRT2);
VEC_DBL_INIT(nisrt2,-dtmp);
VEC_DBL_INIT( isrt2, dtmp); // Copies of +ISRT2 needed for 30-asm-macro-operand-GCC-limit workaround:
VEC_DBL_INIT( cc0, 1.0); VEC_DBL_INIT( ss0, 0.0); // tmp = cc0-1; ASSERT(HERE, tmp->d0 == ISRT2 && tmp->d1 == ISRT2, "tmp->d0,1 != ISRT2"); Disable to allow "round down" variant
VEC_DBL_INIT( cc0, 1.0); VEC_DBL_INIT( ss0, 0.0); // tmp = cc0-1; ASSERT(tmp->d0 == ISRT2 && tmp->d1 == ISRT2, "tmp->d0,1 != ISRT2"); Disable to allow "round down" variant
VEC_DBL_INIT( cc1, c64_1); VEC_DBL_INIT( ss1, s64_1); tmp = cc1-1; VEC_DBL_INIT(tmp, dtmp);
VEC_DBL_INIT( cc2, c32_1); VEC_DBL_INIT( ss2, s32_1); tmp = cc2-1; VEC_DBL_INIT(tmp, dtmp);
VEC_DBL_INIT( cc3, c64_3); VEC_DBL_INIT( ss3, s64_3); tmp = cc3-1; VEC_DBL_INIT(tmp, dtmp);
Expand Down Expand Up @@ -4416,7 +4416,7 @@ in the same order here as DIF, but the in-and-output-index offsets are BRed: j1
// VEC_DBL_INIT(sqrt2, SQRT2); VEC_DBL_INIT(tmp, ISRT2);
VEC_DBL_INIT(nisrt2,-dtmp);
VEC_DBL_INIT( isrt2, dtmp); // Copies of +ISRT2 needed for 30-asm-macro-operand-GCC-limit workaround:
VEC_DBL_INIT( cc0, 1.0); VEC_DBL_INIT( ss0, 0.0); // tmp = cc0-1; ASSERT(HERE, tmp->d0 == ISRT2 && tmp->d1 == ISRT2, "tmp->d0,1 != ISRT2"); Disable to allow "round down" variant
VEC_DBL_INIT( cc0, 1.0); VEC_DBL_INIT( ss0, 0.0); // tmp = cc0-1; ASSERT(tmp->d0 == ISRT2 && tmp->d1 == ISRT2, "tmp->d0,1 != ISRT2"); Disable to allow "round down" variant
VEC_DBL_INIT( cc1, c64_1); VEC_DBL_INIT( ss1, s64_1); tmp = cc1-1; VEC_DBL_INIT(tmp, dtmp);
VEC_DBL_INIT( cc2, c32_1); VEC_DBL_INIT( ss2, s32_1); tmp = cc2-1; VEC_DBL_INIT(tmp, dtmp);
VEC_DBL_INIT( cc3, c64_3); VEC_DBL_INIT( ss3, s64_3); tmp = cc3-1; VEC_DBL_INIT(tmp, dtmp);
Expand All @@ -4435,12 +4435,12 @@ in the same order here as DIF, but the in-and-output-index offsets are BRed: j1
// fprintf(stderr, "Init SSE2_RADIX_64_DIT with max_threads = %d\n",max_threads);
return;
} else {
ASSERT(HERE, sc_arr != 0, "This function requires an initial Init-consts-mode call (in 1-thread mode only) before use!");
ASSERT(sc_arr != 0, "This function requires an initial Init-consts-mode call (in 1-thread mode only) before use!");
} /* end of inits */

/* If multithreaded, set the local-store pointers needed for the current thread; */
#ifdef MULTITHREAD
ASSERT(HERE, (uint32)thr_id < (uint32)max_threads, "Bad thread ID!");
ASSERT((uint32)thr_id < (uint32)max_threads, "Bad thread ID!");
tmp = __r0 + thr_id*0x32;
// To support FMA versions of the radix-8 macros used to build radix-64 we insert a standalone copy of the [2,1,sqrt2,isrt2] quartet:
two = tmp + 0; // AVX+ versions of various DFT macros assume consts 2.0,1.0,isrt2 laid out thusly
Expand Down Expand Up @@ -4888,7 +4888,7 @@ in the same order here as DIF, but the in-and-output-index offsets are BRed: j1
// Index-offset names here reflect original unpermuted inputs, but the math also works for permuted ones:
int i,j,nshift, *off_ptr;
int p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,pa,pb,pc,pd,pe,pf;
ASSERT(HERE, o_idx != 0x0, "Null o_idx pointer in SSE2_RADIX256_DIF!");
ASSERT(o_idx != 0x0, "Null o_idx pointer in SSE2_RADIX256_DIF!");
// NOTE that unlike the RADIX_08_DIF_OOP() macro used for pass 1 of the radix-64 DFT, RADIX_16_DIF outputs are IN-ORDER rather than BR:
#ifdef USE_ARM_V8_SIMD
uint32 OFF1,OFF2,OFF3,OFF4;
Expand Down
4 changes: 2 additions & 2 deletions src/f2psp.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ extern "C" {
retval \
)\
{\
DBG_ASSERT(HERE, qinv == qinv*((uint32)2 - q*qinv), "mi64_is_div_by_scalar32p: bad qinv!");\
DBG_ASSERT(HERE, ((uint32)&a[0] & 0x3f) == 0, "A-array not 64-byte aligned!");\
DBG_ASSERT(qinv == qinv*((uint32)2 - q*qinv), "mi64_is_div_by_scalar32p: bad qinv!");\
DBG_ASSERT(((uint32)&a[0] & 0x3f) == 0, "A-array not 64-byte aligned!");\
__asm mov eax, array_64x8inputs /* Assumes inputs a,b,c,d,... are 64-bit separated and &a[0] is 64-byte aligned */\
__asm lea ebx, q\
__asm lea ecx, qinv\
Expand Down
Loading
Loading