Skip to content

Commit

Permalink
Firth bug might be fixed
Browse files Browse the repository at this point in the history
  • Loading branch information
chrchang committed Dec 22, 2024
1 parent c18ab98 commit b87c596
Show file tree
Hide file tree
Showing 7 changed files with 61 additions and 102 deletions.
5 changes: 2 additions & 3 deletions 2.0/include/pgenlib_misc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,9 @@ namespace plink2 {
#endif

#ifndef NDEBUG
uint32_t g_pgl_debug_on = 0;
// 128 extra bytes to make WordWrapMultiline() safe
char g_pgl_errbuf[kPglErrbufBlen + 128];
char* g_pgl_errbuf = nullptr;
char* g_pgl_errbuf_write_iter = nullptr;
char* g_pgl_errbuf_end = nullptr;
#endif

#ifdef USE_AVX2
Expand Down
59 changes: 48 additions & 11 deletions 2.0/include/pgenlib_misc.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,38 +89,75 @@ namespace plink2 {
#endif

#ifdef NDEBUG
HEADER_INLINE void PglInitLog() {
HEADER_INLINE BoolErr PglInitLog(__attribute__((unused)) uintptr_t log_capacity) {
return 0;
}

HEADER_INLINE void PglLogprintf(__attribute__((unused)) const char* fmt, ...) {
}

HEADER_INLINE char* PglReturnLog() {
return nullptr;
}

HEADER_INLINE void PglResetLog() {
}
#else
CONSTI32(kPglErrbufBlen, 262144);
extern uint32_t g_pgl_debug_on;
extern char g_pgl_errbuf[];
extern char* g_pgl_errbuf;
extern char* g_pgl_errbuf_write_iter;

HEADER_INLINE void PglInitLog() {
if (g_pgl_debug_on) {
g_pgl_errbuf_write_iter = g_pgl_errbuf;
*g_pgl_errbuf_write_iter = '\0';
extern char* g_pgl_errbuf_end;

HEADER_INLINE BoolErr PglInitLog(uintptr_t log_capacity) {
if (g_pgl_errbuf) {
if (log_capacity <= S_CAST(uintptr_t, g_pgl_errbuf_end - g_pgl_errbuf)) {
// existing allocation is fine. no need to shrink
g_pgl_errbuf_write_iter = g_pgl_errbuf;
*g_pgl_errbuf_write_iter = '\0';
return 0;
}
free(g_pgl_errbuf);
}
// 128 extra bytes to make WordWrapMultiline() safe.
g_pgl_errbuf = S_CAST(char*, malloc(log_capacity + 128));
if (!g_pgl_errbuf) {
g_pgl_errbuf_write_iter = nullptr;
g_pgl_errbuf_end = nullptr;
// in the unlikely event this comes up, caller is free to ignore the error,
// logging just won't happen. though, if malloc is failing, something else
// is likely to fail...
return 1;
}
g_pgl_errbuf_write_iter = g_pgl_errbuf;
*g_pgl_errbuf_write_iter = '\0';
g_pgl_errbuf_end = &(g_pgl_errbuf[log_capacity]);
return 0;
}

HEADER_INLINE void PglLogprintf(const char* fmt, ...) {
// possible todo: log levels
if (g_pgl_errbuf_write_iter != nullptr) {
va_list args;
va_start(args, fmt);
const uintptr_t remaining_space = &(g_pgl_errbuf[kPglErrbufBlen]) - g_pgl_errbuf_write_iter;
const uintptr_t remaining_space = g_pgl_errbuf_end - g_pgl_errbuf_write_iter;
const uintptr_t intended_slen = vsnprintf(g_pgl_errbuf_write_iter, remaining_space, fmt, args);
if (intended_slen < remaining_space) {
g_pgl_errbuf_write_iter = &(g_pgl_errbuf_write_iter[intended_slen]);
} else {
g_pgl_errbuf_write_iter = &(g_pgl_errbuf[kPglErrbufBlen]);
g_pgl_errbuf_write_iter = g_pgl_errbuf_end;
}
}
}

HEADER_INLINE char* PglReturnLog() {
return g_pgl_errbuf;
}

HEADER_INLINE void PglResetLog() {
if (g_pgl_errbuf) {
g_pgl_errbuf_write_iter = g_pgl_errbuf;
g_pgl_errbuf_write_iter[0] = '\0';
}
}
#endif

// other configuration-ish values needed by plink2_common subset
Expand Down
19 changes: 0 additions & 19 deletions 2.0/include/pgenlib_read.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1959,21 +1959,10 @@ uint64_t PgfiMultireadGetCachelineReq(const uintptr_t* variant_include, const Pg
}

PglErr PgfiMultiread(const uintptr_t* variant_include, uint32_t variant_uidx_start, uint32_t variant_uidx_end, uint32_t load_variant_ct, PgenFileInfo* pgfip) {
PglLogprintf("[pgl] PgfiMultiread() called with variant_uidx_start=%u, variant_uidx_end=%u, load_variant_ct=%u\n", variant_uidx_start, variant_uidx_end, load_variant_ct);
// we could permit 0, but that encourages lots of unnecessary thread wakeups
assert(load_variant_ct);
#ifndef NDEBUG
if (g_pgl_debug_on) {
if (!variant_include) {
PglLogprintf("[pgl] PgfiMultiread(): variant_include not provided\n");
} else {
PglLogprintf("[pgl] PgfiMultiread(): PopcountBitRange(%u, %u)=%u\n", variant_uidx_start, variant_uidx_end, PopcountBitRange(variant_include, variant_uidx_start, variant_uidx_end));
}
}
#endif
if (variant_include) {
variant_uidx_start = AdvTo1Bit(variant_include, variant_uidx_start);
PglLogprintf("[pgl] PgfiMultiread() advanced variant_uidx_start to %u\n", variant_uidx_start);
}
assert(variant_uidx_start < pgfip->raw_variant_ct);
uint64_t block_offset;
Expand All @@ -1996,7 +1985,6 @@ PglErr PgfiMultiread(const uintptr_t* variant_include, uint32_t variant_uidx_sta
uint64_t cur_read_end_fpos;
while (1) {
cur_read_uidx_end = variant_uidx_end;
PglLogprintf("[pgl] PgfiMultiread(): restarting inner loop, cur_read_uidx_end=%u, variant_uidx_start=%u, load_variant_ct=%u\n", cur_read_uidx_end, variant_uidx_start, load_variant_ct);
if (cur_read_uidx_end - variant_uidx_start == load_variant_ct) {
cur_read_end_fpos = GetPgfiFpos(pgfip, cur_read_uidx_end);
load_variant_ct = 0;
Expand All @@ -2005,16 +1993,13 @@ PglErr PgfiMultiread(const uintptr_t* variant_include, uint32_t variant_uidx_sta
cur_read_uidx_end = AdvTo0Bit(variant_include, variant_uidx_start);
cur_read_end_fpos = GetPgfiFpos(pgfip, cur_read_uidx_end);
load_variant_ct -= cur_read_uidx_end - variant_uidx_start;
PglLogprintf("[pgl] PgfiMultiread(): cur_read_uidx_end updated to %u, cur_read_end_fpos=%" PRIu64 ", load_variant_ct updated to %u\n", cur_read_uidx_end, cur_read_end_fpos, load_variant_ct);
if (!load_variant_ct) {
break;
}
variant_uidx_start = AdvTo1Bit(variant_include, cur_read_uidx_end);
next_read_start_fpos = GetPgfiFpos(pgfip, variant_uidx_start);
PglLogprintf("[pgl] PgfiMultiread(): variant_uidx_start updated to %u, next_read_start_fpos=%" PRIu64 "\n", variant_uidx_start, next_read_start_fpos);
if (pgfip->vrtypes && ((pgfip->vrtypes[variant_uidx_start] & 6) == 2)) {
const uint32_t variant_read_uidx_start = GetLdbaseVidx(pgfip->vrtypes, variant_uidx_start);
PglLogprintf("[pgl] PgfiMultiread(): variant_read_uidx_start=%u\n", variant_read_uidx_start);
if (variant_read_uidx_start <= cur_read_uidx_end) {
continue;
}
Expand All @@ -2027,21 +2012,17 @@ PglErr PgfiMultiread(const uintptr_t* variant_include, uint32_t variant_uidx_sta
break;
}
}
PglLogprintf("[pgl] PgfiMultiread() attempting to read %" PRIuPTR " byte(s) from %" PRIu64 ".\n", cur_read_end_fpos - cur_read_start_fpos, cur_read_start_fpos);
if (unlikely(fseeko(pgfip->shared_ff, cur_read_start_fpos, SEEK_SET))) {
PglLogprintf("[pgl] PgfiMultiread() fseeko(%" PRIu64 ") failed.\n", cur_read_start_fpos);
return kPglRetReadFail;
}
uintptr_t len = cur_read_end_fpos - cur_read_start_fpos;
if (unlikely(fread_checked(K_CAST(unsigned char*, &(pgfip->block_base[cur_read_start_fpos - block_offset])), len, pgfip->shared_ff))) {
if (feof_unlocked(pgfip->shared_ff)) {
errno = 0;
}
PglLogprintf("[pgl] PgfiMultiread() fread(%" PRIu64 ", %" PRIuPTR ") failed, block_offset=%" PRIu64 ".\n", cur_read_start_fpos, len, block_offset);
return kPglRetReadFail;
}
} while (load_variant_ct);
PglLogprintf("[pgl] PgfiMultiread() returned success\n");
return kPglRetSuccess;
}

Expand Down
9 changes: 3 additions & 6 deletions 2.0/plink2.cc
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
namespace plink2 {
#endif

static const char ver_str[] = "PLINK v2.0.0-a.6.4.e"
static const char ver_str[] = "PLINK v2.0.0-a.6.5"
#ifdef NOLAPACK
"NL"
#elif defined(LAPACK_ILP64)
Expand Down Expand Up @@ -72,7 +72,7 @@ static const char ver_str[] = "PLINK v2.0.0-a.6.4.e"
#elif defined(USE_AOCL)
" AMD"
#endif
" (19 Dec 2024)";
" (22 Dec 2024)";
static const char ver_str2[] =
// include leading space if day < 10, so character length stays the same
""
Expand Down Expand Up @@ -100,7 +100,7 @@ static const char ver_str2[] =
# endif
#endif

" cog-genomics.org/plink/2.0/\n"
" cog-genomics.org/plink/2.0/\n"
"(C) 2005-2024 Shaun Purcell, Christopher Chang GNU General Public License v3\n";
static const char errstr_append[] = "For more info, try \"" PROG_NAME_STR " --help <flag name>\" or \"" PROG_NAME_STR " --help | more\".\n";

Expand Down Expand Up @@ -5169,9 +5169,6 @@ int main(int argc, char** argv) {
goto main_param_zero;
} else if (strequal_k_unsafe(flagname_p2, "ebug")) {
g_debug_on = 1;
#ifndef NDEBUG
g_pgl_debug_on = 1;
#endif
goto main_param_zero;
} else if (strequal_k_unsafe(flagname_p2, "ata")) {
if (unlikely(load_params || (xload & (~kfXloadOxBgen)))) {
Expand Down
12 changes: 5 additions & 7 deletions 2.0/plink2_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -1481,14 +1481,12 @@ void AppendZerotabsUnsafe(uint32_t zerotab_ct, char** cswritep_ptr);
PglErr InitAllelePermuteUnsafe(const uintptr_t* allele_idx_offsets, uint32_t raw_variant_ct, uint32_t max_thread_ct, AlleleCode* allele_permute);

HEADER_INLINE void PglLogputsb() {
#ifndef NDEBUG
if (g_pgl_errbuf_write_iter) {
WordWrapMultiline(g_pgl_errbuf);
logputs(g_pgl_errbuf);
g_pgl_errbuf_write_iter = g_pgl_errbuf;
*g_pgl_errbuf_write_iter = '\0';
char* pgl_errbuf = PglReturnLog();
if (pgl_errbuf) {
WordWrapMultiline(pgl_errbuf);
logputs(pgl_errbuf);
PglResetLog();
}
#endif
}

#ifdef __cplusplus
Expand Down
51 changes: 0 additions & 51 deletions 2.0/plink2_glm_logistic.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3086,9 +3086,6 @@ BoolErr FirthRegressionD(const double* yy, const double* xx, const double* sampl
const double lconv = 0.00001;
double delta_max = 0.0;
double loglik_old = 0.0;
if (g_debug_on) {
DPrintf("FirthRegressionD sample_ct: %u\n", sample_ct);
}
for (uint32_t iter_idx = 0; ; ++iter_idx) {
// P[i] = \sum_j beta[j] * X[i][j];
// categorical optimization possible here
Expand All @@ -3100,11 +3097,6 @@ BoolErr FirthRegressionD(const double* yy, const double* xx, const double* sampl
logistic_v_unsafe(pp, sample_ct);
double loglik;
if (ComputeLoglikCheckedD(yy, pp, sample_ct, &loglik)) {
DPrintf("FirthRegressionD: failed to compute log-likelihood\npp:");
for (uint32_t uii = 0; uii != sample_ct; ++uii) {
DPrintf(" %g", pp[uii]);
}
DPrintf("\n");
return 1;
}
// V[i] = P[i] * (1 - P[i]);
Expand All @@ -3117,12 +3109,10 @@ BoolErr FirthRegressionD(const double* yy, const double* xx, const double* sampl
// we shouldn't need to compute the log directly, since underflow <->
// regression failure, right? check this.
if (InvertSymmdefMatrixFirstHalf(predictor_ct, predictor_ctav, hh0, inv_1d_buf, dbl_2d_buf)) {
DPrintf("FirthRegressionD: InvertSymmdefMatrixFirstHalf failed\n");
return 1;
}
const double dethh = HalfSymmInvertedDet(hh0, inv_1d_buf, predictor_ct, predictor_ctav);
loglik += 0.5 * log(dethh);
DPrintf("FirthRegressionD loglik: %g dethh: %g\n", loglik, dethh);

InvertSymmdefMatrixSecondHalf(predictor_ct, predictor_ctav, hh0, inv_1d_buf, dbl_2d_buf);
// trailing elements of hh0[] rows can't be arbitrary for later
Expand All @@ -3144,58 +3134,18 @@ BoolErr FirthRegressionD(const double* yy, const double* xx, const double* sampl
}
const double loglik_change = loglik - loglik_old;
if ((delta_max <= xconv) && (ustar_max < gconv) && (loglik_change < lconv)) {
DPrintf("FirthRegressionD: converged properly\n");
return 0;
}
if (iter_idx > max_iter) {
*is_unfinished_ptr = 1;
DPrintf("FirthRegressionD: unfinished\n");
return 0;
}
}
loglik_old = loglik;

FirthComputeSecondWeightsD(hdiag, vv, sample_ct, sample_ctav, ww);
if (sample_ct > 10) {
DPrintf("FirthRegressionD: xx = [");
for (uint32_t sample_idx = 0; sample_idx < 5; ++sample_idx) {
DPrintf(" %g", xx[sample_idx]);
}
DPrintf(" ...");
for (uint32_t sample_idx = sample_ct - 5; sample_idx < sample_ct; ++sample_idx) {
DPrintf(" %g", xx[sample_idx]);
}
for (uint32_t sample_idx = sample_ct; sample_idx < sample_ctav; ++sample_idx) {
DPrintf(" (%g)", xx[sample_idx]);
}
DPrintf(" ]\n");
DPrintf("FirthRegressionD: ww = [");
for (uint32_t sample_idx = 0; sample_idx < 5; ++sample_idx) {
DPrintf(" %g", ww[sample_idx]);
}
DPrintf(" ...");
for (uint32_t sample_idx = sample_ct - 5; sample_idx < sample_ct; ++sample_idx) {
DPrintf(" %g", ww[sample_idx]);
}
for (uint32_t sample_idx = sample_ct; sample_idx < sample_ctav; ++sample_idx) {
DPrintf(" (%g)", ww[sample_idx]);
}
DPrintf(" ]\n");
}
ComputeHessianD(xx, ww, sample_ct, predictor_ct, hh);
DPrintf("FirthRegressionD: hh (predictor_ct=%u, predictor_ctav=%u):\n", predictor_ct, predictor_ctav);
for (uint32_t row_idx = 0; row_idx < predictor_ct; ++row_idx) {
DPrintf("[");
for (uint32_t col_idx = 0; col_idx < predictor_ct; ++col_idx) {
DPrintf(" %g", hh[row_idx * predictor_ctav + col_idx]);
}
for (uint32_t col_idx = predictor_ct; col_idx < predictor_ctav; ++col_idx) {
DPrintf(" (%g)", hh[row_idx * predictor_ctav + col_idx]);
}
DPrintf(" ]\n");
}
if (InvertSymmdefStridedMatrix(predictor_ct, predictor_ctav, hh, inv_1d_buf, dbl_2d_buf)) {
DPrintf("FirthRegressionD: failed to invert matrix\n");
return 1;
}
ReflectStridedMatrix0(predictor_ct, predictor_ctav, hh);
Expand Down Expand Up @@ -3792,7 +3742,6 @@ THREAD_FUNC_DECL GlmLogisticThreadD(void* raw_arg) {
cur_constraint_ct = common->constraint_ct;
cur_is_always_firth = is_always_firth || ctx->separation_found;
}
DPrintf("GlmLogisticThreadD: cur_is_always_firth=%u is_always_firth=%u\n", cur_is_always_firth, is_always_firth);
const uint32_t sample_ctl = BitCtToWordCt(cur_sample_ct);
const uint32_t sample_ctav = RoundUpPow2(cur_sample_ct, kDoublePerDVec);
const uint32_t cur_case_ct = PopcountWords(cur_pheno_cc, sample_ctl);
Expand Down
8 changes: 3 additions & 5 deletions 2.0/plink2_misc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -9825,7 +9825,6 @@ PglErr HetCalcMain(const uintptr_t* sample_include, const uintptr_t* variant_sub
ThreadGroup tg;
PreinitThreads(&tg);
HetCtx ctx;
PglInitLog();
{
// return values
if (unlikely(bigstack_alloc_u32(sample_ct, ohets_ptr) ||
Expand Down Expand Up @@ -10032,7 +10031,6 @@ PglErr HetCalcMain(const uintptr_t* sample_include, const uintptr_t* variant_sub
CleanupThreads(&tg);
BigstackReset(bigstack_mark);
pgfip->block_base = nullptr;
PglLogputsb();
return reterr;
}

Expand Down Expand Up @@ -10208,7 +10206,6 @@ PglErr CheckOrImputeSex(const uintptr_t* sample_include, const SampleIdInfo* sii
if (x_end < raw_variant_ct_rounded_up) {
ClearBitsNz(x_end, raw_variant_ct_rounded_up, variant_include_x);
}
DPrintf("CheckOrImputeSex(): x_start=%u x_end=%u used_variant_ct_x=%u PopcountBitRange: %" PRIuPTR "\n", x_start, x_end, used_variant_ct_x, PopcountBitRange(variant_include_x, x_start, x_end));
// Don't actually need nobs.
uint32_t* ohets;
double* ehet_incrs;
Expand Down Expand Up @@ -10281,8 +10278,9 @@ PglErr CheckOrImputeSex(const uintptr_t* sample_include, const SampleIdInfo* sii
if (y_start) {
ClearBitsNz(0, y_start, variant_include_y);
}
if (y_end < raw_variant_ct) {
ClearBitsNz(y_end, raw_variant_ct, variant_include_y);
const uint32_t raw_variant_ct_rounded_up = RoundUpPow2(raw_variant_ct, kBitsPerWord);
if (y_end < raw_variant_ct_rounded_up) {
ClearBitsNz(y_end, raw_variant_ct_rounded_up, variant_include_y);
}
logprintf("%s: ", flagstr);
reterr = LoadSampleMissingCts(sample_include, sample_include, variant_include_y, cip, "chrY valid genotype call", raw_variant_ct, used_variant_ct_y, raw_sample_ct, 0, max_thread_ct, pgr_alloc_cacheline_ct, pgfip, sample_missing_hc_cts, nullptr, sample_hethap_cts);
Expand Down

0 comments on commit b87c596

Please sign in to comment.