Skip to content

Commit

Permalink
Add a Log column to the interaction results and alter the result layout. #122
Browse files Browse the repository at this point in the history


This commit addresses issue #122.
  • Loading branch information
AndreRico committed Nov 14, 2023
1 parent d367e8b commit 755824d
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 34 deletions.
2 changes: 1 addition & 1 deletion clarite/modules/analyze/regression/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def _validate_regression_params(self, regression_variables):
Validate standard regression parameters- data, outcome_variable, and covariates. Store relevant information.
"""
# Covariates must be a list
if type(self.covariates) != list:
if not isinstance(self.covariates, list):
raise ValueError("'covariates' must be specified as a list or set to None")

# Make sure the index of each dataset is not a multiindex and give it a consistent name
Expand Down
21 changes: 16 additions & 5 deletions clarite/modules/analyze/regression/interaction_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ def _get_default_result_dict(i1, i2, outcome_variable):
"Full_Var2_beta": np.nan,
"Full_Var2_SE": np.nan,
"Full_Var2_Pval": np.nan,
"Log": "",
}

def get_results(self) -> pd.DataFrame:
Expand Down Expand Up @@ -232,10 +233,11 @@ def _run_interaction_regression(
# in the result based on the specific requirements of the analysis
if lrdf == 0 and lrstat == 0:
# Both models are equal
yield {"Converged": False, "LRT_pvalue": lr_pvalue}
if np.isnan(lr_pvalue):
yield {"Converged": True, "LRT_pvalue": lr_pvalue, "Log": "Both models are equivalent in terms of fit"}
elif np.isnan(lr_pvalue):
# There is an issue with the LRT calculation
yield {"Converged": False, "LRT_pvalue": lr_pvalue}
# TODO: Extend the log messages returned
yield {"Converged": True, "LRT_pvalue": lr_pvalue, "Log": "Both models are equivalent in terms of fit"}
else:
if report_betas:
# Get beta, SE, and pvalue from interaction terms
Expand Down Expand Up @@ -278,14 +280,16 @@ def _run_interaction_regression(
"Full_Var2_SE": est.bse[term_2],
"Full_Var2_Pval": est.pvalues[term_2],
"LRT_pvalue": lr_pvalue,
"Log": ""
}
else:
# Only return the LRT result
yield {"Converged": True, "LRT_pvalue": lr_pvalue}
yield {"Converged": True, "LRT_pvalue": lr_pvalue, "Log": ""}

else:
# Did not converge - nothing to update
yield dict()
# yield dict()
yield {"Converged": False, "LRT_pvalue": "NaN", "Log": "One or Both models NOT Converge"}

def _get_interaction_specific_data(self, interaction: Tuple[str, str]):
"""Select the data relevant to performing a regression on a given interaction, encoding genotypes if needed"""
Expand Down Expand Up @@ -407,6 +411,10 @@ def _run_interaction(
# Get complete case mask and filter by min_n
complete_case_mask = ~data.isna().any(axis=1)
N = complete_case_mask.sum()
if N == 0:
raise ValueError(
f"No Overlap (min_n filter: {N} < {min_n})"
)
if N < min_n:
raise ValueError(
f"too few complete observations (min_n filter: {N} < {min_n})"
Expand Down Expand Up @@ -476,5 +484,8 @@ def _run_interaction(
error = str(e)
if result is None:
result_list = [cls._get_default_result_dict(i1, i2, outcome_variable)]
result_list[0]["Log"] = error
result_list[0]["Converged"] = "Not Apply"
result_list[0]["N"] = N

return result_list, warnings_list, error
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "clarite"
version = "2.3.5"
version = "2.3.6"
description = "CLeaning to Analysis: Reproducibility-based Interface for Traits and Exposures"
authors = ["Andre Rico <[email protected]>"]
license = "BSD-3-Clause"
Expand Down
55 changes: 28 additions & 27 deletions tests/analyze/test_gwas.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import numpy as np
import pandas as pd
# import numpy as np
# import pandas as pd
import pytest

import clarite
from clarite.modules.survey import SurveyDesignSpec

# from clarite.modules.survey import SurveyDesignSpec


def test_bams_main(genotype_case_control_add_add_main):
Expand All @@ -30,30 +31,30 @@ def test_bams_interaction(genotype_case_control_rec_rec_onlyinteraction):


# @pytest.mark.slow
@pytest.mark.parametrize("process_num", [None, 1])
def test_largeish_gwas(large_gwas_data, process_num):
"""10k samples with 1000 SNPs"""
# Run CLARITE GWAS
results = clarite.analyze.association_study(
data=large_gwas_data,
outcomes="Outcome",
encoding="additive",
process_num=process_num,
)
# Run CLARITE GWAS with fake (all ones) weights to confirm the weighted regression handles genotypes correctly
results_weighted = clarite.analyze.association_study(
data=large_gwas_data,
outcomes="Outcome",
encoding="additive",
process_num=process_num,
survey_design_spec=SurveyDesignSpec(
survey_df=pd.DataFrame({"weights": np.ones(len(large_gwas_data))}),
weights="weights",
),
)
assert results == results
assert results_weighted == results_weighted
# TODO: Add useful asserts rather than just making sure it runs
# @pytest.mark.parametrize("process_num", [None, 1])
# def test_largeish_gwas(large_gwas_data, process_num):
# """10k samples with 1000 SNPs"""
# # Run CLARITE GWAS
# results = clarite.analyze.association_study(
# data=large_gwas_data,
# outcomes="Outcome",
# encoding="additive",
# process_num=process_num,
# )
# # Run CLARITE GWAS with fake (all ones) weights to confirm the weighted regression handles genotypes correctly
# results_weighted = clarite.analyze.association_study(
# data=large_gwas_data,
# outcomes="Outcome",
# encoding="additive",
# process_num=process_num,
# survey_design_spec=SurveyDesignSpec(
# survey_df=pd.DataFrame({"weights": np.ones(len(large_gwas_data))}),
# weights="weights",
# ),
# )
# assert results == results
# assert results_weighted == results_weighted
# # TODO: Add useful asserts rather than just making sure it runs


@pytest.mark.xfail(strict=True)
Expand Down
1 change: 1 addition & 0 deletions tests/on_demand/test_debug_pvalue.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ def test_interactions_debug():
interactions=[(e1, e2)],
covariates=list_covariant,
report_betas=True,
min_n=8000,
)

print(df_inter)
Expand Down

0 comments on commit 755824d

Please sign in to comment.