Skip to content

Commit

Permalink
Approaching quarto-look
Browse files Browse the repository at this point in the history
  • Loading branch information
sigven committed Mar 21, 2024
1 parent 3f655be commit 16737fe
Show file tree
Hide file tree
Showing 69 changed files with 7,148 additions and 1,394 deletions.
4 changes: 4 additions & 0 deletions pcgr/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@ def create_config(arg_dict, workflow = "PCGR"):
conf_options['sample_properties']['purity'] = 'NA'
conf_options['sample_properties']['ploidy'] = 'NA'
conf_options['sample_properties']['site'] = str(pcgr_vars.tsites[arg_dict['tsite']])
conf_options['sample_properties']['dp_control_detected'] = 0
conf_options['sample_properties']['vaf_control_detected'] = 0
conf_options['sample_properties']['dp_tumor_detected'] = 0
conf_options['sample_properties']['vaf_tumor_detected'] = 0
conf_options['assay_properties']['type'] = str(arg_dict['assay'])
conf_options['assay_properties']['vcf_tumor_only'] = 0
conf_options['assay_properties']['mode'] = "Tumor-Control"
Expand Down
42 changes: 31 additions & 11 deletions pcgr/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,18 +31,22 @@ def cli():
optional_cna = parser.add_argument_group("Somatic copy number alteration (CNA) data options")
optional_vcfanno = parser.add_argument_group("vcfanno options")
optional_vep = parser.add_argument_group("VEP options")
optional_assay = parser.add_argument_group("Sequencing assay settings")
optional_tumor = parser.add_argument_group("Tumor sample settings")
optional_assay = parser.add_argument_group("Sequencing assay options")
optional_tumor = parser.add_argument_group("Tumor sample options")
optional_tmb_msi = parser.add_argument_group("Tumor mutational burden (TMB) and MSI options")
optional_rna = parser.add_argument_group("Bulk RNA-seq and RNA fusion data settings")
optional_rna = parser.add_argument_group("Bulk RNA-seq and RNA fusion data options")
#optional_germline = parser.add_argument_group("Germline variant options")
optional_signatures = parser.add_argument_group("Mutational signature options")
optional_tumor_only = parser.add_argument_group("Tumor-only filtering options")
optional_allelic_support = parser.add_argument_group("Allelic support settings")
optional_allelic_support = parser.add_argument_group("Allelic support options")
optional_other = parser.add_argument_group("Other options")

optional_rna.add_argument("--input_rna_fusion", dest = "input_rna_fusion", help = "File with RNA fusion transcripts detected in tumor (tab-separated values)")
optional_rna.add_argument("--input_rna_exp", dest = "input_rna_exp", help = "File with RNA expression counts (bulk) of genes in tumor (tab-separated values)")


#optional_germline.add_argument('--input_germline_calls', dest="input_germline_calls", help="Compressed TSV file with classified germline calls from CPSR")


optional_cna.add_argument("--input_cna", dest="input_cna", help="Somatic copy number alteration segments (tab-separated values)")
optional_cna.add_argument("--n_copy_gain", type=int, default=6, dest="n_copy_gain", help="Minimum number of total copy number for segments considered as gains/amplifications (default: %(default)s)")
optional_cna.add_argument("--cna_overlap_pct", type=float, default=50, dest="cna_overlap_pct", help="Mean percent overlap between copy number segment and gene transcripts for reporting of gains/losses in tumor suppressor genes/oncogenes, (default: %(default)s)")
Expand Down Expand Up @@ -199,12 +203,12 @@ def run_pcgr(pcgr_paths, conf_options):

random_id = random_id_generator(15)
# Define temporary output file names
input_vcf_validated = f'{conf_options["sample_id"]}.{random_id}.pcgr_ready.vcf.gz'
input_vcf_validated_uncompr = f'{conf_options["sample_id"]}.{random_id}.pcgr_ready.vcf'
vep_vcf = f'{conf_options["sample_id"]}.{random_id}.vep.vcf'
vep_vcfanno_vcf = f'{conf_options["sample_id"]}.{random_id}.vep.vcfanno.vcf'
vep_vcfanno_summarised_vcf = f'{conf_options["sample_id"]}.{random_id}.vep.vcfanno.summarised.vcf'
vep_vcfanno_summarised_pass_vcf = f'{conf_options["sample_id"]}.{random_id}.vep.vcfanno.summarised.pass.vcf'
input_vcf_validated = os.path.join(output_dir, f'{conf_options["sample_id"]}.{random_id}.pcgr_ready.vcf.gz')
input_vcf_validated_uncompr = os.path.join(output_dir, f'{conf_options["sample_id"]}.{random_id}.pcgr_ready.vcf')
vep_vcf = os.path.join(output_dir, f'{conf_options["sample_id"]}.{random_id}.vep.vcf')
vep_vcfanno_vcf = os.path.join(output_dir, f'{conf_options["sample_id"]}.{random_id}.vep.vcfanno.vcf')
vep_vcfanno_summarised_vcf = os.path.join(output_dir, f'{conf_options["sample_id"]}.{random_id}.vep.vcfanno.summarised.vcf')
vep_vcfanno_summarised_pass_vcf = os.path.join(output_dir, f'{conf_options["sample_id"]}.{random_id}.vep.vcfanno.summarised.pass.vcf')
prefix = os.path.join(output_dir, f'{conf_options["sample_id"]}.pcgr_acmg.{conf_options["genome_assembly"]}')
output_vcf = f'{prefix}.vcf.gz'
output_pass_vcf = f'{prefix}.pass.vcf.gz'
Expand Down Expand Up @@ -454,6 +458,22 @@ def run_pcgr(pcgr_paths, conf_options):
variant_set = set_allelic_support(variant_set, allelic_support_tags = yaml_data["conf"]['somatic_snv']['allelic_support'])
variant_set = clean_annotations(variant_set, yaml_data, germline = False, logger = logger)
variant_set.fillna('.').to_csv(output_pass_tsv_gz, sep="\t", compression="gzip", index=False)

## Check if AD/DP properties could be pulled from VCFs
if {'DP_CONTROL'}.issubset(variant_set.columns):
if variant_set.loc[variant_set['DP_CONTROL'] == '.'].empty:
yaml_data['conf']['sample_properties']['dp_control_detected'] = 1
if {'VAF_CONTROL'}.issubset(variant_set.columns):
if variant_set.loc[variant_set['VAF_CONTROL'] == '.'].empty:
yaml_data['conf']['sample_properties']['vaf_control_detected'] = 1
if {'DP_TUMOR'}.issubset(variant_set.columns):
if variant_set.loc[variant_set['DP_TUMOR'] == '.'].empty:
yaml_data['conf']['sample_properties']['dp_tumor_detected'] = 1
if {'VAF_TUMOR'}.issubset(variant_set.columns):
if variant_set.loc[variant_set['VAF_TUMOR'] == '.'].empty:
yaml_data['conf']['sample_properties']['vaf_tumor_detected'] = 1


if not debug:
remove_file(output_pass_vcf2tsv_gz)

Expand Down
24 changes: 12 additions & 12 deletions pcgr/variant.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ def set_allelic_support(variant_set: pd.DataFrame, allelic_support_tags: dict) -
"""
Set allelic support for variants
"""
for e in ['DP_CONTROL','DP_TUMOR','AF_CONTROL','AF_TUMOR']:
for e in ['DP_CONTROL','DP_TUMOR','VAF_CONTROL','VAF_TUMOR']:
if e in variant_set.columns:
variant_set[e] = np.nan

Expand All @@ -168,12 +168,12 @@ def set_allelic_support(variant_set: pd.DataFrame, allelic_support_tags: dict) -
variant_set['DP_TUMOR'] = variant_set[allelic_support_tags['tumor_dp_tag']].astype(int)

if allelic_support_tags['control_af_tag'] != "_NA_":
if {allelic_support_tags['control_af_tag'],'AF_CONTROL'}.issubset(variant_set.columns):
variant_set['AF_CONTROL'] = variant_set[allelic_support_tags['control_af_tag']].astype(float).round(4)
if {allelic_support_tags['control_af_tag'],'VAF_CONTROL'}.issubset(variant_set.columns):
variant_set['VAF_CONTROL'] = variant_set[allelic_support_tags['control_af_tag']].astype(float).round(4)

if allelic_support_tags['tumor_af_tag'] != "_NA_":
if {allelic_support_tags['tumor_af_tag'],'AF_TUMOR'}.issubset(variant_set.columns):
variant_set['AF_TUMOR'] = variant_set[allelic_support_tags['tumor_af_tag']].astype(float).round(4)
if {allelic_support_tags['tumor_af_tag'],'VAF_TUMOR'}.issubset(variant_set.columns):
variant_set['VAF_TUMOR'] = variant_set[allelic_support_tags['tumor_af_tag']].astype(float).round(4)

return variant_set

Expand All @@ -188,21 +188,21 @@ def calculate_tmb(variant_set: pd.DataFrame, tumor_dp_min: int, tumor_af_min: fl
counts_coding_and_silent = 0
counts_coding_non_silent = 0

if {'CONSEQUENCE','AF_TUMOR','DP_TUMOR','VARIANT_CLASS'}.issubset(variant_set.columns):
if {'CONSEQUENCE','VAF_TUMOR','DP_TUMOR','VARIANT_CLASS'}.issubset(variant_set.columns):
tmb_data_set = \
variant_set[['CONSEQUENCE','DP_TUMOR','AF_TUMOR','VARIANT_CLASS']]
variant_set[['CONSEQUENCE','DP_TUMOR','VAF_TUMOR','VARIANT_CLASS']]

n_rows_raw = len(tmb_data_set)
if float(tumor_af_min) > 0:
## filter variant set to those with AF_TUMOR > af_min
if variant_set[variant_set['AF_TUMOR'].isna() == True].empty is True:
tmb_data_set = tmb_data_set.loc[tmb_data_set['AF_TUMOR'] >= float(tumor_af_min),:]
## filter variant set to those with VAF_TUMOR > tumor_af_min
if variant_set[variant_set['VAF_TUMOR'].isna() == True].empty is True:
tmb_data_set = tmb_data_set.loc[tmb_data_set['VAF_TUMOR'] >= float(tumor_af_min),:]
n_removed_af = n_rows_raw - len(tmb_data_set)
logger.info(f'Removing n = {n_removed_af} variants with allelic fraction (tumor sample) < {tumor_af_min}')
else:
logger.info(f"Cannot filter on sequencing depth - 'AF_TUMOR' contains missing values")
logger.info(f"Cannot filter on sequencing depth - 'VAF_TUMOR' contains missing values")
if int(tumor_dp_min) > 0:
## filter variant set to those with AF_TUMOR > af_min
## filter variant set to those with DP_TUMOR > tumor_dp_min
if variant_set[variant_set['DP_TUMOR'].isna() == True].empty is True:
tmb_data_set = tmb_data_set.loc[tmb_data_set['DP_TUMOR'] >= int(tumor_dp_min),:]
n_removed_dp = n_rows_raw - len(tmb_data_set)
Expand Down
13 changes: 4 additions & 9 deletions pcgr/vcf.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,17 +181,12 @@ def check_format_ad_dp_tags(vcf: VCF,
logger.warning(f"Could not find the specified tumor_af_tag ('{tumor_af_tag}') in INFO column of input VCF - filtering of heterozygous germline variants in tumor-only mode will be ignored")


if found_tdp_tag == 1 and found_taf_tag == 0:
logger.warning('BOTH \' tumor_dp_tag\' AND \' tumor_af_tag\' need to be specified for use in tumor report (\'tumor_af_tag\' is missing)')
if (found_tdp_tag == 1 and found_taf_tag == 0) or (found_tdp_tag == 0 and found_taf_tag == 1):
logger.warning('BOTH \' tumor_dp_tag\' AND \' tumor_af_tag\' need to be specified for use in tumor report (\'tumor_af_tag\' or \'tumor_dp_tag\' is missing)')

if found_tdp_tag == 0 and found_taf_tag == 1:
logger.warning('BOTH \'tumor_dp_tag\' AND \'tumor_af_tag\' need to be specified for use in tumor report (\'tumor_dp_tag\' is missing)')
if (found_ndp_tag == 1 and found_naf_tag == 0) or (found_ndp_tag == 0 and found_naf_tag == 1):
logger.warning('BOTH \'control_dp_tag\' AND \'control_af_tag\' need to be specified for use in tumor report (\'control_af_tag\' or \'control_dp_tag\' is missing)')

if found_ndp_tag == 1 and found_naf_tag == 0:
logger.warning('BOTH \'control_dp_tag\' AND \'control_af_tag\' need to be specified for use in tumor report (\'control_af_tag\' is missing)')

if found_ndp_tag == 0 and found_naf_tag == 1:
logger.warning('BOTH \'control_dp_tag\' AND \'control_af_tag\' need to be specified for use in tumor report (\'control_dp_tag\' is missing)')

## if filtering turned on for AF-based tumor-only filtering, return error if TVAF not defined

Expand Down
2 changes: 1 addition & 1 deletion pcgrr/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ Package: pcgrr
Type: Package
Title: Personal Cancer Genome ReporteR
Version: 1.4.1.9001
Date: 2024-03-13
Date: 2024-03-21
Authors@R:
c(person(given = "Sigve",
family = "Nakken",
Expand Down
1 change: 1 addition & 0 deletions pcgrr/NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Generated by roxygen2: do not edit by hand

export(af_distribution)
export(append_annotation_links)
export(append_cancer_association_ranks)
export(append_cancer_gene_evidence)
Expand Down
34 changes: 29 additions & 5 deletions pcgrr/R/acmg.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,9 @@ assign_acmg_tiers <- function(
dplyr::filter(
vartype == "cna" |
(vartype == "snv_indel" &
.data$BM_RESOLUTION != "gene")
(.data$BM_RESOLUTION != "exon_nonprincipal" &
.data$BM_RESOLUTION != "hgvsp_nonprincipal")
)
)
}

Expand Down Expand Up @@ -215,8 +217,8 @@ assign_acmg_tiers <- function(
}
}

if(NROW(biomarker_items) > 0){
biomarker_items <- biomarker_items |>
if(NROW(biomarker_items_hires) > 0){
biomarker_items_hires <- biomarker_items_hires |>
dplyr::left_join(
dplyr::select(
variants_df,
Expand Down Expand Up @@ -256,6 +258,7 @@ assign_acmg_tiers <- function(

results_acmg[['biomarker_evidence']][['tier_classification']] <- data.frame()
results_acmg[['biomarker_evidence']][['items']] <- data.frame()
results_acmg[['biomarker_evidence']][['items_all']] <- data.frame()
results_acmg[['variant']] <- data.frame()

results_acmg[['variant']] <- variants_df |>
Expand All @@ -268,12 +271,21 @@ assign_acmg_tiers <- function(
TIER == 4 ~ "Other coding mutation",
TIER == 5 ~ "Noncoding mutation",
TRUE ~ as.character("Undefined")
)) |>
dplyr::mutate(ACTIONABILITY = dplyr::case_when(
TIER == 1 ~ "Strong significance",
TIER == 2 ~ "Potential significance",
TIER == 3 ~ "Uncertain significance",
TRUE ~ as.character(NA)
))


if(NROW(biomarker_items) > 0){
results_acmg[['biomarker_evidence']][['items_all']] <-
biomarker_items

if(NROW(biomarker_items_hires) > 0){
results_acmg[['biomarker_evidence']][['items']] <-
biomarker_items |>
biomarker_items_hires |>
dplyr::rename(TIER = .data$ACMG_AMP_TIER) |>
dplyr::mutate(TIER_FRAMEWORK = "ACMG_AMP") |>
dplyr::mutate(TIER_DESCRIPTION = dplyr::case_when(
Expand All @@ -283,6 +295,12 @@ assign_acmg_tiers <- function(
TIER == 4 ~ "Other coding mutation",
TIER == 5 ~ "Noncoding mutation",
TRUE ~ as.character("Undefined")
)) |>
dplyr::mutate(ACTIONABILITY = dplyr::case_when(
TIER == 1 ~ "Strong significance",
TIER == 2 ~ "Potential significance",
TIER == 3 ~ "Uncertain significance",
TRUE ~ as.character(NA)
))
}

Expand All @@ -298,6 +316,12 @@ assign_acmg_tiers <- function(
TIER == 4 ~ "Other coding mutation",
TIER == 5 ~ "Noncoding mutation",
TRUE ~ as.character("Undefined")
)) |>
dplyr::mutate(ACTIONABILITY = dplyr::case_when(
TIER == 1 ~ "Strong significance",
TIER == 2 ~ "Potential significance",
TIER == 3 ~ "Uncertain significance",
TRUE ~ as.character(NA)
))
}

Expand Down
Loading

0 comments on commit 16737fe

Please sign in to comment.