Skip to content

Commit

Permalink
Include column 9 when writing complexes to GPI
Browse files Browse the repository at this point in the history
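Column 9 is "Protein_Containing_Complex_Members": the sorted, "|"-separated list of database-prefixed identifiers of the genes that are members of the complex.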

Refs pombase/pombase-chado#1166
  • Loading branch information
kimrutherford committed May 6, 2024
1 parent 8c692ca commit c79960e
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 5 deletions.
15 changes: 12 additions & 3 deletions src/pombase/bio/go_format_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -162,10 +162,19 @@ fn write_complexes_to_gpi(gpi_writer: &mut dyn io::Write, protein_complex_map: &
{
let db_object_taxon = make_ncbi_taxon_id(config);

for protein_complex_uniquename in protein_complex_map.keys() {
let gpi_line = format!("ComplexPortal:{}\t\t\t\tGO:0032991\t{}\t\t\t\t\t\n",
for (protein_complex_uniquename, complex_details) in protein_complex_map.iter() {
let mut genes_with_prefixes: Vec<String> =
complex_details.genes
.iter().map(|gene_uniquename| {
format!("{}:{}", config.database_name, gene_uniquename)
}).collect();

genes_with_prefixes.sort();

let gpi_line = format!("ComplexPortal:{}\t\t\t\tGO:0032991\t{}\t\t\t{}\t\t\n",
protein_complex_uniquename,
db_object_taxon);
db_object_taxon,
genes_with_prefixes.join("|"));
gpi_writer.write_all(gpi_line.as_bytes())?;
}

Expand Down
3 changes: 2 additions & 1 deletion src/pombase/data_types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2314,7 +2314,8 @@ pub struct ProteinViewData {

#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct ProteinComplexDetails {
pub uniquename: FlexStr,
pub complex_uniquename: FlexStr,
pub genes: HashSet<GeneUniquename>,
}

pub type GoCamId = FlexStr;
Expand Down
11 changes: 10 additions & 1 deletion src/pombase/web/data_build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ pub struct WebDataBuild<'a> {
all_not_ont_annotations: HashMap<TermId, Vec<OntAnnotationId>>,

protein_complexes: HashMap<ProteinComplexUniquename, ProteinComplexDetails>,
genes_of_complexes: HashMap<ProteinComplexUniquename, HashSet<GeneUniquename>>,

// map from term name to term ID (ie "nucleus" -> "GO:0005634")
term_ids_by_name: HashMap<FlexStr, TermId>,
Expand Down Expand Up @@ -879,6 +880,7 @@ impl <'a> WebDataBuild<'a> {
transcripts: HashMap::new(),
other_features: HashMap::new(),
protein_complexes: HashMap::new(),
genes_of_complexes: HashMap::new(),
terms: HashMap::new(),
chromosomes: BTreeMap::new(),
references: HashMap::new(),
Expand Down Expand Up @@ -1771,6 +1773,11 @@ phenotypes, so just the first part of this extension will be used:
entry.or_insert_with(Vec::new).push(part);
}

if subject_type_name == "gene" && object_type_name == "protein-containing complex" {
let entry = self.genes_of_complexes.entry(object_uniquename.clone());
entry.or_insert_with(HashSet::new).insert(subject_uniquename.clone());
}

if object_type_name == "genotype_interaction" {
let genotype_interaction_uniquename = object_uniquename.clone();

Expand Down Expand Up @@ -2237,7 +2244,9 @@ phenotypes, so just the first part of this extension will be used:
let complex_uniquename = feat.uniquename.clone();

let details = ProteinComplexDetails {
uniquename: complex_uniquename.clone()
complex_uniquename: complex_uniquename.clone(),
genes: self.genes_of_complexes.get(&complex_uniquename)
.map_or_else(HashSet::new, HashSet::clone),
};

self.protein_complexes.insert(complex_uniquename, details);
Expand Down

0 comments on commit c79960e

Please sign in to comment.