Skip to content

Commit

Permalink
normalize clinical data counts in a case-insensitive way (#11304)
Browse files Browse the repository at this point in the history
  • Loading branch information
onursumer authored Jan 3, 2025
1 parent 697d4f4 commit dc326dc
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,10 @@ public List<ClinicalDataCountItem> getClinicalDataCounts(StudyViewFilter studyVi
List<String> involvedCancerStudies = context.involvedCancerStudies();

var result = studyViewRepository.getClinicalDataCounts(context, filteredAttributes);


// normalize data counts case-insensitively so that values like TRUE, True, and true are all merged into a single count
result.forEach(item -> item.setCounts(StudyViewColumnarServiceUtil.normalizeDataCounts(item.getCounts())));

// attributes may be missing from the result set because they have been filtered out
// e.g. if the filtered samples happen to have no SEX data, SEX will not appear in the list
// even though the inferred value of that attribute is NA
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@
WITH clinical_data_query AS (
SELECT
attribute_name AS attributeId,
upper(attribute_value) AS value,
attribute_value AS value,
cast(count(*) AS INTEGER) as count
FROM clinical_data_derived
<where>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,12 +81,12 @@ public void getCenterCounts() {
var categoricalClinicalDataCounts = categoricalClinicalDataCountsOptional.get().getCounts();

assertEquals(7, categoricalClinicalDataCounts.size());
assertEquals(3, findClinicaDataCount(categoricalClinicalDataCounts, "MSK"));
assertEquals(2, findClinicaDataCount(categoricalClinicalDataCounts, "DFCI"));
assertEquals(2, findClinicaDataCount(categoricalClinicalDataCounts, "CHOP"));
assertEquals(1, findClinicaDataCount(categoricalClinicalDataCounts, "MDA"));
assertEquals(1, findClinicaDataCount(categoricalClinicalDataCounts, "OHSU"));
assertEquals(1, findClinicaDataCount(categoricalClinicalDataCounts, "UCSF"));
assertEquals(3, findClinicaDataCount(categoricalClinicalDataCounts, "msk"));
assertEquals(2, findClinicaDataCount(categoricalClinicalDataCounts, "dfci"));
assertEquals(2, findClinicaDataCount(categoricalClinicalDataCounts, "chop"));
assertEquals(1, findClinicaDataCount(categoricalClinicalDataCounts, "mda"));
assertEquals(1, findClinicaDataCount(categoricalClinicalDataCounts, "ohsu"));
assertEquals(1, findClinicaDataCount(categoricalClinicalDataCounts, "ucsf"));
// 1 empty string + 1 'NA' + 12 samples with no data
assertEquals(14, findClinicaDataCount(categoricalClinicalDataCounts, "NA"));
}
Expand All @@ -108,12 +108,16 @@ public void getDeadCounts() {
assertTrue(categoricalClinicalDataCountsOptional.isPresent());
var categoricalClinicalDataCounts = categoricalClinicalDataCountsOptional.get().getCounts();

assertEquals(6, categoricalClinicalDataCounts.size());
assertEquals(3, findClinicaDataCount(categoricalClinicalDataCounts, "TRUE"));
assertEquals(4, findClinicaDataCount(categoricalClinicalDataCounts, "FALSE"));
assertEquals(1, findClinicaDataCount(categoricalClinicalDataCounts, "NOT RELEASED"));
assertEquals(1, findClinicaDataCount(categoricalClinicalDataCounts, "NOT COLLECTED"));
assertEquals(1, findClinicaDataCount(categoricalClinicalDataCounts, "UNKNOWN"));
assertEquals(10, categoricalClinicalDataCounts.size());
assertEquals(1, findClinicaDataCount(categoricalClinicalDataCounts, "True"));
assertEquals(1, findClinicaDataCount(categoricalClinicalDataCounts, "TRUE"));
assertEquals(1, findClinicaDataCount(categoricalClinicalDataCounts, "true"));
assertEquals(1, findClinicaDataCount(categoricalClinicalDataCounts, "False"));
assertEquals(2, findClinicaDataCount(categoricalClinicalDataCounts, "FALSE"));
assertEquals(1, findClinicaDataCount(categoricalClinicalDataCounts, "false"));
assertEquals(1, findClinicaDataCount(categoricalClinicalDataCounts, "Not Released"));
assertEquals(1, findClinicaDataCount(categoricalClinicalDataCounts, "Not Collected"));
assertEquals(1, findClinicaDataCount(categoricalClinicalDataCounts, "Unknown"));
// 1 empty string + 1 'N/A' + 12 samples with no data
assertEquals(14, findClinicaDataCount(categoricalClinicalDataCounts, "NA"));
}
Expand Down

0 comments on commit dc326dc

Please sign in to comment.