Skip to content

Commit

Permalink
test: add mzml contains peak udf (#202)
Browse files Browse the repository at this point in the history
* test: add mzml contains peak udf
* feat: fix test path
* test: use all features
* feat: fixup fixture tests
  • Loading branch information
tshauck authored Oct 3, 2023
1 parent b22f844 commit 7a8bc9f
Show file tree
Hide file tree
Showing 6 changed files with 280 additions and 11 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,4 +65,4 @@ jobs:
awslocal s3api put-object --bucket test-bucket --key test.fasta --body ./exon/test-data/datasources/fasta/test.fasta
cargo test
cargo test --package exon --test sqllogictests
cargo test --package exon --test sqllogictests --all-features
2 changes: 1 addition & 1 deletion exon/src/datasources/vcf/scanner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ impl ExecutionPlan for VCFScan {
if let Some(projections) = &self.base_config.projection {
config = config.with_projection(projections.clone());
}
tracing::debug!("VCF starting scan with config: {:#?}", config);
tracing::trace!("VCF starting scan with config: {:#?}", config);

let opener = VCFOpener::new(Arc::new(config), self.file_compression_type);
let stream = FileStream::new(&self.base_config, partition, opener, &self.metrics)?;
Expand Down
20 changes: 13 additions & 7 deletions exon/src/datasources/vcf/table_provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -479,20 +479,26 @@ mod tests {
let path = crate::tests::test_fixture_table_url("chr17/")?;

let ctx = SessionContext::new_exon();
let registration_result = ctx
.register_vcf_file("vcf_file", path.to_string().as_str())
.await;

assert!(registration_result.is_ok());
ctx.sql(
format!(
"CREATE EXTERNAL TABLE vcf_file STORED AS INDEXED_VCF COMPRESSION TYPE GZIP LOCATION '{}';",
path.to_string().as_str()
)
.as_str(),
)
.await?;

let sql = "SELECT chrom FROM vcf_file WHERE chrom = '17' AND pos BETWEEN 1000 AND 1000000 AND qual != 100;";
let sql =
"SELECT chrom FROM vcf_file WHERE vcf_region_filter('17:1000-1000000', chrom, pos) AND qual != 100;";
let df = ctx.sql(sql).await?;

let cnt_where_qual_neq_100 = df.count().await?;
assert!(cnt_where_qual_neq_100 > 0);

let cnt_total = ctx
.sql("SELECT chrom FROM vcf_file WHERE chrom = '17' AND pos BETWEEN 1000 AND 1000000;")
.sql(
"SELECT chrom FROM vcf_file WHERE vcf_region_filter('17:1000-1000000', chrom, pos)",
)
.await?
.count()
.await?;
Expand Down
4 changes: 2 additions & 2 deletions exon/src/udfs/massspec/contains_peak.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,9 @@ use datafusion::{
/// * `result` - A BooleanArray. The BooleanArray contains true if the spectrum
/// contains a peak within the tolerance and false otherwise.
fn contains_peak(args: &[ArrayRef]) -> DataFusionResult<ArrayRef> {
if args.len() < 3 {
if args.len() != 3 {
return Err(datafusion::error::DataFusionError::Execution(
"contains_peak takes at least two arguments".to_string(),
"contains_peak takes three arguments".to_string(),
));
}

Expand Down
237 changes: 237 additions & 0 deletions exon/test-data/datasources/mzml-pyoteomics/pyoteomics.mzML

Large diffs are not rendered by default.

26 changes: 26 additions & 0 deletions exon/tests/sqllogictests/slt/mzml-functions.slt
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# SQL logic tests for the mzML UDFs `contains_peak` and `bin_vectors`,
# run against the pyoteomics.mzML fixture.
#
# Substitution is switched on because the LOCATION below uses
# $CARGO_MANIFEST_DIR (presumably expanded from the environment by the
# test runner -- confirm against the runner's configuration).
control substitution on

# Register the fixture file as an external MZML table.
statement ok
CREATE EXTERNAL TABLE mzml_table STORED AS MZML LOCATION '$CARGO_MANIFEST_DIR/test-data/datasources/mzml-pyoteomics/pyoteomics.mzML'
----

# The first row is expected to report a peak for the arguments (200.0, 1.0).
# NOTE(review): presumably these are the target m/z and the tolerance --
# confirm against the contains_peak implementation.
query I
SELECT contains_peak(mz.mz, 200.0, 1.0) AS has_peak FROM mzml_table LIMIT 1;
----
true

# With 0.0 in place of 200.0 the same row is expected to report no peak.
query I
SELECT contains_peak(mz.mz, 0.0, 1.0) AS has_peak FROM mzml_table LIMIT 1;
----
false

# Calling contains_peak with a single argument must be rejected.
statement error
SELECT contains_peak(mz.mz) AS has_peak FROM mzml_table LIMIT 1;

# bin_vectors over the first row's mz and intensity arrays; the expected
# output has ten values with a single non-zero entry.
# NOTE(review): the meaning of the 200.0, 10, 1.0 arguments (presumably
# start, bin count, bin width) is not shown here -- confirm.
query I
SELECT bin_vectors(mz.mz, intensity.intensity, 200.0, 10, 1.0) AS bins FROM mzml_table LIMIT 1;
----
0,0,0,0,203667.40002441406,0,0,0,0,0

# Remove the table registered at the top of this file.
statement ok
DROP TABLE mzml_table

0 comments on commit 7a8bc9f

Please sign in to comment.