Skip to content

Commit

Permalink
feat: pass through encoding option to create table (#512)
Browse files (browse the repository at this point in the history)
  • Loading branch information
tshauck authored May 23, 2024
1 parent edc1d80 commit 81c6d21
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 1 deletion.
15 changes: 14 additions & 1 deletion exon/exon-core/src/datasources/exon_listing_table_factory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ use datafusion::{
execution::context::SessionState,
logical_expr::CreateExternalTable,
};
use exon_fasta::SequenceDataType;
use url::Url;

use crate::{config::extract_config_from_state, datasources::ExonFileType, ExonRuntimeEnvExt};
Expand Down Expand Up @@ -243,9 +244,21 @@ impl ExonListingTableFactory {
ExonFileType::FASTA | ExonFileType::FA | ExonFileType::FAA | ExonFileType::FNA => {
let extension = options.get(FILE_EXTENSION_OPTION).map(|s| s.as_str());

let fasta_sequence_data_type =
if let Some(data_type) = options.get("fasta_sequence_data_type") {
SequenceDataType::from_str(data_type).map_err(|e| {
datafusion::error::DataFusionError::Execution(format!(
"Failed to parse sequence data type: {}",
e
))
})?
} else {
exon_config_extension.fasta_sequence_data_type()?
};

let table_options = ListingFASTATableOptions::new(file_compression_type)
.with_table_partition_cols(table_partition_cols)
.with_sequence_data_type(exon_config_extension.fasta_sequence_data_type()?)
.with_sequence_data_type(fasta_sequence_data_type)
.with_some_file_extension(extension);

let schema = table_options.infer_schema(state).await?;
Expand Down
9 changes: 9 additions & 0 deletions exon/exon-core/tests/sqllogictests/slt/fasta-scan-tests.slt
Original file line number Diff line number Diff line change
Expand Up @@ -147,3 +147,12 @@ b description2 [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 1

statement ok
DROP TABLE exon_table;

statement ok
CREATE EXTERNAL TABLE exon_table STORED AS FASTA OPTIONS (file_extension 'faa', fasta_sequence_data_type 'one_hot_protein') LOCATION '$CARGO_MANIFEST_DIR/test-data/datasources/faa/test.faa';

query T
SELECT id, description, sequence FROM exon_table;
----
a description [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
b description2 [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]

0 comments on commit 81c6d21

Please sign in to comment.