diff --git a/Cargo.lock b/Cargo.lock index 03507208..70e7712d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1302,12 +1302,9 @@ dependencies = [ "pretty_assertions", "prettydiff", "regex", - "serde", - "serde_json", "test-log", "tokio", "tokio-test", - "toml", "tree-sitter-bash", "tree-sitter-facade", "tree-sitter-json", diff --git a/topiary/Cargo.toml b/topiary/Cargo.toml index ba7e93b2..0fc3bf2c 100644 --- a/topiary/Cargo.toml +++ b/topiary/Cargo.toml @@ -21,9 +21,6 @@ log = { workspace = true } pretty_assertions = { workspace = true } prettydiff = { workspace = true } regex = { workspace = true } -serde = { workspace = true, features = ["derive"] } -serde_json = { workspace = true } -toml = { workspace = true } tree-sitter-facade = { workspace = true } unescape = { workspace = true } diff --git a/topiary/src/configuration.rs b/topiary/src/configuration.rs deleted file mode 100644 index 273fcede..00000000 --- a/topiary/src/configuration.rs +++ /dev/null @@ -1,110 +0,0 @@ -/// Topiary can be configured using the `Configuration` struct. -/// A basic configuration, written in toml, it is included buildtime and parsed runtime. -/// Additional configuration has to be provided by the user of the library. -use std::collections::{HashMap, HashSet}; -use std::fmt; - -use crate::{language::Language, FormatterError, FormatterResult}; -use serde::{Deserialize, Serialize}; - -/// The configuration of Topiary. Contains information on how to format every language. -/// Can be provided by the user of the library, or alternatively, Topiary ships with a default -/// configuration that can be accessed using `default_configuration_toml` or -/// `parse_default_configuration`. -#[derive(Deserialize, Serialize, Debug)] -pub struct Configuration { - pub language: Vec, -} - -impl Configuration { - pub fn new() -> Self { - Configuration { language: vec![] } - } - - /// Collects the known extensions of all languages into a single HashSet. 
- /// Useful for testing if Topiary is able to configure the given file. - #[must_use] - pub fn known_extensions(&self) -> HashSet<&str> { - let mut res: HashSet<&str> = HashSet::new(); - for lang in &self.language { - for ext in &lang.extensions { - res.insert(ext); - } - } - res - } - - /// Gets a language configuration from the entire configuration. - /// - /// # Errors - /// - /// If the provided language name cannot be found in the Configuration, this - /// function returns a `FormatterError:UnsupportedLanguage` - pub fn get_language>(&self, name: T) -> FormatterResult<&Language> { - for lang in &self.language { - if lang.name == name.as_ref() { - return Ok(lang); - } - } - return Err(FormatterError::UnsupportedLanguage( - name.as_ref().to_string(), - )); - } - - /// Parse the default configuration directly into a `Configuration`, - /// This is useful for users of Topiary that have no special requirements. - /// It is also incredibly useful in tests. - pub fn parse_default_configuration() -> FormatterResult { - default_configuration_toml() - .try_into() - .map_err(FormatterError::from) - } -} - -impl Default for Configuration { - fn default() -> Self { - Self::new() - } -} - -/// Convert `Configuration` values into `HashMap`s, keyed on `Language::name` -// NOTE There are optimisations to be had here, to avoid cloning, but life's too short! 
-impl From<&Configuration> for HashMap { - fn from(config: &Configuration) -> Self { - HashMap::from_iter(config.language.iter().map(|language| { - let name = language.name.clone(); - let language = language.clone(); - - (name, language) - })) - } -} - -// Order-invariant equality; required for unit testing -impl PartialEq for Configuration { - fn eq(&self, other: &Self) -> bool { - let lhs: HashMap = self.into(); - let rhs: HashMap = other.into(); - - lhs == rhs - } -} - -impl fmt::Display for Configuration { - /// Pretty-print configuration as TOML - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let toml = toml::to_string_pretty(self).map_err(|_| fmt::Error)?; - write!(f, "{toml}") - } -} - -/// Default built-in languages.toml parsed to a toml file. -/// We parse the configuration file in two phases, the first is to a `toml::Value` -/// This function is exported to allow users of the library to merge their own -/// configuration with the builtin one. -/// Parsing straight to a `Configuration` doesn't work well, because that forces -/// every configuration file to define every part of the configuration. 
-pub fn default_configuration_toml() -> toml::Value { - let default_config = include_str!("../languages.toml"); - toml::from_str(default_config).expect("Could not parse built-in languages.toml to valid toml") -} diff --git a/topiary/src/error.rs b/topiary/src/error.rs index f3d98347..430214f4 100644 --- a/topiary/src/error.rs +++ b/topiary/src/error.rs @@ -198,12 +198,6 @@ where } } -impl From for FormatterError { - fn from(e: serde_json::Error) -> Self { - Self::Internal("Could not serialise JSON output".into(), Some(Box::new(e))) - } -} - impl From for FormatterError { fn from(e: tree_sitter_facade::LanguageError) -> Self { Self::Internal( @@ -218,12 +212,3 @@ impl From for FormatterError { Self::Internal("Error while parsing".into(), Some(Box::new(e))) } } - -impl From for FormatterError { - fn from(e: toml::de::Error) -> Self { - Self::Internal( - "Error while parsing the internal configuration file".to_owned(), - Some(Box::new(e)), - ) - } -} diff --git a/topiary/src/language.rs b/topiary/src/language.rs index 3830adbd..30cd335c 100644 --- a/topiary/src/language.rs +++ b/topiary/src/language.rs @@ -1,211 +1,28 @@ -use std::{ - collections::HashSet, - fmt, io, - path::{Path, PathBuf}, -}; +use std::fmt; -use clap::ValueEnum; -use serde::{Deserialize, Serialize}; - -use crate::{Configuration, FormatterError, FormatterResult, IoError}; +use crate::TopiaryQuery; /// A Language contains all the information Topiary requires to format that /// specific languages. -#[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize)] +#[derive(Debug)] pub struct Language { /// The name of the language, used as a key when looking up information in /// the Configuration, and to convert from a language to the respective tree-sitter /// grammar. pub name: String, - /// A Set of the filetype extensions associated with this particular language. 
- /// Enables Topiary to pick the right language given an input file - pub extensions: HashSet, + /// The Query Topiary will use to get the formatting captures, must be + /// present. The topiary engine does not include any formatting queries. + pub query: TopiaryQuery, + /// The tree-sitter Language. Topiary will use this Language for parsing. + pub grammar: tree_sitter_facade::Language, /// The indentation string used for that particular language. Defaults to " " /// if not provided. Any string can be provided, but in most instances will be /// some whitespace: " ", " ", or "\t". pub indent: Option, } -impl Language { - /// Convenience alias to detect the Language from a Path-like value's extension. - /// - /// # Errors - /// - /// If the file extension is not supported, a `FormatterError` will be returned. - pub fn detect>(path: P, config: &Configuration) -> FormatterResult<&Self> { - let pb = &path.as_ref().to_path_buf(); - if let Some(extension) = pb.extension().map(|ext| ext.to_string_lossy()) { - for lang in &config.language { - if lang.extensions.contains::(&extension.to_string()) { - return Ok(lang); - } - } - return Err(FormatterError::LanguageDetection( - pb.clone(), - Some(extension.to_string()), - )); - } - Err(FormatterError::LanguageDetection(pb.clone(), None)) - } - - /// Convenience alias to return the query file path for the Language. - pub fn query_file(&self) -> FormatterResult { - self.try_into() - } - - /// Convert a Language into a supported Tree-sitter grammar. - /// - /// Note that, currently, all grammars are statically linked. This will change once dynamic linking - /// is implemented (see Issue #4). - /// - /// # Errors - /// - /// If the language is not supported, a `FormatterError` will be returned. 
- #[cfg(not(target_arch = "wasm32"))] - pub async fn grammar(&self) -> FormatterResult { - Ok(match self.name.as_str() { - "bash" => tree_sitter_bash::language(), - "json" => tree_sitter_json::language(), - "nickel" => tree_sitter_nickel::language(), - "ocaml" => tree_sitter_ocaml::language_ocaml(), - "ocaml_interface" => tree_sitter_ocaml::language_ocaml_interface(), - "ocamllex" => tree_sitter_ocamllex::language(), - "rust" => tree_sitter_rust::language(), - "toml" => tree_sitter_toml::language(), - "tree_sitter_query" => tree_sitter_query::language(), - name => return Err(FormatterError::UnsupportedLanguage(name.to_string())), - } - .into()) - } - - #[cfg(target_arch = "wasm32")] - pub async fn grammar_wasm(&self) -> FormatterResult { - let language_name = match self.name.as_str() { - "bash" => "bash", - "json" => "json", - "nickel" => "nickel", - "ocaml" => "ocaml", - "ocaml_interface" => "ocaml_interface", - "ocamllex" => "ocamllex", - "rust" => "rust", - "toml" => "toml", - "tree_sitter_query" => "query", - name => return Err(FormatterError::UnsupportedLanguage(name.to_string())), - }; - - Ok(web_tree_sitter::Language::load_path(&format!( - "/playground/scripts/tree-sitter-{language_name}.wasm" - )) - .await - .map_err(|e| { - let error: tree_sitter_facade::LanguageError = e.into(); - error - })? - .into()) - } -} - impl fmt::Display for Language { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{}", self.name) } } - -/// Convert a Language into the canonical basename of its query file, under the most appropriate -/// search path. We test 3 different locations for query files, in the following priority order, -/// returning the first that exists: -/// -/// 1. Under the `TOPIARY_LANGUAGE_DIR` environment variable at runtime; -/// 2. Under the `TOPIARY_LANGUAGE_DIR` environment variable at build time; -/// 3. Under the `./languages` subdirectory. -/// -/// If all of these fail, we return an I/O error. 
-/// -/// Note that different languages may map to the same query file, because their grammars produce -/// similar trees, which can be formatted with the same queries. -impl TryFrom<&Language> for PathBuf { - type Error = FormatterError; - - fn try_from(language: &Language) -> FormatterResult { - let basename = Self::from(match language.name.as_str() { - "bash" => "bash", - "json" => "json", - "nickel" => "nickel", - "ocaml" | "ocaml_interface" => "ocaml", - "ocamllex" => "ocamllex", - "rust" => "rust", - "toml" => "toml", - "tree_sitter_query" => "tree-sitter-query", - name => return Err(FormatterError::UnsupportedLanguage(name.to_string())), - }) - .with_extension("scm"); - - #[rustfmt::skip] - let potentials: [Option; 4] = [ - std::env::var("TOPIARY_LANGUAGE_DIR").map(Self::from).ok(), - option_env!("TOPIARY_LANGUAGE_DIR").map(Self::from), - Some(Self::from("./languages")), - Some(Self::from("../languages")), - ]; - - potentials - .into_iter() - .flatten() - .map(|path| path.join(&basename)) - .find(|path| path.exists()) - .ok_or_else(|| { - FormatterError::Io(IoError::Filesystem( - "Language query file could not be found".into(), - io::Error::from(io::ErrorKind::NotFound), - )) - }) - } -} - -/// Topiary can format more languages than are actually "supported". -/// This enum is an enumeration of those we (the maintainers) are comfortable in -/// calling "supported". -/// Any other entries in crate::Language are experimental and won't be -/// exposed in the CLI. They can be accessed using --query language/foo.scm -/// instead. 
-#[derive(Clone, Copy, Debug, ValueEnum)] -pub enum SupportedLanguage { - Json, - Nickel, - Ocaml, - OcamlInterface, - Ocamllex, - Toml, -} - -impl SupportedLanguage { - /// Function to convert a `SupportedLanguage` into a `crate::Language` for further processing - pub fn to_language<'config>(&self, configuration: &'config Configuration) -> &'config Language { - let name = self.name(); - - for lang in &configuration.language { - if lang.name == name { - return lang; - } - } - - // Every supported language MUST have an entry in the builtin - // configuration, and so there should always be a match. - unreachable!() - } - - pub fn name(&self) -> &str { - match self { - SupportedLanguage::Json => "json", - SupportedLanguage::Nickel => "nickel", - SupportedLanguage::Ocaml => "ocaml", - SupportedLanguage::OcamlInterface => "ocaml_interface", - SupportedLanguage::Ocamllex => "ocamllex", - SupportedLanguage::Toml => "toml", - } - } - - pub fn is_supported(name: &str) -> bool { - SupportedLanguage::from_str(name, true).is_ok() - } -} diff --git a/topiary/src/lib.rs b/topiary/src/lib.rs index 815c8226..0a0d5554 100644 --- a/topiary/src/lib.rs +++ b/topiary/src/lib.rs @@ -17,14 +17,12 @@ use pretty_assertions::StrComparison; use tree_sitter::Position; pub use crate::{ - configuration::{default_configuration_toml, Configuration}, error::{FormatterError, IoError}, - language::{Language, SupportedLanguage}, + language::Language, tree_sitter::{apply_query, SyntaxNode, TopiaryQuery, Visualisation}, }; mod atom_collection; -mod configuration; mod error; mod graphviz; mod language; @@ -194,9 +192,7 @@ pub enum Operation { pub fn formatter( input: &mut impl io::Read, output: &mut impl io::Write, - query: &TopiaryQuery, language: &Language, - grammar: &tree_sitter_facade::Language, operation: Operation, ) -> FormatterResult<()> { let content = read_input(input).map_err(|e| { @@ -214,8 +210,13 @@ pub fn formatter( // All the work related to tree-sitter and the query is done here 
log::info!("Apply Tree-sitter query"); - let mut atoms = - tree_sitter::apply_query(&content, query, grammar, tolerate_parsing_errors, false)?; + let mut atoms = tree_sitter::apply_query( + &content, + &language.query, + &language.grammar, + tolerate_parsing_errors, + false, + )?; // Various post-processing of whitespace atoms.post_process(); @@ -230,7 +231,7 @@ pub fn formatter( let trimmed = trim_whitespace(&rendered); if !skip_idempotence { - idempotence_check(&trimmed, query, language, grammar, tolerate_parsing_errors)?; + idempotence_check(&trimmed, language, tolerate_parsing_errors)?; } write!(output, "{trimmed}")?; @@ -275,9 +276,7 @@ fn trim_whitespace(s: &str) -> String { /// `Err(FormatterError::Formatting(...))` if the formatting failed fn idempotence_check( content: &str, - query: &TopiaryQuery, language: &Language, - grammar: &tree_sitter_facade::Language, tolerate_parsing_errors: bool, ) -> FormatterResult<()> { log::info!("Checking for idempotence ..."); @@ -288,9 +287,7 @@ fn idempotence_check( match formatter( &mut input, &mut output, - query, language, - grammar, Operation::Format { skip_idempotence: true, tolerate_parsing_errors, @@ -321,8 +318,8 @@ mod tests { use test_log::test; use crate::{ - configuration::Configuration, error::FormatterError, formatter, - test_utils::pretty_assert_eq, Operation, TopiaryQuery, + error::FormatterError, formatter, test_utils::pretty_assert_eq, Language, Operation, + TopiaryQuery, }; /// Attempt to parse invalid json, expecting a failure @@ -331,17 +328,18 @@ mod tests { let mut input = r#"{"foo":{"bar"}}"#.as_bytes(); let mut output = Vec::new(); let query_content = "(#language! 
json)"; - let configuration = Configuration::parse_default_configuration().unwrap(); - let language = configuration.get_language("json").unwrap(); - let grammar = language.grammar().await.unwrap(); - let query = TopiaryQuery::new(&grammar, query_content).unwrap(); + let grammar = tree_sitter_json::language().into(); + let language = Language { + name: "json".to_owned(), + query: TopiaryQuery::new(&grammar, query_content).unwrap(), + grammar, + indent: None, + }; match formatter( &mut input, &mut output, - &query, - language, - &grammar, + &language, Operation::Format { skip_idempotence: true, tolerate_parsing_errors: false, @@ -366,17 +364,18 @@ mod tests { let mut output = Vec::new(); let query_content = fs::read_to_string("../languages/json.scm").unwrap(); - let configuration = Configuration::parse_default_configuration().unwrap(); - let language = configuration.get_language("json").unwrap(); - let grammar = language.grammar().await.unwrap(); - let query = TopiaryQuery::new(&grammar, &query_content).unwrap(); + let grammar = tree_sitter_json::language().into(); + let language = Language { + name: "json".to_owned(), + query: TopiaryQuery::new(&grammar, &query_content).unwrap(), + grammar, + indent: None, + }; formatter( &mut input, &mut output, - &query, - language, - &grammar, + &language, Operation::Format { skip_idempotence: true, tolerate_parsing_errors: true, diff --git a/topiary/src/tree_sitter.rs b/topiary/src/tree_sitter.rs index 21ee9dad..33b80520 100644 --- a/topiary/src/tree_sitter.rs +++ b/topiary/src/tree_sitter.rs @@ -21,7 +21,7 @@ pub enum Visualisation { /// Refers to a position within the code. Used for error reporting, and for /// comparing input with formatted output. The numbers are 1-based, because that /// is how editors usually refer to a position. Derived from tree_sitter::Point. 
-#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize)] +#[derive(Clone, Copy, Debug, PartialEq, Eq)] pub struct Position { pub row: u32, pub column: u32, @@ -30,6 +30,7 @@ pub struct Position { /// Topiary often needs both the tree-sitter `Query` and the original content /// beloging to the file from which the query was parsed. This struct is a simple /// convenience wrapper that combines the `Query` with its original string. +#[derive(Debug)] pub struct TopiaryQuery { pub query: Query, pub query_content: String,