diff --git a/topiary-core/src/atom_collection.rs b/topiary-core/src/atom_collection.rs index 0a1eb13e..3ef4676d 100644 --- a/topiary-core/src/atom_collection.rs +++ b/topiary-core/src/atom_collection.rs @@ -8,6 +8,8 @@ use std::{ use topiary_tree_sitter_facade::Node; use crate::{ + comments::{AnchoredComment, Comment}, + common::InputSection, tree_sitter::NodeExt, Atom, FormatterError, FormatterResult, ScopeCondition, ScopeInformation, }; @@ -38,6 +40,8 @@ pub struct AtomCollection { /// something to a node, a new Atom is added to this HashMap. /// The key of the hashmap is the identifier of the node. append: HashMap>, + /// + comments: HashMap>, /// A query file can define custom leaf nodes (nodes that Topiary should not /// touch during formatting). When such a node is encountered, its id is stored in /// this HashSet. @@ -73,6 +77,7 @@ impl AtomCollection { atoms, prepend: HashMap::new(), append: HashMap::new(), + comments: HashMap::new(), specified_leaf_nodes: HashSet::new(), parent_leaf_nodes: HashMap::new(), multi_line_nodes: HashSet::new(), @@ -88,6 +93,7 @@ impl AtomCollection { root: &Node, source: &[u8], specified_leaf_nodes: HashSet, + mut comments: Vec, ) -> FormatterResult { // Flatten the tree, from the root node, in a depth-first traversal let dfs_nodes = dfs_flatten(root); @@ -101,6 +107,7 @@ impl AtomCollection { atoms: Vec::new(), prepend: HashMap::new(), append: HashMap::new(), + comments: HashMap::new(), specified_leaf_nodes, parent_leaf_nodes: HashMap::new(), multi_line_nodes, @@ -110,9 +117,18 @@ impl AtomCollection { counter: 0, }; - atoms.collect_leafs_inner(root, source, &Vec::new(), 0)?; + atoms.collect_leafs_inner(root, source, &mut comments, &Vec::new(), 0)?; - Ok(atoms) + if let Some(comment) = comments.pop() { + // Some anchored couldn't be attached back to the code: + // rais ean error with the first of them + Err(FormatterError::CommentAbandoned( + comment.comment_text, + format!("{:?}", comment.commented), + )) + } else { + Ok(atoms) + } } // wrap inside a conditional atom if #single/multi_line_scope_only! is set @@ -496,11 +512,14 @@ impl AtomCollection { /// A leaf node is either a node with no children or a node that is specified as a leaf node by the formatter. /// A leaf parent is the closest ancestor of a leaf node. /// + /// This function also attach comments to the leaf node that contains their anchor. + /// /// # Arguments /// /// * `node` - The current node to process. /// * `source` - The full source code as a byte slice. /// * `parent_ids` - A vector of node ids that are the ancestors of the current node. + /// * `comments` - A vector that stores unattached comments. /// * `level` - The depth of the current node in the CST tree. /// /// # Errors @@ -510,6 +529,7 @@ impl AtomCollection { &mut self, node: &Node, source: &[u8], + comments: &mut Vec, parent_ids: &[usize], level: usize, ) -> FormatterResult<()> { @@ -542,9 +562,23 @@ impl AtomCollection { }); // Mark all sub-nodes as having this node as a "leaf parent" self.mark_leaf_parent(node, node.id()); + // Test the node for comments + // `comments` is sorted in reverse order, so `pop()` gets the first one. + while let Some(comment) = comments.pop() { + let node_section: InputSection = node.into(); + if node_section.contains(&(&comment).into()) { + self.comments + .entry(id) + .or_insert(Vec::new()) + .push((&comment).into()); + } else { + comments.push(comment); + break; + } + } } else { for child in node.children(&mut node.walk()) { - self.collect_leafs_inner(&child, source, &parent_ids, level + 1)?; + self.collect_leafs_inner(&child, source, comments, &parent_ids, level + 1)?; } } diff --git a/topiary-core/src/comments.rs b/topiary-core/src/comments.rs index 4b31ad60..7e55128a 100644 --- a/topiary-core/src/comments.rs +++ b/topiary-core/src/comments.rs @@ -127,6 +127,42 @@ impl Diff for Commented { } } +/// A comment, as part of Topiary's output. +/// We forget posiiton information here, because the struct +/// is supposed to be attached to the node it comments. +#[derive(Debug)] +pub enum Comment { + /// The comment is before the code section, as is: + /// ``` + /// struct Foo { + /// // let's have a baz + /// baz: usize, + /// // and a qux + /// qux: usize, + /// } + /// ``` + CommentBefore(String), + /// The comment is after the code section, as in: + /// The code section is before the comment, as in: + /// ``` + /// struct Foo { + /// baz: usize, // this is baz + /// quz: usize, // this is qux + /// } + /// ``` + CommentAfter(String), +} + +impl From<&AnchoredComment> for Comment { + fn from(value: &AnchoredComment) -> Self { + let content = value.comment_text.clone(); + match value.commented { + Commented::CommentedAfter(_) => Comment::CommentBefore(content), + Commented::CommentedBefore(_) => Comment::CommentAfter(content), + } + } +} + // TODO: if performance is an issue, use TreeCursor to navigate the tree fn next_non_comment_leaf<'tree>(starting_node: Node<'tree>) -> Option> { let mut node: Node<'tree> = starting_node; @@ -246,24 +282,22 @@ fn find_anchor<'tree>(node: &'tree Node<'tree>, input: &str) -> FormatterResult< })?; if prefix.trim_start() == "" { if let Some(anchor) = next_non_comment_leaf(node.clone()) { - return Ok(Commented::CommentedAfter(anchor.into())); + return Ok(Commented::CommentedAfter((&anchor).into())); } else if let Some(anchor) = previous_non_comment_leaf(node.clone()) { - return Ok(Commented::CommentedBefore(anchor.into())); + return Ok(Commented::CommentedBefore((&anchor).into())); } else { - return Err(FormatterError::Internal( - format!("Could find no anchor for comment {node:?}",), - None, + return Err(FormatterError::CommentOrphaned( + node.utf8_text(input.as_bytes())?.to_string(), )); } } else { if let Some(anchor) = previous_non_comment_leaf(node.clone()) { - return Ok(Commented::CommentedBefore(anchor.into())); + return Ok(Commented::CommentedBefore((&anchor).into())); } else if let Some(anchor) = next_non_comment_leaf(node.clone()) { - return Ok(Commented::CommentedAfter(anchor.into())); + return Ok(Commented::CommentedAfter((&anchor).into())); } else { - return Err(FormatterError::Internal( - format!("Could find no anchor for comment {node:?}",), - None, + return Err(FormatterError::CommentOrphaned( + node.utf8_text(input.as_bytes())?.to_string(), )); } } @@ -275,6 +309,15 @@ pub struct AnchoredComment { pub commented: Commented, } +impl From<&AnchoredComment> for InputSection { + fn from(value: &AnchoredComment) -> Self { + match value.commented { + Commented::CommentedBefore(section) => section, + Commented::CommentedAfter(section) => section, + } + } +} + pub struct SeparatedInput { pub input_tree: Tree, pub input_string: String, @@ -327,7 +370,7 @@ pub fn extract_comments<'a>( comment_text, }| -> FormatterResult { - commented.subtract(comment.clone().into())?; + commented.subtract((&comment).into())?; Ok(AnchoredComment { commented, comment_text: comment_text.to_string(), diff --git a/topiary-core/src/common.rs b/topiary-core/src/common.rs index 61d35692..a643f3fd 100644 --- a/topiary-core/src/common.rs +++ b/topiary-core/src/common.rs @@ -40,8 +40,14 @@ pub struct InputSection { pub end: Position, } -impl From> for InputSection { - fn from(value: Node) -> Self { +impl InputSection { + pub fn contains(self: Self, other: &Self) -> bool { + self.start <= other.start && other.end <= self.end + } +} + +impl From<&Node<'_>> for InputSection { + fn from(value: &Node) -> Self { InputSection { start: value.start_position().into(), end: value.end_position().into(), diff --git a/topiary-core/src/error.rs b/topiary-core/src/error.rs index 4c93c8fb..18e1344c 100644 --- a/topiary-core/src/error.rs +++ b/topiary-core/src/error.rs @@ -6,6 +6,14 @@ use std::{error::Error, fmt, io, ops::Deref, str, string}; /// The various errors the formatter may return. #[derive(Debug)] pub enum FormatterError { + /// Found an anchored comment that couldn't be re-attached to its anchor. + /// The second argument should be an InputSection, but cyclic dependencies + /// make it difficult. + CommentAbandoned(String, String), + + /// Found a comment for which no anchor could be found. + CommentOrphaned(String), + /// The input produced output that isn't idempotent, i.e. formatting the /// output again made further changes. If this happened using our provided /// query files, it is a bug. Please log an issue. @@ -57,6 +65,20 @@ impl fmt::Display for FormatterError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let please_log_message = "If this happened with the built-in query files, it is a bug. It would be\nhelpful if you logged this error at\nhttps://github.com/tweag/topiary/issues/new?assignees=&labels=type%3A+bug&template=bug_report.md"; match self { + Self::CommentAbandoned(comment, anchor) => { + write!( + f, + "Found an anchored comment, but could not attach it back to its anchor\n{comment}\nThe anchor was {anchor}", + ) + } + + Self::CommentOrphaned(comment) => { + write!( + f, + "Found a comment for which no anchor could be found:\n{comment}", + ) + } + Self::Idempotence => { write!( f, @@ -100,6 +122,8 @@ impl Error for FormatterError { fn source(&self) -> Option<&(dyn Error + 'static)> { match self { Self::Idempotence + | Self::CommentAbandoned(..) + | Self::CommentOrphaned(_) | Self::Parsing { .. } | Self::PatternDoesNotMatch | Self::Io(IoError::Generic(_, None)) => None, diff --git a/topiary-core/src/tree_sitter.rs b/topiary-core/src/tree_sitter.rs index db1f73f0..d92935e0 100644 --- a/topiary-core/src/tree_sitter.rs +++ b/topiary-core/src/tree_sitter.rs @@ -225,7 +225,7 @@ pub fn apply_query( for AnchoredComment { comment_text, commented, - } in comments + } in comments.iter() { log::debug!("Found comment \"{comment_text}\" with anchor {commented:?}"); } @@ -251,9 +251,9 @@ pub fn apply_query( let specified_leaf_nodes: HashSet = collect_leaf_ids(&matches, capture_names.clone()); // The Flattening: collects all terminal nodes of the tree-sitter tree in a Vec - let mut atoms = AtomCollection::collect_leafs(&root, source, specified_leaf_nodes)?; + let mut atoms = AtomCollection::collect_leafs(&root, source, specified_leaf_nodes, comments)?; - log::debug!("List of atoms before formatting: {atoms:?}"); + log::debug!("List of atoms before formatting: {atoms:#?}"); // Memoization of the pattern positions let mut pattern_positions: Vec> = Vec::new();