From 0d753d4cfd52907fd203728f5c233e95f292af22 Mon Sep 17 00:00:00 2001 From: Nicolas BACQUEY Date: Wed, 18 Dec 2024 18:48:26 +0100 Subject: [PATCH] Re-attach comments in `AtomCollection` --- topiary-core/src/atom_collection.rs | 55 ++++++++++++++++++++++++++--- topiary-core/src/comments.rs | 45 +++++++++++++++++------ topiary-core/src/common.rs | 10 ++++-- topiary-core/src/error.rs | 24 +++++++++++++ topiary-core/src/tree_sitter.rs | 7 ++-- 5 files changed, 122 insertions(+), 19 deletions(-) diff --git a/topiary-core/src/atom_collection.rs b/topiary-core/src/atom_collection.rs index 499b4a9c..fa3b826a 100644 --- a/topiary-core/src/atom_collection.rs +++ b/topiary-core/src/atom_collection.rs @@ -1,6 +1,6 @@ use std::{ borrow::Cow, - collections::{HashMap, HashSet}, + collections::{HashMap, HashSet, VecDeque}, mem, ops::Deref, }; @@ -8,6 +8,8 @@ use std::{ use topiary_tree_sitter_facade::Node; use crate::{ + comments::{AnchoredComment, Comment, Commented}, + common::InputSection, tree_sitter::NodeExt, Atom, FormatterError, FormatterResult, ScopeCondition, ScopeInformation, }; @@ -38,6 +40,10 @@ pub struct AtomCollection { /// something to a node, a new Atom is added to this HashMap. /// The key of the hashmap is the identifier of the node. append: HashMap>, + /// Maps node IDs to comments before that node. Comments are stored in reading order. + comments_before: HashMap>, + /// Maps node IDs to comments after that node. Comments are stored in reading order. + comments_after: HashMap>, /// A query file can define custom leaf nodes (nodes that Topiary should not /// touch during formatting). When such a node is encountered, its id is stored in /// this HashSet. @@ -73,6 +79,8 @@ impl AtomCollection { atoms, prepend: HashMap::new(), append: HashMap::new(), + comments_before: HashMap::new(), + comments_after: HashMap::new(), specified_leaf_nodes: HashSet::new(), parent_leaf_nodes: HashMap::new(), multi_line_nodes: HashSet::new(), @@ -88,6 +96,7 @@ impl AtomCollection { root: &Node, source: &[u8], specified_leaf_nodes: HashSet, + mut comments: Vec, ) -> FormatterResult { // Flatten the tree, from the root node, in a depth-first traversal let dfs_nodes = dfs_flatten(root); @@ -101,6 +110,8 @@ impl AtomCollection { atoms: Vec::new(), prepend: HashMap::new(), append: HashMap::new(), + comments_before: HashMap::new(), + comments_after: HashMap::new(), specified_leaf_nodes, parent_leaf_nodes: HashMap::new(), multi_line_nodes, @@ -110,9 +121,18 @@ impl AtomCollection { counter: 0, }; - atoms.collect_leafs_inner(root, source, &Vec::new(), 0)?; + atoms.collect_leafs_inner(root, source, &mut comments, &Vec::new(), 0)?; - Ok(atoms) + if let Some(comment) = comments.pop() { + // Some anchored couldn't be attached back to the code: + // raise an error with the first of them + Err(FormatterError::CommentAbandoned( + comment.comment_text, + format!("{:?}", comment.commented), + )) + } else { + Ok(atoms) + } } // wrap inside a conditional atom if #single/multi_line_scope_only! is set @@ -496,11 +516,14 @@ impl AtomCollection { /// A leaf node is either a node with no children or a node that is specified as a leaf node by the formatter. /// A leaf parent is the closest ancestor of a leaf node. /// + /// This function also attach comments to the leaf node that contains their anchor. + /// /// # Arguments /// /// * `node` - The current node to process. /// * `source` - The full source code as a byte slice. /// * `parent_ids` - A vector of node ids that are the ancestors of the current node. + /// * `comments` - A vector that stores unattached comments. /// * `level` - The depth of the current node in the CST tree. /// /// # Errors @@ -510,6 +533,7 @@ impl AtomCollection { &mut self, node: &Node, source: &[u8], + comments: &mut Vec, parent_ids: &[usize], level: usize, ) -> FormatterResult<()> { @@ -542,9 +566,32 @@ impl AtomCollection { }); // Mark all sub-nodes as having this node as a "leaf parent" self.mark_leaf_parent(node, node.id()); + // Test the node for comments + // `comments` is sorted in reverse order, so `pop()` gets the first one. + while let Some(comment) = comments.pop() { + let node_section: InputSection = node.into(); + // REVIEW: the double borrow is sort of weird, is this idiomatic? + if node_section.contains(&(&comment).into()) { + match comment.commented { + Commented::CommentedAfter(_) => self + .comments_before + .entry(id) + .or_default() + .push_back((&comment).into()), + Commented::CommentedBefore(_) => self + .comments_after + .entry(id) + .or_default() + .push_back((&comment).into()), + } + } else { + comments.push(comment); + break; + } + } } else { for child in node.children(&mut node.walk()) { - self.collect_leafs_inner(&child, source, &parent_ids, level + 1)?; + self.collect_leafs_inner(&child, source, comments, &parent_ids, level + 1)?; } } diff --git a/topiary-core/src/comments.rs b/topiary-core/src/comments.rs index a995c437..c4911539 100644 --- a/topiary-core/src/comments.rs +++ b/topiary-core/src/comments.rs @@ -168,6 +168,24 @@ impl Diff for Commented { } } +/// A comment, as part of Topiary's output. +/// We forget node information here, because the struct +/// is supposed to be attached to the node it comments. +#[derive(Debug)] +pub struct Comment { + pub content: String, + pub original_column: i32, +} + +impl From<&AnchoredComment> for Comment { + fn from(value: &AnchoredComment) -> Self { + Comment { + content: value.comment_text.clone(), + original_column: value.original_column, + } + } +} + fn next_disjoint_node<'tree>(starting_node: &'tree Node<'tree>) -> Option> { let mut node: Node<'tree> = starting_node.clone(); // move up until we find a next sibling @@ -311,24 +329,22 @@ fn find_anchor<'tree>(node: &'tree Node<'tree>, input: &str) -> FormatterResult< })?; if prefix.trim_start() == "" { if let Some(anchor) = next_non_comment_leaf(node.clone()) { - return Ok(Commented::CommentedAfter(anchor.into())); + return Ok(Commented::CommentedAfter((&anchor).into())); } else if let Some(anchor) = previous_non_comment_leaf(node.clone()) { - return Ok(Commented::CommentedBefore(anchor.into())); + return Ok(Commented::CommentedBefore((&anchor).into())); } else { - return Err(FormatterError::Internal( - format!("Could find no anchor for comment {node:?}",), - None, + return Err(FormatterError::CommentOrphaned( + node.utf8_text(input.as_bytes())?.to_string(), )); } } else { if let Some(anchor) = previous_non_comment_leaf(node.clone()) { - return Ok(Commented::CommentedBefore(anchor.into())); + return Ok(Commented::CommentedBefore((&anchor).into())); } else if let Some(anchor) = next_non_comment_leaf(node.clone()) { - return Ok(Commented::CommentedAfter(anchor.into())); + return Ok(Commented::CommentedAfter((&anchor).into())); } else { - return Err(FormatterError::Internal( - format!("Could find no anchor for comment {node:?}",), - None, + return Err(FormatterError::CommentOrphaned( + node.utf8_text(input.as_bytes())?.to_string(), )); } } @@ -342,6 +358,15 @@ pub struct AnchoredComment { pub commented: Commented, } +impl From<&AnchoredComment> for InputSection { + fn from(value: &AnchoredComment) -> Self { + match value.commented { + Commented::CommentedBefore(section) => section, + Commented::CommentedAfter(section) => section, + } + } +} + pub struct SeparatedInput { pub input_tree: Tree, pub input_string: String, diff --git a/topiary-core/src/common.rs b/topiary-core/src/common.rs index 7bd3e1be..8f6bccc0 100644 --- a/topiary-core/src/common.rs +++ b/topiary-core/src/common.rs @@ -40,8 +40,14 @@ pub struct InputSection { pub end: Position, } -impl From> for InputSection { - fn from(value: Node) -> Self { +impl InputSection { + pub fn contains(self: Self, other: &Self) -> bool { + self.start <= other.start && other.end <= self.end + } +} + +impl From<&Node<'_>> for InputSection { + fn from(value: &Node) -> Self { InputSection { start: value.start_position().into(), end: value.end_position().into(), diff --git a/topiary-core/src/error.rs b/topiary-core/src/error.rs index 4c93c8fb..18e1344c 100644 --- a/topiary-core/src/error.rs +++ b/topiary-core/src/error.rs @@ -6,6 +6,14 @@ use std::{error::Error, fmt, io, ops::Deref, str, string}; /// The various errors the formatter may return. #[derive(Debug)] pub enum FormatterError { + /// Found an anchored comment that couldn't be re-attached to its anchor. + /// The second argument should be an InputSection, but cyclic dependencies + /// make it difficult. + CommentAbandoned(String, String), + + /// Found a comment for which no anchor could be found. + CommentOrphaned(String), + /// The input produced output that isn't idempotent, i.e. formatting the /// output again made further changes. If this happened using our provided /// query files, it is a bug. Please log an issue. @@ -57,6 +65,20 @@ impl fmt::Display for FormatterError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let please_log_message = "If this happened with the built-in query files, it is a bug. It would be\nhelpful if you logged this error at\nhttps://github.com/tweag/topiary/issues/new?assignees=&labels=type%3A+bug&template=bug_report.md"; match self { + Self::CommentAbandoned(comment, anchor) => { + write!( + f, + "Found an anchored comment, but could not attach it back to its anchor\n{comment}\nThe anchor was {anchor}", + ) + } + + Self::CommentOrphaned(comment) => { + write!( + f, + "Found a comment for which no anchor could be found:\n{comment}", + ) + } + Self::Idempotence => { write!( f, @@ -100,6 +122,8 @@ impl Error for FormatterError { fn source(&self) -> Option<&(dyn Error + 'static)> { match self { Self::Idempotence + | Self::CommentAbandoned(..) + | Self::CommentOrphaned(_) | Self::Parsing { .. } | Self::PatternDoesNotMatch | Self::Io(IoError::Generic(_, None)) => None, diff --git a/topiary-core/src/tree_sitter.rs b/topiary-core/src/tree_sitter.rs index db1f73f0..3a5ebacf 100644 --- a/topiary-core/src/tree_sitter.rs +++ b/topiary-core/src/tree_sitter.rs @@ -225,7 +225,8 @@ pub fn apply_query( for AnchoredComment { comment_text, commented, - } in comments + .. + } in comments.iter() { log::debug!("Found comment \"{comment_text}\" with anchor {commented:?}"); } @@ -251,9 +252,9 @@ pub fn apply_query( let specified_leaf_nodes: HashSet = collect_leaf_ids(&matches, capture_names.clone()); // The Flattening: collects all terminal nodes of the tree-sitter tree in a Vec - let mut atoms = AtomCollection::collect_leafs(&root, source, specified_leaf_nodes)?; + let mut atoms = AtomCollection::collect_leafs(&root, source, specified_leaf_nodes, comments)?; - log::debug!("List of atoms before formatting: {atoms:?}"); + log::debug!("List of atoms before formatting: {atoms:#?}"); // Memoization of the pattern positions let mut pattern_positions: Vec> = Vec::new();