Skip to content

Commit

Permalink
Re-attach comments in AtomCollection
Browse files Browse the repository at this point in the history
  • Loading branch information
nbacquey committed Dec 18, 2024
1 parent 6000cac commit 0d753d4
Show file tree
Hide file tree
Showing 5 changed files with 122 additions and 19 deletions.
55 changes: 51 additions & 4 deletions topiary-core/src/atom_collection.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
use std::{
borrow::Cow,
collections::{HashMap, HashSet},
collections::{HashMap, HashSet, VecDeque},
mem,
ops::Deref,
};

use topiary_tree_sitter_facade::Node;

use crate::{
comments::{AnchoredComment, Comment, Commented},
common::InputSection,
tree_sitter::NodeExt,
Atom, FormatterError, FormatterResult, ScopeCondition, ScopeInformation,
};
Expand Down Expand Up @@ -38,6 +40,10 @@ pub struct AtomCollection {
/// something to a node, a new Atom is added to this HashMap.
/// The key of the hashmap is the identifier of the node.
append: HashMap<usize, Vec<Atom>>,
/// Maps node IDs to comments before that node. Comments are stored in reading order.
comments_before: HashMap<usize, VecDeque<Comment>>,
/// Maps node IDs to comments after that node. Comments are stored in reading order.
comments_after: HashMap<usize, VecDeque<Comment>>,
/// A query file can define custom leaf nodes (nodes that Topiary should not
/// touch during formatting). When such a node is encountered, its id is stored in
/// this HashSet.
Expand Down Expand Up @@ -73,6 +79,8 @@ impl AtomCollection {
atoms,
prepend: HashMap::new(),
append: HashMap::new(),
comments_before: HashMap::new(),
comments_after: HashMap::new(),
specified_leaf_nodes: HashSet::new(),
parent_leaf_nodes: HashMap::new(),
multi_line_nodes: HashSet::new(),
Expand All @@ -88,6 +96,7 @@ impl AtomCollection {
root: &Node,
source: &[u8],
specified_leaf_nodes: HashSet<usize>,
mut comments: Vec<AnchoredComment>,
) -> FormatterResult<Self> {
// Flatten the tree, from the root node, in a depth-first traversal
let dfs_nodes = dfs_flatten(root);
Expand All @@ -101,6 +110,8 @@ impl AtomCollection {
atoms: Vec::new(),
prepend: HashMap::new(),
append: HashMap::new(),
comments_before: HashMap::new(),
comments_after: HashMap::new(),
specified_leaf_nodes,
parent_leaf_nodes: HashMap::new(),
multi_line_nodes,
Expand All @@ -110,9 +121,18 @@ impl AtomCollection {
counter: 0,
};

atoms.collect_leafs_inner(root, source, &Vec::new(), 0)?;
atoms.collect_leafs_inner(root, source, &mut comments, &Vec::new(), 0)?;

Ok(atoms)
if let Some(comment) = comments.pop() {
// Some anchored couldn't be attached back to the code:
// raise an error with the first of them
Err(FormatterError::CommentAbandoned(
comment.comment_text,
format!("{:?}", comment.commented),
))
} else {
Ok(atoms)
}
}

// wrap inside a conditional atom if #single/multi_line_scope_only! is set
Expand Down Expand Up @@ -496,11 +516,14 @@ impl AtomCollection {
/// A leaf node is either a node with no children or a node that is specified as a leaf node by the formatter.
/// A leaf parent is the closest ancestor of a leaf node.
///
/// This function also attach comments to the leaf node that contains their anchor.
///
/// # Arguments
///
/// * `node` - The current node to process.
/// * `source` - The full source code as a byte slice.
/// * `parent_ids` - A vector of node ids that are the ancestors of the current node.
/// * `comments` - A vector that stores unattached comments.
/// * `level` - The depth of the current node in the CST tree.
///
/// # Errors
Expand All @@ -510,6 +533,7 @@ impl AtomCollection {
&mut self,
node: &Node,
source: &[u8],
comments: &mut Vec<AnchoredComment>,
parent_ids: &[usize],
level: usize,
) -> FormatterResult<()> {
Expand Down Expand Up @@ -542,9 +566,32 @@ impl AtomCollection {
});
// Mark all sub-nodes as having this node as a "leaf parent"
self.mark_leaf_parent(node, node.id());
// Test the node for comments
// `comments` is sorted in reverse order, so `pop()` gets the first one.
while let Some(comment) = comments.pop() {
let node_section: InputSection = node.into();
// REVIEW: the double borrow is sort of weird, is this idiomatic?
if node_section.contains(&(&comment).into()) {
match comment.commented {
Commented::CommentedAfter(_) => self
.comments_before
.entry(id)
.or_default()
.push_back((&comment).into()),
Commented::CommentedBefore(_) => self
.comments_after
.entry(id)
.or_default()
.push_back((&comment).into()),
}
} else {
comments.push(comment);
break;
}
}
} else {
for child in node.children(&mut node.walk()) {
self.collect_leafs_inner(&child, source, &parent_ids, level + 1)?;
self.collect_leafs_inner(&child, source, comments, &parent_ids, level + 1)?;
}
}

Expand Down
45 changes: 35 additions & 10 deletions topiary-core/src/comments.rs
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,24 @@ impl Diff<InputSection> for Commented {
}
}

/// A comment, as part of Topiary's output.
/// We forget node information here, because the struct
/// is supposed to be attached to the node it comments.
#[derive(Debug)]
pub struct Comment {
pub content: String,
pub original_column: i32,
}

impl From<&AnchoredComment> for Comment {
fn from(value: &AnchoredComment) -> Self {
Comment {
content: value.comment_text.clone(),
original_column: value.original_column,
}
}
}

fn next_disjoint_node<'tree>(starting_node: &'tree Node<'tree>) -> Option<Node<'tree>> {
let mut node: Node<'tree> = starting_node.clone();
// move up until we find a next sibling
Expand Down Expand Up @@ -311,24 +329,22 @@ fn find_anchor<'tree>(node: &'tree Node<'tree>, input: &str) -> FormatterResult<
})?;
if prefix.trim_start() == "" {
if let Some(anchor) = next_non_comment_leaf(node.clone()) {
return Ok(Commented::CommentedAfter(anchor.into()));
return Ok(Commented::CommentedAfter((&anchor).into()));
} else if let Some(anchor) = previous_non_comment_leaf(node.clone()) {
return Ok(Commented::CommentedBefore(anchor.into()));
return Ok(Commented::CommentedBefore((&anchor).into()));
} else {
return Err(FormatterError::Internal(
format!("Could find no anchor for comment {node:?}",),
None,
return Err(FormatterError::CommentOrphaned(
node.utf8_text(input.as_bytes())?.to_string(),
));
}
} else {
if let Some(anchor) = previous_non_comment_leaf(node.clone()) {
return Ok(Commented::CommentedBefore(anchor.into()));
return Ok(Commented::CommentedBefore((&anchor).into()));
} else if let Some(anchor) = next_non_comment_leaf(node.clone()) {
return Ok(Commented::CommentedAfter(anchor.into()));
return Ok(Commented::CommentedAfter((&anchor).into()));
} else {
return Err(FormatterError::Internal(
format!("Could find no anchor for comment {node:?}",),
None,
return Err(FormatterError::CommentOrphaned(
node.utf8_text(input.as_bytes())?.to_string(),
));
}
}
Expand All @@ -342,6 +358,15 @@ pub struct AnchoredComment {
pub commented: Commented,
}

impl From<&AnchoredComment> for InputSection {
fn from(value: &AnchoredComment) -> Self {
match value.commented {
Commented::CommentedBefore(section) => section,
Commented::CommentedAfter(section) => section,
}
}
}

pub struct SeparatedInput {
pub input_tree: Tree,
pub input_string: String,
Expand Down
10 changes: 8 additions & 2 deletions topiary-core/src/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,14 @@ pub struct InputSection {
pub end: Position,
}

impl From<Node<'_>> for InputSection {
fn from(value: Node) -> Self {
impl InputSection {
pub fn contains(self: Self, other: &Self) -> bool {
self.start <= other.start && other.end <= self.end
}
}

impl From<&Node<'_>> for InputSection {
fn from(value: &Node) -> Self {
InputSection {
start: value.start_position().into(),
end: value.end_position().into(),
Expand Down
24 changes: 24 additions & 0 deletions topiary-core/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,14 @@ use std::{error::Error, fmt, io, ops::Deref, str, string};
/// The various errors the formatter may return.
#[derive(Debug)]
pub enum FormatterError {
/// Found an anchored comment that couldn't be re-attached to its anchor.
/// The second argument should be an InputSection, but cyclic dependencies
/// make it difficult.
CommentAbandoned(String, String),

/// Found a comment for which no anchor could be found.
CommentOrphaned(String),

/// The input produced output that isn't idempotent, i.e. formatting the
/// output again made further changes. If this happened using our provided
/// query files, it is a bug. Please log an issue.
Expand Down Expand Up @@ -57,6 +65,20 @@ impl fmt::Display for FormatterError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let please_log_message = "If this happened with the built-in query files, it is a bug. It would be\nhelpful if you logged this error at\nhttps://github.com/tweag/topiary/issues/new?assignees=&labels=type%3A+bug&template=bug_report.md";
match self {
Self::CommentAbandoned(comment, anchor) => {
write!(
f,
"Found an anchored comment, but could not attach it back to its anchor\n{comment}\nThe anchor was {anchor}",
)
}

Self::CommentOrphaned(comment) => {
write!(
f,
"Found a comment for which no anchor could be found:\n{comment}",
)
}

Self::Idempotence => {
write!(
f,
Expand Down Expand Up @@ -100,6 +122,8 @@ impl Error for FormatterError {
fn source(&self) -> Option<&(dyn Error + 'static)> {
match self {
Self::Idempotence
| Self::CommentAbandoned(..)
| Self::CommentOrphaned(_)
| Self::Parsing { .. }
| Self::PatternDoesNotMatch
| Self::Io(IoError::Generic(_, None)) => None,
Expand Down
7 changes: 4 additions & 3 deletions topiary-core/src/tree_sitter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,8 @@ pub fn apply_query(
for AnchoredComment {
comment_text,
commented,
} in comments
..
} in comments.iter()
{
log::debug!("Found comment \"{comment_text}\" with anchor {commented:?}");
}
Expand All @@ -251,9 +252,9 @@ pub fn apply_query(
let specified_leaf_nodes: HashSet<usize> = collect_leaf_ids(&matches, capture_names.clone());

// The Flattening: collects all terminal nodes of the tree-sitter tree in a Vec
let mut atoms = AtomCollection::collect_leafs(&root, source, specified_leaf_nodes)?;
let mut atoms = AtomCollection::collect_leafs(&root, source, specified_leaf_nodes, comments)?;

log::debug!("List of atoms before formatting: {atoms:?}");
log::debug!("List of atoms before formatting: {atoms:#?}");

// Memoization of the pattern positions
let mut pattern_positions: Vec<Option<Position>> = Vec::new();
Expand Down

0 comments on commit 0d753d4

Please sign in to comment.