Skip to content

Commit

Permalink
Re-attach comments in AtomCollection
Browse files Browse the repository at this point in the history
  • Loading branch information
nbacquey committed Dec 11, 2024
1 parent 48483fb commit d6b26e3
Show file tree
Hide file tree
Showing 5 changed files with 126 additions and 19 deletions.
40 changes: 37 additions & 3 deletions topiary-core/src/atom_collection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ use std::{
use topiary_tree_sitter_facade::Node;

use crate::{
comments::{AnchoredComment, Comment},
common::InputSection,
tree_sitter::NodeExt,
Atom, FormatterError, FormatterResult, ScopeCondition, ScopeInformation,
};
Expand Down Expand Up @@ -38,6 +40,8 @@ pub struct AtomCollection {
/// something to a node, a new Atom is added to this HashMap.
/// The key of the hashmap is the identifier of the node.
append: HashMap<usize, Vec<Atom>>,
///
comments: HashMap<usize, Vec<Comment>>,
/// A query file can define custom leaf nodes (nodes that Topiary should not
/// touch during formatting). When such a node is encountered, its id is stored in
/// this HashSet.
Expand Down Expand Up @@ -73,6 +77,7 @@ impl AtomCollection {
atoms,
prepend: HashMap::new(),
append: HashMap::new(),
comments: HashMap::new(),
specified_leaf_nodes: HashSet::new(),
parent_leaf_nodes: HashMap::new(),
multi_line_nodes: HashSet::new(),
Expand All @@ -88,6 +93,7 @@ impl AtomCollection {
root: &Node,
source: &[u8],
specified_leaf_nodes: HashSet<usize>,
mut comments: Vec<AnchoredComment>,
) -> FormatterResult<Self> {
// Flatten the tree, from the root node, in a depth-first traversal
let dfs_nodes = dfs_flatten(root);
Expand All @@ -101,6 +107,7 @@ impl AtomCollection {
atoms: Vec::new(),
prepend: HashMap::new(),
append: HashMap::new(),
comments: HashMap::new(),
specified_leaf_nodes,
parent_leaf_nodes: HashMap::new(),
multi_line_nodes,
Expand All @@ -110,9 +117,18 @@ impl AtomCollection {
counter: 0,
};

atoms.collect_leafs_inner(root, source, &Vec::new(), 0)?;
atoms.collect_leafs_inner(root, source, &mut comments, &Vec::new(), 0)?;

Ok(atoms)
if let Some(comment) = comments.pop() {
// Some anchored couldn't be attached back to the code:
// rais ean error with the first of them
Err(FormatterError::CommentAbandoned(
comment.comment_text,
format!("{:?}", comment.commented),
))
} else {
Ok(atoms)
}
}

// wrap inside a conditional atom if #single/multi_line_scope_only! is set
Expand Down Expand Up @@ -496,11 +512,14 @@ impl AtomCollection {
/// A leaf node is either a node with no children or a node that is specified as a leaf node by the formatter.
/// A leaf parent is the closest ancestor of a leaf node.
///
/// This function also attach comments to the leaf node that contains their anchor.
///
/// # Arguments
///
/// * `node` - The current node to process.
/// * `source` - The full source code as a byte slice.
/// * `parent_ids` - A vector of node ids that are the ancestors of the current node.
/// * `comments` - A vector that stores unattached comments.
/// * `level` - The depth of the current node in the CST tree.
///
/// # Errors
Expand All @@ -510,6 +529,7 @@ impl AtomCollection {
&mut self,
node: &Node,
source: &[u8],
comments: &mut Vec<AnchoredComment>,
parent_ids: &[usize],
level: usize,
) -> FormatterResult<()> {
Expand Down Expand Up @@ -542,9 +562,23 @@ impl AtomCollection {
});
// Mark all sub-nodes as having this node as a "leaf parent"
self.mark_leaf_parent(node, node.id());
// Test the node for comments
// `comments` is sorted in reverse order, so `pop()` gets the first one.
while let Some(comment) = comments.pop() {
let node_section: InputSection = node.into();
if node_section.contains(&(&comment).into()) {
self.comments
.entry(id)
.or_insert(Vec::new())
.push((&comment).into());
} else {
comments.push(comment);
break;
}
}
} else {
for child in node.children(&mut node.walk()) {
self.collect_leafs_inner(&child, source, &parent_ids, level + 1)?;
self.collect_leafs_inner(&child, source, comments, &parent_ids, level + 1)?;
}
}

Expand Down
65 changes: 54 additions & 11 deletions topiary-core/src/comments.rs
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,42 @@ impl Diff<InputSection> for Commented {
}
}

/// A comment, as part of Topiary's output.
/// We forget posiiton information here, because the struct
/// is supposed to be attached to the node it comments.
#[derive(Debug)]
pub enum Comment {
/// The comment is before the code section, as is:
/// ```
/// struct Foo {
/// // let's have a baz
/// baz: usize,
/// // and a qux
/// qux: usize,
/// }
/// ```
CommentBefore(String),
/// The comment is after the code section, as in:
/// The code section is before the comment, as in:
/// ```
/// struct Foo {
/// baz: usize, // this is baz
/// quz: usize, // this is qux
/// }
/// ```
CommentAfter(String),
}

impl From<&AnchoredComment> for Comment {
fn from(value: &AnchoredComment) -> Self {
let content = value.comment_text.clone();
match value.commented {
Commented::CommentedAfter(_) => Comment::CommentBefore(content),
Commented::CommentedBefore(_) => Comment::CommentAfter(content),
}
}
}

// TODO: if performance is an issue, use TreeCursor to navigate the tree
fn next_non_comment_leaf<'tree>(starting_node: Node<'tree>) -> Option<Node<'tree>> {
let mut node: Node<'tree> = starting_node;
Expand Down Expand Up @@ -246,24 +282,22 @@ fn find_anchor<'tree>(node: &'tree Node<'tree>, input: &str) -> FormatterResult<
})?;
if prefix.trim_start() == "" {
if let Some(anchor) = next_non_comment_leaf(node.clone()) {
return Ok(Commented::CommentedAfter(anchor.into()));
return Ok(Commented::CommentedAfter((&anchor).into()));
} else if let Some(anchor) = previous_non_comment_leaf(node.clone()) {
return Ok(Commented::CommentedBefore(anchor.into()));
return Ok(Commented::CommentedBefore((&anchor).into()));
} else {
return Err(FormatterError::Internal(
format!("Could find no anchor for comment {node:?}",),
None,
return Err(FormatterError::CommentOrphaned(
node.utf8_text(input.as_bytes())?.to_string(),
));
}
} else {
if let Some(anchor) = previous_non_comment_leaf(node.clone()) {
return Ok(Commented::CommentedBefore(anchor.into()));
return Ok(Commented::CommentedBefore((&anchor).into()));
} else if let Some(anchor) = next_non_comment_leaf(node.clone()) {
return Ok(Commented::CommentedAfter(anchor.into()));
return Ok(Commented::CommentedAfter((&anchor).into()));
} else {
return Err(FormatterError::Internal(
format!("Could find no anchor for comment {node:?}",),
None,
return Err(FormatterError::CommentOrphaned(
node.utf8_text(input.as_bytes())?.to_string(),
));
}
}
Expand All @@ -275,6 +309,15 @@ pub struct AnchoredComment {
pub commented: Commented,
}

impl From<&AnchoredComment> for InputSection {
fn from(value: &AnchoredComment) -> Self {
match value.commented {
Commented::CommentedBefore(section) => section,
Commented::CommentedAfter(section) => section,
}
}
}

pub struct SeparatedInput {
pub input_tree: Tree,
pub input_string: String,
Expand Down Expand Up @@ -327,7 +370,7 @@ pub fn extract_comments<'a>(
comment_text,
}|
-> FormatterResult<AnchoredComment> {
commented.subtract(comment.clone().into())?;
commented.subtract((&comment).into())?;
Ok(AnchoredComment {
commented,
comment_text: comment_text.to_string(),
Expand Down
10 changes: 8 additions & 2 deletions topiary-core/src/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,14 @@ pub struct InputSection {
pub end: Position,
}

impl From<Node<'_>> for InputSection {
fn from(value: Node) -> Self {
impl InputSection {
pub fn contains(self: Self, other: &Self) -> bool {
self.start <= other.start && other.end <= self.end
}
}

impl From<&Node<'_>> for InputSection {
fn from(value: &Node) -> Self {
InputSection {
start: value.start_position().into(),
end: value.end_position().into(),
Expand Down
24 changes: 24 additions & 0 deletions topiary-core/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,14 @@ use std::{error::Error, fmt, io, ops::Deref, str, string};
/// The various errors the formatter may return.
#[derive(Debug)]
pub enum FormatterError {
/// Found an anchored comment that couldn't be re-attached to its anchor.
/// The second argument should be an InputSection, but cyclic dependencies
/// make it difficult.
CommentAbandoned(String, String),

/// Found a comment for which no anchor could be found.
CommentOrphaned(String),

/// The input produced output that isn't idempotent, i.e. formatting the
/// output again made further changes. If this happened using our provided
/// query files, it is a bug. Please log an issue.
Expand Down Expand Up @@ -57,6 +65,20 @@ impl fmt::Display for FormatterError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let please_log_message = "If this happened with the built-in query files, it is a bug. It would be\nhelpful if you logged this error at\nhttps://github.com/tweag/topiary/issues/new?assignees=&labels=type%3A+bug&template=bug_report.md";
match self {
Self::CommentAbandoned(comment, anchor) => {
write!(
f,
"Found an anchored comment, but could not attach it back to its anchor\n{comment}\nThe anchor was {anchor}",
)
}

Self::CommentOrphaned(comment) => {
write!(
f,
"Found a comment for which no anchor could be found:\n{comment}",
)
}

Self::Idempotence => {
write!(
f,
Expand Down Expand Up @@ -100,6 +122,8 @@ impl Error for FormatterError {
fn source(&self) -> Option<&(dyn Error + 'static)> {
match self {
Self::Idempotence
| Self::CommentAbandoned(..)
| Self::CommentOrphaned(_)
| Self::Parsing { .. }
| Self::PatternDoesNotMatch
| Self::Io(IoError::Generic(_, None)) => None,
Expand Down
6 changes: 3 additions & 3 deletions topiary-core/src/tree_sitter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ pub fn apply_query(
for AnchoredComment {
comment_text,
commented,
} in comments
} in comments.iter()
{
log::debug!("Found comment \"{comment_text}\" with anchor {commented:?}");
}
Expand All @@ -251,9 +251,9 @@ pub fn apply_query(
let specified_leaf_nodes: HashSet<usize> = collect_leaf_ids(&matches, capture_names.clone());

// The Flattening: collects all terminal nodes of the tree-sitter tree in a Vec
let mut atoms = AtomCollection::collect_leafs(&root, source, specified_leaf_nodes)?;
let mut atoms = AtomCollection::collect_leafs(&root, source, specified_leaf_nodes, comments)?;

log::debug!("List of atoms before formatting: {atoms:?}");
log::debug!("List of atoms before formatting: {atoms:#?}");

// Memoization of the pattern positions
let mut pattern_positions: Vec<Option<Position>> = Vec::new();
Expand Down

0 comments on commit d6b26e3

Please sign in to comment.