Allow markdown syntax within captions (#14)

This commit changes the preprocessing logic that wraps the table caption so that the caption itself remains plain Markdown. This causes the mkdocs parser to convert the caption into HTML on its own, which allows the use of Markdown in captions. The logic is adapted so that a custom HTML element is created both before and after the caption. The caption itself is left untouched. In the post-processing, the two added tags can be used to easily find the start and end of the caption.
tobiasah · Feb 16, 2024 · 931d951 · 931d951
1 parent 886f64d
commit 931d951
Show file tree

Hide file tree

Showing 14 changed files with 422 additions and 206 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,9 @@
 # Changelog
 
+## Version 1.0.1
+
+* Allow markdown syntax within the caption element.
+
 ## Version 1.0.0
 
 * Prevent conversion of emojis into figure elements with captions.

diff --git a/demo/docs/index.md b/demo/docs/index.md
@@ -29,7 +29,7 @@ Inline images should not be converted ![Hello](assets/demo.png){width="30"}, eve
 
 ## Tables
 
-Table: Table caption
+Table: Table **bold** caption
 
 | My | Table |
 | - | - |

diff --git a/docs/quick_start.md b/docs/quick_start.md
@@ -17,6 +17,10 @@ With the plugin enabled, one can now use an easy and descriptive syntax to add
 captions to figures and tables. The captions are automatically numbered and
 can be referenced in the text.
 
+!!! tip
+
+    The caption text is converted by mkdocs itself. This means that technically
+    a caption can contain the same things as any other text.
 
 === "Markdown"
 

diff --git a/src/mkdocs_caption/config.py b/src/mkdocs_caption/config.py
@@ -1,4 +1,5 @@
 """The configuration options for the Caption plugin."""
+
 from __future__ import annotations
 
 import typing as t

diff --git a/src/mkdocs_caption/custom.py b/src/mkdocs_caption/custom.py
@@ -1,11 +1,18 @@
 """Custom caption handling."""
+
 from __future__ import annotations
 
 from typing import TYPE_CHECKING
 
 from lxml import etree
 
-from mkdocs_caption.helper import TreeElement, update_references, wrap_md_captions
+from mkdocs_caption.helper import (
+    CaptionInfo,
+    TreeElement,
+    iter_caption_elements,
+    update_references,
+    wrap_md_captions,
+)
 
 if TYPE_CHECKING:
     from mkdocs_caption.config import IdentifierCaption
@@ -47,11 +54,10 @@ def preprocess_markdown(
 
 
 def _wrap_in_figure(
-    caption_element: TreeElement,
+    caption_info: CaptionInfo,
     *,
     tree: TreeElement,
     index: int,
-    identifier: str,
     config: IdentifierCaption,
     logger: PluginLogger,
 ) -> None:
@@ -62,52 +68,55 @@ def _wrap_in_figure(
     with a custom caption based on the caption element, index, and identifier.
 
     Args:
-        caption_element: The caption element to use for the caption text.
+        caption_info: The caption info.
         tree: The root element of the XML tree.
         index: The index of the figure element.
-        identifier: The identifier of the custom caption.
         config: The plugin configuration.
         logger: Current plugin logger.
     """
-    a_wrapper: TreeElement = caption_element.getparent()  # type: ignore[assignment]
-    target_element = a_wrapper.getnext()
-    if target_element is None:
+    if caption_info.target_element is None:
         logger.error("Custom caption does not semm to have a element that follows it")
         return
 
     figure_element = etree.Element("figure", None, None)
-    figure_element.attrib.update(caption_element.attrib)
+    figure_element.attrib.update(caption_info.attributes)
     # wrap target element
-    target_element.addprevious(figure_element)
+    caption_info.target_element.addprevious(figure_element)
 
     # add caption
-    caption_prefix = config.get_caption_prefix(identifier=identifier, index=index)
+    caption_prefix = config.get_caption_prefix(
+        identifier=caption_info.identifier,
+        index=index,
+    )
     try:
         fig_caption_element = etree.fromstring(
-            f"<figcaption>{caption_prefix} {caption_element.text}</figcaption>",
+            f"<figcaption>{caption_prefix} {caption_info.caption}</figcaption>",
         )
     except etree.XMLSyntaxError:
-        logger.error("Invalid XML in caption: %s", caption_element.text)
+        logger.error(
+            'Invalid XML in caption: <caption style="caption-side:%s">%s %s</caption>',
+            config.position,
+            caption_prefix,
+            caption_info.caption,
+        )
         return
     if config.position == "top":
         figure_element.append(fig_caption_element)
-        figure_element.append(target_element)
+        figure_element.append(caption_info.target_element)
     else:
-        figure_element.append(target_element)
+        figure_element.append(caption_info.target_element)
         figure_element.append(fig_caption_element)
 
-    figure_id = caption_element.attrib.get(
+    figure_id = caption_info.attributes.get(
         "id",
-        config.get_default_id(identifier=identifier, index=index),
+        config.get_default_id(identifier=caption_info.identifier, index=index),
     )
     figure_element.attrib["id"] = figure_id
     update_references(
         tree,
         figure_id,
-        config.get_reference_text(identifier=identifier, index=index),
+        config.get_reference_text(identifier=caption_info.identifier, index=index),
     )
-    a_wrapper.remove(caption_element)
-    a_wrapper.getparent().remove(a_wrapper)  # type: ignore[union-attr]
 
 
 def postprocess_html(
@@ -129,15 +138,13 @@ def postprocess_html(
     if not config.enable:
         return
     index_dict: dict[str, int] = {}
-    for custom_caption in tree.xpath(f"//{CAPTION_TAG}"):
-        identifier = custom_caption.attrib.pop("identifier")
-        index = index_dict.get(identifier, config.start_index)
-        index_dict[identifier] = index + config.increment_index
+    for caption_info in iter_caption_elements(CAPTION_TAG, tree):
+        index = index_dict.get(caption_info.identifier, config.start_index)
+        index_dict[caption_info.identifier] = index + config.increment_index
         _wrap_in_figure(
-            custom_caption,
+            caption_info,
             tree=tree,
             index=index,
-            identifier=identifier,
             config=config,
             logger=logger,
         )
diff --git a/src/mkdocs_caption/helper.py b/src/mkdocs_caption/helper.py
@@ -1,7 +1,10 @@
 """General helper functions for the mkdocs-caption plugin."""
+
 from __future__ import annotations
 
 import re
+from dataclasses import dataclass
+from typing import Iterator
 
 from lxml import etree
 
@@ -74,7 +77,7 @@ def _escape_md_caption(match: re.Match, *, target_tag: str) -> str:
     options = _parse_extended_markdown(match.group(5))
     return str(
         f'\n{prefix}<{target_tag} identifier="{identifier}"'
-        f"{options}>{caption}</{target_tag}>\n\n",
+        f"{options}>\n\n{prefix}{caption}\n\n{prefix}<{target_tag}-end>\n\n",
     )
 
 
@@ -108,13 +111,78 @@ def wrap_md_captions(
     )
 
 
-def sanitize_caption(caption: str | None) -> str:
-    """Sanitize a caption to be used as an id.
+def create_caption_str(caption_text_elements: list[TreeElement]) -> str:
+    """Create a htaml string from a list of caption text elements.
+
+    This function takes a list of caption text elements and returns a string
+    with the caption text.
 
     Args:
-        caption: The caption to sanatize.
+        caption_text_elements: The list of caption text elements.
 
     Returns:
-        The sanitized caption.
+        A string with the caption text.
+    """
+    caption_text = ""
+    for text_element in caption_text_elements:
+        caption_text += etree.tostring(
+            text_element,
+            encoding="unicode",
+            method="html",
+        ).strip("\n")
+    if len(caption_text_elements) == 1:
+        caption_text = caption_text.strip("<p>").strip("</p>")
+    return caption_text
+
+
+@dataclass
+class CaptionInfo:
+    """Dataclass to store information about a caption."""
+
+    target_element: TreeElement
+    attributes: dict[str, str]
+    caption: str
+    identifier: str
+
+
+def iter_caption_elements(tag: str, tree: TreeElement) -> Iterator[CaptionInfo]:
+    """Iterate over all caption elements in an XML tree.
+
+    This function takes an XML tree and iterates over all caption elements
+    in the tree. It yields a tuple with the target element, the attributes
+    of the caption element, the caption text, and the identifier of the
+    caption element.
+
+    Args:
+        tag: The tag of the caption elements.
+        tree: The XML tree to iterate over.
+
+    Yields:
+        A tuple with the target element, the attributes of the caption
+        element, the caption text, and the identifier of the caption element.
     """
-    return caption.replace(" & ", " &amp; ") if caption else ""
+    for caption_element in tree.xpath(f"//{tag}"):
+        a_wrapper = caption_element.getparent()
+        caption_text_elements = []
+        a_wrapper_end = a_wrapper.getnext()
+        while a_wrapper_end is not None and not a_wrapper_end.xpath(f"{tag}-end"):
+            caption_text_elements.append(a_wrapper_end)
+            a_wrapper_end = a_wrapper_end.getnext()
+
+        target_element = a_wrapper_end.getnext()
+        # unused attribute identifier
+        identifier = caption_element.attrib.pop("identifier")
+
+        try:
+            yield CaptionInfo(
+                target_element=target_element,
+                attributes=caption_element.attrib,
+                caption=create_caption_str(caption_text_elements),
+                identifier=identifier,
+            )
+        finally:
+            a_wrapper.remove(caption_element)
+            a_wrapper.getparent().remove(a_wrapper)
+            for caption_text_element in caption_text_elements:
+                caption_text_element.getparent().remove(caption_text_element)
+            a_wrapper_end.getparent().remove(a_wrapper_end)
diff --git a/src/mkdocs_caption/image.py b/src/mkdocs_caption/image.py
@@ -1,11 +1,17 @@
 """Handle image related captioning."""
+
 from __future__ import annotations
 
 from typing import TYPE_CHECKING
 
 from lxml import etree
 
-from mkdocs_caption.helper import TreeElement, update_references, wrap_md_captions
+from mkdocs_caption.helper import (
+    TreeElement,
+    iter_caption_elements,
+    update_references,
+    wrap_md_captions,
+)
 
 if TYPE_CHECKING:
     from mkdocs_caption.config import FigureCaption
@@ -159,27 +165,22 @@ def postprocess_html(
 
     # Handle additional figure caption elements
     custom_figure_attrib = {}
-    for custom_caption in tree.xpath(f"//{IMG_CAPTION_TAG}"):
-        a_wrapper = custom_caption.getparent()
+    for caption_info in iter_caption_elements(IMG_CAPTION_TAG, tree):
         try:
-            next_element = a_wrapper.getnext()
             target_element = (
-                next_element
-                if next_element.tag == "img"
-                else next_element.xpath(".//img")[0]
+                caption_info.target_element
+                if caption_info.target_element.tag == "img"
+                else caption_info.target_element.xpath(".//img")[0]
             )
         except IndexError:
             logger.error(
                 "Figure caption must be followed by a img element. Skipping: %s",
-                custom_caption.text,
+                caption_info.caption,
             )
             continue
-        target_element.attrib["title"] = custom_caption.text
-        # unused attribute identifier
-        custom_caption.attrib.pop("identifier")
-        custom_figure_attrib[target_element] = custom_caption.attrib
-        a_wrapper.remove(custom_caption)
-        a_wrapper.getparent().remove(a_wrapper)
+        target_element.attrib["title"] = caption_info.caption
+        custom_figure_attrib[target_element] = caption_info.attributes
+
     # Iterate through all images and wrap them in a figure element if requested
     index = config.start_index
     for img_element in tree.xpath("//p/a/img|//p/img"):

diff --git a/src/mkdocs_caption/logger.py b/src/mkdocs_caption/logger.py
@@ -1,4 +1,5 @@
 """Setup custom logger that is mkdocs compatible."""
+
 from __future__ import annotations
 
 import logging

diff --git a/src/mkdocs_caption/plugin.py b/src/mkdocs_caption/plugin.py
@@ -1,4 +1,5 @@
 """MkDocs plugin for custom image and table captions."""
+
 from lxml import etree
 from mkdocs.config.defaults import MkDocsConfig
 from mkdocs.plugins import BasePlugin, event_priority