Skip to content

Commit

Permalink
Allow markdown syntax within captions (#14)
Browse files Browse the repository at this point in the history
This commit changes the logic of the preprocessing that wraps the
table caption such that the caption itself is still plain markdown.
This causes the mkdocs parser to convert the caption into html on
its own, which allows the usage of markdown in captions.

The logic is adapted in such a way that a custom html element is
created both before and after the caption. The caption itself is
untouched. In the post processing the two added tags can be used
to find the start and end of the caption easily.
  • Loading branch information
tobiasah committed Feb 16, 2024
1 parent 886f64d commit 931d951
Show file tree
Hide file tree
Showing 14 changed files with 422 additions and 206 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Changelog

## Version 1.0.1

* Allow markdown syntax within the caption element.

## Version 1.0.0

* Prevent conversion of emojis into figure elements with captions.
Expand Down
2 changes: 1 addition & 1 deletion demo/docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ Inline images should not be converted ![Hello](assets/demo.png){width="30"}, eve

## Tables

Table: Table caption
Table: Table **bold** caption

| My | Table |
| - | - |
Expand Down
4 changes: 4 additions & 0 deletions docs/quick_start.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ With the plugin enabled, one can now use an easy and descriptive syntax to add
captions to figures and tables. The captions are automatically numbered and
can be referenced in the text.
!!! tip
The caption text is converted by mkdocs itself. This means that technically
a caption can contain the same things as any other text.
=== "Markdown"
Expand Down
1 change: 1 addition & 0 deletions src/mkdocs_caption/config.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""The configuration options for the Caption plugin."""

from __future__ import annotations

import typing as t
Expand Down
59 changes: 33 additions & 26 deletions src/mkdocs_caption/custom.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,18 @@
"""Custom caption handling."""

from __future__ import annotations

from typing import TYPE_CHECKING

from lxml import etree

from mkdocs_caption.helper import TreeElement, update_references, wrap_md_captions
from mkdocs_caption.helper import (
CaptionInfo,
TreeElement,
iter_caption_elements,
update_references,
wrap_md_captions,
)

if TYPE_CHECKING:
from mkdocs_caption.config import IdentifierCaption
Expand Down Expand Up @@ -47,11 +54,10 @@ def preprocess_markdown(


def _wrap_in_figure(
caption_element: TreeElement,
caption_info: CaptionInfo,
*,
tree: TreeElement,
index: int,
identifier: str,
config: IdentifierCaption,
logger: PluginLogger,
) -> None:
Expand All @@ -62,52 +68,55 @@ def _wrap_in_figure(
with a custom caption based on the caption element, index, and identifier.
Args:
caption_element: The caption element to use for the caption text.
caption_info: The caption info.
tree: The root element of the XML tree.
index: The index of the figure element.
identifier: The identifier of the custom caption.
config: The plugin configuration.
logger: Current plugin logger.
"""
a_wrapper: TreeElement = caption_element.getparent() # type: ignore[assignment]
target_element = a_wrapper.getnext()
if target_element is None:
if caption_info.target_element is None:
logger.error("Custom caption does not semm to have a element that follows it")
return

figure_element = etree.Element("figure", None, None)
figure_element.attrib.update(caption_element.attrib)
figure_element.attrib.update(caption_info.attributes)
# wrap target element
target_element.addprevious(figure_element)
caption_info.target_element.addprevious(figure_element)

# add caption
caption_prefix = config.get_caption_prefix(identifier=identifier, index=index)
caption_prefix = config.get_caption_prefix(
identifier=caption_info.identifier,
index=index,
)
try:
fig_caption_element = etree.fromstring(
f"<figcaption>{caption_prefix} {caption_element.text}</figcaption>",
f"<figcaption>{caption_prefix} {caption_info.caption}</figcaption>",
)
except etree.XMLSyntaxError:
logger.error("Invalid XML in caption: %s", caption_element.text)
logger.error(
'Invalid XML in caption: <caption style="caption-side:%s">%s %s</caption>',
config.position,
caption_prefix,
caption_info.caption,
)
return
if config.position == "top":
figure_element.append(fig_caption_element)
figure_element.append(target_element)
figure_element.append(caption_info.target_element)
else:
figure_element.append(target_element)
figure_element.append(caption_info.target_element)
figure_element.append(fig_caption_element)

figure_id = caption_element.attrib.get(
figure_id = caption_info.attributes.get(
"id",
config.get_default_id(identifier=identifier, index=index),
config.get_default_id(identifier=caption_info.identifier, index=index),
)
figure_element.attrib["id"] = figure_id
update_references(
tree,
figure_id,
config.get_reference_text(identifier=identifier, index=index),
config.get_reference_text(identifier=caption_info.identifier, index=index),
)
a_wrapper.remove(caption_element)
a_wrapper.getparent().remove(a_wrapper) # type: ignore[union-attr]


def postprocess_html(
Expand All @@ -129,15 +138,13 @@ def postprocess_html(
if not config.enable:
return
index_dict: dict[str, int] = {}
for custom_caption in tree.xpath(f"//{CAPTION_TAG}"):
identifier = custom_caption.attrib.pop("identifier")
index = index_dict.get(identifier, config.start_index)
index_dict[identifier] = index + config.increment_index
for caption_info in iter_caption_elements(CAPTION_TAG, tree):
index = index_dict.get(caption_info.identifier, config.start_index)
index_dict[caption_info.identifier] = index + config.increment_index
_wrap_in_figure(
custom_caption,
caption_info,
tree=tree,
index=index,
identifier=identifier,
config=config,
logger=logger,
)
80 changes: 74 additions & 6 deletions src/mkdocs_caption/helper.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
"""General helper functions for the mkdocs-caption plugin."""

from __future__ import annotations

import re
from dataclasses import dataclass
from typing import Iterator

from lxml import etree

Expand Down Expand Up @@ -74,7 +77,7 @@ def _escape_md_caption(match: re.Match, *, target_tag: str) -> str:
options = _parse_extended_markdown(match.group(5))
return str(
f'\n{prefix}<{target_tag} identifier="{identifier}"'
f"{options}>{caption}</{target_tag}>\n\n",
f"{options}>\n\n{prefix}{caption}\n\n{prefix}<{target_tag}-end>\n\n",
)


Expand Down Expand Up @@ -108,13 +111,78 @@ def wrap_md_captions(
)


def sanitize_caption(caption: str | None) -> str:
"""Sanitize a caption to be used as an id.
def create_caption_str(caption_text_elements: list[TreeElement]) -> str:
    """Create an HTML string from a list of caption text elements.

    Every element is serialized as HTML (surrounding newlines removed) and
    the fragments are concatenated in order. If the caption consists of
    exactly one element and its serialization is wrapped in a plain
    ``<p>...</p>`` pair, that wrapper is removed so only the inner markup
    remains.

    Args:
        caption_text_elements: The list of caption text elements.

    Returns:
        A string with the caption text.
    """
    caption_text = "".join(
        etree.tostring(
            text_element,
            encoding="unicode",
            method="html",
        ).strip("\n")
        for text_element in caption_text_elements
    )
    if len(caption_text_elements) == 1:
        # NOTE: ``str.strip("<p>")`` would strip the *characters* '<', 'p'
        # and '>' from both ends (mangling captions such as "pop" or a
        # trailing inline tag), so the literal wrapper is removed explicitly.
        open_tag, close_tag = "<p>", "</p>"
        if caption_text.startswith(open_tag) and caption_text.endswith(close_tag):
            caption_text = caption_text[len(open_tag) : -len(close_tag)]
    return caption_text


@dataclass
class CaptionInfo:
    """Dataclass to store information about a caption.

    Instances are produced by ``iter_caption_elements`` for every caption
    marker found in the tree.
    """

    # Element that follows the caption end marker; ``None`` when nothing
    # follows it (consumers must check before using it).
    target_element: TreeElement | None
    # Attributes of the caption marker element, without ``identifier``.
    attributes: dict[str, str]
    # Caption content serialized as an HTML string.
    caption: str
    # Value of the marker's ``identifier`` attribute.
    identifier: str


def iter_caption_elements(tag: str, tree: TreeElement) -> Iterator[CaptionInfo]:
    """Iterate over all caption elements in an XML tree.

    For every ``<tag>`` start marker in the tree, the siblings that follow
    the marker's parent are collected as caption content until a sibling
    containing a ``<tag>-end`` child is reached. The element after that end
    sibling is reported as the target of the caption.

    After the consumer has processed a yielded item (or closes the
    generator), the start marker, its parent, the caption content elements
    and the end sibling are removed from the tree.

    If no end marker is found, the following siblings are left untouched
    and the caption is reported with an empty caption text and
    ``target_element=None``; only the start marker is removed.

    Args:
        tag: The tag of the caption elements.
        tree: The XML tree to iterate over.

    Yields:
        A ``CaptionInfo`` with the target element, the attributes of the
        caption element (without ``identifier``), the caption text, and the
        identifier of the caption element.
    """
    for caption_element in tree.xpath(f"//{tag}"):
        # The marker is not a direct sibling of the caption content; its
        # parent is (presumably the paragraph created by the markdown parser).
        a_wrapper = caption_element.getparent()
        caption_text_elements = []
        a_wrapper_end = a_wrapper.getnext()
        while a_wrapper_end is not None and not a_wrapper_end.xpath(f"{tag}-end"):
            caption_text_elements.append(a_wrapper_end)
            a_wrapper_end = a_wrapper_end.getnext()

        if a_wrapper_end is None:
            # The end marker is missing: the collected siblings cannot be
            # identified as caption content, so they must neither be used as
            # caption text nor be removed from the tree.
            caption_text_elements = []
            target_element = None
        else:
            target_element = a_wrapper_end.getnext()
        # The identifier is reported separately and must not end up in the
        # attributes that are copied to the final element.
        identifier = caption_element.attrib.pop("identifier")

        try:
            yield CaptionInfo(
                target_element=target_element,
                attributes=caption_element.attrib,
                caption=create_caption_str(caption_text_elements),
                identifier=identifier,
            )
        finally:
            # Clean up the helper markup even if the consumer raises or
            # stops iterating early.
            a_wrapper.remove(caption_element)
            a_wrapper.getparent().remove(a_wrapper)
            for caption_text_element in caption_text_elements:
                caption_text_element.getparent().remove(caption_text_element)
            if a_wrapper_end is not None:
                a_wrapper_end.getparent().remove(a_wrapper_end)
29 changes: 15 additions & 14 deletions src/mkdocs_caption/image.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
"""Handle image related captioning."""

from __future__ import annotations

from typing import TYPE_CHECKING

from lxml import etree

from mkdocs_caption.helper import TreeElement, update_references, wrap_md_captions
from mkdocs_caption.helper import (
TreeElement,
iter_caption_elements,
update_references,
wrap_md_captions,
)

if TYPE_CHECKING:
from mkdocs_caption.config import FigureCaption
Expand Down Expand Up @@ -159,27 +165,22 @@ def postprocess_html(

# Handle additional figure caption elements
custom_figure_attrib = {}
for custom_caption in tree.xpath(f"//{IMG_CAPTION_TAG}"):
a_wrapper = custom_caption.getparent()
for caption_info in iter_caption_elements(IMG_CAPTION_TAG, tree):
try:
next_element = a_wrapper.getnext()
target_element = (
next_element
if next_element.tag == "img"
else next_element.xpath(".//img")[0]
caption_info.target_element
if caption_info.target_element.tag == "img"
else caption_info.target_element.xpath(".//img")[0]
)
except IndexError:
logger.error(
"Figure caption must be followed by a img element. Skipping: %s",
custom_caption.text,
caption_info.caption,
)
continue
target_element.attrib["title"] = custom_caption.text
# unused attribute identifier
custom_caption.attrib.pop("identifier")
custom_figure_attrib[target_element] = custom_caption.attrib
a_wrapper.remove(custom_caption)
a_wrapper.getparent().remove(a_wrapper)
target_element.attrib["title"] = caption_info.caption
custom_figure_attrib[target_element] = caption_info.attributes

# Iterate through all images and wrap them in a figure element if requested
index = config.start_index
for img_element in tree.xpath("//p/a/img|//p/img"):
Expand Down
1 change: 1 addition & 0 deletions src/mkdocs_caption/logger.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Setup custom logger that is mkdocs compatible."""

from __future__ import annotations

import logging
Expand Down
1 change: 1 addition & 0 deletions src/mkdocs_caption/plugin.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""MkDocs plugin for custom image and table captions."""

from lxml import etree
from mkdocs.config.defaults import MkDocsConfig
from mkdocs.plugins import BasePlugin, event_priority
Expand Down
Loading

0 comments on commit 931d951

Please sign in to comment.