-
-
Notifications
You must be signed in to change notification settings - Fork 258
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Magiclink support for custom autolinked references #2550
base: main
Are you sure you want to change the base?
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -86,6 +86,15 @@ | |
# Internal mention patterns | ||
RE_INT_MENTIONS = r'(?P<mention>(?<![a-zA-Z])@{})\b' | ||
|
||
# Custom references | ||
RE_CUSTOM_REFS_TEMPLATE = r'''(?xi) | ||
(?:(?<=\b)|(?<=_))(?: | ||
(?P<prefix>%s) # prefix; to be injected | ||
(?P<id>\w+) # identifier | ||
) | ||
''' | ||
|
||
|
||
def create_ext_mentions(name, provider_type): | ||
"""Create external mentions by provider type.""" | ||
|
||
|
@@ -1036,6 +1045,33 @@ def handleMatch(self, m, data): | |
return el, m.start(0), m.end(0) | ||
|
||
|
||
class MagiclinkCustomRefPattern(InlineProcessor): | ||
"""Return a link Element given a custom prefix.""" | ||
|
||
ANCESTOR_EXCLUDES = ('a',) | ||
|
||
def __init__(self, pattern, md, shortname, target_url): | ||
"""Initialize.""" | ||
|
||
self.shortname = shortname | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. TODO: better term than "shortname". This is the lower-cased version of the prefix that we inject into a class name and inject into the name we use when registering the inlineProcessor. |
||
self.target_url = target_url | ||
InlineProcessor.__init__(self, pattern, md) | ||
|
||
def handleMatch(self, m, data): | ||
"""Return link.""" | ||
|
||
el = etree.Element("a") | ||
prefix = m.group(1) | ||
identifier = m.group(2) | ||
el.set('href', self.target_url.replace('<id>', identifier)) | ||
el.text = prefix + identifier | ||
|
||
el.set('class', f'magiclink magiclink-customref magiclink-customref-{self.shortname}') | ||
|
||
return el, m.start(0), m.end(0) | ||
|
||
|
||
|
||
class MagiclinkExtension(Extension): | ||
"""Add auto link and link transformation extensions to Markdown class.""" | ||
|
||
|
@@ -1097,6 +1133,10 @@ def __init__(self, *args, **kwargs): | |
'custom': [ | ||
{}, | ||
"Custom repositories hosts - Default {}" | ||
], | ||
'custom_refs': [ | ||
[], | ||
"Custom reference patterns - Default []" | ||
] | ||
} | ||
super().__init__(*args, **kwargs) | ||
|
@@ -1115,6 +1155,17 @@ def setup_autolinks(self, md, config): | |
|
||
md.inlinePatterns.register(MagiclinkMailPattern(RE_MAIL, md), "magic-mail", 84.9) | ||
|
||
def setup_custom_refs(self, md, config): | ||
"""Setup custom refs.""" | ||
|
||
for custom_ref_config in config.get('custom_refs', []): | ||
ref_prefix = custom_ref_config['ref_prefix'] | ||
target_url = custom_ref_config['target_url'] | ||
pattern_re = RE_CUSTOM_REFS_TEMPLATE % ref_prefix | ||
shortname = re.sub(r'\W+', '', ref_prefix).lower() | ||
pattern = MagiclinkCustomRefPattern(pattern_re, md, shortname, target_url) | ||
md.inlinePatterns.register(pattern, "customref-" + shortname, 120) | ||
|
||
def setup_shorthand(self, md): | ||
"""Setup shorthand.""" | ||
|
||
|
@@ -1308,6 +1359,7 @@ def extendMarkdown(self, md): | |
self.provider = 'github' | ||
|
||
self.setup_autolinks(md, config) | ||
self.setup_custom_refs(md, config) | ||
|
||
if self.git_short or self.social_short: | ||
self.ext_mentions = [] | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -399,3 +399,65 @@ def test_deprecated_twitter_shortener(self): | |
self.assertTrue(len(w) == 1) | ||
self.assertTrue(issubclass(w[-1].category, DeprecationWarning)) | ||
self.assertTrue(found) | ||
|
||
class TestMagicLinkCustomRefs(util.MdCase): | ||
"""Test cases for custom references.""" | ||
|
||
extension = [ | ||
'pymdownx.magiclink' | ||
] | ||
|
||
extension_configs = { | ||
'pymdownx.magiclink': { | ||
'custom_refs': [ | ||
{ | ||
'ref_prefix': 'TICKET-', | ||
'target_url': 'https://ticket.test.com/TICKET-<id>' | ||
}, | ||
{ | ||
'ref_prefix': 'go/', | ||
'target_url': 'https://go.test.com/<id>' | ||
}, | ||
] | ||
} | ||
} | ||
|
||
def test_numeric(self): | ||
"""Test numeric identifiers.""" | ||
|
||
self.check_markdown( | ||
'TICKET-123', | ||
'<p><a class="magiclink magiclink-customref magiclink-customref-ticket" href="https://ticket.test.com/TICKET-123">TICKET-123</a></p>' # noqa: E501 | ||
) | ||
|
||
def test_word_boundary(self): | ||
"""Test numeric identifiers.""" | ||
|
||
self.check_markdown( | ||
'Hello, TICKET-123!', | ||
'<p>Hello, <a class="magiclink magiclink-customref magiclink-customref-ticket" href="https://ticket.test.com/TICKET-123">TICKET-123</a>!</p>' # noqa: E501 | ||
) | ||
|
||
def test_alphanumeric(self): | ||
"""Test alphanumeric identifiers.""" | ||
|
||
self.check_markdown( | ||
'go/abc123', | ||
'<p><a class="magiclink magiclink-customref magiclink-customref-go" href="https://go.test.com/abc123">go/abc123</a></p>' # noqa: E501 | ||
) | ||
|
||
def test_underscores(self): | ||
"""Test underscore counts as a word character.""" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We originally discussed supporting "alphanumeric" identifiers, but These two test cases are validating "go/ link"-style links which would benefit from supporting So, we could consider expanding the set to "alphanumeric characters, plus Or, we could make this feature significantly more generic and powerful by letting users specify regex patterns themselves. And we'd rely on documentation to give users solid examples for the simpler cases. I'm interested in your perspective on the user base here and whether they'd generally prefer to have the power+responsibility of a more generic interface. GitHub's autolink reference feature needs to run whenever rendering the UI, but There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. To be more specific, when I said alphanumeric, I was thinking Now that I've had some more time to think, I do think in general ( I think each proposed ID should be validated with some regex such as this.
I want to be careful here as we are now running into scope increase. The original feature discussed was simply a method of providing a short name to translate into a link, now we are talking about mirroring various different issue tracking systems, and I'm not sure that is something I want or am willing to do. I'm not saying there isn't value in such suggestions, but at some point, I wonder if a user has more complicated auto-linking requirements if it would be better that they just write a specialized plugin at that point. Regardless, for now, I'd like to keep the scope reasonable or we need to go back and decide what it is we really want to do so that we can approach it the right way. Depending on what the end goal is, it may greatly affect how I'd like to go about the implementation. If there is a bigger vision here we need to take a step back and define what our goals are, and maybe this PR is not the answer. Maybe we need a different approach. If we want a more robust system for various repos that is similar to our current support, then we may need to consider refactoring and generalizing the existing system to make GitHub, Bitbucket, and GitLab handling even more generic so that Jira or any other system can be plugged in (within reason). Different systems have different username, project, and issue requirements. Some may be more difficult to identify; as patterns become more complex, so do false-positive matching and conflicts with other matching. So there are going to be some restrictions of what we can reasonably do in a generic sense. Maybe we can't always support every kind of link like we do for GitHub in Jira (or some other service). For instance, some systems may use the user email as the identifier in the URL, but may also display the user name in other aspects. Without the server making such translations and us tapping into their API, that is impossible for us. Also, we already translate emails into email links. When I was focused on the 3 big free services, the intent was that those would be all I was supporting to aid the majority of the open-source user base. They were generally kind of similar and monkied each other in various ways. Corporate installs of these often are allowed more freedom to do things a bit differently. I know our corporate username requirements for Bitbucket is different than that of the open-source version of the site. When I developed this extension, I wasn't targeting the corporate world specifically.
My thoughts are pulled between two things. I do like to give the user power but I also like to keep my maintenance cost (in regards to time) at a reasonable level as well. So my decisions are often influenced by these two things. In short, if there is a bigger vision beyond a simple short name to URL mapping, let's iron out what it is we want and are willing to do so that we can decide on a sustainable approach. If what we are doing now is too limited, we need to take a step back and reconsider now before we implement something that we want to rewrite later. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is great feedback and I appreciate the depth.
I will think about this. I agree with you that we either proceed with a simple feature that allows only There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I do think it's worth moving forward with a fairly restrictive model of user-defined autolink references. I've updated the feature name here to "simplerefs" to better capture the spirit of this model.
I'm not entirely clear whether you're referring to the For For captured identifiers, we need to allow pure numeric values to support the Jira case, so can't require it starts with a letter. So this could be straight alphanumeric There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Then let's take a step back and let's try and create a proposal spec of what it is we want. My intention was never to support every repo in existence, and the plugin is fairly complicated as is. So when accepting new features, I have to consider that I will be maintaining the solution long after the PR is developed and accepted. Or maybe I am misunderstanding what you mean by this statement?
Numbers are captured separately from prefixes. Numbers are numbers and I'm not talking about numbers. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I imagine something like this: import re
RE_VALIDATE = re.compile(r'^[a-z][-a-z0-9_]+$')
patterns = [
'test1',
'test-2',
'-test3',
'test4-'
]
validated = '|'.join(re.escape(p) for p in patterns if RE_VALIDATE.match(p))
pattern = re.compile(fr"\b({validated})([0-9]+)\b")
BUFFER = """
This is a test to find things like test-21234, test11234, and test4-1234, but not -test31234.
"""
for found in pattern.findall(BUFFER):
print(found)
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There are a number of questions that come to mind looking at this. Do we want normalize the final results to all uppercase? I know we talked about matching them case insensitive, but should we normalize case on display in the final document? Should we have an implied There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
No, I think we should preserve the casing of the text as written in the document. Assuming uppercase feels pretty Jira-specific.
No, I think ending punctuation like that should be part of the configured prefix, as it is with GitHub's autolink references interface.
Lol. I think we should call the captured "number" an "identifier" (so So we end up capturing text in the form It might make sense for me to go ahead and add proposed documentation in this PR and have that serve as the proposal spec. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I'm okay with that as I want to understand what the actual proposal is opposed to seeing evolve through iterations of commits. |
||
|
||
self.check_markdown( | ||
'go/abc_123', | ||
'<p><a class="magiclink magiclink-customref magiclink-customref-go" href="https://go.test.com/abc_123">go/abc_123</a></p>' # noqa: E501 | ||
) | ||
|
||
def test_hyphen(self): | ||
"""Test hyphen breaks matching.""" | ||
|
||
self.check_markdown( | ||
'go/abc-123', | ||
'<p><a class="magiclink magiclink-customref magiclink-customref-go" href="https://go.test.com/abc">go/abc</a>-123</p>' # noqa: E501 | ||
) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
TODO: better docstring here