From ef17f0a74f5ecff219846265593cda91a5109623 Mon Sep 17 00:00:00 2001 From: Manuel Bucher Date: Tue, 14 Apr 2026 01:03:44 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20FIX:=20Correctly=20encode=20"&"?= =?UTF-8?q?=20in=20Markdown=20URLs=20by=20not=20HTML-escaping=20refuri?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `escapeHtml` was called on the URL before storing it in the `refuri` attribute of a reference node, converting `&` to `&amp;`. This caused double-escaping when Sphinx's HTML writer later escaped the `&` in `&amp;` to produce `&amp;amp;` in the final `href` attribute, breaking URLs with query parameters. The `refuri` attribute should hold the raw URL; HTML-escaping is the responsibility of the output writer. The other characters `escapeHtml` converts (`<`, `>`, `"`) are already percent-encoded by `normalizeLink` before reaching this point, so removing the call has no other effect. --- myst_parser/mdit_to_docutils/base.py | 3 +- .../fixtures/docutil_syntax_elements.md | 30 +++++++++++++++++++ .../sourcedirs/references/index.md | 2 ++ .../test_sphinx_builds/test_references.html | 5 ++++ .../test_references.resolved.xml | 3 ++ .../test_sphinx_builds/test_references.xml | 3 ++ 6 files changed, 44 insertions(+), 2 deletions(-) diff --git a/myst_parser/mdit_to_docutils/base.py b/myst_parser/mdit_to_docutils/base.py index 85d64be3..a247774e 100644 --- a/myst_parser/mdit_to_docutils/base.py +++ b/myst_parser/mdit_to_docutils/base.py @@ -31,7 +31,6 @@ from docutils.utils import Reporter, SystemMessage, new_document from docutils.utils.code_analyzer import Lexer, LexerError, NumberLines from markdown_it import MarkdownIt -from markdown_it.common.utils import escapeHtml from markdown_it.renderer import RendererProtocol from markdown_it.token import Token from markdown_it.tree import SyntaxTreeNode @@ -954,7 +953,7 @@ def render_link_url( if "classes" in conversion: ref_node["classes"].extend(conversion["classes"]) - 
ref_node["refuri"] = escapeHtml(uri) + ref_node["refuri"] = uri if implicit_text is not None: with self.current_node_context(ref_node, append=True): self.current_node.append(nodes.Text(implicit_text)) diff --git a/tests/test_renderers/fixtures/docutil_syntax_elements.md b/tests/test_renderers/fixtures/docutil_syntax_elements.md index 8c323b52..c2307953 100644 --- a/tests/test_renderers/fixtures/docutil_syntax_elements.md +++ b/tests/test_renderers/fixtures/docutil_syntax_elements.md @@ -747,3 +747,33 @@ a = 1 . + +URL with ampersand in query string +. +[link](https://example.com/search?q=foo&bar=baz) +. + + + + link +. + +URL with angle brackets (percent-encoded by normalizeLink, not HTML-escaped) +. +[link](https://example.com/pathbrackets) +. + + + + link +. + +URL with double quotes (percent-encoded by normalizeLink, not HTML-escaped) +. +[link](https://example.com/path"with"quotes) +. + + + + link +. diff --git a/tests/test_sphinx/sourcedirs/references/index.md b/tests/test_sphinx/sourcedirs/references/index.md index 5255b475..88bd5d8d 100644 --- a/tests/test_sphinx/sourcedirs/references/index.md +++ b/tests/test_sphinx/sourcedirs/references/index.md @@ -8,6 +8,8 @@ [nested *syntax*](https://example.com) +[query params](https://example.com?foo=bar&a=1) + [](title) [plain text](title) diff --git a/tests/test_sphinx/test_sphinx_builds/test_references.html b/tests/test_sphinx/test_sphinx_builds/test_references.html index 23da570c..c3dda3bf 100644 --- a/tests/test_sphinx/test_sphinx_builds/test_references.html +++ b/tests/test_sphinx/test_sphinx_builds/test_references.html @@ -33,6 +33,11 @@

+

+ + query params + +

diff --git a/tests/test_sphinx/test_sphinx_builds/test_references.resolved.xml b/tests/test_sphinx/test_sphinx_builds/test_references.resolved.xml index f832d049..4277ec6b 100644 --- a/tests/test_sphinx/test_sphinx_builds/test_references.resolved.xml +++ b/tests/test_sphinx/test_sphinx_builds/test_references.resolved.xml @@ -18,6 +18,9 @@ nested syntax + + + query params diff --git a/tests/test_sphinx/test_sphinx_builds/test_references.xml b/tests/test_sphinx/test_sphinx_builds/test_references.xml index 4968ca2f..a3b74005 100644 --- a/tests/test_sphinx/test_sphinx_builds/test_references.xml +++ b/tests/test_sphinx/test_sphinx_builds/test_references.xml @@ -18,6 +18,9 @@ nested syntax + + + query params