diff --git a/admin/base/templatetags/filters.py b/admin/base/templatetags/filters.py index 5821d331a76..40f4ae805e5 100644 --- a/admin/base/templatetags/filters.py +++ b/admin/base/templatetags/filters.py @@ -3,6 +3,7 @@ from django.utils.safestring import mark_safe import json from django.utils.translation import ugettext_lazy as _ +from osf.utils.text_rendering import osf_urlize as custom_osf_urlize register = template.Library() @@ -13,3 +14,7 @@ def jsonify(o): @register.filter def transValue(value1): return _(str(value1)) + +@register.filter +def osf_urlize(text): + return custom_osf_urlize(text) diff --git a/admin/templates/maintenance/display.html b/admin/templates/maintenance/display.html index 9d5cde65cd0..2f278c63dde 100644 --- a/admin/templates/maintenance/display.html +++ b/admin/templates/maintenance/display.html @@ -3,7 +3,7 @@ {% load render_bundle from webpack_loader %} {% load spam_extras %} {% load static %} -{% load render_bundle from webpack_loader %} +{% load filters %} {% block title %} {% trans "Maintenance State" %} {% endblock title %} @@ -26,7 +26,7 @@

{% trans "Current alert:" %}

"""Render user-supplied plain text as HTML-safe markup with clickable URLs.

``osf_urlize`` escapes the text and wraps every ``http(s)://`` URL whose host
looks like a real domain name in an anchor element.  ``render_text``
additionally converts newlines to ``<br>`` tags.
"""
import re
from urllib.parse import urlparse

# Opening brackets that may directly precede a URL.  ``\uff08`` is the
# fullwidth left parenthesis used in Japanese text.
_LEADING_BRACKETS = frozenset('([{【\uff08「『〔')

# Closing bracket -> matching opening bracket.  ``\uff09`` / ``\uff08`` are the
# fullwidth parentheses; they are spelled as escapes so they cannot be
# silently folded into the ASCII ``)`` / ``(`` entries.
_CLOSER_MAP = {
    ')': '(',
    ']': '[',
    '}': '{',
    '\uff09': '\uff08',
    '】': '【',
    '」': '「',
    '』': '『',
    '〕': '〔',
}

# Sentence punctuation that is never treated as the last character of a URL.
_TRAILING_PUNCT = frozenset('.,!?、。')

# Matches URL portion only — stops at whitespace, HTML chars, and Japanese
# brackets/punctuation.  ASCII brackets are allowed inside the match; the
# unbalanced ones are removed afterwards by ``_trim_edges``.
_URL_IN_TEXT = re.compile(r'https?://[^\s<>"\'【\uff08「『〔】\uff09」』〕、。]*')

_WHITESPACE_RUN = re.compile(r'(\s+)')
_HTML_SPECIAL = re.compile(r'([<>"])')
_HOST_CHARS = re.compile(r'[a-zA-Z0-9.-]+')
_ALPHA_TLD = re.compile(r'[a-zA-Z]{2,}')


def is_valid_domain(host: str) -> bool:
    """Return True if *host* is a dotted ASCII domain name with an alphabetic TLD.

    The host must consist only of letters, digits, dots and hyphens, contain
    at least two non-empty labels, have no label starting or ending with a
    hyphen, and end in a label of two or more letters.  Single-label hosts
    and dotted numeric addresses are therefore rejected.
    """
    # fullmatch (rather than match with ``$``) so a trailing newline is rejected.
    if not host or not _HOST_CHARS.fullmatch(host):
        return False

    labels = host.split('.')
    if len(labels) < 2:
        return False
    # An empty label covers leading, trailing and doubled dots.
    if any(not label or label.startswith('-') or label.endswith('-') for label in labels):
        return False

    return _ALPHA_TLD.fullmatch(labels[-1]) is not None


def _trim_edges(s: str):
    """Split *s* into ``(leading, core, trailing)``.

    ``leading`` is the run of opening brackets at the start of *s*.
    ``trailing`` is the run at the end made of sentence punctuation and of
    closing brackets that have no matching opener left in the core.  A
    closing bracket that is balanced inside the core stops the trimming, so
    it stays part of ``core``.
    """
    start = 0
    while start < len(s) and s[start] in _LEADING_BRACKETS:
        start += 1
    leading, core = s[:start], s[start:]

    end = len(core)
    while end:
        last = core[end - 1]
        if last in _TRAILING_PUNCT:
            end -= 1
            continue
        opener = _CLOSER_MAP.get(last)
        # Only drop a closer when the remaining core has more closers than openers.
        if opener is not None and core.count(opener, 0, end) < core.count(last, 0, end):
            end -= 1
            continue
        break

    return leading, core[:end], core[end:]


def _linkable_url(url_raw: str):
    """Return the part of *url_raw* that should become a link, or None.

    Trailing punctuation and unbalanced closing brackets are removed first;
    the remainder is accepted only if its host passes ``is_valid_domain``.
    """
    _, url_core, _ = _trim_edges(url_raw)
    try:
        host = urlparse(url_core).hostname
    except ValueError:
        # urlparse rejects malformed authorities such as an unclosed "[".
        return None
    if host and is_valid_domain(host):
        return url_core
    return None


def osf_urlize(text: str) -> str:
    """Escape *text* for HTML and turn its http(s) URLs into anchor elements.

    Whitespace is passed through unchanged so that callers can apply
    line-break handling afterwards.  Everything else is HTML-escaped.  Every
    URL with a valid domain host becomes ``<a href="URL">URL</a>``; URLs that
    fail validation are left as escaped plain text.

    Returns an empty string for empty input, otherwise a string marked safe
    for template output.
    """
    if not text:
        return ''

    # Imported here so the pure helpers above can be used without Django.
    from django.utils.html import escape
    from django.utils.safestring import mark_safe

    result = []
    for part in _WHITESPACE_RUN.split(text):
        if not part:
            continue
        if part.isspace():
            result.append(part)
            continue

        # Isolate <, > and " so they can never end up inside a URL or attribute.
        for chunk in _HTML_SPECIAL.split(part):
            if not chunk:
                continue
            if chunk in ('<', '>', '"'):
                result.append(escape(chunk))
                continue

            leading, core, trailing = _trim_edges(chunk)
            result.append(escape(leading))

            consumed = 0
            for match in _URL_IN_TEXT.finditer(core):
                url = _linkable_url(match.group(0))
                if url is None:
                    # Not linkable: it is emitted later as ordinary escaped text.
                    continue
                result.append(escape(core[consumed:match.start()]))
                safe_url = escape(url)
                result.append(f'<a href="{safe_url}">{safe_url}</a>')
                # Characters trimmed off the URL stay in the surrounding text.
                consumed = match.start() + len(url)

            result.append(escape(core[consumed:]))
            result.append(escape(trailing))

    return mark_safe(''.join(result))


def render_text(text: str) -> str:
    """Return *text* linkified by ``osf_urlize`` with newlines converted to ``<br>``.

    Returns an empty string for empty input.
    """
    if not text:
        return ''

    from django.template.defaultfilters import linebreaksbr

    return linebreaksbr(osf_urlize(text))
b/tests/test_text_rendering.py new file mode 100644 index 00000000000..0474c279ea2 --- /dev/null +++ b/tests/test_text_rendering.py @@ -0,0 +1,100 @@ +import unittest +from osf.utils.text_rendering import render_text, osf_urlize + + +class TestOsfUrlize(unittest.TestCase): + + def test_https_url_linkified(self): + result = osf_urlize('Visit https://example.com') + assert 'alert("xss")') + assert '') + assert '