Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions admin/base/templatetags/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from django.utils.safestring import mark_safe
import json
from django.utils.translation import ugettext_lazy as _
from osf.utils.text_rendering import osf_urlize as custom_osf_urlize

register = template.Library()

Expand All @@ -13,3 +14,7 @@ def jsonify(o):
@register.filter
def transValue(value1):
    """Template filter: return the translation of ``value1``'s string form.

    ``value1`` is converted with ``str()`` and handed to ``_``, the translation
    callable this module imports.
    """
    message = str(value1)
    return _(message)

@register.filter
def osf_urlize(text):
    """Template filter that delegates to ``osf.utils.text_rendering.osf_urlize``.

    The helper is imported into this module as ``custom_osf_urlize`` so that
    the filter can keep the short ``osf_urlize`` name in templates.
    """
    linked = custom_osf_urlize(text)
    return linked
4 changes: 2 additions & 2 deletions admin/templates/maintenance/display.html
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
{% load render_bundle from webpack_loader %}
{% load spam_extras %}
{% load static %}
{% load render_bundle from webpack_loader %}
{% load filters %}
{% block title %}
<title>{% trans "Maintenance State" %}</title>
{% endblock title %}
Expand All @@ -26,7 +26,7 @@ <h4>{% trans "Current alert:" %}</h4>
<td>{{ current_alert.start }} - {{ current_alert.end }} UTC</td>
<td>
{% if current_alert.message %}
{{ current_alert.message }}
{{ current_alert.message|osf_urlize|linebreaksbr }}
{% else %}
{% trans "The site will undergo maintenance between &lt;localized start time&gt;-&lt;localized end time&gt;. Thank you for your patience." %}
{% endif %}
Expand Down
113 changes: 113 additions & 0 deletions osf/utils/text_rendering.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
from django.template.defaultfilters import linebreaksbr
from django.utils.html import escape
from django.utils.safestring import mark_safe
import re
from urllib.parse import urlparse

# Opening brackets that may sit directly in front of a URL; they are kept
# outside of the generated link.
# NOTE: ASCII "(" / ")" and the full-width parentheses U+FF08 / U+FF09 are
# different characters. The full-width forms are written as escapes so they
# cannot be silently folded into their ASCII look-alikes (which previously
# produced a duplicated "(" entry and a duplicated ")" dict key).
_LEADING_BRACKETS = frozenset('([{【\uff08「『〔')

# Closing bracket -> matching opening bracket, used to decide whether a
# trailing closer is unbalanced and therefore not part of the URL.
_CLOSER_MAP = {
    ')': '(',
    ']': '[',
    '}': '{',
    '\uff09': '\uff08',
    '】': '【',
    '」': '「',
    '』': '『',
    '〕': '〔',
}

# Punctuation that is peeled off the end of a token / URL.
_TRAILING_PUNCT = frozenset('.,!?、。')

# Matches URL portion only — stops at whitespace, HTML chars, and Japanese brackets.
# ASCII parentheses are deliberately allowed inside the URL; unbalanced trailing
# ones are removed afterwards by the bracket-balancing trim.
_URL_IN_TEXT = re.compile(r'https?://[^\s<>"\'【\uff08「『〔】\uff09」』〕、。]*')


# Whole-host character whitelist and TLD shape. Both are applied with
# ``fullmatch`` because ``$`` in ``re.match`` also matches just before a
# trailing newline, which would let ``"example.com\n"`` through.
_HOST_CHARS_RE = re.compile(r'[a-zA-Z0-9.-]+')
_TLD_RE = re.compile(r'[a-zA-Z]{2,}')


def is_valid_domain(host: str) -> bool:
    """Return True when ``host`` looks like a dotted ASCII domain name.

    A host is accepted only if all of the following hold:

    * it is non-empty and consists solely of ASCII letters, digits, ``.``
      and ``-``;
    * it contains at least one dot and no empty label (so no leading,
      trailing or doubled dots);
    * no label starts or ends with ``-``;
    * the last label is at least two ASCII letters.

    Consequently bare names such as ``localhost`` and dotted-decimal IP
    addresses are rejected.

    >>> is_valid_domain('example.com')
    True
    >>> is_valid_domain('192.168.0.1')
    False
    """
    if not host or not _HOST_CHARS_RE.fullmatch(host):
        return False

    labels = host.split('.')
    if len(labels) < 2:
        # No dot at all.
        return False

    for label in labels:
        # An empty label means a leading/trailing dot or "..".
        if not label or label.startswith('-') or label.endswith('-'):
            return False

    return _TLD_RE.fullmatch(labels[-1]) is not None


def _trim_edges(s: str):
    """Split ``s`` into ``(leading, core, trailing)``.

    ``leading`` is the run of characters from ``_LEADING_BRACKETS`` at the
    start of ``s``. ``trailing`` is the run at the end made of characters
    from ``_TRAILING_PUNCT`` and of closing brackets (keys of
    ``_CLOSER_MAP``) that have fewer matching openers than closers in the
    remaining core. ``core`` is whatever lies between; the three pieces
    always concatenate back to ``s``.
    """
    length = len(s)

    # Advance past every opening bracket at the front.
    start = 0
    while start < length and s[start] in _LEADING_BRACKETS:
        start += 1

    # Walk backwards from the end, giving up at the first character that
    # is neither punctuation nor an unbalanced closer.
    end = length
    while end > start:
        last = s[end - 1]
        if last in _TRAILING_PUNCT:
            end -= 1
            continue
        if last not in _CLOSER_MAP:
            break
        remaining = s[start:end]
        if remaining.count(_CLOSER_MAP[last]) >= remaining.count(last):
            # The closer is balanced inside the core, so it belongs to it.
            break
        end -= 1

    return s[:start], s[start:end], s[end:]


# Split patterns are compiled once; the capture group keeps the separators
# in the result, so items at odd indices are always the separators.
_WHITESPACE_SPLIT_RE = re.compile(r'(\s+)')
_HTML_CHAR_SPLIT_RE = re.compile(r'([<>"])')


def _link_or_escape(url_raw: str) -> str:
    """Return HTML for one raw URL match.

    Produces an ``<a>`` element when the URL's host passes
    ``is_valid_domain``; otherwise returns the escaped raw text unchanged.
    """
    # Strip trailing punctuation and unbalanced brackets from the URL.
    _, url, url_trailing = _trim_edges(url_raw)

    try:
        host = urlparse(url).hostname
    except ValueError:
        # urlparse raises ValueError for malformed network locations
        # (for example an unclosed "["); treat those as plain text.
        host = None

    if not (host and is_valid_domain(host)):
        return escape(url_raw)

    href = escape(url)
    return f'<a href="{href}" rel="nofollow">{href}</a>' + escape(url_trailing)


def _linkify_chunk(chunk: str) -> str:
    """Escape ``chunk`` and linkify every ``http(s)://`` URL found inside it."""
    leading, core, trailing = _trim_edges(chunk)

    pieces = [escape(leading)]
    cursor = 0
    # finditer (rather than a single search) so that several URLs glued
    # together by non-URL characters, e.g. Japanese brackets or "、",
    # are all linkified instead of only the first one.
    for match in _URL_IN_TEXT.finditer(core):
        pieces.append(escape(core[cursor:match.start()]))
        pieces.append(_link_or_escape(match.group(0)))
        cursor = match.end()
    pieces.append(escape(core[cursor:]))
    pieces.append(escape(trailing))
    return ''.join(pieces)


def osf_urlize(text: str) -> str:
    """Escape ``text`` for HTML and turn ``http://`` / ``https://`` URLs into links.

    The text is split on whitespace (which is preserved verbatim) and on the
    characters ``<``, ``>`` and ``"`` (which are escaped on their own). Every
    remaining chunk is escaped, and each URL in it whose host passes
    ``is_valid_domain`` becomes ``<a href="..." rel="nofollow">...</a>``.
    Leading brackets, trailing punctuation and unbalanced closing brackets
    are kept outside the link.

    Returns an empty string for falsy input; otherwise the assembled HTML
    wrapped with ``mark_safe``.
    """
    if not text:
        return ''

    rendered = []
    for part_index, part in enumerate(_WHITESPACE_SPLIT_RE.split(text)):
        if not part:
            continue
        if part_index % 2:
            # Whitespace separator: emit unchanged.
            rendered.append(part)
            continue

        for chunk_index, chunk in enumerate(_HTML_CHAR_SPLIT_RE.split(part)):
            if not chunk:
                continue
            if chunk_index % 2:
                # One of < > " standing alone.
                rendered.append(escape(chunk))
            else:
                rendered.append(_linkify_chunk(chunk))

    return mark_safe(''.join(rendered))


def render_text(text: str) -> str:
    """Render ``text`` as HTML: linkify via ``osf_urlize``, then apply ``linebreaksbr``.

    Falsy input (empty string, ``None``) yields an empty string.
    """
    if text:
        linked = osf_urlize(text)
        return linebreaksbr(linked)
    return ''
100 changes: 100 additions & 0 deletions tests/test_text_rendering.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
import unittest
from osf.utils.text_rendering import render_text, osf_urlize


class TestOsfUrlize(unittest.TestCase):
    """Checks on the HTML produced by ``osf_urlize``."""

    def test_https_url_linkified(self):
        """An https URL becomes an anchor carrying rel="nofollow"."""
        html = osf_urlize('Visit https://example.com')
        assert '<a href="https://example.com"' in html
        assert 'rel="nofollow"' in html

    def test_http_url_linkified(self):
        """A plain http URL is linkified as well."""
        html = osf_urlize('Visit http://example.com')
        assert '<a href="http://example.com"' in html

    def test_bare_domain_not_linkified(self):
        """A domain without a scheme stays plain text."""
        html = osf_urlize('Visit google.com')
        assert '<a' not in html

    def test_www_domain_not_linkified(self):
        """A www-prefixed domain without a scheme stays plain text."""
        html = osf_urlize('Visit www.google.com')
        assert '<a' not in html

    def test_ftp_url_not_linkified(self):
        """Schemes other than http/https are not linkified."""
        html = osf_urlize('ftp://example.com')
        assert '<a' not in html

    def test_html_escaped(self):
        """Raw markup in the input does not survive into the output."""
        html = osf_urlize('<script>alert("xss")</script>')
        assert '<script>' not in html

    def test_url_in_square_brackets_linkified(self):
        """Surrounding square brackets are kept out of the link."""
        html = osf_urlize('[https://example.com]')
        assert '<a href="https://example.com"' in html
        assert 'https://example.com]' not in html

    def test_url_in_parentheses_linkified(self):
        """Surrounding parentheses are kept out of the link."""
        html = osf_urlize('(https://example.com)')
        assert '<a href="https://example.com"' in html
        assert 'https://example.com)' not in html

    def test_url_with_trailing_slash_in_square_brackets(self):
        """A trailing slash stays in the link; the closing bracket does not."""
        html = osf_urlize('[https://example.com/]')
        assert '<a href="https://example.com/"' in html
        assert 'https://example.com/]' not in html

    def test_url_with_trailing_slash_in_parentheses(self):
        """A trailing slash stays in the link; the closing parenthesis does not."""
        html = osf_urlize('(https://example.com/)')
        assert '<a href="https://example.com/"' in html
        assert 'https://example.com/)' not in html

    def test_trailing_dots(self):
        """Dots after the URL are excluded from the link."""
        html = osf_urlize('https://google.com...')
        assert '<a href="https://google.com"' in html
        assert 'https://google.com...' not in html

    def test_url_in_parens_with_trailing_period(self):
        """A closing parenthesis followed by a period is excluded from the link."""
        html = osf_urlize('(https://google.com).')
        assert '<a href="https://google.com"' in html
        assert 'https://google.com).' not in html

    def test_trailing_comma(self):
        """A comma after the URL is excluded from the link."""
        html = osf_urlize('https://google.com,')
        assert '<a href="https://google.com"' in html
        assert 'https://google.com,' not in html

    def test_url_in_double_quotes(self):
        """Double quotes around the URL are escaped separately from the link."""
        html = osf_urlize('"https://google.com"')
        assert '<a href="https://google.com"' in html
        assert 'https://google.com&quot;' not in html


class TestRenderText(unittest.TestCase):
    """Checks on ``render_text`` (linkification plus line-break conversion)."""

    def test_empty_string(self):
        """An empty string renders as an empty string."""
        assert render_text('') == ''

    def test_none_input(self):
        """``None`` renders as an empty string."""
        assert render_text(None) == ''

    def test_linebreaks(self):
        """A newline in the input yields a <br> tag."""
        html = render_text('line1\nline2')
        assert '<br' in html

    def test_https_url_linkified(self):
        """An https URL with a hyphenated host becomes an anchor."""
        html = render_text('Visit https://abc-test.com')
        assert '<a href="https://abc-test.com"' in html

    def test_bare_domain_not_linkified(self):
        """A domain without a scheme stays plain text."""
        html = render_text('Visit google.com')
        assert '<a' not in html

    def test_plain_text_not_linkified(self):
        """Dotted plain text such as version numbers is not linkified."""
        html = render_text('version 1.0 etc. Co.,Ltd.')
        assert '<a' not in html

    def test_xss_not_rendered(self):
        """Raw script markup does not survive into the output."""
        html = render_text('<script>alert("xss")</script>')
        assert '<script>' not in html

3 changes: 2 additions & 1 deletion website/templates/nav.mako
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
<%def name="nav(service_name, service_url, service_support_url, service_support_target='_self')">
<% from osf.utils.text_rendering import render_text %>
<link rel="stylesheet" href='/static/css/nav.css'>
<div class="osf-nav-wrapper">

Expand Down Expand Up @@ -139,7 +140,7 @@
<span aria-hidden="true">&times;</span></button>
<strong>${_('Notice:')}</strong>
% if maintenance['message']:
${maintenance['message']}
${render_text(maintenance['message']) | n}
% else:
${_('The site will undergo maintenance between <span id="maintenanceTime"></span>.') | n}
${_("Thank you for your patience.")}
Expand Down
Loading