diff --git a/osv/sources.py b/osv/sources.py
index f6fe6e98156..b94f7b05064 100644
--- a/osv/sources.py
+++ b/osv/sources.py
@@ -17,6 +17,7 @@
import hashlib
import logging
import os
+import re
import jsonschema
import pygit2
@@ -178,6 +179,11 @@ def parse_vulnerability_from_dict(data, key_path=None, strict=False):
if not vulnerability.id:
raise ValueError('Missing id field. Invalid vulnerability.')
+ if vulnerability.summary:
+ vulnerability.summary = _sanitize_string(vulnerability.summary)
+ if vulnerability.details:
+ vulnerability.details = _sanitize_string(vulnerability.details)
+
return vulnerability
@@ -230,6 +236,12 @@ def _write_vulnerability_dict(data, output_path,
os.utime(output_path, (modified_date_timestamp, modified_date_timestamp))
+def _sanitize_string(text):
+ """Sanitize string by removing anchor tags."""
+ # Remove text and keep text.
+ return re.sub(r']*>(.*?)', r'\1', text, flags=re.IGNORECASE | re.DOTALL)
+
+
def write_vulnerability(vulnerability: vulnerability_pb2.Vulnerability,
output_path,
key_path=None):