From da252021206c06495d714fbca2d7f9eb49e0bede Mon Sep 17 00:00:00 2001
From: biplavbarua
Date: Thu, 25 Dec 2025 15:35:13 +0530
Subject: [PATCH] fix(importer): sanitize HTML anchor tags from vulnerability details

---
 osv/sources.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/osv/sources.py b/osv/sources.py
index f6fe6e98156..b94f7b05064 100644
--- a/osv/sources.py
+++ b/osv/sources.py
@@ -17,6 +17,7 @@
 import hashlib
 import logging
 import os
+import re
 
 import jsonschema
 import pygit2
@@ -178,6 +179,11 @@ def parse_vulnerability_from_dict(data, key_path=None, strict=False):
   if not vulnerability.id:
     raise ValueError('Missing id field. Invalid vulnerability.')
 
+  if vulnerability.summary:
+    vulnerability.summary = _sanitize_string(vulnerability.summary)
+  if vulnerability.details:
+    vulnerability.details = _sanitize_string(vulnerability.details)
+
   return vulnerability
 
 
@@ -230,6 +236,12 @@ def _write_vulnerability_dict(data, output_path,
   os.utime(output_path, (modified_date_timestamp, modified_date_timestamp))
 
 
+def _sanitize_string(text):
+  """Sanitize string by removing anchor tags."""
+  # Remove <a href="...">text</a> and keep text.
+  return re.sub(r'<a\b[^>]*>(.*?)</a>', r'\1', text, flags=re.IGNORECASE | re.DOTALL)
+
+
 def write_vulnerability(vulnerability: vulnerability_pb2.Vulnerability,
                         output_path,
                         key_path=None):