diff --git a/gcp/workers/importer/importer.py b/gcp/workers/importer/importer.py index e6525647b17..19ea4a4eda5 100755 --- a/gcp/workers/importer/importer.py +++ b/gcp/workers/importer/importer.py @@ -246,7 +246,7 @@ def _infer_id_from_invalid_data(self, name: str, content: bytes) -> str: extension = os.path.splitext(name)[1] try: vulns = osv.parse_vulnerabilities_from_data( - content, extension, strict=False) + content, extension, strict=False, source_name=name) if vulns: return vulns[0].id except RuntimeError: @@ -435,7 +435,8 @@ def _vuln_ids_from_gcs_blob(self, client: storage.Client, vulns = osv.parse_vulnerabilities_from_data( blob_bytes, os.path.splitext(blob.name)[1], - strict=source_repo.strict_validation and self._strict_validation) + strict=source_repo.strict_validation and self._strict_validation, + source_name=blob.name) for vuln in vulns: vuln_ids.append(vuln.id) return vuln_ids @@ -502,7 +503,8 @@ def _convert_blob_to_vuln( vulns = osv.parse_vulnerabilities_from_data( blob_bytes, os.path.splitext(blob.name)[1], - strict=self._strict_validation) + strict=self._strict_validation, + source_name=blob.name) # TODO(andrewpollock): integrate with linter here. diff --git a/gcp/workers/worker/worker.py b/gcp/workers/worker/worker.py index cf61a5694bc..6299397d983 100644 --- a/gcp/workers/worker/worker.py +++ b/gcp/workers/worker/worker.py @@ -380,7 +380,8 @@ def _source_update(self, message): vulnerabilities = osv.parse_vulnerabilities_from_data( blob, extension=os.path.splitext(path)[1], - key_path=source_repo.key_path) + key_path=source_repo.key_path, + source_name=path) except Exception: logging.exception('Failed to parse vulnerability %s', path) return diff --git a/osv/sources.py b/osv/sources.py index f6fe6e98156..5bd3df8a101 100644 --- a/osv/sources.py +++ b/osv/sources.py @@ -138,14 +138,35 @@ def parse_vulnerabilities(path, key_path=None, strict=False): def parse_vulnerabilities_from_data(data_text, extension, key_path=None, - strict=False): - """Parse vulnerabilities from data.""" - if extension in YAML_EXTENSIONS: - data = yaml.load(data_text, Loader=NoDatesSafeLoader) - elif extension in JSON_EXTENSIONS: - data = json.loads(data_text) - else: - raise RuntimeError('Unknown format ' + extension) + strict=False, + source_name=None): + """Parse vulnerabilities from data. + + Args: + data_text: The raw vulnerability data. + extension: File extension (.json, .yaml, .yml). + key_path: Optional key path for nested data. + strict: If True, raises on validation errors. + source_name: Optional source identifier for error context. + + Returns: + List of parsed vulnerabilities. + + Raises: + RuntimeError: If parsing fails, includes source_name if provided. + """ + try: + if extension in YAML_EXTENSIONS: + data = yaml.load(data_text, Loader=NoDatesSafeLoader) + elif extension in JSON_EXTENSIONS: + data = json.loads(data_text) + else: + raise RuntimeError('Unknown format ' + extension) + except Exception as e: + if source_name: + raise RuntimeError( + f"Failed to parse vulnerability file '{source_name}'") from e + raise return _parse_vulnerabilities(data, key_path, strict)