From 360e54b5e0262732f181c99cc9b29c8ad27dac9f Mon Sep 17 00:00:00 2001 From: ashmod Date: Sun, 14 Sep 2025 18:25:33 +0300 Subject: [PATCH 01/14] feat(vulnfeeds): populate package.purl for GIT ranges --- vulnfeeds/cmd/combine-to-osv/main.go | 31 ++++++++++++ vulnfeeds/cmd/combine-to-osv/main_test.go | 37 +++++++++++++++ vulnfeeds/git/purl.go | 33 +++++++++++++ vulnfeeds/git/repository_test.go | 57 +++++++++++++++++++++++ vulnfeeds/go.mod | 2 +- 5 files changed, 159 insertions(+), 1 deletion(-) create mode 100644 vulnfeeds/git/purl.go diff --git a/vulnfeeds/cmd/combine-to-osv/main.go b/vulnfeeds/cmd/combine-to-osv/main.go index be44f13f395..07cef66bd25 100644 --- a/vulnfeeds/cmd/combine-to-osv/main.go +++ b/vulnfeeds/cmd/combine-to-osv/main.go @@ -12,6 +12,7 @@ import ( "time" "github.com/google/osv/vulnfeeds/cves" + gitpurl "github.com/google/osv/vulnfeeds/git" "github.com/google/osv/vulnfeeds/utility/logger" "github.com/google/osv/vulnfeeds/vulns" "github.com/ossf/osv-schema/bindings/go/osvschema" @@ -183,6 +184,7 @@ func combineIntoOSV(loadedCves map[cves.CVEID]cves.Vulnerability, allParts map[c if cvePartsModifiedTime[cveID].After(cveModified) { convertedCve.Modified = cvePartsModifiedTime[cveID] } + enrichRepoPURLs(convertedCve) convertedCves[cveID] = convertedCve } logger.Info("Ended writing OSV files", slog.Int("count", len(convertedCves))) @@ -258,3 +260,32 @@ func addReference(cveID string, ecosystem string, convertedCve *vulns.Vulnerabil convertedCve.References = append(convertedCve.References, securityReference) } + +// repoURLFromRanges returns the first repo URL from a GIT-type range, if present. +func repoURLFromRanges(ranges []osvschema.Range) string { + for _, r := range ranges { + if r.Type == "GIT" && r.Repo != "" { + return r.Repo + } + } + + return "" +} + +// enrichRepoPURLs sets affected.package.purl to an unversioned pkg:generic repo pURL +// when a GIT range with a repo URL exists and purl is currently empty. +func enrichRepoPURLs(v *vulns.Vulnerability) { + if v == nil || len(v.Affected) == 0 { + return + } + for i := range v.Affected { + if v.Affected[i].Package.Purl != "" { + continue + } + if repo := repoURLFromRanges(v.Affected[i].Ranges); repo != "" { + if p, err := gitpurl.BuildGenericRepoPURL(repo); err == nil && p != "" { + v.Affected[i].Package.Purl = p + } + } + } +} diff --git a/vulnfeeds/cmd/combine-to-osv/main_test.go b/vulnfeeds/cmd/combine-to-osv/main_test.go index e3cd32e6ca7..d6000654907 100644 --- a/vulnfeeds/cmd/combine-to-osv/main_test.go +++ b/vulnfeeds/cmd/combine-to-osv/main_test.go @@ -13,6 +13,7 @@ import ( "github.com/google/osv/vulnfeeds/cves" "github.com/google/osv/vulnfeeds/utility" + "github.com/ossf/osv-schema/bindings/go/osvschema" ) func loadTestData2(cveName string) cves.Vulnerability { @@ -179,3 +180,39 @@ func TestUpdateModifiedDate(t *testing.T) { t.Errorf("Wrong modified time, expected: %s, got: %s", time2, combinedOSV["CVE-2022-32746"].Modified) } } + +func TestRepoURLFromRanges_GIT(t *testing.T) { + t.Parallel() + + ranges := []osvschema.Range{ + { + Type: "GIT", + Repo: "https://github.com/eclipse-openj9/openj9", + Events: []osvschema.Event{ + {Introduced: "0"}, + }, + }, + } + got := repoURLFromRanges(ranges) + want := "https://github.com/eclipse-openj9/openj9" + if got != want { + t.Fatalf("repoURLFromRanges() = %q, want %q", got, want) + } +} + +func TestRepoURLFromRanges_NoGIT(t *testing.T) { + t.Parallel() + + ranges := []osvschema.Range{ + { + Type: "ECOSYSTEM", + Events: []osvschema.Event{ + {Introduced: "0"}, + {Fixed: "1.2.3"}, + }, + }, + } + if got := repoURLFromRanges(ranges); got != "" { + t.Fatalf("repoURLFromRanges() = %q, want empty", got) + } +} diff --git a/vulnfeeds/git/purl.go b/vulnfeeds/git/purl.go new file mode 100644 index 00000000000..a576a847057 --- /dev/null +++ b/vulnfeeds/git/purl.go @@ -0,0 +1,33 @@ +package git + +import ( + "fmt" + "net/url" + "strings" + + packageurl "github.com/package-url/packageurl-go" +) + +// BuildGenericRepoPURL returns an unversioned generic purl +// Example: pkg:generic/github.com/owner/repo +func BuildGenericRepoPURL(repoURL string) (string, error) { + u, err := url.Parse(repoURL) + if err != nil { + return "", fmt.Errorf("invalid repo url: %w", err) + } + + host := strings.ToLower(u.Hostname()) + path := strings.Trim(strings.TrimSuffix(u.EscapedPath(), ".git"), "/") + parts := strings.Split(path, "/") + if len(parts) < 2 { + return "", fmt.Errorf("invalid repo path in %q", repoURL) + } + + // Namespace is host + all path segments except the last; name is the last segment. + ns := strings.Join(append([]string{host}, parts[:len(parts)-1]...), "/") + name := parts[len(parts)-1] + + p := packageurl.NewPackageURL("generic", ns, name, "", nil, "") + + return p.ToString(), nil +} diff --git a/vulnfeeds/git/repository_test.go b/vulnfeeds/git/repository_test.go index dfe07b704e4..729b7946d42 100644 --- a/vulnfeeds/git/repository_test.go +++ b/vulnfeeds/git/repository_test.go @@ -383,3 +383,60 @@ func TestInvalidRepos(t *testing.T) { t.Errorf("These redundant repos are in InvalidRepos: %s", diff) } } + +func TestBuildGenericRepoPURL(t *testing.T) { + t.Parallel() + + tests := []struct { + desc string + inputURL string + wantPURL string + wantError bool + }{ + { + desc: "GitHub repo", + inputURL: "https://github.com/eclipse-openj9/openj9", + wantPURL: "pkg:generic/github.com/eclipse-openj9/openj9", + }, + { + desc: "GitHub repo with .git suffix", + inputURL: "https://github.com/torvalds/linux.git", + wantPURL: "pkg:generic/github.com/torvalds/linux", + }, + { + desc: "GitLab subgroup repo", + inputURL: "https://gitlab.com/group/subgroup/repo", + wantPURL: "pkg:generic/gitlab.com/group/subgroup/repo", + }, + { + desc: "Self-hosted cgit repo with .git", + inputURL: "https://git.libssh.org/projects/libssh.git", + wantPURL: "pkg:generic/git.libssh.org/projects/libssh", + }, + { + desc: "Insufficient path segments", + inputURL: "https://github.com/onlyowner", + wantError: true, + }, + } + + for _, tc := range tests { + t.Run(tc.desc, func(t *testing.T) { + t.Parallel() + got, err := BuildGenericRepoPURL(tc.inputURL) + if tc.wantError { + if err == nil { + t.Fatalf("BuildGenericRepoPURL(%q) = %q, want error", tc.inputURL, got) + } + + return + } + if err != nil { + t.Fatalf("BuildGenericRepoPURL(%q) unexpected error: %v", tc.inputURL, err) + } + if got != tc.wantPURL { + t.Fatalf("BuildGenericRepoPURL(%q) = %q, want %q", tc.inputURL, got, tc.wantPURL) + } + }) + } +} diff --git a/vulnfeeds/go.mod b/vulnfeeds/go.mod index 0dcf5133f68..cf1e03745b5 100644 --- a/vulnfeeds/go.mod +++ b/vulnfeeds/go.mod @@ -11,6 +11,7 @@ require ( github.com/google/osv-scanner v1.9.2 github.com/knqyf263/go-cpe v0.0.0-20230627041855-cb0794d06872 github.com/ossf/osv-schema/bindings/go v0.0.0-20250902063920-695987a6b7da + github.com/package-url/packageurl-go v0.1.3 github.com/sethvargo/go-retry v0.3.0 gopkg.in/dnaeon/go-vcr.v4 v4.0.5 gopkg.in/yaml.v2 v2.4.0 @@ -40,7 +41,6 @@ require ( github.com/googleapis/gax-go/v2 v2.14.2 // indirect github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 // indirect github.com/kevinburke/ssh_config v1.2.0 // indirect - github.com/package-url/packageurl-go v0.1.3 // indirect github.com/pjbgf/sha1cd v0.3.2 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 // indirect From a6e4d12c7e7750348b3b8120f45e595c6538840c Mon Sep 17 00:00:00 2001 From: ashmod Date: Sun, 14 Sep 2025 18:57:16 +0300 Subject: [PATCH 02/14] versions --- vulnfeeds/cmd/combine-to-osv/main.go | 76 +++++++++++++++++++++-- vulnfeeds/cmd/combine-to-osv/main_test.go | 40 ++++++++++++ 2 files changed, 111 insertions(+), 5 deletions(-) diff --git a/vulnfeeds/cmd/combine-to-osv/main.go b/vulnfeeds/cmd/combine-to-osv/main.go index 07cef66bd25..d9d8be1eda9 100644 --- a/vulnfeeds/cmd/combine-to-osv/main.go +++ b/vulnfeeds/cmd/combine-to-osv/main.go @@ -8,6 +8,7 @@ import ( "net/url" "os" "path" + "sort" "strings" "time" @@ -279,13 +280,78 @@ func enrichRepoPURLs(v *vulns.Vulnerability) { return } for i := range v.Affected { - if v.Affected[i].Package.Purl != "" { - continue + aff := &v.Affected[i] + + // Ensure base purl is set (unversioned). + if aff.Package.Purl == "" { + if repo := repoURLFromRanges(aff.Ranges); repo != "" { + if p, err := gitpurl.BuildGenericRepoPURL(repo); err == nil && p != "" { + aff.Package.Purl = p + } + } + } + + // Add versioned repo pURLs when possible. + if repo := repoURLFromRanges(aff.Ranges); repo != "" { + addVersionedRepoPURLs(aff, repo) } - if repo := repoURLFromRanges(v.Affected[i].Ranges); repo != "" { - if p, err := gitpurl.BuildGenericRepoPURL(repo); err == nil && p != "" { - v.Affected[i].Package.Purl = p + } +} + +var repoTagsCache = make(gitpurl.RepoTagsCache) + +// addVersionedRepoPURLs populates affected.database_specific["repo_purls"] +// with pkg:generic/...@ entries, using affected.versions if available. +func addVersionedRepoPURLs(aff *osvschema.Affected, repo string) { + if aff == nil || repo == "" { + return + } + + var tags []string + if len(aff.Versions) > 0 { + tags = append(tags, aff.Versions...) + } else if os.Getenv("ENABLE_REPO_PURL_TAGS") == "1" { + norm, err := gitpurl.NormalizeRepoTags(repo, repoTagsCache) + if err == nil && len(norm) > 0 { + for tag := range norm { + tags = append(tags, tag) } + sort.Strings(tags) + const maxTags = 200 + if len(tags) > maxTags { + tags = tags[:maxTags] + } + } + } + + if len(tags) == 0 { + return + } + + base, err := gitpurl.BuildGenericRepoPURL(repo) + if err != nil || base == "" { + return + } + + // Dedup and format. + seen := make(map[string]struct{}, len(tags)) + vPURLs := make([]string, 0, len(tags)) + for _, t := range tags { + if t == "" { + continue + } + if _, ok := seen[t]; ok { + continue } + seen[t] = struct{}{} + vPURLs = append(vPURLs, base+"@"+t) + } + if len(vPURLs) == 0 { + return + } + + if aff.DatabaseSpecific == nil { + aff.DatabaseSpecific = map[string]any{} } + aff.DatabaseSpecific["repo_purls"] = vPURLs } diff --git a/vulnfeeds/cmd/combine-to-osv/main_test.go b/vulnfeeds/cmd/combine-to-osv/main_test.go index d6000654907..5fc3db6baf6 100644 --- a/vulnfeeds/cmd/combine-to-osv/main_test.go +++ b/vulnfeeds/cmd/combine-to-osv/main_test.go @@ -12,6 +12,7 @@ import ( "maps" "github.com/google/osv/vulnfeeds/cves" + gitpurl "github.com/google/osv/vulnfeeds/git" "github.com/google/osv/vulnfeeds/utility" "github.com/ossf/osv-schema/bindings/go/osvschema" ) @@ -216,3 +217,42 @@ func TestRepoURLFromRanges_NoGIT(t *testing.T) { t.Fatalf("repoURLFromRanges() = %q, want empty", got) } } + +func TestAddVersionedRepoPURLs_FromVersions(t *testing.T) { + t.Setenv("ENABLE_REPO_PURL_TAGS", "") // ensure derivation path is off + + repo := "https://github.com/chriskohlhoff/asio" + aff := &osvschema.Affected{ + Package: osvschema.Package{Ecosystem: "GIT", Name: "asio"}, + Versions: []string{"asio-1-13-0", "asio-1-12-0"}, + Ranges: []osvschema.Range{{Type: "GIT", Repo: repo, Events: []osvschema.Event{{Introduced: "0"}}}}, + } + + addVersionedRepoPURLs(aff, repo) + + base, err := gitpurl.BuildGenericRepoPURL(repo) + if err != nil || base == "" { + t.Fatalf("failed to build base purl: %v", err) + } + + ds := aff.DatabaseSpecific + list, ok := ds["repo_purls"].([]string) + if !ok || len(list) == 0 { + t.Fatalf("repo_purls missing/empty: %#v", ds) + } + + want1 := base + "@asio-1-13-0" + want2 := base + "@asio-1-12-0" + found1, found2 := false, false + for _, p := range list { + if p == want1 { + found1 = true + } + if p == want2 { + found2 = true + } + } + if !found1 || !found2 { + t.Fatalf("missing expected entries, got %#v", list) + } +} From bed29c81a2ff42c2873dbd6ecffc378c63822d6e Mon Sep 17 00:00:00 2001 From: Shehab <127568346+ashmod@users.noreply.github.com> Date: Fri, 6 Feb 2026 01:41:53 +0200 Subject: [PATCH 03/14] fix indentation --- vulnfeeds/go.mod | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vulnfeeds/go.mod b/vulnfeeds/go.mod index d3a5f51fc33..fe00aab5656 100644 --- a/vulnfeeds/go.mod +++ b/vulnfeeds/go.mod @@ -13,7 +13,7 @@ require ( github.com/google/go-cmp v0.7.0 github.com/google/osv-scanner v1.9.2 github.com/knqyf263/go-cpe v0.0.0-20230627041855-cb0794d06872 - github.com/package-url/packageurl-go v0.1.3 + github.com/package-url/packageurl-go v0.1.3 github.com/ossf/osv-schema/bindings/go v0.0.0-20260129002236-09a17f85b44a github.com/sethvargo/go-retry v0.3.0 google.golang.org/api v0.265.0 From 3f2c3a7911f1a801eeae82d72a087dfb3995c63b Mon Sep 17 00:00:00 2001 From: ashmod Date: Mon, 13 Apr 2026 21:27:42 +0200 Subject: [PATCH 04/14] restore combine-to-osv build after schema migrations --- vulnfeeds/cmd/combine-to-osv/main.go | 52 ++++++++++++++--------- vulnfeeds/cmd/combine-to-osv/main_test.go | 36 ++++++++++------ vulnfeeds/go.mod | 2 +- 3 files changed, 57 insertions(+), 33 deletions(-) diff --git a/vulnfeeds/cmd/combine-to-osv/main.go b/vulnfeeds/cmd/combine-to-osv/main.go index 84515f28d77..a3926af0894 100644 --- a/vulnfeeds/cmd/combine-to-osv/main.go +++ b/vulnfeeds/cmd/combine-to-osv/main.go @@ -11,8 +11,8 @@ import ( "log/slog" "os" "path/filepath" - "sort" "slices" + "sort" "strings" "cloud.google.com/go/storage" @@ -24,6 +24,7 @@ import ( "github.com/ossf/osv-schema/bindings/go/osvschema" "google.golang.org/api/iterator" "google.golang.org/protobuf/encoding/protojson" + "google.golang.org/protobuf/types/known/structpb" ) const ( @@ -206,7 +207,7 @@ func combineIntoOSV(cve5osv map[models.CVEID]*osvschema.Vulnerability, nvdosv ma if len(nvd.GetAffected()) == 0 { continue } - enrichRepoPURLs(convertedCve) + enrichRepoPURLs(nvd) osvRecords[cveID] = nvd } @@ -372,10 +373,10 @@ func getRangeBoundaryVersions(events []*osvschema.Event) (introduced, fixed stri } // repoURLFromRanges returns the first repo URL from a GIT-type range, if present. -func repoURLFromRanges(ranges []osvschema.Range) string { +func repoURLFromRanges(ranges []*osvschema.Range) string { for _, r := range ranges { - if r.Type == "GIT" && r.Repo != "" { - return r.Repo + if r.GetType() == osvschema.Range_GIT && r.GetRepo() != "" { + return r.GetRepo() } } @@ -384,30 +385,32 @@ func repoURLFromRanges(ranges []osvschema.Range) string { // enrichRepoPURLs sets affected.package.purl to an unversioned pkg:generic repo pURL // when a GIT range with a repo URL exists and purl is currently empty. -func enrichRepoPURLs(v *vulns.Vulnerability) { - if v == nil || len(v.Affected) == 0 { +func enrichRepoPURLs(v *osvschema.Vulnerability) { + if v == nil || len(v.GetAffected()) == 0 { return } - for i := range v.Affected { - aff := &v.Affected[i] + for _, aff := range v.Affected { + repo := repoURLFromRanges(aff.GetRanges()) + if repo == "" { + continue + } // Ensure base purl is set (unversioned). + if aff.Package == nil { + aff.Package = &osvschema.Package{} + } if aff.Package.Purl == "" { - if repo := repoURLFromRanges(aff.Ranges); repo != "" { - if p, err := gitpurl.BuildGenericRepoPURL(repo); err == nil && p != "" { - aff.Package.Purl = p - } + if p, err := gitpurl.BuildGenericRepoPURL(repo); err == nil && p != "" { + aff.Package.Purl = p } } // Add versioned repo pURLs when possible. - if repo := repoURLFromRanges(aff.Ranges); repo != "" { - addVersionedRepoPURLs(aff, repo) - } + addVersionedRepoPURLs(aff, repo) } } -var repoTagsCache = make(gitpurl.RepoTagsCache) +var repoTagsCache = &gitpurl.RepoTagsCache{} // addVersionedRepoPURLs populates affected.database_specific["repo_purls"] // with pkg:generic/...@ entries, using affected.versions if available. @@ -459,8 +462,19 @@ func addVersionedRepoPURLs(aff *osvschema.Affected, repo string) { return } + anys := make([]any, len(vPURLs)) + for i, s := range vPURLs { + anys[i] = s + } + listVal, err := structpb.NewValue(anys) + if err != nil { + return + } if aff.DatabaseSpecific == nil { - aff.DatabaseSpecific = map[string]any{} + aff.DatabaseSpecific = &structpb.Struct{Fields: map[string]*structpb.Value{}} + } + if aff.DatabaseSpecific.Fields == nil { + aff.DatabaseSpecific.Fields = map[string]*structpb.Value{} } - aff.DatabaseSpecific["repo_purls"] = vPURLs + aff.DatabaseSpecific.Fields["repo_purls"] = listVal } diff --git a/vulnfeeds/cmd/combine-to-osv/main_test.go b/vulnfeeds/cmd/combine-to-osv/main_test.go index ec40a700706..753cf19a2d0 100644 --- a/vulnfeeds/cmd/combine-to-osv/main_test.go +++ b/vulnfeeds/cmd/combine-to-osv/main_test.go @@ -9,7 +9,6 @@ import ( "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" gitpurl "github.com/google/osv/vulnfeeds/git" - "github.com/google/osv/vulnfeeds/utility" "github.com/google/osv/vulnfeeds/models" "github.com/ossf/osv-schema/bindings/go/osvschema" "google.golang.org/protobuf/testing/protocmp" @@ -435,11 +434,11 @@ func TestCombineTwoOSVRecords(t *testing.T) { func TestRepoURLFromRanges_GIT(t *testing.T) { t.Parallel() - ranges := []osvschema.Range{ + ranges := []*osvschema.Range{ { - Type: "GIT", + Type: osvschema.Range_GIT, Repo: "https://github.com/eclipse-openj9/openj9", - Events: []osvschema.Event{ + Events: []*osvschema.Event{ {Introduced: "0"}, }, }, @@ -454,10 +453,10 @@ func TestRepoURLFromRanges_GIT(t *testing.T) { func TestRepoURLFromRanges_NoGIT(t *testing.T) { t.Parallel() - ranges := []osvschema.Range{ + ranges := []*osvschema.Range{ { - Type: "ECOSYSTEM", - Events: []osvschema.Event{ + Type: osvschema.Range_ECOSYSTEM, + Events: []*osvschema.Event{ {Introduced: "0"}, {Fixed: "1.2.3"}, }, @@ -473,9 +472,13 @@ func TestAddVersionedRepoPURLs_FromVersions(t *testing.T) { repo := "https://github.com/chriskohlhoff/asio" aff := &osvschema.Affected{ - Package: osvschema.Package{Ecosystem: "GIT", Name: "asio"}, + Package: &osvschema.Package{Ecosystem: "GIT", Name: "asio"}, Versions: []string{"asio-1-13-0", "asio-1-12-0"}, - Ranges: []osvschema.Range{{Type: "GIT", Repo: repo, Events: []osvschema.Event{{Introduced: "0"}}}}, + Ranges: []*osvschema.Range{{ + Type: osvschema.Range_GIT, + Repo: repo, + Events: []*osvschema.Event{{Introduced: "0"}}, + }}, } addVersionedRepoPURLs(aff, repo) @@ -485,10 +488,17 @@ func TestAddVersionedRepoPURLs_FromVersions(t *testing.T) { t.Fatalf("failed to build base purl: %v", err) } - ds := aff.DatabaseSpecific - list, ok := ds["repo_purls"].([]string) - if !ok || len(list) == 0 { - t.Fatalf("repo_purls missing/empty: %#v", ds) + field := aff.GetDatabaseSpecific().GetFields()["repo_purls"] + if field == nil { + t.Fatalf("repo_purls missing: %#v", aff.GetDatabaseSpecific()) + } + values := field.GetListValue().GetValues() + if len(values) == 0 { + t.Fatalf("repo_purls empty: %#v", aff.GetDatabaseSpecific()) + } + list := make([]string, 0, len(values)) + for _, v := range values { + list = append(list, v.GetStringValue()) } want1 := base + "@asio-1-13-0" diff --git a/vulnfeeds/go.mod b/vulnfeeds/go.mod index fe00aab5656..e73c4805b83 100644 --- a/vulnfeeds/go.mod +++ b/vulnfeeds/go.mod @@ -13,8 +13,8 @@ require ( github.com/google/go-cmp v0.7.0 github.com/google/osv-scanner v1.9.2 github.com/knqyf263/go-cpe v0.0.0-20230627041855-cb0794d06872 - github.com/package-url/packageurl-go v0.1.3 github.com/ossf/osv-schema/bindings/go v0.0.0-20260129002236-09a17f85b44a + github.com/package-url/packageurl-go v0.1.3 github.com/sethvargo/go-retry v0.3.0 google.golang.org/api v0.265.0 google.golang.org/protobuf v1.36.11 From 77137dea36ab9698af9fc7b12ca49ebcc1dfca76 Mon Sep 17 00:00:00 2001 From: ashmod Date: Mon, 13 Apr 2026 21:32:11 +0200 Subject: [PATCH 05/14] enrich cve5 records with purls --- vulnfeeds/cmd/combine-to-osv/main.go | 1 + 1 file changed, 1 insertion(+) diff --git a/vulnfeeds/cmd/combine-to-osv/main.go b/vulnfeeds/cmd/combine-to-osv/main.go index a3926af0894..b60ebb83469 100644 --- a/vulnfeeds/cmd/combine-to-osv/main.go +++ b/vulnfeeds/cmd/combine-to-osv/main.go @@ -199,6 +199,7 @@ func combineIntoOSV(cve5osv map[models.CVEID]*osvschema.Vulnerability, nvdosv ma continue } } + enrichRepoPURLs(baseOSV) osvRecords[cveID] = baseOSV } From a141f221432b38f905806bbc85de657af4ce234a Mon Sep 17 00:00:00 2001 From: ashmod Date: Mon, 13 Apr 2026 21:37:59 +0200 Subject: [PATCH 06/14] harden purl build input validation --- vulnfeeds/git/purl.go | 14 ++++++++++++-- vulnfeeds/git/repository_test.go | 20 ++++++++++++++++++++ 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/vulnfeeds/git/purl.go b/vulnfeeds/git/purl.go index a576a847057..1113c8c68a9 100644 --- a/vulnfeeds/git/purl.go +++ b/vulnfeeds/git/purl.go @@ -16,10 +16,20 @@ func BuildGenericRepoPURL(repoURL string) (string, error) { return "", fmt.Errorf("invalid repo url: %w", err) } + switch strings.ToLower(u.Scheme) { + case "http", "https": + default: + return "", fmt.Errorf("unsupported scheme %q in %q", u.Scheme, repoURL) + } + host := strings.ToLower(u.Hostname()) - path := strings.Trim(strings.TrimSuffix(u.EscapedPath(), ".git"), "/") + if host == "" { + return "", fmt.Errorf("missing host in %q", repoURL) + } + + path := strings.Trim(strings.TrimSuffix(u.Path, ".git"), "/") parts := strings.Split(path, "/") - if len(parts) < 2 { + if len(parts) < 2 || parts[0] == "" { return "", fmt.Errorf("invalid repo path in %q", repoURL) } diff --git a/vulnfeeds/git/repository_test.go b/vulnfeeds/git/repository_test.go index 9cf57e037bc..d9d7b17304c 100644 --- a/vulnfeeds/git/repository_test.go +++ b/vulnfeeds/git/repository_test.go @@ -430,6 +430,26 @@ func TestBuildGenericRepoPURL(t *testing.T) { inputURL: "https://github.com/onlyowner", wantError: true, }, + { + desc: "SCP-style git URL", + inputURL: "git@github.com:owner/repo.git", + wantError: true, + }, + { + desc: "Unsupported scheme", + inputURL: "ftp://example.com/owner/repo", + wantError: true, + }, + { + desc: "Missing host", + inputURL: "https:///owner/repo", + wantError: true, + }, + { + desc: "Empty input", + inputURL: "", + wantError: true, + }, } for _, tc := range tests { From 1fb63de36ab2abe3fea170988d6f310199acf4b0 Mon Sep 17 00:00:00 2001 From: ashmod Date: Mon, 13 Apr 2026 21:49:20 +0200 Subject: [PATCH 07/14] improve repo tag handling by capping version tags --- vulnfeeds/cmd/combine-to-osv/main.go | 36 +++++++++++++++------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/vulnfeeds/cmd/combine-to-osv/main.go b/vulnfeeds/cmd/combine-to-osv/main.go index b60ebb83469..8f4af8821ed 100644 --- a/vulnfeeds/cmd/combine-to-osv/main.go +++ b/vulnfeeds/cmd/combine-to-osv/main.go @@ -384,8 +384,15 @@ func repoURLFromRanges(ranges []*osvschema.Range) string { return "" } -// enrichRepoPURLs sets affected.package.purl to an unversioned pkg:generic repo pURL -// when a GIT range with a repo URL exists and purl is currently empty. +// maxRepoPURLTags caps the number of versioned pURLs attached to a record +const maxRepoPURLTags = 200 + +var repoTagsCache = &gitpurl.RepoTagsCache{} + +// enrichRepoPURLs populates repo-derived pURLs on each affected entry that +// has a GIT-type range: an unversioned pkg:generic purl on +// affected.package.purl (when unset), and a list of versioned variants under +// affected.database_specific["repo_purls"]. func enrichRepoPURLs(v *osvschema.Vulnerability) { if v == nil || len(v.GetAffected()) == 0 { return @@ -396,7 +403,6 @@ func enrichRepoPURLs(v *osvschema.Vulnerability) { continue } - // Ensure base purl is set (unversioned). if aff.Package == nil { aff.Package = &osvschema.Package{} } @@ -406,15 +412,13 @@ func enrichRepoPURLs(v *osvschema.Vulnerability) { } } - // Add versioned repo pURLs when possible. addVersionedRepoPURLs(aff, repo) } } -var repoTagsCache = &gitpurl.RepoTagsCache{} - // addVersionedRepoPURLs populates affected.database_specific["repo_purls"] -// with pkg:generic/...@ entries, using affected.versions if available. +// with pkg:generic/...@ entries, using affected.versions if available +// or (behind ENABLE_REPO_PURL_TAGS) tags discovered from the remote repo. func addVersionedRepoPURLs(aff *osvschema.Affected, repo string) { if aff == nil || repo == "" { return @@ -430,12 +434,11 @@ func addVersionedRepoPURLs(aff *osvschema.Affected, repo string) { tags = append(tags, tag) } sort.Strings(tags) - const maxTags = 200 - if len(tags) > maxTags { - tags = tags[:maxTags] - } } } + if len(tags) > maxRepoPURLTags { + tags = tags[:maxRepoPURLTags] + } if len(tags) == 0 { return @@ -446,9 +449,8 @@ func addVersionedRepoPURLs(aff *osvschema.Affected, repo string) { return } - // Dedup and format. seen := make(map[string]struct{}, len(tags)) - vPURLs := make([]string, 0, len(tags)) + versionedPURLs := make([]string, 0, len(tags)) for _, t := range tags { if t == "" { continue @@ -457,14 +459,14 @@ func addVersionedRepoPURLs(aff *osvschema.Affected, repo string) { continue } seen[t] = struct{}{} - vPURLs = append(vPURLs, base+"@"+t) + versionedPURLs = append(versionedPURLs, base+"@"+t) } - if len(vPURLs) == 0 { + if len(versionedPURLs) == 0 { return } - anys := make([]any, len(vPURLs)) - for i, s := range vPURLs { + anys := make([]any, len(versionedPURLs)) + for i, s := range versionedPURLs { anys[i] = s } listVal, err := structpb.NewValue(anys) From 8b64f9671778c847a8e42ba746abc912f572814a Mon Sep 17 00:00:00 2001 From: ashmod Date: Mon, 13 Apr 2026 21:59:48 +0200 Subject: [PATCH 08/14] add tests --- vulnfeeds/cmd/combine-to-osv/main_test.go | 204 ++++++++++++++++++++++ 1 file changed, 204 insertions(+) diff --git a/vulnfeeds/cmd/combine-to-osv/main_test.go b/vulnfeeds/cmd/combine-to-osv/main_test.go index 753cf19a2d0..30ac547286f 100644 --- a/vulnfeeds/cmd/combine-to-osv/main_test.go +++ b/vulnfeeds/cmd/combine-to-osv/main_test.go @@ -1,6 +1,7 @@ package main import ( + "fmt" "path/filepath" "sort" "testing" @@ -516,3 +517,206 @@ func TestAddVersionedRepoPURLs_FromVersions(t *testing.T) { t.Fatalf("missing expected entries, got %#v", list) } } + +// repoPURLs pulls the string list stored under database_specific["repo_purls"] +// so tests can assert on the versioned pURLs attached by enrichRepoPURLs. +func repoPURLs(t *testing.T, aff *osvschema.Affected) []string { + t.Helper() + field := aff.GetDatabaseSpecific().GetFields()["repo_purls"] + if field == nil { + return nil + } + values := field.GetListValue().GetValues() + out := make([]string, 0, len(values)) + for _, v := range values { + out = append(out, v.GetStringValue()) + } + + return out +} + +func TestEnrichRepoPURLs_Issue3807(t *testing.T) { + t.Parallel() + + repo := "https://github.com/chriskohlhoff/asio" + v := &osvschema.Vulnerability{ + Id: "CVE-2019-25219", + Affected: []*osvschema.Affected{ + { + Versions: []string{"asio-1-12-0", "asio-1-12-1", "asio-1-13-0"}, + Ranges: []*osvschema.Range{{ + Type: osvschema.Range_GIT, + Repo: repo, + Events: []*osvschema.Event{{Introduced: "0"}}, + }}, + }, + }, + } + + enrichRepoPURLs(v) + + aff := v.Affected[0] + wantBase := "pkg:generic/github.com/chriskohlhoff/asio" + if got := aff.GetPackage().GetPurl(); got != wantBase { + t.Errorf("package.purl = %q, want %q", got, wantBase) + } + + got := repoPURLs(t, aff) + want := []string{ + wantBase + "@asio-1-12-0", + wantBase + "@asio-1-12-1", + wantBase + "@asio-1-13-0", + } + sort.Strings(got) + sort.Strings(want) + if diff := cmp.Diff(want, got); diff != "" { + t.Errorf("repo_purls mismatch (-want +got):\n%s", diff) + } +} + +func TestEnrichRepoPURLs_PreservesExistingPurl(t *testing.T) { + t.Parallel() + + existing := "pkg:deb/debian/libasio-dev" + v := &osvschema.Vulnerability{ + Affected: []*osvschema.Affected{ + { + Package: &osvschema.Package{Purl: existing}, + Versions: []string{"asio-1-12-0"}, + Ranges: []*osvschema.Range{{ + Type: osvschema.Range_GIT, + Repo: "https://github.com/chriskohlhoff/asio", + Events: []*osvschema.Event{{Introduced: "0"}}, + }}, + }, + }, + } + + enrichRepoPURLs(v) + + if got := v.Affected[0].GetPackage().GetPurl(); got != existing { + t.Errorf("package.purl clobbered: got %q, want %q", got, existing) + } + if got := repoPURLs(t, v.Affected[0]); len(got) == 0 { + t.Errorf("expected repo_purls to be populated, got none") + } +} + +func TestEnrichRepoPURLs_NonGITRangeNoop(t *testing.T) { + t.Parallel() + + v := &osvschema.Vulnerability{ + Affected: []*osvschema.Affected{ + { + Package: &osvschema.Package{Ecosystem: "Debian:11", Name: "libasio"}, + Versions: []string{"1.18.1"}, + Ranges: []*osvschema.Range{{ + Type: osvschema.Range_ECOSYSTEM, + Events: []*osvschema.Event{{Introduced: "0"}, {Fixed: "1.18.2"}}, + }}, + }, + }, + } + + enrichRepoPURLs(v) + + if got := v.Affected[0].GetPackage().GetPurl(); got != "" { + t.Errorf("package.purl = %q, want empty (no GIT range)", got) + } + if got := repoPURLs(t, v.Affected[0]); len(got) != 0 { + t.Errorf("repo_purls should be absent, got %v", got) + } +} + +func TestEnrichRepoPURLs_DotGitSuffix(t *testing.T) { + t.Parallel() + + v := &osvschema.Vulnerability{ + Affected: []*osvschema.Affected{ + { + Versions: []string{"v1.2.11"}, + Ranges: []*osvschema.Range{{ + Type: osvschema.Range_GIT, + Repo: "https://github.com/madler/zlib.git", + Events: []*osvschema.Event{{Introduced: "0"}}, + }}, + }, + }, + } + + enrichRepoPURLs(v) + + wantBase := "pkg:generic/github.com/madler/zlib" + if got := v.Affected[0].GetPackage().GetPurl(); got != wantBase { + t.Errorf("package.purl = %q, want %q (.git suffix should be stripped)", got, wantBase) + } + want := []string{wantBase + "@v1.2.11"} + if diff := cmp.Diff(want, repoPURLs(t, v.Affected[0])); diff != "" { + t.Errorf("repo_purls mismatch (-want +got):\n%s", diff) + } +} + +func TestEnrichRepoPURLs_MultipleAffectedOnlyGITEnriched(t *testing.T) { + t.Parallel() + + v := &osvschema.Vulnerability{ + Affected: []*osvschema.Affected{ + { + Package: &osvschema.Package{Ecosystem: "Debian:11", Name: "libasio"}, + Versions: []string{"1.18.1"}, + Ranges: []*osvschema.Range{{ + Type: osvschema.Range_ECOSYSTEM, + Events: []*osvschema.Event{{Introduced: "0"}, {Fixed: "1.18.2"}}, + }}, + }, + { + Versions: []string{"asio-1-12-0"}, + Ranges: []*osvschema.Range{{ + Type: osvschema.Range_GIT, + Repo: "https://github.com/chriskohlhoff/asio", + Events: []*osvschema.Event{{Introduced: "0"}}, + }}, + }, + }, + } + + enrichRepoPURLs(v) + + if got := v.Affected[0].GetPackage().GetPurl(); got != "" { + t.Errorf("affected[0] (ECOSYSTEM) package.purl = %q, want empty", got) + } + if got := repoPURLs(t, v.Affected[0]); len(got) != 0 { + t.Errorf("affected[0] (ECOSYSTEM) repo_purls should be empty, got %v", got) + } + + wantBase := "pkg:generic/github.com/chriskohlhoff/asio" + if got := v.Affected[1].GetPackage().GetPurl(); got != wantBase { + t.Errorf("affected[1] (GIT) package.purl = %q, want %q", got, wantBase) + } + if got := repoPURLs(t, v.Affected[1]); len(got) != 1 || got[0] != wantBase+"@asio-1-12-0" { + t.Errorf("affected[1] (GIT) repo_purls = %v, want [%s@asio-1-12-0]", got, wantBase) + } +} + +func TestAddVersionedRepoPURLs_CapsLargeVersionLists(t *testing.T) { + t.Parallel() + + versions := make([]string, maxRepoPURLTags+50) + for i := range versions { + versions[i] = fmt.Sprintf("v1.0.%d", i) + } + aff := &osvschema.Affected{ + Versions: versions, + Ranges: []*osvschema.Range{{ + Type: osvschema.Range_GIT, + Repo: "https://github.com/example/big", + }}, + } + + addVersionedRepoPURLs(aff, "https://github.com/example/big") + + got := repoPURLs(t, aff) + if len(got) != maxRepoPURLTags { + t.Errorf("len(repo_purls) = %d, want %d", len(got), maxRepoPURLTags) + } +} From 80406e7ceae9101b8acea9677ba25fb438c478a1 Mon Sep 17 00:00:00 2001 From: ashmod Date: Mon, 13 Apr 2026 22:40:31 +0200 Subject: [PATCH 09/14] build versioned pURLs via packageurl --- vulnfeeds/cmd/combine-to-osv/main.go | 11 +++- vulnfeeds/cmd/combine-to-osv/main_test.go | 27 +++++++++ vulnfeeds/git/purl.go | 17 +++++- vulnfeeds/git/repository_test.go | 69 +++++++++++++++++++++++ 4 files changed, 119 insertions(+), 5 deletions(-) diff --git a/vulnfeeds/cmd/combine-to-osv/main.go b/vulnfeeds/cmd/combine-to-osv/main.go index ec8135ccd2d..92325e3a681 100644 --- a/vulnfeeds/cmd/combine-to-osv/main.go +++ b/vulnfeeds/cmd/combine-to-osv/main.go @@ -470,8 +470,9 @@ func addVersionedRepoPURLs(aff *osvschema.Affected, repo string) { return } - base, err := gitpurl.BuildGenericRepoPURL(repo) - if err != nil || base == "" { + // Validate the repo URL once up front; if the base purl can't be built + // the per-tag calls below will all fail for the same reason. + if _, err := gitpurl.BuildGenericRepoPURL(repo); err != nil { return } @@ -485,7 +486,11 @@ func addVersionedRepoPURLs(aff *osvschema.Affected, repo string) { continue } seen[t] = struct{}{} - versionedPURLs = append(versionedPURLs, base+"@"+t) + p, err := gitpurl.BuildVersionedGenericRepoPURL(repo, t) + if err != nil || p == "" { + continue + } + versionedPURLs = append(versionedPURLs, p) } if len(versionedPURLs) == 0 { return diff --git a/vulnfeeds/cmd/combine-to-osv/main_test.go b/vulnfeeds/cmd/combine-to-osv/main_test.go index 6f102e9d304..21a016b646f 100644 --- a/vulnfeeds/cmd/combine-to-osv/main_test.go +++ b/vulnfeeds/cmd/combine-to-osv/main_test.go @@ -698,6 +698,33 @@ func TestEnrichRepoPURLs_MultipleAffectedOnlyGITEnriched(t *testing.T) { } } +func TestAddVersionedRepoPURLs_EscapesSpecialCharsInTags(t *testing.T) { + t.Parallel() + + repo := "https://github.com/example/repo" + aff := &osvschema.Affected{ + Versions: []string{"release/1.2.3", "v1.0 beta", "rel#1"}, + Ranges: []*osvschema.Range{{ + Type: osvschema.Range_GIT, + Repo: repo, + }}, + } + + addVersionedRepoPURLs(aff, repo) + + got := repoPURLs(t, aff) + want := []string{ + "pkg:generic/github.com/example/repo@rel%231", + "pkg:generic/github.com/example/repo@release%2F1.2.3", + "pkg:generic/github.com/example/repo@v1.0%20beta", + } + sort.Strings(got) + sort.Strings(want) + if diff := cmp.Diff(want, got); diff != "" { + t.Errorf("repo_purls mismatch (-want +got):\n%s", diff) + } +} + func TestAddVersionedRepoPURLs_CapsLargeVersionLists(t *testing.T) { t.Parallel() diff --git a/vulnfeeds/git/purl.go b/vulnfeeds/git/purl.go index 1113c8c68a9..03cbc67db43 100644 --- a/vulnfeeds/git/purl.go +++ b/vulnfeeds/git/purl.go @@ -8,9 +8,22 @@ import ( packageurl "github.com/package-url/packageurl-go" ) -// BuildGenericRepoPURL returns an unversioned generic purl +// BuildGenericRepoPURL returns an unversioned generic purl for a repo URL. // Example: pkg:generic/github.com/owner/repo func BuildGenericRepoPURL(repoURL string) (string, error) { + return buildGenericRepoPURL(repoURL, "") +} + +// BuildVersionedGenericRepoPURL returns a generic purl whose version +// component is the given string, encoded by packageurl-go. +// A tag like "release/1.2.3" becomes "...@release%2F1.2.3" rather than a +// malformed "...@release/1.2.3". +// Example: pkg:generic/github.com/owner/repo@v1.0.0 +func BuildVersionedGenericRepoPURL(repoURL, version string) (string, error) { + return buildGenericRepoPURL(repoURL, version) +} + +func buildGenericRepoPURL(repoURL, version string) (string, error) { u, err := url.Parse(repoURL) if err != nil { return "", fmt.Errorf("invalid repo url: %w", err) @@ -37,7 +50,7 @@ func BuildGenericRepoPURL(repoURL string) (string, error) { ns := strings.Join(append([]string{host}, parts[:len(parts)-1]...), "/") name := parts[len(parts)-1] - p := packageurl.NewPackageURL("generic", ns, name, "", nil, "") + p := packageurl.NewPackageURL("generic", ns, name, version, nil, "") return p.ToString(), nil } diff --git a/vulnfeeds/git/repository_test.go b/vulnfeeds/git/repository_test.go index d9d7b17304c..86fb3f9c62b 100644 --- a/vulnfeeds/git/repository_test.go +++ b/vulnfeeds/git/repository_test.go @@ -472,3 +472,72 @@ func TestBuildGenericRepoPURL(t *testing.T) { }) } } + +func TestBuildVersionedGenericRepoPURL(t *testing.T) { + t.Parallel() + + tests := []struct { + desc string + inputURL string + version string + wantPURL string + wantError bool + }{ + { + desc: "simple version", + inputURL: "https://github.com/owner/repo", + version: "v1.2.3", + wantPURL: "pkg:generic/github.com/owner/repo@v1.2.3", + }, + { + desc: "tag with slash is percent-encoded", + inputURL: "https://github.com/owner/repo", + version: "release/1.2.3", + wantPURL: "pkg:generic/github.com/owner/repo@release%2F1.2.3", + }, + { + desc: "tag with space", + inputURL: "https://github.com/owner/repo", + version: "v1.0 beta", + wantPURL: "pkg:generic/github.com/owner/repo@v1.0%20beta", + }, + { + desc: "tag with hash", + inputURL: "https://github.com/owner/repo", + version: "rel#1", + wantPURL: "pkg:generic/github.com/owner/repo@rel%231", + }, + { + desc: "empty version produces unversioned purl", + inputURL: "https://github.com/owner/repo", + version: "", + wantPURL: "pkg:generic/github.com/owner/repo", + }, + { + desc: "invalid repo url still fails", + inputURL: "ftp://example.com/owner/repo", + version: "v1.0", + wantError: true, + }, + } + + for _, tc := range tests { + t.Run(tc.desc, func(t *testing.T) { + t.Parallel() + got, err := BuildVersionedGenericRepoPURL(tc.inputURL, tc.version) + if tc.wantError { + if err == nil { + t.Fatalf("BuildVersionedGenericRepoPURL(%q, %q) = %q, want error", tc.inputURL, tc.version, got) + } + + return + } + if err != nil { + t.Fatalf("BuildVersionedGenericRepoPURL(%q, %q) unexpected error: %v", tc.inputURL, tc.version, err) + } + if got != tc.wantPURL { + t.Fatalf("BuildVersionedGenericRepoPURL(%q, %q) = %q, want %q", tc.inputURL, tc.version, got, tc.wantPURL) + } + }) + } +} From 7dd6339c4c092fa294854265bde5ebd67e3614e8 Mon Sep 17 00:00:00 2001 From: ashmod Date: Mon, 13 Apr 2026 23:07:03 +0200 Subject: [PATCH 10/14] normalize git-transport variants and cleanup --- vulnfeeds/cmd/combine-to-osv/main.go | 66 ++++++-------- vulnfeeds/cmd/combine-to-osv/main_test.go | 2 +- vulnfeeds/git/purl.go | 87 ++++++++++++++---- vulnfeeds/git/repository_test.go | 106 +++++++++------------- 4 files changed, 137 insertions(+), 124 deletions(-) diff --git a/vulnfeeds/cmd/combine-to-osv/main.go b/vulnfeeds/cmd/combine-to-osv/main.go index 92325e3a681..125bfd8d244 100644 --- a/vulnfeeds/cmd/combine-to-osv/main.go +++ b/vulnfeeds/cmd/combine-to-osv/main.go @@ -20,11 +20,11 @@ import ( gitpurl "github.com/google/osv/vulnfeeds/git" "github.com/google/osv/vulnfeeds/models" "github.com/google/osv/vulnfeeds/upload" + "github.com/google/osv/vulnfeeds/utility" "github.com/google/osv/vulnfeeds/utility/logger" "github.com/ossf/osv-schema/bindings/go/osvschema" "google.golang.org/api/iterator" "google.golang.org/protobuf/encoding/protojson" - "google.golang.org/protobuf/types/known/structpb" ) const ( @@ -410,10 +410,15 @@ func repoURLFromRanges(ranges []*osvschema.Range) string { return "" } -// maxRepoPURLTags caps the number of versioned pURLs attached to a record -const maxRepoPURLTags = 200 +const ( + maxRepoPURLTags = 200 + repoPURLsKey = "repo_purls" +) -var repoTagsCache = &gitpurl.RepoTagsCache{} +var ( + repoTagsCache = &gitpurl.RepoTagsCache{} + enableRepoPURLTags = os.Getenv("ENABLE_REPO_PURL_TAGS") == "1" +) // enrichRepoPURLs populates repo-derived pURLs on each affected entry that // has a GIT-type range: an unversioned pkg:generic purl on @@ -442,20 +447,20 @@ func enrichRepoPURLs(v *osvschema.Vulnerability) { } } -// addVersionedRepoPURLs populates affected.database_specific["repo_purls"] -// with pkg:generic/...@ entries, using affected.versions if available -// or (behind ENABLE_REPO_PURL_TAGS) tags discovered from the remote repo. +// addVersionedRepoPURLs attaches versioned pkg:generic/...@ entries +// under affected.database_specific[repoPURLsKey], using affected.versions +// if available or (behind ENABLE_REPO_PURL_TAGS) tags discovered from the +// remote repo. func addVersionedRepoPURLs(aff *osvschema.Affected, repo string) { if aff == nil || repo == "" { return } - var tags []string - if len(aff.Versions) > 0 { - tags = append(tags, aff.Versions...) - } else if os.Getenv("ENABLE_REPO_PURL_TAGS") == "1" { + tags := aff.Versions + if len(tags) == 0 && enableRepoPURLTags { norm, err := gitpurl.NormalizeRepoTags(repo, repoTagsCache) if err == nil && len(norm) > 0 { + tags = make([]string, 0, len(norm)) for tag := range norm { tags = append(tags, tag) } @@ -465,50 +470,35 @@ func addVersionedRepoPURLs(aff *osvschema.Affected, repo string) { if len(tags) > maxRepoPURLTags { tags = tags[:maxRepoPURLTags] } - if len(tags) == 0 { return } - // Validate the repo URL once up front; if the base purl can't be built - // the per-tag calls below will all fail for the same reason. - if _, err := gitpurl.BuildGenericRepoPURL(repo); err != nil { + tmpl, err := gitpurl.ParseRepoPURL(repo) + if err != nil { return } - seen := make(map[string]struct{}, len(tags)) - versionedPURLs := make([]string, 0, len(tags)) + versionedPURLs := make([]any, 0, len(tags)) for _, t := range tags { if t == "" { continue } - if _, ok := seen[t]; ok { - continue - } - seen[t] = struct{}{} - p, err := gitpurl.BuildVersionedGenericRepoPURL(repo, t) - if err != nil || p == "" { - continue - } - versionedPURLs = append(versionedPURLs, p) + tmpl.Version = t + versionedPURLs = append(versionedPURLs, tmpl.ToString()) } if len(versionedPURLs) == 0 { return } - anys := make([]any, len(versionedPURLs)) - for i, s := range versionedPURLs { - anys[i] = s - } - listVal, err := structpb.NewValue(anys) - if err != nil { - return - } if aff.DatabaseSpecific == nil { - aff.DatabaseSpecific = &structpb.Struct{Fields: map[string]*structpb.Value{}} + ds, err := utility.NewStructpbFromMap(nil) + if err != nil { + return + } + aff.DatabaseSpecific = ds } - if aff.DatabaseSpecific.Fields == nil { - aff.DatabaseSpecific.Fields = map[string]*structpb.Value{} + if err := conversion.AddFieldToDatabaseSpecific(aff.DatabaseSpecific, repoPURLsKey, versionedPURLs); err != nil { + return } - aff.DatabaseSpecific.Fields["repo_purls"] = listVal } diff --git a/vulnfeeds/cmd/combine-to-osv/main_test.go b/vulnfeeds/cmd/combine-to-osv/main_test.go index 21a016b646f..d4f3ee55493 100644 --- a/vulnfeeds/cmd/combine-to-osv/main_test.go +++ b/vulnfeeds/cmd/combine-to-osv/main_test.go @@ -535,7 +535,7 @@ func repoPURLs(t *testing.T, aff *osvschema.Affected) []string { return out } -func TestEnrichRepoPURLs_Issue3807(t *testing.T) { +func TestEnrichRepoPURLs_GITRangeWithTagVersions(t *testing.T) { t.Parallel() repo := "https://github.com/chriskohlhoff/asio" diff --git a/vulnfeeds/git/purl.go b/vulnfeeds/git/purl.go index 03cbc67db43..a55dd9f1256 100644 --- a/vulnfeeds/git/purl.go +++ b/vulnfeeds/git/purl.go @@ -3,6 +3,7 @@ package git import ( "fmt" "net/url" + "strconv" "strings" packageurl "github.com/package-url/packageurl-go" @@ -11,46 +12,92 @@ import ( // BuildGenericRepoPURL returns an unversioned generic purl for a repo URL. // Example: pkg:generic/github.com/owner/repo func BuildGenericRepoPURL(repoURL string) (string, error) { - return buildGenericRepoPURL(repoURL, "") -} + p, err := ParseRepoPURL(repoURL) + if err != nil { + return "", err + } -// BuildVersionedGenericRepoPURL returns a generic purl whose version -// component is the given string, encoded by packageurl-go. -// A tag like "release/1.2.3" becomes "...@release%2F1.2.3" rather than a -// malformed "...@release/1.2.3". -// Example: pkg:generic/github.com/owner/repo@v1.0.0 -func BuildVersionedGenericRepoPURL(repoURL, version string) (string, error) { - return buildGenericRepoPURL(repoURL, version) + return p.ToString(), nil } -func buildGenericRepoPURL(repoURL, version string) (string, error) { - u, err := url.Parse(repoURL) +// ParseRepoPURL decodes a repo URL into a PackageURL template with type, +// namespace, and name populated. packageurl-go handles version +// escaping so reserved characters such as "/" are encoded to "%2F". +func ParseRepoPURL(repoURL string) (*packageurl.PackageURL, error) { + u, err := url.Parse(normalizeRepoURL(repoURL)) if err != nil { - return "", fmt.Errorf("invalid repo url: %w", err) + return nil, fmt.Errorf("invalid repo url: %w", err) } switch strings.ToLower(u.Scheme) { case "http", "https": default: - return "", fmt.Errorf("unsupported scheme %q in %q", u.Scheme, repoURL) + return nil, fmt.Errorf("unsupported scheme %q in %q", u.Scheme, repoURL) } host := strings.ToLower(u.Hostname()) if host == "" { - return "", fmt.Errorf("missing host in %q", repoURL) + return nil, fmt.Errorf("missing host in %q", repoURL) } path := strings.Trim(strings.TrimSuffix(u.Path, ".git"), "/") parts := strings.Split(path, "/") if len(parts) < 2 || parts[0] == "" { - return "", fmt.Errorf("invalid repo path in %q", repoURL) + return nil, fmt.Errorf("invalid repo path in %q", repoURL) } - // Namespace is host + all path segments except the last; name is the last segment. - ns := strings.Join(append([]string{host}, parts[:len(parts)-1]...), "/") - name := parts[len(parts)-1] + return packageurl.NewPackageURL( + "generic", + strings.Join(append([]string{host}, parts[:len(parts)-1]...), "/"), + parts[len(parts)-1], + "", nil, "", + ), nil +} - p := packageurl.NewPackageURL("generic", ns, name, version, nil, "") +// normalizeRepoURL rewrites common git-transport variants into an https URL +// so ParseRepoPURL can treat them uniformly. +// Inputs with an unrecognized scheme (ftp://, file://, …) are returned as-is +// so ParseRepoPURL can reject them via its scheme check. +func normalizeRepoURL(raw string) string { + raw = strings.TrimSpace(raw) - return p.ToString(), nil + if strings.HasPrefix(raw, "http://") || strings.HasPrefix(raw, "https://") { + return raw + } + + body := raw + hadScheme := false + if i := strings.Index(body, "://"); i != -1 { + scheme := strings.ToLower(body[:i]) + if scheme != "git" && scheme != "ssh" { + return raw + } + body = body[i+3:] + hadScheme = true + } + + if at := strings.Index(body, "@"); at != -1 { + if slash := strings.Index(body, "/"); slash == -1 || at < slash { + body = body[at+1:] + } + } + + if colon := strings.Index(body, ":"); colon != -1 { + slash := strings.Index(body, "/") + if slash == -1 || colon < slash { + portEnd := slash + if portEnd == -1 { + portEnd = len(body) + } + if _, err := strconv.Atoi(body[colon+1 : portEnd]); err != nil { + body = body[:colon] + "/" + body[colon+1:] + } + } + } + + if hadScheme || body != raw { + return "https://" + body + } + + return raw } diff --git a/vulnfeeds/git/repository_test.go b/vulnfeeds/git/repository_test.go index 86fb3f9c62b..29e48c5861d 100644 --- a/vulnfeeds/git/repository_test.go +++ b/vulnfeeds/git/repository_test.go @@ -431,9 +431,29 @@ func TestBuildGenericRepoPURL(t *testing.T) { wantError: true, }, { - desc: "SCP-style git URL", - inputURL: "git@github.com:owner/repo.git", - wantError: true, + desc: "git:// transport", + inputURL: "git://github.com/owner/repo.git", + wantPURL: "pkg:generic/github.com/owner/repo", + }, + { + desc: "ssh:// transport with user", + inputURL: "ssh://git@github.com/owner/repo.git", + wantPURL: "pkg:generic/github.com/owner/repo", + }, + { + desc: "SCP-style git URL", + inputURL: "git@github.com:owner/repo.git", + wantPURL: "pkg:generic/github.com/owner/repo", + }, + { + desc: "malformed hybrid (CVE-2025-1110)", + inputURL: "git://git@gitlab.com:gitlab-org/gitlab.git", + wantPURL: "pkg:generic/gitlab.com/gitlab-org/gitlab", + }, + { + desc: "ssh:// transport with numeric port", + inputURL: "ssh://git@git.example.com:22/owner/repo.git", + wantPURL: "pkg:generic/git.example.com/owner/repo", }, { desc: "Unsupported scheme", @@ -473,71 +493,27 @@ func TestBuildGenericRepoPURL(t *testing.T) { } } -func TestBuildVersionedGenericRepoPURL(t *testing.T) { +func TestParseRepoPURL_VersionEscape(t *testing.T) { t.Parallel() - tests := []struct { - desc string - inputURL string - version string - wantPURL string - wantError bool - }{ - { - desc: "simple version", - inputURL: "https://github.com/owner/repo", - version: "v1.2.3", - wantPURL: "pkg:generic/github.com/owner/repo@v1.2.3", - }, - { - desc: "tag with slash is percent-encoded", - inputURL: "https://github.com/owner/repo", - version: "release/1.2.3", - wantPURL: "pkg:generic/github.com/owner/repo@release%2F1.2.3", - }, - { - desc: "tag with space", - inputURL: "https://github.com/owner/repo", - version: "v1.0 beta", - wantPURL: "pkg:generic/github.com/owner/repo@v1.0%20beta", - }, - { - desc: "tag with hash", - inputURL: "https://github.com/owner/repo", - version: "rel#1", - wantPURL: "pkg:generic/github.com/owner/repo@rel%231", - }, - { - desc: "empty version produces unversioned purl", - inputURL: "https://github.com/owner/repo", - version: "", - wantPURL: "pkg:generic/github.com/owner/repo", - }, - { - desc: "invalid repo url still fails", - inputURL: "ftp://example.com/owner/repo", - version: "v1.0", - wantError: true, - }, + tmpl, err := ParseRepoPURL("https://github.com/owner/repo") + if err != nil { + t.Fatalf("ParseRepoPURL unexpected error: %v", err) + } + if got := tmpl.ToString(); got != "pkg:generic/github.com/owner/repo" { + t.Errorf("unversioned ToString = %q, want %q", got, "pkg:generic/github.com/owner/repo") } - for _, tc := range tests { - t.Run(tc.desc, func(t *testing.T) { - t.Parallel() - got, err := BuildVersionedGenericRepoPURL(tc.inputURL, tc.version) - if tc.wantError { - if err == nil { - t.Fatalf("BuildVersionedGenericRepoPURL(%q, %q) = %q, want error", tc.inputURL, tc.version, got) - } - - return - } - if err != nil { - t.Fatalf("BuildVersionedGenericRepoPURL(%q, %q) unexpected error: %v", tc.inputURL, tc.version, err) - } - if got != tc.wantPURL { - t.Fatalf("BuildVersionedGenericRepoPURL(%q, %q) = %q, want %q", tc.inputURL, tc.version, got, tc.wantPURL) - } - }) + cases := map[string]string{ + "v1.2.3": "pkg:generic/github.com/owner/repo@v1.2.3", + "release/1.2.3": "pkg:generic/github.com/owner/repo@release%2F1.2.3", + "v1.0 beta": "pkg:generic/github.com/owner/repo@v1.0%20beta", + "rel#1": "pkg:generic/github.com/owner/repo@rel%231", + } + for version, want := range cases { + tmpl.Version = version + if got := tmpl.ToString(); got != want { + t.Errorf("version %q: ToString = %q, want %q", version, got, want) + } } } From dd097101380f65654b233bde7689de02e6a52687 Mon Sep 17 00:00:00 2001 From: ashmod Date: Mon, 13 Apr 2026 23:31:04 +0200 Subject: [PATCH 11/14] carry Versions through pickAffectedInformation merges --- vulnfeeds/cmd/combine-to-osv/main.go | 44 +++++++++++- vulnfeeds/cmd/combine-to-osv/main_test.go | 85 +++++++++++++++++++++++ 2 files changed, 126 insertions(+), 3 deletions(-) diff --git a/vulnfeeds/cmd/combine-to-osv/main.go b/vulnfeeds/cmd/combine-to-osv/main.go index 125bfd8d244..aaad5f3dea5 100644 --- a/vulnfeeds/cmd/combine-to-osv/main.go +++ b/vulnfeeds/cmd/combine-to-osv/main.go @@ -282,25 +282,33 @@ func pickAffectedInformation(cve5Affected []*osvschema.Affected, nvdAffected []* } nvdRepoMap := make(map[string][]*osvschema.Range) + nvdRepoVersions := make(map[string][]string) for _, affected := range nvdAffected { for _, r := range affected.GetRanges() { if r.GetRepo() != "" { repo := strings.ToLower(r.GetRepo()) nvdRepoMap[repo] = append(nvdRepoMap[repo], r) + nvdRepoVersions[repo] = append(nvdRepoVersions[repo], affected.GetVersions()...) } } } cve5RepoMap := make(map[string][]*osvschema.Range) + cve5RepoVersions := make(map[string][]string) for _, affected := range cve5Affected { for _, r := range affected.GetRanges() { if r.GetRepo() != "" { repo := strings.ToLower(r.GetRepo()) cve5RepoMap[repo] = append(cve5RepoMap[repo], r) + cve5RepoVersions[repo] = append(cve5RepoVersions[repo], affected.GetVersions()...) } } } + mergedVersions := func(repo string) []string { + return mergeUniqueStrings(cve5RepoVersions[repo], nvdRepoVersions[repo]) + } + newRepoAffectedMap := make(map[string]*osvschema.Affected) // Finds ranges with the same repo and merges them into one affected set. @@ -337,11 +345,13 @@ func pickAffectedInformation(cve5Affected []*osvschema.Affected, nvdAffected []* // Remove from map so we know which NVD packages are left. delete(nvdRepoMap, repo) newRepoAffectedMap[repo] = &osvschema.Affected{ - Ranges: newAffectedRanges, + Ranges: newAffectedRanges, + Versions: mergedVersions(repo), } } else { newRepoAffectedMap[repo] = &osvschema.Affected{ - Ranges: cveRanges, + Ranges: cveRanges, + Versions: mergedVersions(repo), } } } @@ -349,7 +359,8 @@ func pickAffectedInformation(cve5Affected []*osvschema.Affected, nvdAffected []* // Add remaining NVD packages that were not in cve5. for repo, nvdRange := range nvdRepoMap { newRepoAffectedMap[repo] = &osvschema.Affected{ - Ranges: nvdRange, + Ranges: nvdRange, + Versions: mergedVersions(repo), } } @@ -399,6 +410,33 @@ func getRangeBoundaryVersions(events []*osvschema.Event) (introduced, fixed stri return introduced, fixed } +// mergeUniqueStrings returns the order-preserving union of two string slices. +// Entries from a come first, followed by entries from b that aren't already +// present. Used to combine per-repo version lists from cve5 and NVD. +func mergeUniqueStrings(a, b []string) []string { + if len(a) == 0 && len(b) == 0 { + return nil + } + out := make([]string, 0, len(a)+len(b)) + seen := make(map[string]struct{}, len(a)+len(b)) + for _, s := range a { + if _, ok := seen[s]; ok { + continue + } + seen[s] = struct{}{} + out = append(out, s) + } + for _, s := range b { + if _, ok := seen[s]; ok { + continue + } + seen[s] = struct{}{} + out = append(out, s) + } + + return out +} + // repoURLFromRanges returns the first repo URL from a GIT-type range, if present. func repoURLFromRanges(ranges []*osvschema.Range) string { for _, r := range ranges { diff --git a/vulnfeeds/cmd/combine-to-osv/main_test.go b/vulnfeeds/cmd/combine-to-osv/main_test.go index d4f3ee55493..cb56fb452a1 100644 --- a/vulnfeeds/cmd/combine-to-osv/main_test.go +++ b/vulnfeeds/cmd/combine-to-osv/main_test.go @@ -780,3 +780,88 @@ func TestCombineTwoOSVRecords_ReferencesDeterminism(t *testing.T) { } } } + +func TestPickAffectedInformation_PreservesVersions(t *testing.T) { + t.Parallel() + + repo := "https://github.com/chriskohlhoff/asio" + cve5 := []*osvschema.Affected{{ + Versions: []string{"asio-1-12-0", "asio-1-12-1"}, + Ranges: []*osvschema.Range{{ + Type: osvschema.Range_GIT, + Repo: repo, + Events: []*osvschema.Event{{Introduced: "0"}, {Fixed: "asio-1-13-0"}}, + }}, + }} + nvd := []*osvschema.Affected{{ + Versions: []string{"asio-1-12-1", "asio-1-13-0"}, + Ranges: []*osvschema.Range{{ + Type: osvschema.Range_GIT, + Repo: repo, + Events: []*osvschema.Event{{Introduced: "0"}, {Fixed: "asio-1-13-0"}}, + }}, + }} + + got := pickAffectedInformation(cve5, nvd) + if len(got) != 1 { + t.Fatalf("expected 1 merged affected, got %d", len(got)) + } + wantVersions := []string{"asio-1-12-0", "asio-1-12-1", "asio-1-13-0"} + gotVersions := append([]string(nil), got[0].GetVersions()...) + sort.Strings(gotVersions) + sort.Strings(wantVersions) + if diff := cmp.Diff(wantVersions, gotVersions); diff != "" { + t.Errorf("merged Versions mismatch (-want +got):\n%s", diff) + } +} + +func TestEnrichRepoPURLs_AfterMerge(t *testing.T) { + t.Parallel() + + repo := "https://github.com/chriskohlhoff/asio" + cve5 := &osvschema.Vulnerability{ + Id: "CVE-2019-25219", + Affected: []*osvschema.Affected{{ + Versions: []string{"asio-1-12-0", "asio-1-12-1"}, + Ranges: []*osvschema.Range{{ + Type: osvschema.Range_GIT, + Repo: repo, + Events: []*osvschema.Event{{Introduced: "0"}, {Fixed: "asio-1-13-0"}}, + }}, + }}, + } + nvd := &osvschema.Vulnerability{ + Id: "CVE-2019-25219", + Affected: []*osvschema.Affected{{ + Versions: []string{"asio-1-12-1", "asio-1-13-0"}, + Ranges: []*osvschema.Range{{ + Type: osvschema.Range_GIT, + Repo: repo, + Events: []*osvschema.Event{{Introduced: "0"}, {Fixed: "asio-1-13-0"}}, + }}, + }}, + } + + merged := combineTwoOSVRecords(cve5, nvd) + enrichRepoPURLs(merged) + + if len(merged.Affected) != 1 { + t.Fatalf("expected 1 affected after merge, got %d", len(merged.Affected)) + } + aff := merged.Affected[0] + + wantBase := "pkg:generic/github.com/chriskohlhoff/asio" + if got := aff.GetPackage().GetPurl(); got != wantBase { + t.Errorf("package.purl = %q, want %q", got, wantBase) + } + got := repoPURLs(t, aff) + sort.Strings(got) + want := []string{ + wantBase + "@asio-1-12-0", + wantBase + "@asio-1-12-1", + wantBase + "@asio-1-13-0", + } + if diff := cmp.Diff(want, got); diff != "" { + t.Errorf("repo_purls mismatch (-want +got):\n%s", diff) + } +} From 6ac8304bcc0563de8ef1fa2d6a8b0b7eeae74bdd Mon Sep 17 00:00:00 2001 From: ashmod Date: Tue, 14 Apr 2026 00:00:13 +0200 Subject: [PATCH 12/14] remove tag discovery logic --- vulnfeeds/cmd/combine-to-osv/main.go | 143 +++++++--------------- vulnfeeds/cmd/combine-to-osv/main_test.go | 104 ++++++++-------- 2 files changed, 92 insertions(+), 155 deletions(-) diff --git a/vulnfeeds/cmd/combine-to-osv/main.go b/vulnfeeds/cmd/combine-to-osv/main.go index aaad5f3dea5..a181cac74a3 100644 --- a/vulnfeeds/cmd/combine-to-osv/main.go +++ b/vulnfeeds/cmd/combine-to-osv/main.go @@ -12,7 +12,6 @@ import ( "os" "path/filepath" "slices" - "sort" "strings" "cloud.google.com/go/storage" @@ -22,7 +21,9 @@ import ( "github.com/google/osv/vulnfeeds/upload" "github.com/google/osv/vulnfeeds/utility" "github.com/google/osv/vulnfeeds/utility/logger" + "github.com/google/osv/vulnfeeds/vulns" "github.com/ossf/osv-schema/bindings/go/osvschema" + packageurl "github.com/package-url/packageurl-go" "google.golang.org/api/iterator" "google.golang.org/protobuf/encoding/protojson" ) @@ -276,53 +277,26 @@ func pickAffectedInformation(cve5Affected []*osvschema.Affected, nvdAffected []* if len(nvdAffected) == 0 { return cve5Affected } - // If NVD has more affected packages, prefer it entirely. if len(cve5Affected) == 0 || len(nvdAffected) > len(cve5Affected) { return nvdAffected } - nvdRepoMap := make(map[string][]*osvschema.Range) - nvdRepoVersions := make(map[string][]string) - for _, affected := range nvdAffected { - for _, r := range affected.GetRanges() { - if r.GetRepo() != "" { - repo := strings.ToLower(r.GetRepo()) - nvdRepoMap[repo] = append(nvdRepoMap[repo], r) - nvdRepoVersions[repo] = append(nvdRepoVersions[repo], affected.GetVersions()...) - } - } - } - - cve5RepoMap := make(map[string][]*osvschema.Range) - cve5RepoVersions := make(map[string][]string) - for _, affected := range cve5Affected { - for _, r := range affected.GetRanges() { - if r.GetRepo() != "" { - repo := strings.ToLower(r.GetRepo()) - cve5RepoMap[repo] = append(cve5RepoMap[repo], r) - cve5RepoVersions[repo] = append(cve5RepoVersions[repo], affected.GetVersions()...) - } - } - } - - mergedVersions := func(repo string) []string { - return mergeUniqueStrings(cve5RepoVersions[repo], nvdRepoVersions[repo]) - } + cve5Ranges, cve5Versions := bucketByRepo(cve5Affected) + nvdRanges, nvdVersions := bucketByRepo(nvdAffected) newRepoAffectedMap := make(map[string]*osvschema.Affected) // Finds ranges with the same repo and merges them into one affected set. - for repo, cveRanges := range cve5RepoMap { - if nvdRanges, ok := nvdRepoMap[repo]; ok { + for repo, cveRanges := range cve5Ranges { + if nvd, ok := nvdRanges[repo]; ok { var newAffectedRanges []*osvschema.Range // Found a match. If NVD has more ranges, use its ranges. - if len(nvdRanges) > len(cveRanges) { - // just use the nvd ranges - newAffectedRanges = nvdRanges - } else if len(cveRanges) == 1 && len(nvdRanges) == 1 { + if len(nvd) > len(cveRanges) { + newAffectedRanges = nvd + } else if len(cveRanges) == 1 && len(nvd) == 1 { c5Intro, c5Fixed := getRangeBoundaryVersions(cveRanges[0].GetEvents()) - nvdIntro, nvdFixed := getRangeBoundaryVersions(nvdRanges[0].GetEvents()) + nvdIntro, nvdFixed := getRangeBoundaryVersions(nvd[0].GetEvents()) // Prefer cve5 data, but use nvd data if cve5 data is missing. if c5Intro == "" { @@ -342,29 +316,28 @@ func pickAffectedInformation(cve5Affected []*osvschema.Affected, nvdAffected []* newAffectedRanges = cveRanges } - // Remove from map so we know which NVD packages are left. - delete(nvdRepoMap, repo) + delete(nvdRanges, repo) newRepoAffectedMap[repo] = &osvschema.Affected{ Ranges: newAffectedRanges, - Versions: mergedVersions(repo), + Versions: vulns.Unique(slices.Concat(cve5Versions[repo], nvdVersions[repo])), } } else { newRepoAffectedMap[repo] = &osvschema.Affected{ Ranges: cveRanges, - Versions: mergedVersions(repo), + Versions: vulns.Unique(cve5Versions[repo]), } } } // Add remaining NVD packages that were not in cve5. - for repo, nvdRange := range nvdRepoMap { + for repo, nvdRange := range nvdRanges { newRepoAffectedMap[repo] = &osvschema.Affected{ Ranges: nvdRange, - Versions: mergedVersions(repo), + Versions: vulns.Unique(nvdVersions[repo]), } } - var combinedAffected []*osvschema.Affected //nolint:prealloc + combinedAffected := make([]*osvschema.Affected, 0, len(newRepoAffectedMap)) for _, aff := range newRepoAffectedMap { combinedAffected = append(combinedAffected, aff) } @@ -410,31 +383,23 @@ func getRangeBoundaryVersions(events []*osvschema.Event) (introduced, fixed stri return introduced, fixed } -// mergeUniqueStrings returns the order-preserving union of two string slices. -// Entries from a come first, followed by entries from b that aren't already -// present. Used to combine per-repo version lists from cve5 and NVD. -func mergeUniqueStrings(a, b []string) []string { - if len(a) == 0 && len(b) == 0 { - return nil - } - out := make([]string, 0, len(a)+len(b)) - seen := make(map[string]struct{}, len(a)+len(b)) - for _, s := range a { - if _, ok := seen[s]; ok { - continue - } - seen[s] = struct{}{} - out = append(out, s) - } - for _, s := range b { - if _, ok := seen[s]; ok { - continue +// bucketByRepo groups each Affected's ranges (and the parent Affected's +// Versions) by the lowercased repo URL of every GIT-bearing range. +func bucketByRepo(affected []*osvschema.Affected) (map[string][]*osvschema.Range, map[string][]string) { + ranges := make(map[string][]*osvschema.Range) + versions := make(map[string][]string) + for _, a := range affected { + for _, r := range a.GetRanges() { + if r.GetRepo() == "" { + continue + } + repo := strings.ToLower(r.GetRepo()) + ranges[repo] = append(ranges[repo], r) + versions[repo] = append(versions[repo], a.GetVersions()...) } - seen[s] = struct{}{} - out = append(out, s) } - return out + return ranges, versions } // repoURLFromRanges returns the first repo URL from a GIT-type range, if present. @@ -453,11 +418,6 @@ const ( repoPURLsKey = "repo_purls" ) -var ( - repoTagsCache = &gitpurl.RepoTagsCache{} - enableRepoPURLTags = os.Getenv("ENABLE_REPO_PURL_TAGS") == "1" -) - // enrichRepoPURLs populates repo-derived pURLs on each affected entry that // has a GIT-type range: an unversioned pkg:generic purl on // affected.package.purl (when unset), and a list of versioned variants under @@ -471,51 +431,30 @@ func enrichRepoPURLs(v *osvschema.Vulnerability) { if repo == "" { continue } + tmpl, err := gitpurl.ParseRepoPURL(repo) + if err != nil { + continue + } if aff.Package == nil { aff.Package = &osvschema.Package{} } if aff.Package.Purl == "" { - if p, err := gitpurl.BuildGenericRepoPURL(repo); err == nil && p != "" { - aff.Package.Purl = p - } + aff.Package.Purl = tmpl.ToString() } - addVersionedRepoPURLs(aff, repo) + addVersionedRepoPURLs(aff, tmpl) } } -// addVersionedRepoPURLs attaches versioned pkg:generic/...@ entries -// under affected.database_specific[repoPURLsKey], using affected.versions -// if available or (behind ENABLE_REPO_PURL_TAGS) tags discovered from the -// remote repo. -func addVersionedRepoPURLs(aff *osvschema.Affected, repo string) { - if aff == nil || repo == "" { +// addVersionedRepoPURLs attaches one versioned pkg:generic/...@ entry +// under affected.database_specific[repoPURLsKey] per entry in aff.Versions. +func addVersionedRepoPURLs(aff *osvschema.Affected, tmpl *packageurl.PackageURL) { + if len(aff.Versions) == 0 { return } - tags := aff.Versions - if len(tags) == 0 && enableRepoPURLTags { - norm, err := gitpurl.NormalizeRepoTags(repo, repoTagsCache) - if err == nil && len(norm) > 0 { - tags = make([]string, 0, len(norm)) - for tag := range norm { - tags = append(tags, tag) - } - sort.Strings(tags) - } - } - if len(tags) > maxRepoPURLTags { - tags = tags[:maxRepoPURLTags] - } - if len(tags) == 0 { - return - } - - tmpl, err := gitpurl.ParseRepoPURL(repo) - if err != nil { - return - } + tags := aff.Versions[:min(len(aff.Versions), maxRepoPURLTags)] versionedPURLs := make([]any, 0, len(tags)) for _, t := range tags { diff --git a/vulnfeeds/cmd/combine-to-osv/main_test.go b/vulnfeeds/cmd/combine-to-osv/main_test.go index cb56fb452a1..8faed1ade3e 100644 --- a/vulnfeeds/cmd/combine-to-osv/main_test.go +++ b/vulnfeeds/cmd/combine-to-osv/main_test.go @@ -468,56 +468,6 @@ func TestRepoURLFromRanges_NoGIT(t *testing.T) { } } -func TestAddVersionedRepoPURLs_FromVersions(t *testing.T) { - t.Setenv("ENABLE_REPO_PURL_TAGS", "") // ensure derivation path is off - - repo := "https://github.com/chriskohlhoff/asio" - aff := &osvschema.Affected{ - Package: &osvschema.Package{Ecosystem: "GIT", Name: "asio"}, - Versions: []string{"asio-1-13-0", "asio-1-12-0"}, - Ranges: []*osvschema.Range{{ - Type: osvschema.Range_GIT, - Repo: repo, - Events: []*osvschema.Event{{Introduced: "0"}}, - }}, - } - - addVersionedRepoPURLs(aff, repo) - - base, err := gitpurl.BuildGenericRepoPURL(repo) - if err != nil || base == "" { - t.Fatalf("failed to build base purl: %v", err) - } - - field := aff.GetDatabaseSpecific().GetFields()["repo_purls"] - if field == nil { - t.Fatalf("repo_purls missing: %#v", aff.GetDatabaseSpecific()) - } - values := field.GetListValue().GetValues() - if len(values) == 0 { - t.Fatalf("repo_purls empty: %#v", aff.GetDatabaseSpecific()) - } - list := make([]string, 0, len(values)) - for _, v := range values { - list = append(list, v.GetStringValue()) - } - - want1 := base + "@asio-1-13-0" - want2 := base + "@asio-1-12-0" - found1, found2 := false, false - for _, p := range list { - if p == want1 { - found1 = true - } - if p == want2 { - found2 = true - } - } - if !found1 || !found2 { - t.Fatalf("missing expected entries, got %#v", list) - } -} - // repoPURLs pulls the string list stored under database_specific["repo_purls"] // so tests can assert on the versioned pURLs attached by enrichRepoPURLs. func repoPURLs(t *testing.T, aff *osvschema.Affected) []string { @@ -628,6 +578,45 @@ func TestEnrichRepoPURLs_NonGITRangeNoop(t *testing.T) { } } +func TestEnrichRepoPURLs_MalformedRepoIsNoop(t *testing.T) { + t.Parallel() + + cases := map[string]string{ + "unsupported scheme": "ftp://example.com/owner/repo", + "missing host": "https:///owner/repo", + "insufficient path": "https://github.com/onlyowner", + "scp-like, bad port hybrid": "git://git@gitlab.com:gitlab-org", + } + + for desc, repo := range cases { + t.Run(desc, func(t *testing.T) { + t.Parallel() + v := &osvschema.Vulnerability{ + Affected: []*osvschema.Affected{ + { + Versions: []string{"v1.0.0"}, + Ranges: []*osvschema.Range{{ + Type: osvschema.Range_GIT, + Repo: repo, + Events: []*osvschema.Event{{Introduced: "0"}}, + }}, + }, + }, + } + + enrichRepoPURLs(v) + + aff := v.Affected[0] + if aff.Package != nil { + t.Errorf("Package was populated as %#v, want nil (malformed repo should be a no-op)", aff.Package) + } + if aff.DatabaseSpecific != nil { + t.Errorf("DatabaseSpecific was populated as %#v, want nil", aff.DatabaseSpecific) + } + }) + } +} + func TestEnrichRepoPURLs_DotGitSuffix(t *testing.T) { t.Parallel() @@ -710,7 +699,11 @@ func TestAddVersionedRepoPURLs_EscapesSpecialCharsInTags(t *testing.T) { }}, } - addVersionedRepoPURLs(aff, repo) + tmpl, err := gitpurl.ParseRepoPURL(repo) + if err != nil { + t.Fatalf("ParseRepoPURL unexpected error: %v", err) + } + addVersionedRepoPURLs(aff, tmpl) got := repoPURLs(t, aff) want := []string{ @@ -732,15 +725,20 @@ func TestAddVersionedRepoPURLs_CapsLargeVersionLists(t *testing.T) { for i := range versions { versions[i] = fmt.Sprintf("v1.0.%d", i) } + repo := "https://github.com/example/big" aff := &osvschema.Affected{ Versions: versions, Ranges: []*osvschema.Range{{ Type: osvschema.Range_GIT, - Repo: "https://github.com/example/big", + Repo: repo, }}, } - addVersionedRepoPURLs(aff, "https://github.com/example/big") + tmpl, err := gitpurl.ParseRepoPURL(repo) + if err != nil { + t.Fatalf("ParseRepoPURL unexpected error: %v", err) + } + addVersionedRepoPURLs(aff, tmpl) got := repoPURLs(t, aff) if len(got) != maxRepoPURLTags { From 170cdba976452926c6e1c30236c8261e0609ca95 Mon Sep 17 00:00:00 2001 From: ashmod Date: Tue, 14 Apr 2026 00:06:31 +0200 Subject: [PATCH 13/14] cleanup --- vulnfeeds/cmd/combine-to-osv/main.go | 71 +++++++++++++++------------- 1 file changed, 38 insertions(+), 33 deletions(-) diff --git a/vulnfeeds/cmd/combine-to-osv/main.go b/vulnfeeds/cmd/combine-to-osv/main.go index a181cac74a3..43dc2b3bb04 100644 --- a/vulnfeeds/cmd/combine-to-osv/main.go +++ b/vulnfeeds/cmd/combine-to-osv/main.go @@ -277,26 +277,49 @@ func pickAffectedInformation(cve5Affected []*osvschema.Affected, nvdAffected []* if len(nvdAffected) == 0 { return cve5Affected } + // If NVD has more affected packages, prefer it entirely. if len(cve5Affected) == 0 || len(nvdAffected) > len(cve5Affected) { return nvdAffected } - cve5Ranges, cve5Versions := bucketByRepo(cve5Affected) - nvdRanges, nvdVersions := bucketByRepo(nvdAffected) + nvdRepoMap := make(map[string][]*osvschema.Range) + nvdRepoVersions := make(map[string][]string) + for _, affected := range nvdAffected { + for _, r := range affected.GetRanges() { + if r.GetRepo() != "" { + repo := strings.ToLower(r.GetRepo()) + nvdRepoMap[repo] = append(nvdRepoMap[repo], r) + nvdRepoVersions[repo] = append(nvdRepoVersions[repo], affected.GetVersions()...) + } + } + } + + cve5RepoMap := make(map[string][]*osvschema.Range) + cve5RepoVersions := make(map[string][]string) + for _, affected := range cve5Affected { + for _, r := range affected.GetRanges() { + if r.GetRepo() != "" { + repo := strings.ToLower(r.GetRepo()) + cve5RepoMap[repo] = append(cve5RepoMap[repo], r) + cve5RepoVersions[repo] = append(cve5RepoVersions[repo], affected.GetVersions()...) + } + } + } newRepoAffectedMap := make(map[string]*osvschema.Affected) // Finds ranges with the same repo and merges them into one affected set. - for repo, cveRanges := range cve5Ranges { - if nvd, ok := nvdRanges[repo]; ok { + for repo, cveRanges := range cve5RepoMap { + if nvdRanges, ok := nvdRepoMap[repo]; ok { var newAffectedRanges []*osvschema.Range // Found a match. If NVD has more ranges, use its ranges. - if len(nvd) > len(cveRanges) { - newAffectedRanges = nvd - } else if len(cveRanges) == 1 && len(nvd) == 1 { + if len(nvdRanges) > len(cveRanges) { + // just use the nvd ranges + newAffectedRanges = nvdRanges + } else if len(cveRanges) == 1 && len(nvdRanges) == 1 { c5Intro, c5Fixed := getRangeBoundaryVersions(cveRanges[0].GetEvents()) - nvdIntro, nvdFixed := getRangeBoundaryVersions(nvd[0].GetEvents()) + nvdIntro, nvdFixed := getRangeBoundaryVersions(nvdRanges[0].GetEvents()) // Prefer cve5 data, but use nvd data if cve5 data is missing. if c5Intro == "" { @@ -316,28 +339,29 @@ func pickAffectedInformation(cve5Affected []*osvschema.Affected, nvdAffected []* newAffectedRanges = cveRanges } - delete(nvdRanges, repo) + // Remove from map so we know which NVD packages are left. + delete(nvdRepoMap, repo) newRepoAffectedMap[repo] = &osvschema.Affected{ Ranges: newAffectedRanges, - Versions: vulns.Unique(slices.Concat(cve5Versions[repo], nvdVersions[repo])), + Versions: vulns.Unique(slices.Concat(cve5RepoVersions[repo], nvdRepoVersions[repo])), } } else { newRepoAffectedMap[repo] = &osvschema.Affected{ Ranges: cveRanges, - Versions: vulns.Unique(cve5Versions[repo]), + Versions: vulns.Unique(cve5RepoVersions[repo]), } } } // Add remaining NVD packages that were not in cve5. - for repo, nvdRange := range nvdRanges { + for repo, nvdRange := range nvdRepoMap { newRepoAffectedMap[repo] = &osvschema.Affected{ Ranges: nvdRange, - Versions: vulns.Unique(nvdVersions[repo]), + Versions: vulns.Unique(nvdRepoVersions[repo]), } } - combinedAffected := make([]*osvschema.Affected, 0, len(newRepoAffectedMap)) + var combinedAffected []*osvschema.Affected //nolint:prealloc for _, aff := range newRepoAffectedMap { combinedAffected = append(combinedAffected, aff) } @@ -383,25 +407,6 @@ func getRangeBoundaryVersions(events []*osvschema.Event) (introduced, fixed stri return introduced, fixed } -// bucketByRepo groups each Affected's ranges (and the parent Affected's -// Versions) by the lowercased repo URL of every GIT-bearing range. -func bucketByRepo(affected []*osvschema.Affected) (map[string][]*osvschema.Range, map[string][]string) { - ranges := make(map[string][]*osvschema.Range) - versions := make(map[string][]string) - for _, a := range affected { - for _, r := range a.GetRanges() { - if r.GetRepo() == "" { - continue - } - repo := strings.ToLower(r.GetRepo()) - ranges[repo] = append(ranges[repo], r) - versions[repo] = append(versions[repo], a.GetVersions()...) - } - } - - return ranges, versions -} - // repoURLFromRanges returns the first repo URL from a GIT-type range, if present. func repoURLFromRanges(ranges []*osvschema.Range) string { for _, r := range ranges { From 7ae63910e3b31100f2043de3ebf6dd845277e045 Mon Sep 17 00:00:00 2001 From: ashmod Date: Tue, 14 Apr 2026 00:13:51 +0200 Subject: [PATCH 14/14] don't overwrite purls when identity is set --- vulnfeeds/cmd/combine-to-osv/main.go | 5 ++- vulnfeeds/cmd/combine-to-osv/main_test.go | 39 +++++++++++++++++++++++ 2 files changed, 41 insertions(+), 3 deletions(-) diff --git a/vulnfeeds/cmd/combine-to-osv/main.go b/vulnfeeds/cmd/combine-to-osv/main.go index 43dc2b3bb04..36f6c01bc14 100644 --- a/vulnfeeds/cmd/combine-to-osv/main.go +++ b/vulnfeeds/cmd/combine-to-osv/main.go @@ -442,9 +442,8 @@ func enrichRepoPURLs(v *osvschema.Vulnerability) { } if aff.Package == nil { - aff.Package = &osvschema.Package{} - } - if aff.Package.Purl == "" { + aff.Package = &osvschema.Package{Purl: tmpl.ToString()} + } else if aff.Package.GetPurl() == "" && aff.Package.GetName() == "" && aff.Package.GetEcosystem() == "" { aff.Package.Purl = tmpl.ToString() } diff --git a/vulnfeeds/cmd/combine-to-osv/main_test.go b/vulnfeeds/cmd/combine-to-osv/main_test.go index 8faed1ade3e..edbf3c9a988 100644 --- a/vulnfeeds/cmd/combine-to-osv/main_test.go +++ b/vulnfeeds/cmd/combine-to-osv/main_test.go @@ -551,6 +551,45 @@ func TestEnrichRepoPURLs_PreservesExistingPurl(t *testing.T) { t.Errorf("expected repo_purls to be populated, got none") } } +func TestEnrichRepoPURLs_PreservesExistingPackageIdentity(t *testing.T) { + t.Parallel() + + repo := "https://github.com/upstream/libfoo" + v := &osvschema.Vulnerability{ + Affected: []*osvschema.Affected{ + { + Package: &osvschema.Package{ + Ecosystem: "Debian:11", + Name: "libfoo", + }, + Versions: []string{"1.2.3"}, + Ranges: []*osvschema.Range{{ + Type: osvschema.Range_GIT, + Repo: repo, + Events: []*osvschema.Event{{Introduced: "0"}, {Fixed: "1.2.4"}}, + }}, + }, + }, + } + + enrichRepoPURLs(v) + + pkg := v.Affected[0].GetPackage() + if got := pkg.GetPurl(); got != "" { + t.Errorf("package.purl = %q, want empty (Debian identity must not be overwritten)", got) + } + if got := pkg.GetEcosystem(); got != "Debian:11" { + t.Errorf("package.ecosystem = %q, want %q", got, "Debian:11") + } + if got := pkg.GetName(); got != "libfoo" { + t.Errorf("package.name = %q, want %q", got, "libfoo") + } + + want := []string{"pkg:generic/github.com/upstream/libfoo@1.2.3"} + if diff := cmp.Diff(want, repoPURLs(t, v.Affected[0])); diff != "" { + t.Errorf("repo_purls mismatch (-want +got):\n%s", diff) + } +} func TestEnrichRepoPURLs_NonGITRangeNoop(t *testing.T) { t.Parallel()