diff --git a/vulnfeeds/cmd/combine-to-osv/main.go b/vulnfeeds/cmd/combine-to-osv/main.go index c0f0b38b56f..36f6c01bc14 100644 --- a/vulnfeeds/cmd/combine-to-osv/main.go +++ b/vulnfeeds/cmd/combine-to-osv/main.go @@ -16,10 +16,14 @@ import ( "cloud.google.com/go/storage" "github.com/google/osv/vulnfeeds/conversion" + gitpurl "github.com/google/osv/vulnfeeds/git" "github.com/google/osv/vulnfeeds/models" "github.com/google/osv/vulnfeeds/upload" + "github.com/google/osv/vulnfeeds/utility" "github.com/google/osv/vulnfeeds/utility/logger" + "github.com/google/osv/vulnfeeds/vulns" "github.com/ossf/osv-schema/bindings/go/osvschema" + packageurl "github.com/package-url/packageurl-go" "google.golang.org/api/iterator" "google.golang.org/protobuf/encoding/protojson" ) @@ -197,6 +201,7 @@ func combineIntoOSV(cve5osv map[models.CVEID]*osvschema.Vulnerability, nvdosv ma continue } } + enrichRepoPURLs(baseOSV) osvRecords[cveID] = baseOSV } @@ -205,6 +210,7 @@ func combineIntoOSV(cve5osv map[models.CVEID]*osvschema.Vulnerability, nvdosv ma if len(nvd.GetAffected()) == 0 || !hasRanges(nvd.GetAffected()) { continue } + enrichRepoPURLs(nvd) osvRecords[cveID] = nvd } @@ -277,21 +283,25 @@ func pickAffectedInformation(cve5Affected []*osvschema.Affected, nvdAffected []* } nvdRepoMap := make(map[string][]*osvschema.Range) + nvdRepoVersions := make(map[string][]string) for _, affected := range nvdAffected { for _, r := range affected.GetRanges() { if r.GetRepo() != "" { repo := strings.ToLower(r.GetRepo()) nvdRepoMap[repo] = append(nvdRepoMap[repo], r) + nvdRepoVersions[repo] = append(nvdRepoVersions[repo], affected.GetVersions()...) } } } cve5RepoMap := make(map[string][]*osvschema.Range) + cve5RepoVersions := make(map[string][]string) for _, affected := range cve5Affected { for _, r := range affected.GetRanges() { if r.GetRepo() != "" { repo := strings.ToLower(r.GetRepo()) cve5RepoMap[repo] = append(cve5RepoMap[repo], r) + cve5RepoVersions[repo] = append(cve5RepoVersions[repo], affected.GetVersions()...) } } } @@ -332,11 +342,13 @@ func pickAffectedInformation(cve5Affected []*osvschema.Affected, nvdAffected []* // Remove from map so we know which NVD packages are left. delete(nvdRepoMap, repo) newRepoAffectedMap[repo] = &osvschema.Affected{ - Ranges: newAffectedRanges, + Ranges: newAffectedRanges, + Versions: vulns.Unique(slices.Concat(cve5RepoVersions[repo], nvdRepoVersions[repo])), } } else { newRepoAffectedMap[repo] = &osvschema.Affected{ - Ranges: cveRanges, + Ranges: cveRanges, + Versions: vulns.Unique(cve5RepoVersions[repo]), } } } @@ -344,7 +356,8 @@ func pickAffectedInformation(cve5Affected []*osvschema.Affected, nvdAffected []* // Add remaining NVD packages that were not in cve5. for repo, nvdRange := range nvdRepoMap { newRepoAffectedMap[repo] = &osvschema.Affected{ - Ranges: nvdRange, + Ranges: nvdRange, + Versions: vulns.Unique(nvdRepoVersions[repo]), } } @@ -393,3 +406,80 @@ func getRangeBoundaryVersions(events []*osvschema.Event) (introduced, fixed stri return introduced, fixed } + +// repoURLFromRanges returns the first repo URL from a GIT-type range, if present. +func repoURLFromRanges(ranges []*osvschema.Range) string { + for _, r := range ranges { + if r.GetType() == osvschema.Range_GIT && r.GetRepo() != "" { + return r.GetRepo() + } + } + + return "" +} + +const ( + maxRepoPURLTags = 200 + repoPURLsKey = "repo_purls" +) + +// enrichRepoPURLs populates repo-derived pURLs on each affected entry that +// has a GIT-type range: an unversioned pkg:generic purl on +// affected.package.purl (when unset), and a list of versioned variants under +// affected.database_specific["repo_purls"]. +func enrichRepoPURLs(v *osvschema.Vulnerability) { + if v == nil || len(v.GetAffected()) == 0 { + return + } + for _, aff := range v.Affected { + repo := repoURLFromRanges(aff.GetRanges()) + if repo == "" { + continue + } + tmpl, err := gitpurl.ParseRepoPURL(repo) + if err != nil { + continue + } + + if aff.Package == nil { + aff.Package = &osvschema.Package{Purl: tmpl.ToString()} + } else if aff.Package.GetPurl() == "" && aff.Package.GetName() == "" && aff.Package.GetEcosystem() == "" { + aff.Package.Purl = tmpl.ToString() + } + + addVersionedRepoPURLs(aff, tmpl) + } +} + +// addVersionedRepoPURLs attaches one versioned pkg:generic/...@ entry +// under affected.database_specific[repoPURLsKey] per entry in aff.Versions. +func addVersionedRepoPURLs(aff *osvschema.Affected, tmpl *packageurl.PackageURL) { + if len(aff.Versions) == 0 { + return + } + + tags := aff.Versions[:min(len(aff.Versions), maxRepoPURLTags)] + + versionedPURLs := make([]any, 0, len(tags)) + for _, t := range tags { + if t == "" { + continue + } + tmpl.Version = t + versionedPURLs = append(versionedPURLs, tmpl.ToString()) + } + if len(versionedPURLs) == 0 { + return + } + + if aff.DatabaseSpecific == nil { + ds, err := utility.NewStructpbFromMap(nil) + if err != nil { + return + } + aff.DatabaseSpecific = ds + } + if err := conversion.AddFieldToDatabaseSpecific(aff.DatabaseSpecific, repoPURLsKey, versionedPURLs); err != nil { + return + } +} diff --git a/vulnfeeds/cmd/combine-to-osv/main_test.go b/vulnfeeds/cmd/combine-to-osv/main_test.go index 51504d9b34b..edbf3c9a988 100644 --- a/vulnfeeds/cmd/combine-to-osv/main_test.go +++ b/vulnfeeds/cmd/combine-to-osv/main_test.go @@ -1,6 +1,7 @@ package main import ( + "fmt" "path/filepath" "sort" "testing" @@ -8,6 +9,7 @@ import ( "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" + gitpurl "github.com/google/osv/vulnfeeds/git" "github.com/google/osv/vulnfeeds/models" "github.com/ossf/osv-schema/bindings/go/osvschema" "google.golang.org/protobuf/testing/protocmp" @@ -430,6 +432,359 @@ func TestCombineTwoOSVRecords(t *testing.T) { } } +func TestRepoURLFromRanges_GIT(t *testing.T) { + t.Parallel() + + ranges := []*osvschema.Range{ + { + Type: osvschema.Range_GIT, + Repo: "https://github.com/eclipse-openj9/openj9", + Events: []*osvschema.Event{ + {Introduced: "0"}, + }, + }, + } + got := repoURLFromRanges(ranges) + want := "https://github.com/eclipse-openj9/openj9" + if got != want { + t.Fatalf("repoURLFromRanges() = %q, want %q", got, want) + } +} + +func TestRepoURLFromRanges_NoGIT(t *testing.T) { + t.Parallel() + + ranges := []*osvschema.Range{ + { + Type: osvschema.Range_ECOSYSTEM, + Events: []*osvschema.Event{ + {Introduced: "0"}, + {Fixed: "1.2.3"}, + }, + }, + } + if got := repoURLFromRanges(ranges); got != "" { + t.Fatalf("repoURLFromRanges() = %q, want empty", got) + } +} + +// repoPURLs pulls the string list stored under database_specific["repo_purls"] +// so tests can assert on the versioned pURLs attached by enrichRepoPURLs. +func repoPURLs(t *testing.T, aff *osvschema.Affected) []string { + t.Helper() + field := aff.GetDatabaseSpecific().GetFields()["repo_purls"] + if field == nil { + return nil + } + values := field.GetListValue().GetValues() + out := make([]string, 0, len(values)) + for _, v := range values { + out = append(out, v.GetStringValue()) + } + + return out +} + +func TestEnrichRepoPURLs_GITRangeWithTagVersions(t *testing.T) { + t.Parallel() + + repo := "https://github.com/chriskohlhoff/asio" + v := &osvschema.Vulnerability{ + Id: "CVE-2019-25219", + Affected: []*osvschema.Affected{ + { + Versions: []string{"asio-1-12-0", "asio-1-12-1", "asio-1-13-0"}, + Ranges: []*osvschema.Range{{ + Type: osvschema.Range_GIT, + Repo: repo, + Events: []*osvschema.Event{{Introduced: "0"}}, + }}, + }, + }, + } + + enrichRepoPURLs(v) + + aff := v.Affected[0] + wantBase := "pkg:generic/github.com/chriskohlhoff/asio" + if got := aff.GetPackage().GetPurl(); got != wantBase { + t.Errorf("package.purl = %q, want %q", got, wantBase) + } + + got := repoPURLs(t, aff) + want := []string{ + wantBase + "@asio-1-12-0", + wantBase + "@asio-1-12-1", + wantBase + "@asio-1-13-0", + } + sort.Strings(got) + sort.Strings(want) + if diff := cmp.Diff(want, got); diff != "" { + t.Errorf("repo_purls mismatch (-want +got):\n%s", diff) + } +} + +func TestEnrichRepoPURLs_PreservesExistingPurl(t *testing.T) { + t.Parallel() + + existing := "pkg:deb/debian/libasio-dev" + v := &osvschema.Vulnerability{ + Affected: []*osvschema.Affected{ + { + Package: &osvschema.Package{Purl: existing}, + Versions: []string{"asio-1-12-0"}, + Ranges: []*osvschema.Range{{ + Type: osvschema.Range_GIT, + Repo: "https://github.com/chriskohlhoff/asio", + Events: []*osvschema.Event{{Introduced: "0"}}, + }}, + }, + }, + } + + enrichRepoPURLs(v) + + if got := v.Affected[0].GetPackage().GetPurl(); got != existing { + t.Errorf("package.purl clobbered: got %q, want %q", got, existing) + } + if got := repoPURLs(t, v.Affected[0]); len(got) == 0 { + t.Errorf("expected repo_purls to be populated, got none") + } +} +func TestEnrichRepoPURLs_PreservesExistingPackageIdentity(t *testing.T) { + t.Parallel() + + repo := "https://github.com/upstream/libfoo" + v := &osvschema.Vulnerability{ + Affected: []*osvschema.Affected{ + { + Package: &osvschema.Package{ + Ecosystem: "Debian:11", + Name: "libfoo", + }, + Versions: []string{"1.2.3"}, + Ranges: []*osvschema.Range{{ + Type: osvschema.Range_GIT, + Repo: repo, + Events: []*osvschema.Event{{Introduced: "0"}, {Fixed: "1.2.4"}}, + }}, + }, + }, + } + + enrichRepoPURLs(v) + + pkg := v.Affected[0].GetPackage() + if got := pkg.GetPurl(); got != "" { + t.Errorf("package.purl = %q, want empty (Debian identity must not be overwritten)", got) + } + if got := pkg.GetEcosystem(); got != "Debian:11" { + t.Errorf("package.ecosystem = %q, want %q", got, "Debian:11") + } + if got := pkg.GetName(); got != "libfoo" { + t.Errorf("package.name = %q, want %q", got, "libfoo") + } + + want := []string{"pkg:generic/github.com/upstream/libfoo@1.2.3"} + if diff := cmp.Diff(want, repoPURLs(t, v.Affected[0])); diff != "" { + t.Errorf("repo_purls mismatch (-want +got):\n%s", diff) + } +} + +func TestEnrichRepoPURLs_NonGITRangeNoop(t *testing.T) { + t.Parallel() + + v := &osvschema.Vulnerability{ + Affected: []*osvschema.Affected{ + { + Package: &osvschema.Package{Ecosystem: "Debian:11", Name: "libasio"}, + Versions: []string{"1.18.1"}, + Ranges: []*osvschema.Range{{ + Type: osvschema.Range_ECOSYSTEM, + Events: []*osvschema.Event{{Introduced: "0"}, {Fixed: "1.18.2"}}, + }}, + }, + }, + } + + enrichRepoPURLs(v) + + if got := v.Affected[0].GetPackage().GetPurl(); got != "" { + t.Errorf("package.purl = %q, want empty (no GIT range)", got) + } + if got := repoPURLs(t, v.Affected[0]); len(got) != 0 { + t.Errorf("repo_purls should be absent, got %v", got) + } +} + +func TestEnrichRepoPURLs_MalformedRepoIsNoop(t *testing.T) { + t.Parallel() + + cases := map[string]string{ + "unsupported scheme": "ftp://example.com/owner/repo", + "missing host": "https:///owner/repo", + "insufficient path": "https://github.com/onlyowner", + "scp-like, bad port hybrid": "git://git@gitlab.com:gitlab-org", + } + + for desc, repo := range cases { + t.Run(desc, func(t *testing.T) { + t.Parallel() + v := &osvschema.Vulnerability{ + Affected: []*osvschema.Affected{ + { + Versions: []string{"v1.0.0"}, + Ranges: []*osvschema.Range{{ + Type: osvschema.Range_GIT, + Repo: repo, + Events: []*osvschema.Event{{Introduced: "0"}}, + }}, + }, + }, + } + + enrichRepoPURLs(v) + + aff := v.Affected[0] + if aff.Package != nil { + t.Errorf("Package was populated as %#v, want nil (malformed repo should be a no-op)", aff.Package) + } + if aff.DatabaseSpecific != nil { + t.Errorf("DatabaseSpecific was populated as %#v, want nil", aff.DatabaseSpecific) + } + }) + } +} + +func TestEnrichRepoPURLs_DotGitSuffix(t *testing.T) { + t.Parallel() + + v := &osvschema.Vulnerability{ + Affected: []*osvschema.Affected{ + { + Versions: []string{"v1.2.11"}, + Ranges: []*osvschema.Range{{ + Type: osvschema.Range_GIT, + Repo: "https://github.com/madler/zlib.git", + Events: []*osvschema.Event{{Introduced: "0"}}, + }}, + }, + }, + } + + enrichRepoPURLs(v) + + wantBase := "pkg:generic/github.com/madler/zlib" + if got := v.Affected[0].GetPackage().GetPurl(); got != wantBase { + t.Errorf("package.purl = %q, want %q (.git suffix should be stripped)", got, wantBase) + } + want := []string{wantBase + "@v1.2.11"} + if diff := cmp.Diff(want, repoPURLs(t, v.Affected[0])); diff != "" { + t.Errorf("repo_purls mismatch (-want +got):\n%s", diff) + } +} + +func TestEnrichRepoPURLs_MultipleAffectedOnlyGITEnriched(t *testing.T) { + t.Parallel() + + v := &osvschema.Vulnerability{ + Affected: []*osvschema.Affected{ + { + Package: &osvschema.Package{Ecosystem: "Debian:11", Name: "libasio"}, + Versions: []string{"1.18.1"}, + Ranges: []*osvschema.Range{{ + Type: osvschema.Range_ECOSYSTEM, + Events: []*osvschema.Event{{Introduced: "0"}, {Fixed: "1.18.2"}}, + }}, + }, + { + Versions: []string{"asio-1-12-0"}, + Ranges: []*osvschema.Range{{ + Type: osvschema.Range_GIT, + Repo: "https://github.com/chriskohlhoff/asio", + Events: []*osvschema.Event{{Introduced: "0"}}, + }}, + }, + }, + } + + enrichRepoPURLs(v) + + if got := v.Affected[0].GetPackage().GetPurl(); got != "" { + t.Errorf("affected[0] (ECOSYSTEM) package.purl = %q, want empty", got) + } + if got := repoPURLs(t, v.Affected[0]); len(got) != 0 { + t.Errorf("affected[0] (ECOSYSTEM) repo_purls should be empty, got %v", got) + } + + wantBase := "pkg:generic/github.com/chriskohlhoff/asio" + if got := v.Affected[1].GetPackage().GetPurl(); got != wantBase { + t.Errorf("affected[1] (GIT) package.purl = %q, want %q", got, wantBase) + } + if got := repoPURLs(t, v.Affected[1]); len(got) != 1 || got[0] != wantBase+"@asio-1-12-0" { + t.Errorf("affected[1] (GIT) repo_purls = %v, want [%s@asio-1-12-0]", got, wantBase) + } +} + +func TestAddVersionedRepoPURLs_EscapesSpecialCharsInTags(t *testing.T) { + t.Parallel() + + repo := "https://github.com/example/repo" + aff := &osvschema.Affected{ + Versions: []string{"release/1.2.3", "v1.0 beta", "rel#1"}, + Ranges: []*osvschema.Range{{ + Type: osvschema.Range_GIT, + Repo: repo, + }}, + } + + tmpl, err := gitpurl.ParseRepoPURL(repo) + if err != nil { + t.Fatalf("ParseRepoPURL unexpected error: %v", err) + } + addVersionedRepoPURLs(aff, tmpl) + + got := repoPURLs(t, aff) + want := []string{ + "pkg:generic/github.com/example/repo@rel%231", + "pkg:generic/github.com/example/repo@release%2F1.2.3", + "pkg:generic/github.com/example/repo@v1.0%20beta", + } + sort.Strings(got) + sort.Strings(want) + if diff := cmp.Diff(want, got); diff != "" { + t.Errorf("repo_purls mismatch (-want +got):\n%s", diff) + } +} + +func TestAddVersionedRepoPURLs_CapsLargeVersionLists(t *testing.T) { + t.Parallel() + + versions := make([]string, maxRepoPURLTags+50) + for i := range versions { + versions[i] = fmt.Sprintf("v1.0.%d", i) + } + repo := "https://github.com/example/big" + aff := &osvschema.Affected{ + Versions: versions, + Ranges: []*osvschema.Range{{ + Type: osvschema.Range_GIT, + Repo: repo, + }}, + } + + tmpl, err := gitpurl.ParseRepoPURL(repo) + if err != nil { + t.Fatalf("ParseRepoPURL unexpected error: %v", err) + } + addVersionedRepoPURLs(aff, tmpl) + + got := repoPURLs(t, aff) + if len(got) != maxRepoPURLTags { + t.Errorf("len(repo_purls) = %d, want %d", len(got), maxRepoPURLTags) + } +} + func TestCombineTwoOSVRecords_ReferencesDeterminism(t *testing.T) { cve5 := &osvschema.Vulnerability{ Id: "CVE-2023-1234", @@ -462,3 +817,88 @@ func TestCombineTwoOSVRecords_ReferencesDeterminism(t *testing.T) { } } } + +func TestPickAffectedInformation_PreservesVersions(t *testing.T) { + t.Parallel() + + repo := "https://github.com/chriskohlhoff/asio" + cve5 := []*osvschema.Affected{{ + Versions: []string{"asio-1-12-0", "asio-1-12-1"}, + Ranges: []*osvschema.Range{{ + Type: osvschema.Range_GIT, + Repo: repo, + Events: []*osvschema.Event{{Introduced: "0"}, {Fixed: "asio-1-13-0"}}, + }}, + }} + nvd := []*osvschema.Affected{{ + Versions: []string{"asio-1-12-1", "asio-1-13-0"}, + Ranges: []*osvschema.Range{{ + Type: osvschema.Range_GIT, + Repo: repo, + Events: []*osvschema.Event{{Introduced: "0"}, {Fixed: "asio-1-13-0"}}, + }}, + }} + + got := pickAffectedInformation(cve5, nvd) + if len(got) != 1 { + t.Fatalf("expected 1 merged affected, got %d", len(got)) + } + wantVersions := []string{"asio-1-12-0", "asio-1-12-1", "asio-1-13-0"} + gotVersions := append([]string(nil), got[0].GetVersions()...) + sort.Strings(gotVersions) + sort.Strings(wantVersions) + if diff := cmp.Diff(wantVersions, gotVersions); diff != "" { + t.Errorf("merged Versions mismatch (-want +got):\n%s", diff) + } +} + +func TestEnrichRepoPURLs_AfterMerge(t *testing.T) { + t.Parallel() + + repo := "https://github.com/chriskohlhoff/asio" + cve5 := &osvschema.Vulnerability{ + Id: "CVE-2019-25219", + Affected: []*osvschema.Affected{{ + Versions: []string{"asio-1-12-0", "asio-1-12-1"}, + Ranges: []*osvschema.Range{{ + Type: osvschema.Range_GIT, + Repo: repo, + Events: []*osvschema.Event{{Introduced: "0"}, {Fixed: "asio-1-13-0"}}, + }}, + }}, + } + nvd := &osvschema.Vulnerability{ + Id: "CVE-2019-25219", + Affected: []*osvschema.Affected{{ + Versions: []string{"asio-1-12-1", "asio-1-13-0"}, + Ranges: []*osvschema.Range{{ + Type: osvschema.Range_GIT, + Repo: repo, + Events: []*osvschema.Event{{Introduced: "0"}, {Fixed: "asio-1-13-0"}}, + }}, + }}, + } + + merged := combineTwoOSVRecords(cve5, nvd) + enrichRepoPURLs(merged) + + if len(merged.Affected) != 1 { + t.Fatalf("expected 1 affected after merge, got %d", len(merged.Affected)) + } + aff := merged.Affected[0] + + wantBase := "pkg:generic/github.com/chriskohlhoff/asio" + if got := aff.GetPackage().GetPurl(); got != wantBase { + t.Errorf("package.purl = %q, want %q", got, wantBase) + } + got := repoPURLs(t, aff) + sort.Strings(got) + want := []string{ + wantBase + "@asio-1-12-0", + wantBase + "@asio-1-12-1", + wantBase + "@asio-1-13-0", + } + if diff := cmp.Diff(want, got); diff != "" { + t.Errorf("repo_purls mismatch (-want +got):\n%s", diff) + } +} diff --git a/vulnfeeds/git/purl.go b/vulnfeeds/git/purl.go new file mode 100644 index 00000000000..a55dd9f1256 --- /dev/null +++ b/vulnfeeds/git/purl.go @@ -0,0 +1,103 @@ +package git + +import ( + "fmt" + "net/url" + "strconv" + "strings" + + packageurl "github.com/package-url/packageurl-go" +) + +// BuildGenericRepoPURL returns an unversioned generic purl for a repo URL. +// Example: pkg:generic/github.com/owner/repo +func BuildGenericRepoPURL(repoURL string) (string, error) { + p, err := ParseRepoPURL(repoURL) + if err != nil { + return "", err + } + + return p.ToString(), nil +} + +// ParseRepoPURL decodes a repo URL into a PackageURL template with type, +// namespace, and name populated. packageurl-go handles version +// escaping so reserved characters such as "/" are encoded to "%2F". +func ParseRepoPURL(repoURL string) (*packageurl.PackageURL, error) { + u, err := url.Parse(normalizeRepoURL(repoURL)) + if err != nil { + return nil, fmt.Errorf("invalid repo url: %w", err) + } + + switch strings.ToLower(u.Scheme) { + case "http", "https": + default: + return nil, fmt.Errorf("unsupported scheme %q in %q", u.Scheme, repoURL) + } + + host := strings.ToLower(u.Hostname()) + if host == "" { + return nil, fmt.Errorf("missing host in %q", repoURL) + } + + path := strings.Trim(strings.TrimSuffix(u.Path, ".git"), "/") + parts := strings.Split(path, "/") + if len(parts) < 2 || parts[0] == "" { + return nil, fmt.Errorf("invalid repo path in %q", repoURL) + } + + return packageurl.NewPackageURL( + "generic", + strings.Join(append([]string{host}, parts[:len(parts)-1]...), "/"), + parts[len(parts)-1], + "", nil, "", + ), nil +} + +// normalizeRepoURL rewrites common git-transport variants into an https URL +// so ParseRepoPURL can treat them uniformly. +// Inputs with an unrecognized scheme (ftp://, file://, …) are returned as-is +// so ParseRepoPURL can reject them via its scheme check. +func normalizeRepoURL(raw string) string { + raw = strings.TrimSpace(raw) + + if strings.HasPrefix(raw, "http://") || strings.HasPrefix(raw, "https://") { + return raw + } + + body := raw + hadScheme := false + if i := strings.Index(body, "://"); i != -1 { + scheme := strings.ToLower(body[:i]) + if scheme != "git" && scheme != "ssh" { + return raw + } + body = body[i+3:] + hadScheme = true + } + + if at := strings.Index(body, "@"); at != -1 { + if slash := strings.Index(body, "/"); slash == -1 || at < slash { + body = body[at+1:] + } + } + + if colon := strings.Index(body, ":"); colon != -1 { + slash := strings.Index(body, "/") + if slash == -1 || colon < slash { + portEnd := slash + if portEnd == -1 { + portEnd = len(body) + } + if _, err := strconv.Atoi(body[colon+1 : portEnd]); err != nil { + body = body[:colon] + "/" + body[colon+1:] + } + } + } + + if hadScheme || body != raw { + return "https://" + body + } + + return raw +} diff --git a/vulnfeeds/git/repository_test.go b/vulnfeeds/git/repository_test.go index 0f479b8828e..29e48c5861d 100644 --- a/vulnfeeds/git/repository_test.go +++ b/vulnfeeds/git/repository_test.go @@ -395,3 +395,125 @@ func TestInvalidRepos(t *testing.T) { t.Errorf("These redundant repos are in InvalidRepos: %s", diff) } } + +func TestBuildGenericRepoPURL(t *testing.T) { + t.Parallel() + + tests := []struct { + desc string + inputURL string + wantPURL string + wantError bool + }{ + { + desc: "GitHub repo", + inputURL: "https://github.com/eclipse-openj9/openj9", + wantPURL: "pkg:generic/github.com/eclipse-openj9/openj9", + }, + { + desc: "GitHub repo with .git suffix", + inputURL: "https://github.com/torvalds/linux.git", + wantPURL: "pkg:generic/github.com/torvalds/linux", + }, + { + desc: "GitLab subgroup repo", + inputURL: "https://gitlab.com/group/subgroup/repo", + wantPURL: "pkg:generic/gitlab.com/group/subgroup/repo", + }, + { + desc: "Self-hosted cgit repo with .git", + inputURL: "https://git.libssh.org/projects/libssh.git", + wantPURL: "pkg:generic/git.libssh.org/projects/libssh", + }, + { + desc: "Insufficient path segments", + inputURL: "https://github.com/onlyowner", + wantError: true, + }, + { + desc: "git:// transport", + inputURL: "git://github.com/owner/repo.git", + wantPURL: "pkg:generic/github.com/owner/repo", + }, + { + desc: "ssh:// transport with user", + inputURL: "ssh://git@github.com/owner/repo.git", + wantPURL: "pkg:generic/github.com/owner/repo", + }, + { + desc: "SCP-style git URL", + inputURL: "git@github.com:owner/repo.git", + wantPURL: "pkg:generic/github.com/owner/repo", + }, + { + desc: "malformed hybrid (CVE-2025-1110)", + inputURL: "git://git@gitlab.com:gitlab-org/gitlab.git", + wantPURL: "pkg:generic/gitlab.com/gitlab-org/gitlab", + }, + { + desc: "ssh:// transport with numeric port", + inputURL: "ssh://git@git.example.com:22/owner/repo.git", + wantPURL: "pkg:generic/git.example.com/owner/repo", + }, + { + desc: "Unsupported scheme", + inputURL: "ftp://example.com/owner/repo", + wantError: true, + }, + { + desc: "Missing host", + inputURL: "https:///owner/repo", + wantError: true, + }, + { + desc: "Empty input", + inputURL: "", + wantError: true, + }, + } + + for _, tc := range tests { + t.Run(tc.desc, func(t *testing.T) { + t.Parallel() + got, err := BuildGenericRepoPURL(tc.inputURL) + if tc.wantError { + if err == nil { + t.Fatalf("BuildGenericRepoPURL(%q) = %q, want error", tc.inputURL, got) + } + + return + } + if err != nil { + t.Fatalf("BuildGenericRepoPURL(%q) unexpected error: %v", tc.inputURL, err) + } + if got != tc.wantPURL { + t.Fatalf("BuildGenericRepoPURL(%q) = %q, want %q", tc.inputURL, got, tc.wantPURL) + } + }) + } +} + +func TestParseRepoPURL_VersionEscape(t *testing.T) { + t.Parallel() + + tmpl, err := ParseRepoPURL("https://github.com/owner/repo") + if err != nil { + t.Fatalf("ParseRepoPURL unexpected error: %v", err) + } + if got := tmpl.ToString(); got != "pkg:generic/github.com/owner/repo" { + t.Errorf("unversioned ToString = %q, want %q", got, "pkg:generic/github.com/owner/repo") + } + + cases := map[string]string{ + "v1.2.3": "pkg:generic/github.com/owner/repo@v1.2.3", + "release/1.2.3": "pkg:generic/github.com/owner/repo@release%2F1.2.3", + "v1.0 beta": "pkg:generic/github.com/owner/repo@v1.0%20beta", + "rel#1": "pkg:generic/github.com/owner/repo@rel%231", + } + for version, want := range cases { + tmpl.Version = version + if got := tmpl.ToString(); got != want { + t.Errorf("version %q: ToString = %q, want %q", version, got, want) + } + } +} diff --git a/vulnfeeds/go.mod b/vulnfeeds/go.mod index 2efcc1109ed..02e808400e5 100644 --- a/vulnfeeds/go.mod +++ b/vulnfeeds/go.mod @@ -17,6 +17,7 @@ require ( github.com/google/go-cmp v0.7.0 github.com/knqyf263/go-cpe v0.0.0-20230627041855-cb0794d06872 github.com/ossf/osv-schema/bindings/go v0.0.0-20260324022310-df534973d091 + github.com/package-url/packageurl-go v0.1.3 github.com/sethvargo/go-retry v0.3.0 go.opentelemetry.io/contrib/detectors/gcp v1.42.0 go.opentelemetry.io/otel v1.43.0 diff --git a/vulnfeeds/go.sum b/vulnfeeds/go.sum index 309c6ff9828..2736165a6b0 100644 --- a/vulnfeeds/go.sum +++ b/vulnfeeds/go.sum @@ -216,6 +216,8 @@ github.com/onsi/gomega v1.34.1 h1:EUMJIKUjM8sKjYbtxQI9A4z2o+rruxnzNvpknOXie6k= github.com/onsi/gomega v1.34.1/go.mod h1:kU1QgUvBDLXBJq618Xvm2LUX6rSAfRaFRTcdOeDLwwY= github.com/ossf/osv-schema/bindings/go v0.0.0-20260324022310-df534973d091 h1:zxs8dDNDyWfepjEqZmVyJ8IljbEiD+AUtbNz18SM9Yw= github.com/ossf/osv-schema/bindings/go v0.0.0-20260324022310-df534973d091/go.mod h1:IrUa4QzZUi03J3WXDzZYXVawYipHownNfqqZrqeGXfg= +github.com/package-url/packageurl-go v0.1.3 h1:4juMED3hHiz0set3Vq3KeQ75KD1avthoXLtmE3I0PLs= +github.com/package-url/packageurl-go v0.1.3/go.mod h1:nKAWB8E6uk1MHqiS/lQb9pYBGH2+mdJ2PJc2s50dQY0= github.com/philhofer/fwd v1.2.0 h1:e6DnBTl7vGY+Gz322/ASL4Gyp1FspeMvx1RNDoToZuM= github.com/philhofer/fwd v1.2.0/go.mod h1:RqIHx9QI14HlwKwm98g9Re5prTQ6LdeRQn+gXJFxsJM= github.com/pjbgf/sha1cd v0.3.2 h1:a9wb0bp1oC2TGwStyn0Umc/IGKQnEgF0vVaZ8QF8eo4=