From 28bb3aaf3f0dfdb9ef20587fd005b04371934207 Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Thu, 5 Mar 2026 04:46:51 +0000 Subject: [PATCH 01/28] create some helper functions to uploading/downloading gcs --- vulnfeeds/upload/gcs.go | 88 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 vulnfeeds/upload/gcs.go diff --git a/vulnfeeds/upload/gcs.go b/vulnfeeds/upload/gcs.go new file mode 100644 index 00000000000..067e4892631 --- /dev/null +++ b/vulnfeeds/upload/gcs.go @@ -0,0 +1,88 @@ +package upload + +import ( + "context" + "fmt" + "io" + "os" + "path/filepath" + + "cloud.google.com/go/storage" + "google.golang.org/api/iterator" +) + +// ToGCS uploads data from an io.Reader to a GCS bucket. +func ToGCS(ctx context.Context, bkt *storage.BucketHandle, objectName string, data io.Reader) error { + obj := bkt.Object(objectName) + wc := obj.NewWriter(ctx) + + if _, err := io.Copy(wc, data); err != nil { + if closeErr := wc.Close(); closeErr != nil { + return fmt.Errorf("failed to write to GCS object %q: %w (also failed to close writer: %v)", objectName, err, closeErr) + } + return fmt.Errorf("failed to write to GCS object %q: %w", objectName, err) + } + + if err := wc.Close(); err != nil { + return fmt.Errorf("failed to close GCS writer for object %q: %w", objectName, err) + } + + return nil +} + +// UploadFile uploads a local file to a GCS bucket. +func UploadFile(ctx context.Context, bkt *storage.BucketHandle, objectName string, filePath string) error { + f, err := os.Open(filePath) + if err != nil { + return fmt.Errorf("os.Open: %w", err) + } + defer f.Close() + + return ToGCS(ctx, bkt, objectName, f) +} + +// DownloadBucket downloads all objects from a GCS bucket to a local directory. +func DownloadBucket(ctx context.Context, bkt *storage.BucketHandle, prefix string, destDir string) error { + it := bkt.Objects(ctx, &storage.Query{Prefix: prefix}) + for { + attrs, err := it.Next() + if err == iterator.Done { + break + } + if err != nil { + return fmt.Errorf("bucket.Objects: %w", err) + } + + // Skip directories + if attrs.Name[len(attrs.Name)-1] == '/' { + continue + } + + destPath := filepath.Join(destDir, attrs.Name) + if err := os.MkdirAll(filepath.Dir(destPath), 0755); err != nil { + return fmt.Errorf("os.MkdirAll: %w", err) + } + + f, err := os.Create(destPath) + if err != nil { + return fmt.Errorf("os.Create: %w", err) + } + + rc, err := bkt.Object(attrs.Name).NewReader(ctx) + if err != nil { + f.Close() + return fmt.Errorf("Object(%q).NewReader: %w", attrs.Name, err) + } + + if _, err := io.Copy(f, rc); err != nil { + rc.Close() + f.Close() + return fmt.Errorf("io.Copy: %w", err) + } + + rc.Close() + f.Close() + } + + return nil +} From 3cd4243c143732664a9577ecfb3e757c915d9f48 Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Thu, 5 Mar 2026 22:51:17 +0000 Subject: [PATCH 02/28] parallelize + tests --- vulnfeeds/upload/gcs.go | 67 +++++++---- vulnfeeds/upload/gcs_test.go | 217 +++++++++++++++++++++++++++++++++++ 2 files changed, 263 insertions(+), 21 deletions(-) create mode 100644 vulnfeeds/upload/gcs_test.go diff --git a/vulnfeeds/upload/gcs.go b/vulnfeeds/upload/gcs.go index 067e4892631..f2c52451228 100644 --- a/vulnfeeds/upload/gcs.go +++ b/vulnfeeds/upload/gcs.go @@ -6,15 +6,20 @@ import ( "io" "os" "path/filepath" + "strings" "cloud.google.com/go/storage" + "golang.org/x/sync/errgroup" "google.golang.org/api/iterator" ) // ToGCS uploads data from an io.Reader to a GCS bucket. -func ToGCS(ctx context.Context, bkt *storage.BucketHandle, objectName string, data io.Reader) error { +func ToGCS(ctx context.Context, bkt *storage.BucketHandle, objectName string, data io.Reader, contentType string) error { obj := bkt.Object(objectName) wc := obj.NewWriter(ctx) + if contentType != "" { + wc.ContentType = contentType + } if _, err := io.Copy(wc, data); err != nil { if closeErr := wc.Close(); closeErr != nil { @@ -38,13 +43,22 @@ func UploadFile(ctx context.Context, bkt *storage.BucketHandle, objectName strin } defer f.Close() - return ToGCS(ctx, bkt, objectName, f) + return ToGCS(ctx, bkt, objectName, f, "") } // DownloadBucket downloads all objects from a GCS bucket to a local directory. func DownloadBucket(ctx context.Context, bkt *storage.BucketHandle, prefix string, destDir string) error { it := bkt.Objects(ctx, &storage.Query{Prefix: prefix}) + + g, ctx := errgroup.WithContext(ctx) + // Limit concurrency to avoid running out of file descriptors or overwhelming the network + g.SetLimit(10) + for { + if err := ctx.Err(); err != nil { + return err + } + attrs, err := it.Next() if err == iterator.Done { break @@ -54,34 +68,45 @@ func DownloadBucket(ctx context.Context, bkt *storage.BucketHandle, prefix strin } // Skip directories - if attrs.Name[len(attrs.Name)-1] == '/' { + if strings.HasSuffix(attrs.Name, "/") { continue } destPath := filepath.Join(destDir, attrs.Name) - if err := os.MkdirAll(filepath.Dir(destPath), 0755); err != nil { - return fmt.Errorf("os.MkdirAll: %w", err) + if !strings.HasPrefix(destPath, filepath.Clean(destDir)+string(os.PathSeparator)) { + return fmt.Errorf("invalid object name %q: path traversal attempt", attrs.Name) } - f, err := os.Create(destPath) - if err != nil { - return fmt.Errorf("os.Create: %w", err) - } + // Capture loop variable for the goroutine + objName := attrs.Name - rc, err := bkt.Object(attrs.Name).NewReader(ctx) - if err != nil { - f.Close() - return fmt.Errorf("Object(%q).NewReader: %w", attrs.Name, err) - } + g.Go(func() error { + if err := os.MkdirAll(filepath.Dir(destPath), 0755); err != nil { + return fmt.Errorf("os.MkdirAll: %w", err) + } - if _, err := io.Copy(f, rc); err != nil { - rc.Close() - f.Close() - return fmt.Errorf("io.Copy: %w", err) - } + f, err := os.Create(destPath) + if err != nil { + return fmt.Errorf("os.Create: %w", err) + } + defer f.Close() + + rc, err := bkt.Object(objName).NewReader(ctx) + if err != nil { + return fmt.Errorf("Object(%q).NewReader: %w", objName, err) + } + defer rc.Close() + + if _, err := io.Copy(f, rc); err != nil { + return fmt.Errorf("io.Copy: %w", err) + } + + return nil + }) + } - rc.Close() - f.Close() + if err := g.Wait(); err != nil { + return err } return nil diff --git a/vulnfeeds/upload/gcs_test.go b/vulnfeeds/upload/gcs_test.go new file mode 100644 index 00000000000..e15e7f3d3bf --- /dev/null +++ b/vulnfeeds/upload/gcs_test.go @@ -0,0 +1,217 @@ +package upload + +import ( + "bytes" + "context" + "os" + "path/filepath" + "testing" + + "github.com/fsouza/fake-gcs-server/fakestorage" +) + +func TestToGCS(t *testing.T) { + server := fakestorage.NewServer([]fakestorage.Object{}) + defer server.Stop() + + client := server.Client() + bkt := client.Bucket("test-bucket") + if err := bkt.Create(context.Background(), "project", nil); err != nil { + t.Fatalf("failed to create bucket: %v", err) + } + + content := []byte("test content") + err := ToGCS(context.Background(), bkt, "test-object.txt", bytes.NewReader(content), "text/plain") + if err != nil { + t.Fatalf("ToGCS failed: %v", err) + } + + obj, err := server.GetObject("test-bucket", "test-object.txt") + if err != nil { + t.Fatalf("failed to get object: %v", err) + } + + if !bytes.Equal(obj.Content, content) { + t.Errorf("expected content %q, got %q", content, obj.Content) + } + if obj.ContentType != "text/plain" { + t.Errorf("expected content type %q, got %q", "text/plain", obj.ContentType) + } +} + +func TestUploadFile(t *testing.T) { + server := fakestorage.NewServer([]fakestorage.Object{}) + defer server.Stop() + + client := server.Client() + bkt := client.Bucket("test-bucket") + if err := bkt.Create(context.Background(), "project", nil); err != nil { + t.Fatalf("failed to create bucket: %v", err) + } + + tmpFile, err := os.CreateTemp("", "test-upload-*.txt") + if err != nil { + t.Fatalf("failed to create temp file: %v", err) + } + defer os.Remove(tmpFile.Name()) + + content := []byte("file content") + if _, err := tmpFile.Write(content); err != nil { + t.Fatalf("failed to write to temp file: %v", err) + } + tmpFile.Close() + + err = UploadFile(context.Background(), bkt, "uploaded-file.txt", tmpFile.Name()) + if err != nil { + t.Fatalf("UploadFile failed: %v", err) + } + + obj, err := server.GetObject("test-bucket", "uploaded-file.txt") + if err != nil { + t.Fatalf("failed to get object: %v", err) + } + + if !bytes.Equal(obj.Content, content) { + t.Errorf("expected content %q, got %q", content, obj.Content) + } +} + +func TestDownloadBucket(t *testing.T) { + objects := []fakestorage.Object{ + { + ObjectAttrs: fakestorage.ObjectAttrs{ + BucketName: "test-bucket", + Name: "folder/file1.txt", + }, + Content: []byte("content 1"), + }, + { + ObjectAttrs: fakestorage.ObjectAttrs{ + BucketName: "test-bucket", + Name: "folder/file2.txt", + }, + Content: []byte("content 2"), + }, + { + ObjectAttrs: fakestorage.ObjectAttrs{ + BucketName: "test-bucket", + Name: "folder/subfolder/", // Should be skipped + }, + Content: []byte(""), + }, + { + ObjectAttrs: fakestorage.ObjectAttrs{ + BucketName: "test-bucket", + Name: "other-folder/file3.txt", + }, + Content: []byte("content 3"), + }, + } + + server := fakestorage.NewServer(objects) + defer server.Stop() + + client := server.Client() + bkt := client.Bucket("test-bucket") + + tmpDir, err := os.MkdirTemp("", "test-download-*") + if err != nil { + t.Fatalf("failed to create temp dir: %v", err) + } + defer os.RemoveAll(tmpDir) + + err = DownloadBucket(context.Background(), bkt, "folder/", tmpDir) + if err != nil { + t.Fatalf("DownloadBucket failed: %v", err) + } + + // Verify file1.txt + content1, err := os.ReadFile(filepath.Join(tmpDir, "folder/file1.txt")) + if err != nil { + t.Fatalf("failed to read downloaded file1: %v", err) + } + if !bytes.Equal(content1, []byte("content 1")) { + t.Errorf("expected content 1, got %q", content1) + } + + // Verify file2.txt + content2, err := os.ReadFile(filepath.Join(tmpDir, "folder/file2.txt")) + if err != nil { + t.Fatalf("failed to read downloaded file2: %v", err) + } + if !bytes.Equal(content2, []byte("content 2")) { + t.Errorf("expected content 2, got %q", content2) + } + + // Verify file3.txt is NOT downloaded because of the prefix + if _, err := os.Stat(filepath.Join(tmpDir, "other-folder/file3.txt")); !os.IsNotExist(err) { + t.Errorf("expected file3.txt to not exist, but it does") + } +} + +func TestDownloadBucket_PathTraversal(t *testing.T) { + objects := []fakestorage.Object{ + { + ObjectAttrs: fakestorage.ObjectAttrs{ + BucketName: "test-bucket", + Name: "../malicious.txt", + }, + Content: []byte("malicious content"), + }, + } + + server := fakestorage.NewServer(objects) + defer server.Stop() + + client := server.Client() + bkt := client.Bucket("test-bucket") + + tmpDir, err := os.MkdirTemp("", "test-download-*") + if err != nil { + t.Fatalf("failed to create temp dir: %v", err) + } + defer os.RemoveAll(tmpDir) + + err = DownloadBucket(context.Background(), bkt, "", tmpDir) + if err == nil { + t.Fatalf("expected path traversal error, got nil") + } + if err.Error() != "invalid object name \"../malicious.txt\": path traversal attempt" { + t.Errorf("unexpected error message: %v", err) + } +} + +func TestDownloadBucket_RelativeDestDir(t *testing.T) { + objects := []fakestorage.Object{ + { + ObjectAttrs: fakestorage.ObjectAttrs{ + BucketName: "test-bucket", + Name: "file.txt", + }, + Content: []byte("content"), + }, + } + + server := fakestorage.NewServer(objects) + defer server.Stop() + + client := server.Client() + bkt := client.Bucket("test-bucket") + + // Use a relative directory + destDir := "test-relative-dir" + defer os.RemoveAll(destDir) + + err := DownloadBucket(context.Background(), bkt, "", destDir) + if err != nil { + t.Fatalf("DownloadBucket failed with relative dir: %v", err) + } + + content, err := os.ReadFile(filepath.Join(destDir, "file.txt")) + if err != nil { + t.Fatalf("failed to read downloaded file: %v", err) + } + if !bytes.Equal(content, []byte("content")) { + t.Errorf("expected content, got %q", content) + } +} From 3a32283a582182f55fd917c6151225abdfdcfce5 Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Fri, 6 Mar 2026 05:33:07 +0000 Subject: [PATCH 03/28] rename upload -> gcs-tools --- vulnfeeds/cmd/combine-to-osv/main.go | 4 +- vulnfeeds/cmd/converters/alpine/main.go | 4 +- vulnfeeds/cmd/converters/debian/main.go | 4 +- vulnfeeds/{upload => gcs-tools}/cveworker.go | 4 +- vulnfeeds/{upload => gcs-tools}/gcs.go | 2 +- vulnfeeds/gcs-tools/gcs_test.go | 219 +++++++++++++++++++ vulnfeeds/go.mod | 21 +- vulnfeeds/go.sum | 152 +++++++++++-- vulnfeeds/upload/gcs_test.go | 217 ------------------ 9 files changed, 380 insertions(+), 247 deletions(-) rename vulnfeeds/{upload => gcs-tools}/cveworker.go (98%) rename vulnfeeds/{upload => gcs-tools}/gcs.go (99%) create mode 100644 vulnfeeds/gcs-tools/gcs_test.go delete mode 100644 vulnfeeds/upload/gcs_test.go diff --git a/vulnfeeds/cmd/combine-to-osv/main.go b/vulnfeeds/cmd/combine-to-osv/main.go index b406df0674e..82ae354e0da 100644 --- a/vulnfeeds/cmd/combine-to-osv/main.go +++ b/vulnfeeds/cmd/combine-to-osv/main.go @@ -16,8 +16,8 @@ import ( "cloud.google.com/go/storage" "github.com/google/osv/vulnfeeds/conversion" + "github.com/google/osv/vulnfeeds/gcs-tools" "github.com/google/osv/vulnfeeds/models" - "github.com/google/osv/vulnfeeds/upload" "github.com/google/osv/vulnfeeds/utility/logger" "github.com/ossf/osv-schema/bindings/go/osvschema" "google.golang.org/api/iterator" @@ -92,7 +92,7 @@ func main() { vulnerabilities = append(vulnerabilities, v) } - upload.Upload(ctx, "OSV files", *uploadToGCS, *outputBucketName, *overridesBucketName, *numWorkers, *osvOutputPath, vulnerabilities, *syncDeletions) + gcs.Upload(ctx, "OSV files", *uploadToGCS, *outputBucketName, *overridesBucketName, *numWorkers, *osvOutputPath, vulnerabilities, *syncDeletions) } // extractCVEName extracts the CVE name from a given filename and prefix. diff --git a/vulnfeeds/cmd/converters/alpine/main.go b/vulnfeeds/cmd/converters/alpine/main.go index b0e58654b5c..4acdd078c4f 100644 --- a/vulnfeeds/cmd/converters/alpine/main.go +++ b/vulnfeeds/cmd/converters/alpine/main.go @@ -15,8 +15,8 @@ import ( "strings" "time" + "github.com/google/osv/vulnfeeds/gcs-tools" "github.com/google/osv/vulnfeeds/models" - "github.com/google/osv/vulnfeeds/upload" "github.com/google/osv/vulnfeeds/utility/logger" "github.com/google/osv/vulnfeeds/vulns" "github.com/ossf/osv-schema/bindings/go/osvschema" @@ -64,7 +64,7 @@ func main() { } ctx := context.Background() - upload.Upload(ctx, "Alpine CVEs", *uploadToGCS, *outputBucketName, "", *numWorkers, *alpineOutputPath, vulnerabilities, *syncDeletions) + gcs.Upload(ctx, "Alpine CVEs", *uploadToGCS, *outputBucketName, "", *numWorkers, *alpineOutputPath, vulnerabilities, *syncDeletions) logger.Info("Alpine CVE conversion succeeded.") } diff --git a/vulnfeeds/cmd/converters/debian/main.go b/vulnfeeds/cmd/converters/debian/main.go index f1765dbaced..347a52ae621 100644 --- a/vulnfeeds/cmd/converters/debian/main.go +++ b/vulnfeeds/cmd/converters/debian/main.go @@ -15,8 +15,8 @@ import ( "strings" "github.com/google/osv/vulnfeeds/faulttolerant" + "github.com/google/osv/vulnfeeds/gcs-tools" "github.com/google/osv/vulnfeeds/models" - "github.com/google/osv/vulnfeeds/upload" "github.com/google/osv/vulnfeeds/utility/logger" "github.com/google/osv/vulnfeeds/vulns" "github.com/ossf/osv-schema/bindings/go/osvschema" @@ -70,7 +70,7 @@ func main() { } ctx := context.Background() - upload.Upload(ctx, "Debian CVEs", *uploadToGCS, *outputBucketName, "", *numWorkers, *debianOutputPath, vulnerabilities, *syncDeletions) + gcs.Upload(ctx, "Debian CVEs", *uploadToGCS, *outputBucketName, "", *numWorkers, *debianOutputPath, vulnerabilities, *syncDeletions) logger.Info("Debian CVE conversion succeeded.") } diff --git a/vulnfeeds/upload/cveworker.go b/vulnfeeds/gcs-tools/cveworker.go similarity index 98% rename from vulnfeeds/upload/cveworker.go rename to vulnfeeds/gcs-tools/cveworker.go index 56c3a9d5263..34ce1ccf774 100644 --- a/vulnfeeds/upload/cveworker.go +++ b/vulnfeeds/gcs-tools/cveworker.go @@ -1,5 +1,5 @@ -// Package upload handles allocating workers to intelligently uploading OSV records to a bucket -package upload +// Package gcs handles allocating workers to intelligently uploading OSV records to a bucket +package gcs import ( "bytes" diff --git a/vulnfeeds/upload/gcs.go b/vulnfeeds/gcs-tools/gcs.go similarity index 99% rename from vulnfeeds/upload/gcs.go rename to vulnfeeds/gcs-tools/gcs.go index f2c52451228..cf8cc10f88f 100644 --- a/vulnfeeds/upload/gcs.go +++ b/vulnfeeds/gcs-tools/gcs.go @@ -1,4 +1,4 @@ -package upload +package gcs import ( "context" diff --git a/vulnfeeds/gcs-tools/gcs_test.go b/vulnfeeds/gcs-tools/gcs_test.go new file mode 100644 index 00000000000..7ef58b996db --- /dev/null +++ b/vulnfeeds/gcs-tools/gcs_test.go @@ -0,0 +1,219 @@ +package gcs + +import ( + "bytes" + "context" + "os" + "path/filepath" + "testing" + + "github.com/fsouza/fake-gcs-server/fakestorage" +) + +func TestToGCS(t *testing.T) { + server := fakestorage.NewServer([]fakestorage.Object{}) + defer server.Stop() + + client := server.Client() + bkt := client.Bucket("test-bucket") + if err := bkt.Create(context.Background(), "project", nil); err != nil { + t.Fatalf("failed to create bucket: %v", err) + } + + content := []byte("test content") + err := ToGCS(context.Background(), bkt, "test-object.txt", bytes.NewReader(content), "text/plain") + if err != nil { + t.Fatalf("ToGCS failed: %v", err) + } + + obj, err := server.GetObject("test-bucket", "test-object.txt") + if err != nil { + t.Fatalf("failed to get object: %v", err) + } + + if !bytes.Equal(obj.Content, content) { + t.Errorf("expected content %q, got %q", content, obj.Content) + } + if obj.ContentType != "text/plain" { + t.Errorf("expected content type %q, got %q", "text/plain", obj.ContentType) + } +} + +func TestUploadFile(t *testing.T) { + server := fakestorage.NewServer([]fakestorage.Object{}) + defer server.Stop() + + client := server.Client() + bkt := client.Bucket("test-bucket") + if err := bkt.Create(context.Background(), "project", nil); err != nil { + t.Fatalf("failed to create bucket: %v", err) + } + + tmpFile, err := os.CreateTemp("", "test-upload-*.txt") + if err != nil { + t.Fatalf("failed to create temp file: %v", err) + } + defer os.Remove(tmpFile.Name()) + + content := []byte("file content") + if _, err := tmpFile.Write(content); err != nil { + t.Fatalf("failed to write to temp file: %v", err) + } + tmpFile.Close() + + err = UploadFile(context.Background(), bkt, "uploaded-file.txt", tmpFile.Name()) + if err != nil { + t.Fatalf("UploadFile failed: %v", err) + } + + obj, err := server.GetObject("test-bucket", "uploaded-file.txt") + if err != nil { + t.Fatalf("failed to get object: %v", err) + } + + if !bytes.Equal(obj.Content, content) { + t.Errorf("expected content %q, got %q", content, obj.Content) + } +} + +func TestDownloadBucket(t *testing.T) { + t.Run("success", func(t *testing.T) { + objects := []fakestorage.Object{ + { + ObjectAttrs: fakestorage.ObjectAttrs{ + BucketName: "test-bucket", + Name: "folder/file1.txt", + }, + Content: []byte("content 1"), + }, + { + ObjectAttrs: fakestorage.ObjectAttrs{ + BucketName: "test-bucket", + Name: "folder/file2.txt", + }, + Content: []byte("content 2"), + }, + { + ObjectAttrs: fakestorage.ObjectAttrs{ + BucketName: "test-bucket", + Name: "folder/subfolder/", // Should be skipped + }, + Content: []byte(""), + }, + { + ObjectAttrs: fakestorage.ObjectAttrs{ + BucketName: "test-bucket", + Name: "other-folder/file3.txt", + }, + Content: []byte("content 3"), + }, + } + + server := fakestorage.NewServer(objects) + defer server.Stop() + + client := server.Client() + bkt := client.Bucket("test-bucket") + + tmpDir, err := os.MkdirTemp("", "test-download-*") + if err != nil { + t.Fatalf("failed to create temp dir: %v", err) + } + defer os.RemoveAll(tmpDir) + + err = DownloadBucket(context.Background(), bkt, "folder/", tmpDir) + if err != nil { + t.Fatalf("DownloadBucket failed: %v", err) + } + + // Verify file1.txt + content1, err := os.ReadFile(filepath.Join(tmpDir, "folder/file1.txt")) + if err != nil { + t.Fatalf("failed to read downloaded file1: %v", err) + } + if !bytes.Equal(content1, []byte("content 1")) { + t.Errorf("expected content 1, got %q", content1) + } + + // Verify file2.txt + content2, err := os.ReadFile(filepath.Join(tmpDir, "folder/file2.txt")) + if err != nil { + t.Fatalf("failed to read downloaded file2: %v", err) + } + if !bytes.Equal(content2, []byte("content 2")) { + t.Errorf("expected content 2, got %q", content2) + } + + // Verify file3.txt is NOT downloaded because of the prefix + if _, err := os.Stat(filepath.Join(tmpDir, "other-folder/file3.txt")); !os.IsNotExist(err) { + t.Errorf("expected file3.txt to not exist, but it does") + } + }) + + t.Run("path traversal", func(t *testing.T) { + objects := []fakestorage.Object{ + { + ObjectAttrs: fakestorage.ObjectAttrs{ + BucketName: "test-bucket", + Name: "../malicious.txt", + }, + Content: []byte("malicious content"), + }, + } + + server := fakestorage.NewServer(objects) + defer server.Stop() + + client := server.Client() + bkt := client.Bucket("test-bucket") + + tmpDir, err := os.MkdirTemp("", "test-download-*") + if err != nil { + t.Fatalf("failed to create temp dir: %v", err) + } + defer os.RemoveAll(tmpDir) + + err = DownloadBucket(context.Background(), bkt, "", tmpDir) + if err == nil { + t.Fatalf("expected path traversal error, got nil") + } + if err.Error() != "invalid object name \"../malicious.txt\": path traversal attempt" { + t.Errorf("unexpected error message: %v", err) + } + }) + + t.Run("relative dest dir", func(t *testing.T) { + objects := []fakestorage.Object{ + { + ObjectAttrs: fakestorage.ObjectAttrs{ + BucketName: "test-bucket", + Name: "file.txt", + }, + Content: []byte("content"), + }, + } + + server := fakestorage.NewServer(objects) + defer server.Stop() + + client := server.Client() + bkt := client.Bucket("test-bucket") + + // Use a relative directory + destDir := "test-relative-dir" + defer os.RemoveAll(destDir) + + err := DownloadBucket(context.Background(), bkt, "", destDir) + if err != nil { + t.Fatalf("DownloadBucket failed with relative dir: %v", err) + } + + content, err := os.ReadFile(filepath.Join(destDir, "file.txt")) + if err != nil { + t.Fatalf("failed to read downloaded file: %v", err) + } + if !bytes.Equal(content, []byte("content")) { + t.Errorf("expected content, got %q", content) + } + }) +} diff --git a/vulnfeeds/go.mod b/vulnfeeds/go.mod index c61dde23911..2ce17b2b846 100644 --- a/vulnfeeds/go.mod +++ b/vulnfeeds/go.mod @@ -9,6 +9,7 @@ require ( github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/trace v1.31.0 github.com/aquasecurity/go-pep440-version v0.0.1 github.com/atombender/go-jsonschema v0.22.0 + github.com/fsouza/fake-gcs-server v1.54.0 github.com/gkampitakis/go-snaps v0.5.19 github.com/go-git/go-git/v5 v5.16.5 github.com/goccy/go-yaml v1.19.2 @@ -16,10 +17,11 @@ require ( github.com/knqyf263/go-cpe v0.0.0-20230627041855-cb0794d06872 github.com/ossf/osv-schema/bindings/go v0.0.0-20260129002236-09a17f85b44a github.com/sethvargo/go-retry v0.3.0 - go.opentelemetry.io/contrib/detectors/gcp v1.38.0 + go.opentelemetry.io/contrib/detectors/gcp v1.39.0 go.opentelemetry.io/otel v1.40.0 go.opentelemetry.io/otel/sdk v1.40.0 go.opentelemetry.io/otel/trace v1.40.0 + golang.org/x/sync v0.19.0 google.golang.org/api v0.268.0 google.golang.org/protobuf v1.36.11 gopkg.in/dnaeon/go-vcr.v4 v4.0.6 @@ -27,13 +29,14 @@ require ( ) require ( - cel.dev/expr v0.24.0 // indirect + cel.dev/expr v0.25.1 // indirect cloud.google.com/go v0.123.0 // indirect cloud.google.com/go/auth v0.18.1 // indirect cloud.google.com/go/auth/oauth2adapt v0.2.8 // indirect cloud.google.com/go/compute/metadata v0.9.0 // indirect cloud.google.com/go/iam v1.5.3 // indirect cloud.google.com/go/monitoring v1.24.3 // indirect + cloud.google.com/go/pubsub/v2 v2.4.0 // indirect cloud.google.com/go/trace v1.11.7 // indirect dario.cat/mergo v1.0.2 // indirect github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.30.0 // indirect @@ -52,11 +55,11 @@ require ( github.com/clipperhouse/displaywidth v0.11.0 // indirect github.com/clipperhouse/uax29/v2 v2.7.0 // indirect github.com/cloudflare/circl v1.6.3 // indirect - github.com/cncf/xds/go v0.0.0-20251022180443-0feb69152e9f // indirect + github.com/cncf/xds/go v0.0.0-20251210132809-ee656c7534f5 // indirect github.com/cyphar/filepath-securejoin v0.4.1 // indirect github.com/emirpasic/gods v1.18.1 // indirect - github.com/envoyproxy/go-control-plane/envoy v1.35.0 // indirect - github.com/envoyproxy/protoc-gen-validate v1.2.1 // indirect + github.com/envoyproxy/go-control-plane/envoy v1.36.0 // indirect + github.com/envoyproxy/protoc-gen-validate v1.3.0 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect github.com/gkampitakis/ciinfo v0.3.2 // indirect github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 // indirect @@ -65,10 +68,13 @@ require ( github.com/go-logr/logr v1.4.3 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 // indirect + github.com/google/renameio/v2 v2.0.0 // indirect github.com/google/s2a-go v0.1.9 // indirect github.com/google/uuid v1.6.0 // indirect github.com/googleapis/enterprise-certificate-proxy v0.3.11 // indirect github.com/googleapis/gax-go/v2 v2.17.0 // indirect + github.com/gorilla/handlers v1.5.2 // indirect + github.com/gorilla/mux v1.8.1 // indirect github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 // indirect github.com/kevinburke/ssh_config v1.2.0 // indirect github.com/kr/pretty v0.3.1 // indirect @@ -79,6 +85,7 @@ require ( github.com/muesli/cancelreader v0.2.2 // indirect github.com/pjbgf/sha1cd v0.3.2 // indirect github.com/pkg/errors v0.9.1 // indirect + github.com/pkg/xattr v0.4.12 // indirect github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect github.com/rivo/uniseg v0.4.7 // indirect github.com/rogpeppe/go-internal v1.14.1 // indirect @@ -91,6 +98,7 @@ require ( github.com/tidwall/sjson v1.2.5 // indirect github.com/xanzy/ssh-agent v0.3.3 // indirect github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect + go.opencensus.io v0.24.0 // indirect go.opentelemetry.io/auto/sdk v1.2.1 // indirect go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.63.0 // indirect go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0 // indirect @@ -101,7 +109,6 @@ require ( golang.org/x/exp v0.0.0-20250819193227-8b4c13bb791b // indirect golang.org/x/net v0.49.0 // indirect golang.org/x/oauth2 v0.35.0 // indirect - golang.org/x/sync v0.19.0 // indirect golang.org/x/sys v0.41.0 // indirect golang.org/x/text v0.33.0 // indirect golang.org/x/time v0.14.0 // indirect @@ -109,6 +116,6 @@ require ( google.golang.org/genproto v0.0.0-20260128011058-8636f8732409 // indirect google.golang.org/genproto/googleapis/api v0.0.0-20260203192932-546029d2fa20 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20260203192932-546029d2fa20 // indirect - google.golang.org/grpc v1.78.0 // indirect + google.golang.org/grpc v1.79.1 // indirect gopkg.in/warnings.v0 v0.1.2 // indirect ) diff --git a/vulnfeeds/go.sum b/vulnfeeds/go.sum index 2f00db9e1f2..4aa72a074b4 100644 --- a/vulnfeeds/go.sum +++ b/vulnfeeds/go.sum @@ -1,7 +1,8 @@ -cel.dev/expr v0.24.0 h1:56OvJKSH3hDGL0ml5uSxZmz3/3Pq4tJ+fb1unVLAFcY= -cel.dev/expr v0.24.0/go.mod h1:hLPLo1W4QUmuYdA72RBX06QTs6MXw941piREPl3Yfiw= +cel.dev/expr v0.25.1 h1:1KrZg61W6TWSxuNZ37Xy49ps13NUovb66QLprthtwi4= +cel.dev/expr v0.25.1/go.mod h1:hrXvqGP6G6gyx8UAHSHJ5RGk//1Oj5nXQ2NI02Nrsg4= charm.land/lipgloss/v2 v2.0.0 h1:sd8N/B3x892oiOjFfBQdXBQp3cAkvjGaU5TvVZC3ivo= charm.land/lipgloss/v2 v2.0.0/go.mod h1:w6SnmsBFBmEFBodiEDurGS/sdUY/u1+v72DqUzc6J14= +cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.123.0 h1:2NAUJwPR47q+E35uaJeYoNhuNEM9kM8SjgRgdeOJUSE= cloud.google.com/go v0.123.0/go.mod h1:xBoMV08QcqUGuPW65Qfm1o9Y4zKZBpGS+7bImXLTAZU= cloud.google.com/go/auth v0.18.1 h1:IwTEx92GFUo2pJ6Qea0EU3zYvKnTAeRCODxfA/G5UWs= @@ -18,6 +19,8 @@ cloud.google.com/go/longrunning v0.8.0 h1:LiKK77J3bx5gDLi4SMViHixjD2ohlkwBi+mKA7 cloud.google.com/go/longrunning v0.8.0/go.mod h1:UmErU2Onzi+fKDg2gR7dusz11Pe26aknR4kHmJJqIfk= cloud.google.com/go/monitoring v1.24.3 h1:dde+gMNc0UhPZD1Azu6at2e79bfdztVDS5lvhOdsgaE= cloud.google.com/go/monitoring v1.24.3/go.mod h1:nYP6W0tm3N9H/bOw8am7t62YTzZY+zUeQ+Bi6+2eonI= +cloud.google.com/go/pubsub/v2 v2.4.0 h1:oMKNiBQpXImRWnHYla9uSU66ZzByZwBSCJOEs/pTKVg= +cloud.google.com/go/pubsub/v2 v2.4.0/go.mod h1:2lS/XQKq5qtOMs6kHBK+WX1ytUC36kLl2ig3zqsGUx8= cloud.google.com/go/secretmanager v1.16.0 h1:19QT7ZsLJ8FSP1k+4esQvuCD7npMJml6hYzilxVyT+k= cloud.google.com/go/secretmanager v1.16.0/go.mod h1://C/e4I8D26SDTz1f3TQcddhcmiC3rMEl0S1Cakvs3Q= cloud.google.com/go/storage v1.60.0 h1:oBfZrSOCimggVNz9Y/bXY35uUcts7OViubeddTTVzQ8= @@ -26,6 +29,7 @@ cloud.google.com/go/trace v1.11.7 h1:kDNDX8JkaAG3R2nq1lIdkb7FCSi1rCmsEtKVsty7p+U cloud.google.com/go/trace v1.11.7/go.mod h1:TNn9d5V3fQVf6s4SCveVMIBS2LJUqo73GACmq/Tky0s= dario.cat/mergo v1.0.2 h1:85+piFYR1tMbRrLcDwR18y4UKJ3aH1Tbzi24VRW1TK8= dario.cat/mergo v1.0.2/go.mod h1:E/hbnu0NxMFBjpMIE34DRGLWqDy0g5FuKDhCb31ngxA= +github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.30.0 h1:sBEjpZlNHzK1voKq9695PJSX2o5NEXl7/OL3coiIY0c= github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.30.0/go.mod h1:P4WPRUkOhJC13W//jWpyfJNDAIpvRbAUIYLX/4jtlE0= github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.55.0 h1:UnDZ/zFfG1JhH/DqxIZYU/1CUAlTUScoXD/LcM2Ykk8= @@ -51,6 +55,7 @@ github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPd github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs= github.com/atombender/go-jsonschema v0.22.0 h1:7H48X5fUccsfsacar5UfP6nnOXuQzmnr6lQmH/Fj2pQ= github.com/atombender/go-jsonschema v0.22.0/go.mod h1:8Q281v0ozTIfvdnbwDoWQDIk0syH6F0Fpoq+Z1cs+rM= +github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/charmbracelet/colorprofile v0.4.2 h1:BdSNuMjRbotnxHSfxy+PCSa4xAmz7szw70ktAtWRYrY= @@ -65,14 +70,16 @@ github.com/charmbracelet/x/termios v0.1.1 h1:o3Q2bT8eqzGnGPOYheoYS8eEleT5ZVNYNy8 github.com/charmbracelet/x/termios v0.1.1/go.mod h1:rB7fnv1TgOPOyyKRJ9o+AsTU/vK5WHJ2ivHeut/Pcwo= github.com/charmbracelet/x/windows v0.2.2 h1:IofanmuvaxnKHuV04sC0eBy/smG6kIKrWG2/jYn2GuM= github.com/charmbracelet/x/windows v0.2.2/go.mod h1:/8XtdKZzedat74NQFn0NGlGL4soHB0YQZrETF96h75k= +github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/clipperhouse/displaywidth v0.11.0 h1:lBc6kY44VFw+TDx4I8opi/EtL9m20WSEFgwIwO+UVM8= github.com/clipperhouse/displaywidth v0.11.0/go.mod h1:bkrFNkf81G8HyVqmKGxsPufD3JhNl3dSqnGhOoSD/o0= github.com/clipperhouse/uax29/v2 v2.7.0 h1:+gs4oBZ2gPfVrKPthwbMzWZDaAFPGYK72F0NJv2v7Vk= github.com/clipperhouse/uax29/v2 v2.7.0/go.mod h1:EFJ2TJMRUaplDxHKj1qAEhCtQPW2tJSwu5BF98AuoVM= github.com/cloudflare/circl v1.6.3 h1:9GPOhQGF9MCYUeXyMYlqTR6a5gTrgR/fBLXvUgtVcg8= github.com/cloudflare/circl v1.6.3/go.mod h1:2eXP6Qfat4O/Yhh8BznvKnJ+uzEoTQ6jVKJRn81BiS4= -github.com/cncf/xds/go v0.0.0-20251022180443-0feb69152e9f h1:Y8xYupdHxryycyPlc9Y+bSQAYZnetRJ70VMVKm5CKI0= -github.com/cncf/xds/go v0.0.0-20251022180443-0feb69152e9f/go.mod h1:HlzOvOjVBOfTGSRXRyY0OiCS/3J1akRGQQpRO/7zyF4= +github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= +github.com/cncf/xds/go v0.0.0-20251210132809-ee656c7534f5 h1:6xNmx7iTtyBRev0+D/Tv1FZd4SCg8axKApyNyRsAt/w= +github.com/cncf/xds/go v0.0.0-20251210132809-ee656c7534f5/go.mod h1:KdCmV+x/BuvyMxRnYBlmVaq4OLiKW6iRQfvC62cvdkI= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/cyphar/filepath-securejoin v0.4.1 h1:JyxxyPEaktOD+GAnqIqTf9A8tHyAG22rowi7HkoSU1s= github.com/cyphar/filepath-securejoin v0.4.1/go.mod h1:Sdj7gXlvMcPZsbhwhQ33GguGLDGQL7h7bg04C/+u9jI= @@ -80,20 +87,28 @@ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= +github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= github.com/elazarl/goproxy v1.7.2 h1:Y2o6urb7Eule09PjlhQRGNsqRfPmYI3KKQLFpCAV3+o= github.com/elazarl/goproxy v1.7.2/go.mod h1:82vkLNir0ALaW14Rc399OTTjyNREgmdL2cVoIbS6XaE= github.com/emirpasic/gods v1.18.1 h1:FXtiHYKDGKCW2KzwZKx0iC0PQmdlorYgdFG9jPXJ1Bc= github.com/emirpasic/gods v1.18.1/go.mod h1:8tpGGwCnJ5H4r6BWwaV6OrWmMoPhUl5jm/FMNAnJvWQ= -github.com/envoyproxy/go-control-plane v0.13.5-0.20251024222203-75eaa193e329 h1:K+fnvUM0VZ7ZFJf0n4L/BRlnsb9pL/GuDG6FqaH+PwM= -github.com/envoyproxy/go-control-plane v0.13.5-0.20251024222203-75eaa193e329/go.mod h1:Alz8LEClvR7xKsrq3qzoc4N0guvVNSS8KmSChGYr9hs= -github.com/envoyproxy/go-control-plane/envoy v1.35.0 h1:ixjkELDE+ru6idPxcHLj8LBVc2bFP7iBytj353BoHUo= -github.com/envoyproxy/go-control-plane/envoy v1.35.0/go.mod h1:09qwbGVuSWWAyN5t/b3iyVfz5+z8QWGrzkoqm/8SbEs= +github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= +github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= +github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= +github.com/envoyproxy/go-control-plane v0.14.0 h1:hbG2kr4RuFj222B6+7T83thSPqLjwBIfQawTkC++2HA= +github.com/envoyproxy/go-control-plane v0.14.0/go.mod h1:NcS5X47pLl/hfqxU70yPwL9ZMkUlwlKxtAohpi2wBEU= +github.com/envoyproxy/go-control-plane/envoy v1.36.0 h1:yg/JjO5E7ubRyKX3m07GF3reDNEnfOboJ0QySbH736g= +github.com/envoyproxy/go-control-plane/envoy v1.36.0/go.mod h1:ty89S1YCCVruQAm9OtKeEkQLTb+Lkz0k8v9W0Oxsv98= github.com/envoyproxy/go-control-plane/ratelimit v0.1.0 h1:/G9QYbddjL25KvtKTv3an9lx6VBE2cnb8wp1vEGNYGI= github.com/envoyproxy/go-control-plane/ratelimit v0.1.0/go.mod h1:Wk+tMFAFbCXaJPzVVHnPgRKdUdwW/KdbRt94AzgRee4= -github.com/envoyproxy/protoc-gen-validate v1.2.1 h1:DEo3O99U8j4hBFwbJfrz9VtgcDfUKS7KJ7spH3d86P8= -github.com/envoyproxy/protoc-gen-validate v1.2.1/go.mod h1:d/C80l/jxXLdfEIhX1W2TmLfsJ31lvEjwamM4DxlWXU= +github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= +github.com/envoyproxy/protoc-gen-validate v1.3.0 h1:TvGH1wof4H33rezVKWSpqKz5NXWg5VPuZ0uONDT6eb4= +github.com/envoyproxy/protoc-gen-validate v1.3.0/go.mod h1:HvYl7zwPa5mffgyeTUHA9zHIH36nmrm7oCbo4YKoSWA= github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= +github.com/fsouza/fake-gcs-server v1.54.0 h1:DGO4EkFVbtP/A5Ha+CAHHx+Xa6O6LeskMB4hQ1wBE48= +github.com/fsouza/fake-gcs-server v1.54.0/go.mod h1:ryXYE4debQs8GjOxwaOAwFRwM4Cvs6S+NKPPgdVJe6g= github.com/gkampitakis/ciinfo v0.3.2 h1:JcuOPk8ZU7nZQjdUhctuhQofk7BGHuIy0c9Ez8BNhXs= github.com/gkampitakis/ciinfo v0.3.2/go.mod h1:1NIwaOcFChN4fa/B0hEBdAb6npDlFL8Bwx4dfRLRqAo= github.com/gkampitakis/go-snaps v0.5.19 h1:hUJlCQOpTt1M+kSisMwioDWZDWpDtdAvUhvWCx1YGW0= @@ -108,6 +123,8 @@ github.com/go-git/go-git-fixtures/v4 v4.3.2-0.20231010084843-55a94097c399 h1:eMj github.com/go-git/go-git-fixtures/v4 v4.3.2-0.20231010084843-55a94097c399/go.mod h1:1OCfN199q1Jm3HZlxleg+Dw/mwps2Wbk9frAWm+4FII= github.com/go-git/go-git/v5 v5.16.5 h1:mdkuqblwr57kVfXri5TTH+nMFLNUxIj9Z7F5ykFbw5s= github.com/go-git/go-git/v5 v5.16.5/go.mod h1:QOMLpNf1qxuSY4StA/ArOdfFR2TrKEjJiye2kel2m+M= +github.com/go-ini/ini v1.67.0 h1:z6ZrTEZqSWOTyH2FlglNbNgARyHG8oLW9gMELqKr06A= +github.com/go-ini/ini v1.67.0/go.mod h1:ByCAeIL28uOIIG0E3PJtZPDL8WnHpFKFOtgjp+3Ies8= github.com/go-jose/go-jose/v4 v4.1.3 h1:CVLmWDhDVRa6Mi/IgCgaopNosCaHz7zrMeF9MlZRkrs= github.com/go-jose/go-jose/v4 v4.1.3/go.mod h1:x4oUasVrzR7071A4TnHLGSPpNOm2a21K9Kf04k1rs08= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= @@ -117,26 +134,57 @@ github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/goccy/go-yaml v1.19.2 h1:PmFC1S6h8ljIz6gMRBopkjP1TVT7xuwrButHID66PoM= github.com/goccy/go-yaml v1.19.2/go.mod h1:XBurs7gK8ATbW4ZPGKgcbrY1Br56PdM69F7LkFRi1kA= +github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= +github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 h1:f+oWsMOmNPc8JmEHVZIycC7hBoQxHH9pNKQORJNozsQ= github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8/go.mod h1:wcDNUvekVysuuOpQKo3191zZyTpiI6se1N1ULghS0sw= +github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= +github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= +github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= +github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= +github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= +github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= +github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= +github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= +github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= +github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/martian/v3 v3.3.3 h1:DIhPTQrbPkgs2yJYdXU/eNACCG5DVQjySNRNlflZ9Fc= github.com/google/martian/v3 v3.3.3/go.mod h1:iEPrYcgCF7jA9OtScMFQyAlZZ4YXTKEtJ1E6RWzmBA0= +github.com/google/renameio/v2 v2.0.0 h1:UifI23ZTGY8Tt29JbYFiuyIU3eX+RNFtUwefq9qAhxg= +github.com/google/renameio/v2 v2.0.0/go.mod h1:BtmJXm5YlszgC+TD4HOEEUFgkJP3nLxehU6hfe7jRt4= github.com/google/s2a-go v0.1.9 h1:LGD7gtMgezd8a/Xak7mEWL0PjoTQFvpRudN895yqKW0= github.com/google/s2a-go v0.1.9/go.mod h1:YA0Ei2ZQL3acow2O62kdp9UlnvMmU7kA6Eutn0dXayM= +github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/googleapis/enterprise-certificate-proxy v0.3.11 h1:vAe81Msw+8tKUxi2Dqh/NZMz7475yUvmRIkXr4oN2ao= github.com/googleapis/enterprise-certificate-proxy v0.3.11/go.mod h1:RFV7MUdlb7AgEq2v7FmMCfeSMCllAzWxFgRdusoGks8= github.com/googleapis/gax-go/v2 v2.17.0 h1:RksgfBpxqff0EZkDWYuz9q/uWsTVz+kf43LsZ1J6SMc= github.com/googleapis/gax-go/v2 v2.17.0/go.mod h1:mzaqghpQp4JDh3HvADwrat+6M3MOIDp5YKHhb9PAgDY= +github.com/gorilla/handlers v1.5.2 h1:cLTUSsNkgcwhgRqvCNmdbRWG0A3N4F+M2nWKdScwyEE= +github.com/gorilla/handlers v1.5.2/go.mod h1:dX+xVpaxdSw+q0Qek8SSsl3dfMk3jNddUkMzo0GtH0w= +github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY= +github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ= github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 h1:BQSFePA1RWJOlocH6Fxy8MmwDt+yVQYULKfN0RoTN8A= github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99/go.mod h1:1lJo3i6rXxKeerYnT8Nvf0QmHCRC1n8sfWVwXF2Frvo= github.com/kevinburke/ssh_config v1.2.0 h1:x584FjTGwHzMwvHx18PXxbBVzfnxogHaAReU4gf13a4= github.com/kevinburke/ssh_config v1.2.0/go.mod h1:CT57kijsi8u/K/BOFA39wgDQJ9CxiF4nAY/ojJ6r6mM= +github.com/klauspost/compress v1.18.2 h1:iiPHWW0YrcFgpBYhsA6D1+fqHssJscY/Tm/y2Uqnapk= +github.com/klauspost/compress v1.18.2/go.mod h1:R0h/fSBs8DE4ENlcrlib3PsXS61voFxhIs2DeRhCvJ4= +github.com/klauspost/cpuid/v2 v2.2.11 h1:0OwqZRYI2rFrjS4kvkDnqJkKHdHaRnCm68/DY4OxRzU= +github.com/klauspost/cpuid/v2 v2.2.11/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0= +github.com/klauspost/crc32 v1.3.0 h1:sSmTt3gUt81RP655XGZPElI0PelVTZ6YwCRnPSupoFM= +github.com/klauspost/crc32 v1.3.0/go.mod h1:D7kQaZhnkX/Y0tstFGf8VUzv2UofNGqCjnC3zdHB0Hw= github.com/knqyf263/go-cpe v0.0.0-20230627041855-cb0794d06872 h1:snH0nDYi3kizy9vxYBhZm5KXkGt9VXdGEtr6/1SGUqY= github.com/knqyf263/go-cpe v0.0.0-20230627041855-cb0794d06872/go.mod h1:4cVhzV/TndScEg4xMtSo3TTz3cMFhEAvhAA4igAyXZY= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= @@ -152,28 +200,41 @@ github.com/maruel/natural v1.1.1 h1:Hja7XhhmvEFhcByqDoHz9QZbkWey+COd9xWfCfn1ioo= github.com/maruel/natural v1.1.1/go.mod h1:v+Rfd79xlw1AgVBjbO0BEQmptqb5HvL/k9GRHB7ZKEg= github.com/mattn/go-runewidth v0.0.19 h1:v++JhqYnZuu5jSKrk9RbgF5v4CGUjqRfBm05byFGLdw= github.com/mattn/go-runewidth v0.0.19/go.mod h1:XBkDxAl56ILZc9knddidhrOlY5R/pDhgLpndooCuJAs= +github.com/minio/crc64nvme v1.1.1 h1:8dwx/Pz49suywbO+auHCBpCtlW1OfpcLN7wYgVR6wAI= +github.com/minio/crc64nvme v1.1.1/go.mod h1:eVfm2fAzLlxMdUGc0EEBGSMmPwmXD5XiNRpnu9J3bvg= +github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34= +github.com/minio/md5-simd v1.1.2/go.mod h1:MzdKDxYpY2BT9XQFocsiZf/NKVtR7nkE4RoEpN+20RM= +github.com/minio/minio-go/v7 v7.0.98 h1:MeAVKjLVz+XJ28zFcuYyImNSAh8Mq725uNW4beRisi0= +github.com/minio/minio-go/v7 v7.0.98/go.mod h1:cY0Y+W7yozf0mdIclrttzo1Iiu7mEf9y7nk2uXqMOvM= github.com/muesli/cancelreader v0.2.2 h1:3I4Kt4BQjOR54NavqnDogx/MIoWBFa0StPA8ELUXHmA= github.com/muesli/cancelreader v0.2.2/go.mod h1:3XuTXfFS2VjM+HTLZY9Ak0l6eUKfijIfMUZ4EgX0QYo= github.com/onsi/gomega v1.34.1 h1:EUMJIKUjM8sKjYbtxQI9A4z2o+rruxnzNvpknOXie6k= github.com/onsi/gomega v1.34.1/go.mod h1:kU1QgUvBDLXBJq618Xvm2LUX6rSAfRaFRTcdOeDLwwY= github.com/ossf/osv-schema/bindings/go v0.0.0-20260129002236-09a17f85b44a h1:XHKiGf7vsACh43gFCBb5lOxDjnY6CQYNoQXsO/SlI6k= github.com/ossf/osv-schema/bindings/go v0.0.0-20260129002236-09a17f85b44a/go.mod h1:Eo7R19vlnflsCRdHW1ynyNUyoRwxdaTmTWD9MtKnJTc= +github.com/philhofer/fwd v1.2.0 h1:e6DnBTl7vGY+Gz322/ASL4Gyp1FspeMvx1RNDoToZuM= +github.com/philhofer/fwd v1.2.0/go.mod h1:RqIHx9QI14HlwKwm98g9Re5prTQ6LdeRQn+gXJFxsJM= github.com/pjbgf/sha1cd v0.3.2 h1:a9wb0bp1oC2TGwStyn0Umc/IGKQnEgF0vVaZ8QF8eo4= github.com/pjbgf/sha1cd v0.3.2/go.mod h1:zQWigSxVmsHEZow5qaLtPYxpcKMMQpa09ixqBxuCS6A= github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pkg/xattr v0.4.12 h1:rRTkSyFNTRElv6pkA3zpjHpQ90p/OdHQC1GmGh1aTjM= +github.com/pkg/xattr v0.4.12/go.mod h1:di8WF84zAKk8jzR1UBTEWh9AUlIZZ7M/JNt8e9B6ktU= github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 h1:GFCKgmp0tecUJ0sJuv4pzYCqS9+RGSn52M3FUwPs+uo= github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10/go.mod h1:t/avpk3KcrXxUnYOhZhMXJlSEyie6gQbtLq5NM3loB8= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= +github.com/rs/xid v1.6.0 h1:fV591PaemRlL6JfRxGDEPl69wICngIQ3shQtzfy2gxU= +github.com/rs/xid v1.6.0/go.mod h1:7XoLgs4eV+QndskICGsho+ADou8ySMSjJKDIan90Nz0= github.com/sergi/go-diff v1.4.0 h1:n/SP9D5ad1fORl+llWyN+D6qoUETXNZARKjyY2/KVCw= github.com/sergi/go-diff v1.4.0/go.mod h1:A0bzQcvG0E7Rwjx0REVgAGH58e96+X0MeOfepqsbeW4= github.com/sethvargo/go-retry v0.3.0 h1:EEt31A35QhrcRZtrYFDTBg91cqZVnFL2navjDrah2SE= @@ -184,8 +245,13 @@ github.com/skeema/knownhosts v1.3.1/go.mod h1:r7KTdC8l4uxWRyK2TpQZ/1o5HaSzh06ePQ github.com/spiffe/go-spiffe/v2 v2.6.0 h1:l+DolpxNWYgruGQVV0xsfeya3CsC7m8iBzDnMpsbLuo= github.com/spiffe/go-spiffe/v2 v2.6.0/go.mod h1:gm2SeUoMZEtpnzPNs2Csc0D/gX33k1xIx7lEzqblHEs= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= @@ -198,14 +264,20 @@ github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4= github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY= github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28= +github.com/tinylib/msgp v1.6.1 h1:ESRv8eL3u+DNHUoSAAQRE50Hm162zqAnBoGv9PzScPY= +github.com/tinylib/msgp v1.6.1/go.mod h1:RSp0LW9oSxFut3KzESt5Voq4GVWyS+PSulT77roAqEA= github.com/xanzy/ssh-agent v0.3.3 h1:+/15pJfg/RsTxqYcX6fHqOXZwwMP+2VyYWJeWM2qQFM= github.com/xanzy/ssh-agent v0.3.3/go.mod h1:6dzNDKs0J9rVPHPhaGCukekBHKqfl+L3KghI1Bc68Uw= github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no= github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM= +go.einride.tech/aip v0.79.0 h1:19zdPlZzlUvxOA8syAFw4LkdJdXepzyTl6gt9XEeqdU= +go.einride.tech/aip v0.79.0/go.mod h1:E8+wdTApA70odnpFzJgsGogHozC2JCIhFJBKPr8bVig= +go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= +go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= -go.opentelemetry.io/contrib/detectors/gcp v1.38.0 h1:ZoYbqX7OaA/TAikspPl3ozPI6iY6LiIY9I8cUfm+pJs= -go.opentelemetry.io/contrib/detectors/gcp v1.38.0/go.mod h1:SU+iU7nu5ud4oCb3LQOhIZ3nRLj6FNVrKgtflbaf2ts= +go.opentelemetry.io/contrib/detectors/gcp v1.39.0 h1:kWRNZMsfBHZ+uHjiH4y7Etn2FK26LAGkNFw7RHv1DhE= +go.opentelemetry.io/contrib/detectors/gcp v1.39.0/go.mod h1:t/OGqzHBa5v6RHZwrDBJ2OirWc+4q/w2fTbLZwAKjTk= go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.63.0 h1:YH4g8lQroajqUwWbq/tr2QX1JFmEXaDLgG+ew9bLMWo= go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.63.0/go.mod h1:fvPi2qXDqFs8M4B4fmJhE92TyQs9Ydjlg3RvfUp+NbQ= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0 h1:RbKq8BG0FI8OiXhBfcRtqqHcZcka+gU3cskNuf05R18= @@ -224,51 +296,100 @@ go.opentelemetry.io/otel/trace v1.40.0 h1:WA4etStDttCSYuhwvEa8OP8I5EWu24lkOzp+ZY go.opentelemetry.io/otel/trace v1.40.0/go.mod h1:zeAhriXecNGP/s2SEG3+Y8X9ujcJOTqQ5RgdEJcawiA= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc= +go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= go.yaml.in/yaml/v4 v4.0.0-rc.3 h1:3h1fjsh1CTAPjW7q/EMe+C8shx5d8ctzZTrLcs/j8Go= go.yaml.in/yaml/v4 v4.0.0-rc.3/go.mod h1:aZqd9kCMsGL7AuUv/m/PvWLdg5sjJsZ4oHDEnfPPfY0= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/crypto v0.47.0 h1:V6e3FRj+n4dbpw86FJ8Fv7XVOql7TEwpHapKoMJ/GO8= golang.org/x/crypto v0.47.0/go.mod h1:ff3Y9VzzKbwSSEzWqJsJVBnWmRwRSHt/6Op5n9bQc4A= +golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20250819193227-8b4c13bb791b h1:DXr+pvt3nC887026GRP39Ej11UATqWDmWuS99x26cD0= golang.org/x/exp v0.0.0-20250819193227-8b4c13bb791b/go.mod h1:4QTo5u+SEIbbKW1RacMZq1YEfOBqeXa19JeshGi+zc4= +golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= +golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= +golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.49.0 h1:eeHFmOGUTtaaPSGNmjBKpbng9MulQsJURQUAfUwY++o= golang.org/x/net v0.49.0/go.mod h1:/ysNB2EvaqvesRkuLAyjI1ycPZlQHM3q01F02UY/MV8= +golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.35.0 h1:Mv2mzuHuZuY2+bkyWXIHMfhNdJAdwW3FuWeCPYN5GVQ= golang.org/x/oauth2 v0.35.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= +golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4= golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= +golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220408201424-a24fb2fb8a0f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.41.0 h1:Ivj+2Cp/ylzLiEU89QhWblYnOE9zerudt9Ftecq2C6k= golang.org/x/sys v0.41.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.39.0 h1:RclSuaJf32jOqZz74CkPA9qFuVTX7vhLlpfj/IGWlqY= golang.org/x/term v0.39.0/go.mod h1:yxzUCTP/U+FzoxfdKmLaA0RV1WgE0VY7hXBwKtY/4ww= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.33.0 h1:B3njUFyqtHDUI5jMn1YIr5B0IE2U0qck04r6d4KPAxE= golang.org/x/text v0.33.0/go.mod h1:LuMebE6+rBincTi9+xWTY8TztLzKHc/9C1uBCG27+q8= golang.org/x/time v0.14.0 h1:MRx4UaLrDotUKUdCIqzPC48t1Y9hANFKIRpNx+Te8PI= golang.org/x/time v0.14.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= +golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da h1:noIWHXmPHxILtqtCOPIhSt0ABwskkZKjD3bXGnZGpNY= golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90= gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E= google.golang.org/api v0.268.0 h1:hgA3aS4lt9rpF5RCCkX0Q2l7DvHgvlb53y4T4u6iKkA= google.golang.org/api v0.268.0/go.mod h1:HXMyMH496wz+dAJwD/GkAPLd3ZL33Kh0zEG32eNvy9w= +google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= +google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= +google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= +google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= +google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= google.golang.org/genproto v0.0.0-20260128011058-8636f8732409 h1:VQZ/yAbAtjkHgH80teYd2em3xtIkkHd7ZhqfH2N9CsM= google.golang.org/genproto v0.0.0-20260128011058-8636f8732409/go.mod h1:rxKD3IEILWEu3P44seeNOAwZN4SaoKaQ/2eTg4mM6EM= google.golang.org/genproto/googleapis/api v0.0.0-20260203192932-546029d2fa20 h1:7ei4lp52gK1uSejlA8AZl5AJjeLUOHBQscRQZUgAcu0= google.golang.org/genproto/googleapis/api v0.0.0-20260203192932-546029d2fa20/go.mod h1:ZdbssH/1SOVnjnDlXzxDHK2MCidiqXtbYccJNzNYPEE= google.golang.org/genproto/googleapis/rpc v0.0.0-20260203192932-546029d2fa20 h1:Jr5R2J6F6qWyzINc+4AM8t5pfUz6beZpHp678GNrMbE= google.golang.org/genproto/googleapis/rpc v0.0.0-20260203192932-546029d2fa20/go.mod h1:j9x/tPzZkyxcgEFkiKEEGxfvyumM01BEtsW8xzOahRQ= -google.golang.org/grpc v1.78.0 h1:K1XZG/yGDJnzMdd/uZHAkVqJE+xIDOcmdSFZkBUicNc= -google.golang.org/grpc v1.78.0/go.mod h1:I47qjTo4OKbMkjA/aOOwxDIiPSBofUtQUI5EfpWvW7U= +google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= +google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= +google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= +google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= +google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc= +google.golang.org/grpc v1.79.1 h1:zGhSi45ODB9/p3VAawt9a+O/MULLl9dpizzNNpq7flY= +google.golang.org/grpc v1.79.1/go.mod h1:KmT0Kjez+0dde/v2j9vzwoAScgEPx/Bw1CYChhHLrHQ= +google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= +google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= +google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= +google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= +google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= +google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE= google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= @@ -282,5 +403,8 @@ gopkg.in/warnings.v0 v0.1.2/go.mod h1:jksf8JmL6Qr/oQM2OXTHunEvvTAsrWBLb6OOjuVWRN gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= diff --git a/vulnfeeds/upload/gcs_test.go b/vulnfeeds/upload/gcs_test.go deleted file mode 100644 index e15e7f3d3bf..00000000000 --- a/vulnfeeds/upload/gcs_test.go +++ /dev/null @@ -1,217 +0,0 @@ -package upload - -import ( - "bytes" - "context" - "os" - "path/filepath" - "testing" - - "github.com/fsouza/fake-gcs-server/fakestorage" -) - -func TestToGCS(t *testing.T) { - server := fakestorage.NewServer([]fakestorage.Object{}) - defer server.Stop() - - client := server.Client() - bkt := client.Bucket("test-bucket") - if err := bkt.Create(context.Background(), "project", nil); err != nil { - t.Fatalf("failed to create bucket: %v", err) - } - - content := []byte("test content") - err := ToGCS(context.Background(), bkt, "test-object.txt", bytes.NewReader(content), "text/plain") - if err != nil { - t.Fatalf("ToGCS failed: %v", err) - } - - obj, err := server.GetObject("test-bucket", "test-object.txt") - if err != nil { - t.Fatalf("failed to get object: %v", err) - } - - if !bytes.Equal(obj.Content, content) { - t.Errorf("expected content %q, got %q", content, obj.Content) - } - if obj.ContentType != "text/plain" { - t.Errorf("expected content type %q, got %q", "text/plain", obj.ContentType) - } -} - -func TestUploadFile(t *testing.T) { - server := fakestorage.NewServer([]fakestorage.Object{}) - defer server.Stop() - - client := server.Client() - bkt := client.Bucket("test-bucket") - if err := bkt.Create(context.Background(), "project", nil); err != nil { - t.Fatalf("failed to create bucket: %v", err) - } - - tmpFile, err := os.CreateTemp("", "test-upload-*.txt") - if err != nil { - t.Fatalf("failed to create temp file: %v", err) - } - defer os.Remove(tmpFile.Name()) - - content := []byte("file content") - if _, err := tmpFile.Write(content); err != nil { - t.Fatalf("failed to write to temp file: %v", err) - } - tmpFile.Close() - - err = UploadFile(context.Background(), bkt, "uploaded-file.txt", tmpFile.Name()) - if err != nil { - t.Fatalf("UploadFile failed: %v", err) - } - - obj, err := server.GetObject("test-bucket", "uploaded-file.txt") - if err != nil { - t.Fatalf("failed to get object: %v", err) - } - - if !bytes.Equal(obj.Content, content) { - t.Errorf("expected content %q, got %q", content, obj.Content) - } -} - -func TestDownloadBucket(t *testing.T) { - objects := []fakestorage.Object{ - { - ObjectAttrs: fakestorage.ObjectAttrs{ - BucketName: "test-bucket", - Name: "folder/file1.txt", - }, - Content: []byte("content 1"), - }, - { - ObjectAttrs: fakestorage.ObjectAttrs{ - BucketName: "test-bucket", - Name: "folder/file2.txt", - }, - Content: []byte("content 2"), - }, - { - ObjectAttrs: fakestorage.ObjectAttrs{ - BucketName: "test-bucket", - Name: "folder/subfolder/", // Should be skipped - }, - Content: []byte(""), - }, - { - ObjectAttrs: fakestorage.ObjectAttrs{ - BucketName: "test-bucket", - Name: "other-folder/file3.txt", - }, - Content: []byte("content 3"), - }, - } - - server := fakestorage.NewServer(objects) - defer server.Stop() - - client := server.Client() - bkt := client.Bucket("test-bucket") - - tmpDir, err := os.MkdirTemp("", "test-download-*") - if err != nil { - t.Fatalf("failed to create temp dir: %v", err) - } - defer os.RemoveAll(tmpDir) - - err = DownloadBucket(context.Background(), bkt, "folder/", tmpDir) - if err != nil { - t.Fatalf("DownloadBucket failed: %v", err) - } - - // Verify file1.txt - content1, err := os.ReadFile(filepath.Join(tmpDir, "folder/file1.txt")) - if err != nil { - t.Fatalf("failed to read downloaded file1: %v", err) - } - if !bytes.Equal(content1, []byte("content 1")) { - t.Errorf("expected content 1, got %q", content1) - } - - // Verify file2.txt - content2, err := os.ReadFile(filepath.Join(tmpDir, "folder/file2.txt")) - if err != nil { - t.Fatalf("failed to read downloaded file2: %v", err) - } - if !bytes.Equal(content2, []byte("content 2")) { - t.Errorf("expected content 2, got %q", content2) - } - - // Verify file3.txt is NOT downloaded because of the prefix - if _, err := os.Stat(filepath.Join(tmpDir, "other-folder/file3.txt")); !os.IsNotExist(err) { - t.Errorf("expected file3.txt to not exist, but it does") - } -} - -func TestDownloadBucket_PathTraversal(t *testing.T) { - objects := []fakestorage.Object{ - { - ObjectAttrs: fakestorage.ObjectAttrs{ - BucketName: "test-bucket", - Name: "../malicious.txt", - }, - Content: []byte("malicious content"), - }, - } - - server := fakestorage.NewServer(objects) - defer server.Stop() - - client := server.Client() - bkt := client.Bucket("test-bucket") - - tmpDir, err := os.MkdirTemp("", "test-download-*") - if err != nil { - t.Fatalf("failed to create temp dir: %v", err) - } - defer os.RemoveAll(tmpDir) - - err = DownloadBucket(context.Background(), bkt, "", tmpDir) - if err == nil { - t.Fatalf("expected path traversal error, got nil") - } - if err.Error() != "invalid object name \"../malicious.txt\": path traversal attempt" { - t.Errorf("unexpected error message: %v", err) - } -} - -func TestDownloadBucket_RelativeDestDir(t *testing.T) { - objects := []fakestorage.Object{ - { - ObjectAttrs: fakestorage.ObjectAttrs{ - BucketName: "test-bucket", - Name: "file.txt", - }, - Content: []byte("content"), - }, - } - - server := fakestorage.NewServer(objects) - defer server.Stop() - - client := server.Client() - bkt := client.Bucket("test-bucket") - - // Use a relative directory - destDir := "test-relative-dir" - defer os.RemoveAll(destDir) - - err := DownloadBucket(context.Background(), bkt, "", destDir) - if err != nil { - t.Fatalf("DownloadBucket failed with relative dir: %v", err) - } - - content, err := os.ReadFile(filepath.Join(destDir, "file.txt")) - if err != nil { - t.Fatalf("failed to read downloaded file: %v", err) - } - if !bytes.Equal(content, []byte("content")) { - t.Errorf("expected content, got %q", content) - } -} From 45cb1f8e6245c933b5204b1713db149d51cdc0a7 Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Fri, 6 Mar 2026 05:42:37 +0000 Subject: [PATCH 04/28] make things more go tst idiomatic --- vulnfeeds/gcs-tools/gcs_test.go | 31 +++++++++++-------------------- 1 file changed, 11 insertions(+), 20 deletions(-) diff --git a/vulnfeeds/gcs-tools/gcs_test.go b/vulnfeeds/gcs-tools/gcs_test.go index 7ef58b996db..78bfe57d4b1 100644 --- a/vulnfeeds/gcs-tools/gcs_test.go +++ b/vulnfeeds/gcs-tools/gcs_test.go @@ -12,7 +12,7 @@ import ( func TestToGCS(t *testing.T) { server := fakestorage.NewServer([]fakestorage.Object{}) - defer server.Stop() + t.Cleanup(server.Stop) client := server.Client() bkt := client.Bucket("test-bucket") @@ -41,7 +41,7 @@ func TestToGCS(t *testing.T) { func TestUploadFile(t *testing.T) { server := fakestorage.NewServer([]fakestorage.Object{}) - defer server.Stop() + t.Cleanup(server.Stop) client := server.Client() bkt := client.Bucket("test-bucket") @@ -49,11 +49,10 @@ func TestUploadFile(t *testing.T) { t.Fatalf("failed to create bucket: %v", err) } - tmpFile, err := os.CreateTemp("", "test-upload-*.txt") + tmpFile, err := os.CreateTemp(t.TempDir(), "test-upload-*.txt") if err != nil { t.Fatalf("failed to create temp file: %v", err) } - defer os.Remove(tmpFile.Name()) content := []byte("file content") if _, err := tmpFile.Write(content); err != nil { @@ -110,18 +109,14 @@ func TestDownloadBucket(t *testing.T) { } server := fakestorage.NewServer(objects) - defer server.Stop() + t.Cleanup(server.Stop) client := server.Client() bkt := client.Bucket("test-bucket") - tmpDir, err := os.MkdirTemp("", "test-download-*") - if err != nil { - t.Fatalf("failed to create temp dir: %v", err) - } - defer os.RemoveAll(tmpDir) + tmpDir := t.TempDir() - err = DownloadBucket(context.Background(), bkt, "folder/", tmpDir) + err := DownloadBucket(context.Background(), bkt, "folder/", tmpDir) if err != nil { t.Fatalf("DownloadBucket failed: %v", err) } @@ -162,18 +157,14 @@ func TestDownloadBucket(t *testing.T) { } server := fakestorage.NewServer(objects) - defer server.Stop() + t.Cleanup(server.Stop) client := server.Client() bkt := client.Bucket("test-bucket") - tmpDir, err := os.MkdirTemp("", "test-download-*") - if err != nil { - t.Fatalf("failed to create temp dir: %v", err) - } - defer os.RemoveAll(tmpDir) + tmpDir := t.TempDir() - err = DownloadBucket(context.Background(), bkt, "", tmpDir) + err := DownloadBucket(context.Background(), bkt, "", tmpDir) if err == nil { t.Fatalf("expected path traversal error, got nil") } @@ -194,14 +185,14 @@ func TestDownloadBucket(t *testing.T) { } server := fakestorage.NewServer(objects) - defer server.Stop() + t.Cleanup(server.Stop) client := server.Client() bkt := client.Bucket("test-bucket") // Use a relative directory destDir := "test-relative-dir" - defer os.RemoveAll(destDir) + t.Cleanup(func() { os.RemoveAll(destDir) }) err := DownloadBucket(context.Background(), bkt, "", destDir) if err != nil { From 6a596ec02eb41eac13a1d2cbf4b0d7aaa743ed12 Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Tue, 17 Mar 2026 04:08:18 +0000 Subject: [PATCH 05/28] fix test --- vulnfeeds/gcs-tools/cveworker_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vulnfeeds/gcs-tools/cveworker_test.go b/vulnfeeds/gcs-tools/cveworker_test.go index adaa84132f3..9cabe41273d 100644 --- a/vulnfeeds/gcs-tools/cveworker_test.go +++ b/vulnfeeds/gcs-tools/cveworker_test.go @@ -1,4 +1,4 @@ -package upload +package gcs import ( "bytes" From d506f45dd6c27ec129e637438a1040be38b6d7ad Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Tue, 17 Mar 2026 04:08:36 +0000 Subject: [PATCH 06/28] fix lint --- vulnfeeds/gcs-tools/gcs.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/vulnfeeds/gcs-tools/gcs.go b/vulnfeeds/gcs-tools/gcs.go index cf8cc10f88f..bdfa13324f6 100644 --- a/vulnfeeds/gcs-tools/gcs.go +++ b/vulnfeeds/gcs-tools/gcs.go @@ -2,6 +2,7 @@ package gcs import ( "context" + "errors" "fmt" "io" "os" @@ -23,8 +24,9 @@ func ToGCS(ctx context.Context, bkt *storage.BucketHandle, objectName string, da if _, err := io.Copy(wc, data); err != nil { if closeErr := wc.Close(); closeErr != nil { - return fmt.Errorf("failed to write to GCS object %q: %w (also failed to close writer: %v)", objectName, err, closeErr) + return fmt.Errorf("failed to write to GCS object %q: %w (also failed to close writer: %w)", objectName, err, closeErr) } + return fmt.Errorf("failed to write to GCS object %q: %w", objectName, err) } @@ -60,7 +62,7 @@ func DownloadBucket(ctx context.Context, bkt *storage.BucketHandle, prefix strin } attrs, err := it.Next() - if err == iterator.Done { + if errors.Is(err, iterator.Done) { break } if err != nil { From 933d315c93b662bd4a2037411b96314499376ed4 Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Thu, 19 Mar 2026 04:49:25 +0000 Subject: [PATCH 07/28] initial changes for immediately uploading records to gcs bucket --- .../cmd/converters/cve/nvd-cve-osv/main.go | 37 ++++++++++++++++--- vulnfeeds/conversion/nvd/converter.go | 35 +++++++++++------- vulnfeeds/gcs-tools/gcs.go | 20 ++++++++++ 3 files changed, 72 insertions(+), 20 deletions(-) diff --git a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go index b1bfdf4c46e..828ed473d78 100644 --- a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go +++ b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go @@ -2,6 +2,7 @@ package main import ( + "context" "encoding/json" "flag" "fmt" @@ -14,11 +15,14 @@ import ( "slices" "sync" + "cloud.google.com/go/storage" c "github.com/google/osv/vulnfeeds/conversion" "github.com/google/osv/vulnfeeds/conversion/nvd" + gcs "github.com/google/osv/vulnfeeds/gcs-tools" "github.com/google/osv/vulnfeeds/git" "github.com/google/osv/vulnfeeds/models" "github.com/google/osv/vulnfeeds/utility/logger" + "github.com/ossf/osv-schema/bindings/go/osvschema" ) var ( @@ -30,6 +34,9 @@ var ( rejectFailed = flag.Bool("reject-failed", false, "If set, OSV records with a failed conversion outcome will not be generated.") outputMetrics = flag.Bool("output-metrics", true, "If true, output the metrics information about the conversion") cpuProfile = flag.String("cpuprofile", "", "Path to write cpu profile to file (default = no output)") + uploadToGCS = flag.Bool("upload-to-gcs", false, "If true, upload to GCS bucket instead of writing to local disk.") + outputBucket = flag.String("output-bucket", "osv-test-cve-osv-conversion", "The GCS bucket to write to.") + gcsPrefix = flag.String("gcs-prefix", "nvd-osv", "The prefix within the GCS bucket.") ) func loadCPEDictionary(productToRepo *c.VPRepoCache, f string) error { @@ -91,12 +98,24 @@ func main() { repoTagsCache := &git.RepoTagsCache{} + var bkt *storage.BucketHandle + ctx := context.Background() + if *uploadToGCS { + client, err := storage.NewClient(ctx) + if err != nil { + logger.Fatal("Failed to create GCS client", slog.Any("err", err)) + } + defer client.Close() + bkt = client.Bucket(*outputBucket) + logger.Info("GCS Client and Bucket initialized", slog.String("bucket", *outputBucket)) + } + jobs := make(chan models.NVDCVE) var wg sync.WaitGroup for range *workers { wg.Add(1) - go worker(&wg, jobs, *outDir, vpRepoCache, repoTagsCache) + go worker(ctx, &wg, jobs, bkt, *outDir, vpRepoCache, repoTagsCache) } for _, cve := range parsed.Vulnerabilities { @@ -122,7 +141,7 @@ func main() { } } -func processCVE(cve models.NVDCVE, vpRepoCache *c.VPRepoCache, repoTagsCache *git.RepoTagsCache) models.ConversionOutcome { +func processCVE(cve models.NVDCVE, vpRepoCache *c.VPRepoCache, repoTagsCache *git.RepoTagsCache) (*osvschema.Vulnerability, models.ConversionOutcome) { metrics := &models.ConversionMetrics{ CVEID: cve.ID, CNA: "nvd", @@ -131,24 +150,30 @@ func processCVE(cve models.NVDCVE, vpRepoCache *c.VPRepoCache, repoTagsCache *gi metrics.Repos = repos var outcome models.ConversionOutcome + var vuln *osvschema.Vulnerability switch *outFormat { case "OSV": - outcome = nvd.CVEToOSV(cve, repos, repoTagsCache, *outDir, metrics, *rejectFailed, *outputMetrics) + vuln, outcome = nvd.CVEToOSV(cve, repos, repoTagsCache, *outDir, metrics, *rejectFailed, *outputMetrics, !*uploadToGCS) case "PackageInfo": outcome = nvd.CVEToPackageInfo(cve, repos, repoTagsCache, *outDir, metrics) } - return outcome + return vuln, outcome } -func worker(wg *sync.WaitGroup, jobs <-chan models.NVDCVE, _ string, vpRepoCache *c.VPRepoCache, repoTagsCache *git.RepoTagsCache) { +func worker(ctx context.Context, wg *sync.WaitGroup, jobs <-chan models.NVDCVE, bkt *storage.BucketHandle, _ string, vpRepoCache *c.VPRepoCache, repoTagsCache *git.RepoTagsCache) { defer wg.Done() for cve := range jobs { - outcome := processCVE(cve, vpRepoCache, repoTagsCache) + vuln, outcome := processCVE(cve, vpRepoCache, repoTagsCache) if outcome != models.Successful { logger.Info("Failed to generate an OSV record", slog.String("cve", string(cve.ID)), slog.String("outcome", outcome.String())) } else { logger.Info("Generated OSV record for "+string(cve.ID), slog.String("cve", string(cve.ID))) + if vuln != nil && *uploadToGCS && bkt != nil { + if err := gcs.UploadVulnerability(ctx, bkt, *gcsPrefix, vuln); err != nil { + logger.Error("Failed to upload vulnerability", slog.String("cve", vuln.Id), slog.Any("err", err)) + } + } } } } diff --git a/vulnfeeds/conversion/nvd/converter.go b/vulnfeeds/conversion/nvd/converter.go index 5fd0aa63d3d..7c800b532be 100644 --- a/vulnfeeds/conversion/nvd/converter.go +++ b/vulnfeeds/conversion/nvd/converter.go @@ -24,8 +24,9 @@ var ErrNoRanges = errors.New("no ranges") var ErrUnresolvedFix = errors.New("fixes not resolved to commits") -// CVEToOSV Takes an NVD CVE record and outputs an OSV file in the specified directory. -func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, directory string, metrics *models.ConversionMetrics, rejectFailed bool, outputMetrics bool) models.ConversionOutcome { +// CVEToOSV Takes an NVD CVE record and returns an OSV Vulnerability object. +// It also outputs an OSV file in the specified directory if writeOSVFile is true. +func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, directory string, metrics *models.ConversionMetrics, rejectFailed bool, outputMetrics bool, writeOSVFile bool) (*osvschema.Vulnerability, models.ConversionOutcome) { CPEs := c.CPEs(cve) metrics.CPEs = CPEs // The vendor name and product name are used to construct the output `vulnDir` below, so need to be set to *something* to keep the output tidy. @@ -38,7 +39,7 @@ func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, direc maybeProductName = CPE.Product if err != nil { metrics.AddNote("Can't generate an OSV record without valid CPE data") - return models.ConversionUnknown + return nil, models.ConversionUnknown } } @@ -52,9 +53,9 @@ func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, direc // If there are no repos, there are no commits from the refs either if len(cpeRanges) == 0 && len(repos) == 0 { metrics.SetOutcome(models.NoRepos) - outputFiles(v, directory, maybeVendorName, maybeProductName, metrics, rejectFailed, outputMetrics) + outputFiles(v, directory, maybeVendorName, maybeProductName, metrics, rejectFailed, outputMetrics, writeOSVFile) - return models.NoRepos + return v.Vulnerability, models.NoRepos } successfulRepos := make(map[string]bool) @@ -67,15 +68,15 @@ func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, direc affected := MergeRangesAndCreateAffected(resolvedRanges, cpeRanges, nil, nil, metrics) v.Affected = append(v.Affected, affected) // Exit early - outputFiles(v, directory, maybeVendorName, maybeProductName, metrics, rejectFailed, outputMetrics) + outputFiles(v, directory, maybeVendorName, maybeProductName, metrics, rejectFailed, outputMetrics, writeOSVFile) - return models.NoRepos + return v.Vulnerability, models.NoRepos } // If we have ranges, try to resolve them r, un, sR := processRanges(cpeRanges, repos, metrics, cache, models.VersionSourceCPE) if metrics.Outcome == models.Error { - return models.Error + return nil, models.Error } resolvedRanges = append(resolvedRanges, r...) unresolvedRanges = append(unresolvedRanges, un...) @@ -105,7 +106,7 @@ func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, direc } r, un, sR := processRanges(textRanges, repos, metrics, cache, models.VersionSourceDescription) if metrics.Outcome == models.Error { - return models.Error + return nil, models.Error } resolvedRanges = append(resolvedRanges, r...) unresolvedRanges = append(unresolvedRanges, un...) @@ -124,13 +125,18 @@ func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, direc affected := MergeRangesAndCreateAffected(resolvedRanges, unresolvedRanges, commits, keys, metrics) v.Affected = append(v.Affected, affected) + osvV := v.Vulnerability + if metrics.Outcome == models.Error || (!outputMetrics && rejectFailed && metrics.Outcome != models.Successful) { - return metrics.Outcome + if metrics.Outcome == models.Error { + return nil, metrics.Outcome + } + return osvV, metrics.Outcome } - outputFiles(v, directory, maybeVendorName, maybeProductName, metrics, rejectFailed, outputMetrics) + outputFiles(v, directory, maybeVendorName, maybeProductName, metrics, rejectFailed, outputMetrics, writeOSVFile) - return metrics.Outcome + return osvV, metrics.Outcome } // CVEToPackageInfo takes an NVD CVE record and outputs a PackageInfo struct in a file in the specified directory. @@ -460,7 +466,8 @@ func convertCommitToEvent(commit models.AffectedCommit) *osvschema.Event { // - metrics: A pointer to ConversionMetrics to be written to a metrics file. // - rejectFailed: A boolean indicating whether to skip writing the OSV file if the conversion was not successful. // - outputMetrics: A boolean indicating whether to write the metrics file. -func outputFiles(v *vulns.Vulnerability, dir string, vendor string, product string, metrics *models.ConversionMetrics, rejectFailed bool, outputMetrics bool) { +// - writeOSVFile: A boolean indicating whether to write the OSV file to disk. +func outputFiles(v *vulns.Vulnerability, dir string, vendor string, product string, metrics *models.ConversionMetrics, rejectFailed bool, outputMetrics bool, writeOSVFile bool) { cveID := v.Id vulnDir := filepath.Join(dir, vendor, product) @@ -472,7 +479,7 @@ func outputFiles(v *vulns.Vulnerability, dir string, vendor string, product stri return } - if !rejectFailed || metrics.Outcome == models.Successful { + if writeOSVFile && (!rejectFailed || metrics.Outcome == models.Successful) { osvFile, errCVE := c.CreateOSVFile(models.CVEID(cveID), vulnDir) if errCVE != nil { logger.Fatal("File failed to be created for CVE", slog.String("cve", cveID)) diff --git a/vulnfeeds/gcs-tools/gcs.go b/vulnfeeds/gcs-tools/gcs.go index bdfa13324f6..175245ddb1b 100644 --- a/vulnfeeds/gcs-tools/gcs.go +++ b/vulnfeeds/gcs-tools/gcs.go @@ -1,6 +1,7 @@ package gcs import ( + "bytes" "context" "errors" "fmt" @@ -10,8 +11,10 @@ import ( "strings" "cloud.google.com/go/storage" + "github.com/ossf/osv-schema/bindings/go/osvschema" "golang.org/x/sync/errgroup" "google.golang.org/api/iterator" + "google.golang.org/protobuf/encoding/protojson" ) // ToGCS uploads data from an io.Reader to a GCS bucket. @@ -113,3 +116,20 @@ func DownloadBucket(ctx context.Context, bkt *storage.BucketHandle, prefix strin return nil } + +// UploadVulnerability marshals an OSV Vulnerability to JSON and uploads it to GCS. +func UploadVulnerability(ctx context.Context, bkt *storage.BucketHandle, prefix string, vuln *osvschema.Vulnerability) error { + if vuln == nil || vuln.Id == "" { + return fmt.Errorf("invalid vulnerability provided") + } + + data, err := protojson.MarshalOptions{Indent: " "}.Marshal(vuln) + if err != nil { + return fmt.Errorf("failed to marshal vulnerability %s: %w", vuln.Id, err) + } + + objectName := filepath.Join(prefix, vuln.Id+".json") + reader := bytes.NewReader(data) + + return ToGCS(ctx, bkt, objectName, reader, "application/json") +} From 06c4f4fe647c728b8a6f0b527296e452e5d71e7c Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Thu, 19 Mar 2026 05:39:32 +0000 Subject: [PATCH 08/28] progress --- .../cmd/converters/cve/nvd-cve-osv/main.go | 59 +++++++++++-- vulnfeeds/conversion/nvd/converter.go | 88 +++---------------- vulnfeeds/gcs-tools/gcs.go | 19 ++++ 3 files changed, 82 insertions(+), 84 deletions(-) diff --git a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go index 828ed473d78..dc239b805ed 100644 --- a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go +++ b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go @@ -141,7 +141,7 @@ func main() { } } -func processCVE(cve models.NVDCVE, vpRepoCache *c.VPRepoCache, repoTagsCache *git.RepoTagsCache) (*osvschema.Vulnerability, models.ConversionOutcome) { +func processCVE(cve models.NVDCVE, vpRepoCache *c.VPRepoCache, repoTagsCache *git.RepoTagsCache) (*osvschema.Vulnerability, *models.ConversionMetrics, models.ConversionOutcome) { metrics := &models.ConversionMetrics{ CVEID: cve.ID, CNA: "nvd", @@ -151,29 +151,72 @@ func processCVE(cve models.NVDCVE, vpRepoCache *c.VPRepoCache, repoTagsCache *gi var outcome models.ConversionOutcome var vuln *osvschema.Vulnerability + var finalMetrics *models.ConversionMetrics switch *outFormat { case "OSV": - vuln, outcome = nvd.CVEToOSV(cve, repos, repoTagsCache, *outDir, metrics, *rejectFailed, *outputMetrics, !*uploadToGCS) + vuln, finalMetrics, outcome = nvd.CVEToOSV(cve, repos, repoTagsCache, *outDir, metrics, *rejectFailed, *outputMetrics) case "PackageInfo": outcome = nvd.CVEToPackageInfo(cve, repos, repoTagsCache, *outDir, metrics) + finalMetrics = metrics } - return vuln, outcome + return vuln, finalMetrics, outcome } -func worker(ctx context.Context, wg *sync.WaitGroup, jobs <-chan models.NVDCVE, bkt *storage.BucketHandle, _ string, vpRepoCache *c.VPRepoCache, repoTagsCache *git.RepoTagsCache) { +func worker(ctx context.Context, wg *sync.WaitGroup, jobs <-chan models.NVDCVE, bkt *storage.BucketHandle, outDir string, vpRepoCache *c.VPRepoCache, repoTagsCache *git.RepoTagsCache) { defer wg.Done() for cve := range jobs { - vuln, outcome := processCVE(cve, vpRepoCache, repoTagsCache) + vuln, metrics, outcome := processCVE(cve, vpRepoCache, repoTagsCache) + cveID := string(cve.ID) + if outcome == models.Error { + logger.Error("Error generating OSV record", slog.String("cve", cveID), slog.String("outcome", outcome.String())) + return // Don't attempt to output files if there was an error + } + if outcome != models.Successful { - logger.Info("Failed to generate an OSV record", slog.String("cve", string(cve.ID)), slog.String("outcome", outcome.String())) + logger.Info("Failed to generate a successful OSV record", slog.String("cve", cveID), slog.String("outcome", outcome.String())) + if *rejectFailed { + return // Skip outputting OSV file + } } else { - logger.Info("Generated OSV record for "+string(cve.ID), slog.String("cve", string(cve.ID))) - if vuln != nil && *uploadToGCS && bkt != nil { + logger.Info("Generated OSV record for "+cveID, slog.String("cve", cveID)) + } + + if *uploadToGCS && bkt != nil { + if vuln != nil { if err := gcs.UploadVulnerability(ctx, bkt, *gcsPrefix, vuln); err != nil { logger.Error("Failed to upload vulnerability", slog.String("cve", vuln.Id), slog.Any("err", err)) } } + if *outputMetrics && metrics != nil { + if err := gcs.UploadMetrics(ctx, bkt, *gcsPrefix, models.CVEID(cveID), metrics); err != nil { + logger.Error("Failed to upload metrics", slog.String("cve", cveID), slog.Any("err", err)) + } + } + } else { + // Local file output + if vuln != nil { + osvFile, err := c.CreateOSVFile(models.CVEID(vuln.Id), outDir) + if err != nil { + logger.Error("Failed to create OSV file locally", slog.String("cve", vuln.Id), slog.Any("err", err)) + } else { + if err := vuln.ToJSON(osvFile); err != nil { + logger.Error("Failed to write OSV file locally", slog.String("cve", vuln.Id), slog.Any("err", err)) + } + osvFile.Close() + } + } + if *outputMetrics && metrics != nil { + metricsFile, err := c.CreateMetricsFile(models.CVEID(cveID), outDir) + if err != nil { + logger.Error("Failed to create metrics file locally", slog.String("cve", cveID), slog.Any("err", err)) + } else { + if err := c.WriteMetricsFile(metrics, metricsFile); err != nil { + logger.Error("Failed to write metrics file locally", slog.String("cve", cveID), slog.Any("err", err)) + } + metricsFile.Close() + } + } } } } diff --git a/vulnfeeds/conversion/nvd/converter.go b/vulnfeeds/conversion/nvd/converter.go index 7c800b532be..fdfe3b6c0a4 100644 --- a/vulnfeeds/conversion/nvd/converter.go +++ b/vulnfeeds/conversion/nvd/converter.go @@ -24,22 +24,17 @@ var ErrNoRanges = errors.New("no ranges") var ErrUnresolvedFix = errors.New("fixes not resolved to commits") -// CVEToOSV Takes an NVD CVE record and returns an OSV Vulnerability object. -// It also outputs an OSV file in the specified directory if writeOSVFile is true. -func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, directory string, metrics *models.ConversionMetrics, rejectFailed bool, outputMetrics bool, writeOSVFile bool) (*osvschema.Vulnerability, models.ConversionOutcome) { +// CVEToOSV Takes an NVD CVE record and returns an OSV Vulnerability object, ConversionMetrics, and the outcome. +func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, directory string, metrics *models.ConversionMetrics, rejectFailed bool, outputMetrics bool) (*osvschema.Vulnerability, *models.ConversionMetrics, models.ConversionOutcome) { CPEs := c.CPEs(cve) metrics.CPEs = CPEs // The vendor name and product name are used to construct the output `vulnDir` below, so need to be set to *something* to keep the output tidy. - maybeVendorName := "ENOCPE" - maybeProductName := "ENOCPE" if len(CPEs) > 0 { - CPE, err := c.ParseCPE(CPEs[0]) // For naming the subdirectory used for output. - maybeVendorName = CPE.Vendor - maybeProductName = CPE.Product + _, err := c.ParseCPE(CPEs[0]) // For naming the subdirectory used for output. if err != nil { metrics.AddNote("Can't generate an OSV record without valid CPE data") - return nil, models.ConversionUnknown + return nil, metrics, models.ConversionUnknown } } @@ -53,9 +48,7 @@ func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, direc // If there are no repos, there are no commits from the refs either if len(cpeRanges) == 0 && len(repos) == 0 { metrics.SetOutcome(models.NoRepos) - outputFiles(v, directory, maybeVendorName, maybeProductName, metrics, rejectFailed, outputMetrics, writeOSVFile) - - return v.Vulnerability, models.NoRepos + return v.Vulnerability, metrics, models.NoRepos } successfulRepos := make(map[string]bool) @@ -68,15 +61,13 @@ func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, direc affected := MergeRangesAndCreateAffected(resolvedRanges, cpeRanges, nil, nil, metrics) v.Affected = append(v.Affected, affected) // Exit early - outputFiles(v, directory, maybeVendorName, maybeProductName, metrics, rejectFailed, outputMetrics, writeOSVFile) - - return v.Vulnerability, models.NoRepos + return v.Vulnerability, metrics, models.NoRepos } // If we have ranges, try to resolve them r, un, sR := processRanges(cpeRanges, repos, metrics, cache, models.VersionSourceCPE) if metrics.Outcome == models.Error { - return nil, models.Error + return nil, metrics, models.Error } resolvedRanges = append(resolvedRanges, r...) unresolvedRanges = append(unresolvedRanges, un...) @@ -106,7 +97,7 @@ func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, direc } r, un, sR := processRanges(textRanges, repos, metrics, cache, models.VersionSourceDescription) if metrics.Outcome == models.Error { - return nil, models.Error + return nil, metrics, models.Error } resolvedRanges = append(resolvedRanges, r...) unresolvedRanges = append(unresolvedRanges, un...) @@ -116,7 +107,7 @@ func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, direc } if len(resolvedRanges) == 0 && len(commits) == 0 { - metrics.AddNote("No ranges detected for %q", maybeProductName) + metrics.AddNote("No ranges detected") metrics.SetOutcome(models.NoRanges) } @@ -125,18 +116,11 @@ func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, direc affected := MergeRangesAndCreateAffected(resolvedRanges, unresolvedRanges, commits, keys, metrics) v.Affected = append(v.Affected, affected) - osvV := v.Vulnerability - - if metrics.Outcome == models.Error || (!outputMetrics && rejectFailed && metrics.Outcome != models.Successful) { - if metrics.Outcome == models.Error { - return nil, metrics.Outcome - } - return osvV, metrics.Outcome + if metrics.Outcome == models.Error { + return nil, metrics, metrics.Outcome } - outputFiles(v, directory, maybeVendorName, maybeProductName, metrics, rejectFailed, outputMetrics, writeOSVFile) - - return osvV, metrics.Outcome + return v.Vulnerability, metrics, metrics.Outcome } // CVEToPackageInfo takes an NVD CVE record and outputs a PackageInfo struct in a file in the specified directory. @@ -453,54 +437,6 @@ func convertCommitToEvent(commit models.AffectedCommit) *osvschema.Event { return nil } - -// outputFiles writes the OSV vulnerability record and conversion metrics to files in the specified directory. -// It creates the necessary subdirectories based on the vendor and product names and handles whether or not -// the files should be written based on the rejectFailed and outputMetrics flags. -// -// Arguments: -// - v: The OSV Vulnerability object to be written to a file. -// - dir: The base directory where the output files should be created. -// - vendor: The vendor name used to create the subdirectory. -// - product: The product name used to create the subdirectory. -// - metrics: A pointer to ConversionMetrics to be written to a metrics file. -// - rejectFailed: A boolean indicating whether to skip writing the OSV file if the conversion was not successful. -// - outputMetrics: A boolean indicating whether to write the metrics file. -// - writeOSVFile: A boolean indicating whether to write the OSV file to disk. -func outputFiles(v *vulns.Vulnerability, dir string, vendor string, product string, metrics *models.ConversionMetrics, rejectFailed bool, outputMetrics bool, writeOSVFile bool) { - cveID := v.Id - vulnDir := filepath.Join(dir, vendor, product) - - if err := os.MkdirAll(vulnDir, 0755); err != nil { - logger.Info("Failed to create directory "+vulnDir, slog.String("cve", cveID), slog.String("path", vulnDir), slog.Any("err", err)) - } - - if metrics.Outcome == models.Error { - return - } - - if writeOSVFile && (!rejectFailed || metrics.Outcome == models.Successful) { - osvFile, errCVE := c.CreateOSVFile(models.CVEID(cveID), vulnDir) - if errCVE != nil { - logger.Fatal("File failed to be created for CVE", slog.String("cve", cveID)) - } - if err := v.ToJSON(osvFile); err != nil { - logger.Error("Failed to write", slog.Any("err", err)) - } - osvFile.Close() - } - if outputMetrics { - metricsFile, errMetrics := c.CreateMetricsFile(models.CVEID(cveID), vulnDir) - if errMetrics != nil { - logger.Fatal("File failed to be created for CVE", slog.String("cve", cveID)) - } - if err := c.WriteMetricsFile(metrics, metricsFile); err != nil { - logger.Error("Failed to write metrics", slog.Any("err", err)) - } - metricsFile.Close() - } -} - // processRanges attempts to resolve the given ranges to commits and updates the metrics accordingly. func processRanges(ranges []*osvschema.Range, repos []string, metrics *models.ConversionMetrics, cache *git.RepoTagsCache, source models.VersionSource) ([]*osvschema.Range, []*osvschema.Range, []string) { if len(ranges) == 0 { diff --git a/vulnfeeds/gcs-tools/gcs.go b/vulnfeeds/gcs-tools/gcs.go index 175245ddb1b..1e780a02ffd 100644 --- a/vulnfeeds/gcs-tools/gcs.go +++ b/vulnfeeds/gcs-tools/gcs.go @@ -3,6 +3,7 @@ package gcs import ( "bytes" "context" + "encoding/json" "errors" "fmt" "io" @@ -11,6 +12,7 @@ import ( "strings" "cloud.google.com/go/storage" + "github.com/google/osv/vulnfeeds/models" "github.com/ossf/osv-schema/bindings/go/osvschema" "golang.org/x/sync/errgroup" "google.golang.org/api/iterator" @@ -133,3 +135,20 @@ func UploadVulnerability(ctx context.Context, bkt *storage.BucketHandle, prefix return ToGCS(ctx, bkt, objectName, reader, "application/json") } + +// UploadMetrics marshals ConversionMetrics to JSON and uploads it to GCS. +func UploadMetrics(ctx context.Context, bkt *storage.BucketHandle, prefix string, cveID models.CVEID, metrics *models.ConversionMetrics) error { + if metrics == nil || cveID == "" { + return fmt.Errorf("invalid metrics or CVE ID provided") + } + + data, err := json.MarshalIndent(metrics, "", " ") + if err != nil { + return fmt.Errorf("failed to marshal metrics for %s: %w", cveID, err) + } + + objectName := filepath.Join(prefix, string(cveID)+".metrics.json") + reader := bytes.NewReader(data) + + return ToGCS(ctx, bkt, objectName, reader, "application/json") +} From 886c2c138a5dd48602b5f4a0e55420403aa0027c Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Fri, 20 Mar 2026 02:56:40 +0000 Subject: [PATCH 09/28] fix some compiling issues and lint --- vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go | 10 +++++----- vulnfeeds/conversion/nvd/converter.go | 9 +++++---- vulnfeeds/conversion/nvd/converter_test.go | 2 +- vulnfeeds/gcs-tools/gcs.go | 10 +++++----- 4 files changed, 16 insertions(+), 15 deletions(-) diff --git a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go index dc239b805ed..321a4797c34 100644 --- a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go +++ b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go @@ -22,7 +22,7 @@ import ( "github.com/google/osv/vulnfeeds/git" "github.com/google/osv/vulnfeeds/models" "github.com/google/osv/vulnfeeds/utility/logger" - "github.com/ossf/osv-schema/bindings/go/osvschema" + "github.com/google/osv/vulnfeeds/vulns" ) var ( @@ -141,7 +141,7 @@ func main() { } } -func processCVE(cve models.NVDCVE, vpRepoCache *c.VPRepoCache, repoTagsCache *git.RepoTagsCache) (*osvschema.Vulnerability, *models.ConversionMetrics, models.ConversionOutcome) { +func processCVE(cve models.NVDCVE, vpRepoCache *c.VPRepoCache, repoTagsCache *git.RepoTagsCache) (*vulns.Vulnerability, *models.ConversionMetrics, models.ConversionOutcome) { metrics := &models.ConversionMetrics{ CVEID: cve.ID, CNA: "nvd", @@ -150,11 +150,11 @@ func processCVE(cve models.NVDCVE, vpRepoCache *c.VPRepoCache, repoTagsCache *gi metrics.Repos = repos var outcome models.ConversionOutcome - var vuln *osvschema.Vulnerability + var vuln *vulns.Vulnerability var finalMetrics *models.ConversionMetrics switch *outFormat { case "OSV": - vuln, finalMetrics, outcome = nvd.CVEToOSV(cve, repos, repoTagsCache, *outDir, metrics, *rejectFailed, *outputMetrics) + vuln, finalMetrics, outcome = nvd.CVEToOSV(cve, repos, repoTagsCache, metrics) case "PackageInfo": outcome = nvd.CVEToPackageInfo(cve, repos, repoTagsCache, *outDir, metrics) finalMetrics = metrics @@ -184,7 +184,7 @@ func worker(ctx context.Context, wg *sync.WaitGroup, jobs <-chan models.NVDCVE, if *uploadToGCS && bkt != nil { if vuln != nil { - if err := gcs.UploadVulnerability(ctx, bkt, *gcsPrefix, vuln); err != nil { + if err := gcs.UploadVulnerability(ctx, bkt, *gcsPrefix, vuln.Vulnerability); err != nil { logger.Error("Failed to upload vulnerability", slog.String("cve", vuln.Id), slog.Any("err", err)) } } diff --git a/vulnfeeds/conversion/nvd/converter.go b/vulnfeeds/conversion/nvd/converter.go index fdfe3b6c0a4..573b5dc3c47 100644 --- a/vulnfeeds/conversion/nvd/converter.go +++ b/vulnfeeds/conversion/nvd/converter.go @@ -25,7 +25,7 @@ var ErrNoRanges = errors.New("no ranges") var ErrUnresolvedFix = errors.New("fixes not resolved to commits") // CVEToOSV Takes an NVD CVE record and returns an OSV Vulnerability object, ConversionMetrics, and the outcome. -func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, directory string, metrics *models.ConversionMetrics, rejectFailed bool, outputMetrics bool) (*osvschema.Vulnerability, *models.ConversionMetrics, models.ConversionOutcome) { +func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, metrics *models.ConversionMetrics) (*vulns.Vulnerability, *models.ConversionMetrics, models.ConversionOutcome) { CPEs := c.CPEs(cve) metrics.CPEs = CPEs // The vendor name and product name are used to construct the output `vulnDir` below, so need to be set to *something* to keep the output tidy. @@ -48,7 +48,7 @@ func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, direc // If there are no repos, there are no commits from the refs either if len(cpeRanges) == 0 && len(repos) == 0 { metrics.SetOutcome(models.NoRepos) - return v.Vulnerability, metrics, models.NoRepos + return v, metrics, models.NoRepos } successfulRepos := make(map[string]bool) @@ -61,7 +61,7 @@ func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, direc affected := MergeRangesAndCreateAffected(resolvedRanges, cpeRanges, nil, nil, metrics) v.Affected = append(v.Affected, affected) // Exit early - return v.Vulnerability, metrics, models.NoRepos + return v, metrics, models.NoRepos } // If we have ranges, try to resolve them @@ -120,7 +120,7 @@ func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, direc return nil, metrics, metrics.Outcome } - return v.Vulnerability, metrics, metrics.Outcome + return v, metrics, metrics.Outcome } // CVEToPackageInfo takes an NVD CVE record and outputs a PackageInfo struct in a file in the specified directory. @@ -437,6 +437,7 @@ func convertCommitToEvent(commit models.AffectedCommit) *osvschema.Event { return nil } + // processRanges attempts to resolve the given ranges to commits and updates the metrics accordingly. func processRanges(ranges []*osvschema.Range, repos []string, metrics *models.ConversionMetrics, cache *git.RepoTagsCache, source models.VersionSource) ([]*osvschema.Range, []*osvschema.Range, []string) { if len(ranges) == 0 { diff --git a/vulnfeeds/conversion/nvd/converter_test.go b/vulnfeeds/conversion/nvd/converter_test.go index 396d8ed5b8f..bf099b5b1e4 100644 --- a/vulnfeeds/conversion/nvd/converter_test.go +++ b/vulnfeeds/conversion/nvd/converter_test.go @@ -65,7 +65,7 @@ func TestCVEToOSV_429(t *testing.T) { cache := &git.RepoTagsCache{} outDir := t.TempDir() - outcome := CVEToOSV(cve, []string{"https://github.com/foo/bar"}, cache, outDir, metrics, false, false) + _, _, outcome := CVEToOSV(cve, []string{"https://github.com/foo/bar"}, cache, metrics) // It should fail because of the 429 error causing unresolved fixes if outcome != models.Error { diff --git a/vulnfeeds/gcs-tools/gcs.go b/vulnfeeds/gcs-tools/gcs.go index 1e780a02ffd..a208988155f 100644 --- a/vulnfeeds/gcs-tools/gcs.go +++ b/vulnfeeds/gcs-tools/gcs.go @@ -121,16 +121,16 @@ func DownloadBucket(ctx context.Context, bkt *storage.BucketHandle, prefix strin // UploadVulnerability marshals an OSV Vulnerability to JSON and uploads it to GCS. func UploadVulnerability(ctx context.Context, bkt *storage.BucketHandle, prefix string, vuln *osvschema.Vulnerability) error { - if vuln == nil || vuln.Id == "" { - return fmt.Errorf("invalid vulnerability provided") + if vuln == nil || vuln.GetId() == "" { + return errors.New("invalid vulnerability provided") } data, err := protojson.MarshalOptions{Indent: " "}.Marshal(vuln) if err != nil { - return fmt.Errorf("failed to marshal vulnerability %s: %w", vuln.Id, err) + return fmt.Errorf("failed to marshal vulnerability %s: %w", vuln.GetId(), err) } - objectName := filepath.Join(prefix, vuln.Id+".json") + objectName := filepath.Join(prefix, vuln.GetId()+".json") reader := bytes.NewReader(data) return ToGCS(ctx, bkt, objectName, reader, "application/json") @@ -139,7 +139,7 @@ func UploadVulnerability(ctx context.Context, bkt *storage.BucketHandle, prefix // UploadMetrics marshals ConversionMetrics to JSON and uploads it to GCS. func UploadMetrics(ctx context.Context, bkt *storage.BucketHandle, prefix string, cveID models.CVEID, metrics *models.ConversionMetrics) error { if metrics == nil || cveID == "" { - return fmt.Errorf("invalid metrics or CVE ID provided") + return errors.New("invalid metrics or CVE ID provided") } data, err := json.MarshalIndent(metrics, "", " ") From 15c4757ce029c693bc71e4d8e030c7b543bd6d46 Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Fri, 20 Mar 2026 03:01:17 +0000 Subject: [PATCH 10/28] remove the upload part of the run-cve-to-osv generation script --- .../nvd-cve-osv/run_cve_to_osv_generation.sh | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/run_cve_to_osv_generation.sh b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/run_cve_to_osv_generation.sh index d4d89806adf..d8d794fc500 100755 --- a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/run_cve_to_osv_generation.sh +++ b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/run_cve_to_osv_generation.sh @@ -63,21 +63,4 @@ for (( YEAR = $(date +%Y) ; YEAR >= ${FIRST_INSCOPE_YEAR} ; YEAR-- )); do -exec cp '{}' "${WORK_DIR}/nvd2osv/gcs_stage/" \; done -# Copy (and remove any missing) results to GCS bucket, with some sanity -# checking. -objs_present=$(gsutil ls "${OSV_OUTPUT_GCS_PATH}" | wc -l) -objs_deleted=$(gsutil -m rsync -c -n -d "${WORK_DIR}/nvd2osv/gcs_stage" "${OSV_OUTPUT_GCS_PATH}" 2>&1 | grep "Would remove" | wc -l) - -threshold=$(echo "scale=2; ${objs_present} * (${SAFETY_THRESHOLD_PCT:-2} / 100)" | bc) - -# Bash can't deal with floats -if (( $(echo "${objs_deleted} > ${threshold}" | bc -l) )); then - echo "Warning. Unexpectedly high (${objs_deleted}) number of CVE records would be deleted!" >> /dev/stderr - gsutil -m rsync -c -n -d "${WORK_DIR}/nvd2osv/gcs_stage" "${OSV_OUTPUT_GCS_PATH}" 2>&1 | grep "Would remove" >> /dev/stderr - # TODO: add back in once nvd-mirror issue fixed: exit 1 -fi - -echo "Copying NVD CVE records successfully converted to GCS bucket" -gsutil -q -m rsync -c "${WORK_DIR}/nvd2osv/gcs_stage" "${OSV_OUTPUT_GCS_PATH}" - echo "Conversion run complete" From b8ed421f78a9ba16dd11865dd3158dc7455aea2d Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Fri, 20 Mar 2026 04:53:06 +0000 Subject: [PATCH 11/28] renamed functions and moved them around to be more and less generic --- vulnfeeds/cmd/combine-to-osv/main.go | 4 +- vulnfeeds/cmd/converters/alpine/main.go | 4 +- .../cve/cve5/bulk-converter/main.go | 6 +- .../cve/cve5/single-converter/main.go | 6 +- .../cmd/converters/cve/nvd-cve-osv/main.go | 12 +- vulnfeeds/cmd/converters/debian/main.go | 4 +- vulnfeeds/conversion/common.go | 43 ------- vulnfeeds/conversion/nvd/converter.go | 5 +- .../writer/writer.go} | 113 ++++++++++++++---- .../writer/writer_test.go} | 12 +- vulnfeeds/gcs-tools/gcs.go | 58 +++------ vulnfeeds/gcs-tools/gcs_test.go | 6 +- 12 files changed, 136 insertions(+), 137 deletions(-) rename vulnfeeds/{gcs-tools/cveworker.go => conversion/writer/writer.go} (69%) rename vulnfeeds/{gcs-tools/cveworker_test.go => conversion/writer/writer_test.go} (96%) diff --git a/vulnfeeds/cmd/combine-to-osv/main.go b/vulnfeeds/cmd/combine-to-osv/main.go index e66364de9d9..e384ee9ea17 100644 --- a/vulnfeeds/cmd/combine-to-osv/main.go +++ b/vulnfeeds/cmd/combine-to-osv/main.go @@ -16,7 +16,7 @@ import ( "cloud.google.com/go/storage" "github.com/google/osv/vulnfeeds/conversion" - "github.com/google/osv/vulnfeeds/gcs-tools" + "github.com/google/osv/vulnfeeds/conversion/writer" "github.com/google/osv/vulnfeeds/models" "github.com/google/osv/vulnfeeds/utility/logger" "github.com/ossf/osv-schema/bindings/go/osvschema" @@ -92,7 +92,7 @@ func main() { vulnerabilities = append(vulnerabilities, v) } - gcs.Upload(ctx, "OSV files", *uploadToGCS, *outputBucketName, *overridesBucketName, *numWorkers, *osvOutputPath, vulnerabilities, *syncDeletions) + writer.UploadVulnsToGCS(ctx, "OSV files", *uploadToGCS, *outputBucketName, *overridesBucketName, *numWorkers, *osvOutputPath, vulnerabilities, *syncDeletions) } // extractCVEName extracts the CVE name from a given filename and prefix. diff --git a/vulnfeeds/cmd/converters/alpine/main.go b/vulnfeeds/cmd/converters/alpine/main.go index 4acdd078c4f..226285d9193 100644 --- a/vulnfeeds/cmd/converters/alpine/main.go +++ b/vulnfeeds/cmd/converters/alpine/main.go @@ -15,7 +15,7 @@ import ( "strings" "time" - "github.com/google/osv/vulnfeeds/gcs-tools" + "github.com/google/osv/vulnfeeds/conversion/writer" "github.com/google/osv/vulnfeeds/models" "github.com/google/osv/vulnfeeds/utility/logger" "github.com/google/osv/vulnfeeds/vulns" @@ -64,7 +64,7 @@ func main() { } ctx := context.Background() - gcs.Upload(ctx, "Alpine CVEs", *uploadToGCS, *outputBucketName, "", *numWorkers, *alpineOutputPath, vulnerabilities, *syncDeletions) + writer.UploadVulnsToGCS(ctx, "Alpine CVEs", *uploadToGCS, *outputBucketName, "", *numWorkers, *alpineOutputPath, vulnerabilities, *syncDeletions) logger.Info("Alpine CVE conversion succeeded.") } diff --git a/vulnfeeds/cmd/converters/cve/cve5/bulk-converter/main.go b/vulnfeeds/cmd/converters/cve/cve5/bulk-converter/main.go index 9d75b7816e3..3e554cf18da 100644 --- a/vulnfeeds/cmd/converters/cve/cve5/bulk-converter/main.go +++ b/vulnfeeds/cmd/converters/cve/cve5/bulk-converter/main.go @@ -14,8 +14,8 @@ import ( "sync" "time" - "github.com/google/osv/vulnfeeds/conversion" "github.com/google/osv/vulnfeeds/conversion/cve5" + "github.com/google/osv/vulnfeeds/conversion/writer" "github.com/google/osv/vulnfeeds/models" "github.com/google/osv/vulnfeeds/utility/logger" ) @@ -119,8 +119,8 @@ func worker(wg *sync.WaitGroup, jobs <-chan string, outDir string, cnas []string cveID := cve.Metadata.CVEID logger.Info("Processing "+string(cveID), slog.String("cve", string(cveID))) - osvFile, errCVE := conversion.CreateOSVFile(cveID, outDir) - metricsFile, errMetrics := conversion.CreateMetricsFile(cveID, outDir) + osvFile, errCVE := writer.CreateOSVFile(cveID, outDir) + metricsFile, errMetrics := writer.CreateMetricsFile(cveID, outDir) if errCVE != nil || errMetrics != nil { logger.Fatal("File failed to be created for CVE", slog.String("cve", string(cveID))) } diff --git a/vulnfeeds/cmd/converters/cve/cve5/single-converter/main.go b/vulnfeeds/cmd/converters/cve/cve5/single-converter/main.go index 3dc45a05c8a..70eaf67fed0 100644 --- a/vulnfeeds/cmd/converters/cve/cve5/single-converter/main.go +++ b/vulnfeeds/cmd/converters/cve/cve5/single-converter/main.go @@ -7,8 +7,8 @@ import ( "log/slog" "os" - "github.com/google/osv/vulnfeeds/conversion" "github.com/google/osv/vulnfeeds/conversion/cve5" + "github.com/google/osv/vulnfeeds/conversion/writer" "github.com/google/osv/vulnfeeds/models" "github.com/google/osv/vulnfeeds/utility/logger" ) @@ -46,8 +46,8 @@ func main() { } // create the files - osvFile, errCVE := conversion.CreateOSVFile(cveID, outDir) - metricsFile, errMetrics := conversion.CreateMetricsFile(cveID, outDir) + osvFile, errCVE := writer.CreateOSVFile(cveID, outDir) + metricsFile, errMetrics := writer.CreateMetricsFile(cveID, outDir) if errCVE != nil || errMetrics != nil { logger.Fatal("File failed to be created for CVE", slog.String("cve", string(cveID))) } diff --git a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go index 321a4797c34..abc8e579a51 100644 --- a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go +++ b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go @@ -18,7 +18,7 @@ import ( "cloud.google.com/go/storage" c "github.com/google/osv/vulnfeeds/conversion" "github.com/google/osv/vulnfeeds/conversion/nvd" - gcs "github.com/google/osv/vulnfeeds/gcs-tools" + "github.com/google/osv/vulnfeeds/conversion/writer" "github.com/google/osv/vulnfeeds/git" "github.com/google/osv/vulnfeeds/models" "github.com/google/osv/vulnfeeds/utility/logger" @@ -184,19 +184,19 @@ func worker(ctx context.Context, wg *sync.WaitGroup, jobs <-chan models.NVDCVE, if *uploadToGCS && bkt != nil { if vuln != nil { - if err := gcs.UploadVulnerability(ctx, bkt, *gcsPrefix, vuln.Vulnerability); err != nil { + if err := writer.UploadVulnToGCS(ctx, bkt, *gcsPrefix, vuln.Vulnerability); err != nil { logger.Error("Failed to upload vulnerability", slog.String("cve", vuln.Id), slog.Any("err", err)) } } if *outputMetrics && metrics != nil { - if err := gcs.UploadMetrics(ctx, bkt, *gcsPrefix, models.CVEID(cveID), metrics); err != nil { + if err := writer.UploadMetricsToGCS(ctx, bkt, *gcsPrefix, models.CVEID(cveID), metrics); err != nil { logger.Error("Failed to upload metrics", slog.String("cve", cveID), slog.Any("err", err)) } } } else { // Local file output if vuln != nil { - osvFile, err := c.CreateOSVFile(models.CVEID(vuln.Id), outDir) + osvFile, err := writer.CreateOSVFile(models.CVEID(vuln.Id), outDir) if err != nil { logger.Error("Failed to create OSV file locally", slog.String("cve", vuln.Id), slog.Any("err", err)) } else { @@ -207,11 +207,11 @@ func worker(ctx context.Context, wg *sync.WaitGroup, jobs <-chan models.NVDCVE, } } if *outputMetrics && metrics != nil { - metricsFile, err := c.CreateMetricsFile(models.CVEID(cveID), outDir) + metricsFile, err := writer.CreateMetricsFile(models.CVEID(cveID), outDir) if err != nil { logger.Error("Failed to create metrics file locally", slog.String("cve", cveID), slog.Any("err", err)) } else { - if err := c.WriteMetricsFile(metrics, metricsFile); err != nil { + if err := writer.WriteMetricsFile(metrics, metricsFile); err != nil { logger.Error("Failed to write metrics file locally", slog.String("cve", cveID), slog.Any("err", err)) } metricsFile.Close() diff --git a/vulnfeeds/cmd/converters/debian/main.go b/vulnfeeds/cmd/converters/debian/main.go index 347a52ae621..956f35a5745 100644 --- a/vulnfeeds/cmd/converters/debian/main.go +++ b/vulnfeeds/cmd/converters/debian/main.go @@ -14,8 +14,8 @@ import ( "strconv" "strings" + "github.com/google/osv/vulnfeeds/conversion/writer" "github.com/google/osv/vulnfeeds/faulttolerant" - "github.com/google/osv/vulnfeeds/gcs-tools" "github.com/google/osv/vulnfeeds/models" "github.com/google/osv/vulnfeeds/utility/logger" "github.com/google/osv/vulnfeeds/vulns" @@ -70,7 +70,7 @@ func main() { } ctx := context.Background() - gcs.Upload(ctx, "Debian CVEs", *uploadToGCS, *outputBucketName, "", *numWorkers, *debianOutputPath, vulnerabilities, *syncDeletions) + writer.UploadVulnsToGCS(ctx, "Debian CVEs", *uploadToGCS, *outputBucketName, "", *numWorkers, *debianOutputPath, vulnerabilities, *syncDeletions) logger.Info("Debian CVE conversion succeeded.") } diff --git a/vulnfeeds/conversion/common.go b/vulnfeeds/conversion/common.go index 8609fee8a63..8d230a39257 100644 --- a/vulnfeeds/conversion/common.go +++ b/vulnfeeds/conversion/common.go @@ -130,49 +130,6 @@ func ConductAnalysis(year string, dir string) { } } -// CreateMetricsFile creates the initial file for the metrics record. -func CreateMetricsFile(id models.CVEID, vulnDir string) (*os.File, error) { - metricsFile := filepath.Join(vulnDir, string(id)+".metrics"+models.Extension) - f, err := os.Create(metricsFile) - if err != nil { - logger.Info("Failed to open for writing "+metricsFile, slog.String("cve", string(id)), slog.String("path", metricsFile), slog.Any("err", err)) - return nil, err - } - - return f, nil -} - -// CreateOSVFile creates the initial file for the OSV record. -func CreateOSVFile(id models.CVEID, vulnDir string) (*os.File, error) { - outputFile := filepath.Join(vulnDir, string(id)+models.Extension) - - f, err := os.Create(outputFile) - if err != nil { - logger.Info("Failed to open for writing "+outputFile, slog.String("cve", string(id)), slog.String("path", outputFile), slog.Any("err", err)) - return nil, err - } - - return f, err -} - -func WriteMetricsFile(metrics *models.ConversionMetrics, metricsFile *os.File) error { - marshalledMetrics, err := json.MarshalIndent(&metrics, "", " ") - if err != nil { - logger.Info("Failed to marshal", slog.Any("err", err)) - return err - } - - _, err = metricsFile.Write(marshalledMetrics) - if err != nil { - logger.Warn("Failed to write", slog.String("path", metricsFile.Name()), slog.Any("err", err)) - return fmt.Errorf("failed to write %s: %w", metricsFile.Name(), err) - } - - metricsFile.Close() - - return nil -} - // GitVersionsToCommits examines repos and tries to convert versions to commits by treating them as Git tags. // Returns the resolved ranges, unresolved ranges, and successful repos involved. func GitVersionsToCommits(versionRanges []*osvschema.Range, repos []string, metrics *models.ConversionMetrics, cache *git.RepoTagsCache) ([]*osvschema.Range, []*osvschema.Range, []string) { diff --git a/vulnfeeds/conversion/nvd/converter.go b/vulnfeeds/conversion/nvd/converter.go index 573b5dc3c47..5bb4b6f629c 100644 --- a/vulnfeeds/conversion/nvd/converter.go +++ b/vulnfeeds/conversion/nvd/converter.go @@ -12,6 +12,7 @@ import ( "slices" c "github.com/google/osv/vulnfeeds/conversion" + "github.com/google/osv/vulnfeeds/conversion/writer" "github.com/google/osv/vulnfeeds/git" "github.com/google/osv/vulnfeeds/models" "github.com/google/osv/vulnfeeds/utility" @@ -219,11 +220,11 @@ func CVEToPackageInfo(cve models.NVDCVE, repos []string, cache *git.RepoTagsCach logger.Info("Generated PackageInfo record", slog.String("cve", string(cve.ID)), slog.String("product", maybeProductName)) - metricsFile, err := c.CreateMetricsFile(cve.ID, vulnDir) + metricsFile, err := writer.CreateMetricsFile(cve.ID, vulnDir) if err != nil { logger.Warn("Failed to create metrics file", slog.String("path", metricsFile.Name()), slog.Any("err", err)) } - err = c.WriteMetricsFile(metrics, metricsFile) + err = writer.WriteMetricsFile(metrics, metricsFile) if err != nil { logger.Warn("Failed to write metrics file", slog.String("path", metricsFile.Name()), slog.Any("err", err)) } diff --git a/vulnfeeds/gcs-tools/cveworker.go b/vulnfeeds/conversion/writer/writer.go similarity index 69% rename from vulnfeeds/gcs-tools/cveworker.go rename to vulnfeeds/conversion/writer/writer.go index 09fd0ad10e0..92b56c73de6 100644 --- a/vulnfeeds/gcs-tools/cveworker.go +++ b/vulnfeeds/conversion/writer/writer.go @@ -1,26 +1,29 @@ -// Package gcs handles allocating workers to intelligently uploading OSV records to a bucket -package gcs +// Package writer handles allocating workers to intelligently uploading OSV records to a bucket +package writer import ( "bytes" "context" "crypto/sha256" "encoding/hex" + "encoding/json" "errors" "fmt" "io" "log/slog" "os" "path" + "path/filepath" "sync" "sync/atomic" "time" "cloud.google.com/go/storage" - "google.golang.org/api/iterator" "google.golang.org/protobuf/encoding/protojson" "google.golang.org/protobuf/types/known/timestamppb" + "github.com/google/osv/vulnfeeds/gcs-tools" + "github.com/google/osv/vulnfeeds/models" "github.com/google/osv/vulnfeeds/utility/logger" "github.com/google/osv/vulnfeeds/vulns" "github.com/ossf/osv-schema/bindings/go/osvschema" @@ -49,10 +52,10 @@ func writeToDisk(v *osvschema.Vulnerability, preModifiedBuf []byte, outputPrefix return nil } -// uploadToGCS uploads the vulnerability to a GCS bucket. +// uploadIfChanged uploads the vulnerability to a GCS bucket. // It returns an error if the upload failed, or ErrUploadSkipped if the upload // was intentionally avoided (e.g. because the GCS object has a matching hash). -func uploadToGCS(ctx context.Context, v *osvschema.Vulnerability, preModifiedBuf []byte, outBkt *storage.BucketHandle, outputPrefix string) error { +func uploadIfChanged(ctx context.Context, v *osvschema.Vulnerability, preModifiedBuf []byte, outBkt *storage.BucketHandle, outputPrefix string) error { vulnID := v.GetId() filename := vulnID + ".json" @@ -151,7 +154,7 @@ func handleOverride(ctx context.Context, v *osvschema.Vulnerability, overridesBk // For GCS uploads, it calculates a hash of the vulnerability (excluding the modified time) and compares it // with the existing object's hash. The vulnerability is uploaded only if the hashes differ, with the // modified time updated. This prevents updating the modified time for vulnerabilities with no content changes. -func Worker(ctx context.Context, vulnChan <-chan *osvschema.Vulnerability, outBkt, overridesBkt *storage.BucketHandle, outputPrefix string, counter *atomic.Uint64) { +func VulnWorker(ctx context.Context, vulnChan <-chan *osvschema.Vulnerability, outBkt, overridesBkt *storage.BucketHandle, outputPrefix string, counter *atomic.Uint64) { for v := range vulnChan { vulnID := v.GetId() if len(v.GetAffected()) == 0 { @@ -187,7 +190,7 @@ func Worker(ctx context.Context, vulnChan <-chan *osvschema.Vulnerability, outBk writeErr = writeToDisk(vulnToProcess, preModifiedBuf, outputPrefix) } else { // Upload to GCS - writeErr = uploadToGCS(ctx, vulnToProcess, preModifiedBuf, outBkt, outputPrefix) + writeErr = uploadIfChanged(ctx, vulnToProcess, preModifiedBuf, outBkt, outputPrefix) } if writeErr == nil { @@ -203,8 +206,8 @@ func Worker(ctx context.Context, vulnChan <-chan *osvschema.Vulnerability, outBk } } -// Upload delegates workers to upload vulnerabilities to the buckets. -func Upload( +// UploadVulnsToGCS delegates workers to upload vulnerabilities to the buckets. +func UploadVulnsToGCS( ctx context.Context, jobName string, uploadToGCS bool, @@ -238,7 +241,7 @@ func Upload( wg.Add(1) go func() { defer wg.Done() - Worker(ctx, vulnChan, outBkt, overridesBkt, osvOutputPath, &successCount) + VulnWorker(ctx, vulnChan, outBkt, overridesBkt, osvOutputPath, &successCount) }() } @@ -254,7 +257,7 @@ func Upload( func handleDeletion(ctx context.Context, outBkt *storage.BucketHandle, osvOutputPath string, vulnerabilities []*osvschema.Vulnerability) { // Check if any need to be deleted - bucketObjects, err := listBucketObjects(ctx, outBkt, osvOutputPath) + bucketObjects, err := gcs.ListBucketObjects(ctx, outBkt, osvOutputPath) if err != nil { logger.Error("Failed to list bucket objects for deletion check, skipping deletion.", slog.Any("err", err)) return @@ -276,21 +279,79 @@ func handleDeletion(ctx context.Context, outBkt *storage.BucketHandle, osvOutput } } -// listBucketObjects lists the names of all objects in a Google Cloud Storage bucket. -// It does not download the file contents. -func listBucketObjects(ctx context.Context, bucket *storage.BucketHandle, prefix string) ([]string, error) { - it := bucket.Objects(ctx, &storage.Query{Prefix: prefix}) - var filenames []string - for { - attrs, err := it.Next() - if errors.Is(err, iterator.Done) { - break // All objects have been listed. - } - if err != nil { - return nil, fmt.Errorf("bucket.Objects: %w", err) - } - filenames = append(filenames, attrs.Name) +// UploadVulnToGCS marshals a single OSV Vulnerability to JSON and uploads it to GCS. +func UploadVulnToGCS(ctx context.Context, bkt *storage.BucketHandle, prefix string, vuln *osvschema.Vulnerability) error { + if vuln == nil || vuln.GetId() == "" { + return errors.New("invalid vulnerability provided") + } + + data, err := protojson.MarshalOptions{Indent: " "}.Marshal(vuln) + if err != nil { + return fmt.Errorf("failed to marshal vulnerability %s: %w", vuln.GetId(), err) + } + + objectName := filepath.Join(prefix, vuln.GetId()+".json") + reader := bytes.NewReader(data) + + return gcs.UploadToGCS(ctx, bkt, objectName, reader, "application/json") +} + +// UploadMetricsToGCS marshals ConversionMetrics to JSON and uploads it to GCS. +func UploadMetricsToGCS(ctx context.Context, bkt *storage.BucketHandle, prefix string, cveID models.CVEID, metrics *models.ConversionMetrics) error { + if metrics == nil || cveID == "" { + return errors.New("invalid metrics or CVE ID provided") + } + + data, err := json.MarshalIndent(metrics, "", " ") + if err != nil { + return fmt.Errorf("failed to marshal metrics for %s: %w", cveID, err) + } + + objectName := filepath.Join(prefix, string(cveID)+".metrics.json") + reader := bytes.NewReader(data) + + return gcs.UploadToGCS(ctx, bkt, objectName, reader, "application/json") +} + +// CreateMetricsFile creates the initial file for the metrics record. +func CreateMetricsFile(id models.CVEID, vulnDir string) (*os.File, error) { + metricsFile := filepath.Join(vulnDir, string(id)+".metrics"+models.Extension) + f, err := os.Create(metricsFile) + if err != nil { + logger.Info("Failed to open for writing "+metricsFile, slog.String("cve", string(id)), slog.String("path", metricsFile), slog.Any("err", err)) + return nil, err + } + + return f, nil +} + +// CreateOSVFile creates the initial file for the OSV record. +func CreateOSVFile(id models.CVEID, vulnDir string) (*os.File, error) { + outputFile := filepath.Join(vulnDir, string(id)+models.Extension) + + f, err := os.Create(outputFile) + if err != nil { + logger.Info("Failed to open for writing "+outputFile, slog.String("cve", string(id)), slog.String("path", outputFile), slog.Any("err", err)) + return nil, err + } + + return f, err +} + +func WriteMetricsFile(metrics *models.ConversionMetrics, metricsFile *os.File) error { + marshalledMetrics, err := json.MarshalIndent(&metrics, "", " ") + if err != nil { + logger.Info("Failed to marshal", slog.Any("err", err)) + return err } - return filenames, nil + _, err = metricsFile.Write(marshalledMetrics) + if err != nil { + logger.Warn("Failed to write", slog.String("path", metricsFile.Name()), slog.Any("err", err)) + return fmt.Errorf("failed to write %s: %w", metricsFile.Name(), err) + } + + metricsFile.Close() + + return nil } diff --git a/vulnfeeds/gcs-tools/cveworker_test.go b/vulnfeeds/conversion/writer/writer_test.go similarity index 96% rename from vulnfeeds/gcs-tools/cveworker_test.go rename to vulnfeeds/conversion/writer/writer_test.go index 9cabe41273d..3cedf9b6a5b 100644 --- a/vulnfeeds/gcs-tools/cveworker_test.go +++ b/vulnfeeds/conversion/writer/writer_test.go @@ -1,4 +1,4 @@ -package gcs +package writer import ( "bytes" @@ -59,7 +59,7 @@ func TestUploadToGCS(t *testing.T) { preModifiedBuf := []byte(`{"id":"CVE-2023-1234"}`) t.Run("Upload new object", func(t *testing.T) { - err := uploadToGCS(ctx, v, preModifiedBuf, bkt, "") + err := uploadIfChanged(ctx, v, preModifiedBuf, bkt, "") if err != nil { t.Errorf("Expected uploadToGCS to return nil for new object, got %v", err) } @@ -81,7 +81,7 @@ func TestUploadToGCS(t *testing.T) { t.Run("Skip upload if hash matches", func(t *testing.T) { // Modify the vulnerability to simulate a change in modified time but not content v.Modified = timestamppb.New(time.Now().Add(1 * time.Hour)) - err := uploadToGCS(ctx, v, preModifiedBuf, bkt, "") + err := uploadIfChanged(ctx, v, preModifiedBuf, bkt, "") if !errors.Is(err, ErrUploadSkipped) { t.Errorf("Expected uploadToGCS to return ErrUploadSkipped when hash matches, got %v", err) } @@ -101,7 +101,7 @@ func TestUploadToGCS(t *testing.T) { t.Run("Upload if hash differs", func(t *testing.T) { preModifiedBuf2 := []byte(`{"id":"CVE-2023-1234", "summary": "updated"}`) - err := uploadToGCS(ctx, v, preModifiedBuf2, bkt, "") + err := uploadIfChanged(ctx, v, preModifiedBuf2, bkt, "") if err != nil { t.Errorf("Expected uploadToGCS to return nil when hash differs, got %v", err) } @@ -244,7 +244,7 @@ func TestWorker(t *testing.T) { w.Close() var counter atomic.Uint64 - Worker(ctx, vulnChan, outBkt, overridesBkt, "", &counter) + VulnWorker(ctx, vulnChan, outBkt, overridesBkt, "", &counter) if counter.Load() != 2 { t.Errorf("Expected counter to be 2, got %d", counter.Load()) @@ -299,7 +299,7 @@ func TestUpload(t *testing.T) { }, } - Upload(ctx, "test-job", true, outBucketName, "", 1, "", vulnerabilities, false) + UploadVulnsToGCS(ctx, "test-job", true, outBucketName, "", 1, "", vulnerabilities, false) client := server.Client() bkt := client.Bucket(outBucketName) diff --git a/vulnfeeds/gcs-tools/gcs.go b/vulnfeeds/gcs-tools/gcs.go index a208988155f..d52465340d5 100644 --- a/vulnfeeds/gcs-tools/gcs.go +++ b/vulnfeeds/gcs-tools/gcs.go @@ -1,9 +1,7 @@ package gcs import ( - "bytes" "context" - "encoding/json" "errors" "fmt" "io" @@ -12,15 +10,12 @@ import ( "strings" "cloud.google.com/go/storage" - "github.com/google/osv/vulnfeeds/models" - "github.com/ossf/osv-schema/bindings/go/osvschema" "golang.org/x/sync/errgroup" "google.golang.org/api/iterator" - "google.golang.org/protobuf/encoding/protojson" ) -// ToGCS uploads data from an io.Reader to a GCS bucket. -func ToGCS(ctx context.Context, bkt *storage.BucketHandle, objectName string, data io.Reader, contentType string) error { +// UploadToGCS uploads data from an io.Reader to a GCS bucket. +func UploadToGCS(ctx context.Context, bkt *storage.BucketHandle, objectName string, data io.Reader, contentType string) error { obj := bkt.Object(objectName) wc := obj.NewWriter(ctx) if contentType != "" { @@ -50,7 +45,7 @@ func UploadFile(ctx context.Context, bkt *storage.BucketHandle, objectName strin } defer f.Close() - return ToGCS(ctx, bkt, objectName, f, "") + return UploadToGCS(ctx, bkt, objectName, f, "") } // DownloadBucket downloads all objects from a GCS bucket to a local directory. @@ -119,36 +114,21 @@ func DownloadBucket(ctx context.Context, bkt *storage.BucketHandle, prefix strin return nil } -// UploadVulnerability marshals an OSV Vulnerability to JSON and uploads it to GCS. -func UploadVulnerability(ctx context.Context, bkt *storage.BucketHandle, prefix string, vuln *osvschema.Vulnerability) error { - if vuln == nil || vuln.GetId() == "" { - return errors.New("invalid vulnerability provided") - } - - data, err := protojson.MarshalOptions{Indent: " "}.Marshal(vuln) - if err != nil { - return fmt.Errorf("failed to marshal vulnerability %s: %w", vuln.GetId(), err) - } - - objectName := filepath.Join(prefix, vuln.GetId()+".json") - reader := bytes.NewReader(data) - - return ToGCS(ctx, bkt, objectName, reader, "application/json") -} - -// UploadMetrics marshals ConversionMetrics to JSON and uploads it to GCS. -func UploadMetrics(ctx context.Context, bkt *storage.BucketHandle, prefix string, cveID models.CVEID, metrics *models.ConversionMetrics) error { - if metrics == nil || cveID == "" { - return errors.New("invalid metrics or CVE ID provided") - } - - data, err := json.MarshalIndent(metrics, "", " ") - if err != nil { - return fmt.Errorf("failed to marshal metrics for %s: %w", cveID, err) +// listBucketObjects lists the names of all objects in a Google Cloud Storage bucket. +// It does not download the file contents. +func ListBucketObjects(ctx context.Context, bucket *storage.BucketHandle, prefix string) ([]string, error) { + it := bucket.Objects(ctx, &storage.Query{Prefix: prefix}) + var filenames []string + for { + attrs, err := it.Next() + if errors.Is(err, iterator.Done) { + break // All objects have been listed. + } + if err != nil { + return nil, fmt.Errorf("bucket.Objects: %w", err) + } + filenames = append(filenames, attrs.Name) } - objectName := filepath.Join(prefix, string(cveID)+".metrics.json") - reader := bytes.NewReader(data) - - return ToGCS(ctx, bkt, objectName, reader, "application/json") -} + return filenames, nil +} \ No newline at end of file diff --git a/vulnfeeds/gcs-tools/gcs_test.go b/vulnfeeds/gcs-tools/gcs_test.go index 78bfe57d4b1..290d4a249cc 100644 --- a/vulnfeeds/gcs-tools/gcs_test.go +++ b/vulnfeeds/gcs-tools/gcs_test.go @@ -10,7 +10,7 @@ import ( "github.com/fsouza/fake-gcs-server/fakestorage" ) -func TestToGCS(t *testing.T) { +func TesUploadToGCS(t *testing.T) { server := fakestorage.NewServer([]fakestorage.Object{}) t.Cleanup(server.Stop) @@ -21,9 +21,9 @@ func TestToGCS(t *testing.T) { } content := []byte("test content") - err := ToGCS(context.Background(), bkt, "test-object.txt", bytes.NewReader(content), "text/plain") + err := UploadToGCS(context.Background(), bkt, "test-object.txt", bytes.NewReader(content), "text/plain") if err != nil { - t.Fatalf("ToGCS failed: %v", err) + t.Fatalf("UploadToGCS failed: %v", err) } obj, err := server.GetObject("test-bucket", "test-object.txt") From 6c773e6fd3f88b836f53338345da06b76490b1b9 Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Fri, 20 Mar 2026 05:03:27 +0000 Subject: [PATCH 12/28] fix lint --- vulnfeeds/gcs-tools/gcs.go | 3 ++- vulnfeeds/gcs-tools/gcs_test.go | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/vulnfeeds/gcs-tools/gcs.go b/vulnfeeds/gcs-tools/gcs.go index d52465340d5..a73d66a8fe5 100644 --- a/vulnfeeds/gcs-tools/gcs.go +++ b/vulnfeeds/gcs-tools/gcs.go @@ -1,3 +1,4 @@ +// Package gcs provides utilities for working with Google Cloud Storage. package gcs import ( @@ -131,4 +132,4 @@ func ListBucketObjects(ctx context.Context, bucket *storage.BucketHandle, prefix } return filenames, nil -} \ No newline at end of file +} diff --git a/vulnfeeds/gcs-tools/gcs_test.go b/vulnfeeds/gcs-tools/gcs_test.go index 290d4a249cc..93e36ff5d3d 100644 --- a/vulnfeeds/gcs-tools/gcs_test.go +++ b/vulnfeeds/gcs-tools/gcs_test.go @@ -10,7 +10,7 @@ import ( "github.com/fsouza/fake-gcs-server/fakestorage" ) -func TesUploadToGCS(t *testing.T) { +func TestUploadToGCS(t *testing.T) { server := fakestorage.NewServer([]fakestorage.Object{}) t.Cleanup(server.Stop) From 24369ea884307961f2cba3e2c67c4e27e114b4b4 Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Fri, 20 Mar 2026 05:11:09 +0000 Subject: [PATCH 13/28] remove unneeded test as output happens elsewhere --- vulnfeeds/conversion/nvd/converter_test.go | 62 ---------------------- 1 file changed, 62 deletions(-) diff --git a/vulnfeeds/conversion/nvd/converter_test.go b/vulnfeeds/conversion/nvd/converter_test.go index 72f8c6e0c1b..bf099b5b1e4 100644 --- a/vulnfeeds/conversion/nvd/converter_test.go +++ b/vulnfeeds/conversion/nvd/converter_test.go @@ -8,12 +8,8 @@ import ( "github.com/go-git/go-git/v5/plumbing/transport/client" githttp "github.com/go-git/go-git/v5/plumbing/transport/http" - "github.com/google/go-cmp/cmp" "github.com/google/osv/vulnfeeds/git" "github.com/google/osv/vulnfeeds/models" - "github.com/ossf/osv-schema/bindings/go/osvschema" - "google.golang.org/protobuf/encoding/protojson" - "google.golang.org/protobuf/testing/protocmp" ) type roundTripperFunc func(*http.Request) (*http.Response, error) @@ -92,61 +88,3 @@ func TestCVEToOSV_429(t *testing.T) { } } } - -func TestCVEToOSV_ReferencesDeterminism(t *testing.T) { - cve := models.NVDCVE{ - ID: "CVE-2025-12345", - References: []models.Reference{ - {URL: "https://example.com/D"}, - {URL: "https://example.com/A"}, - {URL: "https://example.com/C", Tags: []string{"Patch"}}, - {URL: "https://example.com/C"}, - {URL: "https://example.com/B", Tags: []string{"Issue Tracking"}}, - {URL: "https://example.com/E"}, - }, - Metrics: &models.CVEItemMetrics{}, - } - metrics := &models.ConversionMetrics{} - outDir := t.TempDir() - - var firstResult []*osvschema.Reference - for i := range 10 { - cache := &git.RepoTagsCache{} - CVEToOSV(cve, nil, cache, outDir, metrics, false, false) - - var b []byte - err := filepath.Walk(outDir, func(path string, info os.FileInfo, _ error) error { - if !info.IsDir() && filepath.Ext(path) == ".json" { - var fileErr error - b, fileErr = os.ReadFile(path) - if fileErr != nil { - return fileErr - } - } - - return nil - }) - if err != nil { - t.Fatalf("Failed to walk or read OSV file: %v", err) - } - - if len(b) == 0 { - t.Fatalf("Failed to find OSV file") - } - - var vuln osvschema.Vulnerability - err = protojson.Unmarshal(b, &vuln) - if err != nil { - t.Fatalf("Failed to unmarshal OSV: %v", err) - } - - if i == 0 { - firstResult = vuln.GetReferences() - continue - } - - if diff := cmp.Diff(firstResult, vuln.GetReferences(), protocmp.Transform()); diff != "" { - t.Fatalf("Iteration %d produced different references result:\n%s", i, diff) - } - } -} From d586a1106678b2edc1465ea7976e6612640e0664 Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Wed, 29 Apr 2026 05:52:21 +0000 Subject: [PATCH 14/28] fix some linter issues --- vulnfeeds/cmd/converters/dsa-dla-dtsa/main.go | 4 ++-- vulnfeeds/conversion/nvd/converter.go | 1 - vulnfeeds/conversion/nvd/converter_test.go | 10 ++++------ 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/vulnfeeds/cmd/converters/dsa-dla-dtsa/main.go b/vulnfeeds/cmd/converters/dsa-dla-dtsa/main.go index 48902e19f15..34f9907248e 100644 --- a/vulnfeeds/cmd/converters/dsa-dla-dtsa/main.go +++ b/vulnfeeds/cmd/converters/dsa-dla-dtsa/main.go @@ -20,7 +20,7 @@ import ( "time" htmltomarkdown "github.com/JohannesKaufmann/html-to-markdown/v2" - "github.com/google/osv/vulnfeeds/upload" + "github.com/google/osv/vulnfeeds/conversion/writer" "github.com/google/osv/vulnfeeds/utility/logger" "github.com/ossf/osv-schema/bindings/go/osvschema" "golang.org/x/text/encoding/charmap" @@ -572,7 +572,7 @@ func run(webwmlRepo, securityTrackerRepo, outputDir, outputBucket string, upload if uploadToGCS { logger.Info("Uploading to GCS", "bucket", outputBucket) ctx := context.Background() - upload.Upload(ctx, "debian-osv", uploadToGCS, outputBucket, "", numWorkers, outputDir, allVulnerabilities, doDeletions) + writer.UploadVulnsToGCS(ctx, "debian-osv", uploadToGCS, outputBucket, "", numWorkers, outputDir, allVulnerabilities, doDeletions) } else { logger.Info("Skipping GCS upload") } diff --git a/vulnfeeds/conversion/nvd/converter.go b/vulnfeeds/conversion/nvd/converter.go index 57a8763a2eb..704f7004226 100644 --- a/vulnfeeds/conversion/nvd/converter.go +++ b/vulnfeeds/conversion/nvd/converter.go @@ -330,4 +330,3 @@ func FindRepos(cve models.NVDCVE, vpRepoCache *c.VPRepoCache, repoTagsCache *git return reposForCVE } - diff --git a/vulnfeeds/conversion/nvd/converter_test.go b/vulnfeeds/conversion/nvd/converter_test.go index 0a675e43814..105186acd3d 100644 --- a/vulnfeeds/conversion/nvd/converter_test.go +++ b/vulnfeeds/conversion/nvd/converter_test.go @@ -1,20 +1,19 @@ package nvd import ( - "encoding/json" "net/http" "os" "path/filepath" - "sort" "testing" - "github.com/gkampitakis/go-snaps/snaps" "github.com/go-git/go-git/v5/plumbing/transport/client" githttp "github.com/go-git/go-git/v5/plumbing/transport/http" "github.com/google/go-cmp/cmp" - "github.com/google/osv/vulnfeeds/conversion" "github.com/google/osv/vulnfeeds/git" "github.com/google/osv/vulnfeeds/models" + "github.com/ossf/osv-schema/bindings/go/osvschema" + "google.golang.org/protobuf/encoding/protojson" + "google.golang.org/protobuf/testing/protocmp" ) type roundTripperFunc func(*http.Request) (*http.Response, error) @@ -94,7 +93,6 @@ func TestCVEToOSV_429(t *testing.T) { } } - func TestCVEToOSV_ReferencesDeterminism(t *testing.T) { cve := models.NVDCVE{ ID: "CVE-2025-12345", @@ -114,7 +112,7 @@ func TestCVEToOSV_ReferencesDeterminism(t *testing.T) { var firstResult []*osvschema.Reference for i := range 10 { cache := &git.RepoTagsCache{} - CVEToOSV(cve, nil, cache, outDir, metrics, false, false) + CVEToOSV(cve, nil, cache, metrics) var b []byte err := filepath.Walk(outDir, func(path string, info os.FileInfo, _ error) error { From 9e3fa6280b4c07cc71e426c450d0232d7f873f07 Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Wed, 29 Apr 2026 05:55:42 +0000 Subject: [PATCH 15/28] fixed the test change --- vulnfeeds/conversion/nvd/converter_test.go | 31 +++------------------- 1 file changed, 3 insertions(+), 28 deletions(-) diff --git a/vulnfeeds/conversion/nvd/converter_test.go b/vulnfeeds/conversion/nvd/converter_test.go index 105186acd3d..1158d24fb58 100644 --- a/vulnfeeds/conversion/nvd/converter_test.go +++ b/vulnfeeds/conversion/nvd/converter_test.go @@ -12,7 +12,6 @@ import ( "github.com/google/osv/vulnfeeds/git" "github.com/google/osv/vulnfeeds/models" "github.com/ossf/osv-schema/bindings/go/osvschema" - "google.golang.org/protobuf/encoding/protojson" "google.golang.org/protobuf/testing/protocmp" ) @@ -107,37 +106,13 @@ func TestCVEToOSV_ReferencesDeterminism(t *testing.T) { Metrics: &models.CVEItemMetrics{}, } metrics := &models.ConversionMetrics{} - outDir := t.TempDir() var firstResult []*osvschema.Reference for i := range 10 { cache := &git.RepoTagsCache{} - CVEToOSV(cve, nil, cache, metrics) - - var b []byte - err := filepath.Walk(outDir, func(path string, info os.FileInfo, _ error) error { - if !info.IsDir() && filepath.Ext(path) == ".json" { - var fileErr error - b, fileErr = os.ReadFile(path) - if fileErr != nil { - return fileErr - } - } - - return nil - }) - if err != nil { - t.Fatalf("Failed to walk or read OSV file: %v", err) - } - - if len(b) == 0 { - t.Fatalf("Failed to find OSV file") - } - - var vuln osvschema.Vulnerability - err = protojson.Unmarshal(b, &vuln) - if err != nil { - t.Fatalf("Failed to unmarshal OSV: %v", err) + vuln, _, _ := CVEToOSV(cve, nil, cache, metrics) + if vuln == nil { + t.Fatalf("Iteration %d produced nil vulnerability", i) } if i == 0 { From 5b315de819c26203ea5ec554873e480cc318ede9 Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Fri, 1 May 2026 03:51:15 +0000 Subject: [PATCH 16/28] update nits --- vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go | 4 ++-- vulnfeeds/conversion/writer/writer.go | 11 ++++++----- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go index 99cf378e25a..ea9fd893315 100644 --- a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go +++ b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go @@ -155,13 +155,13 @@ func worker(ctx context.Context, wg *sync.WaitGroup, jobs <-chan models.NVDCVE, cveID := string(cve.ID) if outcome == models.Error { logger.Error("Error generating OSV record", slog.String("cve", cveID), slog.String("outcome", outcome.String())) - return // Don't attempt to output files if there was an error + continue // Don't attempt to output files if there was an error } if outcome != models.Successful { logger.Info("Failed to generate a successful OSV record", slog.String("cve", cveID), slog.String("outcome", outcome.String())) if *rejectFailed { - return // Skip outputting OSV file + continue // Skip outputting OSV file } } else { logger.Info("Generated OSV record for "+cveID, slog.String("cve", cveID)) diff --git a/vulnfeeds/conversion/writer/writer.go b/vulnfeeds/conversion/writer/writer.go index 92b56c73de6..d3b103298e9 100644 --- a/vulnfeeds/conversion/writer/writer.go +++ b/vulnfeeds/conversion/writer/writer.go @@ -13,7 +13,6 @@ import ( "log/slog" "os" "path" - "path/filepath" "sync" "sync/atomic" "time" @@ -224,6 +223,8 @@ func UploadVulnsToGCS( if err != nil { logger.Fatal("Failed to create storage client", slog.Any("err", err)) } + defer storageClient.Close() + outBkt = storageClient.Bucket(outputBucketName) if overridesBucketName != "" { overridesBkt = storageClient.Bucket(overridesBucketName) @@ -290,7 +291,7 @@ func UploadVulnToGCS(ctx context.Context, bkt *storage.BucketHandle, prefix stri return fmt.Errorf("failed to marshal vulnerability %s: %w", vuln.GetId(), err) } - objectName := filepath.Join(prefix, vuln.GetId()+".json") + objectName := path.Join(prefix, vuln.GetId()+".json") reader := bytes.NewReader(data) return gcs.UploadToGCS(ctx, bkt, objectName, reader, "application/json") @@ -307,7 +308,7 @@ func UploadMetricsToGCS(ctx context.Context, bkt *storage.BucketHandle, prefix s return fmt.Errorf("failed to marshal metrics for %s: %w", cveID, err) } - objectName := filepath.Join(prefix, string(cveID)+".metrics.json") + objectName := path.Join(prefix, string(cveID)+".metrics.json") reader := bytes.NewReader(data) return gcs.UploadToGCS(ctx, bkt, objectName, reader, "application/json") @@ -315,7 +316,7 @@ func UploadMetricsToGCS(ctx context.Context, bkt *storage.BucketHandle, prefix s // CreateMetricsFile creates the initial file for the metrics record. func CreateMetricsFile(id models.CVEID, vulnDir string) (*os.File, error) { - metricsFile := filepath.Join(vulnDir, string(id)+".metrics"+models.Extension) + metricsFile := path.Join(vulnDir, string(id)+".metrics"+models.Extension) f, err := os.Create(metricsFile) if err != nil { logger.Info("Failed to open for writing "+metricsFile, slog.String("cve", string(id)), slog.String("path", metricsFile), slog.Any("err", err)) @@ -327,7 +328,7 @@ func CreateMetricsFile(id models.CVEID, vulnDir string) (*os.File, error) { // CreateOSVFile creates the initial file for the OSV record. func CreateOSVFile(id models.CVEID, vulnDir string) (*os.File, error) { - outputFile := filepath.Join(vulnDir, string(id)+models.Extension) + outputFile := path.Join(vulnDir, string(id)+models.Extension) f, err := os.Create(outputFile) if err != nil { From 997ae6542871ebfd12acf5fc80146025cca2b654 Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Fri, 1 May 2026 03:53:03 +0000 Subject: [PATCH 17/28] update comment --- vulnfeeds/conversion/writer/writer.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vulnfeeds/conversion/writer/writer.go b/vulnfeeds/conversion/writer/writer.go index d3b103298e9..d67c98d9d1c 100644 --- a/vulnfeeds/conversion/writer/writer.go +++ b/vulnfeeds/conversion/writer/writer.go @@ -147,7 +147,7 @@ func handleOverride(ctx context.Context, v *osvschema.Vulnerability, overridesBk return &overrideV, overrideBuf, nil } -// Worker is a generic worker that processes OSV vulnerabilities from a channel. +// VulnWorker is a generic worker that processes OSV vulnerabilities from a channel. // It can upload them to a GCS bucket or write them to disk. // It supports checking for overrides in a separate GCS bucket location if overridesBkt is not nil. // For GCS uploads, it calculates a hash of the vulnerability (excluding the modified time) and compares it From da92e6847cb62e4d5da321d2a0bbf4b0e22ecbcf Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Fri, 1 May 2026 04:28:57 +0000 Subject: [PATCH 18/28] ensure deterministic output --- vulnfeeds/conversion/nvd/converter.go | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/vulnfeeds/conversion/nvd/converter.go b/vulnfeeds/conversion/nvd/converter.go index 704f7004226..4041ef227a7 100644 --- a/vulnfeeds/conversion/nvd/converter.go +++ b/vulnfeeds/conversion/nvd/converter.go @@ -2,6 +2,7 @@ package nvd import ( + "cmp" "encoding/json" "errors" "log/slog" @@ -18,6 +19,7 @@ import ( "github.com/google/osv/vulnfeeds/utility" "github.com/google/osv/vulnfeeds/utility/logger" "github.com/google/osv/vulnfeeds/vulns" + "github.com/ossf/osv-schema/bindings/go/osvschema" ) var ErrNoRanges = errors.New("no ranges") @@ -135,6 +137,19 @@ func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, metri v.Affected = append(v.Affected, affected...) + // sort affected by repository name alphabetically to ensure deterministic output and caching hashes + slices.SortFunc(v.Affected, func(a, b *osvschema.Affected) int { + var repoA, repoB string + if len(a.GetRanges()) > 0 { + repoA = a.GetRanges()[0].GetRepo() + } + if len(b.GetRanges()) > 0 { + repoB = b.GetRanges()[0].GetRepo() + } + + return cmp.Compare(repoA, repoB) + }) + unresolvedRangesList := c.CreateUnresolvedRanges(unresolvedRanges) if unresolvedRangesList != nil { if err := c.AddFieldToDatabaseSpecific(v.DatabaseSpecific, "unresolved_ranges", unresolvedRangesList); err != nil { From dc1ab51c3493b940dd12869d7cfa734fbc345bed Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Fri, 1 May 2026 04:30:27 +0000 Subject: [PATCH 19/28] add logs for upload and fixed upload still happening if vuln not changed --- .../cmd/converters/cve/nvd-cve-osv/main.go | 2 +- vulnfeeds/conversion/writer/writer.go | 27 ++++++++++++++++++- 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go index ea9fd893315..54da2540f34 100644 --- a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go +++ b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go @@ -169,7 +169,7 @@ func worker(ctx context.Context, wg *sync.WaitGroup, jobs <-chan models.NVDCVE, if *uploadToGCS && bkt != nil { if vuln != nil { - if err := writer.UploadVulnToGCS(ctx, bkt, *gcsPrefix, vuln.Vulnerability); err != nil { + if err := writer.UploadVulnIfChanged(ctx, bkt, *gcsPrefix, vuln.Vulnerability); err != nil { logger.Error("Failed to upload vulnerability", slog.String("cve", vuln.Id), slog.Any("err", err)) } } diff --git a/vulnfeeds/conversion/writer/writer.go b/vulnfeeds/conversion/writer/writer.go index d67c98d9d1c..72bff19864a 100644 --- a/vulnfeeds/conversion/writer/writer.go +++ b/vulnfeeds/conversion/writer/writer.go @@ -69,6 +69,7 @@ func uploadIfChanged(ctx context.Context, v *osvschema.Vulnerability, preModifie if err == nil { // Object exists, check hash. if attrs.Metadata != nil && attrs.Metadata[hashMetadataKey] == hexHash { + logger.Info("Skipping GCS upload, hash matches", slog.String("id", vulnID), slog.String("object", objName)) return ErrUploadSkipped } } else if !errors.Is(err, storage.ErrObjectNotExist) { @@ -76,6 +77,7 @@ func uploadIfChanged(ctx context.Context, v *osvschema.Vulnerability, preModifie } // Object does not exist or hash differs, upload. + logger.Info("Uploading vulnerability record to GCS", slog.String("id", vulnID), slog.String("object", objName)) v.Modified = timestamppb.New(time.Now().UTC()) vuln := vulns.Vulnerability{Vulnerability: v} var buf bytes.Buffer @@ -280,7 +282,7 @@ func handleDeletion(ctx context.Context, outBkt *storage.BucketHandle, osvOutput } } -// UploadVulnToGCS marshals a single OSV Vulnerability to JSON and uploads it to GCS. +// UploadVulnToGCS marshals a single OSV Vulnerability to JSON and unconditionally uploads it to GCS. func UploadVulnToGCS(ctx context.Context, bkt *storage.BucketHandle, prefix string, vuln *osvschema.Vulnerability) error { if vuln == nil || vuln.GetId() == "" { return errors.New("invalid vulnerability provided") @@ -292,11 +294,33 @@ func UploadVulnToGCS(ctx context.Context, bkt *storage.BucketHandle, prefix stri } objectName := path.Join(prefix, vuln.GetId()+".json") + logger.Info("Uploading vulnerability record to GCS", slog.String("id", vuln.GetId()), slog.String("object", objectName)) reader := bytes.NewReader(data) return gcs.UploadToGCS(ctx, bkt, objectName, reader, "application/json") } +// UploadVulnIfChanged marshals a single OSV Vulnerability to JSON and uploads it to GCS if it has changed. +func UploadVulnIfChanged(ctx context.Context, bkt *storage.BucketHandle, prefix string, vuln *osvschema.Vulnerability) error { + if vuln == nil || vuln.GetId() == "" { + return errors.New("invalid vulnerability provided") + } + + var buf bytes.Buffer + v := vulns.Vulnerability{Vulnerability: vuln} + if err := v.ToJSON(&buf); err != nil { + return fmt.Errorf("failed to marshal vulnerability %s: %w", vuln.GetId(), err) + } + preModifiedBuf := buf.Bytes() + + err := uploadIfChanged(ctx, vuln, preModifiedBuf, bkt, prefix) + if errors.Is(err, ErrUploadSkipped) { + return nil + } + + return err +} + // UploadMetricsToGCS marshals ConversionMetrics to JSON and uploads it to GCS. func UploadMetricsToGCS(ctx context.Context, bkt *storage.BucketHandle, prefix string, cveID models.CVEID, metrics *models.ConversionMetrics) error { if metrics == nil || cveID == "" { @@ -309,6 +333,7 @@ func UploadMetricsToGCS(ctx context.Context, bkt *storage.BucketHandle, prefix s } objectName := path.Join(prefix, string(cveID)+".metrics.json") + logger.Debug("Uploading conversion metrics record to GCS", slog.String("id", string(cveID)), slog.String("object", objectName)) reader := bytes.NewReader(data) return gcs.UploadToGCS(ctx, bkt, objectName, reader, "application/json") From 4167826e380b66c3a25395b40f5228d6e34cd1fd Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Mon, 4 May 2026 03:09:59 +0000 Subject: [PATCH 20/28] asynchronously upload to gcs using a worker pool --- .../cmd/converters/cve/nvd-cve-osv/main.go | 43 ++++-- vulnfeeds/conversion/writer/writer.go | 146 ++++++++++++------ vulnfeeds/conversion/writer/writer_test.go | 115 ++++++++++++-- vulnfeeds/gcs-tools/gcs.go | 98 +++++++++++- vulnfeeds/gcs-tools/gcs_test.go | 2 +- 5 files changed, 333 insertions(+), 71 deletions(-) diff --git a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go index 54da2540f34..d03af7ed3af 100644 --- a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go +++ b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go @@ -14,11 +14,12 @@ import ( "runtime/pprof" "slices" "sync" + "sync/atomic" - "cloud.google.com/go/storage" c "github.com/google/osv/vulnfeeds/conversion" "github.com/google/osv/vulnfeeds/conversion/nvd" "github.com/google/osv/vulnfeeds/conversion/writer" + "github.com/google/osv/vulnfeeds/gcs-tools" "github.com/google/osv/vulnfeeds/git" "github.com/google/osv/vulnfeeds/models" "github.com/google/osv/vulnfeeds/utility/logger" @@ -39,6 +40,11 @@ var ( gcsPrefix = flag.String("gcs-prefix", "nvd-osv", "The prefix within the GCS bucket.") ) +var ( + totalConversionsCount atomic.Uint64 + successfulConversionsCount atomic.Uint64 +) + func main() { flag.Parse() if !slices.Contains([]string{"OSV", "PackageInfo"}, *outFormat) { @@ -83,16 +89,16 @@ func main() { repoTagsCache := &git.RepoTagsCache{} - var bkt *storage.BucketHandle + var gcsHelper *gcs.Helper ctx := context.Background() if *uploadToGCS { - client, err := storage.NewClient(ctx) + var err error + gcsHelper, err = gcs.InitUploadPool(ctx, *workers, *outputBucket) if err != nil { - logger.Fatal("Failed to create GCS client", slog.Any("err", err)) + logger.Fatal("Failed to initialize GCS upload pool", slog.Any("err", err)) } - defer client.Close() - bkt = client.Bucket(*outputBucket) - logger.Info("GCS Client and Bucket initialized", slog.String("bucket", *outputBucket)) + defer gcsHelper.CloseAndWait() + logger.Info("GCS Upload Pool initialized", slog.String("bucket", *outputBucket)) } jobs := make(chan models.NVDCVE) @@ -100,7 +106,7 @@ func main() { for range *workers { wg.Add(1) - go worker(ctx, &wg, jobs, bkt, *outDir, vpRepoCache, repoTagsCache) + go worker(ctx, &wg, jobs, gcsHelper, *outDir, vpRepoCache, repoTagsCache) } for _, cve := range parsed.Vulnerabilities { @@ -109,6 +115,13 @@ func main() { close(jobs) wg.Wait() + if gcsHelper != nil { + gcsHelper.CloseAndWait() + } + logger.Info("Conversion Stats", + slog.Uint64("total_processed", totalConversionsCount.Load()), + slog.Uint64("successful_conversions", successfulConversionsCount.Load()), + ) logger.Info("NVD Conversion run complete") // Conduct analysis on the outcome of the converted files and output to a csv @@ -148,10 +161,11 @@ func processCVE(cve models.NVDCVE, vpRepoCache *c.VPRepoCache, repoTagsCache *gi return vuln, finalMetrics, outcome } -func worker(ctx context.Context, wg *sync.WaitGroup, jobs <-chan models.NVDCVE, bkt *storage.BucketHandle, outDir string, vpRepoCache *c.VPRepoCache, repoTagsCache *git.RepoTagsCache) { +func worker(ctx context.Context, wg *sync.WaitGroup, jobs <-chan models.NVDCVE, gcsHelper *gcs.Helper, outDir string, vpRepoCache *c.VPRepoCache, repoTagsCache *git.RepoTagsCache) { defer wg.Done() for cve := range jobs { vuln, metrics, outcome := processCVE(cve, vpRepoCache, repoTagsCache) + totalConversionsCount.Add(1) cveID := string(cve.ID) if outcome == models.Error { logger.Error("Error generating OSV record", slog.String("cve", cveID), slog.String("outcome", outcome.String())) @@ -165,17 +179,18 @@ func worker(ctx context.Context, wg *sync.WaitGroup, jobs <-chan models.NVDCVE, } } else { logger.Info("Generated OSV record for "+cveID, slog.String("cve", cveID)) + successfulConversionsCount.Add(1) } - if *uploadToGCS && bkt != nil { + if *uploadToGCS && gcsHelper != nil { if vuln != nil { - if err := writer.UploadVulnIfChanged(ctx, bkt, *gcsPrefix, vuln.Vulnerability); err != nil { - logger.Error("Failed to upload vulnerability", slog.String("cve", vuln.Id), slog.Any("err", err)) + if err := writer.UploadVulnIfChangedAsync(ctx, gcsHelper, *gcsPrefix, vuln.Vulnerability); err != nil { + logger.Error("Failed to queue vulnerability upload", slog.String("cve", vuln.Id), slog.Any("err", err)) } } if *outputMetrics && metrics != nil { - if err := writer.UploadMetricsToGCS(ctx, bkt, *gcsPrefix, models.CVEID(cveID), metrics); err != nil { - logger.Error("Failed to upload metrics", slog.String("cve", cveID), slog.Any("err", err)) + if err := writer.UploadMetricsToGCSAsync(ctx, gcsHelper, *gcsPrefix, models.CVEID(cveID), metrics); err != nil { + logger.Error("Failed to queue metrics upload", slog.String("cve", cveID), slog.Any("err", err)) } } } else { diff --git a/vulnfeeds/conversion/writer/writer.go b/vulnfeeds/conversion/writer/writer.go index 72bff19864a..e3a4cbb8242 100644 --- a/vulnfeeds/conversion/writer/writer.go +++ b/vulnfeeds/conversion/writer/writer.go @@ -51,16 +51,38 @@ func writeToDisk(v *osvschema.Vulnerability, preModifiedBuf []byte, outputPrefix return nil } -// uploadIfChanged uploads the vulnerability to a GCS bucket. +// prepareVulnUpload marshals a vulnerability record, calculates its SHA256 hash (excluding the Modified time), updates its Modified time, and returns the hash and the updated payload. +func prepareVulnUpload(vuln *osvschema.Vulnerability) (hexHash string, postModifiedBuf []byte, err error) { + if vuln == nil || vuln.GetId() == "" { + return "", nil, errors.New("invalid vulnerability provided") + } + + var buf bytes.Buffer + v := vulns.Vulnerability{Vulnerability: vuln} + if err := v.ToJSON(&buf); err != nil { + return "", nil, fmt.Errorf("failed to marshal vulnerability %s: %w", vuln.GetId(), err) + } + preModifiedBuf := buf.Bytes() + hash := sha256.Sum256(preModifiedBuf) + hexHash = hex.EncodeToString(hash[:]) + + vuln.Modified = timestamppb.New(time.Now().UTC()) + var postBuf bytes.Buffer + vPost := vulns.Vulnerability{Vulnerability: vuln} + if err := vPost.ToJSON(&postBuf); err != nil { + return "", nil, fmt.Errorf("failed to marshal vulnerability with modified time for %s: %w", vuln.GetId(), err) + } + + return hexHash, postBuf.Bytes(), nil +} + +// uploadIfChanged uploads the vulnerability to a GCS bucket if it has changed. // It returns an error if the upload failed, or ErrUploadSkipped if the upload // was intentionally avoided (e.g. because the GCS object has a matching hash). -func uploadIfChanged(ctx context.Context, v *osvschema.Vulnerability, preModifiedBuf []byte, outBkt *storage.BucketHandle, outputPrefix string) error { +func uploadIfChanged(ctx context.Context, v *osvschema.Vulnerability, hexHash string, postModifiedBuf []byte, outBkt *storage.BucketHandle, outputPrefix string) error { vulnID := v.GetId() filename := vulnID + ".json" - hash := sha256.Sum256(preModifiedBuf) - hexHash := hex.EncodeToString(hash[:]) - objName := path.Join(outputPrefix, filename) obj := outBkt.Object(objName) @@ -78,13 +100,6 @@ func uploadIfChanged(ctx context.Context, v *osvschema.Vulnerability, preModifie // Object does not exist or hash differs, upload. logger.Info("Uploading vulnerability record to GCS", slog.String("id", vulnID), slog.String("object", objName)) - v.Modified = timestamppb.New(time.Now().UTC()) - vuln := vulns.Vulnerability{Vulnerability: v} - var buf bytes.Buffer - if err := vuln.ToJSON(&buf); err != nil { - return fmt.Errorf("failed to marshal vulnerability with modified time for %s: %w", vulnID, err) - } - postModifiedBuf := buf.Bytes() wc := obj.NewWriter(ctx) wc.Metadata = map[string]string{ @@ -155,7 +170,7 @@ func handleOverride(ctx context.Context, v *osvschema.Vulnerability, overridesBk // For GCS uploads, it calculates a hash of the vulnerability (excluding the modified time) and compares it // with the existing object's hash. The vulnerability is uploaded only if the hashes differ, with the // modified time updated. This prevents updating the modified time for vulnerabilities with no content changes. -func VulnWorker(ctx context.Context, vulnChan <-chan *osvschema.Vulnerability, outBkt, overridesBkt *storage.BucketHandle, outputPrefix string, counter *atomic.Uint64) { +func VulnWorker(ctx context.Context, vulnChan <-chan *osvschema.Vulnerability, outBkt, overridesBkt *storage.BucketHandle, gcsHelper *gcs.Helper, outputPrefix string, counter *atomic.Uint64) { for v := range vulnChan { vulnID := v.GetId() if len(v.GetAffected()) == 0 { @@ -174,24 +189,31 @@ func VulnWorker(ctx context.Context, vulnChan <-chan *osvschema.Vulnerability, o } } - if preModifiedBuf == nil { - // Marshal before setting modified time to generate hash. - vuln := vulns.Vulnerability{Vulnerability: v} - var buf bytes.Buffer - if err := vuln.ToJSON(&buf); err != nil { - logger.Error("failed to marshal vulnerability", slog.String("id", vulnID), slog.Any("err", err)) - continue - } - preModifiedBuf = buf.Bytes() - } - var writeErr error - if outBkt == nil { + if outBkt == nil && gcsHelper == nil { // Write to local disk + if preModifiedBuf == nil { + // Marshal before setting modified time to generate hash. + vuln := vulns.Vulnerability{Vulnerability: v} + var buf bytes.Buffer + if err := vuln.ToJSON(&buf); err != nil { + logger.Error("failed to marshal vulnerability", slog.String("id", vulnID), slog.Any("err", err)) + continue + } + preModifiedBuf = buf.Bytes() + } writeErr = writeToDisk(vulnToProcess, preModifiedBuf, outputPrefix) + } else if gcsHelper != nil { + // Upload to GCS asynchronously using pool + writeErr = UploadVulnIfChangedAsync(ctx, gcsHelper, outputPrefix, vulnToProcess) } else { - // Upload to GCS - writeErr = uploadIfChanged(ctx, vulnToProcess, preModifiedBuf, outBkt, outputPrefix) + // Upload to GCS synchronously + hexHash, postModifiedBuf, err := prepareVulnUpload(vulnToProcess) + if err != nil { + writeErr = err + } else { + writeErr = uploadIfChanged(ctx, vulnToProcess, hexHash, postModifiedBuf, outBkt, outputPrefix) + } } if writeErr == nil { @@ -220,6 +242,7 @@ func UploadVulnsToGCS( doDeletions bool, ) { var outBkt, overridesBkt *storage.BucketHandle + var gcsHelper *gcs.Helper if uploadToGCS { storageClient, err := storage.NewClient(ctx) if err != nil { @@ -233,8 +256,14 @@ func UploadVulnsToGCS( } if doDeletions { - handleDeletion(ctx, outBkt, osvOutputPath, vulnerabilities) + HandleDeletion(ctx, outBkt, osvOutputPath, vulnerabilities) } + + gcsHelper, err = gcs.InitUploadPool(ctx, numWorkers, outputBucketName) + if err != nil { + logger.Fatal("Failed to initialize GCS upload pool", slog.Any("err", err)) + } + defer gcsHelper.CloseAndWait() } var wg sync.WaitGroup var successCount atomic.Uint64 @@ -244,7 +273,7 @@ func UploadVulnsToGCS( wg.Add(1) go func() { defer wg.Done() - VulnWorker(ctx, vulnChan, outBkt, overridesBkt, osvOutputPath, &successCount) + VulnWorker(ctx, vulnChan, outBkt, overridesBkt, gcsHelper, osvOutputPath, &successCount) }() } @@ -254,11 +283,16 @@ func UploadVulnsToGCS( close(vulnChan) wg.Wait() + if gcsHelper != nil { + gcsHelper.CloseAndWait() + } logger.Info("Successfully processed "+jobName, slog.Int("count", len(vulnerabilities))) - logger.Info("Successfully uploaded records", slog.Uint64("count", successCount.Load())) + if outBkt == nil && gcsHelper == nil { + logger.Info("Successfully wrote records to disk", slog.Uint64("count", successCount.Load())) + } } -func handleDeletion(ctx context.Context, outBkt *storage.BucketHandle, osvOutputPath string, vulnerabilities []*osvschema.Vulnerability) { +func HandleDeletion(ctx context.Context, outBkt *storage.BucketHandle, osvOutputPath string, vulnerabilities []*osvschema.Vulnerability) { // Check if any need to be deleted bucketObjects, err := gcs.ListBucketObjects(ctx, outBkt, osvOutputPath) if err != nil { @@ -297,23 +331,17 @@ func UploadVulnToGCS(ctx context.Context, bkt *storage.BucketHandle, prefix stri logger.Info("Uploading vulnerability record to GCS", slog.String("id", vuln.GetId()), slog.String("object", objectName)) reader := bytes.NewReader(data) - return gcs.UploadToGCS(ctx, bkt, objectName, reader, "application/json") + return gcs.UploadToGCS(ctx, bkt, objectName, reader, "application/json", nil) } // UploadVulnIfChanged marshals a single OSV Vulnerability to JSON and uploads it to GCS if it has changed. func UploadVulnIfChanged(ctx context.Context, bkt *storage.BucketHandle, prefix string, vuln *osvschema.Vulnerability) error { - if vuln == nil || vuln.GetId() == "" { - return errors.New("invalid vulnerability provided") - } - - var buf bytes.Buffer - v := vulns.Vulnerability{Vulnerability: vuln} - if err := v.ToJSON(&buf); err != nil { - return fmt.Errorf("failed to marshal vulnerability %s: %w", vuln.GetId(), err) + hexHash, postModifiedBuf, err := prepareVulnUpload(vuln) + if err != nil { + return err } - preModifiedBuf := buf.Bytes() - err := uploadIfChanged(ctx, vuln, preModifiedBuf, bkt, prefix) + err = uploadIfChanged(ctx, vuln, hexHash, postModifiedBuf, bkt, prefix) if errors.Is(err, ErrUploadSkipped) { return nil } @@ -336,7 +364,39 @@ func UploadMetricsToGCS(ctx context.Context, bkt *storage.BucketHandle, prefix s logger.Debug("Uploading conversion metrics record to GCS", slog.String("id", string(cveID)), slog.String("object", objectName)) reader := bytes.NewReader(data) - return gcs.UploadToGCS(ctx, bkt, objectName, reader, "application/json") + return gcs.UploadToGCS(ctx, bkt, objectName, reader, "application/json", nil) +} + +// UploadVulnIfChangedAsync marshals a single OSV Vulnerability to JSON and schedules it for upload via the Helper pool if it has changed. +func UploadVulnIfChangedAsync(ctx context.Context, gcsHelper *gcs.Helper, prefix string, vuln *osvschema.Vulnerability) error { + hexHash, postModifiedBuf, err := prepareVulnUpload(vuln) + if err != nil { + return err + } + + objectName := path.Join(prefix, vuln.GetId()+".json") + gcsHelper.Upload(objectName, bytes.NewReader(postModifiedBuf), hexHash, "application/json") + + return nil +} + +// UploadMetricsToGCSAsync marshals ConversionMetrics to JSON and schedules it for upload via the Helper pool. +func UploadMetricsToGCSAsync(ctx context.Context, gcsHelper *gcs.Helper, prefix string, cveID models.CVEID, metrics *models.ConversionMetrics) error { + if metrics == nil || cveID == "" { + return errors.New("invalid metrics or CVE ID provided") + } + + data, err := json.MarshalIndent(metrics, "", " ") + if err != nil { + return fmt.Errorf("failed to marshal metrics for %s: %w", cveID, err) + } + + objectName := path.Join(prefix, string(cveID)+".metrics.json") + reader := bytes.NewReader(data) + + gcsHelper.Upload(objectName, reader, "", "application/json") + + return nil } // CreateMetricsFile creates the initial file for the metrics record. diff --git a/vulnfeeds/conversion/writer/writer_test.go b/vulnfeeds/conversion/writer/writer_test.go index 3cedf9b6a5b..2638fe843b2 100644 --- a/vulnfeeds/conversion/writer/writer_test.go +++ b/vulnfeeds/conversion/writer/writer_test.go @@ -15,6 +15,8 @@ import ( "time" "github.com/fsouza/fake-gcs-server/fakestorage" + gcs "github.com/google/osv/vulnfeeds/gcs-tools" + "github.com/google/osv/vulnfeeds/models" "github.com/ossf/osv-schema/bindings/go/osvschema" "google.golang.org/protobuf/encoding/protojson" "google.golang.org/protobuf/types/known/timestamppb" @@ -59,7 +61,9 @@ func TestUploadToGCS(t *testing.T) { preModifiedBuf := []byte(`{"id":"CVE-2023-1234"}`) t.Run("Upload new object", func(t *testing.T) { - err := uploadIfChanged(ctx, v, preModifiedBuf, bkt, "") + hash := sha256.Sum256(preModifiedBuf) + hexHash := hex.EncodeToString(hash[:]) + err := uploadIfChanged(ctx, v, hexHash, preModifiedBuf, bkt, "") if err != nil { t.Errorf("Expected uploadToGCS to return nil for new object, got %v", err) } @@ -70,9 +74,6 @@ func TestUploadToGCS(t *testing.T) { t.Fatalf("Failed to get object attrs: %v", err) } - hash := sha256.Sum256(preModifiedBuf) - hexHash := hex.EncodeToString(hash[:]) - if attrs.Metadata[hashMetadataKey] != hexHash { t.Errorf("Expected hash %s, got %s", hexHash, attrs.Metadata[hashMetadataKey]) } @@ -81,7 +82,9 @@ func TestUploadToGCS(t *testing.T) { t.Run("Skip upload if hash matches", func(t *testing.T) { // Modify the vulnerability to simulate a change in modified time but not content v.Modified = timestamppb.New(time.Now().Add(1 * time.Hour)) - err := uploadIfChanged(ctx, v, preModifiedBuf, bkt, "") + hash := sha256.Sum256(preModifiedBuf) + hexHash := hex.EncodeToString(hash[:]) + err := uploadIfChanged(ctx, v, hexHash, preModifiedBuf, bkt, "") if !errors.Is(err, ErrUploadSkipped) { t.Errorf("Expected uploadToGCS to return ErrUploadSkipped when hash matches, got %v", err) } @@ -101,7 +104,9 @@ func TestUploadToGCS(t *testing.T) { t.Run("Upload if hash differs", func(t *testing.T) { preModifiedBuf2 := []byte(`{"id":"CVE-2023-1234", "summary": "updated"}`) - err := uploadIfChanged(ctx, v, preModifiedBuf2, bkt, "") + hash2 := sha256.Sum256(preModifiedBuf2) + hexHash2 := hex.EncodeToString(hash2[:]) + err := uploadIfChanged(ctx, v, hexHash2, preModifiedBuf2, bkt, "") if err != nil { t.Errorf("Expected uploadToGCS to return nil when hash differs, got %v", err) } @@ -112,9 +117,6 @@ func TestUploadToGCS(t *testing.T) { t.Fatalf("Failed to get object attrs: %v", err) } - hash2 := sha256.Sum256(preModifiedBuf2) - hexHash2 := hex.EncodeToString(hash2[:]) - if attrs3.Metadata[hashMetadataKey] != hexHash2 { t.Errorf("Expected hash %s, got %s", hexHash2, attrs3.Metadata[hashMetadataKey]) } @@ -244,7 +246,7 @@ func TestWorker(t *testing.T) { w.Close() var counter atomic.Uint64 - VulnWorker(ctx, vulnChan, outBkt, overridesBkt, "", &counter) + VulnWorker(ctx, vulnChan, outBkt, overridesBkt, nil, "", &counter) if counter.Load() != 2 { t.Errorf("Expected counter to be 2, got %d", counter.Load()) @@ -337,7 +339,7 @@ func TestHandleDeletion(t *testing.T) { {Id: "CVE-2023-3333"}, } - handleDeletion(ctx, bkt, "", vulnerabilities) + HandleDeletion(ctx, bkt, "", vulnerabilities) // CVE-2023-1111.json should still exist if _, err := bkt.Object("CVE-2023-1111.json").Attrs(ctx); err != nil { @@ -349,3 +351,94 @@ func TestHandleDeletion(t *testing.T) { t.Errorf("Expected CVE-2023-2222.json to be deleted, but it still exists") } } + +func TestUploadVulnIfChangedAsync(t *testing.T) { + server, err := fakestorage.NewServerWithOptions(fakestorage.Options{ + Scheme: "http", + }) + if err != nil { + t.Fatalf("Failed to create fake storage server: %v", err) + } + defer server.Stop() + + t.Setenv("STORAGE_EMULATOR_HOST", server.URL()) + + ctx := context.Background() + bucketName := "test-out-bucket" + server.CreateBucketWithOpts(fakestorage.CreateBucketOpts{Name: bucketName}) + + gcsHelper, err := gcs.InitUploadPool(ctx, 2, bucketName) + if err != nil { + t.Fatalf("Failed to init upload pool: %v", err) + } + + v := &osvschema.Vulnerability{ + Id: "CVE-2023-9999", + Affected: []*osvschema.Affected{ + {Package: &osvschema.Package{Name: "test-pkg"}}, + }, + } + + t.Run("Async upload new object", func(t *testing.T) { + err := UploadVulnIfChangedAsync(ctx, gcsHelper, "nvd-prefix", v) + if err != nil { + t.Fatalf("Expected UploadVulnIfChangedAsync to succeed, got %v", err) + } + + gcsHelper.CloseAndWait() + + client := server.Client() + bkt := client.Bucket(bucketName) + objName := "nvd-prefix/CVE-2023-9999.json" + obj := bkt.Object(objName) + attrs, err := obj.Attrs(ctx) + if err != nil { + t.Fatalf("Expected object %q to exist on GCS, got error: %v", objName, err) + } + + if attrs.Metadata[hashMetadataKey] == "" { + t.Errorf("Expected hash metadata to be set on GCS object") + } + }) +} + +func TestUploadMetricsToGCSAsync(t *testing.T) { + server, err := fakestorage.NewServerWithOptions(fakestorage.Options{ + Scheme: "http", + }) + if err != nil { + t.Fatalf("Failed to create fake storage server: %v", err) + } + defer server.Stop() + + t.Setenv("STORAGE_EMULATOR_HOST", server.URL()) + + ctx := context.Background() + bucketName := "test-out-bucket" + server.CreateBucketWithOpts(fakestorage.CreateBucketOpts{Name: bucketName}) + + gcsHelper, err := gcs.InitUploadPool(ctx, 2, bucketName) + if err != nil { + t.Fatalf("Failed to init upload pool: %v", err) + } + + metrics := &models.ConversionMetrics{ + CVEID: "CVE-2023-9999", + CNA: "nvd", + } + + err = UploadMetricsToGCSAsync(ctx, gcsHelper, "nvd-prefix", "CVE-2023-9999", metrics) + if err != nil { + t.Fatalf("Expected UploadMetricsToGCSAsync to succeed, got %v", err) + } + + gcsHelper.CloseAndWait() + + client := server.Client() + bkt := client.Bucket(bucketName) + objName := "nvd-prefix/CVE-2023-9999.metrics.json" + _, err = bkt.Object(objName).Attrs(ctx) + if err != nil { + t.Fatalf("Expected metrics object %q to exist on GCS, got error: %v", objName, err) + } +} diff --git a/vulnfeeds/gcs-tools/gcs.go b/vulnfeeds/gcs-tools/gcs.go index a73d66a8fe5..802fbc26ded 100644 --- a/vulnfeeds/gcs-tools/gcs.go +++ b/vulnfeeds/gcs-tools/gcs.go @@ -6,22 +6,116 @@ import ( "errors" "fmt" "io" + "log/slog" "os" "path/filepath" "strings" + "sync" "cloud.google.com/go/storage" + "github.com/google/osv/vulnfeeds/utility/logger" "golang.org/x/sync/errgroup" "google.golang.org/api/iterator" ) +const ( + hashMetadataKey = "sha256-hash" // hashMetadataKey is the key for the sha256 hash in the GCS object metadata. +) + +type Helper struct { + wg sync.WaitGroup + bus chan *uploadMsg + bkt *storage.BucketHandle + client *storage.Client + once sync.Once +} + +type uploadMsg struct { + objectName string + data io.Reader + contentType string + hash string // if hash is empty, always upload +} + +func InitUploadPool(ctx context.Context, workers int, bktName string) (*Helper, error) { + client, err := storage.NewClient(ctx) + if err != nil { + return nil, fmt.Errorf("storage.NewClient: %w", err) + } + + helper := &Helper{ + bus: make(chan *uploadMsg, workers), + bkt: client.Bucket(bktName), + client: client, + } + + for range workers { + helper.wg.Add(1) + go bucketWorker(ctx, helper) + } + + return helper, nil +} + +func bucketWorker(ctx context.Context, gcsHelper *Helper) { + defer gcsHelper.wg.Done() + for msg := range gcsHelper.bus { + func() { + if closer, ok := msg.data.(io.Closer); ok { + defer closer.Close() + } + if msg.hash != "" { + attrs, err := gcsHelper.bkt.Object(msg.objectName).Attrs(ctx) + if err == nil { + if attrs.Metadata != nil && attrs.Metadata[hashMetadataKey] == msg.hash { + logger.Info("Skipping GCS upload, hash matches", slog.String("id", msg.objectName)) + return + } + } else if !errors.Is(err, storage.ErrObjectNotExist) { + logger.Info("Failed to get object attributes", slog.String("object", msg.objectName), slog.String("error", err.Error())) + return + } + } + var metadata map[string]string + if msg.hash != "" { + metadata = map[string]string{hashMetadataKey: msg.hash} + } + if err := UploadToGCS(ctx, gcsHelper.bkt, msg.objectName, msg.data, msg.contentType, metadata); err != nil { + logger.Info("Failed to upload object", slog.String("object", msg.objectName), slog.String("error", err.Error())) + } + }() + } +} + +func (g *Helper) Upload(objectName string, data io.Reader, hash string, contentType string) { + g.bus <- &uploadMsg{ + objectName: objectName, + data: data, + hash: hash, + contentType: contentType, + } +} + +func (g *Helper) CloseAndWait() { + g.once.Do(func() { + close(g.bus) + g.wg.Wait() + if g.client != nil { + g.client.Close() + } + }) +} + // UploadToGCS uploads data from an io.Reader to a GCS bucket. -func UploadToGCS(ctx context.Context, bkt *storage.BucketHandle, objectName string, data io.Reader, contentType string) error { +func UploadToGCS(ctx context.Context, bkt *storage.BucketHandle, objectName string, data io.Reader, contentType string, metadata map[string]string) error { obj := bkt.Object(objectName) wc := obj.NewWriter(ctx) if contentType != "" { wc.ContentType = contentType } + if metadata != nil { + wc.Metadata = metadata + } if _, err := io.Copy(wc, data); err != nil { if closeErr := wc.Close(); closeErr != nil { @@ -46,7 +140,7 @@ func UploadFile(ctx context.Context, bkt *storage.BucketHandle, objectName strin } defer f.Close() - return UploadToGCS(ctx, bkt, objectName, f, "") + return UploadToGCS(ctx, bkt, objectName, f, "", nil) } // DownloadBucket downloads all objects from a GCS bucket to a local directory. diff --git a/vulnfeeds/gcs-tools/gcs_test.go b/vulnfeeds/gcs-tools/gcs_test.go index 93e36ff5d3d..af8a2c7f94f 100644 --- a/vulnfeeds/gcs-tools/gcs_test.go +++ b/vulnfeeds/gcs-tools/gcs_test.go @@ -21,7 +21,7 @@ func TestUploadToGCS(t *testing.T) { } content := []byte("test content") - err := UploadToGCS(context.Background(), bkt, "test-object.txt", bytes.NewReader(content), "text/plain") + err := UploadToGCS(context.Background(), bkt, "test-object.txt", bytes.NewReader(content), "text/plain", nil) if err != nil { t.Fatalf("UploadToGCS failed: %v", err) } From c963ab3b178cb90a929a0c21c6971933c16bc610 Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Mon, 4 May 2026 06:10:15 +0000 Subject: [PATCH 21/28] move year-by-year logic into Go so cache is shared across years --- .../cmd/converters/cve/nvd-cve-osv/main.go | 101 ++++++++++++------ .../nvd-cve-osv/run_cve_to_osv_generation.sh | 36 ++++--- vulnfeeds/conversion/common.go | 5 + 3 files changed, 94 insertions(+), 48 deletions(-) diff --git a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go index d03af7ed3af..240a16fe006 100644 --- a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go +++ b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go @@ -28,6 +28,7 @@ import ( var ( jsonPath = flag.String("nvd-json", "", "Path to NVD CVE JSON to examine.") + jsonDir = flag.String("nvd-json-dir", "", "Path to directory containing NVD CVE JSON files to examine.") parsedCPEDictionary = flag.String("cpe-repos", "", "Path to JSON mapping of CPEs to repos generated by cpe-repo-gen") outDir = flag.String("out-dir", "", "Path to output results.") outFormat = flag.String("out-format", "OSV", "Format to output {OSV,PackageInfo}") @@ -52,6 +53,12 @@ func main() { os.Exit(1) } + if *outDir != "" { + if err := os.MkdirAll(*outDir, 0755); err != nil { + logger.Fatal("Failed to create output directory", slog.Any("err", err)) + } + } + if *cpuProfile != "" { f, err := os.Create(*cpuProfile) if err != nil { @@ -67,20 +74,26 @@ func main() { logger.InitGlobalLogger() defer logger.Close() - data, err := os.ReadFile(*jsonPath) - if err != nil { - logger.Fatal("Failed to open file", slog.Any("err", err)) // double check this is best practice output + var files []string + if *jsonDir != "" { + matches, err := filepath.Glob(filepath.Join(*jsonDir, "nvdcve-2.0-*.json")) + if err != nil { + logger.Fatal("Failed to glob NVD JSON directory", slog.Any("err", err)) + } + files = matches + } else if *jsonPath != "" { + files = []string{*jsonPath} + } else { + logger.Fatal("Either --nvd-json or --nvd-json-dir must be provided") } - var parsed models.CVEAPIJSON20Schema - err = json.Unmarshal(data, &parsed) - if err != nil { - logger.Fatal("Failed to parse NVD CVE JSON", slog.Any("err", err)) + if len(files) == 0 { + logger.Fatal("No NVD JSON files found to process") } vpRepoCache := c.NewVPRepoCache() if *parsedCPEDictionary != "" { - err = c.LoadCPEDictionary(vpRepoCache, *parsedCPEDictionary) + err := c.LoadCPEDictionary(vpRepoCache, *parsedCPEDictionary) if err != nil { logger.Fatal("Failed to load parsed CPE dictionary", slog.Any("err", err)) } @@ -101,7 +114,7 @@ func main() { logger.Info("GCS Upload Pool initialized", slog.String("bucket", *outputBucket)) } - jobs := make(chan models.NVDCVE) + jobs := make(chan models.NVDCVE, *workers) var wg sync.WaitGroup for range *workers { @@ -109,8 +122,24 @@ func main() { go worker(ctx, &wg, jobs, gcsHelper, *outDir, vpRepoCache, repoTagsCache) } - for _, cve := range parsed.Vulnerabilities { - jobs <- cve.CVE + for _, file := range files { + logger.Info("Parsing NVD CVE file", slog.String("file", file)) + data, err := os.ReadFile(file) + if err != nil { + logger.Error("Failed to read NVD JSON file, skipping", slog.String("file", file), slog.Any("err", err)) + continue + } + + var parsed models.CVEAPIJSON20Schema + err = json.Unmarshal(data, &parsed) + if err != nil { + logger.Error("Failed to parse NVD JSON file, skipping", slog.String("file", file), slog.Any("err", err)) + continue + } + + for _, cve := range parsed.Vulnerabilities { + jobs <- cve.CVE + } } close(jobs) @@ -126,16 +155,7 @@ func main() { // Conduct analysis on the outcome of the converted files and output to a csv if *outputMetrics { - // Try to extract year from path, otherwise use "xxxx" filler - filename := filepath.Base(*jsonPath) - re := regexp.MustCompile(`nvdcve-2\.0-([0-9]{4})\.json`) - matches := re.FindStringSubmatch(filename) - if len(matches) >= 2 { - year := matches[1] - c.ConductAnalysis(year, *outDir) - } else { - c.ConductAnalysis("xxxx", *outDir) - } + c.ConductAnalysis("all", *outDir) } } @@ -182,6 +202,22 @@ func worker(ctx context.Context, wg *sync.WaitGroup, jobs <-chan models.NVDCVE, successfulConversionsCount.Add(1) } + // Extract year from CVE ID to organize local outputs into subfolders + re := regexp.MustCompile(`CVE-([0-9]{4})-[0-9]+`) + matches := re.FindStringSubmatch(cveID) + year := "xxxx" + if len(matches) >= 2 { + year = matches[1] + } + cveOutDir := outDir + if outDir != "" { + cveOutDir = filepath.Join(outDir, year) + if err := os.MkdirAll(cveOutDir, 0755); err != nil { + logger.Error("Failed to create year directory", slog.String("dir", cveOutDir), slog.Any("err", err)) + continue + } + } + if *uploadToGCS && gcsHelper != nil { if vuln != nil { if err := writer.UploadVulnIfChangedAsync(ctx, gcsHelper, *gcsPrefix, vuln.Vulnerability); err != nil { @@ -196,7 +232,7 @@ func worker(ctx context.Context, wg *sync.WaitGroup, jobs <-chan models.NVDCVE, } else { // Local file output if vuln != nil { - osvFile, err := writer.CreateOSVFile(models.CVEID(vuln.Id), outDir) + osvFile, err := writer.CreateOSVFile(models.CVEID(vuln.Id), cveOutDir) if err != nil { logger.Error("Failed to create OSV file locally", slog.String("cve", vuln.Id), slog.Any("err", err)) } else { @@ -206,16 +242,19 @@ func worker(ctx context.Context, wg *sync.WaitGroup, jobs <-chan models.NVDCVE, osvFile.Close() } } - if *outputMetrics && metrics != nil { - metricsFile, err := writer.CreateMetricsFile(models.CVEID(cveID), outDir) - if err != nil { - logger.Error("Failed to create metrics file locally", slog.String("cve", cveID), slog.Any("err", err)) - } else { - if err := writer.WriteMetricsFile(metrics, metricsFile); err != nil { - logger.Error("Failed to write metrics file locally", slog.String("cve", cveID), slog.Any("err", err)) - } - metricsFile.Close() + } + + // Always write metrics locally if requested, even if uploading to GCS + // so that we are able to analyse the outcomes in a csv file. + if *outputMetrics && metrics != nil { + metricsFile, err := writer.CreateMetricsFile(models.CVEID(cveID), cveOutDir) + if err != nil { + logger.Error("Failed to create metrics file locally", slog.String("cve", cveID), slog.Any("err", err)) + } else { + if err := writer.WriteMetricsFile(metrics, metricsFile); err != nil { + logger.Error("Failed to write metrics file locally", slog.String("cve", cveID), slog.Any("err", err)) } + metricsFile.Close() } } } diff --git a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/run_cve_to_osv_generation.sh b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/run_cve_to_osv_generation.sh index d8d794fc500..c3861cf51ae 100755 --- a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/run_cve_to_osv_generation.sh +++ b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/run_cve_to_osv_generation.sh @@ -41,26 +41,28 @@ gcloud --no-user-output-enabled storage -q cp "${NVD_GCS_PATH}/*-????.json" "${W echo "Downloading latest CPE Git repository map" gcloud --no-user-output-enabled storage -q cp "${CPEREPO_GCS_PATH}" "${WORK_DIR}" -mkdir -p "${WORK_DIR}/nvd2osv/gcs_stage" +NUM_WORKERS="${NUM_WORKERS:=30}" -NUM_WORKERS="${NUM_WORKERS:=10}" +# Extract GCS bucket and prefix from OSV_OUTPUT_GCS_PATH. +gcs_path="${OSV_OUTPUT_GCS_PATH#gs://}" +OSV_OUTPUT_GCS_BUCKET="${gcs_path%%/*}" +if [[ "${gcs_path}" == *"/"* ]]; then + OSV_OUTPUT_GCS_PREFIX="${gcs_path#*/}" +else + OSV_OUTPUT_GCS_PREFIX="" +fi # Convert NVD CVE records to OSV. -for (( YEAR = $(date +%Y) ; YEAR >= ${FIRST_INSCOPE_YEAR} ; YEAR-- )); do - # Run OSV record generation. - echo "Converting NVD CVE records from ${YEAR} to OSV" - /usr/local/bin/nvd-cve-osv \ - --cpe-repos "${WORK_DIR}/cpe_product_to_repo.json" \ - --nvd-json "${WORK_DIR}/nvd/nvdcve-2.0-${YEAR}.json" \ - --out-dir "${WORK_DIR}/nvd2osv/${YEAR}" \ - --out-format OSV \ - --workers "${NUM_WORKERS}" - - # Copy results to staging area. - echo "Copying NVD CVE records from ${YEAR} successfully converted to OSV to aggregated staging" - find "${WORK_DIR}/nvd2osv/${YEAR}" -type f -name \*.json \ - -exec cp '{}' "${WORK_DIR}/nvd2osv/gcs_stage/" \; -done +echo "Converting NVD CVE records to OSV" +/usr/local/bin/nvd-cve-osv \ + --cpe-repos "${WORK_DIR}/cpe_product_to_repo.json" \ + --nvd-json-dir "${WORK_DIR}/nvd" \ + --out-dir "${WORK_DIR}/nvd2osv" \ + --out-format OSV \ + --workers "${NUM_WORKERS}" \ + --upload-to-gcs=true \ + --output-bucket="${OSV_OUTPUT_GCS_BUCKET}" \ + --gcs-prefix="${OSV_OUTPUT_GCS_PREFIX}" echo "Conversion run complete" diff --git a/vulnfeeds/conversion/common.go b/vulnfeeds/conversion/common.go index d019beeb2de..6c5970fd86e 100644 --- a/vulnfeeds/conversion/common.go +++ b/vulnfeeds/conversion/common.go @@ -85,6 +85,11 @@ func ConductAnalysis(year string, dir string) { // get the current time in minutes currentTime := time.Now().Format("2006-01-02T15:04") outcomesCSV := "nvd-conversion-outcomes-" + year + "-" + currentTime + ".csv" + + if err := os.MkdirAll(dir, 0755); err != nil { + logger.Fatal("Failed to create output directory for analysis CSV file", slog.Any("err", err)) + } + csvFile, err := os.Create(filepath.Join(dir, outcomesCSV)) if err != nil { logger.Fatal("Failed to create analysis CSV file", slog.Any("err", err)) From 31842bb8ea0b83d0d800872c94d9978a6a000cdf Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Mon, 4 May 2026 06:16:01 +0000 Subject: [PATCH 22/28] fix lint --- vulnfeeds/gcs-tools/gcs.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/vulnfeeds/gcs-tools/gcs.go b/vulnfeeds/gcs-tools/gcs.go index 802fbc26ded..c63905d5b3e 100644 --- a/vulnfeeds/gcs-tools/gcs.go +++ b/vulnfeeds/gcs-tools/gcs.go @@ -23,11 +23,11 @@ const ( ) type Helper struct { - wg sync.WaitGroup - bus chan *uploadMsg - bkt *storage.BucketHandle - client *storage.Client - once sync.Once + wg sync.WaitGroup + bus chan *uploadMsg + bkt *storage.BucketHandle + client *storage.Client + once sync.Once } type uploadMsg struct { From 0908633a259b436766245db992600cd1472551f1 Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Mon, 4 May 2026 06:19:53 +0000 Subject: [PATCH 23/28] fix lint again --- .../cmd/converters/cve/nvd-cve-osv/main.go | 27 +++++++++---------- vulnfeeds/conversion/writer/writer.go | 6 ++--- vulnfeeds/conversion/writer/writer_test.go | 4 +-- 3 files changed, 17 insertions(+), 20 deletions(-) diff --git a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go index 240a16fe006..2d4eee2ec50 100644 --- a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go +++ b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go @@ -119,7 +119,7 @@ func main() { for range *workers { wg.Add(1) - go worker(ctx, &wg, jobs, gcsHelper, *outDir, vpRepoCache, repoTagsCache) + go worker(&wg, jobs, gcsHelper, *outDir, vpRepoCache, repoTagsCache) } for _, file := range files { @@ -181,7 +181,7 @@ func processCVE(cve models.NVDCVE, vpRepoCache *c.VPRepoCache, repoTagsCache *gi return vuln, finalMetrics, outcome } -func worker(ctx context.Context, wg *sync.WaitGroup, jobs <-chan models.NVDCVE, gcsHelper *gcs.Helper, outDir string, vpRepoCache *c.VPRepoCache, repoTagsCache *git.RepoTagsCache) { +func worker(wg *sync.WaitGroup, jobs <-chan models.NVDCVE, gcsHelper *gcs.Helper, outDir string, vpRepoCache *c.VPRepoCache, repoTagsCache *git.RepoTagsCache) { defer wg.Done() for cve := range jobs { vuln, metrics, outcome := processCVE(cve, vpRepoCache, repoTagsCache) @@ -220,27 +220,24 @@ func worker(ctx context.Context, wg *sync.WaitGroup, jobs <-chan models.NVDCVE, if *uploadToGCS && gcsHelper != nil { if vuln != nil { - if err := writer.UploadVulnIfChangedAsync(ctx, gcsHelper, *gcsPrefix, vuln.Vulnerability); err != nil { + if err := writer.UploadVulnIfChangedAsync(gcsHelper, *gcsPrefix, vuln.Vulnerability); err != nil { logger.Error("Failed to queue vulnerability upload", slog.String("cve", vuln.Id), slog.Any("err", err)) } } if *outputMetrics && metrics != nil { - if err := writer.UploadMetricsToGCSAsync(ctx, gcsHelper, *gcsPrefix, models.CVEID(cveID), metrics); err != nil { + if err := writer.UploadMetricsToGCSAsync(gcsHelper, *gcsPrefix, models.CVEID(cveID), metrics); err != nil { logger.Error("Failed to queue metrics upload", slog.String("cve", cveID), slog.Any("err", err)) } } - } else { - // Local file output - if vuln != nil { - osvFile, err := writer.CreateOSVFile(models.CVEID(vuln.Id), cveOutDir) - if err != nil { - logger.Error("Failed to create OSV file locally", slog.String("cve", vuln.Id), slog.Any("err", err)) - } else { - if err := vuln.ToJSON(osvFile); err != nil { - logger.Error("Failed to write OSV file locally", slog.String("cve", vuln.Id), slog.Any("err", err)) - } - osvFile.Close() + } else if vuln != nil { + osvFile, err := writer.CreateOSVFile(models.CVEID(vuln.Id), cveOutDir) + if err != nil { + logger.Error("Failed to create OSV file locally", slog.String("cve", vuln.Id), slog.Any("err", err)) + } else { + if err := vuln.ToJSON(osvFile); err != nil { + logger.Error("Failed to write OSV file locally", slog.String("cve", vuln.Id), slog.Any("err", err)) } + osvFile.Close() } } diff --git a/vulnfeeds/conversion/writer/writer.go b/vulnfeeds/conversion/writer/writer.go index e3a4cbb8242..f30fed98e7f 100644 --- a/vulnfeeds/conversion/writer/writer.go +++ b/vulnfeeds/conversion/writer/writer.go @@ -205,7 +205,7 @@ func VulnWorker(ctx context.Context, vulnChan <-chan *osvschema.Vulnerability, o writeErr = writeToDisk(vulnToProcess, preModifiedBuf, outputPrefix) } else if gcsHelper != nil { // Upload to GCS asynchronously using pool - writeErr = UploadVulnIfChangedAsync(ctx, gcsHelper, outputPrefix, vulnToProcess) + writeErr = UploadVulnIfChangedAsync(gcsHelper, outputPrefix, vulnToProcess) } else { // Upload to GCS synchronously hexHash, postModifiedBuf, err := prepareVulnUpload(vulnToProcess) @@ -368,7 +368,7 @@ func UploadMetricsToGCS(ctx context.Context, bkt *storage.BucketHandle, prefix s } // UploadVulnIfChangedAsync marshals a single OSV Vulnerability to JSON and schedules it for upload via the Helper pool if it has changed. -func UploadVulnIfChangedAsync(ctx context.Context, gcsHelper *gcs.Helper, prefix string, vuln *osvschema.Vulnerability) error { +func UploadVulnIfChangedAsync(gcsHelper *gcs.Helper, prefix string, vuln *osvschema.Vulnerability) error { hexHash, postModifiedBuf, err := prepareVulnUpload(vuln) if err != nil { return err @@ -381,7 +381,7 @@ func UploadVulnIfChangedAsync(ctx context.Context, gcsHelper *gcs.Helper, prefix } // UploadMetricsToGCSAsync marshals ConversionMetrics to JSON and schedules it for upload via the Helper pool. -func UploadMetricsToGCSAsync(ctx context.Context, gcsHelper *gcs.Helper, prefix string, cveID models.CVEID, metrics *models.ConversionMetrics) error { +func UploadMetricsToGCSAsync(gcsHelper *gcs.Helper, prefix string, cveID models.CVEID, metrics *models.ConversionMetrics) error { if metrics == nil || cveID == "" { return errors.New("invalid metrics or CVE ID provided") } diff --git a/vulnfeeds/conversion/writer/writer_test.go b/vulnfeeds/conversion/writer/writer_test.go index 2638fe843b2..72fb09c4ca1 100644 --- a/vulnfeeds/conversion/writer/writer_test.go +++ b/vulnfeeds/conversion/writer/writer_test.go @@ -380,7 +380,7 @@ func TestUploadVulnIfChangedAsync(t *testing.T) { } t.Run("Async upload new object", func(t *testing.T) { - err := UploadVulnIfChangedAsync(ctx, gcsHelper, "nvd-prefix", v) + err := UploadVulnIfChangedAsync(gcsHelper, "nvd-prefix", v) if err != nil { t.Fatalf("Expected UploadVulnIfChangedAsync to succeed, got %v", err) } @@ -427,7 +427,7 @@ func TestUploadMetricsToGCSAsync(t *testing.T) { CNA: "nvd", } - err = UploadMetricsToGCSAsync(ctx, gcsHelper, "nvd-prefix", "CVE-2023-9999", metrics) + err = UploadMetricsToGCSAsync(gcsHelper, "nvd-prefix", "CVE-2023-9999", metrics) if err != nil { t.Fatalf("Expected UploadMetricsToGCSAsync to succeed, got %v", err) } From 4c531c24a8c891ded0a028b26b16ac3bafdd9ff5 Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Mon, 4 May 2026 06:20:08 +0000 Subject: [PATCH 24/28] make cve5 upload async too --- .../cve/cve5/bulk-converter/main.go | 87 ++++++++++++++----- 1 file changed, 67 insertions(+), 20 deletions(-) diff --git a/vulnfeeds/cmd/converters/cve/cve5/bulk-converter/main.go b/vulnfeeds/cmd/converters/cve/cve5/bulk-converter/main.go index 3e554cf18da..abd87cf473e 100644 --- a/vulnfeeds/cmd/converters/cve/cve5/bulk-converter/main.go +++ b/vulnfeeds/cmd/converters/cve/cve5/bulk-converter/main.go @@ -2,6 +2,8 @@ package main import ( + "bytes" + "context" _ "embed" "encoding/json" "flag" @@ -16,6 +18,7 @@ import ( "github.com/google/osv/vulnfeeds/conversion/cve5" "github.com/google/osv/vulnfeeds/conversion/writer" + "github.com/google/osv/vulnfeeds/gcs-tools" "github.com/google/osv/vulnfeeds/models" "github.com/google/osv/vulnfeeds/utility/logger" ) @@ -27,6 +30,9 @@ var ( workers = flag.Int("workers", 30, "The number of concurrent workers to use for processing CVEs.") cnaAllowList = flag.String("cnas-allowlist", "", "A comma-separated list of CNAs to process. If not provided, defaults to cna_allowlist.txt.") rejectFailed = flag.Bool("reject-failed", false, "If set, OSV records with a failed conversion outcome will not be generated.") + uploadToGCS = flag.Bool("upload-to-gcs", false, "If true, upload to GCS bucket instead of writing to local disk.") + outputBucket = flag.String("output-bucket", "osv-test-cve-osv-conversion", "The GCS bucket to write to.") + gcsPrefix = flag.String("gcs-prefix", "cve5-osv", "The prefix within the GCS bucket.") ) //go:embed cna_allowlist.txt @@ -56,10 +62,22 @@ func main() { } } + var gcsHelper *gcs.Helper + ctx := context.Background() + if *uploadToGCS { + var err error + gcsHelper, err = gcs.InitUploadPool(ctx, *workers, *outputBucket) + if err != nil { + logger.Fatal("Failed to initialize GCS upload pool", slog.Any("err", err)) + } + defer gcsHelper.CloseAndWait() + logger.Info("GCS Upload Pool initialized", slog.String("bucket", *outputBucket)) + } + // Start the worker pool. for range *workers { wg.Add(1) - go worker(&wg, jobs, *localOutputDir, cnaList, *rejectFailed) + go worker(&wg, jobs, gcsHelper, *localOutputDir, cnaList, *rejectFailed) } // Discover files and send them to the workers. @@ -98,7 +116,7 @@ func main() { } // worker is a function that processes CVE files from the jobs channel. -func worker(wg *sync.WaitGroup, jobs <-chan string, outDir string, cnas []string, rejectFailed bool) { +func worker(wg *sync.WaitGroup, jobs <-chan string, gcsHelper *gcs.Helper, outDir string, cnas []string, rejectFailed bool) { defer wg.Done() for path := range jobs { data, err := os.ReadFile(path) @@ -119,12 +137,6 @@ func worker(wg *sync.WaitGroup, jobs <-chan string, outDir string, cnas []string cveID := cve.Metadata.CVEID logger.Info("Processing "+string(cveID), slog.String("cve", string(cveID))) - osvFile, errCVE := writer.CreateOSVFile(cveID, outDir) - metricsFile, errMetrics := writer.CreateMetricsFile(cveID, outDir) - if errCVE != nil || errMetrics != nil { - logger.Fatal("File failed to be created for CVE", slog.String("cve", string(cveID))) - } - sourceLink := "" baseDirCVEList := "cves/" // The base folder for the CVEListV5 repository. idx := strings.Index(path, baseDirCVEList) @@ -133,21 +145,56 @@ func worker(wg *sync.WaitGroup, jobs <-chan string, outDir string, cnas []string sourceLink = "https://github.com/CVEProject/cvelistV5/tree/main/" + relPath } - // Perform the conversion and export the results. - metrics, err := cve5.ConvertAndExportCVEToOSV(cve, osvFile, metricsFile, sourceLink) - if err != nil { - logger.Warn("Failed to generate an OSV record", slog.String("cve", string(cveID)), slog.Any("err", err)) + if gcsHelper != nil { + var vulnBuf, metricsBuf bytes.Buffer + metrics, err := cve5.ConvertAndExportCVEToOSV(cve, &vulnBuf, &metricsBuf, sourceLink) + if err != nil { + logger.Warn("Failed to generate an OSV record", slog.String("cve", string(cveID)), slog.Any("err", err)) + } else { + if rejectFailed && metrics.Outcome != models.Successful { + logger.Info("Rejecting failed OSV record", slog.String("cve", string(cveID)), slog.String("outcome", metrics.Outcome.String())) + } else { + logger.Info("Queueing OSV record for "+string(cveID), slog.String("cve", string(cveID))) + objectName := filepath.Join(*gcsPrefix, string(cveID)+".json") + gcsHelper.Upload(objectName, bytes.NewReader(vulnBuf.Bytes()), "", "application/json") + + metricsObjectName := filepath.Join(*gcsPrefix, string(cveID)+".metrics.json") + gcsHelper.Upload(metricsObjectName, bytes.NewReader(metricsBuf.Bytes()), "", "application/json") + } + } + + // Always write metrics locally for outcomes CSV auditing + metricsFile, err := writer.CreateMetricsFile(cveID, outDir) + if err == nil { + err = writer.WriteMetricsFile(metrics, metricsFile) + if err != nil { + logger.Error("Failed to write metrics file", slog.String("cve", string(cveID)), slog.Any("err", err)) + } + metricsFile.Close() + } } else { - if rejectFailed && metrics.Outcome != models.Successful { - logger.Info("Rejecting failed OSV record", slog.String("cve", string(cveID)), slog.String("outcome", metrics.Outcome.String())) - osvFile.Close() - os.Remove(osvFile.Name()) + osvFile, errCVE := writer.CreateOSVFile(cveID, outDir) + metricsFile, errMetrics := writer.CreateMetricsFile(cveID, outDir) + if errCVE != nil || errMetrics != nil { + logger.Fatal("File failed to be created for CVE", slog.String("cve", string(cveID))) + } + + // Perform the conversion and export the results. + metrics, err := cve5.ConvertAndExportCVEToOSV(cve, osvFile, metricsFile, sourceLink) + if err != nil { + logger.Warn("Failed to generate an OSV record", slog.String("cve", string(cveID)), slog.Any("err", err)) } else { - logger.Info("Generated OSV record for "+string(cveID), slog.String("cve", string(cveID)), slog.String("cna", cve.Metadata.AssignerShortName), slog.String("outcome", metrics.Outcome.String())) + if rejectFailed && metrics.Outcome != models.Successful { + logger.Info("Rejecting failed OSV record", slog.String("cve", string(cveID)), slog.String("outcome", metrics.Outcome.String())) + osvFile.Close() + os.Remove(osvFile.Name()) + } else { + logger.Info("Generated OSV record for "+string(cveID), slog.String("cve", string(cveID)), slog.String("cna", cve.Metadata.AssignerShortName), slog.String("outcome", metrics.Outcome.String())) + } } - } - metricsFile.Close() - osvFile.Close() + metricsFile.Close() + osvFile.Close() + } } } From 59d9c69709ce980f98c3b7e11dfdfe070d0484b8 Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Mon, 4 May 2026 06:23:24 +0000 Subject: [PATCH 25/28] fix cve5 bash script --- .../bulk-converter/run-cvelist-converter.sh | 31 ++++--------------- 1 file changed, 6 insertions(+), 25 deletions(-) diff --git a/vulnfeeds/cmd/converters/cve/cve5/bulk-converter/run-cvelist-converter.sh b/vulnfeeds/cmd/converters/cve/cve5/bulk-converter/run-cvelist-converter.sh index 4450f834452..a5169b8d269 100755 --- a/vulnfeeds/cmd/converters/cve/cve5/bulk-converter/run-cvelist-converter.sh +++ b/vulnfeeds/cmd/converters/cve/cve5/bulk-converter/run-cvelist-converter.sh @@ -53,30 +53,11 @@ fi # Convert CVEList records to OSV. echo "Commence CVEList bulk conversion run" ./cve-bulk-converter \ - --start-year="2022" \ - --out-dir="${LOCAL_OUT_DIR}/${OSV_OUTPUT_PATH}" \ - --workers="${NUM_WORKERS}" - -# Copy results to staging area. -echo "Copying CVEList records successfully converted to OSV to aggregated staging" -find "${LOCAL_OUT_DIR}/${OSV_OUTPUT_PATH}" -type f -name \*.json \ - -exec cp '{}' "${LOCAL_OUT_DIR}/gcs_stage/" \; - -# Copy (and remove any missing) results to GCS bucket, with some sanity -# checking. -objs_present=$(gcloud storage ls "${OSV_OUTPUT_GCS_PATH}" | wc -l) -objs_deleted=$(gcloud storage rsync --checksums-only --dry-run --delete-unmatched-destination-objects "${LOCAL_OUT_DIR}/gcs_stage" "${OSV_OUTPUT_GCS_PATH}" 2>&1 | grep "Would remove" | wc -l) - -threshold=$(echo "scale=2; ${objs_present} * (${SAFETY_THRESHOLD_PCT:-2} / 100)" | bc) - -# # Bash can't deal with floats -if (( $(echo "${objs_deleted} > ${threshold}" | bc -l) )); then - echo "Aborting. Unexpectedly high (${objs_deleted}) number of CVE records would be deleted!" >> /dev/stderr - gcloud storage rsync --checksums-only --dry-run --delete-unmatched-destination-objects "${LOCAL_OUT_DIR}/gcs_stage" "${OSV_OUTPUT_GCS_PATH}" 2>&1 | grep "Would remove" >> /dev/stderr - exit 1 -fi - -echo "Copying CVEList records successfully converted to GCS bucket" -gcloud storage rsync --checksums-only --delete-unmatched-destination-objects "${LOCAL_OUT_DIR}/gcs_stage" "${OSV_OUTPUT_GCS_PATH}" + --start-year="2022" \ + --out-dir="${LOCAL_OUT_DIR}/${OSV_OUTPUT_PATH}" \ + --workers="${NUM_WORKERS}" \ + --upload-to-gcs=true \ + --output-bucket="${OUTPUT_BUCKET}" \ + --gcs-prefix="${OSV_OUTPUT_PATH}" echo "Conversion run complete" From e01fed04954d0e9998b9e47877bee9072f6d363e Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Mon, 4 May 2026 23:45:24 +0000 Subject: [PATCH 26/28] do it in reverse chronological order --- .../cmd/converters/cve/nvd-cve-osv/main.go | 26 ++++++++++++++++++- .../nvd-cve-osv/run_cve_to_osv_generation.sh | 1 + 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go index 2d4eee2ec50..e48019f2bb1 100644 --- a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go +++ b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go @@ -13,6 +13,7 @@ import ( "regexp" "runtime/pprof" "slices" + "strconv" "sync" "sync/atomic" @@ -39,6 +40,7 @@ var ( uploadToGCS = flag.Bool("upload-to-gcs", false, "If true, upload to GCS bucket instead of writing to local disk.") outputBucket = flag.String("output-bucket", "osv-test-cve-osv-conversion", "The GCS bucket to write to.") gcsPrefix = flag.String("gcs-prefix", "nvd-osv", "The prefix within the GCS bucket.") + startYear = flag.Int("start-year", 2016, "The first in scope year to process. If 0, process all years.") ) var ( @@ -80,7 +82,18 @@ func main() { if err != nil { logger.Fatal("Failed to glob NVD JSON directory", slog.Any("err", err)) } - files = matches + for _, file := range matches { + filename := filepath.Base(file) + re := regexp.MustCompile(`nvdcve-2\.0-([0-9]{4})\.json`) + submatches := re.FindStringSubmatch(filename) + if len(submatches) >= 2 { + yearInt, _ := strconv.Atoi(submatches[1]) + if *startYear > 0 && yearInt < *startYear { + continue + } + } + files = append(files, file) + } } else if *jsonPath != "" { files = []string{*jsonPath} } else { @@ -91,6 +104,17 @@ func main() { logger.Fatal("No NVD JSON files found to process") } + // Process newest years first (reverse chronological order) + slices.SortFunc(files, func(a, b string) int { + if a > b { + return -1 + } + if a < b { + return 1 + } + return 0 + }) + vpRepoCache := c.NewVPRepoCache() if *parsedCPEDictionary != "" { err := c.LoadCPEDictionary(vpRepoCache, *parsedCPEDictionary) diff --git a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/run_cve_to_osv_generation.sh b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/run_cve_to_osv_generation.sh index c3861cf51ae..72a7daca338 100755 --- a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/run_cve_to_osv_generation.sh +++ b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/run_cve_to_osv_generation.sh @@ -58,6 +58,7 @@ echo "Converting NVD CVE records to OSV" /usr/local/bin/nvd-cve-osv \ --cpe-repos "${WORK_DIR}/cpe_product_to_repo.json" \ --nvd-json-dir "${WORK_DIR}/nvd" \ + --start-year="${FIRST_INSCOPE_YEAR}" \ --out-dir "${WORK_DIR}/nvd2osv" \ --out-format OSV \ --workers "${NUM_WORKERS}" \ From f7ac7323e9a679f463c85c9a837d1ef909f45cf0 Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Mon, 4 May 2026 23:52:15 +0000 Subject: [PATCH 27/28] fix lint --- vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go | 1 + 1 file changed, 1 insertion(+) diff --git a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go index e48019f2bb1..318bb16ddde 100644 --- a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go +++ b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go @@ -112,6 +112,7 @@ func main() { if a < b { return 1 } + return 0 }) From d5a44ae928b90d2c044fe12bdc153c77702c7891 Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Tue, 5 May 2026 02:59:53 +0000 Subject: [PATCH 28/28] different number of configurable gcs workers --- vulnfeeds/cmd/converters/cve/cve5/bulk-converter/main.go | 5 +++-- .../cve/cve5/bulk-converter/run-cvelist-converter.sh | 4 +++- vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go | 5 +++-- .../converters/cve/nvd-cve-osv/run_cve_to_osv_generation.sh | 4 +++- 4 files changed, 12 insertions(+), 6 deletions(-) diff --git a/vulnfeeds/cmd/converters/cve/cve5/bulk-converter/main.go b/vulnfeeds/cmd/converters/cve/cve5/bulk-converter/main.go index abd87cf473e..ad55e289a71 100644 --- a/vulnfeeds/cmd/converters/cve/cve5/bulk-converter/main.go +++ b/vulnfeeds/cmd/converters/cve/cve5/bulk-converter/main.go @@ -27,7 +27,8 @@ var ( repoDir = flag.String("cve5-repo", "cvelistV5", "CVEListV5 directory path") localOutputDir = flag.String("out-dir", "cve5", "Path to output results.") startYear = flag.String("start-year", "2022", "The first in scope year to process.") - workers = flag.Int("workers", 30, "The number of concurrent workers to use for processing CVEs.") + workers = flag.Int("workers", 10, "The number of concurrent workers to use for processing CVEs.") + gcsWorkers = flag.Int("gcs-workers", 30, "The number of concurrent workers to use for GCS uploads.") cnaAllowList = flag.String("cnas-allowlist", "", "A comma-separated list of CNAs to process. If not provided, defaults to cna_allowlist.txt.") rejectFailed = flag.Bool("reject-failed", false, "If set, OSV records with a failed conversion outcome will not be generated.") uploadToGCS = flag.Bool("upload-to-gcs", false, "If true, upload to GCS bucket instead of writing to local disk.") @@ -66,7 +67,7 @@ func main() { ctx := context.Background() if *uploadToGCS { var err error - gcsHelper, err = gcs.InitUploadPool(ctx, *workers, *outputBucket) + gcsHelper, err = gcs.InitUploadPool(ctx, *gcsWorkers, *outputBucket) if err != nil { logger.Fatal("Failed to initialize GCS upload pool", slog.Any("err", err)) } diff --git a/vulnfeeds/cmd/converters/cve/cve5/bulk-converter/run-cvelist-converter.sh b/vulnfeeds/cmd/converters/cve/cve5/bulk-converter/run-cvelist-converter.sh index a5169b8d269..f4c04d24c8a 100755 --- a/vulnfeeds/cmd/converters/cve/cve5/bulk-converter/run-cvelist-converter.sh +++ b/vulnfeeds/cmd/converters/cve/cve5/bulk-converter/run-cvelist-converter.sh @@ -32,7 +32,8 @@ set -u echo "Commencing cvelist conversion run" -NUM_WORKERS="${NUM_WORKERS:=30}" +NUM_WORKERS="${NUM_WORKERS:=10}" +GCS_WORKERS="${GCS_WORKERS:=30}" OUTPUT_BUCKET="${OUTPUT_BUCKET:=osv-test-cve-osv-conversion}" OSV_OUTPUT_PATH="cve5" @@ -56,6 +57,7 @@ echo "Commence CVEList bulk conversion run" --start-year="2022" \ --out-dir="${LOCAL_OUT_DIR}/${OSV_OUTPUT_PATH}" \ --workers="${NUM_WORKERS}" \ + --gcs-workers="${GCS_WORKERS}" \ --upload-to-gcs=true \ --output-bucket="${OUTPUT_BUCKET}" \ --gcs-prefix="${OSV_OUTPUT_PATH}" diff --git a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go index 318bb16ddde..81a05c189cb 100644 --- a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go +++ b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go @@ -33,7 +33,8 @@ var ( parsedCPEDictionary = flag.String("cpe-repos", "", "Path to JSON mapping of CPEs to repos generated by cpe-repo-gen") outDir = flag.String("out-dir", "", "Path to output results.") outFormat = flag.String("out-format", "OSV", "Format to output {OSV,PackageInfo}") - workers = flag.Int("workers", 30, "The number of concurrent workers to use for processing CVEs.") + workers = flag.Int("workers", 10, "The number of concurrent workers to use for processing CVEs.") + gcsWorkers = flag.Int("gcs-workers", 30, "The number of concurrent workers to use for GCS uploads.") rejectFailed = flag.Bool("reject-failed", false, "If set, OSV records with a failed conversion outcome will not be generated.") outputMetrics = flag.Bool("output-metrics", true, "If true, output the metrics information about the conversion") cpuProfile = flag.String("cpuprofile", "", "Path to write cpu profile to file (default = no output)") @@ -131,7 +132,7 @@ func main() { ctx := context.Background() if *uploadToGCS { var err error - gcsHelper, err = gcs.InitUploadPool(ctx, *workers, *outputBucket) + gcsHelper, err = gcs.InitUploadPool(ctx, *gcsWorkers, *outputBucket) if err != nil { logger.Fatal("Failed to initialize GCS upload pool", slog.Any("err", err)) } diff --git a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/run_cve_to_osv_generation.sh b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/run_cve_to_osv_generation.sh index 72a7daca338..31cce7a9a4c 100755 --- a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/run_cve_to_osv_generation.sh +++ b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/run_cve_to_osv_generation.sh @@ -41,7 +41,8 @@ gcloud --no-user-output-enabled storage -q cp "${NVD_GCS_PATH}/*-????.json" "${W echo "Downloading latest CPE Git repository map" gcloud --no-user-output-enabled storage -q cp "${CPEREPO_GCS_PATH}" "${WORK_DIR}" -NUM_WORKERS="${NUM_WORKERS:=30}" +NUM_WORKERS="${NUM_WORKERS:=10}" +GCS_WORKERS="${GCS_WORKERS:=30}" # Extract GCS bucket and prefix from OSV_OUTPUT_GCS_PATH. gcs_path="${OSV_OUTPUT_GCS_PATH#gs://}" @@ -62,6 +63,7 @@ echo "Converting NVD CVE records to OSV" --out-dir "${WORK_DIR}/nvd2osv" \ --out-format OSV \ --workers "${NUM_WORKERS}" \ + --gcs-workers "${GCS_WORKERS}" \ --upload-to-gcs=true \ --output-bucket="${OSV_OUTPUT_GCS_BUCKET}" \ --gcs-prefix="${OSV_OUTPUT_GCS_PREFIX}"