Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
28bb3aa
create some helper functions for uploading to/downloading from GCS
jess-lowe Mar 5, 2026
3cd4243
parallelize + tests
jess-lowe Mar 5, 2026
3a32283
rename upload -> gcs-tools
jess-lowe Mar 6, 2026
45cb1f8
make things more go test idiomatic
jess-lowe Mar 6, 2026
c44a7d6
Merge branch 'master' into feat/upload-to-gcs
jess-lowe Mar 17, 2026
6a596ec
fix test
jess-lowe Mar 17, 2026
d506f45
fix lint
jess-lowe Mar 17, 2026
263f87d
Merge branch 'feat/use-go-gcs' into refactor/nvd-use-gcs
jess-lowe Mar 19, 2026
933d315
initial changes for immediately uploading records to gcs bucket
jess-lowe Mar 19, 2026
06c4f4f
progress
jess-lowe Mar 19, 2026
886c2c1
fix some compiling issues and lint
jess-lowe Mar 20, 2026
15c4757
remove the upload part of the run-cve-to-osv generation script
jess-lowe Mar 20, 2026
b8ed421
renamed functions and moved them around to be more and less generic
jess-lowe Mar 20, 2026
3c64e5f
Merge branch 'master' into refactor/nvd-use-gcs
jess-lowe Mar 20, 2026
6c773e6
fix lint
jess-lowe Mar 20, 2026
31e132b
Merge branch 'refactor/nvd-use-gcs' of https://github.com/jess-lowe/o…
jess-lowe Mar 20, 2026
24369ea
remove unneeded test as output happens elsewhere
jess-lowe Mar 20, 2026
9468c20
Merge remote-tracking branch 'upstream/master' into refactor/nvd-use-gcs
jess-lowe Apr 29, 2026
d586a11
fix some linter issues
jess-lowe Apr 29, 2026
9e3fa62
fixed the test change
jess-lowe Apr 29, 2026
5b315de
update nits
jess-lowe May 1, 2026
997ae65
update comment
jess-lowe May 1, 2026
da92e68
ensure deterministic output
jess-lowe May 1, 2026
dc1ab51
add logs for upload and fixed upload still happening if vuln not changed
jess-lowe May 1, 2026
4167826
asynchronously upload to gcs using a worker pool
jess-lowe May 4, 2026
c963ab3
move year-by-year logic into Go so cache is shared across years
jess-lowe May 4, 2026
31842bb
fix lint
jess-lowe May 4, 2026
0908633
fix lint again
jess-lowe May 4, 2026
4c531c2
make cve5 upload async too
jess-lowe May 4, 2026
59d9c69
fix cve5 bash script
jess-lowe May 4, 2026
e01fed0
do it in reverse chronological order
jess-lowe May 4, 2026
f7ac732
fix lint
jess-lowe May 4, 2026
d5a44ae
different number of configurable gcs workers
jess-lowe May 5, 2026
a377b13
Merge branch 'master' into refactor/nvd-use-gcs
jess-lowe May 5, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions vulnfeeds/cmd/combine-to-osv/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ import (

"cloud.google.com/go/storage"
"github.com/google/osv/vulnfeeds/conversion"
"github.com/google/osv/vulnfeeds/conversion/writer"
"github.com/google/osv/vulnfeeds/models"
"github.com/google/osv/vulnfeeds/upload"
"github.com/google/osv/vulnfeeds/utility/logger"
"github.com/ossf/osv-schema/bindings/go/osvschema"
"google.golang.org/api/iterator"
Expand Down Expand Up @@ -92,7 +92,7 @@ func main() {
vulnerabilities = append(vulnerabilities, v)
}

upload.Upload(ctx, "OSV files", *uploadToGCS, *outputBucketName, *overridesBucketName, *numWorkers, *osvOutputPath, vulnerabilities, *syncDeletions)
writer.UploadVulnsToGCS(ctx, "OSV files", *uploadToGCS, *outputBucketName, *overridesBucketName, *numWorkers, *osvOutputPath, vulnerabilities, *syncDeletions)
}

// extractCVEName extracts the CVE name from a given filename and prefix.
Expand Down
4 changes: 2 additions & 2 deletions vulnfeeds/cmd/converters/alpine/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ import (
"strings"
"time"

"github.com/google/osv/vulnfeeds/conversion/writer"
"github.com/google/osv/vulnfeeds/models"
"github.com/google/osv/vulnfeeds/upload"
"github.com/google/osv/vulnfeeds/utility/logger"
"github.com/google/osv/vulnfeeds/vulns"
"github.com/ossf/osv-schema/bindings/go/osvschema"
Expand Down Expand Up @@ -72,7 +72,7 @@ func main() {
}

ctx := context.Background()
upload.Upload(ctx, "Alpine CVEs", *uploadToGCS, *outputBucketName, "", *numWorkers, *alpineOutputPath, vulnerabilities, *syncDeletions)
writer.UploadVulnsToGCS(ctx, "Alpine CVEs", *uploadToGCS, *outputBucketName, "", *numWorkers, *alpineOutputPath, vulnerabilities, *syncDeletions)
logger.Info("Alpine CVE conversion succeeded.")
}

Expand Down
92 changes: 70 additions & 22 deletions vulnfeeds/cmd/converters/cve/cve5/bulk-converter/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
package main

import (
"bytes"
"context"
_ "embed"
"encoding/json"
"flag"
Expand All @@ -14,8 +16,9 @@ import (
"sync"
"time"

"github.com/google/osv/vulnfeeds/conversion"
"github.com/google/osv/vulnfeeds/conversion/cve5"
"github.com/google/osv/vulnfeeds/conversion/writer"
"github.com/google/osv/vulnfeeds/gcs-tools"
"github.com/google/osv/vulnfeeds/models"
"github.com/google/osv/vulnfeeds/utility/logger"
)
Expand All @@ -24,9 +27,13 @@ var (
repoDir = flag.String("cve5-repo", "cvelistV5", "CVEListV5 directory path")
localOutputDir = flag.String("out-dir", "cve5", "Path to output results.")
startYear = flag.String("start-year", "2022", "The first in scope year to process.")
workers = flag.Int("workers", 30, "The number of concurrent workers to use for processing CVEs.")
workers = flag.Int("workers", 10, "The number of concurrent workers to use for processing CVEs.")
gcsWorkers = flag.Int("gcs-workers", 30, "The number of concurrent workers to use for GCS uploads.")
cnaDenyList = flag.String("cna-denylist", "", "A comma-separated list of CNAs to skip. If not provided, defaults to cna_denylist.txt.")
rejectFailed = flag.Bool("reject-failed", false, "If set, OSV records with a failed conversion outcome will not be generated.")
uploadToGCS = flag.Bool("upload-to-gcs", false, "If true, upload to GCS bucket instead of writing to local disk.")
outputBucket = flag.String("output-bucket", "osv-test-cve-osv-conversion", "The GCS bucket to write to.")
gcsPrefix = flag.String("gcs-prefix", "cve5-osv", "The prefix within the GCS bucket.")
)

//go:embed cna_denylist.txt
Expand Down Expand Up @@ -56,10 +63,22 @@ func main() {
}
}

var gcsHelper *gcs.Helper
ctx := context.Background()
if *uploadToGCS {
var err error
gcsHelper, err = gcs.InitUploadPool(ctx, *gcsWorkers, *outputBucket)
if err != nil {
logger.Fatal("Failed to initialize GCS upload pool", slog.Any("err", err))
}
defer gcsHelper.CloseAndWait()
logger.Info("GCS Upload Pool initialized", slog.String("bucket", *outputBucket))
}

// Start the worker pool.
for range *workers {
wg.Add(1)
go worker(&wg, jobs, *localOutputDir, cnaList, *rejectFailed)
go worker(&wg, jobs, gcsHelper, *localOutputDir, cnaList, *rejectFailed)
}

// Discover files and send them to the workers.
Expand Down Expand Up @@ -98,7 +117,7 @@ func main() {
}

// worker is a function that processes CVE files from the jobs channel.
func worker(wg *sync.WaitGroup, jobs <-chan string, outDir string, cnas []string, rejectFailed bool) {
func worker(wg *sync.WaitGroup, jobs <-chan string, gcsHelper *gcs.Helper, outDir string, cnas []string, rejectFailed bool) {
defer wg.Done()
for path := range jobs {
data, err := os.ReadFile(path)
Expand All @@ -119,12 +138,6 @@ func worker(wg *sync.WaitGroup, jobs <-chan string, outDir string, cnas []string
cveID := cve.Metadata.CVEID
logger.Info("Processing "+string(cveID), slog.String("cve", string(cveID)))

osvFile, errCVE := conversion.CreateOSVFile(cveID, outDir)
metricsFile, errMetrics := conversion.CreateMetricsFile(cveID, outDir)
if errCVE != nil || errMetrics != nil {
logger.Fatal("File failed to be created for CVE", slog.String("cve", string(cveID)))
}

sourceLink := ""
baseDirCVEList := "cves/" // The base folder for the CVEListV5 repository.
idx := strings.Index(path, baseDirCVEList)
Expand All @@ -133,21 +146,56 @@ func worker(wg *sync.WaitGroup, jobs <-chan string, outDir string, cnas []string
sourceLink = "https://github.com/CVEProject/cvelistV5/tree/main/" + relPath
}

// Perform the conversion and export the results.
metrics, err := cve5.ConvertAndExportCVEToOSV(cve, osvFile, metricsFile, sourceLink)
if err != nil {
logger.Warn("Failed to generate an OSV record", slog.String("cve", string(cveID)), slog.Any("err", err))
if gcsHelper != nil {
var vulnBuf, metricsBuf bytes.Buffer
metrics, err := cve5.ConvertAndExportCVEToOSV(cve, &vulnBuf, &metricsBuf, sourceLink)
if err != nil {
logger.Warn("Failed to generate an OSV record", slog.String("cve", string(cveID)), slog.Any("err", err))
} else {
if rejectFailed && metrics.Outcome != models.Successful {
logger.Info("Rejecting failed OSV record", slog.String("cve", string(cveID)), slog.String("outcome", metrics.Outcome.String()))
} else {
logger.Info("Queueing OSV record for "+string(cveID), slog.String("cve", string(cveID)))
objectName := filepath.Join(*gcsPrefix, string(cveID)+".json")
gcsHelper.Upload(objectName, bytes.NewReader(vulnBuf.Bytes()), "", "application/json")

metricsObjectName := filepath.Join(*gcsPrefix, string(cveID)+".metrics.json")
gcsHelper.Upload(metricsObjectName, bytes.NewReader(metricsBuf.Bytes()), "", "application/json")
}
}

// Always write metrics locally for outcomes CSV auditing
metricsFile, err := writer.CreateMetricsFile(cveID, outDir)
if err == nil {
err = writer.WriteMetricsFile(metrics, metricsFile)
if err != nil {
logger.Error("Failed to write metrics file", slog.String("cve", string(cveID)), slog.Any("err", err))
}
metricsFile.Close()
}
} else {
if rejectFailed && metrics.Outcome != models.Successful {
logger.Info("Rejecting failed OSV record", slog.String("cve", string(cveID)), slog.String("outcome", metrics.Outcome.String()))
osvFile.Close()
os.Remove(osvFile.Name())
osvFile, errCVE := writer.CreateOSVFile(cveID, outDir)
metricsFile, errMetrics := writer.CreateMetricsFile(cveID, outDir)
if errCVE != nil || errMetrics != nil {
logger.Fatal("File failed to be created for CVE", slog.String("cve", string(cveID)))
}

// Perform the conversion and export the results.
metrics, err := cve5.ConvertAndExportCVEToOSV(cve, osvFile, metricsFile, sourceLink)
if err != nil {
logger.Warn("Failed to generate an OSV record", slog.String("cve", string(cveID)), slog.Any("err", err))
} else {
logger.Info("Generated OSV record for "+string(cveID), slog.String("cve", string(cveID)), slog.String("cna", cve.Metadata.AssignerShortName), slog.String("outcome", metrics.Outcome.String()))
if rejectFailed && metrics.Outcome != models.Successful {
logger.Info("Rejecting failed OSV record", slog.String("cve", string(cveID)), slog.String("outcome", metrics.Outcome.String()))
osvFile.Close()
os.Remove(osvFile.Name())
} else {
logger.Info("Generated OSV record for "+string(cveID), slog.String("cve", string(cveID)), slog.String("cna", cve.Metadata.AssignerShortName), slog.String("outcome", metrics.Outcome.String()))
}
}
}

metricsFile.Close()
osvFile.Close()
metricsFile.Close()
osvFile.Close()
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ set -u


echo "Commencing cvelist conversion run"
NUM_WORKERS="${NUM_WORKERS:=30}"
NUM_WORKERS="${NUM_WORKERS:=10}"
GCS_WORKERS="${GCS_WORKERS:=30}"

OUTPUT_BUCKET="${OUTPUT_BUCKET:=osv-test-cve-osv-conversion}"
OSV_OUTPUT_PATH="cve5"
Expand All @@ -53,30 +54,12 @@ fi
# Convert CVEList records to OSV.
echo "Commence CVEList bulk conversion run"
./cve-bulk-converter \
--start-year="2022" \
--out-dir="${LOCAL_OUT_DIR}/${OSV_OUTPUT_PATH}" \
--workers="${NUM_WORKERS}"

# Copy results to staging area.
echo "Copying CVEList records successfully converted to OSV to aggregated staging"
find "${LOCAL_OUT_DIR}/${OSV_OUTPUT_PATH}" -type f -name \*.json \
-exec cp '{}' "${LOCAL_OUT_DIR}/gcs_stage/" \;

# Copy (and remove any missing) results to GCS bucket, with some sanity
# checking.
objs_present=$(gcloud storage ls "${OSV_OUTPUT_GCS_PATH}" | wc -l)
objs_deleted=$(gcloud storage rsync --checksums-only --dry-run --delete-unmatched-destination-objects "${LOCAL_OUT_DIR}/gcs_stage" "${OSV_OUTPUT_GCS_PATH}" 2>&1 | grep "Would remove" | wc -l)

threshold=$(echo "scale=2; ${objs_present} * (${SAFETY_THRESHOLD_PCT:-2} / 100)" | bc)

# # Bash can't deal with floats
if (( $(echo "${objs_deleted} > ${threshold}" | bc -l) )); then
echo "Aborting. Unexpectedly high (${objs_deleted}) number of CVE records would be deleted!" >> /dev/stderr
gcloud storage rsync --checksums-only --dry-run --delete-unmatched-destination-objects "${LOCAL_OUT_DIR}/gcs_stage" "${OSV_OUTPUT_GCS_PATH}" 2>&1 | grep "Would remove" >> /dev/stderr
exit 1
fi

echo "Copying CVEList records successfully converted to GCS bucket"
gcloud storage rsync --checksums-only --delete-unmatched-destination-objects "${LOCAL_OUT_DIR}/gcs_stage" "${OSV_OUTPUT_GCS_PATH}"
--start-year="2022" \
--out-dir="${LOCAL_OUT_DIR}/${OSV_OUTPUT_PATH}" \
--workers="${NUM_WORKERS}" \
--gcs-workers="${GCS_WORKERS}" \
--upload-to-gcs=true \
--output-bucket="${OUTPUT_BUCKET}" \
--gcs-prefix="${OSV_OUTPUT_PATH}"

echo "Conversion run complete"
6 changes: 3 additions & 3 deletions vulnfeeds/cmd/converters/cve/cve5/single-converter/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ import (
"log/slog"
"os"

"github.com/google/osv/vulnfeeds/conversion"
"github.com/google/osv/vulnfeeds/conversion/cve5"
"github.com/google/osv/vulnfeeds/conversion/writer"
"github.com/google/osv/vulnfeeds/models"
"github.com/google/osv/vulnfeeds/utility/logger"
)
Expand Down Expand Up @@ -46,8 +46,8 @@ func main() {
}
// create the files

osvFile, errCVE := conversion.CreateOSVFile(cveID, outDir)
metricsFile, errMetrics := conversion.CreateMetricsFile(cveID, outDir)
osvFile, errCVE := writer.CreateOSVFile(cveID, outDir)
metricsFile, errMetrics := writer.CreateMetricsFile(cveID, outDir)
if errCVE != nil || errMetrics != nil {
logger.Fatal("File failed to be created for CVE", slog.String("cve", string(cveID)))
}
Expand Down
Loading
Loading