From 8b80d134e1f373859a8e7ccd441e8adcfd52ed0a Mon Sep 17 00:00:00 2001 From: Jiradet Ounjai Date: Thu, 7 Apr 2022 00:19:50 +0700 Subject: [PATCH 1/7] add optional flag to enable user to provide seed corpus --- common/experiment_utils.py | 6 ++ .../runner-startup-script-template.sh | 1 + experiment/run_experiment.py | 86 ++++++++++++++++++- experiment/runner.py | 20 ++++- experiment/scheduler.py | 1 + experiment/test_data/experiment-config.yaml | 1 + experiment/test_run_experiment.py | 1 + experiment/test_scheduler.py | 1 + 8 files changed, 114 insertions(+), 3 deletions(-) diff --git a/common/experiment_utils.py b/common/experiment_utils.py index c533d12ec..1f6b120a1 100644 --- a/common/experiment_utils.py +++ b/common/experiment_utils.py @@ -72,6 +72,12 @@ def get_oss_fuzz_corpora_filestore_path(): return posixpath.join(get_experiment_filestore_path(), 'oss_fuzz_corpora') +def get_random_seed_corpora_filestore_path(): + """Returns path containing the user-provided seed corpora.""" + return posixpath.join(get_experiment_filestore_path(), + 'random_seed_corpora') + + def get_dispatcher_instance_name(experiment: str) -> str: """Returns a dispatcher instance name for an experiment.""" return 'd-%s' % experiment diff --git a/experiment/resources/runner-startup-script-template.sh b/experiment/resources/runner-startup-script-template.sh index ac43f655f..829d804f2 100644 --- a/experiment/resources/runner-startup-script-template.sh +++ b/experiment/resources/runner-startup-script-template.sh @@ -46,6 +46,7 @@ docker run \ -e NO_SEEDS={{no_seeds}} \ -e NO_DICTIONARIES={{no_dictionaries}} \ -e OSS_FUZZ_CORPUS={{oss_fuzz_corpus}} \ +-e RANDOM_SEED_CORPUS={{random_seed_corpus}} \ -e DOCKER_REGISTRY={{docker_registry}} {% if not local_experiment %}-e CLOUD_PROJECT={{cloud_project}} -e CLOUD_COMPUTE_ZONE={{cloud_compute_zone}} {% endif %}\ -e EXPERIMENT_FILESTORE={{experiment_filestore}} {% if local_experiment %}-v {{experiment_filestore}}:{{experiment_filestore}} {% endif %}\ -e REPORT_FILESTORE={{report_filestore}} {% if local_experiment %}-v {{report_filestore}}:{{report_filestore}} {% endif %}\ diff --git a/experiment/run_experiment.py b/experiment/run_experiment.py index 2bc6e0622..e62a4d20f 100644 --- a/experiment/run_experiment.py +++ b/experiment/run_experiment.py @@ -22,6 +22,7 @@ import sys import tarfile import tempfile +import zipfile from typing import Dict, List import jinja2 @@ -63,6 +64,10 @@ 'gs://{project}-backup.clusterfuzz-external.appspot.com/corpus/' 'libFuzzer/{fuzz_target}/public.zip') +# max size allowed per seed corpus for AFL +CORPUS_ELEMENT_BYTES_LIMIT = 1 * 1024 * 1024 +RANDOM_CORPORA_ZIP_DIR_NAME = "random_seed_corpora_zip" + def read_and_validate_experiment_config(config_filename: str) -> Dict: """Reads |config_filename|, validates it, finds as many errors as possible, @@ -148,6 +153,54 @@ def get_directories(parent_dir): ] +# pylint: disable=too-many-locals +def validate_and_pack_random_seed_corpus(random_seed_corpus, benchmarks): + """Validate and archive seed corpus provided by user and.""" + if not os.path.isdir(random_seed_corpus): + raise ValidationError('Corpus location "%s" is invalid.' % + random_seed_corpus) + + with tempfile.TemporaryDirectory() as zip_dir: + for benchmark in benchmarks: + benchmark_corpus_dir = os.path.join(random_seed_corpus, benchmark) + if not os.path.exists(benchmark_corpus_dir): + raise ValidationError('Random seed corpus directory for ' + 'benchmark "%s" does not exist.' % + benchmark) + if not os.path.isdir(benchmark_corpus_dir): + raise ValidationError('seed corpus of benchmark "%s" must be ' + 'a directory.' % benchmark) + if not os.listdir(benchmark_corpus_dir): + raise ValidationError( + 'Seed corpus of benchmark "%s" is empty.' % benchmark) + + valid_corpus_files = set() + for root, _, files in os.walk(benchmark_corpus_dir): + for filename in files: + file_path = os.path.join(root, filename) + file_size = os.path.getsize(file_path) + + if file_size == 0 or file_size > CORPUS_ELEMENT_BYTES_LIMIT: + continue + valid_corpus_files.add(file_path) + + if not valid_corpus_files: + raise ValidationError('No valid corpus files for "%s"' % + benchmark) + + seed_zip_archive_path = os.path.join(zip_dir, f'{benchmark}.zip') + with zipfile.ZipFile(seed_zip_archive_path, 'w') as archive: + for filename in valid_corpus_files: + dir_name = os.path.dirname(filename) + archive.write( + filename, + os.path.relpath(filename, os.path.join(dir_name, '..'))) + + random_seed_corpora_zip_dir = os.path.join(random_seed_corpus, + RANDOM_CORPORA_ZIP_DIR_NAME) + filesystem.replace_dir(zip_dir, random_seed_corpora_zip_dir) + + def validate_benchmarks(benchmarks: List[str]): """Parses and validates list of benchmarks.""" benchmark_types = set() @@ -220,7 +273,8 @@ def start_experiment( # pylint: disable=too-many-arguments concurrent_builds=None, measurers_cpus=None, runners_cpus=None, - use_branch_coverage=False): + use_branch_coverage=False, + random_seed_corpus=None): """Start a fuzzer benchmarking experiment.""" if not allow_uncommitted_changes: check_no_uncommitted_changes() @@ -250,6 +304,12 @@ def start_experiment( # pylint: disable=too-many-arguments # 12GB is just the amount that KLEE needs, use this default to make KLEE # experiments easier to run. config['runner_memory'] = config.get('runner_memory', '12GB') + + config['random_seed_corpus'] = random_seed_corpus + if config['random_seed_corpus']: + validate_and_pack_random_seed_corpus(config['random_seed_corpus'], + benchmarks) + return start_experiment_from_full_config(config) @@ -332,6 +392,15 @@ def filter_file(tar_info): for benchmark in config['benchmarks']: add_oss_fuzz_corpus(benchmark, oss_fuzz_corpora_dir) + if config['random_seed_corpus']: + random_seed_corpus_zip = os.path.join(config['random_seed_corpus'], + RANDOM_CORPORA_ZIP_DIR_NAME) + filestore_utils.cp( + random_seed_corpus_zip, + experiment_utils.get_random_seed_corpora_filestore_path(), + recursive=True, + parallel=True) + class BaseDispatcher: """Class representing the dispatcher.""" @@ -524,6 +593,10 @@ def main(): '--runners-cpus', help='Cpus available to the runners.', required=False) + parser.add_argument('-rs', + '--random-seed-corpus', + help='Path to the random seed corpus', + required=True) all_fuzzers = fuzzer_utils.get_fuzzer_names() parser.add_argument('-f', @@ -593,6 +666,14 @@ def main(): parser.error('The sum of runners and measurers cpus is greater than the' ' available cpu cores (%d)' % os.cpu_count()) + if args.random_seed_corpus: + if args.no_seeds: + parser.error( + 'You cannot start an experiment with no_seeds option if' + ' seeds location is provided you') + if args.oss_fuzz_corpus: + parser.error('Cannot use seeds from multiple sources') + start_experiment(args.experiment_name, args.experiment_config, args.benchmarks, @@ -605,7 +686,8 @@ def main(): concurrent_builds=concurrent_builds, measurers_cpus=measurers_cpus, runners_cpus=runners_cpus, - use_branch_coverage=args.use_branch_coverage) + use_branch_coverage=args.use_branch_coverage, + random_seed_corpus=args.random_seed_corpus) return 0 diff --git a/experiment/runner.py b/experiment/runner.py index 94cc9305e..7642393f3 100644 --- a/experiment/runner.py +++ b/experiment/runner.py @@ -27,6 +27,7 @@ import threading import time import zipfile +import random from common import benchmark_config from common import environment @@ -115,6 +116,20 @@ def get_clusterfuzz_seed_corpus_path(fuzz_target_path): return seed_corpus_path if os.path.exists(seed_corpus_path) else None +def _unpack_random_seed_corpus(corpus_directory): + "Unpack and randomply pick one input from the seed corpus provided by user" + # remove initial seed corpus + shutil.rmtree(corpus_directory) + os.mkdir(corpus_directory) + benchmark = environment.get('BENCHMARK') + corpus_archive_filename = posixpath.join( + experiment_utils.get_random_seed_corpora_filestore_path(), + f'{benchmark}.zip') + with zipfile.ZipFile(corpus_archive_filename) as zip_file: + selected_file = random.choice(zip_file.infolist()) + zip_file.extract(selected_file, corpus_directory) + + def _unpack_clusterfuzz_seed_corpus(fuzz_target_path, corpus_directory): """If a clusterfuzz seed corpus archive is available, unpack it into the corpus directory if it exists. Copied from unpack_seed_corpus in @@ -172,7 +187,10 @@ def run_fuzzer(max_total_time, log_filename): logs.error('Fuzz target binary not found.') return - _unpack_clusterfuzz_seed_corpus(target_binary, input_corpus) + if environment.get('RANDOM_SEED_CORPUS'): + _unpack_random_seed_corpus(input_corpus) + else: + _unpack_clusterfuzz_seed_corpus(target_binary, input_corpus) _clean_seed_corpus(input_corpus) if max_total_time is None: diff --git a/experiment/scheduler.py b/experiment/scheduler.py index 0f8946001..21c54455e 100644 --- a/experiment/scheduler.py +++ b/experiment/scheduler.py @@ -717,6 +717,7 @@ def render_startup_script_template(instance_name: str, fuzzer: str, 'oss_fuzz_corpus': experiment_config['oss_fuzz_corpus'], 'num_cpu_cores': experiment_config['runner_num_cpu_cores'], 'cpuset': CPUSET, + 'random_seed_corpus': experiment_config['random_seed_corpus'], } if not local_experiment: diff --git a/experiment/test_data/experiment-config.yaml b/experiment/test_data/experiment-config.yaml index 67f556bea..17c8b095a 100644 --- a/experiment/test_data/experiment-config.yaml +++ b/experiment/test_data/experiment-config.yaml @@ -31,6 +31,7 @@ git_hash: "git-hash" no_seeds: false no_dictionaries: false oss_fuzz_corpus: false +random_seed_corpus: false description: "Test experiment" concurrent_builds: null runners_cpus: null diff --git a/experiment/test_run_experiment.py b/experiment/test_run_experiment.py index 47034fbf7..e3c458b9e 100644 --- a/experiment/test_run_experiment.py +++ b/experiment/test_run_experiment.py @@ -202,6 +202,7 @@ def test_copy_resources_to_bucket(tmp_path): 'experiment': 'experiment', 'benchmarks': ['libxslt_xpath'], 'oss_fuzz_corpus': True, + 'random_seed_corpus': False, } try: with mock.patch('common.filestore_utils.cp') as mocked_filestore_cp: diff --git a/experiment/test_scheduler.py b/experiment/test_scheduler.py index 02fbbef75..2cfb57568 100644 --- a/experiment/test_scheduler.py +++ b/experiment/test_scheduler.py @@ -118,6 +118,7 @@ def test_create_trial_instance(benchmark, expected_image, expected_target, -e NO_SEEDS=False \\ -e NO_DICTIONARIES=False \\ -e OSS_FUZZ_CORPUS=False \\ +-e RANDOM_SEED_CORPUS=False \\ -e DOCKER_REGISTRY=gcr.io/fuzzbench -e CLOUD_PROJECT=fuzzbench -e CLOUD_COMPUTE_ZONE=us-central1-a \\ -e EXPERIMENT_FILESTORE=gs://experiment-data \\ -e REPORT_FILESTORE=gs://web-reports \\ From 51e8c0803a593a2dd2bc497e797120ce146bf27b Mon Sep 17 00:00:00 2001 From: Jiradet Ounjai Date: Mon, 11 Apr 2022 16:38:50 +0700 Subject: [PATCH 2/7] rename variables --- experiment/run_experiment.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/experiment/run_experiment.py b/experiment/run_experiment.py index e62a4d20f..dbd660a90 100644 --- a/experiment/run_experiment.py +++ b/experiment/run_experiment.py @@ -168,7 +168,7 @@ def validate_and_pack_random_seed_corpus(random_seed_corpus, benchmarks): 'benchmark "%s" does not exist.' % benchmark) if not os.path.isdir(benchmark_corpus_dir): - raise ValidationError('seed corpus of benchmark "%s" must be ' + raise ValidationError('Seed corpus of benchmark "%s" must be ' 'a directory.' % benchmark) if not os.listdir(benchmark_corpus_dir): raise ValidationError( @@ -188,8 +188,8 @@ def validate_and_pack_random_seed_corpus(random_seed_corpus, benchmarks): raise ValidationError('No valid corpus files for "%s"' % benchmark) - seed_zip_archive_path = os.path.join(zip_dir, f'{benchmark}.zip') - with zipfile.ZipFile(seed_zip_archive_path, 'w') as archive: + benchmark_corpus_archive_path = os.path.join(zip_dir, f'{benchmark}.zip') + with zipfile.ZipFile(benchmark_corpus_archive_path, 'w') as archive: for filename in valid_corpus_files: dir_name = os.path.dirname(filename) archive.write( From 846907de62c3640123b90a108ff8fcaa346ef091 Mon Sep 17 00:00:00 2001 From: Jiradet Ounjai Date: Mon, 11 Apr 2022 16:55:11 +0700 Subject: [PATCH 3/7] don't require optional parameter --- experiment/run_experiment.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experiment/run_experiment.py b/experiment/run_experiment.py index dbd660a90..b353807e6 100644 --- a/experiment/run_experiment.py +++ b/experiment/run_experiment.py @@ -596,7 +596,7 @@ def main(): parser.add_argument('-rs', '--random-seed-corpus', help='Path to the random seed corpus', - required=True) + required=False) all_fuzzers = fuzzer_utils.get_fuzzer_names() parser.add_argument('-f', From a3864379bc36632d9190c39cba3b78e3eea0ed1b Mon Sep 17 00:00:00 2001 From: Jiradet Ounjai Date: Mon, 11 Apr 2022 20:27:27 +0700 Subject: [PATCH 4/7] update argument description --- experiment/run_experiment.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/experiment/run_experiment.py b/experiment/run_experiment.py index b353807e6..c3b95ec2f 100644 --- a/experiment/run_experiment.py +++ b/experiment/run_experiment.py @@ -188,7 +188,8 @@ def validate_and_pack_random_seed_corpus(random_seed_corpus, benchmarks): raise ValidationError('No valid corpus files for "%s"' % benchmark) - benchmark_corpus_archive_path = os.path.join(zip_dir, f'{benchmark}.zip') + benchmark_corpus_archive_path = os.path.join( + zip_dir, f'{benchmark}.zip') with zipfile.ZipFile(benchmark_corpus_archive_path, 'w') as archive: for filename in valid_corpus_files: dir_name = os.path.dirname(filename) @@ -668,11 +669,11 @@ def main(): if args.random_seed_corpus: if args.no_seeds: - parser.error( - 'You cannot start an experiment with no_seeds option if' - ' seeds location is provided you') + parser.error('Cannot enable options "random_seed_corpus" and ' + '"no_seeds" at the same time') if args.oss_fuzz_corpus: - parser.error('Cannot use seeds from multiple sources') + parser.error('Cannot enable options "random_seed_corpus" and ' + '"oss_fuzz_corpus" at the same time') start_experiment(args.experiment_name, args.experiment_config, From 389130b12e81112d6e49de7b28452e77d0850d9a Mon Sep 17 00:00:00 2001 From: Jiradet Ounjai Date: Tue, 12 Apr 2022 01:35:44 +0700 Subject: [PATCH 5/7] rename to random_seed_corpus_dir --- .../runner-startup-script-template.sh | 2 +- experiment/run_experiment.py | 33 ++++++++++--------- experiment/runner.py | 2 +- experiment/scheduler.py | 2 +- experiment/test_data/experiment-config.yaml | 2 +- experiment/test_run_experiment.py | 2 +- experiment/test_scheduler.py | 2 +- 7 files changed, 23 insertions(+), 22 deletions(-) diff --git a/experiment/resources/runner-startup-script-template.sh b/experiment/resources/runner-startup-script-template.sh index 829d804f2..aad4e0f16 100644 --- a/experiment/resources/runner-startup-script-template.sh +++ b/experiment/resources/runner-startup-script-template.sh @@ -46,7 +46,7 @@ docker run \ -e NO_SEEDS={{no_seeds}} \ -e NO_DICTIONARIES={{no_dictionaries}} \ -e OSS_FUZZ_CORPUS={{oss_fuzz_corpus}} \ --e RANDOM_SEED_CORPUS={{random_seed_corpus}} \ +-e RANDOM_SEED_CORPUS_DIR={{random_seed_corpus_dir}} \ -e DOCKER_REGISTRY={{docker_registry}} {% if not local_experiment %}-e CLOUD_PROJECT={{cloud_project}} -e CLOUD_COMPUTE_ZONE={{cloud_compute_zone}} {% endif %}\ -e EXPERIMENT_FILESTORE={{experiment_filestore}} {% if local_experiment %}-v {{experiment_filestore}}:{{experiment_filestore}} {% endif %}\ -e REPORT_FILESTORE={{report_filestore}} {% if local_experiment %}-v {{report_filestore}}:{{report_filestore}} {% endif %}\ diff --git a/experiment/run_experiment.py b/experiment/run_experiment.py index c3b95ec2f..5aa6af0c4 100644 --- a/experiment/run_experiment.py +++ b/experiment/run_experiment.py @@ -154,15 +154,16 @@ def get_directories(parent_dir): # pylint: disable=too-many-locals -def validate_and_pack_random_seed_corpus(random_seed_corpus, benchmarks): +def validate_and_pack_random_seed_corpus(random_seed_corpus_dir, benchmarks): """Validate and archive seed corpus provided by user and.""" - if not os.path.isdir(random_seed_corpus): + if not os.path.isdir(random_seed_corpus_dir): raise ValidationError('Corpus location "%s" is invalid.' % - random_seed_corpus) + random_seed_corpus_dir) with tempfile.TemporaryDirectory() as zip_dir: for benchmark in benchmarks: - benchmark_corpus_dir = os.path.join(random_seed_corpus, benchmark) + benchmark_corpus_dir = os.path.join(random_seed_corpus_dir, + benchmark) if not os.path.exists(benchmark_corpus_dir): raise ValidationError('Random seed corpus directory for ' 'benchmark "%s" does not exist.' % @@ -197,7 +198,7 @@ def validate_and_pack_random_seed_corpus(random_seed_corpus, benchmarks): filename, os.path.relpath(filename, os.path.join(dir_name, '..'))) - random_seed_corpora_zip_dir = os.path.join(random_seed_corpus, + random_seed_corpora_zip_dir = os.path.join(random_seed_corpus_dir, RANDOM_CORPORA_ZIP_DIR_NAME) filesystem.replace_dir(zip_dir, random_seed_corpora_zip_dir) @@ -275,7 +276,7 @@ def start_experiment( # pylint: disable=too-many-arguments measurers_cpus=None, runners_cpus=None, use_branch_coverage=False, - random_seed_corpus=None): + random_seed_corpus_dir=None): """Start a fuzzer benchmarking experiment.""" if not allow_uncommitted_changes: check_no_uncommitted_changes() @@ -306,9 +307,9 @@ def start_experiment( # pylint: disable=too-many-arguments # experiments easier to run. config['runner_memory'] = config.get('runner_memory', '12GB') - config['random_seed_corpus'] = random_seed_corpus - if config['random_seed_corpus']: - validate_and_pack_random_seed_corpus(config['random_seed_corpus'], + config['random_seed_corpus_dir'] = random_seed_corpus_dir + if config['random_seed_corpus_dir']: + validate_and_pack_random_seed_corpus(config['random_seed_corpus_dir'], benchmarks) return start_experiment_from_full_config(config) @@ -393,8 +394,8 @@ def filter_file(tar_info): for benchmark in config['benchmarks']: add_oss_fuzz_corpus(benchmark, oss_fuzz_corpora_dir) - if config['random_seed_corpus']: - random_seed_corpus_zip = os.path.join(config['random_seed_corpus'], + if config['random_seed_corpus_dir']: + random_seed_corpus_zip = os.path.join(config['random_seed_corpus_dir'], RANDOM_CORPORA_ZIP_DIR_NAME) filestore_utils.cp( random_seed_corpus_zip, @@ -595,7 +596,7 @@ def main(): help='Cpus available to the runners.', required=False) parser.add_argument('-rs', - '--random-seed-corpus', + '--random-seed-corpus-dir', help='Path to the random seed corpus', required=False) @@ -667,12 +668,12 @@ def main(): parser.error('The sum of runners and measurers cpus is greater than the' ' available cpu cores (%d)' % os.cpu_count()) - if args.random_seed_corpus: + if args.random_seed_corpus_dir: if args.no_seeds: - parser.error('Cannot enable options "random_seed_corpus" and ' + parser.error('Cannot enable options "random_seed_corpus_dir" and ' '"no_seeds" at the same time') if args.oss_fuzz_corpus: - parser.error('Cannot enable options "random_seed_corpus" and ' + parser.error('Cannot enable options "random_seed_corpus_dir" and ' '"oss_fuzz_corpus" at the same time') start_experiment(args.experiment_name, @@ -688,7 +689,7 @@ def main(): measurers_cpus=measurers_cpus, runners_cpus=runners_cpus, use_branch_coverage=args.use_branch_coverage, - random_seed_corpus=args.random_seed_corpus) + random_seed_corpus_dir=args.random_seed_corpus_dir) return 0 diff --git a/experiment/runner.py b/experiment/runner.py index 7642393f3..af97df09b 100644 --- a/experiment/runner.py +++ b/experiment/runner.py @@ -187,7 +187,7 @@ def run_fuzzer(max_total_time, log_filename): logs.error('Fuzz target binary not found.') return - if environment.get('RANDOM_SEED_CORPUS'): + if environment.get('RANDOM_SEED_CORPUS_DIR'): _unpack_random_seed_corpus(input_corpus) else: _unpack_clusterfuzz_seed_corpus(target_binary, input_corpus) diff --git a/experiment/scheduler.py b/experiment/scheduler.py index 21c54455e..910dcba86 100644 --- a/experiment/scheduler.py +++ b/experiment/scheduler.py @@ -717,7 +717,7 @@ def render_startup_script_template(instance_name: str, fuzzer: str, 'oss_fuzz_corpus': experiment_config['oss_fuzz_corpus'], 'num_cpu_cores': experiment_config['runner_num_cpu_cores'], 'cpuset': CPUSET, - 'random_seed_corpus': experiment_config['random_seed_corpus'], + 'random_seed_corpus_dir': experiment_config['random_seed_corpus_dir'], } if not local_experiment: diff --git a/experiment/test_data/experiment-config.yaml b/experiment/test_data/experiment-config.yaml index 17c8b095a..e99dc8978 100644 --- a/experiment/test_data/experiment-config.yaml +++ b/experiment/test_data/experiment-config.yaml @@ -31,7 +31,7 @@ git_hash: "git-hash" no_seeds: false no_dictionaries: false oss_fuzz_corpus: false -random_seed_corpus: false +random_seed_corpus_dir: null description: "Test experiment" concurrent_builds: null runners_cpus: null diff --git a/experiment/test_run_experiment.py b/experiment/test_run_experiment.py index e3c458b9e..f40c44a85 100644 --- a/experiment/test_run_experiment.py +++ b/experiment/test_run_experiment.py @@ -202,7 +202,7 @@ def test_copy_resources_to_bucket(tmp_path): 'experiment': 'experiment', 'benchmarks': ['libxslt_xpath'], 'oss_fuzz_corpus': True, - 'random_seed_corpus': False, + 'random_seed_corpus_dir': None, } try: with mock.patch('common.filestore_utils.cp') as mocked_filestore_cp: diff --git a/experiment/test_scheduler.py b/experiment/test_scheduler.py index 2cfb57568..2598f0950 100644 --- a/experiment/test_scheduler.py +++ b/experiment/test_scheduler.py @@ -118,7 +118,7 @@ def test_create_trial_instance(benchmark, expected_image, expected_target, -e NO_SEEDS=False \\ -e NO_DICTIONARIES=False \\ -e OSS_FUZZ_CORPUS=False \\ --e RANDOM_SEED_CORPUS=False \\ +-e RANDOM_SEED_CORPUS_DIR=None \\ -e DOCKER_REGISTRY=gcr.io/fuzzbench -e CLOUD_PROJECT=fuzzbench -e CLOUD_COMPUTE_ZONE=us-central1-a \\ -e EXPERIMENT_FILESTORE=gs://experiment-data \\ -e REPORT_FILESTORE=gs://web-reports \\ From 5b9cc505d4c661430a03770c0f3c5e60d277f2ce Mon Sep 17 00:00:00 2001 From: Jiradet Ounjai Date: Tue, 12 Apr 2022 02:23:27 +0700 Subject: [PATCH 6/7] delete tmp dir containing zip corpus --- experiment/run_experiment.py | 86 ++++++++++++++++-------------------- 1 file changed, 39 insertions(+), 47 deletions(-) diff --git a/experiment/run_experiment.py b/experiment/run_experiment.py index 5aa6af0c4..bf7567f6d 100644 --- a/experiment/run_experiment.py +++ b/experiment/run_experiment.py @@ -66,7 +66,6 @@ # max size allowed per seed corpus for AFL CORPUS_ELEMENT_BYTES_LIMIT = 1 * 1024 * 1024 -RANDOM_CORPORA_ZIP_DIR_NAME = "random_seed_corpora_zip" def read_and_validate_experiment_config(config_filename: str) -> Dict: @@ -160,47 +159,39 @@ def validate_and_pack_random_seed_corpus(random_seed_corpus_dir, benchmarks): raise ValidationError('Corpus location "%s" is invalid.' % random_seed_corpus_dir) - with tempfile.TemporaryDirectory() as zip_dir: - for benchmark in benchmarks: - benchmark_corpus_dir = os.path.join(random_seed_corpus_dir, - benchmark) - if not os.path.exists(benchmark_corpus_dir): - raise ValidationError('Random seed corpus directory for ' - 'benchmark "%s" does not exist.' % - benchmark) - if not os.path.isdir(benchmark_corpus_dir): - raise ValidationError('Seed corpus of benchmark "%s" must be ' - 'a directory.' % benchmark) - if not os.listdir(benchmark_corpus_dir): - raise ValidationError( - 'Seed corpus of benchmark "%s" is empty.' % benchmark) - - valid_corpus_files = set() - for root, _, files in os.walk(benchmark_corpus_dir): - for filename in files: - file_path = os.path.join(root, filename) - file_size = os.path.getsize(file_path) - - if file_size == 0 or file_size > CORPUS_ELEMENT_BYTES_LIMIT: - continue - valid_corpus_files.add(file_path) - - if not valid_corpus_files: - raise ValidationError('No valid corpus files for "%s"' % - benchmark) + for benchmark in benchmarks: + benchmark_corpus_dir = os.path.join(random_seed_corpus_dir, benchmark) + if not os.path.exists(benchmark_corpus_dir): + raise ValidationError('Random seed corpus directory for ' + 'benchmark "%s" does not exist.' % benchmark) + if not os.path.isdir(benchmark_corpus_dir): + raise ValidationError('Seed corpus of benchmark "%s" must be ' + 'a directory.' % benchmark) + if not os.listdir(benchmark_corpus_dir): + raise ValidationError('Seed corpus of benchmark "%s" is empty.' % + benchmark) - benchmark_corpus_archive_path = os.path.join( - zip_dir, f'{benchmark}.zip') - with zipfile.ZipFile(benchmark_corpus_archive_path, 'w') as archive: - for filename in valid_corpus_files: - dir_name = os.path.dirname(filename) - archive.write( - filename, - os.path.relpath(filename, os.path.join(dir_name, '..'))) + valid_corpus_files = set() + for root, _, files in os.walk(benchmark_corpus_dir): + for filename in files: + file_path = os.path.join(root, filename) + file_size = os.path.getsize(file_path) + + if file_size == 0 or file_size > CORPUS_ELEMENT_BYTES_LIMIT: + continue + valid_corpus_files.add(file_path) - random_seed_corpora_zip_dir = os.path.join(random_seed_corpus_dir, - RANDOM_CORPORA_ZIP_DIR_NAME) - filesystem.replace_dir(zip_dir, random_seed_corpora_zip_dir) + if not valid_corpus_files: + raise ValidationError('No valid corpus files for "%s"' % benchmark) + + benchmark_corpus_archive_path = os.path.join(random_seed_corpus_dir, + f'{benchmark}.zip') + with zipfile.ZipFile(benchmark_corpus_archive_path, 'w') as archive: + for filename in valid_corpus_files: + dir_name = os.path.dirname(filename) + archive.write( + filename, + os.path.relpath(filename, os.path.join(dir_name, '..'))) def validate_benchmarks(benchmarks: List[str]): @@ -395,13 +386,14 @@ def filter_file(tar_info): add_oss_fuzz_corpus(benchmark, oss_fuzz_corpora_dir) if config['random_seed_corpus_dir']: - random_seed_corpus_zip = os.path.join(config['random_seed_corpus_dir'], - RANDOM_CORPORA_ZIP_DIR_NAME) - filestore_utils.cp( - random_seed_corpus_zip, - experiment_utils.get_random_seed_corpora_filestore_path(), - recursive=True, - parallel=True) + for benchmark in config['benchmarks']: + benchmark_corpus_archive_path = os.path.join( + config['random_seed_corpus_dir'], f'{benchmark}.zip') + filestore_utils.cp( + benchmark_corpus_archive_path, + experiment_utils.get_random_seed_corpora_filestore_path() + "/", + recursive=True, + parallel=True) class BaseDispatcher: From b5548805d30afd9009c8dc8dc013be3cd5c9d1dd Mon Sep 17 00:00:00 2001 From: Jiradet Ounjai Date: Wed, 13 Apr 2022 12:09:42 +0700 Subject: [PATCH 7/7] use single quote --- experiment/run_experiment.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experiment/run_experiment.py b/experiment/run_experiment.py index bf7567f6d..6c654c1c7 100644 --- a/experiment/run_experiment.py +++ b/experiment/run_experiment.py @@ -391,7 +391,7 @@ def filter_file(tar_info): config['random_seed_corpus_dir'], f'{benchmark}.zip') filestore_utils.cp( benchmark_corpus_archive_path, - experiment_utils.get_random_seed_corpora_filestore_path() + "/", + experiment_utils.get_random_seed_corpora_filestore_path() + '/', recursive=True, parallel=True)