diff --git a/cmdbench/cli.py b/cmdbench/cli.py
index 35e3bae..7ed8da7 100644
--- a/cmdbench/cli.py
+++ b/cmdbench/cli.py
@@ -36,6 +36,10 @@
               help="Width and height of the saving plot. Works if --save-plot is specified.")
 @click.option("--save-plot", "-p", default = None, type=click.File('wb'),
               help="File address to save a plot of the command's resource usage over time (CPU + Memory).")
+@click.option("--capture-points", "-c", default = False, is_flag = True, show_default=True,
+              help="Plots delimiters wherever your program prints \"cmdbench point\\n\"; only takes effect "
+              "when --save-plot is specified. Slows down execution. "
+              "NOTE: flush your program's output buffer after each print (e.g. fflush(stdout) in C).")
 @click.option("--iterations", "-i", default = 1, type = click.IntRange(1), show_default=True,
               help="Number of iterations to get benchmarking results for the target command.")
@@ -63,10 +67,11 @@ def benchmark(command, iterations, **kwargs):
     If no printing options are specified, statistics will be printed for more than 1 iterations,
     and the first iteration for only 1 iteration."""
     np.set_printoptions(threshold=15)
-
+    capture_items = []
+    if kwargs.get('capture_points'): capture_items.append('points')
     click.echo("Benchmarking started..")
     benchmark_results = BenchmarkResults()
-    benchmark_generator = benchmark_command_generator(" ".join(command), iterations)
+    benchmark_generator = benchmark_command_generator(" ".join(command), iterations, capture_items = capture_items)
     t = tqdm(range(iterations))
     for i in t:
         benchmark_result = next(benchmark_generator)
@@ -78,7 +83,7 @@
     click.echo("Benchmarking done.")
     click.echo()
 
-    option_keys = ["print_statistics", "print_averages", "print_values", "print_first_iteration", "print_all_iterations"]
+    option_keys = ["print_statistics", "print_averages", "print_values", "print_first_iteration", "print_all_iterations", "capture_points"]
 
     # Print statistics if user did not tell us what info to print
     printing_any = False
@@ -110,12 +115,13 @@
             print_benchmark_dict(BenchmarkDict.from_dict(iteration), "Iteration #%s" % (ind + 1), indentation = 4, title_fg_color="magenta")
 
     save_plot_value = kwargs["save_plot"]
+    capture_points = kwargs["capture_points"]
     if save_plot_value is not None:
         save_plot_sizes = kwargs["save_plot_size"]
         save_plot_width = save_plot_sizes[0]
         save_plot_height = save_plot_sizes[1]
 
-        fig = benchmark_results.get_resources_plot(save_plot_width, save_plot_height)
+        fig = benchmark_results.get_resources_plot(save_plot_width, save_plot_height, capture_points)
         if fig:
             fig.savefig(save_plot_value)
             click.echo("Plot saved.")
diff --git a/cmdbench/core.py b/cmdbench/core.py
index 4626dd9..af040b5 100644
--- a/cmdbench/core.py
+++ b/cmdbench/core.py
@@ -11,6 +11,7 @@
 import tempfile
 import shlex
 import click
+import io
 
 from sys import platform as _platform
 is_linux = _platform.startswith("linux")
@@ -18,25 +19,25 @@
 is_unix = is_linux or is_macos
 is_win = os.name == "nt"
 
-def benchmark_command(command, iterations_num = 1, raw_data = False):
+def benchmark_command(command, iterations_num = 1, raw_data = False, capture_items = []):
     if iterations_num <= 0:
         raise Exception("The number of iterations to run the command should be >= 1")
 
     raw_benchmark_results = []
     for _ in range(iterations_num):
-        raw_benchmark_result = single_benchmark_command_raw(command)
+        raw_benchmark_result = single_benchmark_command_raw(command, capture_items)
         raw_benchmark_results.append(raw_benchmark_result)
 
     final_benchmark_results = list(map(lambda raw_benchmark_result: raw_benchmark_result if raw_data else raw_to_final_benchmark(raw_benchmark_result), raw_benchmark_results))
 
     return BenchmarkResults(final_benchmark_results)
 
-def benchmark_command_generator(command, iterations_num = 1, raw_data = False):
+def benchmark_command_generator(command, iterations_num = 1, raw_data = False, capture_items = []):
     if iterations_num <= 0:
         raise Exception("The number of iterations to run the command should be >= 1")
 
     for _ in range(iterations_num):
-        raw_benchmark_result = single_benchmark_command_raw(command)
+        raw_benchmark_result = single_benchmark_command_raw(command, capture_items)
         final_benchmark_result = raw_benchmark_result if raw_data else raw_to_final_benchmark(raw_benchmark_result)
         yield BenchmarkResults([final_benchmark_result])
 
@@ -65,7 +66,7 @@
 
     exit_code = benchmark_raw_dict["general"]["exit_code"]
 
-
+    points = benchmark_raw_dict["time_series"].get("points", [])
     benchmark_results = {
         "process": { "stdout_data": process_stdout_data, "stderr_data": process_stderr_data, "execution_time": process_execution_time, "exit_code": exit_code },
@@ -75,7 +76,8 @@
         {
             "sample_milliseconds": time_series_sample_milliseconds,
             "cpu_percentages": time_series_cpu_percentages,
-            "memory_bytes": time_series_memory_bytes
+            "memory_bytes": time_series_memory_bytes,
+            "points": points
         }
     }
 
 # psutil io_counters() is not available on macos
@@ -300,9 +302,11 @@
         shared_process_dict["memory_values"] = memory_values
 
 # Performs benchmarking on the command based on both /usr/bin/time and psutil library
-def single_benchmark_command_raw(command):
+def single_benchmark_command_raw(command, capture_items=set()):
     # https://docs.python.org/3/library/shlex.html#shlex.split
     commands_list = shlex.split(command)
+    # remove duplicates and convert to a set for O(1) membership checks
+    capture_items = set(capture_items)
 
     time_tmp_output_file = None
@@ -315,6 +319,12 @@
 
     # CPU
     cpu_times = None
+
+    # POINTS
+    points = []
+    capture_points = "points" in capture_items
+    CMDBENCH_BYTE_STRING = str.encode("cmdbench", encoding=OutputCapture.output_encoding)
+    POINT_BYTE_STRING = str.encode("cmdbench point", encoding=OutputCapture.output_encoding)
 
     # Disk
     disk_io_counters = None
@@ -365,7 +375,10 @@
 
     # Finally, run the command
     # Master process could be GNU Time running target command or the target command itself
-    master_process = psutil.Popen(commands_list, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    if not capture_items:
+        master_process = psutil.Popen(commands_list, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    else:
+        master_process = psutil.Popen(commands_list, stdout=subprocess.PIPE, stderr=subprocess.PIPE, bufsize=1, text=True)
     execution_start = current_milli_time()
 
     # Only in linux, we target command will be GNU Time's child process
@@ -394,12 +407,35 @@
 
     if not shared_process_dict["skip_benchmarking"]:
         shared_process_dict["target_process_pid"] = p.pid
 
-    # Wait for process to finish (time_series_exec and fixed_data_exec will be processing it in parallel)
-    outdata, errdata = master_process.communicate()
-    outdata, errdata = outdata.decode(sys.stdout.encoding), errdata.decode(sys.stderr.encoding)
+    # Wait for process to finish (time_series_exec and fixed_data_exec will be processing it in parallel)
+    if not capture_items:
+        outdata, errdata = master_process.communicate()
+        outdata, errdata = outdata.decode(sys.stdout.encoding), errdata.decode(sys.stderr.encoding)
+    else:
+        outdata = []
+        errdata = []
+        for line in master_process.stdout:
+            if not line:
+                break
+            if capture_points and line.startswith("cmdbench point"):
+                points.append(current_nano_time())
+            outdata.append(line)
+
+        for line in iter(master_process.stderr.readline, ''):
+            if not line:
+                break
+            errdata.append(line)
+    execution_end = current_milli_time()
+    # join outputs and convert points after the read loop to keep per-line overhead low
+    if isinstance(outdata, list):
+        outdata = "".join(outdata)
+    if isinstance(errdata, list):
+        errdata = "".join(errdata)
+    points = [round(point / 1_000_000 - execution_start, 2) for point in points]
+
     # Done with the master process, wait for the parallel (threads or processes) to finish up
     time_series_exec.join()
     fixed_data_exec.join()
 
@@ -550,7 +586,9 @@
 
         {
             "sample_milliseconds": np.array(sample_milliseconds),
             "cpu_percentages": np.array(cpu_percentages),
-            "memory_bytes": np.array(memory_values)
+            "memory_bytes": np.array(memory_values),
+            "points": points
+
         },
     }
diff --git a/cmdbench/result.py b/cmdbench/result.py
index fb582c7..7382002 100644
--- a/cmdbench/result.py
+++ b/cmdbench/result.py
@@ -60,6 +60,9 @@ def stats_replace_func(list_of_objects, key_path):
         value_per_attribute_stats_dict = self._get_values_per_attribute(self.iterations, stats_replace_func)
         return BenchmarkDict.from_dict(value_per_attribute_stats_dict)
 
+    def get_points(self):
+        return self._get_values_per_attribute(self.iterations)["time_series"].get("points", [])
+
     def get_averages(self):
 
         time_series_dict_key = "time_series"
@@ -80,7 +83,7 @@ def avg_replace_func(list_of_objects, key_path):
 
         time_series_y_values = {}
         for key, value in value_per_attribute_avgs_dict["time_series"].items():
-            if key != "sample_milliseconds":
+            if key not in ["sample_milliseconds", "points"]:
                 time_series_y_values[key] = value
 
         if len(time_series_x_values) == 0 or len(time_series_x_values[0]) == 0:
@@ -114,12 +117,14 @@
         for key, value in time_series_y_values_out.items():
             averaged_time_series[key] = value
 
+        # averaged_time_series['points'] = value_per_attribute_avgs_dict["time_series"]['points']
+
         value_per_attribute_avgs_dict["time_series"] = averaged_time_series
 
         return BenchmarkDict.from_dict(value_per_attribute_avgs_dict)
 
-    def get_resources_plot(self, width = 15, height = 3):
+    def get_resources_plot(self, width = 15, height = 3, capture_points=False):
         if not matplotlib_available:
             raise Exception("You need to install matplotlib before using this method")
 
@@ -128,6 +133,9 @@
             time_series_obj = self.get_first_iteration()
         else:
             time_series_obj = self.get_averages()
+
+        if capture_points:
+            points = self.get_points()
 
         time_series_obj = time_series_obj["time_series"]
 
@@ -179,6 +187,16 @@
         ax_cpu.set_ylabel("CPU (%)", color=color)
         ax_cpu.plot(x, cpu_y, color=color, alpha=0.75, linewidth=1)
         ax_cpu.tick_params(axis="y", labelcolor=color)
+        if capture_points:
+            # one horizontal band of delimiters per iteration, each in its own color
+            for ind, point_list in enumerate(points):
+                color = COLORS[(ind + 3) % len(COLORS)]
+                ylen = 1 / len(points)
+                ymin = ind * ylen
+                ymax = ymin + ylen
+                for point in point_list:
+                    if isinstance(point, (float, int)):
+                        ax_cpu.axvline(x=point, ymin=ymin, ymax=ymax, linestyle="-", lw=1, alpha=0.2, color=color)
 
         #plt.fill_between(x, cpu_y, alpha=0.2, color=color)
         #plt.tight_layout()
diff --git a/cmdbench/utils.py b/cmdbench/utils.py
index 9cabec9..c4e039c 100644
--- a/cmdbench/utils.py
+++ b/cmdbench/utils.py
@@ -2,7 +2,12 @@
 from collections import defaultdict
 import numpy as np
 from beeprint import pp
+try:
+    from matplotlib import pyplot as plt
+    COLORS = plt.rcParams["axes.prop_cycle"].by_key()["color"]
+except ImportError:
+    COLORS = []  # matplotlib is optional; result.py guards plotting with matplotlib_available
 
 
 # https://stackoverflow.com/a/41274937
 # Allows attribute access through both obj["key"] (internal library convenience) and obj.key (external developer convenience)
@@ -94,8 +99,13 @@
             "min": self.min, "max": self.max
         }
 
+class OutputCapture:
+    output_encoding = "utf-8"
+
+
 # https://stackoverflow.com/a/5998359
 current_milli_time = lambda: int(round(time.time() * 1000))
+current_nano_time = lambda: time.time_ns()
 
 
 def iterable(obj):
diff --git a/pyproject.toml b/pyproject.toml
index 8afd58c..b9884b0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "cmdbench"
-version = "0.1.22"
+version = "0.2.22"
 description = "Quick and easy benchmarking for any command's CPU, memory, disk usage and runtime."
 authors = ["Mohsen Yousefian "]
 license = "MIT"
diff --git a/resources/plot delimiters.png b/resources/plot delimiters.png
new file mode 100644
index 0000000..92ddad6
Binary files /dev/null and b/resources/plot delimiters.png differ
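
A minimal sketch of a target program for the new --capture-points flag (the file name workload.py and the phase timings are illustrative assumptions, not part of this patch; the "cmdbench point" marker and the flush requirement come from the option's help text):

    # workload.py - hypothetical target program for --capture-points
    import sys
    import time

    for phase in range(3):
        time.sleep(0.5)          # stand-in for one unit of real work
        print("cmdbench point")  # marker line the capture reader scans for
        sys.stdout.flush()       # same role as fflush(stdout) in C

An invocation along the lines of "cmdbench -c -p plot.png python3 workload.py" should then draw a vertical delimiter on the saved plot at each marker.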
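The same feature through the library API, as a sketch grounded in the signatures this patch changes (capture_items on benchmark_command, capture_points on get_resources_plot; the top-level import path is an assumption):

    from cmdbench import benchmark_command

    # "points" is the only capture item this patch defines
    results = benchmark_command("python3 workload.py", iterations_num=2, capture_items=["points"])
    print(results.get_points())  # one list of millisecond offsets per iteration

    fig = results.get_resources_plot(15, 3, capture_points=True)  # width, height
    if fig:
        fig.savefig("plot.png")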