diff --git a/cmdbench/cli.py b/cmdbench/cli.py
index 35e3bae..7ed8da7 100644
--- a/cmdbench/cli.py
+++ b/cmdbench/cli.py
@@ -36,6 +36,10 @@
               help="Width and height of the saving plot. Works if --save-plot is specified.")
 @click.option("--save-plot", "-p", default = None, type=click.File('wb'),
               help="File address to save a plot of the command's resource usage over time (CPU + Memory).")
+@click.option("--capture-points", "-c", default = False, is_flag = True, show_default=True,
+              help="Plots delimiters wherever your program prints \"cmdbench point\\n\"; only takes effect "
+              "when --save-plot is specified. Slows down execution. "
+              "NOTE: flush your program's output buffer after each print (e.g. fflush(stdout) in C).")
 @click.option("--iterations", "-i", default = 1, type = click.IntRange(1), show_default=True,
               help="Number of iterations to get benchmarking results for the target command.")
@@ -63,10 +67,11 @@ def benchmark(command, iterations, **kwargs):
     If no printing options are specified, statistics will be printed for more than 1 iterations,
     and the first iteration for only 1 iteration."""
     np.set_printoptions(threshold=15)
-
+    capture_items = []
+    if kwargs.get('capture_points'): capture_items.append('points')
     click.echo("Benchmarking started..")
     benchmark_results = BenchmarkResults()
-    benchmark_generator = benchmark_command_generator(" ".join(command), iterations)
+    benchmark_generator = benchmark_command_generator(" ".join(command), iterations, capture_items = capture_items)
     t = tqdm(range(iterations))
     for i in t:
         benchmark_result = next(benchmark_generator)
@@ -78,7 +83,7 @@
     click.echo("Benchmarking done.")
     click.echo()
 
-    option_keys = ["print_statistics", "print_averages", "print_values", "print_first_iteration", "print_all_iterations"]
+    option_keys = ["print_statistics", "print_averages", "print_values", "print_first_iteration", "print_all_iterations", "capture_points"]
 
     # Print statistics if user did not tell us what info to print
     printing_any = False
@@ -110,12 +115,13 @@
             print_benchmark_dict(BenchmarkDict.from_dict(iteration), "Iteration #%s" % (ind + 1), indentation = 4, title_fg_color="magenta")
 
     save_plot_value = kwargs["save_plot"]
+    capture_points = kwargs["capture_points"]
     if save_plot_value is not None:
         save_plot_sizes = kwargs["save_plot_size"]
         save_plot_width = save_plot_sizes[0]
         save_plot_height = save_plot_sizes[1]
 
-        fig = benchmark_results.get_resources_plot(save_plot_width, save_plot_height)
+        fig = benchmark_results.get_resources_plot(save_plot_width, save_plot_height, capture_points)
         if fig:
             fig.savefig(save_plot_value)
             click.echo("Plot saved.")
diff --git a/cmdbench/core.py b/cmdbench/core.py
index 4626dd9..af040b5 100644
--- a/cmdbench/core.py
+++ b/cmdbench/core.py
@@ -11,6 +11,7 @@
 import tempfile
 import shlex
 import click
+import io
 
 from sys import platform as _platform
 is_linux = _platform.startswith("linux")
@@ -18,25 +19,25 @@
 is_unix = is_linux or is_macos
 is_win = os.name == "nt"
 
-def benchmark_command(command, iterations_num = 1, raw_data = False):
+def benchmark_command(command, iterations_num = 1, raw_data = False, capture_items = []):
     if iterations_num <= 0:
         raise Exception("The number of iterations to run the command should be >= 1")
 
     raw_benchmark_results = []
     for _ in range(iterations_num):
-        raw_benchmark_result = single_benchmark_command_raw(command)
+        raw_benchmark_result = single_benchmark_command_raw(command, capture_items)
         raw_benchmark_results.append(raw_benchmark_result)
 
     final_benchmark_results = list(map(lambda raw_benchmark_result: raw_benchmark_result if raw_data else raw_to_final_benchmark(raw_benchmark_result), raw_benchmark_results))
 
     return BenchmarkResults(final_benchmark_results)
 
-def benchmark_command_generator(command, iterations_num = 1, raw_data = False):
+def benchmark_command_generator(command, iterations_num = 1, raw_data = False, capture_items = []):
     if iterations_num <= 0:
         raise Exception("The number of iterations to run the command should be >= 1")
 
     for _ in range(iterations_num):
-        raw_benchmark_result = single_benchmark_command_raw(command)
+        raw_benchmark_result = single_benchmark_command_raw(command, capture_items)
         final_benchmark_result = raw_benchmark_result if raw_data else raw_to_final_benchmark(raw_benchmark_result)
         yield BenchmarkResults([final_benchmark_result])
 
@@ -65,7 +66,7 @@
 
     exit_code = benchmark_raw_dict["general"]["exit_code"]
 
-
+    points = benchmark_raw_dict["time_series"].get("points", [])
     benchmark_results = {
         "process": { "stdout_data": process_stdout_data, "stderr_data": process_stderr_data, "execution_time": process_execution_time, "exit_code": exit_code },
@@ -75,7 +76,8 @@
         {
             "sample_milliseconds": time_series_sample_milliseconds,
             "cpu_percentages": time_series_cpu_percentages,
-            "memory_bytes": time_series_memory_bytes
+            "memory_bytes": time_series_memory_bytes,
+            "points": points
         }
     }
 
 # psutil io_counters() is not available on macos
@@ -300,9 +302,11 @@
         shared_process_dict["memory_values"] = memory_values
 
 # Performs benchmarking on the command based on both /usr/bin/time and psutil library
-def single_benchmark_command_raw(command):
+def single_benchmark_command_raw(command, capture_items=set()):
     # https://docs.python.org/3/library/shlex.html#shlex.split
     commands_list = shlex.split(command)
+    # remove duplicates and convert to a set for O(1) membership checks
+    capture_items = set(capture_items)
 
     time_tmp_output_file = None
@@ -315,6 +319,12 @@
 
     # CPU
     cpu_times = None
+
+    # POINTS
+    points = []
+    capture_points = "points" in capture_items
+    CMDBENCH_BYTE_STRING = str.encode("cmdbench", encoding=OutputCapture.output_encoding)
+    POINT_BYTE_STRING = str.encode("cmdbench point", encoding=OutputCapture.output_encoding)
 
     # Disk
     disk_io_counters = None
@@ -365,7 +375,10 @@
 
     # Finally, run the command
     # Master process could be GNU Time running target command or the target command itself
-    master_process = psutil.Popen(commands_list, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    if not capture_items:
+        master_process = psutil.Popen(commands_list, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    else:
+        master_process = psutil.Popen(commands_list, stdout=subprocess.PIPE, stderr=subprocess.PIPE, bufsize=1, text=True)
     execution_start = current_milli_time()
 
     # Only in linux, we target command will be GNU Time's child process
@@ -394,12 +407,35 @@
 
     if not shared_process_dict["skip_benchmarking"]:
         shared_process_dict["target_process_pid"] = p.pid
 
-    # Wait for process to finish (time_series_exec and fixed_data_exec will be processing it in parallel)
-    outdata, errdata = master_process.communicate()
-    outdata, errdata = outdata.decode(sys.stdout.encoding), errdata.decode(sys.stderr.encoding)
+    # Wait for process to finish (time_series_exec and fixed_data_exec will be processing it in parallel)
+    if not capture_items:
+        outdata, errdata = master_process.communicate()
+        outdata, errdata = outdata.decode(sys.stdout.encoding), errdata.decode(sys.stderr.encoding)
+    else:
+        outdata = []
+        errdata = []
+        for line in master_process.stdout:
+            if not line:
+                break
+            if capture_points and line.startswith("cmdbench point"):
+                points.append(current_nano_time())
+            outdata.append(line)
+
+        for line in iter(master_process.stderr.readline, ''):
+            if not line:
+                break
+            errdata.append(line)
+    execution_end = current_milli_time()
+    # join outputs and convert points after the read loop to keep per-line overhead low
+    if isinstance(outdata, list):
+        outdata = "".join(outdata)
+    if isinstance(errdata, list):
+        errdata = "".join(errdata)
+    points = [round(point / 1_000_000 - execution_start, 2) for point in points]
+
     # Done with the master process, wait for the parallel (threads or processes) to finish up
     time_series_exec.join()
     fixed_data_exec.join()
 
@@ -550,7 +586,9 @@
 
         {
             "sample_milliseconds": np.array(sample_milliseconds),
             "cpu_percentages": np.array(cpu_percentages),
-            "memory_bytes": np.array(memory_values)
+            "memory_bytes": np.array(memory_values),
+            "points": points
+
         },
     }
diff --git a/cmdbench/result.py b/cmdbench/result.py
index fb582c7..7382002 100644
--- a/cmdbench/result.py
+++ b/cmdbench/result.py
@@ -60,6 +60,9 @@ def stats_replace_func(list_of_objects, key_path):
         value_per_attribute_stats_dict = self._get_values_per_attribute(self.iterations, stats_replace_func)
         return BenchmarkDict.from_dict(value_per_attribute_stats_dict)
 
+    def get_points(self):
+        return self._get_values_per_attribute(self.iterations)["time_series"].get("points", [])
+
     def get_averages(self):
 
         time_series_dict_key = "time_series"
@@ -80,7 +83,7 @@ def avg_replace_func(list_of_objects, key_path):
 
         time_series_y_values = {}
         for key, value in value_per_attribute_avgs_dict["time_series"].items():
-            if key != "sample_milliseconds":
+            if key not in ["sample_milliseconds", "points"]:
                 time_series_y_values[key] = value
 
         if len(time_series_x_values) == 0 or len(time_series_x_values[0]) == 0:
@@ -114,12 +117,14 @@
         for key, value in time_series_y_values_out.items():
             averaged_time_series[key] = value
 
+        # averaged_time_series['points'] = value_per_attribute_avgs_dict["time_series"]['points']
+
         value_per_attribute_avgs_dict["time_series"] = averaged_time_series
 
         return BenchmarkDict.from_dict(value_per_attribute_avgs_dict)
 
-    def get_resources_plot(self, width = 15, height = 3):
+    def get_resources_plot(self, width = 15, height = 3, capture_points=False):
         if not matplotlib_available:
             raise Exception("You need to install matplotlib before using this method")
 
@@ -128,6 +133,9 @@
             time_series_obj = self.get_first_iteration()
         else:
             time_series_obj = self.get_averages()
+
+        if capture_points:
+            points = self.get_points()
 
         time_series_obj = time_series_obj["time_series"]
 
@@ -179,6 +187,16 @@
         ax_cpu.set_ylabel("CPU (%)", color=color)
         ax_cpu.plot(x, cpu_y, color=color, alpha=0.75, linewidth=1)
         ax_cpu.tick_params(axis="y", labelcolor=color)
+        if capture_points:
+            # one horizontal band of delimiters per iteration, each in its own color
+            for ind, point_list in enumerate(points):
+                color = COLORS[(ind + 3) % len(COLORS)]
+                ylen = 1 / len(points)
+                ymin = ind * ylen
+                ymax = ymin + ylen
+                for point in point_list:
+                    if isinstance(point, (float, int)):
+                        ax_cpu.axvline(x=point, ymin=ymin, ymax=ymax, linestyle="-", lw=1, alpha=0.2, color=color)
 
         #plt.fill_between(x, cpu_y, alpha=0.2, color=color)
         #plt.tight_layout()
diff --git a/cmdbench/utils.py b/cmdbench/utils.py
index 9cabec9..c4e039c 100644
--- a/cmdbench/utils.py
+++ b/cmdbench/utils.py
@@ -2,7 +2,12 @@
 from collections import defaultdict
 import numpy as np
 from beeprint import pp
+try:
+    from matplotlib import pyplot as plt
+    COLORS = plt.rcParams["axes.prop_cycle"].by_key()["color"]
+except ImportError:
+    COLORS = []  # matplotlib is optional; result.py guards plotting with matplotlib_available
 
 
 # https://stackoverflow.com/a/41274937
 # Allows attribute access through both obj["key"] (internal library convenience) and obj.key (external developer convenience)
@@ -94,8 +99,13 @@
             "min": self.min, "max": self.max
         }
 
+class OutputCapture:
+    output_encoding = "utf-8"
+
+
 # https://stackoverflow.com/a/5998359
 current_milli_time = lambda: int(round(time.time() * 1000))
+current_nano_time = lambda: time.time_ns()
 
 
 def iterable(obj):
diff --git a/pyproject.toml b/pyproject.toml
index 8afd58c..b9884b0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "cmdbench"
-version = "0.1.22"
+version = "0.2.22"
 description = "Quick and easy benchmarking for any command's CPU, memory, disk usage and runtime."
 authors = ["Mohsen Yousefian "]
 license = "MIT"
diff --git a/resources/plot delimiters.png b/resources/plot delimiters.png
new file mode 100644
index 0000000..92ddad6
Binary files /dev/null and b/resources/plot delimiters.png differ
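
A minimal sketch of a target program for the new --capture-points flag (the file name workload.py and the phase timings are illustrative assumptions, not part of this patch; the "cmdbench point" marker and the flush requirement come from the option's help text):

    # workload.py - hypothetical target program for --capture-points
    import sys
    import time

    for phase in range(3):
        time.sleep(0.5)          # stand-in for one unit of real work
        print("cmdbench point")  # marker line the capture reader scans for
        sys.stdout.flush()       # same role as fflush(stdout) in C

An invocation along the lines of "cmdbench -c -p plot.png python3 workload.py" should then draw a vertical delimiter on the saved plot at each marker.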
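The same feature through the library API, as a sketch grounded in the signatures this patch changes (capture_items on benchmark_command, capture_points on get_resources_plot; the top-level import path is an assumption):

    from cmdbench import benchmark_command

    # "points" is the only capture item this patch defines
    results = benchmark_command("python3 workload.py", iterations_num=2, capture_items=["points"])
    print(results.get_points())  # one list of millisecond offsets per iteration

    fig = results.get_resources_plot(15, 3, capture_points=True)  # width, height
    if fig:
        fig.savefig("plot.png")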