Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 69 additions & 16 deletions docs/source/reference/Features/OPTIMIZE.rst
Original file line number Diff line number Diff line change
@@ -1,32 +1,85 @@
<OPTIMIZE> Directives
=====================

reStructuredPython allows you to apply runtime optimizations using special compiler directives and decorators.
reStructuredPython allows you to apply runtime optimizations using special
compiler directives and decorators. These directives can be applied to both
loops and functions to enhance performance, enable diagnostics,
and optionally parallelize execution.

Loop Optimization
-----------------

Use ``<OPTIMIZE ...>`` before a ``for`` or ``while`` loop to apply runtime enhancements:

.. code-block:: python

<OPTIMIZE gct=True, parallel=True, profile=True>
for i in range(10_000_000) {
temp = str(i) * 10
}
<OPTIMIZE gct=True, parallel=True, profile=True, cache=True>
for i in range(10_000_000) {
temp = str(i) * 10
}

.. versionadded::
Added the cache option in 2.6.0

.. note::
Optimizations currently support loops only. The function implementation currently **does not** have actual optimizations, only profiling and tracing.
.. versionchanged::
Changed the parallel functionality in 2.6.0

Arguments for <OPTIMIZE ...> on loops include:

- ``gct=True``: Enable garbage collection tracking.
- ``profile=True``: Enable execution time logging.
- ``parallel=True``: Enables multiprocessing pool
- ``gct=False``: Trigger garbage collection before loop execution.

- ``profile=False``: Log execution time of the loop.

- ``parallel=False``: Attempts **multithreading** using ``concurrent.futures.ThreadPoolExecutor``

Arguments for <OPTIMIZE ...> on functions include:
- ``cache=False``: Enable memoization for loop-returning functions.

- ``profile=False`` Enable execution time logging
- ``trace=False`` Enable event tracing
- ``unroll=N``: Unrolls loops to preserve performance

This will generate a python file that imports the optimization decorators from this ( the ``restructuredpython`` package ), so you will need to have this package installed via pip on systems running your compiled, optimized program.
.. warning::
**<OPTIMIZE parallel=True> on loops uses multithreading only.**
For true multiprocessing, use python's multiprocessing module with
top-level functions and ensure your script includes freeze_support
due to limitations of python's multiprocessing setup.

However, as of 2.5.0, you could technically open the generated python file, remove the imports from ``restructuredpython``, and instead use ``include 'subinterpreter.optimize'``. However, this is explicitly NOT recommended as it will break in future versions of reStructuredPython and will include an annoying copyright header in the generated file.
.. code-block:: python
if __name__ == "__main__":
from multiprocessing import freeze_support
freeze_support()
main()


Function Optimization
---------------------

You can also apply ``<OPTIMIZE ...>`` before a function definition
to enable diagnostics and performance enhancements:

.. code-block:: python

We recommend running this with ``repycl`` the restructuredpython interpreter & launcher.
<OPTIMIZE profile=True, trace=True, cache=True>
def compute(x) {
return x ** 2
}

.. versionadded::

Note: Function optimization now includes caching
as well as profiling and tracing, starting from version 2.6.0

Arguments for <OPTIMIZE ...> on functions include:

- ``cache=False``: Uses LRU caching for function memoization
- ``profile=False``: Log execution time of the function.
- ``trace=True``: Trace events for the function

This will generate a python file that imports the
optimization decorators from this ( the ``restructuredpython`` package ),
so you will need to have this package installed via pip on systems
running your compiled, optimized program.

.. note::
    However, as of 2.6.0, you could technically open the generated python file, remove the imports from ``restructuredpython``, and instead use ``include 'subinterpreter.optimize'``. However, this is explicitly NOT recommended as it will break in future versions of reStructuredPython and will include an annoying copyright header in the generated file.

.. note::
    We recommend running this with ``repycl``, the reStructuredPython interpreter & launcher.
134 changes: 111 additions & 23 deletions restructuredpython/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,10 @@
from .check_syntax import check_syntax
import re


def wrap_loops_for_optimization(code):
"""
Rewrites for/while loops with <OPTIMIZE ...> annotations into runtime functions
with decorators.
with decorators. Supports loop unrolling via `unroll=N` and parallel execution.
"""
lines = code.splitlines()
modified_lines = []
Expand All @@ -30,31 +29,120 @@ def wrap_loops_for_optimization(code):
line = lines[i].strip()
if line.startswith("@optimize_loop("):
decorator_line = lines[i]

unroll_match = re.search(r'unroll\s*=\s*(\d+)', decorator_line)
unroll_factor = int(unroll_match.group(1)) if unroll_match else 1
parallel = "parallel=True" in decorator_line.replace(" ", "")

loop_line = lines[i + 1].strip()
loop_indent = len(lines[i + 1]) - len(loop_line)
func_name = f"_repy_optimized_loop_{loop_counter}"
loop_counter += 1

modified_lines.append(decorator_line)
modified_lines.append(" " * loop_indent + f"def {func_name}():")
modified_lines.append(" " * (loop_indent + 4) + loop_line)
i += 2
if parallel and loop_line.startswith("for "):
loop_match = re.match(r'for\s+(.*?)\s+in\s+(.*):?', loop_line)
loop_vars = loop_match.group(1).strip() # type: ignore
iter_expr = loop_match.group(2).strip().rstrip(':') # type: ignore
is_tuple_unpack = ',' in loop_vars

body_func_name = f"_repy_loop_body_{loop_counter}"
i += 2
loop_body = []
while i < len(lines):
body_line = lines[i]
if body_line.strip() == "":
loop_body.append(body_line)
i += 1
continue
body_indent = len(body_line) - len(body_line.lstrip())
if body_indent <= loop_indent:
break
relative_indent = body_indent - loop_indent
new_indent = loop_indent + 4 + relative_indent
loop_body.append(" " * new_indent + body_line.lstrip())
i += 1

body_func_lines = [f"def {body_func_name}({loop_vars}):"]
body_func_lines.extend(loop_body)

func_name = f"_repy_optimized_loop_{loop_counter}"
loop_counter += 1

modified_lines.append(" " * loop_indent + decorator_line)
modified_lines.append(" " * loop_indent + f"def {body_func_name}({loop_vars}):")
modified_lines.extend(loop_body)

# Emit executor function
modified_lines.append(" " * loop_indent + f"def {func_name}():")
executor_type = "ThreadPoolExecutor"
modified_lines.append(" " * (loop_indent + 4) + f"from concurrent.futures import {executor_type}")

if is_tuple_unpack:
modified_lines.append(" " * (loop_indent + 4) + "def starmap_pool(fn, iterable):")
modified_lines.append(" " * (loop_indent + 8) + f"with {executor_type}() as pool:")
modified_lines.append(" " * (loop_indent + 12) + "futures = [pool.submit(fn, *args) for args in iterable]")
modified_lines.append(" " * (loop_indent + 12) + "return [f.result() for f in futures]")
modified_lines.append(" " * (loop_indent + 4) + f"starmap_pool({body_func_name}, {iter_expr})")
else:
modified_lines.append(" " * (loop_indent + 4) + f"with {executor_type}() as pool:")
modified_lines.append(" " * (loop_indent + 8) + f"list(pool.map({body_func_name}, {iter_expr}))")

# Call the executor function
modified_lines.append(" " * loop_indent + f"{func_name}()")

# Copy indented body lines until dedent or EOF
while i < len(lines):
body_line = lines[i]
if body_line.strip() == "":
modified_lines.append(body_line)
else:
# Non-parallel loop: wrap as usual
modified_lines.append(" " * loop_indent + decorator_line)
modified_lines.append(" " * loop_indent + f"def {func_name}():")
modified_lines.append(" " * (loop_indent + 4) + loop_line)
i += 2
loop_body = []
while i < len(lines):
body_line = lines[i]
if body_line.strip() == "":
loop_body.append(body_line)
i += 1
continue
body_indent = len(body_line) - len(body_line.lstrip())
if body_indent <= loop_indent:
break
relative_indent = body_indent - loop_indent
new_indent = loop_indent + 4 + relative_indent
loop_body.append(" " * new_indent + body_line.lstrip())
i += 1
continue
body_indent = len(body_line) - len(body_line.lstrip())
if body_indent <= loop_indent:
break
modified_lines.append(
" " * (loop_indent + 8) + body_line.strip())
i += 1

modified_lines.append(" " * loop_indent + f"{func_name}()")

if unroll_factor > 1 and loop_line.startswith("for ") and "range(" in loop_line:
range_match = re.search(r'range\(([^)]+)\)', loop_line)
if range_match:
range_args = range_match.group(1).split(',')
if len(range_args) == 1:
start, end, step = "0", range_args[0].strip(), str(unroll_factor)
elif len(range_args) == 2:
start, end = range_args[0].strip(), range_args[1].strip()
step = str(unroll_factor)
elif len(range_args) == 3:
start, end, step = [arg.strip() for arg in range_args]
step = f"({step}) * {unroll_factor}"

var_match = re.match(r'for\s+(\w+)\s+in\s+range', loop_line)
loop_var = var_match.group(1) if var_match else "i"

new_loop_line = f"for {loop_var} in range({start}, {end}, {step}):"
modified_lines[-1] = " " * (loop_indent + 4) + new_loop_line

for offset in range(unroll_factor):
for body in loop_body:
if offset == 0:
unrolled_line = body
else:
unrolled_line = re.sub(rf'\b{loop_var}\b', f"{loop_var}+{offset}", body)
modified_lines.append(unrolled_line)
else:
modified_lines.extend(loop_body)
else:
modified_lines.extend(loop_body)

modified_lines.append(" " * loop_indent + f"{func_name}()")
else:
modified_lines.append(lines[i])
i += 1
Expand Down Expand Up @@ -98,7 +186,6 @@ def nest(parts):
for i, line in enumerate(lines):
stripped = line.strip()

# Detect <OPTIMIZE ...> directive
if stripped.startswith("<OPTIMIZE") and stripped.endswith(">"):
match = re.match(r'<OPTIMIZE\s+(.+?)>', stripped)
if match:
Expand Down Expand Up @@ -126,14 +213,15 @@ def nest(parts):
modified_code.append(f"# {processed_line[:-2].strip()}")
continue

# Apply decorator before next block
if pending_optimize:
loop_indent = len(lines[i]) - len(lines[i].lstrip())
if re.match(r'^\s*(for|while)\s+.*\{', processed_line):
modified_code.append(f"@optimize_loop({pending_optimize})")

required_imports.add("optimize_loop")
pending_optimize = None
elif re.match(r'^\s*def\s+.*\{', processed_line):
modified_code.append(f"@optimize_function({pending_optimize})")
modified_code.append(" " * loop_indent + f"@optimize_function({pending_optimize})")
required_imports.add("optimize_function")
pending_optimize = None

Expand Down
62 changes: 35 additions & 27 deletions restructuredpython/predefined/subinterpreter/optimize.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,63 +16,71 @@
import time
import sys
import functools
import types
import dis
import multiprocessing
import warnings


def optimize_loop(profile=False, gct=False, parallel=False, unroll=0):
def optimize_loop(profile=False, gct=False, multithreading=False, parallel=False, cache=False, unroll=0):
"""
Decorator to optimize loop-like functions.
Supports profiling, garbage collection, parallel execution, caching, and JIT compilation.
"""
def decorator(fn):
@functools.wraps(fn)
original_fn = fn

# Caching
if cache:
fn = functools.lru_cache(maxsize=None)(fn)

@functools.wraps(original_fn)
def wrapper(*args, **kwargs):
# Garbage Collection
if gct:
gc.collect()

# Profiling
start = time.perf_counter() if profile else None

# Execute loop
if parallel:
# Naive parallelism: assuming fn yields or returns a list
results = fn(*args, **kwargs)
with multiprocessing.Pool() as pool:
pool.map(lambda x: x, results)
else:
fn(*args, **kwargs)
fn(*args, **kwargs)

if profile:
duration = time.perf_counter() - start
duration = time.perf_counter() - start # type: ignore
print(f"[PROFILE] Loop took {duration:.4f}s")

return wrapper
return decorator


def optimize_function(profile=False, trace=False):
def optimize_function(profile=False, trace=False, cache=False, parallel=False):
"""
Decorator to optimize general functions.
Supports profiling, tracing, caching
"""
def decorator(fn):
original_fn = fn

if cache:
fn = functools.lru_cache(maxsize=None)(fn)

if profile:
@functools.wraps(fn)
@functools.wraps(original_fn)
def profiled(*args, **kwargs):
start = time.perf_counter()
result = fn(*args, **kwargs)
print(
f"[PROFILE] {
fn.__name__} took {
time.perf_counter() -
start:.4f}s")
result = fn(*args, **kwargs) # type: ignore
print(f"[PROFILE] {original_fn.__name__} took {time.perf_counter() - start:.4f}s")
return result
return profiled

if trace:
def tracer(frame, event, arg):
print(f"[TRACE] {event} in {frame.f_code.co_name}")
return tracer

def wrapped(*args, **kwargs):
@functools.wraps(original_fn)
def traced(*args, **kwargs):
sys.settrace(tracer)
result = fn(*args, **kwargs)
result = fn(*args, **kwargs) # type: ignore
sys.settrace(None)
return result
return wrapped
return traced

return fn

return decorator
Loading