diff --git a/.github/actions/run-notebook/action.yml b/.github/actions/run-notebook/action.yml index c40913f9..ba4b1e04 100644 --- a/.github/actions/run-notebook/action.yml +++ b/.github/actions/run-notebook/action.yml @@ -1,5 +1,5 @@ name: "Run Notebook" -description: "Run a notebook" +description: "Run a notebook end-to-end against a real Jupyter kernel via nbclient" inputs: notebook: @@ -26,34 +26,18 @@ runs: with: python-version: '3.11' - - name: Install dependencies + - name: Install runner dependencies shell: bash run: | pip install --upgrade pip - pip install nbformat + pip install nbformat nbclient ipykernel - - id: convert + - name: Run the notebook shell: bash - name: Convert notebook into tmpdir script run: | - python .github/actions/run-notebook/convert-notebook.py ${{ inputs.notebook }} - - - name: View the run script - shell: bash - run: | - cat ${{ steps.convert.outputs.script_path }} - - - name: View converted notebook content - shell: bash - run: | - cat ${{ steps.convert.outputs.notebook_path }} - - - name: Run the converted notebook - shell: bash - run: | - bash ${{ steps.convert.outputs.script_path }} + python .github/actions/run-notebook/run-notebook.py "${{ inputs.notebook }}" env: PINECONE_API_KEY: ${{ inputs.PINECONE_API_KEY }} OPENAI_API_KEY: ${{ inputs.OPENAI_API_KEY }} HF_TOKEN: ${{ inputs.HF_TOKEN }} - ANTHROPIC_API_KEY: ${{ inputs.ANTHROPIC_API_KEY }} \ No newline at end of file + ANTHROPIC_API_KEY: ${{ inputs.ANTHROPIC_API_KEY }} diff --git a/.github/actions/run-notebook/convert-notebook.py b/.github/actions/run-notebook/convert-notebook.py deleted file mode 100755 index e425e6b3..00000000 --- a/.github/actions/run-notebook/convert-notebook.py +++ /dev/null @@ -1,115 +0,0 @@ -#! 
/usr/bin/env python - -# Convert a notebook to a Python script - -import os -import sys -import nbformat -import shutil -from tempfile import mkdtemp -from tempfile import TemporaryDirectory - -# Get the notebook filename from the command line -filename = "../../../" + sys.argv[1] -print(f"Processing notebook: {filename}") -nb_source_path = os.path.join(os.path.dirname(__file__), filename) - -temp_dir = mkdtemp() -venv_path = os.path.join(temp_dir, 'venv') -os.makedirs(venv_path, exist_ok=True) - -# Copy file into temp directory -temp_nb_path = os.path.join(temp_dir, 'notebook.ipynb') -print(f"Copying notebook to {temp_nb_path}") -shutil.copy(nb_source_path, temp_nb_path) - -with open(temp_nb_path, "r", encoding="utf-8") as f: - nb = nbformat.read(f, as_version=4) - -# Extract pip install commands (assumes they are written as "!pip install ..." or "%pip install ...") -# This grabs any line containing "pip install" in the script. -activate_venv = """ -#!/bin/bash - -set -ex - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" - -# Create new virtual environment -python -m venv "${SCRIPT_DIR}/venv" - -# Activate the virtual environment -source "${SCRIPT_DIR}/venv/bin/activate" -pip install --upgrade pip -pip install ipython -""" -run_commands = [activate_venv] -for cell in nb.cells: - if cell.cell_type == "code": - if "!pip" in cell.source or "%pip" in cell.source: - # Replace all instances of "!pip" and "%pip" with "pip" - command = cell.source.replace("!pip", "pip").replace("%pip", "pip") - run_commands.append(command) - -run_commands.append(""" -# Run the notebook executable code -python "${SCRIPT_DIR}/notebook.py" -""") - -run_commands.append(""" -# Deactivate the virtual environment -deactivate -""") - -# Save pip install commands to a run.sh script -run_script_path = os.path.join(temp_dir, 'run.sh') -with open(run_script_path, 'w', encoding="utf-8") as f: - f.write("\n".join(run_commands)) - -print(f"Setup script saved to {run_script_path}") - -# 
Collect cells that are not pip install commands -executable_cells = ["from IPython.display import display"] - -# pip commands we want to ignore: - -PIP_COMMANDS = [ - "pip install", - "pip uninstall", - "pip freeze", - "pip list", - "pip show", - "pip check", - "pip download", - "pip config", - "pip search", - "pip wheel", - "pip hash", - "pip cache", - "pip index" -] - - -for cell in nb.cells: - if cell.cell_type == "code": - # ensures the cell is not a pip command, avoid hitting words that contain "pip" - if not any(cmd in cell.source for cmd in PIP_COMMANDS): - # Remove any lines that start with "!" or "%" - # These are "magic" commands such as "%matplotlib inline" that - # are not executable outside of a notebook environment. - executable = "\n".join([line for line in cell.source.split("\n") if not line.strip().startswith("!") and not line.strip().startswith("%")]) - executable_cells.append(executable) - -# Save executable cells to a notebook.py file -script_path = os.path.join(temp_dir, 'notebook.py') -with open(script_path, 'w', encoding="utf-8") as f: - for cell in executable_cells: - f.write(cell + '\n') - -print(f"Script saved to {script_path}") - -# Output script and notebook path to github actions output -with open(os.environ['GITHUB_OUTPUT'], 'a') as f: - f.write(f"script_path={run_script_path}\n") - f.write(f"notebook_path={script_path}\n") - \ No newline at end of file diff --git a/.github/actions/run-notebook/run-notebook.py b/.github/actions/run-notebook/run-notebook.py new file mode 100644 index 00000000..4959c52f --- /dev/null +++ b/.github/actions/run-notebook/run-notebook.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python +"""Execute a notebook end-to-end against a real Jupyter kernel. + +Replaces the cell-partitioning approach in convert-notebook.py with a +straight nbclient drive. This means: + + - !pip install and other shell magics run via the kernel's normal magic + handling (the kernel spawns a subshell), exactly as in Colab/Jupyter Lab. 
+  - A code cell may freely mix `!pip install` and Python imports.
+  - Errors surface with cell and traceback context, not as
+    `line N: import: command not found`.
+
+Notebook deps are installed into the same Python environment as the
+runner (the kernel and runner share an interpreter), so `!pip install foo`
+in cell 1 means cell 2 can `import foo`.
+
+Usage:
+    run-notebook.py <notebook-path>
+
+Exits non-zero on any cell failure.
+"""
+
+import os
+import sys
+
+import nbformat
+from nbclient import NotebookClient
+from nbclient.exceptions import CellExecutionError, CellTimeoutError, DeadKernelError
+
+CELL_TIMEOUT = int(os.environ.get("NOTEBOOK_CELL_TIMEOUT", "600"))
+
+
+def main() -> int:
+    """Execute the notebook at sys.argv[1]; return the process exit status."""
+    if len(sys.argv) != 2:
+        print("usage: run-notebook.py <notebook-path>", file=sys.stderr)
+        return 2
+
+    notebook_path = sys.argv[1]
+    print(f"Executing {notebook_path}")
+    nb = nbformat.read(notebook_path, as_version=4)
+    client = NotebookClient(
+        nb,
+        timeout=CELL_TIMEOUT,
+        kernel_name="python3",
+        resources={"metadata": {"path": os.path.dirname(notebook_path) or "."}},
+    )
+
+    try:
+        client.execute()
+    except CellTimeoutError as exc:
+        print(
+            f"\nCell exceeded timeout of {CELL_TIMEOUT}s "
+            f"(override via NOTEBOOK_CELL_TIMEOUT):\n{exc}",
+            file=sys.stderr,
+        )
+        return 1
+    except DeadKernelError as exc:
+        print(
+            f"\nKernel died during execution (OOM/segfault?):\n{exc}", file=sys.stderr
+        )
+        return 1
+    except CellExecutionError as exc:
+        print(f"\nCell execution failed:\n{exc}", file=sys.stderr)
+        return 1
+
+    print(f"PASS — {notebook_path}")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())