From 41efc3acef40749ff50af7e227c22ab34eb6712a Mon Sep 17 00:00:00 2001
From: NinaCai <ninacai@google.com>
Date: Wed, 13 May 2026 02:30:14 +0000
Subject: [PATCH 1/2] kill subprocesses when server process is killed

---
 .../hitl_agent/server_utils/cpu_server.py     | 17 +++++++++-------
 .../server_utils/server_manager_mixin.py      |  7 +++++++
 MaxKernel/hitl_agent/server_utils/setup.sh    |  4 +++-
 .../hitl_agent/server_utils/tpu_server.py     | 17 +++++++++-------
 .../subagents/autotuning/autotune_tool.py     | 20 +++++++++++++++++--
 5 files changed, 48 insertions(+), 17 deletions(-)

diff --git a/MaxKernel/hitl_agent/server_utils/cpu_server.py b/MaxKernel/hitl_agent/server_utils/cpu_server.py
index 4a12590..63fe40c 100644
--- a/MaxKernel/hitl_agent/server_utils/cpu_server.py
+++ b/MaxKernel/hitl_agent/server_utils/cpu_server.py
@@ -97,7 +97,7 @@ async def compilation_test(request: CodeRequest):
       request.code = code_content
       # Create a temporary file to store the code
       with tempfile.NamedTemporaryFile(
-        mode="w", suffix=".py", delete=False
+        mode="w", suffix=".py", prefix="hitl_eval_", delete=False
       ) as temp_file:
         temp_file.write(request.code)
         temp_file_path = temp_file.name
@@ -180,7 +180,7 @@ async def correctness_test(request: CodeRequest):
       request.code = code_content
       # Create a temporary file to store the code
       with tempfile.NamedTemporaryFile(
-        mode="w", suffix=".py", delete=False
+        mode="w", suffix=".py", prefix="hitl_eval_", delete=False
       ) as temp_file:
         temp_file.write(request.code)
         temp_file_path = temp_file.name
@@ -262,7 +262,7 @@ async def performance_test(request: CodeRequest):
       request.code = code_content
       # Create a temporary file to store the code
       with tempfile.NamedTemporaryFile(
-        mode="w", suffix=".py", delete=False
+        mode="w", suffix=".py", prefix="hitl_eval_", delete=False
       ) as temp_file:
         temp_file.write(request.code)
         temp_file_path = temp_file.name
@@ -340,11 +340,12 @@ async def autotune(request: AutotuneRequest):
 
         # Execute the code
         with tempfile.NamedTemporaryFile(
-          mode="w", suffix=".py", delete=False
+          mode="w", suffix=".py", prefix="hitl_eval_", delete=False
         ) as temp_file:
           temp_file.write(code_content)
           temp_file_path = temp_file.name
 
+        process = None
         try:
           process = await asyncio.create_subprocess_exec(
             sys.executable,
@@ -383,8 +384,9 @@ async def autotune(request: AutotuneRequest):
 
         except asyncio.TimeoutError:
           logging.warning(f"Config {cfg} timed out")
-          process.kill()
-          await process.wait()
+          if process is not None:  # None if the subprocess was never created
+            process.kill()
+            await process.wait()
         except Exception as e:
           logging.error(f"Error running config {cfg}: {e}")
         finally:
@@ -392,6 +394,7 @@ async def autotune(request: AutotuneRequest):
             os.unlink(temp_file_path)
           except OSError:
             pass
+          await asyncio.sleep(2)
 
       if best_cfg is None:
         return CodeResponse(
@@ -442,7 +445,7 @@ async def profile(request: CodeRequest):
       request.code = code_content
       # Create a temporary directory to store the code and any generated files
 
-      temp_dir = tempfile.mkdtemp()
+      temp_dir = tempfile.mkdtemp(prefix="hitl_eval_")
       logging.info("temp_dir: " + str(temp_dir))
 
       # Create a temporary file to store the code within temp_dir
diff --git a/MaxKernel/hitl_agent/server_utils/server_manager_mixin.py b/MaxKernel/hitl_agent/server_utils/server_manager_mixin.py
index aa45b18..bad6a55 100644
--- a/MaxKernel/hitl_agent/server_utils/server_manager_mixin.py
+++ b/MaxKernel/hitl_agent/server_utils/server_manager_mixin.py
@@ -270,3 +270,10 @@ async def _cleanup_servers(self):
       process_name = f"{server_type}_server.py"
       self._stop_server_sync(process_name)
       await asyncio.sleep(0.5)  # Brief pause between stops
+
+    # Best-effort kill of leftover eval scripts (temp files named hitl_eval_*.py)
+    try:
+      logging.info("Cleaning up dangling evaluation subprocesses...")
+      subprocess.run(["pkill", "-f", "/tmp/hitl_eval_.*\\.py"], check=False)
+    except Exception as e:
+      logging.warning(f"Failed to clean up subprocesses: {e}")
diff --git a/MaxKernel/hitl_agent/server_utils/setup.sh b/MaxKernel/hitl_agent/server_utils/setup.sh
index 134b271..0bd510b 100644
--- a/MaxKernel/hitl_agent/server_utils/setup.sh
+++ b/MaxKernel/hitl_agent/server_utils/setup.sh
@@ -26,7 +26,9 @@ elif [ "$1" = "--end" ]; then
     pkill -f "tpu_server.py"
     pkill -f "cpu_server.py"
     pkill -f "eval_server.py"
-
+    # Kill any dangling evaluation subprocesses
+    pkill -f "/tmp/hitl_eval_.*\.py"
+
     echo "Server(s) stopped successfully"
 else
     echo "Usage: $0 --start-tpu|--start-cpu|--start-eval|--end"
diff --git a/MaxKernel/hitl_agent/server_utils/tpu_server.py b/MaxKernel/hitl_agent/server_utils/tpu_server.py
index 1027995..3211f0f 100644
--- a/MaxKernel/hitl_agent/server_utils/tpu_server.py
+++ b/MaxKernel/hitl_agent/server_utils/tpu_server.py
@@ -86,7 +86,7 @@ async def compilation_test(request: CodeRequest):
       request.code = code_content
       # Create a temporary file to store the code
       with tempfile.NamedTemporaryFile(
-        mode="w", suffix=".py", delete=False
+        mode="w", suffix=".py", prefix="hitl_eval_", delete=False
       ) as temp_file:
         temp_file.write(request.code)
         temp_file_path = temp_file.name
@@ -168,7 +168,7 @@ async def correctness_test(request: CodeRequest):
       request.code = code_content
       # Create a temporary file to store the code
       with tempfile.NamedTemporaryFile(
-        mode="w", suffix=".py", delete=False
+        mode="w", suffix=".py", prefix="hitl_eval_", delete=False
       ) as temp_file:
         temp_file.write(request.code)
         temp_file_path = temp_file.name
@@ -249,7 +249,7 @@ async def performance_test(request: CodeRequest):
       request.code = code_content
       # Create a temporary file to store the code
       with tempfile.NamedTemporaryFile(
-        mode="w", suffix=".py", delete=False
+        mode="w", suffix=".py", prefix="hitl_eval_", delete=False
       ) as temp_file:
         temp_file.write(request.code)
         temp_file_path = temp_file.name
@@ -327,11 +327,12 @@ async def autotune(request: AutotuneRequest):
 
         # Execute the code
         with tempfile.NamedTemporaryFile(
-          mode="w", suffix=".py", delete=False
+          mode="w", suffix=".py", prefix="hitl_eval_", delete=False
         ) as temp_file:
           temp_file.write(code_content)
           temp_file_path = temp_file.name
 
+        process = None
         try:
           process = await asyncio.create_subprocess_exec(
             sys.executable,
@@ -383,8 +384,9 @@ async def autotune(request: AutotuneRequest):
 
         except asyncio.TimeoutError:
           logging.warning(f"Config {cfg} timed out")
-          process.kill()
-          await process.wait()
+          if process is not None:  # None if the subprocess was never created
+            process.kill()
+            await process.wait()
           all_results.append({"cfg": cfg, "status": "timeout"})
         except Exception as e:
           logging.error(f"Error running config {cfg}: {e}")
@@ -397,6 +399,7 @@ async def autotune(request: AutotuneRequest):
               os.unlink(temp_file_path)
             except OSError:
               pass
+          await asyncio.sleep(2)
 
       if best_cfg is None:
         return CodeResponse(
@@ -448,7 +451,7 @@ async def profile(request: CodeRequest):
       request.code = code_content
       # Create a temporary directory to store the code and any generated files
 
-      temp_dir = tempfile.mkdtemp()
+      temp_dir = tempfile.mkdtemp(prefix="hitl_eval_")
       logging.info("temp_dir: " + str(temp_dir))
 
       # Create a temporary file to store the code within temp_dir
diff --git a/MaxKernel/hitl_agent/subagents/autotuning/autotune_tool.py b/MaxKernel/hitl_agent/subagents/autotuning/autotune_tool.py
index 70601bd..d2e0af4 100644
--- a/MaxKernel/hitl_agent/subagents/autotuning/autotune_tool.py
+++ b/MaxKernel/hitl_agent/subagents/autotuning/autotune_tool.py
@@ -2,11 +2,12 @@
 
 import json
 import logging
+import subprocess
 from typing import Any
 
 import requests
 
-from hitl_agent.constants import EVAL_SERVER_PORT
+from hitl_agent.constants import EVAL_SERVER_PORT, AUTOTUNE_TIMEOUT
 
 
 def autotune_kernel(
@@ -48,7 +49,7 @@ def autotune_kernel(
         "timeout": 300,
         "backend_type": backend,
       },
-      timeout=3600,  # 1 hour timeout for the whole autotune request
+      timeout=AUTOTUNE_TIMEOUT,  # timeout for the whole autotune request
     )
 
     if response.status_code == 200:
@@ -104,5 +105,20 @@ def autotune_kernel(
         f"Could not connect to server at {url}. Make sure it is running."
       ),
     }
+  except requests.exceptions.Timeout:
+    logging.warning(
+      "Autotune timed out on client side. Cleaning up dangling subprocesses on TPU server..."
+    )
+    try:
+      subprocess.run(["pkill", "-9", "-f", "tpu_server.py"], check=False)
+      subprocess.run(["pkill", "-f", "/tmp/hitl_eval_.*\\.py"], check=False)
+      logging.info("Killed dangling evaluations and tpu_server.py")
+    except Exception as cleanup_error:
+      logging.error(f"Failed to run cleanup commands: {cleanup_error}")
+
+    return {
+      "status": "error",
+      "message": f"Autotune request timed out after {AUTOTUNE_TIMEOUT} seconds. Dangling processes were killed.",
+    }
   except Exception as e:
     return {"status": "error", "message": str(e)}

From 464b758e4a8e3caf9a3851071a774a0454de00be Mon Sep 17 00:00:00 2001
From: NinaCai <ninacai@google.com>
Date: Wed, 13 May 2026 15:56:20 +0000
Subject: [PATCH 2/2] stop both tpu and cpu servers

---
 MaxKernel/hitl_agent/subagents/autotuning/autotune_tool.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/MaxKernel/hitl_agent/subagents/autotuning/autotune_tool.py b/MaxKernel/hitl_agent/subagents/autotuning/autotune_tool.py
index d2e0af4..0da7522 100644
--- a/MaxKernel/hitl_agent/subagents/autotuning/autotune_tool.py
+++ b/MaxKernel/hitl_agent/subagents/autotuning/autotune_tool.py
@@ -107,10 +107,11 @@ def autotune_kernel(
     }
   except requests.exceptions.Timeout:
     logging.warning(
-      "Autotune timed out on client side. Cleaning up dangling subprocesses on TPU server..."
+      "Autotune timed out on client side. Cleaning up dangling subprocesses on server..."
     )
     try:
       subprocess.run(["pkill", "-9", "-f", "tpu_server.py"], check=False)
+      subprocess.run(["pkill", "-9", "-f", "cpu_server.py"], check=False)
       subprocess.run(["pkill", "-f", "/tmp/hitl_eval_.*\\.py"], check=False)
-      logging.info("Killed dangling evaluations and tpu_server.py")
+      logging.info("Killed dangling evaluations, tpu_server.py and cpu_server.py")
     except Exception as cleanup_error: