From e25b13bfba41ca2243fdce8806b1ccbca1f245f5 Mon Sep 17 00:00:00 2001
From: Vic <125237471+vicsanity623@users.noreply.github.com>
Date: Mon, 9 Mar 2026 17:49:35 -0700
Subject: [PATCH 1/3] Update core_utils.py

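Address the Gemini empty-response issue in get_valid_llm_response:
select the engine first, then make a single _stream_single_llm call,
and always sleep before retrying an empty response, a non-429 error,
or a response that fails validation.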
---
 src/pyob/core_utils.py | 85 +++++++++++++-----------------------------
 1 file changed, 26 insertions(+), 59 deletions(-)

diff --git a/src/pyob/core_utils.py b/src/pyob/core_utils.py
index 5e75be2..2e6c9cb 100644
--- a/src/pyob/core_utils.py
+++ b/src/pyob/core_utils.py
@@ -393,6 +393,7 @@ def _stream_single_llm(
         input_tokens = len(prompt) // 4
         first_chunk_received = [False]
         gen_start_time = time.time()
+        is_cloud = os.environ.get("GITHUB_ACTIONS") == "true"
 
         def spinner():
             spinner_chars = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"]
@@ -420,17 +421,17 @@ def on_chunk():
                 sys.stdout.write("\r\033[K")
                 sys.stdout.flush()
                 source = f"Gemini ...{key[-4:]}" if key else "GitHub Models"
+                if not key and not is_cloud: source = "Local Ollama"
                 print(f"🤖 AI Output ({source}): ", end="", flush=True)
 
         response_text = ""
         try:
             if key is not None:
                 response_text = self.stream_gemini(prompt, key, on_chunk)
-            elif os.environ.get("GITHUB_ACTIONS") == "true":
-                # In cloud, 'None' key always means use GitHub Models
+            elif is_cloud:
+                # Force GitHub Models in cloud, skip Ollama entirely
                 response_text = self.stream_github_models(prompt, on_chunk)
             else:
-                # Only iMac uses Ollama
                 response_text = self.stream_ollama(prompt, on_chunk)
         except Exception as e:
             first_chunk_received[0] = True
@@ -439,91 +440,57 @@ def on_chunk():
         first_chunk_received[0] = True
         final_time = time.time() - gen_start_time
         if response_text and not response_text.startswith("ERROR_CODE_"):
-            print(
-                f"\n\n[✅ Generation Complete: ~{len(response_text) // 4} tokens in {final_time:.1f}s]"
-            )
+            print(f"\n\n[✅ Generation Complete: ~{len(response_text) // 4} tokens in {final_time:.1f}s]")
         return response_text
 
     def get_valid_llm_response(self, prompt: str, validator, context: str = "") -> str:
         attempts = 0
         is_cloud = os.environ.get("GITHUB_ACTIONS") == "true"
-
-        logger.info(
-            f"📊 Engine check: Found {len(self.key_cooldowns)} Gemini API keys."
-        )
-
+        
         while True:
             key = None
             now = time.time()
-            available_keys = [
-                k for k, cooldown in self.key_cooldowns.items() if now > cooldown
-            ]
+            available_keys = [k for k, cd in self.key_cooldowns.items() if now > cd]
 
-            # --- 1. ENGINE SELECTION LOGIC ---
+            # 1. Select Engine
             if available_keys:
-                # Use Gemini (Primary)
                 key = available_keys[attempts % len(available_keys)]
-                logger.info(
-                    f"Attempting Gemini API Key {attempts % len(available_keys) + 1}/{len(available_keys)}"
-                )
-                response_text = self._stream_single_llm(
-                    prompt, key=key, context=context
-                )
-
+                logger.info(f"Attempting Gemini API Key {attempts % len(available_keys) + 1}/{len(available_keys)}")
             elif is_cloud:
-                # ALL GEMINI KEYS LIMITED -> TRY GITHUB MODELS (Secondary)
-                logger.warning(
-                    "⏳ Gemini keys limited. Pivoting to GitHub Models (Phi-4)..."
-                )
-                response_text = self._stream_single_llm(
-                    prompt, key=None, context=context
-                )
-
-                # If GitHub Models ALSO fails or returns an error
-                if not response_text or response_text.startswith("ERROR_CODE_"):
-                    logger.warning(
-                        "🚫 All Cloud AI engines exhausted. Sleeping 5 minutes for cooldown..."
-                    )
-                    time.sleep(300)
-                    continue
-
+                logger.warning("⏳ Gemini keys limited. Pivoting to GitHub Models (Phi-4)...")
+                # key remains None, which triggers stream_github_models in _stream_single_llm
             else:
-                # LOCAL IMAC -> FALLBACK TO OLLAMA
                 logger.info("🏠 Using Local Ollama Engine...")
-                response_text = self._stream_single_llm(
-                    prompt, key=None, context=context
-                )
 
-            # --- 2. RESPONSE VALIDATION & ROTATION ---
+            response_text = self._stream_single_llm(prompt, key=key, context=context)
 
-            # Handle standard Gemini Rate Limit (429)
+            # 2. Handle Rate Limits (429)
             if response_text.startswith("ERROR_CODE_429"):
                 if key:
                     self.key_cooldowns[key] = time.time() + 1200
                     logger.warning(f"⚠️ Key {key[-4:]} rate-limited. Rotating...")
+                else:
+                    # If GitHub Models is also rate-limited
+                    logger.warning("🚫 All cloud engines limited. Sleeping 5 minutes...")
+                    time.sleep(300)
                 attempts += 1
                 continue
 
-            # Handle Empty or Generic Error Responses
+            # 3. Handle Empty or Error Responses (STOPS THE INFINITE LOOP)
             if not response_text or response_text.startswith("ERROR_CODE_"):
-                logger.warning(
-                    f"⚠️ LLM Error detected ({response_text[:20]}...). Retrying in 10s..."
-                )
-                time.sleep(10)
+                logger.warning(f"⚠️ API Error/Empty Response. Sleeping 10s before retry...")
+                time.sleep(10) # MANDATORY SLEEP to prevent tight-looping
                 attempts += 1
                 continue
 
-            # Check if the AI's content matches our XML/Format rules
+            # 4. Final Validation
             if validator(response_text):
-                # SUCCESS BREATHER: Stay under RPM limits
-                if is_cloud:
-                    time.sleep(2)
+                if is_cloud: time.sleep(2) # Success breather
                 return response_text
-            else:
-                logger.warning("LLM response failed internal validation. Retrying...")
-                if is_cloud:
-                    time.sleep(5)
-                attempts += 1
+            
+            logger.warning("LLM response failed internal validation. Retrying in 5s...")
+            time.sleep(5)
+            attempts += 1
 
     def _get_user_prompt_augmentation(self, initial_text: str = "") -> str:
         import tempfile

From 997a6d45062f58e301462dc32660928219ddccab Mon Sep 17 00:00:00 2001
From: Vic <125237471+vicsanity623@users.noreply.github.com>
Date: Mon, 9 Mar 2026 17:52:55 -0700
Subject: [PATCH 2/3] Update core_utils.py

---
 src/pyob/core_utils.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/pyob/core_utils.py b/src/pyob/core_utils.py
index 2e6c9cb..18b4e29 100644
--- a/src/pyob/core_utils.py
+++ b/src/pyob/core_utils.py
@@ -421,7 +421,8 @@ def on_chunk():
                 sys.stdout.write("\r\033[K")
                 sys.stdout.flush()
                 source = f"Gemini ...{key[-4:]}" if key else "GitHub Models"
-                if not key and not is_cloud: source = "Local Ollama"
+                if not key and not is_cloud:
+                    source = "Local Ollama"
                 print(f"🤖 AI Output ({source}): ", end="", flush=True)
 
         response_text = ""
@@ -485,9 +486,10 @@ def get_valid_llm_response(self, prompt: str, validator, context: str = "") -> s
 
             # 4. Final Validation
             if validator(response_text):
-                if is_cloud: time.sleep(2) # Success breather
+                if is_cloud:
+                    time.sleep(2) # Success breather
                 return response_text
-            
+
             logger.warning("LLM response failed internal validation. Retrying in 5s...")
             time.sleep(5)
             attempts += 1

From 385308eff1c7b725c3317878d8d96d04287a9178 Mon Sep 17 00:00:00 2001
From: vicsanity623 <125237471+vicsanity623@users.noreply.github.com>
Date: Tue, 10 Mar 2026 00:53:15 +0000
Subject: [PATCH 3/3] =?UTF-8?q?=F0=9F=AA=84=20PyOB:=20Automated=20Lint=20&?=
 =?UTF-8?q?=20Format=20Fixes?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/pyob/core_utils.py | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/src/pyob/core_utils.py b/src/pyob/core_utils.py
index 18b4e29..b2581b0 100644
--- a/src/pyob/core_utils.py
+++ b/src/pyob/core_utils.py
@@ -441,13 +441,15 @@ def on_chunk():
         first_chunk_received[0] = True
         final_time = time.time() - gen_start_time
         if response_text and not response_text.startswith("ERROR_CODE_"):
-            print(f"\n\n[✅ Generation Complete: ~{len(response_text) // 4} tokens in {final_time:.1f}s]")
+            print(
+                f"\n\n[✅ Generation Complete: ~{len(response_text) // 4} tokens in {final_time:.1f}s]"
+            )
         return response_text
 
     def get_valid_llm_response(self, prompt: str, validator, context: str = "") -> str:
         attempts = 0
         is_cloud = os.environ.get("GITHUB_ACTIONS") == "true"
-        
+
         while True:
             key = None
             now = time.time()
@@ -456,9 +458,13 @@ def get_valid_llm_response(self, prompt: str, validator, context: str = "") -> s
             # 1. Select Engine
             if available_keys:
                 key = available_keys[attempts % len(available_keys)]
-                logger.info(f"Attempting Gemini API Key {attempts % len(available_keys) + 1}/{len(available_keys)}")
+                logger.info(
+                    f"Attempting Gemini API Key {attempts % len(available_keys) + 1}/{len(available_keys)}"
+                )
             elif is_cloud:
-                logger.warning("⏳ Gemini keys limited. Pivoting to GitHub Models (Phi-4)...")
+                logger.warning(
+                    "⏳ Gemini keys limited. Pivoting to GitHub Models (Phi-4)..."
+                )
                 # key remains None, which triggers stream_github_models in _stream_single_llm
             else:
                 logger.info("🏠 Using Local Ollama Engine...")
@@ -472,22 +478,26 @@ def get_valid_llm_response(self, prompt: str, validator, context: str = "") -> s
                     logger.warning(f"⚠️ Key {key[-4:]} rate-limited. Rotating...")
                 else:
                     # If GitHub Models is also rate-limited
-                    logger.warning("🚫 All cloud engines limited. Sleeping 5 minutes...")
+                    logger.warning(
+                        "🚫 All cloud engines limited. Sleeping 5 minutes..."
+                    )
                     time.sleep(300)
                 attempts += 1
                 continue
 
             # 3. Handle Empty or Error Responses (STOPS THE INFINITE LOOP)
             if not response_text or response_text.startswith("ERROR_CODE_"):
-                logger.warning(f"⚠️ API Error/Empty Response. Sleeping 10s before retry...")
-                time.sleep(10) # MANDATORY SLEEP to prevent tight-looping
+                logger.warning(
+                    "⚠️ API Error/Empty Response. Sleeping 10s before retry..."
+                )
+                time.sleep(10)  # MANDATORY SLEEP to prevent tight-looping
                 attempts += 1
                 continue
 
             # 4. Final Validation
             if validator(response_text):
                 if is_cloud:
-                    time.sleep(2) # Success breather
+                    time.sleep(2)  # Success breather
                 return response_text
 
             logger.warning("LLM response failed internal validation. Retrying in 5s...")