From e25b13bfba41ca2243fdce8806b1ccbca1f245f5 Mon Sep 17 00:00:00 2001 From: Vic <125237471+vicsanity623@users.noreply.github.com> Date: Mon, 9 Mar 2026 17:49:35 -0700 Subject: [PATCH 1/3] Update core_utils.py Gemini empty response issue --- src/pyob/core_utils.py | 85 +++++++++++++----------------------------- 1 file changed, 26 insertions(+), 59 deletions(-) diff --git a/src/pyob/core_utils.py b/src/pyob/core_utils.py index 5e75be2..2e6c9cb 100644 --- a/src/pyob/core_utils.py +++ b/src/pyob/core_utils.py @@ -393,6 +393,7 @@ def _stream_single_llm( input_tokens = len(prompt) // 4 first_chunk_received = [False] gen_start_time = time.time() + is_cloud = os.environ.get("GITHUB_ACTIONS") == "true" def spinner(): spinner_chars = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"] @@ -420,17 +421,17 @@ def on_chunk(): sys.stdout.write("\r\033[K") sys.stdout.flush() source = f"Gemini ...{key[-4:]}" if key else "GitHub Models" + if not key and not is_cloud: source = "Local Ollama" print(f"🤖 AI Output ({source}): ", end="", flush=True) response_text = "" try: if key is not None: response_text = self.stream_gemini(prompt, key, on_chunk) - elif os.environ.get("GITHUB_ACTIONS") == "true": - # In cloud, 'None' key always means use GitHub Models + elif is_cloud: + # Force GitHub Models in cloud, skip Ollama entirely response_text = self.stream_github_models(prompt, on_chunk) else: - # Only iMac uses Ollama response_text = self.stream_ollama(prompt, on_chunk) except Exception as e: first_chunk_received[0] = True @@ -439,91 +440,57 @@ def on_chunk(): first_chunk_received[0] = True final_time = time.time() - gen_start_time if response_text and not response_text.startswith("ERROR_CODE_"): - print( - f"\n\n[✅ Generation Complete: ~{len(response_text) // 4} tokens in {final_time:.1f}s]" - ) + print(f"\n\n[✅ Generation Complete: ~{len(response_text) // 4} tokens in {final_time:.1f}s]") return response_text def get_valid_llm_response(self, prompt: str, validator, context: str = "") -> str: attempts = 0 is_cloud = os.environ.get("GITHUB_ACTIONS") == "true" - - logger.info( - f"📊 Engine check: Found {len(self.key_cooldowns)} Gemini API keys." - ) - + while True: key = None now = time.time() - available_keys = [ - k for k, cooldown in self.key_cooldowns.items() if now > cooldown - ] + available_keys = [k for k, cd in self.key_cooldowns.items() if now > cd] - # --- 1. ENGINE SELECTION LOGIC --- + # 1. Select Engine if available_keys: - # Use Gemini (Primary) key = available_keys[attempts % len(available_keys)] - logger.info( - f"Attempting Gemini API Key {attempts % len(available_keys) + 1}/{len(available_keys)}" - ) - response_text = self._stream_single_llm( - prompt, key=key, context=context - ) - + logger.info(f"Attempting Gemini API Key {attempts % len(available_keys) + 1}/{len(available_keys)}") elif is_cloud: - # ALL GEMINI KEYS LIMITED -> TRY GITHUB MODELS (Secondary) - logger.warning( - "⏳ Gemini keys limited. Pivoting to GitHub Models (Phi-4)..." - ) - response_text = self._stream_single_llm( - prompt, key=None, context=context - ) - - # If GitHub Models ALSO fails or returns an error - if not response_text or response_text.startswith("ERROR_CODE_"): - logger.warning( - "🚫 All Cloud AI engines exhausted. Sleeping 5 minutes for cooldown..." - ) - time.sleep(300) - continue - + logger.warning("⏳ Gemini keys limited. Pivoting to GitHub Models (Phi-4)...") + # key remains None, which triggers stream_github_models in _stream_single_llm else: - # LOCAL IMAC -> FALLBACK TO OLLAMA logger.info("🏠 Using Local Ollama Engine...") - response_text = self._stream_single_llm( - prompt, key=None, context=context - ) - # --- 2. RESPONSE VALIDATION & ROTATION --- + response_text = self._stream_single_llm(prompt, key=key, context=context) - # Handle standard Gemini Rate Limit (429) + # 2. Handle Rate Limits (429) if response_text.startswith("ERROR_CODE_429"): if key: self.key_cooldowns[key] = time.time() + 1200 logger.warning(f"⚠️ Key {key[-4:]} rate-limited. Rotating...") + else: + # If GitHub Models is also rate-limited + logger.warning("🚫 All cloud engines limited. Sleeping 5 minutes...") + time.sleep(300) attempts += 1 continue - # Handle Empty or Generic Error Responses + # 3. Handle Empty or Error Responses (STOPS THE INFINITE LOOP) if not response_text or response_text.startswith("ERROR_CODE_"): - logger.warning( - f"⚠️ LLM Error detected ({response_text[:20]}...). Retrying in 10s..." - ) - time.sleep(10) + logger.warning(f"⚠️ API Error/Empty Response. Sleeping 10s before retry...") + time.sleep(10) # MANDATORY SLEEP to prevent tight-looping attempts += 1 continue - # Check if the AI's content matches our XML/Format rules + # 4. Final Validation if validator(response_text): - # SUCCESS BREATHER: Stay under RPM limits - if is_cloud: - time.sleep(2) + if is_cloud: time.sleep(2) # Success breather return response_text - else: - logger.warning("LLM response failed internal validation. Retrying...") - if is_cloud: - time.sleep(5) - attempts += 1 + + logger.warning("LLM response failed internal validation. Retrying in 5s...") + time.sleep(5) + attempts += 1 def _get_user_prompt_augmentation(self, initial_text: str = "") -> str: import tempfile From 997a6d45062f58e301462dc32660928219ddccab Mon Sep 17 00:00:00 2001 From: Vic <125237471+vicsanity623@users.noreply.github.com> Date: Mon, 9 Mar 2026 17:52:55 -0700 Subject: [PATCH 2/3] Update core_utils.py --- src/pyob/core_utils.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/pyob/core_utils.py b/src/pyob/core_utils.py index 2e6c9cb..18b4e29 100644 --- a/src/pyob/core_utils.py +++ b/src/pyob/core_utils.py @@ -421,7 +421,8 @@ def on_chunk(): sys.stdout.write("\r\033[K") sys.stdout.flush() source = f"Gemini ...{key[-4:]}" if key else "GitHub Models" - if not key and not is_cloud: source = "Local Ollama" + if not key and not is_cloud: + source = "Local Ollama" print(f"🤖 AI Output ({source}): ", end="", flush=True) response_text = "" @@ -485,9 +486,10 @@ def get_valid_llm_response(self, prompt: str, validator, context: str = "") -> s # 4. Final Validation if validator(response_text): - if is_cloud: time.sleep(2) # Success breather + if is_cloud: + time.sleep(2) # Success breather return response_text - + logger.warning("LLM response failed internal validation. Retrying in 5s...") time.sleep(5) attempts += 1 From 385308eff1c7b725c3317878d8d96d04287a9178 Mon Sep 17 00:00:00 2001 From: vicsanity623 <125237471+vicsanity623@users.noreply.github.com> Date: Tue, 10 Mar 2026 00:53:15 +0000 Subject: [PATCH 3/3] =?UTF-8?q?=F0=9F=AA=84=20PyOB:=20Automated=20Lint=20&?= =?UTF-8?q?=20Format=20Fixes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/pyob/core_utils.py | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/src/pyob/core_utils.py b/src/pyob/core_utils.py index 18b4e29..b2581b0 100644 --- a/src/pyob/core_utils.py +++ b/src/pyob/core_utils.py @@ -441,13 +441,15 @@ def on_chunk(): first_chunk_received[0] = True final_time = time.time() - gen_start_time if response_text and not response_text.startswith("ERROR_CODE_"): - print(f"\n\n[✅ Generation Complete: ~{len(response_text) // 4} tokens in {final_time:.1f}s]") + print( + f"\n\n[✅ Generation Complete: ~{len(response_text) // 4} tokens in {final_time:.1f}s]" + ) return response_text def get_valid_llm_response(self, prompt: str, validator, context: str = "") -> str: attempts = 0 is_cloud = os.environ.get("GITHUB_ACTIONS") == "true" - + while True: key = None now = time.time() @@ -456,9 +458,13 @@ def get_valid_llm_response(self, prompt: str, validator, context: str = "") -> s # 1. Select Engine if available_keys: key = available_keys[attempts % len(available_keys)] - logger.info(f"Attempting Gemini API Key {attempts % len(available_keys) + 1}/{len(available_keys)}") + logger.info( + f"Attempting Gemini API Key {attempts % len(available_keys) + 1}/{len(available_keys)}" + ) elif is_cloud: - logger.warning("⏳ Gemini keys limited. Pivoting to GitHub Models (Phi-4)...") + logger.warning( + "⏳ Gemini keys limited. Pivoting to GitHub Models (Phi-4)..." + ) # key remains None, which triggers stream_github_models in _stream_single_llm else: logger.info("🏠 Using Local Ollama Engine...") @@ -472,22 +478,26 @@ def get_valid_llm_response(self, prompt: str, validator, context: str = "") -> s logger.warning(f"⚠️ Key {key[-4:]} rate-limited. Rotating...") else: # If GitHub Models is also rate-limited - logger.warning("🚫 All cloud engines limited. Sleeping 5 minutes...") + logger.warning( + "🚫 All cloud engines limited. Sleeping 5 minutes..." + ) time.sleep(300) attempts += 1 continue # 3. Handle Empty or Error Responses (STOPS THE INFINITE LOOP) if not response_text or response_text.startswith("ERROR_CODE_"): - logger.warning(f"⚠️ API Error/Empty Response. Sleeping 10s before retry...") - time.sleep(10) # MANDATORY SLEEP to prevent tight-looping + logger.warning( + "⚠️ API Error/Empty Response. Sleeping 10s before retry..." + ) + time.sleep(10) # MANDATORY SLEEP to prevent tight-looping attempts += 1 continue # 4. Final Validation if validator(response_text): if is_cloud: - time.sleep(2) # Success breather + time.sleep(2) # Success breather return response_text logger.warning("LLM response failed internal validation. Retrying in 5s...")