From ca0bfc9e7c241452fdbd7de9a781e3e4b154e9ad Mon Sep 17 00:00:00 2001 From: Vic <125237471+vicsanity623@users.noreply.github.com> Date: Mon, 9 Mar 2026 17:27:11 -0700 Subject: [PATCH 1/2] Update core_utils.py --- src/pyob/core_utils.py | 103 +++++++++++++++++------------------------ 1 file changed, 43 insertions(+), 60 deletions(-) diff --git a/src/pyob/core_utils.py b/src/pyob/core_utils.py index 9939b2b..e5c54f1 100644 --- a/src/pyob/core_utils.py +++ b/src/pyob/core_utils.py @@ -402,7 +402,6 @@ def spinner(): elapsed = time.time() - gen_start_time expected_time = max(1, input_tokens / 12.0) progress = min(1.0, elapsed / expected_time) - bar_len = max(10, cols - 65) filled = int(progress * bar_len) bar = "█" * filled + "░" * (bar_len - filled) @@ -420,43 +419,34 @@ def on_chunk(): first_chunk_received[0] = True sys.stdout.write("\r\033[K") sys.stdout.flush() - source = f"Gemini ...{key[-4:]}" if key else "Local Ollama" + source = f"Gemini ...{key[-4:]}" if key else "GitHub Models" print(f"🤖 AI Output ({source}): ", end="", flush=True) response_text = "" try: if key is not None: response_text = self.stream_gemini(prompt, key, on_chunk) + elif os.environ.get("GITHUB_ACTIONS") == "true": + # In cloud, 'None' key always means use GitHub Models + response_text = self.stream_github_models(prompt, on_chunk) else: - if os.environ.get("GITHUB_ACTIONS") == "true": - logger.info( - "☁️ Gemini limited. Pivoting to GitHub Models (Phi-4)..." - ) - response_text = self.stream_github_models(prompt, on_chunk) - else: - response_text = self.stream_ollama(prompt, on_chunk) + # Only iMac uses Ollama + response_text = self.stream_ollama(prompt, on_chunk) except Exception as e: first_chunk_received[0] = True return f"ERROR_CODE_EXCEPTION: {e}" - if not first_chunk_received[0]: - first_chunk_received[0] = True - + first_chunk_received[0] = True final_time = time.time() - gen_start_time if response_text and not response_text.startswith("ERROR_CODE_"): - print( - f"\n\n[✅ Generation Complete: ~{len(response_text) // 4} tokens in {final_time:.1f}s]" - ) + print(f"\n\n[✅ Generation Complete: ~{len(response_text) // 4} tokens in {final_time:.1f}s]") return response_text def get_valid_llm_response(self, prompt: str, validator, context: str = "") -> str: attempts = 0 - use_ollama = False is_cloud = os.environ.get("GITHUB_ACTIONS") == "true" - logger.info( - f"📊 Engine check: Found {len(self.key_cooldowns)} Gemini API keys." - ) + logger.info(f"📊 Engine check: Found {len(self.key_cooldowns)} Gemini API keys.") while True: key = None @@ -464,65 +454,58 @@ def get_valid_llm_response(self, prompt: str, validator, context: str = "") -> s available_keys = [ k for k, cooldown in self.key_cooldowns.items() if now > cooldown ] - - if not available_keys: - if is_cloud: - # In the cloud, we don't 'use_ollama', we just try the GitHub Models fallback - # which is handled inside _stream_single_llm(key=None) - use_ollama = False - else: - if not use_ollama: - logger.warning( - "🚫 Gemini keys limited. Falling back to Local Ollama." - ) - use_ollama = True - else: - use_ollama = False + + # --- 1. ENGINE SELECTION LOGIC --- + if available_keys: + # Use Gemini (Primary) key = available_keys[attempts % len(available_keys)] - logger.info( - f"Attempting Gemini API Key {attempts % len(available_keys) + 1}/{len(available_keys)}" - ) - - response_text = self._stream_single_llm(prompt, key=key, context=context) + logger.info(f"Attempting Gemini API Key {attempts % len(available_keys) + 1}/{len(available_keys)}") + response_text = self._stream_single_llm(prompt, key=key, context=context) + + elif is_cloud: + # ALL GEMINI KEYS LIMITED -> TRY GITHUB MODELS (Secondary) + logger.warning("⏳ Gemini keys limited. Pivoting to GitHub Models (Phi-4)...") + response_text = self._stream_single_llm(prompt, key=None, context=context) + + # If GitHub Models ALSO fails or returns an error + if not response_text or response_text.startswith("ERROR_CODE_"): + logger.warning("🚫 All Cloud AI engines exhausted. Sleeping 5 minutes for cooldown...") + time.sleep(300) + continue + + else: + # LOCAL IMAC -> FALLBACK TO OLLAMA + logger.info("🏠 Using Local Ollama Engine...") + response_text = self._stream_single_llm(prompt, key=None, context=context) - # Handle errors/rate-limits + # --- 2. RESPONSE VALIDATION & ROTATION --- + + # Handle standard Gemini Rate Limit (429) if response_text.startswith("ERROR_CODE_429"): if key: self.key_cooldowns[key] = time.time() + 1200 + logger.warning(f"⚠️ Key {key[-4:]} rate-limited. Rotating...") attempts += 1 continue - # If Gemini fails/returns empty in the cloud, perform Smart Sleep - if is_cloud and ( - response_text.startswith("ERROR_CODE_") or not response_text.strip() - ): - wait_times = [ - cooldown - now for cooldown in self.key_cooldowns.values() - ] - sleep_duration = max( - 10, min(min(wait_times) if wait_times else 60, 600) - ) - logger.warning( - f"⏳ Cloud limit reached. Resuming in {int(sleep_duration)}s..." - ) - time.sleep(sleep_duration) - attempts += 1 - continue - - if response_text.startswith("ERROR_CODE_") or not response_text.strip(): + # Handle Empty or Generic Error Responses + if not response_text or response_text.startswith("ERROR_CODE_"): + logger.warning(f"⚠️ LLM Error detected ({response_text[:20]}...). Retrying in 10s...") + time.sleep(10) attempts += 1 continue + # Check if the AI's content matches our XML/Format rules if validator(response_text): - # --- SUCCESS BREATHER --- + # SUCCESS BREATHER: Stay under RPM limits if is_cloud: time.sleep(2) return response_text else: - logger.warning("LLM response failed validation. Retrying...") - attempts += 1 + logger.warning("LLM response failed internal validation. Retrying...") if is_cloud: time.sleep(5) + attempts += 1 def _get_user_prompt_augmentation(self, initial_text: str = "") -> str: import tempfile From 06698a6b293966656608e1e62d69511753822119 Mon Sep 17 00:00:00 2001 From: vicsanity623 <125237471+vicsanity623@users.noreply.github.com> Date: Tue, 10 Mar 2026 00:27:49 +0000 Subject: [PATCH 2/2] =?UTF-8?q?=F0=9F=AA=84=20PyOB:=20Automated=20Lint=20&?= =?UTF-8?q?=20Format=20Fixes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/pyob/core_utils.py | 46 +++++++++++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 14 deletions(-) diff --git a/src/pyob/core_utils.py b/src/pyob/core_utils.py index e5c54f1..5e75be2 100644 --- a/src/pyob/core_utils.py +++ b/src/pyob/core_utils.py @@ -439,14 +439,18 @@ def on_chunk(): first_chunk_received[0] = True final_time = time.time() - gen_start_time if response_text and not response_text.startswith("ERROR_CODE_"): - print(f"\n\n[✅ Generation Complete: ~{len(response_text) // 4} tokens in {final_time:.1f}s]") + print( + f"\n\n[✅ Generation Complete: ~{len(response_text) // 4} tokens in {final_time:.1f}s]" + ) return response_text def get_valid_llm_response(self, prompt: str, validator, context: str = "") -> str: attempts = 0 is_cloud = os.environ.get("GITHUB_ACTIONS") == "true" - logger.info(f"📊 Engine check: Found {len(self.key_cooldowns)} Gemini API keys.") + logger.info( + f"📊 Engine check: Found {len(self.key_cooldowns)} Gemini API keys." + ) while True: key = None @@ -454,32 +458,44 @@ def get_valid_llm_response(self, prompt: str, validator, context: str = "") -> s available_keys = [ k for k, cooldown in self.key_cooldowns.items() if now > cooldown ] - + # --- 1. ENGINE SELECTION LOGIC --- if available_keys: # Use Gemini (Primary) key = available_keys[attempts % len(available_keys)] - logger.info(f"Attempting Gemini API Key {attempts % len(available_keys) + 1}/{len(available_keys)}") - response_text = self._stream_single_llm(prompt, key=key, context=context) - + logger.info( + f"Attempting Gemini API Key {attempts % len(available_keys) + 1}/{len(available_keys)}" + ) + response_text = self._stream_single_llm( + prompt, key=key, context=context + ) + elif is_cloud: # ALL GEMINI KEYS LIMITED -> TRY GITHUB MODELS (Secondary) - logger.warning("⏳ Gemini keys limited. Pivoting to GitHub Models (Phi-4)...") - response_text = self._stream_single_llm(prompt, key=None, context=context) - + logger.warning( + "⏳ Gemini keys limited. Pivoting to GitHub Models (Phi-4)..." + ) + response_text = self._stream_single_llm( + prompt, key=None, context=context + ) + # If GitHub Models ALSO fails or returns an error if not response_text or response_text.startswith("ERROR_CODE_"): - logger.warning("🚫 All Cloud AI engines exhausted. Sleeping 5 minutes for cooldown...") + logger.warning( + "🚫 All Cloud AI engines exhausted. Sleeping 5 minutes for cooldown..." + ) time.sleep(300) continue - + else: # LOCAL IMAC -> FALLBACK TO OLLAMA logger.info("🏠 Using Local Ollama Engine...") - response_text = self._stream_single_llm(prompt, key=None, context=context) + response_text = self._stream_single_llm( + prompt, key=None, context=context + ) # --- 2. RESPONSE VALIDATION & ROTATION --- - + # Handle standard Gemini Rate Limit (429) if response_text.startswith("ERROR_CODE_429"): if key: @@ -490,7 +506,9 @@ def get_valid_llm_response(self, prompt: str, validator, context: str = "") -> s # Handle Empty or Generic Error Responses if not response_text or response_text.startswith("ERROR_CODE_"): - logger.warning(f"⚠️ LLM Error detected ({response_text[:20]}...). Retrying in 10s...") + logger.warning( + f"⚠️ LLM Error detected ({response_text[:20]}...). Retrying in 10s..." + ) time.sleep(10) attempts += 1 continue