diff --git a/src/pyob/core_utils.py b/src/pyob/core_utils.py
index 5e75be2..b2581b0 100644
--- a/src/pyob/core_utils.py
+++ b/src/pyob/core_utils.py
@@ -393,6 +393,7 @@ def _stream_single_llm(
         input_tokens = len(prompt) // 4
         first_chunk_received = [False]
         gen_start_time = time.time()
+        is_cloud = os.environ.get("GITHUB_ACTIONS") == "true"
 
         def spinner():
             spinner_chars = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"]
@@ -420,17 +421,18 @@ def on_chunk():
             sys.stdout.write("\r\033[K")
             sys.stdout.flush()
             source = f"Gemini ...{key[-4:]}" if key else "GitHub Models"
+            if not key and not is_cloud:
+                source = "Local Ollama"
             print(f"🤖 AI Output ({source}): ", end="", flush=True)
 
         response_text = ""
         try:
             if key is not None:
                 response_text = self.stream_gemini(prompt, key, on_chunk)
-            elif os.environ.get("GITHUB_ACTIONS") == "true":
-                # In cloud, 'None' key always means use GitHub Models
+            elif is_cloud:
+                # Force GitHub Models in cloud, skip Ollama entirely
                 response_text = self.stream_github_models(prompt, on_chunk)
             else:
-                # Only iMac uses Ollama
                 response_text = self.stream_ollama(prompt, on_chunk)
         except Exception as e:
             first_chunk_received[0] = True
@@ -448,82 +450,59 @@ def get_valid_llm_response(self, prompt: str, validator, context: str = "") -> s
         attempts = 0
         is_cloud = os.environ.get("GITHUB_ACTIONS") == "true"
 
-        logger.info(
-            f"📊 Engine check: Found {len(self.key_cooldowns)} Gemini API keys."
-        )
-
         while True:
             key = None
             now = time.time()
-            available_keys = [
-                k for k, cooldown in self.key_cooldowns.items() if now > cooldown
-            ]
+            available_keys = [k for k, cd in self.key_cooldowns.items() if now > cd]
 
-            # --- 1. ENGINE SELECTION LOGIC ---
+            # 1. Select Engine
             if available_keys:
-                # Use Gemini (Primary)
                 key = available_keys[attempts % len(available_keys)]
                 logger.info(
                     f"Attempting Gemini API Key {attempts % len(available_keys) + 1}/{len(available_keys)}"
                 )
-                response_text = self._stream_single_llm(
-                    prompt, key=key, context=context
-                )
-
             elif is_cloud:
-                # ALL GEMINI KEYS LIMITED -> TRY GITHUB MODELS (Secondary)
                 logger.warning(
                     "⏳ Gemini keys limited. Pivoting to GitHub Models (Phi-4)..."
                 )
-                response_text = self._stream_single_llm(
-                    prompt, key=None, context=context
-                )
-
-                # If GitHub Models ALSO fails or returns an error
-                if not response_text or response_text.startswith("ERROR_CODE_"):
-                    logger.warning(
-                        "🚫 All Cloud AI engines exhausted. Sleeping 5 minutes for cooldown..."
-                    )
-                    time.sleep(300)
-                    continue
-
+                # key remains None, which triggers stream_github_models in _stream_single_llm
             else:
-                # LOCAL IMAC -> FALLBACK TO OLLAMA
                 logger.info("🏠 Using Local Ollama Engine...")
-                response_text = self._stream_single_llm(
-                    prompt, key=None, context=context
-                )
 
-            # --- 2. RESPONSE VALIDATION & ROTATION ---
+            response_text = self._stream_single_llm(prompt, key=key, context=context)
 
-            # Handle standard Gemini Rate Limit (429)
+            # 2. Handle Rate Limits (429)
             if response_text.startswith("ERROR_CODE_429"):
                 if key:
                     self.key_cooldowns[key] = time.time() + 1200
                     logger.warning(f"⚠️ Key {key[-4:]} rate-limited. Rotating...")
+                else:
+                    # If GitHub Models is also rate-limited
+                    logger.warning(
+                        "🚫 All cloud engines limited. Sleeping 5 minutes..."
+                    )
+                    time.sleep(300)
                 attempts += 1
                 continue
 
-            # Handle Empty or Generic Error Responses
+            # 3. Handle Empty or Error Responses (STOPS THE INFINITE LOOP)
             if not response_text or response_text.startswith("ERROR_CODE_"):
                 logger.warning(
-                    f"⚠️ LLM Error detected ({response_text[:20]}...). Retrying in 10s..."
+                    "⚠️ API Error/Empty Response. Sleeping 10s before retry..."
                 )
-                time.sleep(10)
+                time.sleep(10)  # MANDATORY SLEEP to prevent tight-looping
                 attempts += 1
                 continue
 
-            # Check if the AI's content matches our XML/Format rules
+            # 4. Final Validation
             if validator(response_text):
-                # SUCCESS BREATHER: Stay under RPM limits
                 if is_cloud:
-                    time.sleep(2)
+                    time.sleep(2)  # Success breather
                 return response_text
-            else:
-                logger.warning("LLM response failed internal validation. Retrying...")
-                if is_cloud:
-                    time.sleep(5)
-                attempts += 1
+
+            logger.warning("LLM response failed internal validation. Retrying in 5s...")
+            time.sleep(5)
+            attempts += 1
 
     def _get_user_prompt_augmentation(self, initial_text: str = "") -> str:
         import tempfile