From 65d4ac7ce6cd2ee63865f10a9cb35c7f235737fe Mon Sep 17 00:00:00 2001 From: Vic <125237471+vicsanity623@users.noreply.github.com> Date: Mon, 9 Mar 2026 16:58:19 -0700 Subject: [PATCH 1/3] Update core_utils.py --- src/pyob/core_utils.py | 109 ++++++++++++++++++++++++----------------- 1 file changed, 65 insertions(+), 44 deletions(-) diff --git a/src/pyob/core_utils.py b/src/pyob/core_utils.py index 12e2d75..0552887 100644 --- a/src/pyob/core_utils.py +++ b/src/pyob/core_utils.py @@ -342,6 +342,46 @@ def stream_ollama(self, prompt: str, on_chunk) -> str: except Exception as e: logger.error(f"Ollama Error: {e}") return response_text + + def stream_github_models(self, prompt: str, on_chunk) -> str: + """Fallback to GitHub Models API (Phi-4).""" + token = os.environ.get("GITHUB_TOKEN") + if not token: + return "ERROR_CODE_GITHUB_TOKEN_MISSING" + + # GitHub Models use the Azure AI Inference endpoint + endpoint = "https://models.inference.ai.azure.com/chat/completions" + headers = { + "Authorization": f"Bearer {token}", + "Content-Type": "application/json" + } + data = { + "messages": [{"role": "user", "content": prompt}], + "model": "Phi-4", + "stream": True, + "temperature": 0.1 + } + + full_text = "" + try: + response = requests.post(endpoint, headers=headers, json=data, stream=True, timeout=120) + if response.status_code != 200: + return f"ERROR_CODE_{response.status_code}" + + for line in response.iter_lines(): + if line: + decoded_line = line.decode("utf-8").replace("data: ", "") + if decoded_line == "[DONE]": break + try: + chunk = json.loads(decoded_line) + content = chunk["choices"][0]["delta"].get("content", "") + if content: + full_text += content + on_chunk() + except: continue + return full_text + except Exception as e: + return f"ERROR_CODE_EXCEPTION: {e}" def _stream_single_llm( self, prompt: str, key: str | None = None, context: str = "" @@ -385,10 +425,10 @@ def on_chunk(): response_text = self.stream_gemini(prompt, key, on_chunk) else: if os.environ.get("GITHUB_ACTIONS") == "true": - first_chunk_received[0] = True - return "ERROR_CODE_CLOUD_OLLAMA_BLOCKED" - - response_text = self.stream_ollama(prompt, on_chunk) + logger.info("☁️ Gemini limited. Pivoting to GitHub Models (Phi-4)...") + response_text = self.stream_github_models(prompt, on_chunk) + else: + response_text = self.stream_ollama(prompt, on_chunk) except Exception as e: first_chunk_received[0] = True return f"ERROR_CODE_EXCEPTION: {e}" @@ -408,9 +448,7 @@ def get_valid_llm_response(self, prompt: str, validator, context: str = "") -> s use_ollama = False is_cloud = os.environ.get("GITHUB_ACTIONS") == "true" - logger.info( - f"📊 Engine check: Found {len(self.key_cooldowns)} Gemini API keys." - ) + logger.info(f"📊 Engine check: Found {len(self.key_cooldowns)} Gemini API keys.") while True: key = None @@ -418,54 +456,36 @@ def get_valid_llm_response(self, prompt: str, validator, context: str = "") -> s available_keys = [ k for k, cooldown in self.key_cooldowns.items() if now > cooldown ] + if not available_keys: if is_cloud: - wait_times = [ - cooldown - now for cooldown in self.key_cooldowns.values() - ] - sleep_duration = max( - 10, min(min(wait_times) if wait_times else 120, 1200) - ) - logger.warning( - f"⏳ CLOUD NOTICE: All keys rate-limited. Retrying Gemini in {int(sleep_duration)}s..." - ) - time.sleep(sleep_duration) - continue - - if not use_ollama: - logger.warning( - "🚫 All Gemini keys rate-limited. Falling back to Local Ollama." - ) - use_ollama = True + # In the cloud, we don't 'use_ollama', we just try the GitHub Models fallback + # which is handled inside _stream_single_llm(key=None) + use_ollama = False + else: + if not use_ollama: + logger.warning("🚫 Gemini keys limited. Falling back to Local Ollama.") + use_ollama = True else: use_ollama = False key = available_keys[attempts % len(available_keys)] - logger.info( - f"Attempting Gemini API Key {attempts % len(available_keys) + 1}/{len(available_keys)}" - ) - - if use_ollama: - logger.info("Using Local Ollama Engine...") + logger.info(f"Attempting Gemini API Key {attempts % len(available_keys) + 1}/{len(available_keys)}") response_text = self._stream_single_llm(prompt, key=key, context=context) - if is_cloud and ( - response_text.startswith("ERROR_CODE_") or not response_text.strip() - ): - if "429" in response_text and key: + # Handle errors/rate-limits + if response_text.startswith("ERROR_CODE_429"): + if key: self.key_cooldowns[key] = time.time() + 1200 - logger.warning("⚠️ Key rate-limited. Rotating...") - else: - logger.warning( - "⚠️ Gemini error/empty response. Sleeping 10s before retry..." - ) - time.sleep(10) attempts += 1 continue - if response_text.startswith("ERROR_CODE_429"): - if key: - self.key_cooldowns[key] = time.time() + 1200 + # If Gemini fails/returns empty in the cloud, perform Smart Sleep + if is_cloud and (response_text.startswith("ERROR_CODE_") or not response_text.strip()): + wait_times = [cooldown - now for cooldown in self.key_cooldowns.values()] + sleep_duration = max(10, min(min(wait_times) if wait_times else 60, 600)) + logger.warning(f"⏳ Cloud limit reached. Resuming in {int(sleep_duration)}s...") + time.sleep(sleep_duration) attempts += 1 continue @@ -474,8 +494,9 @@ def get_valid_llm_response(self, prompt: str, validator, context: str = "") -> s continue if validator(response_text): + # --- SUCCESS BREATHER --- if is_cloud: - time.sleep(2) + time.sleep(2) return response_text else: logger.warning("LLM response failed validation. Retrying...") From 3c3cf2aa1529065cc91e443ce160506a665a5ebd Mon Sep 17 00:00:00 2001 From: Vic <125237471+vicsanity623@users.noreply.github.com> Date: Mon, 9 Mar 2026 17:07:25 -0700 Subject: [PATCH 2/3] Update core_utils.py --- src/pyob/core_utils.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/pyob/core_utils.py b/src/pyob/core_utils.py index 0552887..57e0963 100644 --- a/src/pyob/core_utils.py +++ b/src/pyob/core_utils.py @@ -371,14 +371,16 @@ def stream_github_models(self, prompt: str, on_chunk) -> str: for line in response.iter_lines(): if line: decoded_line = line.decode("utf-8").replace("data: ", "") - if decoded_line == "[DONE]": break + if decoded_line == "[DONE]": + break try: chunk = json.loads(decoded_line) content = chunk["choices"][0]["delta"].get("content", "") if content: full_text += content on_chunk() - except: continue + except Exception: + continue return full_text except Exception as e: return f"ERROR_CODE_EXCEPTION: {e}" From dbf0639bc166325ff3da334c51842bf32aa3d2d9 Mon Sep 17 00:00:00 2001 From: vicsanity623 <125237471+vicsanity623@users.noreply.github.com> Date: Tue, 10 Mar 2026 00:07:49 +0000 Subject: [PATCH 3/3] =?UTF-8?q?=F0=9F=AA=84=20PyOB:=20Automated=20Lint=20&?= =?UTF-8?q?=20Format=20Fixes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/pyob/core_utils.py | 50 ++++++++++++++++++++++++++++-------------- 1 file changed, 34 insertions(+), 16 deletions(-) diff --git a/src/pyob/core_utils.py b/src/pyob/core_utils.py index 57e0963..9939b2b 100644 --- a/src/pyob/core_utils.py +++ b/src/pyob/core_utils.py @@ -342,7 +342,7 @@ def stream_ollama(self, prompt: str, on_chunk) -> str: except Exception as e: logger.error(f"Ollama Error: {e}") return response_text - + def stream_github_models(self, prompt: str, on_chunk) -> str: """Fallback to GitHub Models API (Phi-4).""" token = os.environ.get("GITHUB_TOKEN") @@ -353,18 +353,20 @@ def stream_github_models(self, prompt: str, on_chunk) -> str: endpoint = "https://models.inference.ai.azure.com/chat/completions" headers = { "Authorization": f"Bearer {token}", - "Content-Type": "application/json" + "Content-Type": "application/json", } data = { "messages": [{"role": "user", "content": prompt}], "model": "Phi-4", "stream": True, - "temperature": 0.1 + "temperature": 0.1, } full_text = "" try: - response = requests.post(endpoint, headers=headers, json=data, stream=True, timeout=120) + response = requests.post( + endpoint, headers=headers, json=data, stream=True, timeout=120 + ) if response.status_code != 200: return f"ERROR_CODE_{response.status_code}" @@ -378,7 +380,7 @@ def stream_github_models(self, prompt: str, on_chunk) -> str: content = chunk["choices"][0]["delta"].get("content", "") if content: full_text += content - on_chunk() + on_chunk() except Exception: continue return full_text @@ -427,7 +429,9 @@ def on_chunk(): response_text = self.stream_gemini(prompt, key, on_chunk) else: if os.environ.get("GITHUB_ACTIONS") == "true": - logger.info("☁️ Gemini limited. Pivoting to GitHub Models (Phi-4)...") + logger.info( + "☁️ Gemini limited. Pivoting to GitHub Models (Phi-4)..." + ) response_text = self.stream_github_models(prompt, on_chunk) else: response_text = self.stream_ollama(prompt, on_chunk) @@ -450,7 +454,9 @@ def get_valid_llm_response(self, prompt: str, validator, context: str = "") -> s use_ollama = False is_cloud = os.environ.get("GITHUB_ACTIONS") == "true" - logger.info(f"📊 Engine check: Found {len(self.key_cooldowns)} Gemini API keys.") + logger.info( + f"📊 Engine check: Found {len(self.key_cooldowns)} Gemini API keys." + ) while True: key = None @@ -458,20 +464,24 @@ def get_valid_llm_response(self, prompt: str, validator, context: str = "") -> s available_keys = [ k for k, cooldown in self.key_cooldowns.items() if now > cooldown ] - + if not available_keys: if is_cloud: # In the cloud, we don't 'use_ollama', we just try the GitHub Models fallback # which is handled inside _stream_single_llm(key=None) - use_ollama = False + use_ollama = False else: if not use_ollama: - logger.warning("🚫 Gemini keys limited. Falling back to Local Ollama.") + logger.warning( + "🚫 Gemini keys limited. Falling back to Local Ollama." + ) use_ollama = True else: use_ollama = False key = available_keys[attempts % len(available_keys)] - logger.info(f"Attempting Gemini API Key {attempts % len(available_keys) + 1}/{len(available_keys)}") + logger.info( + f"Attempting Gemini API Key {attempts % len(available_keys) + 1}/{len(available_keys)}" + ) response_text = self._stream_single_llm(prompt, key=key, context=context) @@ -483,10 +493,18 @@ def get_valid_llm_response(self, prompt: str, validator, context: str = "") -> s continue # If Gemini fails/returns empty in the cloud, perform Smart Sleep - if is_cloud and (response_text.startswith("ERROR_CODE_") or not response_text.strip()): - wait_times = [cooldown - now for cooldown in self.key_cooldowns.values()] - sleep_duration = max(10, min(min(wait_times) if wait_times else 60, 600)) - logger.warning(f"⏳ Cloud limit reached. Resuming in {int(sleep_duration)}s...") + if is_cloud and ( + response_text.startswith("ERROR_CODE_") or not response_text.strip() + ): + wait_times = [ + cooldown - now for cooldown in self.key_cooldowns.values() + ] + sleep_duration = max( + 10, min(min(wait_times) if wait_times else 60, 600) + ) + logger.warning( + f"⏳ Cloud limit reached. Resuming in {int(sleep_duration)}s..." + ) time.sleep(sleep_duration) attempts += 1 continue @@ -498,7 +516,7 @@ def get_valid_llm_response(self, prompt: str, validator, context: str = "") -> s if validator(response_text): # --- SUCCESS BREATHER --- if is_cloud: - time.sleep(2) + time.sleep(2) return response_text else: logger.warning("LLM response failed validation. Retrying...")