Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 25 additions & 46 deletions src/pyob/core_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,7 @@ def _stream_single_llm(
input_tokens = len(prompt) // 4
first_chunk_received = [False]
gen_start_time = time.time()
is_cloud = os.environ.get("GITHUB_ACTIONS") == "true"

def spinner():
spinner_chars = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"]
Expand Down Expand Up @@ -420,17 +421,18 @@ def on_chunk():
sys.stdout.write("\r\033[K")
sys.stdout.flush()
source = f"Gemini ...{key[-4:]}" if key else "GitHub Models"
if not key and not is_cloud:
source = "Local Ollama"
print(f"🤖 AI Output ({source}): ", end="", flush=True)

response_text = ""
try:
if key is not None:
response_text = self.stream_gemini(prompt, key, on_chunk)
elif os.environ.get("GITHUB_ACTIONS") == "true":
# In cloud, 'None' key always means use GitHub Models
elif is_cloud:
# Force GitHub Models in cloud, skip Ollama entirely
response_text = self.stream_github_models(prompt, on_chunk)
else:
# Only iMac uses Ollama
response_text = self.stream_ollama(prompt, on_chunk)
except Exception as e:
first_chunk_received[0] = True
Expand All @@ -448,82 +450,59 @@ def get_valid_llm_response(self, prompt: str, validator, context: str = "") -> s
attempts = 0
is_cloud = os.environ.get("GITHUB_ACTIONS") == "true"

logger.info(
f"📊 Engine check: Found {len(self.key_cooldowns)} Gemini API keys."
)

while True:
key = None
now = time.time()
available_keys = [
k for k, cooldown in self.key_cooldowns.items() if now > cooldown
]
available_keys = [k for k, cd in self.key_cooldowns.items() if now > cd]

# --- 1. ENGINE SELECTION LOGIC ---
# 1. Select Engine
if available_keys:
# Use Gemini (Primary)
key = available_keys[attempts % len(available_keys)]
logger.info(
f"Attempting Gemini API Key {attempts % len(available_keys) + 1}/{len(available_keys)}"
)
response_text = self._stream_single_llm(
prompt, key=key, context=context
)

elif is_cloud:
# ALL GEMINI KEYS LIMITED -> TRY GITHUB MODELS (Secondary)
logger.warning(
"⏳ Gemini keys limited. Pivoting to GitHub Models (Phi-4)..."
)
response_text = self._stream_single_llm(
prompt, key=None, context=context
)

# If GitHub Models ALSO fails or returns an error
if not response_text or response_text.startswith("ERROR_CODE_"):
logger.warning(
"🚫 All Cloud AI engines exhausted. Sleeping 5 minutes for cooldown..."
)
time.sleep(300)
continue

# key remains None, which triggers stream_github_models in _stream_single_llm
else:
# LOCAL IMAC -> FALLBACK TO OLLAMA
logger.info("🏠 Using Local Ollama Engine...")
response_text = self._stream_single_llm(
prompt, key=None, context=context
)

# --- 2. RESPONSE VALIDATION & ROTATION ---
response_text = self._stream_single_llm(prompt, key=key, context=context)

# Handle standard Gemini Rate Limit (429)
# 2. Handle Rate Limits (429)
if response_text.startswith("ERROR_CODE_429"):
if key:
self.key_cooldowns[key] = time.time() + 1200
logger.warning(f"⚠️ Key {key[-4:]} rate-limited. Rotating...")
else:
# If GitHub Models is also rate-limited
logger.warning(
"🚫 All cloud engines limited. Sleeping 5 minutes..."
)
time.sleep(300)
attempts += 1
continue

# Handle Empty or Generic Error Responses
# 3. Handle Empty or Error Responses (STOPS THE INFINITE LOOP)
if not response_text or response_text.startswith("ERROR_CODE_"):
logger.warning(
f"⚠️ LLM Error detected ({response_text[:20]}...). Retrying in 10s..."
"⚠️ API Error/Empty Response. Sleeping 10s before retry..."
)
time.sleep(10)
time.sleep(10) # MANDATORY SLEEP to prevent tight-looping
attempts += 1
continue

# Check if the AI's content matches our XML/Format rules
# 4. Final Validation
if validator(response_text):
# SUCCESS BREATHER: Stay under RPM limits
if is_cloud:
time.sleep(2)
time.sleep(2) # Success breather
return response_text
else:
logger.warning("LLM response failed internal validation. Retrying...")
if is_cloud:
time.sleep(5)
attempts += 1

logger.warning("LLM response failed internal validation. Retrying in 5s...")
time.sleep(5)
attempts += 1

def _get_user_prompt_augmentation(self, initial_text: str = "") -> str:
import tempfile
Expand Down