Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 48 additions & 47 deletions src/pyob/core_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -402,7 +402,6 @@ def spinner():
elapsed = time.time() - gen_start_time
expected_time = max(1, input_tokens / 12.0)
progress = min(1.0, elapsed / expected_time)

bar_len = max(10, cols - 65)
filled = int(progress * bar_len)
bar = "█" * filled + "░" * (bar_len - filled)
Expand All @@ -420,28 +419,24 @@ def on_chunk():
first_chunk_received[0] = True
sys.stdout.write("\r\033[K")
sys.stdout.flush()
source = f"Gemini ...{key[-4:]}" if key else "Local Ollama"
source = f"Gemini ...{key[-4:]}" if key else "GitHub Models"
print(f"🤖 AI Output ({source}): ", end="", flush=True)

response_text = ""
try:
if key is not None:
response_text = self.stream_gemini(prompt, key, on_chunk)
elif os.environ.get("GITHUB_ACTIONS") == "true":
# In cloud, 'None' key always means use GitHub Models
response_text = self.stream_github_models(prompt, on_chunk)
else:
if os.environ.get("GITHUB_ACTIONS") == "true":
logger.info(
"☁️ Gemini limited. Pivoting to GitHub Models (Phi-4)..."
)
response_text = self.stream_github_models(prompt, on_chunk)
else:
response_text = self.stream_ollama(prompt, on_chunk)
# Only iMac uses Ollama
response_text = self.stream_ollama(prompt, on_chunk)
except Exception as e:
first_chunk_received[0] = True
return f"ERROR_CODE_EXCEPTION: {e}"

if not first_chunk_received[0]:
first_chunk_received[0] = True

first_chunk_received[0] = True
final_time = time.time() - gen_start_time
if response_text and not response_text.startswith("ERROR_CODE_"):
print(
Expand All @@ -451,7 +446,6 @@ def on_chunk():

def get_valid_llm_response(self, prompt: str, validator, context: str = "") -> str:
attempts = 0
use_ollama = False
is_cloud = os.environ.get("GITHUB_ACTIONS") == "true"

logger.info(
Expand All @@ -465,64 +459,71 @@ def get_valid_llm_response(self, prompt: str, validator, context: str = "") -> s
k for k, cooldown in self.key_cooldowns.items() if now > cooldown
]

if not available_keys:
if is_cloud:
# In the cloud, we don't 'use_ollama', we just try the GitHub Models fallback
# which is handled inside _stream_single_llm(key=None)
use_ollama = False
else:
if not use_ollama:
logger.warning(
"🚫 Gemini keys limited. Falling back to Local Ollama."
)
use_ollama = True
else:
use_ollama = False
# --- 1. ENGINE SELECTION LOGIC ---
if available_keys:
# Use Gemini (Primary)
key = available_keys[attempts % len(available_keys)]
logger.info(
f"Attempting Gemini API Key {attempts % len(available_keys) + 1}/{len(available_keys)}"
)
response_text = self._stream_single_llm(
prompt, key=key, context=context
)

response_text = self._stream_single_llm(prompt, key=key, context=context)
elif is_cloud:
# ALL GEMINI KEYS LIMITED -> TRY GITHUB MODELS (Secondary)
logger.warning(
"⏳ Gemini keys limited. Pivoting to GitHub Models (Phi-4)..."
)
response_text = self._stream_single_llm(
prompt, key=None, context=context
)

# Handle errors/rate-limits
# If GitHub Models ALSO fails or returns an error
if not response_text or response_text.startswith("ERROR_CODE_"):
logger.warning(
"🚫 All Cloud AI engines exhausted. Sleeping 5 minutes for cooldown..."
)
time.sleep(300)
continue

else:
# LOCAL IMAC -> FALLBACK TO OLLAMA
logger.info("🏠 Using Local Ollama Engine...")
response_text = self._stream_single_llm(
prompt, key=None, context=context
)

# --- 2. RESPONSE VALIDATION & ROTATION ---

# Handle standard Gemini Rate Limit (429)
if response_text.startswith("ERROR_CODE_429"):
if key:
self.key_cooldowns[key] = time.time() + 1200
logger.warning(f"⚠️ Key {key[-4:]} rate-limited. Rotating...")
attempts += 1
continue

# If Gemini fails/returns empty in the cloud, perform Smart Sleep
if is_cloud and (
response_text.startswith("ERROR_CODE_") or not response_text.strip()
):
wait_times = [
cooldown - now for cooldown in self.key_cooldowns.values()
]
sleep_duration = max(
10, min(min(wait_times) if wait_times else 60, 600)
)
# Handle Empty or Generic Error Responses
if not response_text or response_text.startswith("ERROR_CODE_"):
logger.warning(
f"⏳ Cloud limit reached. Resuming in {int(sleep_duration)}s..."
f"⚠️ LLM Error detected ({response_text[:20]}...). Retrying in 10s..."
)
time.sleep(sleep_duration)
attempts += 1
continue

if response_text.startswith("ERROR_CODE_") or not response_text.strip():
time.sleep(10)
attempts += 1
continue

# Check if the AI's content matches our XML/Format rules
if validator(response_text):
# --- SUCCESS BREATHER ---
# SUCCESS BREATHER: Stay under RPM limits
if is_cloud:
time.sleep(2)
return response_text
else:
logger.warning("LLM response failed validation. Retrying...")
attempts += 1
logger.warning("LLM response failed internal validation. Retrying...")
if is_cloud:
time.sleep(5)
attempts += 1

def _get_user_prompt_augmentation(self, initial_text: str = "") -> str:
import tempfile
Expand Down