From 4ad58ab9a13297cdf616433bf3f9f9ac57e9c8fc Mon Sep 17 00:00:00 2001
From: Serhii Sokolenko
Date: Sun, 3 Aug 2025 22:00:46 +0200
Subject: [PATCH 1/2] Adding docstring and max_tokens param to llms()

---
 src/tower/_llms.py | 76 ++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 74 insertions(+), 2 deletions(-)

diff --git a/src/tower/_llms.py b/src/tower/_llms.py
index f331324d..e1a64ddb 100644
--- a/src/tower/_llms.py
+++ b/src/tower/_llms.py
@@ -384,6 +384,43 @@ def resolve_hugging_face_hub_model_name(ctx: TowerContext, requested_model: str)
 
 
 class Llm:
+    """
+    This class provides a unified interface for interacting with language models through
+    different inference providers (e.g., Ollama for local inference, Hugging Face Hub for remote).
+    It abstracts away model name resolution, inference provider selection, and local/remote inference API differences
+    behind a single, consistent interface for text generation tasks.
+
+    The class supports both chat-based interactions (similar to the OpenAI Chat Completions API)
+    and simple prompt-based interactions (similar to the legacy OpenAI Completions API).
+
+    This class is typically instantiated through the llms() factory function rather than
+    directly.
+
+    Attributes:
+        context (TowerContext): The Tower context containing configuration and settings.
+        requested_model_name (str): The original model name requested by the user.
+        model_name (str): The resolved model name after provider-specific resolution.
+        max_tokens (int): Maximum number of tokens to generate in responses.
+        inference_router (str): The inference router to use (e.g., "ollama", "hugging_face_hub").
+        inference_provider (str): The inference provider (same as the router when in local mode).
+        inference_router_api_key (str): API key for the inference router, if required.
+
+    Example:
+        # Create an Llm instance (typically done via the llms() factory function)
+        llm = tower.llms("llama3.2", max_tokens=1000)
+
+        # Use for chat completions
+        messages = [
+            {"role": "system", "content": "You are a helpful assistant."},
+            {"role": "user", "content": "Hello!"}
+        ]
+        response = llm.complete_chat(messages)
+
+        # Use for simple prompts
+        response = llm.prompt("What is the capital of France?")
+
+    """
+
     def __init__(self, context: TowerContext, model_name: str, max_tokens: int = 1000):
         """
         Wraps up interfacing with a language model in the Tower system.
@@ -475,11 +512,46 @@ def prompt(self, prompt: str) -> str:
             "content": prompt,
         }])
 
-def llms(model_name: str) -> Llm:
+def llms(model_name: str, max_tokens: int = 1000) -> Llm:
+    """
+
+    This factory function creates an Llm instance configured with the specified model parameters.
+    It automatically resolves the model name based on the available inference providers
+    (Ollama for local inference, Hugging Face Hub for remote).
+    The max_tokens parameter sets the maximum number of tokens to generate in responses.
+
+    Args:
+        model_name: Can be a model family name (e.g., "llama3.2", "gemma3", "deepseek-r1")
+            or a specific model identifier (e.g., "deepseek-r1:14b", "deepseek-ai/DeepSeek-R1-0528").
+            The function will automatically resolve the exact model name based on
+            available models in the configured inference provider.
+        max_tokens: Maximum number of tokens to generate in responses. Defaults to 1000.
+
+    Returns:
+        Llm: A configured language model instance that can be used for text generation,
+            chat completions, and other language model interactions.
+
+    Raises:
+        ValueError: If the configured inference router is not supported or if the model
+            cannot be resolved.
+
+    Example:
+        # Create a language model instance
+        llm = llms("llama3.2", max_tokens=500)
+
+        # Use for chat completions
+        messages = [
+            {"role": "system", "content": "You are a helpful assistant."},
+            {"role": "user", "content": "Hello!"}
+        ]
+        response = llm.complete_chat(messages)
+
+    """
     ctx = TowerContext.build()
     return Llm(
         context = ctx,
-        model_name=model_name
+        model_name=model_name,
+        max_tokens=max_tokens
     )
 
 def extract_ollama_message(resp: ChatResponse) -> str:

From 2a1da201f901e45dc0db255c1143e727a5f219eb Mon Sep 17 00:00:00 2001
From: Serhii Sokolenko
Date: Sun, 3 Aug 2025 22:03:56 +0200
Subject: [PATCH 2/2] Update src/tower/_llms.py

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 src/tower/_llms.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/tower/_llms.py b/src/tower/_llms.py
index e1a64ddb..da8c7bea 100644
--- a/src/tower/_llms.py
+++ b/src/tower/_llms.py
@@ -514,7 +514,6 @@ def prompt(self, prompt: str) -> str:
 
 def llms(model_name: str, max_tokens: int = 1000) -> Llm:
     """
-
    This factory function creates an Llm instance configured with the specified model parameters.
    It automatically resolves the model name based on the available inference providers
    (Ollama for local inference, Hugging Face Hub for remote).
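
Taken together, the two patches document this usage pattern end to end. A minimal sketch, assuming the package exports llms() at the top level (so it is reachable as tower.llms, as the class docstring's example implies) and that complete_chat() and prompt() return plain strings:

    import tower

    # Resolve a model family name against the configured inference provider;
    # max_tokens caps the tokens generated per response (defaults to 1000).
    llm = tower.llms("llama3.2", max_tokens=500)

    # Chat-style completion with OpenAI Chat Completions-style messages.
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What is the capital of France?"},
    ]
    print(llm.complete_chat(messages))

    # Prompt-style completion; the context lines visible in the first patch
    # show prompt() wrapping the string in a single user-role message.
    print(llm.prompt("What is the capital of France?"))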