22 changes: 11 additions & 11 deletions Cargo.lock

Some generated files are not rendered by default.

3 changes: 2 additions & 1 deletion Cargo.toml
@@ -4,7 +4,8 @@ resolver = "2"

[workspace.package]
edition = "2021"
version = "0.3.24"
version = "0.3.25"




7 changes: 7 additions & 0 deletions crates/tower-runtime/src/local.rs
@@ -269,6 +269,13 @@ async fn execute_local_app(opts: StartOptions, sx: oneshot::Sender<i32>, cancel_
    return Ok(())
}

impl Drop for LocalApp {
    fn drop(&mut self) {
        // We want to ensure that we cancel the process if it is still running.
        let _ = self.terminate();
    }
}

impl App for LocalApp {
    async fn start(opts: StartOptions) -> Result<Self, Error> {
        let cancel_token = CancellationToken::new();
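Note: the new Drop impl is the standard RAII cleanup pattern. If a LocalApp handle goes out of scope while its child process is still running, terminate() gets a last chance to stop it. A minimal, self-contained sketch of the same idea (ProcGuard, its spawn() arguments, and its terminate() body are hypothetical stand-ins, not the actual LocalApp implementation):

```rust
use std::process::{Child, Command};

/// Hypothetical stand-in for a type like LocalApp that owns a child process.
struct ProcGuard {
    child: Child,
}

impl ProcGuard {
    fn spawn() -> std::io::Result<Self> {
        // "sleep 30" is only a placeholder child process for the example.
        Ok(Self { child: Command::new("sleep").arg("30").spawn()? })
    }

    /// Best-effort termination: kill the child and reap it so it cannot linger.
    fn terminate(&mut self) -> std::io::Result<()> {
        self.child.kill()?;
        self.child.wait().map(|_| ())
    }
}

impl Drop for ProcGuard {
    fn drop(&mut self) {
        // Ignore the result: the process may already have exited.
        let _ = self.terminate();
    }
}

fn main() -> std::io::Result<()> {
    let _guard = ProcGuard::spawn()?;
    // When _guard is dropped at the end of this scope (or on an early return or
    // panic unwind), Drop runs and the child process is terminated.
    Ok(())
}
```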
4 changes: 4 additions & 0 deletions crates/tower-uv/src/lib.rs
@@ -78,6 +78,7 @@ impl Uv {
debug!("Executing UV ({:?}) venv in {:?}", &self.uv_path, cwd);

let child = Command::new(&self.uv_path)
.kill_on_drop(true)
.stdin(Stdio::null())
.stdout(Stdio::piped())
.stderr(Stdio::piped())
@@ -95,6 +96,7 @@ impl Uv {
if cwd.join("pyproject.toml").exists() {
debug!("Executing UV ({:?}) sync in {:?}", &self.uv_path, cwd);
let child = Command::new(&self.uv_path)
.kill_on_drop(true)
.stdin(Stdio::null())
.stdout(Stdio::piped())
.stderr(Stdio::piped())
@@ -112,6 +114,7 @@

        // If there is a requirements.txt, then we can use that to sync.
        let child = Command::new(&self.uv_path)
            .kill_on_drop(true)
            .stdin(Stdio::null())
            .stdout(Stdio::piped())
            .stderr(Stdio::piped())
@@ -138,6 +141,7 @@ impl Uv {
debug!("Executing UV ({:?}) run {:?} in {:?}", &self.uv_path, program, cwd);

let child = Command::new(&self.uv_path)
.kill_on_drop(true)
.stdin(Stdio::null())
.stdout(Stdio::piped())
.stderr(Stdio::piped())
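Note: kill_on_drop(true) is an existing option on tokio::process::Command; it tells the runtime to kill the child process if the Child handle is dropped before the process exits, which covers early returns and cancelled futures. A minimal sketch of that behavior, assuming a tokio dependency with the process and time features enabled and using a placeholder sleep command:

```rust
use std::process::Stdio;
use tokio::process::Command;
use tokio::time::{timeout, Duration};

#[tokio::main]
async fn main() -> std::io::Result<()> {
    let mut child = Command::new("sleep")
        .arg("60")
        .kill_on_drop(true) // kill the child if this handle is dropped before it exits
        .stdin(Stdio::null())
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .spawn()?;

    // Wait up to one second for the child to finish on its own.
    let result = timeout(Duration::from_secs(1), child.wait()).await;
    match result {
        Ok(status) => println!("exited: {}", status?),
        Err(_elapsed) => {
            // Dropping the handle is enough: kill_on_drop(true) terminates the
            // still-running process instead of leaving it orphaned.
            drop(child);
            println!("timed out; child killed on drop");
        }
    }
    Ok(())
}
```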
14 changes: 8 additions & 6 deletions pyproject.toml
@@ -4,7 +4,8 @@ build-backend = "maturin"

[project]
name = "tower"
version = "0.3.24"
version = "0.3.25"




@@ -43,14 +44,15 @@ dependencies = [
"attrs==24.2.0",
"httpx==0.28.1",
"huggingface-hub>=0.34.3",
"ollama>=0.4.7",
"pyiceberg==0.9.0",
"ollama>=0.5.3",
"pydantic-core==2.27.0",
"pyiceberg==0.9.1",
"python-dateutil==2.9.0.post0",
]

[project.optional-dependencies]
ai = ["huggingface-hub==0.34.3", "ollama==0.4.7"]
iceberg = ["polars==1.27.1", "pyarrow==19.0.1", "pyiceberg==0.9.0"]
ai = ["huggingface-hub==0.34.3", "ollama==0.5.3"]
iceberg = ["polars==1.27.1", "pyarrow==19.0.1", "pyiceberg==0.9.1"]
all = ["tower[ai,iceberg]"]

[tool.maturin]
@@ -71,5 +73,5 @@ dev = [
"pytest==8.3.5",
"pytest-httpx==0.35.0",
"pytest-env>=1.1.3",
"pyiceberg[sql-sqlite]==0.9.0",
"pyiceberg[sql-sqlite]==0.9.1",
]
75 changes: 73 additions & 2 deletions src/tower/_llms.py
@@ -384,6 +384,43 @@ def resolve_hugging_face_hub_model_name(ctx: TowerContext, requested_model: str)


class Llm:
"""
This class provides a unified interface for interacting with language models through
different inference providers (e.g. Ollama for local inference, Hugging Face Hub for remote).
It abstracts away model name resolution, inference provider selection, and local/remote inference API differences
to provide a consistent interface for text generation tasks.

The class supports both chat-based interactions (similar to OpenAI Chat Completions API)
and simple prompt-based interactions (similar to legacy OpenAI Completions API).

This class is typically instantiated through the llms() factory function rather than
directly.

Attributes:
context (TowerContext): The Tower context containing configuration and settings.
requested_model_name (str): The original model name requested by the user.
model_name (str): The resolved model name after provider-specific resolution.
max_tokens (int): Maximum number of tokens to generate in responses.
inference_router (str): The inference router to use (e.g., "ollama", "hugging_face_hub").
inference_provider (str): The inference provider (same as router when in local mode).
inference_router_api_key (str): API key for the inference router if required.

Example:
# Create an Llm instance (typically done via the llms() factory function)
llm = tower.llms("llama3.2", max_tokens=1000)

# Use for chat completions
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "Hello!"}
]
response = llm.complete_chat(messages)

# Use for simple prompts
response = llm.prompt("What is the capital of France?")

"""

def __init__(self, context: TowerContext, model_name: str, max_tokens: int = 1000):
"""
Wraps up interfacing with a language model in the Tower system.
@@ -475,11 +512,45 @@ def prompt(self, prompt: str) -> str:
"content": prompt,
}])

def llms(model_name: str) -> Llm:
def llms(model_name: str, max_tokens: int = 1000) -> Llm:
"""
This factory function creates an Llm instance configured with the specified model parameters.
It automatically resolves the model name based on the available inference providers
(Ollama for local inference, Hugging Face Hub for remote).
The max_tokens parameter is used to set the maximum number of tokens to generate in responses.

Args:
model_name: Can be a model family name (e.g., "llama3.2", "gemma3.2", "deepseek-r1")
or a specific model identifier (e.g., "deepseek-r1:14b", "deepseek-ai/DeepSeek-R1-0528").
The function will automatically resolve the exact model name based on
available models in the configured inference provider.
max_tokens: Maximum number of tokens to generate in responses. Defaults to 1000.

Returns:
Llm: A configured language model instance that can be used for text generation,
chat completions, and other language model interactions.

Raises:
ValueError: If the configured inference router is not supported or if the model
cannot be resolved.

Example:
# Create a language model instance
llm = llms("llama3.2", max_tokens=500)

# Use for chat completions
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "Hello!"}
]
response = llm.complete_chat(messages)

"""
    ctx = TowerContext.build()
    return Llm(
        context = ctx,
        model_name=model_name
        model_name=model_name,
        max_tokens=max_tokens
    )

def extract_ollama_message(resp: ChatResponse) -> str: