Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions docs/cli/byok/baseten.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,18 @@ Deploy and serve custom models with Baseten's enterprise-grade infrastructure fo

## Configuration

Add these configurations to `~/.factory/config.json`:
Add to `~/.factory/settings.json`:

```json
{
"custom_models": [
"customModels": [
{
"model_display_name": "Qwen3-Coder-480B [Baseten]",
"model": "Qwen/Qwen3-Coder-480B-A35B-Instruct",
"base_url": "https://inference.baseten.co/v1",
"api_key": "YOUR_BASETEN_API_KEY",
"displayName": "Qwen3-Coder-480B [Baseten]",
"baseUrl": "https://inference.baseten.co/v1",
"apiKey": "YOUR_BASETEN_API_KEY",
"provider": "generic-chat-completion-api",
"max_tokens": 8192
"maxOutputTokens": 8192
}
]
}
Expand Down
36 changes: 18 additions & 18 deletions docs/cli/byok/deepinfra.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -8,42 +8,42 @@ Access cost-effective inference for a wide variety of open-source models with De

## Configuration

Configuration examples for `~/.factory/config.json`:
Add to `~/.factory/settings.json`:

```json
{
"custom_models": [
"customModels": [
{
"model_display_name": "GLM-4.6 [DeepInfra]",
"model": "zai-org/GLM-4.6",
"base_url": "https://api.deepinfra.com/v1/openai",
"api_key": "YOUR_DEEPINFRA_TOKEN",
"displayName": "GLM-4.6 [DeepInfra]",
"baseUrl": "https://api.deepinfra.com/v1/openai",
"apiKey": "YOUR_DEEPINFRA_TOKEN",
"provider": "generic-chat-completion-api",
"max_tokens": 16384
"maxOutputTokens": 16384
},
{
"model_display_name": "DeepSeek V3.1 Terminus [DeepInfra]",
"model": "deepseek-ai/DeepSeek-V3.1-Terminus",
"base_url": "https://api.deepinfra.com/v1/openai",
"api_key": "YOUR_DEEPINFRA_TOKEN",
"displayName": "DeepSeek V3.1 Terminus [DeepInfra]",
"baseUrl": "https://api.deepinfra.com/v1/openai",
"apiKey": "YOUR_DEEPINFRA_TOKEN",
"provider": "generic-chat-completion-api",
"max_tokens": 16384
"maxOutputTokens": 16384
},
{
"model_display_name": "Kimi K2 Instruct [DeepInfra]",
"model": "moonshotai/Kimi-K2-Instruct-0905",
"base_url": "https://api.deepinfra.com/v1/openai",
"api_key": "YOUR_DEEPINFRA_TOKEN",
"displayName": "Kimi K2 Instruct [DeepInfra]",
"baseUrl": "https://api.deepinfra.com/v1/openai",
"apiKey": "YOUR_DEEPINFRA_TOKEN",
"provider": "generic-chat-completion-api",
"max_tokens": 32768
"maxOutputTokens": 32768
},
{
"model_display_name": "Qwen3 Coder 480B [DeepInfra]",
"model": "Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo",
"base_url": "https://api.deepinfra.com/v1/openai",
"api_key": "YOUR_DEEPINFRA_TOKEN",
"displayName": "Qwen3 Coder 480B [DeepInfra]",
"baseUrl": "https://api.deepinfra.com/v1/openai",
"apiKey": "YOUR_DEEPINFRA_TOKEN",
"provider": "generic-chat-completion-api",
"max_tokens": 32768
"maxOutputTokens": 32768
}
]
}
Expand Down
20 changes: 10 additions & 10 deletions docs/cli/byok/fireworks.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -8,26 +8,26 @@ Access high-performance inference for open-source models with Fireworks AI's opt

## Configuration

Configuration examples for `~/.factory/config.json`:
Add to `~/.factory/settings.json`:

```json
{
"custom_models": [
"customModels": [
{
"model_display_name": "GLM 4.5 [Fireworks]",
"model": "accounts/fireworks/models/glm-4p5",
"base_url": "https://api.fireworks.ai/inference/v1",
"api_key": "YOUR_FIREWORKS_API_KEY",
"displayName": "GLM 4.5 [Fireworks]",
"baseUrl": "https://api.fireworks.ai/inference/v1",
"apiKey": "YOUR_FIREWORKS_API_KEY",
"provider": "generic-chat-completion-api",
"max_tokens": 16384
"maxOutputTokens": 16384
},
{
"model_display_name": "Deepseek V3.1 Terminus [Fireworks]",
"model": "accounts/fireworks/models/deepseek-v3p1-terminus",
"base_url": "https://api.fireworks.ai/inference/v1",
"api_key": "YOUR_FIREWORKS_API_KEY",
"displayName": "Deepseek V3.1 Terminus [Fireworks]",
"baseUrl": "https://api.fireworks.ai/inference/v1",
"apiKey": "YOUR_FIREWORKS_API_KEY",
"provider": "generic-chat-completion-api",
"max_tokens": 20480
"maxOutputTokens": 20480
}
]
}
Expand Down
28 changes: 14 additions & 14 deletions docs/cli/byok/google-gemini.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -8,34 +8,34 @@ Connect to Google's Gemini models for advanced AI capabilities with multimodal s

## Configuration

Configuration examples for `~/.factory/config.json`:
Add to `~/.factory/settings.json`:

```json
{
"custom_models": [
"customModels": [
{
"model_display_name": "Gemini 2.5 Pro [Google]",
"model": "gemini-2.5-pro",
"base_url": "https://generativelanguage.googleapis.com/v1beta/",
"api_key": "YOUR_GEMINI_API_KEY",
"displayName": "Gemini 2.5 Pro [Google]",
"baseUrl": "https://generativelanguage.googleapis.com/v1beta/",
"apiKey": "YOUR_GEMINI_API_KEY",
"provider": "generic-chat-completion-api",
"max_tokens": 32000
"maxOutputTokens": 32000
},
{
"model_display_name": "Gemini 1.5 Pro [Google]",
"model": "gemini-1.5-pro",
"base_url": "https://generativelanguage.googleapis.com/v1beta/",
"api_key": "YOUR_GEMINI_API_KEY",
"displayName": "Gemini 1.5 Pro [Google]",
"baseUrl": "https://generativelanguage.googleapis.com/v1beta/",
"apiKey": "YOUR_GEMINI_API_KEY",
"provider": "generic-chat-completion-api",
"max_tokens": 1048576
"maxOutputTokens": 1048576
},
{
"model_display_name": "Gemini 1.5 Flash [Google]",
"model": "gemini-1.5-flash",
"base_url": "https://generativelanguage.googleapis.com/v1beta/",
"api_key": "YOUR_GEMINI_API_KEY",
"displayName": "Gemini 1.5 Flash [Google]",
"baseUrl": "https://generativelanguage.googleapis.com/v1beta/",
"apiKey": "YOUR_GEMINI_API_KEY",
"provider": "generic-chat-completion-api",
"max_tokens": 1048576
"maxOutputTokens": 1048576
}
]
}
Expand Down
12 changes: 6 additions & 6 deletions docs/cli/byok/groq.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,18 @@ Access ultra-fast inference powered by Groq's LPU™ (Language Processing Unit)

## Configuration

Configuration examples for `~/.factory/config.json`:
Add to `~/.factory/settings.json`:

```json
{
"custom_models": [
"customModels": [
{
"model_display_name": "Kimi K2 [Groq]",
"model": "moonshotai/kimi-k2-instruct-0905",
"base_url": "https://api.groq.com/openai/v1",
"api_key": "YOUR_GROQ_KEY",
"displayName": "Kimi K2 [Groq]",
"baseUrl": "https://api.groq.com/openai/v1",
"apiKey": "YOUR_GROQ_KEY",
"provider": "generic-chat-completion-api",
"max_tokens": 16384
"maxOutputTokens": 16384
}
]
}
Expand Down
20 changes: 10 additions & 10 deletions docs/cli/byok/huggingface.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -12,26 +12,26 @@ Connect to thousands of models hosted on Hugging Face's Inference Providers. Lea

## Configuration

Configuration examples for `~/.factory/config.json`:
Add to `~/.factory/settings.json`:

```json
{
"custom_models": [
"customModels": [
{
"model_display_name": "GPT OSS 120B [HF Router]",
"model": "openai/gpt-oss-120b:fireworks-ai",
"base_url": "https://router.huggingface.co/v1",
"api_key": "YOUR_HF_TOKEN",
"displayName": "GPT OSS 120B [HF Router]",
"baseUrl": "https://router.huggingface.co/v1",
"apiKey": "YOUR_HF_TOKEN",
"provider": "generic-chat-completion-api",
"max_tokens": 32768
"maxOutputTokens": 32768
},
{
"model_display_name": "Llama 4 Scout 17B [HF Router]",
"model": "meta-llama/Llama-4-Scout-17B-16E-Instruct:fireworks-ai",
"base_url": "https://router.huggingface.co/v1",
"api_key": "YOUR_HF_TOKEN",
"displayName": "Llama 4 Scout 17B [HF Router]",
"baseUrl": "https://router.huggingface.co/v1",
"apiKey": "YOUR_HF_TOKEN",
"provider": "generic-chat-completion-api",
"max_tokens": 16384
"maxOutputTokens": 16384
}
]
}
Expand Down
30 changes: 15 additions & 15 deletions docs/cli/byok/ollama.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -17,26 +17,26 @@ Run models entirely on your machine with no internet required.

### Configuration

Configuration examples for `~/.factory/config.json`:
Add to `~/.factory/settings.json`:

```json
{
"custom_models": [
"customModels": [
{
"model_display_name": "Qwen 2.5 Coder 32B [Local]",
"model": "qwen2.5-coder:32b",
"base_url": "http://localhost:11434/v1",
"api_key": "not-needed", # add any non-empty value
"displayName": "Qwen 2.5 Coder 32B [Local]",
"baseUrl": "http://localhost:11434/v1",
"apiKey": "not-needed",
"provider": "generic-chat-completion-api",
"max_tokens": 16000
"maxOutputTokens": 16000
},
{
"model_display_name": "Qwen 2.5 Coder 7B [Local]",
"model": "qwen2.5-coder:7b",
"base_url": "http://localhost:11434/v1",
"api_key": "not-needed", # add any non-empty value
"displayName": "Qwen 2.5 Coder 7B [Local]",
"baseUrl": "http://localhost:11434/v1",
"apiKey": "not-needed",
"provider": "generic-chat-completion-api",
"max_tokens": 4000
"maxOutputTokens": 4000
}
]
}
Expand Down Expand Up @@ -90,14 +90,14 @@ For a full list of available cloud models, visit: [ollama.com/search?c=cloud](ht

```json
{
"custom_models": [
"customModels": [
{
"model_display_name": "qwen3-coder [Online]",
"model": "qwen3-coder:480b-cloud",
"base_url": "http://localhost:11434/v1/",
"api_key": "not-needed", # add any non-empty value
"displayName": "qwen3-coder [Online]",
"baseUrl": "http://localhost:11434/v1/",
"apiKey": "not-needed",
"provider": "generic-chat-completion-api",
"max_tokens": 128000
"maxOutputTokens": 128000
}
]
}
Expand Down
20 changes: 10 additions & 10 deletions docs/cli/byok/openai-anthropic.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -8,26 +8,26 @@ Use your own API keys for cost control and billing transparency with official Op

## Configuration

Configuration examples for `~/.factory/config.json`:
Add to `~/.factory/settings.json`:

```json
{
"custom_models": [
"customModels": [
{
"model_display_name": "Sonnet 4.5 [Custom]",
"model": "claude-sonnet-4-5-20250929",
"base_url": "https://api.anthropic.com",
"api_key": "YOUR_ANTHROPIC_KEY",
"displayName": "Sonnet 4.5 [Custom]",
"baseUrl": "https://api.anthropic.com",
"apiKey": "YOUR_ANTHROPIC_KEY",
"provider": "anthropic",
"max_tokens": 8192
"maxOutputTokens": 8192
},
{
"model_display_name": "GPT5-Codex [Custom]",
"model": "gpt-5-codex",
"base_url": "https://api.openai.com/v1",
"api_key": "YOUR_OPENAI_KEY",
"displayName": "GPT5-Codex [Custom]",
"baseUrl": "https://api.openai.com/v1",
"apiKey": "YOUR_OPENAI_KEY",
"provider": "openai",
"max_tokens": 16384
"maxOutputTokens": 16384
}
]
}
Expand Down
12 changes: 6 additions & 6 deletions docs/cli/byok/openrouter.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,18 @@ Connect to OpenRouter for access to models from multiple providers through a sin

## Configuration

Configuration examples for `~/.factory/config.json`:
Add to `~/.factory/settings.json`:

```json
{
"custom_models": [
"customModels": [
{
"model_display_name": "GPT-OSS-20B [OpenRouter]",
"model": "openai/gpt-oss-20b",
"base_url": "https://openrouter.ai/api/v1",
"api_key": "YOUR_OPENROUTER_KEY",
"displayName": "GPT-OSS-20B [OpenRouter]",
"baseUrl": "https://openrouter.ai/api/v1",
"apiKey": "YOUR_OPENROUTER_KEY",
"provider": "generic-chat-completion-api",
"max_tokens": 32000
"maxOutputTokens": 32000
}
]
}
Expand Down
Loading