diff --git a/docs/cli/byok/baseten.mdx b/docs/cli/byok/baseten.mdx
index e050028..298d946 100644
--- a/docs/cli/byok/baseten.mdx
+++ b/docs/cli/byok/baseten.mdx
@@ -8,18 +8,18 @@ Deploy and serve custom models with Baseten's enterprise-grade infrastructure fo
## Configuration
-Add these configurations to `~/.factory/config.json`:
+Add to `~/.factory/settings.json`:
```json
{
- "custom_models": [
+ "customModels": [
{
- "model_display_name": "Qwen3-Coder-480B [Baseten]",
"model": "Qwen/Qwen3-Coder-480B-A35B-Instruct",
- "base_url": "https://inference.baseten.co/v1",
- "api_key": "YOUR_BASETEN_API_KEY",
+ "displayName": "Qwen3-Coder-480B [Baseten]",
+ "baseUrl": "https://inference.baseten.co/v1",
+ "apiKey": "YOUR_BASETEN_API_KEY",
"provider": "generic-chat-completion-api",
- "max_tokens": 8192
+ "maxOutputTokens": 8192
}
]
}
diff --git a/docs/cli/byok/deepinfra.mdx b/docs/cli/byok/deepinfra.mdx
index 9b59a2b..8ee7510 100644
--- a/docs/cli/byok/deepinfra.mdx
+++ b/docs/cli/byok/deepinfra.mdx
@@ -8,42 +8,42 @@ Access cost-effective inference for a wide variety of open-source models with De
## Configuration
-Configuration examples for `~/.factory/config.json`:
+Add to `~/.factory/settings.json`:
```json
{
- "custom_models": [
+ "customModels": [
{
- "model_display_name": "GLM-4.6 [DeepInfra]",
"model": "zai-org/GLM-4.6",
- "base_url": "https://api.deepinfra.com/v1/openai",
- "api_key": "YOUR_DEEPINFRA_TOKEN",
+ "displayName": "GLM-4.6 [DeepInfra]",
+ "baseUrl": "https://api.deepinfra.com/v1/openai",
+ "apiKey": "YOUR_DEEPINFRA_TOKEN",
"provider": "generic-chat-completion-api",
- "max_tokens": 16384
+ "maxOutputTokens": 16384
},
{
- "model_display_name": "DeepSeek V3.1 Terminus [DeepInfra]",
"model": "deepseek-ai/DeepSeek-V3.1-Terminus",
- "base_url": "https://api.deepinfra.com/v1/openai",
- "api_key": "YOUR_DEEPINFRA_TOKEN",
+ "displayName": "DeepSeek V3.1 Terminus [DeepInfra]",
+ "baseUrl": "https://api.deepinfra.com/v1/openai",
+ "apiKey": "YOUR_DEEPINFRA_TOKEN",
"provider": "generic-chat-completion-api",
- "max_tokens": 16384
+ "maxOutputTokens": 16384
},
{
- "model_display_name": "Kimi K2 Instruct [DeepInfra]",
"model": "moonshotai/Kimi-K2-Instruct-0905",
- "base_url": "https://api.deepinfra.com/v1/openai",
- "api_key": "YOUR_DEEPINFRA_TOKEN",
+ "displayName": "Kimi K2 Instruct [DeepInfra]",
+ "baseUrl": "https://api.deepinfra.com/v1/openai",
+ "apiKey": "YOUR_DEEPINFRA_TOKEN",
"provider": "generic-chat-completion-api",
- "max_tokens": 32768
+ "maxOutputTokens": 32768
},
{
- "model_display_name": "Qwen3 Coder 480B [DeepInfra]",
"model": "Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo",
- "base_url": "https://api.deepinfra.com/v1/openai",
- "api_key": "YOUR_DEEPINFRA_TOKEN",
+ "displayName": "Qwen3 Coder 480B [DeepInfra]",
+ "baseUrl": "https://api.deepinfra.com/v1/openai",
+ "apiKey": "YOUR_DEEPINFRA_TOKEN",
"provider": "generic-chat-completion-api",
- "max_tokens": 32768
+ "maxOutputTokens": 32768
}
]
}
diff --git a/docs/cli/byok/fireworks.mdx b/docs/cli/byok/fireworks.mdx
index 1ebf97b..78c0d17 100644
--- a/docs/cli/byok/fireworks.mdx
+++ b/docs/cli/byok/fireworks.mdx
@@ -8,26 +8,26 @@ Access high-performance inference for open-source models with Fireworks AI's opt
## Configuration
-Configuration examples for `~/.factory/config.json`:
+Add to `~/.factory/settings.json`:
```json
{
- "custom_models": [
+ "customModels": [
{
- "model_display_name": "GLM 4.5 [Fireworks]",
"model": "accounts/fireworks/models/glm-4p5",
- "base_url": "https://api.fireworks.ai/inference/v1",
- "api_key": "YOUR_FIREWORKS_API_KEY",
+ "displayName": "GLM 4.5 [Fireworks]",
+ "baseUrl": "https://api.fireworks.ai/inference/v1",
+ "apiKey": "YOUR_FIREWORKS_API_KEY",
"provider": "generic-chat-completion-api",
- "max_tokens": 16384
+ "maxOutputTokens": 16384
},
{
- "model_display_name": "Deepseek V3.1 Terminus [Fireworks]",
"model": "accounts/fireworks/models/deepseek-v3p1-terminus",
- "base_url": "https://api.fireworks.ai/inference/v1",
- "api_key": "YOUR_FIREWORKS_API_KEY",
+ "displayName": "Deepseek V3.1 Terminus [Fireworks]",
+ "baseUrl": "https://api.fireworks.ai/inference/v1",
+ "apiKey": "YOUR_FIREWORKS_API_KEY",
"provider": "generic-chat-completion-api",
- "max_tokens": 20480
+ "maxOutputTokens": 20480
}
]
}
diff --git a/docs/cli/byok/google-gemini.mdx b/docs/cli/byok/google-gemini.mdx
index 4bd3e9a..76d838a 100644
--- a/docs/cli/byok/google-gemini.mdx
+++ b/docs/cli/byok/google-gemini.mdx
@@ -8,34 +8,34 @@ Connect to Google's Gemini models for advanced AI capabilities with multimodal s
## Configuration
-Configuration examples for `~/.factory/config.json`:
+Add to `~/.factory/settings.json`:
```json
{
- "custom_models": [
+ "customModels": [
{
- "model_display_name": "Gemini 2.5 Pro [Google]",
"model": "gemini-2.5-pro",
- "base_url": "https://generativelanguage.googleapis.com/v1beta/",
- "api_key": "YOUR_GEMINI_API_KEY",
+ "displayName": "Gemini 2.5 Pro [Google]",
+ "baseUrl": "https://generativelanguage.googleapis.com/v1beta/",
+ "apiKey": "YOUR_GEMINI_API_KEY",
"provider": "generic-chat-completion-api",
- "max_tokens": 32000
+ "maxOutputTokens": 32000
},
{
- "model_display_name": "Gemini 1.5 Pro [Google]",
"model": "gemini-1.5-pro",
- "base_url": "https://generativelanguage.googleapis.com/v1beta/",
- "api_key": "YOUR_GEMINI_API_KEY",
+ "displayName": "Gemini 1.5 Pro [Google]",
+ "baseUrl": "https://generativelanguage.googleapis.com/v1beta/",
+ "apiKey": "YOUR_GEMINI_API_KEY",
"provider": "generic-chat-completion-api",
- "max_tokens": 1048576
+      "maxOutputTokens": 8192
},
{
- "model_display_name": "Gemini 1.5 Flash [Google]",
"model": "gemini-1.5-flash",
- "base_url": "https://generativelanguage.googleapis.com/v1beta/",
- "api_key": "YOUR_GEMINI_API_KEY",
+ "displayName": "Gemini 1.5 Flash [Google]",
+ "baseUrl": "https://generativelanguage.googleapis.com/v1beta/",
+ "apiKey": "YOUR_GEMINI_API_KEY",
"provider": "generic-chat-completion-api",
- "max_tokens": 1048576
+      "maxOutputTokens": 8192
}
]
}
diff --git a/docs/cli/byok/groq.mdx b/docs/cli/byok/groq.mdx
index 0219a33..01951f5 100644
--- a/docs/cli/byok/groq.mdx
+++ b/docs/cli/byok/groq.mdx
@@ -8,18 +8,18 @@ Access ultra-fast inference powered by Groq's LPU™ (Language Processing Unit)
## Configuration
-Configuration examples for `~/.factory/config.json`:
+Add to `~/.factory/settings.json`:
```json
{
- "custom_models": [
+ "customModels": [
{
- "model_display_name": "Kimi K2 [Groq]",
"model": "moonshotai/kimi-k2-instruct-0905",
- "base_url": "https://api.groq.com/openai/v1",
- "api_key": "YOUR_GROQ_KEY",
+ "displayName": "Kimi K2 [Groq]",
+ "baseUrl": "https://api.groq.com/openai/v1",
+ "apiKey": "YOUR_GROQ_KEY",
"provider": "generic-chat-completion-api",
- "max_tokens": 16384
+ "maxOutputTokens": 16384
}
]
}
diff --git a/docs/cli/byok/huggingface.mdx b/docs/cli/byok/huggingface.mdx
index 1e94ddf..c2ddd03 100644
--- a/docs/cli/byok/huggingface.mdx
+++ b/docs/cli/byok/huggingface.mdx
@@ -12,26 +12,26 @@ Connect to thousands of models hosted on Hugging Face's Inference Providers. Lea
## Configuration
-Configuration examples for `~/.factory/config.json`:
+Add to `~/.factory/settings.json`:
```json
{
- "custom_models": [
+ "customModels": [
{
- "model_display_name": "GPT OSS 120B [HF Router]",
"model": "openai/gpt-oss-120b:fireworks-ai",
- "base_url": "https://router.huggingface.co/v1",
- "api_key": "YOUR_HF_TOKEN",
+ "displayName": "GPT OSS 120B [HF Router]",
+ "baseUrl": "https://router.huggingface.co/v1",
+ "apiKey": "YOUR_HF_TOKEN",
"provider": "generic-chat-completion-api",
- "max_tokens": 32768
+ "maxOutputTokens": 32768
},
{
- "model_display_name": "Llama 4 Scout 17B [HF Router]",
"model": "meta-llama/Llama-4-Scout-17B-16E-Instruct:fireworks-ai",
- "base_url": "https://router.huggingface.co/v1",
- "api_key": "YOUR_HF_TOKEN",
+ "displayName": "Llama 4 Scout 17B [HF Router]",
+ "baseUrl": "https://router.huggingface.co/v1",
+ "apiKey": "YOUR_HF_TOKEN",
"provider": "generic-chat-completion-api",
- "max_tokens": 16384
+ "maxOutputTokens": 16384
}
]
}
diff --git a/docs/cli/byok/ollama.mdx b/docs/cli/byok/ollama.mdx
index 16a427b..219f82b 100644
--- a/docs/cli/byok/ollama.mdx
+++ b/docs/cli/byok/ollama.mdx
@@ -17,26 +17,26 @@ Run models entirely on your machine with no internet required.
### Configuration
-Configuration examples for `~/.factory/config.json`:
+Add to `~/.factory/settings.json`:
```json
{
- "custom_models": [
+ "customModels": [
{
- "model_display_name": "Qwen 2.5 Coder 32B [Local]",
"model": "qwen2.5-coder:32b",
- "base_url": "http://localhost:11434/v1",
- "api_key": "not-needed", # add any non-empty value
+ "displayName": "Qwen 2.5 Coder 32B [Local]",
+ "baseUrl": "http://localhost:11434/v1",
+ "apiKey": "not-needed",
"provider": "generic-chat-completion-api",
- "max_tokens": 16000
+ "maxOutputTokens": 16000
},
{
- "model_display_name": "Qwen 2.5 Coder 7B [Local]",
"model": "qwen2.5-coder:7b",
- "base_url": "http://localhost:11434/v1",
- "api_key": "not-needed", # add any non-empty value
+ "displayName": "Qwen 2.5 Coder 7B [Local]",
+ "baseUrl": "http://localhost:11434/v1",
+ "apiKey": "not-needed",
"provider": "generic-chat-completion-api",
- "max_tokens": 4000
+ "maxOutputTokens": 4000
}
]
}
@@ -90,14 +90,14 @@ For a full list of available cloud models, visit: [ollama.com/search?c=cloud](ht
```json
{
- "custom_models": [
+ "customModels": [
{
- "model_display_name": "qwen3-coder [Online]",
"model": "qwen3-coder:480b-cloud",
- "base_url": "http://localhost:11434/v1/",
- "api_key": "not-needed", # add any non-empty value
+ "displayName": "qwen3-coder [Online]",
+ "baseUrl": "http://localhost:11434/v1/",
+ "apiKey": "not-needed",
"provider": "generic-chat-completion-api",
- "max_tokens": 128000
+ "maxOutputTokens": 128000
}
]
}
diff --git a/docs/cli/byok/openai-anthropic.mdx b/docs/cli/byok/openai-anthropic.mdx
index 5c37607..3bfcec0 100644
--- a/docs/cli/byok/openai-anthropic.mdx
+++ b/docs/cli/byok/openai-anthropic.mdx
@@ -8,26 +8,26 @@ Use your own API keys for cost control and billing transparency with official Op
## Configuration
-Configuration examples for `~/.factory/config.json`:
+Add to `~/.factory/settings.json`:
```json
{
- "custom_models": [
+ "customModels": [
{
- "model_display_name": "Sonnet 4.5 [Custom]",
"model": "claude-sonnet-4-5-20250929",
- "base_url": "https://api.anthropic.com",
- "api_key": "YOUR_ANTHROPIC_KEY",
+ "displayName": "Sonnet 4.5 [Custom]",
+ "baseUrl": "https://api.anthropic.com",
+ "apiKey": "YOUR_ANTHROPIC_KEY",
"provider": "anthropic",
- "max_tokens": 8192
+ "maxOutputTokens": 8192
},
{
- "model_display_name": "GPT5-Codex [Custom]",
"model": "gpt-5-codex",
- "base_url": "https://api.openai.com/v1",
- "api_key": "YOUR_OPENAI_KEY",
+ "displayName": "GPT5-Codex [Custom]",
+ "baseUrl": "https://api.openai.com/v1",
+ "apiKey": "YOUR_OPENAI_KEY",
"provider": "openai",
- "max_tokens": 16384
+ "maxOutputTokens": 16384
}
]
}
diff --git a/docs/cli/byok/openrouter.mdx b/docs/cli/byok/openrouter.mdx
index 95acd36..0e389c2 100644
--- a/docs/cli/byok/openrouter.mdx
+++ b/docs/cli/byok/openrouter.mdx
@@ -8,18 +8,18 @@ Connect to OpenRouter for access to models from multiple providers through a sin
## Configuration
-Configuration examples for `~/.factory/config.json`:
+Add to `~/.factory/settings.json`:
```json
{
- "custom_models": [
+ "customModels": [
{
- "model_display_name": "GPT-OSS-20B [OpenRouter]",
"model": "openai/gpt-oss-20b",
- "base_url": "https://openrouter.ai/api/v1",
- "api_key": "YOUR_OPENROUTER_KEY",
+ "displayName": "GPT-OSS-20B [OpenRouter]",
+ "baseUrl": "https://openrouter.ai/api/v1",
+ "apiKey": "YOUR_OPENROUTER_KEY",
"provider": "generic-chat-completion-api",
- "max_tokens": 32000
+ "maxOutputTokens": 32000
}
]
}
diff --git a/docs/cli/byok/overview.mdx b/docs/cli/byok/overview.mdx
index be966a5..f372caf 100644
--- a/docs/cli/byok/overview.mdx
+++ b/docs/cli/byok/overview.mdx
@@ -19,55 +19,82 @@ Factory CLI supports custom model configurations through BYOK (Bring Your Own Ke
## Configuration Reference
-Add custom models in `~/.factory/config.json` under the `custom_models` array.
+Add custom models to `~/.factory/settings.json` under the `customModels` array:
+
+```json
+{
+ "customModels": [
+ {
+ "model": "your-model-id",
+ "displayName": "My Custom Model",
+ "baseUrl": "https://api.provider.com/v1",
+ "apiKey": "YOUR_API_KEY",
+ "provider": "generic-chat-completion-api",
+ "maxOutputTokens": 16384
+ }
+ ]
+}
+```
+
+<Note>
+ **Legacy support**: Custom models in `~/.factory/config.json` using snake_case field names (`custom_models`, `base_url`, etc.) are still supported for backwards compatibility. Both files are loaded and merged, with `settings.json` taking priority.
+</Note>
### Supported Fields
| Field | Type | Required | Description |
|-------|------|----------|-------------|
-| `model_display_name` | `string` | | Human-friendly name shown in model selector |
| `model` | `string` | ✓ | Model identifier sent via API (e.g., `claude-sonnet-4-5-20250929`, `gpt-5-codex`, `qwen3:4b`) |
-| `base_url` | `string` | ✓ | API endpoint base URL |
-| `api_key` | `string` | ✓ | Your API key for the provider. Can't be empty. |
+| `displayName` | `string` | | Human-friendly name shown in model selector |
+| `baseUrl` | `string` | ✓ | API endpoint base URL |
+| `apiKey` | `string` | ✓ | Your API key for the provider. Can't be empty. |
| `provider` | `string` | ✓ | One of: `anthropic`, `openai`, or `generic-chat-completion-api` |
-| `max_tokens` | `number` | | Maximum output tokens for model responses |
-| `supports_images` | `boolean` | | Whether the model supports image inputs |
-| `extra_args` | `object` | | Additional provider-specific arguments to include in API requests |
-| `extra_headers` | `object` | | Additional HTTP headers to send with requests |
+| `maxOutputTokens` | `number` | | Maximum output tokens for model responses |
+| `supportsImages` | `boolean` | | Whether the model supports image inputs |
+| `extraArgs` | `object` | | Additional provider-specific arguments to include in API requests |
+| `extraHeaders` | `object` | | Additional HTTP headers to send with requests |
-### Using extra_args
+### Using extraArgs
Pass provider-specific parameters like temperature or top_p:
```json
{
- "model_display_name": "Custom Model",
- "model": "your-model",
- "base_url": "https://your-provider.com/v1",
- "api_key": "YOUR_API_KEY",
- "provider": "generic-chat-completion-api",
- "extra_args": {
- "temperature": 0.7,
- "top_p": 0.9
- }
+ "customModels": [
+ {
+ "model": "your-model",
+ "displayName": "Custom Model",
+ "baseUrl": "https://your-provider.com/v1",
+ "apiKey": "YOUR_API_KEY",
+ "provider": "generic-chat-completion-api",
+ "extraArgs": {
+ "temperature": 0.7,
+ "top_p": 0.9
+ }
+ }
+ ]
}
```
-### Using extra_headers
+### Using extraHeaders
Add custom HTTP headers to API requests:
```json
{
- "model_display_name": "Custom Model",
- "model": "your-model",
- "base_url": "https://your-provider.com/v1",
- "api_key": "YOUR_API_KEY",
- "provider": "generic-chat-completion-api",
- "extra_headers": {
- "X-Custom-Header": "value",
- "Authorization": "Bearer YOUR_TOKEN"
- }
+ "customModels": [
+ {
+ "model": "your-model",
+ "displayName": "Custom Model",
+ "baseUrl": "https://your-provider.com/v1",
+ "apiKey": "YOUR_API_KEY",
+ "provider": "generic-chat-completion-api",
+ "extraHeaders": {
+ "X-Custom-Header": "value",
+ "Authorization": "Bearer YOUR_TOKEN"
+ }
+ }
+ ]
}
```
@@ -136,15 +163,15 @@ Once configured, access your custom models in the CLI:
2. Your custom models appear in a separate "Custom models" section below Factory-provided models
3. Select any model to start using it
-Custom models display with the name you set in `model_display_name`, making it easy to identify different providers and configurations.
+Custom models display with the name you set in `displayName`, making it easy to identify different providers and configurations.
---
## Troubleshooting
### Model not appearing in selector
-- Check JSON syntax in `~/.factory/config.json`
-- Restart the CLI after making configuration changes
+- Check JSON syntax in `~/.factory/settings.json` (or `config.json` if using legacy format)
+- Settings changes are detected automatically via file watching
- Verify all required fields are present
### "Invalid provider" error
diff --git a/docs/cli/configuration/byok.mdx b/docs/cli/configuration/byok.mdx
index 197e59d..3a47242 100644
--- a/docs/cli/configuration/byok.mdx
+++ b/docs/cli/configuration/byok.mdx
@@ -18,18 +18,37 @@ Factory CLI supports custom model configurations through BYOK (Bring Your Own Ke
## Configuration Reference
-Add custom models in `~/.factory/config.json` under the `custom_models` array.
+Add custom models to `~/.factory/settings.json` under the `customModels` array:
+
+```json
+{
+ "customModels": [
+ {
+ "model": "your-model-id",
+ "displayName": "My Custom Model",
+ "baseUrl": "https://api.provider.com/v1",
+ "apiKey": "YOUR_API_KEY",
+ "provider": "generic-chat-completion-api",
+ "maxOutputTokens": 16384
+ }
+ ]
+}
+```
+
+<Note>
+ **Legacy support**: Custom models in `~/.factory/config.json` using snake_case field names (`custom_models`, `base_url`, etc.) are still supported for backwards compatibility. Both files are loaded and merged, with `settings.json` taking priority.
+</Note>
### Supported Fields
| Field | Required | Description |
|-------|----------|-------------|
-| `model_display_name` | ✓ | Human-friendly name shown in model selector |
| `model` | ✓ | Model identifier sent via API (e.g., `claude-sonnet-4-5-20250929`, `gpt-5-codex`, `qwen3:4b`) |
-| `base_url` | ✓ | API endpoint base URL |
-| `api_key` | ✓ | Your API key for the provider. Can't be empty. |
+| `displayName` | | Human-friendly name shown in model selector |
+| `baseUrl` | ✓ | API endpoint base URL |
+| `apiKey` | ✓ | Your API key for the provider. Can't be empty. |
| `provider` | ✓ | One of: `anthropic`, `openai`, or `generic-chat-completion-api` |
-| `max_tokens` | ✓ | Maximum output tokens for model responses |
+| `maxOutputTokens` | | Maximum output tokens for model responses |
---
@@ -76,19 +95,19 @@ Use your own API keys for cost control and billing transparency:
```json
{
- "custom_models": [
+ "customModels": [
{
- "model_display_name": "Sonnet 4.5 [Custom]",
"model": "claude-sonnet-4-5-20250929",
- "base_url": "https://api.anthropic.com",
- "api_key": "YOUR_ANTHROPIC_KEY",
+ "displayName": "Sonnet 4.5 [Custom]",
+ "baseUrl": "https://api.anthropic.com",
+ "apiKey": "YOUR_ANTHROPIC_KEY",
"provider": "anthropic"
},
{
- "model_display_name": "GPT5-Codex [Custom]",
"model": "gpt-5-codex",
- "base_url": "https://api.openai.com/v1",
- "api_key": "YOUR_OPENAI_KEY",
+ "displayName": "GPT5-Codex [Custom]",
+ "baseUrl": "https://api.openai.com/v1",
+ "apiKey": "YOUR_OPENAI_KEY",
"provider": "openai"
}
]
@@ -101,14 +120,14 @@ Connect to OpenRouter for access to models from multiple providers:
```json
{
- "custom_models": [
+ "customModels": [
{
- "model_display_name": "GPT-OSS-20B",
"model": "openai/gpt-oss-20b",
- "base_url": "https://openrouter.ai/api/v1",
- "api_key": "YOUR_OPENROUTER_KEY",
+ "displayName": "GPT-OSS-20B",
+ "baseUrl": "https://openrouter.ai/api/v1",
+ "apiKey": "YOUR_OPENROUTER_KEY",
"provider": "generic-chat-completion-api",
- "max_tokens": 32000
+ "maxOutputTokens": 32000
}
]
}
@@ -122,22 +141,22 @@ Access high-performance inference for open-source models with optimized serving:
```json
{
- "custom_models": [
+ "customModels": [
{
- "model_display_name": "GLM 4.5 [Fireworks]",
"model": "accounts/fireworks/models/glm-4p5",
- "base_url": "https://api.fireworks.ai/inference/v1",
- "api_key": "YOUR_FIREWORKS_API_KEY",
+ "displayName": "GLM 4.5 [Fireworks]",
+ "baseUrl": "https://api.fireworks.ai/inference/v1",
+ "apiKey": "YOUR_FIREWORKS_API_KEY",
"provider": "generic-chat-completion-api",
- "max_tokens": 16384
+ "maxOutputTokens": 16384
},
{
- "model_display_name": "Llama 3.1 405B [Fireworks]",
"model": "accounts/fireworks/models/llama-v3p1-405b-instruct",
- "base_url": "https://api.fireworks.ai/inference/v1",
- "api_key": "YOUR_FIREWORKS_API_KEY",
+ "displayName": "Llama 3.1 405B [Fireworks]",
+ "baseUrl": "https://api.fireworks.ai/inference/v1",
+ "apiKey": "YOUR_FIREWORKS_API_KEY",
"provider": "generic-chat-completion-api",
- "max_tokens": 131072
+ "maxOutputTokens": 131072
}
]
}
@@ -151,22 +170,22 @@ Deploy and serve custom models with enterprise-grade infrastructure:
```json
{
- "custom_models": [
+ "customModels": [
{
- "model_display_name": "Custom Model [Baseten]",
"model": "YOUR_MODEL_ID",
- "base_url": "https://inference.baseten.co/v1",
- "api_key": "YOUR_BASETEN_API_KEY",
+ "displayName": "Custom Model [Baseten]",
+ "baseUrl": "https://inference.baseten.co/v1",
+ "apiKey": "YOUR_BASETEN_API_KEY",
"provider": "generic-chat-completion-api",
- "max_tokens": 8192
+ "maxOutputTokens": 8192
},
{
- "model_display_name": "Llama 3.1 70B [Baseten]",
"model": "llama-3.1-70b-instruct",
- "base_url": "https://inference.baseten.co/v1",
- "api_key": "YOUR_BASETEN_API_KEY",
+ "displayName": "Llama 3.1 70B [Baseten]",
+ "baseUrl": "https://inference.baseten.co/v1",
+ "apiKey": "YOUR_BASETEN_API_KEY",
"provider": "generic-chat-completion-api",
- "max_tokens": 131072
+ "maxOutputTokens": 131072
}
]
}
@@ -182,30 +201,30 @@ Access cost-effective inference for a wide variety of open-source models:
```json
{
- "custom_models": [
+ "customModels": [
{
- "model_display_name": "GLM 4.6 [DeepInfra]",
"model": "zai-org/GLM-4.6",
- "base_url": "https://api.deepinfra.com/v1/openai",
- "api_key": "YOUR_DEEPINFRA_TOKEN",
+ "displayName": "GLM 4.6 [DeepInfra]",
+ "baseUrl": "https://api.deepinfra.com/v1/openai",
+ "apiKey": "YOUR_DEEPINFRA_TOKEN",
"provider": "generic-chat-completion-api",
- "max_tokens": 16384
+ "maxOutputTokens": 16384
},
{
- "model_display_name": "Qwen 2.5 Coder 32B [DeepInfra]",
"model": "Qwen/Qwen2.5-Coder-32B-Instruct",
- "base_url": "https://api.deepinfra.com/v1/openai",
- "api_key": "YOUR_DEEPINFRA_TOKEN",
+ "displayName": "Qwen 2.5 Coder 32B [DeepInfra]",
+ "baseUrl": "https://api.deepinfra.com/v1/openai",
+ "apiKey": "YOUR_DEEPINFRA_TOKEN",
"provider": "generic-chat-completion-api",
- "max_tokens": 32768
+ "maxOutputTokens": 32768
},
{
- "model_display_name": "DeepSeek V3 [DeepInfra]",
"model": "deepseek-ai/DeepSeek-V3",
- "base_url": "https://api.deepinfra.com/v1/openai",
- "api_key": "YOUR_DEEPINFRA_TOKEN",
+ "displayName": "DeepSeek V3 [DeepInfra]",
+ "baseUrl": "https://api.deepinfra.com/v1/openai",
+ "apiKey": "YOUR_DEEPINFRA_TOKEN",
"provider": "generic-chat-completion-api",
- "max_tokens": 65536
+ "maxOutputTokens": 65536
}
]
}
@@ -219,22 +238,22 @@ Connect to models hosted on Hugging Face's Inference API:
```json
{
- "custom_models": [
+ "customModels": [
{
- "model_display_name": "Mistral 7B [HF]",
"model": "mistralai/Mistral-7B-Instruct-v0.3",
- "base_url": "https://api-inference.huggingface.co/models",
- "api_key": "YOUR_HF_TOKEN",
+ "displayName": "Mistral 7B [HF]",
+ "baseUrl": "https://api-inference.huggingface.co/models",
+ "apiKey": "YOUR_HF_TOKEN",
"provider": "generic-chat-completion-api",
- "max_tokens": 32768
+ "maxOutputTokens": 32768
},
{
- "model_display_name": "CodeLlama 13B [HF]",
"model": "codellama/CodeLlama-13b-Instruct-hf",
- "base_url": "https://api-inference.huggingface.co/models",
- "api_key": "YOUR_HF_TOKEN",
+ "displayName": "CodeLlama 13B [HF]",
+ "baseUrl": "https://api-inference.huggingface.co/models",
+ "apiKey": "YOUR_HF_TOKEN",
"provider": "generic-chat-completion-api",
- "max_tokens": 16384
+ "maxOutputTokens": 16384
}
]
}
@@ -252,22 +271,22 @@ Run models locally on your hardware with Ollama:
```json
{
- "custom_models": [
+ "customModels": [
{
- "model_display_name": "Qwen 2.5 Coder [Local]",
"model": "qwen2.5-coder:7b",
- "base_url": "http://localhost:11434/v1",
- "api_key": "not-needed",
+ "displayName": "Qwen 2.5 Coder [Local]",
+ "baseUrl": "http://localhost:11434/v1",
+ "apiKey": "not-needed",
"provider": "generic-chat-completion-api",
- "max_tokens": 8192
+ "maxOutputTokens": 8192
},
{
- "model_display_name": "DeepSeek Coder V2 [Local]",
"model": "deepseek-coder-v2:16b",
- "base_url": "http://localhost:11434/v1",
- "api_key": "not-needed",
+ "displayName": "DeepSeek Coder V2 [Local]",
+ "baseUrl": "http://localhost:11434/v1",
+ "apiKey": "not-needed",
"provider": "generic-chat-completion-api",
- "max_tokens": 16384
+ "maxOutputTokens": 16384
}
]
}
@@ -287,15 +306,15 @@ Use Ollama's cloud service for hosted model inference:
```json
{
- "custom_models": [
+ "customModels": [
{
- "model_display_name": "Llama 3.1 8B [Ollama Cloud]",
"model": "llama3.1:8b",
- "base_url": "https://ollama.com",
- "api_key": "YOUR_OLLAMA_API_KEY",
+ "displayName": "Llama 3.1 8B [Ollama Cloud]",
+ "baseUrl": "https://ollama.com",
+ "apiKey": "YOUR_OLLAMA_API_KEY",
"provider": "generic-chat-completion-api",
- "max_tokens": 131072,
- "headers": {
+ "maxOutputTokens": 131072,
+ "extraHeaders": {
"Authorization": "Bearer YOUR_OLLAMA_API_KEY"
}
}
@@ -313,14 +332,14 @@ Access Google's Gemini models using your Gemini AI API key:
```json
{
- "custom_models": [
+ "customModels": [
{
- "model_display_name": "Gemini 2.5 Pro",
"model": "gemini-2.5-pro",
- "base_url": "https://generativelanguage.googleapis.com/v1beta/",
- "api_key": "YOUR_GEMINI_API_KEY",
+ "displayName": "Gemini 2.5 Pro",
+ "baseUrl": "https://generativelanguage.googleapis.com/v1beta/",
+ "apiKey": "YOUR_GEMINI_API_KEY",
"provider": "generic-chat-completion-api",
- "max_tokens": 32000
+ "maxOutputTokens": 32000
}
]
}
@@ -338,15 +357,15 @@ Once configured, access your custom models in the CLI:
2. Your custom models appear in a separate "Custom models" section below Factory-provided models
3. Select any model to start using it
-Custom models display with the name you set in `model_display_name`, making it easy to identify different providers and configurations.
+Custom models display with the name you set in `displayName`, making it easy to identify different providers and configurations.
---
## Troubleshooting
### Model not appearing in selector
-- Check JSON syntax in `~/.factory/config.json`
-- Restart the CLI after making configuration changes
+- Check JSON syntax in `~/.factory/settings.json` (or `config.json` if using legacy format)
+- Settings changes are detected automatically via file watching
- Verify all required fields are present
### "Invalid provider" error
diff --git a/docs/cli/configuration/settings.mdx b/docs/cli/configuration/settings.mdx
index c2429d9..93e5b97 100644
--- a/docs/cli/configuration/settings.mdx
+++ b/docs/cli/configuration/settings.mdx
@@ -48,6 +48,7 @@ If the file doesn't exist, it's created with defaults the first time you run **d
| `showThinkingInMainView` | `true`, `false` | `false` | Display AI thinking/reasoning blocks in the main chat view. |
| `allowBackgroundProcesses` | `true`, `false` | `false` | Allow droid to spawn background processes (experimental). |
| `enableReadinessReport` | `true`, `false` | `false` | Enable the `/readiness-report` slash command (experimental). |
+| `customModels` | Array of model configs | `[]` | Custom model configurations for BYOK. See [BYOK docs](/cli/configuration/byok). |
### Model
diff --git a/docs/cli/droid-exec/overview.mdx b/docs/cli/droid-exec/overview.mdx
index 556d7f9..48ff98e 100644
--- a/docs/cli/droid-exec/overview.mdx
+++ b/docs/cli/droid-exec/overview.mdx
@@ -377,16 +377,16 @@ droid exec --auto medium --disabled-tools execute-cli "run edits only"
### Custom models
-You can configure custom models to use with droid exec by adding them to your `~/.factory/config.json` file:
+You can configure custom models to use with droid exec by adding them to your `~/.factory/settings.json` file:
```json
{
- "custom_models": [
+ "customModels": [
{
- "model_display_name": "My Custom Model",
"model": "gpt-5.1-codex-custom",
- "base_url": "https://api.openai.com/v1",
- "api_key": "your-api-key-here",
+ "displayName": "My Custom Model",
+ "baseUrl": "https://api.openai.com/v1",
+ "apiKey": "your-api-key-here",
"provider": "openai"
}
]
@@ -403,22 +403,22 @@ If you have multiple custom models configured:
```json
{
- "custom_models": [
+ "customModels": [
{
- "model_display_name": "Kimi K2 [Groq]",
"model": "kimi-k2",
- "base_url": "https://api.groq.com/openai/v1",
- "api_key": "your-groq-key",
+ "displayName": "Kimi K2 [Groq]",
+ "baseUrl": "https://api.groq.com/openai/v1",
+ "apiKey": "your-groq-key",
"provider": "generic-chat-completion-api",
- "max_tokens": 16384
+ "maxOutputTokens": 16384
},
{
- "model_display_name": "GPT-OSS-20B [OpenRouter]",
"model": "openai/gpt-oss-20b",
- "base_url": "https://openrouter.ai/api/v1",
- "api_key": "YOUR_OPENROUTER_KEY",
+ "displayName": "GPT-OSS-20B [OpenRouter]",
+ "baseUrl": "https://openrouter.ai/api/v1",
+ "apiKey": "YOUR_OPENROUTER_KEY",
"provider": "generic-chat-completion-api",
- "max_tokens": 32000
+ "maxOutputTokens": 32000
}
]
}
@@ -426,9 +426,9 @@ If you have multiple custom models configured:
You would reference them as:
- `--model "custom:Kimi-K2-[Groq]-0"`
-- `--model "custom:GPT-OSS-20B [OpenRouter]-1"`
+- `--model "custom:GPT-OSS-20B-[OpenRouter]-1"`
-The index corresponds to the position in the `custom_models` array (0-based).
+The index corresponds to the position in the `customModels` array (0-based).
Reasoning effort (`-r` / `--reasoning-effort`) is not yet supported for custom models, but coming soon.
diff --git a/docs/enterprise/models-llm-gateways-and-integrations.mdx b/docs/enterprise/models-llm-gateways-and-integrations.mdx
index 316d84b..0374529 100644
--- a/docs/enterprise/models-llm-gateways-and-integrations.mdx
+++ b/docs/enterprise/models-llm-gateways-and-integrations.mdx
@@ -45,7 +45,7 @@ Factory works with gateways in two ways:
When you use a gateway, **data handling and retention policies are those of the gateway and underlying providers**; Droid simply uses the endpoints and credentials you configure.
-For concrete examples of configuring custom models (including gateway‑backed models), see [Custom models & BYOK](/cli/configuration/byok), which covers the `custom_models` array in `~/.factory/config.json` and how those models appear in the `/model` selector.
+For concrete examples of configuring custom models (including gateway‑backed models), see [Custom models & BYOK](/cli/configuration/byok), which covers the `customModels` array in `~/.factory/settings.json` and how those models appear in the `/model` selector.
---