diff --git a/docs/cli/byok/baseten.mdx b/docs/cli/byok/baseten.mdx
index e050028..298d946 100644
--- a/docs/cli/byok/baseten.mdx
+++ b/docs/cli/byok/baseten.mdx
@@ -8,18 +8,18 @@ Deploy and serve custom models with Baseten's enterprise-grade infrastructure fo
 
 ## Configuration
 
-Add these configurations to `~/.factory/config.json`:
+Add to `~/.factory/settings.json`:
 
 ```json
 {
-  "custom_models": [
+  "customModels": [
     {
-      "model_display_name": "Qwen3-Coder-480B [Baseten]",
       "model": "Qwen/Qwen3-Coder-480B-A35B-Instruct",
-      "base_url": "https://inference.baseten.co/v1",
-      "api_key": "YOUR_BASETEN_API_KEY",
+      "displayName": "Qwen3-Coder-480B [Baseten]",
+      "baseUrl": "https://inference.baseten.co/v1",
+      "apiKey": "YOUR_BASETEN_API_KEY",
       "provider": "generic-chat-completion-api",
-      "max_tokens": 8192
+      "maxOutputTokens": 8192
     }
   ]
 }
 ```
diff --git a/docs/cli/byok/deepinfra.mdx b/docs/cli/byok/deepinfra.mdx
index 9b59a2b..8ee7510 100644
--- a/docs/cli/byok/deepinfra.mdx
+++ b/docs/cli/byok/deepinfra.mdx
@@ -8,42 +8,42 @@ Access cost-effective inference for a wide variety of open-source models with De
 
 ## Configuration
 
-Configuration examples for `~/.factory/config.json`:
+Add to `~/.factory/settings.json`:
 
 ```json
 {
-  "custom_models": [
+  "customModels": [
     {
-      "model_display_name": "GLM-4.6 [DeepInfra]",
       "model": "zai-org/GLM-4.6",
-      "base_url": "https://api.deepinfra.com/v1/openai",
-      "api_key": "YOUR_DEEPINFRA_TOKEN",
+      "displayName": "GLM-4.6 [DeepInfra]",
+      "baseUrl": "https://api.deepinfra.com/v1/openai",
+      "apiKey": "YOUR_DEEPINFRA_TOKEN",
       "provider": "generic-chat-completion-api",
-      "max_tokens": 16384
+      "maxOutputTokens": 16384
     },
     {
-      "model_display_name": "DeepSeek V3.1 Terminus [DeepInfra]",
       "model": "deepseek-ai/DeepSeek-V3.1-Terminus",
-      "base_url": "https://api.deepinfra.com/v1/openai",
-      "api_key": "YOUR_DEEPINFRA_TOKEN",
+      "displayName": "DeepSeek V3.1 Terminus [DeepInfra]",
+      "baseUrl": "https://api.deepinfra.com/v1/openai",
+      "apiKey": "YOUR_DEEPINFRA_TOKEN",
       "provider": "generic-chat-completion-api",
-      "max_tokens": 16384
+      "maxOutputTokens": 16384
     },
     {
-      "model_display_name": "Kimi K2 Instruct [DeepInfra]",
       "model": "moonshotai/Kimi-K2-Instruct-0905",
-      "base_url": "https://api.deepinfra.com/v1/openai",
-      "api_key": "YOUR_DEEPINFRA_TOKEN",
+      "displayName": "Kimi K2 Instruct [DeepInfra]",
+      "baseUrl": "https://api.deepinfra.com/v1/openai",
+      "apiKey": "YOUR_DEEPINFRA_TOKEN",
       "provider": "generic-chat-completion-api",
-      "max_tokens": 32768
+      "maxOutputTokens": 32768
     },
     {
-      "model_display_name": "Qwen3 Coder 480B [DeepInfra]",
       "model": "Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo",
-      "base_url": "https://api.deepinfra.com/v1/openai",
-      "api_key": "YOUR_DEEPINFRA_TOKEN",
+      "displayName": "Qwen3 Coder 480B [DeepInfra]",
+      "baseUrl": "https://api.deepinfra.com/v1/openai",
+      "apiKey": "YOUR_DEEPINFRA_TOKEN",
       "provider": "generic-chat-completion-api",
-      "max_tokens": 32768
+      "maxOutputTokens": 32768
     }
   ]
 }
 ```
diff --git a/docs/cli/byok/fireworks.mdx b/docs/cli/byok/fireworks.mdx
index 1ebf97b..78c0d17 100644
--- a/docs/cli/byok/fireworks.mdx
+++ b/docs/cli/byok/fireworks.mdx
@@ -8,26 +8,26 @@ Access high-performance inference for open-source models with Fireworks AI's opt
 
 ## Configuration
 
-Configuration examples for `~/.factory/config.json`:
+Add to `~/.factory/settings.json`:
 
 ```json
 {
-  "custom_models": [
+  "customModels": [
     {
-      "model_display_name": "GLM 4.5 [Fireworks]",
       "model": "accounts/fireworks/models/glm-4p5",
-      "base_url": "https://api.fireworks.ai/inference/v1",
-      "api_key": "YOUR_FIREWORKS_API_KEY",
+      "displayName": "GLM 4.5 [Fireworks]",
+      "baseUrl": "https://api.fireworks.ai/inference/v1",
+      "apiKey": "YOUR_FIREWORKS_API_KEY",
       "provider": "generic-chat-completion-api",
-      "max_tokens": 16384
+      "maxOutputTokens": 16384
     },
     {
-      "model_display_name": "Deepseek V3.1 Terminus [Fireworks]",
       "model": "accounts/fireworks/models/deepseek-v3p1-terminus",
-      "base_url": "https://api.fireworks.ai/inference/v1",
-      "api_key": "YOUR_FIREWORKS_API_KEY",
+      "displayName": "Deepseek V3.1 Terminus [Fireworks]",
+      "baseUrl": "https://api.fireworks.ai/inference/v1",
+      "apiKey": "YOUR_FIREWORKS_API_KEY",
       "provider": "generic-chat-completion-api",
-      "max_tokens": 20480
+      "maxOutputTokens": 20480
     }
   ]
 }
 ```
diff --git a/docs/cli/byok/google-gemini.mdx b/docs/cli/byok/google-gemini.mdx
index 4bd3e9a..76d838a 100644
--- a/docs/cli/byok/google-gemini.mdx
+++ b/docs/cli/byok/google-gemini.mdx
@@ -8,34 +8,34 @@ Connect to Google's Gemini models for advanced AI capabilities with multimodal s
 
 ## Configuration
 
-Configuration examples for `~/.factory/config.json`:
+Add to `~/.factory/settings.json`:
 
 ```json
 {
-  "custom_models": [
+  "customModels": [
     {
-      "model_display_name": "Gemini 2.5 Pro [Google]",
       "model": "gemini-2.5-pro",
-      "base_url": "https://generativelanguage.googleapis.com/v1beta/",
-      "api_key": "YOUR_GEMINI_API_KEY",
+      "displayName": "Gemini 2.5 Pro [Google]",
+      "baseUrl": "https://generativelanguage.googleapis.com/v1beta/",
+      "apiKey": "YOUR_GEMINI_API_KEY",
       "provider": "generic-chat-completion-api",
-      "max_tokens": 32000
+      "maxOutputTokens": 32000
     },
     {
-      "model_display_name": "Gemini 1.5 Pro [Google]",
       "model": "gemini-1.5-pro",
-      "base_url": "https://generativelanguage.googleapis.com/v1beta/",
-      "api_key": "YOUR_GEMINI_API_KEY",
+      "displayName": "Gemini 1.5 Pro [Google]",
+      "baseUrl": "https://generativelanguage.googleapis.com/v1beta/",
+      "apiKey": "YOUR_GEMINI_API_KEY",
       "provider": "generic-chat-completion-api",
-      "max_tokens": 1048576
+      "maxOutputTokens": 1048576
     },
     {
-      "model_display_name": "Gemini 1.5 Flash [Google]",
       "model": "gemini-1.5-flash",
-      "base_url": "https://generativelanguage.googleapis.com/v1beta/",
-      "api_key": "YOUR_GEMINI_API_KEY",
+      "displayName": "Gemini 1.5 Flash [Google]",
+      "baseUrl": "https://generativelanguage.googleapis.com/v1beta/",
+      "apiKey": "YOUR_GEMINI_API_KEY",
       "provider": "generic-chat-completion-api",
-      "max_tokens": 1048576
+      "maxOutputTokens": 1048576
     }
   ]
 }
 ```
diff --git a/docs/cli/byok/groq.mdx b/docs/cli/byok/groq.mdx
index 0219a33..01951f5 100644
--- a/docs/cli/byok/groq.mdx
+++ b/docs/cli/byok/groq.mdx
@@ -8,18 +8,18 @@ Access ultra-fast inference powered by Groq's LPU™ (Language Processing Unit)
 
 ## Configuration
 
-Configuration examples for `~/.factory/config.json`:
+Add to `~/.factory/settings.json`:
 
 ```json
 {
-  "custom_models": [
+  "customModels": [
     {
-      "model_display_name": "Kimi K2 [Groq]",
       "model": "moonshotai/kimi-k2-instruct-0905",
-      "base_url": "https://api.groq.com/openai/v1",
-      "api_key": "YOUR_GROQ_KEY",
+      "displayName": "Kimi K2 [Groq]",
+      "baseUrl": "https://api.groq.com/openai/v1",
+      "apiKey": "YOUR_GROQ_KEY",
       "provider": "generic-chat-completion-api",
-      "max_tokens": 16384
+      "maxOutputTokens": 16384
     }
   ]
 }
 ```
diff --git a/docs/cli/byok/huggingface.mdx b/docs/cli/byok/huggingface.mdx
index 1e94ddf..c2ddd03 100644
--- a/docs/cli/byok/huggingface.mdx
+++ b/docs/cli/byok/huggingface.mdx
@@ -12,26 +12,26 @@ Connect to thousands of models hosted on Hugging Face's Inference Providers. Lea
 
 ## Configuration
 
-Configuration examples for `~/.factory/config.json`:
+Add to `~/.factory/settings.json`:
 
 ```json
 {
-  "custom_models": [
+  "customModels": [
     {
-      "model_display_name": "GPT OSS 120B [HF Router]",
       "model": "openai/gpt-oss-120b:fireworks-ai",
-      "base_url": "https://router.huggingface.co/v1",
-      "api_key": "YOUR_HF_TOKEN",
+      "displayName": "GPT OSS 120B [HF Router]",
+      "baseUrl": "https://router.huggingface.co/v1",
+      "apiKey": "YOUR_HF_TOKEN",
       "provider": "generic-chat-completion-api",
-      "max_tokens": 32768
+      "maxOutputTokens": 32768
     },
     {
-      "model_display_name": "Llama 4 Scout 17B [HF Router]",
       "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct:fireworks-ai",
-      "base_url": "https://router.huggingface.co/v1",
-      "api_key": "YOUR_HF_TOKEN",
+      "displayName": "Llama 4 Scout 17B [HF Router]",
+      "baseUrl": "https://router.huggingface.co/v1",
+      "apiKey": "YOUR_HF_TOKEN",
       "provider": "generic-chat-completion-api",
-      "max_tokens": 16384
+      "maxOutputTokens": 16384
     }
   ]
 }
 ```
diff --git a/docs/cli/byok/ollama.mdx b/docs/cli/byok/ollama.mdx
index 16a427b..219f82b 100644
--- a/docs/cli/byok/ollama.mdx
+++ b/docs/cli/byok/ollama.mdx
@@ -17,26 +17,26 @@ Run models entirely on your machine with no internet required.
 ### Configuration
 
-Configuration examples for `~/.factory/config.json`:
+Add to `~/.factory/settings.json`:
 
 ```json
 {
-  "custom_models": [
+  "customModels": [
     {
-      "model_display_name": "Qwen 2.5 Coder 32B [Local]",
       "model": "qwen2.5-coder:32b",
-      "base_url": "http://localhost:11434/v1",
-      "api_key": "not-needed", # add any non-empty value
+      "displayName": "Qwen 2.5 Coder 32B [Local]",
+      "baseUrl": "http://localhost:11434/v1",
+      "apiKey": "not-needed",
       "provider": "generic-chat-completion-api",
-      "max_tokens": 16000
+      "maxOutputTokens": 16000
     },
     {
-      "model_display_name": "Qwen 2.5 Coder 7B [Local]",
       "model": "qwen2.5-coder:7b",
-      "base_url": "http://localhost:11434/v1",
-      "api_key": "not-needed", # add any non-empty value
+      "displayName": "Qwen 2.5 Coder 7B [Local]",
+      "baseUrl": "http://localhost:11434/v1",
+      "apiKey": "not-needed",
       "provider": "generic-chat-completion-api",
-      "max_tokens": 4000
+      "maxOutputTokens": 4000
     }
   ]
 }
 ```
@@ -90,14 +90,14 @@ For a full list of available cloud models, visit: [ollama.com/search?c=cloud](ht
 
 ```json
 {
-  "custom_models": [
+  "customModels": [
     {
-      "model_display_name": "qwen3-coder [Online]",
       "model": "qwen3-coder:480b-cloud",
-      "base_url": "http://localhost:11434/v1/",
-      "api_key": "not-needed", # add any non-empty value
+      "displayName": "qwen3-coder [Online]",
+      "baseUrl": "http://localhost:11434/v1/",
+      "apiKey": "not-needed",
       "provider": "generic-chat-completion-api",
-      "max_tokens": 128000
+      "maxOutputTokens": 128000
     }
   ]
 }
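For the local Ollama configurations above, the `baseUrl` only answers if the Ollama server is running and the model has already been pulled. A small check, assuming a default install listening on port 11434:

```python
import json
import urllib.request

# Ollama's OpenAI-compatible API lists pulled models at /v1/models.
with urllib.request.urlopen("http://localhost:11434/v1/models") as resp:
    models = json.load(resp)

# Each "id" is what belongs in the "model" field, e.g. "qwen2.5-coder:32b".
for entry in models.get("data", []):
    print(entry["id"])
```

If the expected tag is missing from the output, pull it first (`ollama pull qwen2.5-coder:32b`); note that the config still requires a non-empty `apiKey` even though the local server ignores it.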
"GPT5-Codex [Custom]", "model": "gpt-5-codex", - "base_url": "https://api.openai.com/v1", - "api_key": "YOUR_OPENAI_KEY", + "displayName": "GPT5-Codex [Custom]", + "baseUrl": "https://api.openai.com/v1", + "apiKey": "YOUR_OPENAI_KEY", "provider": "openai", - "max_tokens": 16384 + "maxOutputTokens": 16384 } ] } diff --git a/docs/cli/byok/openrouter.mdx b/docs/cli/byok/openrouter.mdx index 95acd36..0e389c2 100644 --- a/docs/cli/byok/openrouter.mdx +++ b/docs/cli/byok/openrouter.mdx @@ -8,18 +8,18 @@ Connect to OpenRouter for access to models from multiple providers through a sin ## Configuration -Configuration examples for `~/.factory/config.json`: +Add to `~/.factory/settings.json`: ```json { - "custom_models": [ + "customModels": [ { - "model_display_name": "GPT-OSS-20B [OpenRouter]", "model": "openai/gpt-oss-20b", - "base_url": "https://openrouter.ai/api/v1", - "api_key": "YOUR_OPENROUTER_KEY", + "displayName": "GPT-OSS-20B [OpenRouter]", + "baseUrl": "https://openrouter.ai/api/v1", + "apiKey": "YOUR_OPENROUTER_KEY", "provider": "generic-chat-completion-api", - "max_tokens": 32000 + "maxOutputTokens": 32000 } ] } diff --git a/docs/cli/byok/overview.mdx b/docs/cli/byok/overview.mdx index be966a5..f372caf 100644 --- a/docs/cli/byok/overview.mdx +++ b/docs/cli/byok/overview.mdx @@ -19,55 +19,82 @@ Factory CLI supports custom model configurations through BYOK (Bring Your Own Ke ## Configuration Reference -Add custom models in `~/.factory/config.json` under the `custom_models` array. +Add custom models to `~/.factory/settings.json` under the `customModels` array: + +```json +{ + "customModels": [ + { + "model": "your-model-id", + "displayName": "My Custom Model", + "baseUrl": "https://api.provider.com/v1", + "apiKey": "YOUR_API_KEY", + "provider": "generic-chat-completion-api", + "maxOutputTokens": 16384 + } + ] +} +``` + + + **Legacy support**: Custom models in `~/.factory/config.json` using snake_case field names (`custom_models`, `base_url`, etc.) are still supported for backwards compatibility. Both files are loaded and merged, with `settings.json` taking priority. + ### Supported Fields | Field | Type | Required | Description | |-------|------|----------|-------------| -| `model_display_name` | `string` | | Human-friendly name shown in model selector | | `model` | `string` | ✓ | Model identifier sent via API (e.g., `claude-sonnet-4-5-20250929`, `gpt-5-codex`, `qwen3:4b`) | -| `base_url` | `string` | ✓ | API endpoint base URL | -| `api_key` | `string` | ✓ | Your API key for the provider. Can't be empty. | +| `displayName` | `string` | | Human-friendly name shown in model selector | +| `baseUrl` | `string` | ✓ | API endpoint base URL | +| `apiKey` | `string` | ✓ | Your API key for the provider. Can't be empty. 
| | `provider` | `string` | ✓ | One of: `anthropic`, `openai`, or `generic-chat-completion-api` | -| `max_tokens` | `number` | | Maximum output tokens for model responses | -| `supports_images` | `boolean` | | Whether the model supports image inputs | -| `extra_args` | `object` | | Additional provider-specific arguments to include in API requests | -| `extra_headers` | `object` | | Additional HTTP headers to send with requests | +| `maxOutputTokens` | `number` | | Maximum output tokens for model responses | +| `supportsImages` | `boolean` | | Whether the model supports image inputs | +| `extraArgs` | `object` | | Additional provider-specific arguments to include in API requests | +| `extraHeaders` | `object` | | Additional HTTP headers to send with requests | -### Using extra_args +### Using extraArgs Pass provider-specific parameters like temperature or top_p: ```json { - "model_display_name": "Custom Model", - "model": "your-model", - "base_url": "https://your-provider.com/v1", - "api_key": "YOUR_API_KEY", - "provider": "generic-chat-completion-api", - "extra_args": { - "temperature": 0.7, - "top_p": 0.9 - } + "customModels": [ + { + "model": "your-model", + "displayName": "Custom Model", + "baseUrl": "https://your-provider.com/v1", + "apiKey": "YOUR_API_KEY", + "provider": "generic-chat-completion-api", + "extraArgs": { + "temperature": 0.7, + "top_p": 0.9 + } + } + ] } ``` -### Using extra_headers +### Using extraHeaders Add custom HTTP headers to API requests: ```json { - "model_display_name": "Custom Model", - "model": "your-model", - "base_url": "https://your-provider.com/v1", - "api_key": "YOUR_API_KEY", - "provider": "generic-chat-completion-api", - "extra_headers": { - "X-Custom-Header": "value", - "Authorization": "Bearer YOUR_TOKEN" - } + "customModels": [ + { + "model": "your-model", + "displayName": "Custom Model", + "baseUrl": "https://your-provider.com/v1", + "apiKey": "YOUR_API_KEY", + "provider": "generic-chat-completion-api", + "extraHeaders": { + "X-Custom-Header": "value", + "Authorization": "Bearer YOUR_TOKEN" + } + } + ] } ``` @@ -136,15 +163,15 @@ Once configured, access your custom models in the CLI: 2. Your custom models appear in a separate "Custom models" section below Factory-provided models 3. Select any model to start using it -Custom models display with the name you set in `model_display_name`, making it easy to identify different providers and configurations. +Custom models display with the name you set in `displayName`, making it easy to identify different providers and configurations. --- ## Troubleshooting ### Model not appearing in selector -- Check JSON syntax in `~/.factory/config.json` -- Restart the CLI after making configuration changes +- Check JSON syntax in `~/.factory/settings.json` (or `config.json` if using legacy format) +- Settings changes are detected automatically via file watching - Verify all required fields are present ### "Invalid provider" error diff --git a/docs/cli/configuration/byok.mdx b/docs/cli/configuration/byok.mdx index 197e59d..3a47242 100644 --- a/docs/cli/configuration/byok.mdx +++ b/docs/cli/configuration/byok.mdx @@ -18,18 +18,37 @@ Factory CLI supports custom model configurations through BYOK (Bring Your Own Ke ## Configuration Reference -Add custom models in `~/.factory/config.json` under the `custom_models` array. 
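Because the legacy note above says both files are loaded and merged with `settings.json` taking priority, existing `config.json` entries can be carried over mechanically: every rename in this diff is snake_case to camelCase, plus `max_tokens` becoming `maxOutputTokens`. A one-off migration sketch along those lines (illustrative only, not an official Factory tool; back up both files before running it):

```python
import json
from pathlib import Path

# Field renames exactly as listed in the Supported Fields tables above.
KEY_MAP = {
    "model_display_name": "displayName",
    "base_url": "baseUrl",
    "api_key": "apiKey",
    "max_tokens": "maxOutputTokens",
    "supports_images": "supportsImages",
    "extra_args": "extraArgs",
    "extra_headers": "extraHeaders",
}

factory = Path.home() / ".factory"
legacy = json.loads((factory / "config.json").read_text())

# Rename keys per entry; fields not in the map (model, provider) pass through.
migrated = [
    {KEY_MAP.get(key, key): value for key, value in entry.items()}
    for entry in legacy.get("custom_models", [])
]

settings_path = factory / "settings.json"
settings = json.loads(settings_path.read_text()) if settings_path.exists() else {}
settings["customModels"] = migrated
settings_path.write_text(json.dumps(settings, indent=2) + "\n")
```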
diff --git a/docs/cli/configuration/byok.mdx b/docs/cli/configuration/byok.mdx
index 197e59d..3a47242 100644
--- a/docs/cli/configuration/byok.mdx
+++ b/docs/cli/configuration/byok.mdx
@@ -18,18 +18,37 @@ Factory CLI supports custom model configurations through BYOK (Bring Your Own Ke
 
 ## Configuration Reference
 
-Add custom models in `~/.factory/config.json` under the `custom_models` array.
+Add custom models to `~/.factory/settings.json` under the `customModels` array:
+
+```json
+{
+  "customModels": [
+    {
+      "model": "your-model-id",
+      "displayName": "My Custom Model",
+      "baseUrl": "https://api.provider.com/v1",
+      "apiKey": "YOUR_API_KEY",
+      "provider": "generic-chat-completion-api",
+      "maxOutputTokens": 16384
+    }
+  ]
+}
+```
+
+<Note>
+  **Legacy support**: Custom models in `~/.factory/config.json` using snake_case field names (`custom_models`, `base_url`, etc.) are still supported for backwards compatibility. Both files are loaded and merged, with `settings.json` taking priority.
+</Note>
 
 ### Supported Fields
 
 | Field | Required | Description |
 |-------|----------|-------------|
-| `model_display_name` | ✓ | Human-friendly name shown in model selector |
 | `model` | ✓ | Model identifier sent via API (e.g., `claude-sonnet-4-5-20250929`, `gpt-5-codex`, `qwen3:4b`) |
-| `base_url` | ✓ | API endpoint base URL |
-| `api_key` | ✓ | Your API key for the provider. Can't be empty. |
+| `displayName` | | Human-friendly name shown in model selector |
+| `baseUrl` | ✓ | API endpoint base URL |
+| `apiKey` | ✓ | Your API key for the provider. Can't be empty. |
 | `provider` | ✓ | One of: `anthropic`, `openai`, or `generic-chat-completion-api` |
-| `max_tokens` | ✓ | Maximum output tokens for model responses |
+| `maxOutputTokens` | | Maximum output tokens for model responses |
 
 ---
@@ -76,19 +95,19 @@ Use your own API keys for cost control and billing transparency:
 
 ```json
 {
-  "custom_models": [
+  "customModels": [
     {
-      "model_display_name": "Sonnet 4.5 [Custom]",
       "model": "claude-sonnet-4-5-20250929",
-      "base_url": "https://api.anthropic.com",
-      "api_key": "YOUR_ANTHROPIC_KEY",
+      "displayName": "Sonnet 4.5 [Custom]",
+      "baseUrl": "https://api.anthropic.com",
+      "apiKey": "YOUR_ANTHROPIC_KEY",
       "provider": "anthropic"
     },
     {
-      "model_display_name": "GPT5-Codex [Custom]",
       "model": "gpt-5-codex",
-      "base_url": "https://api.openai.com/v1",
-      "api_key": "YOUR_OPENAI_KEY",
+      "displayName": "GPT5-Codex [Custom]",
+      "baseUrl": "https://api.openai.com/v1",
+      "apiKey": "YOUR_OPENAI_KEY",
       "provider": "openai"
     }
   ]
 }
@@ -101,14 +120,14 @@ Connect to OpenRouter for access to models from multiple providers:
 
 ```json
 {
-  "custom_models": [
+  "customModels": [
     {
-      "model_display_name": "GPT-OSS-20B",
       "model": "openai/gpt-oss-20b",
-      "base_url": "https://openrouter.ai/api/v1",
-      "api_key": "YOUR_OPENROUTER_KEY",
+      "displayName": "GPT-OSS-20B",
+      "baseUrl": "https://openrouter.ai/api/v1",
+      "apiKey": "YOUR_OPENROUTER_KEY",
       "provider": "generic-chat-completion-api",
-      "max_tokens": 32000
+      "maxOutputTokens": 32000
     }
   ]
 }
@@ -122,22 +141,22 @@ Access high-performance inference for open-source models with optimized serving:
 
 ```json
 {
-  "custom_models": [
+  "customModels": [
     {
-      "model_display_name": "GLM 4.5 [Fireworks]",
       "model": "accounts/fireworks/models/glm-4p5",
-      "base_url": "https://api.fireworks.ai/inference/v1",
-      "api_key": "YOUR_FIREWORKS_API_KEY",
+      "displayName": "GLM 4.5 [Fireworks]",
+      "baseUrl": "https://api.fireworks.ai/inference/v1",
+      "apiKey": "YOUR_FIREWORKS_API_KEY",
       "provider": "generic-chat-completion-api",
-      "max_tokens": 16384
+      "maxOutputTokens": 16384
     },
     {
-      "model_display_name": "Llama 3.1 405B [Fireworks]",
       "model": "accounts/fireworks/models/llama-v3p1-405b-instruct",
-      "base_url": "https://api.fireworks.ai/inference/v1",
-      "api_key": "YOUR_FIREWORKS_API_KEY",
+      "displayName": "Llama 3.1 405B [Fireworks]",
+      "baseUrl": "https://api.fireworks.ai/inference/v1",
+      "apiKey": "YOUR_FIREWORKS_API_KEY",
       "provider": "generic-chat-completion-api",
-      "max_tokens": 131072
+      "maxOutputTokens": 131072
     }
   ]
 }
@@ -151,22 +170,22 @@ Deploy and serve custom models with enterprise-grade infrastructure:
 
 ```json
 {
-  "custom_models": [
+  "customModels": [
     {
-      "model_display_name": "Custom Model [Baseten]",
       "model": "YOUR_MODEL_ID",
-      "base_url": "https://inference.baseten.co/v1",
-      "api_key": "YOUR_BASETEN_API_KEY",
+      "displayName": "Custom Model [Baseten]",
+      "baseUrl": "https://inference.baseten.co/v1",
+      "apiKey": "YOUR_BASETEN_API_KEY",
       "provider": "generic-chat-completion-api",
-      "max_tokens": 8192
+      "maxOutputTokens": 8192
     },
     {
-      "model_display_name": "Llama 3.1 70B [Baseten]",
       "model": "llama-3.1-70b-instruct",
-      "base_url": "https://inference.baseten.co/v1",
-      "api_key": "YOUR_BASETEN_API_KEY",
+      "displayName": "Llama 3.1 70B [Baseten]",
+      "baseUrl": "https://inference.baseten.co/v1",
+      "apiKey": "YOUR_BASETEN_API_KEY",
       "provider": "generic-chat-completion-api",
-      "max_tokens": 131072
+      "maxOutputTokens": 131072
     }
   ]
 }
@@ -182,30 +201,30 @@ Access cost-effective inference for a wide variety of open-source models:
 
 ```json
 {
-  "custom_models": [
+  "customModels": [
     {
-      "model_display_name": "GLM 4.6 [DeepInfra]",
       "model": "zai-org/GLM-4.6",
-      "base_url": "https://api.deepinfra.com/v1/openai",
-      "api_key": "YOUR_DEEPINFRA_TOKEN",
+      "displayName": "GLM 4.6 [DeepInfra]",
+      "baseUrl": "https://api.deepinfra.com/v1/openai",
+      "apiKey": "YOUR_DEEPINFRA_TOKEN",
       "provider": "generic-chat-completion-api",
-      "max_tokens": 16384
+      "maxOutputTokens": 16384
     },
     {
-      "model_display_name": "Qwen 2.5 Coder 32B [DeepInfra]",
       "model": "Qwen/Qwen2.5-Coder-32B-Instruct",
-      "base_url": "https://api.deepinfra.com/v1/openai",
-      "api_key": "YOUR_DEEPINFRA_TOKEN",
+      "displayName": "Qwen 2.5 Coder 32B [DeepInfra]",
+      "baseUrl": "https://api.deepinfra.com/v1/openai",
+      "apiKey": "YOUR_DEEPINFRA_TOKEN",
       "provider": "generic-chat-completion-api",
-      "max_tokens": 32768
+      "maxOutputTokens": 32768
     },
     {
-      "model_display_name": "DeepSeek V3 [DeepInfra]",
       "model": "deepseek-ai/DeepSeek-V3",
-      "base_url": "https://api.deepinfra.com/v1/openai",
-      "api_key": "YOUR_DEEPINFRA_TOKEN",
+      "displayName": "DeepSeek V3 [DeepInfra]",
+      "baseUrl": "https://api.deepinfra.com/v1/openai",
+      "apiKey": "YOUR_DEEPINFRA_TOKEN",
       "provider": "generic-chat-completion-api",
-      "max_tokens": 65536
+      "maxOutputTokens": 65536
     }
   ]
 }
@@ -219,22 +238,22 @@ Connect to models hosted on Hugging Face's Inference API:
 
 ```json
 {
-  "custom_models": [
+  "customModels": [
     {
-      "model_display_name": "Mistral 7B [HF]",
       "model": "mistralai/Mistral-7B-Instruct-v0.3",
-      "base_url": "https://api-inference.huggingface.co/models",
-      "api_key": "YOUR_HF_TOKEN",
+      "displayName": "Mistral 7B [HF]",
+      "baseUrl": "https://api-inference.huggingface.co/models",
+      "apiKey": "YOUR_HF_TOKEN",
       "provider": "generic-chat-completion-api",
-      "max_tokens": 32768
+      "maxOutputTokens": 32768
     },
     {
-      "model_display_name": "CodeLlama 13B [HF]",
       "model": "codellama/CodeLlama-13b-Instruct-hf",
-      "base_url": "https://api-inference.huggingface.co/models",
-      "api_key": "YOUR_HF_TOKEN",
+      "displayName": "CodeLlama 13B [HF]",
+      "baseUrl": "https://api-inference.huggingface.co/models",
+      "apiKey": "YOUR_HF_TOKEN",
       "provider": "generic-chat-completion-api",
-      "max_tokens": 16384
+      "maxOutputTokens": 16384
     }
   ]
 }
@@ -252,22 +271,22 @@ Run models locally on your hardware with Ollama:
 
 ```json
 {
-  "custom_models": [
+  "customModels": [
     {
-      "model_display_name": "Qwen 2.5 Coder [Local]",
       "model": "qwen2.5-coder:7b",
-      "base_url": "http://localhost:11434/v1",
-      "api_key": "not-needed",
+      "displayName": "Qwen 2.5 Coder [Local]",
+      "baseUrl": "http://localhost:11434/v1",
+      "apiKey": "not-needed",
       "provider": "generic-chat-completion-api",
-      "max_tokens": 8192
+      "maxOutputTokens": 8192
     },
     {
-      "model_display_name": "DeepSeek Coder V2 [Local]",
       "model": "deepseek-coder-v2:16b",
-      "base_url": "http://localhost:11434/v1",
-      "api_key": "not-needed",
+      "displayName": "DeepSeek Coder V2 [Local]",
+      "baseUrl": "http://localhost:11434/v1",
+      "apiKey": "not-needed",
       "provider": "generic-chat-completion-api",
-      "max_tokens": 16384
+      "maxOutputTokens": 16384
     }
   ]
 }
@@ -287,15 +306,15 @@ Use Ollama's cloud service for hosted model inference:
 
 ```json
 {
-  "custom_models": [
+  "customModels": [
     {
-      "model_display_name": "Llama 3.1 8B [Ollama Cloud]",
       "model": "llama3.1:8b",
-      "base_url": "https://ollama.com",
-      "api_key": "YOUR_OLLAMA_API_KEY",
+      "displayName": "Llama 3.1 8B [Ollama Cloud]",
+      "baseUrl": "https://ollama.com",
+      "apiKey": "YOUR_OLLAMA_API_KEY",
       "provider": "generic-chat-completion-api",
-      "max_tokens": 131072,
-      "headers": {
+      "maxOutputTokens": 131072,
+      "extraHeaders": {
         "Authorization": "Bearer YOUR_OLLAMA_API_KEY"
       }
     }
   ]
 }
@@ -313,14 +332,14 @@ Access Google's Gemini models using your Gemini AI API key:
 
 ```json
 {
-  "custom_models": [
+  "customModels": [
     {
-      "model_display_name": "Gemini 2.5 Pro",
       "model": "gemini-2.5-pro",
-      "base_url": "https://generativelanguage.googleapis.com/v1beta/",
-      "api_key": "YOUR_GEMINI_API_KEY",
+      "displayName": "Gemini 2.5 Pro",
+      "baseUrl": "https://generativelanguage.googleapis.com/v1beta/",
+      "apiKey": "YOUR_GEMINI_API_KEY",
       "provider": "generic-chat-completion-api",
-      "max_tokens": 32000
+      "maxOutputTokens": 32000
     }
   ]
 }
@@ -338,15 +357,15 @@ Once configured, access your custom models in the CLI:
 2. Your custom models appear in a separate "Custom models" section below Factory-provided models
 3. Select any model to start using it
 
-Custom models display with the name you set in `model_display_name`, making it easy to identify different providers and configurations.
+Custom models display with the name you set in `displayName`, making it easy to identify different providers and configurations.
 
 ---
 
 ## Troubleshooting
 
 ### Model not appearing in selector
-- Check JSON syntax in `~/.factory/config.json`
-- Restart the CLI after making configuration changes
+- Check JSON syntax in `~/.factory/settings.json` (or `config.json` if using legacy format)
+- Settings changes are detected automatically via file watching
 - Verify all required fields are present
 
 ### "Invalid provider" error
diff --git a/docs/cli/configuration/settings.mdx b/docs/cli/configuration/settings.mdx
index c2429d9..93e5b97 100644
--- a/docs/cli/configuration/settings.mdx
+++ b/docs/cli/configuration/settings.mdx
@@ -48,6 +48,7 @@ If the file doesn't exist, it's created with defaults the first time you run **d
 | `showThinkingInMainView` | `true`, `false` | `false` | Display AI thinking/reasoning blocks in the main chat view. |
 | `allowBackgroundProcesses` | `true`, `false` | `false` | Allow droid to spawn background processes (experimental). |
 | `enableReadinessReport` | `true`, `false` | `false` | Enable the `/readiness-report` slash command (experimental). |
+| `customModels` | Array of model configs | `[]` | Custom model configurations for BYOK. See [BYOK docs](/cli/configuration/byok). |
 
 ### Model
diff --git a/docs/cli/droid-exec/overview.mdx b/docs/cli/droid-exec/overview.mdx
index 556d7f9..48ff98e 100644
--- a/docs/cli/droid-exec/overview.mdx
+++ b/docs/cli/droid-exec/overview.mdx
@@ -377,16 +377,16 @@ droid exec --auto medium --disabled-tools execute-cli "run edits only"
 ### Custom models
 
-You can configure custom models to use with droid exec by adding them to your `~/.factory/config.json` file:
+You can configure custom models to use with droid exec by adding them to your `~/.factory/settings.json` file:
 
 ```json
 {
-  "custom_models": [
+  "customModels": [
     {
-      "model_display_name": "My Custom Model",
       "model": "gpt-5.1-codex-custom",
-      "base_url": "https://api.openai.com/v1",
-      "api_key": "your-api-key-here",
+      "displayName": "My Custom Model",
+      "baseUrl": "https://api.openai.com/v1",
+      "apiKey": "your-api-key-here",
       "provider": "openai"
     }
   ]
 }
@@ -403,22 +403,22 @@ If you have multiple custom models configured:
 ```json
 {
-  "custom_models": [
+  "customModels": [
     {
-      "model_display_name": "Kimi K2 [Groq]",
       "model": "kimi-k2",
-      "base_url": "https://api.groq.com/openai/v1",
-      "api_key": "your-groq-key",
+      "displayName": "Kimi K2 [Groq]",
+      "baseUrl": "https://api.groq.com/openai/v1",
+      "apiKey": "your-groq-key",
       "provider": "generic-chat-completion-api",
-      "max_tokens": 16384
+      "maxOutputTokens": 16384
     },
     {
-      "model_display_name": "GPT-OSS-20B [OpenRouter]",
       "model": "openai/gpt-oss-20b",
-      "base_url": "https://openrouter.ai/api/v1",
-      "api_key": "YOUR_OPENROUTER_KEY",
+      "displayName": "GPT-OSS-20B [OpenRouter]",
+      "baseUrl": "https://openrouter.ai/api/v1",
+      "apiKey": "YOUR_OPENROUTER_KEY",
       "provider": "generic-chat-completion-api",
-      "max_tokens": 32000
+      "maxOutputTokens": 32000
     }
   ]
 }
 ```
@@ -426,9 +426,9 @@
 
 You would reference them as:
 - `--model "custom:Kimi-K2-[Groq]-0"`
-- `--model "custom:GPT-OSS-20B [OpenRouter]-1"`
+- `--model "custom:GPT-OSS-20B-[OpenRouter]-1"`
 
-The index corresponds to the position in the `custom_models` array (0-based).
+The index corresponds to the position in the `customModels` array (0-based).
 
 Reasoning effort (`-r` / `--reasoning-effort`) is not yet supported for custom models, but coming soon.
diff --git a/docs/enterprise/models-llm-gateways-and-integrations.mdx b/docs/enterprise/models-llm-gateways-and-integrations.mdx
index 316d84b..0374529 100644
--- a/docs/enterprise/models-llm-gateways-and-integrations.mdx
+++ b/docs/enterprise/models-llm-gateways-and-integrations.mdx
@@ -45,7 +45,7 @@ Factory works with gateways in two ways:
 
 When you use a gateway, **data handling and retention policies are those of the gateway and underlying providers**; Droid simply uses the endpoints and credentials you configure.
 
-For concrete examples of configuring custom models (including gateway‑backed models), see [Custom models & BYOK](/cli/configuration/byok), which covers the `custom_models` array in `~/.factory/config.json` and how those models appear in the `/model` selector.
+For concrete examples of configuring custom models (including gateway‑backed models), see [Custom models & BYOK](/cli/configuration/byok), which covers the `customModels` array in `~/.factory/settings.json` and how those models appear in the `/model` selector.
 
 ---
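For the droid-exec `--model` reference format above, the identifier appears to be `custom:` plus the `displayName` with spaces replaced by hyphens, plus the 0-based index in `customModels`; that slug rule is inferred from the two documented examples, so verify the exact strings against the `/model` selector. A sketch that prints the flag for each configured entry:

```python
import json
from pathlib import Path

settings = json.loads((Path.home() / ".factory" / "settings.json").read_text())

for index, entry in enumerate(settings.get("customModels", [])):
    # Inferred from the documented examples: spaces in displayName become
    # hyphens and the 0-based array position is appended.
    name = entry.get("displayName", entry["model"]).replace(" ", "-")
    print(f'--model "custom:{name}-{index}"')
```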