diff --git a/docs/cli/byok/baseten.mdx b/docs/cli/byok/baseten.mdx
index e050028..298d946 100644
--- a/docs/cli/byok/baseten.mdx
+++ b/docs/cli/byok/baseten.mdx
@@ -8,18 +8,18 @@ Deploy and serve custom models with Baseten's enterprise-grade infrastructure fo
## Configuration
-Add these configurations to `~/.factory/config.json`:
+Add to `~/.factory/settings.json`:
```json
{
- "custom_models": [
+ "customModels": [
{
- "model_display_name": "Qwen3-Coder-480B [Baseten]",
"model": "Qwen/Qwen3-Coder-480B-A35B-Instruct",
- "base_url": "https://inference.baseten.co/v1",
- "api_key": "YOUR_BASETEN_API_KEY",
+ "displayName": "Qwen3-Coder-480B [Baseten]",
+ "baseUrl": "https://inference.baseten.co/v1",
+ "apiKey": "YOUR_BASETEN_API_KEY",
"provider": "generic-chat-completion-api",
- "max_tokens": 8192
+ "maxOutputTokens": 8192
}
]
}
diff --git a/docs/cli/byok/deepinfra.mdx b/docs/cli/byok/deepinfra.mdx
index 9b59a2b..8ee7510 100644
--- a/docs/cli/byok/deepinfra.mdx
+++ b/docs/cli/byok/deepinfra.mdx
@@ -8,42 +8,42 @@ Access cost-effective inference for a wide variety of open-source models with De
## Configuration
-Configuration examples for `~/.factory/config.json`:
+Add to `~/.factory/settings.json`:
```json
{
- "custom_models": [
+ "customModels": [
{
- "model_display_name": "GLM-4.6 [DeepInfra]",
"model": "zai-org/GLM-4.6",
- "base_url": "https://api.deepinfra.com/v1/openai",
- "api_key": "YOUR_DEEPINFRA_TOKEN",
+ "displayName": "GLM-4.6 [DeepInfra]",
+ "baseUrl": "https://api.deepinfra.com/v1/openai",
+ "apiKey": "YOUR_DEEPINFRA_TOKEN",
"provider": "generic-chat-completion-api",
- "max_tokens": 16384
+ "maxOutputTokens": 16384
},
{
- "model_display_name": "DeepSeek V3.1 Terminus [DeepInfra]",
"model": "deepseek-ai/DeepSeek-V3.1-Terminus",
- "base_url": "https://api.deepinfra.com/v1/openai",
- "api_key": "YOUR_DEEPINFRA_TOKEN",
+ "displayName": "DeepSeek V3.1 Terminus [DeepInfra]",
+ "baseUrl": "https://api.deepinfra.com/v1/openai",
+ "apiKey": "YOUR_DEEPINFRA_TOKEN",
"provider": "generic-chat-completion-api",
- "max_tokens": 16384
+ "maxOutputTokens": 16384
},
{
- "model_display_name": "Kimi K2 Instruct [DeepInfra]",
"model": "moonshotai/Kimi-K2-Instruct-0905",
- "base_url": "https://api.deepinfra.com/v1/openai",
- "api_key": "YOUR_DEEPINFRA_TOKEN",
+ "displayName": "Kimi K2 Instruct [DeepInfra]",
+ "baseUrl": "https://api.deepinfra.com/v1/openai",
+ "apiKey": "YOUR_DEEPINFRA_TOKEN",
"provider": "generic-chat-completion-api",
- "max_tokens": 32768
+ "maxOutputTokens": 32768
},
{
- "model_display_name": "Qwen3 Coder 480B [DeepInfra]",
"model": "Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo",
- "base_url": "https://api.deepinfra.com/v1/openai",
- "api_key": "YOUR_DEEPINFRA_TOKEN",
+ "displayName": "Qwen3 Coder 480B [DeepInfra]",
+ "baseUrl": "https://api.deepinfra.com/v1/openai",
+ "apiKey": "YOUR_DEEPINFRA_TOKEN",
"provider": "generic-chat-completion-api",
- "max_tokens": 32768
+ "maxOutputTokens": 32768
}
]
}
diff --git a/docs/cli/byok/fireworks.mdx b/docs/cli/byok/fireworks.mdx
index 1ebf97b..78c0d17 100644
--- a/docs/cli/byok/fireworks.mdx
+++ b/docs/cli/byok/fireworks.mdx
@@ -8,26 +8,26 @@ Access high-performance inference for open-source models with Fireworks AI's opt
## Configuration
-Configuration examples for `~/.factory/config.json`:
+Add to `~/.factory/settings.json`:
```json
{
- "custom_models": [
+ "customModels": [
{
- "model_display_name": "GLM 4.5 [Fireworks]",
"model": "accounts/fireworks/models/glm-4p5",
- "base_url": "https://api.fireworks.ai/inference/v1",
- "api_key": "YOUR_FIREWORKS_API_KEY",
+ "displayName": "GLM 4.5 [Fireworks]",
+ "baseUrl": "https://api.fireworks.ai/inference/v1",
+ "apiKey": "YOUR_FIREWORKS_API_KEY",
"provider": "generic-chat-completion-api",
- "max_tokens": 16384
+ "maxOutputTokens": 16384
},
{
- "model_display_name": "Deepseek V3.1 Terminus [Fireworks]",
"model": "accounts/fireworks/models/deepseek-v3p1-terminus",
- "base_url": "https://api.fireworks.ai/inference/v1",
- "api_key": "YOUR_FIREWORKS_API_KEY",
+ "displayName": "Deepseek V3.1 Terminus [Fireworks]",
+ "baseUrl": "https://api.fireworks.ai/inference/v1",
+ "apiKey": "YOUR_FIREWORKS_API_KEY",
"provider": "generic-chat-completion-api",
- "max_tokens": 20480
+ "maxOutputTokens": 20480
}
]
}
diff --git a/docs/cli/byok/google-gemini.mdx b/docs/cli/byok/google-gemini.mdx
index 4bd3e9a..76d838a 100644
--- a/docs/cli/byok/google-gemini.mdx
+++ b/docs/cli/byok/google-gemini.mdx
@@ -8,34 +8,34 @@ Connect to Google's Gemini models for advanced AI capabilities with multimodal s
## Configuration
-Configuration examples for `~/.factory/config.json`:
+Add to `~/.factory/settings.json`:
```json
{
- "custom_models": [
+ "customModels": [
{
- "model_display_name": "Gemini 2.5 Pro [Google]",
"model": "gemini-2.5-pro",
- "base_url": "https://generativelanguage.googleapis.com/v1beta/",
- "api_key": "YOUR_GEMINI_API_KEY",
+ "displayName": "Gemini 2.5 Pro [Google]",
+ "baseUrl": "https://generativelanguage.googleapis.com/v1beta/",
+ "apiKey": "YOUR_GEMINI_API_KEY",
"provider": "generic-chat-completion-api",
- "max_tokens": 32000
+ "maxOutputTokens": 32000
},
{
- "model_display_name": "Gemini 1.5 Pro [Google]",
"model": "gemini-1.5-pro",
- "base_url": "https://generativelanguage.googleapis.com/v1beta/",
- "api_key": "YOUR_GEMINI_API_KEY",
+ "displayName": "Gemini 1.5 Pro [Google]",
+ "baseUrl": "https://generativelanguage.googleapis.com/v1beta/",
+ "apiKey": "YOUR_GEMINI_API_KEY",
"provider": "generic-chat-completion-api",
- "max_tokens": 1048576
+      "maxOutputTokens": 8192
},
{
- "model_display_name": "Gemini 1.5 Flash [Google]",
"model": "gemini-1.5-flash",
- "base_url": "https://generativelanguage.googleapis.com/v1beta/",
- "api_key": "YOUR_GEMINI_API_KEY",
+ "displayName": "Gemini 1.5 Flash [Google]",
+ "baseUrl": "https://generativelanguage.googleapis.com/v1beta/",
+ "apiKey": "YOUR_GEMINI_API_KEY",
"provider": "generic-chat-completion-api",
- "max_tokens": 1048576
+      "maxOutputTokens": 8192
}
]
}
diff --git a/docs/cli/byok/groq.mdx b/docs/cli/byok/groq.mdx
index 0219a33..01951f5 100644
--- a/docs/cli/byok/groq.mdx
+++ b/docs/cli/byok/groq.mdx
@@ -8,18 +8,18 @@ Access ultra-fast inference powered by Groq's LPU™ (Language Processing Unit)
## Configuration
-Configuration examples for `~/.factory/config.json`:
+Add to `~/.factory/settings.json`:
```json
{
- "custom_models": [
+ "customModels": [
{
- "model_display_name": "Kimi K2 [Groq]",
"model": "moonshotai/kimi-k2-instruct-0905",
- "base_url": "https://api.groq.com/openai/v1",
- "api_key": "YOUR_GROQ_KEY",
+ "displayName": "Kimi K2 [Groq]",
+ "baseUrl": "https://api.groq.com/openai/v1",
+ "apiKey": "YOUR_GROQ_KEY",
"provider": "generic-chat-completion-api",
- "max_tokens": 16384
+ "maxOutputTokens": 16384
}
]
}
diff --git a/docs/cli/byok/huggingface.mdx b/docs/cli/byok/huggingface.mdx
index 1e94ddf..c2ddd03 100644
--- a/docs/cli/byok/huggingface.mdx
+++ b/docs/cli/byok/huggingface.mdx
@@ -12,26 +12,26 @@ Connect to thousands of models hosted on Hugging Face's Inference Providers. Lea
## Configuration
-Configuration examples for `~/.factory/config.json`:
+Add to `~/.factory/settings.json`:
```json
{
- "custom_models": [
+ "customModels": [
{
- "model_display_name": "GPT OSS 120B [HF Router]",
"model": "openai/gpt-oss-120b:fireworks-ai",
- "base_url": "https://router.huggingface.co/v1",
- "api_key": "YOUR_HF_TOKEN",
+ "displayName": "GPT OSS 120B [HF Router]",
+ "baseUrl": "https://router.huggingface.co/v1",
+ "apiKey": "YOUR_HF_TOKEN",
"provider": "generic-chat-completion-api",
- "max_tokens": 32768
+ "maxOutputTokens": 32768
},
{
- "model_display_name": "Llama 4 Scout 17B [HF Router]",
"model": "meta-llama/Llama-4-Scout-17B-16E-Instruct:fireworks-ai",
- "base_url": "https://router.huggingface.co/v1",
- "api_key": "YOUR_HF_TOKEN",
+ "displayName": "Llama 4 Scout 17B [HF Router]",
+ "baseUrl": "https://router.huggingface.co/v1",
+ "apiKey": "YOUR_HF_TOKEN",
"provider": "generic-chat-completion-api",
- "max_tokens": 16384
+ "maxOutputTokens": 16384
}
]
}
diff --git a/docs/cli/byok/ollama.mdx b/docs/cli/byok/ollama.mdx
index 16a427b..219f82b 100644
--- a/docs/cli/byok/ollama.mdx
+++ b/docs/cli/byok/ollama.mdx
@@ -17,26 +17,26 @@ Run models entirely on your machine with no internet required.
### Configuration
-Configuration examples for `~/.factory/config.json`:
+Add to `~/.factory/settings.json`:
```json
{
- "custom_models": [
+ "customModels": [
{
- "model_display_name": "Qwen 2.5 Coder 32B [Local]",
"model": "qwen2.5-coder:32b",
- "base_url": "http://localhost:11434/v1",
- "api_key": "not-needed", # add any non-empty value
+ "displayName": "Qwen 2.5 Coder 32B [Local]",
+ "baseUrl": "http://localhost:11434/v1",
+ "apiKey": "not-needed",
"provider": "generic-chat-completion-api",
- "max_tokens": 16000
+ "maxOutputTokens": 16000
},
{
- "model_display_name": "Qwen 2.5 Coder 7B [Local]",
"model": "qwen2.5-coder:7b",
- "base_url": "http://localhost:11434/v1",
- "api_key": "not-needed", # add any non-empty value
+ "displayName": "Qwen 2.5 Coder 7B [Local]",
+ "baseUrl": "http://localhost:11434/v1",
+ "apiKey": "not-needed",
"provider": "generic-chat-completion-api",
- "max_tokens": 4000
+ "maxOutputTokens": 4000
}
]
}
@@ -90,14 +90,14 @@ For a full list of available cloud models, visit: [ollama.com/search?c=cloud](ht
```json
{
- "custom_models": [
+ "customModels": [
{
- "model_display_name": "qwen3-coder [Online]",
"model": "qwen3-coder:480b-cloud",
- "base_url": "http://localhost:11434/v1/",
- "api_key": "not-needed", # add any non-empty value
+ "displayName": "qwen3-coder [Online]",
+ "baseUrl": "http://localhost:11434/v1/",
+ "apiKey": "not-needed",
"provider": "generic-chat-completion-api",
- "max_tokens": 128000
+ "maxOutputTokens": 128000
}
]
}
diff --git a/docs/cli/byok/openai-anthropic.mdx b/docs/cli/byok/openai-anthropic.mdx
index 5c37607..3bfcec0 100644
--- a/docs/cli/byok/openai-anthropic.mdx
+++ b/docs/cli/byok/openai-anthropic.mdx
@@ -8,26 +8,26 @@ Use your own API keys for cost control and billing transparency with official Op
## Configuration
-Configuration examples for `~/.factory/config.json`:
+Add to `~/.factory/settings.json`:
```json
{
- "custom_models": [
+ "customModels": [
{
- "model_display_name": "Sonnet 4.5 [Custom]",
"model": "claude-sonnet-4-5-20250929",
- "base_url": "https://api.anthropic.com",
- "api_key": "YOUR_ANTHROPIC_KEY",
+ "displayName": "Sonnet 4.5 [Custom]",
+ "baseUrl": "https://api.anthropic.com",
+ "apiKey": "YOUR_ANTHROPIC_KEY",
"provider": "anthropic",
- "max_tokens": 8192
+ "maxOutputTokens": 8192
},
{
- "model_display_name": "GPT5-Codex [Custom]",
"model": "gpt-5-codex",
- "base_url": "https://api.openai.com/v1",
- "api_key": "YOUR_OPENAI_KEY",
+ "displayName": "GPT5-Codex [Custom]",
+ "baseUrl": "https://api.openai.com/v1",
+ "apiKey": "YOUR_OPENAI_KEY",
"provider": "openai",
- "max_tokens": 16384
+ "maxOutputTokens": 16384
}
]
}
diff --git a/docs/cli/byok/openrouter.mdx b/docs/cli/byok/openrouter.mdx
index 95acd36..0e389c2 100644
--- a/docs/cli/byok/openrouter.mdx
+++ b/docs/cli/byok/openrouter.mdx
@@ -8,18 +8,18 @@ Connect to OpenRouter for access to models from multiple providers through a sin
## Configuration
-Configuration examples for `~/.factory/config.json`:
+Add to `~/.factory/settings.json`:
```json
{
- "custom_models": [
+ "customModels": [
{
- "model_display_name": "GPT-OSS-20B [OpenRouter]",
"model": "openai/gpt-oss-20b",
- "base_url": "https://openrouter.ai/api/v1",
- "api_key": "YOUR_OPENROUTER_KEY",
+ "displayName": "GPT-OSS-20B [OpenRouter]",
+ "baseUrl": "https://openrouter.ai/api/v1",
+ "apiKey": "YOUR_OPENROUTER_KEY",
"provider": "generic-chat-completion-api",
- "max_tokens": 32000
+ "maxOutputTokens": 32000
}
]
}
diff --git a/docs/cli/byok/overview.mdx b/docs/cli/byok/overview.mdx
index be966a5..f372caf 100644
--- a/docs/cli/byok/overview.mdx
+++ b/docs/cli/byok/overview.mdx
@@ -19,55 +19,82 @@ Factory CLI supports custom model configurations through BYOK (Bring Your Own Ke
## Configuration Reference
-Add custom models in `~/.factory/config.json` under the `custom_models` array.
+Add custom models to `~/.factory/settings.json` under the `customModels` array:
+
+```json
+{
+ "customModels": [
+ {
+ "model": "your-model-id",
+ "displayName": "My Custom Model",
+ "baseUrl": "https://api.provider.com/v1",
+ "apiKey": "YOUR_API_KEY",
+ "provider": "generic-chat-completion-api",
+ "maxOutputTokens": 16384
+ }
+ ]
+}
+```
+
+<Note>
+ **Legacy support**: Custom models in `~/.factory/config.json` using snake_case field names (`custom_models`, `base_url`, etc.) are still supported for backwards compatibility. Both files are loaded and merged, with `settings.json` taking priority.
+</Note>
### Supported Fields
| Field | Type | Required | Description |
|-------|------|----------|-------------|
-| `model_display_name` | `string` | | Human-friendly name shown in model selector |
| `model` | `string` | ✓ | Model identifier sent via API (e.g., `claude-sonnet-4-5-20250929`, `gpt-5-codex`, `qwen3:4b`) |
-| `base_url` | `string` | ✓ | API endpoint base URL |
-| `api_key` | `string` | ✓ | Your API key for the provider. Can't be empty. |
+| `displayName` | `string` | | Human-friendly name shown in model selector |
+| `baseUrl` | `string` | ✓ | API endpoint base URL |
+| `apiKey` | `string` | ✓ | Your API key for the provider. Can't be empty. |
| `provider` | `string` | ✓ | One of: `anthropic`, `openai`, or `generic-chat-completion-api` |
-| `max_tokens` | `number` | | Maximum output tokens for model responses |
-| `supports_images` | `boolean` | | Whether the model supports image inputs |
-| `extra_args` | `object` | | Additional provider-specific arguments to include in API requests |
-| `extra_headers` | `object` | | Additional HTTP headers to send with requests |
+| `maxOutputTokens` | `number` | | Maximum output tokens for model responses |
+| `supportsImages` | `boolean` | | Whether the model supports image inputs |
+| `extraArgs` | `object` | | Additional provider-specific arguments to include in API requests |
+| `extraHeaders` | `object` | | Additional HTTP headers to send with requests |
-### Using extra_args
+### Using extraArgs
Pass provider-specific parameters like temperature or top_p:
```json
{
- "model_display_name": "Custom Model",
- "model": "your-model",
- "base_url": "https://your-provider.com/v1",
- "api_key": "YOUR_API_KEY",
- "provider": "generic-chat-completion-api",
- "extra_args": {
- "temperature": 0.7,
- "top_p": 0.9
- }
+ "customModels": [
+ {
+ "model": "your-model",
+ "displayName": "Custom Model",
+ "baseUrl": "https://your-provider.com/v1",
+ "apiKey": "YOUR_API_KEY",
+ "provider": "generic-chat-completion-api",
+ "extraArgs": {
+ "temperature": 0.7,
+ "top_p": 0.9
+ }
+ }
+ ]
}
```
-### Using extra_headers
+### Using extraHeaders
Add custom HTTP headers to API requests:
```json
{
- "model_display_name": "Custom Model",
- "model": "your-model",
- "base_url": "https://your-provider.com/v1",
- "api_key": "YOUR_API_KEY",
- "provider": "generic-chat-completion-api",
- "extra_headers": {
- "X-Custom-Header": "value",
- "Authorization": "Bearer YOUR_TOKEN"
- }
+ "customModels": [
+ {
+ "model": "your-model",
+ "displayName": "Custom Model",
+ "baseUrl": "https://your-provider.com/v1",
+ "apiKey": "YOUR_API_KEY",
+ "provider": "generic-chat-completion-api",
+ "extraHeaders": {
+ "X-Custom-Header": "value",
+ "Authorization": "Bearer YOUR_TOKEN"
+ }
+ }
+ ]
}
```
@@ -136,15 +163,15 @@ Once configured, access your custom models in the CLI:
2. Your custom models appear in a separate "Custom models" section below Factory-provided models
3. Select any model to start using it
-Custom models display with the name you set in `model_display_name`, making it easy to identify different providers and configurations.
+Custom models display with the name you set in `displayName`, making it easy to identify different providers and configurations.
---
## Troubleshooting
### Model not appearing in selector
-- Check JSON syntax in `~/.factory/config.json`
-- Restart the CLI after making configuration changes
+- Check JSON syntax in `~/.factory/settings.json` (or `config.json` if using legacy format)
+- Settings changes are detected automatically via file watching
- Verify all required fields are present
### "Invalid provider" error
diff --git a/docs/cli/configuration/byok.mdx b/docs/cli/configuration/byok.mdx
index 197e59d..3a47242 100644
--- a/docs/cli/configuration/byok.mdx
+++ b/docs/cli/configuration/byok.mdx
@@ -18,18 +18,37 @@ Factory CLI supports custom model configurations through BYOK (Bring Your Own Ke
## Configuration Reference
-Add custom models in `~/.factory/config.json` under the `custom_models` array.
+Add custom models to `~/.factory/settings.json` under the `customModels` array:
+
+```json
+{
+ "customModels": [
+ {
+ "model": "your-model-id",
+ "displayName": "My Custom Model",
+ "baseUrl": "https://api.provider.com/v1",
+ "apiKey": "YOUR_API_KEY",
+ "provider": "generic-chat-completion-api",
+ "maxOutputTokens": 16384
+ }
+ ]
+}
+```
+
+<Note>
+ **Legacy support**: Custom models in `~/.factory/config.json` using snake_case field names (`custom_models`, `base_url`, etc.) are still supported for backwards compatibility. Both files are loaded and merged, with `settings.json` taking priority.
+</Note>
### Supported Fields
| Field | Required | Description |
|-------|----------|-------------|
-| `model_display_name` | ✓ | Human-friendly name shown in model selector |
| `model` | ✓ | Model identifier sent via API (e.g., `claude-sonnet-4-5-20250929`, `gpt-5-codex`, `qwen3:4b`) |
-| `base_url` | ✓ | API endpoint base URL |
-| `api_key` | ✓ | Your API key for the provider. Can't be empty. |
+| `displayName` | | Human-friendly name shown in model selector |
+| `baseUrl` | ✓ | API endpoint base URL |
+| `apiKey` | ✓ | Your API key for the provider. Can't be empty. |
| `provider` | ✓ | One of: `anthropic`, `openai`, or `generic-chat-completion-api` |
-| `max_tokens` | ✓ | Maximum output tokens for model responses |
+| `maxOutputTokens` | | Maximum output tokens for model responses |
---
@@ -76,19 +95,19 @@ Use your own API keys for cost control and billing transparency:
```json
{
- "custom_models": [
+ "customModels": [
{
- "model_display_name": "Sonnet 4.5 [Custom]",
"model": "claude-sonnet-4-5-20250929",
- "base_url": "https://api.anthropic.com",
- "api_key": "YOUR_ANTHROPIC_KEY",
+ "displayName": "Sonnet 4.5 [Custom]",
+ "baseUrl": "https://api.anthropic.com",
+ "apiKey": "YOUR_ANTHROPIC_KEY",
"provider": "anthropic"
},
{
- "model_display_name": "GPT5-Codex [Custom]",
"model": "gpt-5-codex",
- "base_url": "https://api.openai.com/v1",
- "api_key": "YOUR_OPENAI_KEY",
+ "displayName": "GPT5-Codex [Custom]",
+ "baseUrl": "https://api.openai.com/v1",
+ "apiKey": "YOUR_OPENAI_KEY",
"provider": "openai"
}
]
@@ -101,14 +120,14 @@ Connect to OpenRouter for access to models from multiple providers:
```json
{
- "custom_models": [
+ "customModels": [
{
- "model_display_name": "GPT-OSS-20B",
"model": "openai/gpt-oss-20b",
- "base_url": "https://openrouter.ai/api/v1",
- "api_key": "YOUR_OPENROUTER_KEY",
+ "displayName": "GPT-OSS-20B",
+ "baseUrl": "https://openrouter.ai/api/v1",
+ "apiKey": "YOUR_OPENROUTER_KEY",
"provider": "generic-chat-completion-api",
- "max_tokens": 32000
+ "maxOutputTokens": 32000
}
]
}
@@ -122,22 +141,22 @@ Access high-performance inference for open-source models with optimized serving:
```json
{
- "custom_models": [
+ "customModels": [
{
- "model_display_name": "GLM 4.5 [Fireworks]",
"model": "accounts/fireworks/models/glm-4p5",
- "base_url": "https://api.fireworks.ai/inference/v1",
- "api_key": "YOUR_FIREWORKS_API_KEY",
+ "displayName": "GLM 4.5 [Fireworks]",
+ "baseUrl": "https://api.fireworks.ai/inference/v1",
+ "apiKey": "YOUR_FIREWORKS_API_KEY",
"provider": "generic-chat-completion-api",
- "max_tokens": 16384
+ "maxOutputTokens": 16384
},
{
- "model_display_name": "Llama 3.1 405B [Fireworks]",
"model": "accounts/fireworks/models/llama-v3p1-405b-instruct",
- "base_url": "https://api.fireworks.ai/inference/v1",
- "api_key": "YOUR_FIREWORKS_API_KEY",
+ "displayName": "Llama 3.1 405B [Fireworks]",
+ "baseUrl": "https://api.fireworks.ai/inference/v1",
+ "apiKey": "YOUR_FIREWORKS_API_KEY",
"provider": "generic-chat-completion-api",
- "max_tokens": 131072
+ "maxOutputTokens": 131072
}
]
}
@@ -151,22 +170,22 @@ Deploy and serve custom models with enterprise-grade infrastructure:
```json
{
- "custom_models": [
+ "customModels": [
{
- "model_display_name": "Custom Model [Baseten]",
"model": "YOUR_MODEL_ID",
- "base_url": "https://inference.baseten.co/v1",
- "api_key": "YOUR_BASETEN_API_KEY",
+ "displayName": "Custom Model [Baseten]",
+ "baseUrl": "https://inference.baseten.co/v1",
+ "apiKey": "YOUR_BASETEN_API_KEY",
"provider": "generic-chat-completion-api",
- "max_tokens": 8192
+ "maxOutputTokens": 8192
},
{
- "model_display_name": "Llama 3.1 70B [Baseten]",
"model": "llama-3.1-70b-instruct",
- "base_url": "https://inference.baseten.co/v1",
- "api_key": "YOUR_BASETEN_API_KEY",
+ "displayName": "Llama 3.1 70B [Baseten]",
+ "baseUrl": "https://inference.baseten.co/v1",
+ "apiKey": "YOUR_BASETEN_API_KEY",
"provider": "generic-chat-completion-api",
- "max_tokens": 131072
+ "maxOutputTokens": 131072
}
]
}
@@ -182,30 +201,30 @@ Access cost-effective inference for a wide variety of open-source models:
```json
{
- "custom_models": [
+ "customModels": [
{
- "model_display_name": "GLM 4.6 [DeepInfra]",
"model": "zai-org/GLM-4.6",
- "base_url": "https://api.deepinfra.com/v1/openai",
- "api_key": "YOUR_DEEPINFRA_TOKEN",
+ "displayName": "GLM 4.6 [DeepInfra]",
+ "baseUrl": "https://api.deepinfra.com/v1/openai",
+ "apiKey": "YOUR_DEEPINFRA_TOKEN",
"provider": "generic-chat-completion-api",
- "max_tokens": 16384
+ "maxOutputTokens": 16384
},
{
- "model_display_name": "Qwen 2.5 Coder 32B [DeepInfra]",
"model": "Qwen/Qwen2.5-Coder-32B-Instruct",
- "base_url": "https://api.deepinfra.com/v1/openai",
- "api_key": "YOUR_DEEPINFRA_TOKEN",
+ "displayName": "Qwen 2.5 Coder 32B [DeepInfra]",
+ "baseUrl": "https://api.deepinfra.com/v1/openai",
+ "apiKey": "YOUR_DEEPINFRA_TOKEN",
"provider": "generic-chat-completion-api",
- "max_tokens": 32768
+ "maxOutputTokens": 32768
},
{
- "model_display_name": "DeepSeek V3 [DeepInfra]",
"model": "deepseek-ai/DeepSeek-V3",
- "base_url": "https://api.deepinfra.com/v1/openai",
- "api_key": "YOUR_DEEPINFRA_TOKEN",
+ "displayName": "DeepSeek V3 [DeepInfra]",
+ "baseUrl": "https://api.deepinfra.com/v1/openai",
+ "apiKey": "YOUR_DEEPINFRA_TOKEN",
"provider": "generic-chat-completion-api",
- "max_tokens": 65536
+ "maxOutputTokens": 65536
}
]
}
@@ -219,22 +238,22 @@ Connect to models hosted on Hugging Face's Inference API:
```json
{
- "custom_models": [
+ "customModels": [
{
- "model_display_name": "Mistral 7B [HF]",
"model": "mistralai/Mistral-7B-Instruct-v0.3",
- "base_url": "https://api-inference.huggingface.co/models",
- "api_key": "YOUR_HF_TOKEN",
+ "displayName": "Mistral 7B [HF]",
+ "baseUrl": "https://api-inference.huggingface.co/models",
+ "apiKey": "YOUR_HF_TOKEN",
"provider": "generic-chat-completion-api",
- "max_tokens": 32768
+ "maxOutputTokens": 32768
},
{
- "model_display_name": "CodeLlama 13B [HF]",
"model": "codellama/CodeLlama-13b-Instruct-hf",
- "base_url": "https://api-inference.huggingface.co/models",
- "api_key": "YOUR_HF_TOKEN",
+ "displayName": "CodeLlama 13B [HF]",
+ "baseUrl": "https://api-inference.huggingface.co/models",
+ "apiKey": "YOUR_HF_TOKEN",
"provider": "generic-chat-completion-api",
- "max_tokens": 16384
+ "maxOutputTokens": 16384
}
]
}
@@ -252,22 +271,22 @@ Run models locally on your hardware with Ollama:
```json
{
- "custom_models": [
+ "customModels": [
{
- "model_display_name": "Qwen 2.5 Coder [Local]",
"model": "qwen2.5-coder:7b",
- "base_url": "http://localhost:11434/v1",
- "api_key": "not-needed",
+ "displayName": "Qwen 2.5 Coder [Local]",
+ "baseUrl": "http://localhost:11434/v1",
+ "apiKey": "not-needed",
"provider": "generic-chat-completion-api",
- "max_tokens": 8192
+ "maxOutputTokens": 8192
},
{
- "model_display_name": "DeepSeek Coder V2 [Local]",
"model": "deepseek-coder-v2:16b",
- "base_url": "http://localhost:11434/v1",
- "api_key": "not-needed",
+ "displayName": "DeepSeek Coder V2 [Local]",
+ "baseUrl": "http://localhost:11434/v1",
+ "apiKey": "not-needed",
"provider": "generic-chat-completion-api",
- "max_tokens": 16384
+ "maxOutputTokens": 16384
}
]
}
@@ -287,15 +306,15 @@ Use Ollama's cloud service for hosted model inference:
```json
{
- "custom_models": [
+ "customModels": [
{
- "model_display_name": "Llama 3.1 8B [Ollama Cloud]",
"model": "llama3.1:8b",
- "base_url": "https://ollama.com",
- "api_key": "YOUR_OLLAMA_API_KEY",
+ "displayName": "Llama 3.1 8B [Ollama Cloud]",
+ "baseUrl": "https://ollama.com",
+ "apiKey": "YOUR_OLLAMA_API_KEY",
"provider": "generic-chat-completion-api",
- "max_tokens": 131072,
- "headers": {
+ "maxOutputTokens": 131072,
+ "extraHeaders": {
"Authorization": "Bearer YOUR_OLLAMA_API_KEY"
}
}
@@ -313,14 +332,14 @@ Access Google's Gemini models using your Gemini AI API key:
```json
{
- "custom_models": [
+ "customModels": [
{
- "model_display_name": "Gemini 2.5 Pro",
"model": "gemini-2.5-pro",
- "base_url": "https://generativelanguage.googleapis.com/v1beta/",
- "api_key": "YOUR_GEMINI_API_KEY",
+ "displayName": "Gemini 2.5 Pro",
+ "baseUrl": "https://generativelanguage.googleapis.com/v1beta/",
+ "apiKey": "YOUR_GEMINI_API_KEY",
"provider": "generic-chat-completion-api",
- "max_tokens": 32000
+ "maxOutputTokens": 32000
}
]
}
@@ -338,15 +357,15 @@ Once configured, access your custom models in the CLI:
2. Your custom models appear in a separate "Custom models" section below Factory-provided models
3. Select any model to start using it
-Custom models display with the name you set in `model_display_name`, making it easy to identify different providers and configurations.
+Custom models display with the name you set in `displayName`, making it easy to identify different providers and configurations.
---
## Troubleshooting
### Model not appearing in selector
-- Check JSON syntax in `~/.factory/config.json`
-- Restart the CLI after making configuration changes
+- Check JSON syntax in `~/.factory/settings.json` (or `config.json` if using legacy format)
+- Settings changes are detected automatically via file watching
- Verify all required fields are present
### "Invalid provider" error
diff --git a/docs/cli/configuration/settings.mdx b/docs/cli/configuration/settings.mdx
index c2429d9..93e5b97 100644
--- a/docs/cli/configuration/settings.mdx
+++ b/docs/cli/configuration/settings.mdx
@@ -48,6 +48,7 @@ If the file doesn't exist, it's created with defaults the first time you run **d
| `showThinkingInMainView` | `true`, `false` | `false` | Display AI thinking/reasoning blocks in the main chat view. |
| `allowBackgroundProcesses` | `true`, `false` | `false` | Allow droid to spawn background processes (experimental). |
| `enableReadinessReport` | `true`, `false` | `false` | Enable the `/readiness-report` slash command (experimental). |
+| `customModels` | Array of model configs | `[]` | Custom model configurations for BYOK. See [BYOK docs](/cli/configuration/byok). |
### Model
diff --git a/docs/cli/droid-exec/overview.mdx b/docs/cli/droid-exec/overview.mdx
index 556d7f9..48ff98e 100644
--- a/docs/cli/droid-exec/overview.mdx
+++ b/docs/cli/droid-exec/overview.mdx
@@ -377,16 +377,16 @@ droid exec --auto medium --disabled-tools execute-cli "run edits only"
### Custom models
-You can configure custom models to use with droid exec by adding them to your `~/.factory/config.json` file:
+You can configure custom models to use with droid exec by adding them to your `~/.factory/settings.json` file:
```json
{
- "custom_models": [
+ "customModels": [
{
- "model_display_name": "My Custom Model",
"model": "gpt-5.1-codex-custom",
- "base_url": "https://api.openai.com/v1",
- "api_key": "your-api-key-here",
+ "displayName": "My Custom Model",
+ "baseUrl": "https://api.openai.com/v1",
+ "apiKey": "your-api-key-here",
"provider": "openai"
}
]
@@ -403,22 +403,22 @@ If you have multiple custom models configured:
```json
{
- "custom_models": [
+ "customModels": [
{
- "model_display_name": "Kimi K2 [Groq]",
"model": "kimi-k2",
- "base_url": "https://api.groq.com/openai/v1",
- "api_key": "your-groq-key",
+ "displayName": "Kimi K2 [Groq]",
+ "baseUrl": "https://api.groq.com/openai/v1",
+ "apiKey": "your-groq-key",
"provider": "generic-chat-completion-api",
- "max_tokens": 16384
+ "maxOutputTokens": 16384
},
{
- "model_display_name": "GPT-OSS-20B [OpenRouter]",
"model": "openai/gpt-oss-20b",
- "base_url": "https://openrouter.ai/api/v1",
- "api_key": "YOUR_OPENROUTER_KEY",
+ "displayName": "GPT-OSS-20B [OpenRouter]",
+ "baseUrl": "https://openrouter.ai/api/v1",
+ "apiKey": "YOUR_OPENROUTER_KEY",
"provider": "generic-chat-completion-api",
- "max_tokens": 32000
+ "maxOutputTokens": 32000
}
]
}
@@ -426,9 +426,9 @@ If you have multiple custom models configured:
You would reference them as:
- `--model "custom:Kimi-K2-[Groq]-0"`
-- `--model "custom:GPT-OSS-20B [OpenRouter]-1"`
+- `--model "custom:GPT-OSS-20B-[OpenRouter]-1"`
-The index corresponds to the position in the `custom_models` array (0-based).
+The index corresponds to the position in the `customModels` array (0-based).
Reasoning effort (`-r` / `--reasoning-effort`) is not yet supported for custom models, but coming soon.
diff --git a/docs/enterprise/models-llm-gateways-and-integrations.mdx b/docs/enterprise/models-llm-gateways-and-integrations.mdx
index 316d84b..0374529 100644
--- a/docs/enterprise/models-llm-gateways-and-integrations.mdx
+++ b/docs/enterprise/models-llm-gateways-and-integrations.mdx
@@ -45,7 +45,7 @@ Factory works with gateways in two ways:
When you use a gateway, **data handling and retention policies are those of the gateway and underlying providers**; Droid simply uses the endpoints and credentials you configure.
-For concrete examples of configuring custom models (including gateway‑backed models), see [Custom models & BYOK](/cli/configuration/byok), which covers the `custom_models` array in `~/.factory/config.json` and how those models appear in the `/model` selector.
+For concrete examples of configuring custom models (including gateway‑backed models), see [Custom models & BYOK](/cli/configuration/byok), which covers the `customModels` array in `~/.factory/settings.json` and how those models appear in the `/model` selector.
---