From 731d00f4aaa1b45ad8a37d7e9344d7e676e41873 Mon Sep 17 00:00:00 2001 From: moizpgedge Date: Mon, 11 May 2026 15:29:11 +0500 Subject: [PATCH 1/4] Add five new fields to `MCPServiceConfig` and `mcpKnownKeys` to support knowledgebase search configuration for the MCP service: MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - `kb_enabled` — opt-in bool; when false or absent all other KB fields are ignored and no validation runs - `kb_embedding_provider` — required when KB is enabled; `voyage` and `openai` are the only accepted values for V1 - `kb_embedding_model` — required when KB is enabled - `kb_embedding_api_key` — required for `voyage` and `openai`; scrubbed from API output automatically via the existing `api_key` sensitive-field policy - `kb_database_host_path` — optional override for the KB file path on the host; defaults to `{DataDir}/kb/nla-kb.db` in downstream tickets Validation rules enforced at `ParseMCPServiceConfig` call time: - `ollama` is rejected with an explicit "not yet supported in V1" error; full ollama support is blocked on the `embedding_ollama_url` field mapping question (see PLAT-607) - Setting `kb_enabled: true` together with `disable_search_knowledgebase: true` is rejected — the combination would silently produce a running container with no `search_knowledgebase` tool registered - All KB fields are silently ignored when `kb_enabled` is false or absent, matching the same opt-in pattern used by `llm_enabled` PLAT-590 --- .../internal/database/mcp_service_config.go | 64 +++++ .../database/mcp_service_config_test.go | 240 ++++++++++++++++++ 2 files changed, 304 insertions(+) diff --git a/server/internal/database/mcp_service_config.go b/server/internal/database/mcp_service_config.go index 7dae332c..aaac77e0 100644 --- a/server/internal/database/mcp_service_config.go +++ b/server/internal/database/mcp_service_config.go @@ -52,6 +52,13 @@ type MCPServiceConfig struct { DisableGenerateEmbedding *bool `json:"disable_generate_embedding,omitempty"` DisableSearchKnowledgebase *bool `json:"disable_search_knowledgebase,omitempty"` DisableCountRows *bool `json:"disable_count_rows,omitempty"` + + // Optional - knowledgebase search + KBEnabled *bool `json:"kb_enabled,omitempty"` + KBEmbeddingProvider *string `json:"kb_embedding_provider,omitempty"` + KBEmbeddingModel *string `json:"kb_embedding_model,omitempty"` + KBEmbeddingAPIKey *string `json:"kb_embedding_api_key,omitempty"` + KBDatabaseHostPath *string `json:"kb_database_host_path,omitempty"` } // mcpKnownKeys is the set of all valid config keys for MCP service configuration. @@ -78,10 +85,16 @@ var mcpKnownKeys = map[string]bool{ "disable_generate_embedding": true, "disable_search_knowledgebase": true, "disable_count_rows": true, + "kb_enabled": true, + "kb_embedding_provider": true, + "kb_embedding_model": true, + "kb_embedding_api_key": true, + "kb_database_host_path": true, } var validLLMProviders = []string{"anthropic", "openai", "ollama"} var validEmbeddingProviders = []string{"voyage", "openai", "ollama"} +var validKBEmbeddingProviders = []string{"voyage", "openai"} // ParseMCPServiceConfig parses and validates a config map into a typed MCPServiceConfig. // If isUpdate is true, bootstrap-only fields (init_token, init_users) are rejected. @@ -204,6 +217,24 @@ func ParseMCPServiceConfig(config map[string]any, isUpdate bool) (*MCPServiceCon } } + // Parse KB fields + kbEnabled, kbeErrs := optionalBool(config, "kb_enabled") + errs = append(errs, kbeErrs...) + + isKBEnabled := kbEnabled != nil && *kbEnabled + + kbEmbeddingProvider, kbepErrs := optionalString(config, "kb_embedding_provider") + errs = append(errs, kbepErrs...) + + kbEmbeddingModel, kbemErrs := optionalString(config, "kb_embedding_model") + errs = append(errs, kbemErrs...) + + kbEmbeddingAPIKey, kbeakErrs := optionalString(config, "kb_embedding_api_key") + errs = append(errs, kbeakErrs...) + + kbDatabaseHostPath, kbdhpErrs := optionalString(config, "kb_database_host_path") + errs = append(errs, kbdhpErrs...) + // Parse optional fields allowWrites, awErrs := optionalBool(config, "allow_writes") errs = append(errs, awErrs...) @@ -233,6 +264,34 @@ func ParseMCPServiceConfig(config map[string]any, isUpdate bool) (*MCPServiceCon disableCountRows, dcrErrs := optionalBool(config, "disable_count_rows") errs = append(errs, dcrErrs...) + // KB cross-validation: only when kb_enabled is true + if isKBEnabled { + // Conflict: kb_enabled + disable_search_knowledgebase is always broken + if disableSearchKB != nil && *disableSearchKB { + errs = append(errs, fmt.Errorf("kb_enabled and disable_search_knowledgebase cannot both be true: the search_knowledgebase tool would never register")) + } + + if kbEmbeddingProvider == nil { + errs = append(errs, fmt.Errorf("kb_embedding_provider is required when kb_enabled is true")) + } else { + // ollama is not supported in V1 + if *kbEmbeddingProvider == "ollama" { + errs = append(errs, fmt.Errorf("kb_embedding_provider %q is not yet supported; use %q or %q", "ollama", "voyage", "openai")) + } else if !slices.Contains(validKBEmbeddingProviders, *kbEmbeddingProvider) { + errs = append(errs, fmt.Errorf("kb_embedding_provider must be one of: %s", strings.Join(validKBEmbeddingProviders, ", "))) + } else { + // voyage and openai require an API key + if kbEmbeddingAPIKey == nil { + errs = append(errs, fmt.Errorf("kb_embedding_api_key is required when kb_embedding_provider is %q", *kbEmbeddingProvider)) + } + } + } + + if kbEmbeddingModel == nil { + errs = append(errs, fmt.Errorf("kb_embedding_model is required when kb_enabled is true")) + } + } + if poolMaxConns != nil { if *poolMaxConns <= 0 { errs = append(errs, fmt.Errorf("pool_max_conns must be a positive integer")) @@ -296,6 +355,11 @@ func ParseMCPServiceConfig(config map[string]any, isUpdate bool) (*MCPServiceCon DisableGenerateEmbedding: disableGenEmbed, DisableSearchKnowledgebase: disableSearchKB, DisableCountRows: disableCountRows, + KBEnabled: kbEnabled, + KBEmbeddingProvider: kbEmbeddingProvider, + KBEmbeddingModel: kbEmbeddingModel, + KBEmbeddingAPIKey: kbEmbeddingAPIKey, + KBDatabaseHostPath: kbDatabaseHostPath, }, nil } diff --git a/server/internal/database/mcp_service_config_test.go b/server/internal/database/mcp_service_config_test.go index 65ac2800..4146702d 100644 --- a/server/internal/database/mcp_service_config_test.go +++ b/server/internal/database/mcp_service_config_test.go @@ -868,6 +868,246 @@ func TestParseMCPServiceConfig(t *testing.T) { }) }) + t.Run("knowledgebase config", func(t *testing.T) { + t.Run("happy paths", func(t *testing.T) { + t.Run("kb absent - all KB fields nil", func(t *testing.T) { + cfg, errs := database.ParseMCPServiceConfig(noLLMBase(), false) + require.Empty(t, errs) + assert.Nil(t, cfg.KBEnabled) + assert.Nil(t, cfg.KBEmbeddingProvider) + assert.Nil(t, cfg.KBEmbeddingModel) + assert.Nil(t, cfg.KBEmbeddingAPIKey) + assert.Nil(t, cfg.KBDatabaseHostPath) + }) + + t.Run("kb_enabled false with no KB fields", func(t *testing.T) { + config := map[string]any{"kb_enabled": false} + cfg, errs := database.ParseMCPServiceConfig(config, false) + require.Empty(t, errs) + require.NotNil(t, cfg.KBEnabled) + assert.False(t, *cfg.KBEnabled) + assert.Nil(t, cfg.KBEmbeddingProvider) + assert.Nil(t, cfg.KBEmbeddingModel) + assert.Nil(t, cfg.KBEmbeddingAPIKey) + assert.Nil(t, cfg.KBDatabaseHostPath) + }) + + t.Run("voyage provider", func(t *testing.T) { + config := map[string]any{ + "kb_enabled": true, + "kb_embedding_provider": "voyage", + "kb_embedding_model": "voyage-3-lite", + "kb_embedding_api_key": "voy-key", + } + cfg, errs := database.ParseMCPServiceConfig(config, false) + require.Empty(t, errs) + require.NotNil(t, cfg.KBEnabled) + assert.True(t, *cfg.KBEnabled) + require.NotNil(t, cfg.KBEmbeddingProvider) + assert.Equal(t, "voyage", *cfg.KBEmbeddingProvider) + require.NotNil(t, cfg.KBEmbeddingModel) + assert.Equal(t, "voyage-3-lite", *cfg.KBEmbeddingModel) + require.NotNil(t, cfg.KBEmbeddingAPIKey) + assert.Equal(t, "voy-key", *cfg.KBEmbeddingAPIKey) + assert.Nil(t, cfg.KBDatabaseHostPath) + }) + + t.Run("openai provider", func(t *testing.T) { + config := map[string]any{ + "kb_enabled": true, + "kb_embedding_provider": "openai", + "kb_embedding_model": "text-embedding-3-small", + "kb_embedding_api_key": "sk-openai-key", + } + cfg, errs := database.ParseMCPServiceConfig(config, false) + require.Empty(t, errs) + require.NotNil(t, cfg.KBEmbeddingProvider) + assert.Equal(t, "openai", *cfg.KBEmbeddingProvider) + require.NotNil(t, cfg.KBEmbeddingModel) + assert.Equal(t, "text-embedding-3-small", *cfg.KBEmbeddingModel) + require.NotNil(t, cfg.KBEmbeddingAPIKey) + assert.Equal(t, "sk-openai-key", *cfg.KBEmbeddingAPIKey) + }) + + t.Run("kb_database_host_path override", func(t *testing.T) { + config := map[string]any{ + "kb_enabled": true, + "kb_embedding_provider": "voyage", + "kb_embedding_model": "voyage-3-lite", + "kb_embedding_api_key": "voy-key", + "kb_database_host_path": "/data/custom/my-kb.db", + } + cfg, errs := database.ParseMCPServiceConfig(config, false) + require.Empty(t, errs) + require.NotNil(t, cfg.KBDatabaseHostPath) + assert.Equal(t, "/data/custom/my-kb.db", *cfg.KBDatabaseHostPath) + }) + + t.Run("kb_enabled false ignores invalid KB fields", func(t *testing.T) { + // When kb_enabled is false, provider/model/key are not validated. + config := map[string]any{ + "kb_enabled": false, + "kb_embedding_provider": "ollama", + "kb_embedding_model": "some-model", + } + _, errs := database.ParseMCPServiceConfig(config, false) + require.Empty(t, errs) + }) + }) + + t.Run("required fields when kb_enabled is true", func(t *testing.T) { + t.Run("missing kb_embedding_provider", func(t *testing.T) { + config := map[string]any{ + "kb_enabled": true, + "kb_embedding_model": "voyage-3-lite", + "kb_embedding_api_key": "voy-key", + } + _, errs := database.ParseMCPServiceConfig(config, false) + require.NotEmpty(t, errs) + assert.Contains(t, joinedErr(errs).Error(), "kb_embedding_provider is required when kb_enabled is true") + }) + + t.Run("missing kb_embedding_model", func(t *testing.T) { + config := map[string]any{ + "kb_enabled": true, + "kb_embedding_provider": "voyage", + "kb_embedding_api_key": "voy-key", + } + _, errs := database.ParseMCPServiceConfig(config, false) + require.NotEmpty(t, errs) + assert.Contains(t, joinedErr(errs).Error(), "kb_embedding_model is required when kb_enabled is true") + }) + + t.Run("voyage without kb_embedding_api_key", func(t *testing.T) { + config := map[string]any{ + "kb_enabled": true, + "kb_embedding_provider": "voyage", + "kb_embedding_model": "voyage-3-lite", + } + _, errs := database.ParseMCPServiceConfig(config, false) + require.NotEmpty(t, errs) + assert.Contains(t, joinedErr(errs).Error(), `kb_embedding_api_key is required when kb_embedding_provider is "voyage"`) + }) + + t.Run("openai without kb_embedding_api_key", func(t *testing.T) { + config := map[string]any{ + "kb_enabled": true, + "kb_embedding_provider": "openai", + "kb_embedding_model": "text-embedding-3-small", + } + _, errs := database.ParseMCPServiceConfig(config, false) + require.NotEmpty(t, errs) + assert.Contains(t, joinedErr(errs).Error(), `kb_embedding_api_key is required when kb_embedding_provider is "openai"`) + }) + }) + + t.Run("provider restrictions", func(t *testing.T) { + t.Run("ollama rejected with not yet supported message", func(t *testing.T) { + config := map[string]any{ + "kb_enabled": true, + "kb_embedding_provider": "ollama", + "kb_embedding_model": "nomic-embed-text", + } + _, errs := database.ParseMCPServiceConfig(config, false) + require.NotEmpty(t, errs) + assert.Contains(t, joinedErr(errs).Error(), `"ollama" is not yet supported`) + }) + + t.Run("unknown provider rejected", func(t *testing.T) { + config := map[string]any{ + "kb_enabled": true, + "kb_embedding_provider": "bedrock", + "kb_embedding_model": "some-model", + "kb_embedding_api_key": "some-key", + } + _, errs := database.ParseMCPServiceConfig(config, false) + require.NotEmpty(t, errs) + assert.Contains(t, joinedErr(errs).Error(), "kb_embedding_provider must be one of") + }) + }) + + t.Run("disable_search_knowledgebase conflict", func(t *testing.T) { + t.Run("kb_enabled true and disable_search_knowledgebase true is rejected", func(t *testing.T) { + config := map[string]any{ + "kb_enabled": true, + "kb_embedding_provider": "voyage", + "kb_embedding_model": "voyage-3-lite", + "kb_embedding_api_key": "voy-key", + "disable_search_knowledgebase": true, + } + _, errs := database.ParseMCPServiceConfig(config, false) + require.NotEmpty(t, errs) + assert.Contains(t, joinedErr(errs).Error(), "kb_enabled and disable_search_knowledgebase cannot both be true") + }) + + t.Run("disable_search_knowledgebase true without kb_enabled is allowed", func(t *testing.T) { + config := map[string]any{"disable_search_knowledgebase": true} + cfg, errs := database.ParseMCPServiceConfig(config, false) + require.Empty(t, errs) + require.NotNil(t, cfg.DisableSearchKnowledgebase) + assert.True(t, *cfg.DisableSearchKnowledgebase) + }) + }) + + t.Run("type errors", func(t *testing.T) { + t.Run("kb_enabled wrong type", func(t *testing.T) { + config := map[string]any{"kb_enabled": "yes"} + _, errs := database.ParseMCPServiceConfig(config, false) + require.NotEmpty(t, errs) + assert.Contains(t, joinedErr(errs).Error(), "kb_enabled must be a boolean") + }) + + t.Run("kb_embedding_provider wrong type", func(t *testing.T) { + config := map[string]any{ + "kb_enabled": true, + "kb_embedding_provider": 42, + "kb_embedding_model": "voyage-3-lite", + "kb_embedding_api_key": "voy-key", + } + _, errs := database.ParseMCPServiceConfig(config, false) + require.NotEmpty(t, errs) + assert.Contains(t, joinedErr(errs).Error(), "kb_embedding_provider must be a string") + }) + + t.Run("kb_embedding_model wrong type", func(t *testing.T) { + config := map[string]any{ + "kb_enabled": true, + "kb_embedding_provider": "voyage", + "kb_embedding_model": 123, + "kb_embedding_api_key": "voy-key", + } + _, errs := database.ParseMCPServiceConfig(config, false) + require.NotEmpty(t, errs) + assert.Contains(t, joinedErr(errs).Error(), "kb_embedding_model must be a string") + }) + + t.Run("kb_embedding_api_key wrong type", func(t *testing.T) { + config := map[string]any{ + "kb_enabled": true, + "kb_embedding_provider": "voyage", + "kb_embedding_model": "voyage-3-lite", + "kb_embedding_api_key": true, + } + _, errs := database.ParseMCPServiceConfig(config, false) + require.NotEmpty(t, errs) + assert.Contains(t, joinedErr(errs).Error(), "kb_embedding_api_key must be a string") + }) + + t.Run("kb_database_host_path wrong type", func(t *testing.T) { + config := map[string]any{ + "kb_enabled": true, + "kb_embedding_provider": "voyage", + "kb_embedding_model": "voyage-3-lite", + "kb_embedding_api_key": "voy-key", + "kb_database_host_path": 99, + } + _, errs := database.ParseMCPServiceConfig(config, false) + require.NotEmpty(t, errs) + assert.Contains(t, joinedErr(errs).Error(), "kb_database_host_path must be a string") + }) + }) + }) + t.Run("llm_enabled false rejects LLM fields", func(t *testing.T) { t.Run("llm_provider rejected", func(t *testing.T) { config := map[string]any{"llm_provider": "anthropic"} From 3006e2c1193ecdda632920e5253bf5e895bc5ef1 Mon Sep 17 00:00:00 2001 From: moizpgedge Date: Mon, 11 May 2026 22:12:31 +0500 Subject: [PATCH 2/4] feat: add knowledgebase support to MCP service (PLAT-590 through PLAT-608) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enables the search_knowledgebase tool in MCP service deployments via a host-placed SQLite KB file bind-mounted read-only into the container. **PLAT-590 — KB fields on MCPServiceConfig (already committed)** Five new config fields: kb_enabled, kb_embedding_provider, kb_embedding_model, kb_embedding_api_key, kb_database_host_path. Parse-time validation rejects unknown providers, missing credentials, and the kb_enabled + disable_search_knowledgebase conflict (PLAT-607). kb_embedding_api_key is scrubbed from API output. **PLAT-591 — knowledgebase: section in config.yaml** GenerateMCPConfig now emits a knowledgebase: block when kb_enabled is true. The database_path uses the container-side path (/app/kb/), derived from the host path basename. The API key field name varies by provider: embedding_voyage_api_key or embedding_openai_api_key. **PLAT-592 — read-only /app/kb bind mount** ServiceContainerSpec adds a read-only bind mount from the host KB directory to /app/kb when KBDirPath is non-empty. The mount is on the directory, not the file, so atomic renames of the KB file are visible to the container immediately. **PLAT-593 — deployment blocked when KB file is missing** MCPConfigResource.Refresh checks for the KB file before checking for config.yaml. This ordering is intentional: a new service has no config.yaml yet, so checking config.yaml first would return ErrNotFound (triggering Create) before the KB check runs. By checking KB first, a missing file returns a plain error that blocks both initial creates and updates. ErrNotFound is never returned for a missing KB file. **PLAT-608 — documentation** docs/services/mcp.md gains a Knowledgebase section under Configuration Reference (5-field table, warning admonition for the staging requirement) and a Knowledgebase Search (Voyage AI) example under Examples. - 19 parse-time validation cases (PLAT-590 + PLAT-607) - 7 config generation cases coveproviders and custom path (PLAT-591) - 2 container spec cases confirming mount presence/absence (PLAT-592) - 9 Refresh cases covering KB disabled, file present, file missing with and without existing config.yaml (PLAT-593) --- docs/services/mcp.md | 92 ++++++++ .../internal/orchestrator/swarm/mcp_config.go | 54 ++++- .../orchestrator/swarm/mcp_config_resource.go | 20 +- .../swarm/mcp_config_resource_test.go | 167 +++++++++++++ .../orchestrator/swarm/mcp_config_test.go | 222 ++++++++++++++++++ .../orchestrator/swarm/orchestrator.go | 16 ++ .../swarm/service_instance_spec.go | 2 + .../orchestrator/swarm/service_spec.go | 6 + .../orchestrator/swarm/service_spec_test.go | 54 +++++ 9 files changed, 625 insertions(+), 8 deletions(-) create mode 100644 server/internal/orchestrator/swarm/mcp_config_resource_test.go diff --git a/docs/services/mcp.md b/docs/services/mcp.md index b8e9d8a2..b48b8e4c 100644 --- a/docs/services/mcp.md +++ b/docs/services/mcp.md @@ -79,6 +79,39 @@ following table describes the embedding configuration fields: | `embedding_model` | string | The embedding model name (e.g., `voyage-3`, `text-embedding-3-small`, `nomic-embed-text`). Required when `embedding_provider` is set. | | `embedding_api_key` | string | API key for the embedding provider. Required for `voyage` and `openai` providers. | +### Knowledgebase + +Knowledgebase support enables the `search_knowledgebase` tool to query +a SQLite-backed knowledge base. The knowledge base file is staged on the +host; the Control Plane bind-mounts it into the container read-only. +Knowledgebase support is opt-in: when `kb_enabled` is `false` (the +default), no KB file is required and no KB validation runs. For V1, +only `voyage` and `openai` are supported as embedding providers for the +knowledgebase; Ollama support is planned for a future release. + +!!! warning + + The Control Plane does not generate the knowledgebase SQLite file. + You must place the file on every host that will run an MCP service + instance **before** setting `kb_enabled: true`. If the file is + missing when the Control Plane attempts to deploy the service, the + deployment will be blocked with a clear error. + + The default location is `{data_dir}/kb/nla-kb.db` (for example, + `/var/lib/pgedge-control-plane/kb/nla-kb.db`). Create the directory + and copy your file there, or use `kb_database_host_path` to specify + a custom path. + +The following table describes the knowledgebase configuration fields: + +| Field | Type | Description | +|---------------------------|---------|-------------| +| `kb_enabled` | boolean | Set to `true` to enable knowledgebase search. When `false` (the default), all other `kb_*` fields are ignored and the `search_knowledgebase` tool operates without a KB. | +| `kb_embedding_provider` | string | Embedding provider for the KB. One of: `voyage`, `openai`. Required when `kb_enabled` is `true`. | +| `kb_embedding_model` | string | Embedding model for the KB (e.g., `voyage-3-lite`, `text-embedding-3-small`). Required when `kb_enabled` is `true`. | +| `kb_embedding_api_key` | string | API key for the KB embedding provider. Required for `voyage` and `openai`. Scrubbed from API responses. | +| `kb_database_host_path` | string | Full path to the KB SQLite file on the host. Defaults to `{data_dir}/kb/nla-kb.db`. | + ### LLM Tuning The LLM tuning fields control the behavior of the LLM proxy and are @@ -333,6 +366,65 @@ to use a self-hosted Ollama server for both the LLM and embeddings: }' ``` +### Knowledgebase Search (Voyage AI) + +In the following example, a `curl` command provisions an MCP service +with knowledgebase support enabled, using Voyage AI as the embedding +provider. Before provisioning, stage the knowledgebase file on every +host that will run an MCP service instance: + +```sh +sudo mkdir -p /var/lib/pgedge-control-plane/kb +sudo cp /path/to/your/nla-kb.db /var/lib/pgedge-control-plane/kb/nla-kb.db +``` + +=== "curl" + + ```sh + curl -X POST http://host-1:3000/v1/databases \ + -H 'Content-Type: application/json' \ + --data '{ + "id": "example", + "spec": { + "database_name": "example", + "nodes": [ + { "name": "n1", "host_ids": ["host-1"] } + ], + "database_users": [ + { + "username": "mcp_user", + "password": "changeme", + "db_owner": true, + "attributes": ["LOGIN"] + } + ], + "services": [ + { + "service_id": "mcp-server", + "service_type": "mcp", + "version": "latest", + "host_ids": ["host-1"], + "port": 8080, + "connect_as": "mcp_user", + "config": { + "kb_enabled": true, + "kb_embedding_provider": "voyage", + "kb_embedding_model": "voyage-3-lite", + "kb_embedding_api_key": "pa-..." + } + } + ] + } + }' + ``` + +To use a custom path for the knowledgebase file, add +`kb_database_host_path` to the `config` object: + +```json +"kb_database_host_path": "/data/kb/my-kb.db" +``` + ## Connecting to the MCP Server The MCP server accepts JSON-RPC 2.0 requests once the service instance diff --git a/server/internal/orchestrator/swarm/mcp_config.go b/server/internal/orchestrator/swarm/mcp_config.go index 487f0d37..ef33b016 100644 --- a/server/internal/orchestrator/swarm/mcp_config.go +++ b/server/internal/orchestrator/swarm/mcp_config.go @@ -2,6 +2,7 @@ package swarm import ( "fmt" + "path/filepath" "github.com/goccy/go-yaml" "github.com/pgEdge/control-plane/server/internal/database" @@ -10,11 +11,12 @@ import ( // mcpYAMLConfig mirrors the MCP server's Config struct for YAML generation. // Only fields the CP needs to set are included. type mcpYAMLConfig struct { - HTTP mcpHTTPConfig `yaml:"http"` - Databases []mcpDatabaseConfig `yaml:"databases"` - LLM *mcpLLMConfig `yaml:"llm,omitempty"` - Embedding *mcpEmbeddingConfig `yaml:"embedding,omitempty"` - Builtins mcpBuiltinsConfig `yaml:"builtins"` + HTTP mcpHTTPConfig `yaml:"http"` + Databases []mcpDatabaseConfig `yaml:"databases"` + LLM *mcpLLMConfig `yaml:"llm,omitempty"` + Embedding *mcpEmbeddingConfig `yaml:"embedding,omitempty"` + Knowledgebase *mcpKBConfig `yaml:"knowledgebase,omitempty"` + Builtins mcpBuiltinsConfig `yaml:"builtins"` } type mcpHTTPConfig struct { @@ -70,6 +72,15 @@ type mcpEmbeddingConfig struct { OllamaURL string `yaml:"ollama_url,omitempty"` } +type mcpKBConfig struct { + Enabled bool `yaml:"enabled"` + DatabasePath string `yaml:"database_path"` + EmbeddingProvider string `yaml:"embedding_provider"` + EmbeddingModel string `yaml:"embedding_model"` + EmbeddingVoyageAPIKey string `yaml:"embedding_voyage_api_key,omitempty"` + EmbeddingOpenAIAPIKey string `yaml:"embedding_openai_api_key,omitempty"` +} + type mcpBuiltinsConfig struct { Tools mcpToolsConfig `yaml:"tools"` } @@ -169,6 +180,34 @@ func GenerateMCPConfig(params *MCPConfigParams) ([]byte, error) { embedding = emb } + // Build KB config (only when kb_enabled is true) + var kb *mcpKBConfig + if cfg.KBEnabled != nil && *cfg.KBEnabled { + containerPath := "/app/kb/nla-kb.db" + if cfg.KBDatabaseHostPath != nil { + containerPath = "/app/kb/" + filepath.Base(*cfg.KBDatabaseHostPath) + } + k := &mcpKBConfig{ + Enabled: true, + DatabasePath: containerPath, + } + if cfg.KBEmbeddingProvider != nil { + k.EmbeddingProvider = *cfg.KBEmbeddingProvider + } + if cfg.KBEmbeddingModel != nil { + k.EmbeddingModel = *cfg.KBEmbeddingModel + } + if cfg.KBEmbeddingAPIKey != nil && cfg.KBEmbeddingProvider != nil { + switch *cfg.KBEmbeddingProvider { + case "voyage": + k.EmbeddingVoyageAPIKey = *cfg.KBEmbeddingAPIKey + case "openai": + k.EmbeddingOpenAIAPIKey = *cfg.KBEmbeddingAPIKey + } + } + kb = k + } + // Build tool toggles falseVal := false tools := mcpToolsConfig{ @@ -227,8 +266,9 @@ func GenerateMCPConfig(params *MCPConfigParams) ([]byte, error) { }, }, }, - LLM: llm, - Embedding: embedding, + LLM: llm, + Embedding: embedding, + Knowledgebase: kb, Builtins: mcpBuiltinsConfig{ Tools: tools, }, diff --git a/server/internal/orchestrator/swarm/mcp_config_resource.go b/server/internal/orchestrator/swarm/mcp_config_resource.go index fda628c5..a795ae69 100644 --- a/server/internal/orchestrator/swarm/mcp_config_resource.go +++ b/server/internal/orchestrator/swarm/mcp_config_resource.go @@ -46,6 +46,9 @@ type MCPConfigResource struct { TargetSessionAttrs string `json:"target_session_attrs"` ConnectAsUsername string `json:"connect_as_username"` ConnectAsPassword string `json:"connect_as_password"` + // KBHostPath is the full path to the KB SQLite file on the host. When non-empty, + // Refresh verifies the file exists before allowing deployment to proceed. + KBHostPath string `json:"kb_host_path,omitempty"` } func (r *MCPConfigResource) ResourceVersion() string { @@ -85,7 +88,22 @@ func (r *MCPConfigResource) Refresh(ctx context.Context, rc *resource.Context) e return fmt.Errorf("failed to get service data dir path: %w", err) } - // Check if config.yaml exists + // Check KB file first so a missing file blocks both initial creates and + // updates. If this check came after the config.yaml check, a brand-new + // service (no config.yaml yet) would return ErrNotFound before reaching + // here, skipping the check and allowing the container to be deployed with + // a broken bind mount. + if r.KBHostPath != "" { + exists, err := afero.Exists(fs, r.KBHostPath) + if err != nil { + return fmt.Errorf("failed to check KB database file at %s: %w", r.KBHostPath, err) + } + if !exists { + return fmt.Errorf("KB database file not found at %s — stage the file on the host before deploying with kb_enabled: true", r.KBHostPath) + } + } + + // Check if config.yaml exists; ErrNotFound here triggers Create. _, err = readResourceFile(fs, filepath.Join(dirPath, "config.yaml")) if err != nil { return fmt.Errorf("failed to read MCP config: %w", err) diff --git a/server/internal/orchestrator/swarm/mcp_config_resource_test.go b/server/internal/orchestrator/swarm/mcp_config_resource_test.go new file mode 100644 index 00000000..442fafed --- /dev/null +++ b/server/internal/orchestrator/swarm/mcp_config_resource_test.go @@ -0,0 +1,167 @@ +package swarm + +import ( + "context" + "errors" + "path/filepath" + "testing" + + "github.com/samber/do" + "github.com/spf13/afero" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/pgEdge/control-plane/server/internal/database" + "github.com/pgEdge/control-plane/server/internal/filesystem" + "github.com/pgEdge/control-plane/server/internal/resource" +) + +// mcpRCAndFs returns a resource.Context backed by an in-memory afero.Fs +// with the given data directory pre-created, and the Fs itself for file setup. +func mcpRCAndFs(t *testing.T, dirResourceID, dirPath string) (*resource.Context, afero.Fs) { + t.Helper() + fs := afero.NewMemMapFs() + _ = fs.MkdirAll(dirPath, 0o700) + + injector := do.New() + do.Provide(injector, func(i *do.Injector) (afero.Fs, error) { + return fs, nil + }) + + dirRes := &filesystem.DirResource{ + ID: dirResourceID, + HostID: "host-1", + Path: dirPath, + FullPath: dirPath, + } + data, err := resource.ToResourceData(dirRes) + if err != nil { + t.Fatalf("ToResourceData() error = %v", err) + } + state := resource.NewState() + state.Add(data) + return &resource.Context{State: state, Injector: injector}, fs +} + +func TestMCPConfigResource_ResourceVersion(t *testing.T) { + r := &MCPConfigResource{} + assert.Equal(t, "3", r.ResourceVersion()) +} + +func TestMCPConfigResource_Identifier(t *testing.T) { + r := &MCPConfigResource{ServiceInstanceID: "db1-mcp-host1"} + id := r.Identifier() + assert.Equal(t, "db1-mcp-host1", id.ID) + assert.Equal(t, ResourceTypeMCPConfig, id.Type) +} + +func TestMCPConfigResource_Executor(t *testing.T) { + r := &MCPConfigResource{HostID: "host-1"} + assert.Equal(t, resource.HostExecutor("host-1"), r.Executor()) +} + +func TestMCPConfigResource_DiffIgnore(t *testing.T) { + r := &MCPConfigResource{} + assert.Nil(t, r.DiffIgnore()) +} + +func TestMCPConfigResource_Dependencies(t *testing.T) { + r := &MCPConfigResource{ + ServiceInstanceID: "db1-mcp-host1", + DirResourceID: "db1-mcp-host1-data", + } + deps := r.Dependencies() + require.Len(t, deps, 1) + assert.Equal(t, filesystem.DirResourceIdentifier("db1-mcp-host1-data"), deps[0]) +} + +func TestMCPConfigResource_Refresh_KBDisabled(t *testing.T) { + // KBHostPath empty (KB not enabled) — Refresh must not block on any KB check. + dirID := "inst-data" + dirPath := "/var/lib/pgedge/services/inst-1" + rc, fs := mcpRCAndFs(t, dirID, dirPath) + + // Write config.yaml so the first check passes. + require.NoError(t, afero.WriteFile(fs, filepath.Join(dirPath, "config.yaml"), []byte("x: 1"), 0o600)) + + r := &MCPConfigResource{ + ServiceInstanceID: "inst-1", + HostID: "host-1", + DirResourceID: dirID, + Config: &database.MCPServiceConfig{}, + KBHostPath: "", // not set + } + err := r.Refresh(context.Background(), rc) + require.NoError(t, err) +} + +func TestMCPConfigResource_Refresh_KBFilePresent(t *testing.T) { + // KBHostPath set and file exists → Refresh succeeds. + dirID := "inst-data" + dirPath := "/var/lib/pgedge/services/inst-kb" + kbPath := "/var/lib/pgedge/kb/nla-kb.db" + rc, fs := mcpRCAndFs(t, dirID, dirPath) + + require.NoError(t, afero.WriteFile(fs, filepath.Join(dirPath, "config.yaml"), []byte("x: 1"), 0o600)) + require.NoError(t, fs.MkdirAll("/var/lib/pgedge/kb", 0o700)) + require.NoError(t, afero.WriteFile(fs, kbPath, []byte("SQLite"), 0o600)) + + r := &MCPConfigResource{ + ServiceInstanceID: "inst-kb", + HostID: "host-1", + DirResourceID: dirID, + Config: &database.MCPServiceConfig{}, + KBHostPath: kbPath, + } + err := r.Refresh(context.Background(), rc) + require.NoError(t, err) +} + +func TestMCPConfigResource_Refresh_KBFileMissing(t *testing.T) { + // KBHostPath set but file does not exist → plain error, NOT ErrNotFound. + // config.yaml is present (update path). + dirID := "inst-data" + dirPath := "/var/lib/pgedge/services/inst-kb-missing" + kbPath := "/var/lib/pgedge/kb/nla-kb.db" + rc, fs := mcpRCAndFs(t, dirID, dirPath) + + require.NoError(t, afero.WriteFile(fs, filepath.Join(dirPath, "config.yaml"), []byte("x: 1"), 0o600)) + // Deliberately do NOT create the KB file. + + r := &MCPConfigResource{ + ServiceInstanceID: "inst-kb-missing", + HostID: "host-1", + DirResourceID: dirID, + Config: &database.MCPServiceConfig{}, + KBHostPath: kbPath, + } + err := r.Refresh(context.Background(), rc) + require.Error(t, err) + assert.Contains(t, err.Error(), kbPath) + // Must NOT be ErrNotFound — a missing KB file blocks deployment, not triggers Create. + assert.False(t, errors.Is(err, resource.ErrNotFound), "missing KB file must not return ErrNotFound") +} + +func TestMCPConfigResource_Refresh_KBFileMissing_NoConfigYet(t *testing.T) { + // KBHostPath set, file missing, AND config.yaml does not exist yet (initial + // create path). The KB check must fire before the config.yaml check so that + // the missing file blocks the deployment rather than being silently skipped + // because ErrNotFound is returned first. + dirID := "inst-data" + dirPath := "/var/lib/pgedge/services/inst-kb-missing-new" + kbPath := "/var/lib/pgedge/kb/nla-kb.db" + rc, _ := mcpRCAndFs(t, dirID, dirPath) + // Neither config.yaml nor the KB file are created. + + r := &MCPConfigResource{ + ServiceInstanceID: "inst-kb-missing-new", + HostID: "host-1", + DirResourceID: dirID, + Config: &database.MCPServiceConfig{}, + KBHostPath: kbPath, + } + err := r.Refresh(context.Background(), rc) + require.Error(t, err) + assert.Contains(t, err.Error(), kbPath) + assert.False(t, errors.Is(err, resource.ErrNotFound), "missing KB file must not return ErrNotFound even on initial create") +} diff --git a/server/internal/orchestrator/swarm/mcp_config_test.go b/server/internal/orchestrator/swarm/mcp_config_test.go index 3400fe21..d538e8ce 100644 --- a/server/internal/orchestrator/swarm/mcp_config_test.go +++ b/server/internal/orchestrator/swarm/mcp_config_test.go @@ -698,6 +698,228 @@ func TestGenerateMCPConfig_DatabaseConfig(t *testing.T) { // service spec → BuildServiceHostList → GenerateMCPConfig → YAML output. // It verifies that the generated YAML contains the correct ordered hosts // array and target_session_attrs for various topologies. +func TestGenerateMCPConfig_KBDisabled_SectionOmitted(t *testing.T) { + params := &MCPConfigParams{ + Config: &database.MCPServiceConfig{KBEnabled: utils.PointerTo(false)}, + DatabaseName: "mydb", + DatabaseHosts: []database.ServiceHostEntry{{Host: "db-host", Port: 5432}}, + Username: "appuser", + Password: "secret", + } + + data, err := GenerateMCPConfig(params) + if err != nil { + t.Fatalf("GenerateMCPConfig() error = %v", err) + } + + cfg := parseYAML(t, data) + if cfg.Knowledgebase != nil { + t.Errorf("knowledgebase section should be absent when kb_enabled is false, got %+v", cfg.Knowledgebase) + } +} + +func TestGenerateMCPConfig_KBNotSet_SectionOmitted(t *testing.T) { + params := &MCPConfigParams{ + Config: &database.MCPServiceConfig{}, + DatabaseName: "mydb", + DatabaseHosts: []database.ServiceHostEntry{{Host: "db-host", Port: 5432}}, + Username: "appuser", + Password: "secret", + } + + data, err := GenerateMCPConfig(params) + if err != nil { + t.Fatalf("GenerateMCPConfig() error = %v", err) + } + + cfg := parseYAML(t, data) + if cfg.Knowledgebase != nil { + t.Errorf("knowledgebase section should be absent when kb_enabled is not set, got %+v", cfg.Knowledgebase) + } +} + +func TestGenerateMCPConfig_KBEnabled_VoyageProvider(t *testing.T) { + params := &MCPConfigParams{ + Config: &database.MCPServiceConfig{ + KBEnabled: utils.PointerTo(true), + KBEmbeddingProvider: strPtr("voyage"), + KBEmbeddingModel: strPtr("voyage-3-lite"), + KBEmbeddingAPIKey: strPtr("pa-voyage-kb-key"), + }, + DatabaseName: "mydb", + DatabaseHosts: []database.ServiceHostEntry{{Host: "db-host", Port: 5432}}, + Username: "appuser", + Password: "secret", + } + + data, err := GenerateMCPConfig(params) + if err != nil { + t.Fatalf("GenerateMCPConfig() error = %v", err) + } + + cfg := parseYAML(t, data) + if cfg.Knowledgebase == nil { + t.Fatal("knowledgebase section should be present when kb_enabled is true") + } + if !cfg.Knowledgebase.Enabled { + t.Error("knowledgebase.enabled should be true") + } + if cfg.Knowledgebase.EmbeddingProvider != "voyage" { + t.Errorf("knowledgebase.embedding_provider = %q, want %q", cfg.Knowledgebase.EmbeddingProvider, "voyage") + } + if cfg.Knowledgebase.EmbeddingModel != "voyage-3-lite" { + t.Errorf("knowledgebase.embedding_model = %q, want %q", cfg.Knowledgebase.EmbeddingModel, "voyage-3-lite") + } + if cfg.Knowledgebase.EmbeddingVoyageAPIKey != "pa-voyage-kb-key" { + t.Errorf("knowledgebase.embedding_voyage_api_key = %q, want %q", cfg.Knowledgebase.EmbeddingVoyageAPIKey, "pa-voyage-kb-key") + } + if cfg.Knowledgebase.EmbeddingOpenAIAPIKey != "" { + t.Errorf("knowledgebase.embedding_openai_api_key should be empty for voyage provider, got %q", cfg.Knowledgebase.EmbeddingOpenAIAPIKey) + } +} + +func TestGenerateMCPConfig_KBEnabled_OpenAIProvider(t *testing.T) { + params := &MCPConfigParams{ + Config: &database.MCPServiceConfig{ + KBEnabled: utils.PointerTo(true), + KBEmbeddingProvider: strPtr("openai"), + KBEmbeddingModel: strPtr("text-embedding-3-small"), + KBEmbeddingAPIKey: strPtr("sk-openai-kb-key"), + }, + DatabaseName: "mydb", + DatabaseHosts: []database.ServiceHostEntry{{Host: "db-host", Port: 5432}}, + Username: "appuser", + Password: "secret", + } + + data, err := GenerateMCPConfig(params) + if err != nil { + t.Fatalf("GenerateMCPConfig() error = %v", err) + } + + cfg := parseYAML(t, data) + if cfg.Knowledgebase == nil { + t.Fatal("knowledgebase section should be present when kb_enabled is true") + } + if cfg.Knowledgebase.EmbeddingProvider != "openai" { + t.Errorf("knowledgebase.embedding_provider = %q, want %q", cfg.Knowledgebase.EmbeddingProvider, "openai") + } + if cfg.Knowledgebase.EmbeddingOpenAIAPIKey != "sk-openai-kb-key" { + t.Errorf("knowledgebase.embedding_openai_api_key = %q, want %q", cfg.Knowledgebase.EmbeddingOpenAIAPIKey, "sk-openai-kb-key") + } + if cfg.Knowledgebase.EmbeddingVoyageAPIKey != "" { + t.Errorf("knowledgebase.embedding_voyage_api_key should be empty for openai provider, got %q", cfg.Knowledgebase.EmbeddingVoyageAPIKey) + } +} + +func TestGenerateMCPConfig_KBEnabled_DefaultPath(t *testing.T) { + // No kb_database_host_path → container path defaults to /app/kb/nla-kb.db + params := &MCPConfigParams{ + Config: &database.MCPServiceConfig{ + KBEnabled: utils.PointerTo(true), + KBEmbeddingProvider: strPtr("voyage"), + KBEmbeddingModel: strPtr("voyage-3-lite"), + KBEmbeddingAPIKey: strPtr("pa-voyage-key"), + }, + DatabaseName: "mydb", + DatabaseHosts: []database.ServiceHostEntry{{Host: "db-host", Port: 5432}}, + Username: "appuser", + Password: "secret", + } + + data, err := GenerateMCPConfig(params) + if err != nil { + t.Fatalf("GenerateMCPConfig() error = %v", err) + } + + cfg := parseYAML(t, data) + if cfg.Knowledgebase == nil { + t.Fatal("knowledgebase section should be present") + } + if cfg.Knowledgebase.DatabasePath != "/app/kb/nla-kb.db" { + t.Errorf("knowledgebase.database_path = %q, want %q", cfg.Knowledgebase.DatabasePath, "/app/kb/nla-kb.db") + } +} + +func TestGenerateMCPConfig_KBEnabled_CustomHostPath(t *testing.T) { + // kb_database_host_path set → container path uses basename under /app/kb/ + params := &MCPConfigParams{ + Config: &database.MCPServiceConfig{ + KBEnabled: utils.PointerTo(true), + KBEmbeddingProvider: strPtr("voyage"), + KBEmbeddingModel: strPtr("voyage-3-lite"), + KBEmbeddingAPIKey: strPtr("pa-voyage-key"), + KBDatabaseHostPath: strPtr("/data/custom/my-kb.db"), + }, + DatabaseName: "mydb", + DatabaseHosts: []database.ServiceHostEntry{{Host: "db-host", Port: 5432}}, + Username: "appuser", + Password: "secret", + } + + data, err := GenerateMCPConfig(params) + if err != nil { + t.Fatalf("GenerateMCPConfig() error = %v", err) + } + + cfg := parseYAML(t, data) + if cfg.Knowledgebase == nil { + t.Fatal("knowledgebase section should be present") + } + if cfg.Knowledgebase.DatabasePath != "/app/kb/my-kb.db" { + t.Errorf("knowledgebase.database_path = %q, want %q", cfg.Knowledgebase.DatabasePath, "/app/kb/my-kb.db") + } +} + +func TestGenerateMCPConfig_KBEnabled_WithLLMAndEmbedding(t *testing.T) { + // All three sections (LLM, embedding, KB) present together — they must not interfere. + params := &MCPConfigParams{ + Config: &database.MCPServiceConfig{ + LLMEnabled: utils.PointerTo(true), + LLMProvider: "anthropic", + LLMModel: "claude-sonnet-4-5", + AnthropicAPIKey: strPtr("sk-ant-api03-test"), + EmbeddingProvider: strPtr("voyage"), + EmbeddingModel: strPtr("voyage-3"), + EmbeddingAPIKey: strPtr("pa-emb-key"), + KBEnabled: utils.PointerTo(true), + KBEmbeddingProvider: strPtr("voyage"), + KBEmbeddingModel: strPtr("voyage-3-lite"), + KBEmbeddingAPIKey: strPtr("pa-kb-key"), + }, + DatabaseName: "mydb", + DatabaseHosts: []database.ServiceHostEntry{{Host: "db-host", Port: 5432}}, + Username: "appuser", + Password: "secret", + } + + data, err := GenerateMCPConfig(params) + if err != nil { + t.Fatalf("GenerateMCPConfig() error = %v", err) + } + + cfg := parseYAML(t, data) + if cfg.LLM == nil { + t.Fatal("llm section should be present") + } + if cfg.Embedding == nil { + t.Fatal("embedding section should be present") + } + if cfg.Knowledgebase == nil { + t.Fatal("knowledgebase section should be present") + } + if cfg.Knowledgebase.EmbeddingProvider != "voyage" { + t.Errorf("knowledgebase.embedding_provider = %q, want %q", cfg.Knowledgebase.EmbeddingProvider, "voyage") + } + // KB uses embedding_voyage_api_key; embedding section uses voyage_api_key — different fields + if cfg.Knowledgebase.EmbeddingVoyageAPIKey != "pa-kb-key" { + t.Errorf("knowledgebase.embedding_voyage_api_key = %q, want %q", cfg.Knowledgebase.EmbeddingVoyageAPIKey, "pa-kb-key") + } + if cfg.Embedding.VoyageAPIKey != "pa-emb-key" { + t.Errorf("embedding.voyage_api_key = %q, want %q", cfg.Embedding.VoyageAPIKey, "pa-emb-key") + } +} + func TestGenerateMCPConfig_MultiHostTopology(t *testing.T) { // inst builds a minimal InstanceSpec for testing. inst := func(instanceID, hostID string) *database.InstanceSpec { diff --git a/server/internal/orchestrator/swarm/orchestrator.go b/server/internal/orchestrator/swarm/orchestrator.go index eb07e007..45386a74 100644 --- a/server/internal/orchestrator/swarm/orchestrator.go +++ b/server/internal/orchestrator/swarm/orchestrator.go @@ -459,6 +459,9 @@ func (o *Orchestrator) generateMCPInstanceResources(spec *database.ServiceInstan // resource block and the per-node authenticator loop below. var parsedPostgRESTConfig *database.PostgRESTServiceConfig + // Hoisted KB paths — computed in the MCP case, consumed by ServiceInstanceSpecResource. + var kbHostPath, kbDirPath string + switch spec.ServiceSpec.ServiceType { case "mcp": mcpConfig, errs := database.ParseMCPServiceConfig(spec.ServiceSpec.Config, false) @@ -473,6 +476,17 @@ func (o *Orchestrator) generateMCPInstanceResources(spec *database.ServiceInstan OwnerUID: mcpContainerUID, OwnerGID: mcpContainerUID, } + // Compute KB paths when KB is enabled. + if mcpConfig.KBEnabled != nil && *mcpConfig.KBEnabled { + if mcpConfig.KBDatabaseHostPath != nil { + kbHostPath = *mcpConfig.KBDatabaseHostPath + kbDirPath = filepath.Dir(*mcpConfig.KBDatabaseHostPath) + } else { + kbHostPath = filepath.Join(o.cfg.DataDir, "kb", "nla-kb.db") + kbDirPath = filepath.Join(o.cfg.DataDir, "kb") + } + } + mcpConfigResource := &MCPConfigResource{ ServiceInstanceID: spec.ServiceInstanceID, ServiceID: spec.ServiceSpec.ServiceID, @@ -484,6 +498,7 @@ func (o *Orchestrator) generateMCPInstanceResources(spec *database.ServiceInstan TargetSessionAttrs: spec.TargetSessionAttrs, ConnectAsUsername: spec.ConnectAsUsername, ConnectAsPassword: spec.ConnectAsPassword, + KBHostPath: kbHostPath, } serviceSpecificResources = []resource.Resource{dataDir, mcpConfigResource} @@ -550,6 +565,7 @@ func (o *Orchestrator) generateMCPInstanceResources(spec *database.ServiceInstan TargetSessionAttrs: spec.TargetSessionAttrs, Port: spec.Port, DataDirID: dataDirID, + KBDirPath: kbDirPath, } // Service instance resource (actual Docker service) diff --git a/server/internal/orchestrator/swarm/service_instance_spec.go b/server/internal/orchestrator/swarm/service_instance_spec.go index 8bb310bb..6597c4ba 100644 --- a/server/internal/orchestrator/swarm/service_instance_spec.go +++ b/server/internal/orchestrator/swarm/service_instance_spec.go @@ -39,6 +39,7 @@ type ServiceInstanceSpecResource struct { TargetSessionAttrs string `json:"target_session_attrs"` // libpq target_session_attrs Port *int `json:"port"` // Service published port (optional, 0 = random) DataDirID string `json:"data_dir_id"` // DirResource ID for the service data directory + KBDirPath string `json:"kb_dir_path,omitempty"` // Host-side KB directory for bind mount (MCP only, KB enabled) Spec swarm.ServiceSpec `json:"spec"` } @@ -143,6 +144,7 @@ func (s *ServiceInstanceSpecResource) Refresh(ctx context.Context, rc *resource. Port: s.Port, DataPath: dataPath, KeysPath: keysPath, + KBDirPath: s.KBDirPath, }) if err != nil { return fmt.Errorf("failed to generate service container spec: %w", err) diff --git a/server/internal/orchestrator/swarm/service_spec.go b/server/internal/orchestrator/swarm/service_spec.go index f1187448..7535d376 100644 --- a/server/internal/orchestrator/swarm/service_spec.go +++ b/server/internal/orchestrator/swarm/service_spec.go @@ -73,6 +73,9 @@ type ServiceContainerSpecOptions struct { // KeysPath is the host-side directory containing API key files. // When non-empty, it is bind-mounted read-only into the container at /app/keys. KeysPath string + // KBDirPath is the host-side directory containing the KB SQLite file. + // When non-empty, it is bind-mounted read-only into the container at /app/kb. + KBDirPath string } // ServiceContainerSpec builds a Docker Swarm service spec for a service instance. @@ -189,6 +192,9 @@ func ServiceContainerSpec(opts *ServiceContainerSpecOptions) (swarm.ServiceSpec, mounts = []mount.Mount{ docker.BuildMount(opts.DataPath, "/app/data", false), } + if opts.KBDirPath != "" { + mounts = append(mounts, docker.BuildMount(opts.KBDirPath, "/app/kb", true)) + } case "rag": user = fmt.Sprintf("%d", ragContainerUID) command = []string{"/app/pgedge-rag-server"} diff --git a/server/internal/orchestrator/swarm/service_spec_test.go b/server/internal/orchestrator/swarm/service_spec_test.go index f747b344..4a66e1b0 100644 --- a/server/internal/orchestrator/swarm/service_spec_test.go +++ b/server/internal/orchestrator/swarm/service_spec_test.go @@ -579,3 +579,57 @@ func TestServiceContainerSpec_PostgREST_User(t *testing.T) { t.Errorf("User = %q, want %q (PostgREST runs as UID 1000 per official Dockerfile)", spec.TaskTemplate.ContainerSpec.User, want) } } + +func makeMCPSpecOpts() *ServiceContainerSpecOptions { + return &ServiceContainerSpecOptions{ + ServiceSpec: &database.ServiceSpec{ + ServiceID: "mcp-server", + ServiceType: "mcp", + }, + ServiceInstanceID: "db1-mcp-host1", + DatabaseID: "db1", + DatabaseName: "testdb", + HostID: "host1", + ServiceName: "db1-mcp-host1", + Hostname: "mcp-host1", + CohortMemberID: "node-123", + ServiceImage: &ServiceImage{Tag: "ghcr.io/pgedge/postgres-mcp:latest"}, + DatabaseNetworkID: "db1-database", + DataPath: "/var/lib/pgedge/services/db1-mcp-host1", + } +} + +func TestServiceContainerSpec_MCP_NoKBMount(t *testing.T) { + // KBDirPath not set → only the data mount. + opts := makeMCPSpecOpts() + spec, err := ServiceContainerSpec(opts) + require.NoError(t, err) + + mounts := spec.TaskTemplate.ContainerSpec.Mounts + if len(mounts) != 1 { + t.Fatalf("got %d mounts, want 1 (data only)", len(mounts)) + } + if mounts[0].Target != "/app/data" { + t.Errorf("mounts[0].Target = %q, want %q", mounts[0].Target, "/app/data") + } +} + +func TestServiceContainerSpec_MCP_KBMount(t *testing.T) { + // KBDirPath set → data mount + read-only KB dir mount. + opts := makeMCPSpecOpts() + opts.KBDirPath = "/var/lib/pgedge/kb" + + spec, err := ServiceContainerSpec(opts) + require.NoError(t, err) + + mounts := spec.TaskTemplate.ContainerSpec.Mounts + if len(mounts) != 2 { + t.Fatalf("got %d mounts, want 2 (data + kb)", len(mounts)) + } + assert.Equal(t, "/app/data", mounts[0].Target) + assert.Equal(t, "/var/lib/pgedge/kb", mounts[1].Source) + assert.Equal(t, "/app/kb", mounts[1].Target) + if !mounts[1].ReadOnly { + t.Error("KB mount must be read-only") + } +} From dd215cd1e4b4b10aae76196447d983b002b06733 Mon Sep 17 00:00:00 2001 From: moizpgedge Date: Tue, 12 May 2026 00:05:40 +0500 Subject: [PATCH 3/4] fix: harden KB config validation and generation (PLAT-590) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address security and correctness issues raised in code review: - Reject `kb_database_host_path` values that are not absolute or contain `..` components. An unsanitized host path could be used to bind-mount arbitrary host directories into the container via the CP API. - Reject stale `kb_*` fields (provider, model, api_key, host_path) when `kb_enabled` is false or absent, matching the existing LLM pattern. Previously these were silently ignored, hiding typos and stale config. - Make `ollama` provider check case-insensitive (`strings.ToLower`) so `"Ollama"` gets the explicit "not yet supported" error instead of the generic "must be one of" message. - Replace defensive nil checks in `GenerateMCPConfig` with an invariant assertion. If KB fields are nil despite `kb_enabled=true`, the function now returns an error immediately rather than silently emitting empty strings into `config.yaml`. Fields are dereferenced unconditionally below the assertion. - Fix discarded `MkdirAll` error in `mcpRCAndFs` test helper — use `require.NoError` instead of `_ =`. - Add `!!! note` to `docs/services/mcp.md` stating that any `kb_*` config change requires a container restart, not just a SIGHUP reload. - Add test cases: relative path, `..` path, case-insensitive ollama, and four cases for stale KB fields rejected when disabled. PLAT-590 --- docs/services/mcp.md | 14 +++- .../internal/database/mcp_service_config.go | 23 +++++- .../database/mcp_service_config_test.go | 80 +++++++++++++++++-- .../internal/orchestrator/swarm/mcp_config.go | 27 +++---- .../swarm/mcp_config_resource_test.go | 2 +- 5 files changed, 120 insertions(+), 26 deletions(-) diff --git a/docs/services/mcp.md b/docs/services/mcp.md index b48b8e4c..42004a57 100644 --- a/docs/services/mcp.md +++ b/docs/services/mcp.md @@ -85,8 +85,8 @@ Knowledgebase support enables the `search_knowledgebase` tool to query a SQLite-backed knowledge base. The knowledge base file is staged on the host; the Control Plane bind-mounts it into the container read-only. Knowledgebase support is opt-in: when `kb_enabled` is `false` (the -default), no KB file is required and no KB validation runs. For V1, -only `voyage` and `openai` are supported as embedding providers for the +default), no KB file is required and no KB validation runs. Only +`voyage` and `openai` are supported as embedding providers for the knowledgebase; Ollama support is planned for a future release. !!! warning @@ -110,7 +110,15 @@ The following table describes the knowledgebase configuration fields: | `kb_embedding_provider` | string | Embedding provider for the KB. One of: `voyage`, `openai`. Required when `kb_enabled` is `true`. | | `kb_embedding_model` | string | Embedding model for the KB (e.g., `voyage-3-lite`, `text-embedding-3-small`). Required when `kb_enabled` is `true`. | | `kb_embedding_api_key` | string | API key for the KB embedding provider. Required for `voyage` and `openai`. Scrubbed from API responses. | -| `kb_database_host_path` | string | Full path to the KB SQLite file on the host. Defaults to `{data_dir}/kb/nla-kb.db`. | +| `kb_database_host_path` | string | Full path to the KB SQLite file on the host. Defaults to `{data_dir}/kb/nla-kb.db`. Must be an absolute path. | + +!!! note + + Changing any `kb_*` field (provider, model, credentials, or path) + requires a service redeploy — not just a config reload. SIGHUP only + reloads database connection settings and does not reinitialize the + knowledgebase. Use `update-database` to apply KB config changes; the + Control Plane will restart the container automatically. ### LLM Tuning diff --git a/server/internal/database/mcp_service_config.go b/server/internal/database/mcp_service_config.go index aaac77e0..31e2a2e1 100644 --- a/server/internal/database/mcp_service_config.go +++ b/server/internal/database/mcp_service_config.go @@ -3,6 +3,7 @@ package database import ( "encoding/json" "fmt" + "path/filepath" "slices" "sort" "strings" @@ -274,8 +275,8 @@ func ParseMCPServiceConfig(config map[string]any, isUpdate bool) (*MCPServiceCon if kbEmbeddingProvider == nil { errs = append(errs, fmt.Errorf("kb_embedding_provider is required when kb_enabled is true")) } else { - // ollama is not supported in V1 - if *kbEmbeddingProvider == "ollama" { + // ollama is not yet supported as a KB embedding provider + if strings.ToLower(*kbEmbeddingProvider) == "ollama" { errs = append(errs, fmt.Errorf("kb_embedding_provider %q is not yet supported; use %q or %q", "ollama", "voyage", "openai")) } else if !slices.Contains(validKBEmbeddingProviders, *kbEmbeddingProvider) { errs = append(errs, fmt.Errorf("kb_embedding_provider must be one of: %s", strings.Join(validKBEmbeddingProviders, ", "))) @@ -290,6 +291,24 @@ func ParseMCPServiceConfig(config map[string]any, isUpdate bool) (*MCPServiceCon if kbEmbeddingModel == nil { errs = append(errs, fmt.Errorf("kb_embedding_model is required when kb_enabled is true")) } + + // Path sanitization: must be absolute and clean (no .. components) + if kbDatabaseHostPath != nil { + p := *kbDatabaseHostPath + if !filepath.IsAbs(p) { + errs = append(errs, fmt.Errorf("kb_database_host_path must be an absolute path")) + } else if filepath.Clean(p) != p { + errs = append(errs, fmt.Errorf("kb_database_host_path must be a clean absolute path (no .. or redundant separators)")) + } + } + } else { + // KB is disabled — reject KB-specific fields to prevent silent misconfiguration + kbOnlyFields := []string{"kb_embedding_provider", "kb_embedding_model", "kb_embedding_api_key", "kb_database_host_path"} + for _, key := range kbOnlyFields { + if _, ok := config[key]; ok { + errs = append(errs, fmt.Errorf("%s must not be set unless kb_enabled is true", key)) + } + } } if poolMaxConns != nil { diff --git a/server/internal/database/mcp_service_config_test.go b/server/internal/database/mcp_service_config_test.go index 4146702d..7febaf05 100644 --- a/server/internal/database/mcp_service_config_test.go +++ b/server/internal/database/mcp_service_config_test.go @@ -943,15 +943,18 @@ func TestParseMCPServiceConfig(t *testing.T) { assert.Equal(t, "/data/custom/my-kb.db", *cfg.KBDatabaseHostPath) }) - t.Run("kb_enabled false ignores invalid KB fields", func(t *testing.T) { - // When kb_enabled is false, provider/model/key are not validated. + t.Run("kb_enabled false rejects stale KB fields", func(t *testing.T) { + // When kb_enabled is false, KB-specific fields must not be set. config := map[string]any{ "kb_enabled": false, - "kb_embedding_provider": "ollama", - "kb_embedding_model": "some-model", + "kb_embedding_provider": "openai", + "kb_embedding_model": "text-embedding-3-small", } _, errs := database.ParseMCPServiceConfig(config, false) - require.Empty(t, errs) + require.NotEmpty(t, errs) + joined := joinedErr(errs).Error() + assert.Contains(t, joined, "kb_embedding_provider must not be set unless kb_enabled is true") + assert.Contains(t, joined, "kb_embedding_model must not be set unless kb_enabled is true") }) }) @@ -1105,6 +1108,73 @@ func TestParseMCPServiceConfig(t *testing.T) { require.NotEmpty(t, errs) assert.Contains(t, joinedErr(errs).Error(), "kb_database_host_path must be a string") }) + + t.Run("kb_database_host_path relative path rejected", func(t *testing.T) { + config := map[string]any{ + "kb_enabled": true, + "kb_embedding_provider": "voyage", + "kb_embedding_model": "voyage-3-lite", + "kb_embedding_api_key": "voy-key", + "kb_database_host_path": "kb/nla-kb.db", + } + _, errs := database.ParseMCPServiceConfig(config, false) + require.NotEmpty(t, errs) + assert.Contains(t, joinedErr(errs).Error(), "kb_database_host_path must be an absolute path") + }) + + t.Run("kb_database_host_path with .. rejected", func(t *testing.T) { + config := map[string]any{ + "kb_enabled": true, + "kb_embedding_provider": "voyage", + "kb_embedding_model": "voyage-3-lite", + "kb_embedding_api_key": "voy-key", + "kb_database_host_path": "/data/kb/../../etc/shadow", + } + _, errs := database.ParseMCPServiceConfig(config, false) + require.NotEmpty(t, errs) + assert.Contains(t, joinedErr(errs).Error(), "kb_database_host_path must be a clean absolute path") + }) + + t.Run("ollama provider name case insensitive", func(t *testing.T) { + config := map[string]any{ + "kb_enabled": true, + "kb_embedding_provider": "Ollama", + "kb_embedding_model": "nomic-embed-text", + } + _, errs := database.ParseMCPServiceConfig(config, false) + require.NotEmpty(t, errs) + assert.Contains(t, joinedErr(errs).Error(), `kb_embedding_provider "ollama" is not yet supported`) + }) + }) + }) + + t.Run("kb_enabled false rejects kb_* fields", func(t *testing.T) { + t.Run("kb_embedding_provider rejected", func(t *testing.T) { + config := map[string]any{"kb_embedding_provider": "openai"} + _, errs := database.ParseMCPServiceConfig(config, false) + require.NotEmpty(t, errs) + assert.Contains(t, joinedErr(errs).Error(), "kb_embedding_provider must not be set unless kb_enabled is true") + }) + + t.Run("kb_embedding_model rejected", func(t *testing.T) { + config := map[string]any{"kb_embedding_model": "text-embedding-3-small"} + _, errs := database.ParseMCPServiceConfig(config, false) + require.NotEmpty(t, errs) + assert.Contains(t, joinedErr(errs).Error(), "kb_embedding_model must not be set unless kb_enabled is true") + }) + + t.Run("kb_embedding_api_key rejected", func(t *testing.T) { + config := map[string]any{"kb_embedding_api_key": "sk-x"} + _, errs := database.ParseMCPServiceConfig(config, false) + require.NotEmpty(t, errs) + assert.Contains(t, joinedErr(errs).Error(), "kb_embedding_api_key must not be set unless kb_enabled is true") + }) + + t.Run("kb_database_host_path rejected", func(t *testing.T) { + config := map[string]any{"kb_database_host_path": "/data/kb/nla-kb.db"} + _, errs := database.ParseMCPServiceConfig(config, false) + require.NotEmpty(t, errs) + assert.Contains(t, joinedErr(errs).Error(), "kb_database_host_path must not be set unless kb_enabled is true") }) }) diff --git a/server/internal/orchestrator/swarm/mcp_config.go b/server/internal/orchestrator/swarm/mcp_config.go index ef33b016..357b2482 100644 --- a/server/internal/orchestrator/swarm/mcp_config.go +++ b/server/internal/orchestrator/swarm/mcp_config.go @@ -183,27 +183,24 @@ func GenerateMCPConfig(params *MCPConfigParams) ([]byte, error) { // Build KB config (only when kb_enabled is true) var kb *mcpKBConfig if cfg.KBEnabled != nil && *cfg.KBEnabled { + if cfg.KBEmbeddingProvider == nil || cfg.KBEmbeddingModel == nil || cfg.KBEmbeddingAPIKey == nil { + return nil, fmt.Errorf("internal: KB provider/model/key nil despite kb_enabled=true; validation was bypassed") + } containerPath := "/app/kb/nla-kb.db" if cfg.KBDatabaseHostPath != nil { containerPath = "/app/kb/" + filepath.Base(*cfg.KBDatabaseHostPath) } k := &mcpKBConfig{ - Enabled: true, - DatabasePath: containerPath, - } - if cfg.KBEmbeddingProvider != nil { - k.EmbeddingProvider = *cfg.KBEmbeddingProvider + Enabled: true, + DatabasePath: containerPath, + EmbeddingProvider: *cfg.KBEmbeddingProvider, + EmbeddingModel: *cfg.KBEmbeddingModel, } - if cfg.KBEmbeddingModel != nil { - k.EmbeddingModel = *cfg.KBEmbeddingModel - } - if cfg.KBEmbeddingAPIKey != nil && cfg.KBEmbeddingProvider != nil { - switch *cfg.KBEmbeddingProvider { - case "voyage": - k.EmbeddingVoyageAPIKey = *cfg.KBEmbeddingAPIKey - case "openai": - k.EmbeddingOpenAIAPIKey = *cfg.KBEmbeddingAPIKey - } + switch *cfg.KBEmbeddingProvider { + case "voyage": + k.EmbeddingVoyageAPIKey = *cfg.KBEmbeddingAPIKey + case "openai": + k.EmbeddingOpenAIAPIKey = *cfg.KBEmbeddingAPIKey } kb = k } diff --git a/server/internal/orchestrator/swarm/mcp_config_resource_test.go b/server/internal/orchestrator/swarm/mcp_config_resource_test.go index 442fafed..7857ce94 100644 --- a/server/internal/orchestrator/swarm/mcp_config_resource_test.go +++ b/server/internal/orchestrator/swarm/mcp_config_resource_test.go @@ -21,7 +21,7 @@ import ( func mcpRCAndFs(t *testing.T, dirResourceID, dirPath string) (*resource.Context, afero.Fs) { t.Helper() fs := afero.NewMemMapFs() - _ = fs.MkdirAll(dirPath, 0o700) + require.NoError(t, fs.MkdirAll(dirPath, 0o700)) injector := do.New() do.Provide(injector, func(i *do.Injector) (afero.Fs, error) { From 6ffa366571f905bc1696ef1076a3c26121a1f3a0 Mon Sep 17 00:00:00 2001 From: moizpgedge Date: Tue, 12 May 2026 00:24:06 +0500 Subject: [PATCH 4/4] docs(mcp): correct kb_* field behavior when kb_enabled is false --- docs/services/mcp.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/docs/services/mcp.md b/docs/services/mcp.md index 42004a57..b115851e 100644 --- a/docs/services/mcp.md +++ b/docs/services/mcp.md @@ -84,9 +84,10 @@ following table describes the embedding configuration fields: Knowledgebase support enables the `search_knowledgebase` tool to query a SQLite-backed knowledge base. The knowledge base file is staged on the host; the Control Plane bind-mounts it into the container read-only. -Knowledgebase support is opt-in: when `kb_enabled` is `false` (the -default), no KB file is required and no KB validation runs. Only -`voyage` and `openai` are supported as embedding providers for the +Knowledgebase support is opt-in. When `kb_enabled` is `false` (the +default), no KB file is required — and any other `kb_*` fields present +in the config are **rejected** by the validator, not silently ignored. +Only `voyage` and `openai` are supported as embedding providers for the knowledgebase; Ollama support is planned for a future release. !!! warning @@ -106,7 +107,7 @@ The following table describes the knowledgebase configuration fields: | Field | Type | Description | |---------------------------|---------|-------------| -| `kb_enabled` | boolean | Set to `true` to enable knowledgebase search. When `false` (the default), all other `kb_*` fields are ignored and the `search_knowledgebase` tool operates without a KB. | +| `kb_enabled` | boolean | Set to `true` to enable knowledgebase search. When `false` (the default), any other `kb_*` fields in the config are **rejected** — they must be removed before the config is accepted. | | `kb_embedding_provider` | string | Embedding provider for the KB. One of: `voyage`, `openai`. Required when `kb_enabled` is `true`. | | `kb_embedding_model` | string | Embedding model for the KB (e.g., `voyage-3-lite`, `text-embedding-3-small`). Required when `kb_enabled` is `true`. | | `kb_embedding_api_key` | string | API key for the KB embedding provider. Required for `voyage` and `openai`. Scrubbed from API responses. |