From 17ecc64081dca8bf3bf4eb96326a8a94019c83ca Mon Sep 17 00:00:00 2001 From: Fabian Rodriguez Date: Wed, 15 Apr 2026 13:52:45 +0200 Subject: [PATCH 01/53] feat(new-ai-gateway): add model entity and collection --- app/_ai_gateway_entities/model.md | 18 ++++++++++++++++++ jekyll.yml | 12 ++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 app/_ai_gateway_entities/model.md diff --git a/app/_ai_gateway_entities/model.md b/app/_ai_gateway_entities/model.md new file mode 100644 index 0000000000..a617970a4f --- /dev/null +++ b/app/_ai_gateway_entities/model.md @@ -0,0 +1,18 @@ +--- +title: Model +content_type: reference +entities: + - model + +products: + - ai-gateway + +description: AI Models registered with the {{site.ai_gateway}}. + +schema: + api: konnect/ai-gateway + path: /schemas/AIGatewayModel + +works_on: + - konnect +--- \ No newline at end of file diff --git a/jekyll.yml b/jekyll.yml index eb8bc7c287..e63afd33e7 100644 --- a/jekyll.yml +++ b/jekyll.yml @@ -34,6 +34,8 @@ include: # Collections collections: + ai_gateway_entities: + output: true gateway_entities: output: true how-tos: @@ -52,6 +54,16 @@ defaults: permalink: "/how-to/:path/" breadcrumbs: - "/how-to/" + - scope: + path: "_ai_gateway_entities" + type: "ai_gateway_entities" + values: + layout: "gateway_entity" + permalink: "/ai-gateway/entities/:path/" + products: + - ai-gateway + breadcrumbs: + - "/ai-gateway/" - scope: path: "_gateway_entities" type: "gateway_entities" From 33456c8c80b1563ac1a8e43b129bd25b8172e84d Mon Sep 17 00:00:00 2001 From: Fabian Rodriguez Date: Tue, 28 Apr 2026 13:32:53 +0200 Subject: [PATCH 02/53] hack: to make ai gateway requests work with portal v3 TODO: revert this commit before release --- app/_assets/javascripts/apps/EntitySchema.vue | 9 ++++----- vite.config.ts | 6 +++++- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/app/_assets/javascripts/apps/EntitySchema.vue b/app/_assets/javascripts/apps/EntitySchema.vue index d82db15813..9c3839efb2 
100644 --- a/app/_assets/javascripts/apps/EntitySchema.vue +++ b/app/_assets/javascripts/apps/EntitySchema.vue @@ -15,6 +15,7 @@ diff --git a/vite.config.ts b/vite.config.ts index 3e7741a319..a73582a2b1 100644 --- a/vite.config.ts +++ b/vite.config.ts @@ -63,12 +63,16 @@ export default ({ command, mode }) => { server: { cors: { origin: 'http://localhost:8888' }, proxy: { - '^/api': { + '/vite-dev/api': { changeOrigin: true, target: portalApiUrl, configure: (proxy, options) => { mutateCookieAttributes(proxy) setHostHeader(proxy) + }, + rewrite: (path) => { + return path + .replace(/^\/vite-dev\/api/, '/api/'); } } } From efba509cad2796175b9830a9139b7575f892642b Mon Sep 17 00:00:00 2001 From: Fabian Rodriguez Date: Tue, 28 Apr 2026 13:34:25 +0200 Subject: [PATCH 03/53] feat: add konnect oas data and api page for the new ai gateway, and add releases to ai-gateway TODO: update this info with the actual product ids and version ids when the new API spec is in the dev portal --- app/_ai_gateway_entities/model.md | 12 +++++++++++- app/_api/konnect/ai-gateway/_index.md | 3 +++ app/_data/konnect_oas_data.json | 21 +++++++++++++++++++++ app/_data/products/ai-gateway.yml | 8 +++++++- 4 files changed, 42 insertions(+), 2 deletions(-) create mode 100644 app/_api/konnect/ai-gateway/_index.md diff --git a/app/_ai_gateway_entities/model.md b/app/_ai_gateway_entities/model.md index a617970a4f..80c8426f34 100644 --- a/app/_ai_gateway_entities/model.md +++ b/app/_ai_gateway_entities/model.md @@ -15,4 +15,14 @@ schema: works_on: - konnect ---- \ No newline at end of file + +tools: + - admin-api + - konnect-api + - deck +--- + + +## Schema + +{% entity_schema %} \ No newline at end of file diff --git a/app/_api/konnect/ai-gateway/_index.md b/app/_api/konnect/ai-gateway/_index.md new file mode 100644 index 0000000000..a04c2cee46 --- /dev/null +++ b/app/_api/konnect/ai-gateway/_index.md @@ -0,0 +1,3 @@ +--- +konnect_product_id: 38df0a35-37de-48fa-ac9d-60595d26eddf +--- \ No newline at end 
of file diff --git a/app/_data/konnect_oas_data.json b/app/_data/konnect_oas_data.json index 0d492c6990..586dd8a9aa 100644 --- a/app/_data/konnect_oas_data.json +++ b/app/_data/konnect_oas_data.json @@ -1,4 +1,25 @@ [ + { + "id": "38df0a35-37de-48fa-ac9d-60595d26eddf", + "title": "New AI Gateway", + "latestVersion": { + "name": "v2", + "id": "987bb874-f9f9-471e-9ae3-51897cbd2ccd" + }, + "description": "New AI Gateway API.", + "documentCount": 0, + "versionCount": 1, + "versions": [ + { + "id": "987bb874-f9f9-471e-9ae3-51897cbd2ccd", + "created_at": "2024-02-21T17:28:17.757Z", + "updated_at": "2024-10-17T19:13:18.223Z", + "name": "v2", + "deprecated": false, + "registration_configs": [] + } + ] + }, { "id": "ccb264be-1963-49a4-b6e8-bc7c98a6e4c2", "title": "Application Auth Strategies", diff --git a/app/_data/products/ai-gateway.yml b/app/_data/products/ai-gateway.yml index e40c9b2a8b..fc1a92a474 100644 --- a/app/_data/products/ai-gateway.yml +++ b/app/_data/products/ai-gateway.yml @@ -1,2 +1,8 @@ name: AI Gateway -icon: /_assets/icons/products/ai-gateway.svg \ No newline at end of file +icon: /_assets/icons/products/ai-gateway.svg + +releases: + - release: "2.0" + version: "2.0.0" + name: "v2" + latest: true \ No newline at end of file From a25b1866902302d20542c41134d41a3458ef1968 Mon Sep 17 00:00:00 2001 From: Fabian Rodriguez Date: Tue, 28 Apr 2026 17:13:25 +0200 Subject: [PATCH 04/53] feat(new-ai-gateway): add admin-api and konnect urls, endpoints and variables to entity_examples --- app/_data/entity_examples/config.yml | 13 ++++++++++++- .../entity_example/presenters/admin-api.rb | 15 +++++++++++++-- .../entity_example/presenters/konnect-api.rb | 18 ++++++++++++++---- 3 files changed, 39 insertions(+), 7 deletions(-) diff --git a/app/_data/entity_examples/config.yml b/app/_data/entity_examples/config.yml index ae36a3af09..35fbc69c94 100644 --- a/app/_data/entity_examples/config.yml +++ b/app/_data/entity_examples/config.yml @@ -32,6 +32,7 @@ formats: admin-api: 
label: 'Admin API' base_url: 'http://localhost:8001' + ai_gateway_base_url: 'http://localhost:8001/ai-gateways/{ai_gateway}' endpoints: consumer: '/consumers/' consumer_group: '/consumer_groups/' @@ -61,11 +62,15 @@ formats: global: '/plugins/' variables: <<: *variables + ai_gateway: + placeholder: 'AIGatewayId' + description: 'The `id` of the AI Gateway.' konnect-api: label: 'Konnect API' base_url: 'https://{region}.api.konghq.com/v2/control-planes/{control_plane}/core-entities' event_gateway_base_url: 'https://{region}.api.konghq.com/v1/event-gateways/{event_gateway}' + ai_gateway_base_url: 'https://{region}.api.konghq.com/v1/ai-gateways/{ai_gateway}' endpoints: consumer: '/consumers/' consumer_group: '/consumer_groups/' @@ -85,6 +90,8 @@ formats: listener: '/listeners' schema_registry: '/schema-registries' static_key: '/static-keys' + model: '/models' + policy: '/policies' plugin_endpoints: consumer: '/consumers/{consumer}/plugins/' consumer_group: '/consumer_groups/{consumer_group}/plugins/' @@ -127,7 +134,11 @@ formats: event_gateway_listener: placeholder: 'eventGatewayListenerId' description: The `id` of the Event Gateway Listener. - + ai_gateway_variables: + <<: *konnect_variables + ai_gateway: + placeholder: 'AIGatewayId' + description: 'The `id` of the AI Gateway.' 
kic: label: 'KIC' diff --git a/app/_plugins/drops/entity_example/presenters/admin-api.rb b/app/_plugins/drops/entity_example/presenters/admin-api.rb index 9eebea6126..266ae0e0ca 100644 --- a/app/_plugins/drops/entity_example/presenters/admin-api.rb +++ b/app/_plugins/drops/entity_example/presenters/admin-api.rb @@ -46,10 +46,21 @@ def data_validate_on_prem def build_url [ - formats['admin-api']['base_url'], + base_url, formats['admin-api']['endpoints'][entity_type] ].join end + + def base_url + @base_url ||= case @example_drop.product + when 'gateway' + formats['admin-api']['base_url'] + when 'ai-gateway' + formats['admin-api']['ai_gateway_base_url'] + else + raise ArgumentError, "Unsupported product: #{@example_drop.product}" + end + end end class Plugin < Base @@ -72,7 +83,7 @@ def missing_variables def build_url [ - formats['admin-api']['base_url'], + base_url, formats['admin-api']['plugin_endpoints'][@example_drop.target.key] ].join end diff --git a/app/_plugins/drops/entity_example/presenters/konnect-api.rb b/app/_plugins/drops/entity_example/presenters/konnect-api.rb index a890099167..f7d1e08f74 100644 --- a/app/_plugins/drops/entity_example/presenters/konnect-api.rb +++ b/app/_plugins/drops/entity_example/presenters/konnect-api.rb @@ -44,10 +44,15 @@ def product def default_variables @default_variables ||= - if @example_drop.product == 'gateway' + case @example_drop.product + when 'gateway' formats['konnect-api']['variables'] - else + when 'event_gateway' formats['konnect-api']['event_gateway_variables'] + when 'ai-gateway' + formats['konnect-api']['ai_gateway_variables'] + else + raise ArgumentError, "Unsupported product: #{@example_drop.product}" end end @@ -59,10 +64,15 @@ def build_url end def base_url - @base_url ||= if @example_drop.product == 'gateway' + @base_url ||= case @example_drop.product + when 'gateway' formats['konnect-api']['base_url'] - else + when 'event_gateway' formats['konnect-api']['event_gateway_base_url'] + when 'ai-gateway' + 
formats['konnect-api']['ai_gateway_base_url'] + else + raise ArgumentError, "Unsupported product: #{@example_drop.product}" end end end From 39414f8a3d0787bcfbc417b153eaa7aaabe76661 Mon Sep 17 00:00:00 2001 From: Fabian Rodriguez Date: Tue, 28 Apr 2026 17:15:59 +0200 Subject: [PATCH 05/53] wip(new-ai-gateway): drop admin-api for now and add dummy entity_example to the model entity --- app/_ai_gateway_entities/model.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/app/_ai_gateway_entities/model.md b/app/_ai_gateway_entities/model.md index 80c8426f34..bf346168c3 100644 --- a/app/_ai_gateway_entities/model.md +++ b/app/_ai_gateway_entities/model.md @@ -17,12 +17,19 @@ works_on: - konnect tools: - - admin-api - konnect-api - deck --- +## Set up a Model + +{% entity_example %} +type: model +data: + model: openai-something +{% endentity_example %} + ## Schema {% entity_schema %} \ No newline at end of file From 914d26449d7e111fef0f38ec923bc538c25fa805 Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Tue, 28 Apr 2026 14:22:23 +0200 Subject: [PATCH 06/53] add rough draft --- app/_ai_gateway_entities/model.md | 249 ++++++++++++++++++++++++++++-- 1 file changed, 238 insertions(+), 11 deletions(-) diff --git a/app/_ai_gateway_entities/model.md b/app/_ai_gateway_entities/model.md index bf346168c3..b6c992f8d2 100644 --- a/app/_ai_gateway_entities/model.md +++ b/app/_ai_gateway_entities/model.md @@ -3,24 +3,251 @@ title: Model content_type: reference entities: - model - products: - ai-gateway - description: AI Models registered with the {{site.ai_gateway}}. 
- schema: - api: konnect/ai-gateway - path: /schemas/AIGatewayModel - + api: konnect/ai-gateway + path: /schemas/AIGatewayModel works_on: - - konnect - + - konnect + - on-prem tools: - - konnect-api - - deck + - deck + - admin-api + - konnect-api +related_resources: + - text: About {{site.ai_gateway}} + url: /ai-gateway/ + - text: AI Proxy plugin + url: /plugins/ai-proxy/ + - text: AI Proxy Advanced plugin + url: /plugins/ai-proxy-advanced/ + - text: Plugin entity + url: /gateway/entities/plugin/ + - text: Consumer entity + url: /gateway/entities/consumer/ + - text: Consumer Group entity + url: /gateway/entities/consumer-group/ +faqs: + - q: What happens if a request's model doesn't match any Model entity? + a: Plugins scoped to a Model won't run. Plugins without a Model scope run normally. + + - q: Can the same plugin be configured for multiple Models? + a: Yes. Create one plugin entry per Model scope. Each entry is resolved independently by the plugin iterator. + + - q: Can I scope a plugin to both a Model and a Consumer, Route, or Service? + a: Yes. Combined scopes are supported and follow the precedence chain described in [Plugin configuration precedence](#plugin-configuration-precedence). + + - q: Does the Model entity configure the provider, credentials, or endpoint? + a: | + Model identifies the model and defines model-level behavior. Provider credentials are managed separately (for example, through provider configuration) and are not stored on the Model itself. + + - q: Does the runtime Model entity have the same fields as the {{site.konnect_short_name}} Model entity? + a: | + Not necessarily. The runtime entity is a smaller surface than the {{site.konnect_short_name}} entity, and field parity isn't guaranteed across releases. + See [Models in {{site.konnect_short_name}} and on-prem deployments](#models-in-konnect-and-on-prem-deployments). + + - q: Where do {{site.konnect_short_name}} Policies fit in? + a: | + Policies are control-plane entities. 
They are translated into runtime plugin configurations scoped to the matching Model. There is no separate runtime Policy entity. --- +## What is a Model? + +A Model is a first-class AI Gateway entity that defines a named AI model (for example, `openai/gpt-4o`) for model selection and policy targeting. + +You can target policies and supported plugin behavior to a specific Model. This lets you apply different rate limits, guardrails, and transformations per model without duplicating Routes or Services. + +In both deployment modes, you configure AI Gateway through first-class AI entities (for example, `ai_model`, `ai_provider`, and `ai_policy`). The control plane derives and manages the underlying runtime primitives (such as Services, Routes, and plugins) from those entities. + +## Model and plugin interaction + +Model participates in runtime plugin resolution alongside other scoping dimensions, such as [Consumer](/gateway/entities/consumer/), [Consumer Group](/gateway/entities/consumer-group/), [Route](/gateway/entities/route/), and [Service](/gateway/entities/service/). + +Model behavior spans both control-plane configuration and request-time execution: + +{% mermaid %} +flowchart LR + A["Incoming request"] --> B["Route + Service selection"] + B --> C["Resolve request model"] + C --> D{"Model-scoped config match?"} + D -- "yes" --> E["Apply model-scoped plugins"] + D -- "no" --> F["Apply non-model-scoped plugins"] +{% endmermaid %} + +At runtime, the request model is resolved by the AI routing flow (AI Proxy, AI Proxy Advanced, or a model shim flow, depending on deployment configuration). The plugin iterator then uses the resolved Model to select matching plugin configurations. + +{:.warning} +> **Caveat for plugins with priority higher than AI Proxy** +> +> The AI Proxy and AI Proxy Advanced plugins run at priority `770`. Any plugin with a higher priority runs *before* the model is resolved. 
For those earlier plugins, the Model context is not yet available, and Model-scoped configurations won't activate on that request. See [Limitations](#limitations). + +## Models in {{site.konnect_short_name}} and on-prem deployments + +The Model entity exists in both {{site.konnect_short_name}} (control plane) and {{site.base_gateway}} (runtime). + +In {{site.konnect_short_name}}, you declare Model through the AI Gateway control-plane APIs. During config sync, the control plane translates the configuration into runtime-native data plane configuration. + +In on-prem AI Gateway, you declare Model through the `/ai/models` API surface (or compatible tooling such as decK). The on-prem control plane stores AI entities as first-class objects and manages derived runtime primitives for you. + +### Deployment mode differences + +* In {{site.konnect_short_name}}, Model is managed through {{site.konnect_short_name}} AI Gateway APIs. +* In on-prem AI Gateway, Model is managed through `/ai/*` Admin API endpoints. +* In both modes, Model is first-class and runtime primitives are derived from AI entities. + +### Policies are control-plane only + +AI Gateway exposes a Policy entity for declaring AI guardrails, rate limits, and similar controls against Models. + +The Policy entity has no runtime counterpart. During config sync, each Policy is translated into one or more runtime plugin configurations that target the corresponding runtime Model. + +### {{site.konnect_short_name}} and runtime field parity + +The runtime Model entity is intentionally a smaller surface than control-plane Model APIs. Depending on deployment mode, some control-plane fields may not map 1:1 to runtime fields. + +## Plugin configuration precedence + +When multiple plugin configurations could match a request, {{site.base_gateway}} picks the most specific one. Model is treated as an additional specificity axis: within any Consumer / Route / Service tier, the variant with `+ Model` outranks the variant without. 
+ +The full precedence chain is: + +{% table %} +columns: + - title: Rank + key: rank + - title: Scope combination + key: scope +rows: + - rank: 1 + scope: Consumer + Route + Service + Model + - rank: 2 + scope: Consumer + Route + Service + - rank: 3 + scope: Consumer Group + Route + Service + Model + - rank: 4 + scope: Consumer Group + Route + Service + - rank: 5 + scope: Consumer + Route + Model + - rank: 6 + scope: Consumer + Route + - rank: 7 + scope: Consumer + Service + Model + - rank: 8 + scope: Consumer + Service + - rank: 9 + scope: Consumer Group + Route + Model + - rank: 10 + scope: Consumer Group + Route + - rank: 11 + scope: Consumer Group + Service + Model + - rank: 12 + scope: Consumer Group + Service + - rank: 13 + scope: Route + Service + Model + - rank: 14 + scope: Route + Service + - rank: 15 + scope: Consumer + Model + - rank: 16 + scope: Consumer + - rank: 17 + scope: Consumer Group + Model + - rank: 18 + scope: Consumer Group + - rank: 19 + scope: Route + Model + - rank: 20 + scope: Route + - rank: 21 + scope: Service + Model + - rank: 22 + scope: Service + - rank: 23 + scope: Model + - rank: 24 + scope: Global +{% endtable %} + +## Limitations + +Not every plugin can be scoped to a Model. Some plugins run before model context is available, and some are structurally incompatible with Model scoping. 
+ +### Plugins that cannot be scoped to a Model + +The following plugins do not accept a Model scope: + +* Authentication plugins, because they must run before any AI-specific processing to establish the consumer identity that Model-scoped configs depend on: + * [Basic Authentication](/plugins/basic-auth/) + * [HMAC Authentication](/plugins/hmac-auth/) + * [JWE Decrypt](/plugins/jwe-decrypt/) + * [JWT](/plugins/jwt/) + * [JWT Signer](/plugins/jwt-signer/) + * [Key Authentication](/plugins/key-auth/) + * [Key Authentication Encrypted](/plugins/key-auth-enc/) + * [LDAP Authentication](/plugins/ldap-auth/) + * [LDAP Authentication Advanced](/plugins/ldap-auth-advanced/) + * [OAuth 2.0](/plugins/oauth2/) + * [OAuth 2.0 Introspection](/plugins/oauth2-introspection/) + * [OpenID Connect](/plugins/openid-connect/) + * [Session](/plugins/session/) + * [Mutual TLS Authentication](/plugins/mtls-auth/) + * [Header Certificate Authentication](/plugins/header-cert-auth/) + * [SAML](/plugins/saml/) + * [Vault Authentication](/plugins/vault-auth/) +* AI routing and agent plugins, because these plugins resolve the model (AI Proxy, AI Proxy Advanced) or operate on protocols where Model scoping is not meaningful (A2A, MCP): + * [AI Proxy](/plugins/ai-proxy/) + * [AI Proxy Advanced](/plugins/ai-proxy-advanced/) + * [AI A2A Proxy](/plugins/ai-a2a-proxy/) + * [AI MCP Proxy](/plugins/ai-mcp-proxy/) + +### Plugins that run before model resolution + +Any plugin with a priority higher than `770` (the priority of AI Proxy and AI Proxy Advanced) runs before the model is known. For those plugins, Model-scoped configs are not applied unless one of the following is true: + +* [Dynamic plugin ordering](/gateway/plugin-development/entities/plugin/) is enabled to push the plugin's execution after AI Proxy. +* The AI Model Shim plugin is deployed on the route or service to resolve the model during the access phase before other plugins run. 
+ +## Set up a Model + +The following example shows a Model named `openai/gpt-4o`. + +{% entity_example %} +type: model +data: + name: openai/gpt-4o +formats: + - deck + - admin-api + - konnect-api +{% endentity_example %} + +## Scope a plugin to a Model + +Once a Model exists, you can scope a Model-aware plugin configuration by setting the `model` field on the plugin. + +The following example assumes two Models (`openai/gpt-4o` and `openai/gpt-4o-mini`) already exist and applies a quota to one of them. + +{% entity_example %} +type: plugin +data: + name: ai-rate-limiting-advanced + model: openai/gpt-4o + config: + llm_providers: + - name: openai + limit: + - 3 + window_size: + - 30 + window_type: fixed +formats: + - deck + - admin-api +{% endentity_example %} + + ## Set up a Model @@ -32,4 +259,4 @@ data: ## Schema -{% entity_schema %} \ No newline at end of file +{% entity_schema %} From 5b8852f38446c4045f2afac51fe6205dd3c90ded Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Tue, 28 Apr 2026 14:34:01 +0200 Subject: [PATCH 07/53] updates --- app/_ai_gateway_entities/model.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/app/_ai_gateway_entities/model.md b/app/_ai_gateway_entities/model.md index b6c992f8d2..7031f8a907 100644 --- a/app/_ai_gateway_entities/model.md +++ b/app/_ai_gateway_entities/model.md @@ -61,6 +61,8 @@ You can target policies and supported plugin behavior to a specific Model. This In both deployment modes, you configure AI Gateway through first-class AI entities (for example, `ai_model`, `ai_provider`, and `ai_policy`). The control plane derives and manages the underlying runtime primitives (such as Services, Routes, and plugins) from those entities. +For on-prem deployments, this `/ai/*` surface is a bridge architecture that aligns on-prem AI Gateway with the same domain model used in {{site.konnect_short_name}} while the next-generation runtime is introduced. 
+ ## Model and plugin interaction Model participates in runtime plugin resolution alongside other scoping dimensions, such as [Consumer](/gateway/entities/consumer/), [Consumer Group](/gateway/entities/consumer-group/), [Route](/gateway/entities/route/), and [Service](/gateway/entities/service/). @@ -94,7 +96,7 @@ In on-prem AI Gateway, you declare Model through the `/ai/models` API surface (o ### Deployment mode differences * In {{site.konnect_short_name}}, Model is managed through {{site.konnect_short_name}} AI Gateway APIs. -* In on-prem AI Gateway, Model is managed through `/ai/*` Admin API endpoints. +* In on-prem AI Gateway, Model is managed through `/ai/*` Admin API endpoints (no workspace prefix). * In both modes, Model is first-class and runtime primitives are derived from AI entities. ### Policies are control-plane only From 396e58ca8ed16f0077b57759f2d4fcc867900090 Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Wed, 29 Apr 2026 06:07:16 +0200 Subject: [PATCH 08/53] updates --- app/_ai_gateway_entities/model.md | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/app/_ai_gateway_entities/model.md b/app/_ai_gateway_entities/model.md index 7031f8a907..5232389849 100644 --- a/app/_ai_gateway_entities/model.md +++ b/app/_ai_gateway_entities/model.md @@ -67,16 +67,7 @@ For on-prem deployments, this `/ai/*` surface is a bridge architecture that alig Model participates in runtime plugin resolution alongside other scoping dimensions, such as [Consumer](/gateway/entities/consumer/), [Consumer Group](/gateway/entities/consumer-group/), [Route](/gateway/entities/route/), and [Service](/gateway/entities/service/). 
-Model behavior spans both control-plane configuration and request-time execution: - -{% mermaid %} -flowchart LR - A["Incoming request"] --> B["Route + Service selection"] - B --> C["Resolve request model"] - C --> D{"Model-scoped config match?"} - D -- "yes" --> E["Apply model-scoped plugins"] - D -- "no" --> F["Apply non-model-scoped plugins"] -{% endmermaid %} +A plugin configuration can reference a Model through its `model` field. When a plugin entry is scoped to a Model, that entry only applies to requests where AI Proxy or AI Proxy Advanced resolves the same model name from the request. Plugin entries without a `model` field apply regardless of which model the request targets. At runtime, the request model is resolved by the AI routing flow (AI Proxy, AI Proxy Advanced, or a model shim flow, depending on deployment configuration). The plugin iterator then uses the resolved Model to select matching plugin configurations. From b18f469c36931e8252c32aaef79cc49296d07d1c Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Wed, 29 Apr 2026 06:11:21 +0200 Subject: [PATCH 09/53] fix --- app/_ai_gateway_entities/model.md | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/app/_ai_gateway_entities/model.md b/app/_ai_gateway_entities/model.md index 5232389849..7799fd6029 100644 --- a/app/_ai_gateway_entities/model.md +++ b/app/_ai_gateway_entities/model.md @@ -209,11 +209,7 @@ The following example shows a Model named `openai/gpt-4o`. 
{% entity_example %} type: model data: - name: openai/gpt-4o -formats: - - deck - - admin-api - - konnect-api + model: openai-something {% endentity_example %} ## Scope a plugin to a Model From 4487228138e69e61317f4db2a305911e0a427f7e Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Wed, 29 Apr 2026 09:55:56 +0200 Subject: [PATCH 10/53] Update endpoints handling for on-prem --- app/_data/entity_examples/config.yml | 27 +++++++++++++++++-- .../entity_example/presenters/admin-api.rb | 2 +- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/app/_data/entity_examples/config.yml b/app/_data/entity_examples/config.yml index 35fbc69c94..1f0a20fc91 100644 --- a/app/_data/entity_examples/config.yml +++ b/app/_data/entity_examples/config.yml @@ -32,10 +32,12 @@ formats: admin-api: label: 'Admin API' base_url: 'http://localhost:8001' - ai_gateway_base_url: 'http://localhost:8001/ai-gateways/{ai_gateway}' + ai_gateway_base_url: 'http://localhost:8001/ai' endpoints: + # core entities consumer: '/consumers/' consumer_group: '/consumer_groups/' + model: '/models/' route: '/routes/' service: '/services/' target: '/upstreams/{upstream}/targets/' @@ -54,17 +56,38 @@ formats: keyring: '/keyring/' event_hook: '/event-hooks/' partial: '/partials/' + # AI entities (/ai/* on on-prem AI Gateway) + ai_provider: '/providers/' + ai_model: '/models/' + ai_agent: '/agents/' + ai_mcp_server: '/mcp-servers/' + ai_policy: '/policies/' + ai_consumer: '/consumers/' + ai_consumer_group: '/consumer-groups/' plugin_endpoints: consumer: '/consumers/{consumer}/plugins/' consumer_group: '/consumer_groups/{consumer_group}/plugins/' route: '/routes/{route}/plugins/' service: '/services/{service}/plugins/' global: '/plugins/' + ai_policy_endpoints: + ai_model: '/models/{ai_model}/policies/' + ai_agent: '/agents/{ai_agent}/policies/' + ai_mcp_server: '/mcp-servers/{ai_mcp_server}/policies/' variables: <<: *variables ai_gateway: placeholder: 'AIGatewayId' description: 'The `id` of the AI Gateway.' 
+ ai_model: + placeholder: 'aiModelId' + description: 'The `id` of the AI Model.' + ai_agent: + placeholder: 'aiAgentId' + description: 'The `id` of the AI Agent.' + ai_mcp_server: + placeholder: 'aiMCPServerId' + description: 'The `id` of the AI MCP Server.' konnect-api: label: 'Konnect API' @@ -187,4 +210,4 @@ phases: produce: label: 'Produce Phase' cluster: - label: 'Cluster Phase' \ No newline at end of file + label: 'Cluster Phase' diff --git a/app/_plugins/drops/entity_example/presenters/admin-api.rb b/app/_plugins/drops/entity_example/presenters/admin-api.rb index 266ae0e0ca..5ccb85a7a8 100644 --- a/app/_plugins/drops/entity_example/presenters/admin-api.rb +++ b/app/_plugins/drops/entity_example/presenters/admin-api.rb @@ -56,7 +56,7 @@ def base_url when 'gateway' formats['admin-api']['base_url'] when 'ai-gateway' - formats['admin-api']['ai_gateway_base_url'] + formats['admin-api']['ai_gateway_base_url'] || formats['admin-api']['base_url'] else raise ArgumentError, "Unsupported product: #{@example_drop.product}" end From abcfba0dc597fd3290022916f2b05dc7feb929d2 Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Wed, 29 Apr 2026 09:56:29 +0200 Subject: [PATCH 11/53] disambiguate CP/DP references --- app/_ai_gateway_entities/model.md | 46 ++++++++++++++++--------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/app/_ai_gateway_entities/model.md b/app/_ai_gateway_entities/model.md index 7799fd6029..6200686e37 100644 --- a/app/_ai_gateway_entities/model.md +++ b/app/_ai_gateway_entities/model.md @@ -19,6 +19,8 @@ tools: related_resources: - text: About {{site.ai_gateway}} url: /ai-gateway/ + - text: Control Plane and Data Plane networking in {{site.konnect_short_name}} + url: /konnect-platform/network/ - text: AI Proxy plugin url: /plugins/ai-proxy/ - text: AI Proxy Advanced plugin @@ -43,14 +45,14 @@ faqs: a: | Model identifies the model and defines model-level behavior. 
Provider credentials are managed separately (for example, through provider configuration) and are not stored on the Model itself. - - q: Does the runtime Model entity have the same fields as the {{site.konnect_short_name}} Model entity? + - q: Does the Data Plane Model entity have the same fields as the {{site.konnect_short_name}} Model entity? a: | - Not necessarily. The runtime entity is a smaller surface than the {{site.konnect_short_name}} entity, and field parity isn't guaranteed across releases. + Not necessarily. The Data Plane Model entity is a smaller surface than the {{site.konnect_short_name}} entity, and field parity isn't guaranteed across releases. See [Models in {{site.konnect_short_name}} and on-prem deployments](#models-in-konnect-and-on-prem-deployments). - q: Where do {{site.konnect_short_name}} Policies fit in? a: | - Policies are control-plane entities. They are translated into runtime plugin configurations scoped to the matching Model. There is no separate runtime Policy entity. + Policies are Control Plane entities. They are translated into plugin configurations used for runtime behavior on the Data Plane. There is no separate Data Plane Policy entity. --- ## What is a Model? @@ -59,17 +61,24 @@ A Model is a first-class AI Gateway entity that defines a named AI model (for ex You can target policies and supported plugin behavior to a specific Model. This lets you apply different rate limits, guardrails, and transformations per model without duplicating Routes or Services. -In both deployment modes, you configure AI Gateway through first-class AI entities (for example, `ai_model`, `ai_provider`, and `ai_policy`). The control plane derives and manages the underlying runtime primitives (such as Services, Routes, and plugins) from those entities. +In both deployment modes, you configure AI Gateway through first-class AI entities (for example, `ai_model`, `ai_provider`, and `ai_policy`). 
The Control Plane derives and manages the underlying Data Plane primitives (such as Services, Routes, and plugins) from those entities. -For on-prem deployments, this `/ai/*` surface is a bridge architecture that aligns on-prem AI Gateway with the same domain model used in {{site.konnect_short_name}} while the next-generation runtime is introduced. +In this document, **Control Plane** refers to where AI entities are declared and managed, while **Data Plane** refers to where request-time routing and plugin matching execute. + +For on-prem deployments, the AI Gateway Admin API (`/ai/*`) is the Control Plane surface for managing first-class AI entities; it follows the same domain model as {{site.konnect_short_name}} and translates those entities into Data Plane primitives. ## Model and plugin interaction -Model participates in runtime plugin resolution alongside other scoping dimensions, such as [Consumer](/gateway/entities/consumer/), [Consumer Group](/gateway/entities/consumer-group/), [Route](/gateway/entities/route/), and [Service](/gateway/entities/service/). +Model participates in plugin resolution for runtime behavior on the Data Plane, alongside other scoping dimensions, such as [Consumer](/gateway/entities/consumer/), [Consumer Group](/gateway/entities/consumer-group/), [Route](/gateway/entities/route/), and [Service](/gateway/entities/service/). + +Control Plane management differs by deployment mode: + +* In {{site.konnect_short_name}}, you manage Model and related AI entities through {{site.konnect_short_name}} AI Gateway APIs. +* In on-prem AI Gateway, you manage the same entity concepts through `/ai/*` Admin API endpoints. A plugin configuration can reference a Model through its `model` field. When a plugin entry is scoped to a Model, that entry only applies to requests where AI Proxy or AI Proxy Advanced resolves the same model name from the request. Plugin entries without a `model` field apply regardless of which model the request targets. 
-At runtime, the request model is resolved by the AI routing flow (AI Proxy, AI Proxy Advanced, or a model shim flow, depending on deployment configuration). The plugin iterator then uses the resolved Model to select matching plugin configurations. +After Control Plane configuration is translated and applied to the Data Plane, behavior is shared across deployment modes: the request model is resolved by the AI routing flow (AI Proxy, AI Proxy Advanced, or a model shim flow, depending on deployment configuration), and the plugin iterator uses the resolved Model to select matching plugin configurations. {:.warning} > **Caveat for plugins with priority higher than AI Proxy** @@ -78,27 +87,23 @@ At runtime, the request model is resolved by the AI routing flow (AI Proxy, AI P ## Models in {{site.konnect_short_name}} and on-prem deployments -The Model entity exists in both {{site.konnect_short_name}} (control plane) and {{site.base_gateway}} (runtime). +The Model entity exists in both {{site.konnect_short_name}} (Control Plane) and {{site.base_gateway}} (Data Plane). -In {{site.konnect_short_name}}, you declare Model through the AI Gateway control-plane APIs. During config sync, the control plane translates the configuration into runtime-native data plane configuration. +In {{site.konnect_short_name}}, you declare Model through the AI Gateway Control Plane APIs. During config sync, the Control Plane translates the configuration into Data Plane configuration. -In on-prem AI Gateway, you declare Model through the `/ai/models` API surface (or compatible tooling such as decK). The on-prem control plane stores AI entities as first-class objects and manages derived runtime primitives for you. +In on-prem AI Gateway, you declare Model through the `/ai/models` API surface (or compatible tooling such as decK). The on-prem Control Plane stores AI entities as first-class objects and manages derived Data Plane primitives for you. 
-### Deployment mode differences +For request-time behavior and plugin matching details, see [Model and plugin interaction](#model-and-plugin-interaction). -* In {{site.konnect_short_name}}, Model is managed through {{site.konnect_short_name}} AI Gateway APIs. -* In on-prem AI Gateway, Model is managed through `/ai/*` Admin API endpoints (no workspace prefix). -* In both modes, Model is first-class and runtime primitives are derived from AI entities. - -### Policies are control-plane only +### Policies are Control Plane only AI Gateway exposes a Policy entity for declaring AI guardrails, rate limits, and similar controls against Models. -The Policy entity has no runtime counterpart. During config sync, each Policy is translated into one or more runtime plugin configurations that target the corresponding runtime Model. +The Policy entity has no Data Plane counterpart. During config sync, each Policy is translated into one or more plugin configurations that target the corresponding Data Plane Model. -### {{site.konnect_short_name}} and runtime field parity +### {{site.konnect_short_name}} and Data Plane field parity -The runtime Model entity is intentionally a smaller surface than control-plane Model APIs. Depending on deployment mode, some control-plane fields may not map 1:1 to runtime fields. +The Data Plane Model entity is intentionally a smaller surface than Control Plane Model APIs. Depending on deployment mode, some Control Plane fields may not map 1:1 to Data Plane fields. 
## Plugin configuration precedence @@ -231,9 +236,6 @@ data: window_size: - 30 window_type: fixed -formats: - - deck - - admin-api {% endentity_example %} From a8d56ee296a2ef220e4f7325cdd80db0db8ac433 Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Wed, 29 Apr 2026 12:21:50 +0200 Subject: [PATCH 12/53] Add landing page and entity subpages --- app/_ai_gateway_entities/agent.md | 23 +++++ app/_ai_gateway_entities/consumer-group.md | 23 +++++ app/_ai_gateway_entities/consumer.md | 23 +++++ app/_ai_gateway_entities/mcp-server.md | 23 +++++ app/_ai_gateway_entities/policy.md | 24 +++++ app/_ai_gateway_entities/provider.md | 23 +++++ app/_landing_pages/ai-gateway/entities.yaml | 109 ++++++++++++++++++++ 7 files changed, 248 insertions(+) create mode 100644 app/_ai_gateway_entities/agent.md create mode 100644 app/_ai_gateway_entities/consumer-group.md create mode 100644 app/_ai_gateway_entities/consumer.md create mode 100644 app/_ai_gateway_entities/mcp-server.md create mode 100644 app/_ai_gateway_entities/policy.md create mode 100644 app/_ai_gateway_entities/provider.md create mode 100644 app/_landing_pages/ai-gateway/entities.yaml diff --git a/app/_ai_gateway_entities/agent.md b/app/_ai_gateway_entities/agent.md new file mode 100644 index 0000000000..07fa26bc4d --- /dev/null +++ b/app/_ai_gateway_entities/agent.md @@ -0,0 +1,23 @@ +--- +title: AI Agent +content_type: reference +entities: + - ai-agent +products: + - ai-gateway +description: Agent entity used by {{site.ai_gateway}} for A2A and HTTP agent configurations.
+schema: + api: konnect/ai-gateway + path: /schemas/AIGatewayAgent +works_on: + - konnect + - on-prem +tools: + - deck + - admin-api + - konnect-api +--- + +## Schema + +{% entity_schema %} diff --git a/app/_ai_gateway_entities/consumer-group.md b/app/_ai_gateway_entities/consumer-group.md new file mode 100644 index 0000000000..0cdc629200 --- /dev/null +++ b/app/_ai_gateway_entities/consumer-group.md @@ -0,0 +1,23 @@ +--- +title: AI Consumer Group +content_type: reference +entities: + - ai-consumer-group +products: + - ai-gateway +description: AI consumer group entity wrapper for group-scoped AI Gateway behavior. +schema: + api: konnect/ai-gateway + path: /schemas/AIGatewayConsumerGroup +works_on: + - konnect + - on-prem +tools: + - deck + - admin-api + - konnect-api +--- + +## Schema + +{% entity_schema %} diff --git a/app/_ai_gateway_entities/consumer.md b/app/_ai_gateway_entities/consumer.md new file mode 100644 index 0000000000..7fa2b5958e --- /dev/null +++ b/app/_ai_gateway_entities/consumer.md @@ -0,0 +1,23 @@ +--- +title: AI Consumer +content_type: reference +entities: + - ai-consumer +products: + - ai-gateway +description: AI consumer entity wrapper for consumer-specific AI Gateway behavior. +schema: + api: konnect/ai-gateway + path: /schemas/AIGatewayConsumer +works_on: + - konnect + - on-prem +tools: + - deck + - admin-api + - konnect-api +--- + +## Schema + +{% entity_schema %} diff --git a/app/_ai_gateway_entities/mcp-server.md b/app/_ai_gateway_entities/mcp-server.md new file mode 100644 index 0000000000..ddaf5b86eb --- /dev/null +++ b/app/_ai_gateway_entities/mcp-server.md @@ -0,0 +1,23 @@ +--- +title: AI MCP Server +content_type: reference +entities: + - ai-mcp-server +products: + - ai-gateway +description: MCP server entity used by {{site.ai_gateway}}. 
+schema: + api: konnect/ai-gateway + path: /schemas/AIGatewayMCPServer +works_on: + - konnect + - on-prem +tools: + - deck + - admin-api + - konnect-api +--- + +## Schema + +{% entity_schema %} diff --git a/app/_ai_gateway_entities/policy.md b/app/_ai_gateway_entities/policy.md new file mode 100644 index 0000000000..46023c70d0 --- /dev/null +++ b/app/_ai_gateway_entities/policy.md @@ -0,0 +1,24 @@ +--- +title: AI Policy +content_type: reference +entities: + - ai-policy +products: + - ai-gateway +description: Policy entity for AI Gateway plugin configuration and scoping. +schema: + api: konnect/ai-gateway + path: /schemas/AIGatewayPolicy +works_on: + - konnect + - on-prem +tools: + - deck + - admin-api + - konnect-api +--- + + +## Schema + +{% entity_schema %} diff --git a/app/_ai_gateway_entities/provider.md b/app/_ai_gateway_entities/provider.md new file mode 100644 index 0000000000..00562573ba --- /dev/null +++ b/app/_ai_gateway_entities/provider.md @@ -0,0 +1,23 @@ +--- +title: AI Provider +content_type: reference +entities: + - ai-provider +products: + - ai-gateway +description: AI provider credentials and configuration used by {{site.ai_gateway}}. +schema: + api: konnect/ai-gateway + path: /schemas/AIGatewayProvider +works_on: + - konnect + - on-prem +tools: + - deck + - admin-api + - konnect-api +--- + +## Schema + +{% entity_schema %} diff --git a/app/_landing_pages/ai-gateway/entities.yaml b/app/_landing_pages/ai-gateway/entities.yaml new file mode 100644 index 0000000000..84114498a0 --- /dev/null +++ b/app/_landing_pages/ai-gateway/entities.yaml @@ -0,0 +1,109 @@ +metadata: + title: "{{site.ai_gateway}} entities" + content_type: landing_page + description: This page lists the entities that make up {{site.ai_gateway}}. 
+ breadcrumbs: + - /ai-gateway/ + products: + - ai-gateway + works_on: + - on-prem + - konnect + +rows: + - header: + type: h1 + text: "{{site.ai_gateway}} entities" + sub_text: "Entities are the components and objects that make up {{site.ai_gateway}}." + + - header: + type: h2 + text: "Core entities" + column_count: 3 + columns: + - blocks: + - type: card + config: + title: AI Provider + description: Stores upstream provider credentials and connection configuration. Providers are reusable and are not model endpoints. + cta: + text: AI Provider entity + url: /ai-gateway/entities/provider/ + - blocks: + - type: card + config: + title: Model + description: Defines a model endpoint and capability configuration used for model selection and policy targeting. + cta: + text: Model entity + url: /ai-gateway/entities/model/ + - blocks: + - type: card + config: + title: AI Agent + description: An A2A or HTTP agent exposed through the A2A proxy flow. Independent of Model. + cta: + text: AI Agent entity + url: /ai-gateway/entities/agent/ + - blocks: + - type: card + config: + title: AI MCP Server + description: An MCP server in passthrough, listener, or conversion-listener mode. Mode is immutable after creation. + cta: + text: AI MCP Server entity + url: /ai-gateway/entities/mcp-server/ + - blocks: + - type: card + config: + title: AI Policy + description: An AI Gateway plugin instance scoped globally or to a specific AI entity. Policy instances are independent. + cta: + text: AI Policy entity + url: /ai-gateway/entities/policy/ + - blocks: + - type: card + config: + title: AI Consumer + description: A thin wrapper around the existing Consumer entity. + cta: + text: AI Consumer entity + url: /ai-gateway/entities/consumer/ + - blocks: + - type: card + config: + title: AI Consumer Group + description: A thin wrapper around the existing Consumer Group entity. 
+ cta: + text: AI Consumer Group entity + url: /ai-gateway/entities/consumer-group/ + + - header: + type: h2 + text: "Security" + column_count: 3 + columns: + - blocks: + - type: card + config: + title: Vault + description: Store and reference secrets used by AI Gateway entities and plugins. + cta: + text: Vault entity + url: /ai-gateway/entities/vault/ + - blocks: + - type: card + config: + title: Key + description: Manage cryptographic key material used by AI Gateway security workflows. + cta: + text: Key entity + url: /ai-gateway/entities/key/ + - blocks: + - type: card + config: + title: Key Set + description: Group keys for rotation and key management in AI Gateway configurations. + cta: + text: Key Set entity + url: /ai-gateway/entities/key-set/ From 49d6b35a27de6d26fda77021ae8971dafe36be10 Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Wed, 29 Apr 2026 13:10:52 +0200 Subject: [PATCH 13/53] appease vale --- app/_ai_gateway_entities/consumer-group.md | 2 +- app/_ai_gateway_entities/consumer.md | 2 +- app/_ai_gateway_entities/model.md | 16 ++++++++-------- app/_ai_gateway_entities/policy.md | 2 +- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/app/_ai_gateway_entities/consumer-group.md b/app/_ai_gateway_entities/consumer-group.md index 0cdc629200..b8cefaf857 100644 --- a/app/_ai_gateway_entities/consumer-group.md +++ b/app/_ai_gateway_entities/consumer-group.md @@ -5,7 +5,7 @@ entities: - ai-consumer-group products: - ai-gateway -description: AI consumer group entity wrapper for group-scoped AI Gateway behavior. +description: AI consumer group entity wrapper for group-scoped {{site.ai_gateway}} behavior. 
schema: api: konnect/ai-gateway path: /schemas/AIGatewayConsumerGroup diff --git a/app/_ai_gateway_entities/consumer.md b/app/_ai_gateway_entities/consumer.md index 7fa2b5958e..47b36c40ba 100644 --- a/app/_ai_gateway_entities/consumer.md +++ b/app/_ai_gateway_entities/consumer.md @@ -5,7 +5,7 @@ entities: - ai-consumer products: - ai-gateway -description: AI consumer entity wrapper for consumer-specific AI Gateway behavior. +description: AI consumer entity wrapper for consumer-specific {{site.ai_gateway}} behavior. schema: api: konnect/ai-gateway path: /schemas/AIGatewayConsumer diff --git a/app/_ai_gateway_entities/model.md b/app/_ai_gateway_entities/model.md index 6200686e37..5fd8ca3838 100644 --- a/app/_ai_gateway_entities/model.md +++ b/app/_ai_gateway_entities/model.md @@ -57,15 +57,15 @@ faqs: ## What is a Model? -A Model is a first-class AI Gateway entity that defines a named AI model (for example, `openai/gpt-4o`) for model selection and policy targeting. +A Model is a first-class {{site.ai_gateway}} entity that defines a named AI model (for example, `openai/gpt-4o`) for model selection and policy targeting. You can target policies and supported plugin behavior to a specific Model. This lets you apply different rate limits, guardrails, and transformations per model without duplicating Routes or Services. -In both deployment modes, you configure AI Gateway through first-class AI entities (for example, `ai_model`, `ai_provider`, and `ai_policy`). The Control Plane derives and manages the underlying Data Plane primitives (such as Services, Routes, and plugins) from those entities. +In both deployment modes, you configure {{site.ai_gateway}} through first-class AI entities (for example, `ai_model`, `ai_provider`, and `ai_policy`). The Control Plane derives and manages the underlying Data Plane primitives (such as Services, Routes, and plugins) from those entities. 
In this document, **Control Plane** refers to where AI entities are declared and managed, while **Data Plane** refers to where request-time routing and plugin matching execute. -For on-prem deployments, the AI Gateway Admin API (`/ai/*`) is the Control Plane surface for managing first-class AI entities; it follows the same domain model as {{site.konnect_short_name}} and translates those entities into Data Plane primitives. +For on-prem deployments, the {{site.ai_gateway}} Admin API (`/ai/*`) is the Control Plane surface for managing first-class AI entities; it follows the same domain model as {{site.konnect_short_name}} and translates those entities into Data Plane primitives. ## Model and plugin interaction @@ -73,8 +73,8 @@ Model participates in plugin resolution for runtime behavior on the Data Plane, Control Plane management differs by deployment mode: -* In {{site.konnect_short_name}}, you manage Model and related AI entities through {{site.konnect_short_name}} AI Gateway APIs. -* In on-prem AI Gateway, you manage the same entity concepts through `/ai/*` Admin API endpoints. +* In {{site.konnect_short_name}}, you manage Model and related AI entities through {{site.konnect_short_name}} {{site.ai_gateway}} APIs. +* In on-prem {{site.ai_gateway}}, you manage the same entity concepts through `/ai/*` Admin API endpoints. A plugin configuration can reference a Model through its `model` field. When a plugin entry is scoped to a Model, that entry only applies to requests where AI Proxy or AI Proxy Advanced resolves the same model name from the request. Plugin entries without a `model` field apply regardless of which model the request targets. @@ -89,15 +89,15 @@ After Control Plane configuration is translated and applied to the Data Plane, b The Model entity exists in both {{site.konnect_short_name}} (Control Plane) and {{site.base_gateway}} (Data Plane). -In {{site.konnect_short_name}}, you declare Model through the AI Gateway Control Plane APIs. 
During config sync, the Control Plane translates the configuration into Data Plane configuration. +In {{site.konnect_short_name}}, you declare Model through the {{site.ai_gateway}} Control Plane APIs. During config sync, the Control Plane translates the configuration into Data Plane configuration. -In on-prem AI Gateway, you declare Model through the `/ai/models` API surface (or compatible tooling such as decK). The on-prem Control Plane stores AI entities as first-class objects and manages derived Data Plane primitives for you. +In on-prem {{site.ai_gateway}}, you declare Model through the `/ai/models` API surface (or compatible tooling such as decK). The on-prem Control Plane stores AI entities as first-class objects and manages derived Data Plane primitives for you. For request-time behavior and plugin matching details, see [Model and plugin interaction](#model-and-plugin-interaction). ### Policies are Control Plane only -AI Gateway exposes a Policy entity for declaring AI guardrails, rate limits, and similar controls against Models. +{{site.ai_gateway}} exposes a Policy entity for declaring AI guardrails, rate limits, and similar controls against Models. The Policy entity has no Data Plane counterpart. During config sync, each Policy is translated into one or more plugin configurations that target the corresponding Data Plane Model. diff --git a/app/_ai_gateway_entities/policy.md b/app/_ai_gateway_entities/policy.md index 46023c70d0..a859351ca1 100644 --- a/app/_ai_gateway_entities/policy.md +++ b/app/_ai_gateway_entities/policy.md @@ -5,7 +5,7 @@ entities: - ai-policy products: - ai-gateway -description: Policy entity for AI Gateway plugin configuration and scoping. +description: Policy entity for {{site.ai_gateway}} plugin configuration and scoping. 
schema: api: konnect/ai-gateway path: /schemas/AIGatewayPolicy From d560c2f74bc97a1a9020a596fa1464d83976bc53 Mon Sep 17 00:00:00 2001 From: Fabian Rodriguez Date: Wed, 29 Apr 2026 15:11:51 +0200 Subject: [PATCH 14/53] wip: add dummy oas spec for ai gateway --- api-specs/konnect/ai-gateway/v2/openapi.yaml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 api-specs/konnect/ai-gateway/v2/openapi.yaml diff --git a/api-specs/konnect/ai-gateway/v2/openapi.yaml b/api-specs/konnect/ai-gateway/v2/openapi.yaml new file mode 100644 index 0000000000..170981dc02 --- /dev/null +++ b/api-specs/konnect/ai-gateway/v2/openapi.yaml @@ -0,0 +1,19 @@ +openapi: 3.0.0 +info: + title: Konnect AI Gateway + version: 0.0.0 + description: Internal API for managing Kong AI Gateway policies. + contact: + name: Kong + url: 'https://cloud.konghq.com' +servers: + - url: 'https://us.api.konghq.com/v1' + description: US Region Base URL + - url: 'https://eu.api.konghq.com/v1' + description: EU Region Base URL + - url: 'https://au.api.konghq.com/v1' + description: AU Region Base URL + - url: 'https://me.api.konghq.com/v1' + description: Middle-East Production region + - url: 'https://in.api.konghq.com/v1' + description: India Production region From e655ff53666cf33766f80c95d9817b7016f4d4dd Mon Sep 17 00:00:00 2001 From: Fabian Rodriguez Date: Wed, 29 Apr 2026 15:12:18 +0200 Subject: [PATCH 15/53] fix: product name, it's event-gateway --- app/_plugins/drops/entity_example/presenters/konnect-api.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/_plugins/drops/entity_example/presenters/konnect-api.rb b/app/_plugins/drops/entity_example/presenters/konnect-api.rb index f7d1e08f74..0d152376fb 100644 --- a/app/_plugins/drops/entity_example/presenters/konnect-api.rb +++ b/app/_plugins/drops/entity_example/presenters/konnect-api.rb @@ -47,7 +47,7 @@ def default_variables case @example_drop.product when 'gateway' formats['konnect-api']['variables'] - when 
'event_gateway' + when 'event-gateway' formats['konnect-api']['event_gateway_variables'] when 'ai-gateway' formats['konnect-api']['ai_gateway_variables'] @@ -67,7 +67,7 @@ def base_url @base_url ||= case @example_drop.product when 'gateway' formats['konnect-api']['base_url'] - when 'event_gateway' + when 'event-gateway' formats['konnect-api']['event_gateway_base_url'] when 'ai-gateway' formats['konnect-api']['ai_gateway_base_url'] From 2d276541393409c52bc924c65a2545b12743dc3b Mon Sep 17 00:00:00 2001 From: Fabian Rodriguez Date: Wed, 29 Apr 2026 15:12:51 +0200 Subject: [PATCH 16/53] fix: use schema.api for fetching the api-spec file for rendering entity_schemas --- app/_plugins/drops/entity_schema.rb | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/app/_plugins/drops/entity_schema.rb b/app/_plugins/drops/entity_schema.rb index fd37919bc2..62b1585efd 100644 --- a/app/_plugins/drops/entity_schema.rb +++ b/app/_plugins/drops/entity_schema.rb @@ -57,20 +57,12 @@ def api_file @api_file ||= [ File.expand_path('../', @site.source), 'api-specs', - *product_path, + @schema.fetch('api'), release_path, 'openapi.yaml' ].join('/') end - def product_path - if @release.ee_version - %w[gateway admin-ee] - else - %w[konnect event-gateway] - end - end - def release_path if @release.ee_version @release.number From 4167c07d5b8e77db978be776a40bee03632550a6 Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Thu, 30 Apr 2026 08:57:07 +0200 Subject: [PATCH 17/53] refactor models doc --- app/_ai_gateway_entities/model.md | 307 +++++++++++++----------------- 1 file changed, 132 insertions(+), 175 deletions(-) diff --git a/app/_ai_gateway_entities/model.md b/app/_ai_gateway_entities/model.md index 5fd8ca3838..de796bcd9e 100644 --- a/app/_ai_gateway_entities/model.md +++ b/app/_ai_gateway_entities/model.md @@ -19,235 +19,192 @@ tools: related_resources: - text: About {{site.ai_gateway}} url: /ai-gateway/ - - text: Control Plane and Data Plane networking in 
{{site.konnect_short_name}} - url: /konnect-platform/network/ - - text: AI Proxy plugin - url: /plugins/ai-proxy/ + - text: AI Gateway providers + url: /ai-gateway/ai-providers/ + - text: Load balancing with AI Proxy Advanced + url: /ai-gateway/load-balancing/ + - text: Provider entity + url: /ai-gateway/entities/provider/ + - text: Policy entity + url: /ai-gateway/entities/policy/ - text: AI Proxy Advanced plugin url: /plugins/ai-proxy-advanced/ - - text: Plugin entity - url: /gateway/entities/plugin/ - - text: Consumer entity - url: /gateway/entities/consumer/ - text: Consumer Group entity url: /gateway/entities/consumer-group/ faqs: - - q: What happens if a request's model doesn't match any Model entity? - a: Plugins scoped to a Model won't run. Plugins without a Model scope run normally. + - q: What's the difference between a Model entity and the `model` field inside an AI Proxy Advanced plugin config? + a: | + A Model entity is the first-class {{site.ai_gateway}} entity you declare through the `/ai/models` API or {{site.konnect_short_name}}. + {{site.ai_gateway}} derives the AI Proxy Advanced plugin (and its `model` configuration) from the entity. + You don't configure the underlying plugin directly. - - q: Can the same plugin be configured for multiple Models? - a: Yes. Create one plugin entry per Model scope. Each entry is resolved independently by the plugin iterator. + - q: Can I edit the Service, Routes, or plugins that {{site.ai_gateway}} generates from a Model? + a: | + No. Generated primitives are protected from direct modification through the standard Admin API. + Update the Model entity instead, and {{site.ai_gateway}} recreates the underlying primitives within a single transaction. - - q: Can I scope a plugin to both a Model and a Consumer, Route, or Service? - a: Yes. Combined scopes are supported and follow the precedence chain described in [Plugin configuration precedence](#plugin-configuration-precedence). 
+ - q: What happens when I update a Model? + a: | + {{site.ai_gateway}} deletes the Model's derived primitives and recreates them from the updated entity state, all within a single database transaction. + On failure, the transaction rolls back and no partial state is written. - - q: Does the Model entity configure the provider, credentials, or endpoint? + - q: What happens when I delete a Model? a: | - Model identifies the model and defines model-level behavior. Provider credentials are managed separately (for example, through provider configuration) and are not stored on the Model itself. + The Model and all its derived primitives (Service, Routes, plugin instances) are deleted within a single transaction. - - q: Does the Data Plane Model entity have the same fields as the {{site.konnect_short_name}} Model entity? + - q: Can I apply the same configuration to multiple Models? a: | - Not necessarily. The Data Plane Model entity is a smaller surface than the {{site.konnect_short_name}} entity, and field parity isn't guaranteed across releases. - See [Models in {{site.konnect_short_name}} and on-prem deployments](#models-in-konnect-and-on-prem-deployments). + Yes, by attaching one Policy with that configuration to each Model. + Policies are not shared between entities; each instance is independent. + See [Policy entity](/ai-gateway/entities/policy/). - - q: Where do {{site.konnect_short_name}} Policies fit in? + - q: How do I limit which consumers can reach a Model? a: | - Policies are Control Plane entities. They are translated into plugin configurations used for runtime behavior on the Data Plane. There is no separate Data Plane Policy entity. ---- + Set the `acls` field on the Model with allow or deny lists referencing Consumer Groups. + Consumer-level access is not configured on the Model directly. -## What is a Model? + - q: Does the Model entity store provider credentials? + a: | + No.
Provider credentials live on the [Provider entity](/ai-gateway/entities/provider/) and are materialized into the generated AI Proxy Advanced plugin configuration at Model creation time. + Updating a Provider propagates the credential change to all Models that reference it. -A Model is a first-class {{site.ai_gateway}} entity that defines a named AI model (for example, `openai/gpt-4o`) for model selection and policy targeting. + - q: Are on-prem and {{site.konnect_short_name}} Model entities the same? + a: | + The schemas are intentionally aligned at the field level. The same Model definition works in both modes. + On-prem omits a few {{site.konnect_short_name}}-specific path segments and concepts that don't apply in a single-deployment context, such as the `ai-gateways/{id}` container and Data Plane certificate or node management. The Model entity itself is identical. +--- -You can target policies and supported plugin behavior to a specific Model. This lets you apply different rate limits, guardrails, and transformations per model without duplicating Routes or Services. +## What is a Model? -In both deployment modes, you configure {{site.ai_gateway}} through first-class AI entities (for example, `ai_model`, `ai_provider`, and `ai_policy`). The Control Plane derives and manages the underlying Data Plane primitives (such as Services, Routes, and plugins) from those entities. +A Model is a first-class {{site.ai_gateway}} entity that represents an AI model endpoint exposed through {{site.ai_gateway}}. -In this document, **Control Plane** refers to where AI entities are declared and managed, while **Data Plane** refers to where request-time routing and plugin matching execute. +A Model declares which capabilities it exposes (such as `chat`, `responses`, or `embeddings`), which upstream provider models it routes to, and how requests are load-balanced and logged. 
{{site.ai_gateway}} translates a Model into the underlying primitives that the runtime uses to serve traffic, so you don't assemble Services, Routes, or plugin entries by hand. -For on-prem deployments, the {{site.ai_gateway}} Admin API (`/ai/*`) is the Control Plane surface for managing first-class AI entities; it follows the same domain model as {{site.konnect_short_name}} and translates those entities into Data Plane primitives. +Models are managed through the {{site.ai_gateway}} entity surface in both deployment modes: -## Model and plugin interaction +{% table %} +columns: + - title: Deployment + key: deployment + - title: Control Plane + key: cp + - title: Endpoint + key: endpoint +rows: + - deployment: "{{site.konnect_short_name}}" + cp: "{{site.konnect_short_name}} {{site.ai_gateway}} API" + endpoint: /v1/ai-gateways/{aiGatewayId}/models + - deployment: On-prem + cp: Admin API + endpoint: /ai/models +{% endtable %} -Model participates in plugin resolution for runtime behavior on the Data Plane, alongside other scoping dimensions, such as [Consumer](/gateway/entities/consumer/), [Consumer Group](/gateway/entities/consumer-group/), [Route](/gateway/entities/route/), and [Service](/gateway/entities/service/). +## How a Model maps to runtime configuration -Control Plane management differs by deployment mode: +When you create or update a Model, {{site.ai_gateway}} generates a fixed set of primitives: -* In {{site.konnect_short_name}}, you manage Model and related AI entities through {{site.konnect_short_name}} {{site.ai_gateway}} APIs. -* In on-prem {{site.ai_gateway}}, you manage the same entity concepts through `/ai/*` Admin API endpoints. +* One [Gateway Service](/gateway/entities/service/). +* One [Route](/gateway/entities/route/) per declared capability in the `capabilities` array. +* One [AI Proxy Advanced](/plugins/ai-proxy-advanced/) plugin per generated Route. -A plugin configuration can reference a Model through its `model` field. 
When a plugin entry is scoped to a Model, that entry only applies to requests where AI Proxy or AI Proxy Advanced resolves the same model name from the request. Plugin entries without a `model` field apply regardless of which model the request targets. +Provider credentials are materialized into the AI Proxy Advanced plugin configuration at generation time, sourced from the Provider entity that the Model's `target_models` reference. Updating the Provider propagates credential changes to every Model that uses it. -After Control Plane configuration is translated and applied to the Data Plane, behavior is shared across deployment modes: the request model is resolved by the AI routing flow (AI Proxy, AI Proxy Advanced, or a model shim flow, depending on deployment configuration), and the plugin iterator uses the resolved Model to select matching plugin configurations. +Generated primitives are protected. Direct PUT, PATCH, or DELETE calls against the underlying Service, Routes, or plugin entries through the standard Admin API are rejected. To change anything about a Model's runtime footprint, update the Model entity. {{site.ai_gateway}} deletes and recreates the derived primitives within a single transaction. -{:.warning} -> **Caveat for plugins with priority higher than AI Proxy** +{:.note} +> **Why a transaction instead of an in-place update?** > -> The AI Proxy and AI Proxy Advanced plugins run at priority `770`. Any plugin with a higher priority runs *before* the model is resolved. For those earlier plugins, the Model context is not yet available, and Model-scoped configurations won't activate on that request. See [Limitations](#limitations). - -## Models in {{site.konnect_short_name}} and on-prem deployments - -The Model entity exists in both {{site.konnect_short_name}} (Control Plane) and {{site.base_gateway}} (Data Plane). - -In {{site.konnect_short_name}}, you declare Model through the {{site.ai_gateway}} Control Plane APIs. 
During config sync, the Control Plane translates the configuration into Data Plane configuration. +> A Model's structure (which capabilities exist, which providers it routes to) determines how many Routes and plugin entries are needed. A delete-and-recreate cycle is the simplest way to keep the entity and its derived primitives consistent, especially when capabilities are added or removed. -In on-prem {{site.ai_gateway}}, you declare Model through the `/ai/models` API surface (or compatible tooling such as decK). The on-prem Control Plane stores AI entities as first-class objects and manages derived Data Plane primitives for you. +## Capabilities -For request-time behavior and plugin matching details, see [Model and plugin interaction](#model-and-plugin-interaction). +The `capabilities` field tells {{site.ai_gateway}} which AI workflows the Model exposes. Each capability becomes one Route on the generated Service. A Model must declare at least one capability. -### Policies are Control Plane only +Supported values for a `model` type are: -{{site.ai_gateway}} exposes a Policy entity for declaring AI guardrails, rate limits, and similar controls against Models. +* `chat` +* `responses` +* `embeddings` +* `image-generation` +* `image-edits` +* `audio-transcriptions` +* `audio-translations` +* `realtime` -The Policy entity has no Data Plane counterpart. During config sync, each Policy is translated into one or more plugin configurations that target the corresponding Data Plane Model. +For an `api` type Model (used for batch and file APIs), the supported values are `batches` and `files`. -### {{site.konnect_short_name}} and Data Plane field parity +Not every provider supports every capability. The set of capabilities you can declare on a Model depends on what the provider in `target_models` exposes. 
See [AI Gateway providers](/ai-gateway/ai-providers/) for per-provider details.## Target models and load balancing -The Data Plane Model entity is intentionally a smaller surface than Control Plane Model APIs. Depending on deployment mode, some Control Plane fields may not map 1:1 to Data Plane fields. +A Model's `target_models` field lists one or more upstream provider model instances. Each entry references a Provider (by `id` or `ref`), names the upstream model (for example, `gpt-4o`), and can override per-target settings such as `temperature`, `max_tokens`, `input_cost`, and `output_cost`. -## Plugin configuration precedence +When a Model has more than one target, requests are load-balanced according to `config.balancer`. For the supported algorithms, configuration options, and tuning guidance, see [Load balancing with AI Proxy Advanced](/ai-gateway/load-balancing/). -When multiple plugin configurations could match a request, {{site.base_gateway}} picks the most specific one. Model is treated as an additional specificity axis: within any Consumer / Route / Service tier, the variant with `+ Model` outranks the variant without. +## Access control -The full precedence chain is: +A Model's `acls` field controls which Consumer Groups are allowed to reach the Model. The field accepts `allow` and `deny` lists, each containing references to Consumer Groups by `id` or `ref`. Access is enforced at the Service level of the generated primitives. 
-{% table %} -columns: - - title: Rank - key: rank - - title: Scope combination - key: scope -rows: - - rank: 1 - scope: Consumer + Route + Service + Model - - rank: 2 - scope: Consumer + Route + Service - - rank: 3 - scope: Consumer Group + Route + Service + Model - - rank: 4 - scope: Consumer Group + Route + Service - - rank: 5 - scope: Consumer + Route + Model - - rank: 6 - scope: Consumer + Route - - rank: 7 - scope: Consumer + Service + Model - - rank: 8 - scope: Consumer + Service - - rank: 9 - scope: Consumer Group + Route + Model - - rank: 10 - scope: Consumer Group + Route - - rank: 11 - scope: Consumer Group + Service + Model - - rank: 12 - scope: Consumer Group + Service - - rank: 13 - scope: Route + Service + Model - - rank: 14 - scope: Route + Service - - rank: 15 - scope: Consumer + Model - - rank: 16 - scope: Consumer - - rank: 17 - scope: Consumer Group + Model - - rank: 18 - scope: Consumer Group - - rank: 19 - scope: Route + Model - - rank: 20 - scope: Route - - rank: 21 - scope: Service + Model - - rank: 22 - scope: Service - - rank: 23 - scope: Model - - rank: 24 - scope: Global -{% endtable %} +For per-request authentication and identity, configure the appropriate authentication plugin globally or as a Policy on the Model. -## Limitations - -Not every plugin can be scoped to a Model. Some plugins run before model context is available, and some are structurally incompatible with Model scoping. 
- -### Plugins that cannot be scoped to a Model - -The following plugins do not accept a Model scope: - -* Authentication plugins, because they must run before any AI-specific processing to establish the consumer identity that Model-scoped configs depend on: - * [Basic Authentication](/plugins/basic-auth/) - * [HMAC Authentication](/plugins/hmac-auth/) - * [JWE Decrypt](/plugins/jwe-decrypt/) - * [JWT](/plugins/jwt/) - * [JWT Signer](/plugins/jwt-signer/) - * [Key Authentication](/plugins/key-auth/) - * [Key Authentication Encrypted](/plugins/key-auth-enc/) - * [LDAP Authentication](/plugins/ldap-auth/) - * [LDAP Authentication Advanced](/plugins/ldap-auth-advanced/) - * [OAuth 2.0](/plugins/oauth2/) - * [OAuth 2.0 Introspection](/plugins/oauth2-introspection/) - * [OpenID Connect](/plugins/openid-connect/) - * [Session](/plugins/session/) - * [Mutual TLS Authentication](/plugins/mtls-auth/) - * [Header Certificate Authentication](/plugins/header-cert-auth/) - * [SAML](/plugins/saml/) - * [Vault Authentication](/plugins/vault-auth/) -* AI routing and agent plugins, because these plugins resolve the model (AI Proxy, AI Proxy Advanced) or operate on protocols where Model scoping is not meaningful (A2A, MCP): - * [AI Proxy](/plugins/ai-proxy/) - * [AI Proxy Advanced](/plugins/ai-proxy-advanced/) - * [AI A2A Proxy](/plugins/ai-a2a-proxy/) - * [AI MCP Proxy](/plugins/ai-mcp-proxy/) - -### Plugins that run before model resolution - -Any plugin with a priority higher than `770` (the priority of AI Proxy and AI Proxy Advanced) runs before the model is known. For those plugins, Model-scoped configs are not applied unless one of the following is true: - -* [Dynamic plugin ordering](/gateway/plugin-development/entities/plugin/) is enabled to push the plugin's execution after AI Proxy. -* The AI Model Shim plugin is deployed on the route or service to resolve the model during the access phase before other plugins run. 
+## Attach Policies -## Set up a Model - -The following example shows a Model named `openai/gpt-4o`. +Policies are the way you apply plugin configurations to a Model. A Policy attached to a Model runs at the Service level of the Model's generated primitives, so it applies to every request routed through any of the Model's capabilities. -{% entity_example %} -type: model -data: - model: openai-something -{% endentity_example %} +You can attach multiple Policies to a single Model. Each Policy is an independent plugin instance, so attaching the same plugin type twice with different configurations creates two separate plugin entries. -## Scope a plugin to a Model +Not every plugin type is valid as a Model Policy. For the supported set, see the [Policy entity](/ai-gateway/entities/policy/) reference. -Once a Model exists, you can scope a Model-aware plugin configuration by setting the `model` field on the plugin. +Policies attached to a Model are deleted when the Model is deleted. -The following example assumes two Models (`openai/gpt-4o` and `openai/gpt-4o-mini`) already exist and applies a quota to one of them. +### Plugin priority and Policy execution order -{% entity_example %} -type: plugin -data: - name: ai-rate-limiting-advanced - model: openai/gpt-4o - config: - llm_providers: - - name: openai - limit: - - 3 - window_size: - - 30 - window_type: fixed -{% endentity_example %} +A Policy attached to a Model creates one plugin entry on the Service of the Model's derived primitives. That plugin runs at the [priority](/gateway/entities/plugin/#plugin-priority) of its underlying plugin type, which determines when it executes relative to other plugins on the request. +The AI Proxy Advanced plugin runs at priority `770` and is the plugin that parses the request body and resolves the model name. Any Policy whose underlying plugin type has a priority higher than `770` runs before that resolution. Authentication plugin types (such as OpenID Connect) fall into this category. 
They run before AI Proxy Advanced parses the request, but after the request has been routed to the Model's generated Service, so they still gate access correctly. The Model identity in the AI sense (which provider, which target model) is just not available to them. +For Policies whose runtime behavior depends on the resolved Model identity, attach plugin types that run at priority `770` or lower, or use [dynamic plugin ordering](/gateway/plugin-development/entities/plugin/) to push their execution later. ## Set up a Model +The following example creates an OpenAI Model that exposes both `chat` and `responses` capabilities, routed through a single OpenAI Provider, with token usage logging enabled. + {% entity_example %} type: model data: - model: openai-something + display_name: GPT-4o Production + ref: gpt-4o-production + type: model + enabled: true + capabilities: + - chat + - responses + formats: + - openai + acls: + allow: + - ref: internal-teams + target_models: + - name: gpt-4o + provider: + ref: my-openai-account + config: + temperature: 0.7 + max_tokens: 4096 + input_cost: 0.0000025 + output_cost: 0.000010 + config: + logging: + log_statistics: true + log_payloads: false + response_streaming: allow + max_request_body_size: 1048576 + balancer: + algorithm: round-robin + retries: 3 + connect_timeout: 60000 + read_timeout: 60000 + write_timeout: 60000 {% endentity_example %} ## Schema -{% entity_schema %} +{% entity_schema %} \ No newline at end of file From 82ef9ba6d28329b5e712340a95109d3b2c3874ca Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Thu, 30 Apr 2026 09:07:11 +0200 Subject: [PATCH 18/53] appease vale --- app/_ai_gateway_entities/model.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/app/_ai_gateway_entities/model.md b/app/_ai_gateway_entities/model.md index de796bcd9e..57cd5d63f6 100644 --- a/app/_ai_gateway_entities/model.md +++ b/app/_ai_gateway_entities/model.md @@ -111,7 +111,7 @@ Provider credentials are materialized 
into the AI Proxy Advanced plugin configur Generated primitives are protected. Direct PUT, PATCH, or DELETE calls against the underlying Service, Routes, or plugin entries through the standard Admin API are rejected. To change anything about a Model's runtime footprint, update the Model entity. {{site.ai_gateway}} deletes and recreates the derived primitives within a single transaction. -{:.note} +{:.info} > **Why a transaction instead of an in-place update?** > > A Model's structure (which capabilities exist, which providers it routes to) determines how many Routes and plugin entries are needed. A delete-and-recreate cycle is the simplest way to keep the entity and its derived primitives consistent, especially when capabilities are added or removed. @@ -133,7 +133,9 @@ Supported values for a `model` type are: For an `api` type Model (used for batch and file APIs), the supported values are `batches` and `files`. -Not every provider supports every capability. The set of capabilities you can declare on a Model depends on what the provider in `target_models` exposes. See [AI Gateway providers](/ai-gateway/ai-providers/) for per-provider details.## Target models and load balancing +Not every provider supports every capability. The set of capabilities you can declare on a Model depends on what the provider in `target_models` exposes. See [{{site.ai_gateway}} providers](/ai-gateway/ai-providers/) for per-provider details. + +## Target models and load balancing A Model's `target_models` field lists one or more upstream provider model instances. Each entry references a Provider (by `id` or `ref`), names the upstream model (for example, `gpt-4o`), and can override per-target settings such as `temperature`, `max_tokens`, `input_cost`, and `output_cost`. 
@@ -207,4 +209,4 @@ data: ## Schema -{% entity_schema %} \ No newline at end of file +{% entity_schema %} From beadc84043c0c4b76f2b31189bbee8e3ff66c879 Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Thu, 30 Apr 2026 09:09:48 +0200 Subject: [PATCH 19/53] appease --- app/_ai_gateway_entities/model.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/_ai_gateway_entities/model.md b/app/_ai_gateway_entities/model.md index 57cd5d63f6..fc97ffd587 100644 --- a/app/_ai_gateway_entities/model.md +++ b/app/_ai_gateway_entities/model.md @@ -19,7 +19,7 @@ tools: related_resources: - text: About {{site.ai_gateway}} url: /ai-gateway/ - - text: AI Gateway providers + - text: "{{site.ai_gateway}} providers" url: /ai-gateway/ai-providers/ - text: Load balancing with AI Proxy Advanced url: /ai-gateway/load-balancing/ From 5474656d37f252ee464bcf7e2e425890e68d6baa Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Thu, 30 Apr 2026 09:34:21 +0200 Subject: [PATCH 20/53] fix broken links --- app/_ai_gateway_entities/model.md | 2 +- app/_ai_gateway_entities/policy.md | 131 +++++++++++++++++++- app/_landing_pages/ai-gateway/entities.yaml | 6 +- 3 files changed, 131 insertions(+), 8 deletions(-) diff --git a/app/_ai_gateway_entities/model.md b/app/_ai_gateway_entities/model.md index fc97ffd587..bbc34c121c 100644 --- a/app/_ai_gateway_entities/model.md +++ b/app/_ai_gateway_entities/model.md @@ -163,7 +163,7 @@ A Policy attached to a Model creates one plugin entry on the Service of the Mode The AI Proxy Advanced plugin runs at priority `770` and is the plugin that parses the request body and resolves the model name. Any Policy whose underlying plugin type has a priority higher than `770` runs before that resolution. Authentication plugin types (such as OpenID Connect) fall into this category. They run before AI Proxy Advanced parses the request, but after the request has been routed to the Model's generated Service, so they still gate access correctly. 
The Model identity in the AI sense (which provider, which target model) is just not available to them. -For Policies whose runtime behavior depends on the resolved Model identity, attach plugin types that run at priority `770` or lower, or use [dynamic plugin ordering](/gateway/plugin-development/entities/plugin/) to push their execution later. +For Policies whose runtime behavior depends on the resolved Model identity, attach plugin types that run at priority `770` or lower, or use [dynamic plugin ordering](/gateway/entities/plugin/) to push their execution later. ## Set up a Model diff --git a/app/_ai_gateway_entities/policy.md b/app/_ai_gateway_entities/policy.md index a859351ca1..59f3548176 100644 --- a/app/_ai_gateway_entities/policy.md +++ b/app/_ai_gateway_entities/policy.md @@ -1,11 +1,11 @@ --- -title: AI Policy +title: Policy content_type: reference entities: - - ai-policy + - policy products: - ai-gateway -description: Policy entity for {{site.ai_gateway}} plugin configuration and scoping. +description: AI Gateway policies registered with the {{site.ai_gateway}}. schema: api: konnect/ai-gateway path: /schemas/AIGatewayPolicy @@ -16,9 +16,132 @@ tools: - deck - admin-api - konnect-api +related_resources: + - text: About {{site.ai_gateway}} + url: /ai-gateway/ + - text: Model entity + url: /ai-gateway/entities/model/ + - text: Agent entity + url: /ai-gateway/entities/agent/ + - text: MCP Server entity + url: /ai-gateway/entities/mcp-server/ + - text: Plugin entity + url: /gateway/entities/plugin/ +faqs: + - q: Are Policies shared across multiple entities? + a: | + No. Each Policy is an independent instance. To apply the same plugin + configuration to two Models, create two Policies with matching `config`, + one per Model. + + - q: How is a Policy different from a plugin? + a: | + A Policy is a plugin instance configured through the AI Gateway entity + surface instead of the classic `/plugins` endpoint. 
The runtime effect + is the same: a plugin attached at the appropriate scope. The differences + are how you create it (`/ai/policies` or under a parent entity), how it's + tagged in the workspace, and that the AI Gateway control plane manages + its lifecycle alongside the entity it's attached to. + + - q: Can a Policy be scoped to a Consumer or Consumer Group? + a: | + Not directly. A Policy attaches to the {{site.ai_gateway}} globally or + to a Model, Agent, or MCP Server. Per-consumer access is expressed + through the `acls` field on those parent entities, which gates which + Consumer Groups can reach the entity in the first place. + + - q: What plugin types can a Policy use? + a: | + Any AI Gateway-compatible plugin. Common values include `ai-sanitizer`, + `ai-prompt-guard`, `ai-prompt-decorator`, `ai-rate-limiting-advanced`, + and `openid-connect`. Set the plugin name in the Policy's `type` field + and provide the plugin's configuration in the `config` field. + + - q: What happens to a Policy when its parent entity is deleted? + a: | + Policies attached to a Model, Agent, or MCP Server are removed when the + parent entity is deleted, along with the rest of that entity's derived + primitives. Global policies are independent and aren't affected by + deletions of other entities. --- +## What is a Policy? + +A Policy is a plugin instance registered through the {{site.ai_gateway}} entity surface. + +Each Policy declares a `type` (the plugin name, for example `ai-sanitizer` or `ai-rate-limiting-advanced`) and a `config` block whose contents follow that plugin's own schema. The {{site.ai_gateway}} control plane attaches the configured plugin at the scope you select: globally, or to a specific Model, Agent, or MCP Server. + +Policies are not shared. Each Policy is one plugin instance. To apply the same configuration to two parent entities, create two Policies. 
+ +## Policy scopes + +A Policy is scoped at the time you create it, by the endpoint you POST it to: + +* **Global**: `POST /ai/policies` attaches the underlying plugin at the global scope of the `_ai_gateway` workspace. The plugin runs for every {{site.ai_gateway}} request that reaches the runtime. +* **Model**: `POST /ai/models/{modelId}/policies` attaches the underlying plugin at the Service of the Model's derived primitives. The plugin runs for requests routed through that Model. +* **Agent**: `POST /ai/agents/{agentId}/policies` attaches the plugin at the Service of the Agent's derived primitives. +* **MCP Server**: `POST /ai/mcp-servers/{mcpServerId}/policies` attaches the plugin at the Service of the MCP Server's derived primitives. + +Scope is fixed at creation time. Moving a Policy from one scope to another means deleting it and creating a new one under the target endpoint. + +## Policy and plugin relationship + +Creating a Policy creates exactly one plugin entry in the underlying runtime. Updating a Policy updates that plugin entry. Deleting a Policy deletes that plugin entry. + +The `config` field is passed through to the plugin without translation. Refer to the documentation for the specific plugin to see the available fields, defaults, and validation rules. + +{:.note} +> **Plugin config schemas live with the plugin docs** +> +> {{site.ai_gateway}} does not redeclare plugin configuration schemas under the Policy entity. For each plugin you intend to use as a Policy `type`, look up that plugin's reference page for its `config` shape. + +## Set up a global Policy + +The following example creates a global PII sanitizer Policy that runs for every AI Gateway request. 
+ +{% entity_example %} +type: policy +data: + display_name: PII Sanitizer - Global + ref: pii-sanitizer-global + type: ai-sanitizer + enabled: true + config: + anonymize: + - phone + - creditcard + stop_on_error: true +formats: + - admin-api + - konnect-api +{% endentity_example %} + +## Set up a Model-scoped Policy + +The following example attaches a rate limiting Policy to a Model. It assumes a Model with `id` `bf138ba2-c9b1-4229-b268-04d9d8a6410b` already exists. + +{% entity_example %} +type: policy +data: + display_name: Rate Limit - Production GPT-4o + ref: rate-limit-prod-gpt4o + type: ai-rate-limiting-advanced + enabled: true + config: + llm_providers: + - name: openai + limit: + - 30 + window_size: + - 60 + window_type: sliding +formats: + - admin-api + - konnect-api +{% endentity_example %} + + ## Schema -{% entity_schema %} +{% entity_schema %} \ No newline at end of file diff --git a/app/_landing_pages/ai-gateway/entities.yaml b/app/_landing_pages/ai-gateway/entities.yaml index 84114498a0..ca13364a1d 100644 --- a/app/_landing_pages/ai-gateway/entities.yaml +++ b/app/_landing_pages/ai-gateway/entities.yaml @@ -90,7 +90,7 @@ rows: description: Store and reference secrets used by AI Gateway entities and plugins. cta: text: Vault entity - url: /ai-gateway/entities/vault/ + url: /gateway/entities/vault/ - blocks: - type: card config: @@ -98,7 +98,7 @@ rows: description: Manage cryptographic key material used by AI Gateway security workflows. cta: text: Key entity - url: /ai-gateway/entities/key/ + url: /gateway/entities/key/ - blocks: - type: card config: @@ -106,4 +106,4 @@ rows: description: Group keys for rotation and key management in AI Gateway configurations. 
cta: text: Key Set entity - url: /ai-gateway/entities/key-set/ + url: /gateway/entities/key-set/ From 37923fa9ae7bb18053342c63952f0f12b2d675e1 Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Thu, 30 Apr 2026 09:35:24 +0200 Subject: [PATCH 21/53] fix --- app/_ai_gateway_entities/policy.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/app/_ai_gateway_entities/policy.md b/app/_ai_gateway_entities/policy.md index 59f3548176..5082f837db 100644 --- a/app/_ai_gateway_entities/policy.md +++ b/app/_ai_gateway_entities/policy.md @@ -5,7 +5,7 @@ entities: - policy products: - ai-gateway -description: AI Gateway policies registered with the {{site.ai_gateway}}. +description: {{site.ai_gateway}}. policies schema: api: konnect/ai-gateway path: /schemas/AIGatewayPolicy @@ -36,11 +36,11 @@ faqs: - q: How is a Policy different from a plugin? a: | - A Policy is a plugin instance configured through the AI Gateway entity + A Policy is a plugin instance configured through the {{site.ai_gateway}}. entity surface instead of the classic `/plugins` endpoint. The runtime effect is the same: a plugin attached at the appropriate scope. The differences are how you create it (`/ai/policies` or under a parent entity), how it's - tagged in the workspace, and that the AI Gateway control plane manages + tagged in the workspace, and that the {{site.ai_gateway}}. control plane manages its lifecycle alongside the entity it's attached to. - q: Can a Policy be scoped to a Consumer or Consumer Group? @@ -52,7 +52,7 @@ faqs: - q: What plugin types can a Policy use? a: | - Any AI Gateway-compatible plugin. Common values include `ai-sanitizer`, + Any {{site.ai_gateway}}.-compatible plugin. Common values include `ai-sanitizer`, `ai-prompt-guard`, `ai-prompt-decorator`, `ai-rate-limiting-advanced`, and `openid-connect`. Set the plugin name in the Policy's `type` field and provide the plugin's configuration in the `config` field. 
@@ -97,7 +97,7 @@ The `config` field is passed through to the plugin without translation. Refer to ## Set up a global Policy -The following example creates a global PII sanitizer Policy that runs for every AI Gateway request. +The following example creates a global PII sanitizer Policy that runs for every {{site.ai_gateway}}. request. {% entity_example %} type: policy From 660e8804af803eda15168e5bcb3df824bb44039c Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Thu, 30 Apr 2026 09:45:21 +0200 Subject: [PATCH 22/53] fix frontmatter --- app/_ai_gateway_entities/policy.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/app/_ai_gateway_entities/policy.md b/app/_ai_gateway_entities/policy.md index 5082f837db..4835638745 100644 --- a/app/_ai_gateway_entities/policy.md +++ b/app/_ai_gateway_entities/policy.md @@ -5,7 +5,7 @@ entities: - policy products: - ai-gateway -description: {{site.ai_gateway}}. policies +description: "Policies for {{site.ai_gateway}}." schema: api: konnect/ai-gateway path: /schemas/AIGatewayPolicy @@ -17,7 +17,7 @@ tools: - admin-api - konnect-api related_resources: - - text: About {{site.ai_gateway}} + - text: "About {{site.ai_gateway}}" url: /ai-gateway/ - text: Model entity url: /ai-gateway/entities/model/ @@ -144,4 +144,4 @@ formats: ## Schema -{% entity_schema %} \ No newline at end of file +{% entity_schema %} From d29bc7766f31a4587410d82a9fd44b8e5cd3af69 Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Thu, 30 Apr 2026 09:50:49 +0200 Subject: [PATCH 23/53] fix admos --- app/_ai_gateway_entities/policy.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/_ai_gateway_entities/policy.md b/app/_ai_gateway_entities/policy.md index 4835638745..bbfb88bc7b 100644 --- a/app/_ai_gateway_entities/policy.md +++ b/app/_ai_gateway_entities/policy.md @@ -90,10 +90,10 @@ Creating a Policy creates exactly one plugin entry in the underlying runtime. 
Up The `config` field is passed through to the plugin without translation. Refer to the documentation for the specific plugin to see the available fields, defaults, and validation rules. -{:.note} +{:.info} > **Plugin config schemas live with the plugin docs** > -> {{site.ai_gateway}} does not redeclare plugin configuration schemas under the Policy entity. For each plugin you intend to use as a Policy `type`, look up that plugin's reference page for its `config` shape. +> {{site.ai_gateway}} does not define plugin configuration schemas under the Policy entity. For each plugin you intend to use as a Policy `type`, look up that plugin's reference page for its `config` shape. ## Set up a global Policy From f85a78588452d47de77b0ca4ac0ef01738c6e73e Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Thu, 30 Apr 2026 11:56:42 +0200 Subject: [PATCH 24/53] update policy doc --- app/_ai_gateway_entities/policy.md | 80 ++++++++++++++++++------------ 1 file changed, 49 insertions(+), 31 deletions(-) diff --git a/app/_ai_gateway_entities/policy.md b/app/_ai_gateway_entities/policy.md index bbfb88bc7b..ed02b234a0 100644 --- a/app/_ai_gateway_entities/policy.md +++ b/app/_ai_gateway_entities/policy.md @@ -36,68 +36,86 @@ faqs: - q: How is a Policy different from a plugin? a: | - A Policy is a plugin instance configured through the {{site.ai_gateway}}. entity - surface instead of the classic `/plugins` endpoint. The runtime effect - is the same: a plugin attached at the appropriate scope. The differences - are how you create it (`/ai/policies` or under a parent entity), how it's - tagged in the workspace, and that the {{site.ai_gateway}}. control plane manages - its lifecycle alongside the entity it's attached to. + A Policy is a plugin instance configured through the {{site.ai_gateway}} entity surface + instead of the classic `/plugins` endpoint. The runtime effect is the same: a plugin attached + at the appropriate scope. 
{{site.ai_gateway}} manages the Policy's lifecycle alongside the + entity it's attached to. - q: Can a Policy be scoped to a Consumer or Consumer Group? a: | - Not directly. A Policy attaches to the {{site.ai_gateway}} globally or - to a Model, Agent, or MCP Server. Per-consumer access is expressed - through the `acls` field on those parent entities, which gates which - Consumer Groups can reach the entity in the first place. + Not directly. A Policy attaches to {{site.ai_gateway}} globally or to a Model, Agent, + or MCP Server. Per-consumer access is expressed through the `acls` field on those parent + entities, which gates which Consumer Groups can reach the entity in the first place. - q: What plugin types can a Policy use? a: | - Any {{site.ai_gateway}}.-compatible plugin. Common values include `ai-sanitizer`, - `ai-prompt-guard`, `ai-prompt-decorator`, `ai-rate-limiting-advanced`, - and `openid-connect`. Set the plugin name in the Policy's `type` field - and provide the plugin's configuration in the `config` field. + Set the plugin name in the Policy's `type` field and provide the plugin's configuration + in the `config` field. Examples include `ai-sanitizer`, `ai-prompt-guard`, + `ai-prompt-decorator`, `ai-rate-limiting-advanced`, and `openid-connect`. The supported set + isn't enumerated on this page. Refer to the {{site.ai_gateway}} plugin reference for the full list. - q: What happens to a Policy when its parent entity is deleted? a: | - Policies attached to a Model, Agent, or MCP Server are removed when the - parent entity is deleted, along with the rest of that entity's derived - primitives. Global policies are independent and aren't affected by - deletions of other entities. + Policies attached to a Model, Agent, or MCP Server are removed when the parent entity is + deleted, along with the rest of that entity's derived primitives. Global Policies are + independent and aren't affected by deletions of other entities. --- ## What is a Policy?
A Policy is a plugin instance registered through the {{site.ai_gateway}} entity surface. -Each Policy declares a `type` (the plugin name, for example `ai-sanitizer` or `ai-rate-limiting-advanced`) and a `config` block whose contents follow that plugin's own schema. The {{site.ai_gateway}} control plane attaches the configured plugin at the scope you select: globally, or to a specific Model, Agent, or MCP Server. +Each Policy declares a `type` (the plugin name, for example `ai-sanitizer` or `ai-rate-limiting-advanced`) and a `config` block whose contents follow that plugin's own schema. {{site.ai_gateway}} attaches the configured plugin at the scope you select: globally, or to a specific Model, Agent, or MCP Server. Policies are not shared. Each Policy is one plugin instance. To apply the same configuration to two parent entities, create two Policies. +Policies are managed through the {{site.ai_gateway}} entity surface in both deployment modes: + +{% table %} +columns: + - title: Deployment + key: deployment + - title: Control Plane + key: cp + - title: Endpoint + key: endpoint +rows: + - deployment: "{{site.konnect_short_name}}" + cp: "{{site.konnect_short_name}} {{site.ai_gateway}} API" + endpoint: /v1/ai-gateways/{aiGatewayId}/policies + - deployment: On-prem + cp: Admin API + endpoint: /ai/policies +{% endtable %} + + + ## Policy scopes A Policy is scoped at the time you create it, by the endpoint you POST it to: -* **Global**: `POST /ai/policies` attaches the underlying plugin at the global scope of the `_ai_gateway` workspace. The plugin runs for every {{site.ai_gateway}} request that reaches the runtime. -* **Model**: `POST /ai/models/{modelId}/policies` attaches the underlying plugin at the Service of the Model's derived primitives. The plugin runs for requests routed through that Model. -* **Agent**: `POST /ai/agents/{agentId}/policies` attaches the plugin at the Service of the Agent's derived primitives. 
-* **MCP Server**: `POST /ai/mcp-servers/{mcpServerId}/policies` attaches the plugin at the Service of the MCP Server's derived primitives. +* **Global**: `POST /ai/policies` attaches the underlying plugin globally so it runs for every {{site.ai_gateway}} route on the data plane. Non-AI traffic on the same data plane is not affected. +* **Model**: `POST /ai/models/{modelId}/policies` attaches the underlying plugin at the Service of the Model's derived primitives. The plugin runs for requests routed through that Model. See the [Model entity](/ai-gateway/entities/model/). +* **Agent**: `POST /ai/agents/{agentId}/policies` attaches the plugin at the Service of the Agent's derived primitives. See the [Agent entity](/ai-gateway/entities/agent/). +* **MCP Server**: `POST /ai/mcp-servers/{mcpServerId}/policies` attaches the plugin at the Service of the MCP Server's derived primitives. See the [MCP Server entity](/ai-gateway/entities/mcp-server/). Scope is fixed at creation time. Moving a Policy from one scope to another means deleting it and creating a new one under the target endpoint. -## Policy and plugin relationship +## Lifecycle -Creating a Policy creates exactly one plugin entry in the underlying runtime. Updating a Policy updates that plugin entry. Deleting a Policy deletes that plugin entry. +Creating a Policy creates exactly one plugin entry in the underlying runtime. Updating a Policy updates that plugin entry. Deleting a Policy deletes that plugin entry. All scopes support standard CRUD operations through the matching path. -The `config` field is passed through to the plugin without translation. Refer to the documentation for the specific plugin to see the available fields, defaults, and validation rules. +The `config` field is passed through to the plugin without translation. -{:.info} +{:.info} > **Plugin config schemas live with the plugin docs** > -> {{site.ai_gateway}} does not define plugin configuration schemas under the Policy entity.
For each plugin you intend to use as a Policy `type`, look up that plugin's reference page for its `config` shape. +> {{site.ai_gateway}} does not define plugin configuration schemas under the Policy entity. +> For each plugin you intend to use as a Policy `type`, look up that plugin's reference page for its `config` shape. ## Set up a global Policy -The following example creates a global PII sanitizer Policy that runs for every {{site.ai_gateway}}. request. +The following example creates a global PII sanitizer Policy that runs for every {{site.ai_gateway}} route. {% entity_example %} type: policy @@ -118,7 +136,7 @@ formats: ## Set up a Model-scoped Policy -The following example attaches a rate limiting Policy to a Model. It assumes a Model with `id` `bf138ba2-c9b1-4229-b268-04d9d8a6410b` already exists. +The following example attaches a rate-limiting Policy to a Model. {% entity_example %} type: policy @@ -144,4 +162,4 @@ formats: ## Schema -{% entity_schema %} +{% entity_schema %} \ No newline at end of file From 9f3e331a0901df1ca9eb708dd91710869ba703a5 Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Thu, 30 Apr 2026 13:43:30 +0200 Subject: [PATCH 25/53] Fix formats --- app/_ai_gateway_entities/policy.md | 6 ------ 1 file changed, 6 deletions(-) diff --git a/app/_ai_gateway_entities/policy.md b/app/_ai_gateway_entities/policy.md index ed02b234a0..3d3cddff43 100644 --- a/app/_ai_gateway_entities/policy.md +++ b/app/_ai_gateway_entities/policy.md @@ -129,9 +129,6 @@ data: - phone - creditcard stop_on_error: true -formats: - - admin-api - - konnect-api {% endentity_example %} ## Set up a Model-scoped Policy @@ -153,9 +150,6 @@ data: window_size: - 60 window_type: sliding -formats: - - admin-api - - konnect-api {% endentity_example %} From 83df105db5862f03f9f40c4e0536c8a4ac6a0ddf Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Fri, 1 May 2026 06:47:42 +0200 Subject: [PATCH 26/53] add consumers draft --- app/_ai_gateway_entities/consumer.md | 136 
++++++++++++++++++++++++++- 1 file changed, 132 insertions(+), 4 deletions(-) diff --git a/app/_ai_gateway_entities/consumer.md b/app/_ai_gateway_entities/consumer.md index 47b36c40ba..4e405d055b 100644 --- a/app/_ai_gateway_entities/consumer.md +++ b/app/_ai_gateway_entities/consumer.md @@ -1,11 +1,11 @@ --- -title: AI Consumer +title: Consumer content_type: reference entities: - - ai-consumer + - consumer products: - ai-gateway -description: AI consumer entity wrapper for consumer-specific {{site.ai_gateway}} behavior. +description: "Consumers for {{site.ai_gateway}}." schema: api: konnect/ai-gateway path: /schemas/AIGatewayConsumer @@ -16,8 +16,136 @@ tools: - deck - admin-api - konnect-api +related_resources: + - text: "About {{site.ai_gateway}}" + url: /ai-gateway/ + - text: Model entity + url: /ai-gateway/entities/model/ + - text: Policy entity + url: /ai-gateway/entities/policy/ + - text: "Kong Gateway Consumer entity" + url: /gateway/entities/consumer/ +faqs: + - q: How is an {{site.ai_gateway}} Consumer different from a Kong Gateway Consumer? + a: | + The runtime entity is a regular Kong Consumer. The {{site.ai_gateway}} surface adds a required + authentication `type` field, accepts inline Consumer Group assignment at creation, and uses the + {{site.ai_gateway}} entity convention (`display_name`, `ref`, `labels`). The AI Consumer's `ref` + maps to the underlying Kong Consumer's `username`. The AI Consumer surface does not expose + `custom_id` or `tags`. + + - q: Can I edit the underlying Kong Consumer that {{site.ai_gateway}} generates? + a: | + No. The generated Kong Consumer is protected from direct modification through the standard + `/consumers` Admin API. Update the AI Consumer instead. + + - q: How do I add credentials to an AI Consumer? + a: | + + + - q: Can a Consumer belong to multiple Consumer Groups? + a: | + Yes. The `consumer_groups` array accepts one or more AI Consumer Group references by `id` or `ref`. --- +## What is a Consumer? 
+ +A Consumer is the {{site.ai_gateway}} surface for an external client of the AI APIs you publish through {{site.ai_gateway}}. The underlying runtime entity is a regular {{site.base_gateway}} [Consumer](/gateway/entities/consumer/). + +You use Consumers to authenticate clients, assign them to Consumer Groups, and gate access to Models, Agents, and MCP Servers through those parent entities' `acls` field. + + + +The following diagram shows where a Consumer participates in an {{site.ai_gateway}} request. The client passes credentials to a Model, an Auth Policy on that Model identifies the Consumer, and the identified Consumer is then available to other Policies on the Model before the request reaches the upstream provider. + +{% mermaid %} +flowchart LR + +Client(["Client"]) +Consumer(AI Consumer +entity) +Auth(Auth Policy) +Model(Model entity) +Policies("Policies +attached to Model") +Provider[Upstream +AI provider] + +Client --pass +credentials--> Model +subgraph id1 ["`**AI GATEWAY**`"] + subgraph padding[ ] + + subgraph Identify ["Consumer Identity Added"] + direction LR + Model --> Auth + Auth--identify + Consumer-->Consumer + end + end + + Consumer--> Policies +end +Policies --apply +per-Model policies--> Provider + +style Identify stroke-dasharray: 5 5 +style padding stroke:none!important,fill:none!important + +{% endmermaid %} + +Consumers are managed through the {{site.ai_gateway}} entity surface in both deployment modes: + +{% table %} +columns: + - title: Deployment + key: deployment + - title: Control Plane + key: cp + - title: Endpoint + key: endpoint +rows: + - deployment: "{{site.konnect_short_name}}" + cp: "{{site.konnect_short_name}} {{site.ai_gateway}} API" + endpoint: /v1/ai-gateways/{aiGatewayId}/consumers + - deployment: On-prem + cp: Admin API + endpoint: /ai/consumers +{% endtable %} + +## Authentication type + +The `type` field declares how the Consumer authenticates to {{site.ai_gateway}}. 
Supported values are: + +* `apikey` +* `oauth` + + + +## Consumer Group membership + +You can assign a Consumer to one or more AI Consumer Groups inline on the `consumer_groups` array. Each entry references an AI Consumer Group by `id` or `ref`. + +AI Consumer Groups themselves are managed through the AI Consumer Group surface, which is documented separately. + +## Set up a Consumer + +The following example creates an AI Consumer assigned to a single Consumer Group. + +{% entity_example %} +type: consumer +data: + display_name: Mobile App - Production + ref: mobile-app-production + type: apikey + consumer_groups: + - ref: internal-teams +formats: + - admin-api + - konnect-api +{% endentity_example %} + ## Schema -{% entity_schema %} +{% entity_schema %} \ No newline at end of file From 00060372af4829049240b6cdcff1f34f999ae8b2 Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Fri, 1 May 2026 06:50:16 +0200 Subject: [PATCH 27/53] appease vale --- app/_ai_gateway_entities/policy.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/_ai_gateway_entities/policy.md b/app/_ai_gateway_entities/policy.md index 3d3cddff43..1f70898a05 100644 --- a/app/_ai_gateway_entities/policy.md +++ b/app/_ai_gateway_entities/policy.md @@ -107,7 +107,7 @@ Creating a Policy creates exactly one plugin entry in the underlying runtime. Up The `config` field is passed through to the plugin without translation. -{:.note} +{:.info} > **Plugin config schemas live with the plugin docs** > > {{site.ai_gateway}} does not define plugin configuration schemas under the Policy entity. 
From 5aa2a642c4887d48e48562ff09af29d77f4dc6f4 Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Fri, 1 May 2026 07:05:55 +0200 Subject: [PATCH 28/53] update consumers doc --- app/_ai_gateway_entities/consumer.md | 64 ++++++++++++++++++++-------- 1 file changed, 46 insertions(+), 18 deletions(-) diff --git a/app/_ai_gateway_entities/consumer.md b/app/_ai_gateway_entities/consumer.md index 4e405d055b..9d55ce1e57 100644 --- a/app/_ai_gateway_entities/consumer.md +++ b/app/_ai_gateway_entities/consumer.md @@ -28,11 +28,10 @@ related_resources: faqs: - q: How is an {{site.ai_gateway}} Consumer different from a Kong Gateway Consumer? a: | - The runtime entity is a regular Kong Consumer. The {{site.ai_gateway}} surface adds a required - authentication `type` field, accepts inline Consumer Group assignment at creation, and uses the - {{site.ai_gateway}} entity convention (`display_name`, `ref`, `labels`). The AI Consumer's `ref` - maps to the underlying Kong Consumer's `username`. The AI Consumer surface does not expose - `custom_id` or `tags`. + The runtime entity is a regular Kong Consumer. The {{site.ai_gateway}} surface uses the + {{site.ai_gateway}} entity convention (`display_name`, `name`, `labels`), requires an + authentication `type` field, accepts inline Consumer Group assignment, and lets you reference + Policies and embed credentials directly on the Consumer. - q: Can I edit the underlying Kong Consumer that {{site.ai_gateway}} generates? a: | @@ -41,19 +40,25 @@ faqs: - q: How do I add credentials to an AI Consumer? a: | - + For `type: apikey`, set `config.credentials[].api_key` on the Consumer. + Each entry can also set a `ttl` in seconds. - q: Can a Consumer belong to multiple Consumer Groups? a: | - Yes. The `consumer_groups` array accepts one or more AI Consumer Group references by `id` or `ref`. + Yes. The `consumer_groups` array accepts one or more references to Consumer Groups by + `name` or `id`. + + - q: How do I attach Policies to a Consumer? 
+ a: | + Add the Policy's `name` or `id` to the Consumer's `policies` array. + See the [Policy entity](/ai-gateway/entities/policy/) reference. --- ## What is a Consumer? A Consumer is the {{site.ai_gateway}} surface for an external client of the AI APIs you publish through {{site.ai_gateway}}. The underlying runtime entity is a regular {{site.base_gateway}} [Consumer](/gateway/entities/consumer/). -You use Consumers to authenticate clients, assign them to Consumer Groups, and gate access to Models, Agents, and MCP Servers through those parent entities' `acls` field. +You use Consumers to authenticate clients, assign them to Consumer Groups, attach Policies, and gate access to Models, Agents, and MCP Servers through those parent entities' `acls` field. @@ -121,29 +126,52 @@ The `type` field declares how the Consumer authenticates to {{site.ai_gateway}}. * `apikey` * `oauth` - + + +## Credentials + +For Consumers of `type: apikey`, you can declare credentials inline on the Consumer's `config.credentials` array. Each entry has: + +* `api_key`: the API key value the client presents. +* `ttl`: optional time-to-live in seconds. Once elapsed, the credential is no longer valid. + +Multiple credentials can be declared per Consumer. Rotating a key means adding a new entry and removing the old one. + +## External identity mapping + +The `config.custom_id` field stores an external identifier for the Consumer, such as an OIDC Client ID. This field is optional and informational. {{site.ai_gateway}} does not use it for authentication or routing. ## Consumer Group membership -You can assign a Consumer to one or more AI Consumer Groups inline on the `consumer_groups` array. Each entry references an AI Consumer Group by `id` or `ref`. +You can assign a Consumer to one or more Consumer Groups through the `consumer_groups` array. Each entry references a Consumer Group by `name` or `id`. + +Consumer Groups are managed through their own entity surface. 
+ +## Attach Policies + +To attach a Policy to a Consumer, add the Policy's `name` or `id` to the Consumer's `policies` array. The Policy's underlying plugin then runs in the request lifecycle when this Consumer is identified. -AI Consumer Groups themselves are managed through the AI Consumer Group surface, which is documented separately. +You can reference multiple Policies from a single Consumer. Each Policy is an independent instance. + +For the supported plugin types and how Policies attach to other entities, see the [Policy entity](/ai-gateway/entities/policy/) reference. ## Set up a Consumer -The following example creates an AI Consumer assigned to a single Consumer Group. +The following example creates an AI Consumer with one API key credential, assigned to a single Consumer Group. {% entity_example %} type: consumer data: display_name: Mobile App - Production - ref: mobile-app-production + name: mobile-app-production type: apikey consumer_groups: - - ref: internal-teams -formats: - - admin-api - - konnect-api + - internal-teams + policies: [] + config: + credentials: + - api_key: sk-387788hd3xnej + ttl: 86400 {% endentity_example %} ## Schema From 0d3449efa071411fb6e8008d487032d496a0a15c Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Fri, 1 May 2026 07:08:05 +0200 Subject: [PATCH 29/53] fix --- app/_ai_gateway_entities/consumer.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/_ai_gateway_entities/consumer.md b/app/_ai_gateway_entities/consumer.md index 9d55ce1e57..432920576a 100644 --- a/app/_ai_gateway_entities/consumer.md +++ b/app/_ai_gateway_entities/consumer.md @@ -23,10 +23,10 @@ related_resources: url: /ai-gateway/entities/model/ - text: Policy entity url: /ai-gateway/entities/policy/ - - text: "Kong Gateway Consumer entity" + - text: "{{site.base_gateway}} Consumer entity" url: /gateway/entities/consumer/ faqs: - - q: How is an {{site.ai_gateway}} Consumer different from a Kong Gateway Consumer? 
+ - q: How is an {{site.ai_gateway}} Consumer different from a {{site.base_gateway}} Consumer? a: | The runtime entity is a regular Kong Consumer. The {{site.ai_gateway}} surface uses the {{site.ai_gateway}} entity convention (`display_name`, `name`, `labels`), requires an From d5aaa1a52bd8859cb61d0be13b64270c257a5fc5 Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Mon, 4 May 2026 08:22:37 +0200 Subject: [PATCH 30/53] Apply suggestions from code review Co-authored-by: jbaross --- app/_ai_gateway_entities/consumer.md | 12 ++++++------ app/_ai_gateway_entities/model.md | 10 ++++++---- app/_ai_gateway_entities/policy.md | 2 +- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/app/_ai_gateway_entities/consumer.md b/app/_ai_gateway_entities/consumer.md index 432920576a..eceba7493c 100644 --- a/app/_ai_gateway_entities/consumer.md +++ b/app/_ai_gateway_entities/consumer.md @@ -56,13 +56,13 @@ faqs: ## What is a Consumer? -A Consumer is the {{site.ai_gateway}} surface for an external client of the AI APIs you publish through {{site.ai_gateway}}. The underlying runtime entity is a regular {{site.base_gateway}} [Consumer](/gateway/entities/consumer/). +A Consumer is the {{site.ai_gateway}} entity that represents a downstream client of the AI APIs you publish through {{site.ai_gateway}}. -You use Consumers to authenticate clients, assign them to Consumer Groups, attach Policies, and gate access to Models, Agents, and MCP Servers through those parent entities' `acls` field. +You can use Consumers and Consumer Groups to authenticate clients, attach Policies, and gate access to Models, Agents, and MCP Servers through those parent entities' `acls` field. -The following diagram shows where a Consumer participates in an {{site.ai_gateway}} request.
The client passes credentials to a Model, an Auth Policy on that Model identifies the Consumer, and the identified Consumer is then available to other Policies on the Model before the request reaches the upstream provider. +The following diagram shows how a Consumer participates in an {{site.ai_gateway}} request. The client passes {{site.ai_gateway}} credentials for a Model, an Auth Policy on that Model identifies the Consumer, and the identified Consumer is then available to other Policies on the Model before the request reaches the upstream provider. {% mermaid %} flowchart LR @@ -100,7 +100,7 @@ style padding stroke:none!important,fill:none!important {% endmermaid %} -Consumers are managed through the {{site.ai_gateway}} entity surface in both deployment modes: +Consumers are managed through the {{site.ai_gateway}} entity API surface in either deployment modes: {% table %} columns: @@ -149,9 +149,9 @@ Consumer Groups are managed through their own entity surface. - -The following diagram shows how a Consumer participates in an {{site.ai_gateway}} request. The client passes {{site.ai_gateway}} credentials for a Model, an Auth Policy on that Model identifies the Consumer, and the identified Consumer is then available to other Policies on the Model before the request reaches the upstream provider. 
- -{% mermaid %} -flowchart LR - -Client(["Client"]) -Consumer(AI Consumer -entity) -Auth(Auth Policy) -Model(Model entity) -Policies("Policies -attached to Model") -Provider[Upstream -AI provider] - -Client --pass -credentials--> Model -subgraph id1 ["`**AI GATEWAY**`"] - subgraph padding[ ] - - subgraph Identify ["Consumer Identity Added"] - direction LR - Model --> Auth - Auth--identify - Consumer-->Consumer - end - end - - Consumer--> Policies -end -Policies --apply -per-Model policies--> Provider - -style Identify stroke-dasharray: 5 5 -style padding stroke:none!important,fill:none!important - -{% endmermaid %} - Consumers are managed through the {{site.ai_gateway}} entity API surface in either deployment modes: {% table %} diff --git a/app/_ai_gateway_entities/model.md b/app/_ai_gateway_entities/model.md index 9a4bd0be32..c19769cb7b 100644 --- a/app/_ai_gateway_entities/model.md +++ b/app/_ai_gateway_entities/model.md @@ -60,8 +60,8 @@ faqs: - q: How do I limit which consumers can reach a Model? a: | - Set the `acls` field on the Model with allow or deny lists referencing Consumer Groups. - Consumer-level access is not configured on the Model directly. + Set the `acls` field on the Model with allow or deny lists. + Each entry is a string that references a Consumer, Consumer Group, or Authenticated Group by name. - q: Does the Model entity store provider credentials? a: | @@ -120,33 +120,22 @@ Generated primitives are protected. Direct PUT, PATCH, or DELETE calls against t The `capabilities` field tells {{site.ai_gateway}} which AI workflows the Model exposes. Each capability becomes one Route on the generated Service. A Model must declare at least one capability. 
-Supported values for a `model` type are: - -* `chat` -* `responses` -* `embeddings` -* `image-generation` -* `image-edits` -* `audio-transcriptions` -* `audio-translations` -* `realtime` - Model `type` controls which capability set applies: -* `model`: general LLM workloads, with capabilities such as `chat`, `responses`, and `embeddings`. -* `api`: API-style workloads, where the supported capabilities are `batches` and `files`. +* `model`: synchronous request/response workloads through generative APIs. Supported capabilities are `chat`, `embeddings`, `assistants`, `responses`, `audio-transcriptions`, `audio-translations`, `image-generation`, `image-edits`, `video-generations`, and `realtime`. +* `api`: asynchronous workloads through the files and batches APIs. Supported capabilities are `batches` and `files`. Not every provider supports every capability. The set of capabilities you can declare on a Model depends on what the provider in `target_models` exposes. See [{{site.ai_gateway}} providers](/ai-gateway/ai-providers/) for per-provider details. ## Target models and load balancing -A Model's `target_models` field lists one or more upstream provider model instances. For each entry, you provide the upstream model name (for example, `gpt-4o`) and select the Provider to use (by `id` or `ref`). Each target can also override settings such as `temperature`, `max_tokens`, `input_cost`, and `output_cost`. +A Model's `target_models` field lists one or more upstream provider model instances. For each entry, you provide the upstream model name (for example, `gpt-4o`) and reference the Provider to use by its `name`. Each target can also override settings such as `temperature`, `max_tokens`, `input_cost`, and `output_cost`. When a Model has more than one target, requests are load-balanced according to `config.balancer`. For the supported algorithms, configuration options, and tuning guidance, see [Load balancing with AI Proxy Advanced](/ai-gateway/load-balancing/). 
## Access control -A Model's `acls` field controls which Consumer Groups are allowed to reach the Model. The field accepts `allow` and `deny` lists, each containing references to Consumer Groups by `id` or `ref`. Access is enforced at the Service level of the generated primitives. ACLs on a Model apply to Consumer Groups, not individual Consumers. +A Model's `acls` field controls which identities are allowed to reach the Model. The field accepts `allow` and `deny` lists. Each entry is a string that references a Consumer, Consumer Group, or Authenticated Group by name. Access is enforced at the Service level of the generated primitives. For per-request authentication and identity, configure the appropriate authentication plugin globally or as a Policy on the Model. @@ -154,9 +143,11 @@ For per-request authentication and identity, configure the appropriate authentic Policies are the way you apply plugin configurations to a Model. A Policy attached to a Model runs at the Service level of the Model's generated primitives, so it applies to every request routed through any of the Model's capabilities. +A Model declares the Policies it uses through its `policies` field. Each entry is a string that references a Policy by name or ID. {{site.konnect_short_name}} resolves these references against Policies created at `/v1/ai-gateways/{aiGatewayId}/policies`. On-prem also supports the nested endpoint `/ai/models/{modelId}/policies`, which creates and attaches a Policy in one call. + You can attach multiple Policies to a single Model. Each Policy has an independent plugin instance, so attaching the same plugin type twice with different configurations creates two separate plugin entries. -Not every plugin type is valid as a Model Policy. +Not every plugin type is valid as a Model Policy. Policies attached to a Model are deleted when the Model is deleted. 
@@ -178,21 +169,23 @@ The following example creates an OpenAI Model that exposes both `chat` and `resp type: model data: display_name: GPT-4o Production - ref: gpt-4o-production + name: gpt-4o-production type: model enabled: true capabilities: - chat - responses formats: - - openai + - type: openai acls: allow: - - ref: internal-teams + - internal-teams + deny: [] + policies: [] target_models: - name: gpt-4o provider: - ref: my-openai-account + name: my-openai-account config: temperature: 0.7 max_tokens: 4096 @@ -200,10 +193,12 @@ data: output_cost: 0.000010 config: logging: - log_statistics: true - log_payloads: false + statistics: true + payloads: false response_streaming: allow max_request_body_size: 1048576 + model: + name_header: true balancer: algorithm: round-robin retries: 3 diff --git a/app/_ai_gateway_entities/policy.md b/app/_ai_gateway_entities/policy.md index 7b639bdf9a..986de5ab9c 100644 --- a/app/_ai_gateway_entities/policy.md +++ b/app/_ai_gateway_entities/policy.md @@ -43,9 +43,14 @@ faqs: - q: Can a Policy be scoped to a Consumer or Consumer Group? a: | - Not directly. A Policy attaches to {{site.ai_gateway}} globally or to a Model, Agent, - or MCP Server. Per-consumer access is expressed through the `acls` field on those parent - entities, which gates which Consumer Groups can reach the entity in the first place. + Yes. Add the Policy's `name` or `id` to the Consumer's or Consumer Group's `policies` array. + The plugin runs when the Consumer is identified during a request, or when a member of the + Consumer Group is identified. + + Unlike Model, Agent, and MCP Server, on-prem does not expose nested policy endpoints + (`/ai/consumers/{id}/policies` or `/ai/consumer-groups/{id}/policies`) for these two entity + types. The reference-array mechanism is the only way to attach a Policy to a Consumer or + Consumer Group in either deployment mode. - q: What plugin types can a Policy use? 
a: | @@ -56,9 +61,12 @@ faqs: - q: What happens to a Policy when its parent entity is deleted? a: | - Policies attached to a Model, Agent, or MCP Server are removed when the parent entity is - deleted, along with the rest of that entity's derived primitives. Global policies are - independent and aren't affected by deletions of other entities. + Policies created through an on-prem nested endpoint (`POST /ai/models/{modelId}/policies`, + `POST /ai/agents/{agentId}/policies`, or `POST /ai/mcp-servers/{mcpServerId}/policies`) are + lifecycle-coupled to the parent and removed when the parent is deleted, along with the rest + of that entity's derived primitives. + Standalone Policies referenced from parent entities through a `policies` array are independent + and aren't deleted when a referencing parent is deleted. The reference is simply removed. --- ## What is a Policy? @@ -88,18 +96,30 @@ rows: endpoint: /ai/policies {% endtable %} - - ## Policy scopes -A Policy is scoped at the time you create it, by the endpoint you POST it to: +A Policy is scoped by where it's referenced from. Each Policy is an independent plugin instance attached at exactly one scope. To apply the same configuration in multiple places, create one Policy per place. + +The available scopes are: + +* **Global**: a Policy that no parent entity references runs for every {{site.ai_gateway}} route on the data plane. Non-AI traffic on the same data plane isn't affected. +* **Model**: referenced from the `policies` array on a [Model entity](/ai-gateway/entities/model/). The plugin runs at the Service of the Model's derived primitives. +* **Agent**: referenced from the `policies` array on an [Agent entity](/ai-gateway/entities/agent/). The plugin runs at the Service of the Agent's derived primitives. +* **MCP Server**: referenced from the `policies` array on an [MCP Server entity](/ai-gateway/entities/mcp-server/). The plugin runs at the Service of the MCP Server's derived primitives. 
+* **Consumer**: referenced from the `policies` array on a [Consumer entity](/ai-gateway/entities/consumer/). The plugin runs when the Consumer is identified during a request. +* **Consumer Group**: referenced from the `policies` array on a [Consumer Group entity](/ai-gateway/entities/consumer-group/). The plugin runs when a member of the Consumer Group is identified during a request. + +### Creating Policies + +In {{site.konnect_short_name}}, all Policies are created through a single endpoint at `/v1/ai-gateways/{aiGatewayId}/policies`. Scope is set entirely through the reference-array mechanism above: add the Policy's `name` or `id` to the parent entity's `policies` array, or leave it unreferenced for global scope. + +In on-prem, the same flat creation endpoint is available at `/ai/policies`. On-prem additionally exposes convenience nested endpoints that create and scope a Policy in one call: -* **Global**: `POST /ai/policies` attaches the underlying plugin globally so it runs for every {{site.ai_gateway}} route on the data plane. Non-AI traffic on the same data plane is not affected. -* **Model**: `POST /ai/models/{modelId}/policies` attaches the underlying plugin at the Service of the Model's derived primitives. The plugin runs for requests routed through that Model. See the [Model entity](/ai-gateway/entities/model/). -* **Agent**: `POST /ai/agents/{agentId}/policies` attaches the plugin at the Service of the Agent's derived primitives. See the [Agent entity](/ai-gateway/entities/agent/). -* **MCP Server**: `POST /ai/mcp-servers/{mcpServerId}/policies` attaches the plugin at the Service of the MCP Server's derived primitives. See the [MCP Server entity](/ai-gateway/entities/mcp-server/). +* `POST /ai/models/{modelId}/policies` +* `POST /ai/agents/{agentId}/policies` +* `POST /ai/mcp-servers/{mcpServerId}/policies` -Scope is fixed at creation time. Moving a Policy from one scope to another means deleting it and creating a new one under the target endpoint. 
+Consumer and Consumer Group scoping uses the reference-array mechanism in both deployment modes. ## Lifecycle @@ -121,7 +141,7 @@ The following example creates a global PII sanitizer Policy that runs for every type: policy data: display_name: PII Sanitizer - Global - ref: pii-sanitizer-global + name: pii-sanitizer-global type: ai-sanitizer enabled: true config: @@ -139,7 +159,7 @@ The following example attaches a rate-limiting Policy to a Model. type: policy data: display_name: Rate Limit - Production GPT-4o - ref: rate-limit-prod-gpt4o + name: rate-limit-prod-gpt4o type: ai-rate-limiting-advanced enabled: true config: From e3f2e24b9c46f099f60b5409657f403338d09a5c Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Mon, 4 May 2026 10:22:26 +0200 Subject: [PATCH 34/53] appease vale --- app/_ai_gateway_entities/consumer-group.md | 108 ++++++++++++++++++++- app/_ai_gateway_entities/consumer.md | 4 +- app/_ai_gateway_entities/policy.md | 4 +- 3 files changed, 111 insertions(+), 5 deletions(-) diff --git a/app/_ai_gateway_entities/consumer-group.md b/app/_ai_gateway_entities/consumer-group.md index b8cefaf857..d13348df23 100644 --- a/app/_ai_gateway_entities/consumer-group.md +++ b/app/_ai_gateway_entities/consumer-group.md @@ -5,7 +5,7 @@ entities: - ai-consumer-group products: - ai-gateway -description: AI consumer group entity wrapper for group-scoped {{site.ai_gateway}} behavior. +description: Consumer Groups for {{site.ai_gateway}}. 
schema: api: konnect/ai-gateway path: /schemas/AIGatewayConsumerGroup @@ -16,8 +16,114 @@ tools: - deck - admin-api - konnect-api +related_resources: + - text: "About {{site.ai_gateway}}" + url: /ai-gateway/ + - text: Consumer entity + url: /ai-gateway/entities/consumer/ + - text: Model entity + url: /ai-gateway/entities/model/ + - text: Policy entity + url: /ai-gateway/entities/policy/ + - text: "{{site.base_gateway}} Consumer Group entity" + url: /gateway/entities/consumer-group/ +faqs: + - q: How is an {{site.ai_gateway}} Consumer Group different from a {{site.base_gateway}} Consumer Group? + a: | + The runtime entity is a regular Kong Consumer Group. The {{site.ai_gateway}} surface adds + the entity convention (`display_name`, `name`, `labels`) and a required `policies` array + for attaching plugin instances at the group scope. + + - q: Can I edit the underlying Kong Consumer Group that {{site.ai_gateway}} generates? + a: | + No. The generated Kong Consumer Group is protected from direct modification through the + standard `/consumer-groups` Admin API. Update the AI Consumer Group instead. + + - q: How do I assign a Consumer to a Consumer Group? + a: | + Set the `consumer_groups` array on the Consumer entity to reference this group by + `name` or `id`. Membership is managed from the Consumer side. + See the [Consumer entity](/ai-gateway/entities/consumer/) reference. + + - q: Can a Consumer belong to multiple Consumer Groups? + a: | + Yes. The Consumer's `consumer_groups` array accepts one or more references. + + - q: How do I attach Policies to a Consumer Group? + a: | + Add the Policy's `name` or `id` to the Consumer Group's `policies` array. + The plugin runs when a member of the group is identified during a request. + See the [Policy entity](/ai-gateway/entities/policy/) reference. + + Unlike Model, Agent, and MCP Server, on-prem does not expose a nested + `/ai/consumer-groups/{id}/policies` endpoint. 
The reference-array mechanism is the only + way to attach a Policy to a Consumer Group in either deployment mode. + + - q: How do I gate access to a Model, Agent, or MCP Server with a Consumer Group? + a: | + Add the Consumer Group's name to the parent entity's `acls.allow` or `acls.deny` list. + ACLs accept Consumer, Consumer Group, and Authenticated Group names. + See the [Model entity](/ai-gateway/entities/model/) reference. --- +## What is a Consumer Group? + +A Consumer Group is the {{site.ai_gateway}} entity that represents a collection of Consumers grouped for the purpose of applying shared Policies and access controls. + +Use Consumer Groups to scope group-wide behavior, such as rate limits, prompt guards, or content moderation, without configuring each Consumer individually. Consumer Groups also appear in the `acls` field of Model, Agent, and MCP Server entities, where they gate access to those parent entities. + +Consumer Groups are managed through the {{site.ai_gateway}} entity API surface in both deployment modes: + +{% table %} +columns: + - title: Deployment + key: deployment + - title: Control Plane + key: cp + - title: Endpoint + key: endpoint +rows: + - deployment: "{{site.konnect_short_name}}" + cp: "{{site.konnect_short_name}} {{site.ai_gateway}} API" + endpoint: /v1/ai-gateways/{aiGatewayId}/consumer-groups + - deployment: On-prem + cp: Admin API + endpoint: /ai/consumer-groups +{% endtable %} + +## Membership + +A Consumer Group doesn't list its members directly. Membership is set on the Consumer entity through the Consumer's `consumer_groups` array. Each entry references a Consumer Group by `name` or `id`. A single Consumer can belong to multiple Consumer Groups. + +For the Consumer-side configuration, see the [Consumer entity](/ai-gateway/entities/consumer/) reference. + +## Attach Policies + +Policies attached to a Consumer Group run when a member of that group is identified during a request. 
To attach a Policy, add its `name` or `id` to the Consumer Group's `policies` array. + +You can attach multiple Policies to a single Consumer Group. Each Policy is an independent plugin instance, so attaching the same plugin type twice with different configurations creates two separate plugin entries. + +For the supported plugin types and how Policies attach to other entities, see the [Policy entity](/ai-gateway/entities/policy/) reference. + +## Use in parent entity ACLs + +The `acls` field on Model, Agent, and MCP Server entities accepts Consumer Group names alongside Consumer and Authenticated Group names. Add a Consumer Group to a parent entity's `acls.allow` list to permit its members access, or to `acls.deny` to block them. + +ACLs are evaluated at the Service level of the parent entity's derived primitives. Consumer Group membership is resolved after the request is authenticated and the Consumer is identified. + +## Set up a Consumer Group + +The following example creates an AI Consumer Group with one attached Policy that applies a shared rate limit to its members. + +{% entity_example %} +type: consumer-group +data: + display_name: Internal Teams + name: internal-teams + policies: + - rate-limit-internal-teams +{% endentity_example %} + ## Schema {% entity_schema %} diff --git a/app/_ai_gateway_entities/consumer.md b/app/_ai_gateway_entities/consumer.md index 8eabf555dd..a904eb38d4 100644 --- a/app/_ai_gateway_entities/consumer.md +++ b/app/_ai_gateway_entities/consumer.md @@ -103,7 +103,7 @@ The `config.custom_id` field stores an external identifier for the Consumer, suc You can assign a Consumer to one or more Consumer Groups through the `consumer_groups` array. Each entry references a Consumer Group by `name` or `id`. -Consumer Groups are managed through their own entity surface. +Consumer Groups are managed through their own entity surface. See the [Consumer Group entity](/ai-gateway/entities/consumer-group/) reference. 
## Attach Policies @@ -128,7 +128,7 @@ data: policies: [] config: credentials: - - api_key: sk-387788hd3xnej + - api_key: ttl: 86400 {% endentity_example %} diff --git a/app/_ai_gateway_entities/policy.md b/app/_ai_gateway_entities/policy.md index 986de5ab9c..90936f655e 100644 --- a/app/_ai_gateway_entities/policy.md +++ b/app/_ai_gateway_entities/policy.md @@ -71,7 +71,7 @@ faqs: ## What is a Policy? -A Policy is an AI Gateway entity that represents an action, taken by a plugin, that can be attached to an AI Gateway entity. +A Policy is an {{site.ai_gateway}} entity that represents an action, taken by a plugin, that can be attached to an {{site.ai_gateway}} entity. Each Policy declares a `type` (which is a plugin name, for example `ai-sanitizer` or `ai-rate-limiting-advanced`) and a `config` block whose contents follow that plugin's own schema. {{site.ai_gateway}} attaches the configured plugin at the scope you select: globally, or to a specific Model, Agent, or MCP Server. @@ -111,7 +111,7 @@ The available scopes are: ### Creating Policies -In {{site.konnect_short_name}}, all Policies are created through a single endpoint at `/v1/ai-gateways/{aiGatewayId}/policies`. Scope is set entirely through the reference-array mechanism above: add the Policy's `name` or `id` to the parent entity's `policies` array, or leave it unreferenced for global scope. +In {{site.konnect_short_name}}, all Policies are created through a single endpoint at `/v1/ai-gateways/{aiGatewayId}/policies`. Scope is set entirely through the reference-array mechanism above: add the Policy's `name` or `id` to the parent entity's `policies` array, or omit the reference for global scope. In on-prem, the same flat creation endpoint is available at `/ai/policies`. 
On-prem additionally exposes convenience nested endpoints that create and scope a Policy in one call: From 10acac2ca6e271ab3b35cea99d0332837ddf1f3d Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Mon, 4 May 2026 11:49:51 +0200 Subject: [PATCH 35/53] align titles --- app/_ai_gateway_entities/agent.md | 2 +- app/_ai_gateway_entities/consumer-group.md | 2 +- app/_ai_gateway_entities/consumer.md | 2 +- app/_ai_gateway_entities/mcp-server.md | 2 +- app/_ai_gateway_entities/model.md | 2 +- app/_ai_gateway_entities/policy.md | 2 +- app/_ai_gateway_entities/provider.md | 100 ++++++++++++++++++++- 7 files changed, 105 insertions(+), 7 deletions(-) diff --git a/app/_ai_gateway_entities/agent.md b/app/_ai_gateway_entities/agent.md index 07fa26bc4d..6256bf3a2f 100644 --- a/app/_ai_gateway_entities/agent.md +++ b/app/_ai_gateway_entities/agent.md @@ -1,5 +1,5 @@ --- -title: AI Agent +title: AI Agents content_type: reference entities: - ai-agent diff --git a/app/_ai_gateway_entities/consumer-group.md b/app/_ai_gateway_entities/consumer-group.md index d13348df23..474e686563 100644 --- a/app/_ai_gateway_entities/consumer-group.md +++ b/app/_ai_gateway_entities/consumer-group.md @@ -1,5 +1,5 @@ --- -title: AI Consumer Group +title: AI Consumer Groups content_type: reference entities: - ai-consumer-group diff --git a/app/_ai_gateway_entities/consumer.md b/app/_ai_gateway_entities/consumer.md index a904eb38d4..69d7a32fa4 100644 --- a/app/_ai_gateway_entities/consumer.md +++ b/app/_ai_gateway_entities/consumer.md @@ -1,5 +1,5 @@ --- -title: Consumer +title: AI Consumers content_type: reference entities: - consumer diff --git a/app/_ai_gateway_entities/mcp-server.md b/app/_ai_gateway_entities/mcp-server.md index ddaf5b86eb..6c7e16b0c5 100644 --- a/app/_ai_gateway_entities/mcp-server.md +++ b/app/_ai_gateway_entities/mcp-server.md @@ -1,5 +1,5 @@ --- -title: AI MCP Server +title: AI MCP Servers content_type: reference entities: - ai-mcp-server diff --git 
a/app/_ai_gateway_entities/model.md b/app/_ai_gateway_entities/model.md index c19769cb7b..2ac9071207 100644 --- a/app/_ai_gateway_entities/model.md +++ b/app/_ai_gateway_entities/model.md @@ -1,5 +1,5 @@ --- -title: Model +title: AI Models content_type: reference entities: - model diff --git a/app/_ai_gateway_entities/policy.md b/app/_ai_gateway_entities/policy.md index 90936f655e..9471a96494 100644 --- a/app/_ai_gateway_entities/policy.md +++ b/app/_ai_gateway_entities/policy.md @@ -1,5 +1,5 @@ --- -title: Policy +title: AI Policies content_type: reference entities: - policy diff --git a/app/_ai_gateway_entities/provider.md b/app/_ai_gateway_entities/provider.md index 00562573ba..f4837f7911 100644 --- a/app/_ai_gateway_entities/provider.md +++ b/app/_ai_gateway_entities/provider.md @@ -1,5 +1,5 @@ --- -title: AI Provider +title: AI Providers content_type: reference entities: - ai-provider @@ -16,8 +16,106 @@ tools: - deck - admin-api - konnect-api +related_resources: + - text: "About {{site.ai_gateway}}" + url: /ai-gateway/ + - text: "{{site.ai_gateway}} providers" + url: /ai-gateway/ai-providers/ + - text: Model entity + url: /ai-gateway/entities/model/ + - text: Policy entity + url: /ai-gateway/entities/policy/ +faqs: + - q: What happens when I update a Provider's credentials? + a: | + {{site.ai_gateway}} propagates the credential change to every Model that references the + Provider by `name`. The next request through any of those Models uses the updated credentials. + + - q: How does a Model reference a Provider? + a: | + Set `target_models[].provider.name` on the Model to the Provider's `name`. Provider references + take a `name` only, not an ID. + + - q: Do Providers generate any runtime primitives on their own? + a: | + No. A Provider entity is a write-time template. 
Credentials and configuration only enter + the runtime when a Model references the Provider; at that point, the Provider's values are + materialized into the AI Proxy Advanced plugin instances generated for the Model. --- +## What is a Provider? + +A Provider is a first-class {{site.ai_gateway}} entity that represents an upstream LLM service connection: credentials, endpoint configuration, and provider-type-specific options. Each Provider has a `type` that selects the upstream LLM service (see the schema below for supported values, and the per-provider pages under [{{site.ai_gateway}} providers](/ai-gateway/ai-providers/) for provider-specific guidance). + +Models reference a Provider by `name` to route their `target_models` to that upstream. {{site.ai_gateway}} materializes the Provider's credentials into the AI Proxy Advanced plugin configuration of every Model that references it. Updating a Provider propagates credential changes to all referencing Models. + +Providers are managed through the {{site.ai_gateway}} entity API surface in both deployment modes: + +{% table %} +columns: + - title: Deployment + key: deployment + - title: Control Plane + key: cp + - title: Endpoint + key: endpoint +rows: + - deployment: "{{site.konnect_short_name}}" + cp: "{{site.konnect_short_name}} {{site.ai_gateway}} API" + endpoint: /v1/ai-gateways/{aiGatewayId}/providers + - deployment: On-prem + cp: Admin API + endpoint: /ai/providers +{% endtable %} + +## Authentication + +The `config.auth` object declares how {{site.ai_gateway}} authenticates to the upstream provider. The shape of `auth` depends on the Provider's `type`: + +* **`basic`**: header- or query-parameter-based auth. Used by most provider types. +* **`aws`**: IAM access-key and assume-role auth. Used by `bedrock`. +* **`azure`**: Microsoft Entra ID or managed-identity auth. Used by `azure`. +* **`gcp`**: Google service-account auth. Used by `gemini`. 
+ +`bedrock`, `azure`, and `gemini` can also fall back to `basic` auth. See the schema below for field-level details, and the per-provider pages under [{{site.ai_gateway}} providers](/ai-gateway/ai-providers/) for provider-specific guidance. + +{:.warning} +> Don't commit credential values to source control. Use a secret-management system to inject +> auth values at deployment time, and treat any value checked into a configuration file as +> compromised. + +## Provider references + +Models reference a Provider by `name` through the `target_models[].provider.name` field. The same reference shape is used elsewhere in the schema (such as the embeddings model under a Model's load balancer config). Provider references in {{site.ai_gateway}} entities accept the Provider's `name` only, not its ID. + +Because references resolve by `name`, the `name` field is the stable handle for a Provider across the entity surface. Renaming a Provider (changing `name`) breaks any Model reference that pointed at the old value. + +## Lifecycle + +Creating a Provider stores the entity but doesn't generate any runtime primitives. Provider credentials enter the runtime only when a Model references the Provider — at that point, the credentials are materialized into the AI Proxy Advanced plugin configuration of the Model's derived primitives. + +Updating a Provider re-materializes credentials into every Model that references it. The change takes effect on the next request through any referencing Model. + + + +## Set up a Provider + +The following example creates an OpenAI Provider that authenticates with a single bearer-token header. A Model can then route to this Provider by setting `target_models[].provider.name` to `my-openai-account`. 
+ +{% entity_example %} +type: provider +data: + display_name: OpenAI Production + name: my-openai-account + type: openai + config: + auth: + type: basic + headers: + - name: Authorization + value: Bearer +{% endentity_example %} + ## Schema {% entity_schema %} From 42be6798778a6a8846298317a32fe48fd449d0ed Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Tue, 5 May 2026 05:50:36 +0200 Subject: [PATCH 36/53] Add provisional UI instructions to entity config examples --- app/_ai_gateway_entities/consumer.md | 2 +- app/_ai_gateway_entities/model.md | 2 +- app/_ai_gateway_entities/policy.md | 7 +- app/_data/entity_examples/config.yml | 7 ++ .../components/entity_example/format/ui_ai.md | 81 +++++++++++++++++++ .../drops/entity_example/presenters/ui.rb | 6 +- 6 files changed, 100 insertions(+), 5 deletions(-) create mode 100644 app/_includes/components/entity_example/format/ui_ai.md diff --git a/app/_ai_gateway_entities/consumer.md b/app/_ai_gateway_entities/consumer.md index 69d7a32fa4..4585ff967b 100644 --- a/app/_ai_gateway_entities/consumer.md +++ b/app/_ai_gateway_entities/consumer.md @@ -2,7 +2,7 @@ title: AI Consumers content_type: reference entities: - - consumer + - ai-consumer products: - ai-gateway description: "Consumers for {{site.ai_gateway}}." diff --git a/app/_ai_gateway_entities/model.md b/app/_ai_gateway_entities/model.md index 2ac9071207..3bcd34a5ad 100644 --- a/app/_ai_gateway_entities/model.md +++ b/app/_ai_gateway_entities/model.md @@ -2,7 +2,7 @@ title: AI Models content_type: reference entities: - - model + - ai-model products: - ai-gateway description: AI Models registered with the {{site.ai_gateway}}. 
diff --git a/app/_ai_gateway_entities/policy.md b/app/_ai_gateway_entities/policy.md index 9471a96494..c2a754eeb2 100644 --- a/app/_ai_gateway_entities/policy.md +++ b/app/_ai_gateway_entities/policy.md @@ -2,9 +2,12 @@ title: AI Policies content_type: reference entities: - - policy + - ai-policy products: - ai-gateway +breadcrumbs: + - /ai-gateway/ + - /ai-gateway/entities/ description: "Policies for {{site.ai_gateway}}." schema: api: konnect/ai-gateway @@ -176,4 +179,4 @@ data: ## Schema -{% entity_schema %} \ No newline at end of file +{% entity_schema %} diff --git a/app/_data/entity_examples/config.yml b/app/_data/entity_examples/config.yml index 1f0a20fc91..a96f82ded5 100644 --- a/app/_data/entity_examples/config.yml +++ b/app/_data/entity_examples/config.yml @@ -174,6 +174,13 @@ formats: ui: label: 'UI' entities: + - ai-provider + - ai-model + - ai-agent + - ai-mcp-server + - ai-policy + - ai-consumer + - ai-consumer-group - admin - ca_certificate - certificate diff --git a/app/_includes/components/entity_example/format/ui_ai.md b/app/_includes/components/entity_example/format/ui_ai.md new file mode 100644 index 0000000000..a7b461b420 --- /dev/null +++ b/app/_includes/components/entity_example/format/ui_ai.md @@ -0,0 +1,81 @@ +{% if page.layout == 'gateway_entity' %} +{% case include.presenter.entity_type %} +{% when 'provider' %} +The following creates a new AI Provider. Suggested values are shown in backticks: + +1. In {{site.konnect_short_name}}, navigate to [{{site.ai_gateway_name}}](https://cloud.konghq.com/ai-gateway/) in the sidebar. +1. Select an AI Gateway. +1. Navigate to **Providers**. +1. Click **New Provider**. +1. Enter a **Display Name** (for example: `{{ include.presenter.data['display_name'] }}`) and **Name** (for example: `{{ include.presenter.data['name'] }}`). +1. Select a provider (for example: `{{ include.presenter.data['type'] }}`). +1. Configure authentication and connection settings for the selected provider type. +1. 
Click **Create**. +{% when 'policy' %} +The following creates a new AI Policy. Suggested values are shown in backticks: + +1. In {{site.konnect_short_name}}, navigate to [{{site.ai_gateway_name}}](https://cloud.konghq.com/ai-gateway/) in the sidebar. +1. Select an AI Gateway. +1. Navigate to **Policies**. +1. Click **New Policy**. +1. Enter a **Display Name** (for example: `{{ include.presenter.data['display_name'] }}`) and **Name** (for example: `{{ include.presenter.data['name'] }}`). +1. Select a policy **Type** (for example: `{{ include.presenter.data['type'] }}`). +1. Configure the policy `config` fields. +1. Click **Create**. +{% when 'consumer' %} +The following creates a new AI Consumer. Suggested values are shown in backticks: + +1. In {{site.konnect_short_name}}, navigate to [{{site.ai_gateway_name}}](https://cloud.konghq.com/ai-gateway/) in the sidebar. +1. Select an AI Gateway. +1. Navigate to **Consumers**. +1. Click **New Consumer**. +1. Enter a **Display Name** (for example: `{{ include.presenter.data['display_name'] }}`) and **Name** (for example: `{{ include.presenter.data['name'] }}`). +1. Select an authentication **Type** (for example: `{{ include.presenter.data['type'] }}`). +1. Configure credentials and optional Consumer Group or Policy references. +1. Click **Create**. +{% when 'consumer-group' %} +The following creates a new AI Consumer Group. Suggested values are shown in backticks: + +1. In {{site.konnect_short_name}}, navigate to [{{site.ai_gateway_name}}](https://cloud.konghq.com/ai-gateway/) in the sidebar. +1. Select an AI Gateway. +1. Navigate to **Credentials**. +1. Select the **Groups** tab. +1. Click **New Group**. +1. Enter a **Display Name** (for example: `{{ include.presenter.data['display_name'] }}`) and **Name** (for example: `{{ include.presenter.data['name'] }}`). +1. Optionally add policy references for group-level enforcement. +1. Click **Create**. +{% when 'model' %} +The following creates a new AI Model. 
Suggested values are shown in backticks: + +1. In {{site.konnect_short_name}}, navigate to [{{site.ai_gateway_name}}](https://cloud.konghq.com/ai-gateway/) in the sidebar. +1. Select an AI Gateway. +1. Navigate to **Models**. +1. Click **New Model**. +1. Enter a **Display Name** (for example: `{{ include.presenter.data['display_name'] }}`) and **Name** (for example: `{{ include.presenter.data['name'] }}`). +1. Configure at least one target model and select the Provider reference. +1. Optionally add policies, ACLs, labels, and fallback/load-balancing settings. +1. Click **Create**. +{% when 'agent' %} +The following creates a new AI Agent. Suggested values are shown in backticks: + +1. In {{site.konnect_short_name}}, navigate to [{{site.ai_gateway_name}}](https://cloud.konghq.com/ai-gateway/) in the sidebar. +1. Select an AI Gateway. +1. Navigate to **Agents**. +1. Click **New Agent**. +1. Enter a **Display Name** (for example: `{{ include.presenter.data['display_name'] }}`) and **Name** (for example: `{{ include.presenter.data['name'] }}`). +1. Configure the agent settings, model/tool references, and optional policies. +1. Click **Create**. +{% when 'mcp-server' %} +The following creates a new AI MCP Server. Suggested values are shown in backticks: + +1. In {{site.konnect_short_name}}, navigate to [{{site.ai_gateway_name}}](https://cloud.konghq.com/ai-gateway/) in the sidebar. +1. Select an AI Gateway. +1. Navigate to **MCP Servers**. +1. Click **New MCP Server**. +1. Enter a **Display Name** (for example: `{{ include.presenter.data['display_name'] }}`) and **Name** (for example: `{{ include.presenter.data['name'] }}`). +1. Configure endpoint/auth settings and optional policies. +1. Click **Create**. +{% else %} +UI instructions are not yet available for this AI Gateway entity type. 
+{% endcase %} +{% endif %} diff --git a/app/_plugins/drops/entity_example/presenters/ui.rb b/app/_plugins/drops/entity_example/presenters/ui.rb index 84b80506a3..62851b692f 100644 --- a/app/_plugins/drops/entity_example/presenters/ui.rb +++ b/app/_plugins/drops/entity_example/presenters/ui.rb @@ -13,7 +13,11 @@ def data end def template_file - '/components/entity_example/format/ui.md' + if @example_drop.product == 'ai-gateway' + '/components/entity_example/format/ui_ai.md' + else + '/components/entity_example/format/ui.md' + end end end From be9d642477db07f5c558de886a7208fa70c84177 Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Tue, 5 May 2026 09:10:12 +0200 Subject: [PATCH 37/53] add missing entity pages --- .../consumer-credential.md | 132 ++++++++++++++++++ app/_ai_gateway_entities/consumer.md | 57 ++++---- app/_ai_gateway_entities/vault.md | 107 ++++++++++++++ 3 files changed, 264 insertions(+), 32 deletions(-) create mode 100644 app/_ai_gateway_entities/consumer-credential.md create mode 100644 app/_ai_gateway_entities/vault.md diff --git a/app/_ai_gateway_entities/consumer-credential.md b/app/_ai_gateway_entities/consumer-credential.md new file mode 100644 index 0000000000..152d486615 --- /dev/null +++ b/app/_ai_gateway_entities/consumer-credential.md @@ -0,0 +1,132 @@ +--- +title: AI Consumer Credentials +content_type: reference +entities: + - ai-consumer-credential +products: + - ai-gateway +description: Credentials issued to AI Consumers for authenticating to {{site.ai_gateway}}. 
+schema: + api: konnect/ai-gateway + path: /schemas/AIGatewayConsumerCredential +works_on: + - konnect + - on-prem +tools: + - deck + - admin-api + - konnect-api +related_resources: + - text: "About {{site.ai_gateway}}" + url: /ai-gateway/ + - text: Consumer entity + url: /ai-gateway/entities/consumer/ + - text: Consumer Group entity + url: /ai-gateway/entities/consumer-group/ + - text: Policy entity + url: /ai-gateway/entities/policy/ +faqs: + - q: Why are credentials a separate entity instead of a field on the Consumer? + a: | + Each credential has its own lifecycle, identifier, and (for API keys) TTL. Modeling them as + a sub-entity of the Consumer lets you list, rotate, and revoke individual credentials + independently of the Consumer record. + + - q: What credential types are supported? + a: | + Two types: `api-key` and `oauth`. The `type` of the Credential must match the Consumer's + `type`. An `api-key` credential carries the `api_key` value (and an optional `ttl`). An + `oauth` credential carries a `custom_id` that maps to the OAuth provider's identifier. + + - q: Can a Consumer have multiple credentials? + a: | + Yes. Issue one Credential per environment, client, or rotation cycle, and revoke individual + Credentials without affecting the others. + + - q: Is the API key value visible after creation? + a: | + No. The `api_key` field is write-only; subsequent reads return the Credential's metadata + (`name`, `display_name`, `ttl`, timestamps) but not the secret. Distribute the key value at + creation time, and rotate by issuing a new Credential and revoking the old one. + + - q: What's the relationship between `ttl` and the Consumer's lifecycle? + a: | + `ttl` sets how long, in seconds, the API key value remains valid. When it elapses, the + Credential stops authenticating, but the Credential record and the parent Consumer remain. + Issue a new Credential to keep the Consumer authenticating. +--- + +## What is a Consumer Credential? 
+ +A Consumer Credential is the {{site.ai_gateway}} entity that represents the secret material a [Consumer](/ai-gateway/entities/consumer/) presents to authenticate to {{site.ai_gateway}}. + +Credentials are nested under their owning Consumer: each Credential belongs to exactly one Consumer, and removing the Consumer removes its Credentials. + +Consumer Credentials are managed through the {{site.ai_gateway}} entity API surface in both deployment modes: + +{% table %} +columns: + - title: Deployment + key: deployment + - title: Control Plane + key: cp + - title: Endpoint + key: endpoint +rows: + - deployment: "{{site.konnect_short_name}}" + cp: "{{site.konnect_short_name}} {{site.ai_gateway}} API" + endpoint: /v1/ai-gateways/{aiGatewayId}/consumers/{consumerId}/credentials + - deployment: On-prem + cp: Admin API + endpoint: /ai/consumers/{consumerId}/credentials +{% endtable %} + +## Credential types + +The `type` field on a Credential must match the parent Consumer's `type`: + +* **`api-key`**: the Credential carries an `api_key` value the client presents on each request. An optional `ttl` (seconds) bounds the validity period; once it elapses, the value no longer authenticates. +* **`oauth`**: the Credential carries a `custom_id` that maps to the OAuth provider's identifier (for example, an OIDC Client ID). The actual token is issued and validated by the OAuth provider, not stored on the Credential. + +The `api_key` field is write-only and cannot be retrieved after creation. Treat creation responses as the only opportunity to capture the key value. + +## Lifecycle + +Each Credential has its own UUID and can be listed, retrieved, and deleted independently through the nested endpoints under its parent Consumer. There's no PUT operation: rotation is an explicit "create new, delete old" flow, which avoids long-lived stale references. + +Deleting a Credential immediately stops that Credential from authenticating. Deleting the parent Consumer removes all of its Credentials. 
+ +## Set up an API key Credential + +The following example issues a 24-hour API key credential to an existing Consumer named `mobile-app-production`. + +{% entity_example %} +type: consumer-credential +data: + display_name: Mobile App - Dev Key + name: mobile-app-dev-key + type: api-key + api_key: + ttl: 86400 +{% endentity_example %} + +{:.warning} +> Don't commit `api_key` values to source control. Inject them at creation time from a +> secret-management system, and treat any value checked into a configuration file as compromised. + +## Set up an OAuth Credential + +The following example issues an OAuth credential that maps an external OIDC client ID to a Consumer. + +{% entity_example %} +type: consumer-credential +data: + display_name: Mobile App - OIDC Mapping + name: mobile-app-oidc-mapping + type: oauth + custom_id: 0oatibf4t2PlDxqgR1d7 +{% endentity_example %} + +## Schema + +{% entity_schema %} diff --git a/app/_ai_gateway_entities/consumer.md b/app/_ai_gateway_entities/consumer.md index 4585ff967b..4c9eaf9eb5 100644 --- a/app/_ai_gateway_entities/consumer.md +++ b/app/_ai_gateway_entities/consumer.md @@ -19,6 +19,10 @@ tools: related_resources: - text: "About {{site.ai_gateway}}" url: /ai-gateway/ + - text: Consumer Credential entity + url: /ai-gateway/entities/consumer-credential/ + - text: Consumer Group entity + url: /ai-gateway/entities/consumer-group/ - text: Model entity url: /ai-gateway/entities/model/ - text: Policy entity @@ -30,18 +34,22 @@ faqs: a: | The runtime entity is a regular Kong Consumer. The {{site.ai_gateway}} surface uses the {{site.ai_gateway}} entity convention (`display_name`, `name`, `labels`), requires an - authentication `type` field, accepts inline Consumer Group assignment, and lets you reference - Policies and embed credentials directly on the Consumer. + authentication `type` field, accepts inline Consumer Group assignment, and lets you + reference Policies. 
Credentials are managed as a separate sub-entity rather than embedded + on the Consumer. - - q: Can I edit the underlying Kong Consumer that {{site.ai_gateway}} generates? + - q: How do I add credentials to an AI Consumer? a: | - No. The generated Kong Consumer is protected from direct modification through the standard - `/consumers` Admin API. Update the AI Consumer instead. + Credentials are a separate sub-entity, not a field on the Consumer. Create them under the + Consumer's nested credentials endpoint. See the + [Consumer Credential entity](/ai-gateway/entities/consumer-credential/) reference. - - q: How do I add credentials to an AI Consumer? + - q: "What's the difference between `type: api-key` and `type: oauth`?" a: | - For `type: apikey`, set `config.credentials[].api_key` on the Consumer. - Each entry can also set a `ttl` in seconds. + The `type` declares which credential family the Consumer authenticates with. An `api-key` + Consumer holds one or more `api-key` Credentials. An `oauth` Consumer holds one or more + `oauth` Credentials whose `custom_id` maps to the OAuth provider's identifier. The + Credential's `type` must match the Consumer's `type`. - q: Can a Consumer belong to multiple Consumer Groups? a: | @@ -60,7 +68,7 @@ A Consumer is the {{site.ai_gateway}} entity that represents a downstream client You can use Consumers and Consumer Groups to authenticate clients, attach Policies, and gate access to Models, Agents, and MCP Servers through those parent entities' `acls` field. -Consumers are managed through the {{site.ai_gateway}} entity API surface in either deployment modes: +Consumers are managed through the {{site.ai_gateway}} entity API surface in both deployment modes: {% table %} columns: @@ -81,23 +89,12 @@ rows: ## Authentication type -The `type` field declares how the Consumer authenticates to {{site.ai_gateway}}. 
Supported values are: - -* `apikey` -* `oauth` - - - -## Credentials - -For Consumers of `type: apikey`, set credentials in the Consumer's `config.credentials` array. Each entry has: - -* `api_key`: the API key value the client presents. -* `ttl`: optional time-to-live in seconds. Once elapsed, the credential is no longer valid. +The `type` field declares which credential family the Consumer authenticates with. Supported values are: -## External identity mapping +* `api-key`: the Consumer authenticates with one or more API key Credentials. +* `oauth`: the Consumer authenticates with one or more OAuth Credentials whose `custom_id` maps to the OAuth provider's identifier. -The `config.custom_id` field stores an external identifier for the Consumer, such as an OIDC Client ID. This field is optional and informational. {{site.ai_gateway}} does not use it for authentication or routing. +The `type` of every Credential issued to the Consumer must match the Consumer's `type`. See the [Consumer Credential entity](/ai-gateway/entities/consumer-credential/) reference for credential management. ## Consumer Group membership @@ -107,29 +104,25 @@ Consumer Groups are managed through their own entity surface. See the [Consumer ## Attach Policies -A Policy is an {{site.ai_gateway}} Entity that triggers an action using a plugin. You can attach a Policy to a Consumer and the underlying plugin will run in the request lifecycle when this Consumer is identified. To attach a Policy add the Policy's `name` or `id` to the Consumer's `policies` array. +A Policy is an {{site.ai_gateway}} entity that triggers an action using a plugin. You can attach a Policy to a Consumer and the underlying plugin will run in the request lifecycle when this Consumer is identified. To attach a Policy, add the Policy's `name` or `id` to the Consumer's `policies` array. -You can add multiple Policies to a single Consumer. Each Policy is an independent instance. 
+You can attach multiple Policies to a single Consumer. Each Policy is an independent instance. For the supported plugin types and how Policies attach to other entities, see the [Policy entity](/ai-gateway/entities/policy/) reference. ## Set up a Consumer -The following example creates an AI Consumer with one API key credential, assigned to a single Consumer Group. +The following example creates an AI Consumer assigned to a single Consumer Group. Credentials are issued separately through the [Consumer Credential entity](/ai-gateway/entities/consumer-credential/). {% entity_example %} type: consumer data: display_name: Mobile App - Production name: mobile-app-production - type: apikey + type: api-key consumer_groups: - internal-teams policies: [] - config: - credentials: - - api_key: - ttl: 86400 {% endentity_example %} ## Schema diff --git a/app/_ai_gateway_entities/vault.md b/app/_ai_gateway_entities/vault.md new file mode 100644 index 0000000000..0e95202d4c --- /dev/null +++ b/app/_ai_gateway_entities/vault.md @@ -0,0 +1,107 @@ +--- +title: AI Vaults +content_type: reference +entities: + - ai-vault +products: + - ai-gateway +description: Vaults for storing and referencing secrets used by {{site.ai_gateway}} entities. +schema: + api: konnect/ai-gateway + path: /schemas/AIGatewayVault +works_on: + - konnect + - on-prem +tools: + - deck + - admin-api + - konnect-api +related_resources: + - text: "About {{site.ai_gateway}}" + url: /ai-gateway/ + - text: Provider entity + url: /ai-gateway/entities/provider/ + - text: Model entity + url: /ai-gateway/entities/model/ + - text: "{{site.base_gateway}} Vault entity" + url: /gateway/entities/vault/ +faqs: + - q: How is an {{site.ai_gateway}} Vault different from a {{site.base_gateway}} Vault? + a: | + The runtime entity is the same secret-management abstraction. 
The {{site.ai_gateway}} surface + manages Vaults through the AI entity convention (`display_name`, `name`, `description`, + `labels`) and exposes them at the `/ai/vaults` API alongside the other AI entities. + + - q: Which secret backends are supported? + a: | + The `type` field selects the backend: `env`, `aws`, `gcp`, `azure`, `conjur`, or `hcv`. + Each type carries its own `config` shape. HashiCorp Vault (`hcv`) further selects an + `auth_method` from `token`, `cert`, `jwt`, `approle`, `kubernetes`, `gcp_iam`, `gcp_gce`, + `aws_ec2`, `aws_iam`, or `azure`. + + - q: How are Vault secrets referenced from other {{site.ai_gateway}} entities? + a: | + Sensitive fields on Provider, Model, MCP Server, and other entities are annotated as + referenceable. Set those fields to a vault reference string (for example, a `{vault://...}` + placeholder) instead of a literal value. The Vault `name` is the lookup key. + + - q: What does `name` control? + a: | + `name` is a user-defined unique identifier and the stable handle used to look up the Vault + configuration when other entities reference secrets. Renaming a Vault breaks any reference + pointing at the old value. +--- + +## What is a Vault? + +A Vault is a first-class {{site.ai_gateway}} entity that registers a secret-management backend so that other entities (Providers, Models, MCP Servers) can reference secrets instead of embedding values directly. + +A Vault entity stores the connection configuration and credentials needed to reach the backend. {{site.ai_gateway}} resolves vault references against the registered Vaults at request time. 
+ +Vaults can be created and managed through the {{site.konnect_short_name}} {{site.ai_gateway}} API, the on-prem Admin API, decK, or the {{site.konnect_short_name}} UI: + +{% table %} +columns: + - title: Deployment + key: deployment + - title: Control Plane + key: cp + - title: Endpoint + key: endpoint +rows: + - deployment: "{{site.konnect_short_name}}" + cp: "{{site.konnect_short_name}} {{site.ai_gateway}} API" + endpoint: /v1/ai-gateways/{aiGatewayId}/vaults + - deployment: On-prem + cp: Admin API + endpoint: /ai/vaults +{% endtable %} + +## Backends + +Each Vault selects one of the supported secret backends — environment variables, AWS Secrets Manager, Google Secret Manager, Azure Key Vault, CyberArk Conjur, or HashiCorp Vault. The connection details vary per backend; the {{site.konnect_short_name}} UI surfaces the relevant fields based on the backend you choose. + +HashiCorp Vault additionally supports several authentication methods (token, AppRole, JWT, Kubernetes, AWS, GCP, Azure, and others). See the [{{site.base_gateway}} Vault entity](/gateway/entities/vault/) for backend-specific guidance that applies to both deployment modes. + +## Caching + +Network-backed vault types (`aws`, `gcp`, `azure`, `conjur`, `hcv`) cache resolved secrets so that {{site.ai_gateway}} doesn't hit the backend on every reference. Cache duration, negative-lookup caching, and how long expired secrets stay in use during backend outages are all tunable. The `env` type doesn't cache because environment-variable lookups don't hit the network. + +## Set up a Vault + +The following example registers an environment-variable vault that resolves references against process environment variables prefixed with `KONG_`. + +{% entity_example %} +type: vault +data: + display_name: Production Env Vault + name: prod-env-vault + description: Vault for production secrets sourced from environment variables. 
+ type: env + config: + prefix: KONG_ +{% endentity_example %} + +## Schema + +{% entity_schema %} From 0dd3b9d72f03066e74f0cc22b73ebc4a3dfc9ef7 Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Tue, 5 May 2026 09:13:10 +0200 Subject: [PATCH 38/53] Appease vale --- .../components/entity_example/format/ui_ai.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/app/_includes/components/entity_example/format/ui_ai.md b/app/_includes/components/entity_example/format/ui_ai.md index a7b461b420..4e9bbb75e2 100644 --- a/app/_includes/components/entity_example/format/ui_ai.md +++ b/app/_includes/components/entity_example/format/ui_ai.md @@ -4,7 +4,7 @@ The following creates a new AI Provider. Suggested values are shown in backticks: 1. In {{site.konnect_short_name}}, navigate to [{{site.ai_gateway_name}}](https://cloud.konghq.com/ai-gateway/) in the sidebar. -1. Select an AI Gateway. +1. Select an {{site.ai_gateway}}. 1. Navigate to **Providers**. 1. Click **New Provider**. 1. Enter a **Display Name** (for example: `{{ include.presenter.data['display_name'] }}`) and **Name** (for example: `{{ include.presenter.data['name'] }}`). @@ -15,7 +15,7 @@ The following creates a new AI Provider. Suggested values are shown in backticks The following creates a new AI Policy. Suggested values are shown in backticks: 1. In {{site.konnect_short_name}}, navigate to [{{site.ai_gateway_name}}](https://cloud.konghq.com/ai-gateway/) in the sidebar. -1. Select an AI Gateway. +1. Select an {{site.ai_gateway}}. 1. Navigate to **Policies**. 1. Click **New Policy**. 1. Enter a **Display Name** (for example: `{{ include.presenter.data['display_name'] }}`) and **Name** (for example: `{{ include.presenter.data['name'] }}`). @@ -26,7 +26,7 @@ The following creates a new AI Policy. Suggested values are shown in backticks: The following creates a new AI Consumer. Suggested values are shown in backticks: 1. 
In {{site.konnect_short_name}}, navigate to [{{site.ai_gateway_name}}](https://cloud.konghq.com/ai-gateway/) in the sidebar. -1. Select an AI Gateway. +1. Select an {{site.ai_gateway}}. 1. Navigate to **Consumers**. 1. Click **New Consumer**. 1. Enter a **Display Name** (for example: `{{ include.presenter.data['display_name'] }}`) and **Name** (for example: `{{ include.presenter.data['name'] }}`). @@ -37,7 +37,7 @@ The following creates a new AI Consumer. Suggested values are shown in backticks The following creates a new AI Consumer Group. Suggested values are shown in backticks: 1. In {{site.konnect_short_name}}, navigate to [{{site.ai_gateway_name}}](https://cloud.konghq.com/ai-gateway/) in the sidebar. -1. Select an AI Gateway. +1. Select an {{site.ai_gateway}}. 1. Navigate to **Credentials**. 1. Select the **Groups** tab. 1. Click **New Group**. @@ -48,7 +48,7 @@ The following creates a new AI Consumer Group. Suggested values are shown in bac The following creates a new AI Model. Suggested values are shown in backticks: 1. In {{site.konnect_short_name}}, navigate to [{{site.ai_gateway_name}}](https://cloud.konghq.com/ai-gateway/) in the sidebar. -1. Select an AI Gateway. +1. Select an {{site.ai_gateway}}. 1. Navigate to **Models**. 1. Click **New Model**. 1. Enter a **Display Name** (for example: `{{ include.presenter.data['display_name'] }}`) and **Name** (for example: `{{ include.presenter.data['name'] }}`). @@ -59,7 +59,7 @@ The following creates a new AI Model. Suggested values are shown in backticks: The following creates a new AI Agent. Suggested values are shown in backticks: 1. In {{site.konnect_short_name}}, navigate to [{{site.ai_gateway_name}}](https://cloud.konghq.com/ai-gateway/) in the sidebar. -1. Select an AI Gateway. +1. Select an {{site.ai_gateway}}. 1. Navigate to **Agents**. 1. Click **New Agent**. 1. 
Enter a **Display Name** (for example: `{{ include.presenter.data['display_name'] }}`) and **Name** (for example: `{{ include.presenter.data['name'] }}`). @@ -69,13 +69,13 @@ The following creates a new AI Agent. Suggested values are shown in backticks: The following creates a new AI MCP Server. Suggested values are shown in backticks: 1. In {{site.konnect_short_name}}, navigate to [{{site.ai_gateway_name}}](https://cloud.konghq.com/ai-gateway/) in the sidebar. -1. Select an AI Gateway. +1. Select an {{site.ai_gateway}}. 1. Navigate to **MCP Servers**. 1. Click **New MCP Server**. 1. Enter a **Display Name** (for example: `{{ include.presenter.data['display_name'] }}`) and **Name** (for example: `{{ include.presenter.data['name'] }}`). 1. Configure endpoint/auth settings and optional policies. 1. Click **Create**. {% else %} -UI instructions are not yet available for this AI Gateway entity type. +UI instructions are not yet available for this {{site.ai_gateway}} entity type. {% endcase %} {% endif %} From 0b137cc72257ca96c911bb98943dd42bd1367c47 Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Tue, 5 May 2026 09:26:25 +0200 Subject: [PATCH 39/53] appease vale --- app/_ai_gateway_entities/consumer-credential.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/_ai_gateway_entities/consumer-credential.md b/app/_ai_gateway_entities/consumer-credential.md index 152d486615..f9b531bf19 100644 --- a/app/_ai_gateway_entities/consumer-credential.md +++ b/app/_ai_gateway_entities/consumer-credential.md @@ -92,9 +92,9 @@ The `api_key` field is write-only and cannot be retrieved after creation. Treat ## Lifecycle -Each Credential has its own UUID and is independently listable, gettable, and deletable through the nested endpoints under its parent Consumer. There's no PUT operation: rotation is an explicit "create new, delete old" flow, which avoids long-lived stale references. 
+Each Credential has its own UUID and supports independent list, get, and delete operations through the nested endpoints under its parent Consumer. There is no `PUT` operation: rotation is an explicit "create new, delete old" flow, which avoids long-lived stale references. -Deleting a Credential immediately stops authenticating that key. Deleting the parent Consumer removes all of its Credentials. +Deleting a Credential immediately stops it from authenticating. Deleting the parent Consumer removes all of its Credentials. ## Set up an API key Credential From 4fda7473b7256e0e6de9b0dec83b258d14d86b95 Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Wed, 6 May 2026 07:41:22 +0200 Subject: [PATCH 40/53] update --- .../data-plane-certificate.md | 100 ++++++++++++++++++ app/_landing_pages/ai-gateway/entities.yaml | 22 ++-- 2 files changed, 107 insertions(+), 15 deletions(-) create mode 100644 app/_ai_gateway_entities/data-plane-certificate.md diff --git a/app/_ai_gateway_entities/data-plane-certificate.md b/app/_ai_gateway_entities/data-plane-certificate.md new file mode 100644 index 0000000000..06b8eb3ebd --- /dev/null +++ b/app/_ai_gateway_entities/data-plane-certificate.md @@ -0,0 +1,100 @@ +--- +title: AI Data Plane Certificates +content_type: reference +entities: + - ai-data-plane-certificate +products: + - ai-gateway +description: Client certificates that authorize data planes to connect to an {{site.ai_gateway}}. +schema: + api: konnect/ai-gateway + path: /schemas/AIGatewayDataPlaneClientCertificate +works_on: + - konnect +tools: + - konnect-api + - terraform +related_resources: + - text: "About {{site.ai_gateway}}" + url: /ai-gateway/ + - text: Provider entity + url: /ai-gateway/entities/provider/ + - text: Vault entity + url: /ai-gateway/entities/vault/ +faqs: + - q: Why is there no update operation? + a: | + The certificate body is immutable once registered. 
To rotate, register a new Data Plane + Certificate alongside the existing one, roll the data planes onto the new certificate, then + delete the old entry. This pattern avoids a window where no certificate is installed. + + - q: What happens to connected data planes when a certificate is deleted? + a: | + Any data plane currently connecting with the deleted certificate loses its trust anchor and + can no longer establish a connection to the {{site.ai_gateway}}. Roll data planes onto a + replacement certificate before deleting the old one. + + - q: Is the private key stored alongside the certificate? + a: | + No. Only the public certificate is registered with the {{site.ai_gateway}}. The corresponding + private key stays on the data plane and is never sent to {{site.konnect_short_name}}. + + - q: Can the same certificate be used by multiple data planes? + a: | + Yes. Any data plane provisioned with the registered certificate and its private key can + establish a connection. Use multiple certificates when you need to revoke trust for a subset + of data planes independently. + + - q: How does this relate to the {{site.base_gateway}} data plane client certificate? + a: | + It plays the same role — establishing mutual TLS between the control plane and a data plane — + but it is scoped to a single {{site.ai_gateway}} instance and managed through the + {{site.ai_gateway}} entity surface, not the {{site.konnect_short_name}} Gateway control plane API. +--- + +## What is a Data Plane Certificate? + +A Data Plane Certificate is an {{site.ai_gateway}} entity that registers a public X.509 certificate as a trusted client identity for an {{site.ai_gateway}}. Data planes that present a registered certificate and prove possession of its private key during the mTLS handshake are allowed to connect; data planes without a matching registered certificate are rejected. + +Each Data Plane Certificate belongs to exactly one {{site.ai_gateway}}.
An {{site.ai_gateway}} can have multiple registered certificates so that you can issue one per data plane fleet, rotate keys without downtime, or revoke trust for a subset of data planes independently. + +Data Plane Certificates are managed through the {{site.konnect_short_name}} {{site.ai_gateway}} API, the {{site.konnect_short_name}} UI, or Terraform: + +{% table %} +columns: + - title: Deployment + key: deployment + - title: Control Plane + key: cp + - title: Endpoint + key: endpoint +rows: + - deployment: "{{site.konnect_short_name}}" + cp: "{{site.konnect_short_name}} {{site.ai_gateway}} API" + endpoint: /v1/ai-gateways/{aiGatewayId}/data-plane-certificates +{% endtable %} + +There is no on-prem equivalent for this entity. Self-managed {{site.base_gateway}} deployments use the existing [`/certificates`](/gateway/entities/certificate/) and node configuration mechanisms instead. + +## Trust model + +The {{site.ai_gateway}} acts as the control plane in a CP/DP topology. Each data plane presents a client certificate during the TLS handshake, and the {{site.ai_gateway}} accepts the connection only if the presented certificate matches one that has been registered as a Data Plane Certificate on that {{site.ai_gateway}}. + +Only the public certificate is registered with the {{site.ai_gateway}}. The private key is generated and held on the data plane side; it never leaves the data plane host. + +## Lifecycle + +Data Plane Certificates support create, list, get, and delete operations. There is no update endpoint — the certificate body is immutable. + +To rotate a certificate without downtime: + +1. Register the new certificate as an additional Data Plane Certificate on the {{site.ai_gateway}}. +1. Reconfigure the data planes to present the new certificate and key. +1. Verify that data planes have reconnected with the new identity. +1. Delete the old Data Plane Certificate. 
+ +Deleting a Data Plane Certificate immediately invalidates the trust for any data plane still using it. Existing connections are dropped and reconnect attempts using the deleted certificate are rejected. + +## Schema + +{% entity_schema %} diff --git a/app/_landing_pages/ai-gateway/entities.yaml b/app/_landing_pages/ai-gateway/entities.yaml index ca13364a1d..4119129ce8 100644 --- a/app/_landing_pages/ai-gateway/entities.yaml +++ b/app/_landing_pages/ai-gateway/entities.yaml @@ -86,24 +86,16 @@ rows: - blocks: - type: card config: - title: Vault + title: AI Vault description: Store and reference secrets used by AI Gateway entities and plugins. cta: - text: Vault entity - url: /gateway/entities/vault/ + text: AI Vault entity + url: /ai-gateway/entities/vault/ - blocks: - type: card config: - title: Key - description: Manage cryptographic key material used by AI Gateway security workflows. + title: AI Data Plane Certificate + description: Public client certificates that authorize data planes to establish mTLS connections to an AI Gateway. cta: - text: Key entity - url: /gateway/entities/key/ - - blocks: - - type: card - config: - title: Key Set - description: Group keys for rotation and key management in AI Gateway configurations. 
- cta: - text: Key Set entity - url: /gateway/entities/key-set/ + text: AI Data Plane Certificate entity + url: /ai-gateway/entities/data-plane-certificate/ From 1e7a6b343902c272bfdd01a3650d9822c23e3715 Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Thu, 7 May 2026 13:19:36 +0200 Subject: [PATCH 41/53] Add main entity reference wip --- app/_ai_gateway_entities/ai-gateway.md | 120 +++++++++++++++++++++++++ app/_ai_gateway_entities/model.md | 8 +- 2 files changed, 126 insertions(+), 2 deletions(-) create mode 100644 app/_ai_gateway_entities/ai-gateway.md diff --git a/app/_ai_gateway_entities/ai-gateway.md b/app/_ai_gateway_entities/ai-gateway.md new file mode 100644 index 0000000000..37c07c504f --- /dev/null +++ b/app/_ai_gateway_entities/ai-gateway.md @@ -0,0 +1,120 @@ +--- +title: AI Gateways +content_type: reference +entities: + - ai-gateway +products: + - ai-gateway +description: The top-level {{site.ai_gateway}} entity that owns Models, Providers, Policies, Agents, MCP Servers, and other AI-specific entities. +schema: + api: konnect/ai-gateway + path: /schemas/AIGateway +works_on: + - konnect +related_resources: + - text: About {{site.ai_gateway}} + url: /ai-gateway/ + - text: "{{site.ai_gateway}} entities" + url: /ai-gateway/entities/ + - text: Model entity + url: /ai-gateway/entities/model/ + - text: Provider entity + url: /ai-gateway/entities/provider/ + - text: Policy entity + url: /ai-gateway/entities/policy/ + - text: Data Plane Certificate entity + url: /ai-gateway/entities/data-plane-certificate/ +faqs: + - q: How is an AI Gateway different from a {{site.konnect_short_name}} Gateway control plane? + a: | + An AI Gateway is a dedicated control plane purpose-built for AI traffic. It exposes its own + entity surface (Models, Providers, Policies, Agents, MCP Servers, and so on) and its own + data plane runtime. It doesn't share entities or data planes with a regular + {{site.konnect_short_name}} Gateway control plane. 
+ + - q: Can I run more than one AI Gateway in an organization? + a: | + Yes. An organization can hold multiple AI Gateway entities. Each one has its own + configuration and telemetry endpoints, its own set of child entities, and its own + data planes. + + - q: What does `config_hash` represent? + a: | + `config_hash` is a hash of the AI Gateway's latest configuration, including all of its + child entities. It changes any time something under the AI Gateway is created, updated, + or deleted. Compare it to the `config_hash` reported by a data plane node to check whether + the node has the current configuration. + + - q: What happens to child entities when I delete an AI Gateway? + a: | + Deleting an AI Gateway removes the entity. Its child entities (Models, Providers, Policies, + Agents, MCP Servers, Vaults, Consumers, Consumer Groups, and Data Plane Certificates) are + tied to the AI Gateway and are not addressable without it. + + - q: Is the AI Gateway entity available on-prem? + a: | + No. The AI Gateway entity is a {{site.konnect_short_name}} concept. On-prem deployments + manage the same child entities (Models, Providers, Policies, and so on) directly through + the Admin API, without a parent `ai-gateways/{id}` container. +--- + +## What is an AI Gateway? + +An AI Gateway is the top-level {{site.ai_gateway}} entity. It's a dedicated control plane for AI traffic — separate from a regular {{site.konnect_short_name}} Gateway control plane — that owns the entities {{site.ai_gateway}} uses to serve LLM and agent workloads: + +1. [Models](/ai-gateway/entities/model/) — AI model endpoints, capabilities, and load balancing. +1. [Providers](/ai-gateway/entities/provider/) — upstream LLM service connections and credentials. +1. [Policies](/ai-gateway/entities/policy/) — security, rate limiting, and guardrail behavior attached to other entities. +1. [Agents](/ai-gateway/entities/agent/) — A2A and HTTP agent routing. +1. 
[MCP Servers](/ai-gateway/entities/mcp-server/) — MCP tool exposure and session handling. +1. [Vaults](/ai-gateway/entities/vault/) — secret storage referenced from other entities. +1. [Consumers](/ai-gateway/entities/consumer/), [Consumer Groups](/ai-gateway/entities/consumer-group/), [Consumer Credentials](/ai-gateway/entities/consumer-credential/) — identities used in access control. +1. [Data Plane Certificates](/ai-gateway/entities/data-plane-certificate/) — certificates that authorize data plane nodes to connect. + +Every other {{site.ai_gateway}} entity is created under an AI Gateway and addressed through its ID: + +{% table %} +columns: + - title: Surface + key: surface + - title: Endpoint + key: endpoint +rows: + - surface: AI Gateway + endpoint: /v1/ai-gateways + - surface: Child entities + endpoint: /v1/ai-gateways/{aiGatewayId}/{entity} +{% endtable %} + +## Endpoints + +When an AI Gateway is created, {{site.ai_gateway}} provisions two endpoints that data planes connect to: + +1. **Configuration endpoint** (`endpoints.configuration`) — the URL data plane nodes use to receive their configuration from the control plane. +1. **Telemetry endpoint** (`endpoints.telemetry`) — the URL data plane nodes use to ship analytics and runtime telemetry back to {{site.konnect_short_name}}. + +Both endpoints are read-only, assigned at creation time, and stable for the lifetime of the AI Gateway. Data plane nodes need both URLs, along with a [Data Plane Certificate](/ai-gateway/entities/data-plane-certificate/), to register with the AI Gateway. + +## Configuration hash + +`config_hash` is a read-only field that {{site.ai_gateway}} updates every time anything under the AI Gateway changes — a new Model, an updated Policy, a deleted Provider, and so on. Each data plane node reports back the `config_hash` of the configuration it's running. The two values match when the node is in sync with the control plane. 
+ +Use `config_hash` to verify rollout: after a configuration change, watch the node `config_hash` (through [List Nodes](/ai-gateway/entities/data-plane-certificate/) or the {{site.konnect_short_name}} UI) until every node reports the AI Gateway's current value. + +## Labels + +`labels` is a free-form `key: value` map for organization. Use it to tag AI Gateways by environment (`env: production`), team ownership, cost center, or any other dimension you filter on. Labels don't affect runtime behavior. + +## Lifecycle + +AI Gateways are created and managed through the {{site.konnect_short_name}} UI. Once an AI Gateway exists, its child entities (Models, Providers, Policies, and so on) are managed through the {{site.ai_gateway}} API, Terraform, or decK as documented on each entity page. + +Creating an AI Gateway provisions the configuration and telemetry endpoints and gives you the parent ID needed to create child entities. The AI Gateway has no runtime traffic of its own — traffic flows once at least one Model, Agent, or MCP Server is configured under it and a data plane node is connected. + +Updating an AI Gateway changes its `name`, `description`, or `labels`. Endpoints and `config_hash` are managed by {{site.ai_gateway}} and can't be set directly. + +Deleting an AI Gateway removes the entity. Its child entities are scoped to the AI Gateway and can't be addressed without it. + +## Schema + +{% entity_schema %} diff --git a/app/_ai_gateway_entities/model.md b/app/_ai_gateway_entities/model.md index 3bcd34a5ad..1f634249f7 100644 --- a/app/_ai_gateway_entities/model.md +++ b/app/_ai_gateway_entities/model.md @@ -129,9 +129,13 @@ Not every provider supports every capability. The set of capabilities you can de ## Target models and load balancing -A Model's `target_models` field lists one or more upstream provider model instances. For each entry, you provide the upstream model name (for example, `gpt-4o`) and reference the Provider to use by its `name`. 
Each target can also override settings such as `temperature`, `max_tokens`, `input_cost`, and `output_cost`. +A Model is a virtual model: it exposes one route (`config.route`) and one set of capabilities, and routes requests to one or more concrete upstream models declared in its `target_models` array. Each entry in `target_models` represents a single upstream model instance with one URL. -When a Model has more than one target, requests are load-balanced according to `config.balancer`. For the supported algorithms, configuration options, and tuning guidance, see [Load balancing with AI Proxy Advanced](/ai-gateway/load-balancing/). +For each target, you provide the upstream model name (for example, `gpt-4o`) and reference the Provider to use by its `name`. Each target can also override settings such as `temperature`, `max_tokens`, `input_cost`, and `output_cost`. + +There's no separate Target Model entity or endpoint. Target models are managed only as nested data inside a Model — through the same Model API surface used to create, update, and delete the parent. Adding, removing, or modifying a target is an update to the Model itself. + +When a Model has more than one target, the load balancer sits between the virtual model and its targets, distributing requests according to `config.balancer`. For the supported algorithms, configuration options, and tuning guidance, see [Load balancing with AI Proxy Advanced](/ai-gateway/load-balancing/). 
## Access control From 75f2b8ead3cf93fa1ce739517ec531d2fe9dc678 Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Thu, 7 May 2026 14:23:40 +0200 Subject: [PATCH 42/53] Add WIP content for agents entity --- app/_ai_gateway_entities/agent.md | 272 ++++++++++++++++++ .../components/entity_example/format/ui_ai.md | 4 +- 2 files changed, 275 insertions(+), 1 deletion(-) diff --git a/app/_ai_gateway_entities/agent.md b/app/_ai_gateway_entities/agent.md index 6256bf3a2f..cdefb5030d 100644 --- a/app/_ai_gateway_entities/agent.md +++ b/app/_ai_gateway_entities/agent.md @@ -16,8 +16,280 @@ tools: - deck - admin-api - konnect-api +related_resources: + - text: About {{site.ai_gateway}} + url: /ai-gateway/ + - text: "{{site.ai_gateway}} entities" + url: /ai-gateway/entities/ + - text: Policy entity + url: /ai-gateway/entities/policy/ + - text: Consumer Group entity + url: /ai-gateway/entities/consumer-group/ + - text: A2A protocol specification + url: https://a2aproject.github.io/A2A/ +faqs: + - q: What's the difference between an `a2a` Agent and an `http` Agent? + a: | + An `a2a` Agent applies Agent-to-Agent protocol awareness (JSON-RPC and REST binding detection, + agent-card URL rewriting, structured A2A telemetry) to traffic flowing to an upstream agent. + An `http` Agent is a generic HTTP route to an upstream agent without A2A-specific processing. + Use `a2a` when the upstream speaks the A2A protocol and you want observability tied to A2A + task and message semantics. + + - q: Does the Agent entity modify request routing or aggregate responses? + a: | + No. The runtime behind an Agent operates as a transparent proxy. It detects A2A requests, + records telemetry, and rewrites agent-card URLs to the gateway address. It does not change + routing decisions, merge responses, or hold task state on behalf of clients. + + - q: Why is the agent-card URL rewritten? 
+ a: | + A2A clients use agent-card responses (at `/.well-known/agent-card.json`) to discover where to + send subsequent requests. Rewriting the `url` field, and any `additionalInterfaces[].url` + fields, to the {{site.ai_gateway}} address means clients route follow-up traffic through the + gateway instead of bypassing it. The rewrite honors `X-Forwarded-*` headers when the gateway + sits behind a load balancer. + + - q: How does streaming work? + a: | + Server-sent events (`Content-Type: text/event-stream`) pass through chunk-by-chunk without + buffering. The runtime counts SSE events, captures time-to-first-byte, and extracts task state + from the final event for analytics. Latency is preserved. + + - q: How do I limit which consumers can reach an Agent? + a: | + Set the `acls` field on the Agent with allow or deny lists. Each entry is a string that + references a Consumer, Consumer Group, or Authenticated Group by name. + + - q: Can the same plugin run on an Agent that I'd attach to a route or service? + a: | + Plugin configuration that applies to the Agent goes through the [Policy entity](/ai-gateway/entities/policy/). + Attach Policies to the Agent through its `policies` field. --- +## What is an Agent? + +An Agent is a first-class {{site.ai_gateway}} entity that represents an upstream agent endpoint exposed through {{site.ai_gateway}}. An Agent has a type, either `a2a` for [Agent-to-Agent protocol](https://a2aproject.github.io/A2A/) traffic or `http` for generic HTTP agent routing, plus configuration that points {{site.ai_gateway}} at the upstream and shapes how requests flow. + +For `a2a` Agents, the runtime adds protocol-aware behavior on top of plain proxying: it detects A2A requests across both JSON-RPC and REST bindings, rewrites agent-card URLs so clients discover the gateway as the canonical endpoint, and emits structured A2A telemetry to {{site.konnect_short_name}} analytics and OpenTelemetry. 
For `http` Agents, requests are proxied without A2A-specific processing. + +Agents can be created and managed through the {{site.konnect_short_name}} {{site.ai_gateway}} API, the on-prem Admin API, decK, or the {{site.konnect_short_name}} UI: + +{% table %} +columns: + - title: Deployment + key: deployment + - title: Control Plane + key: cp + - title: Endpoint + key: endpoint +rows: + - deployment: "{{site.konnect_short_name}}" + cp: "{{site.konnect_short_name}} {{site.ai_gateway}} API" + endpoint: /v1/ai-gateways/{aiGatewayId}/agents + - deployment: On-prem + cp: Admin API + endpoint: /ai/agents +{% endtable %} + +## How A2A traffic flows + +When an Agent has type `a2a`, the runtime processes traffic in four phases: + +1. **Access**. Detects whether the request is an A2A operation (JSON-RPC or REST binding). When statistics logging is enabled, starts an OpenTelemetry span; if payload logging is also enabled, records the request body. +1. **Header filter**. Detects streaming responses (`Content-Type: text/event-stream`) and records time to first byte. Buffers agent-card responses for URL rewriting. +1. **Body filter**. Streams SSE chunks through to the client without buffering. Buffers non-streaming responses to extract task metadata. Rewrites agent-card URLs to the gateway address. Emits analytics at end of response. +1. **Log**. Finalizes the OpenTelemetry span with task state, task ID, and any error information. + +Non-A2A traffic, and traffic to `http` Agents, is proxied without these steps. + + +{% mermaid %} +sequenceDiagram + autonumber + participant Client as A2A Client + participant Gateway as {{site.ai_gateway}}
(Agent) + participant Agent as Upstream A2A Agent + + Client->>Gateway: A2A request (JSON-RPC or REST) + Note over Gateway: Detect A2A binding and method
Start OTel span (if logging enabled) + + Gateway->>Agent: Proxied request
(Accept-Encoding removed if logging enabled) + + alt Streaming response (SSE) + Agent-->>Gateway: text/event-stream chunks + Note over Gateway: Pass through each chunk
Count SSE events, track TTFB + Gateway-->>Client: SSE chunks (unchanged) + Note over Gateway: On final chunk:
Extract task state, set analytics + else Non-streaming response + Agent-->>Gateway: JSON response + Note over Gateway: Buffer response
Extract task metadata + Gateway-->>Client: Response (unchanged) + end + + Note over Gateway: Finish OTel span
Emit ai.a2a metrics to log plugins +{% endmermaid %} + + +## Core A2A protocol elements + +A2A defines the communication elements between agents. The runtime surfaces data tied to these elements in log output and OpenTelemetry spans for `a2a` Agents. + +{% table %} +columns: + - title: Element + key: element + - title: Description + key: description + - title: Purpose + key: purpose +rows: + - element: Agent Card + description: A JSON metadata document describing an agent's identity, capabilities, endpoint, skills, and authentication requirements. + purpose: Enables clients to discover agents and understand how to interact with them. + - element: Task + description: A stateful unit of work initiated by an agent, with a unique ID and defined lifecycle. + purpose: Tracks long-running operations and supports multi-turn interactions. + - element: Message + description: A single turn of communication between a client and an agent, containing content and a role (`user` or `agent`). + purpose: Conveys instructions, context, questions, answers, or status updates that are not formal artifacts. + - element: Part + description: The fundamental content container (for example, `TextPart`, `FilePart`, `DataPart`) used within messages and artifacts. + purpose: Provides flexibility for agents to exchange different content types within messages and artifacts. + - element: Artifact + description: A tangible output generated by an agent during a task (for example, a document, image, or structured data). + purpose: Carries the concrete output of a task in a structured, retrievable form. +{% endtable %} + +## Protocol detection + +A2A traffic is auto-detected per request. There's no per-route opt-in, and non-A2A traffic passes through without overhead. + +**REST binding.** Detection anchors to the end of the request path, so any prefix added by the route is ignored. 
For example, both `/v1/message:send` and `/api/agents/v1/message:send` match `SendMessage`: + + +{% table %} +columns: + - title: HTTP method + key: method + - title: Path suffix + key: path + - title: A2A operation + key: operation + - title: Canonical method + key: canonical +rows: + - method: "`POST`" + path: "`/v1/message:send`" + operation: SendMessage + canonical: "`message/send`" + - method: "`POST`" + path: "`/v1/message:stream`" + operation: SendStreamingMessage + canonical: "`message/stream`" + - method: "`GET`" + path: "`/.well-known/agent-card.json`" + operation: GetAgentCard + canonical: "`agent/getCard`" + - method: "`GET`" + path: "`/v1/extendedAgentCard`" + operation: GetExtendedAgentCard + canonical: "`agent/getExtendedAgentCard`" + - method: "`GET`" + path: "`/v1/tasks/{id}`" + operation: GetTask + canonical: "`tasks/get`" + - method: "`GET`" + path: "`/v1/tasks`" + operation: ListTasks + canonical: "`tasks/list`" + - method: "`POST`" + path: "`/v1/tasks/{id}:cancel`" + operation: CancelTask + canonical: "`tasks/cancel`" + - method: "`POST`" + path: "`/v1/tasks/{id}:subscribe`" + operation: SubscribeToTask + canonical: "`tasks/resubscribe`" + - method: "`POST`" + path: "`/v1/tasks`" + operation: ListTasks + canonical: "`tasks/list`" +{% endtable %} + + +The canonical method name is what appears in OpenTelemetry span attributes and log output. + +**JSON-RPC binding.** Detected by the `"jsonrpc"` field in the request body, combined with a recognized A2A method name or an `A2A-Version` request header. Recognized methods include `message/send`, `message/stream`, `tasks/get`, `tasks/list`, `tasks/cancel`, `tasks/resubscribe`, the `tasks/pushNotificationConfig/*` family, and `agent/getExtendedAgentCard`. + +A request carrying an `A2A-Version` header is treated as JSON-RPC even if the method isn't in the recognized list. When an unknown method is accepted this way, the `method` field in log output is recorded as `"unknown"` to bound metric cardinality. 
The OpenTelemetry span's `kong.a2a.operation` attribute still receives the actual method name. + +## Agent-card URL rewriting + +When an upstream agent returns an agent card, the runtime rewrites the `url` field, and any `additionalInterfaces[].url` fields, to the {{site.ai_gateway}} address. A2A clients then discover the gateway as the canonical endpoint instead of contacting the upstream directly. The rewrite uses `X-Forwarded-*` headers to construct the correct scheme, host, and port when the gateway is deployed behind a load balancer or reverse proxy. + +## Logging and observability + +Statistics logging records structured A2A telemetry per request: the A2A method, binding type, task state, task ID, context ID, latency, time to first byte (for streaming), SSE event count, and response size. The runtime emits this data into the `ai.a2a` namespace consumed by {{site.konnect_short_name}} analytics and any attached log plugins, and creates a `kong.a2a` child span when {{site.base_gateway}} tracing is configured. + +{:.info} +> When statistics logging is enabled, the runtime removes the `Accept-Encoding` request header +> before forwarding to the upstream. This prevents compressed responses that the runtime can't +> parse for metadata extraction. + +Payload logging additionally captures request and response bodies. Payloads are truncated at the configured payload size limit. Payload logging may expose sensitive data, so enable it with care. + +You can view A2A analytics in {{site.konnect_short_name}} Explorer and Dashboards through the [Agentic usage analytics](/observability/explorer/?tab=agentic-usage#metrics) view.
+ +### Log output fields + +{% include /plugins/ai-a2a-proxy/log-output-fields.md %} + +### OpenTelemetry span attributes + +When statistics logging is enabled and {{site.base_gateway}} tracing is configured, the runtime creates a `kong.a2a` child span with the following attributes: + +{% include /plugins/ai-a2a-proxy/otel-span-attributes.md %} + +### Task states + +Task state values surfaced in logs and spans are normalized to lowercase A2A spec format, regardless of the upstream SDK version: `submitted`, `working`, `input-required`, `completed`, `canceled`, `failed`, `rejected`, `auth-required`, `unknown`. + +## Access control + +The `acls` field controls which identities are allowed to reach the Agent. The field accepts `allow` and `deny` lists. Each entry is a string that references a Consumer, Consumer Group, or Authenticated Group by name. Access is enforced before traffic reaches the upstream agent. + +For per-request authentication and identity, attach an authentication Policy to the Agent. + +## Attach Policies + +Policies are how plugin configurations apply to an Agent. Attach them through the Agent's `policies` field. Each entry is a string that references a Policy by name or ID. Multiple Policies can attach to one Agent; each runs as an independent plugin instance. + +For details, see the [Policy entity](/ai-gateway/entities/policy/) reference. + +## Set up an Agent + +The following example creates an `a2a` Agent that proxies traffic to an upstream A2A agent at `https://booking-agent.internal.kongair.com`, with statistics logging enabled and access restricted to the `internal-teams` Consumer Group. 
+ +{% entity_example %} +type: agent +data: + display_name: KongAir Flight Booking Agent + name: kongair-flight-booking-agent + type: a2a + acls: + allow: + - internal-teams + deny: [] + policies: [] + config: + url: https://booking-agent.internal.kongair.com + logging: + statistics: true + payloads: false + max_payload_size: 524288 +{% endentity_example %} + ## Schema {% entity_schema %} diff --git a/app/_includes/components/entity_example/format/ui_ai.md b/app/_includes/components/entity_example/format/ui_ai.md index 4e9bbb75e2..d68e48f874 100644 --- a/app/_includes/components/entity_example/format/ui_ai.md +++ b/app/_includes/components/entity_example/format/ui_ai.md @@ -63,7 +63,9 @@ The following creates a new AI Agent. Suggested values are shown in backticks: 1. Navigate to **Agents**. 1. Click **New Agent**. 1. Enter a **Display Name** (for example: `{{ include.presenter.data['display_name'] }}`) and **Name** (for example: `{{ include.presenter.data['name'] }}`). -1. Configure the agent settings, model/tool references, and optional policies. +1. Select an Agent **Type** (for example: `{{ include.presenter.data['type'] }}`). +1. Enter the upstream Agent **URL** (for example: `{{ include.presenter.data['config']['url'] }}`). +1. Optionally configure logging, max payload size, ACLs, and Policy references. 1. Click **Create**. {% when 'mcp-server' %} The following creates a new AI MCP Server. 
Suggested values are shown in backticks: From 7f78647886cc7a622ff0f42e32e558646aac0d78 Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Fri, 8 May 2026 10:52:05 +0200 Subject: [PATCH 43/53] add mcp-server entity --- app/_ai_gateway_entities/agent.md | 5 + app/_ai_gateway_entities/ai-gateway.md | 5 + .../consumer-credential.md | 5 + app/_ai_gateway_entities/consumer-group.md | 5 + app/_ai_gateway_entities/consumer.md | 5 + .../data-plane-certificate.md | 5 + app/_ai_gateway_entities/mcp-server.md | 497 +++++++++++++++++- app/_ai_gateway_entities/model.md | 5 + app/_ai_gateway_entities/policy.md | 5 + app/_ai_gateway_entities/provider.md | 5 + app/_ai_gateway_entities/vault.md | 5 + 11 files changed, 546 insertions(+), 1 deletion(-) diff --git a/app/_ai_gateway_entities/agent.md b/app/_ai_gateway_entities/agent.md index cdefb5030d..b0bb6c92d6 100644 --- a/app/_ai_gateway_entities/agent.md +++ b/app/_ai_gateway_entities/agent.md @@ -5,6 +5,11 @@ entities: - ai-agent products: - ai-gateway +min_version: + ai-gateway: '2.0.0' +breadcrumbs: + - /ai-gateway/ + - /ai-gateway/entities/ description: Agent entity used by {{site.ai_gateway}} for A2A and HTTP agent configurations. schema: api: konnect/ai-gateway diff --git a/app/_ai_gateway_entities/ai-gateway.md b/app/_ai_gateway_entities/ai-gateway.md index 37c07c504f..9f543151c7 100644 --- a/app/_ai_gateway_entities/ai-gateway.md +++ b/app/_ai_gateway_entities/ai-gateway.md @@ -5,6 +5,11 @@ entities: - ai-gateway products: - ai-gateway +min_version: + ai-gateway: '2.0.0' +breadcrumbs: + - /ai-gateway/ + - /ai-gateway/entities/ description: The top-level {{site.ai_gateway}} entity that owns Models, Providers, Policies, Agents, MCP Servers, and other AI-specific entities. 
schema: api: konnect/ai-gateway diff --git a/app/_ai_gateway_entities/consumer-credential.md b/app/_ai_gateway_entities/consumer-credential.md index f9b531bf19..286ae81be2 100644 --- a/app/_ai_gateway_entities/consumer-credential.md +++ b/app/_ai_gateway_entities/consumer-credential.md @@ -5,6 +5,11 @@ entities: - ai-consumer-credential products: - ai-gateway +min_version: + ai-gateway: '2.0.0' +breadcrumbs: + - /ai-gateway/ + - /ai-gateway/entities/ description: Credentials issued to AI Consumers for authenticating to {{site.ai_gateway}}. schema: api: konnect/ai-gateway diff --git a/app/_ai_gateway_entities/consumer-group.md b/app/_ai_gateway_entities/consumer-group.md index 474e686563..9dd52d575d 100644 --- a/app/_ai_gateway_entities/consumer-group.md +++ b/app/_ai_gateway_entities/consumer-group.md @@ -5,6 +5,11 @@ entities: - ai-consumer-group products: - ai-gateway +min_version: + ai-gateway: '2.0.0' +breadcrumbs: + - /ai-gateway/ + - /ai-gateway/entities/ description: Consumer Groups for {{site.ai_gateway}}. schema: api: konnect/ai-gateway diff --git a/app/_ai_gateway_entities/consumer.md b/app/_ai_gateway_entities/consumer.md index 4c9eaf9eb5..317c76e5ff 100644 --- a/app/_ai_gateway_entities/consumer.md +++ b/app/_ai_gateway_entities/consumer.md @@ -5,6 +5,11 @@ entities: - ai-consumer products: - ai-gateway +min_version: + ai-gateway: '2.0.0' +breadcrumbs: + - /ai-gateway/ + - /ai-gateway/entities/ description: "Consumers for {{site.ai_gateway}}." 
schema: api: konnect/ai-gateway diff --git a/app/_ai_gateway_entities/data-plane-certificate.md b/app/_ai_gateway_entities/data-plane-certificate.md index 06b8eb3ebd..afcd38ccfd 100644 --- a/app/_ai_gateway_entities/data-plane-certificate.md +++ b/app/_ai_gateway_entities/data-plane-certificate.md @@ -5,6 +5,11 @@ entities: - ai-data-plane-certificate products: - ai-gateway +min_version: + ai-gateway: '2.0.0' +breadcrumbs: + - /ai-gateway/ + - /ai-gateway/entities/ description: Client certificates that authorize data planes to connect to an {{site.ai_gateway}}. schema: api: konnect/ai-gateway diff --git a/app/_ai_gateway_entities/mcp-server.md b/app/_ai_gateway_entities/mcp-server.md index 6c7e16b0c5..fd5248c93d 100644 --- a/app/_ai_gateway_entities/mcp-server.md +++ b/app/_ai_gateway_entities/mcp-server.md @@ -5,7 +5,12 @@ entities: - ai-mcp-server products: - ai-gateway -description: MCP server entity used by {{site.ai_gateway}}. +min_version: + ai-gateway: '2.0.0' +breadcrumbs: + - /ai-gateway/ + - /ai-gateway/entities/ +description: MCP Server entity used by {{site.ai_gateway}} to expose tools and proxy MCP traffic. schema: api: konnect/ai-gateway path: /schemas/AIGatewayMCPServer @@ -16,8 +21,498 @@ tools: - deck - admin-api - konnect-api +related_resources: + - text: About {{site.ai_gateway}} + url: /ai-gateway/ + - text: "{{site.ai_gateway}} entities" + url: /ai-gateway/entities/ + - text: Policy entity + url: /ai-gateway/entities/policy/ + - text: Consumer Group entity + url: /ai-gateway/entities/consumer-group/ + - text: Kong MCP traffic gateway + url: /mcp/ + - text: Model Context Protocol specification + url: https://modelcontextprotocol.io/ +faqs: + - q: Which MCP protocol version does the runtime use? + a: | + The MCP runtime behind an MCP Server entity speaks MCP protocol version `2025-06-18`. Upstream + MCP servers may run `2025-06-18` or `2025-11-25`. Versions from 2024 are not supported. + + - q: What's the difference between the four server types? 
+ a: | + `passthrough-listener` proxies MCP traffic to an upstream MCP server without converting tools. + `conversion-listener` converts a RESTful API into MCP tools and accepts MCP requests on the + same Route. `conversion-only` defines a tool library that other MCP Servers reference by tag + but doesn't accept incoming MCP traffic itself. `listener` aggregates tools from one or more + `conversion-only` MCP Servers into a single MCP endpoint. + + - q: Can a Consumer's identity be used to gate access to specific tools? + a: | + Yes. Set `default_tool_acls` on the MCP Server with `allow` and `deny` lists, and override per + tool through `tools[].acls`. A per-tool ACL replaces the default for that tool; it doesn't + merge. + + - q: How do OAuth-based ACLs differ from Consumer-based ACLs? + a: | + Set `acl_attribute_type` to `oauth_access_token` and provide `access_token_claim_field` (a jq + filter, for example `.user.email`). ACLs then evaluate against the claim value extracted from + the OAuth access token instead of the resolved Consumer identity. The OAuth flow is supplied + by the [AI MCP OAuth2 Policy](/plugins/ai-mcp-oauth2/). + + - q: What error code do denied requests return? + a: | + `HTTP 403 Forbidden`. Earlier {{site.ai_gateway}} versions returned the JSON-RPC error code + `INVALID_PARAMS -32602`; from {{site.ai_gateway}} 3.14 onward, denials follow the + [MCP 2025-11-25 authorization specification](https://modelcontextprotocol.io/specification/2025-11-25/basic/authorization#error-handling). + + - q: Can I attach the same authentication or rate-limiting plugin that I'd attach to a Route? + a: | + Plugin configuration that applies to the MCP Server goes through the + [Policy entity](/ai-gateway/entities/policy/). Attach Policies to the MCP Server through its + `policies` field. --- +## What is an MCP Server?
+ +An MCP Server is a first-class {{site.ai_gateway}} entity that exposes tools to MCP-compatible clients (such as [Insomnia](https://konghq.com/products/kong-insomnia), [Claude](https://claude.ai/), [Cursor](https://cursor.com/), or [LM Studio](https://lmstudio.ai/)) over the [Model Context Protocol](https://modelcontextprotocol.io/). The runtime acts as a protocol bridge, translating between MCP and HTTP so MCP clients can either call existing APIs through {{site.ai_gateway}} or interact with upstream MCP servers. + +Because the runtime executes inside {{site.ai_gateway}}, MCP endpoints are provisioned dynamically on demand. You don't host or scale them separately, and the same authentication, traffic control, and observability features available to traditional API traffic apply to MCP traffic at the same scale. + +MCP Servers can be created and managed through the {{site.konnect_short_name}} API, the on-prem Admin API, decK, or the {{site.konnect_short_name}} UI: + +{% table %} +columns: + - title: Deployment + key: deployment + - title: Control Plane + key: cp + - title: Endpoint + key: endpoint +rows: + - deployment: "{{site.konnect_short_name}}" + cp: "{{site.konnect_short_name}} {{site.ai_gateway}} API" + endpoint: /v1/ai-gateways/{aiGatewayId}/mcp-servers + - deployment: On-prem + cp: Admin API + endpoint: /ai/mcp-servers +{% endtable %} + +## Apply gateway features to MCP traffic + +An MCP Server brings {{site.ai_gateway}} traffic management, security, and observability features to MCP endpoints. Attach the relevant plugin as a [Policy](/ai-gateway/entities/policy/) on the MCP Server: + + +{% table %} +columns: + - title: Use case + key: use_case + - title: Example + key: example +rows: + - use_case: Authentication + example: | + Apply [OpenID Connect](/plugins/openid-connect/) or the [Key Auth](/plugins/key-auth/) plugin to an MCP Server.
+ - use_case: Rate limiting + example: | + Use [Rate Limiting](/plugins/rate-limiting/) or [Rate Limiting Advanced](/plugins/rate-limiting-advanced/) to control MCP request volume. + - use_case: Observability + example: | + Add [logging and tracing plugins](/plugins/?category=logging) for full request and response visibility. + - use_case: Traffic control + example: | + Apply [request and response transformation plugins](/plugins/?category=transformations) or [ACL policies](/plugins/acl/). +{% endtable %} + + +## Server modes + +The `type` field selects one of four modes. Each mode determines how the runtime handles MCP requests and whether it converts RESTful APIs into MCP tools. + + +{% table %} +columns: + - title: Mode + key: mode + - title: Description + key: description + - title: Use cases + key: usecase +rows: + - mode: "`passthrough-listener`" + description: | + Listens for incoming MCP requests and proxies them to an upstream MCP server without + converting tools. Generates MCP observability metrics. + usecase: | + You already operate an MCP server and want {{site.ai_gateway}} to act as an authenticated, + observable entrypoint. Common for third-party or internally hosted MCP services exposed + through {{site.ai_gateway}}. + - mode: "`conversion-listener`" + description: | + Converts RESTful API paths into MCP tools and accepts incoming MCP requests on the Route + path. Tools are defined directly on the MCP Server and an optional server block applies. + {% new_in 3.13 %} Supports session identifiers set by authentication services for cookie-based + authentication. + usecase: | + Make an existing REST API available to MCP clients directly through {{site.ai_gateway}}. + Common for services that both define and handle their own tools. + - mode: "`conversion-only`" + description: | + Converts RESTful API paths into MCP tools but does not accept incoming MCP requests. + Tools are tagged at the MCP Server level so a `listener` MCP Server can reference them. 
+ Used together with one or more `listener` MCP Servers. + usecase: | + Define reusable tool specifications without serving them. Suitable for teams that maintain + a shared library of tool definitions. + - mode: "`listener`" + description: | + Similar to `conversion-listener`, but instead of defining its own tools, it binds tools + from one or more `conversion-only` MCP Servers through `config.server.tag`. + usecase: | + A single MCP endpoint that aggregates tools from multiple `conversion-only` MCP Servers. + Typical in multi-service or multi-team environments that expose a unified MCP interface. +{% endtable %} + + +## How MCP traffic flows + +For `conversion-listener`, `conversion-only`, and `listener` modes, the runtime converts MCP requests into HTTP calls and wraps the responses back in MCP format: + +1. Accepts an MCP protocol request from a client. +1. Parses the MCP tool call and matches it to a tool definition. +1. Converts the call into a standard HTTP request. +1. Sends the request to the upstream Service. +1. Wraps the HTTP response in MCP format and returns it to the client. + +For `passthrough-listener` mode, the runtime proxies MCP traffic directly to the upstream MCP server without conversion. + + +{% mermaid %} +sequenceDiagram + participant Client as MCP Client + participant Gateway as {{site.ai_gateway}}
(MCP Server) + participant Upstream as Upstream Service + + Client->>Gateway: MCP request (tool invocation) + activate Gateway + Gateway->>Gateway: Parse MCP payload + Gateway->>Gateway: Map to HTTP endpoint + Gateway->>Upstream: HTTP request + deactivate Gateway + activate Upstream + Upstream-->>Gateway: HTTP response + deactivate Upstream + activate Gateway + Gateway->>Gateway: Convert to MCP format + Gateway-->>Client: MCP response + deactivate Gateway +{% endmermaid %} + + +{:.info} +> Pings from MCP clients are included in the total request count for an {{site.ai_gateway}} +> instance, in addition to requests made to the MCP server itself. + +## Tools + +A [tool](#schema-aigateway-mcpserver-tools) maps an MCP tool name to an upstream HTTP endpoint. Each tool needs at minimum a description and an HTTP method. The runtime extracts the host, path, headers, and query from the route configuration, so most tool entries don't need to specify them. Override these on the tool entry only when the route doesn't match the upstream endpoint exactly. + +For richer mapping, supply [`request_body`](#schema-aigateway-mcpserver-tools-request-body), [`responses`](#schema-aigateway-mcpserver-tools-responses), and [`parameters`](#schema-aigateway-mcpserver-tools-parameters) specifications in OpenAPI JSON format. The runtime uses them to validate calls and shape upstream HTTP requests. + +Tools can also carry MCP-spec [annotations](#schema-aigateway-mcpserver-tools-annotations) that hint at tool behavior to clients (for example, whether a tool is read-only, idempotent, or destructive). Annotations don't change runtime behavior; they help clients decide whether to surface a tool, confirm before invocation, or treat it as safe to retry. + +[Per-tool ACLs](#schema-aigateway-mcpserver-tools-acls) override the MCP Server's [default tool ACLs](#schema-aigateway-mcpserver-default-tool-acls). See [ACL tool control](#acl-tool-control). 
+ +## Sessions + +`listener` and `conversion-listener` MCP Servers support managed sessions for stateful interactions. Configure session storage through `config.server.session`. The `passthrough-listener` mode doesn't use managed sessions because session state lives on the upstream MCP server. + +Two session strategies are available: + +1. **Client.** Session state is encrypted into the MCP session ID assigned to the client. Requires `secrets` (the encryption keys; the first entry is used for encryption, all entries for decryption to support key rotation). +1. **Redis.** Session state is stored in Redis. Configure connection details and authentication in `config.server.session.redis`. Cloud Redis providers (AWS ElastiCache, Azure, GCP) authenticate through provider-specific blocks under `redis.cloud_authentication`. + +`session_ttl` controls how long sessions live (default 24 hours). Set `managed: false` to disable managed sessions when the upstream maintains state externally. + +Secrets used for session encryption can be referenced from a [Vault](/ai-gateway/entities/vault/). + +## Server configuration + +The `config.server` block carries runtime settings that apply across all tools on the MCP Server: + +1. `forward_client_headers` (default `true`). Whether to forward client request headers to the upstream when calling tools. +1. `tag`. A single tag used by `listener` MCP Servers to filter which `conversion-only` tools to expose. +1. `timeout` (default 10 seconds). Maximum time to wait for an upstream tool call. + +`config.max_request_body_size` controls the maximum incoming request body size accepted by the MCP Server (default 1 MB). + +## ACL tool control + +When exposing MCP servers through {{site.ai_gateway}}, you may need granular control over which authenticated API consumers can discover and invoke specific tools.
The MCP Server's ACL feature lets you define access rules at both the default level (applying to all tools) and per-tool level (for fine-grained exceptions). + +This way, consumers only interact with tools appropriate to their role, while maintaining a complete audit trail of all access attempts. Authentication is handled by an authentication Policy attached to the MCP Server (such as [Key Auth](/plugins/key-auth/) or an OIDC flow), and the resulting Consumer identity is used for ACL checks. + +{:.info} +> **ACL in `listener` mode** +> +> Listener mode does not support direct ACL configuration. Instead, it inherits ACL rules from tagged `conversion-listener` or `conversion-only` MCP Servers. +> +> To use ACLs with `listener` mode: +> 1. Configure `conversion-listener` or `conversion-only` MCP Servers with ACL rules and tags. +> 1. Configure `listener` mode to aggregate tools by matching tags. +> 1. Set `include_consumer_groups: true` on the listener. Without this setting, the listener cannot pass Consumer Group membership to the aggregated tools, and ACL rules will not evaluate correctly. +> +> See [Enforce ACLs on aggregated MCP servers](/mcp/enforce-acls-on-aggregated-mcp-servers/) for a complete example. + +### Attribute types + +Two attribute types determine what the runtime evaluates against: + +1. **`consumer`** (default). Evaluates against the resolved Consumer identity. +1. **`oauth_access_token`**. Evaluates against a claim extracted from the OAuth access token. Set `access_token_claim_field` to a jq filter (for example, `.user.email` for a nested claim). The OAuth flow itself is supplied by the [AI MCP OAuth2 Policy](/plugins/ai-mcp-oauth2/). 
+ +### Supported identifier types + +When `acl_attribute_type` is `consumer`, ACL rules can reference [Consumers](/gateway/entities/consumer/) and [Consumer Groups](/gateway/entities/consumer-group/) using these identifier types in `allow` and `deny` lists: + +* [`username`](/gateway/entities/consumer/#schema-consumer-username): Consumer username +* [`id`](/gateway/entities/consumer/#schema-consumer-id): Consumer UUID +* [`custom_id`](/gateway/entities/consumer/#schema-consumer-custom-id): Custom Consumer identifier +* [`consumer_groups.name`](/gateway/entities/consumer-group/#schema-consumer-group-name): Consumer Group name + +The authenticated Consumer identity is matched against these identifiers. If the [Consumer](/gateway/entities/consumer/) or any of their [Consumer Groups](/gateway/entities/consumer-group/) match an ACL entry, the rule applies. + +### How default and per-tool ACLs work + +The runtime evaluates access using a two-tier system: + + +{% table %} +columns: + - title: ACL type + key: field + - title: Description + key: description +rows: + - field: "`default_tool_acls`" + description: | + Baseline rules that apply to all tools unless overridden. + - field: "`tools[].acls`" + description: | + When configured, these rules replace the default ACL for that specific tool. The per-tool ACL doesn't inherit or merge with `default_tool_acls`. It is an all-or-nothing override. +{% endtable %} + + +{:.info} +> If a tool defines its own ACL, the runtime ignores `default_tool_acls` for that tool: +> +> - Tools with no ACL configuration inherit the default rules (both `allow` and `deny` lists). +> - Tools with an ACL must explicitly list all allowed subjects (even if they were already in `default_tool_acls`). + +### ACL evaluation logic + +Both default and per-tool ACLs use `allow` and `deny` lists. Evaluation follows this order: + +1. **Deny list configuration**.
If a `deny` list exists and the subject matches any `deny` entry, the request is rejected (`HTTP 403 Forbidden`). +1. **Allow list configuration**. If an `allow` list exists, the subject must match at least one entry; otherwise, the request is denied (`HTTP 403 Forbidden`). +1. **No allow list configuration**. If no `allow` list exists and the subject is not in `deny`, the request is allowed. +1. **No ACL configuration**. If neither list exists, the request is allowed. + +All access attempts (allowed or denied) are written to the audit log. + +The table below summarizes the possible ACL configurations and their outcomes. + +{% table %} +columns: + - title: Condition + key: condition + - title: "Proxied to upstream service?" + key: proxy + - title: Response code + key: response +rows: + - condition: "Subject matches any `deny` rule" + proxy: No + response: HTTP 403 Forbidden + - condition: "`allow` list exists and subject is not in it" + proxy: No + response: HTTP 403 Forbidden + - condition: "Only `deny` list exists and subject is not in it" + proxy: Yes + response: 200 + - condition: "No ACL rules configured" + proxy: Yes + response: 200 +{% endtable %} + +### ACL tool control request flow + +The runtime evaluates ACLs for both tool discovery and tool invocation. These are two distinct operations with different behaviors: + +**Tool discovery (list tools)**: + +1. MCP client requests the list of available tools. +1. The authentication Policy validates the request and identifies the Consumer. +1. The runtime loads the Consumer's group memberships. +1. The runtime evaluates each tool against `default_tool_acls`. +1. The runtime returns an HTTP 200 response with only the tools the Consumer is allowed to access. +1. The runtime logs the discovery attempt. + +**Tool invocation**: + +1. MCP client invokes a specific tool. +1. The authentication Policy validates the request and identifies the Consumer. +1. The runtime loads the Consumer's group memberships. +1. 
The runtime evaluates the tool-specific ACL if it exists, or the default ACL otherwise. +1. The runtime logs the access attempt (allowed or denied). +1. The runtime returns `HTTP 403 Forbidden` if denied, or forwards the request to the upstream MCP server if allowed. + + +{% mermaid %} +sequenceDiagram + participant Client as MCP Client + participant Gateway as {{site.ai_gateway}} + participant Auth as AuthN Policy + participant ACL as MCP Server (ACL/Audit) + participant Up as Upstream MCP Server + participant Log as Audit Sink + + %% ----- List Tools ----- + rect + note over Client,Gateway: List Tools (Default ACL Scope) + Client->>Gateway: GET /tools + Gateway->>Auth: Authenticate + Auth-->>Gateway: Consumer identity + Gateway->>ACL: Evaluate scoped default ACL + ACL-->>Log: Audit entry + alt If allowed + Gateway-->>Client: Filtered tool list + else If denied + Gateway-->>Client: HTTP 403 Forbidden + end + end + + %% ----- Tool Invocation ----- + rect + note over Client,Up: Tool Invocation (Per-tool ACL) + Client->>Gateway: POST /tools/{tool} + Gateway->>Auth: Authenticate + Auth-->>Gateway: Consumer identity + Gateway->>ACL: Evaluate per-tool ACL + ACL-->>Log: Audit entry + alt If allowed + Gateway->>Up: Forward request + Up-->>Gateway: Response + Gateway-->>Client: Response + else If denied + Gateway-->>Client: HTTP 403 Forbidden + end + end +{% endmermaid %} + + +## Logging and audits + +[Logging](#schema-aigateway-mcpserver-config-logging) captures three layers of MCP traffic: per-request statistics for telemetry, request and response payloads for full visibility, and [audit entries](/ai-gateway/ai-audit-log-reference/#ai-mcp-logs) for every ACL decision. Payload logging may expose sensitive data; enable it with care. 
MCP Server analytics surface in [{{site.konnect_short_name}} Explorer and Dashboards](/ai-gateway/monitor-ai-llm-metrics/#mcp-traffic-metrics) alongside other {{site.ai_gateway}} traffic, and export through [OpenTelemetry](/ai-gateway/ai-otel-metrics/#mcp-metrics). + +## Attach Policies + +Policies are how plugin configurations apply to an MCP Server. Authentication, rate limiting, request and response transformation, and OAuth gating (through [AI MCP OAuth2](/plugins/ai-mcp-oauth2/)) attach to the MCP Server through the `policies` field. Each entry is a string that references a Policy by name or ID. Multiple Policies can attach to one MCP Server; each runs as an independent plugin instance. + +For details, see the [Policy entity](/ai-gateway/entities/policy/) reference. + +## Scope of support + +The MCP Server runtime supports MCP operations and upstream interactions, while certain advanced features and non-HTTP protocols are not currently supported. The table below summarizes what is supported and what is outside the current scope. 
+ + +{% feature_table %} +item_title: Features +columns: + - title: Description + key: description + - title: Supported + key: supported + +features: + - title: "Protocol" + description: Handling latest streamable HTTP with HTTP and HTTPS upstreams + supported: true + - title: "OpenAPI operations" + description: Mapping MCP calls to upstream HTTP operations based on the OpenAPI schema + supported: true + - title: "JSON format" + description: Handling standard JSON request and response bodies + supported: true + - title: "Form-encoded data" + description: Handling `application/x-www-form-urlencoded` + supported: true + - title: "SNI routing" + description: Converting SNI-only routes + supported: false + - title: "Multipart and XML data" + description: Handling formats such as `multipart/form-data` or XML + supported: false + - title: "Advanced MCP features" + description: Handling structured output, active notifications on tool changes, and session sharing between instances + supported: false + - title: "Non-HTTP protocols" + description: Handling WebSocket and gRPC upstreams + supported: false + - title: "AI Guardrails" + description: Applying guardrails to MCP AI requests and responses + supported: false +{% endfeature_table %} + + +## Set up an MCP Server + +The following example creates a `conversion-listener` MCP Server that converts a flight-booking REST API into a single `searchFlights` MCP tool, restricts access to the `internal-teams` Consumer Group, and stores managed sessions in client-side encrypted form.
+ +{% entity_example %} +type: mcp-server +data: + display_name: KongAir Flights + name: kongair-flights + type: conversion-listener + acl_attribute_type: consumer + acls: + allow: + - internal-teams + deny: [] + default_tool_acls: + allow: + - internal-teams + deny: [] + policies: [] + config: + logging: + statistics: true + payloads: false + audits: true + max_request_body_size: 1048576 + server: + forward_client_headers: true + timeout: 10000 + session: + managed: true + strategy: client + session_ttl: 86400 + client: + secrets: + - "{vault://my-vault/session-secret}" + tools: + - name: searchFlights + description: Search for available flights between two airports. + method: GET + path: /flights + annotations: + title: Search flights + read_only_hint: true + idempotent_hint: true +{% endentity_example %} + ## Schema {% entity_schema %} diff --git a/app/_ai_gateway_entities/model.md b/app/_ai_gateway_entities/model.md index 1f634249f7..179edfbc76 100644 --- a/app/_ai_gateway_entities/model.md +++ b/app/_ai_gateway_entities/model.md @@ -5,6 +5,11 @@ entities: - ai-model products: - ai-gateway +min_version: + ai-gateway: '2.0.0' +breadcrumbs: + - /ai-gateway/ + - /ai-gateway/entities/ description: AI Models registered with the {{site.ai_gateway}}. 
schema: api: konnect/ai-gateway diff --git a/app/_ai_gateway_entities/policy.md b/app/_ai_gateway_entities/policy.md index c2a754eeb2..3833ae53ad 100644 --- a/app/_ai_gateway_entities/policy.md +++ b/app/_ai_gateway_entities/policy.md @@ -5,6 +5,11 @@ entities: - ai-policy products: - ai-gateway +min_version: + ai-gateway: '2.0.0' +breadcrumbs: + - /ai-gateway/ + - /ai-gateway/entities/ breadcrumbs: - /ai-gateway/ - /ai-gateway/entities/ diff --git a/app/_ai_gateway_entities/provider.md b/app/_ai_gateway_entities/provider.md index f4837f7911..6f26c73f3b 100644 --- a/app/_ai_gateway_entities/provider.md +++ b/app/_ai_gateway_entities/provider.md @@ -5,6 +5,11 @@ entities: - ai-provider products: - ai-gateway +min_version: + ai-gateway: '2.0.0' +breadcrumbs: + - /ai-gateway/ + - /ai-gateway/entities/ description: AI provider credentials and configuration used by {{site.ai_gateway}}. schema: api: konnect/ai-gateway diff --git a/app/_ai_gateway_entities/vault.md b/app/_ai_gateway_entities/vault.md index 0e95202d4c..0e9072d380 100644 --- a/app/_ai_gateway_entities/vault.md +++ b/app/_ai_gateway_entities/vault.md @@ -5,6 +5,11 @@ entities: - ai-vault products: - ai-gateway +min_version: + ai-gateway: '2.0.0' +breadcrumbs: + - /ai-gateway/ + - /ai-gateway/entities/ description: Vaults for storing and referencing secrets used by {{site.ai_gateway}} entities. 
schema: api: konnect/ai-gateway From 3cc41300ac5c5896740e1a2b3b04195dc1266f07 Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Fri, 8 May 2026 15:39:28 +0200 Subject: [PATCH 44/53] Update model entity doc --- app/_ai_gateway_entities/model.md | 207 +++++++++++++++++++++++++++-- app/_ai_gateway_entities/policy.md | 3 - 2 files changed, 198 insertions(+), 12 deletions(-) diff --git a/app/_ai_gateway_entities/model.md b/app/_ai_gateway_entities/model.md index 179edfbc76..04d7c487ce 100644 --- a/app/_ai_gateway_entities/model.md +++ b/app/_ai_gateway_entities/model.md @@ -37,10 +37,10 @@ related_resources: - text: Consumer Group entity url: /gateway/entities/consumer-group/ faqs: - - q: What's the difference between a Model entity and the `model` field inside an AI Proxy Advanced plugin config? + - q: What's the difference between a Model entity and a `model` field inside a plugin configuration? a: | A Model entity is the first-class {{site.ai_gateway}} entity you declare through the `/ai/models` API or {{site.konnect_short_name}}. - {{site.ai_gateway}} derives the AI Proxy Advanced plugin (and its `model` configuration) from the entity. + {{site.ai_gateway}} derives the underlying plugin and its `model` configuration from the entity. You don't configure the underlying plugin directly. - q: Can I edit the Service, Routes, or plugins that {{site.ai_gateway}} generates from a Model? @@ -70,9 +70,26 @@ faqs: - q: Does the Model entity store provider credentials? a: | - No. Provider credentials live on the [Provider entity](/ai-gateway/entities/provider/) and are materialized into the generated AI Proxy Advanced plugin configuration at Model creation time. + No. Provider credentials live on the [Provider entity](/ai-gateway/entities/provider/) and are materialized into the underlying primitives at Model creation time. Updating a Provider propagates the credential change to all Models that reference it. + - q: Can a client override the model name from the request body? 
+ a: | + By default, no. The request `model` field must match the upstream model on one of the Model's targets, otherwise the runtime returns a `400` error. + To accept a client-side alias, set `config.model.alias` on the Model and clients can send the alias value in the request `model` field instead of the upstream provider model name. + + - q: Can a client override `temperature`, `top_p`, or `top_k` from the request? + a: | + Yes. Values for `temperature`, `top_p`, and `top_k` in the request take precedence over the per-target configuration declared on `target_models[].config`. + + - q: Which algorithm does `lowest-latency` use to pick the fastest target? + a: | + Exponentially Weighted Moving Average (EWMA). EWMA continuously updates with every response, weighting recent observations more heavily, so older latencies decay over time but still contribute. There is no fixed learning-phase window. + + - q: Does the load balancer keep probing slower targets after picking a winner? + a: | + Yes. EWMA ensures every target continues to receive a small share of traffic (typically 0.1% to 5%, depending on the latency gap). This ongoing probing lets the load balancer adapt if a previously slower target becomes faster. + - q: Are on-prem and {{site.konnect_short_name}} Model entities the same? a: | The schemas are intentionally aligned at the field level. The same Model definition works in both modes. @@ -104,6 +121,19 @@ rows: endpoint: /ai/models {% endtable %} +## How it works + +At request time, the Model mediates traffic between clients and upstream provider APIs: + +1. Translates between the request and response format chosen for the Model and the upstream provider's native format. +1. Resolves upstream connection coordinates (protocol, host, port, path, HTTP method) from the selected target and its [Provider](/ai-gateway/entities/provider/), unless the target is a self-hosted model. +1. 
Authenticates to the upstream provider using credentials stored on the Provider entity. +1. Decorates the upstream request with per-target configuration (such as temperature or token-limit overrides) declared on `target_models[].config`. +1. Records usage statistics (tokens, cost, latency) for attached log Policies, and optionally the full request and response when payload logging is enabled. +1. Fulfills requests to self-hosted models using the supported native format transformations. + +A single Model can expose multiple upstream providers behind a consistent client-facing format, so callers don't change their request shape when the underlying Provider changes. + ## How a Model maps to runtime configuration When you create or update a Model, {{site.ai_gateway}} generates a fixed set of primitives: @@ -123,24 +153,183 @@ Generated primitives are protected. Direct PUT, PATCH, or DELETE calls against t ## Capabilities -The `capabilities` field tells {{site.ai_gateway}} which AI workflows the Model exposes. Each capability becomes one Route on the generated Service. A Model must declare at least one capability. +The [`capabilities`](#schema-aigateway-model-capabilities) field tells {{site.ai_gateway}} which AI workflows the Model exposes. Each capability becomes one Route on the generated Service. A Model must declare at least one capability. -Model `type` controls which capability set applies: +Model [`type`](#schema-aigateway-model-type) controls which capability set applies: * `model`: synchronous request/response workloads through generative APIs. Supported capabilities are `chat`, `embeddings`, `assistants`, `responses`, `audio-transcriptions`, `audio-translations`, `image-generation`, `image-edits`, `video-generations`, and `realtime`. * `api`: asynchronous workloads through the files and batches APIs. Supported capabilities are `batches` and `files`. Not every provider supports every capability. 
The set of capabilities you can declare on a Model depends on what the provider in `target_models` exposes. See [{{site.ai_gateway}} providers](/ai-gateway/ai-providers/) for per-provider details. -## Target models and load balancing +The following table maps each capability to an OpenAI API reference and the corresponding [AI Proxy plugin](/plugins/ai-proxy/) example. + + +{% table %} +columns: + - title: Capability + key: capability + - title: Description + key: description + - title: Example route + key: example +rows: + - capability: "`chat`" + description: Conversational responses from a sequence of messages. + example: "[`llm/v1/chat`](/plugins/ai-proxy/examples/openai-chat-route/)" + - capability: "`embeddings`" + description: Vector representations for semantic search and similarity matching. + example: "[`llm/v1/embeddings`](/plugins/ai-proxy/examples/embeddings-route-type/)" + - capability: "`assistants`" + description: Persistent tool-using agents with metadata for debugging and evaluation. + example: "[`llm/v1/assistants`](/plugins/ai-proxy/examples/assistants-route-type/)" + - capability: "`responses`" + description: REST-based full-text responses. + example: "[`llm/v1/responses`](/plugins/ai-proxy/examples/responses-route-type/)" + - capability: "`audio-transcriptions`" + description: Speech-to-text. + example: "[`audio/v1/audio/transcriptions`](/plugins/ai-proxy/examples/audio-transcription-openai/)" + - capability: "`audio-translations`" + description: Audio translation between languages. + example: "[`audio/v1/audio/translations`](/plugins/ai-proxy/examples/audio-translation-openai/)" + - capability: "`image-generation`" + description: Generate images from text prompts. + example: "[`image/v1/images/generations`](/plugins/ai-proxy/examples/image-generation-openai/)" + - capability: "`image-edits`" + description: Modify images from text prompts. 
+ example: "[`image/v1/images/edits`](/plugins/ai-proxy/examples/image-edits-openai/)" + - capability: "`video-generations`" + description: Generate videos from text prompts. + example: "[`video/v1/videos/generations`](/plugins/ai-proxy/examples/video-generation-openai/)" + - capability: "`realtime`" + description: Bidirectional WebSocket streaming for low-latency, interactive voice and text. + example: "[`realtime/v1/realtime`](/plugins/ai-proxy-advanced/examples/realtime-route-openai/)" + - capability: "`batches`" + description: Asynchronous bulk LLM requests for long workloads. + example: "[`llm/v1/batches`](/plugins/ai-proxy/examples/batches-route-type/)" + - capability: "`files`" + description: File uploads for long documents and structured input. + example: "[`llm/v1/files`](/plugins/ai-proxy/examples/files-route-type/)" +{% endtable %} + + +## Request and response formats + +The [`formats`](#schema-aigateway-model-formats) array on a Model declares the request and response shapes the Model accepts. Each entry has a `type` that selects the format. The default `openai` format flattens upstream provider responses into the OpenAI shape, so clients can use a single request and response format across providers. + +To preserve a provider's native request and response format instead, set `formats[].type` to a non-OpenAI value. The Model passes requests upstream without conversion, while {{site.ai_gateway}} continues to provide analytics, logging, and cost calculation. + + +{% table %} +columns: + - title: Format + key: format + - title: Provider + key: provider + - title: Native capabilities + key: capabilities +rows: + - format: "`openai`" + provider: All supported providers (default) + capabilities: Translates between OpenAI request and response shapes and the upstream provider format. + - format: "`anthropic`" + provider: "[Anthropic](/ai-gateway/ai-providers/anthropic/#supported-native-llm-formats-for-anthropic)" + capabilities: Messages, batch processing. 
+ - format: "`bedrock`" + provider: "[Amazon Bedrock](/ai-gateway/ai-providers/bedrock/#supported-native-llm-formats-for-amazon-bedrock)" + capabilities: Converse, RAG (RetrieveAndGenerate), reranking, async invocation. + - format: "`cohere`" + provider: "[Cohere](/ai-gateway/ai-providers/cohere/#supported-native-llm-formats-for-cohere)" + capabilities: Reranking. + - format: "`gemini`" + provider: "[Gemini](/ai-gateway/ai-providers/gemini/#supported-native-llm-formats-for-gemini), [Vertex AI](/ai-gateway/ai-providers/vertex/#supported-native-llm-formats-for-gemini-vertex)" + capabilities: Content generation, embeddings, batches, file uploads, reranking, long-running predictions. + - format: "`huggingface`" + provider: "[Hugging Face](/ai-gateway/ai-providers/huggingface/#supported-native-llm-formats-for-hugging-face)" + capabilities: Text generation, streaming. +{% endtable %} + + +When a native format is set, only the corresponding provider is supported with its specific APIs. For format-specific behavior and limitations, see the [AI Proxy plugin reference](/plugins/ai-proxy/#supported-native-llm-formats). -A Model is a virtual model: it exposes one route (`config.route`) and one set of capabilities, and routes requests to one or more concrete upstream models declared in its `target_models` array. Each entry in `target_models` represents a single upstream model instance with one URL. +## Target models + +A Model is a virtual model: it exposes one route ([`config.route`](#schema-aigateway-model-config-route)) and one set of capabilities, and routes requests to one or more concrete upstream models declared in its [`target_models`](#schema-aigateway-model-target-models) array. Each entry represents a single upstream model instance with one URL. For each target, you provide the upstream model name (for example, `gpt-4o`) and reference the Provider to use by its `name`. 
Each target can also override settings such as `temperature`, `max_tokens`, `input_cost`, and `output_cost`. -There's no separate Target Model entity or endpoint. Target models are managed only as nested data inside a Model — through the same Model API surface used to create, update, and delete the parent. Adding, removing, or modifying a target is an update to the Model itself. +There's no separate Target Model entity or endpoint. Target models are managed only as nested data inside a Model, through the same Model API surface used to create, update, and delete the parent. Adding, removing, or modifying a target is an update to the Model itself. + +## Load balancing + +When a Model has more than one target, the [load balancer](#schema-aigateway-model-config-balancer) sits between the virtual model and its targets, distributing requests according to `config.balancer`. For algorithm details, selection guidance, and tuning, see [Load balancing with AI Proxy Advanced](/ai-gateway/load-balancing/). + +### Algorithms + +The [`algorithm`](#schema-aigateway-model-config-balancer-algorithm) field selects one of seven load balancing strategies for distributing requests across target models. + + +{% table %} +columns: + - title: Algorithm + key: algorithm + - title: Behavior + key: behavior +rows: + - algorithm: "[`round-robin`](/plugins/ai-proxy-advanced/examples/round-robin/)" + behavior: Weighted traffic distribution across targets. + - algorithm: "[`consistent-hashing`](/plugins/ai-proxy-advanced/examples/consistent-hashing/)" + behavior: Sticky sessions based on header values. + - algorithm: "[`least-connections`](/plugins/ai-proxy-advanced/examples/least-connections/)" + behavior: Route to backends with spare capacity. + - algorithm: "[`lowest-latency`](/plugins/ai-proxy-advanced/examples/lowest-latency/)" + behavior: Route to the fastest-responding model. 
+ - algorithm: "[`lowest-usage`](/plugins/ai-proxy-advanced/examples/lowest-usage/)" + behavior: Route based on token counts or cost. + - algorithm: "[`semantic`](/plugins/ai-proxy-advanced/examples/semantic/)" + behavior: Route based on prompt-to-model similarity. + - algorithm: "[`priority`](/plugins/ai-proxy-advanced/examples/priority/)" + behavior: Tiered failover across model groups. +{% endtable %} + + +### Retry and fallback + +The load balancer supports configurable retries, timeouts, and failover to different targets when one is unavailable. Fallback works across targets with any supported format, so you can mix providers freely (for example, OpenAI and Mistral). For configuration details, see [Retry and fallback configuration](/ai-gateway/load-balancing/#retry-and-fallback). + +{:.info} +> Client errors don't trigger failover. To fail over on additional error types, set +> [`failover_criteria`](#schema-aigateway-model-config-balancer-failover-criteria) to include HTTP codes +> like `http_429` or `http_502`, and `non_idempotent` for POST requests. + +### Health check and circuit breaker + +The load balancer includes a circuit breaker that improves reliability under sustained failures. When a target reaches the failure threshold set by [`max_fails`](#schema-aigateway-model-config-balancer-max-fails), the load balancer stops routing requests to it until the [`fail_timeout`](#schema-aigateway-model-config-balancer-fail-timeout) period elapses. For behavior examples and tuning, see [Circuit breaker](/ai-gateway/load-balancing/#health-check-and-circuit-breaker). + +### Vector store + +A vector store holds numerical representations (embeddings) of requests and responses so the runtime can match new requests against stored vectors. It powers the [`semantic`](#schema-aigateway-model-config-balancer-algorithm) algorithm and any similarity-matching workflow on the Model. 
Configure storage through [`config.balancer.vectordb`](#schema-aigateway-model-config-balancer-vectordb) by selecting a `strategy`: + +* `redis`: connects to Redis with Vector Similarity Search (VSS), AWS MemoryDB for Redis, or Valkey. {{site.ai_gateway}} auto-detects Valkey from the server name field and uses the Valkey-specific driver. +* `pgvector`: connects to PostgreSQL with the pgvector extension. + +For deeper background on vector storage and similarity matching, see [Embedding-based similarity matching](/ai-gateway/semantic-similarity/). + +### Embeddings + +An embedding model converts request and response text into vector representations for the vector store. Set [`config.balancer.embeddings`](#schema-aigateway-model-config-balancer-embeddings) to reference a Provider and an embedding model name. Supported provider types are `azure`, `bedrock`, `gemini`, and `huggingface`. The same embedding model also powers the `lowest-usage` algorithm when usage is calculated against semantic content. + +## Templating + +The Model resolves runtime values from request data using placeholder substitution. This lets you select the target model dynamically per request, route to per-deployment Azure endpoints, or fan out to multiple providers from a single Model. + +Substitution applies to the [`name`](#schema-aigateway-model-target-models-name) of each target model and to any per-target [`config`](#schema-aigateway-model-target-models-config) option. Three placeholders are available: + +* `$(headers.header_name)`: the value of a request header. +* `$(uri_captures.path_parameter_name)`: the value of a captured URI path parameter. +* `$(query_params.query_parameter_name)`: the value of a query string parameter. -When a Model has more than one target, the load balancer sits between the virtual model and its targets, distributing requests according to `config.balancer`. 
For the supported algorithms, configuration options, and tuning guidance, see [Load balancing with AI Proxy Advanced](/ai-gateway/load-balancing/). +For end-to-end examples, see [dynamic model selection](/plugins/ai-proxy/examples/sdk-dynamic-model-selection/), [Azure deployment routing](/plugins/ai-proxy/examples/sdk-azure-deployment/), and [proxying multiple models in one Azure instance](/plugins/ai-proxy/examples/sdk-multiple-providers/) on the AI Proxy plugin page. ## Access control diff --git a/app/_ai_gateway_entities/policy.md b/app/_ai_gateway_entities/policy.md index 3833ae53ad..d8118225d5 100644 --- a/app/_ai_gateway_entities/policy.md +++ b/app/_ai_gateway_entities/policy.md @@ -10,9 +10,6 @@ min_version: breadcrumbs: - /ai-gateway/ - /ai-gateway/entities/ -breadcrumbs: - - /ai-gateway/ - - /ai-gateway/entities/ description: "Policies for {{site.ai_gateway}}." schema: api: konnect/ai-gateway From 9d395247e57169205ed57e835b1638a76aa0cbc9 Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Fri, 8 May 2026 16:00:14 +0200 Subject: [PATCH 45/53] update provider docs --- app/_ai_gateway_entities/provider.md | 32 ++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/app/_ai_gateway_entities/provider.md b/app/_ai_gateway_entities/provider.md index 6f26c73f3b..5c72fb0a58 100644 --- a/app/_ai_gateway_entities/provider.md +++ b/app/_ai_gateway_entities/provider.md @@ -45,16 +45,16 @@ faqs: a: | No. A Provider entity is a write-time template. Credentials and configuration only enter the runtime when a Model references the Provider; at that point, the Provider's values are - materialized into the AI Proxy Advanced plugin instances generated for the Model. + materialized into the underlying primitives generated for the Model. --- ## What is a Provider? A Provider is a first-class {{site.ai_gateway}} entity that represents an upstream LLM service connection: credentials, endpoint configuration, and provider-type-specific options. 
Each Provider has a `type` that selects the upstream LLM service (see the schema below for supported values, and the per-provider pages under [{{site.ai_gateway}} providers](/ai-gateway/ai-providers/) for provider-specific guidance). -Models reference a Provider by `name` to route their `target_models` to that upstream. {{site.ai_gateway}} materializes the Provider's credentials into the AI Proxy Advanced plugin configuration of every Model that references it. Updating a Provider propagates credential changes to all referencing Models. +Models reference a Provider by `name` to route their `target_models` to that upstream. {{site.ai_gateway}} materializes the Provider's credentials into the underlying primitives of every Model that references it. Updating a Provider propagates credential changes to all referencing Models. -Providers are managed through the {{site.ai_gateway}} entity API surface in both deployment modes: +Providers can be created and managed through {{site.konnect_short_name}}, the on-prem Admin API, decK, or the {{site.konnect_short_name}} UI: {% table %} columns: @@ -73,6 +73,30 @@ rows: endpoint: /ai/providers {% endtable %} +## Supported providers + +{{site.ai_gateway}} supports the following upstream providers. The Provider's [`type`](#schema-aigateway-provider-type) field selects one of these connections. Per-provider pages document supported capabilities, configuration requirements, and provider-specific limitations. 
+ +{% html_tag type="div" css_classes="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-3" %} +{% icon_card icon="openai.svg" title="OpenAI" cta_url="/ai-gateway/ai-providers/openai/" %} +{% icon_card icon="azure.svg" title="Azure OpenAI" cta_url="/ai-gateway/ai-providers/azure/" %} +{% icon_card icon="bedrock.svg" title="Amazon Bedrock" cta_url="/ai-gateway/ai-providers/bedrock/" %} +{% icon_card icon="anthropic.svg" title="Anthropic" cta_url="/ai-gateway/ai-providers/anthropic/" %} +{% icon_card icon="gemini.svg" title="Gemini" cta_url="/ai-gateway/ai-providers/gemini/" %} +{% icon_card icon="vertex.svg" title="Vertex AI" cta_url="/ai-gateway/ai-providers/vertex/" %} +{% icon_card icon="cohere.svg" title="Cohere" cta_url="/ai-gateway/ai-providers/cohere/" %} +{% icon_card icon="mistral.svg" title="Mistral" cta_url="/ai-gateway/ai-providers/mistral/" %} +{% icon_card icon="huggingface.svg" title="Hugging Face" cta_url="/ai-gateway/ai-providers/huggingface/" %} +{% icon_card icon="metaai.svg" title="Llama" cta_url="/ai-gateway/ai-providers/llama/" %} +{% icon_card icon="xai.svg" title="xAI" cta_url="/ai-gateway/ai-providers/xai/" %} +{% icon_card icon="dashscope.svg" title="Alibaba Cloud DashScope" cta_url="/ai-gateway/ai-providers/dashscope/" %} +{% icon_card icon="cerebras.svg" title="Cerebras" cta_url="/ai-gateway/ai-providers/cerebras/" %} +{% icon_card icon="deepseek.svg" title="DeepSeek" cta_url="/ai-gateway/ai-providers/deepseek/" %} +{% icon_card icon="ollama.svg" title="Ollama" cta_url="/ai-gateway/ai-providers/ollama/" %} +{% icon_card icon="databricks.svg" title="Databricks" cta_url="/ai-gateway/ai-providers/databricks/" %} +{% icon_card icon="vllm.svg" title="vLLM" cta_url="/ai-gateway/ai-providers/vllm/" %} +{% endhtml_tag %} + ## Authentication The `config.auth` object declares how {{site.ai_gateway}} authenticates to the upstream provider. 
The shape of `auth` depends on the Provider's `type`: @@ -97,7 +121,7 @@ Because references resolve by `name`, the `name` field is the stable handle for ## Lifecycle -Creating a Provider stores the entity but doesn't generate any runtime primitives. Provider credentials enter the runtime only when a Model references the Provider — at that point, the credentials are materialized into the AI Proxy Advanced plugin configuration of the Model's derived primitives. +Creating a Provider stores the entity but doesn't generate any runtime primitives. Provider credentials enter the runtime only when a Model references the Provider. At that point, the credentials are materialized into the underlying primitives of the Model. Updating a Provider re-materializes credentials into every Model that references it. The change takes effect on the next request through any referencing Model. From f517cccce41f2519847dfcc2b04a7c8393043de4 Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Mon, 11 May 2026 09:54:50 +0200 Subject: [PATCH 46/53] push updates --- app/_ai_gateway_entities/consumer.md | 4 ++-- app/_ai_gateway_entities/mcp-server.md | 8 ++++---- app/_ai_gateway_entities/model.md | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/app/_ai_gateway_entities/consumer.md b/app/_ai_gateway_entities/consumer.md index 317c76e5ff..c384686966 100644 --- a/app/_ai_gateway_entities/consumer.md +++ b/app/_ai_gateway_entities/consumer.md @@ -109,9 +109,9 @@ Consumer Groups are managed through their own entity surface. See the [Consumer ## Attach Policies -A Policy is an {{site.ai_gateway}} entity that triggers an action using a plugin. You can attach a Policy to a Consumer and the underlying plugin will run in the request lifecycle when this Consumer is identified. To attach a Policy, add the Policy's `name` or `id` to the Consumer's `policies` array. +Policies are how plugin configurations apply to a Consumer. 
Attach a Policy by adding its `name` or `id` to the Consumer's `policies` array. The underlying plugin runs in the request lifecycle when the Consumer is identified. -You can attach multiple Policies to a single Consumer. Each Policy is an independent instance. +You can attach multiple Policies to a single Consumer. Each Policy is an independent plugin instance. For the supported plugin types and how Policies attach to other entities, see the [Policy entity](/ai-gateway/entities/policy/) reference. diff --git a/app/_ai_gateway_entities/mcp-server.md b/app/_ai_gateway_entities/mcp-server.md index fd5248c93d..9ff3c9825d 100644 --- a/app/_ai_gateway_entities/mcp-server.md +++ b/app/_ai_gateway_entities/mcp-server.md @@ -99,9 +99,9 @@ rows: endpoint: /ai/mcp-servers {% endtable %} -## Apply gateway features to MCP traffic +## Common Policies -An MCP Server brings {{site.ai_gateway}} traffic management, security, and observability features to MCP endpoints. Attach the relevant plugin as a [Policy](/ai-gateway/entities/policy/) on the MCP Server: +Attach plugins as [Policies](/ai-gateway/entities/policy/) on the MCP Server to handle authentication, rate limiting, observability, and traffic control: {% table %} @@ -113,13 +113,13 @@ columns: rows: - use_case: Authentication example: | - Apply [OpenID Connect](/plugins/openid-connect/) or the [Key Auth](/plugins/key-auth/) plugin to an MCP Server. + Apply [AI MCP OAuth2](/plugins/ai-mcp-oauth2/) for MCP-spec OAuth 2.0 flows, or [OpenID Connect](/plugins/openid-connect/) / [Key Auth](/plugins/key-auth/) for non-OAuth identity. - use_case: Rate limiting example: | Use [Rate Limiting](/plugins/rate-limiting/) or [Rate Limiting Advanced](/plugins/rate-limiting-advanced/) to control MCP request volume. - use_case: Observability example: | - Add [logging and tracing plugins](/plugins/?category=logging) for full request and response visibility. 
+ Add [logging and tracing plugins](/plugins/?category=logging) for full request and response visibility. MCP metrics surface in [{{site.konnect_short_name}} analytics](/ai-gateway/monitor-ai-llm-metrics/#mcp-traffic-metrics). - use_case: Traffic control example: | Apply [request and response transformation plugins](/plugins/?category=transformations) or [ACL policies](/plugins/acl/). diff --git a/app/_ai_gateway_entities/model.md b/app/_ai_gateway_entities/model.md index 04d7c487ce..2278a7d19d 100644 --- a/app/_ai_gateway_entities/model.md +++ b/app/_ai_gateway_entities/model.md @@ -339,7 +339,7 @@ For per-request authentication and identity, configure the appropriate authentic ## Attach Policies -Policies are the way you apply plugin configurations to a Model. A Policy attached to a Model runs at the Service level of the Model's generated primitives, so it applies to every request routed through any of the Model's capabilities. +Policies are how plugin configurations apply to a Model. A Policy attached to a Model runs at the Service level of the Model's generated primitives, so it applies to every request routed through any of the Model's capabilities. A Model declares the Policies it uses through its `policies` field. Each entry is a string that references a Policy by name or ID. {{site.konnect_short_name}} resolves these references against Policies created at `/v1/ai-gateways/{aiGatewayId}/policies`. On-prem also supports the nested endpoint `/ai/models/{modelId}/policies`, which creates and attaches a Policy in one call. 
From f52bba6f23661ad70f9e625305fcdf81174e96dd Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Fri, 15 May 2026 06:07:29 +0200 Subject: [PATCH 47/53] Apply suggestions from code review Co-authored-by: jbaross --- app/_ai_gateway_entities/agent.md | 24 ++++++++++++++++------ app/_ai_gateway_entities/consumer-group.md | 2 +- app/_ai_gateway_entities/mcp-server.md | 2 +- app/_ai_gateway_entities/provider.md | 2 +- 4 files changed, 21 insertions(+), 9 deletions(-) diff --git a/app/_ai_gateway_entities/agent.md b/app/_ai_gateway_entities/agent.md index b0bb6c92d6..98886f0fd1 100644 --- a/app/_ai_gateway_entities/agent.md +++ b/app/_ai_gateway_entities/agent.md @@ -74,9 +74,9 @@ faqs: ## What is an Agent? -An Agent is a first-class {{site.ai_gateway}} entity that represents an upstream agent endpoint exposed through {{site.ai_gateway}}. An Agent has a type, either `a2a` for [Agent-to-Agent protocol](https://a2aproject.github.io/A2A/) traffic or `http` for generic HTTP agent routing, plus configuration that points {{site.ai_gateway}} at the upstream and shapes how requests flow. +An Agent is a first-class {{site.ai_gateway}} entity that represents an upstream agent endpoint exposed through {{site.ai_gateway}}. An Agent has a type, either `a2a` for [Agent-to-Agent protocol](https://a2aproject.github.io/A2A/) traffic or `http` for generic HTTP agent routing, and a configuration that points {{site.ai_gateway}} at the upstream and shapes how requests flow. -For `a2a` Agents, the runtime adds protocol-aware behavior on top of plain proxying: it detects A2A requests across both JSON-RPC and REST bindings, rewrites agent-card URLs so clients discover the gateway as the canonical endpoint, and emits structured A2A telemetry to {{site.konnect_short_name}} analytics and OpenTelemetry. For `http` Agents, requests are proxied without A2A-specific processing. +For `http` type Agents, requests are proxied without A2A-specific processing. 
For `a2a` type Agents, {{site.ai_gateway}} adds protocol-aware behavior on top of plain proxying: it detects A2A requests across both JSON-RPC and REST bindings, rewrites agent-card URLs so clients discover the gateway as the canonical endpoint, and emits structured A2A telemetry to {{site.konnect_short_name}} analytics and OpenTelemetry. Agents can be created and managed through {{site.konnect_short_name}}, the on-prem Admin API, decK, or the {{site.konnect_short_name}} UI: @@ -99,9 +99,9 @@ rows: ## How A2A traffic flows -When an Agent has type `a2a`, the runtime processes traffic in four phases: +When an Agent has type `a2a`, proxied traffic is processed in four phases: -1. **Access**. Detects whether the request is an A2A operation (JSON-RPC or REST binding). When statistics logging is enabled, starts an OpenTelemetry span and records the request body for payload logging if that's enabled too. +1. **Access**. Detects whether the request is an A2A operation (JSON-RPC or REST binding). When statistics logging is enabled, this starts an OpenTelemetry span and records the request body for payload logging if that's also enabled. 1. **Header filter**. Detects streaming responses (`Content-Type: text/event-stream`) and records time to first byte. Buffers agent-card responses for URL rewriting. 1. **Body filter**. Streams SSE chunks through to the client without buffering. Buffers non-streaming responses to extract task metadata. Rewrites agent-card URLs to the gateway address. Emits analytics at end of response. 1. **Log**. Finalizes the OpenTelemetry span with task state, task ID, and any error information. @@ -168,7 +168,7 @@ rows: ## Protocol detection -A2A traffic is auto-detected per request. There's no per-route opt-in, and non-A2A traffic passes through without overhead. +A2A traffic is auto-detected per request, and non-A2A traffic passes through without overhead. 
**REST binding.** Detection anchors to the end of the request path, so any prefix added by the route is ignored. For example, both `/v1/message:send` and `/api/agents/v1/message:send` match `SendMessage`: @@ -235,7 +235,19 @@ When an upstream agent returns an agent card, the runtime rewrites the `url` fie ## Logging and observability -Statistics logging records structured A2A telemetry per request: the A2A method, binding type, task state, task ID, context ID, latency, time to first byte (for streaming), SSE event count, and response size. The runtime emits this data into the `ai.a2a` namespace consumed by {{site.konnect_short_name}} analytics and any attached log plugins, and creates a `kong.a2a` child span when {{site.base_gateway}} tracing is configured. +When statistics logging is enabled, {{site.ai_gateway}} records the following structured A2A telemetry per request: + +- A2A method +- Binding type +- Task state +- Task ID +- Context ID +- Latency +- Time to first byte (for streaming) +- SSE event count +- Response size + +The runtime emits this data into the `ai.a2a` namespace consumed by {{site.konnect_short_name}} analytics and any attached logging plugins, and creates a `kong.a2a` child span when {{site.base_gateway}} tracing is configured. {:.info} > When statistics logging is enabled, the runtime removes the `Accept-Encoding` request header diff --git a/app/_ai_gateway_entities/consumer-group.md b/app/_ai_gateway_entities/consumer-group.md index 9dd52d575d..9a5c2cc86b 100644 --- a/app/_ai_gateway_entities/consumer-group.md +++ b/app/_ai_gateway_entities/consumer-group.md @@ -75,7 +75,7 @@ faqs: A Consumer Group is the {{site.ai_gateway}} entity that represents a collection of Consumers grouped for the purpose of applying shared Policies and access controls. -Use Consumer Groups to scope group-wide behavior, such as rate limits, prompt guards, or content moderation, without configuring each Consumer individually. 
Consumer Groups also appear in the `acls` field of Model, Agent, and MCP Server entities, where they gate access to those parent entities. +Use Consumer Groups to scope group-wide behavior, such as rate limits, prompt guards, or content moderation, without configuring each Consumer individually. Consumer Groups can appear in the `acls` field of Model, Agent, and MCP Server entities, where they gate access to those parent entities. Consumer Groups are managed through the {{site.ai_gateway}} entity API surface in both deployment modes: diff --git a/app/_ai_gateway_entities/mcp-server.md b/app/_ai_gateway_entities/mcp-server.md index 9ff3c9825d..6344d7d0ea 100644 --- a/app/_ai_gateway_entities/mcp-server.md +++ b/app/_ai_gateway_entities/mcp-server.md @@ -230,7 +230,7 @@ Tools can also carry MCP-spec [annotations](#schema-aigateway-mcpserver-tools-an Two session strategies: -1. **Client.** Session state is encrypted into the MCP session ID assigned to the client. Requires `secrets` (the encryption keys; the first entry is used for encryption, all entries for decryption to support key rotation). +1. **Client.** Session state is encrypted into the MCP session ID assigned to the client. Requires `secrets` which are encryption keys; the first entry is used for encryption, all entries are used for decryption to support key rotation. 1. **Redis.** Session state is stored in Redis. Configure connection details and authentication in `config.server.session.redis`. Cloud Redis providers (AWS ElastiCache, Azure, GCP) authenticate through provider-specific blocks under `redis.cloud_authentication`. `session_ttl` controls how long sessions live (default 24 hours). Set `managed: false` to disable managed sessions when the upstream maintains state externally. 
diff --git a/app/_ai_gateway_entities/provider.md b/app/_ai_gateway_entities/provider.md index 5c72fb0a58..349257a58f 100644 --- a/app/_ai_gateway_entities/provider.md +++ b/app/_ai_gateway_entities/provider.md @@ -50,7 +50,7 @@ faqs: ## What is a Provider? -A Provider is a first-class {{site.ai_gateway}} entity that represents an upstream LLM service connection: credentials, endpoint configuration, and provider-type-specific options. Each Provider has a `type` that selects the upstream LLM service (see the schema below for supported values, and the per-provider pages under [{{site.ai_gateway}} providers](/ai-gateway/ai-providers/) for provider-specific guidance). +A Provider is a first-class {{site.ai_gateway}} entity that represents an upstream LLM service connection and its credentials, endpoint configuration, and provider-type-specific options. Each Provider has a `type` that selects the upstream LLM service. See the schema below for supported values, and the per-provider pages under [{{site.ai_gateway}} providers](/ai-gateway/ai-providers/) for provider-specific guidance. Models reference a Provider by `name` to route their `target_models` to that upstream. {{site.ai_gateway}} materializes the Provider's credentials into the underlying primitives of every Model that references it. Updating a Provider propagates credential changes to all referencing Models. 
From f0287ab0efcb42fb88f4d750c2eb34d9e8a575c1 Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Fri, 15 May 2026 09:54:54 +0200 Subject: [PATCH 48/53] Apply feedback from the second review --- app/_ai_gateway_entities/agent.md | 22 ++++-- app/_ai_gateway_entities/ai-gateway.md | 70 +++++++++---------- .../consumer-credential.md | 2 +- app/_ai_gateway_entities/consumer.md | 2 +- .../data-plane-certificate.md | 24 ++++++- app/_ai_gateway_entities/mcp-server.md | 36 +++++++--- app/_ai_gateway_entities/policy.md | 2 + app/_ai_gateway_entities/provider.md | 8 ++- app/_ai_gateway_entities/vault.md | 2 +- app/_landing_pages/ai-gateway/entities.yaml | 8 +++ 10 files changed, 119 insertions(+), 57 deletions(-) diff --git a/app/_ai_gateway_entities/agent.md b/app/_ai_gateway_entities/agent.md index 98886f0fd1..e5f1823ca4 100644 --- a/app/_ai_gateway_entities/agent.md +++ b/app/_ai_gateway_entities/agent.md @@ -78,7 +78,7 @@ An Agent is a first-class {{site.ai_gateway}} entity that represents an upstream For `http` type Agents, requests are proxied without A2A-specific processing. For `a2a` type Agents, {{site.ai_gateway}} adds protocol-aware behaviour on top of plain proxying: it detects A2A requests across both JSON-RPC and REST bindings, rewrites agent-card URLs so clients discover the gateway as the canonical endpoint, and emits structured A2A telemetry to {{site.konnect_short_name}} analytics and OpenTelemetry. -Agents can be created and managed through {{site.konnect_short_name}}, the on-prem Admin API, decK, or the {{site.konnect_short_name}} UI: +Agents can be created and managed through the {{site.konnect_short_name}} UI, the {{site.ai_gateway}} API, decK, or the on-prem Admin API: {% table %} columns: @@ -166,11 +166,13 @@ rows: purpose: Carries the concrete output of a task in a structured, retrievable form. {% endtable %} -## Protocol detection +### Protocol detection A2A traffic is auto-detected per request and non-A2A traffic passes through without overhead. 
-**REST binding.** Detection anchors to the end of the request path, so any prefix added by the route is ignored. For example, both `/v1/message:send` and `/api/agents/v1/message:send` match `SendMessage`: +#### REST binding + +Detection anchors to the end of the request path, so any prefix added by the route is ignored. For example, both `/v1/message:send` and `/api/agents/v1/message:send` match `SendMessage`: {% table %} @@ -225,11 +227,13 @@ rows: The canonical method name is what appears in OpenTelemetry span attributes and log output. -**JSON-RPC binding.** Detected by the `"jsonrpc"` field in the request body, combined with a recognized A2A method name or an `A2A-Version` request header. Recognized methods include `message/send`, `message/stream`, `tasks/get`, `tasks/list`, `tasks/cancel`, `tasks/resubscribe`, the `tasks/pushNotificationConfig/*` family, and `agent/getExtendedAgentCard`. +#### JSON-RPC binding + +Detected by the `"jsonrpc"` field in the request body, combined with a recognized A2A method name or an `A2A-Version` request header. Recognized methods include `message/send`, `message/stream`, `tasks/get`, `tasks/list`, `tasks/cancel`, `tasks/resubscribe`, the `tasks/pushNotificationConfig/*` family, and `agent/getExtendedAgentCard`. A request carrying an `A2A-Version` header is treated as JSON-RPC even if the method isn't in the recognized list. When an unknown method is accepted this way, the `method` field in log output is recorded as `"unknown"` to bound metric cardinality. The OpenTelemetry span's `kong.a2a.operation` attribute still receives the actual method name. -## Agent-card URL rewriting +### Agent-card URL rewriting When an upstream agent returns an agent card, the runtime rewrites the `url` field, and any `additionalInterfaces[].url` fields, to the {{site.ai_gateway}} address. A2A clients then discover the gateway as the canonical endpoint instead of contacting the upstream directly. 
The rewrite uses `X-Forwarded-*` headers to construct the correct scheme, host, and port when the gateway is deployed behind a load balancer or reverse proxy. @@ -247,14 +251,18 @@ When Statistics logging is enabled the {{site.ai_gateway}} records the following - SSE event count - Response size. -The runtime emits this data into the `ai.a2a` namespace consumed by {{site.konnect_short_name}} analytics and any attached logging plugins, and creates a `kong.a2a` child span when {{site.base_gateway}} tracing is configured. +The runtime emits this data into the `ai.a2a` namespace consumed by {{site.konnect_short_name}} analytics and any attached logging plugins, and creates a `kong.a2a` child span when [{{site.base_gateway}} tracing](/gateway/tracing/) is configured. {:.info} > When statistics logging is enabled, the runtime removes the `Accept-Encoding` request header > before forwarding to the upstream. This prevents compressed responses that the runtime can't > parse for metadata extraction. -Payload logging additionally captures request and response bodies. Payloads are truncated at the configured payload size limit. Enable with care. Payload logging may expose sensitive data. +Payload logging additionally captures request and response bodies. Payloads are truncated at the configured payload size limit. + +{:.warning} +> Payload logging may expose sensitive data. Only enable it when you're prepared to handle +> request and response bodies in your logging pipeline. You can view A2A analytics in {{site.konnect_short_name}} Explorer and Dashboards through the [Agentic usage analytics](/observability/explorer/?tab=agentic-usage#metrics) view. 
diff --git a/app/_ai_gateway_entities/ai-gateway.md b/app/_ai_gateway_entities/ai-gateway.md index 9f543151c7..b90ad43199 100644 --- a/app/_ai_gateway_entities/ai-gateway.md +++ b/app/_ai_gateway_entities/ai-gateway.md @@ -1,5 +1,5 @@ --- -title: AI Gateways +title: {{site.ai_gateway}}s content_type: reference entities: - ai-gateway @@ -30,53 +30,53 @@ related_resources: - text: Data Plane Certificate entity url: /ai-gateway/entities/data-plane-certificate/ faqs: - - q: How is an AI Gateway different from a {{site.konnect_short_name}} Gateway control plane? + - q: How is an {{site.ai_gateway}} different from a {{site.konnect_short_name}} Gateway control plane? a: | - An AI Gateway is a dedicated control plane purpose-built for AI traffic. It exposes its own + An {{site.ai_gateway}} is a dedicated control plane purpose-built for AI traffic. It exposes its own entity surface (Models, Providers, Policies, Agents, MCP Servers, and so on) and its own data plane runtime. It doesn't share entities or data planes with a regular {{site.konnect_short_name}} Gateway control plane. - - q: Can I run more than one AI Gateway in an organization? + - q: Can I run more than one {{site.ai_gateway}} in an organization? a: | - Yes. An organization can hold multiple AI Gateway entities. Each one has its own + Yes. An organization can hold multiple {{site.ai_gateway}} entities. Each one has its own configuration and telemetry endpoints, its own set of child entities, and its own data planes. - q: What does `config_hash` represent? a: | - `config_hash` is a hash of the AI Gateway's latest configuration, including all of its - child entities. It changes any time something under the AI Gateway is created, updated, + `config_hash` is a hash of the {{site.ai_gateway}}'s latest configuration, including all of its + child entities. It changes any time something under the {{site.ai_gateway}} is created, updated, or deleted. 
Compare it to the `config_hash` reported by a data plane node to check whether the node has the current configuration. - - q: What happens to child entities when I delete an AI Gateway? + - q: What happens to child entities when I delete an {{site.ai_gateway}}? a: | - Deleting an AI Gateway removes the entity. Its child entities (Models, Providers, Policies, + Deleting an {{site.ai_gateway}} removes the entity. Its child entities (Models, Providers, Policies, Agents, MCP Servers, Vaults, Consumers, Consumer Groups, and Data Plane Certificates) are - tied to the AI Gateway and are not addressable without it. + tied to the {{site.ai_gateway}} and are not addressable without it. - - q: Is the AI Gateway entity available on-prem? + - q: Is the {{site.ai_gateway}} entity available on-prem? a: | - No. The AI Gateway entity is a {{site.konnect_short_name}} concept. On-prem deployments + No. The {{site.ai_gateway}} entity is a {{site.konnect_short_name}} concept. On-prem deployments manage the same child entities (Models, Providers, Policies, and so on) directly through the Admin API, without a parent `ai-gateways/{id}` container. --- -## What is an AI Gateway? +## What is an {{site.ai_gateway}}? -An AI Gateway is the top-level {{site.ai_gateway}} entity. It's a dedicated control plane for AI traffic — separate from a regular {{site.konnect_short_name}} Gateway control plane — that owns the entities {{site.ai_gateway}} uses to serve LLM and agent workloads: +An {{site.ai_gateway}} is the top-level {{site.ai_gateway}} entity. It's a dedicated control plane for AI traffic, separate from a regular {{site.konnect_short_name}} Gateway control plane, that owns the entities {{site.ai_gateway}} uses to serve LLM and agent workloads: -1. [Models](/ai-gateway/entities/model/) — AI model endpoints, capabilities, and load balancing. -1. [Providers](/ai-gateway/entities/provider/) — upstream LLM service connections and credentials. -1. 
[Policies](/ai-gateway/entities/policy/) — security, rate limiting, and guardrail behavior attached to other entities. -1. [Agents](/ai-gateway/entities/agent/) — A2A and HTTP agent routing. -1. [MCP Servers](/ai-gateway/entities/mcp-server/) — MCP tool exposure and session handling. -1. [Vaults](/ai-gateway/entities/vault/) — secret storage referenced from other entities. -1. [Consumers](/ai-gateway/entities/consumer/), [Consumer Groups](/ai-gateway/entities/consumer-group/), [Consumer Credentials](/ai-gateway/entities/consumer-credential/) — identities used in access control. -1. [Data Plane Certificates](/ai-gateway/entities/data-plane-certificate/) — certificates that authorize data plane nodes to connect. +1. [Models](/ai-gateway/entities/model/): AI model endpoints, capabilities, and load balancing. +1. [Providers](/ai-gateway/entities/provider/): upstream LLM service connections and credentials. +1. [Policies](/ai-gateway/entities/policy/): security, rate limiting, and guardrail behavior attached to other entities. +1. [Agents](/ai-gateway/entities/agent/): A2A and HTTP agent routing. +1. [MCP Servers](/ai-gateway/entities/mcp-server/): MCP tool exposure and session handling. +1. [Vaults](/ai-gateway/entities/vault/): secret storage referenced from other entities. +1. [Consumers](/ai-gateway/entities/consumer/), [Consumer Groups](/ai-gateway/entities/consumer-group/), [Consumer Credentials](/ai-gateway/entities/consumer-credential/): identities used in access control. +1. [Data Plane Certificates](/ai-gateway/entities/data-plane-certificate/): certificates that authorize data plane nodes to connect. 
-Every other {{site.ai_gateway}} entity is created under an AI Gateway and addressed through its ID: +Every other {{site.ai_gateway}} entity is created under an {{site.ai_gateway}} and addressed through its ID: {% table %} columns: @@ -85,7 +85,7 @@ columns: - title: Endpoint key: endpoint rows: - - surface: AI Gateway + - surface: {{site.ai_gateway}} endpoint: /v1/ai-gateways - surface: Child entities endpoint: /v1/ai-gateways/{aiGatewayId}/{entity} @@ -93,32 +93,32 @@ rows: ## Endpoints -When an AI Gateway is created, {{site.ai_gateway}} provisions two endpoints that data planes connect to: +When an {{site.ai_gateway}} is created, {{site.ai_gateway}} provisions two endpoints that data planes connect to: -1. **Configuration endpoint** (`endpoints.configuration`) — the URL data plane nodes use to receive their configuration from the control plane. -1. **Telemetry endpoint** (`endpoints.telemetry`) — the URL data plane nodes use to ship analytics and runtime telemetry back to {{site.konnect_short_name}}. +1. **Configuration endpoint** (`endpoints.configuration`): the URL data plane nodes use to receive their configuration from the control plane. +1. **Telemetry endpoint** (`endpoints.telemetry`): the URL data plane nodes use to ship analytics and runtime telemetry back to {{site.konnect_short_name}}. -Both endpoints are read-only, assigned at creation time, and stable for the lifetime of the AI Gateway. Data plane nodes need both URLs, along with a [Data Plane Certificate](/ai-gateway/entities/data-plane-certificate/), to register with the AI Gateway. +Both endpoints are read-only, assigned at creation time, and stable for the lifetime of the {{site.ai_gateway}}. Data plane nodes need both URLs, along with a [Data Plane Certificate](/ai-gateway/entities/data-plane-certificate/), to register with the {{site.ai_gateway}}. 
## Configuration hash -`config_hash` is a read-only field that {{site.ai_gateway}} updates every time anything under the AI Gateway changes — a new Model, an updated Policy, a deleted Provider, and so on. Each data plane node reports back the `config_hash` of the configuration it's running. The two values match when the node is in sync with the control plane. +`config_hash` is a read-only field that {{site.ai_gateway}} updates every time anything under the {{site.ai_gateway}} changes, such as a new Model, an updated Policy, or a deleted Provider. Each data plane node reports back the `config_hash` of the configuration it's running. The two values match when the node is in sync with the control plane. -Use `config_hash` to verify rollout: after a configuration change, watch the node `config_hash` (through [List Nodes](/ai-gateway/entities/data-plane-certificate/) or the {{site.konnect_short_name}} UI) until every node reports the AI Gateway's current value. +Use `config_hash` to verify rollout: after a configuration change, watch the node `config_hash` (through [List Nodes](/ai-gateway/entities/data-plane-certificate/) or the {{site.konnect_short_name}} UI) until every node reports the {{site.ai_gateway}}'s current value. ## Labels -`labels` is a free-form `key: value` map for organization. Use it to tag AI Gateways by environment (`env: production`), team ownership, cost center, or any other dimension you filter on. Labels don't affect runtime behavior. +`labels` are a free-form `key: value` map for organization. Use them to tag {{site.ai_gateway}}s by environment (`env: production`), team ownership, cost center, or any other dimension you filter on. Labels don't affect runtime behavior. ## Lifecycle -AI Gateways are created and managed through the {{site.konnect_short_name}} UI. 
Once an AI Gateway exists, its child entities (Models, Providers, Policies, and so on) are managed through the {{site.ai_gateway}} API, Terraform, or decK as documented on each entity page. +{{site.ai_gateway}}s are created and managed through the {{site.konnect_short_name}} UI. Once an {{site.ai_gateway}} exists, its child entities (Models, Providers, Policies, and so on) are managed through the {{site.ai_gateway}} API, Terraform, or decK as documented on each entity page. -Creating an AI Gateway provisions the configuration and telemetry endpoints and gives you the parent ID needed to create child entities. The AI Gateway has no runtime traffic of its own — traffic flows once at least one Model, Agent, or MCP Server is configured under it and a data plane node is connected. +Creating an {{site.ai_gateway}} provisions the configuration and telemetry endpoints and gives you the parent ID needed to create child entities. The {{site.ai_gateway}} has no runtime traffic of its own. Traffic flows once at least one Model, Agent, or MCP Server is configured under it and a data plane node is connected. -Updating an AI Gateway changes its `name`, `description`, or `labels`. Endpoints and `config_hash` are managed by {{site.ai_gateway}} and can't be set directly. +Updating an {{site.ai_gateway}} changes its `name`, `description`, or `labels`. Endpoints and `config_hash` are managed by {{site.ai_gateway}} and can't be set directly. -Deleting an AI Gateway removes the entity. Its child entities are scoped to the AI Gateway and can't be addressed without it. +Deleting an {{site.ai_gateway}} removes the entity. Its child entities are scoped to the {{site.ai_gateway}} and can't be addressed without it. 
## Schema diff --git a/app/_ai_gateway_entities/consumer-credential.md b/app/_ai_gateway_entities/consumer-credential.md index 286ae81be2..31f0255be7 100644 --- a/app/_ai_gateway_entities/consumer-credential.md +++ b/app/_ai_gateway_entities/consumer-credential.md @@ -91,7 +91,7 @@ rows: The `type` field on a Credential must match the parent Consumer's `type`: * **`api-key`**: the Credential carries an `api_key` value the client presents on each request. An optional `ttl` (seconds) bounds the validity period; once it elapses, the value no longer authenticates. -* **`oauth`**: the Credential carries a `custom_id` that maps to the OAuth provider's identifier (for example, an OIDC Client ID). The actual token is issued and validated by the OAuth provider, not stored on the Credential. +* **`oauth`**: the Credential carries a `custom_id` that maps a Consumer to an OAuth identity issued by an external provider. {{site.ai_gateway}} works with any standards-compliant OAuth 2.0 / OpenID Connect provider configured through the [OpenID Connect plugin](/plugins/openid-connect/), or, for MCP traffic, the [AI MCP OAuth2 plugin](/plugins/ai-mcp-oauth2/). The `custom_id` is typically the OIDC `sub` claim or the Client ID issued by the OAuth provider. The actual access token is issued and validated by the OAuth provider, not stored on the Credential. The `api_key` field is write-only and cannot be retrieved after creation. Treat creation responses as the only opportunity to capture the key value. diff --git a/app/_ai_gateway_entities/consumer.md b/app/_ai_gateway_entities/consumer.md index c384686966..cfd88b939f 100644 --- a/app/_ai_gateway_entities/consumer.md +++ b/app/_ai_gateway_entities/consumer.md @@ -97,7 +97,7 @@ rows: The `type` field declares which credential family the Consumer authenticates with. Supported values are: * `api-key`: the Consumer authenticates with one or more API key Credentials. 
-* `oauth`: the Consumer authenticates with one or more OAuth Credentials whose `custom_id` maps to the OAuth provider's identifier. +* `oauth`: the Consumer authenticates through an OAuth identity issued by an external OIDC provider. {{site.ai_gateway}} accepts any standards-compliant OAuth 2.0 / OpenID Connect provider configured through the [OpenID Connect plugin](/plugins/openid-connect/), or, for MCP traffic, through the [AI MCP OAuth2 plugin](/plugins/ai-mcp-oauth2/). The Consumer Credential carries a `custom_id` that maps to the OAuth provider's user identifier (for example, an OIDC Client ID or `sub` claim). The `type` of every Credential issued to the Consumer must match the Consumer's `type`. See the [Consumer Credential entity](/ai-gateway/entities/consumer-credential/) reference for credential management. diff --git a/app/_ai_gateway_entities/data-plane-certificate.md b/app/_ai_gateway_entities/data-plane-certificate.md index afcd38ccfd..f0926324da 100644 --- a/app/_ai_gateway_entities/data-plane-certificate.md +++ b/app/_ai_gateway_entities/data-plane-certificate.md @@ -52,7 +52,7 @@ faqs: - q: How does this relate to the {{site.base_gateway}} data plane client certificate? a: | - It plays the same role — establishing mutual TLS between the control plane and a data plane — + It plays the same role, establishing mutual TLS between the control plane and a data plane, but it is scoped to a single {{site.ai_gateway}} instance and managed through the {{site.ai_gateway}} entity surface, not the {{site.konnect_short_name}} Gateway control plane API. --- @@ -79,7 +79,7 @@ rows: endpoint: /v1/ai-gateways/{aiGatewayId}/data-plane-certificates {% endtable %} -There is no on-prem equivalent for this entity. Self-managed {{site.base_gateway}} deployments use the existing [`/certificates`](/gateway/entities/certificate/) and node configuration mechanisms instead. +There is no on-prem equivalent for this entity. 
Self-managed {{site.base_gateway}} deployments use the existing [`/certificates`](/gateway/entities/certificate/) entity and [hybrid mode node configuration](/gateway/hybrid-mode/) instead. ## Trust model @@ -87,9 +87,27 @@ The {{site.ai_gateway}} acts as the control plane in a CP/DP topology. Each data Only the public certificate is registered with the {{site.ai_gateway}}. The private key is generated and held on the data plane side; it never leaves the data plane host. + +{% mermaid %} +sequenceDiagram + participant DP as Data Plane + participant CP as {{site.ai_gateway}} (Control Plane) + + Note over DP: Holds private key locally
(never sent over the network) + DP->>CP: TLS handshake with client certificate + Note over CP: Compare presented certificate against
registered Data Plane Certificates + alt Certificate matches a registered entry + CP-->>DP: TLS handshake completes + DP->>CP: Receive configuration and stream telemetry + else No matching registered certificate + CP-->>DP: Connection rejected + end +{% endmermaid %} + + ## Lifecycle -Data Plane Certificates support create, list, get, and delete operations. There is no update endpoint — the certificate body is immutable. +Data Plane Certificates support create, list, get, and delete operations. There is no update endpoint; the certificate body is immutable. To rotate a certificate without downtime: diff --git a/app/_ai_gateway_entities/mcp-server.md b/app/_ai_gateway_entities/mcp-server.md index 6344d7d0ea..b659a13dad 100644 --- a/app/_ai_gateway_entities/mcp-server.md +++ b/app/_ai_gateway_entities/mcp-server.md @@ -230,22 +230,42 @@ Tools can also carry MCP-spec [annotations](#schema-aigateway-mcpserver-tools-an Two session strategies: -1. **Client.** Session state is encrypted into the MCP session ID assigned to the client. Requires `secrets` which are encryption keys; the first entry is used for encryption, all entries are used for decryption to support key rotation. -1. **Redis.** Session state is stored in Redis. Configure connection details and authentication in `config.server.session.redis`. Cloud Redis providers (AWS ElastiCache, Azure, GCP) authenticate through provider-specific blocks under `redis.cloud_authentication`. +1. **Client.** Session state is encrypted into the MCP session ID assigned to the client. Requires `secrets` which are encryption keys; the first entry is used for encryption, all entries are used for decryption to support key rotation. +1. **Redis.** Session state is stored in Redis. Configure connection details and authentication in `config.server.session.redis`. + +{% include_cached /plugins/redis/redis-cloud-auth.md tier='enterprise' %} `session_ttl` controls how long sessions live (default 24 hours).
Set `managed: false` to disable managed sessions when the upstream maintains state externally. -Cross-link: secrets used in session encryption can be referenced from a [Vault](/ai-gateway/entities/vault/). +Secrets used in session encryption can be referenced from a [Vault](/ai-gateway/entities/vault/). ## Server configuration The `config.server` block carries runtime settings that apply across all tools on the MCP Server: -1. `forward_client_headers` (default `true`). Whether to forward client request headers to the upstream when calling tools. -1. `tag`. A single tag used by `listener` MCP Servers to filter which `conversion-only` tools to expose. -1. `timeout` (default 10 seconds). Maximum time to wait for an upstream tool call. + +{% table %} +columns: + - title: Field + key: field + - title: Default + key: default + - title: Description + key: description +rows: + - field: "[`forward_client_headers`](#schema-aigateway-mcpserver-config-server-forward-client-headers)" + default: "`true`" + description: Whether to forward client request headers to the upstream when calling tools. + - field: "[`tag`](#schema-aigateway-mcpserver-config-server-tag)" + default: (none) + description: A single tag used by `listener` MCP Servers to filter which `conversion-only` tools to expose. + - field: "[`timeout`](#schema-aigateway-mcpserver-config-server-timeout)" + default: 10 seconds + description: Maximum time to wait for an upstream tool call. +{% endtable %} + -`config.max_request_body_size` controls the maximum incoming request body size accepted by the MCP Server (default 1 MB). +[`config.max_request_body_size`](#schema-aigateway-mcpserver-config-max-request-body-size) controls the maximum incoming request body size accepted by the MCP Server (default 1 MB). 
## ACL tool control @@ -267,7 +287,7 @@ This way, consumers only interact with tools appropriate to their role, while ma ### Attribute types -Two attribute types determine what the runtime evaluates against: +Two attribute types determine what the MCP Server evaluates ACL rules against: 1. **`consumer`** (default). Evaluates against the resolved Consumer identity. 1. **`oauth_access_token`**. Evaluates against a claim extracted from the OAuth access token. Set `access_token_claim_field` to a jq filter (for example, `.user.email` for a nested claim). The OAuth flow itself is supplied by the [AI MCP OAuth2 Policy](/plugins/ai-mcp-oauth2/). diff --git a/app/_ai_gateway_entities/policy.md b/app/_ai_gateway_entities/policy.md index d8118225d5..1116e7cde2 100644 --- a/app/_ai_gateway_entities/policy.md +++ b/app/_ai_gateway_entities/policy.md @@ -80,6 +80,8 @@ A Policy is an {{site.ai_gateway}} entity that represents an action, taken by a Each Policy declares a `type` (which is a plugin name, for example `ai-sanitizer` or `ai-rate-limiting-advanced`) and a `config` block whose contents follow that plugin's own schema. {{site.ai_gateway}} attaches the configured plugin at the scope you select: globally, or to a specific Model, Agent, or MCP Server. +For the set of plugin types you can use as a Policy `type`, see the [AI plugin reference](/plugins/?category=ai). + Policies are not shared. Each Policy is one plugin instance. To apply the same configuration to two parent entities, create two Policies. 
Policies are managed through the {{site.ai_gateway}} entity surface in both deployment modes: diff --git a/app/_ai_gateway_entities/provider.md b/app/_ai_gateway_entities/provider.md index 349257a58f..b91d442dfe 100644 --- a/app/_ai_gateway_entities/provider.md +++ b/app/_ai_gateway_entities/provider.md @@ -54,7 +54,13 @@ A Provider is a first-class {{site.ai_gateway}} entity that represents an upstre Models reference a Provider by `name` to route their `target_models` to that upstream. {{site.ai_gateway}} materializes the Provider's credentials into the underlying primitives of every Model that references it. Updating a Provider propagates credential changes to all referencing Models. -Providers can be created and managed through {{site.konnect_short_name}}, the on-prem Admin API, decK, or the {{site.konnect_short_name}} UI: +### Relationship to Models + +A Provider stores how to reach and authenticate to an upstream LLM service. A [Model](/ai-gateway/entities/model/) decides which upstream provider model to call and how requests are load-balanced, formatted, and logged. The relationship is many-to-many at the target level: a single Provider can back many Models (for example, an `openai` Provider used by both a chat Model and an embeddings Model), and a single Model can route across multiple Providers through its `target_models` array (for example, a Model with one OpenAI target and one Anthropic target for fallback). + +Providers don't expose model endpoints on their own. They become routable only through a Model that references them. 
+ +Providers can be created and managed through the {{site.konnect_short_name}} UI, the {{site.ai_gateway}} API, decK, or the on-prem Admin API: {% table %} columns: diff --git a/app/_ai_gateway_entities/vault.md b/app/_ai_gateway_entities/vault.md index 0e9072d380..82d35d98e3 100644 --- a/app/_ai_gateway_entities/vault.md +++ b/app/_ai_gateway_entities/vault.md @@ -84,7 +84,7 @@ rows: ## Backends -Each Vault selects one of the supported secret backends — environment variables, AWS Secrets Manager, Google Secret Manager, Azure Key Vault, CyberArk Conjur, or HashiCorp Vault. The connection details vary per backend; the {{site.konnect_short_name}} UI surfaces the relevant fields based on the backend you choose. +Each Vault selects one of the supported secret backends: environment variables, AWS Secrets Manager, Google Secret Manager, Azure Key Vault, CyberArk Conjur, or HashiCorp Vault. The connection details vary per backend; the {{site.konnect_short_name}} UI surfaces the relevant fields based on the backend you choose. HashiCorp Vault additionally supports several authentication methods (token, AppRole, JWT, Kubernetes, AWS, GCP, Azure, and others). See the [{{site.base_gateway}} Vault entity](/gateway/entities/vault/) for backend-specific guidance that applies to both deployment modes. diff --git a/app/_landing_pages/ai-gateway/entities.yaml b/app/_landing_pages/ai-gateway/entities.yaml index 4119129ce8..cac61b0cfe 100644 --- a/app/_landing_pages/ai-gateway/entities.yaml +++ b/app/_landing_pages/ai-gateway/entities.yaml @@ -21,6 +21,14 @@ rows: text: "Core entities" column_count: 3 columns: + - blocks: + - type: card + config: + title: {{site.ai_gateway}} + description: The top-level entity that owns Models, Providers, Policies, Agents, MCP Servers, and other AI-specific entities. 
+ cta: + text: {{site.ai_gateway}} entity + url: /ai-gateway/entities/ai-gateway/ - blocks: - type: card config: From 877e01f5a7701a124dab399caf23522c65f301a6 Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Fri, 15 May 2026 09:58:45 +0200 Subject: [PATCH 49/53] Fix frontmatter --- app/_ai_gateway_entities/ai-gateway.md | 7 ++++--- app/_landing_pages/ai-gateway/entities.yaml | 6 +++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/app/_ai_gateway_entities/ai-gateway.md b/app/_ai_gateway_entities/ai-gateway.md index b90ad43199..b2ce8ba608 100644 --- a/app/_ai_gateway_entities/ai-gateway.md +++ b/app/_ai_gateway_entities/ai-gateway.md @@ -1,5 +1,5 @@ --- -title: {{site.ai_gateway}}s +title: "{{site.ai_gateway}}" content_type: reference entities: - ai-gateway @@ -10,14 +10,15 @@ min_version: breadcrumbs: - /ai-gateway/ - /ai-gateway/entities/ -description: The top-level {{site.ai_gateway}} entity that owns Models, Providers, Policies, Agents, MCP Servers, and other AI-specific entities. +description: | + The top-level {{site.ai_gateway}} entity that owns Models, Providers, Policies, Agents, MCP Servers, and other AI-specific entities. schema: api: konnect/ai-gateway path: /schemas/AIGateway works_on: - konnect related_resources: - - text: About {{site.ai_gateway}} + - text: "About {{site.ai_gateway}}" url: /ai-gateway/ - text: "{{site.ai_gateway}} entities" url: /ai-gateway/entities/ diff --git a/app/_landing_pages/ai-gateway/entities.yaml b/app/_landing_pages/ai-gateway/entities.yaml index cac61b0cfe..3b0bb70dda 100644 --- a/app/_landing_pages/ai-gateway/entities.yaml +++ b/app/_landing_pages/ai-gateway/entities.yaml @@ -24,15 +24,15 @@ rows: - blocks: - type: card config: - title: {{site.ai_gateway}} + title: "{{site.ai_gateway}}" description: The top-level entity that owns Models, Providers, Policies, Agents, MCP Servers, and other AI-specific entities. 
cta: - text: {{site.ai_gateway}} entity + text: "{{site.ai_gateway}} entity" url: /ai-gateway/entities/ai-gateway/ - blocks: - type: card config: - title: AI Provider + title: "{{site.ai_gateway}} Provider" description: Stores upstream provider credentials and connection configuration. Providers are reusable and are not model endpoints. cta: text: AI Provider entity From 60b7a8488fcb76766af8e3d1c5e6cc7f2c897f18 Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Fri, 15 May 2026 10:31:41 +0200 Subject: [PATCH 50/53] Updates --- app/_ai_gateway_entities/policy.md | 2 -- app/_ai_gateway_entities/provider.md | 16 +++++++--------- app/_ai_gateway_entities/vault.md | 4 ++-- 3 files changed, 9 insertions(+), 13 deletions(-) diff --git a/app/_ai_gateway_entities/policy.md b/app/_ai_gateway_entities/policy.md index 1116e7cde2..b60c222b50 100644 --- a/app/_ai_gateway_entities/policy.md +++ b/app/_ai_gateway_entities/policy.md @@ -179,8 +179,6 @@ data: window_type: sliding {% endentity_example %} - - ## Schema {% entity_schema %} diff --git a/app/_ai_gateway_entities/provider.md b/app/_ai_gateway_entities/provider.md index b91d442dfe..216a1e8af4 100644 --- a/app/_ai_gateway_entities/provider.md +++ b/app/_ai_gateway_entities/provider.md @@ -34,12 +34,12 @@ faqs: - q: What happens when I update a Provider's credentials? a: | {{site.ai_gateway}} propagates the credential change to every Model that references the - Provider by `name`. The next request through any of those Models uses the updated credentials. + Provider (by `name` or `id`). The next request through any of those Models uses the updated + credentials. - q: How does a Model reference a Provider? a: | - Set `target_models[].provider.name` on the Model to the Provider's `name`. Provider references - take a `name` only, not an ID. + Set `target_models[].provider` on the Model to the Provider's `name` or `id`. - q: Do Providers generate any runtime primitives on their own? 
a: | @@ -52,7 +52,7 @@ faqs: A Provider is a first-class {{site.ai_gateway}} entity that represents an upstream LLM service connection and their credentials, endpoint configuration, and provider-type-specific options. Each Provider has a `type` that selects the upstream LLM service. See the schema below for supported values, and the per-provider pages under [{{site.ai_gateway}} providers](/ai-gateway/ai-providers/) for provider-specific guidance. -Models reference a Provider by `name` to route their `target_models` to that upstream. {{site.ai_gateway}} materializes the Provider's credentials into the underlying primitives of every Model that references it. Updating a Provider propagates credential changes to all referencing Models. +Models reference a Provider through `target_models[].provider` to route their `target_models` to that upstream. The reference can use either the Provider `name` or `id`. {{site.ai_gateway}} materializes the Provider's credentials into the underlying primitives of every Model that references it. Updating a Provider propagates credential changes to all referencing Models. ### Relationship to Models @@ -121,9 +121,9 @@ The `config.auth` object declares how {{site.ai_gateway}} authenticates to the u ## Provider references -Models reference a Provider by `name` through the `target_models[].provider.name` field. The same reference shape is used elsewhere in the schema (such as the embeddings model under a Model's load balancer config). Provider references in {{site.ai_gateway}} entities accept the Provider's `name` only, not its ID. +Models reference a Provider through the `target_models[].provider` field. The same reference shape is used elsewhere in the schema (such as the embeddings model under a Model's load balancer config). Provider references in {{site.ai_gateway}} entities accept either the Provider `name` or `id`. -Because references resolve by `name`, the `name` field is the stable handle for a Provider across the entity surface. 
Renaming a Provider (changing `name`) breaks any Model reference that pointed at the old value. +If references use `name`, the `name` field acts as a stable human-readable handle. Renaming a Provider (changing `name`) breaks any Model references that point at the old name. ## Lifecycle @@ -131,11 +131,9 @@ Creating a Provider stores the entity but doesn't generate any runtime primitive Updating a Provider re-materializes credentials into every Model that references it. The change takes effect on the next request through any referencing Model. - - ## Set up a Provider -The following example creates an OpenAI Provider that authenticates with a single bearer-token header. A Model can then route to this Provider by setting `target_models[].provider.name` to `my-openai-account`. +The following example creates an OpenAI Provider that authenticates with a single bearer-token header. A Model can then route to this Provider by setting `target_models[].provider` to `my-openai-account` (or the Provider `id`). {% entity_example %} type: provider diff --git a/app/_ai_gateway_entities/vault.md b/app/_ai_gateway_entities/vault.md index 82d35d98e3..bc50f2ec75 100644 --- a/app/_ai_gateway_entities/vault.md +++ b/app/_ai_gateway_entities/vault.md @@ -39,7 +39,7 @@ faqs: - q: Which secret backends are supported? a: | - The `type` field selects the backend: `env`, `aws`, `gcp`, `azure`, `conjur`, or `hcv`. + The `type` field selects the backend: `konnect`, `env`, `aws`, `gcp`, `azure`, `conjur`, or `hcv`. Each type carries its own `config` shape. HashiCorp Vault (`hcv`) further selects an `auth_method` from `token`, `cert`, `jwt`, `approle`, `kubernetes`, `gcp_iam`, `gcp_gce`, `aws_ec2`, `aws_iam`, or `azure`. @@ -84,7 +84,7 @@ rows: ## Backends -Each Vault selects one of the supported secret backends: environment variables, AWS Secrets Manager, Google Secret Manager, Azure Key Vault, CyberArk Conjur, or HashiCorp Vault. 
The connection details vary per backend; the {{site.konnect_short_name}} UI surfaces the relevant fields based on the backend you choose. +Each Vault selects one of the supported secret backends: {{site.konnect_short_name}} Config Store, environment variables, AWS Secrets Manager, Google Secret Manager, Azure Key Vault, CyberArk Conjur, or HashiCorp Vault. The connection details vary per backend; the {{site.konnect_short_name}} UI surfaces the relevant fields based on the backend you choose. HashiCorp Vault additionally supports several authentication methods (token, AppRole, JWT, Kubernetes, AWS, GCP, Azure, and others). See the [{{site.base_gateway}} Vault entity](/gateway/entities/vault/) for backend-specific guidance that applies to both deployment modes. From 747436f1cea802e3a93cb14e409d8a6ef02b3365 Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Fri, 15 May 2026 11:43:08 +0200 Subject: [PATCH 51/53] update attach policies note --- app/_ai_gateway_entities/model.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/_ai_gateway_entities/model.md b/app/_ai_gateway_entities/model.md index 2278a7d19d..97a971db9f 100644 --- a/app/_ai_gateway_entities/model.md +++ b/app/_ai_gateway_entities/model.md @@ -347,9 +347,9 @@ You can attach multiple Policies to a single Model. Each Policy has an independe Not every plugin type is valid as a Model Policy. -Policies attached to a Model are deleted when the Model is deleted. +Policies created through the nested on-prem endpoint (`POST /ai/models/{modelId}/policies`) are deleted when the Model is deleted. Policies created independently (for example, at `/v1/ai-gateways/{aiGatewayId}/policies` or `/ai/policies`) are not deleted when the Model is deleted; only the Model's reference is removed. -For further information see the [Policy entity](/ai-gateway/entities/policy/) reference. +For further information, see the [Policy entity](/ai-gateway/entities/policy/) reference. 
### Plugin priority and Policy execution order From c7ab3bf36090a6bd97836e8ed347251641fc04e3 Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Mon, 18 May 2026 09:33:02 +0200 Subject: [PATCH 52/53] appease vale --- .github/styles/base/Dictionary.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/styles/base/Dictionary.txt b/.github/styles/base/Dictionary.txt index 4656efd372..9061bb1108 100644 --- a/.github/styles/base/Dictionary.txt +++ b/.github/styles/base/Dictionary.txt @@ -13,6 +13,7 @@ ai_rate_limiting_policy agentic Agno Agno's +AIGateway Alertmanager Alibaba allow_terminated From 001605e1f31faba3b31509692613db874b0b65cc Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Mon, 18 May 2026 11:45:48 +0200 Subject: [PATCH 53/53] fix ambiguities --- app/_ai_gateway_entities/agent.md | 14 ++------------ app/_ai_gateway_entities/ai-gateway.md | 2 +- app/_ai_gateway_entities/mcp-server.md | 2 +- app/_ai_gateway_entities/model.md | 2 +- app/_ai_gateway_entities/provider.md | 2 +- 5 files changed, 6 insertions(+), 16 deletions(-) diff --git a/app/_ai_gateway_entities/agent.md b/app/_ai_gateway_entities/agent.md index e5f1823ca4..0a05004141 100644 --- a/app/_ai_gateway_entities/agent.md +++ b/app/_ai_gateway_entities/agent.md @@ -76,7 +76,7 @@ faqs: An Agent is a first-class {{site.ai_gateway}} entity that represents an upstream agent endpoint exposed through {{site.ai_gateway}}. An Agent has a type, either `a2a` for [Agent-to-Agent protocol](https://a2aproject.github.io/A2A/) traffic or `http` for generic HTTP agent routing, and a configuration that points {{site.ai_gateway}} at the upstream and shapes how requests flow. -For `http` type Agents, requests are proxied without A2A-specific processing. 
For `a2a` type Agents, {{site.ai_gateway}} adds protocol-aware behaviour on top of plain proxying: it detects A2A requests across both JSON-RPC and REST bindings, rewrites agent-card URLs so clients discover the gateway as the canonical endpoint, and emits structured A2A telemetry to {{site.konnect_short_name}} analytics and OpenTelemetry. +For `http` type Agents, requests are proxied without A2A-specific processing. For `a2a` type Agents, {{site.ai_gateway}} adds protocol-aware behavior on top of plain proxying: it detects A2A requests across both JSON-RPC and REST bindings, rewrites agent-card URLs so clients discover the gateway as the canonical endpoint, and emits structured A2A telemetry to {{site.konnect_short_name}} analytics and OpenTelemetry. Agents can be created and managed through the {{site.konnect_short_name}} UI, the {{site.ai_gateway}} API, decK, or the on-prem Admin API: @@ -239,17 +239,7 @@ When an upstream agent returns an agent card, the runtime rewrites the `url` fie ## Logging and observability -When Statistics logging is enabled the {{site.ai_gateway}} records the following structured A2A telemetry per request: - -- A2A method -- Binding type -- Task state -- Task ID -- Context ID -- Latency -- Time to first byte (for streaming) -- SSE event count -- Response size. +When Statistics logging is enabled, {{site.ai_gateway}} records structured A2A telemetry per request and exposes it in {{site.konnect_short_name}} analytics, attached log plugins, and OpenTelemetry when [{{site.base_gateway}} tracing](/gateway/tracing/) is configured. For the canonical metric and attribute list, see [A2A metrics](/ai-gateway/ai-otel-metrics/#a2a-metrics). The runtime emits this data into the `ai.a2a` namespace consumed by {{site.konnect_short_name}} analytics and any attached logging plugins, and creates a `kong.a2a` child span when [{{site.base_gateway}} tracing](/gateway/tracing/) is configured. 
diff --git a/app/_ai_gateway_entities/ai-gateway.md b/app/_ai_gateway_entities/ai-gateway.md index b2ce8ba608..176b06ef5c 100644 --- a/app/_ai_gateway_entities/ai-gateway.md +++ b/app/_ai_gateway_entities/ai-gateway.md @@ -113,7 +113,7 @@ Use `config_hash` to verify rollout: after a configuration change, watch the nod ## Lifecycle -{{site.ai_gateway}}s are created and managed through the {{site.konnect_short_name}} UI. Once an {{site.ai_gateway}} exists, its child entities (Models, Providers, Policies, and so on) are managed through the {{site.ai_gateway}} API, Terraform, or decK as documented on each entity page. +{{site.ai_gateway}}s can be created and managed through the {{site.konnect_short_name}} UI or the {{site.ai_gateway}} API. Once an {{site.ai_gateway}} exists, its child entities (Models, Providers, Policies, and so on) are managed through the {{site.ai_gateway}} API or decK as documented on each entity page. Creating an {{site.ai_gateway}} provisions the configuration and telemetry endpoints and gives you the parent ID needed to create child entities. The {{site.ai_gateway}} has no runtime traffic of its own. Traffic flows once at least one Model, Agent, or MCP Server is configured under it and a data plane node is connected. diff --git a/app/_ai_gateway_entities/mcp-server.md b/app/_ai_gateway_entities/mcp-server.md index b659a13dad..a9045e9e21 100644 --- a/app/_ai_gateway_entities/mcp-server.md +++ b/app/_ai_gateway_entities/mcp-server.md @@ -76,7 +76,7 @@ faqs: ## What is an MCP Server? -An MCP Server is a first-class {{site.ai_gateway}} entity that exposes tools to MCP-compatible clients (such as [Insomnia](https://konghq.com/products/kong-insomnia), [Claude](https://claude.ai/), [Cursor](https://cursor.com/), or [LMstudio](https://lmstudio.ai/)) over the [Model Context Protocol](https://modelcontextprotocol.io/). 
The runtime acts as a protocol bridge, translating between MCP and HTTP so MCP clients can either call existing APIs through {{site.ai_gateway}} or interact with upstream MCP servers. +An MCP Server is a first-class {{site.ai_gateway}} entity that exposes tools to MCP-compatible clients (such as [Insomnia](https://konghq.com/products/kong-insomnia), [Claude](https://claude.ai/), [Cursor](https://cursor.com/), or [LM Studio](https://lmstudio.ai/)) over the [Model Context Protocol](https://modelcontextprotocol.io/). The runtime acts as a protocol bridge, translating between MCP and HTTP so MCP clients can either call existing APIs through {{site.ai_gateway}} or interact with upstream MCP servers. Because the runtime executes inside {{site.ai_gateway}}, MCP endpoints are provisioned dynamically on demand. You don't host or scale them separately, and the same authentication, traffic control, and observability features available to traditional API traffic apply to MCP traffic at the same scale. diff --git a/app/_ai_gateway_entities/model.md b/app/_ai_gateway_entities/model.md index 97a971db9f..6390c09d80 100644 --- a/app/_ai_gateway_entities/model.md +++ b/app/_ai_gateway_entities/model.md @@ -35,7 +35,7 @@ related_resources: - text: "{{site.ai_gateway}} entities" url: /ai-gateway/entities/ - text: Consumer Group entity - url: /gateway/entities/consumer-group/ + url: /ai-gateway/entities/consumer-group/ faqs: - q: What's the difference between a Model entity and a `model` field inside a plugin configuration? a: | diff --git a/app/_ai_gateway_entities/provider.md b/app/_ai_gateway_entities/provider.md index 216a1e8af4..1b69141a51 100644 --- a/app/_ai_gateway_entities/provider.md +++ b/app/_ai_gateway_entities/provider.md @@ -50,7 +50,7 @@ faqs: ## What is a Provider? -A Provider is a first-class {{site.ai_gateway}} entity that represents an upstream LLM service connection and their credentials, endpoint configuration, and provider-type-specific options. 
Each Provider has a `type` that selects the upstream LLM service. See the schema below for supported values, and the per-provider pages under [{{site.ai_gateway}} providers](/ai-gateway/ai-providers/) for provider-specific guidance. +A Provider is a first-class {{site.ai_gateway}} entity that represents an upstream LLM service connection and its credentials, endpoint configuration, and provider-type-specific options. Each Provider has a `type` that selects the upstream LLM service. See the schema below for supported values, and the per-provider pages under [{{site.ai_gateway}} providers](/ai-gateway/ai-providers/) for provider-specific guidance. Models reference a Provider through `target_models[].provider` to route their `target_models` to that upstream. The reference can use either the Provider `name` or `id`. {{site.ai_gateway}} materializes the Provider's credentials into the underlying primitives of every Model that references it. Updating a Provider propagates credential changes to all referencing Models.