diff --git a/n8n/README.md b/n8n/README.md index 18eb081e7890a..9e9d410984a93 100644 --- a/n8n/README.md +++ b/n8n/README.md @@ -2,15 +2,15 @@ ## Overview -This check monitors [n8n][1] through the Datadog Agent. +This check monitors [n8n][1] through the Datadog Agent. Collect n8n metrics including: -- Cache metrics: Hit and miss statistics. -- Message event bus metrics: Event-related metrics. -- Workflow metrics: Can include workflow ID labels. -- Node metrics: Can include node type labels. -- Credential metrics: Can include credential type labels. -- Queue metrics +- Cache metrics: hit, miss, and update counts. +- Workflow metrics: started, success, failed counters, audit workflow lifecycle counters; in n8n 2.x, an execution-duration histogram. +- Node metrics: per-node started and finished counters emitted by worker processes in queue mode. +- Queue metrics: queue depth; enqueued, dequeued, completed, failed, and stalled counters; and scaling-mode worker gauges. +- HTTP metrics: request duration histograms tagged with status code. +- Process and Node.js runtime metrics. ## Setup @@ -40,6 +40,10 @@ N8N_METRICS_INCLUDE_CACHE_METRICS=true N8N_METRICS_INCLUDE_MESSAGE_EVENT_BUS_METRICS=true N8N_METRICS_INCLUDE_WORKFLOW_ID_LABEL=true N8N_METRICS_INCLUDE_API_ENDPOINTS=true +N8N_METRICS_INCLUDE_QUEUE_METRICS=true + +# Optional: n8n 2.x adds workflow_statistics gauges (workflows, users, executions, ...) - opt in +N8N_METRICS_INCLUDE_WORKFLOW_STATISTICS=true # Optional: Customize the metric prefix (default is 'n8n_') N8N_METRICS_PREFIX=n8n_ @@ -47,6 +51,68 @@ N8N_METRICS_PREFIX=n8n_ For more details, see the n8n documentation on [enabling Prometheus metrics][10]. +If you change `N8N_METRICS_PREFIX` from its default of `n8n_`, you **must** also set `raw_metric_prefix` in the integration's `conf.yaml` to the same value. 
Otherwise the check will not recognize the exposed metric names and will silently submit nothing: + +```yaml +instances: + - openmetrics_endpoint: http://localhost:5678/metrics + raw_metric_prefix: my_custom_prefix_ +``` + +#### Event-driven counters + +Most n8n counters are registered dynamically the first time their underlying event fires. The integration ships mappings for around 70 of these event-bus counters, including: + +- Workflow lifecycle: `n8n.workflow.started.count`, `n8n.workflow.success.count`, `n8n.workflow.failed.count`, `n8n.workflow.cancelled.count` +- Audit (workflow, user, credentials, package, variable, execution data): `n8n.audit.workflow.executed.count`, `n8n.audit.user.login.success.count`, `n8n.audit.user.credentials.created.count`, and similar +- AI nodes: `n8n.ai.tool.called.count`, `n8n.ai.llm.generated.count`, `n8n.ai.vector.store.searched.count`, and similar +- Runner, queue, and node lifecycle: `n8n.runner.task.requested.count`, `n8n.queue.job.completed.count`, `n8n.node.started.count`, `n8n.node.finished.count` + +These counters do not appear on the `/metrics` endpoint until the corresponding event has occurred. A healthy idle deployment will not produce data points for them until that activity fires. The complete list is in [`metadata.csv`][7]. + +If a future n8n release exposes a new event-driven counter that is not yet covered by this integration, add it to the `extra_metrics` option in your instance configuration: + +```yaml +instances: + - openmetrics_endpoint: http://n8n:5678/metrics + extra_metrics: + - some_new_n8n_event_total: some.new.n8n.event +``` + +The left-hand side is the Prometheus counter name as n8n exposes it (keep the `_total` suffix). The right-hand side is the dotted Datadog metric name to submit it as. + +#### Queue mode and workers + +In queue mode, n8n runs separate worker processes that execute jobs picked up from a Redis-backed queue. 
Each worker exposes its own `/metrics` endpoint and emits a different subset of metrics than the main process. Worker-observed metrics include `n8n.queue.job.dequeued.count`, `n8n.queue.job.stalled.count`, `n8n.node.started.count`, `n8n.node.finished.count`, and `n8n.runner.task.requested.count`. Main-only metrics include `n8n.instance.role.leader` and the `n8n.scaling.mode.queue.jobs.*` family. + +To expose worker metrics, set `QUEUE_HEALTH_CHECK_ACTIVE=true` and `QUEUE_HEALTH_CHECK_PORT=<port>` on each worker. **In n8n 2.x, port `5679` is reserved for the task runner broker, so pick a different port (for example `5680`).** + +For full coverage in queue deployments, configure one Datadog instance per n8n process exposing `/metrics`, including main and worker processes: + +```yaml +instances: + - openmetrics_endpoint: http://n8n-main:5678/metrics + - openmetrics_endpoint: http://n8n-worker:5680/metrics +``` + +#### Version-specific metrics + +Several metric families were introduced in n8n 2.x and are not emitted on n8n 1.x: + +- `n8n.workflow.execution.duration.seconds.*` (histogram). Gated by `N8N_METRICS_INCLUDE_WORKFLOW_EXECUTION_DURATION`, which defaults to `true` in n8n 2.x. 
+- `n8n.audit.workflow.activated.count`, `n8n.audit.workflow.deactivated.count`, `n8n.audit.workflow.executed.count`, `n8n.audit.workflow.resumed.count`, `n8n.audit.workflow.version.updated.count`, and `n8n.audit.workflow.waiting.count` +- `n8n.embed.login.requests.count` (tagged with `result:success` or `result:failure`), `n8n.embed.login.failures.count` (tagged with `reason`) +- `n8n.token.exchange.requests.count` (tagged with `result:success` or `result:failure`), `n8n.token.exchange.failures.count` (tagged with `reason`), `n8n.token.exchange.identity.linked.count`, `n8n.token.exchange.jit.provisioning.count` +- `n8n.process.pss.bytes` (Linux only) +- The `n8n.{production,manual,production.root}.executions`, `n8n.users.total`, `n8n.enabled.users`, `n8n.workflows.total`, and `n8n.credentials.total` family. Only emitted when `N8N_METRICS_INCLUDE_WORKFLOW_STATISTICS=true` is set. +- The `n8n.expression.*` family (`evaluation.duration.seconds`, `code.cache.{hit,miss,eviction,size}`, `pool.{acquired,replenish.failed,scaled.up,scaled.to.zero}`). Only emitted when n8n is running the new VM-isolated expression engine *and* observability for it is on. Set `N8N_EXPRESSION_ENGINE=vm` and `N8N_EXPRESSION_ENGINE_OBSERVABILITY_ENABLED=true` on the n8n process; both default to off (the engine defaults to `legacy`). These metrics surface the per-expression evaluation latency, the compiled-expression LRU cache hit and miss rates, and the V8-isolate pool's idle scaling behavior. They are most useful for troubleshooting workflow latency that traces back to slow `{{ ... }}` evaluation. + +Some metrics only emit samples after the corresponding runtime event occurs. For example, failures-only counters (`*.failures.count`) need an authentication failure, audit workflow counters need the matching workflow state transition, and the libuv `n8n.nodejs.active.requests` gauge needs an in-flight libuv request. 
A healthy idle deployment may not produce data points for these metrics until that activity occurs. + +#### Tag cardinality + +When `N8N_METRICS_INCLUDE_WORKFLOW_ID_LABEL=true`, http and workflow execution histograms are tagged with `workflow_id` (and similar labels for nodes). On deployments with many distinct workflows or nodes, this can produce high-cardinality metrics. Drop the label via `exclude_labels` or omit `N8N_METRICS_INCLUDE_WORKFLOW_ID_LABEL` to keep tag cardinality bounded. + #### Configure the Datadog Agent 1. Edit the `n8n.d/conf.yaml` file, in the `conf.d/` folder at the root of your Agent's configuration directory to start collecting your n8n performance data. See the [sample n8n.d/conf.yaml][4] for all available configuration options. @@ -59,27 +125,32 @@ _Available for Agent versions >6.0_ #### Enable n8n logging -Configure n8n to output logs by setting the following environment variables: +Configure n8n application logs by setting the following environment variables: ```bash # Set the log level (error, warn, info, debug) N8N_LOG_LEVEL=info -# Output logs to console (for containerized environments) or file +# Output application logs to console or file N8N_LOG_OUTPUT=console -# If using file output, specify the log file location +# Use JSON formatting so Datadog can parse n8n application log attributes +N8N_LOG_FORMAT=json + +# If using file output, specify the application log file location N8N_LOG_FILE_LOCATION=/var/log/n8n/n8n.log ``` #### Structured event logs -n8n can output structured JSON logs to `n8nEventLog.log` containing detailed workflow execution events. Enable this by setting the log output to file: +n8n also writes structured event bus logs to `n8nEventLog*.log`. These logs contain workflow, node, queue, runner, and audit events and are separate from the application logs controlled by `N8N_LOG_OUTPUT` and `N8N_LOG_FILE_LOCATION`. 
-```bash -N8N_LOG_OUTPUT=file -N8N_LOG_FILE_LOCATION=/var/log/n8n/ -``` + +By default, event bus log files are written under the n8n user folder, for example: + +- Host installations: `~/.n8n/n8nEventLog*.log` +- Official Docker image: `/home/node/.n8n/n8nEventLog*.log` + +If you use a custom n8n user folder, collect the event bus logs from that folder instead. If you customize the event bus log file base name with `N8N_EVENTBUS_LOGWRITER_LOGBASENAME`, update the Datadog log path to match. The event log includes the following event types: @@ -102,32 +173,46 @@ Each event contains rich metadata including `executionId`, `workflowId`, `workfl logs_enabled: true ``` -2. Add this configuration block to your `n8n.d/conf.yaml` file to start collecting your n8n logs: +2. Add log collection entries to your `n8n.d/conf.yaml` file. + + For a host-based n8n installation where the Agent can read local files, collect the application log file and the event bus log files: ```yaml logs: - type: file path: /var/log/n8n/*.log source: n8n - service: n8n + service: <SERVICE_NAME> + - type: file + path: /home/n8n/.n8n/n8nEventLog*.log + source: n8n + service: <SERVICE_NAME> ``` - For containerized environments using Docker, use the following configuration instead: + Adjust `/home/n8n/.n8n/n8nEventLog*.log` to the n8n user folder on your host. + + For a containerized n8n deployment, collect stdout and stderr from the n8n container for application logs, and make the n8n user folder available to the Agent for event bus file logs. For example, if the n8n data directory is mounted on the host at `/var/lib/n8n`, configure: ```yaml logs: - type: docker source: n8n - service: n8n + service: <SERVICE_NAME> + - type: file + path: /var/lib/n8n/n8nEventLog*.log + source: n8n + service: <SERVICE_NAME> ``` + If the Agent runs in a container, mount the n8n data volume or host directory into the Agent container and use the path as seen from inside the Agent container. + 3. [Restart the Agent][5]. 
### Validation [Run the Agent's status subcommand][6] and look for `n8n` under the Checks section. -## Data Collected +## Data collected ### Metrics @@ -137,7 +222,7 @@ See [metadata.csv][7] for a list of metrics provided by this integration. The n8n integration does not include any events. -### Service Checks +### Service checks See [service_checks.json][8] for a list of service checks provided by this integration. diff --git a/n8n/assets/configuration/spec.yaml b/n8n/assets/configuration/spec.yaml index f828a10ec05c0..cea34bff83932 100644 --- a/n8n/assets/configuration/spec.yaml +++ b/n8n/assets/configuration/spec.yaml @@ -12,7 +12,7 @@ files: openmetrics_endpoint.required: true openmetrics_endpoint.hidden: false openmetrics_endpoint.display_priority: 1 - openmetrics_endpoint.value.example: http://localhost:5678 + openmetrics_endpoint.value.example: http://localhost:5678/metrics openmetrics_endpoint.description: | Endpoint exposing the n8n's metrics in the OpenMetrics format. For more information, refer to: https://docs.n8n.io/hosting/logging-monitoring/monitoring/ diff --git a/n8n/changelog.d/23635.changed b/n8n/changelog.d/23635.changed new file mode 100644 index 0000000000000..4707adc21dbb9 --- /dev/null +++ b/n8n/changelog.d/23635.changed @@ -0,0 +1,6 @@ +Improve the n8n metric coverage: + + - Correct missing or incorrect metrics. + - Add metrics introduced in n8n 2.x (workflow execution duration, audit events, authentication, workflow and user statistics, expression engine, and process memory). + - Track n8n's dynamic events (workflow cancellations, audit activity, AI nodes, user and credential changes, package and variable changes). + - Add support for monitoring n8n worker processes alongside the main process. 
diff --git a/n8n/datadog_checks/n8n/check.py b/n8n/datadog_checks/n8n/check.py index 00c41569b83d5..ea3ed741c2f29 100644 --- a/n8n/datadog_checks/n8n/check.py +++ b/n8n/datadog_checks/n8n/check.py @@ -2,58 +2,55 @@ # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -from urllib.parse import urljoin +from functools import cached_property +from typing import Any +from urllib.parse import urljoin, urlparse + +from requests.exceptions import RequestException from datadog_checks.base import OpenMetricsBaseCheckV2 from datadog_checks.n8n.metrics import METRIC_MAP, RENAME_LABELS_MAP from .config_models import ConfigMixin -DEFAULT_READY_ENDPOINT = '/healthz/readiness' +DEFAULT_READY_PATH = '/healthz/readiness' class N8nCheck(OpenMetricsBaseCheckV2, ConfigMixin): __NAMESPACE__ = 'n8n' DEFAULT_METRIC_LIMIT = 0 - def __init__(self, name, init_config, instances=None): - super(N8nCheck, self).__init__( - name, - init_config, - instances, - ) - self.openmetrics_endpoint = self.instance["openmetrics_endpoint"] - self.tags = self.instance.get('tags', []) - self._ready_endpoint = DEFAULT_READY_ENDPOINT - - def get_default_config(self): + def get_default_config(self) -> dict[str, Any]: return { 'metrics': [METRIC_MAP], 'rename_labels': RENAME_LABELS_MAP, 'raw_metric_prefix': 'n8n_', } - def _check_n8n_readiness(self): - endpoint = urljoin(self.openmetrics_endpoint, self._ready_endpoint) - response = self.http.get(endpoint) - - # Determine metric value and status_code tag - if response.status_code is None: - self.log.warning("The readiness endpoint did not return a status code") - metric_value = 0 - metric_tags = self.tags + ['status_code:null'] - elif response.status_code == 200: - # Ready - submit 1 - metric_value = 1 - metric_tags = self.tags + [f'status_code:{response.status_code}'] - else: - # Not ready - submit 0 - metric_value = 0 - metric_tags = self.tags + [f'status_code:{response.status_code}'] - - # Submit metric with appropriate value and 
status_code tag - self.gauge('readiness.check', metric_value, tags=metric_tags) - - def check(self, instance): - super().check(instance) + @cached_property + def _readiness_endpoint(self) -> str: + parsed = urlparse(self.config.openmetrics_endpoint) + base = f'{parsed.scheme}://{parsed.netloc}' + return urljoin(base, DEFAULT_READY_PATH) + + def _check_n8n_readiness(self) -> None: + endpoint = self._readiness_endpoint + tags = list(self.config.tags or ()) + + try: + response = self.http.get(endpoint) + except RequestException as e: + self.log.warning("Could not reach n8n readiness endpoint %s: %s", endpoint, e) + self.gauge('readiness.check', 0, tags=tags + ['status_code:none']) + return + + is_ready = 200 <= response.status_code < 300 + self.gauge( + 'readiness.check', + 1 if is_ready else 0, + tags=tags + [f'status_code:{response.status_code}'], + ) + + def check(self, instance: dict[str, Any]) -> None: self._check_n8n_readiness() + super().check(instance) diff --git a/n8n/datadog_checks/n8n/data/conf.yaml.example b/n8n/datadog_checks/n8n/data/conf.yaml.example index e80f23c8c08c1..5f96c4acb66fe 100644 --- a/n8n/datadog_checks/n8n/data/conf.yaml.example +++ b/n8n/datadog_checks/n8n/data/conf.yaml.example @@ -18,7 +18,7 @@ instances: ## https://docs.n8n.io/hosting/logging-monitoring/monitoring/ ## https://docs.n8n.io/hosting/configuration/environment-variables/endpoints/ # - - openmetrics_endpoint: http://localhost:5678 + - openmetrics_endpoint: http://localhost:5678/metrics ## @param raw_metric_prefix - string - optional - default: n8n_ ## The prefix prepended to all metrics from n8n. 
diff --git a/n8n/datadog_checks/n8n/metrics.py b/n8n/datadog_checks/n8n/metrics.py index 5e29ba629340c..20776372ee2f6 100644 --- a/n8n/datadog_checks/n8n/metrics.py +++ b/n8n/datadog_checks/n8n/metrics.py @@ -2,36 +2,122 @@ # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -# Metrics mapping without prefix - use raw_metric_prefix config to strip prefixes like 'n8n_', 'n8n_my_team_', etc. -# Namespace will be applied by the check -# Note: OpenMetrics automatically appends .count to counter metrics, so don't add it here +# Metrics emitted by n8n's /metrics endpoint, verified live against n8n@1.118.1 +# and n8n@2.19.5. +# +# The OpenMetrics base check strips `_total` from counter names before lookup +# and appends `.count` on submission, so counter keys here are written without +# the `_total` suffix (e.g. `cache_hits_total` -> key `cache_hits`). +# +# Many counters are dynamically registered from EventBus events (event +# `n8n...` becomes counter `___total`) and only appear once +# the corresponding event fires at runtime. In queue mode, worker processes +# emit `node_started_total`, `node_finished_total`, `queue_job_dequeued_total`, +# `queue_job_stalled_total`, and `runner_task_requested_total`. +# +# Several families were introduced in n8n 2.x (see the README "Version-specific +# metrics" section). The `workflow_statistics_*` and SSO/embed token-exchange +# families require additional flags (`N8N_METRICS_INCLUDE_WORKFLOW_STATISTICS`, +# token-exchange counters always register but only emit on auth events). +# Event-bus events whose generated counter name contains a hyphen are silently rejected +# by n8n's own ``prom-client.validateMetricName`` check (see ``prometheus-metrics.service.ts``). 
+# These events are emitted on the bus and reach n8n's event-log file but never register a +# Prometheus counter on /metrics, so they are intentionally absent from ``METRIC_MAP``: +# audit.2fa-enforcement.{enabled,disabled} +# audit.cluster.{hostid-clash,split-brain,version-mismatch}.{detected,resolved} +# audit.cluster.{instance-joined,instance-left} +# audit.external-secrets.connection.{created,deleted,reloaded,tested,updated} +# audit.external-secrets.provider.{reloaded,settings.saved} +# audit.personal-{publishing,sharing}-restricted.{enabled,disabled} +# audit.role-mapping.rule.{created,updated,deleted} +# audit.role-mapping.{roles-resolved,rules.bulk-deleted} +# audit.token-exchange.{succeeded,failed,identity-linked,user-provisioned,role-updated,embed-login,embed-login-failed} +# execution.started-during-bootup +# To collect these signals, tail n8n's event-log files via the ``logs`` block instead of /metrics. METRIC_MAP = { 'active_workflow_count': 'active.workflow.count', - 'api_request_duration_seconds': 'api.request.duration.seconds', - 'api_requests': 'api.requests', - 'cache_errors': 'cache.errors', + 'ai_document_processed': 'ai.document.processed', + 'ai_embeddings_embedded_document': 'ai.embeddings.embedded.document', + 'ai_embeddings_embedded_query': 'ai.embeddings.embedded.query', + 'ai_llm_error': 'ai.llm.error', + 'ai_llm_generated': 'ai.llm.generated', + 'ai_memory_added_message': 'ai.memory.added.message', + 'ai_memory_get_messages': 'ai.memory.get.messages', + 'ai_output_parser_parsed': 'ai.output.parser.parsed', + 'ai_retriever_get_relevant_documents': 'ai.retriever.get.relevant.documents', + 'ai_text_splitter_split': 'ai.text.splitter.split', + 'ai_tool_called': 'ai.tool.called', + 'ai_vector_store_populated': 'ai.vector.store.populated', + 'ai_vector_store_searched': 'ai.vector.store.searched', + 'ai_vector_store_updated': 'ai.vector.store.updated', + 'audit_execution_data_reveal_failure': 'audit.execution.data.reveal_failure', + 
'audit_execution_data_revealed': 'audit.execution.data.revealed', + 'audit_package_deleted': 'audit.package.deleted', + 'audit_package_installed': 'audit.package.installed', + 'audit_package_updated': 'audit.package.updated', + 'audit_user_api_created': 'audit.user.api.created', + 'audit_user_api_deleted': 'audit.user.api.deleted', + 'audit_user_credentials_created': 'audit.user.credentials.created', + 'audit_user_credentials_deleted': 'audit.user.credentials.deleted', + 'audit_user_credentials_shared': 'audit.user.credentials.shared', + 'audit_user_credentials_updated': 'audit.user.credentials.updated', + 'audit_user_deleted': 'audit.user.deleted', + 'audit_user_email_failed': 'audit.user.email.failed', + 'audit_user_execution_deleted': 'audit.user.execution.deleted', + 'audit_user_invitation_accepted': 'audit.user.invitation.accepted', + 'audit_user_invited': 'audit.user.invited', + 'audit_user_login_failed': 'audit.user.login.failed', + 'audit_user_login_success': 'audit.user.login.success', + 'audit_user_mfa_disabled': 'audit.user.mfa.disabled', + 'audit_user_mfa_enabled': 'audit.user.mfa.enabled', + 'audit_user_reinvited': 'audit.user.reinvited', + 'audit_user_reset': 'audit.user.reset', + 'audit_user_reset_requested': 'audit.user.reset.requested', + 'audit_user_signedup': 'audit.user.signedup', + 'audit_user_updated': 'audit.user.updated', + 'audit_variable_created': 'audit.variable.created', + 'audit_variable_deleted': 'audit.variable.deleted', + 'audit_variable_updated': 'audit.variable.updated', + 'audit_workflow_activated': 'audit.workflow.activated', # n8n 2.x+ + 'audit_workflow_archived': 'audit.workflow.archived', + 'audit_workflow_created': 'audit.workflow.created', + 'audit_workflow_deactivated': 'audit.workflow.deactivated', # n8n 2.x+ + 'audit_workflow_deleted': 'audit.workflow.deleted', + 'audit_workflow_executed': 'audit.workflow.executed', # n8n 2.x+ + 'audit_workflow_resumed': 'audit.workflow.resumed', # n8n 2.x+ + 'audit_workflow_unarchived': 
'audit.workflow.unarchived', + 'audit_workflow_updated': 'audit.workflow.updated', + 'audit_workflow_version_updated': 'audit.workflow.version.updated', # n8n 2.x+ + 'audit_workflow_waiting': 'audit.workflow.waiting', # n8n 2.x+ 'cache_hits': 'cache.hits', - 'cache_latency_seconds': 'cache.latency.seconds', 'cache_misses': 'cache.misses', - 'cache_operations': 'cache.operations', - 'eventbus_connections_total': 'eventbus.connections.total', - 'eventbus_events_failed': 'eventbus.events.failed', - 'eventbus_events_processed': 'eventbus.events.processed', - 'eventbus_events': 'eventbus.events', - 'eventbus_queue_size': 'eventbus.queue.size', + 'cache_updates': 'cache.updates', + 'credentials': 'credentials.total', # n8n 2.x+, requires N8N_METRICS_INCLUDE_WORKFLOW_STATISTICS + 'embed_login_failures': 'embed.login.failures', # n8n 2.x+ + 'embed_login_requests': 'embed.login.requests', # n8n 2.x+ + 'enabled_users': 'enabled.users', # n8n 2.x+, requires N8N_METRICS_INCLUDE_WORKFLOW_STATISTICS + 'execution_throttled': 'execution.throttled', + 'expression_code_cache_eviction': 'expression.code.cache.eviction', + 'expression_code_cache_hit': 'expression.code.cache.hit', + 'expression_code_cache_miss': 'expression.code.cache.miss', + 'expression_code_cache_size': 'expression.code.cache.size', + 'expression_evaluation_duration_seconds': 'expression.evaluation.duration.seconds', + 'expression_pool_acquired': 'expression.pool.acquired', + 'expression_pool_replenish_failed': 'expression.pool.replenish.failed', + 'expression_pool_scaled_to_zero': 'expression.pool.scaled.to.zero', + 'expression_pool_scaled_up': 'expression.pool.scaled.up', 'http_request_duration_seconds': 'http.request.duration.seconds', 'instance_role_leader': 'instance.role.leader', - 'last_activity': { - 'name': 'last.activity', - 'type': 'time_elapsed', - }, + 'last_activity': {'name': 'last.activity', 'type': 'time_elapsed'}, + 'manual_executions': 'manual.executions', # n8n 2.x+, requires 
N8N_METRICS_INCLUDE_WORKFLOW_STATISTICS + 'node_finished': 'node.finished', + 'node_started': 'node.started', 'nodejs_active_handles': 'nodejs.active.handles', 'nodejs_active_handles_total': 'nodejs.active.handles.total', 'nodejs_active_requests': 'nodejs.active.requests', 'nodejs_active_requests_total': 'nodejs.active.requests.total', 'nodejs_active_resources': 'nodejs.active.resources', 'nodejs_active_resources_total': 'nodejs.active.resources.total', - 'nodejs_event_loop_lag_seconds': 'nodejs.event.loop.lag.seconds', 'nodejs_eventloop_lag_max_seconds': 'nodejs.eventloop.lag.max.seconds', 'nodejs_eventloop_lag_mean_seconds': 'nodejs.eventloop.lag.mean.seconds', 'nodejs_eventloop_lag_min_seconds': 'nodejs.eventloop.lag.min.seconds', @@ -47,47 +133,44 @@ 'nodejs_heap_space_size_available_bytes': 'nodejs.heap.space.size.available.bytes', 'nodejs_heap_space_size_total_bytes': 'nodejs.heap.space.size.total.bytes', 'nodejs_heap_space_size_used_bytes': 'nodejs.heap.space.size.used.bytes', - 'nodejs_heap_total_bytes': 'nodejs.heap.total.bytes', - 'nodejs_heap_used_bytes': 'nodejs.heap.used.bytes', + 'nodejs_version_info': {'type': 'metadata', 'label': 'version', 'name': 'nodejs.version'}, + 'process_cpu_seconds': 'process.cpu.seconds', 'process_cpu_system_seconds': 'process.cpu.system.seconds', 'process_cpu_user_seconds': 'process.cpu.user.seconds', 'process_heap_bytes': 'process.heap.bytes', 'process_max_fds': 'process.max.fds', 'process_open_fds': 'process.open.fds', + 'process_pss_bytes': 'process.pss.bytes', # n8n 2.x+ 'process_resident_memory_bytes': 'process.resident.memory.bytes', - 'process_start_time_seconds': { - 'name': 'process.uptime.seconds', - 'type': 'time_elapsed', - }, + 'process_start_time_seconds': {'name': 'process.uptime.seconds', 'type': 'time_elapsed'}, 'process_virtual_memory_bytes': 'process.virtual.memory.bytes', - 'queue_job_active_total': 'queue.job.active.total', - 'queue_job_attempts': 'queue.job.attempts', + 'production_executions': 
'production.executions', # n8n 2.x+, requires N8N_METRICS_INCLUDE_WORKFLOW_STATISTICS + 'production_root_executions': 'production.root.executions', # n8n 2.x+, requires flag 'queue_job_completed': 'queue.job.completed', - 'queue_job_delayed_total': 'queue.job.delayed.total', 'queue_job_dequeued': 'queue.job.dequeued', 'queue_job_enqueued': 'queue.job.enqueued', 'queue_job_failed': 'queue.job.failed', - 'queue_job_waiting_duration_seconds': 'queue.job.waiting.duration.seconds', - 'queue_job_waiting_total': 'queue.job.waiting.total', - 'queue_jobs_duration_seconds': 'queue.jobs.duration.seconds', - 'queue_jobs': 'queue.jobs', - 'workflow_executions_active': 'workflow.executions.active', - 'workflow_executions_duration_seconds': 'workflow.executions.duration.seconds', - 'workflow_executions': 'workflow.executions', + 'queue_job_stalled': 'queue.job.stalled', + 'runner_response_received': 'runner.response.received', + 'runner_task_requested': 'runner.task.requested', + 'scaling_mode_queue_jobs_active': 'scaling.mode.queue.jobs.active', + 'scaling_mode_queue_jobs_completed': 'scaling.mode.queue.jobs.completed', + 'scaling_mode_queue_jobs_failed': 'scaling.mode.queue.jobs.failed', + 'scaling_mode_queue_jobs_waiting': 'scaling.mode.queue.jobs.waiting', + 'token_exchange_failures': 'token.exchange.failures', # n8n 2.x+ + 'token_exchange_identity_linked': 'token.exchange.identity.linked', # n8n 2.x+ + 'token_exchange_jit_provisioning': 'token.exchange.jit.provisioning', # n8n 2.x+ + 'token_exchange_requests': 'token.exchange.requests', # n8n 2.x+ + 'users': 'users.total', # n8n 2.x+, requires N8N_METRICS_INCLUDE_WORKFLOW_STATISTICS + 'version_info': {'type': 'metadata', 'label': 'version', 'name': 'version'}, + 'workflow_cancelled': 'workflow.cancelled', + 'workflow_execution_duration_seconds': 'workflow.execution.duration.seconds', # n8n 2.x+ 'workflow_failed': 'workflow.failed', 'workflow_started': 'workflow.started', 'workflow_success': 'workflow.success', - 
'process_cpu_seconds': 'process.cpu.seconds', - 'version_info': 'version.info', - 'nodejs_version_info': 'nodejs.version.info', + 'workflows': 'workflows.total', # n8n 2.x+, requires N8N_METRICS_INCLUDE_WORKFLOW_STATISTICS } -N8N_VERSION = {'version_info': {'type': 'metadata', 'label': 'version', 'name': 'version'}} -NODEJS_VERSION = {'nodejs_version_info': {'type': 'metadata', 'label': 'version', 'name': 'nodejs.version'}} - -METRIC_MAP.update(N8N_VERSION) -METRIC_MAP.update(NODEJS_VERSION) - RENAME_LABELS_MAP = { 'name': 'n8n_name', 'namespace': 'n8n_namespace', diff --git a/n8n/hatch.toml b/n8n/hatch.toml index 15f12fe355887..b1232ac43fbf2 100644 --- a/n8n/hatch.toml +++ b/n8n/hatch.toml @@ -3,9 +3,21 @@ [[envs.default.matrix]] python = ["3.13"] -version = ["1.118.1"] +version = ["1", "2"] [envs.default.overrides] matrix.version.env-vars = [ - { key = "N8N_VERSION", value = "1.118.1", if = ["1.118.1"] }, -] \ No newline at end of file + { key = "N8N_VERSION", value = "1.118.1", if = ["1"] }, + { key = "N8N_VERSION", value = "2.19.5", if = ["2"] }, +] + +[envs.lab] +dependencies = ["click", "httpx", "pyyaml", "rich"] + +[envs.lab.scripts] +start = "python -m tests.lab.traffic_generator start {args}" +generate = "python -m tests.lab.traffic_generator generate {args}" +stop = "python -m tests.lab.traffic_generator stop {args}" + +[envs.lab.env-vars] +N8N_IS_LAB = "true" diff --git a/n8n/metadata.csv b/n8n/metadata.csv index 29f8c23c7483e..f50056896b384 100644 --- a/n8n/metadata.csv +++ b/n8n/metadata.csv @@ -1,32 +1,91 @@ metric_name,metric_type,interval,unit_name,per_unit_name,description,orientation,integration,short_name,curated_metric,sample_tags n8n.active.workflow.count,gauge,,,,Total number of active workflows.,0,n8n,,, -n8n.api.request.duration.seconds.bucket,count,,,,Histogram bucket for API request duration in seconds,0,n8n,,, -n8n.api.request.duration.seconds.count,count,,,,The count of API request duration in seconds,0,n8n,,, 
-n8n.api.request.duration.seconds.sum,count,,,,The sum of API request duration in seconds,0,n8n,,, -n8n.api.requests.count,count,,,,Total API requests,0,n8n,,, -n8n.cache.errors.count,count,,,,Cache errors,0,n8n,,, -n8n.cache.hits.count,count,,,,Cache hits,0,n8n,,, -n8n.cache.latency.seconds.bucket,count,,,,Histogram bucket for cache operation latency in seconds,0,n8n,,, -n8n.cache.latency.seconds.count,count,,,,The count of cache operation latency in seconds,0,n8n,,, -n8n.cache.latency.seconds.sum,count,,,,The sum of cache operation latency in seconds,0,n8n,,, -n8n.cache.misses.count,count,,,,Cache misses,0,n8n,,, -n8n.cache.operations.count,count,,,,Total cache operations,0,n8n,,, -n8n.eventbus.connections.total,gauge,,,,Active event bus backend connections,0,n8n,,, -n8n.eventbus.events.count,count,,,,Total events published on the event bus,0,n8n,,, -n8n.eventbus.events.failed.count,count,,,,Total failed event processing,0,n8n,,, -n8n.eventbus.events.processed.count,count,,,,Total processed events,0,n8n,,, -n8n.eventbus.queue.size,gauge,,,,Current event queue size,0,n8n,,, -n8n.http.request.duration.seconds.count,count,,,,The count of the http responses duration labeled with: status_code,0,n8n,,, -n8n.http.request.duration.seconds.sum,count,,,,The sum of the http responses duration labeled with: status_code,0,n8n,,, +n8n.ai.document.processed.count,count,,,,Total number of documents processed by AI nodes. Best-effort dynamic event-bus metric: only emitted when the n8n event fires.,0,n8n,,, +n8n.ai.embeddings.embedded.document.count,count,,,,Total number of documents embedded by AI embedding nodes. Best-effort dynamic event-bus metric: only emitted when the n8n event fires.,0,n8n,,, +n8n.ai.embeddings.embedded.query.count,count,,,,Total number of queries embedded by AI embedding nodes. Best-effort dynamic event-bus metric: only emitted when the n8n event fires.,0,n8n,,, +n8n.ai.llm.error.count,count,,,,Total number of LLM errors raised by AI nodes. 
Best-effort dynamic event-bus metric: only emitted when the n8n event fires.,0,n8n,,, +n8n.ai.llm.generated.count,count,,,,Total number of successful LLM generations from AI nodes. Best-effort dynamic event-bus metric: only emitted when the n8n event fires.,0,n8n,,, +n8n.ai.memory.added.message.count,count,,,,Total number of AI memory message writes. Best-effort dynamic event-bus metric: only emitted when the n8n event fires.,0,n8n,,, +n8n.ai.memory.get.messages.count,count,,,,Total number of AI memory message reads. Best-effort dynamic event-bus metric: only emitted when the n8n event fires.,0,n8n,,, +n8n.ai.output.parser.parsed.count,count,,,,Total number of AI output parser invocations. Best-effort dynamic event-bus metric: only emitted when the n8n event fires.,0,n8n,,, +n8n.ai.retriever.get.relevant.documents.count,count,,,,Total number of AI retriever relevant-document fetches. Best-effort dynamic event-bus metric: only emitted when the n8n event fires.,0,n8n,,, +n8n.ai.text.splitter.split.count,count,,,,Total number of AI text-splitter splits. Best-effort dynamic event-bus metric: only emitted when the n8n event fires.,0,n8n,,, +n8n.ai.tool.called.count,count,,,,Total number of AI tool invocations. Best-effort dynamic event-bus metric: only emitted when the n8n event fires.,0,n8n,,, +n8n.ai.vector.store.populated.count,count,,,,Total number of AI vector-store population events. Best-effort dynamic event-bus metric: only emitted when the n8n event fires.,0,n8n,,, +n8n.ai.vector.store.searched.count,count,,,,Total number of AI vector-store searches. Best-effort dynamic event-bus metric: only emitted when the n8n event fires.,0,n8n,,, +n8n.ai.vector.store.updated.count,count,,,,Total number of AI vector-store update events. Best-effort dynamic event-bus metric: only emitted when the n8n event fires.,0,n8n,,, +n8n.audit.execution.data.reveal_failure.count,count,,,,Total number of execution-data reveal failures audited. 
Best-effort dynamic event-bus metric: only emitted when the n8n event fires.,0,n8n,,, +n8n.audit.execution.data.revealed.count,count,,,,Total number of execution-data reveal events audited. Best-effort dynamic event-bus metric: only emitted when the n8n event fires.,0,n8n,,, +n8n.audit.package.deleted.count,count,,,,Total number of community package removals audited. Best-effort dynamic event-bus metric: only emitted when the n8n event fires.,0,n8n,,, +n8n.audit.package.installed.count,count,,,,Total number of community package installations audited. Best-effort dynamic event-bus metric: only emitted when the n8n event fires.,0,n8n,,, +n8n.audit.package.updated.count,count,,,,Total number of community package updates audited. Best-effort dynamic event-bus metric: only emitted when the n8n event fires.,0,n8n,,, +n8n.audit.user.api.created.count,count,,,,Total number of API key creations audited. Best-effort dynamic event-bus metric: only emitted when the n8n event fires.,0,n8n,,, +n8n.audit.user.api.deleted.count,count,,,,Total number of API key deletions audited. Best-effort dynamic event-bus metric: only emitted when the n8n event fires.,0,n8n,,, +n8n.audit.user.credentials.created.count,count,,,,Total number of credential creations audited. Best-effort dynamic event-bus metric: only emitted when the n8n event fires.,0,n8n,,, +n8n.audit.user.credentials.deleted.count,count,,,,Total number of credential deletions audited. Best-effort dynamic event-bus metric: only emitted when the n8n event fires.,0,n8n,,, +n8n.audit.user.credentials.shared.count,count,,,,Total number of credential share events audited. Best-effort dynamic event-bus metric: only emitted when the n8n event fires.,0,n8n,,, +n8n.audit.user.credentials.updated.count,count,,,,Total number of credential updates audited. Best-effort dynamic event-bus metric: only emitted when the n8n event fires.,0,n8n,,, +n8n.audit.user.deleted.count,count,,,,Total number of user deletions audited. 
Best-effort dynamic event-bus metric: only emitted when the n8n event fires.,0,n8n,,, +n8n.audit.user.email.failed.count,count,,,,Total number of user-facing email send failures audited. Best-effort dynamic event-bus metric: only emitted when the n8n event fires.,0,n8n,,, +n8n.audit.user.execution.deleted.count,count,,,,Total number of executions deleted by users audited. Best-effort dynamic event-bus metric: only emitted when the n8n event fires.,0,n8n,,, +n8n.audit.user.invitation.accepted.count,count,,,,Total number of user invitations accepted. Best-effort dynamic event-bus metric: only emitted when the n8n event fires.,0,n8n,,, +n8n.audit.user.invited.count,count,,,,Total number of user invitations sent. Best-effort dynamic event-bus metric: only emitted when the n8n event fires.,0,n8n,,, +n8n.audit.user.login.failed.count,count,,,,Total number of failed user login attempts audited. Best-effort dynamic event-bus metric: only emitted when the n8n event fires.,0,n8n,,, +n8n.audit.user.login.success.count,count,,,,Total number of successful user logins audited. Best-effort dynamic event-bus metric: only emitted when the n8n event fires.,0,n8n,,, +n8n.audit.user.mfa.disabled.count,count,,,,Total number of times a user disabled multi-factor authentication. Best-effort dynamic event-bus metric: only emitted when the n8n event fires.,0,n8n,,, +n8n.audit.user.mfa.enabled.count,count,,,,Total number of times a user enabled multi-factor authentication. Best-effort dynamic event-bus metric: only emitted when the n8n event fires.,0,n8n,,, +n8n.audit.user.reinvited.count,count,,,,Total number of user re-invitations sent. Best-effort dynamic event-bus metric: only emitted when the n8n event fires.,0,n8n,,, +n8n.audit.user.reset.count,count,,,,Total number of completed password resets audited. Best-effort dynamic event-bus metric: only emitted when the n8n event fires.,0,n8n,,, +n8n.audit.user.reset.requested.count,count,,,,Total number of password reset requests audited. 
Best-effort dynamic event-bus metric: only emitted when the n8n event fires.,0,n8n,,, +n8n.audit.user.signedup.count,count,,,,Total number of user sign-ups audited. Best-effort dynamic event-bus metric: only emitted when the n8n event fires.,0,n8n,,, +n8n.audit.user.updated.count,count,,,,Total number of user profile updates audited. Best-effort dynamic event-bus metric: only emitted when the n8n event fires.,0,n8n,,, +n8n.audit.variable.created.count,count,,,,Total number of variable creations audited. Best-effort dynamic event-bus metric: only emitted when the n8n event fires.,0,n8n,,, +n8n.audit.variable.deleted.count,count,,,,Total number of variable deletions audited. Best-effort dynamic event-bus metric: only emitted when the n8n event fires.,0,n8n,,, +n8n.audit.variable.updated.count,count,,,,Total number of variable updates audited. Best-effort dynamic event-bus metric: only emitted when the n8n event fires.,0,n8n,,, +n8n.audit.workflow.activated.count,count,,,,Total number of audited workflow activations. Available in n8n 2.x and later.,0,n8n,,, +n8n.audit.workflow.archived.count,count,,,,Total number of audited workflow archive events.,0,n8n,,, +n8n.audit.workflow.created.count,count,,,,Total number of audited workflow creations.,0,n8n,,, +n8n.audit.workflow.deactivated.count,count,,,,Total number of audited workflow deactivations. Available in n8n 2.x and later.,0,n8n,,, +n8n.audit.workflow.deleted.count,count,,,,Total number of audited workflow deletions.,0,n8n,,, +n8n.audit.workflow.executed.count,count,,,,Total number of audited workflow executions. Available in n8n 2.x and later.,0,n8n,,, +n8n.audit.workflow.resumed.count,count,,,,Total number of audited workflow resumptions. 
Available in n8n 2.x and later.,0,n8n,,, +n8n.audit.workflow.unarchived.count,count,,,,Total number of audited workflow unarchive events.,0,n8n,,, +n8n.audit.workflow.updated.count,count,,,,Total number of audited workflow updates.,0,n8n,,, +n8n.audit.workflow.version.updated.count,count,,,,Total number of audited workflow version updates. Available in n8n 2.x and later.,0,n8n,,, +n8n.audit.workflow.waiting.count,count,,,,Total number of audited workflow executions entering a waiting state. Available in n8n 2.x and later.,0,n8n,,, +n8n.cache.hits.count,count,,,,Total number of cache hits.,0,n8n,,, +n8n.cache.misses.count,count,,,,Total number of cache misses.,0,n8n,,, +n8n.cache.updates.count,count,,,,Total number of cache updates.,0,n8n,,, +n8n.credentials.total,gauge,,,,Total number of credentials. Available in n8n 2.x and later when N8N_METRICS_INCLUDE_WORKFLOW_STATISTICS is enabled.,0,n8n,,, +n8n.embed.login.failures.count,count,,,,Total number of embed login failures broken down by reason. Available in n8n 2.x and later. Only emits samples after the first failure.,0,n8n,,, +n8n.embed.login.requests.count,count,,,,Total number of embed login requests (tagged with `result:success`/`result:failure`). Available in n8n 2.x and later.,0,n8n,,, +n8n.enabled.users,gauge,,,,Total number of enabled users. Available in n8n 2.x and later when N8N_METRICS_INCLUDE_WORKFLOW_STATISTICS is enabled.,0,n8n,,, +n8n.execution.throttled.count,count,,,,Total number of executions throttled because the production concurrency limit was reached. Best-effort dynamic event-bus metric: only emitted when the n8n event fires.,0,n8n,,, +n8n.expression.code.cache.eviction.count,count,,,,Total expression code cache evictions. Available in n8n 2.x and later when N8N_EXPRESSION_ENGINE=vm and N8N_EXPRESSION_ENGINE_OBSERVABILITY_ENABLED=true.,0,n8n,,, +n8n.expression.code.cache.hit.count,count,,,,Total expression code cache hits (compiled expression reused). 
Available in n8n 2.x and later when N8N_EXPRESSION_ENGINE=vm and N8N_EXPRESSION_ENGINE_OBSERVABILITY_ENABLED=true.,0,n8n,,, +n8n.expression.code.cache.miss.count,count,,,,Total expression code cache misses (expression compiled for the first time). Available in n8n 2.x and later when N8N_EXPRESSION_ENGINE=vm and N8N_EXPRESSION_ENGINE_OBSERVABILITY_ENABLED=true.,0,n8n,,, +n8n.expression.code.cache.size,gauge,,,,Current number of compiled expressions held in the expression code cache. Available in n8n 2.x and later when N8N_EXPRESSION_ENGINE=vm and N8N_EXPRESSION_ENGINE_OBSERVABILITY_ENABLED=true.,0,n8n,,, +n8n.expression.evaluation.duration.seconds.bucket,count,,,,Histogram bucket for VM-isolated expression evaluation duration in seconds (tagged with `status` and `type`). Available in n8n 2.x and later when N8N_EXPRESSION_ENGINE=vm and N8N_EXPRESSION_ENGINE_OBSERVABILITY_ENABLED=true.,0,n8n,,, +n8n.expression.evaluation.duration.seconds.count,count,,,,Count of VM-isolated expression evaluations (tagged with `status` and `type`). Available in n8n 2.x and later when N8N_EXPRESSION_ENGINE=vm and N8N_EXPRESSION_ENGINE_OBSERVABILITY_ENABLED=true.,0,n8n,,, +n8n.expression.evaluation.duration.seconds.sum,count,,,,Sum of VM-isolated expression evaluation durations in seconds. Available in n8n 2.x and later when N8N_EXPRESSION_ENGINE=vm and N8N_EXPRESSION_ENGINE_OBSERVABILITY_ENABLED=true.,0,n8n,,, +n8n.expression.pool.acquired.count,count,,,,Total VM bridges acquired from the expression pool for evaluation. Available in n8n 2.x and later when N8N_EXPRESSION_ENGINE=vm and N8N_EXPRESSION_ENGINE_OBSERVABILITY_ENABLED=true.,0,n8n,,, +n8n.expression.pool.replenish.failed.count,count,,,,Total times the expression pool failed to replenish a bridge after release/dispose. Should remain near zero. 
Available in n8n 2.x and later when N8N_EXPRESSION_ENGINE=vm and N8N_EXPRESSION_ENGINE_OBSERVABILITY_ENABLED=true.,0,n8n,,, +n8n.expression.pool.scaled.to.zero.count,count,,,,Total times the expression pool scaled to zero after `N8N_EXPRESSION_ENGINE_IDLE_TIMEOUT` seconds of inactivity. Available in n8n 2.x and later when N8N_EXPRESSION_ENGINE=vm and N8N_EXPRESSION_ENGINE_OBSERVABILITY_ENABLED=true.,0,n8n,,, +n8n.expression.pool.scaled.up.count,count,,,,Total times the expression pool scaled up from idle on a new acquire. Available in n8n 2.x and later when N8N_EXPRESSION_ENGINE=vm and N8N_EXPRESSION_ENGINE_OBSERVABILITY_ENABLED=true.,0,n8n,,, +n8n.http.request.duration.seconds.bucket,count,,,,Histogram bucket for HTTP request duration in seconds labeled with status_code.,0,n8n,,, +n8n.http.request.duration.seconds.count,count,,,,The count of HTTP request duration samples.,0,n8n,,, +n8n.http.request.duration.seconds.sum,count,,,,The sum of HTTP request duration in seconds.,0,n8n,,, n8n.instance.role.leader,gauge,,,,Whether this main instance is the leader (1) or not (0).,0,n8n,,, n8n.last.activity,gauge,,second,,Time elapsed since the last instance activity (backend request).,0,n8n,,, +n8n.manual.executions,gauge,,,,Total number of manual workflow executions. Available in n8n 2.x and later when N8N_METRICS_INCLUDE_WORKFLOW_STATISTICS is enabled.,0,n8n,,, +n8n.node.finished.count,count,,,,Total number of node executions that finished. Emitted by worker processes in queue mode (n8n.node.finished event).,0,n8n,,, +n8n.node.started.count,count,,,,Total number of node executions that started. Emitted by worker processes in queue mode (n8n.node.started event).,0,n8n,,, n8n.nodejs.active.handles,gauge,,,,Number of active libuv handles grouped by handle type. 
Every handle type is C++ class name.,0,n8n,,, n8n.nodejs.active.handles.total,gauge,,,,Total number of active handles.,0,n8n,,, -n8n.nodejs.active.requests,gauge,,,,Number of active libuv requests grouped by request type. Every request type is C++ class name.,0,n8n,,, +n8n.nodejs.active.requests,gauge,,,,Number of active libuv requests grouped by request type. Only emits samples for request types currently in flight at scrape time (from prom-client's default collector via process._getActiveRequests).,0,n8n,,, n8n.nodejs.active.requests.total,gauge,,,,Total number of active requests.,0,n8n,,, -n8n.nodejs.active.resources,gauge,,,,"Number of active resources that are currently keeping the event loop alive, grouped by async resource type.",0,n8n,,, +n8n.nodejs.active.resources,gauge,,,,Number of active resources keeping the event loop alive grouped by async resource type.,0,n8n,,, n8n.nodejs.active.resources.total,gauge,,,,Total number of active resources.,0,n8n,,, -n8n.nodejs.event.loop.lag.seconds,gauge,,,,Event loop lag in seconds,0,n8n,,, n8n.nodejs.eventloop.lag.max.seconds,gauge,,,,The maximum recorded event loop delay.,0,n8n,,, n8n.nodejs.eventloop.lag.mean.seconds,gauge,,,,The mean of the recorded event loop delays.,0,n8n,,, n8n.nodejs.eventloop.lag.min.seconds,gauge,,,,The minimum recorded event loop delay.,0,n8n,,, @@ -36,47 +95,48 @@ n8n.nodejs.eventloop.lag.p99.seconds,gauge,,,,The 99th percentile of the recorde n8n.nodejs.eventloop.lag.seconds,gauge,,,,Lag of event loop in seconds.,0,n8n,,, n8n.nodejs.eventloop.lag.stddev.seconds,gauge,,,,The standard deviation of the recorded event loop delays.,0,n8n,,, n8n.nodejs.external.memory.bytes,gauge,,,,Node.js external memory size in bytes.,0,n8n,,, -n8n.nodejs.gc.duration.seconds.bucket,count,,,,Histogram bucket for garbage collection duration by kind,0,n8n,,, -n8n.nodejs.gc.duration.seconds.count,count,,,,The count of garbage collection duration by kind,0,n8n,,, -n8n.nodejs.gc.duration.seconds.sum,count,,,,The 
sum of garbage collection duration by kind,0,n8n,,, +n8n.nodejs.gc.duration.seconds.bucket,count,,,,Histogram bucket for garbage collection duration by kind.,0,n8n,,, +n8n.nodejs.gc.duration.seconds.count,count,,,,The count of garbage collection duration samples.,0,n8n,,, +n8n.nodejs.gc.duration.seconds.sum,count,,,,The sum of garbage collection duration in seconds.,0,n8n,,, n8n.nodejs.heap.size.total.bytes,gauge,,,,Process heap size from Node.js in bytes.,0,n8n,,, n8n.nodejs.heap.size.used.bytes,gauge,,,,Process heap size used from Node.js in bytes.,0,n8n,,, n8n.nodejs.heap.space.size.available.bytes,gauge,,,,Process heap space size available from Node.js in bytes.,0,n8n,,, n8n.nodejs.heap.space.size.total.bytes,gauge,,,,Process heap space size total from Node.js in bytes.,0,n8n,,, n8n.nodejs.heap.space.size.used.bytes,gauge,,,,Process heap space size used from Node.js in bytes.,0,n8n,,, -n8n.nodejs.heap.total.bytes,gauge,,,,Total heap size allocated in bytes,0,n8n,,, -n8n.nodejs.heap.used.bytes,gauge,,,,Heap memory used in bytes,0,n8n,,, n8n.process.cpu.seconds.count,count,,,,Total user and system CPU time spent in seconds.,0,n8n,,, n8n.process.cpu.system.seconds.count,count,,,,Total system CPU time spent in seconds.,0,n8n,,, n8n.process.cpu.user.seconds.count,count,,,,Total user CPU time spent in seconds.,0,n8n,,, n8n.process.heap.bytes,gauge,,,,Process heap size in bytes.,0,n8n,,, n8n.process.max.fds,gauge,,,,Maximum number of open file descriptors.,0,n8n,,, n8n.process.open.fds,gauge,,,,Number of open file descriptors.,0,n8n,,, +n8n.process.pss.bytes,gauge,,,,Proportional set size of the process in bytes. 
Available in n8n 2.x and later on Linux.,0,n8n,,, n8n.process.resident.memory.bytes,gauge,,,,Resident memory size in bytes.,0,n8n,,, -n8n.process.start.time.seconds,gauge,,,,Start time of the process since unix epoch in seconds.,0,n8n,,, -n8n.process.uptime.seconds,gauge,,,,Process uptime in seconds.,0,n8n,,, +n8n.process.uptime.seconds,gauge,,second,,Process uptime in seconds.,0,n8n,,, n8n.process.virtual.memory.bytes,gauge,,,,Virtual memory size in bytes.,0,n8n,,, -n8n.queue.job.active.total,gauge,,,,Number of jobs currently being processed,0,n8n,,, -n8n.queue.job.attempts.count,count,,,,Total number of job attempts,0,n8n,,, -n8n.queue.job.completed.count,count,,,,Number of jobs completed successfully,0,n8n,,, -n8n.queue.job.delayed.total,gauge,,,,Number of jobs scheduled to run later,0,n8n,,, -n8n.queue.job.dequeued.count,count,,,,Number of jobs dequeued (picked up from queue),0,n8n,,, -n8n.queue.job.enqueued.count,count,,,,Number of jobs added to the queue,0,n8n,,, -n8n.queue.job.failed.count,count,,,,Number of jobs that have failed,0,n8n,,, -n8n.queue.job.waiting.duration.seconds.bucket,count,,,,Histogram bucket for duration jobs spend waiting before being processed,0,n8n,,, -n8n.queue.job.waiting.duration.seconds.count,count,,,,The count of duration jobs spend waiting before being processed,0,n8n,,, -n8n.queue.job.waiting.duration.seconds.sum,count,,,,The sum of duration jobs spend waiting before being processed,0,n8n,,, -n8n.queue.job.waiting.total,gauge,,,,Number of jobs currently waiting in the queue,0,n8n,,, -n8n.queue.jobs.count,count,,,,Total number of queue jobs,0,n8n,,, -n8n.queue.jobs.duration.seconds.bucket,count,,,,Histogram bucket for job duration in seconds,0,n8n,,, -n8n.queue.jobs.duration.seconds.count,count,,,,The count of job duration in seconds,0,n8n,,, -n8n.queue.jobs.duration.seconds.sum,count,,,,The sum of job duration in seconds,0,n8n,,, -n8n.readiness.check,gauge,,,,Readiness check status (1 if ready with status code 200 otherwise 0) 
with status code tag,0,n8n,,,status_code -n8n.workflow.executions.active,gauge,,,,Number of active workflow executions,0,n8n,,, -n8n.workflow.executions.count,count,,,,Total number of workflow executions,0,n8n,,, -n8n.workflow.executions.duration.seconds.bucket,count,,,,Histogram bucket for workflow execution duration in seconds,0,n8n,,, -n8n.workflow.executions.duration.seconds.count,count,,,,The count of workflow execution duration in seconds,0,n8n,,, -n8n.workflow.executions.duration.seconds.sum,count,,,,The sum of workflow execution duration in seconds,0,n8n,,, -n8n.workflow.failed.count,count,,,,Total number of workflows that failed,0,n8n,,, -n8n.workflow.started.count,count,,,,Total number of workflows started,0,n8n,,, -n8n.workflow.success.count,count,,,,Total number of workflows completed successfully,0,n8n,,, +n8n.production.executions,gauge,,,,Total number of production workflow executions. Available in n8n 2.x and later when N8N_METRICS_INCLUDE_WORKFLOW_STATISTICS is enabled.,0,n8n,,, +n8n.production.root.executions,gauge,,,,Total number of production root workflow executions (excludes sub-workflows). Available in n8n 2.x and later when N8N_METRICS_INCLUDE_WORKFLOW_STATISTICS is enabled.,0,n8n,,, +n8n.queue.job.completed.count,count,,,,Number of jobs completed successfully (n8n.queue.job.completed event).,0,n8n,,, +n8n.queue.job.dequeued.count,count,,,,Number of jobs dequeued by workers (n8n.queue.job.dequeued event). 
Emitted by worker processes in queue mode.,0,n8n,,, +n8n.queue.job.enqueued.count,count,,,,Number of jobs added to the queue (n8n.queue.job.enqueued event).,0,n8n,,, +n8n.queue.job.failed.count,count,,,,Number of jobs that have failed (n8n.queue.job.failed event).,0,n8n,,, +n8n.queue.job.stalled.count,count,,,,Number of jobs that stalled (n8n.queue.job.stalled event). Best-effort dynamic event-bus metric: only emitted when the n8n event fires.,0,n8n,,, +n8n.readiness.check,gauge,,,,Readiness check status (1 if ready with status code 200 otherwise 0) with status code tag.,0,n8n,,,status_code +n8n.runner.response.received.count,count,,,,Total number of task-runner responses received by worker processes. Best-effort dynamic event-bus metric: only emitted when the n8n event fires.,0,n8n,,, +n8n.runner.task.requested.count,count,,,,Total number of runner tasks requested by worker processes. Best-effort dynamic event-bus metric: only emitted when the n8n event fires.,0,n8n,,, +n8n.scaling.mode.queue.jobs.active,gauge,,,,Current number of jobs being processed across all workers in scaling mode.,0,n8n,,, +n8n.scaling.mode.queue.jobs.completed.count,count,,,,Total number of jobs completed across all workers in scaling mode since instance start.,0,n8n,,, +n8n.scaling.mode.queue.jobs.failed.count,count,,,,Total number of jobs failed across all workers in scaling mode since instance start.,0,n8n,,, +n8n.scaling.mode.queue.jobs.waiting,gauge,,,,Current number of enqueued jobs waiting for pickup in scaling mode.,0,n8n,,, +n8n.token.exchange.failures.count,count,,,,Total number of token exchange failures broken down by reason. Available in n8n 2.x and later. Only emits samples after the first failure.,0,n8n,,, +n8n.token.exchange.identity.linked.count,count,,,,Total number of identities linked to existing users via token exchange. Available in n8n 2.x and later.,0,n8n,,, +n8n.token.exchange.jit.provisioning.count,count,,,,Total number of users JIT-provisioned via token exchange. Available in n8n 2.x and later.,0,n8n,,, +n8n.token.exchange.requests.count,count,,,,Total number of token exchange requests. 
Available in n8n 2.x and later.,0,n8n,,, +n8n.users.total,gauge,,,,Total number of users. Available in n8n 2.x and later when N8N_METRICS_INCLUDE_WORKFLOW_STATISTICS is enabled.,0,n8n,,, +n8n.workflow.cancelled.count,count,,,,Total number of workflows cancelled mid-execution. Best-effort dynamic event-bus metric: only emitted when the n8n event fires.,0,n8n,,, +n8n.workflow.execution.duration.seconds.bucket,count,,,,Histogram bucket for workflow execution duration in seconds. Available in n8n 2.x and later.,0,n8n,,, +n8n.workflow.execution.duration.seconds.count,count,,,,The count of workflow execution duration samples. Available in n8n 2.x and later.,0,n8n,,, +n8n.workflow.execution.duration.seconds.sum,count,,,,The sum of workflow execution duration in seconds. Available in n8n 2.x and later.,0,n8n,,, +n8n.workflow.failed.count,count,,,,Total number of workflows that failed (n8n.workflow.failed event).,0,n8n,,, +n8n.workflow.started.count,count,,,,Total number of workflows started (n8n.workflow.started event).,0,n8n,,, +n8n.workflow.success.count,count,,,,Total number of workflows completed successfully (n8n.workflow.success event).,0,n8n,,, +n8n.workflows.total,gauge,,,,Total number of workflows. Available in n8n 2.x and later when N8N_METRICS_INCLUDE_WORKFLOW_STATISTICS is enabled.,0,n8n,,, diff --git a/n8n/tests/common.py b/n8n/tests/common.py index 34e3fb84ead38..e16b77d30072c 100644 --- a/n8n/tests/common.py +++ b/n8n/tests/common.py @@ -4,102 +4,210 @@ import os from datadog_checks.dev import get_docker_hostname +from datadog_checks.dev.utils import find_free_ports, get_metadata_metrics HERE = os.path.dirname(os.path.abspath(__file__)) -COMPOSE_FILE = os.path.join(HERE, 'docker', 'docker-compose.yaml') HOST = get_docker_hostname() -PORT = 5678 +# The lab uses its own compose file with bind-mounted lab workflows and fixed ports. +# Selected via the ``N8N_IS_LAB`` env var that hatch sets in ``[envs.lab.env-vars]``. 
+IS_LAB = os.environ.get('N8N_IS_LAB') == 'true' -def get_fixture_path(filename): - return os.path.join(HERE, 'fixtures', filename) +if IS_LAB: + COMPOSE_FILE = os.path.join(HERE, 'lab', 'docker-compose.yaml') + # The lab is meant to run interactively on a developer machine, so we own host port + # allocation and just hardcode 5678/5680 to keep the README, traffic generator, and + # agent config consistent. + MAIN_PORT, WORKER_PORT = 5678, 5680 +else: + COMPOSE_FILE = os.path.join(HERE, 'docker', 'docker-compose.yaml') + # Allocate free host ports once per session. The values are forwarded to docker compose + # via the ``env_vars`` argument of ``docker_run`` (see ``conftest.py``) so re-runs don't + # collide with stale containers or other locally-bound services. + MAIN_PORT, WORKER_PORT = find_free_ports('127.0.0.1', 2) +N8N_VERSION = os.environ.get('N8N_VERSION', '1.118.1') +N8N_MAJOR = int(N8N_VERSION.split('.', 1)[0]) -OPENMETRICS_URL = f'http://{HOST}:{PORT}' -INSTANCE = { - 'openmetrics_endpoint': f'{OPENMETRICS_URL}/metrics', -} +# Submitted by the check itself, not by the OpenMetrics scrape. +CHECK_LEVEL_METRIC_NAMES = frozenset({'n8n.readiness.check'}) + +# Metric families introduced in n8n 2.x — verified live against n8n@1.118.1 and n8n@2.19.5. 
+V2_ONLY_METRIC_NAMES = frozenset( + { + 'n8n.audit.workflow.activated.count', + 'n8n.audit.workflow.deactivated.count', + 'n8n.audit.workflow.executed.count', + 'n8n.audit.workflow.resumed.count', + 'n8n.audit.workflow.version.updated.count', + 'n8n.audit.workflow.waiting.count', + 'n8n.credentials.total', + 'n8n.embed.login.failures.count', + 'n8n.embed.login.requests.count', + 'n8n.enabled.users', + 'n8n.expression.code.cache.eviction.count', + 'n8n.expression.code.cache.hit.count', + 'n8n.expression.code.cache.miss.count', + 'n8n.expression.code.cache.size', + 'n8n.expression.evaluation.duration.seconds.bucket', + 'n8n.expression.evaluation.duration.seconds.count', + 'n8n.expression.evaluation.duration.seconds.sum', + 'n8n.expression.pool.acquired.count', + 'n8n.expression.pool.replenish.failed.count', + 'n8n.expression.pool.scaled.to.zero.count', + 'n8n.expression.pool.scaled.up.count', + 'n8n.manual.executions', + 'n8n.process.pss.bytes', + 'n8n.production.executions', + 'n8n.production.root.executions', + 'n8n.token.exchange.failures.count', + 'n8n.token.exchange.identity.linked.count', + 'n8n.token.exchange.jit.provisioning.count', + 'n8n.token.exchange.requests.count', + 'n8n.users.total', + 'n8n.workflow.execution.duration.seconds.bucket', + 'n8n.workflow.execution.duration.seconds.count', + 'n8n.workflow.execution.duration.seconds.sum', + 'n8n.workflows.total', + } +) + +# Metrics that are mapped and present in metadata but only emit samples after a specific +# event fires (auth failure, audit state transition, libuv request mid-flight). The unit +# fixture has synthetic samples for them; live integration/e2e runs cannot guarantee +# samples and exclude them from the symmetric metadata assertion. 
A metric can appear in +# both ``V2_ONLY_METRIC_NAMES`` and this set when it is 2.x-only *and* event-gated; the +# duplication is intentional, so each rule stays auditable in isolation when v1 runs (which +# strip ``V2_ONLY_METRIC_NAMES`` first) and v2 runs (which strip this set on top). +RARE_EVENT_METRIC_NAMES = frozenset( + { + 'n8n.ai.document.processed.count', + 'n8n.ai.embeddings.embedded.document.count', + 'n8n.ai.embeddings.embedded.query.count', + 'n8n.ai.llm.error.count', + 'n8n.ai.llm.generated.count', + 'n8n.ai.memory.added.message.count', + 'n8n.ai.memory.get.messages.count', + 'n8n.ai.output.parser.parsed.count', + 'n8n.ai.retriever.get.relevant.documents.count', + 'n8n.ai.text.splitter.split.count', + 'n8n.ai.tool.called.count', + 'n8n.ai.vector.store.populated.count', + 'n8n.ai.vector.store.searched.count', + 'n8n.ai.vector.store.updated.count', + 'n8n.audit.execution.data.reveal_failure.count', + 'n8n.audit.execution.data.revealed.count', + 'n8n.audit.package.deleted.count', + 'n8n.audit.package.installed.count', + 'n8n.audit.package.updated.count', + 'n8n.audit.user.api.created.count', + 'n8n.audit.user.api.deleted.count', + 'n8n.audit.user.credentials.created.count', + 'n8n.audit.user.credentials.deleted.count', + 'n8n.audit.user.credentials.shared.count', + 'n8n.audit.user.credentials.updated.count', + 'n8n.audit.user.deleted.count', + 'n8n.audit.user.email.failed.count', + 'n8n.audit.user.execution.deleted.count', + 'n8n.audit.user.invitation.accepted.count', + 'n8n.audit.user.invited.count', + 'n8n.audit.user.login.failed.count', + 'n8n.audit.user.login.success.count', + 'n8n.audit.user.mfa.disabled.count', + 'n8n.audit.user.mfa.enabled.count', + 'n8n.audit.user.reinvited.count', + 'n8n.audit.user.reset.count', + 'n8n.audit.user.reset.requested.count', + 'n8n.audit.user.signedup.count', + 'n8n.audit.user.updated.count', + 'n8n.audit.variable.created.count', + 'n8n.audit.variable.deleted.count', + 'n8n.audit.variable.updated.count', + 
'n8n.audit.workflow.archived.count', + 'n8n.audit.workflow.created.count', + 'n8n.audit.workflow.deactivated.count', + 'n8n.audit.workflow.deleted.count', + 'n8n.audit.workflow.resumed.count', + 'n8n.audit.workflow.unarchived.count', + 'n8n.audit.workflow.updated.count', + 'n8n.audit.workflow.version.updated.count', + 'n8n.audit.workflow.waiting.count', + 'n8n.embed.login.failures.count', + 'n8n.execution.throttled.count', + # Expression-engine observability metrics: gated on N8N_EXPRESSION_ENGINE=vm and + # N8N_EXPRESSION_ENGINE_OBSERVABILITY_ENABLED=true, neither of which the test or + # lab compose enable. Mapped + documented; live containers don't emit them. + 'n8n.expression.code.cache.eviction.count', + 'n8n.expression.code.cache.hit.count', + 'n8n.expression.code.cache.miss.count', + 'n8n.expression.code.cache.size', + 'n8n.expression.evaluation.duration.seconds.bucket', + 'n8n.expression.evaluation.duration.seconds.count', + 'n8n.expression.evaluation.duration.seconds.sum', + 'n8n.expression.pool.acquired.count', + 'n8n.expression.pool.replenish.failed.count', + 'n8n.expression.pool.scaled.to.zero.count', + 'n8n.expression.pool.scaled.up.count', + # prom-client's per-type libuv request gauge: only has samples while a libuv request is in flight + # at scrape time, so live containers can produce or omit it depending on timing. 
+ 'n8n.nodejs.active.requests', + 'n8n.queue.job.stalled.count', + 'n8n.runner.response.received.count', + 'n8n.runner.task.requested.count', + 'n8n.token.exchange.failures.count', + 'n8n.workflow.cancelled.count', + } +) -E2E_METADATA = { - 'docker_volumes': ['/var/run/docker.sock:/var/run/docker.sock:ro'], +MAIN_INSTANCE = { + 'openmetrics_endpoint': f'http://{HOST}:{MAIN_PORT}/metrics', + 'tags': ['n8n_process:main'], } +WORKER_INSTANCE = { + 'openmetrics_endpoint': f'http://{HOST}:{WORKER_PORT}/metrics', + 'tags': ['n8n_process:worker'], +} +INSTANCE = MAIN_INSTANCE # back-compat default for unit tests + +E2E_METADATA = {'docker_volumes': ['/var/run/docker.sock:/var/run/docker.sock:ro']} + + +def get_compose_env_vars() -> dict[str, str]: + """Variables consumed by ``tests/docker/docker-compose.yaml``'s ``${...}`` placeholders. + + The lab compose hardcodes ports, so it doesn't need these — but passing the dict in either + mode is harmless and keeps ``conftest.py`` simple. + """ + return { + 'N8N_MAIN_HOST_PORT': str(MAIN_PORT), + 'N8N_WORKER_HOST_PORT': str(WORKER_PORT), + } + + +def get_fixture_path(filename: str) -> str: + return os.path.join(HERE, 'fixtures', filename) + + +def get_metadata_metrics_for_version(major: int = N8N_MAJOR, *, exclude_rare: bool = False) -> dict: + """Return the metadata.csv subset that the given n8n major version is expected to emit. + + Includes ``CHECK_LEVEL_METRIC_NAMES`` (e.g. ``n8n.readiness.check``) because they are submitted + by the check itself, not by the OpenMetrics scrape. Use ``get_openmetrics_metadata_metrics`` + when asserting only against the OpenMetrics surface. 
+ """ + metadata = get_metadata_metrics() + if major < 2: + for name in V2_ONLY_METRIC_NAMES: + metadata.pop(name, None) + if exclude_rare: + for name in RARE_EVENT_METRIC_NAMES: + metadata.pop(name, None) + return metadata + -TEST_METRICS = [ - 'n8n.active.workflow.count', - 'n8n.api.request.duration.seconds.bucket', - 'n8n.api.request.duration.seconds.count', - 'n8n.api.request.duration.seconds.sum', - 'n8n.api.requests.count', - 'n8n.cache.errors.count', - 'n8n.cache.hits.count', - 'n8n.cache.latency.seconds.bucket', - 'n8n.cache.latency.seconds.count', - 'n8n.cache.latency.seconds.sum', - 'n8n.cache.misses.count', - 'n8n.cache.operations.count', - 'n8n.eventbus.connections.total', - 'n8n.eventbus.events.failed.count', - 'n8n.eventbus.events.processed.count', - 'n8n.eventbus.events.count', - 'n8n.eventbus.queue.size', - 'n8n.instance.role.leader', - 'n8n.last.activity', - 'n8n.nodejs.active.handles', - 'n8n.nodejs.active.handles.total', - 'n8n.nodejs.active.requests.total', - 'n8n.nodejs.active.resources', - 'n8n.nodejs.active.resources.total', - 'n8n.nodejs.event.loop.lag.seconds', - 'n8n.nodejs.eventloop.lag.max.seconds', - 'n8n.nodejs.eventloop.lag.mean.seconds', - 'n8n.nodejs.eventloop.lag.min.seconds', - 'n8n.nodejs.eventloop.lag.p50.seconds', - 'n8n.nodejs.eventloop.lag.p90.seconds', - 'n8n.nodejs.eventloop.lag.p99.seconds', - 'n8n.nodejs.eventloop.lag.seconds', - 'n8n.nodejs.eventloop.lag.stddev.seconds', - 'n8n.nodejs.external.memory.bytes', - 'n8n.nodejs.gc.duration.seconds.bucket', - 'n8n.nodejs.gc.duration.seconds.count', - 'n8n.nodejs.gc.duration.seconds.sum', - 'n8n.nodejs.heap.size.total.bytes', - 'n8n.nodejs.heap.size.used.bytes', - 'n8n.nodejs.heap.space.size.available.bytes', - 'n8n.nodejs.heap.space.size.total.bytes', - 'n8n.nodejs.heap.space.size.used.bytes', - 'n8n.nodejs.heap.total.bytes', - 'n8n.nodejs.heap.used.bytes', - 'n8n.process.cpu.system.seconds.count', - 'n8n.process.cpu.user.seconds.count', - 'n8n.process.heap.bytes', - 
'n8n.process.max.fds', - 'n8n.process.open.fds', - 'n8n.process.resident.memory.bytes', - 'n8n.process.uptime.seconds', - 'n8n.process.virtual.memory.bytes', - 'n8n.queue.job.active.total', - 'n8n.queue.job.attempts.count', - 'n8n.queue.job.completed.count', - 'n8n.queue.job.delayed.total', - 'n8n.queue.job.dequeued.count', - 'n8n.queue.job.enqueued.count', - 'n8n.queue.job.failed.count', - 'n8n.queue.job.waiting.duration.seconds.bucket', - 'n8n.queue.job.waiting.duration.seconds.count', - 'n8n.queue.job.waiting.duration.seconds.sum', - 'n8n.queue.job.waiting.total', - 'n8n.queue.jobs.duration.seconds.bucket', - 'n8n.queue.jobs.duration.seconds.count', - 'n8n.queue.jobs.duration.seconds.sum', - 'n8n.queue.jobs.count', - 'n8n.readiness.check', - 'n8n.workflow.executions.active', - 'n8n.workflow.executions.duration.seconds.bucket', - 'n8n.workflow.executions.duration.seconds.count', - 'n8n.workflow.executions.duration.seconds.sum', - 'n8n.workflow.executions.count', - 'n8n.workflow.failed.count', - 'n8n.workflow.started.count', - 'n8n.workflow.success.count', - 'n8n.process.cpu.seconds.count', -] +def get_openmetrics_metadata_metrics(major: int = N8N_MAJOR, *, exclude_rare: bool = False) -> dict: + """Version-aware metadata subset minus metrics submitted by the check itself.""" + metadata = get_metadata_metrics_for_version(major, exclude_rare=exclude_rare) + for name in CHECK_LEVEL_METRIC_NAMES: + metadata.pop(name, None) + return metadata diff --git a/n8n/tests/conftest.py b/n8n/tests/conftest.py index c6face31f7d4c..3c775384528a0 100644 --- a/n8n/tests/conftest.py +++ b/n8n/tests/conftest.py @@ -3,27 +3,206 @@ # Licensed under a 3-clause BSD style license (see LICENSE) import copy +import json +import subprocess +from contextlib import suppress +from pathlib import Path +from typing import Any, Iterator import pytest +import requests from datadog_checks.dev import docker_run -from datadog_checks.dev.conditions import CheckEndpoints +from 
datadog_checks.dev.conditions import CheckEndpoints, WaitFor from . import common +# Test webhook paths (only the test-fixture workflows expose these; the lab workflows +# expose /webhook/lab/* paths and are exercised by the lab traffic generator instead). +WEBHOOK_OK_PATH = '/webhook/test' +WEBHOOK_FAIL_PATH = '/webhook/fail' + +CONTAINER = 'n8n-test' + +# Directories whose ``*.json`` workflow files are bind-mounted into the container. +# In test mode only the two test fixtures are mounted at ``/workflows/``; +# in lab mode the lab compose mounts both the test fixtures and the lab workflows. +_TEST_WORKFLOW_DIR = Path(common.HERE) / 'docker' +_LAB_WORKFLOW_DIR = Path(common.HERE) / 'lab' / 'workflows' + + +def _docker_exec(*cmd: str) -> str: + return subprocess.check_output(['docker', 'exec', CONTAINER, *cmd], stderr=subprocess.STDOUT).decode() + + +def _n8n_healthy() -> None: + """WaitFor predicate: succeeds once /healthz returns 200, retries on connection errors or non-2xx.""" + requests.get(f'http://{common.HOST}:{common.MAIN_PORT}/healthz', timeout=2).raise_for_status() + + +def _test_webhook_registered() -> None: + """WaitFor predicate: succeeds once /webhook/test responds non-404. + + After ``docker compose restart n8n`` the ``/healthz`` endpoint can be served before n8n has + finished re-registering active workflows' webhook routes. On n8n 2.x that gap is wide enough + to make ``_generate_workflow_traffic`` race the registration and observe a 404. Polling the + webhook itself closes the race. + """ + response = requests.get(f'http://{common.HOST}:{common.MAIN_PORT}{WEBHOOK_OK_PATH}', timeout=5) + if response.status_code == 404: + raise RuntimeError(f'Webhook {WEBHOOK_OK_PATH} not yet registered (status 404)') + + +def _workflow_files() -> list[Path]: + """Return every workflow JSON file that the active compose mounts into the container. 
+ + The lab compose mounts both the test fixtures and the lab workflows under ``/workflows/``; + the test compose mounts only the two test fixtures. + """ + files = sorted(_TEST_WORKFLOW_DIR.glob('sample_workflow*.json')) + if common.IS_LAB: + files += sorted(_LAB_WORKFLOW_DIR.glob('lab_*.json')) + return files + + +def _workflow_id(path: Path) -> str: + return json.loads(path.read_text())['id'] + + +def _activate_imported_workflows() -> None: + """Import all bind-mounted workflows by stable id, activate them, restart n8n so webhooks register. + + Used as a ``docker_run`` condition. The earlier ``CheckEndpoints`` conditions guarantee n8n is + booted before we issue CLI commands; the internal ``WaitFor(_n8n_healthy)`` re-waits for n8n + after the restart so the next condition runs against a live process. + """ + for path in _workflow_files(): + _docker_exec('n8n', 'import:workflow', f'--input=/workflows/{path.name}') + _docker_exec('n8n', 'update:workflow', f'--id={_workflow_id(path)}', '--active=true') + + subprocess.check_call( + ['docker', 'compose', '-f', common.COMPOSE_FILE, 'restart', 'n8n'], + stderr=subprocess.STDOUT, + ) + WaitFor(_n8n_healthy, attempts=45, wait=2)() + if not common.IS_LAB: + # /healthz returns 200 before webhook routes are re-registered (visible on n8n 2.x); + # wait for the integration-test webhook to actually serve before downstream conditions. + WaitFor(_test_webhook_registered, attempts=30, wait=2)() + + +def _generate_workflow_traffic(iterations: int = 5) -> None: + """Trigger the test webhooks + a few API endpoints so workflow / HTTP histogram metrics fire. + + Lab mode skips this — the lab traffic generator owns traffic generation and runs much + longer / richer mixes than the integration tests need. 
+ """ + if common.IS_LAB: + return + + base_url = f'http://{common.HOST}:{common.MAIN_PORT}' + api_paths = ('/healthz', '/healthz/readiness', '/rest/login') + ok_responses = 0 + last_status: int | None = None + last_exc: Exception | None = None + for _ in range(iterations): + try: + ok = requests.get(f'{base_url}{WEBHOOK_OK_PATH}', timeout=5) + last_status = ok.status_code + # 4xx means the webhook responded but didn't execute the workflow (e.g. not yet + # registered after restart); only 200 proves the workflow body ran end-to-end. + if ok.status_code == 200: + ok_responses += 1 + except requests.RequestException as exc: + last_exc = exc + # Webhook fail is *expected* to error out — that's the point of triggering it. + for path in (WEBHOOK_FAIL_PATH, *api_paths): + with suppress(requests.RequestException): + requests.get(f'{base_url}{path}', timeout=5) + if ok_responses == 0: + raise RuntimeError( + f'Test webhook returned no 200 responses (last_status={last_status}, last_exc={last_exc!r}); ' + 'workflow registration likely failed' + ) + + +def _workflow_started_non_zero() -> None: + """WaitFor predicate: succeeds once any ``n8n_workflow_started_total`` sample is non-zero. + + Raises with the last seen samples on failure so that ``WaitFor``'s ``RetryError`` surfaces + actionable diagnostics on timeout (e.g. n8n renamed the counter, or no execution fired). + Parses the metric value as a float so that ``0.0`` / ``0e+0`` are recognised as zero and + ``# HELP``/``# TYPE`` comment lines that happen to share the prefix are skipped. + """ + payload = requests.get(common.MAIN_INSTANCE['openmetrics_endpoint'], timeout=3).text + matching: list[str] = [] + for line in payload.splitlines(): + if line.startswith('#') or not line.startswith('n8n_workflow_started_total'): + continue + matching.append(line) + try: + value = float(line.rsplit(' ', 1)[-1]) + except ValueError: + continue + if value > 0: + return + raise RuntimeError(f'No non-zero workflow_started_total samples yet. 
Last seen: {matching or ""}') + @pytest.fixture(scope='session') -def dd_environment(): - compose_file = common.COMPOSE_FILE - conditions = [ - CheckEndpoints(common.INSTANCE["openmetrics_endpoint"]), +def dd_environment() -> Iterator[Any]: + conditions: list[Any] = [ + # n8n main is booted and serving /metrics. + CheckEndpoints(common.MAIN_INSTANCE['openmetrics_endpoint']), + # Import + activate workflows, restart n8n so webhooks register, wait for /healthz. + _activate_imported_workflows, + # Worker is checked *after* the main restart so any cascade effect on the worker is caught + # before downstream conditions try to talk to it. ``docker compose restart n8n`` does not + # touch the worker today, but the assertion is cheap and forward-proofs against changes. + CheckEndpoints(common.WORKER_INSTANCE['openmetrics_endpoint']), ] - with docker_run(compose_file, conditions=conditions): - yield { - 'instances': [common.INSTANCE], - } + if not common.IS_LAB: + # Fire enough webhook traffic to register samples for the workflow and HTTP histograms, + # then wait until ``n8n_workflow_started_total`` actually goes non-zero. Both stay in + # ``conditions`` so that ``docker_run``'s ``attempts=2`` retry covers transient failures + # without leaving these calls exposed to the post-yield teardown path. + conditions.append(_generate_workflow_traffic) + conditions.append(WaitFor(_workflow_started_non_zero, attempts=15, wait=2)) + + instances = {'instances': [common.MAIN_INSTANCE, common.WORKER_INSTANCE]} + with docker_run(common.COMPOSE_FILE, conditions=conditions, env_vars=common.get_compose_env_vars()): + if common.IS_LAB: + lab_config = copy.deepcopy(instances) + lab_config['logs'] = [ + { + 'type': 'file', + 'path': '/n8n-event-logs/n8nEventLog*.log', + 'source': 'n8n', + 'service': 'n8n-event-bus', + }, + ] + # Lab mode: mount Docker state for stdout autodiscovery and the n8n data + # volume for event-bus file logs. 
+ yield ( + lab_config, + { + 'docker_volumes': [ + '/var/run/docker.sock:/var/run/docker.sock:ro', + '/var/lib/docker/containers:/var/lib/docker/containers:ro', + '/opt/datadog-agent/run:/opt/datadog-agent/run:rw', + 'n8n_lab_data:/n8n-event-logs:ro', + ], + }, + ) + else: + yield instances, common.E2E_METADATA + + +@pytest.fixture +def instance() -> dict[str, Any]: + return copy.deepcopy(common.MAIN_INSTANCE) @pytest.fixture -def instance(): - return copy.deepcopy(common.INSTANCE) +def worker_instance() -> dict[str, Any]: + return copy.deepcopy(common.WORKER_INSTANCE) diff --git a/n8n/tests/docker/Dockerfile b/n8n/tests/docker/Dockerfile deleted file mode 100644 index d74b7ccd9c162..0000000000000 --- a/n8n/tests/docker/Dockerfile +++ /dev/null @@ -1,14 +0,0 @@ -ARG N8N_VERSION=1.118.1 -FROM n8nio/n8n:${N8N_VERSION} - -# Set environment variables to enable metrics and logging -ENV N8N_METRICS=true \ - N8N_LOG_LEVEL=debug \ - N8N_METRICS_INCLUDE_DEFAULT_METRICS=true \ - N8N_METRICS_INCLUDE_CACHE_METRICS=true \ - N8N_METRICS_INCLUDE_MESSAGE_EVENT_BUS_METRICS=true \ - N8N_HOST=0.0.0.0 \ - N8N_PORT=5678 - -# Expose the n8n port -EXPOSE 5678 diff --git a/n8n/tests/docker/README.md b/n8n/tests/docker/README.md index bb1d23cc34ce1..ac2ded112e06f 100644 --- a/n8n/tests/docker/README.md +++ b/n8n/tests/docker/README.md @@ -82,7 +82,8 @@ This setup is designed for integration testing. The n8n instance will: ## Notes -- The container uses the latest official n8n Docker image +- The container uses the official `n8nio/n8n` image at the version selected via the `N8N_VERSION` environment variable (forwarded by `hatch.toml`'s test matrix). The default in `docker-compose.yaml` is `1.118.1`. +- Queue mode is enabled with a Redis container and a separate `n8n-worker` service that exposes its own `/metrics` endpoint on host port `5680` (the default `5679` collides with the n8n 2.x task runner broker). 
- Data is persisted in a Docker volume named `n8n_data` - The health check waits up to 30 seconds for n8n to start before marking it as healthy diff --git a/n8n/tests/docker/docker-compose.yaml b/n8n/tests/docker/docker-compose.yaml index fb8da72559b78..554114d2819a2 100644 --- a/n8n/tests/docker/docker-compose.yaml +++ b/n8n/tests/docker/docker-compose.yaml @@ -1,33 +1,50 @@ services: + redis: + image: redis:7-alpine + container_name: n8n-test-redis + healthcheck: + test: ["CMD", "redis-cli", "ping"] + interval: 5s + timeout: 3s + retries: 5 + n8n: - build: - context: . - dockerfile: Dockerfile + image: n8nio/n8n:${N8N_VERSION:-1.118.1} container_name: n8n-test ports: - - "5678:5678" + - "${N8N_MAIN_HOST_PORT:-5678}:5678" environment: - # Enable metrics endpoint - - N8N_METRICS=true - - N8N_METRICS_INCLUDE_DEFAULT_METRICS=true - - N8N_METRICS_INCLUDE_CACHE_METRICS=true - - N8N_METRICS_INCLUDE_MESSAGE_EVENT_BUS_METRICS=true - - N8N_METRICS_INCLUDE_API_ENDPOINTS=true - - N8N_METRICS_INCLUDE_WORKFLOW_ID_LABEL=true - # Logging configuration + - EXECUTIONS_MODE=queue + - QUEUE_BULL_REDIS_HOST=redis + - QUEUE_BULL_REDIS_PORT=6379 - N8N_LOG_LEVEL=debug - N8N_LOG_OUTPUT=console - # Basic configuration - N8N_HOST=0.0.0.0 - N8N_PORT=5678 - N8N_PROTOCOL=http - # Authentication (optional for testing) - N8N_BASIC_AUTH_ACTIVE=true - N8N_BASIC_AUTH_USER=admin - N8N_BASIC_AUTH_PASSWORD=admin + - N8N_DIAGNOSTICS_ENABLED=false + - N8N_VERSION_NOTIFICATIONS_ENABLED=false + - N8N_TEMPLATES_ENABLED=false + - N8N_RUNNERS_ENABLED=false + - N8N_METRICS=true + - N8N_METRICS_INCLUDE_DEFAULT_METRICS=true + - N8N_METRICS_INCLUDE_CACHE_METRICS=true + - N8N_METRICS_INCLUDE_MESSAGE_EVENT_BUS_METRICS=true + - N8N_METRICS_INCLUDE_API_ENDPOINTS=true + - N8N_METRICS_INCLUDE_QUEUE_METRICS=true + - N8N_METRICS_INCLUDE_WORKFLOW_ID_LABEL=true + - N8N_METRICS_INCLUDE_WORKFLOW_STATISTICS=true volumes: - n8n_data:/home/node/.n8n - ${N8N_LOG_FOLDER:-./logs}:/var/log/n8n + - 
./sample_workflow.json:/workflows/sample_workflow.json:ro + - ./sample_workflow_failing.json:/workflows/sample_workflow_failing.json:ro + depends_on: + redis: + condition: service_healthy healthcheck: test: ["CMD", "wget", "-q", "--spider", "http://localhost:5678/healthz"] interval: 10s @@ -35,7 +52,35 @@ services: retries: 5 start_period: 30s + n8n-worker: + image: n8nio/n8n:${N8N_VERSION:-1.118.1} + container_name: n8n-test-worker + command: ["worker"] + ports: + - "${N8N_WORKER_HOST_PORT:-5680}:5680" + environment: + - EXECUTIONS_MODE=queue + - QUEUE_BULL_REDIS_HOST=redis + - QUEUE_BULL_REDIS_PORT=6379 + - N8N_LOG_LEVEL=info + - N8N_LOG_OUTPUT=console + - N8N_RUNNERS_ENABLED=false + - N8N_METRICS=true + - N8N_METRICS_INCLUDE_DEFAULT_METRICS=true + - N8N_METRICS_INCLUDE_CACHE_METRICS=true + - N8N_METRICS_INCLUDE_MESSAGE_EVENT_BUS_METRICS=true + - N8N_METRICS_INCLUDE_API_ENDPOINTS=true + - N8N_METRICS_INCLUDE_QUEUE_METRICS=true + - N8N_METRICS_INCLUDE_WORKFLOW_ID_LABEL=true + - N8N_METRICS_INCLUDE_WORKFLOW_STATISTICS=true + - QUEUE_HEALTH_CHECK_ACTIVE=true + - QUEUE_HEALTH_CHECK_PORT=5680 + volumes: + - n8n_data:/home/node/.n8n + depends_on: + n8n: + condition: service_healthy + volumes: n8n_data: driver: local - diff --git a/n8n/tests/docker/sample_workflow.json b/n8n/tests/docker/sample_workflow.json new file mode 100644 index 0000000000000..94400565cfd9e --- /dev/null +++ b/n8n/tests/docker/sample_workflow.json @@ -0,0 +1,59 @@ +{ + "id": "testWorkflowOk", + "versionId": "00000000-0000-0000-0000-000000000001", + "name": "Test Workflow", + "nodes": [ + { + "parameters": { + "httpMethod": "GET", + "path": "test", + "responseMode": "lastNode", + "options": {} + }, + "id": "11111111-1111-1111-1111-111111111111", + "name": "Webhook", + "type": "n8n-nodes-base.webhook", + "typeVersion": 2, + "position": [240, 300], + "webhookId": "test-webhook-aaaa-bbbb-cccc-111111111111" + }, + { + "parameters": { + "assignments": { + "assignments": [ + { + "id": "1", + "name": 
"ok", + "value": "true", + "type": "string" + } + ] + }, + "options": {} + }, + "id": "22222222-2222-2222-2222-222222222222", + "name": "Set", + "type": "n8n-nodes-base.set", + "typeVersion": 3.4, + "position": [460, 300] + } + ], + "connections": { + "Webhook": { + "main": [ + [ + { + "node": "Set", + "type": "main", + "index": 0 + } + ] + ] + } + }, + "active": false, + "settings": { + "executionOrder": "v1" + }, + "pinData": {} +} diff --git a/n8n/tests/docker/sample_workflow_failing.json b/n8n/tests/docker/sample_workflow_failing.json new file mode 100644 index 0000000000000..159f08bfc8843 --- /dev/null +++ b/n8n/tests/docker/sample_workflow_failing.json @@ -0,0 +1,50 @@ +{ + "id": "testWorkflowFail", + "versionId": "00000000-0000-0000-0000-000000000002", + "name": "Failing Test Workflow", + "nodes": [ + { + "parameters": { + "httpMethod": "GET", + "path": "fail", + "responseMode": "lastNode", + "options": {} + }, + "id": "33333333-3333-3333-3333-333333333333", + "name": "Webhook", + "type": "n8n-nodes-base.webhook", + "typeVersion": 2, + "position": [240, 300], + "webhookId": "test-fail-aaaa-bbbb-cccc-333333333333" + }, + { + "parameters": { + "language": "javaScript", + "jsCode": "throw new Error('intentional failure for metrics tests');" + }, + "id": "44444444-4444-4444-4444-444444444444", + "name": "Code", + "type": "n8n-nodes-base.code", + "typeVersion": 2, + "position": [460, 300] + } + ], + "connections": { + "Webhook": { + "main": [ + [ + { + "node": "Code", + "type": "main", + "index": 0 + } + ] + ] + } + }, + "active": false, + "settings": { + "executionOrder": "v1" + }, + "pinData": {} +} diff --git a/n8n/tests/fixtures/n8n.txt b/n8n/tests/fixtures/n8n.txt index c670f02d7fe46..0eaa373daa5bf 100644 --- a/n8n/tests/fixtures/n8n.txt +++ b/n8n/tests/fixtures/n8n.txt @@ -1,34 +1,34 @@ # HELP n8n_process_cpu_user_seconds_total Total user CPU time spent in seconds. 
# TYPE n8n_process_cpu_user_seconds_total counter -n8n_process_cpu_user_seconds_total 8.298932999999998 +n8n_process_cpu_user_seconds_total 0.921656 # HELP n8n_process_cpu_system_seconds_total Total system CPU time spent in seconds. # TYPE n8n_process_cpu_system_seconds_total counter -n8n_process_cpu_system_seconds_total 3.1041119999999998 +n8n_process_cpu_system_seconds_total 0.157367 # HELP n8n_process_cpu_seconds_total Total user and system CPU time spent in seconds. # TYPE n8n_process_cpu_seconds_total counter -n8n_process_cpu_seconds_total 11.403044999999999 +n8n_process_cpu_seconds_total 1.0790229999999998 # HELP n8n_process_start_time_seconds Start time of the process since unix epoch in seconds. # TYPE n8n_process_start_time_seconds gauge -n8n_process_start_time_seconds 1761656578 +n8n_process_start_time_seconds 1778234580 # HELP n8n_process_resident_memory_bytes Resident memory size in bytes. # TYPE n8n_process_resident_memory_bytes gauge -n8n_process_resident_memory_bytes 245043200 +n8n_process_resident_memory_bytes 267681792 # HELP n8n_process_virtual_memory_bytes Virtual memory size in bytes. # TYPE n8n_process_virtual_memory_bytes gauge -n8n_process_virtual_memory_bytes 33656197120 +n8n_process_virtual_memory_bytes 18517532672 # HELP n8n_process_heap_bytes Process heap size in bytes. # TYPE n8n_process_heap_bytes gauge -n8n_process_heap_bytes 277200896 +n8n_process_heap_bytes 840728576 # HELP n8n_process_open_fds Number of open file descriptors. # TYPE n8n_process_open_fds gauge -n8n_process_open_fds 44 +n8n_process_open_fds 45 # HELP n8n_process_max_fds Maximum number of open file descriptors. # TYPE n8n_process_max_fds gauge @@ -36,59 +36,62 @@ n8n_process_max_fds 1048576 # HELP n8n_nodejs_eventloop_lag_seconds Lag of event loop in seconds. 
# TYPE n8n_nodejs_eventloop_lag_seconds gauge -n8n_nodejs_eventloop_lag_seconds 0.002765567 +n8n_nodejs_eventloop_lag_seconds 0.008676917 # HELP n8n_nodejs_eventloop_lag_min_seconds The minimum recorded event loop delay. # TYPE n8n_nodejs_eventloop_lag_min_seconds gauge -n8n_nodejs_eventloop_lag_min_seconds 0.010018816 +n8n_nodejs_eventloop_lag_min_seconds 0.006340608 # HELP n8n_nodejs_eventloop_lag_max_seconds The maximum recorded event loop delay. # TYPE n8n_nodejs_eventloop_lag_max_seconds gauge -n8n_nodejs_eventloop_lag_max_seconds 0.011239423 +n8n_nodejs_eventloop_lag_max_seconds 0.030228479 # HELP n8n_nodejs_eventloop_lag_mean_seconds The mean of the recorded event loop delays. # TYPE n8n_nodejs_eventloop_lag_mean_seconds gauge -n8n_nodejs_eventloop_lag_mean_seconds 0.010092521938958708 +n8n_nodejs_eventloop_lag_mean_seconds 0.012079332927643785 # HELP n8n_nodejs_eventloop_lag_stddev_seconds The standard deviation of the recorded event loop delays. # TYPE n8n_nodejs_eventloop_lag_stddev_seconds gauge -n8n_nodejs_eventloop_lag_stddev_seconds 0.00016945350643679045 +n8n_nodejs_eventloop_lag_stddev_seconds 0.0011467288819057616 # HELP n8n_nodejs_eventloop_lag_p50_seconds The 50th percentile of the recorded event loop delays. # TYPE n8n_nodejs_eventloop_lag_p50_seconds gauge -n8n_nodejs_eventloop_lag_p50_seconds 0.010067967 +n8n_nodejs_eventloop_lag_p50_seconds 0.012001279 # HELP n8n_nodejs_eventloop_lag_p90_seconds The 90th percentile of the recorded event loop delays. # TYPE n8n_nodejs_eventloop_lag_p90_seconds gauge -n8n_nodejs_eventloop_lag_p90_seconds 0.010067967 +n8n_nodejs_eventloop_lag_p90_seconds 0.013254655 # HELP n8n_nodejs_eventloop_lag_p99_seconds The 99th percentile of the recorded event loop delays. 
# TYPE n8n_nodejs_eventloop_lag_p99_seconds gauge -n8n_nodejs_eventloop_lag_p99_seconds 0.011124735 +n8n_nodejs_eventloop_lag_p99_seconds 0.014426111 # HELP n8n_nodejs_active_resources Number of active resources that are currently keeping the event loop alive, grouped by async resource type. # TYPE n8n_nodejs_active_resources gauge -n8n_nodejs_active_resources{type="PipeWrap"} 2 -n8n_nodejs_active_resources{type="TCPServerWrap"} 1 -n8n_nodejs_active_resources{type="TCPSocketWrap"} 1 -n8n_nodejs_active_resources{type="Timeout"} 13 +n8n_nodejs_active_resources{type="PipeWrap"} 5 +n8n_nodejs_active_resources{type="TCPServerWrap"} 2 +n8n_nodejs_active_resources{type="TCPSocketWrap"} 9 +n8n_nodejs_active_resources{type="ProcessWrap"} 1 +n8n_nodejs_active_resources{type="Timeout"} 20 n8n_nodejs_active_resources{type="Immediate"} 1 # HELP n8n_nodejs_active_resources_total Total number of active resources. # TYPE n8n_nodejs_active_resources_total gauge -n8n_nodejs_active_resources_total 18 +n8n_nodejs_active_resources_total 38 # HELP n8n_nodejs_active_handles Number of active libuv handles grouped by handle type. Every handle type is C++ class name. # TYPE n8n_nodejs_active_handles gauge -n8n_nodejs_active_handles{type="Socket"} 3 -n8n_nodejs_active_handles{type="Server"} 1 +n8n_nodejs_active_handles{type="Socket"} 14 +n8n_nodejs_active_handles{type="Server"} 2 +n8n_nodejs_active_handles{type="ChildProcess"} 1 # HELP n8n_nodejs_active_handles_total Total number of active handles. # TYPE n8n_nodejs_active_handles_total gauge -n8n_nodejs_active_handles_total 4 +n8n_nodejs_active_handles_total 17 # HELP n8n_nodejs_active_requests Number of active libuv requests grouped by request type. Every request type is C++ class name. # TYPE n8n_nodejs_active_requests gauge +n8n_nodejs_active_requests{type="FSReqCallback"} 1 # HELP n8n_nodejs_active_requests_total Total number of active requests. 
# TYPE n8n_nodejs_active_requests_total gauge @@ -96,81 +99,87 @@ n8n_nodejs_active_requests_total 0 # HELP n8n_nodejs_heap_size_total_bytes Process heap size from Node.js in bytes. # TYPE n8n_nodejs_heap_size_total_bytes gauge -n8n_nodejs_heap_size_total_bytes 142774272 +n8n_nodejs_heap_size_total_bytes 146391040 # HELP n8n_nodejs_heap_size_used_bytes Process heap size used from Node.js in bytes. # TYPE n8n_nodejs_heap_size_used_bytes gauge -n8n_nodejs_heap_size_used_bytes 136342632 +n8n_nodejs_heap_size_used_bytes 136336448 # HELP n8n_nodejs_external_memory_bytes Node.js external memory size in bytes. # TYPE n8n_nodejs_external_memory_bytes gauge -n8n_nodejs_external_memory_bytes 20824585 +n8n_nodejs_external_memory_bytes 20993559 # HELP n8n_nodejs_heap_space_size_total_bytes Process heap space size total from Node.js in bytes. # TYPE n8n_nodejs_heap_space_size_total_bytes gauge n8n_nodejs_heap_space_size_total_bytes{space="read_only"} 0 -n8n_nodejs_heap_space_size_total_bytes{space="new"} 1048576 -n8n_nodejs_heap_space_size_total_bytes{space="old"} 122208256 -n8n_nodejs_heap_space_size_total_bytes{space="code"} 4718592 +n8n_nodejs_heap_space_size_total_bytes{space="new"} 2097152 +n8n_nodejs_heap_space_size_total_bytes{space="old"} 116920320 +n8n_nodejs_heap_space_size_total_bytes{space="code"} 5505024 n8n_nodejs_heap_space_size_total_bytes{space="shared"} 0 -n8n_nodejs_heap_space_size_total_bytes{space="trusted"} 7643136 +n8n_nodejs_heap_space_size_total_bytes{space="trusted"} 11624448 +n8n_nodejs_heap_space_size_total_bytes{space="shared_trusted"} 0 n8n_nodejs_heap_space_size_total_bytes{space="new_large_object"} 0 -n8n_nodejs_heap_space_size_total_bytes{space="large_object"} 7000064 -n8n_nodejs_heap_space_size_total_bytes{space="code_large_object"} 155648 +n8n_nodejs_heap_space_size_total_bytes{space="large_object"} 9875456 +n8n_nodejs_heap_space_size_total_bytes{space="code_large_object"} 368640 
n8n_nodejs_heap_space_size_total_bytes{space="shared_large_object"} 0 +n8n_nodejs_heap_space_size_total_bytes{space="shared_trusted_large_object"} 0 n8n_nodejs_heap_space_size_total_bytes{space="trusted_large_object"} 0 # HELP n8n_nodejs_heap_space_size_used_bytes Process heap space size used from Node.js in bytes. # TYPE n8n_nodejs_heap_space_size_used_bytes gauge n8n_nodejs_heap_space_size_used_bytes{space="read_only"} 0 -n8n_nodejs_heap_space_size_used_bytes{space="new"} 652896 -n8n_nodejs_heap_space_size_used_bytes{space="old"} 119347344 -n8n_nodejs_heap_space_size_used_bytes{space="code"} 4183424 +n8n_nodejs_heap_space_size_used_bytes{space="new"} 382808 +n8n_nodejs_heap_space_size_used_bytes{space="old"} 111099512 +n8n_nodejs_heap_space_size_used_bytes{space="code"} 4853344 n8n_nodejs_heap_space_size_used_bytes{space="shared"} 0 -n8n_nodejs_heap_space_size_used_bytes{space="trusted"} 5187192 +n8n_nodejs_heap_space_size_used_bytes{space="trusted"} 9839592 +n8n_nodejs_heap_space_size_used_bytes{space="shared_trusted"} 0 n8n_nodejs_heap_space_size_used_bytes{space="new_large_object"} 0 -n8n_nodejs_heap_space_size_used_bytes{space="large_object"} 6837144 -n8n_nodejs_heap_space_size_used_bytes{space="code_large_object"} 138432 +n8n_nodejs_heap_space_size_used_bytes{space="large_object"} 9806288 +n8n_nodejs_heap_space_size_used_bytes{space="code_large_object"} 361728 n8n_nodejs_heap_space_size_used_bytes{space="shared_large_object"} 0 +n8n_nodejs_heap_space_size_used_bytes{space="shared_trusted_large_object"} 0 n8n_nodejs_heap_space_size_used_bytes{space="trusted_large_object"} 0 # HELP n8n_nodejs_heap_space_size_available_bytes Process heap space size available from Node.js in bytes. 
# TYPE n8n_nodejs_heap_space_size_available_bytes gauge n8n_nodejs_heap_space_size_available_bytes{space="read_only"} 0 -n8n_nodejs_heap_space_size_available_bytes{space="new"} 378016 -n8n_nodejs_heap_space_size_available_bytes{space="old"} 430568 -n8n_nodejs_heap_space_size_available_bytes{space="code"} 239680 +n8n_nodejs_heap_space_size_available_bytes{space="new"} 665704 +n8n_nodejs_heap_space_size_available_bytes{space="old"} 5484264 +n8n_nodejs_heap_space_size_available_bytes{space="code"} 651008 n8n_nodejs_heap_space_size_available_bytes{space="shared"} 0 -n8n_nodejs_heap_space_size_available_bytes{space="trusted"} 2323072 +n8n_nodejs_heap_space_size_available_bytes{space="trusted"} 1771032 +n8n_nodejs_heap_space_size_available_bytes{space="shared_trusted"} 0 n8n_nodejs_heap_space_size_available_bytes{space="new_large_object"} 1048576 n8n_nodejs_heap_space_size_available_bytes{space="large_object"} 0 n8n_nodejs_heap_space_size_available_bytes{space="code_large_object"} 0 n8n_nodejs_heap_space_size_available_bytes{space="shared_large_object"} 0 +n8n_nodejs_heap_space_size_available_bytes{space="shared_trusted_large_object"} 0 n8n_nodejs_heap_space_size_available_bytes{space="trusted_large_object"} 0 # HELP n8n_nodejs_version_info Node.js version info. # TYPE n8n_nodejs_version_info gauge -n8n_nodejs_version_info{version="v22.18.0",major="22",minor="18",patch="0"} 1 +n8n_nodejs_version_info{version="v24.14.1",major="24",minor="14",patch="1"} 1 # HELP n8n_nodejs_gc_duration_seconds Garbage collection duration by kind, one of major, minor, incremental or weakcb. 
# TYPE n8n_nodejs_gc_duration_seconds histogram -n8n_nodejs_gc_duration_seconds_bucket{le="0.001",kind="minor"} 128 -n8n_nodejs_gc_duration_seconds_bucket{le="0.01",kind="minor"} 132 -n8n_nodejs_gc_duration_seconds_bucket{le="0.1",kind="minor"} 132 -n8n_nodejs_gc_duration_seconds_bucket{le="1",kind="minor"} 132 -n8n_nodejs_gc_duration_seconds_bucket{le="2",kind="minor"} 132 -n8n_nodejs_gc_duration_seconds_bucket{le="5",kind="minor"} 132 -n8n_nodejs_gc_duration_seconds_bucket{le="+Inf",kind="minor"} 132 -n8n_nodejs_gc_duration_seconds_sum{kind="minor"} 0.09924478498101237 -n8n_nodejs_gc_duration_seconds_count{kind="minor"} 132 -n8n_nodejs_gc_duration_seconds_bucket{le="0.001",kind="incremental"} 1 +n8n_nodejs_gc_duration_seconds_bucket{le="0.001",kind="minor"} 0 +n8n_nodejs_gc_duration_seconds_bucket{le="0.01",kind="minor"} 2 +n8n_nodejs_gc_duration_seconds_bucket{le="0.1",kind="minor"} 2 +n8n_nodejs_gc_duration_seconds_bucket{le="1",kind="minor"} 2 +n8n_nodejs_gc_duration_seconds_bucket{le="2",kind="minor"} 2 +n8n_nodejs_gc_duration_seconds_bucket{le="5",kind="minor"} 2 +n8n_nodejs_gc_duration_seconds_bucket{le="+Inf",kind="minor"} 2 +n8n_nodejs_gc_duration_seconds_sum{kind="minor"} 0.004925500000128522 +n8n_nodejs_gc_duration_seconds_count{kind="minor"} 2 +n8n_nodejs_gc_duration_seconds_bucket{le="0.001",kind="incremental"} 0 n8n_nodejs_gc_duration_seconds_bucket{le="0.01",kind="incremental"} 2 n8n_nodejs_gc_duration_seconds_bucket{le="0.1",kind="incremental"} 2 n8n_nodejs_gc_duration_seconds_bucket{le="1",kind="incremental"} 2 n8n_nodejs_gc_duration_seconds_bucket{le="2",kind="incremental"} 2 n8n_nodejs_gc_duration_seconds_bucket{le="5",kind="incremental"} 2 n8n_nodejs_gc_duration_seconds_bucket{le="+Inf",kind="incremental"} 2 -n8n_nodejs_gc_duration_seconds_sum{kind="incremental"} 0.0022786640077829363 +n8n_nodejs_gc_duration_seconds_sum{kind="incremental"} 0.005939041999867186 n8n_nodejs_gc_duration_seconds_count{kind="incremental"} 2 
n8n_nodejs_gc_duration_seconds_bucket{le="0.001",kind="major"} 0 n8n_nodejs_gc_duration_seconds_bucket{le="0.01",kind="major"} 0 @@ -179,231 +188,425 @@ n8n_nodejs_gc_duration_seconds_bucket{le="1",kind="major"} 2 n8n_nodejs_gc_duration_seconds_bucket{le="2",kind="major"} 2 n8n_nodejs_gc_duration_seconds_bucket{le="5",kind="major"} 2 n8n_nodejs_gc_duration_seconds_bucket{le="+Inf",kind="major"} 2 -n8n_nodejs_gc_duration_seconds_sum{kind="major"} 0.1028408939987421 +n8n_nodejs_gc_duration_seconds_sum{kind="major"} 0.032123332999879496 n8n_nodejs_gc_duration_seconds_count{kind="major"} 2 +# HELP n8n_process_pss_bytes Proportional Set Size of the process in bytes. +# TYPE n8n_process_pss_bytes gauge +n8n_process_pss_bytes 220097536 + # HELP n8n_version_info n8n version info. # TYPE n8n_version_info gauge -n8n_version_info{version="v1.117.2",major="1",minor="117",patch="2"} 1 +n8n_version_info{version="v2.19.5",major="2",minor="19",patch="5"} 1 # HELP n8n_instance_role_leader Whether this main instance is the leader (1) or not (0). # TYPE n8n_instance_role_leader gauge n8n_instance_role_leader 1 +# HELP n8n_cache_hits_total Total number of cache hits. +# TYPE n8n_cache_hits_total counter +n8n_cache_hits_total 53 + +# HELP n8n_cache_misses_total Total number of cache misses. +# TYPE n8n_cache_misses_total counter +n8n_cache_misses_total 15 + +# HELP n8n_cache_updates_total Total number of cache updates. 
+# TYPE n8n_cache_updates_total counter +n8n_cache_updates_total 1 + # HELP n8n_http_request_duration_seconds duration histogram of http responses labeled with: status_code # TYPE n8n_http_request_duration_seconds histogram +n8n_http_request_duration_seconds_bucket{le="0.003"} 5 +n8n_http_request_duration_seconds_bucket{le="0.03"} 5 +n8n_http_request_duration_seconds_bucket{le="0.1"} 5 +n8n_http_request_duration_seconds_bucket{le="0.3"} 5 +n8n_http_request_duration_seconds_bucket{le="1.5"} 5 +n8n_http_request_duration_seconds_bucket{le="10"} 5 +n8n_http_request_duration_seconds_bucket{le="+Inf"} 5 +n8n_http_request_duration_seconds_sum 0.0018007910000000002 +n8n_http_request_duration_seconds_count 5 # HELP n8n_last_activity last instance activity (backend request) in Unix time (seconds). # TYPE n8n_last_activity gauge -n8n_last_activity 1761656582 +n8n_last_activity 1778234587 + +# HELP n8n_scaling_mode_queue_jobs_waiting Current number of enqueued jobs waiting for pickup in scaling mode. +# TYPE n8n_scaling_mode_queue_jobs_waiting gauge +n8n_scaling_mode_queue_jobs_waiting 0 + +# HELP n8n_scaling_mode_queue_jobs_active Current number of jobs being processed across all workers in scaling mode. +# TYPE n8n_scaling_mode_queue_jobs_active gauge +n8n_scaling_mode_queue_jobs_active 0 + +# HELP n8n_scaling_mode_queue_jobs_completed Total number of jobs completed across all workers in scaling mode since instance start. +# TYPE n8n_scaling_mode_queue_jobs_completed counter +n8n_scaling_mode_queue_jobs_completed 8 + +# HELP n8n_scaling_mode_queue_jobs_failed Total number of jobs failed across all workers in scaling mode since instance start. +# TYPE n8n_scaling_mode_queue_jobs_failed counter +n8n_scaling_mode_queue_jobs_failed 0 + +# HELP n8n_workflow_execution_duration_seconds Workflow execution duration in seconds. 
+# TYPE n8n_workflow_execution_duration_seconds histogram +n8n_workflow_execution_duration_seconds_bucket{le="0.005",status="success",mode="webhook",workflow_id="testWorkflowOk"} 3 +n8n_workflow_execution_duration_seconds_bucket{le="0.01",status="success",mode="webhook",workflow_id="testWorkflowOk"} 3 +n8n_workflow_execution_duration_seconds_bucket{le="0.025",status="success",mode="webhook",workflow_id="testWorkflowOk"} 4 +n8n_workflow_execution_duration_seconds_bucket{le="0.05",status="success",mode="webhook",workflow_id="testWorkflowOk"} 4 +n8n_workflow_execution_duration_seconds_bucket{le="0.1",status="success",mode="webhook",workflow_id="testWorkflowOk"} 4 +n8n_workflow_execution_duration_seconds_bucket{le="0.25",status="success",mode="webhook",workflow_id="testWorkflowOk"} 4 +n8n_workflow_execution_duration_seconds_bucket{le="0.5",status="success",mode="webhook",workflow_id="testWorkflowOk"} 4 +n8n_workflow_execution_duration_seconds_bucket{le="1",status="success",mode="webhook",workflow_id="testWorkflowOk"} 4 +n8n_workflow_execution_duration_seconds_bucket{le="2.5",status="success",mode="webhook",workflow_id="testWorkflowOk"} 4 +n8n_workflow_execution_duration_seconds_bucket{le="5",status="success",mode="webhook",workflow_id="testWorkflowOk"} 4 +n8n_workflow_execution_duration_seconds_bucket{le="10",status="success",mode="webhook",workflow_id="testWorkflowOk"} 4 +n8n_workflow_execution_duration_seconds_bucket{le="30",status="success",mode="webhook",workflow_id="testWorkflowOk"} 4 +n8n_workflow_execution_duration_seconds_bucket{le="60",status="success",mode="webhook",workflow_id="testWorkflowOk"} 4 +n8n_workflow_execution_duration_seconds_bucket{le="120",status="success",mode="webhook",workflow_id="testWorkflowOk"} 4 +n8n_workflow_execution_duration_seconds_bucket{le="300",status="success",mode="webhook",workflow_id="testWorkflowOk"} 4 +n8n_workflow_execution_duration_seconds_bucket{le="600",status="success",mode="webhook",workflow_id="testWorkflowOk"} 4 
+n8n_workflow_execution_duration_seconds_bucket{le="+Inf",status="success",mode="webhook",workflow_id="testWorkflowOk"} 4 +n8n_workflow_execution_duration_seconds_sum{status="success",mode="webhook",workflow_id="testWorkflowOk"} 0.027999999999999997 +n8n_workflow_execution_duration_seconds_count{status="success",mode="webhook",workflow_id="testWorkflowOk"} 4 +n8n_workflow_execution_duration_seconds_bucket{le="0.005",status="failed",mode="webhook",workflow_id="testWorkflowFail"} 3 +n8n_workflow_execution_duration_seconds_bucket{le="0.01",status="failed",mode="webhook",workflow_id="testWorkflowFail"} 3 +n8n_workflow_execution_duration_seconds_bucket{le="0.025",status="failed",mode="webhook",workflow_id="testWorkflowFail"} 3 +n8n_workflow_execution_duration_seconds_bucket{le="0.05",status="failed",mode="webhook",workflow_id="testWorkflowFail"} 3 +n8n_workflow_execution_duration_seconds_bucket{le="0.1",status="failed",mode="webhook",workflow_id="testWorkflowFail"} 3 +n8n_workflow_execution_duration_seconds_bucket{le="0.25",status="failed",mode="webhook",workflow_id="testWorkflowFail"} 3 +n8n_workflow_execution_duration_seconds_bucket{le="0.5",status="failed",mode="webhook",workflow_id="testWorkflowFail"} 4 +n8n_workflow_execution_duration_seconds_bucket{le="1",status="failed",mode="webhook",workflow_id="testWorkflowFail"} 4 +n8n_workflow_execution_duration_seconds_bucket{le="2.5",status="failed",mode="webhook",workflow_id="testWorkflowFail"} 4 +n8n_workflow_execution_duration_seconds_bucket{le="5",status="failed",mode="webhook",workflow_id="testWorkflowFail"} 4 +n8n_workflow_execution_duration_seconds_bucket{le="10",status="failed",mode="webhook",workflow_id="testWorkflowFail"} 4 +n8n_workflow_execution_duration_seconds_bucket{le="30",status="failed",mode="webhook",workflow_id="testWorkflowFail"} 4 +n8n_workflow_execution_duration_seconds_bucket{le="60",status="failed",mode="webhook",workflow_id="testWorkflowFail"} 4 
+n8n_workflow_execution_duration_seconds_bucket{le="120",status="failed",mode="webhook",workflow_id="testWorkflowFail"} 4 +n8n_workflow_execution_duration_seconds_bucket{le="300",status="failed",mode="webhook",workflow_id="testWorkflowFail"} 4 +n8n_workflow_execution_duration_seconds_bucket{le="600",status="failed",mode="webhook",workflow_id="testWorkflowFail"} 4 +n8n_workflow_execution_duration_seconds_bucket{le="+Inf",status="failed",mode="webhook",workflow_id="testWorkflowFail"} 4 +n8n_workflow_execution_duration_seconds_sum{status="failed",mode="webhook",workflow_id="testWorkflowFail"} 0.405 +n8n_workflow_execution_duration_seconds_count{status="failed",mode="webhook",workflow_id="testWorkflowFail"} 4 # HELP n8n_active_workflow_count Total number of active workflows. # TYPE n8n_active_workflow_count gauge -n8n_active_workflow_count{workflow_id="wf_8a3b2c1d"} 0 -n8n_active_workflow_count{workflow_id="wf_7f4e9a2b"} 0 -n8n_active_workflow_count{workflow_id="wf_5d6c8e1f"} 0 - -# HELP n8n_nodejs_event_loop_lag_seconds Event loop lag in seconds -# TYPE n8n_nodejs_event_loop_lag_seconds gauge -n8n_nodejs_event_loop_lag_seconds 0.0035 - -# HELP n8n_nodejs_heap_total_bytes Total heap size allocated in bytes -# TYPE n8n_nodejs_heap_total_bytes gauge -n8n_nodejs_heap_total_bytes 73400320 - -# HELP n8n_nodejs_heap_used_bytes Heap memory used in bytes -# TYPE n8n_nodejs_heap_used_bytes gauge -n8n_nodejs_heap_used_bytes 51200000 - -# HELP n8n_workflow_executions_total Total number of workflow executions -# TYPE n8n_workflow_executions_total counter -n8n_workflow_executions_total{status="success",workflow_id="wf_8a3b2c1d"} 45 -n8n_workflow_executions_total{status="success",workflow_id="wf_7f4e9a2b"} 38 -n8n_workflow_executions_total{status="success",workflow_id="wf_5d6c8e1f"} 45 -n8n_workflow_executions_total{status="error",workflow_id="wf_8a3b2c1d"} 3 -n8n_workflow_executions_total{status="error",workflow_id="wf_5d6c8e1f"} 4 - -# HELP n8n_workflow_executions_duration_seconds 
Workflow execution duration in seconds -# TYPE n8n_workflow_executions_duration_seconds histogram -n8n_workflow_executions_duration_seconds_bucket{le="0.1",workflow_id="wf_8a3b2c1d"} 5 -n8n_workflow_executions_duration_seconds_bucket{le="1",workflow_id="wf_8a3b2c1d"} 18 -n8n_workflow_executions_duration_seconds_bucket{le="+Inf",workflow_id="wf_8a3b2c1d"} 48 -n8n_workflow_executions_duration_seconds_sum{workflow_id="wf_8a3b2c1d"} 14.3 -n8n_workflow_executions_duration_seconds_count{workflow_id="wf_8a3b2c1d"} 48 -n8n_workflow_executions_duration_seconds_bucket{le="0.1",workflow_id="wf_7f4e9a2b"} 4 -n8n_workflow_executions_duration_seconds_bucket{le="1",workflow_id="wf_7f4e9a2b"} 15 -n8n_workflow_executions_duration_seconds_bucket{le="+Inf",workflow_id="wf_7f4e9a2b"} 38 -n8n_workflow_executions_duration_seconds_sum{workflow_id="wf_7f4e9a2b"} 11.2 -n8n_workflow_executions_duration_seconds_count{workflow_id="wf_7f4e9a2b"} 38 -n8n_workflow_executions_duration_seconds_bucket{le="0.1",workflow_id="wf_5d6c8e1f"} 3 -n8n_workflow_executions_duration_seconds_bucket{le="1",workflow_id="wf_5d6c8e1f"} 12 -n8n_workflow_executions_duration_seconds_bucket{le="+Inf",workflow_id="wf_5d6c8e1f"} 49 -n8n_workflow_executions_duration_seconds_sum{workflow_id="wf_5d6c8e1f"} 12.7 -n8n_workflow_executions_duration_seconds_count{workflow_id="wf_5d6c8e1f"} 49 - -# HELP n8n_workflow_started_total Total number of workflows started -# TYPE n8n_workflow_started_total counter -n8n_workflow_started_total 25634 -n8n_workflow_started_total{workflow_id="12",workflow_name="CRM Sync"} 8142 -n8n_workflow_started_total{workflow_id="25",workflow_name="Webhook Intake"} 14290 -n8n_workflow_started_total{workflow_id="33",workflow_name="Slack Alerts"} 2202 +n8n_active_workflow_count 2 -# HELP n8n_workflow_success_total Total number of workflows completed successfully -# TYPE n8n_workflow_success_total counter -n8n_workflow_success_total 25209 -n8n_workflow_success_total{workflow_id="12",workflow_name="CRM Sync"} 
8059 -n8n_workflow_success_total{workflow_id="25",workflow_name="Webhook Intake"} 14135 -n8n_workflow_success_total{workflow_id="33",workflow_name="Slack Alerts"} 2015 +# HELP n8n_production_executions Total number of production workflow executions (success + error). +# TYPE n8n_production_executions gauge +n8n_production_executions 8 -# HELP n8n_workflow_failed_total Total number of workflows that failed -# TYPE n8n_workflow_failed_total counter -n8n_workflow_failed_total 425 -n8n_workflow_failed_total{workflow_id="12",workflow_name="CRM Sync"} 83 -n8n_workflow_failed_total{workflow_id="25",workflow_name="Webhook Intake"} 155 -n8n_workflow_failed_total{workflow_id="33",workflow_name="Slack Alerts"} 187 - - -# HELP n8n_queue_jobs_total Total number of queue jobs -# TYPE n8n_queue_jobs_total counter -n8n_queue_jobs_total{state="waiting"} 3 -n8n_queue_jobs_total{state="active"} 2 -n8n_queue_jobs_total{state="completed"} 148 -n8n_queue_jobs_total{state="failed"} 5 - -# HELP n8n_queue_jobs_duration_seconds Job duration in seconds -# TYPE n8n_queue_jobs_duration_seconds histogram -n8n_queue_jobs_duration_seconds_bucket{le="0.1"} 22 -n8n_queue_jobs_duration_seconds_bucket{le="1"} 84 -n8n_queue_jobs_duration_seconds_bucket{le="+Inf"} 150 -n8n_queue_jobs_duration_seconds_sum 44.8 -n8n_queue_jobs_duration_seconds_count 150 - -# HELP n8n_queue_job_waiting_total Number of jobs currently waiting in the queue -# TYPE n8n_queue_job_waiting_total gauge -n8n_queue_job_waiting_total{queue="default"} 3 - -# HELP n8n_queue_job_active_total Number of jobs currently being processed -# TYPE n8n_queue_job_active_total gauge -n8n_queue_job_active_total{queue="default"} 2 - -# HELP n8n_queue_job_completed_total Number of jobs completed successfully -# TYPE n8n_queue_job_completed_total counter -n8n_queue_job_completed_total{queue="default"} 15892 +# HELP n8n_production_root_executions Total number of production root workflow executions (excludes sub-workflows). 
+# TYPE n8n_production_root_executions gauge +n8n_production_root_executions 8 -# HELP n8n_queue_job_failed_total Number of jobs that have failed -# TYPE n8n_queue_job_failed_total counter -n8n_queue_job_failed_total{queue="default"} 47 +# HELP n8n_manual_executions Total number of manual workflow executions (success + error). +# TYPE n8n_manual_executions gauge +n8n_manual_executions 0 -# HELP n8n_queue_job_dequeued_total Number of jobs dequeued (picked up from queue) -# TYPE n8n_queue_job_dequeued_total counter -n8n_queue_job_dequeued_total{queue="default"} 15939 +# HELP n8n_enabled_users Total number of enabled users. +# TYPE n8n_enabled_users gauge +n8n_enabled_users 1 + +# HELP n8n_users Total number of users. +# TYPE n8n_users gauge +n8n_users 1 + +# HELP n8n_workflows Total number of workflows. +# TYPE n8n_workflows gauge +n8n_workflows 2 + +# HELP n8n_credentials Total number of credentials. +# TYPE n8n_credentials gauge +n8n_credentials 0 + +# HELP n8n_token_exchange_requests_total Total number of token exchange requests. +# TYPE n8n_token_exchange_requests_total counter +n8n_token_exchange_requests_total{result="success"} 0 +n8n_token_exchange_requests_total{result="failure"} 0 + +# HELP n8n_token_exchange_failures_total Total number of token exchange failures broken down by reason. +# TYPE n8n_token_exchange_failures_total counter +n8n_token_exchange_failures_total{reason="invalid_token"} 0 + +# HELP n8n_embed_login_requests_total Total number of embed login requests. +# TYPE n8n_embed_login_requests_total counter +n8n_embed_login_requests_total{result="success"} 0 +n8n_embed_login_requests_total{result="failure"} 0 + +# HELP n8n_embed_login_failures_total Total number of embed login failures broken down by reason. +# TYPE n8n_embed_login_failures_total counter +n8n_embed_login_failures_total{reason="unauthorized"} 0 + +# HELP n8n_token_exchange_jit_provisioning_total Total number of users JIT-provisioned via token exchange. 
+# TYPE n8n_token_exchange_jit_provisioning_total counter +n8n_token_exchange_jit_provisioning_total 0 -# HELP n8n_queue_job_enqueued_total Number of jobs added to the queue +# HELP n8n_token_exchange_identity_linked_total Total number of external identities linked to existing users via token exchange. +# TYPE n8n_token_exchange_identity_linked_total counter +n8n_token_exchange_identity_linked_total 0 + +# HELP n8n_audit_workflow_activated_total Total number of n8n.audit.workflow.activated events. +# TYPE n8n_audit_workflow_activated_total counter +n8n_audit_workflow_activated_total{workflow_id="testWorkflowOk"} 1 +n8n_audit_workflow_activated_total{workflow_id="testWorkflowFail"} 1 + +# HELP n8n_audit_workflow_archived_total Total number of n8n.audit.workflow.archived events. +# TYPE n8n_audit_workflow_archived_total counter +n8n_audit_workflow_archived_total{workflow_id="testWorkflowOk"} 1 + +# HELP n8n_audit_workflow_created_total Total number of n8n.audit.workflow.created events. +# TYPE n8n_audit_workflow_created_total counter +n8n_audit_workflow_created_total{workflow_id="testWorkflowOk"} 1 + +# HELP n8n_audit_workflow_deactivated_total Total number of n8n.audit.workflow.deactivated events. +# TYPE n8n_audit_workflow_deactivated_total counter +n8n_audit_workflow_deactivated_total{workflow_id="testWorkflowOk"} 1 + +# HELP n8n_audit_workflow_deleted_total Total number of n8n.audit.workflow.deleted events. +# TYPE n8n_audit_workflow_deleted_total counter +n8n_audit_workflow_deleted_total{workflow_id="testWorkflowOk"} 1 + +# HELP n8n_queue_job_enqueued_total Total number of n8n.queue.job.enqueued events. 
# TYPE n8n_queue_job_enqueued_total counter -n8n_queue_job_enqueued_total{queue="default"} 15670 - -# HELP n8n_queue_job_delayed_total Number of jobs scheduled to run later -# TYPE n8n_queue_job_delayed_total gauge -n8n_queue_job_delayed_total{queue="default"} 5 - -# HELP n8n_queue_job_waiting_duration_seconds Duration jobs spend waiting before being processed -# TYPE n8n_queue_job_waiting_duration_seconds histogram -n8n_queue_job_waiting_duration_seconds_bucket{queue="default",le="0.1"} 50 -n8n_queue_job_waiting_duration_seconds_bucket{queue="default",le="1"} 241 -n8n_queue_job_waiting_duration_seconds_bucket{queue="default",le="5"} 820 -n8n_queue_job_waiting_duration_seconds_bucket{queue="default",le="10"} 1105 -n8n_queue_job_waiting_duration_seconds_bucket{queue="default",le="30"} 1240 -n8n_queue_job_waiting_duration_seconds_bucket{queue="default",le="+Inf"} 1253 -n8n_queue_job_waiting_duration_seconds_sum{queue="default"} 450.32 -n8n_queue_job_waiting_duration_seconds_count{queue="default"} 1253 - -# HELP n8n_api_requests_total Total API requests -# TYPE n8n_api_requests_total counter -n8n_api_requests_total{method="GET",endpoint="/workflows"} 240 -n8n_api_requests_total{method="POST",endpoint="/executions"} 75 - -# HELP n8n_api_request_duration_seconds API request duration in seconds -# TYPE n8n_api_request_duration_seconds histogram -n8n_api_request_duration_seconds_bucket{le="0.1"} 90 -n8n_api_request_duration_seconds_bucket{le="1"} 120 -n8n_api_request_duration_seconds_bucket{le="+Inf"} 125 -n8n_api_request_duration_seconds_sum 15.3 -n8n_api_request_duration_seconds_count 125 - -# HELP n8n_cache_operations_total Total cache operations -# TYPE n8n_cache_operations_total counter -n8n_cache_operations_total{operation="get"} 1250 -n8n_cache_operations_total{operation="set"} 320 -n8n_cache_operations_total{operation="delete"} 10 - -# HELP n8n_cache_hits_total Cache hits -# TYPE n8n_cache_hits_total counter -n8n_cache_hits_total 1080 +n8n_queue_job_enqueued_total 
8 -# HELP n8n_cache_misses_total Cache misses -# TYPE n8n_cache_misses_total counter -n8n_cache_misses_total 170 - -# HELP n8n_cache_errors_total Cache errors -# TYPE n8n_cache_errors_total counter -n8n_cache_errors_total 0 - -# HELP n8n_cache_latency_seconds Cache operation latency in seconds -# TYPE n8n_cache_latency_seconds histogram -n8n_cache_latency_seconds_bucket{le="0.001"} 90 -n8n_cache_latency_seconds_bucket{le="0.01"} 240 -n8n_cache_latency_seconds_bucket{le="+Inf"} 260 -n8n_cache_latency_seconds_sum 1.42 -n8n_cache_latency_seconds_count 260 - -# HELP n8n_eventbus_events_total Total events published on the event bus -# TYPE n8n_eventbus_events_total counter -n8n_eventbus_events_total{event_type="workflowStarted"} 140 -n8n_eventbus_events_total{event_type="workflowCompleted"} 135 -n8n_eventbus_events_total{event_type="workflowFailed"} 5 - -# HELP n8n_eventbus_events_processed_total Total processed events -# TYPE n8n_eventbus_events_processed_total counter -n8n_eventbus_events_processed_total 138 - -# HELP n8n_eventbus_events_failed_total Total failed event processing -# TYPE n8n_eventbus_events_failed_total counter -n8n_eventbus_events_failed_total 2 - -# HELP n8n_eventbus_queue_size Current event queue size -# TYPE n8n_eventbus_queue_size gauge -n8n_eventbus_queue_size 1 - -# HELP n8n_eventbus_connections_total Active event bus backend connections -# TYPE n8n_eventbus_connections_total gauge -n8n_eventbus_connections_total 1 - -# HELP n8n_workflow_executions_active Number of active workflow executions -# TYPE n8n_workflow_executions_active gauge -n8n_workflow_executions_active 3 - -# HELP n8n_queue_job_attempts_total Total number of job attempts -# TYPE n8n_queue_job_attempts_total counter -n8n_queue_job_attempts_total{result="success"} 435 -n8n_queue_job_attempts_total{result="failed"} 12 - -# HELP n8n_workflow_started_total Total number of workflows started +# HELP n8n_workflow_started_total Total number of n8n.workflow.started events. 
# TYPE n8n_workflow_started_total counter -n8n_workflow_started_total 25634 -n8n_workflow_started_total{workflow_id="12",workflow_name="CRM Sync"} 8142 -n8n_workflow_started_total{workflow_id="25",workflow_name="Webhook Intake"} 14290 -n8n_workflow_started_total{workflow_id="33",workflow_name="Slack Alerts"} 2202 +n8n_workflow_started_total{workflow_id="testWorkflowOk"} 4 +n8n_workflow_started_total{workflow_id="testWorkflowFail"} 4 + +# HELP n8n_audit_workflow_executed_total Total number of n8n.audit.workflow.executed events. +# TYPE n8n_audit_workflow_executed_total counter +n8n_audit_workflow_executed_total{workflow_id="testWorkflowOk"} 4 +n8n_audit_workflow_executed_total{workflow_id="testWorkflowFail"} 4 + +# HELP n8n_audit_workflow_resumed_total Total number of n8n.audit.workflow.resumed events. +# TYPE n8n_audit_workflow_resumed_total counter +n8n_audit_workflow_resumed_total{workflow_id="testWorkflowOk"} 1 -# HELP n8n_workflow_success_total Total number of workflows completed successfully +# HELP n8n_audit_workflow_unarchived_total Total number of n8n.audit.workflow.unarchived events. +# TYPE n8n_audit_workflow_unarchived_total counter +n8n_audit_workflow_unarchived_total{workflow_id="testWorkflowOk"} 1 + +# HELP n8n_audit_workflow_updated_total Total number of n8n.audit.workflow.updated events. +# TYPE n8n_audit_workflow_updated_total counter +n8n_audit_workflow_updated_total{workflow_id="testWorkflowOk"} 1 + +# HELP n8n_audit_workflow_version_updated_total Total number of n8n.audit.workflow.version.updated events. +# TYPE n8n_audit_workflow_version_updated_total counter +n8n_audit_workflow_version_updated_total{workflow_id="testWorkflowOk"} 1 + +# HELP n8n_audit_workflow_waiting_total Total number of n8n.audit.workflow.waiting events. +# TYPE n8n_audit_workflow_waiting_total counter +n8n_audit_workflow_waiting_total{workflow_id="testWorkflowOk"} 1 + +# HELP n8n_workflow_success_total Total number of n8n.workflow.success events. 
# TYPE n8n_workflow_success_total counter -n8n_workflow_success_total 25209 -n8n_workflow_success_total{workflow_id="12",workflow_name="CRM Sync"} 8059 -n8n_workflow_success_total{workflow_id="25",workflow_name="Webhook Intake"} 14135 -n8n_workflow_success_total{workflow_id="33",workflow_name="Slack Alerts"} 2015 +n8n_workflow_success_total{workflow_id="testWorkflowOk"} 4 -# HELP n8n_workflow_failed_total Total number of workflows that failed +# HELP n8n_queue_job_completed_total Total number of n8n.queue.job.completed events. +# TYPE n8n_queue_job_completed_total counter +n8n_queue_job_completed_total 4 + +# HELP n8n_workflow_failed_total Total number of n8n.workflow.failed events. # TYPE n8n_workflow_failed_total counter -n8n_workflow_failed_total 425 -n8n_workflow_failed_total{workflow_id="12",workflow_name="CRM Sync"} 83 -n8n_workflow_failed_total{workflow_id="25",workflow_name="Webhook Intake"} 155 -n8n_workflow_failed_total{workflow_id="33",workflow_name="Slack Alerts"} 187 \ No newline at end of file +n8n_workflow_failed_total{workflow_id="testWorkflowFail"} 4 + +# HELP n8n_queue_job_failed_total Total number of n8n.queue.job.failed events. +# TYPE n8n_queue_job_failed_total counter +n8n_queue_job_failed_total 4 +# HELP n8n_queue_job_stalled_total Total number of n8n.queue.job.stalled events. +# TYPE n8n_queue_job_stalled_total counter +n8n_queue_job_stalled_total 1 +# HELP n8n_queue_job_dequeued_total Total number of n8n.queue.job.dequeued events. +# TYPE n8n_queue_job_dequeued_total counter +n8n_queue_job_dequeued_total 8 + +# HELP n8n_node_started_total Total number of n8n.node.started events. +# TYPE n8n_node_started_total counter +n8n_node_started_total{workflow_id="testWorkflowOk"} 8 +n8n_node_started_total{workflow_id="testWorkflowFail"} 8 + +# HELP n8n_node_finished_total Total number of n8n.node.finished events. 
+# TYPE n8n_node_finished_total counter +n8n_node_finished_total{workflow_id="testWorkflowOk"} 8 +n8n_node_finished_total{workflow_id="testWorkflowFail"} 8 + +# HELP n8n_runner_task_requested_total Total number of n8n.runner.task.requested events. +# TYPE n8n_runner_task_requested_total counter +n8n_runner_task_requested_total 4 + +# HELP n8n_expression_code_cache_hit_total Expression code cache hits. +# TYPE n8n_expression_code_cache_hit_total counter +n8n_expression_code_cache_hit_total 0 + +# HELP n8n_expression_code_cache_miss_total Expression code cache misses. +# TYPE n8n_expression_code_cache_miss_total counter +n8n_expression_code_cache_miss_total 0 + +# HELP n8n_expression_code_cache_eviction_total Expression code cache evictions. +# TYPE n8n_expression_code_cache_eviction_total counter +n8n_expression_code_cache_eviction_total 0 + +# HELP n8n_expression_code_cache_size Current expression code cache size. +# TYPE n8n_expression_code_cache_size gauge +n8n_expression_code_cache_size 0 + +# HELP n8n_expression_evaluation_duration_seconds Duration of VM-based expression evaluation in seconds. +# TYPE n8n_expression_evaluation_duration_seconds histogram +n8n_expression_evaluation_duration_seconds_bucket{le="0.0001",status="success",type="none"} 0 +n8n_expression_evaluation_duration_seconds_bucket{le="+Inf",status="success",type="none"} 0 +n8n_expression_evaluation_duration_seconds_count{status="success",type="none"} 0 +n8n_expression_evaluation_duration_seconds_sum{status="success",type="none"} 0 + +# HELP n8n_expression_pool_acquired_total Bridges acquired from the expression pool. +# TYPE n8n_expression_pool_acquired_total counter +n8n_expression_pool_acquired_total 0 + +# HELP n8n_expression_pool_replenish_failed_total Failed pool bridge replenishments. +# TYPE n8n_expression_pool_replenish_failed_total counter +n8n_expression_pool_replenish_failed_total 0 + +# HELP n8n_expression_pool_scaled_up_total Expression pool scaled up from idle. 
+# TYPE n8n_expression_pool_scaled_up_total counter +n8n_expression_pool_scaled_up_total 0 + +# HELP n8n_expression_pool_scaled_to_zero_total Expression pool scaled to zero after inactivity. +# TYPE n8n_expression_pool_scaled_to_zero_total counter +n8n_expression_pool_scaled_to_zero_total 0 +# HELP n8n_ai_document_processed_total Total number of n8n.ai.document.processed events. +# TYPE n8n_ai_document_processed_total counter +n8n_ai_document_processed_total 0 +# HELP n8n_ai_embeddings_embedded_document_total Total number of n8n.ai.embeddings.embedded.document events. +# TYPE n8n_ai_embeddings_embedded_document_total counter +n8n_ai_embeddings_embedded_document_total 0 +# HELP n8n_ai_embeddings_embedded_query_total Total number of n8n.ai.embeddings.embedded.query events. +# TYPE n8n_ai_embeddings_embedded_query_total counter +n8n_ai_embeddings_embedded_query_total 0 +# HELP n8n_ai_llm_error_total Total number of n8n.ai.llm.error events. +# TYPE n8n_ai_llm_error_total counter +n8n_ai_llm_error_total 0 +# HELP n8n_ai_llm_generated_total Total number of n8n.ai.llm.generated events. +# TYPE n8n_ai_llm_generated_total counter +n8n_ai_llm_generated_total 0 +# HELP n8n_ai_memory_added_message_total Total number of n8n.ai.memory.added.message events. +# TYPE n8n_ai_memory_added_message_total counter +n8n_ai_memory_added_message_total 0 +# HELP n8n_ai_memory_get_messages_total Total number of n8n.ai.memory.get.messages events. +# TYPE n8n_ai_memory_get_messages_total counter +n8n_ai_memory_get_messages_total 0 +# HELP n8n_ai_output_parser_parsed_total Total number of n8n.ai.output.parser.parsed events. +# TYPE n8n_ai_output_parser_parsed_total counter +n8n_ai_output_parser_parsed_total 0 +# HELP n8n_ai_retriever_get_relevant_documents_total Total number of n8n.ai.retriever.get.relevant.documents events. 
+# TYPE n8n_ai_retriever_get_relevant_documents_total counter +n8n_ai_retriever_get_relevant_documents_total 0 +# HELP n8n_ai_text_splitter_split_total Total number of n8n.ai.text.splitter.split events. +# TYPE n8n_ai_text_splitter_split_total counter +n8n_ai_text_splitter_split_total 0 +# HELP n8n_ai_tool_called_total Total number of n8n.ai.tool.called events. +# TYPE n8n_ai_tool_called_total counter +n8n_ai_tool_called_total 0 +# HELP n8n_ai_vector_store_populated_total Total number of n8n.ai.vector.store.populated events. +# TYPE n8n_ai_vector_store_populated_total counter +n8n_ai_vector_store_populated_total 0 +# HELP n8n_ai_vector_store_searched_total Total number of n8n.ai.vector.store.searched events. +# TYPE n8n_ai_vector_store_searched_total counter +n8n_ai_vector_store_searched_total 0 +# HELP n8n_ai_vector_store_updated_total Total number of n8n.ai.vector.store.updated events. +# TYPE n8n_ai_vector_store_updated_total counter +n8n_ai_vector_store_updated_total 0 +# HELP n8n_audit_execution_data_reveal_failure_total Total number of n8n.audit.execution.data.reveal.failure events. +# TYPE n8n_audit_execution_data_reveal_failure_total counter +n8n_audit_execution_data_reveal_failure_total 0 +# HELP n8n_audit_execution_data_revealed_total Total number of n8n.audit.execution.data.revealed events. +# TYPE n8n_audit_execution_data_revealed_total counter +n8n_audit_execution_data_revealed_total 0 +# HELP n8n_audit_package_deleted_total Total number of n8n.audit.package.deleted events. +# TYPE n8n_audit_package_deleted_total counter +n8n_audit_package_deleted_total 0 +# HELP n8n_audit_package_installed_total Total number of n8n.audit.package.installed events. +# TYPE n8n_audit_package_installed_total counter +n8n_audit_package_installed_total 0 +# HELP n8n_audit_package_updated_total Total number of n8n.audit.package.updated events. 
+# TYPE n8n_audit_package_updated_total counter +n8n_audit_package_updated_total 0 +# HELP n8n_audit_user_api_created_total Total number of n8n.audit.user.api.created events. +# TYPE n8n_audit_user_api_created_total counter +n8n_audit_user_api_created_total 0 +# HELP n8n_audit_user_api_deleted_total Total number of n8n.audit.user.api.deleted events. +# TYPE n8n_audit_user_api_deleted_total counter +n8n_audit_user_api_deleted_total 0 +# HELP n8n_audit_user_credentials_created_total Total number of n8n.audit.user.credentials.created events. +# TYPE n8n_audit_user_credentials_created_total counter +n8n_audit_user_credentials_created_total 0 +# HELP n8n_audit_user_credentials_deleted_total Total number of n8n.audit.user.credentials.deleted events. +# TYPE n8n_audit_user_credentials_deleted_total counter +n8n_audit_user_credentials_deleted_total 0 +# HELP n8n_audit_user_credentials_shared_total Total number of n8n.audit.user.credentials.shared events. +# TYPE n8n_audit_user_credentials_shared_total counter +n8n_audit_user_credentials_shared_total 0 +# HELP n8n_audit_user_credentials_updated_total Total number of n8n.audit.user.credentials.updated events. +# TYPE n8n_audit_user_credentials_updated_total counter +n8n_audit_user_credentials_updated_total 0 +# HELP n8n_audit_user_deleted_total Total number of n8n.audit.user.deleted events. +# TYPE n8n_audit_user_deleted_total counter +n8n_audit_user_deleted_total 0 +# HELP n8n_audit_user_email_failed_total Total number of n8n.audit.user.email.failed events. +# TYPE n8n_audit_user_email_failed_total counter +n8n_audit_user_email_failed_total 0 +# HELP n8n_audit_user_execution_deleted_total Total number of n8n.audit.user.execution.deleted events. +# TYPE n8n_audit_user_execution_deleted_total counter +n8n_audit_user_execution_deleted_total 0 +# HELP n8n_audit_user_invitation_accepted_total Total number of n8n.audit.user.invitation.accepted events. 
+# TYPE n8n_audit_user_invitation_accepted_total counter +n8n_audit_user_invitation_accepted_total 0 +# HELP n8n_audit_user_invited_total Total number of n8n.audit.user.invited events. +# TYPE n8n_audit_user_invited_total counter +n8n_audit_user_invited_total 0 +# HELP n8n_audit_user_login_failed_total Total number of n8n.audit.user.login.failed events. +# TYPE n8n_audit_user_login_failed_total counter +n8n_audit_user_login_failed_total 0 +# HELP n8n_audit_user_login_success_total Total number of n8n.audit.user.login.success events. +# TYPE n8n_audit_user_login_success_total counter +n8n_audit_user_login_success_total 0 +# HELP n8n_audit_user_mfa_disabled_total Total number of n8n.audit.user.mfa.disabled events. +# TYPE n8n_audit_user_mfa_disabled_total counter +n8n_audit_user_mfa_disabled_total 0 +# HELP n8n_audit_user_mfa_enabled_total Total number of n8n.audit.user.mfa.enabled events. +# TYPE n8n_audit_user_mfa_enabled_total counter +n8n_audit_user_mfa_enabled_total 0 +# HELP n8n_audit_user_reinvited_total Total number of n8n.audit.user.reinvited events. +# TYPE n8n_audit_user_reinvited_total counter +n8n_audit_user_reinvited_total 0 +# HELP n8n_audit_user_reset_total Total number of n8n.audit.user.reset events. +# TYPE n8n_audit_user_reset_total counter +n8n_audit_user_reset_total 0 +# HELP n8n_audit_user_reset_requested_total Total number of n8n.audit.user.reset.requested events. +# TYPE n8n_audit_user_reset_requested_total counter +n8n_audit_user_reset_requested_total 0 +# HELP n8n_audit_user_signedup_total Total number of n8n.audit.user.signedup events. +# TYPE n8n_audit_user_signedup_total counter +n8n_audit_user_signedup_total 0 +# HELP n8n_audit_user_updated_total Total number of n8n.audit.user.updated events. +# TYPE n8n_audit_user_updated_total counter +n8n_audit_user_updated_total 0 +# HELP n8n_audit_variable_created_total Total number of n8n.audit.variable.created events. 
+# TYPE n8n_audit_variable_created_total counter +n8n_audit_variable_created_total 0 +# HELP n8n_audit_variable_deleted_total Total number of n8n.audit.variable.deleted events. +# TYPE n8n_audit_variable_deleted_total counter +n8n_audit_variable_deleted_total 0 +# HELP n8n_audit_variable_updated_total Total number of n8n.audit.variable.updated events. +# TYPE n8n_audit_variable_updated_total counter +n8n_audit_variable_updated_total 0 +# HELP n8n_execution_throttled_total Total number of n8n.execution.throttled events. +# TYPE n8n_execution_throttled_total counter +n8n_execution_throttled_total 0 +# HELP n8n_runner_response_received_total Total number of n8n.runner.response.received events. +# TYPE n8n_runner_response_received_total counter +n8n_runner_response_received_total 0 +# HELP n8n_workflow_cancelled_total Total number of n8n.workflow.cancelled events. +# TYPE n8n_workflow_cancelled_total counter +n8n_workflow_cancelled_total 0 diff --git a/n8n/tests/fixtures/n8n_custom.txt b/n8n/tests/fixtures/n8n_custom.txt index d06fa2589b0ba..9ccba19b14d57 100644 --- a/n8n/tests/fixtures/n8n_custom.txt +++ b/n8n/tests/fixtures/n8n_custom.txt @@ -1,34 +1,34 @@ # HELP test_process_cpu_user_seconds_total Total user CPU time spent in seconds. # TYPE test_process_cpu_user_seconds_total counter -test_process_cpu_user_seconds_total 8.298932999999998 +test_process_cpu_user_seconds_total 0.921656 # HELP test_process_cpu_system_seconds_total Total system CPU time spent in seconds. # TYPE test_process_cpu_system_seconds_total counter -test_process_cpu_system_seconds_total 3.1041119999999998 +test_process_cpu_system_seconds_total 0.157367 # HELP test_process_cpu_seconds_total Total user and system CPU time spent in seconds. # TYPE test_process_cpu_seconds_total counter -test_process_cpu_seconds_total 11.403044999999999 +test_process_cpu_seconds_total 1.0790229999999998 # HELP test_process_start_time_seconds Start time of the process since unix epoch in seconds. 
# TYPE test_process_start_time_seconds gauge -test_process_start_time_seconds 1761656578 +test_process_start_time_seconds 1778234580 # HELP test_process_resident_memory_bytes Resident memory size in bytes. # TYPE test_process_resident_memory_bytes gauge -test_process_resident_memory_bytes 245043200 +test_process_resident_memory_bytes 267681792 # HELP test_process_virtual_memory_bytes Virtual memory size in bytes. # TYPE test_process_virtual_memory_bytes gauge -test_process_virtual_memory_bytes 33656197120 +test_process_virtual_memory_bytes 18517532672 # HELP test_process_heap_bytes Process heap size in bytes. # TYPE test_process_heap_bytes gauge -test_process_heap_bytes 277200896 +test_process_heap_bytes 840728576 # HELP test_process_open_fds Number of open file descriptors. # TYPE test_process_open_fds gauge -test_process_open_fds 44 +test_process_open_fds 45 # HELP test_process_max_fds Maximum number of open file descriptors. # TYPE test_process_max_fds gauge @@ -36,59 +36,62 @@ test_process_max_fds 1048576 # HELP test_nodejs_eventloop_lag_seconds Lag of event loop in seconds. # TYPE test_nodejs_eventloop_lag_seconds gauge -test_nodejs_eventloop_lag_seconds 0.002765567 +test_nodejs_eventloop_lag_seconds 0.008676917 # HELP test_nodejs_eventloop_lag_min_seconds The minimum recorded event loop delay. # TYPE test_nodejs_eventloop_lag_min_seconds gauge -test_nodejs_eventloop_lag_min_seconds 0.010018816 +test_nodejs_eventloop_lag_min_seconds 0.006340608 # HELP test_nodejs_eventloop_lag_max_seconds The maximum recorded event loop delay. # TYPE test_nodejs_eventloop_lag_max_seconds gauge -test_nodejs_eventloop_lag_max_seconds 0.011239423 +test_nodejs_eventloop_lag_max_seconds 0.030228479 # HELP test_nodejs_eventloop_lag_mean_seconds The mean of the recorded event loop delays. 
# TYPE test_nodejs_eventloop_lag_mean_seconds gauge -test_nodejs_eventloop_lag_mean_seconds 0.010092521938958708 +test_nodejs_eventloop_lag_mean_seconds 0.012079332927643785 # HELP test_nodejs_eventloop_lag_stddev_seconds The standard deviation of the recorded event loop delays. # TYPE test_nodejs_eventloop_lag_stddev_seconds gauge -test_nodejs_eventloop_lag_stddev_seconds 0.00016945350643679045 +test_nodejs_eventloop_lag_stddev_seconds 0.0011467288819057616 # HELP test_nodejs_eventloop_lag_p50_seconds The 50th percentile of the recorded event loop delays. # TYPE test_nodejs_eventloop_lag_p50_seconds gauge -test_nodejs_eventloop_lag_p50_seconds 0.010067967 +test_nodejs_eventloop_lag_p50_seconds 0.012001279 # HELP test_nodejs_eventloop_lag_p90_seconds The 90th percentile of the recorded event loop delays. # TYPE test_nodejs_eventloop_lag_p90_seconds gauge -test_nodejs_eventloop_lag_p90_seconds 0.010067967 +test_nodejs_eventloop_lag_p90_seconds 0.013254655 # HELP test_nodejs_eventloop_lag_p99_seconds The 99th percentile of the recorded event loop delays. # TYPE test_nodejs_eventloop_lag_p99_seconds gauge -test_nodejs_eventloop_lag_p99_seconds 0.011124735 +test_nodejs_eventloop_lag_p99_seconds 0.014426111 # HELP test_nodejs_active_resources Number of active resources that are currently keeping the event loop alive, grouped by async resource type. # TYPE test_nodejs_active_resources gauge -test_nodejs_active_resources{type="PipeWrap"} 2 -test_nodejs_active_resources{type="TCPServerWrap"} 1 -test_nodejs_active_resources{type="TCPSocketWrap"} 1 -test_nodejs_active_resources{type="Timeout"} 13 +test_nodejs_active_resources{type="PipeWrap"} 5 +test_nodejs_active_resources{type="TCPServerWrap"} 2 +test_nodejs_active_resources{type="TCPSocketWrap"} 9 +test_nodejs_active_resources{type="ProcessWrap"} 1 +test_nodejs_active_resources{type="Timeout"} 20 test_nodejs_active_resources{type="Immediate"} 1 # HELP test_nodejs_active_resources_total Total number of active resources. 
# TYPE test_nodejs_active_resources_total gauge -test_nodejs_active_resources_total 18 +test_nodejs_active_resources_total 38 # HELP test_nodejs_active_handles Number of active libuv handles grouped by handle type. Every handle type is C++ class name. # TYPE test_nodejs_active_handles gauge -test_nodejs_active_handles{type="Socket"} 3 -test_nodejs_active_handles{type="Server"} 1 +test_nodejs_active_handles{type="Socket"} 14 +test_nodejs_active_handles{type="Server"} 2 +test_nodejs_active_handles{type="ChildProcess"} 1 # HELP test_nodejs_active_handles_total Total number of active handles. # TYPE test_nodejs_active_handles_total gauge -test_nodejs_active_handles_total 4 +test_nodejs_active_handles_total 17 # HELP test_nodejs_active_requests Number of active libuv requests grouped by request type. Every request type is C++ class name. # TYPE test_nodejs_active_requests gauge +test_nodejs_active_requests{type="FSReqCallback"} 1 # HELP test_nodejs_active_requests_total Total number of active requests. # TYPE test_nodejs_active_requests_total gauge @@ -96,81 +99,87 @@ test_nodejs_active_requests_total 0 # HELP test_nodejs_heap_size_total_bytes Process heap size from Node.js in bytes. # TYPE test_nodejs_heap_size_total_bytes gauge -test_nodejs_heap_size_total_bytes 142774272 +test_nodejs_heap_size_total_bytes 146391040 # HELP test_nodejs_heap_size_used_bytes Process heap size used from Node.js in bytes. # TYPE test_nodejs_heap_size_used_bytes gauge -test_nodejs_heap_size_used_bytes 136342632 +test_nodejs_heap_size_used_bytes 136336448 # HELP test_nodejs_external_memory_bytes Node.js external memory size in bytes. # TYPE test_nodejs_external_memory_bytes gauge -test_nodejs_external_memory_bytes 20824585 +test_nodejs_external_memory_bytes 20993559 # HELP test_nodejs_heap_space_size_total_bytes Process heap space size total from Node.js in bytes. 
# TYPE test_nodejs_heap_space_size_total_bytes gauge test_nodejs_heap_space_size_total_bytes{space="read_only"} 0 -test_nodejs_heap_space_size_total_bytes{space="new"} 1048576 -test_nodejs_heap_space_size_total_bytes{space="old"} 122208256 -test_nodejs_heap_space_size_total_bytes{space="code"} 4718592 +test_nodejs_heap_space_size_total_bytes{space="new"} 2097152 +test_nodejs_heap_space_size_total_bytes{space="old"} 116920320 +test_nodejs_heap_space_size_total_bytes{space="code"} 5505024 test_nodejs_heap_space_size_total_bytes{space="shared"} 0 -test_nodejs_heap_space_size_total_bytes{space="trusted"} 7643136 +test_nodejs_heap_space_size_total_bytes{space="trusted"} 11624448 +test_nodejs_heap_space_size_total_bytes{space="shared_trusted"} 0 test_nodejs_heap_space_size_total_bytes{space="new_large_object"} 0 -test_nodejs_heap_space_size_total_bytes{space="large_object"} 7000064 -test_nodejs_heap_space_size_total_bytes{space="code_large_object"} 155648 +test_nodejs_heap_space_size_total_bytes{space="large_object"} 9875456 +test_nodejs_heap_space_size_total_bytes{space="code_large_object"} 368640 test_nodejs_heap_space_size_total_bytes{space="shared_large_object"} 0 +test_nodejs_heap_space_size_total_bytes{space="shared_trusted_large_object"} 0 test_nodejs_heap_space_size_total_bytes{space="trusted_large_object"} 0 # HELP test_nodejs_heap_space_size_used_bytes Process heap space size used from Node.js in bytes. 
# TYPE test_nodejs_heap_space_size_used_bytes gauge test_nodejs_heap_space_size_used_bytes{space="read_only"} 0 -test_nodejs_heap_space_size_used_bytes{space="new"} 652896 -test_nodejs_heap_space_size_used_bytes{space="old"} 119347344 -test_nodejs_heap_space_size_used_bytes{space="code"} 4183424 +test_nodejs_heap_space_size_used_bytes{space="new"} 382808 +test_nodejs_heap_space_size_used_bytes{space="old"} 111099512 +test_nodejs_heap_space_size_used_bytes{space="code"} 4853344 test_nodejs_heap_space_size_used_bytes{space="shared"} 0 -test_nodejs_heap_space_size_used_bytes{space="trusted"} 5187192 +test_nodejs_heap_space_size_used_bytes{space="trusted"} 9839592 +test_nodejs_heap_space_size_used_bytes{space="shared_trusted"} 0 test_nodejs_heap_space_size_used_bytes{space="new_large_object"} 0 -test_nodejs_heap_space_size_used_bytes{space="large_object"} 6837144 -test_nodejs_heap_space_size_used_bytes{space="code_large_object"} 138432 +test_nodejs_heap_space_size_used_bytes{space="large_object"} 9806288 +test_nodejs_heap_space_size_used_bytes{space="code_large_object"} 361728 test_nodejs_heap_space_size_used_bytes{space="shared_large_object"} 0 +test_nodejs_heap_space_size_used_bytes{space="shared_trusted_large_object"} 0 test_nodejs_heap_space_size_used_bytes{space="trusted_large_object"} 0 # HELP test_nodejs_heap_space_size_available_bytes Process heap space size available from Node.js in bytes. 
# TYPE test_nodejs_heap_space_size_available_bytes gauge test_nodejs_heap_space_size_available_bytes{space="read_only"} 0 -test_nodejs_heap_space_size_available_bytes{space="new"} 378016 -test_nodejs_heap_space_size_available_bytes{space="old"} 430568 -test_nodejs_heap_space_size_available_bytes{space="code"} 239680 +test_nodejs_heap_space_size_available_bytes{space="new"} 665704 +test_nodejs_heap_space_size_available_bytes{space="old"} 5484264 +test_nodejs_heap_space_size_available_bytes{space="code"} 651008 test_nodejs_heap_space_size_available_bytes{space="shared"} 0 -test_nodejs_heap_space_size_available_bytes{space="trusted"} 2323072 +test_nodejs_heap_space_size_available_bytes{space="trusted"} 1771032 +test_nodejs_heap_space_size_available_bytes{space="shared_trusted"} 0 test_nodejs_heap_space_size_available_bytes{space="new_large_object"} 1048576 test_nodejs_heap_space_size_available_bytes{space="large_object"} 0 test_nodejs_heap_space_size_available_bytes{space="code_large_object"} 0 test_nodejs_heap_space_size_available_bytes{space="shared_large_object"} 0 +test_nodejs_heap_space_size_available_bytes{space="shared_trusted_large_object"} 0 test_nodejs_heap_space_size_available_bytes{space="trusted_large_object"} 0 # HELP test_nodejs_version_info Node.js version info. # TYPE test_nodejs_version_info gauge -test_nodejs_version_info{version="v22.18.0",major="22",minor="18",patch="0"} 1 +test_nodejs_version_info{version="v24.14.1",major="24",minor="14",patch="1"} 1 # HELP test_nodejs_gc_duration_seconds Garbage collection duration by kind, one of major, minor, incremental or weakcb. 
# TYPE test_nodejs_gc_duration_seconds histogram -test_nodejs_gc_duration_seconds_bucket{le="0.001",kind="minor"} 128 -test_nodejs_gc_duration_seconds_bucket{le="0.01",kind="minor"} 132 -test_nodejs_gc_duration_seconds_bucket{le="0.1",kind="minor"} 132 -test_nodejs_gc_duration_seconds_bucket{le="1",kind="minor"} 132 -test_nodejs_gc_duration_seconds_bucket{le="2",kind="minor"} 132 -test_nodejs_gc_duration_seconds_bucket{le="5",kind="minor"} 132 -test_nodejs_gc_duration_seconds_bucket{le="+Inf",kind="minor"} 132 -test_nodejs_gc_duration_seconds_sum{kind="minor"} 0.09924478498101237 -test_nodejs_gc_duration_seconds_count{kind="minor"} 132 -test_nodejs_gc_duration_seconds_bucket{le="0.001",kind="incremental"} 1 +test_nodejs_gc_duration_seconds_bucket{le="0.001",kind="minor"} 0 +test_nodejs_gc_duration_seconds_bucket{le="0.01",kind="minor"} 2 +test_nodejs_gc_duration_seconds_bucket{le="0.1",kind="minor"} 2 +test_nodejs_gc_duration_seconds_bucket{le="1",kind="minor"} 2 +test_nodejs_gc_duration_seconds_bucket{le="2",kind="minor"} 2 +test_nodejs_gc_duration_seconds_bucket{le="5",kind="minor"} 2 +test_nodejs_gc_duration_seconds_bucket{le="+Inf",kind="minor"} 2 +test_nodejs_gc_duration_seconds_sum{kind="minor"} 0.004925500000128522 +test_nodejs_gc_duration_seconds_count{kind="minor"} 2 +test_nodejs_gc_duration_seconds_bucket{le="0.001",kind="incremental"} 0 test_nodejs_gc_duration_seconds_bucket{le="0.01",kind="incremental"} 2 test_nodejs_gc_duration_seconds_bucket{le="0.1",kind="incremental"} 2 test_nodejs_gc_duration_seconds_bucket{le="1",kind="incremental"} 2 test_nodejs_gc_duration_seconds_bucket{le="2",kind="incremental"} 2 test_nodejs_gc_duration_seconds_bucket{le="5",kind="incremental"} 2 test_nodejs_gc_duration_seconds_bucket{le="+Inf",kind="incremental"} 2 -test_nodejs_gc_duration_seconds_sum{kind="incremental"} 0.0022786640077829363 +test_nodejs_gc_duration_seconds_sum{kind="incremental"} 0.005939041999867186 
test_nodejs_gc_duration_seconds_count{kind="incremental"} 2 test_nodejs_gc_duration_seconds_bucket{le="0.001",kind="major"} 0 test_nodejs_gc_duration_seconds_bucket{le="0.01",kind="major"} 0 @@ -179,232 +188,425 @@ test_nodejs_gc_duration_seconds_bucket{le="1",kind="major"} 2 test_nodejs_gc_duration_seconds_bucket{le="2",kind="major"} 2 test_nodejs_gc_duration_seconds_bucket{le="5",kind="major"} 2 test_nodejs_gc_duration_seconds_bucket{le="+Inf",kind="major"} 2 -test_nodejs_gc_duration_seconds_sum{kind="major"} 0.1028408939987421 +test_nodejs_gc_duration_seconds_sum{kind="major"} 0.032123332999879496 test_nodejs_gc_duration_seconds_count{kind="major"} 2 +# HELP test_process_pss_bytes Proportional Set Size of the process in bytes. +# TYPE test_process_pss_bytes gauge +test_process_pss_bytes 220097536 + # HELP test_version_info n8n version info. # TYPE test_version_info gauge -test_version_info{version="v1.117.2",major="1",minor="117",patch="2"} 1 +test_version_info{version="v2.19.5",major="2",minor="19",patch="5"} 1 # HELP test_instance_role_leader Whether this main instance is the leader (1) or not (0). # TYPE test_instance_role_leader gauge test_instance_role_leader 1 +# HELP test_cache_hits_total Total number of cache hits. +# TYPE test_cache_hits_total counter +test_cache_hits_total 53 + +# HELP test_cache_misses_total Total number of cache misses. +# TYPE test_cache_misses_total counter +test_cache_misses_total 15 + +# HELP test_cache_updates_total Total number of cache updates. 
+# TYPE test_cache_updates_total counter +test_cache_updates_total 1 + # HELP test_http_request_duration_seconds duration histogram of http responses labeled with: status_code # TYPE test_http_request_duration_seconds histogram +test_http_request_duration_seconds_bucket{le="0.003"} 5 +test_http_request_duration_seconds_bucket{le="0.03"} 5 +test_http_request_duration_seconds_bucket{le="0.1"} 5 +test_http_request_duration_seconds_bucket{le="0.3"} 5 +test_http_request_duration_seconds_bucket{le="1.5"} 5 +test_http_request_duration_seconds_bucket{le="10"} 5 +test_http_request_duration_seconds_bucket{le="+Inf"} 5 +test_http_request_duration_seconds_sum 0.0018007910000000002 +test_http_request_duration_seconds_count 5 # HELP test_last_activity last instance activity (backend request) in Unix time (seconds). # TYPE test_last_activity gauge -test_last_activity 1761656582 +test_last_activity 1778234587 + +# HELP test_scaling_mode_queue_jobs_waiting Current number of enqueued jobs waiting for pickup in scaling mode. +# TYPE test_scaling_mode_queue_jobs_waiting gauge +test_scaling_mode_queue_jobs_waiting 0 + +# HELP test_scaling_mode_queue_jobs_active Current number of jobs being processed across all workers in scaling mode. +# TYPE test_scaling_mode_queue_jobs_active gauge +test_scaling_mode_queue_jobs_active 0 + +# HELP test_scaling_mode_queue_jobs_completed Total number of jobs completed across all workers in scaling mode since instance start. +# TYPE test_scaling_mode_queue_jobs_completed counter +test_scaling_mode_queue_jobs_completed 8 + +# HELP test_scaling_mode_queue_jobs_failed Total number of jobs failed across all workers in scaling mode since instance start. +# TYPE test_scaling_mode_queue_jobs_failed counter +test_scaling_mode_queue_jobs_failed 0 + +# HELP test_workflow_execution_duration_seconds Workflow execution duration in seconds. 
+# TYPE test_workflow_execution_duration_seconds histogram +test_workflow_execution_duration_seconds_bucket{le="0.005",status="success",mode="webhook",workflow_id="testWorkflowOk"} 3 +test_workflow_execution_duration_seconds_bucket{le="0.01",status="success",mode="webhook",workflow_id="testWorkflowOk"} 3 +test_workflow_execution_duration_seconds_bucket{le="0.025",status="success",mode="webhook",workflow_id="testWorkflowOk"} 4 +test_workflow_execution_duration_seconds_bucket{le="0.05",status="success",mode="webhook",workflow_id="testWorkflowOk"} 4 +test_workflow_execution_duration_seconds_bucket{le="0.1",status="success",mode="webhook",workflow_id="testWorkflowOk"} 4 +test_workflow_execution_duration_seconds_bucket{le="0.25",status="success",mode="webhook",workflow_id="testWorkflowOk"} 4 +test_workflow_execution_duration_seconds_bucket{le="0.5",status="success",mode="webhook",workflow_id="testWorkflowOk"} 4 +test_workflow_execution_duration_seconds_bucket{le="1",status="success",mode="webhook",workflow_id="testWorkflowOk"} 4 +test_workflow_execution_duration_seconds_bucket{le="2.5",status="success",mode="webhook",workflow_id="testWorkflowOk"} 4 +test_workflow_execution_duration_seconds_bucket{le="5",status="success",mode="webhook",workflow_id="testWorkflowOk"} 4 +test_workflow_execution_duration_seconds_bucket{le="10",status="success",mode="webhook",workflow_id="testWorkflowOk"} 4 +test_workflow_execution_duration_seconds_bucket{le="30",status="success",mode="webhook",workflow_id="testWorkflowOk"} 4 +test_workflow_execution_duration_seconds_bucket{le="60",status="success",mode="webhook",workflow_id="testWorkflowOk"} 4 +test_workflow_execution_duration_seconds_bucket{le="120",status="success",mode="webhook",workflow_id="testWorkflowOk"} 4 +test_workflow_execution_duration_seconds_bucket{le="300",status="success",mode="webhook",workflow_id="testWorkflowOk"} 4 
+test_workflow_execution_duration_seconds_bucket{le="600",status="success",mode="webhook",workflow_id="testWorkflowOk"} 4 +test_workflow_execution_duration_seconds_bucket{le="+Inf",status="success",mode="webhook",workflow_id="testWorkflowOk"} 4 +test_workflow_execution_duration_seconds_sum{status="success",mode="webhook",workflow_id="testWorkflowOk"} 0.027999999999999997 +test_workflow_execution_duration_seconds_count{status="success",mode="webhook",workflow_id="testWorkflowOk"} 4 +test_workflow_execution_duration_seconds_bucket{le="0.005",status="failed",mode="webhook",workflow_id="testWorkflowFail"} 3 +test_workflow_execution_duration_seconds_bucket{le="0.01",status="failed",mode="webhook",workflow_id="testWorkflowFail"} 3 +test_workflow_execution_duration_seconds_bucket{le="0.025",status="failed",mode="webhook",workflow_id="testWorkflowFail"} 3 +test_workflow_execution_duration_seconds_bucket{le="0.05",status="failed",mode="webhook",workflow_id="testWorkflowFail"} 3 +test_workflow_execution_duration_seconds_bucket{le="0.1",status="failed",mode="webhook",workflow_id="testWorkflowFail"} 3 +test_workflow_execution_duration_seconds_bucket{le="0.25",status="failed",mode="webhook",workflow_id="testWorkflowFail"} 3 +test_workflow_execution_duration_seconds_bucket{le="0.5",status="failed",mode="webhook",workflow_id="testWorkflowFail"} 4 +test_workflow_execution_duration_seconds_bucket{le="1",status="failed",mode="webhook",workflow_id="testWorkflowFail"} 4 +test_workflow_execution_duration_seconds_bucket{le="2.5",status="failed",mode="webhook",workflow_id="testWorkflowFail"} 4 +test_workflow_execution_duration_seconds_bucket{le="5",status="failed",mode="webhook",workflow_id="testWorkflowFail"} 4 +test_workflow_execution_duration_seconds_bucket{le="10",status="failed",mode="webhook",workflow_id="testWorkflowFail"} 4 +test_workflow_execution_duration_seconds_bucket{le="30",status="failed",mode="webhook",workflow_id="testWorkflowFail"} 4 
+test_workflow_execution_duration_seconds_bucket{le="60",status="failed",mode="webhook",workflow_id="testWorkflowFail"} 4 +test_workflow_execution_duration_seconds_bucket{le="120",status="failed",mode="webhook",workflow_id="testWorkflowFail"} 4 +test_workflow_execution_duration_seconds_bucket{le="300",status="failed",mode="webhook",workflow_id="testWorkflowFail"} 4 +test_workflow_execution_duration_seconds_bucket{le="600",status="failed",mode="webhook",workflow_id="testWorkflowFail"} 4 +test_workflow_execution_duration_seconds_bucket{le="+Inf",status="failed",mode="webhook",workflow_id="testWorkflowFail"} 4 +test_workflow_execution_duration_seconds_sum{status="failed",mode="webhook",workflow_id="testWorkflowFail"} 0.405 +test_workflow_execution_duration_seconds_count{status="failed",mode="webhook",workflow_id="testWorkflowFail"} 4 # HELP test_active_workflow_count Total number of active workflows. # TYPE test_active_workflow_count gauge -test_active_workflow_count{workflow_id="wf_8a3b2c1d"} 0 -test_active_workflow_count{workflow_id="wf_7f4e9a2b"} 0 -test_active_workflow_count{workflow_id="wf_5d6c8e1f"} 0 - -# HELP test_nodejs_event_loop_lag_seconds Event loop lag in seconds -# TYPE test_nodejs_event_loop_lag_seconds gauge -test_nodejs_event_loop_lag_seconds 0.0035 - -# HELP test_nodejs_heap_total_bytes Total heap size allocated in bytes -# TYPE test_nodejs_heap_total_bytes gauge -test_nodejs_heap_total_bytes 73400320 - -# HELP test_nodejs_heap_used_bytes Heap memory used in bytes -# TYPE test_nodejs_heap_used_bytes gauge -test_nodejs_heap_used_bytes 51200000 - -# HELP test_workflow_executions_total Total number of workflow executions -# TYPE test_workflow_executions_total counter -test_workflow_executions_total{status="success",workflow_id="wf_8a3b2c1d"} 45 -test_workflow_executions_total{status="success",workflow_id="wf_7f4e9a2b"} 38 -test_workflow_executions_total{status="success",workflow_id="wf_5d6c8e1f"} 45 
-test_workflow_executions_total{status="error",workflow_id="wf_8a3b2c1d"} 3 -test_workflow_executions_total{status="error",workflow_id="wf_5d6c8e1f"} 4 - -# HELP test_workflow_executions_duration_seconds Workflow execution duration in seconds -# TYPE test_workflow_executions_duration_seconds histogram -test_workflow_executions_duration_seconds_bucket{le="0.1",workflow_id="wf_8a3b2c1d"} 5 -test_workflow_executions_duration_seconds_bucket{le="1",workflow_id="wf_8a3b2c1d"} 18 -test_workflow_executions_duration_seconds_bucket{le="+Inf",workflow_id="wf_8a3b2c1d"} 48 -test_workflow_executions_duration_seconds_sum{workflow_id="wf_8a3b2c1d"} 14.3 -test_workflow_executions_duration_seconds_count{workflow_id="wf_8a3b2c1d"} 48 -test_workflow_executions_duration_seconds_bucket{le="0.1",workflow_id="wf_7f4e9a2b"} 4 -test_workflow_executions_duration_seconds_bucket{le="1",workflow_id="wf_7f4e9a2b"} 15 -test_workflow_executions_duration_seconds_bucket{le="+Inf",workflow_id="wf_7f4e9a2b"} 38 -test_workflow_executions_duration_seconds_sum{workflow_id="wf_7f4e9a2b"} 11.2 -test_workflow_executions_duration_seconds_count{workflow_id="wf_7f4e9a2b"} 38 -test_workflow_executions_duration_seconds_bucket{le="0.1",workflow_id="wf_5d6c8e1f"} 3 -test_workflow_executions_duration_seconds_bucket{le="1",workflow_id="wf_5d6c8e1f"} 12 -test_workflow_executions_duration_seconds_bucket{le="+Inf",workflow_id="wf_5d6c8e1f"} 49 -test_workflow_executions_duration_seconds_sum{workflow_id="wf_5d6c8e1f"} 12.7 -test_workflow_executions_duration_seconds_count{workflow_id="wf_5d6c8e1f"} 49 - -# HELP test_workflow_started_total Total number of workflows started -# TYPE test_workflow_started_total counter -test_workflow_started_total 25634 -test_workflow_started_total{workflow_id="12",workflow_name="CRM Sync"} 8142 -test_workflow_started_total{workflow_id="25",workflow_name="Webhook Intake"} 14290 -test_workflow_started_total{workflow_id="33",workflow_name="Slack Alerts"} 2202 +test_active_workflow_count 2 -# 
HELP test_workflow_success_total Total number of workflows completed successfully -# TYPE test_workflow_success_total counter -test_workflow_success_total 25209 -test_workflow_success_total{workflow_id="12",workflow_name="CRM Sync"} 8059 -test_workflow_success_total{workflow_id="25",workflow_name="Webhook Intake"} 14135 -test_workflow_success_total{workflow_id="33",workflow_name="Slack Alerts"} 2015 +# HELP test_production_executions Total number of production workflow executions (success + error). +# TYPE test_production_executions gauge +test_production_executions 8 -# HELP test_workflow_failed_total Total number of workflows that failed -# TYPE test_workflow_failed_total counter -test_workflow_failed_total 425 -test_workflow_failed_total{workflow_id="12",workflow_name="CRM Sync"} 83 -test_workflow_failed_total{workflow_id="25",workflow_name="Webhook Intake"} 155 -test_workflow_failed_total{workflow_id="33",workflow_name="Slack Alerts"} 187 - - -# HELP test_queue_jobs_total Total number of queue jobs -# TYPE test_queue_jobs_total counter -test_queue_jobs_total{state="waiting"} 3 -test_queue_jobs_total{state="active"} 2 -test_queue_jobs_total{state="completed"} 148 -test_queue_jobs_total{state="failed"} 5 - -# HELP test_queue_jobs_duration_seconds Job duration in seconds -# TYPE test_queue_jobs_duration_seconds histogram -test_queue_jobs_duration_seconds_bucket{le="0.1"} 22 -test_queue_jobs_duration_seconds_bucket{le="1"} 84 -test_queue_jobs_duration_seconds_bucket{le="+Inf"} 150 -test_queue_jobs_duration_seconds_sum 44.8 -test_queue_jobs_duration_seconds_count 150 - -# HELP test_queue_job_waiting_total Number of jobs currently waiting in the queue -# TYPE test_queue_job_waiting_total gauge -test_queue_job_waiting_total{queue="default"} 3 - -# HELP test_queue_job_active_total Number of jobs currently being processed -# TYPE test_queue_job_active_total gauge -test_queue_job_active_total{queue="default"} 2 - -# HELP test_queue_job_completed_total Number of jobs 
completed successfully -# TYPE test_queue_job_completed_total counter -test_queue_job_completed_total{queue="default"} 15892 +# HELP test_production_root_executions Total number of production root workflow executions (excludes sub-workflows). +# TYPE test_production_root_executions gauge +test_production_root_executions 8 -# HELP test_queue_job_failed_total Number of jobs that have failed -# TYPE test_queue_job_failed_total counter -test_queue_job_failed_total{queue="default"} 47 +# HELP test_manual_executions Total number of manual workflow executions (success + error). +# TYPE test_manual_executions gauge +test_manual_executions 0 -# HELP test_queue_job_dequeued_total Number of jobs dequeued (picked up from queue) -# TYPE test_queue_job_dequeued_total counter -test_queue_job_dequeued_total{queue="default"} 15939 +# HELP test_enabled_users Total number of enabled users. +# TYPE test_enabled_users gauge +test_enabled_users 1 + +# HELP test_users Total number of users. +# TYPE test_users gauge +test_users 1 + +# HELP test_workflows Total number of workflows. +# TYPE test_workflows gauge +test_workflows 2 + +# HELP test_credentials Total number of credentials. +# TYPE test_credentials gauge +test_credentials 0 + +# HELP test_token_exchange_requests_total Total number of token exchange requests. +# TYPE test_token_exchange_requests_total counter +test_token_exchange_requests_total{result="success"} 0 +test_token_exchange_requests_total{result="failure"} 0 + +# HELP test_token_exchange_failures_total Total number of token exchange failures broken down by reason. +# TYPE test_token_exchange_failures_total counter +test_token_exchange_failures_total{reason="invalid_token"} 0 + +# HELP test_embed_login_requests_total Total number of embed login requests. 
+# TYPE test_embed_login_requests_total counter +test_embed_login_requests_total{result="success"} 0 +test_embed_login_requests_total{result="failure"} 0 + +# HELP test_embed_login_failures_total Total number of embed login failures broken down by reason. +# TYPE test_embed_login_failures_total counter +test_embed_login_failures_total{reason="unauthorized"} 0 -# HELP test_queue_job_enqueued_total Number of jobs added to the queue +# HELP test_token_exchange_jit_provisioning_total Total number of users JIT-provisioned via token exchange. +# TYPE test_token_exchange_jit_provisioning_total counter +test_token_exchange_jit_provisioning_total 0 + +# HELP test_token_exchange_identity_linked_total Total number of external identities linked to existing users via token exchange. +# TYPE test_token_exchange_identity_linked_total counter +test_token_exchange_identity_linked_total 0 + +# HELP test_audit_workflow_activated_total Total number of n8n.audit.workflow.activated events. +# TYPE test_audit_workflow_activated_total counter +test_audit_workflow_activated_total{workflow_id="testWorkflowOk"} 1 +test_audit_workflow_activated_total{workflow_id="testWorkflowFail"} 1 + +# HELP test_audit_workflow_archived_total Total number of n8n.audit.workflow.archived events. +# TYPE test_audit_workflow_archived_total counter +test_audit_workflow_archived_total{workflow_id="testWorkflowOk"} 1 + +# HELP test_audit_workflow_created_total Total number of n8n.audit.workflow.created events. +# TYPE test_audit_workflow_created_total counter +test_audit_workflow_created_total{workflow_id="testWorkflowOk"} 1 + +# HELP test_audit_workflow_deactivated_total Total number of n8n.audit.workflow.deactivated events. +# TYPE test_audit_workflow_deactivated_total counter +test_audit_workflow_deactivated_total{workflow_id="testWorkflowOk"} 1 + +# HELP test_audit_workflow_deleted_total Total number of n8n.audit.workflow.deleted events. 
+# TYPE test_audit_workflow_deleted_total counter +test_audit_workflow_deleted_total{workflow_id="testWorkflowOk"} 1 + +# HELP test_queue_job_enqueued_total Total number of n8n.queue.job.enqueued events. # TYPE test_queue_job_enqueued_total counter -test_queue_job_enqueued_total{queue="default"} 15670 - -# HELP test_queue_job_delayed_total Number of jobs scheduled to run later -# TYPE test_queue_job_delayed_total gauge -test_queue_job_delayed_total{queue="default"} 5 - -# HELP test_queue_job_waiting_duration_seconds Duration jobs spend waiting before being processed -# TYPE test_queue_job_waiting_duration_seconds histogram -test_queue_job_waiting_duration_seconds_bucket{queue="default",le="0.1"} 50 -test_queue_job_waiting_duration_seconds_bucket{queue="default",le="1"} 241 -test_queue_job_waiting_duration_seconds_bucket{queue="default",le="5"} 820 -test_queue_job_waiting_duration_seconds_bucket{queue="default",le="10"} 1105 -test_queue_job_waiting_duration_seconds_bucket{queue="default",le="30"} 1240 -test_queue_job_waiting_duration_seconds_bucket{queue="default",le="+Inf"} 1253 -test_queue_job_waiting_duration_seconds_sum{queue="default"} 450.32 -test_queue_job_waiting_duration_seconds_count{queue="default"} 1253 - -# HELP test_api_requests_total Total API requests -# TYPE test_api_requests_total counter -test_api_requests_total{method="GET",endpoint="/workflows"} 240 -test_api_requests_total{method="POST",endpoint="/executions"} 75 - -# HELP test_api_request_duration_seconds API request duration in seconds -# TYPE test_api_request_duration_seconds histogram -test_api_request_duration_seconds_bucket{le="0.1"} 90 -test_api_request_duration_seconds_bucket{le="1"} 120 -test_api_request_duration_seconds_bucket{le="+Inf"} 125 -test_api_request_duration_seconds_sum 15.3 -test_api_request_duration_seconds_count 125 - -# HELP test_cache_operations_total Total cache operations -# TYPE test_cache_operations_total counter -test_cache_operations_total{operation="get"} 1250 
-test_cache_operations_total{operation="set"} 320 -test_cache_operations_total{operation="delete"} 10 - -# HELP test_cache_hits_total Cache hits -# TYPE test_cache_hits_total counter -test_cache_hits_total 1080 +test_queue_job_enqueued_total 8 -# HELP test_cache_misses_total Cache misses -# TYPE test_cache_misses_total counter -test_cache_misses_total 170 - -# HELP test_cache_errors_total Cache errors -# TYPE test_cache_errors_total counter -test_cache_errors_total 0 - -# HELP test_cache_latency_seconds Cache operation latency in seconds -# TYPE test_cache_latency_seconds histogram -test_cache_latency_seconds_bucket{le="0.001"} 90 -test_cache_latency_seconds_bucket{le="0.01"} 240 -test_cache_latency_seconds_bucket{le="+Inf"} 260 -test_cache_latency_seconds_sum 1.42 -test_cache_latency_seconds_count 260 - -# HELP test_eventbus_events_total Total events published on the event bus -# TYPE test_eventbus_events_total counter -test_eventbus_events_total{event_type="workflowStarted"} 140 -test_eventbus_events_total{event_type="workflowCompleted"} 135 -test_eventbus_events_total{event_type="workflowFailed"} 5 - -# HELP test_eventbus_events_processed_total Total processed events -# TYPE test_eventbus_events_processed_total counter -test_eventbus_events_processed_total 138 - -# HELP test_eventbus_events_failed_total Total failed event processing -# TYPE test_eventbus_events_failed_total counter -test_eventbus_events_failed_total 2 - -# HELP test_eventbus_queue_size Current event queue size -# TYPE test_eventbus_queue_size gauge -test_eventbus_queue_size 1 - -# HELP test_eventbus_connections_total Active event bus backend connections -# TYPE test_eventbus_connections_total gauge -test_eventbus_connections_total 1 - -# HELP test_workflow_executions_active Number of active workflow executions -# TYPE test_workflow_executions_active gauge -test_workflow_executions_active 3 - -# HELP test_queue_job_attempts_total Total number of job attempts -# TYPE test_queue_job_attempts_total 
counter -test_queue_job_attempts_total{result="success"} 435 -test_queue_job_attempts_total{result="failed"} 12 - -# HELP test_workflow_started_total Total number of workflows started +# HELP test_workflow_started_total Total number of n8n.workflow.started events. # TYPE test_workflow_started_total counter -test_workflow_started_total 25634 -test_workflow_started_total{workflow_id="12",workflow_name="CRM Sync"} 8142 -test_workflow_started_total{workflow_id="25",workflow_name="Webhook Intake"} 14290 -test_workflow_started_total{workflow_id="33",workflow_name="Slack Alerts"} 2202 +test_workflow_started_total{workflow_id="testWorkflowOk"} 4 +test_workflow_started_total{workflow_id="testWorkflowFail"} 4 + +# HELP test_audit_workflow_executed_total Total number of n8n.audit.workflow.executed events. +# TYPE test_audit_workflow_executed_total counter +test_audit_workflow_executed_total{workflow_id="testWorkflowOk"} 4 +test_audit_workflow_executed_total{workflow_id="testWorkflowFail"} 4 -# HELP test_workflow_success_total Total number of workflows completed successfully +# HELP test_audit_workflow_resumed_total Total number of n8n.audit.workflow.resumed events. +# TYPE test_audit_workflow_resumed_total counter +test_audit_workflow_resumed_total{workflow_id="testWorkflowOk"} 1 + +# HELP test_audit_workflow_unarchived_total Total number of n8n.audit.workflow.unarchived events. +# TYPE test_audit_workflow_unarchived_total counter +test_audit_workflow_unarchived_total{workflow_id="testWorkflowOk"} 1 + +# HELP test_audit_workflow_updated_total Total number of n8n.audit.workflow.updated events. +# TYPE test_audit_workflow_updated_total counter +test_audit_workflow_updated_total{workflow_id="testWorkflowOk"} 1 + +# HELP test_audit_workflow_version_updated_total Total number of n8n.audit.workflow.version.updated events. 
+# TYPE test_audit_workflow_version_updated_total counter +test_audit_workflow_version_updated_total{workflow_id="testWorkflowOk"} 1 + +# HELP test_audit_workflow_waiting_total Total number of n8n.audit.workflow.waiting events. +# TYPE test_audit_workflow_waiting_total counter +test_audit_workflow_waiting_total{workflow_id="testWorkflowOk"} 1 + +# HELP test_workflow_success_total Total number of n8n.workflow.success events. # TYPE test_workflow_success_total counter -test_workflow_success_total 25209 -test_workflow_success_total{workflow_id="12",workflow_name="CRM Sync"} 8059 -test_workflow_success_total{workflow_id="25",workflow_name="Webhook Intake"} 14135 -test_workflow_success_total{workflow_id="33",workflow_name="Slack Alerts"} 2015 +test_workflow_success_total{workflow_id="testWorkflowOk"} 4 -# HELP test_workflow_failed_total Total number of workflows that failed +# HELP test_queue_job_completed_total Total number of n8n.queue.job.completed events. +# TYPE test_queue_job_completed_total counter +test_queue_job_completed_total 4 + +# HELP test_workflow_failed_total Total number of n8n.workflow.failed events. # TYPE test_workflow_failed_total counter -test_workflow_failed_total 425 -test_workflow_failed_total{workflow_id="12",workflow_name="CRM Sync"} 83 -test_workflow_failed_total{workflow_id="25",workflow_name="Webhook Intake"} 155 -test_workflow_failed_total{workflow_id="33",workflow_name="Slack Alerts"} 187 +test_workflow_failed_total{workflow_id="testWorkflowFail"} 4 +# HELP test_queue_job_failed_total Total number of n8n.queue.job.failed events. +# TYPE test_queue_job_failed_total counter +test_queue_job_failed_total 4 +# HELP test_queue_job_stalled_total Total number of n8n.queue.job.stalled events. +# TYPE test_queue_job_stalled_total counter +test_queue_job_stalled_total 1 +# HELP test_queue_job_dequeued_total Total number of n8n.queue.job.dequeued events. 
+# TYPE test_queue_job_dequeued_total counter +test_queue_job_dequeued_total 8 + +# HELP test_node_started_total Total number of n8n.node.started events. +# TYPE test_node_started_total counter +test_node_started_total{workflow_id="testWorkflowOk"} 8 +test_node_started_total{workflow_id="testWorkflowFail"} 8 + +# HELP test_node_finished_total Total number of n8n.node.finished events. +# TYPE test_node_finished_total counter +test_node_finished_total{workflow_id="testWorkflowOk"} 8 +test_node_finished_total{workflow_id="testWorkflowFail"} 8 + +# HELP test_runner_task_requested_total Total number of n8n.runner.task.requested events. +# TYPE test_runner_task_requested_total counter +test_runner_task_requested_total 4 + +# HELP test_expression_code_cache_hit_total Expression code cache hits. +# TYPE test_expression_code_cache_hit_total counter +test_expression_code_cache_hit_total 0 + +# HELP test_expression_code_cache_miss_total Expression code cache misses. +# TYPE test_expression_code_cache_miss_total counter +test_expression_code_cache_miss_total 0 + +# HELP test_expression_code_cache_eviction_total Expression code cache evictions. +# TYPE test_expression_code_cache_eviction_total counter +test_expression_code_cache_eviction_total 0 + +# HELP test_expression_code_cache_size Current expression code cache size. +# TYPE test_expression_code_cache_size gauge +test_expression_code_cache_size 0 + +# HELP test_expression_evaluation_duration_seconds Duration of VM-based expression evaluation in seconds. 
+# TYPE test_expression_evaluation_duration_seconds histogram +test_expression_evaluation_duration_seconds_bucket{le="0.0001",status="success",type="none"} 0 +test_expression_evaluation_duration_seconds_bucket{le="+Inf",status="success",type="none"} 0 +test_expression_evaluation_duration_seconds_count{status="success",type="none"} 0 +test_expression_evaluation_duration_seconds_sum{status="success",type="none"} 0 + +# HELP test_expression_pool_acquired_total Bridges acquired from the expression pool. +# TYPE test_expression_pool_acquired_total counter +test_expression_pool_acquired_total 0 + +# HELP test_expression_pool_replenish_failed_total Failed pool bridge replenishments. +# TYPE test_expression_pool_replenish_failed_total counter +test_expression_pool_replenish_failed_total 0 + +# HELP test_expression_pool_scaled_up_total Expression pool scaled up from idle. +# TYPE test_expression_pool_scaled_up_total counter +test_expression_pool_scaled_up_total 0 + +# HELP test_expression_pool_scaled_to_zero_total Expression pool scaled to zero after inactivity. +# TYPE test_expression_pool_scaled_to_zero_total counter +test_expression_pool_scaled_to_zero_total 0 +# HELP test_ai_document_processed_total Total number of test.ai.document.processed events. +# TYPE test_ai_document_processed_total counter +test_ai_document_processed_total 0 +# HELP test_ai_embeddings_embedded_document_total Total number of test.ai.embeddings.embedded.document events. +# TYPE test_ai_embeddings_embedded_document_total counter +test_ai_embeddings_embedded_document_total 0 +# HELP test_ai_embeddings_embedded_query_total Total number of test.ai.embeddings.embedded.query events. +# TYPE test_ai_embeddings_embedded_query_total counter +test_ai_embeddings_embedded_query_total 0 +# HELP test_ai_llm_error_total Total number of test.ai.llm.error events. +# TYPE test_ai_llm_error_total counter +test_ai_llm_error_total 0 +# HELP test_ai_llm_generated_total Total number of test.ai.llm.generated events. 
+# TYPE test_ai_llm_generated_total counter +test_ai_llm_generated_total 0 +# HELP test_ai_memory_added_message_total Total number of test.ai.memory.added.message events. +# TYPE test_ai_memory_added_message_total counter +test_ai_memory_added_message_total 0 +# HELP test_ai_memory_get_messages_total Total number of test.ai.memory.get.messages events. +# TYPE test_ai_memory_get_messages_total counter +test_ai_memory_get_messages_total 0 +# HELP test_ai_output_parser_parsed_total Total number of test.ai.output.parser.parsed events. +# TYPE test_ai_output_parser_parsed_total counter +test_ai_output_parser_parsed_total 0 +# HELP test_ai_retriever_get_relevant_documents_total Total number of test.ai.retriever.get.relevant.documents events. +# TYPE test_ai_retriever_get_relevant_documents_total counter +test_ai_retriever_get_relevant_documents_total 0 +# HELP test_ai_text_splitter_split_total Total number of test.ai.text.splitter.split events. +# TYPE test_ai_text_splitter_split_total counter +test_ai_text_splitter_split_total 0 +# HELP test_ai_tool_called_total Total number of test.ai.tool.called events. +# TYPE test_ai_tool_called_total counter +test_ai_tool_called_total 0 +# HELP test_ai_vector_store_populated_total Total number of test.ai.vector.store.populated events. +# TYPE test_ai_vector_store_populated_total counter +test_ai_vector_store_populated_total 0 +# HELP test_ai_vector_store_searched_total Total number of test.ai.vector.store.searched events. +# TYPE test_ai_vector_store_searched_total counter +test_ai_vector_store_searched_total 0 +# HELP test_ai_vector_store_updated_total Total number of test.ai.vector.store.updated events. +# TYPE test_ai_vector_store_updated_total counter +test_ai_vector_store_updated_total 0 +# HELP test_audit_execution_data_reveal_failure_total Total number of test.audit.execution.data.reveal.failure events. 
+# TYPE test_audit_execution_data_reveal_failure_total counter +test_audit_execution_data_reveal_failure_total 0 +# HELP test_audit_execution_data_revealed_total Total number of test.audit.execution.data.revealed events. +# TYPE test_audit_execution_data_revealed_total counter +test_audit_execution_data_revealed_total 0 +# HELP test_audit_package_deleted_total Total number of test.audit.package.deleted events. +# TYPE test_audit_package_deleted_total counter +test_audit_package_deleted_total 0 +# HELP test_audit_package_installed_total Total number of test.audit.package.installed events. +# TYPE test_audit_package_installed_total counter +test_audit_package_installed_total 0 +# HELP test_audit_package_updated_total Total number of test.audit.package.updated events. +# TYPE test_audit_package_updated_total counter +test_audit_package_updated_total 0 +# HELP test_audit_user_api_created_total Total number of test.audit.user.api.created events. +# TYPE test_audit_user_api_created_total counter +test_audit_user_api_created_total 0 +# HELP test_audit_user_api_deleted_total Total number of test.audit.user.api.deleted events. +# TYPE test_audit_user_api_deleted_total counter +test_audit_user_api_deleted_total 0 +# HELP test_audit_user_credentials_created_total Total number of test.audit.user.credentials.created events. +# TYPE test_audit_user_credentials_created_total counter +test_audit_user_credentials_created_total 0 +# HELP test_audit_user_credentials_deleted_total Total number of test.audit.user.credentials.deleted events. +# TYPE test_audit_user_credentials_deleted_total counter +test_audit_user_credentials_deleted_total 0 +# HELP test_audit_user_credentials_shared_total Total number of test.audit.user.credentials.shared events. +# TYPE test_audit_user_credentials_shared_total counter +test_audit_user_credentials_shared_total 0 +# HELP test_audit_user_credentials_updated_total Total number of test.audit.user.credentials.updated events. 
+# TYPE test_audit_user_credentials_updated_total counter +test_audit_user_credentials_updated_total 0 +# HELP test_audit_user_deleted_total Total number of test.audit.user.deleted events. +# TYPE test_audit_user_deleted_total counter +test_audit_user_deleted_total 0 +# HELP test_audit_user_email_failed_total Total number of test.audit.user.email.failed events. +# TYPE test_audit_user_email_failed_total counter +test_audit_user_email_failed_total 0 +# HELP test_audit_user_execution_deleted_total Total number of test.audit.user.execution.deleted events. +# TYPE test_audit_user_execution_deleted_total counter +test_audit_user_execution_deleted_total 0 +# HELP test_audit_user_invitation_accepted_total Total number of test.audit.user.invitation.accepted events. +# TYPE test_audit_user_invitation_accepted_total counter +test_audit_user_invitation_accepted_total 0 +# HELP test_audit_user_invited_total Total number of test.audit.user.invited events. +# TYPE test_audit_user_invited_total counter +test_audit_user_invited_total 0 +# HELP test_audit_user_login_failed_total Total number of test.audit.user.login.failed events. +# TYPE test_audit_user_login_failed_total counter +test_audit_user_login_failed_total 0 +# HELP test_audit_user_login_success_total Total number of test.audit.user.login.success events. +# TYPE test_audit_user_login_success_total counter +test_audit_user_login_success_total 0 +# HELP test_audit_user_mfa_disabled_total Total number of test.audit.user.mfa.disabled events. +# TYPE test_audit_user_mfa_disabled_total counter +test_audit_user_mfa_disabled_total 0 +# HELP test_audit_user_mfa_enabled_total Total number of test.audit.user.mfa.enabled events. +# TYPE test_audit_user_mfa_enabled_total counter +test_audit_user_mfa_enabled_total 0 +# HELP test_audit_user_reinvited_total Total number of test.audit.user.reinvited events. 
+# TYPE test_audit_user_reinvited_total counter +test_audit_user_reinvited_total 0 +# HELP test_audit_user_reset_total Total number of test.audit.user.reset events. +# TYPE test_audit_user_reset_total counter +test_audit_user_reset_total 0 +# HELP test_audit_user_reset_requested_total Total number of test.audit.user.reset.requested events. +# TYPE test_audit_user_reset_requested_total counter +test_audit_user_reset_requested_total 0 +# HELP test_audit_user_signedup_total Total number of test.audit.user.signedup events. +# TYPE test_audit_user_signedup_total counter +test_audit_user_signedup_total 0 +# HELP test_audit_user_updated_total Total number of test.audit.user.updated events. +# TYPE test_audit_user_updated_total counter +test_audit_user_updated_total 0 +# HELP test_audit_variable_created_total Total number of test.audit.variable.created events. +# TYPE test_audit_variable_created_total counter +test_audit_variable_created_total 0 +# HELP test_audit_variable_deleted_total Total number of test.audit.variable.deleted events. +# TYPE test_audit_variable_deleted_total counter +test_audit_variable_deleted_total 0 +# HELP test_audit_variable_updated_total Total number of test.audit.variable.updated events. +# TYPE test_audit_variable_updated_total counter +test_audit_variable_updated_total 0 +# HELP test_execution_throttled_total Total number of test.execution.throttled events. +# TYPE test_execution_throttled_total counter +test_execution_throttled_total 0 +# HELP test_runner_response_received_total Total number of test.runner.response.received events. +# TYPE test_runner_response_received_total counter +test_runner_response_received_total 0 +# HELP test_workflow_cancelled_total Total number of test.workflow.cancelled events. 
+# TYPE test_workflow_cancelled_total counter +test_workflow_cancelled_total 0 diff --git a/n8n/tests/lab/README.md b/n8n/tests/lab/README.md new file mode 100644 index 0000000000000..df1055ee17331 --- /dev/null +++ b/n8n/tests/lab/README.md @@ -0,0 +1,100 @@ +# n8n integration lab + +A long-running n8n simulation that pushes real metrics to a Datadog org so you can iterate on dashboards, monitors, and customer reports against live data. + +It uses a dedicated docker-compose (`tests/lab/docker-compose.yaml`) that pulls the integration test stack — queue mode, a worker, redis, the Datadog Agent — and bind-mounts both the test workflows and the lab workflows into `/workflows/`. `tests/conftest.py` (gated on the `N8N_IS_LAB` env var that the `lab` hatch env sets) imports and activates everything it finds there as part of `ddev env start`. Ports are hardcoded to `5678` (main) and `5680` (worker) since the lab owns the host. + +On top you get: + +- five lab-only workflows with distinct shapes (fast, slow, always-fail, flaky, multi-step chain), and +- an async traffic generator that drives a configurable webhook + REST API mix and reloads its config on the fly. + +## Setup + +### Datadog credentials + +The lab uses a `.ddev.toml` in this directory (already committed) to point at an `n8nlab` ddev org. Add the matching entry to your global `~/.ddev/config.toml`: + +```toml +[orgs.n8nlab] +api_key = "" +site = "datadoghq.com" +``` + +Use any org name you like; just keep `org = "n8nlab"` in `tests/lab/.ddev.toml` aligned with what you put in your global config. + +### Traffic configuration + +`tests/lab/config.yaml` controls the traffic mix. 
Probabilities are independent draws per tick, and values above `1.0` mean "more than one call per tick on average": + +```yaml +webhook_probabilities: + /webhook/lab/fast: 0.9 # bulk traffic, fast histogram bucket + /webhook/lab/slow: 0.4 # populates higher histogram buckets + /webhook/lab/fail: 0.15 # populates workflow_failed + /webhook/lab/flaky: 0.5 # mixed success/failure + /webhook/lab/chain: 0.3 # 4 Set nodes -> 4x node.* events +api_probabilities: + /healthz: 1.0 + /healthz/readiness: 0.5 + /rest/login: 0.2 # 401s +tick_seconds: 1.0 +reload_interval: 5 +``` + +Edit this file while the lab is running and the generator will pick it up on the next `reload_interval` tick. + +## Usage + +### One-shot (recommended) + +```bash +./tests/lab/run_lab.sh # default env: py3.13-2 (n8n 2.19.5) +./tests/lab/run_lab.sh -e py3.13-1 # n8n 1.118.1 +``` + +The script brings up the env (which goes through `dd_environment` and activates every mounted workflow) and starts the traffic generator. `Ctrl+C` (or any other script exit) fires the `cleanup` EXIT trap, which runs `lab:stop` to tear everything down. 
+ +### Individual hatch commands + +```bash +hatch run lab:start -e py3.13-2 # ddev env start (lab compose; imports + activates workflows) +hatch run lab:generate # traffic loop (foreground; Ctrl+C to stop) +hatch run lab:stop -e py3.13-2 # ddev env stop +``` + +## What this exercises + +The lab is wired to populate every metric family the integration maps that does not require an SSO/embed flow: + +| Metric family | How the lab drives it | +| --- | --- | +| `n8n.workflow.started/.success/.failed.count` | every webhook hit goes through the EventBus | +| `n8n.workflow.execution.duration.seconds.*` (n8n 2.x) | the slow & chain workflows spread the histogram | +| `n8n.node.started/.finished.count` | the worker fires per-node events; the chain workflow yields 4× per call | +| `n8n.queue.job.enqueued/.dequeued/.completed/.failed.count` | queue mode is enabled in the test compose | +| `n8n.scaling.mode.queue.jobs.{active,waiting,completed,failed}` | main process tracks queue depth | +| `n8n.http.request.duration.seconds.*` | the API mix (`/healthz`, `/rest/login`) drives status code labels | +| `n8n.cache.hits/.misses/.updates.count` | cache traffic comes from n8n itself during execution | +| `n8n.last.activity` | refreshed on every API call | +| `n8n.{production,production.root,manual,enabled.users,users,workflows,credentials}.total` | enabled in the test compose via `N8N_METRICS_INCLUDE_WORKFLOW_STATISTICS` | + +What it does **not** exercise (these need extra infra and are documented in the README "Version-specific metrics" section): + +- `n8n.token.exchange.*` and `n8n.embed.login.*` — require an SSO IdP / embed integration. +- `n8n.audit.workflow.*` — fire on UI-driven activate/deactivate; not currently driven by the generator. Future iteration could call the n8n REST API to toggle workflow active state on a slow timer. 
+ +## Log pipeline validation + +The lab validates both n8n log surfaces: + +- The main and worker containers set `N8N_LOG_OUTPUT=console` and `N8N_LOG_FORMAT=json`. The Datadog autodiscovery labels on those services mark Docker stdout logs with `source:n8n`, so the n8n log pipeline can parse JSON application logs from both processes. +- The Agent also mounts the n8n data volume read-only at `/n8n-event-logs` and tails `/n8n-event-logs/n8nEventLog*.log` with `source:n8n`. This lets the same pipeline parse workflow, node, queue, runner, and audit event-bus logs. + +## Stopping the lab + +`Ctrl+C` from `run_lab.sh` cleans up automatically. If you ran the hatch commands directly: + +```bash +hatch run lab:stop -e py3.13-2 +``` diff --git a/n8n/tests/lab/config.yaml b/n8n/tests/lab/config.yaml new file mode 100644 index 0000000000000..9e1e2e76706b2 --- /dev/null +++ b/n8n/tests/lab/config.yaml @@ -0,0 +1,25 @@ +# n8n lab traffic configuration. Edit this file while the lab is running and +# changes are picked up every `reload_interval` seconds. + +# Probability of hitting each webhook on every traffic tick. Independent draws — +# multiple endpoints can fire on the same tick. Values can exceed 1.0 to issue +# multiple invocations per tick (e.g. 2.5 = 2 calls + a 50% chance of a third). +webhook_probabilities: + /webhook/lab/fast: 0.9 # bulk of the workflow_started counter and HTTP histogram + /webhook/lab/slow: 0.4 # Wait node spreads execution-duration buckets + /webhook/lab/fail: 0.15 # populates workflow_failed and node_finished{status="failed"} + /webhook/lab/flaky: 0.5 # mixed success/failure, ~30% fail rate + /webhook/lab/chain: 0.3 # 4 Set nodes => 4x node.started/finished events per call + +# Probability of hitting each REST API endpoint per tick. Used to drive the +# http_request_duration_seconds histogram across status code labels. 
+api_probabilities: + /healthz: 1.0 + /healthz/readiness: 0.5 + /rest/login: 0.2 # 401s — useful for status_code label coverage + +# How long to sleep between traffic ticks (seconds). +tick_seconds: 1.0 + +# Reload this file every N seconds (live config). +reload_interval: 5 diff --git a/n8n/tests/lab/docker-compose.yaml b/n8n/tests/lab/docker-compose.yaml new file mode 100644 index 0000000000000..eeeaf08405068 --- /dev/null +++ b/n8n/tests/lab/docker-compose.yaml @@ -0,0 +1,106 @@ +services: + redis: + image: redis:7-alpine + container_name: n8n-test-redis + healthcheck: + test: ["CMD", "redis-cli", "ping"] + interval: 5s + timeout: 3s + retries: 5 + + n8n: + image: n8nio/n8n:${N8N_VERSION:-1.118.1} + container_name: n8n-test + # Datadog autodiscovery: tells the Agent (started by `ddev env start --base` + # with DD_LOGS_ENABLED=true) to tail this container's stdout/stderr as n8n + # logs. Without this label the Agent has DD_LOGS_ENABLED but doesn't know + # which containers to attach to. + labels: + com.datadoghq.ad.logs: '[{"source": "n8n", "service": "n8n-main", "type": "docker"}]' + ports: + # Hardcoded ports for the lab (we own the host) so the traffic generator, + # the README, and the agent config can all assume `localhost:5678`/`5680`. 
+ - "5678:5678" + environment: + - EXECUTIONS_MODE=queue + - QUEUE_BULL_REDIS_HOST=redis + - QUEUE_BULL_REDIS_PORT=6379 + - N8N_LOG_LEVEL=info + - N8N_LOG_OUTPUT=console + - N8N_LOG_FORMAT=json + - N8N_HOST=0.0.0.0 + - N8N_PORT=5678 + - N8N_PROTOCOL=http + - N8N_BASIC_AUTH_ACTIVE=true + - N8N_BASIC_AUTH_USER=admin + - N8N_BASIC_AUTH_PASSWORD=admin + - N8N_DIAGNOSTICS_ENABLED=false + - N8N_VERSION_NOTIFICATIONS_ENABLED=false + - N8N_TEMPLATES_ENABLED=false + - N8N_RUNNERS_ENABLED=false + - N8N_METRICS=true + - N8N_METRICS_INCLUDE_DEFAULT_METRICS=true + - N8N_METRICS_INCLUDE_CACHE_METRICS=true + - N8N_METRICS_INCLUDE_MESSAGE_EVENT_BUS_METRICS=true + - N8N_METRICS_INCLUDE_API_ENDPOINTS=true + - N8N_METRICS_INCLUDE_QUEUE_METRICS=true + - N8N_METRICS_INCLUDE_WORKFLOW_ID_LABEL=true + - N8N_METRICS_INCLUDE_WORKFLOW_STATISTICS=true + volumes: + - n8n_data:/home/node/.n8n + - ${N8N_LOG_FOLDER:-./logs}:/var/log/n8n + # Mount every workflow (test fixtures + lab fixtures) under a single + # /workflows/ tree so conftest can discover them with a single glob. 
+ - ../docker/sample_workflow.json:/workflows/sample_workflow.json:ro + - ../docker/sample_workflow_failing.json:/workflows/sample_workflow_failing.json:ro + - ./workflows/lab_fast.json:/workflows/lab_fast.json:ro + - ./workflows/lab_slow.json:/workflows/lab_slow.json:ro + - ./workflows/lab_fail.json:/workflows/lab_fail.json:ro + - ./workflows/lab_flaky.json:/workflows/lab_flaky.json:ro + - ./workflows/lab_chain.json:/workflows/lab_chain.json:ro + depends_on: + redis: + condition: service_healthy + healthcheck: + test: ["CMD", "wget", "-q", "--spider", "http://localhost:5678/healthz"] + interval: 10s + timeout: 5s + retries: 5 + start_period: 30s + + n8n-worker: + image: n8nio/n8n:${N8N_VERSION:-1.118.1} + container_name: n8n-test-worker + command: ["worker"] + labels: + com.datadoghq.ad.logs: '[{"source": "n8n", "service": "n8n-worker", "type": "docker"}]' + ports: + - "5680:5680" + environment: + - EXECUTIONS_MODE=queue + - QUEUE_BULL_REDIS_HOST=redis + - QUEUE_BULL_REDIS_PORT=6379 + - N8N_LOG_LEVEL=info + - N8N_LOG_OUTPUT=console + - N8N_LOG_FORMAT=json + - N8N_RUNNERS_ENABLED=false + - N8N_METRICS=true + - N8N_METRICS_INCLUDE_DEFAULT_METRICS=true + - N8N_METRICS_INCLUDE_CACHE_METRICS=true + - N8N_METRICS_INCLUDE_MESSAGE_EVENT_BUS_METRICS=true + - N8N_METRICS_INCLUDE_API_ENDPOINTS=true + - N8N_METRICS_INCLUDE_QUEUE_METRICS=true + - N8N_METRICS_INCLUDE_WORKFLOW_ID_LABEL=true + - N8N_METRICS_INCLUDE_WORKFLOW_STATISTICS=true + - QUEUE_HEALTH_CHECK_ACTIVE=true + - QUEUE_HEALTH_CHECK_PORT=5680 + volumes: + - n8n_data:/home/node/.n8n + depends_on: + n8n: + condition: service_healthy + +volumes: + n8n_data: + name: n8n_lab_data + driver: local diff --git a/n8n/tests/lab/run_lab.sh b/n8n/tests/lab/run_lab.sh new file mode 100755 index 0000000000000..5e8d56e5ce525 --- /dev/null +++ b/n8n/tests/lab/run_lab.sh @@ -0,0 +1,47 @@ +#!/bin/bash +set -e + +ORIGINAL_DIR=$(pwd) +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +ENV="py3.13-2" + +while [[ $# -gt 0 ]]; do 
+ case $1 in + -e|--env) + ENV="$2" + shift 2 + ;; + -h|--help) + echo "Usage: $0 [-e|--env ENV]" + echo "" + echo "Options:" + echo " -e, --env ENV ddev environment to use (default: py3.13-2)" + echo " -h, --help Show this help message" + exit 0 + ;; + *) + echo "Unknown option: $1" + echo "Use --help for usage information" + exit 1 + ;; + esac +done + +cleanup() { + echo "" + echo "Cleaning up..." + cd "$SCRIPT_DIR" + hatch run lab:stop -e "$ENV" || true + cd "$ORIGINAL_DIR" + exit 0 +} + +# `lab:generate` runs through `hatch`, which traps SIGINT itself, so we +# install our own EXIT trap to make sure `lab:stop` always runs even on Ctrl+C. +trap cleanup EXIT + +cd "$SCRIPT_DIR" +hatch run lab:start -e "$ENV" + +echo "Starting traffic (Ctrl+C to stop)..." +hatch run lab:generate diff --git a/n8n/tests/lab/traffic_generator.py b/n8n/tests/lab/traffic_generator.py new file mode 100644 index 0000000000000..fd93a0c9794a3 --- /dev/null +++ b/n8n/tests/lab/traffic_generator.py @@ -0,0 +1,249 @@ +# (C) Datadog, Inc. 2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +"""n8n lab traffic generator. + +The lab brings up a dedicated docker-compose (``tests/lab/docker-compose.yaml``) +that bind-mounts both the test fixtures and the lab workflow JSONs under +``/workflows/``. ``tests/conftest.py`` (gated on ``N8N_IS_LAB``) imports and +activates every workflow it finds there as part of ``ddev env start --base``, +so by the time this generator runs the webhooks are already live. + +Ports are hardcoded in the lab compose, so the generator can assume +``localhost:5678`` and skip the dynamic discovery the integration tests need. 
+""" + +from __future__ import annotations + +import asyncio +import random +import signal +import subprocess +import sys +import time +from pathlib import Path +from typing import Any + +import click +import httpx +import yaml +from rich.console import Console +from rich.table import Table + +ConfigDict = dict[str, Any] +LAB_DIR = Path(__file__).resolve().parent +CONFIG_PATH = LAB_DIR / "config.yaml" + +MAIN_BASE_URL = "http://localhost:5678" + + +def _load_config(path: Path, fallback: ConfigDict) -> tuple[ConfigDict, str]: + """Read ``path`` and return ``(new_config, error_message)``. + + On any failure the ``fallback`` config is returned unchanged together with a non-empty + error message describing why the reload was rejected. + """ + try: + with open(path) as f: + data = yaml.safe_load(f) or {} + except FileNotFoundError: + return fallback, f"Reload failed: config file {path} not found. Lab still running with previous config." + except yaml.YAMLError as exc: + return fallback, f"Reload failed: cannot parse {path} ({exc}). Lab still running with previous config." + + if not isinstance(data, dict): + return fallback, ( + f"Reload failed: {path} must be a mapping at the top level. Lab still running with previous config." 
+ ) + + return data, "" + + +def _wait_for_endpoint(url: str, *, timeout: int = 90) -> None: + deadline = time.monotonic() + timeout + while time.monotonic() < deadline: + try: + if httpx.get(url, timeout=2).status_code == 200: + return + except httpx.RequestError: + pass + time.sleep(2) + raise RuntimeError(f"Endpoint {url} never became reachable") + + +def _make_output_table() -> Table: + table = Table(show_header=False, box=None, show_edge=False) + table.add_column("Timestamp", style="dim", width=20) + table.add_column("Scenario", width=10) + table.add_column("Endpoint", width=28) + table.add_column("Status", justify="right", width=14) + table.add_column("Latency (ms)", justify="right", width=14) + return table + + +def _print_row(console: Console, ts: str, scenario: str, target: str, status: str, latency_ms: str) -> None: + table = _make_output_table() + table.add_row(ts, scenario, target, status, latency_ms) + console.print(table) + + +async def _hit(client: httpx.AsyncClient, console: Console, scenario: str, path: str) -> None: + url = f"{MAIN_BASE_URL}{path}" + ts = time.strftime("%H:%M:%S") + start = time.perf_counter() + try: + resp = await client.get(url, timeout=10.0) + latency_ms = f"{(time.perf_counter() - start) * 1000:.0f}" + style = "green" if 200 <= resp.status_code < 400 else "red" + _print_row(console, ts, scenario, path, f"[{style}]{resp.status_code}[/]", latency_ms) + except httpx.TimeoutException: + _print_row(console, ts, scenario, path, "[bold yellow]TIMEOUT[/]", "") + except httpx.RequestError as exc: + _print_row(console, ts, scenario, path, f"[bold red]ERR[/] {type(exc).__name__}", "") + + +def _draws(probability: float) -> int: + """Return the number of times an event should fire this tick. + + ``probability`` is interpreted as expected count: ``2.5`` => 2 firings + a + 50% chance of a third. Values <= 1 act like a single Bernoulli trial. 
+ """ + whole = int(probability) + fractional = probability - whole + extra = 1 if random.random() < fractional else 0 + return whole + extra + + +async def _config_reloader( + path: Path, console: Console, shutdown_event: asyncio.Event, state: dict[str, ConfigDict] +) -> None: + while not shutdown_event.is_set(): + new_config, error = _load_config(path, state["current"]) + if error: + console.print(f"[bold yellow]{error}[/bold yellow]") + elif new_config != state["current"]: + state["current"] = new_config + console.print(f"[bold cyan]Reloaded config from {path}[/bold cyan]") + try: + await asyncio.wait_for(shutdown_event.wait(), timeout=float(state["current"].get("reload_interval", 5))) + except asyncio.TimeoutError: + pass + + +async def _run_traffic(console: Console) -> None: + shutdown_event = asyncio.Event() + loop = asyncio.get_running_loop() + for sig in (signal.SIGINT, signal.SIGTERM): + loop.add_signal_handler(sig, shutdown_event.set) + + initial, error = _load_config(CONFIG_PATH, {}) + if error: + console.print(f"[bold red]{error}[/bold red]") + sys.exit(1) + state: dict[str, ConfigDict] = {"current": initial} + + console.print( + f"[dim]Traffic config: {CONFIG_PATH}\n" + f"n8n base URL: {MAIN_BASE_URL}\n" + "Edit config.yaml while the lab runs to change the mix.[/dim]\n" + ) + + reloader = asyncio.create_task(_config_reloader(CONFIG_PATH, console, shutdown_event, state)) + async with httpx.AsyncClient() as client: + try: + while not shutdown_event.is_set(): + current = state["current"] + tasks = [] + for path, probability in (current.get("webhook_probabilities") or {}).items(): + for _ in range(_draws(float(probability))): + tasks.append(_hit(client, console, "webhook", path)) + for path, probability in (current.get("api_probabilities") or {}).items(): + for _ in range(_draws(float(probability))): + tasks.append(_hit(client, console, "api", path)) + if tasks: + await asyncio.gather(*tasks, return_exceptions=True) + try: + await asyncio.wait_for( + 
shutdown_event.wait(), + timeout=float(state["current"].get("tick_seconds", 1.0)), + ) + except asyncio.TimeoutError: + pass + finally: + reloader.cancel() + try: + await reloader + except asyncio.CancelledError: + pass + + +@click.group() +def cli() -> None: + """n8n traffic lab commands.""" + + +@cli.command() +@click.option("-e", "--env", default="py3.13-2", help="ddev env name to start (matches hatch matrix entry).") +def start(env: str) -> None: + """Bring up the n8n lab compose + Datadog Agent. + + The lab compose bind-mounts the lab + test workflow JSONs under ``/workflows/``, + and ``tests/conftest.py`` (in lab mode, gated on ``N8N_IS_LAB``) imports and + activates them as part of ``ddev env start``. This command therefore does + nothing fancy — it just hands off to ddev. + """ + console = Console() + console.print(f"[bold cyan]Starting environment {env} via ddev (this also starts the Agent)...[/bold cyan]") + rc = subprocess.call( + [ + "ddev", + "env", + "start", + "n8n", + "--base", + env, + "-e", + "DD_LOGS_ENABLED=true", + # Attach stdout tailers via Docker autodiscovery. Event-bus file logs are + # configured in ``tests/conftest.py`` through the lab-only ``logs`` block. + "-e", + "DD_LOGS_CONFIG_CONTAINER_COLLECT_ALL=true", + ] + ) + if rc != 0: + console.print(f"[bold red]ddev env start failed (exit {rc})[/bold red]") + sys.exit(rc) + + _wait_for_endpoint(f"{MAIN_BASE_URL}/healthz") + console.print( + "\n[bold green]Lab is up.[/bold green] " + "Run [bold]hatch run lab:generate[/bold] to start traffic, " + "[bold]hatch run lab:stop[/bold] to tear down." 
+ ) + + +@cli.command() +def generate() -> None: + """Drive a continuous, configurable traffic mix against the running lab.""" + console = Console() + try: + asyncio.run(_run_traffic(console)) + except KeyboardInterrupt: + console.print("\n[bold yellow]Traffic stopped.[/bold yellow]") + + +@cli.command() +@click.option("-e", "--env", default="py3.13-2", help="ddev env name to stop.") +def stop(env: str) -> None: + """Tear down the lab environment.""" + console = Console() + console.print(f"[bold cyan]Stopping environment {env}...[/bold cyan]") + rc = subprocess.call(["ddev", "env", "stop", "n8n", env]) + if rc != 0: + console.print(f"[bold red]ddev env stop failed (exit {rc})[/bold red]") + sys.exit(rc) + console.print("[bold green]Lab stopped.[/bold green]") + + +if __name__ == "__main__": + cli() diff --git a/n8n/tests/lab/workflows/lab_chain.json b/n8n/tests/lab/workflows/lab_chain.json new file mode 100644 index 0000000000000..27c8279d63dd1 --- /dev/null +++ b/n8n/tests/lab/workflows/lab_chain.json @@ -0,0 +1,82 @@ +{ + "id": "labLongChain", + "versionId": "10000000-0000-0000-0000-000000000005", + "name": "Lab Long Chain", + "nodes": [ + { + "parameters": { + "httpMethod": "GET", + "path": "lab/chain", + "responseMode": "lastNode", + "options": {} + }, + "id": "11111111-0000-0000-0000-000000000005", + "name": "Webhook", + "type": "n8n-nodes-base.webhook", + "typeVersion": 2, + "position": [240, 300], + "webhookId": "lab-chain-aaaa-bbbb-cccc-000000000005" + }, + { + "parameters": { + "assignments": { + "assignments": [{"id": "1", "name": "step", "value": "one", "type": "string"}] + }, + "options": {} + }, + "id": "22222222-0000-0000-0000-000000000005", + "name": "Step 1", + "type": "n8n-nodes-base.set", + "typeVersion": 3.4, + "position": [460, 300] + }, + { + "parameters": { + "assignments": { + "assignments": [{"id": "2", "name": "step", "value": "two", "type": "string"}] + }, + "options": {} + }, + "id": "33333333-0000-0000-0000-000000000005", + "name": "Step 
2", + "type": "n8n-nodes-base.set", + "typeVersion": 3.4, + "position": [680, 300] + }, + { + "parameters": { + "assignments": { + "assignments": [{"id": "3", "name": "step", "value": "three", "type": "string"}] + }, + "options": {} + }, + "id": "44444444-0000-0000-0000-000000000005", + "name": "Step 3", + "type": "n8n-nodes-base.set", + "typeVersion": 3.4, + "position": [900, 300] + }, + { + "parameters": { + "assignments": { + "assignments": [{"id": "4", "name": "scenario", "value": "chain", "type": "string"}] + }, + "options": {} + }, + "id": "55555555-0000-0000-0000-000000000005", + "name": "Step 4", + "type": "n8n-nodes-base.set", + "typeVersion": 3.4, + "position": [1120, 300] + } + ], + "connections": { + "Webhook": {"main": [[{"node": "Step 1", "type": "main", "index": 0}]]}, + "Step 1": {"main": [[{"node": "Step 2", "type": "main", "index": 0}]]}, + "Step 2": {"main": [[{"node": "Step 3", "type": "main", "index": 0}]]}, + "Step 3": {"main": [[{"node": "Step 4", "type": "main", "index": 0}]]} + }, + "active": false, + "settings": {"executionOrder": "v1"}, + "pinData": {} +} diff --git a/n8n/tests/lab/workflows/lab_fail.json b/n8n/tests/lab/workflows/lab_fail.json new file mode 100644 index 0000000000000..327cb511ced65 --- /dev/null +++ b/n8n/tests/lab/workflows/lab_fail.json @@ -0,0 +1,38 @@ +{ + "id": "labAlwaysFail", + "versionId": "10000000-0000-0000-0000-000000000003", + "name": "Lab Always Fail", + "nodes": [ + { + "parameters": { + "httpMethod": "GET", + "path": "lab/fail", + "responseMode": "lastNode", + "options": {} + }, + "id": "11111111-0000-0000-0000-000000000003", + "name": "Webhook", + "type": "n8n-nodes-base.webhook", + "typeVersion": 2, + "position": [240, 300], + "webhookId": "lab-fail-aaaa-bbbb-cccc-000000000003" + }, + { + "parameters": { + "language": "javaScript", + "jsCode": "throw new Error('intentional lab failure');" + }, + "id": "22222222-0000-0000-0000-000000000003", + "name": "Code", + "type": "n8n-nodes-base.code", + 
"typeVersion": 2, + "position": [460, 300] + } + ], + "connections": { + "Webhook": {"main": [[{"node": "Code", "type": "main", "index": 0}]]} + }, + "active": false, + "settings": {"executionOrder": "v1"}, + "pinData": {} +} diff --git a/n8n/tests/lab/workflows/lab_fast.json b/n8n/tests/lab/workflows/lab_fast.json new file mode 100644 index 0000000000000..ebada5057e67b --- /dev/null +++ b/n8n/tests/lab/workflows/lab_fast.json @@ -0,0 +1,42 @@ +{ + "id": "labFastSuccess", + "versionId": "10000000-0000-0000-0000-000000000001", + "name": "Lab Fast Success", + "nodes": [ + { + "parameters": { + "httpMethod": "GET", + "path": "lab/fast", + "responseMode": "lastNode", + "options": {} + }, + "id": "11111111-0000-0000-0000-000000000001", + "name": "Webhook", + "type": "n8n-nodes-base.webhook", + "typeVersion": 2, + "position": [240, 300], + "webhookId": "lab-fast-aaaa-bbbb-cccc-000000000001" + }, + { + "parameters": { + "assignments": { + "assignments": [ + {"id": "1", "name": "scenario", "value": "fast", "type": "string"} + ] + }, + "options": {} + }, + "id": "22222222-0000-0000-0000-000000000001", + "name": "Set", + "type": "n8n-nodes-base.set", + "typeVersion": 3.4, + "position": [460, 300] + } + ], + "connections": { + "Webhook": {"main": [[{"node": "Set", "type": "main", "index": 0}]]} + }, + "active": false, + "settings": {"executionOrder": "v1"}, + "pinData": {} +} diff --git a/n8n/tests/lab/workflows/lab_flaky.json b/n8n/tests/lab/workflows/lab_flaky.json new file mode 100644 index 0000000000000..2485f7646ecd1 --- /dev/null +++ b/n8n/tests/lab/workflows/lab_flaky.json @@ -0,0 +1,38 @@ +{ + "id": "labFlaky", + "versionId": "10000000-0000-0000-0000-000000000004", + "name": "Lab Flaky", + "nodes": [ + { + "parameters": { + "httpMethod": "GET", + "path": "lab/flaky", + "responseMode": "lastNode", + "options": {} + }, + "id": "11111111-0000-0000-0000-000000000004", + "name": "Webhook", + "type": "n8n-nodes-base.webhook", + "typeVersion": 2, + "position": [240, 300], + 
"webhookId": "lab-flaky-aaaa-bbbb-cccc-000000000004" + }, + { + "parameters": { + "language": "javaScript", + "jsCode": "if (Math.random() < 0.3) { throw new Error('flaky lab failure'); } return [{json: {ok: true, scenario: 'flaky'}}];" + }, + "id": "22222222-0000-0000-0000-000000000004", + "name": "Code", + "type": "n8n-nodes-base.code", + "typeVersion": 2, + "position": [460, 300] + } + ], + "connections": { + "Webhook": {"main": [[{"node": "Code", "type": "main", "index": 0}]]} + }, + "active": false, + "settings": {"executionOrder": "v1"}, + "pinData": {} +} diff --git a/n8n/tests/lab/workflows/lab_slow.json b/n8n/tests/lab/workflows/lab_slow.json new file mode 100644 index 0000000000000..b8adbbcd2d204 --- /dev/null +++ b/n8n/tests/lab/workflows/lab_slow.json @@ -0,0 +1,54 @@ +{ + "id": "labSlowSuccess", + "versionId": "10000000-0000-0000-0000-000000000002", + "name": "Lab Slow Success", + "nodes": [ + { + "parameters": { + "httpMethod": "GET", + "path": "lab/slow", + "responseMode": "lastNode", + "options": {} + }, + "id": "11111111-0000-0000-0000-000000000002", + "name": "Webhook", + "type": "n8n-nodes-base.webhook", + "typeVersion": 2, + "position": [240, 300], + "webhookId": "lab-slow-aaaa-bbbb-cccc-000000000002" + }, + { + "parameters": { + "amount": 500, + "unit": "ms" + }, + "id": "22222222-0000-0000-0000-000000000002", + "name": "Wait", + "type": "n8n-nodes-base.wait", + "typeVersion": 1, + "position": [460, 300] + }, + { + "parameters": { + "assignments": { + "assignments": [ + {"id": "1", "name": "scenario", "value": "slow", "type": "string"} + ] + }, + "options": {} + }, + "id": "33333333-0000-0000-0000-000000000002", + "name": "Set", + "type": "n8n-nodes-base.set", + "typeVersion": 3.4, + "position": [680, 300] + } + ], + "connections": { + "Webhook": {"main": [[{"node": "Wait", "type": "main", "index": 0}]]}, + "Wait": {"main": [[{"node": "Set", "type": "main", "index": 0}]]} + }, + "active": false, + "settings": {"executionOrder": "v1"}, + 
"pinData": {} +} diff --git a/n8n/tests/test_e2e.py b/n8n/tests/test_e2e.py index 2571135ebce6a..d16b74c257117 100644 --- a/n8n/tests/test_e2e.py +++ b/n8n/tests/test_e2e.py @@ -1,13 +1,29 @@ # (C) Datadog, Inc. 2026-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) +from typing import Any, Callable + +import pytest + from datadog_checks.dev.utils import assert_service_checks +from . import common + -def test_check_n8n_e2e(dd_agent_check, instance): - aggregator = dd_agent_check(instance, rate=True) +@pytest.mark.e2e +def test_check_n8n_e2e( + dd_agent_check: Callable[..., Any], +): + aggregator = dd_agent_check(rate=True) - # Assert the readiness check metric is present with status_code tag - aggregator.assert_metric('n8n.readiness.check', value=1, tags=["status_code:200"], at_least=1) + aggregator.assert_metric('n8n.readiness.check', value=1, tags=['status_code:200', 'n8n_process:main'], at_least=1) + # Worker also exposes /healthz/readiness via QUEUE_HEALTH_CHECK_ACTIVE on its own port. + aggregator.assert_metric('n8n.readiness.check', value=1, tags=['status_code:200', 'n8n_process:worker'], at_least=1) + aggregator.assert_metrics_using_metadata( + common.get_metadata_metrics_for_version(exclude_rare=True), + check_submission_type=True, + check_symmetric_inclusion=True, + exclude=list(common.RARE_EVENT_METRIC_NAMES), + ) assert_service_checks(aggregator) diff --git a/n8n/tests/test_integration.py b/n8n/tests/test_integration.py new file mode 100644 index 0000000000000..18bd90a2bef74 --- /dev/null +++ b/n8n/tests/test_integration.py @@ -0,0 +1,62 @@ +# (C) Datadog, Inc. 2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) + +from typing import Any, Callable + +import pytest + +from datadog_checks.base.stubs.aggregator import AggregatorStub +from datadog_checks.n8n import N8nCheck + +from . 
import common + +pytestmark = [pytest.mark.usefixtures('dd_environment'), pytest.mark.integration] + + +def _run_check_twice(instance: dict[str, Any], dd_run_check: Callable[[N8nCheck], Any]) -> N8nCheck: + check = N8nCheck('n8n', {}, [instance]) + # First run primes any one-shot/cached metrics; the second exercises the steady state. + dd_run_check(check) + dd_run_check(check) + return check + + +@pytest.fixture +def warmed_main( + instance: dict[str, Any], + dd_run_check: Callable[[N8nCheck], Any], + aggregator: AggregatorStub, +) -> N8nCheck: + return _run_check_twice(instance, dd_run_check) + + +@pytest.fixture +def warmed_both( + instance: dict[str, Any], + worker_instance: dict[str, Any], + dd_run_check: Callable[[N8nCheck], Any], + aggregator: AggregatorStub, +) -> AggregatorStub: + """Run the check against both the main and worker /metrics endpoints into one aggregator.""" + _run_check_twice(instance, dd_run_check) + _run_check_twice(worker_instance, dd_run_check) + return aggregator + + +def test_all_metadata_metrics_emitted(warmed_both: AggregatorStub): + """Across main and worker, every metadata metric for this n8n version is emitted.""" + # ``exclude`` skips the rare-event metrics from the submitted-side iteration (live + # containers may or may not produce samples for them depending on timing); the + # ``exclude_rare=True`` metadata subset symmetrically drops them from the expected + # set so check_symmetric_inclusion stays stable in both directions. 
+ warmed_both.assert_metrics_using_metadata( + common.get_metadata_metrics_for_version(exclude_rare=True), + check_submission_type=True, + check_symmetric_inclusion=True, + exclude=list(common.RARE_EVENT_METRIC_NAMES), + ) + + +def test_readiness_check_metric(warmed_main: N8nCheck, aggregator: AggregatorStub): + aggregator.assert_metric('n8n.readiness.check', value=1, tags=['status_code:200', 'n8n_process:main'], at_least=1) diff --git a/n8n/tests/test_unit.py b/n8n/tests/test_unit.py index cc3b314428044..df5ce1f31c039 100644 --- a/n8n/tests/test_unit.py +++ b/n8n/tests/test_unit.py @@ -2,91 +2,116 @@ # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) +from typing import Any, Callable from unittest import mock -from datadog_checks.dev.utils import get_metadata_metrics +import pytest +import requests + +from datadog_checks.base.stubs.aggregator import AggregatorStub +from datadog_checks.base.stubs.datadog_agent import DatadogAgentStub from datadog_checks.n8n import N8nCheck from . import common - -def test_unit_metrics(dd_run_check, instance, aggregator, mock_http_response): - mock_http_response(file_path=common.get_fixture_path('n8n.txt')) +pytestmark = pytest.mark.unit + + +@pytest.mark.parametrize( + 'fixture, extra_instance', + [ + pytest.param('n8n.txt', {}, id='default-prefix'), + pytest.param('n8n_custom.txt', {'raw_metric_prefix': 'test_'}, id='custom-prefix'), + ], +) +def test_check_emits_metrics_as_in_metadata( + dd_run_check: Callable[[N8nCheck], Any], + aggregator: AggregatorStub, + mock_http_response: Callable[..., Any], + fixture: str, + extra_instance: dict[str, Any], +): + # The fixtures are a static capture of n8n@2.19.5; the assertion is version-pinned + # to major=2 regardless of which hatch matrix leg runs the unit tier. 
+ mock_http_response(file_path=common.get_fixture_path(fixture)) + instance: dict[str, Any] = {'openmetrics_endpoint': 'http://localhost:5678/metrics', **extra_instance} check = N8nCheck('n8n', {}, [instance]) - dd_run_check(check) + with mock.patch.object(N8nCheck, '_check_n8n_readiness', return_value=None): + dd_run_check(check) - for metric in common.TEST_METRICS: - aggregator.assert_metric(metric) - aggregator.assert_all_metrics_covered() - aggregator.assert_metrics_using_metadata(get_metadata_metrics()) + aggregator.assert_metrics_using_metadata( + common.get_openmetrics_metadata_metrics(major=2), + check_submission_type=True, + check_symmetric_inclusion=True, + ) -def test_metrics_custom_prefx(dd_run_check, aggregator, mock_http_response): - mock_http_response(file_path=common.get_fixture_path('n8n_custom.txt')) - instance = { - 'openmetrics_endpoint': 'http://localhost:5678/metrics', - 'raw_metric_prefix': 'test_', - } +@pytest.fixture +def initialized_check(instance: dict[str, Any]) -> N8nCheck: check = N8nCheck('n8n', {}, [instance]) - dd_run_check(check) - - for metric in common.TEST_METRICS: - aggregator.assert_metric(metric) - aggregator.assert_all_metrics_covered() - aggregator.assert_metrics_using_metadata(get_metadata_metrics()) - - -def test_readiness_check_ready(aggregator, instance): + check.load_configuration_models() + return check + + +@pytest.mark.parametrize( + 'status_code, expected_value', + [ + pytest.param(200, 1, id='ready_200'), + pytest.param(204, 1, id='ready_204'), + pytest.param(299, 1, id='ready_299_edge'), + pytest.param(300, 0, id='not_ready_3xx'), + pytest.param(503, 0, id='not_ready_503'), + ], +) +def test_readiness_check( + aggregator: AggregatorStub, + initialized_check: N8nCheck, + status_code: int, + expected_value: int, +): with mock.patch( 'requests.Session.get', - return_value=mock.Mock(ok=True, status_code=200), + return_value=mock.Mock(ok=expected_value == 1, status_code=status_code), ): - check = N8nCheck('n8n', {}, 
[instance]) - check._check_n8n_readiness() + initialized_check._check_n8n_readiness() - # Assert metric value is 1 (ready) with status_code:200 tag - aggregator.assert_metric('n8n.readiness.check', value=1, tags=['status_code:200']) + aggregator.assert_metric( + 'n8n.readiness.check', + value=expected_value, + tags=['n8n_process:main', f'status_code:{status_code}'], + ) -def test_readiness_check_not_ready(aggregator, instance): - with mock.patch( - 'requests.Session.get', - return_value=mock.Mock(ok=False, status_code=503), - ): - check = N8nCheck('n8n', {}, [instance]) - check._check_n8n_readiness() - - # Assert metric value is 0 (not ready) with status_code:503 tag - aggregator.assert_metric('n8n.readiness.check', value=0, tags=['status_code:503']) +def test_readiness_check_unreachable(aggregator: AggregatorStub, initialized_check: N8nCheck): + with mock.patch('requests.Session.get', side_effect=requests.ConnectionError('boom')): + initialized_check._check_n8n_readiness() + aggregator.assert_metric('n8n.readiness.check', value=0, tags=['n8n_process:main', 'status_code:none']) -def test_readiness_check_no_status_code(aggregator, instance): - with mock.patch( - 'requests.Session.get', - return_value=mock.Mock(ok=False, status_code=None), - ): - check = N8nCheck('n8n', {}, [instance]) - check._check_n8n_readiness() - # Assert metric value is 0 (not ready) with status_code:null tag - aggregator.assert_metric('n8n.readiness.check', value=0, tags=['status_code:null']) +def test_readiness_uses_endpoint_host_not_metrics_path(initialized_check: N8nCheck): + """The readiness endpoint must be derived from the host, not appended to /metrics.""" + expected = f'http://{common.HOST}:{common.MAIN_PORT}/healthz/readiness' + assert initialized_check._readiness_endpoint == expected -def test_version_metadata(datadog_agent, dd_run_check, mock_http_response, instance): - """ - Test version metadata collection from Prometheus metrics - """ +def test_version_metadata( + datadog_agent: 
DatadogAgentStub, + dd_run_check: Callable[[N8nCheck], Any], + mock_http_response: Callable[..., Any], + instance: dict[str, Any], +): mock_http_response(file_path=common.get_fixture_path('n8n.txt')) check = N8nCheck('n8n', {}, [instance]) check.check_id = 'n8n_test' - dd_run_check(check) - # Version from fixture: n8n_version_info{version="v1.117.2",major="1",minor="117",patch="2"} 1 + with mock.patch.object(N8nCheck, '_check_n8n_readiness', return_value=None): + dd_run_check(check) version_metadata = { 'version.scheme': 'semver', - 'version.major': '1', - 'version.minor': '117', - 'version.patch': '2', - 'version.raw': 'v1.117.2', + 'version.major': '2', + 'version.minor': '19', + 'version.patch': '5', + 'version.raw': 'v2.19.5', } datadog_agent.assert_metadata('n8n_test', version_metadata)