diff --git a/addons/monitoring/README.md b/addons/monitoring/README.md new file mode 100644 index 00000000..ffdb9e21 --- /dev/null +++ b/addons/monitoring/README.md @@ -0,0 +1,51 @@ +# Database Monitoring Add-On +## Purpose +The add-on tracks database connection usage and sends an email alert when usage reaches 70% of `max_connections`. Alerts are stateful (sent once per state change) to avoid spam. Emails include detailed metrics. + +## Key features +- Monitors connection usage (Usage = Threads / max_connections). +- Stateful notifications (one email per state change): + - `OK` — Usage < 70% (back to normal). + - `THRESHOLD` — Usage ≥ 70% (threshold exceeded). + - `STATUS_ERROR` — failed to get `mysqladmin status`. + - `MAXCONN_ERROR` — failed to get `max_connections` (`SHOW VARIABLES`). +- Metrics from `mysqladmin status` and `SHOW VARIABLES LIKE 'max_connections'`. +- Configurable schedule (Quartz cron) via add-on settings: 5, 10, 15, 20, 30, 40, 50 minutes. +- Runs on all `sqldb` nodes (group execution). + +## How it works +1. Install: + - Downloads `/usr/local/sbin/db-monitoring.sh` to all `sqldb` nodes. + - Creates a runner script `db-monitoring.js` that invokes the shell script on the `sqldb` group, passing `USER_SESSION` and `USER_EMAIL`. + - Installs a system cron job `/etc/cron.d/db-monitoring` and sets interval via `setSchedulerInterval` (every `N` minutes): `*/N * * * * root /usr/local/sbin/db-monitoring.sh check`. +2. Runtime: + - Reads DB credentials from `/.jelenv`: `REPLICA_USER`/`REPLICA_PSWD`. + - Collects metrics: `mysqladmin status` and `SHOW VARIABLES LIKE 'max_connections'`. + - Calculates usage and determines state (OK/THRESHOLD/ERROR). + - Stores the last state in `/var/tmp/db-monitoring.status` and sends email only on state changes. + - On state change, triggers platform event `onCustomNodeEvent [name:executeScript]`, which calls the runner script and sends the email. + - Logs to `/var/log/db-monitoring.log`. 
+ +## Email content and metrics +Emails are HTML with bold labels and `<br>` line breaks. Included: +- Status: + - Uptime — node uptime (days/hours/minutes). + - Threads — current number of active connections. + - Slow queries — number of slow queries. + - Open tables — tables currently open. + - Queries per second avg — average queries per second since start. +- max_connections — maximum concurrent connections. +- Current threads (connections) — current connections count. +- Usage — share of used connections (percent). +- Timestamp — report time. + +## Logs and artifacts +- Monitoring log: `/var/log/db-monitoring.log` (start/finish, email send, errors). +- State file: `/var/tmp/db-monitoring.status` (last state to suppress duplicate emails). +- Cron: `/etc/cron.d/db-monitoring` (interval managed by the add-on). + +## Configuration +- Monitoring interval is controlled in the add-on settings (5/10/15/20/30/40/50 minutes). +- The add-on updates cron with `setSchedulerInterval` to `*/N * * * *`. +- Email sending uses platform messaging API and requires valid `session` and `userEmail`, which are passed by the add-on during event handling. + diff --git a/addons/monitoring/manifest.yml b/addons/monitoring/manifest.yml new file mode 100644 index 00000000..52e0d6ce --- /dev/null +++ b/addons/monitoring/manifest.yml @@ -0,0 +1,154 @@ +type: update +name: Database Monitoring +id: db-monitoring + +description: + text: The Database Monitoring add-on tracks MySQL/MariaDB/Percona connection usage and sends single-on-state-change email alerts. It periodically collects mysqladmin status and max_connections, alerts when usage reaches 70%, and includes detailed metrics with human‑readable uptime. The check runs online without stopping database services and supports configurable intervals. + short: Email alerts for DB connection usage with detailed metrics. 
+ +logo: /images/database-monitoring.png + +baseUrl: https://raw.githubusercontent.com/sych74/mysql-cluster/JE-66040/addons/monitoring + +mixins: + - https://cdn.jsdelivr.net/gh/jelastic-jps/mysql-cluster@3.0.0/scripts/common.yml + +targetNodes: + nodeType: + - mysql + - mariadb-dockerized + - mariadb + - perconadb + +settings: + main: + submitUnchanged: true + fields: + - name: monitorInterval + caption: Monitoring interval + type: list + editable: false + values: + - value: 5 + caption: Every 5 minutes + - value: 10 + caption: Every 10 minutes + - value: 15 + caption: Every 15 minutes + - value: 20 + caption: Every 20 minutes + - value: 30 + caption: Every 30 minutes + - value: 40 + caption: Every 40 minutes + - value: 50 + caption: Every 50 minutes + default: 10 + + - name: user + caption: DB User + type: string + required: true + - name: password + caption: DB Password + type: string + inputType: password + required: true + +buttons: + - name: Configure + caption: Configure + action: configure + settings: main + loadingText: Configuring... + successText: The monitoring configs have been updated successfully. 
+ +globals: + scriptSufix: db-monitoring + scriptName: ${env.name}-${globals.scriptSufix} + random: ${fn.random} + +onAfterClone: + install: ${baseUrl}/manifest.yml?_r=${fn.random} + envName: ${event.response.env.envName} + nodeGroup: ${targetNodes.nodeGroup} + settings: + install: true + monitorInterval: ${settings.monitorInterval} + +onInstall: + - getReplicaUser + - downloadScripts + - createScript + - setSchedulerInterval + +onUninstall: + - cleanupMonitoring + +onBeforeDelete: + - cleanupMonitoring + +onCustomNodeEvent [name:executeScript]: + script: | + let URL = "${platformUrl}${globals.scriptName}?appid=" + appid + "&token=${globals.random}&envName=${env.name}&uid=${user.uid}&session=" + session + "&userEmail=${user.email}"; + let Transport = com.hivext.api.core.utils.Transport; + resp = new Transport().get(URL); + resp = JSON.parse(resp); + if (resp.response && resp.response != 0) { + return new Transport().get("${platformUrl}/1.0/environment/jerror/rest/jerror?appid=" + appid + "&actionname=db-monitoring&callparameters=" + URL + "&email=${user.email}&errorcode=4121&errormessage=" + encodeURIComponent(resp.response.message) + "&priority=high"); + } + return { result: 0 } + +actions: + + downloadScripts: + - cmd[sqldb]: |- + curl -fsSL ${baseUrl}/scripts/db-monitoring.sh -o /usr/local/sbin/db-monitoring.sh + chmod +x /usr/local/sbin/db-monitoring.sh + user: root + + createScript: + script: | + let Response = com.hivext.api.Response; + let Transport = com.hivext.api.core.utils.Transport; + let StrSubstitutor = org.apache.commons.lang3.text.StrSubstitutor; + let scriptBody = new Transport().get("${baseUrl}/scripts/db-monitoring.js"); + scriptBody = new StrSubstitutor({token: "${globals.random}"}, "${", "}").replace(scriptBody); + + var resp = api.dev.scripting.GetScript(appid, session, "${globals.scriptName}"); + if (resp.result == Response.OK) { + api.dev.scripting.DeleteScript(appid, session, "${globals.scriptName}"); + } + resp = 
api.dev.scripting.CreateScript(appid, session, "${globals.scriptName}", "js", scriptBody); + if (resp.result != 0) return resp; + java.lang.Thread.sleep(1000); + resp = api.dev.scripting.Build(appid, session, "${globals.scriptName}"); + if (resp.result != 0) return resp; + + let command = "sed -ri 's|PLATFORM_DOMAIN=.*|PLATFORM_DOMAIN=\"${platformUrl}\"|g' /usr/local/sbin/db-monitoring.sh; " + + "sed -ri 's|USER_SCRIPT_PATH=.*|USER_SCRIPT_PATH=\"${platformUrl}${globals.scriptName}\"|g' /usr/local/sbin/db-monitoring.sh"; + return api.env.control.ExecCmdByGroup("${env.name}", session, "sqldb", toJSON([{ command: command }]), true, false, "root"); + + setSchedulerInterval: + - cmd[sqldb]: bash /usr/local/sbin/db-monitoring.sh setSchedulerInterval --interval=${settings.monitorInterval} # sub-command must match a case branch of db-monitoring.sh (setSchedulerInterval|check|sendEmail) + user: root + + cleanupMonitoring: + script: | + let envName = "${env.name}"; + var resp = api.dev.scripting.GetScript(appid, session, "${globals.scriptName}"); + if (resp.result == com.hivext.api.Response.OK) { + api.dev.scripting.DeleteScript(appid, session, "${globals.scriptName}"); + } + + resp = api.env.control.ExecCmdByGroup(envName, session, "sqldb", toJSON([ + {command: "rm -f /etc/cron.d/db-monitoring || true"}, + {command: "rm -f /usr/local/sbin/db-monitoring.sh || true"}, + {command: "systemctl reload crond || service cron reload || true"} + ]), true, false, "root"); + if (resp.result != 0) return resp; + + return { result: 0 }; + + configure: + - setSchedulerInterval \ No newline at end of file diff --git a/addons/monitoring/scripts/db-monitoring.js b/addons/monitoring/scripts/db-monitoring.js new file mode 100644 index 00000000..00620678 --- /dev/null +++ b/addons/monitoring/scripts/db-monitoring.js @@ -0,0 +1,41 @@ +//@reg(envName, token, uid) + +var ROOT = "root"; +var envName = getParam("envName", "${env.envName}"); +var Response = com.hivext.api.Response; +var SQLDB = "sqldb"; + +function run() { + var tokenParam = String(getParam("token", "")).replace(/\s/g, ""); + if 
(!session && tokenParam != "${token}") { + return { + result: Response.PERMISSION_DENIED, + error: "wrong token", + type: "error", + message: "Token [" + tokenParam + "] does not match", + response: { result: Response.PERMISSION_DENIED } + }; + } + var info = jelastic.env.control.GetEnvInfo(envName, session); + if (info.result != 0) return info; + + var resp; + var userEmail = getParam("userEmail", ""); + var userSession = getParam("session", session); + api.marketplace.console.WriteLog(appid, session, "DB Monitoring: sendEmail started for env " + envName); + var command = "/usr/local/sbin/db-monitoring.sh sendEmail '" + userSession + "' '" + userEmail + "'"; + + resp = api.env.control.ExecCmdByGroup(envName, session, SQLDB, toJSON([{ command: command }]), true, false, ROOT); + if (resp.result != 0) return resp; + + api.marketplace.console.WriteLog(appid, session, "DB Monitoring: sendEmail completed"); + return { result: 0 }; +} + +try { + return run(); +} catch (ex) { + return { result: com.hivext.api.Response.ERROR_UNKNOWN, error: "Error: " + toJSON(ex) }; +} + + diff --git a/addons/monitoring/scripts/db-monitoring.sh b/addons/monitoring/scripts/db-monitoring.sh new file mode 100644 index 00000000..4b380bf8 --- /dev/null +++ b/addons/monitoring/scripts/db-monitoring.sh @@ -0,0 +1,254 @@ +#!/bin/bash +PLATFORM_DOMAIN="{PLATFORM_DOMAIN}" +USER_SCRIPT_PATH="{URL}" +USER_SESSION="$1" +USER_EMAIL="$2" +THRESHOLD=70 +MONITORING_LOG=/var/log/db-monitoring.log +STATUS_FILE=/var/tmp/db-monitoring.status +HOSTNAME_SHORT=$(hostname -s 2>/dev/null || hostname) +BODY_ERROR_PREFIX="DataBase monitoring error on ${HOSTNAME_SHORT}" +# email notification via Virtuozzo API +function sendEmailNotification(){ + if [ -e "/usr/lib/jelastic/modules/api.module" ]; then + [ -e "/var/run/jem.pid" ] && return 0 + echo $(date) ${HOSTNAME_SHORT} "Sending e-mail notification about high DB connections usage" | tee -a $MONITORING_LOG + SUBJECT="${HOSTNAME_SHORT}: MySQL connections usage reached 
threshold" + BODY="$1" + jem api apicall -s --connect-timeout 3 --max-time 15 [API_DOMAIN]/1.0/message/email/rest/send \ + --data-urlencode "session=$USER_SESSION" \ + --data-urlencode "to=$USER_EMAIL" \ + --data-urlencode "subject=$SUBJECT" \ + --data-urlencode body@- <<< "$BODY" + if [[ $? != 0 ]]; then + echo $(date) ${HOSTNAME_SHORT} "Sending of e-mail notification failed" | tee -a $MONITORING_LOG + else + echo $(date) ${HOSTNAME_SHORT} "E-mail notification is sent successfully" | tee -a $MONITORING_LOG + fi + else + echo $(date) ${HOSTNAME_SHORT} "Email notification is not sent because this functionality is unavailable for current platform." | tee -a $MONITORING_LOG + fi +} + +# trigger sendevent for onCustomNodeEvent flow +function trigger_sendevent(){ + echo $(date) ${HOSTNAME_SHORT} "Trigger onCustomNodeEvent 'executeScript'" | tee -a $MONITORING_LOG + curl -fsSL --max-time 10 --retry 1 --retry-max-time 15 \ + --location --request POST "${PLATFORM_DOMAIN}1.0/environment/node/rest/sendevent" \ + --data-urlencode "params={'name': 'executeScript'}" >/dev/null 2>&1 +} + +function get_last_status(){ + [ -f "$STATUS_FILE" ] && cat "$STATUS_FILE" || echo "" +} + +function set_status(){ + local status="$1" + echo "$status" > "$STATUS_FILE" +} + +# Build reusable metrics body +function build_metrics_body(){ + local title="$1" + cat < +Database connections ${title} on ${HOSTNAME_SHORT}
+
+STATUS
+Uptime: $UPTIME_HUMAN
+Threads: $THREADS
+Slow queries: $SLOW
+Open tables: $OPEN_TABLES
+Queries per second avg: $QPS
+
+Max connections: $MAX_CONNECTIONS
+Current connections: $THREADS
+Usage: ${USAGE_PCT}%
+Timestamp: $(date) + +EOF +} + +function send_on_status_change(){ + local new_status="$1" + local last_status="$(get_last_status)" + if [ "$new_status" != "$last_status" ]; then + set_status "$new_status" # persist first: the sendEmail run started by the event reads this file to choose the e-mail title and must see the new state + trigger_sendevent + else + echo "$(date) ${HOSTNAME_SHORT} Status '$new_status' unchanged, skipping email" >> $MONITORING_LOG + fi +} + +# credentials check and load +function check_credentials(){ + source "/.jelenv" + DB_USER="$REPLICA_USER" + DB_PASSWORD="$REPLICA_PSWD" + + if [ -z "$DB_USER" ] || [ -z "$DB_PASSWORD" ]; then + BODY=$(cat < +${BODY_ERROR_PREFIX}
+
+Issue: Missing REPLICA_USER or REPLICA_PSWD in environment variables
+Observed values: REPLICA_USER='${REPLICA_USER:-EMPTY}', REPLICA_PSWD='${REPLICA_PSWD:+SET}'
+Action required: Populate both variables in environment variables
+Timestamp: $(date) + +EOF +) + echo "$BODY" >> $MONITORING_LOG + send_on_status_change "CREDENTIALS_MISSING" "$BODY" + echo "Monitoring finished at $(date)" >> $MONITORING_LOG + exit 1 + fi +} + +# collect DB metrics using mysqladmin and mysql client +function collect_metrics(){ + STATUS_RAW=$(mysqladmin status -u"$DB_USER" -p"$DB_PASSWORD" 2>&1) + RET=$? + if [ $RET -ne 0 ] || [ -z "$STATUS_RAW" ]; then + OUT_ESC=$(printf '%s' "$STATUS_RAW" | sed -e 's/&/\&/g' -e 's//\>/g' | sed ':a;N;$!ba;s/\n//g') + BODY=$(cat < +${BODY_ERROR_PREFIX}
+
+Action: mysqladmin status
+Exit code: $RET
+Output: $OUT_ESC
+Timestamp: $(date) + +EOF +) + echo "$BODY" >> $MONITORING_LOG + send_on_status_change "STATUS_ERROR" "$BODY" + echo "Monitoring finished at $(date)" >> $MONITORING_LOG + exit 1 + fi + + UPTIME=$(echo "$STATUS_RAW" | grep -o 'Uptime: [0-9]\+' | awk '{print $2}') + THREADS=$(echo "$STATUS_RAW" | grep -o 'Threads: [0-9]\+' | awk '{print $2}') + QUESTIONS=$(echo "$STATUS_RAW" | grep -o 'Questions: [0-9]\+' | awk '{print $2}') + SLOW=$(echo "$STATUS_RAW" | grep -o 'Slow queries: [0-9]\+' | awk '{print $3}') + OPENS=$(echo "$STATUS_RAW" | grep -o 'Opens: [0-9]\+' | awk '{print $2}') + FLUSHES=$(echo "$STATUS_RAW" | grep -o 'Flush tables: [0-9]\+' | awk '{print $3}') + OPEN_TABLES=$(echo "$STATUS_RAW" | grep -o 'Open tables: [0-9]\+' | awk '{print $3}') + QPS=$(echo "$STATUS_RAW" | sed -n 's/.*Queries per second avg: \([0-9.]*\).*/\1/p') + + UPTIME_HUMAN="$UPTIME" + if [[ "$UPTIME" =~ ^[0-9]+$ ]]; then + D=$((UPTIME/86400)) + H=$(((UPTIME%86400)/3600)) + M=$(((UPTIME%3600)/60)) + UPTIME_HUMAN="${D} days ${H} hours ${M} minutes" + fi + + VAR_RAW=$(mysql -Nse "SHOW VARIABLES LIKE 'max_connections';" -u"$DB_USER" -p"$DB_PASSWORD" 2>&1) + VAR_RC=$? + if [ $VAR_RC -eq 0 ]; then + MAX_CONNECTIONS=$(echo "$VAR_RAW" | awk '{print $2}') + fi + + if ! [[ "$MAX_CONNECTIONS" =~ ^[0-9]+$ ]]; then + VAR_ESC=$(printf '%s' "$VAR_RAW" | sed -e 's/&/\&/g' -e 's//\>/g' | sed ':a;N;$!ba;s/\n//g') + BODY=$(cat < +${BODY_ERROR_PREFIX}
+
+Issue: Unable to determine max_connections
+mysql SHOW VARIABLES exit: ${VAR_RC}
+mysql SHOW VARIABLES output: ${VAR_ESC}
+Timestamp: $(date) + +EOF +) + echo "$BODY" >> $MONITORING_LOG + send_on_status_change "MAXCONN_ERROR" "$BODY" + MAX_CONNECTIONS=0 + fi + + USAGE_PCT=0 + if [ "$MAX_CONNECTIONS" -gt 0 ]; then + USAGE_PCT=$(awk -v th="$THREADS" -v max="$MAX_CONNECTIONS" 'BEGIN { if (max>0) printf("%d", (th*100)/max); else print 0 }') + fi +} + +# cron management: add scheduler and adjust interval +function addScheduler(){ + local cron_file="/etc/cron.d/db-monitoring" + echo "*/10 * * * * root /usr/local/sbin/db-monitoring.sh check >> /var/log/db-monitoring.log 2>&1" > "$cron_file" + chmod 0644 "$cron_file" + chown root:root "$cron_file" + systemctl reload crond + echo "$(date) ${HOSTNAME_SHORT} Cron installed at $cron_file" >> $MONITORING_LOG +} + +function setSchedulerInterval(){ + local INTERVAL=10 + for i in "$@"; do + case $i in + --interval=*) + INTERVAL=${i#*=} + shift + shift + ;; + *) + ;; + esac + done + local cron_file="/etc/cron.d/db-monitoring" + [ -f "$cron_file" ] || addScheduler + sed -ri "s|^[#]*[^ ]+ +\* +\* +\* +\* +root .*$|*/${INTERVAL} * * * * root /usr/local/sbin/db-monitoring.sh check >> /var/log/db-monitoring.log 2>\&1|" "$cron_file" + systemctl reload crond + echo "$(date) ${HOSTNAME_SHORT} Cron interval set to every ${INTERVAL} minutes" >> $MONITORING_LOG +} + +function check(){ + echo "Monitoring started at $(date)" >> $MONITORING_LOG + check_credentials + collect_metrics + if [ "$USAGE_PCT" -ge "$THRESHOLD" ]; then + send_on_status_change "THRESHOLD" + elif [ "$MAX_CONNECTIONS" -gt 0 ]; then # MAX_CONNECTIONS=0 means collect_metrics already recorded MAXCONN_ERROR; reporting OK here would flip the state on every run and send two e-mails per interval + send_on_status_change "OK" + fi + echo "Monitoring finished at $(date)" >> $MONITORING_LOG +} + +function sendEmail(){ + local USER_SESSION="$1" + local USER_EMAIL="$2" + echo "Send email started at $(date)" >> $MONITORING_LOG + check_credentials + collect_metrics + local status="$(get_last_status)" + local title="usage alert" + if [ "$status" = "OK" ]; then + title="back to normal" + fi + local BODY="$(build_metrics_body "$title")" + sendEmailNotification "$BODY" + echo "Send email finished at 
$(date)" >> $MONITORING_LOG +} + +case "$1" in + setSchedulerInterval) + shift + setSchedulerInterval "$@" + ;; + check) + check + ;; + sendEmail) + shift + sendEmail "$@" + ;; + *) + echo "Usage: $0 {setSchedulerInterval --interval=N|check|sendEmail USER_SESSION USER_EMAIL}" | tee -a $MONITORING_LOG + exit 1 + ;; +esac + +exit 0 \ No newline at end of file