Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions .coveragerc
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
[run]
branch = True
source =
monster
mbuilder

[report]
omit =
*/tests/*
*/__init__.py
setup.py
99 changes: 99 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
name: Monster Build

on:
push:
branches: [main]
pull_request:
branches: [main]

jobs:
test:
runs-on: ubuntu-latest

steps:
- name: Checkout repo
uses: actions/checkout@v4

- name: Set up python
uses: actions/setup-python@v4
with:
python-version: 3.11

- name: Install dependencies
run: |
python -m venv venv
source venv/bin/activate
pip install --upgrade pip
pip install -r requirements.txt
pip install -e .

- name: Run tests with coverage
run: |
source venv/bin/activate
pytest \
--cov \
--cov-branch \
--cov-report=html \
--cov-fail-under=65

- name: Coverage summary
run: |
source venv/bin/activate
python << 'EOF'
import coverage
import os
from pathlib import Path

# Load the coverage database produced by the preceding pytest --cov run.
cov = coverage.Coverage(data_file=".coverage")
cov.load()

# Build a GitHub-flavored Markdown table, one row per measured file.
lines = []
lines.append("| File | Statements | Missing | Coverage |")
lines.append("|------|------------|---------|----------|")

total_statements = 0
total_missing = 0
total_executed = 0

for f in sorted(cov.get_data().measured_files()):
    # NOTE(review): `_analyze` is a private coverage.py API and may break on
    # upgrade — confirm against the pinned coverage version.
    analysis = cov._analyze(f)
    stmt = analysis.numbers.n_statements
    miss = analysis.numbers.n_missing
    executed = analysis.numbers.n_executed
    # Guard against empty files (0 statements) to avoid ZeroDivisionError.
    percent = round(executed / stmt * 100) if stmt else 0

    total_statements += stmt
    total_missing += miss
    total_executed += executed

    path_parts = Path(f).parts
    short_path = Path(f).name

    # Shorten absolute paths to start at the project package directory
    # ('mbuilder' or 'monster'); fall back to the bare filename otherwise.
    for i in range(len(path_parts) - 1, -1, -1):
        if path_parts[i] in ('mbuilder', 'monster'):
            short_path = '/'.join(path_parts[i:])
            break

    lines.append(f"| {short_path} | {stmt} | {miss} | {percent}% |")

# Aggregate row; same zero-statement guard as above.
total_percent = round(total_executed / total_statements * 100) if total_statements else 0
lines.append(f"| **TOTAL** | {total_statements} | {total_missing} | **{total_percent}%** |")

summary = "\n".join(lines)

# GITHUB_STEP_SUMMARY points at the job-summary file on GitHub Actions;
# append the table there, or fall back to stdout when run locally.
summary_file = os.environ.get("GITHUB_STEP_SUMMARY")
if summary_file:
    with open(summary_file, "a") as f:
        f.write("## Test Coverage (Per File)\n")
        f.write(summary + "\n")
else:
    print("GITHUB_STEP_SUMMARY not found")
    print(summary)
EOF

- name: Upload HTML coverage report
uses: actions/upload-artifact@v4
with:
name: coverage-report
path: htmlcov/
retention-days: 7
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,9 @@ token.json
json
mbuilder_server
*.egg-info
test*
*nfs*
OIDs.txt
mb_api_h100.py*
mb_api_zen4.py*
htmlcov
.coverage
3 changes: 1 addition & 2 deletions monster/idrac.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@
import psycopg2
import random

import logger
import process
from monster import logger, process

log = logger.get_logger(__name__)

Expand Down
2 changes: 1 addition & 1 deletion monster/infra.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import process
from monster import process


def get_pdu_metrics_pull(pdu_api: list, timestamp, pdu_list: list,
Expand Down
7 changes: 2 additions & 5 deletions monster/init_tsdb.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
import psycopg2

import sql
import idrac
import logger
from monster import sql, idrac, logger, utils, snmp_irc

import schema
import asyncio
import hostlist
from monster import utils
from monster import snmp_irc

log = logger.get_logger(__name__)

Expand Down
3 changes: 1 addition & 2 deletions monster/monit_idrac.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@
from pgcopy import CopyManager
from datetime import datetime, timezone

import idrac
from monster import utils
from monster import idrac, utils


def monit_idrac_pull(config):
Expand Down
3 changes: 1 addition & 2 deletions monster/monit_irc.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@
from pgcopy import CopyManager
from datetime import datetime, timezone

import infra
from monster import utils
from monster import infra, utils


def monit_irc(config):
Expand Down
3 changes: 1 addition & 2 deletions monster/monit_pdu.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@
from pgcopy import CopyManager
from datetime import datetime, timezone

import infra
from monster import utils
from monster import infra, utils


def monit_pdu(config):
Expand Down
4 changes: 1 addition & 3 deletions monster/monit_slurm.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,7 @@
import urllib3
from datetime import datetime, timezone

import process
import slurm
from monster import utils
from monster import process, slurm, utils

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

Expand Down
9 changes: 3 additions & 6 deletions monster/process.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
import sql
import json
import logger
import asyncio
import multiprocessing
from itertools import repeat
Expand All @@ -14,8 +12,7 @@
from pgcopy import CopyManager
from requests.adapters import HTTPAdapter

import snmp_irc
from monster import utils
from monster import snmp_irc, utils, logger, sql

log = logger.get_logger(__name__)
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
Expand Down Expand Up @@ -161,9 +158,9 @@ def extract_metadata(system_info: dict, bmc_info: dict, node: str):
# On repacss, the hostname is set to c+number, e.g. c001, and g+number, e.g. g001
# This part is currently hardcoded for the repacss cluster
hostname = metrics.get("HostName", None)
if (hostname.startswith("c")):
if hostname is not None and hostname.startswith("c"):
new_hostname = bmc_ip_addr.replace("10.101.", "rpc-").replace(".", "-")
elif (hostname.startswith("g")):
elif hostname is not None and hostname.startswith("g"):
new_hostname = bmc_ip_addr.replace("10.101.", "rpg-").replace(".", "-")
else:
# This is only for the h100-build node, as it does not have a valid hostname
Expand Down
3 changes: 1 addition & 2 deletions monster/schema.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import logger
from monster import utils
from monster import utils, logger

log = logger.get_logger(__name__)

Expand Down
8 changes: 7 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,10 @@ python_dateutil
sqlalchemy
pandas
aiohttp-sse-client
tqdm
tqdm
pysnmp
pytest
pytest-asyncio
pytest-mock
pytest-cov
httpx
125 changes: 125 additions & 0 deletions tests/mbuilder/test_mb_sql.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
from mbuilder import mb_sql
from dateutil.parser import parse


def test_generate_slurm_jobs_sql():
    """generate_slurm_jobs_sql embeds epoch-converted time bounds in the query."""
    window_start = "2023-01-01 00:00:00"
    window_end = "2023-01-02 00:00:00"

    # Convert the human-readable bounds to UNIX epochs the same way the
    # generator is expected to.
    epochs = {
        name: int(parse(ts).timestamp())
        for name, ts in (("start", window_start), ("end", window_end))
    }

    expected = (
        f"SELECT * FROM slurm.jobs "
        f"WHERE start_time < {epochs['end']} AND end_time > {epochs['start']};"
    )

    assert mb_sql.generate_slurm_jobs_sql(window_start, window_end) == expected


def test_generate_slurm_node_jobs_sql():
    """Verify the gap-filled per-node jobs query built by generate_slurm_node_jobs_sql.

    The expected SQL buckets rows with time_bucket_gapfill at the given
    interval, joins on nodes to resolve hostnames, and aggregates jobs and
    cpus per bucket.  NOTE(review): the backslash-continuation spacing inside
    the f-string is part of the runtime string and must match the generator's
    output byte-for-byte — confirm against mb_sql.
    """
    start = "2023-01-01 00:00:00"
    end = "2023-01-02 00:00:00"
    interval = "5 minutes"

    sql_query = mb_sql.generate_slurm_node_jobs_sql(start, end, interval)

    # Literal query the generator is expected to emit for these inputs.
    expected_sql = f"SELECT time_bucket_gapfill('{interval}', timestamp) AS time, \
nodes.hostname as node, jsonb_agg(jobs) AS jobs, jsonb_agg(cpus) AS cpus \
FROM slurm.node_jobs \
JOIN nodes \
ON slurm.node_jobs.nodeid = nodes.nodeid \
WHERE timestamp >= '{start}' \
AND timestamp <= '{end}' \
GROUP BY time, node \
ORDER BY time;"

    assert expected_sql == sql_query


def test_generate_slurm_state_sql():
    """Verify the gap-filled node-state query built by generate_slurm_state_sql.

    NOTE(review): the backslash-continuation spacing inside the f-string is
    part of the runtime string and must match the generator's output
    byte-for-byte — confirm against mb_sql.
    """
    start = "2023-01-01 00:00:00"
    end = "2023-01-02 00:00:00"
    interval = "1 hour"

    sql_query = mb_sql.generate_slurm_state_sql(start, end, interval)

    # Literal query the generator is expected to emit for these inputs.
    expected_sql = f"SELECT time_bucket_gapfill('{interval}', timestamp) AS time, \
nodes.hostname as node, jsonb_agg(value) AS value \
FROM slurm.state \
JOIN nodes \
ON slurm.state.nodeid = nodes.nodeid \
WHERE timestamp >= '{start}' \
AND timestamp <= '{end}' \
GROUP BY time, node \
ORDER BY time;"

    assert expected_sql == sql_query


def test_generate_idrac_metric_sql():
    """Verify the aggregated iDRAC metric query built by generate_idrac_metric_sql.

    The table name, bucketing interval, and aggregation function are all
    interpolated into the query; the fqdd join resolves metric labels.
    NOTE(review): the backslash-continuation spacing inside the f-string is
    part of the runtime string and must match the generator's output
    byte-for-byte — confirm against mb_sql.
    """
    table = "fans"
    start = "2023-01-01 00:00:00"
    end = "2023-01-02 00:00:00"
    interval = "1h"
    aggregation = "avg"

    sql_query = mb_sql.generate_idrac_metric_sql(table, start, end, interval, aggregation)

    # Literal query the generator is expected to emit for these inputs.
    expected_sql = f"SELECT time_bucket_gapfill('{interval}', timestamp) AS time, \
nodes.hostname as node, fqdd.fqdd AS label, {aggregation}(value) AS value \
FROM idrac.{table} \
JOIN nodes \
ON idrac.{table}.nodeid = nodes.nodeid \
JOIN fqdd \
ON idrac.{table}.fqdd = fqdd.id \
WHERE timestamp >= '{start}' \
AND timestamp <= '{end}' \
GROUP BY time, node, label \
ORDER BY time;"

    assert expected_sql == sql_query


def test_generate_idrac_metric_raw_sql():
    """Verify the raw (un-bucketed) single-node iDRAC query built by
    generate_idrac_metric_raw_sql.

    Unlike the aggregated variant, this query filters on one hostname and
    returns raw timestamps/values.  NOTE(review): the backslash-continuation
    spacing inside the f-string is part of the runtime string and must match
    the generator's output byte-for-byte — confirm against mb_sql.
    """
    table = "temp"
    start = "2023-01-01 00:00:00"
    end = "2023-01-02 00:00:00"
    node = "node1"

    sql_query = mb_sql.generate_idrac_metric_raw_sql(table, start, end, node)

    # Literal query the generator is expected to emit for these inputs.
    expected_sql = f"SELECT timestamp AS time, \
nodes.hostname as node, fqdd.fqdd AS label, value \
FROM idrac.{table} \
JOIN nodes \
ON idrac.{table}.nodeid = nodes.nodeid \
JOIN fqdd \
ON idrac.{table}.fqdd = fqdd.id \
WHERE timestamp >= '{start}' \
AND timestamp <= '{end}' \
AND nodes.hostname = '{node}' \
ORDER BY time;"

    assert expected_sql == sql_query


def test_generate_slurm_metric_sql():
    """Verify the aggregated Slurm metric query built by generate_slurm_metric_sql.

    Same shape as the iDRAC variant but without the fqdd label join.
    NOTE(review): the backslash-continuation spacing inside the f-string is
    part of the runtime string and must match the generator's output
    byte-for-byte — confirm against mb_sql.
    """
    table = "cpu"
    start = "2023-01-01 00:00:00"
    end = "2023-01-02 00:00:00"
    interval = "15m"
    aggregation = "max"

    sql_query = mb_sql.generate_slurm_metric_sql(table, start, end, interval, aggregation)

    # Literal query the generator is expected to emit for these inputs.
    expected_sql = f"SELECT time_bucket_gapfill('{interval}', timestamp) AS time, \
nodes.hostname as node, {aggregation}(value) AS value \
FROM slurm.{table} \
JOIN nodes \
ON slurm.{table}.nodeid = nodes.nodeid \
WHERE timestamp >= '{start}' \
AND timestamp <= '{end}' \
GROUP BY time, node \
ORDER BY time;"

    assert expected_sql == sql_query
Loading