Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions collector/fixtures/e2e-64k-page-output.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1423,6 +1423,8 @@ node_forks_total 26442
# HELP node_hwmon_chip_names Annotation metric for human-readable chip names
# TYPE node_hwmon_chip_names gauge
node_hwmon_chip_names{chip="nct6779",chip_name="nct6779"} 1
node_hwmon_chip_names{chip="platform_asus_nb_wmi_asus",chip_name="asus"} 1
node_hwmon_chip_names{chip="platform_asus_nb_wmi_asus_wmi_sensors",chip_name="asus_wmi_sensors"} 1
node_hwmon_chip_names{chip="platform_coretemp_0",chip_name="coretemp"} 1
node_hwmon_chip_names{chip="platform_coretemp_1",chip_name="coretemp"} 1
# HELP node_hwmon_fan_alarm Hardware sensor alarm status (fan)
Expand Down Expand Up @@ -1530,6 +1532,8 @@ node_hwmon_pwm_crit_temp_tolerance{chip="nct6779",sensor="pwm1"} 2000
# HELP node_hwmon_pwm_enable Hardware monitor pwm element enable
# TYPE node_hwmon_pwm_enable gauge
node_hwmon_pwm_enable{chip="nct6779",sensor="pwm1"} 5
node_hwmon_pwm_enable{chip="platform_asus_nb_wmi_asus",sensor="pwm1"} 2
node_hwmon_pwm_enable{chip="platform_asus_nb_wmi_asus_wmi_sensors",sensor="pwm1"} 2
# HELP node_hwmon_pwm_floor Hardware monitor pwm element floor
# TYPE node_hwmon_pwm_floor gauge
node_hwmon_pwm_floor{chip="nct6779",sensor="pwm1"} 1
Expand Down
4 changes: 4 additions & 0 deletions collector/fixtures/e2e-output.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1455,6 +1455,8 @@ node_forks_total 26442
# HELP node_hwmon_chip_names Annotation metric for human-readable chip names
# TYPE node_hwmon_chip_names gauge
node_hwmon_chip_names{chip="nct6779",chip_name="nct6779"} 1
node_hwmon_chip_names{chip="platform_asus_nb_wmi_asus",chip_name="asus"} 1
node_hwmon_chip_names{chip="platform_asus_nb_wmi_asus_wmi_sensors",chip_name="asus_wmi_sensors"} 1
node_hwmon_chip_names{chip="platform_coretemp_0",chip_name="coretemp"} 1
node_hwmon_chip_names{chip="platform_coretemp_1",chip_name="coretemp"} 1
# HELP node_hwmon_fan_alarm Hardware sensor alarm status (fan)
Expand Down Expand Up @@ -1562,6 +1564,8 @@ node_hwmon_pwm_crit_temp_tolerance{chip="nct6779",sensor="pwm1"} 2000
# HELP node_hwmon_pwm_enable Hardware monitor pwm element enable
# TYPE node_hwmon_pwm_enable gauge
node_hwmon_pwm_enable{chip="nct6779",sensor="pwm1"} 5
node_hwmon_pwm_enable{chip="platform_asus_nb_wmi_asus",sensor="pwm1"} 2
node_hwmon_pwm_enable{chip="platform_asus_nb_wmi_asus_wmi_sensors",sensor="pwm1"} 2
# HELP node_hwmon_pwm_floor Hardware monitor pwm element floor
# TYPE node_hwmon_pwm_floor gauge
node_hwmon_pwm_floor{chip="nct6779",sensor="pwm1"} 1
Expand Down
44 changes: 44 additions & 0 deletions collector/fixtures/sys.ttar
Original file line number Diff line number Diff line change
Expand Up @@ -1299,6 +1299,12 @@ Mode: 644
Path: sys/class/hwmon/hwmon5
SymlinkTo: ../../devices/platform/bogus.0/hwmon/hwmon5/
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/class/hwmon/hwmon6
SymlinkTo: ../../devices/platform/asus-nb-wmi/hwmon/hwmon6
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/class/hwmon/hwmon7
SymlinkTo: ../../devices/platform/asus-nb-wmi/hwmon/hwmon7
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/class/infiniband
Mode: 755
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Expand Down Expand Up @@ -8202,6 +8208,44 @@ Lines: 1
applesmc
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/devices/platform/asus-nb-wmi
Mode: 755
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/devices/platform/asus-nb-wmi/hwmon
Mode: 755
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/devices/platform/asus-nb-wmi/hwmon/hwmon6
Mode: 755
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/devices/platform/asus-nb-wmi/hwmon/hwmon6/device
SymlinkTo: ../../../asus-nb-wmi
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/devices/platform/asus-nb-wmi/hwmon/hwmon6/name
Lines: 1
asus
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/devices/platform/asus-nb-wmi/hwmon/hwmon6/pwm1_enable
Lines: 1
2
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/devices/platform/asus-nb-wmi/hwmon/hwmon7
Mode: 755
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/devices/platform/asus-nb-wmi/hwmon/hwmon7/device
SymlinkTo: ../../../asus-nb-wmi
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/devices/platform/asus-nb-wmi/hwmon/hwmon7/name
Lines: 1
asus_wmi_sensors
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/devices/platform/asus-nb-wmi/hwmon/hwmon7/pwm1_enable
Lines: 1
2
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/devices/platform/bogus.0
Mode: 775
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Expand Down
74 changes: 59 additions & 15 deletions collector/hwmon_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,19 +161,9 @@ func collectSensorData(dir string, data map[string]map[string]string) error {
return nil
}

func (c *hwMonCollector) updateHwmon(ch chan<- prometheus.Metric, dir string) error {
hwmonName, err := c.hwmonName(dir)
if err != nil {
return err
}

if c.deviceFilter.ignored(hwmonName) {
c.logger.Debug("ignoring hwmon chip", "chip", hwmonName)
return nil
}

func (c *hwMonCollector) updateHwmon(ch chan<- prometheus.Metric, dir, hwmonName string) error {
data := make(map[string]map[string]string)
err = collectSensorData(dir, data)
err := collectSensorData(dir, data)
if err != nil {
return err
}
Expand Down Expand Up @@ -452,7 +442,7 @@ func (c *hwMonCollector) hwmonHumanReadableChipName(dir string) (string, error)

func (c *hwMonCollector) Update(ch chan<- prometheus.Metric) error {
// Step 1: scan /sys/class/hwmon, resolve all symlinks and call
// updatesHwmon for each folder
// updateHwmon for each folder.

hwmonPathName := filepath.Join(sysFilePath("class"), "hwmon")

Expand All @@ -466,7 +456,20 @@ func (c *hwMonCollector) Update(ch chan<- prometheus.Metric) error {
return err
}

var lastErr error
// Pass 1: enumerate hwmon directories and pre-compute the device-derived
// chip name. Multiple hwmon nodes can share a single parent device (for
// example, asus-nb-wmi exposes one hwmon for fan control and another for
// WMI sensors), which makes hwmonName collide and triggers the "metric
// collected before with the same name and label values" registry error.
// hwmonEntry records one hwmon directory discovered in pass 1 so that
// pass 2 can pick a unique chip label for it before emitting metrics.
type hwmonEntry struct {
	// dir is the path of the hwmonX node under the hwmon class directory.
	// baseName is the chip name returned by hwmonName for dir; several
	// entries may share it when they hang off the same parent device.
	// nameFile is the whitespace-trimmed content of dir's `name` file, or
	// "" when that file could not be read.
	dir, baseName, nameFile string
}

entries := make([]hwmonEntry, 0, len(hwmonFiles))
chipCounts := make(map[string]int)
nameCounts := make(map[string]int)
for _, hwDir := range hwmonFiles {
hwmonXPathName := filepath.Join(hwmonPathName, hwDir.Name())
fileInfo, err := os.Lstat(hwmonXPathName)
Expand All @@ -485,7 +488,48 @@ func (c *hwMonCollector) Update(ch chan<- prometheus.Metric) error {
continue
}

if err = c.updateHwmon(ch, hwmonXPathName); err != nil {
baseName, err := c.hwmonName(hwmonXPathName)
if err != nil {
c.logger.Debug("failed to derive hwmon chip name", "dir", hwmonXPathName, "err", err)
continue
}

nameFile := ""
if raw, err := os.ReadFile(filepath.Join(hwmonXPathName, "name")); err == nil {
nameFile = strings.TrimSpace(string(raw))
}

entries = append(entries, hwmonEntry{
dir: hwmonXPathName,
baseName: baseName,
nameFile: nameFile,
})
chipCounts[baseName]++
nameCounts[baseName+"\x00"+nameFile]++
}

// Pass 2: emit metrics. For each entry, resolve a unique chip label
// (disambiguating colliding base names with the chip's `name` file
// content when distinct, else with the hwmonX basename) and only then
// apply the include/exclude filter so user regexes match the label
// that ends up in the metrics.
var lastErr error
for _, e := range entries {
chipName := e.baseName
if chipCounts[e.baseName] > 1 {
suffix := cleanMetricName(e.nameFile)
if suffix == "" || nameCounts[e.baseName+"\x00"+e.nameFile] > 1 {
suffix = cleanMetricName(filepath.Base(e.dir))
}
chipName = e.baseName + "_" + suffix
}

if c.deviceFilter.ignored(chipName) {
c.logger.Debug("ignoring hwmon chip", "chip", chipName)
continue
}

if err := c.updateHwmon(ch, e.dir, chipName); err != nil {
lastErr = err
}
}
Expand Down
Loading
Loading