Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 80 additions & 20 deletions collector/collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,46 @@ const (
)

var (
factories = make(map[string]func(logger *slog.Logger) (Collector, error))
initiatedCollectorsMtx = sync.Mutex{}
initiatedCollectors = make(map[string]Collector)
collectorState = make(map[string]*bool)
forcedCollectors = map[string]bool{} // collectors which have been explicitly enabled or disabled
factories = make(map[string]func(logger *slog.Logger) (Collector, error))
collectorState = make(map[string]*bool)
forcedCollectors = map[string]bool{} // collectors which have been explicitly enabled or disabled
)

type collectorRuntimeState struct {
config collectorConfig
enabled map[string]bool
collectors map[string]Collector
mtx sync.Mutex
}

type runtimeConfiguredCollector interface {
configureRuntimeState(*collectorRuntimeState) error
}

type collectorConfig struct {
paths collectorPathConfig
cpu cpuCollectorConfig
textfile textFileCollectorConfig
}

type collectorPathConfig struct {
procMountPoint string
sysMountPoint string
rootfsPath string
udevDataPath string
}

type cpuCollectorConfig struct {
guestEnabled bool
infoEnabled bool
flagsInclude string
bugsInclude string
}

type textFileCollectorConfig struct {
directories []string
}

func registerCollector(collector string, isDefaultEnabled bool, factory func(logger *slog.Logger) (Collector, error)) {
var helpDefaultState string
if isDefaultEnabled {
Expand All @@ -80,14 +113,30 @@ type NodeCollector struct {
logger *slog.Logger
}

// DisableDefaultCollectors sets the collector state to false for all collectors which
// have not been explicitly enabled on the command line.
func DisableDefaultCollectors() {
for c := range collectorState {
if _, ok := forcedCollectors[c]; !ok {
*collectorState[c] = false
func newCollectorRuntimeState(disableDefaultCollectors bool) *collectorRuntimeState {
enabled := make(map[string]bool, len(collectorState))
for collector, state := range collectorState {
enabled[collector] = *state
if disableDefaultCollectors {
if _, ok := forcedCollectors[collector]; !ok {
enabled[collector] = false
}
}
}

return &collectorRuntimeState{
config: newCollectorConfig(),
enabled: enabled,
collectors: make(map[string]Collector),
}
}

func newCollectorConfig() collectorConfig {
return collectorConfig{
paths: newCollectorPathConfig(),
cpu: newCPUCollectorConfig(),
textfile: newTextFileCollectorConfig(),
}
}

// collectorFlagAction generates a new action function for the given collector
Expand All @@ -103,39 +152,50 @@ func collectorFlagAction(collector string) func(ctx *kingpin.ParseContext) error
}

// NewNodeCollector creates a new NodeCollector.
func NewNodeCollector(logger *slog.Logger, filters ...string) (*NodeCollector, error) {
func (s *collectorRuntimeState) NewNodeCollector(logger *slog.Logger, filters ...string) (*NodeCollector, error) {
f := make(map[string]bool)
for _, filter := range filters {
enabled, exist := collectorState[filter]
enabled, exist := s.enabled[filter]
if !exist {
return nil, fmt.Errorf("missing collector: %s", filter)
}
if !*enabled {
if !enabled {
return nil, fmt.Errorf("disabled collector: %s", filter)
}
f[filter] = true
}
collectors := make(map[string]Collector)
initiatedCollectorsMtx.Lock()
defer initiatedCollectorsMtx.Unlock()
for key, enabled := range collectorState {
if !*enabled || (len(f) > 0 && !f[key]) {
s.mtx.Lock()
defer s.mtx.Unlock()
for key, enabled := range s.enabled {
if !enabled || (len(f) > 0 && !f[key]) {
continue
}
if collector, ok := initiatedCollectors[key]; ok {
if collector, ok := s.collectors[key]; ok {
collectors[key] = collector
} else {
collector, err := factories[key](logger.With("collector", key))
if err != nil {
return nil, err
}
if err := s.configureCollector(collector); err != nil {
return nil, err
}
collectors[key] = collector
initiatedCollectors[key] = collector
s.collectors[key] = collector
}
}
return &NodeCollector{Collectors: collectors, logger: logger}, nil
}

func (s *collectorRuntimeState) configureCollector(collector Collector) error {
if configuredCollector, ok := collector.(runtimeConfiguredCollector); ok {
return configuredCollector.configureRuntimeState(s)
}

return nil
}

// Describe implements the prometheus.Collector interface.
func (n NodeCollector) Describe(ch chan<- *prometheus.Desc) {
ch <- scrapeDurationDesc
Expand Down
20 changes: 20 additions & 0 deletions collector/cpu_config_stub.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// Copyright 2026 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !linux || nocpu

package collector

func newCPUCollectorConfig() cpuCollectorConfig {
return cpuCollectorConfig{}
}
101 changes: 60 additions & 41 deletions collector/cpu_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import (
type cpuCollector struct {
procfs procfs.FS
sysfs sysfs.FS
sysMountPoint string
cpu *prometheus.Desc
cpuInfo *prometheus.Desc
cpuFrequencyHz *prometheus.Desc
Expand All @@ -50,6 +51,9 @@ type cpuCollector struct {
cpuStats map[int64]procfs.CPUStat
cpuStatsMutex sync.Mutex
isolatedCpus []uint16
cpuFreqEnabled bool
guestEnabled bool
infoEnabled bool

cpuFlagsIncludeRegexp *regexp.Regexp
cpuBugsIncludeRegexp *regexp.Regexp
Expand All @@ -66,34 +70,23 @@ var (
jumpBackDebugMessage = fmt.Sprintf("CPU Idle counter jumped backwards more than %f seconds, possible hotplug event, resetting CPU stats", jumpBackSeconds)
)

func newCPUCollectorConfig() cpuCollectorConfig {
return cpuCollectorConfig{
guestEnabled: *enableCPUGuest,
infoEnabled: *enableCPUInfo,
flagsInclude: *flagsInclude,
bugsInclude: *bugsInclude,
}
}

func init() {
registerCollector("cpu", defaultEnabled, NewCPUCollector)
}

// NewCPUCollector returns a new Collector exposing kernel/system statistics.
func NewCPUCollector(logger *slog.Logger) (Collector, error) {
pfs, err := procfs.NewFS(*procPath)
if err != nil {
return nil, fmt.Errorf("failed to open procfs: %w", err)
}

sfs, err := sysfs.NewFS(*sysPath)
if err != nil {
return nil, fmt.Errorf("failed to open sysfs: %w", err)
}

isolcpus, err := sfs.IsolatedCPUs()
if err != nil {
if !os.IsNotExist(err) {
return nil, fmt.Errorf("unable to get isolated cpus: %w", err)
}
logger.Debug("couldn't open isolated file", "error", err)
}

c := &cpuCollector{
procfs: pfs,
sysfs: sfs,
cpu: nodeCPUSecondsDesc,
cpu: nodeCPUSecondsDesc,
cpuInfo: prometheus.NewDesc(
prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "info"),
"CPU information from /proc/cpuinfo.",
Expand Down Expand Up @@ -139,32 +132,61 @@ func NewCPUCollector(logger *slog.Logger) (Collector, error) {
"CPUs that are online and being scheduled.",
[]string{"cpu"}, nil,
),
logger: logger,
isolatedCpus: isolcpus,
cpuStats: make(map[int64]procfs.CPUStat),
logger: logger,
cpuStats: make(map[int64]procfs.CPUStat),
}
err = c.compileIncludeFlags(flagsInclude, bugsInclude)
return c, nil
}

func (c *cpuCollector) configureRuntimeState(state *collectorRuntimeState) error {
pfs, err := procfs.NewFS(state.config.paths.procMountPoint)
if err != nil {
return nil, fmt.Errorf("fail to compile --collector.cpu.info.flags-include and --collector.cpu.info.bugs-include, the values of them must be regular expressions: %w", err)
return fmt.Errorf("failed to open procfs: %w", err)
}
return c, nil

sfs, err := sysfs.NewFS(state.config.paths.sysMountPoint)
if err != nil {
return fmt.Errorf("failed to open sysfs: %w", err)
}

isolcpus, err := sfs.IsolatedCPUs()
if err != nil {
if !os.IsNotExist(err) {
return fmt.Errorf("unable to get isolated cpus: %w", err)
}
c.logger.Debug("couldn't open isolated file", "error", err)
}

c.procfs = pfs
c.sysfs = sfs
c.sysMountPoint = state.config.paths.sysMountPoint
c.isolatedCpus = isolcpus
c.cpuFreqEnabled = state.enabled["cpufreq"]
c.guestEnabled = state.config.cpu.guestEnabled
c.infoEnabled = state.config.cpu.infoEnabled

if err := c.compileIncludeFlags(state.config.cpu); err != nil {
return fmt.Errorf("fail to compile --collector.cpu.info.flags-include and --collector.cpu.info.bugs-include, the values of them must be regular expressions: %w", err)
}

return nil
}

func (c *cpuCollector) compileIncludeFlags(flagsIncludeFlag, bugsIncludeFlag *string) error {
if (*flagsIncludeFlag != "" || *bugsIncludeFlag != "") && !*enableCPUInfo {
*enableCPUInfo = true
func (c *cpuCollector) compileIncludeFlags(cfg cpuCollectorConfig) error {
if (cfg.flagsInclude != "" || cfg.bugsInclude != "") && !c.infoEnabled {
c.infoEnabled = true
c.logger.Info("--collector.cpu.info has been set to `true` because you set the following flags, like --collector.cpu.info.flags-include and --collector.cpu.info.bugs-include")
}

var err error
if *flagsIncludeFlag != "" {
c.cpuFlagsIncludeRegexp, err = regexp.Compile(*flagsIncludeFlag)
if cfg.flagsInclude != "" {
c.cpuFlagsIncludeRegexp, err = regexp.Compile(cfg.flagsInclude)
if err != nil {
return err
}
}
if *bugsIncludeFlag != "" {
c.cpuBugsIncludeRegexp, err = regexp.Compile(*bugsIncludeFlag)
if cfg.bugsInclude != "" {
c.cpuBugsIncludeRegexp, err = regexp.Compile(cfg.bugsInclude)
if err != nil {
return err
}
Expand All @@ -174,7 +196,7 @@ func (c *cpuCollector) compileIncludeFlags(flagsIncludeFlag, bugsIncludeFlag *st

// Update implements Collector and exposes cpu related metrics from /proc/stat and /sys/.../cpu/.
func (c *cpuCollector) Update(ch chan<- prometheus.Metric) error {
if *enableCPUInfo {
if c.infoEnabled {
if err := c.updateInfo(ch); err != nil {
return err
}
Expand Down Expand Up @@ -219,10 +241,7 @@ func (c *cpuCollector) updateInfo(ch chan<- prometheus.Metric) error {
cpu.CacheSize)
}

cpuFreqEnabled, ok := collectorState["cpufreq"]
if !ok || cpuFreqEnabled == nil {
c.logger.Debug("cpufreq key missing or nil value in collectorState map")
} else if *cpuFreqEnabled {
if c.cpuFreqEnabled {
for _, cpu := range info {
ch <- prometheus.MustNewConstMetric(c.cpuFrequencyHz,
prometheus.GaugeValue,
Expand Down Expand Up @@ -266,7 +285,7 @@ func updateFieldInfo(valueList []string, filter *regexp.Regexp, desc *prometheus

// updateThermalThrottle reads /sys/devices/system/cpu/cpu* and expose thermal throttle statistics.
func (c *cpuCollector) updateThermalThrottle(ch chan<- prometheus.Metric) error {
cpus, err := filepath.Glob(sysFilePath("devices/system/cpu/cpu[0-9]*"))
cpus, err := filepath.Glob(filepath.Join(c.sysMountPoint, "devices/system/cpu/cpu[0-9]*"))
if err != nil {
return err
}
Expand Down Expand Up @@ -395,7 +414,7 @@ func (c *cpuCollector) updateStat(ch chan<- prometheus.Metric) error {
ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.SoftIRQ, cpuNum, "softirq")
ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Steal, cpuNum, "steal")

if *enableCPUGuest {
if c.guestEnabled {
// Guest CPU is also accounted for in cpuStat.User and cpuStat.Nice, expose these as separate metrics.
ch <- prometheus.MustNewConstMetric(c.cpuGuest, prometheus.CounterValue, cpuStat.Guest, cpuNum, "user")
ch <- prometheus.MustNewConstMetric(c.cpuGuest, prometheus.CounterValue, cpuStat.GuestNice, cpuNum, "nice")
Expand Down
9 changes: 9 additions & 0 deletions collector/paths.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,15 @@ var (
udevDataPath = kingpin.Flag("path.udev.data", "udev data path.").Default("/run/udev/data").String()
)

func newCollectorPathConfig() collectorPathConfig {
return collectorPathConfig{
procMountPoint: *procPath,
sysMountPoint: *sysPath,
rootfsPath: *rootfsPath,
udevDataPath: *udevDataPath,
}
}

func procFilePath(name string) string {
return filepath.Join(*procPath, name)
}
Expand Down
Loading