Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
118 changes: 96 additions & 22 deletions qa/1397.out
Original file line number Diff line number Diff line change
Expand Up @@ -1958,6 +1958,13 @@ Help:
Decoded status code type for the error
No value(s) available!

smart.nvme_error_log.total PMID: 150.264.0 [Total number of NVMe Error Log Pages with errors]
Data Type: 64-bit unsigned int InDom: 150.0 0x25800000
Semantics: instant Units: none
Help:
Total number of NVMe Error Log Pages with errors
inst [0 or "sda"] value 0

smart.nvme_info.active_power_state PMID: 150.256.14 [The current active power state for the NVME drive]
Data Type: 32-bit unsigned int InDom: 150.0 0x25800000
Semantics: instant Units: byte
Expand Down Expand Up @@ -4522,6 +4529,13 @@ Help:
Decoded status code type for the error
No value(s) available!

smart.wwid.nvme_error_log.total PMID: 150.1264.0 [Total number of NVMe Error Log Pages with errors]
Data Type: 64-bit unsigned int InDom: 150.1 0x25800001
Semantics: instant Units: none
Help:
Total number of NVMe Error Log Pages with errors
inst [0 or "8ce38e0500882b63"] value 0

smart.wwid.nvme_info.active_power_state PMID: 150.1256.14 [The current active power state for the NVME drive]
Data Type: 32-bit unsigned int InDom: 150.1 0x25800001
Semantics: instant Units: byte
Expand Down Expand Up @@ -7479,6 +7493,17 @@ Help:
Decoded status code type for the error
No value(s) available!

smart.nvme_error_log.total PMID: 150.264.0 [Total number of NVMe Error Log Pages with errors]
Data Type: 64-bit unsigned int InDom: 150.0 0x25800000
Semantics: instant Units: none
Help:
Total number of NVMe Error Log Pages with errors
inst [0 or "sda"] value 0
inst [1 or "sdb"] value 0
inst [2 or "sdc"] value 0
inst [3 or "sdd"] value 0
inst [4 or "sde"] value 0

smart.nvme_info.active_power_state PMID: 150.256.14 [The current active power state for the NVME drive]
Data Type: 32-bit unsigned int InDom: 150.0 0x25800000
Semantics: instant Units: byte
Expand Down Expand Up @@ -10043,6 +10068,13 @@ Help:
Decoded status code type for the error
No value(s) available!

smart.wwid.nvme_error_log.total PMID: 150.1264.0 [Total number of NVMe Error Log Pages with errors]
Data Type: 64-bit unsigned int InDom: 150.1 0x25800001
Semantics: instant Units: none
Help:
Total number of NVMe Error Log Pages with errors
inst [0 or "8ce38e0500882b63"] value 0

smart.wwid.nvme_info.active_power_state PMID: 150.1256.14 [The current active power state for the NVME drive]
Data Type: 32-bit unsigned int InDom: 150.1 0x25800001
Semantics: instant Units: byte
Expand Down Expand Up @@ -12662,6 +12694,13 @@ Help:
Decoded status code type for the error
No value(s) available!

smart.nvme_error_log.total PMID: 150.264.0 [Total number of NVMe Error Log Pages with errors]
Data Type: 64-bit unsigned int InDom: 150.0 0x25800000
Semantics: instant Units: none
Help:
Total number of NVMe Error Log Pages with errors
inst [0 or "nvme0n1"] value 0

smart.nvme_info.active_power_state PMID: 150.256.14 [The current active power state for the NVME drive]
Data Type: 32-bit unsigned int InDom: 150.0 0x25800000
Semantics: instant Units: byte
Expand Down Expand Up @@ -13292,13 +13331,13 @@ Help:
Raw metric value for the given SMART metric
No value(s) available!

smart.wwid.attributes.airflow_temp_celsius.thresh PMID: 150.1190.3 [Normalised threshold value for the given SMART metric]
smart.wwid.attributcat: nvme0n1.error: No such file or directory
es.airflow_temp_celsius.thresh PMID: 150.1190.3 [Normalised threshold value for the given SMART metric]
Data Type: 32-bit unsigned int InDom: 150.1 0x25800001
Semantics: instant Units: byte
Help:
Normalised threshold value for the given SMART metric
No value(s) availablcat: nvme0n1.error: No such file or directory
e!
No value(s) available!

smart.wwid.attributes.airflow_temp_celsius.value PMID: 150.1190.1 [Normalised current recorded value for the given SMART metric]
Data Type: 32-bit unsigned int InDom: 150.1 0x25800001
Expand Down Expand Up @@ -14280,14 +14319,14 @@ Help:
Normalised threshold value for the given SMART metric
No value(s) available!

smart.wwid.attributes.read_soft_error_rate.value PMID: 150.1013.1 [Normalised current recorded value for the given SMART metric]
smart.wwid.attributes.read_soft_cat: nvme0n1.error: No such file or directory
cat: nvme0n1.error: No such file or directory
error_rate.value PMID: 150.1013.1 [Normalised current recorded value for the given SMART metric]
Data Type: 32-bit unsigned int InDom: 150.1 0x25800001
Semantics: instant Units: byte
Help:
Normalised current recorded value for the given SMART metric
No value(s) availablcat: nvme0n1.error: No such file or directory
cat: nvme0n1.error: No such file or directory
e!
No value(s) available!

smart.wwid.attributes.read_soft_error_rate.worst PMID: 150.1013.2 [Normalised worst recorded value for the given SMART metric]
Data Type: 32-bit unsigned int InDom: 150.1 0x25800001
Expand Down Expand Up @@ -15154,15 +15193,15 @@ Currrent temperature as reported by temp sensor 2

smart.wwid.nvme_attributes.unsafe_shutdowns PMID: 150.1255.12 [Contains the number of unsafe shutdowns]
Data Type: 32-bit unsigned int InDom: 150.1 0x25800001
Semantics: instant Units: byte
Semantics: inscat: nvme0n1.error: No such file or directory
cat: nvme0n1.error: No such file or directory
tant Units: byte
Help:
Contains the number of unsafe shutdowns
inst [0 or "8ce38e0500882b63"] value 134

smart.wwid.nvme_attributes.warning_composite_temperature_time PMID: 150.1255.15 [Contains amount of time in minutes over composite temp]
Data Type: 32-bit unsigned incat: nvme0n1.error: No such file or directory
cat: nvme0n1.error: No such file or directory
t InDom: 150.1 0x25800001
Data Type: 32-bit unsigned int InDom: 150.1 0x25800001
Semantics: instant Units: byte
Help:
Contains amount of time in minutes over composite temp
Expand Down Expand Up @@ -15231,6 +15270,13 @@ Help:
Decoded status code type for the error
No value(s) available!

smart.wwid.nvme_error_log.total PMID: 150.1264.0 [Total number of NVMe Error Log Pages with errors]
Data Type: 64-bit unsigned int InDom: 150.1 0x25800001
Semantics: instant Units: none
Help:
Total number of NVMe Error Log Pages with errors
inst [0 or "8ce38e0500882b63"] value 0

smart.wwid.nvme_info.active_power_state PMID: 150.1256.14 [The current active power state for the NVME drive]
Data Type: 32-bit unsigned int InDom: 150.1 0x25800001
Semantics: instant Units: byte
Expand Down Expand Up @@ -17842,6 +17888,13 @@ Help:
Decoded status code type for the error
No value(s) available!

smart.nvme_error_log.total PMID: 150.264.0 [Total number of NVMe Error Log Pages with errors]
Data Type: 64-bit unsigned int InDom: 150.0 0x25800000
Semantics: instant Units: none
Help:
Total number of NVMe Error Log Pages with errors
inst [0 or "nvme0n1"] value 0

smart.nvme_info.active_power_state PMID: 150.256.14 [The current active power state for the NVME drive]
Data Type: 32-bit unsigned int InDom: 150.0 0x25800000
Semantics: instant Units: byte
Expand Down Expand Up @@ -18556,15 +18609,15 @@ Help:
Normalised current recorded value for the given SMART metric
No value(s) available!

smart.wwid.attributes.command_timeout.worst PMID: 150.1188.2 [Normalised worst recorded value for the given SMART metric]
smart.wwid.attributes.command_timeout.worst PMID: 150.1188.2 [Normalised worst recorded value forcat: nvme0n1.error: No such file or directory
the given SMART metric]
Data Type: 32-bit unsigned int InDom: 150.1 0x25800001
Semantics: instant Units: byte
Help:
Normalised worst recorded value for the given SMART metric
No value(s) available!

smart.wwid.attributes.current_helium_level.id PMID: 150.1022.0 [The gicat: nvme0n1.error: No such file or directory
ven ID for SMART metric]
smart.wwid.attributes.current_helium_level.id PMID: 150.1022.0 [The given ID for SMART metric]
Data Type: 32-bit unsigned int InDom: 150.1 0x25800001
Semantics: instant Units: byte
Help:
Expand Down Expand Up @@ -19447,16 +19500,16 @@ The given ID for SMART metric
No value(s) available!

smart.wwid.attributes.read_soft_error_rate.raw PMID: 150.1013.4 [Raw metric value for the given SMART metric]
Data Type: 64-bit unsigned int InDom: 150.1 0x25800001
Data Type: 64-bit unsigncat: nvme0n1.error: No such file or directory
cat: nvme0n1.error: No such file or directory
ed int InDom: 150.1 0x25800001
Semantics: instant Units: count
Help:
Raw metric value for the given SMART metric
No value(s) available!

smart.wwid.attributes.read_soft_error_rate.thresh PMID: 150.1013.3 [Normalised threshold value for the given SMART metric]
Data Type:cat: nvme0n1.error: No such file or directory
cat: nvme0n1.error: No such file or directory
32-bit unsigned int InDom: 150.1 0x25800001
Data Type: 32-bit unsigned int InDom: 150.1 0x25800001
Semantics: instant Units: byte
Help:
Normalised threshold value for the given SMART metric
Expand Down Expand Up @@ -20325,14 +20378,14 @@ Help:
Current temperature as reported by temp sensor 3
inst [0 or "8ce38e0500882b63"] value 0

smart.wwid.nvme_attributes.temperature_sensor_two PMID: 150.1255.18 [Currrent temperature as reported by temp sensor 2]
smart.wwid.nvme_attributes.temcat: nvme0n1.error: No such file or directory
cat: nvme0n1.error: No such file or directory
perature_sensor_two PMID: 150.1255.18 [Currrent temperature as reported by temp sensor 2]
Data Type: 32-bit unsigned int InDom: 150.1 0x25800001
Semantics: instant Units: byte
Help:
Currrent temperature as reported by temp sensor 2
inst [0 or "8ce38e0500882b63"] valcat: nvme0n1.error: No such file or directory
cat: nvme0n1.error: No such file or directory
ue 0
inst [0 or "8ce38e0500882b63"] value 0

smart.wwid.nvme_attributes.unsafe_shutdowns PMID: 150.1255.12 [Contains the number of unsafe shutdowns]
Data Type: 32-bit unsigned int InDom: 150.1 0x25800001
Expand Down Expand Up @@ -20411,6 +20464,13 @@ Help:
Decoded status code type for the error
No value(s) available!

smart.wwid.nvme_error_log.total PMID: 150.1264.0 [Total number of NVMe Error Log Pages with errors]
Data Type: 64-bit unsigned int InDom: 150.1 0x25800001
Semantics: instant Units: none
Help:
Total number of NVMe Error Log Pages with errors
inst [0 or "8ce38e0500882b63"] value 0

smart.wwid.nvme_info.active_power_state PMID: 150.1256.14 [The current active power state for the NVME drive]
Data Type: 32-bit unsigned int InDom: 150.1 0x25800001
Semantics: instant Units: byte
Expand Down Expand Up @@ -23045,6 +23105,13 @@ Decoded status code type for the error
inst [2 or "nvme0n1::entry_2"] value "Command Specific Status"
inst [3 or "nvme0n1::entry_3"] value "Command Specific Status"

smart.nvme_error_log.total PMID: 150.264.0 [Total number of NVMe Error Log Pages with errors]
Data Type: 64-bit unsigned int InDom: 150.0 0x25800000
Semantics: instant Units: none
Help:
Total number of NVMe Error Log Pages with errors
inst [0 or "nvme0n1"] value 4

smart.nvme_info.active_power_state PMID: 150.256.14 [The current active power state for the NVME drive]
Data Type: 32-bit unsigned int InDom: 150.0 0x25800000
Semantics: instant Units: byte
Expand Down Expand Up @@ -25636,6 +25703,13 @@ Decoded status code type for the error
inst [2 or "002538d211514cfa::entry_3"] value "Command Specific Status"
inst [3 or "002538d211514cfa::entry_4"] value "Command Specific Status"

smart.wwid.nvme_error_log.total PMID: 150.1264.0 [Total number of NVMe Error Log Pages with errors]
Data Type: 64-bit unsigned int InDom: 150.1 0x25800001
Semantics: instant Units: none
Help:
Total number of NVMe Error Log Pages with errors
inst [0 or "002538d211514cfa"] value 0

smart.wwid.nvme_info.active_power_state PMID: 150.1256.14 [The current active power state for the NVME drive]
Data Type: 32-bit unsigned int InDom: 150.1 0x25800001
Semantics: instant Units: byte
Expand Down
2 changes: 2 additions & 0 deletions src/pmdas/smart/help
Original file line number Diff line number Diff line change
Expand Up @@ -461,6 +461,7 @@ Whether the given drive has PASSED or FAILED SMART the self-test
@smart.nvme_power_states.power_state_5.entry_latency Entry latency going into this power state
@smart.nvme_power_states.power_state_5.exit_latency Exit latency leaving from this power state

@smart.nvme_error_log.total Total number of NVMe Error Log Pages with errors
@smart.nvme_error_log.error_count Incrementing error count (unique id for error)
@smart.nvme_error_log.sqid Submission queue id of the command that the error is associated with
@smart.nvme_error_log.cmdid Command id of the command that the error is associated with
Expand Down Expand Up @@ -903,6 +904,7 @@ The given block device name for the disk drive.
@smart.wwid.nvme_power_states.power_state_5.entry_latency Entry latency going into this power state
@smart.wwid.nvme_power_states.power_state_5.exit_latency Exit latency leaving from this power state

@smart.wwid.nvme_error_log.total Total number of NVMe Error Log Pages with errors
@smart.wwid.nvme_error_log.error_count Incrementing error count (unique id for error)
@smart.wwid.nvme_error_log.sqid Submission queue id of the command that the error is associated with
@smart.wwid.nvme_error_log.cmdid Command id of the command that the error is associated with
Expand Down
22 changes: 22 additions & 0 deletions src/pmdas/smart/pmda.c
Original file line number Diff line number Diff line change
Expand Up @@ -1473,6 +1473,10 @@ pmdaMetric metrictable[] = {
PM_TYPE_U32, DISK_INDOM, PM_SEM_INSTANT,
PMDA_PMUNITS(1,0,0,0,0,0) }, },
/*NVMe - Error Log */
{ .m_desc = {
PMDA_PMID(CLUSTER_NVME_ERROR_LOG_TOTAL, 0),
PM_TYPE_U64, DISK_INDOM, PM_SEM_INSTANT,
PMDA_PMUNITS(0,0,0,0,0,0) }, },
{ .m_desc = {
PMDA_PMID(CLUSTER_NVME_ERROR_LOG, ERROR_COUNT),
PM_TYPE_U64, DISK_NVME_LOG_INDOM, PM_SEM_INSTANT,
Expand Down Expand Up @@ -2947,6 +2951,10 @@ pmdaMetric metrictable[] = {
PM_TYPE_U32, UUID_INDOM, PM_SEM_INSTANT,
PMDA_PMUNITS(1,0,0,0,0,0) }, },
/*UUID NVMe - Error Log */
{ .m_desc = {
PMDA_PMID(CLUSTER_UUID_NVME_ERROR_LOG_TOTAL, 0),
PM_TYPE_U64, UUID_INDOM, PM_SEM_INSTANT,
PMDA_PMUNITS(0,0,0,0,0,0) }, },
{ .m_desc = {
PMDA_PMID(CLUSTER_UUID_NVME_ERROR_LOG, ERROR_COUNT),
PM_TYPE_U64, UUID_NVME_LOG_INDOM, PM_SEM_INSTANT,
Expand Down Expand Up @@ -3437,6 +3445,13 @@ smart_fetchCallBack(pmdaMetric *mdesc, unsigned int inst, pmAtomValue *atom)
return sts;
return nvme_error_log_fetch(item, cluster, nvme_error_log, atom);

case CLUSTER_NVME_ERROR_LOG_TOTAL:
sts = pmdaCacheLookup(INDOM(DISK_INDOM), inst, NULL, (void**)&dev);
if (sts <0)
return sts;
atom->ull = dev->nvme_error_log_total;
return PMDA_FETCH_STATIC;

case CLUSTER_UUID_INFO:
sts = pmdaCacheLookup(INDOM(UUID_INDOM), inst, NULL, (void **)&dev);
if (sts < 0)
Expand Down Expand Up @@ -3528,6 +3543,13 @@ smart_fetchCallBack(pmdaMetric *mdesc, unsigned int inst, pmAtomValue *atom)
return sts;
return nvme_error_log_fetch(item, cluster, nvme_error_log, atom);

case CLUSTER_UUID_NVME_ERROR_LOG_TOTAL:
sts = pmdaCacheLookup(INDOM(UUID_INDOM), inst, NULL, (void**)&dev);
if (sts <0)
return sts;
atom->ull = dev->nvme_error_log_total;
return PMDA_FETCH_STATIC;

default:
return PM_ERR_PMID;
}
Expand Down
3 changes: 3 additions & 0 deletions src/pmdas/smart/pmdasmart.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ enum {
CLUSTER_POWER_STATE_4 = 261,
CLUSTER_POWER_STATE_5 = 262,
CLUSTER_NVME_ERROR_LOG = 263,
CLUSTER_NVME_ERROR_LOG_TOTAL = 264,
CLUSTER_UUID_INFO = 1000,
CLUSTER_UUID_RAW_READ_ERROR_RATE = 1001,
CLUSTER_UUID_THROUGHPUT_PERFORMANCE = 1002,
Expand Down Expand Up @@ -137,6 +138,7 @@ enum {
CLUSTER_UUID_POWER_STATE_4 = 1261,
CLUSTER_UUID_POWER_STATE_5 = 1262,
CLUSTER_UUID_NVME_ERROR_LOG = 1263,
CLUSTER_UUID_NVME_ERROR_LOG_TOTAL = 1264,
NUM_CLUSTERS
};

Expand All @@ -156,6 +158,7 @@ struct block_dev {
struct nvme_device_info nvme_device_info;
struct nvme_smart_data nvme_smart_data;
struct nvme_power_states nvme_power_states;
uint64_t nvme_error_log_total;
};

extern pmdaMetric metrictable[];
Expand Down
2 changes: 2 additions & 0 deletions src/pmdas/smart/pmns
Original file line number Diff line number Diff line change
Expand Up @@ -611,6 +611,7 @@ smart.nvme_power_states.power_state_5 {
}

smart.nvme_error_log {
total SMART:264:0
error_count SMART:263:0
sqid SMART:263:1
cmdid SMART:263:2
Expand Down Expand Up @@ -1216,6 +1217,7 @@ smart.wwid.nvme_power_states.power_state_5 {
}

smart.wwid.nvme_error_log {
total SMART:1264:0
error_count SMART:1263:0
sqid SMART:1263:1
cmdid SMART:1263:2
Expand Down
6 changes: 6 additions & 0 deletions src/pmdas/smart/smart_stats.c
Original file line number Diff line number Diff line change
Expand Up @@ -1433,6 +1433,7 @@ nvme_error_log_refresh(void)
int found_nvme_log = 0;
int count = 0;
int total;
dev->nvme_error_log_total = 0;

pmsprintf(buffer, sizeof(buffer), "%s -l error /dev/%s", smart_setup_stats, dev_name);

Expand Down Expand Up @@ -1514,6 +1515,11 @@ nvme_error_log_refresh(void)
}
}
pclose(pf);

// Use (count), the running counter of NVMe Error Log entries seen for the
// current drive, as that drive's total number of errors.
dev->nvme_error_log_total = count;
}
return 0;
}
Expand Down
Loading