Skip to content

Commit af83384

Browse files
committed
tfscheduler: reset monitoring counters on the start of a new run
1 parent 02348cc commit af83384

File tree

4 files changed

+29
-21
lines changed

4 files changed

+29
-21
lines changed

src/TfScheduler/TfSchedulerStfInfo.cxx

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -45,17 +45,14 @@ void TfSchedulerStfInfo::SchedulingThread()
4545
std::map<std::string, std::uint64_t> lStfSenderMissingCnt;
4646
std::optional<std::vector<StfInfo>> lStfInfosOpt;
4747

48-
// total number of scheduled Tfs
49-
std::size_t lScheduledTfs = 0;
50-
5148
// Build or discard
5249
bool lBuildIncomplete = mDiscoveryConfig->getBoolParam(BuildIncompleteTfsKey, BuildIncompleteTfsValue);
5350
IDDLOG("TfScheduler: Building of incomplete TimeFrames is {}.", lBuildIncomplete ? "enabled" : "disabled");
5451

5552
while ((lStfInfosOpt = mCompleteStfsInfoQueue.pop()) != std::nullopt) {
5653

5754
DDMON("tfscheduler", "tf.rejected.total", mNotScheduledTfsCount);
58-
DDMON("tfscheduler", "tf.scheduled.total", lScheduledTfs);
55+
DDMON("tfscheduler", "tf.scheduled.total", mScheduledTfs);
5956

6057
const std::vector<StfInfo> &lStfInfos = lStfInfosOpt.value();
6158
TfBuildingInformation lRequest;
@@ -105,7 +102,7 @@ void TfSchedulerStfInfo::SchedulingThread()
105102
switch (lResponse.status()) {
106103
case BuildTfResponse::OK:
107104
// marked TfBuilder as scheduled
108-
lScheduledTfs++;
105+
mScheduledTfs++;
109106
mTfBuilderInfo.markTfBuilderWithTfId(lTfBuilderId, lRequest.tf_id());
110107
break;
111108
case BuildTfResponse::ERROR_NOMEM:
@@ -434,6 +431,7 @@ void TfSchedulerStfInfo::addStfInfo(const StfSenderStfInfo &pStfInfo, SchedulerS
434431
DDDLOG("New RunNumber received. run_number={}", lRunNumber);
435432
// reset internal counters
436433
reset();
434+
mTfBuilderInfo.resetCounters();
437435
mRunNumber = lRunNumber;
438436
} else if (mRunNumber > lRunNumber) {
439437
EDDLOG_GRL(500, "New RunNumber is smaller than the previous. run_number={} prev_run_number={}", lRunNumber, mRunNumber);

src/TfScheduler/TfSchedulerStfInfo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,7 @@ class TfSchedulerStfInfo
180180
std::uint64_t mMaxCompletedTfId = 0;
181181
std::uint64_t mNotScheduledTfsCount = 0;
182182
std::uint64_t mStaleTfCount = 0;
183+
std::uint64_t mScheduledTfs = 0;
183184
EventRecorder mDroppedStfs;
184185
EventRecorder mBuiltTfs;
185186

@@ -189,6 +190,7 @@ class TfSchedulerStfInfo
189190
mMaxCompletedTfId = 0;
190191
mNotScheduledTfsCount = 0;
191192
mStaleTfCount = 0;
193+
mScheduledTfs = 0;
192194
mDroppedStfs.reset();
193195
mBuiltTfs.reset();
194196

src/TfScheduler/TfSchedulerTfBuilderInfo.cxx

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -129,10 +129,6 @@ void TfSchedulerTfBuilderInfo::updateTfBuilderInfo(const TfBuilderUpdateMessage
129129

130130
bool TfSchedulerTfBuilderInfo::findTfBuilderForTf(const std::uint64_t pSize, std::string& pTfBuilderId /*out*/)
131131
{
132-
static std::atomic_uint64_t sNoTfBuilderAvailable = 0;
133-
static std::atomic_uint64_t sNoMemoryAvailable = 0;
134-
static std::atomic_uint64_t sTfNumExceeeded = 0;
135-
136132
// NOTE: we will overestimate memory requirement by a factor, until TfBuilder updates
137133
// us with the actual size.
138134
const std::uint64_t lTfEstSize = pSize * (sTfSizeOverestimatePercent + 100) / 100;
@@ -159,22 +155,22 @@ bool TfSchedulerTfBuilderInfo::findTfBuilderForTf(const std::uint64_t pSize, std
159155
// TfBuilder not found?
160156
if ( lIt == mReadyTfBuilders.end() ) {
161157
if (mReadyTfBuilders.empty()) {
162-
++sNoTfBuilderAvailable;
163-
DDMON("tfscheduler", "tf.rejected.no_tfb_inst", sNoTfBuilderAvailable);
158+
++mNoTfBuilderAvailable;
159+
DDMON("tfscheduler", "tf.rejected.no_tfb_inst", mNoTfBuilderAvailable);
164160

165-
WDDLOG_RL(1000, "FindTfBuilder: TF cannot be scheduled. reason=NO_TFBUILDERS total={}",
166-
sNoTfBuilderAvailable);
161+
WDDLOG_RL(10000, "FindTfBuilder: TF cannot be scheduled. reason=NO_TFBUILDERS total={}",
162+
mNoTfBuilderAvailable);
167163

168164
} else if (lMaxTfExceeded) {
169-
++sTfNumExceeeded;
170-
WDDLOG_RL(1000, "FindTfBuilder: TF cannot be scheduled. reason=NUM_TF_EXCEEEDED total={} tf_size={} ready_tfb={}",
171-
sTfNumExceeeded, lTfEstSize, mReadyTfBuilders.size());
172-
DDMON("tfscheduler", "tf.rejected.max_tf_exceeded", sTfNumExceeeded);
165+
++mTfNumExceeeded;
166+
WDDLOG_RL(10000, "FindTfBuilder: TF cannot be scheduled. reason=NUM_TF_EXCEEEDED total={} tf_size={} ready_tfb={}",
167+
mTfNumExceeeded, lTfEstSize, mReadyTfBuilders.size());
168+
DDMON("tfscheduler", "tf.rejected.max_tf_exceeded", mTfNumExceeeded);
173169
} else {
174-
++sNoMemoryAvailable;
175-
DDMON("tfscheduler", "tf.rejected.no_tfb_buf", sNoMemoryAvailable);
176-
WDDLOG_RL(1000, "FindTfBuilder: TF cannot be scheduled. reason=NO_MEMORY total={} tf_size={} ready_tfb={}",
177-
sNoMemoryAvailable, lTfEstSize, mReadyTfBuilders.size());
170+
++mNoMemoryAvailable;
171+
DDMON("tfscheduler", "tf.rejected.no_tfb_buf", mNoMemoryAvailable);
172+
WDDLOG_RL(10000, "FindTfBuilder: TF cannot be scheduled. reason=NO_MEMORY total={} tf_size={} ready_tfb={}",
173+
mNoMemoryAvailable, lTfEstSize, mReadyTfBuilders.size());
178174
}
179175
return false;
180176
}

src/TfScheduler/TfSchedulerTfBuilderInfo.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,12 @@ class TfSchedulerTfBuilderInfo
176176
}
177177
}
178178

179+
void resetCounters() {
180+
mNoTfBuilderAvailable = 0;
181+
mNoMemoryAvailable = 0;
182+
mTfNumExceeeded = 0;
183+
}
184+
179185
private:
180186
/// Overestimation of actual size for TF building
181187
static constexpr std::uint64_t sTfSizeOverestimatePercent = std::uint64_t(10);
@@ -204,6 +210,12 @@ class TfSchedulerTfBuilderInfo
204210
/// List of TfBuilders for Topological distribution
205211
mutable std::recursive_mutex mTopoInfoLock;
206212
std::unordered_map<TfBuilderTopoInfo, std::shared_ptr<TfBuilderInfo>, TfBuilderTopoInfo> mTopoTfBuilders;
213+
214+
215+
/// Counters
216+
std::atomic_uint64_t mNoTfBuilderAvailable = 0;
217+
std::atomic_uint64_t mNoMemoryAvailable = 0;
218+
std::atomic_uint64_t mTfNumExceeeded = 0;
207219
};
208220

209221

0 commit comments

Comments
 (0)