-
Notifications
You must be signed in to change notification settings - Fork 53
OU-1040: feat(incidents): add support absolute start dates in Incidents component #892
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -39,6 +39,7 @@ import { | |
| onIncidentFiltersSelect, | ||
| parseUrlParams, | ||
| updateBrowserUrl, | ||
| DAY_MS, | ||
| } from './utils'; | ||
| import { groupAlertsForTable, convertToAlerts } from './processAlerts'; | ||
| import { CompressArrowsAltIcon, CompressIcon, FilterIcon } from '@patternfly/react-icons'; | ||
|
|
@@ -231,52 +232,54 @@ const IncidentsPage = () => { | |
| }, [incidentsActiveFilters.days]); | ||
|
|
||
| useEffect(() => { | ||
| (async () => { | ||
| const currentTime = incidentsLastRefreshTime; | ||
| Promise.all( | ||
| timeRanges.map(async (range) => { | ||
| const response = await fetchDataForIncidentsAndAlerts( | ||
| safeFetch, | ||
| range, | ||
| createAlertsQuery(incidentForAlertProcessing), | ||
| ); | ||
| return response.data.result; | ||
| }), | ||
| ) | ||
| .then((results) => { | ||
| const prometheusResults = results.flat(); | ||
| const alerts = convertToAlerts( | ||
| prometheusResults, | ||
| incidentForAlertProcessing, | ||
| currentTime, | ||
| ); | ||
| if (incidentForAlertProcessing.length === 0) { | ||
| return; | ||
| } | ||
|
|
||
| const currentTime = incidentsLastRefreshTime; | ||
|
|
||
| // Always fetch 15 days of alert data so firstTimestamp is computed from full history | ||
| const fetchTimeRanges = getIncidentsTimeRanges(15 * DAY_MS, currentTime); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this a performance issue, this will create a significant load on prometheus as all users will fetch 15 days historic data regardless of their current time range. Is there a better way to get this initial value?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Unfortunately at the moment this is the only suitable way to move to absolute dates. We've tried in the past (#749) different approaches (loading only timestamps) but it was not suitable with the current data model (which involves gaps and severity changes and can't allow a 1:1 mapping between incident and metric timestamp). The final tradeoff to manage this is to load 15-day windows via multiple smaller requests rather than a single bulk call. This increases the total request count but ensures each response involves less datapoints (as it actually happen). /cc @falox
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We've tried several approaches, but we haven't found a better solution yet. It's the price we have to pay for a good user experience. @iNecas We can mitigate this by adding query memoization. The start/end parameters are rounded on the client side and remain unchanged for 5 minutes. This would reduce the number of calls when filters are changed. |
||
|
|
||
| Promise.all( | ||
| fetchTimeRanges.map(async (range) => { | ||
| const response = await fetchDataForIncidentsAndAlerts( | ||
| safeFetch, | ||
| range, | ||
| createAlertsQuery(incidentForAlertProcessing), | ||
| ); | ||
| return response.data.result; | ||
| }), | ||
| ) | ||
| .then((alertsResults) => { | ||
| const prometheusResults = alertsResults.flat(); | ||
| const alerts = convertToAlerts( | ||
| prometheusResults, | ||
| incidentForAlertProcessing, | ||
| currentTime, | ||
| daysSpan, | ||
| ); | ||
| dispatch( | ||
| setAlertsData({ | ||
| alertsData: alerts, | ||
| }), | ||
| ); | ||
| if (rules && alerts) { | ||
| dispatch( | ||
| setAlertsData({ | ||
| alertsData: alerts, | ||
| setAlertsTableData({ | ||
| alertsTableData: groupAlertsForTable(alerts, rules), | ||
| }), | ||
| ); | ||
| if (rules && alerts) { | ||
| dispatch( | ||
| setAlertsTableData({ | ||
| alertsTableData: groupAlertsForTable(alerts, rules), | ||
| }), | ||
| ); | ||
| } | ||
| if (!isEmpty(filteredData)) { | ||
| dispatch(setAlertsAreLoading({ alertsAreLoading: false })); | ||
| } else { | ||
| dispatch(setAlertsAreLoading({ alertsAreLoading: true })); | ||
| } | ||
| }) | ||
| .catch((err) => { | ||
| // eslint-disable-next-line no-console | ||
| console.log(err); | ||
|
|
||
| dispatch(setAlertsAreLoading({ alertsAreLoading: false })); | ||
| setLoadError(err); | ||
| }); | ||
| })(); | ||
| }, [incidentForAlertProcessing]); | ||
| } | ||
| dispatch(setAlertsAreLoading({ alertsAreLoading: false })); | ||
| }) | ||
| .catch((err) => { | ||
| // eslint-disable-next-line no-console | ||
| console.error(err); | ||
| dispatch(setAlertsAreLoading({ alertsAreLoading: false })); | ||
| setLoadError(err); | ||
| }); | ||
| }, [incidentForAlertProcessing, rules, daysSpan]); | ||
|
|
||
| useEffect(() => { | ||
| if (!isInitialized) return; | ||
|
|
@@ -293,30 +296,34 @@ const IncidentsPage = () => { | |
| ? incidentsActiveFilters.days[0].split(' ')[0] + 'd' | ||
| : '', | ||
| ); | ||
| const calculatedTimeRanges = getIncidentsTimeRanges(daysDuration, currentTime); | ||
|
|
||
| const isGroupSelected = !!selectedGroupId; | ||
| const incidentsQuery = isGroupSelected | ||
| ? `cluster_health_components_map{group_id='${selectedGroupId}'}` | ||
| : 'cluster_health_components_map'; | ||
|
|
||
| // Always fetch 15 days of data so firstTimestamp is computed from full history | ||
| const fetchTimeRanges = getIncidentsTimeRanges(15 * DAY_MS, currentTime); | ||
|
|
||
| Promise.all( | ||
| calculatedTimeRanges.map(async (range) => { | ||
| fetchTimeRanges.map(async (range) => { | ||
| const response = await fetchDataForIncidentsAndAlerts(safeFetch, range, incidentsQuery); | ||
| return response.data.result; | ||
| }), | ||
| ) | ||
| .then((results) => { | ||
| const prometheusResults = results.flat(); | ||
| const incidents = convertToIncidents(prometheusResults, currentTime); | ||
| .then((incidentsResults) => { | ||
| const prometheusResults = incidentsResults.flat(); | ||
| const incidents = convertToIncidents(prometheusResults, currentTime, daysDuration); | ||
|
|
||
| // Update the raw, unfiltered incidents state | ||
| dispatch(setIncidents({ incidents })); | ||
|
|
||
| const filteredData = filterIncident(incidentsActiveFilters, incidents); | ||
|
|
||
| // Filter the incidents and dispatch | ||
| dispatch( | ||
| setFilteredIncidentsData({ | ||
| filteredIncidentsData: filterIncident(incidentsActiveFilters, incidents), | ||
| filteredIncidentsData: filteredData, | ||
| }), | ||
| ); | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this change anyhow related to the original issue. Identity of alerts might not be as simple as using alertname, namespace and severity (e.g. cluster_operator_down has
name, which is unique as well). IMO This deserves more attention than just "a btw. change" while addressing something else.