Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 8 additions & 28 deletions treeherder/model/data_cycling/removal_strategies.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,6 @@ def remove(self, using: CursorWrapper):
Although the WHERE clause in del_replicate looks redundant, it is intentionally kept to guide
the PostgreSQL planner toward a more efficient execution plan.
"""
chunk_size = self._find_ideal_chunk_size()
using.execute(
"""
WITH target_datum AS (
Expand Down Expand Up @@ -116,21 +115,13 @@ def remove(self, using: CursorWrapper):
USING target_datum td
WHERE pd.id = td.id
""",
[self._max_timestamp, chunk_size, self._max_timestamp],
[self._max_timestamp, self._chunk_size, self._max_timestamp],
)

@property
def name(self) -> str:
    """Return the human-readable label identifying this removal strategy."""
    label = "main removal strategy"
    return label

def _find_ideal_chunk_size(self) -> int:
    """Return how many rows the next removal pass should target.

    Counts the rows whose ``push_timestamp`` is at or before
    ``self._max_timestamp`` and whose id does not exceed the highest id among
    the rows newer than that timestamp, looking at no more than
    ``self._chunk_size`` of them.

    Returns:
        The number of such rows (at most ``self._chunk_size``), or
        ``self._chunk_size`` itself when none are found or when no row newer
        than ``self._max_timestamp`` exists to bound the search.
    """
    newest_unexpired = (
        self._manager.filter(push_timestamp__gt=self._max_timestamp)
        .order_by("-id")
        .first()
    )
    if newest_unexpired is None:
        # Indexing an empty queryset with [0] raises IndexError; with no newer
        # row to bound the id range, fall back to the configured chunk size.
        return self._chunk_size

    older_ids = self._manager.filter(
        push_timestamp__lte=self._max_timestamp, id__lte=newest_unexpired.id
    ).order_by("id")[: self._chunk_size]

    # len() evaluates the sliced queryset; zero matches also falls back to the
    # configured chunk size.
    return len(older_ids) or self._chunk_size


class TryDataRemoval(RemovalStrategy):
"""
Expand Down Expand Up @@ -323,7 +314,6 @@ def remove(self, using: CursorWrapper):
Although the WHERE clause in del_replicate looks redundant, it is intentionally kept to guide
the PostgreSQL planner toward a more efficient execution plan.
"""
chunk_size = self._find_ideal_chunk_size()
repository_id = self.irrelevant_repo
using.execute(
"""
Expand Down Expand Up @@ -357,24 +347,14 @@ def remove(self, using: CursorWrapper):
USING target_datum td
WHERE pd.id = td.id
""",
[repository_id, self._max_timestamp, chunk_size, repository_id, self._max_timestamp],
)

def _find_ideal_chunk_size(self) -> int:
max_id_of_non_expired_row = (
self._manager.filter(push_timestamp__gt=self._max_timestamp)
.filter(repository_id__in=self.irrelevant_repositories)
.order_by("-id")[0]
.id
)
older_perf_data_rows = (
self._manager.filter(
push_timestamp__lte=self._max_timestamp, id__lte=max_id_of_non_expired_row
)
.filter(repository_id__in=self.irrelevant_repositories)
.order_by("id")[: self._chunk_size]
[
repository_id,
self._max_timestamp,
self._chunk_size,
repository_id,
self._max_timestamp,
],
)
return len(older_perf_data_rows) or self._chunk_size


class StalledDataRemoval(RemovalStrategy):
Expand Down