Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 1 addition & 12 deletions pyiceberg/table/update/snapshot.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,19 +16,15 @@
# under the License.
from __future__ import annotations

import concurrent.futures
import itertools
import uuid
from abc import abstractmethod
from collections import defaultdict
from collections.abc import Callable
from concurrent.futures import Future
from datetime import datetime
from functools import cached_property
from typing import TYPE_CHECKING, Generic

from sortedcontainers import SortedList

from pyiceberg.avro.codecs import AvroCompressionCodec
from pyiceberg.expressions import (
AlwaysFalse,
Expand Down Expand Up @@ -792,14 +788,7 @@ def merge_bin(manifest_bin: list[ManifestFile]) -> list[ManifestFile]:

executor = ExecutorFactory.get_or_create()
futures = [executor.submit(merge_bin, b) for b in bins]

# for consistent ordering, we need to maintain future order
futures_index = {f: i for i, f in enumerate(futures)}
completed_futures: SortedList[Future[list[ManifestFile]]] = SortedList(iterable=[], key=lambda f: futures_index[f])
for future in concurrent.futures.as_completed(futures):
completed_futures.add(future)

bin_results: list[list[ManifestFile]] = [f.result() for f in completed_futures if f.result()]
bin_results: list[list[ManifestFile]] = [r for f in futures if (r := f.result())]
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can use the walrus operator (`:=`) to avoid calling `.result()` twice per future.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nice!


return [manifest for bin_result in bin_results for manifest in bin_result]

Expand Down
1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ dependencies = [
"rich>=10.11.0,<15.0.0",
"strictyaml>=1.7.0,<2.0.0", # CVE-2020-14343 was fixed in 5.4.
"pydantic>=2.0,<3.0,!=2.4.0,!=2.4.1,!=2.12.0,!=2.12.1", # 2.4.0, 2.4.1, 2.12.0, 2.12.1 has a critical bug
"sortedcontainers==2.4.0",
"fsspec>=2023.1.0",
"pyparsing>=3.1.0,<4.0.0",
"tenacity>=8.2.3,<10.0.0",
Expand Down
11 changes: 0 additions & 11 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.