Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions docs/src/format/table/branch_tag.md
Original file line number Diff line number Diff line change
Expand Up @@ -114,8 +114,10 @@ Tags are always stored at the root dataset level, regardless of which branch the

Each tag file is a JSON file with the following fields:

| JSON Key | Type | Optional | Description |
|-----------------|--------|----------|--------------------------------------------------------------------------|
| `branch` | string | Yes | Branch name being tagged. `null` or absent indicates main branch. |
| `version` | number | | Version number being tagged within that branch. |
| `manifest_size` | number | | Size of the manifest file in bytes. Used for efficient manifest loading. |
| JSON Key | Type | Optional | Description |
|----------------|--------|----------|-----------------------------------------------------------------------------------|
| `branch` | string | Yes | Branch name being tagged. `null` or absent indicates main branch. |
| `version` | number | | Version number being tagged within that branch. |
| `createdAt` | string | Yes | RFC 3339 UTC timestamp when the tag was first created. Historical tags may omit it. |
| `updatedAt` | string | Yes | RFC 3339 UTC timestamp when the tag was last modified. On creation it matches `createdAt`. |
| `manifestSize` | number | | Size of the manifest file in bytes. Used for efficient manifest loading. |
10 changes: 5 additions & 5 deletions docs/src/guide/tags_and_branches.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,21 +36,21 @@ print(ds.tags.list())
# {}
ds.tags.create("v1-prod", (None, 1))
print(ds.tags.list())
# {'v1-prod': {'version': 1, 'manifest_size': ...}}
# {'v1-prod': {'version': 1, 'created_at': ..., 'updated_at': ..., 'manifest_size': ...}}
ds.tags.update("v1-prod", (None, 2))
print(ds.tags.list())
# {'v1-prod': {'version': 2, 'manifest_size': ...}}
# {'v1-prod': {'version': 2, 'created_at': ..., 'updated_at': ..., 'manifest_size': ...}}
ds.tags.delete("v1-prod")
print(ds.tags.list())
# {}
print(ds.tags.list_ordered())
# []
ds.tags.create("v1-prod", (None, 1))
print(ds.tags.list_ordered())
# [('v1-prod', {'version': 1, 'manifest_size': ...})]
# [('v1-prod', {'version': 1, 'created_at': ..., 'updated_at': ..., 'manifest_size': ...})]
ds.tags.update("v1-prod", (None, 2))
print(ds.tags.list_ordered())
# [('v1-prod', {'version': 2, 'manifest_size': ...})]
# [('v1-prod', {'version': 2, 'created_at': ..., 'updated_at': ..., 'manifest_size': ...})]
ds.tags.delete("v1-prod")
print(ds.tags.list_ordered())
# []
Expand Down Expand Up @@ -122,4 +122,4 @@ print(ds.branches.list_ordered(order="desc"))

Branches hold references to data files. Lance ensures that cleanup does not delete files still referenced by any branch.

Delete unused branches to allow their referenced files to be cleaned up by `cleanup_old_versions()`.
Delete unused branches to allow their referenced files to be cleaned up by `cleanup_old_versions()`.
26 changes: 25 additions & 1 deletion java/lance-jni/src/blocking_dataset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2364,6 +2364,26 @@ fn inner_list_tags<'local>(
env: &mut JNIEnv<'local>,
java_dataset: JObject,
) -> Result<JObject<'local>> {
/// Converts an optional UTC timestamp into a `java.time.Instant` object reference.
///
/// A missing timestamp becomes a null `JObject`. A present one is handed to the static
/// Java method `Instant.ofEpochSecond(long, long)` as whole seconds since the Unix epoch
/// plus the sub-second nanosecond component.
///
/// Errors raised by the static call, or by unwrapping its result as an object, are
/// propagated to the caller.
fn optional_datetime_to_java_instant<'local>(
    env: &mut JNIEnv<'local>,
    timestamp: Option<&DateTime<Utc>>,
) -> Result<JObject<'local>> {
    match timestamp {
        // Nothing recorded: hand Java a null reference.
        None => Ok(JObject::null()),
        Some(moment) => {
            let epoch_seconds = JValue::Long(moment.timestamp());
            // The nanosecond component is unsigned 32-bit, so widening to i64 is lossless.
            let nano_adjustment = JValue::Long(i64::from(moment.timestamp_subsec_nanos()));
            let instant = env
                .call_static_method(
                    "java/time/Instant",
                    "ofEpochSecond",
                    "(JJ)Ljava/time/Instant;",
                    &[epoch_seconds, nano_adjustment],
                )?
                .l()?;
            Ok(instant)
        }
    }
}

let tag_map = {
let dataset_guard =
unsafe { env.get_rust_field::<_, _, BlockingDataset>(java_dataset, NATIVE_DATASET) }?;
Expand All @@ -2377,14 +2397,18 @@ fn inner_list_tags<'local>(
} else {
JObject::null()
};
let created_at = optional_datetime_to_java_instant(env, tag_contents.created_at.as_ref())?;
let updated_at = optional_datetime_to_java_instant(env, tag_contents.updated_at.as_ref())?;
let java_tag = env.new_object(
"org/lance/Tag",
"(Ljava/lang/String;Ljava/lang/String;JI)V",
"(Ljava/lang/String;Ljava/lang/String;JILjava/time/Instant;Ljava/time/Instant;)V",
&[
JValue::Object(&env.new_string(tag_name)?.into()),
JValue::Object(&branch_name),
JValue::Long(tag_contents.version as i64),
JValue::Int(tag_contents.manifest_size as i32),
JValue::Object(&created_at),
JValue::Object(&updated_at),
],
)?;
env.call_method(
Expand Down
30 changes: 30 additions & 0 deletions java/src/main/java/org/lance/Tag.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

import com.google.common.base.MoreObjects;

import java.time.Instant;
import java.util.Objects;
import java.util.Optional;

Expand All @@ -23,12 +24,31 @@ public class Tag {
private final Optional<String> branch;
private final long version;
private final int manifestSize;
private final Optional<Instant> createdAt;
private final Optional<Instant> updatedAt;

/**
 * Creates a tag without timestamp metadata.
 *
 * <p>Delegates to the full constructor with {@code null} for both timestamps, so the resulting
 * tag reports an empty {@code createdAt} and {@code updatedAt}.
 *
 * @param name the tag name
 * @param branch the branch name, or {@code null} when no branch is recorded
 * @param version the tagged version number
 * @param manifestSize the manifest size recorded for the tag
 */
public Tag(String name, String branch, long version, int manifestSize) {
  this(name, branch, version, manifestSize, null, null);
}

/**
 * Builds a tag together with its creation and last-update timestamps.
 *
 * <p>This constructor is invoked from native code through JNI when tag metadata is read. The
 * timestamps are generated by the system rather than supplied by Java callers, which is why it
 * is not part of the public Java input surface.
 *
 * @param name the tag name
 * @param branch the branch name, or {@code null} when no branch is recorded
 * @param version the tagged version number
 * @param manifestSize the manifest size recorded for the tag
 * @param createdAt when the tag was created, or {@code null} when not recorded
 * @param updatedAt when the tag was last modified, or {@code null} when not recorded
 */
private Tag(
    String name,
    String branch,
    long version,
    int manifestSize,
    Instant createdAt,
    Instant updatedAt) {
  // Required values are stored as given.
  this.name = name;
  this.version = version;
  this.manifestSize = manifestSize;

  // Nullable values are wrapped so absence is represented by an empty Optional.
  this.branch = Optional.ofNullable(branch);
  this.createdAt = Optional.ofNullable(createdAt);
  this.updatedAt = Optional.ofNullable(updatedAt);
}

public String getName() {
Expand All @@ -47,13 +67,23 @@ public int getManifestSize() {
return manifestSize;
}

/**
 * Returns the creation timestamp of this tag.
 *
 * @return the creation time, or an empty {@link Optional} when none was recorded
 */
public Optional<Instant> getCreatedAt() {
  return this.createdAt;
}

/**
 * Returns the last-modification timestamp of this tag.
 *
 * @return the last update time, or an empty {@link Optional} when none was recorded
 */
public Optional<Instant> getUpdatedAt() {
  return this.updatedAt;
}

/**
 * Renders every field of this tag, including both timestamps, for logging and debugging.
 *
 * @return a string listing name, branch, version, manifestSize, createdAt and updatedAt
 */
@Override
public String toString() {
  MoreObjects.ToStringHelper description = MoreObjects.toStringHelper(this);
  description.add("name", name);
  description.add("branch", branch);
  description.add("version", version);
  description.add("manifestSize", manifestSize);
  description.add("createdAt", createdAt);
  description.add("updatedAt", updatedAt);
  return description.toString();
}

Expand Down
43 changes: 41 additions & 2 deletions java/src/test/java/org/lance/DatasetTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@
import java.nio.file.Files;
import java.nio.file.Path;
import java.time.Clock;
import java.time.Instant;
import java.time.ZonedDateTime;
import java.util.ArrayList;
import java.util.Arrays;
Expand Down Expand Up @@ -317,8 +318,15 @@ void testTags(@TempDir Path tempDir) {
try (Dataset dataset = testDataset.createEmptyDataset()) {
assertEquals(1, dataset.version());
dataset.tags().create("tag1", Ref.ofMain());
assertEquals(1, dataset.tags().list().size());
assertEquals(1, dataset.tags().list().get(0).getVersion());
List<Tag> tags = dataset.tags().list();
Tag tag1 = tags.get(0);
assertEquals(1, tags.size());
assertEquals(1, tag1.getVersion());
assertEquals(new Tag("tag1", null, 1, tag1.getManifestSize()), tag1);
assertTrue(new HashSet<>(tags).contains(new Tag("tag1", null, 1, tag1.getManifestSize())));
assertTrue(tag1.getCreatedAt().isPresent());
assertTrue(tag1.getUpdatedAt().isPresent());
assertEquals(tag1.getCreatedAt(), tag1.getUpdatedAt());
assertEquals(1, dataset.tags().getVersion("tag1"));
}

Expand All @@ -332,10 +340,41 @@ void testTags(@TempDir Path tempDir) {
assertEquals(2, dataset2.tags().list().size());
assertEquals(1, dataset2.tags().getVersion("tag1"));
assertEquals(2, dataset2.tags().getVersion("tag2"));
Instant tag2CreatedAt =
dataset2.tags().list().stream()
.filter(t -> t.getName().equals("tag2"))
.findFirst()
.orElseThrow()
.getCreatedAt()
.orElseThrow();
Instant tag2UpdatedAt =
dataset2.tags().list().stream()
.filter(t -> t.getName().equals("tag2"))
.findFirst()
.orElseThrow()
.getUpdatedAt()
.orElseThrow();
assertEquals(tag2CreatedAt, tag2UpdatedAt);
dataset2.tags().update("tag2", Ref.ofMain(1));
assertEquals(2, dataset2.tags().list().size());
assertEquals(1, dataset2.tags().list().get(0).getVersion());
assertEquals(1, dataset2.tags().list().get(1).getVersion());
Instant updatedTag2CreatedAt =
dataset2.tags().list().stream()
.filter(t -> t.getName().equals("tag2"))
.findFirst()
.orElseThrow()
.getCreatedAt()
.orElseThrow();
Instant updatedTag2 =
dataset2.tags().list().stream()
.filter(t -> t.getName().equals("tag2"))
.findFirst()
.orElseThrow()
.getUpdatedAt()
.orElseThrow();
assertEquals(updatedTag2CreatedAt, tag2CreatedAt);
assertFalse(updatedTag2.isBefore(tag2UpdatedAt));
assertEquals(1, dataset2.tags().getVersion("tag1"));
assertEquals(1, dataset2.tags().getVersion("tag2"));
dataset2.tags().delete("tag2");
Expand Down
4 changes: 3 additions & 1 deletion python/python/lance/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -4370,6 +4370,8 @@ class Transaction:
# Metadata describing a single tag, as returned by the tag listing APIs.
class Tag(TypedDict):
# Name of the branch being tagged.
# NOTE(review): the format docs describe a null/absent branch as the main branch;
# confirm that None carries the same meaning here.
branch: Optional[str]
# Version number being tagged within that branch.
version: int
# Timestamp of when the tag was first created; the format docs note that
# historical tags may omit it, hence Optional.
created_at: Optional[datetime]
# Timestamp of when the tag was last modified; per the format docs it matches
# created_at when the tag is first created.
updated_at: Optional[datetime]
# Size of the manifest file in bytes.
manifest_size: int


Expand Down Expand Up @@ -5727,7 +5729,7 @@ def list(self) -> dict[str, Tag]:
Returns
-------
dict[str, Tag]
A dictionary mapping tag names to version numbers.
A dictionary mapping tag names to tag metadata.
"""
return self._ds.tags()

Expand Down
21 changes: 20 additions & 1 deletion python/python/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -472,6 +472,12 @@ def test_tag(tmp_path: Path):

ds.tags.create("tag1", 1)
assert len(ds.tags.list()) == 1
tag1_meta = ds.tags.list()["tag1"]
assert tag1_meta["created_at"] is not None
assert isinstance(tag1_meta["created_at"], datetime)
assert tag1_meta["updated_at"] is not None
assert isinstance(tag1_meta["updated_at"], datetime)
assert tag1_meta["created_at"] == tag1_meta["updated_at"]

with pytest.raises(ValueError):
ds.tags.create("tag1", 1)
Expand Down Expand Up @@ -505,7 +511,16 @@ def test_tag(tmp_path: Path):
):
ds.tags.update("tag3", 1)

tag1_meta = ds.tags.list()["tag1"]
tag1_created_at = tag1_meta["created_at"]
tag1_updated_at = tag1_meta["updated_at"]
assert tag1_created_at is not None
assert tag1_updated_at is not None
ds.tags.update("tag1", 2)
updated_tag1_meta = ds.tags.list()["tag1"]
assert updated_tag1_meta["created_at"] == tag1_created_at
assert updated_tag1_meta["updated_at"] is not None
assert updated_tag1_meta["updated_at"] >= tag1_updated_at
ds = lance.dataset(base_dir, "tag1")
assert ds.version == 2

Expand All @@ -524,7 +539,6 @@ def test_tag(tmp_path: Path):
assert target_tag is not None
assert target_tag["version"] == 1
assert target_tag["branch"] == "branch"

ds.tags.update("tag3", (None, 2))
target_tag = ds.tags.list()["tag3"]
assert ds.tags.get_version("tag3") == 2
Expand Down Expand Up @@ -558,6 +572,11 @@ def test_tag_order(tmp_path: Path):

tags_asc = ds.tags.list_ordered(order="asc")
assert len(tags_asc) == 3
first_tag = tags_asc[0][1]
assert first_tag["created_at"] is not None
assert isinstance(first_tag["created_at"], datetime)
assert first_tag["updated_at"] is not None
assert isinstance(first_tag["updated_at"], datetime)
tag_names_asc = [t[0] for t in tags_asc]
assert tag_names_asc == sorted(expected_tags.keys()), (
f"Unexpected ascending order: {tag_names_asc}"
Expand Down
4 changes: 4 additions & 0 deletions python/src/dataset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1681,6 +1681,8 @@ impl Dataset {
for (tag_name, tag_content) in tags {
let dict = PyDict::new(py);
dict.set_item("version", tag_content.version)?;
dict.set_item("created_at", tag_content.created_at)?;
dict.set_item("updated_at", tag_content.updated_at)?;
dict.set_item("manifest_size", tag_content.manifest_size)?;

pylist.append((tag_name.as_str(), dict))?;
Expand All @@ -1698,6 +1700,8 @@ impl Dataset {
let dict = PyDict::new(py);
dict.set_item("branch", v.branch.clone())?;
dict.set_item("version", v.version)?;
dict.set_item("created_at", v.created_at)?;
dict.set_item("updated_at", v.updated_at)?;
dict.set_item("manifest_size", v.manifest_size)?;
pytags.set_item(k, dict.into_py_any(py)?)?;
}
Expand Down
Loading
Loading