From 5e678535344448aa3faecce2918b7c6b7e9d445f Mon Sep 17 00:00:00 2001 From: majin1102 Date: Mon, 30 Mar 2026 15:30:31 +0800 Subject: [PATCH 1/8] feat: add updatedAt to tag metadata --- docs/src/format/table/branch_tag.md | 11 +++--- docs/src/guide/tags_and_branches.md | 10 +++--- docs/src/rest.yaml | 12 +++++++ java/lance-jni/src/blocking_dataset.rs | 16 ++++++++- java/src/main/java/org/lance/Tag.java | 15 +++++++- java/src/test/java/org/lance/DatasetTest.java | 21 ++++++++++++ python/python/lance/dataset.py | 3 +- python/python/tests/test_dataset.py | 25 ++++++++++++++ python/src/dataset.rs | 2 ++ rust/lance/src/dataset/refs.rs | 25 ++++++++++++-- .../src/dataset/tests/dataset_versioning.rs | 34 ++++++++++++++++++- 11 files changed, 157 insertions(+), 17 deletions(-) diff --git a/docs/src/format/table/branch_tag.md b/docs/src/format/table/branch_tag.md index e3bd328d5d2..8fc23b210b3 100644 --- a/docs/src/format/table/branch_tag.md +++ b/docs/src/format/table/branch_tag.md @@ -114,8 +114,9 @@ Tags are always stored at the root dataset level, regardless of which branch the Each tag file is a JSON file with the following fields: -| JSON Key | Type | Optional | Description | -|-----------------|--------|----------|--------------------------------------------------------------------------| -| `branch` | string | Yes | Branch name being tagged. `null` or absent indicates main branch. | -| `version` | number | | Version number being tagged within that branch. | -| `manifest_size` | number | | Size of the manifest file in bytes. Used for efficient manifest loading. | +| JSON Key | Type | Optional | Description | +|----------------|--------|----------|--------------------------------------------------------------------------| +| `branch` | string | Yes | Branch name being tagged. `null` or absent indicates main branch. | +| `version` | number | | Version number being tagged within that branch. 
| +| `updatedAt` | string | Yes | RFC 3339 UTC timestamp when the tag was last created or updated. | +| `manifestSize` | number | | Size of the manifest file in bytes. Used for efficient manifest loading. | diff --git a/docs/src/guide/tags_and_branches.md b/docs/src/guide/tags_and_branches.md index 02701f29e84..a56589592e8 100644 --- a/docs/src/guide/tags_and_branches.md +++ b/docs/src/guide/tags_and_branches.md @@ -36,10 +36,10 @@ print(ds.tags.list()) # {} ds.tags.create("v1-prod", (None, 1)) print(ds.tags.list()) -# {'v1-prod': {'version': 1, 'manifest_size': ...}} +# {'v1-prod': {'version': 1, 'updated_at': ..., 'manifest_size': ...}} ds.tags.update("v1-prod", (None, 2)) print(ds.tags.list()) -# {'v1-prod': {'version': 2, 'manifest_size': ...}} +# {'v1-prod': {'version': 2, 'updated_at': ..., 'manifest_size': ...}} ds.tags.delete("v1-prod") print(ds.tags.list()) # {} @@ -47,10 +47,10 @@ print(ds.tags.list_ordered()) # [] ds.tags.create("v1-prod", (None, 1)) print(ds.tags.list_ordered()) -# [('v1-prod', {'version': 1, 'manifest_size': ...})] +# [('v1-prod', {'version': 1, 'updated_at': ..., 'manifest_size': ...})] ds.tags.update("v1-prod", (None, 2)) print(ds.tags.list_ordered()) -# [('v1-prod', {'version': 2, 'manifest_size': ...})] +# [('v1-prod', {'version': 2, 'updated_at': ..., 'manifest_size': ...})] ds.tags.delete("v1-prod") print(ds.tags.list_ordered()) # [] @@ -122,4 +122,4 @@ print(ds.branches.list_ordered(order="desc")) Branches hold references to data files. Lance ensures that cleanup does not delete files still referenced by any branch. - Delete unused branches to allow their referenced files to be cleaned up by `cleanup_old_versions()`. \ No newline at end of file + Delete unused branches to allow their referenced files to be cleaned up by `cleanup_old_versions()`. 
diff --git a/docs/src/rest.yaml b/docs/src/rest.yaml index b3af38ba7ef..81b1f254f4c 100644 --- a/docs/src/rest.yaml +++ b/docs/src/rest.yaml @@ -2880,11 +2880,23 @@ components: required: - version properties: + branch: + type: string + description: Branch name that the tag points to. If absent, the tag points to main. version: type: integer format: int64 minimum: 0 description: Version number that the tag points to + updatedAt: + type: string + format: date-time + description: Timestamp when the tag was last created or updated. Historical tags may omit this field. + manifestSize: + type: integer + format: int64 + minimum: 0 + description: Size of the referenced manifest file in bytes. RestoreTableRequest: type: object diff --git a/java/lance-jni/src/blocking_dataset.rs b/java/lance-jni/src/blocking_dataset.rs index cc82c8acee5..15c8fcd1530 100644 --- a/java/lance-jni/src/blocking_dataset.rs +++ b/java/lance-jni/src/blocking_dataset.rs @@ -2377,14 +2377,28 @@ fn inner_list_tags<'local>( } else { JObject::null() }; + let updated_at = if let Some(updated_at) = tag_contents.updated_at.as_ref() { + let seconds = updated_at.timestamp(); + let nanos = updated_at.timestamp_subsec_nanos() as i64; + env.call_static_method( + "java/time/Instant", + "ofEpochSecond", + "(JJ)Ljava/time/Instant;", + &[JValue::Long(seconds), JValue::Long(nanos)], + )? + .l()? 
+ } else { + JObject::null() + }; let java_tag = env.new_object( "org/lance/Tag", - "(Ljava/lang/String;Ljava/lang/String;JI)V", + "(Ljava/lang/String;Ljava/lang/String;JILjava/time/Instant;)V", &[ JValue::Object(&env.new_string(tag_name)?.into()), JValue::Object(&branch_name), JValue::Long(tag_contents.version as i64), JValue::Int(tag_contents.manifest_size as i32), + JValue::Object(&updated_at), ], )?; env.call_method( diff --git a/java/src/main/java/org/lance/Tag.java b/java/src/main/java/org/lance/Tag.java index f7ce7be83cc..551f96d2059 100644 --- a/java/src/main/java/org/lance/Tag.java +++ b/java/src/main/java/org/lance/Tag.java @@ -15,6 +15,7 @@ import com.google.common.base.MoreObjects; +import java.time.Instant; import java.util.Objects; import java.util.Optional; @@ -23,12 +24,18 @@ public class Tag { private final Optional branch; private final long version; private final int manifestSize; + private final Optional updatedAt; public Tag(String name, String branch, long version, int manifestSize) { + this(name, branch, version, manifestSize, null); + } + + public Tag(String name, String branch, long version, int manifestSize, Instant updatedAt) { this.name = name; this.branch = Optional.ofNullable(branch); this.version = version; this.manifestSize = manifestSize; + this.updatedAt = Optional.ofNullable(updatedAt); } public String getName() { @@ -47,6 +54,10 @@ public int getManifestSize() { return manifestSize; } + public Optional getUpdatedAt() { + return updatedAt; + } + @Override public String toString() { return MoreObjects.toStringHelper(this) @@ -54,6 +65,7 @@ public String toString() { .add("branch", branch) .add("version", version) .add("manifestSize", manifestSize) + .add("updatedAt", updatedAt) .toString(); } @@ -69,11 +81,12 @@ public boolean equals(Object o) { return version == tag.version && Objects.equals(branch, tag.branch) && manifestSize == tag.manifestSize + && Objects.equals(updatedAt, tag.updatedAt) && Objects.equals(name, tag.name); } 
@Override public int hashCode() { - return Objects.hash(name, branch, version, manifestSize); + return Objects.hash(name, branch, version, manifestSize, updatedAt); } } diff --git a/java/src/test/java/org/lance/DatasetTest.java b/java/src/test/java/org/lance/DatasetTest.java index a707b4f4a3c..20b75209d35 100644 --- a/java/src/test/java/org/lance/DatasetTest.java +++ b/java/src/test/java/org/lance/DatasetTest.java @@ -61,6 +61,7 @@ import java.nio.file.Files; import java.nio.file.Path; import java.time.Clock; +import java.time.Instant; import java.time.ZonedDateTime; import java.util.ArrayList; import java.util.Arrays; @@ -319,6 +320,7 @@ void testTags(@TempDir Path tempDir) { dataset.tags().create("tag1", Ref.ofMain()); assertEquals(1, dataset.tags().list().size()); assertEquals(1, dataset.tags().list().get(0).getVersion()); + assertTrue(dataset.tags().list().get(0).getUpdatedAt().isPresent()); assertEquals(1, dataset.tags().getVersion("tag1")); } @@ -327,15 +329,31 @@ void testTags(@TempDir Path tempDir) { assertEquals(2, dataset2.version()); assertEquals(1, dataset2.tags().list().size()); assertEquals(1, dataset2.tags().list().get(0).getVersion()); + assertTrue(dataset2.tags().list().get(0).getUpdatedAt().isPresent()); assertEquals(1, dataset2.tags().getVersion("tag1")); dataset2.tags().create("tag2", Ref.ofMain(2)); assertEquals(2, dataset2.tags().list().size()); assertEquals(1, dataset2.tags().getVersion("tag1")); assertEquals(2, dataset2.tags().getVersion("tag2")); + Instant tag2UpdatedAt = + dataset2.tags().list().stream() + .filter(t -> t.getName().equals("tag2")) + .findFirst() + .orElseThrow() + .getUpdatedAt() + .orElseThrow(); dataset2.tags().update("tag2", Ref.ofMain(1)); assertEquals(2, dataset2.tags().list().size()); assertEquals(1, dataset2.tags().list().get(0).getVersion()); assertEquals(1, dataset2.tags().list().get(1).getVersion()); + Instant updatedTag2 = + dataset2.tags().list().stream() + .filter(t -> t.getName().equals("tag2")) + .findFirst() 
+ .orElseThrow() + .getUpdatedAt() + .orElseThrow(); + assertFalse(updatedTag2.isBefore(tag2UpdatedAt)); assertEquals(1, dataset2.tags().getVersion("tag1")); assertEquals(1, dataset2.tags().getVersion("tag2")); dataset2.tags().delete("tag2"); @@ -352,6 +370,7 @@ void testTags(@TempDir Path tempDir) { assertEquals(0, checkoutV1.countRows()); assertEquals(1, checkoutV1.tags().list().size()); assertEquals(1, checkoutV1.tags().list().get(0).getVersion()); + assertTrue(checkoutV1.tags().list().get(0).getUpdatedAt().isPresent()); assertEquals(1, checkoutV1.tags().getVersion("tag1")); } @@ -367,6 +386,7 @@ void testTags(@TempDir Path tempDir) { assertTrue(tagOptional.isPresent()); assertEquals(2, tagOptional.get().getVersion()); assertEquals(Optional.of("branch"), tagOptional.get().getBranch()); + assertTrue(tagOptional.get().getUpdatedAt().isPresent()); dataset2.tags().update("tag1", Ref.ofBranch("branch")); tags = dataset2.tags().list(); @@ -378,6 +398,7 @@ void testTags(@TempDir Path tempDir) { assertTrue(tagOptional.isPresent()); assertEquals(2, tagOptional.get().getVersion()); assertEquals(Optional.of("branch"), tagOptional.get().getBranch()); + assertTrue(tagOptional.get().getUpdatedAt().isPresent()); } assertEquals(2, dataset2.tags().list().size()); diff --git a/python/python/lance/dataset.py b/python/python/lance/dataset.py index 7496746285a..f60c6815703 100644 --- a/python/python/lance/dataset.py +++ b/python/python/lance/dataset.py @@ -4370,6 +4370,7 @@ class Transaction: class Tag(TypedDict): branch: Optional[str] version: int + updated_at: Optional[datetime] manifest_size: int @@ -5727,7 +5728,7 @@ def list(self) -> dict[str, Tag]: Returns ------- dict[str, Tag] - A dictionary mapping tag names to version numbers. + A dictionary mapping tag names to tag metadata. 
""" return self._ds.tags() diff --git a/python/python/tests/test_dataset.py b/python/python/tests/test_dataset.py index 889c37036a0..63378b72955 100644 --- a/python/python/tests/test_dataset.py +++ b/python/python/tests/test_dataset.py @@ -472,6 +472,9 @@ def test_tag(tmp_path: Path): ds.tags.create("tag1", 1) assert len(ds.tags.list()) == 1 + tag1_meta = ds.tags.list()["tag1"] + assert tag1_meta["updated_at"] is not None + assert isinstance(tag1_meta["updated_at"], datetime) with pytest.raises(ValueError): ds.tags.create("tag1", 1) @@ -505,11 +508,21 @@ def test_tag(tmp_path: Path): ): ds.tags.update("tag3", 1) + tag1_updated_at = ds.tags.list()["tag1"]["updated_at"] + assert tag1_updated_at is not None ds.tags.update("tag1", 2) + updated_tag1_meta = ds.tags.list()["tag1"] + assert updated_tag1_meta["updated_at"] is not None + assert updated_tag1_meta["updated_at"] >= tag1_updated_at ds = lance.dataset(base_dir, "tag1") assert ds.version == 2 + tag1_updated_at = ds.tags.list()["tag1"]["updated_at"] + assert tag1_updated_at is not None ds.tags.update("tag1", 1) + updated_tag1_meta = ds.tags.list()["tag1"] + assert updated_tag1_meta["updated_at"] is not None + assert updated_tag1_meta["updated_at"] >= tag1_updated_at ds = lance.dataset(base_dir, "tag1") assert ds.version == 1 @@ -524,14 +537,22 @@ def test_tag(tmp_path: Path): assert target_tag is not None assert target_tag["version"] == 1 assert target_tag["branch"] == "branch" + assert target_tag["updated_at"] is not None + assert isinstance(target_tag["updated_at"], datetime) + tag3_updated_at = target_tag["updated_at"] + assert tag3_updated_at is not None ds.tags.update("tag3", (None, 2)) target_tag = ds.tags.list()["tag3"] assert ds.tags.get_version("tag3") == 2 assert target_tag is not None assert target_tag["version"] == 2 assert target_tag["branch"] is None + assert target_tag["updated_at"] is not None + assert target_tag["updated_at"] >= tag3_updated_at + tag3_updated_at = target_tag["updated_at"] + assert 
tag3_updated_at is not None ds.create_branch("branch2", 2) ds.tags.update("tag3", ("branch2", 2)) target_tag = ds.tags.list()["tag3"] @@ -539,6 +560,8 @@ def test_tag(tmp_path: Path): assert target_tag is not None assert target_tag["version"] == 2 assert target_tag["branch"] == "branch2" + assert target_tag["updated_at"] is not None + assert target_tag["updated_at"] >= tag3_updated_at ds.tags.delete("tag3") assert len(ds.tags.list()) == 2 @@ -558,6 +581,8 @@ def test_tag_order(tmp_path: Path): tags_asc = ds.tags.list_ordered(order="asc") assert len(tags_asc) == 3 + assert all(tag["updated_at"] is not None for _, tag in tags_asc) + assert all(isinstance(tag["updated_at"], datetime) for _, tag in tags_asc) tag_names_asc = [t[0] for t in tags_asc] assert tag_names_asc == sorted(expected_tags.keys()), ( f"Unexpected ascending order: {tag_names_asc}" diff --git a/python/src/dataset.rs b/python/src/dataset.rs index 35306636c93..2bebcd5c1bb 100644 --- a/python/src/dataset.rs +++ b/python/src/dataset.rs @@ -1681,6 +1681,7 @@ impl Dataset { for (tag_name, tag_content) in tags { let dict = PyDict::new(py); dict.set_item("version", tag_content.version)?; + dict.set_item("updated_at", tag_content.updated_at)?; dict.set_item("manifest_size", tag_content.manifest_size)?; pylist.append((tag_name.as_str(), dict))?; @@ -1698,6 +1699,7 @@ impl Dataset { let dict = PyDict::new(py); dict.set_item("branch", v.branch.clone())?; dict.set_item("version", v.version)?; + dict.set_item("updated_at", v.updated_at)?; dict.set_item("manifest_size", v.manifest_size)?; pytags.set_item(k, dict.into_py_any(py)?)?; } diff --git a/rust/lance/src/dataset/refs.rs b/rust/lance/src/dataset/refs.rs index 15d4e74a50d..a36ed19fd3e 100644 --- a/rust/lance/src/dataset/refs.rs +++ b/rust/lance/src/dataset/refs.rs @@ -3,6 +3,7 @@ use std::ops::Range; +use chrono::{DateTime, Utc}; use futures::stream::{StreamExt, TryStreamExt}; use itertools::Itertools; use lance_io::object_store::ObjectStore; @@ -13,6 +14,7 @@ 
use std::sync::Arc; use crate::dataset::branch_location::BranchLocation; use crate::dataset::refs::Ref::{Tag, Version, VersionNumber}; +use crate::utils::temporal::utc_now; use crate::{Error, Result}; use serde::de::DeserializeOwned; use std::cmp::Ordering; @@ -221,7 +223,9 @@ impl Tags<'_> { message: format!("tag {} already exists", tag), }); } - let tag_contents = self.build_tag_content_by_ref(reference).await?; + let tag_contents = self + .build_tag_content_by_ref(reference, Some(utc_now())) + .await?; self.object_store() .put( @@ -257,7 +261,9 @@ impl Tags<'_> { message: format!("tag {} does not exist", tag), }); } - let tag_contents = self.build_tag_content_by_ref(reference).await?; + let tag_contents = self + .build_tag_content_by_ref(reference, Some(utc_now())) + .await?; self.object_store() .put( @@ -268,7 +274,11 @@ impl Tags<'_> { .map(|_| ()) } - async fn build_tag_content_by_ref(&self, reference: impl Into) -> Result { + async fn build_tag_content_by_ref( + &self, + reference: impl Into, + updated_at: Option>, + ) -> Result { let reference = reference.into(); let (branch, version_number) = match reference { Version(branch, version_number) => (branch, version_number), @@ -313,6 +323,7 @@ impl Tags<'_> { let tag_contents = TagContents { branch, version: manifest_file.version, + updated_at, manifest_size, }; Ok(tag_contents) @@ -654,6 +665,7 @@ impl<'a> BranchRelativePath<'a> { pub struct TagContents { pub branch: Option, pub version: u64, + pub updated_at: Option>, pub manifest_size: usize, } @@ -1095,6 +1107,7 @@ mod tests { let tag_contents = TagContents { branch: Some("feature".to_string()), version: 10, + updated_at: Some(chrono::DateTime::from_timestamp(1_234_567_890, 123_000_000).unwrap()), manifest_size: 2048, }; @@ -1102,13 +1115,19 @@ mod tests { let json = serde_json::to_string(&tag_contents).unwrap(); assert!(json.contains("branch")); assert!(json.contains("version")); + assert!(json.contains("updatedAt")); 
assert!(json.contains("manifestSize")); // Test deserialization let deserialized: TagContents = serde_json::from_str(&json).unwrap(); assert_eq!(deserialized.branch, tag_contents.branch); assert_eq!(deserialized.version, tag_contents.version); + assert_eq!(deserialized.updated_at, tag_contents.updated_at); assert_eq!(deserialized.manifest_size, tag_contents.manifest_size); + + let legacy_json = r#"{"branch":"feature","version":10,"manifestSize":2048}"#; + let legacy_deserialized: TagContents = serde_json::from_str(legacy_json).unwrap(); + assert_eq!(legacy_deserialized.updated_at, None); } #[rstest] diff --git a/rust/lance/src/dataset/tests/dataset_versioning.rs b/rust/lance/src/dataset/tests/dataset_versioning.rs index e9253cc69fe..21b8bc810bb 100644 --- a/rust/lance/src/dataset/tests/dataset_versioning.rs +++ b/rust/lance/src/dataset/tests/dataset_versioning.rs @@ -327,6 +327,13 @@ async fn test_tag( dataset.tags().create("tag1", 1).await.unwrap(); assert_eq!(dataset.tags().list().await.unwrap().len(), 1); + let tag1_updated_at = dataset + .tags() + .get("tag1") + .await + .unwrap() + .updated_at + .expect("newly created tag should have updated_at"); let another_bad_tag_creation = dataset.tags().create("tag1", 1).await; assert_eq!( @@ -349,6 +356,11 @@ async fn test_tag( ["v1.0.0-rc1", "tag1", "tag2"], "Default ordering mismatch" ); + assert!( + default_order + .iter() + .all(|(_, tag)| tag.updated_at.is_some()) + ); let asc_order = dataset .tags() @@ -361,6 +373,7 @@ async fn test_tag( ["tag1", "tag2", "v1.0.0-rc1"], "Ascending ordering mismatch" ); + assert!(asc_order.iter().all(|(_, tag)| tag.updated_at.is_some())); let desc_order = dataset .tags() @@ -373,8 +386,11 @@ async fn test_tag( ["v1.0.0-rc1", "tag1", "tag2"], "Descending ordering mismatch" ); + assert!(desc_order.iter().all(|(_, tag)| tag.updated_at.is_some())); - assert_eq!(dataset.tags().list().await.unwrap().len(), 3); + let tags = dataset.tags().list().await.unwrap(); + assert_eq!(tags.len(), 3); 
+ assert!(tags.values().all(|tag| tag.updated_at.is_some())); let bad_checkout = dataset.checkout_version("tag3").await; assert_eq!( @@ -406,10 +422,26 @@ async fn test_tag( ); dataset.tags().update("tag1", 2).await.unwrap(); + let tag1_updated_after_first_update = dataset + .tags() + .get("tag1") + .await + .unwrap() + .updated_at + .expect("updated tag should have updated_at"); + assert!(tag1_updated_after_first_update >= tag1_updated_at); dataset = dataset.checkout_version("tag1").await.unwrap(); assert_eq!(dataset.manifest.version, 2); dataset.tags().update("tag1", 1).await.unwrap(); + let tag1_updated_after_second_update = dataset + .tags() + .get("tag1") + .await + .unwrap() + .updated_at + .expect("updated tag should have updated_at"); + assert!(tag1_updated_after_second_update >= tag1_updated_after_first_update); dataset = dataset.checkout_version("tag1").await.unwrap(); assert_eq!(dataset.manifest.version, 1); } From 9b9be6fa88832db9d7fbd8f4cf8081c3ba3f8c47 Mon Sep 17 00:00:00 2001 From: majin1102 Date: Mon, 30 Mar 2026 17:25:46 +0800 Subject: [PATCH 2/8] feat: add createdAt to tag metadata --- docs/src/format/table/branch_tag.md | 13 ++-- docs/src/guide/tags_and_branches.md | 8 +- docs/src/rest.yaml | 8 +- java/lance-jni/src/blocking_dataset.rs | 40 ++++++---- java/src/main/java/org/lance/Tag.java | 22 +++++- java/src/test/java/org/lance/DatasetTest.java | 25 ++++++ python/python/lance/dataset.py | 1 + python/python/tests/test_dataset.py | 24 +++++- python/src/dataset.rs | 2 + rust/lance/src/dataset/refs.rs | 40 +++++++++- .../src/dataset/tests/dataset_versioning.rs | 77 ++++++++++++++----- 11 files changed, 210 insertions(+), 50 deletions(-) diff --git a/docs/src/format/table/branch_tag.md b/docs/src/format/table/branch_tag.md index 8fc23b210b3..e04fc4200e4 100644 --- a/docs/src/format/table/branch_tag.md +++ b/docs/src/format/table/branch_tag.md @@ -114,9 +114,10 @@ Tags are always stored at the root dataset level, regardless of which branch the Each 
tag file is a JSON file with the following fields: -| JSON Key | Type | Optional | Description | -|----------------|--------|----------|--------------------------------------------------------------------------| -| `branch` | string | Yes | Branch name being tagged. `null` or absent indicates main branch. | -| `version` | number | | Version number being tagged within that branch. | -| `updatedAt` | string | Yes | RFC 3339 UTC timestamp when the tag was last created or updated. | -| `manifestSize` | number | | Size of the manifest file in bytes. Used for efficient manifest loading. | +| JSON Key | Type | Optional | Description | +|----------------|--------|----------|-----------------------------------------------------------------------------------| +| `branch` | string | Yes | Branch name being tagged. `null` or absent indicates main branch. | +| `version` | number | | Version number being tagged within that branch. | +| `createdAt` | string | Yes | RFC 3339 UTC timestamp when the tag was first created. Historical tags may omit it. | +| `updatedAt` | string | Yes | RFC 3339 UTC timestamp when the tag was last modified. On creation it matches `createdAt`. | +| `manifestSize` | number | | Size of the manifest file in bytes. Used for efficient manifest loading. 
| diff --git a/docs/src/guide/tags_and_branches.md b/docs/src/guide/tags_and_branches.md index a56589592e8..8af2302bfb0 100644 --- a/docs/src/guide/tags_and_branches.md +++ b/docs/src/guide/tags_and_branches.md @@ -36,10 +36,10 @@ print(ds.tags.list()) # {} ds.tags.create("v1-prod", (None, 1)) print(ds.tags.list()) -# {'v1-prod': {'version': 1, 'updated_at': ..., 'manifest_size': ...}} +# {'v1-prod': {'version': 1, 'created_at': ..., 'updated_at': ..., 'manifest_size': ...}} ds.tags.update("v1-prod", (None, 2)) print(ds.tags.list()) -# {'v1-prod': {'version': 2, 'updated_at': ..., 'manifest_size': ...}} +# {'v1-prod': {'version': 2, 'created_at': ..., 'updated_at': ..., 'manifest_size': ...}} ds.tags.delete("v1-prod") print(ds.tags.list()) # {} @@ -47,10 +47,10 @@ print(ds.tags.list_ordered()) # [] ds.tags.create("v1-prod", (None, 1)) print(ds.tags.list_ordered()) -# [('v1-prod', {'version': 1, 'updated_at': ..., 'manifest_size': ...})] +# [('v1-prod', {'version': 1, 'created_at': ..., 'updated_at': ..., 'manifest_size': ...})] ds.tags.update("v1-prod", (None, 2)) print(ds.tags.list_ordered()) -# [('v1-prod', {'version': 2, 'updated_at': ..., 'manifest_size': ...})] +# [('v1-prod', {'version': 2, 'created_at': ..., 'updated_at': ..., 'manifest_size': ...})] ds.tags.delete("v1-prod") print(ds.tags.list_ordered()) # [] diff --git a/docs/src/rest.yaml b/docs/src/rest.yaml index 81b1f254f4c..162b1388bae 100644 --- a/docs/src/rest.yaml +++ b/docs/src/rest.yaml @@ -2888,10 +2888,16 @@ components: format: int64 minimum: 0 description: Version number that the tag points to + createdAt: + type: string + format: date-time + nullable: true + description: Timestamp when the tag was first created. Historical tags may omit this field. updatedAt: type: string format: date-time - description: Timestamp when the tag was last created or updated. Historical tags may omit this field. + nullable: true + description: Timestamp when the tag was last modified. 
On creation, this equals createdAt. Historical tags may omit this field. manifestSize: type: integer format: int64 diff --git a/java/lance-jni/src/blocking_dataset.rs b/java/lance-jni/src/blocking_dataset.rs index 15c8fcd1530..24309dcee45 100644 --- a/java/lance-jni/src/blocking_dataset.rs +++ b/java/lance-jni/src/blocking_dataset.rs @@ -2364,6 +2364,26 @@ fn inner_list_tags<'local>( env: &mut JNIEnv<'local>, java_dataset: JObject, ) -> Result> { + fn optional_datetime_to_java_instant<'local>( + env: &mut JNIEnv<'local>, + timestamp: Option<&DateTime>, + ) -> Result> { + if let Some(timestamp) = timestamp { + let seconds = timestamp.timestamp(); + let nanos = timestamp.timestamp_subsec_nanos() as i64; + Ok(env + .call_static_method( + "java/time/Instant", + "ofEpochSecond", + "(JJ)Ljava/time/Instant;", + &[JValue::Long(seconds), JValue::Long(nanos)], + )? + .l()?) + } else { + Ok(JObject::null()) + } + } + let tag_map = { let dataset_guard = unsafe { env.get_rust_field::<_, _, BlockingDataset>(java_dataset, NATIVE_DATASET) }?; @@ -2377,27 +2397,19 @@ fn inner_list_tags<'local>( } else { JObject::null() }; - let updated_at = if let Some(updated_at) = tag_contents.updated_at.as_ref() { - let seconds = updated_at.timestamp(); - let nanos = updated_at.timestamp_subsec_nanos() as i64; - env.call_static_method( - "java/time/Instant", - "ofEpochSecond", - "(JJ)Ljava/time/Instant;", - &[JValue::Long(seconds), JValue::Long(nanos)], - )? - .l()? 
- } else { - JObject::null() - }; + let created_at = + optional_datetime_to_java_instant(env, tag_contents.created_at.as_ref())?; + let updated_at = + optional_datetime_to_java_instant(env, tag_contents.updated_at.as_ref())?; let java_tag = env.new_object( "org/lance/Tag", - "(Ljava/lang/String;Ljava/lang/String;JILjava/time/Instant;)V", + "(Ljava/lang/String;Ljava/lang/String;JILjava/time/Instant;Ljava/time/Instant;)V", &[ JValue::Object(&env.new_string(tag_name)?.into()), JValue::Object(&branch_name), JValue::Long(tag_contents.version as i64), JValue::Int(tag_contents.manifest_size as i32), + JValue::Object(&created_at), JValue::Object(&updated_at), ], )?; diff --git a/java/src/main/java/org/lance/Tag.java b/java/src/main/java/org/lance/Tag.java index 551f96d2059..7484b9ac736 100644 --- a/java/src/main/java/org/lance/Tag.java +++ b/java/src/main/java/org/lance/Tag.java @@ -24,17 +24,29 @@ public class Tag { private final Optional branch; private final long version; private final int manifestSize; + private final Optional createdAt; private final Optional updatedAt; public Tag(String name, String branch, long version, int manifestSize) { - this(name, branch, version, manifestSize, null); + this(name, branch, version, manifestSize, null, null); } public Tag(String name, String branch, long version, int manifestSize, Instant updatedAt) { + this(name, branch, version, manifestSize, null, updatedAt); + } + + public Tag( + String name, + String branch, + long version, + int manifestSize, + Instant createdAt, + Instant updatedAt) { this.name = name; this.branch = Optional.ofNullable(branch); this.version = version; this.manifestSize = manifestSize; + this.createdAt = Optional.ofNullable(createdAt); this.updatedAt = Optional.ofNullable(updatedAt); } @@ -54,6 +66,10 @@ public int getManifestSize() { return manifestSize; } + public Optional getCreatedAt() { + return createdAt; + } + public Optional getUpdatedAt() { return updatedAt; } @@ -65,6 +81,7 @@ public String 
toString() { .add("branch", branch) .add("version", version) .add("manifestSize", manifestSize) + .add("createdAt", createdAt) .add("updatedAt", updatedAt) .toString(); } @@ -81,12 +98,13 @@ public boolean equals(Object o) { return version == tag.version && Objects.equals(branch, tag.branch) && manifestSize == tag.manifestSize + && Objects.equals(createdAt, tag.createdAt) && Objects.equals(updatedAt, tag.updatedAt) && Objects.equals(name, tag.name); } @Override public int hashCode() { - return Objects.hash(name, branch, version, manifestSize, updatedAt); + return Objects.hash(name, branch, version, manifestSize, createdAt, updatedAt); } } diff --git a/java/src/test/java/org/lance/DatasetTest.java b/java/src/test/java/org/lance/DatasetTest.java index 20b75209d35..3223bacb102 100644 --- a/java/src/test/java/org/lance/DatasetTest.java +++ b/java/src/test/java/org/lance/DatasetTest.java @@ -320,7 +320,11 @@ void testTags(@TempDir Path tempDir) { dataset.tags().create("tag1", Ref.ofMain()); assertEquals(1, dataset.tags().list().size()); assertEquals(1, dataset.tags().list().get(0).getVersion()); + assertTrue(dataset.tags().list().get(0).getCreatedAt().isPresent()); assertTrue(dataset.tags().list().get(0).getUpdatedAt().isPresent()); + assertEquals( + dataset.tags().list().get(0).getCreatedAt(), + dataset.tags().list().get(0).getUpdatedAt()); assertEquals(1, dataset.tags().getVersion("tag1")); } @@ -329,12 +333,20 @@ void testTags(@TempDir Path tempDir) { assertEquals(2, dataset2.version()); assertEquals(1, dataset2.tags().list().size()); assertEquals(1, dataset2.tags().list().get(0).getVersion()); + assertTrue(dataset2.tags().list().get(0).getCreatedAt().isPresent()); assertTrue(dataset2.tags().list().get(0).getUpdatedAt().isPresent()); assertEquals(1, dataset2.tags().getVersion("tag1")); dataset2.tags().create("tag2", Ref.ofMain(2)); assertEquals(2, dataset2.tags().list().size()); assertEquals(1, dataset2.tags().getVersion("tag1")); assertEquals(2, 
dataset2.tags().getVersion("tag2")); + Instant tag2CreatedAt = + dataset2.tags().list().stream() + .filter(t -> t.getName().equals("tag2")) + .findFirst() + .orElseThrow() + .getCreatedAt() + .orElseThrow(); Instant tag2UpdatedAt = dataset2.tags().list().stream() .filter(t -> t.getName().equals("tag2")) @@ -342,10 +354,18 @@ void testTags(@TempDir Path tempDir) { .orElseThrow() .getUpdatedAt() .orElseThrow(); + assertEquals(tag2CreatedAt, tag2UpdatedAt); dataset2.tags().update("tag2", Ref.ofMain(1)); assertEquals(2, dataset2.tags().list().size()); assertEquals(1, dataset2.tags().list().get(0).getVersion()); assertEquals(1, dataset2.tags().list().get(1).getVersion()); + Instant updatedTag2CreatedAt = + dataset2.tags().list().stream() + .filter(t -> t.getName().equals("tag2")) + .findFirst() + .orElseThrow() + .getCreatedAt() + .orElseThrow(); Instant updatedTag2 = dataset2.tags().list().stream() .filter(t -> t.getName().equals("tag2")) @@ -353,6 +373,7 @@ void testTags(@TempDir Path tempDir) { .orElseThrow() .getUpdatedAt() .orElseThrow(); + assertEquals(updatedTag2CreatedAt, tag2CreatedAt); assertFalse(updatedTag2.isBefore(tag2UpdatedAt)); assertEquals(1, dataset2.tags().getVersion("tag1")); assertEquals(1, dataset2.tags().getVersion("tag2")); @@ -370,6 +391,7 @@ void testTags(@TempDir Path tempDir) { assertEquals(0, checkoutV1.countRows()); assertEquals(1, checkoutV1.tags().list().size()); assertEquals(1, checkoutV1.tags().list().get(0).getVersion()); + assertTrue(checkoutV1.tags().list().get(0).getCreatedAt().isPresent()); assertTrue(checkoutV1.tags().list().get(0).getUpdatedAt().isPresent()); assertEquals(1, checkoutV1.tags().getVersion("tag1")); } @@ -386,7 +408,9 @@ void testTags(@TempDir Path tempDir) { assertTrue(tagOptional.isPresent()); assertEquals(2, tagOptional.get().getVersion()); assertEquals(Optional.of("branch"), tagOptional.get().getBranch()); + assertTrue(tagOptional.get().getCreatedAt().isPresent()); 
assertTrue(tagOptional.get().getUpdatedAt().isPresent()); + assertEquals(tagOptional.get().getCreatedAt(), tagOptional.get().getUpdatedAt()); dataset2.tags().update("tag1", Ref.ofBranch("branch")); tags = dataset2.tags().list(); @@ -398,6 +422,7 @@ void testTags(@TempDir Path tempDir) { assertTrue(tagOptional.isPresent()); assertEquals(2, tagOptional.get().getVersion()); assertEquals(Optional.of("branch"), tagOptional.get().getBranch()); + assertTrue(tagOptional.get().getCreatedAt().isPresent()); assertTrue(tagOptional.get().getUpdatedAt().isPresent()); } diff --git a/python/python/lance/dataset.py b/python/python/lance/dataset.py index f60c6815703..fb2ef39811a 100644 --- a/python/python/lance/dataset.py +++ b/python/python/lance/dataset.py @@ -4370,6 +4370,7 @@ class Transaction: class Tag(TypedDict): branch: Optional[str] version: int + created_at: Optional[datetime] updated_at: Optional[datetime] manifest_size: int diff --git a/python/python/tests/test_dataset.py b/python/python/tests/test_dataset.py index 63378b72955..675d50be6ce 100644 --- a/python/python/tests/test_dataset.py +++ b/python/python/tests/test_dataset.py @@ -473,8 +473,11 @@ def test_tag(tmp_path: Path): ds.tags.create("tag1", 1) assert len(ds.tags.list()) == 1 tag1_meta = ds.tags.list()["tag1"] + assert tag1_meta["created_at"] is not None + assert isinstance(tag1_meta["created_at"], datetime) assert tag1_meta["updated_at"] is not None assert isinstance(tag1_meta["updated_at"], datetime) + assert tag1_meta["created_at"] == tag1_meta["updated_at"] with pytest.raises(ValueError): ds.tags.create("tag1", 1) @@ -508,19 +511,27 @@ def test_tag(tmp_path: Path): ): ds.tags.update("tag3", 1) - tag1_updated_at = ds.tags.list()["tag1"]["updated_at"] + tag1_meta = ds.tags.list()["tag1"] + tag1_created_at = tag1_meta["created_at"] + tag1_updated_at = tag1_meta["updated_at"] + assert tag1_created_at is not None assert tag1_updated_at is not None ds.tags.update("tag1", 2) updated_tag1_meta = 
ds.tags.list()["tag1"] + assert updated_tag1_meta["created_at"] == tag1_created_at assert updated_tag1_meta["updated_at"] is not None assert updated_tag1_meta["updated_at"] >= tag1_updated_at ds = lance.dataset(base_dir, "tag1") assert ds.version == 2 - tag1_updated_at = ds.tags.list()["tag1"]["updated_at"] + tag1_meta = ds.tags.list()["tag1"] + tag1_created_at = tag1_meta["created_at"] + tag1_updated_at = tag1_meta["updated_at"] + assert tag1_created_at is not None assert tag1_updated_at is not None ds.tags.update("tag1", 1) updated_tag1_meta = ds.tags.list()["tag1"] + assert updated_tag1_meta["created_at"] == tag1_created_at assert updated_tag1_meta["updated_at"] is not None assert updated_tag1_meta["updated_at"] >= tag1_updated_at ds = lance.dataset(base_dir, "tag1") @@ -537,10 +548,15 @@ def test_tag(tmp_path: Path): assert target_tag is not None assert target_tag["version"] == 1 assert target_tag["branch"] == "branch" + assert target_tag["created_at"] is not None assert target_tag["updated_at"] is not None + assert isinstance(target_tag["created_at"], datetime) assert isinstance(target_tag["updated_at"], datetime) + assert target_tag["created_at"] == target_tag["updated_at"] + tag3_created_at = target_tag["created_at"] tag3_updated_at = target_tag["updated_at"] + assert tag3_created_at is not None assert tag3_updated_at is not None ds.tags.update("tag3", (None, 2)) target_tag = ds.tags.list()["tag3"] @@ -548,6 +564,7 @@ def test_tag(tmp_path: Path): assert target_tag is not None assert target_tag["version"] == 2 assert target_tag["branch"] is None + assert target_tag["created_at"] == tag3_created_at assert target_tag["updated_at"] is not None assert target_tag["updated_at"] >= tag3_updated_at @@ -560,6 +577,7 @@ def test_tag(tmp_path: Path): assert target_tag is not None assert target_tag["version"] == 2 assert target_tag["branch"] == "branch2" + assert target_tag["created_at"] == tag3_created_at assert target_tag["updated_at"] is not None assert 
target_tag["updated_at"] >= tag3_updated_at @@ -581,6 +599,8 @@ def test_tag_order(tmp_path: Path): tags_asc = ds.tags.list_ordered(order="asc") assert len(tags_asc) == 3 + assert all(tag["created_at"] is not None for _, tag in tags_asc) + assert all(isinstance(tag["created_at"], datetime) for _, tag in tags_asc) assert all(tag["updated_at"] is not None for _, tag in tags_asc) assert all(isinstance(tag["updated_at"], datetime) for _, tag in tags_asc) tag_names_asc = [t[0] for t in tags_asc] diff --git a/python/src/dataset.rs b/python/src/dataset.rs index 2bebcd5c1bb..c9a22d4e2c8 100644 --- a/python/src/dataset.rs +++ b/python/src/dataset.rs @@ -1681,6 +1681,7 @@ impl Dataset { for (tag_name, tag_content) in tags { let dict = PyDict::new(py); dict.set_item("version", tag_content.version)?; + dict.set_item("created_at", tag_content.created_at)?; dict.set_item("updated_at", tag_content.updated_at)?; dict.set_item("manifest_size", tag_content.manifest_size)?; @@ -1699,6 +1700,7 @@ impl Dataset { let dict = PyDict::new(py); dict.set_item("branch", v.branch.clone())?; dict.set_item("version", v.version)?; + dict.set_item("created_at", v.created_at)?; dict.set_item("updated_at", v.updated_at)?; dict.set_item("manifest_size", v.manifest_size)?; pytags.set_item(k, dict.into_py_any(py)?)?; diff --git a/rust/lance/src/dataset/refs.rs b/rust/lance/src/dataset/refs.rs index a36ed19fd3e..714d89f87e7 100644 --- a/rust/lance/src/dataset/refs.rs +++ b/rust/lance/src/dataset/refs.rs @@ -223,8 +223,9 @@ impl Tags<'_> { message: format!("tag {} already exists", tag), }); } + let now = utc_now(); let tag_contents = self - .build_tag_content_by_ref(reference, Some(utc_now())) + .build_tag_content_by_ref(reference, Some(now), Some(now)) .await?; self.object_store() @@ -261,8 +262,9 @@ impl Tags<'_> { message: format!("tag {} does not exist", tag), }); } + let previous_tag = self.get(tag).await?; let tag_contents = self - .build_tag_content_by_ref(reference, Some(utc_now())) + 
.build_tag_content_by_ref(reference, previous_tag.created_at, Some(utc_now())) .await?; self.object_store() @@ -277,6 +279,7 @@ impl Tags<'_> { async fn build_tag_content_by_ref( &self, reference: impl Into, + created_at: Option>, updated_at: Option>, ) -> Result { let reference = reference.into(); @@ -323,6 +326,7 @@ impl Tags<'_> { let tag_contents = TagContents { branch, version: manifest_file.version, + created_at, updated_at, manifest_size, }; @@ -665,6 +669,8 @@ impl<'a> BranchRelativePath<'a> { pub struct TagContents { pub branch: Option, pub version: u64, + #[serde(skip_serializing_if = "Option::is_none")] + pub created_at: Option>, pub updated_at: Option>, pub manifest_size: usize, } @@ -1107,6 +1113,7 @@ mod tests { let tag_contents = TagContents { branch: Some("feature".to_string()), version: 10, + created_at: Some(chrono::DateTime::from_timestamp(1_234_567_000, 456_000_000).unwrap()), updated_at: Some(chrono::DateTime::from_timestamp(1_234_567_890, 123_000_000).unwrap()), manifest_size: 2048, }; @@ -1115,6 +1122,7 @@ mod tests { let json = serde_json::to_string(&tag_contents).unwrap(); assert!(json.contains("branch")); assert!(json.contains("version")); + assert!(json.contains("createdAt")); assert!(json.contains("updatedAt")); assert!(json.contains("manifestSize")); @@ -1122,12 +1130,40 @@ mod tests { let deserialized: TagContents = serde_json::from_str(&json).unwrap(); assert_eq!(deserialized.branch, tag_contents.branch); assert_eq!(deserialized.version, tag_contents.version); + assert_eq!(deserialized.created_at, tag_contents.created_at); assert_eq!(deserialized.updated_at, tag_contents.updated_at); assert_eq!(deserialized.manifest_size, tag_contents.manifest_size); + let tag_contents_without_created_at = TagContents { + branch: Some("feature".to_string()), + version: 10, + created_at: None, + updated_at: Some(chrono::DateTime::from_timestamp(1_234_567_890, 123_000_000).unwrap()), + manifest_size: 2048, + }; + let json_without_created_at = + 
serde_json::to_string(&tag_contents_without_created_at).unwrap(); + assert!(!json_without_created_at.contains("createdAt")); + assert!(json_without_created_at.contains("updatedAt")); + let legacy_json = r#"{"branch":"feature","version":10,"manifestSize":2048}"#; let legacy_deserialized: TagContents = serde_json::from_str(legacy_json).unwrap(); + assert_eq!(legacy_deserialized.created_at, None); assert_eq!(legacy_deserialized.updated_at, None); + + let legacy_updated_only_json = r#"{"branch":"feature","version":10,"updatedAt":"2009-02-13T23:31:30.123Z","manifestSize":2048}"#; + let legacy_updated_only_deserialized: TagContents = + serde_json::from_str(legacy_updated_only_json).unwrap(); + assert_eq!(legacy_updated_only_deserialized.created_at, None); + assert_eq!( + legacy_updated_only_deserialized.updated_at, + Some(chrono::DateTime::from_timestamp(1_234_567_890, 123_000_000).unwrap()) + ); + + let null_created_at_json = r#"{"branch":"feature","version":10,"createdAt":null,"updatedAt":"2009-02-13T23:31:30.123Z","manifestSize":2048}"#; + let null_created_at_deserialized: TagContents = + serde_json::from_str(null_created_at_json).unwrap(); + assert_eq!(null_created_at_deserialized.created_at, None); } #[rstest] diff --git a/rust/lance/src/dataset/tests/dataset_versioning.rs b/rust/lance/src/dataset/tests/dataset_versioning.rs index 21b8bc810bb..16e45c5b5d8 100644 --- a/rust/lance/src/dataset/tests/dataset_versioning.rs +++ b/rust/lance/src/dataset/tests/dataset_versioning.rs @@ -3,6 +3,7 @@ use std::sync::Arc; use std::vec; +use std::{fs, path::Path as StdPath}; use crate::Dataset; use crate::dataset::UpdateBuilder; @@ -327,13 +328,14 @@ async fn test_tag( dataset.tags().create("tag1", 1).await.unwrap(); assert_eq!(dataset.tags().list().await.unwrap().len(), 1); - let tag1_updated_at = dataset - .tags() - .get("tag1") - .await - .unwrap() + let tag1_metadata = dataset.tags().get("tag1").await.unwrap(); + let tag1_created_at = tag1_metadata + .created_at + 
.expect("newly created tag should have created_at"); + let tag1_updated_at = tag1_metadata .updated_at .expect("newly created tag should have updated_at"); + assert_eq!(tag1_created_at, tag1_updated_at); let another_bad_tag_creation = dataset.tags().create("tag1", 1).await; assert_eq!( @@ -359,7 +361,7 @@ async fn test_tag( assert!( default_order .iter() - .all(|(_, tag)| tag.updated_at.is_some()) + .all(|(_, tag)| tag.created_at.is_some() && tag.updated_at.is_some()) ); let asc_order = dataset @@ -373,7 +375,11 @@ async fn test_tag( ["tag1", "tag2", "v1.0.0-rc1"], "Ascending ordering mismatch" ); - assert!(asc_order.iter().all(|(_, tag)| tag.updated_at.is_some())); + assert!( + asc_order + .iter() + .all(|(_, tag)| tag.created_at.is_some() && tag.updated_at.is_some()) + ); let desc_order = dataset .tags() @@ -386,11 +392,18 @@ async fn test_tag( ["v1.0.0-rc1", "tag1", "tag2"], "Descending ordering mismatch" ); - assert!(desc_order.iter().all(|(_, tag)| tag.updated_at.is_some())); + assert!( + desc_order + .iter() + .all(|(_, tag)| tag.created_at.is_some() && tag.updated_at.is_some()) + ); let tags = dataset.tags().list().await.unwrap(); assert_eq!(tags.len(), 3); - assert!(tags.values().all(|tag| tag.updated_at.is_some())); + assert!( + tags.values() + .all(|tag| tag.created_at.is_some() && tag.updated_at.is_some()) + ); let bad_checkout = dataset.checkout_version("tag3").await; assert_eq!( @@ -422,11 +435,9 @@ async fn test_tag( ); dataset.tags().update("tag1", 2).await.unwrap(); - let tag1_updated_after_first_update = dataset - .tags() - .get("tag1") - .await - .unwrap() + let tag1_after_first_update = dataset.tags().get("tag1").await.unwrap(); + assert_eq!(tag1_after_first_update.created_at, Some(tag1_created_at)); + let tag1_updated_after_first_update = tag1_after_first_update .updated_at .expect("updated tag should have updated_at"); assert!(tag1_updated_after_first_update >= tag1_updated_at); @@ -434,16 +445,44 @@ async fn test_tag( 
assert_eq!(dataset.manifest.version, 2); dataset.tags().update("tag1", 1).await.unwrap(); - let tag1_updated_after_second_update = dataset - .tags() - .get("tag1") - .await - .unwrap() + let tag1_after_second_update = dataset.tags().get("tag1").await.unwrap(); + assert_eq!(tag1_after_second_update.created_at, Some(tag1_created_at)); + let tag1_updated_after_second_update = tag1_after_second_update .updated_at .expect("updated tag should have updated_at"); assert!(tag1_updated_after_second_update >= tag1_updated_after_first_update); dataset = dataset.checkout_version("tag1").await.unwrap(); assert_eq!(dataset.manifest.version, 1); + + let legacy_tag_path = StdPath::new(test_uri.as_ref()) + .join("_refs") + .join("tags") + .join("legacy-tag.json"); + fs::write(&legacy_tag_path, r#"{"version":1,"manifestSize":123}"#).unwrap(); + + let legacy_tag_before_update = dataset.tags().get("legacy-tag").await.unwrap(); + assert_eq!(legacy_tag_before_update.created_at, None); + assert_eq!(legacy_tag_before_update.updated_at, None); + + dataset.tags().update("legacy-tag", 2).await.unwrap(); + + let legacy_tag_after_update = dataset.tags().get("legacy-tag").await.unwrap(); + assert_eq!(legacy_tag_after_update.created_at, None); + assert!( + legacy_tag_after_update.updated_at.is_some(), + "legacy tag update should refresh updated_at" + ); + assert_eq!(legacy_tag_after_update.version, 2); + + let legacy_tag_json = fs::read_to_string(&legacy_tag_path).unwrap(); + assert!( + !legacy_tag_json.contains("createdAt"), + "legacy tag update should preserve missing createdAt" + ); + assert!( + legacy_tag_json.contains("updatedAt"), + "legacy tag update should persist updatedAt" + ); } #[rstest] From b6bfafdb90d83bf37f0ddbb019904388404acc90 Mon Sep 17 00:00:00 2001 From: majin1102 Date: Mon, 30 Mar 2026 17:32:41 +0800 Subject: [PATCH 3/8] chore: format java jni rust code --- java/lance-jni/src/blocking_dataset.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git 
a/java/lance-jni/src/blocking_dataset.rs b/java/lance-jni/src/blocking_dataset.rs index 24309dcee45..c7fe287002f 100644 --- a/java/lance-jni/src/blocking_dataset.rs +++ b/java/lance-jni/src/blocking_dataset.rs @@ -2397,10 +2397,8 @@ fn inner_list_tags<'local>( } else { JObject::null() }; - let created_at = - optional_datetime_to_java_instant(env, tag_contents.created_at.as_ref())?; - let updated_at = - optional_datetime_to_java_instant(env, tag_contents.updated_at.as_ref())?; + let created_at = optional_datetime_to_java_instant(env, tag_contents.created_at.as_ref())?; + let updated_at = optional_datetime_to_java_instant(env, tag_contents.updated_at.as_ref())?; let java_tag = env.new_object( "org/lance/Tag", "(Ljava/lang/String;Ljava/lang/String;JILjava/time/Instant;Ljava/time/Instant;)V", From 22d54f139f26ab8065b6d9b9bc558491fc421647 Mon Sep 17 00:00:00 2001 From: majin1102 Date: Mon, 30 Mar 2026 17:42:36 +0800 Subject: [PATCH 4/8] test: narrow tag metadata assertions --- java/src/test/java/org/lance/DatasetTest.java | 9 -- python/python/tests/test_dataset.py | 36 +----- rust/lance/src/dataset/refs.rs | 17 --- .../src/dataset/tests/dataset_versioning.rs | 115 +++++++++++------- 4 files changed, 73 insertions(+), 104 deletions(-) diff --git a/java/src/test/java/org/lance/DatasetTest.java b/java/src/test/java/org/lance/DatasetTest.java index 3223bacb102..bd682ebba7d 100644 --- a/java/src/test/java/org/lance/DatasetTest.java +++ b/java/src/test/java/org/lance/DatasetTest.java @@ -333,8 +333,6 @@ void testTags(@TempDir Path tempDir) { assertEquals(2, dataset2.version()); assertEquals(1, dataset2.tags().list().size()); assertEquals(1, dataset2.tags().list().get(0).getVersion()); - assertTrue(dataset2.tags().list().get(0).getCreatedAt().isPresent()); - assertTrue(dataset2.tags().list().get(0).getUpdatedAt().isPresent()); assertEquals(1, dataset2.tags().getVersion("tag1")); dataset2.tags().create("tag2", Ref.ofMain(2)); assertEquals(2, dataset2.tags().list().size()); @@ 
-391,8 +389,6 @@ void testTags(@TempDir Path tempDir) { assertEquals(0, checkoutV1.countRows()); assertEquals(1, checkoutV1.tags().list().size()); assertEquals(1, checkoutV1.tags().list().get(0).getVersion()); - assertTrue(checkoutV1.tags().list().get(0).getCreatedAt().isPresent()); - assertTrue(checkoutV1.tags().list().get(0).getUpdatedAt().isPresent()); assertEquals(1, checkoutV1.tags().getVersion("tag1")); } @@ -408,9 +404,6 @@ void testTags(@TempDir Path tempDir) { assertTrue(tagOptional.isPresent()); assertEquals(2, tagOptional.get().getVersion()); assertEquals(Optional.of("branch"), tagOptional.get().getBranch()); - assertTrue(tagOptional.get().getCreatedAt().isPresent()); - assertTrue(tagOptional.get().getUpdatedAt().isPresent()); - assertEquals(tagOptional.get().getCreatedAt(), tagOptional.get().getUpdatedAt()); dataset2.tags().update("tag1", Ref.ofBranch("branch")); tags = dataset2.tags().list(); @@ -422,8 +415,6 @@ void testTags(@TempDir Path tempDir) { assertTrue(tagOptional.isPresent()); assertEquals(2, tagOptional.get().getVersion()); assertEquals(Optional.of("branch"), tagOptional.get().getBranch()); - assertTrue(tagOptional.get().getCreatedAt().isPresent()); - assertTrue(tagOptional.get().getUpdatedAt().isPresent()); } assertEquals(2, dataset2.tags().list().size()); diff --git a/python/python/tests/test_dataset.py b/python/python/tests/test_dataset.py index 675d50be6ce..185aef059ae 100644 --- a/python/python/tests/test_dataset.py +++ b/python/python/tests/test_dataset.py @@ -524,16 +524,7 @@ def test_tag(tmp_path: Path): ds = lance.dataset(base_dir, "tag1") assert ds.version == 2 - tag1_meta = ds.tags.list()["tag1"] - tag1_created_at = tag1_meta["created_at"] - tag1_updated_at = tag1_meta["updated_at"] - assert tag1_created_at is not None - assert tag1_updated_at is not None ds.tags.update("tag1", 1) - updated_tag1_meta = ds.tags.list()["tag1"] - assert updated_tag1_meta["created_at"] == tag1_created_at - assert updated_tag1_meta["updated_at"] is not 
None - assert updated_tag1_meta["updated_at"] >= tag1_updated_at ds = lance.dataset(base_dir, "tag1") assert ds.version == 1 @@ -548,28 +539,13 @@ def test_tag(tmp_path: Path): assert target_tag is not None assert target_tag["version"] == 1 assert target_tag["branch"] == "branch" - assert target_tag["created_at"] is not None - assert target_tag["updated_at"] is not None - assert isinstance(target_tag["created_at"], datetime) - assert isinstance(target_tag["updated_at"], datetime) - assert target_tag["created_at"] == target_tag["updated_at"] - - tag3_created_at = target_tag["created_at"] - tag3_updated_at = target_tag["updated_at"] - assert tag3_created_at is not None - assert tag3_updated_at is not None ds.tags.update("tag3", (None, 2)) target_tag = ds.tags.list()["tag3"] assert ds.tags.get_version("tag3") == 2 assert target_tag is not None assert target_tag["version"] == 2 assert target_tag["branch"] is None - assert target_tag["created_at"] == tag3_created_at - assert target_tag["updated_at"] is not None - assert target_tag["updated_at"] >= tag3_updated_at - tag3_updated_at = target_tag["updated_at"] - assert tag3_updated_at is not None ds.create_branch("branch2", 2) ds.tags.update("tag3", ("branch2", 2)) target_tag = ds.tags.list()["tag3"] @@ -577,9 +553,6 @@ def test_tag(tmp_path: Path): assert target_tag is not None assert target_tag["version"] == 2 assert target_tag["branch"] == "branch2" - assert target_tag["created_at"] == tag3_created_at - assert target_tag["updated_at"] is not None - assert target_tag["updated_at"] >= tag3_updated_at ds.tags.delete("tag3") assert len(ds.tags.list()) == 2 @@ -599,10 +572,11 @@ def test_tag_order(tmp_path: Path): tags_asc = ds.tags.list_ordered(order="asc") assert len(tags_asc) == 3 - assert all(tag["created_at"] is not None for _, tag in tags_asc) - assert all(isinstance(tag["created_at"], datetime) for _, tag in tags_asc) - assert all(tag["updated_at"] is not None for _, tag in tags_asc) - assert 
all(isinstance(tag["updated_at"], datetime) for _, tag in tags_asc) + first_tag = tags_asc[0][1] + assert first_tag["created_at"] is not None + assert isinstance(first_tag["created_at"], datetime) + assert first_tag["updated_at"] is not None + assert isinstance(first_tag["updated_at"], datetime) tag_names_asc = [t[0] for t in tags_asc] assert tag_names_asc == sorted(expected_tags.keys()), ( f"Unexpected ascending order: {tag_names_asc}" diff --git a/rust/lance/src/dataset/refs.rs b/rust/lance/src/dataset/refs.rs index 714d89f87e7..d2d4b3b89f1 100644 --- a/rust/lance/src/dataset/refs.rs +++ b/rust/lance/src/dataset/refs.rs @@ -1134,18 +1134,6 @@ mod tests { assert_eq!(deserialized.updated_at, tag_contents.updated_at); assert_eq!(deserialized.manifest_size, tag_contents.manifest_size); - let tag_contents_without_created_at = TagContents { - branch: Some("feature".to_string()), - version: 10, - created_at: None, - updated_at: Some(chrono::DateTime::from_timestamp(1_234_567_890, 123_000_000).unwrap()), - manifest_size: 2048, - }; - let json_without_created_at = - serde_json::to_string(&tag_contents_without_created_at).unwrap(); - assert!(!json_without_created_at.contains("createdAt")); - assert!(json_without_created_at.contains("updatedAt")); - let legacy_json = r#"{"branch":"feature","version":10,"manifestSize":2048}"#; let legacy_deserialized: TagContents = serde_json::from_str(legacy_json).unwrap(); assert_eq!(legacy_deserialized.created_at, None); @@ -1159,11 +1147,6 @@ mod tests { legacy_updated_only_deserialized.updated_at, Some(chrono::DateTime::from_timestamp(1_234_567_890, 123_000_000).unwrap()) ); - - let null_created_at_json = r#"{"branch":"feature","version":10,"createdAt":null,"updatedAt":"2009-02-13T23:31:30.123Z","manifestSize":2048}"#; - let null_created_at_deserialized: TagContents = - serde_json::from_str(null_created_at_json).unwrap(); - assert_eq!(null_created_at_deserialized.created_at, None); } #[rstest] diff --git 
a/rust/lance/src/dataset/tests/dataset_versioning.rs b/rust/lance/src/dataset/tests/dataset_versioning.rs index 16e45c5b5d8..08607bcf009 100644 --- a/rust/lance/src/dataset/tests/dataset_versioning.rs +++ b/rust/lance/src/dataset/tests/dataset_versioning.rs @@ -1,9 +1,9 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright The Lance Authors +use std::fs; use std::sync::Arc; use std::vec; -use std::{fs, path::Path as StdPath}; use crate::Dataset; use crate::dataset::UpdateBuilder; @@ -44,6 +44,21 @@ fn assert_all_manifests_use_scheme(test_dir: &TempStdDir, scheme: ManifestNaming ); } +fn write_tag_metadata_without_created_at( + dataset_dir: &TempStdDir, + tag_name: &str, + version: u64, +) -> std::path::PathBuf { + let tag_path = dataset_dir + .join("_refs") + .join("tags") + .join(format!("{tag_name}.json")); + fs::create_dir_all(tag_path.parent().unwrap()).expect("create tag metadata directory"); + let tag_json = format!(r#"{{"version":{version},"manifestSize":123}}"#); + fs::write(&tag_path, tag_json).expect("write historical tag metadata"); + tag_path +} + #[tokio::test] async fn test_v2_manifest_path_create() { // Can create a dataset, using V2 paths @@ -358,11 +373,6 @@ async fn test_tag( ["v1.0.0-rc1", "tag1", "tag2"], "Default ordering mismatch" ); - assert!( - default_order - .iter() - .all(|(_, tag)| tag.created_at.is_some() && tag.updated_at.is_some()) - ); let asc_order = dataset .tags() @@ -375,11 +385,6 @@ async fn test_tag( ["tag1", "tag2", "v1.0.0-rc1"], "Ascending ordering mismatch" ); - assert!( - asc_order - .iter() - .all(|(_, tag)| tag.created_at.is_some() && tag.updated_at.is_some()) - ); let desc_order = dataset .tags() @@ -392,18 +397,8 @@ async fn test_tag( ["v1.0.0-rc1", "tag1", "tag2"], "Descending ordering mismatch" ); - assert!( - desc_order - .iter() - .all(|(_, tag)| tag.created_at.is_some() && tag.updated_at.is_some()) - ); - let tags = dataset.tags().list().await.unwrap(); - assert_eq!(tags.len(), 3); - 
assert!( - tags.values() - .all(|tag| tag.created_at.is_some() && tag.updated_at.is_some()) - ); + assert_eq!(dataset.tags().list().await.unwrap().len(), 3); let bad_checkout = dataset.checkout_version("tag3").await; assert_eq!( @@ -445,43 +440,69 @@ async fn test_tag( assert_eq!(dataset.manifest.version, 2); dataset.tags().update("tag1", 1).await.unwrap(); - let tag1_after_second_update = dataset.tags().get("tag1").await.unwrap(); - assert_eq!(tag1_after_second_update.created_at, Some(tag1_created_at)); - let tag1_updated_after_second_update = tag1_after_second_update - .updated_at - .expect("updated tag should have updated_at"); - assert!(tag1_updated_after_second_update >= tag1_updated_after_first_update); dataset = dataset.checkout_version("tag1").await.unwrap(); assert_eq!(dataset.manifest.version, 1); +} - let legacy_tag_path = StdPath::new(test_uri.as_ref()) - .join("_refs") - .join("tags") - .join("legacy-tag.json"); - fs::write(&legacy_tag_path, r#"{"version":1,"manifestSize":123}"#).unwrap(); +#[rstest] +#[tokio::test] +async fn test_update_preserves_missing_created_at_for_historical_tag( + #[values(LanceFileVersion::Legacy, LanceFileVersion::Stable)] + data_storage_version: LanceFileVersion, +) { + let schema = Arc::new(ArrowSchema::new(vec![ArrowField::new( + "i", + DataType::UInt32, + false, + )])); + + let test_dir = TempStdDir::default(); + let test_uri = test_dir.to_str().unwrap(); + + let data = RecordBatch::try_new( + schema.clone(), + vec![Arc::new(UInt32Array::from_iter_values(0..100))], + ); + let reader = RecordBatchIterator::new(vec![data.unwrap()].into_iter().map(Ok), schema); + let mut dataset = Dataset::write( + reader, + test_uri, + Some(WriteParams { + data_storage_version: Some(data_storage_version), + ..Default::default() + }), + ) + .await + .unwrap(); + + dataset.delete("i > 50").await.unwrap(); + + // Seed a historical tag file that predates the createdAt field. 
+ let historical_tag_path = write_tag_metadata_without_created_at(&test_dir, "historical-tag", 1); - let legacy_tag_before_update = dataset.tags().get("legacy-tag").await.unwrap(); - assert_eq!(legacy_tag_before_update.created_at, None); - assert_eq!(legacy_tag_before_update.updated_at, None); + let historical_tag = dataset.tags().get("historical-tag").await.unwrap(); + assert!(historical_tag.created_at.is_none()); + assert!(historical_tag.updated_at.is_none()); - dataset.tags().update("legacy-tag", 2).await.unwrap(); + dataset.tags().update("historical-tag", 2).await.unwrap(); - let legacy_tag_after_update = dataset.tags().get("legacy-tag").await.unwrap(); - assert_eq!(legacy_tag_after_update.created_at, None); + let updated_tag = dataset.tags().get("historical-tag").await.unwrap(); + assert!(updated_tag.created_at.is_none()); assert!( - legacy_tag_after_update.updated_at.is_some(), - "legacy tag update should refresh updated_at" + updated_tag.updated_at.is_some(), + "historical tag update should refresh updated_at" ); - assert_eq!(legacy_tag_after_update.version, 2); + assert_eq!(updated_tag.version, 2); - let legacy_tag_json = fs::read_to_string(&legacy_tag_path).unwrap(); + let updated_tag_json = + fs::read_to_string(&historical_tag_path).expect("read updated historical tag metadata"); assert!( - !legacy_tag_json.contains("createdAt"), - "legacy tag update should preserve missing createdAt" + !updated_tag_json.contains("\"createdAt\""), + "historical tag update should preserve missing createdAt" ); assert!( - legacy_tag_json.contains("updatedAt"), - "legacy tag update should persist updatedAt" + updated_tag_json.contains("\"updatedAt\""), + "historical tag update should persist updatedAt" ); } From df8ddb03bda014e316aba9dfccaeeac0cfa3f755 Mon Sep 17 00:00:00 2001 From: majin1102 Date: Mon, 30 Mar 2026 18:00:14 +0800 Subject: [PATCH 5/8] chore: refine tag metadata bindings and tests --- java/src/main/java/org/lance/Tag.java | 14 +++- 
rust/lance/src/dataset/refs.rs | 12 +++ .../src/dataset/tests/dataset_versioning.rs | 78 ------------------- 3 files changed, 24 insertions(+), 80 deletions(-) diff --git a/java/src/main/java/org/lance/Tag.java b/java/src/main/java/org/lance/Tag.java index 7484b9ac736..8662eb488e7 100644 --- a/java/src/main/java/org/lance/Tag.java +++ b/java/src/main/java/org/lance/Tag.java @@ -31,11 +31,21 @@ public Tag(String name, String branch, long version, int manifestSize) { this(name, branch, version, manifestSize, null, null); } - public Tag(String name, String branch, long version, int manifestSize, Instant updatedAt) { + /** + * Constructor used by JNI when reading tag metadata from native code. + * + *

<p>Timestamps are system-generated metadata and are not part of the public Java input surface. + */ + private Tag(String name, String branch, long version, int manifestSize, Instant updatedAt) { + this(name, branch, version, manifestSize, null, updatedAt); } - public Tag( + /** + * Constructor used by JNI when reading tag metadata from native code. + * + *

Timestamps are system-generated metadata and are not part of the public Java input surface. + */ + private Tag( String name, String branch, long version, diff --git a/rust/lance/src/dataset/refs.rs b/rust/lance/src/dataset/refs.rs index d2d4b3b89f1..a301cc6f19e 100644 --- a/rust/lance/src/dataset/refs.rs +++ b/rust/lance/src/dataset/refs.rs @@ -1134,6 +1134,18 @@ mod tests { assert_eq!(deserialized.updated_at, tag_contents.updated_at); assert_eq!(deserialized.manifest_size, tag_contents.manifest_size); + let tag_contents_without_created_at = TagContents { + branch: Some("feature".to_string()), + version: 10, + created_at: None, + updated_at: Some(chrono::DateTime::from_timestamp(1_234_567_890, 123_000_000).unwrap()), + manifest_size: 2048, + }; + let json_without_created_at = + serde_json::to_string(&tag_contents_without_created_at).unwrap(); + assert!(!json_without_created_at.contains("createdAt")); + assert!(json_without_created_at.contains("updatedAt")); + let legacy_json = r#"{"branch":"feature","version":10,"manifestSize":2048}"#; let legacy_deserialized: TagContents = serde_json::from_str(legacy_json).unwrap(); assert_eq!(legacy_deserialized.created_at, None); diff --git a/rust/lance/src/dataset/tests/dataset_versioning.rs b/rust/lance/src/dataset/tests/dataset_versioning.rs index 08607bcf009..ef0cd67c3b7 100644 --- a/rust/lance/src/dataset/tests/dataset_versioning.rs +++ b/rust/lance/src/dataset/tests/dataset_versioning.rs @@ -1,7 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright The Lance Authors -use std::fs; use std::sync::Arc; use std::vec; @@ -44,21 +43,6 @@ fn assert_all_manifests_use_scheme(test_dir: &TempStdDir, scheme: ManifestNaming ); } -fn write_tag_metadata_without_created_at( - dataset_dir: &TempStdDir, - tag_name: &str, - version: u64, -) -> std::path::PathBuf { - let tag_path = dataset_dir - .join("_refs") - .join("tags") - .join(format!("{tag_name}.json")); - 
fs::create_dir_all(tag_path.parent().unwrap()).expect("create tag metadata directory"); - let tag_json = format!(r#"{{"version":{version},"manifestSize":123}}"#); - fs::write(&tag_path, tag_json).expect("write historical tag metadata"); - tag_path -} - #[tokio::test] async fn test_v2_manifest_path_create() { // Can create a dataset, using V2 paths @@ -444,68 +428,6 @@ async fn test_tag( assert_eq!(dataset.manifest.version, 1); } -#[rstest] -#[tokio::test] -async fn test_update_preserves_missing_created_at_for_historical_tag( - #[values(LanceFileVersion::Legacy, LanceFileVersion::Stable)] - data_storage_version: LanceFileVersion, -) { - let schema = Arc::new(ArrowSchema::new(vec![ArrowField::new( - "i", - DataType::UInt32, - false, - )])); - - let test_dir = TempStdDir::default(); - let test_uri = test_dir.to_str().unwrap(); - - let data = RecordBatch::try_new( - schema.clone(), - vec![Arc::new(UInt32Array::from_iter_values(0..100))], - ); - let reader = RecordBatchIterator::new(vec![data.unwrap()].into_iter().map(Ok), schema); - let mut dataset = Dataset::write( - reader, - test_uri, - Some(WriteParams { - data_storage_version: Some(data_storage_version), - ..Default::default() - }), - ) - .await - .unwrap(); - - dataset.delete("i > 50").await.unwrap(); - - // Seed a historical tag file that predates the createdAt field. 
- let historical_tag_path = write_tag_metadata_without_created_at(&test_dir, "historical-tag", 1); - - let historical_tag = dataset.tags().get("historical-tag").await.unwrap(); - assert!(historical_tag.created_at.is_none()); - assert!(historical_tag.updated_at.is_none()); - - dataset.tags().update("historical-tag", 2).await.unwrap(); - - let updated_tag = dataset.tags().get("historical-tag").await.unwrap(); - assert!(updated_tag.created_at.is_none()); - assert!( - updated_tag.updated_at.is_some(), - "historical tag update should refresh updated_at" - ); - assert_eq!(updated_tag.version, 2); - - let updated_tag_json = - fs::read_to_string(&historical_tag_path).expect("read updated historical tag metadata"); - assert!( - !updated_tag_json.contains("\"createdAt\""), - "historical tag update should preserve missing createdAt" - ); - assert!( - updated_tag_json.contains("\"updatedAt\""), - "historical tag update should persist updatedAt" - ); -} - #[rstest] #[tokio::test] async fn test_fragment_id_zero_not_reused() { From ca77cabb739f8cad8e036fdf224ed3554af6f85a Mon Sep 17 00:00:00 2001 From: majin1102 Date: Mon, 30 Mar 2026 18:08:33 +0800 Subject: [PATCH 6/8] docs: keep tag rest schema aligned with implementation --- docs/src/rest.yaml | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/docs/src/rest.yaml b/docs/src/rest.yaml index 162b1388bae..59065fa27fc 100644 --- a/docs/src/rest.yaml +++ b/docs/src/rest.yaml @@ -2888,16 +2888,6 @@ components: format: int64 minimum: 0 description: Version number that the tag points to - createdAt: - type: string - format: date-time - nullable: true - description: Timestamp when the tag was first created. Historical tags may omit this field. - updatedAt: - type: string - format: date-time - nullable: true - description: Timestamp when the tag was last modified. On creation, this equals createdAt. Historical tags may omit this field. 
manifestSize: type: integer format: int64 From 96e4f3147fbd8479c387bd10f5551322df1ed4d4 Mon Sep 17 00:00:00 2001 From: majin1102 Date: Mon, 30 Mar 2026 18:21:09 +0800 Subject: [PATCH 7/8] docs: revert tag rest schema changes --- docs/src/rest.yaml | 8 -------- 1 file changed, 8 deletions(-) diff --git a/docs/src/rest.yaml b/docs/src/rest.yaml index 59065fa27fc..b3af38ba7ef 100644 --- a/docs/src/rest.yaml +++ b/docs/src/rest.yaml @@ -2880,19 +2880,11 @@ components: required: - version properties: - branch: - type: string - description: Branch name that the tag points to. If absent, the tag points to main. version: type: integer format: int64 minimum: 0 description: Version number that the tag points to - manifestSize: - type: integer - format: int64 - minimum: 0 - description: Size of the referenced manifest file in bytes. RestoreTableRequest: type: object From fa1407afadbb4ab32a2e8d7a52e2302396165580 Mon Sep 17 00:00:00 2001 From: majin1102 Date: Mon, 30 Mar 2026 19:34:17 +0800 Subject: [PATCH 8/8] refactor(java): ignore tag timestamps in equality --- java/src/main/java/org/lance/Tag.java | 13 +------------ java/src/test/java/org/lance/DatasetTest.java | 16 +++++++++------- 2 files changed, 10 insertions(+), 19 deletions(-) diff --git a/java/src/main/java/org/lance/Tag.java b/java/src/main/java/org/lance/Tag.java index 8662eb488e7..7aabf6f451b 100644 --- a/java/src/main/java/org/lance/Tag.java +++ b/java/src/main/java/org/lance/Tag.java @@ -31,15 +31,6 @@ public Tag(String name, String branch, long version, int manifestSize) { this(name, branch, version, manifestSize, null, null); } - /** - * Constructor used by JNI when reading tag metadata from native code. - * - *

Timestamps are system-generated metadata and are not part of the public Java input surface. - */ - private Tag(String name, String branch, long version, int manifestSize, Instant updatedAt) { - this(name, branch, version, manifestSize, null, updatedAt); - } - /** * Constructor used by JNI when reading tag metadata from native code. * @@ -108,13 +99,11 @@ public boolean equals(Object o) { return version == tag.version && Objects.equals(branch, tag.branch) && manifestSize == tag.manifestSize - && Objects.equals(createdAt, tag.createdAt) - && Objects.equals(updatedAt, tag.updatedAt) && Objects.equals(name, tag.name); } @Override public int hashCode() { - return Objects.hash(name, branch, version, manifestSize, createdAt, updatedAt); + return Objects.hash(name, branch, version, manifestSize); } } diff --git a/java/src/test/java/org/lance/DatasetTest.java b/java/src/test/java/org/lance/DatasetTest.java index bd682ebba7d..0da50bfed7e 100644 --- a/java/src/test/java/org/lance/DatasetTest.java +++ b/java/src/test/java/org/lance/DatasetTest.java @@ -318,13 +318,15 @@ void testTags(@TempDir Path tempDir) { try (Dataset dataset = testDataset.createEmptyDataset()) { assertEquals(1, dataset.version()); dataset.tags().create("tag1", Ref.ofMain()); - assertEquals(1, dataset.tags().list().size()); - assertEquals(1, dataset.tags().list().get(0).getVersion()); - assertTrue(dataset.tags().list().get(0).getCreatedAt().isPresent()); - assertTrue(dataset.tags().list().get(0).getUpdatedAt().isPresent()); - assertEquals( - dataset.tags().list().get(0).getCreatedAt(), - dataset.tags().list().get(0).getUpdatedAt()); + List tags = dataset.tags().list(); + Tag tag1 = tags.get(0); + assertEquals(1, tags.size()); + assertEquals(1, tag1.getVersion()); + assertEquals(new Tag("tag1", null, 1, tag1.getManifestSize()), tag1); + assertTrue(new HashSet<>(tags).contains(new Tag("tag1", null, 1, tag1.getManifestSize()))); + assertTrue(tag1.getCreatedAt().isPresent()); + 
assertTrue(tag1.getUpdatedAt().isPresent()); + assertEquals(tag1.getCreatedAt(), tag1.getUpdatedAt()); assertEquals(1, dataset.tags().getVersion("tag1")); }