Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 9 additions & 5 deletions docs/src/format/table/branch_tag.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,11 @@ Each branch metadata file is a JSON file with the following fields:

| JSON Key | Type | Optional | Description |
|------------------|--------|----------|--------------------------------------------------------------------------------|
| `parent_branch` | string | Yes | Name of the branch this was created from. `null` indicates branched from main. |
| `parent_version` | number | | Version number of the parent branch at the time this branch was created. |
| `create_at` | number | | Unix timestamp (seconds since epoch) when the branch was created. |
| `manifest_size` | number | | Size of the initial manifest file in bytes. |
| `parentBranch` | string | Yes | Name of the branch this was created from. `null` indicates branched from main. |
| `parentVersion` | number | | Version number of the parent branch at the time this branch was created. |
| `createAt` | number | | Unix timestamp (seconds since epoch) when the branch was created. |
| `manifestSize` | number | | Size of the initial manifest file in bytes. |
| `metadata` | object | Yes | String key/value metadata map. If absent, it is treated as an empty object. |

### Branch Dataset Layout

Expand Down Expand Up @@ -118,4 +119,7 @@ Each tag file is a JSON file with the following fields:
|-----------------|--------|----------|--------------------------------------------------------------------------|
| `branch` | string | Yes | Branch name being tagged. `null` or absent indicates main branch. |
| `version` | number | | Version number being tagged within that branch. |
| `manifest_size` | number | | Size of the manifest file in bytes. Used for efficient manifest loading. |
| `createdAt` | string | Yes | RFC 3339 timestamp for when the tag was first created. |
| `updatedAt` | string | Yes | RFC 3339 timestamp for the latest tag reference update. |
Comment on lines +122 to +123
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since we have manifest_size should this be created_at and updated_at for consistency?

Or actually, looking at the test, I wonder if this is supposed to be manifestSize below?

| `manifestSize` | number | | Size of the manifest file in bytes. Used for efficient manifest loading. |
| `metadata` | object | Yes | String key/value metadata map. If absent, it is treated as an empty object. |
10 changes: 5 additions & 5 deletions docs/src/guide/tags_and_branches.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,21 +36,21 @@ print(ds.tags.list())
# {}
ds.tags.create("v1-prod", (None, 1))
print(ds.tags.list())
# {'v1-prod': {'version': 1, 'manifest_size': ...}}
# {'v1-prod': {'version': 1, 'created_at': ..., 'updated_at': ..., 'manifest_size': ...}}
ds.tags.update("v1-prod", (None, 2))
print(ds.tags.list())
# {'v1-prod': {'version': 2, 'manifest_size': ...}}
# {'v1-prod': {'version': 2, 'created_at': ..., 'updated_at': ..., 'manifest_size': ...}}
ds.tags.delete("v1-prod")
print(ds.tags.list())
# {}
print(ds.tags.list_ordered())
# []
ds.tags.create("v1-prod", (None, 1))
print(ds.tags.list_ordered())
# [('v1-prod', {'version': 1, 'manifest_size': ...})]
# [('v1-prod', {'version': 1, 'created_at': ..., 'updated_at': ..., 'manifest_size': ...})]
ds.tags.update("v1-prod", (None, 2))
print(ds.tags.list_ordered())
# [('v1-prod', {'version': 2, 'manifest_size': ...})]
# [('v1-prod', {'version': 2, 'created_at': ..., 'updated_at': ..., 'manifest_size': ...})]
ds.tags.delete("v1-prod")
print(ds.tags.list_ordered())
# []
Expand Down Expand Up @@ -122,4 +122,4 @@ print(ds.branches.list_ordered(order="desc"))

Branches hold references to data files. Lance ensures that cleanup does not delete files still referenced by any branch.

Delete unused branches to allow their referenced files to be cleaned up by `cleanup_old_versions()`.
Delete unused branches to allow their referenced files to be cleaned up by `cleanup_old_versions()`.
121 changes: 113 additions & 8 deletions java/lance-jni/src/blocking_dataset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use crate::storage_options::JavaStorageOptionsProvider;
use crate::traits::{FromJObjectWithEnv, FromJString, export_vec, import_vec, import_vec_to_rust};
use crate::utils::{
build_compaction_options, extract_storage_options, extract_write_params,
get_scalar_index_params, get_vector_index_params, to_rust_map,
get_scalar_index_params, get_vector_index_params, to_java_map, to_rust_map,
};
use crate::{RT, traits::IntoJava};
use arrow::array::RecordBatchReader;
Expand Down Expand Up @@ -314,6 +314,24 @@ impl BlockingDataset {
Ok(())
}

pub fn replace_tag_metadata(
&mut self,
tag: &str,
metadata: HashMap<String, String>,
) -> Result<()> {
RT.block_on(self.inner.tags().replace_metadata(tag, metadata))?;
Ok(())
}

pub fn replace_branch_metadata(
&mut self,
branch: &str,
metadata: HashMap<String, String>,
) -> Result<()> {
RT.block_on(self.inner.branches().replace_metadata(branch, metadata))?;
Ok(())
}

pub fn get_version(&self, tag: &str) -> Result<u64> {
let version = RT.block_on(self.inner.tags().get_version(tag))?;
Ok(version)
Expand Down Expand Up @@ -2261,6 +2279,26 @@ fn inner_add_columns_by_schema(
//////////////////////////////
// Tag operation Methods //
//////////////////////////////
fn optional_datetime_to_java_instant<'local>(
env: &mut JNIEnv<'local>,
timestamp: Option<&DateTime<Utc>>,
) -> Result<JObject<'local>> {
if let Some(timestamp) = timestamp {
let seconds = timestamp.timestamp();
let nanos = timestamp.timestamp_subsec_nanos() as i64;
Ok(env
.call_static_method(
"java/time/Instant",
"ofEpochSecond",
"(JJ)Ljava/time/Instant;",
&[JValue::Long(seconds), JValue::Long(nanos)],
)?
.l()?)
} else {
Ok(JObject::null())
}
}

#[unsafe(no_mangle)]
pub extern "system" fn Java_org_lance_Dataset_nativeListTags<'local>(
mut env: JNIEnv<'local>,
Expand All @@ -2273,27 +2311,34 @@ fn inner_list_tags<'local>(
env: &mut JNIEnv<'local>,
java_dataset: JObject,
) -> Result<JObject<'local>> {
let tag_map = {
let mut tags: Vec<_> = {
let dataset_guard =
unsafe { env.get_rust_field::<_, _, BlockingDataset>(java_dataset, NATIVE_DATASET) }?;
dataset_guard.list_tags()?
dataset_guard.list_tags()?.into_iter().collect()
};
tags.sort_unstable_by(|(left_name, _), (right_name, _)| left_name.cmp(right_name));
let array_list = env.new_object("java/util/ArrayList", "()V", &[])?;

for (tag_name, tag_contents) in tag_map {
for (tag_name, tag_contents) in tags {
let branch_name: JObject = if let Some(branch_name) = tag_contents.branch.as_ref() {
env.new_string(branch_name)?.into()
} else {
JObject::null()
};
let created_at = optional_datetime_to_java_instant(env, tag_contents.created_at.as_ref())?;
let updated_at = optional_datetime_to_java_instant(env, tag_contents.updated_at.as_ref())?;
let java_metadata = to_java_map(env, &tag_contents.metadata)?;
let java_tag = env.new_object(
"org/lance/Tag",
"(Ljava/lang/String;Ljava/lang/String;JI)V",
"(Ljava/lang/String;Ljava/lang/String;JILjava/time/Instant;Ljava/time/Instant;Ljava/util/Map;)V",
&[
JValue::Object(&env.new_string(tag_name)?.into()),
JValue::Object(&branch_name),
JValue::Long(tag_contents.version as i64),
JValue::Int(tag_contents.manifest_size as i32),
JValue::Object(&created_at),
JValue::Object(&updated_at),
JValue::Object(&java_metadata),
],
)?;
env.call_method(
Expand Down Expand Up @@ -2375,6 +2420,32 @@ fn inner_update_tag(
dataset_guard.update_tag(tag.as_str(), reference)
}

#[unsafe(no_mangle)]
pub extern "system" fn Java_org_lance_Dataset_nativeReplaceTagMetadata(
mut env: JNIEnv,
java_dataset: JObject,
jtag_name: JString,
jmetadata: JObject,
) {
ok_or_throw_without_return!(
env,
inner_replace_tag_metadata(&mut env, java_dataset, jtag_name, jmetadata)
)
}

fn inner_replace_tag_metadata(
env: &mut JNIEnv,
java_dataset: JObject,
jtag_name: JString,
jmetadata: JObject,
) -> Result<()> {
let tag = jtag_name.extract(env)?;
let metadata = extract_metadata_map(env, &jmetadata)?;
let mut dataset_guard =
{ unsafe { env.get_rust_field::<_, _, BlockingDataset>(java_dataset, NATIVE_DATASET) }? };
dataset_guard.replace_tag_metadata(tag.as_str(), metadata)
}

#[unsafe(no_mangle)]
pub extern "system" fn Java_org_lance_Dataset_nativeGetVersionByTag(
mut env: JNIEnv,
Expand Down Expand Up @@ -2414,11 +2485,12 @@ fn inner_list_branches<'local>(
env: &mut JNIEnv<'local>,
java_dataset: JObject,
) -> Result<JObject<'local>> {
let branches = {
let mut branches: Vec<_> = {
let dataset_guard =
unsafe { env.get_rust_field::<_, _, BlockingDataset>(java_dataset, NATIVE_DATASET) }?;
dataset_guard.list_branches()?
dataset_guard.list_branches()?.into_iter().collect()
};
branches.sort_unstable_by(|(left_name, _), (right_name, _)| left_name.cmp(right_name));
let array_list = env.new_object("java/util/ArrayList", "()V", &[])?;

for (name, contents) in branches {
Expand Down Expand Up @@ -2447,16 +2519,18 @@ fn inner_list_branches<'local>(
&[JValue::Object(&jmapping)],
)?;
}
let java_metadata = to_java_map(env, &contents.metadata)?;
let jbranch = env.new_object(
"org/lance/Branch",
"(Ljava/lang/String;Ljava/lang/String;Ljava/util/List;JJI)V",
"(Ljava/lang/String;Ljava/lang/String;Ljava/util/List;JJILjava/util/Map;)V",
&[
JValue::Object(&jname),
JValue::Object(&jparent),
JValue::Object(&jbranch_identifier),
JValue::Long(contents.parent_version as i64),
JValue::Long(contents.create_at as i64),
JValue::Int(contents.manifest_size as i32),
JValue::Object(&java_metadata),
],
)?;
env.call_method(
Expand Down Expand Up @@ -2507,6 +2581,37 @@ fn inner_create_branch<'local>(
new_blocking_dataset.into_java(env)
}

#[unsafe(no_mangle)]
pub extern "system" fn Java_org_lance_Dataset_nativeReplaceBranchMetadata(
mut env: JNIEnv,
java_dataset: JObject,
jbranch: JString,
jmetadata: JObject,
) {
ok_or_throw_without_return!(
env,
inner_replace_branch_metadata(&mut env, java_dataset, jbranch, jmetadata)
)
}

fn inner_replace_branch_metadata(
env: &mut JNIEnv,
java_dataset: JObject,
jbranch: JString,
jmetadata: JObject,
) -> Result<()> {
let branch: String = jbranch.extract(env)?;
let metadata = extract_metadata_map(env, &jmetadata)?;
let mut dataset_guard =
unsafe { env.get_rust_field::<_, _, BlockingDataset>(java_dataset, NATIVE_DATASET) }?;
dataset_guard.replace_branch_metadata(&branch, metadata)
}

fn extract_metadata_map(env: &mut JNIEnv, jmetadata: &JObject) -> Result<HashMap<String, String>> {
let jmap = JMap::from_env(env, jmetadata)?;
to_rust_map(env, &jmap)
}

fn transform_jref_to_ref(jref: JObject, env: &mut JNIEnv) -> Result<Ref> {
let source_tag_name = env.get_optional_string_from_method(&jref, "getTagName")?;
let source_version_number = env.get_optional_u64_from_method(&jref, "getVersionNumber")?;
Expand Down
9 changes: 9 additions & 0 deletions java/lance-jni/src/traits.rs
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,15 @@ impl IntoJava for JLance<Option<usize>> {
}
}

impl IntoJava for JLance<Option<&str>> {
fn into_java<'a>(self, env: &mut JNIEnv<'a>) -> Result<JObject<'a>> {
Ok(match self.0 {
Some(value) => env.new_string(value)?.into(),
None => JObject::null(),
})
}
}

impl FromJObjectWithEnv<Option<i64>> for JObject<'_> {
fn extract_object(&self, env: &mut JNIEnv<'_>) -> Result<Option<i64>> {
let ret = if self.is_null() {
Expand Down
27 changes: 23 additions & 4 deletions java/src/main/java/org/lance/Branch.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,10 @@
import com.google.common.base.MoreObjects;
import com.google.common.collect.ImmutableList;

import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;

Expand Down Expand Up @@ -70,10 +73,18 @@ public int hashCode() {
private final long parentVersion;
private final long createAt;
private final int manifestSize;
private final Map<String, String> metadata;

public Branch(
String name, String parentBranch, long parentVersion, long createAt, int manifestSize) {
this(name, parentBranch, ImmutableList.of(), parentVersion, createAt, manifestSize);
this(
name,
parentBranch,
ImmutableList.of(),
parentVersion,
createAt,
manifestSize,
Collections.emptyMap());
}

public Branch(
Expand All @@ -82,13 +93,15 @@ public Branch(
List<BranchVersionMapping> branchIdentifier,
long parentVersion,
long createAt,
int manifestSize) {
int manifestSize,
Map<String, String> metadata) {
this.name = name;
this.parentBranch = Optional.ofNullable(parentBranch);
this.branchIdentifier = ImmutableList.copyOf(Objects.requireNonNull(branchIdentifier));
this.parentVersion = parentVersion;
this.createAt = createAt;
this.manifestSize = manifestSize;
this.metadata = Collections.unmodifiableMap(new HashMap<>(metadata));
}

public String getName() {
Expand All @@ -115,6 +128,10 @@ public int getManifestSize() {
return manifestSize;
}

public Map<String, String> getMetadata() {
return metadata;
}

@Override
public String toString() {
return MoreObjects.toStringHelper(this)
Expand All @@ -124,6 +141,7 @@ public String toString() {
.add("parentVersion", parentVersion)
.add("createAt", createAt)
.add("manifestSize", manifestSize)
.add("metadata", metadata)
.toString();
}

Expand All @@ -137,12 +155,13 @@ public boolean equals(Object o) {
&& manifestSize == branch.manifestSize
&& Objects.equals(name, branch.name)
&& Objects.equals(parentBranch, branch.parentBranch)
&& Objects.equals(branchIdentifier, branch.branchIdentifier);
&& Objects.equals(branchIdentifier, branch.branchIdentifier)
&& Objects.equals(metadata, branch.metadata);
}

@Override
public int hashCode() {
return Objects.hash(
name, parentBranch, branchIdentifier, parentVersion, createAt, manifestSize);
name, parentBranch, branchIdentifier, parentVersion, createAt, manifestSize, metadata);
}
}
Loading
Loading