From 934a1f1329a18ca0c4a25ded1c2c88a13ebe7d51 Mon Sep 17 00:00:00 2001 From: summaryzb Date: Wed, 1 Apr 2026 20:09:23 +0800 Subject: [PATCH] feat: compile-time feature-gated Rust backtrace for JNI error diagnostics --- java/lance-jni/Cargo.lock | 14 +++ java/lance-jni/Cargo.toml | 1 + java/lance-jni/src/error.rs | 122 ++++++++++++++++++-- java/pom.xml | 7 ++ rust/lance-core/Cargo.toml | 4 + rust/lance-core/src/error.rs | 215 ++++++++++++++++++++++++++++++++++- rust/lance/Cargo.toml | 1 + 7 files changed, 356 insertions(+), 8 deletions(-) diff --git a/java/lance-jni/Cargo.lock b/java/lance-jni/Cargo.lock index 2f98efa51e2..53a5bdd3cf1 100644 --- a/java/lance-jni/Cargo.lock +++ b/java/lance-jni/Cargo.lock @@ -3187,6 +3187,17 @@ version = "3.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" +[[package]] +name = "io-uring" +version = "0.7.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdd7bddefd0a8833b88a4b68f90dae22c7450d11b354198baee3874fd811b344" +dependencies = [ + "bitflags", + "cfg-if", + "libc", +] + [[package]] name = "ipnet" version = "2.12.0" @@ -3740,10 +3751,13 @@ dependencies = [ "deepsize", "futures", "http 1.4.0", + "io-uring", "lance-arrow", "lance-core", "lance-namespace", + "libc", "log", + "moka", "object_store", "object_store_opendal", "opendal", diff --git a/java/lance-jni/Cargo.toml b/java/lance-jni/Cargo.toml index 5a1e7f3f655..a6fd2903676 100644 --- a/java/lance-jni/Cargo.toml +++ b/java/lance-jni/Cargo.toml @@ -14,6 +14,7 @@ crate-type = ["cdylib"] [features] default = [] +backtrace = ["lance/backtrace", "lance-core/backtrace"] [dependencies] lance = { path = "../../rust/lance", features = ["substrait"] } diff --git a/java/lance-jni/src/error.rs b/java/lance-jni/src/error.rs index e02203a9567..e2f3fb83109 100644 --- a/java/lance-jni/src/error.rs +++ b/java/lance-jni/src/error.rs @@ -175,28 +175,35 @@ impl std::fmt::Display for Error { impl From for Error { fn from(err: LanceError) -> Self { + let backtrace_suffix = err + .backtrace() + .map(|bt| format!("\n\nRust backtrace:\n{}", bt)) + .unwrap_or_default(); + let message = format!("{}{}", err, backtrace_suffix); + match &err { LanceError::DatasetNotFound { .. } | LanceError::DatasetAlreadyExists { .. } | LanceError::CommitConflict { .. } - | LanceError::InvalidInput { .. } => Self::input_error(err.to_string()), - LanceError::IO { .. } => Self::io_error(err.to_string()), - LanceError::NotSupported { .. } => Self::unsupported_error(err.to_string()), - LanceError::NotFound { .. } => Self::io_error(err.to_string()), + | LanceError::InvalidInput { .. } => Self::input_error(message), + LanceError::IO { .. } => Self::io_error(message), + LanceError::NotSupported { .. } => Self::unsupported_error(message), + LanceError::NotFound { .. } => Self::io_error(message), LanceError::Namespace { source, .. } => { // Try to downcast to NamespaceError and get the error code if let Some(ns_err) = source.downcast_ref::() { - Self::namespace_error(ns_err.code().as_u32(), ns_err.to_string()) + let ns_message = format!("{}{}", ns_err, backtrace_suffix); + Self::namespace_error(ns_err.code().as_u32(), ns_message) } else { log::warn!( "Failed to downcast NamespaceError source, falling back to runtime error. \ This may indicate a version mismatch. Source type: {:?}", source ); - Self::runtime_error(err.to_string()) + Self::runtime_error(message) } } - _ => Self::runtime_error(err.to_string()), + _ => Self::runtime_error(message), } } } @@ -234,3 +241,104 @@ impl From for Error { Self::input_error(err.to_string()) } } + +#[cfg(test)] +mod tests { + use super::*; + + // Helper: extract the java_class from an Error via Display output + fn java_class(err: &Error) -> &JavaExceptionClass { + &err.java_class + } + + #[test] + fn test_invalid_input_maps_to_illegal_argument() { + let lance_err = LanceError::invalid_input("bad input"); + let jni_err: Error = lance_err.into(); + assert_eq!( + *java_class(&jni_err), + JavaExceptionClass::IllegalArgumentException + ); + assert!(jni_err.message.contains("bad input")); + } + + #[test] + fn test_dataset_not_found_maps_to_illegal_argument() { + let lance_err = LanceError::dataset_not_found("my_dataset", "not found".to_string().into()); + let jni_err: Error = lance_err.into(); + assert_eq!( + *java_class(&jni_err), + JavaExceptionClass::IllegalArgumentException + ); + assert!(jni_err.message.contains("my_dataset")); + } + + #[test] + fn test_dataset_already_exists_maps_to_illegal_argument() { + let lance_err = LanceError::dataset_already_exists("my_dataset"); + let jni_err: Error = lance_err.into(); + assert_eq!( + *java_class(&jni_err), + JavaExceptionClass::IllegalArgumentException + ); + assert!(jni_err.message.contains("my_dataset")); + } + + #[test] + fn test_commit_conflict_maps_to_illegal_argument() { + let lance_err = LanceError::commit_conflict_source(42, "conflict".to_string().into()); + let jni_err: Error = lance_err.into(); + assert_eq!( + *java_class(&jni_err), + JavaExceptionClass::IllegalArgumentException + ); + } + + #[test] + fn test_io_maps_to_ioexception() { + let lance_err = LanceError::io("disk failure"); + let jni_err: Error = lance_err.into(); + assert_eq!(*java_class(&jni_err), JavaExceptionClass::IOException); + assert!(jni_err.message.contains("disk failure")); + } + + #[test] + fn test_not_supported_maps_to_unsupported() { + let lance_err = LanceError::not_supported("nope"); + let jni_err: Error = lance_err.into(); + assert_eq!( + *java_class(&jni_err), + JavaExceptionClass::UnsupportedOperationException + ); + assert!(jni_err.message.contains("nope")); + } + + #[test] + fn test_not_found_maps_to_ioexception() { + let lance_err = LanceError::not_found("missing_uri"); + let jni_err: Error = lance_err.into(); + assert_eq!(*java_class(&jni_err), JavaExceptionClass::IOException); + assert!(jni_err.message.contains("missing_uri")); + } + + #[test] + fn test_fallthrough_maps_to_runtime() { + let lance_err = LanceError::internal("internal oops"); + let jni_err: Error = lance_err.into(); + assert_eq!(*java_class(&jni_err), JavaExceptionClass::RuntimeException); + assert!(jni_err.message.contains("internal oops")); + } + + #[test] + fn test_no_backtrace_suffix_when_backtrace_is_none() { + // Without the backtrace feature enabled in lance-core default tests, + // backtrace() returns None, so no suffix should be appended. + let lance_err = LanceError::io("clean message"); + let jni_err: Error = lance_err.into(); + assert!( + !jni_err.message.contains("Rust backtrace:"), + "Expected no backtrace suffix, got: {}", + jni_err.message + ); + } +} diff --git a/java/pom.xml b/java/pom.xml index e31d77745b0..17f58e89f2f 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -39,6 +39,7 @@ 3.7.5 package false + false org.lance.shaded @@ -396,6 +397,9 @@ lance-jni ${rust.release.build} + + ${rust.features} + ${project.build.directory}/classes/nativelib true @@ -409,6 +413,9 @@ lance-jni ${rust.release.build} + + ${rust.features} + -v diff --git a/rust/lance-core/Cargo.toml b/rust/lance-core/Cargo.toml index 3ca71e524c3..60cce5cb52f 100644 --- a/rust/lance-core/Cargo.toml +++ b/rust/lance-core/Cargo.toml @@ -55,6 +55,10 @@ proptest.workspace = true rstest.workspace = true [features] +# Capture Rust backtraces in error types. When disabled (the default), +# the backtrace field is zero-sized with no overhead. At runtime, capture +# is still gated by RUST_BACKTRACE=1. +backtrace = [] datafusion = ["dep:datafusion-common", "dep:datafusion-sql"] [lints] diff --git a/rust/lance-core/src/error.rs b/rust/lance-core/src/error.rs index 6fc7885908f..54304317cc7 100644 --- a/rust/lance-core/src/error.rs +++ b/rust/lance-core/src/error.rs @@ -8,6 +8,52 @@ use snafu::{IntoError as _, Location, Snafu}; type BoxedError = Box; +#[cfg(feature = "backtrace")] +mod backtrace_support { + use std::backtrace::Backtrace; + + use snafu::{AsBacktrace, GenerateImplicitData}; + + #[derive(Debug)] + pub struct MaybeBacktrace(pub Option); + + impl GenerateImplicitData for MaybeBacktrace { + fn generate() -> Self { + Self(>::generate()) + } + } + + impl AsBacktrace for MaybeBacktrace { + fn as_backtrace(&self) -> Option<&Backtrace> { + self.0.as_ref() + } + } +} + +#[cfg(not(feature = "backtrace"))] +mod backtrace_support { + use std::backtrace::Backtrace; + + use snafu::{AsBacktrace, GenerateImplicitData}; + + #[derive(Debug)] + pub struct MaybeBacktrace; + + impl GenerateImplicitData for MaybeBacktrace { + fn generate() -> Self { + Self + } + } + + impl AsBacktrace for MaybeBacktrace { + fn as_backtrace(&self) -> Option<&Backtrace> { + None + } + } +} + +use backtrace_support::MaybeBacktrace; + /// Error for when a requested field is not found in a schema. /// /// This error computes suggestions lazily (only when displayed) to avoid @@ -59,18 +105,24 @@ pub enum Error { source: BoxedError, #[snafu(implicit)] location: Location, + #[snafu(implicit)] + backtrace: MaybeBacktrace, }, #[snafu(display("Dataset already exists: {uri}, {location}"))] DatasetAlreadyExists { uri: String, #[snafu(implicit)] location: Location, + #[snafu(implicit)] + backtrace: MaybeBacktrace, }, #[snafu(display("Append with different schema: {difference}, location: {location}"))] SchemaMismatch { difference: String, #[snafu(implicit)] location: Location, + #[snafu(implicit)] + backtrace: MaybeBacktrace, }, #[snafu(display("Dataset at path {path} was not found: {source}, {location}"))] DatasetNotFound { @@ -78,6 +130,8 @@ pub enum Error { source: BoxedError, #[snafu(implicit)] location: Location, + #[snafu(implicit)] + backtrace: MaybeBacktrace, }, #[snafu(display("Encountered corrupt file {path}: {source}, {location}"))] CorruptFile { @@ -85,13 +139,16 @@ pub enum Error { source: BoxedError, #[snafu(implicit)] location: Location, - // TODO: add backtrace? + #[snafu(implicit)] + backtrace: MaybeBacktrace, }, #[snafu(display("Not supported: {source}, {location}"))] NotSupported { source: BoxedError, #[snafu(implicit)] location: Location, + #[snafu(implicit)] + backtrace: MaybeBacktrace, }, #[snafu(display("Commit conflict for version {version}: {source}, {location}"))] CommitConflict { @@ -99,12 +156,16 @@ pub enum Error { source: BoxedError, #[snafu(implicit)] location: Location, + #[snafu(implicit)] + backtrace: MaybeBacktrace, }, #[snafu(display("Incompatible transaction: {source}, {location}"))] IncompatibleTransaction { source: BoxedError, #[snafu(implicit)] location: Location, + #[snafu(implicit)] + backtrace: MaybeBacktrace, }, #[snafu(display("Retryable commit conflict for version {version}: {source}, {location}"))] RetryableCommitConflict { @@ -112,12 +173,16 @@ pub enum Error { source: BoxedError, #[snafu(implicit)] location: Location, + #[snafu(implicit)] + backtrace: MaybeBacktrace, }, #[snafu(display("Too many concurrent writers. {message}, {location}"))] TooMuchWriteContention { message: String, #[snafu(implicit)] location: Location, + #[snafu(implicit)] + backtrace: MaybeBacktrace, }, #[snafu(display( "Encountered internal error. Please file a bug report at https://github.com/lance-format/lance/issues. {message}, {location}" @@ -126,54 +191,72 @@ pub enum Error { message: String, #[snafu(implicit)] location: Location, + #[snafu(implicit)] + backtrace: MaybeBacktrace, }, #[snafu(display("A prerequisite task failed: {message}, {location}"))] PrerequisiteFailed { message: String, #[snafu(implicit)] location: Location, + #[snafu(implicit)] + backtrace: MaybeBacktrace, }, #[snafu(display("Unprocessable: {message}, {location}"))] Unprocessable { message: String, #[snafu(implicit)] location: Location, + #[snafu(implicit)] + backtrace: MaybeBacktrace, }, #[snafu(display("LanceError(Arrow): {message}, {location}"))] Arrow { message: String, #[snafu(implicit)] location: Location, + #[snafu(implicit)] + backtrace: MaybeBacktrace, }, #[snafu(display("LanceError(Schema): {message}, {location}"))] Schema { message: String, #[snafu(implicit)] location: Location, + #[snafu(implicit)] + backtrace: MaybeBacktrace, }, #[snafu(display("Not found: {uri}, {location}"))] NotFound { uri: String, #[snafu(implicit)] location: Location, + #[snafu(implicit)] + backtrace: MaybeBacktrace, }, #[snafu(display("LanceError(IO): {source}, {location}"))] IO { source: BoxedError, #[snafu(implicit)] location: Location, + #[snafu(implicit)] + backtrace: MaybeBacktrace, }, #[snafu(display("LanceError(Index): {message}, {location}"))] Index { message: String, #[snafu(implicit)] location: Location, + #[snafu(implicit)] + backtrace: MaybeBacktrace, }, #[snafu(display("Lance index not found: {identity}, {location}"))] IndexNotFound { identity: String, #[snafu(implicit)] location: Location, + #[snafu(implicit)] + backtrace: MaybeBacktrace, }, #[snafu(display("Cannot infer storage location from: {message}"))] InvalidTableLocation { message: String }, @@ -184,18 +267,24 @@ pub enum Error { error: BoxedError, #[snafu(implicit)] location: Location, + #[snafu(implicit)] + backtrace: MaybeBacktrace, }, #[snafu(display("Cloned error: {message}, {location}"))] Cloned { message: String, #[snafu(implicit)] location: Location, + #[snafu(implicit)] + backtrace: MaybeBacktrace, }, #[snafu(display("Query Execution error: {message}, {location}"))] Execution { message: String, #[snafu(implicit)] location: Location, + #[snafu(implicit)] + backtrace: MaybeBacktrace, }, #[snafu(display("Ref is invalid: {message}"))] InvalidRef { message: String }, @@ -214,12 +303,16 @@ pub enum Error { minor_version: u16, #[snafu(implicit)] location: Location, + #[snafu(implicit)] + backtrace: MaybeBacktrace, }, #[snafu(display("Namespace error: {source}, {location}"))] Namespace { source: BoxedError, #[snafu(implicit)] location: Location, + #[snafu(implicit)] + backtrace: MaybeBacktrace, }, /// External error passed through from user code. /// @@ -235,6 +328,62 @@ pub enum Error { } impl Error { + /// Returns the captured Rust backtrace, if available. + /// + /// Requires the `backtrace` feature to be enabled at compile time + /// and `RUST_BACKTRACE=1` at runtime. + #[cfg(feature = "backtrace")] + pub fn backtrace(&self) -> Option<&std::backtrace::Backtrace> { + match self { + Self::InvalidInput { backtrace, .. } + | Self::DatasetAlreadyExists { backtrace, .. } + | Self::SchemaMismatch { backtrace, .. } + | Self::DatasetNotFound { backtrace, .. } + | Self::CorruptFile { backtrace, .. } + | Self::NotSupported { backtrace, .. } + | Self::CommitConflict { backtrace, .. } + | Self::IncompatibleTransaction { backtrace, .. } + | Self::RetryableCommitConflict { backtrace, .. } + | Self::TooMuchWriteContention { backtrace, .. } + | Self::Internal { backtrace, .. } + | Self::PrerequisiteFailed { backtrace, .. } + | Self::Unprocessable { backtrace, .. } + | Self::Arrow { backtrace, .. } + | Self::Schema { backtrace, .. } + | Self::NotFound { backtrace, .. } + | Self::IO { backtrace, .. } + | Self::Index { backtrace, .. } + | Self::IndexNotFound { backtrace, .. } + | Self::Wrapped { backtrace, .. } + | Self::Cloned { backtrace, .. } + | Self::Execution { backtrace, .. } + | Self::VersionConflict { backtrace, .. } + | Self::Namespace { backtrace, .. } => { + use snafu::AsBacktrace; + backtrace.as_backtrace() + } + // Variants without a backtrace field — listed explicitly so that + // adding a new variant with a backtrace field triggers a compiler error. + Self::InvalidTableLocation { .. } + | Self::Stop + | Self::InvalidRef { .. } + | Self::RefConflict { .. } + | Self::RefNotFound { .. } + | Self::Cleanup { .. } + | Self::VersionNotFound { .. } + | Self::External { .. } + | Self::FieldNotFound { .. } => None, + } + } + + /// Returns the captured Rust backtrace, if available. + /// + /// Always returns `None` when the `backtrace` feature is not enabled. + #[cfg(not(feature = "backtrace"))] + pub fn backtrace(&self) -> Option<&std::backtrace::Backtrace> { + None + } + #[track_caller] pub fn corrupt_file(path: object_store::path::Path, message: impl Into) -> Self { CorruptFileSnafu { path }.into_error(message.into().into()) @@ -887,4 +1036,68 @@ mod test { _ => panic!("Expected InvalidInput variant, got {:?}", recovered), } } + + #[test] + fn test_backtrace_accessor() { + // Verify that backtrace() returns the expected result based on feature state + let err = Error::io("test backtrace"); + let bt = err.backtrace(); + #[cfg(feature = "backtrace")] + { + // With the backtrace feature enabled, whether a backtrace is captured + // depends on the RUST_BACKTRACE env var at runtime. We just verify + // the accessor doesn't panic and returns a valid Option. + let _ = bt; + } + #[cfg(not(feature = "backtrace"))] + { + // Without the backtrace feature, this must always be None. + assert!(bt.is_none()); + } + } + + #[test] + fn test_backtrace_captured_when_feature_enabled() { + // Test that backtrace is actually captured when the feature is on and + // RUST_BACKTRACE=1 is set in the environment before the process starts. + // + // NOTE: std::backtrace::Backtrace caches the RUST_BACKTRACE env check, + // so set_var at runtime does not reliably enable capture. This test + // verifies the accessor works correctly in both cases: + // - If RUST_BACKTRACE=1 was set before the test binary started, we get Some. + // - If not, we get None (even with the feature on), which is expected. + #[cfg(feature = "backtrace")] + { + let err = Error::io("backtrace capture test"); + if std::env::var("RUST_BACKTRACE").is_ok() { + assert!( + err.backtrace().is_some(), + "Expected a backtrace when RUST_BACKTRACE=1 and backtrace feature is enabled" + ); + } + // When RUST_BACKTRACE is not set, backtrace() may return None even + // with the feature enabled — this is correct runtime gating behavior. + } + #[cfg(not(feature = "backtrace"))] + { + let err = Error::io("backtrace capture test"); + assert!(err.backtrace().is_none()); + } + } + + #[test] + fn test_backtrace_returns_none_for_variants_without_location() { + let err = Error::InvalidTableLocation { + message: "test".to_string(), + }; + assert!(err.backtrace().is_none()); + + let err = Error::InvalidRef { + message: "test".to_string(), + }; + assert!(err.backtrace().is_none()); + + let err = Error::Stop; + assert!(err.backtrace().is_none()); + } } diff --git a/rust/lance/Cargo.toml b/rust/lance/Cargo.toml index e2755822b88..03d5bb8e9c4 100644 --- a/rust/lance/Cargo.toml +++ b/rust/lance/Cargo.toml @@ -121,6 +121,7 @@ datafusion-substrait = { workspace = true } [features] default = ["aws", "azure", "gcp", "oss", "huggingface", "tencent", "geo"] +backtrace = ["lance-core/backtrace"] fp16kernels = ["lance-linalg/fp16kernels"] # Prevent dynamic linking of lzma, which comes from datafusion cli = ["dep:clap", "lzma-sys/static"]