From 51e8471ed7085a130ad6ead8527a08fd9ff34f1e Mon Sep 17 00:00:00 2001 From: leiyuou Date: Mon, 2 Feb 2026 20:18:34 -0800 Subject: [PATCH 01/12] rust code --- .../src/datafusion_planner/analysis.rs | 10 +- .../builder/aggregate_ops.rs | 7 +- .../datafusion_planner/builder/basic_ops.rs | 15 +- .../datafusion_planner/builder/expand_ops.rs | 3 +- .../builder/join_builder.rs | 3 +- .../src/datafusion_planner/config_helpers.rs | 15 +- .../src/datafusion_planner/expression.rs | 215 +++++++++++------- .../src/datafusion_planner/join_ops.rs | 3 + .../lance-graph/src/datafusion_planner/mod.rs | 11 +- .../src/datafusion_planner/scan_ops.rs | 7 +- crates/lance-graph/src/parser.rs | 20 +- crates/lance-graph/src/query.rs | 3 +- .../tests/test_datafusion_pipeline.rs | 49 ++++ 13 files changed, 252 insertions(+), 109 deletions(-) diff --git a/crates/lance-graph/src/datafusion_planner/analysis.rs b/crates/lance-graph/src/datafusion_planner/analysis.rs index 113072c2..f078e77b 100644 --- a/crates/lance-graph/src/datafusion_planner/analysis.rs +++ b/crates/lance-graph/src/datafusion_planner/analysis.rs @@ -38,13 +38,18 @@ pub struct RelationshipInstance { pub struct PlanningContext<'a> { pub analysis: &'a QueryAnalysis, pub(crate) relationship_instance_idx: HashMap, + pub parameters: &'a HashMap, } impl<'a> PlanningContext<'a> { - pub fn new(analysis: &'a QueryAnalysis) -> Self { + pub fn new( + analysis: &'a QueryAnalysis, + parameters: &'a HashMap, + ) -> Self { Self { analysis, relationship_instance_idx: HashMap::new(), + parameters, } } @@ -383,7 +388,8 @@ mod tests { required_datasets: HashSet::new(), }; - let mut ctx = PlanningContext::new(&analysis); + let empty_params = HashMap::new(); + let mut ctx = PlanningContext::new(&analysis, &empty_params); // First call should return first instance let inst1 = ctx.next_relationship_instance("KNOWS").unwrap(); diff --git a/crates/lance-graph/src/datafusion_planner/builder/aggregate_ops.rs b/crates/lance-graph/src/datafusion_planner/builder/aggregate_ops.rs index 6feb2c2e..0f9cbec1 100644 --- a/crates/lance-graph/src/datafusion_planner/builder/aggregate_ops.rs +++ b/crates/lance-graph/src/datafusion_planner/builder/aggregate_ops.rs @@ -3,6 +3,7 @@ //! Aggregation operations: Projection with aggregates and grouping +use crate::datafusion_planner::analysis::PlanningContext; use crate::datafusion_planner::DataFusionPlanner; use crate::error::Result; use crate::logical_plan::*; @@ -11,6 +12,7 @@ use datafusion::logical_expr::{col, LogicalPlan, LogicalPlanBuilder}; impl DataFusionPlanner { pub(crate) fn build_project_with_aggregates( &self, + ctx: &mut PlanningContext, input_plan: LogicalPlan, projections: &[ProjectionItem], ) -> Result { @@ -21,7 +23,7 @@ impl DataFusionPlanner { let mut agg_aliases = Vec::new(); for p in projections { - let expr = super::super::expression::to_df_value_expr(&p.expression); + let expr = super::super::expression::to_df_value_expr(&p.expression, ctx.parameters); if super::super::expression::contains_aggregate(&p.expression) { // Aggregate expressions get aliased @@ -44,7 +46,8 @@ impl DataFusionPlanner { for p in projections { if !super::super::expression::contains_aggregate(&p.expression) { // Re-create the expression and apply alias - let expr = super::super::expression::to_df_value_expr(&p.expression); + let expr = + super::super::expression::to_df_value_expr(&p.expression, ctx.parameters); let aliased = if let Some(alias) = &p.alias { expr.alias(alias) } else { diff --git a/crates/lance-graph/src/datafusion_planner/builder/basic_ops.rs b/crates/lance-graph/src/datafusion_planner/builder/basic_ops.rs index 8f1c893d..10b3c874 100644 --- a/crates/lance-graph/src/datafusion_planner/builder/basic_ops.rs +++ b/crates/lance-graph/src/datafusion_planner/builder/basic_ops.rs @@ -17,7 +17,7 @@ impl DataFusionPlanner { predicate: &crate::ast::BooleanExpression, ) -> Result { let input_plan = self.build_operator(ctx, input)?; - let expr = super::super::expression::to_df_boolean_expr(predicate); + let expr = super::super::expression::to_df_boolean_expr(predicate, ctx.parameters); LogicalPlanBuilder::from(input_plan) .filter(expr) .map_err(|e| self.plan_error("Failed to build filter", e))? @@ -39,21 +39,23 @@ impl DataFusionPlanner { .any(|p| super::super::expression::contains_aggregate(&p.expression)); if has_aggregates { - self.build_project_with_aggregates(input_plan, projections) + self.build_project_with_aggregates(ctx, input_plan, projections) } else { - self.build_simple_project(input_plan, projections) + self.build_simple_project(ctx, input_plan, projections) } } pub(crate) fn build_simple_project( &self, + ctx: &mut PlanningContext, input_plan: LogicalPlan, projections: &[ProjectionItem], ) -> Result { let exprs: Vec = projections .iter() .map(|p| { - let expr = super::super::expression::to_df_value_expr(&p.expression); + let expr = + super::super::expression::to_df_value_expr(&p.expression, ctx.parameters); // Apply alias if provided, otherwise use Cypher dot notation // Normalize alias to lowercase for case-insensitive behavior if let Some(alias) = &p.alias { @@ -98,7 +100,8 @@ impl DataFusionPlanner { let sort_exprs: Vec = sort_items .iter() .map(|item| { - let expr = super::super::expression::to_df_value_expr(&item.expression); + let expr = + super::super::expression::to_df_value_expr(&item.expression, ctx.parameters); let asc = matches!(item.direction, crate::ast::SortDirection::Ascending); SortExpr { expr, @@ -160,7 +163,7 @@ impl DataFusionPlanner { }; // Convert expression to DataFusion Expr - let df_expr = super::super::expression::to_df_value_expr(expression); + let df_expr = super::super::expression::to_df_value_expr(expression, ctx.parameters); // We project the list expression first (aliased as the target alias temporarily) // DataFusion unnest takes a column name. diff --git a/crates/lance-graph/src/datafusion_planner/builder/expand_ops.rs b/crates/lance-graph/src/datafusion_planner/builder/expand_ops.rs index 85231ced..5522fe57 100644 --- a/crates/lance-graph/src/datafusion_planner/builder/expand_ops.rs +++ b/crates/lance-graph/src/datafusion_planner/builder/expand_ops.rs @@ -59,7 +59,7 @@ impl DataFusionPlanner { // Build relationship scan with qualified columns and property filters let rel_scan = - self.build_relationship_scan(&rel_instance, rel_source, relationship_properties)?; + self.build_relationship_scan(ctx, &rel_instance, rel_source, relationship_properties)?; // Join source node with relationship let source_params = SourceJoinParams { @@ -297,6 +297,7 @@ impl DataFusionPlanner { // Build target node scan and join let target_scan = self.build_qualified_target_scan( + ctx, catalog, &target_label, target_variable, diff --git a/crates/lance-graph/src/datafusion_planner/builder/join_builder.rs b/crates/lance-graph/src/datafusion_planner/builder/join_builder.rs index 019d7710..caddf870 100644 --- a/crates/lance-graph/src/datafusion_planner/builder/join_builder.rs +++ b/crates/lance-graph/src/datafusion_planner/builder/join_builder.rs @@ -515,7 +515,8 @@ mod tests { // Analyze both patterns to build the context let left_analysis = analysis::analyze(&expand_left).unwrap(); - let left_ctx = analysis::PlanningContext::new(&left_analysis); + let empty_params = std::collections::HashMap::new(); + let left_ctx = analysis::PlanningContext::new(&left_analysis, &empty_params); // Test the key inference logic directly let (left_keys, right_keys) = diff --git a/crates/lance-graph/src/datafusion_planner/config_helpers.rs b/crates/lance-graph/src/datafusion_planner/config_helpers.rs index fd944330..6065ae00 100644 --- a/crates/lance-graph/src/datafusion_planner/config_helpers.rs +++ b/crates/lance-graph/src/datafusion_planner/config_helpers.rs @@ -140,7 +140,8 @@ mod tests { analysis .var_to_label .insert("b".to_string(), "Person".to_string()); - let ctx = PlanningContext::new(&analysis); + let empty_params = std::collections::HashMap::new(); + let ctx = PlanningContext::new(&analysis, &empty_params); let (label, node_map) = planner .get_target_node_mapping(&ctx, "b") @@ -156,7 +157,8 @@ mod tests { analysis .var_to_label .insert("a".to_string(), "Person".to_string()); - let ctx = PlanningContext::new(&analysis); + let empty_params = std::collections::HashMap::new(); + let ctx = PlanningContext::new(&analysis, &empty_params); let (label, node_map) = planner .get_target_node_mapping(&ctx, "_temp_a_1") @@ -169,7 +171,8 @@ mod tests { fn test_get_target_node_mapping_invalid_temp_variable() { let planner = planner_with_basic_config(); let analysis = QueryAnalysis::default(); - let ctx = PlanningContext::new(&analysis); + let empty_params = std::collections::HashMap::new(); + let ctx = PlanningContext::new(&analysis, &empty_params); let err = planner .get_target_node_mapping(&ctx, "_temp_invalid") @@ -185,7 +188,8 @@ mod tests { analysis .var_to_label .insert("a".to_string(), "Person".to_string()); - let ctx = PlanningContext::new(&analysis); + let empty_params = std::collections::HashMap::new(); + let ctx = PlanningContext::new(&analysis, &empty_params); let err = planner.get_target_node_mapping(&ctx, "c").unwrap_err(); let msg = format!("{}", err); @@ -203,7 +207,8 @@ mod tests { analysis .var_to_label .insert("b".to_string(), "Organization".to_string()); - let ctx = PlanningContext::new(&analysis); + let empty_params = std::collections::HashMap::new(); + let ctx = PlanningContext::new(&analysis, &empty_params); let err = planner.get_target_node_mapping(&ctx, "b").unwrap_err(); let msg = format!("{}", err); diff --git a/crates/lance-graph/src/datafusion_planner/expression.rs b/crates/lance-graph/src/datafusion_planner/expression.rs index 2f4dd7e4..c4a6b581 100644 --- a/crates/lance-graph/src/datafusion_planner/expression.rs +++ b/crates/lance-graph/src/datafusion_planner/expression.rs @@ -18,12 +18,38 @@ use datafusion_functions_aggregate::count::count_distinct; use datafusion_functions_aggregate::min_max::max; use datafusion_functions_aggregate::min_max::min; use datafusion_functions_aggregate::sum::sum; +use std::collections::HashMap; + +/// Helper function to convert serde_json::Value to DataFusion ScalarValue +fn json_to_scalar(value: &serde_json::Value) -> datafusion::scalar::ScalarValue { + use datafusion::scalar::ScalarValue; + match value { + serde_json::Value::Null => ScalarValue::Null, + serde_json::Value::Bool(b) => ScalarValue::Boolean(Some(*b)), + serde_json::Value::Number(n) => { + if let Some(i) = n.as_i64() { + ScalarValue::Int64(Some(i)) + } else if let Some(f) = n.as_f64() { + ScalarValue::Float64(Some(f)) + } else { + ScalarValue::Null + } + } + serde_json::Value::String(s) => ScalarValue::Utf8(Some(s.clone())), + serde_json::Value::Array(_) | serde_json::Value::Object(_) => ScalarValue::Null, // Complex types not supported yet + } +} /// Helper function to create LIKE expressions with consistent settings -fn create_like_expr(expression: &ValueExpression, pattern: &str, case_insensitive: bool) -> Expr { +fn create_like_expr( + expression: &ValueExpression, + pattern: &str, + case_insensitive: bool, + parameters: &HashMap, +) -> Expr { Expr::Like(datafusion::logical_expr::Like { negated: false, - expr: Box::new(to_df_value_expr(expression)), + expr: Box::new(to_df_value_expr(expression, parameters)), pattern: Box::new(lit(pattern.to_string())), escape_char: None, case_insensitive, @@ -31,7 +57,10 @@ fn create_like_expr(expression: &ValueExpression, pattern: &str, case_insensitiv } /// Convert BooleanExpression to DataFusion Expr -pub(crate) fn to_df_boolean_expr(expr: &BooleanExpression) -> Expr { +pub(crate) fn to_df_boolean_expr( + expr: &BooleanExpression, + parameters: &HashMap, +) -> Expr { use crate::ast::{BooleanExpression as BE, ComparisonOperator as CO}; match expr { BE::Comparison { @@ -39,8 +68,8 @@ pub(crate) fn to_df_boolean_expr(expr: &BooleanExpression) -> Expr { operator, right, } => { - let l = to_df_value_expr(left); - let r = to_df_value_expr(right); + let l = to_df_value_expr(left, parameters); + let r = to_df_value_expr(right, parameters); let op = match operator { CO::Equal => Operator::Eq, CO::NotEqual => Operator::NotEq, @@ -57,57 +86,66 @@ pub(crate) fn to_df_boolean_expr(expr: &BooleanExpression) -> Expr { } BE::In { expression, list } => { use datafusion::logical_expr::expr::InList as DFInList; - let expr = to_df_value_expr(expression); - let list_exprs = list.iter().map(to_df_value_expr).collect::>(); + let expr = to_df_value_expr(expression, parameters); + let list_exprs = list + .iter() + .map(|e| to_df_value_expr(e, parameters)) + .collect::>(); Expr::InList(DFInList::new(Box::new(expr), list_exprs, false)) } BE::And(l, r) => Expr::BinaryExpr(BinaryExpr { - left: Box::new(to_df_boolean_expr(l)), + left: Box::new(to_df_boolean_expr(l, parameters)), op: Operator::And, - right: Box::new(to_df_boolean_expr(r)), + right: Box::new(to_df_boolean_expr(r, parameters)), }), BE::Or(l, r) => Expr::BinaryExpr(BinaryExpr { - left: Box::new(to_df_boolean_expr(l)), + left: Box::new(to_df_boolean_expr(l, parameters)), op: Operator::Or, - right: Box::new(to_df_boolean_expr(r)), + right: Box::new(to_df_boolean_expr(r, parameters)), }), - BE::Not(inner) => Expr::Not(Box::new(to_df_boolean_expr(inner))), + BE::Not(inner) => Expr::Not(Box::new(to_df_boolean_expr(inner, parameters))), BE::Exists(prop) => Expr::IsNotNull(Box::new(to_df_value_expr( &ValueExpression::Property(prop.clone()), + parameters, ))), - BE::IsNull(expression) => Expr::IsNull(Box::new(to_df_value_expr(expression))), - BE::IsNotNull(expression) => Expr::IsNotNull(Box::new(to_df_value_expr(expression))), + BE::IsNull(expression) => Expr::IsNull(Box::new(to_df_value_expr(expression, parameters))), + BE::IsNotNull(expression) => { + Expr::IsNotNull(Box::new(to_df_value_expr(expression, parameters))) + } BE::Like { expression, pattern, - } => create_like_expr(expression, pattern, false), + } => create_like_expr(expression, pattern, false, parameters), BE::ILike { expression, pattern, - } => create_like_expr(expression, pattern, true), + } => create_like_expr(expression, pattern, true, parameters), BE::Contains { expression, substring, } => { // CONTAINS is equivalent to LIKE '%substring%' let pattern = format!("%{}%", substring); - create_like_expr(expression, &pattern, false) + create_like_expr(expression, &pattern, false, parameters) } BE::StartsWith { expression, prefix } => { // STARTS WITH is equivalent to LIKE 'prefix%' let pattern = format!("{}%", prefix); - create_like_expr(expression, &pattern, false) + create_like_expr(expression, &pattern, false, parameters) } BE::EndsWith { expression, suffix } => { // ENDS WITH is equivalent to LIKE '%suffix' let pattern = format!("%{}", suffix); - create_like_expr(expression, &pattern, false) + create_like_expr(expression, &pattern, false, parameters) } } } /// Convert ValueExpression to DataFusion Expr -pub(crate) fn to_df_value_expr(expr: &ValueExpression) -> Expr { +pub(crate) fn to_df_value_expr( + expr: &ValueExpression, + parameters: &HashMap, +) -> Expr { use crate::ast::{PropertyValue as PV, ValueExpression as VE}; match expr { VE::Property(prop) => { @@ -122,7 +160,14 @@ pub(crate) fn to_df_value_expr(expr: &ValueExpression) -> Expr { VE::Literal(PV::Null) => { datafusion::logical_expr::Expr::Literal(datafusion::scalar::ScalarValue::Null, None) } - VE::Literal(PV::Parameter(_)) => lit(0), + VE::Literal(PV::Parameter(name)) => { + // Handle parameter in literal (if that ever happens, though usually it's separate) + if let Some(value) = parameters.get(name) { + Expr::Literal(json_to_scalar(value), None) + } else { + col(format!("${}", name)) + } + } VE::Literal(PV::Property(prop)) => { // Create qualified column name: variable__property (lowercase for case-insensitivity) col(qualify_column(&prop.variable, &prop.property)) @@ -131,7 +176,7 @@ pub(crate) fn to_df_value_expr(expr: &ValueExpression) -> Expr { match name.to_lowercase().as_str() { "tolower" | "lower" => { if args.len() == 1 { - let arg_expr = to_df_value_expr(&args[0]); + let arg_expr = to_df_value_expr(&args[0], parameters); lower().call(vec![arg_expr]) } else { // Invalid argument count - return NULL @@ -140,7 +185,7 @@ pub(crate) fn to_df_value_expr(expr: &ValueExpression) -> Expr { } "toupper" | "upper" => { if args.len() == 1 { - let arg_expr = to_df_value_expr(&args[0]); + let arg_expr = to_df_value_expr(&args[0], parameters); upper().call(vec![arg_expr]) } else { // Invalid argument count - return NULL @@ -182,7 +227,7 @@ pub(crate) fn to_df_value_expr(expr: &ValueExpression) -> Expr { } } else { // COUNT(p.property) - count non-null values of that property - to_df_value_expr(&args[0]) + to_df_value_expr(&args[0], parameters) }; // Use DataFusion's count or count_distinct @@ -198,7 +243,7 @@ pub(crate) fn to_df_value_expr(expr: &ValueExpression) -> Expr { } "sum" => { if args.len() == 1 { - let arg_expr = to_df_value_expr(&args[0]); + let arg_expr = to_df_value_expr(&args[0], parameters); sum(arg_expr) } else { lit(0) @@ -206,7 +251,7 @@ pub(crate) fn to_df_value_expr(expr: &ValueExpression) -> Expr { } "avg" => { if args.len() == 1 { - let arg_expr = to_df_value_expr(&args[0]); + let arg_expr = to_df_value_expr(&args[0], parameters); avg(arg_expr) } else { lit(0) @@ -214,7 +259,7 @@ pub(crate) fn to_df_value_expr(expr: &ValueExpression) -> Expr { } "min" => { if args.len() == 1 { - let arg_expr = to_df_value_expr(&args[0]); + let arg_expr = to_df_value_expr(&args[0], parameters); min(arg_expr) } else { lit(0) @@ -222,7 +267,7 @@ pub(crate) fn to_df_value_expr(expr: &ValueExpression) -> Expr { } "max" => { if args.len() == 1 { - let arg_expr = to_df_value_expr(&args[0]); + let arg_expr = to_df_value_expr(&args[0], parameters); max(arg_expr) } else { lit(0) @@ -230,7 +275,7 @@ pub(crate) fn to_df_value_expr(expr: &ValueExpression) -> Expr { } "collect" => { if args.len() == 1 { - let arg_expr = to_df_value_expr(&args[0]); + let arg_expr = to_df_value_expr(&args[0], parameters); array_agg(arg_expr) } else { lit(0) @@ -253,8 +298,8 @@ pub(crate) fn to_df_value_expr(expr: &ValueExpression) -> Expr { right, } => { use crate::ast::ArithmeticOperator as AO; - let l = to_df_value_expr(left); - let r = to_df_value_expr(right); + let l = to_df_value_expr(left, parameters); + let r = to_df_value_expr(right, parameters); let op = match operator { AO::Add => Operator::Plus, AO::Subtract => Operator::Minus, @@ -275,8 +320,8 @@ pub(crate) fn to_df_value_expr(expr: &ValueExpression) -> Expr { } => { // Create UDF for vector distance computation let udf = udf::create_vector_distance_udf(metric); - let left_expr = to_df_value_expr(left); - let right_expr = to_df_value_expr(right); + let left_expr = to_df_value_expr(left, parameters); + let right_expr = to_df_value_expr(right, parameters); Expr::ScalarFunction(datafusion::logical_expr::expr::ScalarFunction::new_udf( udf, vec![left_expr, right_expr], @@ -289,8 +334,8 @@ pub(crate) fn to_df_value_expr(expr: &ValueExpression) -> Expr { } => { // Create UDF for vector similarity computation let udf = udf::create_vector_similarity_udf(metric); - let left_expr = to_df_value_expr(left); - let right_expr = to_df_value_expr(right); + let left_expr = to_df_value_expr(left, parameters); + let right_expr = to_df_value_expr(right, parameters); Expr::ScalarFunction(datafusion::logical_expr::expr::ScalarFunction::new_udf( udf, vec![left_expr, right_expr], @@ -316,18 +361,11 @@ pub(crate) fn to_df_value_expr(expr: &ValueExpression) -> Expr { lit(scalar) } VE::Parameter(name) => { - // TODO: Implement proper parameter resolution - // Parameters ($param) should be resolved to literal values from the query's - // parameter map (CypherQuery::parameters()) before or during planning. - // - // Current limitation: This creates a column reference as a placeholder, - // which will fail at execution if the column doesn't exist. - // - // Proper fix requires one of: - // 1. Resolve parameters during semantic analysis (substitute before planning) - // 2. Pass parameter map to to_df_value_expr and resolve here - // 3. Use DataFusion's parameter binding mechanism - col(format!("${}", name)) + if let Some(value) = parameters.get(name) { + Expr::Literal(json_to_scalar(value), None) + } else { + col(format!("${}", name)) + } } } } @@ -420,7 +458,7 @@ mod tests { right: ValueExpression::Literal(PropertyValue::Integer(30)), }; - let df_expr = to_df_boolean_expr(&expr); + let df_expr = to_df_boolean_expr(&expr, &std::collections::HashMap::new()); let s = format!("{:?}", df_expr); assert!(s.contains("p__age"), "Should contain qualified column"); assert!( @@ -451,7 +489,7 @@ mod tests { right: ValueExpression::Literal(PropertyValue::Integer(30)), }; - let df_expr = to_df_boolean_expr(&expr); + let df_expr = to_df_boolean_expr(&expr, &std::collections::HashMap::new()); // Should successfully translate without panicking assert!(format!("{:?}", df_expr).contains("p__age")); } @@ -479,7 +517,7 @@ mod tests { }), ); - let df_expr = to_df_boolean_expr(&expr); + let df_expr = to_df_boolean_expr(&expr, &std::collections::HashMap::new()); let s = format!("{:?}", df_expr); assert!(s.contains("And"), "Should contain AND operator"); assert!(s.contains("p__age"), "Should contain column reference"); @@ -507,7 +545,7 @@ mod tests { }), ); - let df_expr = to_df_boolean_expr(&expr); + let df_expr = to_df_boolean_expr(&expr, &std::collections::HashMap::new()); let s = format!("{:?}", df_expr); assert!(s.contains("Or"), "Should contain OR operator"); } @@ -524,7 +562,7 @@ mod tests { right: ValueExpression::Literal(PropertyValue::Boolean(true)), })); - let df_expr = to_df_boolean_expr(&expr); + let df_expr = to_df_boolean_expr(&expr, &std::collections::HashMap::new()); let s = format!("{:?}", df_expr); assert!(s.contains("Not"), "Should contain NOT operator"); } @@ -536,7 +574,7 @@ mod tests { property: "email".into(), }); - let df_expr = to_df_boolean_expr(&expr); + let df_expr = to_df_boolean_expr(&expr, &std::collections::HashMap::new()); let s = format!("{:?}", df_expr); assert!( s.contains("IsNotNull") || s.contains("p__email"), @@ -557,7 +595,8 @@ mod tests { ], }; - if let Expr::InList(in_list) = to_df_boolean_expr(&expr) { + if let Expr::InList(in_list) = to_df_boolean_expr(&expr, &std::collections::HashMap::new()) + { assert!(!in_list.negated); assert_eq!(in_list.list.len(), 2); match *in_list.expr { @@ -581,7 +620,8 @@ mod tests { pattern: "A%".into(), }; - if let Expr::Like(like_expr) = to_df_boolean_expr(&expr) { + if let Expr::Like(like_expr) = to_df_boolean_expr(&expr, &std::collections::HashMap::new()) + { assert!(!like_expr.negated, "Should not be negated"); assert!(!like_expr.case_insensitive, "Should be case sensitive"); assert_eq!(like_expr.escape_char, None, "Should have no escape char"); @@ -611,7 +651,8 @@ mod tests { pattern: "alice%".into(), }; - if let Expr::Like(like_expr) = to_df_boolean_expr(&expr) { + if let Expr::Like(like_expr) = to_df_boolean_expr(&expr, &std::collections::HashMap::new()) + { assert!(!like_expr.negated, "Should not be negated"); assert!( like_expr.case_insensitive, @@ -652,7 +693,8 @@ mod tests { pattern: "Test%".into(), }; - if let Expr::Like(like) = to_df_boolean_expr(&like_expr) { + if let Expr::Like(like) = to_df_boolean_expr(&like_expr, &std::collections::HashMap::new()) + { assert!( !like.case_insensitive, "LIKE should be case-sensitive (case_insensitive = false)" @@ -670,7 +712,9 @@ mod tests { pattern: "Test%".into(), }; - if let Expr::Like(ilike) = to_df_boolean_expr(&ilike_expr) { + if let Expr::Like(ilike) = + to_df_boolean_expr(&ilike_expr, &std::collections::HashMap::new()) + { assert!( ilike.case_insensitive, "ILIKE should be case-insensitive (case_insensitive = true)" @@ -690,7 +734,7 @@ mod tests { pattern: "%@example.com".into(), }; - let df_expr = to_df_boolean_expr(&expr); + let df_expr = to_df_boolean_expr(&expr, &std::collections::HashMap::new()); let s = format!("{:?}", df_expr); assert!( s.contains("Like") || s.contains("like"), @@ -709,7 +753,8 @@ mod tests { substring: "ali".into(), }; - if let Expr::Like(like_expr) = to_df_boolean_expr(&expr) { + if let Expr::Like(like_expr) = to_df_boolean_expr(&expr, &std::collections::HashMap::new()) + { assert!(!like_expr.negated, "Should not be negated"); assert!(!like_expr.case_insensitive, "Should be case sensitive"); assert_eq!(like_expr.escape_char, None, "Should have no escape char"); @@ -745,7 +790,8 @@ mod tests { prefix: "admin".into(), }; - if let Expr::Like(like_expr) = to_df_boolean_expr(&expr) { + if let Expr::Like(like_expr) = to_df_boolean_expr(&expr, &std::collections::HashMap::new()) + { assert!(!like_expr.negated, "Should not be negated"); assert!(!like_expr.case_insensitive, "Should be case sensitive"); @@ -784,7 +830,8 @@ mod tests { suffix: "@example.com".into(), }; - if let Expr::Like(like_expr) = to_df_boolean_expr(&expr) { + if let Expr::Like(like_expr) = to_df_boolean_expr(&expr, &std::collections::HashMap::new()) + { assert!(!like_expr.negated, "Should not be negated"); assert!(!like_expr.case_insensitive, "Should be case sensitive"); @@ -824,7 +871,8 @@ mod tests { substring: "Test".into(), }; - if let Expr::Like(like_expr) = to_df_boolean_expr(&expr) { + if let Expr::Like(like_expr) = to_df_boolean_expr(&expr, &std::collections::HashMap::new()) + { assert!( !like_expr.case_insensitive, "CONTAINS should be case-sensitive by default" @@ -842,7 +890,8 @@ mod tests { substring: "test".into(), }; - if let Expr::Like(like_expr) = to_df_boolean_expr(&expr) { + if let Expr::Like(like_expr) = to_df_boolean_expr(&expr, &std::collections::HashMap::new()) + { match *like_expr.expr { Expr::Column(ref col_expr) => { assert_eq!( @@ -869,7 +918,7 @@ mod tests { property: "name".into(), }); - let df_expr = to_df_value_expr(&expr); + let df_expr = to_df_value_expr(&expr, &std::collections::HashMap::new()); let s = format!("{:?}", df_expr); assert_eq!( s, @@ -880,7 +929,7 @@ mod tests { #[test] fn test_value_expr_literal_integer() { let expr = ValueExpression::Literal(PropertyValue::Integer(42)); - let df_expr = to_df_value_expr(&expr); + let df_expr = to_df_value_expr(&expr, &std::collections::HashMap::new()); let s = format!("{:?}", df_expr); assert!(s.contains("42") || s.contains("Int64(42)")); } @@ -888,7 +937,7 @@ mod tests { #[test] fn test_value_expr_literal_float() { let expr = ValueExpression::Literal(PropertyValue::Float(std::f64::consts::PI)); - let df_expr = to_df_value_expr(&expr); + let df_expr = to_df_value_expr(&expr, &std::collections::HashMap::new()); let s = format!("{:?}", df_expr); assert!(s.contains("3.14") || s.contains("Float64")); } @@ -896,7 +945,7 @@ mod tests { #[test] fn test_value_expr_literal_string() { let expr = ValueExpression::Literal(PropertyValue::String("hello".into())); - let df_expr = to_df_value_expr(&expr); + let df_expr = to_df_value_expr(&expr, &std::collections::HashMap::new()); let s = format!("{:?}", df_expr); assert!(s.contains("hello") || s.contains("Utf8")); } @@ -904,7 +953,7 @@ mod tests { #[test] fn test_value_expr_literal_boolean() { let expr = ValueExpression::Literal(PropertyValue::Boolean(true)); - let df_expr = to_df_value_expr(&expr); + let df_expr = to_df_value_expr(&expr, &std::collections::HashMap::new()); let s = format!("{:?}", df_expr); assert!(s.contains("true") || s.contains("Boolean")); } @@ -912,7 +961,7 @@ mod tests { #[test] fn test_value_expr_literal_null() { let expr = ValueExpression::Literal(PropertyValue::Null); - let df_expr = to_df_value_expr(&expr); + let df_expr = to_df_value_expr(&expr, &std::collections::HashMap::new()); let s = format!("{:?}", df_expr); // Null literals are translated to Literal with Null value assert!(s.contains("Literal"), "Should be a Literal expression"); @@ -930,7 +979,7 @@ mod tests { right: Box::new(ValueExpression::Literal(PropertyValue::Integer(5))), }; - let df_expr = to_df_value_expr(&expr); + let df_expr = to_df_value_expr(&expr, &std::collections::HashMap::new()); let s = format!("{:?}", df_expr); // Arithmetic expressions should now return a BinaryExpr with Plus operator assert!(s.contains("BinaryExpr"), "Should be a BinaryExpr"); @@ -959,7 +1008,7 @@ mod tests { right: Box::new(ValueExpression::Literal(PropertyValue::Integer(2))), }; - let df_expr = to_df_value_expr(&expr); + let df_expr = to_df_value_expr(&expr, &std::collections::HashMap::new()); let s = format!("{:?}", df_expr); // Should translate to BinaryExpr with the correct operator assert!( @@ -983,7 +1032,7 @@ mod tests { distinct: false, }; - let df_expr = to_df_value_expr(&expr); + let df_expr = to_df_value_expr(&expr, &std::collections::HashMap::new()); let s = format!("{:?}", df_expr); assert!( s.contains("count") || s.contains("Count"), @@ -1002,7 +1051,7 @@ mod tests { distinct: false, }; - let df_expr = to_df_value_expr(&expr); + let df_expr = to_df_value_expr(&expr, &std::collections::HashMap::new()); let s = format!("{:?}", df_expr); assert!( s.contains("count") || s.contains("Count"), @@ -1022,7 +1071,7 @@ mod tests { distinct: false, }; - let df_expr = to_df_value_expr(&expr); + let df_expr = to_df_value_expr(&expr, &std::collections::HashMap::new()); let s = format!("{:?}", df_expr); assert!( s.contains("sum") || s.contains("Sum"), @@ -1042,7 +1091,7 @@ mod tests { distinct: false, }; - let df_expr = to_df_value_expr(&expr); + let df_expr = to_df_value_expr(&expr, &std::collections::HashMap::new()); let s = format!("{:?}", df_expr); assert!( s.contains("avg") || s.contains("Avg"), @@ -1062,7 +1111,7 @@ mod tests { distinct: false, }; - let df_expr = to_df_value_expr(&expr); + let df_expr = to_df_value_expr(&expr, &std::collections::HashMap::new()); let s = format!("{:?}", df_expr); assert!( s.contains("min") || s.contains("Min"), @@ -1082,7 +1131,7 @@ mod tests { distinct: false, }; - let df_expr = to_df_value_expr(&expr); + let df_expr = to_df_value_expr(&expr, &std::collections::HashMap::new()); let s = format!("{:?}", df_expr); assert!( s.contains("max") || s.contains("Max"), @@ -1101,7 +1150,7 @@ mod tests { })], }; - let df_expr = to_df_value_expr(&expr); + let df_expr = to_df_value_expr(&expr, &std::collections::HashMap::new()); let s = format!("{:?}", df_expr); // Should be a ScalarFunction with lower assert!( @@ -1122,7 +1171,7 @@ mod tests { })], }; - let df_expr = to_df_value_expr(&expr); + let df_expr = to_df_value_expr(&expr, &std::collections::HashMap::new()); let s = format!("{:?}", df_expr); // Should be a ScalarFunction with upper assert!( @@ -1144,7 +1193,7 @@ mod tests { })], }; - let df_expr = to_df_value_expr(&expr); + let df_expr = to_df_value_expr(&expr, &std::collections::HashMap::new()); let s = format!("{:?}", df_expr); assert!( s.contains("lower") || s.contains("Lower"), @@ -1164,7 +1213,7 @@ mod tests { })], }; - let df_expr = to_df_value_expr(&expr); + let df_expr = to_df_value_expr(&expr, &std::collections::HashMap::new()); let s = format!("{:?}", df_expr); assert!( s.contains("upper") || s.contains("Upper"), @@ -1190,7 +1239,7 @@ mod tests { substring: "offer".into(), }; - let df_expr = to_df_boolean_expr(&contains_expr); + let df_expr = to_df_boolean_expr(&contains_expr, &HashMap::new()); let s = format!("{:?}", df_expr); // Should be a Like expression with lower() on the column, not lit(0) diff --git a/crates/lance-graph/src/datafusion_planner/join_ops.rs b/crates/lance-graph/src/datafusion_planner/join_ops.rs index d88a1daa..a84a4561 100644 --- a/crates/lance-graph/src/datafusion_planner/join_ops.rs +++ b/crates/lance-graph/src/datafusion_planner/join_ops.rs @@ -48,6 +48,7 @@ impl DataFusionPlanner { target_label: &str, target_variable: &str, target_properties: &HashMap, + parameters: &HashMap, ) -> Result { let target_schema = target_source.schema(); let normalized_target_label = target_label.to_lowercase(); @@ -60,6 +61,7 @@ impl DataFusionPlanner { for (k, v) in target_properties.iter() { let lit_expr = super::expression::to_df_value_expr( &crate::ast::ValueExpression::Literal(v.clone()), + parameters, ); let filter_expr = Expr::BinaryExpr(BinaryExpr { left: Box::new(col(k.to_lowercase())), @@ -213,6 +215,7 @@ impl DataFusionPlanner { &target_label, params.target_variable, params.target_properties, + ctx.parameters, )?; // Determine target join keys diff --git a/crates/lance-graph/src/datafusion_planner/mod.rs b/crates/lance-graph/src/datafusion_planner/mod.rs index dfb4bb5b..5c7e8acb 100644 --- a/crates/lance-graph/src/datafusion_planner/mod.rs +++ b/crates/lance-graph/src/datafusion_planner/mod.rs @@ -34,6 +34,7 @@ use crate::error::Result; use crate::logical_plan::LogicalOperator; use datafusion::logical_expr::LogicalPlan; use lance_graph_catalog::GraphSourceCatalog; +use std::collections::HashMap; use std::sync::Arc; /// Planner abstraction for graph-to-physical planning @@ -45,6 +46,7 @@ pub trait GraphPhysicalPlanner { pub struct DataFusionPlanner { pub(crate) config: GraphConfig, pub(crate) catalog: Option>, + pub(crate) parameters: HashMap, } impl DataFusionPlanner { @@ -52,6 +54,7 @@ impl DataFusionPlanner { Self { config, catalog: None, + parameters: HashMap::new(), } } @@ -59,9 +62,15 @@ impl DataFusionPlanner { Self { config, catalog: Some(catalog), + parameters: HashMap::new(), } } + pub fn with_parameters(mut self, params: HashMap) -> Self { + self.parameters = params; + self + } + /// Helper to convert DataFusion builder errors into GraphError::PlanError with context pub(crate) fn plan_error( &self, @@ -81,7 +90,7 @@ impl GraphPhysicalPlanner for DataFusionPlanner { let analysis = analysis::analyze(logical_plan)?; // Phase 2: Build execution plan with context - let mut ctx = PlanningContext::new(&analysis); + let mut ctx = PlanningContext::new(&analysis, &self.parameters); self.build_operator(&mut ctx, logical_plan) } } diff --git a/crates/lance-graph/src/datafusion_planner/scan_ops.rs b/crates/lance-graph/src/datafusion_planner/scan_ops.rs index 200ae9a1..5c27acfe 100644 --- a/crates/lance-graph/src/datafusion_planner/scan_ops.rs +++ b/crates/lance-graph/src/datafusion_planner/scan_ops.rs @@ -21,7 +21,7 @@ impl DataFusionPlanner { /// Build a qualified node scan with property filters and column aliasing pub(crate) fn build_scan( &self, - _ctx: &PlanningContext, + ctx: &PlanningContext, variable: &str, label: &str, properties: &HashMap, @@ -46,6 +46,7 @@ impl DataFusionPlanner { .map(|(k, v)| { let lit_expr = super::expression::to_df_value_expr( &crate::ast::ValueExpression::Literal(v.clone()), + ctx.parameters, ); Expr::BinaryExpr(BinaryExpr { left: Box::new(col(k)), @@ -122,6 +123,7 @@ impl DataFusionPlanner { /// Build a qualified relationship scan with property filters pub(crate) fn build_relationship_scan( &self, + ctx: &PlanningContext, rel_instance: &RelationshipInstance, rel_source: Arc, relationship_properties: &HashMap, @@ -140,6 +142,7 @@ impl DataFusionPlanner { for (k, v) in relationship_properties.iter() { let lit_expr = super::expression::to_df_value_expr( &crate::ast::ValueExpression::Literal(v.clone()), + ctx.parameters, ); let filter_expr = Expr::BinaryExpr(BinaryExpr { left: Box::new(col(k)), @@ -259,6 +262,7 @@ impl DataFusionPlanner { /// Build a qualified target node scan with property filters pub(crate) fn build_qualified_target_scan( &self, + ctx: &PlanningContext, catalog: &Arc, target_label: &str, target_variable: &str, @@ -285,6 +289,7 @@ impl DataFusionPlanner { for (k, v) in target_properties.iter() { let lit_expr = super::expression::to_df_value_expr( &crate::ast::ValueExpression::Literal(v.clone()), + ctx.parameters, ); let filter_expr = Expr::BinaryExpr(BinaryExpr { left: Box::new(col(k)), diff --git a/crates/lance-graph/src/parser.rs b/crates/lance-graph/src/parser.rs index 8887ce23..65b9360e 100644 --- a/crates/lance-graph/src/parser.rs +++ b/crates/lance-graph/src/parser.rs @@ -589,9 +589,8 @@ fn parse_vector_similarity(input: &str) -> IResult<&str, ValueExpression> { // Parse parameter reference: $name fn parse_parameter(input: &str) -> IResult<&str, ValueExpression> { - let (input, _) = char('$')(input)?; - let (input, name) = identifier(input)?; - Ok((input, ValueExpression::Parameter(name.to_string()))) + let (input, name) = parameter(input)?; + Ok((input, ValueExpression::Parameter(name))) } // Parse a function call: function_name(args) @@ -973,9 +972,18 @@ fn boolean_literal(input: &str) -> IResult<&str, bool> { // Parse a parameter reference fn parameter(input: &str) -> IResult<&str, String> { - let (input, _) = char('$')(input)?; - let (input, name) = identifier(input)?; - Ok((input, name.to_string())) + alt(( + // $param + map(preceded(char('$'), identifier), |s| s.to_string()), + // @param + map(preceded(char('@'), identifier), |s| s.to_string()), + // :param + map(preceded(char(':'), identifier), |s| s.to_string()), + // {param} + map(delimited(char('{'), identifier, char('}')), |s| { + s.to_string() + }), + ))(input) } // Parse comma with optional whitespace diff --git a/crates/lance-graph/src/query.rs b/crates/lance-graph/src/query.rs index bdf33384..081d4a6c 100644 --- a/crates/lance-graph/src/query.rs +++ b/crates/lance-graph/src/query.rs @@ -782,7 +782,8 @@ impl CypherQuery { let logical_plan = logical_planner.plan(&self.ast)?; // Phase 3: DataFusion Logical Plan - let df_planner = DataFusionPlanner::with_catalog(config.clone(), catalog); + let df_planner = DataFusionPlanner::with_catalog(config.clone(), catalog) + .with_parameters(self.parameters.clone()); let df_logical_plan = df_planner.plan(&logical_plan)?; Ok((logical_plan, df_logical_plan)) diff --git a/crates/lance-graph/tests/test_datafusion_pipeline.rs b/crates/lance-graph/tests/test_datafusion_pipeline.rs index b1b203bb..906f27e6 100644 --- a/crates/lance-graph/tests/test_datafusion_pipeline.rs +++ b/crates/lance-graph/tests/test_datafusion_pipeline.rs @@ -5101,3 +5101,52 @@ async fn test_datafusion_variable_reuse_multi_pattern_optimization() { person_scan_count ); } + +#[tokio::test] +async fn test_datafusion_parameter_filtering_age() { + let config = create_graph_config(); + let person_batch = create_person_dataset(); + + let mut params = HashMap::new(); + // Filter for people older than 30 (Bob:35, David:40) + params.insert("min_age".to_string(), serde_json::json!(30)); + + let query = CypherQuery::new("MATCH (p:Person) WHERE p.age > $min_age RETURN p.name, p.age") + .unwrap() + .with_config(config) + .with_parameters(params); + + let mut datasets = HashMap::new(); + datasets.insert("Person".to_string(), person_batch); + + let result = query + .execute(datasets, Some(ExecutionStrategy::DataFusion)) + .await + .unwrap(); + + // Should return 2 people (Bob:35, David:40) + assert_eq!(result.num_rows(), 2); + assert_eq!(result.num_columns(), 2); + + let names = result + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + let ages = result + .column(1) + .as_any() + .downcast_ref::() + .unwrap(); + + let mut results = Vec::new(); + for i in 0..result.num_rows() { + results.push((names.value(i).to_string(), ages.value(i))); + } + + results.sort(); + assert_eq!( + results, + vec![("Bob".to_string(), 35), ("David".to_string(), 40)] + ); +} From cbeeb017e122a89d292ba910cf76a95e7b445c40 Mon Sep 17 00:00:00 2001 From: leiyuou Date: Tue, 3 Feb 2026 21:08:59 -0800 Subject: [PATCH 02/12] add python test --- crates/lance-graph/src/parser.rs | 86 +++++++++++++++++++++++ crates/lance-graph/src/query.rs | 10 ++- python/python/tests/test_cypher_engine.py | 44 ++++++++++++ 3 files changed, 137 insertions(+), 3 deletions(-) diff --git a/crates/lance-graph/src/parser.rs b/crates/lance-graph/src/parser.rs index 65b9360e..d0e3a934 100644 --- a/crates/lance-graph/src/parser.rs +++ b/crates/lance-graph/src/parser.rs @@ -1707,6 +1707,92 @@ mod tests { } } + #[test] + fn test_parse_multiple_parameters() { + let query = "MATCH (p:Person) WHERE p.age > $min_age AND p.age < $max_age RETURN p"; + let result = parse_cypher_query(query); + assert!( + result.is_ok(), + "Multiple parameters should parse successfully" + ); + + let ast = result.unwrap(); + let where_clause = ast.where_clause.expect("Expected WHERE clause"); + + match where_clause.expression { + BooleanExpression::And(left, right) => { + // Check left: p.age > $min_age + match *left { + BooleanExpression::Comparison { + right: val_right, .. + } => match val_right { + ValueExpression::Parameter(name) => { + assert_eq!(name, "min_age"); + } + _ => panic!("Expected Parameter min_age"), + }, + _ => panic!("Expected comparison on left"), + } + + // Check right: p.age < $max_age + match *right { + BooleanExpression::Comparison { + right: val_right, .. + } => match val_right { + ValueExpression::Parameter(name) => { + assert_eq!(name, "max_age"); + } + _ => panic!("Expected Parameter max_age"), + }, + _ => panic!("Expected comparison on right"), + } + } + _ => panic!("Expected AND expression"), + } + } + + #[test] + fn test_parse_parameter_formats() { + // Test @param + let query = "MATCH (p:Person) WHERE p.age > @min_age RETURN p"; + let result = parse_cypher_query(query); + assert!(result.is_ok(), "@param should parse successfully"); + let where_clause = result.unwrap().where_clause.expect("Expected WHERE clause"); + match where_clause.expression { + BooleanExpression::Comparison { right, .. } => match right { + ValueExpression::Parameter(name) => assert_eq!(name, "min_age"), + _ => panic!("Expected Parameter for @param"), + }, + _ => panic!("Expected comparison"), + } + + // Test :param + let query = "MATCH (p:Person) WHERE p.age > :min_age RETURN p"; + let result = parse_cypher_query(query); + assert!(result.is_ok(), ":param should parse successfully"); + let where_clause = result.unwrap().where_clause.expect("Expected WHERE clause"); + match where_clause.expression { + BooleanExpression::Comparison { right, .. } => match right { + ValueExpression::Parameter(name) => assert_eq!(name, "min_age"), + _ => panic!("Expected Parameter for :param"), + }, + _ => panic!("Expected comparison"), + } + + // Test {param} + let query = "MATCH (p:Person) WHERE p.age > {min_age} RETURN p"; + let result = parse_cypher_query(query); + assert!(result.is_ok(), "{{param}} should parse successfully"); + let where_clause = result.unwrap().where_clause.expect("Expected WHERE clause"); + match where_clause.expression { + BooleanExpression::Comparison { right, .. } => match right { + ValueExpression::Parameter(name) => assert_eq!(name, "min_age"), + _ => panic!("Expected Parameter for {{param}}"), + }, + _ => panic!("Expected comparison"), + } + } + #[test] fn test_vector_distance_metrics() { for metric in &["cosine", "l2", "dot"] { diff --git a/crates/lance-graph/src/query.rs b/crates/lance-graph/src/query.rs index 081d4a6c..63bbfd11 100644 --- a/crates/lance-graph/src/query.rs +++ b/crates/lance-graph/src/query.rs @@ -1490,12 +1490,16 @@ mod tests { fn test_query_with_parameters() { let mut params = HashMap::new(); params.insert("minAge".to_string(), serde_json::Value::Number(30.into())); + params.insert("maxAge".to_string(), serde_json::Value::Number(50.into())); - let query = CypherQuery::new("MATCH (n:Person) WHERE n.age > $minAge RETURN n.name") - .unwrap() - .with_parameters(params); + let query = CypherQuery::new( + "MATCH (n:Person) WHERE n.age > $minAge AND n.age < $maxAge RETURN n.name", + ) + .unwrap() + .with_parameters(params); assert!(query.parameters().contains_key("minAge")); + assert!(query.parameters().contains_key("maxAge")); } #[test] diff --git a/python/python/tests/test_cypher_engine.py b/python/python/tests/test_cypher_engine.py index 84372f69..e5efac76 100644 --- a/python/python/tests/test_cypher_engine.py +++ b/python/python/tests/test_cypher_engine.py @@ -166,3 +166,47 @@ def test_cypher_engine_config_access(graph_env): assert "person" in engine_config.node_labels() # case-insensitive assert "company" in engine_config.node_labels() + + +def test_cypher_parameter_syntax(graph_env): + """Test various Cypher parameter syntaxes ($ @ : {}).""" + config, datasets = graph_env + + # 1. Test $param + query_dollar = CypherQuery("MATCH (p:Person) WHERE p.age > $age RETURN p.name").with_config(config) + result = query_dollar.with_parameter("age", 30).execute(datasets) + data = result.to_pydict() + assert set(data["p.name"]) == {"Bob", "David"} + + # 2. Test @param + query_at = CypherQuery("MATCH (p:Person) WHERE p.age > @age RETURN p.name").with_config(config) + result = query_at.with_parameter("age", 30).execute(datasets) + data = result.to_pydict() + assert set(data["p.name"]) == {"Bob", "David"} + + # 3. Test :param + query_colon = CypherQuery("MATCH (p:Person) WHERE p.age > :age RETURN p.name").with_config(config) + result = query_colon.with_parameter("age", 30).execute(datasets) + data = result.to_pydict() + assert set(data["p.name"]) == {"Bob", "David"} + + # 4. Test {param} + query_curly = CypherQuery("MATCH (p:Person) WHERE p.age > {age} RETURN p.name").with_config(config) + result = query_curly.with_parameter("age", 30).execute(datasets) + data = result.to_pydict() + assert set(data["p.name"]) == {"Bob", "David"} + + # 5. Test multiple parameters + query_multi = CypherQuery( + "MATCH (p:Person) WHERE p.age > $min_age AND p.age < $max_age RETURN p.name" + ).with_config(config) + result = ( + query_multi + .with_parameter("min_age", 25) + .with_parameter("max_age", 35) + .execute(datasets) + ) + data = result.to_pydict() + # Should get Alice (28), Carol (29), Bob (34) + # David is 42 (excluded) + assert set(data["p.name"]) == {"Alice", "Carol", "Bob"} From c2d08257715b72c775de12e9270aece125fcdd44 Mon Sep 17 00:00:00 2001 From: leiyuou Date: Tue, 3 Feb 2026 21:42:34 -0800 Subject: [PATCH 03/12] py format --- python/python/tests/test_cypher_engine.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/python/python/tests/test_cypher_engine.py b/python/python/tests/test_cypher_engine.py index e5efac76..89fcc720 100644 --- a/python/python/tests/test_cypher_engine.py +++ b/python/python/tests/test_cypher_engine.py @@ -173,25 +173,33 @@ def test_cypher_parameter_syntax(graph_env): config, datasets = graph_env # 1. Test $param - query_dollar = CypherQuery("MATCH (p:Person) WHERE p.age > $age RETURN p.name").with_config(config) + query_dollar = CypherQuery( + "MATCH (p:Person) WHERE p.age > $age RETURN p.name" + ).with_config(config) result = query_dollar.with_parameter("age", 30).execute(datasets) data = result.to_pydict() assert set(data["p.name"]) == {"Bob", "David"} # 2. Test @param - query_at = CypherQuery("MATCH (p:Person) WHERE p.age > @age RETURN p.name").with_config(config) + query_at = CypherQuery( + "MATCH (p:Person) WHERE p.age > @age RETURN p.name" + ).with_config(config) result = query_at.with_parameter("age", 30).execute(datasets) data = result.to_pydict() assert set(data["p.name"]) == {"Bob", "David"} # 3. Test :param - query_colon = CypherQuery("MATCH (p:Person) WHERE p.age > :age RETURN p.name").with_config(config) + query_colon = CypherQuery( + "MATCH (p:Person) WHERE p.age > :age RETURN p.name" + ).with_config(config) result = query_colon.with_parameter("age", 30).execute(datasets) data = result.to_pydict() assert set(data["p.name"]) == {"Bob", "David"} # 4. Test {param} - query_curly = CypherQuery("MATCH (p:Person) WHERE p.age > {age} RETURN p.name").with_config(config) + query_curly = CypherQuery( + "MATCH (p:Person) WHERE p.age > {age} RETURN p.name" + ).with_config(config) result = query_curly.with_parameter("age", 30).execute(datasets) data = result.to_pydict() assert set(data["p.name"]) == {"Bob", "David"} @@ -201,8 +209,7 @@ def test_cypher_parameter_syntax(graph_env): "MATCH (p:Person) WHERE p.age > $min_age AND p.age < $max_age RETURN p.name" ).with_config(config) result = ( - query_multi - .with_parameter("min_age", 25) + query_multi.with_parameter("min_age", 25) .with_parameter("max_age", 35) .execute(datasets) ) From 2ebec6ef21f3c99123900505a1a6c0f9444096dc Mon Sep 17 00:00:00 2001 From: leiyuou Date: Tue, 10 Feb 2026 22:36:12 -0800 Subject: [PATCH 04/12] old approach --- crates/lance-graph/src/parser.rs | 55 ++++++++------------------------ 1 file changed, 13 insertions(+), 42 deletions(-) diff --git a/crates/lance-graph/src/parser.rs b/crates/lance-graph/src/parser.rs index d0e3a934..3690a4c0 100644 --- a/crates/lance-graph/src/parser.rs +++ b/crates/lance-graph/src/parser.rs @@ -972,18 +972,8 @@ fn boolean_literal(input: &str) -> IResult<&str, bool> { // Parse a parameter reference fn parameter(input: &str) -> IResult<&str, String> { - alt(( - // $param - map(preceded(char('$'), identifier), |s| s.to_string()), - // @param - map(preceded(char('@'), identifier), |s| s.to_string()), - // :param - map(preceded(char(':'), identifier), |s| s.to_string()), - // {param} - map(delimited(char('{'), identifier, char('}')), |s| { - s.to_string() - }), - ))(input) + // Only support $param syntax + map(preceded(char('$'), identifier), |s| s.to_string())(input) } // Parse comma with optional whitespace @@ -1753,44 +1743,25 @@ mod tests { #[test] fn test_parse_parameter_formats() { - // Test @param + // Test $param (should succeed) + let query = "MATCH (p:Person) WHERE p.age > $min_age RETURN p"; + let result = parse_cypher_query(query); + assert!(result.is_ok(), "$param should parse successfully"); + + // Test @param (should fail) let query = "MATCH (p:Person) WHERE p.age > @min_age RETURN p"; let result = parse_cypher_query(query); - assert!(result.is_ok(), "@param should parse successfully"); - let where_clause = result.unwrap().where_clause.expect("Expected WHERE clause"); - match where_clause.expression { - BooleanExpression::Comparison { right, .. } => match right { - ValueExpression::Parameter(name) => assert_eq!(name, "min_age"), - _ => panic!("Expected Parameter for @param"), - }, - _ => panic!("Expected comparison"), - } + assert!(result.is_err(), "@param should fail"); - // Test :param + // Test :param (should fail) let query = "MATCH (p:Person) WHERE p.age > :min_age RETURN p"; let result = parse_cypher_query(query); - assert!(result.is_ok(), ":param should parse successfully"); - let where_clause = result.unwrap().where_clause.expect("Expected WHERE clause"); - match where_clause.expression { - BooleanExpression::Comparison { right, .. } => match right { - ValueExpression::Parameter(name) => assert_eq!(name, "min_age"), - _ => panic!("Expected Parameter for :param"), - }, - _ => panic!("Expected comparison"), - } + assert!(result.is_err(), ":param should fail"); - // Test {param} + // Test {param} (should fail) let query = "MATCH (p:Person) WHERE p.age > {min_age} RETURN p"; let result = parse_cypher_query(query); - assert!(result.is_ok(), "{{param}} should parse successfully"); - let where_clause = result.unwrap().where_clause.expect("Expected WHERE clause"); - match where_clause.expression { - BooleanExpression::Comparison { right, .. } => match right { - ValueExpression::Parameter(name) => assert_eq!(name, "min_age"), - _ => panic!("Expected Parameter for {{param}}"), - }, - _ => panic!("Expected comparison"), - } + assert!(result.is_err(), "{{param}} should fail"); } #[test] From 256c2133aee5708b05dbb6730761bbc41d1be10a Mon Sep 17 00:00:00 2001 From: leiyuou Date: Wed, 11 Feb 2026 21:45:14 -0800 Subject: [PATCH 05/12] refator to resolve para at semantic step --- .../src/datafusion_planner/analysis.rs | 10 +- .../builder/aggregate_ops.rs | 7 +- .../datafusion_planner/builder/basic_ops.rs | 15 +- .../datafusion_planner/builder/expand_ops.rs | 3 +- .../builder/join_builder.rs | 3 +- .../src/datafusion_planner/builder/mod.rs | 2 +- .../src/datafusion_planner/config_helpers.rs | 15 +- .../src/datafusion_planner/expression.rs | 188 ++++------ .../src/datafusion_planner/join_ops.rs | 3 - .../lance-graph/src/datafusion_planner/mod.rs | 11 +- .../src/datafusion_planner/scan_ops.rs | 6 - crates/lance-graph/src/query.rs | 10 +- crates/lance-graph/src/semantic.rs | 349 +++++++++++++++++- python/python/tests/test_cypher_engine.py | 48 --- python/python/tests/test_graph.py | 28 ++ 15 files changed, 462 insertions(+), 236 deletions(-) diff --git a/crates/lance-graph/src/datafusion_planner/analysis.rs b/crates/lance-graph/src/datafusion_planner/analysis.rs index f078e77b..113072c2 100644 --- a/crates/lance-graph/src/datafusion_planner/analysis.rs +++ b/crates/lance-graph/src/datafusion_planner/analysis.rs @@ -38,18 +38,13 @@ pub struct RelationshipInstance { pub struct PlanningContext<'a> { pub analysis: &'a QueryAnalysis, pub(crate) relationship_instance_idx: HashMap, - pub parameters: &'a HashMap, } impl<'a> PlanningContext<'a> { - pub fn new( - analysis: &'a QueryAnalysis, - parameters: &'a HashMap, - ) -> Self { + pub fn new(analysis: &'a QueryAnalysis) -> Self { Self { analysis, relationship_instance_idx: HashMap::new(), - parameters, } } @@ -388,8 +383,7 @@ mod tests { required_datasets: HashSet::new(), }; - let empty_params = HashMap::new(); - let mut ctx = PlanningContext::new(&analysis, &empty_params); + let mut ctx = PlanningContext::new(&analysis); // First call should return first instance let inst1 = ctx.next_relationship_instance("KNOWS").unwrap(); diff --git a/crates/lance-graph/src/datafusion_planner/builder/aggregate_ops.rs b/crates/lance-graph/src/datafusion_planner/builder/aggregate_ops.rs index 0f9cbec1..6feb2c2e 100644 --- a/crates/lance-graph/src/datafusion_planner/builder/aggregate_ops.rs +++ b/crates/lance-graph/src/datafusion_planner/builder/aggregate_ops.rs @@ -3,7 +3,6 @@ //! Aggregation operations: Projection with aggregates and grouping -use crate::datafusion_planner::analysis::PlanningContext; use crate::datafusion_planner::DataFusionPlanner; use crate::error::Result; use crate::logical_plan::*; @@ -12,7 +11,6 @@ use datafusion::logical_expr::{col, LogicalPlan, LogicalPlanBuilder}; impl DataFusionPlanner { pub(crate) fn build_project_with_aggregates( &self, - ctx: &mut PlanningContext, input_plan: LogicalPlan, projections: &[ProjectionItem], ) -> Result { @@ -23,7 +21,7 @@ impl DataFusionPlanner { let mut agg_aliases = Vec::new(); for p in projections { - let expr = super::super::expression::to_df_value_expr(&p.expression, ctx.parameters); + let expr = super::super::expression::to_df_value_expr(&p.expression); if super::super::expression::contains_aggregate(&p.expression) { // Aggregate expressions get aliased @@ -46,8 +44,7 @@ impl DataFusionPlanner { for p in projections { if !super::super::expression::contains_aggregate(&p.expression) { // Re-create the expression and apply alias - let expr = - super::super::expression::to_df_value_expr(&p.expression, ctx.parameters); + let expr = super::super::expression::to_df_value_expr(&p.expression); let aliased = if let Some(alias) = &p.alias { expr.alias(alias) } else { diff --git a/crates/lance-graph/src/datafusion_planner/builder/basic_ops.rs b/crates/lance-graph/src/datafusion_planner/builder/basic_ops.rs index 10b3c874..8f1c893d 100644 --- a/crates/lance-graph/src/datafusion_planner/builder/basic_ops.rs +++ b/crates/lance-graph/src/datafusion_planner/builder/basic_ops.rs @@ -17,7 +17,7 @@ impl DataFusionPlanner { predicate: &crate::ast::BooleanExpression, ) -> Result { let input_plan = self.build_operator(ctx, input)?; - let expr = super::super::expression::to_df_boolean_expr(predicate, ctx.parameters); + let expr = super::super::expression::to_df_boolean_expr(predicate); LogicalPlanBuilder::from(input_plan) .filter(expr) .map_err(|e| self.plan_error("Failed to build filter", e))? @@ -39,23 +39,21 @@ impl DataFusionPlanner { .any(|p| super::super::expression::contains_aggregate(&p.expression)); if has_aggregates { - self.build_project_with_aggregates(ctx, input_plan, projections) + self.build_project_with_aggregates(input_plan, projections) } else { - self.build_simple_project(ctx, input_plan, projections) + self.build_simple_project(input_plan, projections) } } pub(crate) fn build_simple_project( &self, - ctx: &mut PlanningContext, input_plan: LogicalPlan, projections: &[ProjectionItem], ) -> Result { let exprs: Vec = projections .iter() .map(|p| { - let expr = - super::super::expression::to_df_value_expr(&p.expression, ctx.parameters); + let expr = super::super::expression::to_df_value_expr(&p.expression); // Apply alias if provided, otherwise use Cypher dot notation // Normalize alias to lowercase for case-insensitive behavior if let Some(alias) = &p.alias { @@ -100,8 +98,7 @@ impl DataFusionPlanner { let sort_exprs: Vec = sort_items .iter() .map(|item| { - let expr = - super::super::expression::to_df_value_expr(&item.expression, ctx.parameters); + let expr = super::super::expression::to_df_value_expr(&item.expression); let asc = matches!(item.direction, crate::ast::SortDirection::Ascending); SortExpr { expr, @@ -163,7 +160,7 @@ impl DataFusionPlanner { }; // Convert expression to DataFusion Expr - let df_expr = super::super::expression::to_df_value_expr(expression, ctx.parameters); + let df_expr = super::super::expression::to_df_value_expr(expression); // We project the list expression first (aliased as the target alias temporarily) // DataFusion unnest takes a column name. diff --git a/crates/lance-graph/src/datafusion_planner/builder/expand_ops.rs b/crates/lance-graph/src/datafusion_planner/builder/expand_ops.rs index 5522fe57..85231ced 100644 --- a/crates/lance-graph/src/datafusion_planner/builder/expand_ops.rs +++ b/crates/lance-graph/src/datafusion_planner/builder/expand_ops.rs @@ -59,7 +59,7 @@ impl DataFusionPlanner { // Build relationship scan with qualified columns and property filters let rel_scan = - self.build_relationship_scan(ctx, &rel_instance, rel_source, relationship_properties)?; + self.build_relationship_scan(&rel_instance, rel_source, relationship_properties)?; // Join source node with relationship let source_params = SourceJoinParams { @@ -297,7 +297,6 @@ impl DataFusionPlanner { // Build target node scan and join let target_scan = self.build_qualified_target_scan( - ctx, catalog, &target_label, target_variable, diff --git a/crates/lance-graph/src/datafusion_planner/builder/join_builder.rs b/crates/lance-graph/src/datafusion_planner/builder/join_builder.rs index caddf870..019d7710 100644 --- a/crates/lance-graph/src/datafusion_planner/builder/join_builder.rs +++ b/crates/lance-graph/src/datafusion_planner/builder/join_builder.rs @@ -515,8 +515,7 @@ mod tests { // Analyze both patterns to build the context let left_analysis = analysis::analyze(&expand_left).unwrap(); - let empty_params = std::collections::HashMap::new(); - let left_ctx = analysis::PlanningContext::new(&left_analysis, &empty_params); + let left_ctx = analysis::PlanningContext::new(&left_analysis); // Test the key inference logic directly let (left_keys, right_keys) = diff --git a/crates/lance-graph/src/datafusion_planner/builder/mod.rs b/crates/lance-graph/src/datafusion_planner/builder/mod.rs index aaf23872..1b16559f 100644 --- a/crates/lance-graph/src/datafusion_planner/builder/mod.rs +++ b/crates/lance-graph/src/datafusion_planner/builder/mod.rs @@ -38,7 +38,7 @@ impl DataFusionPlanner { label, properties, .. - } => self.build_scan(ctx, variable, label, properties), + } => self.build_scan(variable, label, properties), LogicalOperator::Filter { input, predicate } => { self.build_filter(ctx, input, predicate) } diff --git a/crates/lance-graph/src/datafusion_planner/config_helpers.rs b/crates/lance-graph/src/datafusion_planner/config_helpers.rs index 6065ae00..fd944330 100644 --- a/crates/lance-graph/src/datafusion_planner/config_helpers.rs +++ b/crates/lance-graph/src/datafusion_planner/config_helpers.rs @@ -140,8 +140,7 @@ mod tests { analysis .var_to_label .insert("b".to_string(), "Person".to_string()); - let empty_params = std::collections::HashMap::new(); - let ctx = PlanningContext::new(&analysis, &empty_params); + let ctx = PlanningContext::new(&analysis); let (label, node_map) = planner .get_target_node_mapping(&ctx, "b") @@ -157,8 +156,7 @@ mod tests { analysis .var_to_label .insert("a".to_string(), "Person".to_string()); - let empty_params = std::collections::HashMap::new(); - let ctx = PlanningContext::new(&analysis, &empty_params); + let ctx = PlanningContext::new(&analysis); let (label, node_map) = planner .get_target_node_mapping(&ctx, "_temp_a_1") @@ -171,8 +169,7 @@ mod tests { fn test_get_target_node_mapping_invalid_temp_variable() { let planner = planner_with_basic_config(); let analysis = QueryAnalysis::default(); - let empty_params = std::collections::HashMap::new(); - let ctx = PlanningContext::new(&analysis, &empty_params); + let ctx = PlanningContext::new(&analysis); let err = planner .get_target_node_mapping(&ctx, "_temp_invalid") @@ -188,8 +185,7 @@ mod tests { analysis .var_to_label .insert("a".to_string(), "Person".to_string()); - let empty_params = std::collections::HashMap::new(); - let ctx = PlanningContext::new(&analysis, &empty_params); + let ctx = PlanningContext::new(&analysis); let err = planner.get_target_node_mapping(&ctx, "c").unwrap_err(); let msg = format!("{}", err); @@ -207,8 +203,7 @@ mod tests { analysis .var_to_label .insert("b".to_string(), "Organization".to_string()); - let empty_params = std::collections::HashMap::new(); - let ctx = PlanningContext::new(&analysis, &empty_params); + let ctx = PlanningContext::new(&analysis); let err = planner.get_target_node_mapping(&ctx, "b").unwrap_err(); let msg = format!("{}", err); diff --git a/crates/lance-graph/src/datafusion_planner/expression.rs b/crates/lance-graph/src/datafusion_planner/expression.rs index c4a6b581..3f37ff81 100644 --- a/crates/lance-graph/src/datafusion_planner/expression.rs +++ b/crates/lance-graph/src/datafusion_planner/expression.rs @@ -18,7 +18,6 @@ use datafusion_functions_aggregate::count::count_distinct; use datafusion_functions_aggregate::min_max::max; use datafusion_functions_aggregate::min_max::min; use datafusion_functions_aggregate::sum::sum; -use std::collections::HashMap; /// Helper function to convert serde_json::Value to DataFusion ScalarValue fn json_to_scalar(value: &serde_json::Value) -> datafusion::scalar::ScalarValue { @@ -41,15 +40,10 @@ fn json_to_scalar(value: &serde_json::Value) -> datafusion::scalar::ScalarValue } /// Helper function to create LIKE expressions with consistent settings -fn create_like_expr( - expression: &ValueExpression, - pattern: &str, - case_insensitive: bool, - parameters: &HashMap, -) -> Expr { +fn create_like_expr(expression: &ValueExpression, pattern: &str, case_insensitive: bool) -> Expr { Expr::Like(datafusion::logical_expr::Like { negated: false, - expr: Box::new(to_df_value_expr(expression, parameters)), + expr: Box::new(to_df_value_expr(expression)), pattern: Box::new(lit(pattern.to_string())), escape_char: None, case_insensitive, @@ -57,10 +51,7 @@ fn create_like_expr( } /// Convert BooleanExpression to DataFusion Expr -pub(crate) fn to_df_boolean_expr( - expr: &BooleanExpression, - parameters: &HashMap, -) -> Expr { +pub(crate) fn to_df_boolean_expr(expr: &BooleanExpression) -> Expr { use crate::ast::{BooleanExpression as BE, ComparisonOperator as CO}; match expr { BE::Comparison { @@ -68,8 +59,8 @@ pub(crate) fn to_df_boolean_expr( operator, right, } => { - let l = to_df_value_expr(left, parameters); - let r = to_df_value_expr(right, parameters); + let l = to_df_value_expr(left); + let r = to_df_value_expr(right); let op = match operator { CO::Equal => Operator::Eq, CO::NotEqual => Operator::NotEq, @@ -86,66 +77,57 @@ pub(crate) fn to_df_boolean_expr( } BE::In { expression, list } => { use datafusion::logical_expr::expr::InList as DFInList; - let expr = to_df_value_expr(expression, parameters); - let list_exprs = list - .iter() - .map(|e| to_df_value_expr(e, parameters)) - .collect::>(); + let expr = to_df_value_expr(expression); + let list_exprs = list.iter().map(|e| to_df_value_expr(e)).collect::>(); Expr::InList(DFInList::new(Box::new(expr), list_exprs, false)) } BE::And(l, r) => Expr::BinaryExpr(BinaryExpr { - left: Box::new(to_df_boolean_expr(l, parameters)), + left: Box::new(to_df_boolean_expr(l)), op: Operator::And, - right: Box::new(to_df_boolean_expr(r, parameters)), + right: Box::new(to_df_boolean_expr(r)), }), BE::Or(l, r) => Expr::BinaryExpr(BinaryExpr { - left: Box::new(to_df_boolean_expr(l, parameters)), + left: Box::new(to_df_boolean_expr(l)), op: Operator::Or, - right: Box::new(to_df_boolean_expr(r, parameters)), + right: Box::new(to_df_boolean_expr(r)), }), - BE::Not(inner) => Expr::Not(Box::new(to_df_boolean_expr(inner, parameters))), + BE::Not(inner) => Expr::Not(Box::new(to_df_boolean_expr(inner))), BE::Exists(prop) => Expr::IsNotNull(Box::new(to_df_value_expr( &ValueExpression::Property(prop.clone()), - parameters, ))), - BE::IsNull(expression) => Expr::IsNull(Box::new(to_df_value_expr(expression, parameters))), - BE::IsNotNull(expression) => { - Expr::IsNotNull(Box::new(to_df_value_expr(expression, parameters))) - } + BE::IsNull(expression) => Expr::IsNull(Box::new(to_df_value_expr(expression))), + BE::IsNotNull(expression) => Expr::IsNotNull(Box::new(to_df_value_expr(expression))), BE::Like { expression, pattern, - } => create_like_expr(expression, pattern, false, parameters), + } => create_like_expr(expression, pattern, false), BE::ILike { expression, pattern, - } => create_like_expr(expression, pattern, true, parameters), + } => create_like_expr(expression, pattern, true), BE::Contains { expression, substring, } => { // CONTAINS is equivalent to LIKE '%substring%' let pattern = format!("%{}%", substring); - create_like_expr(expression, &pattern, false, parameters) + create_like_expr(expression, &pattern, false) } BE::StartsWith { expression, prefix } => { // STARTS WITH is equivalent to LIKE 'prefix%' let pattern = format!("{}%", prefix); - create_like_expr(expression, &pattern, false, parameters) + create_like_expr(expression, &pattern, false) } BE::EndsWith { expression, suffix } => { // ENDS WITH is equivalent to LIKE '%suffix' let pattern = format!("%{}", suffix); - create_like_expr(expression, &pattern, false, parameters) + create_like_expr(expression, &pattern, false) } } } /// Convert ValueExpression to DataFusion Expr -pub(crate) fn to_df_value_expr( - expr: &ValueExpression, - parameters: &HashMap, -) -> Expr { +pub(crate) fn to_df_value_expr(expr: &ValueExpression) -> Expr { use crate::ast::{PropertyValue as PV, ValueExpression as VE}; match expr { VE::Property(prop) => { @@ -161,12 +143,10 @@ pub(crate) fn to_df_value_expr( datafusion::logical_expr::Expr::Literal(datafusion::scalar::ScalarValue::Null, None) } VE::Literal(PV::Parameter(name)) => { - // Handle parameter in literal (if that ever happens, though usually it's separate) - if let Some(value) = parameters.get(name) { - Expr::Literal(json_to_scalar(value), None) - } else { - col(format!("${}", name)) - } + panic!( + "Parameter ${} should have been substituted during semantic analysis", + name + ); } VE::Literal(PV::Property(prop)) => { // Create qualified column name: variable__property (lowercase for case-insensitivity) @@ -176,7 +156,7 @@ pub(crate) fn to_df_value_expr( match name.to_lowercase().as_str() { "tolower" | "lower" => { if args.len() == 1 { - let arg_expr = to_df_value_expr(&args[0], parameters); + let arg_expr = to_df_value_expr(&args[0]); lower().call(vec![arg_expr]) } else { // Invalid argument count - return NULL @@ -185,7 +165,7 @@ pub(crate) fn to_df_value_expr( } "toupper" | "upper" => { if args.len() == 1 { - let arg_expr = to_df_value_expr(&args[0], parameters); + let arg_expr = to_df_value_expr(&args[0]); upper().call(vec![arg_expr]) } else { // Invalid argument count - return NULL @@ -227,7 +207,7 @@ pub(crate) fn to_df_value_expr( } } else { // COUNT(p.property) - count non-null values of that property - to_df_value_expr(&args[0], parameters) + to_df_value_expr(&args[0]) }; // Use DataFusion's count or count_distinct @@ -243,7 +223,7 @@ pub(crate) fn to_df_value_expr( } "sum" => { if args.len() == 1 { - let arg_expr = to_df_value_expr(&args[0], parameters); + let arg_expr = to_df_value_expr(&args[0]); sum(arg_expr) } else { lit(0) @@ -251,7 +231,7 @@ pub(crate) fn to_df_value_expr( } "avg" => { if args.len() == 1 { - let arg_expr = to_df_value_expr(&args[0], parameters); + let arg_expr = to_df_value_expr(&args[0]); avg(arg_expr) } else { lit(0) @@ -259,7 +239,7 @@ pub(crate) fn to_df_value_expr( } "min" => { if args.len() == 1 { - let arg_expr = to_df_value_expr(&args[0], parameters); + let arg_expr = to_df_value_expr(&args[0]); min(arg_expr) } else { lit(0) @@ -267,7 +247,7 @@ pub(crate) fn to_df_value_expr( } "max" => { if args.len() == 1 { - let arg_expr = to_df_value_expr(&args[0], parameters); + let arg_expr = to_df_value_expr(&args[0]); max(arg_expr) } else { lit(0) @@ -275,7 +255,7 @@ pub(crate) fn to_df_value_expr( } "collect" => { if args.len() == 1 { - let arg_expr = to_df_value_expr(&args[0], parameters); + let arg_expr = to_df_value_expr(&args[0]); array_agg(arg_expr) } else { lit(0) @@ -298,8 +278,8 @@ pub(crate) fn to_df_value_expr( right, } => { use crate::ast::ArithmeticOperator as AO; - let l = to_df_value_expr(left, parameters); - let r = to_df_value_expr(right, parameters); + let l = to_df_value_expr(left); + let r = to_df_value_expr(right); let op = match operator { AO::Add => Operator::Plus, AO::Subtract => Operator::Minus, @@ -320,8 +300,8 @@ pub(crate) fn to_df_value_expr( } => { // Create UDF for vector distance computation let udf = udf::create_vector_distance_udf(metric); - let left_expr = to_df_value_expr(left, parameters); - let right_expr = to_df_value_expr(right, parameters); + let left_expr = to_df_value_expr(left); + let right_expr = to_df_value_expr(right); Expr::ScalarFunction(datafusion::logical_expr::expr::ScalarFunction::new_udf( udf, vec![left_expr, right_expr], @@ -334,8 +314,8 @@ pub(crate) fn to_df_value_expr( } => { // Create UDF for vector similarity computation let udf = udf::create_vector_similarity_udf(metric); - let left_expr = to_df_value_expr(left, parameters); - let right_expr = to_df_value_expr(right, parameters); + let left_expr = to_df_value_expr(left); + let right_expr = to_df_value_expr(right); Expr::ScalarFunction(datafusion::logical_expr::expr::ScalarFunction::new_udf( udf, vec![left_expr, right_expr], @@ -361,11 +341,10 @@ pub(crate) fn to_df_value_expr( lit(scalar) } VE::Parameter(name) => { - if let Some(value) = parameters.get(name) { - Expr::Literal(json_to_scalar(value), None) - } else { - col(format!("${}", name)) - } + panic!( + "Parameter ${} should have been substituted during semantic analysis", + name + ); } } } @@ -458,7 +437,7 @@ mod tests { right: ValueExpression::Literal(PropertyValue::Integer(30)), }; - let df_expr = to_df_boolean_expr(&expr, &std::collections::HashMap::new()); + let df_expr = to_df_boolean_expr(&expr); let s = format!("{:?}", df_expr); assert!(s.contains("p__age"), "Should contain qualified column"); assert!( @@ -489,7 +468,7 @@ mod tests { right: ValueExpression::Literal(PropertyValue::Integer(30)), }; - let df_expr = to_df_boolean_expr(&expr, &std::collections::HashMap::new()); + let df_expr = to_df_boolean_expr(&expr); // Should successfully translate without panicking assert!(format!("{:?}", df_expr).contains("p__age")); } @@ -517,7 +496,7 @@ mod tests { }), ); - let df_expr = to_df_boolean_expr(&expr, &std::collections::HashMap::new()); + let df_expr = to_df_boolean_expr(&expr); let s = format!("{:?}", df_expr); assert!(s.contains("And"), "Should contain AND operator"); assert!(s.contains("p__age"), "Should contain column reference"); @@ -545,7 +524,7 @@ mod tests { }), ); - let df_expr = to_df_boolean_expr(&expr, &std::collections::HashMap::new()); + let df_expr = to_df_boolean_expr(&expr); let s = format!("{:?}", df_expr); assert!(s.contains("Or"), "Should contain OR operator"); } @@ -562,7 +541,7 @@ mod tests { right: ValueExpression::Literal(PropertyValue::Boolean(true)), })); - let df_expr = to_df_boolean_expr(&expr, &std::collections::HashMap::new()); + let df_expr = to_df_boolean_expr(&expr); let s = format!("{:?}", df_expr); assert!(s.contains("Not"), "Should contain NOT operator"); } @@ -574,7 +553,7 @@ mod tests { property: "email".into(), }); - let df_expr = to_df_boolean_expr(&expr, &std::collections::HashMap::new()); + let df_expr = to_df_boolean_expr(&expr); let s = format!("{:?}", df_expr); assert!( s.contains("IsNotNull") || s.contains("p__email"), @@ -595,8 +574,7 @@ mod tests { ], }; - if let Expr::InList(in_list) = to_df_boolean_expr(&expr, &std::collections::HashMap::new()) - { + if let Expr::InList(in_list) = to_df_boolean_expr(&expr) { assert!(!in_list.negated); assert_eq!(in_list.list.len(), 2); match *in_list.expr { @@ -620,8 +598,7 @@ mod tests { pattern: "A%".into(), }; - if let Expr::Like(like_expr) = to_df_boolean_expr(&expr, &std::collections::HashMap::new()) - { + if let Expr::Like(like_expr) = to_df_boolean_expr(&expr) { assert!(!like_expr.negated, "Should not be negated"); assert!(!like_expr.case_insensitive, "Should be case sensitive"); assert_eq!(like_expr.escape_char, None, "Should have no escape char"); @@ -651,8 +628,7 @@ mod tests { pattern: "alice%".into(), }; - if let Expr::Like(like_expr) = to_df_boolean_expr(&expr, &std::collections::HashMap::new()) - { + if let Expr::Like(like_expr) = to_df_boolean_expr(&expr) { assert!(!like_expr.negated, "Should not be negated"); assert!( like_expr.case_insensitive, @@ -693,8 +669,7 @@ mod tests { pattern: "Test%".into(), }; - if let Expr::Like(like) = to_df_boolean_expr(&like_expr, &std::collections::HashMap::new()) - { + if let Expr::Like(like) = to_df_boolean_expr(&like_expr) { assert!( !like.case_insensitive, "LIKE should be case-sensitive (case_insensitive = false)" @@ -712,9 +687,7 @@ mod tests { pattern: "Test%".into(), }; - if let Expr::Like(ilike) = - to_df_boolean_expr(&ilike_expr, &std::collections::HashMap::new()) - { + if let Expr::Like(ilike) = to_df_boolean_expr(&ilike_expr) { assert!( ilike.case_insensitive, "ILIKE should be case-insensitive (case_insensitive = true)" @@ -734,7 +707,7 @@ mod tests { pattern: "%@example.com".into(), }; - let df_expr = to_df_boolean_expr(&expr, &std::collections::HashMap::new()); + let df_expr = to_df_boolean_expr(&expr); let s = format!("{:?}", df_expr); assert!( s.contains("Like") || s.contains("like"), @@ -753,8 +726,7 @@ mod tests { substring: "ali".into(), }; - if let Expr::Like(like_expr) = to_df_boolean_expr(&expr, &std::collections::HashMap::new()) - { + if let Expr::Like(like_expr) = to_df_boolean_expr(&expr) { assert!(!like_expr.negated, "Should not be negated"); assert!(!like_expr.case_insensitive, "Should be case sensitive"); assert_eq!(like_expr.escape_char, None, "Should have no escape char"); @@ -790,8 +762,7 @@ mod tests { prefix: "admin".into(), }; - if let Expr::Like(like_expr) = to_df_boolean_expr(&expr, &std::collections::HashMap::new()) - { + if let Expr::Like(like_expr) = to_df_boolean_expr(&expr) { assert!(!like_expr.negated, "Should not be negated"); assert!(!like_expr.case_insensitive, "Should be case sensitive"); @@ -830,8 +801,7 @@ mod tests { suffix: "@example.com".into(), }; - if let Expr::Like(like_expr) = to_df_boolean_expr(&expr, &std::collections::HashMap::new()) - { + if let Expr::Like(like_expr) = to_df_boolean_expr(&expr) { assert!(!like_expr.negated, "Should not be negated"); assert!(!like_expr.case_insensitive, "Should be case sensitive"); @@ -871,8 +841,7 @@ mod tests { substring: "Test".into(), }; - if let Expr::Like(like_expr) = to_df_boolean_expr(&expr, &std::collections::HashMap::new()) - { + if let Expr::Like(like_expr) = to_df_boolean_expr(&expr) { assert!( !like_expr.case_insensitive, "CONTAINS should be case-sensitive by default" @@ -890,8 +859,7 @@ mod tests { substring: "test".into(), }; - if let Expr::Like(like_expr) = to_df_boolean_expr(&expr, &std::collections::HashMap::new()) - { + if let Expr::Like(like_expr) = to_df_boolean_expr(&expr) { match *like_expr.expr { Expr::Column(ref col_expr) => { assert_eq!( @@ -918,7 +886,7 @@ mod tests { property: "name".into(), }); - let df_expr = to_df_value_expr(&expr, &std::collections::HashMap::new()); + let df_expr = to_df_value_expr(&expr); let s = format!("{:?}", df_expr); assert_eq!( s, @@ -929,7 +897,7 @@ mod tests { #[test] fn test_value_expr_literal_integer() { let expr = ValueExpression::Literal(PropertyValue::Integer(42)); - let df_expr = to_df_value_expr(&expr, &std::collections::HashMap::new()); + let df_expr = to_df_value_expr(&expr); let s = format!("{:?}", df_expr); assert!(s.contains("42") || s.contains("Int64(42)")); } @@ -937,7 +905,7 @@ mod tests { #[test] fn test_value_expr_literal_float() { let expr = ValueExpression::Literal(PropertyValue::Float(std::f64::consts::PI)); - let df_expr = to_df_value_expr(&expr, &std::collections::HashMap::new()); + let df_expr = to_df_value_expr(&expr); let s = format!("{:?}", df_expr); assert!(s.contains("3.14") || s.contains("Float64")); } @@ -945,7 +913,7 @@ mod tests { #[test] fn test_value_expr_literal_string() { let expr = ValueExpression::Literal(PropertyValue::String("hello".into())); - let df_expr = to_df_value_expr(&expr, &std::collections::HashMap::new()); + let df_expr = to_df_value_expr(&expr); let s = format!("{:?}", df_expr); assert!(s.contains("hello") || s.contains("Utf8")); } @@ -953,7 +921,7 @@ mod tests { #[test] fn test_value_expr_literal_boolean() { let expr = ValueExpression::Literal(PropertyValue::Boolean(true)); - let df_expr = to_df_value_expr(&expr, &std::collections::HashMap::new()); + let df_expr = to_df_value_expr(&expr); let s = format!("{:?}", df_expr); assert!(s.contains("true") || s.contains("Boolean")); } @@ -961,7 +929,7 @@ mod tests { #[test] fn test_value_expr_literal_null() { let expr = ValueExpression::Literal(PropertyValue::Null); - let df_expr = to_df_value_expr(&expr, &std::collections::HashMap::new()); + let df_expr = to_df_value_expr(&expr); let s = format!("{:?}", df_expr); // Null literals are translated to Literal with Null value assert!(s.contains("Literal"), "Should be a Literal expression"); @@ -979,7 +947,7 @@ mod tests { right: Box::new(ValueExpression::Literal(PropertyValue::Integer(5))), }; - let df_expr = to_df_value_expr(&expr, &std::collections::HashMap::new()); + let df_expr = to_df_value_expr(&expr); let s = format!("{:?}", df_expr); // Arithmetic expressions should now return a BinaryExpr with Plus operator assert!(s.contains("BinaryExpr"), "Should be a BinaryExpr"); @@ -1008,7 +976,7 @@ mod tests { right: Box::new(ValueExpression::Literal(PropertyValue::Integer(2))), }; - let df_expr = to_df_value_expr(&expr, &std::collections::HashMap::new()); + let df_expr = to_df_value_expr(&expr); let s = format!("{:?}", df_expr); // Should translate to BinaryExpr with the correct operator assert!( @@ -1032,7 +1000,7 @@ mod tests { distinct: false, }; - let df_expr = to_df_value_expr(&expr, &std::collections::HashMap::new()); + let df_expr = to_df_value_expr(&expr); let s = format!("{:?}", df_expr); assert!( s.contains("count") || s.contains("Count"), @@ -1051,7 +1019,7 @@ mod tests { distinct: false, }; - let df_expr = to_df_value_expr(&expr, &std::collections::HashMap::new()); + let df_expr = to_df_value_expr(&expr); let s = format!("{:?}", df_expr); assert!( s.contains("count") || s.contains("Count"), @@ -1071,7 +1039,7 @@ mod tests { distinct: false, }; - let df_expr = to_df_value_expr(&expr, &std::collections::HashMap::new()); + let df_expr = to_df_value_expr(&expr); let s = format!("{:?}", df_expr); assert!( s.contains("sum") || s.contains("Sum"), @@ -1091,7 +1059,7 @@ mod tests { distinct: false, }; - let df_expr = to_df_value_expr(&expr, &std::collections::HashMap::new()); + let df_expr = to_df_value_expr(&expr); let s = format!("{:?}", df_expr); assert!( s.contains("avg") || s.contains("Avg"), @@ -1111,7 +1079,7 @@ mod tests { distinct: false, }; - let df_expr = to_df_value_expr(&expr, &std::collections::HashMap::new()); + let df_expr = to_df_value_expr(&expr); let s = format!("{:?}", df_expr); assert!( s.contains("min") || s.contains("Min"), @@ -1131,7 +1099,7 @@ mod tests { distinct: false, }; - let df_expr = to_df_value_expr(&expr, &std::collections::HashMap::new()); + let df_expr = to_df_value_expr(&expr); let s = format!("{:?}", df_expr); assert!( s.contains("max") || s.contains("Max"), @@ -1150,7 +1118,7 @@ mod tests { })], }; - let df_expr = to_df_value_expr(&expr, &std::collections::HashMap::new()); + let df_expr = to_df_value_expr(&expr); let s = format!("{:?}", df_expr); // Should be a ScalarFunction with lower assert!( @@ -1171,7 +1139,7 @@ mod tests { })], }; - let df_expr = to_df_value_expr(&expr, &std::collections::HashMap::new()); + let df_expr = to_df_value_expr(&expr); let s = format!("{:?}", df_expr); // Should be a ScalarFunction with upper assert!( @@ -1193,7 +1161,7 @@ mod tests { })], }; - let df_expr = to_df_value_expr(&expr, &std::collections::HashMap::new()); + let df_expr = to_df_value_expr(&expr); let s = format!("{:?}", df_expr); assert!( s.contains("lower") || s.contains("Lower"), @@ -1213,7 +1181,7 @@ mod tests { })], }; - let df_expr = to_df_value_expr(&expr, &std::collections::HashMap::new()); + let df_expr = to_df_value_expr(&expr); let s = format!("{:?}", df_expr); assert!( s.contains("upper") || s.contains("Upper"), @@ -1239,7 +1207,7 @@ mod tests { substring: "offer".into(), }; - let df_expr = to_df_boolean_expr(&contains_expr, &HashMap::new()); + let df_expr = to_df_boolean_expr(&contains_expr); let s = format!("{:?}", df_expr); // Should be a Like expression with lower() on the column, not lit(0) diff --git a/crates/lance-graph/src/datafusion_planner/join_ops.rs b/crates/lance-graph/src/datafusion_planner/join_ops.rs index a84a4561..d88a1daa 100644 --- a/crates/lance-graph/src/datafusion_planner/join_ops.rs +++ b/crates/lance-graph/src/datafusion_planner/join_ops.rs @@ -48,7 +48,6 @@ impl DataFusionPlanner { target_label: &str, target_variable: &str, target_properties: &HashMap, - parameters: &HashMap, ) -> Result { let target_schema = target_source.schema(); let normalized_target_label = target_label.to_lowercase(); @@ -61,7 +60,6 @@ impl DataFusionPlanner { for (k, v) in target_properties.iter() { let lit_expr = super::expression::to_df_value_expr( &crate::ast::ValueExpression::Literal(v.clone()), - parameters, ); let filter_expr = Expr::BinaryExpr(BinaryExpr { left: Box::new(col(k.to_lowercase())), @@ -215,7 +213,6 @@ impl DataFusionPlanner { &target_label, params.target_variable, params.target_properties, - ctx.parameters, )?; // Determine target join keys diff --git a/crates/lance-graph/src/datafusion_planner/mod.rs b/crates/lance-graph/src/datafusion_planner/mod.rs index 5c7e8acb..db8a7b08 100644 --- a/crates/lance-graph/src/datafusion_planner/mod.rs +++ b/crates/lance-graph/src/datafusion_planner/mod.rs @@ -35,6 +35,7 @@ use crate::logical_plan::LogicalOperator; use datafusion::logical_expr::LogicalPlan; use lance_graph_catalog::GraphSourceCatalog; use std::collections::HashMap; + use std::sync::Arc; /// Planner abstraction for graph-to-physical planning @@ -46,7 +47,6 @@ pub trait GraphPhysicalPlanner { pub struct DataFusionPlanner { pub(crate) config: GraphConfig, pub(crate) catalog: Option>, - pub(crate) parameters: HashMap, } impl DataFusionPlanner { @@ -54,7 +54,6 @@ impl DataFusionPlanner { Self { config, catalog: None, - parameters: HashMap::new(), } } @@ -62,15 +61,9 @@ impl DataFusionPlanner { Self { config, catalog: Some(catalog), - parameters: HashMap::new(), } } - pub fn with_parameters(mut self, params: HashMap) -> Self { - self.parameters = params; - self - } - /// Helper to convert DataFusion builder errors into GraphError::PlanError with context pub(crate) fn plan_error( &self, @@ -90,7 +83,7 @@ impl GraphPhysicalPlanner for DataFusionPlanner { let analysis = analysis::analyze(logical_plan)?; // Phase 2: Build execution plan with context - let mut ctx = PlanningContext::new(&analysis, &self.parameters); + let mut ctx = PlanningContext::new(&analysis); self.build_operator(&mut ctx, logical_plan) } } diff --git a/crates/lance-graph/src/datafusion_planner/scan_ops.rs b/crates/lance-graph/src/datafusion_planner/scan_ops.rs index 5c27acfe..2c5e489f 100644 --- a/crates/lance-graph/src/datafusion_planner/scan_ops.rs +++ b/crates/lance-graph/src/datafusion_planner/scan_ops.rs @@ -21,7 +21,6 @@ impl DataFusionPlanner { /// Build a qualified node scan with property filters and column aliasing pub(crate) fn build_scan( &self, - ctx: &PlanningContext, variable: &str, label: &str, properties: &HashMap, @@ -46,7 +45,6 @@ impl DataFusionPlanner { .map(|(k, v)| { let lit_expr = super::expression::to_df_value_expr( &crate::ast::ValueExpression::Literal(v.clone()), - ctx.parameters, ); Expr::BinaryExpr(BinaryExpr { left: Box::new(col(k)), @@ -123,7 +121,6 @@ impl DataFusionPlanner { /// Build a qualified relationship scan with property filters pub(crate) fn build_relationship_scan( &self, - ctx: &PlanningContext, rel_instance: &RelationshipInstance, rel_source: Arc, relationship_properties: &HashMap, @@ -142,7 +139,6 @@ impl DataFusionPlanner { for (k, v) in relationship_properties.iter() { let lit_expr = super::expression::to_df_value_expr( &crate::ast::ValueExpression::Literal(v.clone()), - ctx.parameters, ); let filter_expr = Expr::BinaryExpr(BinaryExpr { left: Box::new(col(k)), @@ -262,7 +258,6 @@ impl DataFusionPlanner { /// Build a qualified target node scan with property filters pub(crate) fn build_qualified_target_scan( &self, - ctx: &PlanningContext, catalog: &Arc, target_label: &str, target_variable: &str, @@ -289,7 +284,6 @@ impl DataFusionPlanner { for (k, v) in target_properties.iter() { let lit_expr = super::expression::to_df_value_expr( &crate::ast::ValueExpression::Literal(v.clone()), - ctx.parameters, ); let filter_expr = Expr::BinaryExpr(BinaryExpr { left: Box::new(col(k)), diff --git a/crates/lance-graph/src/query.rs b/crates/lance-graph/src/query.rs index 63bbfd11..2b8162a1 100644 --- a/crates/lance-graph/src/query.rs +++ b/crates/lance-graph/src/query.rs @@ -769,7 +769,7 @@ impl CypherQuery { // Phase 1: Semantic Analysis let mut analyzer = SemanticAnalyzer::new(config.clone()); - let semantic = analyzer.analyze(&self.ast)?; + let semantic = analyzer.analyze(&self.ast, &self.parameters)?; if !semantic.errors.is_empty() { return Err(GraphError::PlanError { message: format!("Semantic analysis failed:\n{}", semantic.errors.join("\n")), @@ -779,11 +779,11 @@ impl CypherQuery { // Phase 2: Graph Logical Plan let mut logical_planner = LogicalPlanner::new(config); - let logical_plan = logical_planner.plan(&self.ast)?; + // Use the transformed AST (with parameters substituted) + let logical_plan = logical_planner.plan(&semantic.ast)?; // Phase 3: DataFusion Logical Plan - let df_planner = DataFusionPlanner::with_catalog(config.clone(), catalog) - .with_parameters(self.parameters.clone()); + let df_planner = DataFusionPlanner::with_catalog(config.clone(), catalog); let df_logical_plan = df_planner.plan(&logical_plan)?; Ok((logical_plan, df_logical_plan)) @@ -944,7 +944,7 @@ impl CypherQuery { // Ensure we don't silently ignore unsupported features (e.g. scalar functions). let mut analyzer = SemanticAnalyzer::new(config); - let semantic = analyzer.analyze(&self.ast)?; + let semantic = analyzer.analyze(&self.ast, &self.parameters)?; if !semantic.errors.is_empty() { return Err(GraphError::PlanError { message: format!("Semantic analysis failed:\n{}", semantic.errors.join("\n")), diff --git a/crates/lance-graph/src/semantic.rs b/crates/lance-graph/src/semantic.rs index 065e36ae..5d8b710a 100644 --- a/crates/lance-graph/src/semantic.rs +++ b/crates/lance-graph/src/semantic.rs @@ -54,6 +54,8 @@ pub enum ScopeType { /// Semantic analysis result with validated and enriched AST #[derive(Debug, Clone)] pub struct SemanticResult { + /// The AST with parameters substituted and validated + pub ast: CypherQuery, pub variables: HashMap, pub errors: Vec, pub warnings: Vec, @@ -69,13 +71,23 @@ impl SemanticAnalyzer { } /// Analyze a Cypher query AST - pub fn analyze(&mut self, query: &CypherQuery) -> Result { + pub fn analyze( + &mut self, + query: &CypherQuery, + parameters: &HashMap, + ) -> Result { + // Clone the query to perform parameter substitution + let mut analyzed_query = query.clone(); + + // Perform parameter substitution + self.substitute_parameters(&mut analyzed_query, parameters)?; + let mut errors = Vec::new(); let mut warnings = Vec::new(); // Phase 1: Variable discovery in READING clauses (MATCH/UNWIND) self.current_scope = ScopeType::Match; - for clause in &query.reading_clauses { + for clause in &analyzed_query.reading_clauses { match clause { ReadingClause::Match(match_clause) => { if let Err(e) = self.analyze_match_clause(match_clause) { @@ -91,7 +103,7 @@ impl SemanticAnalyzer { } // Phase 2: Validate WHERE clause (before WITH) - if let Some(where_clause) = &query.where_clause { + if let Some(where_clause) = &analyzed_query.where_clause { self.current_scope = ScopeType::Where; if let Err(e) = self.analyze_where_clause(where_clause) { errors.push(format!("WHERE clause error: {}", e)); @@ -99,7 +111,7 @@ impl SemanticAnalyzer { } // Phase 3: Validate WITH clause if present - if let Some(with_clause) = &query.with_clause { + if let Some(with_clause) = &analyzed_query.with_clause { self.current_scope = ScopeType::With; if let Err(e) = self.analyze_with_clause(with_clause) { errors.push(format!("WITH clause error: {}", e)); @@ -108,7 +120,7 @@ impl SemanticAnalyzer { // Phase 4: Variable discovery in post-WITH READING clauses (query chaining) self.current_scope = ScopeType::Match; - for clause in &query.post_with_reading_clauses { + for clause in &analyzed_query.post_with_reading_clauses { match clause { ReadingClause::Match(match_clause) => { if let Err(e) = self.analyze_match_clause(match_clause) { @@ -124,7 +136,7 @@ impl SemanticAnalyzer { } // Phase 4: Validate post-WITH WHERE clause if present - if let Some(post_where) = &query.post_with_where_clause { + if let Some(post_where) = &analyzed_query.post_with_where_clause { self.current_scope = ScopeType::PostWithWhere; if let Err(e) = self.analyze_where_clause(post_where) { errors.push(format!("Post-WITH WHERE clause error: {}", e)); @@ -133,12 +145,12 @@ impl SemanticAnalyzer { // Phase 5: Validate RETURN clause self.current_scope = ScopeType::Return; - if let Err(e) = self.analyze_return_clause(&query.return_clause) { + if let Err(e) = self.analyze_return_clause(&analyzed_query.return_clause) { errors.push(format!("RETURN clause error: {}", e)); } // Phase 6: Validate ORDER BY clause - if let Some(order_by) = &query.order_by { + if let Some(order_by) = &analyzed_query.order_by { self.current_scope = ScopeType::OrderBy; if let Err(e) = self.analyze_order_by_clause(order_by) { errors.push(format!("ORDER BY clause error: {}", e)); @@ -152,6 +164,7 @@ impl SemanticAnalyzer { self.validate_types(&mut errors); Ok(SemanticResult { + ast: analyzed_query, variables: self.variables.clone(), errors, warnings, @@ -730,6 +743,254 @@ impl SemanticAnalyzer { } Ok(()) } + /// Substitute parameters with literal values in the AST + fn substitute_parameters( + &self, + query: &mut CypherQuery, + parameters: &HashMap, + ) -> Result<()> { + // Substitute in READING clauses + for clause in &mut query.reading_clauses { + match clause { + ReadingClause::Match(match_clause) => { + for pattern in &mut match_clause.patterns { + self.substitute_in_graph_pattern(pattern, parameters)?; + } + } + ReadingClause::Unwind(unwind_clause) => { + self.substitute_in_value_expression(&mut unwind_clause.expression, parameters)?; + } + } + } + + // Substitute in WHERE clause + if let Some(where_clause) = &mut query.where_clause { + self.substitute_in_where_clause(where_clause, parameters)?; + } + + // Substitute in WITH clause + if let Some(with_clause) = &mut query.with_clause { + for item in &mut with_clause.items { + self.substitute_in_value_expression(&mut item.expression, parameters)?; + } + if let Some(order_by) = &mut with_clause.order_by { + for item in &mut order_by.items { + self.substitute_in_value_expression(&mut item.expression, parameters)?; + } + } + } + + // Substitute in post-WITH READING clauses + for clause in &mut query.post_with_reading_clauses { + match clause { + ReadingClause::Match(match_clause) => { + for pattern in &mut match_clause.patterns { + self.substitute_in_graph_pattern(pattern, parameters)?; + } + } + ReadingClause::Unwind(unwind_clause) => { + self.substitute_in_value_expression(&mut unwind_clause.expression, parameters)?; + } + } + } + + // Substitute in post-WITH WHERE clause + if let Some(post_where) = &mut query.post_with_where_clause { + self.substitute_in_where_clause(post_where, parameters)?; + } + + // Substitute in RETURN clause + for item in &mut query.return_clause.items { + self.substitute_in_value_expression(&mut item.expression, parameters)?; + } + + // Substitute in ORDER BY clause + if let Some(order_by) = &mut query.order_by { + for item in &mut order_by.items { + self.substitute_in_value_expression(&mut item.expression, parameters)?; + } + } + + Ok(()) + } + + fn substitute_in_graph_pattern( + &self, + pattern: &mut GraphPattern, + parameters: &HashMap, + ) -> Result<()> { + match pattern { + GraphPattern::Node(node) => { + for value in node.properties.values_mut() { + self.substitute_in_property_value(value, parameters)?; + } + } + GraphPattern::Path(path) => { + for value in path.start_node.properties.values_mut() { + self.substitute_in_property_value(value, parameters)?; + } + for segment in &mut path.segments { + for value in segment.relationship.properties.values_mut() { + self.substitute_in_property_value(value, parameters)?; + } + for value in segment.end_node.properties.values_mut() { + self.substitute_in_property_value(value, parameters)?; + } + } + } + } + Ok(()) + } + + fn substitute_in_property_value( + &self, + value: &mut PropertyValue, + parameters: &HashMap, + ) -> Result<()> { + if let PropertyValue::Parameter(name) = value { + let param_value = parameters.get(name).ok_or_else(|| GraphError::PlanError { + message: format!("Missing parameter: ${}", name), + location: snafu::Location::new(file!(), line!(), column!()), + })?; + + *value = self.json_to_property_value(param_value)?; + } + Ok(()) + } + + fn substitute_in_where_clause( + &self, + where_clause: &mut WhereClause, + parameters: &HashMap, + ) -> Result<()> { + self.substitute_in_boolean_expression(&mut where_clause.expression, parameters) + } + + fn substitute_in_boolean_expression( + &self, + expr: &mut BooleanExpression, + parameters: &HashMap, + ) -> Result<()> { + match expr { + BooleanExpression::Comparison { left, right, .. } => { + self.substitute_in_value_expression(left, parameters)?; + self.substitute_in_value_expression(right, parameters)?; + } + BooleanExpression::And(left, right) | BooleanExpression::Or(left, right) => { + self.substitute_in_boolean_expression(left, parameters)?; + self.substitute_in_boolean_expression(right, parameters)?; + } + BooleanExpression::Not(inner) => { + self.substitute_in_boolean_expression(inner, parameters)?; + } + BooleanExpression::Exists(_) => {} + BooleanExpression::In { expression, list } => { + self.substitute_in_value_expression(expression, parameters)?; + for item in list { + self.substitute_in_value_expression(item, parameters)?; + } + } + BooleanExpression::Like { expression, .. } + | BooleanExpression::ILike { expression, .. } + | BooleanExpression::Contains { expression, .. } + | BooleanExpression::StartsWith { expression, .. } + | BooleanExpression::EndsWith { expression, .. } + | BooleanExpression::IsNull(expression) + | BooleanExpression::IsNotNull(expression) => { + self.substitute_in_value_expression(expression, parameters)?; + } + } + Ok(()) + } + + fn substitute_in_value_expression( + &self, + expr: &mut ValueExpression, + parameters: &HashMap, + ) -> Result<()> { + match expr { + ValueExpression::Parameter(name) => { + let param_value = parameters.get(name).ok_or_else(|| GraphError::PlanError { + message: format!("Missing parameter: ${}", name), + location: snafu::Location::new(file!(), line!(), column!()), + })?; + + match self.json_to_property_value(param_value)? { + // Try to convert to VectorLiteral if it's an array of numbers + // Since PropertyValue doesn't support generic lists yet, strict JSON array -> PropertyValue + // will likely fail or return Null/String. But we want to support VectorLiteral. + // Let's rely on json_to_property_value first. + // Wait, PropertyValue doesn't have VectorLiteral. ValueExpression does. + // We need a way to convert JSON array to VectorLiteral. + _ => { + // Check for array to VectorLiteral conversion + if let serde_json::Value::Array(arr) = param_value { + let mut floats = Vec::new(); + for v in arr { + if let Some(f) = v.as_f64() { + floats.push(f as f32); + } else { + return Err(GraphError::PlanError { + message: format!( + "Parameter ${} is a list but contains non-numeric values. Only float vectors are supported as list parameters currently.", + name + ), + location: snafu::Location::new(file!(), line!(), column!()), + }); + } + } + *expr = ValueExpression::VectorLiteral(floats); + return Ok(()); + } + + // Scalar conversion + let prop_val = self.json_to_property_value(param_value)?; + *expr = ValueExpression::Literal(prop_val); + } + } + } + ValueExpression::ScalarFunction { args, .. } + | ValueExpression::AggregateFunction { args, .. } => { + for arg in args { + self.substitute_in_value_expression(arg, parameters)?; + } + } + ValueExpression::Arithmetic { left, right, .. } => { + self.substitute_in_value_expression(left, parameters)?; + self.substitute_in_value_expression(right, parameters)?; + } + ValueExpression::VectorDistance { left, right, .. } + | ValueExpression::VectorSimilarity { left, right, .. } => { + self.substitute_in_value_expression(left, parameters)?; + self.substitute_in_value_expression(right, parameters)?; + } + _ => {} + } + Ok(()) + } + + fn json_to_property_value(&self, value: &serde_json::Value) -> Result { + match value { + serde_json::Value::Null => Ok(PropertyValue::Null), + serde_json::Value::Bool(b) => Ok(PropertyValue::Boolean(*b)), + serde_json::Value::Number(n) => { + if let Some(i) = n.as_i64() { + Ok(PropertyValue::Integer(i)) + } else if let Some(f) = n.as_f64() { + Ok(PropertyValue::Float(f)) + } else { + Ok(PropertyValue::Null) + } + } + serde_json::Value::String(s) => Ok(PropertyValue::String(s.clone())), + serde_json::Value::Array(_) | serde_json::Value::Object(_) => { + Err(GraphError::PlanError { + message: "Complex types (List, Map) are not fully supported as parameters yet (except float vectors).".to_string(), + location: snafu::Location::new(file!(), line!(), column!()), + }) + } + } + } } #[cfg(test)] @@ -772,7 +1033,7 @@ mod tests { skip: None, }; let mut analyzer = SemanticAnalyzer::new(test_config()); - analyzer.analyze(&query) + analyzer.analyze(&query, &HashMap::new()) } // Helper: analyze a query with a single MATCH (var:label) and a RETURN expression @@ -802,7 +1063,7 @@ mod tests { skip: None, }; let mut analyzer = SemanticAnalyzer::new(test_config()); - analyzer.analyze(&query) + analyzer.analyze(&query, &HashMap::new()) } #[test] @@ -833,7 +1094,7 @@ mod tests { }; let mut analyzer = SemanticAnalyzer::new(test_config()); - let result = analyzer.analyze(&query).unwrap(); + let result = analyzer.analyze(&query, &HashMap::new()).unwrap(); assert!(result.errors.is_empty()); let n = result.variables.get("n").expect("variable n present"); // Labels merged @@ -882,7 +1143,7 @@ mod tests { }; let mut analyzer = SemanticAnalyzer::new(test_config()); - let result = analyzer.analyze(&query).unwrap(); + let result = analyzer.analyze(&query, &HashMap::new()).unwrap(); assert!(result .errors .iter() @@ -914,7 +1175,7 @@ mod tests { }; let mut analyzer = SemanticAnalyzer::new(test_config()); - let result = analyzer.analyze(&query).unwrap(); + let result = analyzer.analyze(&query, &HashMap::new()).unwrap(); assert!(result .errors .iter() @@ -956,7 +1217,7 @@ mod tests { }; let mut analyzer = SemanticAnalyzer::new(test_config()); - let result = analyzer.analyze(&query).unwrap(); + let result = analyzer.analyze(&query, &HashMap::new()).unwrap(); assert!(result .errors .iter() @@ -985,7 +1246,7 @@ mod tests { }; let mut analyzer = SemanticAnalyzer::new(test_config()); - let result = analyzer.analyze(&query).unwrap(); + let result = analyzer.analyze(&query, &HashMap::new()).unwrap(); assert!(result .warnings .iter() @@ -1025,7 +1286,7 @@ mod tests { }; let mut analyzer = SemanticAnalyzer::new(custom_config); - let result = analyzer.analyze(&query).unwrap(); + let result = analyzer.analyze(&query, &HashMap::new()).unwrap(); assert!(result .errors .iter() @@ -1070,7 +1331,7 @@ mod tests { }; let mut analyzer = SemanticAnalyzer::new(test_config()); - let result = analyzer.analyze(&query).unwrap(); + let result = analyzer.analyze(&query, &HashMap::new()).unwrap(); assert!(result .errors .iter() @@ -1137,7 +1398,7 @@ mod tests { }; let mut analyzer = SemanticAnalyzer::new(custom_config); - let result = analyzer.analyze(&query).unwrap(); + let result = analyzer.analyze(&query, &HashMap::new()).unwrap(); let r = result.variables.get("r").expect("variable r present"); // Types merged assert!(r.labels.contains(&"KNOWS".to_string())); @@ -1147,6 +1408,58 @@ mod tests { assert!(r.properties.contains("level")); } + #[test] + fn test_parameter_substitution() { + // MATCH (n:Person) WHERE n.age > $min_age RETURN n + let node = NodePattern::new(Some("n".to_string())).with_label("Person"); + let where_clause = WhereClause { + expression: BooleanExpression::Comparison { + left: ValueExpression::Property(PropertyRef::new("n", "age")), + operator: crate::ast::ComparisonOperator::GreaterThan, + right: ValueExpression::Parameter("min_age".to_string()), + }, + }; + let query = CypherQuery { + reading_clauses: vec![ReadingClause::Match(MatchClause { + patterns: vec![GraphPattern::Node(node)], + })], + where_clause: Some(where_clause), + with_clause: None, + post_with_reading_clauses: vec![], + post_with_where_clause: None, + return_clause: ReturnClause { + distinct: false, + items: vec![ReturnItem { + expression: ValueExpression::Variable("n".to_string()), + alias: None, + }], + }, + limit: None, + order_by: None, + skip: None, + }; + + let mut parameters = HashMap::new(); + parameters.insert("min_age".to_string(), serde_json::json!(18)); + + let mut analyzer = SemanticAnalyzer::new(test_config()); + let result = analyzer + .analyze(&query, ¶meters) + .expect("Analysis failed"); + + // Verify substitution in AST + let where_clause = result.ast.where_clause.as_ref().unwrap(); + match &where_clause.expression { + BooleanExpression::Comparison { right, .. } => match right { + ValueExpression::Literal(PropertyValue::Integer(val)) => { + assert_eq!(*val, 18); + } + _ => panic!("Expected Integer literal, got {:?}", right), + }, + _ => panic!("Expected Comparison expression"), + } + } + #[test] fn test_function_argument_undefined_variable_in_return() { // RETURN toUpper(m.name) diff --git a/python/python/tests/test_cypher_engine.py b/python/python/tests/test_cypher_engine.py index 89fcc720..ee450215 100644 --- a/python/python/tests/test_cypher_engine.py +++ b/python/python/tests/test_cypher_engine.py @@ -168,52 +168,4 @@ def test_cypher_engine_config_access(graph_env): assert "company" in engine_config.node_labels() -def test_cypher_parameter_syntax(graph_env): - """Test various Cypher parameter syntaxes ($ @ : {}).""" - config, datasets = graph_env - - # 1. Test $param - query_dollar = CypherQuery( - "MATCH (p:Person) WHERE p.age > $age RETURN p.name" - ).with_config(config) - result = query_dollar.with_parameter("age", 30).execute(datasets) - data = result.to_pydict() - assert set(data["p.name"]) == {"Bob", "David"} - # 2. Test @param - query_at = CypherQuery( - "MATCH (p:Person) WHERE p.age > @age RETURN p.name" - ).with_config(config) - result = query_at.with_parameter("age", 30).execute(datasets) - data = result.to_pydict() - assert set(data["p.name"]) == {"Bob", "David"} - - # 3. Test :param - query_colon = CypherQuery( - "MATCH (p:Person) WHERE p.age > :age RETURN p.name" - ).with_config(config) - result = query_colon.with_parameter("age", 30).execute(datasets) - data = result.to_pydict() - assert set(data["p.name"]) == {"Bob", "David"} - - # 4. Test {param} - query_curly = CypherQuery( - "MATCH (p:Person) WHERE p.age > {age} RETURN p.name" - ).with_config(config) - result = query_curly.with_parameter("age", 30).execute(datasets) - data = result.to_pydict() - assert set(data["p.name"]) == {"Bob", "David"} - - # 5. Test multiple parameters - query_multi = CypherQuery( - "MATCH (p:Person) WHERE p.age > $min_age AND p.age < $max_age RETURN p.name" - ).with_config(config) - result = ( - query_multi.with_parameter("min_age", 25) - .with_parameter("max_age", 35) - .execute(datasets) - ) - data = result.to_pydict() - # Should get Alice (28), Carol (29), Bob (34) - # David is 42 (excluded) - assert set(data["p.name"]) == {"Alice", "Carol", "Bob"} diff --git a/python/python/tests/test_graph.py b/python/python/tests/test_graph.py index 4fca84fa..09474062 100644 --- a/python/python/tests/test_graph.py +++ b/python/python/tests/test_graph.py @@ -215,3 +215,31 @@ def test_execute_with_directory_namespace(graph_env, tmp_path): data = result.to_pydict() assert set(data["p.name"]) == {"Bob", "David"} + + +def test_cypher_parameter_syntax(graph_env): + """Test Cypher parameter syntax ($).""" + config, datasets, _ = graph_env + + # 1. Test $param + query_dollar = CypherQuery( + "MATCH (p:Person) WHERE p.age > $age RETURN p.name" + ).with_config(config) + result = query_dollar.with_parameter("age", 30).execute(datasets) + data = result.to_pydict() + assert set(data["p.name"]) == {"Bob", "David"} + + # 2. Test multiple parameters + query_multi = CypherQuery( + "MATCH (p:Person) WHERE p.age > $min_age AND p.age < $max_age RETURN p.name" + ).with_config(config) + result = ( + query_multi.with_parameter("min_age", 25) + .with_parameter("max_age", 35) + .execute(datasets) + ) + data = result.to_pydict() + # Should get Alice (28), Carol (29), Bob (34) + # David is 42 (excluded) + assert set(data["p.name"]) == {"Alice", "Carol", "Bob"} + From d0c6153bcd270410a88db41b9fb2062fafe00034 Mon Sep 17 00:00:00 2001 From: leiyuou Date: Wed, 11 Feb 2026 22:07:00 -0800 Subject: [PATCH 06/12] fmt --- .../src/datafusion_planner/builder/mod.rs | 2 +- .../src/datafusion_planner/expression.rs | 22 +------------------ .../src/datafusion_planner/scan_ops.rs | 1 + crates/lance-graph/src/parser.rs | 15 ------------- python/python/tests/test_cypher_engine.py | 3 --- 5 files changed, 3 insertions(+), 40 deletions(-) diff --git a/crates/lance-graph/src/datafusion_planner/builder/mod.rs b/crates/lance-graph/src/datafusion_planner/builder/mod.rs index 1b16559f..aaf23872 100644 --- a/crates/lance-graph/src/datafusion_planner/builder/mod.rs +++ b/crates/lance-graph/src/datafusion_planner/builder/mod.rs @@ -38,7 +38,7 @@ impl DataFusionPlanner { label, properties, .. - } => self.build_scan(variable, label, properties), + } => self.build_scan(ctx, variable, label, properties), LogicalOperator::Filter { input, predicate } => { self.build_filter(ctx, input, predicate) } diff --git a/crates/lance-graph/src/datafusion_planner/expression.rs b/crates/lance-graph/src/datafusion_planner/expression.rs index 3f37ff81..8688ba58 100644 --- a/crates/lance-graph/src/datafusion_planner/expression.rs +++ b/crates/lance-graph/src/datafusion_planner/expression.rs @@ -19,26 +19,6 @@ use datafusion_functions_aggregate::min_max::max; use datafusion_functions_aggregate::min_max::min; use datafusion_functions_aggregate::sum::sum; -/// Helper function to convert serde_json::Value to DataFusion ScalarValue -fn json_to_scalar(value: &serde_json::Value) -> datafusion::scalar::ScalarValue { - use datafusion::scalar::ScalarValue; - match value { - serde_json::Value::Null => ScalarValue::Null, - serde_json::Value::Bool(b) => ScalarValue::Boolean(Some(*b)), - serde_json::Value::Number(n) => { - if let Some(i) = n.as_i64() { - ScalarValue::Int64(Some(i)) - } else if let Some(f) = n.as_f64() { - ScalarValue::Float64(Some(f)) - } else { - ScalarValue::Null - } - } - serde_json::Value::String(s) => ScalarValue::Utf8(Some(s.clone())), - serde_json::Value::Array(_) | serde_json::Value::Object(_) => ScalarValue::Null, // Complex types not supported yet - } -} - /// Helper function to create LIKE expressions with consistent settings fn create_like_expr(expression: &ValueExpression, pattern: &str, case_insensitive: bool) -> Expr { Expr::Like(datafusion::logical_expr::Like { @@ -78,7 +58,7 @@ pub(crate) fn to_df_boolean_expr(expr: &BooleanExpression) -> Expr { BE::In { expression, list } => { use datafusion::logical_expr::expr::InList as DFInList; let expr = to_df_value_expr(expression); - let list_exprs = list.iter().map(|e| to_df_value_expr(e)).collect::>(); + let list_exprs = list.iter().map(to_df_value_expr).collect::>(); Expr::InList(DFInList::new(Box::new(expr), list_exprs, false)) } BE::And(l, r) => Expr::BinaryExpr(BinaryExpr { diff --git a/crates/lance-graph/src/datafusion_planner/scan_ops.rs b/crates/lance-graph/src/datafusion_planner/scan_ops.rs index 2c5e489f..200ae9a1 100644 --- a/crates/lance-graph/src/datafusion_planner/scan_ops.rs +++ b/crates/lance-graph/src/datafusion_planner/scan_ops.rs @@ -21,6 +21,7 @@ impl DataFusionPlanner { /// Build a qualified node scan with property filters and column aliasing pub(crate) fn build_scan( &self, + _ctx: &PlanningContext, variable: &str, label: &str, properties: &HashMap, diff --git a/crates/lance-graph/src/parser.rs b/crates/lance-graph/src/parser.rs index 3690a4c0..03a79e58 100644 --- a/crates/lance-graph/src/parser.rs +++ b/crates/lance-graph/src/parser.rs @@ -1747,21 +1747,6 @@ mod tests { let query = "MATCH (p:Person) WHERE p.age > $min_age RETURN p"; let result = parse_cypher_query(query); assert!(result.is_ok(), "$param should parse successfully"); - - // Test @param (should fail) - let query = "MATCH (p:Person) WHERE p.age > @min_age RETURN p"; - let result = parse_cypher_query(query); - assert!(result.is_err(), "@param should fail"); - - // Test :param (should fail) - let query = "MATCH (p:Person) WHERE p.age > :min_age RETURN p"; - let result = parse_cypher_query(query); - assert!(result.is_err(), ":param should fail"); - - // Test {param} (should fail) - let query = "MATCH (p:Person) WHERE p.age > {min_age} RETURN p"; - let result = parse_cypher_query(query); - assert!(result.is_err(), "{{param}} should fail"); } #[test] diff --git a/python/python/tests/test_cypher_engine.py b/python/python/tests/test_cypher_engine.py index ee450215..84372f69 100644 --- a/python/python/tests/test_cypher_engine.py +++ b/python/python/tests/test_cypher_engine.py @@ -166,6 +166,3 @@ def test_cypher_engine_config_access(graph_env): assert "person" in engine_config.node_labels() # case-insensitive assert "company" in engine_config.node_labels() - - - From 36428e129ca5148f5f96600e1ab4fa3f6f7875f5 Mon Sep 17 00:00:00 2001 From: leiyuou Date: Wed, 11 Feb 2026 22:29:37 -0800 Subject: [PATCH 07/12] fix --- crates/lance-graph/src/semantic.rs | 50 ++++++++++++------------------ 1 file changed, 20 insertions(+), 30 deletions(-) diff --git a/crates/lance-graph/src/semantic.rs b/crates/lance-graph/src/semantic.rs index 5d8b710a..98ca8581 100644 --- a/crates/lance-graph/src/semantic.rs +++ b/crates/lance-graph/src/semantic.rs @@ -915,39 +915,29 @@ impl SemanticAnalyzer { location: snafu::Location::new(file!(), line!(), column!()), })?; - match self.json_to_property_value(param_value)? { - // Try to convert to VectorLiteral if it's an array of numbers - // Since PropertyValue doesn't support generic lists yet, strict JSON array -> PropertyValue - // will likely fail or return Null/String. But we want to support VectorLiteral. - // Let's rely on json_to_property_value first. - // Wait, PropertyValue doesn't have VectorLiteral. ValueExpression does. - // We need a way to convert JSON array to VectorLiteral. - _ => { - // Check for array to VectorLiteral conversion - if let serde_json::Value::Array(arr) = param_value { - let mut floats = Vec::new(); - for v in arr { - if let Some(f) = v.as_f64() { - floats.push(f as f32); - } else { - return Err(GraphError::PlanError { - message: format!( - "Parameter ${} is a list but contains non-numeric values. Only float vectors are supported as list parameters currently.", - name - ), - location: snafu::Location::new(file!(), line!(), column!()), - }); - } - } - *expr = ValueExpression::VectorLiteral(floats); - return Ok(()); + // Check for array to VectorLiteral conversion + if let serde_json::Value::Array(arr) = param_value { + let mut floats = Vec::new(); + for v in arr { + if let Some(f) = v.as_f64() { + floats.push(f as f32); + } else { + return Err(GraphError::PlanError { + message: format!( + "Parameter ${} is a list but contains non-numeric values. Only float vectors are supported as list parameters currently.", + name + ), + location: snafu::Location::new(file!(), line!(), column!()), + }); } - - // Scalar conversion - let prop_val = self.json_to_property_value(param_value)?; - *expr = ValueExpression::Literal(prop_val); } + *expr = ValueExpression::VectorLiteral(floats); + return Ok(()); } + + // Scalar conversion + let prop_val = self.json_to_property_value(param_value)?; + *expr = ValueExpression::Literal(prop_val); } ValueExpression::ScalarFunction { args, .. } | ValueExpression::AggregateFunction { args, .. } => { From a0d7a2ec8201e8e4332e6a59fb298020f6356c87 Mon Sep 17 00:00:00 2001 From: leiyuou Date: Wed, 11 Feb 2026 23:07:29 -0800 Subject: [PATCH 08/12] fixup --- crates/lance-graph/src/lib.rs | 2 + .../lance-graph/src/parameter_substitution.rs | 271 ++++++++++++++++++ crates/lance-graph/src/semantic.rs | 232 +-------------- python/python/tests/test_graph.py | 1 - 4 files changed, 274 insertions(+), 232 deletions(-) create mode 100644 crates/lance-graph/src/parameter_substitution.rs diff --git a/crates/lance-graph/src/lib.rs b/crates/lance-graph/src/lib.rs index 692773ad..c06b63b8 100644 --- a/crates/lance-graph/src/lib.rs +++ b/crates/lance-graph/src/lib.rs @@ -43,6 +43,8 @@ pub mod error; pub mod lance_native_planner; pub mod lance_vector_search; pub mod logical_plan; +pub mod namespace; +pub mod parameter_substitution; pub mod parser; pub mod query; pub mod semantic; diff --git a/crates/lance-graph/src/parameter_substitution.rs b/crates/lance-graph/src/parameter_substitution.rs new file mode 100644 index 00000000..d58b5f4e --- /dev/null +++ b/crates/lance-graph/src/parameter_substitution.rs @@ -0,0 +1,271 @@ +use crate::ast::*; +use crate::error::{GraphError, Result}; +use std::collections::HashMap; + +/// Substitute parameters with literal values in the AST +pub fn substitute_parameters( + query: &mut CypherQuery, + parameters: &HashMap, +) -> Result<()> { + // Substitute in READING clauses + for clause in &mut query.reading_clauses { + substitute_in_reading_clause(clause, parameters)?; + } + + // Substitute in WHERE clause + if let Some(where_clause) = &mut query.where_clause { + substitute_in_where_clause(where_clause, parameters)?; + } + + // Substitute in WITH clause + if let Some(with_clause) = &mut query.with_clause { + substitute_in_with_clause(with_clause, parameters)?; + } + + // Substitute in post-WITH READING clauses + for clause in &mut query.post_with_reading_clauses { + substitute_in_reading_clause(clause, parameters)?; + } + + // Substitute in post-WITH WHERE clause + if let Some(post_where) = &mut query.post_with_where_clause { + substitute_in_where_clause(post_where, parameters)?; + } + + // Substitute in RETURN clause + substitute_in_return_clause(&mut query.return_clause, parameters)?; + + // Substitute in ORDER BY clause + if let Some(order_by) = &mut query.order_by { + substitute_in_order_by_clause(order_by, parameters)?; + } + + Ok(()) +} + +fn substitute_in_reading_clause( + clause: &mut ReadingClause, + parameters: &HashMap, +) -> Result<()> { + match clause { + ReadingClause::Match(match_clause) => { + for pattern in &mut match_clause.patterns { + substitute_in_graph_pattern(pattern, parameters)?; + } + } + ReadingClause::Unwind(unwind_clause) => { + substitute_in_value_expression(&mut unwind_clause.expression, parameters)?; + } + } + Ok(()) +} + +fn substitute_in_graph_pattern( + pattern: &mut GraphPattern, + parameters: &HashMap, +) -> Result<()> { + match pattern { + GraphPattern::Node(node) => { + for value in node.properties.values_mut() { + substitute_in_property_value(value, parameters)?; + } + } + GraphPattern::Path(path) => { + substitute_in_node_pattern(&mut path.start_node, parameters)?; + for segment in &mut path.segments { + substitute_in_relationship_pattern(&mut segment.relationship, parameters)?; + substitute_in_node_pattern(&mut segment.end_node, parameters)?; + } + } + } + Ok(()) +} + +fn substitute_in_node_pattern( + node: &mut NodePattern, + parameters: &HashMap, +) -> Result<()> { + for value in node.properties.values_mut() { + substitute_in_property_value(value, parameters)?; + } + Ok(()) +} + +fn substitute_in_relationship_pattern( + rel: &mut RelationshipPattern, + parameters: &HashMap, +) -> Result<()> { + for value in rel.properties.values_mut() { + substitute_in_property_value(value, parameters)?; + } + Ok(()) +} + +fn substitute_in_property_value( + value: &mut PropertyValue, + parameters: &HashMap, +) -> Result<()> { + if let PropertyValue::Parameter(name) = value { + let param_value = parameters.get(name).ok_or_else(|| GraphError::PlanError { + message: format!("Missing parameter: ${}", name), + location: snafu::Location::new(file!(), line!(), column!()), + })?; + + *value = json_to_property_value(param_value)?; + } + Ok(()) +} + +fn substitute_in_where_clause( + where_clause: &mut WhereClause, + parameters: &HashMap, +) -> Result<()> { + substitute_in_boolean_expression(&mut where_clause.expression, parameters) +} + +fn substitute_in_with_clause( + with_clause: &mut WithClause, + parameters: &HashMap, +) -> Result<()> { + for item in &mut with_clause.items { + substitute_in_value_expression(&mut item.expression, parameters)?; + } + if let Some(order_by) = &mut with_clause.order_by { + substitute_in_order_by_clause(order_by, parameters)?; + } + Ok(()) +} + +fn substitute_in_return_clause( + return_clause: &mut ReturnClause, + parameters: &HashMap, +) -> Result<()> { + for item in &mut return_clause.items { + substitute_in_value_expression(&mut item.expression, parameters)?; + } + Ok(()) +} + +fn substitute_in_order_by_clause( + order_by: &mut OrderByClause, + parameters: &HashMap, +) -> Result<()> { + for item in &mut order_by.items { + substitute_in_value_expression(&mut item.expression, parameters)?; + } + Ok(()) +} + +fn substitute_in_boolean_expression( + expr: &mut BooleanExpression, + parameters: &HashMap, +) -> Result<()> { + match expr { + BooleanExpression::Comparison { left, right, .. } => { + substitute_in_value_expression(left, parameters)?; + substitute_in_value_expression(right, parameters)?; + } + BooleanExpression::And(left, right) | BooleanExpression::Or(left, right) => { + substitute_in_boolean_expression(left, parameters)?; + substitute_in_boolean_expression(right, parameters)?; + } + BooleanExpression::Not(inner) => { + substitute_in_boolean_expression(inner, parameters)?; + } + BooleanExpression::Exists(_) => {} + BooleanExpression::In { expression, list } => { + substitute_in_value_expression(expression, parameters)?; + for item in list { + substitute_in_value_expression(item, parameters)?; + } + } + BooleanExpression::Like { expression, .. } + | BooleanExpression::ILike { expression, .. } + | BooleanExpression::Contains { expression, .. } + | BooleanExpression::StartsWith { expression, .. } + | BooleanExpression::EndsWith { expression, .. } + | BooleanExpression::IsNull(expression) + | BooleanExpression::IsNotNull(expression) => { + substitute_in_value_expression(expression, parameters)?; + } + } + Ok(()) +} + +fn substitute_in_value_expression( + expr: &mut ValueExpression, + parameters: &HashMap, +) -> Result<()> { + match expr { + ValueExpression::Parameter(name) => { + let param_value = parameters.get(name).ok_or_else(|| GraphError::PlanError { + message: format!("Missing parameter: ${}", name), + location: snafu::Location::new(file!(), line!(), column!()), + })?; + + // Check for array to VectorLiteral conversion + if let serde_json::Value::Array(arr) = param_value { + let mut floats = Vec::new(); + for v in arr { + if let Some(f) = v.as_f64() { + floats.push(f as f32); + } else { + return Err(GraphError::PlanError { + message: format!( + "Parameter ${} is a list but contains non-numeric values. Only float vectors are supported as list parameters currently.", + name + ), + location: snafu::Location::new(file!(), line!(), column!()), + }); + } + } + *expr = ValueExpression::VectorLiteral(floats); + return Ok(()); + } + + // Scalar conversion + let prop_val = json_to_property_value(param_value)?; + *expr = ValueExpression::Literal(prop_val); + } + ValueExpression::ScalarFunction { args, .. } + | ValueExpression::AggregateFunction { args, .. } => { + for arg in args { + substitute_in_value_expression(arg, parameters)?; + } + } + ValueExpression::Arithmetic { left, right, .. } => { + substitute_in_value_expression(left, parameters)?; + substitute_in_value_expression(right, parameters)?; + } + ValueExpression::VectorDistance { left, right, .. } + | ValueExpression::VectorSimilarity { left, right, .. } => { + substitute_in_value_expression(left, parameters)?; + substitute_in_value_expression(right, parameters)?; + } + _ => {} + } + Ok(()) +} + +fn json_to_property_value(value: &serde_json::Value) -> Result { + match value { + serde_json::Value::Null => Ok(PropertyValue::Null), + serde_json::Value::Bool(b) => Ok(PropertyValue::Boolean(*b)), + serde_json::Value::Number(n) => { + if let Some(i) = n.as_i64() { + Ok(PropertyValue::Integer(i)) + } else if let Some(f) = n.as_f64() { + Ok(PropertyValue::Float(f)) + } else { + Ok(PropertyValue::Null) + } + } + serde_json::Value::String(s) => Ok(PropertyValue::String(s.clone())), + serde_json::Value::Array(_) | serde_json::Value::Object(_) => { + Err(GraphError::PlanError { + message: "Complex types (List, Map) are not fully supported as parameters yet (except float vectors).".to_string(), + location: snafu::Location::new(file!(), line!(), column!()), + }) + } + } +} diff --git a/crates/lance-graph/src/semantic.rs b/crates/lance-graph/src/semantic.rs index 98ca8581..c1b64028 100644 --- a/crates/lance-graph/src/semantic.rs +++ b/crates/lance-graph/src/semantic.rs @@ -749,237 +749,7 @@ impl SemanticAnalyzer { query: &mut CypherQuery, parameters: &HashMap, ) -> Result<()> { - // Substitute in READING clauses - for clause in &mut query.reading_clauses { - match clause { - ReadingClause::Match(match_clause) => { - for pattern in &mut match_clause.patterns { - self.substitute_in_graph_pattern(pattern, parameters)?; - } - } - ReadingClause::Unwind(unwind_clause) => { - self.substitute_in_value_expression(&mut unwind_clause.expression, parameters)?; - } - } - } - - // Substitute in WHERE clause - if let Some(where_clause) = &mut query.where_clause { - self.substitute_in_where_clause(where_clause, parameters)?; - } - - // Substitute in WITH clause - if let Some(with_clause) = &mut query.with_clause { - for item in &mut with_clause.items { - self.substitute_in_value_expression(&mut item.expression, parameters)?; - } - if let Some(order_by) = &mut with_clause.order_by { - for item in &mut order_by.items { - self.substitute_in_value_expression(&mut item.expression, parameters)?; - } - } - } - - // Substitute in post-WITH READING clauses - for clause in &mut query.post_with_reading_clauses { - match clause { - ReadingClause::Match(match_clause) => { - for pattern in &mut match_clause.patterns { - self.substitute_in_graph_pattern(pattern, parameters)?; - } - } - ReadingClause::Unwind(unwind_clause) => { - self.substitute_in_value_expression(&mut unwind_clause.expression, parameters)?; - } - } - } - - // Substitute in post-WITH WHERE clause - if let Some(post_where) = &mut query.post_with_where_clause { - self.substitute_in_where_clause(post_where, parameters)?; - } - - // Substitute in RETURN clause - for item in &mut query.return_clause.items { - self.substitute_in_value_expression(&mut item.expression, parameters)?; - } - - // Substitute in ORDER BY clause - if let Some(order_by) = &mut query.order_by { - for item in &mut order_by.items { - self.substitute_in_value_expression(&mut item.expression, parameters)?; - } - } - - Ok(()) - } - - fn substitute_in_graph_pattern( - &self, - pattern: &mut GraphPattern, - parameters: &HashMap, - ) -> Result<()> { - match pattern { - GraphPattern::Node(node) => { - for value in node.properties.values_mut() { - self.substitute_in_property_value(value, parameters)?; - } - } - GraphPattern::Path(path) => { - for value in path.start_node.properties.values_mut() { - self.substitute_in_property_value(value, parameters)?; - } - for segment in &mut path.segments { - for value in segment.relationship.properties.values_mut() { - self.substitute_in_property_value(value, parameters)?; - } - for value in segment.end_node.properties.values_mut() { - self.substitute_in_property_value(value, parameters)?; - } - } - } - } - Ok(()) - } - - fn substitute_in_property_value( - &self, - value: &mut PropertyValue, - parameters: &HashMap, - ) -> Result<()> { - if let PropertyValue::Parameter(name) = value { - let param_value = parameters.get(name).ok_or_else(|| GraphError::PlanError { - message: format!("Missing parameter: ${}", name), - location: snafu::Location::new(file!(), line!(), column!()), - })?; - - *value = self.json_to_property_value(param_value)?; - } - Ok(()) - } - - fn substitute_in_where_clause( - &self, - where_clause: &mut WhereClause, - parameters: &HashMap, - ) -> Result<()> { - self.substitute_in_boolean_expression(&mut where_clause.expression, parameters) - } - - fn substitute_in_boolean_expression( - &self, - expr: &mut BooleanExpression, - parameters: &HashMap, - ) -> Result<()> { - match expr { - BooleanExpression::Comparison { left, right, .. } => { - self.substitute_in_value_expression(left, parameters)?; - self.substitute_in_value_expression(right, parameters)?; - } - BooleanExpression::And(left, right) | BooleanExpression::Or(left, right) => { - self.substitute_in_boolean_expression(left, parameters)?; - self.substitute_in_boolean_expression(right, parameters)?; - } - BooleanExpression::Not(inner) => { - self.substitute_in_boolean_expression(inner, parameters)?; - } - BooleanExpression::Exists(_) => {} - BooleanExpression::In { expression, list } => { - self.substitute_in_value_expression(expression, parameters)?; - for item in list { - self.substitute_in_value_expression(item, parameters)?; - } - } - BooleanExpression::Like { expression, .. } - | BooleanExpression::ILike { expression, .. } - | BooleanExpression::Contains { expression, .. } - | BooleanExpression::StartsWith { expression, .. } - | BooleanExpression::EndsWith { expression, .. } - | BooleanExpression::IsNull(expression) - | BooleanExpression::IsNotNull(expression) => { - self.substitute_in_value_expression(expression, parameters)?; - } - } - Ok(()) - } - - fn substitute_in_value_expression( - &self, - expr: &mut ValueExpression, - parameters: &HashMap, - ) -> Result<()> { - match expr { - ValueExpression::Parameter(name) => { - let param_value = parameters.get(name).ok_or_else(|| GraphError::PlanError { - message: format!("Missing parameter: ${}", name), - location: snafu::Location::new(file!(), line!(), column!()), - })?; - - // Check for array to VectorLiteral conversion - if let serde_json::Value::Array(arr) = param_value { - let mut floats = Vec::new(); - for v in arr { - if let Some(f) = v.as_f64() { - floats.push(f as f32); - } else { - return Err(GraphError::PlanError { - message: format!( - "Parameter ${} is a list but contains non-numeric values. Only float vectors are supported as list parameters currently.", - name - ), - location: snafu::Location::new(file!(), line!(), column!()), - }); - } - } - *expr = ValueExpression::VectorLiteral(floats); - return Ok(()); - } - - // Scalar conversion - let prop_val = self.json_to_property_value(param_value)?; - *expr = ValueExpression::Literal(prop_val); - } - ValueExpression::ScalarFunction { args, .. } - | ValueExpression::AggregateFunction { args, .. } => { - for arg in args { - self.substitute_in_value_expression(arg, parameters)?; - } - } - ValueExpression::Arithmetic { left, right, .. } => { - self.substitute_in_value_expression(left, parameters)?; - self.substitute_in_value_expression(right, parameters)?; - } - ValueExpression::VectorDistance { left, right, .. } - | ValueExpression::VectorSimilarity { left, right, .. } => { - self.substitute_in_value_expression(left, parameters)?; - self.substitute_in_value_expression(right, parameters)?; - } - _ => {} - } - Ok(()) - } - - fn json_to_property_value(&self, value: &serde_json::Value) -> Result { - match value { - serde_json::Value::Null => Ok(PropertyValue::Null), - serde_json::Value::Bool(b) => Ok(PropertyValue::Boolean(*b)), - serde_json::Value::Number(n) => { - if let Some(i) = n.as_i64() { - Ok(PropertyValue::Integer(i)) - } else if let Some(f) = n.as_f64() { - Ok(PropertyValue::Float(f)) - } else { - Ok(PropertyValue::Null) - } - } - serde_json::Value::String(s) => Ok(PropertyValue::String(s.clone())), - serde_json::Value::Array(_) | serde_json::Value::Object(_) => { - Err(GraphError::PlanError { - message: "Complex types (List, Map) are not fully supported as parameters yet (except float vectors).".to_string(), - location: snafu::Location::new(file!(), line!(), column!()), - }) - } - } + crate::parameter_substitution::substitute_parameters(query, parameters) } } diff --git a/python/python/tests/test_graph.py b/python/python/tests/test_graph.py index 09474062..bb1143fa 100644 --- a/python/python/tests/test_graph.py +++ b/python/python/tests/test_graph.py @@ -242,4 +242,3 @@ def test_cypher_parameter_syntax(graph_env): # Should get Alice (28), Carol (29), Bob (34) # David is 42 (excluded) assert set(data["p.name"]) == {"Alice", "Carol", "Bob"} - From 5a2d2ac63c93d9345b1f52305292e4a1672ec37f Mon Sep 17 00:00:00 2001 From: leiyuou Date: Wed, 11 Feb 2026 23:20:03 -0800 Subject: [PATCH 09/12] fixup --- crates/lance-graph/src/parameter_substitution.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/crates/lance-graph/src/parameter_substitution.rs b/crates/lance-graph/src/parameter_substitution.rs index d58b5f4e..08abc724 100644 --- a/crates/lance-graph/src/parameter_substitution.rs +++ b/crates/lance-graph/src/parameter_substitution.rs @@ -8,8 +8,8 @@ pub fn substitute_parameters( parameters: &HashMap, ) -> Result<()> { // Substitute in READING clauses - for clause in &mut query.reading_clauses { - substitute_in_reading_clause(clause, parameters)?; + for reading_clause in &mut query.reading_clauses { + substitute_in_reading_clause(reading_clause, parameters)?; } // Substitute in WHERE clause @@ -23,8 +23,8 @@ pub fn substitute_parameters( } // Substitute in post-WITH READING clauses - for clause in &mut query.post_with_reading_clauses { - substitute_in_reading_clause(clause, parameters)?; + for reading_clause in &mut query.post_with_reading_clauses { + substitute_in_reading_clause(reading_clause, parameters)?; } // Substitute in post-WITH WHERE clause From c681a23b420191552c5949e4d8bca95bbe057392 Mon Sep 17 00:00:00 2001 From: leiyuou Date: Thu, 12 Feb 2026 20:20:02 -0800 Subject: [PATCH 10/12] fixup --- crates/lance-graph/src/datafusion_planner/mod.rs | 1 - crates/lance-graph/src/lib.rs | 1 - 2 files changed, 2 deletions(-) diff --git a/crates/lance-graph/src/datafusion_planner/mod.rs b/crates/lance-graph/src/datafusion_planner/mod.rs index db8a7b08..ce8ffb5c 100644 --- a/crates/lance-graph/src/datafusion_planner/mod.rs +++ b/crates/lance-graph/src/datafusion_planner/mod.rs @@ -34,7 +34,6 @@ use crate::error::Result; use crate::logical_plan::LogicalOperator; use datafusion::logical_expr::LogicalPlan; use lance_graph_catalog::GraphSourceCatalog; -use std::collections::HashMap; use std::sync::Arc; diff --git a/crates/lance-graph/src/lib.rs b/crates/lance-graph/src/lib.rs index c06b63b8..d36043cd 100644 --- a/crates/lance-graph/src/lib.rs +++ b/crates/lance-graph/src/lib.rs @@ -43,7 +43,6 @@ pub mod error; pub mod lance_native_planner; pub mod lance_vector_search; pub mod logical_plan; -pub mod namespace; pub mod parameter_substitution; pub mod parser; pub mod query; From 5a473b7098599c6b3e5fbeae91fec9d59d014653 Mon Sep 17 00:00:00 2001 From: leiyuou Date: Thu, 12 Feb 2026 20:21:33 -0800 Subject: [PATCH 11/12] fixup --- crates/lance-graph/src/datafusion_planner/mod.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/lance-graph/src/datafusion_planner/mod.rs b/crates/lance-graph/src/datafusion_planner/mod.rs index ce8ffb5c..dfb4bb5b 100644 --- a/crates/lance-graph/src/datafusion_planner/mod.rs +++ b/crates/lance-graph/src/datafusion_planner/mod.rs @@ -34,7 +34,6 @@ use crate::error::Result; use crate::logical_plan::LogicalOperator; use datafusion::logical_expr::LogicalPlan; use lance_graph_catalog::GraphSourceCatalog; - use std::sync::Arc; /// Planner abstraction for graph-to-physical planning From cf3f56e402728fbedb1037ab8e6888717195e3bb Mon Sep 17 00:00:00 2001 From: leiyuou Date: Tue, 24 Feb 2026 21:42:42 -0800 Subject: [PATCH 12/12] address comment --- .../lance-graph/src/parameter_substitution.rs | 27 ++++++++++++------- crates/lance-graph/src/query.rs | 11 +++++--- 2 files changed, 25 insertions(+), 13 deletions(-) diff --git a/crates/lance-graph/src/parameter_substitution.rs b/crates/lance-graph/src/parameter_substitution.rs index 08abc724..5f641c29 100644 --- a/crates/lance-graph/src/parameter_substitution.rs +++ b/crates/lance-graph/src/parameter_substitution.rs @@ -106,10 +106,13 @@ fn substitute_in_property_value( parameters: &HashMap, ) -> Result<()> { if let PropertyValue::Parameter(name) = value { - let param_value = parameters.get(name).ok_or_else(|| GraphError::PlanError { - message: format!("Missing parameter: ${}", name), - location: snafu::Location::new(file!(), line!(), column!()), - })?; + let param_value = + parameters + .get(&name.to_lowercase()) + .ok_or_else(|| GraphError::PlanError { + message: format!("Missing parameter: ${}", name), + location: snafu::Location::new(file!(), line!(), column!()), + })?; *value = json_to_property_value(param_value)?; } @@ -198,10 +201,13 @@ fn substitute_in_value_expression( ) -> Result<()> { match expr { ValueExpression::Parameter(name) => { - let param_value = parameters.get(name).ok_or_else(|| GraphError::PlanError { - message: format!("Missing parameter: ${}", name), - location: snafu::Location::new(file!(), line!(), column!()), - })?; + let param_value = + parameters + .get(&name.to_lowercase()) + .ok_or_else(|| GraphError::PlanError { + message: format!("Missing parameter: ${}", name), + location: snafu::Location::new(file!(), line!(), column!()), + })?; // Check for array to VectorLiteral conversion if let serde_json::Value::Array(arr) = param_value { @@ -257,7 +263,10 @@ fn json_to_property_value(value: &serde_json::Value) -> Result { } else if let Some(f) = n.as_f64() { Ok(PropertyValue::Float(f)) } else { - Ok(PropertyValue::Null) + Err(GraphError::PlanError { + message: format!("Number parameter could not be converted to i64 or f64: {}", n), + location: snafu::Location::new(file!(), line!(), column!()), + }) } } serde_json::Value::String(s) => Ok(PropertyValue::String(s.clone())), diff --git a/crates/lance-graph/src/query.rs b/crates/lance-graph/src/query.rs index 2b8162a1..b3a2dcec 100644 --- a/crates/lance-graph/src/query.rs +++ b/crates/lance-graph/src/query.rs @@ -101,13 +101,16 @@ impl CypherQuery { K: Into, V: Into, { - self.parameters.insert(key.into(), value.into()); + self.parameters + .insert(key.into().to_lowercase(), value.into()); self } /// Add multiple parameters to the query pub fn with_parameters(mut self, params: HashMap) -> Self { - self.parameters.extend(params); + for (k, v) in params { + self.parameters.insert(k.to_lowercase(), v); + } self } @@ -1498,8 +1501,8 @@ mod tests { .unwrap() .with_parameters(params); - assert!(query.parameters().contains_key("minAge")); - assert!(query.parameters().contains_key("maxAge")); + assert!(query.parameters().contains_key("minage")); + assert!(query.parameters().contains_key("maxage")); } #[test]