Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions src/cli/search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -932,8 +932,17 @@ fn try_hybrid_search(
})
.collect();

// 8. RRFマージ
Ok(rrf_merge(&bm25_results, &filtered_semantic, options.limit))
// 8. RRFマージ(BM25=0件の場合はセマンティックフォールバック)
if bm25_results.is_empty() && !filtered_semantic.is_empty() {
eprintln!("[hybrid] BM25 returned 0 results, using semantic-only results.");
Ok(crate::search::hybrid::semantic_fallback(
&filtered_semantic,
&similar_results,
options.limit,
))
} else {
Ok(rrf_merge(&bm25_results, &filtered_semantic, options.limit))
}
}

/// セマンティック検索結果をSearchResult型に変換する(ハイブリッド検索用)
Expand Down
39 changes: 39 additions & 0 deletions src/search/hybrid.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use std::collections::HashMap;

use crate::embedding::store::EmbeddingSimilarityResult;
use crate::indexer::reader::SearchResult;

/// RRF定数(業界標準値)
Expand Down Expand Up @@ -60,6 +61,44 @@ pub fn rrf_merge(
rrf_merge_multiple(&[bm25_results.to_vec(), semantic_results.to_vec()], limit)
}

/// Fallback for the case where BM25 returned no hits: ranks the semantic results
/// by cosine similarity instead of merging with RRF.
///
/// * `filtered_semantic` - semantic hits already converted to `SearchResult`.
/// * `similar_results` - the original `EmbeddingSimilarityResult` values, which
///   carry the cosine similarity of each section.
/// * `limit` - maximum number of results to return.
///
/// A result whose `(path, heading)` matches a `(file_path, section_heading)` entry
/// has its score replaced by that entry's similarity; a result with no matching
/// entry keeps its existing score. The output is sorted by score in descending
/// order and truncated to `limit`. Results with a NaN score are placed last so
/// that they can never outrank a real hit.
pub fn semantic_fallback(
    filtered_semantic: &[SearchResult],
    similar_results: &[EmbeddingSimilarityResult],
    limit: usize,
) -> Vec<SearchResult> {
    use std::cmp::Ordering;

    // Borrowed keys: lookups below need no per-result String allocation.
    // On duplicate keys the last entry wins, as with any collect into a HashMap.
    let similarity_by_section: HashMap<(&str, &str), f32> = similar_results
        .iter()
        .map(|r| {
            (
                (r.file_path.as_str(), r.section_heading.as_str()),
                r.similarity,
            )
        })
        .collect();

    // Pair each hit with its effective score without cloning it yet; only the
    // hits that survive the limit are cloned at the end.
    let mut scored: Vec<(f32, &SearchResult)> = filtered_semantic
        .iter()
        .map(|r| {
            let score = similarity_by_section
                .get(&(r.path.as_str(), r.heading.as_str()))
                .copied()
                .unwrap_or(r.score);
            (score, r)
        })
        .collect();

    // Descending by score with NaN last. Treating NaN as "equal to everything"
    // is not a consistent ordering: it lets NaN entries land anywhere and
    // `sort_by` is allowed to panic on such comparators.
    scored.sort_by(|(a, _), (b, _)| match (a.is_nan(), b.is_nan()) {
        (true, true) => Ordering::Equal,
        (true, false) => Ordering::Greater,
        (false, true) => Ordering::Less,
        // Neither side is NaN here, so partial_cmp always yields Some.
        (false, false) => b.partial_cmp(a).unwrap_or(Ordering::Equal),
    });

    scored
        .into_iter()
        .take(limit)
        .map(|(score, r)| {
            let mut result = r.clone();
            result.score = score;
            result
        })
        .collect()
}

/// ファイルキーのRRFスコアを計算する内部ヘルパー
fn compute_file_rrf_scores(ranked_lists: &[&[(String, f32)]]) -> HashMap<String, f32> {
let mut scores = HashMap::new();
Expand Down
119 changes: 119 additions & 0 deletions tests/e2e_semantic_hybrid.rs
Original file line number Diff line number Diff line change
Expand Up @@ -569,6 +569,125 @@ fn test_rerank_fallback_llm_comment() {
);
}

// ===========================================================================
// BM25=0 semantic fallback tests (Issue #178)
// ===========================================================================

#[test]
fn test_hybrid_bm25_zero_semantic_fallback() {
    use commandindex::embedding::store::EmbeddingSimilarityResult;
    use commandindex::search::hybrid::semantic_fallback;

    // Scenario: BM25 produced nothing while the semantic search has hits, so the
    // fallback must return the semantic hits scored by cosine similarity.
    // One fixture table drives both inputs so they cannot drift apart.
    let fixtures = [
        ("alpha.md", "Alpha Document", 0.95_f32),
        ("beta.md", "Beta Document", 0.80_f32),
    ];

    // Semantic hits in SearchResult form.
    let semantic: Vec<SearchResult> = fixtures
        .iter()
        .map(|&(path, heading, score)| make_search_result(path, heading, score))
        .collect();

    // The raw similarity records the fallback reads its scores from.
    let similar_results: Vec<EmbeddingSimilarityResult> = fixtures
        .iter()
        .map(|&(path, heading, score)| EmbeddingSimilarityResult {
            file_path: path.to_string(),
            section_heading: heading.to_string(),
            similarity: score,
        })
        .collect();

    let results = semantic_fallback(&semantic, &similar_results, 10);

    assert!(
        !results.is_empty(),
        "test_hybrid_bm25_zero_semantic_fallback: should return results when BM25 is empty"
    );
    assert_eq!(
        results.len(),
        2,
        "test_hybrid_bm25_zero_semantic_fallback: should return 2 results"
    );

    // Every score must look like a cosine similarity, i.e. lie within [0.0, 1.0].
    for r in results.iter() {
        assert!(
            (0.0..=1.0).contains(&r.score),
            "test_hybrid_bm25_zero_semantic_fallback: score {} should be in [0.0, 1.0]",
            r.score
        );
    }

    let first = &results[0];
    let second = &results[1];

    // Ranking follows similarity: alpha (0.95) ahead of beta (0.80).
    assert_eq!(
        first.path, "alpha.md",
        "test_hybrid_bm25_zero_semantic_fallback: alpha (0.95) should rank first"
    );
    assert_eq!(
        second.path, "beta.md",
        "test_hybrid_bm25_zero_semantic_fallback: beta (0.80) should rank second"
    );

    // The reported scores are the cosine similarities themselves.
    assert!(
        (first.score - 0.95).abs() < 1e-6,
        "test_hybrid_bm25_zero_semantic_fallback: alpha score {} should be ~0.95",
        first.score
    );
    assert!(
        (second.score - 0.80).abs() < 1e-6,
        "test_hybrid_bm25_zero_semantic_fallback: beta score {} should be ~0.80",
        second.score
    );
}

#[test]
fn test_hybrid_bm25_zero_semantic_zero() {
    use commandindex::embedding::store::EmbeddingSimilarityResult;
    use commandindex::search::hybrid::semantic_fallback;

    // Neither BM25 nor the semantic search found anything: the fallback has
    // nothing to rank and must hand back an empty list.
    let no_hits: Vec<SearchResult> = Vec::new();
    let no_similarities: Vec<EmbeddingSimilarityResult> = Vec::new();

    let results = semantic_fallback(&no_hits, &no_similarities, 10);

    assert!(
        results.is_empty(),
        "test_hybrid_bm25_zero_semantic_zero: should return empty when both are empty"
    );
}

#[test]
fn test_hybrid_bm25_zero_respects_limit() {
    use commandindex::embedding::store::EmbeddingSimilarityResult;
    use commandindex::search::hybrid::semantic_fallback;

    // Five semantic hits with strictly decreasing scores; the fallback is asked
    // for three, so it must cut the list down to the limit.
    let mut semantic: Vec<SearchResult> = Vec::new();
    let mut similar_results: Vec<EmbeddingSimilarityResult> = Vec::new();
    for i in 0..5 {
        let path = format!("doc{i}.md");
        let heading = format!("Doc {i}");
        let score = 0.9 - i as f32 * 0.1;

        semantic.push(make_search_result(&path, &heading, score));
        similar_results.push(EmbeddingSimilarityResult {
            file_path: path,
            section_heading: heading,
            similarity: score,
        });
    }

    let results = semantic_fallback(&semantic, &similar_results, 3);

    assert_eq!(
        results.len(),
        3,
        "test_hybrid_bm25_zero_respects_limit: should truncate to limit=3"
    );
}

// ===========================================================================
// Environment-dependent tests (require Ollama)
// ===========================================================================
Expand Down
Loading