diff --git a/docs/byok_guide.md b/docs/byok_guide.md
index 5213a8d15..e1d353919 100644
--- a/docs/byok_guide.md
+++ b/docs/byok_guide.md
@@ -79,12 +79,18 @@ Both modes rely on:
 
 Inline RAG additionally supports:
 - **Score Multiplier**: Optional weight applied per BYOK vector store when mixing multiple sources. Allows custom prioritization of content.
+- **Relevance cutoff score** (`relevance_cutoff_score` in `byok_rag`): Minimum raw similarity score for a chunk to be returned from that BYOK vector store. Chunks below the threshold are dropped before results are merged and ranked with other sources. Configure per knowledge source (each `byok_rag` entry has its own value). The default when omitted is `0.3` (see `DEFAULT_BYOK_RAG_RELEVANCE_CUTOFF_SCORE` in `src/constants.py`). This value is passed to Llama Stack as the vector search `score_threshold` for that store.
 
 > [!NOTE]
 > OKP and BYOK scores are not directly comparable (different scoring systems), so
 > `score_multiplier` does not apply to OKP results. To control the amount of retrieved
 > context, set the `BYOK_RAG_MAX_CHUNKS` and `OKP_RAG_MAX_CHUNKS` constants in `src/constants.py`
 > (defaults: 10 and 5 respectively). For Tool RAG, use `TOOL_RAG_MAX_CHUNKS` (default: 10).
+
+> [!NOTE]
+> `relevance_cutoff_score` applies to Inline RAG only. When the model uses Tool RAG (`file_search`),
+> Lightspeed Stack does not send this setting; retrieval uses Llama Stack's default ranking for that path.
+> Use Inline RAG if you need per-store cutoff behavior from configuration.
 
 ---
 
@@ -288,11 +294,16 @@ registered_resources:
 >     embedding_dimension: 768
 >     vector_db_id: your-index-id  # Llama Stack vector store ID (from index generation)
 >     db_path: /path/to/vector_db/faiss_store.db
->     score_multiplier: 1.0       # Optional: weight results when mixing multiple sources
+>     score_multiplier: 1.0        # Optional: weight results when mixing multiple BYOK sources (Inline RAG)
+>     relevance_cutoff_score: 0.3  # Optional: min raw similarity per chunk for this store (Inline RAG only; default 0.3)
 > ```
 >
 > When multiple BYOK sources are configured, `score_multiplier` adjusts the relative importance of
 > each store's results during Inline RAG retrieval. Values above 1.0 boost a store; below 1.0 reduce it.
+>
+> `relevance_cutoff_score` is compared against raw scores in the score space of that store's
+> vector backend. Such scores are not comparable across different vector stores or with OKP, so
+> tune each `byok_rag` entry using retrieval quality on that corpus.
 
 ### Step 5: Configure RAG Strategy
 
@@ -319,10 +330,10 @@ okp:
 
 Both modes can be enabled simultaneously. Choose based on your latency and control preferences:
 
-| Mode | When context is fetched | Tool call needed | score_multiplier |
-|------|------------------------|------------------|-----------------|
-| Inline RAG | With every query | No | Yes (BYOK only) |
-| Tool RAG | On LLM demand | Yes | No |
+| Mode | When context is fetched | Tool call needed | score_multiplier | relevance_cutoff_score |
+|------|------------------------|------------------|----------------|------------------------|
+| Inline RAG | With every query | No | Yes (BYOK only) | Yes (BYOK only) |
+| Tool RAG | On LLM demand | Yes | No | No  |
 
 > [!TIP]
 > A ready-to-use example combining BYOK and OKP is available at
@@ -572,4 +583,4 @@ For additional support and advanced configurations, refer to:
 - [Llama Stack Documentation](https://llama-stack.readthedocs.io/)
 - [rag-content Tool Repository](https://github.com/lightspeed-core/rag-content)
 
-Remember to regularly update your knowledge sources and monitor system performance to maintain optimal BYOK functionality.
\ No newline at end of file
+Remember to regularly update your knowledge sources and monitor system performance to maintain optimal BYOK functionality.
diff --git a/docs/openapi.json b/docs/openapi.json
index 117a15621..f5926690f 100644
--- a/docs/openapi.json
+++ b/docs/openapi.json
@@ -11806,6 +11806,13 @@
                         "title": "Score multiplier",
                         "description": "Multiplier applied to relevance scores from this vector store. Used to weight results when querying multiple knowledge sources. Values > 1 boost this store's results; values < 1 reduce them.",
                         "default": 1.0
+                    },
+                    "relevance_cutoff_score": {
+                        "type": "number",
+                        "exclusiveMinimum": 0.0,
+                        "title": "Relevance cutoff score",
+                        "description": "Minimum raw similarity score to consider a result relevant. Results with a similarity score below this threshold are not returned.",
+                        "default": 0.3
                     }
                 },
                 "additionalProperties": false,
diff --git a/src/constants.py b/src/constants.py
index cbc3cdd74..ebe016311 100644
--- a/src/constants.py
+++ b/src/constants.py
@@ -193,6 +193,9 @@
 BYOK_RAG_MAX_CHUNKS: Final[int] = 10  # retrieved from BYOK RAG
 OKP_RAG_MAX_CHUNKS: Final[int] = 5  # retrieved from OKP RAG
 
+# Default minimum raw similarity score a chunk needs to be returned from a BYOK vector store
+DEFAULT_BYOK_RAG_RELEVANCE_CUTOFF_SCORE: Final[float] = 0.3
+
 # Solr OKP constants
 SOLR_VECTOR_SEARCH_DEFAULT_K: Final[int] = 5
 SOLR_VECTOR_SEARCH_DEFAULT_SCORE_THRESHOLD: Final[float] = 0.3
diff --git a/src/models/config.py b/src/models/config.py
index c0724c7ff..901933bdd 100644
--- a/src/models/config.py
+++ b/src/models/config.py
@@ -1718,6 +1718,14 @@ class ByokRag(ConfigurationBase):
         "Values > 1 boost this store's results; values < 1 reduce them.",
     )
 
+    relevance_cutoff_score: float = Field(
+        constants.DEFAULT_BYOK_RAG_RELEVANCE_CUTOFF_SCORE,
+        gt=0,
+        title="Relevance cutoff score",
+        description="Minimum raw similarity score to consider a result relevant. "
+        "Results with a similarity score below this threshold are not returned.",
+    )
+
 
 class QuotaLimiterConfiguration(ConfigurationBase):
     """Configuration for one quota limiter.
diff --git a/src/utils/vector_search.py b/src/utils/vector_search.py
index 9d901271e..9ecaf0d07 100644
--- a/src/utils/vector_search.py
+++ b/src/utils/vector_search.py
@@ -26,6 +26,24 @@
 logger = get_logger(__name__)
 
 
+def _relevance_cutoff_for_vector_store(vector_store_id: str) -> float:
+    """Return configured relevance cutoff for a Llama Stack vector store ID.
+
+    Args:
+        vector_store_id: Llama Stack vector store identifier (``vector_db_id``)
+            used to find a matching BYOK RAG entry in configuration.
+
+    Returns:
+        ``brag.relevance_cutoff_score`` from the ``ByokRag`` whose ``vector_db_id``
+        matches ``vector_store_id``, or ``constants.DEFAULT_BYOK_RAG_RELEVANCE_CUTOFF_SCORE``
+        when no BYOK entry matches.
+    """
+    for brag in configuration.configuration.byok_rag:
+        if brag.vector_db_id == vector_store_id:
+            return brag.relevance_cutoff_score
+    return constants.DEFAULT_BYOK_RAG_RELEVANCE_CUTOFF_SCORE
+
+
 def _get_okp_base_url() -> AnyUrl:
     """Return OKP document base URL from configuration (rhokp_url), or default if unset.
 
@@ -180,6 +198,7 @@ async def _query_store_for_byok_rag(
     vector_store_id: str,
     query: str,
     weight: float,
+    score_threshold: float,
 ) -> list[dict[str, Any]]:
     """Query a single vector store for BYOK RAG.
 
@@ -188,6 +207,7 @@ async def _query_store_for_byok_rag(
         vector_store_id: ID of the vector store to query
         query: Search query string
         weight: Score multiplier to apply
+        score_threshold: Minimum raw similarity score (``relevance_cutoff_score``)
 
     Returns:
         List of weighted result dictionaries, or empty list on error
@@ -199,6 +219,7 @@ async def _query_store_for_byok_rag(
             params={
                 "max_chunks": constants.BYOK_RAG_MAX_CHUNKS,
                 "mode": "vector",
+                "score_threshold": score_threshold,
             },
         )
         return _extract_byok_rag_chunks(search_response, vector_store_id, weight)
@@ -410,6 +431,7 @@ async def _fetch_byok_rag(
                     vector_store_id,
                     query,
                     score_multiplier_mapping.get(vector_store_id, 1.0),
+                    _relevance_cutoff_for_vector_store(vector_store_id),
                 )
                 for vector_store_id in vector_store_ids_to_query
             ]
diff --git a/tests/unit/models/config/test_byok_rag.py b/tests/unit/models/config/test_byok_rag.py
index e80e749c3..547459dd6 100644
--- a/tests/unit/models/config/test_byok_rag.py
+++ b/tests/unit/models/config/test_byok_rag.py
@@ -4,6 +4,7 @@
 from pydantic import ValidationError
 
 from constants import (
+    DEFAULT_BYOK_RAG_RELEVANCE_CUTOFF_SCORE,
     DEFAULT_EMBEDDING_DIMENSION,
     DEFAULT_EMBEDDING_MODEL,
     DEFAULT_RAG_TYPE,
@@ -35,6 +36,7 @@ def test_byok_rag_configuration_default_values() -> None:
     assert byok_rag.vector_db_id == "vector_db_id"
     assert byok_rag.db_path == "tests/configuration/rag.txt"
     assert byok_rag.score_multiplier == DEFAULT_SCORE_MULTIPLIER
+    assert byok_rag.relevance_cutoff_score == DEFAULT_BYOK_RAG_RELEVANCE_CUTOFF_SCORE
 
 
 def test_byok_rag_configuration_nondefault_values() -> None:
@@ -54,6 +56,7 @@ def test_byok_rag_configuration_nondefault_values() -> None:
         vector_db_id="vector_db_id",
         db_path="tests/configuration/rag.txt",
         score_multiplier=1.0,
+        relevance_cutoff_score=0.72,
     )
     assert byok_rag is not None
     assert byok_rag.rag_id == "rag_id"
@@ -62,6 +65,7 @@ def test_byok_rag_configuration_nondefault_values() -> None:
     assert byok_rag.embedding_dimension == 1024
     assert byok_rag.vector_db_id == "vector_db_id"
     assert byok_rag.db_path == "tests/configuration/rag.txt"
+    assert byok_rag.relevance_cutoff_score == 0.72
 
 
 def test_byok_rag_configuration_wrong_dimension() -> None:
@@ -199,3 +203,18 @@ def test_byok_rag_configuration_score_multiplier_must_be_positive() -> None:
             db_path="tests/configuration/rag.txt",
             score_multiplier=0.0,
         )
+
+
+def test_byok_rag_configuration_relevance_cutoff_must_be_positive() -> None:
+    """Test that relevance_cutoff_score must be greater than 0."""
+    with pytest.raises(ValidationError, match="greater than 0"):
+        _ = ByokRag(
+            rag_id="rag_id",
+            rag_type="rag_type",
+            vector_db_id="vector_db_id",
+            embedding_model="embedding_model",
+            embedding_dimension=1024,
+            db_path="tests/configuration/rag.txt",
+            score_multiplier=1.0,
+            relevance_cutoff_score=0.0,
+        )
diff --git a/tests/unit/models/config/test_dump_configuration.py b/tests/unit/models/config/test_dump_configuration.py
index 14ee69bbb..1cba5a2bf 100644
--- a/tests/unit/models/config/test_dump_configuration.py
+++ b/tests/unit/models/config/test_dump_configuration.py
@@ -8,6 +8,7 @@
 
 from pydantic import SecretStr
 
+from constants import DEFAULT_BYOK_RAG_RELEVANCE_CUTOFF_SCORE
 from models.config import (
     ByokRag,
     Configuration,
@@ -1024,6 +1025,7 @@ def test_dump_configuration_byok(tmp_path: Path) -> None:
                     "rag_type": "inline::faiss",
                     "vector_db_id": "vector_db_id",
                     "score_multiplier": 1.0,
+                    "relevance_cutoff_score": DEFAULT_BYOK_RAG_RELEVANCE_CUTOFF_SCORE,
                 },
             ],
             "quota_handlers": {
diff --git a/tests/unit/utils/test_vector_search.py b/tests/unit/utils/test_vector_search.py
index 0945bb236..8fd6868a3 100644
--- a/tests/unit/utils/test_vector_search.py
+++ b/tests/unit/utils/test_vector_search.py
@@ -1,5 +1,8 @@
 """Unit tests for vector search utilities."""
 
+from collections.abc import Awaitable, Callable
+from typing import Any
+
 import pytest
 from pydantic import AnyUrl
 from pytest_mock import MockerFixture
@@ -20,10 +23,37 @@
     _get_okp_base_url,
     _get_solr_vector_store_ids,
     _is_solr_enabled,
+    _query_store_for_byok_rag,
     build_rag_context,
 )
 
 
+def _vector_io_query_stub_like_backend(
+    chunk_score_pairs: list[tuple[Any, float]], mocker: MockerFixture
+) -> Callable[..., Awaitable[Any]]:
+    """Build an async ``vector_io.query`` stand-in that honors ``score_threshold``.
+
+    Production code forwards ``relevance_cutoff_score`` as ``params['score_threshold']``;
+    Llama Stack filters hits server-side. The stub keeps pairs whose raw score is at or
+    above that minimum (``>=``), matching the docstring on ``_query_store_for_byok_rag``.
+    """
+
+    async def _query(**kwargs: Any) -> Any:
+        threshold = float(kwargs["params"]["score_threshold"])
+        chunks_out: list[Any] = []
+        scores_out: list[float] = []
+        for chunk, raw_score in chunk_score_pairs:
+            if raw_score >= threshold:
+                chunks_out.append(chunk)
+                scores_out.append(raw_score)
+        out = mocker.Mock()
+        out.chunks = chunks_out
+        out.scores = scores_out
+        return out
+
+    return _query
+
+
 class TestIsSolrEnabled:
     """Tests for _is_solr_enabled function."""
 
@@ -427,6 +457,9 @@ async def test_byok_enabled_success(self, mocker: MockerFixture) -> None:
         byok_rag_mock = mocker.Mock()
         byok_rag_mock.rag_id = "rag_1"
         byok_rag_mock.vector_db_id = "vs_1"
+        byok_rag_mock.relevance_cutoff_score = (
+            constants.DEFAULT_BYOK_RAG_RELEVANCE_CUTOFF_SCORE
+        )
         config_mock.configuration.rag.inline = ["rag_1"]
         config_mock.configuration.byok_rag = [byok_rag_mock]
         config_mock.score_multiplier_mapping = {"vs_1": 1.5}
@@ -466,6 +499,9 @@ async def test_user_facing_ids_translated_to_internal_ids(
         byok_rag_mock = mocker.Mock()
         byok_rag_mock.rag_id = "my-kb"
         byok_rag_mock.vector_db_id = "vs-internal-001"
+        byok_rag_mock.relevance_cutoff_score = (
+            constants.DEFAULT_BYOK_RAG_RELEVANCE_CUTOFF_SCORE
+        )
         config_mock.configuration.byok_rag = [byok_rag_mock]
         config_mock.configuration.rag.inline = ["my-kb"]
         config_mock.score_multiplier_mapping = {"vs-internal-001": 1.0}
@@ -491,7 +527,11 @@ async def test_user_facing_ids_translated_to_internal_ids(
         client_mock.vector_io.query.assert_called_once_with(
             vector_store_id="vs-internal-001",
             query="test query",
-            params={"max_chunks": constants.BYOK_RAG_MAX_CHUNKS, "mode": "vector"},
+            params={
+                "max_chunks": constants.BYOK_RAG_MAX_CHUNKS,
+                "mode": "vector",
+                "score_threshold": constants.DEFAULT_BYOK_RAG_RELEVANCE_CUTOFF_SCORE,
+            },
         )
 
     @pytest.mark.asyncio
@@ -503,9 +543,15 @@ async def test_multiple_user_facing_ids_each_translated(
         byok_rag_1 = mocker.Mock()
         byok_rag_1.rag_id = "kb-part1"
         byok_rag_1.vector_db_id = "vs-aaa-111"
+        byok_rag_1.relevance_cutoff_score = (
+            constants.DEFAULT_BYOK_RAG_RELEVANCE_CUTOFF_SCORE
+        )
         byok_rag_2 = mocker.Mock()
         byok_rag_2.rag_id = "kb-part2"
         byok_rag_2.vector_db_id = "vs-bbb-222"
+        byok_rag_2.relevance_cutoff_score = (
+            constants.DEFAULT_BYOK_RAG_RELEVANCE_CUTOFF_SCORE
+        )
         config_mock.configuration.byok_rag = [byok_rag_1, byok_rag_2]
         config_mock.configuration.rag.inline = ["kb-part1", "kb-part2"]
         config_mock.score_multiplier_mapping = {"vs-aaa-111": 1.0, "vs-bbb-222": 1.0}
@@ -542,6 +588,146 @@ async def test_multiple_user_facing_ids_each_translated(
         assert "kb-part1" not in call_args
         assert "kb-part2" not in call_args
 
+    @pytest.mark.asyncio
+    async def test_byok_passes_configured_relevance_cutoff_to_vector_io(
+        self, mocker: MockerFixture
+    ) -> None:
+        """Configured ``relevance_cutoff_score`` is sent as ``score_threshold``."""
+        config_mock = mocker.Mock(spec=AppConfig)
+        byok_rag_mock = mocker.Mock()
+        byok_rag_mock.rag_id = "my-kb"
+        byok_rag_mock.vector_db_id = "vs-internal-001"
+        byok_rag_mock.relevance_cutoff_score = 0.55
+        config_mock.configuration.byok_rag = [byok_rag_mock]
+        config_mock.configuration.rag.inline = ["my-kb"]
+        config_mock.score_multiplier_mapping = {"vs-internal-001": 1.0}
+        config_mock.rag_id_mapping = {"vs-internal-001": "my-kb"}
+        mocker.patch("utils.vector_search.configuration", config_mock)
+
+        chunk_mock = mocker.Mock()
+        chunk_mock.content = "Test content"
+        chunk_mock.chunk_id = "chunk_1"
+        chunk_mock.metadata = {"document_id": "doc_1"}
+
+        search_response = mocker.Mock()
+        search_response.chunks = [chunk_mock]
+        search_response.scores = [0.9]
+
+        client_mock = mocker.AsyncMock()
+        client_mock.vector_io.query.return_value = search_response
+
+        await _fetch_byok_rag(client_mock, "test query", vector_store_ids=["my-kb"])
+
+        client_mock.vector_io.query.assert_called_once_with(
+            vector_store_id="vs-internal-001",
+            query="test query",
+            params={
+                "max_chunks": constants.BYOK_RAG_MAX_CHUNKS,
+                "mode": "vector",
+                "score_threshold": 0.55,
+            },
+        )
+
+    @pytest.mark.asyncio
+    async def test_query_store_for_byok_rag_forwards_score_threshold(
+        self, mocker: MockerFixture
+    ) -> None:
+        """Cutoff is applied by vector backends; this layer forwards it on ``vector_io.query``.
+
+        ``_query_store_for_byok_rag`` is the code that maps ``relevance_cutoff_score`` (via
+        callers) into ``params["score_threshold"]``. It does not re-rank or drop hits by
+        score—whatever ``vector_io.query`` returns is passed to ``_extract_byok_rag_chunks``.
+        """
+        score_threshold = 0.37
+        chunk = mocker.Mock()
+        chunk.content = "chunk text"
+        chunk.chunk_id = "chunk-1"
+        chunk.metadata = {"document_id": "doc-1"}
+
+        search_response = mocker.Mock()
+        search_response.chunks = [chunk]
+        search_response.scores = [0.91]
+
+        client = mocker.AsyncMock()
+        client.vector_io.query.return_value = search_response
+
+        result = await _query_store_for_byok_rag(
+            client,
+            vector_store_id="vs-test",
+            query="q",
+            weight=2.0,
+            score_threshold=score_threshold,
+        )
+
+        client.vector_io.query.assert_awaited_once_with(
+            vector_store_id="vs-test",
+            query="q",
+            params={
+                "max_chunks": constants.BYOK_RAG_MAX_CHUNKS,
+                "mode": "vector",
+                "score_threshold": score_threshold,
+            },
+        )
+        assert len(result) == 1
+        assert result[0]["content"] == "chunk text"
+        assert result[0]["score"] == 0.91
+        assert result[0]["weighted_score"] == pytest.approx(1.82)
+
+    @pytest.mark.asyncio
+    async def test_fetch_byok_rag_omits_chunks_below_vector_io_score_threshold(
+        self, mocker: MockerFixture
+    ) -> None:
+        """Sub-threshold hits never become ``RAGChunk`` rows when vector_io enforces the cutoff.
+
+        The full path resolves ``relevance_cutoff_score``, calls ``vector_io.query`` with
+        ``score_threshold``, then maps the response. The stub models backend filtering so
+        scores strictly below the cutoff are absent from the mocked response.
+        """
+        cutoff = 0.5
+        config_mock = mocker.Mock(spec=AppConfig)
+        byok_rag_mock = mocker.Mock()
+        byok_rag_mock.rag_id = "kb"
+        byok_rag_mock.vector_db_id = "vs-cutoff"
+        byok_rag_mock.relevance_cutoff_score = cutoff
+        config_mock.configuration.byok_rag = [byok_rag_mock]
+        config_mock.configuration.rag.inline = ["kb"]
+        config_mock.score_multiplier_mapping = {"vs-cutoff": 1.0}
+        config_mock.rag_id_mapping = {"vs-cutoff": "kb"}
+        mocker.patch("utils.vector_search.configuration", config_mock)
+
+        def chunk(content: str, cid: str) -> Any:
+            ch = mocker.Mock()
+            ch.content = content
+            ch.chunk_id = cid
+            ch.metadata = {"document_id": cid}
+            return ch
+
+        chunk_score_pairs: list[tuple[Any, float]] = [
+            (chunk("below_cutoff", "c_low"), 0.3),
+            (chunk("at_cutoff", "c_edge"), cutoff),
+            (chunk("above_cutoff", "c_high"), 0.85),
+        ]
+
+        client = mocker.AsyncMock()
+        client.vector_io.query.side_effect = _vector_io_query_stub_like_backend(
+            chunk_score_pairs, mocker
+        )
+
+        rag_chunks, _referenced = await _fetch_byok_rag(
+            client, "test query", vector_store_ids=["kb"]
+        )
+
+        assert (
+            client.vector_io.query.await_args.kwargs["params"]["score_threshold"]
+            == cutoff
+        )
+        contents = {c.content for c in rag_chunks}
+        assert "below_cutoff" not in contents
+        assert contents == {"at_cutoff", "above_cutoff"}
+        for ch in rag_chunks:
+            assert ch.score is not None
+            assert ch.score >= cutoff
+
     @pytest.mark.asyncio
     async def test_no_inline_rag_configured_skips_byok(
         self, mocker: MockerFixture
@@ -693,6 +879,9 @@ async def test_byok_enabled_only(self, mocker: MockerFixture) -> None:
         byok_rag_mock = mocker.Mock()
         byok_rag_mock.rag_id = "rag_1"
         byok_rag_mock.vector_db_id = "vs_1"
+        byok_rag_mock.relevance_cutoff_score = (
+            constants.DEFAULT_BYOK_RAG_RELEVANCE_CUTOFF_SCORE
+        )
         config_mock.configuration.rag.inline = ["rag_1"]
         config_mock.configuration.byok_rag = [byok_rag_mock]
         config_mock.inline_solr_enabled = False