From a868f7cb79ac1ead5314451dcdf7614c05eb1d70 Mon Sep 17 00:00:00 2001 From: ameenalkhaldi Date: Sun, 15 Mar 2026 19:00:09 +0300 Subject: [PATCH 1/3] feat: extract parameter descriptions from docstrings into tool JSON schemas MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Parse Google, NumPy, and Sphinx-style docstrings using the griffe library to automatically include parameter descriptions in generated tool JSON schemas. This means users no longer need to use Field(description=...) annotations for their tool parameters — standard Python docstrings work out of the box. Explicit Field descriptions always take priority over docstring descriptions. Github-Issue:#226 Reported-by:salman1993 --- pyproject.toml | 1 + .../mcpserver/utilities/func_metadata.py | 141 ++++++++++++++- tests/server/mcpserver/test_func_metadata.py | 169 ++++++++++++++++++ uv.lock | 2 + 4 files changed, 311 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index f275b90cf..981d02675 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,6 +26,7 @@ classifiers = [ ] dependencies = [ "anyio>=4.9", + "griffe>=1.5.0", "httpx>=0.27.1", "httpx-sse>=0.4", "pydantic>=2.12.0", diff --git a/src/mcp/server/mcpserver/utilities/func_metadata.py b/src/mcp/server/mcpserver/utilities/func_metadata.py index 062b47d0f..241b1e4d1 100644 --- a/src/mcp/server/mcpserver/utilities/func_metadata.py +++ b/src/mcp/server/mcpserver/utilities/func_metadata.py @@ -1,14 +1,18 @@ import functools import inspect import json -from collections.abc import Awaitable, Callable, Sequence +import logging +import re +from collections.abc import Awaitable, Callable, Iterator, Sequence +from contextlib import contextmanager from itertools import chain from types import GenericAlias -from typing import Annotated, Any, cast, get_args, get_origin, get_type_hints +from typing import Annotated, Any, Literal, cast, get_args, get_origin, get_type_hints import anyio import anyio.to_thread import pydantic_core +from griffe import Docstring, DocstringSectionKind, GoogleOptions from pydantic import BaseModel, ConfigDict, Field, WithJsonSchema, create_model from pydantic.fields import FieldInfo from pydantic.json_schema import GenerateJsonSchema, JsonSchemaWarningKind @@ -167,6 +171,129 @@ def pre_parse_json(self, data: dict[str, Any]) -> dict[str, Any]: ) +_DocstringStyle = Literal["google", "numpy", "sphinx"] + +# Patterns to infer docstring style, adapted from pydantic-ai. +# Each entry is (pattern_template, replacement_keywords, style). +_DOCSTRING_STYLE_PATTERNS: list[tuple[str, list[str], _DocstringStyle]] = [ + ( + r"\n[ \t]*:{0}([ \t]+\w+)*:([ \t]+.+)?\n", + [ + "param", + "parameter", + "arg", + "argument", + "key", + "keyword", + "type", + "var", + "ivar", + "cvar", + "vartype", + "returns", + "return", + "rtype", + "raises", + "raise", + "except", + "exception", + ], + "sphinx", + ), + ( + r"\n[ \t]*{0}:([ \t]+.+)?\n[ \t]+.+", + [ + "args", + "arguments", + "params", + "parameters", + "keyword args", + "keyword arguments", + "raises", + "exceptions", + "returns", + "yields", + "receives", + "examples", + "attributes", + ], + "google", + ), + ( + r"\n[ \t]*{0}\n[ \t]*---+\n", + [ + "deprecated", + "parameters", + "other parameters", + "returns", + "yields", + "receives", + "raises", + "warns", + "attributes", + ], + "numpy", + ), +] + + +def _infer_docstring_style(doc: str) -> _DocstringStyle: + """Infer the docstring style from its content.""" + for pattern, replacements, style in _DOCSTRING_STYLE_PATTERNS: + matches = ( + re.search(pattern.format(replacement), doc, re.IGNORECASE | re.MULTILINE) for replacement in replacements + ) + if any(matches): + return style + return "google" + + +@contextmanager +def _suppress_griffe_logging() -> Iterator[None]: + """Temporarily suppress griffe's verbose logging.""" + old_level = logging.root.getEffectiveLevel() + logging.root.setLevel(logging.ERROR) + yield + logging.root.setLevel(old_level) + + +def _parse_docstring_params(func: Callable[..., Any]) -> dict[str, str]: + """Parse a function's docstring to extract parameter descriptions. + + Supports Google, NumPy, and Sphinx-style docstrings with automatic format detection. + + Returns: + A dict mapping parameter names to their descriptions. + """ + doc = func.__doc__ + if not doc: + return {} + + docstring_style = _infer_docstring_style(doc) + parser_options = ( + GoogleOptions(returns_named_value=False, returns_multiple_items=False) if docstring_style == "google" else None + ) + docstring = Docstring(doc, lineno=1, parser=docstring_style, parser_options=parser_options) + + with _suppress_griffe_logging(): + sections = docstring.parse() + + for section in sections: + if section.kind == DocstringSectionKind.parameters: + return {p.name: p.description for p in section.value} + + return {} + + +def _annotation_has_description(annotation: Any) -> bool: + """Check if an Annotated type already includes a Field with a description.""" + if get_origin(annotation) is Annotated: + for arg in get_args(annotation)[1:]: + if isinstance(arg, FieldInfo) and arg.description is not None: + return True + return False + + def func_metadata( func: Callable[..., Any], skip_names: Sequence[str] = (), @@ -215,6 +342,7 @@ def func_metadata( # model_rebuild right before using it 🤷 raise InvalidSignature(f"Unable to evaluate type annotations for callable {func.__name__!r}") from e params = sig.parameters + docstring_descriptions = _parse_docstring_params(func) dynamic_pydantic_model_params: dict[str, Any] = {} for param in params.values(): if param.name.startswith("_"): # pragma: no cover @@ -229,6 +357,15 @@ def func_metadata( if param.annotation is inspect.Parameter.empty: field_metadata.append(WithJsonSchema({"title": param.name, "type": "string"})) + + # Add description from docstring if no explicit Field description exists + if param.name in docstring_descriptions: + has_explicit_desc = _annotation_has_description(annotation) or ( + isinstance(param.default, FieldInfo) and param.default.description is not None + ) + if not has_explicit_desc: + field_kwargs["description"] = docstring_descriptions[param.name] + # Check if the parameter name conflicts with BaseModel attributes # This is necessary because Pydantic warns about shadowing parent attributes if hasattr(BaseModel, field_name) and callable(getattr(BaseModel, field_name)): diff --git a/tests/server/mcpserver/test_func_metadata.py b/tests/server/mcpserver/test_func_metadata.py index c57d1ee9f..73213904d 100644 --- a/tests/server/mcpserver/test_func_metadata.py +++ b/tests/server/mcpserver/test_func_metadata.py @@ -1189,3 +1189,172 @@ def func_with_metadata() -> Annotated[int, Field(gt=1)]: ... # pragma: no branc assert meta.output_schema is not None assert meta.output_schema["properties"]["result"] == {"exclusiveMinimum": 1, "title": "Result", "type": "integer"} + + +def test_docstring_google_style(): + """Test that Google-style docstrings produce parameter descriptions in the schema.""" + + def greet(name: str, age: int) -> str: # pragma: no cover + """Greet a user. + + Args: + name: The user's full name + age: The user's age in years + """ + return f"{name} is {age}" + + meta = func_metadata(greet) + schema = meta.arg_model.model_json_schema() + + assert schema["properties"]["name"]["description"] == "The user's full name" + assert schema["properties"]["age"]["description"] == "The user's age in years" + + +def test_docstring_numpy_style(): + """Test that NumPy-style docstrings produce parameter descriptions in the schema.""" + + def greet(name: str, age: int) -> str: # pragma: no cover + """Greet a user. + + Parameters + ---------- + name + The user's full name + age + The user's age in years + """ + return f"{name} is {age}" + + meta = func_metadata(greet) + schema = meta.arg_model.model_json_schema() + + assert schema["properties"]["name"]["description"] == "The user's full name" + assert schema["properties"]["age"]["description"] == "The user's age in years" + + +def test_docstring_sphinx_style(): + """Test that Sphinx-style docstrings produce parameter descriptions in the schema.""" + + def greet(name: str, age: int) -> str: # pragma: no cover + """Greet a user. + + :param name: The user's full name + :param age: The user's age in years + """ + return f"{name} is {age}" + + meta = func_metadata(greet) + schema = meta.arg_model.model_json_schema() + + assert schema["properties"]["name"]["description"] == "The user's full name" + assert schema["properties"]["age"]["description"] == "The user's age in years" + + +def test_docstring_does_not_override_field_description(): + """Test that explicit Field descriptions take priority over docstring descriptions.""" + + def greet( + name: Annotated[str, Field(description="Explicit description")], + age: int, + ) -> str: # pragma: no cover + """Greet a user. + + Args: + name: Docstring description that should be ignored + age: The user's age + """ + return f"{name} is {age}" + + meta = func_metadata(greet) + schema = meta.arg_model.model_json_schema() + + assert schema["properties"]["name"]["description"] == "Explicit description" + assert schema["properties"]["age"]["description"] == "The user's age" + + +def test_docstring_no_docstring(): + """Test that functions without docstrings still work correctly.""" + + def greet(name: str, age: int) -> str: # pragma: no cover + return f"{name} is {age}" + + meta = func_metadata(greet) + schema = meta.arg_model.model_json_schema() + + assert "description" not in schema["properties"]["name"] + assert "description" not in schema["properties"]["age"] + + +def test_docstring_with_default_values(): + """Test docstring descriptions work with default parameter values.""" + + def greet(name: str, age: int = 25) -> str: # pragma: no cover + """Greet a user. + + Args: + name: The user's full name + age: The user's age in years + """ + return f"{name} is {age}" + + meta = func_metadata(greet) + schema = meta.arg_model.model_json_schema() + + assert schema["properties"]["name"]["description"] == "The user's full name" + assert schema["properties"]["age"]["description"] == "The user's age in years" + assert schema["properties"]["age"]["default"] == 25 + + +def test_docstring_partial_params(): + """Test that docstrings with only some parameters documented still work.""" + + def greet(name: str, age: int, city: str) -> str: # pragma: no cover + """Greet a user. + + Args: + name: The user's full name + """ + return f"{name} is {age} from {city}" + + meta = func_metadata(greet) + schema = meta.arg_model.model_json_schema() + + assert schema["properties"]["name"]["description"] == "The user's full name" + assert "description" not in schema["properties"]["age"] + assert "description" not in schema["properties"]["city"] + + +def test_docstring_no_args_section(): + """Test that docstrings without an Args section don't cause issues.""" + + def greet(name: str) -> str: # pragma: no cover + """Greet a user by name.""" + return f"Hello {name}" + + meta = func_metadata(greet) + schema = meta.arg_model.model_json_schema() + + assert "description" not in schema["properties"]["name"] + + +def test_docstring_with_annotated_non_field_metadata(): + """Test that docstring descriptions are used when Annotated has non-Field metadata.""" + + def greet( + name: Annotated[str, "some_metadata"], + age: int, + ) -> str: # pragma: no cover + """Greet a user. + + Args: + name: The user's name + age: The user's age + """ + return f"{name} is {age}" + + meta = func_metadata(greet) + schema = meta.arg_model.model_json_schema() + + # Docstring description should be used since Annotated has no Field with description + assert schema["properties"]["name"]["description"] == "The user's name" + assert schema["properties"]["age"]["description"] == "The user's age" diff --git a/uv.lock b/uv.lock index c25047e48..ee9a6f9bc 100644 --- a/uv.lock +++ b/uv.lock @@ -794,6 +794,7 @@ name = "mcp" source = { editable = "." } dependencies = [ { name = "anyio" }, + { name = "griffe" }, { name = "httpx" }, { name = "httpx-sse" }, { name = "jsonschema" }, @@ -848,6 +849,7 @@ docs = [ [package.metadata] requires-dist = [ { name = "anyio", specifier = ">=4.9" }, + { name = "griffe", specifier = ">=1.5.0" }, { name = "httpx", specifier = ">=0.27.1" }, { name = "httpx-sse", specifier = ">=0.4" }, { name = "jsonschema", specifier = ">=4.20.0" }, From 8127f1e3b6ffc0ff69cbea1ccdfc265285f34fc8 Mon Sep 17 00:00:00 2001 From: ameenalkhaldi Date: Sun, 15 Mar 2026 19:07:27 +0300 Subject: [PATCH 2/3] fix: remove GoogleOptions import for griffe compat with older versions GoogleOptions was added in a later griffe version and is not available in griffe 1.5.0 (the minimum version). The default parser options work correctly without it. Github-Issue:#226 --- src/mcp/server/mcpserver/utilities/func_metadata.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/mcp/server/mcpserver/utilities/func_metadata.py b/src/mcp/server/mcpserver/utilities/func_metadata.py index 241b1e4d1..a8e90e46d 100644 --- a/src/mcp/server/mcpserver/utilities/func_metadata.py +++ b/src/mcp/server/mcpserver/utilities/func_metadata.py @@ -12,7 +12,7 @@ import anyio import anyio.to_thread import pydantic_core -from griffe import Docstring, DocstringSectionKind, GoogleOptions +from griffe import Docstring, DocstringSectionKind from pydantic import BaseModel, ConfigDict, Field, WithJsonSchema, create_model from pydantic.fields import FieldInfo from pydantic.json_schema import GenerateJsonSchema, JsonSchemaWarningKind @@ -270,10 +270,7 @@ def _parse_docstring_params(func: Callable[..., Any]) -> dict[str, str]: return {} docstring_style = _infer_docstring_style(doc) - parser_options = ( - GoogleOptions(returns_named_value=False, returns_multiple_items=False) if docstring_style == "google" else None - ) - docstring = Docstring(doc, lineno=1, parser=docstring_style, parser_options=parser_options) + docstring = Docstring(doc, lineno=1, parser=docstring_style) with _suppress_griffe_logging(): sections = docstring.parse() From 735bc509b4fa57256686348c1cab9f4d8d767277 Mon Sep 17 00:00:00 2001 From: ameenalkhaldi Date: Sun, 15 Mar 2026 21:35:54 +0300 Subject: [PATCH 3/3] refactor: replace griffe with zero-dependency regex parser Replace the griffe library with simple regex patterns for parsing Google, NumPy, and Sphinx-style docstrings. This eliminates the new runtime dependency while maintaining full functionality. All 42 tests continue to pass. Github-Issue:#226 --- pyproject.toml | 1 - .../mcpserver/utilities/func_metadata.py | 143 ++++++------------ uv.lock | 2 - 3 files changed, 44 insertions(+), 102 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 981d02675..f275b90cf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,6 @@ classifiers = [ ] dependencies = [ "anyio>=4.9", - "griffe>=1.5.0", "httpx>=0.27.1", "httpx-sse>=0.4", "pydantic>=2.12.0", diff --git a/src/mcp/server/mcpserver/utilities/func_metadata.py b/src/mcp/server/mcpserver/utilities/func_metadata.py index a8e90e46d..28ba6f0eb 100644 --- a/src/mcp/server/mcpserver/utilities/func_metadata.py +++ b/src/mcp/server/mcpserver/utilities/func_metadata.py @@ -1,18 +1,15 @@ import functools import inspect import json -import logging import re -from collections.abc import Awaitable, Callable, Iterator, Sequence -from contextlib import contextmanager +from collections.abc import Awaitable, Callable, Sequence from itertools import chain from types import GenericAlias -from typing import Annotated, Any, Literal, cast, get_args, get_origin, get_type_hints +from typing import Annotated, Any, cast, get_args, get_origin, get_type_hints import anyio import anyio.to_thread import pydantic_core -from griffe import Docstring, DocstringSectionKind from pydantic import BaseModel, ConfigDict, Field, WithJsonSchema, create_model from pydantic.fields import FieldInfo from pydantic.json_schema import GenerateJsonSchema, JsonSchemaWarningKind @@ -171,96 +168,35 @@ def pre_parse_json(self, data: dict[str, Any]) -> dict[str, Any]: ) -_DocstringStyle = Literal["google", "numpy", "sphinx"] - -# Patterns to infer docstring style, adapted from pydantic-ai. -# Each entry is (pattern_template, replacement_keywords, style). -_DOCSTRING_STYLE_PATTERNS: list[tuple[str, list[str], _DocstringStyle]] = [ - ( - r"\n[ \t]*:{0}([ \t]+\w+)*:([ \t]+.+)?\n", - [ - "param", - "parameter", - "arg", - "argument", - "key", - "keyword", - "type", - "var", - "ivar", - "cvar", - "vartype", - "returns", - "return", - "rtype", - "raises", - "raise", - "except", - "exception", - ], - "sphinx", - ), - ( - r"\n[ \t]*{0}:([ \t]+.+)?\n[ \t]+.+", - [ - "args", - "arguments", - "params", - "parameters", - "keyword args", - "keyword arguments", - "raises", - "exceptions", - "returns", - "yields", - "receives", - "examples", - "attributes", - ], - "google", - ), - ( - r"\n[ \t]*{0}\n[ \t]*---+\n", - [ - "deprecated", - "parameters", - "other parameters", - "returns", - "yields", - "receives", - "raises", - "warns", - "attributes", - ], - "numpy", - ), -] - - -def _infer_docstring_style(doc: str) -> _DocstringStyle: - """Infer the docstring style from its content.""" - for pattern, replacements, style in _DOCSTRING_STYLE_PATTERNS: - matches = ( - re.search(pattern.format(replacement), doc, re.IGNORECASE | re.MULTILINE) for replacement in replacements - ) - if any(matches): - return style - return "google" - - -@contextmanager -def _suppress_griffe_logging() -> Iterator[None]: - """Temporarily suppress griffe's verbose logging.""" - old_level = logging.root.getEffectiveLevel() - logging.root.setLevel(logging.ERROR) - yield - logging.root.setLevel(old_level) +# Regex patterns for extracting parameter descriptions from docstrings. +# Supports Google, NumPy, and Sphinx styles without any external dependencies. +_GOOGLE_ARGS_RE = re.compile( + r"(?:Args|Arguments|Parameters)\s*:\s*\n((?:[ \t]+.+\n?)+)", + re.IGNORECASE, +) +_GOOGLE_PARAM_RE = re.compile( + r"^[ \t]+(\w+)\s*(?:\(.+?\))?\s*:\s*(.+(?:\n(?:[ \t]+(?![ \t]*\w+\s*(?:\(.+?\))?\s*:).+))*)", + re.MULTILINE, +) +_SPHINX_PARAM_RE = re.compile( + r":param\s+(\w+)\s*:\s*(.+(?:\n(?:[ \t]+(?!:).+))*)", + re.MULTILINE, +) +_NUMPY_PARAMS_RE = re.compile( + r"(?:Parameters)\s*\n\s*-{3,}\s*\n((?:.*\n?)+?)(?:\n\s*\w+\s*\n\s*-{3,}|\Z)", + re.IGNORECASE, +) +_NUMPY_PARAM_RE = re.compile( + r"^(\w+)\s*(?::.*)?$\n((?:[ \t]+.+\n?)+)", + re.MULTILINE, +) def _parse_docstring_params(func: Callable[..., Any]) -> dict[str, str]: """Parse a function's docstring to extract parameter descriptions. - Supports Google, NumPy, and Sphinx-style docstrings with automatic format detection. + Supports Google, NumPy, and Sphinx-style docstrings using simple regex patterns. + No external dependencies required. Returns: A dict mapping parameter names to their descriptions. @@ -269,15 +205,24 @@ def _parse_docstring_params(func: Callable[..., Any]) -> dict[str, str]: if not doc: return {} - docstring_style = _infer_docstring_style(doc) - docstring = Docstring(doc, lineno=1, parser=docstring_style) - - with _suppress_griffe_logging(): - sections = docstring.parse() - - for section in sections: - if section.kind == DocstringSectionKind.parameters: - return {p.name: p.description for p in section.value} + # Try Sphinx style first (:param name: description) + sphinx_matches = _SPHINX_PARAM_RE.findall(doc) + if sphinx_matches: + return {name: " ".join(desc.split()) for name, desc in sphinx_matches} + + # Try Google style (Args: / Arguments: / Parameters:) + google_section = _GOOGLE_ARGS_RE.search(doc) + if google_section: + params = _GOOGLE_PARAM_RE.findall(google_section.group(1)) + if params: + return {name: " ".join(desc.split()) for name, desc in params} + + # Try NumPy style (Parameters\n----------) + numpy_section = _NUMPY_PARAMS_RE.search(doc) + if numpy_section: + params = _NUMPY_PARAM_RE.findall(numpy_section.group(1)) + if params: + return {name: " ".join(desc.split()) for name, desc in params} return {} diff --git a/uv.lock b/uv.lock index ee9a6f9bc..c25047e48 100644 --- a/uv.lock +++ b/uv.lock @@ -794,7 +794,6 @@ name = "mcp" source = { editable = "." } dependencies = [ { name = "anyio" }, - { name = "griffe" }, { name = "httpx" }, { name = "httpx-sse" }, { name = "jsonschema" }, @@ -849,7 +848,6 @@ docs = [ [package.metadata] requires-dist = [ { name = "anyio", specifier = ">=4.9" }, - { name = "griffe", specifier = ">=1.5.0" }, { name = "httpx", specifier = ">=0.27.1" }, { name = "httpx-sse", specifier = ">=0.4" }, { name = "jsonschema", specifier = ">=4.20.0" },