From 2560031fa1ebedf53cc5da5fe775357fcd81da7a Mon Sep 17 00:00:00 2001
From: Patel230 <Lakshmanp230@gmail.com>
Date: Thu, 14 May 2026 21:25:32 +0530
Subject: [PATCH 1/4] =?UTF-8?q?feat(hawk-sdk-python):=20production=20harde?=
 =?UTF-8?q?ning=20=E2=80=94=20ruff,=20mypy,=20version=20bump?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Added ruff linter config (E, F, W, I, N, UP, B, A, SIM, TCH, RUF rules)
- Added mypy strict type checking config
- Added Makefile with standard targets (test, lint, format, typecheck)
- Bumped version to 0.2.0
- Added pytest strict markers and short traceback config
---
 Makefile       | 25 +++++++++++++++++++++++++
 pyproject.toml | 22 +++++++++++++++++++++-
 2 files changed, 46 insertions(+), 1 deletion(-)
 create mode 100644 Makefile

diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..9075d1d
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,25 @@
+.PHONY: all test test-coverage lint format typecheck clean help
+
+all: lint typecheck test ## Run lint, typecheck, and tests
+
+test: ## Run tests
+	python -m pytest
+
+test-coverage: ## Run tests with coverage
+	python -m pytest --cov=src/hawk --cov-report=term-missing
+
+lint: ## Run ruff linter
+	ruff check .
+
+format: ## Format code
+	ruff format .
+	ruff check --fix .
+
+typecheck: ## Run mypy type checker
+	mypy src/
+
+clean: ## Clean artifacts
+	rm -rf dist/ build/ *.egg-info .pytest_cache .mypy_cache .ruff_cache
+
+help: ## Show this help
+	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}'
diff --git a/pyproject.toml b/pyproject.toml
index 36f167d..9eabe83 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "hawk-sdk"
-version = "0.1.0"
+version = "0.2.0"
 description = "Python SDK for the Hawk daemon API"
 readme = "README.md"
 license = "MIT"
@@ -35,6 +35,8 @@ dev = [
     "pytest>=7.0",
     "pytest-asyncio>=0.21",
     "respx>=0.21",
+    "ruff>=0.4.0",
+    "mypy>=1.0",
 ]
 
 [tool.hatch.build.targets.wheel]
@@ -43,3 +45,21 @@ packages = ["src/hawk"]
 [tool.pytest.ini_options]
 asyncio_mode = "auto"
 testpaths = ["tests"]
+addopts = "--strict-markers --tb=short -q"
+
+[tool.ruff]
+target-version = "py39"
+line-length = 100
+
+[tool.ruff.lint]
+select = ["E", "F", "W", "I", "N", "UP", "B", "A", "SIM", "TCH", "RUF"]
+ignore = ["E501"]
+
+[tool.ruff.lint.isort]
+known-first-party = ["hawk"]
+
+[tool.mypy]
+python_version = "3.9"
+strict = true
+warn_return_any = true
+warn_unused_configs = true

From 1dc0be45da0cd218aef8792242864edaf45db034 Mon Sep 17 00:00:00 2001
From: Patel230 <Lakshmanp230@gmail.com>
Date: Fri, 15 May 2026 01:00:20 +0530
Subject: [PATCH 2/4] feat(hawk-sdk-python): align __version__ with pyproject,
 add User-Agent, full OSS bootstrap
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The prior hardening commit bumped pyproject.toml to 0.2.0 but missed
`src/hawk/_version.py`, which still reported 0.1.0. This commit fixes
that inconsistency and lands the OSS standard files that were missing.

Version surface (fixed + added):
  - src/hawk/_version.py — `__version__ = "0.2.0"` (was 0.1.0).
    pyproject.toml was already 0.2.0.
  - src/hawk/client.py — both `HawkClient._build_headers` and
    `AsyncHawkClient._build_headers` now set
    `User-Agent: hawk-sdk-python/<__version__>`. httpx merges
    client-default headers with per-request overrides, so this also
    covers the streaming endpoint without changing the per-request
    `headers={"Accept": "text/event-stream"}` override.

New OSS files (this is the first PR to add them):
  - CHANGELOG.md — Keep-a-Changelog format with [Unreleased] capturing
    this PR + a backfilled [0.1.0] entry for the initial SDK and prior
    hardening pass.
  - CONTRIBUTING.md — quick start with venv + editable install, branch
    flow (this repo branches from main), conventional commits, code
    standards (mypy --strict, ruff, async-first, Pydantic v2,
    User-Agent rule), testing with respx, and the
    bump-both-version-files procedure.
  - SECURITY.md — vulnerability reporting via GitHub Security
    Advisories, in-scope examples (token leakage, TLS misuse, Pydantic
    deserialization issues, redirect host escape), out-of-scope
    pointers (daemon issues to hawk repo, third-party-package issues
    upstream).
  - CODE_OF_CONDUCT.md — Contributor Covenant 2.1.
  - .gitattributes — LF normalization, binary detection, linguist
    hints to collapse lock files.
  - .editorconfig — UTF-8, LF, 4-space indent for Python (PEP 8),
    2-space for YAML/JSON/TOML, no-trim for Markdown.
  - .github/workflows/ci.yml — pytest matrix on Python 3.9 / 3.10 /
    3.11 / 3.12 / 3.13, ruff (lint + format check), mypy --strict,
    build sdist + wheel + twine check.
  - .github/dependabot.yml — weekly pip + github-actions, pip
    grouped by pydantic and pytest to reduce PR noise.
  - .github/PULL_REQUEST_TEMPLATE.md — Summary / Changes / API impact
    (with bump-both-files reminder) / Daemon compatibility / Async
    compatibility (sync + async kept in lock-step) / Testing /
    Checklist (incl. User-Agent rule).
  - .github/ISSUE_TEMPLATE/bug_report.yml — surface dropdown
    (HawkClient / AsyncHawkClient / streaming / retry / tools /
    agent / workflow / typed errors / build), required SDK + daemon
    + Python versions, package-versions textarea.
  - .github/ISSUE_TEMPLATE/feature_request.yml — kind selector
    covering 9 areas (client method / streaming / retry / errors /
    tools / agent-workflow / pydantic / config / tooling) and
    solo-dev fit checks (incl. 'sync and async kept in lock-step',
    'does not break wire-compatibility with existing daemon
    versions').
  - .github/ISSUE_TEMPLATE/config.yml — routes security to
    advisories, questions to discussions, blocks blank issues.
  - .gitignore — expanded from 6 lines to cover the broader Python
    toolchain (venv dirs, .mypy_cache, .ruff_cache, .pytest_cache,
    .tox, .nox, htmlcov, coverage.xml, .env).

Verification:
  - `pytest` — 65/65 pass (1 pre-existing cosmetic warning in
    test_workflow.py about an unawaited coroutine in a mock; not
    introduced by this PR)
  - `hawk.__version__` returns "0.2.0"
  - `HawkClient._build_headers()` returns
    {'Accept': 'application/json', 'User-Agent': 'hawk-sdk-python/0.2.0'}
---
 .editorconfig                              |  29 +++++
 .gitattributes                             |  38 ++++++
 .github/ISSUE_TEMPLATE/bug_report.yml      | 127 +++++++++++++++++++++
 .github/ISSUE_TEMPLATE/config.yml          |   8 ++
 .github/ISSUE_TEMPLATE/feature_request.yml |  75 ++++++++++++
 .github/PULL_REQUEST_TEMPLATE.md           |  87 ++++++++++++++
 .github/dependabot.yml                     |  35 ++++++
 .github/workflows/ci.yml                   |  89 +++++++++++++++
 .gitignore                                 |  46 +++++++-
 CHANGELOG.md                               |  83 ++++++++++++++
 CODE_OF_CONDUCT.md                         |  55 +++++++++
 CONTRIBUTING.md                            | 101 ++++++++++++++++
 SECURITY.md                                |  53 +++++++++
 src/hawk/_version.py                       |   2 +-
 src/hawk/client.py                         |  11 +-
 15 files changed, 833 insertions(+), 6 deletions(-)
 create mode 100644 .editorconfig
 create mode 100644 .gitattributes
 create mode 100644 .github/ISSUE_TEMPLATE/bug_report.yml
 create mode 100644 .github/ISSUE_TEMPLATE/config.yml
 create mode 100644 .github/ISSUE_TEMPLATE/feature_request.yml
 create mode 100644 .github/PULL_REQUEST_TEMPLATE.md
 create mode 100644 .github/dependabot.yml
 create mode 100644 .github/workflows/ci.yml
 create mode 100644 CHANGELOG.md
 create mode 100644 CODE_OF_CONDUCT.md
 create mode 100644 CONTRIBUTING.md
 create mode 100644 SECURITY.md

diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 0000000..5fe9816
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,29 @@
+root = true
+
+[*]
+charset = utf-8
+end_of_line = lf
+insert_final_newline = true
+trim_trailing_whitespace = true
+indent_style = space
+indent_size = 2
+
+# Python: 4 spaces (PEP 8).
+[*.py]
+indent_style = space
+indent_size = 4
+max_line_length = 100
+
+# Makefiles: recipe lines must be indented with a tab (required by make).
+[{Makefile,*.mk,**.mk}]
+indent_style = tab
+indent_size = 4
+
+# YAML / JSON / TOML: 2 spaces.
+[*.{yml,yaml,json,toml}]
+indent_style = space
+indent_size = 2
+
+# Markdown: trailing whitespace can be significant (line break with two spaces).
+[*.md]
+trim_trailing_whitespace = false
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..e3077d0
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,38 @@
+# Default: auto-detect text files; store them with LF and check them out with LF.
+* text=auto eol=lf
+
+# Explicitly LF for source, scripts, and config — never CRLF.
+*.py     text eol=lf
+*.md     text eol=lf
+*.yml    text eol=lf
+*.yaml   text eol=lf
+*.json   text eol=lf
+*.toml   text eol=lf
+*.cfg    text eol=lf
+*.ini    text eol=lf
+*.sh     text eol=lf
+Makefile text eol=lf
+
+# Windows-only files keep CRLF.
+*.bat    text eol=crlf
+*.cmd    text eol=crlf
+*.ps1    text eol=crlf
+
+# Binary files — never diffed, never EOL-normalized.
+*.png    binary
+*.jpg    binary
+*.jpeg   binary
+*.gif    binary
+*.ico    binary
+*.zip    binary
+*.tar    binary
+*.tar.gz binary
+*.gz     binary
+*.pdf    binary
+*.whl    binary
+
+# Generated / lock files — collapse in PR diffs (GitHub linguist hint).
+poetry.lock        linguist-generated=true
+Pipfile.lock       linguist-generated=true
+uv.lock            linguist-generated=true
+requirements*.txt  linguist-generated=true
diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml
new file mode 100644
index 0000000..06bb64e
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/bug_report.yml
@@ -0,0 +1,127 @@
+name: Bug report
+description: Something is broken or behaving unexpectedly.
+title: "bug: <one-line summary>"
+labels: ["bug", "triage"]
+
+body:
+  - type: markdown
+    attributes:
+      value: |
+        Thanks for taking the time to file a bug report. Please fill in as much
+        of the form as you can — the more we know, the faster we can fix it.
+
+        Before submitting:
+        - Search [existing issues](https://github.com/GrayCodeAI/hawk-sdk-python/issues) to avoid duplicates.
+        - If this is a security issue, please **do not** file a public issue. See `SECURITY.md`.
+
+  - type: textarea
+    id: what-happened
+    attributes:
+      label: What happened?
+      description: A clear, concise description of the bug.
+      placeholder: When I call HawkClient.<method>, I expected X but got Y.
+    validations:
+      required: true
+
+  - type: dropdown
+    id: surface
+    attributes:
+      label: Surface
+      description: Which SDK surface is affected?
+      options:
+        - "HawkClient (sync)"
+        - "AsyncHawkClient (async)"
+        - "Streaming (chat_stream / StreamReader)"
+        - "Retry / backoff"
+        - "Tools (chat_with_tools, @tool decorator)"
+        - "Agent / AsyncAgent"
+        - "Workflow / AsyncWorkflow"
+        - "Typed errors (HawkAPIError hierarchy)"
+        - "Build / packaging"
+    validations:
+      required: true
+
+  - type: textarea
+    id: reproduce
+    attributes:
+      label: Steps to reproduce
+      description: Minimal Python snippet that reliably reproduces the problem.
+      render: python
+      placeholder: |
+        from hawk import HawkClient
+        with HawkClient() as c:
+            resp = c.chat("hello")
+        # ^ wrong shape / exception / hang / etc.
+    validations:
+      required: true
+
+  - type: textarea
+    id: expected
+    attributes:
+      label: Expected behavior
+      description: What did you expect to happen instead?
+    validations:
+      required: true
+
+  - type: input
+    id: sdk-version
+    attributes:
+      label: hawk-sdk version
+      description: Output of `python -c "import hawk; print(hawk.__version__)"`.
+      placeholder: "0.2.0"
+    validations:
+      required: true
+
+  - type: input
+    id: daemon-version
+    attributes:
+      label: hawk daemon version
+      description: Output of `hawk version` (the daemon you're hitting).
+      placeholder: "0.2.0"
+    validations:
+      required: true
+
+  - type: input
+    id: python-version
+    attributes:
+      label: Python version
+      description: Output of `python --version`.
+      placeholder: "Python 3.11.9"
+    validations:
+      required: true
+
+  - type: input
+    id: os
+    attributes:
+      label: Operating system
+      description: e.g. macOS 14.5 (arm64), Ubuntu 24.04 (amd64), Windows 11 (amd64).
+      placeholder: "macOS 14.5 (arm64)"
+    validations:
+      required: true
+
+  - type: textarea
+    id: deps
+    attributes:
+      label: Relevant package versions
+      description: |
+        Paste the output of `pip freeze | grep -E "^(httpx|pydantic|hawk-sdk)"` (or `uv pip list` equivalent).
+      render: shell
+
+  - type: textarea
+    id: logs
+    attributes:
+      label: Logs / traceback
+      description: |
+        Paste any relevant output, including the full traceback.
+        **Redact API tokens, session IDs, and any private data first.**
+      render: shell
+
+  - type: checkboxes
+    id: confirm
+    attributes:
+      label: Confirmation
+      options:
+        - label: I searched existing issues and did not find a duplicate.
+          required: true
+        - label: I redacted any secrets, tokens, or private data from logs.
+          required: true
diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
new file mode 100644
index 0000000..c88dd67
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -0,0 +1,8 @@
+blank_issues_enabled: false
+contact_links:
+  - name: Security vulnerability
+    url: https://github.com/GrayCodeAI/hawk-sdk-python/security/advisories/new
+    about: Please report security issues privately via a GitHub Security Advisory. See SECURITY.md.
+  - name: Question / discussion
+    url: https://github.com/GrayCodeAI/hawk-sdk-python/discussions
+    about: Have a question or want to discuss an idea? Open a discussion instead of an issue.
diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml
new file mode 100644
index 0000000..c840e71
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/feature_request.yml
@@ -0,0 +1,75 @@
+name: Feature request
+description: Suggest an improvement or a new SDK capability.
+title: "feat: <one-line summary>"
+labels: ["enhancement", "triage"]
+
+body:
+  - type: markdown
+    attributes:
+      value: |
+        Thanks for proposing a feature. hawk-sdk is a thin Python client for
+        the local hawk daemon. Every feature is evaluated against whether it
+        serves **a single developer** running their own hawk daemon — i.e.
+        it improves ergonomics, lowers latency, or simplifies integration.
+
+        Before submitting:
+        - Search [existing issues](https://github.com/GrayCodeAI/hawk-sdk-python/issues) to avoid duplicates.
+        - For new daemon endpoints, the daemon side must land first.
+
+  - type: dropdown
+    id: kind
+    attributes:
+      label: Kind of feature
+      description: What flavour of change is this?
+      options:
+        - "New client method (wraps a daemon endpoint)"
+        - "Streaming / SSE handling"
+        - "Retry / backoff / resilience"
+        - "Typed errors / error categories"
+        - "Tools (chat_with_tools, @tool decorator)"
+        - "Agent / Workflow orchestration"
+        - "Pydantic model / type-hint improvement"
+        - "Configuration (httpx transport, timeouts, etc.)"
+        - "Tooling / CI / docs / packaging"
+    validations:
+      required: true
+
+  - type: textarea
+    id: problem
+    attributes:
+      label: What problem are you trying to solve?
+      description: Describe the user problem first. Solutions can come later.
+      placeholder: When I call <method>, I have to write boilerplate Y because the SDK doesn't expose X.
+    validations:
+      required: true
+
+  - type: textarea
+    id: proposal
+    attributes:
+      label: Proposed solution
+      description: How would you like the SDK to behave? Snippet of API you'd want.
+      render: python
+    validations:
+      required: true
+
+  - type: textarea
+    id: alternatives
+    attributes:
+      label: Alternatives considered
+      description: |
+        What did you try? What do other SDKs (`openai-python`,
+        `anthropic-sdk-python`, `langchain`, `llama-index`, `dspy`,
+        `instructor`, `marvin`, `pydantic-ai`, `mirascope`, `magentic`)
+        do? Why isn't that enough?
+
+  - type: checkboxes
+    id: principles
+    attributes:
+      label: Solo-developer fit
+      description: hawk-sdk avoids enterprise scope. Confirm this feature respects that.
+      options:
+        - label: Works with zero configuration (sensible defaults).
+        - label: Does not introduce a third-party network dependency.
+        - label: Does not break wire-compatibility with existing daemon versions.
+        - label: Sync and async variants are kept in lock-step.
+        - label: Has an escape hatch (override via parameter, transport, or env).
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
new file mode 100644
index 0000000..f11661e
--- /dev/null
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -0,0 +1,87 @@
+<!--
+  Thanks for your contribution! Please fill out this template so reviewers can
+  understand the change quickly. If a section does not apply, keep its
+  heading and write "n/a" instead of deleting it.
+-->
+
+## Summary
+
+<!--
+  One paragraph describing what this PR does and why. Link the related
+  issue(s) with `Fixes #N` or `Refs #N` if applicable.
+-->
+
+## Changes
+
+<!--
+  Bullet list of what changed, grouped by area (client, agent, tools,
+  workflow, streaming, errors, retry, types, version, CI, docs).
+  Reviewers should be able to skim this and know what to look at first.
+-->
+
+-
+
+## API impact
+
+<!--
+  Did you add, remove, rename, or change the signature of any exported
+  symbol? List them here. If yes, confirm whether this is a breaking
+  change and bump the version accordingly in `pyproject.toml` and
+  `src/hawk/_version.py` (both must agree). If no exported surface
+  changed, write "n/a".
+-->
+
+## Daemon compatibility
+
+<!--
+  This SDK targets the hawk daemon `v1` API. Did you change endpoints,
+  request/response shapes, headers, or status-code handling?
+
+  - Which daemon versions did you test against (commit SHA / tag)?
+  - Is the change wire-compatible with the latest released daemon?
+  - If not, link the corresponding daemon PR.
+-->
+
+## Async compatibility
+
+<!--
+  Every public client method has both a sync and an async variant.
+  Did you change one without the other? If yes, explain why. Otherwise
+  confirm both `HawkClient.<method>` and `AsyncHawkClient.<method>`
+  were updated together.
+-->
+
+## Testing
+
+<!--
+  Describe how you tested. Paste output of `make test`, `make lint`, and
+  `make typecheck`. If you added new tests, list them.
+-->
+
+```text
+$ make test
+...
+$ make lint
+...
+$ make typecheck
+...
+```
+
+## Checklist
+
+- [ ] Commits follow [Conventional Commits](https://www.conventionalcommits.org/)
+      (`feat:`, `fix:`, `perf:`, `refactor:`, `docs:`, `test:`, etc.)
+- [ ] `make test` passes locally
+- [ ] `make lint` (ruff check) and `ruff format --check .` pass
+- [ ] `make typecheck` (mypy --strict) passes
+- [ ] New or changed code has tests (table-driven / parametrized where
+      appropriate, using `respx` for HTTP mocking)
+- [ ] Public APIs have docstrings and type hints
+- [ ] `CHANGELOG.md` updated under `## [Unreleased]` if user-visible
+- [ ] **Both `pyproject.toml` and `src/hawk/_version.py` are bumped
+      together** if this is a release-eligible change
+- [ ] Sync and async client variants are kept in lock-step
+- [ ] Every new outbound HTTP request inherits `User-Agent:
+      hawk-sdk-python/<__version__>` via `_build_headers()`
+- [ ] No secrets, tokens, or PII added to the repo
+- [ ] No `Co-authored-by:` trailers (this is solo-developer work)
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
new file mode 100644
index 0000000..b86542e
--- /dev/null
+++ b/.github/dependabot.yml
@@ -0,0 +1,35 @@
+version: 2
+updates:
+  - package-ecosystem: pip
+    directory: /
+    schedule:
+      interval: weekly
+      day: monday
+    open-pull-requests-limit: 5
+    labels:
+      - dependencies
+      - python
+    commit-message:
+      prefix: "chore(deps)"
+      include: scope
+    groups:
+      pydantic:
+        patterns:
+          - "pydantic*"
+      pytest:
+        patterns:
+          - "pytest*"
+          - "respx*"
+
+  - package-ecosystem: github-actions
+    directory: /
+    schedule:
+      interval: weekly
+      day: monday
+    open-pull-requests-limit: 3
+    labels:
+      - dependencies
+      - github-actions
+    commit-message:
+      prefix: "chore(ci)"
+      include: scope
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..086581f
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,89 @@
+name: CI
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+permissions:
+  contents: read
+
+jobs:
+  test:
+    name: Test (Python ${{ matrix.python-version }})
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+          cache: pip
+      - name: Install
+        run: |
+          python -m pip install --upgrade pip
+          pip install -e ".[dev]"
+      - name: pytest
+        run: pytest --strict-markers --tb=short
+
+  lint:
+    name: Lint (ruff)
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+          cache: pip
+      - name: Install
+        run: |
+          python -m pip install --upgrade pip
+          pip install -e ".[dev]"
+      - name: ruff check
+        run: ruff check .
+      - name: ruff format --check
+        run: ruff format --check .
+
+  typecheck:
+    name: Type check (mypy --strict)
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+          cache: pip
+      - name: Install
+        run: |
+          python -m pip install --upgrade pip
+          pip install -e ".[dev]"
+      - name: mypy
+        run: mypy src/
+
+  build:
+    name: Build sdist + wheel
+    runs-on: ubuntu-latest
+    needs: [test, lint, typecheck]
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+          cache: pip
+      - name: Install build
+        run: |
+          python -m pip install --upgrade pip
+          pip install build twine
+      - name: Build
+        run: python -m build
+      - name: Twine check
+        run: twine check dist/*
+      - name: Upload artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: dist
+          path: dist/
diff --git a/.gitignore b/.gitignore
index 7989b07..c0b9142 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,46 @@
+# Byte-compiled / optimized / DLL files
 __pycache__/
-*.pyc
+*.py[cod]
+*$py.class
+
+# Distribution / packaging
 *.egg-info/
-dist/
-build/
 .eggs/
+build/
+dist/
+sdist/
+wheels/
+*.egg
+MANIFEST
+
+# Virtual environments
+.venv/
+venv/
+env/
+ENV/
+
+# Tooling caches
+.pytest_cache/
+.mypy_cache/
+.ruff_cache/
+.tox/
+.nox/
+.coverage
+.coverage.*
+htmlcov/
+coverage.xml
+*.cover
+
+# IDE
+.vscode/
+.idea/
+*.swp
+*.swo
+
+# OS
+.DS_Store
+Thumbs.db
+
+# Local env files
+.env
+.env.local
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..21acc1c
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,83 @@
+# Changelog
+
+All notable changes to `hawk-sdk` (Python) are documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## [Unreleased]
+
+### Fixed
+- **`__version__` now agrees with `pyproject.toml`.** The prior hardening
+  commit bumped `pyproject.toml` to `0.2.0` but missed
+  `src/hawk/_version.py`, which still reported `0.1.0`. Both now report
+  `0.2.0`. Aligns the SDK with the rest of the hawk-eco ecosystem
+  (`hawk`, `tok`, `eyrie`, `yaad`, `trace`, `sight`, `inspect`,
+  `hawk-sdk-go`).
+
+### Added
+- **`User-Agent: hawk-sdk-python/<__version__>` header** on every
+  outbound HTTP request from both `HawkClient` (sync) and
+  `AsyncHawkClient` (async). Set via the `_build_headers()` helper, so
+  it applies to both regular API calls and the `chat_stream` SSE
+  endpoint (httpx merges client-default headers with per-request
+  overrides). Lets daemon operators identify SDK clients in logs and
+  reject misbehaving versions cleanly.
+- **OSS standard files** (this is the first PR to add them):
+  - `CHANGELOG.md` — Keep-a-Changelog format with `[Unreleased]` and
+    a backfilled `[0.1.0]` entry for the initial SDK + prior hardening
+    commit.
+  - `CONTRIBUTING.md` — quick start, branch flow (this repo branches
+    from `main`), conventional commits, code standards, testing,
+    SDK-version-bump procedure (must update both `pyproject.toml` and
+    `src/hawk/_version.py`).
+  - `SECURITY.md` — vulnerability reporting via GitHub Security
+    Advisories.
+  - `CODE_OF_CONDUCT.md` — Contributor Covenant 2.1.
+  - `.gitattributes` — LF normalization, binary detection.
+  - `.editorconfig` — UTF-8, LF, 4-space indent for Python, 2-space
+    for YAML/JSON/TOML.
+  - `.github/workflows/ci.yml` — pytest matrix on Python 3.9 / 3.10 /
+    3.11 / 3.12 / 3.13, ruff (lint + format check), mypy strict, build
+    sdist + wheel.
+  - `.github/dependabot.yml` — weekly `pip` + `github-actions`
+    updates.
+  - `.github/PULL_REQUEST_TEMPLATE.md` — Summary / Changes / API
+    impact / Daemon compatibility / Async compatibility / Testing /
+    Checklist.
+  - `.github/ISSUE_TEMPLATE/bug_report.yml` — surface dropdown
+    (HawkClient / AsyncHawkClient / streaming / retry / tools /
+    agent / workflow / typed errors / build).
+  - `.github/ISSUE_TEMPLATE/feature_request.yml` — feature request
+    with `kind` selector + solo-dev fit checks.
+  - `.github/ISSUE_TEMPLATE/config.yml` — routes security to
+    advisories, questions to discussions, blocks blank issues.
+- Expanded `.gitignore` with the broader Python toolchain footprint
+  (`.mypy_cache`, `.ruff_cache`, `.pytest_cache`, virtualenv dirs,
+  `htmlcov`, `coverage.xml`, `.tox`, `.nox`).
+
+## [0.1.0] — 2026-05-13
+
+### Added
+- Initial Python SDK for the hawk daemon API:
+  - `HawkClient` (sync) and `AsyncHawkClient` (async), both built on
+    `httpx`, with `health`, `chat`, `chat_stream`, `create_session`,
+    `get_session`, `list_sessions`, `list_messages`, `delete_session`,
+    `stats`.
+  - Pydantic v2 models for every request and response shape.
+  - Streaming via `StreamReader` / `AsyncStreamReader` (SSE).
+  - Typed error hierarchy (`HawkAPIError`, `AuthenticationError`,
+    `BadRequestError`, `NotFoundError`, `RateLimitError`,
+    `InternalServerError`, `ServiceUnavailableError`).
+  - Retry with exponential backoff via `RetryConfig`.
+  - `Tool`, `Agent` / `AsyncAgent`, `Workflow` / `AsyncWorkflow`
+    orchestration helpers.
+
+### Production-hardening pass already on this branch (commit `2560031`)
+- Added strict `ruff` lint config (E, F, W, I, N, UP, B, A, SIM, TCH,
+  RUF rule sets) and `mypy --strict` config in `pyproject.toml`.
+- Added `Makefile` with standard targets (`test`, `test-coverage`,
+  `lint`, `format`, `typecheck`, `clean`, `help`).
+- Bumped `pyproject.toml` version to `0.2.0` (this PR completes the
+  bump by also updating `_version.py`).
+- Added `pytest` strict-markers and short-traceback config.
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
new file mode 100644
index 0000000..681ab22
--- /dev/null
+++ b/CODE_OF_CONDUCT.md
@@ -0,0 +1,55 @@
+# Code of Conduct
+
+## Our pledge
+
+We — the maintainers and contributors of the hawk-sdk-python project —
+pledge to make participation in our community a harassment-free experience
+for everyone, regardless of age, body size, visible or invisible disability,
+ethnicity, sex characteristics, gender identity and expression, level of
+experience, education, socio-economic status, nationality, personal
+appearance, race, religion, or sexual identity and orientation.
+
+We pledge to act and interact in ways that contribute to an open, welcoming,
+diverse, inclusive, and healthy community.
+
+## Our standards
+
+Examples of behavior that contributes to a positive environment:
+
+- Demonstrating empathy and kindness toward other people
+- Being respectful of differing opinions, viewpoints, and experiences
+- Giving and gracefully accepting constructive feedback
+- Accepting responsibility, apologizing to those affected by mistakes,
+  and learning from the experience
+- Focusing on what is best not just for us as individuals, but for the
+  overall community
+
+Examples of unacceptable behavior:
+
+- The use of sexualized language or imagery, and sexual attention or advances
+- Trolling, insulting or derogatory comments, and personal or political attacks
+- Public or private harassment
+- Publishing others' private information, such as a physical or email address,
+  without their explicit permission
+- Other conduct which could reasonably be considered inappropriate in a
+  professional setting
+
+## Enforcement
+
+Community leaders are responsible for clarifying and enforcing our standards
+of acceptable behavior, and will take appropriate and fair corrective action
+in response to any behavior they deem inappropriate, threatening, offensive,
+or harmful.
+
+Instances of abusive, harassing, or otherwise unacceptable behavior may be
+reported via the contact in `SECURITY.md` or by opening a confidential GitHub
+Security Advisory. All complaints will be reviewed and investigated promptly
+and fairly.
+
+All community leaders are obligated to respect the privacy and security of
+the reporter of any incident.
+
+## Attribution
+
+This Code of Conduct is adapted from the
+[Contributor Covenant, version 2.1](https://www.contributor-covenant.org/version/2/1/code_of_conduct.html).
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000..4c94edc
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,101 @@
+# Contributing to hawk-sdk-python
+
+Thanks for considering a contribution. `hawk-sdk` is the Python client for
+the hawk daemon API. It is built for **solo developers** running their own
+hawk daemon locally — small surface area, async-first, fully type-checked.
+
+## Quick start
+
+```bash
+git clone https://github.com/GrayCodeAI/hawk-sdk-python.git
+cd hawk-sdk-python
+python -m venv .venv && source .venv/bin/activate
+pip install -e ".[dev]"
+make test       # pytest, asyncio_mode=auto, strict markers
+make lint       # ruff check
+make typecheck  # mypy --strict on src/
+```
+
+Python 3.9+ is supported. CI tests against 3.9 / 3.10 / 3.11 / 3.12 / 3.13.
+
+## Branch flow
+
+This repo does **not** have a `dev` branch. Branch from `main`:
+
+```bash
+git checkout main
+git pull origin main
+git checkout -b feat/<short-description>
+```
+
+Open the PR against `main`. Do **not** push directly to `main`.
+
+One PR per logical change. Do not mix unrelated changes in a single PR.
+
+## Commit messages
+
+Use [Conventional Commits](https://www.conventionalcommits.org/):
+
+```
+feat(client): add streaming back-pressure
+fix(retry): respect Retry-After when value is a date string
+perf(stream): avoid reparsing SSE event prefix
+docs(readme): document async usage
+test(retry): add coverage for context cancellation during backoff
+```
+
+Allowed types: `feat`, `fix`, `perf`, `refactor`, `test`, `docs`, `chore`,
+`build`, `ci`, `style`. Add a scope when it clarifies the change. Do not add
+`Co-authored-by:` trailers — this is solo-developer work.
+
+## Code standards
+
+- **Type hints everywhere.** `mypy --strict` must pass on `src/`.
+- **`ruff check .` must be clean.** Use `# noqa: <code>` only with a
+  one-line justification on the same line.
+- **Async-first.** Every public client method has an async variant.
+- **Pydantic v2 for wire shapes.** Every request and response model
+  inherits from `pydantic.BaseModel` and uses `model_validate` /
+  `model_dump`.
+- **Context managers for resource cleanup.** All clients support `with`
+  / `async with`.
+- **Rich error hierarchy.** Map daemon error responses to the typed
+  exceptions in `errors.py` via `parse_error()`.
+- **Set `User-Agent: hawk-sdk-python/<__version__>`** on every new
+  outbound HTTP request via the `_build_headers()` helper. Tests should
+  not assert on the exact User-Agent string (it changes with version).
+
+## Bumping the SDK version
+
+The version lives in **two** places — `pyproject.toml` (build metadata)
+and `src/hawk/_version.py` (runtime `__version__`). When bumping:
+
+1. Edit both files to the new version.
+2. Add a `## [X.Y.Z] — YYYY-MM-DD` entry at the top of `CHANGELOG.md`.
+3. Tag the release: `git tag vX.Y.Z && git push origin vX.Y.Z`.
+
+This SDK adheres to [SemVer](https://semver.org/spec/v2.0.0.html).
+Breaking changes to the daemon API or the SDK surface bump the major
+version. New SDK methods or daemon endpoints bump the minor. Bug fixes
+and internal improvements bump the patch.
+
+## Testing
+
+```bash
+make test           # full pytest suite
+make test-coverage  # coverage totals
+```
+
+When adding a new client method, cover: success path, retryable error
+(429 with `Retry-After`), non-retryable error, context cancellation,
+and (for streaming) early `close()` and clean iteration.
+
+`tests/` uses `respx` to mock httpx — avoid hitting the real network in
+unit tests.
+
+## Reporting bugs / requesting features
+
+- Bug: open an issue using the bug-report template.
+- Feature: open an issue using the feature-request template.
+- Security: do **not** file a public issue. Use a GitHub Security Advisory
+  per `SECURITY.md`.
diff --git a/SECURITY.md b/SECURITY.md
new file mode 100644
index 0000000..3bfe42e
--- /dev/null
+++ b/SECURITY.md
@@ -0,0 +1,53 @@
+# Security policy
+
+## Supported versions
+
+Only the latest minor version of `hawk-sdk` (Python) receives security
+updates. The current supported version is the most recent `0.x` release on
+PyPI.
+
+## Reporting a vulnerability
+
+Please **do not** file a public GitHub issue for security vulnerabilities.
+
+Instead, open a **private** GitHub Security Advisory:
+
+> https://github.com/GrayCodeAI/hawk-sdk-python/security/advisories/new
+
+Include, where possible:
+
+- A clear description of the issue and impact.
+- Steps to reproduce (a minimal Python snippet is ideal).
+- The affected `hawk-sdk` version (`hawk.__version__`) and Python version.
+- The hawk daemon version you were targeting, if relevant.
+- Any mitigations or patches you have already explored.
+
+We aim to respond to advisories within **5 business days** and to release
+a fix within **30 days** for high-severity issues.
+
+## What counts as a security issue
+
+Examples of in-scope issues:
+
+- The SDK leaking secrets, API tokens, or session IDs into logs, errors,
+  or metrics.
+- The SDK accepting and forwarding data that bypasses daemon-side
+  authentication, authorization, or rate limiting.
+- TLS misuse — accepting untrusted certificates, downgrade to HTTP, or
+  ignoring proxy configuration.
+- Pydantic-model deserialization issues that lead to memory exhaustion or
+  arbitrary code execution on attacker-controlled input.
+- Path / URL handling that lets a malicious daemon URL escape the
+  expected host (e.g. via redirects).
+
+Out of scope:
+
+- Issues in the hawk daemon itself — please report those at
+  https://github.com/GrayCodeAI/hawk/security/advisories/new.
+- Issues in third-party Python packages (`httpx`, `pydantic`, etc.) —
+  please report those upstream.
+
+## Disclosure
+
+Once a fix is released, we will publish the advisory with credit to the
+reporter (unless they request anonymity).
diff --git a/src/hawk/_version.py b/src/hawk/_version.py
index a51ea98..b57e19e 100644
--- a/src/hawk/_version.py
+++ b/src/hawk/_version.py
@@ -1,3 +1,3 @@
 """Version information for hawk-sdk."""
 
-__version__ = "0.1.0"
+__version__ = "0.2.0"
diff --git a/src/hawk/client.py b/src/hawk/client.py
index c13a533..d039f1d 100644
--- a/src/hawk/client.py
+++ b/src/hawk/client.py
@@ -9,6 +9,7 @@
 from .errors import parse_error
 from .retry import DEFAULT_RETRY_CONFIG, RetryConfig, with_retry, with_retry_sync
 from .streaming import AsyncStreamReader, StreamReader
+from ._version import __version__
 from .types import (
     ChatRequest,
     ChatResponse,
@@ -51,7 +52,10 @@ def __init__(
         )
 
     def _build_headers(self) -> dict[str, str]:
-        headers: dict[str, str] = {"Accept": "application/json"}
+        headers: dict[str, str] = {
+            "Accept": "application/json",
+            "User-Agent": f"hawk-sdk-python/{__version__}",
+        }
         if self._api_key:
             headers["Authorization"] = f"Bearer {self._api_key}"
         return headers
@@ -260,7 +264,10 @@ def __init__(
         )
 
     def _build_headers(self) -> dict[str, str]:
-        headers: dict[str, str] = {"Accept": "application/json"}
+        headers: dict[str, str] = {
+            "Accept": "application/json",
+            "User-Agent": f"hawk-sdk-python/{__version__}",
+        }
         if self._api_key:
             headers["Authorization"] = f"Bearer {self._api_key}"
         return headers

From 9e4780df0ee169c52fff1a817f1efcc0d0ed6d8a Mon Sep 17 00:00:00 2001
From: Patel230 <Lakshmanp230@gmail.com>
Date: Fri, 15 May 2026 15:21:07 +0530
Subject: [PATCH 3/4] chore: standardize eco-wide infra (versioning, CI, hooks,
 templates)

- VERSION file as single source of truth
- CODEOWNERS for auto-review routing
- Canonical Makefile with standard targets
- release-please config + workflow
- lefthook/pre-commit hooks (conventional commits, fmt, lint, secrets)
- Canonical CI + release GitHub Actions workflows
- Standardized .editorconfig, .gitattributes, CODE_OF_CONDUCT, SECURITY, CONTRIBUTING
- goreleaser config (where applicable)

Part of hawk-eco standardization sweep.
---
 .editorconfig                        |  62 ++++++++--
 .gitattributes                       |  96 +++++++++++----
 .github/workflows/ci.yml             |  36 ++++--
 .github/workflows/release-please.yml |  43 +++++++
 .github/workflows/release.yml        |  41 +++++++
 .pre-commit-config.yaml              |  43 +++++++
 .release-please-manifest.json        |   3 +
 CODEOWNERS                           |  20 ++++
 CODE_OF_CONDUCT.md                   |  59 ++++-----
 CONTRIBUTING.md                      | 173 ++++++++++++++-------------
 Makefile                             | 106 +++++++++++++---
 SECURITY.md                          |  88 ++++++++------
 VERSION                              |   1 +
 pyproject.toml                       |  24 +++-
 release-please-config.json           |  27 +++++
 src/hawk/_version.py                 |  29 ++++-
 16 files changed, 646 insertions(+), 205 deletions(-)
 create mode 100644 .github/workflows/release-please.yml
 create mode 100644 .github/workflows/release.yml
 create mode 100644 .pre-commit-config.yaml
 create mode 100644 .release-please-manifest.json
 create mode 100644 CODEOWNERS
 create mode 100644 VERSION
 create mode 100644 release-please-config.json

diff --git a/.editorconfig b/.editorconfig
index 5fe9816..39f1a41 100644
--- a/.editorconfig
+++ b/.editorconfig
@@ -1,29 +1,67 @@
+# EditorConfig — https://editorconfig.org
+# Canonical eco-wide template (.shared-templates/editorconfig.tmpl).
+
 root = true
 
+# Default for everything.
 [*]
 charset = utf-8
 end_of_line = lf
 insert_final_newline = true
 trim_trailing_whitespace = true
 indent_style = space
-indent_size = 2
-
-# Python: 4 spaces (PEP 8).
-[*.py]
-indent_style = space
 indent_size = 4
-max_line_length = 100
 
-# Makefile-likes use tabs by language convention.
-[{Makefile,*.mk,**.mk}]
+# Go uses tabs by convention.
+[*.go]
 indent_style = tab
 indent_size = 4
 
-# YAML / JSON / TOML: 2 spaces.
-[*.{yml,yaml,json,toml}]
-indent_style = space
+# Python — PEP 8.
+[*.py]
+indent_size = 4
+
+# TypeScript / JavaScript — 2 spaces, ecosystem default.
+[*.{ts,tsx,js,jsx,mjs,cjs}]
+indent_size = 2
+
+# Web assets.
+[*.{html,css,scss}]
+indent_size = 2
+
+# YAML — 2 spaces (ecosystem standard, GitHub Actions, k8s, etc.).
+[*.{yml,yaml}]
+indent_size = 2
+
+# JSON / JSONC.
+[*.{json,jsonc}]
+indent_size = 2
+
+# TOML.
+[*.toml]
 indent_size = 2
 
-# Markdown: trailing whitespace can be significant (line break with two spaces).
+# Markdown — 2 spaces, preserve trailing whitespace (used for line breaks).
 [*.md]
 trim_trailing_whitespace = false
+indent_size = 2
+
+# Shell scripts.
+[*.{sh,bash,zsh,fish}]
+indent_size = 4
+
+# Makefiles must use tabs.
+[{Makefile,*.mk}]
+indent_style = tab
+
+# Dockerfiles.
+[Dockerfile*]
+indent_size = 4
+
+# GitHub Actions workflows — 2 spaces.
+[.github/**/*.{yml,yaml}]
+indent_size = 2
+
+# Config files.
+[*.{cfg,ini,conf}]
+indent_size = 4
diff --git a/.gitattributes b/.gitattributes
index e3077d0..3342e8f 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,38 +1,86 @@
-# Default: normalize line endings to LF on commit, leave the working copy alone.
+# Canonical eco-wide .gitattributes template (.shared-templates/gitattributes.tmpl).
+# Auto-detect text files and normalise line endings to LF.
+
 * text=auto eol=lf
 
-# Explicitly LF for source, scripts, and config — never CRLF.
-*.py     text eol=lf
-*.md     text eol=lf
-*.yml    text eol=lf
-*.yaml   text eol=lf
-*.json   text eol=lf
-*.toml   text eol=lf
-*.cfg    text eol=lf
-*.ini    text eol=lf
+# --- Source code -----------------------------------------------------------
+*.go     text eol=lf diff=golang
+*.py     text eol=lf diff=python
+*.ts     text eol=lf
+*.tsx    text eol=lf
+*.js     text eol=lf
+*.jsx    text eol=lf
+*.mjs    text eol=lf
+*.cjs    text eol=lf
+*.rs     text eol=lf diff=rust
+
+# --- Shell + config --------------------------------------------------------
 *.sh     text eol=lf
-Makefile text eol=lf
+*.bash   text eol=lf
+*.toml   text eol=lf
+*.yaml   text eol=lf
+*.yml    text eol=lf
+*.json   text eol=lf linguist-language=JSON
+*.jsonc  text eol=lf linguist-language=JSON
+*.cff    text eol=lf
+
+# --- Documentation ---------------------------------------------------------
+*.md     text eol=lf diff=markdown
+*.txt    text eol=lf
+
+# --- Build / packaging ----------------------------------------------------
+Makefile        text eol=lf
+*.mk            text eol=lf
+Dockerfile*     text eol=lf
+docker-compose*.yml text eol=lf
+.github/**/*.yml    text eol=lf
+.github/**/*.yaml   text eol=lf
 
-# Windows-only files keep CRLF.
-*.bat    text eol=crlf
-*.cmd    text eol=crlf
-*.ps1    text eol=crlf
+# --- Generated artefacts (mark as such for diffs and language stats) ------
+go.mod          text eol=lf linguist-generated
+go.sum          text eol=lf linguist-generated
+*.pb.go         linguist-generated
+*_generated.go  linguist-generated
+package-lock.json   linguist-generated
+pnpm-lock.yaml      linguist-generated
+yarn.lock           linguist-generated
 
-# Binary files — never diffed, never EOL-normalized.
+# --- Vendored / external sources ------------------------------------------
+vendor/**       linguist-vendored
+node_modules/** linguist-vendored
+testdata/**     linguist-vendored
+benchmarks/data/** linguist-vendored
+
+# --- Binary files (do not text-normalise) ---------------------------------
+*.exe    binary
+*.dll    binary
+*.so     binary
+*.dylib  binary
+*.a      binary
+*.o      binary
+*.db     binary
+*.sqlite binary
 *.png    binary
 *.jpg    binary
 *.jpeg   binary
 *.gif    binary
 *.ico    binary
+*.svg    text eol=lf
+*.pdf    binary
 *.zip    binary
-*.tar    binary
 *.tar.gz binary
-*.gz     binary
-*.pdf    binary
+*.tgz    binary
 *.whl    binary
 
-# Generated / lock files — collapse in PR diffs (GitHub linguist hint).
-poetry.lock        linguist-generated=true
-Pipfile.lock       linguist-generated=true
-uv.lock            linguist-generated=true
-requirements*.txt  linguist-generated=true
+# --- Source archive hygiene (excluded from `git archive`) -----------------
+.github         export-ignore
+.shared-templates export-ignore
+.gitattributes  export-ignore
+.gitignore      export-ignore
+.editorconfig   export-ignore
+.golangci.yml   export-ignore
+.goreleaser.yml export-ignore
+.goreleaser.yaml export-ignore
+testdata/       export-ignore
+benchmarks/     export-ignore
+e2e/            export-ignore
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 086581f..3b280e2 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -1,3 +1,6 @@
+# Canonical CI workflow for hawk-eco Python repos.
+# Source of truth: .shared-templates/workflows/python-ci.yml.tmpl
+
 name: CI
 
 on:
@@ -9,9 +12,13 @@ on:
 permissions:
   contents: read
 
+concurrency:
+  group: ci-${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
 jobs:
   test:
-    name: Test (Python ${{ matrix.python-version }})
+    name: test (Python ${{ matrix.python-version }})
     runs-on: ubuntu-latest
     strategy:
       fail-fast: false
@@ -31,7 +38,7 @@ jobs:
         run: pytest --strict-markers --tb=short
 
   lint:
-    name: Lint (ruff)
+    name: lint (ruff)
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
@@ -49,7 +56,7 @@ jobs:
         run: ruff format --check .
 
   typecheck:
-    name: Type check (mypy --strict)
+    name: typecheck (mypy --strict)
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
@@ -64,8 +71,24 @@ jobs:
       - name: mypy
         run: mypy src/
 
+  security:
+    name: security (pip-audit)
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+          cache: pip
+      - name: Install
+        run: |
+          python -m pip install --upgrade pip pip-audit
+          pip install -e ".[dev]"
+      - name: pip-audit
+        run: pip-audit
+
   build:
-    name: Build sdist + wheel
+    name: build (sdist + wheel)
     runs-on: ubuntu-latest
     needs: [test, lint, typecheck]
     steps:
@@ -74,10 +97,9 @@ jobs:
         with:
           python-version: "3.12"
           cache: pip
-      - name: Install build
+      - name: Install build tools
         run: |
-          python -m pip install --upgrade pip
-          pip install build twine
+          python -m pip install --upgrade pip build twine
       - name: Build
         run: python -m build
       - name: Twine check
diff --git a/.github/workflows/release-please.yml b/.github/workflows/release-please.yml
new file mode 100644
index 0000000..639f55f
--- /dev/null
+++ b/.github/workflows/release-please.yml
@@ -0,0 +1,43 @@
+# Canonical release-please workflow for hawk-eco repos.
+# Opens / updates a release PR on every push to main; on merge of that PR,
+# tags the new release. The tag triggers the PyPI publish workflow (release.yml).
+#
+# Source of truth: .shared-templates/release-please.yml.tmpl at the eco root.
+
+name: release-please
+
+on:
+  push:
+    branches: [main]
+
+permissions:
+  contents: write
+  pull-requests: write
+  issues: write
+
+concurrency:
+  group: release-please-${{ github.ref }}
+  cancel-in-progress: false
+
+jobs:
+  release-please:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Run release-please
+        id: release
+        uses: googleapis/release-please-action@v4
+        with:
+          config-file: release-please-config.json
+          manifest-file: .release-please-manifest.json
+          token: ${{ secrets.RELEASE_PLEASE_TOKEN || secrets.GITHUB_TOKEN }}
+
+      - name: Summary
+        if: always()
+        run: |
+          if [[ "${{ steps.release.outputs.release_created }}" == "true" ]]; then
+            echo "Released ${{ steps.release.outputs.tag_name }}." >> $GITHUB_STEP_SUMMARY
+          elif [[ "${{ steps.release.outputs.pr }}" != "" ]]; then
+            echo "Updated release PR: ${{ steps.release.outputs.pr }}" >> $GITHUB_STEP_SUMMARY
+          else
+            echo "No release-relevant changes detected." >> $GITHUB_STEP_SUMMARY
+          fi
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
new file mode 100644
index 0000000..4833743
--- /dev/null
+++ b/.github/workflows/release.yml
@@ -0,0 +1,41 @@
+# Canonical PyPI publish workflow for hawk-eco Python repos.
+# Triggered by release-please when it pushes a v* tag.
+# Source of truth: .shared-templates/workflows/python-release.yml.tmpl
+#
+# Uses PyPI Trusted Publishing (OIDC) — no API tokens stored in GitHub.
+# Configure once at https://pypi.org/manage/account/publishing/
+
+name: release
+
+on:
+  push:
+    tags: ["v*"]
+
+permissions:
+  contents: read
+  id-token: write   # required for PyPI Trusted Publishing
+
+jobs:
+  build-and-publish:
+    runs-on: ubuntu-latest
+    environment:
+      name: pypi
+      url: https://pypi.org/p/hawk-sdk
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Install build tooling
+        run: |
+          python -m pip install --upgrade pip build
+
+      - name: Build sdist + wheel
+        run: python -m build
+
+      - name: Publish to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          packages-dir: dist/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..b449ec5
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,43 @@
+# Canonical pre-commit config for hawk-eco Python repos.
+# Source of truth: .shared-templates/pre-commit-config.yaml.tmpl
+#
+# Install:    pip install pre-commit
+# Activate:   pre-commit install --install-hooks
+# Run all:    pre-commit run --all-files
+
+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.6.0
+    hooks:
+      - id: trailing-whitespace
+        exclude: '\.md$'           # markdown uses trailing whitespace for line breaks
+      - id: end-of-file-fixer
+      - id: check-yaml
+      - id: check-toml
+      - id: check-json
+      - id: check-merge-conflict
+      - id: check-added-large-files
+        args: [--maxkb=512]
+      - id: detect-private-key
+      - id: mixed-line-ending
+        args: [--fix=lf]
+
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.8.0
+    hooks:
+      - id: ruff
+        args: [--fix, --exit-non-zero-on-fix]
+      - id: ruff-format
+
+  - repo: https://github.com/pre-commit/mirrors-mypy
+    rev: v1.13.0
+    hooks:
+      - id: mypy
+        additional_dependencies: [pydantic>=2.0, httpx>=0.25]
+        args: [--strict, --ignore-missing-imports]
+
+  - repo: https://github.com/commitizen-tools/commitizen
+    rev: v3.30.1
+    hooks:
+      - id: commitizen
+        stages: [commit-msg]
diff --git a/.release-please-manifest.json b/.release-please-manifest.json
new file mode 100644
index 0000000..2be9c43
--- /dev/null
+++ b/.release-please-manifest.json
@@ -0,0 +1,3 @@
+{
+  ".": "0.2.0"
+}
diff --git a/CODEOWNERS b/CODEOWNERS
new file mode 100644
index 0000000..4cdd829
--- /dev/null
+++ b/CODEOWNERS
@@ -0,0 +1,20 @@
+# CODEOWNERS for hawk-sdk-python
+* @GrayCodeAI/maintainers
+
+# Public API surface — changes here drive the SDK version (release-please bumps VERSION; do not edit it by hand)
+/src/hawk/client.py     @GrayCodeAI/sdk-team
+/src/hawk/agent.py      @GrayCodeAI/sdk-team
+/src/hawk/workflow.py   @GrayCodeAI/sdk-team
+/src/hawk/tools.py      @GrayCodeAI/sdk-team
+/src/hawk/types.py      @GrayCodeAI/sdk-team
+/src/hawk/errors.py     @GrayCodeAI/sdk-team
+/src/hawk/_version.py   @GrayCodeAI/maintainers
+/VERSION                @GrayCodeAI/maintainers
+
+# Build / packaging
+/pyproject.toml         @GrayCodeAI/devops-team
+/.github/               @GrayCodeAI/devops-team
+/Makefile               @GrayCodeAI/devops-team
+
+# Documentation
+*.md                    @GrayCodeAI/docs-team
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
index 681ab22..314f97c 100644
--- a/CODE_OF_CONDUCT.md
+++ b/CODE_OF_CONDUCT.md
@@ -2,49 +2,51 @@
 
 ## Our pledge
 
-We — the maintainers and contributors of the hawk-sdk-python project —
-pledge to make participation in our community a harassment-free experience
-for everyone, regardless of age, body size, visible or invisible disability,
-ethnicity, sex characteristics, gender identity and expression, level of
-experience, education, socio-economic status, nationality, personal
-appearance, race, religion, or sexual identity and orientation.
+We — the maintainers and contributors of the hawk-sdk-python project — pledge to
+make participation in our community a harassment-free experience for everyone,
+regardless of age, body size, visible or invisible disability, ethnicity, sex
+characteristics, gender identity and expression, level of experience,
+education, socio-economic status, nationality, personal appearance, race,
+religion, or sexual identity and orientation.
 
 We pledge to act and interact in ways that contribute to an open, welcoming,
 diverse, inclusive, and healthy community.
 
 ## Our standards
 
-Examples of behavior that contributes to a positive environment:
+Examples of behaviour that contributes to a positive environment:
 
-- Demonstrating empathy and kindness toward other people
-- Being respectful of differing opinions, viewpoints, and experiences
-- Giving and gracefully accepting constructive feedback
-- Accepting responsibility, apologizing to those affected by mistakes,
-  and learning from the experience
+- Demonstrating empathy and kindness toward other people.
+- Being respectful of differing opinions, viewpoints, and experiences.
+- Giving and gracefully accepting constructive feedback.
+- Accepting responsibility, apologising to those affected by mistakes, and
+  learning from the experience.
 - Focusing on what is best not just for us as individuals, but for the
-  overall community
+  overall community.
 
-Examples of unacceptable behavior:
+Examples of unacceptable behaviour:
 
-- The use of sexualized language or imagery, and sexual attention or advances
-- Trolling, insulting or derogatory comments, and personal or political attacks
-- Public or private harassment
-- Publishing others' private information, such as a physical or email address,
-  without their explicit permission
+- The use of sexualised language or imagery, and sexual attention or advances.
+- Trolling, insulting or derogatory comments, and personal or political
+  attacks.
+- Public or private harassment.
+- Publishing others' private information, such as a physical or email
+  address, without their explicit permission.
 - Other conduct which could reasonably be considered inappropriate in a
-  professional setting
+  professional setting.
 
 ## Enforcement
 
 Community leaders are responsible for clarifying and enforcing our standards
-of acceptable behavior, and will take appropriate and fair corrective action
-in response to any behavior they deem inappropriate, threatening, offensive,
-or harmful.
+of acceptable behaviour, and will take appropriate and fair corrective
+action in response to any behaviour they deem inappropriate, threatening,
+offensive, or harmful.
 
-Instances of abusive, harassing, or otherwise unacceptable behavior may be
-reported via the contact in `SECURITY.md` or by opening a confidential GitHub
-Security Advisory. All complaints will be reviewed and investigated promptly
-and fairly.
+Instances of abusive, harassing, or otherwise unacceptable behaviour may be
+reported to the maintainers via the contact in `SECURITY.md` or by opening a
+confidential GitHub Security Advisory at
+<https://github.com/GrayCodeAI/hawk-sdk-python/security/advisories>. All
+complaints will be reviewed and investigated promptly and fairly.
 
 All community leaders are obligated to respect the privacy and security of
 the reporter of any incident.
@@ -53,3 +55,6 @@ the reporter of any incident.
 
 This Code of Conduct is adapted from the
 [Contributor Covenant, version 2.1](https://www.contributor-covenant.org/version/2/1/code_of_conduct.html).
+
+For answers to common questions about this code of conduct, see the
+Contributor Covenant FAQ at <https://www.contributor-covenant.org/faq>.
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 4c94edc..5fba294 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1,101 +1,114 @@
 # Contributing to hawk-sdk-python
 
-Thanks for considering a contribution. `hawk-sdk` is the Python client for
-the hawk daemon API. It is built for **solo developers** running their own
-hawk daemon locally — small surface area, async-first, fully type-checked.
+Thanks for your interest! This guide covers the conventions used across the
+hawk-eco. The eco-wide standards (versioning, release tooling, repo layout)
+are defined in <https://github.com/GrayCodeAI/hawk/blob/main/VERSIONING.md>.
 
 ## Quick start
 
-```bash
-git clone https://github.com/GrayCodeAI/hawk-sdk-python.git
-cd hawk-sdk-python
-python -m venv .venv && source .venv/bin/activate
-pip install -e ".[dev]"
-make test       # pytest, asyncio_mode=auto, strict markers
-make lint       # ruff check
-make typecheck  # mypy --strict on src/
-```
-
-Python 3.9+ is supported. CI tests against 3.9 / 3.10 / 3.11 / 3.12 / 3.13.
+1. Fork the repo and create a feature branch off `main`:
+   ```bash
+   git checkout -b feat/short-description
+   ```
+2. Make your changes in small, focused commits.
+3. Run the full local check before pushing:
+   ```bash
+   make ci
+   ```
+4. Open a pull request. CI runs the tests, lint, and type checks again, plus
+   a `pip-audit` security scan and an sdist + wheel build.
+
+## Build & test
+
+This repo uses the standardised hawk-eco Makefile targets. Run `make help`
+for the full list. The most common targets:
+
+| Target              | What it does                                     |
+| ------------------- | ------------------------------------------------ |
+| `make build`        | Build wheel + sdist into `dist/`                 |
+| `make test`         | Run unit tests                                   |
+| `make test-race`    | Run unit tests with the race detector            |
+| `make cover`        | Generate a coverage report                       |
+| `make lint`         | Run the linter (`golangci-lint` / `ruff`)        |
+| `make fmt`          | Format source files                              |
+| `make vet`          | Run `go vet` / `mypy`                            |
+| `make security`     | Run `govulncheck` / `pip-audit`                  |
+| `make ci`           | Run everything CI runs (the gate before pushing) |
+
+## Commit message convention
+
+We use [Conventional Commits](https://www.conventionalcommits.org/). This
+isn't cosmetic — release-please reads commit messages to bump the `VERSION`
+file and generate the CHANGELOG, so getting them right matters.
 
-## Branch flow
+```
+<type>(<optional scope>): <short summary>
 
-This repo does **not** have a `dev` branch. Branch from `main`:
+<optional body>
 
-```bash
-git checkout main
-git pull origin main
-git checkout -b feat/<short-description>
+<optional footer(s)>
 ```
 
-Open the PR against `main`. Do **not** push directly to `main`.
+**Types:**
 
-One PR per logical change. Do not mix unrelated changes in a single PR.
+- `feat:` — a new feature (triggers a minor version bump)
+- `fix:` — a bug fix (triggers a patch version bump)
+- `perf:` — performance improvement
+- `refactor:` — code restructure with no behaviour change
+- `docs:` — documentation only
+- `test:` — adding or fixing tests
+- `build:` — build system or dependencies
+- `ci:` — CI configuration
+- `chore:` — anything else (no release effect)
+- `revert:` — reverts a previous commit
 
-## Commit messages
+**Breaking changes:** add `!` after the type/scope or include
+`BREAKING CHANGE:` in the footer. This triggers a major version bump.
 
-Use [Conventional Commits](https://www.conventionalcommits.org/):
+Examples:
 
 ```
-feat(client): add streaming back-pressure
-fix(retry): respect Retry-After when value is a date string
-perf(stream): avoid reparsing SSE event prefix
-docs(readme): document async usage
-test(retry): add coverage for context cancellation during backoff
+feat(client): add streaming retry with exponential backoff
+fix: handle empty response body in chat handler
+refactor!: rename ClientV1 to Client (BREAKING CHANGE)
 ```
 
-Allowed types: `feat`, `fix`, `perf`, `refactor`, `test`, `docs`, `chore`,
-`build`, `ci`, `style`. Add a scope when it clarifies the change. Do not add
-`Co-authored-by:` trailers — this is solo-developer work.
-
-## Code standards
-
-- **Type hints everywhere.** `mypy --strict` must pass on `src/`.
-- **`ruff check .` must be clean.** Use `# noqa: <code>` only with a
-  one-line justification on the same line.
-- **Async-first.** Every public client method has an async variant.
-- **Pydantic v2 for wire shapes.** Every request and response model
-  inherits from `pydantic.BaseModel` and uses `model_validate` /
-  `model_dump`.
-- **Context managers for resource cleanup.** All clients support `with`
-  / `async with`.
-- **Rich error hierarchy.** Map daemon error responses to the typed
-  exceptions in `errors.py` via `parse_error()`.
-- **Set `User-Agent: hawk-sdk-python/<__version__>`** on every new
-  outbound HTTP request via the `_build_headers()` helper. Tests should
-  not assert on the exact User-Agent string (it changes with version).
-
-## Bumping the SDK version
-
-The version lives in **two** places — `pyproject.toml` (build metadata)
-and `src/hawk/_version.py` (runtime `__version__`). When bumping:
-
-1. Edit both files to the new version.
-2. Add a `## [X.Y.Z] — YYYY-MM-DD` entry at the top of `CHANGELOG.md`.
-3. Tag the release: `git tag vX.Y.Z && git push origin vX.Y.Z`.
-
-This SDK adheres to [SemVer](https://semver.org/spec/v2.0.0.html).
-Breaking changes to the daemon API or the SDK surface bump the major
-version. New SDK methods or daemon endpoints bump the minor. Bug fixes
-and internal improvements bump the patch.
-
-## Testing
-
-```bash
-make test           # full pytest suite
-make test-coverage  # coverage totals
-```
+## Pull request checklist
+
+Before requesting review:
+
+- [ ] `make ci` passes locally.
+- [ ] New behaviour has tests; bug fixes have a regression test.
+- [ ] `CHANGELOG.md` entries are **not** edited manually — release-please
+      generates them from your commit messages.
+- [ ] The `VERSION` file is **not** edited manually — release-please bumps
+      it on release.
+- [ ] Public API changes have updated doc comments.
+- [ ] No secrets, API keys, or PII in code, comments, tests, or fixtures.
+
+## Code review etiquette
+
+- Reviewers focus on correctness, design, and tests; formatting is
+  enforced by tooling, not humans.
+- Authors respond to every comment (resolved, addressed, or politely
+  declined with rationale) — no silent dismissals.
+- Squash-merge by default; the PR title becomes the commit (so it must
+  be a valid Conventional Commit message).
+- One approving review from a CODEOWNERS-listed reviewer is required.
+
+## Reporting bugs
+
+Open an issue using the bug-report template. Include the `hawk-sdk` version
+(`python -c "import hawk; print(hawk.__version__)"`, or see this repo's
+`VERSION` file), reproduction steps, expected behaviour, and actual
+behaviour.
 
-When adding a new client method, cover: success path, retryable error
-(429 with `Retry-After`), non-retryable error, context cancellation,
-and (for streaming) early `close()` and clean iteration.
+## Reporting security issues
 
-`tests/` uses `respx` to mock httpx — avoid hitting the real network in
-unit tests.
+**Do not open a public issue.** See [SECURITY.md](./SECURITY.md) for
+private reporting channels.
 
-## Reporting bugs / requesting features
+## License
 
-- Bug: open an issue using the bug-report template.
-- Feature: open an issue using the feature-request template.
-- Security: do **not** file a public issue. Use a GitHub Security Advisory
-  per `SECURITY.md`.
+By contributing, you agree that your contributions will be licensed under
+the same license as this repo (see [LICENSE](./LICENSE)).
diff --git a/Makefile b/Makefile
index 9075d1d..5f8f390 100644
--- a/Makefile
+++ b/Makefile
@@ -1,25 +1,97 @@
-.PHONY: all test lint format typecheck clean help
+# Canonical hawk-eco Makefile for Python repos.
+# Source of truth: .shared-templates/Makefile.python.tmpl at the eco root.
+# Placeholders rendered per repo: hawk-sdk.
 
-all: lint typecheck test
+# ---------------------------------------------------------------------------
+# Project metadata
+# ---------------------------------------------------------------------------
+NAME := hawk-sdk
 
-test: ## Run tests
-	python -m pytest
+# ---------------------------------------------------------------------------
+# Versioning — sourced from VERSION file at repo root (single source of
+# truth, also consumed by hatch + release-please); "dev" if missing or empty.
+# ---------------------------------------------------------------------------
+VERSION ?= $(shell v=$$(head -n1 VERSION 2>/dev/null | tr -d '[:space:]'); echo "$${v:-dev}")
 
-test-coverage: ## Run tests with coverage
-	python -m pytest --cov=src/hawk --cov-report=term-missing
+PYTHON  ?= python3
+PIP     ?= $(PYTHON) -m pip
 
-lint: ## Run ruff linter
-	ruff check .
+# ---------------------------------------------------------------------------
+# Phony declarations (alphabetical).
+# ---------------------------------------------------------------------------
+.PHONY: all bench build ci clean cover fmt help install lint lint-fix \
+        release security test test-race tidy version vet
 
-format: ## Format code
-	ruff format .
-	ruff check --fix .
+# ---------------------------------------------------------------------------
+# Default target.
+# ---------------------------------------------------------------------------
+all: lint test build ## Default — lint, test, build.
 
-typecheck: ## Run mypy type checker
-	mypy src/
+# ---------------------------------------------------------------------------
+# Build / install / release.
+# ---------------------------------------------------------------------------
+build: ## Build wheel + sdist into dist/.
+	$(PYTHON) -m build
 
-clean: ## Clean artifacts
-	rm -rf dist/ build/ *.egg-info .pytest_cache .mypy_cache .ruff_cache
+install: ## Install in editable mode with dev extras.
+	$(PIP) install -e ".[dev]"
 
-help: ## Show this help
-	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}'
+release: build ## Upload to PyPI (expects $TWINE_USERNAME / $TWINE_PASSWORD).
+	$(PYTHON) -m twine upload dist/*
+
+# ---------------------------------------------------------------------------
+# Tests.
+# ---------------------------------------------------------------------------
+test: ## Run unit tests.
+	$(PYTHON) -m pytest
+
+test-race: test ## Alias for `test` (Python has no race detector).
+
+cover: ## Run tests with coverage report.
+	$(PYTHON) -m pytest --cov=src --cov-report=term-missing --cov-report=html
+	@echo "Coverage report: htmlcov/index.html"
+
+bench: ## Run benchmarks (requires pytest-benchmark).
+	$(PYTHON) -m pytest --benchmark-only
+
+# ---------------------------------------------------------------------------
+# Quality gates.
+# ---------------------------------------------------------------------------
+fmt: ## Format with ruff.
+	$(PYTHON) -m ruff format .
+
+vet: ## Type-check with mypy.
+	$(PYTHON) -m mypy src/
+
+lint: ## Lint with ruff.
+	$(PYTHON) -m ruff check .
+
+lint-fix: ## Lint with ruff --fix.
+	$(PYTHON) -m ruff check --fix .
+
+security: ## Run pip-audit on resolved dependencies.
+	@command -v pip-audit >/dev/null 2>&1 || (echo "install: pip install pip-audit" && exit 1)
+	pip-audit
+
+tidy: ## No-op for Python (lockfile management is via pyproject.toml).
+	@echo "tidy: nothing to do for Python repos."
+
+# ---------------------------------------------------------------------------
+# Composite gate used by CI and pre-push.
+# ---------------------------------------------------------------------------
+ci: fmt vet lint test security ## Run everything CI runs.
+	@echo "All CI checks passed."
+
+# ---------------------------------------------------------------------------
+# Misc.
+# ---------------------------------------------------------------------------
+version: ## Print the version that will be packaged.
+	@echo "Version: $(VERSION)"
+
+clean: ## Remove build artefacts and caches.
+	rm -rf dist/ build/ *.egg-info htmlcov/ .coverage
+	rm -rf .pytest_cache .mypy_cache .ruff_cache
+	find . -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true
+
+help: ## Show this help.
+	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-15s\033[0m %s\n", $$1, $$2}'
diff --git a/SECURITY.md b/SECURITY.md
index 3bfe42e..0c39f7a 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -1,53 +1,71 @@
-# Security policy
+# Security Policy — hawk-sdk-python
 
 ## Supported versions
 
-Only the latest minor version of `hawk-sdk` (Python) receives security
-updates. The current supported version is the most recent `0.x` release on
-PyPI.
+We support the latest minor version on each `0.x` line, and the latest two
+minor versions once `1.x` ships. Older versions receive critical-severity
+fixes only on a best-effort basis.
+
+The current canonical version is the contents of the [`VERSION`](./VERSION)
+file at the repo root. See [`VERSIONING.md`](https://github.com/GrayCodeAI/hawk/blob/main/VERSIONING.md)
+for the eco-wide versioning scheme.
 
 ## Reporting a vulnerability
 
-Please **do not** file a public GitHub issue for security vulnerabilities.
+**Do not open a public GitHub issue for security vulnerabilities.** Instead:
+
+1. Open a private [GitHub Security Advisory](https://github.com/GrayCodeAI/hawk-sdk-python/security/advisories/new), **or**
+2. Email `security@graycode.ai` with the details below.
+
+Include in your report:
 
-Instead, open a **private** GitHub Security Advisory:
+- A description of the vulnerability and the affected component.
+- Steps to reproduce, ideally with a minimal proof-of-concept.
+- The version (`VERSION` file or git SHA) you tested against.
+- The potential impact and any suggested mitigation.
 
-> https://github.com/GrayCodeAI/hawk-sdk-python/security/advisories/new
+**Response targets:**
 
-Include, where possible:
+- Initial acknowledgement: within **48 hours**.
+- Triage and severity assessment: within **5 business days**.
+- Coordinated fix and disclosure: within **30 days** for high/critical, **90
+  days** for medium/low (per industry-standard responsible disclosure).
 
-- A clear description of the issue and impact.
-- Steps to reproduce (a minimal Python snippet is ideal).
-- The affected `hawk-sdk` version (`hawk.__version__`) and Python version.
-- The hawk daemon version you were targeting, if relevant.
-- Any mitigations or patches you have already explored.
+## Disclosure policy
 
-We aim to respond to advisories within **5 business days** and to release
-a fix within **30 days** for high-severity issues.
+We follow [coordinated vulnerability disclosure](https://en.wikipedia.org/wiki/Coordinated_vulnerability_disclosure):
 
-## What counts as a security issue
+- Reporters receive credit in the advisory and CHANGELOG (unless they opt
+  out).
+- We request that reporters refrain from public disclosure until a fix has
+  been released or the disclosure deadline above has elapsed.
+- We will not pursue legal action against good-faith researchers acting
+  within this policy.
 
-Examples of in-scope issues:
+## Security practices in this repo
 
-- The SDK leaking secrets, API tokens, or session IDs into logs, errors,
-  or metrics.
-- The SDK accepting and forwarding data that bypasses daemon-side
-  authentication, authorization, or rate limiting.
-- TLS misuse — accepting untrusted certificates, downgrade to HTTP, or
-  ignoring proxy configuration.
-- Pydantic-model deserialization issues that lead to memory exhaustion or
-  arbitrary code execution on attacker-controlled input.
-- Path / URL handling that lets a malicious daemon URL escape the
-  expected host (e.g. via redirects).
+- **Dependency monitoring:** automated via Dependabot (see
+  `.github/dependabot.yml`).
+- **Static analysis:** `ruff` (lint) and `mypy` (type-check) enforced in CI.
+- **Vulnerability scanning:** `pip-audit` runs against the resolved
+  dependencies on every CI build.
+- **Lockfiles:** `go.sum` / `pnpm-lock.yaml` / `pyproject.toml` are pinned
+  and committed.
+- **Reproducible builds:** release artefacts ship with SHA-256 checksums via
+  goreleaser.
+- **No secrets in source:** API keys are configuration, not constants. Pre-
+  commit hooks block accidental secret commits.
 
-Out of scope:
+## Scope
 
-- Issues in the hawk daemon itself — please report those at
-  https://github.com/GrayCodeAI/hawk/security/advisories/new.
-- Issues in third-party Python packages (`httpx`, `pydantic`, etc.) —
-  please report those upstream.
+This policy covers the code in this repository and the release artefacts
+published from it. It does not cover:
 
-## Disclosure
+- Third-party dependencies (report to upstream).
+- LLM provider services that hawk-sdk-python integrates with (report to the
+  provider).
+- Local filesystem misuse where an attacker already has shell access (out of
+  threat model).
 
-Once a fix is released, we will publish the advisory with credit to the
-reporter (unless they request anonymity).
+For hawk-sdk-python-specific threat-model notes, see the README and any docs in
+this repo.
diff --git a/VERSION b/VERSION
new file mode 100644
index 0000000..0ea3a94
--- /dev/null
+++ b/VERSION
@@ -0,0 +1 @@
+0.2.0
diff --git a/pyproject.toml b/pyproject.toml
index 9eabe83..53a5eec 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,10 @@ build-backend = "hatchling.build"
 
 [project]
 name = "hawk-sdk"
-version = "0.2.0"
+# Version is read dynamically from the VERSION file at the repo root —
+# the single source of truth used by release tooling, CI, and the runtime
+# `hawk.__version__` constant.
+dynamic = ["version"]
 description = "Python SDK for the Hawk daemon API"
 readme = "README.md"
 license = "MIT"
@@ -39,8 +42,27 @@ dev = [
     "mypy>=1.0",
 ]
 
+# Read the package version from the repo-root VERSION file at build time.
+# This keeps Python package metadata, runtime `__version__`, and release
+# tooling all in sync from a single source.
+[tool.hatch.version]
+source = "regex"
+path = "VERSION"
+pattern = "^(?P<version>[^\\s]+)"
+
 [tool.hatch.build.targets.wheel]
 packages = ["src/hawk"]
+# Ship the VERSION file inside the package so `_version.py` can read it at
+# runtime even after install.
+force-include = { "VERSION" = "hawk/VERSION" }
+
+[tool.hatch.build.targets.sdist]
+include = [
+    "src/hawk",
+    "VERSION",
+    "README.md",
+    "LICENSE",
+]
 
 [tool.pytest.ini_options]
 asyncio_mode = "auto"
diff --git a/release-please-config.json b/release-please-config.json
new file mode 100644
index 0000000..9343c9a
--- /dev/null
+++ b/release-please-config.json
@@ -0,0 +1,27 @@
+{
+  "$schema": "https://raw.githubusercontent.com/googleapis/release-please/main/schemas/config.json",
+  "packages": {
+    ".": {
+      "release-type": "python",
+      "package-name": "hawk-sdk-python",
+      "include-v-in-tag": true,
+      "include-component-in-tag": false,
+      "bump-minor-pre-major": true,
+      "bump-patch-for-minor-pre-major": false,
+      "changelog-sections": [
+        { "type": "feat",     "section": "Features" },
+        { "type": "fix",      "section": "Bug Fixes" },
+        { "type": "perf",     "section": "Performance" },
+        { "type": "refactor", "section": "Refactoring" },
+        { "type": "revert",   "section": "Reverts" },
+        { "type": "docs",     "section": "Documentation", "hidden": false },
+        { "type": "test",     "section": "Tests",         "hidden": false },
+        { "type": "build",    "section": "Build",         "hidden": true },
+        { "type": "ci",       "section": "CI",            "hidden": true },
+        { "type": "chore",    "section": "Chores",        "hidden": true },
+        { "type": "style",    "section": "Style",         "hidden": true }
+      ],
+      "extra-files": [{"type":"version-txt","path":"VERSION"}]
+    }
+  }
+}
diff --git a/src/hawk/_version.py b/src/hawk/_version.py
index b57e19e..b4dcf7e 100644
--- a/src/hawk/_version.py
+++ b/src/hawk/_version.py
@@ -1,3 +1,28 @@
-"""Version information for hawk-sdk."""
+"""Version information for hawk-sdk.
 
-__version__ = "0.2.0"
+The version is read from the VERSION file at the repo root, which is the
+single source of truth used by release tooling, CI, and package metadata.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+
+def _read_version() -> str:
+    """Return the version string stored in the nearest ``VERSION`` file.
+
+    Looks in the package directory (where the wheel ships ``VERSION``), then
+    ``src/`` and the repo root (running from a clone). The search stops there so
+    an unrelated ``VERSION`` file further up the filesystem is never used.
+    """
+    here = Path(__file__).resolve()
+    for parent in list(here.parents)[:3]:
+        candidate = parent / "VERSION"
+        if candidate.is_file():
+            return candidate.read_text(encoding="utf-8").strip() or "0.0.0+unknown"
+    # Fallback: should not happen in a properly built/installed package.
+    return "0.0.0+unknown"
+
+
+__version__ = _read_version()

From 076d83d5bfc7bce0c993026eb6339b2bdd9cffd2 Mon Sep 17 00:00:00 2001
From: Patel230 <Lakshmanp230@gmail.com>
Date: Fri, 15 May 2026 16:14:27 +0530
Subject: [PATCH 4/4] =?UTF-8?q?feat(hawk-sdk-python):=20adopt=20agentscope?=
 =?UTF-8?q?=20patterns=20=E2=80=94=20toolkit,=20plan,=20tracing,=20eval,?=
 =?UTF-8?q?=20discovery,=20memory?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add 6 new modules inspired by agentscope-ai/agentscope:
- toolkit: tool groups, middleware chain, async background execution
- plan: plan-as-tools with contextual hints for autonomous steering
- tracing: OTel decorator-based tracing (zero-cost when disabled)
- evaluate: agent benchmarking framework with N-run aggregation
- discovery: A2A agent discovery (file, HTTP well-known, composite)
- memory_tools: voluntary record/retrieve/forget as agent tools
---
 src/hawk/__init__.py     |  55 ++++++
 src/hawk/discovery.py    | 194 ++++++++++++++++++++
 src/hawk/evaluate.py     | 204 +++++++++++++++++++++
 src/hawk/memory_tools.py | 132 ++++++++++++++
 src/hawk/plan.py         | 384 +++++++++++++++++++++++++++++++++++++++
 src/hawk/toolkit.py      | 315 ++++++++++++++++++++++++++++++++
 src/hawk/tracing.py      | 307 +++++++++++++++++++++++++++++++
 7 files changed, 1591 insertions(+)
 create mode 100644 src/hawk/discovery.py
 create mode 100644 src/hawk/evaluate.py
 create mode 100644 src/hawk/memory_tools.py
 create mode 100644 src/hawk/plan.py
 create mode 100644 src/hawk/toolkit.py
 create mode 100644 src/hawk/tracing.py

diff --git a/src/hawk/__init__.py b/src/hawk/__init__.py
index a374247..136f0ff 100644
--- a/src/hawk/__init__.py
+++ b/src/hawk/__init__.py
@@ -13,9 +13,19 @@
     RateLimitError,
     ServiceUnavailableError,
 )
+from .plan import Plan, PlanNotebook, SubTask, SubTaskState
 from .retry import DEFAULT_RETRY_CONFIG, RetryConfig
 from .streaming import AsyncStreamReader, StreamReader
 from .tools import Tool, chat_with_tools, chat_with_tools_async, tool
+from .toolkit import BackgroundTask, ToolGroup, Toolkit
+from .tracing import (
+    configure_tracing,
+    detect_provider,
+    is_tracing_enabled,
+    trace,
+    trace_chat,
+    trace_tool,
+)
 from .types import (
     ChatRequest,
     ChatResponse,
@@ -31,6 +41,21 @@
     ToolCall,
     Usage,
 )
+from .discovery import (
+    AgentCard,
+    AgentResolver,
+    CompositeResolver,
+    FileResolver,
+    WellKnownResolver,
+)
+from .evaluate import (
+    BenchmarkResults,
+    EvalResult,
+    EvalTask,
+    run_benchmark,
+    run_benchmark_async,
+)
+from .memory_tools import MemoryTools
 from .workflow import AsyncWorkflow, Workflow
 
 __all__ = [
@@ -51,6 +76,15 @@
     "tool",
     "chat_with_tools",
     "chat_with_tools_async",
+    # Plan
+    "PlanNotebook",
+    "Plan",
+    "SubTask",
+    "SubTaskState",
+    # Toolkit
+    "Toolkit",
+    "ToolGroup",
+    "BackgroundTask",
     # Workflow
     "Workflow",
     "AsyncWorkflow",
@@ -71,6 +105,13 @@
     "StreamEventType",
     "ToolCall",
     "Usage",
+    # Tracing
+    "configure_tracing",
+    "detect_provider",
+    "is_tracing_enabled",
+    "trace",
+    "trace_chat",
+    "trace_tool",
     # Errors
     "HawkAPIError",
     "BadRequestError",
@@ -80,4 +121,18 @@
     "RateLimitError",
     "InternalServerError",
     "ServiceUnavailableError",
+    # Evaluate
+    "EvalTask",
+    "EvalResult",
+    "BenchmarkResults",
+    "run_benchmark",
+    "run_benchmark_async",
+    # Discovery
+    "AgentCard",
+    "AgentResolver",
+    "FileResolver",
+    "WellKnownResolver",
+    "CompositeResolver",
+    # Memory
+    "MemoryTools",
 ]
diff --git a/src/hawk/discovery.py b/src/hawk/discovery.py
new file mode 100644
index 0000000..346f668
--- /dev/null
+++ b/src/hawk/discovery.py
@@ -0,0 +1,194 @@
+"""Agent-to-Agent discovery protocol for Hawk.
+
+Enables agents to discover and communicate with other agents via
+multiple resolution strategies (HTTP well-known, file-based, registry).
+
+Usage:
+    from hawk.discovery import AgentCard, WellKnownResolver, FileResolver
+
+    resolver = WellKnownResolver()
+    card = await resolver.resolve("assistant-agent")
+    # card.endpoint -> "http://localhost:8080/v1/chat"
+"""
+
+from __future__ import annotations
+
+import json
+import os
+from dataclasses import dataclass, field
+from typing import Any, Optional, Protocol
+
+
+@dataclass
+class AgentCard:
+    """Describes a discoverable agent's capabilities and endpoint."""
+    name: str  # lookup key used by the resolvers
+    endpoint: str
+    description: str = ""
+    capabilities: list[str] = field(default_factory=list)
+    version: str = "1.0"
+    metadata: dict[str, Any] = field(default_factory=dict)
+
+    def to_dict(self) -> dict[str, Any]:
+        return {  # containers are shallow-copied so edits to the dict leave the card intact
+            "name": self.name,
+            "endpoint": self.endpoint,
+            "description": self.description,
+            "capabilities": list(self.capabilities),
+            "version": self.version,
+            "metadata": dict(self.metadata),
+        }
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> "AgentCard":
+        return cls(  # raises KeyError when "name" or "endpoint" is missing
+            name=data["name"],
+            endpoint=data["endpoint"],
+            description=data.get("description", ""),
+            capabilities=list(data.get("capabilities") or []),  # copy; null -> []
+            version=data.get("version", "1.0"),
+            metadata=dict(data.get("metadata") or {}),  # copy; null -> {}
+        )
+
+
+class AgentResolver(Protocol):
+    """Structural interface shared by the agent discovery resolvers."""
+
+    async def resolve(self, agent_name: str) -> Optional[AgentCard]:
+        """Return the card for ``agent_name``, or ``None`` when it is unknown."""
+        ...
+
+    async def list_agents(self) -> list[AgentCard]:
+        """Return every agent card this resolver knows about."""
+        ...
+
+    async def register(self, card: AgentCard) -> None:
+        """Make ``card`` discoverable through this resolver."""
+        ...
+
+
+class FileResolver:
+    """File-based agent discovery for local development.
+
+    Loads cards from a ``{"agents": [...]}`` JSON file once, at construction.
+
+    Usage:
+        resolver = FileResolver("/path/to/agents.json")
+        card = await resolver.resolve("my-agent")
+    """
+
+    def __init__(self, path: str = ".hawk/agents.json") -> None:
+        self._path = path
+        self._cards: dict[str, AgentCard] = {}
+        self._load()
+
+    def _load(self) -> None:
+        if os.path.exists(self._path):  # a missing file just means "no agents yet"
+            with open(self._path, encoding="utf-8") as f:  # not the locale default
+                data = json.load(f)
+            for entry in data.get("agents", []):
+                card = AgentCard.from_dict(entry)
+                self._cards[card.name] = card
+
+    def _save(self) -> None:
+        os.makedirs(os.path.dirname(self._path) or ".", exist_ok=True)
+        data = {"agents": [c.to_dict() for c in self._cards.values()]}
+        with open(self._path, "w", encoding="utf-8") as f:  # same encoding as _load
+            json.dump(data, f, indent=2)
+
+    async def resolve(self, agent_name: str) -> Optional[AgentCard]:
+        return self._cards.get(agent_name)
+
+    async def list_agents(self) -> list[AgentCard]:
+        return list(self._cards.values())
+
+    async def register(self, card: AgentCard) -> None:
+        self._cards[card.name] = card  # a card with the same name is replaced
+        self._save()  # the whole file is rewritten on every registration
+
+
+class WellKnownResolver:
+    """HTTP-based agent discovery via well-known URLs.
+
+    Discovers agents by fetching {base_url}/.well-known/agent.json
+
+    Usage:
+        resolver = WellKnownResolver(["http://localhost:8080", "http://agent2:8080"])
+        card = await resolver.resolve("assistant")
+    """
+
+    def __init__(self, base_urls: Optional[list[str]] = None) -> None:
+        self._base_urls = list(base_urls) if base_urls else []  # copy: register() appends
+        self._cache: dict[str, AgentCard] = {}
+
+    async def resolve(self, agent_name: str) -> Optional[AgentCard]:
+        if agent_name in self._cache:
+            return self._cache[agent_name]
+
+        for url in self._base_urls:
+            card = await self._fetch_card(url)
+            if card and card.name == agent_name:
+                self._cache[agent_name] = card
+                return card
+        return None
+
+    async def list_agents(self) -> list[AgentCard]:
+        cards = []
+        for url in self._base_urls:  # always refetches; the cache is not consulted
+            card = await self._fetch_card(url)
+            if card:
+                cards.append(card)
+        return cards
+
+    async def register(self, card: AgentCard) -> None:
+        self._cache[card.name] = card
+        if card.endpoint not in self._base_urls:
+            self._base_urls.append(card.endpoint)
+
+    async def _fetch_card(self, base_url: str) -> Optional[AgentCard]:
+        try:
+            import httpx
+            url = f"{base_url.rstrip('/')}/.well-known/agent.json"
+            async with httpx.AsyncClient(timeout=5.0) as client:
+                resp = await client.get(url)
+                if resp.status_code == 200:
+                    return AgentCard.from_dict(resp.json())
+        except Exception:  # best-effort: any failure (network, JSON, import) => no card
+            pass
+        return None
+
+
+class CompositeResolver:
+    """Queries several resolvers in order; the earliest one with an answer wins.
+
+    Usage:
+        resolver = CompositeResolver([
+            FileResolver(".hawk/agents.json"),
+            WellKnownResolver(["http://localhost:8080"]),
+        ])
+        card = await resolver.resolve("my-agent")
+    """
+
+    def __init__(self, resolvers: list[Any]) -> None:
+        self._resolvers = resolvers
+
+    async def resolve(self, agent_name: str) -> Optional[AgentCard]:
+        """Return the first truthy card produced by the chain, else ``None``."""
+        for source in self._resolvers:
+            found = await source.resolve(agent_name)
+            if found:
+                return found
+        return None
+
+    async def list_agents(self) -> list[AgentCard]:
+        """Merge every resolver's listing, keeping the first card seen per name."""
+        unique: dict[str, AgentCard] = {}
+        for source in self._resolvers:
+            for card in await source.list_agents():
+                unique.setdefault(card.name, card)
+        return list(unique.values())
+
+    async def register(self, card: AgentCard) -> None:
+        """Register ``card`` with the first resolver only; no-op for an empty chain."""
+        if self._resolvers:
+            await self._resolvers[0].register(card)
diff --git a/src/hawk/evaluate.py b/src/hawk/evaluate.py
new file mode 100644
index 0000000..574a48d
--- /dev/null
+++ b/src/hawk/evaluate.py
@@ -0,0 +1,204 @@
+"""Agent evaluation framework for systematic benchmarking.
+
+Defines tasks with metrics, runs agents N times, aggregates results
+with statistics. Inspired by agentscope's evaluation module.
+
+Usage:
+    from hawk.evaluate import Benchmark, EvalTask, run_benchmark
+
+    tasks = [
+        EvalTask(
+            name="weather-lookup",
+            prompt="What's the weather in NYC?",
+            expected_tools=["get_weather"],
+            validate=lambda r: "temperature" in r.response,
+        ),
+    ]
+    results = run_benchmark(agent, tasks, runs=3)
+    print(results.summary())
+"""
+
+from __future__ import annotations
+
+import time
+import statistics
+from dataclasses import dataclass, field
+from typing import Any, Callable, Optional
+
+
+@dataclass
+class EvalTask:
+    """A single evaluation task."""
+    name: str
+    prompt: str  # sent as-is to agent.chat() by the benchmark runners
+    category: str = "general"  # any other value is prefixed to the result's task_name
+    expected_tools: list[str] = field(default_factory=list)  # NOTE(review): never read by the runners here
+    validate: Optional[Callable[[Any], bool]] = None  # called with the chat response; falsy => failed run
+    max_turns: int = 10  # NOTE(review): never read by the runners here
+
+
+@dataclass
+class EvalResult:
+    """Result of a single evaluation run."""
+    task_name: str  # "<category>/<name>", or the bare name for the "general" category
+    success: bool
+    duration_ms: float  # elapsed milliseconds, measured from just before the chat call
+    tokens_in: int = 0  # read from the response attribute of the same name, 0 if absent
+    tokens_out: int = 0  # read from the response attribute of the same name, 0 if absent
+    turns_taken: int = 0  # read from the response attribute of the same name, 0 if absent
+    error: Optional[str] = None  # failure description, e.g. str(exc) when the run raised
+
+
+@dataclass
+class BenchmarkResults:
+    """Aggregated benchmark results."""
+    results: list[EvalResult] = field(default_factory=list)
+
+    @property
+    def total_tasks(self) -> int:
+        return len(self.results)  # number of recorded results, not distinct task names
+
+    @property
+    def passed(self) -> int:
+        return sum(1 for r in self.results if r.success)
+
+    @property
+    def failed(self) -> int:
+        return self.total_tasks - self.passed
+
+    @property
+    def pass_rate(self) -> float:
+        return self.passed / max(self.total_tasks, 1)  # max() avoids 0/0 when empty
+
+    @property
+    def avg_duration_ms(self) -> float:
+        durations = [r.duration_ms for r in self.results]
+        return statistics.mean(durations) if durations else 0.0
+
+    @property
+    def total_tokens(self) -> int:
+        return sum(r.tokens_in + r.tokens_out for r in self.results)
+
+    def by_category(self) -> dict[str, list[EvalResult]]:
+        cats: dict[str, list[EvalResult]] = {}  # keyed by the text before the first "/"
+        for r in self.results:
+            cat = r.task_name.split("/")[0] if "/" in r.task_name else "general"
+            cats.setdefault(cat, []).append(r)
+        return cats
+
+    def summary(self) -> str:
+        lines = [
+            f"Benchmark Results: {self.passed}/{self.total_tasks} passed ({self.pass_rate:.0%})",
+            f"Avg duration: {self.avg_duration_ms:.0f}ms",
+            f"Total tokens: {self.total_tokens}",
+        ]
+        if self.failed > 0:
+            lines.append("Failures:")
+            # Only the first ten failures are listed to keep the summary short.
+            for bad in [r for r in self.results if not r.success][:10]:
+                lines.append(f"  - {bad.task_name}: {bad.error or 'validation failed'}")
+        return "\n".join(lines)
+
+
+def run_benchmark(
+    agent: Any,
+    tasks: list[EvalTask],
+    *,
+    runs: int = 1,
+    reset_between_tasks: bool = True,
+) -> BenchmarkResults:
+    """Run a benchmark suite against an agent.
+
+    Args:
+        agent: Object exposing ``.chat(prompt)`` (and ``.reset()`` when resetting).
+        tasks: List of evaluation tasks.
+        runs: Number of times to run each task.
+        reset_between_tasks: Call ``agent.reset()`` before every individual run.
+
+    Returns:
+        One ``EvalResult`` per (task, run); exceptions become failed results.
+    """
+    results = BenchmarkResults()
+
+    for task in tasks:
+        # Every run of a task shares one label; "general" tasks keep the bare name.
+        label = task.name if task.category == "general" else f"{task.category}/{task.name}"
+        for _ in range(runs):
+            if reset_between_tasks:
+                agent.reset()
+            start = time.perf_counter()
+            try:
+                response = agent.chat(task.prompt)
+                duration = (time.perf_counter() - start) * 1000
+                success = True
+                error = None
+                if task.validate:
+                    success = bool(task.validate(response))
+                    if not success:
+                        error = "Validation failed"
+                results.results.append(EvalResult(
+                    task_name=label,
+                    success=success,
+                    duration_ms=duration,
+                    tokens_in=getattr(response, "tokens_in", 0),
+                    tokens_out=getattr(response, "tokens_out", 0),
+                    turns_taken=getattr(response, "turns_taken", 0),
+                    error=error,  # was computed but never stored on the result
+                ))
+            except Exception as e:
+                duration = (time.perf_counter() - start) * 1000
+                results.results.append(EvalResult(
+                    task_name=label,
+                    success=False,
+                    duration_ms=duration,
+                    error=str(e),
+                ))
+
+    return results
+
+
+async def run_benchmark_async(
+    agent: Any,
+    tasks: list[EvalTask],
+    *,
+    runs: int = 1,
+    reset_between_tasks: bool = True,
+) -> BenchmarkResults:
+    """Async version of run_benchmark: same flow, but ``agent.chat`` is awaited."""
+    results = BenchmarkResults()
+
+    for task in tasks:
+        # Every run of a task shares one label; "general" tasks keep the bare name.
+        label = task.name if task.category == "general" else f"{task.category}/{task.name}"
+        for _ in range(runs):
+            if reset_between_tasks:
+                agent.reset()  # NOTE(review): not awaited; assumes reset() is synchronous
+            start = time.perf_counter()
+            try:
+                response = await agent.chat(task.prompt)
+                duration = (time.perf_counter() - start) * 1000
+                success = True
+                error = None
+                if task.validate:
+                    success = bool(task.validate(response))
+                    if not success:
+                        error = "Validation failed"
+                results.results.append(EvalResult(
+                    task_name=label,
+                    success=success,
+                    duration_ms=duration,
+                    tokens_in=getattr(response, "tokens_in", 0),
+                    tokens_out=getattr(response, "tokens_out", 0),
+                    turns_taken=getattr(response, "turns_taken", 0),
+                    error=error,  # was computed but never stored on the result
+                ))
+            except Exception as e:
+                duration = (time.perf_counter() - start) * 1000
+                results.results.append(EvalResult(
+                    task_name=label,
+                    success=False,
+                    duration_ms=duration,
+                    error=str(e),
+                ))
+
+    return results
diff --git a/src/hawk/memory_tools.py b/src/hawk/memory_tools.py
new file mode 100644
index 0000000..97be2b5
--- /dev/null
+++ b/src/hawk/memory_tools.py
@@ -0,0 +1,132 @@
+"""Memory-as-voluntary-tools for agent-driven memory management.
+
+Lets agents strategically decide what to remember/recall rather than
+auto-ingesting everything. Wraps yaad's memory API as tool functions.
+
+Usage:
+    from hawk.memory_tools import MemoryTools
+
+    mem = MemoryTools(client)
+    agent_config.tools.extend(mem.get_tools())
+"""
+
+from __future__ import annotations
+
+from typing import Any, Optional
+
+from .tools import Tool
+
+
+class MemoryTools:
+    """Provides record/retrieve/forget memory operations as agent tools.
+
+    The agent can voluntarily decide to:
+    - Record important information (session list, plus the client when it can persist)
+    - Retrieve relevant memories for the current task
+    - Forget outdated or incorrect information (session list only)
+    """
+
+    def __init__(self, client: Any, *, session_id: Optional[str] = None) -> None:
+        self._client = client
+        self._session_id = session_id
+        self._local_memories: list[dict[str, str]] = []
+
+    def record_memory(self, content: str, category: str = "general", importance: str = "normal") -> str:
+        """Store ``content`` in the session list and, when possible, in the client's memory.
+
+        Persistence is best-effort: when the client has no ``remember`` method or
+        the call raises, the memory stays session-local and the message says so.
+        """
+        self._local_memories.append(
+            {"content": content, "category": category, "importance": importance}
+        )
+        # Only show an ellipsis when the preview was actually truncated.
+        preview = content if len(content) <= 100 else f"{content[:100]}..."
+        try:
+            if hasattr(self._client, "remember"):
+                self._client.remember(content, session_id=self._session_id)
+                return f"Recorded to persistent memory: '{preview}'"
+        except Exception:
+            pass  # deliberate: fall back to the session-only message below
+        return f"Recorded to session memory: '{preview}'"
+
+    def retrieve_memories(self, query: str, limit: int = 5) -> str:
+        """Return up to ``limit`` memories relevant to ``query`` as a bullet list.
+
+        The client's ``recall`` method is tried first (best-effort); if it is
+        missing, raises, or returns nothing, session memories are searched with
+        a case-insensitive substring match.
+        """
+        try:
+            if hasattr(self._client, "recall"):
+                recalled = self._client.recall(query, limit=limit, session_id=self._session_id)
+                if recalled:
+                    return f"Recalled {len(recalled)} memories:\n" + "\n".join(
+                        f"- {m}" for m in recalled
+                    )
+        except Exception:
+            pass  # deliberate: fall back to the session-local search below
+
+        needle = query.lower()
+        matches = [
+            m["content"] for m in self._local_memories if needle in m["content"].lower()
+        ]
+        # Truncate before counting so the header matches the number of bullets.
+        shown = matches[:max(limit, 0)]
+        if not shown:
+            return "No relevant memories found."
+        return f"Found {len(shown)} relevant memories:\n" + "\n".join(f"- {r}" for r in shown)
+
+    def forget_memory(self, content_fragment: str) -> str:
+        """Remove session memories containing the fragment; a blank fragment is rejected."""
+        if not content_fragment.strip():  # "" is in every string and would wipe everything
+            return "Error: content_fragment must not be empty."
+        needle = content_fragment.lower()
+        kept = [m for m in self._local_memories if needle not in m["content"].lower()]
+        removed = len(self._local_memories) - len(kept)
+        self._local_memories = kept
+        return f"Removed {removed} matching memories." if removed else "No matching memories found."
+
+    def get_tools(self) -> list[Tool]:
+        """Return memory operations as Tool objects."""
+        return [
+            Tool(
+                name="record_memory",
+                description="Record important information to long-term memory for future reference. Use for key findings, decisions, or facts you'll need later.",
+                parameters={
+                    "type": "object",
+                    "properties": {
+                        "content": {"type": "string", "description": "The information to remember"},
+                        "category": {"type": "string", "description": "Category (general/technical/personal/project)", "default": "general"},
+                        "importance": {"type": "string", "description": "Importance level (low/normal/high)", "default": "normal"},
+                    },
+                    "required": ["content"],
+                },
+                fn=self.record_memory,
+            ),
+            Tool(
+                name="retrieve_memories",
+                description="Search long-term memory for relevant information. Use when you need context from previous interactions.",
+                parameters={
+                    "type": "object",
+                    "properties": {
+                        "query": {"type": "string", "description": "Search query for relevant memories"},
+                        "limit": {"type": "integer", "description": "Max results to return", "default": 5},
+                    },
+                    "required": ["query"],
+                },
+                fn=self.retrieve_memories,
+            ),
+            Tool(
+                name="forget_memory",
+                description="Remove outdated or incorrect information from memory.",
+                parameters={
+                    "type": "object",
+                    "properties": {
+                        "content_fragment": {"type": "string", "description": "Text fragment to match for deletion"},
+                    },
+                    "required": ["content_fragment"],
+                },
+                fn=self.forget_memory,
+            ),
+        ]
diff --git a/src/hawk/plan.py b/src/hawk/plan.py
new file mode 100644
index 0000000..b2d78f8
--- /dev/null
+++ b/src/hawk/plan.py
@@ -0,0 +1,384 @@
+"""Plan-as-tools module with contextual hints for autonomous agent steering."""
+
+from __future__ import annotations
+
+import uuid
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+from enum import Enum
+from typing import Any, Callable, Literal, Optional
+
+from .tools import Tool
+
+
+class SubTaskState(str, Enum):  # lifecycle states; used for both SubTask.state and Plan.state
+    TODO = "todo"
+    IN_PROGRESS = "in_progress"
+    DONE = "done"
+    ABANDONED = "abandoned"
+
+
+@dataclass
+class SubTask:
+    """One step of a plan, with its state, outcome and ISO-8601 UTC timestamps."""
+
+    name: str
+    description: str
+    expected_outcome: str
+    state: SubTaskState = SubTaskState.TODO
+    outcome: Optional[str] = None
+    created_at: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())
+    finished_at: Optional[str] = None
+
+    def finish(self, outcome: str) -> None:
+        """Mark the subtask done, storing ``outcome`` and the completion time."""
+        self.finished_at = datetime.now(timezone.utc).isoformat()
+        self.outcome = outcome
+        self.state = SubTaskState.DONE
+
+    def to_markdown(self, detailed: bool = False) -> str:
+        """Render a checkbox line; ``detailed`` appends indented field lines."""
+        checkbox = {
+            SubTaskState.DONE: "- [x]",
+            SubTaskState.ABANDONED: "- [ ] [Abandoned]",
+            SubTaskState.IN_PROGRESS: "- [ ] [WIP]",
+            SubTaskState.TODO: "- [ ]",
+        }[self.state]
+        rendered = f"{checkbox} {self.name}"
+        if detailed:
+            rendered += f"\n    Description: {self.description}"
+            rendered += f"\n    Expected Outcome: {self.expected_outcome}"
+            rendered += f"\n    State: {self.state.value}"
+            if self.state == SubTaskState.DONE:
+                rendered += f"\n    Outcome: {self.outcome}\n    Finished: {self.finished_at}"
+        return rendered
+
+
+@dataclass
+class Plan:
+    """A named goal split into ordered subtasks; ``id`` is the first 8 characters of a UUID4."""
+
+    name: str
+    description: str
+    expected_outcome: str
+    subtasks: list[SubTask] = field(default_factory=list)
+    id: str = field(default_factory=lambda: str(uuid.uuid4())[:8])
+    state: SubTaskState = SubTaskState.TODO
+    created_at: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())
+    outcome: Optional[str] = None
+    finished_at: Optional[str] = None
+
+    def to_markdown(self) -> str:
+        """Render the plan header followed by one checkbox line per subtask."""
+        head = f"# {self.name}\n**Description**: {self.description}"
+        head += f"\n**Expected Outcome**: {self.expected_outcome}\n**State**: {self.state.value}"
+        checklist = "\n".join(task.to_markdown() for task in self.subtasks)
+        return f"{head}\n## Subtasks\n{checklist}"
+
+    def finish(self, state: Literal["done", "abandoned"], outcome: str) -> None:
+        """Close the plan; any ``state`` other than "done" is stored as abandoned."""
+        self.finished_at = datetime.now(timezone.utc).isoformat()
+        self.outcome = outcome
+        self.state = SubTaskState.ABANDONED if state != "done" else SubTaskState.DONE
+
+
+class PlanNotebook:
+    """Manages plans as tool-functions, providing contextual hints to steer the agent.
+
+    The plan notebook exposes plan operations as Tools that can be registered
+    with a Toolkit or Agent. After each agent turn, call get_hint() to get
+    a system-level message that guides the agent on what to do next.
+
+    Usage:
+        notebook = PlanNotebook()
+        # Register plan tools with your agent
+        agent_config.tools.extend(notebook.get_tools())
+        # After each turn, inject hint
+        hint = notebook.get_hint()
+        if hint:
+            # Prepend to next message or inject as system context
+            ...
+    """
+
+    def __init__(self, max_subtasks: int = 20) -> None:
+        self._current_plan: Optional[Plan] = None
+        self._history: list[Plan] = []
+        self._max_subtasks = max_subtasks
+        self._on_change: list[Callable[[Optional[Plan]], None]] = []
+
+    @property
+    def current_plan(self) -> Optional[Plan]:
+        """The active plan, or None when no plan is open."""
+        return self._current_plan
+
+    def on_plan_change(self, callback: Callable[[Optional[Plan]], None]) -> None:
+        """Register a callback that receives the current plan after each change."""
+        self._on_change.append(callback)
+
+    def _notify(self) -> None:
+        """Call every registered callback with the current plan (None once finished)."""
+        for listener in self._on_change:
+            listener(self._current_plan)
+
+    def create_plan(
+        self, name: str, description: str, expected_outcome: str, subtasks: list[dict[str, str]]
+    ) -> str:
+        """Create a new plan with subtasks. Replaces current plan if one exists.
+
+        Only the first ``max_subtasks`` entries of ``subtasks`` are used; an active
+        plan that gets replaced is marked abandoned and moved to the history.
+        """
+        # Build the subtasks first so a malformed entry fails before any state changes.
+        tasks = [
+            SubTask(name=st["name"], description=st["description"], expected_outcome=st["expected_outcome"])
+            for st in subtasks[:self._max_subtasks]
+        ]
+        previous = self._current_plan
+        if previous and previous.state not in (SubTaskState.DONE, SubTaskState.ABANDONED):
+            previous.finish("abandoned", "Replaced by new plan")
+            self._history.append(previous)
+
+        plan = Plan(name=name, description=description, expected_outcome=expected_outcome, subtasks=tasks)
+        self._current_plan = plan
+        self._notify()
+        return f"Plan '{name}' created with {len(tasks)} subtasks."
+
+    def update_subtask_state(self, subtask_idx: int, state: str) -> str:
+        """Update a subtask's state to todo, in_progress, or abandoned."""
+        plan = self._current_plan
+        if not plan:
+            return "Error: No active plan. Create one first."
+        if not 0 <= subtask_idx < len(plan.subtasks):
+            return f"Error: Invalid index {subtask_idx}."
+        if state not in ("todo", "in_progress", "abandoned"):
+            return f"Error: Invalid state '{state}'."
+        # Sequential plan: one subtask at a time, and only after all earlier ones are closed.
+        if state == "in_progress":
+            for i, st in enumerate(plan.subtasks):
+                if st.state == SubTaskState.IN_PROGRESS:
+                    return f"Error: Subtask {i} ('{st.name}') already in progress. Finish it first."
+                if i < subtask_idx and st.state not in (SubTaskState.DONE, SubTaskState.ABANDONED):
+                    return f"Error: Previous subtask {i} ('{st.name}') not done yet."
+        plan.subtasks[subtask_idx].state = SubTaskState(state)
+        if plan.state == SubTaskState.TODO:
+            plan.state = SubTaskState.IN_PROGRESS
+        self._notify()
+        return f"Subtask {subtask_idx} marked as '{state}'."
+
+    def finish_subtask(self, subtask_idx: int, outcome: str) -> str:
+        """Mark a subtask as done with its specific outcome.
+
+        If nothing else is in progress, the first remaining 'todo' subtask is
+        activated; finished or abandoned subtasks are never reopened.
+        """
+        plan = self._current_plan
+        if not plan:
+            return "Error: No active plan."
+        if not 0 <= subtask_idx < len(plan.subtasks):
+            return f"Error: Invalid index {subtask_idx}."
+
+        plan.subtasks[subtask_idx].finish(outcome)
+        if plan.state == SubTaskState.TODO:
+            plan.state = SubTaskState.IN_PROGRESS
+        active = next((st for st in plan.subtasks if st.state == SubTaskState.IN_PROGRESS), None)
+        upcoming = next((st for st in plan.subtasks if st.state == SubTaskState.TODO), None)
+        if active is None and upcoming is not None:
+            upcoming.state = SubTaskState.IN_PROGRESS
+        self._notify()
+
+        if active is not None:
+            return f"Subtask {subtask_idx} done. '{active.name}' is still in progress."
+        if upcoming is not None:
+            return f"Subtask {subtask_idx} done. Next: '{upcoming.name}' (now in_progress)."
+        return f"Subtask {subtask_idx} done. All subtasks complete — call finish_plan."
+
+    def revise_plan(self, subtask_idx: int, action: str, subtask: Optional[dict[str, str]] = None) -> str:
+        """Revise plan: add/revise/delete a subtask.
+
+        'add' inserts before ``subtask_idx`` (the subtask count appends); 'revise' swaps in a fresh 'todo' subtask.
+        """
+        plan = self._current_plan
+        if not plan:
+            return "Error: No active plan."
+        if action not in ("add", "revise", "delete"):
+            return f"Error: Invalid action '{action}'."
+        # 'add' may target one past the end; the other actions need an existing slot.
+        if not 0 <= subtask_idx < len(plan.subtasks) + (1 if action == "add" else 0):
+            return f"Error: Invalid index {subtask_idx}."
+        if action == "delete":
+            removed = plan.subtasks.pop(subtask_idx)
+            self._notify()
+            return f"Deleted subtask '{removed.name}'."
+
+        if not subtask:
+            return "Error: subtask required for add/revise."
+        required = ("name", "description", "expected_outcome")
+        missing = [key for key in required if key not in subtask]
+        if missing:
+            return f"Error: subtask is missing {', '.join(missing)}."
+        new_st = SubTask(**{key: subtask[key] for key in required})
+        if action == "add":
+            plan.subtasks.insert(subtask_idx, new_st)
+            message = f"Added subtask '{new_st.name}' at index {subtask_idx}."
+        else:
+            plan.subtasks[subtask_idx] = new_st
+            message = f"Revised subtask at index {subtask_idx}."
+        self._notify()
+        return message
+
+    def finish_plan(self, state: str, outcome: str) -> str:
+        """Finish or abandon the current plan and move it to the history."""
+        if not self._current_plan:
+            return "Error: No active plan."
+        if state not in ("done", "abandoned"):
+            # Plan.finish stores anything other than "done" as abandoned, so reject typos here.
+            return f"Error: Invalid state '{state}'. Use 'done' or 'abandoned'."
+        self._current_plan.finish(state, outcome)
+        self._history.append(self._current_plan)
+        self._current_plan = None
+        self._notify()
+        return f"Plan finished as '{state}'."
+
+    def view_history(self) -> str:
+        """View historical plans."""
+        if not self._history:
+            return "No historical plans."
+        return "\n".join(
+            f"- {p.name} (id={p.id}, state={p.state.value})" for p in self._history
+        )
+
+    def get_hint(self) -> Optional[str]:
+        """Generate a contextual hint based on current plan state.
+
+        Returns a string to inject as system context. Priority: the subtask in
+        progress, then wrapping up when nothing is left to do, then starting
+        the first subtask that is still 'todo'.
+        """
+        plan = self._current_plan
+        if plan is None:
+            return (
+                "<system-hint>If the task is complex or multi-step, create a plan "
+                "by calling 'create_plan'. Otherwise proceed directly.</system-hint>"
+            )
+
+        header = f"<system-hint>Current plan:\n```\n{plan.to_markdown()}\n```\n"
+        states = [st.state for st in plan.subtasks]
+
+        if SubTaskState.IN_PROGRESS in states:
+            idx = states.index(SubTaskState.IN_PROGRESS)
+            current = plan.subtasks[idx]
+            return (
+                f"{header}Subtask {idx} ('{current.name}') is in progress.\n"
+                f"Details: {current.description}\n"
+                f"Expected outcome: {current.expected_outcome}\n"
+                "Execute it, then call finish_subtask with the specific outcome.</system-hint>"
+            )
+
+        if not states:
+            return (
+                f"{header}The plan has no subtasks. Add some with revise_plan "
+                "or call finish_plan.</system-hint>"
+            )
+        if SubTaskState.TODO not in states:
+            # Everything is done or abandoned (also when nothing was ever marked done).
+            return (
+                f"{header}All subtasks complete. "
+                "Call finish_plan('done', outcome) to wrap up.</system-hint>"
+            )
+
+        next_idx = states.index(SubTaskState.TODO)
+        n_done = states.count(SubTaskState.DONE)
+        if n_done == 0 and next_idx == 0:
+            return (
+                f"{header}Start by marking the first subtask as 'in_progress' via "
+                "update_subtask_state(0, 'in_progress'), then execute it.</system-hint>"
+            )
+        return (
+            f"{header}{n_done} subtasks done. Mark subtask {next_idx} as 'in_progress' via "
+            f"update_subtask_state({next_idx}, 'in_progress'), then execute it.</system-hint>"
+        )
+
+    def get_tools(self) -> list[Tool]:
+        """Return plan operations as Tool objects for agent registration."""
+        # One subtask schema for create_plan and revise_plan: both need all three keys.
+        subtask_schema = {
+            "type": "object",
+            "properties": {key: {"type": "string"} for key in ("name", "description", "expected_outcome")},
+            "required": ["name", "description", "expected_outcome"],
+        }
+        return [
+            Tool(
+                name="create_plan",
+                description="Create a plan with sequential subtasks for complex tasks",
+                parameters={
+                    "type": "object",
+                    "properties": {
+                        "name": {"type": "string", "description": "Plan name (concise, <10 words)"},
+                        "description": {"type": "string", "description": "Plan description with constraints and goals"},
+                        "expected_outcome": {"type": "string", "description": "Specific measurable outcome"},
+                        "subtasks": {"type": "array", "items": subtask_schema},
+                    },
+                    "required": ["name", "description", "expected_outcome", "subtasks"],
+                },
+                fn=self.create_plan,
+            ),
+            Tool(
+                name="update_subtask_state",
+                description="Update a subtask's state (todo/in_progress/abandoned)",
+                parameters={
+                    "type": "object",
+                    "properties": {
+                        "subtask_idx": {"type": "integer", "description": "Subtask index (0-based)"},
+                        "state": {"type": "string", "enum": ["todo", "in_progress", "abandoned"]},
+                    },
+                    "required": ["subtask_idx", "state"],
+                },
+                fn=self.update_subtask_state,
+            ),
+            Tool(
+                name="finish_subtask",
+                description="Mark subtask as done with specific outcome (not 'I did X' but the actual result)",
+                parameters={
+                    "type": "object",
+                    "properties": {
+                        "subtask_idx": {"type": "integer", "description": "Subtask index (0-based)"},
+                        "outcome": {"type": "string", "description": "Specific outcome data/result"},
+                    },
+                    "required": ["subtask_idx", "outcome"],
+                },
+                fn=self.finish_subtask,
+            ),
+            Tool(
+                name="revise_plan",
+                description="Revise current plan: add/revise/delete a subtask",
+                parameters={
+                    "type": "object",
+                    "properties": {
+                        "subtask_idx": {"type": "integer"},
+                        "action": {"type": "string", "enum": ["add", "revise", "delete"]},
+                        "subtask": subtask_schema,
+                    },
+                    "required": ["subtask_idx", "action"],
+                },
+                fn=self.revise_plan,
+            ),
+            Tool(
+                name="finish_plan",
+                description="Finish or abandon the current plan with outcome/reason",
+                parameters={
+                    "type": "object",
+                    "properties": {
+                        "state": {"type": "string", "enum": ["done", "abandoned"]},
+                        "outcome": {"type": "string", "description": "Outcome or reason"},
+                    },
+                    "required": ["state", "outcome"],
+                },
+                fn=self.finish_plan,
+            ),
+            Tool(
+                name="view_plan_history",
+                description="View historical completed/abandoned plans",
+                parameters={"type": "object", "properties": {}},
+                fn=self.view_history,
+            ),
+        ]
diff --git a/src/hawk/toolkit.py b/src/hawk/toolkit.py
new file mode 100644
index 0000000..019f76c
--- /dev/null
+++ b/src/hawk/toolkit.py
@@ -0,0 +1,315 @@
+"""Advanced toolkit with groups, middleware, and async execution for Hawk SDK."""
+
+from __future__ import annotations
+
+import asyncio
+import inspect
+import json
+import uuid
+from dataclasses import dataclass, field
+from typing import Any, AsyncGenerator, Awaitable, Callable, Literal, Optional
+
+from .tools import Tool
+
+
+@dataclass
+class ToolGroup:  # a named set of tools that a Toolkit switches on and off together
+    name: str
+    description: str
+    active: bool = False  # Toolkit hides and refuses tools whose (non-"basic") group is inactive
+    notes: Optional[str] = None  # extra text appended to the Toolkit.reset_groups() summary
+
+
+@dataclass
+class BackgroundTask:  # bookkeeping record for a tool call running as an asyncio task
+    id: str
+    tool_name: str
+    task: asyncio.Task
+    result: Optional[str] = None  # tool output or error JSON, filled in by Toolkit._run_background
+    done: bool = False  # True once the run finished, failed or was cancelled
+    cancelled: bool = False
+
+
+MiddlewareFunc = Callable[[dict[str, Any], Callable[..., Any]], Any]  # (context, next_handler) -> result
+PostprocessFunc = Callable[[str, str], str]  # (tool_name, result) -> modified_result
+
+
+class Toolkit:
+    """Advanced toolkit with tool groups, middleware chain, and async execution.
+
+    Features:
+        - Tool groups: organize tools and activate/deactivate groups dynamically
+        - Middleware: onion-model pre/post processing of tool calls (sync execute() only)
+        - Background execution: long-running tools get a task_id for polling
+        - Postprocess: per-tool output transformation
+
+    Usage:
+        toolkit = Toolkit()
+        toolkit.create_group("web", "Web browsing tools")
+        toolkit.register(my_tool, group="web")
+        toolkit.activate_groups(["web"])
+
+        # Add middleware
+        toolkit.register_middleware(logging_middleware)
+
+        # Get active tools for sending to model
+        active_tools = toolkit.get_active_tools()
+    """
+
+    def __init__(self) -> None:
+        self._tools: dict[str, Tool] = {}
+        self._tool_groups: dict[str, str] = {}  # tool_name -> group_name
+        self._groups: dict[str, ToolGroup] = {
+            "basic": ToolGroup(name="basic", description="Default tool group", active=True)
+        }
+        self._middlewares: list[MiddlewareFunc] = []
+        self._postprocess: dict[str, PostprocessFunc] = {}  # tool_name -> postprocess fn
+        self._background_tasks: dict[str, BackgroundTask] = {}
+        self._async_execution: set[str] = set()  # tool names with async execution enabled
+
+    def create_group(
+        self, name: str, description: str, *, active: bool = False, notes: Optional[str] = None
+    ) -> None:
+        """Add a new tool group; raises ValueError if the name is already taken."""
+        if name in self._groups:
+            raise ValueError(f"Group '{name}' already exists")
+        self._groups[name] = ToolGroup(name=name, description=description, active=active, notes=notes)
+
+    def activate_groups(self, names: list[str]) -> None:
+        """Activate the named groups; unknown names are ignored."""
+        for name in names:
+            if name in self._groups:
+                self._groups[name].active = True
+
+    def deactivate_groups(self, names: list[str]) -> None:
+        """Deactivate the named groups; unknown names and "basic" are ignored."""
+        for name in names:
+            if name in self._groups and name != "basic":
+                self._groups[name].active = False
+
+    def reset_groups(self, active_groups: list[str]) -> str:
+        """Set the absolute state of all groups. Groups not listed are deactivated.
+
+        "basic" is never changed; notes of the listed, existing groups are appended to the summary.
+        """
+        for name, group in self._groups.items():
+            if name != "basic":
+                group.active = name in active_groups
+
+        activated = [n for n in active_groups if n in self._groups]
+        if not activated:
+            return "All tool groups deactivated."
+        notes = [f"## {n}\n{self._groups[n].notes}" for n in activated if self._groups[n].notes]
+        msg = f"Activated: {', '.join(activated)}"
+        return msg + "\n" + "\n".join(notes) if notes else msg
+
+    def register(
+        self,
+        tool: Tool,
+        *,
+        group: str = "basic",
+        postprocess: Optional[PostprocessFunc] = None,
+        async_execution: bool = False,
+        on_conflict: Literal["raise", "override", "skip"] = "raise",
+    ) -> None:
+        """Register a tool in an existing group.
+
+        Raises ValueError for an unknown group or, with ``on_conflict="raise"``, a duplicate
+        name. "skip" keeps the old registration; "override" replaces it and its settings.
+        """
+        if group not in self._groups:
+            raise ValueError(f"Group '{group}' does not exist. Create it first.")
+        if tool.name in self._tools:
+            if on_conflict == "raise":
+                raise ValueError(f"Tool '{tool.name}' already registered")
+            if on_conflict == "skip":
+                return
+            # override: forget the replaced tool's postprocess hook and async flag
+            self._postprocess.pop(tool.name, None)
+            self._async_execution.discard(tool.name)
+
+        self._tools[tool.name] = tool
+        self._tool_groups[tool.name] = group
+        if postprocess:
+            self._postprocess[tool.name] = postprocess
+        if async_execution:
+            self._async_execution.add(tool.name)
+
+    def unregister(self, tool_name: str) -> None:
+        """Remove a tool and all its settings; unknown names are ignored."""
+        self._tools.pop(tool_name, None)
+        self._tool_groups.pop(tool_name, None)
+        self._postprocess.pop(tool_name, None)
+        self._async_execution.discard(tool_name)
+
+    def register_middleware(self, middleware: MiddlewareFunc) -> None:
+        """Append a middleware; earlier-registered middlewares wrap later ones in execute()."""
+        self._middlewares.append(middleware)
+
+    def get_active_tools(self) -> list[Tool]:
+        """Return tools belonging to active groups ("basic" tools are always included)."""
+        active = []
+        for name, tool in self._tools.items():
+            group_name = self._tool_groups.get(name, "basic")
+            group = self._groups.get(group_name)
+            if group_name == "basic" or (group is not None and group.active):
+                active.append(tool)
+        return active
+
+    def get_tool_schemas(self) -> list[dict[str, Any]]:
+        """Return JSON schemas for active tools."""
+        return [t.to_dict() for t in self.get_active_tools()]
+
+    def _unavailable(self, tool_name: str) -> Optional[str]:
+        """Return error JSON if the tool is unknown or its group is inactive, else None."""
+        if tool_name not in self._tools:
+            return json.dumps({"error": f"Tool '{tool_name}' not found"})
+        group_name = self._tool_groups.get(tool_name, "basic")
+        group = self._groups.get(group_name)
+        if group and not group.active and group_name != "basic":
+            return json.dumps({"error": f"Tool '{tool_name}' is in inactive group '{group_name}'"})
+        return None
+
+    @staticmethod
+    async def _invoke(tool: Tool, arguments: dict[str, Any]) -> str:
+        """Call the tool (awaiting it if needed) and serialize the result; errors become JSON."""
+        if tool.fn is None:
+            return json.dumps({"error": f"Tool '{tool.name}' has no implementation"})
+        try:
+            result = tool.fn(**arguments)
+            if inspect.isawaitable(result):
+                result = await result
+            return result if isinstance(result, str) else json.dumps(result)
+        except Exception as e:
+            return json.dumps({"error": str(e)})
+
+    def execute(self, tool_name: str, arguments: dict[str, Any]) -> str:
+        """Execute a tool with middleware chain and postprocessing.
+
+        Each middleware receives ``(context, next_handler)``. Errors raised by the tool
+        itself are returned as ``{"error": ...}`` JSON instead of being raised.
+        """
+        error = self._unavailable(tool_name)
+        if error is not None:
+            return error
+        tool = self._tools[tool_name]
+
+        def base_handler(ctx: dict[str, Any]) -> str:
+            t = ctx["tool"]
+            if t.fn is None:
+                return json.dumps({"error": f"Tool '{t.name}' has no implementation"})
+            try:
+                result = t.fn(**ctx["arguments"])
+                if inspect.isawaitable(result):
+                    if inspect.iscoroutine(result):
+                        result.close()  # avoid a "coroutine was never awaited" warning
+                    raise TypeError(f"Tool '{t.name}' is async. Use execute_async.")
+                return result if isinstance(result, str) else json.dumps(result)
+            except Exception as e:
+                return json.dumps({"error": str(e)})
+
+        def wrap(mw: MiddlewareFunc, nxt: Callable[..., Any]) -> Callable[..., Any]:
+            return lambda ctx: mw(ctx, nxt)
+
+        handler: Callable[..., Any] = base_handler
+        for mw in reversed(self._middlewares):
+            handler = wrap(mw, handler)
+
+        result = handler({"tool_name": tool_name, "arguments": arguments, "tool": tool})
+        postprocess = self._postprocess.get(tool_name)
+        return postprocess(tool_name, result) if postprocess else result
+
+    async def execute_async(self, tool_name: str, arguments: dict[str, Any]) -> str:
+        """Execute a sync or async tool and apply its postprocess hook (no middleware yet).
+
+        Tools registered with ``async_execution=True`` start as background tasks; the
+        returned JSON carries a ``task_id`` for view_task / wait_task / cancel_task.
+        """
+        error = self._unavailable(tool_name)
+        if error is not None:
+            return error
+        tool = self._tools[tool_name]
+
+        if tool_name in self._async_execution:
+            task_id = str(uuid.uuid4())[:8]
+            task = asyncio.create_task(self._run_background(task_id, tool, arguments))
+            self._background_tasks[task_id] = BackgroundTask(id=task_id, tool_name=tool_name, task=task)
+            return json.dumps({
+                "status": "running",
+                "task_id": task_id,
+                "message": f"Tool '{tool_name}' executing in background. Use view_task('{task_id}') to check status.",
+            })
+
+        result = await self._invoke(tool, arguments)
+        postprocess = self._postprocess.get(tool_name)
+        return postprocess(tool_name, result) if postprocess else result
+
+    async def _run_background(self, task_id: str, tool: Tool, arguments: dict[str, Any]) -> None:
+        """Run a tool as a background task and record its outcome on the BackgroundTask."""
+        cancelled = False
+        result: Optional[str] = None
+        try:
+            result = await self._invoke(tool, arguments)
+            if tool.name in self._postprocess:
+                result = self._postprocess[tool.name](tool.name, result)
+        except asyncio.CancelledError:
+            cancelled = True  # swallowed on purpose so waiters see a normal completion
+        except Exception as e:
+            result = json.dumps({"error": str(e)})
+        # The record may already be gone if view_task consumed a cancelled entry.
+        bt = self._background_tasks.get(task_id)
+        if bt is not None:
+            bt.result = result
+            bt.cancelled = bt.cancelled or cancelled
+            bt.done = True
+
+    def view_task(self, task_id: str) -> str:
+        """Report a background task's status; finished or cancelled tasks are removed once reported."""
+        bt = self._background_tasks.get(task_id)
+        if bt is None:
+            return json.dumps({"error": f"Task '{task_id}' not found"})
+        if not (bt.done or bt.cancelled):
+            return json.dumps({"status": "running", "task_id": task_id, "tool": bt.tool_name})
+        del self._background_tasks[task_id]
+        if bt.cancelled:
+            return json.dumps({"status": "cancelled", "task_id": task_id})
+        return bt.result or json.dumps({"status": "done", "result": None})
+
+    async def wait_task(self, task_id: str, timeout: float = 30.0) -> str:
+        """Wait up to ``timeout`` seconds for a background task, then report like view_task."""
+        bt = self._background_tasks.get(task_id)
+        if bt is None:
+            return json.dumps({"error": f"Task '{task_id}' not found"})
+        if not bt.done:
+            try:
+                # shield() keeps a timeout here from cancelling the tool itself.
+                await asyncio.wait_for(asyncio.shield(bt.task), timeout=timeout)
+            except asyncio.TimeoutError:
+                return json.dumps({"status": "running", "message": f"Still running after {timeout}s"})
+            except asyncio.CancelledError:
+                if not bt.task.cancelled():
+                    raise  # this waiter was cancelled, not the background task
+        return self.view_task(task_id)
+
+    def cancel_task(self, task_id: str) -> str:
+        """Request cancellation of a running background task."""
+        bt = self._background_tasks.get(task_id)
+        if bt is None:
+            return json.dumps({"error": f"Task '{task_id}' not found"})
+        if bt.done:
+            return json.dumps({"error": "Task already completed"})
+        bt.task.cancel()
+        # A task cancelled before its first step never reaches _run_background's handler; mark it here.
+        bt.cancelled = bt.done = True
+        return json.dumps({"status": "cancelled", "task_id": task_id})
+
+    def state_dict(self) -> dict[str, Any]:
+        """Return a serializable snapshot: the names of the currently active groups."""
+        return {"active_groups": [n for n, g in self._groups.items() if g.active]}
+
+    def load_state_dict(self, state: dict[str, Any]) -> None:
+        """Restore group activation saved by state_dict(); "basic" always stays active."""
+        active = set(state.get("active_groups", []))
+        for name, group in self._groups.items():
+            group.active = name == "basic" or name in active
diff --git a/src/hawk/tracing.py b/src/hawk/tracing.py
new file mode 100644
index 0000000..ecffa49
--- /dev/null
+++ b/src/hawk/tracing.py
@@ -0,0 +1,307 @@
+"""OpenTelemetry tracing decorators for Hawk SDK.
+
+Zero-cost when disabled. Provides automatic instrumentation of agent chat,
+tool execution, and client calls with OTel GenAI semantic conventions.
+
+Usage:
+    from hawk.tracing import configure_tracing, trace_chat, trace_tool
+
+    # Enable tracing (call once at startup)
+    configure_tracing(endpoint="http://localhost:4318/v1/traces")
+
+    # Decorators are already applied to SDK internals, or use on custom code:
+    @trace_tool
+    def my_tool(query: str) -> str:
+        ...
+"""
+
+from __future__ import annotations
+
+import functools
+import inspect
+from typing import Any, AsyncGenerator, Callable, Generator, Optional, TypeVar
+
+T = TypeVar("T")  # return type of a decorated callable
+
+_tracing_enabled: bool = False  # set by configure_tracing(); checked on every traced call
+_tracer: Any = None  # tracer created by configure_tracing(); None until then
+
+# (URL substring, provider label) pairs; detect_provider() returns the first match
+_PROVIDER_PATTERNS: list[tuple[str, str]] = [
+    ("api.anthropic.com", "anthropic"),
+    ("api.openai.com", "openai"),
+    ("generativelanguage.googleapis.com", "google"),
+    ("api.cohere.com", "cohere"),
+    ("api.mistral.ai", "mistral"),
+    ("api.groq.com", "groq"),
+    ("localhost", "local"),
+    ("127.0.0.1", "local"),
+]
+
+
+def configure_tracing(
+    *,
+    endpoint: Optional[str] = None,
+    service_name: str = "hawk-sdk",
+    enabled: bool = True,
+) -> None:
+    """Configure OpenTelemetry tracing for the Hawk SDK.
+
+    Args:
+        endpoint: OTLP/HTTP traces URL, handed to the exporter as given
+            (e.g. "http://localhost:4318/v1/traces"). If None, an exporter is
+            added only when OTEL_EXPORTER_OTLP_ENDPOINT is set, and the
+            exporter then resolves its URL from the environment.
+        service_name: Service name for traces.
+        enabled: Whether tracing is enabled.
+    """
+    global _tracing_enabled, _tracer
+    if not enabled:
+        _tracing_enabled = False
+        return
+    try:
+        import os
+
+        from opentelemetry import trace
+        from opentelemetry.sdk.resources import Resource
+        from opentelemetry.sdk.trace import TracerProvider
+        from opentelemetry.sdk.trace.export import BatchSpanProcessor
+
+        provider = TracerProvider(resource=Resource.create({"service.name": service_name}))
+        # Honour the documented env-var fallback instead of silently exporting nowhere.
+        if endpoint or os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT"):
+            from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
+            exporter = OTLPSpanExporter(endpoint=endpoint or None)
+            provider.add_span_processor(BatchSpanProcessor(exporter))
+        trace.set_tracer_provider(provider)
+        _tracer = trace.get_tracer("hawk-sdk")
+        _tracing_enabled = True
+    except ImportError:  # OpenTelemetry packages not installed: tracing stays off
+        _tracing_enabled = False
+
+
+def is_tracing_enabled() -> bool:
+    """Report the module-level flag that ``configure_tracing`` last set."""
+    return _tracing_enabled
+
+
+def detect_provider(base_url: str) -> str:
+    """Return the label of the first known URL fragment found in ``base_url``, else "unknown"."""
+    labels = (
+        label for fragment, label in _PROVIDER_PATTERNS if fragment in base_url
+    )
+    return next(labels, "unknown")
+
+
+def trace_chat(func: Callable[..., T]) -> Callable[..., T]:
+    """Decorator to trace agent chat calls with OTel spans.
+
+    Works on sync and ``async def`` callables; while tracing is disabled the
+    wrapped callable is invoked directly. Otherwise a ``chat <qualname>`` span
+    carries the request message plus, when the result exposes them, the
+    ``response``, ``tokens_in`` and ``tokens_out`` values. Message and response
+    text are truncated to 500 characters. Exceptions are recorded on the span
+    and re-raised unchanged.
+    """
+    span_name = f"chat {getattr(func, '__qualname__', type(func).__name__)}"
+
+    def request_attributes(args: tuple[Any, ...], kwargs: dict[str, Any]) -> dict[str, Any]:
+        """Build the initial span attributes for one call."""
+        attributes: dict[str, Any] = {"gen_ai.operation.name": "chat", "gen_ai.system": "hawk"}
+        # args[0] is presumably ``self``, so the message is looked up at args[1].
+        if len(args) > 1 and isinstance(args[1], str):
+            attributes["gen_ai.request.message"] = args[1][:500]
+        elif "message" in kwargs:
+            attributes["gen_ai.request.message"] = str(kwargs["message"])[:500]
+        return attributes
+
+    def record_success(span: Any, result: Any) -> None:
+        """Copy response text and token usage onto ``span`` and mark it OK."""
+        if hasattr(result, "response"):
+            span.set_attribute("gen_ai.response.text", str(result.response)[:500])
+        for attr, key in (
+            ("tokens_in", "gen_ai.usage.input_tokens"),
+            ("tokens_out", "gen_ai.usage.output_tokens"),
+        ):
+            value = getattr(result, attr, None)
+            if value is not None:  # None is not a valid span attribute value
+                span.set_attribute(key, value)
+        span.set_status(_ok_status())
+
+    if inspect.iscoroutinefunction(func):
+        @functools.wraps(func)
+        async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
+            if not _tracing_enabled or _tracer is None:
+                return await func(*args, **kwargs)
+            attributes = request_attributes(args, kwargs)
+            with _tracer.start_as_current_span(span_name, attributes=attributes, end_on_exit=False) as span:
+                try:
+                    result = await func(*args, **kwargs)
+                    record_success(span, result)
+                    return result
+                except Exception as e:
+                    _record_error(span, e)
+                    raise
+                finally:
+                    # asyncio.CancelledError is a BaseException and skips the
+                    # handler above; without this the span would never end.
+                    if span.is_recording():
+                        span.end()
+        return async_wrapper  # type: ignore
+
+    @functools.wraps(func)
+    def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
+        if not _tracing_enabled or _tracer is None:
+            return func(*args, **kwargs)
+        attributes = request_attributes(args, kwargs)
+        with _tracer.start_as_current_span(span_name, attributes=attributes, end_on_exit=False) as span:
+            try:
+                result = func(*args, **kwargs)
+                record_success(span, result)
+                return result
+            except Exception as e:
+                _record_error(span, e)
+                raise
+            finally:
+                if span.is_recording():  # e.g. KeyboardInterrupt skipped the handler
+                    span.end()
+    return sync_wrapper  # type: ignore
+
+
+def trace_tool(func: Callable[..., T]) -> Callable[..., T]:
+    """Decorator to trace tool execution with OTel spans.
+
+    Wraps sync and ``async def`` callables. While tracing is disabled the call
+    passes straight through; otherwise a ``tool <name>`` span records string
+    results (first 1000 characters) and any exception, which is re-raised.
+    """
+    tool_name = getattr(func, "__name__", type(func).__name__)
+    span_name = f"tool {tool_name}"
+
+    def open_span() -> Any:
+        """Start the span as current; the wrappers below end it themselves."""
+        attributes = {"gen_ai.operation.name": "execute_tool", "gen_ai.tool.name": tool_name}
+        return _tracer.start_as_current_span(span_name, attributes=attributes, end_on_exit=False)
+
+    def record_success(span: Any, result: Any) -> None:
+        if isinstance(result, str):
+            span.set_attribute("gen_ai.tool.result", result[:1000])
+        span.set_status(_ok_status())
+
+    if inspect.iscoroutinefunction(func):
+        @functools.wraps(func)
+        async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
+            if not _tracing_enabled or _tracer is None:
+                return await func(*args, **kwargs)
+            with open_span() as span:
+                try:
+                    result = await func(*args, **kwargs)
+                    record_success(span, result)
+                    return result
+                except Exception as e:
+                    _record_error(span, e)
+                    raise
+                finally:
+                    # asyncio.CancelledError is a BaseException and skips the handler above.
+                    if span.is_recording():
+                        span.end()
+        return async_wrapper  # type: ignore
+
+    @functools.wraps(func)
+    def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
+        if not _tracing_enabled or _tracer is None:
+            return func(*args, **kwargs)
+        with open_span() as span:
+            try:
+                result = func(*args, **kwargs)
+                record_success(span, result)
+                return result
+            except Exception as e:
+                _record_error(span, e)
+                raise
+            finally:
+                if span.is_recording():  # e.g. KeyboardInterrupt skipped the handler
+                    span.end()
+    return sync_wrapper  # type: ignore
+
+
+def trace(name: Optional[str] = None) -> Callable:
+    """Generic tracing decorator for any function.
+
+    Args:
+        name: Custom span name. Defaults to the function's qualified name.
+    """
+    def decorator(func: Callable[..., T]) -> Callable[..., T]:
+        span_name = name or getattr(func, "__qualname__", type(func).__name__)
+        if inspect.iscoroutinefunction(func):
+            @functools.wraps(func)
+            async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
+                if not _tracing_enabled or _tracer is None:
+                    return await func(*args, **kwargs)
+                with _tracer.start_as_current_span(span_name, end_on_exit=False) as span:
+                    try:
+                        result = await func(*args, **kwargs)
+                        span.set_status(_ok_status())
+                        return result
+                    except Exception as e:
+                        _record_error(span, e)
+                        raise
+                    finally:  # also reached on asyncio.CancelledError (a BaseException)
+                        if span.is_recording():
+                            span.end()
+            return async_wrapper  # type: ignore
+
+        @functools.wraps(func)
+        def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
+            if not _tracing_enabled or _tracer is None:
+                return func(*args, **kwargs)
+            with _tracer.start_as_current_span(span_name, end_on_exit=False) as span:
+                try:
+                    result = func(*args, **kwargs)
+                    if isinstance(result, Generator):  # span is ended by the wrapping generator
+                        return _trace_generator(result, span)
+                    span.set_status(_ok_status())
+                    span.end()
+                    return result
+                except Exception as e:
+                    _record_error(span, e)
+                    raise
+        return sync_wrapper  # type: ignore
+    return decorator
+
+
+def _trace_generator(gen: Generator, span: Any) -> Generator:
+    """Re-yield ``gen``; end ``span`` when it is exhausted, fails, or is closed early."""
+    try:
+        yield from gen
+        span.set_status(_ok_status())
+    except Exception as e:
+        _record_error(span, e)
+        raise
+    finally:
+        if span.is_recording():  # not yet ended by _record_error (e.g. GeneratorExit)
+            span.end()
+
+
+def _ok_status() -> Any:
+    """Return ``StatusCode.OK``, or None when OpenTelemetry cannot be imported."""
+    try:
+        from opentelemetry.trace import StatusCode
+    except ImportError:
+        return None
+    return StatusCode.OK
+
+
+def _record_error(span: Any, error: Exception) -> None:
+    """Mark ``span`` as failed with ``error`` and then end it (best effort)."""
+    try:
+        from opentelemetry.trace import StatusCode
+        span.set_status(StatusCode.ERROR, str(error))
+        span.record_exception(error)
+    except (ImportError, AttributeError):  # no OTel, or ``span`` lacks these methods
+        pass
+    finally:
+        try:
+            span.end()  # always attempted, even if recording the error failed
+        except Exception:  # swallowed so the caller can still re-raise ``error``
+            pass