diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 0000000..39f1a41
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,67 @@
+# EditorConfig — https://editorconfig.org
+# Canonical eco-wide template (.shared-templates/editorconfig.tmpl).
+
+root = true
+
+# Default for everything.
+[*]
+charset = utf-8
+end_of_line = lf
+insert_final_newline = true
+trim_trailing_whitespace = true
+indent_style = space
+indent_size = 4
+
+# Go uses tabs by convention.
+[*.go]
+indent_style = tab
+indent_size = 4
+
+# Python — PEP 8.
+[*.py]
+indent_size = 4
+
+# TypeScript / JavaScript — 2 spaces, ecosystem default.
+[*.{ts,tsx,js,jsx,mjs,cjs}]
+indent_size = 2
+
+# Web assets.
+[*.{html,css,scss}]
+indent_size = 2
+
+# YAML — 2 spaces (ecosystem standard, GitHub Actions, k8s, etc.).
+[*.{yml,yaml}]
+indent_size = 2
+
+# JSON / JSONC.
+[*.{json,jsonc}]
+indent_size = 2
+
+# TOML.
+[*.toml]
+indent_size = 2
+
+# Markdown — 2 spaces, preserve trailing whitespace (used for line breaks).
+[*.md]
+trim_trailing_whitespace = false
+indent_size = 2
+
+# Shell scripts.
+[*.{sh,bash,zsh,fish}]
+indent_size = 4
+
+# Makefiles must use tabs.
+[{Makefile,*.mk}]
+indent_style = tab
+
+# Dockerfiles.
+[Dockerfile*]
+indent_size = 4
+
+# GitHub Actions workflows — 2 spaces.
+[.github/**/*.{yml,yaml}]
+indent_size = 2
+
+# Config files.
+[*.{cfg,ini,conf}]
+indent_size = 4
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..3342e8f
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,86 @@
+# Canonical eco-wide .gitattributes template (.shared-templates/gitattributes.tmpl).
+# Auto-detect text files and normalise line endings to LF.
+
+* text=auto eol=lf
+
+# --- Source code -----------------------------------------------------------
+*.go     text eol=lf diff=golang
+*.py     text eol=lf diff=python
+*.ts     text eol=lf
+*.tsx    text eol=lf
+*.js     text eol=lf
+*.jsx    text eol=lf
+*.mjs    text eol=lf
+*.cjs    text eol=lf
+*.rs     text eol=lf diff=rust
+
+# --- Shell + config --------------------------------------------------------
+*.sh     text eol=lf
+*.bash   text eol=lf
+*.toml   text eol=lf
+*.yaml   text eol=lf
+*.yml    text eol=lf
+*.json   text eol=lf linguist-language=JSON
+*.jsonc  text eol=lf linguist-language=JSON
+*.cff    text eol=lf
+
+# --- Documentation ---------------------------------------------------------
+*.md     text eol=lf diff=markdown
+*.txt    text eol=lf
+
+# --- Build / packaging ----------------------------------------------------
+Makefile        text eol=lf
+*.mk            text eol=lf
+Dockerfile*     text eol=lf
+docker-compose*.yml text eol=lf
+.github/**/*.yml    text eol=lf
+.github/**/*.yaml   text eol=lf
+
+# --- Generated artefacts (mark as such for diffs and language stats) ------
+go.mod          text eol=lf linguist-generated
+go.sum          text eol=lf linguist-generated
+*.pb.go         linguist-generated
+*_generated.go  linguist-generated
+package-lock.json   linguist-generated
+pnpm-lock.yaml      linguist-generated
+yarn.lock           linguist-generated
+
+# --- Vendored / external sources ------------------------------------------
+vendor/**       linguist-vendored
+node_modules/** linguist-vendored
+testdata/**     linguist-vendored
+benchmarks/data/** linguist-vendored
+
+# --- Binary files (do not text-normalise) ---------------------------------
+*.exe    binary
+*.dll    binary
+*.so     binary
+*.dylib  binary
+*.a      binary
+*.o      binary
+*.db     binary
+*.sqlite binary
+*.png    binary
+*.jpg    binary
+*.jpeg   binary
+*.gif    binary
+*.ico    binary
+*.svg    text eol=lf
+*.pdf    binary
+*.zip    binary
+*.tar.gz binary
+*.tgz    binary
+*.whl    binary
+
+# --- Source archive hygiene (excluded from `git archive`) -----------------
+.github         export-ignore
+.shared-templates export-ignore
+.gitattributes  export-ignore
+.gitignore      export-ignore
+.editorconfig   export-ignore
+.golangci.yml   export-ignore
+.goreleaser.yml export-ignore
+.goreleaser.yaml export-ignore
+testdata/       export-ignore
+benchmarks/     export-ignore
+e2e/            export-ignore
diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml
new file mode 100644
index 0000000..06bb64e
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/bug_report.yml
@@ -0,0 +1,127 @@
+name: Bug report
+description: Something is broken or behaving unexpectedly.
+title: "bug: <one-line summary>"
+labels: ["bug", "triage"]
+
+body:
+  - type: markdown
+    attributes:
+      value: |
+        Thanks for taking the time to file a bug report. Please fill in as much
+        of the form as you can — the more we know, the faster we can fix it.
+
+        Before submitting:
+        - Search [existing issues](https://github.com/GrayCodeAI/hawk-sdk-python/issues) to avoid duplicates.
+        - If this is a security issue, please **do not** file a public issue. See `SECURITY.md`.
+
+  - type: textarea
+    id: what-happened
+    attributes:
+      label: What happened?
+      description: A clear, concise description of the bug.
+      placeholder: When I call HawkClient.<method>, I expected X but got Y.
+    validations:
+      required: true
+
+  - type: dropdown
+    id: surface
+    attributes:
+      label: Surface
+      description: Which SDK surface is affected?
+      options:
+        - "HawkClient (sync)"
+        - "AsyncHawkClient (async)"
+        - "Streaming (chat_stream / StreamReader)"
+        - "Retry / backoff"
+        - "Tools (chat_with_tools, @tool decorator)"
+        - "Agent / AsyncAgent"
+        - "Workflow / AsyncWorkflow"
+        - "Typed errors (HawkAPIError hierarchy)"
+        - "Build / packaging"
+    validations:
+      required: true
+
+  - type: textarea
+    id: reproduce
+    attributes:
+      label: Steps to reproduce
+      description: Minimal Python snippet that reliably reproduces the problem.
+      render: python
+      placeholder: |
+        from hawk import HawkClient
+        with HawkClient() as c:
+            resp = c.chat("hello")
+        # ^ wrong shape / exception / hang / etc.
+    validations:
+      required: true
+
+  - type: textarea
+    id: expected
+    attributes:
+      label: Expected behavior
+      description: What did you expect to happen instead?
+    validations:
+      required: true
+
+  - type: input
+    id: sdk-version
+    attributes:
+      label: hawk-sdk version
+      description: Output of `python -c "import hawk; print(hawk.__version__)"`.
+      placeholder: "0.2.0"
+    validations:
+      required: true
+
+  - type: input
+    id: daemon-version
+    attributes:
+      label: hawk daemon version
+      description: Output of `hawk version` (the daemon you're hitting).
+      placeholder: "0.2.0"
+    validations:
+      required: true
+
+  - type: input
+    id: python-version
+    attributes:
+      label: Python version
+      description: Output of `python --version`.
+      placeholder: "Python 3.11.9"
+    validations:
+      required: true
+
+  - type: input
+    id: os
+    attributes:
+      label: Operating system
+      description: e.g. macOS 14.5 (arm64), Ubuntu 24.04 (amd64), Windows 11 (amd64).
+      placeholder: "macOS 14.5 (arm64)"
+    validations:
+      required: true
+
+  - type: textarea
+    id: deps
+    attributes:
+      label: Relevant package versions
+      description: |
+        Paste the output of `pip freeze | grep -E "^(httpx|pydantic|hawk-sdk)"` (or `uv pip list` equivalent).
+      render: shell
+
+  - type: textarea
+    id: logs
+    attributes:
+      label: Logs / traceback
+      description: |
+        Paste any relevant output, including the full traceback.
+        **Redact API tokens, session IDs, and any private data first.**
+      render: shell
+
+  - type: checkboxes
+    id: confirm
+    attributes:
+      label: Confirmation
+      options:
+        - label: I searched existing issues and did not find a duplicate.
+          required: true
+        - label: I redacted any secrets, tokens, or private data from logs.
+          required: true
diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
new file mode 100644
index 0000000..c88dd67
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -0,0 +1,8 @@
+blank_issues_enabled: false
+contact_links:
+  - name: Security vulnerability
+    url: https://github.com/GrayCodeAI/hawk-sdk-python/security/advisories/new
+    about: Please report security issues privately via a GitHub Security Advisory. See SECURITY.md.
+  - name: Question / discussion
+    url: https://github.com/GrayCodeAI/hawk-sdk-python/discussions
+    about: Have a question or want to discuss an idea? Open a discussion instead of an issue.
diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml
new file mode 100644
index 0000000..c840e71
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/feature_request.yml
@@ -0,0 +1,75 @@
+name: Feature request
+description: Suggest an improvement or a new SDK capability.
+title: "feat: <one-line summary>"
+labels: ["enhancement", "triage"]
+
+body:
+  - type: markdown
+    attributes:
+      value: |
+        Thanks for proposing a feature. hawk-sdk is a thin Python client for
+        the local hawk daemon. Every feature is evaluated against whether it
+        serves **a single developer** running their own hawk daemon — i.e.
+        it improves ergonomics, lowers latency, or simplifies integration.
+
+        Before submitting:
+        - Search [existing issues](https://github.com/GrayCodeAI/hawk-sdk-python/issues) to avoid duplicates.
+        - For new daemon endpoints, the daemon side must land first.
+
+  - type: dropdown
+    id: kind
+    attributes:
+      label: Kind of feature
+      description: What flavour of change is this?
+      options:
+        - "New client method (wraps a daemon endpoint)"
+        - "Streaming / SSE handling"
+        - "Retry / backoff / resilience"
+        - "Typed errors / error categories"
+        - "Tools (chat_with_tools, @tool decorator)"
+        - "Agent / Workflow orchestration"
+        - "Pydantic model / type-hint improvement"
+        - "Configuration (httpx transport, timeouts, etc.)"
+        - "Tooling / CI / docs / packaging"
+    validations:
+      required: true
+
+  - type: textarea
+    id: problem
+    attributes:
+      label: What problem are you trying to solve?
+      description: Describe the user problem first. Solutions can come later.
+      placeholder: When I call <method>, I have to write boilerplate Y because the SDK doesn't expose X.
+    validations:
+      required: true
+
+  - type: textarea
+    id: proposal
+    attributes:
+      label: Proposed solution
+      description: How would you like the SDK to behave? Include a snippet of the API you'd want.
+      render: python
+    validations:
+      required: true
+
+  - type: textarea
+    id: alternatives
+    attributes:
+      label: Alternatives considered
+      description: |
+        What did you try? What do other SDKs (`openai-python`,
+        `anthropic-sdk-python`, `langchain`, `llama-index`, `dspy`,
+        `instructor`, `marvin`, `pydantic-ai`, `mirascope`, `magentic`)
+        do? Why isn't that enough?
+
+  - type: checkboxes
+    id: principles
+    attributes:
+      label: Solo-developer fit
+      description: hawk-sdk avoids enterprise scope. Confirm this feature respects that.
+      options:
+        - label: Works with zero configuration (sensible defaults).
+        - label: Does not introduce a third-party network dependency.
+        - label: Does not break wire-compatibility with existing daemon versions.
+        - label: Sync and async variants are kept in lock-step.
+        - label: Has an escape hatch (override via parameter, transport, or env).
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
new file mode 100644
index 0000000..f11661e
--- /dev/null
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -0,0 +1,87 @@
+<!--
+  Thanks for your contribution! Please fill out this template so reviewers can
+  understand the change quickly. If a section does not apply, keep its
+  heading and write "n/a" instead of deleting it.
+-->
+
+## Summary
+
+<!--
+  One paragraph describing what this PR does and why. Link the related
+  issue(s) with `Fixes #N` or `Refs #N` if applicable.
+-->
+
+## Changes
+
+<!--
+  Bullet list of what changed, grouped by area (client, agent, tools,
+  workflow, streaming, errors, retry, types, version, CI, docs).
+  Reviewers should be able to skim this and know what to look at first.
+-->
+
+-
+
+## API impact
+
+<!--
+  Did you add, remove, rename, or change the signature of any exported
+  symbol? List them here. If yes, confirm whether this is a breaking
+  change and bump the version accordingly in `pyproject.toml` and
+  `src/hawk/_version.py` (both must agree). If no exported surface
+  changed, write "n/a".
+-->
+
+## Daemon compatibility
+
+<!--
+  This SDK targets the hawk daemon `v1` API. Did you change endpoints,
+  request/response shapes, headers, or status-code handling?
+
+  - Which daemon versions did you test against (commit SHA / tag)?
+  - Is the change wire-compatible with the latest released daemon?
+  - If not, link the corresponding daemon PR.
+-->
+
+## Async compatibility
+
+<!--
+  Every public client method has both a sync and an async variant.
+  Did you change one without the other? If yes, explain why. Otherwise
+  confirm both `HawkClient.<method>` and `AsyncHawkClient.<method>`
+  were updated together.
+-->
+
+## Testing
+
+<!--
+  Describe how you tested. Paste the output of `make test`, `make lint`,
+  and `make typecheck`. If you added new tests, list them.
+-->
+
+```text
+$ make test
+...
+$ make lint
+...
+$ make typecheck
+...
+```
+
+## Checklist
+
+- [ ] Commits follow [Conventional Commits](https://www.conventionalcommits.org/)
+      (`feat:`, `fix:`, `perf:`, `refactor:`, `docs:`, `test:`, etc.)
+- [ ] `make test` passes locally
+- [ ] `make lint` (ruff check + ruff format --check) passes
+- [ ] `make typecheck` (mypy --strict) passes
+- [ ] New or changed code has tests (table-driven / parametrized where
+      appropriate, using `respx` for HTTP mocking)
+- [ ] Public APIs have docstrings and type hints
+- [ ] `CHANGELOG.md` updated under `## [Unreleased]` if user-visible
+- [ ] **Both `pyproject.toml` and `src/hawk/_version.py` are bumped
+      together** if this is a release-eligible change
+- [ ] Sync and async client variants are kept in lock-step
+- [ ] Every new outbound HTTP request inherits `User-Agent:
+      hawk-sdk-python/<__version__>` via `_build_headers()`
+- [ ] No secrets, tokens, or PII added to the repo
+- [ ] No `Co-authored-by:` trailers (this is solo-developer work)
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
new file mode 100644
index 0000000..b86542e
--- /dev/null
+++ b/.github/dependabot.yml
@@ -0,0 +1,35 @@
+version: 2
+updates:
+  - package-ecosystem: pip
+    directory: /
+    schedule:
+      interval: weekly
+      day: monday
+    open-pull-requests-limit: 5
+    labels:
+      - dependencies
+      - python
+    commit-message:
+      prefix: "chore(deps)"
+      include: scope
+    groups:
+      pydantic:
+        patterns:
+          - "pydantic*"
+      pytest:
+        patterns:
+          - "pytest*"
+          - "respx*"
+
+  - package-ecosystem: github-actions
+    directory: /
+    schedule:
+      interval: weekly
+      day: monday
+    open-pull-requests-limit: 3
+    labels:
+      - dependencies
+      - github-actions
+    commit-message:
+      prefix: "chore(ci)"
+      include: scope
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..3b280e2
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,111 @@
+# Canonical CI workflow for hawk-eco Python repos.
+# Source of truth: .shared-templates/workflows/python-ci.yml.tmpl
+
+name: CI
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+permissions:
+  contents: read
+
+concurrency:
+  group: ci-${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  test:
+    name: test (Python ${{ matrix.python-version }})
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+          cache: pip
+      - name: Install
+        run: |
+          python -m pip install --upgrade pip
+          pip install -e ".[dev]"
+      - name: pytest
+        run: pytest --strict-markers --tb=short
+
+  lint:
+    name: lint (ruff)
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+          cache: pip
+      - name: Install
+        run: |
+          python -m pip install --upgrade pip
+          pip install -e ".[dev]"
+      - name: ruff check
+        run: ruff check .
+      - name: ruff format --check
+        run: ruff format --check .
+
+  typecheck:
+    name: typecheck (mypy --strict)
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+          cache: pip
+      - name: Install
+        run: |
+          python -m pip install --upgrade pip
+          pip install -e ".[dev]"
+      - name: mypy
+        run: mypy src/
+
+  security:
+    name: security (pip-audit)
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+          cache: pip
+      - name: Install
+        run: |
+          python -m pip install --upgrade pip pip-audit
+          pip install -e ".[dev]"
+      - name: pip-audit
+        run: pip-audit
+
+  build:
+    name: build (sdist + wheel)
+    runs-on: ubuntu-latest
+    needs: [test, lint, typecheck]
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+          cache: pip
+      - name: Install build tools
+        run: |
+          python -m pip install --upgrade pip build twine
+      - name: Build
+        run: python -m build
+      - name: Twine check
+        run: twine check dist/*
+      - name: Upload artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: dist
+          path: dist/
diff --git a/.github/workflows/release-please.yml b/.github/workflows/release-please.yml
new file mode 100644
index 0000000..639f55f
--- /dev/null
+++ b/.github/workflows/release-please.yml
@@ -0,0 +1,43 @@
+# Canonical release-please workflow for hawk-eco repos.
+# Opens / updates a release PR on every push to main; on merge of that PR,
+# tags the new release. The tag triggers the PyPI publish workflow (release.yml).
+#
+# Source of truth: .shared-templates/release-please.yml.tmpl at the eco root.
+
+name: release-please
+
+on:
+  push:
+    branches: [main]
+
+permissions:
+  contents: write
+  pull-requests: write
+  issues: write
+
+concurrency:
+  group: release-please-${{ github.ref }}
+  cancel-in-progress: false
+
+jobs:
+  release-please:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Run release-please
+        id: release
+        uses: googleapis/release-please-action@v4
+        with:
+          config-file: release-please-config.json
+          manifest-file: .release-please-manifest.json
+          token: ${{ secrets.RELEASE_PLEASE_TOKEN || secrets.GITHUB_TOKEN }}
+
+      - name: Summary
+        if: always()
+        run: |
+          if [[ "${{ steps.release.outputs.release_created }}" == "true" ]]; then
+            echo "Released ${{ steps.release.outputs.tag_name }}." >> $GITHUB_STEP_SUMMARY
+          elif [[ "${{ steps.release.outputs.pr }}" != "" ]]; then
+            echo "Updated release PR: ${{ steps.release.outputs.pr }}" >> $GITHUB_STEP_SUMMARY
+          else
+            echo "No release-relevant changes detected." >> $GITHUB_STEP_SUMMARY
+          fi
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
new file mode 100644
index 0000000..4833743
--- /dev/null
+++ b/.github/workflows/release.yml
@@ -0,0 +1,41 @@
+# Canonical PyPI publish workflow for hawk-eco Python repos.
+# Triggered when release-please pushes a v* tag (needs a non-default token: tags created with GITHUB_TOKEN do not start workflows).
+# Source of truth: .shared-templates/workflows/python-release.yml.tmpl
+#
+# Uses PyPI Trusted Publishing (OIDC) — no API tokens stored in GitHub.
+# Configure once at https://pypi.org/manage/account/publishing/
+
+name: release
+
+on:
+  push:
+    tags: ["v*"]
+
+permissions:
+  contents: read
+  id-token: write   # required for PyPI Trusted Publishing
+
+jobs:
+  build-and-publish:
+    runs-on: ubuntu-latest
+    environment:
+      name: pypi
+      url: https://pypi.org/p/hawk-sdk
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Install build tooling
+        run: |
+          python -m pip install --upgrade pip build
+
+      - name: Build sdist + wheel
+        run: python -m build
+
+      - name: Publish to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          packages-dir: dist/
diff --git a/.gitignore b/.gitignore
index 7989b07..c0b9142 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,46 @@
+# Byte-compiled / optimized / DLL files
 __pycache__/
-*.pyc
+*.py[cod]
+*$py.class
+
+# Distribution / packaging
 *.egg-info/
-dist/
-build/
 .eggs/
+build/
+dist/
+sdist/
+wheels/
+*.egg
+MANIFEST
+
+# Virtual environments
+.venv/
+venv/
+env/
+ENV/
+
+# Tooling caches
+.pytest_cache/
+.mypy_cache/
+.ruff_cache/
+.tox/
+.nox/
+.coverage
+.coverage.*
+htmlcov/
+coverage.xml
+*.cover
+
+# IDE
+.vscode/
+.idea/
+*.swp
+*.swo
+
+# OS
+.DS_Store
+Thumbs.db
+
+# Local env files
+.env
+.env.local
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..b449ec5
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,43 @@
+# Canonical pre-commit config for hawk-eco Python repos.
+# Source of truth: .shared-templates/pre-commit-config.yaml.tmpl
+#
+# Install:    pip install pre-commit
+# Activate:   pre-commit install --install-hooks
+# Run all:    pre-commit run --all-files
+
+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.6.0
+    hooks:
+      - id: trailing-whitespace
+        exclude: '\.md$'           # markdown uses trailing whitespace for line breaks
+      - id: end-of-file-fixer
+      - id: check-yaml
+      - id: check-toml
+      - id: check-json
+      - id: check-merge-conflict
+      - id: check-added-large-files
+        args: [--maxkb=512]
+      - id: detect-private-key
+      - id: mixed-line-ending
+        args: [--fix=lf]
+
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.8.0
+    hooks:
+      - id: ruff
+        args: [--fix, --exit-non-zero-on-fix]
+      - id: ruff-format
+
+  - repo: https://github.com/pre-commit/mirrors-mypy
+    rev: v1.13.0
+    hooks:
+      - id: mypy
+        additional_dependencies: [pydantic>=2.0, httpx>=0.25]
+        args: [--strict, --ignore-missing-imports]
+
+  - repo: https://github.com/commitizen-tools/commitizen
+    rev: v3.30.1
+    hooks:
+      - id: commitizen
+        stages: [commit-msg]
diff --git a/.release-please-manifest.json b/.release-please-manifest.json
new file mode 100644
index 0000000..2be9c43
--- /dev/null
+++ b/.release-please-manifest.json
@@ -0,0 +1,3 @@
+{
+  ".": "0.2.0"
+}
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..21acc1c
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,83 @@
+# Changelog
+
+All notable changes to `hawk-sdk` (Python) are documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## [Unreleased]
+
+### Fixed
+- **`__version__` now agrees with `pyproject.toml`.** The prior hardening
+  commit bumped `pyproject.toml` to `0.2.0` but missed
+  `src/hawk/_version.py`, which still reported `0.1.0`. Both now report
+  `0.2.0`. Aligns the SDK with the rest of the hawk-eco ecosystem
+  (`hawk`, `tok`, `eyrie`, `yaad`, `trace`, `sight`, `inspect`,
+  `hawk-sdk-go`).
+
+### Added
+- **`User-Agent: hawk-sdk-python/<__version__>` header** on every
+  outbound HTTP request from both `HawkClient` (sync) and
+  `AsyncHawkClient` (async). Set via the `_build_headers()` helper, so
+  it applies to both regular API calls and the `chat_stream` SSE
+  endpoint (httpx merges client-default headers with per-request
+  overrides). Lets daemon operators identify SDK clients in logs and
+  reject misbehaving versions cleanly.
+- **OSS standard files** (this is the first PR to add them):
+  - `CHANGELOG.md` — Keep-a-Changelog format with `[Unreleased]` and
+    a backfilled `[0.1.0]` entry for the initial SDK + prior hardening
+    commit.
+  - `CONTRIBUTING.md` — quick start, branch flow (this repo branches
+    from `main`), conventional commits, code standards, testing,
+    SDK-version-bump procedure (must update both `pyproject.toml` and
+    `src/hawk/_version.py`).
+  - `SECURITY.md` — vulnerability reporting via GitHub Security
+    Advisories.
+  - `CODE_OF_CONDUCT.md` — Contributor Covenant 2.1.
+  - `.gitattributes` — LF normalization, binary detection.
+  - `.editorconfig` — UTF-8, LF, 4-space indent for Python, 2-space
+    for YAML/JSON/TOML.
+  - `.github/workflows/ci.yml` — pytest matrix on Python 3.9 / 3.10 /
+    3.11 / 3.12 / 3.13, ruff (lint + format check), mypy strict,
+    pip-audit, build sdist + wheel.
+  - `.github/dependabot.yml` — weekly `pip` + `github-actions`
+    updates.
+  - `.github/PULL_REQUEST_TEMPLATE.md` — Summary / Changes / API
+    impact / Daemon compatibility / Async compatibility / Testing /
+    Checklist.
+  - `.github/ISSUE_TEMPLATE/bug_report.yml` — surface dropdown
+    (HawkClient / AsyncHawkClient / streaming / retry / tools /
+    agent / workflow / typed errors / build).
+  - `.github/ISSUE_TEMPLATE/feature_request.yml` — feature request
+    with `kind` selector + solo-dev fit checks.
+  - `.github/ISSUE_TEMPLATE/config.yml` — routes security to
+    advisories, questions to discussions, blocks blank issues.
+- Expanded `.gitignore` with the broader Python toolchain footprint
+  (`.mypy_cache`, `.ruff_cache`, `.pytest_cache`, virtualenv dirs,
+  `htmlcov`, `coverage.xml`, `.tox`, `.nox`).
+
+## [0.1.0] — 2026-05-13
+
+### Added
+- Initial Python SDK for the hawk daemon API:
+  - `HawkClient` (sync) and `AsyncHawkClient` (async), both built on
+    `httpx`, with `health`, `chat`, `chat_stream`, `create_session`,
+    `get_session`, `list_sessions`, `list_messages`, `delete_session`,
+    `stats`.
+  - Pydantic v2 models for every request and response shape.
+  - Streaming via `StreamReader` / `AsyncStreamReader` (SSE).
+  - Typed error hierarchy (`HawkAPIError`, `AuthenticationError`,
+    `BadRequestError`, `NotFoundError`, `RateLimitError`,
+    `InternalServerError`, `ServiceUnavailableError`).
+  - Retry with exponential backoff via `RetryConfig`.
+  - `Tool`, `Agent` / `AsyncAgent`, `Workflow` / `AsyncWorkflow`
+    orchestration helpers.
+
+### Production-hardening pass already on this branch (commit `2560031`)
+- Added strict `ruff` lint config (E, F, W, I, N, UP, B, A, SIM, TCH,
+  RUF rule sets) and `mypy --strict` config in `pyproject.toml`.
+- Added `Makefile` with standard targets (`test`, `test-coverage`,
+  `lint`, `format`, `typecheck`, `clean`, `help`).
+- Bumped `pyproject.toml` version to `0.2.0` (this PR completes the
+  bump by also updating `_version.py`).
+- Added `pytest` strict-markers and short-traceback config.
diff --git a/CODEOWNERS b/CODEOWNERS
new file mode 100644
index 0000000..4cdd829
--- /dev/null
+++ b/CODEOWNERS
@@ -0,0 +1,20 @@
+# CODEOWNERS for hawk-sdk-python
+* @GrayCodeAI/maintainers
+
+# Public API surface — bump SDK version (VERSION file) when these change
+/src/hawk/client.py     @GrayCodeAI/sdk-team
+/src/hawk/agent.py      @GrayCodeAI/sdk-team
+/src/hawk/workflow.py   @GrayCodeAI/sdk-team
+/src/hawk/tools.py      @GrayCodeAI/sdk-team
+/src/hawk/types.py      @GrayCodeAI/sdk-team
+/src/hawk/errors.py     @GrayCodeAI/sdk-team
+/src/hawk/_version.py   @GrayCodeAI/maintainers
+/VERSION                @GrayCodeAI/maintainers
+
+# Build / packaging
+/pyproject.toml         @GrayCodeAI/devops-team
+/.github/               @GrayCodeAI/devops-team
+/Makefile               @GrayCodeAI/devops-team
+
+# Documentation
+*.md                    @GrayCodeAI/docs-team
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
new file mode 100644
index 0000000..314f97c
--- /dev/null
+++ b/CODE_OF_CONDUCT.md
@@ -0,0 +1,60 @@
+# Code of Conduct
+
+## Our pledge
+
+We — the maintainers and contributors of the hawk-sdk-python project — pledge to
+make participation in our community a harassment-free experience for everyone,
+regardless of age, body size, visible or invisible disability, ethnicity, sex
+characteristics, gender identity and expression, level of experience,
+education, socio-economic status, nationality, personal appearance, race,
+religion, or sexual identity and orientation.
+
+We pledge to act and interact in ways that contribute to an open, welcoming,
+diverse, inclusive, and healthy community.
+
+## Our standards
+
+Examples of behaviour that contributes to a positive environment:
+
+- Demonstrating empathy and kindness toward other people.
+- Being respectful of differing opinions, viewpoints, and experiences.
+- Giving and gracefully accepting constructive feedback.
+- Accepting responsibility, apologising to those affected by mistakes, and
+  learning from the experience.
+- Focusing on what is best not just for us as individuals, but for the
+  overall community.
+
+Examples of unacceptable behaviour:
+
+- The use of sexualised language or imagery, and sexual attention or advances.
+- Trolling, insulting or derogatory comments, and personal or political
+  attacks.
+- Public or private harassment.
+- Publishing others' private information, such as a physical or email
+  address, without their explicit permission.
+- Other conduct which could reasonably be considered inappropriate in a
+  professional setting.
+
+## Enforcement
+
+Community leaders are responsible for clarifying and enforcing our standards
+of acceptable behaviour, and will take appropriate and fair corrective
+action in response to any behaviour they deem inappropriate, threatening,
+offensive, or harmful.
+
+Instances of abusive, harassing, or otherwise unacceptable behaviour may be
+reported to the maintainers via the contact in `SECURITY.md` or by opening a
+confidential GitHub Security Advisory at
+<https://github.com/GrayCodeAI/hawk-sdk-python/security/advisories>. All
+complaints will be reviewed and investigated promptly and fairly.
+
+All community leaders are obligated to respect the privacy and security of
+the reporter of any incident.
+
+## Attribution
+
+This Code of Conduct is adapted from the
+[Contributor Covenant, version 2.1](https://www.contributor-covenant.org/version/2/1/code_of_conduct.html).
+
+For answers to common questions about this code of conduct, see the
+Contributor Covenant FAQ at <https://www.contributor-covenant.org/faq>.
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000..5fba294
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,114 @@
+# Contributing to hawk-sdk-python
+
+Thanks for your interest! This guide covers the conventions used across the
+hawk-eco. The eco-wide standards (versioning, release tooling, repo layout)
+are defined in <https://github.com/GrayCodeAI/hawk/blob/main/VERSIONING.md>.
+
+## Quick start
+
+1. Fork the repo and create a feature branch off `main`:
+   ```bash
+   git checkout -b feat/short-description
+   ```
+2. Make your changes in small, focused commits.
+3. Run the full local check before pushing:
+   ```bash
+   make ci
+   ```
+4. Open a pull request. CI will re-run the same checks plus security
+   scanning, race-detector tests, and (where applicable) integration tests.
+
+## Build & test
+
+This repo uses the standardised hawk-eco Makefile targets. Run `make help`
+for the full list. The most common targets:
+
+| Target              | What it does                                     |
+| ------------------- | ------------------------------------------------ |
+| `make build`        | Build the binary / verify the library compiles  |
+| `make test`         | Run unit tests                                   |
+| `make test-race`    | Run unit tests with the race detector            |
+| `make cover`        | Generate a coverage report                       |
+| `make lint`         | Run the linter (`golangci-lint` / `ruff`)        |
+| `make fmt`          | Format source files                              |
+| `make vet`          | Run `go vet` / `mypy`                            |
+| `make security`     | Run `govulncheck` / `pip-audit`                  |
+| `make ci`           | Run everything CI runs (the gate before pushing) |
+
+## Commit message convention
+
+We use [Conventional Commits](https://www.conventionalcommits.org/). This
+isn't cosmetic — release-please reads commit messages to bump the `VERSION`
+file and generate the CHANGELOG, so getting them right matters.
+
+```
+<type>(<optional scope>): <short summary>
+
+<optional body>
+
+<optional footer(s)>
+```
+
+**Types:**
+
+- `feat:` — a new feature (triggers a minor version bump)
+- `fix:` — a bug fix (triggers a patch version bump)
+- `perf:` — performance improvement
+- `refactor:` — code restructure with no behaviour change
+- `docs:` — documentation only
+- `test:` — adding or fixing tests
+- `build:` — build system or dependencies
+- `ci:` — CI configuration
+- `chore:` — anything else (no release effect)
+- `revert:` — reverts a previous commit
+
+**Breaking changes:** add `!` after the type/scope or include `BREAKING
+CHANGE:` in the footer. This triggers a major version bump.
+
+Examples:
+
+```
+feat(client): add streaming retry with exponential backoff
+fix: handle empty response body in chat handler
+refactor!: rename ClientV1 to Client (BREAKING CHANGE)
+```
+
+## Pull request checklist
+
+Before requesting review:
+
+- [ ] `make ci` passes locally.
+- [ ] New behaviour has tests; bug fixes have a regression test.
+- [ ] `CHANGELOG.md` entries are **not** edited manually — release-please
+      generates them from your commit messages.
+- [ ] The `VERSION` file is **not** edited manually — release-please bumps
+      it on release.
+- [ ] Public API changes have updated doc comments.
+- [ ] No secrets, API keys, or PII in code, comments, tests, or fixtures.
+
+## Code review etiquette
+
+- Reviewers focus on correctness, design, and tests; formatting is
+  enforced by tooling, not humans.
+- Authors respond to every comment (resolved, addressed, or politely
+  declined with rationale) — no silent dismissals.
+- Squash-merge by default; the PR title becomes the commit (so it must
+  be a valid Conventional Commit message).
+- One approving review from a CODEOWNERS-listed reviewer is required.
+
+## Reporting bugs
+
+Open an issue using the bug-report template. Include the `hawk-sdk`
+version (`python -c "import hawk; print(hawk.__version__)"`, or the contents
+of this repo's `VERSION` file), reproduction steps, expected behaviour, and
+actual behaviour.
+
+## Reporting security issues
+
+**Do not open a public issue.** See [SECURITY.md](./SECURITY.md) for
+private reporting channels.
+
+## License
+
+By contributing, you agree that your contributions will be licensed under
+the same license as this repo (see [LICENSE](./LICENSE)).
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..5f8f390
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,97 @@
+# Canonical hawk-eco Makefile for Python repos.
+# Source of truth: .shared-templates/Makefile.python.tmpl at the eco root.
+# Placeholders rendered per repo: hawk-sdk.
+
+# ---------------------------------------------------------------------------
+# Project metadata
+# ---------------------------------------------------------------------------
+NAME := hawk-sdk
+
+# ---------------------------------------------------------------------------
+# Versioning — first line of the repo-root VERSION file (single source of truth,
+# also consumed by hatch + release-please); "dev" if it is missing or blank.
+# ---------------------------------------------------------------------------
+VERSION ?= $(shell v=$$(head -n1 VERSION 2>/dev/null | tr -d '[:space:]'); echo "$${v:-dev}")
+
+PYTHON  ?= python3
+PIP     ?= $(PYTHON) -m pip
+
+# ---------------------------------------------------------------------------
+# Phony declarations (alphabetical).
+# ---------------------------------------------------------------------------
+.PHONY: all bench build ci clean cover fmt help install lint lint-fix \
+        release security test test-race tidy version vet
+
+# ---------------------------------------------------------------------------
+# Default target.
+# ---------------------------------------------------------------------------
+all: lint test build ## Default — lint, test, build.
+
+# ---------------------------------------------------------------------------
+# Build / install / release.
+# ---------------------------------------------------------------------------
+build: ## Build wheel + sdist into dist/.
+	$(PYTHON) -m build
+
+install: ## Install in editable mode with dev extras.
+	$(PIP) install -e ".[dev]"
+
+release: build ## Upload to PyPI (expects $TWINE_USERNAME / $TWINE_PASSWORD).
+	$(PYTHON) -m twine upload dist/*
+
+# ---------------------------------------------------------------------------
+# Tests.
+# ---------------------------------------------------------------------------
+test: ## Run unit tests.
+	$(PYTHON) -m pytest
+
+test-race: test ## Alias for `test` (Python has no race detector).
+
+cover: ## Run tests with coverage report.
+	$(PYTHON) -m pytest --cov=src --cov-report=term-missing --cov-report=html
+	@echo "Coverage report: htmlcov/index.html"
+
+bench: ## Run benchmarks (requires pytest-benchmark).
+	$(PYTHON) -m pytest --benchmark-only
+
+# ---------------------------------------------------------------------------
+# Quality gates.
+# ---------------------------------------------------------------------------
+fmt: ## Format with ruff.
+	$(PYTHON) -m ruff format .
+
+vet: ## Type-check with mypy.
+	$(PYTHON) -m mypy src/
+
+lint: ## Lint with ruff.
+	$(PYTHON) -m ruff check .
+
+lint-fix: ## Lint with ruff --fix.
+	$(PYTHON) -m ruff check --fix .
+
+security: ## Run pip-audit on resolved dependencies.
+	@command -v pip-audit >/dev/null 2>&1 || (echo "install: pip install pip-audit" && exit 1)
+	pip-audit
+
+tidy: ## No-op for Python (lockfile management is via pyproject.toml).
+	@echo "tidy: nothing to do for Python repos."
+
+# ---------------------------------------------------------------------------
+# Composite gate used by CI and pre-push.
+# ---------------------------------------------------------------------------
+ci: fmt vet lint test security ## Run everything CI runs.
+	@echo "All CI checks passed."
+
+# ---------------------------------------------------------------------------
+# Misc.
+# ---------------------------------------------------------------------------
+version: ## Print the version that will be packaged.
+	@echo "Version: $(VERSION)"
+
+clean: ## Remove build artefacts and caches.
+	rm -rf dist/ build/ *.egg-info htmlcov/ .coverage
+	rm -rf .pytest_cache .mypy_cache .ruff_cache
+	find . -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true
+
+help: ## Show this help.
+	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-15s\033[0m %s\n", $$1, $$2}'
diff --git a/SECURITY.md b/SECURITY.md
new file mode 100644
index 0000000..0c39f7a
--- /dev/null
+++ b/SECURITY.md
@@ -0,0 +1,71 @@
+# Security Policy — hawk-sdk-python
+
+## Supported versions
+
+We support the latest minor version on each `0.x` line, and the latest two
+minor versions once `1.x` ships. Older versions receive critical-severity
+fixes only on a best-effort basis.
+
+The current canonical version is the contents of the [`VERSION`](./VERSION)
+file at the repo root. See [`VERSIONING.md`](https://github.com/GrayCodeAI/hawk/blob/main/VERSIONING.md)
+for the eco-wide versioning scheme.
+
+## Reporting a vulnerability
+
+**Do not open a public GitHub issue for security vulnerabilities.** Instead:
+
+1. Open a private [GitHub Security Advisory](https://github.com/GrayCodeAI/hawk-sdk-python/security/advisories/new), **or**
+2. Email `security@graycode.ai` with the details below.
+
+Include in your report:
+
+- A description of the vulnerability and the affected component.
+- Steps to reproduce, ideally with a minimal proof-of-concept.
+- The version (`VERSION` file or git SHA) you tested against.
+- The potential impact and any suggested mitigation.
+
+**Response targets:**
+
+- Initial acknowledgement: within **48 hours**.
+- Triage and severity assessment: within **5 business days**.
+- Coordinated fix and disclosure: within **30 days** for high/critical, **90
+  days** for medium/low (per industry-standard responsible disclosure).
+
+## Disclosure policy
+
+We follow [coordinated vulnerability disclosure](https://en.wikipedia.org/wiki/Coordinated_vulnerability_disclosure):
+
+- Reporters receive credit in the advisory and CHANGELOG (unless they opt
+  out).
+- We request that reporters refrain from public disclosure until a fix has
+  been released or the disclosure deadline above has elapsed.
+- We will not pursue legal action against good-faith researchers acting
+  within this policy.
+
+## Security practices in this repo
+
+- **Dependency monitoring:** automated via Dependabot (see
+  `.github/dependabot.yml`).
+- **Static analysis:** `golangci-lint` / `ruff` / `mypy` enforced in CI.
+- **Vulnerability scanning:** `govulncheck` (Go) / `pip-audit` (Python) run
+  on every CI build.
+- **Lockfiles:** `go.sum` / `pnpm-lock.yaml` / `pyproject.toml` are pinned
+  and committed.
+- **Reproducible builds:** release artefacts ship with SHA-256 checksums via
+  goreleaser.
+- **No secrets in source:** API keys are configuration, not constants. Pre-
+  commit hooks block accidental secret commits.
+
+## Scope
+
+This policy covers the code in this repository and the release artefacts
+published from it. It does not cover:
+
+- Third-party dependencies (report to upstream).
+- LLM provider services that hawk-sdk-python integrates with (report to the
+  provider).
+- Local filesystem misuse where an attacker already has shell access (out of
+  threat model).
+
+For hawk-sdk-python-specific threat-model notes, see the README and any docs in
+this repo.
diff --git a/VERSION b/VERSION
new file mode 100644
index 0000000..0ea3a94
--- /dev/null
+++ b/VERSION
@@ -0,0 +1 @@
+0.2.0
diff --git a/pyproject.toml b/pyproject.toml
index 36f167d..53a5eec 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,10 @@ build-backend = "hatchling.build"
 
 [project]
 name = "hawk-sdk"
-version = "0.1.0"
+# Version is read dynamically from the VERSION file at the repo root —
+# the single source of truth used by release tooling, CI, and the runtime
+# `hawk.__version__` constant.
+dynamic = ["version"]
 description = "Python SDK for the Hawk daemon API"
 readme = "README.md"
 license = "MIT"
@@ -35,11 +38,50 @@ dev = [
     "pytest>=7.0",
     "pytest-asyncio>=0.21",
     "respx>=0.21",
+    "ruff>=0.4.0",
+    "mypy>=1.0",
 ]
 
+# Read the package version from the repo-root VERSION file at build time.
+# This keeps Python package metadata, runtime `__version__`, and release
+# tooling all in sync from a single source.
+[tool.hatch.version]
+source = "regex"
+path = "VERSION"
+pattern = "^(?P<version>[^\\s]+)"
+
 [tool.hatch.build.targets.wheel]
 packages = ["src/hawk"]
+# Ship the VERSION file inside the package so `_version.py` can read it at
+# runtime even after install.
+force-include = { "VERSION" = "hawk/VERSION" }
+
+[tool.hatch.build.targets.sdist]
+include = [
+    "src/hawk",
+    "VERSION",
+    "README.md",
+    "LICENSE",
+]
 
 [tool.pytest.ini_options]
 asyncio_mode = "auto"
 testpaths = ["tests"]
+addopts = "--strict-markers --tb=short -q"
+
+[tool.ruff]
+target-version = "py39"
+line-length = 100
+
+[tool.ruff.lint]
+select = ["E", "F", "W", "I", "N", "UP", "B", "A", "SIM", "TCH", "RUF"]
+ignore = ["E501"]
+
+[tool.ruff.lint.isort]
+known-first-party = ["hawk"]
+
+[tool.mypy]
+python_version = "3.9"
+strict = true
+warn_return_any = true
+warn_unused_configs = true
diff --git a/release-please-config.json b/release-please-config.json
new file mode 100644
index 0000000..9343c9a
--- /dev/null
+++ b/release-please-config.json
@@ -0,0 +1,27 @@
+{
+  "$schema": "https://raw.githubusercontent.com/googleapis/release-please/main/schemas/config.json",
+  "packages": {
+    ".": {
+      "release-type": "python",
+      "package-name": "hawk-sdk-python",
+      "include-v-in-tag": true,
+      "include-component-in-tag": false,
+      "bump-minor-pre-major": true,
+      "bump-patch-for-minor-pre-major": false,
+      "changelog-sections": [
+        { "type": "feat",     "section": "Features" },
+        { "type": "fix",      "section": "Bug Fixes" },
+        { "type": "perf",     "section": "Performance" },
+        { "type": "refactor", "section": "Refactoring" },
+        { "type": "revert",   "section": "Reverts" },
+        { "type": "docs",     "section": "Documentation", "hidden": false },
+        { "type": "test",     "section": "Tests",         "hidden": false },
+        { "type": "build",    "section": "Build",         "hidden": true },
+        { "type": "ci",       "section": "CI",            "hidden": true },
+        { "type": "chore",    "section": "Chores",        "hidden": true },
+        { "type": "style",    "section": "Style",         "hidden": true }
+      ],
+      "extra-files": [{"type":"version-txt","path":"VERSION"}]
+    }
+  }
+}
diff --git a/src/hawk/__init__.py b/src/hawk/__init__.py
index a374247..136f0ff 100644
--- a/src/hawk/__init__.py
+++ b/src/hawk/__init__.py
@@ -13,9 +13,19 @@
     RateLimitError,
     ServiceUnavailableError,
 )
+from .plan import Plan, PlanNotebook, SubTask, SubTaskState
 from .retry import DEFAULT_RETRY_CONFIG, RetryConfig
 from .streaming import AsyncStreamReader, StreamReader
 from .tools import Tool, chat_with_tools, chat_with_tools_async, tool
+from .toolkit import BackgroundTask, ToolGroup, Toolkit
+from .tracing import (
+    configure_tracing,
+    detect_provider,
+    is_tracing_enabled,
+    trace,
+    trace_chat,
+    trace_tool,
+)
 from .types import (
     ChatRequest,
     ChatResponse,
@@ -31,6 +41,21 @@
     ToolCall,
     Usage,
 )
+from .discovery import (
+    AgentCard,
+    AgentResolver,
+    CompositeResolver,
+    FileResolver,
+    WellKnownResolver,
+)
+from .evaluate import (
+    BenchmarkResults,
+    EvalResult,
+    EvalTask,
+    run_benchmark,
+    run_benchmark_async,
+)
+from .memory_tools import MemoryTools
 from .workflow import AsyncWorkflow, Workflow
 
 __all__ = [
@@ -51,6 +76,15 @@
     "tool",
     "chat_with_tools",
     "chat_with_tools_async",
+    # Plan
+    "PlanNotebook",
+    "Plan",
+    "SubTask",
+    "SubTaskState",
+    # Toolkit
+    "Toolkit",
+    "ToolGroup",
+    "BackgroundTask",
     # Workflow
     "Workflow",
     "AsyncWorkflow",
@@ -71,6 +105,13 @@
     "StreamEventType",
     "ToolCall",
     "Usage",
+    # Tracing
+    "configure_tracing",
+    "detect_provider",
+    "is_tracing_enabled",
+    "trace",
+    "trace_chat",
+    "trace_tool",
     # Errors
     "HawkAPIError",
     "BadRequestError",
@@ -80,4 +121,18 @@
     "RateLimitError",
     "InternalServerError",
     "ServiceUnavailableError",
+    # Evaluate
+    "EvalTask",
+    "EvalResult",
+    "BenchmarkResults",
+    "run_benchmark",
+    "run_benchmark_async",
+    # Discovery
+    "AgentCard",
+    "AgentResolver",
+    "FileResolver",
+    "WellKnownResolver",
+    "CompositeResolver",
+    # Memory
+    "MemoryTools",
 ]
diff --git a/src/hawk/_version.py b/src/hawk/_version.py
index a51ea98..b4dcf7e 100644
--- a/src/hawk/_version.py
+++ b/src/hawk/_version.py
@@ -1,3 +1,28 @@
-"""Version information for hawk-sdk."""
+"""Version information for hawk-sdk.
 
-__version__ = "0.1.0"
+The version is read from the VERSION file at the repo root, which is the
+single source of truth used by release tooling, CI, and package metadata.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+
+def _read_version() -> str:
+    """Return the version string from the nearest non-empty VERSION file.
+
+    Searches this file's directory (where the wheel ships VERSION as package
+    data) and then each ancestor (the repo root when running from a clone).
+    Returns "0.0.0+unknown" when no usable VERSION file is found.
+    """
+    # Path.parents already starts at the containing directory.
+    for parent in Path(__file__).resolve().parents:
+        candidate = parent / "VERSION"
+        version = candidate.read_text(encoding="utf-8").strip() if candidate.is_file() else ""
+        if version:  # skip blank files rather than reporting an empty version
+            return version
+    return "0.0.0+unknown"
+
+
+__version__ = _read_version()
diff --git a/src/hawk/client.py b/src/hawk/client.py
index c13a533..d039f1d 100644
--- a/src/hawk/client.py
+++ b/src/hawk/client.py
@@ -9,6 +9,7 @@
 from .errors import parse_error
 from .retry import DEFAULT_RETRY_CONFIG, RetryConfig, with_retry, with_retry_sync
 from .streaming import AsyncStreamReader, StreamReader
+from ._version import __version__
 from .types import (
     ChatRequest,
     ChatResponse,
@@ -51,7 +52,10 @@ def __init__(
         )
 
     def _build_headers(self) -> dict[str, str]:
-        headers: dict[str, str] = {"Accept": "application/json"}
+        headers: dict[str, str] = {
+            "Accept": "application/json",
+            "User-Agent": f"hawk-sdk-python/{__version__}",
+        }
         if self._api_key:
             headers["Authorization"] = f"Bearer {self._api_key}"
         return headers
@@ -260,7 +264,10 @@ def __init__(
         )
 
     def _build_headers(self) -> dict[str, str]:
-        headers: dict[str, str] = {"Accept": "application/json"}
+        headers: dict[str, str] = {
+            "Accept": "application/json",
+            "User-Agent": f"hawk-sdk-python/{__version__}",
+        }
         if self._api_key:
             headers["Authorization"] = f"Bearer {self._api_key}"
         return headers
diff --git a/src/hawk/discovery.py b/src/hawk/discovery.py
new file mode 100644
index 0000000..346f668
--- /dev/null
+++ b/src/hawk/discovery.py
@@ -0,0 +1,194 @@
+"""Agent-to-Agent discovery protocol for Hawk.
+
+Enables agents to discover and communicate with other agents via multiple
+resolution strategies (HTTP well-known, file-based, or a composite of both).
+
+Usage:
+    from hawk.discovery import AgentCard, WellKnownResolver, FileResolver
+
+    resolver = WellKnownResolver(["http://localhost:8080"])
+    card = await resolver.resolve("assistant-agent")
+    # card.endpoint -> "http://localhost:8080/v1/chat" (card is None if not found)
+"""
+
+from __future__ import annotations
+
+import json
+import os
+from dataclasses import dataclass, field
+from typing import Any, Optional, Protocol
+
+
+@dataclass
+class AgentCard:
+    """Describes a discoverable agent's capabilities and endpoint."""
+    name: str  # unique key used by resolvers
+    endpoint: str  # URL the agent is reachable at
+    description: str = ""
+    capabilities: list[str] = field(default_factory=list)
+    version: str = "1.0"
+    metadata: dict[str, Any] = field(default_factory=dict)
+
+    def to_dict(self) -> dict[str, Any]:  # snapshot: containers are shallow-copied
+        return {
+            "name": self.name,
+            "endpoint": self.endpoint,
+            "description": self.description,
+            "capabilities": list(self.capabilities),
+            "version": self.version,
+            "metadata": dict(self.metadata),
+        }
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> "AgentCard":  # KeyError if name/endpoint missing
+        return cls(
+            name=data["name"],
+            endpoint=data["endpoint"],
+            description=data.get("description", ""),
+            capabilities=list(data.get("capabilities") or []),  # copy input; tolerate null
+            version=data.get("version", "1.0"),
+            metadata=dict(data.get("metadata") or {}),  # copy input; tolerate null
+        )
+
+
+class AgentResolver(Protocol):
+    """Structural interface implemented by every agent discovery resolver."""
+
+    async def resolve(self, agent_name: str) -> Optional[AgentCard]:
+        """Return the card known under ``agent_name``, or ``None`` if there is none."""
+        ...
+
+    async def list_agents(self) -> list[AgentCard]:
+        """Return the cards of all agents this resolver knows about."""
+        ...
+
+    async def register(self, card: AgentCard) -> None:
+        """Make ``card`` available through this resolver."""
+        ...
+
+
+class FileResolver:
+    """File-based agent discovery for local development.
+
+    Loads cards from a UTF-8 JSON file shaped ``{"agents": [...]}`` at construction.
+
+    Usage:
+        resolver = FileResolver("/path/to/agents.json")
+        card = await resolver.resolve("my-agent")
+    """
+
+    def __init__(self, path: str = ".hawk/agents.json") -> None:
+        self._path = path
+        self._cards: dict[str, AgentCard] = {}  # keyed by card name
+        self._load()
+
+    def _load(self) -> None:
+        if os.path.exists(self._path):  # a missing file simply means no agents yet
+            with open(self._path, encoding="utf-8") as f:  # JSON is UTF-8, not locale-encoded
+                data = json.load(f)
+            for entry in data.get("agents", []):
+                card = AgentCard.from_dict(entry)
+                self._cards[card.name] = card
+
+    def _save(self) -> None:
+        os.makedirs(os.path.dirname(self._path) or ".", exist_ok=True)  # bare filename -> cwd
+        data = {"agents": [c.to_dict() for c in self._cards.values()]}
+        with open(self._path, "w", encoding="utf-8") as f:
+            json.dump(data, f, indent=2)
+
+    async def resolve(self, agent_name: str) -> Optional[AgentCard]:
+        return self._cards.get(agent_name)
+
+    async def list_agents(self) -> list[AgentCard]:
+        return list(self._cards.values())
+
+    async def register(self, card: AgentCard) -> None:
+        self._cards[card.name] = card  # replaces any existing card with the same name
+        self._save()  # rewrite the whole file immediately
+
+
+class WellKnownResolver:
+    """HTTP-based agent discovery via well-known URLs.
+
+    Discovers agents by fetching ``{base_url}/.well-known/agent.json`` from each base URL.
+
+    Usage:
+        resolver = WellKnownResolver(["http://localhost:8080", "http://agent2:8080"])
+        card = await resolver.resolve("assistant")
+    """
+
+    def __init__(self, base_urls: Optional[list[str]] = None) -> None:
+        self._base_urls = list(base_urls or [])  # copy: register() appends to this list
+        self._cache: dict[str, AgentCard] = {}  # resolved or registered cards, by name
+
+    async def resolve(self, agent_name: str) -> Optional[AgentCard]:
+        if agent_name in self._cache:
+            return self._cache[agent_name]
+
+        for url in self._base_urls:  # probed one at a time; first matching name wins
+            card = await self._fetch_card(url)
+            if card and card.name == agent_name:
+                self._cache[agent_name] = card
+                return card
+        return None
+
+    async def list_agents(self) -> list[AgentCard]:
+        cards = []
+        for url in self._base_urls:  # always re-fetches; the cache is not consulted
+            card = await self._fetch_card(url)
+            if card:
+                cards.append(card)
+        return cards
+
+    async def register(self, card: AgentCard) -> None:
+        self._cache[card.name] = card
+        if card.endpoint not in self._base_urls:
+            self._base_urls.append(card.endpoint)  # NOTE(review): endpoint reused as a base URL — confirm
+
+    async def _fetch_card(self, base_url: str) -> Optional[AgentCard]:
+        try:
+            import httpx
+            url = f"{base_url.rstrip('/')}/.well-known/agent.json"
+            async with httpx.AsyncClient(timeout=5.0) as client:
+                resp = await client.get(url)
+                if resp.status_code == 200:
+                    return AgentCard.from_dict(resp.json())
+        except Exception:
+            pass  # best-effort: any failure (network, bad JSON, missing keys) means "no card"
+        return None
+
+
+class CompositeResolver:
+    """Queries several resolvers in order and returns the first hit.
+
+    Usage:
+        resolver = CompositeResolver([
+            FileResolver(".hawk/agents.json"),
+            WellKnownResolver(["http://localhost:8080"]),
+        ])
+        card = await resolver.resolve("my-agent")
+    """
+
+    def __init__(self, resolvers: list[Any]) -> None:
+        self._resolvers = resolvers  # consulted in list order
+
+    async def resolve(self, agent_name: str) -> Optional[AgentCard]:
+        for source in self._resolvers:
+            found = await source.resolve(agent_name)
+            if found:
+                return found
+        return None
+
+    async def list_agents(self) -> list[AgentCard]:
+        # Dicts keep insertion order, so values() yields cards in first-seen
+        # order; setdefault keeps the card from the earliest resolver when
+        # several resolvers report the same name.
+        unique: dict[str, AgentCard] = {}
+        for source in self._resolvers:
+            for card in await source.list_agents():
+                unique.setdefault(card.name, card)
+        return list(unique.values())
+
+    async def register(self, card: AgentCard) -> None:
+        if self._resolvers:  # no-op when the chain is empty
+            await self._resolvers[0].register(card)  # only the first resolver stores it
diff --git a/src/hawk/evaluate.py b/src/hawk/evaluate.py
new file mode 100644
index 0000000..574a48d
--- /dev/null
+++ b/src/hawk/evaluate.py
@@ -0,0 +1,204 @@
+"""Agent evaluation framework for systematic benchmarking.
+
+Defines tasks with metrics, runs agents N times, aggregates results
+with statistics. Inspired by agentscope's evaluation module.
+
+Usage:
+    from hawk.evaluate import EvalTask, run_benchmark
+
+    tasks = [
+        EvalTask(
+            name="weather-lookup",
+            prompt="What's the weather in NYC?",
+            expected_tools=["get_weather"],
+            validate=lambda r: "temperature" in r.response,
+        ),
+    ]
+    results = run_benchmark(agent, tasks, runs=3)
+    print(results.summary())
+"""
+
+from __future__ import annotations
+
+import time
+import statistics
+from dataclasses import dataclass, field
+from typing import Any, Callable, Optional
+
+
+@dataclass
+class EvalTask:
+    """A single evaluation task."""
+    name: str  # combined with category to label results
+    prompt: str  # text passed to agent.chat()
+    category: str = "general"  # any other value prefixes the result name as "category/name"
+    expected_tools: list[str] = field(default_factory=list)  # not read by the runners in this module
+    validate: Optional[Callable[[Any], bool]] = None  # called with the agent response; falsy => failed run
+    max_turns: int = 10  # not read by the runners in this module
+
+
+@dataclass
+class EvalResult:
+    """Result of a single evaluation run."""
+    task_name: str  # "category/name", or just "name" for the "general" category
+    success: bool  # False when the run raised or validate() returned a falsy value
+    duration_ms: float  # elapsed time measured with time.perf_counter(), in milliseconds
+    tokens_in: int = 0  # copied from response.tokens_in when the attribute exists
+    tokens_out: int = 0  # copied from response.tokens_out when the attribute exists
+    turns_taken: int = 0  # copied from response.turns_taken when the attribute exists
+    error: Optional[str] = None  # failure detail, e.g. str(exception) when the run raised
+
+
+@dataclass
+class BenchmarkResults:
+    """Aggregated benchmark results."""
+    results: list[EvalResult] = field(default_factory=list)  # one entry per task run
+
+    @property
+    def total_tasks(self) -> int:
+        return len(self.results)  # counts runs, so a task run N times counts N times
+
+    @property
+    def passed(self) -> int:
+        return sum(1 for r in self.results if r.success)
+
+    @property
+    def failed(self) -> int:
+        return self.total_tasks - self.passed
+
+    @property
+    def pass_rate(self) -> float:
+        return self.passed / max(self.total_tasks, 1)  # 0.0 instead of ZeroDivisionError when empty
+
+    @property
+    def avg_duration_ms(self) -> float:
+        durations = [r.duration_ms for r in self.results]
+        return statistics.mean(durations) if durations else 0.0  # mean() raises on empty input
+
+    @property
+    def total_tokens(self) -> int:
+        return sum(r.tokens_in + r.tokens_out for r in self.results)
+
+    def by_category(self) -> dict[str, list[EvalResult]]:
+        cats: dict[str, list[EvalResult]] = {}
+        for r in self.results:
+            prefix, sep, _ = r.task_name.partition("/")  # category is the text before the first "/"
+            cats.setdefault(prefix if sep else "general", []).append(r)
+        return cats
+
+    def summary(self) -> str:
+        lines = [
+            f"Benchmark Results: {self.passed}/{self.total_tasks} passed ({self.pass_rate:.0%})",
+            f"Avg duration: {self.avg_duration_ms:.0f}ms",
+            f"Total tokens: {self.total_tokens}",
+        ]
+        failures = [r for r in self.results if not r.success]
+        if failures:
+            lines.append("Failures:")
+            # Only the first ten failures are listed to keep the summary short.
+            lines.extend(f"  - {r.task_name}: {r.error or 'validation failed'}" for r in failures[:10])
+        return "\n".join(lines)
+
+
+def run_benchmark(
+    agent: Any,
+    tasks: list[EvalTask],
+    *,
+    runs: int = 1,
+    reset_between_tasks: bool = True,
+) -> BenchmarkResults:
+    """Run a benchmark suite against an agent.
+
+    Args:
+        agent: A hawk Agent instance with a .chat() method.
+        tasks: List of evaluation tasks.
+        runs: Number of times to run each task.
+        reset_between_tasks: Whether to call agent.reset() before every run.
+
+    Returns:
+        Aggregated benchmark results; exceptions are recorded as failed runs.
+    """
+    results = BenchmarkResults()
+
+    for task in tasks:
+        # Results are labelled "category/name" unless the category is the default.
+        label = task.name if task.category == "general" else f"{task.category}/{task.name}"
+        for _ in range(runs):
+            if reset_between_tasks:
+                agent.reset()
+
+            # Time only the chat call; validation runs after the clock is read.
+            start = time.perf_counter()
+            try:
+                response = agent.chat(task.prompt)
+                duration = (time.perf_counter() - start) * 1000  # milliseconds
+
+                # No validator means any response that did not raise is a pass.
+                success = bool(task.validate(response)) if task.validate else True
+                results.results.append(EvalResult(
+                    task_name=label,
+                    success=success,
+                    duration_ms=duration,
+                    tokens_in=getattr(response, "tokens_in", 0),
+                    tokens_out=getattr(response, "tokens_out", 0),
+                    turns_taken=getattr(response, "turns_taken", 0),
+                    error=None if success else "Validation failed",
+                ))
+            except Exception as e:
+                # Record the failure and keep going so one bad run cannot abort the suite.
+                duration = (time.perf_counter() - start) * 1000
+                results.results.append(EvalResult(
+                    task_name=label,
+                    success=False,
+                    duration_ms=duration,
+                    error=str(e),
+                ))
+
+    return results
+
+
+async def run_benchmark_async(
+    agent: Any,
+    tasks: list[EvalTask],
+    *,
+    runs: int = 1,
+    reset_between_tasks: bool = True,
+) -> BenchmarkResults:
+    """Async version of run_benchmark: awaits agent.chat() for each run, one at a time."""
+    results = BenchmarkResults()
+
+    for task in tasks:
+        # Results are labelled "category/name" unless the category is the default.
+        label = task.name if task.category == "general" else f"{task.category}/{task.name}"
+        for _ in range(runs):
+            if reset_between_tasks:
+                agent.reset()  # NOTE(review): called synchronously — confirm reset() is not a coroutine
+
+            # Time only the chat call; validation runs after the clock is read.
+            start = time.perf_counter()
+            try:
+                response = await agent.chat(task.prompt)
+                duration = (time.perf_counter() - start) * 1000  # milliseconds
+
+                # No validator means any response that did not raise is a pass.
+                success = bool(task.validate(response)) if task.validate else True
+                results.results.append(EvalResult(
+                    task_name=label,
+                    success=success,
+                    duration_ms=duration,
+                    tokens_in=getattr(response, "tokens_in", 0),
+                    tokens_out=getattr(response, "tokens_out", 0),
+                    turns_taken=getattr(response, "turns_taken", 0),
+                    error=None if success else "Validation failed",
+                ))
+            except Exception as e:
+                # Record the failure and keep going so one bad run cannot abort the suite.
+                duration = (time.perf_counter() - start) * 1000
+                results.results.append(EvalResult(
+                    task_name=label,
+                    success=False,
+                    duration_ms=duration,
+                    error=str(e),
+                ))
+
+    return results
diff --git a/src/hawk/memory_tools.py b/src/hawk/memory_tools.py
new file mode 100644
index 0000000..97be2b5
--- /dev/null
+++ b/src/hawk/memory_tools.py
@@ -0,0 +1,132 @@
+"""Memory-as-voluntary-tools for agent-driven memory management.
+
+Lets agents strategically decide what to remember/recall rather than
+auto-ingesting everything. Wraps yaad's memory API as tool functions.
+
+Usage:
+    from hawk.memory_tools import MemoryTools
+
+    mem = MemoryTools(client)
+    agent_config.tools.extend(mem.get_tools())
+"""
+
+from __future__ import annotations
+
+from typing import Any, Optional
+
+from .tools import Tool
+
+
+class MemoryTools:
+    """Provides record/retrieve memory operations as agent tools.
+
+    The agent can voluntarily decide to:
+    - Record important information for future reference
+    - Retrieve relevant memories for the current task
+    - Forget outdated or incorrect information
+    """
+
+    def __init__(self, client: Any, *, session_id: Optional[str] = None) -> None:
+        self._client = client  # probed with hasattr() for optional `remember` / `recall` methods
+        self._session_id = session_id  # forwarded as `session_id=` to client.remember / client.recall
+        self._local_memories: list[dict[str, str]] = []  # in-process store; dicts with content/category/importance
+
+    def record_memory(self, content: str, category: str = "general", importance: str = "normal") -> str:
+        """Record a memory in the session store and, when the client has `remember`, persistently."""
+        memory = {
+            "content": content,
+            "category": category,
+            "importance": importance,
+        }
+        self._local_memories.append(memory)
+        # Only show an ellipsis when the preview was actually truncated.
+        preview = content if len(content) <= 100 else f"{content[:100]}..."
+        # Persist through the client's `remember` API when it exists (best effort).
+        try:
+            if hasattr(self._client, "remember"):
+                self._client.remember(content, session_id=self._session_id)
+                return f"Recorded to persistent memory: '{preview}'"
+        except Exception:  # deliberate best effort: fall back to the session-only message
+            pass
+        return f"Recorded to session memory: '{preview}'"
+
+    def retrieve_memories(self, query: str, limit: int = 5) -> str:
+        """Return up to `limit` memories for `query`, preferring the client's `recall` API."""
+        # Try the client's recall API first (only when the client exposes one).
+        try:
+            if hasattr(self._client, "recall"):
+                recalled = self._client.recall(query, limit=limit, session_id=self._session_id)
+                if recalled:
+                    return f"Recalled {len(recalled)} memories:\n" + "\n".join(
+                        f"- {m}" for m in recalled
+                    )
+        except Exception:  # deliberate best effort: fall through to the local search
+            pass
+        # Fallback: case-insensitive substring match over this session's memories.
+        query_lower = query.lower()
+        matches = [
+            mem["content"]
+            for mem in self._local_memories
+            if query_lower in mem["content"].lower()
+        ]
+        # Truncate before counting so the reported total matches the listed items.
+        shown = matches[:max(limit, 0)]
+        if shown:
+            return f"Found {len(shown)} relevant memories:\n" + "\n".join(
+                f"- {r}" for r in shown
+            )
+        return "No relevant memories found."
+
+    def forget_memory(self, content_fragment: str) -> str:
+        """Remove session-local memories whose content contains the fragment (case-insensitive)."""
+        needle = content_fragment.lower()
+        if not needle.strip():  # "" is a substring of every string; refuse to wipe all memories
+            return "No matching memories found."
+        before = len(self._local_memories)
+        self._local_memories = [m for m in self._local_memories if needle not in m["content"].lower()]
+        removed = before - len(self._local_memories)
+        return f"Removed {removed} matching memories." if removed else "No matching memories found."
+
+    def get_tools(self) -> list[Tool]:
+        """Return the three memory operations as Tool objects bound to this instance's methods."""
+        return [
+            Tool(
+                name="record_memory",
+                description="Record important information to long-term memory for future reference. Use for key findings, decisions, or facts you'll need later.",
+                parameters={
+                    "type": "object",
+                    "properties": {
+                        "content": {"type": "string", "description": "The information to remember"},
+                        "category": {"type": "string", "description": "Category (general/technical/personal/project)", "default": "general"},
+                        "importance": {"type": "string", "description": "Importance level (low/normal/high)", "default": "normal"},
+                    },
+                    "required": ["content"],  # category/importance mirror the method's own defaults
+                },
+                fn=self.record_memory,
+            ),
+            Tool(
+                name="retrieve_memories",
+                description="Search long-term memory for relevant information. Use when you need context from previous interactions.",
+                parameters={
+                    "type": "object",
+                    "properties": {
+                        "query": {"type": "string", "description": "Search query for relevant memories"},
+                        "limit": {"type": "integer", "description": "Max results to return", "default": 5},
+                    },
+                    "required": ["query"],
+                },
+                fn=self.retrieve_memories,
+            ),
+            Tool(
+                name="forget_memory",
+                description="Remove outdated or incorrect information from memory.",
+                parameters={
+                    "type": "object",
+                    "properties": {
+                        "content_fragment": {"type": "string", "description": "Text fragment to match for deletion"},
+                    },
+                    "required": ["content_fragment"],
+                },
+                fn=self.forget_memory,  # only filters the session-local list; the client is not called
+            ),
+        ]
diff --git a/src/hawk/plan.py b/src/hawk/plan.py
new file mode 100644
index 0000000..b2d78f8
--- /dev/null
+++ b/src/hawk/plan.py
@@ -0,0 +1,384 @@
+"""Plan-as-tools module with contextual hints for autonomous agent steering."""
+
+from __future__ import annotations
+
+import uuid
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+from enum import Enum
+from typing import Any, Callable, Literal, Optional
+
+from .tools import Tool
+
+
+class SubTaskState(str, Enum):  # str mixin: members compare equal to their string values; also used for Plan.state
+    TODO = "todo"
+    IN_PROGRESS = "in_progress"  # rendered with a [WIP] marker by SubTask.to_markdown
+    DONE = "done"  # set by SubTask.finish / Plan.finish("done", ...)
+    ABANDONED = "abandoned"  # rendered with an [Abandoned] marker by SubTask.to_markdown
+
+
+@dataclass
+class SubTask:
+    name: str
+    description: str
+    expected_outcome: str
+    state: SubTaskState = SubTaskState.TODO
+    outcome: Optional[str] = None  # set by finish()
+    created_at: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())  # UTC ISO-8601 string
+    finished_at: Optional[str] = None  # UTC ISO-8601 string, set by finish()
+
+    def finish(self, outcome: str) -> None:  # mark DONE and record the outcome plus completion time
+        self.state = SubTaskState.DONE
+        self.outcome = outcome
+        self.finished_at = datetime.now(timezone.utc).isoformat()
+
+    def to_markdown(self, detailed: bool = False) -> str:  # checklist line; indented detail lines when detailed=True
+        status_map = {
+            SubTaskState.TODO: "- [ ]",
+            SubTaskState.IN_PROGRESS: "- [ ] [WIP]",
+            SubTaskState.DONE: "- [x]",
+            SubTaskState.ABANDONED: "- [ ] [Abandoned]",
+        }
+        if not detailed:
+            return f"{status_map[self.state]} {self.name}"
+        lines = [
+            f"{status_map[self.state]} {self.name}",
+            f"    Description: {self.description}",
+            f"    Expected Outcome: {self.expected_outcome}",
+            f"    State: {self.state.value}",
+        ]
+        if self.state == SubTaskState.DONE:  # outcome and finish time are only shown for completed subtasks
+            lines.append(f"    Outcome: {self.outcome}")
+            lines.append(f"    Finished: {self.finished_at}")
+        return "\n".join(lines)
+
+
+@dataclass
+class Plan:
+    name: str
+    description: str
+    expected_outcome: str
+    subtasks: list[SubTask] = field(default_factory=list)
+    id: str = field(default_factory=lambda: str(uuid.uuid4())[:8])  # first 8 characters of a random UUID4
+    state: SubTaskState = SubTaskState.TODO  # the subtask state enum is reused for the plan as a whole
+    created_at: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())  # UTC ISO-8601 string
+    outcome: Optional[str] = None  # set by finish()
+    finished_at: Optional[str] = None  # UTC ISO-8601 string, set by finish()
+
+    def to_markdown(self) -> str:  # header fields followed by one non-detailed checklist line per subtask
+        subtasks_md = "\n".join(st.to_markdown() for st in self.subtasks)
+        return (
+            f"# {self.name}\n"
+            f"**Description**: {self.description}\n"
+            f"**Expected Outcome**: {self.expected_outcome}\n"
+            f"**State**: {self.state.value}\n"
+            f"## Subtasks\n{subtasks_md}"
+        )
+
+    def finish(self, state: Literal["done", "abandoned"], outcome: str) -> None:
+        self.state = SubTaskState.DONE if state == "done" else SubTaskState.ABANDONED  # NOTE(review): any value other than "done" becomes ABANDONED
+        self.outcome = outcome
+        self.finished_at = datetime.now(timezone.utc).isoformat()
+
+
+class PlanNotebook:
+    """Manages plans as tool-functions, providing contextual hints to steer the agent.
+
+    The plan notebook exposes plan operations as Tools that can be registered
+    with a Toolkit or Agent. After each agent turn, call get_hint() to get
+    a system-level message that guides the agent on what to do next.
+
+    Usage:
+        notebook = PlanNotebook()
+        # Register plan tools with your agent
+        agent_config.tools.extend(notebook.get_tools())
+        # After each turn, inject hint
+        hint = notebook.get_hint()
+        if hint:
+            # Prepend to next message or inject as system context
+            ...
+    """
+
+    def __init__(self, max_subtasks: int = 20) -> None:
+        self._current_plan: Optional[Plan] = None  # the active plan, or None when there is none
+        self._history: list[Plan] = []  # finished or replaced plans, in the order they were retired
+        self._max_subtasks = max_subtasks  # create_plan keeps only this many of the supplied subtasks
+        self._on_change: list[Callable[[Optional[Plan]], None]] = []  # callbacks invoked by _notify()
+
+    @property
+    def current_plan(self) -> Optional[Plan]:  # read-only view of the active plan (None when no plan is active)
+        return self._current_plan
+
+    def on_plan_change(self, callback: Callable[[Optional[Plan]], None]) -> None:  # subscribe to plan mutations
+        self._on_change.append(callback)  # callbacks run in registration order; there is no unsubscribe here
+
+    def _notify(self) -> None:  # call every registered callback with the current plan (None after finish_plan)
+        for cb in self._on_change:
+            cb(self._current_plan)  # exceptions raised by a callback propagate to the caller
+
+    def create_plan(
+        self, name: str, description: str, expected_outcome: str, subtasks: list[dict[str, str]]
+    ) -> str:
+        """Create a new plan with subtasks. An unfinished current plan is abandoned and moved to history."""
+        tasks = [
+            SubTask(
+                name=st["name"],  # a missing key raises KeyError
+                description=st["description"],
+                expected_outcome=st["expected_outcome"],
+            )
+            for st in subtasks[:self._max_subtasks]  # subtasks beyond the cap are silently dropped
+        ]
+        if self._current_plan and self._current_plan.state not in (SubTaskState.DONE, SubTaskState.ABANDONED):
+            self._current_plan.finish("abandoned", "Replaced by new plan")
+            self._history.append(self._current_plan)
+
+        self._current_plan = Plan(
+            name=name, description=description,
+            expected_outcome=expected_outcome, subtasks=tasks,
+        )
+        self._notify()
+        return f"Plan '{name}' created with {len(tasks)} subtasks."
+
+    def update_subtask_state(self, subtask_idx: int, state: str) -> str:
+        """Set a subtask to todo, in_progress, or abandoned; problems are returned as "Error: ..." strings."""
+        if not self._current_plan:
+            return "Error: No active plan. Create one first."
+        if not 0 <= subtask_idx < len(self._current_plan.subtasks):
+            return f"Error: Invalid index {subtask_idx}."
+        if state not in ("todo", "in_progress", "abandoned"):  # "done" is only reachable via finish_subtask
+            return f"Error: Invalid state '{state}'."
+
+        if state == "in_progress":
+            for i, st in enumerate(self._current_plan.subtasks):
+                if st.state == SubTaskState.IN_PROGRESS:  # at most one subtask may be in progress at a time
+                    return f"Error: Subtask {i} ('{st.name}') already in progress. Finish it first."
+                if i < subtask_idx and st.state not in (SubTaskState.DONE, SubTaskState.ABANDONED):  # enforce sequential order
+                    return f"Error: Previous subtask {i} ('{st.name}') not done yet."
+
+        self._current_plan.subtasks[subtask_idx].state = SubTaskState(state)
+        if self._current_plan.state == SubTaskState.TODO:  # the first successful update marks the plan itself as started
+            self._current_plan.state = SubTaskState.IN_PROGRESS
+        self._notify()
+        return f"Subtask {subtask_idx} marked as '{state}'."
+
+    def finish_subtask(self, subtask_idx: int, outcome: str) -> str:
+        """Mark a subtask as done with its outcome, then auto-start the next subtask that is still open."""
+        if not self._current_plan:
+            return "Error: No active plan."
+        subtasks = self._current_plan.subtasks
+        if not 0 <= subtask_idx < len(subtasks):
+            return f"Error: Invalid index {subtask_idx}."
+
+        subtasks[subtask_idx].finish(outcome)
+        # Only an open subtask may be auto-started: blindly activating idx + 1
+        # would reopen a subtask that is already done or abandoned.
+        for nxt in subtasks[subtask_idx + 1:]:
+            if nxt.state in (SubTaskState.TODO, SubTaskState.IN_PROGRESS):
+                nxt.state = SubTaskState.IN_PROGRESS
+                self._notify()
+                return f"Subtask {subtask_idx} done. Next: '{nxt.name}' (now in_progress)."
+        self._notify()
+        return f"Subtask {subtask_idx} done. All subtasks complete — call finish_plan."
+
+    def revise_plan(self, subtask_idx: int, action: str, subtask: Optional[dict[str, str]] = None) -> str:
+        """Revise the current plan by adding, replacing, or deleting one subtask.
+
+        `subtask_idx` is the insert position for 'add' (0..len) and the target index for
+        'revise'/'delete'; `subtask` is required for 'add' and 'revise'. Index, action,
+        and capacity problems are returned as "Error: ..." strings.
+        """
+        if not self._current_plan:
+            return "Error: No active plan."
+        subtasks = self._current_plan.subtasks
+        if action not in ("add", "revise", "delete"):
+            return f"Error: Invalid action '{action}'."
+        upper = len(subtasks) + 1 if action == "add" else len(subtasks)  # 'add' may also append at the end
+        if not 0 <= subtask_idx < upper:
+            return f"Error: Invalid index {subtask_idx}."
+        if action == "delete":
+            removed = subtasks.pop(subtask_idx)
+            self._notify()
+            return f"Deleted subtask '{removed.name}'."
+        if not subtask:
+            return "Error: subtask required for add/revise."
+        if action == "add" and len(subtasks) >= self._max_subtasks:  # keep the cap that create_plan applies
+            return f"Error: Plan already has the maximum of {self._max_subtasks} subtasks."
+        new_st = SubTask(name=subtask["name"], description=subtask["description"], expected_outcome=subtask["expected_outcome"])
+        if action == "add":
+            subtasks.insert(subtask_idx, new_st)
+            message = f"Added subtask '{new_st.name}' at index {subtask_idx}."
+        else:  # revise: the replacement starts over in the 'todo' state
+            subtasks[subtask_idx] = new_st
+            message = f"Revised subtask at index {subtask_idx}."
+        self._notify()
+        return message
+
+    def finish_plan(self, state: str, outcome: str) -> str:
+        """Finish ('done') or abandon ('abandoned') the current plan and move it to the history."""
+        if self._current_plan is None or state not in ("done", "abandoned"):  # Plan.finish maps any non-"done" value to abandoned
+            return "Error: No active plan." if self._current_plan is None else f"Error: Invalid state '{state}'."
+        self._current_plan.finish(state, outcome)
+        self._history.append(self._current_plan)
+        self._current_plan = None
+        self._notify()
+        return f"Plan finished as '{state}'."
+
+    def view_history(self) -> str:
+        """List retired plans, one '- name (id=..., state=...)' line each, in the order they were retired."""
+        if not self._history:
+            return "No historical plans."
+        return "\n".join(
+            f"- {plan.name} (id={plan.id}, state={plan.state.value})"
+            for plan in self._history
+        )
+
+    def get_hint(self) -> Optional[str]:
+        """Generate a contextual hint based on current plan state.
+
+        Returns a <system-hint> string to inject as system context: a suggestion to
+        create a plan when none is active, otherwise the plan markdown followed by the
+        next action (continue the in-progress subtask, finish the plan, or start one).
+        """
+        if self._current_plan is None:
+            return (
+                "<system-hint>If the task is complex or multi-step, create a plan "
+                "by calling 'create_plan'. Otherwise proceed directly.</system-hint>"
+            )
+
+        plan = self._current_plan
+        plan_md = plan.to_markdown()
+        states = [st.state for st in plan.subtasks]
+        # An in-progress subtask always takes priority.
+        if SubTaskState.IN_PROGRESS in states:
+            in_progress_idx = states.index(SubTaskState.IN_PROGRESS)
+            in_progress = plan.subtasks[in_progress_idx]
+            return (
+                f"<system-hint>Current plan:\n```\n{plan_md}\n```\n"
+                f"Subtask {in_progress_idx} ('{in_progress.name}') is in progress.\n"
+                f"Details: {in_progress.description}\n"
+                f"Expected outcome: {in_progress.expected_outcome}\n"
+                "Execute it, then call finish_subtask with the specific outcome.</system-hint>"
+            )
+
+        # Nothing is pending (every subtask is done/abandoned, or the plan has no
+        # subtasks). Checked before the "start" hint so the agent is never told to
+        # start subtask 0 of a plan that has nothing left to start.
+        if SubTaskState.TODO not in states:
+            return (
+                f"<system-hint>Current plan:\n```\n{plan_md}\n```\n"
+                "All subtasks complete. Call finish_plan('done', outcome) to wrap up.</system-hint>"
+            )
+
+        n_done = states.count(SubTaskState.DONE)
+        if n_done == 0:
+            # Point at the first pending subtask; index 0 may already be abandoned.
+            first_todo = states.index(SubTaskState.TODO)
+            return (
+                f"<system-hint>Current plan:\n```\n{plan_md}\n```\n"
+                "Start by marking the first subtask as 'in_progress' via "
+                f"update_subtask_state({first_todo}, 'in_progress'), then execute it.</system-hint>"
+            )
+
+        # Some done but nothing in progress
+        return (
+            f"<system-hint>Current plan:\n```\n{plan_md}\n```\n"
+            f"{n_done} subtasks done. Mark the next as 'in_progress' and execute it.</system-hint>"
+        )
+
+    def get_tools(self) -> list[Tool]:
+        """Return the six plan operations as Tool objects bound to this notebook's methods."""
+        return [
+            Tool(
+                name="create_plan",
+                description="Create a plan with sequential subtasks for complex tasks",
+                parameters={
+                    "type": "object",
+                    "properties": {
+                        "name": {"type": "string", "description": "Plan name (concise, <10 words)"},
+                        "description": {"type": "string", "description": "Plan description with constraints and goals"},
+                        "expected_outcome": {"type": "string", "description": "Specific measurable outcome"},
+                        "subtasks": {
+                            "type": "array",
+                            "items": {
+                                "type": "object",
+                                "properties": {
+                                    "name": {"type": "string"},
+                                    "description": {"type": "string"},
+                                    "expected_outcome": {"type": "string"},
+                                },
+                                "required": ["name", "description", "expected_outcome"],  # create_plan indexes all three keys
+                            },
+                        },
+                    },
+                    "required": ["name", "description", "expected_outcome", "subtasks"],
+                },
+                fn=self.create_plan,
+            ),
+            Tool(
+                name="update_subtask_state",
+                description="Update a subtask's state (todo/in_progress/abandoned)",
+                parameters={
+                    "type": "object",
+                    "properties": {
+                        "subtask_idx": {"type": "integer", "description": "Subtask index (0-based)"},
+                        "state": {"type": "string", "enum": ["todo", "in_progress", "abandoned"]},  # same set the method validates
+                    },
+                    "required": ["subtask_idx", "state"],
+                },
+                fn=self.update_subtask_state,
+            ),
+            Tool(
+                name="finish_subtask",
+                description="Mark subtask as done with specific outcome (not 'I did X' but the actual result)",
+                parameters={
+                    "type": "object",
+                    "properties": {
+                        "subtask_idx": {"type": "integer", "description": "Subtask index (0-based)"},
+                        "outcome": {"type": "string", "description": "Specific outcome data/result"},
+                    },
+                    "required": ["subtask_idx", "outcome"],
+                },
+                fn=self.finish_subtask,
+            ),
+            Tool(
+                name="revise_plan",
+                description="Revise current plan: add/revise/delete a subtask",
+                parameters={
+                    "type": "object",
+                    "properties": {
+                        "subtask_idx": {"type": "integer"},
+                        "action": {"type": "string", "enum": ["add", "revise", "delete"]},
+                        "subtask": {  # NOTE(review): no "required" list here, yet revise_plan indexes all three keys
+                            "type": "object",
+                            "properties": {
+                                "name": {"type": "string"},
+                                "description": {"type": "string"},
+                                "expected_outcome": {"type": "string"},
+                            },
+                        },
+                    },
+                    "required": ["subtask_idx", "action"],  # "subtask" is optional because 'delete' does not use it
+                },
+                fn=self.revise_plan,
+            ),
+            Tool(
+                name="finish_plan",
+                description="Finish or abandon the current plan with outcome/reason",
+                parameters={
+                    "type": "object",
+                    "properties": {
+                        "state": {"type": "string", "enum": ["done", "abandoned"]},
+                        "outcome": {"type": "string", "description": "Outcome or reason"},
+                    },
+                    "required": ["state", "outcome"],
+                },
+                fn=self.finish_plan,
+            ),
+            Tool(
+                name="view_plan_history",  # tool name differs from the bound method name (view_history)
+                description="View historical completed/abandoned plans",
+                parameters={"type": "object", "properties": {}},
+                fn=self.view_history,
+            ),
+        ]
diff --git a/src/hawk/toolkit.py b/src/hawk/toolkit.py
new file mode 100644
index 0000000..019f76c
--- /dev/null
+++ b/src/hawk/toolkit.py
@@ -0,0 +1,315 @@
+"""Advanced toolkit with groups, middleware, and async execution for Hawk SDK."""
+
+from __future__ import annotations
+
+import asyncio
+import inspect
+import json
+import uuid
+from dataclasses import dataclass, field
+from typing import Any, AsyncGenerator, Awaitable, Callable, Literal, Optional
+
+from .tools import Tool
+
+
+@dataclass
+class ToolGroup:  # a named, switchable set of tools managed by Toolkit
+    name: str
+    description: str
+    active: bool = False  # groups start inactive unless created with active=True
+    notes: Optional[str] = None  # optional text that Toolkit.reset_groups appends to its message on activation
+
+
+@dataclass
+class BackgroundTask:  # bookkeeping record for one tool call started by Toolkit.execute_async in the background
+    id: str
+    tool_name: str
+    task: asyncio.Task  # the asyncio task running Toolkit._run_background
+    result: Optional[str] = None  # final (post-processed) output or a JSON error payload
+    done: bool = False  # set when the run finished, failed, or was cancelled
+    cancelled: bool = False  # set when the run observed asyncio.CancelledError
+
+
+MiddlewareFunc = Callable[[dict[str, Any], Callable[..., Any]], Any]  # (context, next_handler) -> result
+PostprocessFunc = Callable[[str, str], str]  # (tool_name, result) -> modified_result
+
+
+class Toolkit:
+    """Advanced toolkit with tool groups, middleware chain, and async execution.
+
+    Features:
+        - Tool groups: organize tools and activate/deactivate groups dynamically
+        - Middleware: onion-model pre/post processing of tool calls
+        - Background execution: long-running tools get a task_id for polling
+        - Postprocess: per-tool output transformation
+
+    Usage:
+        toolkit = Toolkit()
+        toolkit.create_group("web", "Web browsing tools")
+        toolkit.register(my_tool, group="web")
+        toolkit.activate_groups(["web"])
+
+        # Add middleware
+        toolkit.register_middleware(logging_middleware)
+
+        # Get active tools for sending to model
+        active_tools = toolkit.get_active_tools()
+    """
+
+    def __init__(self) -> None:
+        self._tools: dict[str, Tool] = {}  # tool_name -> Tool, in registration order
+        self._tool_groups: dict[str, str] = {}  # tool_name -> group_name
+        self._groups: dict[str, ToolGroup] = {  # always seeded with the built-in, active "basic" group
+            "basic": ToolGroup(name="basic", description="Default tool group", active=True)
+        }
+        self._middlewares: list[MiddlewareFunc] = []  # applied by execute(); first registered is outermost
+        self._postprocess: dict[str, PostprocessFunc] = {}  # tool_name -> postprocess fn
+        self._background_tasks: dict[str, BackgroundTask] = {}  # task_id -> record for background runs
+        self._async_execution: set[str] = set()  # tool names with async execution enabled
+
+    def create_group(
+        self, name: str, description: str, *, active: bool = False, notes: Optional[str] = None
+    ) -> None:  # add a new tool group; raises ValueError when the name is already taken
+        if name in self._groups:
+            raise ValueError(f"Group '{name}' already exists")
+        self._groups[name] = ToolGroup(name=name, description=description, active=active, notes=notes)
+
+    def activate_groups(self, names: list[str]) -> None:  # switch the named groups on
+        for name in names:
+            if name in self._groups:  # unknown names are ignored silently
+                self._groups[name].active = True
+
+    def deactivate_groups(self, names: list[str]) -> None:  # switch the named groups off
+        for name in names:
+            if name in self._groups and name != "basic":  # unknown names are ignored; "basic" can never be turned off
+                self._groups[name].active = False
+
+    def reset_groups(self, active_groups: list[str]) -> str:
+        """Set the absolute state of all groups except "basic"; returns a summary including activated groups' notes."""
+        for name, group in self._groups.items():
+            if name == "basic":  # the default group keeps whatever state it has
+                continue
+            group.active = name in active_groups
+
+        activated = [n for n in active_groups if n in self._groups]  # unknown names are dropped from the report
+        notes_parts = []
+        for name in activated:
+            g = self._groups[name]
+            if g.notes:
+                notes_parts.append(f"## {name}\n{g.notes}")
+
+        if not activated:
+            return "All tool groups deactivated."
+        msg = f"Activated: {', '.join(activated)}"
+        if notes_parts:
+            msg += "\n" + "\n".join(notes_parts)
+        return msg
+
+    def register(
+        self,
+        tool: Tool,
+        *,
+        group: str = "basic",
+        postprocess: Optional[PostprocessFunc] = None,
+        async_execution: bool = False,
+        on_conflict: Literal["raise", "override", "skip"] = "raise",
+    ) -> None:
+        """Register `tool` in `group`; raises ValueError for an unknown group or (by default) a duplicate name."""
+        if group not in self._groups:
+            raise ValueError(f"Group '{group}' does not exist. Create it first.")
+        if tool.name in self._tools:
+            if on_conflict == "raise":
+                raise ValueError(f"Tool '{tool.name}' already registered")
+            elif on_conflict == "skip":
+                return
+            self._postprocess.pop(tool.name, None)  # override: the old tool's settings must not leak onto the new one
+            self._async_execution.discard(tool.name)
+        self._tools[tool.name] = tool
+        self._tool_groups[tool.name] = group
+        if postprocess:
+            self._postprocess[tool.name] = postprocess
+        if async_execution:
+            self._async_execution.add(tool.name)
+
+    def unregister(self, tool_name: str) -> None:  # remove a tool and all per-tool settings; unknown names are a no-op
+        self._tools.pop(tool_name, None)
+        self._tool_groups.pop(tool_name, None)
+        self._postprocess.pop(tool_name, None)
+        self._async_execution.discard(tool_name)
+
+    def register_middleware(self, middleware: MiddlewareFunc) -> None:  # append to the chain used by execute()
+        self._middlewares.append(middleware)  # earlier registrations wrap later ones
+
+    def get_active_tools(self) -> list[Tool]:
+        """Return tools whose group is active; tools in "basic" (or with no group) are always included."""
+        selected: list[Tool] = []
+        for tool_name, tool in self._tools.items():
+            group_name = self._tool_groups.get(tool_name, "basic")
+            if group_name != "basic":
+                group = self._groups.get(group_name)
+                if group is None or not group.active:  # unknown groups count as inactive
+                    continue
+            selected.append(tool)
+        return selected
+
+    def get_tool_schemas(self) -> list[dict[str, Any]]:
+        """Return `Tool.to_dict()` for every tool reported by get_active_tools()."""
+        return [t.to_dict() for t in self.get_active_tools()]
+
+    def execute(self, tool_name: str, arguments: dict[str, Any]) -> str:
+        """Execute a tool synchronously through the middleware chain, then apply its postprocess hook."""
+        if tool_name not in self._tools:
+            return json.dumps({"error": f"Tool '{tool_name}' not found"})
+
+        tool = self._tools[tool_name]
+        group_name = self._tool_groups.get(tool_name, "basic")
+        group = self._groups.get(group_name)
+
+        if group and not group.active and group_name != "basic":  # "basic" tools run regardless of the flag
+            return json.dumps({"error": f"Tool '{tool_name}' is in inactive group '{group_name}'"})
+
+        # Build context for middleware
+        context = {"tool_name": tool_name, "arguments": arguments, "tool": tool}
+
+        # Apply middleware chain
+        def base_handler(ctx: dict[str, Any]) -> str:  # innermost handler: the one that actually calls the tool
+            t = ctx["tool"]
+            args = ctx["arguments"]
+            if t.fn is None:
+                return json.dumps({"error": f"Tool '{t.name}' has no implementation"})
+            try:
+                result = t.fn(**args)
+                if inspect.isawaitable(result):  # async tools are rejected here; the awaitable is never awaited
+                    raise TypeError(f"Tool '{t.name}' is async. Use execute_async.")
+                return result if isinstance(result, str) else json.dumps(result)
+            except Exception as e:  # failures inside the tool call become a JSON error payload
+                return json.dumps({"error": str(e)})
+
+        handler = base_handler
+        for mw in reversed(self._middlewares):  # wrap in reverse so the first registered middleware is outermost
+            prev_handler = handler
+            handler = lambda ctx, _mw=mw, _prev=prev_handler: _mw(ctx, _prev)  # defaults pin this iteration's values
+
+        result = handler(context)  # NOTE(review): exceptions raised by a middleware itself are not caught here
+
+        # Apply postprocess
+        if tool_name in self._postprocess:
+            result = self._postprocess[tool_name](tool_name, result)
+
+        return result
+
+    async def execute_async(self, tool_name: str, arguments: dict[str, Any]) -> str:
+        """Execute a tool asynchronously with postprocessing; the middleware chain is NOT applied on this path."""
+        if tool_name not in self._tools:
+            return json.dumps({"error": f"Tool '{tool_name}' not found"})
+
+        tool = self._tools[tool_name]
+        group_name = self._tool_groups.get(tool_name, "basic")
+        group = self._groups.get(group_name)
+
+        if group and not group.active and group_name != "basic":  # "basic" tools run regardless of the flag
+            return json.dumps({"error": f"Tool '{tool_name}' is in inactive group '{group_name}'"})
+
+        # Check if background execution
+        if tool_name in self._async_execution:
+            task_id = str(uuid.uuid4())[:8]  # short id; returned to the caller for view_task / wait_task / cancel_task
+            task = asyncio.create_task(self._run_background(task_id, tool, arguments))
+            self._background_tasks[task_id] = BackgroundTask(  # stored before this coroutine yields, so the task can find it
+                id=task_id, tool_name=tool_name, task=task
+            )
+            return json.dumps({
+                "status": "running",
+                "task_id": task_id,
+                "message": f"Tool '{tool_name}' executing in background. Use view_task('{task_id}') to check status.",
+            })
+
+        context = {"tool_name": tool_name, "arguments": arguments, "tool": tool}
+
+        async def base_handler(ctx: dict[str, Any]) -> str:  # calls the tool, awaiting the result when it is awaitable
+            t = ctx["tool"]
+            args = ctx["arguments"]
+            if t.fn is None:
+                return json.dumps({"error": f"Tool '{t.name}' has no implementation"})
+            try:
+                result = t.fn(**args)
+                if inspect.isawaitable(result):  # sync and async tool functions are both supported
+                    result = await result
+                return result if isinstance(result, str) else json.dumps(result)
+            except Exception as e:  # tool failures become a JSON error payload
+                return json.dumps({"error": str(e)})
+
+        # For async, just run directly (middleware can be extended later for async)
+        result = await base_handler(context)
+
+        if tool_name in self._postprocess:
+            result = self._postprocess[tool_name](tool_name, result)
+
+        return result
+
+    async def _run_background(
+        self, task_id: str, tool: Tool, arguments: dict[str, Any]
+    ) -> None:  # body of a background run: executes the tool and stores the outcome on its BackgroundTask record
+        try:
+            if tool.fn is None:
+                result = json.dumps({"error": f"Tool '{tool.name}' has no implementation"})
+            else:
+                res = tool.fn(**arguments)
+                if inspect.isawaitable(res):
+                    res = await res
+                result = res if isinstance(res, str) else json.dumps(res)
+
+            if tool.name in self._postprocess:  # postprocess also runs on the "no implementation" payload
+                result = self._postprocess[tool.name](tool.name, result)
+
+            bt = self._background_tasks[task_id]
+            bt.result = result
+            bt.done = True
+        except asyncio.CancelledError:  # cancellation is recorded on the record and not re-raised
+            bt = self._background_tasks[task_id]
+            bt.cancelled = True
+            bt.done = True
+        except Exception as e:  # any other failure is stored as a JSON error payload
+            bt = self._background_tasks[task_id]
+            bt.result = json.dumps({"error": str(e)})
+            bt.done = True
+
+    def view_task(self, task_id: str) -> str:  # poll a background task; a finished task's result is handed out once
+        if task_id not in self._background_tasks:
+            return json.dumps({"error": f"Task '{task_id}' not found"})
+        bt = self._background_tasks[task_id]
+        if bt.cancelled:  # NOTE(review): cancelled records are reported but never removed from the registry
+            return json.dumps({"status": "cancelled", "task_id": task_id})
+        if bt.done:
+            result = bt.result
+            del self._background_tasks[task_id]  # later lookups of this id report "not found"
+            return result or json.dumps({"status": "done", "result": None})  # an empty-string result also takes the fallback
+        return json.dumps({"status": "running", "task_id": task_id, "tool": bt.tool_name})
+
+    async def wait_task(self, task_id: str, timeout: float = 30.0) -> str:  # wait up to `timeout` seconds, then report like view_task
+        if task_id not in self._background_tasks:
+            return json.dumps({"error": f"Task '{task_id}' not found"})
+        bt = self._background_tasks[task_id]
+        if bt.done:
+            return self.view_task(task_id)
+        try:
+            await asyncio.wait_for(asyncio.shield(bt.task), timeout=timeout)  # shield: a timeout must not cancel the tool run
+        except asyncio.TimeoutError:
+            return json.dumps({"status": "running", "message": f"Still running after {timeout}s"})
+        return self.view_task(task_id)
+
+    def cancel_task(self, task_id: str) -> str:  # request cancellation of a running background task
+        if task_id not in self._background_tasks:
+            return json.dumps({"error": f"Task '{task_id}' not found"})
+        bt = self._background_tasks[task_id]
+        if bt.done:
+            return json.dumps({"error": "Task already completed"})
+        bt.task.cancel()  # only requests cancellation; bt.cancelled is set later by _run_background
+        return json.dumps({"status": "cancelled", "task_id": task_id})
+
+    def state_dict(self) -> dict[str, Any]:  # snapshot for load_state_dict: only group activation is captured
+        return {"active_groups": [n for n, g in self._groups.items() if g.active]}
+
+    def load_state_dict(self, state: dict[str, Any]) -> None:
+        """Restore group activation from a state_dict() snapshot; "basic" always stays active."""
+        for name, group in self._groups.items():
+            group.active = name == "basic" or name in state.get("active_groups", [])
diff --git a/src/hawk/tracing.py b/src/hawk/tracing.py
new file mode 100644
index 0000000..ecffa49
--- /dev/null
+++ b/src/hawk/tracing.py
@@ -0,0 +1,307 @@
+"""OpenTelemetry tracing decorators for Hawk SDK.
+
+Near-zero overhead when disabled (one flag check per call). Instruments agent chat,
+tool execution, and client calls with OTel GenAI semantic conventions.
+
+Usage:
+    from hawk.tracing import configure_tracing, trace_chat, trace_tool
+
+    # Enable tracing (call once at startup)
+    configure_tracing(endpoint="http://localhost:4318")
+
+    # Decorators are already applied to SDK internals, or use on custom code:
+    @trace_tool
+    def my_tool(query: str) -> str:
+        ...
+"""
+
+from __future__ import annotations
+
+import functools
+import inspect
+from typing import Any, AsyncGenerator, Callable, Generator, Optional, TypeVar
+
+T = TypeVar("T")
+# Module-level tracing state; configure_tracing() assigns both names.
+_tracing_enabled: bool = False
+_tracer: Any = None
+
+# (base-URL substring, provider name) pairs; detect_provider() returns the first match.
+_PROVIDER_PATTERNS: list[tuple[str, str]] = [
+    ("api.anthropic.com", "anthropic"),
+    ("api.openai.com", "openai"),
+    ("generativelanguage.googleapis.com", "google"),
+    ("api.cohere.com", "cohere"),
+    ("api.mistral.ai", "mistral"),
+    ("api.groq.com", "groq"),
+    ("localhost", "local"),
+    ("127.0.0.1", "local"),
+]
+
+
+def configure_tracing(
+    *,
+    endpoint: Optional[str] = None,
+    service_name: str = "hawk-sdk",
+    enabled: bool = True,
+) -> None:
+    """Configure OpenTelemetry tracing for the Hawk SDK.
+
+    Leaves tracing disabled if the OpenTelemetry packages cannot be imported.
+
+    Args:
+        endpoint: OTLP/HTTP traces URL, handed to the exporter unchanged
+                  (e.g. "http://localhost:4318/v1/traces"). If None, the
+                  OTEL_EXPORTER_OTLP_ENDPOINT env var is used when set;
+                  with neither, spans are created but not exported.
+        service_name: Service name for traces.
+        enabled: Whether tracing is enabled.
+    """
+    global _tracing_enabled, _tracer
+    if not enabled:
+        _tracing_enabled = False
+        return
+    try:
+        import os
+        from opentelemetry import trace
+        from opentelemetry.sdk.resources import Resource
+        from opentelemetry.sdk.trace import TracerProvider
+        from opentelemetry.sdk.trace.export import BatchSpanProcessor
+        provider = TracerProvider(resource=Resource.create({"service.name": service_name}))
+        # Honour the documented env-var fallback: with endpoint=None the exporter
+        # resolves its endpoint from the environment itself.
+        if endpoint or os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT"):
+            from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
+            provider.add_span_processor(BatchSpanProcessor(OTLPSpanExporter(endpoint=endpoint or None)))
+        trace.set_tracer_provider(provider)
+        _tracer = trace.get_tracer("hawk-sdk")
+        _tracing_enabled = True
+    except ImportError:
+        _tracing_enabled = False
+
+
+def is_tracing_enabled() -> bool:
+    """Report whether tracing is currently on (the module flag assigned by configure_tracing())."""
+    return _tracing_enabled
+
+
+def detect_provider(base_url: str) -> str:
+    """Detect the LLM provider from a base URL (case-insensitive); "unknown" if nothing matches."""
+    # Host names are case-insensitive, so normalise before substring matching;
+    # a missing or empty URL simply yields "unknown".
+    url = (base_url or "").lower()
+    return next((name for fragment, name in _PROVIDER_PATTERNS if fragment in url), "unknown")
+
+
+def _chat_request_attributes(args: tuple, kwargs: dict) -> dict[str, Any]:
+    """Build the initial span attributes for a chat call.
+
+    The message is read from the second positional argument (the first is
+    presumed to be ``self`` of the agent method) or from the ``message``
+    keyword, truncated to 500 characters.
+    """
+    attributes: dict[str, Any] = {
+        "gen_ai.operation.name": "chat",
+        "gen_ai.system": "hawk",
+    }
+    if len(args) > 1 and isinstance(args[1], str):
+        attributes["gen_ai.request.message"] = args[1][:500]
+    elif "message" in kwargs:
+        attributes["gen_ai.request.message"] = str(kwargs["message"])[:500]
+    return attributes
+
+
+def _finish_chat_span(span: Any, result: Any) -> None:
+    """Copy response text and token usage from ``result`` onto ``span``, then end it as OK."""
+    if hasattr(result, "response"):
+        span.set_attribute("gen_ai.response.text", str(result.response)[:500])
+    # Absent or None usage fields are skipped: None is not a valid attribute value.
+    for field, key in (("tokens_in", "gen_ai.usage.input_tokens"), ("tokens_out", "gen_ai.usage.output_tokens")):
+        if getattr(result, field, None) is not None:
+            span.set_attribute(key, getattr(result, field))
+    span.set_status(_ok_status())
+    span.end()
+
+
+def trace_chat(func: Callable[..., T]) -> Callable[..., T]:
+    """Decorator to trace agent chat calls with OTel spans.
+
+    Captures: operation name, input message, output response, token usage.
+    Works for both sync and ``async def`` callables; when tracing is disabled
+    the wrapped function is called directly.  The span is ended on every exit
+    path, including ``BaseException`` subclasses such as
+    ``asyncio.CancelledError`` that ``except Exception`` would miss.
+    """
+    if inspect.iscoroutinefunction(func):
+        @functools.wraps(func)
+        async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
+            if not _tracing_enabled or _tracer is None:
+                return await func(*args, **kwargs)
+
+            span_name = f"chat {func.__qualname__}"
+            attributes = _chat_request_attributes(args, kwargs)
+            with _tracer.start_as_current_span(span_name, attributes=attributes, end_on_exit=False) as span:
+                try:
+                    result = await func(*args, **kwargs)
+                    _finish_chat_span(span, result)
+                    return result
+                except BaseException as e:
+                    _record_error(span, e)
+                    raise
+        return async_wrapper  # type: ignore
+
+    @functools.wraps(func)
+    def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
+        if not _tracing_enabled or _tracer is None:
+            return func(*args, **kwargs)
+
+        span_name = f"chat {func.__qualname__}"
+        attributes = _chat_request_attributes(args, kwargs)
+        with _tracer.start_as_current_span(span_name, attributes=attributes, end_on_exit=False) as span:
+            try:
+                result = func(*args, **kwargs)
+                _finish_chat_span(span, result)
+                return result
+            except BaseException as e:
+                _record_error(span, e)
+                raise
+    return sync_wrapper  # type: ignore
+
+
+def trace_tool(func: Callable[..., T]) -> Callable[..., T]:
+    """Decorator to trace tool execution with OTel spans.
+
+    Captures: tool name, string results (first 1000 characters), errors.
+    When tracing is disabled the wrapped function is called directly.  The
+    span is ended on every exit path, including ``BaseException`` subclasses
+    such as ``asyncio.CancelledError`` that ``except Exception`` would miss.
+    """
+    def _start_span() -> Any:
+        """Open the tool span as the current span; the caller is responsible for ending it."""
+        tool_name = func.__name__
+        attributes = {
+            "gen_ai.operation.name": "execute_tool",
+            "gen_ai.tool.name": tool_name,
+        }
+        # The module-level tracer is looked up per call, not at decoration time.
+        return _tracer.start_as_current_span(f"tool {tool_name}", attributes=attributes, end_on_exit=False)
+
+    def _finish(span: Any, result: Any) -> None:
+        """Attach a string result (truncated), mark the span OK and end it."""
+        if isinstance(result, str):
+            span.set_attribute("gen_ai.tool.result", result[:1000])
+        span.set_status(_ok_status())
+        span.end()
+
+    if inspect.iscoroutinefunction(func):
+        @functools.wraps(func)
+        async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
+            if not _tracing_enabled or _tracer is None:
+                return await func(*args, **kwargs)
+
+            with _start_span() as span:
+                try:
+                    result = await func(*args, **kwargs)
+                    _finish(span, result)
+                    return result
+                except BaseException as e:
+                    _record_error(span, e)
+                    raise
+        return async_wrapper  # type: ignore
+
+    @functools.wraps(func)
+    def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
+        if not _tracing_enabled or _tracer is None:
+            return func(*args, **kwargs)
+
+        with _start_span() as span:
+            try:
+                result = func(*args, **kwargs)
+                _finish(span, result)
+                return result
+            except BaseException as e:
+                _record_error(span, e)
+                raise
+    return sync_wrapper  # type: ignore
+
+
+def trace(name: Optional[str] = None) -> Callable:
+    """Generic tracing decorator for plain functions, coroutine functions and sync generators.
+
+    Args:
+        name: Custom span name. Defaults to the function's ``__qualname__``.
+    """
+    def decorator(func: Callable[..., T]) -> Callable[..., T]:
+        if inspect.iscoroutinefunction(func):
+            @functools.wraps(func)
+            async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
+                if not _tracing_enabled or _tracer is None:
+                    return await func(*args, **kwargs)
+                # end_on_exit=False: the span is ended explicitly on each path below.
+                span_name = name or func.__qualname__
+                with _tracer.start_as_current_span(span_name, end_on_exit=False) as span:
+                    try:
+                        result = await func(*args, **kwargs)
+                        span.set_status(_ok_status())
+                        span.end()
+                        return result
+                    except BaseException as e:  # also asyncio.CancelledError, which is not an Exception
+                        _record_error(span, e)
+                        raise
+            return async_wrapper  # type: ignore
+
+        @functools.wraps(func)
+        def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
+            if not _tracing_enabled or _tracer is None:
+                return func(*args, **kwargs)
+            span_name = name or func.__qualname__
+            with _tracer.start_as_current_span(span_name, end_on_exit=False) as span:
+                try:
+                    result = func(*args, **kwargs)
+                    # A generator result keeps the span open; _trace_generator ends it later.
+                    if isinstance(result, Generator):
+                        return _trace_generator(result, span)
+                    span.set_status(_ok_status())
+                    span.end()
+                    return result
+                except BaseException as e:  # so KeyboardInterrupt/SystemExit still end the span
+                    _record_error(span, e)
+                    raise
+        return sync_wrapper  # type: ignore
+    return decorator
+
+
+def _trace_generator(gen: Generator, span: Any) -> Generator:
+    """Wrap a generator so ``span`` always ends: on exhaustion, on error, or on early close."""
+    try:
+        yield from gen  # unlike a plain for-loop, also forwards send() and throw()
+    except BaseException as e:
+        # GeneratorExit means the consumer stopped early: end the span without flagging an error.
+        span.end() if isinstance(e, GeneratorExit) else _record_error(span, e)
+        raise
+    span.set_status(_ok_status())
+    span.end()
+
+
+def _ok_status() -> Any:
+    """Return ``StatusCode.OK``, or ``None`` when ``opentelemetry.trace`` cannot be imported."""
+    try:
+        from opentelemetry.trace import StatusCode
+    except ImportError:
+        return None
+    return StatusCode.OK
+
+
+def _record_error(span: Any, error: Exception) -> None:
+    """Best-effort: mark ``span`` as failed with ``error``, then always end it."""
+    try:
+        from opentelemetry.trace import StatusCode
+        span.set_status(StatusCode.ERROR, str(error))
+        span.record_exception(error)
+    except (ImportError, AttributeError):
+        pass  # OpenTelemetry missing or span lacks these methods: skip the annotation
+    finally:
+        try:
+            span.end()  # runs on every path so the span is never left open
+        except Exception:
+            pass  # a failure while ending the span must not mask the caller's error