diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..39f1a41 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,67 @@ +# EditorConfig — https://editorconfig.org +# Canonical eco-wide template (.shared-templates/editorconfig.tmpl). + +root = true + +# Default for everything. +[*] +charset = utf-8 +end_of_line = lf +insert_final_newline = true +trim_trailing_whitespace = true +indent_style = space +indent_size = 4 + +# Go uses tabs by convention. +[*.go] +indent_style = tab +indent_size = 4 + +# Python — PEP 8. +[*.py] +indent_size = 4 + +# TypeScript / JavaScript — 2 spaces, ecosystem default. +[*.{ts,tsx,js,jsx,mjs,cjs}] +indent_size = 2 + +# Web assets. +[*.{html,css,scss}] +indent_size = 2 + +# YAML — 2 spaces (ecosystem standard, GitHub Actions, k8s, etc.). +[*.{yml,yaml}] +indent_size = 2 + +# JSON / JSONC. +[*.{json,jsonc}] +indent_size = 2 + +# TOML. +[*.toml] +indent_size = 2 + +# Markdown — 2 spaces, preserve trailing whitespace (used for line breaks). +[*.md] +trim_trailing_whitespace = false +indent_size = 2 + +# Shell scripts. +[*.{sh,bash,zsh,fish}] +indent_size = 4 + +# Makefiles must use tabs. +[{Makefile,*.mk}] +indent_style = tab + +# Dockerfiles. +[Dockerfile*] +indent_size = 4 + +# GitHub Actions workflows — 2 spaces. +[.github/**/*.{yml,yaml}] +indent_size = 2 + +# Config files. +[*.{cfg,ini,conf}] +indent_size = 4 diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..3342e8f --- /dev/null +++ b/.gitattributes @@ -0,0 +1,86 @@ +# Canonical eco-wide .gitattributes template (.shared-templates/gitattributes.tmpl). +# Auto-detect text files and normalise line endings to LF. 
+ +* text=auto eol=lf + +# --- Source code ----------------------------------------------------------- +*.go text eol=lf diff=golang +*.py text eol=lf diff=python +*.ts text eol=lf +*.tsx text eol=lf +*.js text eol=lf +*.jsx text eol=lf +*.mjs text eol=lf +*.cjs text eol=lf +*.rs text eol=lf diff=rust + +# --- Shell + config -------------------------------------------------------- +*.sh text eol=lf +*.bash text eol=lf +*.toml text eol=lf +*.yaml text eol=lf +*.yml text eol=lf +*.json text eol=lf linguist-language=JSON +*.jsonc text eol=lf linguist-language=JSON +*.cff text eol=lf + +# --- Documentation --------------------------------------------------------- +*.md text eol=lf diff=markdown +*.txt text eol=lf + +# --- Build / packaging ---------------------------------------------------- +Makefile text eol=lf +*.mk text eol=lf +Dockerfile* text eol=lf +docker-compose*.yml text eol=lf +.github/**/*.yml text eol=lf +.github/**/*.yaml text eol=lf + +# --- Generated artefacts (mark as such for diffs and language stats) ------ +go.mod text eol=lf linguist-generated +go.sum text eol=lf linguist-generated +*.pb.go linguist-generated +*_generated.go linguist-generated +package-lock.json linguist-generated +pnpm-lock.yaml linguist-generated +yarn.lock linguist-generated + +# --- Vendored / external sources ------------------------------------------ +vendor/** linguist-vendored +node_modules/** linguist-vendored +testdata/** linguist-vendored +benchmarks/data/** linguist-vendored + +# --- Binary files (do not text-normalise) --------------------------------- +*.exe binary +*.dll binary +*.so binary +*.dylib binary +*.a binary +*.o binary +*.db binary +*.sqlite binary +*.png binary +*.jpg binary +*.jpeg binary +*.gif binary +*.ico binary +*.svg text eol=lf +*.pdf binary +*.zip binary +*.tar.gz binary +*.tgz binary +*.whl binary + +# --- Source archive hygiene (excluded from `git archive`) ----------------- +.github export-ignore +.shared-templates export-ignore 
+.gitattributes export-ignore +.gitignore export-ignore +.editorconfig export-ignore +.golangci.yml export-ignore +.goreleaser.yml export-ignore +.goreleaser.yaml export-ignore +testdata/ export-ignore +benchmarks/ export-ignore +e2e/ export-ignore diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml new file mode 100644 index 0000000..06bb64e --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -0,0 +1,127 @@ +name: Bug report +description: Something is broken or behaving unexpectedly. +title: "bug: " +labels: ["bug", "triage"] + +body: + - type: markdown + attributes: + value: | + Thanks for taking the time to file a bug report. Please fill in as much + of the form as you can — the more we know, the faster we can fix it. + + Before submitting: + - Search [existing issues](https://github.com/GrayCodeAI/hawk-sdk-python/issues) to avoid duplicates. + - If this is a security issue, please **do not** file a public issue. See `SECURITY.md`. + + - type: textarea + id: what-happened + attributes: + label: What happened? + description: A clear, concise description of the bug. + placeholder: When I call HawkClient.<method>, I expected X but got Y. + validations: + required: true + + - type: dropdown + id: surface + attributes: + label: Surface + description: Which SDK surface is affected? + options: + - "HawkClient (sync)" + - "AsyncHawkClient (async)" + - "Streaming (chat_stream / StreamReader)" + - "Retry / backoff" + - "Tools (chat_with_tools, @tool decorator)" + - "Agent / AsyncAgent" + - "Workflow / AsyncWorkflow" + - "Typed errors (HawkAPIError hierarchy)" + - "Build / packaging" + validations: + required: true + + - type: textarea + id: reproduce + attributes: + label: Steps to reproduce + description: Minimal Python snippet that reliably reproduces the problem. + render: python + placeholder: | + from hawk import HawkClient + with HawkClient() as c: + resp = c.chat("hello") + # ^ wrong shape / panic / hang / etc. 
+ validations: + required: true + + - type: textarea + id: expected + attributes: + label: Expected behavior + description: What did you expect to happen instead? + validations: + required: true + + - type: input + id: sdk-version + attributes: + label: hawk-sdk version + description: Output of `python -c "import hawk; print(hawk.__version__)"`. + placeholder: "0.2.0" + validations: + required: true + + - type: input + id: daemon-version + attributes: + label: hawk daemon version + description: Output of `hawk version` (the daemon you're hitting). + placeholder: "0.2.0" + validations: + required: true + + - type: input + id: python-version + attributes: + label: Python version + description: Output of `python --version`. + placeholder: "Python 3.11.9" + validations: + required: true + + - type: input + id: os + attributes: + label: Operating system + description: e.g. macOS 14.5 (arm64), Ubuntu 24.04 (amd64), Windows 11 (amd64). + placeholder: "macOS 14.5 (arm64)" + validations: + required: true + + - type: textarea + id: deps + attributes: + label: Relevant package versions + description: | + Paste the output of `pip freeze | grep -E "^(httpx|pydantic|hawk-sdk)"` (or `uv pip list` equivalent). + render: shell + + - type: textarea + id: logs + attributes: + label: Logs / traceback + description: | + Paste any relevant output, including the full traceback. + **Redact API tokens, session IDs, and any private data first.** + render: shell + + - type: checkboxes + id: confirm + attributes: + label: Confirmation + options: + - label: I searched existing issues and did not find a duplicate. + required: true + - label: I redacted any secrets, tokens, or private data from logs. 
+ required: true diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 0000000..c88dd67 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,8 @@ +blank_issues_enabled: false +contact_links: + - name: Security vulnerability + url: https://github.com/GrayCodeAI/hawk-sdk-python/security/advisories/new + about: Please report security issues privately via a GitHub Security Advisory. See SECURITY.md. + - name: Question / discussion + url: https://github.com/GrayCodeAI/hawk-sdk-python/discussions + about: Have a question or want to discuss an idea? Open a discussion instead of an issue. diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml new file mode 100644 index 0000000..c840e71 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.yml @@ -0,0 +1,75 @@ +name: Feature request +description: Suggest an improvement or a new SDK capability. +title: "feat: " +labels: ["enhancement", "triage"] + +body: + - type: markdown + attributes: + value: | + Thanks for proposing a feature. hawk-sdk is a thin Python client for + the local hawk daemon. Every feature is evaluated against whether it + serves **a single developer** running their own hawk daemon — i.e. + it improves ergonomics, lowers latency, or simplifies integration. + + Before submitting: + - Search [existing issues](https://github.com/GrayCodeAI/hawk-sdk-python/issues) to avoid duplicates. + - For new daemon endpoints, the daemon side must land first. + + - type: dropdown + id: kind + attributes: + label: Kind of feature + description: What flavour of change is this? 
+ options: + - "New client method (wraps a daemon endpoint)" + - "Streaming / SSE handling" + - "Retry / backoff / resilience" + - "Typed errors / error categories" + - "Tools (chat_with_tools, @tool decorator)" + - "Agent / Workflow orchestration" + - "Pydantic model / type-hint improvement" + - "Configuration (httpx transport, timeouts, etc.)" + - "Tooling / CI / docs / packaging" + validations: + required: true + + - type: textarea + id: problem + attributes: + label: What problem are you trying to solve? + description: Describe the user problem first. Solutions can come later. + placeholder: When I call <method>, I have to write boilerplate Y because the SDK doesn't expose X. + validations: + required: true + + - type: textarea + id: proposal + attributes: + label: Proposed solution + description: How would you like the SDK to behave? Snippet of API you'd want. + render: python + validations: + required: true + + - type: textarea + id: alternatives + attributes: + label: Alternatives considered + description: | + What did you try? What do other SDKs (`openai-python`, + `anthropic-sdk-python`, `langchain`, `llama-index`, `dspy`, + `instructor`, `marvin`, `pydantic-ai`, `mirascope`, `magentic`) + do? Why isn't that enough? + + - type: checkboxes + id: principles + attributes: + label: Solo-developer fit + description: hawk-sdk avoids enterprise scope. Confirm this feature respects that. + options: + - label: Works with zero configuration (sensible defaults). + - label: Does not introduce a third-party network dependency. + - label: Does not break wire-compatibility with existing daemon versions. + - label: Sync and async variants are kept in lock-step. + - label: Has an escape hatch (override via parameter, transport, or env). 
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..f11661e --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,87 @@ + + +## Summary + + + +## Changes + + + +- + +## API impact + + + +## Daemon compatibility + + + +## Async compatibility + + + +## Testing + + + +```text +$ make test +... +$ make lint +... +$ make typecheck +... +``` + +## Checklist + +- [ ] Commits follow [Conventional Commits](https://www.conventionalcommits.org/) + (`feat:`, `fix:`, `perf:`, `refactor:`, `docs:`, `test:`, etc.) +- [ ] `make test` passes locally +- [ ] `make lint` (ruff check + ruff format --check) passes +- [ ] `make typecheck` (mypy --strict) passes +- [ ] New or changed code has tests (table-driven / parametrized where + appropriate, using `respx` for HTTP mocking) +- [ ] Public APIs have docstrings and type hints +- [ ] `CHANGELOG.md` updated under `## [Unreleased]` if user-visible +- [ ] **Both `pyproject.toml` and `src/hawk/_version.py` are bumped + together** if this is a release-eligible change +- [ ] Sync and async client variants are kept in lock-step +- [ ] Every new outbound HTTP request inherits `User-Agent: + hawk-sdk-python/<__version__>` via `_build_headers()` +- [ ] No secrets, tokens, or PII added to the repo +- [ ] No `Co-authored-by:` trailers (this is solo-developer work) diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..b86542e --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,35 @@ +version: 2 +updates: + - package-ecosystem: pip + directory: / + schedule: + interval: weekly + day: monday + open-pull-requests-limit: 5 + labels: + - dependencies + - python + commit-message: + prefix: "chore(deps)" + include: scope + groups: + pydantic: + patterns: + - "pydantic*" + pytest: + patterns: + - "pytest*" + - "respx*" + + - package-ecosystem: github-actions + directory: / + schedule: + interval: weekly + day: monday + 
open-pull-requests-limit: 3 + labels: + - dependencies + - github-actions + commit-message: + prefix: "chore(ci)" + include: scope diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..3b280e2 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,111 @@ +# Canonical CI workflow for hawk-eco Python repos. +# Source of truth: .shared-templates/workflows/python-ci.yml.tmpl + +name: CI + +on: + push: + branches: [main] + pull_request: + branches: [main] + +permissions: + contents: read + +concurrency: + group: ci-${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + test: + name: test (Python ${{ matrix.python-version }}) + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: pip + - name: Install + run: | + python -m pip install --upgrade pip + pip install -e ".[dev]" + - name: pytest + run: pytest --strict-markers --tb=short + + lint: + name: lint (ruff) + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.12" + cache: pip + - name: Install + run: | + python -m pip install --upgrade pip + pip install -e ".[dev]" + - name: ruff check + run: ruff check . + - name: ruff format --check + run: ruff format --check . 
+ + typecheck: + name: typecheck (mypy --strict) + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.12" + cache: pip + - name: Install + run: | + python -m pip install --upgrade pip + pip install -e ".[dev]" + - name: mypy + run: mypy src/ + + security: + name: security (pip-audit) + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.12" + cache: pip + - name: Install + run: | + python -m pip install --upgrade pip pip-audit + pip install -e ".[dev]" + - name: pip-audit + run: pip-audit + + build: + name: build (sdist + wheel) + runs-on: ubuntu-latest + needs: [test, lint, typecheck] + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.12" + cache: pip + - name: Install build tools + run: | + python -m pip install --upgrade pip build twine + - name: Build + run: python -m build + - name: Twine check + run: twine check dist/* + - name: Upload artifacts + uses: actions/upload-artifact@v4 + with: + name: dist + path: dist/ diff --git a/.github/workflows/release-please.yml b/.github/workflows/release-please.yml new file mode 100644 index 0000000..639f55f --- /dev/null +++ b/.github/workflows/release-please.yml @@ -0,0 +1,43 @@ +# Canonical release-please workflow for hawk-eco repos. +# Opens / updates a release PR on every push to main; on merge of that PR, +# tags the new release. The tag triggers the PyPI publish workflow (release.yml). +# +# Source of truth: .shared-templates/release-please.yml.tmpl at the eco root. 
+ +name: release-please + +on: + push: + branches: [main] + +permissions: + contents: write + pull-requests: write + issues: write + +concurrency: + group: release-please-${{ github.ref }} + cancel-in-progress: false + +jobs: + release-please: + runs-on: ubuntu-latest + steps: + - name: Run release-please + id: release + uses: googleapis/release-please-action@v4 + with: + config-file: release-please-config.json + manifest-file: .release-please-manifest.json + token: ${{ secrets.RELEASE_PLEASE_TOKEN || secrets.GITHUB_TOKEN }} + + - name: Summary + if: always() + run: | + if [[ "${{ steps.release.outputs.release_created }}" == "true" ]]; then + echo "Released ${{ steps.release.outputs.tag_name }}." >> $GITHUB_STEP_SUMMARY + elif [[ "${{ steps.release.outputs.pr }}" != "" ]]; then + echo "Updated release PR: ${{ steps.release.outputs.pr }}" >> $GITHUB_STEP_SUMMARY + else + echo "No release-relevant changes detected." >> $GITHUB_STEP_SUMMARY + fi diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..4833743 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,41 @@ +# Canonical PyPI publish workflow for hawk-eco Python repos. +# Triggered by release-please when it pushes a v* tag. +# Source of truth: .shared-templates/workflows/python-release.yml.tmpl +# +# Uses PyPI Trusted Publishing (OIDC) — no API tokens stored in GitHub. 
+# Configure once at https://pypi.org/manage/account/publishing/ + +name: release + +on: + push: + tags: ["v*"] + +permissions: + contents: read + id-token: write # required for PyPI Trusted Publishing + +jobs: + build-and-publish: + runs-on: ubuntu-latest + environment: + name: pypi + url: https://pypi.org/p/hawk-sdk + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install build tooling + run: | + python -m pip install --upgrade pip build + + - name: Build sdist + wheel + run: python -m build + + - name: Publish to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + packages-dir: dist/ diff --git a/.gitignore b/.gitignore index 7989b07..c0b9142 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,46 @@ +# Byte-compiled / optimized / DLL files __pycache__/ -*.pyc +*.py[cod] +*$py.class + +# Distribution / packaging *.egg-info/ -dist/ -build/ .eggs/ +build/ +dist/ +sdist/ +wheels/ +*.egg +MANIFEST + +# Virtual environments +.venv/ +venv/ +env/ +ENV/ + +# Tooling caches +.pytest_cache/ +.mypy_cache/ +.ruff_cache/ +.tox/ +.nox/ +.coverage +.coverage.* +htmlcov/ +coverage.xml +*.cover + +# IDE +.vscode/ +.idea/ +*.swp +*.swo + +# OS +.DS_Store +Thumbs.db + +# Local env files +.env +.env.local diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..b449ec5 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,43 @@ +# Canonical pre-commit config for hawk-eco Python repos. 
+# Source of truth: .shared-templates/pre-commit-config.yaml.tmpl +# +# Install: pip install pre-commit +# Activate: pre-commit install --install-hooks +# Run all: pre-commit run --all-files + +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.6.0 + hooks: + - id: trailing-whitespace + exclude: '\.md$' # markdown uses trailing whitespace for line breaks + - id: end-of-file-fixer + - id: check-yaml + - id: check-toml + - id: check-json + - id: check-merge-conflict + - id: check-added-large-files + args: [--maxkb=512] + - id: detect-private-key + - id: mixed-line-ending + args: [--fix=lf] + + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.8.0 + hooks: + - id: ruff + args: [--fix, --exit-non-zero-on-fix] + - id: ruff-format + + - repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.13.0 + hooks: + - id: mypy + additional_dependencies: [pydantic>=2.0, httpx>=0.25] + args: [--strict, --ignore-missing-imports] + + - repo: https://github.com/commitizen-tools/commitizen + rev: v3.30.1 + hooks: + - id: commitizen + stages: [commit-msg] diff --git a/.release-please-manifest.json b/.release-please-manifest.json new file mode 100644 index 0000000..2be9c43 --- /dev/null +++ b/.release-please-manifest.json @@ -0,0 +1,3 @@ +{ + ".": "0.2.0" +} diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..21acc1c --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,83 @@ +# Changelog + +All notable changes to `hawk-sdk` (Python) are documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [Unreleased] + +### Fixed +- **`__version__` now agrees with `pyproject.toml`.** The prior hardening + commit bumped `pyproject.toml` to `0.2.0` but missed + `src/hawk/_version.py`, which still reported `0.1.0`. Both now report + `0.2.0`. 
Aligns the SDK with the rest of the hawk-eco ecosystem + (`hawk`, `tok`, `eyrie`, `yaad`, `trace`, `sight`, `inspect`, + `hawk-sdk-go`). + +### Added +- **`User-Agent: hawk-sdk-python/<__version__>` header** on every + outbound HTTP request from both `HawkClient` (sync) and + `AsyncHawkClient` (async). Set via the `_build_headers()` helper, so + it applies to both regular API calls and the `chat_stream` SSE + endpoint (httpx merges client-default headers with per-request + overrides). Lets daemon operators identify SDK clients in logs and + reject misbehaving versions cleanly. +- **OSS standard files** (this is the first PR to add them): + - `CHANGELOG.md` — Keep-a-Changelog format with `[Unreleased]` and + a backfilled `[0.1.0]` entry for the initial SDK + prior hardening + commit. + - `CONTRIBUTING.md` — quick start, branch flow (this repo branches + from `main`), conventional commits, code standards, testing, + SDK-version-bump procedure (must update both `pyproject.toml` and + `src/hawk/_version.py`). + - `SECURITY.md` — vulnerability reporting via GitHub Security + Advisories. + - `CODE_OF_CONDUCT.md` — Contributor Covenant 2.1. + - `.gitattributes` — LF normalization, binary detection. + - `.editorconfig` — UTF-8, LF, 4-space indent for Python, 2-space + for YAML/JSON/TOML. + - `.github/workflows/ci.yml` — pytest matrix on Python 3.9 / 3.10 / + 3.11 / 3.12 / 3.13, ruff (lint + format check), mypy strict, build + sdist + wheel. + - `.github/dependabot.yml` — weekly `pip` + `github-actions` + updates. + - `.github/PULL_REQUEST_TEMPLATE.md` — Summary / Changes / API + impact / Daemon compatibility / Async compatibility / Testing / + Checklist. + - `.github/ISSUE_TEMPLATE/bug_report.yml` — surface dropdown + (HawkClient / AsyncHawkClient / streaming / retry / tools / + workflow / typed errors / build). + - `.github/ISSUE_TEMPLATE/feature_request.yml` — feature request + with `kind` selector + solo-dev fit checks. 
+ - `.github/ISSUE_TEMPLATE/config.yml` — routes security to + advisories, questions to discussions, blocks blank issues. +- Expanded `.gitignore` with the broader Python toolchain footprint + (`.mypy_cache`, `.ruff_cache`, `.pytest_cache`, virtualenv dirs, + `htmlcov`, `coverage.xml`, `.tox`, `.nox`). + +## [0.1.0] — 2026-05-13 + +### Added +- Initial Python SDK for the hawk daemon API: + - `HawkClient` (sync) and `AsyncHawkClient` (async), both built on + `httpx`, with `health`, `chat`, `chat_stream`, `create_session`, + `get_session`, `list_sessions`, `list_messages`, `delete_session`, + `stats`. + - Pydantic v2 models for every request and response shape. + - Streaming via `StreamReader` / `AsyncStreamReader` (SSE). + - Typed error hierarchy (`HawkAPIError`, `AuthenticationError`, + `BadRequestError`, `NotFoundError`, `RateLimitError`, + `InternalServerError`, `ServiceUnavailableError`). + - Retry with exponential backoff via `RetryConfig`. + - `Tool`, `Agent` / `AsyncAgent`, `Workflow` / `AsyncWorkflow` + orchestration helpers. + +### Production-hardening pass already on this branch (commit `2560031`) +- Added strict `ruff` lint config (E, F, W, I, N, UP, B, A, SIM, TCH, + RUF rule sets) and `mypy --strict` config in `pyproject.toml`. +- Added `Makefile` with standard targets (`test`, `test-coverage`, + `lint`, `format`, `typecheck`, `clean`, `help`). +- Bumped `pyproject.toml` version to `0.2.0` (this PR completes the + bump by also updating `_version.py`). +- Added `pytest` strict-markers and short-traceback config. 
diff --git a/CODEOWNERS b/CODEOWNERS new file mode 100644 index 0000000..4cdd829 --- /dev/null +++ b/CODEOWNERS @@ -0,0 +1,20 @@ +# CODEOWNERS for hawk-sdk-python +* @GrayCodeAI/maintainers + +# Public API surface — bump SDK version (VERSION file) when these change +/src/hawk/client.py @GrayCodeAI/sdk-team +/src/hawk/agent.py @GrayCodeAI/sdk-team +/src/hawk/workflow.py @GrayCodeAI/sdk-team +/src/hawk/tools.py @GrayCodeAI/sdk-team +/src/hawk/types.py @GrayCodeAI/sdk-team +/src/hawk/errors.py @GrayCodeAI/sdk-team +/src/hawk/_version.py @GrayCodeAI/maintainers +/VERSION @GrayCodeAI/maintainers + +# Build / packaging +/pyproject.toml @GrayCodeAI/devops-team +/.github/ @GrayCodeAI/devops-team +/Makefile @GrayCodeAI/devops-team + +# Documentation +*.md @GrayCodeAI/docs-team diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..314f97c --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,60 @@ +# Code of Conduct + +## Our pledge + +We — the maintainers and contributors of the hawk-sdk-python project — pledge to +make participation in our community a harassment-free experience for everyone, +regardless of age, body size, visible or invisible disability, ethnicity, sex +characteristics, gender identity and expression, level of experience, +education, socio-economic status, nationality, personal appearance, race, +religion, or sexual identity and orientation. + +We pledge to act and interact in ways that contribute to an open, welcoming, +diverse, inclusive, and healthy community. + +## Our standards + +Examples of behaviour that contributes to a positive environment: + +- Demonstrating empathy and kindness toward other people. +- Being respectful of differing opinions, viewpoints, and experiences. +- Giving and gracefully accepting constructive feedback. +- Accepting responsibility, apologising to those affected by mistakes, and + learning from the experience. 
+- Focusing on what is best not just for us as individuals, but for the + overall community. + +Examples of unacceptable behaviour: + +- The use of sexualised language or imagery, and sexual attention or advances. +- Trolling, insulting or derogatory comments, and personal or political + attacks. +- Public or private harassment. +- Publishing others' private information, such as a physical or email + address, without their explicit permission. +- Other conduct which could reasonably be considered inappropriate in a + professional setting. + +## Enforcement + +Community leaders are responsible for clarifying and enforcing our standards +of acceptable behaviour, and will take appropriate and fair corrective +action in response to any behaviour they deem inappropriate, threatening, +offensive, or harmful. + +Instances of abusive, harassing, or otherwise unacceptable behaviour may be +reported to the maintainers via the contact in `SECURITY.md` or by opening a +confidential GitHub Security Advisory at +<https://github.com/GrayCodeAI/hawk-sdk-python/security/advisories/new>. All +complaints will be reviewed and investigated promptly and fairly. + +All community leaders are obligated to respect the privacy and security of +the reporter of any incident. + +## Attribution + +This Code of Conduct is adapted from the +[Contributor Covenant, version 2.1](https://www.contributor-covenant.org/version/2/1/code_of_conduct.html). + +For answers to common questions about this code of conduct, see the +Contributor Covenant FAQ at <https://www.contributor-covenant.org/faq>. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..5fba294 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,114 @@ +# Contributing to hawk-sdk-python + +Thanks for your interest! This guide covers the conventions used across the +hawk-eco. The eco-wide standards (versioning, release tooling, repo layout) +are defined in <https://github.com/GrayCodeAI/hawk/blob/main/VERSIONING.md>. + +## Quick start + +1. Fork the repo and create a feature branch off `main`: + ```bash + git checkout -b feat/short-description + ``` +2. 
Make your changes in small, focused commits. +3. Run the full local check before pushing: + ```bash + make ci + ``` +4. Open a pull request. CI will re-run the same checks on every supported + Python version (3.9–3.13) and also build the sdist and wheel. + +## Build & test + +This repo uses the standardised hawk-eco Makefile targets. Run `make help` +for the full list. The most common targets: + +| Target | What it does | +| ------------------- | ------------------------------------------------ | +| `make build` | Build the sdist and wheel into `dist/` | +| `make test` | Run unit tests | +| `make test-race` | Alias for `make test` (no race detector) | +| `make cover` | Generate a coverage report | +| `make lint` | Run the linter (`ruff`) | +| `make fmt` | Format source files | +| `make vet` | Type-check with `mypy` | +| `make security` | Run `pip-audit` | +| `make ci` | Run everything CI runs (the gate before pushing) | + +## Commit message convention + +We use [Conventional Commits](https://www.conventionalcommits.org/). This +isn't cosmetic — release-please reads commit messages to bump the `VERSION` +file and generate the CHANGELOG, so getting them right matters. + +``` +<type>(<scope>): <subject> + +<body> + +<footer> +``` + +**Types:** + +- `feat:` — a new feature (triggers a minor version bump) +- `fix:` — a bug fix (triggers a patch version bump) +- `perf:` — performance improvement +- `refactor:` — code restructure with no behaviour change +- `docs:` — documentation only +- `test:` — adding or fixing tests +- `build:` — build system or dependencies +- `ci:` — CI configuration +- `chore:` — anything else (no release effect) +- `revert:` — reverts a previous commit + +**Breaking changes:** add `!` after the type/scope or include `BREAKING +CHANGE:` in the footer. This triggers a major version bump. 
+ +Examples: + +``` +feat(client): add streaming retry with exponential backoff +fix: handle empty response body in chat handler +refactor!: rename ClientV1 to Client (BREAKING CHANGE) +``` + +## Pull request checklist + +Before requesting review: + +- [ ] `make ci` passes locally. +- [ ] New behaviour has tests; bug fixes have a regression test. +- [ ] `CHANGELOG.md` entries are **not** edited manually — release-please + generates them from your commit messages. +- [ ] The `VERSION` file is **not** edited manually — release-please bumps + it on release. +- [ ] Public API changes have updated doc comments. +- [ ] No secrets, API keys, or PII in code, comments, tests, or fixtures. + +## Code review etiquette + +- Reviewers focus on correctness, design, and tests; formatting is + enforced by tooling, not humans. +- Authors respond to every comment (resolved, addressed, or politely + declined with rationale) — no silent dismissals. +- Squash-merge by default; the PR title becomes the commit (so it must + be a valid Conventional Commit message). +- One approving review from a CODEOWNERS-listed reviewer is required. + +## Reporting bugs + +Open an issue using the bug-report template. Include the `hawk-sdk` +version (the output of `python -c "import hawk; print(hawk.__version__)"`), +your Python version and operating system, reproduction steps, expected +behaviour, and actual behaviour. + +## Reporting security issues + +**Do not open a public issue.** See [SECURITY.md](./SECURITY.md) for +private reporting channels. + +## License + +By contributing, you agree that your contributions will be licensed under +the same license as this repo (see [LICENSE](./LICENSE)). diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..5f8f390 --- /dev/null +++ b/Makefile @@ -0,0 +1,97 @@ +# Canonical hawk-eco Makefile for Python repos. +# Source of truth: .shared-templates/Makefile.python.tmpl at the eco root. +# Placeholders rendered per repo: hawk-sdk. 
+ +# --------------------------------------------------------------------------- +# Project metadata +# --------------------------------------------------------------------------- +NAME := hawk-sdk + +# --------------------------------------------------------------------------- +# Versioning — sourced from VERSION file at repo root (single source of +# truth, also consumed by hatch + release-please). +# --------------------------------------------------------------------------- +VERSION ?= $(shell cat VERSION 2>/dev/null | head -n1 | tr -d '[:space:]' || echo "dev") + +PYTHON ?= python3 +PIP ?= $(PYTHON) -m pip + +# --------------------------------------------------------------------------- +# Phony declarations (alphabetical). +# --------------------------------------------------------------------------- +.PHONY: all bench build ci clean cover fmt help install lint lint-fix \ + release security test test-race tidy version vet + +# --------------------------------------------------------------------------- +# Default target. +# --------------------------------------------------------------------------- +all: lint test build ## Default — lint, test, build. + +# --------------------------------------------------------------------------- +# Build / install / release. +# --------------------------------------------------------------------------- +build: ## Build wheel + sdist into dist/. + $(PYTHON) -m build + +install: ## Install in editable mode with dev extras. + $(PIP) install -e ".[dev]" + +release: build ## Upload to PyPI (expects $TWINE_USERNAME / $TWINE_PASSWORD). + $(PYTHON) -m twine upload dist/* + +# --------------------------------------------------------------------------- +# Tests. +# --------------------------------------------------------------------------- +test: ## Run unit tests. + $(PYTHON) -m pytest + +test-race: test ## Alias for `test` (Python has no race detector). + +cover: ## Run tests with coverage report. 
+ $(PYTHON) -m pytest --cov=src --cov-report=term-missing --cov-report=html + @echo "Coverage report: htmlcov/index.html" + +bench: ## Run benchmarks (requires pytest-benchmark). + $(PYTHON) -m pytest --benchmark-only + +# --------------------------------------------------------------------------- +# Quality gates. +# --------------------------------------------------------------------------- +fmt: ## Format with ruff. + $(PYTHON) -m ruff format . + +vet: ## Type-check with mypy. + $(PYTHON) -m mypy src/ + +lint: ## Lint with ruff. + $(PYTHON) -m ruff check . + +lint-fix: ## Lint with ruff --fix. + $(PYTHON) -m ruff check --fix . + +security: ## Run pip-audit on resolved dependencies. + @command -v pip-audit >/dev/null 2>&1 || (echo "install: pip install pip-audit" && exit 1) + pip-audit + +tidy: ## No-op for Python (lockfile management is via pyproject.toml). + @echo "tidy: nothing to do for Python repos." + +# --------------------------------------------------------------------------- +# Composite gate used by CI and pre-push. +# --------------------------------------------------------------------------- +ci: fmt vet lint test security ## Run everything CI runs. + @echo "All CI checks passed." + +# --------------------------------------------------------------------------- +# Misc. +# --------------------------------------------------------------------------- +version: ## Print the version that will be packaged. + @echo "Version: $(VERSION)" + +clean: ## Remove build artefacts and caches. + rm -rf dist/ build/ *.egg-info htmlcov/ .coverage + rm -rf .pytest_cache .mypy_cache .ruff_cache + find . -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true + +help: ## Show this help. 
+ @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-15s\033[0m %s\n", $$1, $$2}' diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..0c39f7a --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,71 @@ +# Security Policy — hawk-sdk-python + +## Supported versions + +We support the latest minor version on each `0.x` line, and the latest two +minor versions once `1.x` ships. Older versions receive critical-severity +fixes only on a best-effort basis. + +The current canonical version is the contents of the [`VERSION`](./VERSION) +file at the repo root. See [`VERSIONING.md`](https://github.com/GrayCodeAI/hawk/blob/main/VERSIONING.md) +for the eco-wide versioning scheme. + +## Reporting a vulnerability + +**Do not open a public GitHub issue for security vulnerabilities.** Instead: + +1. Open a private [GitHub Security Advisory](https://github.com/GrayCodeAI/hawk-sdk-python/security/advisories/new), **or** +2. Email `security@graycode.ai` with the details below. + +Include in your report: + +- A description of the vulnerability and the affected component. +- Steps to reproduce, ideally with a minimal proof-of-concept. +- The version (`VERSION` file or git SHA) you tested against. +- The potential impact and any suggested mitigation. + +**Response targets:** + +- Initial acknowledgement: within **48 hours**. +- Triage and severity assessment: within **5 business days**. +- Coordinated fix and disclosure: within **30 days** for high/critical, **90 + days** for medium/low (per industry-standard responsible disclosure). + +## Disclosure policy + +We follow [coordinated vulnerability disclosure](https://en.wikipedia.org/wiki/Coordinated_vulnerability_disclosure): + +- Reporters receive credit in the advisory and CHANGELOG (unless they opt + out). +- We request that reporters refrain from public disclosure until a fix has + been released or the disclosure deadline above has elapsed. 
+- We will not pursue legal action against good-faith researchers acting + within this policy. + +## Security practices in this repo + +- **Dependency monitoring:** automated via Dependabot (see + `.github/dependabot.yml`). +- **Static analysis:** `golangci-lint` / `ruff` / `mypy` enforced in CI. +- **Vulnerability scanning:** `govulncheck` (Go) / `pip-audit` (Python) run + on every CI build. +- **Lockfiles:** `go.sum` / `pnpm-lock.yaml` / `pyproject.toml` are pinned + and committed. +- **Reproducible builds:** release artefacts ship with SHA-256 checksums via + goreleaser. +- **No secrets in source:** API keys are configuration, not constants. Pre- + commit hooks block accidental secret commits. + +## Scope + +This policy covers the code in this repository and the release artefacts +published from it. It does not cover: + +- Third-party dependencies (report to upstream). +- LLM provider services that hawk-sdk-python integrates with (report to the + provider). +- Local filesystem misuse where an attacker already has shell access (out of + threat model). + +For hawk-sdk-python-specific threat-model notes, see the README and any docs in +this repo. diff --git a/VERSION b/VERSION new file mode 100644 index 0000000..0ea3a94 --- /dev/null +++ b/VERSION @@ -0,0 +1 @@ +0.2.0 diff --git a/pyproject.toml b/pyproject.toml index 36f167d..53a5eec 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,10 @@ build-backend = "hatchling.build" [project] name = "hawk-sdk" -version = "0.1.0" +# Version is read dynamically from the VERSION file at the repo root — +# the single source of truth used by release tooling, CI, and the runtime +# `hawk.__version__` constant. +dynamic = ["version"] description = "Python SDK for the Hawk daemon API" readme = "README.md" license = "MIT" @@ -35,11 +38,50 @@ dev = [ "pytest>=7.0", "pytest-asyncio>=0.21", "respx>=0.21", + "ruff>=0.4.0", + "mypy>=1.0", ] +# Read the package version from the repo-root VERSION file at build time. 
+# This keeps Python package metadata, runtime `__version__`, and release +# tooling all in sync from a single source. +[tool.hatch.version] +source = "regex" +path = "VERSION" +pattern = "^(?P<version>[^\\s]+)" + [tool.hatch.build.targets.wheel] packages = ["src/hawk"] +# Ship the VERSION file inside the package so `_version.py` can read it at +# runtime even after install. +force-include = { "VERSION" = "hawk/VERSION" } + +[tool.hatch.build.targets.sdist] +include = [ + "src/hawk", + "VERSION", + "README.md", + "LICENSE", +] [tool.pytest.ini_options] asyncio_mode = "auto" testpaths = ["tests"] +addopts = "--strict-markers --tb=short -q" + +[tool.ruff] +target-version = "py39" +line-length = 100 + +[tool.ruff.lint] +select = ["E", "F", "W", "I", "N", "UP", "B", "A", "SIM", "TCH", "RUF"] +ignore = ["E501"] + +[tool.ruff.lint.isort] +known-first-party = ["hawk"] + +[tool.mypy] +python_version = "3.9" +strict = true +warn_return_any = true +warn_unused_configs = true diff --git a/release-please-config.json b/release-please-config.json new file mode 100644 index 0000000..9343c9a --- /dev/null +++ b/release-please-config.json @@ -0,0 +1,27 @@ +{ + "$schema": "https://raw.githubusercontent.com/googleapis/release-please/main/schemas/config.json", + "packages": { + ".": { + "release-type": "python", + "package-name": "hawk-sdk-python", + "include-v-in-tag": true, + "include-component-in-tag": false, + "bump-minor-pre-major": true, + "bump-patch-for-minor-pre-major": false, + "changelog-sections": [ + { "type": "feat", "section": "Features" }, + { "type": "fix", "section": "Bug Fixes" }, + { "type": "perf", "section": "Performance" }, + { "type": "refactor", "section": "Refactoring" }, + { "type": "revert", "section": "Reverts" }, + { "type": "docs", "section": "Documentation", "hidden": false }, + { "type": "test", "section": "Tests", "hidden": false }, + { "type": "build", "section": "Build", "hidden": true }, + { "type": "ci", "section": "CI", "hidden": true }, + { "type": 
"chore", "section": "Chores", "hidden": true }, + { "type": "style", "section": "Style", "hidden": true } + ], + "extra-files": [{"type":"version-txt","path":"VERSION"}] + } + } +} diff --git a/src/hawk/__init__.py b/src/hawk/__init__.py index a374247..136f0ff 100644 --- a/src/hawk/__init__.py +++ b/src/hawk/__init__.py @@ -13,9 +13,19 @@ RateLimitError, ServiceUnavailableError, ) +from .plan import Plan, PlanNotebook, SubTask, SubTaskState from .retry import DEFAULT_RETRY_CONFIG, RetryConfig from .streaming import AsyncStreamReader, StreamReader from .tools import Tool, chat_with_tools, chat_with_tools_async, tool +from .toolkit import BackgroundTask, ToolGroup, Toolkit +from .tracing import ( + configure_tracing, + detect_provider, + is_tracing_enabled, + trace, + trace_chat, + trace_tool, +) from .types import ( ChatRequest, ChatResponse, @@ -31,6 +41,21 @@ ToolCall, Usage, ) +from .discovery import ( + AgentCard, + AgentResolver, + CompositeResolver, + FileResolver, + WellKnownResolver, +) +from .evaluate import ( + BenchmarkResults, + EvalResult, + EvalTask, + run_benchmark, + run_benchmark_async, +) +from .memory_tools import MemoryTools from .workflow import AsyncWorkflow, Workflow __all__ = [ @@ -51,6 +76,15 @@ "tool", "chat_with_tools", "chat_with_tools_async", + # Plan + "PlanNotebook", + "Plan", + "SubTask", + "SubTaskState", + # Toolkit + "Toolkit", + "ToolGroup", + "BackgroundTask", # Workflow "Workflow", "AsyncWorkflow", @@ -71,6 +105,13 @@ "StreamEventType", "ToolCall", "Usage", + # Tracing + "configure_tracing", + "detect_provider", + "is_tracing_enabled", + "trace", + "trace_chat", + "trace_tool", # Errors "HawkAPIError", "BadRequestError", @@ -80,4 +121,18 @@ "RateLimitError", "InternalServerError", "ServiceUnavailableError", + # Evaluate + "EvalTask", + "EvalResult", + "BenchmarkResults", + "run_benchmark", + "run_benchmark_async", + # Discovery + "AgentCard", + "AgentResolver", + "FileResolver", + "WellKnownResolver", + "CompositeResolver", + # 
# How many directories above this file are inspected for a VERSION file:
# the package directory itself (the wheel ships ``hawk/VERSION``), then
# ``src/`` and the repo root for a development checkout laid out as
# ``<repo>/src/hawk/_version.py``.
_MAX_SEARCH_LEVELS = 3

# Reported when no usable VERSION file is found.
_FALLBACK_VERSION = "0.0.0+unknown"


def _read_version() -> str:
    """Return the canonical version string from the nearest VERSION file.

    Only the package directory and the two directories above it are
    searched. Walking all the way to the filesystem root would accept any
    unrelated ``VERSION`` file that happens to sit in an ancestor directory
    (for example next to ``site-packages``).

    Returns:
        The first whitespace-delimited token of the VERSION file (the same
        token the build-time pattern in ``pyproject.toml`` captures), or
        ``"0.0.0+unknown"`` when no readable, non-empty VERSION file exists.
    """
    here = Path(__file__).resolve()
    # list(...)[:n] rather than parents[:n]: slicing ``parents`` needs 3.10+.
    for directory in list(here.parents)[:_MAX_SEARCH_LEVELS]:
        candidate = directory / "VERSION"
        try:
            text = candidate.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError):
            # Missing, unreadable, a directory, or not text: try the next level.
            continue
        tokens = text.split()
        if tokens:
            return tokens[0]
    # Fallback: should not happen in a properly built/installed package.
    return _FALLBACK_VERSION


__version__ = _read_version()
@dataclass
class AgentCard:
    """Describes a discoverable agent's capabilities and endpoint.

    Attributes are plain data; ``to_dict`` / ``from_dict`` convert to and from
    the JSON-compatible mapping used by the resolvers in this module.
    """

    name: str
    endpoint: str
    description: str = ""
    capabilities: list[str] = field(default_factory=list)
    version: str = "1.0"
    metadata: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return a JSON-serialisable snapshot of this card.

        The ``capabilities`` list and ``metadata`` dict are shallow copies, so
        editing the returned mapping does not change the card.
        """
        return {
            "name": self.name,
            "endpoint": self.endpoint,
            "description": self.description,
            "capabilities": list(self.capabilities),
            "version": self.version,
            "metadata": dict(self.metadata),
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "AgentCard":
        """Build a card from a mapping such as a parsed ``agent.json``.

        Args:
            data: Mapping with required keys ``name`` and ``endpoint``; the
                other keys fall back to the field defaults.

        Returns:
            A new card that owns its own ``capabilities`` and ``metadata``
            containers (they are copied, not shared with ``data``).

        Raises:
            KeyError: If ``name`` or ``endpoint`` is missing.
        """
        return cls(
            name=data["name"],
            endpoint=data["endpoint"],
            description=data.get("description", ""),
            # ``or`` also covers an explicit JSON null for these keys.
            capabilities=list(data.get("capabilities") or []),
            version=data.get("version", "1.0"),
            metadata=dict(data.get("metadata") or {}),
        )
class WellKnownResolver:
    """HTTP-based agent discovery via well-known URLs.

    Discovers agents by fetching ``{base_url}/.well-known/agent.json``.
    Cards added with ``register`` are kept locally and are served by
    ``resolve`` and ``list_agents`` without any network access.

    Usage:
        resolver = WellKnownResolver(["http://localhost:8080", "http://agent2:8080"])
        card = await resolver.resolve("assistant")
    """

    def __init__(self, base_urls: Optional[list[str]] = None) -> None:
        # Copy: the resolver must not share (or mutate) the caller's list.
        self._base_urls: list[str] = list(base_urls or [])
        # Cards fetched over HTTP by resolve(), keyed by agent name.
        self._cache: dict[str, AgentCard] = {}
        # Cards supplied explicitly through register(), keyed by agent name.
        self._registered: dict[str, AgentCard] = {}

    async def resolve(self, agent_name: str) -> Optional[AgentCard]:
        """Return the card named ``agent_name``, or ``None`` if no source has it.

        Registered cards win over cached ones; only when neither store knows
        the name are the base URLs queried, in order.
        """
        for store in (self._registered, self._cache):
            if agent_name in store:
                return store[agent_name]

        for url in self._base_urls:
            card = await self._fetch_card(url)
            if card and card.name == agent_name:
                self._cache[agent_name] = card
                return card
        return None

    async def list_agents(self) -> list[AgentCard]:
        """Return registered cards plus one card per reachable base URL.

        Cards are de-duplicated by name; a registered card takes precedence
        over a fetched card with the same name, matching ``resolve``.
        """
        listed: dict[str, AgentCard] = dict(self._registered)
        for url in self._base_urls:
            card = await self._fetch_card(url)
            if card:
                listed.setdefault(card.name, card)
        return list(listed.values())

    async def register(self, card: AgentCard) -> None:
        """Make ``card`` resolvable by name without contacting any server.

        The card's ``endpoint`` is deliberately not added to the base URLs:
        it is the agent's API endpoint, not necessarily a host root that
        serves ``/.well-known/agent.json``.
        """
        self._registered[card.name] = card

    async def _fetch_card(self, base_url: str) -> Optional[AgentCard]:
        """Fetch and parse the well-known card for ``base_url``.

        Returns ``None`` on any failure (import, transport, non-200 status,
        malformed JSON or card).
        """
        url = f"{base_url.rstrip('/')}/.well-known/agent.json"
        try:
            # Imported lazily inside the try, as before, so a missing httpx is
            # treated like any other discovery failure.
            import httpx

            async with httpx.AsyncClient(timeout=5.0) as client:
                resp = await client.get(url)
            if resp.status_code == 200:
                return AgentCard.from_dict(resp.json())
        except Exception:
            # Discovery is best-effort by design: an unreachable or
            # misbehaving host must not break resolution through other
            # sources, so the failure is reported as "no card here".
            return None
        return None
+ + Usage: + resolver = CompositeResolver([ + FileResolver(".hawk/agents.json"), + WellKnownResolver(["http://localhost:8080"]), + ]) + card = await resolver.resolve("my-agent") + """ + + def __init__(self, resolvers: list[Any]) -> None: + self._resolvers = resolvers + + async def resolve(self, agent_name: str) -> Optional[AgentCard]: + for resolver in self._resolvers: + card = await resolver.resolve(agent_name) + if card: + return card + return None + + async def list_agents(self) -> list[AgentCard]: + seen = set() + cards = [] + for resolver in self._resolvers: + for card in await resolver.list_agents(): + if card.name not in seen: + seen.add(card.name) + cards.append(card) + return cards + + async def register(self, card: AgentCard) -> None: + if self._resolvers: + await self._resolvers[0].register(card) diff --git a/src/hawk/evaluate.py b/src/hawk/evaluate.py new file mode 100644 index 0000000..574a48d --- /dev/null +++ b/src/hawk/evaluate.py @@ -0,0 +1,204 @@ +"""Agent evaluation framework for systematic benchmarking. + +Defines tasks with metrics, runs agents N times, aggregates results +with statistics. Inspired by agentscope's evaluation module. 
# Upper bound on the number of failing runs listed by BenchmarkResults.summary().
_MAX_FAILURES_SHOWN = 10


@dataclass
class EvalTask:
    """A single evaluation task.

    The runners in this module read ``name``, ``prompt``, ``category`` and
    ``validate``. ``expected_tools`` and ``max_turns`` are carried on the task
    but are not consulted by ``run_benchmark`` / ``run_benchmark_async``.
    """

    name: str
    prompt: str
    category: str = "general"
    expected_tools: list[str] = field(default_factory=list)
    validate: Optional[Callable[[Any], bool]] = None
    max_turns: int = 10


@dataclass
class EvalResult:
    """Result of a single evaluation run."""

    task_name: str
    success: bool
    duration_ms: float
    tokens_in: int = 0
    tokens_out: int = 0
    turns_taken: int = 0
    error: Optional[str] = None


@dataclass
class BenchmarkResults:
    """Aggregated benchmark results (one ``EvalResult`` per task *run*)."""

    results: list[EvalResult] = field(default_factory=list)

    @property
    def total_tasks(self) -> int:
        """Number of recorded runs (tasks x runs), not distinct tasks."""
        return len(self.results)

    @property
    def passed(self) -> int:
        return sum(1 for r in self.results if r.success)

    @property
    def failed(self) -> int:
        return self.total_tasks - self.passed

    @property
    def pass_rate(self) -> float:
        """Fraction of runs that passed; 0.0 when nothing was run."""
        return self.passed / max(self.total_tasks, 1)

    @property
    def avg_duration_ms(self) -> float:
        durations = [r.duration_ms for r in self.results]
        return statistics.mean(durations) if durations else 0.0

    @property
    def total_tokens(self) -> int:
        return sum(r.tokens_in + r.tokens_out for r in self.results)

    def by_category(self) -> dict[str, list[EvalResult]]:
        """Group results by the ``category/`` prefix of ``task_name``.

        Names without a ``/`` are grouped under ``"general"``.
        """
        cats: dict[str, list[EvalResult]] = {}
        for r in self.results:
            cat = r.task_name.split("/")[0] if "/" in r.task_name else "general"
            cats.setdefault(cat, []).append(r)
        return cats

    def summary(self) -> str:
        """Return a short multi-line, human-readable report."""
        lines = [
            f"Benchmark Results: {self.passed}/{self.total_tasks} passed ({self.pass_rate:.0%})",
            f"Avg duration: {self.avg_duration_ms:.0f}ms",
            f"Total tokens: {self.total_tokens}",
        ]
        failures = [r for r in self.results if not r.success]
        if failures:
            lines.append("Failures:")
            shown = failures[:_MAX_FAILURES_SHOWN]
            lines.extend(f" - {f.task_name}: {f.error or 'validation failed'}" for f in shown)
            hidden = len(failures) - len(shown)
            if hidden > 0:
                # Make the truncation visible instead of silently dropping rows.
                lines.append(f" ... and {hidden} more")
        return "\n".join(lines)


def _result_name(task: EvalTask) -> str:
    """Return ``category/name``, or just ``name`` for the default category."""
    return task.name if task.category == "general" else f"{task.category}/{task.name}"


def _elapsed_ms(start: float) -> float:
    """Milliseconds elapsed since ``start`` (a ``time.perf_counter()`` value)."""
    return (time.perf_counter() - start) * 1000


def _score_response(task: EvalTask, response: Any, duration_ms: float) -> EvalResult:
    """Build the result for a run whose chat call returned normally."""
    success = True
    error = None
    if task.validate:
        success = bool(task.validate(response))
        if not success:
            error = "Validation failed"
    return EvalResult(
        task_name=_result_name(task),
        success=success,
        duration_ms=duration_ms,
        tokens_in=getattr(response, "tokens_in", 0),
        tokens_out=getattr(response, "tokens_out", 0),
        turns_taken=getattr(response, "turns_taken", 0),
        # Previously computed but never stored, so failed validations carried
        # no error text.
        error=error,
    )


def _failed_run(task: EvalTask, duration_ms: float, exc: Exception) -> EvalResult:
    """Build the result for a run whose chat call or validator raised."""
    return EvalResult(
        task_name=_result_name(task),
        success=False,
        duration_ms=duration_ms,
        # An exception with an empty message would otherwise be reported by
        # summary() as a validation failure.
        error=str(exc) or type(exc).__name__,
    )


def run_benchmark(
    agent: Any,
    tasks: list[EvalTask],
    *,
    runs: int = 1,
    reset_between_tasks: bool = True,
) -> BenchmarkResults:
    """Run a benchmark suite against an agent.

    Args:
        agent: Object with a ``.chat(prompt)`` method and, when
            ``reset_between_tasks`` is true, a ``.reset()`` method.
        tasks: List of evaluation tasks.
        runs: Number of times to run each task.
        reset_between_tasks: Whether to call ``agent.reset()`` before every run.

    Returns:
        Aggregated benchmark results. Exceptions raised by ``chat`` or by a
        task's validator are recorded as failed runs rather than propagated.
    """
    results = BenchmarkResults()

    for task in tasks:
        for _ in range(runs):
            if reset_between_tasks:
                agent.reset()

            start = time.perf_counter()
            try:
                response = agent.chat(task.prompt)
                # Stop the clock before validation so it measures the agent only.
                duration = _elapsed_ms(start)
                result = _score_response(task, response, duration)
            except Exception as exc:
                # Harness boundary: one failing run must not abort the suite.
                result = _failed_run(task, _elapsed_ms(start), exc)
            results.results.append(result)

    return results


async def run_benchmark_async(
    agent: Any,
    tasks: list[EvalTask],
    *,
    runs: int = 1,
    reset_between_tasks: bool = True,
) -> BenchmarkResults:
    """Async version of run_benchmark.

    ``agent.chat`` is awaited; ``agent.reset()`` is called synchronously, as
    in the synchronous runner. Runs execute sequentially, not concurrently.
    """
    results = BenchmarkResults()

    for task in tasks:
        for _ in range(runs):
            if reset_between_tasks:
                agent.reset()

            start = time.perf_counter()
            try:
                response = await agent.chat(task.prompt)
                duration = _elapsed_ms(start)
                result = _score_response(task, response, duration)
            except Exception as exc:
                result = _failed_run(task, _elapsed_ms(start), exc)
            results.results.append(result)

    return results
+ + The agent can voluntarily decide to: + - Record important information for future reference + - Retrieve relevant memories for the current task + - Forget outdated or incorrect information + """ + + def __init__(self, client: Any, *, session_id: Optional[str] = None) -> None: + self._client = client + self._session_id = session_id + self._local_memories: list[dict[str, str]] = [] + + def record_memory(self, content: str, category: str = "general", importance: str = "normal") -> str: + """Record important information to long-term memory.""" + memory = { + "content": content, + "category": category, + "importance": importance, + } + self._local_memories.append(memory) + + # If client supports yaad memory API, persist + try: + if hasattr(self._client, "remember"): + self._client.remember(content, session_id=self._session_id) + return f"Recorded to persistent memory: '{content[:100]}...'" + except Exception: + pass + + return f"Recorded to session memory: '{content[:100]}...'" + + def retrieve_memories(self, query: str, limit: int = 5) -> str: + """Retrieve relevant memories for the current context.""" + results = [] + + # Try yaad recall + try: + if hasattr(self._client, "recall"): + recalled = self._client.recall(query, limit=limit, session_id=self._session_id) + if recalled: + return f"Recalled {len(recalled)} memories:\n" + "\n".join( + f"- {m}" for m in recalled + ) + except Exception: + pass + + # Fallback to local fuzzy match + query_lower = query.lower() + for mem in self._local_memories: + if query_lower in mem["content"].lower(): + results.append(mem["content"]) + + if results: + return f"Found {len(results)} relevant memories:\n" + "\n".join( + f"- {r}" for r in results[:limit] + ) + return "No relevant memories found." 
+ + def forget_memory(self, content_fragment: str) -> str: + """Remove a memory matching the given fragment.""" + before = len(self._local_memories) + self._local_memories = [ + m for m in self._local_memories + if content_fragment.lower() not in m["content"].lower() + ] + removed = before - len(self._local_memories) + return f"Removed {removed} matching memories." if removed else "No matching memories found." + + def get_tools(self) -> list[Tool]: + """Return memory operations as Tool objects.""" + return [ + Tool( + name="record_memory", + description="Record important information to long-term memory for future reference. Use for key findings, decisions, or facts you'll need later.", + parameters={ + "type": "object", + "properties": { + "content": {"type": "string", "description": "The information to remember"}, + "category": {"type": "string", "description": "Category (general/technical/personal/project)", "default": "general"}, + "importance": {"type": "string", "description": "Importance level (low/normal/high)", "default": "normal"}, + }, + "required": ["content"], + }, + fn=self.record_memory, + ), + Tool( + name="retrieve_memories", + description="Search long-term memory for relevant information. 
class SubTaskState(str, Enum):
    """Lifecycle states shared by subtasks and plans."""

    TODO = "todo"
    IN_PROGRESS = "in_progress"
    DONE = "done"
    ABANDONED = "abandoned"


def _utc_timestamp() -> str:
    """Current UTC time as an ISO-8601 string."""
    return datetime.now(timezone.utc).isoformat()


# Markdown checkbox prefix rendered for each state.
_CHECKBOX_PREFIX = {
    SubTaskState.TODO: "- [ ]",
    SubTaskState.IN_PROGRESS: "- [ ] [WIP]",
    SubTaskState.DONE: "- [x]",
    SubTaskState.ABANDONED: "- [ ] [Abandoned]",
}


@dataclass
class SubTask:
    """One step of a plan, with its state and (once finished) its outcome."""

    name: str
    description: str
    expected_outcome: str
    state: SubTaskState = SubTaskState.TODO
    outcome: Optional[str] = None
    created_at: str = field(default_factory=_utc_timestamp)
    finished_at: Optional[str] = None

    def finish(self, outcome: str) -> None:
        """Mark the subtask done, storing ``outcome`` and the finish time."""
        self.finished_at = _utc_timestamp()
        self.outcome = outcome
        self.state = SubTaskState.DONE

    def to_markdown(self, detailed: bool = False) -> str:
        """Render as a checkbox line, plus indented details if ``detailed``."""
        headline = f"{_CHECKBOX_PREFIX[self.state]} {self.name}"
        if not detailed:
            return headline

        rows = [
            headline,
            f" Description: {self.description}",
            f" Expected Outcome: {self.expected_outcome}",
            f" State: {self.state.value}",
        ]
        # Outcome and finish time are shown only once the subtask is done.
        if self.state == SubTaskState.DONE:
            rows += [f" Outcome: {self.outcome}", f" Finished: {self.finished_at}"]
        return "\n".join(rows)


@dataclass
class Plan:
    """A named goal broken into ordered subtasks."""

    name: str
    description: str
    expected_outcome: str
    subtasks: list[SubTask] = field(default_factory=list)
    id: str = field(default_factory=lambda: str(uuid.uuid4())[:8])
    state: SubTaskState = SubTaskState.TODO
    created_at: str = field(default_factory=_utc_timestamp)
    outcome: Optional[str] = None
    finished_at: Optional[str] = None

    def to_markdown(self) -> str:
        """Render the plan header followed by one checkbox line per subtask."""
        checklist = "\n".join(item.to_markdown() for item in self.subtasks)
        sections = [
            f"# {self.name}",
            f"**Description**: {self.description}",
            f"**Expected Outcome**: {self.expected_outcome}",
            f"**State**: {self.state.value}",
            "## Subtasks",
            checklist,
        ]
        return "\n".join(sections)

    def finish(self, state: Literal["done", "abandoned"], outcome: str) -> None:
        """Close the plan; any ``state`` other than ``"done"`` marks it abandoned."""
        if state == "done":
            self.state = SubTaskState.DONE
        else:
            self.state = SubTaskState.ABANDONED
        self.outcome = outcome
        self.finished_at = _utc_timestamp()
+ """ + + def __init__(self, max_subtasks: int = 20) -> None: + self._current_plan: Optional[Plan] = None + self._history: list[Plan] = [] + self._max_subtasks = max_subtasks + self._on_change: list[Callable[[Optional[Plan]], None]] = [] + + @property + def current_plan(self) -> Optional[Plan]: + return self._current_plan + + def on_plan_change(self, callback: Callable[[Optional[Plan]], None]) -> None: + self._on_change.append(callback) + + def _notify(self) -> None: + for cb in self._on_change: + cb(self._current_plan) + + def create_plan( + self, name: str, description: str, expected_outcome: str, subtasks: list[dict[str, str]] + ) -> str: + """Create a new plan with subtasks. Replaces current plan if one exists.""" + tasks = [ + SubTask( + name=st["name"], + description=st["description"], + expected_outcome=st["expected_outcome"], + ) + for st in subtasks[:self._max_subtasks] + ] + if self._current_plan and self._current_plan.state not in (SubTaskState.DONE, SubTaskState.ABANDONED): + self._current_plan.finish("abandoned", "Replaced by new plan") + self._history.append(self._current_plan) + + self._current_plan = Plan( + name=name, description=description, + expected_outcome=expected_outcome, subtasks=tasks, + ) + self._notify() + return f"Plan '{name}' created with {len(tasks)} subtasks." + + def update_subtask_state(self, subtask_idx: int, state: str) -> str: + """Update a subtask's state to todo, in_progress, or abandoned.""" + if not self._current_plan: + return "Error: No active plan. Create one first." + if not 0 <= subtask_idx < len(self._current_plan.subtasks): + return f"Error: Invalid index {subtask_idx}." + if state not in ("todo", "in_progress", "abandoned"): + return f"Error: Invalid state '{state}'." + + if state == "in_progress": + for i, st in enumerate(self._current_plan.subtasks): + if st.state == SubTaskState.IN_PROGRESS: + return f"Error: Subtask {i} ('{st.name}') already in progress. Finish it first." 
def finish_subtask(self, subtask_idx: int, outcome: str) -> str:
    """Mark a subtask as done with its outcome and advance the plan.

    After the subtask is finished, the first subtask still in the TODO
    state is switched to IN_PROGRESS, unless another subtask is already in
    progress. In a strictly sequential run that is the subtask right after
    ``subtask_idx``, exactly as before. The previous code forced subtask
    ``subtask_idx + 1`` to IN_PROGRESS unconditionally, which could reopen
    a DONE/ABANDONED subtask or leave two subtasks in progress. It also
    reported "All subtasks complete" whenever the last index was finished.

    Args:
        subtask_idx: 0-based index of the subtask to finish.
        outcome: Outcome passed on to ``SubTask.finish``.

    Returns:
        A status message, or an ``"Error: ..."`` message when there is no
        active plan or the index is out of range.
    """
    plan = self._current_plan
    if not plan:
        return "Error: No active plan."
    subtasks = plan.subtasks
    if not 0 <= subtask_idx < len(subtasks):
        return f"Error: Invalid index {subtask_idx}."

    # NOTE(review): SubTask.finish is defined outside this view; presumably
    # it stores the outcome and marks the subtask DONE. Confirm.
    subtasks[subtask_idx].finish(outcome)

    others = [(i, st) for i, st in enumerate(subtasks) if i != subtask_idx]

    # Keep at most one subtask in progress: if one already is, leave it be.
    for i, st in others:
        if st.state == SubTaskState.IN_PROGRESS:
            self._notify()
            return (
                f"Subtask {subtask_idx} done. "
                f"Subtask {i} ('{st.name}') is still in progress."
            )

    # Otherwise activate the earliest subtask that has not been started.
    for _, st in others:
        if st.state == SubTaskState.TODO:
            st.state = SubTaskState.IN_PROGRESS
            self._notify()
            return f"Subtask {subtask_idx} done. Next: '{st.name}' (now in_progress)."

    self._notify()
    return f"Subtask {subtask_idx} done. All subtasks complete — call finish_plan."
def get_hint(self) -> Optional[str]:
    """Generate a contextual hint based on the current plan state.

    The hint embeds the plan's markdown and names the next action:
    work on the subtask in progress, start the next open subtask, or
    close the plan. A plan without subtasks, or with every subtask
    abandoned, is no longer told to start subtask 0.

    Returns:
        A string to inject as system context.
    """
    plan = self._current_plan
    if plan is None:
        return (
            "If the task is complex or multi-step, create a plan "
            "by calling 'create_plan'. Otherwise proceed directly."
        )

    header = f"Current plan:\n```\n{plan.to_markdown()}\n```\n"
    subtasks = plan.subtasks

    if not subtasks:
        return header + (
            "The plan has no subtasks. Add one via revise_plan(0, 'add', subtask) "
            "or close the plan with finish_plan."
        )

    for idx, st in enumerate(subtasks):
        if st.state == SubTaskState.IN_PROGRESS:
            return header + (
                f"Subtask {idx} ('{st.name}') is in progress.\n"
                f"Details: {st.description}\n"
                f"Expected outcome: {st.expected_outcome}\n"
                "Execute it, then call finish_subtask with the specific outcome."
            )

    closed = (SubTaskState.DONE, SubTaskState.ABANDONED)
    n_done = sum(1 for st in subtasks if st.state == SubTaskState.DONE)
    # Index of the first subtask that is neither done nor abandoned.
    next_idx = next((i for i, st in enumerate(subtasks) if st.state not in closed), None)

    if next_idx is None:
        if n_done == 0:
            return header + (
                "All subtasks were abandoned. Call finish_plan('abandoned', reason) "
                "to wrap up, or add new subtasks via revise_plan."
            )
        return header + (
            "All subtasks complete. Call finish_plan('done', outcome) to wrap up."
        )

    if next_idx == 0:
        return header + (
            "Start by marking the first subtask as 'in_progress' via "
            "update_subtask_state(0, 'in_progress'), then execute it."
        )

    upcoming = subtasks[next_idx]
    return header + (
        f"{n_done} subtasks done. Mark subtask {next_idx} ('{upcoming.name}') as "
        f"'in_progress' via update_subtask_state({next_idx}, 'in_progress') "
        "and execute it."
    )
"revise", "delete"]}, + "subtask": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "description": {"type": "string"}, + "expected_outcome": {"type": "string"}, + }, + }, + }, + "required": ["subtask_idx", "action"], + }, + fn=self.revise_plan, + ), + Tool( + name="finish_plan", + description="Finish or abandon the current plan with outcome/reason", + parameters={ + "type": "object", + "properties": { + "state": {"type": "string", "enum": ["done", "abandoned"]}, + "outcome": {"type": "string", "description": "Outcome or reason"}, + }, + "required": ["state", "outcome"], + }, + fn=self.finish_plan, + ), + Tool( + name="view_plan_history", + description="View historical completed/abandoned plans", + parameters={"type": "object", "properties": {}}, + fn=self.view_history, + ), + ] diff --git a/src/hawk/toolkit.py b/src/hawk/toolkit.py new file mode 100644 index 0000000..019f76c --- /dev/null +++ b/src/hawk/toolkit.py @@ -0,0 +1,315 @@ +"""Advanced toolkit with groups, middleware, and async execution for Hawk SDK.""" + +from __future__ import annotations + +import asyncio +import inspect +import json +import uuid +from dataclasses import dataclass, field +from typing import Any, AsyncGenerator, Awaitable, Callable, Literal, Optional + +from .tools import Tool + + +@dataclass +class ToolGroup: + name: str + description: str + active: bool = False + notes: Optional[str] = None + + +@dataclass +class BackgroundTask: + id: str + tool_name: str + task: asyncio.Task + result: Optional[str] = None + done: bool = False + cancelled: bool = False + + +MiddlewareFunc = Callable[[dict[str, Any], Callable[..., Any]], Any] +PostprocessFunc = Callable[[str, str], str] # (tool_name, result) -> modified_result + + +class Toolkit: + """Advanced toolkit with tool groups, middleware chain, and async execution. 
+ + Features: + - Tool groups: organize tools and activate/deactivate groups dynamically + - Middleware: onion-model pre/post processing of tool calls + - Background execution: long-running tools get a task_id for polling + - Postprocess: per-tool output transformation + + Usage: + toolkit = Toolkit() + toolkit.create_group("web", "Web browsing tools") + toolkit.register(my_tool, group="web") + toolkit.activate_groups(["web"]) + + # Add middleware + toolkit.register_middleware(logging_middleware) + + # Get active tools for sending to model + active_tools = toolkit.get_active_tools() + """ + + def __init__(self) -> None: + self._tools: dict[str, Tool] = {} + self._tool_groups: dict[str, str] = {} # tool_name -> group_name + self._groups: dict[str, ToolGroup] = { + "basic": ToolGroup(name="basic", description="Default tool group", active=True) + } + self._middlewares: list[MiddlewareFunc] = [] + self._postprocess: dict[str, PostprocessFunc] = {} # tool_name -> postprocess fn + self._background_tasks: dict[str, BackgroundTask] = {} + self._async_execution: set[str] = set() # tool names with async execution enabled + + def create_group( + self, name: str, description: str, *, active: bool = False, notes: Optional[str] = None + ) -> None: + if name in self._groups: + raise ValueError(f"Group '{name}' already exists") + self._groups[name] = ToolGroup(name=name, description=description, active=active, notes=notes) + + def activate_groups(self, names: list[str]) -> None: + for name in names: + if name in self._groups: + self._groups[name].active = True + + def deactivate_groups(self, names: list[str]) -> None: + for name in names: + if name in self._groups and name != "basic": + self._groups[name].active = False + + def reset_groups(self, active_groups: list[str]) -> str: + """Set the absolute state of all groups. 
def register(
    self,
    tool: Tool,
    *,
    group: str = "basic",
    postprocess: Optional[PostprocessFunc] = None,
    async_execution: bool = False,
    on_conflict: Literal["raise", "override", "skip"] = "raise",
) -> None:
    """Register a tool under a group, optionally with per-tool settings.

    Args:
        tool: Tool to register; it is keyed by ``tool.name``.
        group: Name of an existing group the tool belongs to.
        postprocess: Optional ``(tool_name, result) -> result`` hook.
        async_execution: Whether ``execute_async`` runs the tool in the
            background.
        on_conflict: What to do when ``tool.name`` is already registered:
            ``"raise"`` a ValueError, ``"skip"`` the new tool, or
            ``"override"`` the old one.

    Raises:
        ValueError: If ``group`` does not exist, or the name is taken and
            ``on_conflict`` is ``"raise"``.
    """
    if group not in self._groups:
        raise ValueError(f"Group '{group}' does not exist. Create it first.")

    name = tool.name
    if name in self._tools:
        if on_conflict == "raise":
            raise ValueError(f"Tool '{name}' already registered")
        if on_conflict == "skip":
            return
        # Override: forget the replaced tool's settings so its postprocess
        # hook and background flag do not leak onto the new tool.
        self._postprocess.pop(name, None)
        self._async_execution.discard(name)

    self._tools[name] = tool
    self._tool_groups[name] = group
    if postprocess:
        self._postprocess[name] = postprocess
    if async_execution:
        self._async_execution.add(name)
def execute(self, tool_name: str, arguments: dict[str, Any]) -> str:
    """Execute a tool synchronously through the middleware chain.

    The middlewares wrap the tool call in registration order (the first
    registered middleware is the outermost). The tool's postprocess hook,
    if any, is applied to the final result.

    Args:
        tool_name: Name of a registered tool.
        arguments: Keyword arguments passed to the tool's function.

    Returns:
        The tool result (non-string results are JSON-encoded), or a JSON
        object with an ``"error"`` key when the tool is unknown, belongs
        to an inactive group, has no implementation, is async, or raises.
    """
    if tool_name not in self._tools:
        return json.dumps({"error": f"Tool '{tool_name}' not found"})

    tool = self._tools[tool_name]
    group_name = self._tool_groups.get(tool_name, "basic")
    group = self._groups.get(group_name)

    if group and not group.active and group_name != "basic":
        return json.dumps({"error": f"Tool '{tool_name}' is in inactive group '{group_name}'"})

    # Context handed to every middleware and finally to the tool call.
    context = {"tool_name": tool_name, "arguments": arguments, "tool": tool}

    def base_handler(ctx: dict[str, Any]) -> str:
        t = ctx["tool"]
        args = ctx["arguments"]
        if t.fn is None:
            return json.dumps({"error": f"Tool '{t.name}' has no implementation"})
        try:
            result = t.fn(**args)
            if inspect.isawaitable(result):
                # The awaitable will never run here. Close a coroutine
                # explicitly so it is not reported as "never awaited".
                if inspect.iscoroutine(result):
                    result.close()
                raise TypeError(f"Tool '{t.name}' is async. Use execute_async.")
            return result if isinstance(result, str) else json.dumps(result)
        except Exception as e:
            return json.dumps({"error": str(e)})

    # Build the onion from the inside out; defaults bind each layer's
    # middleware and inner handler at definition time.
    handler = base_handler
    for mw in reversed(self._middlewares):
        prev_handler = handler
        handler = lambda ctx, _mw=mw, _prev=prev_handler: _mw(ctx, _prev)

    result = handler(context)

    if tool_name in self._postprocess:
        result = self._postprocess[tool_name](tool_name, result)

    return result
async def _run_background(
    self, task_id: str, tool: Tool, arguments: dict[str, Any]
) -> None:
    """Run a tool for a background task and store the outcome on its record.

    The record is looked up in ``self._background_tasks`` under
    ``task_id``. On success it receives the (post-processed) result; on
    cancellation it is flagged ``cancelled``; on any other exception it
    receives a JSON ``{"error": ...}`` payload. In every case ``done`` is
    set. Cancellation is absorbed here rather than re-raised.
    """
    try:
        fn = tool.fn
        if fn is None:
            payload = json.dumps({"error": f"Tool '{tool.name}' has no implementation"})
        else:
            value = fn(**arguments)
            if inspect.isawaitable(value):
                value = await value
            payload = value if isinstance(value, str) else json.dumps(value)

        if tool.name in self._postprocess:
            transform = self._postprocess[tool.name]
            payload = transform(tool.name, payload)

        record = self._background_tasks[task_id]
        record.result = payload
        record.done = True
    except asyncio.CancelledError:
        record = self._background_tasks[task_id]
        record.cancelled = True
        record.done = True
    except Exception as exc:
        record = self._background_tasks[task_id]
        record.result = json.dumps({"error": str(exc)})
        record.done = True
def load_state_dict(self, state: dict[str, Any]) -> None:
    """Restore which groups are active from a ``state_dict()`` mapping.

    Every group named in ``state["active_groups"]`` is activated and all
    others are deactivated. The "basic" group stays active even if it is
    not listed, as in ``deactivate_groups`` and ``reset_groups``. A
    missing or ``None`` ``active_groups`` entry is treated as empty.

    Args:
        state: Mapping with an optional ``"active_groups"`` list of names.
    """
    active = set(state.get("active_groups") or ())
    for name, group in self._groups.items():
        group.active = name == "basic" or name in active
def configure_tracing(
    *,
    endpoint: Optional[str] = None,
    service_name: str = "hawk-sdk",
    enabled: bool = True,
) -> None:
    """Configure OpenTelemetry tracing for the Hawk SDK.

    Tracing stays disabled when ``enabled`` is false or when the
    OpenTelemetry packages cannot be imported.

    Args:
        endpoint: OTLP/HTTP endpoint (e.g. "http://localhost:4318"). A URL
            without a path is treated as a collector base URL and gets
            "/v1/traces" appended, because the exporter uses a
            constructor-supplied endpoint verbatim. A URL that already has
            a path is passed through unchanged.
            If None, the exporter is configured from the
            OTEL_EXPORTER_OTLP_TRACES_ENDPOINT / OTEL_EXPORTER_OTLP_ENDPOINT
            environment variables when either is set. Otherwise no
            exporter is attached.
        service_name: Service name for traces.
        enabled: Whether tracing is enabled.
    """
    global _tracing_enabled, _tracer

    if not enabled:
        _tracing_enabled = False
        return

    import os
    from urllib.parse import urlsplit

    try:
        from opentelemetry import trace
        from opentelemetry.sdk.resources import Resource
        from opentelemetry.sdk.trace import TracerProvider
        from opentelemetry.sdk.trace.export import BatchSpanProcessor

        env_configured = any(
            os.environ.get(var)
            for var in ("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT", "OTEL_EXPORTER_OTLP_ENDPOINT")
        )

        exporter = None
        if endpoint or env_configured:
            from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter

            if endpoint:
                traces_url = endpoint
                if urlsplit(endpoint).path in ("", "/"):
                    traces_url = endpoint.rstrip("/") + "/v1/traces"
                exporter = OTLPSpanExporter(endpoint=traces_url)
            else:
                # No explicit endpoint: the exporter resolves the
                # OTEL_EXPORTER_OTLP_* environment variables itself.
                exporter = OTLPSpanExporter()
    except ImportError:
        _tracing_enabled = False
        return

    resource = Resource.create({"service.name": service_name})
    provider = TracerProvider(resource=resource)
    if exporter is not None:
        provider.add_span_processor(BatchSpanProcessor(exporter))

    trace.set_tracer_provider(provider)
    _tracer = trace.get_tracer("hawk-sdk")
    _tracing_enabled = True
+ """ + if inspect.iscoroutinefunction(func): + @functools.wraps(func) + async def async_wrapper(*args: Any, **kwargs: Any) -> Any: + if not _tracing_enabled or _tracer is None: + return await func(*args, **kwargs) + + span_name = f"chat {func.__qualname__}" + attributes = { + "gen_ai.operation.name": "chat", + "gen_ai.system": "hawk", + } + + # Extract message from args if available + if args and len(args) > 1 and isinstance(args[1], str): + attributes["gen_ai.request.message"] = args[1][:500] + elif "message" in kwargs: + attributes["gen_ai.request.message"] = str(kwargs["message"])[:500] + + with _tracer.start_as_current_span(span_name, attributes=attributes, end_on_exit=False) as span: + try: + result = await func(*args, **kwargs) + if hasattr(result, "response"): + span.set_attribute("gen_ai.response.text", str(result.response)[:500]) + if hasattr(result, "tokens_in"): + span.set_attribute("gen_ai.usage.input_tokens", result.tokens_in) + if hasattr(result, "tokens_out"): + span.set_attribute("gen_ai.usage.output_tokens", result.tokens_out) + span.set_status(_ok_status()) + span.end() + return result + except Exception as e: + _record_error(span, e) + raise + return async_wrapper # type: ignore + + @functools.wraps(func) + def sync_wrapper(*args: Any, **kwargs: Any) -> Any: + if not _tracing_enabled or _tracer is None: + return func(*args, **kwargs) + + span_name = f"chat {func.__qualname__}" + attributes = { + "gen_ai.operation.name": "chat", + "gen_ai.system": "hawk", + } + + if args and len(args) > 1 and isinstance(args[1], str): + attributes["gen_ai.request.message"] = args[1][:500] + elif "message" in kwargs: + attributes["gen_ai.request.message"] = str(kwargs["message"])[:500] + + with _tracer.start_as_current_span(span_name, attributes=attributes, end_on_exit=False) as span: + try: + result = func(*args, **kwargs) + if hasattr(result, "response"): + span.set_attribute("gen_ai.response.text", str(result.response)[:500]) + if hasattr(result, "tokens_in"): 
def trace_tool(func: Callable[..., T]) -> Callable[..., T]:
    """Decorator to trace tool execution with OTel spans.

    When tracing is disabled the wrapped function is called directly.
    Otherwise each call runs inside a span named ``"tool <function name>"``
    that records the tool name and, for string results, the first 1000
    characters of the result.

    The span is ended on every exit path. Exceptions are recorded via
    ``_record_error``. A ``BaseException`` such as
    ``asyncio.CancelledError`` or ``KeyboardInterrupt`` is not caught by
    ``except Exception``, so it now ends the span explicitly instead of
    leaving it open.
    """
    if inspect.iscoroutinefunction(func):
        @functools.wraps(func)
        async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
            if not _tracing_enabled or _tracer is None:
                return await func(*args, **kwargs)

            tool_name = func.__name__
            span_name = f"tool {tool_name}"
            attributes = {
                "gen_ai.operation.name": "execute_tool",
                "gen_ai.tool.name": tool_name,
            }

            # end_on_exit=False: the span is ended by hand below so that
            # _record_error (which ends it too) never causes a double end.
            with _tracer.start_as_current_span(span_name, attributes=attributes, end_on_exit=False) as span:
                try:
                    result = await func(*args, **kwargs)
                    if isinstance(result, str):
                        span.set_attribute("gen_ai.tool.result", result[:1000])
                    span.set_status(_ok_status())
                except Exception as e:
                    _record_error(span, e)
                    raise
                except BaseException:
                    # Cancellation / interpreter exit: not an error to
                    # record, but the span must still be closed.
                    span.end()
                    raise
                span.end()
                return result
        return async_wrapper  # type: ignore

    @functools.wraps(func)
    def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
        if not _tracing_enabled or _tracer is None:
            return func(*args, **kwargs)

        tool_name = func.__name__
        span_name = f"tool {tool_name}"
        attributes = {
            "gen_ai.operation.name": "execute_tool",
            "gen_ai.tool.name": tool_name,
        }

        with _tracer.start_as_current_span(span_name, attributes=attributes, end_on_exit=False) as span:
            try:
                result = func(*args, **kwargs)
                if isinstance(result, str):
                    span.set_attribute("gen_ai.tool.result", result[:1000])
                span.set_status(_ok_status())
            except Exception as e:
                _record_error(span, e)
                raise
            except BaseException:
                span.end()
                raise
            span.end()
            return result
    return sync_wrapper  # type: ignore
def _trace_generator(gen: Generator, span: Any) -> Generator:
    """Wrap a generator so that ``span`` covers its whole lifetime.

    Iteration is delegated with ``yield from``, so ``send``/``throw``/
    ``close`` reach ``gen`` and its return value is passed on. The span
    is ended exactly once:

    - after normal exhaustion, with OK status;
    - via ``_record_error`` when ``gen`` raises an ``Exception``;
    - without a status when the consumer stops early (``GeneratorExit``)
      or another ``BaseException`` propagates. Previously the span was
      never ended on this path.
    """
    try:
        result = yield from gen
    except Exception as e:
        _record_error(span, e)  # records the error and ends the span
        raise
    except BaseException:
        # Early close by the consumer is not an error; just end the span.
        span.end()
        raise
    span.set_status(_ok_status())
    span.end()
    return result
AttributeError): + pass + finally: + try: + span.end() + except Exception: + pass