From 65cadf66edd277d8d039c2a1d0e7a4f1ebe0efac Mon Sep 17 00:00:00 2001 From: johnxie Date: Sat, 21 Mar 2026 00:15:14 -0700 Subject: [PATCH] feat: add 4 Genesis-strategic tutorials (AFFiNE, Plane, MetaGPT, A2A Protocol) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New tutorials for repos directly relevant to the Taskade Genesis competitive landscape: - AFFiNE (66K stars): Open-source AI workspace — docs, whiteboards, databases, AI copilot - Plane (47K stars): AI-native project management — issues, cycles, wiki, AI features - MetaGPT (66K stars): Multi-agent role-based collaboration for software generation - A2A Protocol (23K stars): Google's Agent-to-Agent interoperability standard Each tutorial: 9 files (README + 8 chapters), v2 format compliant, with mermaid diagrams, code examples, and cross-references. Total: 14K+ lines of new content. Repository now contains 195 tutorials (was 191). --- CONTENT_GAPS_ANALYSIS.md | 4 +- README.md | 4 + TUTORIAL_STRUCTURE.md | 2 +- categories/ai-ml-platforms.md | 4 + discoverability/query-hub.md | 2 +- discoverability/search-intent-map.md | 21 +- discoverability/tutorial-directory.md | 10 +- discoverability/tutorial-index.json | 130 +++- discoverability/tutorial-itemlist.schema.json | 412 ++++++------ llms-full.txt | 24 + llms.txt | 4 + tutorials/README.md | 8 +- .../01-getting-started.md | 209 ++++++ .../02-protocol-specification.md | 385 +++++++++++ .../03-agent-discovery.md | 376 +++++++++++ .../04-task-management.md | 474 +++++++++++++ .../05-authentication-and-security.md | 406 ++++++++++++ .../a2a-protocol-tutorial/06-python-sdk.md | 471 +++++++++++++ .../07-multi-agent-scenarios.md | 454 +++++++++++++ .../a2a-protocol-tutorial/08-mcp-plus-a2a.md | 474 +++++++++++++ tutorials/a2a-protocol-tutorial/README.md | 130 ++++ .../affine-tutorial/01-getting-started.md | 229 +++++++ .../affine-tutorial/02-system-architecture.md | 302 +++++++++ tutorials/affine-tutorial/03-block-system.md | 
315 +++++++++ .../04-collaborative-editing.md | 343 ++++++++++ tutorials/affine-tutorial/05-ai-copilot.md | 418 ++++++++++++ .../affine-tutorial/06-database-and-views.md | 444 +++++++++++++ tutorials/affine-tutorial/07-plugin-system.md | 525 +++++++++++++++ .../08-self-hosting-and-deployment.md | 569 ++++++++++++++++ tutorials/affine-tutorial/README.md | 116 ++++ .../metagpt-tutorial/01-getting-started.md | 279 ++++++++ tutorials/metagpt-tutorial/02-agent-roles.md | 377 +++++++++++ .../metagpt-tutorial/03-sop-and-workflows.md | 402 +++++++++++ .../metagpt-tutorial/04-action-system.md | 385 +++++++++++ .../metagpt-tutorial/05-memory-and-context.md | 399 +++++++++++ .../metagpt-tutorial/06-tool-integration.md | 418 ++++++++++++ .../07-multi-agent-orchestration.md | 479 +++++++++++++ .../08-production-deployment.md | 598 +++++++++++++++++ tutorials/metagpt-tutorial/README.md | 135 ++++ .../plane-tutorial/01-getting-started.md | 282 ++++++++ .../plane-tutorial/02-system-architecture.md | 382 +++++++++++ tutorials/plane-tutorial/03-issue-tracking.md | 402 +++++++++++ .../plane-tutorial/04-cycles-and-modules.md | 420 ++++++++++++ tutorials/plane-tutorial/05-ai-features.md | 451 +++++++++++++ tutorials/plane-tutorial/06-pages-and-wiki.md | 387 +++++++++++ .../plane-tutorial/07-api-and-integrations.md | 476 +++++++++++++ .../08-self-hosting-and-deployment.md | 627 ++++++++++++++++++ tutorials/plane-tutorial/README.md | 132 ++++ tutorials/tutorial-manifest.json | 80 ++- 49 files changed, 14163 insertions(+), 213 deletions(-) create mode 100644 tutorials/a2a-protocol-tutorial/01-getting-started.md create mode 100644 tutorials/a2a-protocol-tutorial/02-protocol-specification.md create mode 100644 tutorials/a2a-protocol-tutorial/03-agent-discovery.md create mode 100644 tutorials/a2a-protocol-tutorial/04-task-management.md create mode 100644 tutorials/a2a-protocol-tutorial/05-authentication-and-security.md create mode 100644 tutorials/a2a-protocol-tutorial/06-python-sdk.md 
create mode 100644 tutorials/a2a-protocol-tutorial/07-multi-agent-scenarios.md create mode 100644 tutorials/a2a-protocol-tutorial/08-mcp-plus-a2a.md create mode 100644 tutorials/a2a-protocol-tutorial/README.md create mode 100644 tutorials/affine-tutorial/01-getting-started.md create mode 100644 tutorials/affine-tutorial/02-system-architecture.md create mode 100644 tutorials/affine-tutorial/03-block-system.md create mode 100644 tutorials/affine-tutorial/04-collaborative-editing.md create mode 100644 tutorials/affine-tutorial/05-ai-copilot.md create mode 100644 tutorials/affine-tutorial/06-database-and-views.md create mode 100644 tutorials/affine-tutorial/07-plugin-system.md create mode 100644 tutorials/affine-tutorial/08-self-hosting-and-deployment.md create mode 100644 tutorials/affine-tutorial/README.md create mode 100644 tutorials/metagpt-tutorial/01-getting-started.md create mode 100644 tutorials/metagpt-tutorial/02-agent-roles.md create mode 100644 tutorials/metagpt-tutorial/03-sop-and-workflows.md create mode 100644 tutorials/metagpt-tutorial/04-action-system.md create mode 100644 tutorials/metagpt-tutorial/05-memory-and-context.md create mode 100644 tutorials/metagpt-tutorial/06-tool-integration.md create mode 100644 tutorials/metagpt-tutorial/07-multi-agent-orchestration.md create mode 100644 tutorials/metagpt-tutorial/08-production-deployment.md create mode 100644 tutorials/metagpt-tutorial/README.md create mode 100644 tutorials/plane-tutorial/01-getting-started.md create mode 100644 tutorials/plane-tutorial/02-system-architecture.md create mode 100644 tutorials/plane-tutorial/03-issue-tracking.md create mode 100644 tutorials/plane-tutorial/04-cycles-and-modules.md create mode 100644 tutorials/plane-tutorial/05-ai-features.md create mode 100644 tutorials/plane-tutorial/06-pages-and-wiki.md create mode 100644 tutorials/plane-tutorial/07-api-and-integrations.md create mode 100644 tutorials/plane-tutorial/08-self-hosting-and-deployment.md create mode 100644 
tutorials/plane-tutorial/README.md diff --git a/CONTENT_GAPS_ANALYSIS.md b/CONTENT_GAPS_ANALYSIS.md index 2dde713..0a73be1 100644 --- a/CONTENT_GAPS_ANALYSIS.md +++ b/CONTENT_GAPS_ANALYSIS.md @@ -6,8 +6,8 @@ This document tracks structural and quality gaps that impact completeness and di | Metric | Value | |:-------|:------| -| Tutorial directories | 191 | -| Tutorials with exactly 8 numbered chapters | 188 | +| Tutorial directories | 195 | +| Tutorials with exactly 8 numbered chapters | 192 | | Tutorials with >8 numbered chapters | 3 | | Tutorials with 0 numbered chapters | 0 | | Tutorials with partial chapter coverage (1-7) | 0 | diff --git a/README.md b/README.md index 8f10020..8aaad14 100644 --- a/README.md +++ b/README.md @@ -208,6 +208,8 @@ Build autonomous AI systems that reason, plan, and collaborate. | **[Open SWE](tutorials/open-swe-tutorial/)** | 5.3K+ | Python | Async cloud coding agent architecture and migration playbook | | **[HumanLayer](tutorials/humanlayer-tutorial/)** | 9.6K+ | Python | Context engineering and human-governed coding-agent workflows | | **[Wshobson Agents](tutorials/wshobson-agents-tutorial/)** | 29.9K+ | TypeScript | Pluginized multi-agent workflows with specialist Claude Code agents | +| **[MetaGPT](tutorials/metagpt-tutorial/)** | 66K+ | Python | Multi-agent framework with role-based collaboration (PM, Architect, Engineer) for software generation | +| **[A2A Protocol](tutorials/a2a-protocol-tutorial/)** | 23K+ | Python/TypeScript | Google's Agent-to-Agent protocol for cross-platform agent interoperability and discovery | ### 🧠 LLM Frameworks & RAG @@ -266,6 +268,8 @@ Full-stack AI chat platforms and copilots. 
| **[GPT-OSS](tutorials/gpt-oss-tutorial/)** | 6.4K+ | TypeScript | Open-source GPT implementation | | **[Claude Quickstarts](tutorials/claude-quickstarts-tutorial/)** | 13.7K+ | Python/TypeScript | Production Claude integration patterns | | **[Cherry Studio](tutorials/cherry-studio-tutorial/)** | 40.5K+ | TypeScript | Multi-provider AI desktop workspace with assistants, documents, and MCP tools | +| **[AFFiNE](tutorials/affine-tutorial/)** | 66K+ | TypeScript | Open-source Notion + Miro alternative with docs, whiteboards, databases, and AI copilot | +| **[Plane](tutorials/plane-tutorial/)** | 47K+ | Python/TypeScript | AI-native project management with issues, cycles, modules, wiki, and AI features | ### 🔧 Developer Tools & Productivity diff --git a/TUTORIAL_STRUCTURE.md b/TUTORIAL_STRUCTURE.md index f6f88e6..b634b8d 100644 --- a/TUTORIAL_STRUCTURE.md +++ b/TUTORIAL_STRUCTURE.md @@ -17,7 +17,7 @@ tutorials// | Pattern | Count | |:--------|:------| -| `root_only` | 191 | +| `root_only` | 195 | | `docs_only` | 0 | | `index_only` | 0 | | `mixed` | 0 | diff --git a/categories/ai-ml-platforms.md b/categories/ai-ml-platforms.md index afa34f6..9abb700 100644 --- a/categories/ai-ml-platforms.md +++ b/categories/ai-ml-platforms.md @@ -167,6 +167,10 @@ - [Vibe Kanban](../tutorials/vibe-kanban-tutorial/) - [Whisper Cpp](../tutorials/whisper-cpp-tutorial/) - [Wshobson Agents](../tutorials/wshobson-agents-tutorial/) +- [AFFiNE](../tutorials/affine-tutorial/) +- [Plane](../tutorials/plane-tutorial/) +- [MetaGPT](../tutorials/metagpt-tutorial/) +- [A2A Protocol](../tutorials/a2a-protocol-tutorial/) ## Suggest Additions diff --git a/discoverability/query-hub.md b/discoverability/query-hub.md index b5c5f33..576f4b5 100644 --- a/discoverability/query-hub.md +++ b/discoverability/query-hub.md @@ -2,7 +2,7 @@ Auto-generated high-intent query landing surface mapped to the most relevant tutorials. 
-- Total tutorials indexed: **191** +- Total tutorials indexed: **195** - Query hubs: **6** - Source: `scripts/generate_discoverability_assets.py` diff --git a/discoverability/search-intent-map.md b/discoverability/search-intent-map.md index b54223c..ec9ef63 100644 --- a/discoverability/search-intent-map.md +++ b/discoverability/search-intent-map.md @@ -2,13 +2,13 @@ Auto-generated topical clusters to strengthen internal linking and query-to-tutorial mapping. -- Total tutorials: **191** +- Total tutorials: **195** - Total clusters: **9** - Source: `scripts/generate_discoverability_assets.py` ## ai-app-frameworks -- tutorial_count: **25** +- tutorial_count: **26** - [Activepieces Tutorial: Open-Source Automation, Pieces, and AI-Ready Workflow Operations](https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/activepieces-tutorial/README.md) - intents: production-operations @@ -50,6 +50,8 @@ Auto-generated topical clusters to strengthen internal linking and query-to-tuto - intents: production-operations - [OpenBB Tutorial: Complete Guide to Investment Research Platform](https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/openbb-tutorial/README.md) - intents: general-learning +- [Plane Tutorial: AI-Native Project Management](https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/plane-tutorial/README.md) + - intents: general-learning - [Semantic Kernel Tutorial: Microsoft's AI Orchestration](https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/semantic-kernel-tutorial/README.md) - intents: general-learning - [Supabase Tutorial: Building Modern Backend Applications](https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/supabase-tutorial/README.md) @@ -58,13 +60,14 @@ Auto-generated topical clusters to strengthen internal linking and query-to-tuto - intents: general-learning - [Vercel AI SDK Tutorial: Production TypeScript AI Apps and 
Agents](https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/vercel-ai-tutorial/README.md) - intents: production-operations -- [n8n AI Tutorial: Workflow Automation with AI](https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/n8n-ai-tutorial/README.md) - - intents: general-learning +- ... plus 1 more tutorials in this cluster ## ai-coding-agents -- tutorial_count: **85** +- tutorial_count: **87** +- [A2A Protocol Tutorial: Building Interoperable Agent Systems With Google's Agent-to-Agent Standard](https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/a2a-protocol-tutorial/README.md) + - intents: agentic-coding - [ADK Python Tutorial: Production-Grade Agent Engineering with Google's ADK](https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/adk-python-tutorial/README.md) - intents: production-operations, agentic-coding - [AG2 Tutorial: Next-Generation Multi-Agent Framework](https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/ag2-tutorial/README.md) @@ -113,14 +116,14 @@ Auto-generated topical clusters to strengthen internal linking and query-to-tuto - intents: agentic-coding - [Cline Tutorial: Agentic Coding with Human Control](https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/cline-tutorial/README.md) - intents: agentic-coding -- [CodeMachine CLI Tutorial: Orchestrating Long-Running Coding Agent Workflows](https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/codemachine-cli-tutorial/README.md) - - intents: agentic-coding -- ... plus 60 more tutorials in this cluster +- ... 
plus 62 more tutorials in this cluster ## data-and-storage -- tutorial_count: **8** +- tutorial_count: **9** +- [AFFiNE Tutorial: Open-Source AI Workspace with Docs, Whiteboards, and Databases](https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/affine-tutorial/README.md) + - intents: general-learning - [Athens Research: Deep Dive Tutorial](https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/athens-research-tutorial/README.md) - intents: architecture-deep-dive - [ClickHouse Tutorial: High-Performance Analytical Database](https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/clickhouse-tutorial/README.md) diff --git a/discoverability/tutorial-directory.md b/discoverability/tutorial-directory.md index 3619d92..032b2e5 100644 --- a/discoverability/tutorial-directory.md +++ b/discoverability/tutorial-directory.md @@ -2,15 +2,19 @@ This page is auto-generated from the tutorial index and is intended as a fast browse surface for contributors and search crawlers. -- Total tutorials: **191** +- Total tutorials: **195** - Source: `scripts/generate_discoverability_assets.py` ## A +- [A2A Protocol Tutorial: Building Interoperable Agent Systems With Google's Agent-to-Agent Standard](https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/a2a-protocol-tutorial/README.md) + - Learn how agents discover, communicate, and delegate tasks to each other using the A2A protocol — the open standard (now Linux Foundation) for agent-to-agent interoperability. - [Activepieces Tutorial: Open-Source Automation, Pieces, and AI-Ready Workflow Operations](https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/activepieces-tutorial/README.md) - Learn how to use activepieces/activepieces to build, run, and govern production automation workflows with open-source extensibility, piece development, API control, and self-hosted operations. 
- [ADK Python Tutorial: Production-Grade Agent Engineering with Google's ADK](https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/adk-python-tutorial/README.md) - Learn how to use google/adk-python to build, evaluate, and deploy modular AI agent systems with strong tooling, session controls, and production rollouts. +- [AFFiNE Tutorial: Open-Source AI Workspace with Docs, Whiteboards, and Databases](https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/affine-tutorial/README.md) + - Learn how to use toeverything/AFFiNE to build, extend, and self-host a modern knowledge workspace combining documents, whiteboards, and databases — powered by BlockSuite, CRDT-based collaboration, and integrated AI copilot features. - [AG2 Tutorial: Next-Generation Multi-Agent Framework](https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/ag2-tutorial/README.md) - Build collaborative AI agent systems with AG2, the community-driven successor to AutoGen. - [AgentGPT Tutorial: Building Autonomous AI Agents](https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/agentgpt-tutorial/README.md) @@ -284,6 +288,8 @@ This page is auto-generated from the tutorial index and is intended as a fast br - A deep technical walkthrough of MeiliSearch covering Lightning Fast Search Engine. - [Mem0 Tutorial: Building Production-Ready AI Agents with Scalable Long-Term Memory](https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/mem0-tutorial/README.md) - A deep technical walkthrough of Mem0 covering Building Production-Ready AI Agents with Scalable Long-Term Memory. +- [MetaGPT Tutorial: Multi-Agent Software Development with Role-Based Collaboration](https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/metagpt-tutorial/README.md) + - In one sentence: Give MetaGPT a product idea, and a virtual software company of AI agents designs, architects, codes, and tests it for you. 
- [Mini-SWE-Agent Tutorial: Minimal Autonomous Code Agent Design at Benchmark Scale](https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/mini-swe-agent-tutorial/README.md) - Learn how to use SWE-agent/mini-swe-agent to run compact, high-performing software-engineering agent workflows with minimal scaffolding and strong reproducibility. - [Mistral Vibe Tutorial: Minimal CLI Coding Agent by Mistral](https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/mistral-vibe-tutorial/README.md) @@ -351,6 +357,8 @@ This page is auto-generated from the tutorial index and is intended as a fast br - AI Photo Management Revolution: Enhanced facial recognition, LLM integrations, and advanced organization features mark PhotoPrism's evolution. - [Plandex Tutorial: Large-Task AI Coding Agent Workflows](https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/plandex-tutorial/README.md) - Learn how to use plandex-ai/plandex for large codebase tasks with strong context management, cumulative diff review, model packs, and self-hosted operations. +- [Plane Tutorial: AI-Native Project Management](https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/plane-tutorial/README.md) + - Open-source AI-native project management that rivals Jira and Linear — with issues, cycles, modules, and wiki built in. - [Planning with Files Tutorial: Persistent Markdown Workflow Memory for AI Coding Agents](https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/planning-with-files-tutorial/README.md) - Learn how to use OthmanAdi/planning-with-files to run Manus-style file-based planning workflows across Claude Code and other AI coding environments. 
- [Playwright MCP Tutorial: Browser Automation for Coding Agents Through MCP](https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/playwright-mcp-tutorial/README.md) diff --git a/discoverability/tutorial-index.json b/discoverability/tutorial-index.json index c9fc52e..bae86a7 100644 --- a/discoverability/tutorial-index.json +++ b/discoverability/tutorial-index.json @@ -1,7 +1,40 @@ { "project": "awesome-code-docs", - "tutorial_count": 191, + "tutorial_count": 195, "tutorials": [ + { + "cluster": "ai-coding-agents", + "file_url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/a2a-protocol-tutorial/README.md", + "index_path": "tutorials/a2a-protocol-tutorial/README.md", + "intent_signals": [ + "agentic-coding" + ], + "keywords": [ + "a2a", + "protocol", + "building", + "interoperable", + "agent", + "google", + "standard", + "agents", + "discover", + "communicate", + "delegate", + "tasks", + "each", + "other", + "open", + "now", + "linux", + "foundation" + ], + "path": "tutorials/a2a-protocol-tutorial", + "repo_url": "https://github.com/johnxie/awesome-code-docs/tree/main/tutorials/a2a-protocol-tutorial", + "slug": "a2a-protocol-tutorial", + "summary": "Learn how agents discover, communicate, and delegate tasks to each other using the A2A protocol \u2014 the open standard (now Linux Foundation) for agent-to-agent interoperability.", + "title": "A2A Protocol Tutorial: Building Interoperable Agent Systems With Google's Agent-to-Agent Standard" + }, { "cluster": "ai-app-frameworks", "file_url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/activepieces-tutorial/README.md", @@ -65,6 +98,39 @@ "summary": "Learn how to use google/adk-python to build, evaluate, and deploy modular AI agent systems with strong tooling, session controls, and production rollouts.", "title": "ADK Python Tutorial: Production-Grade Agent Engineering with Google's ADK" }, + { + "cluster": "data-and-storage", + "file_url": 
"https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/affine-tutorial/README.md", + "index_path": "tutorials/affine-tutorial/README.md", + "intent_signals": [ + "general-learning" + ], + "keywords": [ + "affine", + "open", + "source", + "workspace", + "whiteboards", + "databases", + "toeverything", + "extend", + "self", + "host", + "modern", + "knowledge", + "combining", + "documents", + "powered", + "blocksuite", + "crdt", + "based" + ], + "path": "tutorials/affine-tutorial", + "repo_url": "https://github.com/johnxie/awesome-code-docs/tree/main/tutorials/affine-tutorial", + "slug": "affine-tutorial", + "summary": "Learn how to use toeverything/AFFiNE to build, extend, and self-host a modern knowledge workspace combining documents, whiteboards, and databases \u2014 powered by BlockSuite, CRDT-based collaboration, and integrated AI copilot features.", + "title": "AFFiNE Tutorial: Open-Source AI Workspace with Docs, Whiteboards, and Databases" + }, { "cluster": "ai-coding-agents", "file_url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/ag2-tutorial/README.md", @@ -3666,6 +3732,40 @@ "summary": "A deep technical walkthrough of Mem0 covering Building Production-Ready AI Agents with Scalable Long-Term Memory.", "title": "Mem0 Tutorial: Building Production-Ready AI Agents with Scalable Long-Term Memory" }, + { + "cluster": "ai-coding-agents", + "file_url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/metagpt-tutorial/README.md", + "index_path": "tutorials/metagpt-tutorial/README.md", + "intent_signals": [ + "architecture-deep-dive", + "agentic-coding" + ], + "keywords": [ + "metagpt", + "multi", + "agent", + "software", + "development", + "role", + "based", + "collaboration", + "one", + "sentence", + "give", + "product", + "idea", + "virtual", + "company", + "agents", + "designs", + "architects" + ], + "path": "tutorials/metagpt-tutorial", + "repo_url": 
"https://github.com/johnxie/awesome-code-docs/tree/main/tutorials/metagpt-tutorial", + "slug": "metagpt-tutorial", + "summary": "In one sentence: Give MetaGPT a product idea, and a virtual software company of AI agents designs, architects, codes, and tests it for you.", + "title": "MetaGPT Tutorial: Multi-Agent Software Development with Role-Based Collaboration" + }, { "cluster": "ai-coding-agents", "file_url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/mini-swe-agent-tutorial/README.md", @@ -4484,6 +4584,34 @@ "summary": "Learn how to use plandex-ai/plandex for large codebase tasks with strong context management, cumulative diff review, model packs, and self-hosted operations.", "title": "Plandex Tutorial: Large-Task AI Coding Agent Workflows" }, + { + "cluster": "ai-app-frameworks", + "file_url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/plane-tutorial/README.md", + "index_path": "tutorials/plane-tutorial/README.md", + "intent_signals": [ + "general-learning" + ], + "keywords": [ + "plane", + "native", + "management", + "open", + "source", + "rivals", + "jira", + "linear", + "issues", + "cycles", + "modules", + "wiki", + "built" + ], + "path": "tutorials/plane-tutorial", + "repo_url": "https://github.com/johnxie/awesome-code-docs/tree/main/tutorials/plane-tutorial", + "slug": "plane-tutorial", + "summary": "Open-source AI-native project management that rivals Jira and Linear \u2014 with issues, cycles, modules, and wiki built in.", + "title": "Plane Tutorial: AI-Native Project Management" + }, { "cluster": "ai-coding-agents", "file_url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/planning-with-files-tutorial/README.md", diff --git a/discoverability/tutorial-itemlist.schema.json b/discoverability/tutorial-itemlist.schema.json index 595c1ab..183f49b 100644 --- a/discoverability/tutorial-itemlist.schema.json +++ b/discoverability/tutorial-itemlist.schema.json @@ -2,1345 +2,1373 @@ "@context": 
"https://schema.org", "@type": "ItemList", "itemListElement": [ + { + "@type": "ListItem", + "description": "Learn how agents discover, communicate, and delegate tasks to each other using the A2A protocol \u2014 the open standard (now Linux Foundation) for agent-to-agent interoperability.", + "name": "A2A Protocol Tutorial: Building Interoperable Agent Systems With Google's Agent-to-Agent Standard", + "position": 1, + "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/a2a-protocol-tutorial/README.md" + }, { "@type": "ListItem", "description": "Learn how to use activepieces/activepieces to build, run, and govern production automation workflows with open-source extensibility, piece development, API control, and self-hosted operations.", "name": "Activepieces Tutorial: Open-Source Automation, Pieces, and AI-Ready Workflow Operations", - "position": 1, + "position": 2, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/activepieces-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use google/adk-python to build, evaluate, and deploy modular AI agent systems with strong tooling, session controls, and production rollouts.", "name": "ADK Python Tutorial: Production-Grade Agent Engineering with Google's ADK", - "position": 2, + "position": 3, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/adk-python-tutorial/README.md" }, + { + "@type": "ListItem", + "description": "Learn how to use toeverything/AFFiNE to build, extend, and self-host a modern knowledge workspace combining documents, whiteboards, and databases \u2014 powered by BlockSuite, CRDT-based collaboration, and integrated AI copilot features.", + "name": "AFFiNE Tutorial: Open-Source AI Workspace with Docs, Whiteboards, and Databases", + "position": 4, + "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/affine-tutorial/README.md" + }, { "@type": "ListItem", "description": "Build collaborative AI 
agent systems with AG2, the community-driven successor to AutoGen.", "name": "AG2 Tutorial: Next-Generation Multi-Agent Framework", - "position": 3, + "position": 5, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/ag2-tutorial/README.md" }, { "@type": "ListItem", "description": "A deep technical walkthrough of AgentGPT covering Building Autonomous AI Agents.", "name": "AgentGPT Tutorial: Building Autonomous AI Agents", - "position": 4, + "position": 6, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/agentgpt-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use Fosowl/agenticSeek to run multi-agent planning, browsing, and coding workflows with local model support, Docker-first runtime defaults, and practical operator guardrails.", "name": "AgenticSeek Tutorial: Local-First Autonomous Agent Operations", - "position": 5, + "position": 7, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/agenticseek-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use agentsmd/agents.md to define a clear, portable instruction contract for coding agents across projects and tools.", "name": "AGENTS.md Tutorial: Open Standard for Coding-Agent Guidance in Repositories", - "position": 6, + "position": 8, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/agents-md-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to build and operate learning multi-agent systems with agno-agi/agno, including memory, orchestration, AgentOS runtime, and production guardrails.", "name": "Agno Tutorial: Multi-Agent Systems That Learn Over Time", - "position": 7, + "position": 9, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/agno-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn to use Aider-AI/aider for real file edits, git-native workflows, model routing, and reliable day-to-day coding loops.", 
"name": "Aider Tutorial: AI Pair Programming in Your Terminal", - "position": 8, + "position": 10, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/aider-tutorial/README.md" }, { "@type": "ListItem", "description": "A practical guide to building with Anthropic's API and official SDKs, including messages, tools, vision, streaming, and production operations.", "name": "Anthropic API Tutorial: Build Production Apps with Claude", - "position": 9, + "position": 11, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/anthropic-code-tutorial/README.md" }, { "@type": "ListItem", "description": "Build and operate production-quality skills for Claude Code, Claude.ai, and the Claude API.", "name": "Anthropic Skills Tutorial: Reusable AI Agent Capabilities", - "position": 10, + "position": 12, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/anthropic-skills-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to deploy and operate Mintplex-Labs/anything-llm for document-grounded chat, workspace management, agent workflows, and production use.", "name": "AnythingLLM Tutorial: Self-Hosted RAG and Agents Platform", - "position": 11, + "position": 13, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/anything-llm-tutorial/README.md" }, { "@type": "ListItem", "description": "Athens Research \u2014 An open-source, Roam-like knowledge management system built with ClojureScript and graph databases.", "name": "Athens Research: Deep Dive Tutorial", - "position": 12, + "position": 14, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/athens-research-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use HKUDS/AutoAgent to create and orchestrate LLM agents through natural-language workflows, with support for CLI operations, tool creation, and benchmark-oriented evaluation.", "name": "AutoAgent Tutorial: Zero-Code Agent Creation and 
Automated Workflow Orchestration", - "position": 13, + "position": 15, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/autoagent-tutorial/README.md" }, { "@type": "ListItem", "description": "A deep technical walkthrough of Microsoft AutoGen covering Building Multi-Agent AI Systems.", "name": "Microsoft AutoGen Tutorial: Building Multi-Agent AI Systems", - "position": 14, + "position": 16, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/autogen-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use hesreallyhim/awesome-claude-code as a high-signal discovery and decision system for skills, commands, hooks, tooling, and CLAUDE.md patterns.", "name": "Awesome Claude Code Tutorial: Curated Claude Code Resource Discovery and Evaluation", - "position": 15, + "position": 17, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/awesome-claude-code-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use ComposioHQ/awesome-claude-skills to discover, evaluate, install, and contribute Claude skills for coding, automation, writing, and cross-app workflows.", "name": "Awesome Claude Skills Tutorial: High-Signal Skill Discovery and Reuse for Claude Workflows", - "position": 16, + "position": 18, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/awesome-claude-skills-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use punkpeye/awesome-mcp-servers as a practical control surface for discovering, vetting, and operating Model Context Protocol servers across coding, data, browser automation, and enterprise workflows.", "name": "Awesome MCP Servers Tutorial: Curating and Operating High-Signal MCP Integrations", - "position": 17, + "position": 19, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/awesome-mcp-servers-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use 
awslabs/mcp to compose, run, and govern AWS-focused MCP servers across development, infrastructure, data, and operations workflows.", "name": "awslabs/mcp Tutorial: Operating a Large-Scale MCP Server Ecosystem for AWS Workloads", - "position": 18, + "position": 20, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/awslabs-mcp-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use yoheinakajima/babyagi for autonomous task generation, execution, and prioritization\u2014the foundational agent loop that started the autonomous AI agent wave.", "name": "BabyAGI Tutorial: The Original Autonomous AI Task Agent Framework", - "position": 19, + "position": 21, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/babyagi-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use steveyegge/beads to give coding agents durable, dependency-aware task memory with structured issue graphs instead of ad-hoc markdown plans.", "name": "Beads Tutorial: Git-Backed Task Graph Memory for Coding Agents", - "position": 20, + "position": 22, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/beads-tutorial/README.md" }, { "@type": "ListItem", "description": "A deep technical walkthrough of BentoML covering Building Production-Ready ML Services.", "name": "BentoML Tutorial: Building Production-Ready ML Services", - "position": 21, + "position": 23, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/bentoml-tutorial/README.md" }, { "@type": "ListItem", "description": "A production-focused deep dive into stackblitz-labs/bolt.diy: architecture, provider routing, safe edit loops, MCP integrations, deployment choices, and operational governance.", "name": "bolt.diy Tutorial: Build and Operate an Open Source AI App Builder", - "position": 22, + "position": 24, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/bolt-diy-tutorial/README.md" }, { 
"@type": "ListItem", "description": "Important Notice (2025): Botpress v12 has been sunset and is no longer available for new deployments. However, existing customers with active v12 subscriptions remain fully supported.", "name": "Botpress Tutorial: Open Source Conversational AI Platform", - "position": 23, + "position": 25, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/botpress-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use browser-use/browser-use to build agents that can navigate websites, execute workflows, and run reliable browser automation in production.", "name": "Browser Use Tutorial: AI-Powered Web Automation Agents", - "position": 24, + "position": 26, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/browser-use-tutorial/README.md" }, { "@type": "ListItem", "description": "A deep technical walkthrough of Chatbox covering Building Modern AI Chat Interfaces.", "name": "Chatbox Tutorial: Building Modern AI Chat Interfaces", - "position": 25, + "position": 27, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/chatbox-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use CherryHQ/cherry-studio to run multi-provider AI workflows, manage assistants, and integrate MCP tools in a desktop-first productivity environment.", "name": "Cherry Studio Tutorial: Multi-Provider AI Desktop Workspace for Teams", - "position": 26, + "position": 28, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/cherry-studio-tutorial/README.md" }, { "@type": "ListItem", "description": "A deep technical walkthrough of ChromaDB covering Building AI-Native Vector Databases.", "name": "ChromaDB Tutorial: Building AI-Native Vector Databases", - "position": 27, + "position": 29, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/chroma-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use 
ChromeDevTools/chrome-devtools-mcp to give coding agents reliable browser control, performance tracing, and deep debugging capabilities.", "name": "Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents", - "position": 28, + "position": 30, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/chrome-devtools-mcp-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use campfirein/cipher as a memory-centric MCP-enabled layer that preserves and shares coding context across IDEs, agents, and teams.", "name": "Cipher Tutorial: Shared Memory Layer for Coding Agents", - "position": 29, + "position": 31, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/cipher-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use musistudio/claude-code-router to route Claude Code workloads across multiple model providers with configurable routing rules, transformers, presets, and operational controls.", "name": "Claude Code Router Tutorial: Multi-Provider Routing and Control Plane for Claude Code", - "position": 30, + "position": 32, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/claude-code-router-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use anthropics/claude-code for codebase understanding, multi-file edits, command execution, git workflows, and MCP-based extension.", "name": "Claude Code Tutorial: Agentic Coding from Your Terminal", - "position": 31, + "position": 33, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/claude-code-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use ruvnet/claude-flow to orchestrate multi-agent workflows, operate MCP/CLI surfaces, and reason about V2-to-V3 architecture and migration tradeoffs.", "name": "Claude Flow Tutorial: Multi-Agent Orchestration, MCP Tooling, and V3 Module Architecture", - "position": 32, + "position": 34, 
"url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/claude-flow-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use thedotmack/claude-mem to capture, compress, and retrieve coding-session memory with hook-driven automation, searchable context layers, and operator controls.", "name": "Claude-Mem Tutorial: Persistent Memory Compression for Claude Code", - "position": 33, + "position": 35, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/claude-mem-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use anthropics/claude-plugins-official to discover, evaluate, install, and contribute Claude Code plugins with clear directory standards and plugin safety practices.", "name": "Claude Plugins Official Tutorial: Anthropic's Managed Plugin Directory", - "position": 34, + "position": 36, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/claude-plugins-official-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn from Anthropic's official quickstart projects to build deployable applications with Claude API, including customer support, data analysis, browser automation, and autonomous coding.", "name": "Claude Quickstarts Tutorial: Production Integration Patterns", - "position": 35, + "position": 37, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/claude-quickstarts-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use smtg-ai/claude-squad to run and manage multiple coding-agent sessions across isolated workspaces with tmux and git worktrees.", "name": "Claude Squad Tutorial: Multi-Agent Terminal Session Orchestration", - "position": 36, + "position": 38, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/claude-squad-tutorial/README.md" }, { "@type": "ListItem", "description": "A deep technical walkthrough of Claude Task Master covering AI-Powered Task Management for 
Developers.", "name": "Claude Task Master Tutorial: AI-Powered Task Management for Developers", - "position": 37, + "position": 39, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/claude-task-master-tutorial/README.md" }, { "@type": "ListItem", "description": "A deep technical walkthrough of ClickHouse covering High-Performance Analytical Database.", "name": "ClickHouse Tutorial: High-Performance Analytical Database", - "position": 38, + "position": 40, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/clickhouse-tutorial/README.md" }, { "@type": "ListItem", "description": "A practical engineering guide to cline/cline: install, operate, and govern Cline across local development and team environments.", "name": "Cline Tutorial: Agentic Coding with Human Control", - "position": 39, + "position": 41, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/cline-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use moazbuilds/CodeMachine-CLI to orchestrate repeatable coding-agent workflows with multi-agent coordination, context control, and long-running execution.", "name": "CodeMachine CLI Tutorial: Orchestrating Long-Running Coding Agent Workflows", - "position": 40, + "position": 42, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/codemachine-cli-tutorial/README.md" }, { "@type": "ListItem", "description": "Design and operate a production-grade code analysis platform with parsing, symbol resolution, code intelligence features, LSP integration, and rollout governance.", "name": "Codex Analysis Platform Tutorial: Build Code Intelligence Systems", - "position": 41, + "position": 43, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/codex-analysis-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use openai/codex to run a lightweight coding agent locally, with strong controls for auth, configuration, MCP 
integration, and sandboxed execution.", "name": "Codex CLI Tutorial: Local Terminal Agent Workflows with OpenAI Codex", - "position": 42, + "position": 44, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/codex-cli-tutorial/README.md" }, { "@type": "ListItem", "description": "A deep technical walkthrough of ComfyUI covering Mastering AI Image Generation Workflows.", "name": "ComfyUI Tutorial: Mastering AI Image Generation Workflows", - "position": 43, + "position": 45, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/comfyui-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use ComposioHQ/composio to connect agents to 800+ toolkits with session-aware discovery, robust authentication flows, provider integrations, MCP support, and event-trigger automation.", "name": "Composio Tutorial: Production Tool and Authentication Infrastructure for AI Agents", - "position": 44, + "position": 46, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/composio-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use EveryInc/compound-engineering-plugin to run compound engineering workflows in Claude Code and convert plugin assets for other coding-agent ecosystems.", "name": "Compound Engineering Plugin Tutorial: Compounding Agent Workflows Across Toolchains", - "position": 45, + "position": 47, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/compound-engineering-plugin-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use upstash/context7 to inject up-to-date, version-aware library docs into Claude Code, Cursor, and other MCP-capable coding agents.", "name": "Context7 Tutorial: Live Documentation Context for Coding Agents", - "position": 46, + "position": 48, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/context7-tutorial/README.md" }, { "@type": "ListItem", "description": "A practical guide 
to continuedev/continue, covering IDE usage, headless/CLI workflows, model configuration, team collaboration, and enterprise operations.", "name": "Continue Tutorial: Open-Source AI Coding Agents for IDE and CLI", - "position": 47, + "position": 49, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/continue-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use github/copilot-cli to run Copilot's coding agent directly from the terminal with GitHub-native context, approval controls, and extensibility through MCP and LSP.", "name": "GitHub Copilot CLI Tutorial: Copilot Agent Workflows in the Terminal", - "position": 48, + "position": 50, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/copilot-cli-tutorial/README.md" }, { "@type": "ListItem", "description": "Create in-app AI assistants, chatbots, and agentic UIs with the open-source CopilotKit framework.", "name": "CopilotKit Tutorial: Building AI Copilots for React Applications", - "position": 49, + "position": 51, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/copilotkit-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use modelcontextprotocol/create-python-server to scaffold Python MCP servers with minimal setup, template-driven primitives, and publish-ready packaging workflows.", "name": "Create Python Server Tutorial: Scaffold and Ship MCP Servers with uvx", - "position": 50, + "position": 52, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/create-python-server-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use modelcontextprotocol/create-typescript-server to scaffold MCP server projects quickly, understand generated template structure, and operate build/debug workflows safely in archived-tooling environments.", "name": "Create TypeScript Server Tutorial: Scaffold MCP Servers with TypeScript Templates", - "position": 51, + "position": 53, 
"url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/create-typescript-server-tutorial/README.md" }, { "@type": "ListItem", "description": "CrewAI View Repo is a framework for orchestrating role-based AI agent teams that collaborate to accomplish complex tasks. It provides a structured approach to creating AI crews with specialized agents, tools, and processes, enabling sophisticated multi-agent workflows and collaborative problem-solving.", "name": "CrewAI Tutorial: Building Collaborative AI Agent Teams", - "position": 52, + "position": 54, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/crewai-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use charmbracelet/crush for terminal-native coding workflows with flexible model providers, LSP/MCP integrations, and production-grade controls.", "name": "Crush Tutorial: Multi-Model Terminal Coding Agent with Strong Extensibility", - "position": 53, + "position": 55, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/crush-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use daytonaio/daytona to run AI-generated code in isolated sandboxes, integrate coding agents through MCP, and operate sandbox infrastructure with stronger security and resource controls.", "name": "Daytona Tutorial: Secure Sandbox Infrastructure for AI-Generated Code", - "position": 54, + "position": 56, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/daytona-tutorial/README.md" }, { "@type": "ListItem", "description": "Orchestrate complex distributed workflows with Deer Flow's powerful task coordination and execution platform.", "name": "Deer Flow Tutorial: Distributed Workflow Orchestration Platform", - "position": 55, + "position": 57, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/deer-flow-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to deploy and operate 
stitionai/devika \u2014 a multi-agent autonomous coding system that plans, researches, writes, and debugs code end-to-end.", "name": "Devika Tutorial: Open-Source Autonomous AI Software Engineer", - "position": 56, + "position": 58, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/devika-tutorial/README.md" }, { "@type": "ListItem", "description": "Dify \u2014 An open-source LLM application development platform for building workflows, RAG pipelines, and AI agents with a visual interface.", "name": "Dify Platform: Deep Dive Tutorial", - "position": 57, + "position": 59, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/dify-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn to program language models declaratively with DSPy, the Stanford NLP framework for systematic prompt optimization and modular LLM pipelines.", "name": "DSPy Tutorial: Programming Language Models", - "position": 58, + "position": 60, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/dspy-tutorial/README.md" }, { "@type": "ListItem", "description": "A practical guide to dyad-sh/dyad, focused on local-first app generation, integration patterns, validation loops, and deployment readiness.", "name": "Dyad Tutorial: Local-First AI App Building", - "position": 59, + "position": 61, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/dyad-tutorial/README.md" }, { "@type": "ListItem", "description": "ElizaOS \u2014 Autonomous agents for everyone.", "name": "ElizaOS: Deep Dive Tutorial", - "position": 60, + "position": 62, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/elizaos-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use affaan-m/everything-claude-code to adopt battle-tested Claude Code agents, skills, hooks, commands, rules, and MCP workflows in a structured, production-oriented way.", "name": "Everything Claude Code Tutorial: Production 
Configuration Patterns for Claude Code", - "position": 61, + "position": 63, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/everything-claude-code-tutorial/README.md" }, { "@type": "ListItem", "description": "Enhance human capabilities with Fabric's modular framework for AI-powered cognitive assistance and task automation.", "name": "Fabric Tutorial: Open-Source Framework for Augmenting Humans with AI", - "position": 62, + "position": 64, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/fabric-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use jlowin/fastmcp to design, run, test, and deploy MCP servers and clients with practical transport, integration, auth, and operations patterns.", "name": "FastMCP Tutorial: Building and Operating MCP Servers with Pythonic Control", - "position": 63, + "position": 65, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/fastmcp-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use GLips/Figma-Context-MCP (Framelink MCP for Figma) to give coding agents structured design context for higher-fidelity implementation.", "name": "Figma Context MCP Tutorial: Design-to-Code Workflows for Coding Agents", - "position": 64, + "position": 66, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/figma-context-mcp-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use firecrawl/firecrawl-mcp-server to add robust web scraping, crawling, search, and extraction capabilities to MCP-enabled coding and research agents.", "name": "Firecrawl MCP Server Tutorial: Web Scraping and Search Tools for MCP Clients", - "position": 65, + "position": 67, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/firecrawl-mcp-server-tutorial/README.md" }, { "@type": "ListItem", "description": "Deep technical walkthrough of Firecrawl Tutorial: Building LLM-Ready Web Scraping and Data 
Extraction Systems.", "name": "Firecrawl Tutorial: Building LLM-Ready Web Scraping and Data Extraction Systems", - "position": 66, + "position": 68, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/firecrawl-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use fireproof-storage/fireproof to build local-first, encrypted, sync-capable applications with a unified browser/Node/Deno API and React hooks.", "name": "Fireproof Tutorial: Local-First Document Database for AI-Native Apps", - "position": 67, + "position": 69, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/fireproof-tutorial/README.md" }, { "@type": "ListItem", "description": "Flowise \u2014 An open-source visual tool for building LLM workflows with a drag-and-drop interface.", "name": "Flowise LLM Orchestration: Deep Dive Tutorial", - "position": 68, + "position": 70, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/flowise-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use google-gemini/gemini-cli to run coding and operations workflows in terminal-first loops with strong tooling, MCP extensibility, headless automation, and safety controls.", "name": "Gemini CLI Tutorial: Terminal-First Agent Workflows with Google Gemini", - "position": 69, + "position": 71, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/gemini-cli-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use googleapis/genai-toolbox to expose database tools through MCP and native SDK paths, with stronger configuration discipline, deployment options, and observability controls.", "name": "GenAI Toolbox Tutorial: MCP-First Database Tooling with Config-Driven Control Planes", - "position": 70, + "position": 72, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/genai-toolbox-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use 
github/github-mcp-server to connect coding agents directly to repositories, issues, pull requests, actions, and code security workflows with stronger control.", "name": "GitHub MCP Server Tutorial: Production GitHub Operations Through MCP", - "position": 71, + "position": 73, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/github-mcp-server-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use block/goose to automate coding workflows with controlled tool execution, strong provider flexibility, and production-ready operations.", "name": "Goose Tutorial: Extensible Open-Source AI Agent for Real Engineering Work", - "position": 72, + "position": 74, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/goose-tutorial/README.md" }, { "@type": "ListItem", "description": "A comprehensive guide to understanding, building, and deploying open-source GPT implementations -- from nanoGPT to GPT-NeoX and beyond.", "name": "GPT Open Source: Deep Dive Tutorial", - "position": 73, + "position": 75, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/gpt-oss-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use gptme/gptme to run a local-first coding and knowledge-work agent with strong CLI ergonomics, extensible tools, and automation-friendly modes.", "name": "gptme Tutorial: Open-Source Terminal Agent for Local Tool-Driven Work", - "position": 74, + "position": 76, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/gptme-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn tiann/hapi, a local-first hub that lets you run Claude Code/Codex/Gemini/OpenCode sessions locally while controlling and approving them remotely.", "name": "HAPI Tutorial: Remote Control for Local AI Coding Sessions", - "position": 75, + "position": 77, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/hapi-tutorial/README.md" }, { 
"@type": "ListItem", "description": "Haystack \u2014 An open-source framework for building production-ready LLM applications, RAG pipelines, and intelligent search systems.", "name": "Haystack: Deep Dive Tutorial", - "position": 76, + "position": 78, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/haystack-tutorial/README.md" }, { "@type": "ListItem", "description": "A deep technical walkthrough of HuggingFace Transformers covering Building State-of-the-Art AI Models.", "name": "HuggingFace Transformers Tutorial: Building State-of-the-Art AI Models", - "position": 77, + "position": 79, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/huggingface-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use humanlayer/humanlayer patterns to orchestrate coding agents with stronger context control, human oversight, and team-scale workflows.", "name": "HumanLayer Tutorial: Context Engineering and Human-Governed Coding Agents", - "position": 78, + "position": 80, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/humanlayer-tutorial/README.md" }, { "@type": "ListItem", "description": "Get reliable, typed responses from LLMs with Pydantic validation.", "name": "Instructor Tutorial: Structured LLM Outputs", - "position": 79, + "position": 81, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/instructor-tutorial/README.md" }, { "@type": "ListItem", "description": "Khoj \u2014 An open-source, self-hostable AI personal assistant that connects to your notes, documents, and online data.", "name": "Khoj AI: Deep Dive Tutorial", - "position": 80, + "position": 82, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/khoj-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use Kilo-Org/kilocode for high-throughput coding workflows with multi-mode operation, agent-loop controls, and extensible CLI/IDE integration.", "name": "Kilo 
Code Tutorial: Agentic Engineering from IDE and CLI Surfaces", - "position": 81, + "position": 83, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/kilocode-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use MoonshotAI/kimi-cli to run an interactive terminal coding agent with configurable modes, MCP integrations, and ACP-based IDE connectivity.", "name": "Kimi CLI Tutorial: Multi-Mode Terminal Agent with MCP and ACP", - "position": 82, + "position": 84, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/kimi-cli-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use kirodotdev/Kiro for structured AI-powered development with spec-driven workflows, agent steering, event-driven automation, and AWS-native integrations.", "name": "Kiro Tutorial: Spec-Driven Agentic IDE from AWS", - "position": 83, + "position": 85, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/kiro-tutorial/README.md" }, { "@type": "ListItem", "description": "Master Kubernetes Operators with hands-on Go implementation using the Operator SDK and controller-runtime library for enterprise application management.", "name": "Kubernetes Operator Patterns: Building Production-Grade Controllers", - "position": 84, + "position": 86, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/kubernetes-operator-tutorial/README.md" }, { "@type": "ListItem", "description": "Master LanceDB, the open-source serverless vector database designed for AI applications, RAG systems, and semantic search.", "name": "LanceDB Tutorial: Serverless Vector Database for AI", - "position": 85, + "position": 87, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/lancedb-tutorial/README.md" }, { "@type": "ListItem", "description": "Deep technical walkthrough of LangChain Architecture: Internal Design Deep Dive.", "name": "LangChain Architecture: Internal Design Deep Dive", - 
"position": 86, + "position": 88, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/langchain-architecture-tutorial/README.md" }, { "@type": "ListItem", "description": "Pydantic 2 Required: LangChain v0.3 fully migrated to Pydantic 2. Code using langchain_core.pydantic_v1 should be updated to native Pydantic 2 syntax.", "name": "LangChain Tutorial: Building AI Applications with Large Language Models", - "position": 87, + "position": 89, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/langchain-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to build, deploy, and operate agent workflows with langflow-ai/langflow, including visual flow composition, API/MCP deployment, and production reliability controls.", "name": "Langflow Tutorial: Visual AI Agent and Workflow Platform", - "position": 88, + "position": 90, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/langflow-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use langfuse/langfuse to trace, evaluate, and improve production LLM systems with structured observability workflows.", "name": "Langfuse Tutorial: LLM Observability, Evaluation, and Prompt Operations", - "position": 89, + "position": 91, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/langfuse-tutorial/README.md" }, { "@type": "ListItem", "description": "A deep technical walkthrough of LangGraph covering Building Stateful Multi-Actor Applications.", "name": "LangGraph Tutorial: Building Stateful Multi-Actor Applications", - "position": 90, + "position": 92, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/langgraph-tutorial/README.md" }, { "@type": "ListItem", "description": "Build AI agents with persistent memory using the framework formerly known as MemGPT.", "name": "Letta Tutorial: Stateful LLM Agents", - "position": 91, + "position": 93, "url": 
"https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/letta-tutorial/README.md" }, { "@type": "ListItem", "description": "Build provider-agnostic LLM applications with BerriAI/litellm, including routing, fallbacks, proxy deployment, and cost-aware operations.", "name": "LiteLLM Tutorial: Unified LLM Gateway and Routing Layer", - "position": 92, + "position": 94, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/litellm-tutorial/README.md" }, { "@type": "ListItem", "description": "Deep technical walkthrough of Liveblocks - Real-Time Collaboration Deep Dive.", "name": "Liveblocks - Real-Time Collaboration Deep Dive", - "position": 93, + "position": 95, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/liveblocks-tutorial/README.md" }, { "@type": "ListItem", "description": "Run large language models efficiently on your local machine with pure C/C++.", "name": "llama.cpp Tutorial: Local LLM Inference", - "position": 94, + "position": 96, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/llama-cpp-tutorial/README.md" }, { "@type": "ListItem", "description": "A deep technical walkthrough of LLaMA-Factory covering Unified Framework for LLM Training and Fine-tuning.", "name": "LLaMA-Factory Tutorial: Unified Framework for LLM Training and Fine-tuning", - "position": 95, + "position": 97, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/llama-factory-tutorial/README.md" }, { "@type": "ListItem", "description": "A deep technical walkthrough of LlamaIndex covering Building Advanced RAG Systems and Data Frameworks.", "name": "LlamaIndex Tutorial: Building Advanced RAG Systems and Data Frameworks", - "position": 96, + "position": 98, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/llamaindex-tutorial/README.md" }, { "@type": "ListItem", "description": "LobeChat \u2014 An open-source, modern-design AI chat framework for building private LLM 
applications.", "name": "LobeChat AI Platform: Deep Dive Tutorial", - "position": 97, + "position": 99, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/lobechat-tutorial/README.md" }, { "@type": "ListItem", "description": "Run LLMs, image generation, and audio models locally with an OpenAI-compatible API.", "name": "LocalAI Tutorial: Self-Hosted OpenAI Alternative", - "position": 98, + "position": 100, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/localai-tutorial/README.md" }, { "@type": "ListItem", "description": "Logseq \u2014 A privacy-first, local-first knowledge management platform with block-based editing and graph visualization.", "name": "Logseq: Deep Dive Tutorial", - "position": 99, + "position": 101, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/logseq-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to build production AI applications with mastra-ai/mastra, including agents, workflows, memory, MCP tooling, and reliability operations.", "name": "Mastra Tutorial: TypeScript Framework for AI Agents and Workflows", - "position": 100, + "position": 102, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/mastra-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use hangwin/mcp-chrome to expose browser automation, content analysis, and semantic tab search tools to MCP clients.", "name": "MCP Chrome Tutorial: Control Your Real Chrome Browser Through MCP", - "position": 101, + "position": 103, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/mcp-chrome-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to build and operate MCP clients and servers with modelcontextprotocol/csharp-sdk, including package choices, auth patterns, tasks, diagnostics, and versioning strategy.", "name": "MCP C# SDK Tutorial: Production MCP in .NET with Hosting, ASP.NET Core, and Task Workflows", 
- "position": 102, + "position": 104, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/mcp-csharp-sdk-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use modelcontextprotocol/docs as an archived reference, map its conceptual guides, and migrate documentation workflows to the canonical modelcontextprotocol/modelcontextprotocol docs location.", "name": "MCP Docs Repo Tutorial: Navigating the Archived MCP Documentation Repository", - "position": 103, + "position": 105, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/mcp-docs-repo-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use modelcontextprotocol/ext-apps to build interactive MCP Apps, wire host bridges, secure UI resources, and run reliable testing and migration workflows.", "name": "MCP Ext Apps Tutorial: Building Interactive MCP Apps and Hosts", - "position": 104, + "position": 106, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/mcp-ext-apps-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use modelcontextprotocol/go-sdk for production MCP workloads across stdio and streamable HTTP, including auth middleware, conformance, and upgrade planning.", "name": "MCP Go SDK Tutorial: Building Robust MCP Clients and Servers in Go", - "position": 105, + "position": 107, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/mcp-go-sdk-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use modelcontextprotocol/inspector to test MCP servers across stdio, SSE, and streamable HTTP, with safer auth defaults and repeatable CLI automation.", "name": "MCP Inspector Tutorial: Debugging and Validating MCP Servers", - "position": 106, + "position": 108, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/mcp-inspector-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use 
modelcontextprotocol/java-sdk across core Java and Spring stacks, from transport setup to conformance and production hardening.", "name": "MCP Java SDK Tutorial: Building MCP Clients and Servers with Reactor, Servlet, and Spring", - "position": 107, + "position": 109, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/mcp-java-sdk-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to implement MCP client/server workflows with modelcontextprotocol/kotlin-sdk, including module boundaries, transport choices, capability negotiation, and production lifecycle controls.", "name": "MCP Kotlin SDK Tutorial: Building Multiplatform MCP Clients and Servers", - "position": 108, + "position": 110, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/mcp-kotlin-sdk-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to implement MCP server workflows with modelcontextprotocol/php-sdk, including attribute discovery, manual capability registration, transport strategy, session storage, and framework integration patterns.", "name": "MCP PHP SDK Tutorial: Building MCP Servers in PHP with Discovery and Transport Flexibility", - "position": 109, + "position": 111, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/mcp-php-sdk-tutorial/README.md" }, { "@type": "ListItem", "description": "Master the Model Context Protocol Python SDK to build custom tool servers that extend Claude and other LLMs with powerful capabilities.", "name": "MCP Python SDK Tutorial: Building AI Tool Servers", - "position": 110, + "position": 112, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/mcp-python-sdk-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use modelcontextprotocol/quickstart-resources as a practical reference for multi-language MCP server/client implementations, protocol smoke testing, and onboarding workflows.", "name": "MCP Quickstart 
Resources Tutorial: Cross-Language MCP Servers and Clients by Example", - "position": 111, + "position": 113, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/mcp-quickstart-resources-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how modelcontextprotocol/registry works end to end: publishing authenticated server metadata, consuming the API as an aggregator, and operating registry infrastructure safely.", "name": "MCP Registry Tutorial: Publishing, Discovery, and Governance for MCP Servers", - "position": 112, + "position": 114, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/mcp-registry-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to implement MCP server/client workflows with modelcontextprotocol/ruby-sdk, including tool/prompt/resource registration, streamable HTTP sessions, structured logging, and release operations.", "name": "MCP Ruby SDK Tutorial: Building MCP Servers and Clients in Ruby", - "position": 113, + "position": 115, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/mcp-ruby-sdk-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use modelcontextprotocol/rust-sdk (rmcp) for production MCP clients and servers with strong transport control, macro-driven tooling, OAuth, and async task workflows.", "name": "MCP Rust SDK Tutorial: Building High-Performance MCP Services with RMCP", - "position": 114, + "position": 116, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/mcp-rust-sdk-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use the official MCP reference servers as implementation blueprints, not drop-in production services.", "name": "MCP Servers Tutorial: Reference Implementations and Patterns", - "position": 115, + "position": 117, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/mcp-servers-tutorial/README.md" }, { "@type": 
"ListItem", "description": "Learn the current Model Context Protocol directly from modelcontextprotocol/modelcontextprotocol, including lifecycle, transports, security, authorization, and governance workflows.", "name": "MCP Specification Tutorial: Designing Production-Grade MCP Clients and Servers From the Source of Truth", - "position": 116, + "position": 118, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/mcp-specification-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to implement MCP client and server workflows with modelcontextprotocol/swift-sdk, including transport options, sampling, batching, and graceful service lifecycle control.", "name": "MCP Swift SDK Tutorial: Building MCP Clients and Servers in Swift", - "position": 117, + "position": 119, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/mcp-swift-sdk-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use modelcontextprotocol/typescript-sdk to build production MCP clients and servers, migrate from v1 to v2 safely, and validate behavior with conformance workflows.", "name": "MCP TypeScript SDK Tutorial: Building and Migrating MCP Clients and Servers in TypeScript", - "position": 118, + "position": 120, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/mcp-typescript-sdk-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how mcp-use/mcp-use composes agent, client, server, and inspector workflows across Python and TypeScript with practical security and operations patterns.", "name": "MCP Use Tutorial: Full-Stack MCP Development Across Agents, Clients, Servers, and Inspector", - "position": 119, + "position": 121, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/mcp-use-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use modelcontextprotocol/mcpb to package local MCP servers into signed .mcpb bundles with manifest 
metadata, CLI workflows, and distribution-ready operational controls.", "name": "MCPB Tutorial: Packaging and Distributing Local MCP Servers as Bundles", - "position": 120, + "position": 122, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/mcpb-tutorial/README.md" }, { "@type": "ListItem", "description": "A deep technical walkthrough of MeiliSearch covering Lightning Fast Search Engine.", "name": "MeiliSearch Tutorial: Lightning Fast Search Engine", - "position": 121, + "position": 123, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/meilisearch-tutorial/README.md" }, { "@type": "ListItem", "description": "A deep technical walkthrough of Mem0 covering Building Production-Ready AI Agents with Scalable Long-Term Memory.", "name": "Mem0 Tutorial: Building Production-Ready AI Agents with Scalable Long-Term Memory", - "position": 122, + "position": 124, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/mem0-tutorial/README.md" }, + { + "@type": "ListItem", + "description": "Give MetaGPT a product idea, and a virtual software company of AI agents designs, architects, codes, and tests it for you.", + "name": "MetaGPT Tutorial: Multi-Agent Software Development with Role-Based Collaboration", + "position": 125, + "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/metagpt-tutorial/README.md" + }, { "@type": "ListItem", "description": "Learn how to use SWE-agent/mini-swe-agent to run compact, high-performing software-engineering agent workflows with minimal scaffolding and strong reproducibility.", "name": "Mini-SWE-Agent Tutorial: Minimal Autonomous Code Agent Design at Benchmark Scale", - "position": 123, + "position": 126, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/mini-swe-agent-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use mistralai/mistral-vibe for terminal-native coding workflows with configurable 
agent profiles, skills, subagents, and ACP integrations.", "name": "Mistral Vibe Tutorial: Minimal CLI Coding Agent by Mistral", - "position": 124, + "position": 127, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/mistral-vibe-tutorial/README.md" }, { "@type": "ListItem", "description": "Build powerful AI-powered automations with n8n's visual workflow builder.", "name": "n8n AI Tutorial: Workflow Automation with AI", - "position": 125, + "position": 128, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/n8n-ai-tutorial/README.md" }, { "@type": "ListItem", "description": "n8n \u2014 Visual workflow automation with Model Context Protocol (MCP) integration for AI-powered tool use.", "name": "n8n Model Context Protocol: Deep Dive Tutorial", - "position": 126, + "position": 129, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/n8n-mcp-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how Nano-Collective/nanocoder implements local-first coding-agent workflows, tool execution loops, and multi-provider model integration.", "name": "Nanocoder Tutorial: Building and Understanding AI Coding Agents", - "position": 127, + "position": 130, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/nanocoder-tutorial/README.md" }, { "@type": "ListItem", "description": "NocoDB \u2014 An open-source Airtable alternative that turns any database into a smart spreadsheet.", "name": "NocoDB: Deep Dive Tutorial", - "position": 128, + "position": 131, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/nocodb-tutorial/README.md" }, { "@type": "ListItem", "description": "Obsidian Outliner \u2014 A plugin that adds outliner-style editing behaviors to Obsidian, demonstrating advanced plugin architecture patterns.", "name": "Obsidian Outliner Plugin: Deep Dive Tutorial", - "position": 129, + "position": 132, "url": 
"https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/obsidian-outliner-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use ollama/ollama for local model execution, customization, embeddings/RAG, integration, and production deployment.", "name": "Ollama Tutorial: Running and Serving LLMs Locally", - "position": 130, + "position": 133, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/ollama-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use onlook-dev/onlook to design and edit production-grade React apps visually while keeping generated code in your repository.", "name": "Onlook Tutorial: Visual-First AI Coding for Next.js and Tailwind", - "position": 131, + "position": 134, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/onlook-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use winfunc/opcode to manage Claude Code projects, sessions, agents, MCP servers, and checkpoints from a desktop-first operating interface.", "name": "Opcode Tutorial: GUI Command Center for Claude Code Workflows", - "position": 132, + "position": 135, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/opcode-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn from langchain-ai/open-swe architecture, workflows, and operational patterns, including how to maintain or migrate from a deprecated codebase.", "name": "Open SWE Tutorial: Asynchronous Cloud Coding Agent Architecture and Migration Playbook", - "position": 133, + "position": 136, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/open-swe-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to run and operate open-webui/open-webui as a self-hosted AI interface with model routing, RAG workflows, multi-user controls, and production deployment patterns.", "name": "Open WebUI Tutorial: Self-Hosted AI Workspace and Chat 
Interface", - "position": 134, + "position": 137, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/open-webui-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to build reliable Python integrations with openai/openai-python using Responses-first architecture, migration-safe patterns, and production operations.", "name": "OpenAI Python SDK Tutorial: Production API Patterns", - "position": 135, + "position": 138, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/openai-python-sdk-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to build low-latency voice agents with openai/openai-realtime-agents, including realtime session design, tool orchestration, and production rollout patterns.", "name": "OpenAI Realtime Agents Tutorial: Voice-First AI Systems", - "position": 136, + "position": 139, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/openai-realtime-agents-tutorial/README.md" }, { "@type": "ListItem", "description": "Build robust transcription pipelines with Whisper, from local experiments to production deployment.", "name": "OpenAI Whisper Tutorial: Speech Recognition and Translation", - "position": 137, + "position": 140, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/openai-whisper-tutorial/README.md" }, { "@type": "ListItem", "description": "Democratize investment research with OpenBB's comprehensive financial data and analysis platform.", "name": "OpenBB Tutorial: Complete Guide to Investment Research Platform", - "position": 138, + "position": 141, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/openbb-tutorial/README.md" }, { "@type": "ListItem", "description": "OpenClaw \u2014 Your own personal AI assistant. Any OS. 
Any Platform.", "name": "OpenClaw: Deep Dive Tutorial", - "position": 139, + "position": 142, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/openclaw-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn from opencode-ai/opencode architecture and workflows, and migrate safely to actively maintained successors.", "name": "OpenCode AI Legacy Tutorial: Archived Terminal Agent Workflows and Migration to Crush", - "position": 140, + "position": 143, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/opencode-ai-legacy-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use anomalyco/opencode to run terminal-native coding agents with provider flexibility, strong tool control, and production-grade workflows.", "name": "OpenCode Tutorial: Open-Source Terminal Coding Agent at Scale", - "position": 141, + "position": 144, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/opencode-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to operate OpenHands/OpenHands across local GUI, CLI, and SDK workflows with production-minded safety, validation, and integration patterns.", "name": "OpenHands Tutorial: Autonomous Software Engineering Workflows", - "position": 142, + "position": 145, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/openhands-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use numman-ali/openskills to install, synchronize, and operate reusable SKILL.md packs across Claude Code, Cursor, Codex, Aider, and other agent environments.", "name": "OpenSkills Tutorial: Universal Skill Loading for Coding Agents", - "position": 143, + "position": 146, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/openskills-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use Fission-AI/OpenSpec to make AI-assisted software delivery more predictable with 
artifact-driven planning, implementation, and archival workflows.", "name": "OpenSpec Tutorial: Spec-Driven Workflows for AI Coding Agents", - "position": 144, + "position": 147, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/openspec-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use vercel-labs/opensrc to fetch package and repository source code so coding agents can reason about implementation details, not only public types and docs.", "name": "OpenSrc Tutorial: Deep Source Context for Coding Agents", - "position": 145, + "position": 148, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/opensrc-tutorial/README.md" }, { "@type": "ListItem", "description": "A deep technical walkthrough of Outlines covering Structured Text Generation with LLMs.", "name": "Outlines Tutorial: Structured Text Generation with LLMs", - "position": 146, + "position": 149, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/outlines-tutorial/README.md" }, { "@type": "ListItem", "description": "A deep technical walkthrough of Perplexica covering AI-Powered Search Engine.", "name": "Perplexica Tutorial: AI-Powered Search Engine", - "position": 147, + "position": 150, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/perplexica-tutorial/README.md" }, { "@type": "ListItem", "description": "A deep technical walkthrough of Phidata covering Building Autonomous AI Agents.", "name": "Phidata Tutorial: Building Autonomous AI Agents", - "position": 148, + "position": 151, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/phidata-tutorial/README.md" }, { "@type": "ListItem", "description": "AI Photo Management Revolution: Enhanced facial recognition, LLM integrations, and advanced organization features mark PhotoPrism's evolution.", "name": "PhotoPrism Tutorial: AI-Powered Photos App", - "position": 149, + "position": 152, "url": 
"https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/photoprism-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use plandex-ai/plandex for large codebase tasks with strong context management, cumulative diff review, model packs, and self-hosted operations.", "name": "Plandex Tutorial: Large-Task AI Coding Agent Workflows", - "position": 150, + "position": 153, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/plandex-tutorial/README.md" }, + { + "@type": "ListItem", + "description": "Open-source AI-native project management that rivals Jira and Linear \u2014 with issues, cycles, modules, and wiki built in.", + "name": "Plane Tutorial: AI-Native Project Management", + "position": 154, + "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/plane-tutorial/README.md" + }, { "@type": "ListItem", "description": "Learn how to use OthmanAdi/planning-with-files to run Manus-style file-based planning workflows across Claude Code and other AI coding environments.", "name": "Planning with Files Tutorial: Persistent Markdown Workflow Memory for AI Coding Agents", - "position": 151, + "position": 155, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/planning-with-files-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use microsoft/playwright-mcp to give AI coding agents structured browser automation with accessibility snapshots, deterministic actions, and portable MCP host integrations.", "name": "Playwright MCP Tutorial: Browser Automation for Coding Agents Through MCP", - "position": 152, + "position": 156, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/playwright-mcp-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to build agentic applications with The-Pocket/PocketFlow, a minimalist graph framework that still supports workflows, multi-agent patterns, RAG, and human-in-the-loop flows.", "name": 
"PocketFlow Tutorial: Minimal LLM Framework with Graph-Based Power", - "position": 153, + "position": 157, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/pocketflow-tutorial/README.md" }, { "@type": "ListItem", "description": "Master PostgreSQL's query execution engine, understand EXPLAIN output, and optimize complex queries for maximum performance.", "name": "PostgreSQL Query Planner Deep Dive", - "position": 154, + "position": 158, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/postgresql-tutorial/README.md" }, { "@type": "ListItem", "description": "Deep technical walkthrough of PostHog Tutorial: Open Source Product Analytics Platform.", "name": "PostHog Tutorial: Open Source Product Analytics Platform", - "position": 155, + "position": 159, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/posthog-tutorial/README.md" }, { "@type": "ListItem", "description": "A deep technical walkthrough of Pydantic AI covering Type-Safe AI Agent Development.", "name": "Pydantic AI Tutorial: Type-Safe AI Agent Development", - "position": 156, + "position": 160, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/pydantic-ai-tutorial/README.md" }, { "@type": "ListItem", "description": "Deep technical walkthrough of Quivr Tutorial: Open-Source RAG Framework for Document Ingestion.", "name": "Quivr Tutorial: Open-Source RAG Framework for Document Ingestion", - "position": 157, + "position": 161, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/quivr-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use QwenLM/Qwen-Agent to build production-capable agents with function calling, MCP integration, memory/RAG patterns, and benchmark-aware planning workflows.", "name": "Qwen-Agent Tutorial: Tool-Enabled Agent Framework with MCP, RAG, and Multi-Modal Workflows", - "position": 158, + "position": 162, "url": 
"https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/qwen-agent-tutorial/README.md" }, { "@type": "ListItem", "description": "Transform documents into intelligent Q&A systems with RAGFlow's comprehensive RAG (Retrieval-Augmented Generation) platform.", "name": "RAGFlow Tutorial: Complete Guide to Open-Source RAG Engine", - "position": 159, + "position": 163, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/ragflow-tutorial/README.md" }, { "@type": "ListItem", "description": "Deep dive into React's reconciliation algorithm, the Fiber architecture that powers modern React applications.", "name": "React Fiber Internals", - "position": 160, + "position": 164, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/react-fiber-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use refly-ai/refly to turn vibe workflows into reusable, versioned agent skills that can run via API, webhook, and CLI integrations.", "name": "Refly Tutorial: Build Deterministic Agent Skills and Ship Them Across APIs and Claude Code", - "position": 161, + "position": 165, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/refly-tutorial/README.md" }, { "@type": "ListItem", "description": "A production-focused guide to RooCodeInc/Roo-Code: mode design, task execution, checkpoints, MCP, team profiles, and enterprise operations.", "name": "Roo Code Tutorial: Run an AI Dev Team in Your Editor", - "position": 162, + "position": 166, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/roo-code-tutorial/README.md" }, { "@type": "ListItem", "description": "Build enterprise AI applications with Microsoft's SDK for integrating LLMs.", "name": "Semantic Kernel Tutorial: Microsoft's AI Orchestration", - "position": 163, + "position": 167, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/semantic-kernel-tutorial/README.md" }, { "@type": "ListItem", "description": 
"Learn how to use oraios/serena to give coding agents IDE-grade semantic retrieval and editing tools across large codebases.", "name": "Serena Tutorial: Semantic Code Retrieval Toolkit for Coding Agents", - "position": 164, + "position": 168, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/serena-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use shotgun-sh/shotgun to plan, specify, and execute large code changes with structured agent workflows and stronger delivery control.", "name": "Shotgun Tutorial: Spec-Driven Development for Coding Agents", - "position": 165, + "position": 169, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/shotgun-tutorial/README.md" }, { "@type": "ListItem", "description": "Unlock the full potential of large language models with SillyTavern's comprehensive interface for role-playing, creative writing, and AI experimentation.", "name": "SillyTavern Tutorial: Advanced LLM Frontend for Power Users", - "position": 166, + "position": 170, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/sillytavern-tutorial/README.md" }, { "@type": "ListItem", "description": "A deep technical walkthrough of SiYuan covering Privacy-First Knowledge Management.", "name": "SiYuan Tutorial: Privacy-First Knowledge Management", - "position": 167, + "position": 171, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/siyuan-tutorial/README.md" }, { "@type": "ListItem", "description": "Build efficient AI agents with minimal code using Hugging Face's smolagents library.", "name": "Smolagents Tutorial: Hugging Face's Lightweight Agent Framework", - "position": 168, + "position": 172, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/smolagents-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use stagewise-io/stagewise to connect browser-selected UI context with coding agents, plugin extensions, and 
multi-agent bridge workflows.", "name": "Stagewise Tutorial: Frontend Coding Agent Workflows in Real Browser Context", - "position": 169, + "position": 173, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/stagewise-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use strands-agents/sdk-python to build lightweight, model-driven agents with strong tool abstractions, hooks, and production deployment patterns.", "name": "Strands Agents Tutorial: Model-Driven Agent Systems with Native MCP Support", - "position": 170, + "position": 174, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/strands-agents-tutorial/README.md" }, { "@type": "ListItem", "description": "Deep technical walkthrough of Supabase Tutorial: Building Modern Backend Applications.", "name": "Supabase Tutorial: Building Modern Backend Applications", - "position": 171, + "position": 175, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/supabase-tutorial/README.md" }, { "@type": "ListItem", "description": "A deep technical walkthrough of SuperAGI covering Production-Ready Autonomous AI Agents.", "name": "SuperAGI Tutorial: Production-Ready Autonomous AI Agents", - "position": 172, + "position": 176, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/superagi-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use superset-sh/superset to orchestrate many coding agents in parallel with worktree isolation, centralized monitoring, and fast review loops.", "name": "Superset Terminal Tutorial: Command Center for Parallel Coding Agents", - "position": 173, + "position": 177, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/superset-terminal-tutorial/README.md" }, { "@type": "ListItem", "description": "Deep technical walkthrough of OpenAI Swarm Tutorial: Lightweight Multi-Agent Orchestration.", "name": "OpenAI Swarm Tutorial: Lightweight Multi-Agent 
Orchestration", - "position": 174, + "position": 178, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/swarm-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use SWE-agent/SWE-agent for autonomous software engineering workflows, from single-issue runs to benchmark and research-grade evaluation.", "name": "SWE-agent Tutorial: Autonomous Repository Repair and Benchmark-Driven Engineering", - "position": 175, + "position": 179, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/swe-agent-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use sweepai/sweep to turn GitHub issues into pull requests, operate feedback loops, and run self-hosted or CLI workflows with clear guardrails.", "name": "Sweep Tutorial: Issue-to-PR AI Coding Workflows on GitHub", - "position": 176, + "position": 180, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/sweep-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to run and extend TabbyML/tabby for production code completion and team knowledge workflows.", "name": "Tabby Tutorial: Self-Hosted AI Coding Assistant Architecture and Operations", - "position": 177, + "position": 181, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/tabby-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use and maintain taskade/awesome-vibe-coding as a decision system for AI app builders, coding agents, MCP tooling, and Genesis-centered workflows.", "name": "Taskade Awesome Vibe Coding Tutorial: Curating the 2026 AI-Building Landscape", - "position": 178, + "position": 182, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/taskade-awesome-vibe-coding-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how taskade/docs structures product documentation across Genesis, API references, automations, help-center workflows, and release 
timelines.", "name": "Taskade Docs Tutorial: Operating the Living-DNA Documentation Stack", - "position": 179, + "position": 183, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/taskade-docs-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to run, extend, and operate taskade/mcp to connect Taskade workspaces, tasks, projects, and AI agents into MCP-compatible clients.", "name": "Taskade MCP Tutorial: OpenAPI-Driven MCP Server for Taskade Workflows", - "position": 180, + "position": 184, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/taskade-mcp-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to operate Taskade as an AI-native workspace system: Genesis app generation, AI agents, automations, enterprise controls, and production rollout patterns.", "name": "Taskade Tutorial: AI-Native Workspace, Genesis, and Agentic Operations", - "position": 181, + "position": 185, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/taskade-tutorial/README.md" }, { "@type": "ListItem", "description": "Teable \u2014 A high-performance, multi-dimensional database platform built on PostgreSQL with real-time collaboration.", "name": "Teable: Deep Dive Tutorial", - "position": 182, + "position": 186, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/teable-tutorial/README.md" }, { "@type": "ListItem", "description": "Master tiktoken, OpenAI's fast BPE tokenizer, to accurately count tokens, optimize prompts, and reduce API costs.", "name": "tiktoken Tutorial: OpenAI Token Encoding & Optimization", - "position": 183, + "position": 187, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/tiktoken-tutorial/README.md" }, { "@type": "ListItem", "description": "A deep technical walkthrough of Turborepo covering High-Performance Monorepo Build System.", "name": "Turborepo Tutorial: High-Performance Monorepo Build System", - "position": 
184, + "position": 188, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/turborepo-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use modelcontextprotocol/use-mcp to connect React apps to MCP servers with OAuth-aware flows, tool/resource/prompt access, and resilient transport lifecycle handling.", "name": "use-mcp Tutorial: React Hook Patterns for MCP Client Integration", - "position": 185, + "position": 189, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/use-mcp-tutorial/README.md" }, { "@type": "ListItem", "description": "Build robust AI product features with vercel/ai, including streaming, structured outputs, tool loops, framework integration, and production deployment patterns.", "name": "Vercel AI SDK Tutorial: Production TypeScript AI Apps and Agents", - "position": 186, + "position": 190, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/vercel-ai-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use BloopAI/vibe-kanban to coordinate Claude Code, Codex, Gemini CLI, and other coding agents through a unified orchestration workspace.", "name": "Vibe Kanban Tutorial: Multi-Agent Orchestration Board for Coding Workflows", - "position": 187, + "position": 191, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/vibe-kanban-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use cloudflare/vibesdk to run a prompt-to-app platform with agent orchestration, preview sandboxes, and production deployment on Cloudflare.", "name": "VibeSDK Tutorial: Build a Vibe-Coding Platform on Cloudflare", - "position": 188, + "position": 192, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/vibesdk-tutorial/README.md" }, { "@type": "ListItem", "description": "Master vLLM for blazing-fast, cost-effective large language model inference with advanced optimization techniques.", "name": "vLLM 
Tutorial: High-Performance LLM Inference", - "position": 189, + "position": 193, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/vllm-tutorial/README.md" }, { "@type": "ListItem", "description": "A deep technical walkthrough of Whisper.cpp covering High-Performance Speech Recognition in C/C++.", "name": "Whisper.cpp Tutorial: High-Performance Speech Recognition in C/C++", - "position": 190, + "position": 194, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/whisper-cpp-tutorial/README.md" }, { "@type": "ListItem", "description": "Learn how to use wshobson/agents to install focused Claude Code plugins, coordinate specialist agents, and run scalable multi-agent workflows with clear model and skill boundaries.", "name": "Wshobson Agents Tutorial: Pluginized Multi-Agent Workflows for Claude Code", - "position": 191, + "position": 195, "url": "https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/wshobson-agents-tutorial/README.md" } ], "name": "Awesome Code Docs Tutorial Catalog", - "numberOfItems": 191, + "numberOfItems": 195, "url": "https://github.com/johnxie/awesome-code-docs" } diff --git a/llms-full.txt b/llms-full.txt index 2ed788c..2cc6bdd 100644 --- a/llms-full.txt +++ b/llms-full.txt @@ -3,6 +3,12 @@ Main repository: - https://github.com/johnxie/awesome-code-docs +## A2A Protocol Tutorial: Building Interoperable Agent Systems With Google's Agent-to-Agent Standard +- Path: tutorials/a2a-protocol-tutorial +- Index: https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/a2a-protocol-tutorial/README.md +- Summary: Learn how agents discover, communicate, and delegate tasks to each other using the A2A protocol — the open standard (now Linux Foundation) for agent-to-agent interoperability. 
+- Keywords: a2a, protocol, building, interoperable, agent, google, standard, agents, discover, communicate, delegate, tasks, each, other, open, now, linux, foundation + ## Activepieces Tutorial: Open-Source Automation, Pieces, and AI-Ready Workflow Operations - Path: tutorials/activepieces-tutorial - Index: https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/activepieces-tutorial/README.md @@ -15,6 +21,12 @@ Main repository: - Summary: Learn how to use google/adk-python to build, evaluate, and deploy modular AI agent systems with strong tooling, session controls, and production rollouts. - Keywords: adk, python, grade, agent, engineering, google, evaluate, deploy, modular, strong, tooling, session, controls, rollouts +## AFFiNE Tutorial: Open-Source AI Workspace with Docs, Whiteboards, and Databases +- Path: tutorials/affine-tutorial +- Index: https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/affine-tutorial/README.md +- Summary: Learn how to use toeverything/AFFiNE to build, extend, and self-host a modern knowledge workspace combining documents, whiteboards, and databases — powered by BlockSuite, CRDT-based collaboration, and integrated AI copilot features. +- Keywords: affine, open, source, workspace, whiteboards, databases, toeverything, extend, self, host, modern, knowledge, combining, documents, powered, blocksuite, crdt, based + ## AG2 Tutorial: Next-Generation Multi-Agent Framework - Path: tutorials/ag2-tutorial - Index: https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/ag2-tutorial/README.md @@ -735,6 +747,12 @@ Main repository: - Summary: A deep technical walkthrough of Mem0 covering Building Production-Ready AI Agents with Scalable Long-Term Memory. 
- Keywords: mem0, building, ready, agents, scalable, long, term, memory, technical, walkthrough +## MetaGPT Tutorial: Multi-Agent Software Development with Role-Based Collaboration +- Path: tutorials/metagpt-tutorial +- Index: https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/metagpt-tutorial/README.md +- Summary: In one sentence: Give MetaGPT a product idea, and a virtual software company of AI agents designs, architects, codes, and tests it for you. +- Keywords: metagpt, multi, agent, software, development, role, based, collaboration, one, sentence, give, product, idea, virtual, company, agents, designs, architects + ## Mini-SWE-Agent Tutorial: Minimal Autonomous Code Agent Design at Benchmark Scale - Path: tutorials/mini-swe-agent-tutorial - Index: https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/mini-swe-agent-tutorial/README.md @@ -903,6 +921,12 @@ Main repository: - Summary: Learn how to use plandex-ai/plandex for large codebase tasks with strong context management, cumulative diff review, model packs, and self-hosted operations. - Keywords: plandex, large, task, coding, agent, workflows, codebase, tasks, strong, context, management, cumulative, diff, review, model, packs, self, hosted +## Plane Tutorial: AI-Native Project Management +- Path: tutorials/plane-tutorial +- Index: https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/plane-tutorial/README.md +- Summary: Open-source AI-native project management that rivals Jira and Linear — with issues, cycles, modules, and wiki built in. 
+- Keywords: plane, native, management, open, source, rivals, jira, linear, issues, cycles, modules, wiki, built + ## Planning with Files Tutorial: Persistent Markdown Workflow Memory for AI Coding Agents - Path: tutorials/planning-with-files-tutorial - Index: https://github.com/johnxie/awesome-code-docs/blob/main/tutorials/planning-with-files-tutorial/README.md diff --git a/llms.txt b/llms.txt index 263c1f0..675c553 100644 --- a/llms.txt +++ b/llms.txt @@ -14,8 +14,10 @@ - Infrastructure: Ollama, vLLM, LiteLLM, llama.cpp ## Tutorial Directory +- A2A Protocol Tutorial: Building Interoperable Agent Systems With Google's Agent-to-Agent Standard: https://github.com/johnxie/awesome-code-docs/tree/main/tutorials/a2a-protocol-tutorial - Activepieces Tutorial: Open-Source Automation, Pieces, and AI-Ready Workflow Operations: https://github.com/johnxie/awesome-code-docs/tree/main/tutorials/activepieces-tutorial - ADK Python Tutorial: Production-Grade Agent Engineering with Google's ADK: https://github.com/johnxie/awesome-code-docs/tree/main/tutorials/adk-python-tutorial +- AFFiNE Tutorial: Open-Source AI Workspace with Docs, Whiteboards, and Databases: https://github.com/johnxie/awesome-code-docs/tree/main/tutorials/affine-tutorial - AG2 Tutorial: Next-Generation Multi-Agent Framework: https://github.com/johnxie/awesome-code-docs/tree/main/tutorials/ag2-tutorial - AgentGPT Tutorial: Building Autonomous AI Agents: https://github.com/johnxie/awesome-code-docs/tree/main/tutorials/agentgpt-tutorial - AgenticSeek Tutorial: Local-First Autonomous Agent Operations: https://github.com/johnxie/awesome-code-docs/tree/main/tutorials/agenticseek-tutorial @@ -136,6 +138,7 @@ - MCPB Tutorial: Packaging and Distributing Local MCP Servers as Bundles: https://github.com/johnxie/awesome-code-docs/tree/main/tutorials/mcpb-tutorial - MeiliSearch Tutorial: Lightning Fast Search Engine: https://github.com/johnxie/awesome-code-docs/tree/main/tutorials/meilisearch-tutorial - Mem0 Tutorial: 
Building Production-Ready AI Agents with Scalable Long-Term Memory: https://github.com/johnxie/awesome-code-docs/tree/main/tutorials/mem0-tutorial +- MetaGPT Tutorial: Multi-Agent Software Development with Role-Based Collaboration: https://github.com/johnxie/awesome-code-docs/tree/main/tutorials/metagpt-tutorial - Mini-SWE-Agent Tutorial: Minimal Autonomous Code Agent Design at Benchmark Scale: https://github.com/johnxie/awesome-code-docs/tree/main/tutorials/mini-swe-agent-tutorial - Mistral Vibe Tutorial: Minimal CLI Coding Agent by Mistral: https://github.com/johnxie/awesome-code-docs/tree/main/tutorials/mistral-vibe-tutorial - n8n AI Tutorial: Workflow Automation with AI: https://github.com/johnxie/awesome-code-docs/tree/main/tutorials/n8n-ai-tutorial @@ -164,6 +167,7 @@ - Phidata Tutorial: Building Autonomous AI Agents: https://github.com/johnxie/awesome-code-docs/tree/main/tutorials/phidata-tutorial - PhotoPrism Tutorial: AI-Powered Photos App: https://github.com/johnxie/awesome-code-docs/tree/main/tutorials/photoprism-tutorial - Plandex Tutorial: Large-Task AI Coding Agent Workflows: https://github.com/johnxie/awesome-code-docs/tree/main/tutorials/plandex-tutorial +- Plane Tutorial: AI-Native Project Management: https://github.com/johnxie/awesome-code-docs/tree/main/tutorials/plane-tutorial - Planning with Files Tutorial: Persistent Markdown Workflow Memory for AI Coding Agents: https://github.com/johnxie/awesome-code-docs/tree/main/tutorials/planning-with-files-tutorial - Playwright MCP Tutorial: Browser Automation for Coding Agents Through MCP: https://github.com/johnxie/awesome-code-docs/tree/main/tutorials/playwright-mcp-tutorial - PocketFlow Tutorial: Minimal LLM Framework with Graph-Based Power: https://github.com/johnxie/awesome-code-docs/tree/main/tutorials/pocketflow-tutorial diff --git a/tutorials/README.md b/tutorials/README.md index 7340231..68977f1 100644 --- a/tutorials/README.md +++ b/tutorials/README.md @@ -14,9 +14,9 @@ Use this guide to 
navigate all tutorial tracks, understand structure rules, and | Metric | Value | |:-------|:------| -| Tutorial directories | 191 | -| Tutorial markdown files | 1722 | -| Tutorial markdown lines | 696,269 | +| Tutorial directories | 195 | +| Tutorial markdown files | 1758 | +| Tutorial markdown lines | 709,940 | ## Source Verification Snapshot @@ -37,7 +37,7 @@ Repository-source verification run against tutorial index references (GitHub API | Pattern | Count | Description | |:--------|:------|:------------| -| Root chapter files | 191 | `README.md` + top-level `01-...md` to `08-...md` | +| Root chapter files | 195 | `README.md` + top-level `01-...md` to `08-...md` | | `docs/` chapter files | 0 | Deprecated and fully migrated | | Index-only roadmap | 0 | All catalog entries publish full chapter sets | | Mixed root + `docs/` | 0 | Legacy hybrid layout removed | diff --git a/tutorials/a2a-protocol-tutorial/01-getting-started.md b/tutorials/a2a-protocol-tutorial/01-getting-started.md new file mode 100644 index 0000000..1adc0db --- /dev/null +++ b/tutorials/a2a-protocol-tutorial/01-getting-started.md @@ -0,0 +1,209 @@ +--- +layout: default +title: "Chapter 1: Getting Started" +parent: "A2A Protocol Tutorial" +nav_order: 1 +--- + +# Chapter 1: Getting Started With the A2A Protocol + +Welcome to the Agent-to-Agent (A2A) protocol tutorial. A2A is an open standard — originally created by Google and now governed by the Linux Foundation — that defines how AI agents discover and communicate with each other across platforms, frameworks, and vendors. + +## What Problem Does This Solve? + +Today's AI ecosystem has a fragmentation problem. You might build an agent with LangChain, your partner uses CrewAI, and a third service runs on a custom framework. Each agent is capable on its own, but making them collaborate requires custom glue code for every pair of integrations. + +A2A provides a universal protocol so that any agent can: + +1. 
**Discover** what other agents can do (via Agent Cards) +2. **Send tasks** to remote agents over standard HTTP +3. **Receive streaming updates** as work progresses +4. **Collect artifacts** (results) in a structured format + +Think of it like HTTP for agent collaboration — a shared language that lets independently built agents work together. + +## MCP vs A2A: Two Complementary Standards + +A common question is how A2A relates to MCP (Model Context Protocol). They solve different problems and are designed to work together: + +| Aspect | MCP | A2A | +|:-------|:----|:----| +| **Relationship** | Agent → Tool/Data | Agent → Agent | +| **Primary use** | Give an agent access to APIs, databases, files | Let agents delegate work to other agents | +| **Discovery** | Server capabilities negotiation | Agent Cards with skills and endpoints | +| **Communication** | JSON-RPC over stdio/HTTP | JSON-RPC over HTTP with streaming | +| **Governance** | Anthropic / open community | Linux Foundation | + +```mermaid +flowchart TD + U[User] --> HA[Host Agent] + HA -->|MCP| T1[Search Tool] + HA -->|MCP| T2[Database Tool] + HA -->|A2A| RA1[Research Agent] + HA -->|A2A| RA2[Coding Agent] + RA1 -->|MCP| T3[Web Scraper Tool] + RA2 -->|MCP| T4[Code Executor Tool] + + classDef agent fill:#fff3e0,stroke:#ef6c00 + classDef tool fill:#e1f5fe,stroke:#01579b + + class HA,RA1,RA2 agent + class T1,T2,T3,T4 tool +``` + +**MCP** connects an agent to tools and data sources. **A2A** connects an agent to other agents. A host agent might use MCP to access a database and A2A to delegate a research task to a specialized agent — which itself uses MCP to access a web scraping tool. + +## Core Concepts at a Glance + +### Agent Card + +Every A2A agent publishes a JSON document called an **Agent Card** at a well-known URL (typically `/.well-known/agent.json`). 
This card describes: + +- The agent's name, description, and provider +- What skills it offers +- What authentication it requires +- Its endpoint URL + +```json +{ + "name": "Research Assistant", + "description": "Finds and summarizes information on any topic", + "url": "https://research-agent.example.com/a2a", + "version": "1.0.0", + "capabilities": { + "streaming": true, + "pushNotifications": false + }, + "skills": [ + { + "id": "web-research", + "name": "Web Research", + "description": "Search the web and synthesize findings", + "tags": ["research", "search", "summarize"] + } + ], + "authentication": { + "schemes": ["oauth2"] + } +} +``` + +### Task + +A **Task** is the unit of work in A2A. A client agent sends a task to a remote agent, which processes it and returns results. Tasks have a lifecycle: + +``` +submitted → working → completed + → failed + → canceled +``` + +### Message and Artifact + +Communication within a task happens through **Messages** (conversational turns) and **Artifacts** (structured output). A message might say "I'm analyzing the data now..." while an artifact contains the final research report. 
+ +## Setting Up Your Environment + +### Install the A2A Python SDK + +```bash +# Create a virtual environment +python -m venv a2a-env +source a2a-env/bin/activate + +# Install the A2A SDK +pip install a2a-sdk +``` + +### Verify the Installation + +```python +import a2a + +# Check SDK version +print(f"A2A SDK version: {a2a.__version__}") +``` + +### Quick Smoke Test: Fetching an Agent Card + +```python +import httpx +import json + +async def discover_agent(base_url: str): + """Fetch an agent's card from its well-known URL.""" + async with httpx.AsyncClient() as client: + response = await client.get( + f"{base_url}/.well-known/agent.json" + ) + response.raise_for_status() + card = response.json() + + print(f"Agent: {card['name']}") + print(f"Skills: {[s['name'] for s in card.get('skills', [])]}") + return card + +# Usage: +# import asyncio +# asyncio.run(discover_agent("https://research-agent.example.com")) +``` + +## How It Works Under the Hood + +When a client agent wants to collaborate with a remote agent, the full flow looks like this: + +```mermaid +sequenceDiagram + participant C as Client Agent + participant D as Discovery (well-known URL) + participant R as Remote Agent + + C->>D: GET /.well-known/agent.json + D-->>C: Agent Card (capabilities, skills, auth) + + C->>R: POST /a2a (tasks/send) + Note right of R: Remote agent processes task + R-->>C: Task response (status: working) + + C->>R: POST /a2a (tasks/get) + R-->>C: Task response (status: completed, artifacts) +``` + +1. **Discovery**: The client fetches the remote agent's Agent Card to learn its capabilities. +2. **Task submission**: The client sends a task via JSON-RPC over HTTP. +3. **Processing**: The remote agent works on the task, potentially streaming updates. +4. **Result retrieval**: The client gets the final result as artifacts. 
+ +## Project Structure + +A typical A2A project looks like this: + +``` +my-a2a-agent/ +├── agent_card.json # Your agent's capability declaration +├── server.py # A2A server handling incoming tasks +├── client.py # A2A client for calling other agents +├── task_handler.py # Business logic for processing tasks +├── requirements.txt +└── tests/ + ├── test_agent_card.py + └── test_task_lifecycle.py +``` + +## What You Will Build in This Tutorial + +Across the following chapters, you will: + +1. Understand the full protocol specification (Chapter 2) +2. Create discoverable Agent Cards (Chapter 3) +3. Implement task lifecycle management (Chapter 4) +4. Secure agent communication (Chapter 5) +5. Build working A2A agents in Python (Chapter 6) +6. Design multi-agent delegation patterns (Chapter 7) +7. Combine A2A with MCP for the complete ecosystem (Chapter 8) + +--- + +**Next: [Chapter 2: Protocol Specification](02-protocol-specification.md)** — Dive into Agent Cards, task lifecycle, and streaming mechanics. + +[Back to Tutorial Overview](README.md) | [All Tutorials](../../README.md#-tutorial-catalog) diff --git a/tutorials/a2a-protocol-tutorial/02-protocol-specification.md b/tutorials/a2a-protocol-tutorial/02-protocol-specification.md new file mode 100644 index 0000000..87a3e73 --- /dev/null +++ b/tutorials/a2a-protocol-tutorial/02-protocol-specification.md @@ -0,0 +1,385 @@ +--- +layout: default +title: "Chapter 2: Protocol Specification" +parent: "A2A Protocol Tutorial" +nav_order: 2 +--- + +# Chapter 2: Protocol Specification + +The A2A protocol is built on JSON-RPC 2.0 over HTTP, providing a familiar foundation for developers. This chapter walks through every core primitive: Agent Cards, Messages, Tasks, Artifacts, and streaming — the building blocks of all agent-to-agent communication. + +## What Problem Does This Solve? + +Without a shared protocol specification, every multi-agent integration is ad hoc. 
Team A invents one JSON format, Team B invents another, and a mediator layer must translate between them. A2A defines the canonical wire format so that any conforming agent can talk to any other. + +## JSON-RPC Foundation + +All A2A communication uses JSON-RPC 2.0. Every request has a `method`, `params`, and `id`: + +```json +{ + "jsonrpc": "2.0", + "id": "req-001", + "method": "tasks/send", + "params": { + "id": "task-abc-123", + "message": { + "role": "user", + "parts": [ + { "type": "text", "text": "Summarize the latest AI safety research" } + ] + } + } +} +``` + +Responses follow the standard JSON-RPC format with either a `result` or `error` field. + +## Agent Card Schema + +The Agent Card is the identity document of an A2A agent. It is a JSON object served at `/.well-known/agent.json`: + +```json +{ + "name": "Code Review Agent", + "description": "Automated code review with security and style analysis", + "url": "https://code-review.example.com/a2a", + "version": "2.0.0", + "provider": { + "organization": "DevTools Corp", + "url": "https://devtools.example.com" + }, + "capabilities": { + "streaming": true, + "pushNotifications": true, + "stateTransitionHistory": true + }, + "skills": [ + { + "id": "security-review", + "name": "Security Review", + "description": "Analyze code for security vulnerabilities", + "tags": ["security", "code-review", "vulnerabilities"], + "examples": [ + "Review this Python file for SQL injection risks", + "Check this API endpoint for authentication issues" + ] + }, + { + "id": "style-review", + "name": "Style Review", + "description": "Check code against style guidelines", + "tags": ["style", "linting", "best-practices"] + } + ], + "authentication": { + "schemes": ["oauth2"], + "credentials": "https://auth.devtools.example.com/.well-known/openid-configuration" + }, + "defaultInputModes": ["text", "file"], + "defaultOutputModes": ["text", "file"] +} +``` + +### Key Fields Explained + +| Field | Purpose | +|:------|:--------| +| 
`name`, `description` | Human-readable identity | +| `url` | The JSON-RPC endpoint for sending tasks | +| `capabilities` | What protocol features the agent supports | +| `skills` | Discrete things the agent can do, with tags for matching | +| `authentication` | How to authenticate before sending tasks | +| `defaultInputModes` | What content types the agent accepts (text, file, data) | +| `defaultOutputModes` | What content types the agent produces | + +## Message Structure + +Messages represent conversational turns between agents. Each message has a `role` and a list of `parts`: + +```json +{ + "role": "user", + "parts": [ + { + "type": "text", + "text": "Please review this code for security issues" + }, + { + "type": "file", + "file": { + "name": "handler.py", + "mimeType": "text/x-python", + "bytes": "aW1wb3J0IG9z..." + } + } + ] +} +``` + +### Part Types + +A2A supports multiple part types within a single message: + +```python +# Text part — plain text or markdown +text_part = {"type": "text", "text": "Analyze this data"} + +# File part — binary content with metadata +file_part = { + "type": "file", + "file": { + "name": "report.pdf", + "mimeType": "application/pdf", + "bytes": "" # or use "uri" for remote files + } +} + +# Data part — structured JSON data +data_part = { + "type": "data", + "data": { + "metrics": {"complexity": 42, "lines": 500}, + "language": "python" + } +} +``` + +## Task Lifecycle + +A Task is the central unit of work. 
It has a well-defined state machine: + +```mermaid +stateDiagram-v2 + [*] --> submitted: tasks/send + submitted --> working: Agent starts processing + working --> working: Status update (streaming) + working --> input_required: Agent needs clarification + input_required --> working: Client provides input + working --> completed: Success + working --> failed: Error + working --> canceled: tasks/cancel + completed --> [*] + failed --> [*] + canceled --> [*] +``` + +### Task Object + +```json +{ + "id": "task-abc-123", + "sessionId": "session-xyz", + "status": { + "state": "working", + "message": { + "role": "agent", + "parts": [{"type": "text", "text": "Analyzing code..."}] + }, + "timestamp": "2026-03-21T10:30:00Z" + }, + "artifacts": [], + "history": [ + { + "role": "user", + "parts": [{"type": "text", "text": "Review this code"}] + } + ], + "metadata": {} +} +``` + +### Task States + +| State | Meaning | +|:------|:--------| +| `submitted` | Task received, not yet started | +| `working` | Agent is actively processing | +| `input-required` | Agent needs more information from the client | +| `completed` | Task finished successfully | +| `failed` | Task encountered an unrecoverable error | +| `canceled` | Task was canceled by the client | + +## Artifacts + +Artifacts are the structured outputs of a task. Unlike status messages (which are ephemeral), artifacts persist as deliverables: + +```json +{ + "id": "artifact-001", + "name": "Security Review Report", + "description": "Analysis of handler.py for security vulnerabilities", + "parts": [ + { + "type": "text", + "text": "## Security Review\n\n### Critical: SQL Injection on line 42\n..." 
+ }, + { + "type": "data", + "data": { + "vulnerabilities": 3, + "severity": {"critical": 1, "high": 1, "medium": 1} + } + } + ], + "metadata": { + "reviewType": "security", + "linesAnalyzed": 500 + } + } +``` + +## Protocol Methods + +The A2A specification defines these JSON-RPC methods: + +### Core Methods + +```typescript +// Send a task to an agent +interface TaskSendRequest { + method: "tasks/send"; + params: { + id: string; + sessionId?: string; + message: Message; + metadata?: Record<string, unknown>; + pushNotification?: PushNotificationConfig; + }; +} + +// Send a task with streaming response (SSE) +interface TaskSendSubscribeRequest { + method: "tasks/sendSubscribe"; + params: TaskSendRequest["params"]; +} + +// Get current task status +interface TaskGetRequest { + method: "tasks/get"; + params: { + id: string; + historyLength?: number; + }; +} + +// Cancel a running task +interface TaskCancelRequest { + method: "tasks/cancel"; + params: { id: string }; +} +``` + +### Push Notification Methods + +```typescript +// Set up push notification webhook +interface SetPushNotificationRequest { + method: "tasks/pushNotification/set"; + params: { + id: string; + pushNotificationConfig: { + url: string; + token?: string; + }; + }; +} + +// Get current push notification config +interface GetPushNotificationRequest { + method: "tasks/pushNotification/get"; + params: { id: string }; +} +``` + +## Streaming With Server-Sent Events + +When using `tasks/sendSubscribe`, the server responds with a stream of Server-Sent Events (SSE): + +```python +# What the HTTP response stream looks like: +# Content-Type: text/event-stream + +# data: {"jsonrpc":"2.0","id":"req-1","result":{"id":"task-1","status":{"state":"working","message":{"role":"agent","parts":[{"type":"text","text":"Starting analysis..."}]}}}} + +# data: {"jsonrpc":"2.0","id":"req-1","result":{"id":"task-1","status":{"state":"working","message":{"role":"agent","parts":[{"type":"text","text":"Found 3 issues..."}]}}}} + +# data: 
{"jsonrpc":"2.0","id":"req-1","result":{"id":"task-1","status":{"state":"completed"},"artifacts":[{"name":"report","parts":[{"type":"text","text":"## Final Report..."}]}]}} +``` + +### SSE Event Types + +Each SSE event carries either a `TaskStatusUpdateEvent` or a `TaskArtifactUpdateEvent`: + +```json +{ + "jsonrpc": "2.0", + "id": "req-1", + "result": { + "id": "task-1", + "status": { + "state": "working", + "message": { + "role": "agent", + "parts": [{"type": "text", "text": "Processing step 2 of 5..."}] + } + } + } +} +``` + +## How It Works Under the Hood + +```mermaid +sequenceDiagram + participant C as Client Agent + participant S as A2A Server + + C->>S: POST /a2a {"method": "tasks/sendSubscribe", ...} + Note right of S: Opens SSE stream + + S-->>C: SSE: status=working "Starting..." + S-->>C: SSE: status=working "Found 3 issues" + S-->>C: SSE: artifact "Partial report..." + S-->>C: SSE: status=completed + final artifact + Note left of C: Stream closes + + C->>S: POST /a2a {"method": "tasks/get", ...} + S-->>C: Full task with history and artifacts +``` + +The protocol separates **status updates** (transient progress) from **artifacts** (persistent outputs), so a client can display real-time progress while also accumulating deliverables. + +## Error Handling + +A2A uses standard JSON-RPC error codes plus protocol-specific extensions: + +```json +{ + "jsonrpc": "2.0", + "id": "req-1", + "error": { + "code": -32001, + "message": "Task not found", + "data": { "taskId": "task-unknown" } + } +} +``` + +| Code | Meaning | +|:-----|:--------| +| `-32700` | Parse error | +| `-32600` | Invalid request | +| `-32601` | Method not found | +| `-32602` | Invalid params | +| `-32603` | Internal error | +| `-32001` | Task not found | +| `-32002` | Task not cancelable | +| `-32003` | Push notification not supported | + +--- + +**Next: [Chapter 3: Agent Discovery](03-agent-discovery.md)** — How agents find each other and evaluate capabilities. 
+ +[Previous: Chapter 1](01-getting-started.md) | [Back to Tutorial Overview](README.md) diff --git a/tutorials/a2a-protocol-tutorial/03-agent-discovery.md b/tutorials/a2a-protocol-tutorial/03-agent-discovery.md new file mode 100644 index 0000000..b3cbe9b --- /dev/null +++ b/tutorials/a2a-protocol-tutorial/03-agent-discovery.md @@ -0,0 +1,376 @@ +--- +layout: default +title: "Chapter 3: Agent Discovery" +parent: "A2A Protocol Tutorial" +nav_order: 3 +--- + +# Chapter 3: Agent Discovery + +Agent discovery is the foundation of A2A interoperability. Before agents can collaborate, they need a reliable way to find each other, understand capabilities, and evaluate fitness for a task. The A2A protocol solves this through Agent Cards — self-describing JSON documents served at well-known URLs. + +## What Problem Does This Solve? + +In a world of hundreds of specialized agents, a client agent needs to answer: "Which agent should I delegate this task to?" Without a standard discovery mechanism, you would need a central registry that every agent vendor agrees on, or hard-code agent URLs into your application. Agent Cards provide a decentralized, web-native solution — the same pattern that `robots.txt` and `.well-known/openid-configuration` use. + +## The Well-Known URL Pattern + +Every A2A agent MUST serve its Agent Card at: + +``` +https:///.well-known/agent.json +``` + +This follows [RFC 8615](https://tools.ietf.org/html/rfc8615) for well-known URIs. Any client that knows an agent's hostname can discover its capabilities with a single GET request. 
+ +```python +import httpx + +async def fetch_agent_card(host: str) -> dict: + """Discover an agent by fetching its card from the well-known URL.""" + url = f"https://{host}/.well-known/agent.json" + async with httpx.AsyncClient() as client: + response = await client.get(url, follow_redirects=True) + response.raise_for_status() + return response.json() + +# Example usage +# card = await fetch_agent_card("research-agent.example.com") +``` + +## Anatomy of an Agent Card + +Let us build a complete Agent Card for a translation agent: + +```json +{ + "name": "Polyglot Translator", + "description": "High-quality translation between 50+ languages with domain specialization in legal, medical, and technical texts", + "url": "https://translate.example.com/a2a", + "version": "3.1.0", + "documentationUrl": "https://translate.example.com/docs", + "provider": { + "organization": "LinguaTech", + "url": "https://linguatech.example.com", + "contactEmail": "agents@linguatech.example.com" + }, + "capabilities": { + "streaming": true, + "pushNotifications": true, + "stateTransitionHistory": true + }, + "skills": [ + { + "id": "general-translation", + "name": "General Translation", + "description": "Translate text between any supported language pair", + "tags": ["translation", "language", "i18n"], + "examples": [ + "Translate this paragraph from English to Japanese", + "Convert this French legal document to English" + ], + "inputModes": ["text", "file"], + "outputModes": ["text", "file"] + }, + { + "id": "medical-translation", + "name": "Medical Translation", + "description": "Translate medical documents with terminology accuracy", + "tags": ["translation", "medical", "healthcare"], + "examples": [ + "Translate this patient discharge summary to Spanish", + "Convert this clinical trial report from German to English" + ], + "inputModes": ["text", "file"], + "outputModes": ["text", "file"] + } + ], + "defaultInputModes": ["text", "file"], + "defaultOutputModes": ["text"], + 
"authentication": { + "schemes": ["oauth2"], + "credentials": "https://auth.linguatech.example.com/.well-known/openid-configuration" + }, + "supportsAuthenticatedExtendedCard": true +} +``` + +### Skills as the Primary Matching Mechanism + +Skills are how client agents determine if a remote agent is right for a task. Each skill has: + +- **`id`**: A stable identifier for programmatic matching +- **`name` / `description`**: Human-readable context +- **`tags`**: Keywords for search and filtering +- **`examples`**: Sample prompts that show what this skill handles +- **`inputModes` / `outputModes`**: Content type constraints + +## Building an Agent Card in Python + +```python +from dataclasses import dataclass, field, asdict +from typing import Optional +import json + +@dataclass +class Skill: + id: str + name: str + description: str + tags: list[str] = field(default_factory=list) + examples: list[str] = field(default_factory=list) + input_modes: list[str] = field(default_factory=lambda: ["text"]) + output_modes: list[str] = field(default_factory=lambda: ["text"]) + +@dataclass +class AgentCard: + name: str + description: str + url: str + version: str + skills: list[Skill] + provider: Optional[dict] = None + capabilities: dict = field(default_factory=lambda: { + "streaming": False, + "pushNotifications": False, + }) + authentication: Optional[dict] = None + default_input_modes: list[str] = field(default_factory=lambda: ["text"]) + default_output_modes: list[str] = field(default_factory=lambda: ["text"]) + + def to_json(self) -> str: + """Serialize to the A2A Agent Card JSON format.""" + data = { + "name": self.name, + "description": self.description, + "url": self.url, + "version": self.version, + "skills": [ + { + "id": s.id, + "name": s.name, + "description": s.description, + "tags": s.tags, + "examples": s.examples, + "inputModes": s.input_modes, + "outputModes": s.output_modes, + } + for s in self.skills + ], + "capabilities": self.capabilities, + 
"defaultInputModes": self.default_input_modes, + "defaultOutputModes": self.default_output_modes, + } + if self.provider: + data["provider"] = self.provider + if self.authentication: + data["authentication"] = self.authentication + return json.dumps(data, indent=2) + +# Create an agent card +card = AgentCard( + name="Data Analyst", + description="Statistical analysis and visualization agent", + url="https://analyst.example.com/a2a", + version="1.0.0", + skills=[ + Skill( + id="statistical-analysis", + name="Statistical Analysis", + description="Run statistical tests and generate insights", + tags=["statistics", "analysis", "data"], + examples=["Analyze the correlation between X and Y in this dataset"], + ), + Skill( + id="visualization", + name="Data Visualization", + description="Create charts and graphs from datasets", + tags=["charts", "graphs", "visualization"], + input_modes=["text", "data", "file"], + output_modes=["text", "file"], + ), + ], + capabilities={"streaming": True, "pushNotifications": False}, +) + +print(card.to_json()) +``` + +## Serving the Agent Card + +Here is a minimal server that serves an Agent Card alongside the A2A endpoint: + +```python +from starlette.applications import Starlette +from starlette.responses import JSONResponse +from starlette.routing import Route + +agent_card = { + "name": "Echo Agent", + "description": "Simple agent that echoes messages back", + "url": "https://echo.example.com/a2a", + "version": "1.0.0", + "capabilities": {"streaming": False, "pushNotifications": False}, + "skills": [ + { + "id": "echo", + "name": "Echo", + "description": "Echoes your message back", + "tags": ["echo", "test"], + } + ], + "defaultInputModes": ["text"], + "defaultOutputModes": ["text"], +} + +async def serve_agent_card(request): + """Serve the Agent Card at the well-known URL.""" + return JSONResponse( + agent_card, + headers={ + "Content-Type": "application/json", + "Cache-Control": "public, max-age=3600", + }, + ) + +async def 
handle_a2a(request): + """Handle A2A JSON-RPC requests.""" + body = await request.json() + # ... dispatch based on body["method"] + return JSONResponse({"jsonrpc": "2.0", "id": body["id"], "result": {}}) + +app = Starlette(routes=[ + Route("/.well-known/agent.json", serve_agent_card), + Route("/a2a", handle_a2a, methods=["POST"]), +]) +``` + +## Agent Discovery Patterns + +### Pattern 1: Direct Discovery + +The simplest pattern — the client knows the agent's hostname: + +```python +card = await fetch_agent_card("research-agent.example.com") +``` + +### Pattern 2: Registry-Based Discovery + +An organization maintains a registry of known agents: + +```python +async def discover_from_registry( + registry_url: str, tags: list[str] +) -> list[dict]: + """Query a registry for agents matching specific tags.""" + async with httpx.AsyncClient() as client: + response = await client.get( + f"{registry_url}/agents", + params={"tags": ",".join(tags)}, + ) + agents = response.json() + + # Fetch full Agent Card for each result + cards = [] + for agent in agents: + card = await fetch_agent_card(agent["host"]) + cards.append(card) + return cards + +# Find all agents that can do translation +# cards = await discover_from_registry( +# "https://registry.example.com", ["translation"] +# ) +``` + +### Pattern 3: Skill-Based Routing + +Match incoming requests to the best agent based on skill tags: + +```python +def find_best_agent( + agent_cards: list[dict], required_tags: set[str] +) -> dict | None: + """Find the agent whose skills best match the required tags.""" + best_match = None + best_score = 0 + + for card in agent_cards: + for skill in card.get("skills", []): + skill_tags = set(skill.get("tags", [])) + overlap = len(skill_tags & required_tags) + if overlap > best_score: + best_score = overlap + best_match = card + + return best_match + +# Example +agents = [card_a, card_b, card_c] +best = find_best_agent(agents, {"medical", "translation"}) +``` + +## Extended Agent Cards + +A2A 
supports the concept of **extended Agent Cards** — additional capability details that are only available after authentication. This lets agents hide sensitive details from anonymous discovery while still being findable: + +```json +{ + "supportsAuthenticatedExtendedCard": true +} +``` + +After authenticating, a client can fetch the extended card with additional fields like rate limits, SLA guarantees, or internal-only skills. + +## How It Works Under the Hood + +```mermaid +flowchart TD + C[Client Agent] -->|1. GET /.well-known/agent.json| W[Well-Known URL] + W -->|2. Agent Card JSON| C + C -->|3. Evaluate skills and capabilities| D{Match?} + D -->|Yes| E[Authenticate] + D -->|No| F[Try next agent] + E -->|4. OAuth2 token| T[Token] + T -->|5. POST /a2a tasks/send| S[Remote Agent] + S -->|6. Task result| C + + classDef client fill:#e1f5fe,stroke:#01579b + classDef decision fill:#fff3e0,stroke:#ef6c00 + classDef server fill:#e8f5e8,stroke:#1b5e20 + + class C client + class D decision + class S,W server +``` + +The discovery process is intentionally lightweight — a single HTTP GET returns everything a client needs to decide whether to use an agent. No handshake, no session setup, no registration required. + +## Caching and Versioning + +Agent Cards should be cached by clients. The `version` field helps clients detect changes: + +```python +import hashlib + +class AgentCardCache: + def __init__(self): + self._cache: dict[str, tuple[str, dict]] = {} # host -> (version, card) + + async def get_card(self, host: str) -> dict: + cached = self._cache.get(host) + card = await fetch_agent_card(host) + + if cached and cached[0] == card.get("version"): + return cached[1] # Use cached version + + self._cache[host] = (card.get("version", ""), card) + return card +``` + +--- + +**Next: [Chapter 4: Task Management](04-task-management.md)** — Creating, tracking, and completing tasks with streaming updates. 
+ +[Previous: Chapter 2](02-protocol-specification.md) | [Back to Tutorial Overview](README.md) diff --git a/tutorials/a2a-protocol-tutorial/04-task-management.md b/tutorials/a2a-protocol-tutorial/04-task-management.md new file mode 100644 index 0000000..6157c92 --- /dev/null +++ b/tutorials/a2a-protocol-tutorial/04-task-management.md @@ -0,0 +1,474 @@ +--- +layout: default +title: "Chapter 4: Task Management" +parent: "A2A Protocol Tutorial" +nav_order: 4 +--- + +# Chapter 4: Task Management + +Tasks are the core unit of work in the A2A protocol. This chapter covers the full task lifecycle — from creation through streaming updates to artifact collection — with practical implementations in Python and TypeScript. + +## What Problem Does This Solve? + +Agent-to-agent collaboration is inherently asynchronous. A research agent might take minutes to compile findings; a code review agent might need to ask clarifying questions mid-task. The A2A task model provides a structured way to handle all of these patterns: synchronous quick replies, long-running background work, multi-turn conversations, and real-time streaming — all through the same protocol. 
+ +## Task Creation + +### Using `tasks/send` (Synchronous) + +The simplest way to create a task — send and wait for the final result: + +```python +import httpx +import uuid + +async def send_task( + agent_url: str, + message: str, + session_id: str | None = None, + token: str | None = None, +) -> dict: + """Send a task to a remote agent and wait for the result.""" + task_id = str(uuid.uuid4()) + headers = {"Content-Type": "application/json"} + if token: + headers["Authorization"] = f"Bearer {token}" + + payload = { + "jsonrpc": "2.0", + "id": f"req-{task_id[:8]}", + "method": "tasks/send", + "params": { + "id": task_id, + "message": { + "role": "user", + "parts": [{"type": "text", "text": message}], + }, + }, + } + + if session_id: + payload["params"]["sessionId"] = session_id + + async with httpx.AsyncClient(timeout=120.0) as client: + response = await client.post(agent_url, json=payload) + response.raise_for_status() + return response.json()["result"] + +# Usage +# result = await send_task( +# "https://agent.example.com/a2a", +# "Summarize the key findings from the Q4 report" +# ) +``` + +### Using `tasks/sendSubscribe` (Streaming) + +For real-time updates, use the streaming variant that returns Server-Sent Events: + +```python +import httpx +import json + +async def send_task_streaming( + agent_url: str, + message: str, + on_status: callable = None, + on_artifact: callable = None, +) -> dict: + """Send a task and stream status updates and artifacts.""" + task_id = str(uuid.uuid4()) + + payload = { + "jsonrpc": "2.0", + "id": f"req-{task_id[:8]}", + "method": "tasks/sendSubscribe", + "params": { + "id": task_id, + "message": { + "role": "user", + "parts": [{"type": "text", "text": message}], + }, + }, + } + + final_result = None + async with httpx.AsyncClient(timeout=300.0) as client: + async with client.stream("POST", agent_url, json=payload) as response: + async for line in response.aiter_lines(): + if not line.startswith("data: "): + continue + + event_data 
= json.loads(line[6:]) + result = event_data.get("result", {}) + + # Handle status updates + if "status" in result: + status = result["status"] + if on_status: + on_status(status) + if status["state"] in ("completed", "failed", "canceled"): + final_result = result + break + + # Handle artifact updates + if "artifact" in result: + if on_artifact: + on_artifact(result["artifact"]) + + return final_result + +# Usage with callbacks +# async def handle_status(status): +# print(f"[{status['state']}] {status.get('message', {}).get('parts', [{}])[0].get('text', '')}") +# +# result = await send_task_streaming( +# "https://agent.example.com/a2a", +# "Analyze this dataset for trends", +# on_status=handle_status, +# ) +``` + +### TypeScript Client + +```typescript +interface TaskSendParams { + id: string; + sessionId?: string; + message: { + role: "user" | "agent"; + parts: Array<{ type: string; text?: string; data?: unknown }>; + }; +} + +async function sendTask( + agentUrl: string, + message: string +): Promise<unknown> { + const taskId = crypto.randomUUID(); + + const response = await fetch(agentUrl, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + jsonrpc: "2.0", + id: `req-${taskId.slice(0, 8)}`, + method: "tasks/send", + params: { + id: taskId, + message: { + role: "user", + parts: [{ type: "text", text: message }], + }, + }, + }), + }); + + const result = await response.json(); + return result.result; +} + +// Streaming variant +async function* sendTaskStreaming( + agentUrl: string, + message: string +): AsyncGenerator<unknown> { + const taskId = crypto.randomUUID(); + + const response = await fetch(agentUrl, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + jsonrpc: "2.0", + id: `req-${taskId.slice(0, 8)}`, + method: "tasks/sendSubscribe", + params: { + id: taskId, + message: { + role: "user", + parts: [{ type: "text", text: message }], + }, + }, + }), + }); + + const reader = 
response.body!.getReader(); + const decoder = new TextDecoder(); + let buffer = ""; + + while (true) { + const { done, value } = await reader.read(); + if (done) break; + + buffer += decoder.decode(value, { stream: true }); + const lines = buffer.split("\n"); + buffer = lines.pop()!; + + for (const line of lines) { + if (line.startsWith("data: ")) { + yield JSON.parse(line.slice(6)); + } + } + } +} +``` + +## Checking Task Status + +For long-running tasks, poll the status: + +```python +import asyncio + +async def poll_task( + agent_url: str, + task_id: str, + interval: float = 2.0, + timeout: float = 300.0, +) -> dict: + """Poll a task until it reaches a terminal state.""" + payload = { + "jsonrpc": "2.0", + "id": "poll", + "method": "tasks/get", + "params": {"id": task_id, "historyLength": 10}, + } + + elapsed = 0.0 + async with httpx.AsyncClient() as client: + while elapsed < timeout: + response = await client.post(agent_url, json=payload) + result = response.json()["result"] + state = result["status"]["state"] + + if state in ("completed", "failed", "canceled"): + return result + + await asyncio.sleep(interval) + elapsed += interval + + raise TimeoutError(f"Task {task_id} did not complete within {timeout}s") +``` + +## Multi-Turn Conversations + +When an agent needs clarification, it sets the task state to `input-required`. 
The client then sends a follow-up message with the same task ID: + +```python +async def handle_multi_turn(agent_url: str, initial_message: str) -> dict: + """Handle a multi-turn task conversation.""" + session_id = str(uuid.uuid4()) + + # First turn — send_task generates the task ID; capture it for follow-ups + result = await send_task(agent_url, initial_message, session_id=session_id) + task_id = result["id"] + + while result["status"]["state"] == "input-required": + # Show the agent's question to the user + agent_question = result["status"]["message"]["parts"][0]["text"] + print(f"Agent asks: {agent_question}") + + # Get user's response (in practice, this might come from the client agent) + user_response = input("Your answer: ") + + # Send follow-up with same task ID and session ID + payload = { + "jsonrpc": "2.0", + "id": "req-follow-up", + "method": "tasks/send", + "params": { + "id": task_id, + "sessionId": session_id, + "message": { + "role": "user", + "parts": [{"type": "text", "text": user_response}], + }, + }, + } + + async with httpx.AsyncClient() as client: + response = await client.post(agent_url, json=payload) + result = response.json()["result"] + + return result +``` + +## Canceling Tasks + +```python +async def cancel_task(agent_url: str, task_id: str) -> dict: + """Cancel a running task.""" + payload = { + "jsonrpc": "2.0", + "id": "cancel", + "method": "tasks/cancel", + "params": {"id": task_id}, + } + + async with httpx.AsyncClient() as client: + response = await client.post(agent_url, json=payload) + return response.json()["result"] +``` + +## Push Notifications + +For very long-running tasks, instead of polling, the client can register a webhook: + +```python +async def setup_push_notification( + agent_url: str, + task_id: str, + webhook_url: str, + webhook_token: str | None = None, +) -> dict: + """Register a webhook for task completion notifications.""" + payload = { + "jsonrpc": "2.0", + "id": "push-setup", + "method": "tasks/pushNotification/set", + "params": { + "id": task_id, + "pushNotificationConfig": { + "url": 
webhook_url, + "token": webhook_token, + }, + }, + } + + async with httpx.AsyncClient() as client: + response = await client.post(agent_url, json=payload) + return response.json()["result"] +``` + +The remote agent will POST to the webhook URL whenever the task status changes: + +```python +from starlette.applications import Starlette +from starlette.responses import JSONResponse +from starlette.routing import Route + +async def webhook_handler(request): + """Receive push notifications from remote agents.""" + data = await request.json() + task_id = data.get("id") + status = data.get("status", {}) + state = status.get("state") + + print(f"Task {task_id} status: {state}") + + if state == "completed": + artifacts = data.get("artifacts", []) + for artifact in artifacts: + print(f" Artifact: {artifact.get('name')}") + + return JSONResponse({"received": True}) + +webhook_app = Starlette(routes=[ + Route("/webhook/task-updates", webhook_handler, methods=["POST"]), +]) +``` + +## Working With Artifacts + +Artifacts are the deliverables of a task. 
Extract and process them: + +```python +def extract_artifacts(task_result: dict) -> list[dict]: + """Extract and categorize artifacts from a completed task.""" + artifacts = task_result.get("artifacts", []) + processed = [] + + for artifact in artifacts: + for part in artifact.get("parts", []): + if part["type"] == "text": + processed.append({ + "name": artifact.get("name", "unnamed"), + "type": "text", + "content": part["text"], + }) + elif part["type"] == "file": + processed.append({ + "name": artifact.get("name", "unnamed"), + "type": "file", + "filename": part["file"].get("name"), + "mime_type": part["file"].get("mimeType"), + "data": part["file"].get("bytes"), # base64 + }) + elif part["type"] == "data": + processed.append({ + "name": artifact.get("name", "unnamed"), + "type": "data", + "content": part["data"], + }) + + return processed +``` + +## How It Works Under the Hood + +```mermaid +flowchart TD + C[Client Agent] -->|tasks/send| S[Remote Agent] + S --> Q{Quick or Long?} + Q -->|Quick| R1[Process immediately] + R1 --> DONE[Return completed task] + + Q -->|Long| BG[Background processing] + BG --> ST[Status: working] + ST -->|Stream via SSE| C + BG --> ART[Generate artifacts] + ART -->|Artifact update via SSE| C + BG --> NEED{Need input?} + NEED -->|Yes| IR[Status: input-required] + IR -->|Client sends follow-up| BG + NEED -->|No| COMP[Status: completed] + COMP --> DONE2[Return final task with artifacts] + DONE2 --> C + + classDef client fill:#e1f5fe,stroke:#01579b + classDef server fill:#fff3e0,stroke:#ef6c00 + classDef decision fill:#f3e5f5,stroke:#4a148c + + class C client + class S,BG,ST,ART,R1,COMP,DONE,DONE2,IR server + class Q,NEED decision +``` + +## Session Management + +Sessions group related tasks together. 
Using the same `sessionId` across multiple `tasks/send` calls lets the remote agent maintain context: + +```python +class A2ASession: + """Manage a series of related tasks with a single agent.""" + + def __init__(self, agent_url: str, token: str | None = None): + self.agent_url = agent_url + self.session_id = str(uuid.uuid4()) + self.token = token + self.task_history: list[str] = [] + + async def send(self, message: str) -> dict: + """Send a new task within this session.""" + result = await send_task( + self.agent_url, + message, + session_id=self.session_id, + token=self.token, + ) + self.task_history.append(result["id"]) + return result + + async def get_last_task(self) -> dict | None: + """Retrieve the last task in this session.""" + if not self.task_history: + return None + return await poll_task(self.agent_url, self.task_history[-1]) +``` + +--- + +**Next: [Chapter 5: Authentication and Security](05-authentication-and-security.md)** — Securing agent-to-agent communication with OAuth2 and identity verification. + +[Previous: Chapter 3](03-agent-discovery.md) | [Back to Tutorial Overview](README.md) diff --git a/tutorials/a2a-protocol-tutorial/05-authentication-and-security.md b/tutorials/a2a-protocol-tutorial/05-authentication-and-security.md new file mode 100644 index 0000000..df49a67 --- /dev/null +++ b/tutorials/a2a-protocol-tutorial/05-authentication-and-security.md @@ -0,0 +1,406 @@ +--- +layout: default +title: "Chapter 5: Authentication and Security" +parent: "A2A Protocol Tutorial" +nav_order: 5 +--- + +# Chapter 5: Authentication and Security + +Agent-to-agent communication happens over the open internet between independently operated services. This chapter covers how A2A handles authentication, authorization, identity verification, and the trust model that makes agent collaboration safe. + +## What Problem Does This Solve? 
+ +When Agent A sends a task to Agent B, both sides need answers to critical questions: + +- **Agent B asks**: "Is Agent A who it claims to be? Does it have permission to use my capabilities?" +- **Agent A asks**: "Is Agent B's response authentic? Has the data been tampered with?" +- **Both ask**: "Is this communication confidential?" + +Without standardized security, every agent integration would need custom auth negotiation. A2A builds on established web security standards — primarily OAuth 2.0 — so that agents can leverage existing identity infrastructure. + +## Authentication Schemes in Agent Cards + +The Agent Card declares what authentication the agent requires: + +```json +{ + "name": "Enterprise Analysis Agent", + "url": "https://analysis.corp.example.com/a2a", + "authentication": { + "schemes": ["oauth2"], + "credentials": "https://auth.corp.example.com/.well-known/openid-configuration" + } +} +``` + +### Supported Schemes + +| Scheme | Use Case | +|:-------|:---------| +| `oauth2` | Standard OAuth 2.0 / OpenID Connect — recommended for production | +| `bearer` | Simple bearer token — useful for API keys and service-to-service | +| `none` | No authentication — only for public, read-only agents | + +## OAuth 2.0 Flow for Agent-to-Agent + +The recommended flow for A2A is the **OAuth 2.0 Client Credentials** grant, which is designed for machine-to-machine authentication: + +```mermaid +sequenceDiagram + participant CA as Client Agent + participant AS as Authorization Server + participant RA as Remote Agent + + CA->>AS: POST /token (client_id, client_secret, scope) + AS-->>CA: access_token (JWT) + + CA->>RA: GET /.well-known/agent.json + RA-->>CA: Agent Card + + CA->>RA: POST /a2a (Authorization: Bearer ) + Note right of RA: Validate token signature,
check scopes, process task + RA-->>CA: Task result +``` + +### Implementing the Client Credentials Flow + +```python +import httpx +from datetime import datetime, timedelta + +class A2AAuthClient: + """Handle OAuth2 client credentials for A2A communication.""" + + def __init__( + self, + client_id: str, + client_secret: str, + token_endpoint: str, + scopes: list[str] | None = None, + ): + self.client_id = client_id + self.client_secret = client_secret + self.token_endpoint = token_endpoint + self.scopes = scopes or ["a2a:tasks:send"] + self._token: str | None = None + self._expires_at: datetime | None = None + + async def get_token(self) -> str: + """Get a valid access token, refreshing if needed.""" + if self._token and self._expires_at and datetime.utcnow() < self._expires_at: + return self._token + + async with httpx.AsyncClient() as client: + response = await client.post( + self.token_endpoint, + data={ + "grant_type": "client_credentials", + "client_id": self.client_id, + "client_secret": self.client_secret, + "scope": " ".join(self.scopes), + }, + ) + response.raise_for_status() + data = response.json() + + self._token = data["access_token"] + expires_in = data.get("expires_in", 3600) + self._expires_at = datetime.utcnow() + timedelta(seconds=expires_in - 60) + return self._token + + async def send_authenticated_task( + self, agent_url: str, task_params: dict + ) -> dict: + """Send a task with OAuth2 authentication.""" + token = await self.get_token() + payload = { + "jsonrpc": "2.0", + "id": "req-auth", + "method": "tasks/send", + "params": task_params, + } + + async with httpx.AsyncClient() as client: + response = await client.post( + agent_url, + json=payload, + headers={"Authorization": f"Bearer {token}"}, + ) + response.raise_for_status() + return response.json()["result"] +``` + +### Discovering the Token Endpoint + +The `credentials` field in the Agent Card typically points to an OpenID Connect discovery document: + +```python +async def 
discover_token_endpoint(openid_config_url: str) -> str: + """Fetch the token endpoint from OIDC discovery.""" + async with httpx.AsyncClient() as client: + response = await client.get(openid_config_url) + config = response.json() + return config["token_endpoint"] + +# Usage: +# token_url = await discover_token_endpoint( +# "https://auth.corp.example.com/.well-known/openid-configuration" +# ) +``` + +## Server-Side Token Validation + +The A2A server must validate incoming tokens before processing tasks: + +```python +import jwt +from functools import lru_cache + +class TokenValidator: + """Validate JWT access tokens for incoming A2A requests.""" + + def __init__(self, issuer: str, audience: str, jwks_url: str): + self.issuer = issuer + self.audience = audience + self.jwks_url = jwks_url + self._jwks_client = jwt.PyJWKClient(jwks_url) + + def validate(self, token: str) -> dict: + """Validate a JWT and return the claims.""" + signing_key = self._jwks_client.get_signing_key_from_jwt(token) + + claims = jwt.decode( + token, + signing_key.key, + algorithms=["RS256"], + issuer=self.issuer, + audience=self.audience, + ) + + return claims + + def check_scope(self, claims: dict, required_scope: str) -> bool: + """Check if the token has the required scope.""" + scopes = claims.get("scope", "").split() + return required_scope in scopes +``` + +### Integrating Validation Into the A2A Server + +```python +from starlette.applications import Starlette +from starlette.responses import JSONResponse +from starlette.routing import Route + +validator = TokenValidator( + issuer="https://auth.corp.example.com", + audience="a2a-agents", + jwks_url="https://auth.corp.example.com/.well-known/jwks.json", +) + +async def handle_a2a(request): + """A2A endpoint with authentication.""" + # Extract token + auth_header = request.headers.get("Authorization", "") + if not auth_header.startswith("Bearer "): + return JSONResponse( + { + "jsonrpc": "2.0", + "id": None, + "error": {"code": -32000, 
"message": "Missing authentication"}, + }, + status_code=401, + ) + + token = auth_header[7:] + + # Validate token + try: + claims = validator.validate(token) + except jwt.InvalidTokenError as e: + return JSONResponse( + { + "jsonrpc": "2.0", + "id": None, + "error": {"code": -32000, "message": f"Invalid token: {e}"}, + }, + status_code=401, + ) + + # Check required scope + if not validator.check_scope(claims, "a2a:tasks:send"): + return JSONResponse( + { + "jsonrpc": "2.0", + "id": None, + "error": {"code": -32000, "message": "Insufficient scope"}, + }, + status_code=403, + ) + + # Process the request + body = await request.json() + # ... dispatch task handler + return JSONResponse({"jsonrpc": "2.0", "id": body["id"], "result": {}}) +``` + +## Agent Identity and Trust + +### Verifying Agent Identity + +Beyond token validation, agents may need to verify each other's identity: + +```python +async def verify_agent_identity( + agent_url: str, + expected_provider: str | None = None, +) -> bool: + """Verify an agent's identity by cross-checking its card and TLS certificate.""" + import ssl + from urllib.parse import urlparse + + parsed = urlparse(agent_url) + + # Step 1: TLS certificate validates the domain + # (httpx does this automatically) + + # Step 2: Fetch and validate the Agent Card + async with httpx.AsyncClient(verify=True) as client: + response = await client.get( + f"https://{parsed.hostname}/.well-known/agent.json" + ) + card = response.json() + + # Step 3: Verify the card's URL matches what we expect + if card.get("url") != agent_url: + return False + + # Step 4: Optionally verify the provider + if expected_provider: + provider = card.get("provider", {}).get("organization") + if provider != expected_provider: + return False + + return True +``` + +### Trust Levels + +A practical trust model for A2A deployments: + +```python +from enum import Enum + +class TrustLevel(Enum): + PUBLIC = "public" # Open agents, no auth required + AUTHENTICATED = "auth" # Valid 
OAuth token required + VERIFIED = "verified" # Token + known provider verification + INTERNAL = "internal" # Same organization, mTLS or internal network + +def determine_trust_level( + claims: dict, + agent_card: dict, + is_internal: bool = False, +) -> TrustLevel: + """Determine the trust level for an incoming request.""" + if is_internal: + return TrustLevel.INTERNAL + + # Check if the client is from a verified/known provider + client_org = claims.get("org", "") + known_orgs = {"partner-corp.com", "trusted-vendor.io"} + + if client_org in known_orgs: + return TrustLevel.VERIFIED + + if claims: + return TrustLevel.AUTHENTICATED + + return TrustLevel.PUBLIC +``` + +## Securing Push Notifications + +Push notification webhooks need their own security to prevent spoofed updates: + +```python +import hmac +import hashlib + +def sign_notification(payload: bytes, secret: str) -> str: + """Sign a push notification payload.""" + return hmac.new( + secret.encode(), payload, hashlib.sha256 + ).hexdigest() + +def verify_notification_signature( + payload: bytes, signature: str, secret: str +) -> bool: + """Verify a push notification's HMAC signature.""" + expected = sign_notification(payload, secret) + return hmac.compare_digest(signature, expected) +``` + +## How It Works Under the Hood + +```mermaid +flowchart TD + CA[Client Agent] -->|1. Discover OIDC config| OIDC[/.well-known/openid-configuration] + OIDC -->|2. Token endpoint URL| CA + CA -->|3. Client credentials grant| AS[Authorization Server] + AS -->|4. JWT access token| CA + CA -->|5. Bearer token + task| RA[Remote Agent] + RA -->|6. Fetch JWKS| JWKS[JWKS Endpoint] + JWKS -->|7. Public keys| RA + RA -->|8. Validate JWT, check scopes| V{Valid?} + V -->|Yes| PROC[Process task] + V -->|No| ERR[401/403 Error] + PROC -->|9. Result| CA + + classDef auth fill:#f3e5f5,stroke:#4a148c + classDef agent fill:#e1f5fe,stroke:#01579b + + class AS,OIDC,JWKS,V auth + class CA,RA agent +``` + +## Security Best Practices + +1. 
**Always use TLS**: All A2A communication must happen over HTTPS. +2. **Rotate credentials**: Use short-lived tokens (< 1 hour) with automatic refresh. +3. **Principle of least privilege**: Request only the scopes your agent needs. +4. **Validate Agent Cards over TLS**: Only trust cards fetched over verified HTTPS connections. +5. **Log authentication events**: Track which agents are communicating for audit trails. +6. **Rate limit by identity**: Prevent any single agent from overwhelming your service. +7. **Verify webhook origins**: Always validate push notification signatures. + +```python +# Example: Rate limiting by client identity +from collections import defaultdict +from time import time + +class RateLimiter: + def __init__(self, max_requests: int = 100, window_seconds: int = 60): + self.max_requests = max_requests + self.window = window_seconds + self._requests: dict[str, list[float]] = defaultdict(list) + + def check(self, client_id: str) -> bool: + """Return True if the request is allowed.""" + now = time() + window_start = now - self.window + self._requests[client_id] = [ + t for t in self._requests[client_id] if t > window_start + ] + if len(self._requests[client_id]) >= self.max_requests: + return False + self._requests[client_id].append(now) + return True +``` + +--- + +**Next: [Chapter 6: Python SDK](06-python-sdk.md)** — Building complete A2A agents using the official Python SDK. 
+ +[Previous: Chapter 4](04-task-management.md) | [Back to Tutorial Overview](README.md) diff --git a/tutorials/a2a-protocol-tutorial/06-python-sdk.md b/tutorials/a2a-protocol-tutorial/06-python-sdk.md new file mode 100644 index 0000000..de0df95 --- /dev/null +++ b/tutorials/a2a-protocol-tutorial/06-python-sdk.md @@ -0,0 +1,471 @@ +--- +layout: default +title: "Chapter 6: Python SDK" +parent: "A2A Protocol Tutorial" +nav_order: 6 +--- + +# Chapter 6: Python SDK + +The A2A Python SDK provides high-level abstractions for building both A2A servers (agents that receive tasks) and A2A clients (agents that send tasks). This chapter shows how to build production-quality A2A agents from scratch. + +## What Problem Does This Solve? + +While you can implement A2A using raw HTTP and JSON-RPC (as shown in earlier chapters), the SDK handles the boilerplate: request parsing, response formatting, SSE streaming, Agent Card serving, error handling, and lifecycle management. This lets you focus on what your agent actually does rather than protocol plumbing. + +## Installation + +```bash +pip install a2a-sdk + +# With optional dependencies for common patterns +pip install a2a-sdk[google] # For Google AI integration +pip install a2a-sdk[openai] # For OpenAI integration +``` + +## Building an A2A Server + +### The Basic Structure + +An A2A server has three main components: +1. An **Agent Card** that describes capabilities +2. A **Task Handler** that processes incoming tasks +3. 
An **A2A Server** that ties them together + +```python +from a2a.server import A2AServer, TaskHandler, TaskContext +from a2a.types import AgentCard, Skill, AgentCapabilities + +# Step 1: Define your Agent Card +card = AgentCard( + name="Summarization Agent", + description="Summarizes long texts, documents, and web pages", + url="http://localhost:8000/a2a", + version="1.0.0", + capabilities=AgentCapabilities( + streaming=True, + push_notifications=False, + state_transition_history=True, + ), + skills=[ + Skill( + id="text-summary", + name="Text Summarization", + description="Condense long texts into concise summaries", + tags=["summarize", "text", "condense"], + examples=[ + "Summarize this article in 3 bullet points", + "Give me a one-paragraph summary of this document", + ], + ), + Skill( + id="key-points", + name="Key Point Extraction", + description="Extract the most important points from a text", + tags=["extract", "key-points", "highlights"], + ), + ], + default_input_modes=["text", "file"], + default_output_modes=["text"], +) +``` + +### Implementing the Task Handler + +The task handler is where your agent's logic lives: + +```python +from a2a.types import ( + Message, TextPart, Artifact, TaskStatus, TaskState +) + +class SummarizationHandler(TaskHandler): + """Handle summarization tasks.""" + + async def handle_task(self, context: TaskContext) -> None: + """Process an incoming task.""" + # Extract the user's message + user_message = context.message + text_parts = [ + part.text for part in user_message.parts + if isinstance(part, TextPart) + ] + input_text = "\n".join(text_parts) + + if not input_text: + await context.fail("No text provided to summarize") + return + + # Update status to show we are working + await context.update_status( + TaskState.WORKING, + message="Analyzing text...", + ) + + # Perform summarization (simplified — use your LLM here) + summary = await self._summarize(input_text) + + # Return the result as an artifact + await 
context.add_artifact( + Artifact( + name="Summary", + description="Summarized version of the input text", + parts=[TextPart(text=summary)], + ) + ) + + # Mark task as completed + await context.complete("Summarization complete") + + async def _summarize(self, text: str) -> str: + """Call an LLM to summarize text.""" + # In production, call OpenAI/Anthropic/Google here + word_count = len(text.split()) + return ( + f"Summary of {word_count}-word text:\n\n" + f"This text discusses the following key topics:\n" + f"- {text[:100]}...\n" + f"(This is a placeholder — integrate your LLM here)" + ) +``` + +### Starting the Server + +```python +import uvicorn + +# Step 3: Create and run the server +server = A2AServer( + agent_card=card, + task_handler=SummarizationHandler(), + host="0.0.0.0", + port=8000, +) + +if __name__ == "__main__": + uvicorn.run(server.app, host="0.0.0.0", port=8000) +``` + +## Building an A2A Client + +### Basic Client Usage + +```python +from a2a.client import A2AClient + +async def main(): + # Create a client pointing to a remote agent + client = A2AClient(url="http://localhost:8000") + + # Discover the agent's capabilities + card = await client.get_agent_card() + print(f"Connected to: {card.name}") + print(f"Skills: {[s.name for s in card.skills]}") + + # Send a task + result = await client.send_task( + message="Summarize the following: The A2A protocol is an open standard..." + ) + + print(f"Status: {result.status.state}") + for artifact in result.artifacts: + for part in artifact.parts: + if isinstance(part, TextPart): + print(f"Result: {part.text}") + +# import asyncio +# asyncio.run(main()) +``` + +### Streaming Client + +```python +async def streaming_example(): + client = A2AClient(url="http://localhost:8000") + + async for event in client.send_task_streaming( + message="Summarize this long document about quantum computing..." 
+ ): + if event.type == "status": + print(f"[{event.status.state}] {event.status.message}") + elif event.type == "artifact": + print(f"Artifact: {event.artifact.name}") + for part in event.artifact.parts: + if isinstance(part, TextPart): + print(part.text) +``` + +## Streaming Server Implementation + +For agents that produce output incrementally: + +```python +class StreamingSummarizationHandler(TaskHandler): + """Summarization with streaming status updates.""" + + async def handle_task(self, context: TaskContext) -> None: + text_parts = [ + part.text for part in context.message.parts + if isinstance(part, TextPart) + ] + input_text = "\n".join(text_parts) + + # Stream progress updates + sections = self._split_into_sections(input_text) + summaries = [] + + for i, section in enumerate(sections): + await context.update_status( + TaskState.WORKING, + message=f"Summarizing section {i+1} of {len(sections)}...", + ) + + section_summary = await self._summarize_section(section) + summaries.append(section_summary) + + # Send intermediate artifact + await context.add_artifact( + Artifact( + name=f"Section {i+1} Summary", + parts=[TextPart(text=section_summary)], + metadata={"section": i + 1, "partial": True}, + ) + ) + + # Send final combined artifact + final_summary = "\n\n".join(summaries) + await context.add_artifact( + Artifact( + name="Complete Summary", + parts=[TextPart(text=final_summary)], + metadata={"partial": False}, + ) + ) + + await context.complete("All sections summarized") + + def _split_into_sections(self, text: str) -> list[str]: + """Split text into manageable sections.""" + words = text.split() + chunk_size = 500 + return [ + " ".join(words[i:i+chunk_size]) + for i in range(0, len(words), chunk_size) + ] + + async def _summarize_section(self, section: str) -> str: + """Summarize a single section.""" + return f"Summary: {section[:200]}..." 
+``` + +## Multi-Turn Conversation Handler + +Agents that need to ask clarifying questions: + +```python +class ResearchHandler(TaskHandler): + """Research agent that may ask for clarification.""" + + async def handle_task(self, context: TaskContext) -> None: + text_parts = [ + part.text for part in context.message.parts + if isinstance(part, TextPart) + ] + query = "\n".join(text_parts) + + # Check if the query is too vague + if len(query.split()) < 5: + await context.request_input( + message=( + "Your research query seems brief. Could you provide more " + "detail? For example:\n" + "- What specific aspect are you interested in?\n" + "- What time period should I focus on?\n" + "- Are there particular sources you prefer?" + ) + ) + return # Handler will be called again with the follow-up + + # Proceed with research + await context.update_status( + TaskState.WORKING, + message="Researching your topic...", + ) + + findings = await self._research(query) + + await context.add_artifact( + Artifact( + name="Research Findings", + parts=[TextPart(text=findings)], + ) + ) + await context.complete("Research complete") + + async def _research(self, query: str) -> str: + return f"Research findings for: {query}\n\n(Integrate your research pipeline here)" +``` + +## Putting It All Together: Full Agent Example + +```python +"""Complete A2A agent with authentication, streaming, and multi-turn support.""" +import uvicorn +from a2a.server import A2AServer, TaskHandler, TaskContext +from a2a.types import ( + AgentCard, Skill, AgentCapabilities, AgentAuthentication, + Artifact, TextPart, DataPart, TaskState, +) + +# Agent Card +card = AgentCard( + name="Data Analysis Agent", + description="Analyze datasets, generate insights, and create visualizations", + url="http://localhost:9000/a2a", + version="2.0.0", + capabilities=AgentCapabilities( + streaming=True, + push_notifications=False, + state_transition_history=True, + ), + skills=[ + Skill( + id="analyze-data", + name="Data 
Analysis", + description="Statistical analysis of structured datasets", + tags=["analysis", "statistics", "data"], + input_modes=["text", "data", "file"], + output_modes=["text", "data"], + ), + ], + authentication=AgentAuthentication(schemes=["bearer"]), +) + +class DataAnalysisHandler(TaskHandler): + async def handle_task(self, context: TaskContext) -> None: + # Extract input + data_parts = [] + text_parts = [] + for part in context.message.parts: + if isinstance(part, TextPart): + text_parts.append(part.text) + elif isinstance(part, DataPart): + data_parts.append(part.data) + + query = "\n".join(text_parts) + + if not data_parts and "data" not in query.lower(): + await context.request_input( + message="Please provide a dataset (as structured data or file) along with your analysis question." + ) + return + + await context.update_status(TaskState.WORKING, message="Loading data...") + await context.update_status(TaskState.WORKING, message="Running analysis...") + + # Produce results + await context.add_artifact( + Artifact( + name="Analysis Results", + parts=[ + TextPart(text=f"Analysis of your data for query: {query}"), + DataPart(data={ + "rows_analyzed": 1000, + "columns": 5, + "insights": ["Trend detected", "Anomaly in column 3"], + }), + ], + ) + ) + + await context.complete("Analysis complete") + +server = A2AServer( + agent_card=card, + task_handler=DataAnalysisHandler(), +) + +if __name__ == "__main__": + uvicorn.run(server.app, host="0.0.0.0", port=9000) +``` + +## How It Works Under the Hood + +```mermaid +flowchart TD + REQ[Incoming HTTP Request] --> MW[Middleware: Auth, Rate Limit] + MW --> PARSE[Parse JSON-RPC] + PARSE --> ROUTE{Method?} + + ROUTE -->|tasks/send| SYNC[Sync Handler] + ROUTE -->|tasks/sendSubscribe| STREAM[Streaming Handler] + ROUTE -->|tasks/get| GET[Get Task State] + ROUTE -->|tasks/cancel| CANCEL[Cancel Task] + + SYNC --> TH[TaskHandler.handle_task] + STREAM --> TH + TH --> CTX[TaskContext] + CTX -->|update_status| STATUS[Status 
Update] + CTX -->|add_artifact| ART[Artifact] + CTX -->|complete/fail| DONE[Terminal State] + + STATUS -->|SSE event| CLIENT[Client] + ART -->|SSE event| CLIENT + DONE -->|Final response| CLIENT + + classDef infra fill:#f3e5f5,stroke:#4a148c + classDef handler fill:#e1f5fe,stroke:#01579b + classDef output fill:#e8f5e8,stroke:#1b5e20 + + class REQ,MW,PARSE,ROUTE infra + class SYNC,STREAM,TH,CTX,GET,CANCEL handler + class STATUS,ART,DONE,CLIENT output +``` + +## Testing Your Agent + +```python +import pytest +import httpx +from a2a.testing import MockA2AServer + +@pytest.mark.asyncio +async def test_summarization_agent(): + """Test the summarization agent end-to-end.""" + handler = SummarizationHandler() + async with MockA2AServer(handler=handler) as server: + async with httpx.AsyncClient() as client: + # Test Agent Card + response = await client.get( + f"{server.url}/.well-known/agent.json" + ) + assert response.status_code == 200 + card = response.json() + assert card["name"] == "Summarization Agent" + + # Test task + response = await client.post( + f"{server.url}/a2a", + json={ + "jsonrpc": "2.0", + "id": "test-1", + "method": "tasks/send", + "params": { + "id": "task-test", + "message": { + "role": "user", + "parts": [{"type": "text", "text": "Summarize: A2A is great."}], + }, + }, + }, + ) + result = response.json()["result"] + assert result["status"]["state"] == "completed" + assert len(result["artifacts"]) > 0 +``` + +--- + +**Next: [Chapter 7: Multi-Agent Scenarios](07-multi-agent-scenarios.md)** — Agent delegation, composition, and real-world multi-agent patterns. 
+ +[Previous: Chapter 5](05-authentication-and-security.md) | [Back to Tutorial Overview](README.md) diff --git a/tutorials/a2a-protocol-tutorial/07-multi-agent-scenarios.md b/tutorials/a2a-protocol-tutorial/07-multi-agent-scenarios.md new file mode 100644 index 0000000..a245e51 --- /dev/null +++ b/tutorials/a2a-protocol-tutorial/07-multi-agent-scenarios.md @@ -0,0 +1,454 @@ +--- +layout: default +title: "Chapter 7: Multi-Agent Scenarios" +parent: "A2A Protocol Tutorial" +nav_order: 7 +--- + +# Chapter 7: Multi-Agent Scenarios + +Real-world AI systems rarely involve a single agent. This chapter explores how multiple A2A agents collaborate: delegation chains, parallel fan-out, hierarchical orchestration, and practical patterns for building resilient multi-agent systems. + +## What Problem Does This Solve? + +A single agent can answer questions or call tools, but complex workflows — "research a topic, write a report, review it for accuracy, then translate it into three languages" — require multiple specialized agents working together. A2A provides the interoperability layer so these agents can be built by different teams, run on different platforms, and still collaborate seamlessly. 
+ +## Pattern 1: Simple Delegation + +An orchestrator agent delegates a task to a single specialist: + +```python +from a2a.client import A2AClient +from a2a.server import TaskHandler, TaskContext +from a2a.types import Artifact, TextPart, TaskState + +class OrchestratorHandler(TaskHandler): + """Orchestrator that delegates to a specialist agent.""" + + def __init__(self, research_agent_url: str): + self.research_client = A2AClient(url=research_agent_url) + + async def handle_task(self, context: TaskContext) -> None: + user_text = self._extract_text(context.message) + + await context.update_status( + TaskState.WORKING, + message="Delegating research to specialist agent...", + ) + + # Delegate to the research agent + result = await self.research_client.send_task( + message=f"Research the following topic thoroughly: {user_text}" + ) + + if result.status.state == "completed": + # Pass through the research agent's artifacts + for artifact in result.artifacts: + await context.add_artifact(artifact) + await context.complete("Research delegation complete") + else: + await context.fail( + f"Research agent failed: {result.status.message}" + ) + + def _extract_text(self, message) -> str: + return "\n".join( + part.text for part in message.parts + if isinstance(part, TextPart) + ) +``` + +```mermaid +sequenceDiagram + participant U as User + participant O as Orchestrator Agent + participant R as Research Agent + + U->>O: "Research quantum computing advances" + O->>R: tasks/send "Research quantum computing..." 
+ R-->>O: Artifacts: [research report] + O-->>U: Artifacts: [research report] +``` + +## Pattern 2: Sequential Pipeline + +Chain multiple agents in sequence, where each agent's output feeds into the next: + +```python +class PipelineOrchestrator(TaskHandler): + """Chain agents: Research → Write → Review.""" + + def __init__( + self, + research_url: str, + writer_url: str, + reviewer_url: str, + ): + self.research = A2AClient(url=research_url) + self.writer = A2AClient(url=writer_url) + self.reviewer = A2AClient(url=reviewer_url) + + async def handle_task(self, context: TaskContext) -> None: + topic = self._extract_text(context.message) + + # Stage 1: Research + await context.update_status( + TaskState.WORKING, message="Stage 1/3: Researching..." + ) + research_result = await self.research.send_task( + message=f"Research: {topic}" + ) + research_text = self._artifacts_to_text(research_result.artifacts) + + # Stage 2: Writing + await context.update_status( + TaskState.WORKING, message="Stage 2/3: Writing article..." + ) + writer_result = await self.writer.send_task( + message=f"Write an article based on this research:\n\n{research_text}" + ) + article_text = self._artifacts_to_text(writer_result.artifacts) + + # Stage 3: Review + await context.update_status( + TaskState.WORKING, message="Stage 3/3: Reviewing..." 
+ ) + review_result = await self.reviewer.send_task( + message=f"Review this article for accuracy:\n\n{article_text}" + ) + + # Combine all artifacts + await context.add_artifact( + Artifact( + name="Final Article", + parts=[TextPart(text=article_text)], + metadata={"stage": "writing"}, + ) + ) + for artifact in review_result.artifacts: + artifact.metadata = artifact.metadata or {} + artifact.metadata["stage"] = "review" + await context.add_artifact(artifact) + + await context.complete("Pipeline complete: research → write → review") + + def _extract_text(self, message) -> str: + return "\n".join( + part.text for part in message.parts if isinstance(part, TextPart) + ) + + def _artifacts_to_text(self, artifacts) -> str: + texts = [] + for a in artifacts: + for p in a.parts: + if isinstance(p, TextPart): + texts.append(p.text) + return "\n\n".join(texts) +``` + +```mermaid +flowchart LR + U[User] --> O[Orchestrator] + O -->|1| R[Research Agent] + R -->|findings| O + O -->|2| W[Writer Agent] + W -->|draft| O + O -->|3| REV[Reviewer Agent] + REV -->|review| O + O -->|final article + review| U + + classDef orch fill:#fff3e0,stroke:#ef6c00 + classDef agent fill:#e1f5fe,stroke:#01579b + + class O orch + class R,W,REV agent +``` + +## Pattern 3: Parallel Fan-Out + +Send tasks to multiple agents simultaneously and merge results: + +```python +import asyncio + +class ParallelResearchOrchestrator(TaskHandler): + """Fan-out research to multiple specialized agents in parallel.""" + + def __init__(self, agent_urls: dict[str, str]): + self.agents = { + name: A2AClient(url=url) + for name, url in agent_urls.items() + } + + async def handle_task(self, context: TaskContext) -> None: + topic = self._extract_text(context.message) + + await context.update_status( + TaskState.WORKING, + message=f"Dispatching to {len(self.agents)} specialist agents...", + ) + + # Fan-out: send to all agents in parallel + tasks = { + name: client.send_task( + message=f"From your {name} perspective, 
analyze: {topic}"
+            )
+            for name, client in self.agents.items()
+        }
+
+        # asyncio.gather runs all delegated tasks concurrently;
+        # return_exceptions=True keeps one failing agent from
+        # cancelling the others.
+        outcomes = await asyncio.gather(
+            *tasks.values(), return_exceptions=True
+        )
+
+        results = {}
+        for name, outcome in zip(tasks.keys(), outcomes):
+            if isinstance(outcome, Exception):
+                results[name] = None
+                await context.update_status(
+                    TaskState.WORKING,
+                    message=f"Warning: {name} agent failed: {outcome}",
+                )
+            else:
+                results[name] = outcome
+                await context.update_status(
+                    TaskState.WORKING,
+                    message=f"Received results from {name} agent",
+                )
+
+        # Merge results into artifacts
+        for name, result in results.items():
+            if result and result.status.state == "completed":
+                for artifact in result.artifacts:
+                    artifact.name = f"{name}: {artifact.name}"
+                    await context.add_artifact(artifact)
+
+        successful = sum(1 for r in results.values() if r)
+        await context.complete(
+            f"Parallel analysis complete: {successful}/{len(self.agents)} agents responded"
+        )
+
+# Usage
+# orchestrator = ParallelResearchOrchestrator({
+#     "technical": "https://tech-agent.example.com",
+#     "market": "https://market-agent.example.com",
+#     "legal": "https://legal-agent.example.com",
+# })
+```
+
+## Pattern 4: Dynamic Agent Selection
+
+Choose which agent to delegate to based on the task content:
+
+```python
+class DynamicRouter(TaskHandler):
+    """Route tasks to the best-matching agent based on skill tags."""
+
+    def __init__(self, agent_hosts: list[str]):
+        self.agent_hosts = agent_hosts
+        self._cards: list[dict] = []
+
+    async def initialize(self):
+        """Discover all agent capabilities at startup."""
+        for host in self.agent_hosts:
+            client = A2AClient(url=f"https://{host}")
+            card = await client.get_agent_card()
+            self._cards.append({
+                "host": host,
+                "card": card,
+                "client": client,
+            })
+
+    async def handle_task(self, context: TaskContext) -> None:
+        user_text = self._extract_text(context.message)
+
+        # Simple keyword matching against skill tags
+        best_agent = self._find_best_agent(user_text)
+
+        if not best_agent:
+            await context.fail(
+                "No suitable agent found for this task. 
" + f"Available capabilities: {self._list_capabilities()}" + ) + return + + await context.update_status( + TaskState.WORKING, + message=f"Routing to {best_agent['card'].name}...", + ) + + result = await best_agent["client"].send_task(message=user_text) + + for artifact in result.artifacts: + await context.add_artifact(artifact) + + await context.complete( + f"Handled by {best_agent['card'].name}" + ) + + def _find_best_agent(self, query: str) -> dict | None: + query_words = set(query.lower().split()) + best = None + best_score = 0 + + for entry in self._cards: + for skill in entry["card"].skills: + tags = set(t.lower() for t in skill.tags) + score = len(query_words & tags) + if score > best_score: + best_score = score + best = entry + + return best + + def _list_capabilities(self) -> str: + all_skills = [] + for entry in self._cards: + for skill in entry["card"].skills: + all_skills.append(f"{entry['card'].name}: {skill.name}") + return ", ".join(all_skills) +``` + +## Pattern 5: Hierarchical Teams + +Build a tree of orchestrators for complex workflows: + +```mermaid +flowchart TD + U[User] --> CEO[Executive Orchestrator] + CEO --> RM[Research Manager] + CEO --> WM[Writing Manager] + + RM --> RA1[Web Research Agent] + RM --> RA2[Academic Research Agent] + RM --> RA3[Data Analysis Agent] + + WM --> WA1[Draft Writer Agent] + WM --> WA2[Editor Agent] + WM --> WA3[Fact-Check Agent] + + classDef exec fill:#fff3e0,stroke:#ef6c00 + classDef manager fill:#f3e5f5,stroke:#4a148c + classDef worker fill:#e1f5fe,stroke:#01579b + + class CEO exec + class RM,WM manager + class RA1,RA2,RA3,WA1,WA2,WA3 worker +``` + +```python +class HierarchicalOrchestrator(TaskHandler): + """Top-level orchestrator that delegates to sub-orchestrators.""" + + def __init__(self, research_manager_url: str, writing_manager_url: str): + self.research_mgr = A2AClient(url=research_manager_url) + self.writing_mgr = A2AClient(url=writing_manager_url) + + async def handle_task(self, context: TaskContext) -> 
None: + topic = self._extract_text(context.message) + + # Phase 1: Research (delegated to research manager, which + # internally fans out to multiple research agents) + await context.update_status( + TaskState.WORKING, message="Phase 1: Commissioning research..." + ) + research = await self.research_mgr.send_task( + message=f"Conduct comprehensive research on: {topic}" + ) + research_text = self._artifacts_to_text(research.artifacts) + + # Phase 2: Writing (delegated to writing manager, which + # internally handles drafting, editing, and fact-checking) + await context.update_status( + TaskState.WORKING, message="Phase 2: Producing written output..." + ) + writing = await self.writing_mgr.send_task( + message=f"Create a polished report based on:\n\n{research_text}" + ) + + for artifact in writing.artifacts: + await context.add_artifact(artifact) + + await context.complete("Hierarchical workflow complete") +``` + +## Error Handling and Resilience + +Multi-agent systems need robust error handling: + +```python +class ResilientDelegator: + """Delegate tasks with retry, fallback, and timeout logic.""" + + def __init__(self, primary_url: str, fallback_url: str | None = None): + self.primary = A2AClient(url=primary_url) + self.fallback = A2AClient(url=fallback_url) if fallback_url else None + + async def delegate( + self, + message: str, + max_retries: int = 2, + timeout: float = 120.0, + ) -> dict | None: + """Try primary agent, retry on failure, fall back if available.""" + for attempt in range(max_retries + 1): + try: + result = await asyncio.wait_for( + self.primary.send_task(message=message), + timeout=timeout, + ) + if result.status.state == "completed": + return result + except (asyncio.TimeoutError, Exception) as e: + if attempt < max_retries: + await asyncio.sleep(2 ** attempt) # exponential backoff + continue + + # Try fallback + if self.fallback: + try: + return await asyncio.wait_for( + self.fallback.send_task(message=message), + timeout=timeout, + ) + except 
Exception: + pass + + return None +``` + +## How It Works Under the Hood + +```mermaid +sequenceDiagram + participant U as User + participant O as Orchestrator + participant R as Research Agent + participant W as Writer Agent + participant REV as Reviewer Agent + + U->>O: "Create report on AI safety" + O->>R: tasks/sendSubscribe "Research AI safety" + R-->>O: SSE: working "Searching papers..." + R-->>O: SSE: working "Found 42 papers" + R-->>O: SSE: completed + artifacts + + O->>W: tasks/sendSubscribe "Write report from findings" + W-->>O: SSE: working "Drafting..." + W-->>O: SSE: completed + draft artifact + + O->>REV: tasks/send "Review this draft" + REV-->>O: completed + review artifact + + O-->>U: Final report + review notes +``` + +## Practical Tips for Multi-Agent Systems + +1. **Keep orchestrators thin**: The orchestrator should route and merge, not contain business logic. +2. **Use sessions for context**: Pass `sessionId` through the delegation chain so agents can maintain conversation context. +3. **Set timeouts**: Always set timeouts on delegated tasks to prevent cascading hangs. +4. **Log correlation IDs**: Propagate a trace ID through all delegated tasks for debugging. +5. **Design for partial failure**: If one of three parallel agents fails, return the results from the other two rather than failing entirely. +6. **Version your agents**: Use Agent Card versioning so orchestrators can adapt to capability changes. + +--- + +**Next: [Chapter 8: MCP + A2A](08-mcp-plus-a2a.md)** — Combining MCP (tools) and A2A (agents) for the full ecosystem architecture. 
+ +[Previous: Chapter 6](06-python-sdk.md) | [Back to Tutorial Overview](README.md) diff --git a/tutorials/a2a-protocol-tutorial/08-mcp-plus-a2a.md b/tutorials/a2a-protocol-tutorial/08-mcp-plus-a2a.md new file mode 100644 index 0000000..06a7086 --- /dev/null +++ b/tutorials/a2a-protocol-tutorial/08-mcp-plus-a2a.md @@ -0,0 +1,474 @@ +--- +layout: default +title: "Chapter 8: MCP + A2A" +parent: "A2A Protocol Tutorial" +nav_order: 8 +--- + +# Chapter 8: MCP + A2A — The Full Agent Ecosystem + +MCP and A2A are two halves of the same coin. MCP connects agents to tools and data; A2A connects agents to each other. This chapter shows how to combine them into a unified architecture where agents use MCP for capabilities and A2A for collaboration. + +## What Problem Does This Solve? + +Consider a real scenario: a user asks an AI assistant to "analyze our Q4 sales data and compare it with competitor public filings." This requires: + +- **MCP**: Access the company's database, read spreadsheets, call analytics APIs +- **A2A**: Delegate competitor research to a specialized research agent that has its own MCP tools for web scraping and document analysis + +Neither protocol alone covers the full workflow. Together, they create a composable agent ecosystem where any agent can use any tool and collaborate with any other agent. 
+ +## The Unified Architecture + +```mermaid +flowchart TD + U[User] --> HA[Host Agent] + + subgraph MCP_Tools ["MCP: Tools & Data"] + HA -->|MCP| DB[(Database Server)] + HA -->|MCP| FS[File System Server] + HA -->|MCP| CALC[Analytics API Server] + end + + subgraph A2A_Agents ["A2A: Agent Collaboration"] + HA -->|A2A| RA[Research Agent] + HA -->|A2A| VA[Visualization Agent] + end + + subgraph RA_Tools ["Research Agent's MCP Tools"] + RA -->|MCP| WEB[Web Scraper Server] + RA -->|MCP| DOC[Document Parser Server] + end + + subgraph VA_Tools ["Viz Agent's MCP Tools"] + VA -->|MCP| CHART[Chart Generator Server] + VA -->|MCP| EXPORT[Export Server] + end + + classDef user fill:#e8f5e8,stroke:#1b5e20 + classDef agent fill:#fff3e0,stroke:#ef6c00 + classDef mcp fill:#e1f5fe,stroke:#01579b + + class U user + class HA,RA,VA agent + class DB,FS,CALC,WEB,DOC,CHART,EXPORT mcp +``` + +## Building an Agent That Uses Both Protocols + +Here is a complete agent that uses MCP for tool access and A2A for delegation: + +```python +"""Agent that combines MCP tools with A2A delegation.""" +from a2a.server import A2AServer, TaskHandler, TaskContext +from a2a.client import A2AClient +from a2a.types import ( + AgentCard, Skill, AgentCapabilities, + Artifact, TextPart, DataPart, TaskState, +) +from mcp import ClientSession +from mcp.client.stdio import stdio_client, StdioServerParameters + +class HybridAnalysisHandler(TaskHandler): + """ + An agent that: + 1. Uses MCP to query a database (tool access) + 2. Delegates competitor research to another agent via A2A + 3. 
Combines results into a unified report + """ + + def __init__( + self, + db_server_command: list[str], + research_agent_url: str, + ): + self.db_server_command = db_server_command + self.research_client = A2AClient(url=research_agent_url) + + async def handle_task(self, context: TaskContext) -> None: + query = self._extract_text(context.message) + + # Step 1: Use MCP to access internal data + await context.update_status( + TaskState.WORKING, + message="Querying internal database via MCP...", + ) + internal_data = await self._query_database(query) + + # Step 2: Use A2A to delegate competitor research + await context.update_status( + TaskState.WORKING, + message="Delegating competitor research to specialist agent...", + ) + research_result = await self.research_client.send_task( + message=f"Research competitor data relevant to: {query}" + ) + competitor_data = self._artifacts_to_text(research_result.artifacts) + + # Step 3: Combine and analyze + await context.update_status( + TaskState.WORKING, + message="Synthesizing internal and competitor data...", + ) + report = self._synthesize(internal_data, competitor_data, query) + + await context.add_artifact( + Artifact( + name="Combined Analysis Report", + parts=[TextPart(text=report)], + metadata={ + "sources": ["internal_db", "competitor_research"], + }, + ) + ) + await context.complete("Analysis complete") + + async def _query_database(self, query: str) -> str: + """Use MCP to call a database tool.""" + server_params = StdioServerParameters( + command=self.db_server_command[0], + args=self.db_server_command[1:], + ) + + async with stdio_client(server_params) as (read, write): + async with ClientSession(read, write) as session: + await session.initialize() + + # List available tools + tools = await session.list_tools() + print(f"Available MCP tools: {[t.name for t in tools.tools]}") + + # Call the query tool + result = await session.call_tool( + "query", + arguments={"sql": f"SELECT * FROM sales WHERE quarter = 'Q4'"}, + 
) + return result.content[0].text + + def _synthesize( + self, internal: str, competitor: str, query: str + ) -> str: + return ( + f"# Combined Analysis: {query}\n\n" + f"## Internal Data\n{internal}\n\n" + f"## Competitor Research\n{competitor}\n\n" + f"## Synthesis\n(LLM-generated analysis combining both sources)" + ) + + def _extract_text(self, message) -> str: + return "\n".join( + p.text for p in message.parts if isinstance(p, TextPart) + ) + + def _artifacts_to_text(self, artifacts) -> str: + texts = [] + for a in artifacts: + for p in a.parts: + if isinstance(p, TextPart): + texts.append(p.text) + return "\n\n".join(texts) +``` + +## Exposing MCP Tools Through A2A + +A powerful pattern: wrap MCP tool servers as A2A agents so that remote agents can use tools they do not have direct access to: + +```python +class MCPToolBridgeHandler(TaskHandler): + """ + Bridge: Expose MCP tools as an A2A agent. + + Remote agents send A2A tasks like "query the database for X" + and this agent translates them into MCP tool calls. + """ + + def __init__(self, mcp_server_command: list[str]): + self.mcp_server_command = mcp_server_command + + async def handle_task(self, context: TaskContext) -> None: + user_text = self._extract_text(context.message) + + await context.update_status( + TaskState.WORKING, message="Connecting to MCP tool server..." 
+ ) + + server_params = StdioServerParameters( + command=self.mcp_server_command[0], + args=self.mcp_server_command[1:], + ) + + async with stdio_client(server_params) as (read, write): + async with ClientSession(read, write) as session: + await session.initialize() + tools = await session.list_tools() + + # Use an LLM to determine which tool to call + tool_name, arguments = await self._plan_tool_call( + user_text, tools.tools + ) + + await context.update_status( + TaskState.WORKING, + message=f"Calling tool: {tool_name}", + ) + + result = await session.call_tool(tool_name, arguments) + + await context.add_artifact( + Artifact( + name=f"Tool Result: {tool_name}", + parts=[TextPart(text=result.content[0].text)], + metadata={"tool": tool_name, "arguments": arguments}, + ) + ) + + await context.complete(f"Tool call complete: {tool_name}") + + async def _plan_tool_call(self, query, tools): + """Use an LLM to select the right tool and arguments.""" + # Simplified: in production, send tool descriptions to an LLM + # and parse the response + tool_name = tools[0].name if tools else "unknown" + return tool_name, {"input": query} +``` + +### Agent Card for the Bridge + +```json +{ + "name": "Database Access Agent", + "description": "Provides secure database query access via A2A", + "url": "https://db-bridge.internal.example.com/a2a", + "version": "1.0.0", + "capabilities": { "streaming": true, "pushNotifications": false }, + "skills": [ + { + "id": "sql-query", + "name": "SQL Query", + "description": "Execute read-only SQL queries against the company database", + "tags": ["database", "sql", "query", "data"], + "examples": [ + "Get all Q4 sales by region", + "Count active users in the last 30 days" + ] + } + ], + "authentication": { "schemes": ["oauth2"] } +} +``` + +## TypeScript: Combined MCP + A2A Client + +```typescript +import { Client } from "@modelcontextprotocol/sdk/client/index.js"; +import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js"; + 
+interface A2ATaskResult { + status: { state: string }; + artifacts: Array<{ name: string; parts: Array<{ type: string; text?: string }> }>; +} + +class HybridAgent { + private mcpClient: Client; + private a2aAgentUrl: string; + + constructor(mcpServerCommand: string[], a2aAgentUrl: string) { + this.mcpClient = new Client({ name: "hybrid-agent", version: "1.0.0" }); + this.a2aAgentUrl = a2aAgentUrl; + } + + async initialize() { + const transport = new StdioClientTransport({ + command: "node", + args: ["mcp-server.js"], + }); + await this.mcpClient.connect(transport); + } + + async queryLocalTool(toolName: string, args: Record) { + // Use MCP for local tool access + const result = await this.mcpClient.callTool({ + name: toolName, + arguments: args, + }); + return result; + } + + async delegateToAgent(message: string): Promise { + // Use A2A for agent delegation + const response = await fetch(this.a2aAgentUrl, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + jsonrpc: "2.0", + id: crypto.randomUUID(), + method: "tasks/send", + params: { + id: crypto.randomUUID(), + message: { + role: "user", + parts: [{ type: "text", text: message }], + }, + }, + }), + }); + + const data = await response.json(); + return data.result; + } + + async analyzeWithBothProtocols(query: string) { + // Step 1: MCP — get internal data + const localData = await this.queryLocalTool("query", { + sql: "SELECT * FROM metrics WHERE quarter = 'Q4'", + }); + + // Step 2: A2A — delegate external research + const agentResult = await this.delegateToAgent( + `Research competitors for: ${query}` + ); + + return { localData, agentResult }; + } +} +``` + +## Architecture Decision Framework + +When should you use MCP vs A2A? + +```mermaid +flowchart TD + Q[Need to access external capability?] 
+ Q -->|Tool, API, or data source| MCP[Use MCP] + Q -->|Another AI agent| A2A[Use A2A] + + MCP --> M1[Direct tool call] + MCP --> M2[Resource read] + MCP --> M3[Prompt template] + + A2A --> A1[Delegate task] + A2A --> A2[Get specialized analysis] + A2A --> A3[Fan-out to multiple agents] + + BOTH[Both?] --> BRIDGE[MCP Tool Bridge via A2A] + BOTH --> HYBRID[Hybrid Agent: MCP + A2A] + + classDef mcp fill:#e1f5fe,stroke:#01579b + classDef a2a fill:#fff3e0,stroke:#ef6c00 + classDef both fill:#f3e5f5,stroke:#4a148c + + class MCP,M1,M2,M3 mcp + class A2A,A1,A2,A3 a2a + class BOTH,BRIDGE,HYBRID both +``` + +| Scenario | Protocol | Reason | +|:---------|:---------|:-------| +| Query a database | MCP | Direct tool access | +| Read a file from disk | MCP | Local resource access | +| Ask a research agent to find papers | A2A | Agent delegation | +| Get a code review from a specialist | A2A | Specialized agent skill | +| Let a remote agent use your database | A2A wrapping MCP | Controlled access bridge | +| Agent uses tools AND delegates sub-tasks | Both | Hybrid architecture | + +## Production Architecture Example + +A complete production system combining both protocols: + +```python +"""Production-grade hybrid agent configuration.""" +from dataclasses import dataclass + +@dataclass +class AgentConfig: + """Configuration for a hybrid MCP + A2A agent.""" + # Identity + name: str + description: str + port: int + + # MCP tool servers this agent can use + mcp_servers: dict[str, list[str]] # name -> command + + # A2A agents this agent can delegate to + a2a_agents: dict[str, str] # name -> URL + + # Authentication + oauth_client_id: str + oauth_client_secret: str + oauth_token_endpoint: str + +# Example configuration +config = AgentConfig( + name="Enterprise Assistant", + description="Full-service enterprise AI assistant", + port=8000, + mcp_servers={ + "database": ["python", "-m", "mcp_postgres_server"], + "filesystem": ["python", "-m", "mcp_filesystem_server", "/data"], + "slack": 
["node", "mcp-slack-server/index.js"], + }, + a2a_agents={ + "research": "https://research.internal.example.com", + "code-review": "https://code-review.internal.example.com", + "translation": "https://translate.partner.example.com", + }, + oauth_client_id="enterprise-assistant", + oauth_client_secret="...", + oauth_token_endpoint="https://auth.example.com/token", +) +``` + +## The Emerging Ecosystem + +The MCP + A2A combination creates a layered ecosystem: + +``` +┌─────────────────────────────────────────────┐ +│ User / Application │ +├─────────────────────────────────────────────┤ +│ Host Agent (Orchestrator) │ +├──────────────────┬──────────────────────────┤ +│ MCP Layer │ A2A Layer │ +│ (Tools) │ (Agents) │ +│ │ │ +│ ┌──────────┐ │ ┌───────────────┐ │ +│ │ Database │ │ │ Research Agent │ │ +│ │ Server │ │ │ ├─ MCP tools │ │ +│ ├──────────┤ │ ├───────────────┤ │ +│ │ File │ │ │ Review Agent │ │ +│ │ Server │ │ │ ├─ MCP tools │ │ +│ ├──────────┤ │ ├───────────────┤ │ +│ │ API │ │ │ Translation │ │ +│ │ Server │ │ │ Agent │ │ +│ └──────────┘ │ └───────────────┘ │ +└──────────────────┴──────────────────────────┘ +``` + +Each A2A agent in the ecosystem is itself an MCP client with its own tool servers. The protocols compose naturally: A2A handles the agent-to-agent coordination layer, while MCP handles the agent-to-tool execution layer. + +## Key Takeaways + +1. **MCP is for tools, A2A is for agents** — use each where it fits. +2. **They compose naturally** — an A2A agent can use MCP tools internally. +3. **MCP Tool Bridges** let remote agents access tools they cannot reach directly. +4. **The hybrid pattern** (MCP + A2A in one agent) is the production default. +5. **Both protocols are open standards** — MCP from Anthropic, A2A from Linux Foundation — ensuring broad ecosystem support. 
+ +--- + +**You have completed the A2A Protocol Tutorial.** You now understand how to build interoperable agents that discover, communicate, delegate, and collaborate using the A2A protocol — and how to combine it with MCP for the full agent ecosystem. + +[Previous: Chapter 7](07-multi-agent-scenarios.md) | [Back to Tutorial Overview](README.md) + +--- + +## Where to Go Next + +- [MCP Specification Tutorial](../mcp-specification-tutorial/) — Deep dive into the MCP protocol specification +- [CrewAI Tutorial](../crewai-tutorial/) — Multi-agent orchestration with a framework approach +- [Composio Tutorial](../composio-tutorial/) — Tool integration platform bridging agent frameworks +- [Taskade Tutorial](../taskade-tutorial/) — AI-native productivity with agent workflows + +*Generated by [AI Codebase Knowledge Builder](https://github.com/The-Pocket/Tutorial-Codebase-Knowledge)* diff --git a/tutorials/a2a-protocol-tutorial/README.md b/tutorials/a2a-protocol-tutorial/README.md new file mode 100644 index 0000000..ae3d482 --- /dev/null +++ b/tutorials/a2a-protocol-tutorial/README.md @@ -0,0 +1,130 @@ +--- +layout: default +title: "A2A Protocol Tutorial" +nav_order: 195 +has_children: true +format_version: v2 +--- + +# A2A Protocol Tutorial: Building Interoperable Agent Systems With Google's Agent-to-Agent Standard + +> Learn how agents discover, communicate, and delegate tasks to each other using the A2A protocol — the open standard (now Linux Foundation) for agent-to-agent interoperability. 
+ +[![GitHub Repo](https://img.shields.io/badge/GitHub-a2aproject%2FA2A-black?logo=github)](https://github.com/a2aproject/A2A) +[![License](https://img.shields.io/badge/license-Apache--2.0-blue.svg)](https://github.com/a2aproject/A2A/blob/main/LICENSE) +[![Stars](https://img.shields.io/github/stars/a2aproject/A2A?style=social)](https://github.com/a2aproject/A2A) + +## Why This Track Matters + +The AI ecosystem is converging on two complementary standards: **MCP** (Model Context Protocol) for connecting agents to tools and data, and **A2A** (Agent-to-Agent) for connecting agents to each other. Together they form the complete agent interoperability stack. + +A2A solves a critical gap: how do independently built agents — potentially from different vendors, frameworks, and platforms — discover each other's capabilities and collaborate on tasks? Without A2A, every multi-agent system invents its own bespoke integration layer. + +This track focuses on: + +- understanding the A2A protocol specification and its design principles +- building agents that publish discoverable Agent Cards +- implementing task lifecycle management with streaming updates +- securing agent-to-agent communication with OAuth2 and identity verification +- combining A2A with MCP to create the full agent ecosystem architecture + +## Current Snapshot (auto-updated) + +- repository: [`a2aproject/A2A`](https://github.com/a2aproject/A2A) +- stars: about **23k** +- governance: Linux Foundation open standard +- key specification: Agent Card, Task lifecycle, Streaming, Push notifications + +## Mental Model + +```mermaid +flowchart LR + subgraph MCP ["MCP (Agent → Tool)"] + A1[AI Agent] -->|calls| T1[Tool Server] + A1 -->|reads| R1[Resource / Data] + end + + subgraph A2A ["A2A (Agent → Agent)"] + A2[Client Agent] -->|discovers| AC[Agent Card] + A2 -->|sends task| A3[Remote Agent] + A3 -->|streams updates| A2 + A3 -->|returns artifact| A2 + end + + User[User / Host App] --> A1 + User --> A2 + A3 -->|uses tools 
via MCP| T2[MCP Server] + + classDef mcp fill:#e1f5fe,stroke:#01579b + classDef a2a fill:#fff3e0,stroke:#ef6c00 + classDef user fill:#e8f5e8,stroke:#1b5e20 + + class A1,T1,R1 mcp + class A2,AC,A3 a2a + class User user + class T2 mcp +``` + +## Chapter Guide + +| Chapter | Key Question | Outcome | +|:--------|:-------------|:--------| +| [01 - Getting Started](01-getting-started.md) | What is A2A and how does it differ from MCP? | Clear mental model of agent interop | +| [02 - Protocol Specification](02-protocol-specification.md) | What are the core protocol primitives? | Understanding of Agent Cards, tasks, and messages | +| [03 - Agent Discovery](03-agent-discovery.md) | How do agents find and evaluate each other? | Ability to publish and consume Agent Cards | +| [04 - Task Management](04-task-management.md) | How does the full task lifecycle work? | Mastery of task creation, streaming, and artifacts | +| [05 - Authentication and Security](05-authentication-and-security.md) | How do agents trust each other? | Secure agent communication patterns | +| [06 - Python SDK](06-python-sdk.md) | How do I build A2A agents in Python? | Working A2A server and client | +| [07 - Multi-Agent Scenarios](07-multi-agent-scenarios.md) | How do agents delegate and compose? | Real-world multi-agent patterns | +| [08 - MCP + A2A](08-mcp-plus-a2a.md) | How do MCP and A2A work together? 
| Full ecosystem architecture | + +## What You Will Learn + +- How to read and implement the A2A protocol specification +- How to create Agent Cards that advertise capabilities and skills +- How to manage the full task lifecycle with streaming and push notifications +- How to authenticate and authorize agent-to-agent communication +- How to build A2A servers and clients using the Python SDK +- How to design multi-agent delegation and composition patterns +- How to combine MCP (tools) and A2A (agents) into a unified architecture + +## Source References + +- [A2A Protocol README](https://github.com/a2aproject/A2A/blob/main/README.md) +- [A2A Specification](https://github.com/a2aproject/A2A/blob/main/spec/) +- [Agent Card Schema](https://github.com/a2aproject/A2A/blob/main/spec/agent-card.md) +- [A2A Python SDK](https://github.com/a2aproject/A2A/tree/main/sdk/python) +- [A2A Samples](https://github.com/a2aproject/A2A/tree/main/samples) +- [A2A Technical Documentation](https://google.github.io/A2A/) + +## Related Tutorials + +- [MCP Specification Tutorial](../mcp-specification-tutorial/) — The complementary protocol for agent-to-tool communication +- [Composio Tutorial](../composio-tutorial/) — Tool integration platform that bridges agent frameworks +- [CrewAI Tutorial](../crewai-tutorial/) — Multi-agent orchestration framework +- [Taskade Tutorial](../taskade-tutorial/) — AI-native productivity with agent capabilities + +--- + +Start with [Chapter 1: Getting Started](01-getting-started.md). + +## Navigation & Backlinks + +- [Start Here: Chapter 1: Getting Started](01-getting-started.md) +- [Back to Main Catalog](../../README.md#-tutorial-catalog) +- [Browse A-Z Tutorial Directory](../../discoverability/tutorial-directory.md) +- [Search by Intent](../../discoverability/query-hub.md) +- [Explore Category Hubs](../../README.md#category-hubs) + +## Full Chapter Map + +1. [Chapter 1: Getting Started](01-getting-started.md) +2. 
[Chapter 2: Protocol Specification](02-protocol-specification.md) +3. [Chapter 3: Agent Discovery](03-agent-discovery.md) +4. [Chapter 4: Task Management](04-task-management.md) +5. [Chapter 5: Authentication and Security](05-authentication-and-security.md) +6. [Chapter 6: Python SDK](06-python-sdk.md) +7. [Chapter 7: Multi-Agent Scenarios](07-multi-agent-scenarios.md) +8. [Chapter 8: MCP + A2A](08-mcp-plus-a2a.md) + +*Generated by [AI Codebase Knowledge Builder](https://github.com/The-Pocket/Tutorial-Codebase-Knowledge)* diff --git a/tutorials/affine-tutorial/01-getting-started.md b/tutorials/affine-tutorial/01-getting-started.md new file mode 100644 index 0000000..48d0f41 --- /dev/null +++ b/tutorials/affine-tutorial/01-getting-started.md @@ -0,0 +1,229 @@ +--- +layout: default +title: "Chapter 1: Getting Started" +nav_order: 1 +parent: AFFiNE Tutorial +--- + +# Chapter 1: Getting Started + +Welcome to **Chapter 1: Getting Started**. In this part of **AFFiNE Tutorial: Open-Source AI Workspace with Docs, Whiteboards, and Databases**, you will set up your local development environment, understand the monorepo structure, and create your first AFFiNE workspace. + +AFFiNE is a next-generation knowledge base that combines documents, whiteboards, and databases into a unified workspace. Before diving into the architecture and internals, you need a working local setup to explore the codebase and experiment with features hands-on. + +## What Problem Does This Solve? + +Getting started with a large monorepo like AFFiNE can be overwhelming. The project spans multiple packages — BlockSuite for the editor framework, OctoBase for CRDT storage, a Node.js server, and an Electron desktop app. This chapter gives you a clear, repeatable path from cloning the repo to running the full application locally. 
+ +## Learning Goals + +- clone the AFFiNE monorepo and install dependencies +- understand the workspace structure and key packages +- run the development server for web and desktop targets +- create your first workspace with pages and edgeless canvases +- navigate the codebase to find key entry points + +## Prerequisites + +Before starting, ensure you have these tools installed: + +- **Node.js** >= 18.x (LTS recommended) +- **pnpm** >= 9.x (AFFiNE uses pnpm workspaces) +- **Git** for cloning the repository +- **Docker** (optional, for running backend services locally) + +## Step 1: Clone and Install + +```bash +# Clone the AFFiNE repository +git clone https://github.com/toeverything/AFFiNE.git +cd AFFiNE + +# Install dependencies using pnpm +pnpm install +``` + +The repository uses pnpm workspaces to manage its monorepo structure. After installation, the key packages are organized as follows: + +``` +AFFiNE/ +├── packages/ +│ ├── frontend/ # Web and desktop frontend apps +│ │ ├── core/ # Main AFFiNE application +│ │ ├── electron/ # Desktop wrapper +│ │ └── web/ # Web entry point +│ ├── backend/ +│ │ └── server/ # Node.js API server +│ └── common/ # Shared utilities and types +├── blocksuite/ # Block editor framework (git submodule or linked) +├── tools/ # Build and development tooling +└── docker/ # Docker configuration files +``` + +## Step 2: Understand the Monorepo Structure + +AFFiNE is organized as a monorepo with several key domains: + +```typescript +// Key package entry points to understand: + +// 1. Frontend core - the main application shell +// packages/frontend/core/src/app.tsx +// This is where the React application bootstraps + +// 2. BlockSuite integration - the editor framework +// packages/frontend/core/src/blocksuite/ +// Block definitions and editor configuration + +// 3. Backend server - API and sync services +// packages/backend/server/src/index.ts +// Express/Nest.js server for auth, sync, AI features + +// 4. 
Workspace management +// packages/frontend/core/src/modules/workspace/ +// Workspace creation, switching, and persistence +``` + +## Step 3: Run the Development Server + +```bash +# Start the web development server +pnpm dev + +# Or start with the full backend stack +pnpm dev:full + +# For desktop development (Electron) +pnpm dev:electron +``` + +The web dev server typically starts on `http://localhost:8080`. The first load may take a moment as Vite bundles the application. + +## Step 4: Create Your First Workspace + +Once the application loads, you will see the workspace creation flow: + +1. **Create a local workspace** — data is stored in your browser's IndexedDB +2. **Create a new page** — this opens the page editor (document mode) +3. **Switch to edgeless mode** — toggle the mode switcher to access the whiteboard canvas +4. **Add blocks** — type `/` to open the slash command menu and insert different block types + +```typescript +// The workspace initialization flow in code: +// packages/frontend/core/src/modules/workspace/services/workspace.ts + +interface WorkspaceMetadata { + id: string; + flavour: WorkspaceFlavour; // 'local' | 'affine-cloud' + version: number; +} + +// When you create a workspace, AFFiNE initializes a yjs Doc +// that serves as the root CRDT document for all content +``` + +## Step 5: Explore the Codebase Entry Points + +Here are the key files to bookmark as you explore: + +```typescript +// Application bootstrap +// packages/frontend/core/src/bootstrap/index.ts +// Sets up service providers, initializes modules + +// Workspace provider - manages workspace lifecycle +// packages/frontend/core/src/modules/workspace/ + +// Editor integration - connects BlockSuite to AFFiNE +// packages/frontend/core/src/blocksuite/ + +// Page management - CRUD operations on pages +// packages/frontend/core/src/modules/doc/ + +// AI copilot features +// packages/frontend/core/src/modules/ai/ +``` + +## How It Works Under the Hood + +When you start AFFiNE 
locally, several systems initialize in sequence: + +```mermaid +flowchart TD + A[pnpm dev] --> B[Vite dev server starts] + B --> C[React app bootstraps] + C --> D[Service modules initialize] + D --> E[Workspace provider loads] + E --> F{Workspace exists?} + F -->|Yes| G[Load yjs Doc from IndexedDB] + F -->|No| H[Show workspace creation UI] + H --> I[Initialize new yjs Doc] + G --> J[BlockSuite editor mounts] + I --> J + J --> K[Page ready for editing] +``` + +The critical insight is that AFFiNE uses **yjs documents** as the foundational data layer. Every workspace is a collection of yjs documents, and every page within a workspace is represented as a yjs subdocument. This means: + +- All content is CRDT-native from the start +- Local persistence uses IndexedDB with yjs encoding +- Cloud sync simply transmits yjs updates between peers +- Undo/redo is handled at the yjs document level + +## Development Tips + +### Environment Variables + +```bash +# .env file in project root +# Configure the backend API URL +AFFINE_SERVER_URL=http://localhost:3010 + +# Enable debug logging +DEBUG=affine:* + +# Configure AI features (requires API key) +COPILOT_OPENAI_API_KEY=sk-... +``` + +### Useful Commands + +```bash +# Run tests +pnpm test + +# Build for production +pnpm build + +# Lint and format +pnpm lint +pnpm format + +# Generate GraphQL types (for backend development) +pnpm codegen +``` + +### Common Issues + +1. **Node version mismatch** — use `nvm` or `fnm` to switch to the correct Node.js version +2. **pnpm version** — check `.npmrc` or `package.json` for the required pnpm version +3. **Port conflicts** — the dev server defaults to 8080; check if another process is using it +4. 
**Memory issues** — large monorepo builds may need `NODE_OPTIONS=--max-old-space-size=8192` + +## Source References + +- [AFFiNE Repository](https://github.com/toeverything/AFFiNE) +- [Contributing Guide](https://github.com/toeverything/AFFiNE/blob/canary/CONTRIBUTING.md) +- [Development Setup](https://docs.affine.pro/docs/developing) + +## Summary + +You now have a working AFFiNE development environment with a local workspace. You understand the monorepo layout, know how to start the dev server, and have identified the key entry points in the codebase. + +Next: [Chapter 2: System Architecture](02-system-architecture.md) — where we explore how BlockSuite, OctoBase, and yjs fit together to form the full AFFiNE stack. + +--- + +[Back to Tutorial Index](README.md) | [Next: Chapter 2 - System Architecture](02-system-architecture.md) + +*Generated by [AI Codebase Knowledge Builder](https://github.com/The-Pocket/Tutorial-Codebase-Knowledge)* diff --git a/tutorials/affine-tutorial/02-system-architecture.md b/tutorials/affine-tutorial/02-system-architecture.md new file mode 100644 index 0000000..fbbe108 --- /dev/null +++ b/tutorials/affine-tutorial/02-system-architecture.md @@ -0,0 +1,302 @@ +--- +layout: default +title: "Chapter 2: System Architecture" +nav_order: 2 +parent: AFFiNE Tutorial +--- + +# Chapter 2: System Architecture + +Welcome to **Chapter 2: System Architecture**. In this part of **AFFiNE Tutorial**, you will build a deep understanding of how BlockSuite, OctoBase, yjs, and the AFFiNE application layer connect to form a cohesive workspace platform. + +AFFiNE is not a monolithic application. It is a layered system where each layer has a clear responsibility: BlockSuite handles the editor and block model, yjs provides the CRDT foundation, OctoBase manages storage and sync, and the AFFiNE shell orchestrates everything into a user-facing workspace. + +## What Problem Does This Solve? 
+ +Understanding the architecture is essential before you modify any part of the system. Without knowing how the layers connect, you risk breaking sync, corrupting block trees, or misunderstanding where a feature should be implemented. This chapter gives you the architectural map. + +## Learning Goals + +- understand the four-layer architecture of AFFiNE +- learn how BlockSuite provides the editor framework and block model +- understand yjs as the CRDT foundation for all content +- learn how OctoBase bridges local and cloud storage +- trace a user action from UI click to persisted CRDT update + +## The Four-Layer Architecture + +```mermaid +flowchart TB + subgraph "Application Layer" + A1[AFFiNE Frontend - React] + A2[Workspace Management] + A3[Authentication and Accounts] + end + + subgraph "Editor Layer - BlockSuite" + B1[Block Specs and Schema] + B2[Page Mode Editor] + B3[Edgeless Mode Editor] + B4[Widget System] + end + + subgraph "Data Layer - yjs / CRDT" + C1[Y.Doc - Root Document] + C2[Y.Map - Block Properties] + C3[Y.Array - Child Ordering] + C4[Y.Text - Rich Text Content] + end + + subgraph "Sync and Storage Layer" + D1[IndexedDB Provider] + D2[WebSocket Sync] + D3[OctoBase / Cloud Storage] + D4[SQLite - Desktop] + end + + A1 --> B1 + A2 --> C1 + B1 --> C1 + B2 --> C2 + B3 --> C3 + B4 --> C4 + C1 --> D1 + C1 --> D2 + D2 --> D3 + C1 --> D4 +``` + +## Layer 1: The Application Shell + +The AFFiNE frontend is a React application that provides the workspace chrome — sidebar navigation, workspace switching, settings, authentication, and the container for the BlockSuite editor. + +```typescript +// packages/frontend/core/src/modules/workspace/services/workspace.ts +// Simplified workspace initialization + +export class WorkspaceService { + private workspaceList: WorkspaceMetadata[] = []; + + async openWorkspace(id: string): Promise { + // 1. Load workspace metadata + const meta = await this.getWorkspaceMeta(id); + + // 2. 
Initialize the yjs Doc for this workspace + const doc = new Y.Doc({ guid: id }); + + // 3. Connect storage providers (IndexedDB, WebSocket) + await this.connectProviders(doc, meta.flavour); + + // 4. Initialize the BlockSuite workspace on top of the doc + const workspace = new Workspace({ + id, + doc, + schema: this.blockSchema, + }); + + return workspace; + } +} +``` + +Key responsibilities: +- **Workspace lifecycle** — create, open, delete, and switch workspaces +- **Authentication** — local and cloud account management +- **Module system** — dependency injection for services across the app +- **Routing** — URL-based navigation between pages and views + +## Layer 2: BlockSuite — The Editor Framework + +BlockSuite is the heart of AFFiNE's editing experience. It is a framework for building block-based editors, developed as a separate project by the same team. + +```typescript +// BlockSuite defines blocks using a schema system +// Each block type has a model (data), a view (rendering), and a service (behavior) + +import { defineBlockSchema } from '@blocksuite/store'; + +// Example: The paragraph block schema +export const ParagraphBlockSchema = defineBlockSchema({ + flavour: 'affine:paragraph', + metadata: { + version: 1, + role: 'content', + parent: ['affine:note'], + children: ['affine:paragraph', 'affine:list', 'affine:code'], + }, + props: (internal) => ({ + type: 'text' as ParagraphType, + text: internal.Text(), + }), +}); + +// Block types form a tree: +// Page (root) +// └── Note (container) +// ├── Paragraph +// ├── Heading +// ├── List +// ├── Code +// ├── Image +// └── Database +``` + +BlockSuite provides two distinct editing modes on the same data: + +- **Page Mode** — a linear document editor, similar to Notion +- **Edgeless Mode** — a freeform canvas/whiteboard, similar to Miro + +Both modes read and write to the same yjs document, meaning you can create content in one mode and view or edit it in the other. 
+ +## Layer 3: yjs — The CRDT Foundation + +Every piece of content in AFFiNE is stored as a yjs CRDT structure. This is the single most important architectural decision in the system. + +```typescript +// How blocks map to yjs data structures: + +import * as Y from 'yjs'; + +// A workspace is a Y.Doc +const workspaceDoc = new Y.Doc(); + +// Each page is a subdocument +const pageDoc = new Y.Doc(); +workspaceDoc.getMap('spaces').set('page:abc123', pageDoc); + +// Within a page, blocks are stored in a Y.Map +const blocks = pageDoc.getMap('blocks'); + +// Each block is a Y.Map with properties +const paragraphBlock = new Y.Map(); +paragraphBlock.set('sys:id', 'block:xyz'); +paragraphBlock.set('sys:flavour', 'affine:paragraph'); +paragraphBlock.set('sys:children', new Y.Array()); // child block IDs +paragraphBlock.set('prop:text', new Y.Text('Hello, AFFiNE!')); +paragraphBlock.set('prop:type', 'text'); + +blocks.set('block:xyz', paragraphBlock); +``` + +The yjs document model provides: +- **Conflict-free merging** — multiple users can edit simultaneously without coordination +- **Offline support** — changes are captured locally and merged when reconnected +- **Undo/redo** — yjs tracks operations for reversible editing +- **Compact binary encoding** — efficient storage and network transfer + +## Layer 4: Sync and Storage + +AFFiNE supports multiple storage backends, all built on top of yjs providers: + +```typescript +// Storage provider abstraction +// packages/frontend/core/src/modules/workspace/providers/ + +// IndexedDB — browser-local persistence +import { IndexeddbPersistence } from 'y-indexeddb'; +const idbProvider = new IndexeddbPersistence(workspaceId, doc); + +// WebSocket — real-time cloud sync +import { WebsocketProvider } from 'y-websocket'; +const wsProvider = new WebsocketProvider( + 'wss://sync.affine.pro', + workspaceId, + doc +); + +// The providers compose transparently: +// - Local changes go to IndexedDB immediately +// - WebSocket provider sends 
updates to the server +// - Server broadcasts to other connected clients +// - OctoBase on the server persists the merged state +``` + +## How It Works Under the Hood: Tracing a Keystroke + +When a user types a character in a paragraph block, here is the full path: + +```mermaid +sequenceDiagram + participant User + participant Editor as BlockSuite Editor + participant Model as Block Model + participant YDoc as yjs Y.Text + participant IDB as IndexedDB + participant WS as WebSocket + participant Server as AFFiNE Server + participant Peer as Other Clients + + User->>Editor: Types character "a" + Editor->>Model: Update paragraph block + Model->>YDoc: Y.Text.insert(pos, "a") + YDoc->>IDB: Persist update binary + YDoc->>WS: Send update to server + WS->>Server: Receive update + Server->>Server: Merge into server doc + Server->>Peer: Broadcast update + Peer->>Peer: Apply update locally +``` + +Each step is designed to be fast and non-blocking: +1. The editor captures the input event +2. BlockSuite translates it to a block model operation +3. The block model writes to the yjs document +4. yjs generates a compact binary update +5. The update is written to IndexedDB (local persistence) +6. The same update is sent over WebSocket (cloud sync) +7. 
The server merges the update and broadcasts to peers + +## The Module and Service Pattern + +AFFiNE uses a dependency injection pattern for organizing services: + +```typescript +// packages/frontend/core/src/modules/ +// Each module encapsulates a feature domain + +// Module definition pattern: +export class WorkspaceModule { + static readonly id = 'workspace'; + + // Services are injected and lazily initialized + constructor( + private readonly docService: DocService, + private readonly syncService: SyncService, + private readonly storageService: StorageService, + ) {} + + // Public API for the rest of the application + async openDoc(docId: string): Promise { + const doc = await this.docService.loadDoc(docId); + await this.syncService.connect(doc); + return doc; + } +} +``` + +Key modules include: +- **workspace** — workspace CRUD and lifecycle +- **doc** — page/document management +- **editor** — BlockSuite integration and configuration +- **sync** — cloud synchronization +- **ai** — copilot and AI feature integration +- **collection** — saved filters and views + +## Source References + +- [BlockSuite Repository](https://github.com/toeverything/blocksuite) +- [yjs Documentation](https://docs.yjs.dev/) +- [AFFiNE Architecture](https://github.com/toeverything/AFFiNE/blob/canary/README.md) +- [OctoBase Overview](https://github.com/toeverything/OctoBase) + +## Summary + +AFFiNE's architecture is a four-layer stack: React application shell, BlockSuite editor framework, yjs CRDT data layer, and composable storage providers. Every piece of content flows through yjs, making collaboration and offline support intrinsic rather than bolted on. + +Next: [Chapter 3: Block System](03-block-system.md) — where we explore block types, the block tree, and how page and edgeless modes share the same data model. 
+ +--- + +[Back to Tutorial Index](README.md) | [Previous: Chapter 1](01-getting-started.md) | [Next: Chapter 3](03-block-system.md) + +*Generated by [AI Codebase Knowledge Builder](https://github.com/The-Pocket/Tutorial-Codebase-Knowledge)* diff --git a/tutorials/affine-tutorial/03-block-system.md b/tutorials/affine-tutorial/03-block-system.md new file mode 100644 index 0000000..fb39e8d --- /dev/null +++ b/tutorials/affine-tutorial/03-block-system.md @@ -0,0 +1,315 @@ +--- +layout: default +title: "Chapter 3: Block System" +nav_order: 3 +parent: AFFiNE Tutorial +--- + +# Chapter 3: Block System + +Welcome to **Chapter 3: Block System**. In this part of **AFFiNE Tutorial**, you will learn how AFFiNE's block-based content model works, how blocks form trees, and how the same block data powers both page mode (documents) and edgeless mode (whiteboards). + +The block system is the fundamental content abstraction in AFFiNE. Every paragraph, heading, image, code block, and database is a block. Understanding how blocks work is essential for customizing the editor, building plugins, or debugging content issues. + +## What Problem Does This Solve? + +Traditional document editors store content as a flat stream of formatted text. This makes it difficult to support rich content types like embedded databases, interactive widgets, or freeform canvas layouts. AFFiNE's block system solves this by representing all content as a tree of typed, composable blocks — each with its own schema, properties, and rendering logic. 
+ +## Learning Goals + +- understand the block tree structure and how blocks nest +- learn the core block flavours (types) available in AFFiNE +- understand how page mode and edgeless mode share the same block data +- learn how block schemas define structure, props, and parent-child relationships +- trace how a block is created, rendered, and persisted + +## The Block Tree + +Every AFFiNE page is a tree of blocks rooted at a `page` block: + +``` +affine:page (root) +├── affine:surface (edgeless canvas layer) +│ ├── affine:frame +│ │ └── affine:note +│ │ └── affine:paragraph +│ └── affine:shape (whiteboard elements) +└── affine:note (document container) + ├── affine:paragraph { type: "h1", text: "My Title" } + ├── affine:paragraph { type: "text", text: "Some content..." } + ├── affine:list { type: "bulleted", text: "Item one" } + ├── affine:list { type: "bulleted", text: "Item two" } + ├── affine:code { language: "typescript", text: "const x = 1;" } + ├── affine:image { sourceId: "blob:abc123" } + └── affine:database { title: "Task Tracker" } + ├── affine:paragraph { text: "Row 1 content" } + └── affine:paragraph { text: "Row 2 content" } +``` + +## Core Block Flavours + +AFFiNE defines blocks using a `flavour` identifier. 
Each flavour has a schema that describes its properties and valid parent-child relationships: + +```typescript +// Block flavours and their roles: + +// Root block — every page has exactly one +// flavour: 'affine:page' +// Contains metadata like title and version + +// Note block — primary container for document content +// flavour: 'affine:note' +// Groups content blocks in page mode; acts as a moveable frame in edgeless mode + +// Surface block — the edgeless canvas layer +// flavour: 'affine:surface' +// Contains shapes, connectors, frames, and positioned elements + +// Content blocks — the building blocks of documents: +const contentFlavours = { + 'affine:paragraph': 'Text, headings (h1-h6), and quotes', + 'affine:list': 'Bulleted, numbered, todo, and toggle lists', + 'affine:code': 'Code blocks with syntax highlighting', + 'affine:image': 'Embedded images with captions', + 'affine:divider': 'Horizontal divider lines', + 'affine:bookmark': 'URL bookmarks with preview cards', + 'affine:attachment': 'File attachments', + 'affine:database': 'Inline databases with table/kanban views', + 'affine:embed': 'Embedded content (YouTube, Figma, etc.)', +}; +``` + +## Defining a Block Schema + +Each block type is defined using the `defineBlockSchema` function from BlockSuite: + +```typescript +import { defineBlockSchema } from '@blocksuite/store'; + +// The list block schema demonstrates parent-child constraints +export const ListBlockSchema = defineBlockSchema({ + flavour: 'affine:list', + metadata: { + version: 1, + role: 'content', + // Which blocks can contain this block + parent: ['affine:note', 'affine:list', 'affine:database'], + // Which blocks can be children of this block + children: [ + 'affine:paragraph', + 'affine:list', + 'affine:code', + 'affine:image', + ], + }, + props: (internal) => ({ + // The type of list: bulleted, numbered, todo, toggle + type: 'bulleted' as ListType, + // Rich text content using yjs Y.Text + text: internal.Text(), + // For todo 
lists: checked state + checked: false, + // For toggle lists: collapsed state + collapsed: false, + }), +}); +``` + +The `metadata.parent` and `metadata.children` arrays enforce structural validity. BlockSuite prevents invalid nesting at the schema level — you cannot drop an image block inside a code block, for example. + +## Page Mode vs Edgeless Mode + +The key insight is that both modes operate on the **same block tree**. They simply render different subtrees with different layouts: + +```typescript +// Page mode renders the note blocks linearly: +// affine:page → affine:note → [content blocks in order] +// +// Edgeless mode renders the surface block as a canvas: +// affine:page → affine:surface → [positioned elements] +// → affine:note (as a movable card on canvas) + +// The mode switch does NOT change the data — it changes the view: +interface EditorMode { + mode: 'page' | 'edgeless'; + + // In page mode: note blocks render as a vertical document + // In edgeless mode: note blocks render as positioned cards + // surface elements render as shapes/connectors +} +``` + +```mermaid +flowchart LR + subgraph "Same Block Tree" + P[affine:page] + N1[affine:note] + N2[affine:note] + S[affine:surface] + C1[paragraph] + C2[list] + C3[shape] + C4[connector] + + P --> N1 + P --> N2 + P --> S + N1 --> C1 + N1 --> C2 + S --> C3 + S --> C4 + end + + subgraph "Page Mode View" + PM[Linear document] + PM --> N1v[Note 1 contents] + PM --> N2v[Note 2 contents] + end + + subgraph "Edgeless Mode View" + EM[Infinite canvas] + EM --> N1c[Note 1 as card] + EM --> N2c[Note 2 as card] + EM --> SC[Shapes and connectors] + end +``` + +## Block Operations + +BlockSuite provides a transaction-based API for modifying blocks: + +```typescript +// Adding a new block +const doc = workspace.getDoc('page:abc123'); + +doc.addBlock( + 'affine:paragraph', // flavour + { // props + type: 'text', + text: new Y.Text('Hello!'), + }, + noteBlockId, // parent block ID + insertIndex // position among 
siblings +); + +// Moving a block +doc.moveBlocks( + [blockToMove], // blocks to move + targetParentBlock, // new parent + targetSiblingBlock, // insert before this sibling + 'before' // 'before' or 'after' +); + +// Deleting a block +doc.deleteBlock(block); + +// Updating block props +doc.updateBlock(block, { + type: 'h1', // change paragraph to heading +}); +``` + +All block operations are atomic within a yjs transaction, ensuring consistency even during concurrent edits (see [Chapter 4: Collaborative Editing](04-collaborative-editing.md)). + +## How It Works Under the Hood: Block Rendering Pipeline + +When a block is rendered, BlockSuite follows a spec-driven pipeline: + +```mermaid +flowchart TD + A[Block Model in yjs] --> B[BlockSpec lookup by flavour] + B --> C[Block View component] + B --> D[Block Service] + B --> E[Block Widgets] + C --> F[Lit Element renders to DOM] + D --> G[Handles block-specific logic] + E --> H[Toolbar, menus, drag handles] + F --> I[User sees rendered block] + G --> I + H --> I +``` + +The BlockSpec system ties together three concerns: + +```typescript +// A BlockSpec binds model, view, and service for a flavour: + +import { BlockSpec } from '@blocksuite/block-std'; + +export const ParagraphBlockSpec: BlockSpec = { + schema: ParagraphBlockSchema, + view: { + // Lit web component for rendering + component: literal`affine-paragraph`, + }, + service: ParagraphBlockService, + widgets: { + // Optional toolbar and interaction widgets + slashMenu: literal`affine-slash-menu-widget`, + dragHandle: literal`affine-drag-handle-widget`, + }, +}; +``` + +BlockSuite uses **Lit** (web components) for block rendering, not React. The AFFiNE React shell hosts the Lit-based editor in a container element. This separation allows BlockSuite to be framework-agnostic. 
+ +## The Slash Command Menu + +The slash command (`/`) menu is the primary way users insert new blocks: + +```typescript +// The slash menu is a widget that listens for "/" input +// and presents a filtered list of block types to insert + +// Each menu item maps to a block creation action: +const slashMenuItems = [ + { + name: 'Text', + action: ({ doc, model }) => { + doc.addBlock('affine:paragraph', { type: 'text' }, model.id); + }, + }, + { + name: 'Heading 1', + action: ({ doc, model }) => { + doc.addBlock('affine:paragraph', { type: 'h1' }, model.id); + }, + }, + { + name: 'To-do List', + action: ({ doc, model }) => { + doc.addBlock('affine:list', { type: 'todo' }, model.id); + }, + }, + { + name: 'Code Block', + action: ({ doc, model }) => { + doc.addBlock('affine:code', {}, model.id); + }, + }, + { + name: 'Database', + action: ({ doc, model }) => { + doc.addBlock('affine:database', { title: 'Untitled' }, model.id); + }, + }, +]; +``` + +## Source References + +- [BlockSuite Store](https://github.com/toeverything/blocksuite/tree/master/packages/store) +- [AFFiNE Block Specs](https://github.com/toeverything/AFFiNE/tree/canary/packages/frontend/core/src/blocksuite) +- [BlockSuite Documentation](https://blocksuite.io) + +## Summary + +AFFiNE's block system represents all content as a tree of typed, schema-validated blocks stored in yjs CRDT structures. The same block tree powers both page mode (linear documents) and edgeless mode (infinite canvas). BlockSpecs bind each flavour to its model, view, and service, creating a composable and extensible content system. + +Next: [Chapter 4: Collaborative Editing](04-collaborative-editing.md) — where we dive deep into how yjs CRDTs enable real-time multi-user editing without conflicts. 
+ +--- + +[Back to Tutorial Index](README.md) | [Previous: Chapter 2](02-system-architecture.md) | [Next: Chapter 4](04-collaborative-editing.md) + +*Generated by [AI Codebase Knowledge Builder](https://github.com/The-Pocket/Tutorial-Codebase-Knowledge)* diff --git a/tutorials/affine-tutorial/04-collaborative-editing.md b/tutorials/affine-tutorial/04-collaborative-editing.md new file mode 100644 index 0000000..6e25f6d --- /dev/null +++ b/tutorials/affine-tutorial/04-collaborative-editing.md @@ -0,0 +1,343 @@ +--- +layout: default +title: "Chapter 4: Collaborative Editing" +nav_order: 4 +parent: AFFiNE Tutorial +--- + +# Chapter 4: Collaborative Editing + +Welcome to **Chapter 4: Collaborative Editing**. In this part of **AFFiNE Tutorial**, you will learn how AFFiNE achieves real-time multi-user collaboration using yjs CRDTs, how conflicts are resolved automatically, and how the sync protocol works across local and cloud storage. + +Real-time collaboration is not an add-on feature in AFFiNE — it is the foundational design choice. Because all content is stored as yjs CRDT documents (as introduced in [Chapter 2: System Architecture](02-system-architecture.md)), every operation is inherently collaborative and conflict-free. + +## What Problem Does This Solve? + +When multiple users edit the same document simultaneously, traditional systems face the challenge of conflict resolution. Operational Transformation (OT) — used by Google Docs — requires a central server to order operations. CRDTs (Conflict-free Replicated Data Types) take a different approach: they guarantee that any two replicas that have received the same set of updates will converge to the same state, regardless of the order updates were applied. + +AFFiNE uses yjs, one of the most battle-tested CRDT implementations, to make collaboration work seamlessly — including offline scenarios where users edit without connectivity and merge later. 
+ +## Learning Goals + +- understand how yjs CRDTs guarantee conflict-free merging +- learn the sync protocol between AFFiNE clients and server +- understand how awareness (cursors, selections) works in real-time +- learn how offline editing and reconnection are handled +- trace a collaborative editing session through the full stack + +## yjs CRDT Fundamentals + +yjs provides several CRDT types that map to content needs: + +```typescript +import * as Y from 'yjs'; + +const doc = new Y.Doc(); + +// Y.Text — for rich text content (paragraphs, headings, code) +// Supports concurrent character insertions at any position +const text = doc.getText('content'); +text.insert(0, 'Hello'); +text.insert(5, ' World'); +// Result: "Hello World" + +// Y.Map — for block properties (key-value pairs) +// Supports concurrent property updates; last-writer-wins per key +const blockProps = doc.getMap('block:abc'); +blockProps.set('type', 'h1'); +blockProps.set('collapsed', false); + +// Y.Array — for ordered collections (child block IDs) +// Supports concurrent insertions at any position +const children = doc.getArray('children'); +children.push(['block:1', 'block:2']); +children.insert(1, ['block:3']); // Insert between block:1 and block:2 + +// Y.XmlFragment / Y.XmlText — for rich text with formatting +// Used internally by BlockSuite for inline formatting (bold, italic, etc.) +``` + +### How Conflicts Are Resolved + +```typescript +// Scenario: Two users edit the same text simultaneously + +// User A's document state: "Hello" +// User B's document state: "Hello" + +// User A inserts " World" at position 5 +// User B inserts " AFFiNE" at position 5 + +// Without CRDT: CONFLICT — which insertion wins? 
+// With yjs: Both insertions are preserved + +// yjs uses unique client IDs and logical clocks to order +// concurrent insertions deterministically: +// Result: "Hello World AFFiNE" or "Hello AFFiNE World" +// (order depends on client IDs, but BOTH replicas converge +// to the exact same result — that is the CRDT guarantee) +``` + +## The Sync Protocol + +AFFiNE uses the yjs sync protocol (y-protocols) to exchange updates between clients and the server: + +```mermaid +sequenceDiagram + participant A as Client A + participant S as AFFiNE Server + participant B as Client B + + Note over A,B: Initial Sync (Step 1) + A->>S: SyncStep1(stateVector_A) + S->>A: SyncStep2(missingUpdates_for_A) + S->>A: SyncStep1(stateVector_S) + A->>S: SyncStep2(missingUpdates_for_S) + + Note over A,B: Real-time Updates + A->>S: Update(binary_delta) + S->>S: Apply update to server doc + S->>B: Update(binary_delta) + B->>B: Apply update locally + + Note over A,B: Awareness Protocol + A->>S: Awareness(cursor_pos, selection, user_info) + S->>B: Awareness(cursor_pos, selection, user_info) +``` + +The sync protocol has two phases: + +1. **Initial sync** — exchange state vectors to determine which updates each side is missing, then send only the missing updates +2. 
**Live updates** — each local change generates a compact binary update that is broadcast to all connected peers + +```typescript +// packages/backend/server/src/modules/sync/ +// Simplified WebSocket sync handler on the server + +import { applyUpdate, encodeStateAsUpdate, encodeStateVector } from 'yjs'; + +class SyncHandler { + private docs: Map<string, Y.Doc> = new Map(); + + handleConnection(ws: WebSocket, docId: string) { + const doc = this.getOrCreateDoc(docId); + + // Send current state vector to client + const sv = encodeStateVector(doc); + ws.send(createSyncStep1Message(sv)); + + ws.on('message', (data: Uint8Array) => { + const messageType = readMessageType(data); + + switch (messageType) { + case MessageType.SYNC_STEP_1: + // Client sent their state vector; respond with missing updates + const clientSV = readStateVector(data); + const update = encodeStateAsUpdate(doc, clientSV); + ws.send(createSyncStep2Message(update)); + break; + + case MessageType.SYNC_STEP_2: + case MessageType.UPDATE: + // Apply client update to server doc + const clientUpdate = readUpdate(data); + applyUpdate(doc, clientUpdate); + // Broadcast to all other connected clients + this.broadcast(docId, data, ws); + break; + } + }); + } +} +``` + +## Awareness Protocol + +Beyond document content, AFFiNE synchronizes **awareness** information — cursor positions, selections, and user presence: + +```typescript +import { Awareness } from 'y-protocols/awareness'; + +// Each client maintains an awareness instance +const awareness = new Awareness(doc); + +// Set local awareness state +awareness.setLocalState({ + user: { + name: 'Alice', + color: '#ff5733', + avatar: 'https://...', + }, + cursor: { + blockId: 'block:abc', + offset: 42, + }, + selection: { + type: 'block', + blockIds: ['block:abc', 'block:def'], + }, +}); + +// Listen for remote awareness changes +awareness.on('change', ({ added, updated, removed }) => { + // Render remote cursors and selections in the editor + for (const clientId of [...added, 
...updated]) { + const state = awareness.getStates().get(clientId); + renderRemoteCursor(state); + } + for (const clientId of removed) { + removeRemoteCursor(clientId); + } +}); +``` + +The awareness protocol uses a lightweight heartbeat mechanism — if a client does not send an awareness update within 30 seconds, it is considered disconnected and removed from the presence list. + +## Offline Editing and Reconnection + +One of yjs's strongest features is offline support. AFFiNE leverages this for seamless offline editing: + +```mermaid +flowchart TD + A[User edits while online] --> B[Updates go to IndexedDB + WebSocket] + B --> C{Connection lost} + C --> D[User continues editing offline] + D --> E[Updates accumulate in IndexedDB only] + E --> F{Connection restored} + F --> G[SyncStep1: exchange state vectors] + G --> H[SyncStep2: send missing updates both ways] + H --> I[All replicas converge automatically] + I --> J[No merge conflicts, no data loss] +``` + +```typescript +// The IndexedDB provider persists all yjs updates locally +// packages/frontend/core/src/modules/workspace/providers/ + +class LocalSyncProvider { + private idbProvider: IndexeddbPersistence; + + constructor(workspaceId: string, doc: Y.Doc) { + // IndexedDB stores every update, maintaining full history + this.idbProvider = new IndexeddbPersistence(workspaceId, doc); + + // When loaded from IndexedDB, doc has full local state + this.idbProvider.on('synced', () => { + console.log('Local state loaded from IndexedDB'); + }); + } +} + +class CloudSyncProvider { + private wsProvider: WebsocketProvider | null = null; + + connect(workspaceId: string, doc: Y.Doc) { + this.wsProvider = new WebsocketProvider( + 'wss://sync.affine.pro', + workspaceId, + doc, + { + // Automatically reconnect with exponential backoff + connect: true, + maxBackoffTime: 10000, + } + ); + + this.wsProvider.on('status', ({ status }) => { + if (status === 'connected') { + // yjs sync protocol automatically exchanges + // missing 
updates upon reconnection + console.log('Connected — syncing...'); + } + }); + } +} +``` + +## Sub-Document Sync + +AFFiNE workspaces use yjs sub-documents to manage page-level granularity: + +```typescript +// A workspace is a root Y.Doc +// Each page is a sub-document (Y.Doc nested inside the root) + +const workspaceDoc = new Y.Doc(); +const pages = workspaceDoc.getMap('spaces'); + +// Sub-documents are loaded on demand +// This means opening a workspace does NOT load all page content +// Only metadata (titles, IDs) is in the root doc + +const pageDoc = new Y.Doc({ guid: 'page:abc123' }); +pages.set('page:abc123', pageDoc); + +// When a user opens a specific page: +// 1. The sub-document is loaded from IndexedDB +// 2. A sync connection is established for just that sub-doc +// 3. Other pages remain unloaded until accessed + +// This is critical for performance in large workspaces +// with hundreds or thousands of pages +``` + +## Conflict Resolution Examples + +### Concurrent Text Editing + +```typescript +// Alice and Bob both type in the same paragraph simultaneously + +// Alice types "cat" at position 10 +// Bob types "dog" at position 10 + +// yjs resolves this deterministically: +// Both "cat" and "dog" appear in the text +// Order is determined by client ID comparison +// Both Alice and Bob see identical final text +``` + +### Concurrent Block Reordering + +```typescript +// Alice moves block X above block Y +// Bob moves block X below block Z + +// yjs Y.Array handles this: +// The move is implemented as delete + insert +// Both operations are applied; the result depends on +// yjs's internal ordering, but both replicas converge +``` + +### Concurrent Property Updates + +```typescript +// Alice changes block background to "blue" +// Bob changes block background to "red" + +// Y.Map uses last-writer-wins per key +// The client with the higher timestamp "wins" +// Both replicas converge to the same color +// This is acceptable because property conflicts +// 
are rare and low-stakes in practice +``` + +## Source References + +- [yjs GitHub](https://github.com/yjs/yjs) +- [y-protocols](https://github.com/yjs/y-protocols) +- [y-indexeddb](https://github.com/yjs/y-indexeddb) +- [y-websocket](https://github.com/yjs/y-websocket) +- [CRDT Resources](https://crdt.tech/) + +## Summary + +AFFiNE's collaborative editing is built on yjs CRDTs, which guarantee that all replicas converge to the same state regardless of operation ordering. The sync protocol efficiently exchanges only missing updates, the awareness protocol shares cursor and presence information, and offline editing works seamlessly because yjs documents can be merged at any time without conflicts. + +Next: [Chapter 5: AI Copilot](05-ai-copilot.md) — where we explore how AFFiNE integrates AI features for writing assistance, summarization, and content generation. + +--- + +[Back to Tutorial Index](README.md) | [Previous: Chapter 3](03-block-system.md) | [Next: Chapter 5](05-ai-copilot.md) + +*Generated by [AI Codebase Knowledge Builder](https://github.com/The-Pocket/Tutorial-Codebase-Knowledge)* diff --git a/tutorials/affine-tutorial/05-ai-copilot.md b/tutorials/affine-tutorial/05-ai-copilot.md new file mode 100644 index 0000000..9e8df4a --- /dev/null +++ b/tutorials/affine-tutorial/05-ai-copilot.md @@ -0,0 +1,418 @@ +--- +layout: default +title: "Chapter 5: AI Copilot" +nav_order: 5 +parent: AFFiNE Tutorial +--- + +# Chapter 5: AI Copilot + +Welcome to **Chapter 5: AI Copilot**. In this part of **AFFiNE Tutorial**, you will learn how AFFiNE integrates AI capabilities directly into the workspace — enabling writing assistance, content summarization, translation, image generation, and conversational interactions with your documents. + +AFFiNE's AI copilot is not a separate product bolted onto the editor. 
It is deeply integrated into the block system (see [Chapter 3: Block System](03-block-system.md)) and operates on the same content model, meaning AI actions can read, create, and modify blocks directly. + +## What Problem Does This Solve? + +Modern knowledge workers need AI assistance embedded in their workflow — not in a separate chat window. AFFiNE's copilot solves this by providing AI actions that operate directly on selected blocks, entire pages, or the workspace context. The architecture supports multiple AI providers and allows self-hosted users to bring their own API keys. + +## Learning Goals + +- understand the copilot architecture and how it connects to the editor +- learn the available AI actions and how they transform content +- understand the provider abstraction for OpenAI, local models, and custom backends +- learn how AI chat sessions maintain context with document content +- trace an AI action from user trigger to content modification + +## Copilot Architecture Overview + +```mermaid +flowchart TD + subgraph "Frontend" + A[User selects text or block] + B[AI Action Menu] + C[Copilot Panel / Chat] + D[Inline AI Toolbar] + end + + subgraph "AI Module" + E[Action Router] + F[Prompt Builder] + G[Context Collector] + H[Response Handler] + end + + subgraph "Backend" + I[Copilot API Endpoint] + J[Provider Abstraction] + K[OpenAI / GPT-4] + L[Custom Model Endpoint] + M[Image Generation] + end + + A --> B + A --> D + B --> E + C --> E + D --> E + E --> F + F --> G + G --> I + I --> J + J --> K + J --> L + J --> M + K --> H + L --> H + M --> H + H --> A +``` + +## AI Actions + +AFFiNE provides several categories of AI actions that operate on document content: + +```typescript +// packages/frontend/core/src/modules/ai/actions/ + +// Writing assistance actions +const writingActions = { + 'improve-writing': { + description: 'Improve the writing quality of selected text', + input: 'selected text blocks', + output: 'improved text replacement', + }, + 
'fix-spelling-grammar': { + description: 'Fix spelling and grammar errors', + input: 'selected text blocks', + output: 'corrected text replacement', + }, + 'make-shorter': { + description: 'Condense the selected text', + input: 'selected text blocks', + output: 'shortened text replacement', + }, + 'make-longer': { + description: 'Expand the selected text with more detail', + input: 'selected text blocks', + output: 'expanded text replacement', + }, + 'change-tone': { + description: 'Rewrite in a specified tone (professional, casual, etc.)', + input: 'selected text blocks + tone parameter', + output: 'rewritten text replacement', + }, +}; + +// Content generation actions +const generationActions = { + 'summarize': { + description: 'Summarize selected content or entire page', + input: 'selected blocks or full page', + output: 'summary paragraph', + }, + 'translate': { + description: 'Translate content to a target language', + input: 'selected text + target language', + output: 'translated text', + }, + 'explain': { + description: 'Explain selected content in simpler terms', + input: 'selected blocks', + output: 'explanation paragraph', + }, + 'continue-writing': { + description: 'Generate continuation of the current content', + input: 'preceding content context', + output: 'new paragraphs appended', + }, + 'generate-outline': { + description: 'Create a document outline from a topic', + input: 'topic string', + output: 'heading and list blocks', + }, +}; + +// Image actions +const imageActions = { + 'create-image': { + description: 'Generate an image from a text prompt', + input: 'text prompt', + output: 'image block', + }, + 'explain-image': { + description: 'Describe the contents of an image', + input: 'image block', + output: 'description paragraph', + }, +}; +``` + +## How AI Actions Work + +When a user triggers an AI action, the system follows a structured pipeline: + +```typescript +// Simplified AI action execution flow + +interface AIActionContext { + // The 
selected blocks or text that the action operates on + selectedBlocks: BlockModel[]; + selectedText?: string; + + // The full page content for context + pageContent: string; + + // Action-specific parameters + params: Record<string, unknown>; +} + +class AIActionExecutor { + async execute( + actionId: string, + context: AIActionContext + ): Promise<AIActionResult> { + // 1. Build the prompt from the action template and context + const prompt = this.promptBuilder.build(actionId, context); + + // 2. Collect additional context (page title, surrounding blocks) + const enrichedPrompt = this.contextCollector.enrich(prompt, context); + + // 3. Send to the AI provider (streaming response) + const stream = await this.provider.chat({ + messages: enrichedPrompt.messages, + model: enrichedPrompt.model, + stream: true, + }); + + // 4. Handle the response — either replace or insert blocks + return this.responseHandler.handle(actionId, stream, context); + } +} +``` + +### Prompt Building + +```typescript +// The prompt builder constructs messages for the AI provider +// Each action has a system prompt template + +const actionPrompts: Record<string, string> = { + 'improve-writing': `You are a writing assistant integrated into a document editor. +The user has selected the following text and wants you to improve its quality. +Maintain the original meaning and tone while improving clarity, flow, and grammar. +Return ONLY the improved text without explanations.`, + + 'summarize': `You are a summarization assistant integrated into a document editor. +The user wants a concise summary of the following content. +Provide a clear, well-structured summary that captures the key points.`, + + 'translate': `You are a translation assistant integrated into a document editor. +Translate the following text to {{targetLanguage}}. +Maintain the original formatting and structure. +Return ONLY the translated text.`, + + 'continue-writing': `You are a writing assistant integrated into a document editor. 
+Based on the existing content, continue writing in the same style and tone. +Generate 2-3 paragraphs that naturally follow from the context.`, +}; +``` + +## The Copilot Chat Panel + +Beyond inline actions, AFFiNE provides a chat panel for conversational AI interactions: + +```typescript +// packages/frontend/core/src/modules/ai/chat/ + +interface CopilotChatSession { + id: string; + messages: ChatMessage[]; + // The chat has access to the current page context + pageContext: { + docId: string; + title: string; + // Blocks can be attached as context + attachedBlocks: BlockModel[]; + }; +} + +interface ChatMessage { + role: 'user' | 'assistant' | 'system'; + content: string; + // AI responses can include actionable blocks + attachments?: { + type: 'text' | 'image' | 'code'; + content: string; + }[]; + // Users can insert AI responses directly into the document + actions?: { + insertToPage: () => void; + replaceSelection: () => void; + copyToClipboard: () => void; + }; +} +``` + +The chat panel supports: + +- **Document-aware conversations** — the AI has access to the current page content +- **Block references** — users can attach specific blocks as context for questions +- **Actionable responses** — AI responses can be inserted directly into the document as blocks +- **Session history** — conversations are persisted per workspace + +## Provider Abstraction + +AFFiNE's copilot supports multiple AI providers through a backend abstraction: + +```typescript +// packages/backend/server/src/modules/copilot/providers/ + +interface CopilotProvider { + // Text generation (streaming) + chatStream(params: ChatParams): AsyncIterable<string>; + + // Text generation (non-streaming) + chat(params: ChatParams): Promise<string>; + + // Image generation + generateImage(params: ImageParams): Promise<string>; + + // Text embedding (for search and retrieval) + embed(text: string): Promise<number[]>; +} + +interface ChatParams { + messages: Array<{ + role: 'system' | 'user' | 'assistant'; + content: string; + }>; + 
model: string; + temperature?: number; + maxTokens?: number; +} + +// Provider implementations: +// - OpenAI (GPT-4, GPT-4o, DALL-E) +// - Custom endpoint (any OpenAI-compatible API) +// - Future: local models, Anthropic, etc. +``` + +### Configuring AI Providers + +```typescript +// For self-hosted instances, configure providers via environment: + +// .env configuration +// COPILOT_OPENAI_API_KEY=sk-... +// COPILOT_OPENAI_MODEL=gpt-4o +// COPILOT_OPENAI_BASE_URL=https://api.openai.com/v1 + +// Or use a custom OpenAI-compatible endpoint (e.g., Ollama, vLLM): +// COPILOT_OPENAI_BASE_URL=http://localhost:11434/v1 +// COPILOT_OPENAI_API_KEY=ollama +// COPILOT_OPENAI_MODEL=llama3 +``` + +## How It Works Under the Hood: AI-to-Block Pipeline + +When an AI action generates content, the response must be converted back into blocks: + +```mermaid +sequenceDiagram + participant User + participant Editor as BlockSuite Editor + participant AI as AI Module + participant Backend as Copilot API + participant LLM as AI Provider + + User->>Editor: Select text, click "Improve Writing" + Editor->>AI: Extract selected blocks as text + AI->>Backend: POST /api/copilot/chat (streaming) + Backend->>LLM: Forward with system prompt + LLM-->>Backend: Stream tokens + Backend-->>AI: Stream response chunks + AI->>AI: Accumulate response in preview + Note over AI: User sees streaming preview + User->>AI: Click "Replace" + AI->>Editor: Replace selected blocks with AI content + Editor->>Editor: Create new block transaction + Note over Editor: yjs update propagates to all providers +``` + +The response handling includes a critical step: converting AI-generated text (often markdown) back into AFFiNE blocks: + +```typescript +// AI response to block conversion + +class AIResponseHandler { + async insertAsBlocks( + response: string, + targetDoc: Doc, + parentBlockId: string, + position: number + ) { + // Parse the AI response (typically markdown) + const parsedBlocks = 
this.markdownParser.parse(response); + + // Insert each parsed element as a new block + for (const parsed of parsedBlocks) { + switch (parsed.type) { + case 'paragraph': + targetDoc.addBlock('affine:paragraph', { + type: 'text', + text: new Y.Text(parsed.content), + }, parentBlockId, position++); + break; + + case 'heading': + targetDoc.addBlock('affine:paragraph', { + type: `h${parsed.level}`, + text: new Y.Text(parsed.content), + }, parentBlockId, position++); + break; + + case 'code': + targetDoc.addBlock('affine:code', { + language: parsed.language, + text: new Y.Text(parsed.content), + }, parentBlockId, position++); + break; + + case 'list': + targetDoc.addBlock('affine:list', { + type: parsed.ordered ? 'numbered' : 'bulleted', + text: new Y.Text(parsed.content), + }, parentBlockId, position++); + break; + } + } + } +} +``` + +## Edgeless AI Features + +In edgeless (whiteboard) mode, the copilot has additional capabilities: + +- **Mind map generation** — generate a mind map from a topic, creating connected shape blocks on the canvas +- **Presentation generation** — create slide-like frames from a document outline +- **Image generation** — create images from prompts and place them on the canvas +- **Content expansion** — select a note on the canvas and ask AI to expand it + +## Source References + +- [AFFiNE AI Module](https://github.com/toeverything/AFFiNE/tree/canary/packages/frontend/core/src/modules/ai) +- [Copilot Backend](https://github.com/toeverything/AFFiNE/tree/canary/packages/backend/server/src/modules/copilot) +- [AFFiNE AI Documentation](https://docs.affine.pro/docs/affine-ai) + +## Summary + +AFFiNE's AI copilot is deeply integrated into the block-based content model, providing inline writing actions, conversational chat with document context, and provider-agnostic backend support. AI responses are converted back into native blocks, making the output fully editable and collaborative through the same yjs CRDT system. 
+ +Next: [Chapter 6: Database and Views](06-database-and-views.md) — where we explore how AFFiNE implements inline databases with table, kanban, and filtered views. + +--- + +[Back to Tutorial Index](README.md) | [Previous: Chapter 4](04-collaborative-editing.md) | [Next: Chapter 6](06-database-and-views.md) + +*Generated by [AI Codebase Knowledge Builder](https://github.com/The-Pocket/Tutorial-Codebase-Knowledge)* diff --git a/tutorials/affine-tutorial/06-database-and-views.md b/tutorials/affine-tutorial/06-database-and-views.md new file mode 100644 index 0000000..6c29b0a --- /dev/null +++ b/tutorials/affine-tutorial/06-database-and-views.md @@ -0,0 +1,444 @@ +--- +layout: default +title: "Chapter 6: Database and Views" +nav_order: 6 +parent: AFFiNE Tutorial +--- + +# Chapter 6: Database and Views + +Welcome to **Chapter 6: Database and Views**. In this part of **AFFiNE Tutorial**, you will learn how AFFiNE implements inline databases — structured data blocks that live inside documents and support multiple view types including table, kanban, and filtered views. + +Unlike traditional databases that exist separately from documents, AFFiNE's database blocks are part of the block tree (see [Chapter 3: Block System](03-block-system.md)). They are embedded directly in pages, share the same yjs CRDT layer for collaboration (see [Chapter 4: Collaborative Editing](04-collaborative-editing.md)), and can be viewed and edited in both page and edgeless modes. + +## What Problem Does This Solve? + +Knowledge workers need to mix structured and unstructured data in the same workspace. A project page might contain a description, a task database, meeting notes, and a timeline — all in one document. AFFiNE's database blocks solve this by treating structured data as first-class blocks that support multiple view types, column definitions, filtering, sorting, and grouping. 
+
+## Learning Goals
+
+- understand the database block model and how it stores structured data
+- learn the available view types: table, kanban, and list
+- understand column types and how they define data schemas
+- learn how filtering, sorting, and grouping work on database views
+- trace how a database block is stored and synced via yjs
+
+## Database Block Model
+
+A database block in AFFiNE is a container block that has child blocks (rows) and a column definition schema:
+
+```typescript
+// The database block schema
+import { defineBlockSchema } from '@blocksuite/store';
+
+export const DatabaseBlockSchema = defineBlockSchema({
+  flavour: 'affine:database',
+  metadata: {
+    version: 1,
+    role: 'content',
+    parent: ['affine:note'],
+    // Each child block represents a row
+    children: ['affine:paragraph', 'affine:list', 'affine:image'],
+  },
+  props: (internal) => ({
+    // The title of the database
+    title: internal.Text('Untitled Database'),
+
+    // Column definitions
+    columns: [] as Column[],
+
+    // View configurations (table, kanban, etc.)
+    views: [] as DatabaseView[],
+
+    // Cell data: maps rowId -> columnId -> cell value
+    cells: {} as Record<string, Record<string, CellValue>>,
+  }),
+});
+```
+
+### Column Types
+
+Columns define the schema of a database.
Each column has a type that determines what data it can hold and how it is rendered: + +```typescript +interface Column { + id: string; + name: string; + type: ColumnType; + data: ColumnData; // Type-specific configuration + width?: number; +} + +type ColumnType = + | 'title' // The primary title column (always present) + | 'rich-text' // Rich text content + | 'number' // Numeric values + | 'select' // Single-select from options + | 'multi-select' // Multi-select from options + | 'date' // Date/datetime values + | 'checkbox' // Boolean toggle + | 'link' // URL links + | 'progress' // Progress bar (0-100) + | 'image'; // Image attachment + +// Example column definitions for a project tracker: +const projectColumns: Column[] = [ + { + id: 'col-title', + name: 'Task', + type: 'title', + data: {}, + }, + { + id: 'col-status', + name: 'Status', + type: 'select', + data: { + options: [ + { id: 'opt-1', value: 'To Do', color: 'grey' }, + { id: 'opt-2', value: 'In Progress', color: 'blue' }, + { id: 'opt-3', value: 'Done', color: 'green' }, + ], + }, + }, + { + id: 'col-assignee', + name: 'Assignee', + type: 'rich-text', + data: {}, + }, + { + id: 'col-due', + name: 'Due Date', + type: 'date', + data: {}, + }, + { + id: 'col-priority', + name: 'Priority', + type: 'select', + data: { + options: [ + { id: 'opt-high', value: 'High', color: 'red' }, + { id: 'opt-med', value: 'Medium', color: 'orange' }, + { id: 'opt-low', value: 'Low', color: 'blue' }, + ], + }, + }, + { + id: 'col-progress', + name: 'Progress', + type: 'progress', + data: {}, + }, +]; +``` + +## View Types + +Each database block can have multiple views that present the same data in different layouts: + +### Table View + +```typescript +interface TableView { + id: string; + name: string; + mode: 'table'; + + // Which columns are visible and in what order + columns: Array<{ + id: string; // references Column.id + width: number; // column width in pixels + hide: boolean; // whether the column is hidden + }>; + + 
// Filtering rules + filter: FilterGroup; + + // Sort configuration + sort: SortRule[]; + + // Row grouping (optional) + groupBy?: GroupByConfig; +} +``` + +### Kanban View + +The kanban view groups rows into columns based on a select-type property: + +```typescript +interface KanbanView { + id: string; + name: string; + mode: 'kanban'; + + // The column used for grouping (must be select/multi-select type) + groupBy: { + columnId: string; + // Each select option becomes a kanban column + }; + + // Card configuration — what columns to show on each card + cardProperties: Array<{ + columnId: string; + visible: boolean; + }>; + + // Filtering and sorting still apply + filter: FilterGroup; + sort: SortRule[]; +} +``` + +```mermaid +flowchart LR + subgraph "Same Data" + DB[(Database Block)] + end + + subgraph "Table View" + T[Rows and columns grid] + end + + subgraph "Kanban View" + K1[To Do] + K2[In Progress] + K3[Done] + end + + subgraph "List View" + L[Compact list with metadata] + end + + DB --> T + DB --> K1 + DB --> K2 + DB --> K3 + DB --> L +``` + +## Filtering and Sorting + +Database views support composable filter rules: + +```typescript +interface FilterGroup { + type: 'group'; + op: 'and' | 'or'; + conditions: Array; +} + +interface FilterCondition { + type: 'condition'; + columnId: string; + operator: FilterOperator; + value: unknown; +} + +type FilterOperator = + | 'equals' + | 'not-equals' + | 'contains' + | 'not-contains' + | 'is-empty' + | 'is-not-empty' + | 'greater-than' + | 'less-than' + | 'before' // for dates + | 'after' // for dates + | 'is-checked' // for checkboxes + | 'is-unchecked'; // for checkboxes + +// Example: Show only high-priority tasks that are not done +const exampleFilter: FilterGroup = { + type: 'group', + op: 'and', + conditions: [ + { + type: 'condition', + columnId: 'col-priority', + operator: 'equals', + value: 'opt-high', + }, + { + type: 'condition', + columnId: 'col-status', + operator: 'not-equals', + value: 'opt-3', // 
"Done" + }, + ], +}; + +// Sorting +interface SortRule { + columnId: string; + direction: 'asc' | 'desc'; +} + +// Example: Sort by due date ascending, then by priority descending +const exampleSort: SortRule[] = [ + { columnId: 'col-due', direction: 'asc' }, + { columnId: 'col-priority', direction: 'desc' }, +]; +``` + +## How It Works Under the Hood: yjs Storage + +Database data is stored in yjs structures within the block model: + +```mermaid +flowchart TD + subgraph "yjs Document" + DB[Database Block - Y.Map] + DB --> COLS[columns - Y.Array of Y.Maps] + DB --> VIEWS[views - Y.Array of Y.Maps] + DB --> CELLS[cells - Y.Map of Y.Maps] + DB --> CHILDREN[children - Y.Array of block IDs] + end + + subgraph "Row Blocks" + R1[Row 1 - Paragraph Block] + R2[Row 2 - Paragraph Block] + R3[Row 3 - Paragraph Block] + end + + CHILDREN --> R1 + CHILDREN --> R2 + CHILDREN --> R3 + + subgraph "Cell Data" + CELLS --> C1["row1 -> { col-status: opt-2, col-due: 1711234567 }"] + CELLS --> C2["row2 -> { col-status: opt-1, col-due: 1711334567 }"] + end +``` + +```typescript +// How cells are read and written via yjs + +class DatabaseBlockModel { + // Read a cell value + getCell(rowId: string, columnId: string): CellValue | undefined { + const cells = this.yMap.get('cells') as Y.Map>; + const rowCells = cells.get(rowId); + return rowCells?.get(columnId); + } + + // Write a cell value (triggers yjs update -> sync) + setCell(rowId: string, columnId: string, value: CellValue) { + const cells = this.yMap.get('cells') as Y.Map>; + let rowCells = cells.get(rowId); + if (!rowCells) { + rowCells = new Y.Map(); + cells.set(rowId, rowCells); + } + rowCells.set(columnId, value); + // This yjs transaction automatically: + // 1. Persists to IndexedDB + // 2. Syncs to other clients via WebSocket + // 3. 
Updates any views observing this data + } + + // Add a new row + addRow(position?: number): string { + const newBlock = this.doc.addBlock( + 'affine:paragraph', + { type: 'text', text: new Y.Text('') }, + this.id, + position + ); + return newBlock; + } + + // Add a new column + addColumn(column: Column, position?: number) { + const columns = this.yMap.get('columns') as Y.Array; + if (position !== undefined) { + columns.insert(position, [column]); + } else { + columns.push([column]); + } + } +} +``` + +## Creating a Database Programmatically + +```typescript +// Create a database block with initial schema and data + +const noteBlockId = 'note:main'; + +// 1. Add the database block +const dbBlockId = doc.addBlock('affine:database', { + title: new Y.Text('Project Tasks'), + columns: projectColumns, + views: [ + { + id: 'view-table', + name: 'All Tasks', + mode: 'table', + columns: projectColumns.map(col => ({ + id: col.id, + width: 200, + hide: false, + })), + filter: { type: 'group', op: 'and', conditions: [] }, + sort: [], + }, + { + id: 'view-kanban', + name: 'Board', + mode: 'kanban', + groupBy: { columnId: 'col-status' }, + cardProperties: [ + { columnId: 'col-assignee', visible: true }, + { columnId: 'col-due', visible: true }, + { columnId: 'col-priority', visible: true }, + ], + filter: { type: 'group', op: 'and', conditions: [] }, + sort: [], + }, + ], + cells: {}, +}, noteBlockId); + +// 2. Add rows (child blocks) +const row1Id = doc.addBlock('affine:paragraph', { + type: 'text', + text: new Y.Text('Design landing page'), +}, dbBlockId); + +const row2Id = doc.addBlock('affine:paragraph', { + type: 'text', + text: new Y.Text('Implement auth flow'), +}, dbBlockId); + +// 3. 
Set cell values for each row +const dbModel = doc.getBlockById(dbBlockId); +dbModel.setCell(row1Id, 'col-status', 'opt-2'); // In Progress +dbModel.setCell(row1Id, 'col-priority', 'opt-high'); +dbModel.setCell(row1Id, 'col-due', Date.now()); + +dbModel.setCell(row2Id, 'col-status', 'opt-1'); // To Do +dbModel.setCell(row2Id, 'col-priority', 'opt-med'); +``` + +## Source References + +- [BlockSuite Database Block](https://github.com/toeverything/blocksuite/tree/master/packages/blocks/src/database-block) +- [AFFiNE Database Views](https://github.com/toeverything/AFFiNE/tree/canary/packages/frontend/core/src/components/database) +- [BlockSuite Documentation](https://blocksuite.io) + +## Summary + +AFFiNE's database blocks bring structured data into the document canvas as first-class blocks. Columns define the schema, views (table, kanban, list) provide different presentations of the same data, and all state is stored in yjs CRDTs for seamless collaboration. Filtering, sorting, and grouping operate on the view level, so the same data can be sliced differently across views. + +Next: [Chapter 7: Plugin System](07-plugin-system.md) — where we explore how to extend AFFiNE with custom blocks, plugins, and integrations. + +--- + +[Back to Tutorial Index](README.md) | [Previous: Chapter 5](05-ai-copilot.md) | [Next: Chapter 7](07-plugin-system.md) + +*Generated by [AI Codebase Knowledge Builder](https://github.com/The-Pocket/Tutorial-Codebase-Knowledge)* diff --git a/tutorials/affine-tutorial/07-plugin-system.md b/tutorials/affine-tutorial/07-plugin-system.md new file mode 100644 index 0000000..86d6323 --- /dev/null +++ b/tutorials/affine-tutorial/07-plugin-system.md @@ -0,0 +1,525 @@ +--- +layout: default +title: "Chapter 7: Plugin System" +nav_order: 7 +parent: AFFiNE Tutorial +--- + +# Chapter 7: Plugin System + +Welcome to **Chapter 7: Plugin System**. 
In this part of **AFFiNE Tutorial**, you will learn how to extend AFFiNE with custom blocks, plugins, and integrations using the extension architecture built on top of BlockSuite. + +AFFiNE's extensibility comes from two levels: the BlockSuite block specification system (see [Chapter 3: Block System](03-block-system.md)) for creating new block types, and the AFFiNE module system (see [Chapter 2: System Architecture](02-system-architecture.md)) for adding application-level features like sidebar panels, commands, and integrations. + +## What Problem Does This Solve? + +Every team has unique workflow needs. Out-of-the-box block types cover common cases, but organizations often need custom blocks (e.g., a CRM contact card, a CI/CD status widget, an API response viewer) or integrations (e.g., syncing with external tools). The plugin system provides a structured way to extend AFFiNE without forking the core codebase. + +## Learning Goals + +- understand the extension architecture and how plugins register with the system +- learn how to create a custom block type with schema, view, and service +- understand how to add commands, keyboard shortcuts, and toolbar items +- learn how to build sidebar panel extensions +- understand the plugin lifecycle and how plugins are loaded + +## Extension Architecture Overview + +```mermaid +flowchart TD + subgraph "Plugin Package" + A[Plugin Manifest] + B[Block Specs] + C[Commands] + D[UI Components] + E[Services] + end + + subgraph "AFFiNE Core" + F[Plugin Registry] + G[BlockSuite Schema Registry] + H[Command Registry] + I[UI Slot System] + J[Service Container] + end + + A --> F + B --> G + C --> H + D --> I + E --> J + + F --> K[Plugin Loaded and Active] +``` + +## Creating a Custom Block + +The most common extension pattern is creating a new block type. 
Here is a complete example of a custom "callout" block: + +### Step 1: Define the Block Schema + +```typescript +// plugins/callout-block/schema.ts + +import { defineBlockSchema } from '@blocksuite/store'; + +export type CalloutType = 'info' | 'warning' | 'error' | 'success' | 'tip'; + +export const CalloutBlockSchema = defineBlockSchema({ + flavour: 'custom:callout', + metadata: { + version: 1, + role: 'content', + // Can be placed inside note blocks + parent: ['affine:note'], + // Can contain text-based child blocks + children: ['affine:paragraph', 'affine:list', 'affine:code'], + }, + props: (internal) => ({ + // The callout type determines the icon and color + type: 'info' as CalloutType, + // Rich text content + text: internal.Text(), + // Optional custom title + title: '', + // Collapsed state + collapsed: false, + }), +}); +``` + +### Step 2: Create the Block View + +BlockSuite uses Lit web components for block rendering: + +```typescript +// plugins/callout-block/view.ts + +import { BlockElement } from '@blocksuite/block-std'; +import { html, css } from 'lit'; +import { customElement, property } from 'lit/decorators.js'; + +@customElement('custom-callout-block') +export class CalloutBlockElement extends BlockElement { + static override styles = css` + :host { + display: block; + margin: 8px 0; + } + + .callout-container { + display: flex; + border-radius: 8px; + padding: 16px; + gap: 12px; + } + + .callout-container[data-type='info'] { + background-color: #e8f4fd; + border-left: 4px solid #1890ff; + } + + .callout-container[data-type='warning'] { + background-color: #fff7e6; + border-left: 4px solid #fa8c16; + } + + .callout-container[data-type='error'] { + background-color: #fff1f0; + border-left: 4px solid #f5222d; + } + + .callout-container[data-type='success'] { + background-color: #f6ffed; + border-left: 4px solid #52c41a; + } + + .callout-container[data-type='tip'] { + background-color: #f9f0ff; + border-left: 4px solid #722ed1; + } + + .callout-icon 
{ + font-size: 20px; + flex-shrink: 0; + } + + .callout-content { + flex: 1; + min-width: 0; + } + + .callout-title { + font-weight: 600; + margin-bottom: 4px; + } + `; + + private _getIcon(type: string): string { + const icons: Record = { + info: 'i', + warning: '!', + error: 'x', + success: '✓', + tip: '★', + }; + return icons[type] || 'i'; + } + + override render() { + const model = this.model; + const type = model.props.type; + + return html` +
+      <div class="callout-container" data-type=${type}>
+        <div class="callout-icon">${this._getIcon(type)}</div>
+        <div class="callout-content">
+          ${model.props.title
+            ? html`<div class="callout-title">${model.props.title}</div>`
+            : null}
+          <div>${this.renderChildren(model)}</div>
+        </div>
+      </div>
+ `; + } +} +``` + +### Step 3: Create the Block Service + +```typescript +// plugins/callout-block/service.ts + +import { BlockService } from '@blocksuite/block-std'; + +export class CalloutBlockService extends BlockService { + override mounted() { + super.mounted(); + + // Register keyboard shortcuts for this block type + this.handleEvent('keyDown', (ctx) => { + const event = ctx.get('keyboardState').raw; + + // Cmd+Shift+C toggles callout type + if (event.metaKey && event.shiftKey && event.key === 'c') { + this.cycleCalloutType(); + event.preventDefault(); + } + }); + } + + private cycleCalloutType() { + const model = this.std.doc.getBlock(this.blockId)?.model; + if (!model) return; + + const types = ['info', 'warning', 'error', 'success', 'tip']; + const currentIndex = types.indexOf(model.props.type); + const nextType = types[(currentIndex + 1) % types.length]; + + this.std.doc.updateBlock(model, { type: nextType }); + } +} +``` + +### Step 4: Register the Block Spec + +```typescript +// plugins/callout-block/index.ts + +import { BlockSpec } from '@blocksuite/block-std'; +import { literal } from 'lit/static-html.js'; +import { CalloutBlockSchema } from './schema'; +import { CalloutBlockService } from './service'; + +export const CalloutBlockSpec: BlockSpec = { + schema: CalloutBlockSchema, + view: { + component: literal`custom-callout-block`, + }, + service: CalloutBlockService, +}; + +// Register in the editor configuration: +// packages/frontend/core/src/blocksuite/block-specs.ts + +import { CalloutBlockSpec } from '../plugins/callout-block'; + +export function getBlockSpecs() { + return [ + // ... 
built-in block specs + CalloutBlockSpec, + ]; +} +``` + +## Adding Commands and Keyboard Shortcuts + +AFFiNE uses a command system for registering actions: + +```typescript +// plugins/callout-block/commands.ts + +import { Command } from '@blocksuite/block-std'; + +// Command to insert a callout block +export const insertCalloutCommand: Command = { + id: 'insert-callout', + // Command runs in the block std context + run(ctx) { + const { std } = ctx; + const doc = std.doc; + + // Find the current selection to determine insertion point + const selection = std.selection; + const currentBlock = selection.getSelectedBlocks()[0]; + + if (!currentBlock) return; + + const parentId = currentBlock.model.parent?.id; + if (!parentId) return; + + // Insert the callout block after the current block + const index = currentBlock.model.parent.children.indexOf( + currentBlock.model + ); + + doc.addBlock( + 'custom:callout', + { type: 'info', text: new Y.Text('') }, + parentId, + index + 1 + ); + }, +}; + +// Register the command +std.command.add('insert-callout', insertCalloutCommand); +``` + +### Adding to the Slash Menu + +```typescript +// Register the callout block in the slash command menu + +const calloutSlashItem = { + name: 'Callout', + description: 'Insert a callout block', + icon: 'InfoIcon', + group: 'Content', + action: ({ doc, model }) => { + const parentId = model.parent?.id; + if (!parentId) return; + + const index = model.parent.children.indexOf(model); + doc.addBlock( + 'custom:callout', + { type: 'info', text: new Y.Text('') }, + parentId, + index + 1 + ); + }, +}; +``` + +## Building a Sidebar Panel Extension + +Beyond blocks, you can extend the AFFiNE application shell with sidebar panels: + +```typescript +// plugins/my-sidebar-panel/index.tsx + +import React, { useState, useEffect } from 'react'; + +// Sidebar panel component +export function MySidebarPanel() { + const [data, setData] = useState([]); + + useEffect(() => { + // Fetch data from external API or 
workspace + fetchPanelData().then(setData); + }, []); + + return ( +
+    <div className="my-sidebar-panel">
+      <h3>My Custom Panel</h3>
+      <ul>
+        {data.map((item, i) => (
+          <li key={i}>• {item.name}</li>
+        ))}
+      </ul>
+    </div>
+ ); +} + +// Register the sidebar panel +// This hooks into AFFiNE's module system + +export class MySidebarModule { + static readonly id = 'my-sidebar'; + + register(registry: ModuleRegistry) { + registry.registerSidebarPanel({ + id: 'my-sidebar-panel', + title: 'My Panel', + icon: 'CustomIcon', + component: MySidebarPanel, + position: 'right', + }); + } +} +``` + +## Plugin Lifecycle + +```mermaid +flowchart TD + A[Plugin Package Discovered] --> B[Manifest Validated] + B --> C[Dependencies Checked] + C --> D{Dependencies Met?} + D -->|No| E[Plugin Disabled - Missing Dependencies] + D -->|Yes| F[Block Specs Registered] + F --> G[Commands Registered] + G --> H[UI Components Mounted] + H --> I[Services Initialized] + I --> J[Plugin Active] + J --> K{User Disables Plugin} + K --> L[Services Disposed] + L --> M[UI Components Unmounted] + M --> N[Plugin Inactive] +``` + +## Building a Block Toolbar Extension + +Custom blocks often need toolbar actions for user interaction: + +```typescript +// plugins/callout-block/toolbar.ts + +import { WidgetElement } from '@blocksuite/block-std'; +import { html, css } from 'lit'; +import { customElement } from 'lit/decorators.js'; + +@customElement('callout-toolbar-widget') +export class CalloutToolbarWidget extends WidgetElement { + static override styles = css` + :host { + display: flex; + gap: 4px; + padding: 4px 8px; + background: white; + border-radius: 8px; + box-shadow: 0 2px 8px rgba(0, 0, 0, 0.12); + } + + button { + padding: 4px 8px; + border: none; + border-radius: 4px; + cursor: pointer; + font-size: 12px; + } + + button:hover { + background: #f0f0f0; + } + `; + + private _changeType(type: string) { + const model = this.blockElement?.model; + if (!model) return; + this.std.doc.updateBlock(model, { type }); + } + + private _toggleCollapsed() { + const model = this.blockElement?.model; + if (!model) return; + this.std.doc.updateBlock(model, { + collapsed: !model.props.collapsed, + }); + } + + override render() { + return 
html` + + + + + + + `; + } +} +``` + +## Testing Custom Blocks + +```typescript +// plugins/callout-block/__tests__/callout.spec.ts + +import { describe, it, expect } from 'vitest'; +import { DocCollection } from '@blocksuite/store'; +import { CalloutBlockSchema } from '../schema'; + +describe('CalloutBlock', () => { + it('should create a callout block with default props', () => { + const collection = new DocCollection({ + schema: [CalloutBlockSchema], + }); + const doc = collection.createDoc(); + + doc.load(); + const rootId = doc.addBlock('affine:page'); + const noteId = doc.addBlock('affine:note', {}, rootId); + + const calloutId = doc.addBlock( + 'custom:callout', + { type: 'info' }, + noteId + ); + + const block = doc.getBlockById(calloutId); + expect(block).toBeDefined(); + expect(block?.flavour).toBe('custom:callout'); + expect(block?.props.type).toBe('info'); + }); + + it('should enforce valid parent constraints', () => { + // Callout blocks can only be inside note blocks + // Attempting to add one elsewhere should fail or be rejected + const collection = new DocCollection({ + schema: [CalloutBlockSchema], + }); + const doc = collection.createDoc(); + doc.load(); + + const rootId = doc.addBlock('affine:page'); + + // This should fail — callout cannot be direct child of page + expect(() => { + doc.addBlock('custom:callout', { type: 'info' }, rootId); + }).toThrow(); + }); +}); +``` + +## Source References + +- [BlockSuite Block Std](https://github.com/toeverything/blocksuite/tree/master/packages/block-std) +- [BlockSuite Blocks Package](https://github.com/toeverything/blocksuite/tree/master/packages/blocks) +- [AFFiNE Plugin Examples](https://github.com/toeverything/AFFiNE/tree/canary/packages/frontend/core/src/blocksuite) +- [Lit Web Components](https://lit.dev/) + +## Summary + +AFFiNE's plugin system leverages BlockSuite's spec-driven architecture to support custom block types (schema + view + service), application-level extensions (commands, sidebar 
panels, toolbar items), and a clear lifecycle for loading and unloading plugins. The Lit-based rendering system for blocks and the React-based application shell provide two extension surfaces for different types of customizations. + +Next: [Chapter 8: Self-Hosting and Deployment](08-self-hosting-and-deployment.md) — where we cover Docker deployment, cloud hosting options, and storage backend configuration. + +--- + +[Back to Tutorial Index](README.md) | [Previous: Chapter 6](06-database-and-views.md) | [Next: Chapter 8](08-self-hosting-and-deployment.md) + +*Generated by [AI Codebase Knowledge Builder](https://github.com/The-Pocket/Tutorial-Codebase-Knowledge)* diff --git a/tutorials/affine-tutorial/08-self-hosting-and-deployment.md b/tutorials/affine-tutorial/08-self-hosting-and-deployment.md new file mode 100644 index 0000000..d331989 --- /dev/null +++ b/tutorials/affine-tutorial/08-self-hosting-and-deployment.md @@ -0,0 +1,569 @@ +--- +layout: default +title: "Chapter 8: Self-Hosting and Deployment" +nav_order: 8 +parent: AFFiNE Tutorial +--- + +# Chapter 8: Self-Hosting and Deployment + +Welcome to **Chapter 8: Self-Hosting and Deployment**. In this part of **AFFiNE Tutorial**, you will learn how to deploy AFFiNE to production environments using Docker, configure storage backends, set up authentication, and operate the platform reliably. + +Self-hosting AFFiNE gives you full data ownership, the ability to use custom AI providers (see [Chapter 5: AI Copilot](05-ai-copilot.md)), and control over the collaboration infrastructure (see [Chapter 4: Collaborative Editing](04-collaborative-editing.md)). This chapter covers everything from a minimal Docker setup to a production-grade deployment with external databases and object storage. + +## What Problem Does This Solve? 
+ +While AFFiNE Cloud provides a managed experience, many teams and organizations need to self-host for data sovereignty, compliance, air-gapped environments, or integration with internal infrastructure. This chapter provides a clear path from local Docker deployment to hardened production operations. + +## Learning Goals + +- deploy AFFiNE using Docker and Docker Compose +- configure PostgreSQL, Redis, and object storage backends +- set up authentication and user management +- understand the server architecture and API surface +- configure backups, monitoring, and operational health checks +- scale AFFiNE for team and organizational use + +## Quick Start: Docker Compose + +The fastest way to run a self-hosted AFFiNE instance: + +```yaml +# docker-compose.yml + +version: '3.8' + +services: + affine: + image: ghcr.io/toeverything/affine-graphql:stable + container_name: affine + ports: + - '3010:3010' + - '5555:5555' + depends_on: + postgres: + condition: service_healthy + redis: + condition: service_healthy + environment: + - NODE_ENV=production + - AFFINE_SERVER_PORT=3010 + - AFFINE_SERVER_HOST=0.0.0.0 + + # Database configuration + - DATABASE_URL=postgresql://affine:affine_password@postgres:5432/affine + + # Redis configuration + - REDIS_SERVER_HOST=redis + - REDIS_SERVER_PORT=6379 + + # Server configuration + - AFFINE_SERVER_HTTPS=false + - AFFINE_SERVER_EXTERNAL_URL=http://localhost:3010 + + # Authentication + - AFFINE_AUTH_EMAIL_SENDER=noreply@example.com + + # Storage (local filesystem by default) + - AFFINE_STORAGE_PROVIDER=fs + - AFFINE_STORAGE_PATH=/data/storage + + # AI Copilot (optional) + # - COPILOT_OPENAI_API_KEY=sk-... 
+ # - COPILOT_OPENAI_MODEL=gpt-4o + + volumes: + - affine_data:/data + restart: unless-stopped + + postgres: + image: postgres:16-alpine + container_name: affine-postgres + environment: + POSTGRES_USER: affine + POSTGRES_PASSWORD: affine_password + POSTGRES_DB: affine + volumes: + - postgres_data:/var/lib/postgresql/data + healthcheck: + test: ['CMD-SHELL', 'pg_isready -U affine'] + interval: 10s + timeout: 5s + retries: 5 + restart: unless-stopped + + redis: + image: redis:7-alpine + container_name: affine-redis + volumes: + - redis_data:/data + healthcheck: + test: ['CMD', 'redis-cli', 'ping'] + interval: 10s + timeout: 5s + retries: 5 + restart: unless-stopped + +volumes: + affine_data: + postgres_data: + redis_data: +``` + +```bash +# Deploy with Docker Compose +docker compose up -d + +# Check logs +docker compose logs -f affine + +# The application will be available at http://localhost:3010 +``` + +## Server Architecture + +The AFFiNE server is a Node.js application built with NestJS that provides several services: + +```mermaid +flowchart TD + subgraph "AFFiNE Server" + A[NestJS Application] + B[GraphQL API] + C[WebSocket Server] + D[REST Endpoints] + end + + subgraph "Services" + E[Auth Service] + F[Workspace Service] + G[Doc Sync Service] + H[Copilot Service] + I[Storage Service] + J[User Service] + end + + subgraph "Storage" + K[(PostgreSQL)] + L[(Redis)] + M[(Object Storage)] + end + + A --> B + A --> C + A --> D + B --> E + B --> F + B --> J + C --> G + B --> H + B --> I + E --> K + F --> K + G --> K + G --> L + H --> K + I --> M + J --> K +``` + +### Key Server Components + +```typescript +// packages/backend/server/src/app.module.ts +// Simplified module structure + +@Module({ + imports: [ + // Core infrastructure + ConfigModule, + PrismaModule, // PostgreSQL via Prisma ORM + CacheModule, // Redis caching + StorageModule, // File/blob storage + + // Feature modules + AuthModule, // Authentication and sessions + UserModule, // User management + 
WorkspaceModule, // Workspace CRUD and permissions + DocModule, // Document management and sync + SyncModule, // WebSocket sync protocol + CopilotModule, // AI features + QuotaModule, // Usage limits and quotas + + // API surface + GraphQLModule, // GraphQL API (primary) + ], +}) +export class AppModule {} +``` + +## Database Configuration + +AFFiNE uses PostgreSQL for relational data and metadata: + +```typescript +// Database schema managed by Prisma +// packages/backend/server/prisma/schema.prisma + +// Key models: + +model User { + id String @id @default(uuid()) + email String @unique + name String? + avatarUrl String? + emailVerified Boolean @default(false) + createdAt DateTime @default(now()) + + workspaces WorkspaceUserPermission[] + sessions Session[] +} + +model Workspace { + id String @id @default(uuid()) + public Boolean @default(false) + createdAt DateTime @default(now()) + + permissions WorkspaceUserPermission[] + docs Doc[] +} + +model Doc { + id String @id + workspaceId String + workspace Workspace @relation(fields: [workspaceId], references: [id]) + + // yjs document binary data + blob Bytes? 
+ + createdAt DateTime @default(now()) + updatedAt DateTime @updatedAt +} + +model WorkspaceUserPermission { + id String @id @default(uuid()) + workspaceId String + userId String + // Owner, Admin, Write, Read + permission Int + + workspace Workspace @relation(fields: [workspaceId], references: [id]) + user User @relation(fields: [userId], references: [id]) +} +``` + +### Running Database Migrations + +```bash +# Run pending migrations +docker compose exec affine npx prisma migrate deploy + +# Or within the container +docker compose exec affine sh -c "node --import ./scripts/migrate.mjs" +``` + +## Storage Backend Configuration + +AFFiNE supports multiple storage backends for blobs (images, attachments, files): + +### Local Filesystem (Default) + +```bash +# Environment variables +AFFINE_STORAGE_PROVIDER=fs +AFFINE_STORAGE_PATH=/data/storage +``` + +### AWS S3 + +```bash +# S3-compatible object storage +AFFINE_STORAGE_PROVIDER=s3 +AFFINE_S3_BUCKET=my-affine-bucket +AFFINE_S3_REGION=us-east-1 +AFFINE_S3_ACCESS_KEY_ID=AKIA... +AFFINE_S3_SECRET_ACCESS_KEY=... +AFFINE_S3_ENDPOINT=https://s3.amazonaws.com +``` + +### S3-Compatible (MinIO, R2, etc.) + +```bash +# MinIO example +AFFINE_STORAGE_PROVIDER=s3 +AFFINE_S3_BUCKET=affine +AFFINE_S3_REGION=us-east-1 +AFFINE_S3_ACCESS_KEY_ID=minioadmin +AFFINE_S3_SECRET_ACCESS_KEY=minioadmin +AFFINE_S3_ENDPOINT=http://minio:9000 +AFFINE_S3_FORCE_PATH_STYLE=true +``` + +## Authentication Configuration + +### Email/Password (Default) + +```bash +# SMTP configuration for email verification +MAILER_HOST=smtp.example.com +MAILER_PORT=465 +MAILER_USER=noreply@example.com +MAILER_PASSWORD=your_smtp_password +MAILER_SENDER=noreply@example.com +MAILER_SECURE=true +``` + +### OAuth Providers + +```bash +# Google OAuth +OAUTH_GOOGLE_CLIENT_ID=... +OAUTH_GOOGLE_CLIENT_SECRET=... + +# GitHub OAuth +OAUTH_GITHUB_CLIENT_ID=... +OAUTH_GITHUB_CLIENT_SECRET=... 
+``` + +## How It Works Under the Hood: Request Flow + +```mermaid +sequenceDiagram + participant Client as AFFiNE Client + participant LB as Load Balancer / Reverse Proxy + participant Server as AFFiNE Server + participant PG as PostgreSQL + participant Redis as Redis + participant S3 as Object Storage + + Note over Client,S3: Page Load + Client->>LB: GET / (static assets) + LB->>Server: Proxy request + Server->>Client: SPA bundle + + Note over Client,S3: Authentication + Client->>Server: GraphQL mutation: signIn + Server->>PG: Verify credentials + Server->>Redis: Create session + Server->>Client: Auth token + + Note over Client,S3: Document Sync + Client->>Server: WebSocket connect + Server->>PG: Load doc state + Server->>Client: SyncStep2 (yjs updates) + Client->>Server: Live updates (yjs binary) + Server->>PG: Persist updates + Server->>Redis: Pub/Sub to other instances + + Note over Client,S3: Blob Upload + Client->>Server: Upload image + Server->>S3: Store blob + Server->>PG: Record blob metadata + Server->>Client: Blob URL +``` + +## Reverse Proxy Configuration + +### Nginx + +```nginx +# /etc/nginx/conf.d/affine.conf + +server { + listen 443 ssl http2; + server_name affine.example.com; + + ssl_certificate /etc/ssl/certs/affine.pem; + ssl_certificate_key /etc/ssl/private/affine.key; + + # Maximum upload size for attachments + client_max_body_size 100M; + + location / { + proxy_pass http://localhost:3010; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } + + # WebSocket support for real-time sync + location /socket.io/ { + proxy_pass http://localhost:3010; + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_read_timeout 86400; + } +} +``` + +## Backup Strategy + +### Database Backup + 
+```bash
+#!/bin/bash
+# backup.sh — automated PostgreSQL backup
+
+BACKUP_DIR="/backups/affine"
+TIMESTAMP=$(date +%Y%m%d_%H%M%S)
+BACKUP_FILE="${BACKUP_DIR}/affine_${TIMESTAMP}.sql.gz"
+
+# Create backup directory
+mkdir -p "$BACKUP_DIR"
+
+# Dump PostgreSQL database (plain SQL format, compressed with gzip)
+docker compose exec -T postgres pg_dump \
+  -U affine \
+  -d affine \
+  | gzip > "$BACKUP_FILE"
+
+# Retain last 30 days of backups
+find "$BACKUP_DIR" -name "affine_*.sql.gz" -mtime +30 -delete
+
+echo "Backup created: $BACKUP_FILE"
+```
+
+### Blob Storage Backup
+
+```bash
+# For filesystem storage
+rsync -avz /data/storage/ /backups/affine-blobs/
+
+# For S3 storage — blobs are already durable in S3
+# But you may want cross-region replication for DR
+```
+
+## Health Checks and Monitoring
+
+```bash
+# Server health endpoint
+curl http://localhost:3010/api/healthz
+
+# Response: { "status": "ok" }
+```
+
+```yaml
+# Docker Compose healthcheck for the AFFiNE service
+services:
+  affine:
+    # ... other config ...
+ healthcheck: + test: ['CMD', 'curl', '-f', 'http://localhost:3010/api/healthz'] + interval: 30s + timeout: 10s + retries: 3 + start_period: 40s +``` + +### Key Metrics to Monitor + +```typescript +// Important operational metrics: +const monitoringChecklist = { + // Infrastructure + 'cpu_usage': 'Server CPU utilization', + 'memory_usage': 'Server memory utilization', + 'disk_usage': 'Storage volume capacity', + + // Application + 'websocket_connections': 'Active sync connections', + 'api_response_time': 'GraphQL query latency (p50, p95, p99)', + 'sync_latency': 'Time for yjs updates to propagate', + + // Database + 'pg_connections': 'Active PostgreSQL connections', + 'pg_query_time': 'Slow query detection', + 'redis_memory': 'Redis memory usage', + + // Storage + 'blob_storage_size': 'Total blob storage consumption', + 'upload_errors': 'Failed blob uploads', +}; +``` + +## Scaling Considerations + +### Horizontal Scaling + +```mermaid +flowchart TD + LB[Load Balancer] --> S1[AFFiNE Server 1] + LB --> S2[AFFiNE Server 2] + LB --> S3[AFFiNE Server 3] + + S1 --> PG[(PostgreSQL)] + S2 --> PG + S3 --> PG + + S1 --> R[(Redis - Pub/Sub)] + S2 --> R + S3 --> R + + S1 --> OS[(Object Storage)] + S2 --> OS + S3 --> OS +``` + +When running multiple server instances: + +- **WebSocket affinity** — use sticky sessions or Redis Pub/Sub to ensure real-time sync works across instances +- **Redis Pub/Sub** — used to broadcast yjs updates between server instances so all connected clients receive updates +- **Shared storage** — all instances must use the same PostgreSQL database and object storage backend +- **Session management** — Redis-backed sessions work across instances + +### Environment Variables for Scaling + +```bash +# Enable Redis-based pub/sub for multi-instance sync +AFFINE_SYNC_PUBSUB=redis + +# Connection pool sizing +DATABASE_POOL_SIZE=20 + +# WebSocket configuration +AFFINE_WS_MAX_CONNECTIONS=10000 +AFFINE_WS_PING_INTERVAL=30000 +``` + +## Upgrade Procedure + 
+```bash +# 1. Pull the latest image +docker compose pull affine + +# 2. Stop the current instance +docker compose stop affine + +# 3. Run database migrations (if needed) +docker compose run --rm affine npx prisma migrate deploy + +# 4. Start the new version +docker compose up -d affine + +# 5. Verify health +curl http://localhost:3010/api/healthz + +# 6. Check logs for errors +docker compose logs --tail=100 affine +``` + +## Source References + +- [AFFiNE Self-Host Docs](https://docs.affine.pro/docs/self-host-affine) +- [AFFiNE Docker Images](https://github.com/toeverything/AFFiNE/pkgs/container/affine-graphql) +- [Server Package](https://github.com/toeverything/AFFiNE/tree/canary/packages/backend/server) +- [Docker Configuration](https://github.com/toeverything/AFFiNE/tree/canary/docker) + +## Summary + +Self-hosting AFFiNE involves deploying the server with Docker Compose, configuring PostgreSQL for metadata, Redis for caching and pub/sub, and an object storage backend for blobs. The server provides GraphQL APIs, WebSocket sync, and authentication — all behind a reverse proxy for production use. Backup, monitoring, and scaling follow standard patterns for Node.js applications with stateful WebSocket connections. 
+ +--- + +[Back to Tutorial Index](README.md) | [Previous: Chapter 7](07-plugin-system.md) + +*Generated by [AI Codebase Knowledge Builder](https://github.com/The-Pocket/Tutorial-Codebase-Knowledge)* diff --git a/tutorials/affine-tutorial/README.md b/tutorials/affine-tutorial/README.md new file mode 100644 index 0000000..94952d8 --- /dev/null +++ b/tutorials/affine-tutorial/README.md @@ -0,0 +1,116 @@ +--- +layout: default +title: "AFFiNE Tutorial" +nav_order: 192 +has_children: true +format_version: v2 +--- + +# AFFiNE Tutorial: Open-Source AI Workspace with Docs, Whiteboards, and Databases + +> Learn how to use `toeverything/AFFiNE` to build, extend, and self-host a modern knowledge workspace combining documents, whiteboards, and databases — powered by BlockSuite, CRDT-based collaboration, and integrated AI copilot features. + +[![GitHub Repo](https://img.shields.io/badge/GitHub-toeverything%2FAFFiNE-black?logo=github)](https://github.com/toeverything/AFFiNE) +[![License](https://img.shields.io/badge/license-MIT-blue.svg)](https://github.com/toeverything/AFFiNE/blob/canary/LICENSE) +[![Latest Release](https://img.shields.io/github/v/release/toeverything/AFFiNE)](https://github.com/toeverything/AFFiNE/releases) + +## Why This Track Matters + +AFFiNE is one of the most ambitious open-source alternatives to Notion and Miro, combining document editing, whiteboard canvases, and structured databases into a single workspace. With approximately 66,000 GitHub stars, it represents a significant shift in how developers think about collaborative knowledge tools. 
+
+This track is particularly relevant for developers who:
+
+- want to understand how modern block-based editors are architected with BlockSuite
+- need to learn CRDT-based real-time collaboration patterns using yjs and OctoBase
+- are building AI-augmented productivity tools and want to study copilot integration patterns
+- plan to self-host a Notion-like workspace with full data ownership and extensibility
+
+This track focuses on:
+
+- understanding the BlockSuite framework and its block-tree content model
+- mastering the dual page/edgeless editing modes and their underlying data structures
+- learning CRDT synchronization with yjs for conflict-free real-time collaboration
+- integrating AI copilot features into document and whiteboard workflows
+- building custom blocks and plugins to extend the workspace
+- deploying and operating self-hosted AFFiNE instances in production
+
+## Current Snapshot (auto-updated)
+
+- repository: [`toeverything/AFFiNE`](https://github.com/toeverything/AFFiNE)
+- stars: about **66k**
+- latest release: check [releases page](https://github.com/toeverything/AFFiNE/releases)
+
+## Mental Model
+
+```mermaid
+flowchart LR
+    A[Knowledge need] --> B[Workspace creation]
+    B --> C[Block-based content authoring]
+    C --> D[Page and edgeless modes]
+    D --> E[CRDT sync and collaboration]
+    E --> F[AI copilot augmentation]
+    F --> G[Database views and organization]
+    G --> H[Plugin extension and deployment]
+```
+
+## Chapter Guide
+
+| Chapter | Key Question | Outcome |
+|:--------|:-------------|:--------|
+| [01 - Getting Started](01-getting-started.md) | How do I set up AFFiNE locally and create my first workspace? | Working dev environment and first workspace |
+| [02 - System Architecture](02-system-architecture.md) | How do BlockSuite, OctoBase, and yjs fit together? | Clear mental model of the full stack |
+| [03 - Block System](03-block-system.md) | How do blocks, pages, and edgeless canvases work?
| Understanding of the content model | +| [04 - Collaborative Editing](04-collaborative-editing.md) | How does real-time CRDT sync and conflict resolution work? | Ability to reason about collaboration | +| [05 - AI Copilot](05-ai-copilot.md) | How are AI features integrated into the workspace? | Understanding of copilot architecture | +| [06 - Database and Views](06-database-and-views.md) | How do database blocks, kanban, and table views work? | Ability to build structured data workflows | +| [07 - Plugin System](07-plugin-system.md) | How do I extend AFFiNE with custom blocks and plugins? | Plugin development readiness | +| [08 - Self-Hosting and Deployment](08-self-hosting-and-deployment.md) | How do I deploy and operate AFFiNE in production? | Production deployment baseline | + +## What You Will Learn + +- how AFFiNE's BlockSuite framework organizes content into a composable block tree +- how page mode and edgeless (whiteboard) mode share the same underlying data model +- how yjs CRDTs enable real-time conflict-free collaboration across clients +- how the AI copilot integrates with blocks for writing assistance, summarization, and generation +- how database blocks support table, kanban, and filtered views within documents +- how to build custom blocks and plugins using AFFiNE's extension architecture +- how to self-host AFFiNE with Docker and configure storage backends + +## Source References + +- [AFFiNE Repository](https://github.com/toeverything/AFFiNE) +- [README](https://github.com/toeverything/AFFiNE/blob/canary/README.md) +- [BlockSuite Repository](https://github.com/toeverything/blocksuite) +- [AFFiNE Documentation](https://docs.affine.pro) +- [Self-Hosting Guide](https://docs.affine.pro/docs/self-host-affine) + +## Related Tutorials + +- [LobeChat Tutorial](../lobechat-tutorial/) — AI chat framework with plugin architecture and multi-model support +- [Dify Tutorial](../dify-tutorial/) — LLM app development platform with visual workflow orchestration +- 
[SiYuan Tutorial](../siyuan-tutorial/) — Local-first personal knowledge management system + +--- + +Start with [Chapter 1: Getting Started](01-getting-started.md). + +## Navigation & Backlinks + +- [Start Here: Chapter 1: Getting Started](01-getting-started.md) +- [Back to Main Catalog](../../README.md#-tutorial-catalog) +- [Browse A-Z Tutorial Directory](../../discoverability/tutorial-directory.md) +- [Search by Intent](../../discoverability/query-hub.md) +- [Explore Category Hubs](../../README.md#category-hubs) + +## Full Chapter Map + +1. [Chapter 1: Getting Started](01-getting-started.md) +2. [Chapter 2: System Architecture](02-system-architecture.md) +3. [Chapter 3: Block System](03-block-system.md) +4. [Chapter 4: Collaborative Editing](04-collaborative-editing.md) +5. [Chapter 5: AI Copilot](05-ai-copilot.md) +6. [Chapter 6: Database and Views](06-database-and-views.md) +7. [Chapter 7: Plugin System](07-plugin-system.md) +8. [Chapter 8: Self-Hosting and Deployment](08-self-hosting-and-deployment.md) + +*Generated by [AI Codebase Knowledge Builder](https://github.com/The-Pocket/Tutorial-Codebase-Knowledge)* diff --git a/tutorials/metagpt-tutorial/01-getting-started.md b/tutorials/metagpt-tutorial/01-getting-started.md new file mode 100644 index 0000000..1bf3383 --- /dev/null +++ b/tutorials/metagpt-tutorial/01-getting-started.md @@ -0,0 +1,279 @@ +--- +layout: default +title: "Chapter 1: Getting Started" +parent: "MetaGPT Tutorial" +nav_order: 1 +--- + +# Chapter 1: Getting Started with MetaGPT + +Welcome to MetaGPT! In this chapter you will install the framework, configure it for your LLM provider, and run your first multi-agent software generation from a single requirement. By the end, you will have a working MetaGPT installation and a clear understanding of the development loop. + +## What Problem Does This Solve? 
+ +Building software involves multiple roles -- product managers gather requirements, architects design systems, engineers write code, and QA testers validate correctness. Coordinating all of this manually is slow and expensive. MetaGPT automates this entire pipeline by assigning each role to a specialized AI agent that collaborates through structured workflows, turning a one-line idea into working code. + +## Installing MetaGPT + +### Basic Installation + +```bash +# Create and activate a virtual environment +python -m venv metagpt-env +source metagpt-env/bin/activate # On Windows: metagpt-env\Scripts\activate + +# Install MetaGPT +pip install metagpt + +# Or install from source for latest features +git clone https://github.com/geekan/MetaGPT.git +cd MetaGPT +pip install -e . +``` + +### Verifying Installation + +```bash +# Check that MetaGPT is installed correctly +python -c "import metagpt; print(metagpt.__version__)" +``` + +## Configuration + +MetaGPT uses a YAML configuration file to manage LLM providers, API keys, and runtime settings. 
+ +### Setting Up Your Configuration + +```yaml +# ~/.metagpt/config2.yaml +llm: + api_type: "openai" + model: "gpt-4-turbo" + base_url: "https://api.openai.com/v1" + api_key: "sk-YOUR_API_KEY_HERE" + +# Optional: cost controls +max_budget: 10.0 # Maximum spend in USD per run +``` + +### Environment Variables (Alternative) + +```bash +# You can also configure via environment variables +export OPENAI_API_KEY="sk-YOUR_API_KEY_HERE" +export METAGPT_MODEL="gpt-4-turbo" +``` + +### Using Other LLM Providers + +```yaml +# Example: Using Anthropic Claude +llm: + api_type: "claude" + model: "claude-3-opus-20240229" + api_key: "sk-ant-YOUR_KEY_HERE" + +# Example: Using a local model via Ollama +llm: + api_type: "ollama" + model: "llama3:70b" + base_url: "http://localhost:11434/api" +``` + +## Your First Multi-Agent Run + +The simplest way to use MetaGPT is to give it a product requirement and let the full team of agents handle the rest. + +### From the Command Line + +```bash +# Generate a complete project from a requirement +metagpt "Create a CLI tool that converts CSV files to JSON format with data validation" +``` + +### From Python Code + +```python +import asyncio +from metagpt.software_company import generate_repo, ProjectRepo + +async def main(): + """Run MetaGPT to generate a complete project.""" + repo: ProjectRepo = await generate_repo( + idea="Create a CLI tool that converts CSV files to JSON format with data validation" + ) + print(f"Project generated at: {repo.workdir}") + +asyncio.run(main()) +``` + +### Understanding the Output + +When you run MetaGPT, it creates a structured project directory: + +``` +workspace/ + csv_to_json/ + docs/ + prd.md # Product Requirements Document + system_design.md # Architecture design + api_spec.md # API specifications + csv_to_json/ + __init__.py + main.py # Entry point + converter.py # Core conversion logic + validator.py # Data validation + tests/ + test_converter.py # Unit tests + test_validator.py # Validation tests + 
requirements.txt # Dependencies +``` + +## Understanding the Agent Pipeline + +When you submit a requirement, MetaGPT orchestrates a sequence of specialized agents: + +```mermaid +sequenceDiagram + participant User + participant PM as ProductManager + participant Arch as Architect + participant Eng as Engineer + participant QA as QATester + + User->>PM: "Create a CSV to JSON CLI tool" + PM->>PM: Competitive analysis + PM->>Arch: PRD document + Arch->>Arch: System design + Arch->>Eng: Tech spec + API design + Eng->>Eng: Write code files + Eng->>QA: Code implementation + QA->>QA: Generate test cases + QA-->>Eng: Bug reports (if any) + Eng-->>User: Final project output +``` + +Each agent reads messages published by upstream agents, performs its specialized work, and publishes structured outputs for downstream agents. This mirrors the publish-subscribe pattern used in real software teams. + +## A Simpler Example: Single Agent + +You do not need to run the full pipeline every time. MetaGPT also lets you use individual roles: + +```python +import asyncio +from metagpt.roles import Architect + +async def main(): + """Use just the Architect agent to design a system.""" + architect = Architect() + + # Give the architect a requirement to design + result = await architect.run( + "Design a microservice architecture for a real-time chat application " + "that supports 10,000 concurrent users." 
+ ) + print(result) + +asyncio.run(main()) +``` + +## Configuration Deep Dive + +### Key Configuration Options + +```yaml +# ~/.metagpt/config2.yaml - Full example +llm: + api_type: "openai" + model: "gpt-4-turbo" + api_key: "sk-YOUR_KEY" + base_url: "https://api.openai.com/v1" + +# Project workspace location +workspace: + path: "./workspace" + +# Cost management +max_budget: 10.0 + +# Logging +log_level: "INFO" + +# Code execution settings +enable_code_execution: true +``` + +### Verifying Your Setup + +```python +import asyncio +from metagpt.config2 import Config + +async def verify_setup(): + """Verify MetaGPT configuration is valid.""" + config = Config.default() + print(f"LLM provider: {config.llm.api_type}") + print(f"Model: {config.llm.model}") + print(f"Max budget: {config.max_budget}") + +asyncio.run(verify_setup()) +``` + +## How It Works Under the Hood + +```mermaid +flowchart TD + A["User Requirement"] --> B["Environment Initialization"] + B --> C["Role Registry"] + C --> D["Message Bus Setup"] + D --> E["Run Loop Start"] + + E --> F{"Any role has
pending messages?"} + F -->|Yes| G["Role reads messages"] + G --> H["Role selects Action"] + H --> I["Action calls LLM"] + I --> J["Action produces output"] + J --> K["Output published to Message Bus"] + K --> F + + F -->|No| L["All roles idle → Run Complete"] + L --> M["Output saved to workspace"] + + classDef input fill:#e1f5fe,stroke:#01579b + classDef process fill:#f3e5f5,stroke:#4a148c + classDef decision fill:#fff3e0,stroke:#ef6c00 + classDef output fill:#e8f5e8,stroke:#1b5e20 + + class A input + class B,C,D,E,G,H,I,J,K process + class F decision + class L,M output +``` + +The core engine works as an event-driven loop: + +1. **Environment Initialization** -- loads configuration, sets up the LLM connection, and initializes the workspace. +2. **Role Registry** -- creates instances of each role (ProductManager, Architect, etc.) and registers them in a shared environment. +3. **Message Bus** -- all roles communicate through a centralized message bus. Each role watches for messages tagged with relevant action types. +4. **Run Loop** -- the engine repeatedly checks whether any role has pending messages to process. When a role receives a message, it selects the appropriate action, calls the LLM, and publishes the result. +5. **Completion** -- when no role has pending work, the loop terminates and results are saved. + +## Common Issues and Troubleshooting + +| Issue | Solution | +|-------|----------| +| `API key not found` | Check `~/.metagpt/config2.yaml` or environment variables | +| `Model not available` | Verify your API key has access to the specified model | +| `Rate limit exceeded` | Add retry configuration or reduce parallel agent count | +| `Workspace permission error` | Ensure write permissions on the workspace directory | +| `Import errors` | Verify installation with `pip show metagpt` | + +## Summary + +In this chapter you installed MetaGPT, configured it for your LLM provider, and ran your first multi-agent generation. 
You saw how a single requirement flows through ProductManager, Architect, Engineer, and QA agents to produce a complete project. + +**Next:** [Chapter 2: Agent Roles](02-agent-roles.md) -- dive deep into each built-in role and learn how to customize agent behavior. + +--- + +[Back to Tutorial Index](README.md) | [Next: Chapter 2: Agent Roles](02-agent-roles.md) diff --git a/tutorials/metagpt-tutorial/02-agent-roles.md b/tutorials/metagpt-tutorial/02-agent-roles.md new file mode 100644 index 0000000..ed77154 --- /dev/null +++ b/tutorials/metagpt-tutorial/02-agent-roles.md @@ -0,0 +1,377 @@ +--- +layout: default +title: "Chapter 2: Agent Roles" +parent: "MetaGPT Tutorial" +nav_order: 2 +--- + +# Chapter 2: Agent Roles -- ProductManager, Architect, Engineer, and QA + +In [Chapter 1](01-getting-started.md) you ran a full multi-agent pipeline. Now it is time to understand exactly what each role does, how roles are defined internally, and how to create your own custom roles. + +## What Problem Does This Solve? + +In a real software team, unstructured collaboration leads to chaos -- requirements drift, designs contradict implementation, and bugs slip through untested code. MetaGPT solves this by encoding each team member's responsibilities, inputs, outputs, and constraints into a formal `Role` class. Every agent knows precisely what it should do, what it should watch for, and what it should produce. 
+ +## The Built-In Role Hierarchy + +MetaGPT ships with several pre-built roles that mirror a real software development team: + +```mermaid +flowchart LR + R["Role (Base Class)"] --> PM["ProductManager"] + R --> A["Architect"] + R --> E["Engineer"] + R --> QA["QATester"] + R --> PM2["ProjectManager"] + R --> RS["Researcher"] + + PM -->|produces| PRD["PRD Document"] + A -->|produces| SD["System Design"] + E -->|produces| CODE["Source Code"] + QA -->|produces| TC["Test Cases"] + + classDef base fill:#e1f5fe,stroke:#01579b + classDef role fill:#f3e5f5,stroke:#4a148c + classDef artifact fill:#fff3e0,stroke:#ef6c00 + + class R base + class PM,A,E,QA,PM2,RS role + class PRD,SD,CODE,TC artifact +``` + +## The Role Base Class + +Every agent in MetaGPT extends the `Role` base class. Understanding this class is essential for customization. + +```python +from metagpt.roles import Role +from metagpt.actions import Action +from metagpt.schema import Message + +class Role: + """Base class for all MetaGPT roles.""" + + name: str = "" # Human-readable name + profile: str = "" # Role description / system prompt + goal: str = "" # What this role aims to achieve + constraints: str = "" # Behavioral constraints + actions: list[Action] = [] # Actions this role can perform + + async def _observe(self) -> list[Message]: + """Read new messages from the environment.""" + ... + + async def _think(self) -> Action: + """Decide which action to take next.""" + ... + + async def _act(self) -> Message: + """Execute the chosen action and return a result.""" + ... + + async def run(self, message: str) -> Message: + """Full observe-think-act cycle.""" + ... +``` + +The three-phase cycle -- **observe**, **think**, **act** -- is the heartbeat of every role. + +## ProductManager + +The ProductManager is the entry point of the pipeline. It takes a raw user requirement and produces a structured Product Requirements Document (PRD). 
+ +```python +from metagpt.roles.product_manager import ProductManager + +# The ProductManager's internal configuration +pm = ProductManager() +print(pm.profile) +# "You are a Product Manager focused on creating successful products..." + +print(pm.goal) +# "Efficiently create a successful product that meets user needs" + +print(pm.constraints) +# "Use the same language as the user requirement" +``` + +### What the ProductManager Produces + +Given a requirement like "Build a task management app", the ProductManager generates: + +- **Competitive analysis** of existing products +- **User stories** and acceptance criteria +- **Feature prioritization** using a structured format +- **PRD document** in Markdown with sections for goals, scope, and requirements + +```python +import asyncio +from metagpt.roles.product_manager import ProductManager +from metagpt.schema import Message + +async def run_product_manager(): + pm = ProductManager() + result = await pm.run(Message(content="Build a real-time collaborative whiteboard app")) + print(result.content) # Structured PRD document + +asyncio.run(run_product_manager()) +``` + +## Architect + +The Architect receives the PRD and produces a technical system design, including data models, API specifications, and technology choices. + +```python +from metagpt.roles.architect import Architect + +architect = Architect() +print(architect.profile) +# "You are an Architect designing robust, scalable systems..." 
+``` + +### Architect Outputs + +- **System design document** with component diagrams +- **API specification** (often OpenAPI-compatible) +- **Data model definitions** including relationships +- **Technology stack recommendations** + +```python +import asyncio +from metagpt.roles.architect import Architect +from metagpt.schema import Message + +async def run_architect(): + architect = Architect() + # In practice, the architect reads the PRD from the message bus + prd_content = """ + ## Product Requirements + - Real-time collaborative whiteboard + - Support 100 concurrent users + - Drawing tools: pen, shapes, text + - Export to PNG/SVG + """ + result = await architect.run(Message(content=prd_content)) + print(result.content) # System design + API specs + +asyncio.run(run_architect()) +``` + +## Engineer + +The Engineer takes the system design and API specs and writes actual code files. This is the most LLM-intensive role. + +```python +from metagpt.roles.engineer import Engineer + +engineer = Engineer() +print(engineer.goal) +# "Write elegant, readable, extensible, efficient code" + +print(engineer.constraints) +# "The code should conform to standards like PEP8, be modular, " +# "easy to read and maintain..." +``` + +### Engineer Behavior + +The Engineer role is unique because it: + +1. Parses the system design to identify required files +2. Generates code for each file in dependency order +3. Handles imports and cross-file references +4. 
Can review and fix its own code based on QA feedback + +```python +import asyncio +from metagpt.roles.engineer import Engineer +from metagpt.schema import Message + +async def run_engineer(): + engineer = Engineer() + tech_spec = """ + ## System Design + Files to implement: + - main.py: FastAPI entry point + - models.py: Pydantic data models + - services.py: Business logic layer + - database.py: SQLite connection handling + """ + result = await engineer.run(Message(content=tech_spec)) + print(result.content) + +asyncio.run(run_engineer()) +``` + +## QA Tester + +The QA agent receives the code and generates test cases, runs them, and reports bugs back to the Engineer. + +```python +from metagpt.roles.qa_engineer import QaEngineer + +qa = QaEngineer() +print(qa.goal) +# "Write comprehensive and correct test cases to ensure code quality" +``` + +### QA Feedback Loop + +```python +import asyncio +from metagpt.roles.qa_engineer import QaEngineer +from metagpt.schema import Message + +async def run_qa(): + qa = QaEngineer() + code_content = """ + ## Implementation + File: converter.py + ```python + def convert_csv_to_json(csv_path: str) -> dict: + import csv, json + with open(csv_path) as f: + reader = csv.DictReader(f) + return json.dumps(list(reader)) + ``` + """ + result = await qa.run(Message(content=code_content)) + print(result.content) # Test cases + bug reports + +asyncio.run(run_qa()) +``` + +## Creating a Custom Role + +The real power of MetaGPT is building your own roles. 
Here is a complete example of a custom `TechnicalWriter` role:
+
+```python
+from metagpt.roles import Role, Engineer
+from metagpt.actions import Action
+from metagpt.schema import Message
+
+class WriteDocumentation(Action):
+    """Action that generates technical documentation."""
+    name: str = "WriteDocumentation"
+
+    async def run(self, context: str) -> str:
+        prompt = f"""Based on the following code and design documents,
+        write comprehensive technical documentation including:
+        - Overview and architecture
+        - API reference
+        - Usage examples
+        - Configuration guide
+
+        Context:
+        {context}
+        """
+        return await self._aask(prompt)
+
+
+class TechnicalWriter(Role):
+    """A custom role that generates documentation from code."""
+    name: str = "TechWriter"
+    profile: str = "You are a Technical Writer who creates clear, comprehensive documentation."
+    goal: str = "Produce developer-friendly documentation for the project."
+    constraints: str = "Write in clear, concise English. Include code examples."
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self.set_actions([WriteDocumentation])
+        # Watch for messages from Engineer (code output)
+        self._watch([Engineer])
+```
+
+### Using the Custom Role
+
+```python
+import asyncio
+
+async def main():
+    writer = TechnicalWriter()
+    result = await writer.run(Message(
+        content="## Code\ndef add(a, b): return a + b\ndef multiply(a, b): return a * b"
+    ))
+    print(result.content)
+
+asyncio.run(main())
+```
+
+## How It Works Under the Hood
+
+```mermaid
+flowchart TD
+    A["Message arrives on bus"] --> B["Role._observe()"]
+    B --> C{"Message matches
watched actions?"} + C -->|No| D["Skip"] + C -->|Yes| E["Role._think()"] + E --> F{"Multiple actions
available?"} + F -->|One| G["Select that action"] + F -->|Many| H["LLM decides which action"] + G --> I["Role._act()"] + H --> I + I --> J["Action.run() calls LLM"] + J --> K["Format output as Message"] + K --> L["Publish to Message Bus"] + + classDef input fill:#e1f5fe,stroke:#01579b + classDef process fill:#f3e5f5,stroke:#4a148c + classDef decision fill:#fff3e0,stroke:#ef6c00 + classDef output fill:#e8f5e8,stroke:#1b5e20 + + class A input + class B,E,G,H,I,J,K process + class C,F decision + class D,L output +``` + +Key implementation details: + +1. **Message Filtering** -- each role has a `_watch` list that determines which upstream actions it responds to. The ProductManager watches for user requirements; the Architect watches for PRD outputs. +2. **Action Selection** -- if a role has multiple actions, the `_think` step uses the LLM to decide which action to invoke based on context. +3. **Stateful Execution** -- roles maintain internal state across the observe-think-act cycle, allowing them to remember previous interactions and build on earlier outputs. +4. **Publish-Subscribe** -- all outputs are published as `Message` objects to a shared environment. Downstream roles receive only the messages they are subscribed to. + +## Role Configuration Patterns + +### Setting Role Constraints + +```python +class StrictEngineer(Role): + """An engineer with additional constraints.""" + name: str = "StrictEngineer" + profile: str = "Senior Software Engineer" + goal: str = "Write production-quality code" + constraints: str = ( + "Always include type hints. " + "Always add docstrings. " + "Never use global variables. " + "Follow SOLID principles." 
+ ) +``` + +### Roles That Watch Multiple Sources + +```python +class CodeReviewer(Role): + """Reviews code from multiple engineers.""" + name: str = "CodeReviewer" + profile: str = "Senior Code Reviewer" + + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.set_actions([ReviewCode]) + # Watch for output from both Engineer and QA + self._watch([Engineer, QaEngineer]) +``` + +## Summary + +Every MetaGPT agent is a `Role` with a defined profile, goal, constraints, and set of actions. The built-in roles -- ProductManager, Architect, Engineer, and QA -- form a complete software development pipeline. You can extend this by creating custom roles with their own actions and watch patterns. + +**Next:** [Chapter 3: SOPs and Workflows](03-sop-and-workflows.md) -- learn how Standardized Operating Procedures govern role collaboration. + +--- + +[Previous: Chapter 1: Getting Started](01-getting-started.md) | [Back to Tutorial Index](README.md) | [Next: Chapter 3: SOPs and Workflows](03-sop-and-workflows.md) diff --git a/tutorials/metagpt-tutorial/03-sop-and-workflows.md b/tutorials/metagpt-tutorial/03-sop-and-workflows.md new file mode 100644 index 0000000..d35bafd --- /dev/null +++ b/tutorials/metagpt-tutorial/03-sop-and-workflows.md @@ -0,0 +1,402 @@ +--- +layout: default +title: "Chapter 3: SOPs and Workflows" +parent: "MetaGPT Tutorial" +nav_order: 3 +--- + +# Chapter 3: Standardized Operating Procedures and Workflows + +In [Chapter 2](02-agent-roles.md) you learned about individual roles. This chapter explains how MetaGPT coordinates those roles through Standardized Operating Procedures (SOPs) -- the structured workflows that turn a collection of agents into a functioning team. + +## What Problem Does This Solve? + +When multiple LLM agents collaborate without structure, the result is unpredictable: agents repeat work, contradict each other, or produce outputs that do not connect. 
SOPs solve this by defining a deterministic sequence of handoffs, ensuring each agent receives the right input, produces the right output, and passes it to the right recipient. This is what makes MetaGPT fundamentally different from "chatbot-style" multi-agent systems. + +## The SOP Concept + +An SOP in MetaGPT is an implicit contract between roles. It defines: + +1. **Who does what** -- which role is responsible for each phase +2. **In what order** -- the sequence of execution +3. **With what inputs** -- what each role reads from the message bus +4. **Producing what outputs** -- the structured artifact each role generates + +```mermaid +flowchart LR + subgraph SOP["Software Development SOP"] + direction LR + P1["1. Requirements
(ProductManager)"] --> P2["2. Design
(Architect)"] + P2 --> P3["3. Implementation
(Engineer)"] + P3 --> P4["4. Testing
(QATester)"] + P4 -->|bug found| P3 + end + + U["User Requirement"] --> P1 + P4 --> O["Deliverable"] + + classDef phase fill:#f3e5f5,stroke:#4a148c + classDef io fill:#e1f5fe,stroke:#01579b + + class P1,P2,P3,P4 phase + class U,O io +``` + +## The Default Software Development SOP + +MetaGPT's flagship SOP is the software development workflow. Here is how it works step by step: + +### Phase 1: Requirements Analysis (ProductManager) + +```python +from metagpt.actions.write_prd import WritePRD + +# The ProductManager executes WritePRD, which: +# 1. Analyzes the user requirement +# 2. Performs competitive analysis +# 3. Identifies user personas +# 4. Defines user stories with acceptance criteria +# 5. Produces a structured PRD document +``` + +The PRD output follows a strict template: + +```markdown +## Product Requirements Document + +### Goals +- Primary goal description + +### User Stories +1. As a [user type], I want to [action] so that [benefit] + +### Competitive Analysis +| Product | Pros | Cons | + +### Requirement Analysis +- Functional requirements +- Non-functional requirements + +### UI/UX Design +- Interface guidelines +``` + +### Phase 2: System Design (Architect) + +```python +from metagpt.actions.design_api import WriteDesign + +# The Architect executes WriteDesign, which: +# 1. Reads the PRD from the message bus +# 2. Defines the system architecture +# 3. Specifies data models +# 4. Designs API endpoints +# 5. Chooses technology stack +``` + +### Phase 3: Implementation (Engineer) + +```python +from metagpt.actions.write_code import WriteCode + +# The Engineer executes WriteCode, which: +# 1. Reads system design and API spec +# 2. Identifies all files to create +# 3. Generates code in dependency order +# 4. Ensures cross-file consistency +``` + +### Phase 4: Testing (QATester) + +```python +from metagpt.actions.write_test import WriteTest + +# The QATester executes WriteTest, which: +# 1. Reads the code files +# 2. 
Generates unit tests for each module +# 3. Generates integration tests +# 4. Reports any issues found +``` + +## Implementing a Custom SOP + +You can define your own SOPs by creating roles with specific watch patterns and action sequences. + +### Example: Research Report SOP + +```python +import asyncio +from metagpt.roles import Role +from metagpt.actions import Action +from metagpt.schema import Message +from metagpt.environment import Environment + + +class GatherSources(Action): + """Gather and summarize research sources.""" + name: str = "GatherSources" + + async def run(self, context: str) -> str: + prompt = f"""You are a research assistant. Given the topic below, + identify 5-10 key sources, papers, or references and provide + a brief summary of each. + + Topic: {context} + """ + return await self._aask(prompt) + + +class AnalyzeFindings(Action): + """Analyze research findings and identify patterns.""" + name: str = "AnalyzeFindings" + + async def run(self, context: str) -> str: + prompt = f"""You are a research analyst. Based on the gathered sources + below, identify key themes, contradictions, and insights. + + Sources: + {context} + """ + return await self._aask(prompt) + + +class WriteReport(Action): + """Write a structured research report.""" + name: str = "WriteReport" + + async def run(self, context: str) -> str: + prompt = f"""You are a technical writer. 
Based on the analysis below, + write a comprehensive research report with: + - Executive Summary + - Key Findings + - Detailed Analysis + - Conclusions and Recommendations + + Analysis: + {context} + """ + return await self._aask(prompt) + + +class Researcher(Role): + """Gathers sources on a topic.""" + name: str = "Researcher" + profile: str = "Research Assistant" + goal: str = "Find and summarize relevant sources" + + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.set_actions([GatherSources]) + + +class Analyst(Role): + """Analyzes gathered research.""" + name: str = "Analyst" + profile: str = "Research Analyst" + goal: str = "Identify patterns and insights in research data" + + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.set_actions([AnalyzeFindings]) + self._watch([Researcher]) # Watches Researcher output + + +class ReportWriter(Role): + """Writes the final report.""" + name: str = "ReportWriter" + profile: str = "Technical Writer" + goal: str = "Produce a clear, comprehensive report" + + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.set_actions([WriteReport]) + self._watch([Analyst]) # Watches Analyst output + + +async def run_research_sop(): + """Execute the research report SOP.""" + env = Environment() + + # Add roles to the environment + env.add_roles([ + Researcher(), + Analyst(), + ReportWriter(), + ]) + + # Kick off with a research topic + env.publish_message(Message( + content="The impact of large language models on software engineering practices", + role="User" + )) + + # Run until all roles are idle + await env.run() + +asyncio.run(run_research_sop()) +``` + +## Workflow Patterns + +### Sequential Workflow (Default) + +The most common pattern. Each role runs after the previous one completes. 
+ +```mermaid +flowchart LR + A["Role A"] -->|output| B["Role B"] + B -->|output| C["Role C"] + C -->|output| D["Role D"] +``` + +```python +class StepOne(Role): + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.set_actions([ActionA]) + +class StepTwo(Role): + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.set_actions([ActionB]) + self._watch([StepOne]) # Sequential dependency + +class StepThree(Role): + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.set_actions([ActionC]) + self._watch([StepTwo]) # Sequential dependency +``` + +### Fan-Out Workflow + +One role's output triggers multiple parallel roles. + +```mermaid +flowchart TD + A["Coordinator"] --> B["Worker A"] + A --> C["Worker B"] + A --> D["Worker C"] + B --> E["Aggregator"] + C --> E + D --> E +``` + +```python +class Coordinator(Role): + """Publishes a task that multiple workers pick up.""" + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.set_actions([DistributeTask]) + +class WorkerA(Role): + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.set_actions([ProcessPartA]) + self._watch([Coordinator]) + +class WorkerB(Role): + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.set_actions([ProcessPartB]) + self._watch([Coordinator]) + +class Aggregator(Role): + """Collects results from all workers.""" + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.set_actions([CombineResults]) + self._watch([WorkerA, WorkerB]) # Watches multiple roles +``` + +### Feedback Loop Workflow + +A downstream role can send feedback to an upstream role, creating an iterative improvement loop. 
+ +```python +class Coder(Role): + """Writes code, also handles revision requests.""" + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.set_actions([WriteCode, ReviseCode]) + self._watch([Architect, Reviewer]) # Watches both upstream and feedback + +class Reviewer(Role): + """Reviews code and sends feedback.""" + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.set_actions([ReviewCode]) + self._watch([Coder]) +``` + +## How It Works Under the Hood + +```mermaid +flowchart TD + A["Environment.run()"] --> B["Collect all roles"] + B --> C["For each role: _observe()"] + C --> D{"Role has
new messages?"} + D -->|Yes| E["role._react()"] + D -->|No| F["Skip role"] + E --> G["_think() selects action"] + G --> H["_act() executes action"] + H --> I["Publish result Message"] + I --> J{"Any role still
has work?"} + J -->|Yes| C + J -->|No| K["Run complete"] + + classDef process fill:#f3e5f5,stroke:#4a148c + classDef decision fill:#fff3e0,stroke:#ef6c00 + classDef terminal fill:#e8f5e8,stroke:#1b5e20 + + class A,B,C,E,G,H,I process + class D,J decision + class F,K terminal +``` + +The SOP is enforced through the message bus and watch patterns: + +1. **Message Tags** -- every message carries metadata about which action produced it. Roles use `_watch` to subscribe only to specific action types. +2. **Ordering Guarantee** -- the environment processes roles in registration order within each cycle, ensuring deterministic execution when roles have clear dependencies. +3. **Convergence** -- the run loop terminates when no role has pending messages, ensuring that feedback loops eventually stabilize. +4. **Idempotency** -- each message is processed exactly once by each watching role, preventing duplicate work. + +## Customizing the Default SOP + +You can modify the default software development SOP by adding, removing, or replacing roles: + +```python +import asyncio +from metagpt.environment import Environment +from metagpt.roles import ProductManager, Architect, Engineer +from metagpt.schema import Message + +async def custom_pipeline(): + """Run a pipeline without QA (faster, cheaper).""" + env = Environment() + env.add_roles([ + ProductManager(), + Architect(), + Engineer(), + # QaTester omitted intentionally + ]) + + env.publish_message(Message( + content="Build a simple URL shortener service", + role="User" + )) + + await env.run() + +asyncio.run(custom_pipeline()) +``` + +## Summary + +SOPs are the backbone of MetaGPT's reliability. They transform unstructured multi-agent chat into a predictable, auditable pipeline. The key patterns -- sequential, fan-out, and feedback loop -- can be combined to model any team workflow. The message bus and watch mechanism enforce these patterns without requiring explicit orchestration code. 
+ +**Next:** [Chapter 4: Action System](04-action-system.md) -- learn how to build the individual actions that roles execute. + +--- + +[Previous: Chapter 2: Agent Roles](02-agent-roles.md) | [Back to Tutorial Index](README.md) | [Next: Chapter 4: Action System](04-action-system.md) diff --git a/tutorials/metagpt-tutorial/04-action-system.md b/tutorials/metagpt-tutorial/04-action-system.md new file mode 100644 index 0000000..d7aae92 --- /dev/null +++ b/tutorials/metagpt-tutorial/04-action-system.md @@ -0,0 +1,385 @@ +--- +layout: default +title: "Chapter 4: Action System" +parent: "MetaGPT Tutorial" +nav_order: 4 +--- + +# Chapter 4: The Action System -- Actions, Action Nodes, and Custom Actions + +In [Chapter 3](03-sop-and-workflows.md) you learned how SOPs coordinate roles. Now we zoom into the atomic unit of work in MetaGPT: the **Action**. Every meaningful operation -- writing a PRD, generating code, running tests -- is an Action. + +## What Problem Does This Solve? + +LLMs produce unstructured text by default. When building multi-agent systems, you need structured, validated outputs that downstream agents can reliably parse. The Action system solves this by providing a framework for defining prompts, parsing outputs, validating structure, and handling errors -- all in a reusable, composable way. + +## The Action Base Class + +Every action in MetaGPT extends the `Action` class: + +```python +from metagpt.actions import Action + +class Action: + """Base class for all MetaGPT actions.""" + + name: str = "" # Unique action identifier + node: ActionNode = None # Optional structured output schema + + async def run(self, *args, **kwargs) -> str: + """Execute this action. Override in subclasses.""" + ... + + async def _aask(self, prompt: str, system_msgs: list[str] = None) -> str: + """Send a prompt to the LLM and return the response.""" + ... 
+``` + +### A Simple Custom Action + +```python +from metagpt.actions import Action + +class SummarizeText(Action): + """Summarize a block of text into key points.""" + name: str = "SummarizeText" + + async def run(self, text: str) -> str: + prompt = f"""Summarize the following text into 3-5 bullet points. + Be concise and focus on the most important information. + + Text: + {text} + """ + result = await self._aask(prompt) + return result +``` + +### Using the Action + +```python +import asyncio + +async def main(): + action = SummarizeText() + summary = await action.run( + "MetaGPT is a multi-agent framework that assigns GPT agents " + "to different software development roles. It uses standardized " + "operating procedures to coordinate agents, producing structured " + "outputs like PRDs, system designs, and tested code from a single " + "requirement." + ) + print(summary) + +asyncio.run(main()) +``` + +## Action Nodes: Structured Output + +Action Nodes are MetaGPT's mechanism for enforcing structured output from LLMs. Instead of free-form text, you define an output schema and the framework ensures the LLM's response conforms to it. 
+ +### Defining an Action Node + +```python +from metagpt.actions.action_node import ActionNode + +# Define a structured output schema +REVIEW_NODE = ActionNode( + key="CodeReview", + expected_type=str, + instruction="Review the code and provide structured feedback", + example="", + schema=""" + { + "summary": "Brief overview of code quality", + "issues": [ + { + "severity": "high|medium|low", + "file": "filename.py", + "line": 42, + "description": "What is wrong", + "suggestion": "How to fix it" + } + ], + "score": 0-10, + "approved": true/false + } + """ +) +``` + +### Composing Action Nodes + +Action Nodes can be composed into trees for complex structured outputs: + +```python +from metagpt.actions.action_node import ActionNode + +# Individual leaf nodes +GOAL_NODE = ActionNode( + key="goal", + expected_type=str, + instruction="Describe the primary goal of the product", + example="Create a fast, user-friendly URL shortener" +) + +USER_STORIES_NODE = ActionNode( + key="user_stories", + expected_type=list[str], + instruction="List 3-5 user stories in standard format", + example=[ + "As a user, I want to shorten URLs so I can share them easily", + "As an admin, I want analytics so I can track link usage" + ] +) + +REQUIREMENTS_NODE = ActionNode( + key="requirements", + expected_type=list[str], + instruction="List functional and non-functional requirements", + example=["Support custom short URLs", "Handle 1000 requests/second"] +) + +# Compose into a parent node +PRD_NODE = ActionNode( + key="prd", + expected_type=str, + instruction="Generate a complete Product Requirements Document", + example="", + children=[GOAL_NODE, USER_STORIES_NODE, REQUIREMENTS_NODE] +) +``` + +### Using Action Nodes in an Action + +```python +from metagpt.actions import Action +from metagpt.actions.action_node import ActionNode + +ANALYSIS_NODE = ActionNode( + key="analysis", + expected_type=dict, + instruction="Analyze the given topic", + example={"summary": "...", "key_points": ["..."], 
"confidence": 0.9} +) + +class StructuredAnalysis(Action): + """An action that produces structured output using ActionNode.""" + name: str = "StructuredAnalysis" + node: ActionNode = ANALYSIS_NODE + + async def run(self, topic: str) -> dict: + # fill() calls the LLM and parses the response into the node schema + result = await self.node.fill( + context=topic, + llm=self.llm + ) + return result.instruct_content.model_dump() +``` + +```python +import asyncio + +async def main(): + action = StructuredAnalysis() + result = await action.run("The future of WebAssembly in server-side computing") + print(result) + # {"summary": "...", "key_points": [...], "confidence": 0.85} + +asyncio.run(main()) +``` + +## Built-In Actions + +MetaGPT ships with several production-quality actions: + +| Action | Used By | Output | +|--------|---------|--------| +| `WritePRD` | ProductManager | Product Requirements Document | +| `WriteDesign` | Architect | System design + API spec | +| `WriteCode` | Engineer | Source code files | +| `WriteTest` | QaTester | Test cases | +| `WriteCodeReview` | Engineer | Code review feedback | +| `DebugError` | Engineer | Bug fix suggestions | +| `RunCode` | Engineer | Code execution results | +| `SearchAndSummarize` | Researcher | Research summaries | + +### Examining a Built-In Action + +```python +from metagpt.actions.write_prd import WritePRD + +# WritePRD uses Action Nodes internally +prd_action = WritePRD() +print(prd_action.name) # "WritePRD" + +# It defines structured output for: +# - Product goals +# - User stories +# - Competitive analysis +# - Requirements specification +``` + +## Advanced Action Patterns + +### Actions with Validation + +```python +import json +from metagpt.actions import Action + +class GenerateConfig(Action): + """Generate and validate a configuration file.""" + name: str = "GenerateConfig" + + async def run(self, requirements: str) -> dict: + prompt = f"""Generate a JSON configuration based on these requirements: + 
{requirements}
+
+        Return ONLY valid JSON.
+        """
+        for attempt in range(3):
+            response = await self._aask(prompt)
+            try:
+                # Strip markdown code fences if present
+                # (removeprefix/removesuffix, not str.strip, which removes
+                # characters rather than substrings)
+                clean = response.strip()
+                clean = clean.removeprefix("```json").removeprefix("```").removesuffix("```").strip()
+                config = json.loads(clean)
+                return config
+            except json.JSONDecodeError:
+                if attempt < 2:
+                    prompt = f"Your previous response was not valid JSON. Try again:\n{prompt}"
+                else:
+                    raise ValueError("Failed to generate valid JSON after 3 attempts")
+```
+
+### Actions with Multi-Step Prompting
+
+```python
+from metagpt.actions import Action
+
+class DesignDatabase(Action):
+    """Multi-step action: first analyze, then design schema."""
+    name: str = "DesignDatabase"
+
+    async def run(self, requirements: str) -> str:
+        # Step 1: Analyze data requirements
+        analysis = await self._aask(
+            f"Analyze the data requirements for:\n{requirements}\n"
+            "List all entities, their attributes, and relationships."
+        )
+
+        # Step 2: Generate schema based on analysis
+        schema = await self._aask(
+            f"Based on this data analysis:\n{analysis}\n"
+            "Generate a complete SQL CREATE TABLE schema with "
+            "proper types, constraints, and foreign keys."
+        )
+
+        # Step 3: Generate ORM models
+        models = await self._aask(
+            f"Based on this SQL schema:\n{schema}\n"
+            "Generate SQLAlchemy ORM models in Python."
+ ) + + return f"## Data Analysis\n{analysis}\n\n## SQL Schema\n{schema}\n\n## ORM Models\n{models}" +``` + +### Actions with Context from Previous Actions + +```python +from metagpt.actions import Action +from metagpt.schema import Message + +class RefineDesign(Action): + """Refine a design based on feedback from multiple sources.""" + name: str = "RefineDesign" + + async def run(self, messages: list[Message]) -> str: + # Collect context from multiple upstream messages + original_design = "" + feedback_items = [] + + for msg in messages: + if "design" in msg.cause_by.lower(): + original_design = msg.content + elif "review" in msg.cause_by.lower(): + feedback_items.append(msg.content) + + prompt = f"""Original design: + {original_design} + + Feedback received: + {chr(10).join(feedback_items)} + + Produce a revised design that addresses all feedback points. + """ + return await self._aask(prompt) +``` + +## How It Works Under the Hood + +```mermaid +flowchart TD + A["Role._act() called"] --> B["Select Action from action list"] + B --> C["Action.run() invoked"] + C --> D{"Has ActionNode?"} + D -->|Yes| E["ActionNode.fill()"] + D -->|No| F["Direct _aask() call"] + E --> G["Build prompt from node schema"] + G --> H["Call LLM"] + H --> I["Parse response into schema"] + I --> J{"Parsing
successful?"} + J -->|Yes| K["Return structured data"] + J -->|No| L["Retry with repair prompt"] + L --> H + F --> M["Call LLM with raw prompt"] + M --> N["Return string response"] + K --> O["Wrap in Message"] + N --> O + O --> P["Publish to Message Bus"] + + classDef process fill:#f3e5f5,stroke:#4a148c + classDef decision fill:#fff3e0,stroke:#ef6c00 + classDef output fill:#e8f5e8,stroke:#1b5e20 + + class A,B,C,E,F,G,H,I,L,M process + class D,J decision + class K,N,O,P output +``` + +Key internals: + +1. **Prompt Construction** -- Action Nodes automatically build prompts that include the expected output schema, examples, and instructions. This dramatically improves output quality compared to free-form prompts. +2. **Output Parsing** -- the framework attempts to parse the LLM response into the declared schema. For JSON schemas, it uses structured extraction. For text schemas, it uses pattern matching. +3. **Retry Logic** -- if parsing fails, the framework sends a "repair" prompt that includes the original response and the parsing error, asking the LLM to fix its output. +4. **LLM Abstraction** -- `_aask()` abstracts over different LLM providers, handling API calls, rate limiting, and token management transparently. + +## Registering Actions with Roles + +```python +from metagpt.roles import Role + +class DataPipelineBuilder(Role): + """A role that builds data pipelines through multiple actions.""" + name: str = "DataPipelineBuilder" + profile: str = "Data Engineer" + + def __init__(self, **kwargs): + super().__init__(**kwargs) + # Register multiple actions -- the role will select the appropriate one + self.set_actions([ + DesignDatabase, + GenerateConfig, + WriteCode, # reuse built-in + ]) +``` + +## Summary + +Actions are the building blocks of everything agents do in MetaGPT. Simple actions use `_aask()` for free-form LLM calls. Action Nodes enforce structured output through schemas, automatic parsing, and retry logic. 
You can compose nodes into trees, chain actions into multi-step workflows, and add validation logic for reliability. + +**Next:** [Chapter 5: Memory and Context](05-memory-and-context.md) -- learn how agents remember and share information. + +--- + +[Previous: Chapter 3: SOPs and Workflows](03-sop-and-workflows.md) | [Back to Tutorial Index](README.md) | [Next: Chapter 5: Memory and Context](05-memory-and-context.md) diff --git a/tutorials/metagpt-tutorial/05-memory-and-context.md b/tutorials/metagpt-tutorial/05-memory-and-context.md new file mode 100644 index 0000000..285f804 --- /dev/null +++ b/tutorials/metagpt-tutorial/05-memory-and-context.md @@ -0,0 +1,399 @@ +--- +layout: default +title: "Chapter 5: Memory and Context" +parent: "MetaGPT Tutorial" +nav_order: 5 +--- + +# Chapter 5: Memory and Context -- How Agents Remember and Share Information + +In [Chapter 4](04-action-system.md) you learned how Actions produce outputs. This chapter covers how those outputs are stored, retrieved, and shared across agents through MetaGPT's memory system. + +## What Problem Does This Solve? + +LLMs are stateless -- each API call starts with no memory of previous interactions. In a multi-agent system, this is a critical problem: the Engineer needs to remember the Architect's design, the QA agent needs access to the code, and iterative refinement requires remembering feedback from previous rounds. MetaGPT's memory system provides persistent, queryable context that agents can read from and write to. + +## Memory Architecture Overview + +```mermaid +flowchart TD + subgraph Agent["Each Agent"] + AM["Agent Memory
(private)"] + WM["Working Memory
(current task)"] + end + + subgraph Shared["Shared Environment"] + MB["Message Bus
(all published messages)"] + SM["Shared Memory
(persistent store)"] + end + + AM <--> WM + WM -->|publish| MB + MB -->|subscribe| WM + AM <--> SM + + classDef private fill:#f3e5f5,stroke:#4a148c + classDef shared fill:#e1f5fe,stroke:#01579b + + class AM,WM private + class MB,SM shared +``` + +MetaGPT has three levels of memory: + +1. **Agent Memory** -- private to each role, stores that role's history of actions and observations +2. **Working Memory** -- the active context for the current task, built from relevant messages +3. **Shared Memory / Message Bus** -- the environment-level store where all published messages are accessible to watching roles + +## Agent Memory + +Each role maintains its own memory of past interactions: + +```python +from metagpt.roles import Role +from metagpt.schema import Message + +class MemoryAwareRole(Role): + """A role that uses its memory for decision-making.""" + name: str = "MemoryAwareRole" + profile: str = "An agent that remembers" + + async def _act(self) -> Message: + # Access this role's message history + memories = self.rc.memory.get() + + # Get the most recent messages + recent = self.rc.memory.get(k=5) # Last 5 messages + + # Search memory by keyword + relevant = self.rc.memory.get_by_keyword("database") + + # Build context from memory + context = "\n".join([m.content for m in relevant]) + + result = await self._aask( + f"Based on your previous work:\n{context}\n\n" + "Continue with the next step." 
+ ) + return Message(content=result, role=self.name) +``` + +### Memory Storage Operations + +```python +from metagpt.memory import Memory +from metagpt.schema import Message + +# Create a memory instance +memory = Memory() + +# Add messages to memory +memory.add(Message(content="User wants a REST API", role="User")) +memory.add(Message(content="Design: FastAPI with SQLite", role="Architect")) +memory.add(Message(content="Code: main.py implemented", role="Engineer")) + +# Retrieve all messages +all_msgs = memory.get() +print(f"Total messages: {len(all_msgs)}") + +# Retrieve last k messages +recent = memory.get(k=2) + +# Get messages by role +architect_msgs = [m for m in memory.get() if m.role == "Architect"] + +# Count messages +print(f"Memory size: {memory.count()}") + +# Clear memory +memory.clear() +``` + +## The Message Bus + +The message bus is the primary mechanism for inter-agent communication. Messages flow through it automatically based on the watch patterns defined in [Chapter 3](03-sop-and-workflows.md). + +### Message Structure + +```python +from metagpt.schema import Message + +# A message carries content, metadata, and routing information +msg = Message( + content="The system should use PostgreSQL for persistence", + role="Architect", # Who sent it + cause_by="WriteDesign", # Which action produced it + sent_from="Architect", # Source role + send_to=["Engineer", "QaTester"], # Target roles (optional) +) +``` + +### Publishing and Subscribing + +```python +from metagpt.environment import Environment +from metagpt.schema import Message + +env = Environment() + +# Publish a message to the bus +env.publish_message(Message( + content="Build a todo app", + role="User", + cause_by="UserRequirement" +)) + +# Roles receive messages automatically through _observe() +# The filtering is based on _watch patterns set during __init__ +``` + +## Context Window Management + +LLMs have finite context windows. 
MetaGPT provides strategies for managing what goes into each prompt. + +### Automatic Context Compression + +```python +from metagpt.roles import Role +from metagpt.schema import Message + +class ContextAwareRole(Role): + """Demonstrates context window management.""" + name: str = "ContextAwareRole" + + async def _act(self) -> Message: + # Get all available messages + all_messages = self.rc.memory.get() + + # Strategy 1: Use only the most recent messages + recent_context = self.rc.memory.get(k=10) + + # Strategy 2: Summarize older messages, keep recent ones detailed + if len(all_messages) > 20: + old_messages = all_messages[:-10] + old_summary = await self._aask( + f"Summarize these messages in 200 words:\n" + + "\n".join([m.content[:200] for m in old_messages]) + ) + recent_messages = all_messages[-10:] + context = f"Previous context (summary):\n{old_summary}\n\n" + context += "Recent messages:\n" + context += "\n".join([m.content for m in recent_messages]) + else: + context = "\n".join([m.content for m in all_messages]) + + result = await self._aask(f"Context:\n{context}\n\nContinue the task.") + return Message(content=result, role=self.name) +``` + +### Selective Context Building + +```python +from metagpt.roles import Role +from metagpt.schema import Message + +class SelectiveReader(Role): + """Reads only relevant parts of the message history.""" + name: str = "SelectiveReader" + + async def _act(self) -> Message: + memories = self.rc.memory.get() + + # Filter by action type -- only read design documents + design_docs = [ + m for m in memories + if m.cause_by and "Design" in m.cause_by + ] + + # Filter by role -- only read architect outputs + architect_outputs = [ + m for m in memories + if m.role == "Architect" + ] + + context = "\n---\n".join([m.content for m in design_docs]) + result = await self._aask( + f"Based on the design documents:\n{context}\n\n" + "Implement the described system." 
+ ) + return Message(content=result, role=self.name) +``` + +## Shared Context Between Agents + +Sometimes agents need to share structured state beyond simple messages. + +### Using a Shared Data Store + +```python +import asyncio +from metagpt.roles import Role +from metagpt.actions import Action +from metagpt.schema import Message +from metagpt.environment import Environment + +# A simple shared state that all roles can access +class SharedState: + """Thread-safe shared state for multi-agent collaboration.""" + def __init__(self): + self._data = {} + + def set(self, key: str, value) -> None: + self._data[key] = value + + def get(self, key: str, default=None): + return self._data.get(key, default) + + def get_all(self) -> dict: + return dict(self._data) + + +class ContextProducer(Role): + """Writes data to shared context.""" + name: str = "Producer" + shared_state: SharedState = None + + async def _act(self) -> Message: + # Store structured data for other agents + self.shared_state.set("database_type", "PostgreSQL") + self.shared_state.set("api_framework", "FastAPI") + self.shared_state.set("auth_method", "JWT") + + return Message( + content="Technology choices recorded in shared state", + role=self.name + ) + + +class ContextConsumer(Role): + """Reads data from shared context.""" + name: str = "Consumer" + shared_state: SharedState = None + + async def _act(self) -> Message: + # Read structured data from other agents + db = self.shared_state.get("database_type") + framework = self.shared_state.get("api_framework") + + result = await self._aask( + f"Generate code using {framework} with {db} as the database." 
+ ) + return Message(content=result, role=self.name) + + +async def main(): + state = SharedState() + env = Environment() + env.add_roles([ + ContextProducer(shared_state=state), + ContextConsumer(shared_state=state), + ]) + env.publish_message(Message(content="Start", role="User")) + await env.run() + +asyncio.run(main()) +``` + +## Long-Term Memory with Vector Storage + +For complex projects, MetaGPT supports vector-based memory for semantic retrieval: + +```python +from metagpt.memory import Memory +from metagpt.roles import Role +from metagpt.schema import Message + +class VectorMemoryRole(Role): + """Uses vector similarity for memory retrieval.""" + name: str = "VectorMemoryRole" + + async def _act(self) -> Message: + # Store messages with embeddings + self.rc.memory.add(Message( + content="The authentication system uses OAuth 2.0 with PKCE flow", + role="Architect" + )) + self.rc.memory.add(Message( + content="Database uses PostgreSQL with connection pooling via asyncpg", + role="Architect" + )) + self.rc.memory.add(Message( + content="Frontend uses React with TypeScript and TailwindCSS", + role="Architect" + )) + + # Keyword-based retrieval of related messages (vector memory enables true semantic search) + query = "How does the user login process work?" + relevant = self.rc.memory.get_by_keyword("authentication") + + context = "\n".join([m.content for m in relevant]) + result = await self._aask( + f"Based on this context:\n{context}\n\n" + f"Answer: {query}" + ) + return Message(content=result, role=self.name) +``` + +## How It Works Under the Hood + +```mermaid +flowchart TD + A["Action produces output"] --> B["Wrap in Message object"] + B --> C["Add to Role's private memory"] + C --> D["Publish to Environment Message Bus"] + D --> E["Environment notifies all roles"] + E --> F{"Role._observe():
message matches
watch filter?"} + F -->|Yes| G["Add to Role's incoming queue"] + F -->|No| H["Discard for this role"] + G --> I["Role._think() reads queue"] + I --> J["Build context from memory + queue"] + J --> K["Action.run() with context"] + + classDef process fill:#f3e5f5,stroke:#4a148c + classDef decision fill:#fff3e0,stroke:#ef6c00 + + class A,B,C,D,E,G,I,J,K process + class F decision + class H process +``` + +Memory internals: + +1. **Append-Only Log** -- agent memory is an append-only list of Messages. This preserves the full history and ensures consistency. +2. **Message Deduplication** -- the environment tracks which messages each role has already processed, preventing double-handling. +3. **Context Budget** -- roles track available token budget and truncate or summarize older context when approaching limits. +4. **Serialization** -- memory can be serialized to disk, allowing checkpointing and resumption of long-running pipelines. + +## Persisting Memory Across Sessions + +```python +import json +from metagpt.memory import Memory +from metagpt.schema import Message + +def save_memory(memory: Memory, path: str): + """Save memory to a JSON file.""" + messages = memory.get() + data = [{"content": m.content, "role": m.role, "cause_by": m.cause_by} + for m in messages] + with open(path, "w") as f: + json.dump(data, f, indent=2) + +def load_memory(path: str) -> Memory: + """Load memory from a JSON file.""" + memory = Memory() + with open(path) as f: + data = json.load(f) + for item in data: + memory.add(Message(**item)) + return memory +``` + +## Summary + +MetaGPT's memory system operates at three levels: private agent memory, working memory for the current task, and the shared message bus. Messages carry routing metadata that enables the watch/subscribe pattern. For large projects, context window management through summarization and selective retrieval keeps prompts focused and efficient. Vector-based memory enables semantic search when keyword matching is insufficient. 
+ +**Next:** [Chapter 6: Tool Integration](06-tool-integration.md) -- give your agents access to the outside world. + +--- + +[Previous: Chapter 4: Action System](04-action-system.md) | [Back to Tutorial Index](README.md) | [Next: Chapter 6: Tool Integration](06-tool-integration.md) diff --git a/tutorials/metagpt-tutorial/06-tool-integration.md b/tutorials/metagpt-tutorial/06-tool-integration.md new file mode 100644 index 0000000..36a6ba7 --- /dev/null +++ b/tutorials/metagpt-tutorial/06-tool-integration.md @@ -0,0 +1,418 @@ +--- +layout: default +title: "Chapter 6: Tool Integration" +parent: "MetaGPT Tutorial" +nav_order: 6 +--- + +# Chapter 6: Tool Integration -- Web Browsing, Code Execution, and Custom Tools + +In [Chapter 5](05-memory-and-context.md) you learned how agents share information through memory. This chapter covers how agents interact with the outside world through tools -- web search, code execution, file I/O, and custom integrations. + +## What Problem Does This Solve? + +LLMs can reason about problems but cannot act on the world by default. An agent that needs to verify its code compiles, look up current API documentation, or query a database requires tools. MetaGPT's tool system provides a standardized way to give agents these capabilities while maintaining the structured SOP workflow. 
+ +## Built-In Tools Overview + +MetaGPT includes several ready-to-use tools: + +| Tool | Purpose | Requires | +|------|---------|----------| +| `WebBrowserEngine` | Browse and extract content from web pages | Browser dependency | +| `SearchEngine` | Search the web via Google/Bing/SerpAPI | API key | +| `CodeInterpreter` | Execute Python code in a sandbox | None | +| `FileRepository` | Read/write files in the workspace | None | +| `Terminal` | Run shell commands | None | + +## Web Browsing + +### Basic Web Page Reading + +```python +import asyncio +from metagpt.tools.web_browser_engine import WebBrowserEngine + +async def browse_web(): + """Fetch and parse a web page.""" + browser = WebBrowserEngine() + + # Fetch a page and extract its content + result = await browser.run("https://docs.python.org/3/library/asyncio.html") + print(result) # Cleaned text content of the page + +asyncio.run(browse_web()) +``` + +### Using Web Browsing in an Action + +```python +from metagpt.actions import Action +from metagpt.tools.web_browser_engine import WebBrowserEngine + +class ResearchAction(Action): + """An action that researches a topic using web browsing.""" + name: str = "ResearchAction" + + async def run(self, topic: str) -> str: + browser = WebBrowserEngine() + + # Step 1: Search for the topic + search_results = await browser.run( + f"https://www.google.com/search?q={topic.replace(' ', '+')}" + ) + + # Step 2: Ask LLM to extract relevant URLs + urls = await self._aask( + f"From these search results, list the 3 most relevant URLs:\n{search_results}" + ) + + # Step 3: Read each URL + contents = [] + for url in urls.strip().split("\n")[:3]: + url = url.strip() + if url.startswith("http"): + try: + content = await browser.run(url) + contents.append(content[:2000]) # Limit per page + except Exception as e: + contents.append(f"Failed to read {url}: {e}") + + # Step 4: Synthesize findings + all_content = "\n---\n".join(contents) + summary = await self._aask( + f"Synthesize these 
sources into a research summary on '{topic}':\n{all_content}" + ) + return summary +``` + +## Search Engine Integration + +```python +import asyncio +from metagpt.tools.search_engine import SearchEngine + +async def search_example(): + """Search the web and return structured results.""" + # Configure with your preferred search provider + engine = SearchEngine(engine="serpapi") # or "google", "bing" + + results = await engine.run("MetaGPT multi-agent framework tutorial") + for result in results: + print(f"Title: {result['title']}") + print(f"URL: {result['url']}") + print(f"Snippet: {result['snippet']}") + print("---") + +asyncio.run(search_example()) +``` + +### Search in a Role + +```python +from metagpt.roles import Role +from metagpt.actions import Action +from metagpt.tools.search_engine import SearchEngine +from metagpt.schema import Message + +class WebSearch(Action): + """Search the web for information.""" + name: str = "WebSearch" + + async def run(self, query: str) -> str: + engine = SearchEngine(engine="serpapi") + results = await engine.run(query) + formatted = "\n".join([ + f"- [{r['title']}]({r['url']}): {r['snippet']}" + for r in results[:5] + ]) + return f"## Search Results for: {query}\n\n{formatted}" + + +class ResearchAssistant(Role): + """A role that can search the web to answer questions.""" + name: str = "ResearchAssistant" + profile: str = "Web Research Specialist" + goal: str = "Find accurate, up-to-date information from the web" + + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.set_actions([WebSearch]) +``` + +## Code Execution + +MetaGPT can execute code to verify implementations, run tests, or perform computations. 
+ +### Basic Code Execution + +```python +import asyncio +from metagpt.actions import Action + +class ExecuteAndVerify(Action): + """Write code and verify it runs correctly.""" + name: str = "ExecuteAndVerify" + + async def run(self, requirement: str) -> str: + # Step 1: Generate code + code = await self._aask( + f"Write a Python function for: {requirement}\n" + "Include a main block that demonstrates usage." + ) + + # Step 2: Extract code from response + if "```python" in code: + code = code.split("```python")[1].split("```")[0] + + # Step 3: Execute code + try: + exec_globals = {} + exec(code, exec_globals) + return f"Code executed successfully.\n\n```python\n{code}\n```" + except Exception as e: + # Step 4: Fix the code based on the error + fixed_code = await self._aask( + f"This code produced an error:\n```python\n{code}\n```\n" + f"Error: {e}\n\nFix the code." + ) + return f"Original had error: {e}\n\nFixed code:\n{fixed_code}" +``` + +### Sandboxed Code Interpreter + +For safer execution, MetaGPT provides a sandboxed code interpreter: + +```python +import asyncio +from metagpt.actions import Action + +class SafeCodeRunner(Action): + """Run code in a sandboxed environment.""" + name: str = "SafeCodeRunner" + + async def run(self, code: str) -> str: + from metagpt.utils.code_parser import CodeParser + + # Parse code blocks from LLM output + parsed = CodeParser.parse_code(block="", text=code) + + # Execute in isolated subprocess + import subprocess + result = subprocess.run( + ["python", "-c", parsed], + capture_output=True, + text=True, + timeout=30 # 30-second timeout + ) + + if result.returncode == 0: + return f"Output:\n{result.stdout}" + else: + return f"Error:\n{result.stderr}" +``` + +## File System Tools + +### Working with the Workspace + +```python +from metagpt.utils.file_repository import FileRepository + +async def file_operations(): + """Demonstrate file system operations.""" + repo = FileRepository(root="./workspace/my_project") + + # Write a file 
+ await repo.save(filename="main.py", content=""" +import sys + +def main(): + print("Hello from MetaGPT!") + return 0 + +if __name__ == "__main__": + sys.exit(main()) +""") + + # Read a file + content = await repo.get(filename="main.py") + print(content) + + # List all files + files = repo.all_files + print(f"Files in project: {files}") + + # Check if file exists + exists = repo.exists(filename="main.py") + print(f"main.py exists: {exists}") +``` + +## Building Custom Tools + +You can create your own tools and integrate them into actions and roles. + +### Example: Database Query Tool + +```python +import sqlite3 +from metagpt.actions import Action + +class DatabaseTool: + """A tool for querying SQLite databases.""" + + def __init__(self, db_path: str): + self.db_path = db_path + + def query(self, sql: str) -> list[dict]: + conn = sqlite3.connect(self.db_path) + conn.row_factory = sqlite3.Row + cursor = conn.cursor() + cursor.execute(sql) + results = [dict(row) for row in cursor.fetchall()] + conn.close() + return results + + def execute(self, sql: str) -> int: + conn = sqlite3.connect(self.db_path) + cursor = conn.cursor() + cursor.execute(sql) + conn.commit() + affected = cursor.rowcount + conn.close() + return affected + + +class QueryDatabase(Action): + """Action that queries a database based on natural language.""" + name: str = "QueryDatabase" + + async def run(self, question: str, db_path: str) -> str: + tool = DatabaseTool(db_path) + + # Get schema information + tables = tool.query( + "SELECT name FROM sqlite_master WHERE type='table'" + ) + schema_info = [] + for table in tables: + cols = tool.query(f"PRAGMA table_info({table['name']})") + schema_info.append(f"Table {table['name']}: {cols}") + + # Generate SQL from natural language + sql = await self._aask( + f"Database schema:\n{schema_info}\n\n" + f"Question: {question}\n\n" + "Generate a SQL query to answer the question. " + "Return ONLY the SQL, no explanation." 
+ ) + + # Execute and return results (removeprefix/removesuffix, not strip -- + # str.strip() with a multi-char argument strips a character SET and can eat valid SQL) + sql = sql.strip().removeprefix("```sql").removeprefix("```").removesuffix("```").strip() + results = tool.query(sql) + return f"Query: {sql}\n\nResults ({len(results)} rows):\n{results}" +``` + +### Example: REST API Tool + +```python +import aiohttp +from metagpt.actions import Action + +class APITool: + """A tool for making REST API calls.""" + + def __init__(self, base_url: str, headers: dict = None): + self.base_url = base_url.rstrip("/") + self.headers = headers or {} + + async def get(self, endpoint: str, params: dict = None) -> dict: + async with aiohttp.ClientSession() as session: + async with session.get( + f"{self.base_url}/{endpoint}", + params=params, + headers=self.headers + ) as resp: + return await resp.json() + + async def post(self, endpoint: str, data: dict = None) -> dict: + async with aiohttp.ClientSession() as session: + async with session.post( + f"{self.base_url}/{endpoint}", + json=data, + headers=self.headers + ) as resp: + return await resp.json() + + +class FetchAPIData(Action): + """Fetch data from an API and analyze it.""" + name: str = "FetchAPIData" + + async def run(self, api_url: str, question: str) -> str: + tool = APITool(base_url=api_url) + + # Fetch data + data = await tool.get("") + + # Analyze with LLM + analysis = await self._aask( + f"API Response:\n{data}\n\n" + f"Question: {question}\n\n" + "Provide a detailed answer based on the API data."
+ ) + return analysis +``` + +## How It Works Under the Hood + +```mermaid +flowchart TD + A["Action needs external data"] --> B{"Which tool?"} + B -->|Web| C["WebBrowserEngine"] + B -->|Search| D["SearchEngine"] + B -->|Code| E["CodeInterpreter"] + B -->|Files| F["FileRepository"] + B -->|Custom| G["User-defined Tool"] + + C --> H["HTTP request + HTML parsing"] + D --> I["API call to search provider"] + E --> J["Subprocess execution + capture"] + F --> K["OS file system operations"] + G --> L["Custom logic"] + + H --> M["Return text content"] + I --> M + J --> M + K --> M + L --> M + + M --> N["Tool output injected into Action context"] + N --> O["LLM processes context + tool output"] + O --> P["Action returns structured result"] + + classDef tool fill:#f3e5f5,stroke:#4a148c + classDef process fill:#fff3e0,stroke:#ef6c00 + classDef output fill:#e8f5e8,stroke:#1b5e20 + + class C,D,E,F,G tool + class H,I,J,K,L,N,O process + class M,P output +``` + +Tool integration details: + +1. **Lazy Initialization** -- tools are instantiated only when an action needs them, avoiding unnecessary resource allocation. +2. **Error Isolation** -- tool failures are caught and reported to the LLM as context, allowing the agent to adapt or retry with different parameters. +3. **Output Truncation** -- web content and large API responses are automatically truncated to fit within the LLM's context window. +4. **Async by Default** -- all tool interfaces are async, enabling parallel tool calls when an action needs data from multiple sources. + +## Summary + +MetaGPT tools bridge the gap between LLM reasoning and real-world action. Built-in tools cover web browsing, search, code execution, and file management. Custom tools follow a simple pattern: create a tool class with the integration logic, then use it inside an Action. The framework handles error isolation, output truncation, and async execution. 
+ +**Next:** [Chapter 7: Multi-Agent Orchestration](07-multi-agent-orchestration.md) -- compose agents into sophisticated teams. + +--- + +[Previous: Chapter 5: Memory and Context](05-memory-and-context.md) | [Back to Tutorial Index](README.md) | [Next: Chapter 7: Multi-Agent Orchestration](07-multi-agent-orchestration.md) diff --git a/tutorials/metagpt-tutorial/07-multi-agent-orchestration.md b/tutorials/metagpt-tutorial/07-multi-agent-orchestration.md new file mode 100644 index 0000000..c0f5b7a --- /dev/null +++ b/tutorials/metagpt-tutorial/07-multi-agent-orchestration.md @@ -0,0 +1,479 @@ +--- +layout: default +title: "Chapter 7: Multi-Agent Orchestration" +parent: "MetaGPT Tutorial" +nav_order: 7 +--- + +# Chapter 7: Multi-Agent Orchestration -- Team Composition, Task Decomposition, and Parallel Execution + +In [Chapter 6](06-tool-integration.md) you gave agents access to external tools. This chapter covers how to compose multiple agents into coordinated teams, decompose complex tasks, and leverage parallel execution for efficiency. + +## What Problem Does This Solve? + +A single agent can handle simple tasks, but real-world problems require multiple specialists working together. You need to decide which agents to include, how to decompose the task, whether agents should run sequentially or in parallel, and how to aggregate their outputs. MetaGPT's orchestration layer handles all of this through the Environment and Team abstractions. 
+ +## Team Composition + +### The Team Class + +MetaGPT's `Team` class is the top-level orchestrator that manages roles and their interactions: + +```python +import asyncio +from metagpt.team import Team +from metagpt.roles import ProductManager, Architect, Engineer + +async def build_software_team(): + """Create and run a standard software development team.""" + team = Team() + + # Add roles to the team + team.hire([ + ProductManager(), + Architect(), + Engineer(), + ]) + + # Set the initial requirement + team.run_project( + "Build a REST API for a bookstore with CRUD operations, " + "user authentication, and search functionality" + ) + + # Execute the team workflow + await team.run(n_round=10) # Max 10 communication rounds + +asyncio.run(build_software_team()) +``` + +### Custom Team Compositions + +You can mix built-in and custom roles to create specialized teams: + +```python +import asyncio +from metagpt.team import Team +from metagpt.roles import Role, ProductManager, Engineer +from metagpt.actions import Action +from metagpt.schema import Message + +class SecurityAuditor(Role): + """Reviews code for security vulnerabilities.""" + name: str = "SecurityAuditor" + profile: str = "Application Security Engineer" + goal: str = "Identify and report security vulnerabilities in code" + constraints: str = "Focus on OWASP Top 10 vulnerabilities" + + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.set_actions([SecurityReview]) + self._watch([Engineer]) # Review after code is written + + +class SecurityReview(Action): + name: str = "SecurityReview" + + async def run(self, context: str) -> str: + return await self._aask( + f"Review this code for security vulnerabilities " + f"(OWASP Top 10):\n{context}\n\n" + "For each vulnerability found, provide:\n" + "- Severity (Critical/High/Medium/Low)\n" + "- Description\n" + "- Remediation" + ) + + +class PerformanceTester(Role): + """Analyzes code for performance issues.""" + name: str = "PerformanceTester" + 
profile: str = "Performance Engineer" + goal: str = "Identify performance bottlenecks and optimization opportunities" + + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.set_actions([PerformanceAnalysis]) + self._watch([Engineer]) + + +class PerformanceAnalysis(Action): + name: str = "PerformanceAnalysis" + + async def run(self, context: str) -> str: + return await self._aask( + f"Analyze this code for performance issues:\n{context}\n\n" + "Check for: N+1 queries, memory leaks, " + "inefficient algorithms, missing caching opportunities." + ) + + +async def enhanced_team(): + """A team with extra quality checks.""" + team = Team() + team.hire([ + ProductManager(), + Architect(), + Engineer(), + SecurityAuditor(), + PerformanceTester(), + ]) + team.run_project("Build a payment processing microservice") + await team.run(n_round=15) + +asyncio.run(enhanced_team()) +``` + +## Task Decomposition + +For complex requirements, you often need to break the work into subtasks before assigning them to agents. 
+ +### Automatic Task Decomposition + +```python +from metagpt.actions import Action +from metagpt.roles import Role +from metagpt.schema import Message + +class DecomposeTask(Action): + """Break a complex requirement into subtasks.""" + name: str = "DecomposeTask" + + async def run(self, requirement: str) -> str: + return await self._aask( + f"Break this requirement into 3-7 independent subtasks:\n" + f"{requirement}\n\n" + "Format each subtask as:\n" + "TASK [number]: [title]\n" + "Description: [what needs to be done]\n" + "Dependencies: [list of task numbers this depends on, or 'none']" + ) + + +class TaskDecomposer(Role): + """Decomposes complex tasks before the team starts work.""" + name: str = "TaskDecomposer" + profile: str = "Project Manager" + goal: str = "Break complex requirements into manageable subtasks" + + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.set_actions([DecomposeTask]) +``` + +### Hierarchical Decomposition + +For large projects, decomposition can be hierarchical: + +```python +import asyncio +from metagpt.actions import Action +from metagpt.schema import Message + +class HierarchicalDecompose(Action): + """Multi-level task decomposition.""" + name: str = "HierarchicalDecompose" + + async def run(self, requirement: str) -> str: + # Level 1: Break into major components + components = await self._aask( + f"Break this into major system components:\n{requirement}\n\n" + "List each component with a one-line description." + ) + + # Level 2: Break each component into tasks + detailed_tasks = [] + for component in components.strip().split("\n"): + if component.strip(): + tasks = await self._aask( + f"Break this component into implementation tasks:\n" + f"{component}\n\n" + "List specific coding tasks with estimated complexity." 
+ ) + detailed_tasks.append(f"## {component}\n{tasks}") + + return "\n\n".join(detailed_tasks) +``` + +## Parallel Execution + +When tasks are independent, running agents in parallel dramatically reduces total execution time. + +### Parallel Role Execution + +```python +import asyncio +from metagpt.roles import Role +from metagpt.actions import Action +from metagpt.schema import Message +from metagpt.environment import Environment + + +class AnalyzeModule(Action): + """Analyze a single module.""" + name: str = "AnalyzeModule" + + async def run(self, module_spec: str) -> str: + return await self._aask( + f"Analyze this module and provide implementation details:\n{module_spec}" + ) + + +class ModuleAnalyzer(Role): + """Analyzes a specific module. Multiple instances run in parallel.""" + name: str = "ModuleAnalyzer" + profile: str = "Module Specialist" + module_name: str = "" + + def __init__(self, module_name: str = "", **kwargs): + super().__init__(**kwargs) + self.module_name = module_name + self.name = f"Analyzer_{module_name}" + self.set_actions([AnalyzeModule]) + + +async def parallel_analysis(): + """Run multiple analyzers in parallel.""" + env = Environment() + + modules = ["Authentication", "Database", "API Layer", "Frontend", "Caching"] + + # Create a parallel analyzer for each module + analyzers = [ModuleAnalyzer(module_name=m) for m in modules] + env.add_roles(analyzers) + + # All analyzers will process the same initial message in parallel + env.publish_message(Message( + content="Analyze the module and provide: architecture, key classes, " + "dependencies, and estimated lines of code.", + role="ProjectManager" + )) + + await env.run() + +asyncio.run(parallel_analysis()) +``` + +### Coordinated Parallel-Then-Aggregate Pattern + +```python +import asyncio +from metagpt.roles import Role +from metagpt.actions import Action +from metagpt.schema import Message +from metagpt.environment import Environment + + +class ResearchTopic(Action): + name: str = 
"ResearchTopic" + async def run(self, topic: str) -> str: + return await self._aask(f"Research and summarize: {topic}") + + +class AggregateResults(Action): + name: str = "AggregateResults" + async def run(self, context: str) -> str: + return await self._aask( + f"Synthesize these research results into a coherent report:\n{context}" + ) + + +class TopicResearcher(Role): + """Researches a specific topic in parallel with other researchers.""" + name: str = "TopicResearcher" + + def __init__(self, topic: str = "", **kwargs): + super().__init__(**kwargs) + self.name = f"Researcher_{topic.replace(' ', '_')}" + self.set_actions([ResearchTopic]) + + +class ReportAggregator(Role): + """Waits for all researchers and combines their findings.""" + name: str = "ReportAggregator" + profile: str = "Report Writer" + expected_sources: int = 0 + + def __init__(self, expected_sources: int = 0, **kwargs): + super().__init__(**kwargs) + self.expected_sources = expected_sources + self.set_actions([AggregateResults]) + self._watch([TopicResearcher]) + + async def _observe(self): + """Wait until all researchers have reported.""" + msgs = await super()._observe() + # Only proceed when we have all expected inputs + research_msgs = [m for m in self.rc.memory.get() + if "Researcher_" in (m.role or "")] + if len(research_msgs) < self.expected_sources: + return [] # Not ready yet + return msgs + + +async def parallel_research(): + """Research multiple topics in parallel, then aggregate.""" + env = Environment() + topics = [ + "transformer architectures", + "multi-agent systems", + "reinforcement learning from human feedback", + ] + + researchers = [TopicResearcher(topic=t) for t in topics] + aggregator = ReportAggregator(expected_sources=len(topics)) + + env.add_roles(researchers + [aggregator]) + env.publish_message(Message(content="Begin research", role="User")) + await env.run() + +asyncio.run(parallel_research()) +``` + +## Dynamic Team Formation + +Sometimes you do not know which agents 
you need until runtime: + +```python +import asyncio +from metagpt.team import Team +from metagpt.roles import Role, ProductManager, Architect, Engineer +from metagpt.actions import Action +from metagpt.schema import Message + + +class PlanTeam(Action): + name: str = "PlanTeam" + async def run(self, requirement: str) -> str: + return await self._aask( + f"Given this requirement:\n{requirement}\n\n" + "What team roles are needed? Return a JSON list of role names. " + "Choose from: ProductManager, Architect, Engineer, QaTester, " + "SecurityAuditor, DatabaseSpecialist, FrontendDeveloper" + ) + + +ROLE_REGISTRY = { + "ProductManager": ProductManager, + "Architect": Architect, + "Engineer": Engineer, +} + +async def dynamic_team(requirement: str): + """Form a team based on the requirement.""" + import json + + # Step 1: Determine needed roles (strip code fences with removeprefix/removesuffix, + # since str.strip() would remove a character set, not the fence) + planner = PlanTeam() + roles_json = await planner.run(requirement) + role_names = json.loads(roles_json.strip().removeprefix("```json").removeprefix("```").removesuffix("```").strip()) + + # Step 2: Instantiate the team dynamically + team = Team() + roles = [] + for name in role_names: + if name in ROLE_REGISTRY: + roles.append(ROLE_REGISTRY[name]()) + team.hire(roles) + + # Step 3: Run + team.run_project(requirement) + await team.run(n_round=10) + +asyncio.run(dynamic_team("Build a machine learning model serving platform")) +``` + +## How It Works Under the Hood + +```mermaid +flowchart TD + A["Team.run(n_round)"] --> B["Initialize Environment"] + B --> C["Publish initial requirement"] + C --> D["Round N starts"] + + D --> E["For each Role in env"] + E --> F["role._observe()"] + F --> G{"Has pending
messages?"} + G -->|No| H["Skip role"] + G -->|Yes| I["role._react()"] + I --> J["_think() + _act()"] + J --> K["Publish output message"] + + H --> L{"More roles
to process?"} + K --> L + L -->|Yes| E + L -->|No| M{"Any messages
published this round?"} + + M -->|Yes| N{"Round < n_round?"} + N -->|Yes| D + N -->|No| O["Max rounds reached"] + M -->|No| P["All roles idle"] + + O --> Q["Return results"] + P --> Q + + classDef process fill:#f3e5f5,stroke:#4a148c + classDef decision fill:#fff3e0,stroke:#ef6c00 + classDef terminal fill:#e8f5e8,stroke:#1b5e20 + + class A,B,C,D,E,F,I,J,K process + class G,L,M,N decision + class H,O,P,Q terminal +``` + +Orchestration internals: + +1. **Round-Based Execution** -- the environment runs in rounds. Each round gives every role a chance to observe and act. This continues until either no messages are published in a round (convergence) or the round limit is reached. +2. **Deterministic Ordering** -- within each round, roles are processed in the order they were added to the environment. This ensures reproducible behavior for sequential workflows. +3. **Parallel-Compatible** -- when multiple roles watch the same upstream action and none depend on each other, they effectively execute in parallel within the same round. +4. **Budget Tracking** -- the team tracks total LLM spend across all roles and can halt execution when the budget is exhausted. +5. **Graceful Termination** -- feedback loops between roles (e.g., Engineer <-> QA) converge because each iteration reduces the number of issues, eventually producing no new messages. 
+ +## Controlling Execution + +### Setting Round Limits + +```python +# Limit total rounds to control cost and time +await team.run(n_round=5) # Stop after 5 rounds max +``` + +### Budget Controls + +```python +team = Team() +team.hire([ProductManager(), Architect(), Engineer()]) +team.run_project("Build a chat application") + +# Set budget limit (in USD) +# The team will stop when the budget is exhausted +await team.run(n_round=20) +``` + +### Inspecting Team State + +```python +async def inspect_team(): + team = Team() + team.hire([ProductManager(), Architect(), Engineer()]) + team.run_project("Build a calculator") + + await team.run(n_round=10) + + # Access the environment after execution + env = team.env + + # Check all messages that were exchanged + for msg in env.memory.get(): + print(f"[{msg.role}] ({msg.cause_by}): {msg.content[:100]}...") +``` + +## Summary + +Multi-agent orchestration in MetaGPT is managed through the Team and Environment abstractions. Teams are composed by hiring roles, and the environment manages round-based execution, message routing, and convergence detection. Advanced patterns include parallel execution with aggregation, dynamic team formation, and hierarchical task decomposition. Budget and round controls ensure predictable resource usage. + +**Next:** [Chapter 8: Production Deployment](08-production-deployment.md) -- deploy MetaGPT systems at scale. 
+ +--- + +[Previous: Chapter 6: Tool Integration](06-tool-integration.md) | [Back to Tutorial Index](README.md) | [Next: Chapter 8: Production Deployment](08-production-deployment.md) diff --git a/tutorials/metagpt-tutorial/08-production-deployment.md b/tutorials/metagpt-tutorial/08-production-deployment.md new file mode 100644 index 0000000..6fdd296 --- /dev/null +++ b/tutorials/metagpt-tutorial/08-production-deployment.md @@ -0,0 +1,598 @@ +--- +layout: default +title: "Chapter 8: Production Deployment" +parent: "MetaGPT Tutorial" +nav_order: 8 +--- + +# Chapter 8: Production Deployment -- Configuration, Cost Optimization, and Enterprise Patterns + +In [Chapter 7](07-multi-agent-orchestration.md) you learned how to orchestrate multi-agent teams. This final chapter covers what it takes to run MetaGPT in production: configuration management, cost controls, error handling, monitoring, and enterprise integration patterns. + +## What Problem Does This Solve? + +Development experiments and production systems have fundamentally different requirements. In production you need predictable costs, graceful error recovery, observability, security, and reproducibility. This chapter provides the patterns and configurations needed to bridge that gap. 
+ +## Production Configuration + +### Complete Configuration Template + +```yaml +# config2.yaml -- Production configuration +llm: + api_type: "openai" + model: "gpt-4-turbo" + base_url: "https://api.openai.com/v1" + api_key: "${OPENAI_API_KEY}" # Use environment variable reference + temperature: 0.0 # Deterministic outputs for reproducibility + max_tokens: 4096 + timeout: 120 # Seconds + +# Cost management +max_budget: 50.0 # USD per run +budget_alert_threshold: 0.8 # Alert at 80% of budget + +# Workspace +workspace: + path: "/var/metagpt/workspace" + use_git: true # Track changes with git + +# Retry configuration +retry: + max_retries: 3 + retry_delay: 1.0 # Seconds between retries + exponential_backoff: true + +# Logging +log_level: "INFO" +log_file: "/var/log/metagpt/agent.log" + +# Code execution +enable_code_execution: true +code_execution_timeout: 60 # Seconds +sandbox_mode: true # Run code in isolated environment +``` + +### Environment-Based Configuration + +```python +import os +from metagpt.config2 import Config + +def get_production_config() -> Config: + """Load production configuration with environment overrides.""" + config = Config.default() + + # Override from environment for deployment flexibility + config.llm.api_key = os.environ["OPENAI_API_KEY"] + config.llm.model = os.environ.get("METAGPT_MODEL", "gpt-4-turbo") + config.max_budget = float(os.environ.get("METAGPT_BUDGET", "50.0")) + + return config +``` + +### Multi-Model Configuration + +Use different models for different roles to optimize cost and quality: + +```python +import asyncio +from metagpt.team import Team +from metagpt.roles import ProductManager, Architect, Engineer +from metagpt.config2 import Config + +async def multi_model_team(): + """Use expensive models for design, cheaper models for code generation.""" + # High-capability model for product and architecture decisions + design_config = Config.default() + design_config.llm.model = "gpt-4-turbo" + + # Cost-effective model for 
code generation (high volume) + code_config = Config.default() + code_config.llm.model = "gpt-4o-mini" + + team = Team() + team.hire([ + ProductManager(config=design_config), + Architect(config=design_config), + Engineer(config=code_config), # Cheaper model for bulk code + ]) + + team.run_project("Build an inventory management system") + await team.run(n_round=10) + +asyncio.run(multi_model_team()) +``` + +## Cost Optimization Strategies + +### 1. Token Budget Management + +```python +from metagpt.roles import Role +from metagpt.schema import Message + +class CostAwareRole(Role): + """A role that tracks and limits its token usage.""" + name: str = "CostAwareRole" + max_tokens_per_action: int = 2000 + + async def _act(self) -> Message: + # Truncate context to fit within budget + memories = self.rc.memory.get() + context = "" + token_estimate = 0 + + for msg in reversed(memories): + msg_tokens = len(msg.content.split()) * 1.3 # Rough estimate + if token_estimate + msg_tokens > self.max_tokens_per_action: + break + context = msg.content + "\n---\n" + context + token_estimate += msg_tokens + + result = await self._aask(f"Context:\n{context}\n\nContinue the task.") + return Message(content=result, role=self.name) +``` + +### 2. 
Caching LLM Responses + +```python +import hashlib +import json +import os +from metagpt.actions import Action + +class CachedAction(Action): + """An action that caches LLM responses to avoid duplicate API calls.""" + name: str = "CachedAction" + cache_dir: str = "/var/metagpt/cache" + + def _cache_key(self, prompt: str) -> str: + return hashlib.sha256(prompt.encode()).hexdigest() + + def _get_cached(self, key: str) -> str | None: + path = os.path.join(self.cache_dir, f"{key}.json") + if os.path.exists(path): + with open(path) as f: + return json.load(f)["response"] + return None + + def _set_cached(self, key: str, response: str) -> None: + os.makedirs(self.cache_dir, exist_ok=True) + path = os.path.join(self.cache_dir, f"{key}.json") + with open(path, "w") as f: + json.dump({"response": response}, f) + + async def _aask_cached(self, prompt: str) -> str: + key = self._cache_key(prompt) + cached = self._get_cached(key) + if cached is not None: + return cached + + response = await self._aask(prompt) + self._set_cached(key, response) + return response +``` + +### 3. 
Incremental Generation + +Instead of regenerating everything on each run, reuse previous outputs: + +```python +import asyncio +import json +import os +from metagpt.team import Team +from metagpt.roles import Engineer +from metagpt.schema import Message + +async def incremental_build(requirement: str, workspace: str): + """Only regenerate files that have changed requirements.""" + manifest_path = os.path.join(workspace, ".metagpt_manifest.json") + + # Load previous manifest + if os.path.exists(manifest_path): + with open(manifest_path) as f: + previous = json.load(f) + else: + previous = {"requirement": "", "files": {}} + + # Check if requirement changed + if previous["requirement"] == requirement: + print("No changes detected, skipping generation.") + return + + # Run generation + team = Team() + team.hire([Engineer()]) + team.run_project(requirement) + await team.run(n_round=5) + + # Save manifest + manifest = { + "requirement": requirement, + "files": { + f: os.path.getmtime(os.path.join(workspace, f)) + for f in os.listdir(workspace) + if f.endswith(".py") + } + } + with open(manifest_path, "w") as f: + json.dump(manifest, f) +``` + +## Error Handling and Recovery + +### Retry with Exponential Backoff + +```python +import asyncio +import random +from metagpt.actions import Action + +class ResilientAction(Action): + """An action with production-grade error handling.""" + name: str = "ResilientAction" + + async def run(self, context: str) -> str: + max_retries = 3 + + for attempt in range(max_retries): + try: + result = await self._aask( + f"Process this request:\n{context}" + ) + return result + except Exception as e: + if attempt == max_retries - 1: + raise RuntimeError( + f"Action failed after {max_retries} attempts: {e}" + ) + # Exponential backoff with jitter + delay = (2 ** attempt) + random.uniform(0, 1) + print(f"Attempt {attempt + 1} failed: {e}. 
" + f"Retrying in {delay:.1f}s...") + await asyncio.sleep(delay) +``` + +### Checkpoint and Resume + +```python +import json +import os +from metagpt.schema import Message + +class CheckpointManager: + """Save and restore agent pipeline state.""" + + def __init__(self, checkpoint_dir: str): + self.checkpoint_dir = checkpoint_dir + os.makedirs(checkpoint_dir, exist_ok=True) + + def save(self, stage: str, messages: list[Message]) -> None: + path = os.path.join(self.checkpoint_dir, f"{stage}.json") + data = [ + {"content": m.content, "role": m.role, "cause_by": m.cause_by} + for m in messages + ] + with open(path, "w") as f: + json.dump(data, f, indent=2) + + def load(self, stage: str) -> list[Message] | None: + path = os.path.join(self.checkpoint_dir, f"{stage}.json") + if not os.path.exists(path): + return None + with open(path) as f: + data = json.load(f) + return [Message(**item) for item in data] + + def has_checkpoint(self, stage: str) -> bool: + path = os.path.join(self.checkpoint_dir, f"{stage}.json") + return os.path.exists(path) +``` + +### Using Checkpoints in a Pipeline + +```python +import asyncio +from metagpt.roles import ProductManager, Architect, Engineer +from metagpt.schema import Message +from metagpt.environment import Environment + +async def resumable_pipeline(requirement: str, checkpoint_dir: str): + """A pipeline that can resume from the last successful stage.""" + ckpt = CheckpointManager(checkpoint_dir) + env = Environment() + + stages = [ + ("prd", ProductManager()), + ("design", Architect()), + ("code", Engineer()), + ] + + last_output = Message(content=requirement, role="User") + + for stage_name, role in stages: + # Check for existing checkpoint + cached = ckpt.load(stage_name) + if cached: + print(f"Resuming from checkpoint: {stage_name}") + last_output = cached[-1] + continue + + # Execute this stage + try: + result = await role.run(last_output) + ckpt.save(stage_name, [result]) + last_output = result + print(f"Stage '{stage_name}' 
completed and checkpointed.") + except Exception as e: + print(f"Stage '{stage_name}' failed: {e}") + print("Re-run the pipeline to resume from this point.") + raise + +asyncio.run(resumable_pipeline( + "Build an e-commerce checkout flow", + "/var/metagpt/checkpoints/ecommerce" +)) +``` + +## Monitoring and Observability + +### Logging Agent Activity + +```python +import logging +from metagpt.roles import Role +from metagpt.schema import Message + +logger = logging.getLogger("metagpt.production") + +class MonitoredRole(Role): + """A role with production logging.""" + name: str = "MonitoredRole" + + async def _observe(self): + messages = await super()._observe() + logger.info(f"[{self.name}] Observed {len(messages)} new messages") + return messages + + async def _think(self): + action = await super()._think() + logger.info(f"[{self.name}] Selected action: {action.name}") + return action + + async def _act(self) -> Message: + import time + start = time.time() + + result = await super()._act() + + duration = time.time() - start + logger.info( + f"[{self.name}] Action completed in {duration:.2f}s, " + f"output length: {len(result.content)} chars" + ) + return result +``` + +### Cost Tracking + +```python +class CostTracker: + """Track LLM API costs across a pipeline run.""" + + # Approximate costs per 1K tokens (as of 2024) + COSTS = { + "gpt-4-turbo": {"input": 0.01, "output": 0.03}, + "gpt-4o-mini": {"input": 0.00015, "output": 0.0006}, + "gpt-4o": {"input": 0.005, "output": 0.015}, + } + + def __init__(self): + self.total_input_tokens = 0 + self.total_output_tokens = 0 + self.total_cost = 0.0 + self.calls = [] + + def record(self, model: str, input_tokens: int, output_tokens: int): + costs = self.COSTS.get(model, {"input": 0.01, "output": 0.03}) + cost = ( + (input_tokens / 1000) * costs["input"] + + (output_tokens / 1000) * costs["output"] + ) + self.total_input_tokens += input_tokens + self.total_output_tokens += output_tokens + self.total_cost += cost + 
self.calls.append({ + "model": model, + "input_tokens": input_tokens, + "output_tokens": output_tokens, + "cost": cost, + }) + + def summary(self) -> str: + return ( + f"Total calls: {len(self.calls)}\n" + f"Total input tokens: {self.total_input_tokens:,}\n" + f"Total output tokens: {self.total_output_tokens:,}\n" + f"Total cost: ${self.total_cost:.4f}" + ) +``` + +## Enterprise Integration Patterns + +### API Wrapper + +Expose MetaGPT as a REST API for integration with other systems: + +```python +from fastapi import FastAPI, BackgroundTasks +from pydantic import BaseModel +import asyncio +import uuid + +app = FastAPI() + +# Store results by job ID +results = {} + +class GenerateRequest(BaseModel): + requirement: str + max_rounds: int = 10 + budget: float = 50.0 + +class JobStatus(BaseModel): + job_id: str + status: str + result: str | None = None + +async def run_metagpt_job(job_id: str, requirement: str, max_rounds: int): + """Run MetaGPT in the background.""" + from metagpt.team import Team + from metagpt.roles import ProductManager, Architect, Engineer + + try: + results[job_id] = {"status": "running", "result": None} + team = Team() + team.hire([ProductManager(), Architect(), Engineer()]) + team.run_project(requirement) + await team.run(n_round=max_rounds) + results[job_id] = {"status": "completed", "result": "Project generated"} + except Exception as e: + results[job_id] = {"status": "failed", "result": str(e)} + + +@app.post("/generate", response_model=JobStatus) +async def generate(request: GenerateRequest, background_tasks: BackgroundTasks): + job_id = str(uuid.uuid4()) + results[job_id] = {"status": "queued", "result": None} + background_tasks.add_task( + run_metagpt_job, job_id, request.requirement, request.max_rounds + ) + return JobStatus(job_id=job_id, status="queued") + + +@app.get("/status/{job_id}", response_model=JobStatus) +async def get_status(job_id: str): + if job_id not in results: + return JobStatus(job_id=job_id, status="not_found") + r = 
results[job_id] + return JobStatus(job_id=job_id, status=r["status"], result=r["result"]) +``` + +### Docker Deployment + +```dockerfile +FROM python:3.11-slim + +WORKDIR /app + +# Install MetaGPT +RUN pip install metagpt + +# Copy configuration +COPY config2.yaml /root/.metagpt/config2.yaml + +# Copy application code +COPY app/ /app/ + +# Environment variables (set at runtime, not build time) +ENV OPENAI_API_KEY="" +ENV METAGPT_BUDGET="50.0" + +EXPOSE 8000 + +CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"] +``` + +```yaml +# docker-compose.yml +version: "3.8" +services: + metagpt-api: + build: . + ports: + - "8000:8000" + environment: + - OPENAI_API_KEY=${OPENAI_API_KEY} + - METAGPT_BUDGET=50.0 + volumes: + - ./workspace:/var/metagpt/workspace + - ./logs:/var/log/metagpt + restart: unless-stopped +``` + +## How It Works Under the Hood + +```mermaid +flowchart TD + A["Production Request"] --> B["Load Config"] + B --> C["Initialize Team"] + C --> D["Budget Check"] + D --> E{"Budget
available?"} + E -->|No| F["Reject Request"] + E -->|Yes| G["Execute Pipeline"] + + G --> H["Per-Action Loop"] + H --> I["Try Action"] + I --> J{"Success?"} + J -->|Yes| K["Checkpoint + Log"] + J -->|No| L{"Retries
remaining?"} + L -->|Yes| M["Backoff + Retry"] + M --> I + L -->|No| N["Save Error + Alert"] + + K --> O{"Pipeline
complete?"} + O -->|No| H + O -->|Yes| P["Save Results"] + + P --> Q["Return to Client"] + N --> Q + + classDef process fill:#f3e5f5,stroke:#4a148c + classDef decision fill:#fff3e0,stroke:#ef6c00 + classDef terminal fill:#e8f5e8,stroke:#1b5e20 + classDef error fill:#ffebee,stroke:#c62828 + + class A,B,C,D,G,H,I,K,M,P process + class E,J,L,O decision + class Q terminal + class F,N error +``` + +Production deployment principles: + +1. **Configuration Layering** -- base configuration in YAML, environment-specific overrides via environment variables, per-request overrides via API parameters. +2. **Cost Boundaries** -- budgets are enforced at the team level. Each LLM call deducts from the budget, and the pipeline halts gracefully when the limit is reached. +3. **Idempotent Checkpoints** -- each pipeline stage checkpoints its output. If a run is interrupted, it resumes from the last successful checkpoint without repeating work. +4. **Observability** -- every action logs its duration, token usage, and cost. These metrics can be exported to monitoring systems like Prometheus, Datadog, or CloudWatch. +5. **Security** -- API keys are never stored in configuration files. Use environment variables or secret managers. Code execution runs in sandboxed environments. 
+ +## Production Checklist + +| Category | Item | Status | +|----------|------|--------| +| **Configuration** | API keys in environment variables | Required | +| **Configuration** | Budget limits set | Required | +| **Configuration** | Temperature set to 0.0 for reproducibility | Recommended | +| **Reliability** | Retry logic with backoff | Required | +| **Reliability** | Checkpoint and resume | Recommended | +| **Reliability** | Graceful error handling | Required | +| **Observability** | Structured logging | Required | +| **Observability** | Cost tracking per run | Recommended | +| **Observability** | Latency monitoring | Recommended | +| **Security** | Sandboxed code execution | Required | +| **Security** | Input validation | Required | +| **Security** | Rate limiting on API | Recommended | +| **Performance** | Response caching | Recommended | +| **Performance** | Multi-model strategy | Optional | +| **Performance** | Incremental generation | Optional | + +## Summary + +Running MetaGPT in production requires attention to cost management, error recovery, observability, and security. The key patterns are: multi-model configuration for cost optimization, checkpoint-based resumption for reliability, structured logging for observability, and API wrapping for system integration. With these patterns in place, MetaGPT can serve as a reliable component in enterprise software delivery pipelines. 
+ +--- + +[Previous: Chapter 7: Multi-Agent Orchestration](07-multi-agent-orchestration.md) | [Back to Tutorial Index](README.md) diff --git a/tutorials/metagpt-tutorial/README.md b/tutorials/metagpt-tutorial/README.md new file mode 100644 index 0000000..a273b5b --- /dev/null +++ b/tutorials/metagpt-tutorial/README.md @@ -0,0 +1,135 @@ +--- +layout: default +title: "MetaGPT Tutorial" +nav_order: 194 +has_children: true +format_version: v2 +--- + +# MetaGPT Tutorial: Multi-Agent Software Development with Role-Based Collaboration + +MetaGPT ([View Repo](https://github.com/geekan/MetaGPT)) is a multi-agent framework where GPT-powered agents assume real-world software roles -- Product Manager, Architect, Engineer, and QA -- to collaboratively build complete software from a single one-line requirement. It encodes Standardized Operating Procedures (SOPs) into agent prompts, enabling structured, role-based collaboration that mirrors how professional development teams actually work. + +> **In one sentence:** Give MetaGPT a product idea, and a virtual software company of AI agents designs, architects, codes, and tests it for you. + +[![Stars](https://img.shields.io/github/stars/geekan/MetaGPT?style=social)](https://github.com/geekan/MetaGPT) +[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) +[![Python](https://img.shields.io/badge/Python-3.9+-blue)](https://github.com/geekan/MetaGPT) + +## Why This Track Matters + +MetaGPT introduces a paradigm where multiple AI agents collaborate through structured roles and standardized processes, closely mirroring the way real software teams operate. This approach is directly relevant to Genesis-style agent teams and any system that requires coordinated, multi-step AI workflows. 
+ +This track focuses on: + +- Understanding **role-based multi-agent collaboration** where each agent has a defined responsibility +- Learning how **Standardized Operating Procedures (SOPs)** constrain and guide agent behavior +- Building **custom actions and tools** that extend agent capabilities +- Designing **production-ready multi-agent pipelines** with memory, context sharing, and cost optimization + +## Current Snapshot (auto-updated) + +- repository: [`geekan/MetaGPT`](https://github.com/geekan/MetaGPT) +- stars: about **66k** +- language: Python + +## Mental Model + +```mermaid +flowchart TD + A["One-Line Requirement"] --> B["ProductManager Agent"] + B --> C["PRD Document"] + C --> D["Architect Agent"] + D --> E["System Design & API Specs"] + E --> F["Engineer Agent"] + F --> G["Code Implementation"] + G --> H["QA Agent"] + H --> I["Test Cases & Bug Reports"] + I -->|feedback| F + + B -.->|SOP| S1["Competitive Analysis → PRD"] + D -.->|SOP| S2["Design Review → Tech Spec"] + F -.->|SOP| S3["Code Review → Implementation"] + H -.->|SOP| S4["Test Plan → Verification"] + + J["Shared Memory / Message Bus"] --- B + J --- D + J --- F + J --- H + + classDef input fill:#e1f5fe,stroke:#01579b + classDef agent fill:#f3e5f5,stroke:#4a148c + classDef artifact fill:#fff3e0,stroke:#ef6c00 + classDef infra fill:#e8f5e8,stroke:#1b5e20 + + class A input + class B,D,F,H agent + class C,E,G,I artifact + class J,S1,S2,S3,S4 infra +``` + +## Chapter Guide + +Welcome to your journey through multi-agent software development! This tutorial explores how MetaGPT orchestrates AI agents into a functioning software team. + +1. **[Chapter 1: Getting Started](01-getting-started.md)** - Installation, configuration, and your first multi-agent software run +2. **[Chapter 2: Agent Roles](02-agent-roles.md)** - ProductManager, Architect, Engineer, and QA roles in depth +3. 
**[Chapter 3: SOPs and Workflows](03-sop-and-workflows.md)** - Standardized Operating Procedures and role collaboration patterns +4. **[Chapter 4: Action System](04-action-system.md)** - Actions, action nodes, and building custom actions +5. **[Chapter 5: Memory and Context](05-memory-and-context.md)** - Memory management and context sharing between agents +6. **[Chapter 6: Tool Integration](06-tool-integration.md)** - Web browsing, code execution, and custom tool creation +7. **[Chapter 7: Multi-Agent Orchestration](07-multi-agent-orchestration.md)** - Team composition, task decomposition, and parallel execution +8. **[Chapter 8: Production Deployment](08-production-deployment.md)** - Configuration, cost optimization, and enterprise patterns + +## What You Will Learn + +By the end of this tutorial, you will be able to: + +- **Run a full software generation pipeline** from a single requirement using MetaGPT's built-in roles +- **Understand the SOP-driven architecture** that constrains agents into productive workflows +- **Create custom agent roles** with specialized actions and behaviors +- **Build custom actions and action nodes** for domain-specific tasks +- **Manage shared memory and context** across multi-agent conversations +- **Integrate external tools** including web search, code execution, and APIs +- **Orchestrate complex multi-agent teams** with hierarchical and parallel execution +- **Deploy MetaGPT in production** with cost controls, caching, and monitoring + +## Prerequisites + +- Python 3.9+ (3.10+ recommended) +- Basic understanding of LLM concepts and API usage +- Familiarity with async/await patterns in Python +- An OpenAI API key (or compatible LLM provider key) + +## Source References + +- [MetaGPT GitHub Repository](https://github.com/geekan/MetaGPT) +- [MetaGPT Documentation](https://docs.deepwisdom.ai/main/en/) +- [MetaGPT Paper: "MetaGPT: Meta Programming for A Multi-Agent Collaborative Framework"](https://arxiv.org/abs/2308.00352) + +## 
Related Tutorials + +- [CrewAI Tutorial](../crewai-tutorial/) - Another role-based multi-agent framework +- [AutoGen Tutorial](../autogen-tutorial/) - Microsoft's multi-agent conversation framework +- [Taskade Tutorial](../taskade-tutorial/) - AI-powered productivity with agent workflows + +## Navigation & Backlinks + +- [Start Here: Chapter 1: Getting Started](01-getting-started.md) +- [Back to Main Catalog](../../README.md#-tutorial-catalog) +- [Browse A-Z Tutorial Directory](../../discoverability/tutorial-directory.md) +- [Search by Intent](../../discoverability/query-hub.md) +- [Explore Category Hubs](../../README.md#category-hubs) + +*Generated by [AI Codebase Knowledge Builder](https://github.com/The-Pocket/Tutorial-Codebase-Knowledge)* + +## Full Chapter Map + +1. [Chapter 1: Getting Started](01-getting-started.md) +2. [Chapter 2: Agent Roles](02-agent-roles.md) +3. [Chapter 3: SOPs and Workflows](03-sop-and-workflows.md) +4. [Chapter 4: Action System](04-action-system.md) +5. [Chapter 5: Memory and Context](05-memory-and-context.md) +6. [Chapter 6: Tool Integration](06-tool-integration.md) +7. [Chapter 7: Multi-Agent Orchestration](07-multi-agent-orchestration.md) +8. [Chapter 8: Production Deployment](08-production-deployment.md) diff --git a/tutorials/plane-tutorial/01-getting-started.md b/tutorials/plane-tutorial/01-getting-started.md new file mode 100644 index 0000000..c850e15 --- /dev/null +++ b/tutorials/plane-tutorial/01-getting-started.md @@ -0,0 +1,282 @@ +--- +layout: default +title: "Plane Tutorial - Chapter 1: Getting Started" +nav_order: 1 +has_children: false +parent: Plane Tutorial +--- + +# Chapter 1: Getting Started + +Welcome to **Chapter 1** of the **Plane Tutorial**. This chapter walks you through installing Plane, creating your first workspace, and setting up a project. By the end, you will have a running Plane instance ready for issue tracking and project management. 
+ +> Install Plane, create a workspace, and launch your first project in minutes. + +## What Problem Does This Solve? + +Teams need a project management tool they can control. SaaS solutions like Jira and Linear lock you into their infrastructure and pricing. Plane gives you a full-featured PM platform you can self-host, customize, and extend — without vendor lock-in. + +## Installation Options + +### Docker Compose (Recommended) + +The fastest way to get Plane running locally is with Docker Compose. Plane ships an official `docker-compose.yml` that bundles all services. + +```bash +# Clone the Plane repository +git clone https://github.com/makeplane/plane.git +cd plane + +# Copy the environment template +cp .env.example .env + +# Start all services (web, API, worker, database, redis) +docker compose up -d +``` + +This starts the following services: + +| Service | Port | Description | +|:--------|:-----|:------------| +| **Web (Next.js)** | 3000 | Frontend application | +| **API (Django)** | 8000 | Backend REST API | +| **Worker (Celery)** | — | Background task processing | +| **PostgreSQL** | 5432 | Primary database | +| **Redis** | 6379 | Cache and message broker | +| **MinIO** | 9000 | Object storage for attachments | + +### Environment Configuration + +The `.env` file controls all service configuration. 
Key variables to set: + +```bash +# .env — Core configuration +# ---------------------------- + +# Database +PGHOST=plane-db +PGDATABASE=plane +POSTGRES_USER=plane +POSTGRES_PASSWORD=plane +POSTGRES_DB=plane +DATABASE_URL=postgresql://plane:plane@plane-db:5432/plane + +# Redis +REDIS_HOST=plane-redis +REDIS_PORT=6379 +REDIS_URL=redis://plane-redis:6379/ + +# Application +SECRET_KEY=your-secret-key-here +NEXT_PUBLIC_API_BASE_URL=http://localhost:8000 +WEB_URL=http://localhost:3000 + +# Storage (MinIO) +AWS_S3_BUCKET_NAME=uploads +AWS_ACCESS_KEY_ID=access-key +AWS_SECRET_ACCESS_KEY=secret-key +AWS_S3_ENDPOINT_URL=http://plane-minio:9000 +``` + +### Local Development Setup + +If you want to develop on Plane itself, run the backend and frontend separately. + +#### Backend (Django) + +```bash +# Navigate to the API server directory +cd apiserver + +# Create a virtual environment +python3 -m venv venv +source venv/bin/activate + +# Install dependencies +pip install -r requirements.txt + +# Run database migrations +python manage.py migrate + +# Create a superuser +python manage.py createsuperuser + +# Start the development server +python manage.py runserver 0.0.0.0:8000 +``` + +#### Frontend (Next.js) + +```bash +# Navigate to the web app directory +cd web + +# Install dependencies +yarn install + +# Start the development server +yarn dev +``` + +The frontend will be available at `http://localhost:3000`. + +## Creating Your First Workspace + +Once Plane is running, open your browser and navigate to the web URL. You will be guided through onboarding. + +### Step 1: Sign Up + +Create your admin account. In self-hosted mode, the first user becomes the workspace owner. + +### Step 2: Create a Workspace + +A **Workspace** is the top-level container in Plane. It represents your organization or team. 
+ +``` +Workspace + ├── Project A + │ ├── Issues + │ ├── Cycles + │ ├── Modules + │ └── Pages + ├── Project B + └── Settings +``` + +### Step 3: Create a Project + +Inside your workspace, create your first project. Each project has its own: + +- **Issue tracker** with states, labels, and priorities +- **Cycles** for sprint planning +- **Modules** for feature grouping +- **Pages** for documentation and wiki + +### Step 4: Invite Team Members + +Plane supports role-based access control: + +| Role | Permissions | +|:-----|:------------| +| **Owner** | Full workspace control | +| **Admin** | Manage projects and members | +| **Member** | Create and manage issues | +| **Guest** | View-only access | + +## How It Works Under the Hood + +When you create a workspace or project, the Django backend processes the request through a layered architecture. + +```mermaid +sequenceDiagram + participant U as User Browser + participant W as Next.js Web App + participant A as Django API + participant DB as PostgreSQL + participant R as Redis + + U->>W: Create Workspace + W->>A: POST /api/v1/workspaces/ + A->>DB: INSERT workspace record + A->>R: Cache workspace metadata + A-->>W: 201 Created (workspace JSON) + W-->>U: Redirect to workspace dashboard + + U->>W: Create Project + W->>A: POST /api/v1/workspaces/{slug}/projects/ + A->>DB: INSERT project + default states + A->>DB: INSERT default labels + A-->>W: 201 Created (project JSON) + W-->>U: Show project board +``` + +### Django Model: Workspace + +The workspace model is the root entity in the Plane data model: + +```python +# apiserver/plane/db/models/workspace.py + +class Workspace(BaseModel): + name = models.CharField(max_length=80) + logo = models.URLField(blank=True, null=True) + slug = models.SlugField(max_length=48, unique=True) + owner = models.ForeignKey( + "db.User", + on_delete=models.CASCADE, + related_name="owner_workspace", + ) + + def __str__(self): + return self.name + + class Meta: + verbose_name = "Workspace" + 
verbose_name_plural = "Workspaces" + ordering = ("-created_at",) +``` + +### Django Model: Project + +Projects belong to a workspace and hold all issue-tracking data: + +```python +# apiserver/plane/db/models/project.py + +class Project(BaseModel): + NETWORK_CHOICES = ((0, "Secret"), (2, "Public")) + + name = models.CharField(max_length=255) + description = models.TextField(blank=True) + workspace = models.ForeignKey( + "db.Workspace", + on_delete=models.CASCADE, + related_name="projects", + ) + identifier = models.CharField(max_length=12) + network = models.PositiveSmallIntegerField( + default=2, choices=NETWORK_CHOICES + ) + default_assignee = models.ForeignKey( + "db.User", + on_delete=models.SET_NULL, + null=True, + blank=True, + ) + + class Meta: + unique_together = [["workspace", "identifier"]] + ordering = ("-created_at",) +``` + +## Verifying Your Setup + +After installation, confirm all services are healthy: + +```bash +# Check running containers +docker compose ps + +# Verify API health +curl http://localhost:8000/api/v1/health/ + +# Check database connectivity (query PostgreSQL directly) +docker compose exec plane-db psql -U plane -d plane -c "SELECT 1;" +``` + +## Key Takeaways + +- Plane runs as a multi-service stack: Next.js frontend, Django API, Celery workers, PostgreSQL, and Redis. +- Docker Compose is the recommended way to get started quickly. +- Workspaces are the top-level organizational unit; projects live inside workspaces. +- The Django backend uses standard model patterns with ForeignKey relationships between Workspace, Project, and User. + +## Cross-References + +- **Next chapter:** [Chapter 2: System Architecture](02-system-architecture.md) dives deeper into the Django + Next.js stack. +- **Issue tracking:** [Chapter 3: Issue Tracking](03-issue-tracking.md) covers creating your first issues. +- **Deployment:** [Chapter 8: Self-Hosting and Deployment](08-self-hosting-and-deployment.md) covers production configuration. 
+ +--- + +*Generated by [AI Codebase Knowledge Builder](https://github.com/The-Pocket/Tutorial-Codebase-Knowledge)* diff --git a/tutorials/plane-tutorial/02-system-architecture.md b/tutorials/plane-tutorial/02-system-architecture.md new file mode 100644 index 0000000..cf4f464 --- /dev/null +++ b/tutorials/plane-tutorial/02-system-architecture.md @@ -0,0 +1,382 @@ +--- +layout: default +title: "Plane Tutorial - Chapter 2: System Architecture" +nav_order: 2 +has_children: false +parent: Plane Tutorial +--- + +# Chapter 2: System Architecture + +Welcome to **Chapter 2** of the **Plane Tutorial**. This chapter examines the full-stack architecture of Plane — from the Next.js frontend to the Django backend, database schema design, and background worker system. + +> Understand how Plane's Django backend, Next.js frontend, and supporting services fit together. + +## What Problem Does This Solve? + +Building a project management tool requires coordinating many concerns: real-time updates, background processing, file storage, API design, and a responsive UI. Understanding Plane's architecture shows you how a production-grade PM platform organizes these layers and how each service communicates. + +## High-Level Architecture + +Plane follows a classic client-server architecture with clear separation between the frontend SPA and the backend API. 
+ +``` +┌─────────────────────────────────────────────────┐ +│ User Browser │ +└──────────────────────┬──────────────────────────┘ + │ HTTPS +┌──────────────────────▼──────────────────────────┐ +│ Nginx / Reverse Proxy │ +├────────────┬─────────────────┬──────────────────┤ +│ / │ /api/ │ /uploads/ │ +│ Next.js │ Django API │ MinIO / S3 │ +│ (Web) │ (apiserver) │ (Storage) │ +└────────────┴────────┬────────┴──────────────────┘ + │ + ┌─────────────┼─────────────┐ + │ │ │ + PostgreSQL Redis Celery Workers + (Data store) (Cache/MQ) (Background jobs) +``` + +## Backend: Django API Server + +The Django backend lives in the `apiserver/` directory and provides the REST API that powers the entire application. + +### Project Layout + +``` +apiserver/ +├── plane/ +│ ├── api/ # API views and serializers +│ │ ├── views/ # ViewSets for each resource +│ │ ├── serializers/ +│ │ └── urls/ # URL routing +│ ├── app/ # Core application logic +│ │ ├── views/ # App-specific views +│ │ └── permissions.py +│ ├── db/ # Database models +│ │ └── models/ +│ │ ├── workspace.py +│ │ ├── project.py +│ │ ├── issue.py +│ │ ├── cycle.py +│ │ ├── module.py +│ │ └── page.py +│ ├── bgtasks/ # Celery background tasks +│ ├── middleware/ # Custom middleware +│ └── settings/ # Django settings +├── requirements.txt +└── manage.py +``` + +### API Design Pattern + +Plane uses Django REST Framework (DRF) ViewSets with a consistent URL structure: + +```python +# apiserver/plane/api/urls/issue.py + +from django.urls import path +from plane.api.views import ( + IssueViewSet, + IssueLabelViewSet, + IssueCommentViewSet, + IssueActivityViewSet, +) + +urlpatterns = [ + path( + "workspaces//projects//issues/", + IssueViewSet.as_view({"get": "list", "post": "create"}), + name="project-issues", + ), + path( + "workspaces//projects//issues//", + IssueViewSet.as_view({ + "get": "retrieve", + "patch": "partial_update", + "delete": "destroy", + }), + name="project-issue-detail", + ), +] +``` + +### Base Model Pattern + 
+All Plane models inherit from a `BaseModel` that provides common fields: + +```python +# apiserver/plane/db/models/base.py + +import uuid +from django.db import models + + +class BaseModel(models.Model): + id = models.UUIDField( + default=uuid.uuid4, unique=True, editable=False, primary_key=True + ) + created_at = models.DateTimeField(auto_now_add=True) + updated_at = models.DateTimeField(auto_now=True) + created_by = models.ForeignKey( + "db.User", + on_delete=models.SET_NULL, + null=True, + blank=True, + related_name="%(class)s_created_by", + ) + updated_by = models.ForeignKey( + "db.User", + on_delete=models.SET_NULL, + null=True, + blank=True, + related_name="%(class)s_updated_by", + ) + + class Meta: + abstract = True +``` + +This gives every entity a UUID primary key, timestamps, and audit fields — a pattern common in enterprise PM tools. + +### Permission Layer + +Plane uses a custom permission system based on workspace and project roles: + +```python +# apiserver/plane/app/permissions.py + +from rest_framework.permissions import BasePermission + +ROLE_CHOICES = { + "owner": 20, + "admin": 15, + "member": 10, + "guest": 5, +} + + +class ProjectEntityPermission(BasePermission): + def has_permission(self, request, view): + if request.method in ["GET", "HEAD"]: + return ProjectMember.objects.filter( + project_id=view.kwargs.get("project_id"), + member=request.user, + role__gte=ROLE_CHOICES["guest"], + ).exists() + + return ProjectMember.objects.filter( + project_id=view.kwargs.get("project_id"), + member=request.user, + role__gte=ROLE_CHOICES["member"], + ).exists() +``` + +## Frontend: Next.js Web Application + +The frontend lives in the `web/` directory and is built with Next.js, TypeScript, and Tailwind CSS. 
+ +### Frontend Structure + +``` +web/ +├── app/ # Next.js App Router pages +│ ├── [workspaceSlug]/ +│ │ ├── projects/ +│ │ │ └── [projectId]/ +│ │ │ ├── issues/ +│ │ │ ├── cycles/ +│ │ │ ├── modules/ +│ │ │ └── pages/ +│ │ └── settings/ +│ └── layout.tsx +├── components/ # Reusable UI components +├── store/ # State management (MobX) +├── services/ # API client layer +├── helpers/ # Utility functions +└── lib/ # Configuration and providers +``` + +### API Client Layer + +The frontend communicates with the Django backend through a typed service layer: + +```typescript +// web/services/issue.service.ts + +import { APIService } from "services/api.service"; +import { IIssue, IIssueResponse } from "types/issue"; + +export class IssueService extends APIService { + constructor() { + super(process.env.NEXT_PUBLIC_API_BASE_URL || ""); + } + + async getIssues( + workspaceSlug: string, + projectId: string, + queries?: object + ): Promise { + return this.get( + `/api/v1/workspaces/${workspaceSlug}/projects/${projectId}/issues/`, + { params: queries } + ); + } + + async createIssue( + workspaceSlug: string, + projectId: string, + data: Partial + ): Promise { + return this.post( + `/api/v1/workspaces/${workspaceSlug}/projects/${projectId}/issues/`, + data + ); + } +} +``` + +### State Management with MobX + +Plane uses MobX for reactive state management: + +```typescript +// web/store/issue/issue.store.ts + +import { makeObservable, observable, action, computed } from "mobx"; +import { IIssue } from "types/issue"; +import { IssueService } from "services/issue.service"; + +export class IssueStore { + issues: Record = {}; + issueService: IssueService; + + constructor() { + makeObservable(this, { + issues: observable, + fetchIssues: action, + issuesList: computed, + }); + this.issueService = new IssueService(); + } + + get issuesList(): IIssue[] { + return Object.values(this.issues); + } + + fetchIssues = async (workspaceSlug: string, projectId: string) => { + const response = 
await this.issueService.getIssues( + workspaceSlug, + projectId + ); + response.results.forEach((issue) => { + this.issues[issue.id] = issue; + }); + }; +} +``` + +## How It Works Under the Hood + +When a user interacts with the Plane UI, here is the full request lifecycle: + +```mermaid +flowchart TD + A[User Action in Browser] --> B[Next.js Component] + B --> C[MobX Store Action] + C --> D[API Service Layer] + D --> E[HTTP Request to Django] + + E --> F[DRF URL Router] + F --> G[Permission Check] + G --> H{Authorized?} + H -- No --> I[403 Forbidden] + H -- Yes --> J[ViewSet Method] + + J --> K[Serializer Validation] + K --> L[Model Operation] + L --> M[PostgreSQL] + + L --> N[Signal / Post-Save] + N --> O[Celery Task Queue] + O --> P[Redis Broker] + P --> Q[Celery Worker] + Q --> R[Send Webhook / Email / AI] + + M --> S[Response Serialized] + S --> T[JSON to Frontend] + T --> U[MobX Store Updated] + U --> V[UI Re-renders] + + classDef frontend fill:#e1f5fe,stroke:#01579b + classDef backend fill:#f3e5f5,stroke:#4a148c + classDef data fill:#fff3e0,stroke:#ef6c00 + classDef worker fill:#e8f5e8,stroke:#1b5e20 + + class A,B,C,D,V frontend + class E,F,G,H,I,J,K,L,N,S,T backend + class M,P data + class O,Q,R worker +``` + +## Background Task System + +Plane uses Celery with Redis as the message broker for asynchronous work: + +```python +# apiserver/plane/bgtasks/issue_activity_task.py + +from celery import shared_task +from plane.db.models import IssueActivity + + +@shared_task +def issue_activity_task( + type, requested_data, current_instance, + issue_id, project_id, workspace_id, actor_id +): + """Track all changes made to an issue as activity entries.""" + IssueActivity.objects.create( + issue_id=issue_id, + project_id=project_id, + workspace_id=workspace_id, + actor_id=actor_id, + field=type, + old_value=current_instance, + new_value=requested_data, + ) +``` + +## Database Schema Overview + +The core entities and their relationships: + +``` +Workspace (1) ──< 
Project (1) ──< Issue + │ │ + ├──< Cycle ├──< IssueComment + ├──< Module ├──< IssueActivity + ├──< Page ├──< IssueLabel + ├──< Label └──< IssueAssignee + └──< State +``` + +## Key Takeaways + +- Plane is a Django + Next.js full-stack application with PostgreSQL, Redis, and Celery. +- The backend uses DRF ViewSets with nested URL routing scoped to workspace/project. +- All models share a `BaseModel` with UUID primary keys and audit fields. +- The frontend uses MobX for state management and a typed service layer for API calls. +- Background tasks (activity tracking, webhooks, notifications) are processed by Celery workers. + +## Cross-References + +- **Previous:** [Chapter 1: Getting Started](01-getting-started.md) covers installation. +- **Next:** [Chapter 3: Issue Tracking](03-issue-tracking.md) dives into the issue data model. +- **Deployment details:** [Chapter 8: Self-Hosting and Deployment](08-self-hosting-and-deployment.md). + +--- + +*Generated by [AI Codebase Knowledge Builder](https://github.com/The-Pocket/Tutorial-Codebase-Knowledge)* diff --git a/tutorials/plane-tutorial/03-issue-tracking.md b/tutorials/plane-tutorial/03-issue-tracking.md new file mode 100644 index 0000000..0bf6c57 --- /dev/null +++ b/tutorials/plane-tutorial/03-issue-tracking.md @@ -0,0 +1,402 @@ +--- +layout: default +title: "Plane Tutorial - Chapter 3: Issue Tracking" +nav_order: 3 +has_children: false +parent: Plane Tutorial +--- + +# Chapter 3: Issue Tracking + +Welcome to **Chapter 3** of the **Plane Tutorial**. This chapter covers the core of any project management tool — issues. You will learn how Plane models issues, states, labels, priorities, assignees, and sub-issues. + +> Create, organize, and track issues with states, priorities, labels, and hierarchical sub-issues. + +## What Problem Does This Solve? + +Every software team needs a structured way to track work. 
Plane's issue system provides a flexible, extensible data model that supports multiple views (list, board, spreadsheet), custom states, and hierarchical relationships — all without the complexity overhead of legacy tools like Jira. + +## The Issue Data Model + +At its core, an issue in Plane is a rich entity with many relationships: + +```python +# apiserver/plane/db/models/issue.py + +class Issue(ProjectBaseModel): + PRIORITY_CHOICES = ( + ("urgent", "Urgent"), + ("high", "High"), + ("medium", "Medium"), + ("low", "Low"), + ("none", "None"), + ) + + name = models.CharField(max_length=255) + description = models.JSONField(blank=True, default=dict) + description_html = models.TextField(blank=True, default="
<p></p>
") + description_stripped = models.TextField(blank=True, null=True) + priority = models.CharField( + max_length=30, + choices=PRIORITY_CHOICES, + default="none", + ) + state = models.ForeignKey( + "db.State", + on_delete=models.CASCADE, + related_name="state_issues", + ) + parent = models.ForeignKey( + "self", + on_delete=models.CASCADE, + null=True, + blank=True, + related_name="sub_issues", + ) + estimate_point = models.IntegerField( + null=True, blank=True, default=None + ) + sequence_id = models.FloatField(default=65535) + start_date = models.DateField(null=True, blank=True) + target_date = models.DateField(null=True, blank=True) + sort_order = models.FloatField(default=65535) + + class Meta: + ordering = ("-created_at",) +``` + +### Key Relationships + +Issues connect to many other entities through junction tables: + +```python +# Assignees — many-to-many through IssueAssignee +class IssueAssignee(ProjectBaseModel): + issue = models.ForeignKey( + Issue, on_delete=models.CASCADE, related_name="issue_assignees" + ) + assignee = models.ForeignKey( + "db.User", on_delete=models.CASCADE, related_name="issue_assignees" + ) + + class Meta: + unique_together = ["issue", "assignee"] + + +# Labels — many-to-many through IssueLabel +class IssueLabel(ProjectBaseModel): + issue = models.ForeignKey( + Issue, on_delete=models.CASCADE, related_name="issue_labels" + ) + label = models.ForeignKey( + "db.Label", on_delete=models.CASCADE, related_name="issue_labels" + ) + + class Meta: + unique_together = ["issue", "label"] +``` + +## States and Workflow + +States define the workflow for issues in a project. 
Each project can have custom states organized into groups: + +```python +# apiserver/plane/db/models/state.py + +class State(ProjectBaseModel): + GROUP_CHOICES = ( + ("backlog", "Backlog"), + ("unstarted", "Unstarted"), + ("started", "Started"), + ("completed", "Completed"), + ("cancelled", "Cancelled"), + ) + + name = models.CharField(max_length=255) + description = models.TextField(blank=True) + color = models.CharField(max_length=255) + group = models.CharField( + max_length=20, choices=GROUP_CHOICES, default="backlog" + ) + sequence = models.FloatField(default=65535) + is_default = models.BooleanField(default=False) + + class Meta: + unique_together = ["project", "name"] + ordering = ("sequence",) +``` + +### Default States per Project + +When a project is created, Plane automatically provisions default states: + +| Group | Default State | Color | +|:------|:-------------|:------| +| Backlog | Backlog | `#A3A3A3` | +| Unstarted | Todo | `#3A3A3A` | +| Started | In Progress | `#F59E0B` | +| Completed | Done | `#16A34A` | +| Cancelled | Cancelled | `#EF4444` | + +## Creating Issues via the API + +### Backend ViewSet + +```python +# apiserver/plane/api/views/issue.py + +from rest_framework import status +from rest_framework.response import Response +from plane.db.models import Issue, IssueAssignee, IssueLabel +from plane.api.serializers import IssueSerializer +from plane.bgtasks.issue_activity_task import issue_activity_task + + +class IssueViewSet(ProjectBaseViewSet): + serializer_class = IssueSerializer + model = Issue + + def get_queryset(self): + return ( + Issue.objects.filter( + workspace__slug=self.kwargs.get("slug"), + project_id=self.kwargs.get("project_id"), + ) + .select_related("state", "parent", "project") + .prefetch_related("issue_assignees", "issue_labels") + ) + + def create(self, request, slug, project_id): + serializer = IssueSerializer( + data=request.data, + context={"project_id": project_id, "workspace_slug": slug}, + ) + if 
serializer.is_valid(): + serializer.save() + # Track activity asynchronously + issue_activity_task.delay( + type="issue.activity.created", + requested_data=request.data, + current_instance=None, + issue_id=str(serializer.data["id"]), + project_id=str(project_id), + workspace_id=str(request.workspace.id), + actor_id=str(request.user.id), + ) + return Response(serializer.data, status=status.HTTP_201_CREATED) + return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST) +``` + +### Frontend: Creating an Issue + +```typescript +// web/components/issues/issue-modal.tsx + +import { useForm, Controller } from "react-hook-form"; +import { IIssue } from "types/issue"; +import { useIssueStore } from "store/issue"; + +interface IssueFormData { + name: string; + description_html: string; + priority: "urgent" | "high" | "medium" | "low" | "none"; + state_id: string; + assignee_ids: string[]; + label_ids: string[]; + parent_id?: string; + start_date?: string; + target_date?: string; +} + +export const CreateIssueModal: React.FC = () => { + const { createIssue } = useIssueStore(); + const { control, handleSubmit } = useForm({ + defaultValues: { + name: "", + priority: "none", + assignee_ids: [], + label_ids: [], + }, + }); + + const onSubmit = async (data: IssueFormData) => { + await createIssue(workspaceSlug, projectId, data); + }; + + return ( +
+ ( + + )} + /> + {/* Priority selector, state picker, assignees, labels... */} + + ); +}; +``` + +## Sub-Issues (Parent-Child Hierarchy) + +Plane supports hierarchical issues through the `parent` self-referential foreign key. This allows you to break large tasks into smaller, trackable sub-issues. + +```mermaid +flowchart TD + A["Epic: User Authentication
<br/>PROJ-101"] --> B["Story: Login Flow
<br/>PROJ-102"] + A --> C["Story: Registration
<br/>PROJ-103"] + A --> D["Story: Password Reset
<br/>PROJ-104"] + + B --> E["Task: Login Form UI
<br/>PROJ-105"] + B --> F["Task: OAuth Integration
<br/>PROJ-106"] + B --> G["Task: Session Management
<br/>PROJ-107"] + + C --> H["Task: Signup Form
<br/>PROJ-108"] + C --> I["Task: Email Verification
PROJ-109"] + + classDef epic fill:#e1f5fe,stroke:#01579b + classDef story fill:#f3e5f5,stroke:#4a148c + classDef task fill:#e8f5e8,stroke:#1b5e20 + + class A epic + class B,C,D story + class E,F,G,H,I task +``` + +### Querying Sub-Issues + +```python +# Fetch all sub-issues of a parent +sub_issues = Issue.objects.filter( + parent_id=parent_issue_id, + project_id=project_id, +).select_related("state").prefetch_related("issue_assignees") + +# Recursive sub-issue tree (using Django CTE or manual recursion) +def get_issue_tree(issue_id): + issue = Issue.objects.get(id=issue_id) + children = Issue.objects.filter(parent_id=issue_id) + return { + "issue": issue, + "sub_issues": [get_issue_tree(child.id) for child in children], + } +``` + +## Labels + +Labels provide flexible categorization beyond states: + +```python +# apiserver/plane/db/models/label.py + +class Label(ProjectBaseModel): + name = models.CharField(max_length=255) + description = models.TextField(blank=True) + color = models.CharField(max_length=255, blank=True) + parent = models.ForeignKey( + "self", + on_delete=models.CASCADE, + null=True, + blank=True, + related_name="label_children", + ) + sort_order = models.FloatField(default=65535) + + class Meta: + unique_together = ["project", "name"] +``` + +Labels also support a parent-child hierarchy, enabling grouped labels like `Frontend > React` or `Bug > Critical`. 
+ +## Issue Views and Filters + +Plane supports multiple views of the same issue data: + +| View | Description | +|:-----|:------------| +| **Board** | Kanban board grouped by state | +| **List** | Traditional list with sorting | +| **Spreadsheet** | Table with inline editing | +| **Calendar** | Timeline based on due dates | +| **Gantt** | Gantt chart for dependencies | + +### Filter System + +```typescript +// web/types/issue-filters.ts + +interface IIssueFilterOptions { + state?: string[]; + priority?: string[]; + assignees?: string[]; + labels?: string[]; + created_by?: string[]; + start_date?: string[]; + target_date?: string[]; + subscriber?: string[]; +} + +interface IIssueDisplayProperties { + assignee: boolean; + start_date: boolean; + due_date: boolean; + labels: boolean; + priority: boolean; + state: boolean; + sub_issue_count: boolean; + estimate: boolean; +} +``` + +## How It Works Under the Hood + +```mermaid +sequenceDiagram + participant U as User + participant FE as Next.js Frontend + participant API as Django API + participant DB as PostgreSQL + participant CQ as Celery Queue + + U->>FE: Click "Create Issue" + FE->>FE: Open issue modal + U->>FE: Fill title, priority, assignees + FE->>API: POST /api/v1/.../issues/ + + API->>API: Validate via IssueSerializer + API->>DB: INSERT into issues table + API->>DB: INSERT into issue_assignees + API->>DB: INSERT into issue_labels + API->>CQ: Dispatch activity tracking task + API-->>FE: 201 Created (issue JSON) + + CQ->>DB: INSERT into issue_activities + CQ->>CQ: Send notifications (if configured) + + FE->>FE: Update MobX store + FE-->>U: Show new issue on board +``` + +## Key Takeaways + +- Issues are rich entities with priority, state, assignees, labels, dates, and estimates. +- States are grouped into five categories (Backlog, Unstarted, Started, Completed, Cancelled) for consistent workflow reporting. +- Sub-issues use a self-referential foreign key, enabling hierarchical task breakdown. 
+- Labels support nesting for organized categorization. +- Issue activity is tracked asynchronously via Celery for audit trails. + +## Cross-References + +- **Architecture:** [Chapter 2: System Architecture](02-system-architecture.md) for the full backend structure. +- **Sprint planning:** [Chapter 4: Cycles and Modules](04-cycles-and-modules.md) for organizing issues into sprints. +- **AI-powered creation:** [Chapter 5: AI Features](05-ai-features.md) for AI-assisted issue creation. + +--- + +*Generated by [AI Codebase Knowledge Builder](https://github.com/The-Pocket/Tutorial-Codebase-Knowledge)* diff --git a/tutorials/plane-tutorial/04-cycles-and-modules.md b/tutorials/plane-tutorial/04-cycles-and-modules.md new file mode 100644 index 0000000..9cb6670 --- /dev/null +++ b/tutorials/plane-tutorial/04-cycles-and-modules.md @@ -0,0 +1,420 @@ +--- +layout: default +title: "Plane Tutorial - Chapter 4: Cycles and Modules" +nav_order: 4 +has_children: false +parent: Plane Tutorial +--- + +# Chapter 4: Cycles and Modules + +Welcome to **Chapter 4** of the **Plane Tutorial**. This chapter covers two key organizational constructs in Plane: **Cycles** (time-boxed sprints) and **Modules** (feature-based groupings). Together, they enable flexible roadmap planning. + +> Plan sprints with Cycles, group features with Modules, and build your product roadmap. + +## What Problem Does This Solve? + +Issues on their own are just a list. Teams need two complementary ways to organize work: + +1. **Time-based planning** — "What are we doing this sprint?" (Cycles) +2. **Feature-based grouping** — "What issues belong to the authentication feature?" (Modules) + +Plane provides both, and an issue can belong to one cycle and multiple modules simultaneously. + +## Cycles: Time-Boxed Sprints + +A Cycle represents a sprint or iteration with a start and end date. Issues are assigned to cycles for time-boxed delivery. 
+ +### Cycle Data Model + +```python +# apiserver/plane/db/models/cycle.py + +class Cycle(ProjectBaseModel): + name = models.CharField(max_length=255) + description = models.TextField(blank=True) + start_date = models.DateField(null=True, blank=True) + end_date = models.DateField(null=True, blank=True) + owned_by = models.ForeignKey( + "db.User", + on_delete=models.CASCADE, + related_name="owned_cycles", + ) + view_props = models.JSONField(default=dict) + sort_order = models.FloatField(default=65535) + + class Meta: + ordering = ("-created_at",) + unique_together = ["name", "project"] + + +class CycleIssue(ProjectBaseModel): + """Junction table linking issues to cycles.""" + cycle = models.ForeignKey( + Cycle, on_delete=models.CASCADE, related_name="cycle_issues" + ) + issue = models.ForeignKey( + "db.Issue", on_delete=models.CASCADE, related_name="issue_cycle" + ) + + class Meta: + unique_together = ["cycle", "issue"] +``` + +### Cycle Lifecycle + +Cycles progress through distinct phases: + +```mermaid +stateDiagram-v2 + [*] --> Draft: Create cycle + Draft --> Upcoming: Set start date (future) + Draft --> Current: Start date is today + Upcoming --> Current: Start date reached + Current --> Completed: End date reached + Completed --> [*] + + note right of Draft + No dates set yet. + Planning phase. + end note + + note right of Current + Active sprint. + Issues being worked on. + end note + + note right of Completed + Sprint ended. + Burndown finalized. 
+ end note +``` + +### Creating a Cycle via the API + +```python +# apiserver/plane/api/views/cycle.py + +class CycleViewSet(ProjectBaseViewSet): + serializer_class = CycleSerializer + model = Cycle + + def get_queryset(self): + return Cycle.objects.filter( + workspace__slug=self.kwargs.get("slug"), + project_id=self.kwargs.get("project_id"), + ).annotate( + total_issues=Count("cycle_issues"), + completed_issues=Count( + "cycle_issues", + filter=Q( + cycle_issues__issue__state__group="completed" + ), + ), + cancelled_issues=Count( + "cycle_issues", + filter=Q( + cycle_issues__issue__state__group="cancelled" + ), + ), + ) + + def create(self, request, slug, project_id): + serializer = CycleSerializer(data=request.data) + if serializer.is_valid(): + serializer.save( + project_id=project_id, + owned_by=request.user, + ) + return Response(serializer.data, status=status.HTTP_201_CREATED) + return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST) +``` + +### Adding Issues to a Cycle + +```python +# POST /api/v1/workspaces/{slug}/projects/{project_id}/cycles/{cycle_id}/cycle-issues/ + +class CycleIssueViewSet(ProjectBaseViewSet): + serializer_class = CycleIssueSerializer + + def create(self, request, slug, project_id, cycle_id): + issues = request.data.get("issues", []) + cycle_issues = [] + for issue_id in issues: + cycle_issues.append( + CycleIssue( + cycle_id=cycle_id, + issue_id=issue_id, + project_id=project_id, + workspace_id=request.workspace.id, + created_by=request.user, + ) + ) + CycleIssue.objects.bulk_create( + cycle_issues, ignore_conflicts=True + ) + return Response({"message": "Issues added to cycle"}, status=200) +``` + +### Frontend: Cycle Board + +```typescript +// web/components/cycles/cycle-board.tsx + +import { observer } from "mobx-react-lite"; +import { useCycleStore } from "store/cycle"; + +interface CycleProgress { + total_issues: number; + completed_issues: number; + cancelled_issues: number; + started_issues: number; + 
unstarted_issues: number; + backlog_issues: number; +} + +export const CycleBoardView: React.FC = observer(() => { + const { currentCycle, cycleIssues } = useCycleStore(); + + const progress: CycleProgress = { + total_issues: currentCycle?.total_issues || 0, + completed_issues: currentCycle?.completed_issues || 0, + cancelled_issues: currentCycle?.cancelled_issues || 0, + started_issues: currentCycle?.started_issues || 0, + unstarted_issues: currentCycle?.unstarted_issues || 0, + backlog_issues: currentCycle?.backlog_issues || 0, + }; + + const completionPercentage = + progress.total_issues > 0 + ? Math.round( + (progress.completed_issues / progress.total_issues) * 100 + ) + : 0; + + return ( +
+
+

{currentCycle?.name}

+ + {completionPercentage}% complete + +
+ + +
+ ); +}); +``` + +## Modules: Feature-Based Grouping + +Modules group related issues by feature or initiative, independent of time. An issue can belong to multiple modules. + +### Module Data Model + +```python +# apiserver/plane/db/models/module.py + +class Module(ProjectBaseModel): + STATUS_CHOICES = ( + ("backlog", "Backlog"), + ("planned", "Planned"), + ("in-progress", "In Progress"), + ("paused", "Paused"), + ("completed", "Completed"), + ("cancelled", "Cancelled"), + ) + + name = models.CharField(max_length=255) + description = models.TextField(blank=True) + description_html = models.TextField(blank=True, default="
<p></p>
") + lead = models.ForeignKey( + "db.User", + on_delete=models.SET_NULL, + null=True, + blank=True, + related_name="module_leads", + ) + members = models.ManyToManyField( + "db.User", + blank=True, + related_name="module_members", + through="ModuleMember", + ) + start_date = models.DateField(null=True, blank=True) + target_date = models.DateField(null=True, blank=True) + status = models.CharField( + max_length=20, choices=STATUS_CHOICES, default="planned" + ) + view_props = models.JSONField(default=dict) + sort_order = models.FloatField(default=65535) + + class Meta: + unique_together = ["name", "project"] + ordering = ("-created_at",) + + +class ModuleIssue(ProjectBaseModel): + """Junction table linking issues to modules.""" + module = models.ForeignKey( + Module, on_delete=models.CASCADE, related_name="module_issues" + ) + issue = models.ForeignKey( + "db.Issue", on_delete=models.CASCADE, related_name="issue_module" + ) + + class Meta: + unique_together = ["module", "issue"] +``` + +### Cycles vs. Modules + +| Aspect | Cycles | Modules | +|:-------|:-------|:--------| +| **Purpose** | Time-boxed sprints | Feature grouping | +| **Time-bound** | Yes (start + end date) | Optional dates | +| **Issue membership** | One cycle per issue | Multiple modules per issue | +| **Progress tracking** | Burndown charts | Completion percentage | +| **Typical usage** | "Sprint 12" | "Authentication Feature" | + +## How It Works Under the Hood + +The relationship between issues, cycles, and modules: + +```mermaid +flowchart LR + subgraph Project + I1[Issue: Login API] + I2[Issue: Login UI] + I3[Issue: OAuth] + I4[Issue: Signup Form] + I5[Issue: DB Migration] + end + + subgraph Cycles + C1[Sprint 12
<br/>Mar 1-15] + C2[Sprint 13
Mar 15-29] + end + + subgraph Modules + M1[Auth Module] + M2[Onboarding Module] + end + + C1 --- I1 + C1 --- I2 + C2 --- I3 + C2 --- I4 + C1 --- I5 + + M1 --- I1 + M1 --- I2 + M1 --- I3 + M2 --- I4 + M2 --- I3 + + classDef issue fill:#fff3e0,stroke:#ef6c00 + classDef cycle fill:#e1f5fe,stroke:#01579b + classDef module fill:#e8f5e8,stroke:#1b5e20 + + class I1,I2,I3,I4,I5 issue + class C1,C2 cycle + class M1,M2 module +``` + +Notice how Issue `I3` (OAuth) belongs to Sprint 13 **and** to both the Auth Module and Onboarding Module. This dual-axis organization is a core strength of Plane. + +## Burndown and Analytics + +Cycles provide built-in analytics. The backend computes burndown data by tracking state transitions over time: + +```python +# apiserver/plane/api/views/analytic.py + +def get_cycle_burndown(cycle_id, project_id): + """Compute daily burndown for a cycle.""" + cycle = Cycle.objects.get(id=cycle_id) + cycle_issues = CycleIssue.objects.filter(cycle=cycle) + + total = cycle_issues.count() + burndown = [] + current_date = cycle.start_date + + while current_date <= cycle.end_date: + completed = cycle_issues.filter( + issue__state__group="completed", + issue__updated_at__date__lte=current_date, + ).count() + burndown.append({ + "date": current_date.isoformat(), + "total": total, + "completed": completed, + "remaining": total - completed, + }) + current_date += timedelta(days=1) + + return burndown +``` + +### Frontend: Burndown Chart + +```typescript +// web/components/cycles/cycle-analytics.tsx + +interface BurndownPoint { + date: string; + total: number; + completed: number; + remaining: number; +} + +export const CycleBurndownChart: React.FC<{ + data: BurndownPoint[]; +}> = ({ data }) => { + const idealBurndown = data.map((point, index) => ({ + date: point.date, + ideal: point.total - (point.total / data.length) * index, + })); + + return ( + + + + + + + + ); +}; +``` + +## Key Takeaways + +- **Cycles** are time-boxed sprints; each issue belongs to at most 
one active cycle. +- **Modules** are feature-based groupings; an issue can belong to multiple modules. +- Both use junction tables (`CycleIssue`, `ModuleIssue`) for many-to-many relationships. +- Cycle progress is computed via annotated querysets counting issues by state group. +- The dual-axis (time + feature) organization provides flexibility that single-axis tools lack. + +## Cross-References + +- **Issues:** [Chapter 3: Issue Tracking](03-issue-tracking.md) for the issue data model. +- **AI planning:** [Chapter 5: AI Features](05-ai-features.md) for AI-assisted sprint planning. +- **API access:** [Chapter 7: API and Integrations](07-api-and-integrations.md) for managing cycles/modules programmatically. + +--- + +*Generated by [AI Codebase Knowledge Builder](https://github.com/The-Pocket/Tutorial-Codebase-Knowledge)* diff --git a/tutorials/plane-tutorial/05-ai-features.md b/tutorials/plane-tutorial/05-ai-features.md new file mode 100644 index 0000000..a385001 --- /dev/null +++ b/tutorials/plane-tutorial/05-ai-features.md @@ -0,0 +1,451 @@ +--- +layout: default +title: "Plane Tutorial - Chapter 5: AI Features" +nav_order: 5 +has_children: false +parent: Plane Tutorial +--- + +# Chapter 5: AI Features + +Welcome to **Chapter 5** of the **Plane Tutorial**. This chapter explores Plane's AI-native capabilities — from AI-assisted issue creation to smart suggestions, automated triage, and intelligent planning helpers. + +> Leverage AI for issue creation, smart suggestions, automated triage, and planning assistance. + +## What Problem Does This Solve? + +Writing clear issue descriptions, triaging incoming bugs, estimating effort, and breaking down epics are time-consuming tasks. Plane integrates AI directly into the project management workflow so that these tasks can be accelerated or automated, reducing overhead for engineering teams. 
+ +## AI Architecture Overview + +Plane's AI features are built as a separate service that the Django backend communicates with asynchronously: + +```mermaid +flowchart TD + A[User Action] --> B[Next.js Frontend] + B --> C[Django API] + C --> D[AI Service] + + D --> E[LLM Provider] + E --> F[OpenAI / GPT-4] + E --> G[Self-hosted LLM] + + D --> H[Response Processing] + H --> C + C --> B + B --> A + + subgraph AI Service + D --> I[Prompt Templates] + D --> J[Context Builder] + D --> K[Response Parser] + end + + classDef user fill:#e1f5fe,stroke:#01579b + classDef backend fill:#f3e5f5,stroke:#4a148c + classDef ai fill:#fff3e0,stroke:#ef6c00 + + class A,B user + class C backend + class D,E,F,G,H,I,J,K ai +``` + +## AI-Assisted Issue Creation + +The most visible AI feature is the ability to generate issue details from a brief description. + +### Backend: AI Issue Generation Endpoint + +```python +# apiserver/plane/api/views/ai.py + +from rest_framework.views import APIView +from rest_framework.response import Response +from plane.app.permissions import ProjectEntityPermission +from plane.bgtasks.ai_tasks import generate_issue_description + + +class AIAssistantView(APIView): + permission_classes = [ProjectEntityPermission] + + def post(self, request, slug, project_id): + task = request.data.get("task", "") + prompt = request.data.get("prompt", "") + response_format = request.data.get("response", "text") + + if task == "generate_issue": + result = self._generate_issue(prompt, project_id) + elif task == "improve_description": + result = self._improve_description(prompt) + elif task == "summarize": + result = self._summarize(prompt) + else: + return Response( + {"error": "Unknown task"}, status=400 + ) + + return Response({"response": result}) + + def _generate_issue(self, brief_description, project_id): + """Generate a full issue from a brief description.""" + from plane.utils.ai import call_ai_service + + system_prompt = """You are a project management assistant. 
+ Given a brief description, generate: + 1. A clear, concise issue title + 2. A detailed description with acceptance criteria + 3. Suggested priority (urgent/high/medium/low) + 4. Suggested labels + 5. Estimated story points + + Respond in JSON format.""" + + context = self._build_project_context(project_id) + + return call_ai_service( + system_prompt=system_prompt, + user_prompt=brief_description, + context=context, + ) + + def _build_project_context(self, project_id): + """Build context from existing project data.""" + from plane.db.models import Issue, Label, State + + recent_issues = Issue.objects.filter( + project_id=project_id + ).order_by("-created_at")[:20] + + labels = Label.objects.filter(project_id=project_id) + states = State.objects.filter(project_id=project_id) + + return { + "recent_issues": [ + {"title": i.name, "priority": i.priority} + for i in recent_issues + ], + "available_labels": [l.name for l in labels], + "available_states": [s.name for s in states], + } +``` + +### AI Utility Module + +```python +# apiserver/plane/utils/ai.py + +import openai +import json +from django.conf import settings + + +def call_ai_service(system_prompt, user_prompt, context=None): + """Call the configured AI provider.""" + messages = [ + {"role": "system", "content": system_prompt}, + ] + + if context: + messages.append({ + "role": "system", + "content": f"Project context: {json.dumps(context)}", + }) + + messages.append({"role": "user", "content": user_prompt}) + + client = openai.OpenAI( + api_key=settings.OPENAI_API_KEY, + base_url=settings.AI_BASE_URL, # Supports self-hosted LLMs + ) + + response = client.chat.completions.create( + model=settings.AI_MODEL or "gpt-4", + messages=messages, + temperature=0.3, + max_tokens=2000, + ) + + return response.choices[0].message.content +``` + +### Frontend: AI Issue Creator + +```typescript +// web/components/ai/ai-issue-creator.tsx + +import { useState } from "react"; +import { AIService } from "services/ai.service"; + 
+interface AIIssueResponse { + title: string; + description_html: string; + priority: string; + suggested_labels: string[]; + estimate_points: number; +} + +export const AIIssueCreator: React.FC<{ + workspaceSlug: string; + projectId: string; + onGenerated: (data: AIIssueResponse) => void; +}> = ({ workspaceSlug, projectId, onGenerated }) => { + const [prompt, setPrompt] = useState(""); + const [loading, setLoading] = useState(false); + const aiService = new AIService(); + + const handleGenerate = async () => { + setLoading(true); + try { + const response = await aiService.generateIssue( + workspaceSlug, + projectId, + { task: "generate_issue", prompt } + ); + const parsed: AIIssueResponse = JSON.parse(response.response); + onGenerated(parsed); + } finally { + setLoading(false); + } + }; + + return ( +
+

AI Issue Creator

+