diff --git a/cmd/skill_install_cmd.go b/cmd/skill_install_cmd.go new file mode 100644 index 000000000..8a4589716 --- /dev/null +++ b/cmd/skill_install_cmd.go @@ -0,0 +1,195 @@ +package cmd + +import ( + "context" + "database/sql" + "fmt" + "os" + "os/signal" + "path/filepath" + "strings" + "text/tabwriter" + + _ "github.com/jackc/pgx/v5/stdlib" + "github.com/spf13/cobra" + + "github.com/nextlevelbuilder/goclaw/internal/config" + "github.com/nextlevelbuilder/goclaw/internal/skills" + "github.com/nextlevelbuilder/goclaw/internal/store/pg" +) + +// skillsInstallCmd builds the "skills install" subcommand: one positional skill reference plus an optional --ref flag. +func skillsInstallCmd() *cobra.Command { + var ref string + cmd := &cobra.Command{ + Use: "install [name|url]", + Short: "Install a skill from registry or GitHub", + Long: `Install a skill from the GoClaw registry (by slug) or directly from a GitHub repo. + +Examples: + goclaw skills install shopee-product-finder + goclaw skills install github.com/user/repo + goclaw skills install owner/repo@v1.0 --ref main`, + Args: cobra.ExactArgs(1), + Run: func(cmd *cobra.Command, args []string) { + if err := runSkillInstall(args[0], ref); err != nil { + fmt.Fprintf(os.Stderr, "Error: %v\n", err) + os.Exit(1) + } + }, + } + cmd.Flags().StringVar(&ref, "ref", "", "Git ref, tag, or branch to install") + return cmd +} + +// runSkillInstall parses input, resolves registry slugs to owner/repo, fetches the repository from GitHub and installs it into the skill store. +// A non-empty refOverride replaces any "@ref" suffix parsed from input. +func runSkillInstall(input, refOverride string) error { + ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt) + defer stop() + + // Parse skill reference. + skillRef, err := skills.ParseSkillRef(input) + if err != nil { + return err + } + + // Override ref if flag provided. + if refOverride != "" { + skillRef.Ref = refOverride + } + + // Resolve registry slug → owner/repo. 
+ cfgPath := resolveConfigPath() + cfg, err := config.Load(cfgPath) + if err != nil { + return fmt.Errorf("load config: %w", err) + } + dataDir := cfg.ResolvedDataDir() + + owner, repo := skillRef.Owner, skillRef.Repo + if skillRef.IsRegistry { + fmt.Printf("Resolving %q from registry...\n", input) + cacheDir := filepath.Join(dataDir, "cache") + registry := skills.NewRegistryClient(cacheDir) + // ParseSkillRef has already moved any "@ref" suffix into skillRef.Ref, so resolve the bare slug only. + slug, _, _ := strings.Cut(strings.TrimSpace(input), "@") + var err error + owner, repo, err = registry.Resolve(ctx, slug) + if err != nil { + return err + } + } + + // Fetch from GitHub. + fmt.Printf("Fetching %s/%s", owner, repo) + if skillRef.Ref != "" { + fmt.Printf("@%s", skillRef.Ref) + } + fmt.Println("...") + + tmpDir, err := skills.FetchFromGitHub(ctx, owner, repo, skillRef.Ref) + if err != nil { + return fmt.Errorf("fetch failed: %w", err) + } + defer os.RemoveAll(tmpDir) + + // Connect DB. + db, err := connectDBForCLI() + if err != nil { + return fmt.Errorf("database: %w", err) + } + defer db.Close() + + // Build installer. + skillsStoreDir := filepath.Join(dataDir, "skills-store") + if err := os.MkdirAll(skillsStoreDir, 0755); err != nil { + return err + } + + store := pg.NewPGSkillStore(db, skillsStoreDir) + loader := loadSkillsLoader() + installer := skills.NewInstaller(store, skillsStoreDir, loader) + + // Install. + fmt.Println("Installing...") + ownerID := fmt.Sprintf("github:%s/%s", owner, repo) + result, err := installer.Install(ctx, tmpDir, ownerID) + if err != nil { + return err + } + + // Print result. + fmt.Printf("\nDone! 
Skill installed:\n") + fmt.Printf(" Name: %s\n", result.Name) + fmt.Printf(" Slug: %s\n", result.Slug) + fmt.Printf(" Version: %d\n", result.Version) + fmt.Printf(" ID: %s\n", result.ID) + if result.DepsWarning != "" { + fmt.Printf("\n ⚠ Dependencies: %s\n", result.DepsWarning) + } + return nil +} + +func skillsSearchCmd() *cobra.Command { + return &cobra.Command{ + Use: "search [query]", + Short: "Search the skill registry", + Args: cobra.ExactArgs(1), + Run: func(cmd *cobra.Command, args []string) { + ctx := context.Background() + cfgPath := resolveConfigPath() + cfg, err := config.Load(cfgPath) + if err != nil { + fmt.Fprintf(os.Stderr, "Error: load config: %v\n", err) + os.Exit(1) + } + cacheDir := filepath.Join(cfg.ResolvedDataDir(), "cache") + registry := skills.NewRegistryClient(cacheDir) + + results, err := registry.Search(ctx, args[0]) + if err != nil { + fmt.Fprintf(os.Stderr, "Error: %v\n", err) + os.Exit(1) + } + + if len(results) == 0 { + fmt.Println("No skills found matching query.") + return + } + + tw := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0) + fmt.Fprintf(tw, "SLUG\tREPO\tDESCRIPTION\n") + for _, r := range results { + desc := r.Description + if rs := []rune(desc); len(rs) > 60 { // count runes, not bytes, so multi-byte descriptions are never cut mid-character + desc = string(rs[:57]) + "..." + } + fmt.Fprintf(tw, "%s\t%s\t%s\n", r.Slug, r.Repo, desc) + } + tw.Flush() + }, + } +} + +// connectDBForCLI opens a minimal database connection for CLI commands. 
+func connectDBForCLI() (*sql.DB, error) { + cfgPath := resolveConfigPath() + cfg, err := config.Load(cfgPath) + if err != nil { + return nil, fmt.Errorf("load config: %w", err) + } + dsn := cfg.Database.PostgresDSN + if dsn == "" { + return nil, fmt.Errorf("GOCLAW_POSTGRES_DSN is not set") + } + db, err := sql.Open("pgx", dsn) + if err != nil { + return nil, err + } + if err := db.Ping(); err != nil { + db.Close() + return nil, fmt.Errorf("cannot connect to database: %w", err) + } + return db, nil +} diff --git a/cmd/skill_remove_cmd.go b/cmd/skill_remove_cmd.go new file mode 100644 index 000000000..bd797e465 --- /dev/null +++ b/cmd/skill_remove_cmd.go @@ -0,0 +1,57 @@ +package cmd + +import ( + "context" + "fmt" + "os" + "path/filepath" + + "github.com/spf13/cobra" + + "github.com/nextlevelbuilder/goclaw/internal/config" + "github.com/nextlevelbuilder/goclaw/internal/skills" + "github.com/nextlevelbuilder/goclaw/internal/store/pg" +) + +func skillsRemoveCmd() *cobra.Command { + return &cobra.Command{ + Use: "remove [slug]", + Short: "Remove an installed skill", + Args: cobra.ExactArgs(1), + Run: func(cmd *cobra.Command, args []string) { + if err := runSkillRemove(args[0]); err != nil { + fmt.Fprintf(os.Stderr, "Error: %v\n", err) + os.Exit(1) + } + }, + } +} + +func runSkillRemove(slug string) error { + ctx := context.Background() + + db, err := connectDBForCLI() + if err != nil { + return fmt.Errorf("database: %w", err) + } + defer db.Close() + + cfgPath := resolveConfigPath() + cfg, err := config.Load(cfgPath) + if err != nil { + return fmt.Errorf("load config: %w", err) + } + dataDir := cfg.ResolvedDataDir() + skillsStoreDir := filepath.Join(dataDir, "skills-store") + + store := pg.NewPGSkillStore(db, skillsStoreDir) + loader := loadSkillsLoader() + installer := skills.NewInstaller(store, skillsStoreDir, loader) + + if err := installer.Uninstall(ctx, slug); err != nil { + return err + } + + fmt.Printf("Skill %q removed.\n", slug) + return nil +} diff --git 
a/cmd/skills_cmd.go b/cmd/skills_cmd.go index 49dc1459c..2fc3269a4 100644 --- a/cmd/skills_cmd.go +++ b/cmd/skills_cmd.go @@ -21,6 +21,9 @@ func skillsCmd() *cobra.Command { } cmd.AddCommand(skillsListCmd()) cmd.AddCommand(skillsShowCmd()) + cmd.AddCommand(skillsInstallCmd()) + cmd.AddCommand(skillsRemoveCmd()) + cmd.AddCommand(skillsSearchCmd()) return cmd } diff --git a/docs/00-architecture-overview.md b/docs/00-architecture-overview.md index 4c22a907a..1a88c9233 100644 --- a/docs/00-architecture-overview.md +++ b/docs/00-architecture-overview.md @@ -109,7 +109,7 @@ flowchart TD | `internal/store/pg/` | PostgreSQL implementations (`database/sql` + `pgx/v5`) | | `internal/bootstrap/` | System prompt files (AGENTS.md, SOUL.md, TOOLS.md, IDENTITY.md, USER.md, BOOTSTRAP.md) + seeding + truncation | | `internal/config/` | Config loading (JSON5) + env var overlay | -| `internal/skills/` | SKILL.md loader (5-tier hierarchy) + BM25 search + hot-reload via fsnotify | +| `internal/skills/` | SKILL.md loader (5-tier hierarchy) + BM25 search + hot-reload via fsnotify. Skill Hub: GitHub fetcher, registry client (curated skill index), installer (multi-stage validation→copy→DB→deps) | | `internal/channels/` | Channel manager + adapters: Telegram (forum topics, STT, bot commands), Feishu/Lark (streaming cards, media), Zalo OA, Zalo Personal, Discord, WhatsApp, Slack | | `internal/mcp/` | MCP server bridge (stdio, SSE, streamable-HTTP transports) | | `internal/scheduler/` | Lane-based concurrency control (main, subagent, cron, team lanes) with per-session serialization. 
Per-edition rate limits (`MaxSubagentConcurrent`, `MaxSubagentDepth`) with tenant-scoped concurrency | diff --git a/docs/16-skill-publishing.md b/docs/16-skill-publishing.md index 14acb69e4..144a98ced 100644 --- a/docs/16-skill-publishing.md +++ b/docs/16-skill-publishing.md @@ -2,6 +2,8 @@ How agents create, register, and manage skills programmatically through the `publish_skill` builtin tool, working in tandem with the `skill-creator` core skill. +**Related:** See [22-skill-hub-installation.md](./22-skill-hub-installation.md) for user-driven skill discovery and CLI installation from public registry or GitHub. + --- ## 1. Overview diff --git a/docs/17-changelog.md b/docs/17-changelog.md index f1f71290d..728867621 100644 --- a/docs/17-changelog.md +++ b/docs/17-changelog.md @@ -22,6 +22,29 @@ All notable changes to GoClaw Gateway are documented here. Format follows [Keep - **Functional options pattern**: Telegram provider refactored to `telegram.New()` with `WithXxxStore()` option setters for cleaner initialization - **File organization**: Subagent code split into focused modules: `subagent.go`, `subagent_roster.go`, `subagent_spawn.go`. 
Spawn tool split: `spawn_tool.go` + `spawn_tool_actions.go` +#### Skill Hub: Discovery & Installation (2026-03-31) +- **Skill Hub CLI:** New commands `goclaw skills install`, `remove`, `search` for user-driven skill discovery and lifecycle +- **Registry support:** Curated skill registry at `https://raw.githubusercontent.com/goclaw-hub/registry/main/index.json` with local caching (1-hour TTL) +- **GitHub fetcher:** Tarball download via GitHub API with security hardening (50 MB limit, 500-file limit, path traversal guards, symlink skip) +- **Skill installer:** Multi-stage orchestration (validate→copy→DB→deps→reload) with concurrent safety (advisory locks) +- **Registry client:** JSON-based skill index resolution and fetch with HTTPS enforcement and env override support +- **Installation commands:** + - `goclaw skills install shopee-product-finder` — Install by registry slug + - `goclaw skills install owner/repo` — Install from GitHub directly + - `goclaw skills install owner/repo@v1.0 --ref main` — Install a specific ref (`--ref` overrides an `@ref` suffix) + - `goclaw skills search <query>` — Search registry index + - `goclaw skills remove <slug>` — Remove installed skill +- **Security features:** Package name validation (stdlib blocklist), SKILL.md content guard, tar bomb prevention, path traversal hardening +- **Versioned storage:** Skills stored in `skills-store/{slug}/{version}/` with version increments on re-install +- **Dependency validation:** Post-install scanning mirrors `publish_skill` tool; warns on missing deps (does not archive) +- **Hot-reload:** BumpVersion() invalidates loader cache; next access loads from filesystem automatically +- **Files added:** + - `internal/skills/github_fetcher.go` — Tarball download + secure extraction + - `internal/skills/registry_client.go` — Registry index fetch + cache + - `internal/skills/installer.go` — Installation orchestrator + - `cmd/skill_install_cmd.go` — `skills install` CLI handler + - `cmd/skill_remove_cmd.go` — `skills remove` CLI handler + #### Runtime & 
Packages Management (2026-03-17) - **Packages page**: New "Packages" page in Web UI under System group for managing installed packages - **HTTP API endpoints**: GET/POST `/v1/packages`, `/v1/packages/install`, `/v1/packages/uninstall`, GET `/v1/packages/runtimes` @@ -144,9 +167,13 @@ All notable changes to GoClaw Gateway are documented here. Format follows [Keep ### Documentation +- Added `22-skill-hub-installation.md` — Skill Hub overview, CLI commands, registry architecture, fetcher, installer, security model, error handling +- Updated `16-skill-publishing.md` — Added cross-reference to Skill Hub for user-driven discovery and installation +- Updated `00-architecture-overview.md` — Added Skill Hub components to module map (github_fetcher, registry_client, installer) +- Updated `CLAUDE.md` — Added Skill Hub CLI commands and updated `internal/skills/` description - Updated `18-http-api.md` — Added section 17 for Runtime & Packages Management endpoints - Updated `09-security.md` — Added Docker entrypoint documentation, pkg-helper architecture, privilege separation -- Updated `17-changelog.md` — New entries for packages management, Docker security, and auth fix +- Updated `17-changelog.md` — New entries for packages management, Docker security, auth fix, and Skill Hub - Added `18-http-api.md` — Complete HTTP REST API reference (all endpoints, auth, error codes) - Added `19-websocket-rpc.md` — Complete WebSocket RPC method catalog (64+ methods, permission matrix) - Added `20-api-keys-auth.md` — API key authentication, RBAC scopes, security model, usage examples diff --git a/docs/22-skill-hub-installation.md b/docs/22-skill-hub-installation.md new file mode 100644 index 000000000..5521de5da --- /dev/null +++ b/docs/22-skill-hub-installation.md @@ -0,0 +1,408 @@ +# 22 - Skill Hub: Installation & Package Management + +User-driven skill discovery, installation, and lifecycle management via CLI and future HTTP API. 
Installs from public skill registry (curated GitHub repositories) or direct GitHub URLs with dependency validation and hot-reload. + +--- + +## 1. Overview + +The Skill Hub extends the skills system with **end-user skill discovery and installation**: + +| Layer | Component | Purpose | +|-------|-----------|---------| +| **CLI** | `goclaw skills install`, `remove`, `search` | Human-friendly skill lifecycle | +| **Registry** | `registry_client.go` | JSON-based skill index on GitHub | +| **Fetcher** | `github_fetcher.go` | Tarball download + secure extraction | +| **Installer** | `installer.go` | Validation, copy, DB registration, hot-reload | + +Skills are stored in **versioned directories** under `skills-store/{slug}/{version}/`. Each installation increments the version, enabling safe re-installations and rollback potential. + +**Security perimeter:** +- Package name validation (stdlib blocklist) +- SKILL.md content scanning (GuardSkillContent) +- Tar bomb prevention (50 MB download, 20 MB extracted, 500 files) +- Path traversal hardening in extraction + +--- + +## 2. CLI Commands + +### 2.1 Install from Registry + +Registry lookup via slug (curated GitHub repos): + +```bash +goclaw skills install shopee-product-finder +``` + +Resolves `shopee-product-finder` slug → GitHub URL via `registry-index.json`, then fetches and installs. 
Optional ref override: + +```bash +goclaw skills install shopee-product-finder --ref v2.0 +``` + +### 2.2 Install from GitHub + +Direct GitHub repo (owner/repo format or full URL): + +```bash +goclaw skills install owner/repo +goclaw skills install github.com/owner/repo +goclaw skills install owner/repo@v1.0 +goclaw skills install github.com/owner/repo --ref main +``` + +Formats accepted: +- `owner/repo` → GitHub shorthand, default branch +- `owner/repo@v1.0` → Shorthand with specific ref +- `github.com/owner/repo` → Full URL, default branch +- `github.com/owner/repo@v1.0` → Full URL with ref + +### 2.3 Remove + +```bash +goclaw skills remove my-skill-slug +``` + +Deletes the skill from `skills-store/` and marks as archived in the database. + +### 2.4 Search Registry + +```bash +goclaw skills search pdf +goclaw skills search "text processing" +``` + +Queries registry index for matching slug/description/tags. Returns: +- Slug (install name) +- Repo (GitHub location) +- Description +- Tags + +### 2.5 List & Show + +```bash +goclaw skills list # All skills (system + custom) +goclaw skills show my-skill # Full SKILL.md content + metadata +goclaw skills show my-skill --json # JSON metadata +``` + +--- + +## 3. 
Registry Architecture + +### 3.1 Registry Index Format + +Central registry at `https://raw.githubusercontent.com/goclaw-hub/registry/main/index.json`: + +```json +{ + "skills": [ + { + "slug": "shopee-product-finder", + "repo": "owner/shopee-product-finder", + "description": "Search Shopee for products", + "tags": ["shopping", "e-commerce", "shopee"] + }, + { + "slug": "web-scraper", + "repo": "owner/web-scraper", + "description": "Scrape and extract web content", + "tags": ["web", "scraping", "data"] + } + ] +} +``` + +### 3.2 Registry Client (registry_client.go) + +Fetches and caches the index locally: + +```go +type RegistryClient struct { + cacheDir string + indexURL string // GOCLAW_REGISTRY_URL env or default + cacheTTL time.Duration +} + +// Resolve("shopee-product-finder") → ("owner", "shopee-product-finder") +Resolve(ctx, slug) (owner, repo, error) +``` + +**Cache strategy:** +- Stores `registry-index.json` in `{dataDir}/cache/` (1 MB max) +- 1-hour TTL; expires after first fetch past TTL +- HTTPS-only (MITM protection; non-HTTPS URLs rejected) +- Env override: `GOCLAW_REGISTRY_URL` + +--- + +## 4. Fetcher & Extraction (github_fetcher.go) + +### 4.1 Tarball Download + +Fetches GitHub repository as `.tar.gz` via tarball API: + +``` +GET https://api.github.com/repos/{owner}/{repo}/tarball/{ref} + → 302 redirect to S3 (temporary signed URL) + → Download to temp file (streaming) +``` + +**Size limits:** +- Max download: 50 MB (prevents resource exhaustion) +- Max extracted: 20 MB (prevents decompression bombs) +- Max files: 500 (tar bomb prevention) +- Timeout: 30 seconds + +### 4.2 Secure Extraction + +Hardened tar extraction in `internal/skills/github_fetcher.go`: + +1. **Reject path traversal:** Skip entries containing `..` +2. **Skip symlinks:** Prevent escape via symbolic links +3. **Skip system artifacts:** `.DS_Store`, `__MACOSX`, `Thumbs.db`, `node_modules/` +4. **Track file count:** Reject tarballs with 500+ files +5. 
**Track extracted size:** Reject if >20 MB total +6. **Extract to temp:** Uses OS temp dir, caller responsible for cleanup + +### 4.3 SkillRef Parser + +Parses user input into structured reference: + +```go +type SkillRef struct { + Owner string // GitHub owner + Repo string // Repository name + Ref string // Tag, branch, or "" (default) + IsRegistry bool // true if input was plain slug +} + +ParseSkillRef("shopee-product-finder") // {IsRegistry: true} +ParseSkillRef("owner/repo") // {Owner: "owner", Repo: "repo"} +ParseSkillRef("owner/repo@v1.0") // {Owner: "owner", Repo: "repo", Ref: "v1.0"} +``` + +--- + +## 5. Installation Flow (installer.go) + +SkillInstaller orchestrates multi-stage skill installation into database and filesystem: + +``` +User calls: goclaw skills install owner/repo + │ + ▼ +1. Fetch tarball via github_fetcher → extract to temp dir + │ + ▼ +2. SkillInstaller.Install(ctx, srcDir, ownerID) + │ + ├─ Read SKILL.md from srcDir + ├─ Guard scan (GuardSkillContent) + ├─ Parse frontmatter (name, slug, description) + ├─ Reject system skill conflict (pdf, docx, skill-creator, etc.) + ├─ Compute SHA-256 hash + ├─ Check dir size (max 20 MB) + ├─ GetNextVersionLocked(slug) → version N + ├─ Copy srcDir → skills-store/{slug}/{N}/ + ├─ INSERT/UPSERT skills table + ├─ ScanSkillDeps + CheckSkillDeps → warn on missing + ├─ BumpVersion() → invalidate loader cache + ├─ Return SkillInstallResult {ID, Name, Slug, Version, DepsWarning} + │ + ▼ +3. 
Caller cleans up srcDir (temp directory) + │ + ▼ +Success: Skill hot-reloaded, usable by agents +``` + +### 5.1 SkillInstallResult + +```go +type SkillInstallResult struct { + ID uuid.UUID `json:"id"` // Skill UUID in DB + Name string `json:"name"` // Display name + Slug string `json:"slug"` // Kebab-case identifier + Version int `json:"version"` // Version number + DepsWarning string `json:"deps_warning,omitempty"` // Missing deps (if any) +} +``` + +### 5.2 Concurrent Safety + +Uses advisory lock (`GetNextVersionLocked`) to prevent race conditions during: +1. Version calculation +2. Directory copy +3. DB insert + +Lock released after operation (deferred cleanup). + +--- + +## 6. Security Model + +### 6.1 Package Name Validation + +`validatePackageName()` in `dep_installer.go` rejects malformed package names and names that shadow Python stdlib modules: + +```go +// stdlibBlocklist (matched case-insensitively; applied to pip, npm and system packages): +"pathlib", "os", "sys", "json", "collections", "re", "io", "time", "datetime", "subprocess", "socket" +``` + +Rationale: A pip package that shadows a stdlib module can silently hijack imports used by other skills. + +### 6.2 SKILL.md Content Guard + +`GuardSkillContent()` scans for dangerous patterns: + +| Pattern | Reason | Mitigation | +|---------|--------|-----------| +| `eval`, `exec`, `__import__` | Dynamic code exec | Detection + warning | +| `open(`, `os.system` | Unrestricted file/shell access | Detection + warning | +| Base64-encoded payloads | Obfuscation | Detection + warning | +| Raw `.pyc` / compiled code | Binary obfuscation | Detection + warning | + +**Mode:** Detection-only (returns violations, allows install but warns agent). + +### 6.3 Tar Extraction Hardening + +```go +// Per-entry checks during tar extraction: +if strings.Contains(header.Name, "..") { + skip // Path traversal +} +if header.Typeflag == tar.TypeSymlink { + skip // Symlink escape +} +if isSystemArtifact(header.Name) { + skip // .DS_Store, __MACOSX, node_modules, etc. 
+} +// Track extracted file count (reject if >500) +// Track extracted size (reject if >20 MB) +``` + +### 6.4 GitHub API Rate Limiting + +Registry client respects GitHub's unauthenticated rate limit (60 req/hour). No auth token required; cache mitigates repeated fetches. + +--- + +## 7. Dependency Validation + +Post-installation dependency scanning mirrors `publish_skill` tool: + +``` +After Install: + │ + ├─ ScanSkillDeps(skillDir) + │ └─ Detect binaries, Python (pip), Node (npm) in scripts/ + │ + ├─ CheckSkillDeps(manifest) + │ └─ Verify each dependency available on system + │ + └─ On missing deps: + ├─ Store in skills.deps JSONB column + ├─ Return DepsWarning in SkillInstallResult + └─ Do NOT archive (unlike HTTP upload handler) +``` + +Users are informed of missing deps but can still use the skill if they manage dependencies manually. + +--- + +## 8. Hot-Reload Integration + +After successful installation: + +```go +si.loader.BumpVersion() // Invalidates in-memory cache version +``` + +Next agent access to skill loads from `skills-store/{slug}/{version}/` directory automatically. + +--- + +## 9. File Organization + +``` +internal/skills/ +├── github_fetcher.go # Tarball download + secure extraction +├── registry_client.go # Registry index fetch + cache +├── installer.go # Multi-stage installation orchestrator +├── guard.go # SKILL.md content scanning +├── dep_installer.go # validatePackageName() + dep checks +└── dep_checker.go # Dependency analysis +``` + +``` +cmd/ +├── skills_cmd.go # Main `skills` command group +├── skill_install_cmd.go # `skills install` subcommand +└── skill_remove_cmd.go # `skills remove` subcommand +``` + +--- + +## 10. 
Future Extensions + +- **HTTP API:** POST `/v1/skills/install`, DELETE `/v1/skills/{slug}` for Web UI +- **Agent tool:** `install_skill(slug|url, ref?)` builtin for agent-driven installation +- **Rollback:** Version snapshots + downgrade support +- **Signing:** GPG-signed skill registry entries (integrity verification) +- **Private registry:** Custom registry URL support (already in place via env var) + +--- + +## 11. Example Workflow + +**Scenario: User discovers skill via `goclaw skills search`** + +```bash +$ goclaw skills search shopee +[1] shopee-product-finder (owner/shopee-product-finder) + Search Shopee for products + Tags: shopping, e-commerce, shopee + +$ goclaw skills install shopee-product-finder +Resolving "shopee-product-finder" from registry... +Resolved to: owner/shopee-product-finder +Fetching owner/shopee-product-finder (default branch)... +Downloaded 5.2 MB tarball +Extracting... +Validating SKILL.md... +Installing to skills-store/shopee-product-finder/1/ +Skill installed: ID=<uuid>, v1 +Checking dependencies... +Warning: requires Python requests library (pip install requests) +``` + +**Skill now usable in agents immediately.** + +--- + +## 12. Error Handling + +| Error | HTTP Status | Cause | Action | +|-------|------------|-------|--------| +| Slug conflict with system skill | 400 | `IsSystemSkill()` check | Suggest different slug | +| Dir too large (>20 MB) | 413 | Size check in extractor | Reduce skill complexity | +| SKILL.md missing/empty | 400 | Read/parse fail | Verify repo structure | +| Invalid slug format | 400 | Slug validation | Suggest auto-derived slug | +| Download timeout (30s) | 504 | GitHub API slow | Retry with smaller repo | +| Tar bomb (>500 files) | 413 | File count exceeded | Simplify repo structure | +| Path traversal in tar | 400 | Extraction guard | Report malicious tar | +| Missing SKILL.md frontmatter | 400 | name field absent | Verify skill manifest | + +--- + +## 13. 
Dependencies + +- `archive/tar`, `compress/gzip` — Tar extraction +- `crypto/sha256` — Content hashing +- `net/http` — GitHub API calls +- `path/filepath` — Path security checks +- `database/sql` — Skill registration +- `github.com/google/uuid` — Skill IDs diff --git a/internal/skills/dep_installer.go b/internal/skills/dep_installer.go index 80cfc426f..f00769e95 100644 --- a/internal/skills/dep_installer.go +++ b/internal/skills/dep_installer.go @@ -8,6 +8,7 @@ import ( "log/slog" "net" "os/exec" + "regexp" "strings" "time" ) @@ -17,6 +18,42 @@ const installTimeout = 5 * time.Minute // pkgHelperSocket is the Unix socket path for the root-privileged pkg-helper. const pkgHelperSocket = "/tmp/pkg.sock" +// validPkgName matches safe package names: alphanumeric start, then alphanumeric/dot/hyphen/underscore. +var validPkgName = regexp.MustCompile(`^[a-zA-Z0-9][a-zA-Z0-9._-]*$`) + +// stdlibBlocklist contains pip package names that shadow Python stdlib modules. +// Installing these via pip can silently hijack imports used by other skills. +var stdlibBlocklist = map[string]bool{ + "pathlib": true, "os": true, "sys": true, "json": true, + "collections": true, "re": true, "io": true, "time": true, + "datetime": true, "subprocess": true, "socket": true, +} + +// validatePackageName checks that a package spec (possibly with version constraint) +// has a safe name and is not in the stdlib blocklist. Returns the original raw string +// on success so it can be passed to pip/npm unchanged. +func validatePackageName(raw string) (string, error) { + // Strip version specifiers to validate the name portion only. 
+ name := raw + for _, sep := range []string{">=", "<=", "==", "~=", "!=", "<", ">"} { + if idx := strings.Index(name, sep); idx > 0 { + name = name[:idx] + // No break: keep scanning so the cut lands on the earliest operator (e.g. "pkg<2,>=1" yields "pkg", not "pkg<2,"). + } + } + name = strings.TrimSpace(name) + if name == "" { + return "", fmt.Errorf("empty package name") + } + if !validPkgName.MatchString(name) { + return "", fmt.Errorf("invalid package name: %q", raw) + } + if stdlibBlocklist[strings.ToLower(name)] { + return "", fmt.Errorf("blocked: %q shadows Python stdlib", name) + } + return raw, nil +} + // InstallResult holds per-category install outcomes. type InstallResult struct { System []string `json:"system,omitempty"` @@ -54,6 +91,10 @@ func InstallSingleDep(ctx context.Context, dep string) (bool, string) { switch { case strings.HasPrefix(dep, "pip:"): pkg := strings.TrimPrefix(dep, "pip:") + if _, err := validatePackageName(pkg); err != nil { + slog.Warn("security.dep_install: rejected package", "dep", dep, "error", err) + return false, err.Error() + } cmd := exec.CommandContext(ctx, "pip3", "install", "--no-cache-dir", "--break-system-packages", pkg) out, err := cmd.CombinedOutput() if err != nil { @@ -63,6 +104,10 @@ func InstallSingleDep(ctx context.Context, dep string) (bool, string) { } case strings.HasPrefix(dep, "npm:"): pkg := strings.TrimPrefix(dep, "npm:") + if _, err := validatePackageName(pkg); err != nil { + slog.Warn("security.dep_install: rejected package", "dep", dep, "error", err) + return false, err.Error() + } cmd := exec.CommandContext(ctx, "npm", "install", "-g", pkg) out, err := cmd.CombinedOutput() if err != nil { @@ -71,8 +116,11 @@ func InstallSingleDep(ctx context.Context, dep string) (bool, string) { return false, msg } default: - // System package via pkg-helper (root-privileged Unix socket). - // pkg-helper handles persist to apk-packages file. + // System package via pkg-helper — validate name before sending. 
+ if _, err := validatePackageName(dep); err != nil { + slog.Warn("security.dep_install: rejected package", "dep", dep, "error", err) + return false, err.Error() + } ok, errMsg := apkViaHelper(ctx, "install", dep) if !ok { return false, errMsg @@ -96,10 +144,24 @@ func InstallDeps(ctx context.Context, manifest *SkillManifest, missing []string) for _, dep := range missing { switch { case strings.HasPrefix(dep, "pip:"): - pipPkgs = append(pipPkgs, strings.TrimPrefix(dep, "pip:")) + pkg := strings.TrimPrefix(dep, "pip:") + if _, err := validatePackageName(pkg); err != nil { + result.Errors = append(result.Errors, fmt.Sprintf("pip %s: %s", pkg, err)) + continue + } + pipPkgs = append(pipPkgs, pkg) case strings.HasPrefix(dep, "npm:"): - npmPkgs = append(npmPkgs, strings.TrimPrefix(dep, "npm:")) + pkg := strings.TrimPrefix(dep, "npm:") + if _, err := validatePackageName(pkg); err != nil { + result.Errors = append(result.Errors, fmt.Sprintf("npm %s: %s", pkg, err)) + continue + } + npmPkgs = append(npmPkgs, pkg) default: + if _, err := validatePackageName(dep); err != nil { + result.Errors = append(result.Errors, fmt.Sprintf("apk %s: %s", dep, err)) + continue + } sysPkgs = append(sysPkgs, dep) } } @@ -156,6 +218,9 @@ func UninstallPackage(ctx context.Context, dep string) (bool, string) { switch { case strings.HasPrefix(dep, "pip:"): pkg := strings.TrimPrefix(dep, "pip:") + if _, err := validatePackageName(pkg); err != nil { + return false, err.Error() + } cmd := exec.CommandContext(ctx, "pip3", "uninstall", "-y", pkg) out, err := cmd.CombinedOutput() if err != nil { @@ -165,6 +230,9 @@ func UninstallPackage(ctx context.Context, dep string) (bool, string) { } case strings.HasPrefix(dep, "npm:"): pkg := strings.TrimPrefix(dep, "npm:") + if _, err := validatePackageName(pkg); err != nil { + return false, err.Error() + } cmd := exec.CommandContext(ctx, "npm", "uninstall", "-g", pkg) out, err := cmd.CombinedOutput() if err != nil { @@ -173,7 +241,9 @@ func 
UninstallPackage(ctx context.Context, dep string) (bool, string) { return false, msg } default: - // System package via pkg-helper. Helper handles persist file removal. + if _, err := validatePackageName(dep); err != nil { + return false, err.Error() + } ok, errMsg := apkViaHelper(ctx, "uninstall", dep) if !ok { return false, errMsg diff --git a/internal/skills/github_fetcher.go b/internal/skills/github_fetcher.go new file mode 100644 index 000000000..15ddafbda --- /dev/null +++ b/internal/skills/github_fetcher.go @@ -0,0 +1,289 @@ +package skills + +import ( + "archive/tar" + "compress/gzip" + "context" + "fmt" + "io" + "log/slog" + "net/http" + "os" + "os/exec" + "path/filepath" + "regexp" + "strconv" + "strings" + "time" +) + +const ( + maxDownloadSize = 50 << 20 // 50 MB tarball download limit + maxExtractedSize = 20 << 20 // 20 MB extracted content limit + maxExtractedFiles = 500 // tar bomb prevention + fetchTimeout = 30 * time.Second +) + +// SkillRef represents a parsed skill reference — either a GitHub repo or a registry slug. +type SkillRef struct { + Owner string + Repo string + Ref string // tag, branch, or "" for default + IsRegistry bool // true if input was a plain slug needing registry lookup +} + +// ParseSkillRef parses a skill input string into a SkillRef. +// +// Accepted formats: +// - "github.com/owner/repo" → GitHub repo, default branch +// - "github.com/owner/repo@v1.0" → GitHub repo, specific ref +// - "owner/repo" → GitHub repo shorthand +// - "owner/repo@v1.0" → GitHub repo shorthand with ref +// - "my-skill" → registry slug (needs Resolve) +func ParseSkillRef(input string) (SkillRef, error) { + input = strings.TrimSpace(input) + if input == "" { + return SkillRef{}, fmt.Errorf("empty skill reference") + } + + // Strip "https://" or "http://" prefix if present. + cleaned := input + cleaned = strings.TrimPrefix(cleaned, "https://") + cleaned = strings.TrimPrefix(cleaned, "http://") + + // Split off @ref suffix. 
+ var ref string + if idx := strings.LastIndex(cleaned, "@"); idx > 0 { + ref = cleaned[idx+1:] + cleaned = cleaned[:idx] + } + + // Strip "github.com/" prefix. + cleaned = strings.TrimPrefix(cleaned, "github.com/") + + // Check if it looks like "owner/repo". + parts := strings.SplitN(cleaned, "/", 3) + if len(parts) == 2 && parts[0] != "" && parts[1] != "" { + if !isValidGitHubName(parts[0]) || !isValidGitHubName(parts[1]) { + return SkillRef{}, fmt.Errorf("invalid GitHub owner/repo: %q (only alphanumeric, hyphen, dot, underscore allowed)", cleaned) + } + return SkillRef{Owner: parts[0], Repo: parts[1], Ref: ref}, nil + } + + // Single token = registry slug. + if len(parts) == 1 && !strings.Contains(cleaned, "/") { + return SkillRef{IsRegistry: true, Ref: ref}, nil + } + + return SkillRef{}, fmt.Errorf("invalid skill reference: %q (expected owner/repo, github.com/owner/repo, or a slug)", input) +} + +// FetchFromGitHub downloads a repository tarball from GitHub and extracts it +// to a temporary directory. The caller owns the returned directory and must +// clean it up. On error, any partial temp dir is removed automatically. +func FetchFromGitHub(ctx context.Context, owner, repo, ref string) (string, error) { + ctx, cancel := context.WithTimeout(ctx, fetchTimeout) + defer cancel() + + // Build tarball URL. + url := fmt.Sprintf("https://api.github.com/repos/%s/%s/tarball", owner, repo) + if ref != "" { + url += "/" + ref + } + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + return "", fmt.Errorf("build request: %w", err) + } + req.Header.Set("User-Agent", "goclaw/skill-hub") + req.Header.Set("Accept", "application/vnd.github+json") + + // Auto-detect GitHub token for higher rate limits. 
+ if token := resolveGitHubToken(); token != "" { + req.Header.Set("Authorization", "token "+token) + } + + resp, err := http.DefaultClient.Do(req) + if err != nil { + return "", fmt.Errorf("fetch tarball: %w", err) + } + defer resp.Body.Close() + + // Log rate limit status. + if remaining := resp.Header.Get("X-RateLimit-Remaining"); remaining != "" { + if n, _ := strconv.Atoi(remaining); n < 10 { + slog.Warn("github: rate limit low", "remaining", n) + } + } + + switch resp.StatusCode { + case http.StatusOK: + // OK — continue + case http.StatusNotFound: + return "", fmt.Errorf("repository %s/%s not found (or private without token)", owner, repo) + case http.StatusForbidden: + return "", fmt.Errorf("GitHub API rate limited — set GITHUB_TOKEN env or install gh CLI") + default: + return "", fmt.Errorf("GitHub API error: %s", resp.Status) + } + + // Check Content-Length if available. + if resp.ContentLength > maxDownloadSize { + return "", fmt.Errorf("tarball too large: %d bytes (max %d)", resp.ContentLength, maxDownloadSize) + } + + // Limit reader to prevent unbounded download. + limited := io.LimitReader(resp.Body, maxDownloadSize+1) + + return extractTarGz(limited, owner, repo) +} + +// extractTarGz streams a gzipped tar archive into a temp directory, +// stripping the single root directory prefix that GitHub adds. +func extractTarGz(r io.Reader, owner, repo string) (string, error) { + gz, err := gzip.NewReader(r) + if err != nil { + return "", fmt.Errorf("gzip decode: %w", err) + } + defer gz.Close() + + tmpDir, err := os.MkdirTemp("", "goclaw-skill-*") + if err != nil { + return "", fmt.Errorf("create temp dir: %w", err) + } + + tr := tar.NewReader(gz) + var totalSize int64 + var fileCount int + + for { + hdr, err := tr.Next() + if err == io.EOF { + break + } + if err != nil { + os.RemoveAll(tmpDir) + return "", fmt.Errorf("tar read: %w", err) + } + + // Strip the root directory (e.g. "owner-repo-abc123/"). 
+ name := stripRootDir(hdr.Name) + if name == "" || name == "." { + continue + } + + // Skip system artifacts. + if IsSystemArtifact(name) { + continue + } + + // Security: reject symlinks unconditionally. + if hdr.Typeflag == tar.TypeSymlink || hdr.Typeflag == tar.TypeLink { + continue + } + + // Security: validate path — no traversal, no absolute paths. + dest, err := safePath(tmpDir, name) + if err != nil { + os.RemoveAll(tmpDir) + return "", fmt.Errorf("path security: %w", err) + } + + // File count cap. + fileCount++ + if fileCount > maxExtractedFiles { + os.RemoveAll(tmpDir) + return "", fmt.Errorf("too many files in archive (max %d)", maxExtractedFiles) + } + + switch hdr.Typeflag { + case tar.TypeDir: + if err := os.MkdirAll(dest, 0755); err != nil { + os.RemoveAll(tmpDir) + return "", err + } + case tar.TypeReg: + // Size check. + totalSize += hdr.Size + if totalSize > maxExtractedSize { + os.RemoveAll(tmpDir) + return "", fmt.Errorf("extracted content too large (max %d MB)", maxExtractedSize>>20) + } + + if err := os.MkdirAll(filepath.Dir(dest), 0755); err != nil { + os.RemoveAll(tmpDir) + return "", err + } + f, err := os.OpenFile(dest, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644) + if err != nil { + os.RemoveAll(tmpDir) + return "", err + } + if _, err := io.Copy(f, io.LimitReader(tr, hdr.Size+1)); err != nil { + f.Close() + os.RemoveAll(tmpDir) + return "", err + } + f.Close() + } + } + + return tmpDir, nil +} + +// stripRootDir removes the first path component from a tar entry name. +// GitHub tarballs wrap everything in "owner-repo-sha/" which we need to strip. +func stripRootDir(name string) string { + // Normalize separators. + name = filepath.ToSlash(name) + name = strings.TrimPrefix(name, "/") + idx := strings.Index(name, "/") + if idx < 0 { + return "" // root dir entry itself + } + return name[idx+1:] +} + +// safePath validates and returns the cleaned destination path for a tar entry. +// Rejects absolute paths, ".." 
components, and paths that escape the base dir. +func safePath(base, name string) (string, error) { + // Reject absolute paths. + if filepath.IsAbs(name) { + return "", fmt.Errorf("absolute path in archive: %q", name) + } + // Reject any ".." component. + for _, part := range strings.Split(filepath.ToSlash(name), "/") { + if part == ".." { + return "", fmt.Errorf("path traversal in archive: %q", name) + } + } + dest := filepath.Join(base, name) + // Final check: must be under base. + if !strings.HasPrefix(filepath.Clean(dest)+string(filepath.Separator), filepath.Clean(base)+string(filepath.Separator)) { + return "", fmt.Errorf("path escapes destination: %q", name) + } + return dest, nil +} + +// validGitHubName matches GitHub usernames and repo names. +var validGitHubName = regexp.MustCompile(`^[a-zA-Z0-9][a-zA-Z0-9._-]*$`) + +func isValidGitHubName(name string) bool { + return validGitHubName.MatchString(name) +} + +// resolveGitHubToken returns a GitHub token for API authentication. +// Checks GITHUB_TOKEN env var first, then falls back to `gh auth token`. +func resolveGitHubToken() string { + if token := os.Getenv("GITHUB_TOKEN"); token != "" { + return token + } + // Fallback: gh CLI token (if installed). 
+ out, err := exec.Command("gh", "auth", "token").Output() + if err == nil { + if token := strings.TrimSpace(string(out)); token != "" { + return token + } + } + return "" +} diff --git a/internal/skills/guard.go b/internal/skills/guard.go index 0948a37e2..dc3890df2 100644 --- a/internal/skills/guard.go +++ b/internal/skills/guard.go @@ -51,7 +51,7 @@ var skillGuardRules = []guardRule{ {regexp.MustCompile(`(?i)(curl|wget)\s+\S+.*\$\{?(HOME|USER|PASS|KEY|SECRET|TOKEN)`), "env var exfiltration via HTTP"}, // --- Path traversal --- - {regexp.MustCompile(`\.\./\.\./\.\./`), "deep path traversal (../../..)"}, + {regexp.MustCompile(`\.\./`), "path traversal (..)"}, // --- SQL injection --- {regexp.MustCompile(`(?i)\bDROP\s+TABLE\b`), "SQL DROP TABLE"}, diff --git a/internal/skills/installer.go b/internal/skills/installer.go new file mode 100644 index 000000000..1c784a592 --- /dev/null +++ b/internal/skills/installer.go @@ -0,0 +1,199 @@ +package skills + +import ( + "context" + "crypto/sha256" + "fmt" + "log/slog" + "os" + "path/filepath" + "strconv" + + "github.com/google/uuid" + + "github.com/nextlevelbuilder/goclaw/internal/store" + "github.com/nextlevelbuilder/goclaw/internal/store/pg" +) + +const maxInstallDirSize = 20 << 20 // 20 MB + +// SkillInstaller orchestrates skill installation from a fetched directory +// into skills-store, the database, and the in-memory loader cache. +// Used by both CLI commands and (future) agent tool. +type SkillInstaller struct { + store *pg.PGSkillStore + baseDir string // skills-store/ root directory + loader *Loader +} + +// SkillInstallResult holds the outcome of a skill installation. +type SkillInstallResult struct { + ID uuid.UUID `json:"id"` + Name string `json:"name"` + Slug string `json:"slug"` + Version int `json:"version"` + DepsWarning string `json:"deps_warning,omitempty"` +} + +// NewInstaller creates a SkillInstaller. 
+func NewInstaller(store *pg.PGSkillStore, baseDir string, loader *Loader) *SkillInstaller { + return &SkillInstaller{store: store, baseDir: baseDir, loader: loader} +} + +// Install validates, copies, registers, and hot-reloads a skill from srcDir. +// ownerID should be "github:owner/repo" for GitHub-installed skills. +// The caller is responsible for cleaning up srcDir after Install returns. +func (si *SkillInstaller) Install(ctx context.Context, srcDir, ownerID string) (*SkillInstallResult, error) { + // 1. Read and validate SKILL.md. + skillPath := filepath.Join(srcDir, "SKILL.md") + content, err := os.ReadFile(skillPath) + if err != nil { + return nil, fmt.Errorf("SKILL.md not found in skill directory: %w", err) + } + if len(content) == 0 { + return nil, fmt.Errorf("SKILL.md is empty") + } + + // 2. Security scan SKILL.md content. + if violations, safe := GuardSkillContent(string(content)); !safe { + return nil, fmt.Errorf("skill rejected by security scanner:\n%s", FormatGuardViolations(violations)) + } + + // 3. Parse frontmatter. + name, description, slug, frontmatter := ParseSkillFrontmatter(string(content)) + if name == "" { + return nil, fmt.Errorf("SKILL.md frontmatter must contain 'name' field") + } + if slug == "" { + slug = Slugify(name) + } + if !SlugRegexp.MatchString(slug) { + return nil, fmt.Errorf("invalid slug %q: must be lowercase alphanumeric with hyphens", slug) + } + + // 3. Reject system skill overwrite. + if si.store.IsSystemSkill(slug) { + return nil, fmt.Errorf("slug %q conflicts with a system skill — cannot overwrite", slug) + } + + // 4. Compute content hash. + hash := fmt.Sprintf("%x", sha256.Sum256(content)) + + // 5. Check directory size. + size, err := installDirSize(srcDir) + if err != nil { + return nil, fmt.Errorf("failed to calculate directory size: %w", err) + } + if size > maxInstallDirSize { + return nil, fmt.Errorf("skill directory too large: %d bytes (max %d MB)", size, maxInstallDirSize>>20) + } + + // 6. 
Acquire locked version to prevent race conditions. + version, commitVersion, err := si.store.GetNextVersionLocked(ctx, slug) + if err != nil { + return nil, fmt.Errorf("acquire version lock: %w", err) + } + // Release the advisory lock after we're done with copy + DB insert. + defer commitVersion() //nolint:errcheck + + destDir := filepath.Join(si.baseDir, slug, strconv.Itoa(version)) + if err := os.MkdirAll(destDir, 0755); err != nil { + return nil, fmt.Errorf("create destination: %w", err) + } + + // 7. Copy files. Cleanup on failure. + if err := CopyDir(srcDir, destDir); err != nil { + os.RemoveAll(destDir) + return nil, fmt.Errorf("copy skill files: %w", err) + } + + // 8. Register in DB. + desc := description + params := store.SkillCreateParams{ + Name: name, + Slug: slug, + Description: &desc, + OwnerID: ownerID, + Visibility: "public", + Status: "active", + Version: version, + FilePath: destDir, + FileSize: size, + FileHash: &hash, + Frontmatter: frontmatter, + } + id, err := si.store.CreateSkillManaged(ctx, params) + if err != nil { + os.RemoveAll(destDir) // cleanup orphaned files + return nil, fmt.Errorf("register skill in DB: %w", err) + } + + slog.Info("skill installed", "id", id, "slug", slug, "version", version, "owner", ownerID) + + // 9. Scan and install dependencies. + var depsWarning string + manifest := ScanSkillDeps(destDir) + if manifest != nil && !manifest.IsEmpty() { + ok, missing := CheckSkillDeps(manifest) + if !ok { + _ = si.store.StoreMissingDeps(ctx, id, missing) + // Auto-install missing deps. + installResult, _ := InstallDeps(ctx, manifest, missing) + if installResult != nil && len(installResult.Errors) > 0 { + depsWarning = fmt.Sprintf("some deps failed: %v", installResult.Errors) + } + } + } + + // 10. Hot-reload. 
+ if si.loader != nil { + si.loader.BumpVersion() + } + + return &SkillInstallResult{ + ID: id, + Name: name, + Slug: slug, + Version: version, + DepsWarning: depsWarning, + }, nil +} + +// Uninstall soft-deletes a skill by slug and refreshes the loader cache. +func (si *SkillInstaller) Uninstall(ctx context.Context, slug string) error { + // Look up skill by slug. + info, ok := si.store.GetSkill(ctx, slug) + if !ok { + return fmt.Errorf("skill %q not found", slug) + } + id, err := uuid.Parse(info.ID) + if err != nil { + return fmt.Errorf("invalid skill ID: %w", err) + } + + if err := si.store.DeleteSkill(ctx, id); err != nil { + return fmt.Errorf("delete skill: %w", err) + } + + slog.Info("skill uninstalled", "slug", slug, "id", id) + + if si.loader != nil { + si.loader.BumpVersion() + } + return nil +} + +// installDirSize returns total size of all files in a directory. +func installDirSize(path string) (int64, error) { + var total int64 + err := filepath.Walk(path, func(_ string, info os.FileInfo, err error) error { + if err != nil { + return err + } + if !info.IsDir() { + total += info.Size() + } + return nil + }) + return total, err +} diff --git a/internal/skills/registry_client.go b/internal/skills/registry_client.go new file mode 100644 index 000000000..e0b5f2712 --- /dev/null +++ b/internal/skills/registry_client.go @@ -0,0 +1,224 @@ +package skills + +import ( + "context" + "encoding/json" + "fmt" + "io" + "log/slog" + "net/http" + "os" + "path/filepath" + "strings" + "time" +) + +const ( + defaultRegistryURL = "https://raw.githubusercontent.com/goclaw-hub/registry/main/index.json" + registryCacheTTL = 1 * time.Hour + registryFetchTimeout = 10 * time.Second + maxIndexSize = 1 << 20 // 1 MB + cacheFileName = "registry-index.json" +) + +// RegistryEntry represents one skill in the registry index. 
+type RegistryEntry struct { + Slug string `json:"slug"` + Repo string `json:"repo"` // "owner/repo" + Description string `json:"description"` + Tags []string `json:"tags,omitempty"` +} + +// RegistryIndex is the top-level JSON structure of the registry. +type RegistryIndex struct { + Skills []RegistryEntry `json:"skills"` +} + +// RegistryClient fetches and caches the skill registry index from GitHub. +type RegistryClient struct { + cacheDir string + indexURL string + cacheTTL time.Duration +} + +// NewRegistryClient creates a registry client with local cache directory. +// Uses GOCLAW_REGISTRY_URL env var if set, otherwise the default GitHub URL. +func NewRegistryClient(cacheDir string) *RegistryClient { + url := os.Getenv("GOCLAW_REGISTRY_URL") + if url == "" { + url = defaultRegistryURL + } + // Enforce HTTPS to prevent MITM. + if !strings.HasPrefix(url, "https://") { + slog.Warn("registry: non-HTTPS URL rejected, using default", "url", url) + url = defaultRegistryURL + } + return &RegistryClient{ + cacheDir: cacheDir, + indexURL: url, + cacheTTL: registryCacheTTL, + } +} + +// Resolve maps a slug to an owner/repo pair via the registry index. +func (c *RegistryClient) Resolve(ctx context.Context, slug string) (owner, repo string, err error) { + index, err := c.fetchIndex(ctx) + if err != nil { + return "", "", err + } + for _, entry := range index.Skills { + if entry.Slug == slug { + parts := strings.SplitN(entry.Repo, "/", 2) + if len(parts) != 2 { + return "", "", fmt.Errorf("registry: invalid repo format for %q: %s", slug, entry.Repo) + } + return parts[0], parts[1], nil + } + } + return "", "", fmt.Errorf("skill %q not found in registry. Try: goclaw skills install github.com/owner/repo", slug) +} + +// Search returns registry entries matching a keyword query across slug, description, and tags. 
+func (c *RegistryClient) Search(ctx context.Context, query string) ([]RegistryEntry, error) { + index, err := c.fetchIndex(ctx) + if err != nil { + return nil, err + } + q := strings.ToLower(query) + var results []RegistryEntry + for _, entry := range index.Skills { + if matchesQuery(entry, q) { + results = append(results, entry) + } + } + return results, nil +} + +// List returns all skills in the registry. +func (c *RegistryClient) List(ctx context.Context) ([]RegistryEntry, error) { + index, err := c.fetchIndex(ctx) + if err != nil { + return nil, err + } + return index.Skills, nil +} + +// Refresh forces a fresh fetch of the registry index by removing the cache. +func (c *RegistryClient) Refresh(ctx context.Context) error { + cachePath := filepath.Join(c.cacheDir, cacheFileName) + os.Remove(cachePath) + _, err := c.fetchIndex(ctx) + return err +} + +// fetchIndex returns the registry index, using local cache if fresh enough. +// Falls back to stale cache if the network fetch fails. +func (c *RegistryClient) fetchIndex(ctx context.Context) (*RegistryIndex, error) { + cachePath := filepath.Join(c.cacheDir, cacheFileName) + + // Try cache first. + if info, err := os.Stat(cachePath); err == nil { + if time.Since(info.ModTime()) < c.cacheTTL { + if idx, err := c.readCache(cachePath); err == nil { + return idx, nil + } + } + } + + // Fetch from network. + idx, fetchErr := c.fetchFromNetwork(ctx) + if fetchErr == nil { + // Write cache atomically (temp file + rename). + c.writeCache(cachePath, idx) + return idx, nil + } + + // Network failed — try stale cache. + if idx, err := c.readCache(cachePath); err == nil { + slog.Warn("registry: using stale cache", "error", fetchErr) + return idx, nil + } + + return nil, fmt.Errorf("registry unavailable: %w", fetchErr) +} + +// fetchFromNetwork downloads and parses the registry index. 
+func (c *RegistryClient) fetchFromNetwork(ctx context.Context) (*RegistryIndex, error) { + ctx, cancel := context.WithTimeout(ctx, registryFetchTimeout) + defer cancel() + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, c.indexURL, nil) + if err != nil { + return nil, err + } + req.Header.Set("User-Agent", "goclaw/skill-hub") + + resp, err := http.DefaultClient.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("registry HTTP %s", resp.Status) + } + + body, err := io.ReadAll(io.LimitReader(resp.Body, maxIndexSize+1)) + if err != nil { + return nil, err + } + if len(body) > maxIndexSize { + return nil, fmt.Errorf("registry index too large (max %d bytes)", maxIndexSize) + } + + var idx RegistryIndex + if err := json.Unmarshal(body, &idx); err != nil { + return nil, fmt.Errorf("registry: invalid JSON: %w", err) + } + return &idx, nil +} + +// readCache reads and parses the cached registry index from disk. +func (c *RegistryClient) readCache(path string) (*RegistryIndex, error) { + data, err := os.ReadFile(path) + if err != nil { + return nil, err + } + var idx RegistryIndex + if err := json.Unmarshal(data, &idx); err != nil { + return nil, err + } + return &idx, nil +} + +// writeCache atomically writes the registry index to the cache file. +func (c *RegistryClient) writeCache(path string, idx *RegistryIndex) { + if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil { + return + } + data, err := json.Marshal(idx) + if err != nil { + return + } + tmp := path + ".tmp" + if err := os.WriteFile(tmp, data, 0644); err != nil { + return + } + os.Rename(tmp, path) +} + +// matchesQuery checks if a registry entry matches a search query (case-insensitive). 
+func matchesQuery(entry RegistryEntry, query string) bool { + if strings.Contains(strings.ToLower(entry.Slug), query) { + return true + } + if strings.Contains(strings.ToLower(entry.Description), query) { + return true + } + for _, tag := range entry.Tags { + if strings.Contains(strings.ToLower(tag), query) { + return true + } + } + return false +}