diff --git a/cmd/template/template.go b/cmd/template/template.go index c48ae38..8eedbef 100644 --- a/cmd/template/template.go +++ b/cmd/template/template.go @@ -9,6 +9,7 @@ import ( "github.com/cbroglie/mustache" "github.com/dangazineu/vyb/llm/openai" "github.com/dangazineu/vyb/llm/payload" + "github.com/dangazineu/vyb/workspace/context" "github.com/dangazineu/vyb/workspace/matcher" "github.com/dangazineu/vyb/workspace/project" "github.com/dangazineu/vyb/workspace/selector" @@ -54,49 +55,41 @@ type Definition struct { LongDescription string `yaml:"longDescription"` } -// prepareExecutionContext extracts all the logic needed to prepare the file selection context. -func prepareExecutionContext(target *string) (absRoot string, relWorkDir string, relTarget *string, err error) { +// prepareExecutionContext builds and validates an ExecutionContext based on +// the current working directory and an optional *target* argument. +func prepareExecutionContext(target *string) (*context.ExecutionContext, error) { absWorkingDir, err := filepath.Abs(".") if err != nil { - return "", "", nil, fmt.Errorf("failed to determine absolute path of working directory: %w", err) - } - distToRoot, err := project.FindDistanceToRoot(absWorkingDir) - if err != nil { - return "", "", nil, fmt.Errorf("unable to determine project distToRoot: %w", err) + return nil, fmt.Errorf("failed to determine absolute working dir: %w", err) } - absRoot, err = filepath.Abs(distToRoot) + // Locate project root using existing helper. + distToRoot, err := project.FindDistanceToRoot(absWorkingDir) if err != nil { - return "", "", nil, fmt.Errorf("failed to determine absolute path of project distToRoot %s: %w", distToRoot, err) + return nil, fmt.Errorf("unable to determine project root: %w", err) } - relWorkDir, err = filepath.Rel(absRoot, absWorkingDir) + absRoot, err := filepath.Abs(distToRoot) if err != nil { - return "", "", nil, fmt.Errorf("failed to determine relative path: %w", err) + return nil, fmt.Errorf("failed to determine absolute project root: %w", err) } + // Resolve absolute target (if any). + var absTarget *string if target != nil { - absTarget, err := filepath.Abs(*target) - if err != nil { - return "", "", nil, fmt.Errorf("failed to determine absolute path of target %s: %w", *target, err) - } - - rel, err := filepath.Rel(absRoot, absTarget) + at, err := filepath.Abs(*target) if err != nil { - return "", "", nil, fmt.Errorf("failed to determine relative path: %w", err) - } - - if rel == "" || strings.HasPrefix(rel, "..") { - return "", "", nil, fmt.Errorf("the target file %s is outside the project distToRoot %s", absTarget, absRoot) + return nil, fmt.Errorf("failed to resolve target %s: %w", *target, err) } + absTarget = &at + } - info, err := os.Stat(absTarget) - if err != nil || info.IsDir() { - return "", "", nil, fmt.Errorf("the target %s is not a valid file", absTarget) - } - target = &rel + // Let ExecutionContext enforce invariants. + ec, err := context.NewExecutionContext(absRoot, absWorkingDir, absTarget) + if err != nil { + return nil, err } - return absRoot, relWorkDir, target, nil + return ec, nil } func execute(cmd *cobra.Command, args []string, def *Definition) error { @@ -104,16 +97,31 @@ func execute(cmd *cobra.Command, args []string, def *Definition) error { return fmt.Errorf("command \"%s\" expects no arguments, but got %v", cmd.Use, args) } + // --------------------------- + // Retrieve --all flag value. + // --------------------------- + includeAll, _ := cmd.Flags().GetBool("all") + var target *string if len(args) > 0 { target = &args[0] } - absRoot, relWorkDir, relTarget, err := prepareExecutionContext(target) + ec, err := prepareExecutionContext(target) if err != nil { return err } + absRoot := ec.ProjectRoot + + // relTarget is the *file* provided by the user (if any), relative to root. + var relTarget *string + if target != nil { + absTarget, _ := filepath.Abs(*target) + rt, _ := filepath.Rel(absRoot, absTarget) + relTarget = &rt + } + rootFS := os.DirFS(absRoot) if relTarget != nil { @@ -122,11 +130,55 @@ func execute(cmd *cobra.Command, args []string, def *Definition) error { } } - files, err := selector.Select(rootFS, relWorkDir, relTarget, append(systemExclusionPatterns, def.ArgExclusionPatterns...), def.ArgInclusionPatterns) + files, err := selector.Select(rootFS, ec, append(systemExclusionPatterns, def.ArgExclusionPatterns...), def.ArgInclusionPatterns) + if err != nil { + return err + } + + // ------------------------------------------------------------ + // Load stored metadata (with annotations) and merge with a fresh + // snapshot produced from the current filesystem state. This + // guarantees we operate with up-to-date file information while + // keeping previously generated annotations intact. + // ------------------------------------------------------------ + storedMeta, err := project.LoadMetadata(absRoot) + if err != nil { + return err + } + freshMeta, err := project.BuildMetadataFS(rootFS) if err != nil { return err } + // Validate that the module name sets are identical. + if !equalModuleNameSets(storedMeta.Modules, freshMeta.Modules) { + return fmt.Errorf("module hierarchy mismatch between stored metadata and filesystem snapshot – please run 'vyb update' first") + } + + // Merge – keep annotations from storedMeta, replace structure from freshMeta. + storedMeta.Patch(freshMeta) + meta := storedMeta + + // ------------------------------------------------------------ + // Unless --all is provided, filter out files that belong to + // descendant modules of the target module (i.e. keep only files + // whose module == targetModule). + // ------------------------------------------------------------ + if !includeAll && meta.Modules != nil { + relTargetDir, _ := filepath.Rel(absRoot, ec.TargetDir) + relTargetDir = filepath.ToSlash(relTargetDir) + targetModule := project.FindModule(meta.Modules, relTargetDir) + if targetModule != nil { + var filtered []string + for _, f := range files { + if project.FindModule(meta.Modules, f) == targetModule { + filtered = append(filtered, f) + } + } + files = filtered + } + } + fmt.Printf("The following files will be included in the request:\n") for _, file := range files { if relTarget != nil && file == *relTarget { @@ -136,7 +188,7 @@ func execute(cmd *cobra.Command, args []string, def *Definition) error { } } - userMsg, err := payload.BuildUserMessage(rootFS, files) + userMsg, err := buildExtendedUserMessage(rootFS, meta, ec, files) if err != nil { return err } @@ -159,13 +211,34 @@ func execute(cmd *cobra.Command, args []string, def *Definition) error { return err } + // -------------------------------------------------------- // Validate that every file in the proposal is allowed to be modified. + // -------------------------------------------------------- invalidFiles := []string{} + + // helper closure to assert path containment using absolute paths. + isWithinDir := func(dir, candidate string) bool { + dir = filepath.Clean(dir) + candidate = filepath.Clean(candidate) + if dir == candidate { + return true + } + return strings.HasPrefix(candidate, dir+string(os.PathSeparator)) + } + for _, prop := range proposal.Proposals { + // 1. Pattern based validation (existing behaviour). if !matcher.IsIncluded(rootFS, prop.FileName, append(systemExclusionPatterns, def.ModificationExclusionPatterns...), def.ModificationInclusionPatterns) { invalidFiles = append(invalidFiles, prop.FileName) + continue + } + // 2. Must reside within the working_dir using absolute paths. + absProp := filepath.Join(absRoot, prop.FileName) + if !isWithinDir(ec.WorkingDir, absProp) { + invalidFiles = append(invalidFiles, prop.FileName+" (outside working_dir)") } } + if len(invalidFiles) > 0 { return fmt.Errorf("change proposal contains modifications to unallowed files: %v", invalidFiles) } @@ -211,14 +284,44 @@ func Register(rootCmd *cobra.Command) error { // Register subcommands. defs := load() for _, def := range defs { - rootCmd.AddCommand(&cobra.Command{ + cmd := &cobra.Command{ Use: def.Name, Long: def.LongDescription, Short: def.ShortDescription, RunE: func(cmd *cobra.Command, args []string) error { return execute(cmd, args, def) }, - }) + } + cmd.Flags().BoolP("all", "a", false, "include all files, even those in descendant modules") + rootCmd.AddCommand(cmd) } return nil } + +// collectModuleNames flattens a module tree into a set of names. +func collectModuleNames(m *project.Module, set map[string]struct{}) { + if m == nil { + return + } + set[m.Name] = struct{}{} + for _, c := range m.Modules { + collectModuleNames(c, set) + } +} + +// equalModuleNameSets returns true when both module trees enumerate exactly +// the same set of module names. +func equalModuleNameSets(a, b *project.Module) bool { + sa, sb := map[string]struct{}{}, map[string]struct{}{} + collectModuleNames(a, sa) + collectModuleNames(b, sb) + if len(sa) != len(sb) { + return false + } + for k := range sa { + if _, ok := sb[k]; !ok { + return false + } + } + return true +} diff --git a/cmd/template/user_msg_builder.go b/cmd/template/user_msg_builder.go new file mode 100644 index 0000000..6ab2967 --- /dev/null +++ b/cmd/template/user_msg_builder.go @@ -0,0 +1,117 @@ +package template + +import ( + "fmt" + "io/fs" + "path/filepath" + "strings" + + "github.com/dangazineu/vyb/llm/payload" + "github.com/dangazineu/vyb/workspace/context" + "github.com/dangazineu/vyb/workspace/project" +) + +// buildExtendedUserMessage composes the user-message payload that will be +// sent to the LLM. It prepends module context information — as dictated +// by the specification — before the raw file contents. When metadata is +// nil or when any contextual information is missing the function falls +// back gracefully, emitting only what is available. +func buildExtendedUserMessage(rootFS fs.FS, meta *project.Metadata, ec *context.ExecutionContext, filePaths []string) (string, error) { + // If metadata is missing we revert to the original behaviour – emit + // just the files. + if meta == nil || meta.Modules == nil { + return payload.BuildUserMessage(rootFS, filePaths) + } + + // Helper to clean/normalise relative paths. + rel := func(abs string) string { + if abs == "" { + return "" + } + r, _ := filepath.Rel(ec.ProjectRoot, abs) + return filepath.ToSlash(r) + } + + workingRel := rel(ec.WorkingDir) + targetRel := rel(ec.TargetDir) + + workingMod := project.FindModule(meta.Modules, workingRel) + targetMod := project.FindModule(meta.Modules, targetRel) + + if workingMod == nil || targetMod == nil { + return "", fmt.Errorf("failed to locate working and target modules") + } + + var sb strings.Builder + + // ------------------------------------------------------------ + // 1. External context of working module. + // ------------------------------------------------------------ + if ann := workingMod.Annotation; ann != nil && ann.ExternalContext != "" { + sb.WriteString(fmt.Sprintf("# Module: `%s`\n", workingMod.Name)) + sb.WriteString("## External Context\n") + sb.WriteString(ann.ExternalContext + "\n") + } + + // ------------------------------------------------------------ + // 2. Internal context of modules between working and target. + // ------------------------------------------------------------ + for m := targetMod.Parent; m != nil && m != workingMod; m = m.Parent { + if ann := m.Annotation; ann != nil && ann.InternalContext != "" { + sb.WriteString(fmt.Sprintf("# Module: `%s`\n", m.Name)) + sb.WriteString("## Internal Context\n") + sb.WriteString(ann.InternalContext + "\n") + } + } + + // ------------------------------------------------------------ + // 3. Public context of sibling modules along the path from the + // parent of the target module up to (and including) the working + // module. This replaces the previous logic that only considered + // direct children of the working module. + // ------------------------------------------------------------ + + isAncestor := func(a, b string) bool { + return a == b || (a != "." && strings.HasPrefix(b, a+"/")) + } + + for ancestor := targetMod.Parent; ancestor != nil; ancestor = ancestor.Parent { + for _, child := range ancestor.Modules { + // Skip the target itself and all its ancestor path. + if isAncestor(child.Name, targetMod.Name) { + continue + } + if ann := child.Annotation; ann != nil && ann.PublicContext != "" { + sb.WriteString(fmt.Sprintf("# Module: `%s`\n", child.Name)) + sb.WriteString("## Public Context\n") + sb.WriteString(ann.PublicContext + "\n") + } + } + if ancestor == workingMod { + break + } + } + + // ------------------------------------------------------------ + // 4. Public context of immediate sub-modules of target module. + // ------------------------------------------------------------ + for _, child := range targetMod.Modules { + if ann := child.Annotation; ann != nil && ann.PublicContext != "" { + sb.WriteString(fmt.Sprintf("# Module: `%s`\n", child.Name)) + sb.WriteString("## Public Context\n") + sb.WriteString(ann.PublicContext + "\n") + } + } + + // ------------------------------------------------------------ + // 5. Append file contents (only files from target module were + // selected by selector.Select). + // ------------------------------------------------------------ + filesMsg, err := payload.BuildUserMessage(rootFS, filePaths) + if err != nil { + return "", err + } + sb.WriteString(filesMsg) + + return sb.String(), nil +} diff --git a/cmd/template/user_msg_builder_test.go b/cmd/template/user_msg_builder_test.go new file mode 100644 index 0000000..5177578 --- /dev/null +++ b/cmd/template/user_msg_builder_test.go @@ -0,0 +1,79 @@ +package template + +import ( + "fmt" + "strings" + "testing" + "testing/fstest" + + "github.com/dangazineu/vyb/workspace/context" + "github.com/dangazineu/vyb/workspace/project" +) + +func Test_buildExtendedUserMessage(t *testing.T) { + ann := func(s string) *project.Annotation { + return &project.Annotation{ + PublicContext: fmt.Sprintf("%s public", s), + ExternalContext: fmt.Sprintf("%s external", s), + InternalContext: fmt.Sprintf("%s internal", s), + } + } + // Build minimal module tree: root -> work (w) -> tgt (w/child) + root := &project.Module{Name: "."} + work := &project.Module{Name: "w", Parent: root, Annotation: ann("W")} + mid := &project.Module{Name: "w/mid", Parent: work, Annotation: ann("Mid")} + tgt := &project.Module{Name: "w/mid/child", Parent: mid, Annotation: ann("Target")} + sib := &project.Module{Name: "w/mid/sibling", Parent: mid, Annotation: ann("Sibling")} + cous := &project.Module{Name: "w/cousin", Parent: work, Annotation: ann("Cousin")} + out := &project.Module{Name: "out", Parent: root, Annotation: ann("Out")} + + work.Modules = []*project.Module{mid, cous} + mid.Modules = []*project.Module{tgt, sib} + root.Modules = []*project.Module{work, out} + + meta := &project.Metadata{Modules: root} + + // in-memory fs with one file inside target module. + mfs := fstest.MapFS{ + "w/mid/child/file.txt": &fstest.MapFile{Data: []byte("hello")}, + "w/mid/file.txt": &fstest.MapFile{Data: []byte("mid content")}, + "w/mid/sibling/file.txt": &fstest.MapFile{Data: []byte("sibling content")}, + "w/file.txt": &fstest.MapFile{Data: []byte("w content")}, + "w/cousin/file.txt": &fstest.MapFile{Data: []byte("cousin content")}, + "out/file.txt": &fstest.MapFile{Data: []byte("out content")}, + } + + ec := &context.ExecutionContext{ + ProjectRoot: ".", + WorkingDir: "w", + TargetDir: "w/mid/child", + } + + msg, err := buildExtendedUserMessage(mfs, meta, ec, []string{"w/mid/child/file.txt"}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + // Basic assertions – ensure expected contexts are present. + mustContain := []string{"W external", "Mid internal", "Sibling public", "Cousin public", "hello"} + for _, s := range mustContain { + if !strings.Contains(msg, s) { + t.Fatalf("expected message to contain %q", s) + } + } + + mustNotContain := []string{ + "W public", "W internal", + "Mid public", "Mid external", + "Sibling internal", "Sibling external", + "Cousin internal", "Cousin external", + "Out public", "Out internal", "Out external", + "mid content", "sibling content", "w content", "cousin content", "out content", + } + + for _, s := range mustNotContain { + if strings.Contains(msg, s) { + t.Fatalf("should not include contexts for target module itself, got message:\n%s", msg) + } + } +} diff --git a/workspace/context/context.go b/workspace/context/context.go new file mode 100644 index 0000000..8e1f564 --- /dev/null +++ b/workspace/context/context.go @@ -0,0 +1,105 @@ +package context + +import ( + "fmt" + "os" + "path/filepath" + "strings" +) + +// ExecutionContext captures the three key path concepts used by vyb +// commands. All fields are absolute, clean paths. +// +// • ProjectRoot – directory that contains the .vyb folder. +// • WorkingDir – directory from which the command is executed. Must be +// the same as ProjectRoot or a descendant of it. +// • TargetDir – directory containing the target file (if one was +// provided to the command). When no target is given it +// equals WorkingDir. TargetDir is guaranteed to be the +// same as WorkingDir or a descendant of it. +// +// Invariants are enforced by the constructor – direct struct instantiation +// outside this package is discouraged. +// +// NOTE: This package purposefully sits outside the project/root package so +// it can be reused by matcher, selector and template with no import +// cycles. +// +// TODO(vyb): Add convenience helpers (e.g. Rel(path)) when required by +// later tasks. +type ExecutionContext struct { + ProjectRoot string + WorkingDir string + TargetDir string +} + +// NewExecutionContext validates and returns an ExecutionContext. +// +// Parameters must be *absolute* paths. If targetFile is nil it is treated +// as if no target was provided. +func NewExecutionContext(projectRoot, workingDir string, targetFile *string) (*ExecutionContext, error) { + // Sanity-check that we received absolute paths. + if !filepath.IsAbs(projectRoot) || !filepath.IsAbs(workingDir) { + return nil, fmt.Errorf("projectRoot and workingDir must be absolute paths") + } + var targetAbs string + if targetFile != nil { + if !filepath.IsAbs(*targetFile) { + return nil, fmt.Errorf("targetFile must be an absolute path when provided") + } + targetAbs = filepath.Clean(*targetFile) + } + + root := filepath.Clean(projectRoot) + work := filepath.Clean(workingDir) + + // Ensure .vyb exists inside projectRoot. + if fi, err := os.Stat(filepath.Join(root, ".vyb")); err != nil || !fi.IsDir() { + return nil, fmt.Errorf("%s is not a valid project root – missing .vyb directory", root) + } + + // workingDir must be under projectRoot. + if !isDescendant(root, work) { + return nil, fmt.Errorf("workingDir %s is not within projectRoot %s", work, root) + } + + // Derive/validate targetDir when a target file is provided. + var targetDir string + if targetFile != nil { + fi, err := os.Stat(targetAbs) + if err != nil { + return nil, fmt.Errorf("target file %s does not exist: %w", targetAbs, err) + } + if fi.IsDir() { + return nil, fmt.Errorf("target %s is a directory, expected a file", targetAbs) + } + + if !isDescendant(work, targetAbs) { + return nil, fmt.Errorf("target file %s is outside workingDir %s", targetAbs, work) + } + + targetDir = filepath.Dir(targetAbs) + } else { + targetDir = work + } + + return &ExecutionContext{ + ProjectRoot: root, + WorkingDir: work, + TargetDir: filepath.Clean(targetDir), + }, nil +} + +// isDescendant returns true when child == parent or child is somewhere +// below parent in the directory hierarchy. +func isDescendant(parent, child string) bool { + rel, err := filepath.Rel(parent, child) + if err != nil { + return false + } + return rel == "." || !startsWithDotDot(rel) +} + +func startsWithDotDot(rel string) bool { + return rel == ".." || strings.HasPrefix(rel, ".."+string(os.PathSeparator)) +} diff --git a/workspace/context/context_test.go b/workspace/context/context_test.go new file mode 100644 index 0000000..a0121f2 --- /dev/null +++ b/workspace/context/context_test.go @@ -0,0 +1,74 @@ +package context + +import ( + "os" + "path/filepath" + "testing" +) + +// helper to create temp project with optional sub-dirs/files. +func setupProject(t *testing.T) string { + dir := t.TempDir() + if err := os.Mkdir(filepath.Join(dir, ".vyb"), 0o755); err != nil { + t.Fatalf("failed to create .vyb: %v", err) + } + return dir +} + +func TestNewExecutionContext_ValidNoTarget(t *testing.T) { + root := setupProject(t) + + ec, err := NewExecutionContext(root, root, nil) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if ec.ProjectRoot != root || ec.WorkingDir != root || ec.TargetDir != root { + t.Fatalf("unexpected paths in context: %+v", ec) + } +} + +func TestNewExecutionContext_ValidWithTarget(t *testing.T) { + root := setupProject(t) + work := filepath.Join(root, "sub") + if err := os.MkdirAll(work, 0o755); err != nil { + t.Fatalf("mkdir: %v", err) + } + targetFile := filepath.Join(work, "file.txt") + if err := os.WriteFile(targetFile, []byte("data"), 0o644); err != nil { + t.Fatalf("write: %v", err) + } + + ec, err := NewExecutionContext(root, work, &targetFile) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if ec.TargetDir != work { + t.Fatalf("expected TargetDir %s, got %s", work, ec.TargetDir) + } +} + +func TestNewExecutionContext_ErrWorkingDirOutsideRoot(t *testing.T) { + root := setupProject(t) + outside := filepath.Dir(root) // parent of root + _, err := NewExecutionContext(root, outside, nil) + if err == nil { + t.Fatalf("expected error, got nil") + } +} + +func TestNewExecutionContext_ErrTargetOutsideWork(t *testing.T) { + root := setupProject(t) + work := filepath.Join(root, "some") + target := filepath.Join(root, "other", "file.txt") + if err := os.MkdirAll(filepath.Dir(target), 0o755); err != nil { + t.Fatalf("mkdir: %v", err) + } + if err := os.WriteFile(target, []byte("x"), 0o644); err != nil { + t.Fatalf("write: %v", err) + } + + _, err := NewExecutionContext(root, work, &target) + if err == nil { + t.Fatalf("expected error, got nil") + } +} diff --git a/workspace/project/metadata.go b/workspace/project/metadata.go index 21139cd..90ccdb3 100644 --- a/workspace/project/metadata.go +++ b/workspace/project/metadata.go @@ -4,6 +4,7 @@ import ( "crypto/md5" "encoding/hex" "fmt" + "github.com/dangazineu/vyb/workspace/context" "io/fs" "os" "path/filepath" @@ -165,10 +166,26 @@ func Create(projectRoot string) error { return nil } +// BuildMetadataFS exposes the internal buildMetadata helper so that external +// packages (e.g. cmd/template) can generate a *fresh* snapshot of the workspace +// file structure without losing the richer annotation data stored on disk. +// +// The behaviour is identical to buildMetadata – it walks the filesystem rooted +// at the provided fs.FS, produces a full Module/File hierarchy and returns the +// resulting *Metadata. +func BuildMetadataFS(fsys fs.FS) (*Metadata, error) { + return buildMetadata(fsys) +} + // buildMetadata builds a metadata representation for the files available in // the given filesystem func buildMetadata(fsys fs.FS) (*Metadata, error) { - selected, err := selector.Select(fsys, "", nil, systemExclusionPatterns, []string{"*"}) + // Build a minimal execution context anchored at workspace root so selector + // includes *all* files. We bypass constructor to avoid filesystem checks + // (unit-tests use fstest.MapFS). + ec := &context.ExecutionContext{ProjectRoot: ".", WorkingDir: ".", TargetDir: "."} + + selected, err := selector.Select(fsys, ec, systemExclusionPatterns, []string{"*"}) if err != nil { return nil, fmt.Errorf("failed during file selection: %w", err) } diff --git a/workspace/project/metadata_helpers.go b/workspace/project/metadata_helpers.go new file mode 100644 index 0000000..72ca289 --- /dev/null +++ b/workspace/project/metadata_helpers.go @@ -0,0 +1,68 @@ +package project + +import ( + "fmt" + "gopkg.in/yaml.v3" + "io/fs" + "os" + "path/filepath" + "strings" +) + +// LoadMetadata reads .vyb/metadata.yaml under the provided absolute +// project root directory and unmarshals it into a *Metadata. The +// function returns an error when the metadata file cannot be found or +// parsed. +func LoadMetadata(projectRoot string) (*Metadata, error) { + if projectRoot == "" { + return nil, fmt.Errorf("projectRoot must not be empty") + } + return LoadMetadataFS(os.DirFS(projectRoot)) +} + +// LoadMetadataFS performs the same operation as LoadMetadata but takes an +// fs.FS rooted at workspace root. This is mostly useful for tests where +// an in-memory fs.FS is more convenient than an OS path. +func LoadMetadataFS(fsys fs.FS) (*Metadata, error) { + data, err := fs.ReadFile(fsys, ".vyb/metadata.yaml") + if err != nil { + return nil, fmt.Errorf("failed to read metadata.yaml: %w", err) + } + var m Metadata + if err := yaml.Unmarshal(data, &m); err != nil { + return nil, fmt.Errorf("failed to unmarshal metadata: %w", err) + } + return &m, nil +} + +// FindModule returns the deepest *Module whose Name is an ancestor (or +// equal) to relPath. Both parameters must use forward-slash separators +// and be relative to the workspace root (same convention used by the +// Module.Name field). When no matching module exists the function now +// returns the root module. This guarantees that files living directly in +// the workspace root are associated with the root module instead of +// returning nil. +func FindModule(root *Module, relPath string) *Module { + if root == nil { + return nil + } + // Normalise to forward-slash for consistent matching. + relPath = filepath.ToSlash(relPath) + + // By default, the best match is the root module itself – this covers + // files that live directly in the workspace root and therefore do + // not belong to any nested sub-module. + best := root + + var dfs func(*Module) + dfs = func(m *Module) { + for _, c := range m.Modules { + if relPath == c.Name || (c.Name != "." && strings.HasPrefix(relPath, c.Name+"/")) { + best = c + } + dfs(c) + } + } + dfs(root) + return best +} diff --git a/workspace/project/metadata_helpers_test.go b/workspace/project/metadata_helpers_test.go new file mode 100644 index 0000000..6a5b937 --- /dev/null +++ b/workspace/project/metadata_helpers_test.go @@ -0,0 +1,20 @@ +package project + +import "testing" + +func TestFindModule_RootFile(t *testing.T) { + // build a tiny hierarchy: root (.) with child "dir" + root := &Module{Name: "."} + child := &Module{Name: "dir", Parent: root} + root.Modules = []*Module{child} + + // Case 1: file directly in root – expect root module. + if got := FindModule(root, "README.md"); got != root { + t.Fatalf("expected root module for file in project root, got %v", got) + } + + // Case 2: file inside dir – expect child module. + if got := FindModule(root, "dir/file.txt"); got != child { + t.Fatalf("expected child module for nested file, got %v", got) + } +} diff --git a/workspace/selector/selector.go b/workspace/selector/selector.go index 6f103dc..aade5a2 100644 --- a/workspace/selector/selector.go +++ b/workspace/selector/selector.go @@ -8,105 +8,129 @@ import ( "path/filepath" "strings" + "github.com/dangazineu/vyb/workspace/context" "github.com/dangazineu/vyb/workspace/matcher" ) -// Select selects which files should be in scope, based on a series of criteria: -// - the return value of the function is a slice with a list of file names, relative to the projectRoot (only files, no directories); -// - if no target is provided, every file directly and indirectly under the baseDir will be evaluated; -// - if a target is provided, and it is a directory, only files directly and indirectly under the target folder will be evaluated; -// - if a target is provided, and it is a file, only files directly and indirectly under the target file's enclosing directory will be evaluated; -// - A file is included in the response if matcher.IsIncluded returns true; +// Select walks the workspace starting from ec.TargetDir (relative to the +// project root) collecting every file that matches inclusion/exclusion +// patterns. All parameters – except projectRoot – are derived from the +// provided ExecutionContext. +// +// Invariants: +// - ec.ProjectRoot MUST correspond to the same filesystem represented by +// projectRoot (no runtime cross-validation is performed). +// - Only paths that are **under** ec.TargetDir are evaluated. +// // - If a directory is excluded if matcher.IsExcluded returns true; // - If a directory is excluded, none of its contents will be evaluated; // - For each directory that is not excluded, if a .gitignore file is present, it will be read, and its contents will be appended to the exclusionPatterns for this and all its sub-directories; // - All arguments (commandBaseDir, target, exclusionPatterns, and inclusionPatterns) are relative to the projectRoot; // - .gitignore patterns are relative to the directory where the .gitignore file was found; -func Select(projectRoot fs.FS, commandBaseDir string, target *string, exclusionPatterns, inclusionPatterns []string) ([]string, error) { - // Determine the startDir based on commandBaseDir and target. - startDir := path.Clean(commandBaseDir) - if target != nil { - t := path.Clean(*target) - info, err := fs.Stat(projectRoot, t) - if err == nil { - if !info.IsDir() { - // target is a file, use its parent directory as the starting directory - startDir = path.Dir(t) - } else { - startDir = t - } - } - // If error, fallback to commandBaseDir +func Select(projectRoot fs.FS, ec *context.ExecutionContext, exclusionPatterns, inclusionPatterns []string) ([]string, error) { + if ec == nil { + return nil, fs.ErrInvalid } - var results []string + // Compute the directory (relative to project root) that will seed the + // comparisons when deciding which files to include. This is guaranteed to + // be within the workspace as enforced by ExecutionContext. + relStart := "." + if rel, err := filepath.Rel(ec.ProjectRoot, ec.TargetDir); err == nil { + relStart = filepath.ToSlash(rel) + } - // effectiveExclusions holds the accumulated exclusion patterns for each directory. - effectiveExclusions := make(map[string][]string) + // ------------------------------------------------------------ + // Helper predicates + // ------------------------------------------------------------ - // Helper function to check if two paths are related. - // Two paths are considered related if one is an ancestor of the other. - // Always returns true for the root directory. - isRelated := func(curr, target string) bool { - curr = path.Clean(curr) + // isDescendant returns true when p == target or p is nested somewhere under + // target. + isDescendant := func(p, target string) bool { + if target == "." { + return true + } + p = path.Clean(p) target = path.Clean(target) - if curr == "." || target == "." { + return p == target || strings.HasPrefix(p+"/", target+"/") + } + + // isAncestor returns true when p == target or p is an ancestor directory of + // target. + isAncestor := func(p, target string) bool { + if p == "." { return true } - // Append "/" to ensure we match whole directory segments. - currPrefix := curr + "/" - targetPrefix := target + "/" - res := strings.HasPrefix(targetPrefix, currPrefix) || strings.HasPrefix(currPrefix, targetPrefix) - return res + p = path.Clean(p) + target = path.Clean(target) + return p == target || strings.HasPrefix(target+"/", p+"/") + } + + // A directory is relevant if it is the target itself, one of its ancestors + // or one of its descendants. + isRelevantDir := func(dir string) bool { + return isAncestor(dir, relStart) || isDescendant(dir, relStart) } - // Walk the entire project from the project root to accumulate all .gitignore exclusions. + // ------------------------------------------------------------ + // Walk preparation + // ------------------------------------------------------------ + + // effectiveExclusions keeps the accumulated exclusion patterns per dir. + effectiveExclusions := map[string][]string{} + + var results []string + err := fs.WalkDir(projectRoot, ".", func(currPath string, d fs.DirEntry, err error) error { if err != nil { return err } - // Only process directories (and their files) that are related to startDir. - // That is, the directory is either an ancestor of startDir or is under startDir. + // -------------------------------------------------------- + // Relevance filtering – keep the traversal tight. + // -------------------------------------------------------- if d.IsDir() { - if !isRelated(currPath, startDir) { + if !isRelevantDir(currPath) { return fs.SkipDir } } else { - // Only process files that are under the startDir - rel, relErr := filepath.Rel(startDir, currPath) - if relErr != nil || rel == ".." || strings.HasPrefix(rel, "../") { + // Skip files that are not inside the target subtree. + if !isDescendant(currPath, relStart) { return nil } } - // Determine parent's effective exclusions. parentDir := path.Dir(currPath) - parentExclusions, ok := effectiveExclusions[parentDir] - if !ok { - // Fallback in case parent's exclusions are missing. - parentExclusions = exclusionPatterns - } + parentExcl := append(exclusionPatterns, effectiveExclusions[parentDir]...) - // When processing a directory, first check if it should be excluded. + // -------------------------------------------------------- + // Directory processing + // -------------------------------------------------------- if d.IsDir() { - if matcher.IsExcluded(projectRoot, currPath, parentExclusions) { + // Apply parent exclusion patterns to decide whether to descend. + if matcher.IsExcluded(projectRoot, currPath, parentExcl) { return fs.SkipDir } - // Compute current directory's effective exclusions. - effectiveExclusions[currPath] = computeEffectiveExclusions(projectRoot, currPath, parentExclusions) - } else { // File - if matcher.IsIncluded(projectRoot, currPath, parentExclusions, inclusionPatterns) { - results = append(results, currPath) - } + // Build this dir's exclusion list inheriting parent + .gitignore. + effectiveExclusions[currPath] = computeEffectiveExclusions(projectRoot, currPath, parentExcl) + return nil + } + + // -------------------------------------------------------- + // File processing + // -------------------------------------------------------- + if matcher.IsIncluded(projectRoot, currPath, parentExcl, inclusionPatterns) { + results = append(results, currPath) } return nil }) + return results, err } -// computeEffectiveExclusions extracts the effective exclusion patterns for a directory. -// It starts with the provided baseExclusions and appends patterns from a .gitignore file, if present. +// computeEffectiveExclusions extracts the effective exclusion patterns for a +// directory. It starts with the provided baseExclusions and appends patterns +// from a .gitignore file, if present. func computeEffectiveExclusions(projectRoot fs.FS, dir string, baseExclusions []string) []string { exclusions := append([]string{}, baseExclusions...) gitignorePath := path.Join(dir, ".gitignore") @@ -116,7 +140,8 @@ func computeEffectiveExclusions(projectRoot fs.FS, dir string, baseExclusions [] return exclusions } -// parseGitignore parses the content of a .gitignore file and returns a slice of patterns. +// parseGitignore parses the content of a .gitignore file and returns a slice +// of patterns. func parseGitignore(data string) []string { var patterns []string reader := strings.NewReader(data) @@ -133,7 +158,6 @@ func parseGitignore(data string) []string { } continue } - // Additional parsing rules can be added here if needed. patterns = append(patterns, line) if err == io.EOF { break diff --git a/workspace/selector/selector_test.go b/workspace/selector/selector_test.go index 5f682a4..241bebf 100644 --- a/workspace/selector/selector_test.go +++ b/workspace/selector/selector_test.go @@ -2,8 +2,10 @@ package selector import ( "fmt" + "github.com/dangazineu/vyb/workspace/context" "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" + "path/filepath" "testing" "testing/fstest" ) @@ -15,7 +17,7 @@ func TestSelect(t *testing.T) { "base/dir1/ignored.txt": {Data: []byte("ignored content")}, "base/dir1/file1.txt": {Data: []byte("content1")}, "base/dir1/subdir1/.gitignore": {Data: []byte("# no ignore here\n")}, - "base/dir1/subdir1/ignored.txt": {Data: []byte("ignored also content as it's inherited from parent\n")}, + "base/dir1/subdir1/ignored.txt": {Data: []byte("ignored also as it's inherited from parent\n")}, "base/dir1/subdir1/file2.txt": {Data: []byte("content2")}, "base/dir1/subdir2/.gitignore": {Data: []byte("*\n")}, "base/dir1/subdir2/file3.txt": {Data: []byte("this file should never be included\n")}, @@ -44,6 +46,16 @@ func TestSelect(t *testing.T) { "base/dir1/subdir3/file4.txt", }, }, + { + baseDir: "base/dir1", + target: target("base/dir1/file1.txt"), + exclusions: []string{".gitignore", "file2.txt"}, + inclusions: []string{"*"}, + want: []string{ + "base/dir1/file1.txt", + "base/dir1/subdir3/file4.txt", + }, + }, { baseDir: "base/dir1/subdir1", exclusions: []string{".gitignore"}, @@ -67,7 +79,14 @@ func TestSelect(t *testing.T) { for i, tc := range tests { t.Run(fmt.Sprintf("TestSelect[%d]", i), func(t *testing.T) { - got, err := Select(fsys, tc.baseDir, tc.target, tc.exclusions, tc.inclusions) + ec := &context.ExecutionContext{ProjectRoot: ".", WorkingDir: tc.baseDir, TargetDir: func() string { + if tc.target != nil { + return filepath.Dir(*tc.target) + } + return tc.baseDir + }()} + + got, err := Select(fsys, ec, tc.exclusions, tc.inclusions) if err != nil { t.Fatalf("Got an error: %v", err) } @@ -78,6 +97,42 @@ func TestSelect(t *testing.T) { } } +// TestSelect_TargetDirIsolation ensures that Select never returns files that +// live outside ec.TargetDir – even when they share part of the path or match +// the inclusion patterns. This guards against accidental context leakage to +// the LLM caused by future regressions in the traversal logic. +func TestSelect_TargetDirIsolation(t *testing.T) { + fsys := fstest.MapFS{ + "root/work/a.txt": {Data: []byte("w a")}, + "root/work/b.txt": {Data: []byte("w b")}, + "root/work/sub/c.txt": {Data: []byte("w sub c")}, + "root/other/x.txt": {Data: []byte("o x")}, + } + + // Simulate: project_root = root, working_dir = root/work, target = work/sub/c.txt + // We expect only files under work/sub to be selected. + targetFile := "root/work/sub/c.txt" + ec := &context.ExecutionContext{ + ProjectRoot: ".", + WorkingDir: "root/work", + TargetDir: filepath.Dir(targetFile), + } + + got, err := Select(fsys, ec, []string{}, []string{"*"}) + + if err != nil { + t.Fatalf("Select returned error: %v", err) + } + + want := []string{ + "root/work/sub/c.txt", + } + + if diff := cmp.Diff(want, got, cmpopts.EquateEmpty()); diff != "" { + t.Fatalf("selected paths mismatch (-want +got):\n%s", diff) + } +} + func target(t string) *string { return &t }