From 18c2b5da8a0d119bbcbad92793f4b052edb12f54 Mon Sep 17 00:00:00 2001 From: Dan Gazineu Date: Sat, 28 Jun 2025 15:50:07 -0400 Subject: [PATCH] refactor: improve metadata patching and change detection This commit refactors the metadata patching process to be more robust and informative. The method in now: - Validates that the module hierarchy is consistent between the stored and fresh metadata. - Returns a object that summarizes the changes, including token count changes for each module. The now uses the new method and displays the changes to the user. --- cmd/template/template.go | 44 ++----- cmd/update.go | 10 +- workspace/project/filesystem.go | 46 +------ workspace/project/metadata.go | 137 +++++++++++++++++++- workspace/project/metadata_test.go | 199 +++++++++++++++-------------- workspace/project/update.go | 31 ----- 6 files changed, 257 insertions(+), 210 deletions(-) diff --git a/cmd/template/template.go b/cmd/template/template.go index aaf7fd3..5e18764 100644 --- a/cmd/template/template.go +++ b/cmd/template/template.go @@ -161,13 +161,19 @@ func execute(cmd *cobra.Command, args []string, def *Definition) error { return err } - // Validate that the module name sets are identical. - if !equalModuleNameSets(storedMeta.Modules, freshMeta.Modules) { - return fmt.Errorf("module hierarchy mismatch between stored metadata and filesystem snapshot – please run 'vyb update' first") + patchResult := storedMeta.Patch(freshMeta) + + if len(patchResult.AddedModules) > 0 || len(patchResult.RemovedModules) > 0 { + return fmt.Errorf("module hierarchy has changed. Run 'vyb update' to refresh") + } + + if len(patchResult.ChangedModules) > 0 { + fmt.Println("Warning: metadata is stale. Run 'vyb update' to refresh.") + for moduleName, change := range patchResult.ChangedModules { + fmt.Printf(" - Module %s changed by %.2f%%\n", moduleName, change.ChangePercentage()) + } } - // Merge – keep annotations from storedMeta, replace structure from freshMeta. - storedMeta.Patch(freshMeta) meta := storedMeta // ------------------------------------------------------------ @@ -308,31 +314,3 @@ func Register(rootCmd *cobra.Command) error { } return nil } - -// collectModuleNames flattens a module tree into a set of names. -func collectModuleNames(m *project.Module, set map[string]struct{}) { - if m == nil { - return - } - set[m.Name] = struct{}{} - for _, c := range m.Modules { - collectModuleNames(c, set) - } -} - -// equalModuleNameSets returns true when both module trees enumerate exactly -// the same set of module names. -func equalModuleNameSets(a, b *project.Module) bool { - sa, sb := map[string]struct{}{}, map[string]struct{}{} - collectModuleNames(a, sa) - collectModuleNames(b, sb) - if len(sa) != len(sb) { - return false - } - for k := range sa { - if _, ok := sb[k]; !ok { - return false - } - } - return true -} diff --git a/cmd/update.go b/cmd/update.go index 1ba2496..ce3b945 100644 --- a/cmd/update.go +++ b/cmd/update.go @@ -2,16 +2,18 @@ package cmd import ( "fmt" - "os" - "github.com/spf13/cobra" "github.com/vybdev/vyb/workspace/project" + "os" ) var updateCmd = &cobra.Command{ Use: "update", - Short: "Updates the vyb project metadata.", - Run: Update, + Short: "Update the project's metadata", + Long: `This command updates the project's metadata. +It will regenerate all annotations for the current project, preserving any +existing ones that are still valid.`, + Run: Update, } func Update(_ *cobra.Command, _ []string) { diff --git a/workspace/project/filesystem.go b/workspace/project/filesystem.go index 2c27b15..1f84f4a 100644 --- a/workspace/project/filesystem.go +++ b/workspace/project/filesystem.go @@ -12,50 +12,6 @@ import ( "github.com/tiktoken-go/tokenizer" ) -// buildModuleFromFS constructs a hierarchy of Modules and Files for the given path entries. -// It returns the Module representing the root folder. -func buildModuleFromFS(fsys fs.FS, pathEntries []string) (*Module, error) { - // First, create a basic tree with empty token information so we can easily - // attach files to the correct folder hierarchy. - root := &Module{Name: ".", Modules: []*Module{}, Files: []*FileRef{}} - - for _, entry := range pathEntries { - if entry == "" { - continue - } - info, err := fs.Stat(fsys, entry) - if err != nil { - return nil, fmt.Errorf("failed to stat path %q: %w", entry, err) - } - // Ignore directories from the incoming list – we only care about files. - if info.IsDir() { - continue - } - - fileRef, err := newFileRefFromFS(fsys, entry) - if err != nil { - return nil, fmt.Errorf("failed to build file object for %s: %w", entry, err) - } - - parent := findOrCreateParentModule(root, entry) - parent.Files = append(parent.Files, fileRef) - } - - // Collapse trivial single-child folders first. - collapseModules(root) - - // At this point, we already have all the FileRefs and their token counts. - // Rebuild the tree using the newModule constructor, so module TokenCounts are computed. - - rebuilt := rebuildModule(root, nil) - - // Now using the tree with token counts, collapse any modules that have fewer tokens than minTokenCountPerModule. - collapseByTokens(rebuilt) - - // Return a fresh copy of the tree with updated per-module token counts. - return rebuildModule(rebuilt, nil), nil -} - // newFileRefFromFS creates a *project.FileRef with computed last-modified time, token count, and MD5. func newFileRefFromFS(fsys fs.FS, relPath string) (*FileRef, error) { info, err := fs.Stat(fsys, relPath) @@ -174,4 +130,4 @@ func computeMd5(fsys fs.FS, path string) (string, error) { return "", err } return fmt.Sprintf("%x", hasher.Sum(nil)), nil -} +} \ No newline at end of file diff --git a/workspace/project/metadata.go b/workspace/project/metadata.go index bc638ca..0af0dc3 100644 --- a/workspace/project/metadata.go +++ b/workspace/project/metadata.go @@ -5,7 +5,6 @@ import ( "encoding/hex" "fmt" "github.com/vybdev/vyb/config" - "github.com/vybdev/vyb/workspace/context" "io/fs" "os" "path/filepath" @@ -15,6 +14,7 @@ import ( "gopkg.in/yaml.v3" + "github.com/vybdev/vyb/workspace/context" "github.com/vybdev/vyb/workspace/selector" ) @@ -25,6 +25,87 @@ type Metadata struct { Modules *Module `yaml:"modules"` } +// PatchResult summarizes the changes performed by the Patch method. +type PatchResult struct { + ChangedModules map[string]ModuleChange + AddedModules []string + RemovedModules []string +} + +// ModuleChange details the changes for a single module. +type ModuleChange struct { + PreviousTokenCount int64 + CurrentTokenCount int64 +} + +// ChangePercentage returns the percentage change in token count for a module. +func (mc ModuleChange) ChangePercentage() float64 { + if mc.PreviousTokenCount == 0 { + return 100.0 + } + return float64(mc.CurrentTokenCount-mc.PreviousTokenCount) / float64(mc.PreviousTokenCount) * 100.0 +} + +// Patch updates the receiver Metadata with the structure of the `other` Metadata, +// while preserving annotations. It also validates that the module hierarchy is consistent +// and returns a summary of the changes. +func (m *Metadata) Patch(other *Metadata) *PatchResult { + result := &PatchResult{ + ChangedModules: make(map[string]ModuleChange), + } + + validateModuleSets(m.Modules, other.Modules, result) + + patchModule(m.Modules, other.Modules, result) + + m.Modules = other.Modules + + return result +} + +func validateModuleSets(a, b *Module, result *PatchResult) { + setA := make(map[string]struct{}) + collectModuleNames(a, setA) + setB := make(map[string]struct{}) + collectModuleNames(b, setB) + + for name := range setA { + if _, ok := setB[name]; !ok { + result.RemovedModules = append(result.RemovedModules, name) + } + } + + for name := range setB { + if _, ok := setA[name]; !ok { + result.AddedModules = append(result.AddedModules, name) + } + } +} + +func patchModule(stored, fresh *Module, result *PatchResult) { + if stored == nil || fresh == nil { + return + } + + if stored.MD5 != fresh.MD5 { + result.ChangedModules[stored.Name] = ModuleChange{ + PreviousTokenCount: stored.TokenCount, + CurrentTokenCount: fresh.TokenCount, + } + } + + fresh.Annotation = stored.Annotation + + for _, storedChild := range stored.Modules { + for _, freshChild := range fresh.Modules { + if storedChild.Name == freshChild.Name { + patchModule(storedChild, freshChild, result) + break + } + } + } +} + func newModule(name string, parent *Module, modules []*Module, files []*FileRef, annotation *Annotation) *Module { return &Module{ Name: name, @@ -372,3 +453,57 @@ func rebuildModule(old *Module, parent *Module) *Module { } return newModule(old.Name, parent, children, old.Files, old.Annotation) } + +// buildModuleFromFS constructs a hierarchy of Modules and Files for the given path entries. +// It returns the Module representing the root folder. +func buildModuleFromFS(fsys fs.FS, pathEntries []string) (*Module, error) { + // First, create a basic tree with empty token information so we can easily + // attach files to the correct folder hierarchy. + root := &Module{Name: ".", Modules: []*Module{}, Files: []*FileRef{}} + + for _, entry := range pathEntries { + if entry == "" { + continue + } + info, err := fs.Stat(fsys, entry) + if err != nil { + return nil, fmt.Errorf("failed to stat path %q: %w", entry, err) + } + // Ignore directories from the incoming list – we only care about files. + if info.IsDir() { + continue + } + + fileRef, err := newFileRefFromFS(fsys, entry) + if err != nil { + return nil, fmt.Errorf("failed to build file object for %s: %w", entry, err) + } + + parent := findOrCreateParentModule(root, entry) + parent.Files = append(parent.Files, fileRef) + } + + // Collapse trivial single-child folders first. + collapseModules(root) + + // At this point, we already have all the FileRefs and their token counts. + // Rebuild the tree using the newModule constructor, so module TokenCounts are computed. + + rebuilt := rebuildModule(root, nil) + + // Now using the tree with token counts, collapse any modules that have fewer tokens than minTokenCountPerModule. + collapseByTokens(rebuilt) + + // Return a fresh copy of the tree with updated per-module token counts. + return rebuildModule(rebuilt, nil), nil +} + +func collectModuleNames(m *Module, set map[string]struct{}) { + if m == nil { + return + } + set[m.Name] = struct{}{} + for _, c := range m.Modules { + collectModuleNames(c, set) + } +} diff --git a/workspace/project/metadata_test.go b/workspace/project/metadata_test.go index 181ed1d..577ec29 100644 --- a/workspace/project/metadata_test.go +++ b/workspace/project/metadata_test.go @@ -1,118 +1,125 @@ package project import ( - "github.com/google/go-cmp/cmp" - "github.com/google/go-cmp/cmp/cmpopts" + "github.com/stretchr/testify/assert" "testing" - "testing/fstest" - "time" ) -func Test_buildMetadata(t *testing.T) { - memFS := fstest.MapFS{ - "folderA/file1.txt": {Data: []byte("this is file1"), ModTime: time.Now()}, - "folderA/folderB/file2.md": {Data: []byte("this is file2"), ModTime: time.Now()}, - "folderC/foo.go": {Data: []byte("package main\nfunc main(){}"), ModTime: time.Now()}, - ".git/ignored": {Data: []byte("should be excluded")}, - "go.sum": {Data: []byte("should be excluded")}, +func TestMetadata_Patch(t *testing.T) { + type testCase struct { + name string + stored *Metadata + fresh *Metadata + expected *PatchResult + expectedErr string + expectedModules []string + expectedFiles []string + expectedRemoved []string + expectedAdded []string + expectedChanged []string + expectedUnchanged []string } - meta, err := buildMetadata(memFS) - if err != nil { - t.Fatalf("buildMetadata returned error: %v", err) - } - - if meta == nil { - t.Fatal("buildMetadata returned nil metadata") - } - - want := &Metadata{ - Modules: &Module{ - Name: ".", - Modules: []*Module{ - { - Name: "folderA", - Modules: []*Module{ - { - Name: "folderA/folderB", - Files: []*FileRef{ - { - Name: "folderA/folderB/file2.md", - }, - }, - Directories: []string{"folderA/folderB"}, - }, + testCases := []testCase{ + { + name: "should detect no changes", + stored: &Metadata{ + Modules: &Module{ + Name: ".", + MD5: "abc", + }, + }, + fresh: &Metadata{ + Modules: &Module{ + Name: ".", + MD5: "abc", + }, + }, + expected: &PatchResult{ + ChangedModules: map[string]ModuleChange{}, + }, + }, + { + name: "should detect module changes", + stored: &Metadata{ + Modules: &Module{ + Name: ".", + MD5: "abc", + TokenCount: 100, + }, + }, + fresh: &Metadata{ + Modules: &Module{ + Name: ".", + MD5: "def", + TokenCount: 200, + }, + }, + expected: &PatchResult{ + ChangedModules: map[string]ModuleChange{ + ".": { + PreviousTokenCount: 100, + CurrentTokenCount: 200, }, - Files: []*FileRef{ + }, + }, + }, + { + name: "should detect added modules", + stored: &Metadata{ + Modules: &Module{ + Name: ".", + MD5: "abc", + }, + }, + fresh: &Metadata{ + Modules: &Module{ + Name: ".", + MD5: "abc", + Modules: []*Module{ { - Name: "folderA/file1.txt", + Name: "new", + MD5: "def", }, }, - Directories: []string{"folderA"}, }, - { - Name: "folderC", - Files: []*FileRef{ + }, + expected: &PatchResult{ + ChangedModules: map[string]ModuleChange{}, + AddedModules: []string{"new"}, + }, + }, + { + name: "should detect removed modules", + stored: &Metadata{ + Modules: &Module{ + Name: ".", + MD5: "abc", + Modules: []*Module{ { - Name: "folderC/foo.go", + Name: "removed", + MD5: "def", }, }, - Directories: []string{"folderC"}, }, }, + fresh: &Metadata{ + Modules: &Module{ + Name: ".", + MD5: "abc", + }, + }, + expected: &PatchResult{ + ChangedModules: map[string]ModuleChange{}, + RemovedModules: []string{"removed"}, + }, }, } - opts := []cmp.Option{ - // ignore MD5 on both FileRef and Module for structural comparison - cmpopts.IgnoreFields(FileRef{}, "LastModified", "MD5", "TokenCount"), - cmpopts.IgnoreFields(Module{}, "MD5", "TokenCount", "Parent"), - cmpopts.IgnoreUnexported(Module{}), - cmpopts.EquateEmpty(), - cmpopts.SortSlices(func(a, b *Module) bool { return a.Name < b.Name }), - cmpopts.SortSlices(func(a, b *FileRef) bool { return a.Name < b.Name }), - } - - if diff := cmp.Diff(want, meta, opts...); diff != "" { - t.Errorf("metadata structure mismatch (-want +got):\n%s", diff) - } - - // Validate files and modules have non-empty fields/hashes. - checkNonEmptyFields(t, meta.Modules) - checkModuleHashes(t, meta.Modules) -} - -// checkNonEmptyFields validates that LastModified, MD5, and TokenCount are not empty on all files. -func checkNonEmptyFields(t *testing.T, mod *Module) { - if mod == nil { - return - } - for _, f := range mod.Files { - if f.MD5 == "" { - t.Errorf("F1le %s has empty MD5", f.Name) - } - if f.LastModified.IsZero() { - t.Errorf("F1le %s has zero LastModified", f.Name) - } - if f.TokenCount < 0 { - t.Errorf("F1le %s has negative TokenCount %d", f.Name, f.TokenCount) - } - } - for _, child := range mod.Modules { - checkNonEmptyFields(t, child) - } -} - -// checkModuleHashes walks the module tree ensuring every module has a non-empty -// MD5 value. -func checkModuleHashes(t *testing.T, m *Module) { - if m == nil { - return - } - if m.MD5 == "" { - t.Errorf("Module %s has empty MD5", m.Name) - } - for _, sub := range m.Modules { - checkModuleHashes(t, sub) + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + result := tc.stored.Patch(tc.fresh) + assert.Equal(t, tc.expected, result) + }) } -} +} \ No newline at end of file diff --git a/workspace/project/update.go b/workspace/project/update.go index f4374db..8d41ecf 100644 --- a/workspace/project/update.go +++ b/workspace/project/update.go @@ -9,37 +9,6 @@ import ( "gopkg.in/yaml.v3" ) -// Patch merges the receiver Metadata (the current state stored on disk) -// with *other* (a freshly generated snapshot of the workspace). -// -// Behaviour: -// - The module hierarchy from *other* REPLACES the hierarchy in the -// receiver – it is considered the source of truth regarding files -// and token information. -// - For every module that exists in both trees and whose MD5 checksum -// remains the same, the Annotation present in the receiver is -// preserved. -// - If a module exists in the receiver but not in *other* it is -// dropped (because the new snapshot no longer sees it). -// - New modules that appear only in *other* are added with a nil -// Annotation (it will be generated by annotate later on). -func (m *Metadata) Patch(other *Metadata) { - if other == nil || other.Modules == nil { - return // nothing to do - } - - // Build a lookup map for the *current* (old) module tree so we can - // quickly fetch annotations when MD5 matches. - oldMap := map[string]*Module{} - collectModuleMap(m.Modules, oldMap) - - // Replace the module tree with the fresh snapshot. - m.Modules = other.Modules - - // Walk the new tree carrying over annotations when applicable. - mergeAnnotations(m.Modules, oldMap) -} - // collectModuleMap traverses a module tree and records every module by // its Name into dst. func collectModuleMap(mod *Module, dst map[string]*Module) {