From 2ddbc209a660550a0a588c7d2c1d986145793f20 Mon Sep 17 00:00:00 2001 From: sawka Date: Wed, 12 Feb 2025 10:36:26 -0800 Subject: [PATCH 1/2] updated suggestion code to use a channel and a heap (prep for streaming lists) --- pkg/suggestion/filewalk.go | 73 ++++++++++++++++ pkg/suggestion/suggestion.go | 160 ++++++++++++++++++++++------------- pkg/util/utilfn/utilfn.go | 9 ++ 3 files changed, 184 insertions(+), 58 deletions(-) create mode 100644 pkg/suggestion/filewalk.go diff --git a/pkg/suggestion/filewalk.go b/pkg/suggestion/filewalk.go new file mode 100644 index 0000000000..983e4c0e74 --- /dev/null +++ b/pkg/suggestion/filewalk.go @@ -0,0 +1,73 @@ +// Copyright 2025, Command Line Inc. +// SPDX-License-Identifier: Apache-2.0 + +package suggestion + +import ( + "context" + "fmt" + "io/fs" + "os" + "path/filepath" + + "github.com/wavetermdev/waveterm/pkg/util/utilfn" +) + +const ListDirChanSize = 50 + +type DirEntryResult struct { + Entry fs.DirEntry + Err error +} + +func listDirectory(ctx context.Context, dir string, maxFiles int) (<-chan DirEntryResult, error) { + // Open the directory outside the goroutine for early error reporting. + f, err := os.Open(dir) + if err != nil { + return nil, err + } + + // Ensure we have a directory. + fi, err := f.Stat() + if err != nil { + f.Close() + return nil, err + } + if !fi.IsDir() { + f.Close() + return nil, fmt.Errorf("%s is not a directory", dir) + } + + ch := make(chan DirEntryResult, ListDirChanSize) + go func() { + defer close(ch) + // Make sure to close the directory when done. + defer f.Close() + + // Read up to maxFiles entries. + entries, err := f.ReadDir(maxFiles) + if err != nil { + utilfn.SendWithCtxCheck(ctx, ch, DirEntryResult{Err: err}) + return + } + + // Send each entry over the channel. + for _, entry := range entries { + ok := utilfn.SendWithCtxCheck(ctx, ch, DirEntryResult{Entry: entry}) + if !ok { + return + } + } + + // Add parent directory (“..”) entry if not at the filesystem root. 
+ if filepath.Dir(dir) != dir { + mockDir := &MockDirEntry{ + NameStr: "..", + IsDirVal: true, + FileMode: fs.ModeDir | 0755, + } + utilfn.SendWithCtxCheck(ctx, ch, DirEntryResult{Entry: mockDir}) + } + }() + return ch, nil +} diff --git a/pkg/suggestion/suggestion.go b/pkg/suggestion/suggestion.go index 133ffc1da8..edf4096d2a 100644 --- a/pkg/suggestion/suggestion.go +++ b/pkg/suggestion/suggestion.go @@ -4,6 +4,7 @@ package suggestion import ( + "container/heap" "context" "fmt" "io/fs" @@ -322,103 +323,146 @@ func fetchBookmarkSuggestions(_ context.Context, data wshrpc.FetchSuggestionsDat }, nil } -// FetchSuggestions returns file suggestions using junegunn/fzf’s fuzzy matching. +// Define a scored entry for fuzzy matching. +type scoredEntry struct { + ent fs.DirEntry + score int + fileName string + positions []int +} + +// We'll use a heap to only keep the top MaxSuggestions when a search term is provided. +// Define a min-heap so that the worst (lowest scoring) candidate is at the top. +type scoredEntryHeap []scoredEntry + +// Less: lower score is “less”. For equal scores, a candidate with a longer filename is considered worse. +func (h scoredEntryHeap) Len() int { return len(h) } +func (h scoredEntryHeap) Less(i, j int) bool { + if h[i].score != h[j].score { + return h[i].score < h[j].score + } + return len(h[i].fileName) > len(h[j].fileName) +} +func (h scoredEntryHeap) Swap(i, j int) { h[i], h[j] = h[j], h[i] } +func (h *scoredEntryHeap) Push(x interface{}) { *h = append(*h, x.(scoredEntry)) } +func (h *scoredEntryHeap) Pop() interface{} { + old := *h + n := len(old) + x := old[n-1] + *h = old[0 : n-1] + return x +} + +// fetchFileSuggestions returns file suggestions using fuzzy matching. func fetchFileSuggestions(_ context.Context, data wshrpc.FetchSuggestionsData) (*wshrpc.FetchSuggestionsResponse, error) { // Only support file suggestions. 
if data.SuggestionType != "file" { return nil, fmt.Errorf("unsupported suggestion type: %q", data.SuggestionType) } - // Resolve the base directory, the query prefix (for display) and the search term. + // Resolve the base directory, query prefix (for display) and search term. baseDir, queryPrefix, searchTerm, err := resolveFileQuery(data.FileCwd, data.Query) if err != nil { return nil, fmt.Errorf("error resolving base dir: %w", err) } - dirFd, err := os.Open(baseDir) - if err != nil { - return nil, fmt.Errorf("error opening directory: %w", err) - } - defer dirFd.Close() - - finfo, err := dirFd.Stat() - if err != nil { - return nil, fmt.Errorf("error getting directory info: %w", err) - } - if !finfo.IsDir() { - return nil, fmt.Errorf("not a directory: %s", baseDir) - } + // Use a cancellable context for directory listing. + listingCtx, cancelFn := context.WithCancel(context.Background()) + defer cancelFn() - // Read up to 1000 entries. - dirEnts, err := dirFd.ReadDir(1000) + entriesCh, err := listDirectory(listingCtx, baseDir, 1000) if err != nil { - return nil, fmt.Errorf("error reading directory: %w", err) + return nil, fmt.Errorf("error listing directory: %w", err) } - // Add parent directory (“..”) entry if not at the filesystem root. - if filepath.Dir(baseDir) != baseDir { - dirEnts = append(dirEnts, &MockDirEntry{ - NameStr: "..", - IsDirVal: true, - FileMode: fs.ModeDir | 0755, - }) - } + const maxEntries = MaxSuggestions // top-k entries - // For fuzzy matching we’ll compute a score for each candidate. - type scoredEntry struct { - ent fs.DirEntry - score int - fileName string - positions []int - } var scoredEntries []scoredEntry - // If a search term is provided, convert it to lowercase (per fzf’s API contract). + // If a search term is provided, convert it to lowercase per fzf’s API contract. var patternRunes []rune if searchTerm != "" { patternRunes = []rune(strings.ToLower(searchTerm)) - } + // Use a heap to keep the top maxEntries. 
+ var topHeap scoredEntryHeap + heap.Init(&topHeap) - // Create a slab for temporary allocations in the fzf matching function. - var slab util.Slab + // Create a slab for temporary allocations in the fuzzy matching function. + var slab util.Slab - // Iterate over directory entries. - for _, de := range dirEnts { - fileName := de.Name() - score := 0 + // Process directory entries from the channel. + for result := range entriesCh { + if result.Err != nil { + return nil, fmt.Errorf("error reading directory: %w", result.Err) + } + de := result.Entry + fileName := de.Name() - // If a search term was provided, perform fuzzy matching. - if searchTerm != "" { - // Convert candidate to lowercase for case-insensitive matching. + // Perform fuzzy matching. candidate := strings.ToLower(fileName) text := util.ToChars([]byte(candidate)) - result, positions := algo.FuzzyMatchV2(false, true, true, &text, patternRunes, true, &slab) - if result.Score <= 0 { - // No match: skip this entry. + matchResult, positions := algo.FuzzyMatchV2(false, true, true, &text, patternRunes, true, &slab) + if matchResult.Score <= 0 { + // No match; skip this entry. continue } - score = result.Score - entry := scoredEntry{ent: de, score: score, fileName: fileName} + score := matchResult.Score + + var candidatePositions []int if positions != nil { - entry.positions = *positions + candidatePositions = *positions + } + se := scoredEntry{ + ent: de, + score: score, + fileName: fileName, + positions: candidatePositions, + } + + // If the heap is not full, push the candidate. + if topHeap.Len() < maxEntries { + heap.Push(&topHeap, se) + } else { + // If the candidate is better than the worst in the heap, replace it. 
+ worst := topHeap[0] + if se.score > worst.score || (se.score == worst.score && len(se.fileName) < len(worst.fileName)) { + heap.Pop(&topHeap) + heap.Push(&topHeap, se) + } } - scoredEntries = append(scoredEntries, entry) - } else { - scoredEntries = append(scoredEntries, scoredEntry{ent: de, score: score, fileName: fileName}) } - } - // Sort entries by descending score (better matches first). - if searchTerm != "" { + // Convert the heap to a slice. + scoredEntries = make([]scoredEntry, topHeap.Len()) + copy(scoredEntries, topHeap) + // Sort in descending order (better candidates first). sort.Slice(scoredEntries, func(i, j int) bool { if scoredEntries[i].score != scoredEntries[j].score { return scoredEntries[i].score > scoredEntries[j].score } return len(scoredEntries[i].fileName) < len(scoredEntries[j].fileName) }) + } else { + // If no search term is provided, simply collect all entries. + for result := range entriesCh { + if result.Err != nil { + return nil, fmt.Errorf("error reading directory: %w", result.Err) + } + de := result.Entry + fileName := de.Name() + scoredEntries = append(scoredEntries, scoredEntry{ + ent: de, + score: 0, + fileName: fileName, + }) + } + // Limit to the first maxEntries. + if len(scoredEntries) > maxEntries { + scoredEntries = scoredEntries[:maxEntries] + } } - // Build up to MaxSuggestions suggestions + // Build suggestions from the scored entries. var suggestions []wshrpc.SuggestionType for _, candidate := range scoredEntries { fileName := candidate.ent.Name() @@ -426,7 +470,7 @@ func fetchFileSuggestions(_ context.Context, data wshrpc.FetchSuggestionsData) ( suggestionFileName := filepath.Join(queryPrefix, fileName) offset := len(suggestionFileName) - len(fileName) if offset > 0 && len(candidate.positions) > 0 { - // Adjust the match positions to account for the queryPrefix. + // Adjust match positions to account for the query prefix. 
for j := range candidate.positions { candidate.positions[j] += offset } diff --git a/pkg/util/utilfn/utilfn.go b/pkg/util/utilfn/utilfn.go index 3b67035a7e..49a8133f59 100644 --- a/pkg/util/utilfn/utilfn.go +++ b/pkg/util/utilfn/utilfn.go @@ -1023,3 +1023,12 @@ func QuickHashString(s string) string { h.Write([]byte(s)) return base64.RawURLEncoding.EncodeToString(h.Sum(nil)) } + +func SendWithCtxCheck[T any](ctx context.Context, ch chan<- T, val T) bool { + select { + case <-ctx.Done(): + return false + case ch <- val: + return true + } +} From 63a01511653487d60c499149405011e301e839f6 Mon Sep 17 00:00:00 2001 From: sawka Date: Wed, 12 Feb 2025 13:58:57 -0800 Subject: [PATCH 2/2] simplify a bit --- pkg/suggestion/suggestion.go | 109 +++++++++++++++-------------------- 1 file changed, 48 insertions(+), 61 deletions(-) diff --git a/pkg/suggestion/suggestion.go b/pkg/suggestion/suggestion.go index edf4096d2a..157cf587c5 100644 --- a/pkg/suggestion/suggestion.go +++ b/pkg/suggestion/suggestion.go @@ -353,7 +353,6 @@ func (h *scoredEntryHeap) Pop() interface{} { return x } -// fetchFileSuggestions returns file suggestions using fuzzy matching. func fetchFileSuggestions(_ context.Context, data wshrpc.FetchSuggestionsData) (*wshrpc.FetchSuggestionsResponse, error) { // Only support file suggestions. if data.SuggestionType != "file" { @@ -377,91 +376,79 @@ func fetchFileSuggestions(_ context.Context, data wshrpc.FetchSuggestionsData) ( const maxEntries = MaxSuggestions // top-k entries - var scoredEntries []scoredEntry + // Always use a heap. + var topHeap scoredEntryHeap + heap.Init(&topHeap) - // If a search term is provided, convert it to lowercase per fzf’s API contract. var patternRunes []rune if searchTerm != "" { patternRunes = []rune(strings.ToLower(searchTerm)) - // Use a heap to keep the top maxEntries. - var topHeap scoredEntryHeap - heap.Init(&topHeap) + } - // Create a slab for temporary allocations in the fuzzy matching function. 
- var slab util.Slab + var slab util.Slab + var index int // used for ordering when searchTerm is empty - // Process directory entries from the channel. - for result := range entriesCh { - if result.Err != nil { - return nil, fmt.Errorf("error reading directory: %w", result.Err) - } - de := result.Entry - fileName := de.Name() + // Process each directory entry. + for result := range entriesCh { + if result.Err != nil { + return nil, fmt.Errorf("error reading directory: %w", result.Err) + } + de := result.Entry + fileName := de.Name() + var score int + var candidatePositions []int + if searchTerm != "" { // Perform fuzzy matching. candidate := strings.ToLower(fileName) text := util.ToChars([]byte(candidate)) matchResult, positions := algo.FuzzyMatchV2(false, true, true, &text, patternRunes, true, &slab) if matchResult.Score <= 0 { - // No match; skip this entry. + index++ continue } - score := matchResult.Score - - var candidatePositions []int + score = matchResult.Score if positions != nil { candidatePositions = *positions } - se := scoredEntry{ - ent: de, - score: score, - fileName: fileName, - positions: candidatePositions, - } + } else { + // Use ordering: first entry gets highest score. + score = maxEntries - index + } + index++ - // If the heap is not full, push the candidate. - if topHeap.Len() < maxEntries { - heap.Push(&topHeap, se) - } else { - // If the candidate is better than the worst in the heap, replace it. - worst := topHeap[0] - if se.score > worst.score || (se.score == worst.score && len(se.fileName) < len(worst.fileName)) { - heap.Pop(&topHeap) - heap.Push(&topHeap, se) - } - } + se := scoredEntry{ + ent: de, + score: score, + fileName: fileName, + positions: candidatePositions, } - // Convert the heap to a slice. - scoredEntries = make([]scoredEntry, topHeap.Len()) - copy(scoredEntries, topHeap) - // Sort in descending order (better candidates first). 
- sort.Slice(scoredEntries, func(i, j int) bool { - if scoredEntries[i].score != scoredEntries[j].score { - return scoredEntries[i].score > scoredEntries[j].score - } - return len(scoredEntries[i].fileName) < len(scoredEntries[j].fileName) - }) - } else { - // If no search term is provided, simply collect all entries. - for result := range entriesCh { - if result.Err != nil { - return nil, fmt.Errorf("error reading directory: %w", result.Err) + if topHeap.Len() < maxEntries { + heap.Push(&topHeap, se) + } else { + // Replace the worst candidate if this one is better. + worst := topHeap[0] + if se.score > worst.score || (se.score == worst.score && len(se.fileName) < len(worst.fileName)) { + heap.Pop(&topHeap) + heap.Push(&topHeap, se) } - de := result.Entry - fileName := de.Name() - scoredEntries = append(scoredEntries, scoredEntry{ - ent: de, - score: 0, - fileName: fileName, - }) } - // Limit to the first maxEntries. - if len(scoredEntries) > maxEntries { - scoredEntries = scoredEntries[:maxEntries] + if searchTerm == "" && topHeap.Len() >= maxEntries { + break } } + // Extract and sort the scored entries (highest score first). + scoredEntries := make([]scoredEntry, topHeap.Len()) + copy(scoredEntries, topHeap) + sort.Slice(scoredEntries, func(i, j int) bool { + if scoredEntries[i].score != scoredEntries[j].score { + return scoredEntries[i].score > scoredEntries[j].score + } + return len(scoredEntries[i].fileName) < len(scoredEntries[j].fileName) + }) + // Build suggestions from the scored entries. var suggestions []wshrpc.SuggestionType for _, candidate := range scoredEntries {