-
Notifications
You must be signed in to change notification settings - Fork 194
gitindex: optimize git index time by ~21% #1036
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,32 @@ | ||
| package gitindex | ||
|
|
||
// contentSlab hands out file-content buffers carved from one shared backing
// array, so that indexing many small files costs far fewer heap allocations
// than one allocation per file.
//
// Every slice it returns is a full (3-index) slice expression whose capacity
// equals its length; an append on one file's content therefore reallocates
// instead of spilling into the bytes of the next file. Requests bigger than
// the slab size are served by a dedicated allocation.
type contentSlab struct {
	// buf is the current shared buffer. len(buf) is the number of bytes
	// already handed out from it.
	buf []byte
	// cap is the size given to each shared buffer and the largest request
	// that is served from one.
	cap int
}
|
|
||
| func newContentSlab(slabCap int) contentSlab { | ||
| return contentSlab{ | ||
| buf: make([]byte, 0, slabCap), | ||
| cap: slabCap, | ||
| } | ||
| } | ||
|
|
||
| // alloc returns a byte slice of length n. The caller must write into it | ||
| // immediately (the bytes are uninitialized when sourced from the slab). | ||
| func (s *contentSlab) alloc(n int) []byte { | ||
| if n > s.cap { | ||
| return make([]byte, n) | ||
| } | ||
| if len(s.buf)+n > cap(s.buf) { | ||
| s.buf = make([]byte, n, s.cap) | ||
| return s.buf[:n:n] | ||
| } | ||
| off := len(s.buf) | ||
| s.buf = s.buf[:off+n] | ||
| return s.buf[off : off+n : off+n] | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,72 @@ | ||
| package gitindex | ||
|
|
||
| import "testing" | ||
|
|
||
| func TestContentSlab(t *testing.T) { | ||
| t.Run("fits in slab", func(t *testing.T) { | ||
| s := newContentSlab(1024) | ||
| b := s.alloc(100) | ||
| if len(b) != 100 { | ||
| t.Fatalf("len = %d, want 100", len(b)) | ||
| } | ||
| if cap(b) != 100 { | ||
| t.Fatalf("cap = %d, want 100 (3-index slice)", cap(b)) | ||
| } | ||
| }) | ||
|
|
||
| t.Run("cap is capped so append cannot corrupt adjacent data", func(t *testing.T) { | ||
| s := newContentSlab(1024) | ||
| a := s.alloc(10) | ||
| copy(a, []byte("aaaaaaaaaa")) | ||
|
|
||
| b := s.alloc(10) | ||
| copy(b, []byte("bbbbbbbbbb")) | ||
|
|
||
| // Appending to a must not overwrite b. | ||
| a = append(a, 'X') // triggers new backing array since cap==len | ||
| if string(b) != "bbbbbbbbbb" { | ||
| t.Fatalf("adjacent data corrupted: got %q", b) | ||
| } | ||
| _ = a | ||
| }) | ||
|
|
||
| t.Run("slab rollover", func(t *testing.T) { | ||
| s := newContentSlab(64) | ||
| a := s.alloc(60) | ||
| if len(a) != 60 || cap(a) != 60 { | ||
| t.Fatalf("a: len=%d cap=%d", len(a), cap(a)) | ||
| } | ||
| // Next alloc doesn't fit in remaining 4 bytes → new slab. | ||
| b := s.alloc(10) | ||
| if len(b) != 10 || cap(b) != 10 { | ||
| t.Fatalf("b: len=%d cap=%d", len(b), cap(b)) | ||
| } | ||
| // a and b should not share backing arrays. | ||
| copy(a, make([]byte, 60)) | ||
| copy(b, []byte("0123456789")) | ||
| if string(b) != "0123456789" { | ||
| t.Fatal("rollover corrupted data") | ||
| } | ||
| }) | ||
|
|
||
| t.Run("oversized allocation", func(t *testing.T) { | ||
| s := newContentSlab(64) | ||
| b := s.alloc(128) | ||
| if len(b) != 128 { | ||
| t.Fatalf("len = %d, want 128", len(b)) | ||
| } | ||
| // Oversized alloc should not consume slab space. | ||
| c := s.alloc(32) | ||
| if len(c) != 32 || cap(c) != 32 { | ||
| t.Fatalf("c: len=%d cap=%d", len(c), cap(c)) | ||
| } | ||
| }) | ||
|
|
||
| t.Run("zero size", func(t *testing.T) { | ||
| s := newContentSlab(64) | ||
| b := s.alloc(0) | ||
| if len(b) != 0 { | ||
| t.Fatalf("len = %d, want 0", len(b)) | ||
| } | ||
| }) | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -37,7 +37,6 @@ import ( | |
|
|
||
| "github.com/bmatcuk/doublestar" | ||
| "github.com/dustin/go-humanize" | ||
| "github.com/go-enry/go-enry/v2" | ||
| "github.com/rs/xid" | ||
| "golang.org/x/sys/unix" | ||
|
|
||
|
|
@@ -625,6 +624,11 @@ func (b *Builder) Add(doc Document) error { | |
| doc.SkipReason = skip | ||
| } | ||
|
|
||
| // Pre-compute file category and language while content is still | ||
| // available, before content is dropped for skipped documents. | ||
| DetermineFileCategory(&doc) | ||
| DetermineLanguageIfUnknown(&doc) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It's a bit subtle, but previously in |
||
|
|
||
| b.todo = append(b.todo, &doc) | ||
|
|
||
| if doc.SkipReason == SkipReasonNone { | ||
|
|
@@ -888,18 +892,19 @@ func rank(d *Document, origIdx int) []float64 { | |
| skipped = 1.0 | ||
| } | ||
|
|
||
| // Use pre-computed Category from DetermineFileCategory. | ||
| generated := 0.0 | ||
| if enry.IsGenerated(d.Name, d.Content) { | ||
| if d.Category == FileCategoryGenerated { | ||
| generated = 1.0 | ||
| } | ||
|
|
||
| vendor := 0.0 | ||
| if enry.IsVendor(d.Name) { | ||
| if d.Category == FileCategoryVendored { | ||
| vendor = 1.0 | ||
| } | ||
|
|
||
| test := 0.0 | ||
| if enry.IsTest(d.Name) { | ||
| if d.Category == FileCategoryTest { | ||
| test = 1.0 | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -55,8 +55,6 @@ func parseSymbols(todo []*Document, languageMap ctags.LanguageMap, parserBins ct | |
| continue | ||
| } | ||
|
|
||
| DetermineLanguageIfUnknown(doc) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm guessing this was a pointless check? |
||
|
|
||
| parserType := languageMap[normalizeLanguage(doc.Language)] | ||
| if parserType == ctags.NoCTags { | ||
| continue | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -235,8 +235,15 @@ func (s *postingsBuilder) newSearchableString(data []byte, byteSections []Docume | |
| s.postings[ng] = pl | ||
| } | ||
| } | ||
| m := binary.PutUvarint(buf[:], uint64(newOff-pl.lastOff)) | ||
| pl.data = append(pl.data, buf[:m]...) | ||
| delta := uint64(newOff - pl.lastOff) | ||
| if delta < 0x80 { | ||
| // Single-byte varint fast path: ~80% of deltas are < 128. | ||
| // append(slice, byte) is cheaper than append(slice, slice...). | ||
| pl.data = append(pl.data, byte(delta)) | ||
| } else { | ||
| m := binary.PutUvarint(buf[:], delta) | ||
| pl.data = append(pl.data, buf[:m]...) | ||
| } | ||
| pl.lastOff = newOff | ||
| } | ||
| s.runeCount += runeIndex | ||
|
|
@@ -536,8 +543,12 @@ func DetermineLanguageIfUnknown(doc *Document) { | |
|
|
||
| // Add a file which only occurs in certain branches. | ||
| func (b *ShardBuilder) Add(doc Document) error { | ||
| if index := bytes.IndexByte(doc.Content, 0); index > 0 { | ||
| doc.SkipReason = SkipReasonBinary | ||
| // Skip binary check if already computed (e.g., by Builder.Add | ||
| // which calls DocChecker.Check before docs reach buildShard). | ||
| if doc.Category == FileCategoryMissing { | ||
| if index := bytes.IndexByte(doc.Content, 0); index > 0 { | ||
| doc.SkipReason = SkipReasonBinary | ||
| } | ||
| } | ||
|
|
||
| if doc.SkipReason != SkipReasonNone { | ||
|
|
@@ -547,7 +558,9 @@ func (b *ShardBuilder) Add(doc Document) error { | |
| } | ||
|
|
||
| DetermineLanguageIfUnknown(&doc) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We should do this after determine file category like I commented above. I will send a PR for that to not block this PR any more :) |
||
| DetermineFileCategory(&doc) | ||
| if doc.Category == FileCategoryMissing { | ||
| DetermineFileCategory(&doc) | ||
| } | ||
|
|
||
| sort.Sort(symbolSlice{doc.Symbols, doc.SymbolsMetaData}) | ||
| var last DocumentSection | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.