From be1918291c68cf78c414202913b110a5b60b7400 Mon Sep 17 00:00:00 2001 From: Miguel Young de la Sota Date: Thu, 25 Sep 2025 12:53:35 -0700 Subject: [PATCH 1/4] make checkptr failures trigger reliably on test data --- internal/tdp/vm/run.go | 1 + internal/tdp/vm/utf8.go | 5 ++-- internal/testdata/scalars/utf8.yaml | 11 +++++++ internal/testdata/testdata.go | 45 ++++++++++++++++++++++++++++- internal/xunsafe/untyped.go | 4 ++- 5 files changed, 62 insertions(+), 4 deletions(-) diff --git a/internal/tdp/vm/run.go b/internal/tdp/vm/run.go index 20ad81c..1f37c3a 100644 --- a/internal/tdp/vm/run.go +++ b/internal/tdp/vm/run.go @@ -87,6 +87,7 @@ func Run(m *dynamic.Message, data []byte, options Options) (err error) { p3.Options = options data = RelocatePageBoundary(data, !p3.AllowAlias) + m.Shared.Src = unsafe.SliceData(data) m.Shared.Len = len(data) // The arena keeps m.context alive, so we don't need to KeepAlive src. diff --git a/internal/tdp/vm/utf8.go b/internal/tdp/vm/utf8.go index 9ea9c67..4a48d21 100644 --- a/internal/tdp/vm/utf8.go +++ b/internal/tdp/vm/utf8.go @@ -43,7 +43,7 @@ func verifyUTF8(p1 P1, p2 P2, n int) (P1, P2, zc.Range) { // and a remainder part that only does 0 to 7 bytes. if e8 > p { again: - bytes := *xunsafe.Cast[uint64](p.AssertValid()) + bytes := xunsafe.ByteLoad[uint64](p.AssertValid(), 0) p = p.Add(8) if bytes&tdp.SignBits != 0 { p = p.Add(-8) // Back up, need to take the slow path. @@ -56,7 +56,8 @@ func verifyUTF8(p1 P1, p2 P2, n int) (P1, P2, zc.Range) { if e > p { // Fast path for if the last few bytes are also ASCII. 
left := int(e - p) - bytes := *xunsafe.Cast[uint64](p.AssertValid()) + bytes := xunsafe.ByteLoad[uint64](p.AssertValid(), 0) + p = p.Add(left) if bytes&(tdp.SignBits>>uint((8-left)*8)) != 0 { p = p.Add(-left) diff --git a/internal/testdata/scalars/utf8.yaml b/internal/testdata/scalars/utf8.yaml index 7fb9d1b..cacabd3 100644 --- a/internal/testdata/scalars/utf8.yaml +++ b/internal/testdata/scalars/utf8.yaml @@ -16,6 +16,17 @@ type: hyperpb.test.Scalars protoscope: - '14: {`ff`}' +- '14: {"a"}' +- '14: {"ab"}' +- '14: {"abc"}' +- '14: {"abcd"}' +- '14: {"abcde"}' +- '14: {"abcdef"}' +- '14: {"abcdefg"}' +- '14: {"abcdefgh"}' +- '14: {"abcdefghi"}' +- '14: {"abcdefghij"}' + # Various torn runes. - '14: {`c280`}' - '14: {`c2`}' diff --git a/internal/testdata/testdata.go b/internal/testdata/testdata.go index cf410b4..11456b9 100644 --- a/internal/testdata/testdata.go +++ b/internal/testdata/testdata.go @@ -151,7 +151,7 @@ func (test *TestCase) Run(t *testing.T, ctx *hyperpb.Shared, verbose bool) { // Parse using hyperpb. m2 := ctx.NewMessage(test.Type.Fast) - err2 := proto.Unmarshal(specimen, m2) + err2 := m2.Unmarshal(specimen, hyperpb.WithAllowAlias(true)) if verbose { t.Logf("theirs: %v, ours: %v", err1, err2) @@ -263,7 +263,50 @@ func parseTestCase(t testing.TB, path string, file []byte) *TestCase { // making sure we have optimal message placement before we start. test.Specimens[i] = vm.RelocatePageBoundary(test.Specimens[i], false) } + } else { + // Ensure that each specimen is at the end of an allocation. + for n, b := range test.Specimens { + test.Specimens[n] = snapBytes(b) + } } return test } + +// snapBytes makes a copy of b such that b is at the end of a Go allocation. +// +// The Go allocator allocates objects on slabs (which it calls runtime.mspans), +// which are large page-sized chunks divided into small pieces of the same size. +// The sizes the Go allocator uses for slab objects are called size classes. 
+// +// This function discovers size classes using append(), which, when resizing, +// always returns a slice whose capacity is a size class. +// +// This is used to trigger various checkptr corner cases in tests. +func snapBytes(b []byte) []byte { + // Build a slice that is sized to a Go size class. + // + // This append lowers to a call to runtime.growslice, and, because + // sizeof(byte) == 1, triggers this bit of code at + // https://cs.opensource.google/go/go/+/refs/tags/go1.25.1:src/runtime/slice.go;l=210: + // + // lenmem = uintptr(oldLen) + // newlenmem = uintptr(newLen) + // capmem = roundupsize(uintptr(newcap), noscan) + // overflow = uintptr(newcap) > maxAlloc + // newcap = int(capmem) + // + // runtime.roundupsize(n, noscan) rounds n up to the next size class + // larger or equal to it. + // + // We always round up the size class to at least 32, to avoid very tiny + // objects that share allocations. + buf := append([]byte(nil), make([]byte, max(32, len(b)))...) + buf = buf[:cap(buf):cap(buf)] + + // Discard everything except the last len(b) bytes of buf. + buf = buf[len(buf)-len(b):] + + copy(buf, b) + return buf +} diff --git a/internal/xunsafe/untyped.go b/internal/xunsafe/untyped.go index fa36cf0..8b9beb6 100644 --- a/internal/xunsafe/untyped.go +++ b/internal/xunsafe/untyped.go @@ -14,7 +14,9 @@ package xunsafe -import "unsafe" +import ( + "unsafe" +) // ByteAdd adds the given offset to p, without scaling. 
// From cd4a024a3504173b7a657e6214fca4903a75e7d6 Mon Sep 17 00:00:00 2001 From: Miguel Young de la Sota Date: Thu, 25 Sep 2025 13:05:32 -0700 Subject: [PATCH 2/4] fix checkptr failures --- internal/tdp/thunks/repeated.go | 4 ++-- internal/tdp/thunks/singular.go | 2 +- internal/tdp/thunks/stencils.go | 16 ++++++++-------- internal/tdp/vm/run.go | 1 - internal/tdp/vm/utf8.go | 4 ++-- 5 files changed, 13 insertions(+), 14 deletions(-) diff --git a/internal/tdp/thunks/repeated.go b/internal/tdp/thunks/repeated.go index c8b08aa..ae6a600 100644 --- a/internal/tdp/thunks/repeated.go +++ b/internal/tdp/thunks/repeated.go @@ -294,7 +294,7 @@ func parsePackedVarint[T tdp.Int](p1 vm.P1, p2 vm.P2) (vm.P1, vm.P2) { e8 := p.Add(layout.RoundDown(int(e-p), 8)) if p < e8 { again: - bytes := *xunsafe.Cast[uint64](p.AssertValid()) + bytes := xunsafe.ByteLoad[uint64](p.AssertValid(), 0) count += bits.OnesCount64(bytes & tdp.SignBits) p = p.Add(8) if p < e8 { @@ -303,7 +303,7 @@ func parsePackedVarint[T tdp.Int](p1 vm.P1, p2 vm.P2) (vm.P1, vm.P2) { } if p < e { left := int(e - p) - bytes := *xunsafe.Cast[uint64](p.AssertValid()) + bytes := xunsafe.ByteLoad[uint64](p.AssertValid(), 0) count += bits.OnesCount64(bytes & (tdp.SignBits >> uint((8-left)*8))) } } diff --git a/internal/tdp/thunks/singular.go b/internal/tdp/thunks/singular.go index c59e844..7be4876 100644 --- a/internal/tdp/thunks/singular.go +++ b/internal/tdp/thunks/singular.go @@ -294,7 +294,7 @@ func parseFixed[T tdp.Int](p1 vm.P1, p2 vm.P2) (vm.P1, vm.P2) { } var p *T p1, p2, p = vm.GetMutableField[T](p1, p2) - *p = *xunsafe.Cast[T](p1.PtrAddr.AssertValid()) + *p = xunsafe.ByteLoad[T](p1.PtrAddr.AssertValid(), 0) p1 = p1.Advance(layout.Size[T]()) return p1, p2 diff --git a/internal/tdp/thunks/stencils.go b/internal/tdp/thunks/stencils.go index 91ca73b..5195376 100644 --- a/internal/tdp/thunks/stencils.go +++ b/internal/tdp/thunks/stencils.go @@ -8354,7 +8354,7 @@ func parsePackedVarint8(p1 vm.P1, p2 vm.P2) (vm.P1, vm.P2) { 
e8 := p.Add(layout.RoundDown(int(e-p), 8)) if p < e8 { again: - bytes := *xunsafe.Cast[uint64](p.AssertValid()) + bytes := xunsafe.ByteLoad[uint64](p.AssertValid(), 0) count += bits.OnesCount64(bytes & tdp.SignBits) p = p.Add(8) if p < e8 { @@ -8363,7 +8363,7 @@ func parsePackedVarint8(p1 vm.P1, p2 vm.P2) (vm.P1, vm.P2) { } if p < e { left := int(e - p) - bytes := *xunsafe.Cast[uint64](p.AssertValid()) + bytes := xunsafe.ByteLoad[uint64](p.AssertValid(), 0) count += bits.OnesCount64(bytes & (tdp.SignBits >> uint((8-left)*8))) } } @@ -8483,7 +8483,7 @@ func parsePackedVarint32(p1 vm.P1, p2 vm.P2) (vm.P1, vm.P2) { e8 := p.Add(layout.RoundDown(int(e-p), 8)) if p < e8 { again: - bytes := *xunsafe.Cast[uint64](p.AssertValid()) + bytes := xunsafe.ByteLoad[uint64](p.AssertValid(), 0) count += bits.OnesCount64(bytes & tdp.SignBits) p = p.Add(8) if p < e8 { @@ -8492,7 +8492,7 @@ func parsePackedVarint32(p1 vm.P1, p2 vm.P2) (vm.P1, vm.P2) { } if p < e { left := int(e - p) - bytes := *xunsafe.Cast[uint64](p.AssertValid()) + bytes := xunsafe.ByteLoad[uint64](p.AssertValid(), 0) count += bits.OnesCount64(bytes & (tdp.SignBits >> uint((8-left)*8))) } } @@ -8612,7 +8612,7 @@ func parsePackedVarint64(p1 vm.P1, p2 vm.P2) (vm.P1, vm.P2) { e8 := p.Add(layout.RoundDown(int(e-p), 8)) if p < e8 { again: - bytes := *xunsafe.Cast[uint64](p.AssertValid()) + bytes := xunsafe.ByteLoad[uint64](p.AssertValid(), 0) count += bits.OnesCount64(bytes & tdp.SignBits) p = p.Add(8) if p < e8 { @@ -8621,7 +8621,7 @@ func parsePackedVarint64(p1 vm.P1, p2 vm.P2) (vm.P1, vm.P2) { } if p < e { left := int(e - p) - bytes := *xunsafe.Cast[uint64](p.AssertValid()) + bytes := xunsafe.ByteLoad[uint64](p.AssertValid(), 0) count += bits.OnesCount64(bytes & (tdp.SignBits >> uint((8-left)*8))) } } @@ -8945,7 +8945,7 @@ func parseFixed32(p1 vm.P1, p2 vm.P2) (vm.P1, vm.P2) { } var p *uint32 p1, p2, p = vm.GetMutableField[uint32](p1, p2) - *p = *xunsafe.Cast[uint32](p1.PtrAddr.AssertValid()) + *p = 
xunsafe.ByteLoad[uint32](p1.PtrAddr.AssertValid(), 0) p1 = p1.Advance(layout.Size[uint32]()) return p1, p2 @@ -8959,7 +8959,7 @@ func parseFixed64(p1 vm.P1, p2 vm.P2) (vm.P1, vm.P2) { } var p *uint64 p1, p2, p = vm.GetMutableField[uint64](p1, p2) - *p = *xunsafe.Cast[uint64](p1.PtrAddr.AssertValid()) + *p = xunsafe.ByteLoad[uint64](p1.PtrAddr.AssertValid(), 0) p1 = p1.Advance(layout.Size[uint64]()) return p1, p2 diff --git a/internal/tdp/vm/run.go b/internal/tdp/vm/run.go index 1f37c3a..20ad81c 100644 --- a/internal/tdp/vm/run.go +++ b/internal/tdp/vm/run.go @@ -87,7 +87,6 @@ func Run(m *dynamic.Message, data []byte, options Options) (err error) { p3.Options = options data = RelocatePageBoundary(data, !p3.AllowAlias) - m.Shared.Src = unsafe.SliceData(data) m.Shared.Len = len(data) // The arena keeps m.context alive, so we don't need to KeepAlive src. diff --git a/internal/tdp/vm/utf8.go b/internal/tdp/vm/utf8.go index 4a48d21..f2feb97 100644 --- a/internal/tdp/vm/utf8.go +++ b/internal/tdp/vm/utf8.go @@ -83,7 +83,7 @@ unicode: n := min(8, int(e-p)) // Fast path for ASCII: simply check that all of the bytes don't have // their sign bits set. - bytes := *xunsafe.Cast[uint64](p.AssertValid()) + bytes := xunsafe.ByteLoad[uint64](p.AssertValid(), 0) mask := uint64(tdp.SignBits) >> uint((8-n)*8) ascii := bits.TrailingZeros64(bytes&mask) / 8 p1.Log(p2, "ascii bytes", "%016x, %d bytes", bytes, ascii) @@ -115,7 +115,7 @@ unicode: // Bounds check is complete here. We are free to load four bytes // and mask off what we don't need. We can't re-use bytes here // because the rune might straddle a boundary. - raw := *xunsafe.Cast[uint32](p.AssertValid()) + raw := xunsafe.ByteLoad[uint32](p.AssertValid(), 0) p1.Log(p2, "wide rune bits", "%08b, %d bytes", xunsafe.Bytes(&raw), count) // This puts the contents of the first byte into r. 
From 692f54b6ff87594abeab997f3d4554737899a820 Mon Sep 17 00:00:00 2001 From: Miguel Young de la Sota Date: Thu, 25 Sep 2025 13:12:49 -0700 Subject: [PATCH 3/4] fix required.yaml test --- internal/testdata/testdata.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/internal/testdata/testdata.go b/internal/testdata/testdata.go index 11456b9..60ad4c5 100644 --- a/internal/testdata/testdata.go +++ b/internal/testdata/testdata.go @@ -152,6 +152,9 @@ func (test *TestCase) Run(t *testing.T, ctx *hyperpb.Shared, verbose bool) { // Parse using hyperpb. m2 := ctx.NewMessage(test.Type.Fast) err2 := m2.Unmarshal(specimen, hyperpb.WithAllowAlias(true)) + if err2 == nil { + err2 = m2.Initialized() + } if verbose { t.Logf("theirs: %v, ours: %v", err1, err2) From 612e02d9141b34e97a813746e8e2731ebacebf1e Mon Sep 17 00:00:00 2001 From: Miguel Young de la Sota Date: Thu, 25 Sep 2025 13:41:49 -0700 Subject: [PATCH 4/4] fix crash --- internal/tdp/vm/run.go | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/tdp/vm/run.go b/internal/tdp/vm/run.go index 20ad81c..18552c5 100644 --- a/internal/tdp/vm/run.go +++ b/internal/tdp/vm/run.go @@ -458,6 +458,7 @@ func skipRecord(p1 P1, p2 P2, depth int) (P1, P2) { switch ty { case protowire.VarintType: + p1, p2 = p1.AtLeast(p2, 1) p1, p2, _ = p1.Varint(p2) case protowire.BytesType: p1, p2, _ = p1.Bytes(p2)