diff --git a/width.go b/width.go index 1f8cdb1..170b73d 100644 --- a/width.go +++ b/width.go @@ -12,12 +12,20 @@ import ( // characters are treated as width 1. When EastAsianWidth is true, ambiguous // East Asian characters are treated as width 2. type Options struct { + // EastAsianWidth specifies how ambiguous East Asian characters are treated. + // true means width 2, false (default) means width 1. EastAsianWidth bool + // IgnoreANSI specifies whether ANSI escape codes should be ignored. + // true means ignore, false (default) means treat codes as any other text. + IgnoreANSI bool } // DefaultOptions is the default options for the display width // calculation, which is EastAsianWidth: false. -var DefaultOptions = Options{EastAsianWidth: false} +var DefaultOptions = Options{ + EastAsianWidth: false, + IgnoreANSI: false, +} // String calculates the display width of a string, // by iterating over grapheme clusters in the string @@ -33,6 +41,13 @@ func (options Options) String(s string) int { pos := 0 for pos < len(s) { + if options.IgnoreANSI { + if n := ansiSequenceLength(s[pos:]); n > 0 { + pos += n + continue + } + } + // Try ASCII optimization asciiLen := printableASCIILength(s[pos:]) if asciiLen > 0 { @@ -54,6 +69,10 @@ func (options Options) String(s string) int { if pos < len(s) && s[pos] >= 0x20 && s[pos] <= 0x7E { break } + // If next byte is ESC and we ignore ANSI, break so we skip the sequence at top of loop + if options.IgnoreANSI && pos < len(s) && s[pos] == esc { + break + } } // Defensive, should not happen: if no progress was made, @@ -81,6 +100,13 @@ func (options Options) Bytes(s []byte) int { pos := 0 for pos < len(s) { + if options.IgnoreANSI { + if n := ansiSequenceLength(s[pos:]); n > 0 { + pos += n + continue + } + } + // Try ASCII optimization asciiLen := printableASCIILength(s[pos:]) if asciiLen > 0 { @@ -102,6 +128,10 @@ func (options Options) Bytes(s []byte) int { if pos < len(s) && s[pos] >= 0x20 && s[pos] <= 0x7E { break } + // If next byte is ESC and we ignore ANSI, break so we skip the sequence at top of loop + if options.IgnoreANSI && pos < len(s) && s[pos] == esc { + break + } } // Defensive, should not happen: if no progress was made, @@ -158,20 +188,57 @@ const _Default property = 0 // equal to maxWidth. func (options Options) TruncateString(s string, maxWidth int, tail string) string { maxWidthWithoutTail := maxWidth - options.String(tail) + width := 0 + pos := 0 + var lastFitPos int + + for pos < len(s) { + if options.IgnoreANSI { + if n := ansiSequenceLength(s[pos:]); n > 0 { + pos += n + continue + } + } + + asciiLen := printableASCIILength(s[pos:]) + if asciiLen > 0 { + for i := 0; i < asciiLen; i++ { + if width+1 <= maxWidthWithoutTail { + lastFitPos = pos + i + 1 + } + width++ + if width > maxWidth { + return s[:lastFitPos] + tail + } + } + pos += asciiLen + continue + } - var pos, total int - g := graphemes.FromString(s) - for g.Next() { - gw := graphemeWidth(g.Value(), options) - if total+gw <= maxWidthWithoutTail { - pos = g.End() + g := graphemes.FromString(s[pos:]) + start := pos + for g.Next() { + v := g.Value() + gw := graphemeWidth(v, options) + if width+gw <= maxWidthWithoutTail && gw > 0 { + lastFitPos = pos + len(v) + } + width += gw + pos += len(v) + if width > maxWidth { + return s[:lastFitPos] + tail + } + if pos < len(s) && s[pos] >= 0x20 && s[pos] <= 0x7E { + break + } + if options.IgnoreANSI && pos < len(s) && s[pos] == esc { + break + } } - total += gw - if total > maxWidth { - return s[:pos] + tail + if pos == start { + pos++ } } - // No truncation return s } @@ -191,23 +258,63 @@ func TruncateString(s string, maxWidth int, tail string) string { // equal to maxWidth. func (options Options) TruncateBytes(s []byte, maxWidth int, tail []byte) []byte { maxWidthWithoutTail := maxWidth - options.Bytes(tail) + width := 0 + pos := 0 + var lastFitPos int + + for pos < len(s) { + if options.IgnoreANSI { + if n := ansiSequenceLength(s[pos:]); n > 0 { + pos += n + continue + } + } - var pos, total int - g := graphemes.FromBytes(s) - for g.Next() { - gw := graphemeWidth(g.Value(), options) - if total+gw <= maxWidthWithoutTail { - pos = g.End() + asciiLen := printableASCIILength(s[pos:]) + if asciiLen > 0 { + for i := 0; i < asciiLen; i++ { + if width+1 <= maxWidthWithoutTail { + lastFitPos = pos + i + 1 + } + width++ + if width > maxWidth { + result := make([]byte, 0, lastFitPos+len(tail)) + result = append(result, s[:lastFitPos]...) + result = append(result, tail...) + return result + } + } + pos += asciiLen + continue + } + + g := graphemes.FromBytes(s[pos:]) + start := pos + for g.Next() { + v := g.Value() + gw := graphemeWidth(v, options) + if width+gw <= maxWidthWithoutTail && gw > 0 { + lastFitPos = pos + len(v) + } + width += gw + pos += len(v) + if width > maxWidth { + result := make([]byte, 0, lastFitPos+len(tail)) + result = append(result, s[:lastFitPos]...) + result = append(result, tail...) + return result + } + if pos < len(s) && s[pos] >= 0x20 && s[pos] <= 0x7E { + break + } + if options.IgnoreANSI && pos < len(s) && s[pos] == esc { + break + } } - total += gw - if total > maxWidth { - result := make([]byte, 0, pos+len(tail)) - result = append(result, s[:pos]...) - result = append(result, tail...) - return result + if pos == start { + pos++ } } - // No truncation return s } @@ -263,6 +370,64 @@ func asciiWidth(b byte) int { return 1 } +const esc = 0x1B + +// ansiSequenceLength returns the number of bytes of an ANSI escape sequence +// at the start of s, or 0 if s does not start with a complete sequence. +// It does not interpret the sequence; it only returns the length to skip. +// Covers CSI (e.g. SGR color codes), OSC (e.g. hyperlinks), and 2-character +// escape sequences per ECMA-48 / ISO 6429. +func ansiSequenceLength[T ~string | ~[]byte](s T) int { + if len(s) < 2 || s[0] != esc { + return 0 + } + switch s[1] { + case '[': + // CSI: ESC [ P...P I...I F (final byte 0x40-0x7E) + i := 2 + for i < len(s) { + b := s[i] + if b >= 0x40 && b <= 0x7E { + return i + 1 + } + if (b >= 0x30 && b <= 0x3F) || (b >= 0x20 && b <= 0x2F) { + i++ + continue + } + return 0 + } + return 0 + case ']': + // OSC: ESC ] ... BEL (0x07) or ST (ESC \) + i := 2 + for i < len(s) { + if s[i] == 0x07 { + return i + 1 + } + if s[i] == esc && i+1 < len(s) && s[i+1] == '\\' { + return i + 2 + } + i++ + } + return 0 + case 'P', 'X', '^', '_': + // DCS, SOS, PM, APC: ... ST (ESC \) + i := 2 + for i < len(s) { + if s[i] == esc && i+1 < len(s) && s[i+1] == '\\' { + return i + 2 + } + i++ + } + return 0 + default: + if s[1] >= 0x40 && s[1] <= 0x5F { + return 2 + } + return 0 + } +} + // printableASCIILength returns the length of consecutive printable ASCII bytes // starting at the beginning of s. func printableASCIILength[T string | []byte](s T) int { diff --git a/width_test.go b/width_test.go index d008c05..d0df4e4 100644 --- a/width_test.go +++ b/width_test.go @@ -951,6 +951,15 @@ func TestTruncateString(t *testing.T) { {"flag sequence", "πŸ‡ΊπŸ‡ΈπŸ‡―πŸ‡΅", 2, "...", defaultOptions, "..."}, {"ZWJ sequence", "πŸ‘¨β€πŸ‘©β€πŸ‘§", 2, "...", defaultOptions, "πŸ‘¨β€πŸ‘©β€πŸ‘§"}, {"ZWJ sequence truncate", "πŸ‘¨β€πŸ‘©β€πŸ‘§πŸ‘¨β€πŸ‘©β€πŸ‘§", 2, "...", defaultOptions, "..."}, + + // IgnoreANSI: truncate by visible width, ANSI codes not counted (same as String: truncate when content width > maxWidth) + {"IgnoreANSI red fits", "\x1B[31mred\x1B[0m", 6, "...", ignoreANSIOptions, "\x1B[31mred\x1B[0m"}, + {"IgnoreANSI red truncate", "\x1B[31mred\x1B[0m", 2, "...", ignoreANSIOptions, "..."}, + {"IgnoreANSI long text", "\x1B[32mhello world\x1B[0m", 8, "...", ignoreANSIOptions, "\x1B[32mhello..."}, + {"IgnoreANSI leading text", "hi\x1B[32m there\x1B[0m", 7, "...", ignoreANSIOptions, "hi\x1B[32m t..."}, + {"IgnoreANSI no truncation", "\x1B[1m\x1B[32mhi\x1B[0m", 5, "...", ignoreANSIOptions, "\x1B[1m\x1B[32mhi\x1B[0m"}, + {"IgnoreANSI CJK", "\x1B[31mδΈ­\x1B[0m", 5, "...", ignoreANSIOptions, "\x1B[31mδΈ­\x1B[0m"}, + {"IgnoreANSI CJK truncate", "\x1B[31mδΈ­ζ–‡\x1B[0m", 3, "...", ignoreANSIOptions, "..."}, } for _, tt := range tests { @@ -1027,6 +1036,120 @@ func TestTruncateBytesDoesNotMutateInput(t *testing.T) { } } +func TestAnsiSequenceLength(t *testing.T) { + tests := []struct { + name string + input string + expected int + desc string + }{ + // No sequence at start + {"empty", "", 0, "empty string"}, + {"single byte", "a", 0, "no ESC at start"}, + {"single ESC", "\x1B", 0, "incomplete, len < 2"}, + {"ESC then end", "\x1B", 0, "incomplete"}, + {"text before ESC", "hello\x1B[31m", 0, "ESC not at start"}, + + // CSI (ESC [ P...P I...I F) β€” length = index of final byte + 1 + {"CSI incomplete", "\x1B[", 0, "CSI with no final byte"}, + {"CSI empty", "\x1B[m", 3, "ESC [ m (reset)"}, + {"CSI one param", "\x1B[0m", 4, "ESC [ 0 m"}, + {"CSI SGR red", "\x1B[31m", 5, "ESC [ 31 m"}, + {"CSI bold green", "\x1B[1;32m", 7, "ESC [ 1 ; 32 m"}, + {"CSI with intermediate", "\x1B[ 0m", 5, "ESC [ space 0 m (space is intermediate)"}, + {"CSI final at 0x40", "\x1B[@", 3, "final byte @ (0x40)"}, + {"CSI final at 0x7E", "\x1B[~", 3, "final byte ~ (0x7E)"}, + {"CSI invalid byte", "\x1B[\x80m", 0, "0x80 not param/intermediate/final"}, + {"CSI then text", "\x1B[31mred", 5, "only sequence length, rest is text"}, + + // OSC (ESC ] ... BEL or ST) + {"OSC incomplete", "\x1B]", 0, "OSC with no terminator"}, + {"OSC BEL only", "\x1B]\x07", 3, "ESC ] BEL"}, + {"OSC title", "\x1B]0;title\x07", 10, "ESC ] 0 ; title BEL"}, + {"OSC hyperlink", "\x1B]8;;https://x.com\x07", 19, "ESC ] 8 ; ; https://x.com BEL"}, + {"OSC ST terminator", "\x1B]0;title\x1B\\", 11, "ESC ] 0 ; title ST"}, + {"OSC no terminator", "\x1B]incomplete", 0, "OSC with no BEL or ST"}, + + // DCS, SOS, PM, APC (ESC P/X/^/_ ... ST) + {"DCS incomplete", "\x1BP", 0, "DCS with no ST"}, + {"DCS minimal", "\x1BP\x1B\\", 4, "ESC P ESC \\"}, + {"DCS with data", "\x1BPdata\x1B\\", 8, "ESC P data ESC \\"}, + {"SOS minimal", "\x1BX\x1B\\", 4, "ESC X ESC \\"}, + {"PM minimal", "\x1B^\x1B\\", 4, "ESC ^ ESC \\"}, + {"APC minimal", "\x1B_\x1B\\", 4, "ESC _ ESC \\"}, + + // 2-character sequences (ESC + 0x40-0x5F, excluding [ ] P X ^ _) + {"2-char at sign", "\x1B@", 2, "ESC @ (0x40)"}, + {"2-char D", "\x1BD", 2, "ESC D (IND)"}, + {"2-char backslash", "\x1B\\", 2, "ESC \\ (0x5C)"}, + {"ESC ^ incomplete PM", "\x1B^", 0, "ESC ^ starts PM sequence, incomplete without ST"}, + {"2-char outside range", "\x1B`", 0, "ESC ` (0x60, above 0x5F)"}, + {"2-char lowercase a", "\x1Ba", 0, "ESC a (0x61, above 0x5F)"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := ansiSequenceLength(tt.input) + if got != tt.expected { + t.Errorf("ansiSequenceLength(%q) = %d, want %d (%s)", + tt.input, got, tt.expected, tt.desc) + t.Logf(" len(input)=%d, bytes=% x", len(tt.input), []byte(tt.input)) + } + // Exercise []byte path + gotBytes := ansiSequenceLength([]byte(tt.input)) + if gotBytes != tt.expected { + t.Errorf("ansiSequenceLength([]byte(%q)) = %d, want %d", + tt.input, gotBytes, tt.expected) + } + }) + } +} + +var ignoreANSIOptions = Options{IgnoreANSI: true} + +func TestStringWidthIgnoreANSI(t *testing.T) { + tests := []struct { + name string + input string + options Options + expected int + }{ + // Default options: ANSI bytes are counted (ESC=0, others=1 each) + {"default CSI only", "\x1B[31m", defaultOptions, 4}, + {"default red text", "\x1B[31mred\x1B[0m", defaultOptions, 10}, + + // IgnoreANSI: true β€” only visible runes count + {"IgnoreANSI empty", "", ignoreANSIOptions, 0}, + {"IgnoreANSI CSI only", "\x1B[31m", ignoreANSIOptions, 0}, + {"IgnoreANSI red", "\x1B[31mred\x1B[0m", ignoreANSIOptions, 3}, + {"IgnoreANSI bold green", "\x1B[1;32mbold green\x1B[0m", ignoreANSIOptions, 10}, + {"IgnoreANSI multiple CSI", "\x1B[1m\x1B[32mhi\x1B[0m", ignoreANSIOptions, 2}, + {"IgnoreANSI leading text", "hi\x1B[31m there\x1B[0m", ignoreANSIOptions, 8}, + {"IgnoreANSI trailing CSI", "ok\x1B[0m", ignoreANSIOptions, 2}, + {"IgnoreANSI OSC hyperlink", "\x1B]8;;https://x.com\x07link\x1B\\", ignoreANSIOptions, 4}, + {"IgnoreANSI 2-char then newline", "\x1BD\n", ignoreANSIOptions, 0}, + {"IgnoreANSI mixed with CJK", "\x1B[32mδΈ­\x1B[0m", ignoreANSIOptions, 2}, + {"IgnoreANSI mixed with emoji", "\x1B[31mπŸ˜€\x1B[0m", ignoreANSIOptions, 2}, + {"IgnoreANSI no ANSI", "hello", ignoreANSIOptions, 5}, + {"IgnoreANSI incomplete CSI", "a\x1B[", ignoreANSIOptions, 2}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := tt.options.String(tt.input) + if got != tt.expected { + t.Errorf("Options.String(%q) = %d, want %d", + tt.input, got, tt.expected) + } + gotBytes := tt.options.Bytes([]byte(tt.input)) + if gotBytes != tt.expected { + t.Errorf("Options.Bytes([]byte(%q)) = %d, want %d", + tt.input, gotBytes, tt.expected) + } + }) + } +} + func TestPrintableASCIILength(t *testing.T) { tests := []struct { name string