From 1daf57eae54695b0ace2cde4a90a986a660a4095 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 4 Mar 2026 09:32:03 +0000 Subject: [PATCH 1/3] Initial plan From 5efce6518d5a182bfc4dc19ed52d509a5a085600 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 4 Mar 2026 09:38:32 +0000 Subject: [PATCH 2/3] Fix SCC captions out of order when short text followed by longer text In PASS 2 of SCCWriter.write(), buffer time calculations could push a longer caption's adjusted start time before the previous shorter caption's start time. Two fixes applied: 1. Also adjust the first caption's start time for buffering (was skipped due to early `continue`) 2. Clamp each caption's adjusted start time to be at least as late as the previous caption's adjusted start time Co-authored-by: lorandvarga <7048551+lorandvarga@users.noreply.github.com> --- pycaption/scc/__init__.py | 11 ++++++----- tests/test_scc_conversion.py | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 5 deletions(-) diff --git a/pycaption/scc/__init__.py b/pycaption/scc/__init__.py index 5846f551..ed7424b4 100644 --- a/pycaption/scc/__init__.py +++ b/pycaption/scc/__init__.py @@ -563,11 +563,12 @@ def write(self, caption_set): code_words = len(code) / 5 + 8 code_time_microseconds = code_words * MICROSECONDS_PER_CODEWORD code_start = start - code_time_microseconds - if index == 0: - continue - previous_code, previous_start, previous_end = codes[index - 1] - if previous_end + 3 * MICROSECONDS_PER_CODEWORD >= code_start: - codes[index - 1] = (previous_code, previous_start, None) + if index > 0: + previous_code, previous_start, previous_end = codes[index - 1] + if code_start < previous_start: + code_start = previous_start + if previous_end + 3 * MICROSECONDS_PER_CODEWORD >= code_start: + codes[index - 1] = (previous_code, previous_start, None) codes[index] = (code, code_start, end) # PASS 3: diff --git a/tests/test_scc_conversion.py b/tests/test_scc_conversion.py index ed7a81c9..9ee70517 100644 --- a/tests/test_scc_conversion.py +++ b/tests/test_scc_conversion.py @@ -1,3 +1,5 @@ +import re + import pytest from pycaption import ( @@ -6,6 +8,7 @@ SCCWriter, SRTReader, SRTWriter, + WebVTTReader, WebVTTWriter, ) from tests.mixins import CaptionSetTestingMixIn @@ -65,6 +68,36 @@ def test_dfxp_is_valid_xml_when_scc_source_has_ampersand_character( assert dfxp == sample_dfxp_with_ampersand_character +class TestSCCTimestampOrdering: + def test_scc_captions_are_in_order_when_short_text_followed_by_long(self): + """When short caption text is followed by longer caption text, + the SCC output timestamps should remain in chronological order. + See: https://github.com/pbs/pycaption/issues/XXX + """ + vtt_input = ( + "WEBVTT\n\n" + "0\n" + "00:00:02.200 --> 00:00:02.359\n" + "you know,\n\n" + "1\n" + "00:00:02.400 --> 00:00:03.760\n" + "the way he kind of looked at me.\n\n" + "2\n" + "00:00:04.700 --> 00:00:05.169\n" + "And I said,\n\n" + "3\n" + "00:00:05.210 --> 00:00:05.520\n" + "oh\n" + ) + captions = WebVTTReader().read(vtt_input) + scc_output = SCCWriter().write(captions) + timestamps = re.findall(r"(\d+:\d+:\d+:\d+)", scc_output) + for i in range(1, len(timestamps)): + assert timestamps[i] >= timestamps[i - 1], ( + f"Timestamps out of order: {timestamps[i - 1]} > {timestamps[i]}" + ) + + class TestSCCToWebVTT: def test_webvtt_newlines_are_properly_rendered( self, From 27795144382aff212ed66b05503ce83b3a13f1ed Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 4 Mar 2026 09:39:38 +0000 Subject: [PATCH 3/3] Address code review: remove placeholder issue reference, add format comment Co-authored-by: lorandvarga <7048551+lorandvarga@users.noreply.github.com> --- tests/test_scc_conversion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_scc_conversion.py b/tests/test_scc_conversion.py index 9ee70517..99c3714c 100644 --- a/tests/test_scc_conversion.py +++ b/tests/test_scc_conversion.py @@ -72,7 +72,6 @@ class TestSCCTimestampOrdering: def test_scc_captions_are_in_order_when_short_text_followed_by_long(self): """When short caption text is followed by longer caption text, the SCC output timestamps should remain in chronological order. - See: https://github.com/pbs/pycaption/issues/XXX """ vtt_input = ( "WEBVTT\n\n" @@ -91,6 +90,7 @@ def test_scc_captions_are_in_order_when_short_text_followed_by_long(self): ) captions = WebVTTReader().read(vtt_input) scc_output = SCCWriter().write(captions) + # SCC timestamps use HH:MM:SS:FF format (FF = frames) timestamps = re.findall(r"(\d+:\d+:\d+:\d+)", scc_output) for i in range(1, len(timestamps)): assert timestamps[i] >= timestamps[i - 1], (