diff --git a/pycaption/scc/__init__.py b/pycaption/scc/__init__.py
index 5846f551..ed7424b4 100644
--- a/pycaption/scc/__init__.py
+++ b/pycaption/scc/__init__.py
@@ -563,11 +563,12 @@ def write(self, caption_set):
             code_words = len(code) / 5 + 8
             code_time_microseconds = code_words * MICROSECONDS_PER_CODEWORD
             code_start = start - code_time_microseconds
-            if index == 0:
-                continue
-            previous_code, previous_start, previous_end = codes[index - 1]
-            if previous_end + 3 * MICROSECONDS_PER_CODEWORD >= code_start:
-                codes[index - 1] = (previous_code, previous_start, None)
+            if index > 0:
+                previous_code, previous_start, previous_end = codes[index - 1]
+                if code_start < previous_start:
+                    code_start = previous_start
+                if previous_end + 3 * MICROSECONDS_PER_CODEWORD >= code_start:
+                    codes[index - 1] = (previous_code, previous_start, None)
             codes[index] = (code, code_start, end)
 
         # PASS 3:
diff --git a/tests/test_scc_conversion.py b/tests/test_scc_conversion.py
index ed7a81c9..99c3714c 100644
--- a/tests/test_scc_conversion.py
+++ b/tests/test_scc_conversion.py
@@ -1,3 +1,5 @@
+import re
+
 import pytest
 
 from pycaption import (
@@ -6,6 +8,7 @@
     SCCWriter,
     SRTReader,
     SRTWriter,
+    WebVTTReader,
     WebVTTWriter,
 )
 from tests.mixins import CaptionSetTestingMixIn
@@ -65,6 +68,49 @@ def test_dfxp_is_valid_xml_when_scc_source_has_ampersand_character(
         assert dfxp == sample_dfxp_with_ampersand_character
 
 
+class TestSCCTimestampOrdering:
+    def test_scc_captions_are_in_order_when_short_text_followed_by_long(self):
+        """Check that SCC timestamps never go backwards in the output.
+
+        When short caption text is followed by longer caption text,
+        the SCC output timestamps should remain in chronological order.
+        """
+        vtt_input = (
+            "WEBVTT\n\n"
+            "0\n"
+            "00:00:02.200 --> 00:00:02.359\n"
+            "you know,\n\n"
+            "1\n"
+            "00:00:02.400 --> 00:00:03.760\n"
+            "the way he kind of looked at me.\n\n"
+            "2\n"
+            "00:00:04.700 --> 00:00:05.169\n"
+            "And I said,\n\n"
+            "3\n"
+            "00:00:05.210 --> 00:00:05.520\n"
+            "oh\n"
+        )
+        captions = WebVTTReader().read(vtt_input)
+        scc_output = SCCWriter().write(captions)
+        # SCC timestamps use HH:MM:SS:FF format (FF = frames)
+        timestamps = re.findall(r"(\d+:\d+:\d+:\d+)", scc_output)
+        # An ordering check over fewer than two timestamps would pass
+        # vacuously, e.g. if the pattern stopped matching the output.
+        assert len(timestamps) >= 2, (
+            f"Expected at least 2 timestamps, found {len(timestamps)}"
+        )
+
+        def as_fields(timestamp):
+            # Compare the fields as integers rather than as raw strings, so
+            # the check does not depend on every field being zero-padded.
+            return tuple(int(field) for field in timestamp.split(":"))
+
+        for previous, current in zip(timestamps, timestamps[1:]):
+            assert as_fields(current) >= as_fields(previous), (
+                f"Timestamps out of order: {previous} > {current}"
+            )
+
+
 class TestSCCToWebVTT:
     def test_webvtt_newlines_are_properly_rendered(
         self,