From f4ef1c4126b98f4ac816c25f08cfac987e825822 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Mon, 10 Mar 2025 23:04:46 -0700 Subject: [PATCH 01/19] check video sequence null island, speed, and file size --- mapillary_tools/constants.py | 3 + mapillary_tools/exceptions.py | 12 ++ .../process_sequence_properties.py | 160 +++++++++++++----- tests/unit/test_sequence_processing.py | 60 ++++++- 4 files changed, 193 insertions(+), 42 deletions(-) diff --git a/mapillary_tools/constants.py b/mapillary_tools/constants.py index 64be1202a..8031dd941 100644 --- a/mapillary_tools/constants.py +++ b/mapillary_tools/constants.py @@ -11,6 +11,9 @@ CUTOFF_TIME = float(os.getenv(_ENV_PREFIX + "CUTOFF_TIME", 60)) DUPLICATE_DISTANCE = float(os.getenv(_ENV_PREFIX + "DUPLICATE_DISTANCE", 0.1)) DUPLICATE_ANGLE = float(os.getenv(_ENV_PREFIX + "DUPLICATE_ANGLE", 5)) +MAX_AVG_SPEED = float( + os.getenv(_ENV_PREFIX + "MAX_AVG_SPEED", 200_000 / 3600) +) # 200 KM/h # in seconds VIDEO_SAMPLE_INTERVAL = float(os.getenv(_ENV_PREFIX + "VIDEO_SAMPLE_INTERVAL", -1)) # in meters diff --git a/mapillary_tools/exceptions.py b/mapillary_tools/exceptions.py index 16c3f6e6a..45d1491b9 100644 --- a/mapillary_tools/exceptions.py +++ b/mapillary_tools/exceptions.py @@ -118,3 +118,15 @@ class MapillaryUploadUnauthorizedError(MapillaryUserError): class MapillaryMetadataValidationError(MapillaryUserError, MapillaryDescriptionError): exit_code = 15 + + +class MapillaryFileTooLargeError(MapillaryDescriptionError): + pass + + +class MapillaryCaptureSpeedTooFastError(MapillaryDescriptionError): + pass + + +class MapillaryNullIslandError(MapillaryDescriptionError): + pass diff --git a/mapillary_tools/process_sequence_properties.py b/mapillary_tools/process_sequence_properties.py index 27ca1a4cc..72b7ed57f 100644 --- a/mapillary_tools/process_sequence_properties.py +++ b/mapillary_tools/process_sequence_properties.py @@ -4,8 +4,7 @@ import os import typing as T -from . import constants, geo, types -from .exceptions import MapillaryBadParameterError, MapillaryDuplicationError +from . 
import constants, exceptions, geo, types, utils LOG = logging.getLogger(__name__) @@ -75,7 +74,7 @@ def duplication_check( ): dups.append( types.describe_error_metadata( - MapillaryDuplicationError( + exceptions.MapillaryDuplicationError( f"Duplicate of its previous image in terms of distance <= {duplicate_distance} and angle <= {duplicate_angle}", types.as_desc(cur), distance=distance, @@ -217,54 +216,126 @@ def _interpolate_subsecs_for_sorting(sequence: PointSequence) -> None: def _parse_filesize_in_bytes(filesize_str: str) -> int: filesize_str = filesize_str.strip().upper() - if filesize_str.endswith("B"): - return int(filesize_str[:-1]) - elif filesize_str.endswith("K"): - return int(filesize_str[:-1]) * 1024 - elif filesize_str.endswith("M"): - return int(filesize_str[:-1]) * 1024 * 1024 - elif filesize_str.endswith("G"): - return int(filesize_str[:-1]) * 1024 * 1024 * 1024 - else: - return int(filesize_str) + try: + if filesize_str.endswith("B"): + return int(filesize_str[:-1]) + elif filesize_str.endswith("K"): + return int(filesize_str[:-1]) * 1024 + elif filesize_str.endswith("M"): + return int(filesize_str[:-1]) * 1024 * 1024 + elif filesize_str.endswith("G"): + return int(filesize_str[:-1]) * 1024 * 1024 * 1024 + else: + return int(filesize_str) + except ValueError: + raise exceptions.MapillaryBadParameterError( + f"Expect valid file size that ends with B, K, M, or G, but got {filesize_str}" + ) def _parse_pixels(pixels_str: str) -> int: pixels_str = pixels_str.strip().upper() - if pixels_str.endswith("K"): - return int(pixels_str[:-1]) * 1000 - elif pixels_str.endswith("M"): - return int(pixels_str[:-1]) * 1000 * 1000 - elif pixels_str.endswith("G"): - return int(pixels_str[:-1]) * 1000 * 1000 * 1000 + try: + if pixels_str.endswith("K"): + return int(pixels_str[:-1]) * 1000 + elif pixels_str.endswith("M"): + return int(pixels_str[:-1]) * 1000 * 1000 + elif pixels_str.endswith("G"): + return int(pixels_str[:-1]) * 1000 * 1000 * 1000 + else: + return int(pixels_str) + except ValueError: + raise exceptions.MapillaryBadParameterError( + f"Expect valid number of pixels that ends with K, M, or G, but got {pixels_str}" + ) + + +def _avg_speed(points: T.Sequence[geo.Point]) -> float: + total_distance = 0.0 + for cur, nxt in geo.pairwise(points): + total_distance += geo.gps_distance( + (cur.lat, cur.lon), + (nxt.lat, nxt.lon), + ) + + if points: + time_diff = points[-1].time - points[0].time else: - return int(pixels_str) + time_diff = 0.0 + + if time_diff == 0.0: + return float("inf") + + return total_distance / time_diff + + +def _process_videos( + video_metadatas: T.Sequence[types.VideoMetadata], + max_sequence_filesize_in_bytes: int, + max_avg_speed: float, +) -> T.Tuple[T.List[types.VideoMetadata], T.List[types.ErrorMetadata]]: + error_metadatas: T.List[types.ErrorMetadata] = [] + new_video_metadata: T.List[types.VideoMetadata] = [] + + for video_metadata in video_metadatas: + if video_metadata.filesize is None: + filesize = utils.get_file_size(video_metadata.filename) + else: + filesize = video_metadata.filesize + + if filesize > max_sequence_filesize_in_bytes: + error_metadatas.append( + types.describe_error_metadata( + exc=exceptions.MapillaryFileTooLargeError( + f"Video file size exceeds the maximum allowed file size ({max_sequence_filesize_in_bytes} bytes)", + ), + filename=video_metadata.filename, + filetype=video_metadata.filetype, + ) + ) + elif any(p.lat == 0 and p.lon == 0 for p in video_metadata.points): + error_metadatas.append( + types.describe_error_metadata( + 
exc=exceptions.MapillaryNullIslandError( + "Found GPS coordinates in Null Island (0, 0)", + ), + filename=video_metadata.filename, + filetype=video_metadata.filetype, + ) + ) + elif ( + len(video_metadata.points) >= 2 + and _avg_speed(video_metadata.points) > max_avg_speed + ): + error_metadatas.append( + types.describe_error_metadata( + exc=exceptions.MapillaryCaptureSpeedTooFastError( + f"Capture speed is too fast (exceeds {round(max_avg_speed, 3)} m/s)", + ), + filename=video_metadata.filename, + filetype=video_metadata.filetype, + ) + ) + else: + new_video_metadata.append(video_metadata) + + return new_video_metadata, error_metadatas def process_sequence_properties( metadatas: T.Sequence[types.MetadataOrError], - cutoff_distance=constants.CUTOFF_DISTANCE, - cutoff_time=constants.CUTOFF_TIME, - interpolate_directions=False, - duplicate_distance=constants.DUPLICATE_DISTANCE, - duplicate_angle=constants.DUPLICATE_ANGLE, + cutoff_distance: float = constants.CUTOFF_DISTANCE, + cutoff_time: float = constants.CUTOFF_TIME, + interpolate_directions: bool = False, + duplicate_distance: float = constants.DUPLICATE_DISTANCE, + duplicate_angle: float = constants.DUPLICATE_ANGLE, + max_avg_speed: float = constants.MAX_AVG_SPEED, ) -> T.List[types.MetadataOrError]: - try: - max_sequence_filesize_in_bytes = _parse_filesize_in_bytes( - constants.MAX_SEQUENCE_FILESIZE - ) - except ValueError: - raise MapillaryBadParameterError( - f"Expect the envvar {constants._ENV_PREFIX}MAX_SEQUENCE_FILESIZE to be a valid filesize that ends with B, K, M, or G, but got {constants.MAX_SEQUENCE_FILESIZE}" - ) - - try: - max_sequence_pixels = _parse_pixels(constants.MAX_SEQUENCE_PIXELS) - except ValueError: - raise MapillaryBadParameterError( - f"Expect the envvar {constants._ENV_PREFIX}MAX_SEQUENCE_PIXELS to be a valid number of pixels that ends with K, M, or G, but got {constants.MAX_SEQUENCE_PIXELS}" - ) + max_sequence_filesize_in_bytes = _parse_filesize_in_bytes( + constants.MAX_SEQUENCE_FILESIZE + ) + max_sequence_pixels = _parse_pixels(constants.MAX_SEQUENCE_PIXELS) error_metadatas: T.List[types.ErrorMetadata] = [] image_metadatas: T.List[types.ImageMetadata] = [] @@ -280,6 +351,13 @@ def process_sequence_properties( else: raise RuntimeError(f"invalid metadata type: {metadata}") + video_metadatas, video_error_metadatas = _process_videos( + video_metadatas, + max_sequence_filesize_in_bytes=max_sequence_filesize_in_bytes, + max_avg_speed=max_avg_speed, + ) + error_metadatas.extend(video_error_metadatas) + sequences_by_folder = _group_sort_images_by_folder(image_metadatas) # make sure they are sorted for sequence in sequences_by_folder: @@ -296,7 +374,7 @@ def process_sequence_properties( sequences_after_cut.extend(cut) assert len(image_metadatas) == sum(len(s) for s in sequences_after_cut) - # reuse imaeg_metadatas to store processed image metadatas + # reuse image_metadatas to store processed image metadatas image_metadatas = [] sequence_idx = 0 diff --git a/tests/unit/test_sequence_processing.py b/tests/unit/test_sequence_processing.py index 79e35db70..1e1aa4b6e 100644 --- a/tests/unit/test_sequence_processing.py +++ b/tests/unit/test_sequence_processing.py @@ -3,7 +3,6 @@ from pathlib import Path import py.path - import pytest from mapillary_tools import ( @@ -252,6 +251,7 @@ def test_interpolation(tmpdir: py.path.local): points=[], make="hello", model="world", + filesize=123, ), ] metadatas = psp.process_sequence_properties( @@ -454,3 +454,61 @@ def test_cut_by_pixels(tmpdir: py.path.local): ) == 2 ) + + +def 
test_video_error(): + # curdir = tmpdir.mkdir("hello222").mkdir("world333") + sequence: T.List[types.Metadata] = [ + types.VideoMetadata( + Path("test_video_null_island.mp4"), + None, + types.FileType.VIDEO, + points=[ + geo.Point(1, -0.00001, -0.00001, 1, angle=None), + geo.Point(1, 0, 0, 1, angle=None), + geo.Point(1, 0.00001, 0.00001, 1, angle=None), + ], + make="hello", + model="world", + filesize=123, + ), + types.VideoMetadata( + Path("test_video_too_fast.mp4"), + None, + types.FileType.VIDEO, + points=[ + geo.Point(1, 1, 1, 1, angle=None), + geo.Point(1.1, 1.00001, 1.00001, 1, angle=None), + geo.Point(10, 1, 3, 1, angle=None), + ], + make="hello", + model="world", + filesize=123, + ), + types.VideoMetadata( + Path("test_video_file_too_large.mp4"), + None, + types.FileType.VIDEO, + points=[geo.Point(1, 1, 1, 1, angle=None)], + make="hello", + model="world", + filesize=1024 * 1024 * 1024 * 200, + ), + types.VideoMetadata( + Path("test_good.mp4"), + None, + types.FileType.VIDEO, + points=[geo.Point(1, 1, 1, 1, angle=None)], + make="hello", + model="world", + filesize=123, + ), + ] + metadatas = psp.process_sequence_properties( + sequence, + cutoff_distance=1000000000, + cutoff_time=100, + interpolate_directions=True, + duplicate_distance=100, + duplicate_angle=5, + ) From 076501d5c05a20a396d354dc31eb2a7b535eab6b Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Mon, 10 Mar 2025 23:44:37 -0700 Subject: [PATCH 02/19] refactor --- .../process_sequence_properties.py | 91 +++++++++++-------- 1 file changed, 52 insertions(+), 39 deletions(-) diff --git a/mapillary_tools/process_sequence_properties.py b/mapillary_tools/process_sequence_properties.py index 72b7ed57f..0fd988dda 100644 --- a/mapillary_tools/process_sequence_properties.py +++ b/mapillary_tools/process_sequence_properties.py @@ -276,7 +276,7 @@ def _process_videos( max_avg_speed: float, ) -> T.Tuple[T.List[types.VideoMetadata], T.List[types.ErrorMetadata]]: error_metadatas: T.List[types.ErrorMetadata] = [] - new_video_metadata: T.List[types.VideoMetadata] = [] + output_video_metadatas: T.List[types.VideoMetadata] = [] for video_metadata in video_metadatas: if video_metadata.filesize is None: @@ -318,9 +318,9 @@ def _process_videos( ) ) else: - new_video_metadata.append(video_metadata) + output_video_metadatas.append(video_metadata) - return new_video_metadata, error_metadatas + return output_video_metadatas, error_metadatas def process_sequence_properties( @@ -358,58 +358,71 @@ def process_sequence_properties( ) error_metadatas.extend(video_error_metadatas) - sequences_by_folder = _group_sort_images_by_folder(image_metadatas) + input_sequences: T.List[PointSequence] + output_sequences: T.List[PointSequence] + + input_sequences = _group_sort_images_by_folder(image_metadatas) # make sure they are sorted - for sequence in sequences_by_folder: + for sequence in input_sequences: for cur, nxt in geo.pairwise(sequence): assert cur.time <= nxt.time, "sequence must be sorted" - for s in sequences_by_folder: - _interpolate_subsecs_for_sorting(s) - - # cut sequences - sequences_after_cut: T.List[PointSequence] = [] - for sequence in sequences_by_folder: - cut = cut_sequence_by_time_distance(sequence, cutoff_distance, cutoff_time) - sequences_after_cut.extend(cut) - assert len(image_metadatas) == sum(len(s) for s in sequences_after_cut) + for sequence in input_sequences: + _interpolate_subsecs_for_sorting(sequence) - # reuse image_metadatas to store processed image metadatas - image_metadatas = [] - - sequence_idx = 0 + # Cut sequences by time or 
distance + output_sequences = [] + for sequence in input_sequences: + output_sequences.extend( + cut_sequence_by_time_distance(sequence, cutoff_distance, cutoff_time) + ) + assert len(image_metadatas) == sum(len(s) for s in output_sequences) - for sequence in sequences_after_cut: - # duplication check - dedups, dups = duplication_check( + # Duplication check + input_sequences = output_sequences + output_sequences = [] + for sequence in input_sequences: + output_sequence, dups = duplication_check( sequence, duplicate_distance=duplicate_distance, duplicate_angle=duplicate_angle, ) - assert len(sequence) == len(dedups) + len(dups) + assert len(sequence) == len(output_sequence) + len(dups) + output_sequences.append(output_sequence) error_metadatas.extend(dups) - # interpolate angles + # Interpolate angles + input_sequences = output_sequences + for sequence in input_sequences: if interpolate_directions: - for p in dedups: - p.angle = None - geo.interpolate_directions_if_none(dedups) - - # cut sequence per MAX_SEQUENCE_LENGTH images - cut = cut_sequence( - dedups, - constants.MAX_SEQUENCE_LENGTH, - max_sequence_filesize_in_bytes, - max_sequence_pixels, + for image in sequence: + image.angle = None + geo.interpolate_directions_if_none(sequence) + + # Cut sequences by max number of images, max filesize, and max pixels + input_sequences = output_sequences + output_sequences = [] + for sequence in input_sequences: + output_sequences.extend( + cut_sequence( + sequence, + constants.MAX_SEQUENCE_LENGTH, + max_sequence_filesize_in_bytes, + max_sequence_pixels, + ) ) + # Assign sequence UUIDs + sequence_idx = 0 + image_metadatas = [] + input_sequences = output_sequences + for sequence in input_sequences: # assign sequence UUIDs - for c in cut: - for p in c: - # using incremental id as shorter "uuid", so we can save some space for the desc file - p.MAPSequenceUUID = str(sequence_idx) - image_metadatas.append(p) - sequence_idx += 1 + for image in sequence: + # using incremental id as shorter "uuid", so we can save some space for the desc file + image.MAPSequenceUUID = str(sequence_idx) + image_metadatas.append(image) + sequence_idx += 1 results = error_metadatas + image_metadatas + video_metadatas From bea5e0a0724165f5b1f87e023fb12675119ddd85 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Tue, 11 Mar 2025 16:27:07 -0700 Subject: [PATCH 03/19] process images --- .../process_sequence_properties.py | 80 ++++++++++++++++--- 1 file changed, 71 insertions(+), 9 deletions(-) diff --git a/mapillary_tools/process_sequence_properties.py b/mapillary_tools/process_sequence_properties.py index 78f65a7eb..39c25ca24 100644 --- a/mapillary_tools/process_sequence_properties.py +++ b/mapillary_tools/process_sequence_properties.py @@ -9,8 +9,8 @@ LOG = logging.getLogger(__name__) -Point = T.TypeVar("Point", bound=geo.Point) -PointSequence = T.List[Point] +PointLike = T.TypeVar("PointLike", bound=geo.Point) +PointSequence = T.List[PointLike] def cut_sequence_by_time_distance( @@ -252,16 +252,16 @@ def _parse_pixels(pixels_str: str) -> int: ) -def _avg_speed(points: T.Sequence[geo.Point]) -> float: +def _avg_speed(sequence: T.Sequence[PointLike]) -> float: total_distance = 0.0 - for cur, nxt in geo.pairwise(points): + for cur, nxt in geo.pairwise(sequence): total_distance += geo.gps_distance( (cur.lat, cur.lon), (nxt.lat, nxt.lon), ) - if points: - time_diff = points[-1].time - points[0].time + if sequence: + time_diff = sequence[-1].time - sequence[0].time else: time_diff = 0.0 @@ -324,6 +324,61 @@ def _process_videos( return 
output_video_metadatas, error_metadatas +def _process_sequences( + sequences: T.Sequence[PointSequence], + max_sequence_filesize_in_bytes: int, + max_avg_speed: float, +) -> T.Tuple[T.List[PointSequence], T.List[types.ErrorMetadata]]: + error_metadatas: T.List[types.ErrorMetadata] = [] + output_sequences: T.List[PointSequence] = [] + + for sequence in sequences: + filesize = 0 + for image in sequence: + if image.filesize is None: + filesize += utils.get_file_size(image.filename) + else: + filesize += image.filesize + + if filesize > max_sequence_filesize_in_bytes: + for image in sequence: + error_metadatas.append( + types.describe_error_metadata( + exc=exceptions.MapillaryFileTooLargeError( + f"Sequence file size exceeds the maximum allowed file size ({max_sequence_filesize_in_bytes} bytes)", + ), + filename=image.filename, + filetype=types.FileType.IMAGE, + ) + ) + elif any(image.lat == 0 and image.lon == 0 for image in sequence): + for image in sequence: + error_metadatas.append( + types.describe_error_metadata( + exc=exceptions.MapillaryNullIslandError( + "Found GPS coordinates in Null Island (0, 0)", + ), + filename=image.filename, + filetype=types.FileType.IMAGE, + ) + ) + elif len(sequence) >= 2 and _avg_speed(sequence) > max_avg_speed: + for image in sequence: + error_metadatas.append( + types.describe_error_metadata( + exc=exceptions.MapillaryCaptureSpeedTooFastError( + f"Capture speed is too fast (exceeds {round(max_avg_speed, 3)} m/s)", + ), + filename=image.filename, + filetype=types.FileType.IMAGE, + ) + ) + else: + output_sequences.extend(sequence) + + return output_sequences, error_metadatas + + def process_sequence_properties( metadatas: T.Sequence[types.MetadataOrError], cutoff_distance: float = constants.CUTOFF_DISTANCE, @@ -383,14 +438,14 @@ def process_sequence_properties( input_sequences = output_sequences output_sequences = [] for sequence in input_sequences: - output_sequence, dups = duplication_check( + output_sequence, errors = duplication_check( sequence, max_duplicate_distance=duplicate_distance, max_duplicate_angle=duplicate_angle, ) - assert len(sequence) == len(output_sequence) + len(dups) + assert len(sequence) == len(output_sequence) + len(errors) output_sequences.append(output_sequence) - error_metadatas.extend(dups) + error_metadatas.extend(errors) # Interpolate angles input_sequences = output_sequences @@ -399,6 +454,7 @@ def process_sequence_properties( for image in sequence: image.angle = None geo.interpolate_directions_if_none(sequence) + output_sequences = input_sequences # Cut sequences by max number of images, max filesize, and max pixels input_sequences = output_sequences @@ -413,6 +469,11 @@ def process_sequence_properties( ) ) + output_sequences, errors = _process_sequences( + input_sequences, max_sequence_filesize_in_bytes, max_avg_speed + ) + error_metadatas.extend(errors) + # Assign sequence UUIDs sequence_idx = 0 image_metadatas = [] @@ -424,6 +485,7 @@ def process_sequence_properties( image.MAPSequenceUUID = str(sequence_idx) image_metadatas.append(image) sequence_idx += 1 + output_sequences = input_sequences results = error_metadatas + image_metadatas + video_metadatas From b811fd7cd0670acf491434fda3a04756abebf70e Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Tue, 11 Mar 2025 16:43:13 -0700 Subject: [PATCH 04/19] update --- .../process_sequence_properties.py | 35 +++++++++++-------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/mapillary_tools/process_sequence_properties.py b/mapillary_tools/process_sequence_properties.py 
index 39c25ca24..9a62c294d 100644 --- a/mapillary_tools/process_sequence_properties.py +++ b/mapillary_tools/process_sequence_properties.py @@ -13,10 +13,10 @@ PointSequence = T.List[PointLike] -def cut_sequence_by_time_distance( +def cut_sequence_by_time_or_distance( sequence: PointSequence, - cutoff_distance: float, - cutoff_time: float, + cutoff_distance: T.Optional[float] = None, + cutoff_time: T.Optional[float] = None, ) -> T.List[PointSequence]: sequences: T.List[PointSequence] = [] @@ -25,18 +25,25 @@ def cut_sequence_by_time_distance( for prev, cur in geo.pairwise(sequence): # invariant: prev is processed + + # Cut by distance distance = geo.gps_distance( (prev.lat, prev.lon), (cur.lat, cur.lon), ) - if cutoff_distance <= distance: - sequences.append([cur]) - continue + if cutoff_distance is not None: + if cutoff_distance <= distance: + sequences.append([cur]) + continue + + # Cut by time time_diff = cur.time - prev.time assert 0 <= time_diff, "sequence must be sorted by capture times" - if cutoff_time <= time_diff: - sequences.append([cur]) - continue + if cutoff_time is not None: + if cutoff_time <= time_diff: + sequences.append([cur]) + continue + sequences[-1].append(cur) # invariant: cur is processed @@ -271,7 +278,7 @@ def _avg_speed(sequence: T.Sequence[PointLike]) -> float: return total_distance / time_diff -def _process_videos( +def _process_videos_with_limits( video_metadatas: T.Sequence[types.VideoMetadata], max_sequence_filesize_in_bytes: int, max_avg_speed: float, @@ -324,7 +331,7 @@ def _process_videos( return output_video_metadatas, error_metadatas -def _process_sequences( +def _process_sequences_with_limits( sequences: T.Sequence[PointSequence], max_sequence_filesize_in_bytes: int, max_avg_speed: float, @@ -407,7 +414,7 @@ def process_sequence_properties( else: raise RuntimeError(f"invalid metadata type: {metadata}") - video_metadatas, video_error_metadatas = _process_videos( + video_metadatas, video_error_metadatas = _process_videos_with_limits( video_metadatas, max_sequence_filesize_in_bytes=max_sequence_filesize_in_bytes, max_avg_speed=max_avg_speed, @@ -430,7 +437,7 @@ def process_sequence_properties( output_sequences = [] for sequence in input_sequences: output_sequences.extend( - cut_sequence_by_time_distance(sequence, cutoff_distance, cutoff_time) + cut_sequence_by_time_or_distance(sequence, cutoff_time=cutoff_time) ) assert len(image_metadatas) == sum(len(s) for s in output_sequences) @@ -469,7 +476,7 @@ def process_sequence_properties( ) ) - output_sequences, errors = _process_sequences( + output_sequences, errors = _process_sequences_with_limits( input_sequences, max_sequence_filesize_in_bytes, max_avg_speed ) error_metadatas.extend(errors) From 8f18eb07361514395a4da27a9eab71ee25493b81 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Wed, 12 Mar 2025 16:36:43 -0700 Subject: [PATCH 05/19] update --- mapillary_tools/constants.py | 2 ++ .../process_sequence_properties.py | 24 ++++++++++--- tests/unit/test_sequence_processing.py | 36 +++++++++++++------ 3 files changed, 47 insertions(+), 15 deletions(-) diff --git a/mapillary_tools/constants.py b/mapillary_tools/constants.py index 8031dd941..0a20d0468 100644 --- a/mapillary_tools/constants.py +++ b/mapillary_tools/constants.py @@ -7,7 +7,9 @@ ANSI_BOLD = "\033[1m" ANSI_RESET_ALL = "\033[0m" +# In meters CUTOFF_DISTANCE = float(os.getenv(_ENV_PREFIX + "CUTOFF_DISTANCE", 600)) +# In seconds CUTOFF_TIME = float(os.getenv(_ENV_PREFIX + "CUTOFF_TIME", 60)) DUPLICATE_DISTANCE = float(os.getenv(_ENV_PREFIX + 
"DUPLICATE_DISTANCE", 0.1)) DUPLICATE_ANGLE = float(os.getenv(_ENV_PREFIX + "DUPLICATE_ANGLE", 5)) diff --git a/mapillary_tools/process_sequence_properties.py b/mapillary_tools/process_sequence_properties.py index 9a62c294d..ef22c60fd 100644 --- a/mapillary_tools/process_sequence_properties.py +++ b/mapillary_tools/process_sequence_properties.py @@ -278,7 +278,7 @@ def _avg_speed(sequence: T.Sequence[PointLike]) -> float: return total_distance / time_diff -def _process_videos_with_limits( +def _check_video_limits( video_metadatas: T.Sequence[types.VideoMetadata], max_sequence_filesize_in_bytes: int, max_avg_speed: float, @@ -331,7 +331,7 @@ def _process_videos_with_limits( return output_video_metadatas, error_metadatas -def _process_sequences_with_limits( +def _check_sequence_limits( sequences: T.Sequence[PointSequence], max_sequence_filesize_in_bytes: int, max_avg_speed: float, @@ -381,7 +381,7 @@ def _process_sequences_with_limits( ) ) else: - output_sequences.extend(sequence) + output_sequences.append(sequence) return output_sequences, error_metadatas @@ -414,7 +414,8 @@ def process_sequence_properties( else: raise RuntimeError(f"invalid metadata type: {metadata}") - video_metadatas, video_error_metadatas = _process_videos_with_limits( + # Check limits for videos + video_metadatas, video_error_metadatas = _check_video_limits( video_metadatas, max_sequence_filesize_in_bytes=max_sequence_filesize_in_bytes, max_avg_speed=max_avg_speed, @@ -425,6 +426,9 @@ def process_sequence_properties( output_sequences: T.List[PointSequence] input_sequences = _group_sort_images_by_folder(image_metadatas) + if input_sequences: + assert isinstance(input_sequences[0], list) + # make sure they are sorted for sequence in input_sequences: for cur, nxt in geo.pairwise(sequence): @@ -434,6 +438,7 @@ def process_sequence_properties( _interpolate_subsecs_for_sorting(sequence) # Cut sequences by time or distance + # NOTE: do not cut by distance here because it affects the speed limit check output_sequences = [] for sequence in input_sequences: output_sequences.extend( @@ -443,6 +448,8 @@ def process_sequence_properties( # Duplication check input_sequences = output_sequences + if input_sequences: + assert isinstance(input_sequences[0], list) output_sequences = [] for sequence in input_sequences: output_sequence, errors = duplication_check( @@ -456,6 +463,8 @@ def process_sequence_properties( # Interpolate angles input_sequences = output_sequences + if input_sequences: + assert isinstance(input_sequences[0], list) for sequence in input_sequences: if interpolate_directions: for image in sequence: @@ -465,6 +474,8 @@ def process_sequence_properties( # Cut sequences by max number of images, max filesize, and max pixels input_sequences = output_sequences + if input_sequences: + assert isinstance(input_sequences[0], list) output_sequences = [] for sequence in input_sequences: output_sequences.extend( @@ -476,10 +487,13 @@ def process_sequence_properties( ) ) - output_sequences, errors = _process_sequences_with_limits( + # Check limits for sequences + output_sequences, errors = _check_sequence_limits( input_sequences, max_sequence_filesize_in_bytes, max_avg_speed ) error_metadatas.extend(errors) + if output_sequences: + assert isinstance(output_sequences[0], list) # Assign sequence UUIDs sequence_idx = 0 diff --git a/tests/unit/test_sequence_processing.py b/tests/unit/test_sequence_processing.py index 0b0ec2b50..12d8c515a 100644 --- a/tests/unit/test_sequence_processing.py +++ b/tests/unit/test_sequence_processing.py @@ 
-248,11 +248,21 @@ def test_interpolation(tmpdir: py.path.local): curdir = tmpdir.mkdir("hello222").mkdir("world333") sequence: T.List[types.Metadata] = [ # s1 - _make_image_metadata(Path(curdir) / Path("./a.jpg"), 1, 1, 3, angle=344), - _make_image_metadata(Path(curdir) / Path("./b.jpg"), 0, 1, 4, angle=22), - _make_image_metadata(Path(curdir) / Path("./c.jpg"), 0, 0, 5, angle=-123), - _make_image_metadata(Path(curdir) / Path("./d.jpg"), 0, 0, 1, angle=2), - _make_image_metadata(Path(curdir) / Path("./e.jpg"), 1, 0, 2, angle=123), + _make_image_metadata( + Path(curdir) / Path("./a.jpg"), 0.00002, 0.00001, 3, angle=344 + ), + _make_image_metadata( + Path(curdir) / Path("./b.jpg"), 0.00001, 0.00001, 4, angle=22 + ), + _make_image_metadata( + Path(curdir) / Path("./c.jpg"), 0.00001, 0.00000, 5, angle=-123 + ), + _make_image_metadata( + Path(curdir) / Path("./d.jpg"), 0.00001, 0.00000, 1, angle=2 + ), + _make_image_metadata( + Path(curdir) / Path("./e.jpg"), 0.00002, 0.00000, 2, angle=123 + ), types.VideoMetadata( Path("test_video.mp4"), None, @@ -433,16 +443,22 @@ def test_cut_by_pixels(tmpdir: py.path.local): ), _make_image_metadata( Path(curdir) / Path("./b.jpg"), - 9, - 9, - 2, + 2.00001, + 2.00001, + 20, angle=344, width=2, height=2, ), # s1 _make_image_metadata( - Path(curdir) / Path("./c.jpg"), 1, 1, 3, angle=344, width=int(6e9), height=2 + Path(curdir) / Path("./c.jpg"), + 2.00002, + 2.00002, + 30, + angle=344, + width=int(6e9), + height=2, ), ] metadatas = psp.process_sequence_properties( @@ -450,7 +466,7 @@ def test_cut_by_pixels(tmpdir: py.path.local): cutoff_distance=1000000000, cutoff_time=100, interpolate_directions=True, - duplicate_distance=100, + duplicate_distance=1, duplicate_angle=5, ) assert ( From 0c703dee2727dd8577ecbc59a1fdb2dcdbff9364 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Wed, 12 Mar 2025 17:13:08 -0700 Subject: [PATCH 06/19] make sure a sequence of images comes from the same folder and the same camera --- mapillary_tools/process_sequence_properties.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/mapillary_tools/process_sequence_properties.py b/mapillary_tools/process_sequence_properties.py index ef22c60fd..2c7ba57fb 100644 --- a/mapillary_tools/process_sequence_properties.py +++ b/mapillary_tools/process_sequence_properties.py @@ -169,12 +169,20 @@ def _group_sort_images_by_folder( image_metadatas: T.List[types.ImageMetadata], ) -> T.List[T.List[types.ImageMetadata]]: # group images by parent directory - sequences_by_parent: T.Dict[str, T.List[types.ImageMetadata]] = {} + sequences_by_group_key: T.Dict[T.Tuple, T.List[types.ImageMetadata]] = {} for image_metadata in image_metadatas: filename = image_metadata.filename.resolve() - sequences_by_parent.setdefault(str(filename.parent), []).append(image_metadata) + # Make sure a sequence comes from the same folder and the same camera + group_key = ( + str(filename.parent), + image_metadata.MAPDeviceMake, + image_metadata.MAPDeviceModel, + image_metadata.width, + image_metadata.height, + ) + sequences_by_group_key.setdefault(group_key, []).append(image_metadata) - sequences = list(sequences_by_parent.values()) + sequences = list(sequences_by_group_key.values()) for sequence in sequences: sequence.sort( key=lambda metadata: metadata.sort_key(), From 157f4ec52f5e4ccc6c4f7cd1b76cfed2173cb88d Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Thu, 13 Mar 2025 12:58:41 -0700 Subject: [PATCH 07/19] update --- .../process_sequence_properties.py | 105 +++++++++++------- 1 file changed, 65 
insertions(+), 40 deletions(-) diff --git a/mapillary_tools/process_sequence_properties.py b/mapillary_tools/process_sequence_properties.py index 2c7ba57fb..078db8fbc 100644 --- a/mapillary_tools/process_sequence_properties.py +++ b/mapillary_tools/process_sequence_properties.py @@ -33,6 +33,14 @@ def cut_sequence_by_time_or_distance( ) if cutoff_distance is not None: if cutoff_distance <= distance: + LOG.debug( + "Cut the sequence because the distance gap between two images (%s meters) exceeds the cutoff distance (%s meters): %s: %s -> %s", + round(distance, 2), + round(cutoff_distance, 2), + prev.filename.parent, + prev.filename.name, + cur.filename.name, + ) sequences.append([cur]) continue @@ -41,6 +49,14 @@ def cut_sequence_by_time_or_distance( assert 0 <= time_diff, "sequence must be sorted by capture times" if cutoff_time is not None: if cutoff_time <= time_diff: + LOG.debug( + "Cut the sequence because the time gap between two images (%s seconds) exceeds the cutoff time (%s seconds): %s: %s -> %s", + round(time_diff, 2), + round(cutoff_time, 2), + prev.filename.parent, + prev.filename.name, + cur.filename.name, + ) sequences.append([cur]) continue @@ -165,30 +181,14 @@ def cut_sequence( return sequences -def _group_sort_images_by_folder( +def _group_by( image_metadatas: T.List[types.ImageMetadata], -) -> T.List[T.List[types.ImageMetadata]]: - # group images by parent directory - sequences_by_group_key: T.Dict[T.Tuple, T.List[types.ImageMetadata]] = {} - for image_metadata in image_metadatas: - filename = image_metadata.filename.resolve() - # Make sure a sequence comes from the same folder and the same camera - group_key = ( - str(filename.parent), - image_metadata.MAPDeviceMake, - image_metadata.MAPDeviceModel, - image_metadata.width, - image_metadata.height, - ) - sequences_by_group_key.setdefault(group_key, []).append(image_metadata) - - sequences = list(sequences_by_group_key.values()) - for sequence in sequences: - sequence.sort( - key=lambda metadata: metadata.sort_key(), - ) - - return sequences + group_key_func=T.Callable[[types.ImageMetadata], T.Hashable], +) -> T.Dict[T.Hashable, T.List[types.ImageMetadata]]: + grouped: T.Dict[T.Hashable, T.List[types.ImageMetadata]] = {} + for metadata in image_metadatas: + grouped.setdefault(group_key_func(metadata), []).append(metadata) + return grouped def _interpolate_subsecs_for_sorting(sequence: PointSequence) -> None: @@ -433,31 +433,58 @@ def process_sequence_properties( input_sequences: T.List[PointSequence] output_sequences: T.List[PointSequence] - input_sequences = _group_sort_images_by_folder(image_metadatas) - if input_sequences: - assert isinstance(input_sequences[0], list) + # Group by folder and camera + grouped = _group_by( + image_metadatas, + lambda image_metadata: ( + str(metadata.filename.parent), + image_metadata.MAPDeviceMake, + image_metadata.MAPDeviceModel, + image_metadata.width, + image_metadata.height, + ), + ) + for key in grouped: + LOG.debug( + "Group sequences by %s, %s: %s images", + grouped[key][0].filename.parent, + key, + len(grouped[key]), + ) + output_sequences = list(grouped.values()) + LOG.info("Found %s sequences from different cameras", len(output_sequences)) - # make sure they are sorted + # Make sure each sequence is sorted (in-place update) + input_sequences = output_sequences + output_sequences = input_sequences for sequence in input_sequences: - for cur, nxt in geo.pairwise(sequence): - assert cur.time <= nxt.time, "sequence must be sorted" + sequence.sort( + key=lambda metadata: 
metadata.sort_key(), + ) + # Interpolate subseconds for same timestamps (in-place update) + input_sequences = output_sequences + output_sequences = input_sequences for sequence in input_sequences: _interpolate_subsecs_for_sorting(sequence) # Cut sequences by time or distance # NOTE: do not cut by distance here because it affects the speed limit check + input_sequences = output_sequences output_sequences = [] for sequence in input_sequences: output_sequences.extend( cut_sequence_by_time_or_distance(sequence, cutoff_time=cutoff_time) ) assert len(image_metadatas) == sum(len(s) for s in output_sequences) + LOG.info( + "Found %s sequences after cut by cutoff time %d", + len(output_sequences), + cutoff_time, + ) # Duplication check input_sequences = output_sequences - if input_sequences: - assert isinstance(input_sequences[0], list) output_sequences = [] for sequence in input_sequences: output_sequence, errors = duplication_check( @@ -469,21 +496,17 @@ def process_sequence_properties( output_sequences.append(output_sequence) error_metadatas.extend(errors) - # Interpolate angles + # Interpolate angles (in-place update) input_sequences = output_sequences - if input_sequences: - assert isinstance(input_sequences[0], list) + output_sequences = input_sequences for sequence in input_sequences: if interpolate_directions: for image in sequence: image.angle = None geo.interpolate_directions_if_none(sequence) - output_sequences = input_sequences # Cut sequences by max number of images, max filesize, and max pixels input_sequences = output_sequences - if input_sequences: - assert isinstance(input_sequences[0], list) output_sequences = [] for sequence in input_sequences: output_sequences.extend( @@ -494,19 +517,21 @@ def process_sequence_properties( max_sequence_pixels, ) ) + LOG.info("Found %s sequences after cut by sequence limits", len(output_sequences)) # Check limits for sequences + input_sequences = output_sequences output_sequences, errors = _check_sequence_limits( input_sequences, max_sequence_filesize_in_bytes, max_avg_speed ) error_metadatas.extend(errors) - if output_sequences: - assert isinstance(output_sequences[0], list) + LOG.info("Found %s sequences after sequence limit checks", len(output_sequences)) - # Assign sequence UUIDs + # Assign sequence UUIDs (in-place update) sequence_idx = 0 image_metadatas = [] input_sequences = output_sequences + output_sequences = input_sequences for sequence in input_sequences: # assign sequence UUIDs for image in sequence: From b2e62def37039ecb3a7790605ca235017ba796d1 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Thu, 13 Mar 2025 13:01:11 -0700 Subject: [PATCH 08/19] update --- mapillary_tools/process_sequence_properties.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/mapillary_tools/process_sequence_properties.py b/mapillary_tools/process_sequence_properties.py index 078db8fbc..4ce0d35fa 100644 --- a/mapillary_tools/process_sequence_properties.py +++ b/mapillary_tools/process_sequence_properties.py @@ -452,21 +452,23 @@ def process_sequence_properties( len(grouped[key]), ) output_sequences = list(grouped.values()) - LOG.info("Found %s sequences from different cameras", len(output_sequences)) + LOG.info( + "Found %s sequences from different folders and cameras", len(output_sequences) + ) # Make sure each sequence is sorted (in-place update) input_sequences = output_sequences - output_sequences = input_sequences for sequence in input_sequences: sequence.sort( key=lambda metadata: metadata.sort_key(), ) + output_sequences = 
input_sequences # Interpolate subseconds for same timestamps (in-place update) input_sequences = output_sequences - output_sequences = input_sequences for sequence in input_sequences: _interpolate_subsecs_for_sorting(sequence) + output_sequences = input_sequences # Cut sequences by time or distance # NOTE: do not cut by distance here because it affects the speed limit check @@ -498,12 +500,12 @@ def process_sequence_properties( # Interpolate angles (in-place update) input_sequences = output_sequences - output_sequences = input_sequences for sequence in input_sequences: if interpolate_directions: for image in sequence: image.angle = None geo.interpolate_directions_if_none(sequence) + output_sequences = input_sequences # Cut sequences by max number of images, max filesize, and max pixels input_sequences = output_sequences @@ -531,7 +533,6 @@ def process_sequence_properties( sequence_idx = 0 image_metadatas = [] input_sequences = output_sequences - output_sequences = input_sequences for sequence in input_sequences: # assign sequence UUIDs for image in sequence: From 26bbcc575bebe5928c973c92bd882e109c2a4c9f Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Thu, 13 Mar 2025 13:11:03 -0700 Subject: [PATCH 09/19] update --- mapillary_tools/process_sequence_properties.py | 1 - 1 file changed, 1 deletion(-) diff --git a/mapillary_tools/process_sequence_properties.py b/mapillary_tools/process_sequence_properties.py index 4ce0d35fa..1b576edb6 100644 --- a/mapillary_tools/process_sequence_properties.py +++ b/mapillary_tools/process_sequence_properties.py @@ -534,7 +534,6 @@ def process_sequence_properties( image_metadatas = [] input_sequences = output_sequences for sequence in input_sequences: - # assign sequence UUIDs for image in sequence: # using incremental id as shorter "uuid", so we can save some space for the desc file image.MAPSequenceUUID = str(sequence_idx) From 9010267d34eb6cbf2e0170abc0a235528091d6af Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Thu, 13 Mar 2025 13:35:05 -0700 Subject: [PATCH 10/19] updates --- mapillary_tools/process_sequence_properties.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/mapillary_tools/process_sequence_properties.py b/mapillary_tools/process_sequence_properties.py index 1b576edb6..b5596c765 100644 --- a/mapillary_tools/process_sequence_properties.py +++ b/mapillary_tools/process_sequence_properties.py @@ -445,12 +445,7 @@ def process_sequence_properties( ), ) for key in grouped: - LOG.debug( - "Group sequences by %s, %s: %s images", - grouped[key][0].filename.parent, - key, - len(grouped[key]), - ) + LOG.debug("Group sequences by %s: %s images", key, len(grouped[key])) output_sequences = list(grouped.values()) LOG.info( "Found %s sequences from different folders and cameras", len(output_sequences) @@ -470,7 +465,7 @@ def process_sequence_properties( _interpolate_subsecs_for_sorting(sequence) output_sequences = input_sequences - # Cut sequences by time or distance + # Cut sequences by cutoff time # NOTE: do not cut by distance here because it affects the speed limit check input_sequences = output_sequences output_sequences = [] @@ -531,16 +526,18 @@ def process_sequence_properties( # Assign sequence UUIDs (in-place update) sequence_idx = 0 - image_metadatas = [] input_sequences = output_sequences for sequence in input_sequences: for image in sequence: # using incremental id as shorter "uuid", so we can save some space for the desc file image.MAPSequenceUUID = str(sequence_idx) - image_metadatas.append(image) sequence_idx += 1 
output_sequences = input_sequences + image_metadatas = [] + for sequence in input_sequences: + image_metadatas.extend(sequence) + results = error_metadatas + image_metadatas + video_metadatas assert len(metadatas) == len(results), ( From c5c26db959cb23f23c08bc31bd774cbd044c9f97 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Thu, 13 Mar 2025 17:26:18 -0700 Subject: [PATCH 11/19] cut by distance --- .../process_sequence_properties.py | 29 ++++++++++++++----- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/mapillary_tools/process_sequence_properties.py b/mapillary_tools/process_sequence_properties.py index b5596c765..7f77a7d5e 100644 --- a/mapillary_tools/process_sequence_properties.py +++ b/mapillary_tools/process_sequence_properties.py @@ -436,12 +436,12 @@ def process_sequence_properties( # Group by folder and camera grouped = _group_by( image_metadatas, - lambda image_metadata: ( + lambda metadata: ( str(metadata.filename.parent), - image_metadata.MAPDeviceMake, - image_metadata.MAPDeviceModel, - image_metadata.width, - image_metadata.height, + metadata.MAPDeviceMake, + metadata.MAPDeviceModel, + metadata.width, + metadata.height, ), ) for key in grouped: @@ -466,16 +466,15 @@ def process_sequence_properties( output_sequences = input_sequences # Cut sequences by cutoff time - # NOTE: do not cut by distance here because it affects the speed limit check + # NOTE: Do not cut by distance here because it affects the speed limit check input_sequences = output_sequences output_sequences = [] for sequence in input_sequences: output_sequences.extend( cut_sequence_by_time_or_distance(sequence, cutoff_time=cutoff_time) ) - assert len(image_metadatas) == sum(len(s) for s in output_sequences) LOG.info( - "Found %s sequences after cut by cutoff time %d", + "Found %s sequences after cut by cutoff time %d seconds", len(output_sequences), cutoff_time, ) @@ -524,6 +523,20 @@ def process_sequence_properties( error_metadatas.extend(errors) LOG.info("Found %s sequences after sequence limit checks", len(output_sequences)) + # Cut sequences by cutoff distance + # NOTE: The speed limit check probably rejects most of anomalies + input_sequences = output_sequences + output_sequences = [] + for sequence in input_sequences: + output_sequences.extend( + cut_sequence_by_time_or_distance(sequence, cutoff_distance=cutoff_distance) + ) + LOG.info( + "Found %s sequences after cut by cutoff distance %d meters", + len(output_sequences), + cutoff_distance, + ) + # Assign sequence UUIDs (in-place update) sequence_idx = 0 input_sequences = output_sequences From a8f4852b1a64894c7c7cd8c3ac68e4cc5cd9a3f2 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Thu, 13 Mar 2025 17:46:54 -0700 Subject: [PATCH 12/19] fix tests --- tests/unit/test_sequence_processing.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/unit/test_sequence_processing.py b/tests/unit/test_sequence_processing.py index 12d8c515a..51b129786 100644 --- a/tests/unit/test_sequence_processing.py +++ b/tests/unit/test_sequence_processing.py @@ -296,11 +296,11 @@ def test_subsec_interpolation(tmpdir: py.path.local): curdir = tmpdir.mkdir("hello222").mkdir("world333") sequence: T.List[types.Metadata] = [ # s1 - _make_image_metadata(Path(curdir) / Path("./a.jpg"), 1, 1, 0.0, 1), - _make_image_metadata(Path(curdir) / Path("./b.jpg"), 0, 1, 1.0, 11), - _make_image_metadata(Path(curdir) / Path("./c.jpg"), 0, 0, 1.0, 22), - _make_image_metadata(Path(curdir) / Path("./d.jpg"), 0, 0, 1.0, 33), - _make_image_metadata(Path(curdir) / 
Path("./e.jpg"), 1, 0, 2.0, 44), + _make_image_metadata(Path(curdir) / Path("./a.jpg"), 0.00001, 0.00001, 0.0, 1), + _make_image_metadata(Path(curdir) / Path("./b.jpg"), 0.00000, 0.00001, 1.0, 11), + _make_image_metadata(Path(curdir) / Path("./c.jpg"), 0.00001, 0.00001, 1.0, 22), + _make_image_metadata(Path(curdir) / Path("./d.jpg"), 0.00001, 0.00001, 1.0, 33), + _make_image_metadata(Path(curdir) / Path("./e.jpg"), 0.00001, 0.00000, 2.0, 44), ] metadatas = psp.process_sequence_properties( sequence, @@ -324,7 +324,7 @@ def test_interpolation_single(tmpdir: py.path.local): curdir = tmpdir.mkdir("hello77").mkdir("world88") sequence = [ # s1 - _make_image_metadata(Path(curdir) / Path("./a.jpg"), 0, 0, 1, angle=123), + _make_image_metadata(Path(curdir) / Path("./a.jpg"), 0.2, 0.3, 1, angle=123), ] metadatas = psp.process_sequence_properties( sequence, From 99659ca3d393f7f5b0cdd8f206636dd00e4f4c4d Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Thu, 13 Mar 2025 18:07:42 -0700 Subject: [PATCH 13/19] fix tests --- tests/unit/test_sequence_processing.py | 78 ++++++++++++++++++++++---- 1 file changed, 68 insertions(+), 10 deletions(-) diff --git a/tests/unit/test_sequence_processing.py b/tests/unit/test_sequence_processing.py index 51b129786..f1370ff5d 100644 --- a/tests/unit/test_sequence_processing.py +++ b/tests/unit/test_sequence_processing.py @@ -21,8 +21,7 @@ def _make_image_metadata( lat: float, time: float, angle: T.Optional[float] = None, - width: int = 0, - height: int = 0, + **kwargs, ) -> types.ImageMetadata: filename = filename.resolve() if not filename.exists(): @@ -36,9 +35,8 @@ def _make_image_metadata( lat=lat, time=time, alt=None, + **kwargs, angle=angle, - width=width, - height=height, ) @@ -116,6 +114,66 @@ def test_find_sequences_by_folder(tmpdir: py.path.local): ] == [d.filename for d in actual_sequences[2]] +def test_find_sequences_by_camera(tmpdir: py.path.local): + curdir = tmpdir.mkdir("hello1").mkdir("world2") + sequence: T.List[types.MetadataOrError] = [ + # s1 + _make_image_metadata( + Path(curdir) / Path("hello.jpg"), + 1.00002, + 1.00002, + 2, + 11, + MAPDeviceMake="foo", + MAPDeviceModel="bar", + width=1, + height=1, + ), + _make_image_metadata( + Path(curdir) / Path("foo.jpg"), + 1.00001, + 1.00001, + 3, + 22, + MAPDeviceMake="foo", + MAPDeviceModel="bar", + width=1, + height=1, + ), + # s2 + _make_image_metadata( + Path(curdir) / Path("a.jpg"), + 1.00002, + 1.00002, + 1, + 33, + MAPDeviceMake="foo", + MAPDeviceModel="bar2", + width=1, + height=1, + ), + # s3 + _make_image_metadata( + Path(curdir) / Path("b.jpg"), + 1.00001, + 1.00001, + 1, + 33, + MAPDeviceMake="foo", + MAPDeviceModel="bar2", + width=1, + height=2, + ), + ] + metadatas = psp.process_sequence_properties( + sequence, + ) + uuids = set( + d.MAPSequenceUUID for d in metadatas if isinstance(d, types.ImageMetadata) + ) + assert len(uuids) == 3 + + def test_sequences_sorted(tmpdir: py.path.local): curdir = tmpdir.mkdir("hello1").mkdir("world2") sequence: T.List[types.ImageMetadata] = [ @@ -481,11 +539,11 @@ def test_cut_by_pixels(tmpdir: py.path.local): ) -def test_video_error(): - # curdir = tmpdir.mkdir("hello222").mkdir("world333") +def test_video_error(tmpdir: py.path.local): + curdir = tmpdir.mkdir("hello222").mkdir("videos") sequence: T.List[types.Metadata] = [ types.VideoMetadata( - Path("test_video_null_island.mp4"), + Path(curdir) / Path("test_video_null_island.mp4"), None, types.FileType.VIDEO, points=[ @@ -498,7 +556,7 @@ def test_video_error(): filesize=123, ), types.VideoMetadata( - 
Path("test_video_too_fast.mp4"), + Path(curdir) / Path("test_video_too_fast.mp4"), None, types.FileType.VIDEO, points=[ @@ -511,7 +569,7 @@ def test_video_error(): filesize=123, ), types.VideoMetadata( - Path("test_video_file_too_large.mp4"), + Path(curdir) / Path("test_video_file_too_large.mp4"), None, types.FileType.VIDEO, points=[geo.Point(1, 1, 1, 1, angle=None)], @@ -520,7 +578,7 @@ def test_video_error(): filesize=1024 * 1024 * 1024 * 200, ), types.VideoMetadata( - Path("test_good.mp4"), + Path(curdir) / Path("test_good.mp4"), None, types.FileType.VIDEO, points=[geo.Point(1, 1, 1, 1, angle=None)], From e121a0731be041b246841578a289c01795253df3 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Sun, 16 Mar 2025 11:32:04 -0700 Subject: [PATCH 14/19] do not use fixture for set up envvars --- tests/integration/test_gopro.py | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/tests/integration/test_gopro.py b/tests/integration/test_gopro.py index 0624dbacd..5ebe90698 100644 --- a/tests/integration/test_gopro.py +++ b/tests/integration/test_gopro.py @@ -18,7 +18,12 @@ IMPORT_PATH = "tests/data/gopro_data" - +TEST_ENVS = { + "MAPILLARY_TOOLS_GOPRO_GPS_FIXES": "0,2,3", + "MAPILLARY_TOOLS_GOPRO_MAX_DOP100": "100000", + "MAPILLARY_TOOLS_GOPRO_GPS_PRECISION": "10000000", + "MAPILLARY_TOOLS_MAX_AVG_SPEED": "200000", # km/h +} EXPECTED_DESCS: T.List[T.Any] = [ { "MAPAltitude": 9540.24, @@ -111,22 +116,8 @@ def setup_data(tmpdir: py.path.local): tmpdir.remove(ignore_errors=True) -@pytest.fixture -def setup_envvars(): - # this sample hero8.mp4 doesn't have any good GPS points, - # so we do not filter out bad GPS points - os.environ["MAPILLARY_TOOLS_GOPRO_GPS_FIXES"] = "0,2,3" - os.environ["MAPILLARY_TOOLS_GOPRO_MAX_DOP100"] = "100000" - os.environ["MAPILLARY_TOOLS_GOPRO_GPS_PRECISION"] = "10000000" - yield - del os.environ["MAPILLARY_TOOLS_GOPRO_GPS_FIXES"] - del os.environ["MAPILLARY_TOOLS_GOPRO_MAX_DOP100"] - del os.environ["MAPILLARY_TOOLS_GOPRO_GPS_PRECISION"] - - @pytest.mark.usefixtures("setup_config") @pytest.mark.usefixtures("setup_upload") -@pytest.mark.usefixtures("setup_envvars") def test_process_gopro_hero8( setup_data: py.path.local, use_exiftool: bool = False, @@ -137,7 +128,9 @@ def test_process_gopro_hero8( args = f"{EXECUTABLE} video_process --video_sample_interval=2 --video_sample_distance=-1 --geotag_source=gopro_videos {str(video_path)}" if use_exiftool: args = run_exiftool_and_generate_geotag_args(setup_data, args) - x = subprocess.run(args, shell=True) + env = os.environ.copy() + env.update(TEST_ENVS) + x = subprocess.run(args, shell=True, env=env) assert x.returncode == 0, x.stderr sample_dir = setup_data.join("mapillary_sampled_video_frames") desc_path = sample_dir.join("mapillary_image_description.json") @@ -150,14 +143,12 @@ def test_process_gopro_hero8( @pytest.mark.usefixtures("setup_config") @pytest.mark.usefixtures("setup_upload") -@pytest.mark.usefixtures("setup_envvars") def test_process_gopro_hero8_with_exiftool(setup_data: py.path.local): return test_process_gopro_hero8(setup_data, use_exiftool=True) @pytest.mark.usefixtures("setup_config") @pytest.mark.usefixtures("setup_upload") -@pytest.mark.usefixtures("setup_envvars") def test_process_gopro_hero8_with_exiftool_multiple_videos_with_the_same_name( setup_data: py.path.local, ): From 154fd269fdd237592f4f430d59f72a0aa92884e9 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Sun, 16 Mar 2025 11:34:37 -0700 Subject: [PATCH 15/19] move some process_and_upload to the right place --- 
tests/data/gpx/sf_30km_h.gpx | 638 +++++++++++++++++++ tests/integration/test_process_and_upload.py | 121 +++- 2 files changed, 750 insertions(+), 9 deletions(-) create mode 100644 tests/data/gpx/sf_30km_h.gpx diff --git a/tests/data/gpx/sf_30km_h.gpx b/tests/data/gpx/sf_30km_h.gpx new file mode 100644 index 000000000..b56b45faf --- /dev/null +++ b/tests/data/gpx/sf_30km_h.gpx @@ -0,0 +1,638 @@ + + + + New file 1 + + gpx.studio + + + + + New file 1 + + + 94.75 + + + + + tertiary + asphalt + shared_lane + lane + yes + 5 + + + + + + 93 + + + + + tertiary + asphalt + shared_lane + lane + yes + 5 + + + + + + 92.5 + + + + + tertiary + asphalt + shared_lane + lane + yes + 5 + + + + + + 92 + + + + + tertiary + asphalt + shared_lane + lane + yes + 5 + + + + + + 91.25 + + + + + tertiary + asphalt + shared_lane + lane + yes + 5 + + + + + + 90.75 + + + + + tertiary + asphalt + shared_lane + lane + yes + 5 + + + + + + 89 + + + + + tertiary + asphalt + shared_lane + lane + yes + 5 + + + + + + 85.5 + + + + + tertiary + asphalt + shared_lane + lane + yes + 5 + + + + + + 80 + + + + + tertiary + asphalt + shared_lane + lane + yes + 5 + + + + + + 78 + + + + + service + + + + + + 77.25 + + + + + service + + + + + + 76 + + + + + service + + + + + + 74.75 + + + + + yes + residential + + + + + + 77.5 + + + + + yes + residential + + + + + + 79.5 + + + + + yes + residential + + + + + + 81 + + + + + yes + residential + + + + + + 82.25 + + + + + yes + residential + + + + + + 83 + + + + + yes + residential + + + + + + 83.25 + + + + + yes + residential + + + + + + 83.25 + + + + + yes + residential + + + + + + 82.5 + + + + + yes + residential + + + + + + 81.75 + + + + + yes + residential + + + + + + 80.75 + + + + + yes + residential + + + + + + 80.25 + + + + + yes + residential + + + + + + 79.5 + + + + + yes + residential + + + + + + 76 + + + + + yes + residential + + + + + + 72 + + + + + yes + residential + + + + + + 66.25 + + + + + yes + residential + + + + + + 63.5 + + + + + yes + residential + + + + + + 62 + + + + + yes + residential + + + + + + 59.75 + + + + + yes + residential + + + + + + 56.75 + + + + + yes + residential + + + + + + 53.25 + + + + + yes + residential + + + + + + 49 + + + + + yes + residential + + + + + + 49.25 + + + + + yes + residential + + + + + + 50.5 + + + + + yes + residential + + + + + + 51.5 + + + + + yes + residential + + + + + + 52.75 + + + + + yes + residential + + + + + + 54.25 + + + + + yes + residential + + + + + + 59.75 + + + + + yes + residential + + + + + + 60.75 + + + + + yes + residential + + + + + + 61 + + + + + yes + residential + + + + + + 60.25 + + + + + yes + residential + + + + + + 58 + + + + + yes + residential + + + + + + 56.75 + + + + + yes + residential + + + + + + 55 + + + + + yes + residential + + + + + + 50.75 + + + + + yes + residential + + + + + + 46.25 + + + + + yes + residential + yes + + + + + + 47.75 + + + + + yes + residential + yes + + + + + + + diff --git a/tests/integration/test_process_and_upload.py b/tests/integration/test_process_and_upload.py index ccf7d6633..edbea8685 100644 --- a/tests/integration/test_process_and_upload.py +++ b/tests/integration/test_process_and_upload.py @@ -129,7 +129,7 @@ } -def _validate_output(upload_dir: py.path.local, expected): +def _validate_uploads(upload_dir: py.path.local, expected): descs = [] for file in upload_dir.listdir(): if str(file).endswith(".mp4"): @@ -138,13 +138,13 @@ def _validate_output(upload_dir: py.path.local, expected): descs.extend(validate_and_extract_zip(str(file))) else: raise Exception(f"invalid 
file {file}") + + excludes = ["filename", "filesize", "md5sum", "MAPMetaTags", "MAPSequenceUUID"] + actual = {} for desc in descs: actual[os.path.basename(desc["filename"])] = { - k: v - for k, v in desc.items() - if k - not in ["filename", "filesize", "md5sum", "MAPMetaTags", "MAPSequenceUUID"] + k: v for k, v in desc.items() if k not in excludes } assert expected == actual @@ -166,11 +166,11 @@ def test_process_and_upload(setup_data: py.path.local, setup_upload: py.path.loc ) assert x.returncode == 0, x.stderr if IS_FFMPEG_INSTALLED: - _validate_output( + _validate_uploads( setup_upload, {**EXPECTED_DESCS["gopro"], **EXPECTED_DESCS["image"]} ) else: - _validate_output(setup_upload, {**EXPECTED_DESCS["image"]}) + _validate_uploads(setup_upload, {**EXPECTED_DESCS["image"]}) @pytest.mark.usefixtures("setup_config") @@ -179,8 +179,111 @@ def test_process_and_upload_images_only( setup_upload: py.path.local, ): x = subprocess.run( - f"{EXECUTABLE} --verbose process_and_upload --filetypes=image {UPLOAD_FLAGS} {PROCESS_FLAGS} {setup_data}/images {setup_data}/images {setup_data}/images/DSC00001.JPG --desc_path=-", + f"""{EXECUTABLE} --verbose process_and_upload \ + {UPLOAD_FLAGS} {PROCESS_FLAGS} \ + --filetypes=image \ + --desc_path=- \ + {setup_data}/images {setup_data}/images {setup_data}/images/DSC00001.JPG +""", + shell=True, + ) + assert x.returncode == 0, x.stderr + _validate_uploads(setup_upload, EXPECTED_DESCS["image"]) + + +@pytest.mark.usefixtures("setup_config") +def test_video_process_and_upload( + setup_upload: py.path.local, setup_data: py.path.local +): + if not IS_FFMPEG_INSTALLED: + pytest.skip("skip because ffmpeg not installed") + + video_dir = setup_data.join("videos") + gpx_start_time = "2025_03_14_07_00_00_000" + gpx_end_time = "2025_03_14_07_01_33_624" + gpx_file = setup_data.join("gpx").join("sf_30km_h.gpx") + x = subprocess.run( + f"""{EXECUTABLE} video_process_and_upload \ + {PROCESS_FLAGS} {UPLOAD_FLAGS} \ + --video_sample_interval=2 \ + --video_sample_distance=-1 \ + --video_start_time {gpx_start_time} \ + --geotag_source gpx \ + --geotag_source_path {gpx_file} \ + --desc_path - \ + {video_dir} {video_dir.join("my_samples")} +""", + shell=True, + ) + assert x.returncode == 0, x.stderr + assert 1 == len(setup_upload.listdir()) + expected = { + "sample-5s_NA_000001.jpg": { + "MAPAltitude": 94.75, + "MAPCaptureTime": "2025_03_14_07_00_00_000", + "MAPCompassHeading": { + "MagneticHeading": 0.484, + "TrueHeading": 0.484, + }, + "MAPLatitude": 37.793585, + "MAPLongitude": -122.461396, + "MAPOrientation": 1, + "filetype": "image", + }, + "sample-5s_NA_000002.jpg": { + "MAPAltitude": 93.347, + "MAPCaptureTime": "2025_03_14_07_00_02_000", + "MAPCompassHeading": { + "MagneticHeading": 0.484, + "TrueHeading": 0.484, + }, + "MAPLatitude": 37.7937349, + "MAPLongitude": -122.4613944, + "MAPOrientation": 1, + "filetype": "image", + }, + "sample-5s_NA_000003.jpg": { + "MAPAltitude": 92.492, + "MAPCaptureTime": "2025_03_14_07_00_04_000", + "MAPCompassHeading": { + "MagneticHeading": 343.286, + "TrueHeading": 343.286, + }, + "MAPLatitude": 37.7938825, + "MAPLongitude": -122.4614226, + "MAPOrientation": 1, + "filetype": "image", + }, + } + _validate_uploads(setup_upload, expected) + + +@pytest.mark.usefixtures("setup_config") +def xtest_video_process_and_upload_after_gpx( + setup_upload: py.path.local, setup_data: py.path.local +): + if not IS_FFMPEG_INSTALLED: + pytest.skip("skip because ffmpeg not installed") + + video_dir = setup_data.join("videos") + gpx_start_time = 
"2025_03_14_07_00_00_000" + gpx_end_time = "2025_03_14_07_01_33_624" + video_start_time = "2025_03_14_07_01_34_624" + gpx_file = setup_data.join("gpx").join("sf_30km_h.gpx") + x = subprocess.run( + f"""{EXECUTABLE} video_process_and_upload \ + {PROCESS_FLAGS} {UPLOAD_FLAGS} \ + --video_sample_interval=2 \ + --video_sample_distance=-1 \ + --video_start_time {video_start_time} \ + --geotag_source gpx \ + --geotag_source_path {gpx_file} \ + --skip_process_errors \ + --desc_path - \ + {video_dir} {video_dir.join("my_samples")} +""", shell=True, ) assert x.returncode == 0, x.stderr - _validate_output(setup_upload, EXPECTED_DESCS["image"]) + assert 0 == len(setup_upload.listdir()) + _validate_uploads(setup_upload, {}) From 2609220a6fc956b9b5fd313d4411d7fc80e8e12a Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Sun, 16 Mar 2025 11:57:36 -0700 Subject: [PATCH 16/19] pass envvars to subprocess --- tests/integration/fixtures.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/integration/fixtures.py b/tests/integration/fixtures.py index 5d19b10dd..bf1454250 100644 --- a/tests/integration/fixtures.py +++ b/tests/integration/fixtures.py @@ -27,16 +27,20 @@ @pytest.fixture def setup_config(tmpdir: py.path.local): config_path = tmpdir.mkdir("configs").join("CLIENT_ID") - os.environ["MAPILLARY_CONFIG_PATH"] = str(config_path) + TEST_ENV = { + "MAPILLARY_CONFIG_PATH": str(config_path), + } + env = os.environ.copy() + env.update(TEST_ENV) x = subprocess.run( f"{EXECUTABLE} authenticate --user_name {USERNAME} --jwt test_user_token", shell=True, + env=env, ) assert x.returncode == 0, x.stderr yield config_path if tmpdir.check(): tmpdir.remove(ignore_errors=True) - del os.environ["MAPILLARY_CONFIG_PATH"] @pytest.fixture From 8460140d79f43fe85bcc5b6fbac4a5be9e7c1c2f Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Sun, 16 Mar 2025 11:58:15 -0700 Subject: [PATCH 17/19] increase limit to 400km/h --- mapillary_tools/constants.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mapillary_tools/constants.py b/mapillary_tools/constants.py index 0a20d0468..8f88fb781 100644 --- a/mapillary_tools/constants.py +++ b/mapillary_tools/constants.py @@ -14,8 +14,8 @@ DUPLICATE_DISTANCE = float(os.getenv(_ENV_PREFIX + "DUPLICATE_DISTANCE", 0.1)) DUPLICATE_ANGLE = float(os.getenv(_ENV_PREFIX + "DUPLICATE_ANGLE", 5)) MAX_AVG_SPEED = float( - os.getenv(_ENV_PREFIX + "MAX_AVG_SPEED", 200_000 / 3600) -) # 200 KM/h + os.getenv(_ENV_PREFIX + "MAX_AVG_SPEED", 400_000 / 3600) +) # 400 KM/h # in seconds VIDEO_SAMPLE_INTERVAL = float(os.getenv(_ENV_PREFIX + "VIDEO_SAMPLE_INTERVAL", -1)) # in meters From 2cf173c9c8c264f3f041c62b0b420014f5facb71 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Sun, 16 Mar 2025 12:40:34 -0700 Subject: [PATCH 18/19] fix tests in test_process.py --- tests/integration/test_process.py | 108 ++++++++++++++---------------- 1 file changed, 52 insertions(+), 56 deletions(-) diff --git a/tests/integration/test_process.py b/tests/integration/test_process.py index 2a00d5ba8..d829f1de7 100644 --- a/tests/integration/test_process.py +++ b/tests/integration/test_process.py @@ -14,8 +14,6 @@ run_exiftool_and_generate_geotag_args, setup_config, setup_data, - setup_upload, - USERNAME, validate_and_extract_zip, verify_descs, ) @@ -72,12 +70,6 @@ } -def test_basic(): - for option in ["--version", "--help"]: - x = subprocess.run(f"{EXECUTABLE} {option}", shell=True) - assert x.returncode == 0, x.stderr - - def _local_to_utc(ct: str): return ( datetime.datetime.fromisoformat(ct) @@ 
From 2cf173c9c8c264f3f041c62b0b420014f5facb71 Mon Sep 17 00:00:00 2001
From: Tao Peng
Date: Sun, 16 Mar 2025 12:40:34 -0700
Subject: [PATCH 18/19] fix tests in test_process.py

---
 tests/integration/test_process.py | 108 ++++++++++++++----------------
 1 file changed, 52 insertions(+), 56 deletions(-)

diff --git a/tests/integration/test_process.py b/tests/integration/test_process.py
index 2a00d5ba8..d829f1de7 100644
--- a/tests/integration/test_process.py
+++ b/tests/integration/test_process.py
@@ -14,8 +14,6 @@
     run_exiftool_and_generate_geotag_args,
     setup_config,
     setup_data,
-    setup_upload,
-    USERNAME,
     validate_and_extract_zip,
     verify_descs,
 )
@@ -72,12 +70,6 @@
 }


-def test_basic():
-    for option in ["--version", "--help"]:
-        x = subprocess.run(f"{EXECUTABLE} {option}", shell=True)
-        assert x.returncode == 0, x.stderr
-
-
 def _local_to_utc(ct: str):
     return (
         datetime.datetime.fromisoformat(ct)
@@ -86,6 +78,12 @@ def _local_to_utc(ct: str):
     )


+def test_basic():
+    for option in ["--version", "--help"]:
+        x = subprocess.run(f"{EXECUTABLE} {option}", shell=True)
+        assert x.returncode == 0, x.stderr
+
+
 def test_process_images_with_defaults(
     setup_data: py.path.local,
     use_exiftool: bool = False,
@@ -371,15 +369,22 @@ def filter_out_errors(descs):
     return [desc for desc in descs if "error" not in desc]


-def test_geotagging_from_gpx(setup_data: py.path.local):
+def test_geotagging_images_from_gpx(setup_data: py.path.local):
     gpx_file = setup_data.join("test.gpx")
     with gpx_file.open("w") as fp:
         fp.write(GPX_CONTENT)
+    images = setup_data.join("images")
+
     x = subprocess.run(
-        f"{EXECUTABLE} process --file_types=image {PROCESS_FLAGS} {setup_data} --geotag_source gpx --geotag_source_path {gpx_file} --skip_process_errors",
+        f"""{EXECUTABLE} process {PROCESS_FLAGS} \
+            --file_types=image \
+            --geotag_source=gpx \
+            --geotag_source_path={gpx_file} \
+            --skip_process_errors \
+            {images}
+""",
         shell=True,
     )
-    assert x.returncode == 0, x.stderr
     verify_descs(
         [
             {
@@ -405,14 +410,15 @@ def test_geotagging_from_gpx(setup_data: py.path.local):
                 },
             },
         ],
-        Path(setup_data, "mapillary_image_description.json"),
+        Path(images, "mapillary_image_description.json"),
     )


-def test_geotagging_from_gpx_with_offset(setup_data: py.path.local):
+def test_geotagging_images_from_gpx_with_offset(setup_data: py.path.local):
     gpx_file = setup_data.join("test.gpx")
     with gpx_file.open("w") as fp:
         fp.write(GPX_CONTENT)
+
     x = subprocess.run(
         f"{EXECUTABLE} process --file_types=image {PROCESS_FLAGS} {setup_data} --geotag_source gpx --geotag_source_path {gpx_file} --interpolation_offset_time=-20 --skip_process_errors",
         shell=True,
@@ -449,7 +455,7 @@ def test_geotagging_from_gpx_with_offset(setup_data: py.path.local):
     )


-def test_geotagging_from_gpx_use_gpx_start_time(setup_data: py.path.local):
+def test_geotagging_images_from_gpx_use_gpx_start_time(setup_data: py.path.local):
     gpx_file = setup_data.join("test.gpx")
     with gpx_file.open("w") as fp:
         fp.write(GPX_CONTENT)
@@ -489,7 +495,9 @@ def test_geotagging_from_gpx_use_gpx_start_time(setup_data: py.path.local):
     )


-def test_geotagging_from_gpx_use_gpx_start_time_with_offset(setup_data: py.path.local):
+def test_geotagging_images_from_gpx_use_gpx_start_time_with_offset(
+    setup_data: py.path.local,
+):
     gpx_file = setup_data.join("test.gpx")
     with gpx_file.open("w") as fp:
         fp.write(GPX_CONTENT)
@@ -643,19 +651,28 @@ def test_video_process(setup_data: py.path.local):
         pytest.skip("skip because ffmpeg not installed")

     video_dir = setup_data.join("videos")
-    gpx_file = video_dir.join("test.gpx")
+    gpx_file = setup_data.join("gpx").join("sf_30km_h.gpx")
+    gpx_start_time = "2025_03_14_07_00_00_000"
+    gpx_end_time = "2025_03_14_07_01_33_624"
+    video_start_time = "2025_03_14_07_00_00_000"
     desc_path = video_dir.join("my_samples").join("mapillary_image_description.json")
-    with gpx_file.open("w") as fp:
-        fp.write(GPX_CONTENT)
     x = subprocess.run(
-        f"{EXECUTABLE} --verbose video_process --video_sample_interval=2 --video_sample_distance=-1 {PROCESS_FLAGS} --skip_process_errors --video_start_time 2018_06_08_20_23_34_123 --geotag_source gpx --geotag_source_path {gpx_file} {video_dir} {video_dir.join('my_samples')}",
+        f"""{EXECUTABLE} --verbose video_process \
+            {PROCESS_FLAGS} \
+            --video_sample_interval=2 \
+            --video_sample_distance=-1 \
+            --skip_process_errors \
+            --video_start_time {video_start_time} \
+            --geotag_source gpx \
+            --geotag_source_path {gpx_file} {video_dir} {video_dir.join("my_samples")}
+""",
         shell=True,
     )
     assert x.returncode == 0, x.stderr
     with open(desc_path) as fp:
         descs = json.load(fp)
-    assert 1 == len(find_desc_errors(descs))
-    assert 2 == len(filter_out_errors(descs))
+    assert 0 == len(find_desc_errors(descs))
+    assert 3 == len(filter_out_errors(descs))


 def test_video_process_sample_with_multiple_distances(setup_data: py.path.local):
@@ -767,54 +784,33 @@ def test_video_process_sample_with_distance(setup_data: py.path.local):
     )


-@pytest.mark.usefixtures("setup_config")
-def test_video_process_and_upload(
-    setup_upload: py.path.local, setup_data: py.path.local
-):
-    if not IS_FFMPEG_INSTALLED:
-        pytest.skip("skip because ffmpeg not installed")
-
-    video_dir = setup_data.join("videos")
-    gpx_file = video_dir.join("test.gpx")
-    with gpx_file.open("w") as fp:
-        fp.write(GPX_CONTENT)
-    x = subprocess.run(
-        f"{EXECUTABLE} video_process_and_upload {PROCESS_FLAGS} --video_sample_interval=2 --video_sample_distance=-1 --video_start_time 2018_06_08_20_23_34_123 --geotag_source gpx --geotag_source_path {gpx_file} --dry_run --user_name={USERNAME} {video_dir} {video_dir.join('my_samples')}",
-        shell=True,
-    )
-    assert x.returncode != 0, x.stderr
-    assert 0 == len(setup_upload.listdir())
-
-    x = subprocess.run(
-        f"{EXECUTABLE} video_process_and_upload {PROCESS_FLAGS} --video_sample_interval=2 --video_sample_distance=-1 --video_start_time 2018_06_08_20_23_34_123 --geotag_source gpx --geotag_source_path {gpx_file} --skip_process_errors --dry_run --user_name={USERNAME} {video_dir} {video_dir.join('my_samples')}",
-        shell=True,
-    )
-    assert x.returncode == 0, x.stderr
-    assert 2 == len(setup_upload.listdir())
-    for z in setup_upload.listdir():
-        validate_and_extract_zip(str(z))
-
-
 def test_video_process_multiple_videos(setup_data: py.path.local):
     if not IS_FFMPEG_INSTALLED:
         pytest.skip("skip because ffmpeg not installed")

-    gpx_file = setup_data.join("test.gpx")
     desc_path = setup_data.join("my_samples").join("mapillary_image_description.json")
     sub_folder = setup_data.join("video_sub_folder").mkdir()
     video_path = setup_data.join("videos").join("sample-5s.mp4")
     video_path.copy(sub_folder)
-    with gpx_file.open("w") as fp:
-        fp.write(GPX_CONTENT)
+    gpx_file = setup_data.join("gpx").join("sf_30km_h.gpx")
+    gpx_start_time = "2025_03_14_07_00_00_000"
+    gpx_end_time = "2025_03_14_07_01_33_624"
     x = subprocess.run(
-        f"{EXECUTABLE} video_process {PROCESS_FLAGS} --video_sample_interval=2 --video_sample_distance=-1 --video_start_time 2018_06_08_20_23_34_123 --geotag_source gpx --geotag_source_path {gpx_file} {video_path} {setup_data.join('my_samples')}",
+        f"""{EXECUTABLE} video_process {PROCESS_FLAGS} \
+            --video_sample_interval=2 \
+            --video_sample_distance=-1 \
+            --video_start_time={gpx_start_time} \
+            --geotag_source=gpx \
+            --geotag_source_path={gpx_file} \
+            {video_path} {setup_data.join("my_samples")}
+""",
         shell=True,
     )
-    assert x.returncode != 0, x.stderr
+    assert x.returncode == 0, x.stderr
     with open(desc_path) as fp:
         descs = json.load(fp)
     for d in descs:
         assert Path(d["filename"]).is_file(), d["filename"]
         assert "sample-5s.mp4" in d["filename"]
-    assert 1 == len(find_desc_errors(descs))
-    assert 2 == len(filter_out_errors(descs))
+    assert 0 == len(find_desc_errors(descs))
+    assert 3 == len(filter_out_errors(descs))
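
Note: the updated assertions rely on the two helpers shown earlier in
test_process.py: filter_out_errors keeps descriptions without an "error" key,
and find_desc_errors is presumably its complement. A self-contained sketch of
the kind of check the tests now make, with invented sample data:

    def find_desc_errors(descs):
        # Assumed to mirror the helper of the same name in test_process.py.
        return [desc for desc in descs if "error" in desc]

    def filter_out_errors(descs):
        return [desc for desc in descs if "error" not in desc]

    descs = [
        {"filename": "sample-5s_NA_000001.jpg", "filetype": "image"},
        {"filename": "sample-5s_NA_000002.jpg", "filetype": "image"},
        {"filename": "sample-5s_NA_000003.jpg", "filetype": "image"},
    ]
    assert 0 == len(find_desc_errors(descs))
    assert 3 == len(filter_out_errors(descs))
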
{video_dir.join("my_samples")} +""", shell=True, ) assert x.returncode == 0, x.stderr with open(desc_path) as fp: descs = json.load(fp) - assert 1 == len(find_desc_errors(descs)) - assert 2 == len(filter_out_errors(descs)) + assert 0 == len(find_desc_errors(descs)) + assert 3 == len(filter_out_errors(descs)) def test_video_process_sample_with_multiple_distances(setup_data: py.path.local): @@ -767,54 +784,33 @@ def test_video_process_sample_with_distance(setup_data: py.path.local): ) -@pytest.mark.usefixtures("setup_config") -def test_video_process_and_upload( - setup_upload: py.path.local, setup_data: py.path.local -): - if not IS_FFMPEG_INSTALLED: - pytest.skip("skip because ffmpeg not installed") - - video_dir = setup_data.join("videos") - gpx_file = video_dir.join("test.gpx") - with gpx_file.open("w") as fp: - fp.write(GPX_CONTENT) - x = subprocess.run( - f"{EXECUTABLE} video_process_and_upload {PROCESS_FLAGS} --video_sample_interval=2 --video_sample_distance=-1 --video_start_time 2018_06_08_20_23_34_123 --geotag_source gpx --geotag_source_path {gpx_file} --dry_run --user_name={USERNAME} {video_dir} {video_dir.join('my_samples')}", - shell=True, - ) - assert x.returncode != 0, x.stderr - assert 0 == len(setup_upload.listdir()) - - x = subprocess.run( - f"{EXECUTABLE} video_process_and_upload {PROCESS_FLAGS} --video_sample_interval=2 --video_sample_distance=-1 --video_start_time 2018_06_08_20_23_34_123 --geotag_source gpx --geotag_source_path {gpx_file} --skip_process_errors --dry_run --user_name={USERNAME} {video_dir} {video_dir.join('my_samples')}", - shell=True, - ) - assert x.returncode == 0, x.stderr - assert 2 == len(setup_upload.listdir()) - for z in setup_upload.listdir(): - validate_and_extract_zip(str(z)) - - def test_video_process_multiple_videos(setup_data: py.path.local): if not IS_FFMPEG_INSTALLED: pytest.skip("skip because ffmpeg not installed") - gpx_file = setup_data.join("test.gpx") desc_path = setup_data.join("my_samples").join("mapillary_image_description.json") sub_folder = setup_data.join("video_sub_folder").mkdir() video_path = setup_data.join("videos").join("sample-5s.mp4") video_path.copy(sub_folder) - with gpx_file.open("w") as fp: - fp.write(GPX_CONTENT) + gpx_file = setup_data.join("gpx").join("sf_30km_h.gpx") + gpx_start_time = "2025_03_14_07_00_00_000" + gpx_end_time = "2025_03_14_07_01_33_624" x = subprocess.run( - f"{EXECUTABLE} video_process {PROCESS_FLAGS} --video_sample_interval=2 --video_sample_distance=-1 --video_start_time 2018_06_08_20_23_34_123 --geotag_source gpx --geotag_source_path {gpx_file} {video_path} {setup_data.join('my_samples')}", + f"""{EXECUTABLE} video_process {PROCESS_FLAGS} \ + --video_sample_interval=2 \ + --video_sample_distance=-1 \ + --video_start_time={gpx_start_time} \ + --geotag_source=gpx \ + --geotag_source_path={gpx_file} \ + {video_path} {setup_data.join("my_samples")} +""", shell=True, ) - assert x.returncode != 0, x.stderr + assert x.returncode == 0, x.stderr with open(desc_path) as fp: descs = json.load(fp) for d in descs: assert Path(d["filename"]).is_file(), d["filename"] assert "sample-5s.mp4" in d["filename"] - assert 1 == len(find_desc_errors(descs)) - assert 2 == len(filter_out_errors(descs)) + assert 0 == len(find_desc_errors(descs)) + assert 3 == len(filter_out_errors(descs)) From 9dad2063a7d5563d52bb005bbbd281e7e9de771a Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Sun, 16 Mar 2025 17:11:03 -0700 Subject: [PATCH 19/19] Revert "pass envvars to subprocess" This reverts commit 
---
 tests/integration/fixtures.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/tests/integration/fixtures.py b/tests/integration/fixtures.py
index bf1454250..5d19b10dd 100644
--- a/tests/integration/fixtures.py
+++ b/tests/integration/fixtures.py
@@ -27,20 +27,16 @@
 @pytest.fixture
 def setup_config(tmpdir: py.path.local):
     config_path = tmpdir.mkdir("configs").join("CLIENT_ID")
-    TEST_ENV = {
-        "MAPILLARY_CONFIG_PATH": str(config_path),
-    }
-    env = os.environ.copy()
-    env.update(TEST_ENV)
+    os.environ["MAPILLARY_CONFIG_PATH"] = str(config_path)
     x = subprocess.run(
         f"{EXECUTABLE} authenticate --user_name {USERNAME} --jwt test_user_token",
         shell=True,
-        env=env,
     )
     assert x.returncode == 0, x.stderr
     yield config_path
     if tmpdir.check():
         tmpdir.remove(ignore_errors=True)
+    del os.environ["MAPILLARY_CONFIG_PATH"]


 @pytest.fixture
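
Note: the revert returns to mutating os.environ inside the fixture. A common
pytest-native alternative (not used in this series) is monkeypatch.setenv,
which undoes the change automatically at teardown; a minimal sketch, with the
authenticate call from the real fixture omitted:

    import pytest

    @pytest.fixture
    def setup_config(tmpdir, monkeypatch):
        config_path = tmpdir.mkdir("configs").join("CLIENT_ID")
        # monkeypatch restores the original environment when the test ends,
        # so no manual `del os.environ[...]` cleanup is needed.
        monkeypatch.setenv("MAPILLARY_CONFIG_PATH", str(config_path))
        yield config_path
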