From 2cad74e3277ac69068661f2cc99ce9bee1acb07e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nikola=20Ple=C5=A1a?= Date: Mon, 3 Feb 2025 10:45:59 +0100 Subject: [PATCH 01/13] Add file sizes to output --- .../geotag/geotag_images_from_exif.py | 2 ++ .../geotag/geotag_videos_from_exiftool_video.py | 2 ++ .../geotag/geotag_videos_from_video.py | 5 +++++ mapillary_tools/process_geotag_properties.py | 5 ++++- mapillary_tools/types.py | 17 +++++++++++++++++ mapillary_tools/utils.py | 3 +++ .../video_data_extraction/extract_video_data.py | 1 + schema/image_description_schema.json | 8 ++++++++ 8 files changed, 42 insertions(+), 1 deletion(-) diff --git a/mapillary_tools/geotag/geotag_images_from_exif.py b/mapillary_tools/geotag/geotag_images_from_exif.py index 474a303fe..d07e4b0e2 100644 --- a/mapillary_tools/geotag/geotag_images_from_exif.py +++ b/mapillary_tools/geotag/geotag_images_from_exif.py @@ -6,6 +6,7 @@ from tqdm import tqdm +from mapillary_tools import utils from .. import exceptions, exif_write, geo, types from ..exif_read import ExifRead, ExifReadABC from .geotag_from_generic import GeotagImagesFromGeneric @@ -64,6 +65,7 @@ def build_image_metadata( image_metadata = types.ImageMetadata( filename=image_path, md5sum=None, + size=utils.get_file_size(image_path), time=geo.as_unix_time(capture_time), lat=lat, lon=lon, diff --git a/mapillary_tools/geotag/geotag_videos_from_exiftool_video.py b/mapillary_tools/geotag/geotag_videos_from_exiftool_video.py index 3ed8465ed..8a4a30c1a 100644 --- a/mapillary_tools/geotag/geotag_videos_from_exiftool_video.py +++ b/mapillary_tools/geotag/geotag_videos_from_exiftool_video.py @@ -6,6 +6,7 @@ from tqdm import tqdm +from mapillary_tools import utils from .. import exceptions, exiftool_read, geo, types from ..exiftool_read_video import ExifToolReadVideo from ..telemetry import GPSPoint @@ -66,6 +67,7 @@ def geotag_video(element: ET.Element) -> types.VideoMetadataOrError: video_metadata = types.VideoMetadata( video_path, md5sum=None, + size=utils.get_file_size(video_path), filetype=types.FileType.VIDEO, points=points, make=exif.extract_make(), diff --git a/mapillary_tools/geotag/geotag_videos_from_video.py b/mapillary_tools/geotag/geotag_videos_from_video.py index b5ac06316..27f6f7de0 100644 --- a/mapillary_tools/geotag/geotag_videos_from_video.py +++ b/mapillary_tools/geotag/geotag_videos_from_video.py @@ -4,6 +4,8 @@ from multiprocessing import Pool from pathlib import Path +from mapillary_tools import utils + from tqdm import tqdm from .. import exceptions, geo, types @@ -82,6 +84,7 @@ def _extract_video_metadata( return types.VideoMetadata( filename=video_path, md5sum=None, + size=utils.get_file_size(video_path), filetype=types.FileType.CAMM, points=points, make=make, @@ -105,6 +108,7 @@ def _extract_video_metadata( return types.VideoMetadata( filename=video_path, md5sum=None, + size=utils.get_file_size(video_path), filetype=types.FileType.GOPRO, points=T.cast(T.List[geo.Point], points_with_fix), make=make, @@ -128,6 +132,7 @@ def _extract_video_metadata( return types.VideoMetadata( filename=video_path, md5sum=None, + size=utils.get_file_size(video_path), filetype=types.FileType.BLACKVUE, points=points, make=make, diff --git a/mapillary_tools/process_geotag_properties.py b/mapillary_tools/process_geotag_properties.py index ff6096f7f..f7d3a0ec3 100644 --- a/mapillary_tools/process_geotag_properties.py +++ b/mapillary_tools/process_geotag_properties.py @@ -424,19 +424,22 @@ def _show_stats_per_filetype( skipped_process_errors: T.Set[T.Type[Exception]], ): good_metadatas: T.List[T.Union[types.VideoMetadata, types.ImageMetadata]] = [] + size_to_upload = 0 error_metadatas: T.List[types.ErrorMetadata] = [] for metadata in metadatas: if isinstance(metadata, types.ErrorMetadata): error_metadatas.append(metadata) else: good_metadatas.append(metadata) + size_to_upload += metadata.size LOG.info("%8d %s(s) read in total", len(metadatas), filetype.value) if good_metadatas: LOG.info( - "\t %8d %s(s) are ready to be uploaded", + "\t %8d %s(s) (%s MB) are ready to be uploaded", len(good_metadatas), filetype.value, + round(size_to_upload / 1024 / 1024, 1), ) error_counter = collections.Counter( diff --git a/mapillary_tools/types.py b/mapillary_tools/types.py index d50a009fb..cc6e57ee8 100644 --- a/mapillary_tools/types.py +++ b/mapillary_tools/types.py @@ -56,6 +56,7 @@ class ImageMetadata(geo.Point): MAPMetaTags: T.Optional[T.Dict] = None # deprecated since v0.10.0; keep here for compatibility MAPFilename: T.Optional[str] = None + size: T.Optional[int] = None def update_md5sum(self, image_data: T.Optional[T.BinaryIO] = None) -> None: if self.md5sum is None: @@ -81,6 +82,7 @@ class VideoMetadata: points: T.Sequence[geo.Point] make: T.Optional[str] = None model: T.Optional[str] = None + size: T.Optional[int] = None def update_md5sum(self) -> None: if self.md5sum is None: @@ -143,6 +145,7 @@ class ImageDescription(_SequenceOnly, _Image, MetaProperties, total=True): # if None or absent, it will be calculated md5sum: T.Optional[str] filetype: Literal["image"] + size: T.Optional[int] class _VideoDescriptionRequired(TypedDict, total=True): @@ -156,6 +159,7 @@ class _VideoDescriptionRequired(TypedDict, total=True): class VideoDescription(_VideoDescriptionRequired, total=False): MAPDeviceMake: str MAPDeviceModel: str + size: T.Optional[int] class _ErrorDescription(TypedDict, total=False): @@ -368,6 +372,10 @@ def merge_schema(*schemas: T.Dict) -> T.Dict: "type": ["string", "null"], "description": "MD5 checksum of the image content. If not provided, the uploader will compute it", }, + "size": { + "type": ["number", "null"], + "description": "File size", + }, "filetype": { "type": "string", "enum": [FileType.IMAGE.value], @@ -394,6 +402,10 @@ def merge_schema(*schemas: T.Dict) -> T.Dict: "type": ["string", "null"], "description": "MD5 checksum of the video content. If not provided, the uploader will compute it", }, + "size": { + "type": ["number", "null"], + "description": "File size", + }, "filetype": { "type": "string", "enum": [ @@ -484,6 +496,7 @@ def _as_video_desc(metadata: VideoMetadata) -> VideoDescription: "filename": str(metadata.filename.resolve()), "md5sum": metadata.md5sum, "filetype": metadata.filetype.value, + "size": metadata.size, "MAPGPSTrack": [_encode_point(p) for p in metadata.points], } if metadata.make: @@ -497,6 +510,7 @@ def _as_image_desc(metadata: ImageMetadata) -> ImageDescription: desc: ImageDescription = { "filename": str(metadata.filename.resolve()), "md5sum": metadata.md5sum, + "size": metadata.size, "filetype": FileType.IMAGE.value, "MAPLatitude": round(metadata.lat, _COORDINATES_PRECISION), "MAPLongitude": round(metadata.lon, _COORDINATES_PRECISION), @@ -542,6 +556,7 @@ def _from_image_desc(desc) -> ImageMetadata: if k not in [ "filename", "md5sum", + "size", "filetype", "MAPLatitude", "MAPLongitude", @@ -554,6 +569,7 @@ def _from_image_desc(desc) -> ImageMetadata: return ImageMetadata( filename=Path(desc["filename"]), md5sum=desc.get("md5sum"), + size=desc.get("size"), lat=desc["MAPLatitude"], lon=desc["MAPLongitude"], alt=desc.get("MAPAltitude"), @@ -585,6 +601,7 @@ def _from_video_desc(desc: VideoDescription) -> VideoMetadata: return VideoMetadata( filename=Path(desc["filename"]), md5sum=desc["md5sum"], + size=desc["size"], filetype=FileType(desc["filetype"]), points=[_decode_point(entry) for entry in desc["MAPGPSTrack"]], make=desc.get("MAPDeviceMake"), diff --git a/mapillary_tools/utils.py b/mapillary_tools/utils.py index d5e738a17..428d8be9c 100644 --- a/mapillary_tools/utils.py +++ b/mapillary_tools/utils.py @@ -201,3 +201,6 @@ def find_xml_files(import_paths: T.Sequence[Path]) -> T.List[Path]: else: xml_paths.append(path) return list(deduplicate_paths(xml_paths)) + +def get_file_size(path: Path) -> int: + return os.path.getsize(path) diff --git a/mapillary_tools/video_data_extraction/extract_video_data.py b/mapillary_tools/video_data_extraction/extract_video_data.py index 716d4657e..d43b3cec9 100644 --- a/mapillary_tools/video_data_extraction/extract_video_data.py +++ b/mapillary_tools/video_data_extraction/extract_video_data.py @@ -94,6 +94,7 @@ def process_file(self, file: Path) -> VideoMetadataOrError: filename=file, filetype=FileType.VIDEO, md5sum=None, + size=utils.get_file_size(file), points=points, make=make, model=model, diff --git a/schema/image_description_schema.json b/schema/image_description_schema.json index d26d54002..2ab73faaa 100644 --- a/schema/image_description_schema.json +++ b/schema/image_description_schema.json @@ -66,6 +66,10 @@ "video" ], "description": "The video file type" + }, + "size": { + "type": "number", + "description": "File size" } }, "required": [ @@ -160,6 +164,10 @@ "image" ], "description": "The image file type" + }, + "size": { + "type": "number", + "description": "File size" } }, "required": [ From 72ce6a75307ada1ca29d6843855895512eb80bde Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nikola=20Ple=C5=A1a?= Date: Mon, 3 Feb 2025 16:35:28 +0100 Subject: [PATCH 02/13] Rename size to filesize --- .../geotag/geotag_images_from_exif.py | 5 +++-- .../geotag_videos_from_exiftool_video.py | 5 +++-- .../geotag/geotag_videos_from_video.py | 6 ++--- mapillary_tools/process_geotag_properties.py | 12 +++++----- mapillary_tools/types.py | 22 +++++++++---------- .../extract_video_data.py | 2 +- schema/image_description_schema.json | 4 ++-- 7 files changed, 29 insertions(+), 27 deletions(-) diff --git a/mapillary_tools/geotag/geotag_images_from_exif.py b/mapillary_tools/geotag/geotag_images_from_exif.py index d07e4b0e2..515091c55 100644 --- a/mapillary_tools/geotag/geotag_images_from_exif.py +++ b/mapillary_tools/geotag/geotag_images_from_exif.py @@ -4,9 +4,10 @@ from multiprocessing import Pool from pathlib import Path +from mapillary_tools import utils + from tqdm import tqdm -from mapillary_tools import utils from .. import exceptions, exif_write, geo, types from ..exif_read import ExifRead, ExifReadABC from .geotag_from_generic import GeotagImagesFromGeneric @@ -65,7 +66,7 @@ def build_image_metadata( image_metadata = types.ImageMetadata( filename=image_path, md5sum=None, - size=utils.get_file_size(image_path), + filesize=utils.get_file_size(image_path), time=geo.as_unix_time(capture_time), lat=lat, lon=lon, diff --git a/mapillary_tools/geotag/geotag_videos_from_exiftool_video.py b/mapillary_tools/geotag/geotag_videos_from_exiftool_video.py index 8a4a30c1a..8f67aef89 100644 --- a/mapillary_tools/geotag/geotag_videos_from_exiftool_video.py +++ b/mapillary_tools/geotag/geotag_videos_from_exiftool_video.py @@ -4,9 +4,10 @@ from multiprocessing import Pool from pathlib import Path +from mapillary_tools import utils + from tqdm import tqdm -from mapillary_tools import utils from .. import exceptions, exiftool_read, geo, types from ..exiftool_read_video import ExifToolReadVideo from ..telemetry import GPSPoint @@ -67,7 +68,7 @@ def geotag_video(element: ET.Element) -> types.VideoMetadataOrError: video_metadata = types.VideoMetadata( video_path, md5sum=None, - size=utils.get_file_size(video_path), + filesize=utils.get_file_size(video_path), filetype=types.FileType.VIDEO, points=points, make=exif.extract_make(), diff --git a/mapillary_tools/geotag/geotag_videos_from_video.py b/mapillary_tools/geotag/geotag_videos_from_video.py index 27f6f7de0..7374ed965 100644 --- a/mapillary_tools/geotag/geotag_videos_from_video.py +++ b/mapillary_tools/geotag/geotag_videos_from_video.py @@ -84,7 +84,7 @@ def _extract_video_metadata( return types.VideoMetadata( filename=video_path, md5sum=None, - size=utils.get_file_size(video_path), + filesize=utils.get_file_size(video_path), filetype=types.FileType.CAMM, points=points, make=make, @@ -108,7 +108,7 @@ def _extract_video_metadata( return types.VideoMetadata( filename=video_path, md5sum=None, - size=utils.get_file_size(video_path), + filesize=utils.get_file_size(video_path), filetype=types.FileType.GOPRO, points=T.cast(T.List[geo.Point], points_with_fix), make=make, @@ -132,7 +132,7 @@ def _extract_video_metadata( return types.VideoMetadata( filename=video_path, md5sum=None, - size=utils.get_file_size(video_path), + filesize=utils.get_file_size(video_path), filetype=types.FileType.BLACKVUE, points=points, make=make, diff --git a/mapillary_tools/process_geotag_properties.py b/mapillary_tools/process_geotag_properties.py index f7d3a0ec3..d68d575be 100644 --- a/mapillary_tools/process_geotag_properties.py +++ b/mapillary_tools/process_geotag_properties.py @@ -255,9 +255,9 @@ def process_geotag_properties( metadatas.extend(video_metadata) # filenames should be deduplicated in utils.find_images/utils.find_videos - assert len(metadatas) == len(set(metadata.filename for metadata in metadatas)), ( - "duplicate filenames found" - ) + assert len(metadatas) == len( + set(metadata.filename for metadata in metadatas) + ), "duplicate filenames found" return metadatas @@ -424,14 +424,14 @@ def _show_stats_per_filetype( skipped_process_errors: T.Set[T.Type[Exception]], ): good_metadatas: T.List[T.Union[types.VideoMetadata, types.ImageMetadata]] = [] - size_to_upload = 0 + filesize_to_upload = 0 error_metadatas: T.List[types.ErrorMetadata] = [] for metadata in metadatas: if isinstance(metadata, types.ErrorMetadata): error_metadatas.append(metadata) else: good_metadatas.append(metadata) - size_to_upload += metadata.size + filesize_to_upload += metadata.filesize LOG.info("%8d %s(s) read in total", len(metadatas), filetype.value) if good_metadatas: @@ -439,7 +439,7 @@ def _show_stats_per_filetype( "\t %8d %s(s) (%s MB) are ready to be uploaded", len(good_metadatas), filetype.value, - round(size_to_upload / 1024 / 1024, 1), + round(filesize_to_upload / 1024 / 1024, 1), ) error_counter = collections.Counter( diff --git a/mapillary_tools/types.py b/mapillary_tools/types.py index cc6e57ee8..170dfc896 100644 --- a/mapillary_tools/types.py +++ b/mapillary_tools/types.py @@ -56,7 +56,7 @@ class ImageMetadata(geo.Point): MAPMetaTags: T.Optional[T.Dict] = None # deprecated since v0.10.0; keep here for compatibility MAPFilename: T.Optional[str] = None - size: T.Optional[int] = None + filesize: T.Optional[int] = None def update_md5sum(self, image_data: T.Optional[T.BinaryIO] = None) -> None: if self.md5sum is None: @@ -82,7 +82,7 @@ class VideoMetadata: points: T.Sequence[geo.Point] make: T.Optional[str] = None model: T.Optional[str] = None - size: T.Optional[int] = None + filesize: T.Optional[int] = None def update_md5sum(self) -> None: if self.md5sum is None: @@ -145,7 +145,7 @@ class ImageDescription(_SequenceOnly, _Image, MetaProperties, total=True): # if None or absent, it will be calculated md5sum: T.Optional[str] filetype: Literal["image"] - size: T.Optional[int] + filesize: T.Optional[int] class _VideoDescriptionRequired(TypedDict, total=True): @@ -159,7 +159,7 @@ class _VideoDescriptionRequired(TypedDict, total=True): class VideoDescription(_VideoDescriptionRequired, total=False): MAPDeviceMake: str MAPDeviceModel: str - size: T.Optional[int] + filesize: T.Optional[int] class _ErrorDescription(TypedDict, total=False): @@ -372,7 +372,7 @@ def merge_schema(*schemas: T.Dict) -> T.Dict: "type": ["string", "null"], "description": "MD5 checksum of the image content. If not provided, the uploader will compute it", }, - "size": { + "filesize": { "type": ["number", "null"], "description": "File size", }, @@ -402,7 +402,7 @@ def merge_schema(*schemas: T.Dict) -> T.Dict: "type": ["string", "null"], "description": "MD5 checksum of the video content. If not provided, the uploader will compute it", }, - "size": { + "filesize": { "type": ["number", "null"], "description": "File size", }, @@ -496,7 +496,7 @@ def _as_video_desc(metadata: VideoMetadata) -> VideoDescription: "filename": str(metadata.filename.resolve()), "md5sum": metadata.md5sum, "filetype": metadata.filetype.value, - "size": metadata.size, + "filesize": metadata.filesize, "MAPGPSTrack": [_encode_point(p) for p in metadata.points], } if metadata.make: @@ -510,7 +510,7 @@ def _as_image_desc(metadata: ImageMetadata) -> ImageDescription: desc: ImageDescription = { "filename": str(metadata.filename.resolve()), "md5sum": metadata.md5sum, - "size": metadata.size, + "filesize": metadata.filesize, "filetype": FileType.IMAGE.value, "MAPLatitude": round(metadata.lat, _COORDINATES_PRECISION), "MAPLongitude": round(metadata.lon, _COORDINATES_PRECISION), @@ -556,7 +556,7 @@ def _from_image_desc(desc) -> ImageMetadata: if k not in [ "filename", "md5sum", - "size", + "filesize", "filetype", "MAPLatitude", "MAPLongitude", @@ -569,7 +569,7 @@ def _from_image_desc(desc) -> ImageMetadata: return ImageMetadata( filename=Path(desc["filename"]), md5sum=desc.get("md5sum"), - size=desc.get("size"), + filesize=desc.get("filesize"), lat=desc["MAPLatitude"], lon=desc["MAPLongitude"], alt=desc.get("MAPAltitude"), @@ -601,7 +601,7 @@ def _from_video_desc(desc: VideoDescription) -> VideoMetadata: return VideoMetadata( filename=Path(desc["filename"]), md5sum=desc["md5sum"], - size=desc["size"], + filesize=desc["filesize"], filetype=FileType(desc["filetype"]), points=[_decode_point(entry) for entry in desc["MAPGPSTrack"]], make=desc.get("MAPDeviceMake"), diff --git a/mapillary_tools/video_data_extraction/extract_video_data.py b/mapillary_tools/video_data_extraction/extract_video_data.py index d43b3cec9..e2c34b69c 100644 --- a/mapillary_tools/video_data_extraction/extract_video_data.py +++ b/mapillary_tools/video_data_extraction/extract_video_data.py @@ -94,7 +94,7 @@ def process_file(self, file: Path) -> VideoMetadataOrError: filename=file, filetype=FileType.VIDEO, md5sum=None, - size=utils.get_file_size(file), + filesize=utils.get_file_size(file), points=points, make=make, model=model, diff --git a/schema/image_description_schema.json b/schema/image_description_schema.json index 2ab73faaa..e9a134613 100644 --- a/schema/image_description_schema.json +++ b/schema/image_description_schema.json @@ -67,7 +67,7 @@ ], "description": "The video file type" }, - "size": { + "filesize": { "type": "number", "description": "File size" } @@ -165,7 +165,7 @@ ], "description": "The image file type" }, - "size": { + "filesize": { "type": "number", "description": "File size" } From 5adad72f597ff81c8732353d6a740572633ee553 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nikola=20Ple=C5=A1a?= Date: Mon, 3 Feb 2025 16:40:28 +0100 Subject: [PATCH 03/13] Lint --- mapillary_tools/camm/camm_builder.py | 18 +++++++++--------- mapillary_tools/geotag/gpmf_parser.py | 6 +++--- mapillary_tools/mp4/construct_mp4_parser.py | 6 +++--- mapillary_tools/mp4/mp4_sample_parser.py | 6 +++--- mapillary_tools/mp4/simple_mp4_builder.py | 6 +++--- mapillary_tools/process_sequence_properties.py | 12 ++++++------ mapillary_tools/sample_video.py | 18 +++++++++--------- mapillary_tools/upload.py | 6 +++--- mapillary_tools/upload_api_v4.py | 6 +++--- mapillary_tools/utils.py | 3 ++- 10 files changed, 44 insertions(+), 43 deletions(-) diff --git a/mapillary_tools/camm/camm_builder.py b/mapillary_tools/camm/camm_builder.py index d22c624a2..bebda19b4 100644 --- a/mapillary_tools/camm/camm_builder.py +++ b/mapillary_tools/camm/camm_builder.py @@ -88,12 +88,12 @@ def _create_edit_list_from_points( ] break - assert 0 <= points[0].time, ( - f"expect non-negative point time but got {points[0]}" - ) - assert points[0].time <= points[-1].time, ( - f"expect points to be sorted but got first point {points[0]} and last point {points[-1]}" - ) + assert ( + 0 <= points[0].time + ), f"expect non-negative point time but got {points[0]}" + assert ( + points[0].time <= points[-1].time + ), f"expect points to be sorted but got first point {points[0]} and last point {points[-1]}" if idx == 0: if 0 < points[0].time: @@ -149,9 +149,9 @@ def convert_telemetry_to_raw_samples( else: timedelta = 0 - assert 0 <= timedelta <= builder.UINT32_MAX, ( - f"expected timedelta {timedelta} between {measurements[idx]} and {measurements[idx + 1]} with timescale {timescale} to be <= UINT32_MAX" - ) + assert ( + 0 <= timedelta <= builder.UINT32_MAX + ), f"expected timedelta {timedelta} between {measurements[idx]} and {measurements[idx + 1]} with timescale {timescale} to be <= UINT32_MAX" yield sample_parser.RawSample( # will update later diff --git a/mapillary_tools/geotag/gpmf_parser.py b/mapillary_tools/geotag/gpmf_parser.py index f799ae217..9b896c87b 100644 --- a/mapillary_tools/geotag/gpmf_parser.py +++ b/mapillary_tools/geotag/gpmf_parser.py @@ -379,9 +379,9 @@ def _apply_matrix( matrix: T.Sequence[float], values: T.Sequence[float] ) -> T.Generator[float, None, None]: size = len(values) - assert len(matrix) == size * size, ( - f"expecting a square matrix of size {size} x {size} but got {len(matrix)}" - ) + assert ( + len(matrix) == size * size + ), f"expecting a square matrix of size {size} x {size} but got {len(matrix)}" for y in range(size): row_start = y * size diff --git a/mapillary_tools/mp4/construct_mp4_parser.py b/mapillary_tools/mp4/construct_mp4_parser.py index 6391e6846..8ca1454ba 100644 --- a/mapillary_tools/mp4/construct_mp4_parser.py +++ b/mapillary_tools/mp4/construct_mp4_parser.py @@ -610,9 +610,9 @@ def find_box_at_path( return box box_data = T.cast(T.Sequence[BoxDict], box["data"]) # ListContainer from construct is not sequence - assert isinstance(box_data, T.Sequence), ( - f"expect a list of boxes but got {type(box_data)} at path {path}" - ) + assert isinstance( + box_data, T.Sequence + ), f"expect a list of boxes but got {type(box_data)} at path {path}" found = find_box_at_path(box_data, path[1:]) if found is not None: return found diff --git a/mapillary_tools/mp4/mp4_sample_parser.py b/mapillary_tools/mp4/mp4_sample_parser.py index 6f5afdd46..1cebd682b 100644 --- a/mapillary_tools/mp4/mp4_sample_parser.py +++ b/mapillary_tools/mp4/mp4_sample_parser.py @@ -59,9 +59,9 @@ def _extract_raw_samples( if not chunk_entries: return - assert len(sizes) <= len(timedeltas), ( - f"got less ({len(timedeltas)}) sample time deltas (stts) than expected ({len(sizes)})" - ) + assert len(sizes) <= len( + timedeltas + ), f"got less ({len(timedeltas)}) sample time deltas (stts) than expected ({len(sizes)})" sample_idx = 0 chunk_idx = 0 diff --git a/mapillary_tools/mp4/simple_mp4_builder.py b/mapillary_tools/mp4/simple_mp4_builder.py index b93a3ef29..632aecca0 100644 --- a/mapillary_tools/mp4/simple_mp4_builder.py +++ b/mapillary_tools/mp4/simple_mp4_builder.py @@ -404,8 +404,8 @@ def _rewrite_and_build_moov_typed_data( for box in _filter_trak_boxes(moov_children): sample_offset = _update_sbtl_sample_offsets(box, sample_offset) moov_typed_data = _build_moov_typed_data(moov_children) - assert len(moov_typed_data) == moov_typed_data_size, ( - f"{len(moov_typed_data)} != {moov_typed_data_size}" - ) + assert ( + len(moov_typed_data) == moov_typed_data_size + ), f"{len(moov_typed_data)} != {moov_typed_data_size}" return moov_typed_data diff --git a/mapillary_tools/process_sequence_properties.py b/mapillary_tools/process_sequence_properties.py index 27ca1a4cc..2ed7049ad 100644 --- a/mapillary_tools/process_sequence_properties.py +++ b/mapillary_tools/process_sequence_properties.py @@ -209,9 +209,9 @@ def _interpolate_subsecs_for_sorting(sequence: PointSequence) -> None: gidx = gidx + len(group) for cur, nxt in geo.pairwise(sequence): - assert cur.time <= nxt.time, ( - f"sequence must be sorted but got {cur.time} > {nxt.time}" - ) + assert ( + cur.time <= nxt.time + ), f"sequence must be sorted but got {cur.time} > {nxt.time}" def _parse_filesize_in_bytes(filesize_str: str) -> int: @@ -335,9 +335,9 @@ def process_sequence_properties( results = error_metadatas + image_metadatas + video_metadatas - assert len(metadatas) == len(results), ( - f"expected {len(metadatas)} results but got {len(results)}" - ) + assert len(metadatas) == len( + results + ), f"expected {len(metadatas)} results but got {len(results)}" assert sequence_idx == len( set(metadata.MAPSequenceUUID for metadata in image_metadatas) ) diff --git a/mapillary_tools/sample_video.py b/mapillary_tools/sample_video.py index 89b9de9ba..eb0217063 100644 --- a/mapillary_tools/sample_video.py +++ b/mapillary_tools/sample_video.py @@ -118,9 +118,9 @@ def sample_video( start_time=video_start_time_dt, ) else: - assert 0 < video_sample_interval, ( - "expect positive video_sample_interval but got {video_sample_interval}" - ) + assert ( + 0 < video_sample_interval + ), "expect positive video_sample_interval but got {video_sample_interval}" _sample_single_video_by_interval( video_path, sample_dir, @@ -339,9 +339,9 @@ def _sample_single_video_by_distance( f"Expect {len(sorted_sample_indices)} samples but extracted {len(frame_samples)} samples" ) for idx, (frame_idx_1based, sample_paths) in enumerate(frame_samples): - assert len(sample_paths) == 1, ( - "Expect 1 sample path at {frame_idx_1based} but got {sample_paths}" - ) + assert ( + len(sample_paths) == 1 + ), "Expect 1 sample path at {frame_idx_1based} but got {sample_paths}" if idx + 1 != frame_idx_1based: raise exceptions.MapillaryVideoError( f"Expect {sample_paths[0]} to be {idx + 1}th sample but got {frame_idx_1based}" @@ -352,9 +352,9 @@ def _sample_single_video_by_distance( continue video_sample, interp = sample_points_by_frame_idx[sample_idx] - assert interp.time == video_sample.exact_composition_time, ( - f"interpolated time {interp.time} should match the video sample time {video_sample.exact_composition_time}" - ) + assert ( + interp.time == video_sample.exact_composition_time + ), f"interpolated time {interp.time} should match the video sample time {video_sample.exact_composition_time}" timestamp = start_time + datetime.timedelta(seconds=interp.time) exif_edit = ExifEdit(sample_paths[0]) diff --git a/mapillary_tools/upload.py b/mapillary_tools/upload.py index 39a0cc8ea..ef33cc3dc 100644 --- a/mapillary_tools/upload.py +++ b/mapillary_tools/upload.py @@ -657,9 +657,9 @@ def upload( ) for idx, video_metadata in enumerate(specified_video_metadatas): video_metadata.update_md5sum() - assert isinstance(video_metadata.md5sum, str), ( - "md5sum should be updated" - ) + assert isinstance( + video_metadata.md5sum, str + ), "md5sum should be updated" # extract telemetry measurements from GoPro videos telemetry_measurements: T.List[telemetry.TelemetryMeasurement] = [] diff --git a/mapillary_tools/upload_api_v4.py b/mapillary_tools/upload_api_v4.py index fe56e8f8d..fc1b1d4f5 100644 --- a/mapillary_tools/upload_api_v4.py +++ b/mapillary_tools/upload_api_v4.py @@ -153,9 +153,9 @@ def upload( if not chunk: break - assert offset == self.entity_size, ( - f"Offset ends at {offset} but the entity size is {self.entity_size}" - ) + assert ( + offset == self.entity_size + ), f"Offset ends at {offset} but the entity size is {self.entity_size}" payload = resp.json() try: diff --git a/mapillary_tools/utils.py b/mapillary_tools/utils.py index 428d8be9c..52f64b278 100644 --- a/mapillary_tools/utils.py +++ b/mapillary_tools/utils.py @@ -202,5 +202,6 @@ def find_xml_files(import_paths: T.Sequence[Path]) -> T.List[Path]: xml_paths.append(path) return list(deduplicate_paths(xml_paths)) + def get_file_size(path: Path) -> int: - return os.path.getsize(path) + return os.path.getsize(path) From 8b5987bbc2bb12b83f060d21b38dda99ba683ad5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nikola=20Ple=C5=A1a?= Date: Tue, 4 Feb 2025 11:31:12 +0100 Subject: [PATCH 04/13] Undo edit --- mapillary_tools/upload.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mapillary_tools/upload.py b/mapillary_tools/upload.py index ef33cc3dc..39a0cc8ea 100644 --- a/mapillary_tools/upload.py +++ b/mapillary_tools/upload.py @@ -657,9 +657,9 @@ def upload( ) for idx, video_metadata in enumerate(specified_video_metadatas): video_metadata.update_md5sum() - assert isinstance( - video_metadata.md5sum, str - ), "md5sum should be updated" + assert isinstance(video_metadata.md5sum, str), ( + "md5sum should be updated" + ) # extract telemetry measurements from GoPro videos telemetry_measurements: T.List[telemetry.TelemetryMeasurement] = [] From c501c4aeb694786b47197670260aa41c4ff4c296 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nikola=20Ple=C5=A1a?= Date: Mon, 3 Feb 2025 10:45:59 +0100 Subject: [PATCH 05/13] Add file sizes to output --- .../geotag/geotag_images_from_exif.py | 2 ++ .../geotag/geotag_videos_from_exiftool_video.py | 2 ++ .../geotag/geotag_videos_from_video.py | 5 +++++ mapillary_tools/process_geotag_properties.py | 5 ++++- mapillary_tools/types.py | 17 +++++++++++++++++ mapillary_tools/utils.py | 3 +++ .../video_data_extraction/extract_video_data.py | 1 + schema/image_description_schema.json | 8 ++++++++ 8 files changed, 42 insertions(+), 1 deletion(-) diff --git a/mapillary_tools/geotag/geotag_images_from_exif.py b/mapillary_tools/geotag/geotag_images_from_exif.py index 474a303fe..d07e4b0e2 100644 --- a/mapillary_tools/geotag/geotag_images_from_exif.py +++ b/mapillary_tools/geotag/geotag_images_from_exif.py @@ -6,6 +6,7 @@ from tqdm import tqdm +from mapillary_tools import utils from .. import exceptions, exif_write, geo, types from ..exif_read import ExifRead, ExifReadABC from .geotag_from_generic import GeotagImagesFromGeneric @@ -64,6 +65,7 @@ def build_image_metadata( image_metadata = types.ImageMetadata( filename=image_path, md5sum=None, + size=utils.get_file_size(image_path), time=geo.as_unix_time(capture_time), lat=lat, lon=lon, diff --git a/mapillary_tools/geotag/geotag_videos_from_exiftool_video.py b/mapillary_tools/geotag/geotag_videos_from_exiftool_video.py index 3ed8465ed..8a4a30c1a 100644 --- a/mapillary_tools/geotag/geotag_videos_from_exiftool_video.py +++ b/mapillary_tools/geotag/geotag_videos_from_exiftool_video.py @@ -6,6 +6,7 @@ from tqdm import tqdm +from mapillary_tools import utils from .. import exceptions, exiftool_read, geo, types from ..exiftool_read_video import ExifToolReadVideo from ..telemetry import GPSPoint @@ -66,6 +67,7 @@ def geotag_video(element: ET.Element) -> types.VideoMetadataOrError: video_metadata = types.VideoMetadata( video_path, md5sum=None, + size=utils.get_file_size(video_path), filetype=types.FileType.VIDEO, points=points, make=exif.extract_make(), diff --git a/mapillary_tools/geotag/geotag_videos_from_video.py b/mapillary_tools/geotag/geotag_videos_from_video.py index b5ac06316..27f6f7de0 100644 --- a/mapillary_tools/geotag/geotag_videos_from_video.py +++ b/mapillary_tools/geotag/geotag_videos_from_video.py @@ -4,6 +4,8 @@ from multiprocessing import Pool from pathlib import Path +from mapillary_tools import utils + from tqdm import tqdm from .. import exceptions, geo, types @@ -82,6 +84,7 @@ def _extract_video_metadata( return types.VideoMetadata( filename=video_path, md5sum=None, + size=utils.get_file_size(video_path), filetype=types.FileType.CAMM, points=points, make=make, @@ -105,6 +108,7 @@ def _extract_video_metadata( return types.VideoMetadata( filename=video_path, md5sum=None, + size=utils.get_file_size(video_path), filetype=types.FileType.GOPRO, points=T.cast(T.List[geo.Point], points_with_fix), make=make, @@ -128,6 +132,7 @@ def _extract_video_metadata( return types.VideoMetadata( filename=video_path, md5sum=None, + size=utils.get_file_size(video_path), filetype=types.FileType.BLACKVUE, points=points, make=make, diff --git a/mapillary_tools/process_geotag_properties.py b/mapillary_tools/process_geotag_properties.py index 59a4cf345..8a81fa264 100644 --- a/mapillary_tools/process_geotag_properties.py +++ b/mapillary_tools/process_geotag_properties.py @@ -413,19 +413,22 @@ def _show_stats_per_filetype( skipped_process_errors: T.Set[T.Type[Exception]], ): good_metadatas: T.List[T.Union[types.VideoMetadata, types.ImageMetadata]] = [] + size_to_upload = 0 error_metadatas: T.List[types.ErrorMetadata] = [] for metadata in metadatas: if isinstance(metadata, types.ErrorMetadata): error_metadatas.append(metadata) else: good_metadatas.append(metadata) + size_to_upload += metadata.size LOG.info("%8d %s(s) read in total", len(metadatas), filetype.value) if good_metadatas: LOG.info( - "\t %8d %s(s) are ready to be uploaded", + "\t %8d %s(s) (%s MB) are ready to be uploaded", len(good_metadatas), filetype.value, + round(size_to_upload / 1024 / 1024, 1), ) error_counter = collections.Counter( diff --git a/mapillary_tools/types.py b/mapillary_tools/types.py index a7616a216..d45d34bee 100644 --- a/mapillary_tools/types.py +++ b/mapillary_tools/types.py @@ -57,6 +57,7 @@ class ImageMetadata(geo.Point): MAPMetaTags: T.Optional[T.Dict] = None # deprecated since v0.10.0; keep here for compatibility MAPFilename: T.Optional[str] = None + size: T.Optional[int] = None def update_md5sum(self, image_data: T.Optional[T.BinaryIO] = None) -> None: if self.md5sum is None: @@ -82,6 +83,7 @@ class VideoMetadata: points: T.Sequence[geo.Point] make: T.Optional[str] = None model: T.Optional[str] = None + size: T.Optional[int] = None def update_md5sum(self) -> None: if self.md5sum is None: @@ -144,6 +146,7 @@ class ImageDescription(_SequenceOnly, _Image, MetaProperties, total=True): # if None or absent, it will be calculated md5sum: T.Optional[str] filetype: Literal["image"] + size: T.Optional[int] class _VideoDescriptionRequired(TypedDict, total=True): @@ -157,6 +160,7 @@ class _VideoDescriptionRequired(TypedDict, total=True): class VideoDescription(_VideoDescriptionRequired, total=False): MAPDeviceMake: str MAPDeviceModel: str + size: T.Optional[int] class _ErrorDescription(TypedDict, total=False): @@ -369,6 +373,10 @@ def merge_schema(*schemas: T.Dict) -> T.Dict: "type": ["string", "null"], "description": "MD5 checksum of the image content. If not provided, the uploader will compute it", }, + "size": { + "type": ["number", "null"], + "description": "File size", + }, "filetype": { "type": "string", "enum": [FileType.IMAGE.value], @@ -395,6 +403,10 @@ def merge_schema(*schemas: T.Dict) -> T.Dict: "type": ["string", "null"], "description": "MD5 checksum of the video content. If not provided, the uploader will compute it", }, + "size": { + "type": ["number", "null"], + "description": "File size", + }, "filetype": { "type": "string", "enum": [ @@ -485,6 +497,7 @@ def _as_video_desc(metadata: VideoMetadata) -> VideoDescription: "filename": str(metadata.filename.resolve()), "md5sum": metadata.md5sum, "filetype": metadata.filetype.value, + "size": metadata.size, "MAPGPSTrack": [_encode_point(p) for p in metadata.points], } if metadata.make: @@ -498,6 +511,7 @@ def _as_image_desc(metadata: ImageMetadata) -> ImageDescription: desc: ImageDescription = { "filename": str(metadata.filename.resolve()), "md5sum": metadata.md5sum, + "size": metadata.size, "filetype": FileType.IMAGE.value, "MAPLatitude": round(metadata.lat, _COORDINATES_PRECISION), "MAPLongitude": round(metadata.lon, _COORDINATES_PRECISION), @@ -543,6 +557,7 @@ def _from_image_desc(desc) -> ImageMetadata: if k not in [ "filename", "md5sum", + "size", "filetype", "MAPLatitude", "MAPLongitude", @@ -555,6 +570,7 @@ def _from_image_desc(desc) -> ImageMetadata: return ImageMetadata( filename=Path(desc["filename"]), md5sum=desc.get("md5sum"), + size=desc.get("size"), lat=desc["MAPLatitude"], lon=desc["MAPLongitude"], alt=desc.get("MAPAltitude"), @@ -586,6 +602,7 @@ def _from_video_desc(desc: VideoDescription) -> VideoMetadata: return VideoMetadata( filename=Path(desc["filename"]), md5sum=desc["md5sum"], + size=desc["size"], filetype=FileType(desc["filetype"]), points=[_decode_point(entry) for entry in desc["MAPGPSTrack"]], make=desc.get("MAPDeviceMake"), diff --git a/mapillary_tools/utils.py b/mapillary_tools/utils.py index 58b53bfc4..9a8d18ae9 100644 --- a/mapillary_tools/utils.py +++ b/mapillary_tools/utils.py @@ -190,3 +190,6 @@ def find_xml_files(import_paths: T.Sequence[Path]) -> T.List[Path]: if path.suffix.lower() in [".xml"]: xml_paths.append(path) return list(deduplicate_paths(xml_paths)) + +def get_file_size(path: Path) -> int: + return os.path.getsize(path) diff --git a/mapillary_tools/video_data_extraction/extract_video_data.py b/mapillary_tools/video_data_extraction/extract_video_data.py index 311286505..955d5b1f5 100644 --- a/mapillary_tools/video_data_extraction/extract_video_data.py +++ b/mapillary_tools/video_data_extraction/extract_video_data.py @@ -92,6 +92,7 @@ def process_file(self, file: Path) -> VideoMetadataOrError: filename=file, filetype=FileType.VIDEO, md5sum=None, + size=utils.get_file_size(file), points=points, make=make, model=model, diff --git a/schema/image_description_schema.json b/schema/image_description_schema.json index d26d54002..2ab73faaa 100644 --- a/schema/image_description_schema.json +++ b/schema/image_description_schema.json @@ -66,6 +66,10 @@ "video" ], "description": "The video file type" + }, + "size": { + "type": "number", + "description": "File size" } }, "required": [ @@ -160,6 +164,10 @@ "image" ], "description": "The image file type" + }, + "size": { + "type": "number", + "description": "File size" } }, "required": [ From a66ae0b3010b89567ff5c63a3a8318b2a5a629d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nikola=20Ple=C5=A1a?= Date: Mon, 3 Feb 2025 16:35:28 +0100 Subject: [PATCH 06/13] Rename size to filesize --- .../geotag/geotag_images_from_exif.py | 5 +++-- .../geotag_videos_from_exiftool_video.py | 5 +++-- .../geotag/geotag_videos_from_video.py | 6 ++--- mapillary_tools/process_geotag_properties.py | 12 +++++----- mapillary_tools/types.py | 22 +++++++++---------- .../extract_video_data.py | 2 +- schema/image_description_schema.json | 4 ++-- 7 files changed, 29 insertions(+), 27 deletions(-) diff --git a/mapillary_tools/geotag/geotag_images_from_exif.py b/mapillary_tools/geotag/geotag_images_from_exif.py index d07e4b0e2..515091c55 100644 --- a/mapillary_tools/geotag/geotag_images_from_exif.py +++ b/mapillary_tools/geotag/geotag_images_from_exif.py @@ -4,9 +4,10 @@ from multiprocessing import Pool from pathlib import Path +from mapillary_tools import utils + from tqdm import tqdm -from mapillary_tools import utils from .. import exceptions, exif_write, geo, types from ..exif_read import ExifRead, ExifReadABC from .geotag_from_generic import GeotagImagesFromGeneric @@ -65,7 +66,7 @@ def build_image_metadata( image_metadata = types.ImageMetadata( filename=image_path, md5sum=None, - size=utils.get_file_size(image_path), + filesize=utils.get_file_size(image_path), time=geo.as_unix_time(capture_time), lat=lat, lon=lon, diff --git a/mapillary_tools/geotag/geotag_videos_from_exiftool_video.py b/mapillary_tools/geotag/geotag_videos_from_exiftool_video.py index 8a4a30c1a..8f67aef89 100644 --- a/mapillary_tools/geotag/geotag_videos_from_exiftool_video.py +++ b/mapillary_tools/geotag/geotag_videos_from_exiftool_video.py @@ -4,9 +4,10 @@ from multiprocessing import Pool from pathlib import Path +from mapillary_tools import utils + from tqdm import tqdm -from mapillary_tools import utils from .. import exceptions, exiftool_read, geo, types from ..exiftool_read_video import ExifToolReadVideo from ..telemetry import GPSPoint @@ -67,7 +68,7 @@ def geotag_video(element: ET.Element) -> types.VideoMetadataOrError: video_metadata = types.VideoMetadata( video_path, md5sum=None, - size=utils.get_file_size(video_path), + filesize=utils.get_file_size(video_path), filetype=types.FileType.VIDEO, points=points, make=exif.extract_make(), diff --git a/mapillary_tools/geotag/geotag_videos_from_video.py b/mapillary_tools/geotag/geotag_videos_from_video.py index 27f6f7de0..7374ed965 100644 --- a/mapillary_tools/geotag/geotag_videos_from_video.py +++ b/mapillary_tools/geotag/geotag_videos_from_video.py @@ -84,7 +84,7 @@ def _extract_video_metadata( return types.VideoMetadata( filename=video_path, md5sum=None, - size=utils.get_file_size(video_path), + filesize=utils.get_file_size(video_path), filetype=types.FileType.CAMM, points=points, make=make, @@ -108,7 +108,7 @@ def _extract_video_metadata( return types.VideoMetadata( filename=video_path, md5sum=None, - size=utils.get_file_size(video_path), + filesize=utils.get_file_size(video_path), filetype=types.FileType.GOPRO, points=T.cast(T.List[geo.Point], points_with_fix), make=make, @@ -132,7 +132,7 @@ def _extract_video_metadata( return types.VideoMetadata( filename=video_path, md5sum=None, - size=utils.get_file_size(video_path), + filesize=utils.get_file_size(video_path), filetype=types.FileType.BLACKVUE, points=points, make=make, diff --git a/mapillary_tools/process_geotag_properties.py b/mapillary_tools/process_geotag_properties.py index 8a81fa264..58805b1f7 100644 --- a/mapillary_tools/process_geotag_properties.py +++ b/mapillary_tools/process_geotag_properties.py @@ -245,9 +245,9 @@ def process_geotag_properties( metadatas.extend(video_metadata) # filenames should be deduplicated in utils.find_images/utils.find_videos - assert len(metadatas) == len(set(metadata.filename for metadata in metadatas)), ( - "duplicate filenames found" - ) + assert len(metadatas) == len( + set(metadata.filename for metadata in metadatas) + ), "duplicate filenames found" return metadatas @@ -413,14 +413,14 @@ def _show_stats_per_filetype( skipped_process_errors: T.Set[T.Type[Exception]], ): good_metadatas: T.List[T.Union[types.VideoMetadata, types.ImageMetadata]] = [] - size_to_upload = 0 + filesize_to_upload = 0 error_metadatas: T.List[types.ErrorMetadata] = [] for metadata in metadatas: if isinstance(metadata, types.ErrorMetadata): error_metadatas.append(metadata) else: good_metadatas.append(metadata) - size_to_upload += metadata.size + filesize_to_upload += metadata.filesize LOG.info("%8d %s(s) read in total", len(metadatas), filetype.value) if good_metadatas: @@ -428,7 +428,7 @@ def _show_stats_per_filetype( "\t %8d %s(s) (%s MB) are ready to be uploaded", len(good_metadatas), filetype.value, - round(size_to_upload / 1024 / 1024, 1), + round(filesize_to_upload / 1024 / 1024, 1), ) error_counter = collections.Counter( diff --git a/mapillary_tools/types.py b/mapillary_tools/types.py index d45d34bee..738ee556a 100644 --- a/mapillary_tools/types.py +++ b/mapillary_tools/types.py @@ -57,7 +57,7 @@ class ImageMetadata(geo.Point): MAPMetaTags: T.Optional[T.Dict] = None # deprecated since v0.10.0; keep here for compatibility MAPFilename: T.Optional[str] = None - size: T.Optional[int] = None + filesize: T.Optional[int] = None def update_md5sum(self, image_data: T.Optional[T.BinaryIO] = None) -> None: if self.md5sum is None: @@ -83,7 +83,7 @@ class VideoMetadata: points: T.Sequence[geo.Point] make: T.Optional[str] = None model: T.Optional[str] = None - size: T.Optional[int] = None + filesize: T.Optional[int] = None def update_md5sum(self) -> None: if self.md5sum is None: @@ -146,7 +146,7 @@ class ImageDescription(_SequenceOnly, _Image, MetaProperties, total=True): # if None or absent, it will be calculated md5sum: T.Optional[str] filetype: Literal["image"] - size: T.Optional[int] + filesize: T.Optional[int] class _VideoDescriptionRequired(TypedDict, total=True): @@ -160,7 +160,7 @@ class _VideoDescriptionRequired(TypedDict, total=True): class VideoDescription(_VideoDescriptionRequired, total=False): MAPDeviceMake: str MAPDeviceModel: str - size: T.Optional[int] + filesize: T.Optional[int] class _ErrorDescription(TypedDict, total=False): @@ -373,7 +373,7 @@ def merge_schema(*schemas: T.Dict) -> T.Dict: "type": ["string", "null"], "description": "MD5 checksum of the image content. If not provided, the uploader will compute it", }, - "size": { + "filesize": { "type": ["number", "null"], "description": "File size", }, @@ -403,7 +403,7 @@ def merge_schema(*schemas: T.Dict) -> T.Dict: "type": ["string", "null"], "description": "MD5 checksum of the video content. If not provided, the uploader will compute it", }, - "size": { + "filesize": { "type": ["number", "null"], "description": "File size", }, @@ -497,7 +497,7 @@ def _as_video_desc(metadata: VideoMetadata) -> VideoDescription: "filename": str(metadata.filename.resolve()), "md5sum": metadata.md5sum, "filetype": metadata.filetype.value, - "size": metadata.size, + "filesize": metadata.filesize, "MAPGPSTrack": [_encode_point(p) for p in metadata.points], } if metadata.make: @@ -511,7 +511,7 @@ def _as_image_desc(metadata: ImageMetadata) -> ImageDescription: desc: ImageDescription = { "filename": str(metadata.filename.resolve()), "md5sum": metadata.md5sum, - "size": metadata.size, + "filesize": metadata.filesize, "filetype": FileType.IMAGE.value, "MAPLatitude": round(metadata.lat, _COORDINATES_PRECISION), "MAPLongitude": round(metadata.lon, _COORDINATES_PRECISION), @@ -557,7 +557,7 @@ def _from_image_desc(desc) -> ImageMetadata: if k not in [ "filename", "md5sum", - "size", + "filesize", "filetype", "MAPLatitude", "MAPLongitude", @@ -570,7 +570,7 @@ def _from_image_desc(desc) -> ImageMetadata: return ImageMetadata( filename=Path(desc["filename"]), md5sum=desc.get("md5sum"), - size=desc.get("size"), + filesize=desc.get("filesize"), lat=desc["MAPLatitude"], lon=desc["MAPLongitude"], alt=desc.get("MAPAltitude"), @@ -602,7 +602,7 @@ def _from_video_desc(desc: VideoDescription) -> VideoMetadata: return VideoMetadata( filename=Path(desc["filename"]), md5sum=desc["md5sum"], - size=desc["size"], + filesize=desc["filesize"], filetype=FileType(desc["filetype"]), points=[_decode_point(entry) for entry in desc["MAPGPSTrack"]], make=desc.get("MAPDeviceMake"), diff --git a/mapillary_tools/video_data_extraction/extract_video_data.py b/mapillary_tools/video_data_extraction/extract_video_data.py index 955d5b1f5..442a4bfcd 100644 --- a/mapillary_tools/video_data_extraction/extract_video_data.py +++ b/mapillary_tools/video_data_extraction/extract_video_data.py @@ -92,7 +92,7 @@ def process_file(self, file: Path) -> VideoMetadataOrError: filename=file, filetype=FileType.VIDEO, md5sum=None, - size=utils.get_file_size(file), + filesize=utils.get_file_size(file), points=points, make=make, model=model, diff --git a/schema/image_description_schema.json b/schema/image_description_schema.json index 2ab73faaa..e9a134613 100644 --- a/schema/image_description_schema.json +++ b/schema/image_description_schema.json @@ -67,7 +67,7 @@ ], "description": "The video file type" }, - "size": { + "filesize": { "type": "number", "description": "File size" } @@ -165,7 +165,7 @@ ], "description": "The image file type" }, - "size": { + "filesize": { "type": "number", "description": "File size" } From d040521d24f4703d793cf5422c1f0335c47a3f37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nikola=20Ple=C5=A1a?= Date: Mon, 3 Feb 2025 16:40:28 +0100 Subject: [PATCH 07/13] Lint --- mapillary_tools/camm/camm_builder.py | 18 +++++++++--------- mapillary_tools/geotag/gpmf_parser.py | 6 +++--- mapillary_tools/mp4/construct_mp4_parser.py | 6 +++--- mapillary_tools/mp4/mp4_sample_parser.py | 6 +++--- mapillary_tools/mp4/simple_mp4_builder.py | 6 +++--- mapillary_tools/process_sequence_properties.py | 12 ++++++------ mapillary_tools/sample_video.py | 18 +++++++++--------- mapillary_tools/upload_api_v4.py | 6 +++--- mapillary_tools/utils.py | 3 ++- 9 files changed, 41 insertions(+), 40 deletions(-) diff --git a/mapillary_tools/camm/camm_builder.py b/mapillary_tools/camm/camm_builder.py index d22c624a2..bebda19b4 100644 --- a/mapillary_tools/camm/camm_builder.py +++ b/mapillary_tools/camm/camm_builder.py @@ -88,12 +88,12 @@ def _create_edit_list_from_points( ] break - assert 0 <= points[0].time, ( - f"expect non-negative point time but got {points[0]}" - ) - assert points[0].time <= points[-1].time, ( - f"expect points to be sorted but got first point {points[0]} and last point {points[-1]}" - ) + assert ( + 0 <= points[0].time + ), f"expect non-negative point time but got {points[0]}" + assert ( + points[0].time <= points[-1].time + ), f"expect points to be sorted but got first point {points[0]} and last point {points[-1]}" if idx == 0: if 0 < points[0].time: @@ -149,9 +149,9 @@ def convert_telemetry_to_raw_samples( else: timedelta = 0 - assert 0 <= timedelta <= builder.UINT32_MAX, ( - f"expected timedelta {timedelta} between {measurements[idx]} and {measurements[idx + 1]} with timescale {timescale} to be <= UINT32_MAX" - ) + assert ( + 0 <= timedelta <= builder.UINT32_MAX + ), f"expected timedelta {timedelta} between {measurements[idx]} and {measurements[idx + 1]} with timescale {timescale} to be <= UINT32_MAX" yield sample_parser.RawSample( # will update later diff --git a/mapillary_tools/geotag/gpmf_parser.py b/mapillary_tools/geotag/gpmf_parser.py index f799ae217..9b896c87b 100644 --- a/mapillary_tools/geotag/gpmf_parser.py +++ b/mapillary_tools/geotag/gpmf_parser.py @@ -379,9 +379,9 @@ def _apply_matrix( matrix: T.Sequence[float], values: T.Sequence[float] ) -> T.Generator[float, None, None]: size = len(values) - assert len(matrix) == size * size, ( - f"expecting a square matrix of size {size} x {size} but got {len(matrix)}" - ) + assert ( + len(matrix) == size * size + ), f"expecting a square matrix of size {size} x {size} but got {len(matrix)}" for y in range(size): row_start = y * size diff --git a/mapillary_tools/mp4/construct_mp4_parser.py b/mapillary_tools/mp4/construct_mp4_parser.py index 6391e6846..8ca1454ba 100644 --- a/mapillary_tools/mp4/construct_mp4_parser.py +++ b/mapillary_tools/mp4/construct_mp4_parser.py @@ -610,9 +610,9 @@ def find_box_at_path( return box box_data = T.cast(T.Sequence[BoxDict], box["data"]) # ListContainer from construct is not sequence - assert isinstance(box_data, T.Sequence), ( - f"expect a list of boxes but got {type(box_data)} at path {path}" - ) + assert isinstance( + box_data, T.Sequence + ), f"expect a list of boxes but got {type(box_data)} at path {path}" found = find_box_at_path(box_data, path[1:]) if found is not None: return found diff --git a/mapillary_tools/mp4/mp4_sample_parser.py b/mapillary_tools/mp4/mp4_sample_parser.py index 6f5afdd46..1cebd682b 100644 --- a/mapillary_tools/mp4/mp4_sample_parser.py +++ b/mapillary_tools/mp4/mp4_sample_parser.py @@ -59,9 +59,9 @@ def _extract_raw_samples( if not chunk_entries: return - assert len(sizes) <= len(timedeltas), ( - f"got less ({len(timedeltas)}) sample time deltas (stts) than expected ({len(sizes)})" - ) + assert len(sizes) <= len( + timedeltas + ), f"got less ({len(timedeltas)}) sample time deltas (stts) than expected ({len(sizes)})" sample_idx = 0 chunk_idx = 0 diff --git a/mapillary_tools/mp4/simple_mp4_builder.py b/mapillary_tools/mp4/simple_mp4_builder.py index b93a3ef29..632aecca0 100644 --- a/mapillary_tools/mp4/simple_mp4_builder.py +++ b/mapillary_tools/mp4/simple_mp4_builder.py @@ -404,8 +404,8 @@ def _rewrite_and_build_moov_typed_data( for box in _filter_trak_boxes(moov_children): sample_offset = _update_sbtl_sample_offsets(box, sample_offset) moov_typed_data = _build_moov_typed_data(moov_children) - assert len(moov_typed_data) == moov_typed_data_size, ( - f"{len(moov_typed_data)} != {moov_typed_data_size}" - ) + assert ( + len(moov_typed_data) == moov_typed_data_size + ), f"{len(moov_typed_data)} != {moov_typed_data_size}" return moov_typed_data diff --git a/mapillary_tools/process_sequence_properties.py b/mapillary_tools/process_sequence_properties.py index 27ca1a4cc..2ed7049ad 100644 --- a/mapillary_tools/process_sequence_properties.py +++ b/mapillary_tools/process_sequence_properties.py @@ -209,9 +209,9 @@ def _interpolate_subsecs_for_sorting(sequence: PointSequence) -> None: gidx = gidx + len(group) for cur, nxt in geo.pairwise(sequence): - assert cur.time <= nxt.time, ( - f"sequence must be sorted but got {cur.time} > {nxt.time}" - ) + assert ( + cur.time <= nxt.time + ), f"sequence must be sorted but got {cur.time} > {nxt.time}" def _parse_filesize_in_bytes(filesize_str: str) -> int: @@ -335,9 +335,9 @@ def process_sequence_properties( results = error_metadatas + image_metadatas + video_metadatas - assert len(metadatas) == len(results), ( - f"expected {len(metadatas)} results but got {len(results)}" - ) + assert len(metadatas) == len( + results + ), f"expected {len(metadatas)} results but got {len(results)}" assert sequence_idx == len( set(metadata.MAPSequenceUUID for metadata in image_metadatas) ) diff --git a/mapillary_tools/sample_video.py b/mapillary_tools/sample_video.py index 89b9de9ba..eb0217063 100644 --- a/mapillary_tools/sample_video.py +++ b/mapillary_tools/sample_video.py @@ -118,9 +118,9 @@ def sample_video( start_time=video_start_time_dt, ) else: - assert 0 < video_sample_interval, ( - "expect positive video_sample_interval but got {video_sample_interval}" - ) + assert ( + 0 < video_sample_interval + ), "expect positive video_sample_interval but got {video_sample_interval}" _sample_single_video_by_interval( video_path, sample_dir, @@ -339,9 +339,9 @@ def _sample_single_video_by_distance( f"Expect {len(sorted_sample_indices)} samples but extracted {len(frame_samples)} samples" ) for idx, (frame_idx_1based, sample_paths) in enumerate(frame_samples): - assert len(sample_paths) == 1, ( - "Expect 1 sample path at {frame_idx_1based} but got {sample_paths}" - ) + assert ( + len(sample_paths) == 1 + ), "Expect 1 sample path at {frame_idx_1based} but got {sample_paths}" if idx + 1 != frame_idx_1based: raise exceptions.MapillaryVideoError( f"Expect {sample_paths[0]} to be {idx + 1}th sample but got {frame_idx_1based}" @@ -352,9 +352,9 @@ def _sample_single_video_by_distance( continue video_sample, interp = sample_points_by_frame_idx[sample_idx] - assert interp.time == video_sample.exact_composition_time, ( - f"interpolated time {interp.time} should match the video sample time {video_sample.exact_composition_time}" - ) + assert ( + interp.time == video_sample.exact_composition_time + ), f"interpolated time {interp.time} should match the video sample time {video_sample.exact_composition_time}" timestamp = start_time + datetime.timedelta(seconds=interp.time) exif_edit = ExifEdit(sample_paths[0]) diff --git a/mapillary_tools/upload_api_v4.py b/mapillary_tools/upload_api_v4.py index fe56e8f8d..fc1b1d4f5 100644 --- a/mapillary_tools/upload_api_v4.py +++ b/mapillary_tools/upload_api_v4.py @@ -153,9 +153,9 @@ def upload( if not chunk: break - assert offset == self.entity_size, ( - f"Offset ends at {offset} but the entity size is {self.entity_size}" - ) + assert ( + offset == self.entity_size + ), f"Offset ends at {offset} but the entity size is {self.entity_size}" payload = resp.json() try: diff --git a/mapillary_tools/utils.py b/mapillary_tools/utils.py index 9a8d18ae9..5d166563c 100644 --- a/mapillary_tools/utils.py +++ b/mapillary_tools/utils.py @@ -191,5 +191,6 @@ def find_xml_files(import_paths: T.Sequence[Path]) -> T.List[Path]: xml_paths.append(path) return list(deduplicate_paths(xml_paths)) + def get_file_size(path: Path) -> int: - return os.path.getsize(path) + return os.path.getsize(path) From 7f791bb9b267e19a1e1bf43d8f8de76f0694ada2 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Wed, 5 Feb 2025 13:14:14 +0100 Subject: [PATCH 08/13] uv run ruff format mapillary_tools --- mapillary_tools/camm/camm_builder.py | 18 +++++++++--------- mapillary_tools/geotag/gpmf_parser.py | 6 +++--- mapillary_tools/mp4/construct_mp4_parser.py | 6 +++--- mapillary_tools/mp4/mp4_sample_parser.py | 6 +++--- mapillary_tools/mp4/simple_mp4_builder.py | 6 +++--- mapillary_tools/process_geotag_properties.py | 6 +++--- mapillary_tools/process_sequence_properties.py | 12 ++++++------ mapillary_tools/sample_video.py | 18 +++++++++--------- mapillary_tools/upload_api_v4.py | 6 +++--- 9 files changed, 42 insertions(+), 42 deletions(-) diff --git a/mapillary_tools/camm/camm_builder.py b/mapillary_tools/camm/camm_builder.py index bebda19b4..d22c624a2 100644 --- a/mapillary_tools/camm/camm_builder.py +++ b/mapillary_tools/camm/camm_builder.py @@ -88,12 +88,12 @@ def _create_edit_list_from_points( ] break - assert ( - 0 <= points[0].time - ), f"expect non-negative point time but got {points[0]}" - assert ( - points[0].time <= points[-1].time - ), f"expect points to be sorted but got first point {points[0]} and last point {points[-1]}" + assert 0 <= points[0].time, ( + f"expect non-negative point time but got {points[0]}" + ) + assert points[0].time <= points[-1].time, ( + f"expect points to be sorted but got first point {points[0]} and last point {points[-1]}" + ) if idx == 0: if 0 < points[0].time: @@ -149,9 +149,9 @@ def convert_telemetry_to_raw_samples( else: timedelta = 0 - assert ( - 0 <= timedelta <= builder.UINT32_MAX - ), f"expected timedelta {timedelta} between {measurements[idx]} and {measurements[idx + 1]} with timescale {timescale} to be <= UINT32_MAX" + assert 0 <= timedelta <= builder.UINT32_MAX, ( + f"expected timedelta {timedelta} between {measurements[idx]} and {measurements[idx + 1]} with timescale {timescale} to be <= UINT32_MAX" + ) yield sample_parser.RawSample( # will update later diff --git a/mapillary_tools/geotag/gpmf_parser.py b/mapillary_tools/geotag/gpmf_parser.py index 9b896c87b..f799ae217 100644 --- a/mapillary_tools/geotag/gpmf_parser.py +++ b/mapillary_tools/geotag/gpmf_parser.py @@ -379,9 +379,9 @@ def _apply_matrix( matrix: T.Sequence[float], values: T.Sequence[float] ) -> T.Generator[float, None, None]: size = len(values) - assert ( - len(matrix) == size * size - ), f"expecting a square matrix of size {size} x {size} but got {len(matrix)}" + assert len(matrix) == size * size, ( + f"expecting a square matrix of size {size} x {size} but got {len(matrix)}" + ) for y in range(size): row_start = y * size diff --git a/mapillary_tools/mp4/construct_mp4_parser.py b/mapillary_tools/mp4/construct_mp4_parser.py index 8ca1454ba..6391e6846 100644 --- a/mapillary_tools/mp4/construct_mp4_parser.py +++ b/mapillary_tools/mp4/construct_mp4_parser.py @@ -610,9 +610,9 @@ def find_box_at_path( return box box_data = T.cast(T.Sequence[BoxDict], box["data"]) # ListContainer from construct is not sequence - assert isinstance( - box_data, T.Sequence - ), f"expect a list of boxes but got {type(box_data)} at path {path}" + assert isinstance(box_data, T.Sequence), ( + f"expect a list of boxes but got {type(box_data)} at path {path}" + ) found = find_box_at_path(box_data, path[1:]) if found is not None: return found diff --git a/mapillary_tools/mp4/mp4_sample_parser.py b/mapillary_tools/mp4/mp4_sample_parser.py index 1cebd682b..6f5afdd46 100644 --- a/mapillary_tools/mp4/mp4_sample_parser.py +++ b/mapillary_tools/mp4/mp4_sample_parser.py @@ -59,9 +59,9 @@ def _extract_raw_samples( if not chunk_entries: return - assert len(sizes) <= len( - timedeltas - ), f"got less ({len(timedeltas)}) sample time deltas (stts) than expected ({len(sizes)})" + assert len(sizes) <= len(timedeltas), ( + f"got less ({len(timedeltas)}) sample time deltas (stts) than expected ({len(sizes)})" + ) sample_idx = 0 chunk_idx = 0 diff --git a/mapillary_tools/mp4/simple_mp4_builder.py b/mapillary_tools/mp4/simple_mp4_builder.py index 632aecca0..b93a3ef29 100644 --- a/mapillary_tools/mp4/simple_mp4_builder.py +++ b/mapillary_tools/mp4/simple_mp4_builder.py @@ -404,8 +404,8 @@ def _rewrite_and_build_moov_typed_data( for box in _filter_trak_boxes(moov_children): sample_offset = _update_sbtl_sample_offsets(box, sample_offset) moov_typed_data = _build_moov_typed_data(moov_children) - assert ( - len(moov_typed_data) == moov_typed_data_size - ), f"{len(moov_typed_data)} != {moov_typed_data_size}" + assert len(moov_typed_data) == moov_typed_data_size, ( + f"{len(moov_typed_data)} != {moov_typed_data_size}" + ) return moov_typed_data diff --git a/mapillary_tools/process_geotag_properties.py b/mapillary_tools/process_geotag_properties.py index 58805b1f7..8a007fbe1 100644 --- a/mapillary_tools/process_geotag_properties.py +++ b/mapillary_tools/process_geotag_properties.py @@ -245,9 +245,9 @@ def process_geotag_properties( metadatas.extend(video_metadata) # filenames should be deduplicated in utils.find_images/utils.find_videos - assert len(metadatas) == len( - set(metadata.filename for metadata in metadatas) - ), "duplicate filenames found" + assert len(metadatas) == len(set(metadata.filename for metadata in metadatas)), ( + "duplicate filenames found" + ) return metadatas diff --git a/mapillary_tools/process_sequence_properties.py b/mapillary_tools/process_sequence_properties.py index 2ed7049ad..27ca1a4cc 100644 --- a/mapillary_tools/process_sequence_properties.py +++ b/mapillary_tools/process_sequence_properties.py @@ -209,9 +209,9 @@ def _interpolate_subsecs_for_sorting(sequence: PointSequence) -> None: gidx = gidx + len(group) for cur, nxt in geo.pairwise(sequence): - assert ( - cur.time <= nxt.time - ), f"sequence must be sorted but got {cur.time} > {nxt.time}" + assert cur.time <= nxt.time, ( + f"sequence must be sorted but got {cur.time} > {nxt.time}" + ) def _parse_filesize_in_bytes(filesize_str: str) -> int: @@ -335,9 +335,9 @@ def process_sequence_properties( results = error_metadatas + image_metadatas + video_metadatas - assert len(metadatas) == len( - results - ), f"expected {len(metadatas)} results but got {len(results)}" + assert len(metadatas) == len(results), ( + f"expected {len(metadatas)} results but got {len(results)}" + ) assert sequence_idx == len( set(metadata.MAPSequenceUUID for metadata in image_metadatas) ) diff --git a/mapillary_tools/sample_video.py b/mapillary_tools/sample_video.py index eb0217063..89b9de9ba 100644 --- a/mapillary_tools/sample_video.py +++ b/mapillary_tools/sample_video.py @@ -118,9 +118,9 @@ def sample_video( start_time=video_start_time_dt, ) else: - assert ( - 0 < video_sample_interval - ), "expect positive video_sample_interval but got {video_sample_interval}" + assert 0 < video_sample_interval, ( + "expect positive video_sample_interval but got {video_sample_interval}" + ) _sample_single_video_by_interval( video_path, sample_dir, @@ -339,9 +339,9 @@ def _sample_single_video_by_distance( f"Expect {len(sorted_sample_indices)} samples but extracted {len(frame_samples)} samples" ) for idx, (frame_idx_1based, sample_paths) in enumerate(frame_samples): - assert ( - len(sample_paths) == 1 - ), "Expect 1 sample path at {frame_idx_1based} but got {sample_paths}" + assert len(sample_paths) == 1, ( + "Expect 1 sample path at {frame_idx_1based} but got {sample_paths}" + ) if idx + 1 != frame_idx_1based: raise exceptions.MapillaryVideoError( f"Expect {sample_paths[0]} to be {idx + 1}th sample but got {frame_idx_1based}" @@ -352,9 +352,9 @@ def _sample_single_video_by_distance( continue video_sample, interp = sample_points_by_frame_idx[sample_idx] - assert ( - interp.time == video_sample.exact_composition_time - ), f"interpolated time {interp.time} should match the video sample time {video_sample.exact_composition_time}" + assert interp.time == video_sample.exact_composition_time, ( + f"interpolated time {interp.time} should match the video sample time {video_sample.exact_composition_time}" + ) timestamp = start_time + datetime.timedelta(seconds=interp.time) exif_edit = ExifEdit(sample_paths[0]) diff --git a/mapillary_tools/upload_api_v4.py b/mapillary_tools/upload_api_v4.py index fc1b1d4f5..fe56e8f8d 100644 --- a/mapillary_tools/upload_api_v4.py +++ b/mapillary_tools/upload_api_v4.py @@ -153,9 +153,9 @@ def upload( if not chunk: break - assert ( - offset == self.entity_size - ), f"Offset ends at {offset} but the entity size is {self.entity_size}" + assert offset == self.entity_size, ( + f"Offset ends at {offset} but the entity size is {self.entity_size}" + ) payload = resp.json() try: From d79823877b0d553867135b2659811533e4874093 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Wed, 5 Feb 2025 13:15:33 +0100 Subject: [PATCH 09/13] use relative import --- mapillary_tools/geotag/geotag_images_from_exif.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mapillary_tools/geotag/geotag_images_from_exif.py b/mapillary_tools/geotag/geotag_images_from_exif.py index 515091c55..4e46f2198 100644 --- a/mapillary_tools/geotag/geotag_images_from_exif.py +++ b/mapillary_tools/geotag/geotag_images_from_exif.py @@ -4,11 +4,9 @@ from multiprocessing import Pool from pathlib import Path -from mapillary_tools import utils - from tqdm import tqdm -from .. import exceptions, exif_write, geo, types +from .. import exceptions, exif_write, geo, types, utils from ..exif_read import ExifRead, ExifReadABC from .geotag_from_generic import GeotagImagesFromGeneric From ae5397d5897fa32eec21f0d96047ec2e25601dfd Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Wed, 5 Feb 2025 13:20:15 +0100 Subject: [PATCH 10/13] fix types --- mapillary_tools/process_geotag_properties.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mapillary_tools/process_geotag_properties.py b/mapillary_tools/process_geotag_properties.py index 8a007fbe1..054a8cbd3 100644 --- a/mapillary_tools/process_geotag_properties.py +++ b/mapillary_tools/process_geotag_properties.py @@ -420,7 +420,7 @@ def _show_stats_per_filetype( error_metadatas.append(metadata) else: good_metadatas.append(metadata) - filesize_to_upload += metadata.filesize + filesize_to_upload += metadata.filesize or 0 LOG.info("%8d %s(s) read in total", len(metadatas), filetype.value) if good_metadatas: From 2d9c61fe82a68815e2798900389f0a56b199f1f8 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Wed, 5 Feb 2025 13:26:50 +0100 Subject: [PATCH 11/13] uv run -m mapillary_tools.types > schema/image_description_schema.json --- schema/image_description_schema.json | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/schema/image_description_schema.json b/schema/image_description_schema.json index e9a134613..2415e3ffa 100644 --- a/schema/image_description_schema.json +++ b/schema/image_description_schema.json @@ -57,6 +57,13 @@ ], "description": "MD5 checksum of the video content. If not provided, the uploader will compute it" }, + "filesize": { + "type": [ + "number", + "null" + ], + "description": "File size" + }, "filetype": { "type": "string", "enum": [ @@ -66,10 +73,6 @@ "video" ], "description": "The video file type" - }, - "filesize": { - "type": "number", - "description": "File size" } }, "required": [ @@ -158,16 +161,19 @@ ], "description": "MD5 checksum of the image content. If not provided, the uploader will compute it" }, + "filesize": { + "type": [ + "number", + "null" + ], + "description": "File size" + }, "filetype": { "type": "string", "enum": [ "image" ], "description": "The image file type" - }, - "filesize": { - "type": "number", - "description": "File size" } }, "required": [ From 1cb47d18a1ac3bfa4afc7477d07579d48e0b55f1 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Wed, 5 Feb 2025 13:35:52 +0100 Subject: [PATCH 12/13] fix tests --- tests/integration/test_process_and_upload.py | 6 ++++++ tests/unit/test_sequence_processing.py | 1 + 2 files changed, 7 insertions(+) diff --git a/tests/integration/test_process_and_upload.py b/tests/integration/test_process_and_upload.py index 5d74b3cf7..cde2d9407 100644 --- a/tests/integration/test_process_and_upload.py +++ b/tests/integration/test_process_and_upload.py @@ -68,6 +68,7 @@ "MAPLatitude": 33.1266719, "MAPLongitude": -117.3273063, "MAPOrientation": 1, + "filesize": 116432, "filetype": "image", }, "mly_tools_724084a74a44eebd025d0d97a1d5aa30_NA_000002.jpg": { @@ -79,6 +80,7 @@ "MAPLatitude": 33.1266891, "MAPLongitude": -117.3273151, "MAPOrientation": 1, + "filesize": 125847, "filetype": "image", }, "mly_tools_724084a74a44eebd025d0d97a1d5aa30_NA_000003.jpg": { @@ -90,6 +92,7 @@ "MAPLatitude": 33.1267078, "MAPLongitude": -117.3273264, "MAPOrientation": 1, + "filesize": 128344, "filetype": "image", }, "mly_tools_724084a74a44eebd025d0d97a1d5aa30_NA_000004.jpg": { @@ -101,6 +104,7 @@ "MAPLatitude": 33.1267282, "MAPLongitude": -117.3273391, "MAPOrientation": 1, + "filesize": 126391, "filetype": "image", }, "mly_tools_724084a74a44eebd025d0d97a1d5aa30_NA_000005.jpg": { @@ -112,6 +116,7 @@ "MAPLatitude": 33.12675, "MAPLongitude": -117.3273483, "MAPOrientation": 1, + "filesize": 128585, "filetype": "image", }, "mly_tools_724084a74a44eebd025d0d97a1d5aa30_NA_000006.jpg": { @@ -123,6 +128,7 @@ "MAPLatitude": 33.1267663, "MAPLongitude": -117.3273595, "MAPOrientation": 1, + "filesize": 132640, "filetype": "image", }, }, diff --git a/tests/unit/test_sequence_processing.py b/tests/unit/test_sequence_processing.py index 053ee2fbe..79e35db70 100644 --- a/tests/unit/test_sequence_processing.py +++ b/tests/unit/test_sequence_processing.py @@ -374,6 +374,7 @@ def test_process_finalize(setup_data): { "filename": str(test_exif), "filetype": "image", + "filesize": None, "MAPLatitude": 1, "MAPLongitude": 1, "MAPCaptureTime": "1970_01_01_00_00_02_000", From c15a0169ff7ed649e50df5047df3eb2773c577f3 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Wed, 5 Feb 2025 14:00:37 +0100 Subject: [PATCH 13/13] fix tests again --- tests/integration/fixtures.py | 15 ++++++++------- tests/integration/test_process_and_upload.py | 9 ++------- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/tests/integration/fixtures.py b/tests/integration/fixtures.py index 52b8d9c36..5d19b10dd 100644 --- a/tests/integration/fixtures.py +++ b/tests/integration/fixtures.py @@ -156,13 +156,14 @@ def run_exiftool_and_generate_geotag_args( def validate_and_extract_image(image_path: str): with open(image_path, "rb") as fp: tags = exifread.process_file(fp) - desc_tag = tags.get("Image ImageDescription") - assert desc_tag is not None, (tags, image_path) - desc = json.loads(str(desc_tag.values)) - desc["filename"] = image_path - desc["filetype"] = "image" - jsonschema.validate(desc, image_description_schema) - return desc + + desc_tag = tags.get("Image ImageDescription") + assert desc_tag is not None, (tags, image_path) + desc = json.loads(str(desc_tag.values)) + desc["filename"] = image_path + desc["filetype"] = "image" + jsonschema.validate(desc, image_description_schema) + return desc def validate_and_extract_zip(zip_path: str) -> T.List[T.Dict]: diff --git a/tests/integration/test_process_and_upload.py b/tests/integration/test_process_and_upload.py index cde2d9407..ccf7d6633 100644 --- a/tests/integration/test_process_and_upload.py +++ b/tests/integration/test_process_and_upload.py @@ -68,7 +68,6 @@ "MAPLatitude": 33.1266719, "MAPLongitude": -117.3273063, "MAPOrientation": 1, - "filesize": 116432, "filetype": "image", }, "mly_tools_724084a74a44eebd025d0d97a1d5aa30_NA_000002.jpg": { @@ -80,7 +79,6 @@ "MAPLatitude": 33.1266891, "MAPLongitude": -117.3273151, "MAPOrientation": 1, - "filesize": 125847, "filetype": "image", }, "mly_tools_724084a74a44eebd025d0d97a1d5aa30_NA_000003.jpg": { @@ -92,7 +90,6 @@ "MAPLatitude": 33.1267078, "MAPLongitude": -117.3273264, "MAPOrientation": 1, - "filesize": 128344, "filetype": "image", }, "mly_tools_724084a74a44eebd025d0d97a1d5aa30_NA_000004.jpg": { @@ -104,7 +101,6 @@ "MAPLatitude": 33.1267282, "MAPLongitude": -117.3273391, "MAPOrientation": 1, - "filesize": 126391, "filetype": "image", }, "mly_tools_724084a74a44eebd025d0d97a1d5aa30_NA_000005.jpg": { @@ -116,7 +112,6 @@ "MAPLatitude": 33.12675, "MAPLongitude": -117.3273483, "MAPOrientation": 1, - "filesize": 128585, "filetype": "image", }, "mly_tools_724084a74a44eebd025d0d97a1d5aa30_NA_000006.jpg": { @@ -128,7 +123,6 @@ "MAPLatitude": 33.1267663, "MAPLongitude": -117.3273595, "MAPOrientation": 1, - "filesize": 132640, "filetype": "image", }, }, @@ -149,7 +143,8 @@ def _validate_output(upload_dir: py.path.local, expected): actual[os.path.basename(desc["filename"])] = { k: v for k, v in desc.items() - if k not in ["filename", "md5sum", "MAPMetaTags", "MAPSequenceUUID"] + if k + not in ["filename", "filesize", "md5sum", "MAPMetaTags", "MAPSequenceUUID"] } assert expected == actual