From 68ed29eab8e8ecf86e0a3912fac2cc916c6e98f0 Mon Sep 17 00:00:00 2001 From: almac2022 Date: Tue, 10 Mar 2026 16:32:18 -0700 Subject: [PATCH] Add fly_fetch() and URL columns to test data Include thumbnail_image_url, flight_log_url, camera_calibration_url, patb_georef_url, focal_length, flying_height, and ground_sample_distance in bundled test data. Add fly_fetch() to download files by type. Fixes #15 Co-Authored-By: Claude Opus 4.6 --- NAMESPACE | 1 + R/fly_fetch.R | 97 +++++++++++++++++++++++++++++ data-raw/make_testdata.R | 11 ++-- inst/testdata/photo_centroids.gpkg | Bin 98304 -> 106496 bytes man/fly_fetch.Rd | 57 +++++++++++++++++ tests/testthat/test-fly_fetch.R | 71 +++++++++++++++++++++ 6 files changed, 233 insertions(+), 4 deletions(-) create mode 100644 R/fly_fetch.R create mode 100644 man/fly_fetch.Rd create mode 100644 tests/testthat/test-fly_fetch.R diff --git a/NAMESPACE b/NAMESPACE index 162e4e1..63cea0f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,7 @@ # Generated by roxygen2: do not edit by hand export(fly_coverage) +export(fly_fetch) export(fly_filter) export(fly_footprint) export(fly_overlap) diff --git a/R/fly_fetch.R b/R/fly_fetch.R new file mode 100644 index 0000000..abf26db --- /dev/null +++ b/R/fly_fetch.R @@ -0,0 +1,97 @@ +#' Download airphoto files from BC Data Catalogue URLs +#' +#' Downloads thumbnail images, flight logs, camera calibration reports, or +#' geo-referencing files for selected airphotos. The URL columns must be +#' present in the input data (available from the full BC Data Catalogue +#' centroid layer). +#' +#' @param photos_sf An sf object with airphoto metadata, typically output +#' from [fly_select()] or [fly_filter()]. Must contain the relevant URL +#' column for the requested `type`. +#' @param type File type to download. One of `"thumbnail"`, +#' `"flight_log"`, `"calibration"`, or `"georef"`. +#' @param dest_dir Directory to save downloaded files. Created if it does +#' not exist. +#' @param overwrite If `FALSE` (default), skip files that already exist +#' in `dest_dir`. +#' @return A tibble with columns `airp_id`, `url`, `dest`, and `success`. +#' +#' @details +#' URL column mapping: +#' \itemize{ +#' \item `"thumbnail"` → `thumbnail_image_url` +#' \item `"flight_log"` → `flight_log_url` +#' \item `"calibration"` → `camera_calibration_url` +#' \item `"georef"` → `patb_georef_url` +#' } +#' +#' Photos with missing (`NA` or empty) URLs are skipped and reported as +#' `success = FALSE` in the output. +#' +#' @examples +#' centroids <- sf::st_read(system.file("testdata/photo_centroids.gpkg", package = "fly")) +#' +#' # Download thumbnails for first 2 photos +#' result <- fly_fetch(centroids[1:2, ], type = "thumbnail", +#' dest_dir = tempdir()) +#' result +#' +#' @export +fly_fetch <- function(photos_sf, type = "thumbnail", + dest_dir = "photos", overwrite = FALSE) { + type <- match.arg(type, c("thumbnail", "flight_log", "calibration", "georef")) + + url_col <- switch(type, + thumbnail = "thumbnail_image_url", + flight_log = "flight_log_url", + calibration = "camera_calibration_url", + georef = "patb_georef_url" + ) + + if (!url_col %in% names(photos_sf)) { + stop("Column `", url_col, "` not found in input data. ", + "Use full BC Data Catalogue centroid data to get URL columns.", + call. = FALSE) + } + + dir.create(dest_dir, recursive = TRUE, showWarnings = FALSE) + + urls <- photos_sf[[url_col]] + ids <- if ("airp_id" %in% names(photos_sf)) { + photos_sf[["airp_id"]] + } else { + seq_len(nrow(photos_sf)) + } + + results <- dplyr::tibble( + airp_id = ids, + url = urls, + dest = NA_character_, + success = FALSE + ) + + for (i in seq_len(nrow(results))) { + u <- results$url[i] + if (is.na(u) || u == "") next + + dest_file <- file.path(dest_dir, basename(u)) + results$dest[i] <- dest_file + + if (!overwrite && file.exists(dest_file)) { + results$success[i] <- TRUE + next + } + + results$success[i] <- tryCatch({ + utils::download.file(u, dest_file, mode = "wb", quiet = TRUE) + file.exists(dest_file) && file.size(dest_file) > 0 + }, error = function(e) FALSE) + } + + n_ok <- sum(results$success) + n_skip <- sum(is.na(results$url) | results$url == "") + message("Downloaded ", n_ok, " of ", nrow(results), " files", + if (n_skip > 0) paste0(" (", n_skip, " skipped, no URL)") else "") + + results +} diff --git a/data-raw/make_testdata.R b/data-raw/make_testdata.R index 6b719a0..864b64b 100644 --- a/data-raw/make_testdata.R +++ b/data-raw/make_testdata.R @@ -6,14 +6,14 @@ # Crop near Houston, BC. # Dual-scale coverage: 1:12000 and 1:31680 (1968). # -# Source: airbc cached data (BC Data Catalogue + flooded VCA output) -# Run from airbc repo root: Rscript ../fly/data-raw/make_testdata.R +# Source: diggs cached data (BC Data Catalogue + flooded VCA output) +# Run from fly repo root: Rscript data-raw/make_testdata.R library(sf) library(dplyr) sf_use_s2(FALSE) -airbc_data <- file.path(dirname(getwd()), "airbc", "data") +airbc_data <- file.path(dirname(getwd()), "diggs", "data") outdir <- "inst/testdata" dir.create(outdir, recursive = TRUE, showWarnings = FALSE) @@ -57,7 +57,10 @@ test_photos <- bind_rows(sample_12, sample_31) # Keep essential columns only test_photos <- test_photos |> select(airp_id, photo_year, photo_date, scale, film_roll, - frame_number, media, photo_tag, nts_tile, geometry) + frame_number, media, photo_tag, nts_tile, + focal_length, flying_height, ground_sample_distance, + thumbnail_image_url, flight_log_url, + camera_calibration_url, patb_georef_url, geometry) st_write(test_photos, file.path(outdir, "photo_centroids.gpkg"), delete_dsn = TRUE, quiet = TRUE) message("photo_centroids.gpkg: ", nrow(test_photos), " photos (", diff --git a/inst/testdata/photo_centroids.gpkg b/inst/testdata/photo_centroids.gpkg index 842827b0f747ad35436e9576bcb7a342db9d83dc..3ccf69849ab3b78dba78a88ce3416cedf9e1d60a 100644 GIT binary patch delta 6717 zcmeI1NlX(_7{}*LOV{bbW?2jrBnCt~OQG9^i;!R}#8{07!#FL|MW&sm3z3AaQSmBp zXiN-OFJ4RyoQ%YH^8z-lmsH<~3h`@1O7c`hVYBUe+vc zYo2$5j#c|4=y;Qy!Yw2bpeWyv$43$l_mHRX6(8Kjv3JI>_KBun{YABm4k3GHwQ0uq z32_@8hNt=_gxK2eWTM%4Fu}w)Iu>UlES*hpz4pN9y;5i~ zCLQPaNX`2Y6K7KlEfB?mDJB!+6E(|8CKIG1ET3Y-C+`Ou1a*MrP}Hc!a)@0tUNT(K zlRA%fL^IYfp}wn{Lc<`AWLH=HZMm$q6&X{ZJ)P~KyQdvRkz8Z06G8I7^x*aq^1`+g z&18~kACB`$HW6o%X-9;gbp%6>5Q7(Yl*UP~%Yz3)!%l)MKWKvV(#2KB!=$jWvmyxF z4)Xj=IaG@0I65gvbU9s#M~ZQBvz7`gTEiMJfPl#AIs2DRScy_>f~K4<8CivRMb@mj z!dk2ArlF+kf3dnstS*{xkutKn=#s8k6P(p%l*CF6)q@p-vp$l@>Z}K=5zhKq8mmXn z+N$Khtf9hMTU$P?SX+kff3GddLy7q+JZOq?%ciRsuiOXpaMqWSSV^~hx>m93;H*c| zSiN;&)xudHm?Y(ZQ@);6rK<+c`av2iHC&H;ZGf}(nTxvWSI$*iq`NM2LPP)wIo*3Er-uC7d@fa2LyZDY~8d1wYATR zWD$$jY&NwjZWr#%6}L-NoQ~pl2}*Ih@D8WA-9Mc1|9V2Na%GC!Rot%Pb``g)xLsM? zZr8EfElxg6g6CCZcEqAHTTN}4z0hd2V!{l!rSfl9J8R^1lIC|VLbfxT^Td5M!&!Hv zu@1{O52<2pg0qt4+4wnUxWn?z#Hv^u;jC-YSSk6=Y{#sdcHq5#>OSTsXcO;eHp+K6 XTf!X<2Ec{<+eL59EL@-f75>E5SUPO%lkRmup)CL3*qtH4y_-Bz$gG_H=yiLT<};85H|S}LNWkW~b!FOcGI{qP>&;XUtM@;qrX454Bq3!&rHxrCK; zbE$M+cXwaXji*1j{-1<%9x**5CKsfr_*?LJm%AaCByMczPh3X-s9x)j#bSUK_h1|v z=-)la`ZwignHOfZC+}Xh@2e5HIqN8eHF`v$%^!T;1!%;%*w#yeCiWrYkKY~eYEMXR zbQ~zO`{XqashG)eTu@oytdiAFZpl~DV4j#$M%mafI`u1kNmsQ)ZB%_!3u?P^p)B~d zc1j`npuvp_d~0(Pgj<_vZ$Eq@`2b_d0kk^CqJwC)j75gf?Dw|w^Nr@MD)E^30gxkk zQngC<8;`3dW9tH%!Pv5hrZcu!gAJ`}mByU()|giro0icO#wHXrnXxeyO=4_BL#tsd zqoaw8xdxiRSeJ>$GuA=SfU&RzxlL@34JT Wj?!QI@Bl%2Qi3iBoZXk81^xj8Qjt#p diff --git a/man/fly_fetch.Rd b/man/fly_fetch.Rd new file mode 100644 index 0000000..9ab51c5 --- /dev/null +++ b/man/fly_fetch.Rd @@ -0,0 +1,57 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fly_fetch.R +\name{fly_fetch} +\alias{fly_fetch} +\title{Download airphoto files from BC Data Catalogue URLs} +\usage{ +fly_fetch( + photos_sf, + type = "thumbnail", + dest_dir = "photos", + overwrite = FALSE +) +} +\arguments{ +\item{photos_sf}{An sf object with airphoto metadata, typically output +from \code{\link[=fly_select]{fly_select()}} or \code{\link[=fly_filter]{fly_filter()}}. Must contain the relevant URL +column for the requested \code{type}.} + +\item{type}{File type to download. One of \code{"thumbnail"}, +\code{"flight_log"}, \code{"calibration"}, or \code{"georef"}.} + +\item{dest_dir}{Directory to save downloaded files. Created if it does +not exist.} + +\item{overwrite}{If \code{FALSE} (default), skip files that already exist +in \code{dest_dir}.} +} +\value{ +A tibble with columns \code{airp_id}, \code{url}, \code{dest}, and \code{success}. +} +\description{ +Downloads thumbnail images, flight logs, camera calibration reports, or +geo-referencing files for selected airphotos. The URL columns must be +present in the input data (available from the full BC Data Catalogue +centroid layer). +} +\details{ +URL column mapping: +\itemize{ +\item \code{"thumbnail"} → \code{thumbnail_image_url} +\item \code{"flight_log"} → \code{flight_log_url} +\item \code{"calibration"} → \code{camera_calibration_url} +\item \code{"georef"} → \code{patb_georef_url} +} + +Photos with missing (\code{NA} or empty) URLs are skipped and reported as +\code{success = FALSE} in the output. +} +\examples{ +centroids <- sf::st_read(system.file("testdata/photo_centroids.gpkg", package = "fly")) + +# Download thumbnails for first 2 photos +result <- fly_fetch(centroids[1:2, ], type = "thumbnail", + dest_dir = tempdir()) +result + +} diff --git a/tests/testthat/test-fly_fetch.R b/tests/testthat/test-fly_fetch.R new file mode 100644 index 0000000..3ade005 --- /dev/null +++ b/tests/testthat/test-fly_fetch.R @@ -0,0 +1,71 @@ +test_that("fly_fetch returns expected columns", { + centroids <- sf::st_read(testdata_path("photo_centroids.gpkg"), quiet = TRUE) + result <- fly_fetch(centroids[1, ], type = "thumbnail", + dest_dir = tempdir()) + expect_s3_class(result, "tbl_df") + expect_true(all(c("airp_id", "url", "dest", "success") %in% names(result))) +}) + +test_that("fly_fetch downloads thumbnail files", { + centroids <- sf::st_read(testdata_path("photo_centroids.gpkg"), quiet = TRUE) + dest <- file.path(tempdir(), "fly_test_thumbs") + unlink(dest, recursive = TRUE) + + result <- fly_fetch(centroids[1:2, ], type = "thumbnail", + dest_dir = dest) + expect_equal(nrow(result), 2) + # Files should exist on disk + downloaded <- result[result$success, ] + expect_true(all(file.exists(downloaded$dest))) +}) + +test_that("fly_fetch skips existing files when overwrite is FALSE", { + centroids <- sf::st_read(testdata_path("photo_centroids.gpkg"), quiet = TRUE) + dest <- file.path(tempdir(), "fly_test_nooverwrite") + unlink(dest, recursive = TRUE) + + # Download once + fly_fetch(centroids[1, ], type = "thumbnail", dest_dir = dest) + # Get file modification time + f <- list.files(dest, full.names = TRUE)[1] + mtime1 <- file.mtime(f) + Sys.sleep(1) + + # Download again without overwrite + fly_fetch(centroids[1, ], type = "thumbnail", + dest_dir = dest, overwrite = FALSE) + mtime2 <- file.mtime(f) + expect_equal(mtime1, mtime2) +}) + +test_that("fly_fetch handles missing URL column", { + centroids <- sf::st_read(testdata_path("photo_centroids.gpkg"), quiet = TRUE) + centroids$thumbnail_image_url <- NULL + expect_error(fly_fetch(centroids, type = "thumbnail"), + "not found in input data") +}) + +test_that("fly_fetch handles NA URLs gracefully", { + centroids <- sf::st_read(testdata_path("photo_centroids.gpkg"), quiet = TRUE) + centroids$thumbnail_image_url[1] <- NA + result <- fly_fetch(centroids[1, ], type = "thumbnail", + dest_dir = tempdir()) + expect_false(result$success[1]) +}) + +test_that("fly_fetch rejects invalid type", { + centroids <- sf::st_read(testdata_path("photo_centroids.gpkg"), quiet = TRUE) + expect_error(fly_fetch(centroids, type = "bogus")) +}) + +test_that("fly_fetch maps type to correct URL column", { + centroids <- sf::st_read(testdata_path("photo_centroids.gpkg"), quiet = TRUE) + dest <- file.path(tempdir(), "fly_test_flight_log") + unlink(dest, recursive = TRUE) + + result <- fly_fetch(centroids[1, ], type = "flight_log", + dest_dir = dest) + expect_s3_class(result, "tbl_df") + # Should use flight_log_url column + expect_equal(result$url, centroids$flight_log_url[1]) +})