diff --git a/NAMESPACE b/NAMESPACE index 162e4e1..63cea0f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,7 @@ # Generated by roxygen2: do not edit by hand export(fly_coverage) +export(fly_fetch) export(fly_filter) export(fly_footprint) export(fly_overlap) diff --git a/R/fly_fetch.R b/R/fly_fetch.R new file mode 100644 index 0000000..abf26db --- /dev/null +++ b/R/fly_fetch.R @@ -0,0 +1,97 @@ +#' Download airphoto files from BC Data Catalogue URLs +#' +#' Downloads thumbnail images, flight logs, camera calibration reports, or +#' geo-referencing files for selected airphotos. The URL columns must be +#' present in the input data (available from the full BC Data Catalogue +#' centroid layer). +#' +#' @param photos_sf An sf object with airphoto metadata, typically output +#' from [fly_select()] or [fly_filter()]. Must contain the relevant URL +#' column for the requested `type`. +#' @param type File type to download. One of `"thumbnail"`, +#' `"flight_log"`, `"calibration"`, or `"georef"`. +#' @param dest_dir Directory to save downloaded files. Created if it does +#' not exist. +#' @param overwrite If `FALSE` (default), skip files that already exist +#' in `dest_dir`. +#' @return A tibble with columns `airp_id`, `url`, `dest`, and `success`. +#' +#' @details +#' URL column mapping: +#' \itemize{ +#' \item `"thumbnail"` → `thumbnail_image_url` +#' \item `"flight_log"` → `flight_log_url` +#' \item `"calibration"` → `camera_calibration_url` +#' \item `"georef"` → `patb_georef_url` +#' } +#' +#' Photos with missing (`NA` or empty) URLs are skipped and reported as +#' `success = FALSE` in the output. +#' +#' @examples +#' centroids <- sf::st_read(system.file("testdata/photo_centroids.gpkg", package = "fly")) +#' +#' # Download thumbnails for first 2 photos +#' result <- fly_fetch(centroids[1:2, ], type = "thumbnail", +#' dest_dir = tempdir()) +#' result +#' +#' @export +fly_fetch <- function(photos_sf, type = "thumbnail", + dest_dir = "photos", overwrite = FALSE) { + type <- match.arg(type, c("thumbnail", "flight_log", "calibration", "georef")) + + url_col <- switch(type, + thumbnail = "thumbnail_image_url", + flight_log = "flight_log_url", + calibration = "camera_calibration_url", + georef = "patb_georef_url" + ) + + if (!url_col %in% names(photos_sf)) { + stop("Column `", url_col, "` not found in input data. ", + "Use full BC Data Catalogue centroid data to get URL columns.", + call. = FALSE) + } + + dir.create(dest_dir, recursive = TRUE, showWarnings = FALSE) + + urls <- photos_sf[[url_col]] + ids <- if ("airp_id" %in% names(photos_sf)) { + photos_sf[["airp_id"]] + } else { + seq_len(nrow(photos_sf)) + } + + results <- dplyr::tibble( + airp_id = ids, + url = urls, + dest = NA_character_, + success = FALSE + ) + + for (i in seq_len(nrow(results))) { + u <- results$url[i] + if (is.na(u) || u == "") next + + dest_file <- file.path(dest_dir, basename(u)) + results$dest[i] <- dest_file + + if (!overwrite && file.exists(dest_file)) { + results$success[i] <- TRUE + next + } + + results$success[i] <- tryCatch({ + utils::download.file(u, dest_file, mode = "wb", quiet = TRUE) + file.exists(dest_file) && file.size(dest_file) > 0 + }, error = function(e) FALSE) + } + + n_ok <- sum(results$success) + n_skip <- sum(is.na(results$url) | results$url == "") + message("Downloaded ", n_ok, " of ", nrow(results), " files", + if (n_skip > 0) paste0(" (", n_skip, " skipped, no URL)") else "") + + results +} diff --git a/data-raw/make_testdata.R b/data-raw/make_testdata.R index 6b719a0..864b64b 100644 --- a/data-raw/make_testdata.R +++ b/data-raw/make_testdata.R @@ -6,14 +6,14 @@ # Crop near Houston, BC. # Dual-scale coverage: 1:12000 and 1:31680 (1968). # -# Source: airbc cached data (BC Data Catalogue + flooded VCA output) -# Run from airbc repo root: Rscript ../fly/data-raw/make_testdata.R +# Source: diggs cached data (BC Data Catalogue + flooded VCA output) +# Run from fly repo root: Rscript data-raw/make_testdata.R library(sf) library(dplyr) sf_use_s2(FALSE) -airbc_data <- file.path(dirname(getwd()), "airbc", "data") +airbc_data <- file.path(dirname(getwd()), "diggs", "data") outdir <- "inst/testdata" dir.create(outdir, recursive = TRUE, showWarnings = FALSE) @@ -57,7 +57,10 @@ test_photos <- bind_rows(sample_12, sample_31) # Keep essential columns only test_photos <- test_photos |> select(airp_id, photo_year, photo_date, scale, film_roll, - frame_number, media, photo_tag, nts_tile, geometry) + frame_number, media, photo_tag, nts_tile, + focal_length, flying_height, ground_sample_distance, + thumbnail_image_url, flight_log_url, + camera_calibration_url, patb_georef_url, geometry) st_write(test_photos, file.path(outdir, "photo_centroids.gpkg"), delete_dsn = TRUE, quiet = TRUE) message("photo_centroids.gpkg: ", nrow(test_photos), " photos (", diff --git a/inst/testdata/photo_centroids.gpkg b/inst/testdata/photo_centroids.gpkg index 842827b..3ccf698 100644 Binary files a/inst/testdata/photo_centroids.gpkg and b/inst/testdata/photo_centroids.gpkg differ diff --git a/man/fly_fetch.Rd b/man/fly_fetch.Rd new file mode 100644 index 0000000..9ab51c5 --- /dev/null +++ b/man/fly_fetch.Rd @@ -0,0 +1,57 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fly_fetch.R +\name{fly_fetch} +\alias{fly_fetch} +\title{Download airphoto files from BC Data Catalogue URLs} +\usage{ +fly_fetch( + photos_sf, + type = "thumbnail", + dest_dir = "photos", + overwrite = FALSE +) +} +\arguments{ +\item{photos_sf}{An sf object with airphoto metadata, typically output +from \code{\link[=fly_select]{fly_select()}} or \code{\link[=fly_filter]{fly_filter()}}. Must contain the relevant URL +column for the requested \code{type}.} + +\item{type}{File type to download. One of \code{"thumbnail"}, +\code{"flight_log"}, \code{"calibration"}, or \code{"georef"}.} + +\item{dest_dir}{Directory to save downloaded files. Created if it does +not exist.} + +\item{overwrite}{If \code{FALSE} (default), skip files that already exist +in \code{dest_dir}.} +} +\value{ +A tibble with columns \code{airp_id}, \code{url}, \code{dest}, and \code{success}. +} +\description{ +Downloads thumbnail images, flight logs, camera calibration reports, or +geo-referencing files for selected airphotos. The URL columns must be +present in the input data (available from the full BC Data Catalogue +centroid layer). +} +\details{ +URL column mapping: +\itemize{ +\item \code{"thumbnail"} → \code{thumbnail_image_url} +\item \code{"flight_log"} → \code{flight_log_url} +\item \code{"calibration"} → \code{camera_calibration_url} +\item \code{"georef"} → \code{patb_georef_url} +} + +Photos with missing (\code{NA} or empty) URLs are skipped and reported as +\code{success = FALSE} in the output. +} +\examples{ +centroids <- sf::st_read(system.file("testdata/photo_centroids.gpkg", package = "fly")) + +# Download thumbnails for first 2 photos +result <- fly_fetch(centroids[1:2, ], type = "thumbnail", + dest_dir = tempdir()) +result + +} diff --git a/tests/testthat/test-fly_fetch.R b/tests/testthat/test-fly_fetch.R new file mode 100644 index 0000000..3ade005 --- /dev/null +++ b/tests/testthat/test-fly_fetch.R @@ -0,0 +1,71 @@ +test_that("fly_fetch returns expected columns", { + centroids <- sf::st_read(testdata_path("photo_centroids.gpkg"), quiet = TRUE) + result <- fly_fetch(centroids[1, ], type = "thumbnail", + dest_dir = tempdir()) + expect_s3_class(result, "tbl_df") + expect_true(all(c("airp_id", "url", "dest", "success") %in% names(result))) +}) + +test_that("fly_fetch downloads thumbnail files", { + centroids <- sf::st_read(testdata_path("photo_centroids.gpkg"), quiet = TRUE) + dest <- file.path(tempdir(), "fly_test_thumbs") + unlink(dest, recursive = TRUE) + + result <- fly_fetch(centroids[1:2, ], type = "thumbnail", + dest_dir = dest) + expect_equal(nrow(result), 2) + # Files should exist on disk + downloaded <- result[result$success, ] + expect_true(all(file.exists(downloaded$dest))) +}) + +test_that("fly_fetch skips existing files when overwrite is FALSE", { + centroids <- sf::st_read(testdata_path("photo_centroids.gpkg"), quiet = TRUE) + dest <- file.path(tempdir(), "fly_test_nooverwrite") + unlink(dest, recursive = TRUE) + + # Download once + fly_fetch(centroids[1, ], type = "thumbnail", dest_dir = dest) + # Get file modification time + f <- list.files(dest, full.names = TRUE)[1] + mtime1 <- file.mtime(f) + Sys.sleep(1) + + # Download again without overwrite + fly_fetch(centroids[1, ], type = "thumbnail", + dest_dir = dest, overwrite = FALSE) + mtime2 <- file.mtime(f) + expect_equal(mtime1, mtime2) +}) + +test_that("fly_fetch handles missing URL column", { + centroids <- sf::st_read(testdata_path("photo_centroids.gpkg"), quiet = TRUE) + centroids$thumbnail_image_url <- NULL + expect_error(fly_fetch(centroids, type = "thumbnail"), + "not found in input data") +}) + +test_that("fly_fetch handles NA URLs gracefully", { + centroids <- sf::st_read(testdata_path("photo_centroids.gpkg"), quiet = TRUE) + centroids$thumbnail_image_url[1] <- NA + result <- fly_fetch(centroids[1, ], type = "thumbnail", + dest_dir = tempdir()) + expect_false(result$success[1]) +}) + +test_that("fly_fetch rejects invalid type", { + centroids <- sf::st_read(testdata_path("photo_centroids.gpkg"), quiet = TRUE) + expect_error(fly_fetch(centroids, type = "bogus")) +}) + +test_that("fly_fetch maps type to correct URL column", { + centroids <- sf::st_read(testdata_path("photo_centroids.gpkg"), quiet = TRUE) + dest <- file.path(tempdir(), "fly_test_flight_log") + unlink(dest, recursive = TRUE) + + result <- fly_fetch(centroids[1, ], type = "flight_log", + dest_dir = dest) + expect_s3_class(result, "tbl_df") + # Should use flight_log_url column + expect_equal(result$url, centroids$flight_log_url[1]) +})