Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

### Ads Client
* HTTP cache TTL is now resolved by priority — explicit per-request TTL (if any) wins, otherwise the response's `Cache-Control: max-age` is used, otherwise the configured `default_ttl`. The resolved TTL is capped at 7 days as a safety net against misconfigured server values. Previously the layer took the minimum of all three, which effectively ignored the server's `max-age` signal.
* Callback requests (`recordImpression`, `recordClick`, `reportAd`) now retry once after a short delay on transient transport-level failures (timeouts, connection resets, DNS), to better survive flaky mobile networks. HTTP status errors are not retried.

### Remote Settings
* Add uptake telemetry support ([#7288](https://github.com/mozilla/application-services/pull/7288))
Expand Down
70 changes: 69 additions & 1 deletion components/ads-client/src/mars/transport.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
*/

use std::hash::Hash;
use std::time::Duration;

use viaduct::{Client, ClientSettings, Request, Response};

Expand All @@ -16,6 +17,26 @@ use crate::{

const OHTTP_CHANNEL_ID: &str = "ads-client";

/// Wait before the single retry attempt for fire-and-forget callback
/// requests. Short enough to feel responsive on mobile, long enough to
/// let a transient blip recover.
const CALLBACK_RETRY_DELAY: Duration = Duration::from_millis(500);

/// Run `op` and, if it returns `Err`, sleep for `delay` and run it once
/// more. The second result is returned as-is.
fn retry_once<F, T, E>(delay: Duration, mut op: F) -> Result<T, E>
where
F: FnMut() -> Result<T, E>,
{
match op() {
Ok(v) => Ok(v),
Err(_) => {
std::thread::sleep(delay);
op()
}
}
}

pub struct MARSTransport<T: Telemetry> {
http_cache: Option<HttpCache>,
telemetry: T,
Expand All @@ -38,7 +59,11 @@ impl<T: Telemetry> MARSTransport<T> {

pub fn fire(&self, request: Request, ohttp: bool) -> Result<(), TransportError> {
let client = Self::client_for(ohttp)?;
let response = client.send_sync(request)?;
// Callback requests (impression/click/report) are fire-and-forget on
// unreliable mobile networks. Retry once on a transient viaduct error
// (timeout, connection reset, DNS) before giving up. HTTP status
// errors are not retried — those are decided after this call returns.
let response = retry_once(CALLBACK_RETRY_DELAY, || client.send_sync(request.clone()))?;
HTTPError::check(&response)?;
Ok(())
}
Expand Down Expand Up @@ -83,3 +108,46 @@ impl<T: Telemetry> MARSTransport<T> {
}
}
}

#[cfg(test)]
mod tests {
use super::*;
use std::cell::Cell;

#[test]
fn retry_once_returns_first_success_without_calling_again() {
let calls = Cell::new(0);
let result: Result<&'static str, ()> = retry_once(Duration::ZERO, || {
calls.set(calls.get() + 1);
Ok("ok")
});
assert_eq!(result, Ok("ok"));
assert_eq!(calls.get(), 1);
}

#[test]
fn retry_once_retries_after_failure_and_returns_success() {
let calls = Cell::new(0);
let result: Result<&'static str, &'static str> = retry_once(Duration::ZERO, || {
calls.set(calls.get() + 1);
if calls.get() == 1 {
Err("transient")
} else {
Ok("ok")
}
});
assert_eq!(result, Ok("ok"));
assert_eq!(calls.get(), 2);
}

#[test]
fn retry_once_gives_up_after_second_failure() {
let calls = Cell::new(0);
let result: Result<(), &'static str> = retry_once(Duration::ZERO, || {
calls.set(calls.get() + 1);
Err("still broken")
});
assert_eq!(result, Err("still broken"));
assert_eq!(calls.get(), 2);
}
}
Loading