Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion client/containers/Pub/PubHeader/Download.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,6 @@ const Download = (props: Props) => {
communitySubdomain: communityData.subdomain,
format: type.format,
pubId: pubData.id,
isProd: locationData.isProd,
});
if (type.format === 'formatted') {
return download(formattedDownload.url);
Expand Down
3 changes: 0 additions & 3 deletions infra/.env.test
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,6 @@ MAILCHIMP_API_KEY=xxx
FIREBASE_SERVICE_ACCOUNT_BASE64=xxx

CLOUDAMQP_URL=xxx
ALGOLIA_ID=ooo
ALGOLIA_KEY=ooo
ALGOLIA_SEARCH_KEY=ooo
JWT_SIGNING_SECRET=shhhhhh
BYPASS_CAPTCHA=true
FIREBASE_TEST_DB_URL=http://localhost:9875?ns=pubpub-v6
Expand Down
12 changes: 6 additions & 6 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

97 changes: 80 additions & 17 deletions server/analytics/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,21 +9,82 @@ import { contract } from 'utils/api/contract';

import { enqueue } from './writeBuffer';

// ─── Stitch dual-write (temporary for rollback safety) ──────────────────────

/** Fire-and-forget POST to the old Stitch/Redshift webhook so we can rollback if needed. */
function sendToStitch(payload: unknown) {
fetch(env.STITCH_WEBHOOK_URL, {
method: 'POST',
body: JSON.stringify(payload),
headers: { 'Content-Type': 'application/json' },
}).catch(() => {
// Silently swallow — Stitch is best-effort during the transition period.
});
}

const s = initServer();

const toEventRecord = (
payload: AnalyticsEventPayload,
enrichment: { country: string | null; countryCode: string | null },
) => {
// ─── validation helpers ──────────────────────────────────────────────────────

const UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;

/** Coerce a value to a valid UUID or null. Prevents bad UUIDs from killing the entire bulkCreate batch. */
function sanitizeUuid(val: unknown): string | null {
return typeof val === 'string' && UUID_RE.test(val) ? val : null;
}

const THIRTY_DAYS_MS = 30 * 24 * 60 * 60 * 1000;

/** Returns true if the timestamp is within an acceptable range (not future, not >30 days old). */
function isTimestampValid(ts: number): boolean {
const now = Date.now();
return ts <= now && ts >= now - THIRTY_DAYS_MS;
}

// ─── fields to strip (no longer stored) ──────────────────────────────────────

const DROPPED_FIELDS = new Set([
'title',
'country',
'countryCode',
'isProd',
'communityName',
'communitySubdomain',
'pubTitle',
'pubSlug',
'collectionTitle',
'collectionSlug',
'collectionKind',
'collectionIds',
'primaryCollectionId',
'pageTitle',
'pageSlug',
]);

// ─── transform payload → DB record ──────────────────────────────────────────

const toEventRecord = (payload: AnalyticsEventPayload) => {
const raw = payload as Record<string, unknown>;
const { unique, collectionIds, timestamp, ...fields } = raw;
const { unique, timestamp, ...rest } = raw;

// Strip fields that are no longer stored in the table
const fields: Record<string, unknown> = {};
for (const [key, value] of Object.entries(rest)) {
if (!DROPPED_FIELDS.has(key)) {
fields[key] = value;
}
}

// Sanitize UUID fields to prevent a single bad value from failing the whole batch
fields.communityId = sanitizeUuid(fields.communityId);
fields.pubId = sanitizeUuid(fields.pubId);
fields.collectionId = sanitizeUuid(fields.collectionId);
fields.pageId = sanitizeUuid(fields.pageId);

return {
...fields,
...enrichment,
timestamp: new Date(timestamp as number),
createdAt: new Date(timestamp as number),
isUnique: (unique as boolean | undefined) ?? null,
collectionIds: typeof collectionIds === 'string' ? collectionIds.split(',') : null,
};
};

Expand All @@ -44,21 +105,23 @@ export const analyticsServer = s.router(contract.analytics, {
},
],
handler: async ({ body: payload }) => {
// Only record events on the production deployment (or in tests)
// to avoid polluting analytics with localhost / staging page views.
// Checked server-side so it can't be spoofed by clients.
if (!env.PUBPUB_PRODUCTION && env.NODE_ENV !== 'test') {
// Dual-write to Stitch/Redshift for rollback safety (temporary).
// Sent unconditionally (before validation) to match old behavior exactly:
// { country, countryCode, ...payload }
const { timezone } = payload;
const { name: country = null, id: countryCode = null } =
getCountryForTimezone(timezone) || {};
sendToStitch({ country, countryCode, ...payload });

// Reject events with unreasonable timestamps (future or >30 days old)
if (!isTimestampValid(payload.timestamp)) {
return {
status: 204,
body: undefined,
};
}

const { timezone } = payload;
const { name: country = null, id: countryCode = null } =
getCountryForTimezone(timezone) || {};

const record = toEventRecord(payload, { country, countryCode });
const record = toEventRecord(payload);

enqueue(record);

Expand Down
85 changes: 56 additions & 29 deletions server/analytics/impactApi.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
*
* Mounted at /api/analytics-impact/...
*/

import { getCountryForTimezone } from 'countries-and-timezones';
import { Router } from 'express';
import { QueryTypes } from 'sequelize';

Expand Down Expand Up @@ -49,13 +51,36 @@ function setCache(key: string, data: unknown) {

type DailyRow = { date: string; pageViews: number; uniquePageViews: number };
type CountryRow = { country: string; countryCode: string; count: number };
type TimezoneRow = { timezone: string; count: number };
type ReferrerRow = { referrer: string; count: number };
type CampaignRow = { campaign: string; count: number };
type TopPageRow = { pageTitle: string; path: string; count: number };
type TopPubRow = { pubTitle: string; pubId: string; views: number; downloads: number };
type TopCollectionRow = { collectionTitle: string; collectionId: string; count: number };
type DeviceRow = { device_type: string; count: number };

// ─── timezone → country mapping ──────────────────────────────────────────────

/** Rolls up timezone-level rows into country-level totals using the npm package. */
function rollUpTimezoneToCountries(rows: TimezoneRow[]): CountryRow[] {
const countryMap = new Map<string, CountryRow>();
for (const row of rows) {
const tz = getCountryForTimezone(row.timezone);
const key = tz ? tz.id : 'Unknown';
const existing = countryMap.get(key);
if (existing) {
existing.count += Number(row.count);
} else {
countryMap.set(key, {
country: tz?.name ?? 'Unknown',
countryCode: tz?.id ?? '',
count: Number(row.count),
});
}
}
return [...countryMap.values()].sort((a, b) => b.count - a.count);
}

// ─── scope filter builder ────────────────────────────────────────────────────

type Scope = {
Expand Down Expand Up @@ -102,7 +127,7 @@ async function fetchSummary(scope: Scope, startDate: string, endDate: string) {
return fetchSummaryFromRaw(scope, startDate, endDate);
}

const [daily, countries, topPubs, topPages, topCollections, referrers, campaigns, devices] =
const [daily, timezoneRows, topPubs, topPages, topCollections, referrers, campaigns, devices] =
await Promise.all([
// ── daily breakdown from matview
sequelize.query<DailyRow>(
Expand All @@ -116,16 +141,15 @@ async function fetchSummary(scope: Scope, startDate: string, endDate: string) {
ORDER BY date`,
{ replacements, type: QueryTypes.SELECT },
),
// ── countries from matview
sequelize.query<CountryRow>(
// ── countries from matview (timezone → country mapped in JS)
sequelize.query<TimezoneRow>(
`SELECT
country,
country_code AS "countryCode",
timezone,
SUM(count)::int AS count
FROM analytics_daily_country
FROM analytics_daily_timezone
WHERE ${mvWhere}
GROUP BY country, country_code
ORDER BY count DESC LIMIT 250`,
GROUP BY timezone
ORDER BY count DESC`,
{ replacements, type: QueryTypes.SELECT },
),
// ── top pubs from matview, JOIN Pubs for titles
Expand Down Expand Up @@ -221,7 +245,7 @@ async function fetchSummary(scope: Scope, startDate: string, endDate: string) {
totalUniqueVisits,
totalDownloads,
daily: dailyParsed,
countries: countries.map((c) => ({ ...c, count: Number(c.count) })),
countries: rollUpTimezoneToCountries(timezoneRows).slice(0, 250),
topPubs: topPubs.map((p) => ({
...p,
views: Number(p.views),
Expand All @@ -243,18 +267,18 @@ async function fetchSummaryFromRaw(scope: Scope, startDate: string, endDate: str
const { clause: scopeClause, replacements: scopeReplacements } = scopeWhere(scope);
const baseReplacements = { ...scopeReplacements, startDate, endDate };

const dateFilter = `"timestamp" >= :startDate::date AND "timestamp" < (:endDate::date + interval '1 day')`;
const dateFilter = `"createdAt" >= :startDate::date AND "createdAt" < (:endDate::date + interval '1 day')`;
const pageEvents = `event IN ('page','pub','collection','other')`;
const baseWhere = `${scopeClause} AND ${dateFilter}`;

const { clause: aeScopeClause } = scopeWhere(scope, 'ae');
const aeDateFilter = `ae."timestamp" >= :startDate::date AND ae."timestamp" < (:endDate::date + interval '1 day')`;
const aeDateFilter = `ae."createdAt" >= :startDate::date AND ae."createdAt" < (:endDate::date + interval '1 day')`;
const aeBaseWhere = `${aeScopeClause} AND ${aeDateFilter}`;

const [
daily,
[totalDlRow],
countries,
timezoneRows,
topPubs,
topPages,
topCollections,
Expand All @@ -264,7 +288,7 @@ async function fetchSummaryFromRaw(scope: Scope, startDate: string, endDate: str
] = await Promise.all([
sequelize.query<DailyRow>(
`SELECT
date_trunc('day', "timestamp")::date::text AS date,
date_trunc('day', "createdAt")::date::text AS date,
COUNT(*) AS "pageViews",
COUNT(*) FILTER (WHERE "isUnique" = true) AS "uniquePageViews"
FROM "AnalyticsEvents"
Expand All @@ -278,15 +302,14 @@ async function fetchSummaryFromRaw(scope: Scope, startDate: string, endDate: str
WHERE ${baseWhere} AND event = 'download'`,
{ replacements: baseReplacements, type: QueryTypes.SELECT },
),
sequelize.query<CountryRow>(
sequelize.query<TimezoneRow>(
`SELECT
COALESCE(country, 'Unknown') AS country,
COALESCE("countryCode", '') AS "countryCode",
COALESCE(timezone, '') AS timezone,
COUNT(*) AS count
FROM "AnalyticsEvents"
WHERE ${baseWhere} AND ${pageEvents}
GROUP BY country, "countryCode"
ORDER BY count DESC LIMIT 250`,
GROUP BY timezone
ORDER BY count DESC`,
{ replacements: baseReplacements, type: QueryTypes.SELECT },
),
sequelize.query<TopPubRow>(
Expand All @@ -304,23 +327,27 @@ async function fetchSummaryFromRaw(scope: Scope, startDate: string, endDate: str
),
sequelize.query<TopPageRow>(
`SELECT
COALESCE("pageTitle", "pubTitle", "collectionTitle", path, '') AS "pageTitle",
COALESCE(path, '') AS path,
COALESCE(pg.title, p.title, c.title, ae.path, '') AS "pageTitle",
COALESCE(ae.path, '') AS path,
COUNT(*) AS count
FROM "AnalyticsEvents"
WHERE ${baseWhere} AND ${pageEvents}
GROUP BY "pageTitle", "pubTitle", "collectionTitle", path
FROM "AnalyticsEvents" ae
LEFT JOIN "Pages" pg ON pg.id = ae."pageId"
LEFT JOIN "Pubs" p ON p.id = ae."pubId"
LEFT JOIN "Collections" c ON c.id = ae."collectionId"
WHERE ${aeBaseWhere} AND ae.event IN ('page','pub','collection','other')
GROUP BY pg.title, p.title, c.title, ae.path
ORDER BY count DESC LIMIT 250`,
{ replacements: baseReplacements, type: QueryTypes.SELECT },
),
sequelize.query<TopCollectionRow>(
`SELECT
COALESCE("collectionTitle", "collectionSlug", "collectionId"::text) AS "collectionTitle",
"collectionId"::text AS "collectionId",
COALESCE(c.title, ae."collectionId"::text) AS "collectionTitle",
ae."collectionId"::text AS "collectionId",
COUNT(*) AS count
FROM "AnalyticsEvents"
WHERE ${baseWhere} AND "collectionId" IS NOT NULL AND event IN ('collection','pub')
GROUP BY "collectionTitle", "collectionSlug", "collectionId"
FROM "AnalyticsEvents" ae
LEFT JOIN "Collections" c ON c.id = ae."collectionId"
WHERE ${aeBaseWhere} AND ae."collectionId" IS NOT NULL AND ae.event IN ('collection','pub')
GROUP BY ae."collectionId", c.title
ORDER BY count DESC LIMIT 250`,
{ replacements: baseReplacements, type: QueryTypes.SELECT },
),
Expand Down Expand Up @@ -376,7 +403,7 @@ async function fetchSummaryFromRaw(scope: Scope, startDate: string, endDate: str
totalUniqueVisits,
totalDownloads: parseInt(String((totalDlRow as any)?.totalDownloads ?? '0'), 10),
daily: dailyParsed,
countries: countries.map((c) => ({ ...c, count: Number(c.count) })),
countries: rollUpTimezoneToCountries(timezoneRows).slice(0, 250),
topPubs: topPubs.map((p) => ({
...p,
views: Number(p.views),
Expand Down
Loading