Skip to content

Commit 14e474a

Browse files
authored
Clean local analytics table (#3579)
1 parent 2c55108 commit 14e474a

11 files changed

Lines changed: 265 additions & 234 deletions

File tree

client/containers/Pub/PubHeader/Download.tsx

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,6 @@ const Download = (props: Props) => {
7171
communitySubdomain: communityData.subdomain,
7272
format: type.format,
7373
pubId: pubData.id,
74-
isProd: locationData.isProd,
7574
});
7675
if (type.format === 'formatted') {
7776
return download(formattedDownload.url);

infra/.env.test

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,6 @@ MAILCHIMP_API_KEY=xxx
1010
FIREBASE_SERVICE_ACCOUNT_BASE64=xxx
1111

1212
CLOUDAMQP_URL=xxx
13-
ALGOLIA_ID=ooo
14-
ALGOLIA_KEY=ooo
15-
ALGOLIA_SEARCH_KEY=ooo
1613
JWT_SIGNING_SECRET=shhhhhh
1714
BYPASS_CAPTCHA=true
1815
FIREBASE_TEST_DB_URL=http://localhost:9875?ns=pubpub-v6

pnpm-lock.yaml

Lines changed: 6 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

server/analytics/api.ts

Lines changed: 80 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -9,21 +9,82 @@ import { contract } from 'utils/api/contract';
99

1010
import { enqueue } from './writeBuffer';
1111

12+
// ─── Stitch dual-write (temporary for rollback safety) ──────────────────────
13+
14+
/** Fire-and-forget POST to the old Stitch/Redshift webhook so we can rollback if needed. */
15+
function sendToStitch(payload: unknown) {
16+
fetch(env.STITCH_WEBHOOK_URL, {
17+
method: 'POST',
18+
body: JSON.stringify(payload),
19+
headers: { 'Content-Type': 'application/json' },
20+
}).catch(() => {
21+
// Silently swallow — Stitch is best-effort during the transition period.
22+
});
23+
}
24+
1225
const s = initServer();
1326

14-
const toEventRecord = (
15-
payload: AnalyticsEventPayload,
16-
enrichment: { country: string | null; countryCode: string | null },
17-
) => {
27+
// ─── validation helpers ──────────────────────────────────────────────────────
28+
29+
const UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
30+
31+
/** Coerce a value to a valid UUID or null. Prevents bad UUIDs from killing the entire bulkCreate batch. */
32+
function sanitizeUuid(val: unknown): string | null {
33+
return typeof val === 'string' && UUID_RE.test(val) ? val : null;
34+
}
35+
36+
const THIRTY_DAYS_MS = 30 * 24 * 60 * 60 * 1000;
37+
38+
/** Returns true if the timestamp is within an acceptable range (not future, not >30 days old). */
39+
function isTimestampValid(ts: number): boolean {
40+
const now = Date.now();
41+
return ts <= now && ts >= now - THIRTY_DAYS_MS;
42+
}
43+
44+
// ─── fields to strip (no longer stored) ──────────────────────────────────────
45+
46+
const DROPPED_FIELDS = new Set([
47+
'title',
48+
'country',
49+
'countryCode',
50+
'isProd',
51+
'communityName',
52+
'communitySubdomain',
53+
'pubTitle',
54+
'pubSlug',
55+
'collectionTitle',
56+
'collectionSlug',
57+
'collectionKind',
58+
'collectionIds',
59+
'primaryCollectionId',
60+
'pageTitle',
61+
'pageSlug',
62+
]);
63+
64+
// ─── transform payload → DB record ──────────────────────────────────────────
65+
66+
const toEventRecord = (payload: AnalyticsEventPayload) => {
1867
const raw = payload as Record<string, unknown>;
19-
const { unique, collectionIds, timestamp, ...fields } = raw;
68+
const { unique, timestamp, ...rest } = raw;
69+
70+
// Strip fields that are no longer stored in the table
71+
const fields: Record<string, unknown> = {};
72+
for (const [key, value] of Object.entries(rest)) {
73+
if (!DROPPED_FIELDS.has(key)) {
74+
fields[key] = value;
75+
}
76+
}
77+
78+
// Sanitize UUID fields to prevent a single bad value from failing the whole batch
79+
fields.communityId = sanitizeUuid(fields.communityId);
80+
fields.pubId = sanitizeUuid(fields.pubId);
81+
fields.collectionId = sanitizeUuid(fields.collectionId);
82+
fields.pageId = sanitizeUuid(fields.pageId);
2083

2184
return {
2285
...fields,
23-
...enrichment,
24-
timestamp: new Date(timestamp as number),
86+
createdAt: new Date(timestamp as number),
2587
isUnique: (unique as boolean | undefined) ?? null,
26-
collectionIds: typeof collectionIds === 'string' ? collectionIds.split(',') : null,
2788
};
2889
};
2990

@@ -44,21 +105,23 @@ export const analyticsServer = s.router(contract.analytics, {
44105
},
45106
],
46107
handler: async ({ body: payload }) => {
47-
// Only record events on the production deployment (or in tests)
48-
// to avoid polluting analytics with localhost / staging page views.
49-
// Checked server-side so it can't be spoofed by clients.
50-
if (!env.PUBPUB_PRODUCTION && env.NODE_ENV !== 'test') {
108+
// Dual-write to Stitch/Redshift for rollback safety (temporary).
109+
// Sent unconditionally (before validation) to match old behavior exactly:
110+
// { country, countryCode, ...payload }
111+
const { timezone } = payload;
112+
const { name: country = null, id: countryCode = null } =
113+
getCountryForTimezone(timezone) || {};
114+
sendToStitch({ country, countryCode, ...payload });
115+
116+
// Reject events with unreasonable timestamps (future or >30 days old)
117+
if (!isTimestampValid(payload.timestamp)) {
51118
return {
52119
status: 204,
53120
body: undefined,
54121
};
55122
}
56123

57-
const { timezone } = payload;
58-
const { name: country = null, id: countryCode = null } =
59-
getCountryForTimezone(timezone) || {};
60-
61-
const record = toEventRecord(payload, { country, countryCode });
124+
const record = toEventRecord(payload);
62125

63126
enqueue(record);
64127

server/analytics/impactApi.ts

Lines changed: 56 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
*
55
* Mounted at /api/analytics-impact/...
66
*/
7+
8+
import { getCountryForTimezone } from 'countries-and-timezones';
79
import { Router } from 'express';
810
import { QueryTypes } from 'sequelize';
911

@@ -49,13 +51,36 @@ function setCache(key: string, data: unknown) {
4951

5052
type DailyRow = { date: string; pageViews: number; uniquePageViews: number };
5153
type CountryRow = { country: string; countryCode: string; count: number };
54+
type TimezoneRow = { timezone: string; count: number };
5255
type ReferrerRow = { referrer: string; count: number };
5356
type CampaignRow = { campaign: string; count: number };
5457
type TopPageRow = { pageTitle: string; path: string; count: number };
5558
type TopPubRow = { pubTitle: string; pubId: string; views: number; downloads: number };
5659
type TopCollectionRow = { collectionTitle: string; collectionId: string; count: number };
5760
type DeviceRow = { device_type: string; count: number };
5861

62+
// ─── timezone → country mapping ──────────────────────────────────────────────
63+
64+
/** Rolls up timezone-level rows into country-level totals using the npm package. */
65+
function rollUpTimezoneToCountries(rows: TimezoneRow[]): CountryRow[] {
66+
const countryMap = new Map<string, CountryRow>();
67+
for (const row of rows) {
68+
const tz = getCountryForTimezone(row.timezone);
69+
const key = tz ? tz.id : 'Unknown';
70+
const existing = countryMap.get(key);
71+
if (existing) {
72+
existing.count += Number(row.count);
73+
} else {
74+
countryMap.set(key, {
75+
country: tz?.name ?? 'Unknown',
76+
countryCode: tz?.id ?? '',
77+
count: Number(row.count),
78+
});
79+
}
80+
}
81+
return [...countryMap.values()].sort((a, b) => b.count - a.count);
82+
}
83+
5984
// ─── scope filter builder ────────────────────────────────────────────────────
6085

6186
type Scope = {
@@ -102,7 +127,7 @@ async function fetchSummary(scope: Scope, startDate: string, endDate: string) {
102127
return fetchSummaryFromRaw(scope, startDate, endDate);
103128
}
104129

105-
const [daily, countries, topPubs, topPages, topCollections, referrers, campaigns, devices] =
130+
const [daily, timezoneRows, topPubs, topPages, topCollections, referrers, campaigns, devices] =
106131
await Promise.all([
107132
// ── daily breakdown from matview
108133
sequelize.query<DailyRow>(
@@ -116,16 +141,15 @@ async function fetchSummary(scope: Scope, startDate: string, endDate: string) {
116141
ORDER BY date`,
117142
{ replacements, type: QueryTypes.SELECT },
118143
),
119-
// ── countries from matview
120-
sequelize.query<CountryRow>(
144+
// ── countries from matview (timezone → country mapped in JS)
145+
sequelize.query<TimezoneRow>(
121146
`SELECT
122-
country,
123-
country_code AS "countryCode",
147+
timezone,
124148
SUM(count)::int AS count
125-
FROM analytics_daily_country
149+
FROM analytics_daily_timezone
126150
WHERE ${mvWhere}
127-
GROUP BY country, country_code
128-
ORDER BY count DESC LIMIT 250`,
151+
GROUP BY timezone
152+
ORDER BY count DESC`,
129153
{ replacements, type: QueryTypes.SELECT },
130154
),
131155
// ── top pubs from matview, JOIN Pubs for titles
@@ -221,7 +245,7 @@ async function fetchSummary(scope: Scope, startDate: string, endDate: string) {
221245
totalUniqueVisits,
222246
totalDownloads,
223247
daily: dailyParsed,
224-
countries: countries.map((c) => ({ ...c, count: Number(c.count) })),
248+
countries: rollUpTimezoneToCountries(timezoneRows).slice(0, 250),
225249
topPubs: topPubs.map((p) => ({
226250
...p,
227251
views: Number(p.views),
@@ -243,18 +267,18 @@ async function fetchSummaryFromRaw(scope: Scope, startDate: string, endDate: str
243267
const { clause: scopeClause, replacements: scopeReplacements } = scopeWhere(scope);
244268
const baseReplacements = { ...scopeReplacements, startDate, endDate };
245269

246-
const dateFilter = `"timestamp" >= :startDate::date AND "timestamp" < (:endDate::date + interval '1 day')`;
270+
const dateFilter = `"createdAt" >= :startDate::date AND "createdAt" < (:endDate::date + interval '1 day')`;
247271
const pageEvents = `event IN ('page','pub','collection','other')`;
248272
const baseWhere = `${scopeClause} AND ${dateFilter}`;
249273

250274
const { clause: aeScopeClause } = scopeWhere(scope, 'ae');
251-
const aeDateFilter = `ae."timestamp" >= :startDate::date AND ae."timestamp" < (:endDate::date + interval '1 day')`;
275+
const aeDateFilter = `ae."createdAt" >= :startDate::date AND ae."createdAt" < (:endDate::date + interval '1 day')`;
252276
const aeBaseWhere = `${aeScopeClause} AND ${aeDateFilter}`;
253277

254278
const [
255279
daily,
256280
[totalDlRow],
257-
countries,
281+
timezoneRows,
258282
topPubs,
259283
topPages,
260284
topCollections,
@@ -264,7 +288,7 @@ async function fetchSummaryFromRaw(scope: Scope, startDate: string, endDate: str
264288
] = await Promise.all([
265289
sequelize.query<DailyRow>(
266290
`SELECT
267-
date_trunc('day', "timestamp")::date::text AS date,
291+
date_trunc('day', "createdAt")::date::text AS date,
268292
COUNT(*) AS "pageViews",
269293
COUNT(*) FILTER (WHERE "isUnique" = true) AS "uniquePageViews"
270294
FROM "AnalyticsEvents"
@@ -278,15 +302,14 @@ async function fetchSummaryFromRaw(scope: Scope, startDate: string, endDate: str
278302
WHERE ${baseWhere} AND event = 'download'`,
279303
{ replacements: baseReplacements, type: QueryTypes.SELECT },
280304
),
281-
sequelize.query<CountryRow>(
305+
sequelize.query<TimezoneRow>(
282306
`SELECT
283-
COALESCE(country, 'Unknown') AS country,
284-
COALESCE("countryCode", '') AS "countryCode",
307+
COALESCE(timezone, '') AS timezone,
285308
COUNT(*) AS count
286309
FROM "AnalyticsEvents"
287310
WHERE ${baseWhere} AND ${pageEvents}
288-
GROUP BY country, "countryCode"
289-
ORDER BY count DESC LIMIT 250`,
311+
GROUP BY timezone
312+
ORDER BY count DESC`,
290313
{ replacements: baseReplacements, type: QueryTypes.SELECT },
291314
),
292315
sequelize.query<TopPubRow>(
@@ -304,23 +327,27 @@ async function fetchSummaryFromRaw(scope: Scope, startDate: string, endDate: str
304327
),
305328
sequelize.query<TopPageRow>(
306329
`SELECT
307-
COALESCE("pageTitle", "pubTitle", "collectionTitle", path, '') AS "pageTitle",
308-
COALESCE(path, '') AS path,
330+
COALESCE(pg.title, p.title, c.title, ae.path, '') AS "pageTitle",
331+
COALESCE(ae.path, '') AS path,
309332
COUNT(*) AS count
310-
FROM "AnalyticsEvents"
311-
WHERE ${baseWhere} AND ${pageEvents}
312-
GROUP BY "pageTitle", "pubTitle", "collectionTitle", path
333+
FROM "AnalyticsEvents" ae
334+
LEFT JOIN "Pages" pg ON pg.id = ae."pageId"
335+
LEFT JOIN "Pubs" p ON p.id = ae."pubId"
336+
LEFT JOIN "Collections" c ON c.id = ae."collectionId"
337+
WHERE ${aeBaseWhere} AND ae.event IN ('page','pub','collection','other')
338+
GROUP BY pg.title, p.title, c.title, ae.path
313339
ORDER BY count DESC LIMIT 250`,
314340
{ replacements: baseReplacements, type: QueryTypes.SELECT },
315341
),
316342
sequelize.query<TopCollectionRow>(
317343
`SELECT
318-
COALESCE("collectionTitle", "collectionSlug", "collectionId"::text) AS "collectionTitle",
319-
"collectionId"::text AS "collectionId",
344+
COALESCE(c.title, ae."collectionId"::text) AS "collectionTitle",
345+
ae."collectionId"::text AS "collectionId",
320346
COUNT(*) AS count
321-
FROM "AnalyticsEvents"
322-
WHERE ${baseWhere} AND "collectionId" IS NOT NULL AND event IN ('collection','pub')
323-
GROUP BY "collectionTitle", "collectionSlug", "collectionId"
347+
FROM "AnalyticsEvents" ae
348+
LEFT JOIN "Collections" c ON c.id = ae."collectionId"
349+
WHERE ${aeBaseWhere} AND ae."collectionId" IS NOT NULL AND ae.event IN ('collection','pub')
350+
GROUP BY ae."collectionId", c.title
324351
ORDER BY count DESC LIMIT 250`,
325352
{ replacements: baseReplacements, type: QueryTypes.SELECT },
326353
),
@@ -376,7 +403,7 @@ async function fetchSummaryFromRaw(scope: Scope, startDate: string, endDate: str
376403
totalUniqueVisits,
377404
totalDownloads: parseInt(String((totalDlRow as any)?.totalDownloads ?? '0'), 10),
378405
daily: dailyParsed,
379-
countries: countries.map((c) => ({ ...c, count: Number(c.count) })),
406+
countries: rollUpTimezoneToCountries(timezoneRows).slice(0, 250),
380407
topPubs: topPubs.map((p) => ({
381408
...p,
382409
views: Number(p.views),

0 commit comments

Comments
 (0)