Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -67,4 +67,5 @@ pubpub-localdb/
tsconfig.tsbuildinfo
.jest/secret-env.js

infra/pgdata/
infra/pgdata/
infra/metabase-plugins/
60 changes: 60 additions & 0 deletions analytics_schema.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
-- Flattened analytics event stream loaded into Redshift by a Singer/Stitch
-- pipeline (the _sdc_* columns are replication metadata added by the loader).
-- Fix: "unique" and "timestamp" are Amazon Redshift reserved words and must
-- be double-quoted to be used as column names; unquoted they are a syntax
-- error. Quoted lowercase identifiers match Redshift's default folding, so
-- the column names seen by queries are unchanged.
CREATE TABLE pubpub_analytics.data(
    __sdc_primary_key character varying(128) ENCODE lzo distkey,
    _sdc_batched_at timestamp without time zone ENCODE az64,
    _sdc_received_at timestamp without time zone ENCODE az64,
    _sdc_sequence bigint ENCODE az64,
    _sdc_table_version bigint ENCODE az64,
    collectionid character varying(128) ENCODE lzo,
    collectionkind character varying(128) ENCODE lzo,
    communityid character varying(128) ENCODE lzo,
    country character varying(128) ENCODE lzo,
    countrycode character varying(128) ENCODE lzo,
    event character varying(128) ENCODE lzo,
    height bigint ENCODE az64,
    isprod boolean ENCODE raw,
    primarycollectionid character varying(128) ENCODE lzo,
    pubid character varying(128) ENCODE lzo,
    type character varying(128) ENCODE lzo,
    "unique" boolean ENCODE raw,           -- reserved word: must be quoted
    width bigint ENCODE az64,
    "timestamp" bigint ENCODE az64,        -- reserved word: must be quoted
    utmcontent character varying(128) ENCODE lzo,
    utmmedium character varying(128) ENCODE lzo,
    utmterm character varying(128) ENCODE lzo,
    release__string character varying(128) ENCODE lzo,
    path character varying(256) ENCODE lzo,
    collectiontitle character varying(256) ENCODE lzo,
    collectionslug character varying(256) ENCODE lzo,
    pubslug character varying(256) ENCODE lzo,
    communityname character varying(256) ENCODE lzo,
    collectionids character varying(1024) ENCODE lzo,
    release__bigint bigint ENCODE az64,
    pagetitle character varying(256) ENCODE lzo,
    referrer character varying(4096) ENCODE lzo,
    utmcampaign character varying(256) ENCODE lzo,
    utmsource character varying(512) ENCODE lzo,
    timezone character varying(256) ENCODE lzo,
    os character varying(256) ENCODE lzo,
    pageid character varying(256) ENCODE lzo,
    locale character varying(256) ENCODE lzo,
    pageslug character varying(256) ENCODE lzo,
    communitysubdomain character varying(256) ENCODE lzo,
    format character varying(256) ENCODE lzo,
    useragent character varying(8192) ENCODE lzo,
    pubtitle character varying(1024) ENCODE lzo,
    url character varying(4096) ENCODE lzo,
    search character varying(4096) ENCODE lzo,
    title character varying(16384) ENCODE lzo,
    hash character varying(8192) ENCODE lzo
) DISTSTYLE AUTO SORTKEY(
    __sdc_primary_key
);

-- Singer-style child table for the data.collectionids array: one row per
-- array element ("value"), joined back to the parent event through
-- _sdc_source_key___sdc_primary_key; _sdc_level_0_id is the element's
-- position within the original array.
CREATE TABLE pubpub_analytics.data__collectionids(
_sdc_batched_at timestamp without time zone ENCODE az64,
_sdc_level_0_id bigint ENCODE az64,
_sdc_received_at timestamp without time zone ENCODE az64,
_sdc_sequence bigint ENCODE az64,
_sdc_source_key___sdc_primary_key character varying(128) ENCODE lzo distkey,
_sdc_table_version bigint ENCODE az64,
value character varying(128) ENCODE lzo) DISTSTYLE AUTO;

2 changes: 1 addition & 1 deletion client/containers/AdminDashboard/AdminDashboard.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ type Props = {
const AdminDashboard = (props: Props) => {
const { impactData } = props;
const { baseToken } = impactData;
const dashUrl = `https://metabase.pubpub.org/embed/dashboard/${baseToken}#bordered=false&titled=false`;
const dashUrl = `http://localhost:3030/embed/dashboard/${baseToken}#bordered=false&titled=false`;
const getOffset = (width) => {
return width < 960 ? 45 : 61;
};
Expand Down
2 changes: 1 addition & 1 deletion client/containers/DashboardImpact/DashboardImpact.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ const DashboardImpact = (props: Props) => {
const displayDataWarning = activeTarget?.createdAt < '2020-04-29';
const isCollection = activeTargetType === 'collection';
const genUrl = (token) => {
// NOTE(review): this change replaces the production Metabase embed origin
// with a hard-coded http://localhost:3030 — this looks like a local-dev
// edit that should not be merged. Derive the origin from configuration
// (e.g. the METABASE_HOST used in infra/Caddyfile) instead of hard-coding
// either URL. Same issue exists in AdminDashboard.tsx.
return `https://metabase.pubpub.org/embed/dashboard/${token}#bordered=false&titled=false`;
return `http://localhost:3030/embed/dashboard/${token}#bordered=false&titled=false`;
};
const getOffset = (width) => {
return width < 960 ? 45 : 61;
Expand Down
106 changes: 61 additions & 45 deletions infra/.env.dev.enc

Large diffs are not rendered by default.

110 changes: 62 additions & 48 deletions infra/.env.enc

Large diffs are not rendered by default.

10 changes: 9 additions & 1 deletion infra/Caddyfile
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,18 @@
respond "OK" 200
}

# Metabase vhost: serves the analytics UI at $METABASE_HOST
# (default analytics.localhost) with an internally-issued TLS cert.
{$METABASE_HOST:analytics.localhost} {
tls internal {
on_demand
}
encode gzip
# NOTE(review): docker-compose declares the service as "metabase" — confirm
# the container is actually resolvable as "pubpub_metabase" (container_name
# or compose project prefix?), otherwise this proxy target will not resolve.
reverse_proxy pubpub_metabase:3001
}

:443 {
tls internal {
on_demand
}
encode gzip
reverse_proxy pubpub_app:3000
}
}
67 changes: 67 additions & 0 deletions infra/docker-compose.dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,70 @@ services:
ports:
- "${DB_PORT:-5439}:5432"
networks: [appnet]


metabase:
build:
context: ./metabase
env_file:
- .env.dev
environment:
MB_JETTY_PORT: "3001"
MB_ENABLE_QUERY_CACHING: "true"
MB_QUERY_CACHING_TTL_RATIO: "10"
MB_QUERY_CACHING_MIN_TTL: "120"
depends_on:
- metabase_db
- db
ports:
- "${METABASE_PORT:-3030}:3001"
volumes:
- analytics_duckdb:/duckdb:ro
networks: [appnet]

analytics_sync:
build:
context: ./duckdb-sync
env_file:
- .env.dev
volumes:
- analytics_duckdb:/data
depends_on:
- db
networks: [appnet]

metabase_db:
env_file:
- .env.dev
image: postgres:18
environment:
- POSTGRES_USER=appuser
- POSTGRES_PASSWORD=apppassword
- POSTGRES_DB=metabasedb
volumes:
- metabase_pgdata:/var/lib/postgresql
ports:
- "${METABASE_DB_PORT:-5440}:5432"
networks: [appnet]

analytics_migration:
build:
context: ../tools/analytics-migration
env_file:
- .env.dev
environment:
DATABASE_URL: postgres://appuser:apppassword@db:5432/appdb
volumes:
- ../tools/analytics-migration:/migration
- /var/run/docker.sock:/var/run/docker.sock
- analytics_migration_data:/data
- analytics_duckdb:/duckdb
depends_on:
- db
- metabase_db
networks: [appnet]
profiles: [migration]

# cron:
# build:
# context: ..
Expand All @@ -110,3 +174,6 @@ networks:
volumes:
pgdata:
rabbitmqdata:
metabase_pgdata:
analytics_duckdb:
analytics_migration_data:
28 changes: 28 additions & 0 deletions infra/docker-compose.prod.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,33 @@ services:
limits:
memory: 2G

metabase:
image: metabase/metabase:latest
environment:
MB_DB_TYPE: postgres
MB_DB_DBNAME: metabasedb
MB_DB_PORT: "5432"
MB_DB_HOST: metabase_db
MB_DB_USER: appuser
MB_DB_PASS: apppassword
MB_JETTY_PORT: "3001"
depends_on:
- metabase_db
- db
ports:
- "${METABASE_PORT:-3030}:3001"
networks: [appnet]

metabase_db:
image: postgres:16
environment:
- POSTGRES_USER=appuser
- POSTGRES_PASSWORD=apppassword
- POSTGRES_DB=metabasedb
volumes:
- metabase_pgdata:/var/lib/postgresql/data
networks: [appnet]

rabbitmq:
image: rabbitmq:3.13-alpine
environment:
Expand Down Expand Up @@ -102,3 +129,4 @@ networks:
volumes:
# pgdata:
rabbitmqdata:
metabase_pgdata:
20 changes: 20 additions & 0 deletions infra/duckdb-sync/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Minimal image carrying the DuckDB CLI plus its postgres extension; the
# container runs sync.sh to mirror analytics tables from Postgres into a
# DuckDB file.
FROM debian:bookworm-slim

# TARGETARCH (amd64/arm64) is supplied by BuildKit and selects the matching
# duckdb_cli release asset.
ARG DUCKDB_VERSION=v1.4.4
ARG TARGETARCH

RUN apt-get update && \
apt-get install -y --no-install-recommends curl unzip ca-certificates && \
rm -rf /var/lib/apt/lists/* && \
curl -fsSL \
"https://github.com/duckdb/duckdb/releases/download/${DUCKDB_VERSION}/duckdb_cli-linux-${TARGETARCH}.zip" \
-o /tmp/duckdb.zip && \
unzip /tmp/duckdb.zip -d /usr/local/bin/ && \
rm /tmp/duckdb.zip && \
chmod +x /usr/local/bin/duckdb && \
# pre-install the postgres extension at build time so sync.sh's
# `LOAD postgres` needs no network at runtime; installed under root's home,
# which works because no USER directive is set (container runs as root)
duckdb -c "INSTALL postgres"

COPY sync.sh /usr/local/bin/sync.sh
RUN chmod +x /usr/local/bin/sync.sh

CMD ["/usr/local/bin/sync.sh"]
66 changes: 66 additions & 0 deletions infra/duckdb-sync/sync.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#!/bin/bash
# Incremental sync of the Postgres "AnalyticsEvents" table into a local
# DuckDB file (queried read-only by Metabase via the DuckDB driver).
#
# Modes (first positional argument):
#   (none)  : sync once, then repeat every SYNC_INTERVAL seconds
#   --once  : sync once and exit
#   --full  : delete the DuckDB file, rebuild from scratch, then exit
set -euo pipefail

DUCKDB_FILE="${DUCKDB_FILE:-/data/analytics.duckdb}"
# Connection settings are required; `set -u` aborts the script if any is unset.
PG_HOST="${PG_HOST}"
PG_PORT="${PG_PORT}"
PG_DB="${PG_DB}"
PG_USER="${PG_USER}"
PG_PASS="${PG_PASS}"

# Default interval between syncs: 12 hours.
SYNC_INTERVAL="${SYNC_INTERVAL:-43200}"
MODE="${1:-loop}"

# libpq-style connection string; contains the password, so never echo it.
PG_CONN="dbname=${PG_DB} host=${PG_HOST} port=${PG_PORT} user=${PG_USER} password=${PG_PASS}"

# Copy rows newer than the local high-water mark into the DuckDB table.
# The CREATE ... WHERE false clones the remote schema with zero rows on
# first run; subsequent runs only append.
# NOTE(review): the strict `>` watermark skips any row that shares the
# current MAX("createdAt") but is committed after a sync completes — confirm
# createdAt is fine-grained enough that events cannot be silently dropped.
sync_analytics() {
local start
start=$(date +%s)

duckdb "$DUCKDB_FILE" <<SQL
LOAD postgres;

ATTACH '${PG_CONN}' AS pg (TYPE POSTGRES, READ_ONLY);

CREATE TABLE IF NOT EXISTS "AnalyticsEvents" AS
SELECT * FROM pg.public."AnalyticsEvents" WHERE false;

INSERT INTO "AnalyticsEvents"
SELECT *
FROM pg.public."AnalyticsEvents"
WHERE "createdAt" > (
SELECT COALESCE(MAX("createdAt"), '1970-01-01'::TIMESTAMPTZ)
FROM "AnalyticsEvents"
);

DETACH pg;
SQL

local count elapsed
count=$(duckdb "$DUCKDB_FILE" -csv -noheader "SELECT count(*) FROM \"AnalyticsEvents\"")
elapsed=$(( $(date +%s) - start ))
echo "[$(date -Iseconds)] sync complete: ${count} rows (${elapsed}s)"
}

# --full: remove the file so the CREATE/INSERT above rebuilds everything.
if [ "$MODE" = "--full" ]; then
echo "full resync requested, removing existing data..."
rm -f "$DUCKDB_FILE"
fi

echo "duckdb analytics sync"
echo " postgres: ${PG_HOST}:${PG_PORT}/${PG_DB}"
echo " duckdb: ${DUCKDB_FILE}"
echo ""

sync_analytics

# One-shot modes stop after the initial sync.
if [ "$MODE" = "--once" ] || [ "$MODE" = "--full" ]; then
exit 0
fi

echo "entering sync loop (interval: ${SYNC_INTERVAL}s)"

while true; do
sleep "$SYNC_INTERVAL"
sync_analytics
done
13 changes: 13 additions & 0 deletions infra/env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Template for infra/.env — copy and fill in before running.

# AM_* : settings for the analytics-migration tool (tools/analytics-migration).
# Presumably: Redshift export location + S3 backup credentials, and the
# existing Metabase-on-RDS instance to migrate content from — verify against
# the migration tool's README.
AM_REDSHIFT_PATH=
AM_REDSHIFT_BACKUP_ACCESS_KEY=
AM_REDSHIFT_BACKUP_SECRET_KEY=
AM_MB_RDS_URL=
AM_MB_RDS_ADMIN_EMAIL=
AM_MB_RDS_ADMIN_PASSWORD=
AM_MB_COLLECTION_ID=14

# MB_DB_* : Metabase application-database connection settings (consumed via
# env_file / compose interpolation in docker-compose.*.yml).
MB_DB_HOST=
MB_DB_PASS=
MB_DB_USER=
MB_DB_NAME=
MB_DB_PORT=
34 changes: 34 additions & 0 deletions infra/metabase/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Custom Metabase image: the official application JAR on a Temurin 21 JRE,
# with the MotherDuck DuckDB driver pre-installed as a plugin.
FROM eclipse-temurin:21-jre-jammy

ARG METABASE_VERSION=0.58.9
ARG METABASE_DUCKDB_DRIVER_VERSION=1.4.4.0

# Directory Metabase scans for third-party driver JARs.
ENV MB_PLUGINS_DIR=/home/metabase/plugins/

# Run the app as an unprivileged user (see USER below).
RUN groupadd -r metabase && useradd -r -g metabase metabase

RUN apt-get update && apt-get install -y --no-install-recommends \
ca-certificates \
&& rm -rf /var/lib/apt/lists/*

RUN mkdir -p /home/metabase/plugins /home/metabase/data && \
chown -R metabase:metabase /home/metabase

WORKDIR /home/metabase

# Official Metabase application JAR.
ADD --chown=metabase:metabase \
https://downloads.metabase.com/v${METABASE_VERSION}/metabase.jar \
/home/metabase/

# DuckDB driver plugin.
ADD --chown=metabase:metabase \
https://github.com/motherduckdb/metabase_duckdb_driver/releases/download/${METABASE_DUCKDB_DRIVER_VERSION}/duckdb.metabase-driver.jar \
/home/metabase/plugins/

RUN chmod 755 /home/metabase/metabase.jar && \
chmod 755 /home/metabase/plugins/duckdb.metabase-driver.jar

# NOTE(review): compose sets MB_JETTY_PORT=3001 while this exposes 3000
# (Metabase's default). EXPOSE is informational only, but consider aligning
# it with the port the service actually listens on.
EXPOSE 3000

USER metabase

CMD ["java", "-jar", "/home/metabase/metabase.jar"]
Loading
Loading