diff --git a/services/apps/script_executor_worker/package.json b/services/apps/script_executor_worker/package.json index 82654bff8e..f95faac3ab 100644 --- a/services/apps/script_executor_worker/package.json +++ b/services/apps/script_executor_worker/package.json @@ -8,6 +8,7 @@ "dev:local": "nodemon --watch src --watch ../../libs --ext ts --exec pnpm run start:debug:local", "dev": "nodemon --watch src --watch ../../libs --ext ts --exec pnpm run start:debug", "cleanup-fork-activities": "npx tsx src/bin/cleanup-fork-activities-and-maintainers.ts", + "cleanup-gerrit-activities": "npx tsx src/bin/cleanup-gerrit-activities.ts", "cleanup-member-aggregates": "npx tsx src/bin/cleanup-member-aggregates.ts", "recalculate-enrichment-affiliations": "npx tsx src/bin/recalculate-enrichment-affiliations.ts", "recalculate-all-affiliations": "npx tsx src/bin/recalculate-all-affiliations.ts", diff --git a/services/apps/script_executor_worker/src/bin/cleanup-gerrit-activities.ts b/services/apps/script_executor_worker/src/bin/cleanup-gerrit-activities.ts index 93b056f566..775cc87a5d 100644 --- a/services/apps/script_executor_worker/src/bin/cleanup-gerrit-activities.ts +++ b/services/apps/script_executor_worker/src/bin/cleanup-gerrit-activities.ts @@ -2,29 +2,43 @@ * Gerrit Activities Cleanup Script * * PROBLEM: - * Gerrit activities need to be cleaned up from both PostgreSQL and Tinybird - * based on specific platform and type filters with a date cutoff. + * Gerrit activities need to be cleaned up from both PostgreSQL and Tinybird. * * SOLUTION: - * This script deletes activities from Gerrit platform across: - * - PostgreSQL (activityRelations table only) - * - Tinybird (activities and activityRelations tables) + * This script deletes activities from the Gerrit platform across: + * - PostgreSQL (activityRelations table only, chunked in 10k batches) + * - Tinybird (activities and activityRelations datasources, one delete job each) * - * Filters: - * - platform = 'gerrit' - * - type in ('changeset-merged', 'changeset-abandoned', 'patchset_approval-created') - * - updatedAt < '2025-12-15' + * Before any deletion, the script prints the affected row counts from all three + * stores and prompts for confirmation (skip with --yes). + * + * Filters (all optional): + * - platform = 'gerrit' (always applied) + * - --type (optional) restrict to specific activity type(s) + * - --before-date (optional) restrict to records with updatedAt < date + * + * WARNING: Running with no --type and no --before-date will delete ALL gerrit + * activities regardless of type or age. + * + * NOTE: This script only purges the raw Tinybird datasources (`activities` and + * `activityRelations`). Derived materialized views (activities_backup, + * activities_deduplicated_ds, activityRelations_bucket_MV_ds_*, etc.) are NOT + * affected because Tinybird/ClickHouse MV deletes do not cascade. * * Usage: * # Via package.json script (recommended): - * pnpm run cleanup-gerrit-activities -- [--dry-run] [--tb-token ] + * pnpm run cleanup-gerrit-activities -- [options] * * # Or directly with tsx: - * npx tsx src/bin/cleanup-gerrit-activities.ts [--dry-run] [--tb-token ] + * npx tsx src/bin/cleanup-gerrit-activities.ts [options] * * Options: - * --dry-run Display what would be deleted without actually deleting anything - * --tb-token Tinybird API token to use (overrides CROWD_TINYBIRD_ACTIVITIES_TOKEN environment variable) + * --dry-run Display row counts and what would be deleted, without deleting anything + * --yes / -y Skip confirmation prompt (for non-interactive use) + * --tb-token Tinybird API token (overrides CROWD_TINYBIRD_ACTIVITIES_TOKEN) + * --before-date Only delete records with updatedAt before this date + * --type Only delete activities of these types (comma-separated). + * Valid values: changeset-created, changeset-merged, changeset-closed, changeset-abandoned, changeset_comment-created, patchset-created, patchset_comment-created, patchset_approval-created * * Environment Variables Required: * CROWD_DB_WRITE_HOST - Postgres write host @@ -37,6 +51,7 @@ */ import * as fs from 'fs' import * as path from 'path' +import * as readline from 'readline' import { TinybirdClient, @@ -48,6 +63,22 @@ import { getServiceChildLogger } from '@crowd/logging' const log = getServiceChildLogger('cleanup-gerrit-activities-script') +const VALID_GERRIT_TYPES = [ + 'changeset-created', + 'changeset-merged', + 'changeset-closed', + 'changeset-abandoned', + 'changeset_comment-created', + 'patchset-created', + 'patchset_comment-created', + 'patchset_approval-created', +] as const + +interface Filters { + beforeDate?: string + types?: string[] +} + interface DeletionStatus { success: boolean jobId?: string @@ -58,8 +89,8 @@ interface CleanupResult { status: 'success' | 'failure' startTime: string endTime: string - totalBatches: number - failedBatches: number + postgresDeleted: number + tinybirdJobIds: string[] deletions: { postgres: DeletionStatus tinybird: { @@ -69,378 +100,355 @@ interface CleanupResult { } } -/** - * Initialize Postgres connection using QueryExecutor - */ -async function initPostgresClient(): Promise { - log.info('Initializing Postgres connection...') - - const dbConnection = await getDbConnection(WRITE_DB_CONFIG()) - const queryExecutor = pgpQx(dbConnection) +// --------------------------------------------------------------------------- +// Filter clause builders +// --------------------------------------------------------------------------- - log.info('Postgres connection established') - return queryExecutor +/** Tinybird/ClickHouse WHERE clause (unquoted identifiers, single-quoted strings) */ +function buildTinybirdFilterClause(filters: Filters): string { + const parts = [`platform = 'gerrit'`] + if (filters.types?.length) { + const list = filters.types.map((t) => `'${t}'`).join(', ') + parts.push(`type IN (${list})`) + } + if (filters.beforeDate) { + parts.push(`updatedAt < '${filters.beforeDate}'`) + } + return parts.join(' AND ') } /** - * Query activity IDs from Tinybird in batches and delete from Postgres immediately - * Uses batched queries to avoid hitting Tinybird's result size limit (100 MiB) + * Postgres WHERE clause + pg-promise param map. + * updatedAt is camelCase and must be double-quoted. */ -async function queryAndProcessActivityIdsInBatches( - tinybird: TinybirdClient, - postgres: QueryExecutor, - dryRun: boolean, - onBatchProcessed: () => void, -): Promise { - log.info('Querying activity IDs from Tinybird for Gerrit cleanup...') +function buildPostgresFilter(filters: Filters): { where: string; values: Record } { + const conditions: string[] = [`platform = 'gerrit'`] + const values: Record = {} - const BATCH_SIZE = 10000 - let offset = 0 - let hasMore = true - let totalProcessed = 0 - let batchNumber = 0 + if (filters.types?.length) { + conditions.push(`type IN ($(types:csv))`) + values.types = filters.types + } + if (filters.beforeDate) { + conditions.push(`"updatedAt" < $(beforeDate)`) + values.beforeDate = filters.beforeDate + } - try { - while (hasMore) { - const query = `SELECT DISTINCT activityId FROM activityRelations WHERE platform = 'gerrit' AND type IN ('changeset-merged', 'changeset-abandoned', 'patchset_approval-created') AND updatedAt < '2025-12-15' ORDER BY activityId LIMIT ${BATCH_SIZE} OFFSET ${offset} FORMAT JSON` - log.info(`Querying batch: offset=${offset}, limit=${BATCH_SIZE}`) + return { where: conditions.join(' AND '), values } +} - const result = await tinybird.executeSql<{ data: Array<{ activityId: string }> }>(query) - const batchActivityIds = result.data.map((row) => row.activityId) +// --------------------------------------------------------------------------- +// Count helpers +// --------------------------------------------------------------------------- + +async function countPostgresRows(postgres: QueryExecutor, filters: Filters): Promise { + const { where, values } = buildPostgresFilter(filters) + const result = (await postgres.selectOne( + `SELECT COUNT(*) AS count FROM "activityRelations" WHERE ${where}`, + values, + )) as { count: string } + return parseInt(result.count, 10) +} - if (batchActivityIds.length === 0) { - hasMore = false +async function countTinybirdRows( + tinybird: TinybirdClient, + datasource: string, + filters: Filters, +): Promise { + const whereClause = buildTinybirdFilterClause(filters) + const query = `SELECT count() AS c FROM ${datasource} WHERE ${whereClause} FORMAT JSON` + const result = await tinybird.executeSql<{ data: Array<{ c: number }> }>(query) + return result.data[0]?.c ?? 0 +} + +// --------------------------------------------------------------------------- +// Confirmation prompt +// --------------------------------------------------------------------------- + +async function confirmOrAbort(message: string): Promise { + const rl = readline.createInterface({ input: process.stdin, output: process.stdout }) + return new Promise((resolve, reject) => { + rl.question(`${message}\nType "yes" to proceed: `, (answer) => { + rl.close() + if (answer.trim().toLowerCase() === 'yes') { + resolve() } else { - batchNumber++ - log.info( - `Processing batch ${batchNumber} (${batchActivityIds.length} activities, total processed: ${totalProcessed})...`, - ) - - const postgresStatus = await deleteActivityRelationsFromPostgres( - postgres, - batchActivityIds, - dryRun, - ) - - if (!postgresStatus.success) { - log.error(`Failed to delete batch ${batchNumber} from Postgres: ${postgresStatus.error}`) - } - - totalProcessed += batchActivityIds.length - onBatchProcessed() - - // If we got fewer results than the batch size, we've reached the end - if (batchActivityIds.length < BATCH_SIZE) { - hasMore = false - } else { - offset += BATCH_SIZE - } + reject(new Error('Aborted by user')) } - } - - log.info(`Found and processed ${totalProcessed} total activity ID(s) in Tinybird`) - return totalProcessed - } catch (error) { - const statusCode = error?.response?.status || 'unknown' - const responseBody = error?.response?.data - ? JSON.stringify(error.response.data) - : error?.response?.body || 'no body' - log.error( - `Failed to query activity IDs from Tinybird: ${error.message} (status: ${statusCode}, body: ${responseBody})`, - ) - throw error - } + }) + }) } +// --------------------------------------------------------------------------- +// Postgres chunked delete +// --------------------------------------------------------------------------- + /** - * Delete activity relations from Postgres using activity IDs + * Delete matching rows from activityRelations in 10k batches. + * Each batch is its own transaction so lock duration and WAL stays bounded. + * Returns total rows deleted. */ -async function deleteActivityRelationsFromPostgres( +async function deletePostgresInChunks( postgres: QueryExecutor, - activityIds: string[], - dryRun = false, -): Promise { - if (activityIds.length === 0) { - log.info(`No activity IDs to ${dryRun ? 'query' : 'delete'} from Postgres`) - return { success: true } - } - - try { - if (dryRun) { - log.info(`[DRY RUN] Querying ${activityIds.length} activity relations from Postgres...`) - const query = ` - SELECT COUNT(*) as count - FROM "activityRelations" - WHERE "activityId" IN ($(activityIds:csv)) - ` - const result = (await postgres.selectOne(query, { activityIds })) as { count: string } - const rowCount = parseInt(result.count, 10) - log.info(`[DRY RUN] Would delete ${rowCount} activity relation(s) from Postgres`) - return { success: true } + filters: Filters, + batchSize = 10000, +): Promise { + const { where, values } = buildPostgresFilter(filters) + const query = ` + DELETE FROM "activityRelations" + WHERE "activityId" IN ( + SELECT "activityId" FROM "activityRelations" + WHERE ${where} + LIMIT ${batchSize} + ) + ` + + let total = 0 + let batch = 0 + let deleted = 0 + + do { + deleted = await postgres.result(query, values) + if (deleted === 0) break + total += deleted + batch++ + if (batch % 10 === 0) { + log.info(` … deleted ${total.toLocaleString()} rows so far (batch ${batch})`) } + } while (deleted > 0) - log.info(`Deleting ${activityIds.length} activity relations from Postgres...`) - const query = ` - DELETE FROM "activityRelations" - WHERE "activityId" IN ($(activityIds:csv)) - ` - const rowCount = await postgres.result(query, { activityIds }) - log.info(`✓ Deleted ${rowCount} activity relation(s) from Postgres`) - return { success: true } - } catch (error) { - log.error(`Failed to delete activity relations from Postgres: ${error.message}`) - return { success: false, error: error.message } - } + return total } -/** - * Delete activities from Tinybird using the delete API - */ +// --------------------------------------------------------------------------- +// Tinybird delete jobs +// --------------------------------------------------------------------------- + async function deleteActivitiesFromTinybird( tinybird: TinybirdClient, - dryRun = false, + filters: Filters, ): Promise<{ activities: DeletionStatus activityRelations: DeletionStatus jobIds: string[] }> { + const deleteCondition = buildTinybirdFilterClause(filters) const results = { activities: { success: false } as DeletionStatus, activityRelations: { success: false } as DeletionStatus, } - - if (dryRun) { - log.info('[DRY RUN] Would delete activities from Tinybird using Gerrit filters...') - log.info( - `[DRY RUN] Filters: platform='gerrit', type in ('changeset-merged', 'changeset-abandoned', 'patchset_approval-created'), updatedAt < '2025-12-15'`, - ) - log.info(`[DRY RUN] Would delete from 'activities' datasource`) - log.info(`[DRY RUN] Would delete from 'activityRelations' datasource`) - return { - activities: { success: true }, - activityRelations: { success: true }, - jobIds: [], - } - } - - log.info('Deleting activities from Tinybird using Gerrit filters...') - const triggeredJobIds: string[] = [] - // Define deletion conditions - const activitiesDeleteCondition = `platform = 'gerrit' AND type IN ('changeset-merged', 'changeset-abandoned', 'patchset_approval-created') AND updatedAt < '2025-12-15'` - const activityRelationsDeleteCondition = `platform = 'gerrit' AND type IN ('changeset-merged', 'changeset-abandoned', 'patchset_approval-created') AND updatedAt < '2025-12-15'` - - // Delete from activities datasource + log.info('Triggering deletion job for Tinybird activities datasource...') try { - log.info('Triggering deletion job for activities datasource...') - const activitiesJobResponse = await tinybird.deleteDatasource( - 'activities', - activitiesDeleteCondition, - true, - false, // Don't wait - ) - log.info(`✓ Triggered deletion job for activities (job_id: ${activitiesJobResponse.job_id})`) - triggeredJobIds.push(activitiesJobResponse.job_id) - results.activities = { - success: true, - jobId: activitiesJobResponse.job_id, - } + const resp = await tinybird.deleteDatasource('activities', deleteCondition, true, false) + log.info(`✓ Triggered activities deletion job (job_id: ${resp.job_id})`) + triggeredJobIds.push(resp.job_id) + results.activities = { success: true, jobId: resp.job_id } } catch (error) { - log.error(`Failed to trigger deletion job for activities datasource: ${error.message}`) - results.activities = { - success: false, - error: error.message, - } + log.error(`Failed to trigger deletion job for activities: ${error.message}`) + results.activities = { success: false, error: error.message } } - // Delete from activityRelations datasource + log.info('Triggering deletion job for Tinybird activityRelations datasource...') try { - log.info('Triggering deletion job for activityRelations datasource...') - const activityRelationsJobResponse = await tinybird.deleteDatasource( - 'activityRelations', - activityRelationsDeleteCondition, - true, - false, // Don't wait - ) - log.info( - `✓ Triggered deletion job for activityRelations (job_id: ${activityRelationsJobResponse.job_id})`, - ) - triggeredJobIds.push(activityRelationsJobResponse.job_id) - results.activityRelations = { - success: true, - jobId: activityRelationsJobResponse.job_id, - } + const resp = await tinybird.deleteDatasource('activityRelations', deleteCondition, true, false) + log.info(`✓ Triggered activityRelations deletion job (job_id: ${resp.job_id})`) + triggeredJobIds.push(resp.job_id) + results.activityRelations = { success: true, jobId: resp.job_id } } catch (error) { - log.error(`Failed to trigger deletion job for activityRelations datasource: ${error.message}`) - results.activityRelations = { - success: false, - error: error.message, - } + log.error(`Failed to trigger deletion job for activityRelations: ${error.message}`) + results.activityRelations = { success: false, error: error.message } } - log.info(`✓ All deletion jobs triggered (${triggeredJobIds.length} running in background)`) - - return { - ...results, - jobIds: triggeredJobIds, - } + return { ...results, jobIds: triggeredJobIds } } -/** - * Main cleanup process - */ -async function runCleanup(dryRun = false, tbToken?: string): Promise { +// --------------------------------------------------------------------------- +// Main cleanup orchestration +// --------------------------------------------------------------------------- + +async function runCleanup( + dryRun: boolean, + skipConfirm: boolean, + tbToken: string | undefined, + filters: Filters, +): Promise { const startTime = new Date().toISOString() - let failedBatches = 0 - let totalBatches = 0 - if (dryRun) { - log.info(`\n${'='.repeat(80)}`) - log.info(`[DRY RUN MODE] Gerrit Activities Cleanup`) - log.info(`${'='.repeat(80)}`) - } else { - log.info(`\n${'='.repeat(80)}`) - log.info(`Gerrit Activities Cleanup`) - log.info(`${'='.repeat(80)}`) - } + log.info(`\n${'='.repeat(80)}`) + log.info(dryRun ? '[DRY RUN MODE] Gerrit Activities Cleanup' : 'Gerrit Activities Cleanup') + log.info(`${'='.repeat(80)}`) + log.info(`Active filters: ${buildTinybirdFilterClause(filters)}`) - try { - // Initialize database clients - const postgres = await initPostgresClient() - const tinybird = new TinybirdClient(tbToken) + const postgres = await initPostgresClient() + const tinybird = new TinybirdClient(tbToken) - // Track deletion statuses - const allDeletionStatuses = { - postgres: { success: true } as DeletionStatus, - tinybird: { - activities: { success: true } as DeletionStatus, - activityRelations: { success: true } as DeletionStatus, - }, - } + // Pre-flight counts (all three stores in parallel) + log.info('Counting affected rows across all stores...') + const [pgCount, tbActivitiesCount, tbRelationsCount] = await Promise.all([ + countPostgresRows(postgres, filters), + countTinybirdRows(tinybird, 'activities', filters), + countTinybirdRows(tinybird, 'activityRelations', filters), + ]) - // Step 1: Query activity IDs from Tinybird in batches and delete from Postgres as we go - log.info( - 'Step 1: Processing activity IDs in batches from Tinybird and deleting from Postgres...', - ) - const totalProcessed = await queryAndProcessActivityIdsInBatches( - tinybird, - postgres, - dryRun, - () => { - totalBatches++ - }, - ) + log.info(` PostgreSQL activityRelations : ${pgCount.toLocaleString()} rows`) + log.info(` Tinybird activities : ${tbActivitiesCount.toLocaleString()} rows`) + log.info(` Tinybird activityRelations : ${tbRelationsCount.toLocaleString()} rows`) - if (totalProcessed === 0) { - log.info('No activities to delete, skipping Tinybird deletion steps') - log.info(`✓ Completed ${dryRun ? 'dry run for' : 'cleanup for'} Gerrit activities`) - return - } + if (dryRun) { + log.info(`\n[DRY RUN] Would delete:`) + log.info(` ${pgCount.toLocaleString()} rows from PostgreSQL activityRelations`) + log.info(` ${tbActivitiesCount.toLocaleString()} rows from Tinybird activities`) + log.info(` ${tbRelationsCount.toLocaleString()} rows from Tinybird activityRelations`) + log.info('[DRY RUN] No data was deleted.') + return + } - log.info(`✓ Completed processing ${totalProcessed} activities from Postgres`) + if (pgCount === 0 && tbActivitiesCount === 0 && tbRelationsCount === 0) { + log.info('No matching rows found. Nothing to delete.') + return + } - // Step 2: Delete from Tinybird in a single operation per datasource - log.info('Step 2: Triggering Tinybird deletions...') - const tinybirdStatuses = await deleteActivitiesFromTinybird(tinybird, dryRun) + if (!skipConfirm) { + await confirmOrAbort( + `\nAbout to permanently delete ${pgCount.toLocaleString()} PG rows, ${tbActivitiesCount.toLocaleString()} TB activities, ${tbRelationsCount.toLocaleString()} TB activityRelations.`, + ) + } - // Track failures from Tinybird - if (!tinybirdStatuses.activities.success) { - allDeletionStatuses.tinybird.activities = tinybirdStatuses.activities - failedBatches++ - } - if (!tinybirdStatuses.activityRelations.success) { - allDeletionStatuses.tinybird.activityRelations = tinybirdStatuses.activityRelations - failedBatches++ - } + // Step 1: Delete from Postgres in chunks + log.info(`\nStep 1: Deleting ${pgCount.toLocaleString()} rows from PostgreSQL in 10k batches...`) + const postgresStatus: DeletionStatus = { success: true } + let postgresDeleted = 0 + try { + postgresDeleted = await deletePostgresInChunks(postgres, filters) + log.info(`✓ Deleted ${postgresDeleted.toLocaleString()} row(s) from PostgreSQL`) + } catch (error) { + log.error(`Failed to delete from PostgreSQL: ${error.message}`) + postgresStatus.success = false + postgresStatus.error = error.message + } - // Wait for all Tinybird deletion jobs to complete - if (!dryRun && tinybirdStatuses.jobIds.length > 0) { - log.info( - `Waiting for ${tinybirdStatuses.jobIds.length} Tinybird deletion job(s) to complete...`, - ) - try { - await tinybird.waitForJobs(tinybirdStatuses.jobIds, 60000, 3600000) // 1min interval, 1h timeout - log.info(`✓ All Tinybird deletion jobs completed`) - } catch (error) { - log.error(`Failed to wait for Tinybird deletion jobs: ${error.message}`) - // Continue anyway - jobs are still running in background - } - } + // Step 2: Trigger Tinybird delete jobs + log.info('\nStep 2: Triggering Tinybird deletion jobs...') + const tinybirdStatuses = await deleteActivitiesFromTinybird(tinybird, filters) + + // Persist result JSON immediately (before waiting) so job IDs are recoverable on timeout + const endTime = new Date().toISOString() + const result: CleanupResult = { + status: + postgresStatus.success && + tinybirdStatuses.activities.success && + tinybirdStatuses.activityRelations.success + ? 'success' + : 'failure', + startTime, + endTime, + postgresDeleted, + tinybirdJobIds: tinybirdStatuses.jobIds, + deletions: { + postgres: postgresStatus, + tinybird: { + activities: tinybirdStatuses.activities, + activityRelations: tinybirdStatuses.activityRelations, + }, + }, + } - // Create cleanup result - const endTime = new Date().toISOString() - const result: CleanupResult = { - status: failedBatches > 0 ? 'failure' : 'success', - startTime, - endTime, - totalBatches, - failedBatches, - deletions: allDeletionStatuses, - } + const jsonFilePath = path.join( + '/tmp', + `cleanup_gerrit_activities_${new Date().toISOString().replace(/[:.]/g, '-')}.json`, + ) + try { + fs.writeFileSync(jsonFilePath, JSON.stringify(result, null, 2), 'utf-8') + log.info(`✓ Job IDs and status saved to: ${jsonFilePath}`) + } catch (error) { + log.error(`Failed to write result file ${jsonFilePath}: ${error.message}`) + } - // Save results to file - const jsonFilePath = path.join( - '/tmp', - `cleanup_gerrit_activities_${new Date().toISOString().replace(/[:.]/g, '-')}.json`, + // Step 3: Wait for Tinybird jobs (up to 6 hours) + if (tinybirdStatuses.jobIds.length > 0) { + log.info( + `\nStep 3: Waiting for ${tinybirdStatuses.jobIds.length} Tinybird job(s) to complete (up to 6h)...`, ) try { - fs.writeFileSync(jsonFilePath, JSON.stringify(result, null, 2), 'utf-8') - log.info(`✓ Cleanup results saved to: ${jsonFilePath}`) + await tinybird.waitForJobs(tinybirdStatuses.jobIds, 60_000, 6 * 60 * 60 * 1000) + log.info('✓ All Tinybird deletion jobs completed') } catch (error) { - log.error(`Failed to write cleanup results to ${jsonFilePath}: ${error.message}`) - } - - // Summary - log.info(`\n${'='.repeat(80)}`) - log.info('Cleanup Summary') - log.info(`${'='.repeat(80)}`) - log.info(`✓ Activities ${dryRun ? 'found' : 'deleted'}: ${totalProcessed}`) - log.info(`✓ Batches processed: ${totalBatches}`) - if (failedBatches > 0) { - log.warn(`✗ Failed batches: ${failedBatches}`) - } - - if (tinybirdStatuses.activities.success) { - log.info( - `✓ Tinybird activities deletion job ${dryRun ? 'would be' : 'was'} triggered: ${tinybirdStatuses.activities.jobId || 'N/A'}`, - ) - } else { - log.error(`✗ Tinybird activities deletion failed: ${tinybirdStatuses.activities.error}`) - } - - if (tinybirdStatuses.activityRelations.success) { - log.info( - `✓ Tinybird activityRelations deletion job ${dryRun ? 'would be' : 'was'} triggered: ${tinybirdStatuses.activityRelations.jobId || 'N/A'}`, - ) - } else { + log.error(`Tinybird wait failed: ${error.message}`) log.error( - `✗ Tinybird activityRelations deletion failed: ${tinybirdStatuses.activityRelations.error}`, + `Tinybird jobs may still be running. Job IDs: ${tinybirdStatuses.jobIds.join(', ')}`, ) - } - - if (result.status === 'failure') { + log.error(`Check the result file for details: ${jsonFilePath}`) process.exit(1) } - } catch (error) { - log.error(`Failed to run Gerrit cleanup: ${error.message}`) - throw error + } + + // Summary + log.info(`\n${'='.repeat(80)}`) + log.info('Cleanup Summary') + log.info(`${'='.repeat(80)}`) + log.info(`✓ PostgreSQL rows deleted : ${postgresDeleted.toLocaleString()}`) + if (tinybirdStatuses.activities.success) { + log.info(`✓ Tinybird activities job : ${tinybirdStatuses.activities.jobId}`) + } else { + log.error(`✗ Tinybird activities failed: ${tinybirdStatuses.activities.error}`) + } + if (tinybirdStatuses.activityRelations.success) { + log.info(`✓ Tinybird activityRelations job: ${tinybirdStatuses.activityRelations.jobId}`) + } else { + log.error(`✗ Tinybird activityRelations failed: ${tinybirdStatuses.activityRelations.error}`) + } + log.info(`Result file: ${jsonFilePath}`) + + if (result.status === 'failure') { + process.exit(1) } } -/** - * Main entry point - */ +async function initPostgresClient(): Promise { + log.info('Initializing Postgres connection...') + const dbConnection = await getDbConnection(WRITE_DB_CONFIG()) + const queryExecutor = pgpQx(dbConnection) + log.info('Postgres connection established') + return queryExecutor +} + +// --------------------------------------------------------------------------- +// Entry point +// --------------------------------------------------------------------------- + async function main() { const args = process.argv.slice(2) - // Parse flags - const dryRunIndex = args.indexOf('--dry-run') - const tbTokenIndex = args.indexOf('--tb-token') - const dryRun = dryRunIndex !== -1 + if (args.includes('--help') || args.includes('-h')) { + log.info(` + Usage: + pnpm run cleanup-gerrit-activities -- [options] + + Options: + --dry-run Display row counts without deleting anything + --yes / -y Skip confirmation prompt (non-interactive) + --tb-token Tinybird API token (overrides env var) + --before-date Only delete records with updatedAt before this date + --type Comma-separated activity types to delete. + Valid: ${VALID_GERRIT_TYPES.join(', ')} + + WARNING: Running with no --type and no --before-date deletes ALL gerrit activities. + + Examples: + pnpm run cleanup-gerrit-activities -- --dry-run --type patchset_approval-created + pnpm run cleanup-gerrit-activities -- --type patchset_approval-created --before-date 2025-12-15 + pnpm run cleanup-gerrit-activities -- --type changeset-merged --yes + `) + process.exit(0) + } + + const dryRun = args.includes('--dry-run') + const skipConfirm = args.includes('--yes') || args.includes('-y') - // Extract tb-token value if provided let tbToken: string | undefined + const tbTokenIndex = args.indexOf('--tb-token') if (tbTokenIndex !== -1) { if (tbTokenIndex + 1 >= args.length) { log.error('Error: --tb-token requires a value') @@ -449,49 +457,64 @@ async function main() { tbToken = args[tbTokenIndex + 1] } - // Check for help flag or no valid arguments - if (args.includes('--help') || args.includes('-h')) { - log.info(` - Usage: - # Via package.json script (recommended): - pnpm run cleanup-gerrit-activities -- [--dry-run] [--tb-token ] - - # Or directly with tsx: - npx tsx src/bin/cleanup-gerrit-activities.ts [--dry-run] [--tb-token ] - - Options: - --dry-run: (optional) Display what would be deleted without actually deleting anything - --tb-token: (optional) Tinybird API token to use (overrides CROWD_TINYBIRD_ACTIVITIES_TOKEN environment variable) - - Examples: - # Run cleanup - pnpm run cleanup-gerrit-activities - - # Dry run to preview what would be deleted - pnpm run cleanup-gerrit-activities -- --dry-run - - # Use custom Tinybird token - pnpm run cleanup-gerrit-activities -- --tb-token your-token-here - - Filters applied: - - platform = 'gerrit' - - type in ('changeset-merged', 'changeset-abandoned', 'patchset_approval-created') - - updatedAt < '2025-12-15' - `) - process.exit(0) + let beforeDate: string | undefined + const beforeDateIndex = args.indexOf('--before-date') + if (beforeDateIndex !== -1) { + if (beforeDateIndex + 1 >= args.length) { + log.error('Error: --before-date requires a value (YYYY-MM-DD)') + process.exit(1) + } + const raw = args[beforeDateIndex + 1] + if (!/^\d{4}-\d{2}-\d{2}$/.test(raw) || !isFinite(Date.parse(raw))) { + log.error(`Error: --before-date value "${raw}" is not a valid date (expected YYYY-MM-DD)`) + process.exit(1) + } + beforeDate = raw } - if (dryRun) { - log.info(`\n${'='.repeat(80)}`) - log.info('🧪 DRY RUN MODE - No data will be deleted') - log.info(`${'='.repeat(80)}\n`) + let types: string[] | undefined + const typeIndex = args.indexOf('--type') + if (typeIndex !== -1) { + if (typeIndex + 1 >= args.length) { + log.error('Error: --type requires a value (comma-separated list of activity types)') + process.exit(1) + } + const raw = args[typeIndex + 1] + const parsed = raw + .split(',') + .map((t) => t.trim()) + .filter(Boolean) + if (parsed.length === 0) { + log.error('Error: --type received an empty value') + process.exit(1) + } + const invalid = parsed.filter((t) => !(VALID_GERRIT_TYPES as readonly string[]).includes(t)) + if (invalid.length > 0) { + log.error( + `Error: --type contains invalid value(s): ${invalid.join(', ')}. Valid values: ${VALID_GERRIT_TYPES.join(', ')}`, + ) + process.exit(1) + } + types = parsed } + if (!types && !beforeDate) { + log.warn( + 'WARNING: No --type or --before-date provided — this will target ALL gerrit activities.', + ) + } + + const filters: Filters = { beforeDate, types } + try { - await runCleanup(dryRun, tbToken) + await runCleanup(dryRun, skipConfirm, tbToken, filters) } catch (error) { + if (error.message === 'Aborted by user') { + log.info('Cleanup aborted.') + process.exit(0) + } log.error(error, 'Failed to run Gerrit cleanup script') - log.error(`\n❌ Error: ${error.message}`) + log.error(`\nError: ${error.message}`) process.exit(1) } }