diff --git a/packages/das/src/webhook/github-fetcher.service.ts b/packages/das/src/webhook/github-fetcher.service.ts index 98c8de4..160ba13 100644 --- a/packages/das/src/webhook/github-fetcher.service.ts +++ b/packages/das/src/webhook/github-fetcher.service.ts @@ -969,8 +969,8 @@ export class GitHubFetcherService implements OnModuleInit { /** * Upsert a list of LABELED_EVENT / UNLABELED_EVENT timeline nodes into * the label_events table. Actor role is resolved at read time via - * contributor_repo_roles — GraphQL's actor type doesn't expose - * authorAssociation. + * contributor_repo_roles using stored PR/issue, review, and comment + * association evidence; GraphQL's actor type doesn't expose authorAssociation. */ private async saveLabelTimelineEvents( repoFullName: string, diff --git a/packages/das/src/webhook/handlers/label.handler.ts b/packages/das/src/webhook/handlers/label.handler.ts index 4a4eaaf..5304fa1 100644 --- a/packages/das/src/webhook/handlers/label.handler.ts +++ b/packages/das/src/webhook/handlers/label.handler.ts @@ -34,8 +34,8 @@ export class LabelHandler { source === "pr" ? payload.pull_request.number : payload.issue.number; // Append to label_events log. Actor's repo role is resolved at read time - // via contributor_repo_roles (see pr_labels_by_actor view) — neither the - // webhook sender nor GraphQL LabeledEvent.actor expose author_association. + // via contributor_repo_roles using stored PR/issue, review, and comment + // association evidence; label actors themselves don't expose it. await this.labelEventRepo.save({ repoFullName, targetNumber, diff --git a/packages/db/20_view_contributor_repo_roles.sql b/packages/db/20_view_contributor_repo_roles.sql index eb0f13f..1e084df 100644 --- a/packages/db/20_view_contributor_repo_roles.sql +++ b/packages/db/20_view_contributor_repo_roles.sql @@ -1,5 +1,8 @@ -- Latest known association per contributor per repo. --- Unions PRs and issues, takes the most recently created record. 
+-- Uses every table that stores GitHub's author_association/reviewer_association: +-- PR authors, issue authors, submitted reviews, and issue/PR thread comments. +-- Rows without a stored association are ignored; label views should use the +-- latest known role, not let a missing observation erase earlier evidence. CREATE OR REPLACE VIEW contributor_repo_roles AS SELECT DISTINCT ON (repo_full_name, author_github_id) @@ -8,10 +11,62 @@ SELECT DISTINCT ON (repo_full_name, author_github_id) author_login, author_association FROM ( - SELECT repo_full_name, author_github_id, author_login, author_association, created_at + SELECT + repo_full_name, + author_github_id, + author_login, + author_association, + created_at AS observed_at, + 10 AS source_rank, + 'pr:' || pr_number::text AS source_key FROM pull_requests + WHERE author_github_id IS NOT NULL + AND author_github_id <> '' + AND author_association IS NOT NULL + UNION ALL - SELECT repo_full_name, author_github_id, author_login, author_association, created_at + + SELECT + repo_full_name, + author_github_id, + author_login, + author_association, + created_at AS observed_at, + 10 AS source_rank, + 'issue:' || issue_number::text AS source_key FROM issues + WHERE author_github_id IS NOT NULL + AND author_github_id <> '' + AND author_association IS NOT NULL + + UNION ALL + + SELECT + repo_full_name, + reviewer_github_id AS author_github_id, + reviewer_login AS author_login, + reviewer_association AS author_association, + submitted_at AS observed_at, + 20 AS source_rank, + 'review:' || pr_number::text || ':' || submitted_at::text AS source_key + FROM reviews + WHERE reviewer_github_id IS NOT NULL + AND reviewer_github_id <> '' + AND reviewer_association IS NOT NULL + + UNION ALL + + SELECT + repo_full_name, + author_github_id, + author_login, + author_association, + COALESCE(updated_at, created_at) AS observed_at, + 30 AS source_rank, + 'comment:' || comment_id::text AS source_key + FROM comments + WHERE author_github_id IS 
NOT NULL + AND author_github_id <> '' + AND author_association IS NOT NULL ) combined -ORDER BY repo_full_name, author_github_id, created_at DESC; +ORDER BY repo_full_name, author_github_id, observed_at DESC NULLS LAST, source_rank DESC, source_key DESC; diff --git a/packages/db/24_view_pr_labels_by_actor.sql b/packages/db/24_view_pr_labels_by_actor.sql index d6cbc88..8b10f27 100644 --- a/packages/db/24_view_pr_labels_by_actor.sql +++ b/packages/db/24_view_pr_labels_by_actor.sql @@ -2,8 +2,8 @@ -- Collapses label_events to the latest action per (repo, pr, label); only rows -- where the latest action was "labeled" are included (i.e. label still applied). -- actor_association is resolved from contributor_repo_roles (the actor's most --- recently observed role from PRs/issues they've authored in this repo). --- Actors who've never authored anything return NULL for actor_association. +-- recently observed role from authored PRs/issues, reviews, or comments in +-- this repo). Actors with no stored association evidence return NULL. CREATE OR REPLACE VIEW pr_labels_by_actor AS WITH latest_events AS (