diff --git a/.github/workflows/data-processing.yml b/.github/workflows/data-processing.yml
index 2077216b907..04d22085468 100644
--- a/.github/workflows/data-processing.yml
+++ b/.github/workflows/data-processing.yml
@@ -107,7 +107,7 @@ jobs:
# Install R packages for contributor analysis and visualization
#========================================
- name: Install tenzing R dependencies
- run: Rscript -e 'install.packages(c("rmarkdown","ggplot2", "readxl", "dplyr", "googlesheets4", "stringr", "gridExtra", "glue", "tidygraph", "ggraph", "igraph", "visNetwork"))'
+ run: Rscript -e 'install.packages(c("rmarkdown", "ggplot2", "dplyr", "tidyr", "readr", "googlesheets4", "stringr", "here", "sysfonts", "showtext", "treemapify", "igraph", "visNetwork"))'
#==============================
# Contributor Analysis (Monthly)
@@ -124,7 +124,7 @@ jobs:
# Clean old files from content/contributor-analysis and partials
rm -rf content/contributor-analysis/*.png content/contributor-analysis/*.html content/contributor-analysis/htmlwidgets_libs
- rm -f layouts/partials/network-graph.html
+ rm -f static/partials/network-graph.html
# Run index.Rmd to generate contributor analysis content and plots
echo "📊 Rendering contributor analysis..."
@@ -134,9 +134,9 @@ jobs:
echo "🕸️ Rendering network visualization..."
Rscript -e "rmarkdown::render('content/contributor-analysis/network-graph.Rmd')"
- # Move generated HTML file to layouts/partials
- echo "📁 Moving network graph to partials..."
- mv content/contributor-analysis/network-graph.html layouts/partials/
+ # Move generated HTML file to static/partials (served via iframe)
+ echo "📁 Moving network graph to static/partials..."
+ mkdir -p static/partials && mv content/contributor-analysis/network-graph.html static/partials/
# Clean up HTML artifacts from index.md if any
sed -i.bak -e '/^```{=html}$/d' -e '/^```$/d' content/contributor-analysis/index.md && rm content/contributor-analysis/index.md.bak
@@ -359,6 +359,7 @@ jobs:
content/glossary/
data/
static/data/
+ static/partials/
content/contributor-analysis/
content/publications/citation_chart.webp
retention-days: 7
diff --git a/content/contributor-analysis/index.Rmd b/content/contributor-analysis/index.Rmd
index ab87820d3cf..5c17d2a0e66 100644
--- a/content/contributor-analysis/index.Rmd
+++ b/content/contributor-analysis/index.Rmd
@@ -23,242 +23,218 @@ library(ggplot2) # For visualization
library(tidyr) # For reshaping data
library(googlesheets4) # For reading Google Sheets
library(stringr) # For removing unwanted characters around strings
-library(gridExtra) # For arranging figures
-library(glue) # For printing text
-library(tidygraph) #
-library(ggraph)
-library(igraph)
-library(visNetwork)
-library(Matrix)
+library(here)
+library(readr)
+library(sysfonts)
+library(showtext)
+library(treemapify)
+
+font_add_google("Domine", "Domine")
+showtext_auto()
```
-```{r Read sheets from automation source}
+```{r Read contributor data from cache}
-# Define Google Sheet URL for Automation Source
-google_sheet_url_automation <- "https://docs.google.com/spreadsheets/d/1MUD54FQUhfcBKrvr5gCYoh2wgbJ6Lf7oAJRAqsQ-Nag/edit"
+# Read role column mappings (same source as tenzing.py)
+fields_url <- "https://docs.google.com/spreadsheets/d/e/2PACX-1vT_IaXiYtB3iAmtDZ_XiQKrToRkxOlkXNAeNU2SIT_J9PxvsQyptga6Gg9c8mSvDZpwY6d8skswIQYh/pub?output=csv&gid=277271370"
+role_cols <- read_csv(fields_url, show_col_types = FALSE) |> pull(Fields)
+
+# Read contributor data from cache (generated by tenzing.py)
+cache_path <- here("scripts", "forrt_contribs", "contributors_cache.csv")
+credit_roles <- read_csv(cache_path, show_col_types = FALSE)
-# Get sheet names
-gs4_deauth()
-sheets <- sheet_names(google_sheet_url_automation)
-
-# Exclude the first four Automation Source sheets
-relevant_sheets <- sheets[-c(1,2,3,4)]
-
-# Exclude 'full' sheets
-relevant_sheets <- relevant_sheets[!grepl(" - full$", relevant_sheets)]
-
-# Read all relevant sheets into a list and remove rows without last names
-internal_links <- read_sheet(google_sheet_url_automation, sheet = "INTERNAL LINKS") %>%
- select(`Project Name`) # Extract project names
-
-data_list <- lapply(relevant_sheets, function(sheet) {
- # Read the content of the current sheet
- df <- read_sheet(google_sheet_url_automation, sheet = sheet)
- # Remove rows where the `Surname` is missing or empty
- df <- df %>% filter(!is.na(`Surname`) & `Surname` != "")
- # Match the `Project Name` using the sheet name (assuming sheet name is the identifier)
- project_name <- internal_links$`Project Name`[internal_links$`Project Name` == sheet]
- # If a match is found, assign the `Project Name`, else set to NA
- if (length(project_name) > 0) {
- df <- df %>% mutate(`Project Name` = project_name)
- } else {
- df <- df %>% mutate(`Project Name` = NA)
- }
- df
-})
-
-# Combine all sheets into one dataframe
-dt <- bind_rows(data_list, .id = "Source_Sheet")
-
-# Move project name column to first position
-dt <- dt %>%
- select(`Project Name`, everything())
-
-# Remove PM column as it is unnecessary and might cause problems with binding
-dt <- dt %>%
- select(-`Project Managers`)
```
```{r Read sheets from leads tenzing}
-# Define Google Sheet URL for Leads Tenzing
+# Define Google Sheet URL for Leads Tenzing
google_sheet_url_leads <- "https://docs.google.com/spreadsheets/d/1roy-sZTxyXENA5c5IIV7IIemYvzbzs7zojUN2yIpi58/edit?gid=0#gid=0"
-# Get sheet names
-leads_sheets <- sheet_names(google_sheet_url_leads)
-
-# Exclude the second Leads Tenzing sheet
-relevant_leads_sheets <- leads_sheets[-c(2)]
-
-# Read the Leads Tenzing sheet
-leads_df <- read_sheet(google_sheet_url_leads)
+gs4_deauth()
-# Rename columns and select relevant ones
-leads_df <- leads_df %>%
+lead_roles <- read_sheet(google_sheet_url_leads) |>
rename(
`Project Name` = `FORRT project(s)`,
`ORCID iD` = `ORCID`
- ) %>%
- select(`First name`, `Middle name`, `Surname`, `Project Name`, `Role`, `ORCID iD`)
-
-# Create a column to mark the presence of a role and group by individual
-leads_df <- leads_df %>%
- mutate(has_role = TRUE) %>% # Mark with TRUE for individuals having a role
- distinct(`First name`, `Middle name`, `Surname`, `Project Name`, `ORCID iD`, `Role`, .keep_all = TRUE) # Remove duplicates
-
-# Pivot the data to create a column for each leadership role type, removing director
-leads_df <- leads_df %>%
- pivot_wider(names_from = `Role`, values_from = `has_role`, values_fill = list(has_role = FALSE)) %>%
- select(-Director, -`Operations Coordinator`) # Drop the 'Director' and Operations Coordinator columns
+ ) |>
+ select(`First name`, `Middle name`, `Surname`, `Project Name`, `Role`, `ORCID iD`) |>
+ mutate(has_role = TRUE) |>
+ distinct(`First name`, `Middle name`, `Surname`, `Project Name`, `ORCID iD`, `Role`, .keep_all = TRUE) |>
+ pivot_wider(names_from = `Role`, values_from = `has_role`, values_fill = list(has_role = FALSE)) |>
+ # Remove FORRT-wide organizational roles (not project-level contributions); any_of() tolerates a role that is absent from the sheet
+ select(-any_of(c("Director", "Operations Coordinator")))
```
```{r Combine roles from leads tenzing with automation source}
+# Get leadership role columns (everything except ID columns)
+id_cols <- c("First name", "Middle name", "Surname", "Project Name", "ORCID iD")
+leads_role_cols <- setdiff(names(lead_roles), id_cols)
+all_role_cols <- union(role_cols, leads_role_cols)
+
# Combine leads tenzing rows with automation source
-dt <- bind_rows(dt, leads_df)
+contributions <- bind_rows(credit_roles, lead_roles)
```
```{r Trim Values For Consistency}
# Trim names and ORCIDs to ensure inconsistent Tenzing entries are not counted separately
-dt <- dt %>%
+contributions <- contributions |>
mutate(
`First name` = str_trim(str_replace_all(`First name`, "\\*", "")), # Remove * and trim spaces
- `Middle name` = str_trim(str_replace_all(`Middle name`, "\\*", "")) %>% str_sub(1, 1), # Remove *, trim spaces, and keep first letter
+ `Middle name` = str_trim(str_replace_all(`Middle name`, "\\*", "")) |> str_sub(1, 1), # Remove *, trim spaces, and keep first letter
`Surname` = str_trim(str_replace_all(`Surname`, "\\*", "")), # Remove * and trim spaces
`ORCID iD` = str_trim(str_remove(`ORCID iD`, "https://orcid.org/")) # Remove ORCID URL prefix and trim spaces
)
```
-```{r ensure Conceptualization is logical}
-dt$Conceptualization <- as.logical(dt$Conceptualization)
-```
-
```{r Metrics}
# Count unique contributors (only use surname and first name at the moment, middle name causes problems)
-unique_contributors <- dt %>%
- distinct(`Surname`, `First name`) %>%
+unique_contributors <- contributions |>
+ distinct(Surname, `First name`) |>
nrow()
# Contributions per person
-contributions_per_person <- dt %>%
- group_by(`Surname`, `First name`,) %>%
- summarise(Contributions = n(), .groups = 'drop')
+contributions_per_person <- contributions |>
+ summarise(Contributions = n(), .by = c(Surname, `First name`))
# Mean contributions per person
mean_contributions_per_person <- mean(contributions_per_person$Contributions)
```
```{r Project Engagement}
-count_projects <- dt %>%
- summarise(n_distinct(`Project Name`))
-
-count_projects <- as.integer(count_projects)
+count_projects <- n_distinct(contributions$`Project Name`)
# Contributors per project
-project_contributors <- dt %>%
- distinct(`Project Name`, `Surname`, `First name`, `Middle name`) %>% # Remove duplicate contributor entries
- group_by(`Project Name`) %>%
- summarise(Unique_Contributors = n(), .groups = 'drop') %>% # Count distinct names
- arrange(desc(Unique_Contributors))
+project_contributors <- contributions |>
+ distinct(`Project Name`, Surname, `First name`, `Middle name`) |>
+ summarise(unique_contributors = n(), .by = `Project Name`) |>
+ arrange(desc(unique_contributors))
# Reorder the 'Project Name' based on the number of unique contributors (descending)
project_contributors$`Project Name` <- factor(project_contributors$`Project Name`,
levels = project_contributors$`Project Name`)
# Roles and contributions distribution
-dt_long <- dt %>%
+contributions_long <- contributions |>
pivot_longer(
- cols = "Conceptualization" | starts_with("Writing") |
- "Data curation" | "Formal analysis" |
- "Funding acquisition" | "Investigation" |
- "Methodology" | "Project administration" |
- "Resources" | "Software" | "Supervision" | "Validation" |
- "Visualization" | "Project manager" |
- "Project Coordinators" | "Project lead" | "Project co-lead",
+ cols = any_of(all_role_cols),
names_to = "Role",
values_to = "Contribution"
- ) %>%
+ ) |>
filter(Contribution == TRUE)
-role_distribution <- dt_long %>%
+role_distribution <- contributions_long |>
count(Role, sort = TRUE)
-mean_project_contributors <- mean(project_contributors$Unique_Contributors)
+mean_project_contributors <- mean(project_contributors$unique_contributors)
```
-```{r Summary for Website}
-output <- glue("
-As of {format(Sys.Date(), '%d %B %Y')}, FORRT has a total of {sprintf('%d', count_projects)} completed or ongoing projects and support teams, with a total of {format(unique_contributors, big.mark = ',')} contributors. There is an average (mean) of {sprintf('%.2f', mean_contributions_per_person)} contributions per person across all FORRT projects, and the average number of contributors per project is {sprintf('%.2f', mean_project_contributors)}. You can see the full list of FORRT contributors and their individual contributions [here](https://forrt.org/contributors/).")
-```
-
-`r output`
+As of `r format(Sys.Date(), '%d %B %Y')`, FORRT has a total of `r count_projects` completed or ongoing projects and support teams, with a total of `r format(unique_contributors, big.mark = ',')` contributors. There is an average (mean) of `r sprintf('%.2f', mean_contributions_per_person)` contributions per person across all FORRT projects, and the average number of contributors per project is `r sprintf('%.2f', mean_project_contributors)`. You can see the full list of FORRT contributors and their individual contributions [here](https://forrt.org/contributors/).
```{r Basic visualizations}
# Contributors per project
-# Keep only the top 10 projects based on Unique_Contributors
-project_contributors <- project_contributors %>%
- arrange(desc(Unique_Contributors)) %>%
+# Keep only the top 10 projects based on unique_contributors
+project_contributors <- project_contributors |>
+ arrange(desc(unique_contributors)) |>
slice_head(n = 10)
-# Get the first three project names from the sorted order
-top_projects <- project_contributors$`Project Name`[1:5]
-
-# Add labels only for these projects
-project_contributors <- project_contributors %>%
- mutate(label = ifelse(`Project Name` %in% top_projects, as.character(`Project Name`), NA))
-
# Plot
-projects_plot <- ggplot(project_contributors, aes(x = `Project Name`, y = Unique_Contributors)) +
- geom_bar(stat = "identity", fill = "#A52828", color = "#323232", alpha = 0.8) +
- geom_text(aes(label = label), hjust = 0, nudge_x = -0.3, vjust = 1.0, nudge_y = 9, size = 3, na.rm = TRUE) + # Adjusted vjust and nudge_y
- labs(title = "Contributors for FORRT's 10 Biggest Projects",
- x = "Projects",
- y = "Number of Contributors") +
- theme_minimal(base_size = 18) +
- theme(plot.title = element_text(hjust = 0.5, face = "bold", size = 16),
- axis.text.x = element_blank(),
- axis.ticks.x = element_blank(),
- axis.text.y = element_text(size = 12),
- axis.title = element_text(size = 14),
- panel.background = element_rect(fill = "#fefdf6", color = NA), # No panel border
- plot.background = element_rect(fill = "#fefdf6", color = NA),
- panel.border = element_blank(),
- panel.spacing = unit(0, "lines"))
+projects_plot <- ggplot(project_contributors, aes(y = reorder(`Project Name`, unique_contributors), x = unique_contributors)) +
+ geom_segment(aes(x = 0, xend = unique_contributors, yend = reorder(`Project Name`, unique_contributors)),
+ color = "#A52828", linewidth = 1) +
+ geom_point(color = "#A52828", size = 4) +
+ geom_text(aes(label = unique_contributors), hjust = -0.8, size = 5, family = "Domine", color = "#333333") +
+ scale_x_continuous(limits = c(0, max(project_contributors$unique_contributors) * 1.15), expand = c(0, 0)) +
+ labs(title = "Number of Contributors for FORRT's 10 Biggest Projects") +
+ theme_minimal(base_size = 14, base_family = "Domine") +
+ theme(plot.title = element_text(hjust = 0.5, face = "bold", size = 20, margin = margin(b = 20)),
+ plot.title.position = "plot",
+ axis.text.y = element_text(size = 14, color = "#333333"),
+ axis.text.x = element_blank(),
+ axis.title = element_blank(),
+ panel.grid = element_blank(),
+ panel.background = element_rect(fill = "#fefdf6", color = NA),
+ plot.background = element_rect(fill = "#fefdf6", color = NA),
+ panel.border = element_blank(),
+ panel.spacing = unit(0, "lines"),
+ plot.margin = margin(t = 10, r = 10, b = 60, l = 10))
# Roles in projects
-roles_plot <- ggplot(role_distribution, aes(x = reorder(Role, n), y = n)) +
- geom_bar(stat = "identity", fill = "#A52828", color = "#323232", alpha = 0.8) +
- coord_flip() +
- labs(title = "Distribution of Contributions Across Roles",
- x = "Role",
+
+roles_plot <- ggplot(role_distribution, aes(y = reorder(Role, n), x = n)) +
+ geom_col(fill = "#A52828", alpha = 0.8) +
+ geom_text(aes(label = n), hjust = -0.3, size = 5, family = "Domine", color = "#333333") +
+ scale_x_continuous(limits = c(0, max(role_distribution$n) * 1.1), expand = c(0, 0)) +
+ labs(title = "Number of Contributions Across Roles",
y = "Number of People") +
- theme_minimal(base_size = 14) +
- theme(plot.title = element_text(hjust = 0.5, face = "bold", size = 16), # Set title size to 16
- axis.text = element_text(size = 12),
- axis.title = element_text(size = 14),
- panel.background = element_rect(fill = "#fefdf6", color = NA), # No panel border
- plot.background = element_rect(fill = "#fefdf6", color = NA), # No plot border
- panel.border = element_blank(), # Explicitly blank
- panel.spacing = unit(0, "lines"))
+ theme_minimal(base_size = 14, base_family = "Domine") +
+ theme(plot.title = element_text(hjust = 0.5, face = "bold", size = 20, margin = margin(b = 20)),
+ plot.title.position = "plot",
+ axis.text.y = element_text(size = 16, color = "#333333"),
+ axis.text.x = element_blank(),
+ axis.title = element_blank(),
+ panel.grid = element_blank(),
+ panel.background = element_rect(fill = "#fefdf6", color = NA),
+ plot.background = element_rect(fill = "#fefdf6", color = NA),
+ panel.border = element_blank(),
+ panel.spacing = unit(0, "lines"),
+ plot.margin = margin(t = 10, r = 10, b = 10, l = 10))
+
+# Treemap: number of distinct projects per contributor (row counts can double-count a person who appears in both the cache and the leads sheet for the same project)
+projects_per_person <- summarise(contributions, Contributions = n_distinct(`Project Name`), .by = c(Surname, `First name`)) |>
+ mutate(
+ label = case_when(
+ Contributions == 1 ~ "1 project",
+ Contributions >= 5 ~ "5 or more projects",
+ TRUE ~ str_c(Contributions, " projects")
+ )
+ ) |>
+ count(label) |>
+ mutate(
+ sort_order = case_when(
+ str_detect(label, "^1") ~ 1L,
+ str_detect(label, "^2") ~ 2L,
+ str_detect(label, "^3") ~ 3L,
+ str_detect(label, "^4") ~ 4L,
+ TRUE ~ 5L
+ ),
+ percent = round(n / sum(n) * 100, 1)
+ ) |>
+ arrange(sort_order)
+
+treemap_plot <- ggplot(projects_per_person, aes(area = n, fill = sort_order)) +
+ geom_treemap() +
+ geom_treemap_text(aes(label = label), colour = "white", place = "centre", size = 16, family = "Domine") +
+ geom_treemap_text(aes(label = str_c("\n\n\n(", percent, "% of contributors)")), colour = "white", place = "centre", size = 11, family = "Domine") +
+ scale_fill_gradient(low = "#e08a8a", high = "#A52828", guide = "none") +
+ labs(title = "Share of Contributors by Number of Projects they Contributed To") +
+ theme_minimal(base_size = 14, base_family = "Domine") +
+ theme(plot.title = element_text(hjust = 0.5, face = "bold", size = 20, margin = margin(b = 20)),
+ plot.title.position = "plot",
+ panel.background = element_rect(fill = "#fefdf6", color = NA),
+ plot.background = element_rect(fill = "#fefdf6", color = NA),
+ plot.margin = margin(t = 10, r = 10, b = 60, l = 10))
```
-```{r projects-plot, fig.alt = "Bar chart of contributors per project"}
+```{r projects-plot, fig.alt = "Lollipop chart of contributors per project", fig.width = 10, fig.height = 7}
projects_plot
```
-```{r roles-plot, fig.alt = "Bar chart of contributions by Tenzing role"}
-roles_plot
+```{r treemap-plot, fig.alt = "Treemap of number of projects per contributor", fig.width = 10, fig.height = 7}
+treemap_plot
```
+```{r roles-plot, fig.alt = "Bar chart of contributions by Tenzing role", fig.width = 10, fig.height = 7}
+roles_plot
+```
-```{r dt long}
-dt_long <- dt_long %>%
- rowwise() %>%
- mutate(Contributor = paste(na.omit(c(`Surname`, `First name`)), collapse = " ")) %>%
+```{r credit-roles-long}
+contributions_long <- contributions_long |>
+ unite(Contributor, Surname, `First name`, sep = " ", remove = FALSE, na.rm = TRUE) |>
mutate(Lead = if_else(Role %in% c("lead", "co-lead"), Role, "other"))
```
diff --git a/content/contributor-analysis/index.md b/content/contributor-analysis/index.md
index 5716bcb8a3e..08984289f38 100644
--- a/content/contributor-analysis/index.md
+++ b/content/contributor-analysis/index.md
@@ -3,19 +3,21 @@ title: "FORRT Contributor Analyses"
always_allow_html: true
output:
md_document:
- variant: markdown_github
+ variant: gfm
preserve_yaml: true
toc: false
type: contributors_analysis
---
-As of 14 September 2025, FORRT has a total of 91 completed or ongoing
-projects and support teams, with a total of 587 contributors. There is
-an average (mean) of 2.45 contributions per person across all FORRT
-projects, and the average number of contributors per project is 15.54.
-You can see the full list of FORRT contributors and their individual
+As of 12 February 2026, FORRT has a total of 98 completed or ongoing
+projects and support teams, with a total of 627 contributors. There is
+an average (mean) of 2.4 contributions per person across all FORRT
+projects, and the average number of contributors per project is 15. You
+can see the full list of FORRT contributors and their individual
contributions [here](https://forrt.org/contributors/).
+
+
diff --git a/content/contributor-analysis/network-graph.Rmd b/content/contributor-analysis/network-graph.Rmd
index 1ab83d430d8..b95ab79e9e4 100644
--- a/content/contributor-analysis/network-graph.Rmd
+++ b/content/contributor-analysis/network-graph.Rmd
@@ -9,6 +9,14 @@ output:
The interactive network visualization below shows the connections between FORRT contributors based on their collaborative work across different projects. Each node represents a contributor, and the connections (edges) represent shared project participation.
+
{{ partial "page_header" . }}
-
+
{{ .Content }}
-
+
-{{ partial "network-graph.html" . }}
+
+
+
+
+
diff --git a/layouts/partials/network-graph.html b/layouts/partials/network-graph.html
index b98b466f2a6..a18ea2a83c5 100644
--- a/layouts/partials/network-graph.html
+++ b/layouts/partials/network-graph.html
@@ -5661,6 +5661,9 @@ Network of Contributors
different projects. Each node represents a contributor, and the
connections (edges) represent shared project participation.
-
-
+
+
+
diff --git a/scripts/forrt_contribs/tenzing_template.md b/scripts/forrt_contribs/tenzing_template.md
index 429520c8b71..14c0a85fd88 100644
--- a/scripts/forrt_contribs/tenzing_template.md
+++ b/scripts/forrt_contribs/tenzing_template.md
@@ -192,7 +192,7 @@ if (window.location.hash) {
-FORRT is driven by a **large and diverse community of contributors** that shape one or more of our projects. Below you can see everyone's scientific contributions in detail. Note that many also contribute to maintaining our community - we are equally grateful for their efforts. You can find out more about the scale of contributions at FORRT, including an interactive network graph, on our contributor analysis page.
+FORRT is driven by a **large and diverse community of contributors** that shape one or more of our projects. Below you can see everyone's scientific contributions in detail. Note that many also contribute to maintaining our community - we are equally grateful for their efforts. You can find out more about the scale of contributions at FORRT, including an interactive network graph, on our contributor analysis page.
diff --git a/static/partials/network-graph.html b/static/partials/network-graph.html
new file mode 100644
index 00000000000..119a7705bf7
--- /dev/null
+++ b/static/partials/network-graph.html
@@ -0,0 +1,5748 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+Network of Contributors
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
The interactive network visualization below shows the connections
+between FORRT contributors based on their collaborative work across
+different projects. Each node represents a contributor, and the
+connections (edges) represent shared project participation.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+