diff --git a/.github/workflows/data-processing.yml b/.github/workflows/data-processing.yml index 2077216b907..04d22085468 100644 --- a/.github/workflows/data-processing.yml +++ b/.github/workflows/data-processing.yml @@ -107,7 +107,7 @@ jobs: # Install R packages for contributor analysis and visualization #======================================== - name: Install tenzing R dependencies - run: Rscript -e 'install.packages(c("rmarkdown","ggplot2", "readxl", "dplyr", "googlesheets4", "stringr", "gridExtra", "glue", "tidygraph", "ggraph", "igraph", "visNetwork"))' + run: Rscript -e 'install.packages(c("rmarkdown", "ggplot2", "dplyr", "tidyr", "readr", "googlesheets4", "stringr", "here", "sysfonts", "showtext", "treemapify", "igraph", "visNetwork"))' #============================== # Contributor Analysis (Monthly) @@ -124,7 +124,7 @@ jobs: # Clean old files from content/contributor-analysis and partials rm -rf content/contributor-analysis/*.png content/contributor-analysis/*.html content/contributor-analysis/htmlwidgets_libs - rm -f layouts/partials/network-graph.html + rm -f static/partials/network-graph.html # Run index.Rmd to generate contributor analysis content and plots echo "📊 Rendering contributor analysis..." @@ -134,9 +134,9 @@ jobs: echo "🕸️ Rendering network visualization..." Rscript -e "rmarkdown::render('content/contributor-analysis/network-graph.Rmd')" - # Move generated HTML file to layouts/partials - echo "📁 Moving network graph to partials..." - mv content/contributor-analysis/network-graph.html layouts/partials/ + # Move generated HTML file to static/partials (served via iframe) + echo "📁 Moving network graph to static/partials..." + mv content/contributor-analysis/network-graph.html static/partials/ # Clean up HTML artifacts from index.md if any sed -i.bak -e '/^```{=html}$/d' -e '/^```$/d' content/contributor-analysis/index.md && rm content/contributor-analysis/index.md.bak @@ -359,6 +359,7 @@ jobs: content/glossary/ data/ static/data/ + static/partials/ content/contributor-analysis/ content/publications/citation_chart.webp retention-days: 7 diff --git a/content/contributor-analysis/index.Rmd b/content/contributor-analysis/index.Rmd index ab87820d3cf..5c17d2a0e66 100644 --- a/content/contributor-analysis/index.Rmd +++ b/content/contributor-analysis/index.Rmd @@ -23,242 +23,218 @@ library(ggplot2) # For visualization library(tidyr) # For reshaping data library(googlesheets4) # For reading Google Sheets library(stringr) # For removing unwanted characters around strings -library(gridExtra) # For arranging figures -library(glue) # For printing text -library(tidygraph) # -library(ggraph) -library(igraph) -library(visNetwork) -library(Matrix) +library(here) +library(readr) +library(sysfonts) +library(showtext) +library(treemapify) + +font_add_google("Domine", "Domine") +showtext_auto() ``` -```{r Read sheets from automation source} +```{r Read contributor data from cache} -# Define Google Sheet URL for Automation Source -google_sheet_url_automation <- "https://docs.google.com/spreadsheets/d/1MUD54FQUhfcBKrvr5gCYoh2wgbJ6Lf7oAJRAqsQ-Nag/edit" +# Read role column mappings (same source as tenzing.py) +fields_url <- "https://docs.google.com/spreadsheets/d/e/2PACX-1vT_IaXiYtB3iAmtDZ_XiQKrToRkxOlkXNAeNU2SIT_J9PxvsQyptga6Gg9c8mSvDZpwY6d8skswIQYh/pub?output=csv&gid=277271370" +role_cols <- read_csv(fields_url, show_col_types = FALSE) |> pull(Fields) + +# Read contributor data from cache (generated by tenzing.py) +cache_path <- here("scripts", "forrt_contribs", "contributors_cache.csv") +credit_roles <- read_csv(cache_path, show_col_types = FALSE) -# Get sheet names -gs4_deauth() -sheets <- sheet_names(google_sheet_url_automation) - -# Exclude the first four Automation Source sheets -relevant_sheets <- sheets[-c(1,2,3,4)] - -# Exclude 'full' sheets -relevant_sheets <- relevant_sheets[!grepl(" - full$", relevant_sheets)] - -# Read all relevant sheets into a list and remove rows without last names -internal_links <- read_sheet(google_sheet_url_automation, sheet = "INTERNAL LINKS") %>% - select(`Project Name`) # Extract project names - -data_list <- lapply(relevant_sheets, function(sheet) { - # Read the content of the current sheet - df <- read_sheet(google_sheet_url_automation, sheet = sheet) - # Remove rows where the `Surname` is missing or empty - df <- df %>% filter(!is.na(`Surname`) & `Surname` != "") - # Match the `Project Name` using the sheet name (assuming sheet name is the identifier) - project_name <- internal_links$`Project Name`[internal_links$`Project Name` == sheet] - # If a match is found, assign the `Project Name`, else set to NA - if (length(project_name) > 0) { - df <- df %>% mutate(`Project Name` = project_name) - } else { - df <- df %>% mutate(`Project Name` = NA) - } - df -}) - -# Combine all sheets into one dataframe -dt <- bind_rows(data_list, .id = "Source_Sheet") - -# Move project name column to first position -dt <- dt %>% - select(`Project Name`, everything()) - -# Remove PM column as it is unnecessary and might cause problems with binding -dt <- dt %>% - select(-`Project Managers`) ``` ```{r Read sheets from leads tenzing} -# Define Google Sheet URL for Leads Tenzing +# Define Google Sheet URL for Leads Tenzing google_sheet_url_leads <- "https://docs.google.com/spreadsheets/d/1roy-sZTxyXENA5c5IIV7IIemYvzbzs7zojUN2yIpi58/edit?gid=0#gid=0" -# Get sheet names -leads_sheets <- sheet_names(google_sheet_url_leads) - -# Exclude the second Leads Tenzing sheet -relevant_leads_sheets <- leads_sheets[-c(2)] - -# Read the Leads Tenzing sheet -leads_df <- read_sheet(google_sheet_url_leads) +gs4_deauth() -# Rename columns and select relevant ones -leads_df <- leads_df %>% +lead_roles <- read_sheet(google_sheet_url_leads) |> rename( `Project Name` = `FORRT project(s)`, `ORCID iD` = `ORCID` - ) %>% - select(`First name`, `Middle name`, `Surname`, `Project Name`, `Role`, `ORCID iD`) - -# Create a column to mark the presence of a role and group by individual -leads_df <- leads_df %>% - mutate(has_role = TRUE) %>% # Mark with TRUE for individuals having a role - distinct(`First name`, `Middle name`, `Surname`, `Project Name`, `ORCID iD`, `Role`, .keep_all = TRUE) # Remove duplicates - -# Pivot the data to create a column for each leadership role type, removing director -leads_df <- leads_df %>% - pivot_wider(names_from = `Role`, values_from = `has_role`, values_fill = list(has_role = FALSE)) %>% - select(-Director, -`Operations Coordinator`) # Drop the 'Director' and Operations Coordinator columns + ) |> + select(`First name`, `Middle name`, `Surname`, `Project Name`, `Role`, `ORCID iD`) |> + mutate(has_role = TRUE) |> + distinct(`First name`, `Middle name`, `Surname`, `Project Name`, `ORCID iD`, `Role`, .keep_all = TRUE) |> + pivot_wider(names_from = `Role`, values_from = `has_role`, values_fill = list(has_role = FALSE)) |> + # Remove FORRT-wide organizational roles (not project-level contributions) + select(-Director, -`Operations Coordinator`) ``` ```{r Combine roles from leads tenzing with automation source} +# Get leadership role columns (everything except ID columns) +id_cols <- c("First name", "Middle name", "Surname", "Project Name", "ORCID iD") +leads_role_cols <- setdiff(names(lead_roles), id_cols) +all_role_cols <- union(role_cols, leads_role_cols) + # Combine leads tenzing rows with automation source -dt <- bind_rows(dt, leads_df) +contributions <- bind_rows(credit_roles, lead_roles) ``` ```{r Trim Values For Consistency} # Trim names and ORCIDs to ensure inconsistent Tenzing entries are not counted separately -dt <- dt %>% +contributions <- contributions |> mutate( `First name` = str_trim(str_replace_all(`First name`, "\\*", "")), # Remove * and trim spaces - `Middle name` = str_trim(str_replace_all(`Middle name`, "\\*", "")) %>% str_sub(1, 1), # Remove *, trim spaces, and keep first letter + `Middle name` = str_trim(str_replace_all(`Middle name`, "\\*", "")) |> str_sub(1, 1), # Remove *, trim spaces, and keep first letter `Surname` = str_trim(str_replace_all(`Surname`, "\\*", "")), # Remove * and trim spaces `ORCID iD` = str_trim(str_remove(`ORCID iD`, "https://orcid.org/")) # Remove ORCID URL prefix and trim spaces ) ``` -```{r ensure Conceptualization is logical} -dt$Conceptualization <- as.logical(dt$Conceptualization) -``` - ```{r Metrics} # Count unique contributors (only use surname and first name at the moment, middle name causes problems) -unique_contributors <- dt %>% - distinct(`Surname`, `First name`) %>% +unique_contributors <- contributions |> + distinct(Surname, `First name`) |> nrow() # Contributions per person -contributions_per_person <- dt %>% - group_by(`Surname`, `First name`,) %>% - summarise(Contributions = n(), .groups = 'drop') +contributions_per_person <- contributions |> + summarise(Contributions = n(), .by = c(Surname, `First name`)) # Mean contributions per person mean_contributions_per_person <- mean(contributions_per_person$Contributions) ``` ```{r Project Engagement} -count_projects <- dt %>% - summarise(n_distinct(`Project Name`)) - -count_projects <- as.integer(count_projects) +count_projects <- n_distinct(contributions$`Project Name`) # Contributors per project -project_contributors <- dt %>% - distinct(`Project Name`, `Surname`, `First name`, `Middle name`) %>% # Remove duplicate contributor entries - group_by(`Project Name`) %>% - summarise(Unique_Contributors = n(), .groups = 'drop') %>% # Count distinct names - arrange(desc(Unique_Contributors)) +project_contributors <- contributions |> + distinct(`Project Name`, Surname, `First name`, `Middle name`) |> + summarise(unique_contributors = n(), .by = `Project Name`) |> + arrange(desc(unique_contributors)) # Reorder the 'Project Name' based on the number of unique contributors (descending) project_contributors$`Project Name` <- factor(project_contributors$`Project Name`, levels = project_contributors$`Project Name`) # Roles and contributions distribution -dt_long <- dt %>% +contributions_long <- contributions |> pivot_longer( - cols = "Conceptualization" | starts_with("Writing") | - "Data curation" | "Formal analysis" | - "Funding acquisition" | "Investigation" | - "Methodology" | "Project administration" | - "Resources" | "Software" | "Supervision" | "Validation" | - "Visualization" | "Project manager" | - "Project Coordinators" | "Project lead" | "Project co-lead", + cols = any_of(all_role_cols), names_to = "Role", values_to = "Contribution" - ) %>% + ) |> filter(Contribution == TRUE) -role_distribution <- dt_long %>% +role_distribution <- contributions_long |> count(Role, sort = TRUE) -mean_project_contributors <- mean(project_contributors$Unique_Contributors) +mean_project_contributors <- mean(project_contributors$unique_contributors) ``` -```{r Summary for Website} -output <- glue(" -As of {format(Sys.Date(), '%d %B %Y')}, FORRT has a total of {sprintf('%d', count_projects)} completed or ongoing projects and support teams, with a total of {format(unique_contributors, big.mark = ',')} contributors. There is an average (mean) of {sprintf('%.2f', mean_contributions_per_person)} contributions per person across all FORRT projects, and the average number of contributors per project is {sprintf('%.2f', mean_project_contributors)}. You can see the full list of FORRT contributors and their individual contributions [here](https://forrt.org/contributors/).") -``` - -`r output` +As of `r format(Sys.Date(), '%d %B %Y')`, FORRT has a total of `r count_projects` completed or ongoing projects and support teams, with a total of `r format(unique_contributors, big.mark = ',')` contributors. There is an average (mean) of `r round(mean_contributions_per_person, 2)` contributions per person across all FORRT projects, and the average number of contributors per project is `r round(mean_project_contributors)`. You can see the full list of FORRT contributors and their individual contributions [here](https://forrt.org/contributors/). ```{r Basic visualizations} # Contributors per project -# Keep only the top 10 projects based on Unique_Contributors -project_contributors <- project_contributors %>% - arrange(desc(Unique_Contributors)) %>% +# Keep only the top 10 projects based on unique_contributors +project_contributors <- project_contributors |> + arrange(desc(unique_contributors)) |> slice_head(n = 10) -# Get the first three project names from the sorted order -top_projects <- project_contributors$`Project Name`[1:5] - -# Add labels only for these projects -project_contributors <- project_contributors %>% - mutate(label = ifelse(`Project Name` %in% top_projects, as.character(`Project Name`), NA)) - # Plot -projects_plot <- ggplot(project_contributors, aes(x = `Project Name`, y = Unique_Contributors)) + - geom_bar(stat = "identity", fill = "#A52828", color = "#323232", alpha = 0.8) + - geom_text(aes(label = label), hjust = 0, nudge_x = -0.3, vjust = 1.0, nudge_y = 9, size = 3, na.rm = TRUE) + # Adjusted vjust and nudge_y - labs(title = "Contributors for FORRT's 10 Biggest Projects", - x = "Projects", - y = "Number of Contributors") + - theme_minimal(base_size = 18) + - theme(plot.title = element_text(hjust = 0.5, face = "bold", size = 16), - axis.text.x = element_blank(), - axis.ticks.x = element_blank(), - axis.text.y = element_text(size = 12), - axis.title = element_text(size = 14), - panel.background = element_rect(fill = "#fefdf6", color = NA), # No panel border - plot.background = element_rect(fill = "#fefdf6", color = NA), - panel.border = element_blank(), - panel.spacing = unit(0, "lines")) +projects_plot <- ggplot(project_contributors, aes(y = reorder(`Project Name`, unique_contributors), x = unique_contributors)) + + geom_segment(aes(x = 0, xend = unique_contributors, yend = reorder(`Project Name`, unique_contributors)), + color = "#A52828", linewidth = 1) + + geom_point(color = "#A52828", size = 4) + + geom_text(aes(label = unique_contributors), hjust = -0.8, size = 5, family = "Domine", color = "#333333") + + scale_x_continuous(limits = c(0, max(project_contributors$unique_contributors) * 1.15), expand = c(0, 0)) + + labs(title = "Number of Contributors for FORRT's 10 Biggest Projects") + + theme_minimal(base_size = 14, base_family = "Domine") + + theme(plot.title = element_text(hjust = 0.5, face = "bold", size = 20, margin = margin(b = 20)), + plot.title.position = "plot", + axis.text.y = element_text(size = 14, color = "#333333"), + axis.text.x = element_blank(), + axis.title = element_blank(), + panel.grid = element_blank(), + panel.background = element_rect(fill = "#fefdf6", color = NA), + plot.background = element_rect(fill = "#fefdf6", color = NA), + panel.border = element_blank(), + panel.spacing = unit(0, "lines"), + plot.margin = margin(t = 10, r = 10, b = 60, l = 10)) # Roles in projects -roles_plot <- ggplot(role_distribution, aes(x = reorder(Role, n), y = n)) + - geom_bar(stat = "identity", fill = "#A52828", color = "#323232", alpha = 0.8) + - coord_flip() + - labs(title = "Distribution of Contributions Across Roles", - x = "Role", + +roles_plot <- ggplot(role_distribution, aes(y = reorder(Role, n), x = n)) + + geom_col(fill = "#A52828", alpha = 0.8) + + geom_text(aes(label = n), hjust = -0.3, size = 5, family = "Domine", color = "#333333") + + scale_x_continuous(limits = c(0, max(role_distribution$n) * 1.1), expand = c(0, 0)) + + labs(title = "Number of Contributions Across Roles", y = "Number of People") + - theme_minimal(base_size = 14) + - theme(plot.title = element_text(hjust = 0.5, face = "bold", size = 16), # Set title size to 16 - axis.text = element_text(size = 12), - axis.title = element_text(size = 14), - panel.background = element_rect(fill = "#fefdf6", color = NA), # No panel border - plot.background = element_rect(fill = "#fefdf6", color = NA), # No plot border - panel.border = element_blank(), # Explicitly blank - panel.spacing = unit(0, "lines")) + theme_minimal(base_size = 14, base_family = "Domine") + + theme(plot.title = element_text(hjust = 0.5, face = "bold", size = 20, margin = margin(b = 20)), + plot.title.position = "plot", + axis.text.y = element_text(size = 16, color = "#333333"), + axis.text.x = element_blank(), + axis.title = element_blank(), + panel.grid = element_blank(), + panel.background = element_rect(fill = "#fefdf6", color = NA), + plot.background = element_rect(fill = "#fefdf6", color = NA), + panel.border = element_blank(), + panel.spacing = unit(0, "lines"), + plot.margin = margin(t = 10, r = 10, b = 10, l = 10)) + +# Treemap: number of projects per contributor +projects_per_person <- contributions_per_person |> + mutate( + label = case_when( + Contributions == 1 ~ "1 project", + Contributions >= 5 ~ "5 or more projects", + TRUE ~ str_c(Contributions, " projects") + ) + ) |> + count(label) |> + mutate( + sort_order = case_when( + str_detect(label, "^1") ~ 1L, + str_detect(label, "^2") ~ 2L, + str_detect(label, "^3") ~ 3L, + str_detect(label, "^4") ~ 4L, + TRUE ~ 5L + ), + percent = round(n / sum(n) * 100, 1) + ) |> + arrange(sort_order) + +treemap_plot <- ggplot(projects_per_person, aes(area = n, fill = sort_order)) + + geom_treemap() + + geom_treemap_text(aes(label = label), colour = "white", place = "centre", size = 16, family = "Domine") + + geom_treemap_text(aes(label = str_c("\n\n\n(", percent, "% of contributors)")), colour = "white", place = "centre", size = 11, family = "Domine") + + scale_fill_gradient(low = "#e08a8a", high = "#A52828", guide = "none") + + labs(title = "Share of Contributors by Number of Projects they Contributed To") + + theme_minimal(base_size = 14, base_family = "Domine") + + theme(plot.title = element_text(hjust = 0.5, face = "bold", size = 20, margin = margin(b = 20)), + plot.title.position = "plot", + panel.background = element_rect(fill = "#fefdf6", color = NA), + plot.background = element_rect(fill = "#fefdf6", color = NA), + plot.margin = margin(t = 10, r = 10, b = 60, l = 10)) ``` -```{r projects-plot, fig.alt = "Bar chart of contributors per project"} +```{r projects-plot, fig.alt = "Bar chart of contributors per project", fig.width = 10, fig.height = 7} projects_plot ``` -```{r roles-plot, fig.alt = "Bar chart of contributions by Tenzing role"} -roles_plot +```{r treemap-plot, fig.alt = "Treemap of number of projects per contributor", fig.width = 10, fig.height = 7} +treemap_plot ``` +```{r roles-plot, fig.alt = "Bar chart of contributions by Tenzing role", fig.width = 10, fig.height = 7} +roles_plot +``` -```{r dt long} -dt_long <- dt_long %>% - rowwise() %>% - mutate(Contributor = paste(na.omit(c(`Surname`, `First name`)), collapse = " ")) %>% +```{r credit-roles-long} +contributions_long <- contributions_long |> + unite(Contributor, Surname, `First name`, sep = " ", remove = FALSE, na.rm = TRUE) |> mutate(Lead = if_else(Role %in% c("lead", "co-lead"), Role, "other")) ``` diff --git a/content/contributor-analysis/index.md b/content/contributor-analysis/index.md index 5716bcb8a3e..08984289f38 100644 --- a/content/contributor-analysis/index.md +++ b/content/contributor-analysis/index.md @@ -3,19 +3,21 @@ title: "FORRT Contributor Analyses" always_allow_html: true output: md_document: - variant: markdown_github + variant: gfm preserve_yaml: true toc: false type: contributors_analysis --- -As of 14 September 2025, FORRT has a total of 91 completed or ongoing -projects and support teams, with a total of 587 contributors. There is -an average (mean) of 2.45 contributions per person across all FORRT -projects, and the average number of contributors per project is 15.54. -You can see the full list of FORRT contributors and their individual +As of 12 February 2026, FORRT has a total of 98 completed or ongoing +projects and support teams, with a total of 627 contributors. There is +an average (mean) of 2.4 contributions per person across all FORRT +projects, and the average number of contributors per project is 15. You +can see the full list of FORRT contributors and their individual contributions [here](https://forrt.org/contributors/). Bar chart of contributors per project +Treemap of number of projects per contributor + Bar chart of contributions by Tenzing role diff --git a/content/contributor-analysis/network-graph.Rmd b/content/contributor-analysis/network-graph.Rmd index 1ab83d430d8..b95ab79e9e4 100644 --- a/content/contributor-analysis/network-graph.Rmd +++ b/content/contributor-analysis/network-graph.Rmd @@ -9,6 +9,14 @@ output: The interactive network visualization below shows the connections between FORRT contributors based on their collaborative work across different projects. Each node represents a contributor, and the connections (edges) represent shared project participation. +
{{ partial "page_header" . }}
-
+
{{ .Content }} - +
-{{ partial "network-graph.html" . }} +
+ +
+ +
diff --git a/layouts/partials/network-graph.html b/layouts/partials/network-graph.html index b98b466f2a6..a18ea2a83c5 100644 --- a/layouts/partials/network-graph.html +++ b/layouts/partials/network-graph.html @@ -5661,6 +5661,9 @@

Network of Contributors

different projects. Each node represents a contributor, and the connections (edges) represent shared project participation.

-
- +
+ + diff --git a/scripts/forrt_contribs/tenzing_template.md b/scripts/forrt_contribs/tenzing_template.md index 429520c8b71..14c0a85fd88 100644 --- a/scripts/forrt_contribs/tenzing_template.md +++ b/scripts/forrt_contribs/tenzing_template.md @@ -192,7 +192,7 @@ if (window.location.hash) {
-FORRT is driven by a **large and diverse community of contributors** that shape one or more of our projects. Below you can see everyone's scientific contributions in detail. Note that many also contribute to maintaining our community - we are equally grateful for their efforts. You can find out more about the scale of contributions at FORRT, including an interactive network graph, on our contributor analysis page. +FORRT is driven by a **large and diverse community of contributors** that shape one or more of our projects. Below you can see everyone's scientific contributions in detail. Note that many also contribute to maintaining our community - we are equally grateful for their efforts. You can find out more about the scale of contributions at FORRT, including an interactive network graph, on our contributor analysis page.
diff --git a/static/partials/network-graph.html b/static/partials/network-graph.html new file mode 100644 index 00000000000..119a7705bf7 --- /dev/null +++ b/static/partials/network-graph.html @@ -0,0 +1,5748 @@ + + + + + + + + + + + + + +Network of Contributors + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + +

The interactive network visualization below shows the connections +between FORRT contributors based on their collaborative work across +different projects. Each node represents a contributor, and the +connections (edges) represent shared project participation.

+ +
+ + + + + + +
+ + + + + + + + + + + + + + +