From 744ce6e0d0ee424147e9c0e83571037575896ee6 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Wed, 18 Mar 2026 11:06:57 +0000
Subject: [PATCH 1/6] fix: exclude YouTube from Mastodon detection, add YouTube
 as conference link

YouTube URLs with /@channel patterns were incorrectly matched as Mastodon
profiles. This fixes the extractor to properly identify YouTube links and
adds YouTube as a first-class conference field for enrichment and display.

- Fix extract_links_from_url to detect YouTube before the generic
  /@username Mastodon check
- Add youtube field to Conference schema, validation, and data model
- Add YouTube display on conference detail pages
- Add tests for YouTube extraction and Mastodon disambiguation

https://claude.ai/code/session_0154a8RdG7M2nj83zPWVodgZ
---
 _includes/head.html              |  1 +
 _layouts/conference.html         |  6 ++
 tests/test_youtube_extraction.py | 99 ++++++++++++++++++++++++++++++++
 utils/enrich_tba.py              | 15 ++++-
 utils/schema.yml                 |  1 +
 utils/tidy_conf/schema.py        |  3 +-
 utils/tidy_conf/validation.py    |  1 +
 7 files changed, 123 insertions(+), 3 deletions(-)
 create mode 100644 tests/test_youtube_extraction.py
diff --git a/_includes/head.html b/_includes/head.html
index 46e20587854..5fa233c0f8f 100644
--- a/_includes/head.html
+++ b/_includes/head.html
@@ -89,6 +89,7 @@
     twitter: {{ conf.twitter | jsonify }},
     mastodon: {{ conf.mastodon | jsonify }},
     bluesky: {{ conf.bluesky | jsonify }},
+    youtube: {{ conf.youtube | jsonify }},
     location: {{ conf.location | jsonify }},
     extra_places: {{ conf.extra_places | jsonify }},
     workshop_deadline: {{ conf.workshop_deadline | jsonify }},
diff --git a/_layouts/conference.html b/_layouts/conference.html
index 1865c52fd4e..8b99898e43f 100644
--- a/_layouts/conference.html
+++ b/_layouts/conference.html
@@ -162,6 +162,12 @@ <h2 id="conf-subtitle">a.k.a. {{page.alt_name}} {{page.year}}</h2>
                 <a id="conf-mastodon" target="_blank" rel="noopener noreferrer" href="{{page.mastodon}}">Mastodon</a>
               </div>
               {% endif %}
+              {% if page.youtube %}
+              <div>
+                <i class="fa-brands fa-youtube" style="width:16px;height:16px;" aria-hidden="true"></i>
+                <a id="conf-youtube" target="_blank" rel="noopener noreferrer" href="{{page.youtube}}">YouTube</a>
+              </div>
+              {% endif %}
             </div>
           </div>
           <div id="conf-deadlines" class="row">
diff --git a/tests/test_youtube_extraction.py b/tests/test_youtube_extraction.py
new file mode 100644
index 00000000000..87da528d1fa
--- /dev/null
+++ b/tests/test_youtube_extraction.py
@@ -0,0 +1,99 @@
+"""Tests for YouTube link extraction and Mastodon/YouTube disambiguation."""
+
+import sys
+from pathlib import Path
+from unittest.mock import patch
+
+sys.path.append(str(Path(__file__).parent.parent / "utils"))
+
+from enrich_tba import extract_links_from_url
+
+
+class TestYouTubeExtraction:
+    """Test YouTube link detection in extract_links_from_url."""
+
+    @patch("enrich_tba.get_all_links")
+    def test_youtube_channel_detected(self, mock_links):
+        """YouTube /@channel links are detected as youtube, not mastodon."""
+        mock_links.return_value = [
+            "https://www.youtube.com/@PyConUS",
+        ]
+        result = extract_links_from_url("https://pycon.org")
+        assert "youtube" in result
+        assert result["youtube"] == "https://www.youtube.com/@PyConUS"
+        assert "mastodon" not in result
+
+    @patch("enrich_tba.get_all_links")
+    def test_youtube_channel_url_without_at(self, mock_links):
+        """YouTube channel links without @ are detected."""
+        mock_links.return_value = [
+            "https://www.youtube.com/channel/UCMjMBMGt0WP2usFilILnbcA",
+        ]
+        result = extract_links_from_url("https://pycon.org")
+        assert "youtube" in result
+        assert "mastodon" not in result
+
+    @patch("enrich_tba.get_all_links")
+    def test_youtube_not_mistaken_for_mastodon(self, mock_links):
+        """YouTube /@username must not end up in mastodon field."""
+        mock_links.return_value = [
+            "https://www.youtube.com/@EuroPython",
+            "https://fosstodon.org/@europython",
+        ]
+        result = extract_links_from_url("https://europython.eu")
+        assert result.get("youtube") == "https://www.youtube.com/@EuroPython"
+        assert result.get("mastodon") == "https://fosstodon.org/@europython"
+
+    @patch("enrich_tba.get_all_links")
+    def test_youtu_be_short_link(self, mock_links):
+        """Short youtu.be links are detected as youtube."""
+        mock_links.return_value = [
+            "https://youtu.be/abc123",
+        ]
+        result = extract_links_from_url("https://pycon.org")
+        assert "youtube" in result
+        assert "mastodon" not in result
+
+    @patch("enrich_tba.get_all_links")
+    def test_mastodon_still_works(self, mock_links):
+        """Mastodon links on known instances still detected correctly."""
+        mock_links.return_value = [
+            "https://fosstodon.org/@pycon",
+        ]
+        result = extract_links_from_url("https://pycon.org")
+        assert "mastodon" in result
+        assert result["mastodon"] == "https://fosstodon.org/@pycon"
+        assert "youtube" not in result
+
+    @patch("enrich_tba.get_all_links")
+    def test_generic_mastodon_still_works(self, mock_links):
+        """Generic /@username on unknown instances still detected as mastodon."""
+        mock_links.return_value = [
+            "https://social.example.org/@pyconf",
+        ]
+        result = extract_links_from_url("https://pyconf.org")
+        assert "mastodon" in result
+        assert "youtube" not in result
+
+    @patch("enrich_tba.get_all_links")
+    def test_youtube_first_seen_wins(self, mock_links):
+        """Only the first YouTube link is kept."""
+        mock_links.return_value = [
+            "https://www.youtube.com/@PyConUS",
+            "https://www.youtube.com/@AnotherChannel",
+        ]
+        result = extract_links_from_url("https://pycon.org")
+        assert result["youtube"] == "https://www.youtube.com/@PyConUS"
+
+    @patch("enrich_tba.get_all_links")
+    def test_all_social_links_extracted(self, mock_links):
+        """YouTube, Mastodon, and Bluesky can all be extracted together."""
+        mock_links.return_value = [
+            "https://bsky.app/profile/pycon.org",
+            "https://www.youtube.com/@PyConUS",
+            "https://fosstodon.org/@pycon",
+        ]
+        result = extract_links_from_url("https://pycon.org")
+        assert "bluesky" in result
+        assert "youtube" in result
+        assert "mastodon" in result
diff --git a/utils/enrich_tba.py b/utils/enrich_tba.py
index b6fa66cbe6f..f84dc0defb2 100644
--- a/utils/enrich_tba.py
+++ b/utils/enrich_tba.py
@@ -50,7 +50,7 @@
 MAX_CONTENT_LENGTH = 15000  # Max characters per conference website
 
 # Field type categorization for validation
-URL_FIELDS = {"sponsor", "finaid", "mastodon", "bluesky", "cfp_link"}
+URL_FIELDS = {"sponsor", "finaid", "mastodon", "bluesky", "youtube", "cfp_link"}
 DATE_FIELDS = {"cfp", "workshop_deadline", "tutorial_deadline"}
 TIMEZONE_FIELD = "timezone"
 
@@ -341,14 +341,25 @@ def extract_links_from_url(url: str) -> dict[str, str]:
             seen_types.add("bluesky")
             logger.debug(f"  Found bluesky: {link}")
 
+        # YouTube - youtube.com/@channel or youtu.be links
+        elif "youtube" not in seen_types and ("youtube.com" in link_lower or "youtu.be" in link_lower):
+            domain = parsed_link.netloc.lower()
+            if "youtube.com" in domain or "youtu.be" in domain:
+                found["youtube"] = link
+                seen_types.add("youtube")
+                logger.debug(f"  Found youtube: {link}")
+
         # Mastodon - /@username pattern on known instances or any instance
-        # Exclude Twitter/X which don't use /@, but guard against edge cases
+        # Exclude Twitter/X and YouTube which also use /@username patterns
         elif "mastodon" not in seen_types and "/@" in link:
             domain = parsed_link.netloc.lower()
 
             # Skip Twitter/X domains (exact host or subdomains only)
             if domain == "twitter.com" or domain.endswith((".x.com", ".twitter.com")) or domain == "x.com":
                 pass
+            # Skip YouTube domains
+            elif "youtube.com" in domain or "youtu.be" in domain:
+                pass
             elif domain in MASTODON_INSTANCES or "mastodon" in domain or "toot" in domain:
                 found["mastodon"] = link
                 seen_types.add("mastodon")
diff --git a/utils/schema.yml b/utils/schema.yml
index 9ad7e5b7827..94438152025 100644
--- a/utils/schema.yml
+++ b/utils/schema.yml
@@ -18,6 +18,7 @@
   twitter: BestConfEver # Twitter handle of conference (Optional)
   mastodon: https://mastodon.social/@bconf # Mastodon handle of conference (Optional)
   bluesky: https://bsky.app/@bconf # Bluesky handle of conference (Optional)
+  youtube: https://www.youtube.com/@bconf # YouTube channel of conference (Optional)
   sub: PY # Type of conference (see or add _data/types.yml)
   note: Important # In case there are extra notes about the conference (Optional)
   location: # Geolocation for inclusion in map
diff --git a/utils/tidy_conf/schema.py b/utils/tidy_conf/schema.py
index 11885c87365..ea8e391b78a 100644
--- a/utils/tidy_conf/schema.py
+++ b/utils/tidy_conf/schema.py
@@ -72,6 +72,7 @@ class Conference(BaseModel):
     twitter: str | None = None
     mastodon: HttpUrl | None = None
     bluesky: str | None = None
+    youtube: HttpUrl | None = None
     sub: str
     note: str | None = None
     location: list[Location] | None = None
@@ -121,7 +122,7 @@ def validate_title(cls, v):
             return re.sub(r"\b(19|20)\d{2}\b", "", v).strip()
         return v
 
-    @field_serializer("link", "cfp_link", "sponsor", "finaid", "mastodon")
+    @field_serializer("link", "cfp_link", "sponsor", "finaid", "mastodon", "youtube")
     def ser_url(self, value):
         return str(value)
 
diff --git a/utils/tidy_conf/validation.py b/utils/tidy_conf/validation.py
index 84cab7a1afa..46a9dd1ee11 100644
--- a/utils/tidy_conf/validation.py
+++ b/utils/tidy_conf/validation.py
@@ -33,6 +33,7 @@
     "twitter",
     "mastodon",
     "bluesky",
+    "youtube",
     "location",
     "extra_places",
 ]

From 2c42cd271b148163c86824535fa48c83c8a1d5a5 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Wed, 18 Mar 2026 19:27:15 +0000
Subject: [PATCH 2/6] feat: add Bluesky and YouTube display to conference
 templates

Bluesky data was being tracked but never shown to users, and YouTube
was shown only on conference detail pages. Add Bluesky links to
conference detail pages and the index listing row, and both Bluesky and
YouTube links to summary pages, using Font Awesome brand icons.

https://claude.ai/code/session_0154a8RdG7M2nj83zPWVodgZ
---
 _includes/index_conf_title_row.html |  3 +++
 _layouts/conference.html            |  6 ++++++
 _layouts/summary.html               | 12 ++++++++++++
 3 files changed, 21 insertions(+)

diff --git a/_includes/index_conf_title_row.html b/_includes/index_conf_title_row.html
index 765dd9b7da5..0400e4b1d99 100644
--- a/_includes/index_conf_title_row.html
+++ b/_includes/index_conf_title_row.html
@@ -23,6 +23,9 @@
                 {% elsif conf.twitter %}
                 <a title="Twitter" href="https://twitter.com/{{conf.twitter}}" target="_blank" rel="noopener noreferrer"><img src="/static/img/407-twitter.svg" alt="Twitter" width="14" height="14" /></a>
                 {% endif %}
+                {% if conf.bluesky %}
+                <a title="Bluesky" href="{{conf.bluesky}}" target="_blank" rel="noopener noreferrer"><i class="fa-brands fa-bluesky" style="width:14px;height:14px;" aria-hidden="true"></i></a>
+                {% endif %}
             </span>
         </div>
     </div>
diff --git a/_layouts/conference.html b/_layouts/conference.html
index 8b99898e43f..50959ba31df 100644
--- a/_layouts/conference.html
+++ b/_layouts/conference.html
@@ -162,6 +162,12 @@ <h2 id="conf-subtitle">a.k.a. {{page.alt_name}} {{page.year}}</h2>
                 <a id="conf-mastodon" target="_blank" rel="noopener noreferrer" href="{{page.mastodon}}">Mastodon</a>
               </div>
               {% endif %}
+              {% if page.bluesky %}
+              <div>
+                <i class="fa-brands fa-bluesky" style="width:16px;height:16px;" aria-hidden="true"></i>
+                <a id="conf-bluesky" target="_blank" rel="noopener noreferrer" href="{{page.bluesky}}">Bluesky</a>
+              </div>
+              {% endif %}
               {% if page.youtube %}
               <div>
                 <i class="fa-brands fa-youtube" style="width:16px;height:16px;" aria-hidden="true"></i>
diff --git a/_layouts/summary.html b/_layouts/summary.html
index 80dac2620fd..9f796ca5568 100644
--- a/_layouts/summary.html
+++ b/_layouts/summary.html
@@ -71,6 +71,18 @@ <h1>
                 <a id="conf-mastodon" target="_blank" rel="noopener noreferrer" href="{{confs[0].mastodon}}">Mastodon</a>
               </div>
               {% endif %}
+              {% if confs[0].bluesky %}
+              <div>
+                <i class="fa-brands fa-bluesky" style="width:16px;height:16px;" aria-hidden="true"></i>
+                <a id="conf-bluesky" target="_blank" rel="noopener noreferrer" href="{{confs[0].bluesky}}">Bluesky</a>
+              </div>
+              {% endif %}
+              {% if confs[0].youtube %}
+              <div>
+                <i class="fa-brands fa-youtube" style="width:16px;height:16px;" aria-hidden="true"></i>
+                <a id="conf-youtube" target="_blank" rel="noopener noreferrer" href="{{confs[0].youtube}}">YouTube</a>
+              </div>
+              {% endif %}
             </div>
           </div>
           <div id="all_confs">

From 9feec7e1f4f3eea05477e23117276947044a89c6 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sat, 18 Apr 2026 17:57:55 +0000
Subject: [PATCH 3/6] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 utils/enrich_tba.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/utils/enrich_tba.py b/utils/enrich_tba.py
index f84dc0defb2..97348db5676 100644
--- a/utils/enrich_tba.py
+++ b/utils/enrich_tba.py
@@ -355,10 +355,7 @@ def extract_links_from_url(url: str) -> dict[str, str]:
             domain = parsed_link.netloc.lower()
 
             # Skip Twitter/X domains (exact host or subdomains only)
-            if domain == "twitter.com" or domain.endswith((".x.com", ".twitter.com")) or domain == "x.com":
-                pass
-            # Skip YouTube domains
-            elif "youtube.com" in domain or "youtu.be" in domain:
+            if domain == "twitter.com" or domain.endswith((".x.com", ".twitter.com")) or domain == "x.com" or ("youtube.com" in domain or "youtu.be" in domain):
                 pass
             elif domain in MASTODON_INSTANCES or "mastodon" in domain or "toot" in domain:
                 found["mastodon"] = link

From 162db0f176514f536390e94985ae9704069e62d3 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sat, 18 Apr 2026 18:00:05 +0000
Subject: [PATCH 4/6] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 utils/enrich_tba.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/utils/enrich_tba.py b/utils/enrich_tba.py
index 076b4d001c7..de30995a346 100644
--- a/utils/enrich_tba.py
+++ b/utils/enrich_tba.py
@@ -355,7 +355,12 @@ def extract_links_from_url(url: str) -> dict[str, str]:
             domain = parsed_link.netloc.lower()
 
             # Skip Twitter/X domains (exact host or subdomains only)
-            if domain == "twitter.com" or domain.endswith((".x.com", ".twitter.com")) or domain == "x.com" or ("youtube.com" in domain or "youtu.be" in domain):
+            if (
+                domain == "twitter.com"
+                or domain.endswith((".x.com", ".twitter.com"))
+                or domain == "x.com"
+                or ("youtube.com" in domain or "youtu.be" in domain)
+            ):
                 pass
             elif domain in MASTODON_INSTANCES or "mastodon" in domain or "toot" in domain:
                 found["mastodon"] = link

From 6c9329155995cf213093a42cecd9349574ea2241 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Sat, 18 Apr 2026 18:01:42 +0000
Subject: [PATCH 5/6] fix(enrich-tba): use exact domain matching for
 YouTube/Twitter detection
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CodeQL flagged the substring checks ("youtube.com" in domain) as
incomplete URL sanitization — a host like evil-youtube.com.attacker
could match. Replace with a _domain_matches helper that accepts an
exact host or a proper subdomain, and reuse it for Twitter/X.

Also collapses the combined Twitter/YouTube skip condition, which Ruff
flagged with E501 (line too long), into a readable form.

https://claude.ai/code/session_0154a8RdG7M2nj83zPWVodgZ
---
 utils/enrich_tba.py | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/utils/enrich_tba.py b/utils/enrich_tba.py
index de30995a346..bed4ec4bcfe 100644
--- a/utils/enrich_tba.py
+++ b/utils/enrich_tba.py
@@ -267,6 +267,11 @@ def get_all_links(url: str) -> list[str]:
         return []
 
 
+def _domain_matches(domain: str, hosts: tuple[str, ...]) -> bool:
+    """Return True if domain equals one of hosts or is a subdomain of one."""
+    return any(domain == h or domain.endswith(f".{h}") for h in hosts)
+
+
 # Known Mastodon instances (common ones in tech/Python community)
 MASTODON_INSTANCES = {
     "mastodon.social",
@@ -334,6 +339,10 @@ def extract_links_from_url(url: str) -> dict[str, str]:
     for link in links:
         link_lower = link.lower()
         parsed_link = urlparse(link)
+        link_domain = parsed_link.netloc.lower()
+
+        is_youtube = _domain_matches(link_domain, ("youtube.com", "youtu.be"))
+        is_twitter = _domain_matches(link_domain, ("twitter.com", "x.com"))
 
         # Bluesky - always bsky.app/profile/
         if "bluesky" not in seen_types and "bsky.app/profile/" in link_lower:
@@ -342,27 +351,18 @@ def extract_links_from_url(url: str) -> dict[str, str]:
             logger.debug(f"  Found bluesky: {link}")
 
         # YouTube - youtube.com/@channel or youtu.be links
-        elif "youtube" not in seen_types and ("youtube.com" in link_lower or "youtu.be" in link_lower):
-            domain = parsed_link.netloc.lower()
-            if "youtube.com" in domain or "youtu.be" in domain:
-                found["youtube"] = link
-                seen_types.add("youtube")
-                logger.debug(f"  Found youtube: {link}")
+        elif "youtube" not in seen_types and is_youtube:
+            found["youtube"] = link
+            seen_types.add("youtube")
+            logger.debug(f"  Found youtube: {link}")
 
         # Mastodon - /@username pattern on known instances or any instance
         # Exclude Twitter/X and YouTube which also use /@username patterns
         elif "mastodon" not in seen_types and "/@" in link:
-            domain = parsed_link.netloc.lower()
-
-            # Skip Twitter/X domains (exact host or subdomains only)
-            if (
-                domain == "twitter.com"
-                or domain.endswith((".x.com", ".twitter.com"))
-                or domain == "x.com"
-                or ("youtube.com" in domain or "youtu.be" in domain)
-            ):
+            # Skip Twitter/X and YouTube domains
+            if is_twitter or is_youtube:
                 pass
-            elif domain in MASTODON_INSTANCES or "mastodon" in domain or "toot" in domain:
+            elif link_domain in MASTODON_INSTANCES or "mastodon" in link_domain or "toot" in link_domain:
                 found["mastodon"] = link
                 seen_types.add("mastodon")
                 logger.debug(f"  Found mastodon: {link}")

From 6d949a78a484e2a09f8e72903080719be2b0e690 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Sat, 18 Apr 2026 18:08:04 +0000
Subject: [PATCH 6/6] chore: bump pyupgrade to v3.21.2 for Python 3.14
 compatibility
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

pyupgrade v3.15.2 crashes on Python 3.14 with a TypeError from
tokenize.cookie_re — it passes a str where newer CPython expects a
bytes pattern. pre-commit.ci runs on Python 3.14, so the hook was
failing on every PR regardless of the diff. Bumping to v3.21.2 picks
up the upstream fix.

https://claude.ai/code/session_0154a8RdG7M2nj83zPWVodgZ
---
 .pre-commit-config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index f94db7a3529..0f0fa0fb603 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -27,7 +27,7 @@ repos:
     - --force-single-line-imports
     - --profile black
 - repo: https://github.com/asottile/pyupgrade # Upgrade Python syntax
-  rev: v3.15.2
+  rev: v3.21.2
   hooks:
   - id: pyupgrade
     args: