From d95c706b5416081d45c41a2305955f903e4bb9cc Mon Sep 17 00:00:00 2001 From: Tenemo Date: Mon, 30 Mar 2026 01:24:39 +0200 Subject: [PATCH 1/3] deduplication; refactor --- e2e/support/siteContracts.ts | 4 +- eslint.config.js | 3 + netlify.toml | 5 - src/app/appConstants.ts | 2 + src/app/siteLinks.ts | 2 + .../Projects/ProjectCard/ProjectCard.tsx | 11 +- .../ProjectMarkdown/ProjectMarkdown.tsx | 205 +----------------- .../ProjectMarkdown/markdownUtils.ts | 190 ++++++++++++++++ src/features/Projects/projectUtils.ts | 5 - src/routes/projects.tsx | 5 +- src/utils/build/cspCompatibility.spec.ts | 12 + src/utils/build/cspCompatibility.ts | 24 +- src/utils/build/fetchGithubData.ts | 11 +- src/utils/build/findHtmlFiles.ts | 19 ++ src/utils/build/inlineCss.ts | 19 +- src/utils/build/transformImagePaths.ts | 19 +- src/utils/build/validateBuiltHtmlCsp.ts | 18 +- src/utils/githubUrls.ts | 13 ++ 18 files changed, 280 insertions(+), 287 deletions(-) create mode 100644 src/features/Projects/ProjectItem/ProjectMarkdown/markdownUtils.ts create mode 100644 src/utils/build/findHtmlFiles.ts create mode 100644 src/utils/githubUrls.ts diff --git a/e2e/support/siteContracts.ts b/e2e/support/siteContracts.ts index 860f74d..5570518 100644 --- a/e2e/support/siteContracts.ts +++ b/e2e/support/siteContracts.ts @@ -1,5 +1,5 @@ +import { getProjectPath } from 'app/routePaths'; import { PROJECTS } from 'features/Projects/projectsData'; -import { getProjectRoutePath } from 'features/Projects/projectUtils'; export type TopLevelPageContract = { route: string; @@ -119,7 +119,7 @@ export const PROJECT_ROUTE_CONTRACTS: readonly ProjectRouteContract[] = PROJECTS.map((project) => ({ name: project.name, repo: project.repo, - route: getProjectRoutePath(project.repo), + route: getProjectPath(project.repo), })).sort((left, right) => left.route.localeCompare(right.route)); export const PROJECT_ROUTES = PROJECT_ROUTE_CONTRACTS.map(({ route }) => route); diff --git a/eslint.config.js b/eslint.config.js index ed0d7e5..da1d868 100644 --- a/eslint.config.js +++ b/eslint.config.js @@ -235,7 +235,10 @@ export default defineConfig( }, { files: [ + 'src/utils/build/cspCompatibility.ts', 'src/utils/build/fetchGithubData.ts', + 'src/utils/build/inlineCss.ts', + 'src/utils/build/transformImagePaths.ts', 'src/utils/build/validateBuiltHtmlCsp.ts', 'e2e/support/serveDistClient.ts', ], diff --git a/netlify.toml b/netlify.toml index 9c06764..c466f82 100644 --- a/netlify.toml +++ b/netlify.toml @@ -42,11 +42,6 @@ for = "/media/projects/*.webm" [headers.values] Cache-Control = "public, max-age=31536000, immutable" -[[redirects]] -from = "/robots.txt" -to = "/robots.txt" -status = 200 - # Redirects for legacy portfolio routes to support old links [[redirects]] from = "/portfolio" diff --git a/src/app/appConstants.ts b/src/app/appConstants.ts index b5e2842..ecc8b30 100644 --- a/src/app/appConstants.ts +++ b/src/app/appConstants.ts @@ -8,3 +8,5 @@ export const PRODUCTION_OG_IMAGES_DIRECTORY = 'https://piech.dev/media/projects/og_images/'; export const SILENT_CAPTIONS_TRACK_PATH = '/media/captions/silent.vtt'; + +export const PROJECT_DATE_FORMAT = 'MMMM yyyy'; diff --git a/src/app/siteLinks.ts b/src/app/siteLinks.ts index 92a3d82..2c14b62 100644 --- a/src/app/siteLinks.ts +++ b/src/app/siteLinks.ts @@ -1,3 +1,5 @@ +export const GITHUB_OWNER = 'tenemo'; + export const SITE_LINKS = { home: 'https://piech.dev/', emailAddress: 'piotr@piech.dev', diff --git a/src/features/Projects/ProjectCard/ProjectCard.tsx b/src/features/Projects/ProjectCard/ProjectCard.tsx index e3a1202..44b995f 100644 --- a/src/features/Projects/ProjectCard/ProjectCard.tsx +++ b/src/features/Projects/ProjectCard/ProjectCard.tsx @@ -3,13 +3,16 @@ import React from 'react'; import { Link } from 'react-router'; import type { ProjectPreviewAsset } from '../projectsData'; -import { getProjectRoutePath } from '../projectUtils'; import type { TechnologyName } from '../technologies'; import styles from './projectCard.module.scss'; import ProjectTechnologies from './ProjectTechnologies/ProjectTechnologies'; -import { SILENT_CAPTIONS_TRACK_PATH } from 'app/appConstants'; +import { + PROJECT_DATE_FORMAT, + SILENT_CAPTIONS_TRACK_PATH, +} from 'app/appConstants'; +import { getProjectPath } from 'app/routePaths'; import { OpenInNewIcon } from 'components/Icons'; import { repositoriesData } from 'utils/data/githubData'; @@ -30,11 +33,11 @@ const ProjectCard = ({ prioritizePreview = false, technologies, }: ProjectCardProps): React.JSX.Element => { - const projectPath = getProjectRoutePath(repo); + const projectPath = getProjectPath(repo); const repositoryInfo = repositoriesData[repo]; const createdIso = repositoryInfo?.createdDatetime; const createdLabel = createdIso - ? format(new Date(createdIso), 'MMMM yyyy') + ? format(new Date(createdIso), PROJECT_DATE_FORMAT) : undefined; const previewSrc = `/media/projects/${projectPreview.fileName}`; diff --git a/src/features/Projects/ProjectItem/ProjectMarkdown/ProjectMarkdown.tsx b/src/features/Projects/ProjectItem/ProjectMarkdown/ProjectMarkdown.tsx index 4c9109d..a219f30 100644 --- a/src/features/Projects/ProjectItem/ProjectMarkdown/ProjectMarkdown.tsx +++ b/src/features/Projects/ProjectItem/ProjectMarkdown/ProjectMarkdown.tsx @@ -11,30 +11,21 @@ import rehypeSanitize, { } from 'rehype-sanitize'; import remarkGfm from 'remark-gfm'; +import { + getLeadingMarkdownHeading, + stringifyCodeChildren, + stripRedundantLeadingHeading, + toRepositoryAssetUrl, +} from './markdownUtils'; import styles from './projectMarkdown.module.scss'; -import { SILENT_CAPTIONS_TRACK_PATH } from 'app/appConstants'; +import { + PROJECT_DATE_FORMAT, + SILENT_CAPTIONS_TRACK_PATH, +} from 'app/appConstants'; import { findProjectByRepo } from 'features/Projects/projectUtils'; import { repositoriesData } from 'utils/data/githubData'; - -const OWNER = 'tenemo'; -const GITHUB_USER_ATTACHMENT_PATTERN = - /^https:\/\/github\.com\/user-attachments\/assets\/[a-f0-9-]+$/; -const MARKDOWN_HEADING_LINE_PATTERN = /^[ \t]{0,3}#{1,6}[ \t]+[^\r\n]+$/u; - -function stringifyCodeChildren(children: React.ReactNode): string { - if (typeof children === 'string') { - return children; - } - - if (!Array.isArray(children)) { - return ''; - } - - return children - .map((child) => (typeof child === 'string' ? child : '')) - .join(''); -} +import { isGithubUserAttachmentUrl } from 'utils/githubUrls'; const sanitizedMarkdownSchema: RehypeSanitizeSchema = { ...defaultSchema, @@ -66,178 +57,6 @@ const sanitizedMarkdownSchema: RehypeSanitizeSchema = { }, }; -const hasUrlScheme = (url: string): boolean => - /^[a-z][a-z\d+\-.]*:/i.test(url) || url.startsWith('//'); - -const isGithubUserAttachmentUrl = (url: string): boolean => - GITHUB_USER_ATTACHMENT_PATTERN.test(url); - -const normalizeHeadingForComparison = (value: string): string => - value - .normalize('NFKD') - .replace(/[`*_~[\]()]/g, '') - .replace(/<[^>]*>/g, '') - .replace(/[^a-z0-9]+/gi, '') - .toLowerCase(); - -const getNextMarkdownLine = ( - markdown: string, - offset: number, -): { - line: string; - nextOffset: number; -} => { - const nextLineBreak = markdown.indexOf('\n', offset); - const rawLine = - nextLineBreak === -1 - ? markdown.slice(offset) - : markdown.slice(offset, nextLineBreak); - - return { - line: rawLine.endsWith('\r') ? rawLine.slice(0, -1) : rawLine, - nextOffset: nextLineBreak === -1 ? markdown.length : nextLineBreak + 1, - }; -}; - -const extractMarkdownHeadingFromLine = (line: string): string | undefined => { - if (!MARKDOWN_HEADING_LINE_PATTERN.exec(line)) { - return undefined; - } - - const trimmedLine = line.trimStart(); - let hashCount = 0; - - while (trimmedLine[hashCount] === '#') { - hashCount += 1; - } - - let headingText = trimmedLine.slice(hashCount).trim(); - let markerStart = headingText.length; - - while (markerStart > 0 && headingText[markerStart - 1] === '#') { - markerStart -= 1; - } - - if ( - markerStart < headingText.length && - (headingText[markerStart - 1] === ' ' || - headingText[markerStart - 1] === '\t') - ) { - headingText = headingText.slice(0, markerStart).trimEnd(); - } - - return headingText.length > 0 ? headingText : undefined; -}; - -const getLeadingMarkdownHeading = (markdown: string): string | undefined => { - const markdownWithoutBom = markdown.startsWith('\uFEFF') - ? markdown.slice(1) - : markdown; - let offset = 0; - - while (offset < markdownWithoutBom.length) { - const { line, nextOffset } = getNextMarkdownLine( - markdownWithoutBom, - offset, - ); - - if (line.trim().length === 0) { - offset = nextOffset; - continue; - } - - return extractMarkdownHeadingFromLine(line); - } - - return undefined; -}; - -const trimLeadingBlankLines = (value: string): string => { - let offset = 0; - - while (offset < value.length) { - const nextLineBreak = value.indexOf('\n', offset); - const lineEnd = nextLineBreak === -1 ? value.length : nextLineBreak; - const line = value.slice(offset, lineEnd).replace(/\r$/u, ''); - - if (line.trim().length > 0) { - return value.slice(offset); - } - - offset = nextLineBreak === -1 ? value.length : nextLineBreak + 1; - } - - return ''; -}; - -const stripRedundantLeadingHeading = ({ - markdown, - comparisonCandidates, -}: { - markdown: string; - comparisonCandidates: readonly string[]; -}): string => { - const markdownWithoutBom = markdown.startsWith('\uFEFF') - ? markdown.slice(1) - : markdown; - let offset = 0; - - while (offset < markdownWithoutBom.length) { - const { line, nextOffset } = getNextMarkdownLine( - markdownWithoutBom, - offset, - ); - - if (line.trim().length === 0) { - offset = nextOffset; - continue; - } - - const headingText = extractMarkdownHeadingFromLine(line); - - if (!headingText) { - return markdown; - } - - const normalizedHeading = normalizeHeadingForComparison(headingText); - const isRedundantHeading = comparisonCandidates.some( - (candidate) => - normalizeHeadingForComparison(candidate) === normalizedHeading, - ); - - if (!isRedundantHeading) { - return markdown; - } - - return trimLeadingBlankLines(markdownWithoutBom.slice(nextOffset)); - } - - return markdownWithoutBom; -}; - -const toRepositoryAssetUrl = ({ - url, - repo, - defaultBranch, - key, -}: { - url: string; - repo: string; - defaultBranch: string; - key: string; -}): string => { - if (hasUrlScheme(url) || url.startsWith('#')) { - return url; - } - - const repositoryUrl = new URL( - url, - `https://github.com/${OWNER}/${repo}/blob/${defaultBranch}/`, - ).toString(); - - return key === 'src' ? `${repositoryUrl}?raw=true` : repositoryUrl; -}; - type ProjectMarkdownProps = { markdown: string; repo: string; @@ -265,7 +84,7 @@ const ProjectMarkdown = ({ ], }); const createdLabel = createdIso - ? format(new Date(createdIso), 'MMMM yyyy') + ? format(new Date(createdIso), PROJECT_DATE_FORMAT) : undefined; const urlTransform = ( diff --git a/src/features/Projects/ProjectItem/ProjectMarkdown/markdownUtils.ts b/src/features/Projects/ProjectItem/ProjectMarkdown/markdownUtils.ts new file mode 100644 index 0000000..350a221 --- /dev/null +++ b/src/features/Projects/ProjectItem/ProjectMarkdown/markdownUtils.ts @@ -0,0 +1,190 @@ +import React from 'react'; + +import { GITHUB_OWNER } from 'app/siteLinks'; + +const MARKDOWN_HEADING_LINE_PATTERN = /^[ \t]{0,3}#{1,6}[ \t]+[^\r\n]+$/u; + +export function stringifyCodeChildren(children: React.ReactNode): string { + if (typeof children === 'string') { + return children; + } + + if (!Array.isArray(children)) { + return ''; + } + + return children + .map((child) => (typeof child === 'string' ? child : '')) + .join(''); +} + +export const hasUrlScheme = (url: string): boolean => + /^[a-z][a-z\d+\-.]*:/i.test(url) || url.startsWith('//'); + +const normalizeHeadingForComparison = (value: string): string => + value + .normalize('NFKD') + .replace(/[`*_~[\]()]/g, '') + .replace(/<[^>]*>/g, '') + .replace(/[^a-z0-9]+/gi, '') + .toLowerCase(); + +const getNextMarkdownLine = ( + markdown: string, + offset: number, +): { + line: string; + nextOffset: number; +} => { + const nextLineBreak = markdown.indexOf('\n', offset); + const rawLine = + nextLineBreak === -1 + ? markdown.slice(offset) + : markdown.slice(offset, nextLineBreak); + + return { + line: rawLine.endsWith('\r') ? rawLine.slice(0, -1) : rawLine, + nextOffset: nextLineBreak === -1 ? markdown.length : nextLineBreak + 1, + }; +}; + +const extractMarkdownHeadingFromLine = (line: string): string | undefined => { + if (!MARKDOWN_HEADING_LINE_PATTERN.exec(line)) { + return undefined; + } + + const trimmedLine = line.trimStart(); + let hashCount = 0; + + while (trimmedLine[hashCount] === '#') { + hashCount += 1; + } + + let headingText = trimmedLine.slice(hashCount).trim(); + let markerStart = headingText.length; + + while (markerStart > 0 && headingText[markerStart - 1] === '#') { + markerStart -= 1; + } + + if ( + markerStart < headingText.length && + (headingText[markerStart - 1] === ' ' || + headingText[markerStart - 1] === '\t') + ) { + headingText = headingText.slice(0, markerStart).trimEnd(); + } + + return headingText.length > 0 ? headingText : undefined; +}; + +export const getLeadingMarkdownHeading = ( + markdown: string, +): string | undefined => { + const markdownWithoutBom = markdown.startsWith('\uFEFF') + ? markdown.slice(1) + : markdown; + let offset = 0; + + while (offset < markdownWithoutBom.length) { + const { line, nextOffset } = getNextMarkdownLine( + markdownWithoutBom, + offset, + ); + + if (line.trim().length === 0) { + offset = nextOffset; + continue; + } + + return extractMarkdownHeadingFromLine(line); + } + + return undefined; +}; + +const trimLeadingBlankLines = (value: string): string => { + let offset = 0; + + while (offset < value.length) { + const nextLineBreak = value.indexOf('\n', offset); + const lineEnd = nextLineBreak === -1 ? value.length : nextLineBreak; + const line = value.slice(offset, lineEnd).replace(/\r$/u, ''); + + if (line.trim().length > 0) { + return value.slice(offset); + } + + offset = nextLineBreak === -1 ? value.length : nextLineBreak + 1; + } + + return ''; +}; + +export const stripRedundantLeadingHeading = ({ + markdown, + comparisonCandidates, +}: { + markdown: string; + comparisonCandidates: readonly string[]; +}): string => { + const markdownWithoutBom = markdown.startsWith('\uFEFF') + ? markdown.slice(1) + : markdown; + let offset = 0; + + while (offset < markdownWithoutBom.length) { + const { line, nextOffset } = getNextMarkdownLine( + markdownWithoutBom, + offset, + ); + + if (line.trim().length === 0) { + offset = nextOffset; + continue; + } + + const headingText = extractMarkdownHeadingFromLine(line); + + if (!headingText) { + return markdown; + } + + const normalizedHeading = normalizeHeadingForComparison(headingText); + const isRedundantHeading = comparisonCandidates.some( + (candidate) => + normalizeHeadingForComparison(candidate) === normalizedHeading, + ); + + if (!isRedundantHeading) { + return markdown; + } + + return trimLeadingBlankLines(markdownWithoutBom.slice(nextOffset)); + } + + return markdownWithoutBom; +}; + +export const toRepositoryAssetUrl = ({ + url, + repo, + defaultBranch, + key, +}: { + url: string; + repo: string; + defaultBranch: string; + key: string; +}): string => { + if (hasUrlScheme(url) || url.startsWith('#')) { + return url; + } + + const repositoryUrl = new URL( + url, + `https://github.com/${GITHUB_OWNER}/${repo}/blob/${defaultBranch}/`, + ).toString(); + + return key === 'src' ? `${repositoryUrl}?raw=true` : repositoryUrl; +}; diff --git a/src/features/Projects/projectUtils.ts b/src/features/Projects/projectUtils.ts index afa1418..ff0504c 100644 --- a/src/features/Projects/projectUtils.ts +++ b/src/features/Projects/projectUtils.ts @@ -1,10 +1,5 @@ import { PROJECTS } from './projectsData'; -import { getProjectPath } from 'app/routePaths'; - -export const getProjectRoutePath = (repo: string): string => - getProjectPath(repo); - export const findProjectByRepo = ( repo: string, ): (typeof PROJECTS)[number] | undefined => diff --git a/src/routes/projects.tsx b/src/routes/projects.tsx index 1cd45cf..a37f18c 100644 --- a/src/routes/projects.tsx +++ b/src/routes/projects.tsx @@ -20,11 +20,10 @@ import { import { PERSON, PERSON_ID, WEBSITE, WEBSITE_ID } from './index'; import { DEFAULT_KEYWORDS } from 'app/appConstants'; -import { HOME_PATH, PROJECTS_PATH } from 'app/routePaths'; +import { HOME_PATH, PROJECTS_PATH, getProjectPath } from 'app/routePaths'; import { SITE_LINKS } from 'app/siteLinks'; import Projects from 'features/Projects/Projects'; import { PROJECTS } from 'features/Projects/projectsData'; -import { getProjectRoutePath } from 'features/Projects/projectUtils'; const projectsItemList: ItemList = { '@type': 'ItemList', @@ -32,7 +31,7 @@ const projectsItemList: ItemList = { itemListOrder: 'https://schema.org/ItemListOrderAscending', numberOfItems: PROJECTS.length, itemListElement: PROJECTS.map((project, index) => { - const url = getSiteUrl(getProjectRoutePath(project.repo)); + const url = getSiteUrl(getProjectPath(project.repo)); const code: SoftwareSourceCode = { '@type': 'SoftwareSourceCode', '@id': `${url}#code`, diff --git a/src/utils/build/cspCompatibility.spec.ts b/src/utils/build/cspCompatibility.spec.ts index 5ec9bf7..6c518df 100644 --- a/src/utils/build/cspCompatibility.spec.ts +++ b/src/utils/build/cspCompatibility.spec.ts @@ -4,6 +4,7 @@ import { classifyLinkResource, isAllowedResourceUrl, isExecutableScript, + normalizeResourceOrigin, } from './cspCompatibility'; describe('cspCompatibility', () => { @@ -86,6 +87,17 @@ describe('cspCompatibility', () => { ).toBe(false); }); + it('normalizes protocol-relative URLs to their actual origin', () => { + expect(normalizeResourceOrigin('//cdn.example.com/image.png')).toBe( + 'https://cdn.example.com', + ); + expect( + normalizeResourceOrigin( + '//private-user-images.githubusercontent.com/example', + ), + ).toBe('https://private-user-images.githubusercontent.com'); + }); + it('classifies fetchable links for CSP validation', () => { expect(classifyLinkResource({ as: 'image', rel: 'preload' })).toBe( 'image', diff --git a/src/utils/build/cspCompatibility.ts b/src/utils/build/cspCompatibility.ts index 01ef5a0..99234f1 100644 --- a/src/utils/build/cspCompatibility.ts +++ b/src/utils/build/cspCompatibility.ts @@ -1,3 +1,5 @@ +import { isGithubUserAttachmentUrl } from '../githubUrls.ts'; + export type ResourceKind = | 'document' | 'image' @@ -40,20 +42,6 @@ const ALLOWED_ORIGIN_PATTERNS: Record< const NON_EXECUTABLE_SCRIPT_TYPES = new Set(['application/ld+json']); -function isGithubUserAttachmentUrl(url: string): boolean { - try { - const parsedUrl = new URL(url); - - return ( - parsedUrl.protocol === 'https:' && - parsedUrl.hostname === 'github.com' && - parsedUrl.pathname.startsWith('/user-attachments/assets/') - ); - } catch { - return false; - } -} - export function isExecutableScript({ src, type, @@ -83,14 +71,14 @@ export function normalizeResourceOrigin(url: string): string { return GITHUB_USER_ATTACHMENT_RUNTIME_ORIGIN; } - if (url.startsWith('/')) { - return 'self'; - } - if (url.startsWith('//')) { return new URL(`https:${url}`).origin; } + if (url.startsWith('/')) { + return 'self'; + } + try { const normalizedOrigin = new URL(url).origin; diff --git a/src/utils/build/fetchGithubData.ts b/src/utils/build/fetchGithubData.ts index 3346c1c..ec68837 100644 --- a/src/utils/build/fetchGithubData.ts +++ b/src/utils/build/fetchGithubData.ts @@ -26,9 +26,8 @@ import path from 'node:path'; import 'dotenv/config'; import { Octokit } from 'octokit'; +import { GITHUB_OWNER } from '../../app/siteLinks.ts'; import { PROJECTS } from '../../features/Projects/projectsData.ts'; - -const OWNER = 'tenemo'; const OUT_DIR = path.join(process.cwd(), 'temp'); const OUT_PATH = path.join(OUT_DIR, 'githubData.json'); const EPOCH_ISO = '1970-01-01T00:00:00.000Z'; @@ -123,7 +122,7 @@ async function getReadme(octokit: Octokit, repo: string): Promise { const response = await octokit.request( 'GET /repos/{owner}/{repo}/readme', { - owner: OWNER, + owner: GITHUB_OWNER, repo, headers: { accept: 'application/vnd.github.raw+json', @@ -150,7 +149,7 @@ async function getLastCommitDatetime( ): Promise { try { const response = await octokit.rest.repos.getCommit({ - owner: OWNER, + owner: GITHUB_OWNER, repo, ref: defaultBranch, }); @@ -171,11 +170,11 @@ async function buildRepositoryInfo( ): Promise { const [repoResult, topicsResult, readmeResult] = await Promise.allSettled([ octokit.rest.repos.get({ - owner: OWNER, + owner: GITHUB_OWNER, repo, }), octokit.rest.repos.getAllTopics({ - owner: OWNER, + owner: GITHUB_OWNER, repo, }), getReadme(octokit, repo), diff --git a/src/utils/build/findHtmlFiles.ts b/src/utils/build/findHtmlFiles.ts new file mode 100644 index 0000000..c450383 --- /dev/null +++ b/src/utils/build/findHtmlFiles.ts @@ -0,0 +1,19 @@ +import { promises as fs } from 'fs'; +import path from 'path'; + +export async function findHtmlFiles(dir: string): Promise { + const dirents = await fs.readdir(dir, { withFileTypes: true }); + const files = await Promise.all( + dirents.map(async (dirent) => { + const resolvedPath = path.resolve(dir, dirent.name); + + if (dirent.isDirectory()) { + return findHtmlFiles(resolvedPath); + } + + return resolvedPath.endsWith('.html') ? [resolvedPath] : []; + }), + ); + + return files.flat(); +} diff --git a/src/utils/build/inlineCss.ts b/src/utils/build/inlineCss.ts index f60f841..798ed1d 100644 --- a/src/utils/build/inlineCss.ts +++ b/src/utils/build/inlineCss.ts @@ -3,6 +3,8 @@ import path from 'path'; import { JSDOM } from 'jsdom'; +import { findHtmlFiles } from './findHtmlFiles.ts'; + const outDir = path.resolve(process.cwd(), 'dist/client'); let warningsCount = 0; @@ -22,23 +24,6 @@ async function fileExists(filePath: string): Promise { } } -async function findHtmlFiles(dir: string): Promise { - const dirents = await fs.readdir(dir, { withFileTypes: true }); - const files = await Promise.all( - dirents.map(async (dirent) => { - const resolvedPath = path.resolve(dir, dirent.name); - - if (dirent.isDirectory()) { - return findHtmlFiles(resolvedPath); - } - - return resolvedPath.endsWith('.html') ? [resolvedPath] : []; - }), - ); - - return files.flat(); -} - async function resolveCssPath( htmlFile: string, href: string, diff --git a/src/utils/build/transformImagePaths.ts b/src/utils/build/transformImagePaths.ts index 1d300a4..78762b5 100644 --- a/src/utils/build/transformImagePaths.ts +++ b/src/utils/build/transformImagePaths.ts @@ -3,26 +3,11 @@ import path from 'path'; import { JSDOM } from 'jsdom'; +import { findHtmlFiles } from './findHtmlFiles.ts'; + const outDir = path.resolve(process.cwd(), 'dist/client'); const EXCLUDED_EXTENSIONS = new Set(['.mp4', '.webm', '.ogg']); -async function findHtmlFiles(dir: string): Promise { - const dirents = await fs.readdir(dir, { withFileTypes: true }); - const files = await Promise.all( - dirents.map(async (dirent) => { - const resolvedPath = path.resolve(dir, dirent.name); - - if (dirent.isDirectory()) { - return findHtmlFiles(resolvedPath); - } - - return resolvedPath.endsWith('.html') ? [resolvedPath] : []; - }), - ); - - return files.flat(); -} - function getMediaAssetType(url: string): 'logo' | 'project' | null { if (url.startsWith('/media/logos/')) { return 'logo'; diff --git a/src/utils/build/validateBuiltHtmlCsp.ts b/src/utils/build/validateBuiltHtmlCsp.ts index 9e7cced..20db51d 100644 --- a/src/utils/build/validateBuiltHtmlCsp.ts +++ b/src/utils/build/validateBuiltHtmlCsp.ts @@ -8,27 +8,11 @@ import { isAllowedResourceUrl, isExecutableScript, } from './cspCompatibility.ts'; +import { findHtmlFiles } from './findHtmlFiles.ts'; const outDir = path.resolve(process.cwd(), 'dist/client'); const BANNED_SELECTORS = ['iframe', 'object', 'embed'] as const; -async function findHtmlFiles(dir: string): Promise { - const dirents = await fs.readdir(dir, { withFileTypes: true }); - const files = await Promise.all( - dirents.map(async (dirent) => { - const resolvedPath = path.resolve(dir, dirent.name); - - if (dirent.isDirectory()) { - return findHtmlFiles(resolvedPath); - } - - return resolvedPath.endsWith('.html') ? [resolvedPath] : []; - }), - ); - - return files.flat(); -} - function formatElementViolation({ details, filePath, diff --git a/src/utils/githubUrls.ts b/src/utils/githubUrls.ts new file mode 100644 index 0000000..72a3b08 --- /dev/null +++ b/src/utils/githubUrls.ts @@ -0,0 +1,13 @@ +export function isGithubUserAttachmentUrl(url: string): boolean { + try { + const parsedUrl = new URL(url); + + return ( + parsedUrl.protocol === 'https:' && + parsedUrl.hostname === 'github.com' && + parsedUrl.pathname.startsWith('/user-attachments/assets/') + ); + } catch { + return false; + } +} From e420108967b9442b5a230fc2da914b57b7a562fe Mon Sep 17 00:00:00 2001 From: Tenemo Date: Mon, 30 Mar 2026 02:05:50 +0200 Subject: [PATCH 2/3] comments addressed --- playwright.config.ts | 7 +++- src/app/siteLinks.ts | 8 +++-- .../Projects/ProjectItem/ProjectItem.tsx | 4 +-- .../ProjectMarkdown/ProjectMarkdown.spec.tsx | 7 ++-- src/routes/project-item.tsx | 4 +-- src/utils/build/cspCompatibility.spec.ts | 11 ++++++ src/utils/githubUrls.spec.ts | 34 +++++++++++++++++++ src/utils/githubUrls.ts | 5 ++- 8 files changed, 68 insertions(+), 12 deletions(-) create mode 100644 src/utils/githubUrls.spec.ts diff --git a/playwright.config.ts b/playwright.config.ts index f47a3d5..6b40ff1 100644 --- a/playwright.config.ts +++ b/playwright.config.ts @@ -64,6 +64,11 @@ const ciWorkerCount = parseWorkerCount( process.env.PLAYWRIGHT_CI_WORKERS, 4, ); +const localWorkerCount = parseWorkerCount( + 'PLAYWRIGHT_LOCAL_WORKERS', + process.env.PLAYWRIGHT_LOCAL_WORKERS, + 4, +); const remoteCiWorkerCount = parseWorkerCount( 'PLAYWRIGHT_REMOTE_CI_WORKERS', process.env.PLAYWRIGHT_REMOTE_CI_WORKERS, @@ -82,7 +87,7 @@ export default defineConfig({ : 1 : process.env.CI ? ciWorkerCount - : undefined, + : localWorkerCount, reporter: reporters, use: { baseURL: E2E_BASE_URL, diff --git a/src/app/siteLinks.ts b/src/app/siteLinks.ts index 2c14b62..a2851e1 100644 --- a/src/app/siteLinks.ts +++ b/src/app/siteLinks.ts @@ -1,11 +1,13 @@ -export const GITHUB_OWNER = 'tenemo'; +export const GITHUB_OWNER = 'Tenemo'; + +const GITHUB_BASE_URL = `https://github.com/${GITHUB_OWNER}`; export const SITE_LINKS = { home: 'https://piech.dev/', emailAddress: 'piotr@piech.dev', email: 'mailto:piotr@piech.dev', - githubProfile: 'https://github.com/Tenemo', - githubRepo: 'https://github.com/Tenemo/piech.dev', + githubProfile: GITHUB_BASE_URL, + githubRepo: `${GITHUB_BASE_URL}/piech.dev`, linkedin: 'https://www.linkedin.com/in/ppiech', telegram: 'https://t.me/tenemo', } as const; diff --git a/src/features/Projects/ProjectItem/ProjectItem.tsx b/src/features/Projects/ProjectItem/ProjectItem.tsx index 27b5874..cd7b56c 100644 --- a/src/features/Projects/ProjectItem/ProjectItem.tsx +++ b/src/features/Projects/ProjectItem/ProjectItem.tsx @@ -6,7 +6,7 @@ import ProjectMarkdown from './ProjectMarkdown/ProjectMarkdown'; import { MAIN_CONTENT_ID } from 'app/accessibility'; import { PROJECTS_PATH } from 'app/routePaths'; -import { SITE_LINKS } from 'app/siteLinks'; +import { GITHUB_OWNER, SITE_LINKS } from 'app/siteLinks'; import { ArrowBackIcon, GitHubIcon } from 'components/Icons'; import { repositoriesData } from 'utils/data/githubData'; @@ -37,7 +37,7 @@ const ProjectItemDetails: React.FC = (): React.JSX.Element => { target="_blank" title={`View ${repo} on GitHub`} > - github.com/tenemo/{repo} + github.com/{GITHUB_OWNER}/{repo} diff --git a/src/features/Projects/ProjectItem/ProjectMarkdown/ProjectMarkdown.spec.tsx b/src/features/Projects/ProjectItem/ProjectMarkdown/ProjectMarkdown.spec.tsx index ad3f342..fd468af 100644 --- a/src/features/Projects/ProjectItem/ProjectMarkdown/ProjectMarkdown.spec.tsx +++ b/src/features/Projects/ProjectItem/ProjectMarkdown/ProjectMarkdown.spec.tsx @@ -5,6 +5,7 @@ import ProjectMarkdown from './ProjectMarkdown'; import styles from './projectMarkdown.module.scss'; import { SILENT_CAPTIONS_TRACK_PATH } from 'app/appConstants'; +import { GITHUB_OWNER } from 'app/siteLinks'; describe('ProjectMarkdown', () => { it('renders basic markdown content', () => { @@ -86,11 +87,11 @@ describe('ProjectMarkdown', () => { expect(screen.getByAltText('Test Image')).toHaveAttribute( 'src', - 'https://github.com/tenemo/test-repo/blob/main/media/test.webp?raw=true', + `https://github.com/${GITHUB_OWNER}/test-repo/blob/main/media/test.webp?raw=true`, ); expect(screen.getByText('Test Link')).toHaveAttribute( 'href', - 'https://github.com/tenemo/test-repo/blob/main/src/index.ts', + `https://github.com/${GITHUB_OWNER}/test-repo/blob/main/src/index.ts`, ); }); @@ -187,7 +188,7 @@ describe('ProjectMarkdown', () => { expect(container.querySelector('script')).not.toBeInTheDocument(); expect(screen.getByAltText('Safe image')).toHaveAttribute( 'src', - 'https://github.com/tenemo/test-repo/blob/main/media/test.webp?raw=true', + `https://github.com/${GITHUB_OWNER}/test-repo/blob/main/media/test.webp?raw=true`, ); expect(screen.getByAltText('Safe image')).not.toHaveAttribute( 'onerror', diff --git a/src/routes/project-item.tsx b/src/routes/project-item.tsx index 20fe995..3cbdeee 100644 --- a/src/routes/project-item.tsx +++ b/src/routes/project-item.tsx @@ -13,7 +13,7 @@ import { PERSON, PERSON_ID, WEBSITE, WEBSITE_ID } from './index'; import { DEFAULT_KEYWORDS } from 'app/appConstants'; import { getProjectPath, HOME_PATH, PROJECTS_PATH } from 'app/routePaths'; -import { SITE_LINKS } from 'app/siteLinks'; +import { GITHUB_OWNER, SITE_LINKS } from 'app/siteLinks'; import ProjectItem from 'features/Projects/ProjectItem/ProjectItem'; import { findProjectByRepo } from 'features/Projects/projectUtils'; import { repositoriesData } from 'utils/data/githubData'; @@ -23,7 +23,7 @@ export const meta: MetaFunction = ({ params }) => { const info = repositoriesData[repo]; const description = info?.description ?? - `Project details for ${repo} from github.com/tenemo/${repo}`; + `Project details for ${repo} from github.com/${GITHUB_OWNER}/${repo}`; const keywords = info?.topics && info.topics.length > 0 ? info.topics.join(', ') diff --git a/src/utils/build/cspCompatibility.spec.ts b/src/utils/build/cspCompatibility.spec.ts index 6c518df..1839cc0 100644 --- a/src/utils/build/cspCompatibility.spec.ts +++ b/src/utils/build/cspCompatibility.spec.ts @@ -55,6 +55,12 @@ describe('cspCompatibility', () => { 'media', ), ).toBe(true); + expect( + isAllowedResourceUrl( + '//github.com/user-attachments/assets/12345678-1234-5678-9abc-123456789abc', + 'media', + ), + ).toBe(true); expect( isAllowedResourceUrl( 'https://github-production-user-asset-6210df.s3.amazonaws.com/example.mp4', @@ -96,6 +102,11 @@ describe('cspCompatibility', () => { '//private-user-images.githubusercontent.com/example', ), ).toBe('https://private-user-images.githubusercontent.com'); + expect( + normalizeResourceOrigin( + '//github.com/user-attachments/assets/12345678-1234-5678-9abc-123456789abc', + ), + ).toBe('https://github-production-user-asset-6210df.s3.amazonaws.com'); }); it('classifies fetchable links for CSP validation', () => { diff --git a/src/utils/githubUrls.spec.ts b/src/utils/githubUrls.spec.ts new file mode 100644 index 0000000..b36061a --- /dev/null +++ b/src/utils/githubUrls.spec.ts @@ -0,0 +1,34 @@ +import { describe, expect, it } from 'vitest'; + +import { isGithubUserAttachmentUrl } from './githubUrls'; + +describe('githubUrls', () => { + it('detects GitHub user attachment URLs with explicit https', () => { + expect( + isGithubUserAttachmentUrl( + 'https://github.com/user-attachments/assets/12345678-1234-5678-9abc-123456789abc', + ), + ).toBe(true); + }); + + it('detects protocol-relative GitHub user attachment URLs', () => { + expect( + isGithubUserAttachmentUrl( + '//github.com/user-attachments/assets/12345678-1234-5678-9abc-123456789abc', + ), + ).toBe(true); + }); + + it('rejects non-attachment or insecure URLs', () => { + expect( + isGithubUserAttachmentUrl( + 'https://github.com/Tenemo/piech.dev/blob/main/README.md', + ), + ).toBe(false); + expect( + isGithubUserAttachmentUrl( + 'http://github.com/user-attachments/assets/12345678-1234-5678-9abc-123456789abc', + ), + ).toBe(false); + }); +}); diff --git a/src/utils/githubUrls.ts b/src/utils/githubUrls.ts index 72a3b08..dac649e 100644 --- a/src/utils/githubUrls.ts +++ b/src/utils/githubUrls.ts @@ -1,6 +1,9 @@ +const normalizeUrlForParsing = (url: string): string => + url.startsWith('//') ? `https:${url}` : url; + export function isGithubUserAttachmentUrl(url: string): boolean { try { - const parsedUrl = new URL(url); + const parsedUrl = new URL(normalizeUrlForParsing(url)); return ( parsedUrl.protocol === 'https:' && From e9f6754061773dd7aaecd29946b67d98168d92a0 Mon Sep 17 00:00:00 2001 From: Tenemo Date: Mon, 30 Mar 2026 02:38:29 +0200 Subject: [PATCH 3/3] GH data fetch is fail-fast now --- package-lock.json | 41 ++++ package.json | 1 + src/types/jsdom.d.ts | 9 - src/types/json-module.d.ts | 7 - src/utils/build/fetchGithubData.spec.ts | 277 +++++++++++++++++++++++ src/utils/build/fetchGithubData.ts | 282 +++++++++++++++--------- 6 files changed, 494 insertions(+), 123 deletions(-) delete mode 100644 src/types/jsdom.d.ts delete mode 100644 src/types/json-module.d.ts create mode 100644 src/utils/build/fetchGithubData.spec.ts diff --git a/package-lock.json b/package-lock.json index 9f4a125..c5c7327 100644 --- a/package-lock.json +++ b/package-lock.json @@ -35,6 +35,7 @@ "@testing-library/dom": "^10.4.1", "@testing-library/jest-dom": "^6.9.1", "@testing-library/react": "^16.3.2", + "@types/jsdom": "^28.0.1", "@types/node": "^25.5.0", "@types/react": "^19.2.14", "@types/react-dom": "^19.2.3", @@ -3714,6 +3715,39 @@ "@types/unist": "*" } }, + "node_modules/@types/jsdom": { + "version": "28.0.1", + "resolved": "https://registry.npmjs.org/@types/jsdom/-/jsdom-28.0.1.tgz", + "integrity": "sha512-GJq2QE4TAZ5ajSoCasn5DOFm8u1mI3tIFvM5tIq3W5U/RTB6gsHwc6Yhpl91X9VSDOUVblgXmG+2+sSvFQrdlw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/node": "*", + "@types/tough-cookie": "*", + "parse5": "^7.0.0", + "undici-types": "^7.21.0" + } + }, + "node_modules/@types/jsdom/node_modules/parse5": { + "version": "7.3.0", + "resolved": "https://registry.npmjs.org/parse5/-/parse5-7.3.0.tgz", + "integrity": "sha512-IInvU7fabl34qmi9gY8XOVxhYyMyuH2xUNpb2q8/Y+7552KlejkRvqvD19nMoUW/uQGGbqNpA6Tufu5FL5BZgw==", + "dev": true, + "license": "MIT", + "dependencies": { + "entities": "^6.0.0" + }, + "funding": { + "url": "https://github.com/inikulin/parse5?sponsor=1" + } + }, + "node_modules/@types/jsdom/node_modules/undici-types": { + "version": "7.24.6", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.24.6.tgz", + "integrity": "sha512-WRNW+sJgj5OBN4/0JpHFqtqzhpbnV0GuB+OozA9gCL7a993SmU+1JBZCzLNxYsbMfIeDL+lTsphD5jN5N+n0zg==", + "dev": true, + "license": "MIT" + }, "node_modules/@types/json-schema": { "version": "7.0.15", "resolved": "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.15.tgz", @@ -3799,6 +3833,13 @@ "@types/node": "*" } }, + "node_modules/@types/tough-cookie": { + "version": "4.0.5", + "resolved": "https://registry.npmjs.org/@types/tough-cookie/-/tough-cookie-4.0.5.tgz", + "integrity": "sha512-/Ad8+nIOV7Rl++6f1BdKxFSMgmoqEoYbHRpPcx3JEfv8VRsQe9Z4mCXeJBzxs7mbHY/XOZZuXlRNfhpVPbs6ZA==", + "dev": true, + "license": "MIT" + }, "node_modules/@types/unist": { "version": "3.0.3", "resolved": "https://registry.npmjs.org/@types/unist/-/unist-3.0.3.tgz", diff --git a/package.json b/package.json index db4ac5f..25af8e5 100644 --- a/package.json +++ b/package.json @@ -76,6 +76,7 @@ "@testing-library/dom": "^10.4.1", "@testing-library/jest-dom": "^6.9.1", "@testing-library/react": "^16.3.2", + "@types/jsdom": "^28.0.1", "@types/node": "^25.5.0", "@types/react": "^19.2.14", "@types/react-dom": "^19.2.3", diff --git a/src/types/jsdom.d.ts b/src/types/jsdom.d.ts deleted file mode 100644 index 01476b0..0000000 --- a/src/types/jsdom.d.ts +++ /dev/null @@ -1,9 +0,0 @@ -declare module 'jsdom' { - export class JSDOM { - constructor(html?: string); - readonly window: { - document: Document; - }; - serialize(): string; - } -} diff --git a/src/types/json-module.d.ts b/src/types/json-module.d.ts deleted file mode 100644 index 2315e5b..0000000 --- a/src/types/json-module.d.ts +++ /dev/null @@ -1,7 +0,0 @@ -type JSONPrimitive = string | number | boolean | null; -type JSONValue = JSONPrimitive | { [key: string]: JSONValue } | JSONValue[]; - -declare module '*.json' { - const value: JSONValue; - export default value; -} diff --git a/src/utils/build/fetchGithubData.spec.ts b/src/utils/build/fetchGithubData.spec.ts new file mode 100644 index 0000000..add42ea --- /dev/null +++ b/src/utils/build/fetchGithubData.spec.ts @@ -0,0 +1,277 @@ +// @vitest-environment node + +import { mkdtemp, mkdir, readFile, rm, writeFile } from 'node:fs/promises'; +import os from 'node:os'; +import path from 'node:path'; + +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +import { PROJECTS } from 'features/Projects/projectsData'; +import type { RepositoryInfo } from 'types/githubData'; + +const octokitMocks = { + getCommit: vi.fn(), + getAllTopics: vi.fn(), + reposGet: vi.fn(), + request: vi.fn(), +}; + +vi.mock('octokit', () => ({ + Octokit: class { + public readonly rest = { + repos: { + get: octokitMocks.reposGet, + getAllTopics: octokitMocks.getAllTopics, + getCommit: octokitMocks.getCommit, + }, + }; + + public readonly request = octokitMocks.request; + }, +})); + +type GithubDataFile = { + metadata: { fetchedDatetime: string }; + repositories: Record; +}; + +type RepoParams = { + repo: string; +}; + +const UNIQUE_REPOS = Array.from( + new Set(PROJECTS.map((project) => project.repo)), +); +const README_UNAVAILABLE = '# README unavailable\n'; +const TEMP_DIRECTORIES: string[] = []; + +async function createWorkspace(): Promise { + const directory = await mkdtemp(path.join(os.tmpdir(), 'piech-dev-fetch-')); + TEMP_DIRECTORIES.push(directory); + await mkdir(path.join(directory, 'temp'), { recursive: true }); + return directory; +} + +function createRepositoryInfo(repo: string): RepositoryInfo { + return { + name: repo, + description: `Description for ${repo}`, + createdDatetime: '2024-01-01T00:00:00.000Z', + defaultBranch: 'main', + lastCommitDatetime: '2024-02-02T00:00:00.000Z', + license: 'MIT', + readme_content: `# ${repo}\n\nRepository README.`, + topics: ['portfolio', 'typescript'], + }; +} + +function createCachePayload(): GithubDataFile { + return { + metadata: { + fetchedDatetime: new Date().toISOString(), + }, + repositories: Object.fromEntries( + UNIQUE_REPOS.map((repo) => [repo, createRepositoryInfo(repo)]), + ), + }; +} + +async function writeCache( + workspace: string, + payload: GithubDataFile, +): Promise { + await writeFile( + path.join(workspace, 'temp', 'githubData.json'), + JSON.stringify(payload, null, 2), + 'utf8', + ); +} + +async function loadFetchGithubDataModule( + workspace: string, +): Promise { + vi.resetModules(); + vi.spyOn(process, 'cwd').mockReturnValue(workspace); + return import('./fetchGithubData'); +} + +async function readGithubDataFile(workspace: string): Promise { + const payload = await readFile( + path.join(workspace, 'temp', 'githubData.json'), + 'utf8', + ); + + return JSON.parse(payload) as GithubDataFile; +} + +function seedSuccessfulOctokitResponses(): void { + octokitMocks.reposGet.mockImplementation(({ repo }: RepoParams) => + Promise.resolve({ + data: { + created_at: '2024-01-01T00:00:00.000Z', + default_branch: 'main', + description: `Description for ${repo}`, + license: { spdx_id: 'MIT' }, + name: repo, + pushed_at: '2024-02-01T00:00:00.000Z', + }, + }), + ); + octokitMocks.getAllTopics.mockResolvedValue({ + data: { + names: ['portfolio', 'typescript'], + }, + }); + octokitMocks.getCommit.mockResolvedValue({ + data: { + commit: { + author: { date: '2024-02-02T00:00:00.000Z' }, + committer: { date: '2024-02-01T00:00:00.000Z' }, + }, + }, + }); + octokitMocks.request.mockImplementation( + (_route: string, { repo }: RepoParams) => + Promise.resolve({ + data: `# ${repo}\n\nRepository README.`, + }), + ); +} + +describe('fetchGithubData', () => { + beforeEach(() => { + octokitMocks.getCommit.mockReset(); + octokitMocks.getAllTopics.mockReset(); + octokitMocks.reposGet.mockReset(); + octokitMocks.request.mockReset(); + }); + + afterEach(async () => { + vi.restoreAllMocks(); + vi.unstubAllEnvs(); + + while (TEMP_DIRECTORIES.length > 0) { + const directory = TEMP_DIRECTORIES.pop(); + + if (!directory) { + continue; + } + + await rm(directory, { + force: true, + recursive: true, + }); + } + }); + + it('writes fetched GitHub data when all required repository metadata is available', async () => { + const workspace = await createWorkspace(); + seedSuccessfulOctokitResponses(); + const { fetchGithubData } = await loadFetchGithubDataModule(workspace); + + await fetchGithubData({ refetch: true }); + + const payload = await readGithubDataFile(workspace); + + expect(Object.keys(payload.repositories).sort()).toEqual( + [...UNIQUE_REPOS].sort(), + ); + expect(payload.repositories['piech.dev']).toMatchObject({ + createdDatetime: '2024-01-01T00:00:00.000Z', + defaultBranch: 'main', + description: 'Description for piech.dev', + lastCommitDatetime: '2024-02-02T00:00:00.000Z', + readme_content: '# piech.dev\n\nRepository README.', + }); + }); + + it('fails when a required README cannot be fetched and cache fallback is not enabled', async () => { + const workspace = await createWorkspace(); + seedSuccessfulOctokitResponses(); + octokitMocks.request.mockImplementation( + (_route: string, { repo }: RepoParams) => { + if (repo === UNIQUE_REPOS[0]) { + return Promise.reject( + new Error('GitHub readme endpoint failed'), + ); + } + + return Promise.resolve({ + data: `# ${repo}\n\nRepository README.`, + }); + }, + ); + const { fetchGithubData } = await loadFetchGithubDataModule(workspace); + + await expect(fetchGithubData({ refetch: true })).rejects.toThrow( + /Failed to fetch README/, + ); + await expect( + readFile(path.join(workspace, 'temp', 'githubData.json'), 'utf8'), + ).rejects.toThrow(); + }); + + it('uses the existing cache only when cache fallback is explicitly enabled', async () => { + const workspace = await createWorkspace(); + const cachedPayload = createCachePayload(); + await writeCache(workspace, cachedPayload); + octokitMocks.reposGet.mockRejectedValue( + new Error('GitHub unavailable'), + ); + octokitMocks.getAllTopics.mockResolvedValue({ + data: { + names: ['portfolio', 'typescript'], + }, + }); + octokitMocks.getCommit.mockResolvedValue({ + data: { + commit: { + author: { date: '2024-02-02T00:00:00.000Z' }, + }, + }, + }); + octokitMocks.request.mockResolvedValue({ + data: '# README\n\nRepository README.', + }); + const { fetchGithubData } = await loadFetchGithubDataModule(workspace); + + await expect( + fetchGithubData({ allowCacheFallback: true, refetch: true }), + ).resolves.toBeUndefined(); + + const payload = await readGithubDataFile(workspace); + + expect(payload).toEqual(cachedPayload); + }); + + it('rejects degraded cache data instead of silently reusing it as fallback', async () => { + const workspace = await createWorkspace(); + const degradedPayload = createCachePayload(); + degradedPayload.repositories[UNIQUE_REPOS[0]].readme_content = + README_UNAVAILABLE; + await writeCache(workspace, degradedPayload); + octokitMocks.reposGet.mockRejectedValue( + new Error('GitHub unavailable'), + ); + octokitMocks.getAllTopics.mockResolvedValue({ + data: { + names: ['portfolio', 'typescript'], + }, + }); + octokitMocks.getCommit.mockResolvedValue({ + data: { + commit: { + author: { date: '2024-02-02T00:00:00.000Z' }, + }, + }, + }); + octokitMocks.request.mockResolvedValue({ + data: '# README\n\nRepository README.', + }); + const { fetchGithubData } = await loadFetchGithubDataModule(workspace); + + await expect( + fetchGithubData({ allowCacheFallback: true, refetch: true }), + ).rejects.toThrow(/Failed to fetch repository metadata/); + }); +}); diff --git a/src/utils/build/fetchGithubData.ts b/src/utils/build/fetchGithubData.ts index ec68837..b3de5ed 100644 --- a/src/utils/build/fetchGithubData.ts +++ b/src/utils/build/fetchGithubData.ts @@ -1,6 +1,6 @@ /* - - CLI usage: node --experimental-strip-types src/utils/build/fetchGithubData.ts [--refetch] - - Programmatic usage: await fetchGithubData({ refetch?: boolean }) + - CLI usage: node --experimental-strip-types src/utils/build/fetchGithubData.ts [--refetch] [--allow-cache-fallback] + - Programmatic usage: await fetchGithubData({ refetch?: boolean, allowCacheFallback?: boolean }) - Writes temp/githubData.json with shape: { metadata: { fetchedDatetime: string }, @@ -28,6 +28,7 @@ import { Octokit } from 'octokit'; import { GITHUB_OWNER } from '../../app/siteLinks.ts'; import { PROJECTS } from '../../features/Projects/projectsData.ts'; + const OUT_DIR = path.join(process.cwd(), 'temp'); const OUT_PATH = path.join(OUT_DIR, 'githubData.json'); const EPOCH_ISO = '1970-01-01T00:00:00.000Z'; @@ -51,17 +52,48 @@ export type GithubData = { repositories: Record; }; -function isFallbackRepositoryInfo( +type FetchGithubDataOptions = { + refetch?: boolean; + allowCacheFallback?: boolean; +}; + +type CacheState = { + isFresh: boolean; + isUsable: boolean; +}; + +function isNonEmptyString(value: unknown): value is string { + return typeof value === 'string' && value.trim() !== ''; +} + +function isValidIsoDatetime(value: unknown): value is string { + return ( + isNonEmptyString(value) && + value !== EPOCH_ISO && + !Number.isNaN(Date.parse(value)) + ); +} + +function isValidRepositoryInfo( repoInfo: Partial | undefined, ): boolean { if (!repoInfo) { - return true; + return false; } return ( - repoInfo.readme_content === README_UNAVAILABLE || - repoInfo.createdDatetime === EPOCH_ISO || - repoInfo.lastCommitDatetime === EPOCH_ISO + isNonEmptyString(repoInfo.name) && + typeof repoInfo.description === 'string' && + isValidIsoDatetime(repoInfo.createdDatetime) && + isValidIsoDatetime(repoInfo.lastCommitDatetime) && + isNonEmptyString(repoInfo.defaultBranch) && + isNonEmptyString(repoInfo.readme_content) && + repoInfo.readme_content !== README_NOT_FOUND && + repoInfo.readme_content !== README_UNAVAILABLE && + (repoInfo.topics === undefined || + (Array.isArray(repoInfo.topics) && + repoInfo.topics.every((topic) => isNonEmptyString(topic)))) && + (repoInfo.license === undefined || isNonEmptyString(repoInfo.license)) ); } @@ -110,6 +142,39 @@ function normalizeLicense( return license.name ?? undefined; } +function readCachedGithubData( + repos: readonly string[], +): CacheState | undefined { + if (!fssync.existsSync(OUT_PATH)) { + return undefined; + } + + try { + const raw = fssync.readFileSync(OUT_PATH, 'utf8'); + const current = JSON.parse(raw) as Partial; + const metaStr = current.metadata?.fetchedDatetime; + const metaDate = metaStr ? new Date(metaStr) : undefined; + const fileMtimeMs = fssync.statSync(OUT_PATH).mtime.getTime(); + const effectiveTimeMs = + metaDate && !Number.isNaN(metaDate.getTime()) + ? metaDate.getTime() + : fileMtimeMs; + const ONE_DAY_MS = 24 * 60 * 60 * 1000; + const repositories = current.repositories ?? {}; + const isComplete = repos.every((repo) => repo in repositories); + const isUsable = + isComplete && + repos.every((repo) => isValidRepositoryInfo(repositories[repo])); + + return { + isFresh: Date.now() - effectiveTimeMs <= ONE_DAY_MS, + isUsable, + }; + } catch { + return undefined; + } +} + function createOctokit(token?: string): Octokit { return new Octokit({ auth: token, @@ -130,15 +195,24 @@ async function getReadme(octokit: Octokit, repo: string): Promise { }, ); - return typeof response.data === 'string' - ? response.data - : README_NOT_FOUND; + if (!isNonEmptyString(response.data)) { + throw new Error(`Repository "${repo}" returned an empty README.`); + } + + return response.data; } catch (error) { if (getErrorStatus(error) === 404) { - return README_NOT_FOUND; + throw new Error(`Repository "${repo}" is missing a README.`, { + cause: error, + }); } - throw error; + throw new Error( + `Failed to fetch README for "${repo}": ${stringifyReason(error)}`, + { + cause: error, + }, + ); } } @@ -180,19 +254,21 @@ async function buildRepositoryInfo( getReadme(octokit, repo), ]); - const readmeContent = - readmeResult.status === 'fulfilled' - ? readmeResult.value - : README_UNAVAILABLE; + if (repoResult.status === 'rejected') { + throw new Error( + `Failed to fetch repository metadata for "${repo}": ${stringifyReason(repoResult.reason)}`, + ); + } if (readmeResult.status === 'rejected') { - console.warn( - '[githubData] README fetch failed for', - repo, - stringifyReason(readmeResult.reason), - ); + throw new Error(stringifyReason(readmeResult.reason)); } + const topics = + topicsResult.status === 'fulfilled' + ? topicsResult.value.data.names + : undefined; + if (topicsResult.status === 'rejected') { console.warn( '[githubData] Topics fetch failed for', @@ -201,125 +277,117 @@ async function buildRepositoryInfo( ); } - const topics = - topicsResult.status === 'fulfilled' - ? topicsResult.value.data.names - : undefined; + const repository = repoResult.value.data; + const defaultBranch = repository.default_branch; - if (repoResult.status === 'rejected') { - console.warn( - '[githubData] Repo info failed for', - repo, - stringifyReason(repoResult.reason), + if (!isNonEmptyString(defaultBranch)) { + throw new Error( + `Repository "${repo}" is missing default branch metadata.`, ); + } - return { - name: repo, - description: 'No description available', - createdDatetime: EPOCH_ISO, - lastCommitDatetime: EPOCH_ISO, - readme_content: readmeContent, - topics, - }; + if (!isValidIsoDatetime(repository.created_at)) { + throw new Error( + `Repository "${repo}" is missing a valid creation timestamp.`, + ); } - const repository = repoResult.value.data; const lastCommitDatetime = await getLastCommitDatetime( octokit, repo, - repository.default_branch, + defaultBranch, ); + const resolvedLastCommitDatetime = + lastCommitDatetime ?? repository.pushed_at; + + if (!isValidIsoDatetime(resolvedLastCommitDatetime)) { + throw new Error( + `Repository "${repo}" is missing a valid last commit timestamp.`, + ); + } return { name: repository.name, description: repository.description ?? 'No description available', createdDatetime: repository.created_at, - lastCommitDatetime: lastCommitDatetime ?? repository.pushed_at, - defaultBranch: repository.default_branch, + lastCommitDatetime: resolvedLastCommitDatetime, + defaultBranch, license: normalizeLicense(repository.license), - readme_content: readmeContent, + readme_content: readmeResult.value, topics, }; } -export async function fetchGithubData(options?: { - refetch?: boolean; -}): Promise { +export async function fetchGithubData( + options?: FetchGithubDataOptions, +): Promise { + const allowCacheFallback = Boolean(options?.allowCacheFallback); const refetch = Boolean(options?.refetch); const repos = Array.from(new Set(PROJECTS.map((project) => project.repo))); + const cacheState = readCachedGithubData(repos); - if (fssync.existsSync(OUT_PATH)) { - try { - const raw = fssync.readFileSync(OUT_PATH, 'utf8'); - const current = JSON.parse(raw) as Partial; - const metaStr = current.metadata?.fetchedDatetime; - const metaDate = metaStr ? new Date(metaStr) : undefined; - const fileMtimeMs = fssync.statSync(OUT_PATH).mtime.getTime(); - const effectiveTimeMs = - metaDate && !isNaN(metaDate.getTime()) - ? metaDate.getTime() - : fileMtimeMs; - const ONE_DAY_MS = 24 * 60 * 60 * 1000; - const olderThanOneDay = Date.now() - effectiveTimeMs > ONE_DAY_MS; - const repositories = current.repositories ?? {}; - const infoKeys = Object.keys(repositories); - const complete = repos.every((repo) => infoKeys.includes(repo)); - const hasFallbackData = repos.some((repo) => - isFallbackRepositoryInfo(repositories[repo]), - ); + if (!refetch && cacheState?.isUsable && cacheState.isFresh) { + console.log('[githubData] Up-to-date and fresh file found, skipping.'); + return; + } - if (!refetch && complete && !olderThanOneDay && !hasFallbackData) { - console.log( - '[githubData] Up-to-date and fresh file found, skipping.', - ); - return; - } - - if (olderThanOneDay) { - console.log( - '[githubData] Existing file is older than a day; refetching.', - ); - } - - if (hasFallbackData) { - console.log( - '[githubData] Existing file contains fallback GitHub data; refetching.', - ); - } - } catch { - // Rebuild the file if parsing the cache fails. - } + if (cacheState && !cacheState.isFresh) { + console.log( + '[githubData] Existing cache is older than a day; refetching.', + ); + } + + if (cacheState && !cacheState.isUsable) { + console.log( + '[githubData] Existing cache is incomplete or degraded; refetching.', + ); } const token = process.env.PERSONAL_GITHUB_TOKEN ?? process.env.GH_TOKEN; const octokit = createOctokit(token); - const repositoryEntries = await Promise.all( - repos.map( - async (repo): Promise => [ - repo, - await buildRepositoryInfo(octokit, repo), - ], - ), - ); - const repositories: GithubData['repositories'] = - Object.fromEntries(repositoryEntries); - await fs.mkdir(OUT_DIR, { recursive: true }); - const payload: GithubData = { - metadata: { fetchedDatetime: new Date().toISOString() }, - repositories, - }; - await fs.writeFile(OUT_PATH, JSON.stringify(payload, null, 2), 'utf8'); - console.log('[githubData] Wrote new format to', OUT_PATH); + try { + const repositoryEntries = await Promise.all( + repos.map( + async (repo): Promise => [ + repo, + await buildRepositoryInfo(octokit, repo), + ], + ), + ); + const repositories: GithubData['repositories'] = + Object.fromEntries(repositoryEntries); + + await fs.mkdir(OUT_DIR, { recursive: true }); + const payload: GithubData = { + metadata: { fetchedDatetime: new Date().toISOString() }, + repositories, + }; + await fs.writeFile(OUT_PATH, JSON.stringify(payload, null, 2), 'utf8'); + console.log('[githubData] Wrote new format to', OUT_PATH); + } catch (error) { + if (allowCacheFallback && cacheState?.isUsable) { + console.warn( + '[githubData] Fetch failed, using the existing cache because cache fallback was explicitly enabled.', + stringifyReason(error), + ); + return; + } + + throw error; + } } if ( import.meta.url === (process.argv[1] && new URL(`file://${process.argv[1]}`).href) ) { + const allowCacheFallback = process.argv.includes('--allow-cache-fallback'); const refetch = process.argv.includes('--refetch'); - void fetchGithubData({ refetch }).catch((error: unknown) => { - console.error('[githubData] Generation failed:', error); - process.exitCode = 1; - }); + void fetchGithubData({ allowCacheFallback, refetch }).catch( + (error: unknown) => { + console.error('[githubData] Generation failed:', error); + process.exitCode = 1; + }, + ); }