diff --git a/lib/harAnalyzer.js b/lib/harAnalyzer.js index 7d1c895..3f1e854 100644 --- a/lib/harAnalyzer.js +++ b/lib/harAnalyzer.js @@ -35,6 +35,7 @@ export class HarAnalyzer { this.dependencies = this.package.dependencies; this.version = this.package.version; } + trySetStartUrl(url, uuid, group) { if (this.groups[group] !== undefined) { // Only test it once for every group @@ -44,6 +45,7 @@ export class HarAnalyzer { 'start-url': url } } + transform2SimplifiedData(harData, url) { const data = { 'url': url, @@ -200,6 +202,7 @@ export class HarAnalyzer { return knowledgeData; } + tryGetUpdatedDate(url, bodyText, knowledgeData) { const regexes = [ /(?bedömning|redogörelse|uppdater|gransk)(?[^>.]*) (?[0-9]{1,2} )(?(?:jan(?:uari)*|feb(?:ruari)*|mar(?:s)*|apr(?:il)*|maj|jun(?:i)*|jul(?:i)*|aug(?:usti)*|sep(?:tember)*|okt(?:ober)*|nov(?:ember)*|dec(?:ember)*) )(?20[0-9]{2})/gi, @@ -216,26 +219,24 @@ export class HarAnalyzer { let issues = []; regexes.forEach(regex => { - const matches = bodyText.matchAll(regex); // Use matchAll to get all matches + const matches = bodyText.matchAll(regex); for (const match of matches) { - dates.push(this.getWeightedDocDateFromMatch(match, bodyText)); // Push the named groups into the dates array + dates.push(this.getWeightedDocDateFromMatch(match, bodyText)); } }); - // Eliminate duplicates by comparing all properties const uniqueDates = dates.filter((date, index, self) => index === self.findIndex(d => d.word === date.word && d.text === date.text && d.type === date.type && - d.date[0] === date.date[0] && // Compare year - d.date[1] === date.date[1] && // Compare month - d.date[2] === date.date[2] && // Compare day + d.date[0] === date.date[0] && + d.date[1] === date.date[1] && + d.date[2] === date.date[2] && d.weight === date.weight ) ); - // Sort dates by weight in descending order uniqueDates.sort((a, b) => b.weight - a.weight); if (uniqueDates.length === 0) { @@ -248,9 +249,9 @@ export class HarAnalyzer { }); } else { const dateInfo = uniqueDates.pop().date; - const dateDoc = new Date(dateInfo[0], dateInfo[1] - 1, dateInfo[2]); // Month is 0-indexed in JavaScript + const dateDoc = new Date(dateInfo[0], dateInfo[1] - 1, dateInfo[2]); - const year = 365 * 24 * 60 * 60 * 1000; // Convert year to milliseconds + const year = 365 * 24 * 60 * 60 * 1000; const now = new Date(); const cutoff1Year = new Date(now.getTime() - year); @@ -306,7 +307,6 @@ export class HarAnalyzer { text: `Accessibility statement seems to be older than 5 years`, severity: 'error', }); - } } @@ -358,6 +358,7 @@ export class HarAnalyzer { weight: weight }; } + convertToMonthNumber(month) { const monthDict = { 'jan': 1, 'feb': 2, 'mar': 3, 'apr': 4, @@ -372,6 +373,7 @@ export class HarAnalyzer { } return parseInt(month, 10); } + getDateWeight(text) { const patterns = [ { regex: /bedömning/i, weight: 1.0 }, @@ -388,11 +390,12 @@ export class HarAnalyzer { return null; } + tryGetEvaluationMethod(url, bodyText, knowledgeData) { let issues = []; const evaluationMethod = bodyText.match(/(sj(.{1, 6}|ä|ä|ä)lvskattning|intern[a]{0,1} kontroller|intern[a]{0,1} test(ning|er){0,1}]|utvärderingsmetod|tillgänglighetsexpert(er){0,1}|funka|etu ab|siteimprove|oberoende granskning|oberoende tillgänglighetsgranskning(ar){0,1}|tillgänglighetskonsult(er){0,1}|med hjälp av|egna tester|oberoende experter|Hur vi testat webbplats(en){0,1}|vi testat webbplatsen|intervjuer|rutiner|checklistor|checklista|utbildningar|automatiserade|automatisk|maskinell|kontrollverktyg|tillgänglighetskontroll)/gim); if (evaluationMethod) { - const searchWord = evaluationMethod[0]; // The matched word + const searchWord = evaluationMethod[0]; knowledgeData['evaluation-method-word'] = searchWord.trim(); knowledgeData['evaluation-method-text'] = this.tryGetWordSentence(searchWord, bodyText); } @@ -409,40 +412,64 @@ export class HarAnalyzer { } looks_like_a11y_statement(knowledgeData, body, doc) { - if (knowledgeData['compatible-word'] || knowledgeData['notification-function-link-url'] || knowledgeData['unreasonably-burdensome-accommodation-word']) { - let h1 = body.querySelector('h1'); - if (h1) { - knowledgeData['h1'] = h1.textContent.replace(/\u00AD/g, '').trim(); - const isA11yStatementH1 = /tillg(.{1,6}|ä|ä|ä)nglighetsredog(.{1,6}|ö|ö|ö)relse/gim.test(knowledgeData['h1']); - if (isA11yStatementH1) { - return true; - } - } - let title = doc.querySelector('title'); - if (title) { - knowledgeData['page-title'] = title.textContent.replace(/\u00AD/g, '').trim(); - const isA11yStatementTitle = /tillg(.{1,6}|ä|ä|ä)nglighetsredog(.{1,6}|ö|ö|ö)relse/gim.test(knowledgeData['page-title']); - if (isA11yStatementTitle) { - return true; - } - } - // TODO: Check link precision level for this page (if it is 0.5 or more return true) + // Extrahera h1 och title oavsett vilken väg som tas nedan, + // så att de syns i utdata (har använts som debug-info tidigare). + const h1Element = body.querySelector('h1'); + if (h1Element) { + knowledgeData['h1'] = h1Element.textContent.replace(/\u00AD/g, '').trim(); + } + const titleElement = doc.querySelector('title'); + if (titleElement) { + knowledgeData['page-title'] = titleElement.textContent.replace(/\u00AD/g, '').trim(); + } + + const h1Text = knowledgeData['h1'] || ''; + const titleText = knowledgeData['page-title'] || ''; + + // Hela det juridiska ordet "tillgänglighetsredogörelse" + const strictRegex = /tillg(.{1,6}|ä|ä|ä)nglighetsredog(.{1,6}|ö|ö|ö)relse/i; + // Bara rotordet "tillgänglighet" + const looseRegex = /tillg(.{1,6}|ä|ä|ä)nglighet/i; + + const hasStrictMatch = strictRegex.test(h1Text) || strictRegex.test(titleText); + + // Stark signal på egen hand: h1 eller title innehåller hela ordet + // "tillgänglighetsredogörelse". Så pass specifikt att vi inte också + // behöver kräva en lagspråk-signal. Hanterar sajter (som rvn.se) som + // inte använder formuleringen "delvis förenlig" och inte länkar + // explicit till digg.se. + if (hasStrictMatch) { + return true; + } + + // Svagare signal: h1 eller title innehåller bara "tillgänglighet". + // Då kräver vi också att minst en lagspråk-signal hittats i texten, + // som visar att det faktiskt rör sig om en juridisk redogörelse. + // Hanterar sajter (som regionostergotland.se) som använder kortare + // rubriker som "Tillgänglighet på webbplatsen". + const hasLooseMatch = looseRegex.test(h1Text) || looseRegex.test(titleText); + const hasLegalSignal = !!( + knowledgeData['compatible-word'] || + knowledgeData['notification-function-link-url'] || + knowledgeData['unreasonably-burdensome-accommodation-word'] + ); + + if (hasLooseMatch && hasLegalSignal) { + return true; } + return false; } getMinifiedBodyText(body, knowledgeData) { - const minifiedBody = body.cloneNode(true); // Deep clone the body, including all child nodes + const minifiedBody = body.cloneNode(true); - // Specify the tags you want to remove const tagsToRemove = ['script', 'nav', 'form', 'input', 'button', 'a']; - // Iterate through each tag and remove all instances of it tagsToRemove.forEach(tag => { const elements = minifiedBody.querySelectorAll(tag); elements.forEach(element => element.remove()); }); - // Get mimized text content const bodyText = minifiedBody.textContent .replace(/\u00AD/g, '') .replace(/\n/g, ' ') @@ -479,7 +506,6 @@ export class HarAnalyzer { else { const digg_old_url = /digg\.se[a-z/-]+anmal-bristande-tillganglighet/i.test(href); if (digg_old_url) { - // FIX: Ändrat från anchor.replace(...).textContent till anchor.textContent.replace(...) knowledgeData['notification-function-link-text'] = anchor.textContent.replace(/\u00AD/g, '').trim(); knowledgeData['notification-function-link-url'] = href; issues.push({ @@ -489,7 +515,6 @@ export class HarAnalyzer { text: `Uses old or incorrect link to DIGG's report function`, severity: 'warning', data: { - // FIX: Samma här text: anchor.textContent.replace(/\u00AD/g, '').trim(), url: href } @@ -511,15 +536,11 @@ export class HarAnalyzer { } tryGetCompatibleText(url, bodyText, knowledgeData) { - // Följsamhet till lagkraven med formuleringen: - // helt förenlig, - // delvis förenlig eller - // inte förenlig. let issues = []; const compatTextMatch = bodyText.match(/(?helt|delvis|inte) förenlig/i); if (compatTextMatch) { - const searchWord = compatTextMatch[0]; // The matched word + const searchWord = compatTextMatch[0]; knowledgeData['compatible-word'] = searchWord; knowledgeData['compatible-text'] = this.tryGetWordSentence(searchWord, bodyText); @@ -553,18 +574,17 @@ export class HarAnalyzer { } return issues; } + tryGetWordSentence(word, bodyText) { - // Match the whole sentence containing the word const sentenceMatch = bodyText.match(new RegExp(`[A-ZÅÄÖ.]{0,1}[a-zåäö ]+?${word}[^A-ZÅÄÖ.]*\\.`)); if (sentenceMatch) { - let compatibleText = sentenceMatch[0].trim(); // Extract the sentence + let compatibleText = sentenceMatch[0].trim(); if (compatibleText.length > 200) { const searchWordIndex = compatibleText.indexOf(word); - const start = Math.max(0, searchWordIndex - 100); // Ensure the search word is centered + const start = Math.max(0, searchWordIndex - 100); const end = Math.min(compatibleText.length, searchWordIndex + 100 + word.length); compatibleText = compatibleText.substring(start, end).trim(); - // Add ellipses if text was trimmed if (start > 0) compatibleText = '...' + compatibleText; if (end < compatibleText.length) compatibleText += '...'; } @@ -573,14 +593,14 @@ export class HarAnalyzer { } return word; } + tryGetFullWord(word, bodyText) { - // Match the whole sentence containing the word const sentenceMatch = bodyText.match(new RegExp(`[A-ZÅÄÖ .]{0,1}[a-zåäö]*${word}[a-zåäö]*[^ .]*`)); if (sentenceMatch) { - let compatibleText = sentenceMatch[0].trim(); // Extract the sentence + let compatibleText = sentenceMatch[0].trim(); if (compatibleText.length > 20) { const searchWordIndex = compatibleText.indexOf(word); - const start = Math.max(0, searchWordIndex - 10); // Ensure the search word is centered + const start = Math.max(0, searchWordIndex - 10); const end = Math.min(compatibleText.length, searchWordIndex + 10 + word.length); compatibleText = compatibleText.substring(start, end).trim(); } @@ -589,16 +609,15 @@ export class HarAnalyzer { } return word; } + tryGetUnreasonablyBurdensomeAccommodation(url, bodyText, knowledgeData) { - // Redogörelse av innehåll som undantagits på grund av - // oskäligt betungande anpassning (12 §) med tydlig motivering. let issues = []; let compatTextMatch = bodyText.match(/(?12[ \t\r\n]§ lagen)/gim); if (!compatTextMatch) { compatTextMatch = bodyText.match(/(?Oskäligt betungande anpassning)/gim); } if (compatTextMatch) { - const searchWord = compatTextMatch[0]; // The matched word + const searchWord = compatTextMatch[0]; knowledgeData['unreasonably-burdensome-accommodation-word'] = searchWord; knowledgeData['unreasonably-burdensome-accommodation-text'] = this.tryGetWordSentence(searchWord, bodyText); issues.push({ @@ -611,6 +630,7 @@ export class HarAnalyzer { } return issues; } + getInterestingTextPrecision(text) { const patterns = [ { @@ -655,6 +675,7 @@ export class HarAnalyzer { return 0.1; } + getInterestingUrls(org_url_start, body) { const urls = {}; const anchors = body.querySelectorAll('a[href]'); @@ -663,7 +684,25 @@ export class HarAnalyzer { if (!href || href.length == 0) { continue; } - if (href.endsWith('.pdf')) { + // Hoppa över länkar till filer som inte är HTML. Dessa kan matcha + // regex:en via sitt filnamn (t.ex. "tillganglighetsanalys.docx") och + // då hamna i kön av intressanta URL:er. När pluginet sedan följer dem + // får det inget HTML-svar och fyrar av en falsk "no-network"-varning. + const lowerHref = href.toLowerCase(); + const nonHtmlExtensions = [ + // Dokument + '.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx', + '.odt', '.ods', '.odp', '.rtf', '.csv', '.txt', + // Bilder + '.png', '.jpg', '.jpeg', '.gif', '.svg', '.webp', '.bmp', '.ico', '.tiff', + // Media + '.mp3', '.mp4', '.wav', '.ogg', '.webm', '.avi', '.mov', '.flac', '.m4a', + // Arkiv + '.zip', '.rar', '.7z', '.tar', '.gz', '.bz2', + // Data + '.json', '.xml', '.rss', '.atom' + ]; + if (nonHtmlExtensions.some(ext => lowerHref.endsWith(ext))) { continue; } if (href.startsWith('//')) { @@ -707,7 +746,6 @@ export class HarAnalyzer { } sortInterestingUrls(urls) { - // Sort URLs by precision in descending order const sortedUrls = Object.entries(urls) .sort(([, precisionA], [, precisionB]) => precisionB - precisionA) .reduce((acc, [href, precision]) => { @@ -742,6 +780,10 @@ export class HarAnalyzer { 'knowledgeData': knowledgeData }; } + + // FIX: Använder nu knowledgeData[0]['url'] istället för knowledgeData['url'] (som var undefined), + // och bygger upp hela regeluppsättningen (alla regler som "resolved" + no-a11y-statement som "critical") + // så att plugin-webperf-core kan beräkna en korrekt poäng. checkNoAccessibilityStatement(group) { if (this.groups[group] === undefined) { return; @@ -751,28 +793,55 @@ export class HarAnalyzer { return; } - if (this.groups[group]['knowledgeData'].length === 0) { + if (!this.groups[group]['knowledgeData'] || this.groups[group]['knowledgeData'].length === 0) { return; } - this.groups[group]['knowledgeData'][0]['issues']['no-a11y-statement'] = { - url: this.groups[group]['knowledgeData']['url'], - 'test': 'a11y-statement', - rule: 'no-a11y-statement', - category: 'a11y', - text: `Unable to find accessibility statement`, - severity: 'critical', - subIssues: [{ - url: this.groups[group]['knowledgeData']['url'], + const knowledgeData = this.groups[group]['knowledgeData'][0]; + const url = knowledgeData['url'] || this.groups[group]['start-url']; + + // Bygg full regeluppsättning på samma sätt som happy-path gör i createKnowledgeFromData + const issuesByRule = { + 'no-a11y-statement': { + 'test': 'a11y-statement', rule: 'no-a11y-statement', category: 'a11y', - text: `Unable to find accessibility statement`, severity: 'critical', - line: 0, - column: 0 - }] + subIssues: [{ + url: url, + rule: 'no-a11y-statement', + category: 'a11y', + text: `Unable to find accessibility statement`, + severity: 'critical', + line: 0, + column: 0 + }] + } + }; + + const allRules = Object.keys(this.rules || {}).filter(rule => this.rules[rule] !== 'off'); + for (const rule of allRules) { + // Hoppa över regler som redan finns (t.ex. 'no-network' som varning) — skriv inte över dem som resolved + if (issuesByRule[rule] || (knowledgeData.issues && knowledgeData.issues[rule])) { + continue; + } + issuesByRule[rule] = { + 'test': 'a11y-statement', + rule: rule, + category: 'a11y', + severity: 'resolved', + subIssues: [] + }; + } + + knowledgeData.issues = { + ...knowledgeData.issues, + ...issuesByRule }; } + + // FIX: Den duplicerade (och felaktiga) issue-koden för 15-URL-gränsen är borttagen. + // checkNoAccessibilityStatement i sitespeedio.summarize-fasen sköter det nu. getNextInterestingUrl(group) { if (this.groups[group] === undefined) { return undefined; @@ -783,52 +852,30 @@ export class HarAnalyzer { } if (!('visited-urls' in this.groups[group])) { - this.groups[group]['visited-urls'] = new Set(); // Initialize visited URLs if not present + this.groups[group]['visited-urls'] = new Set(); } if (this.groups[group]['has-a11y-statement']) { - return undefined; // No more URLs to visit if an a11y statement is found + return undefined; } - // Do not return a URL if more than 15 URLs have been visited const visitedUrls = this.groups[group]['visited-urls']; if (visitedUrls.size >= 15) { - if (this.groups[group]['knowledgeData'].length === 0) { - return undefined; - } - - this.groups[group]['knowledgeData'][0]['issues']['no-a11y-statement'] = { - url: this.groups[group]['knowledgeData']['url'], - 'test': 'a11y-statement', - rule: 'no-a11y-statement', - category: 'a11y', - text: `Unable to find accessibility statement`, - severity: 'critical', - subIssues: [{ - url: this.groups[group]['knowledgeData']['url'], - rule: 'no-a11y-statement', - category: 'a11y', - text: `Unable to find accessibility statement`, - severity: 'critical', - line: 0, - column: 0 - }] - }; - return undefined; } const interestingUrls = this.groups[group]['interesting-urls']; for (const url of Object.keys(interestingUrls)) { if (!visitedUrls.has(url)) { - visitedUrls.add(url); // Mark the URL as visited - delete interestingUrls[url]; // Remove the URL from the dictionary - return url; // Return the first unvisited URL + visitedUrls.add(url); + delete interestingUrls[url]; + return url; } } - return undefined; // Return the first URL or undefined if none exist + return undefined; } + getSummary() { return this; } diff --git a/lib/index.js b/lib/index.js index d4fbf59..1a2deca 100644 --- a/lib/index.js +++ b/lib/index.js @@ -5,11 +5,6 @@ import { fileURLToPath } from 'node:url'; import fs from 'node:fs'; const fsp = fs.promises; -// https://www.sitespeed.io/documentation/sitespeed.io/plugins/#create-your-own-plugin -// node bin\sitespeed.js -b edge -n 1 --plugins.add analysisstorer --plugins.add ../../../plugin-accessibility-statement/lib/index.js --browsertime.chrome.includeResponseBodies all https://webperf.se -// node bin\sitespeed.js -b edge -n 1 --plugins.add ../../../plugin-pagenotfound/lib/index.js --plugins.add ../../../plugin-css/lib/index.js --plugins.add ../../../plugin-accessibility-statement/lib/index.js --browsertime.chrome.includeResponseBodies all https://webperf.se -// node bin\sitespeed.js -b edge -n 1 --plugins.add ../../../plugin-pagenotfound/lib/index.js --plugins.add ../../../plugin-css/lib/index.js --plugins.add ../../../plugin-accessibility-statement/lib/index.js --plugins.add ../../../plugin-html/lib/index.js --browsertime.chrome.includeResponseBodies all https://webperf.se - const pluginname = 'webperf-plugin-accessibility-statement'; export default class AccessibilityStatementPlugin extends SitespeedioPlugin { @@ -24,7 +19,7 @@ export default class AccessibilityStatementPlugin extends SitespeedioPlugin { const libFolder = fileURLToPath(new URL('..', import.meta.url)); this.pluginFolder = path.resolve(libFolder); this.options = options; - this.log = context.log + this.log = context.log; this.pug = await fsp.readFile( path.resolve(this.pluginFolder, 'pug', 'index.pug'), @@ -33,10 +28,8 @@ export default class AccessibilityStatementPlugin extends SitespeedioPlugin { } async processMessage(message, queue) { - // const filterRegistry = this.filterRegistry; switch (message.type) { case 'browsertime.setup': { - // check https://github.com/sitespeedio/dashboard.sitespeed.io/blob/main/config/emulatedMobile.json for inspiration queue.postMessage(this.make('browsertime.config', { "chrome": { "includeResponseBodies": "all", @@ -48,12 +41,10 @@ export default class AccessibilityStatementPlugin extends SitespeedioPlugin { break; } case 'sitespeedio.setup': { - // Let other plugins know that our plugin is alive queue.postMessage(this.make(pluginname + '.setup', { 'version': this.version, 'dependencies': this.dependencies })); - // Add the HTML pugs queue.postMessage( this.make('html.pug', { id: pluginname, @@ -102,9 +93,6 @@ export default class AccessibilityStatementPlugin extends SitespeedioPlugin { ); } else { super.sendMessage( - // The HTML plugin will pickup every message names *.pageSummary - // and publish the data under pageInfo.data.*.pageSummary - // in this case pageInfo.data.gpsi.pageSummary pluginname + '.pageSummary', data, { @@ -126,6 +114,36 @@ export default class AccessibilityStatementPlugin extends SitespeedioPlugin { for (let group of Object.keys(summary.groups)) { this.harAnalyzer.checkNoAccessibilityStatement(group); + // FIX: När ingen tillgänglighetsredogörelse hittats har checkNoAccessibilityStatement just + // lagt till "no-a11y-statement" + alla resolved-regler på knowledgeData[0]. Per-sida-meddelandet + // (webPerfCoreSummary) skickades redan när browsertime.har bearbetades, så plugin-webperf-core + // har inte sett dessa issues. Vi publicerar nu om sidan så att plugin-webperf-core kan räkna + // ut en korrekt poäng och rapportera issuet. + if (this.isWebperfCorePluginPresent && !summary.groups[group]['has-a11y-statement']) { + const knowledgeDataArr = summary.groups[group]['knowledgeData']; + const analyzedDataArr = summary.groups[group]['analyzedData']; + if (knowledgeDataArr && knowledgeDataArr.length > 0) { + const firstKnowledge = knowledgeDataArr[0]; + const firstAnalyzed = analyzedDataArr && analyzedDataArr.length > 0 + ? analyzedDataArr[0] + : undefined; + super.sendMessage( + pluginname + '.webPerfCoreSummary', + { + version: this.harAnalyzer.version, + dependencies: this.harAnalyzer.dependencies, + url: firstKnowledge['url'], + analyzedData: firstAnalyzed, + knowledgeData: firstKnowledge + }, + { + url: firstKnowledge['url'], + group + } + ); + } + } + super.sendMessage(pluginname + '.summary', summary.groups[group], { group }); @@ -138,7 +156,4 @@ export default class AccessibilityStatementPlugin extends SitespeedioPlugin { } } } - // close(options, errors) { - // // Cleanup if necessary - // } } \ No newline at end of file