| |
|
|
| import fs from 'fs' |
| import path from 'path' |
|
|
| import cheerio from 'cheerio' |
| import coreLib from '@actions/core' |
| import { fetchWithRetry } from '@/frame/lib/fetch-utils' |
| import chalk from 'chalk' |
| import { JSONFilePreset } from 'lowdb/node' |
| import { type Octokit } from '@octokit/rest' |
| import type { Response } from 'express' |
|
|
| import type { ExtendedRequest, Page, Permalink, Context } from '@/types' |
| import shortVersions from '@/versions/middleware/short-versions' |
| import contextualize from '@/frame/middleware/context/context' |
| import features from '@/versions/middleware/features' |
| import getRedirect from '@/redirects/lib/get-redirect' |
| import warmServer from '@/frame/lib/warm-server' |
| import { liquid } from '@/content-render/index' |
| import { deprecated } from '@/versions/lib/enterprise-server-releases' |
| import excludedLinks from '@/links/lib/excluded-links' |
| import { getEnvInputs, boolEnvVar } from '@/workflows/get-env-inputs' |
| import { debugTimeEnd, debugTimeStart } from './debug-time-taken' |
| import { uploadArtifact as uploadArtifactLib } from './upload-artifact' |
| import github from '@/workflows/github' |
| import { getActionContext } from '@/workflows/action-context' |
| import { createMinimalProcessor } from '@/content-render/unified/processor' |
| import { createReportIssue, linkReports } from '@/workflows/issue-report' |
| import { type CoreInject } from '@/links/scripts/action-injections' |
|
|
/**
 * Outcome of checking a single link or image. When a flaw is displayed the
 * `CRITICAL` text is preferred; `WARNING` is used when no `CRITICAL` text is set.
 */
type Flaw = {
  WARNING?: string
  CRITICAL?: string
  /** Set to `true` on flaws produced by the external URL check. */
  isExternal?: boolean
}

/** A flaw plus the page/permalink it was found on and the offending link or image. */
type LinkFlaw = {
  page: Page
  permalink: Permalink
  /** `href` of the offending `<a>` element (link flaws). */
  href?: string
  url?: string
  /** Text content of the offending `<a>` element (link flaws). */
  text?: string
  /** `src` of the offending `<img>` element (image flaws). */
  src: string
  flaw: Flaw
}

/** Lookup table handed to `getRedirect()`. */
type Redirects = Record<string, string>
/** Pages keyed by the pathname they are looked up with. */
type PageMap = Record<string, Page>

/** Callback that stores `message` as an artifact called `name`. */
type UploadArtifact = (name: string, message: string) => void
|
|
/** Options accepted by `main()` and threaded through the per-page checks. */
type Options = {
  /** `'critical'` makes only flaws with a `CRITICAL` message count. */
  level?: string
  /** Restrict the run to pages whose relative/full path matches one of these. */
  files?: string[]
  /** Shuffle the page list before pages are selected. */
  random?: boolean
  /** Language code(s) of the pages to include. */
  language?: string | string[]
  /** Substrings matched against each page's `relativePath`. */
  filter?: string[]
  /** Page version(s); permalinks of other versions are skipped. */
  version?: string | string[]
  /** Upper bound on the number of pages checked. */
  max?: number
  /** Intended to control whether a new report is linked to earlier reports. */
  linkReports?: boolean
  /** URL of the workflow run, linked from the report header. */
  actionUrl?: string
  verbose?: boolean
  /** Also check `https://` links that point outside the site. */
  checkExternalLinks?: boolean
  /** Create a report issue when flaws are found. */
  createReport?: boolean
  /** Mark the action failed and exit with code 1 when flaws are found at `level`. */
  failOnFlaw?: boolean
  /** Create or update a pull request comment with the findings. */
  shouldComment?: boolean
  reportRepository?: string
  reportAuthor?: string
  reportLabel?: string
  /** Check that same-page `#anchor` links resolve to an element. */
  checkAnchors?: boolean
  /** Check `<img src>` values against the static file directories. */
  checkImages?: boolean
  /** Use more retries and a longer timeout for external requests. */
  patient?: boolean
  /** When truthy, 5xx responses and temporary request errors become warnings. */
  externalServerErrorsAsWarning?: string
  /** Base URL used to print absolute URLs for permalinks. */
  verboseUrl?: string
  /** Throw as soon as a page with flaws has been processed. */
  bail?: boolean
  /** Only include external-link flaws in the generated report/comment body. */
  commentLimitToExternalLinks?: boolean
  /** Action context; `owner`, `repo` and `pull_request.number` are read from it. */
  actionContext?: any
  /** Maximum number of concurrently running checks. */
  concurrency?: number
}
|
|
| |
/** Number of concurrent checks used when no `concurrency` option is given. */
const DEFAULT_CONCURRENCY_LIMIT = 3

/** Maps the first URL path segment of a static file to the directory it lives in on disk. */
const STATIC_PREFIXES: Record<string, string> = {
  assets: path.resolve('assets'),
  public: path.resolve(path.join('src', 'graphql', 'data')),
}

// Fail at import time if any of the static directories is missing.
Object.entries(STATIC_PREFIXES).forEach(([key, value]) => {
  if (fs.existsSync(value)) return
  throw new Error(`Can't find static prefix (${key}): ${value}`)
})
|
|
| |
| |
| |
/** Maximum age of a cached external-link result, in milliseconds (7 days unless overridden via env). */
const EXTERNAL_LINK_CHECKER_MAX_AGE_MS =
  parseInt(process.env.EXTERNAL_LINK_CHECKER_MAX_AGE_DAYS || '7') * 24 * 60 * 60 * 1000

/** File the external-link results are persisted to. */
const EXTERNAL_LINK_CHECKER_DB =
  process.env.EXTERNAL_LINK_CHECKER_DB || 'external-link-checker-db.json'

/** Shape of the persisted cache: one timestamped result per URL. */
type Data = {
  urls: Record<
    string,
    {
      timestamp: number
      result: {
        ok: boolean
        statusCode: number
      }
    }
  >
}

const defaultData: Data = { urls: {} }
const externalLinkCheckerDB = await JSONFilePreset<Data>(EXTERNAL_LINK_CHECKER_DB, defaultData)

type DBType = typeof externalLinkCheckerDB
|
|
| |
/**
 * Maps `asyncFn` over `items`, keeping at most `limit` calls in flight.
 *
 * Results are returned in the same order as `items`. The first rejection
 * rejects the returned promise.
 *
 * @param items Inputs to process.
 * @param asyncFn Async worker invoked once per item.
 * @param limit Maximum number of simultaneously running workers. `NaN` falls
 *   back to 3 and values below 1 are treated as 1.
 * @returns The worker results, in input order.
 */
async function limitConcurrency<T, R>(
  items: T[],
  asyncFn: (item: T) => Promise<R>,
  limit: number = 3,
): Promise<R[]> {
  // `size >= NaN` is always false, which would silently disable throttling
  // (e.g. when the limit comes from `parseInt` of a malformed env variable).
  const maxInFlight = Number.isNaN(limit) ? 3 : Math.max(1, limit)
  const results: Promise<R>[] = []
  const executing = new Set<Promise<R>>()

  for (const item of items) {
    // Start the worker synchronously; a synchronous throw becomes a rejection.
    const started: Promise<R> = (async () => asyncFn(item))()
    // Drop the task from the in-flight set whether it fulfils or rejects.
    const task: Promise<R> = started.finally(() => {
      executing.delete(task)
    })

    results.push(task)
    executing.add(task)

    if (executing.size >= maxInFlight) {
      // Wait until at least one in-flight task settles before starting more.
      await Promise.race(executing)
    }
  }

  return Promise.all(results)
}
|
|
| |
| |
| |
| |
| |
| |
| |
/**
 * Returns `base` moved up or down by a random amount of at most
 * `percentage` percent of `base`.
 *
 * @param base The value to perturb.
 * @param percentage Maximum deviation, expressed in percent of `base`.
 */
function jitter(base: number, percentage: number) {
  const fraction = percentage / 100
  // Pick the direction first, then the size of the deviation.
  const sign = Math.random() > 0.5 ? -1 : 1
  const deviation = base * Math.random() * fraction
  return base + deviation * sign
}
| |
| |
| |
| |
/**
 * Builds a predicate that tells whether an href is on the exclusion list,
 * either as an exact match (`is`) or by prefix (`startsWith`).
 */
function linksToSkipFactory() {
  const exactMatches = new Set(excludedLinks.map((entry) => entry.is).filter(Boolean))
  const prefixes = excludedLinks.map((entry) => entry.startsWith).filter(Boolean)
  return (href: string) => {
    if (exactMatches.has(href)) return true
    return prefixes.some((prefix) => prefix && href.startsWith(prefix))
  }
}

/** Predicate built once at module load from the excluded-links list. */
const linksToSkip = linksToSkipFactory()

/** Absolute path of the `content` directory, resolved against the working directory. */
const CONTENT_ROOT = path.resolve('content')

/** Matches pathnames that belong to a deprecated enterprise version. */
const deprecatedVersionPrefixesRegex = new RegExp(
  `enterprise(-server@|/)(${deprecated.join('|')})(/|$)`,
)
|
|
| |
// Only run when this file is executed directly (not when it is imported).
if (import.meta.url.endsWith(process.argv[1])) {
  const {
    ACTION_RUN_URL,
    LEVEL,
    FILES_CHANGED,
    REPORT_REPOSITORY,
    REPORT_AUTHOR,
    REPORT_LABEL,
    EXTERNAL_SERVER_ERRORS_AS_WARNINGS,
    CHECK_ANCHORS,
    CONCURRENCY,
  } = process.env

  const octokit = github()

  // FILES_CHANGED is a JSON array of paths. When it is set but contains no
  // files there is nothing to check, so exit successfully.
  let files: string[] | undefined
  if (FILES_CHANGED) {
    const fileList = JSON.parse(FILES_CHANGED)
    if (Array.isArray(fileList) && fileList.length > 0) {
      files = fileList
    } else {
      console.warn(`No changed files found in PR: ${FILES_CHANGED}. Exiting...`)
      process.exit(0)
    }
  }

  // A malformed or non-positive CONCURRENCY would otherwise reach the
  // throttling helper as NaN/0, so fall back to the default instead.
  const requestedConcurrency = CONCURRENCY ? parseInt(CONCURRENCY, 10) : NaN
  const concurrency =
    Number.isInteger(requestedConcurrency) && requestedConcurrency > 0
      ? requestedConcurrency
      : DEFAULT_CONCURRENCY_LIMIT

  const opts: Options = {
    level: LEVEL,
    files,
    verbose: true,
    linkReports: true,
    checkImages: true,
    checkAnchors: Boolean(CHECK_ANCHORS),
    patient: boolEnvVar('PATIENT'),
    random: false,
    language: 'en',
    actionUrl: ACTION_RUN_URL,
    checkExternalLinks: boolEnvVar('CHECK_EXTERNAL_LINKS'),
    shouldComment: boolEnvVar('SHOULD_COMMENT'),
    commentLimitToExternalLinks: boolEnvVar('COMMENT_LIMIT_TO_EXTERNAL_LINKS'),
    failOnFlaw: boolEnvVar('FAIL_ON_FLAW'),
    createReport: boolEnvVar('CREATE_REPORT'),
    reportRepository: REPORT_REPOSITORY,
    reportLabel: REPORT_LABEL,
    reportAuthor: REPORT_AUTHOR,
    actionContext: getActionContext(),
    externalServerErrorsAsWarning: EXTERNAL_SERVER_ERRORS_AS_WARNINGS,
    concurrency,
  }

  if (opts.shouldComment || opts.createReport) {
    // Commenting and report creation talk to the GitHub API, so require the
    // token up front. NOTE(review): presumably getEnvInputs throws when the
    // variable is missing; confirm against its implementation.
    getEnvInputs(['GITHUB_TOKEN'])
  }

  main(coreLib, octokit, uploadArtifactLib, opts)
}
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
/**
 * Entry point of the link checker.
 *
 * Loads the site's pages, selects the ones matching `opts`, renders every
 * permalink and checks its links (and optionally anchors, images and
 * external URLs). Afterwards it prints a summary, uploads all flaws as an
 * artifact, and, depending on `opts`, creates a report issue, comments on
 * the pull request and/or fails the action.
 *
 * @param core Actions core object used for logging, outputs and failure.
 * @param octokit GitHub client used for report issues and PR comments.
 * @param uploadArtifact Callback that stores the JSON flaws artifact.
 * @param opts See `Options`. `linkReports` defaults to on; pass `false` to
 *   skip linking a new report to earlier ones.
 * @throws When `filter` or `version` has an unsupported type, or when page
 *   processing throws. Calls `process.exit(1)` when `failOnFlaw` is set and
 *   flaws were found at the requested level.
 */
async function main(
  core: any,
  octokit: Octokit,
  uploadArtifact: UploadArtifact,
  opts: Options = {},
) {
  const {
    level = 'warning',
    files = [],
    random,
    language = 'en',
    filter,
    version,
    max,
    verbose,
    checkExternalLinks = false,
    createReport = false,
    failOnFlaw = false,
    shouldComment = false,
    reportRepository = 'github/docs-content',
    reportAuthor = 'docs-bot',
    reportLabel = 'broken link report',
    concurrency = DEFAULT_CONCURRENCY_LIMIT,
    // Renamed locally so it cannot be confused with the imported
    // `linkReports()` function, which is always truthy.
    linkReports: shouldLinkReports = true,
  } = opts

  const { redirects, pages: pageMap, pageList } = await warmServer([])

  if (files.length) {
    core.debug(`Limitting to files list: ${files.join(', ')}`)
  }

  // Normalize `language` to an array.
  let languages = language
  if (!Array.isArray(languages)) {
    languages = [languages]
  }

  const filters = filter || []
  if (filters && !Array.isArray(filters)) {
    throw new Error(`filters, ${filters} is not an array`)
  }

  // Normalize `version` to an array.
  let versions = version || []
  if (versions && typeof versions === 'string') {
    versions = [versions]
  } else if (!Array.isArray(versions)) {
    throw new Error(`versions, '${version}' is not an array`)
  }

  if (random) {
    shuffle(pageList)
  }

  debugTimeStart(core, 'getPages')
  const pages = getPages(pageList, languages, filters, files, max)
  debugTimeEnd(core, 'getPages')

  if (checkExternalLinks && pages.length >= 100) {
    core.warning(
      `Warning! Checking external URLs can be time costly. You're testing ${pages.length} pages.`,
    )
  }

  await externalLinkCheckerDB.read()

  if (verbose && checkExternalLinks) {
    core.info(`Checking of external links is is cached to ${EXTERNAL_LINK_CHECKER_DB}`)
    core.info(
      `External link cache max age is ${
        EXTERNAL_LINK_CHECKER_MAX_AGE_MS / 1000 / 60 / 60 / 24
      } days`,
    )
    let countNotTooOld = 0
    let countTooOld = 0
    for (const { timestamp } of Object.values(externalLinkCheckerDB.data.urls || {})) {
      const age = Date.now() - timestamp
      if (age > EXTERNAL_LINK_CHECKER_MAX_AGE_MS) {
        countTooOld++
      } else {
        countNotTooOld++
      }
    }
    core.info(
      `External link cache: ${countNotTooOld.toLocaleString()} are still fresh, ${countTooOld.toLocaleString()} links too old`,
    )
  }

  debugTimeStart(core, 'processPages')
  const t0 = new Date().getTime()
  const flawsGroups = await limitConcurrency(
    pages,
    (page: Page) =>
      processPage(
        core,
        page,
        pageMap,
        redirects,
        opts,
        externalLinkCheckerDB,
        versions as string[],
      ),
    concurrency,
  )
  const t1 = new Date().getTime()
  debugTimeEnd(core, 'processPages')

  // Persist whatever external-link results were gathered during this run.
  await externalLinkCheckerDB.write()

  const flaws = flawsGroups.flat()

  printGlobalCacheHitRatio(core)

  if (verbose) {
    summarizeCounts(core, pages, (t1 - t0) / 1000)
    core.info(`Checked ${(globalCacheHitCount + globalCacheMissCount).toLocaleString()} links`)
  }

  summarizeFlaws(core, flaws)

  const uniqueHrefs = new Set(flaws.map((flaw) => flaw.href))

  if (flaws.length > 0) {
    await uploadJsonFlawsArtifact(uploadArtifact, flaws, {
      verboseUrl: opts.verboseUrl,
    })
    core.info(`All flaws written to artifact log.`)
    if (createReport) {
      core.info(`Creating issue for flaws...`)
      const reportProps = {
        core,
        octokit,
        reportTitle: `${uniqueHrefs.size} broken links found`,
        reportBody: flawIssueDisplay(flaws, opts),
        reportRepository,
        reportLabel,
      }
      const newReport = await createReportIssue(reportProps)

      if (shouldLinkReports) {
        const linkProps = {
          core,
          octokit,
          newReport,
          reportRepository,
          reportAuthor,
          reportLabel,
        }
        await linkReports(linkProps)
      }
    }
    if (shouldComment) {
      await commentOnPR(core, octokit, flaws, opts)
    }

    // With level 'critical' only critical flaws count; otherwise every flaw does.
    const flawsInLevel = flaws.filter((flaw) => {
      if (level === 'critical') {
        return flaw?.flaw?.CRITICAL
      }

      return true
    })

    if (flawsInLevel.length > 0) {
      core.setOutput('has_flaws_at_level', flawsInLevel.length > 0)
      if (failOnFlaw) {
        core.setFailed(
          `${flaws.length} broken links found. See action artifact uploads for details`,
        )
        process.exit(1)
      }
    }
  } else {
    // Still call commentOnPR so that an earlier comment about broken links
    // can be updated now that there is nothing to report.
    if (shouldComment) {
      await commentOnPR(core, octokit, flaws, opts)
    }
  }
}
|
|
/**
 * Creates or updates the link checker's comment on the current pull request.
 *
 * The comment carries a hidden marker so a later run can find and update it
 * instead of adding another one. When there is nothing to report, an
 * existing comment is replaced with a short "now moot" note.
 *
 * @param core Actions core object used for logging and failure reporting.
 * @param octokit GitHub client.
 * @param flaws All flaws found in this run.
 * @param opts Options; `actionContext` must provide `owner`, `repo` and
 *   `pull_request.number`, otherwise a warning is logged and nothing is posted.
 * @throws Rethrows the error when creating a new comment fails.
 */
async function commentOnPR(core: CoreInject, octokit: Octokit, flaws: LinkFlaw[], opts: Options) {
  const { actionContext = {} } = opts
  const { owner, repo } = actionContext
  const pullNumber = actionContext?.pull_request?.number
  if (!owner || !repo || !pullNumber) {
    core.warning(`commentOnPR called outside of PR action runner context. Not creating comment.`)
    return
  }

  // Hidden HTML marker used to recognize our own comment on later runs.
  const findAgainSymbol = '<!-- rendered-content-link-checker-comment-finder -->'

  const body = flawIssueDisplay(flaws, opts, false)

  // Ask for the largest page the API allows; with the default page size the
  // earlier comment is easily missed on busy pull requests, which would lead
  // to a duplicate comment. Only this first page is searched.
  const { data } = await octokit.rest.issues.listComments({
    owner,
    repo,
    issue_number: pullNumber,
    per_page: 100,
  })
  let previousCommentId
  for (const { body: commentBody, id } of data) {
    if (commentBody && commentBody.includes(findAgainSymbol)) {
      previousCommentId = id
    }
  }

  // Nothing to report: at most rewrite an earlier comment.
  if (!body) {
    core.info('No flaws qualify for comment')

    if (previousCommentId) {
      const nothingComment = 'Previous broken links comment now moot. 👌😙'
      await octokit.rest.issues.updateComment({
        owner,
        repo,
        comment_id: previousCommentId,
        body: `${nothingComment}\n\n${findAgainSymbol}`,
      })
      core.info(`Updated comment on PR: ${pullNumber} (${previousCommentId})`)
    }
    return
  }

  if (previousCommentId) {
    const noteComment = '(*The original automated comment was updated*)'
    await octokit.rest.issues.updateComment({
      owner,
      repo,
      comment_id: previousCommentId,
      body: `${body}\n\n${noteComment}\n\n${findAgainSymbol}`,
    })
    core.info(`Updated comment on PR: ${pullNumber} (${previousCommentId})`)
    return
  }

  try {
    await octokit.rest.issues.createComment({
      owner,
      repo,
      issue_number: pullNumber,
      body: `${body}\n\n${findAgainSymbol}`,
    })
    core.info(`Created comment on PR: ${pullNumber}`)
  } catch (error) {
    core.setFailed(`Error commenting on PR when there are flaws`)
    throw error
  }
}
|
|
/**
 * Renders the flaws as the Markdown body used for report issues and PR comments.
 *
 * Flaws are grouped by page and then by link (href, or image src), listing
 * every permalink the link was found on. Returns an empty string when no
 * flaw qualifies. Output longer than 60,000 characters is truncated.
 *
 * @param flaws All flaws found.
 * @param opts `commentLimitToExternalLinks` restricts the output to external
 *   flaws; `actionUrl` is linked from the header line.
 * @param mentionExternalExclusionList Append the note about the exclusion list.
 */
function flawIssueDisplay(flaws: LinkFlaw[], opts: Options, mentionExternalExclusionList = true) {
  type LinkFlawWithPermalink = {
    href?: string
    url?: string
    text?: string
    src: string
    flaw: Flaw
    permalinkHrefs: string[]
  }

  // page path -> link key -> flaw details plus the permalinks it appears on
  const grouped: Record<string, Record<string, LinkFlawWithPermalink>> = {}
  let flawsToDisplay = 0

  for (const entry of flaws) {
    const { page, permalink, href, text, src, flaw } = entry
    if (opts.commentLimitToExternalLinks && !flaw.isExternal) {
      continue
    }
    flawsToDisplay += 1

    let linksOnPage = grouped[page.fullPath]
    if (!linksOnPage) {
      linksOnPage = {}
      grouped[page.fullPath] = linksOnPage
    }

    const linkKey = href || src
    let record = linksOnPage[linkKey]
    if (!record) {
      record = { href, text, src, flaw, permalinkHrefs: [] }
      linksOnPage[linkKey] = record
    }

    if (!record.permalinkHrefs.includes(permalink.href)) {
      record.permalinkHrefs.push(permalink.href)
    }
  }

  // Nothing qualified, so there is nothing to render.
  if (!flawsToDisplay) {
    return ''
  }

  let details = ''
  for (const [pagePath, pageHrefs] of Object.entries(grouped)) {
    details += `\n\n### In \`${prettyFullPath(pagePath)}\`\n`

    for (const hrefObj of Object.values(pageHrefs)) {
      if (hrefObj.href) {
        details += `\n\n - Href: [${hrefObj.href}](${hrefObj.href})`
        details += `\n - Text: ${hrefObj.text}`
      } else if (hrefObj.src) {
        details += `\n\n - Image src: [${hrefObj.src}](${hrefObj.src})`
      } else {
        details += `\n\n - WORKFLOW ERROR: Flaw has neither 'href' nor 'src'`
      }
      details += `\n - Flaw: \`${hrefObj.flaw.CRITICAL || hrefObj.flaw.WARNING}\``
      details += `\n - On permalinks`
      for (const permalinkHref of hrefObj.permalinkHrefs) {
        details += `\n - \`${permalinkHref}\``
      }
    }
  }

  if (mentionExternalExclusionList) {
    details +=
      '\n\n---\n\nIf any link reported in this issue is not actually broken ' +
      'and repeatedly shows up on reports, consider making a PR that adds it as an exception to `src/links/lib/excluded-links.ts`. ' +
      'For more information, see [Fixing broken links in GitHub user docs](https://github.com/github/docs/blob/main/src/links/lib/README.md).'
  }

  const qualifier = opts.commentLimitToExternalLinks ? ' **external** ' : ' '
  const header = `${flawsToDisplay} broken${qualifier}links found in [this](${opts.actionUrl}) workflow.\n`
  const output = `${header}${details}`

  // Keep the body under the size limit for issue/comment bodies.
  if (output.length > 60000) {
    return `${output.slice(0, 60000)}\n\n---\n\nOUTPUT TRUNCATED`
  }
  return output
}
|
|
/**
 * Logs, at debug level, how many link checks were answered from the
 * in-memory caches. Logs nothing when no link was checked.
 */
function printGlobalCacheHitRatio(core: CoreInject) {
  const total = globalCacheMissCount + globalCacheHitCount
  // Avoid a division by zero when nothing was checked.
  if (!total) return

  const percent = ((100 * globalCacheHitCount) / total).toFixed(1)
  core.debug(
    `Cache hit ratio: ${globalCacheHitCount.toLocaleString()} of ${total.toLocaleString()} (${percent}%)`,
  )
}
|
|
/**
 * Selects the pages to check.
 *
 * A page is kept when it matches every non-empty criterion: its language is
 * in `languages`, its relative path contains one of `filters`, and it
 * matches one of `files`. At most `max` pages are returned when `max` is set.
 *
 * @param pageList All known pages.
 * @param languages Language codes to include (empty = all).
 * @param filters Substrings of `relativePath` to include (empty = all).
 * @param files File paths to include (empty = all).
 * @param max Optional cap on the number of returned pages.
 */
function getPages(
  pageList: Page[],
  languages: string[],
  filters: string[],
  files: string[],
  max: number | undefined,
) {
  // A file matches when it equals the page's relative or full path, or when
  // it resolves (from the working directory) to the page's file under the
  // content root.
  const isSamePage = (page: Page, file: string) =>
    page.relativePath === file ||
    page.fullPath === file ||
    path.join(CONTENT_ROOT, page.relativePath) === path.resolve(file)

  const selected = pageList.filter((page: Page) => {
    const languageMismatch = languages.length && !languages.includes(page.languageCode)
    if (languageMismatch) return false

    const filterMismatch =
      filters.length && !filters.find((filter) => page.relativePath.includes(filter))
    if (filterMismatch) return false

    const fileMismatch = files.length && !files.find((file) => isSamePage(page, file))
    if (fileMismatch) return false

    return true
  })

  const end = max ? Math.min(max, pageList.length) : pageList.length
  return selected.slice(0, end)
}
|
|
/**
 * Checks every permalink of `page` (restricted to `versions` when given) and
 * returns all flaws found across them.
 *
 * @param core Actions core object used for logging.
 * @param page The page to check.
 * @param pageMap All pages, used to resolve internal links.
 * @param redirects Redirect table used to resolve internal links.
 * @param opts `verbose` prints the flaws, `bail` throws when any were found.
 * @param db External-link result cache.
 * @param versions Page versions to include; empty means all.
 * @throws When `opts.bail` is set and the page has at least one flaw.
 */
async function processPage(
  core: CoreInject,
  page: Page,
  pageMap: PageMap,
  redirects: Redirects,
  opts: Options,
  db: DBType,
  versions: string[],
) {
  const { verbose, verboseUrl, bail } = opts
  const filteredPermalinks = page.permalinks.filter((permalink) => {
    return !versions.length || versions.includes(permalink.pageVersion)
  })
  const allFlawsEach = await limitConcurrency(
    filteredPermalinks,
    (permalink) => processPermalink(core, permalink, page, pageMap, redirects, opts, db),
    opts.concurrency || DEFAULT_CONCURRENCY_LIMIT,
  )

  const allFlaws = allFlawsEach.flat()

  if (allFlaws.length > 0) {
    if (verbose) {
      printFlaws(core, allFlaws, { verboseUrl })
    }

    if (bail) {
      if (!verbose) {
        console.warn('Use --verbose to see the flaws before it exits')
      }
      // Bailing happens on the first flaw, so report the actual count.
      throw new Error(`Found ${allFlaws.length} flaw(s) in ${page.relativePath}`)
    }
  }

  return allFlaws
}
|
|
/**
 * Renders one permalink of a page and checks every `<a href>` in it and,
 * when `opts.checkImages` is set, every `<img src>`.
 *
 * Results for non-anchor hrefs and for image sources are memoized in the
 * module-level caches, so a link shared by many pages is only checked once.
 *
 * @param core Actions core object used for logging.
 * @param permalink The permalink being rendered.
 * @param page The page the permalink belongs to.
 * @param pageMap All pages, used to resolve internal links.
 * @param redirects Redirect table used to resolve internal links.
 * @param opts Check options; with `level` of `'critical'` (the default here)
 *   flaws without a `CRITICAL` message are dropped.
 * @param db External-link result cache.
 * @returns The flaws found on this permalink.
 * @throws Rethrows rendering errors after logging which page failed.
 */
async function processPermalink(
  core: any,
  permalink: Permalink,
  page: Page,
  pageMap: PageMap,
  redirects: Redirects,
  opts: Options,
  db: DBType,
) {
  const {
    level = 'critical',
    checkAnchors,
    checkImages,
    checkExternalLinks,
    verbose,
    patient,
    externalServerErrorsAsWarning,
  } = opts
  let html = ''
  try {
    html = await renderInnerHTML(page, permalink)
  } catch (error) {
    console.warn(
      `The error happened trying to render ${page.relativePath} (permalink: ${permalink.href})`,
    )
    throw error
  }
  const $ = cheerio.load(html, { xmlMode: true })
  const flaws: LinkFlaw[] = []
  const links: cheerio.Element[] = []
  $('a[href]').each((i, link) => {
    links.push(link)
  })
  const newFlaws: LinkFlaw[] = await limitConcurrency(
    links,
    async (link) => {
      const { href } = (link as cheerio.TagElement).attribs

      // Same-page anchors depend on the page they are on, so only other
      // hrefs go through the global cache.
      if (!href.startsWith('#')) {
        if (globalHrefCheckCache.has(href)) {
          globalCacheHitCount++
          return globalHrefCheckCache.get(href)
        }
        globalCacheMissCount++
      }

      const flaw = await checkHrefLink(
        core,
        href,
        $,
        redirects,
        pageMap,
        checkAnchors,
        checkExternalLinks,
        externalServerErrorsAsWarning,
        permalink,
        { verbose, patient },
        db,
      )

      if (flaw) {
        if (level === 'critical' && !flaw.CRITICAL) {
          return
        }
        const text = $(link).text()
        if (!href.startsWith('#')) {
          globalHrefCheckCache.set(href, { href, flaw, text })
        }
        return { href, flaw, text }
      } else {
        if (!href.startsWith('#')) {
          globalHrefCheckCache.set(href, flaw)
        }
      }
    },
    opts.concurrency || DEFAULT_CONCURRENCY_LIMIT,
  )

  for (const flaw of newFlaws) {
    if (flaw) {
      // Copy instead of mutating: on a cache hit `flaw` is the object stored
      // in the global cache and shared by every page that uses the href, so
      // assigning page/permalink onto it would rewrite earlier entries too.
      flaws.push({ ...flaw, page, permalink })
    }
  }

  if (checkImages) {
    $('img[src]').each((i, img) => {
      let { src } = (img as cheerio.TagElement).attribs

      // Drop the `/cb-1234/` path segment so the src maps to a file on disk.
      src = src.replace(/\/cb-\d+\//, '/')

      // A cached result must still be reported for this permalink; returning
      // from the `.each` callback would just discard it.
      let flaw: Flaw | undefined
      if (globalImageSrcCheckCache.has(src)) {
        globalCacheHitCount++
        flaw = globalImageSrcCheckCache.get(src)
      } else {
        flaw = checkImageSrc(src)
        globalImageSrcCheckCache.set(src, flaw)
      }

      if (flaw) {
        if (level === 'critical' && !flaw.CRITICAL) {
          return
        }
        flaws.push({ permalink, page, src, flaw })
      }
    })
  }

  return flaws
}
|
|
/**
 * Serializes the flaws, grouped by page path, to pretty-printed JSON and
 * hands the result to `uploadArtifact`.
 *
 * Link flaws are stored with href, permalink URL, link text and flaw; image
 * flaws are stored with their `src` only.
 *
 * @param uploadArtifact Callback receiving the artifact name and JSON text.
 * @param flaws Flaws to serialize.
 * @param verboseUrl Optional base URL used to make permalink URLs absolute.
 * @param artifactName Name of the artifact.
 */
async function uploadJsonFlawsArtifact(
  uploadArtifact: UploadArtifact,
  flaws: LinkFlaw[],
  { verboseUrl = null }: { verboseUrl?: string | null } = {},
  artifactName = 'all-rendered-link-flaws.json',
) {
  type PrintableLinkFlaw = {
    href?: string
    url?: string
    text?: string
    src?: string
    flaw?: Flaw
  }

  const printableFlaws: Record<string, PrintableLinkFlaw[]> = {}
  for (const entry of flaws) {
    const fullPath = prettyFullPath(entry.page.fullPath)
    if (!(fullPath in printableFlaws)) {
      printableFlaws[fullPath] = []
    }
    const bucket = printableFlaws[fullPath]

    if (entry.href) {
      const permalinkHref = entry.permalink.href
      const url = verboseUrl ? new URL(permalinkHref, verboseUrl).toString() : permalinkHref
      bucket.push({ href: entry.href, url, text: entry.text, flaw: entry.flaw })
    } else if (entry.src) {
      bucket.push({ src: entry.src })
    }
  }

  return uploadArtifact(artifactName, JSON.stringify(printableFlaws, undefined, 2))
}
|
|
/**
 * Logs the flaws in a readable form: a `PAGE` line whenever the page
 * changes, a `URL`/`PERMALINK` line whenever the permalink of a link flaw
 * changes, then the link (or image) and its flaw.
 *
 * @param core Actions core object used for logging.
 * @param flaws Flaws to print.
 * @param verboseUrl Optional base URL; when set, absolute URLs are printed
 *   instead of permalink paths.
 * @throws When a flaw has neither `href` nor `src`.
 */
function printFlaws(
  core: CoreInject,
  flaws: LinkFlaw[],
  { verboseUrl }: { verboseUrl?: string | undefined } = {},
) {
  let lastPage: Page | null = null
  let lastPermalinkHref: string | null = null

  for (const { page, permalink, href, text, src, flaw } of flaws) {
    if (page !== lastPage) {
      core.info(`PAGE: ${chalk.bold(prettyFullPath(page.fullPath))}`)
    }
    lastPage = page

    if (!href && !src) {
      throw new Error("Flaw has neither 'href' nor 'src'")
    }

    if (href) {
      if (lastPermalinkHref !== permalink.href) {
        const location = verboseUrl
          ? ` URL: ${new URL(permalink.href, verboseUrl).toString()}`
          : ` PERMALINK: ${permalink.href}`
        core.info(location)
      }
      // Only link flaws update the remembered permalink.
      lastPermalinkHref = permalink.href

      core.info(` HREF: ${chalk.bold(href)}`)
      core.info(` TEXT: ${text}`)
    } else {
      core.info(` IMG SRC: ${chalk.bold(src)}`)
    }

    core.info(` FLAW: ${flaw.CRITICAL ? chalk.red(flaw.CRITICAL) : chalk.yellow(flaw.WARNING)}`)
  }
}
|
|
| |
| |
| |
| |
| |
| |
| |
/**
 * Returns `fullPath` relative to the current working directory, which keeps
 * printed paths short.
 */
function prettyFullPath(fullPath: string) {
  const workingDirectory = process.cwd()
  return path.relative(workingDirectory, fullPath)
}
|
|
// Run-wide memoization of check results, keyed by href and by image src.
const globalHrefCheckCache = new Map()
const globalImageSrcCheckCache = new Map()

// Counters behind the cache hit ratio and the "Checked N links" summary.
let globalCacheHitCount = 0
let globalCacheMissCount = 0
|
|
/**
 * Checks a single `href` found on a rendered page.
 *
 * Three kinds of links are distinguished:
 * - same-page anchors (`#foo`, or the page's own permalink plus `#foo`),
 *   checked against the rendered DOM when `checkAnchors` is set;
 * - internal links (starting with `/`), checked against static files, the
 *   redirect table and the page map;
 * - everything else, checked over the network when `checkExternalLinks` is set.
 *
 * @param core Actions core object used for logging.
 * @param href The link target as written in the rendered HTML.
 * @param $ The loaded document the link came from.
 * @param redirects Redirect table.
 * @param pageMap All pages keyed by pathname.
 * @param checkAnchors Whether same-page anchors are verified.
 * @param checkExternalLinks Whether external links are fetched.
 * @param externalServerErrorsAsWarning When truthy, 5xx responses and
 *   temporary request errors are reported as warnings.
 * @param permalink The permalink of the page being checked.
 * @param db External-link result cache.
 * @returns A flaw, or `undefined` when the link is fine or was not checked.
 */
async function checkHrefLink(
  core: any,
  href: string,
  $: cheerio.Root,
  redirects: Redirects,
  pageMap: PageMap,
  checkAnchors = false,
  checkExternalLinks = false,
  externalServerErrorsAsWarning: string | undefined | null = null,
  permalink: Permalink,
  { verbose = false, patient = false }: { verbose?: boolean; patient?: boolean } = {},
  db: DBType | null = null,
): Promise<Flaw | undefined> {
  const [pathFragment, hashFragment] = href.split('#')

  // Same-page anchor. Requiring an actual `#` keeps a plain link to the
  // page's own permalink from being looked up as the selector `#undefined`
  // and reported as a missing anchor; it is checked as an internal link below.
  if (
    checkAnchors &&
    hashFragment !== undefined &&
    (!pathFragment || pathFragment === permalink.href)
  ) {
    const hash = `#${hashFragment}`
    if (hash === '#') {
      return { WARNING: 'Link is just an empty `#`' }
    }

    // Pages for which anchor lookups in the rendered HTML are skipped.
    const avoid =
      permalink &&
      ((permalink.href.includes('/rest/') && !permalink.href.includes('/rest/guides/')) ||
        permalink.href.includes('/webhooks-and-events/webhooks/webhook-events-and-payloads') ||
        permalink.href.includes('/graphql/reference') ||
        permalink.href.includes('/code-security/codeql-cli/codeql-cli-manual/') ||
        permalink.href.includes(
          '/apps/maintaining-github-apps/modifying-a-github-app-registration',
        ) ||
        permalink.href.includes(
          '/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/customizing-your-advanced-setup-for-code-scanning',
        ) ||
        permalink.href.includes(
          '/site-policy/github-company-policies/github-statement-against-modern-slavery-and-child-labor',
        ) ||
        permalink.href.includes('/site-policy/content-removal-policies/dmca-takedown-policy') ||
        permalink.href.includes('/early-access/'))

    // `#top` is never looked up in the document.
    if (hash !== '#top' && !avoid) {
      // The target may be an element with that id or an `<a name="...">`.
      const countDOMItems = $(hash).length + $(`a[name="${hash.slice(1)}"]`).length
      if (countDOMItems === 0) {
        return { CRITICAL: `Anchor on the same page can't be found by ID` }
      } else if (countDOMItems > 1) {
        return { CRITICAL: `Matches multiple points in the page` }
      }
    }
    return
  }

  // Internal link
  if (href.startsWith('/')) {
    // Parsing as a URL strips the query string and the hash.
    const pathname = new URL(href, 'http://example.com').pathname

    if (pathname.endsWith('/')) {
      const whatifPathname = pathname.slice(0, -1)
      const redirect = getRedirect(whatifPathname, { redirects, pages: pageMap })
      if (redirect) {
        return { WARNING: `Redirect to ${redirect}` }
      }
      if (!pageMap[whatifPathname] && !deprecatedVersionPrefixesRegex.test(whatifPathname)) {
        return { CRITICAL: 'Broken link' }
      }
      return { WARNING: 'Links with a trailing / will always redirect' }
    }

    // URL pathnames always use `/`, independent of the OS path separator.
    const segments = pathname.split('/')
    const firstPart = segments[1]
    // Own-property check so that segments such as `constructor` don't match.
    if (Object.prototype.hasOwnProperty.call(STATIC_PREFIXES, firstPart)) {
      const staticFilePath = path.join(STATIC_PREFIXES[firstPart], ...segments.slice(2))
      if (!fs.existsSync(staticFilePath)) {
        return { CRITICAL: `Static file not found ${staticFilePath} (${pathname})` }
      }
      return
    }

    const redirect = getRedirect(pathname, { redirects, pages: pageMap })
    if (redirect) {
      return { WARNING: `Redirect to ${redirect}` }
    }
    if (!pageMap[pathname]) {
      // Links into deprecated enterprise versions are not reported.
      if (deprecatedVersionPrefixesRegex.test(pathname)) {
        return
      }
      return { CRITICAL: 'Broken link' }
    }
    return
  }

  // External link
  if (!checkExternalLinks) {
    return
  }
  if (!href.startsWith('https://')) {
    return { WARNING: `Will not check external URLs that are not HTTPS (${href})` }
  }
  if (linksToSkip(href)) {
    return
  }
  const { ok, ...info } = await checkExternalURLCached(core, href, { verbose, patient }, db)
  if (ok) {
    return
  }

  // Server-side (5xx) and temporary network errors can be downgraded to a
  // warning, since they don't prove the link is wrong.
  let problem = 'CRITICAL'
  if (externalServerErrorsAsWarning) {
    if (
      (info.statusCode && info.statusCode >= 500) ||
      (info.requestError && isTemporaryRequestError(info.requestError))
    ) {
      problem = 'WARNING'
    }
  }
  return { [problem]: `Broken external link (${JSON.stringify(info)})`, isExternal: true }
}
|
|
| |
| |
| |
| |
| |
| |
/**
 * Tells whether a request error message names one of the connection error
 * codes that are treated as temporary (reset, refused, timed out, aborted).
 *
 * @param requestError The error message, if any.
 * @returns `true` when the message contains one of the codes; `false`
 *   otherwise, including when `requestError` is not a string.
 */
function isTemporaryRequestError(requestError: string | undefined) {
  if (typeof requestError !== 'string') {
    return false
  }
  for (const code of ['ECONNRESET', 'ECONNREFUSED', 'ETIMEDOUT', 'ECONNABORTED']) {
    if (requestError.includes(code)) {
      return true
    }
  }
  return false
}
|
|
| |
| |
| |
/**
 * Checks an external URL, using the persisted result cache when possible.
 *
 * - URLs on a domain that answered 429 within the last hour are not
 *   requested again and are reported as failed.
 * - A cached result younger than the (jittered) maximum age is returned as is.
 * - Otherwise the URL is fetched; only successful results are stored.
 *
 * @param core Actions core object used for logging.
 * @param href The external link; its `#fragment` is ignored.
 * @param verbose Log cache and rate-limit decisions.
 * @param patient Passed through to the fetch (more retries, longer timeout).
 * @param db The result cache, or `null` to skip caching.
 */
async function checkExternalURLCached(
  core: CoreInject,
  href: string,
  { verbose, patient }: { verbose?: boolean; patient?: boolean },
  db: DBType | null,
) {
  const cacheMaxAge = EXTERNAL_LINK_CHECKER_MAX_AGE_MS
  const now = new Date().getTime()
  const url = href.split('#')[0]

  // Don't hit a domain again while it is rate limiting us.
  const { hostname } = new URL(url)
  const rateLimitTime = _rateLimitedDomains.get(hostname)
  if (rateLimitTime) {
    const oneHourAgo = Date.now() - 60 * 60 * 1000
    if (rateLimitTime > oneHourAgo) {
      if (verbose) core.info(`Skipping ${url} - domain ${hostname} is rate limited`)
      return { ok: false, statusCode: 429, skipReason: 'Domain rate limited' }
    } else {
      // The rate limit period is over.
      _rateLimitedDomains.delete(hostname)
    }
  }

  if (cacheMaxAge) {
    // The jitter spreads out expiry so that entries stored at the same time
    // don't all become stale in the same run.
    const tooOld = now - Math.floor(jitter(cacheMaxAge, 10))
    const cached = db?.data.urls[url]
    if (db && cached) {
      if (cached.timestamp > tooOld) {
        if (verbose) {
          core.info(`External URL ${url} in cache`)
        }
        return cached.result
      }
      if (verbose) {
        core.info(`External URL ${url} in cache but too old`)
      }
      // Always drop the stale entry (not only when verbose); otherwise it
      // would stay in the cache file whenever the re-check below fails.
      delete db.data.urls[url]
    }
  }

  const result = await checkExternalURL(core, href, {
    verbose,
    patient,
  })

  if (cacheMaxAge) {
    // Only successful results are cached, so failures are re-checked on
    // the next run.
    if (db && result.ok) {
      db.data.urls[url] = {
        timestamp: now,
        result,
      }
    }
  }

  return result
}
|
|
/** In-flight and completed fetches of this run, keyed by URL without fragment. */
const _fetchCache = new Map()

/**
 * Fetches an external URL at most once per run; concurrent and repeated
 * calls for the same URL (ignoring the `#fragment`) share one request.
 *
 * @throws When `url` does not start with `https://`.
 */
async function checkExternalURL(
  core: CoreInject,
  url: string,
  { verbose = false, patient = false } = {},
) {
  if (!url.startsWith('https://')) throw new Error('Invalid URL')

  const [cleanURL] = url.split('#')
  if (_fetchCache.has(cleanURL)) {
    return _fetchCache.get(cleanURL)
  }

  // Store the promise itself so that parallel callers await the same request.
  const pending = innerFetch(core, cleanURL, { verbose, patient })
  _fetchCache.set(cleanURL, pending)
  return _fetchCache.get(cleanURL)
}
|
|
| |
| |
/** Hostnames that answered 429, mapped to the time (ms) the response was seen. */
const _rateLimitedDomains = new Map<string, number>()

/**
 * Requests `url` and reports whether it responded successfully.
 *
 * A `HEAD` request is tried first; when it is answered with 403, 404 or
 * 405 the request is repeated with `GET`. A 429 response marks the URL's
 * domain as rate limited.
 *
 * @param core Actions core object used for logging.
 * @param url The URL to request.
 * @param config `patient` selects 3 retries and a 10s timeout instead of 2
 *   retries and 5s; `verbose` enables logging.
 * @returns `{ ok, statusCode }` for a response, or
 *   `{ ok: false, requestError }` when the request itself failed.
 */
async function innerFetch(
  core: CoreInject,
  url: string,
  config: { verbose?: boolean; patient?: boolean; retries?: number } = {},
) {
  const { verbose, patient } = config

  const headers = {
    'User-Agent':
      'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36',
  }
  const retries = patient ? 3 : 2
  const timeout = patient ? 10000 : 5000

  // HTTP error statuses are inspected below rather than thrown.
  const request = (method: 'HEAD' | 'GET') =>
    fetchWithRetry(url, { method, headers }, { retries, timeout, throwHttpErrors: false })

  if (verbose) core.info(`External URL HEAD: ${url}`)

  try {
    let response = await request('HEAD')

    // Retry with GET when the HEAD request is answered with one of these statuses.
    if ([405, 404, 403].includes(response.status)) {
      if (verbose) core.info(`External URL GET: ${url} (HEAD failed with ${response.status})`)
      response = await request('GET')
    }

    if (verbose) {
      core.info(`External URL ${url}: ${response.status}`)
    }

    // Remember rate limiting per domain so further URLs on it are skipped.
    const { hostname } = new URL(url)
    if (response.status === 429) {
      _rateLimitedDomains.set(hostname, Date.now())
      if (verbose) core.info(`Domain ${hostname} is now rate limited for 1 hour`)
    }

    if (verbose) {
      const colorize = response.ok ? chalk.green : chalk.red
      core.info(colorize(`${response.status} on ${url}`))
    }
    return { ok: response.ok, statusCode: response.status }
  } catch (err) {
    if (!(err instanceof Error)) {
      throw err
    }
    if (verbose) {
      core.info(chalk.yellow(`Request Error (${err.message}) on ${url}`))
    }
    return { ok: false, requestError: err.message }
  }
}
|
|
/**
 * Checks an `<img src>` value.
 *
 * - Relative paths are critical: image paths must start with `/`.
 * - `http://` images are critical; other external images are not checked
 *   and only produce a warning.
 * - Site-relative paths must start with a known static prefix and point to
 *   an existing file under that prefix's directory.
 *
 * @param src The image source (with any cache-busting segment removed).
 * @returns A flaw, or `undefined` when the image file exists.
 */
function checkImageSrc(src: string) {
  if (!src.startsWith('/') && !src.startsWith('http')) {
    return { CRITICAL: 'Image path is not absolute. Should start with a /' }
  }
  // These checks must look at `src` itself: the pathname of a parsed URL
  // always starts with `/`, so testing it can never detect an external image.
  if (src.startsWith('http://')) {
    return { CRITICAL: "Don't use insecure HTTP:// for external images" }
  }
  if (!src.startsWith('/')) {
    return { WARNING: "External images can't be checked" }
  }

  // Parsing as a URL strips the query string and the hash.
  const pathname = new URL(src, 'http://example.com').pathname
  // URL pathnames always use `/`, independent of the OS path separator.
  const segments = pathname.split('/')
  const prefix = segments[1]

  // Own-property check so that a segment like `constructor` isn't accepted.
  if (!Object.prototype.hasOwnProperty.call(STATIC_PREFIXES, prefix)) {
    return { WARNING: `Unrecognized image src prefix (${prefix})` }
  }

  const staticFilePath = path.join(STATIC_PREFIXES[prefix], ...segments.slice(2))
  if (!fs.existsSync(staticFilePath)) {
    return { CRITICAL: `Static file not found (${pathname})` }
  }
  return undefined
}
|
|
/** Logs the total number of flaws found, or a success message when there are none. */
function summarizeFlaws(core: CoreInject, flaws: LinkFlaw[]) {
  const total = flaws.length
  if (!total) {
    core.info(chalk.green('No flaws found! 💖'))
    return
  }
  const plural = total === 1 ? '' : 's'
  core.info(chalk.bold(`Found ${total.toLocaleString()} flaw${plural} in total.`))
}
|
|
/**
 * Logs how many permalinks and pages were tested, how long it took and the
 * resulting throughput.
 *
 * @param core Actions core object used for logging.
 * @param pages The pages that were tested.
 * @param tookSeconds Elapsed time in seconds.
 */
function summarizeCounts(core: CoreInject, pages: Page[], tookSeconds: number) {
  const permalinkCount = pages.reduce((sum, page) => sum + page.permalinks.length, 0)
  const pageCount = pages.length

  core.info(
    `Tested ${permalinkCount.toLocaleString()} permalinks across ${pageCount.toLocaleString()} pages`,
  )
  core.info(`Took ${Math.floor(tookSeconds)} seconds. (~${(tookSeconds / 60).toFixed(1)} minutes)`)
  core.info(`~${(permalinkCount / tookSeconds).toFixed(1)} permalinks per second.`)
  core.info(`~${(pageCount / tookSeconds).toFixed(1)} pages per second.`)
}
|
|
/**
 * Shuffles `array` in place (Fisher-Yates) and returns the same array.
 */
function shuffle(array: any[]) {
  // Walk from the end: swap a randomly picked remaining element into the
  // last not-yet-fixed position.
  for (let remaining = array.length; remaining !== 0; ) {
    const pick = Math.floor(Math.random() * remaining)
    remaining--

    const held = array[remaining]
    array[remaining] = array[pick]
    array[pick] = held
  }

  return array
}
|
|
/**
 * Renders the intro, permissions and Markdown body of `page` for one
 * permalink and returns the resulting HTML.
 *
 * A minimal request object is built and run through the context,
 * short-versions and features middleware so the render context matches the
 * permalink; the content is then rendered with Liquid and the minimal
 * unified processor.
 */
async function renderInnerHTML(page: Page, permalink: Permalink) {
  const pagePath = permalink.href
  const context: Context = {}
  const req = {
    path: pagePath,
    language: permalink.languageCode,
    pagePath,
    cookies: {},
    context,
  }
  // The middleware only needs something to call and something to pass along.
  const res = {}
  const next = () => {}

  await contextualize(req as ExtendedRequest, res as Response, next)
  await shortVersions(req as ExtendedRequest, res as Response, next)
  req.context.page = page
  features(req as ExtendedRequest, res as Response, next)

  req.context.relativePath = page.relativePath

  // Everything on the page that can contain links, as one Markdown document.
  const sources = [page.rawIntro, page.rawPermissions, page.markdown].filter(Boolean)
  const guts = sources.join('\n').trim()

  const markdown = await liquid.parseAndRender(guts, req.context)
  const vFile = await createMinimalProcessor(req.context).process(markdown)
  return vFile.toString()
}
|
|
| export default main |
|
|