| import Parser from '@jocmp/mercury-parser'; |
| import type { CheerioAPI } from 'cheerio'; |
| import { load } from 'cheerio'; |
| import type { Element } from 'domhandler'; |
| import * as entities from 'entities'; |
| import type { MiddlewareHandler } from 'hono'; |
| import { convert } from 'html-to-text'; |
| import markdownit from 'markdown-it'; |
| import { RE2JS } from 're2js'; |
| import sanitizeHtml from 'sanitize-html'; |
| import { simplecc } from 'simplecc-wasm'; |
|
|
| import { config } from '@/config'; |
| import type { Data, DataItem } from '@/types'; |
| import cache from '@/utils/cache'; |
| import ofetch from '@/utils/ofetch'; |
|
|
// Shared markdown-it instance with raw HTML passthrough enabled (`html: true`).
// It renders the markdown returned by the AI completion helper into HTML.
const md = markdownit({ html: true });
|
|
/**
 * Rewrite a URL-bearing attribute of an element to an absolute URL.
 *
 * The attribute is left untouched when no `baseUrl` is given, when the
 * attribute is absent or empty, or when the value cannot be resolved
 * against `baseUrl`.
 *
 * @param $ - Cheerio instance that owns `elem`.
 * @param elem - Element whose attribute should be rewritten.
 * @param attr - Attribute name, e.g. `href`, `src` or `poster`.
 * @param baseUrl - Base URL that relative values are resolved against.
 */
const resolveRelativeLink = ($: CheerioAPI, elem: Element, attr: string, baseUrl?: string) => {
    const node = $(elem);

    if (!baseUrl) {
        return;
    }

    try {
        const current = node.attr(attr);
        if (!current) {
            return;
        }
        const absolute = new URL(current, baseUrl).href;
        node.attr(attr, absolute);
    } catch {
        // `new URL` throws on unparseable input; keeping the original value is the intended fallback.
    }
};
|
|
/**
 * POST a two-message chat (system prompt + user text) to the
 * `/chat/completions` path under `config.openai.endpoint` and return the
 * content of the first choice in the response.
 *
 * @param prompt - Sent as the `system` message.
 * @param text - Sent as the `user` message.
 * @returns `choices[0].message.content` of the response.
 */
const getAiCompletion = async (prompt: string, text: string) => {
    const openai = config.openai;

    const messages = [
        { role: 'system', content: prompt },
        { role: 'user', content: text },
    ];

    const completion = await ofetch(`${openai.endpoint}/chat/completions`, {
        method: 'POST',
        body: {
            model: openai.model,
            max_tokens: openai.maxTokens,
            messages,
            temperature: openai.temperature,
        },
        headers: {
            Authorization: `Bearer ${openai.apiKey}`,
        },
    });

    const firstChoice = completion.choices[0];
    return firstChoice.message.content;
};
|
|
/**
 * Flatten an item's `author` field into a single string for regex matching.
 *
 * - a string author is returned as-is;
 * - an array of author objects yields their `name` values joined by a space;
 * - a missing/empty author, or any other shape, yields `''` instead of
 *   throwing, so one malformed item cannot abort a whole filter pass.
 *
 * @param item - Any object that may carry an `author` property.
 * @returns The author text, or an empty string when none can be derived.
 */
const getAuthorString = (item: { author?: unknown }): string => {
    const { author } = item;

    if (!author) {
        return '';
    }
    if (typeof author === 'string') {
        return author;
    }
    if (Array.isArray(author)) {
        return author.map((entry) => entry.name).join(' ');
    }

    // Unsupported shape (e.g. a single object): treat as "no author".
    return '';
};
|
|
/**
 * Give a scheme to a URL that lacks `http://` / `https://` so it can be used
 * as the base argument of `new URL()`. Protocol-relative values (`//host`)
 * get `http:`; anything else gets `http://`. Empty/undefined is returned as-is.
 */
const ensureHttpScheme = (url?: string) => {
    if (url && !/^https?:\/\//.test(url)) {
        return /^\/\//.test(url) ? 'http:' + url : 'http://' + url;
    }
    return url;
};

/**
 * Replace `item.title` with the AI completion of its plain-text form.
 * The completion is cached under `openai:title:<item.link>`; an empty result leaves the title untouched.
 */
const applyAiTitle = async (item: DataItem) => {
    const title = await cache.tryGet(`openai:title:${item.link}`, async () => {
        const plainTitle = convert(item.title!);
        return await getAiCompletion(config.openai.promptTitle, plainTitle);
    });

    if (title !== '') {
        item.title = title + '';
    }
};

/**
 * Prepend the rendered AI completion of the description's plain text to `item.description`,
 * separated by `<hr/><br/>`. The rendered HTML is cached under `openai:description:<item.link>`;
 * an empty result leaves the description untouched.
 */
const applyAiDescription = async (item: DataItem) => {
    const summary = await cache.tryGet(`openai:description:${item.link}`, async () => {
        const plainDescription = convert(item.description!);
        const summaryMd = await getAiCompletion(config.openai.promptDescription, plainDescription);
        return md.render(summaryMd);
    });

    if (summary !== '') {
        item.description = summary + '<hr/><br/>' + item.description;
    }
};

/**
 * Post-processing middleware: runs after the route handler and rewrites the
 * `data` object stored on the context.
 *
 * Steps, in order:
 * 1. reject empty feeds unless `data.allowEmpty` is set;
 * 2. decode XML entities in feed title/description, sort items by `pubDate`
 *    (newest first) unless `?sorted=false`;
 * 3. normalise every item (title entities, UTC `pubDate`, absolute links,
 *    description HTML clean-up, category array);
 * 4. apply the query-driven transforms: `filter*`, `filterout*`,
 *    `filter_time`, `limit`, `tgiv`, `mode=fulltext`, `chatgpt`, `scihub`,
 *    `opencc`, `brief`.
 *
 * @throws Error when the feed is empty and `allowEmpty` is not set, when the
 *   configured regex engine is unknown, or when `brief` is malformed.
 */
const middleware: MiddlewareHandler = async (ctx, next) => {
    await next();

    const data = ctx.get('data') as Data;
    if (!data) {
        // Nothing to post-process (the handler did not set `data`).
        return;
    }

    if ((!data.item || data.item.length === 0) && !data.allowEmpty) {
        throw new Error('this route is empty, please check the original site or <a href="https://github.com/DIYgod/RSSHub/issues/new/choose">create an issue</a>');
    }

    // From here on `data.item` is always an array.
    data.item = data.item || [];

    // Decode XML entities in the feed-level text fields.
    if (data.title) {
        data.title = entities.decodeXML(data.title + '');
    }
    if (data.description) {
        data.description = entities.decodeXML(data.description + '');
    }

    // Newest first; items without a pubDate sort as epoch 0.
    if (ctx.req.query('sorted') !== 'false') {
        data.item = data.item.toSorted((a: DataItem, b: DataItem) => +new Date(b.pubDate || 0) - +new Date(a.pubDate || 0));
    }

    /** Normalise a single item in place and return it. */
    const handleItem = (item: DataItem) => {
        if (item.title) {
            item.title = entities.decodeXML(item.title + '');
        }

        // Emit dates in UTC string form.
        if (item.pubDate) {
            item.pubDate = new Date(item.pubDate).toUTCString();
        }

        // Resolve the item link against the feed link.
        if (item.link) {
            item.link = new URL(item.link, ensureHttpScheme(data.link)).href;
        }

        if (item.description) {
            const $ = load(item.description);
            const baseUrl = ensureHttpScheme(item.link || data.link);

            $('script').remove();

            $('img').each((_, ele) => {
                const $ele = $(ele);

                // Recover a `src` for lazy-loaded images.
                if (!$ele.attr('src')) {
                    const lazySrc = $ele.attr('data-src') || $ele.attr('data-original');
                    if (lazySrc) {
                        $ele.attr('src', lazySrc);
                    } else {
                        // Fall back to the first attribute whose value looks like an image path.
                        for (const raw of Object.values(ele.attribs)) {
                            const value = raw.trim();
                            if (['.gif', '.png', '.jpg', '.webp'].some((suffix) => value.includes(suffix))) {
                                $ele.attr('src', value);
                                break;
                            }
                        }
                    }
                }

                // Strip inline event handlers.
                for (const handler of ['onclick', 'onerror', 'onload']) {
                    $ele.removeAttr(handler);
                }
            });

            // Make every URL-bearing attribute absolute.
            $('a, area').each((_, elem) => {
                resolveRelativeLink($, elem, 'href', baseUrl);
            });
            $('img, video, audio, source, iframe, embed, track').each((_, elem) => {
                resolveRelativeLink($, elem, 'src', baseUrl);
            });
            $('video[poster]').each((_, elem) => {
                resolveRelativeLink($, elem, 'poster', baseUrl);
            });

            // Default to not sending a referrer for embedded images/iframes.
            $('img, iframe').each((_, elem) => {
                if (!$(elem).attr('referrerpolicy')) {
                    $(elem).attr('referrerpolicy', 'no-referrer');
                }
            });

            item.description = $('body').html() + '' + (config.suffix || '');

            // Copy the quoted block (if any) onto every extra link entry.
            const extraLinks = item._extra?.links;
            const $quote = $('.rsshub-quote');
            if (extraLinks && $quote.length) {
                const quoteHtml = $.html($quote);
                for (const link of extraLinks) {
                    link.content_html = quoteHtml;
                }
            }
        }

        // Categories: always an array, string entries only.
        if (item.category) {
            const categories = Array.isArray(item.category) ? item.category : [item.category];
            item.category = categories.filter((entry) => typeof entry === 'string');
        }

        return item;
    };

    // handleItem is synchronous, so a plain map is enough.
    data.item = data.item.map((itm) => handleItem(itm));

    // ---- regex filters -------------------------------------------------
    const engine = config.feature.filter_regex_engine;
    const insensitive = ctx.req.query('filter_case_sensitive') === 'false';

    /** Compile `str` with the configured engine, honouring `filter_case_sensitive`. */
    const makeRegex = (str: string) => {
        switch (engine) {
            case 'regexp':
                return new RegExp(str, insensitive ? 'i' : '');
            case 're2':
                return RE2JS.compile(str, insensitive ? RE2JS.CASE_INSENSITIVE : 0);
            default:
                throw new Error(`Invalid Engine Value: ${engine}, please check your config.`);
        }
    };

    /**
     * Compile `pattern` once and return an engine-agnostic "does it match anywhere" predicate.
     * Patterns are compiled without the `g`/`y` flags, so `RegExp.test` is stateless here.
     */
    const makeMatcher = (pattern: string): ((text: string) => boolean) => {
        const regex = makeRegex(pattern);
        if (regex instanceof RE2JS) {
            return (text) => regex.matcher(text).find();
        }
        return (text) => regex.test(text);
    };

    // `filter`: keep items where title, description, author or any category matches.
    const filter = ctx.req.query('filter');
    if (filter) {
        const matches = makeMatcher(filter);

        data.item = data.item.filter((item) => {
            const title = item.title || '';
            const description = item.description || title;
            const author = getAuthorString(item);
            const category = item.category || [];

            return matches(title) || matches(description) || matches(author) || category.some((c) => matches(c));
        });
    }

    // `filter_*`: only used when `filter` is absent; every supplied field filter must match.
    const filterTitle = ctx.req.query('filter_title');
    const filterDescription = ctx.req.query('filter_description');
    const filterAuthor = ctx.req.query('filter_author');
    const filterCategory = ctx.req.query('filter_category');
    if (!filter && (filterTitle || filterDescription || filterAuthor || filterCategory)) {
        const matchTitle = filterTitle ? makeMatcher(filterTitle) : undefined;
        const matchDescription = filterDescription ? makeMatcher(filterDescription) : undefined;
        const matchAuthor = filterAuthor ? makeMatcher(filterAuthor) : undefined;
        const matchCategory = filterCategory ? makeMatcher(filterCategory) : undefined;

        data.item = data.item.filter((item) => {
            const title = item.title || '';
            const description = item.description || title;
            const author = getAuthorString(item);
            const category = item.category || [];

            return (
                (!matchTitle || matchTitle(title)) &&
                (!matchDescription || matchDescription(description)) &&
                (!matchAuthor || matchAuthor(author)) &&
                (!matchCategory || category.some((c) => matchCategory(c)))
            );
        });
    }

    // `filterout*`: drop items matching any supplied pattern.
    // Plain `filterout` applies to both title and description; the field-specific parameter wins when both are given.
    const filterout = ctx.req.query('filterout');
    const filteroutTitle = ctx.req.query('filterout_title') || filterout;
    const filteroutDescription = ctx.req.query('filterout_description') || filterout;
    const filteroutAuthor = ctx.req.query('filterout_author');
    const filteroutCategory = ctx.req.query('filterout_category');
    if (filteroutTitle || filteroutDescription || filteroutAuthor || filteroutCategory) {
        const rejectTitle = filteroutTitle ? makeMatcher(filteroutTitle) : undefined;
        const rejectDescription = filteroutDescription ? makeMatcher(filteroutDescription) : undefined;
        const rejectAuthor = filteroutAuthor ? makeMatcher(filteroutAuthor) : undefined;
        const rejectCategory = filteroutCategory ? makeMatcher(filteroutCategory) : undefined;

        data.item = data.item.filter((item) => {
            // Default to '' like the other filter branches so an item without a title
            // is never matched as `undefined`.
            const title = item.title || '';
            const description = item.description || title;
            const author = getAuthorString(item);
            const category = item.category || [];

            return (
                !(rejectTitle && rejectTitle(title)) &&
                !(rejectDescription && rejectDescription(description)) &&
                !(rejectAuthor && rejectAuthor(author)) &&
                !(rejectCategory && category.some((c) => rejectCategory(c)))
            );
        });
    }

    // `filter_time`: keep items published within the last N seconds (items without pubDate are kept).
    const filterTime = ctx.req.query('filter_time');
    if (filterTime) {
        const now = Date.now();
        const maxAgeMs = Number.parseInt(filterTime) * 1000;
        data.item = data.item.filter(({ pubDate }) => {
            let isFilter = true;
            try {
                isFilter = !pubDate || now - new Date(pubDate).getTime() <= maxAgeMs;
            } catch {
                // Keep the item if its date cannot be evaluated.
            }
            return isFilter;
        });
    }

    // `limit`: cap the number of items.
    const limit = ctx.req.query('limit');
    if (limit) {
        data.item = data.item.slice(0, Number.parseInt(limit));
    }

    // `tgiv`: wrap links in a Telegram Instant View URL using the given rhash.
    const tgiv = ctx.req.query('tgiv');
    if (tgiv) {
        // Encode the rhash too so it cannot inject extra query parameters.
        const rhash = encodeURIComponent(tgiv);
        for (const item of data.item) {
            if (item.link) {
                item.link = `https://t.me/iv?url=${encodeURIComponent(item.link)}&rhash=${rhash}`;
            }
        }
    }

    // `mode=fulltext`: fetch each link and replace the description with the parsed article content.
    if (ctx.req.query('mode')?.toLowerCase() === 'fulltext') {
        const tasks = data.item.map(async (item) => {
            const { link, author, description } = item;
            const parsedResult: any = await cache.tryGet(`mercury-cache-${link}`, async () => {
                if (link) {
                    try {
                        const res = await ofetch(link);
                        const $ = load(res);
                        return await Parser.parse(link, {
                            html: $.html(),
                        });
                    } catch {
                        // Best effort: a failed fetch/parse falls back to the original description.
                    }
                }
            });

            item.author = author || parsedResult?.author;
            // Guard against a missing/non-string `content` so one unparsable page cannot reject the whole batch.
            const content = parsedResult?.content;
            item.description = typeof content === 'string' && content.length > 40 ? entities.decodeXML(content) : description;
        });
        await Promise.all(tasks);
    }

    // `chatgpt`: AI-rewrite title and/or description depending on `config.openai.inputOption`.
    if (ctx.req.query('chatgpt') && config.openai.apiKey) {
        const inputOption = config.openai.inputOption;
        data.item = await Promise.all(
            data.item.map(async (item) => {
                try {
                    if (inputOption === 'description' && item.description) {
                        await applyAiDescription(item);
                    } else if (inputOption === 'title' && item.title) {
                        await applyAiTitle(item);
                    } else if (inputOption === 'both' && item.title && item.description) {
                        await applyAiTitle(item);
                        await applyAiDescription(item);
                    }
                } catch {
                    // AI enrichment is best effort; on failure the item is returned as far as it got.
                }
                return item;
            })
        );
    }

    // `scihub`: point links at the configured Sci-Hub host, preferring the DOI.
    if (ctx.req.query('scihub')) {
        for (const item of data.item) {
            item.link = `${config.scihub.host}${item.doi || item.link}`;
        }
    }

    // `opencc`: run title and description through simplecc with the requested conversion.
    const opencc = ctx.req.query('opencc');
    if (opencc) {
        for (const item of data.item) {
            item.title = simplecc(item.title ?? item.link, opencc);
            item.description = simplecc(item.description ?? item.title ?? item.link, opencc);
        }
    }

    // `brief`: reduce descriptions to plain text truncated to N characters (N has at least three digits).
    const briefParam = ctx.req.query('brief');
    if (briefParam) {
        if (!/[1-9]\d{2,}/.test(briefParam)) {
            throw new Error(`Invalid parameter brief. Please check the doc https://docs.rsshub.app/guide/parameters#shu-chu-jian-xun`);
        }

        const brief = Number.parseInt(briefParam);
        for (const item of data.item) {
            if (item.description) {
                const text = sanitizeHtml(item.description, { allowedTags: [], allowedAttributes: {} });
                item.description = text.length > brief ? `<p>${text.slice(0, brief)}…</p>` : `<p>${text}</p>`;
            }
        }
    }

    ctx.set('data', data);
};

export default middleware;
|
|