Spaces:
Sleeping
feat: add sources for websearch (#1551)
Browse files* feat: playwright, spatial parsing, markdown for web search
Co-authored-by: Aaditya Sahay <aadityasahay1@gmail.com>
* feat: choose multiple clusters if necessary (#2)
* chore: resolve linting failures
* feat: improve parsing performance and error messages
* feat: inline citations
* feat: adjust inline citation prompt, less intrusive tokens
* feat: add sources to message when using websearch
* fix: clean up packages
* fix: packages
* fix: packages lol
* fix: make websearch citation work better with tools
* fix: use single brackets for sources, only render source element if a matching source is available
* fix: bad import
---------
Co-authored-by: Liam Dyer <liamcdyer@gmail.com>
Co-authored-by: Aaditya Sahay <aadityasahay1@gmail.com>
Co-authored-by: Aaditya Sahay <56438732+Aaditya-Sahay@users.noreply.github.com>
|
@@ -13434,4 +13434,4 @@
|
|
| 13434 |
}
|
| 13435 |
}
|
| 13436 |
}
|
| 13437 |
-
}
|
|
|
|
| 13434 |
}
|
| 13435 |
}
|
| 13436 |
}
|
| 13437 |
+
}
|
|
@@ -37,6 +37,28 @@
|
|
| 37 |
import DOMPurify from "isomorphic-dompurify";
|
| 38 |
import { enhance } from "$app/forms";
|
| 39 |
import { browser } from "$app/environment";
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
|
| 41 |
function sanitizeMd(md: string) {
|
| 42 |
let ret = md
|
|
@@ -114,7 +136,7 @@
|
|
| 114 |
})
|
| 115 |
);
|
| 116 |
|
| 117 |
-
$: tokens = marked.lexer(sanitizeMd(message.content
|
| 118 |
|
| 119 |
$: emptyLoad =
|
| 120 |
!message.content && (webSearchIsDone || (searchUpdates && searchUpdates.length === 0));
|
|
|
|
| 37 |
import DOMPurify from "isomorphic-dompurify";
|
| 38 |
import { enhance } from "$app/forms";
|
| 39 |
import { browser } from "$app/environment";
|
| 40 |
+
import type { WebSearchSource } from "$lib/types/WebSearch";
|
| 41 |
+
|
| 42 |
+
/**
 * Escapes the characters that are significant inside a double-quoted HTML
 * attribute value, so arbitrary text cannot terminate the attribute early.
 */
function escapeHtmlAttribute(value: string): string {
	return value
		.replace(/&/g, "&amp;")
		.replace(/"/g, "&quot;")
		.replace(/'/g, "&#39;")
		.replace(/</g, "&lt;")
		.replace(/>/g, "&gt;");
}

/**
 * Replaces inline citation markers such as `[1]` with a superscript link to
 * the matching web search source.
 *
 * Markers are 1-based: `[n]` refers to `webSearchSources[n - 1]`. A marker
 * with no matching source is left in the text untouched.
 *
 * @param md - Markdown text that may contain `[n]` citation markers.
 * @param webSearchSources - Sources in citation order; defaults to an empty list.
 * @returns The markdown with every resolvable marker replaced by an HTML
 *   `<sup><a>…</a></sup>` snippet (preceded by a single space).
 */
function addInlineCitations(md: string, webSearchSources: WebSearchSource[] = []): string {
	// Inline styles cannot express `:hover`, so only static declarations are listed here.
	const linkStyle = "color: rgb(59, 130, 246); text-decoration: none;";

	return md.replace(/\[(\d+)\]/g, (match: string, digits: string) => {
		const index = Number(digits);
		const source = webSearchSources[index - 1];
		if (!source) {
			// No such source (including `[0]`): keep the original marker text.
			return match;
		}

		// NOTE(review): the link presumably comes from external search results, so it is
		// escaped before being embedded in the attribute — confirm against the producer.
		const href = escapeHtmlAttribute(String(source.link));
		return ` <sup><a href="${href}" target="_blank" rel="noreferrer" style="${linkStyle}">${index}</a></sup>`;
	});
}
|
| 62 |
|
| 63 |
function sanitizeMd(md: string) {
|
| 64 |
let ret = md
|
|
|
|
| 136 |
})
|
| 137 |
);
|
| 138 |
|
| 139 |
+
$: tokens = marked.lexer(addInlineCitations(sanitizeMd(message.content), webSearchSources));
|
| 140 |
|
| 141 |
$: emptyLoad =
|
| 142 |
!message.content && (webSearchIsDone || (searchUpdates && searchUpdates.length === 0));
|
|
@@ -17,7 +17,7 @@ export async function preprocessMessages(
|
|
| 17 |
|
| 18 |
function addWebSearchContext(messages: Message[], webSearch: Message["webSearch"]) {
|
| 19 |
const webSearchContext = webSearch?.contextSources
|
| 20 |
-
.map(({ context }) => context.trim())
|
| 21 |
.join("\n\n----------\n\n");
|
| 22 |
|
| 23 |
// No web search context available, skip
|
|
@@ -35,7 +35,7 @@ function addWebSearchContext(messages: Message[], webSearch: Message["webSearch"
|
|
| 35 |
const finalMessage = {
|
| 36 |
...messages[messages.length - 1],
|
| 37 |
content: `I searched the web using the query: ${webSearch.searchQuery}.
|
| 38 |
-
Today is ${currentDate} and here are the results:
|
| 39 |
=====================
|
| 40 |
${webSearchContext}
|
| 41 |
=====================
|
|
|
|
| 17 |
|
| 18 |
function addWebSearchContext(messages: Message[], webSearch: Message["webSearch"]) {
|
| 19 |
const webSearchContext = webSearch?.contextSources
|
| 20 |
+
.map(({ context }, idx) => `Source [${idx + 1}]\n${context.trim()}`)
|
| 21 |
.join("\n\n----------\n\n");
|
| 22 |
|
| 23 |
// No web search context available, skip
|
|
|
|
| 35 |
const finalMessage = {
|
| 36 |
...messages[messages.length - 1],
|
| 37 |
content: `I searched the web using the query: ${webSearch.searchQuery}.
|
| 38 |
+
Today is ${currentDate} and here are the results. When answering the question, if you use a source, cite its index inline like this: [1], [2], etc.
|
| 39 |
=====================
|
| 40 |
${webSearchContext}
|
| 41 |
=====================
|
|
@@ -25,12 +25,21 @@ const websearch: ConfigTool = {
|
|
| 25 |
showOutput: false,
|
| 26 |
async *call({ query }, { conv, assistant, messages }) {
|
| 27 |
const webSearchToolResults = yield* runWebSearch(conv, messages, assistant?.rag, String(query));
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
.
|
|
|
|
| 31 |
|
| 32 |
return {
|
| 33 |
-
outputs: [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
display: false,
|
| 35 |
};
|
| 36 |
},
|
|
|
|
| 25 |
showOutput: false,
|
| 26 |
/**
 * Runs a web search for the given query and returns the numbered source
 * contexts together with citation instructions as tool outputs.
 */
async *call({ query }, { conv, assistant, messages }) {
	// Delegate to runWebSearch; whatever it yields is forwarded, and its return value is kept.
	const searchResult = yield* runWebSearch(conv, messages, assistant?.rag, String(query));

	// Label each context with its 1-based position so it can be cited as [1], [2], …
	const numberedContexts = searchResult?.contextSources.map(
		({ context }, position) => `Source [${position + 1}]\n${context.trim()}`
	);
	const webSearchContext = numberedContexts?.join("\n\n----------\n\n");

	const citationInstructions =
		"When answering the question, if you use sources from the websearch results above, cite each index inline individually wrapped like: [1], [2] etc.";

	return {
		outputs: [{ websearch: webSearchContext }, { instructions: citationInstructions }],
		display: false,
	};
},
|