|
|
|
// localStorage keys used by UaDb to persist application state.
const ID_RAG = "id_rag";
|
|
|
const ID_THREAD = "id_thread"; |
|
const ID_RESPONSES = "id_responses"; |
|
const ID_DOC_NAMES = "id_doc_names"; |
|
const ID_DOCS = "id_docs"; |
|
|
|
// Characters trimmed from an oversized input on each retry (10 KiB).
const PROMPT_DECR = 1024 * 10;
|
|
|
// Approximate request budget in characters: nk kilo-tokens at ~2 chars/token, plus 10% headroom.
const maxLenRequest = (nk = 32) => {
|
const nc = 1024 * nk * 2; |
|
const sp = nc * 0.1; |
|
const mlr = Math.trunc(nc + sp); |
|
return mlr; |
|
}; |
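// e.g. maxLenRequest(32) -> Math.trunc(65536 + 6553.6) = 72089 characters.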
|
|
|
// Deobfuscates the API key: base64-decode each part, then shift every char code down by 5.
function umgm() {
|
const arr = ["bWtkWlFPXmg=", "SWZtUkZZb18=", "Rm1rUVZzcHM=", "c1pJelNTTHQ=", "Vlt0bE8="]; |
|
return arr |
|
.map((part) => { |
|
const ch = atob(part); |
|
return ch |
|
.split("") |
|
.map((char) => String.fromCharCode((char.charCodeAt(0) - 5 + 256) % 256)) |
|
.join(""); |
|
}) |
|
.join(""); |
|
} |
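// For reference, a matching encoder (hypothetical helper, not part of this file)
// would shift each char code up by 5 before base64-encoding:
// const obfuscate = (s) => btoa(s.split("").map((c) => String.fromCharCode((c.charCodeAt(0) + 5) % 256)).join(""));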
|
|
|
const MAX_PROMPT_LENGTH = maxLenRequest(100); // 225,280 characters (~100k tokens at ~2 chars/token)
|
|
|
|
|
|
|
const MODEL = "mistralai/Mistral-Small-3.1-24B-Instruct-2503"; |
|
|
|
const API = umgm(); |
|
console.log("\n**** MODELl:\n", MODEL); |
|
console.log(API); |
|
|
|
const client = ClientLLM(API); |
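// ClientLLM is defined elsewhere; from its usage here it is assumed to expose
// sendRequest(url, payload, timeout), cancelRequest(), and createError(title, type, code, details).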
|
|
|
// Sends a chat-completion request for MODEL and normalizes the result,
// copying the first choice's message content into rr.data.
const getResponse = async (payload, timeout = 60) => {
|
payload["model"] = MODEL; |
|
const url = `https://router.huggingface.co/hf-inference/models/${MODEL}/v1/chat/completions`; |
|
const rr = await client.sendRequest(url, payload, timeout); |
|
if (rr.error) { |
|
if (rr.error.code === 499) { // request cancelled by the client
  alert("Request interrupted");
|
return null; |
|
} else { |
|
return rr; |
|
} |
|
} |
|
const content = rr.response?.choices?.[0]?.message?.content;
if (content === undefined) {
  rr.error = client.createError("Invalid response", "ParseError", 500, { message: "The response does not contain the expected content" });
  rr.ok = false;
  return rr;
}
rr.data = content;
|
return rr; |
|
}; |
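// Result shape, as consumed below: { ok, error?: { code, details }, response?, data? }.
// A null return means the user cancelled the request.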
|
|
|
|
|
// Holds the latest raw response for token-usage inspection.
const responseDetails = {
|
set(response) { |
|
this.response = response; |
|
},

get_total_tokens() {
  return this.response?.usage?.total_tokens;
},
get_completion_tokens() {
  return this.response?.usage?.completion_tokens;
},
};
|
|
|
// Running totals of prompt and completion tokens across a whole run.
const calcTokens = {
|
sum_input_tokens: 0, |
|
sum_generate_tokens: 0, |
|
init() { |
|
this.sum_input_tokens = 0; |
|
this.sum_generate_tokens = 0; |
|
}, |
|
add(response) {
  if (!response || !response.usage) return;
  this.sum_input_tokens += response.usage.prompt_tokens;
  this.sum_generate_tokens += response.usage.completion_tokens;
},
|
get_sum_input_tokens() { |
|
return this.sum_input_tokens; |
|
}, |
|
get_sum_generate_tokens() { |
|
return this.sum_generate_tokens; |
|
}, |
|
}; |
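// Usage: calcTokens.init() once per run, calcTokens.add(rr.response) after each
// successful request, then read the totals via the two getters.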
|
|
|
function cancelClientRequest() { |
|
client.cancelRequest(); |
|
} |
|
|
|
const getPromptTokens = (err) => {
  const msg = err.details?.message || "";
  const match = msg.match(/Prompt contains (\d+) tokens/);
  return match ? parseInt(match[1], 10) : null;
};

const getModelToken = (err) => {
  const msg = err.details?.message || "";
  const match = msg.match(/model with (\d+) maximum context length/);
  return match ? parseInt(match[1], 10) : null;
};

const isTooLarge = (err) => {
  const msg = err.details?.message || "";
  return msg.includes("too large");
};
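// Example of the kind of backend message these helpers parse (exact wording may vary):
// "Input is too large. Prompt contains 34567 tokens for a model with 32768 maximum context length."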
|
|
|
// Drops `decr` characters from the end of `txt`.
const truncateInput = (txt, decr) => {
|
const tl = txt.length; |
|
const lim = tl - decr; |
|
const s = txt.substring(0, lim); |
|
return s; |
|
}; |
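// Note: if decr >= txt.length the limit goes negative, substring clamps it to 0, and "" is returned.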
|
|
|
// Returns how many characters of `document` fit in the current request budget,
// preferring to cut at the first sentence boundary past the available length.
const getPartSize = (document, prompt, decrement) => {
  // Extend the cut to the next period, but never more than 100 chars past the limit.
  const findLimitWithPoint = (text, freeLength) => {
|
const pointIndex = text.indexOf(".", freeLength); |
|
let limit = (pointIndex !== -1 ? pointIndex : freeLength) + 1; |
|
if (limit > freeLength + 100) { |
|
limit = freeLength; |
|
} |
|
return limit; |
|
}; |
|
|
|
|
|
const totalLength = document.length + prompt.length; |
|
|
|
const availableLength = MAX_PROMPT_LENGTH - decrement; |
|
let partSize = 0; |
|
|
|
if (totalLength < availableLength) { |
|
partSize = document.length; |
|
} else { |
|
|
|
// Subtract the prompt scaffold so part + prompt stays within the budget.
partSize = findLimitWithPoint(document, availableLength - prompt.length);
|
} |
|
return partSize; |
|
}; |
|
|
|
// Splits the document at partSize into [current part, remainder].
const getPartDoc = (pRgt, partSize) => {
|
const pLft = pRgt.substring(0, partSize); |
|
pRgt = pRgt.substring(partSize).trim(); |
|
return [pLft, pRgt]; |
|
}; |
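// e.g. getPartDoc("abc. def", 5) -> ["abc. ", "def"]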
|
|
|
// Logs chunking progress: message, left/right part lengths, and the accumulated
// length of all answers so far.
const ragLog = (msg, lftL, rgtL, answers) => {
|
const maxl = MAX_PROMPT_LENGTH; |
|
const rspsL = answers.reduce((acc, cur) => { |
|
return acc + cur.length; |
|
}, 0); |
|
let s = `${msg} mx:${maxl} lft:${lftL} rgt:${rgtL} arr:${rspsL}`; |
|
xlog(s); |
|
const row = formatRow([msg, lftL, rgtL, rspsL], [8, -7, -7, -7]); |
|
UaLog.log(row); |
|
}; |
|
|
|
const Rag = {
  // Condensed context distilled from all documents.
  ragContext: "",
  // The query that drove the last RAG pass.
  ragQuery: "",
  // The final answer produced from the combined context.
  ragAnswer: "",
  answers: [],
  docContextLst: [],
  prompts: [],
  doc: "",
  doc_part: "",
|
init() { |
|
this.readRespsFromDb(); |
|
this.readFromDb(); |
|
calcTokens.init(); |
|
}, |
|
returnOk() { |
|
const ok = this.ragContext.length > 10; |
|
return ok; |
|
}, |
|
saveToDb() { |
|
const js = { |
|
context: this.ragContext, |
|
ragquery: this.ragQuery, |
|
raganswer: this.ragAnswer, |
|
}; |
|
UaDb.saveJson(ID_RAG, js); |
|
UaDb.saveArray(ID_THREAD, ThreadMgr.rows); |
|
}, |
|
readFromDb() { |
|
const js = UaDb.readJson(ID_RAG) || {};
|
this.ragContext = js.context || ""; |
|
this.ragQuery = js.ragquery || ""; |
|
this.ragAnswer = js.raganswer || ""; |
|
ThreadMgr.rows = UaDb.readArray(ID_THREAD); |
|
}, |
|
saveRespToDb() { |
|
UaDb.saveArray(ID_RESPONSES, this.answers); |
|
}, |
|
readRespsFromDb() { |
|
this.answers = UaDb.readArray(ID_RESPONSES); |
|
}, |
|
|
|
|
|
addPrompt(p) {
  // No-op placeholder: prompts are not currently collected.
},
|
|
|
|
|
// RAG pass: chunk each stored document, summarize every chunk against the query,
// condense the partial answers into a per-document context, then answer the query
// from the combined context of all documents.
async requestDocsRAG(query) {
|
DataMgr.deleteJsonDati(); |
|
DataMgr.readDbDocNames(); |
|
DataMgr.readDbDocs(); |
|
this.docContextLst = []; |
|
this.ragQuery = query; |
|
this.saveToDb(); |
|
let ndoc = 0; |
|
try { |
|
let j = 1; |
|
for (let i = 0; i < DataMgr.docs.length; i++) { |
|
let doc = DataMgr.docs[i]; |
|
if (doc.trim() == "") continue; |
|
const docName = DataMgr.doc_names[i]; |
|
const doc_entire_len = doc.length; |
|
xlog(`${docName} (${doc_entire_len}) `); |
|
UaLog.log(`${docName} (${doc_entire_len}) `); |
|
++ndoc; |
|
let npart = 1; |
|
let decr = 0; |
|
let prompt = ""; |
|
let lft = ""; |
|
let rgt = ""; |
|
let answer = ""; |
|
let docAnswersLst = []; |
|
// Chunk loop: request a summary for each document part until the remainder is exhausted.
while (true) {
|
const partSize = getPartSize(doc, promptDoc("", query, ""), decr); |
|
if (partSize < 10) break; |
|
[lft, rgt] = getPartDoc(doc, partSize); |
|
ragLog(`${j}) ${ndoc},${npart}`, lft.length, rgt.length, this.answers); |
|
prompt = promptDoc(lft, query, docName); |
|
this.addPrompt(prompt); |
|
const payload = getPayloadDoc(prompt); |
|
const rr = await getResponse(payload, 90); |
|
if (!rr) { |
|
return ""; |
|
} |
|
const err = rr.error; |
|
if (!rr.ok) { |
|
console.error(`ERR1\n`, err); |
|
const code = err.code; |
|
if (code == 400) { |
|
if (isTooLarge(err)) { |
|
UaLog.log(`Error tokens Doc ${prompt.length}`); |
|
decr += PROMPT_DECR; |
|
continue; |
|
} else throw err; |
|
} else if (code == 408) { |
|
UaLog.log(`Error timeout Doc`);
|
continue; |
|
} else throw err; |
|
} |
|
answer = rr.data; |
|
if (!answer) return ""; |
|
let itks = calcTokens.get_sum_input_tokens(); |
|
let gtks = calcTokens.get_sum_generate_tokens(); |
|
console.log(`Sum Tokens: ${itks} ${gtks}`); |
|
|
|
const rsp = rr.response; |
|
responseDetails.set(rsp); |
|
itks = responseDetails.get_total_tokens(); |
|
gtks = responseDetails.get_completion_tokens(); |
|
console.log(`Response Tokens: ${itks} ${gtks}`); |
|
calcTokens.add(rsp); |
|
|
|
answer = cleanResponse(answer);
docAnswersLst.push(answer);
const s = `DOCUMENT: ${docName}_${npart}\n${answer}`;
this.answers.push(s);
npart++;
j++;
doc = rgt;
|
} |
|
|
|
const docAnswersLen = docAnswersLst.length; |
|
let docAnswersTxt = docAnswersLst.join("\n\n");
|
|
|
let docContext = ""; |
|
while (true) { |
|
prompt = promptBuildContext(docAnswersTxt, this.ragQuery);
|
const payload = getPayloadBuildContext(prompt); |
|
const rr = await getResponse(payload, 90); |
|
if (!rr) { |
|
return ""; |
|
} |
|
const err = rr.error; |
|
if (!rr.ok) { |
|
console.error(`ERR2\n`, err); |
|
const code = err.code; |
|
if (code == 400) { |
|
if (isTooLarge(err)) { |
|
UaLog.log(`Error tokens build Context ${prompt.length}`); |
|
docAnswersTxt = truncateInput(docAnswersTxt, PROMPT_DECR);
|
continue; |
|
} else throw err; |
|
} else if (code == 408) { |
|
UaLog.log(`Error timeout Context`); |
|
continue; |
|
} else throw err; |
|
} |
|
docContext = rr.data; |
|
if (!docContext) return ""; |
|
const rsp = rr.response; |
|
calcTokens.add(rsp); |
|
break; |
|
} |
|
UaLog.log(`context: ${docAnswersLen} parts => ${docContext.length} chars`);
|
docContext = `\n### DOCUMENT: ${docName}\n${docContext}`;
|
this.docContextLst.push(docContext); |
|
} |
|
} catch (err) { |
|
console.error("ERR3\n", err); |
|
throw err; |
|
} |
|
|
|
this.ragContext = this.docContextLst.join("\n\n"); |
|
|
|
this.saveToDb(); |
|
|
|
// Final step: answer the query from the combined context.
{
|
let answer = ""; |
|
let context = this.ragContext; |
|
try { |
|
// Retry loop: shrink the context until the request fits the model's window.
while (true) {
|
let prompt = promptWithContext(context, query); |
|
const payload = getPayloadWithContext(prompt); |
|
const rr = await getResponse(payload, 90); |
|
if (!rr) { |
|
return ""; |
|
} |
|
const err = rr.error; |
|
if (!rr.ok) { |
|
console.error(`ERR4\n`, err); |
|
const code = err.code; |
|
if (code == 400) { |
|
if (isTooLarge(err)) { |
|
UaLog.log(`Error tokens with Context ${prompt.length}`); |
|
context = truncateInput(context, PROMPT_DECR); |
|
continue; |
|
} else throw err; |
|
} else if (code == 408) { |
|
UaLog.log(`Error timeout Context`); |
|
continue; |
|
} else throw err; |
|
} |
|
answer = rr.data; |
|
if (!answer) return ""; |
|
const rsp = rr.response; |
|
calcTokens.add(rsp); |
|
break; |
|
} |
|
answer = cleanResponse(answer); |
|
this.ragAnswer = answer; |
|
this.saveRespToDb(); |
|
ThreadMgr.init(); |
|
this.saveToDb(); |
|
UaLog.log(`Answer: (${this.ragAnswer.length})`);
|
|
|
|
|
const itks = calcTokens.get_sum_input_tokens(); |
|
const gtks = calcTokens.get_sum_generate_tokens(); |
|
UaLog.log(`Tokens: ${itks} ${gtks}`); |
|
return answer; |
|
} catch (err) { |
|
console.error("ERR5\n", err); |
|
throw err; |
|
} |
|
} |
|
}, |
|
|
|
// Conversational follow-up: answers `query` against the stored RAG context while
// maintaining the chat thread.
async requestContext(query) {
|
let answer = ""; |
|
if (!this.ragContext) { |
|
|
|
this.ragContext = "Sei un assistente AI dispoibile a soddisfare tutte le mi richieste"; |
|
} |
|
if (ThreadMgr.isFirst()) { |
|
ThreadMgr.init(); |
|
try { |
|
let context = this.ragContext; |
|
let thread = ThreadMgr.getThread();
let prompt = "";
|
|
|
while (true) { |
|
prompt = promptThread(context, thread, query); |
|
const payload = getPayloadThread(prompt); |
|
const rr = await getResponse(payload, 90); |
|
if (!rr) { |
|
return ""; |
|
} |
|
const err = rr.error; |
|
if (!rr.ok) { |
|
console.error(`ERR6\n`, err); |
|
const code = err.code; |
|
if (code == 400) { |
|
if (isTooLarge(err)) { |
|
UaLog.log(`Error tokens with Context ${prompt.length}`); |
|
context = truncateInput(context, PROMPT_DECR); |
|
continue; |
|
} else throw err; |
|
} else if (code == 408) {
  UaLog.log(`Error timeout Context`);
  continue;
} else throw err;
|
} |
|
answer = rr.data; |
|
if (!answer) return ""; |
|
let itks = calcTokens.get_sum_input_tokens(); |
|
let gtks = calcTokens.get_sum_generate_tokens(); |
|
console.log(`Sum Tokens: ${itks} ${gtks}`); |
|
|
|
const rsp = rr.response; |
|
responseDetails.set(rsp); |
|
itks = responseDetails.get_total_tokens(); |
|
gtks = responseDetails.get_completion_tokens(); |
|
console.log(`Response Tokens: ${itks} ${gtks}`); |
|
calcTokens.add(rsp); |
|
|
|
break; |
|
} |
|
answer = cleanResponse(answer); |
|
ThreadMgr.add(query, answer); |
|
answer = ThreadMgr.getThread(); |
|
UaLog.log(`Conversation started (${prompt.length})`);
|
return answer; |
|
} catch (err) { |
|
console.error("ERR7\n", err); |
|
throw err; |
|
} |
|
} else { |
|
try { |
|
let context = this.ragContext; |
|
let thread = ThreadMgr.getThread(); |
|
let prompt = ""; |
|
while (true) { |
|
prompt = promptThread(context, thread, query); |
|
const payload = getPayloadThread(prompt); |
|
const rr = await getResponse(payload, 90); |
|
if (!rr) { |
|
return ""; |
|
} |
|
const err = rr.error; |
|
if (!rr.ok) { |
|
console.error(`ERR8\n`, err); |
|
const code = err.code; |
|
if (code == 400) { |
|
if (isTooLarge(err)) { |
|
UaLog.log(`Error tokens with Context ${prompt.length}`); |
|
context = truncateInput(context, PROMPT_DECR); |
|
continue; |
|
} else throw err; |
|
} else if (code == 408) { |
|
UaLog.log(`Error timeout Context`); |
|
continue; |
|
} else throw err; |
|
} |
|
answer = rr.data; |
|
if (!answer) return ""; |
|
const rsp = rr.response; |
|
let itks = calcTokens.get_sum_input_tokens(); |
|
let gtks = calcTokens.get_sum_generate_tokens(); |
|
console.log(`Sum Tokens: ${itks} ${gtks}`); |
|
responseDetails.set(rsp); |
|
itks = responseDetails.get_total_tokens(); |
|
gtks = responseDetails.get_completion_tokens(); |
|
console.log(`Response Tokens: ${itks} ${gtks}`); |
|
calcTokens.add(rsp); |
|
break; |
|
} |
|
answer = cleanResponse(answer); |
|
ThreadMgr.add(query, answer); |
|
answer = ThreadMgr.getThread(); |
|
UaLog.log(`Conversation (${prompt.length})`);
|
return answer; |
|
} catch (err) { |
|
console.error("ERR9\n", err); |
|
throw err; |
|
} |
|
} |
|
}, |
|
}; |
|
|
|
// Markers used to serialize the conversation thread into a single prompt.
const LLM = "# Assistant:";
|
const USER = "# User:"; |
|
|
|
// Holds the conversation as [query, answer] rows and serializes it into a prompt thread.
const ThreadMgr = {
|
rows: [], |
|
init() { |
|
this.rows = []; |
|
if (Rag.ragAnswer) {
|
this.add(Rag.ragQuery, Rag.ragAnswer); |
|
} else { |
|
this.add("", ""); |
|
} |
|
}, |
|
add(query, resp) { |
|
const row = [query, resp]; |
|
this.rows.push(row); |
|
UaDb.saveArray(ID_THREAD, ThreadMgr.rows); |
|
}, |
|
getThread() { |
|
const rows = []; |
|
for (const ua of this.rows) { |
|
const u = ua[0]; |
|
const a = ua[1]; |
|
if (!u) continue; |
|
rows.push(`${USER}\n${u}\n${LLM}\n${a}\n`); |
|
} |
|
return rows.join("\n\n"); |
|
}, |
|
isFirst() { |
|
return this.rows.length < 2; |
|
}, |
|
}; |
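// Typical flow (sketch, assuming UaDb/DataMgr and the prompt helpers are loaded):
//   Rag.init();
//   const answer = await Rag.requestDocsRAG("query");      // builds ragContext per document
//   const follow = await Rag.requestContext("follow-up");  // continues the thread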
|
|