ragrqs / static /javascript /rag_mgr.js
gmaterni's picture
Upload folder using huggingface_hub
cd92039 verified
/** @format */
const ID_RAG = "id_rag";
const ID_THREAD = "id_thread";
const ID_RESPONSES = "id_responses";
const ID_DOC_NAMES = "id_doc_names";
const ID_DOCS = "id_docs";
const PROMPT_DECR = 1024 * 10;
const maxLenRequest = (nk = 32) => {
const nc = 1024 * nk * 2;
const sp = nc * 0.1;
const mlr = Math.trunc(nc + sp);
return mlr;
};
function umgm() {
const arr = ["bWtkWlFPXmg=", "SWZtUkZZb18=", "Rm1rUVZzcHM=", "c1pJelNTTHQ=", "Vlt0bE8="];
return arr
.map((part) => {
const ch = atob(part);
return ch
.split("")
.map((char) => String.fromCharCode((char.charCodeAt(0) - 5 + 256) % 256))
.join("");
})
.join("");
}
const MAX_PROMPT_LENGTH = maxLenRequest(100);
//HF
// const MODEL = "mistralai/Mixtral-8x7B-Instruct-v0.1";
// const MODEL = "mistralai/Mistral-Small-24B-Instruct-2501";
const MODEL = "mistralai/Mistral-Small-3.1-24B-Instruct-2503";
// /////////////
const API = umgm();
console.log("\n**** MODELl:\n", MODEL);
console.log(API);
const client = ClientLLM(API);
const getResponse = async (payload, timeout = 60) => {
payload["model"] = MODEL;
const url = `https://router.huggingface.co/hf-inference/models/${MODEL}/v1/chat/completions`;
const rr = await client.sendRequest(url, payload, timeout);
if (rr.error) {
if (rr.error.code === 499) {
alert("Request Interrotta");
return null;
} else {
return rr;
}
}
if (!rr.response.choices || !rr.response.choices[0] || !rr.response.choices[0].message || rr.response.choices[0].message.content === undefined) {
rr.error = client.createError("Risposta non valida", "ParseError", 500, { message: "La risposta non contiene il contenuto atteso" });
rr.ok = false;
return rr;
// return RequestResult(false, null, err);
}
rr.data = rr.response.choices[0].message.content;
return rr;
};
////////////////////////
const responseDetails = {
set(response) {
this.response = response;
},
// get_id() {
// return this.response.id;
// },
// get_created() {
// return this.response.created;
// },
// get_model() {
// return this.response.model;
// },
// get_index() {
// return this.response.choices[0].index;
// },
// get_role() {
// return this.response.choices[0].message.role;
// },
// get_tool_calls() {
// return this.response.choices[0].message.tool_calls;
// },
// get_content() {
// return this.response.choices[0].message.content;
// },
// get_finish_reason() {
// return this.response.choices[0].finish_reason;
// },
// get_prompt_tokens() {
// return this.response.usage.prompt_tokens;
// },
get_total_tokens() {
return this.response.usage.total_tokens;
},
get_completion_tokens() {
return this.response.usage.completion_tokens;
},
};
const calcTokens = {
sum_input_tokens: 0,
sum_generate_tokens: 0,
init() {
this.sum_input_tokens = 0;
this.sum_generate_tokens = 0;
},
add(response) {
if (!response) return;
this.sum_input_tokens += response.usage.total_tokens;
this.sum_generate_tokens += response.usage.completion_tokens;
},
get_sum_input_tokens() {
return this.sum_input_tokens;
},
get_sum_generate_tokens() {
return this.sum_generate_tokens;
},
};
function cancelClientRequest() {
client.cancelRequest();
}
const getPromptTokens = (err) => {
const msg = err.details.message;
const match = msg.match(/Prompt contains (\d+) tokens/);
return match ? parseInt(match[1], 10) : null;
};
const getModelToken = (err) => {
const msg = err.details.message;
const match = msg.match(/model with (\d+) maximum context length/);
return match ? parseInt(match[1], 10) : null;
};
const isTooLarge = (err) => {
const msg = err.details.message;
const tks = msg.includes("too large");
return tks;
};
const truncateInput = (txt, decr) => {
const tl = txt.length;
const lim = tl - decr;
const s = txt.substring(0, lim);
return s;
};
const getPartSize = (document, prompt, decrement) => {
// Funzione interna per trovare un punto nel documento a partire da una certa posizione
const findLimitWithPoint = (text, freeLength) => {
const pointIndex = text.indexOf(".", freeLength);
let limit = (pointIndex !== -1 ? pointIndex : freeLength) + 1;
if (limit > freeLength + 100) {
limit = freeLength;
}
return limit;
};
// Calcola la lunghezza totale del documento e del prompt
const totalLength = document.length + prompt.length;
// Calcola la lunghezza massima disponibile dopo il decremento
const availableLength = MAX_PROMPT_LENGTH - decrement;
let partSize = 0;
// Se la lunghezza totale è minore della lunghezza disponibile, usa la lunghezza del documento
if (totalLength < availableLength) {
partSize = document.length;
} else {
// Altrimenti, trova un punto nel documento per limitare la lunghezza
partSize = findLimitWithPoint(document, availableLength);
}
return partSize;
};
const getPartDoc = (pRgt, partSize) => {
const pLft = pRgt.substring(0, partSize);
pRgt = pRgt.substring(partSize).trim();
return [pLft, pRgt];
};
const ragLog = (msg, lftL, rgtL, answers) => {
const maxl = MAX_PROMPT_LENGTH;
const rspsL = answers.reduce((acc, cur) => {
return acc + cur.length;
}, 0);
let s = `${msg} mx:${maxl} lft:${lftL} rgt:${rgtL} arr:${rspsL}`;
xlog(s);
const row = formatRow([msg, lftL, rgtL, rspsL], [8, -7, -7, -7]);
UaLog.log(row);
};
const Rag = {
// costituito dalla risposte accumulate sistemate
ragContext: "",
//query usata per creare la lista delle rispste
ragQuery: "",
// risposta finale alla qury contetso
ragAnswer: "",
answers: [],
docContextLst: [],
prompts: [],
doc: "",
doc_part: "",
init() {
this.readRespsFromDb();
this.readFromDb();
calcTokens.init();
},
returnOk() {
const ok = this.ragContext.length > 10;
return ok;
},
saveToDb() {
const js = {
context: this.ragContext,
ragquery: this.ragQuery,
raganswer: this.ragAnswer,
};
UaDb.saveJson(ID_RAG, js);
UaDb.saveArray(ID_THREAD, ThreadMgr.rows);
},
readFromDb() {
const js = UaDb.readJson(ID_RAG);
this.ragContext = js.context || "";
this.ragQuery = js.ragquery || "";
this.ragAnswer = js.raganswer || "";
ThreadMgr.rows = UaDb.readArray(ID_THREAD);
},
saveRespToDb() {
UaDb.saveArray(ID_RESPONSES, this.answers);
},
readRespsFromDb() {
this.answers = UaDb.readArray(ID_RESPONSES);
},
// AAA visualizzazione prompts
addPrompt(p) {
// this.prompts.push(p);
},
// documenti => risposte RAG => context
async requestDocsRAG(query) {
DataMgr.deleteJsonDati();
DataMgr.readDbDocNames();
DataMgr.readDbDocs();
this.docContextLst = [];
this.ragQuery = query;
this.saveToDb();
let ndoc = 0;
try {
let j = 1;
for (let i = 0; i < DataMgr.docs.length; i++) {
let doc = DataMgr.docs[i];
if (doc.trim() == "") continue;
const docName = DataMgr.doc_names[i];
const doc_entire_len = doc.length;
xlog(`${docName} (${doc_entire_len}) `);
UaLog.log(`${docName} (${doc_entire_len}) `);
++ndoc;
let npart = 1;
let decr = 0;
let prompt = "";
let lft = "";
let rgt = "";
let answer = "";
let docAnswersLst = [];
while (true) {
const partSize = getPartSize(doc, promptDoc("", query, ""), decr);
if (partSize < 10) break;
[lft, rgt] = getPartDoc(doc, partSize);
ragLog(`${j}) ${ndoc},${npart}`, lft.length, rgt.length, this.answers);
prompt = promptDoc(lft, query, docName);
this.addPrompt(prompt);
const payload = getPayloadDoc(prompt);
const rr = await getResponse(payload, 90);
if (!rr) {
return "";
}
const err = rr.error;
if (!rr.ok) {
console.error(`ERR1\n`, err);
const code = err.code;
if (code == 400) {
if (isTooLarge(err)) {
UaLog.log(`Error tokens Doc ${prompt.length}`);
decr += PROMPT_DECR;
continue;
} else throw err;
} else if (code == 408) {
UaLog.log(`Error timeout Context`);
continue;
} else throw err;
}
answer = rr.data;
if (!answer) return "";
let itks = calcTokens.get_sum_input_tokens();
let gtks = calcTokens.get_sum_generate_tokens();
console.log(`Sum Tokens: ${itks} ${gtks}`);
const rsp = rr.response;
responseDetails.set(rsp);
itks = responseDetails.get_total_tokens();
gtks = responseDetails.get_completion_tokens();
console.log(`Response Tokens: ${itks} ${gtks}`);
calcTokens.add(rsp);
npart++;
j++;
doc = rgt;
answer = cleanResponse(answer);
docAnswersLst.push(answer);
const s = `DOCUMENTO : ${docName}_${npart}\n${answer}`;
this.answers.push(s);
} // end while
// doc answer list => cContext
const docAnswersLen = docAnswersLst.length;
let docAnswresTxt = docAnswersLst.join("\n\n");
let docContext = "";
while (true) {
prompt = promptBuildContext(docAnswresTxt, this.ragQuery);
const payload = getPayloadBuildContext(prompt);
const rr = await getResponse(payload, 90);
if (!rr) {
return "";
}
const err = rr.error;
if (!rr.ok) {
console.error(`ERR2\n`, err);
const code = err.code;
if (code == 400) {
if (isTooLarge(err)) {
UaLog.log(`Error tokens build Context ${prompt.length}`);
docAnswresTxt = truncateInput(docAnswresTxt, PROMPT_DECR);
continue;
} else throw err;
} else if (code == 408) {
UaLog.log(`Error timeout Context`);
continue;
} else throw err;
}
docContext = rr.data;
if (!docContext) return "";
const rsp = rr.response;
calcTokens.add(rsp);
break;
} //end while
UaLog.log(`context ${docAnswersLen} => ${docContext.length}`);
docContext = `\n### DOCUMENTO: ${docName}\n ${docContext}`;
this.docContextLst.push(docContext);
} // end for document
} catch (err) {
console.error("ERR3\n", err);
throw err;
}
// console.log("tagContext_0\n", this.ragContext);
this.ragContext = this.docContextLst.join("\n\n");
// console.log("tagContext_1\n", this.ragContext);
this.saveToDb();
// queryWithContext finale che utilizza context e genera la prima risposta
{
let answer = "";
let context = this.ragContext;
try {
while (true) {
let prompt = promptWithContext(context, query);
const payload = getPayloadWithContext(prompt);
const rr = await getResponse(payload, 90);
if (!rr) {
return "";
}
const err = rr.error;
if (!rr.ok) {
console.error(`ERR4\n`, err);
const code = err.code;
if (code == 400) {
if (isTooLarge(err)) {
UaLog.log(`Error tokens with Context ${prompt.length}`);
context = truncateInput(context, PROMPT_DECR);
continue;
} else throw err;
} else if (code == 408) {
UaLog.log(`Error timeout Context`);
continue;
} else throw err;
}
answer = rr.data;
if (!answer) return "";
const rsp = rr.response;
calcTokens.add(rsp);
break;
}
answer = cleanResponse(answer);
this.ragAnswer = answer;
this.saveRespToDb();
ThreadMgr.init();
this.saveToDb();
UaLog.log(`Risposta: (${this.ragAnswer.length})`);
//log del totale tokens
const itks = calcTokens.get_sum_input_tokens();
const gtks = calcTokens.get_sum_generate_tokens();
UaLog.log(`Tokens: ${itks} ${gtks}`);
return answer;
} catch (err) {
console.error("ERR5\n", err);
throw err;
}
} // end query
},
//richiesta iniziale della conversazione
async requestContext(query) {
let answer = "";
if (!this.ragContext) {
// gestisce il pulsante verde che ha accettao il contetso vuoto
this.ragContext = "Sei un assistente AI dispoibile a soddisfare tutte le mi richieste";
}
if (ThreadMgr.isFirst()) {
ThreadMgr.init();
try {
let context = this.ragContext;
let thread = ThreadMgr.getThread();
while (true) {
prompt = promptThread(context, thread, query);
const payload = getPayloadThread(prompt);
const rr = await getResponse(payload, 90);
if (!rr) {
return "";
}
const err = rr.error;
if (!rr.ok) {
console.error(`ERR6\n`, err);
const code = err.code;
if (code == 400) {
if (isTooLarge(err)) {
UaLog.log(`Error tokens with Context ${prompt.length}`);
context = truncateInput(context, PROMPT_DECR);
continue;
} else throw err;
} else if (code == 408) continue;
else throw err;
}
answer = rr.data;
if (!answer) return "";
let itks = calcTokens.get_sum_input_tokens();
let gtks = calcTokens.get_sum_generate_tokens();
console.log(`Sum Tokens: ${itks} ${gtks}`);
const rsp = rr.response;
responseDetails.set(rsp);
itks = responseDetails.get_total_tokens();
gtks = responseDetails.get_completion_tokens();
console.log(`Response Tokens: ${itks} ${gtks}`);
calcTokens.add(rsp);
break;
}
answer = cleanResponse(answer);
ThreadMgr.add(query, answer);
answer = ThreadMgr.getThread();
UaLog.log(`Inizio Conversazione (${prompt.length})`);
return answer;
} catch (err) {
console.error("ERR7\n", err);
throw err;
}
} else {
try {
let context = this.ragContext;
let thread = ThreadMgr.getThread();
let prompt = "";
while (true) {
prompt = promptThread(context, thread, query);
const payload = getPayloadThread(prompt);
const rr = await getResponse(payload, 90);
if (!rr) {
return "";
}
const err = rr.error;
if (!rr.ok) {
console.error(`ERR8\n`, err);
const code = err.code;
if (code == 400) {
if (isTooLarge(err)) {
UaLog.log(`Error tokens with Context ${prompt.length}`);
context = truncateInput(context, PROMPT_DECR);
continue;
} else throw err;
} else if (code == 408) {
UaLog.log(`Error timeout Context`);
continue;
} else throw err;
}
answer = rr.data;
if (!answer) return "";
const rsp = rr.response;
let itks = calcTokens.get_sum_input_tokens();
let gtks = calcTokens.get_sum_generate_tokens();
console.log(`Sum Tokens: ${itks} ${gtks}`);
responseDetails.set(rsp);
itks = responseDetails.get_total_tokens();
gtks = responseDetails.get_completion_tokens();
console.log(`Response Tokens: ${itks} ${gtks}`);
calcTokens.add(rsp);
break;
}
answer = cleanResponse(answer);
ThreadMgr.add(query, answer);
answer = ThreadMgr.getThread();
UaLog.log(`Conversazione (${prompt.length})`);
return answer;
} catch (err) {
console.error("ERR9\n", err);
throw err;
}
}
},
};
const LLM = "# Assistant:";
const USER = "# User:";
const ThreadMgr = {
rows: [],
init() {
this.rows = [];
if (!!Rag.ragAnswer) {
this.add(Rag.ragQuery, Rag.ragAnswer);
} else {
this.add("", "");
}
},
add(query, resp) {
const row = [query, resp];
this.rows.push(row);
UaDb.saveArray(ID_THREAD, ThreadMgr.rows);
},
getThread() {
const rows = [];
for (const ua of this.rows) {
const u = ua[0];
const a = ua[1];
if (!u) continue;
rows.push(`${USER}\n${u}\n${LLM}\n${a}\n`);
}
return rows.join("\n\n");
},
isFirst() {
return this.rows.length < 2;
},
};