Spaces:
Runtime error
Runtime error
input direction bug solved
Browse files- app.py +21 -19
- templates/chat.html +114 -8
app.py
CHANGED
@@ -12,6 +12,7 @@ from langchain.retrievers import ContextualCompressionRetriever
|
|
12 |
from langchain.retrievers import EnsembleRetriever
|
13 |
from langchain_community.vectorstores import FAISS
|
14 |
from langchain_groq import ChatGroq
|
|
|
15 |
from langchain import hub
|
16 |
import pickle
|
17 |
import os
|
@@ -25,6 +26,7 @@ GROQ_API_KEY = os.getenv("GROQ_API_KEY")
|
|
25 |
SECRET_KEY = os.getenv("SECRET_KEY")
|
26 |
SESSION_ID_DEFAULT = "abc123"
|
27 |
|
|
|
28 |
# Set environment variables
|
29 |
os.environ['USER_AGENT'] = USER_AGENT
|
30 |
os.environ["GROQ_API_KEY"] = GROQ_API_KEY
|
@@ -41,7 +43,7 @@ app.config['SECRET_KEY'] = SECRET_KEY
|
|
41 |
|
42 |
embed_model = HuggingFaceEmbeddings(model_name="Alibaba-NLP/gte-multilingual-base", model_kwargs={"trust_remote_code":True})
|
43 |
llm = ChatGroq(
|
44 |
-
model="llama-3.1-
|
45 |
temperature=0.0,
|
46 |
max_tokens=1024,
|
47 |
max_retries=2
|
@@ -54,9 +56,9 @@ combined_vectorstore = excel_vectorstore
|
|
54 |
|
55 |
with open('combined_recursive_keyword_retriever.pkl', 'rb') as f:
|
56 |
combined_keyword_retriever = pickle.load(f)
|
57 |
-
combined_keyword_retriever.k =
|
58 |
|
59 |
-
semantic_retriever = combined_vectorstore.as_retriever(search_type="mmr", search_kwargs={
|
60 |
|
61 |
|
62 |
# initialize the ensemble retriever
|
@@ -65,32 +67,31 @@ ensemble_retriever = EnsembleRetriever(
|
|
65 |
)
|
66 |
|
67 |
|
68 |
-
embeddings_filter = EmbeddingsFilter(embeddings=embed_model, similarity_threshold=0.
|
69 |
compression_retriever = ContextualCompressionRetriever(
|
70 |
-
base_compressor=embeddings_filter, base_retriever=
|
71 |
)
|
72 |
|
73 |
template = """
|
74 |
-
User:
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
place the Left-to-Right embedding character (U+202A) before the English word or number,
|
80 |
-
and the Pop Directional Formatting character (U+202C) afterward.
|
81 |
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
|
86 |
-
|
87 |
-
Keep in mind, you will lose the job, if you answer out of CONTEXT questions
|
88 |
|
|
|
|
|
|
|
89 |
CONTEXT: {context}
|
90 |
Query: {question}
|
91 |
|
92 |
-
|
93 |
-
Assistant:
|
94 |
"""
|
95 |
|
96 |
prompt = ChatPromptTemplate.from_template(template)
|
@@ -125,6 +126,7 @@ def handle_disconnect():
|
|
125 |
@socketio.on('message')
|
126 |
def handle_message(data):
|
127 |
question = data.get('question')
|
|
|
128 |
try:
|
129 |
for chunk in rag_chain.stream(question):
|
130 |
emit('response', chunk, room=request.sid)
|
|
|
12 |
from langchain.retrievers import EnsembleRetriever
|
13 |
from langchain_community.vectorstores import FAISS
|
14 |
from langchain_groq import ChatGroq
|
15 |
+
from langchain_community.document_compressors import JinaRerank
|
16 |
from langchain import hub
|
17 |
import pickle
|
18 |
import os
|
|
|
26 |
SECRET_KEY = os.getenv("SECRET_KEY")
|
27 |
SESSION_ID_DEFAULT = "abc123"
|
28 |
|
29 |
+
|
30 |
# Set environment variables
|
31 |
os.environ['USER_AGENT'] = USER_AGENT
|
32 |
os.environ["GROQ_API_KEY"] = GROQ_API_KEY
|
|
|
43 |
|
44 |
embed_model = HuggingFaceEmbeddings(model_name="Alibaba-NLP/gte-multilingual-base", model_kwargs={"trust_remote_code":True})
|
45 |
llm = ChatGroq(
|
46 |
+
model="llama-3.1-70b-versatile",
|
47 |
temperature=0.0,
|
48 |
max_tokens=1024,
|
49 |
max_retries=2
|
|
|
56 |
|
57 |
with open('combined_recursive_keyword_retriever.pkl', 'rb') as f:
|
58 |
combined_keyword_retriever = pickle.load(f)
|
59 |
+
combined_keyword_retriever.k = 1000
|
60 |
|
61 |
+
semantic_retriever = combined_vectorstore.as_retriever(search_type="mmr", search_kwargs={"k": 100})
|
62 |
|
63 |
|
64 |
# initialize the ensemble retriever
|
|
|
67 |
)
|
68 |
|
69 |
|
70 |
+
embeddings_filter = EmbeddingsFilter(embeddings=embed_model, similarity_threshold=0.4)
|
71 |
compression_retriever = ContextualCompressionRetriever(
|
72 |
+
base_compressor=embeddings_filter, base_retriever=semantic_retriever
|
73 |
)
|
74 |
|
75 |
template = """
|
76 |
+
User Instructions:
|
77 |
+
|
78 |
+
You are an Arabic AI Assistant focused on providing clear, concise but detailed responses.
|
79 |
+
Always answer truthfully. If the user query is not relevant to the provided CONTEXT, respond stating the reason.
|
80 |
+
Generate responses in Arabic. Format any English words and numbers appropriately for clarity.
|
|
|
|
|
81 |
|
82 |
+
The context may contain English word or numbers.
|
83 |
+
Use Unicode direction codes to specify the text direction for English words/numbers in response.
|
84 |
+
Use \u202A (Left-to-Right) before and \u202C (Pop Direction Formatting) after any English words or numbers for clarity.
|
85 |
|
86 |
+
Round off numbers with decimal integers to two decimals.
|
|
|
87 |
|
88 |
+
Use bullet points or numbered lists where applicable for better organization.
|
89 |
+
Provide detailed yet concise answers, covering all important aspects.
|
90 |
+
Remember, responding outside the CONTEXT may lead to termination of the interaction.
|
91 |
CONTEXT: {context}
|
92 |
Query: {question}
|
93 |
|
94 |
+
After generating your response, ensure proper formatting and text direction of Arabic and English words/numbers. Return only the AI-generated answer.
|
|
|
95 |
"""
|
96 |
|
97 |
prompt = ChatPromptTemplate.from_template(template)
|
|
|
126 |
@socketio.on('message')
|
127 |
def handle_message(data):
|
128 |
question = data.get('question')
|
129 |
+
print(question)
|
130 |
try:
|
131 |
for chunk in rag_chain.stream(question):
|
132 |
emit('response', chunk, room=request.sid)
|
templates/chat.html
CHANGED
@@ -270,6 +270,17 @@
|
|
270 |
|
271 |
</style>
|
272 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
273 |
|
274 |
</head>
|
275 |
<body>
|
@@ -376,9 +387,10 @@
|
|
376 |
</div>
|
377 |
|
378 |
</div>
|
|
|
379 |
<div class="w-full bg-transparent">
|
380 |
<div class="input-options-container max-w-3xl p-3 flex flex-row items-center justify-between border rounded-full shadow-xl mx-auto">
|
381 |
-
<div id="question-box" dir="
|
382 |
<button class="sendbtn " style="width: 40px; height: 40px; padding: 8px; aspect-ratio: 1; display: flex; align-items: center; justify-content: center; border-radius: 30px; border: none; color: white;">
|
383 |
<svg width="30px" height="30px" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg"><g id="SVGRepo_bgCarrier" stroke-width="0"></g><g id="SVGRepo_tracerCarrier" stroke-linecap="round" stroke-linejoin="round"></g><g id="SVGRepo_iconCarrier"> <path d="M12 6V18M12 6L7 11M12 6L17 11" stroke="#ffffff" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"></path> </g></svg>
|
384 |
</button>
|
@@ -387,6 +399,9 @@
|
|
387 |
</div>
|
388 |
</div>
|
389 |
|
|
|
|
|
|
|
390 |
<!-- SocketIO -->
|
391 |
<script src="https://cdn.socket.io/4.5.0/socket.io.min.js"></script>
|
392 |
<!-- Showdown.js -->
|
@@ -395,7 +410,7 @@
|
|
395 |
<script>
|
396 |
|
397 |
// Initialize Socket.IO client
|
398 |
-
var socket = io.connect('
|
399 |
transports: ['websocket']
|
400 |
});
|
401 |
|
@@ -418,10 +433,10 @@
|
|
418 |
const isRTL = true;
|
419 |
$(".chat-container").append(
|
420 |
`
|
421 |
-
<div class="chat-block load-chat-block w-full flex flex-row items-center
|
422 |
-
<div
|
423 |
-
<span class="
|
424 |
-
<svg fill="#000000" width="
|
425 |
</span>
|
426 |
</div>
|
427 |
<div class="px-3 py-1 order-1">
|
@@ -496,7 +511,7 @@
|
|
496 |
</span>
|
497 |
</div>
|
498 |
<div class="px-3 py-1">
|
499 |
-
<div class="message-content pr-2" style="width: 100%; height: 100%; margin: auto; font-weight: 500; text-wrap: pretty;
|
500 |
${answer}
|
501 |
</div>
|
502 |
</div>
|
@@ -567,7 +582,7 @@
|
|
567 |
console.warn("Disconnected from server:", reason);
|
568 |
response = "";
|
569 |
// appendAnswer("You have been disconnected from the server. Please refresh the page to reconnect.");
|
570 |
-
socket = io.connect('
|
571 |
transports: ['websocket']
|
572 |
});
|
573 |
});
|
@@ -588,5 +603,96 @@
|
|
588 |
});
|
589 |
</script>
|
590 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
591 |
</body>
|
592 |
</html>
|
|
|
270 |
|
271 |
</style>
|
272 |
|
273 |
+
<style>
|
274 |
+
#question-box span[dir="rtl"] {
|
275 |
+
direction: rtl;
|
276 |
+
unicode-bidi: embed;
|
277 |
+
}
|
278 |
+
#question-box span[dir="ltr"] {
|
279 |
+
direction: ltr;
|
280 |
+
unicode-bidi: embed;
|
281 |
+
}
|
282 |
+
</style>
|
283 |
+
|
284 |
|
285 |
</head>
|
286 |
<body>
|
|
|
387 |
</div>
|
388 |
|
389 |
</div>
|
390 |
+
|
391 |
<div class="w-full bg-transparent">
|
392 |
<div class="input-options-container max-w-3xl p-3 flex flex-row items-center justify-between border rounded-full shadow-xl mx-auto">
|
393 |
+
<div id="question-box" dir="auto" class="inline-block px-4 text-2xl" contenteditable="true" data-placeholder="أكتب سؤالك هنا..." style="width: 100%; max-height: 100px; outline: none; font-weight: 500; font-size: 14px;"></div>
|
394 |
<button class="sendbtn " style="width: 40px; height: 40px; padding: 8px; aspect-ratio: 1; display: flex; align-items: center; justify-content: center; border-radius: 30px; border: none; color: white;">
|
395 |
<svg width="30px" height="30px" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg"><g id="SVGRepo_bgCarrier" stroke-width="0"></g><g id="SVGRepo_tracerCarrier" stroke-linecap="round" stroke-linejoin="round"></g><g id="SVGRepo_iconCarrier"> <path d="M12 6V18M12 6L7 11M12 6L17 11" stroke="#ffffff" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"></path> </g></svg>
|
396 |
</button>
|
|
|
399 |
</div>
|
400 |
</div>
|
401 |
|
402 |
+
|
403 |
+
|
404 |
+
|
405 |
<!-- SocketIO -->
|
406 |
<script src="https://cdn.socket.io/4.5.0/socket.io.min.js"></script>
|
407 |
<!-- Showdown.js -->
|
|
|
410 |
<script>
|
411 |
|
412 |
// Initialize Socket.IO client
|
413 |
+
var socket = io.connect('http://127.0.0.1:5000/', {
|
414 |
transports: ['websocket']
|
415 |
});
|
416 |
|
|
|
433 |
const isRTL = true;
|
434 |
$(".chat-container").append(
|
435 |
`
|
436 |
+
<div class="chat-block load-chat-block w-full flex flex-row-reverse items-center justify-end bg-secondary p-3 rounded-xl z-10" style="background-color: rgba(242, 255, 225, 0.678);">
|
437 |
+
<div class="flex items-center order-2" >
|
438 |
+
<span class="bg-white" style="width: 36px; height: 36px; display: flex; align-items: center; justify-content: center; border-radius: 50px; padding: 5px; border: 2px solid #c2c2c2;">
|
439 |
+
<svg fill="#000000" width="36px" height="36px" viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg"><g id="SVGRepo_bgCarrier" stroke-width="0"></g><g id="SVGRepo_tracerCarrier" stroke-linecap="round" stroke-linejoin="round"></g><g id="SVGRepo_iconCarrier"><path d="M21.928 11.607c-.202-.488-.635-.605-.928-.633V8c0-1.103-.897-2-2-2h-6V4.61c.305-.274.5-.668.5-1.11a1.5 1.5 0 0 0-3 0c0 .442.195.836.5 1.11V6H5c-1.103 0-2 .897-2 2v2.997l-.082.006A1 1 0 0 0 1.99 12v2a1 1 0 0 0 1 1H3v5c0 1.103.897 2 2 2h14c1.103 0 2-.897 2-2v-5a1 1 0 0 0 1-1v-1.938a1.006 1.006 0 0 0-.072-.455zM5 20V8h14l.001 3.996L19 12v2l.001.005.001 5.995H5z"></path><ellipse cx="8.5" cy="12" rx="1.5" ry="2"></ellipse><ellipse cx="15.5" cy="12" rx="1.5" ry="2"></ellipse><path d="M8 16h8v2H8z"></path></g></svg>
|
440 |
</span>
|
441 |
</div>
|
442 |
<div class="px-3 py-1 order-1">
|
|
|
511 |
</span>
|
512 |
</div>
|
513 |
<div class="px-3 py-1">
|
514 |
+
<div class="message-content pr-2" style="width: 100%; height: 100%; margin: auto; font-weight: 500; text-wrap: pretty; -webkit-locale: 'ar';">
|
515 |
${answer}
|
516 |
</div>
|
517 |
</div>
|
|
|
582 |
console.warn("Disconnected from server:", reason);
|
583 |
response = "";
|
584 |
// appendAnswer("You have been disconnected from the server. Please refresh the page to reconnect.");
|
585 |
+
socket = io.connect('http://127.0.0.1:5000/', {
|
586 |
transports: ['websocket']
|
587 |
});
|
588 |
});
|
|
|
603 |
});
|
604 |
</script>
|
605 |
|
606 |
+
<script>
|
607 |
+
const inputDiv = document.getElementById('question-box');
|
608 |
+
|
609 |
+
inputDiv.addEventListener('input', function(event) {
|
610 |
+
// Save the caret position
|
611 |
+
const caretPosition = saveCaretPosition(this);
|
612 |
+
|
613 |
+
// Process the content
|
614 |
+
processBiDiContent(this);
|
615 |
+
|
616 |
+
// Restore the caret position
|
617 |
+
restoreCaretPosition(this, caretPosition);
|
618 |
+
});
|
619 |
+
|
620 |
+
function processBiDiContent(element) {
|
621 |
+
let content = element.innerText;
|
622 |
+
|
623 |
+
// Regex for English letters and numbers
|
624 |
+
const englishRegex = /[A-Za-z0-9]+/g;
|
625 |
+
|
626 |
+
// Escape HTML to prevent injection
|
627 |
+
content = escapeHTML(content);
|
628 |
+
|
629 |
+
// Wrap Arabic text in <span dir="rtl">
|
630 |
+
// content = content.replace(arabicRegex, function(match) {
|
631 |
+
// return `<span dir="rtl">${match}</span>`;
|
632 |
+
// });
|
633 |
+
|
634 |
+
// Wrap English text and numbers in <span dir="ltr">
|
635 |
+
content = content.replace(englishRegex, function(match) {
|
636 |
+
return `<span dir="ltr">${match}</span>`;
|
637 |
+
});
|
638 |
+
|
639 |
+
// Update the element's HTML
|
640 |
+
element.innerHTML = content;
|
641 |
+
}
|
642 |
+
|
643 |
+
function escapeHTML(str) {
|
644 |
+
return str.replace(/[&<>"']/g, function(m) {
|
645 |
+
return {
|
646 |
+
'&': '&amp;',
|
647 |
+
'<': '&lt;',
|
648 |
+
'>': '&gt;',
|
649 |
+
'"': '&quot;',
|
650 |
+
"'": '&#39;'
|
651 |
+
}[m];
|
652 |
+
});
|
653 |
+
}
|
654 |
+
|
655 |
+
function saveCaretPosition(context) {
|
656 |
+
const selection = window.getSelection();
|
657 |
+
const range = selection.getRangeAt(0);
|
658 |
+
const preCaretRange = range.cloneRange();
|
659 |
+
preCaretRange.selectNodeContents(context);
|
660 |
+
preCaretRange.setEnd(range.endContainer, range.endOffset);
|
661 |
+
const caretOffset = preCaretRange.toString().length;
|
662 |
+
return caretOffset;
|
663 |
+
}
|
664 |
+
|
665 |
+
function restoreCaretPosition(context, offset) {
|
666 |
+
let charIndex = 0;
|
667 |
+
const range = document.createRange();
|
668 |
+
range.setStart(context, 0);
|
669 |
+
range.collapse(true);
|
670 |
+
const nodeStack = [context];
|
671 |
+
let node, foundStart = false;
|
672 |
+
|
673 |
+
while (!foundStart && (node = nodeStack.pop())) {
|
674 |
+
if (node.nodeType === Node.TEXT_NODE) {
|
675 |
+
const nextCharIndex = charIndex + node.length;
|
676 |
+
if (offset >= charIndex && offset <= nextCharIndex) {
|
677 |
+
range.setStart(node, offset - charIndex);
|
678 |
+
range.collapse(true);
|
679 |
+
foundStart = true;
|
680 |
+
}
|
681 |
+
charIndex = nextCharIndex;
|
682 |
+
} else {
|
683 |
+
let i = node.childNodes.length;
|
684 |
+
while (i--) {
|
685 |
+
nodeStack.push(node.childNodes[i]);
|
686 |
+
}
|
687 |
+
}
|
688 |
+
}
|
689 |
+
|
690 |
+
const sel = window.getSelection();
|
691 |
+
sel.removeAllRanges();
|
692 |
+
sel.addRange(range);
|
693 |
+
}
|
694 |
+
|
695 |
+
</script>
|
696 |
+
|
697 |
</body>
|
698 |
</html>
|