Ritesh-hf committed on
Commit
af912a1
1 Parent(s): 5621a3a

input direction bug solved

Browse files
Files changed (2) hide show
  1. app.py +21 -19
  2. templates/chat.html +114 -8
app.py CHANGED
@@ -12,6 +12,7 @@ from langchain.retrievers import ContextualCompressionRetriever
12
  from langchain.retrievers import EnsembleRetriever
13
  from langchain_community.vectorstores import FAISS
14
  from langchain_groq import ChatGroq
 
15
  from langchain import hub
16
  import pickle
17
  import os
@@ -25,6 +26,7 @@ GROQ_API_KEY = os.getenv("GROQ_API_KEY")
25
  SECRET_KEY = os.getenv("SECRET_KEY")
26
  SESSION_ID_DEFAULT = "abc123"
27
 
 
28
  # Set environment variables
29
  os.environ['USER_AGENT'] = USER_AGENT
30
  os.environ["GROQ_API_KEY"] = GROQ_API_KEY
@@ -41,7 +43,7 @@ app.config['SECRET_KEY'] = SECRET_KEY
41
 
42
  embed_model = HuggingFaceEmbeddings(model_name="Alibaba-NLP/gte-multilingual-base", model_kwargs={"trust_remote_code":True})
43
  llm = ChatGroq(
44
- model="llama-3.1-8b-instant",
45
  temperature=0.0,
46
  max_tokens=1024,
47
  max_retries=2
@@ -54,9 +56,9 @@ combined_vectorstore = excel_vectorstore
54
 
55
  with open('combined_recursive_keyword_retriever.pkl', 'rb') as f:
56
  combined_keyword_retriever = pickle.load(f)
57
- combined_keyword_retriever.k = 100
58
 
59
- semantic_retriever = combined_vectorstore.as_retriever(search_type="mmr", search_kwargs={'k': 100})
60
 
61
 
62
  # initialize the ensemble retriever
@@ -65,32 +67,31 @@ ensemble_retriever = EnsembleRetriever(
65
  )
66
 
67
 
68
- embeddings_filter = EmbeddingsFilter(embeddings=embed_model, similarity_threshold=0.5)
69
  compression_retriever = ContextualCompressionRetriever(
70
- base_compressor=embeddings_filter, base_retriever=ensemble_retriever
71
  )
72
 
73
  template = """
74
- User: You are an Arabic AI Assistant that follows instructions extremely well.
75
- Please be truthful and give direct answers. Please tell 'I don't know' if user query is not in CONTEXT.
76
- Generate the response in Arabic. Use bullet/number lists wherever necessary. If the response includes any English words or numbers,
77
- format them so that they are displayed from left to right within the Arabic text.
78
- Use the appropriate Unicode control characters to achieve this. For example,
79
- place the Left-to-Right embedding character (U+202A) before the English word or number,
80
- and the Pop Directional Formatting character (U+202C) afterward.
81
 
82
- Example:
83
- Input: "What year is it?"
84
- Output: "ما هو العام؟ \u202A2023\u202C"
85
 
86
- Give detail but concise answers explaining all the important parts.
87
- Keep in mind, you will lose the job, if you answer out of CONTEXT questions
88
 
 
 
 
89
  CONTEXT: {context}
90
  Query: {question}
91
 
92
- Remember only return AI answer
93
- Assistant:
94
  """
95
 
96
  prompt = ChatPromptTemplate.from_template(template)
@@ -125,6 +126,7 @@ def handle_disconnect():
125
  @socketio.on('message')
126
  def handle_message(data):
127
  question = data.get('question')
 
128
  try:
129
  for chunk in rag_chain.stream(question):
130
  emit('response', chunk, room=request.sid)
 
12
  from langchain.retrievers import EnsembleRetriever
13
  from langchain_community.vectorstores import FAISS
14
  from langchain_groq import ChatGroq
15
+ from langchain_community.document_compressors import JinaRerank
16
  from langchain import hub
17
  import pickle
18
  import os
 
26
  SECRET_KEY = os.getenv("SECRET_KEY")
27
  SESSION_ID_DEFAULT = "abc123"
28
 
29
+
30
  # Set environment variables
31
  os.environ['USER_AGENT'] = USER_AGENT
32
  os.environ["GROQ_API_KEY"] = GROQ_API_KEY
 
43
 
44
  embed_model = HuggingFaceEmbeddings(model_name="Alibaba-NLP/gte-multilingual-base", model_kwargs={"trust_remote_code":True})
45
  llm = ChatGroq(
46
+ model="llama-3.1-70b-versatile",
47
  temperature=0.0,
48
  max_tokens=1024,
49
  max_retries=2
 
56
 
57
  with open('combined_recursive_keyword_retriever.pkl', 'rb') as f:
58
  combined_keyword_retriever = pickle.load(f)
59
+ combined_keyword_retriever.k = 1000
60
 
61
+ semantic_retriever = combined_vectorstore.as_retriever(search_type="mmr", search_kwargs={"k": 100})
62
 
63
 
64
  # initialize the ensemble retriever
 
67
  )
68
 
69
 
70
+ embeddings_filter = EmbeddingsFilter(embeddings=embed_model, similarity_threshold=0.4)
71
  compression_retriever = ContextualCompressionRetriever(
72
+ base_compressor=embeddings_filter, base_retriever=semantic_retriever
73
  )
74
 
75
  template = """
76
+ User Instructions:
77
+
78
+ You are an Arabic AI Assistant focused on providing clear, concise but detailed responses.
79
+ Always answer truthfully. If the user query is not relevant to the provided CONTEXT, respond stating the reason.
80
+ Generate responses in Arabic. Format any English words and numbers appropriately for clarity.
 
 
81
 
82
+ The context may contain English word or numbers.
83
+ Use Unicode direction codes to specify the text direction for English words/numbers in response.
84
+ Use \u202A (Left-to-Right) before and \u202C (Pop Direction Formatting) after any English words or numbers for clarity.
85
 
86
+ Round off numbers with decimal integers to two decimals.
 
87
 
88
+ Use bullet points or numbered lists where applicable for better organization.
89
+ Provide detailed yet concise answers, covering all important aspects.
90
+ Remember, responding outside the CONTEXT may lead to termination of the interaction.
91
  CONTEXT: {context}
92
  Query: {question}
93
 
94
+ After generating your response, ensure proper formatting and text direction of Arabic and English words/numbers. Return only the AI-generated answer.
 
95
  """
96
 
97
  prompt = ChatPromptTemplate.from_template(template)
 
126
  @socketio.on('message')
127
  def handle_message(data):
128
  question = data.get('question')
129
+ print(question)
130
  try:
131
  for chunk in rag_chain.stream(question):
132
  emit('response', chunk, room=request.sid)
templates/chat.html CHANGED
@@ -270,6 +270,17 @@
270
 
271
  </style>
272
 
 
 
 
 
 
 
 
 
 
 
 
273
 
274
  </head>
275
  <body>
@@ -376,9 +387,10 @@
376
  </div>
377
 
378
  </div>
 
379
  <div class="w-full bg-transparent">
380
  <div class="input-options-container max-w-3xl p-3 flex flex-row items-center justify-between border rounded-full shadow-xl mx-auto">
381
- <div id="question-box" dir="rtl" class="inline-block px-4 text-2xl" role="textbox" contenteditable="true" data-placeholder="أكتب سؤالك هنا..." style="width: 100%; max-height: 100px; outline: none; font-weight: 500; font-size: 14px; direction: RTL;"></div>
382
  <button class="sendbtn " style="width: 40px; height: 40px; padding: 8px; aspect-ratio: 1; display: flex; align-items: center; justify-content: center; border-radius: 30px; border: none; color: white;">
383
  <svg width="30px" height="30px" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg"><g id="SVGRepo_bgCarrier" stroke-width="0"></g><g id="SVGRepo_tracerCarrier" stroke-linecap="round" stroke-linejoin="round"></g><g id="SVGRepo_iconCarrier"> <path d="M12 6V18M12 6L7 11M12 6L17 11" stroke="#ffffff" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"></path> </g></svg>
384
  </button>
@@ -387,6 +399,9 @@
387
  </div>
388
  </div>
389
 
 
 
 
390
  <!-- SocketIO -->
391
  <script src="https://cdn.socket.io/4.5.0/socket.io.min.js"></script>
392
  <!-- Showdown.js -->
@@ -395,7 +410,7 @@
395
  <script>
396
 
397
  // Initialize Socket.IO client
398
- var socket = io.connect('https://ritesh-hf-adafsa-flask-app-demo.hf.space', {
399
  transports: ['websocket']
400
  });
401
 
@@ -418,10 +433,10 @@
418
  const isRTL = true;
419
  $(".chat-container").append(
420
  `
421
- <div class="chat-block load-chat-block w-full flex flex-row items-center 'justify-start bg-secondary p-3 rounded-xl z-10" style="background-color: rgba(242, 255, 225, 0.678);">
422
- <div style="width: fit-content; margin-bottom: 5px; display: flex; align-items: center; gap: 15px;">
423
- <span class="p-2 border-2" style="display: flex; align-items: center; justify-content: center; border-radius: 50px; background-color: white;">
424
- <svg fill="#000000" width="24px" height="24px" viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg"><g id="SVGRepo_bgCarrier" stroke-width="0"></g><g id="SVGRepo_tracerCarrier" stroke-linecap="round" stroke-linejoin="round"></g><g id="SVGRepo_iconCarrier"><path d="M21.928 11.607c-.202-.488-.635-.605-.928-.633V8c0-1.103-.897-2-2-2h-6V4.61c.305-.274.5-.668.5-1.11a1.5 1.5 0 0 0-3 0c0 .442.195.836.5 1.11V6H5c-1.103 0-2 .897-2 2v2.997l-.082.006A1 1 0 0 0 1.99 12v2a1 1 0 0 0 1 1H3v5c0 1.103.897 2 2 2h14c1.103 0 2-.897 2-2v-5a1 1 0 0 0 1-1v-1.938a1.006 1.006 0 0 0-.072-.455zM5 20V8h14l.001 3.996L19 12v2l.001.005.001 5.995H5z"></path><ellipse cx="8.5" cy="12" rx="1.5" ry="2"></ellipse><ellipse cx="15.5" cy="12" rx="1.5" ry="2"></ellipse><path d="M8 16h8v2H8z"></path></g></svg>
425
  </span>
426
  </div>
427
  <div class="px-3 py-1 order-1">
@@ -496,7 +511,7 @@
496
  </span>
497
  </div>
498
  <div class="px-3 py-1">
499
- <div class="message-content pr-2" style="width: 100%; height: 100%; margin: auto; font-weight: 500; text-wrap: pretty; text-align: right;" style="-webkit-locale: "ar";" >
500
  ${answer}
501
  </div>
502
  </div>
@@ -567,7 +582,7 @@
567
  console.warn("Disconnected from server:", reason);
568
  response = "";
569
  // appendAnswer("You have been disconnected from the server. Please refresh the page to reconnect.");
570
- socket = io.connect('https://ritesh-hf-adafsa-flask-app-demo.hf.space', {
571
  transports: ['websocket']
572
  });
573
  });
@@ -588,5 +603,96 @@
588
  });
589
  </script>
590
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
591
  </body>
592
  </html>
 
270
 
271
  </style>
272
 
273
+ <style>
274
+ #question-box span[dir="rtl"] {
275
+ direction: rtl;
276
+ unicode-bidi: embed;
277
+ }
278
+ #question-box span[dir="ltr"] {
279
+ direction: ltr;
280
+ unicode-bidi: embed;
281
+ }
282
+ </style>
283
+
284
 
285
  </head>
286
  <body>
 
387
  </div>
388
 
389
  </div>
390
+
391
  <div class="w-full bg-transparent">
392
  <div class="input-options-container max-w-3xl p-3 flex flex-row items-center justify-between border rounded-full shadow-xl mx-auto">
393
+ <div id="question-box" dir="auto" class="inline-block px-4 text-2xl" contenteditable="true" data-placeholder="أكتب سؤالك هنا..." style="width: 100%; max-height: 100px; outline: none; font-weight: 500; font-size: 14px;"></div>
394
  <button class="sendbtn " style="width: 40px; height: 40px; padding: 8px; aspect-ratio: 1; display: flex; align-items: center; justify-content: center; border-radius: 30px; border: none; color: white;">
395
  <svg width="30px" height="30px" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg"><g id="SVGRepo_bgCarrier" stroke-width="0"></g><g id="SVGRepo_tracerCarrier" stroke-linecap="round" stroke-linejoin="round"></g><g id="SVGRepo_iconCarrier"> <path d="M12 6V18M12 6L7 11M12 6L17 11" stroke="#ffffff" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"></path> </g></svg>
396
  </button>
 
399
  </div>
400
  </div>
401
 
402
+
403
+
404
+
405
  <!-- SocketIO -->
406
  <script src="https://cdn.socket.io/4.5.0/socket.io.min.js"></script>
407
  <!-- Showdown.js -->
 
410
  <script>
411
 
412
  // Initialize Socket.IO client
413
+ var socket = io.connect('http://127.0.0.1:5000/', {
414
  transports: ['websocket']
415
  });
416
 
 
433
  const isRTL = true;
434
  $(".chat-container").append(
435
  `
436
+ <div class="chat-block load-chat-block w-full flex flex-row-reverse items-center justify-end bg-secondary p-3 rounded-xl z-10" style="background-color: rgba(242, 255, 225, 0.678);">
437
+ <div class="flex items-center order-2" >
438
+ <span class="bg-white" style="width: 36px; height: 36px; display: flex; align-items: center; justify-content: center; border-radius: 50px; padding: 5px; border: 2px solid #c2c2c2;">
439
+ <svg fill="#000000" width="36px" height="36px" viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg"><g id="SVGRepo_bgCarrier" stroke-width="0"></g><g id="SVGRepo_tracerCarrier" stroke-linecap="round" stroke-linejoin="round"></g><g id="SVGRepo_iconCarrier"><path d="M21.928 11.607c-.202-.488-.635-.605-.928-.633V8c0-1.103-.897-2-2-2h-6V4.61c.305-.274.5-.668.5-1.11a1.5 1.5 0 0 0-3 0c0 .442.195.836.5 1.11V6H5c-1.103 0-2 .897-2 2v2.997l-.082.006A1 1 0 0 0 1.99 12v2a1 1 0 0 0 1 1H3v5c0 1.103.897 2 2 2h14c1.103 0 2-.897 2-2v-5a1 1 0 0 0 1-1v-1.938a1.006 1.006 0 0 0-.072-.455zM5 20V8h14l.001 3.996L19 12v2l.001.005.001 5.995H5z"></path><ellipse cx="8.5" cy="12" rx="1.5" ry="2"></ellipse><ellipse cx="15.5" cy="12" rx="1.5" ry="2"></ellipse><path d="M8 16h8v2H8z"></path></g></svg>
440
  </span>
441
  </div>
442
  <div class="px-3 py-1 order-1">
 
511
  </span>
512
  </div>
513
  <div class="px-3 py-1">
514
+ <div class="message-content pr-2" style="width: 100%; height: 100%; margin: auto; font-weight: 500; text-wrap: pretty;" style="-webkit-locale: "ar";" >
515
  ${answer}
516
  </div>
517
  </div>
 
582
  console.warn("Disconnected from server:", reason);
583
  response = "";
584
  // appendAnswer("You have been disconnected from the server. Please refresh the page to reconnect.");
585
+ socket = io.connect('http://127.0.0.1:5000/', {
586
  transports: ['websocket']
587
  });
588
  });
 
603
  });
604
  </script>
605
 
606
+ <script>
607
// Editable question box whose content is re-marked for direction on every edit.
const inputDiv = document.getElementById('question-box');

// On each input event: remember the caret's character offset, rebuild the
// box's markup, then put the caret back (rebuilding innerHTML discards the
// current selection).
inputDiv.addEventListener('input', function (event) {
    const box = this;
    const caretOffset = saveCaretPosition(box);
    processBiDiContent(box);
    restoreCaretPosition(box, caretOffset);
});
619
+
620
/**
 * Rebuild an element's markup so that every run of ASCII letters/digits is
 * wrapped in <span dir="ltr">, leaving all other text as-is.
 *
 * The element's plain text (innerText) is read, split into alternating
 * "other" / "English-or-number" segments, each segment is HTML-escaped, and
 * the result is written back through innerHTML.
 *
 * Escaping happens per segment AFTER splitting. Escaping the whole string
 * first and then matching [A-Za-z0-9]+ would also match the names inside the
 * generated entities (e.g. the "amp" in "&amp;") and wrap them in spans,
 * corrupting the entity so that typing "&" shows up as the literal "&amp;".
 *
 * @param {HTMLElement} element - The editable element to rewrite in place.
 */
function processBiDiContent(element) {
    // The capturing group makes split() keep the matched runs: even indexes
    // hold the text between matches, odd indexes hold the English/number runs.
    const segments = element.innerText.split(/([A-Za-z0-9]+)/);

    const html = segments.map(function (segment, index) {
        // Escape every segment to prevent HTML injection from typed text.
        const safe = escapeHTML(segment);
        return index % 2 === 1 ? `<span dir="ltr">${safe}</span>` : safe;
    }).join('');

    // Update the element's HTML
    element.innerHTML = html;
}
642
+
643
/**
 * Replace the five HTML-significant characters (& < > " ') in a string with
 * their entity equivalents.
 *
 * @param {string} str - Raw text.
 * @returns {string} The text with those characters replaced by entities.
 */
function escapeHTML(str) {
    const entityFor = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;'
    };
    return str.replace(/[&<>"']/g, (ch) => entityFor[ch]);
}
654
+
655
/**
 * Return the caret position as a character offset from the start of
 * `context`, measured as the length of the text between the start of
 * `context` and the end of the current selection range.
 *
 * @param {Node} context - The container the offset is measured within.
 * @returns {number} The character offset, or 0 when there is no selection
 *     range to measure.
 */
function saveCaretPosition(context) {
    const selection = window.getSelection();

    // getRangeAt(0) throws when the selection holds no ranges (for example an
    // input event dispatched while nothing is selected), so bail out first.
    if (!selection || selection.rangeCount === 0) {
        return 0;
    }

    const range = selection.getRangeAt(0);
    const preCaretRange = range.cloneRange();
    // Span from the start of the container up to the end of the selection.
    preCaretRange.selectNodeContents(context);
    preCaretRange.setEnd(range.endContainer, range.endOffset);
    return preCaretRange.toString().length;
}
664
+
665
/**
 * Place a collapsed caret inside `context` at the given character offset.
 *
 * Text nodes under `context` are visited depth-first in document order while
 * their lengths are summed; the caret goes into the first text node whose
 * character span contains `offset`. If no text node matches, the caret stays
 * at the very start of `context`.
 *
 * @param {Node} context - The container to place the caret in.
 * @param {number} offset - Character offset from the start of `context`.
 */
function restoreCaretPosition(context, offset) {
    const caret = document.createRange();
    caret.setStart(context, 0);
    caret.collapse(true);

    // Explicit stack for the depth-first walk; children are pushed in reverse
    // so they are popped in document order.
    const pending = [context];
    let consumed = 0;

    while (pending.length > 0) {
        const current = pending.pop();

        if (current.nodeType !== Node.TEXT_NODE) {
            for (let idx = current.childNodes.length - 1; idx >= 0; idx--) {
                pending.push(current.childNodes[idx]);
            }
            continue;
        }

        const upTo = consumed + current.length;
        if (offset >= consumed && offset <= upTo) {
            caret.setStart(current, offset - consumed);
            caret.collapse(true);
            break;
        }
        consumed = upTo;
    }

    const activeSelection = window.getSelection();
    activeSelection.removeAllRanges();
    activeSelection.addRange(caret);
}
694
+
695
+ </script>
696
+
697
  </body>
698
  </html>