Zeggai Abdellah committed
Commit 9317650 · 1 Parent(s): 1817834

add answer language to the response

Files changed (2)
  1. rag_pipeline.py +31 -3
  2. requirements.txt +4 -1
rag_pipeline.py CHANGED
@@ -9,6 +9,7 @@ import re
 from llama_index.core import PromptTemplate
 from llama_index.core.agent import ReActAgent
 from llama_index.llms.google_genai import GoogleGenAI
+from langdetect import detect
 import os
 
 
@@ -275,6 +276,31 @@ def process_question(agent, question: str) -> str:
         print(f"Error processing question: {e}")
         return f"Error processing your question: {str(e)}"
 
+def answer_language_detection(response_text: str) -> str:
+    """
+    Detect the language of the response text.
+
+    Args:
+        response_text (str): The response text to analyze.
+
+    Returns:
+        str: Detected language code (e.g., 'en', 'fr', etc.)
+    """
+
+    try:
+        # Detect the language of the first 5 words of the response
+        first_line = " ".join(response_text.split()[:5])
+        first_line = re.sub(r'\[.*?\]', '', first_line)  # Remove citations
+        answer_language = detect(first_line)
+        if answer_language not in ['en', 'ar', 'fr']:
+            answer_language = 'en'
+    except Exception:
+        # Fall back to English when detection fails on short or ambiguous text
+        answer_language = 'en'
+
+    return answer_language
+
+
 def process_question_with_sequential_citations(agent, question: str, chunks_directory="./data/") -> dict:
     """
     Process a question through the RAG pipeline and return response with sequential citation numbers.
@@ -340,12 +366,13 @@ def process_question_with_sequential_citations(agent, question: str, chunks_dire
 
         # Convert to JSON
         cited_elements_json = json.dumps(cited_elements_ordered, ensure_ascii=False, indent=2)
-
+        answer_language = answer_language_detection(response_text)
         return {
             "response": sequential_response,
             "cited_elements_json": cited_elements_json,
             "unique_ids": unique_ids,
-            "citation_mapping": source_id_to_number
+            "citation_mapping": source_id_to_number,
+            "answer_language": answer_language
         }
 
     except Exception as e:
@@ -354,7 +381,8 @@
             "response": response_text if 'response_text' in locals() else "Error occurred",
             "cited_elements_json": "[]",
             "unique_ids": [],
-            "citation_mapping": {}
+            "citation_mapping": {},
+            "answer_language": "en"  # Default to English if not specified
         }
 
 def process_question_with_citations(agent, question: str, chunks_directory="./data/") -> dict:
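
For context, a minimal caller-side sketch of how the new answer_language field might be consumed. Only process_question_with_sequential_citations and its "response" / "cited_elements_json" / "answer_language" keys come from this commit; the label table and render helper are illustrative assumptions.

# Hypothetical caller-side sketch; only the pipeline function and its return keys are from this commit.
from rag_pipeline import process_question_with_sequential_citations

SOURCE_LABELS = {"en": "Sources", "fr": "Sources", "ar": "المصادر"}  # illustrative per-language labels

def render_answer(agent, question: str) -> str:
    result = process_question_with_sequential_citations(agent, question)
    lang = result.get("answer_language", "en")  # falls back to 'en', matching the pipeline default
    label = SOURCE_LABELS.get(lang, SOURCE_LABELS["en"])
    return f"{result['response']}\n\n{label}:\n{result['cited_elements_json']}"
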
requirements.txt CHANGED
@@ -18,4 +18,7 @@ chromadb
 rank-bm25
 
 # ML and embeddings
-sentence-transformers
+sentence-transformers
+
+# detect language
+langdetect
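
A note on the new dependency: langdetect can return different results across runs for short or ambiguous strings unless a fixed seed is set. A minimal sketch, assuming reproducibility matters here; the seed value is arbitrary and not part of this commit.

from langdetect import DetectorFactory, detect

DetectorFactory.seed = 0  # arbitrary fixed seed so repeated detections give stable results
print(detect("Quelle est la capitale de la France ?"))  # typically 'fr'
print(detect("ما هي عاصمة فرنسا؟"))                      # typically 'ar'
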