Spaces:
Sleeping
Sleeping
Zeggai Abdellah
committed on
Commit
·
9317650
1
Parent(s):
1817834
add answer language to the response
Browse files- rag_pipeline.py +31 -3
- requirements.txt +4 -1
rag_pipeline.py
CHANGED
|
@@ -9,6 +9,7 @@ import re
|
|
| 9 |
from llama_index.core import PromptTemplate
|
| 10 |
from llama_index.core.agent import ReActAgent
|
| 11 |
from llama_index.llms.google_genai import GoogleGenAI
|
|
|
|
| 12 |
import os
|
| 13 |
|
| 14 |
|
|
@@ -275,6 +276,31 @@ def process_question(agent, question: str) -> str:
|
|
| 275 |
print(f"Error processing question: {e}")
|
| 276 |
return f"Error processing your question: {str(e)}"
|
| 277 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 278 |
def process_question_with_sequential_citations(agent, question: str, chunks_directory="./data/") -> dict:
|
| 279 |
"""
|
| 280 |
Process a question through the RAG pipeline and return response with sequential citation numbers.
|
|
@@ -340,12 +366,13 @@ def process_question_with_sequential_citations(agent, question: str, chunks_dire
|
|
| 340 |
|
| 341 |
# Convert to JSON
|
| 342 |
cited_elements_json = json.dumps(cited_elements_ordered, ensure_ascii=False, indent=2)
|
| 343 |
-
|
| 344 |
return {
|
| 345 |
"response": sequential_response,
|
| 346 |
"cited_elements_json": cited_elements_json,
|
| 347 |
"unique_ids": unique_ids,
|
| 348 |
-
"citation_mapping": source_id_to_number
|
|
|
|
| 349 |
}
|
| 350 |
|
| 351 |
except Exception as e:
|
|
@@ -354,7 +381,8 @@ def process_question_with_sequential_citations(agent, question: str, chunks_dire
|
|
| 354 |
"response": response_text if 'response_text' in locals() else "Error occurred",
|
| 355 |
"cited_elements_json": "[]",
|
| 356 |
"unique_ids": [],
|
| 357 |
-
"citation_mapping": {}
|
|
|
|
| 358 |
}
|
| 359 |
|
| 360 |
def process_question_with_citations(agent, question: str, chunks_directory="./data/") -> dict:
|
|
|
|
| 9 |
from llama_index.core import PromptTemplate
|
| 10 |
from llama_index.core.agent import ReActAgent
|
| 11 |
from llama_index.llms.google_genai import GoogleGenAI
|
| 12 |
+
from langdetect import detect
|
| 13 |
import os
|
| 14 |
|
| 15 |
|
|
|
|
| 276 |
print(f"Error processing question: {e}")
|
| 277 |
return f"Error processing your question: {str(e)}"
|
| 278 |
|
| 279 |
def aswer_language_detection(response_text: str) -> str:
    """Detect the language of a response text.

    NOTE(review): the name keeps the original 'aswer' typo for caller
    compatibility (process_question_with_sequential_citations calls it
    under this name); consider renaming with an alias in a follow-up.

    Args:
        response_text (str): The response text to analyze.

    Returns:
        str: Detected language code, restricted to 'en', 'ar' or 'fr'.
            Falls back to 'en' for any other language or on detection
            failure (e.g. empty/ambiguous input).
    """
    answer_language = "en"  # safe default / fallback
    try:
        # Sample only the first 5 words: faster and usually sufficient
        # for language identification.
        sample = " ".join(response_text.split()[:5])
        # Strip citation markers like [1] so they don't skew detection.
        sample = re.sub(r"\[.*?\]", "", sample)
        detected = detect(sample)
        if detected in ("en", "ar", "fr"):
            answer_language = detected
    except Exception:
        # langdetect raises LangDetectException on empty or ambiguous
        # input; degrade to English instead of failing the pipeline.
        # (Was a bare `except:` with a `return` inside `finally`, which
        # silently swallowed all exceptions including KeyboardInterrupt.)
        pass
    return answer_language
|
| 302 |
+
|
| 303 |
+
|
| 304 |
def process_question_with_sequential_citations(agent, question: str, chunks_directory="./data/") -> dict:
|
| 305 |
"""
|
| 306 |
Process a question through the RAG pipeline and return response with sequential citation numbers.
|
|
|
|
| 366 |
|
| 367 |
# Convert to JSON
|
| 368 |
cited_elements_json = json.dumps(cited_elements_ordered, ensure_ascii=False, indent=2)
|
| 369 |
+
aswer_language= aswer_language_detection(response_text)
|
| 370 |
return {
|
| 371 |
"response": sequential_response,
|
| 372 |
"cited_elements_json": cited_elements_json,
|
| 373 |
"unique_ids": unique_ids,
|
| 374 |
+
"citation_mapping": source_id_to_number,
|
| 375 |
+
"answer_language":aswer_language
|
| 376 |
}
|
| 377 |
|
| 378 |
except Exception as e:
|
|
|
|
| 381 |
"response": response_text if 'response_text' in locals() else "Error occurred",
|
| 382 |
"cited_elements_json": "[]",
|
| 383 |
"unique_ids": [],
|
| 384 |
+
"citation_mapping": {},
|
| 385 |
+
"answer_language": "en" # Default to English if not specified
|
| 386 |
}
|
| 387 |
|
| 388 |
def process_question_with_citations(agent, question: str, chunks_directory="./data/") -> dict:
|
requirements.txt
CHANGED
|
@@ -18,4 +18,7 @@ chromadb
|
|
| 18 |
rank-bm25
|
| 19 |
|
| 20 |
# ML and embeddings
|
| 21 |
-
sentence-transformers
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
rank-bm25
|
| 19 |
|
| 20 |
# ML and embeddings
|
| 21 |
+
sentence-transformers
|
| 22 |
+
|
| 23 |
+
# detect language
|
| 24 |
+
langdetect
|