Spaces:
Sleeping
Sleeping
Zeggai Abdellah
committed on
Commit
·
9317650
1
Parent(s):
1817834
add answer language to the response
Browse files- rag_pipeline.py +31 -3
- requirements.txt +4 -1
rag_pipeline.py
CHANGED
|
@@ -9,6 +9,7 @@ import re
|
|
| 9 |
from llama_index.core import PromptTemplate
|
| 10 |
from llama_index.core.agent import ReActAgent
|
| 11 |
from llama_index.llms.google_genai import GoogleGenAI
|
|
|
|
| 12 |
import os
|
| 13 |
|
| 14 |
|
|
@@ -275,6 +276,31 @@ def process_question(agent, question: str) -> str:
|
|
| 275 |
print(f"Error processing question: {e}")
|
| 276 |
return f"Error processing your question: {str(e)}"
|
| 277 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 278 |
def process_question_with_sequential_citations(agent, question: str, chunks_directory="./data/") -> dict:
|
| 279 |
"""
|
| 280 |
Process a question through the RAG pipeline and return response with sequential citation numbers.
|
|
@@ -340,12 +366,13 @@ def process_question_with_sequential_citations(agent, question: str, chunks_dire
|
|
| 340 |
|
| 341 |
# Convert to JSON
|
| 342 |
cited_elements_json = json.dumps(cited_elements_ordered, ensure_ascii=False, indent=2)
|
| 343 |
-
|
| 344 |
return {
|
| 345 |
"response": sequential_response,
|
| 346 |
"cited_elements_json": cited_elements_json,
|
| 347 |
"unique_ids": unique_ids,
|
| 348 |
-
"citation_mapping": source_id_to_number
|
|
|
|
| 349 |
}
|
| 350 |
|
| 351 |
except Exception as e:
|
|
@@ -354,7 +381,8 @@ def process_question_with_sequential_citations(agent, question: str, chunks_dire
|
|
| 354 |
"response": response_text if 'response_text' in locals() else "Error occurred",
|
| 355 |
"cited_elements_json": "[]",
|
| 356 |
"unique_ids": [],
|
| 357 |
-
"citation_mapping": {}
|
|
|
|
| 358 |
}
|
| 359 |
|
| 360 |
def process_question_with_citations(agent, question: str, chunks_directory="./data/") -> dict:
|
|
|
|
| 9 |
from llama_index.core import PromptTemplate
|
| 10 |
from llama_index.core.agent import ReActAgent
|
| 11 |
from llama_index.llms.google_genai import GoogleGenAI
|
| 12 |
+
from langdetect import detect
|
| 13 |
import os
|
| 14 |
|
| 15 |
|
|
|
|
| 276 |
print(f"Error processing question: {e}")
|
| 277 |
return f"Error processing your question: {str(e)}"
|
| 278 |
|
| 279 |
def aswer_language_detection(response_text: str) -> str:
    """Detect the language of a response text.

    NOTE(review): the name keeps the original 'aswer' typo for caller
    compatibility (process_question_with_sequential_citations calls it
    under this name); consider renaming with an alias in a follow-up.

    Args:
        response_text (str): The response text to analyze.

    Returns:
        str: Detected language code, restricted to 'en', 'ar' or 'fr'.
            Falls back to 'en' for any other language or on detection
            failure (e.g. empty/ambiguous input).
    """
    answer_language = "en"  # safe default / fallback
    try:
        # Sample only the first 5 words: faster and usually sufficient
        # for language identification.
        sample = " ".join(response_text.split()[:5])
        # Strip citation markers like [1] so they don't skew detection.
        sample = re.sub(r"\[.*?\]", "", sample)
        detected = detect(sample)
        if detected in ("en", "ar", "fr"):
            answer_language = detected
    except Exception:
        # langdetect raises LangDetectException on empty or ambiguous
        # input; degrade to English instead of failing the pipeline.
        # (Was a bare `except:` with a `return` inside `finally`, which
        # silently swallowed all exceptions including KeyboardInterrupt.)
        pass
    return answer_language
|
| 302 |
+
|
| 303 |
+
|
| 304 |
def process_question_with_sequential_citations(agent, question: str, chunks_directory="./data/") -> dict:
|
| 305 |
"""
|
| 306 |
Process a question through the RAG pipeline and return response with sequential citation numbers.
|
|
|
|
| 366 |
|
| 367 |
# Convert to JSON
|
| 368 |
cited_elements_json = json.dumps(cited_elements_ordered, ensure_ascii=False, indent=2)
|
| 369 |
+
aswer_language= aswer_language_detection(response_text)
|
| 370 |
return {
|
| 371 |
"response": sequential_response,
|
| 372 |
"cited_elements_json": cited_elements_json,
|
| 373 |
"unique_ids": unique_ids,
|
| 374 |
+
"citation_mapping": source_id_to_number,
|
| 375 |
+
"answer_language":aswer_language
|
| 376 |
}
|
| 377 |
|
| 378 |
except Exception as e:
|
|
|
|
| 381 |
"response": response_text if 'response_text' in locals() else "Error occurred",
|
| 382 |
"cited_elements_json": "[]",
|
| 383 |
"unique_ids": [],
|
| 384 |
+
"citation_mapping": {},
|
| 385 |
+
"answer_language": "en" # Default to English if not specified
|
| 386 |
}
|
| 387 |
|
| 388 |
def process_question_with_citations(agent, question: str, chunks_directory="./data/") -> dict:
|
requirements.txt
CHANGED
|
@@ -18,4 +18,7 @@ chromadb
|
|
| 18 |
rank-bm25
|
| 19 |
|
| 20 |
# ML and embeddings
|
| 21 |
-
sentence-transformers
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
rank-bm25
|
| 19 |
|
| 20 |
# ML and embeddings
|
| 21 |
+
sentence-transformers
|
| 22 |
+
|
| 23 |
+
# detect language
|
| 24 |
+
langdetect
|