Spaces:

gourisankar85
/

realtime-rag-pipeline

Running

App Files Files Community

Gourisankar Padihary committed on Dec 15, 2024

Commit

79dcf63

1 Parent(s): bd69eee

Compute attributes changes

Browse files

Files changed (2) hide show

generator/compute_metrics.py +24 -0
main.py +18 -14

generator/compute_metrics.py ADDED Viewed

	@@ -0,0 +1,24 @@

def compute_metrics(attributes: dict) -> dict:
    """Derive summary metrics from an extracted-attributes mapping.

    The mapping is expected to be the parsed JSON object produced by the
    attribute-extraction step. Because that JSON is model-generated, keys may
    be missing or explicitly ``null``; both cases are treated as "no data"
    instead of raising.

    Args:
        attributes: Mapping that may contain the keys
            ``all_relevant_sentence_keys``, ``all_utilized_sentence_keys``,
            ``sentence_support_information`` (a list of dicts that may carry a
            ``fully_supported`` flag) and ``overall_supported``.

    Returns:
        A dict with four entries:

        * ``"Context Relevance"``: number of relevant sentence keys divided by
          the number of support-information entries (``0.0`` when there are
          no entries).
        * ``"Context Utilization"``: number of utilized sentence keys divided
          by the number of support-information entries (``0.0`` when there
          are no entries).
        * ``"Completeness"``: ``True`` when every support-information entry
          is flagged ``fully_supported``.
        * ``"Adherence"``: the ``overall_supported`` value, ``False`` when it
          is missing or ``null``.
    """
    # `or []` covers both a missing key and an explicit JSON null, which
    # `dict.get(key, [])` alone would pass through as None and break len().
    relevant_keys = attributes.get("all_relevant_sentence_keys") or []
    utilized_keys = attributes.get("all_utilized_sentence_keys") or []
    support_info = attributes.get("sentence_support_information") or []

    # Both ratios share the same denominator; guard it once.
    total_entries = len(support_info)
    if total_entries:
        context_relevance = len(relevant_keys) / total_entries
        context_utilization = len(utilized_keys) / total_entries
    else:
        context_relevance = 0.0
        context_utilization = 0.0

    # A malformed (non-dict) entry counts as "not fully supported" rather
    # than crashing on `.get`.
    # NOTE(review): with no entries at all this is vacuously True, matching
    # the previous behavior -- confirm that is the intended result.
    completeness = all(
        isinstance(info, dict) and info.get("fully_supported", False)
        for info in support_info
    )

    # Pass the flag through unchanged, but never report None for adherence.
    adherence = attributes.get("overall_supported")
    if adherence is None:
        adherence = False

    return {
        "Context Relevance": context_relevance,
        "Context Utilization": context_utilization,
        "Completeness": completeness,
        "Adherence": adherence,
    }

main.py CHANGED Viewed

@@ -6,6 +6,7 @@ from retriever.retrieve_documents import retrieve_top_k_documents
 from generator.initialize_llm import initialize_llm
 from generator.generate_response import generate_response
 from generator.extract_attributes import extract_attributes
 # Configure logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
@@ -24,37 +25,36 @@ def main():
     # Embed the documents
     vector_store = embed_documents(documents)
     logging.info("Documents embedded")
-    # Initialize the LLM
-    llm = initialize_llm()
-    logging.info("LLM initialized")
     # Sample question
     sample_question = dataset[0]['question']
     logging.info(f"Sample question: {sample_question}")
     # Retrieve relevant documents
     relevant_docs = retrieve_top_k_documents(vector_store, sample_question, top_k=5)
-    logging.info("Relevant documents retrieved :", print(len(relevant_docs)))
     # Log each retrieved document individually
-    #for i, doc in enumerate(relevant_docs):
-        #logging.info(f"Relevant document {i+1}: {doc}")
     # Generate a response using the relevant documents
-    response, source_docs = generate_response(llm, vector_store, sample_question)
     logging.info("Response generated")
     # Print the response
-    print(f"Response: {response}")
-    print(f"Source Documents: {source_docs}")
-     # Extract attributes from the response and source documents
-    attributes = extract_attributes(sample_question, relevant_docs, response)
     # Only proceed if the content is not empty
     if attributes.content:
         result_content = attributes.content  # Access the content attribute
         # Extract the JSON part from the result_content
         json_start = result_content.find("{")
         json_end = result_content.rfind("}") + 1
@@ -63,6 +63,10 @@ def main():
         try:
             result_json = json.loads(json_str)
             print(json.dumps(result_json, indent=2))
         except json.JSONDecodeError as e:
             logging.error(f"JSONDecodeError: {e}")

 from generator.initialize_llm import initialize_llm
 from generator.generate_response import generate_response
 from generator.extract_attributes import extract_attributes
+from generator.compute_metrics import compute_metrics
 # Configure logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
     # Embed the documents
     vector_store = embed_documents(documents)
     logging.info("Documents embedded")
     # Sample question
     sample_question = dataset[0]['question']
     logging.info(f"Sample question: {sample_question}")
     # Retrieve relevant documents
     relevant_docs = retrieve_top_k_documents(vector_store, sample_question, top_k=5)
+    logging.info(f"Relevant documents retrieved :{len(relevant_docs)}")
     # Log each retrieved document individually
+    for i, doc in enumerate(relevant_docs):
+        logging.info(f"Relevant document {i+1}: {doc} \n")
+    # Initialize the LLM
+    llm = initialize_llm()
+    logging.info("LLM initialized")
     # Generate a response using the relevant documents
+    response, source_docs = generate_response(llm, vector_store, sample_question, relevant_docs)
     logging.info("Response generated")
     # Print the response
+    print(f"Response from LLM: {response}")
+    #print(f"Source Documents: {source_docs}")
+    # Valuations : Extract attributes from the response and source documents
+    attributes = extract_attributes(sample_question, source_docs, response)
     # Only proceed if the content is not empty
     if attributes.content:
         result_content = attributes.content  # Access the content attribute
         # Extract the JSON part from the result_content
         json_start = result_content.find("{")
         json_end = result_content.rfind("}") + 1
         try:
             result_json = json.loads(json_str)
             print(json.dumps(result_json, indent=2))
+            # Compute metrics using the extracted attributes
+            metrics = compute_metrics(result_json)
+            print(metrics)
         except json.JSONDecodeError as e:
             logging.error(f"JSONDecodeError: {e}")