import os
import sys
from datetime import datetime

import numpy as np
import pandas as pd
import streamlit as st
import torch

try:
    from transformers import pipeline
except ImportError:
    # Install transformers on the fly if it is missing (e.g. on a fresh Space).
    import subprocess
    subprocess.check_call([sys.executable, "-m", "pip", "install", "transformers"])
    from transformers import pipeline

# Note: newer langchain releases move these imports to langchain_community /
# langchain_text_splitters; the paths below match the older monolithic package.
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
PDF_PATH = "Sample HI Policy.pdf"
CSV_PATH = "RAG_Test_Questions.csv"

st.set_page_config(page_title="PolicyGaido - Insurance Q&A", page_icon="π", layout="wide")
st.title("Insurance Policy Q&A Assistant")
st.markdown("Ask questions about your insurance policy or select from predefined questions.")
if "initialized" not in st.session_state: | |
st.session_state.initialized = False | |
st.session_state.vector_store = None | |
st.session_state.questions = [] | |
st.session_state.evaluation_history = [] | |
with st.sidebar:
    st.header("Configuration")
    model_option = st.selectbox("Select Language Model", ["BERT-for-QA"])
    device = "cuda" if torch.cuda.is_available() else "cpu"
    st.caption(f"Running on: {device}")

    if st.button("Initialize System"):
        with st.spinner("Initializing Q&A system..."):
            st.session_state.initialized = True

    with st.expander("Performance Metrics"):
        if st.session_state.evaluation_history:
            total_queries = len(st.session_state.evaluation_history)
            avg_confidence = np.mean([eval_data["confidence"] for eval_data in st.session_state.evaluation_history])
            avg_relevance = np.mean([eval_data["relevance"] for eval_data in st.session_state.evaluation_history])
            st.metric("Total Queries", total_queries)
            st.metric("Avg. Confidence", f"{avg_confidence:.2f}%")
            st.metric("Avg. Relevance", f"{avg_relevance:.2f}%")

    if st.button("Clear History"):
        st.session_state.evaluation_history = []
        st.rerun()  # st.experimental_rerun() was removed in recent Streamlit releases
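
# CSVs exported from Excel on Windows often arrive as cp1252/latin-1 rather
# than UTF-8, so the loader below walks a list of candidate encodings and uses
# the first one that parses. A sketch of an alternative, assuming the chardet
# package is available, would detect the encoding up front instead:
#
#     import chardet
#     with open(csv_path, "rb") as f:
#         encoding = chardet.detect(f.read())["encoding"]
#     df = pd.read_csv(csv_path, encoding=encoding)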
def load_questions(csv_path):
    """Load the question list from a CSV, trying several common encodings."""
    encodings = ["utf-8", "latin-1", "ISO-8859-1", "cp1252"]
    for encoding in encodings:
        try:
            df = pd.read_csv(csv_path, encoding=encoding)
            # Accept either a "Question" or a "Questions" column.
            if "Question" in df.columns:
                return df["Question"].dropna().tolist()
            elif "Questions" in df.columns:
                return df["Questions"].dropna().tolist()
        except UnicodeDecodeError:
            continue
        except Exception as e:
            st.error(f"Error loading CSV: {e}")
            return []
    return []
if os.path.exists(CSV_PATH):
    st.session_state.questions = load_questions(CSV_PATH)
    if not st.session_state.questions:
        st.warning(f"No questions found in {CSV_PATH}. Make sure the file has a 'Question' or 'Questions' column.")
def process_pdf(pdf_path):
    try:
        loader = PyPDFLoader(pdf_path)
        documents = loader.load()
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
        chunks = text_splitter.split_documents(documents)
        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={"device": device})
        vector_store = FAISS.from_documents(chunks, embeddings)
        return vector_store
    except Exception as e:
        st.error(f"Error processing PDF: {e}")
        return None
if st.session_state.initialized and not st.session_state.vector_store:
    with st.spinner("Processing policy document..."):
        st.session_state.vector_store = process_pdf(PDF_PATH)
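
# Answering step: retrieve the top-3 chunks for the question, run an
# extractive QA model over the concatenated context, then score relevance as
# the cosine similarity between the question and context embeddings,
#
#     cos(q, c) = (q . c) / (||q|| * ||c||)
#
# scaled to a 0-100 range for display.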
def get_answer(question, model_name):
    # Early exits must match the 4-tuple shape of the normal return.
    if st.session_state.vector_store is None:
        return "System not initialized. Please initialize first.", [], 0, 0
    retriever = st.session_state.vector_store.as_retriever(search_kwargs={"k": 3})
    docs = retriever.get_relevant_documents(question)
    context = " ".join([doc.page_content for doc in docs])
    if not context:
        return "No relevant information found in the policy document.", [], 0, 0
    # Pick the model id once so the tokenizer always matches the model weights.
    qa_model = "deepset/bert-base-cased-squad2" if model_name == "BERT-for-QA" else "distilbert-base-cased-distilled-squad"
    qa_pipeline = pipeline("question-answering", model=qa_model, tokenizer=qa_model, device=0 if torch.cuda.is_available() else -1)
    result = qa_pipeline(question=question, context=context)
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={"device": device})
    question_embedding = embeddings.embed_query(question)
    context_embedding = embeddings.embed_query(context[:1000])  # first 1000 chars only, to stay within token limits
    # Cosine similarity between question and context embeddings
    similarity = np.dot(question_embedding, context_embedding) / (np.linalg.norm(question_embedding) * np.linalg.norm(context_embedding))
    relevance_score = float(similarity * 100)
    return result["answer"], docs, result["score"], relevance_score
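
# Lightweight groundedness check: the answer counts as "found" when more than
# 30% of its words appear in at least one retrieved chunk; hedging language
# ("probably", "might", ...) adds a second indicator. Each indicator is worth
# 50 points, capped at 100. This is a word-overlap heuristic, not entailment;
# a stricter variant could require consecutive n-gram matches instead of
# individual words.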
def evaluate_answer(answer, docs, confidence, relevance):
    hallucination_indicators = 0
    answer_found = False
    answer_words = set(answer.lower().split())
    if len(answer_words) > 0:
        for doc in docs:
            doc_content = doc.page_content.lower()
            overlap_count = sum(1 for word in answer_words if word in doc_content)
            if overlap_count / len(answer_words) > 0.3:
                answer_found = True
                break
    if not answer_found and len(answer_words) > 3:
        hallucination_indicators += 1
    hedging_phrases = ["i think", "probably", "likely", "may", "might", "could be", "possibly", "perhaps"]
    if any(phrase in answer.lower() for phrase in hedging_phrases):
        hallucination_indicators += 1
    hallucination_risk = min(100, hallucination_indicators * 50)
    return {
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "confidence": confidence * 100,
        "relevance": relevance,
        "hallucination_risk": hallucination_risk,
    }
col1, col2 = st.columns([1, 1])

with col1:
    st.subheader("Ask a Question")
    question_method = st.radio("Choose question input method:", ["Predefined", "Custom"])
    question = ""
    if question_method == "Predefined":
        if st.session_state.questions:
            question = st.selectbox("Select a question:", st.session_state.questions)
        else:
            # Only flag missing questions in the Predefined branch, not for custom input.
            st.info("No predefined questions available. Please check your CSV file.")
    elif question_method == "Custom":
        question = st.text_area("Enter your question:")

    if question and st.button("Ask Question"):
        with st.spinner("Generating answer..."):
            answer, docs, confidence, relevance = get_answer(question, model_option)
            evaluation = evaluate_answer(answer, docs, confidence, relevance)
            st.session_state.evaluation_history.append(evaluation)
            st.session_state["last_answer"] = (question, answer, docs, evaluation)
with col2:
    st.subheader("Answer")
    if "last_answer" in st.session_state:
        question, answer, docs, evaluation = st.session_state["last_answer"]
        st.markdown(f"Question: {question}")
        st.markdown(f"Answer: {answer}")
        col_a, col_b, col_c = st.columns(3)
        with col_a:
            st.metric("Confidence", f"{evaluation['confidence']:.1f}%", delta=f"{evaluation['confidence'] - 50:.1f}")
        with col_b:
            st.metric("Relevance", f"{evaluation['relevance']:.1f}%", delta=f"{evaluation['relevance'] - 50:.1f}")
        with col_c:
            st.metric("Hallucination Risk", f"{evaluation['hallucination_risk']:.1f}%", delta=f"{50 - evaluation['hallucination_risk']:.1f}", delta_color="inverse")
        with st.expander("View Source Information"):
            for i, doc in enumerate(docs):
                st.markdown(f"Source {i+1}: {doc.page_content[:500]}...")
                st.divider()
st.subheader("Evaluation History") | |
if st.session_state.evaluation_history: | |
history_df = pd.DataFrame(st.session_state.evaluation_history) | |
# Displaying the summary statistics | |
st.subheader("Performance Statistics") | |
col1, col2, col3 = st.columns(3) | |
with col1: | |
st.metric("Avg. Confidence", f"{history_df['confidence'].mean():.1f}%") | |
with col2: | |
st.metric("Avg. Relevance", f"{history_df['relevance'].mean():.1f}%") | |
with col3: | |
st.metric("Avg. Hallucination Risk", f"{history_df['hallucination_risk'].mean():.1f}%") | |
# Here we are showing the history table | |
st.dataframe(history_df) | |
else: | |
st.info("No evaluation history available yet. Ask some questions to build history.") | |
st.divider() | |
st.caption("PolicyGaido Insurance Q&A Assistant | Built with Streamlit, Transformers, and FAISS (By Vasu Johri)") |