# RAG / app.py
# stevafernandes's picture
# Update app.py
# cb0b520 verified
import html
import os
import zipfile

import streamlit as st
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
# --- Configuration ---
# Google API key, read once at import time; empty string when the variable is unset.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY", "")

# Paths
# Directory the FAISS index is loaded from (expected to contain index.faiss).
FAISS_INDEX_PATH = "faiss_index"
# Zip archive the index directory can be unpacked from when it is not on disk yet.
FAISS_ZIP_PATH = "faiss_index.zip"
def extract_faiss_index(zip_path=None, index_path=None, extract_to="."):
    """Make sure the FAISS index is unpacked on disk.

    If ``index.faiss`` is already present under ``index_path`` nothing is
    done. Otherwise the archive at ``zip_path`` (when it exists) is unpacked
    into ``extract_to`` and the presence of ``index.faiss`` is checked again.

    Args:
        zip_path: Archive to unpack. Defaults to ``FAISS_ZIP_PATH``.
        index_path: Directory expected to contain ``index.faiss``.
            Defaults to ``FAISS_INDEX_PATH``.
        extract_to: Directory the archive is unpacked into. Defaults to the
            current working directory, so with the default paths the archive
            must contain the index directory at its top level.

    Returns:
        True if ``index.faiss`` exists under ``index_path`` (either beforehand
        or after extraction), False otherwise.

    Raises:
        zipfile.BadZipFile: If the archive exists but is not a valid zip file.
    """
    if zip_path is None:
        zip_path = FAISS_ZIP_PATH
    if index_path is None:
        index_path = FAISS_INDEX_PATH
    index_file = os.path.join(index_path, "index.faiss")

    # Already extracted
    if os.path.exists(index_file):
        return True

    # Nothing to extract from
    if not os.path.exists(zip_path):
        return False

    # Extract from zip
    with zipfile.ZipFile(zip_path, "r") as zip_ref:
        zip_ref.extractall(extract_to)

    # The archive may not have the expected layout (e.g. files at the root
    # instead of inside the index directory), so report what is really on
    # disk rather than assuming the extraction produced the index.
    return os.path.exists(index_file)
def get_conversational_chain(api_key):
    """Build the prompt -> chat model -> string pipeline used for answering.

    The prompt instructs the model to answer only from the supplied context
    and to reply with a fixed sentence when the context lacks the answer.

    Args:
        api_key: Google API key forwarded to the chat model.

    Returns:
        A runnable chain; invoke it with a dict holding ``context`` and
        ``question`` to obtain the answer text.
    """
    # temperature=0 is passed so the model samples as little as possible.
    llm = ChatGoogleGenerativeAI(
        model="gemini-3-flash-preview",
        temperature=0,
        google_api_key=api_key,
    )
    strict_template = """
You are a helpful assistant that ONLY answers based on the context provided from the PDF document.
STRICT RULES:
1. Do NOT use any external knowledge or assumptions.
2. Do NOT make up information that is not in the context.
3. If the answer is not found in the context below, reply with: "I don't have information about that in the uploaded document."
4. Always base your answers strictly on the provided context.
Context from PDF:
{context}
Question:
{question}
Answer (based only on the context above):
"""
    qa_prompt = PromptTemplate(
        template=strict_template,
        input_variables=["context", "question"],
    )
    # StrOutputParser reduces the chat model's message to its plain text.
    return qa_prompt | llm | StrOutputParser()
def format_docs(docs):
    """Concatenate the text of ``docs`` into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` values joined with a blank line between
        consecutive documents; an empty string when ``docs`` is empty.
    """
    passages = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(passages)
def user_input(user_question, vector_store, api_key):
    """Answer ``user_question`` using passages retrieved from ``vector_store``.

    Args:
        user_question: The question text typed by the user.
        vector_store: Store queried through ``similarity_search``.
        api_key: Google API key used to build the answering chain.

    Returns:
        Whatever the chain built by ``get_conversational_chain`` returns for
        the retrieved context and the question.
    """
    # Retrieve first, then build the chain, then feed both into it.
    retrieved = vector_store.similarity_search(user_question)
    qa_chain = get_conversational_chain(api_key)
    payload = {
        "context": format_docs(retrieved),
        "question": user_question,
    }
    return qa_chain.invoke(payload)
@st.cache_resource
def load_vector_store(_api_key):
    """Load the pre-built FAISS vector store from ``FAISS_INDEX_PATH``.

    The result is cached by ``st.cache_resource``.

    Args:
        _api_key: Google API key handed to the embeddings client.
            NOTE(review): the leading underscore is presumably there so
            Streamlit leaves this argument out of the cache key - confirm.

    Returns:
        The vector store returned by ``FAISS.load_local``.
    """
    # Unpack the zipped index first if it is not on disk yet.
    extract_faiss_index()

    embedder = GoogleGenerativeAIEmbeddings(
        model="models/embedding-001",
        google_api_key=_api_key,
    )
    # NOTE(review): allow_dangerous_deserialization=True opts in to loading
    # serialized data from disk; only point this at index files you trust.
    return FAISS.load_local(
        FAISS_INDEX_PATH,
        embedder,
        allow_dangerous_deserialization=True,
    )
def _inject_custom_css():
    """Inject the app's custom stylesheet into the page."""
    st.markdown(
        """
<style>
#MainMenu {visibility: hidden;}
header {visibility: hidden;}
footer {visibility: hidden;}
.stDeployButton {display: none;}
.block-container {
padding-top: 2rem;
padding-bottom: 2rem;
max-width: 800px;
}
.stApp {
background-color: #ffffff !important;
}
.main-title {
font-size: 2.5rem !important;
font-weight: 600 !important;
color: #1a1a1a !important;
text-align: center;
margin-bottom: 0.5rem;
padding-top: 1rem;
}
.subtitle {
font-size: 1rem !important;
color: #666666 !important;
text-align: center;
margin-bottom: 2rem;
}
.stTextInput > div > div > input {
border: 1px solid #e0e0e0 !important;
border-radius: 8px;
padding: 0.75rem 1rem;
font-size: 1rem;
background-color: #ffffff !important;
color: #333333 !important;
}
.stTextInput > div > div > input:focus {
border-color: #4a90d9 !important;
box-shadow: 0 0 0 2px rgba(74, 144, 217, 0.1);
}
.stTextInput > div > div > input::placeholder {
color: #999999 !important;
}
.answer-container {
background-color: #fafafa !important;
border: 1px solid #e8e8e8;
border-radius: 10px;
padding: 1.5rem;
margin-top: 1rem;
}
.answer-label {
font-size: 0.85rem !important;
font-weight: 600 !important;
color: #888888 !important;
text-transform: uppercase;
letter-spacing: 0.5px;
margin-bottom: 0.75rem;
}
.answer-text {
font-size: 1rem !important;
color: #333333 !important;
line-height: 1.7;
}
.status-badge {
display: inline-flex;
align-items: center;
gap: 0.5rem;
background-color: #f0f9f4 !important;
color: #166534 !important;
padding: 0.5rem 1rem;
border-radius: 20px;
font-size: 0.9rem;
font-weight: 500;
margin-bottom: 2rem;
}
.status-dot {
width: 8px;
height: 8px;
background-color: #22c55e !important;
border-radius: 50%;
}
.stTextInput label {
font-size: 0.95rem !important;
color: #444444 !important;
font-weight: 500;
margin-bottom: 0.5rem;
}
</style>
""",
        unsafe_allow_html=True
    )


def _render_answer(answer):
    """Render the model's answer inside the styled answer card.

    The answer is model output and is placed into markup rendered with
    ``unsafe_allow_html=True``, so it is HTML-escaped first. Line breaks are
    emitted as ``<br>`` so that blank lines in the answer cannot terminate
    the surrounding HTML block.
    """
    safe_answer = "<br>".join(html.escape(answer).splitlines())
    st.markdown(
        f'''
<div class="answer-container">
<div class="answer-label">Answer</div>
<div class="answer-text">{safe_answer}</div>
</div>
''',
        unsafe_allow_html=True
    )


def main():
    """Run the Streamlit page: configure it, load the index, answer questions.

    Stops the script early (``st.stop()``) when the API key is missing, when
    neither the extracted index nor its zip archive exists, or when loading
    the vector store fails.
    """
    st.set_page_config(
        page_title="Papal Encyclicals RAG",
        page_icon="📄",
        layout="centered",
        initial_sidebar_state="collapsed"
    )

    # Custom CSS
    _inject_custom_css()

    # Header
    st.markdown('<h1 class="main-title">Papal Encyclicals RAG</h1>', unsafe_allow_html=True)
    st.markdown('<p class="subtitle">Ask questions about papal encyclicals and get answers based on the source document</p>', unsafe_allow_html=True)

    # Check for API key
    if not GOOGLE_API_KEY:
        st.error("Google API Key not found in environment variables.")
        st.info("Please add GOOGLE_API_KEY to your Hugging Face Space secrets.")
        st.stop()

    # Check if FAISS index or zip exists
    index_file = os.path.join(FAISS_INDEX_PATH, "index.faiss")
    if not os.path.exists(index_file) and not os.path.exists(FAISS_ZIP_PATH):
        st.error("FAISS index not found!")
        st.info("Please upload faiss_index.zip or the faiss_index folder to your Space.")
        st.stop()

    # Load vector store (cached)
    with st.spinner("Loading index..."):
        try:
            vector_store = load_vector_store(GOOGLE_API_KEY)
        except Exception as e:
            # Top-level UI boundary: show the failure instead of a traceback.
            st.error(f"Error loading index: {e}")
            st.stop()

    # Status badge
    st.markdown(
        '''
<div class="status-badge">
<span class="status-dot"></span>
Document ready
</div>
''',
        unsafe_allow_html=True
    )

    # Question input
    user_question = st.text_input(
        "Your question",
        placeholder="e.g., What are the main themes discussed in the encyclicals?",
        help="The AI will only answer based on the content of the uploaded PDF",
        label_visibility="visible"
    )

    # Ignore empty or whitespace-only input so it does not trigger a search
    # and a model call.
    question = user_question.strip() if user_question else ""
    if question:
        with st.spinner("Searching for answer..."):
            try:
                answer = user_input(question, vector_store, GOOGLE_API_KEY)
            except Exception as e:
                # Top-level UI boundary: report retrieval/model failures inline.
                st.error(f"Error getting answer: {e}")
            else:
                _render_answer(answer)


# Run the app only when executed as a script, not when imported.
if __name__ == "__main__":
    main()