# Source: Hugging Face Space (status at capture time: Running)
| import streamlit as st | |
| import os | |
| import zipfile | |
| from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI | |
| from langchain_community.vectorstores import FAISS | |
| from langchain_core.prompts import PromptTemplate | |
| from langchain_core.output_parsers import StrOutputParser | |
# --- Configuration ---
# Paths: the pre-built index lives in a folder and may also ship as a zip
# archive next to the app.
FAISS_INDEX_PATH = "faiss_index"
FAISS_ZIP_PATH = "faiss_index.zip"

# The API key is read once at import time; it is an empty string when the
# environment variable is not set.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")
def extract_faiss_index():
    """Make sure the FAISS index is unpacked on disk.

    Looks for ``index.faiss`` inside ``FAISS_INDEX_PATH``. When it is missing
    and ``FAISS_ZIP_PATH`` exists, the archive is extracted into the current
    working directory.

    Returns:
        bool: True when ``index.faiss`` is present after the call; False when
        there is no archive to extract or the archive did not produce the
        index file at the expected location.

    Raises:
        zipfile.BadZipFile: If the archive exists but is not a valid zip file.
    """
    index_file = os.path.join(FAISS_INDEX_PATH, "index.faiss")

    # Already extracted
    if os.path.exists(index_file):
        return True

    # Nothing to extract from
    if not os.path.exists(FAISS_ZIP_PATH):
        return False

    with zipfile.ZipFile(FAISS_ZIP_PATH, "r") as zip_ref:
        zip_ref.extractall(".")

    # Report the real outcome: an archive with an unexpected layout must not
    # be treated as a successful extraction.
    return os.path.exists(index_file)
def get_conversational_chain(api_key):
    """Build the prompt -> Gemini -> string pipeline used to answer questions.

    Args:
        api_key: Google API key handed to the chat model.

    Returns:
        A runnable chain expecting a mapping with ``context`` and ``question``
        keys; its output is passed through ``StrOutputParser``.
    """
    # Temperature 0 is used together with the strict, context-only prompt.
    llm = ChatGoogleGenerativeAI(
        model="gemini-3-flash-preview",
        temperature=0,
        google_api_key=api_key,
    )

    qa_prompt = PromptTemplate(
        template="""
You are a helpful assistant that ONLY answers based on the context provided from the PDF document.
STRICT RULES:
1. Do NOT use any external knowledge or assumptions.
2. Do NOT make up information that is not in the context.
3. If the answer is not found in the context below, reply with: "I don't have information about that in the uploaded document."
4. Always base your answers strictly on the provided context.
Context from PDF:
{context}
Question:
{question}
Answer (based only on the context above):
""",
        input_variables=["context", "question"],
    )

    return qa_prompt | llm | StrOutputParser()
def format_docs(docs):
    """Concatenate the ``page_content`` of each document into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        str: The page contents in input order, separated by a blank line.
    """
    pieces = []
    for document in docs:
        pieces.append(document.page_content)
    return "\n\n".join(pieces)
def user_input(user_question, vector_store, api_key):
    """Answer a question using passages retrieved from the vector store.

    Args:
        user_question: The question text typed by the user.
        vector_store: Object providing ``similarity_search(query)``.
        api_key: Google API key forwarded to the chain builder.

    Returns:
        The value produced by invoking the QA chain on the retrieved context.
    """
    # Retrieve the passages most similar to the question (library default count).
    matches = vector_store.similarity_search(user_question)
    payload = {"context": format_docs(matches), "question": user_question}
    return get_conversational_chain(api_key).invoke(payload)
@st.cache_resource
def load_vector_store(_api_key):
    """Load the pre-built FAISS vector store, once per server process.

    The function is wrapped in ``st.cache_resource`` so that Streamlit reruns
    (one per user interaction) reuse the loaded index instead of reading it
    from disk again. The leading underscore on ``_api_key`` tells Streamlit
    not to hash that argument when building the cache key.

    Args:
        _api_key: Google API key used to construct the embeddings client.

    Returns:
        The FAISS vector store loaded from ``FAISS_INDEX_PATH``.

    Raises:
        FileNotFoundError: If the index is not on disk and could not be
            extracted from the zip archive.
    """
    # Extract zip if needed; fail early with a clear message rather than
    # letting the FAISS loader fail on a missing file.
    if not extract_faiss_index():
        raise FileNotFoundError(
            f"FAISS index not found in '{FAISS_INDEX_PATH}' and could not be "
            f"extracted from '{FAISS_ZIP_PATH}'."
        )

    embeddings = GoogleGenerativeAIEmbeddings(
        model="models/embedding-001",
        google_api_key=_api_key,
    )

    # SECURITY NOTE: FAISS.load_local unpickles the stored docstore, which can
    # execute arbitrary code. This is only acceptable because the index is
    # shipped with the app; never point this at an index from an untrusted
    # source.
    return FAISS.load_local(
        FAISS_INDEX_PATH,
        embeddings,
        allow_dangerous_deserialization=True,
    )
# Page-wide CSS injected once per run; hides Streamlit chrome and styles the
# title, input, answer card and status badge.
_CUSTOM_CSS = """
<style>
#MainMenu {visibility: hidden;}
header {visibility: hidden;}
footer {visibility: hidden;}
.stDeployButton {display: none;}
.block-container {
    padding-top: 2rem;
    padding-bottom: 2rem;
    max-width: 800px;
}
.stApp {
    background-color: #ffffff !important;
}
.main-title {
    font-size: 2.5rem !important;
    font-weight: 600 !important;
    color: #1a1a1a !important;
    text-align: center;
    margin-bottom: 0.5rem;
    padding-top: 1rem;
}
.subtitle {
    font-size: 1rem !important;
    color: #666666 !important;
    text-align: center;
    margin-bottom: 2rem;
}
.stTextInput > div > div > input {
    border: 1px solid #e0e0e0 !important;
    border-radius: 8px;
    padding: 0.75rem 1rem;
    font-size: 1rem;
    background-color: #ffffff !important;
    color: #333333 !important;
}
.stTextInput > div > div > input:focus {
    border-color: #4a90d9 !important;
    box-shadow: 0 0 0 2px rgba(74, 144, 217, 0.1);
}
.stTextInput > div > div > input::placeholder {
    color: #999999 !important;
}
.answer-container {
    background-color: #fafafa !important;
    border: 1px solid #e8e8e8;
    border-radius: 10px;
    padding: 1.5rem;
    margin-top: 1rem;
}
.answer-label {
    font-size: 0.85rem !important;
    font-weight: 600 !important;
    color: #888888 !important;
    text-transform: uppercase;
    letter-spacing: 0.5px;
    margin-bottom: 0.75rem;
}
.answer-text {
    font-size: 1rem !important;
    color: #333333 !important;
    line-height: 1.7;
}
.status-badge {
    display: inline-flex;
    align-items: center;
    gap: 0.5rem;
    background-color: #f0f9f4 !important;
    color: #166534 !important;
    padding: 0.5rem 1rem;
    border-radius: 20px;
    font-size: 0.9rem;
    font-weight: 500;
    margin-bottom: 2rem;
}
.status-dot {
    width: 8px;
    height: 8px;
    background-color: #22c55e !important;
    border-radius: 50%;
}
.stTextInput label {
    font-size: 0.95rem !important;
    color: #444444 !important;
    font-weight: 500;
    margin-bottom: 0.5rem;
}
</style>
"""

# Static markup for the "Document ready" indicator shown once the index loads.
_STATUS_BADGE_HTML = """
<div class="status-badge">
<span class="status-dot"></span>
Document ready
</div>
"""


def _answer_html(answer):
    """Return the answer card markup with *answer* embedded as escaped text."""
    import html

    # The answer is model output and must not be trusted as markup: escape it
    # before placing it in HTML rendered with unsafe_allow_html=True. Line
    # breaks become <br> so multi-paragraph answers stay inside the card
    # (a blank line would otherwise terminate the markdown HTML block).
    safe_text = "<br>".join(html.escape(str(answer)).splitlines())
    return (
        '<div class="answer-container">'
        '<div class="answer-label">Answer</div>'
        f'<div class="answer-text">{safe_text}</div>'
        "</div>"
    )


def main():
    """Render the Streamlit page: load the index, take a question, show the answer.

    The run stops early (via ``st.stop()``) with an on-page error when the
    API key is missing, when neither the index nor its zip archive exists,
    or when loading the vector store fails.
    """
    st.set_page_config(
        page_title="Papal Encyclicals RAG",
        page_icon="📄",
        layout="centered",
        initial_sidebar_state="collapsed",
    )

    # Custom CSS
    st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)

    # Header
    st.markdown('<h1 class="main-title">Papal Encyclicals RAG</h1>', unsafe_allow_html=True)
    st.markdown(
        '<p class="subtitle">Ask questions about papal encyclicals and get answers based on the source document</p>',
        unsafe_allow_html=True,
    )

    # Check for API key
    if not GOOGLE_API_KEY:
        st.error("Google API Key not found in environment variables.")
        st.info("Please add GOOGLE_API_KEY to your Hugging Face Space secrets.")
        st.stop()

    # Check if FAISS index or zip exists
    index_file = os.path.join(FAISS_INDEX_PATH, "index.faiss")
    if not os.path.exists(index_file) and not os.path.exists(FAISS_ZIP_PATH):
        st.error("FAISS index not found!")
        st.info("Please upload faiss_index.zip or the faiss_index folder to your Space.")
        st.stop()

    # Load vector store
    with st.spinner("Loading index..."):
        try:
            vector_store = load_vector_store(GOOGLE_API_KEY)
        except Exception as e:
            # Top-level UI boundary: show the failure on the page and halt.
            st.error(f"Error loading index: {e}")
            st.stop()

    # Status badge
    st.markdown(_STATUS_BADGE_HTML, unsafe_allow_html=True)

    # Question input
    user_question = st.text_input(
        "Your question",
        placeholder="e.g., What are the main themes discussed in the encyclicals?",
        help="The AI will only answer based on the content of the uploaded PDF",
        label_visibility="visible",
    )

    if user_question:
        with st.spinner("Searching for answer..."):
            try:
                answer = user_input(user_question, vector_store, GOOGLE_API_KEY)
            except Exception as e:
                # Top-level UI boundary: report retrieval/model errors inline.
                st.error(f"Error getting answer: {e}")
            else:
                st.markdown(_answer_html(answer), unsafe_allow_html=True)


if __name__ == "__main__":
    main()