# Spaces: taaha3244 / Lex
# Status: Runtime error
# File size: 3,922 Bytes
# Revision: 8999dd1

import os
from dotenv import load_dotenv
import tempfile
import streamlit as st
from langchain_community.document_loaders import PyPDFLoader

from main import summarize_pdf_document
from main import retrieve_documents
from main import embed_document_data
from main import is_document_embedded




# Load variables from a .env file into the environment at import time, before
# any UI code runs; process_documents later reads OPENAI_API_KEY via os.getenv.
load_dotenv()

def main():
    """Lay out the page: sidebar uploader, summaries, embed button, and Q&A.

    When at least one PDF has been uploaded, the files are written to
    temporary files, the summary section is rendered, and a button offers to
    embed the documents for Q&A. The Q&A section is rendered in every case;
    it receives an empty list when nothing has been uploaded.
    """
    sidebar = st.sidebar
    sidebar.title("PDF Management")
    uploads = sidebar.file_uploader(
        "Upload PDF files",
        type=["pdf"],
        accept_multiple_files=True,
    )

    # Fall back to an empty list so the Q&A section always gets a list.
    saved_files = save_uploaded_files(uploads) if uploads else []

    if uploads:
        process_documents(saved_files)
        embed_requested = st.button('Add Uploaded  Documents in Q nd A')
        if embed_requested:
            embed_documents(saved_files)

    # The Q&A section does not depend on any upload being present.
    display_qna_section(saved_files)


def save_uploaded_files(uploaded_files):
    """Write each upload to its own temporary ``.pdf`` file.

    Args:
        uploaded_files: Iterable of objects exposing ``getvalue()`` (the file
            bytes) and ``name`` (the original filename).

    Returns:
        A list of ``(temp_path, original_name)`` tuples, in input order.
        The temporary files are created with ``delete=False``, so they remain
        on disk after this function returns.
    """
    def _persist(upload):
        # delete=False keeps the file on disk once the handle is closed.
        handle = tempfile.NamedTemporaryFile(delete=False, suffix='.pdf')
        with handle:
            handle.write(upload.getvalue())
        return handle.name, upload.name

    return [_persist(upload) for upload in uploaded_files]


def process_documents(files_info):
    """Render the "Document Summaries" section and summarize on request.

    Nothing is summarized until the 'Summarize Uploaded Documents' button is
    clicked. Each file is handled independently: a failure for one file is
    reported with ``st.error`` and the loop continues with the next file.

    Args:
        files_info: Iterable of ``(temp_path, original_name)`` pairs, where
            ``temp_path`` is the on-disk copy of the PDF and ``original_name``
            is the filename shown to the user.
    """
    st.header("Document Summaries")
    if not st.button('Summarize Uploaded Documents'):
        return

    # The key does not change between files, so read it once.
    api_key = os.getenv('OPENAI_API_KEY')

    for index, (temp_path, original_name) in enumerate(files_info):
        with st.container():  # Keep each file's heading and summary together
            st.write(f"Summary for {original_name}:")
            try:
                summary = summarize_pdf_document(temp_path, api_key)
                st.text_area(
                    # A real label, hidden visually, instead of an empty one;
                    # the heading written above already names the file.
                    f"Summary for {original_name}",
                    value=summary,
                    height=200,
                    # The index keeps the widget key unique even when two
                    # uploads share the same filename.
                    key=f"summary_{index}_{original_name}",
                    label_visibility="collapsed",
                )
            except Exception as e:
                st.error(f"Failed to summarize {original_name}: {str(e)}")
                    
    
def embed_documents(files_info):
    """Embed every not-yet-embedded document under its original filename.

    For each ``(temp_path, original_name)`` pair, files that
    ``is_document_embedded`` reports as present are skipped with an info
    message. Otherwise the PDF is loaded from ``temp_path``, each loaded
    document's ``source`` metadata is set to ``original_name`` (replacing the
    temporary path), and the documents are passed to ``embed_document_data``.
    Load or embed failures are reported with ``st.error`` and do not stop the
    remaining files.
    """
    for temp_path, original_name in files_info:
        # Skip work for anything already present in the store.
        if is_document_embedded(original_name):
            st.info(f"{original_name} is already embedded.")
            continue

        try:
            docs = list(PyPDFLoader(temp_path).load())

            # Point the source at the user's filename, not the temp path.
            for doc in docs:
                doc.metadata['source'] = original_name

            embed_document_data(docs)
            st.success(f"Embedded {original_name}")
        except Exception as err:
            st.error(f"Failed to embed {original_name}: {str(err)}")


def display_qna_section(files_info):
    """Render the question form and show the result of a submitted query.

    A non-empty question is passed to ``handle_query`` and the result is
    written to the page; submitting an empty question shows an error instead.

    Args:
        files_info: Accepted from the caller but not used by this function.
    """
    st.header("Question and Answer")
    with st.form("qa_form"):
        question = st.text_input("Enter your question here:")
        submitted = st.form_submit_button('Get Answer')

    # Nothing to do until the form has actually been submitted.
    if not submitted:
        return

    if question:
        st.write(handle_query(question))
    else:
        st.error("Please enter a question to get an answer.")

def handle_query(query):
    """Return whatever ``retrieve_documents`` produces for ``query``."""
    return retrieve_documents(query)

# Build the page only when this file is run as the main module, not when it
# is imported.
if __name__ == "__main__":
    main()