Spaces:

youdata-ai
/

MoSPI

Sleeping

File size: 3,463 Bytes

4ab6643

import streamlit as st
from pymongo import MongoClient
import os
from dotenv import load_dotenv
from helper.upload_file_to_s3 import upload_file
from helper.process_pdf import process_pdf
import time

# Load settings via python-dotenv first, so the environment lookups below
# can see any values it provides.
load_dotenv()

# AWS settings. They are not referenced again in the visible part of this
# file.
AWS_ACCESS_KEY_ID = os.environ.get("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY")
AWS_BUCKET_NAME = os.environ.get("AWS_BUCKET_NAME")

# MongoDB connection settings. Each value is None when the variable is unset.
MONGO_URI = os.environ.get("MONGO_URI")
DB_NAME = os.environ.get("DB_NAME")
COLLECTION_NAME = os.environ.get("COLLECTION_NAME")
COLLECTION_NAME2 = os.environ.get("COMPANY_COLLECTION_NAME")

# Module-level MongoDB handles shared by the page functions below:
#   collection  - documents that upload_pdf() updates by "object_url"
#   collection2 - documents looked up by "type" ("tags" / "categories")
mongo_client = MongoClient(MONGO_URI)
db = mongo_client[DB_NAME]
collection = db[COLLECTION_NAME]
collection2 = db[COLLECTION_NAME2]

def upload_pdf() -> None:
    """Render the PDF upload page and handle one PDF submission.

    The page shows a "Back" button (returns to the ``"upload_main"`` page),
    a single-file PDF uploader, and two multi-selects whose options are read
    from ``collection2`` (the documents with ``type`` equal to ``"tags"`` and
    ``"categories"``).

    A "Submit" button is only offered once a file, at least one tag and at
    least one category have been chosen. On submit the file is handed to
    ``upload_file(uploaded_pdf, "PDF")`` and the returned ``object_url`` is
    passed to ``process_pdf`` together with the selections. The document in
    ``collection`` matching that ``object_url`` is then updated with the
    selected tags, categories and a ``status`` of ``"processed"`` or
    ``"failed"``. On success the user is redirected to the ``"view_pdf"``
    page after a two second pause.

    NOTE(review): this relies on a document with that ``object_url`` already
    existing in ``collection`` (``update_one`` is called without upsert) --
    presumably ``upload_file`` or ``process_pdf`` creates it; confirm.

    Returns:
        None. All output is rendered through Streamlit and written to MongoDB.
    """
    if st.button("Back"):
        st.session_state.page = "upload_main"
        st.rerun()

    # File uploader (a single PDF file only)
    uploaded_pdf = st.file_uploader(
        "Choose a PDF file to upload",
        type=["pdf"],
        accept_multiple_files=False,
    )

    # Fetch tags and categories from MongoDB. A missing document, a missing
    # key, or a stored None all fall back to an empty option list.
    tags_doc = collection2.find_one({"type": "tags"})
    categories_doc = collection2.find_one({"type": "categories"})
    tags_options = (tags_doc or {}).get("tags") or []
    categories_options = (categories_doc or {}).get("categories") or []

    # Multi-select dropdowns for tags and categories
    selected_tags = st.multiselect("Select Tags", options=tags_options)
    selected_categories = st.multiselect(
        "Select Categories", options=categories_options
    )

    # Only offer "Submit" once a file and at least one tag and one category
    # have been chosen.
    if not (uploaded_pdf and selected_tags and selected_categories):
        return
    if not st.button("Submit"):
        return

    processed = False
    with st.spinner(text="Uploading and Processing PDF"):
        # Upload file to S3
        metadata = upload_file(uploaded_pdf, "PDF")
        object_url = metadata.get("object_url") if metadata else None

        if not object_url:
            # Without an object URL there is nothing to process and no
            # document to update, so tell the user instead of failing
            # silently.
            st.error("Could Not Upload the PDF. Please try again.")
        else:
            processed = bool(
                process_pdf(object_url, selected_tags, selected_categories)
            )
            # Record the selections and the outcome on the uploaded document.
            collection.update_one(
                {"object_url": object_url},
                {"$set": {
                    "tags": selected_tags,
                    "categories": selected_categories,
                    "status": "processed" if processed else "failed",
                }},
            )
            if processed:
                st.success("PDF has been successfully uploaded and processed.")
            else:
                st.error("Could Not Process the PDF. Please try again.")

    # Redirect only after the spinner has closed, leaving the success message
    # visible for a moment before the page switches.
    if processed:
        st.write("Redirecting to View Page to view all uploaded pdfs")
        time.sleep(2)
        st.session_state.page = "view_pdf"
        st.rerun()