# Midterm/pages/Load_Documents.py
# Source: Hugging Face Space file view (author: deepali1021)
# Last commit: "refactor" (7d6aa6f)
import os
import openai
from utils._admin_util import create_embeddings, create_vector_store, read_pdf_data, split_data
import streamlit as st
from dotenv import load_dotenv
def validate_api_key(api_key, model="text-embedding-ada-002"):
    """Check whether an OpenAI API key is accepted by the API.

    Sends one minimal embeddings request using the given key and reports
    any failure to the user through ``st.error``.

    Args:
        api_key: The OpenAI API key to test.
        model: Embedding model used for the probe request. Defaults to
            ``"text-embedding-ada-002"``, the model the original code used.

    Returns:
        ``True`` if the probe request succeeded, ``False`` otherwise.
        Errors are reported in the UI instead of being raised.
    """
    # Reject a missing key up front: when ``api_key`` is None the OpenAI
    # client falls back to the OPENAI_API_KEY environment variable, so the
    # probe would validate a different key than the one passed in.
    if not api_key or not str(api_key).strip():
        st.error("❌ Invalid API key")
        return False

    try:
        # Make a small test request to OpenAI.
        client = openai.OpenAI(api_key=api_key)
        client.embeddings.create(input="test", model=model)
    except openai.AuthenticationError:
        st.error("❌ Invalid API key")
        return False
    except openai.PermissionDeniedError:
        st.error("❌ Permission denied. Please check your API key's permissions")
        return False
    except Exception as e:
        # Top-level boundary for this check: network failures, rate limits,
        # etc. are shown to the user rather than crashing the page.
        st.error(f"❌ API key validation error: {e}")
        return False
    return True
def _collect_chunks(uploaded_files):
    """Read each uploaded PDF, split its text, and return all chunks combined."""
    all_chunks = []
    for pdf in uploaded_files:
        st.write(f"Processing: {pdf.name}")

        # Extract text from PDF
        text = read_pdf_data(pdf)
        st.write(f"👉 Reading {pdf.name} done")

        # Create chunks for this PDF
        all_chunks.extend(split_data(text))
        st.write(f"👉 Splitting {pdf.name} into chunks done")
    return all_chunks


def main():
    """Render the upload page and index the uploaded PDFs.

    Flow:
      1. Load environment variables and require ``OPENAI_API_KEY``; if it is
         missing, show setup instructions and stop the script run.
      2. Let the user upload one or more PDF files.
      3. Read and split every file, create an embeddings instance, build the
         vector store from all chunks, and keep it in
         ``st.session_state.vector_store``.

    Any unexpected exception raised while processing is shown in the UI
    instead of crashing the page.
    """
    load_dotenv()

    # Page configuration is issued before any other Streamlit command so that
    # it also applies when the missing-key branch below renders its messages.
    st.set_page_config(page_title="Dump PDFs to QDrant - Vector Store")

    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        st.error("❌ OpenAI API key not found! Please ensure it's set in the environment variables.")
        st.info("To set up your API key:")
        st.code("1. Go to Hugging Face Space settings\n2. Add OPENAI_API_KEY in Repository Secrets")
        st.stop()

    # NOTE: the key is only checked for presence here; the live check in
    # validate_api_key() is deliberately not called on every rerun.

    st.title("Please upload your files...📁 ")

    try:
        # Upload multiple PDF files
        uploaded_files = st.file_uploader("Upload PDF files", type=["pdf"], accept_multiple_files=True)

        if uploaded_files:
            with st.spinner('Processing PDF files...'):
                all_chunks = _collect_chunks(uploaded_files)

                if not all_chunks:
                    st.error("❌ No valid chunks were created from the PDFs")
                    # NOTE(review): relies on st.stop() not being caught by the
                    # `except Exception` below — confirm for the installed
                    # Streamlit version.
                    st.stop()

                st.write("Creating embeddings...")
                embeddings = create_embeddings()
                st.write("👉 Creating embeddings instance done")

                # Create vector store with all chunks (presumably Qdrant-backed,
                # per the success message; the helper is defined elsewhere).
                vector_store = create_vector_store(embeddings, all_chunks)
                st.session_state.vector_store = vector_store

                st.success(f"✅ Successfully processed {len(uploaded_files)} files and pushed embeddings to Qdrant")
                st.write(f"Total chunks created: {len(all_chunks)}")
    except Exception as e:
        # Top-level UI boundary: report the failure to the user.
        st.error(f"❌ An unexpected error occurred: {e}")
# Script entry point: build the page when this file is executed directly.
if __name__ == "__main__":
    main()