Spaces:
Runtime error
Runtime error
File size: 1,336 Bytes
b9ca8c8 9063322 b9ca8c8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
import chromadb
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from uuid import uuid4
import gradio as gr
# Splitter applied to each PDF page's text before storage: chunks of at most
# 800 units with 50 units of overlap, measured by the splitter's length
# function (characters unless configured otherwise).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=800,
    chunk_overlap=50,
)

# Persistent (on-disk) Chroma client rooted at the "test" directory. Because
# the data survives restarts, the collection may already exist from an
# earlier run of this script.
client = chromadb.PersistentClient("test")

# get_or_create_collection reuses an existing "test_data" collection instead
# of raising, which plain create_collection does when the name is taken.
collection = client.get_or_create_collection("test_data")
def upload_pdf(file_path):
    """Split a PDF into text chunks and store them in the Chroma collection.

    Each page is loaded with ``PyPDFLoader``, split with the module-level
    ``text_splitter``, and every resulting chunk is added to the module-level
    ``collection`` under a fresh UUID, together with its page's metadata.

    Args:
        file_path: Path to the PDF file, or a file-like object whose ``name``
            attribute holds the path (as temp-file wrappers do).

    Returns:
        A human-readable status message for display in the UI.
    """
    if file_path is None:
        return "No file was provided."

    # NOTE(review): depending on the Gradio version, the "file" input hands
    # over either a path string or a temp-file wrapper; confirm against the
    # deployed version. File-like objects expose their on-disk path as .name.
    pdf_path = file_path.name if hasattr(file_path, "read") else file_path

    pages = PyPDFLoader(pdf_path).load()

    texts = []
    metadatas = []
    for page in pages:
        for chunk in text_splitter.split_text(page.page_content):
            # Store the individual chunk, paired with the metadata of the
            # page it came from.
            texts.append(chunk)
            metadatas.append(page.metadata)

    # collection.add rejects empty batches, so report the situation instead
    # (e.g. a scanned PDF with no extractable text layer).
    if not texts:
        return "No text could be extracted from the PDF; nothing was stored."

    collection.add(
        ids=[str(uuid4()) for _ in texts],
        documents=texts,
        metadatas=metadatas,
    )
    return f"PDF Uploaded Successfully. {collection.count()} chunks stored in ChromaDB"
# Define the Gradio interface: one file input wired to upload_pdf, with the
# returned status message shown in a textbox.
iface = gr.Interface(
    fn=upload_pdf,
    inputs=["file"],  # Specify a file input component
    outputs="textbox",  # Display the output text in a textbox
    title="Upload PDF to ChromaDB",
    description="Upload a PDF file and store its text chunks in ChromaDB.",
)

# Launch the Gradio app
iface.launch(debug=True, share=True)