Spaces:

aakash0563
/

PDFQueryBot

Runtime error

File size: 2,869 Bytes

29aeeac
33db722
 
 
 
 
 
 
 
 
 
4dfc3a9
17982b7
33db722
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
06aad00
 
 
 
29aeeac
06aad00
 
 
 
 
 
 
 
 
 
 
 
 
 
61477db
c631bd3
 
 
61477db
 
 
 
 
 
 
 
 
 
 
 
06aad00
 
 
 
29aeeac
61477db
 
06aad00
17809d8
 
17982b7
 
d832d3e
3f7e079
29aeeac
71fc09f
17982b7
fbffa21
da6a822
fbffa21
6a387ad

import threading
import re
import gradio as gr
import os
import google.generativeai as genai
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
import chromadb
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from uuid import uuid4



# Splitter applied to each PDF page's text before the chunks are indexed.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=800,
    chunk_overlap=50
)
# Chroma client whose data is persisted on disk under the "test" path.
client = chromadb.PersistentClient("test")
# Because the store is persistent, "test_data" already exists on every start
# after the first one; create_collection() would raise in that case, so reuse
# the existing collection when present.
collection = client.get_or_create_collection("test_data")

def upload_pdf(file_path):
    """Split a PDF into text chunks and store them in the Chroma collection.

    Every page returned by ``PyPDFLoader`` is split with the module-level
    ``text_splitter``; each resulting chunk is stored as its own document,
    tagged with the metadata of the page it came from.

    Args:
        file_path: Location of the PDF, passed straight to ``PyPDFLoader``.

    Returns:
        A status message. On success it reports the collection's total
        chunk count after the insert.
    """
    pages = PyPDFLoader(file_path).load()

    chunks = []
    metadatas = []
    for page in pages:
        for chunk in text_splitter.split_text(page.page_content):
            # Index the chunk itself (not the page's full chunk list), so
            # each stored document carries its own text rather than a copy
            # of the page's first chunk.
            chunks.append(chunk)
            metadatas.append(page.metadata)

    if not chunks:
        # Nothing to index (e.g. no extractable text); avoid calling
        # collection.add() with empty lists.
        return "No extractable text found in the PDF. Nothing was stored."

    collection.add(
        ids=[str(uuid4()) for _ in chunks],
        documents=chunks,
        metadatas=metadatas,
    )
    return f"PDF Uploaded Successfully. {collection.count()} chunks stored in ChromaDB"

# Configure the Gemini client with the key read from the GOOGLE_API_KEY
# environment variable at import time (None if the variable is unset).

genai.configure(api_key=GOOGLE_API_KEY)
model = genai.GenerativeModel('gemini-pro')  # Shared model handle used by get_Answer

def get_Answer(query):
    """Answer a question using chunks retrieved from the Chroma collection.

    The two closest chunks to ``query`` are fetched from the module-level
    ``collection``, stripped of non-ASCII characters, and sent to the
    module-level Gemini ``model`` together with the question.

    Args:
        query: The user's question.

    Returns:
        The model's answer text, or a short explanatory message when the
        question is blank, nothing could be retrieved, or the model returned
        no text.
    """
    # A blank question has nothing to retrieve or answer; don't spend an
    # embedding lookup and a model call on it.
    if not query or (isinstance(query, str) and not query.strip()):
        return "Please enter a question."

    res = collection.query(
        query_texts=query,
        n_results=2
    )
    # 'documents' holds one result list per query text; only one was sent.
    matches = res['documents'][0] if res['documents'] else []
    if not matches:
        # Without retrieved context the model could only guess.
        return "No relevant context found. Please upload a PDF first."

    system = f"""You are a teacher. You will be provided some context, 
    your task is to analyze the relevant context and answer the below question:
    - {query}
    """
    # Replace runs of non-ASCII characters with a space before prompting.
    context = " ".join([re.sub(r'[^\x00-\x7F]+', ' ', r) for r in matches])
    prompt = f"### System: {system} \n\n ###: User: {context} \n\n ### Assistant:\n"
    response = model.generate_content(prompt)
    try:
        return response.text
    except ValueError:
        # The .text accessor raises ValueError when the response contains no
        # text part (for example when the reply was blocked).
        return "The model did not return a text answer for this question."

def Show_Interface(file_path,query):
    """Dispatch a Gradio request to either PDF upload or question answering.

    Exactly one of the two inputs is expected per call.

    Args:
        file_path: The uploaded file input, or a falsy value when no file
            was provided.
        query: The question text, or a falsy/blank value when none was typed.

    Returns:
        The status message from ``upload_pdf``, the answer from
        ``get_Answer``, or a hint when both or neither input was supplied.
    """
    # Treat a whitespace-only question as "no question".
    if isinstance(query, str):
        query = query.strip()

    if file_path and query:
        return "Choose only one method at a time(Upload pdf /or Query from uploaded PDF)"
    if file_path:
        return upload_pdf(file_path)
    if query:
        return get_Answer(query)
    # Neither input given: say so instead of querying with an empty question.
    return "Please upload a PDF or enter a question."

# # Define the Gradio interface
# iface1 = gr.Interface(
#     fn=get_Answer,
#     inputs=gr.Textbox(lines=5, placeholder="Ask a question"),  # Textbox for query
#     outputs="textbox",  # Display the generated answer in a textbox
#     title="Answer Questions with Gemini-Pro",
#     description="Ask a question and get an answer based on context from a ChromaDB collection.",
# )



# Define the Gradio interface: a single form that either uploads a PDF or
# asks a question, routed through Show_Interface.
iface2 = gr.Interface(
    fn=Show_Interface,
    inputs=["file","text"],  # Two inputs, passed to Show_Interface in this order: file upload, then question text
    outputs="textbox",  # Display the output text in a textbox
    title="Choose one process at a time(Upload pdf /or Query from uploaded PDF)",
    #description="Choose only one method at a time(Upload pdf /or Query from uploaded PDF)",
)

# Start the app as soon as this module is executed.
iface2.launch(debug=True)