File size: 2,001 Bytes
f31b8a3
37123e5
 
 
 
 
f31b8a3
 
37123e5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f31b8a3
37123e5
 
 
 
 
 
 
 
 
 
f31b8a3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import gradio as gr
import os
from utils.document_parsing import DocParsing
from utils.retrieval import Retrieval
from utils.llm_generation import LLMGeneration
import json


# Model identifiers handed to the helper classes constructed below.
embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
llm_model_name = "gpt-4o-mini"

# Retriever instance, configured with the embedding model name.
retriever = Retrieval(
    model_name=embedding_model_name,
)

# LLM generator instance, configured with the LLM model name.
llm_generator = LLMGeneration(
    llm_model_name=llm_model_name,
)

def set_api_key(api_key):
    """Store *api_key* in this process's environment as ``OPENAI_API_KEY``."""
    os.environ.update({'OPENAI_API_KEY': api_key})

def process_inputs(api_key: str, pdf_file, questions: str):
    """Answer every question in *questions* against the uploaded PDF.

    Args:
        api_key: OpenAI API key; exported through ``set_api_key`` before the
            document is processed and answers are generated.
        pdf_file: Upload coming from the Gradio file component. Either an
            object exposing the file path as ``.name`` or a plain path string
            is accepted.
        questions: Newline-separated questions. Blank lines are ignored and a
            question that appears more than once is answered only once.

    Returns:
        A JSON object (as a string) mapping each question to its generated
        answer.

    Raises:
        gr.Error: If no PDF was uploaded, or if *questions* contains no
            non-blank line.
    """
    # Validate up front, before any parsing/embedding work, and use gr.Error
    # so the message is surfaced to the user in the UI.
    if pdf_file is None:
        raise gr.Error("Please upload a PDF file.")

    # splitlines() also handles "\r\n". Blank lines are dropped so no empty
    # query reaches the retriever/LLM. dict.fromkeys() removes duplicates
    # while keeping the original order; the result is keyed by question, so
    # a repeated question would only overwrite its own earlier answer.
    stripped_lines = (line.strip() for line in (questions or "").splitlines())
    questions_list = list(dict.fromkeys(q for q in stripped_lines if q))
    if not questions_list:
        raise gr.Error("Please enter at least one question.")

    # NOTE(review): this writes to the process-wide environment, so the key
    # is shared by everything running in this process.
    set_api_key(api_key)

    # Accept both a file-like upload object (path in ``.name``) and a bare
    # path string.
    pdf_path = getattr(pdf_file, "name", pdf_file)

    # Parse the PDF into chunks and (re)build the vector store from them.
    doc_handler = DocParsing(file_path=pdf_path, model_name=embedding_model_name)
    docs = doc_handler.process_pdf()
    retriever.create_vector_store(chunks=docs)

    output_dict = {}
    for question in questions_list:
        # Fetch the 10 most similar chunks and let the LLM answer from them.
        similar_chunks = retriever.search(query=question, k=10)
        output_dict[question] = llm_generator.generate_answer(question, similar_chunks)

    # ensure_ascii=False keeps non-ASCII text readable in the output box
    # instead of emitting \uXXXX escapes; the result is still valid JSON.
    return json.dumps(output_dict, ensure_ascii=False)

# Build the Gradio UI: a title, a short instruction line, three inputs, a
# submit button and one output box. Components are created in display order.
with gr.Blocks() as demo:
    gr.Markdown("# AskMYPDF Q&A App")
    gr.Markdown("Enter your OPENAI API key, upload a PDF, and list your questions below.")
    
    # Inputs: the API key (masked), the PDF upload (restricted to .pdf), and
    # a multi-line box holding one question per line.
    api_key_input = gr.Textbox(label="API Key", type="password")
    pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
    questions_input = gr.Textbox(label="List of Questions (one per line)", lines=5, placeholder="Question 1\nQuestion 2\n...")

    submit_button = gr.Button("Submit")
    # Receives the string returned by process_inputs.
    output = gr.Textbox(label="Output")

    # Clicking the button calls process_inputs with the three input values,
    # in the order listed in `inputs`, and writes its result to `output`.
    submit_button.click(
        fn=process_inputs,
        inputs=[api_key_input, pdf_input, questions_input],
        outputs=output
    )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()