File size: 2,287 Bytes
f31b8a3
37123e5
 
 
 
 
f31b8a3
 
37123e5
 
 
 
 
 
 
72390f6
37123e5
69992ee
72390f6
 
69992ee
72390f6
 
37123e5
69992ee
 
37123e5
 
 
 
 
72390f6
69992ee
37123e5
69992ee
37123e5
 
 
 
 
72390f6
 
37123e5
72390f6
 
37123e5
69992ee
37123e5
69992ee
37123e5
 
 
 
 
69992ee
6dee266
f31b8a3
 
69992ee
f31b8a3
 
69992ee
 
 
 
f31b8a3
 
69992ee
 
 
 
 
f31b8a3
 
 
 
 
 
 
69992ee
f31b8a3
 
 
69992ee
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import gradio as gr
import os
from utils.document_parsing import DocParsing
from utils.retrieval import Retrieval
from utils.llm_generation import LLMGeneration
import json


# Sentence-transformers model used to embed both document chunks and queries.
embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
#  Setting up Retriever (shared, module-level; re-indexed on every submit)
retriever = Retrieval(model_name=embedding_model_name)


# OpenAI chat model used to generate answers from the retrieved context.
llm_model_name = "gpt-4o-mini"
# Setting up LLMGenerator
# NOTE(review): this module-level placeholder is never read — process_inputs
# creates its own local LLMGeneration instance on each call. Consider removing.
llm_generator = None


def set_api_key(api_key: str) -> None:
    """Store the user-supplied OpenAI API key in the process environment.

    Args:
        api_key: Key pasted by the user; surrounding whitespace is ignored.

    Raises:
        gr.Error: If the key is empty or whitespace-only.
    """
    key = api_key.strip()
    if key:
        # Store the *stripped* key: the original validated the stripped value
        # but stored the raw one, so a key pasted with stray whitespace (a
        # common copy/paste artifact) would be exported verbatim and fail auth.
        os.environ["OPENAI_API_KEY"] = key
    else:
        raise gr.Error("Please provide a valid API key")


def process_inputs(api_key: str, pdf_file, questions: str) -> str:
    """Answer each question against the uploaded PDF and return a JSON string.

    Args:
        api_key: OpenAI API key supplied by the user.
        pdf_file: Uploaded file object from ``gr.File`` (must not be None);
            only ``pdf_file.name`` (the path) is used.
        questions: Newline-separated list of questions.

    Returns:
        A pretty-printed JSON string mapping each question to its answer.

    Raises:
        gr.Error: On a missing/invalid API key, missing PDF, or an empty
            question list.
    """
    # Validate ALL inputs up front, before the expensive parse/embed work,
    # so the user gets immediate feedback on bad input. (Previously the
    # questions check ran only after parsing and indexing the whole PDF.)
    set_api_key(api_key)

    if pdf_file is None:
        raise gr.Error("Please upload a pdf file")

    # Strip each line and drop blanks so empty rows in the textbox do not
    # become empty "questions" that waste retrieval and LLM calls.
    questions_list = [q.strip() for q in questions.split("\n") if q.strip()]
    if not questions_list:
        raise gr.Error("Please provide valid set of questions")

    # Parse the PDF into chunks sized for the embedding model.
    doc_handler = DocParsing(file_path=pdf_file.name, model_name=embedding_model_name)
    docs = doc_handler.process_pdf()

    # (Re)build the shared retriever's vector store from this document.
    retriever.create_vector_store(chunks=docs)

    # Local generator instance; uses the API key exported above.
    llm_generator = LLMGeneration(llm_model_name=llm_model_name)

    output_dict = {}
    for question in questions_list:
        # Retrieve the top-k most similar chunks for this question.
        similar_chunks = retriever.search(query=question, k=10)

        # Generate an answer grounded in the retrieved chunks.
        output_dict[question] = llm_generator.generate_answer(question, similar_chunks)

    return json.dumps(output_dict, indent=4)


# --- Gradio UI definition: three inputs, one button, one text output ---
with gr.Blocks() as demo:
    gr.Markdown("# AskMYPDF Q&A App")
    gr.Markdown(
        "Enter your OPENAI API key, upload a PDF, and list your questions below."
    )

    # Masked textbox so the API key is not displayed on screen.
    api_key_input = gr.Textbox(label="API Key", type="password")
    # Restrict the file picker to PDF uploads.
    pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
    # One question per line; split on newlines by process_inputs.
    questions_input = gr.Textbox(
        label="List of Questions (one per line)",
        lines=5,
        placeholder="Question 1\nQuestion 2\n...",
    )

    submit_button = gr.Button("Submit")
    # Shows the JSON string of {question: answer} returned by process_inputs.
    output = gr.Textbox(label="Output")

    # Wire the button to the full parse -> index -> retrieve -> generate flow.
    submit_button.click(
        fn=process_inputs,
        inputs=[api_key_input, pdf_input, questions_input],
        outputs=output,
    )

# Start the local Gradio server only when run as a script (not on import).
if __name__ == "__main__":
    demo.launch()