agoyal496 committed on
Commit
69992ee
·
1 Parent(s): 72390f6

Formatting

Browse files
Files changed (3) hide show
  1. app.py +21 -14
  2. utils/llm_generation.py +1 -4
  3. utils/retrieval.py +8 -5
app.py CHANGED
@@ -15,23 +15,24 @@ llm_model_name = "gpt-4o-mini"
15
  # Setting up LLMGenerator
16
  llm_generator = None
17
 
 
18
  def set_api_key(api_key: str):
19
  if api_key.strip():
20
- os.environ['OPENAI_API_KEY'] = api_key
21
  else:
22
  raise gr.Error("Please provide a valid API key")
23
 
24
- def process_inputs(api_key:str, pdf_file, questions: str):
 
25
 
26
  # Setup Api KEY
27
  set_api_key(api_key)
28
-
29
 
30
  if pdf_file is None:
31
  raise gr.Error("Please upload a pdf file")
32
-
33
  # Parsing the pdf
34
- doc_handler = DocParsing(file_path=pdf_file.name,model_name=embedding_model_name)
35
  docs = doc_handler.process_pdf()
36
 
37
  # Create vector store
@@ -43,26 +44,32 @@ def process_inputs(api_key:str, pdf_file, questions: str):
43
  if not questions.strip():
44
  raise gr.Error("Please provide valid set of questions")
45
  output_dict = {}
46
- questions_list = questions.strip().split('\n')
47
  for question in questions_list:
48
-
49
  # Retrieve top similar chunks
50
  similar_chunks = retriever.search(query=question, k=10)
51
 
52
  # Generate the answer
53
  output_dict[question] = llm_generator.generate_answer(question, similar_chunks)
54
-
55
-
56
  response = json.dumps(output_dict, indent=4)
57
  return response
58
 
 
59
  with gr.Blocks() as demo:
60
  gr.Markdown("# AskMYPDF Q&A App")
61
- gr.Markdown("Enter your OPENAI API key, upload a PDF, and list your questions below.")
62
-
 
 
63
  api_key_input = gr.Textbox(label="API Key", type="password")
64
  pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
65
- questions_input = gr.Textbox(label="List of Questions (one per line)", lines=5, placeholder="Question 1\nQuestion 2\n...")
 
 
 
 
66
 
67
  submit_button = gr.Button("Submit")
68
  output = gr.Textbox(label="Output")
@@ -70,8 +77,8 @@ with gr.Blocks() as demo:
70
  submit_button.click(
71
  fn=process_inputs,
72
  inputs=[api_key_input, pdf_input, questions_input],
73
- outputs=output
74
  )
75
 
76
  if __name__ == "__main__":
77
- demo.launch()
 
15
  # Setting up LLMGenerator
16
  llm_generator = None
17
 
18
+
19
  def set_api_key(api_key: str):
20
  if api_key.strip():
21
+ os.environ["OPENAI_API_KEY"] = api_key
22
  else:
23
  raise gr.Error("Please provide a valid API key")
24
 
25
+
26
+ def process_inputs(api_key: str, pdf_file, questions: str):
27
 
28
  # Setup Api KEY
29
  set_api_key(api_key)
 
30
 
31
  if pdf_file is None:
32
  raise gr.Error("Please upload a pdf file")
33
+
34
  # Parsing the pdf
35
+ doc_handler = DocParsing(file_path=pdf_file.name, model_name=embedding_model_name)
36
  docs = doc_handler.process_pdf()
37
 
38
  # Create vector store
 
44
  if not questions.strip():
45
  raise gr.Error("Please provide valid set of questions")
46
  output_dict = {}
47
+ questions_list = questions.strip().split("\n")
48
  for question in questions_list:
49
+
50
  # Retrieve top similar chunks
51
  similar_chunks = retriever.search(query=question, k=10)
52
 
53
  # Generate the answer
54
  output_dict[question] = llm_generator.generate_answer(question, similar_chunks)
55
+
 
56
  response = json.dumps(output_dict, indent=4)
57
  return response
58
 
59
+
60
  with gr.Blocks() as demo:
61
  gr.Markdown("# AskMYPDF Q&A App")
62
+ gr.Markdown(
63
+ "Enter your OPENAI API key, upload a PDF, and list your questions below."
64
+ )
65
+
66
  api_key_input = gr.Textbox(label="API Key", type="password")
67
  pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
68
+ questions_input = gr.Textbox(
69
+ label="List of Questions (one per line)",
70
+ lines=5,
71
+ placeholder="Question 1\nQuestion 2\n...",
72
+ )
73
 
74
  submit_button = gr.Button("Submit")
75
  output = gr.Textbox(label="Output")
 
77
  submit_button.click(
78
  fn=process_inputs,
79
  inputs=[api_key_input, pdf_input, questions_input],
80
+ outputs=output,
81
  )
82
 
83
  if __name__ == "__main__":
84
+ demo.launch()
utils/llm_generation.py CHANGED
@@ -1,6 +1,5 @@
1
  from langchain.prompts import (
2
  ChatPromptTemplate,
3
- SystemMessagePromptTemplate,
4
  HumanMessagePromptTemplate,
5
  )
6
  from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
@@ -57,9 +56,7 @@ class LLMGeneration:
57
  content="<context>John traveled to Paris last summer. He stayed at a small boutique hotel and visited the Louvre museum.</context>"
58
  "<question>Where did John travel?</question>"
59
  ),
60
- AIMessage(
61
- content="""{"answer": "Paris"}"""
62
- ),
63
  ]
64
 
65
  self.initial_prompt_messages = [system_message] + few_shots
 
1
  from langchain.prompts import (
2
  ChatPromptTemplate,
 
3
  HumanMessagePromptTemplate,
4
  )
5
  from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
 
56
  content="<context>John traveled to Paris last summer. He stayed at a small boutique hotel and visited the Louvre museum.</context>"
57
  "<question>Where did John travel?</question>"
58
  ),
59
+ AIMessage(content="""{"answer": "Paris"}"""),
 
 
60
  ]
61
 
62
  self.initial_prompt_messages = [system_message] + few_shots
utils/retrieval.py CHANGED
@@ -3,11 +3,14 @@ from langchain_community.vectorstores import FAISS
3
  from langchain.schema import Document
4
  from typing import List
5
 
 
6
  class Retrieval:
7
- def __init__(self, model_name):
8
  self.model_name = model_name
9
- self.embeddings = HuggingFaceEmbeddings(model_name=model_name)
10
-
 
 
11
 
12
  def create_vector_store(self, chunks: List[Document]):
13
 
@@ -15,8 +18,8 @@ class Retrieval:
15
  # Create FAISS vector store
16
  self.vectorstore = FAISS.from_documents(self.chunks, self.embeddings)
17
 
18
- def search(self,query, k=10) -> List[Document]:
19
  # Retrieve the top-k most similar chunks (k defaults to 10)
20
  similar_docs = self.vectorstore.similarity_search(query, k)
21
 
22
- return similar_docs
 
3
  from langchain.schema import Document
4
  from typing import List
5
 
6
+
7
  class Retrieval:
8
+ def __init__(self, model_name, max_model_tokens=384):
9
  self.model_name = model_name
10
+ self.embeddings = HuggingFaceEmbeddings(
11
+ model_name=model_name,
12
+ encode_kwargs={"max_length": max_model_tokens, "truncation": True},
13
+ )
14
 
15
  def create_vector_store(self, chunks: List[Document]):
16
 
 
18
  # Create FAISS vector store
19
  self.vectorstore = FAISS.from_documents(self.chunks, self.embeddings)
20
 
21
+ def search(self, query, k=10) -> List[Document]:
22
  # Retrieve the top-k most similar chunks (k defaults to 10)
23
  similar_docs = self.vectorstore.similarity_search(query, k)
24
 
25
+ return similar_docs