pvyas96 committed on
Commit
e7e6fc2
1 Parent(s): b10fba0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -53
app.py CHANGED
@@ -1,13 +1,7 @@
1
- from langchain.callbacks.manager import CallbackManager
2
- from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
3
- from langchain.llms import LlamaCpp
4
- from langchain.prompts import PromptTemplate
5
- import PyPDF2 # for reading pdf files
6
- import torch # for loading and running the llama model
7
- import gradio as gr # for creating a user interface
8
 
9
- # Callbacks support token-wise streaming
10
- callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
11
  # Make sure the model path is correct for your system!
12
  llm = LlamaCpp(
13
  model_path="./llama-2-7b-chat.Q4_K_S.gguf",
@@ -15,84 +9,58 @@ llm = LlamaCpp(
15
  n_ctx=512,
16
  max_tokens=2000,
17
  top_p=1,
18
- #callback_manager=callback_manager,
19
- #verbose=1
20
  )
21
 
22
  template = """Generate only one MCQ question based on text \
23
  that is delimited by triple backticks \
24
  with {pattern} pattern. \
25
- text: ```{text}``` \
26
  """
27
 
28
 
29
  def extract_paragraphs(pdf_file):
30
- pattern = """IIT Gate exam \ """
31
- # Open the pdf file in read mode
32
  pdf_file = open(pdf_file, "rb")
33
-
34
- # Create a pdf reader object
35
  pdf_reader = PyPDF2.PdfReader(pdf_file)
36
-
37
- # Get the number of pages in the pdf
38
  num_pages = len(pdf_reader.pages)
39
-
40
- # Initialize an empty string to store the text
41
  text = ""
42
 
43
- # Loop through each page and extract the text
44
  for i in range(num_pages):
45
- # Get the page object
46
  page = pdf_reader.pages[i]
47
- # Extract the text from the page
48
  text += page.extract_text()
49
 
50
- # Close the pdf file
51
  pdf_file.close()
52
-
53
- # Split the text into words
54
  words = text.split()
55
-
56
- # Initialize an empty list to store the paragraphs
57
  paragraphs = []
58
-
59
- # Initialize a counter to keep track of the words
60
- count = 0
61
-
62
- # Initialize an empty string to store the current paragraph
63
  paragraph = ""
 
64
 
65
- # Loop through each word and add it to the paragraph
66
  for word in words:
67
- # Add the word to the paragraph
68
  paragraph += word + " "
69
- # Increment the counter
70
  count += 1
71
- # If the counter reaches 400 or the end of the text, append the paragraph to the list and reset the counter and the paragraph
72
  if count == 200 or word == words[-1]:
73
  paragraphs.append(paragraph)
74
  count = 0
75
  paragraph = ""
76
 
77
- # Return the list of paragraphs
78
  return paragraphs
79
 
 
80
  def Generate_mcq_from_pdf(pdf_file):
81
- pattern = "IIT GATE \ "
82
- paragraphs = extract_paragraphs(pdf_file)
83
- for para in paragraphs:
84
- text = f"""{para}"""
85
- input_msg = PromptTemplate.from_template(template=template)
86
- input_s = input_msg.format(pattern=pattern, text=text)
87
- output_msg = llm(input_s)
88
- output_file = "questions.txt"
89
- with open(output_file, "w") as f:
90
- f.write(output_msg)
91
- return output_msg, output_file
92
 
93
  app = gr.Interface(
94
- fn=Generate_mcq_from_pdf, # your function
95
- inputs=gr.File(type="filepath", file_types=["pdf"]), # file upload component for pdf files
96
- outputs=[gr.Textbox(label="Questions"), gr.File(label="Output File")], # list of output components
97
  )
98
- app.launch()
 
1
+ import PyPDF2
2
+ import torch
3
+ import gradio as gr
 
 
 
 
4
 
 
 
5
  # Make sure the model path is correct for your system!
6
  llm = LlamaCpp(
7
  model_path="./llama-2-7b-chat.Q4_K_S.gguf",
 
9
  n_ctx=512,
10
  max_tokens=2000,
11
  top_p=1,
 
 
12
  )
13
 
14
  template = """Generate only one MCQ question based on text \
15
  that is delimited by triple backticks \
16
  with {pattern} pattern. \
17
+ text: `{text}` \
18
  """
19
 
20
 
21
  def extract_paragraphs(pdf_file):
22
+ pattern = "IIT GATE " # Adjust the pattern as needed
 
23
  pdf_file = open(pdf_file, "rb")
 
 
24
  pdf_reader = PyPDF2.PdfReader(pdf_file)
 
 
25
  num_pages = len(pdf_reader.pages)
 
 
26
  text = ""
27
 
 
28
  for i in range(num_pages):
 
29
  page = pdf_reader.pages[i]
 
30
  text += page.extract_text()
31
 
 
32
  pdf_file.close()
 
 
33
  words = text.split()
 
 
34
  paragraphs = []
 
 
 
 
 
35
  paragraph = ""
36
+ count = 0
37
 
 
38
  for word in words:
 
39
  paragraph += word + " "
 
40
  count += 1
 
41
  if count == 200 or word == words[-1]:
42
  paragraphs.append(paragraph)
43
  count = 0
44
  paragraph = ""
45
 
 
46
  return paragraphs
47
 
48
+
49
  def Generate_mcq_from_pdf(pdf_file):
50
+ paragraphs = extract_paragraphs(pdf_file)
51
+ for para in paragraphs:
52
+ input_msg = PromptTemplate.from_template(template=template)
53
+ input_s = input_msg.format(pattern=pattern, text=para)
54
+ output_msg = llm(input_s)
55
+ output_file = "questions.txt"
56
+ with open(output_file, "w") as f:
57
+ f.write(output_msg)
58
+ return output_msg, output_file
59
+
 
60
 
61
  app = gr.Interface(
62
+ fn=Generate_mcq_from_pdf,
63
+ inputs=gr.File(type="filepath", file_types=["pdf"]),
64
+ outputs=[gr.Textbox(label="Questions"), gr.File(label="Output File")],
65
  )
66
+ app.launch()