File size: 2,337 Bytes
ad2288c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.llms import LlamaCpp
from langchain.prompts import PromptTemplate
import PyPDF2 # for reading pdf files
import torch # for loading and running the llama model
import gradio as gr # for creating a user interface

# Route generated tokens to stdout one at a time through a streaming handler.
streaming_handler = StreamingStdOutCallbackHandler()
callback_manager = CallbackManager([streaming_handler])

# Constructor arguments for the llama.cpp-backed model. The model path points
# at a GGUF file and must exist on the machine running this script.
llm_settings = {
    "model_path": "/content/drive/MyDrive/llama-2-7b-chat.Q4_K_S.gguf",
    "temperature": 0.75,
    "max_tokens": 2000,
    "top_p": 1,
    "callback_manager": callback_manager,
    "verbose": 1,
}
llm = LlamaCpp(**llm_settings)

# Prompt skeleton sent to the LLM. Placeholders: {pattern} is the exam style
# and {text} is the source passage, which the prompt wraps in triple backticks.
template = (
    "Generate MCQ question based on text "
    "that is delimited by triple backticks "
    "with {pattern} pattern. "
    "text: ```{text}``` "
    "also give the answers just below them and please don't give number to question.\n"
)
# Exam style substituted for {pattern}. The template itself supplies the word
# "pattern" after the placeholder, so it is not repeated here; the previous
# value rendered as "with IIT Gate exam pattern  pattern.".
pattern = "IIT Gate exam"

def extract_paragraphs(pdf_file):
  """Collect the non-blank paragraphs from every page of a PDF file.

  The text of each page is split on blank lines (two consecutive newline
  characters). Chunks that contain only whitespace are dropped; the remaining
  chunks are returned unmodified, in page order.

  Args:
    pdf_file: Path of the PDF file to read.

  Returns:
    A list of paragraph strings; empty if no page yields any text.
  """
  paragraphs = []
  # The context manager closes the file even when PyPDF2 raises while parsing,
  # which a separate open()/close() pair does not guarantee.
  with open(pdf_file, "rb") as pdf:
    pdf_reader = PyPDF2.PdfReader(pdf)
    for pdf_page in pdf_reader.pages:
      # Defensive: treat a page that yields no text as an empty string.
      page_text = pdf_page.extract_text() or ""
      paragraphs.extend(
        paragraph
        for paragraph in page_text.split("\n\n")
        if paragraph.strip()
      )
  return paragraphs

def Generate_mcq_from_pdf(pdf_file, paragraph_index=7, max_chars=400):
  """Generate multiple-choice questions from one paragraph of a PDF.

  A single paragraph is taken from the PDF, truncated, inserted into the
  module-level prompt template together with the exam pattern, and sent to
  the module-level LLM.

  Args:
    pdf_file: Path of the uploaded PDF file.
    paragraph_index: Zero-based index of the paragraph to use. Values outside
      the available range are clamped to the first/last paragraph, so short
      documents no longer fail with an IndexError.
    max_chars: Maximum number of characters of the paragraph passed to the
      model.

  Returns:
    The text produced by the LLM.

  Raises:
    ValueError: If no file was supplied or no text could be extracted.
  """
  if pdf_file is None:
    raise ValueError("No PDF file was provided.")

  paragraphs = extract_paragraphs(pdf_file)
  if not paragraphs:
    raise ValueError("No extractable text was found in the PDF.")

  # Clamp into [0, len - 1] instead of indexing blindly with a fixed position.
  index = max(0, min(paragraph_index, len(paragraphs) - 1))
  excerpt = paragraphs[index][:max_chars]

  prompt_template = PromptTemplate.from_template(template=template)
  prompt = prompt_template.format(pattern=pattern, text=excerpt)
  return llm(prompt)

# Gradio UI: the user uploads a PDF and the generated questions are shown in
# a text box.
app = gr.Interface(
  fn=Generate_mcq_from_pdf,
  # The callback receives the upload as a filesystem path. File extensions in
  # file_types are written with a leading dot; a bare "pdf" is neither an
  # extension nor one of the named categories such as "image" or "text".
  inputs=gr.File(type="filepath", file_types=[".pdf"]),
  outputs=gr.Textbox(label="Questions"),
)
app.launch()