# pvyas96's picture
# Create app.py
# ad2288c
# raw
# history blame
# No virus
# 2.34 kB
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.llms import LlamaCpp
from langchain.prompts import PromptTemplate
import PyPDF2 # for reading pdf files
import torch # for loading and running the llama model
import gradio as gr # for creating a user interface
# Route generation callbacks through a stdout streaming handler so output
# appears token by token.
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

# Load the GGUF model through LangChain's LlamaCpp wrapper.
# NOTE: the model path below must exist on the machine running this script.
llm = LlamaCpp(
    model_path="/content/drive/MyDrive/llama-2-7b-chat.Q4_K_S.gguf",
    callback_manager=callback_manager,
    max_tokens=2000,
    temperature=0.75,
    top_p=1,
    verbose=1,
)
# Prompt skeleton sent to the model. `{pattern}` is filled with the exam
# style and `{text}` with the source passage, which the prompt wraps in
# triple backticks. The pieces below concatenate into a single line that
# ends with one newline.
template = (
    "Generate MCQ question based on text "
    "that is delimited by triple backticks "
    "with {pattern} pattern. "
    "text: ```{text}``` "
    "also give the answers just below them and please don't give number to question.\n"
)

# Exam style substituted for `{pattern}`; the trailing space is part of the
# value.
pattern = "IIT Gate exam pattern "
def extract_paragraphs(pdf_file):
    """Return the non-blank paragraphs of a PDF, in page order.

    The text extracted from each page is split wherever two consecutive
    newline characters occur. Pieces that are empty or contain only
    whitespace are dropped; the pieces that are kept are returned as-is
    (not stripped).

    Args:
        pdf_file: Path of the PDF to read. It is opened in binary mode.

    Returns:
        A list of paragraph strings. The list is empty when no page yields
        any non-blank text.
    """
    paragraphs = []
    # Use a context manager so the file handle is released even if the
    # reader or the text extraction raises.
    with open(pdf_file, "rb") as pdf:
        pdf_reader = PyPDF2.PdfReader(pdf)
        for pdf_page in pdf_reader.pages:
            # Defensive: treat a page with no extractable text as empty
            # rather than failing on a non-string result.
            page_text = pdf_page.extract_text() or ""
            paragraphs.extend(
                paragraph
                for paragraph in page_text.split("\n\n")
                if paragraph.strip()
            )
    return paragraphs
def Generate_mcq_from_pdf(pdf_file, *, paragraph_index=7, max_chars=400):
    """Generate multiple-choice questions from one paragraph of a PDF.

    One paragraph is picked from the PDF, truncated, inserted into the
    module-level prompt ``template`` together with ``pattern``, and sent to
    the module-level ``llm``.

    Args:
        pdf_file: Path of the uploaded PDF.
        paragraph_index: Zero-based index of the paragraph to use. When the
            document has fewer paragraphs than this, the last paragraph is
            used instead. Expected to be non-negative.
        max_chars: Maximum number of characters of the paragraph that are
            passed to the model.

    Returns:
        Whatever ``llm`` returns for the formatted prompt.

    Raises:
        ValueError: If no file was provided, or if no text could be
            extracted from the PDF.
    """
    if not pdf_file:
        raise ValueError("No PDF file was provided.")

    paragraphs = extract_paragraphs(pdf_file)
    if not paragraphs:
        raise ValueError("No text could be extracted from the PDF.")

    # Short documents may contain fewer paragraphs than requested; clamp to
    # the last available one instead of failing with an IndexError.
    index = min(paragraph_index, len(paragraphs) - 1)
    excerpt = paragraphs[index][:max_chars]

    prompt_template = PromptTemplate.from_template(template=template)
    prompt = prompt_template.format(pattern=pattern, text=excerpt)
    return llm(prompt)
# Gradio UI: upload a single PDF and show the generated questions as text.
app = gr.Interface(
    fn=Generate_mcq_from_pdf,
    # Extension filters need the leading dot (".pdf"); a bare "pdf" is not
    # treated as a file extension by Gradio's file-type check.
    inputs=gr.File(type="filepath", file_types=[".pdf"]),
    outputs=gr.Textbox(label="Questions"),
)

# Start the web server only when executed as a script, so importing this
# module does not launch the app as a side effect.
if __name__ == "__main__":
    app.launch()