# MCQ-generator Space: loads a local Llama-2 GGUF model through LangChain's
# LlamaCpp wrapper, extracts paragraphs from an uploaded PDF, and serves a
# Gradio interface that generates multiple-choice questions from the text.
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.llms import LlamaCpp
from langchain.prompts import PromptTemplate
import PyPDF2 # for reading pdf files
import torch # for loading and running the llama model
import gradio as gr # for creating a user interface
# Callbacks support token-wise streaming: each generated token is echoed
# to stdout as the model produces it.
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

# Make sure the model path is correct for your system!
# NOTE(review): the path is hard-coded to a Colab Drive mount — TODO make
# it configurable (env var / CLI arg) before running outside Colab.
llm = LlamaCpp(
    model_path="/content/drive/MyDrive/llama-2-7b-chat.Q4_K_S.gguf",
    temperature=0.75,
    max_tokens=2000,
    top_p=1,
    callback_manager=callback_manager,
    verbose=True,  # was 1; the parameter is a boolean flag
)
# Prompt template for the LLM. {pattern} and {text} are filled in by
# PromptTemplate.format(); the text is fenced in triple backticks so the
# model can tell instructions apart from source material.
template = """Generate MCQ question based on text \
that is delimited by triple backticks \
with {pattern} pattern. \
text: ```{text}``` \
also give the answers just below them and please don't give number to question.
"""
# Exam style the questions should imitate; substituted for {pattern} above.
pattern = """IIT Gate exam pattern \
"""
def extract_paragraphs(pdf_file):
    """Extract the non-empty paragraphs from every page of a PDF.

    Args:
        pdf_file: Filesystem path to the PDF file.

    Returns:
        list[str]: Non-empty paragraph strings, in page order.
    """
    paragraphs = []
    # Context manager guarantees the handle is closed even if PyPDF2
    # raises while parsing (the original `open`/`close` pair leaked the
    # file descriptor on any exception).
    with open(pdf_file, "rb") as pdf:
        pdf_reader = PyPDF2.PdfReader(pdf)
        for pdf_page in pdf_reader.pages:
            # extract_text() may return None for image-only pages;
            # treat that as an empty page rather than crashing.
            page_text = pdf_page.extract_text() or ""
            # A blank line (double newline) separates paragraphs in the
            # extracted text.
            for paragraph in page_text.split("\n\n"):
                if paragraph.strip():
                    paragraphs.append(paragraph)
    return paragraphs
def Generate_mcq_from_pdf(pdf_file):
    """Generate MCQ questions from one paragraph of an uploaded PDF.

    Args:
        pdf_file: Filesystem path to the uploaded PDF (from gr.File).

    Returns:
        str: The LLM's generated questions and answers, or a short
        message when the PDF yields no extractable text.
    """
    paragraphs = extract_paragraphs(pdf_file)
    if not paragraphs:
        # Image-only or empty PDFs yield no paragraphs; the original
        # code raised IndexError here.
        return "No text could be extracted from this PDF."
    # The original always used paragraph 7 and crashed on PDFs with
    # fewer than 8 paragraphs; fall back to the last available one.
    index = min(7, len(paragraphs) - 1)
    text = paragraphs[index][:400]  # cap the excerpt to bound prompt size
    prompt = PromptTemplate.from_template(template=template)
    # `llm_input` instead of `input` — avoids shadowing the builtin.
    llm_input = prompt.format(pattern=pattern, text=text)
    return llm(llm_input)
# Wire the generator into a Gradio UI: upload a PDF, get the generated
# questions back as text. (Removed the stray trailing "|" that made the
# original launch line a syntax error.)
app = gr.Interface(
    fn=Generate_mcq_from_pdf,                             # inference function
    inputs=gr.File(type="filepath", file_types=["pdf"]),  # PDF upload widget
    outputs=gr.Textbox(label="Questions"),                # generated MCQs
)
app.launch()