# Spaces: Runtime error
import os

import gradio as gr  # for creating a user interface
import PyPDF2  # for reading pdf files
import torch  # for loading and running the llama model
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.llms import LlamaCpp
from langchain.prompts import PromptTemplate
# Callbacks support token-wise streaming
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

# Location of the GGUF model weights. The default keeps the original
# hard-coded path; set the LLAMA_MODEL_PATH environment variable to run the
# script on a machine where the model lives somewhere else.
MODEL_PATH = os.environ.get(
    "LLAMA_MODEL_PATH",
    "/content/drive/MyDrive/llama-2-7b-chat.Q4_K_S.gguf",
)

# Shared LLM instance used by the question-generation function.
llm = LlamaCpp(
    model_path=MODEL_PATH,
    temperature=0.75,
    max_tokens=2000,
    top_p=1,
    callback_manager=callback_manager,
    verbose=True,
)

# Prompt skeleton with two placeholders: {pattern} (exam style) and {text}
# (the source passage). The trailing backslashes join the lines into a single
# line of prompt text.
template = """Generate MCQ question based on text \
that is delimited by triple backticks \
with {pattern} pattern. \
text: ```{text}``` \
also give the answers just below them and please don't give number to question.
"""

# Exam style substituted for {pattern} in the template above.
pattern = """IIT Gate exam pattern \
"""
def extract_paragraphs(pdf_file):
    """Return the non-empty text paragraphs of a PDF file.

    The text of every page is extracted with PyPDF2 and split on blank lines
    (two consecutive newline characters). Chunks that are empty or contain
    only whitespace are discarded.

    Args:
        pdf_file: Path of the PDF file to read.

    Returns:
        A list of paragraph strings, in page order.
    """
    paragraphs = []
    # The context manager guarantees the file handle is released even when
    # parsing or text extraction raises.
    with open(pdf_file, "rb") as pdf:
        pdf_reader = PyPDF2.PdfReader(pdf)
        for pdf_page in pdf_reader.pages:
            # Treat a page that yields no text (None or "") as empty rather
            # than failing on .split().
            page_text = pdf_page.extract_text() or ""
            paragraphs.extend(
                paragraph
                for paragraph in page_text.split("\n\n")
                if paragraph.strip()
            )
    return paragraphs
def Generate_mcq_from_pdf(pdf_file):
    """Generate multiple-choice questions from one paragraph of a PDF.

    The paragraph at index 7 is used when the document has at least eight
    paragraphs; shorter documents fall back to their last paragraph instead
    of failing with an IndexError. The chosen paragraph is truncated to 400
    characters, inserted into the prompt template together with the exam
    pattern, and passed to the LLM.

    Args:
        pdf_file: Path of the uploaded PDF file.

    Returns:
        The text produced by the model.

    Raises:
        gr.Error: If no text could be extracted from the PDF.
    """
    preferred_index = 7
    # Presumably caps the passage so the prompt stays small — confirm against
    # the model's context budget before changing.
    max_chars = 400

    paragraphs = extract_paragraphs(pdf_file)
    if not paragraphs:
        # Surface the problem in the Gradio UI instead of an opaque crash.
        raise gr.Error("No extractable text was found in the uploaded PDF.")

    chosen = paragraphs[min(preferred_index, len(paragraphs) - 1)]
    prompt_template = PromptTemplate.from_template(template=template)
    prompt = prompt_template.format(pattern=pattern, text=chosen[:max_chars])
    return llm(prompt)
# Gradio UI: upload a PDF and receive the generated questions as text.
app = gr.Interface(
    fn=Generate_mcq_from_pdf,
    # File extensions must carry a leading dot; a bare "pdf" is interpreted
    # as a file-type category and would not match PDF uploads.
    inputs=gr.File(type="filepath", file_types=[".pdf"]),
    outputs=gr.Textbox(label="Questions"),
)
app.launch()