"""Gradio app that generates multiple-choice questions from an uploaded PDF.

Text is extracted from the PDF with PyPDF2, one excerpt of it is inserted
into a prompt template, and the prompt is sent to a local Llama model through
LangChain's ``LlamaCpp`` wrapper. The model's reply is shown in a text box.
"""

from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.llms import LlamaCpp
from langchain.prompts import PromptTemplate
import PyPDF2  # for reading pdf files
import torch  # noqa: F401 -- not referenced directly in this file; import kept as-is
import gradio as gr  # for creating a user interface

# Callbacks support token-wise streaming (tokens are echoed to stdout).
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

# Make sure the model path is correct for your system!
llm = LlamaCpp(
    model_path="/content/drive/MyDrive/llama-2-7b-chat.Q4_K_S.gguf",
    temperature=0.75,
    max_tokens=2000,
    top_p=1,
    callback_manager=callback_manager,
    verbose=True,
)

# Prompt sent to the model. The trailing backslashes join the source lines,
# so the prompt is a single line of text followed by one newline.
template = """Generate MCQ question based on text \
that is delimited by triple backticks \
with {pattern} pattern. \
text: ```{text}``` \
also give the answers just below them and please don't give number to question.
"""

# Value substituted for ``{pattern}`` in the prompt.
pattern = """IIT Gate exam pattern \
"""

# Which extracted paragraph is used as source text, and how many of its
# leading characters are kept. These were previously hard-coded inline.
PARAGRAPH_INDEX = 7
MAX_PARAGRAPH_CHARS = 400

# Built once; the template text never changes between requests.
_prompt_template = PromptTemplate.from_template(template=template)


def extract_paragraphs(pdf_file):
    """Return the non-blank text chunks of a PDF, in page order.

    Each page's extracted text is split on blank lines (``"\\n\\n"``);
    chunks that contain only whitespace are dropped.

    Args:
        pdf_file: Path of the PDF file to read.

    Returns:
        A list of strings, one per non-blank chunk.
    """
    paragraphs = []
    # ``with`` guarantees the file is closed even if PyPDF2 raises midway.
    with open(pdf_file, "rb") as pdf:
        pdf_reader = PyPDF2.PdfReader(pdf)
        for pdf_page in pdf_reader.pages:
            # Guard against pages for which no text is returned.
            page_text = pdf_page.extract_text() or ""
            paragraphs.extend(
                chunk for chunk in page_text.split("\n\n") if chunk.strip()
            )
    return paragraphs


def Generate_mcq_from_pdf(pdf_file):
    """Generate MCQ text from one excerpt of the uploaded PDF.

    The excerpt is the first ``MAX_PARAGRAPH_CHARS`` characters of paragraph
    number ``PARAGRAPH_INDEX``. If the document has fewer paragraphs than
    that, the last available paragraph is used instead of failing.

    Args:
        pdf_file: Path of the uploaded PDF, as passed in by the Gradio file
            component (``None`` when nothing was uploaded).

    Returns:
        The raw text returned by the language model.

    Raises:
        gr.Error: If no file was uploaded or no text could be extracted.
    """
    if pdf_file is None:
        raise gr.Error("Please upload a PDF file.")

    paragraphs = extract_paragraphs(pdf_file)
    if not paragraphs:
        raise gr.Error("No extractable text was found in the uploaded PDF.")

    # Clamp so short documents do not raise IndexError.
    index = min(PARAGRAPH_INDEX, len(paragraphs) - 1)
    excerpt = paragraphs[index][:MAX_PARAGRAPH_CHARS]

    prompt = _prompt_template.format(pattern=pattern, text=excerpt)
    return llm(prompt)


app = gr.Interface(
    fn=Generate_mcq_from_pdf,  # your function
    # File upload component for pdf files; extensions are given with a dot.
    inputs=gr.File(type="filepath", file_types=[".pdf"]),
    outputs=gr.Textbox(label="Questions"),  # text box component for output text
)

if __name__ == "__main__":
    app.launch()