Spaces:

pvyas96
/

MCQ_bank_from_pdf

Runtime error

App Files Community

pvyas96 committed on Jun 4

Commit

e7e6fc2

•

1 Parent(s): b10fba0

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -53

app.py CHANGED Viewed

@@ -1,13 +1,7 @@
-from langchain.callbacks.manager import CallbackManager
-from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
-from langchain.llms import LlamaCpp
-from langchain.prompts import PromptTemplate
-import PyPDF2 # for reading pdf files
-import torch # for loading and running the llama model
-import gradio as gr # for creating a user interface
-# Callbacks support token-wise streaming
-callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
 # Make sure the model path is correct for your system!
 llm = LlamaCpp(
     model_path="./llama-2-7b-chat.Q4_K_S.gguf",
@@ -15,84 +9,58 @@ llm = LlamaCpp(
     n_ctx=512,
     max_tokens=2000,
     top_p=1,
-    #callback_manager=callback_manager,
-    #verbose=1
 )
 template = """Generate only one MCQ question based on text \
 that is delimited by triple backticks \
 with {pattern} pattern. \
-text: ```{text}``` \
 """
 def extract_paragraphs(pdf_file):
-    pattern = """IIT Gate exam \ """
-    # Open the pdf file in read mode
     pdf_file = open(pdf_file, "rb")
-    # Create a pdf reader object
     pdf_reader = PyPDF2.PdfReader(pdf_file)
-    # Get the number of pages in the pdf
     num_pages = len(pdf_reader.pages)
-    # Initialize an empty string to store the text
     text = ""
-    # Loop through each page and extract the text
     for i in range(num_pages):
-        # Get the page object
         page = pdf_reader.pages[i]
-        # Extract the text from the page
         text += page.extract_text()
-    # Close the pdf file
     pdf_file.close()
-    # Split the text into words
     words = text.split()
-    # Initialize an empty list to store the paragraphs
     paragraphs = []
-    # Initialize a counter to keep track of the words
-    count = 0
-    # Initialize an empty string to store the current paragraph
     paragraph = ""
-    # Loop through each word and add it to the paragraph
     for word in words:
-        # Add the word to the paragraph
         paragraph += word + " "
-        # Increment the counter
         count += 1
-        # If the counter reaches 400 or the end of the text, append the paragraph to the list and reset the counter and the paragraph
         if count == 200 or word == words[-1]:
             paragraphs.append(paragraph)
             count = 0
             paragraph = ""
-    # Return the list of paragraphs
     return paragraphs
 def Generate_mcq_from_pdf(pdf_file):
-  pattern = "IIT GATE \ "
-  paragraphs = extract_paragraphs(pdf_file)
-  for para in paragraphs:
-    text = f"""{para}"""
-    input_msg = PromptTemplate.from_template(template=template)
-    input_s = input_msg.format(pattern=pattern, text=text)
-    output_msg = llm(input_s)
-    output_file = "questions.txt"
-    with open(output_file, "w") as f:
-      f.write(output_msg)
-  return output_msg, output_file
 app = gr.Interface(
-  fn=Generate_mcq_from_pdf, # your function
-  inputs=gr.File(type="filepath", file_types=["pdf"]), # file upload component for pdf files
-  outputs=[gr.Textbox(label="Questions"), gr.File(label="Output File")], # list of output components
 )
-app.launch()

+import PyPDF2
+import torch
+import gradio as gr
 # Make sure the model path is correct for your system!
 llm = LlamaCpp(
     model_path="./llama-2-7b-chat.Q4_K_S.gguf",
     n_ctx=512,
     max_tokens=2000,
     top_p=1,
 )
 template = """Generate only one MCQ question based on text \
 that is delimited by triple backticks \
 with {pattern} pattern. \
+text: `{text}` \
 """
 def extract_paragraphs(pdf_file):
+    pattern = "IIT GATE "  # Adjust the pattern as needed
     pdf_file = open(pdf_file, "rb")
     pdf_reader = PyPDF2.PdfReader(pdf_file)
     num_pages = len(pdf_reader.pages)
     text = ""
     for i in range(num_pages):
         page = pdf_reader.pages[i]
         text += page.extract_text()
     pdf_file.close()
     words = text.split()
     paragraphs = []
     paragraph = ""
+    count = 0
     for word in words:
         paragraph += word + " "
         count += 1
         if count == 200 or word == words[-1]:
             paragraphs.append(paragraph)
             count = 0
             paragraph = ""
     return paragraphs
 def Generate_mcq_from_pdf(pdf_file):
+    paragraphs = extract_paragraphs(pdf_file)
+    for para in paragraphs:
+        input_msg = PromptTemplate.from_template(template=template)
+        input_s = input_msg.format(pattern=pattern, text=para)
+        output_msg = llm(input_s)
+        output_file = "questions.txt"
+        with open(output_file, "w") as f:
+            f.write(output_msg)
+    return output_msg, output_file
 app = gr.Interface(
+    fn=Generate_mcq_from_pdf,
+    inputs=gr.File(type="filepath", file_types=["pdf"]),
+    outputs=[gr.Textbox(label="Questions"), gr.File(label="Output File")],
 )
+app.launch()