# Page banner captured together with this listing (not part of the program):
# Spaces: Runtime error
import os

import gradio as gr
import PyPDF2
import requests
from transformers import (
    AutoModelForCausalLM,
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
)
# Hugging Face API token. It is read from the HF_TOKEN environment variable so
# the credential does not have to be committed with the code; the original
# placeholder string is kept as the fallback value.
api_token = os.environ.get("HF_TOKEN", "YOUR_HUGGING_FACE_TOKEN")
mistral_model_id = "NousResearch/Hermes-2-Pro-Mistral-7B"  # Choose appropriate model version
tokenizer = AutoTokenizer.from_pretrained(mistral_model_id)
# Mistral checkpoints are decoder-only, so they must be loaded through the
# causal-LM auto class. AutoModelForSeq2SeqLM has no mapping for a Mistral
# configuration and raises ValueError while the module is being imported.
model = AutoModelForCausalLM.from_pretrained(mistral_model_id)
def extract_paragraphs(pdf_file, words_per_paragraph=200):
    """Split the text of a PDF into fixed-size chunks of words.

    Args:
        pdf_file: Path of the PDF file to read.
        words_per_paragraph: Maximum number of words per chunk. Defaults to
            200, the size that was previously hard-coded.

    Returns:
        A list of strings. Each string holds up to ``words_per_paragraph``
        whitespace-separated words joined by single spaces and followed by
        one trailing space. The list is empty when no text is extracted.

    Raises:
        ValueError: If ``words_per_paragraph`` is smaller than 1.
    """
    if words_per_paragraph < 1:
        raise ValueError("words_per_paragraph must be at least 1")

    # The context manager closes the file even when parsing a page fails.
    with open(pdf_file, "rb") as pdf_stream:
        pdf_reader = PyPDF2.PdfReader(pdf_stream)
        # `or ""` keeps the join below safe if a page yields no text.
        page_texts = [page.extract_text() or "" for page in pdf_reader.pages]

    # Pages are joined with a newline so the last word of one page cannot
    # fuse with the first word of the next one.
    words = "\n".join(page_texts).split()

    # Chunk by position. Comparing each word with the final word of the
    # document would cut a chunk short whenever that word also occurs
    # earlier in the text.
    return [
        " ".join(words[start:start + words_per_paragraph]) + " "
        for start in range(0, len(words), words_per_paragraph)
    ]
def _request_mcq(prompt):
    """Send *prompt* to the hosted model and return the generated text.

    Raises:
        requests.HTTPError: If the inference endpoint answers with an error
            status.
        ValueError: If the response body is not a list whose first item has
            a ``generated_text`` entry.
    """
    headers = {"Authorization": f"Bearer {api_token}"}
    url = f"https://api-inference.huggingface.co/models/{mistral_model_id}"
    # The hosted endpoint takes the raw prompt text under "inputs"; tokenizer
    # tensors are neither JSON-serialisable nor part of its request schema.
    payload = {
        "inputs": prompt,
        # return_full_text=False keeps the prompt out of the returned text.
        "parameters": {"return_full_text": False, "max_new_tokens": 256},
    }
    response = requests.post(url, headers=headers, json=payload, timeout=120)
    response.raise_for_status()  # Raise an error if request fails
    result = response.json()
    try:
        return result[0]["generated_text"]
    except (KeyError, IndexError, TypeError) as err:
        raise ValueError(
            f"Unexpected response from the inference API: {result!r}"
        ) from err


def Generate_mcq_from_pdf(pdf_file):
    """Generate one MCQ per extracted paragraph of a PDF and save them.

    Args:
        pdf_file: Path of the uploaded PDF file.

    Returns:
        A tuple ``(output_text, output_file)``: the generated questions
        separated by blank lines, and the path of the text file
        ("questions.txt") the same text was written to. Both are produced
        even when the PDF yields no text, in which case the text is empty.

    Raises:
        requests.HTTPError: If an inference request fails.
        ValueError: If the inference API returns an unexpected payload.
    """
    # The delimiter now really is a triple backtick, as the instruction in
    # the prompt says. The original source layout of this literal is not
    # recoverable, so the pieces are joined with single spaces.
    template = (
        "Generate only one MCQ question based on text "
        "that is delimited by triple backticks "
        "with {pattern} pattern. "
        "text: ```{text}``` "
    )

    # Collect every question; keeping only the value from the last loop
    # iteration would discard all earlier paragraphs and leave the result
    # undefined for a PDF without extractable text.
    questions = [
        _request_mcq(template.format(pattern="IIT GATE", text=para))
        for para in extract_paragraphs(pdf_file)
    ]
    output_text = "\n\n".join(question.strip() for question in questions)

    output_file = "questions.txt"
    with open(output_file, "w", encoding="utf-8") as f:
        f.write(output_text)
    return output_text, output_file
# Gradio UI: one PDF upload in, the generated questions plus a downloadable
# text file out.
app = gr.Interface(
    fn=Generate_mcq_from_pdf,
    # Extension filters need the leading dot; a bare "pdf" is treated as a
    # media-type category rather than the .pdf file extension.
    inputs=gr.File(type="filepath", file_types=[".pdf"]),
    outputs=[gr.Textbox(label="Questions"), gr.File(label="Output File")],
)
app.launch()