DisgustingOzil committed
Commit 9618fad
Parent(s): 8735cdd
Update README.md

README.md CHANGED
@@ -179,6 +179,97 @@ for part in partitions:
### Gradio App
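The script below wraps the model in a small Gradio UI. It splits the input text into word-based chunks, prompts the model once per chunk with an Alpaca-style template, and parses the tagged questions, answers, and distractors out of each generation.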
```python
import re

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

model_id = "DisgustingOzil/Academic-MCQ-Generator"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

# Run on GPU when available; fall back to CPU otherwise.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""


def partition_text(text, partition_size=9):
    """Split the text into roughly `partition_size` word-based chunks."""
    words = text.split()
    total_words = len(words)
    # Guard against a step of zero when the input has fewer words than partition_size.
    words_per_partition = max(1, total_words // partition_size)
    partitions = []
    for i in range(0, total_words, words_per_partition):
        partition = " ".join(words[i:i + words_per_partition])
        if len(partition) > 100:  # Keep only chunks long enough for meaningful MCQ generation.
            partitions.append(partition)
    return partitions


def generate_mcqs_for_partition(instruction, partition):
    """Format the Alpaca prompt for one chunk and return the raw model output."""
    inputs = tokenizer(
        [
            alpaca_prompt.format(
                instruction,  # instruction
                partition,    # input
                "",           # output - leave this blank for generation!
            )
        ],
        return_tensors="pt",
    ).to(device)
    # max_length counts prompt plus generated tokens.
    outputs = model.generate(**inputs, max_length=512, num_return_sequences=1)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


def generate_mcqs(instruction, text):
    partitions = partition_text(text)
    mcqs_output = []

    for part in partitions:
        output_text = generate_mcqs_for_partition(instruction, part)
        # The model is expected to wrap each MCQ in <question>, <answer>, and <distractor> tags.
        pattern = r'<question>(.*?)</question>.*?<answer>(.*?)</answer>.*?<distractor>(.*?)</distractor>'
        matches = re.findall(pattern, output_text, re.DOTALL)

        for match in matches:
            question = match[0].strip()
            correct_answer = match[1].strip()
            # Distractors are delimited by <d>...</d> inside the <distractor> block.
            distractors = [d.replace('</d>', '').strip() for d in match[2].split('<d>') if d.strip()]
            distractors_formatted = ',\n'.join(distractors)
            mcqs_output.append(
                f"Question: {question}\nCorrect Answer: {correct_answer}\nDistractors: {distractors_formatted}\n"
            )

    return "\n".join(mcqs_output) if mcqs_output else "No MCQs could be generated from the input."


iface = gr.Interface(
    fn=generate_mcqs,
    inputs=[gr.Textbox(label="Instruction"), gr.Textbox(lines=10, label="Input Biology Text")],
    outputs="text",
    title="Biology MCQ Generator",
    description="Enter a text about Biology to generate MCQs.",
)

if __name__ == "__main__":
    iface.launch(debug=True, share=True)
```
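For reference, a minimal sketch of the parsing step in isolation. The `sample_output` string is a hypothetical illustration of the tag layout the regex expects, not actual model output:

```python
import re

# Hypothetical raw model output, shaped the way the parsing regex expects.
sample_output = (
    "<question>What is the powerhouse of the cell?</question>"
    "<answer>Mitochondria</answer>"
    "<distractor><d>Nucleus</d><d>Ribosome</d><d>Golgi apparatus</d></distractor>"
)

pattern = r'<question>(.*?)</question>.*?<answer>(.*?)</answer>.*?<distractor>(.*?)</distractor>'
for question, answer, raw_distractors in re.findall(pattern, sample_output, re.DOTALL):
    # Distractors are delimited by <d>...</d> inside the <distractor> block.
    distractors = [d.replace("</d>", "").strip() for d in raw_distractors.split("<d>") if d.strip()]
    print(question, answer, distractors)
```

Running the script starts the Gradio app locally; with `share=True`, Gradio also prints a temporary public URL.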