DisgustingOzil committed
Commit 9618fad
Parent(s): 8735cdd
Update README.md

README.md CHANGED
@@ -179,6 +179,97 @@ for part in partitions:
### Gradio App
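The script below wraps the model in a small Gradio UI. It splits the input text into word-based chunks, prompts the model once per chunk with an Alpaca-style template, and parses the tagged questions, answers, and distractors out of each generation.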
```python
import re

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

model_id = "DisgustingOzil/Academic-MCQ-Generator"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

# Run on GPU when available; fall back to CPU otherwise.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""


def partition_text(text, partition_size=9):
    """Split the text into roughly `partition_size` word-based chunks."""
    words = text.split()
    total_words = len(words)
    # Guard against a step of zero when the input has fewer words than partition_size.
    words_per_partition = max(1, total_words // partition_size)
    partitions = []
    for i in range(0, total_words, words_per_partition):
        partition = " ".join(words[i:i + words_per_partition])
        if len(partition) > 100:  # Keep only chunks long enough for meaningful MCQ generation.
            partitions.append(partition)
    return partitions


def generate_mcqs_for_partition(instruction, partition):
    """Format the Alpaca prompt for one chunk and return the raw model output."""
    inputs = tokenizer(
        [
            alpaca_prompt.format(
                instruction,  # instruction
                partition,    # input
                "",           # output - leave this blank for generation!
            )
        ],
        return_tensors="pt",
    ).to(device)
    # max_length counts prompt plus generated tokens.
    outputs = model.generate(**inputs, max_length=512, num_return_sequences=1)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


def generate_mcqs(instruction, text):
    partitions = partition_text(text)
    mcqs_output = []

    for part in partitions:
        output_text = generate_mcqs_for_partition(instruction, part)
        # The model is expected to wrap each MCQ in <question>, <answer>, and <distractor> tags.
        pattern = r'<question>(.*?)</question>.*?<answer>(.*?)</answer>.*?<distractor>(.*?)</distractor>'
        matches = re.findall(pattern, output_text, re.DOTALL)

        for match in matches:
            question = match[0].strip()
            correct_answer = match[1].strip()
            # Distractors are delimited by <d>...</d> inside the <distractor> block.
            distractors = [d.replace('</d>', '').strip() for d in match[2].split('<d>') if d.strip()]
            distractors_formatted = ',\n'.join(distractors)
            mcqs_output.append(
                f"Question: {question}\nCorrect Answer: {correct_answer}\nDistractors: {distractors_formatted}\n"
            )

    return "\n".join(mcqs_output) if mcqs_output else "No MCQs could be generated from the input."


iface = gr.Interface(
    fn=generate_mcqs,
    inputs=[gr.Textbox(label="Instruction"), gr.Textbox(lines=10, label="Input Biology Text")],
    outputs="text",
    title="Biology MCQ Generator",
    description="Enter a text about Biology to generate MCQs.",
)

if __name__ == "__main__":
    iface.launch(debug=True, share=True)
```
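For reference, a minimal sketch of the parsing step in isolation. The `sample_output` string is a hypothetical illustration of the tag layout the regex expects, not actual model output:

```python
import re

# Hypothetical raw model output, shaped the way the parsing regex expects.
sample_output = (
    "<question>What is the powerhouse of the cell?</question>"
    "<answer>Mitochondria</answer>"
    "<distractor><d>Nucleus</d><d>Ribosome</d><d>Golgi apparatus</d></distractor>"
)

pattern = r'<question>(.*?)</question>.*?<answer>(.*?)</answer>.*?<distractor>(.*?)</distractor>'
for question, answer, raw_distractors in re.findall(pattern, sample_output, re.DOTALL):
    # Distractors are delimited by <d>...</d> inside the <distractor> block.
    distractors = [d.replace("</d>", "").strip() for d in raw_distractors.split("<d>") if d.strip()]
    print(question, answer, distractors)
```

Running the script starts the Gradio app locally; with `share=True`, Gradio also prints a temporary public URL.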