DisgustingOzil committed
Commit 9618fad
1 Parent(s): 8735cdd

Update README.md

Files changed (1): README.md (+91 -0)
README.md CHANGED
@@ -179,6 +179,97 @@ for part in partitions:
 
 
 
+```
+
+### Gradio App for it
+
+```python
+import re
+
+import gradio as gr
+import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM
+
+model_id = "DisgustingOzil/Academic-MCQ-Generator"
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+
+# Keep the model and the tokenized inputs on the same device: the original
+# snippet moved only the inputs to "cuda", which fails if the model stays on CPU.
+device = "cuda" if torch.cuda.is_available() else "cpu"
+model = AutoModelForCausalLM.from_pretrained(model_id).to(device)
+
+alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
+
+### Instruction:
+{}
+
+### Input:
+{}
+
+### Response:
+{}"""
+
+
+def partition_text(text, partition_size=9):
+    """Split the text into roughly partition_size equal word-count chunks."""
+    words = text.split()
+    total_words = len(words)
+    # max(1, ...) guards against a zero step when the text is very short.
+    words_per_partition = max(1, total_words // partition_size)
+    partitions = []
+    for i in range(0, total_words, words_per_partition):
+        partition = " ".join(words[i:i + words_per_partition])
+        if len(partition) > 100:  # ensure a meaningful length for MCQ generation
+            partitions.append(partition)
+    return partitions
+
+
+def generate_mcqs_for_partition(instruction, partition):
+    inputs = tokenizer(
+        [
+            alpaca_prompt.format(
+                instruction,  # instruction
+                partition,    # input
+                "",           # output - leave this blank for generation!
+            )
+        ],
+        return_tensors="pt",
+    ).to(device)
+    outputs = model.generate(**inputs, max_length=512, num_return_sequences=1)
+    return tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+
+def generate_mcqs(instruction, text):
+    partitions = partition_text(text)
+    mcqs_output = []
+
+    for part in partitions:
+        output_text = generate_mcqs_for_partition(instruction, part)
+        # The model is expected to tag each MCQ as <question>...</question>,
+        # <answer>...</answer>, and <distractor><d>...</d>...</distractor>.
+        pattern = r'<question>(.*?)</question>.*?<answer>(.*?)</answer>.*?<distractor>(.*?)</distractor>'
+        matches = re.findall(pattern, output_text, re.DOTALL)
+
+        for match in matches:
+            question = match[0].strip()
+            correct_answer = match[1].strip()
+            distractors = [d.replace('</d>', '').strip() for d in match[2].split('<d>') if d.strip()]
+            distractors_formatted = ',\n'.join(distractors)
+            mcqs_output.append(f"Question: {question}\nCorrect Answer: {correct_answer}\nDistractors: {distractors_formatted}\n")
+
+    return "\n".join(mcqs_output) if mcqs_output else "No MCQs could be generated from the input."
+
+
+iface = gr.Interface(
+    fn=generate_mcqs,
+    inputs=[gr.Textbox(label="Instruction"),
+            gr.Textbox(lines=10, label="Input Biology Text")],
+    outputs="text",
+    title="Biology MCQ Generator",
+    description="Enter a text about Biology to generate MCQs.",
+)
+
+if __name__ == "__main__":
+    iface.launch(debug=True, share=True)
 ```
 
 
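
The parsing stage above assumes the fine-tuned model wraps each item in `<question>`, `<answer>`, and `<distractor>` tags, with individual distractors delimited by `<d>...</d>`. The commit does not include a sample response, so the snippet below is a minimal sketch of just the regex step, run on an invented output string; the actual model's formatting may differ.

```python
import re

# Invented model response: the tag layout mirrors what the regex in
# generate_mcqs expects, not a verified sample from the model itself.
sample_output = (
    "<question>Which organelle produces most of a cell's ATP?</question>"
    "<answer>Mitochondrion</answer>"
    "<distractor><d>Ribosome</d><d>Nucleus</d><d>Golgi apparatus</d></distractor>"
)

pattern = r'<question>(.*?)</question>.*?<answer>(.*?)</answer>.*?<distractor>(.*?)</distractor>'
for question, answer, raw in re.findall(pattern, sample_output, re.DOTALL):
    distractors = [d.replace('</d>', '').strip() for d in raw.split('<d>') if d.strip()]
    print(f"Question: {question.strip()}")
    print(f"Correct Answer: {answer.strip()}")
    print(f"Distractors: {', '.join(distractors)}")
```

If the model emits a different tag scheme, only `pattern` and the `<d>` splitting in `generate_mcqs` need to change; the Gradio wiring is unaffected.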