
Model Card for DisgustingOzil/Academic-ShortQA-Generator

This is a quantized Mistral 7B model trained for academic short-QA generation. It was fine-tuned with the QLoRA technique for around 500 steps, reaching a training loss of roughly 0.450.
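For reference, here is a rough sketch of the kind of QLoRA setup this implies, using Unsloth (installed under Requirements below). The base checkpoint, sequence length, LoRA rank, and target modules are assumptions for illustration, not the recorded training configuration:

from unsloth import FastLanguageModel

# Load a 4-bit Mistral 7B base (assumed checkpoint); the 4-bit quantization is the "Q" in QLoRA.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/mistral-7b-bnb-4bit",  # assumed base model
    max_seq_length=2048,                       # assumed
    load_in_4bit=True,
)

# Attach LoRA adapters so only a small set of extra weights is trained.
model = FastLanguageModel.get_peft_model(
    model,
    r=16,                                                     # assumed LoRA rank
    lora_alpha=16,                                            # assumed
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],  # assumed
)
# Training would then run with a standard SFT trainer for ~500 steps.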

Requirements


!pip install gradio
!pip install -U xformers --index-url https://download.pytorch.org/whl/cu121
!pip install "unsloth[kaggle-new] @ git+https://github.com/unslothai/unsloth.git"

import os
os.environ["WANDB_DISABLED"] = "true"  # disable Weights & Biases logging





Gradio App

import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import re

model_id = "DisgustingOzil/Academic-ShortQA-Generator"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

def partition_text(text, partition_count):
    words = text.split()
    total_words = len(words)
    # Guard against a zero step when the text has fewer words than requested partitions.
    words_per_partition = max(1, total_words // partition_count)
    partitions = []
    for i in range(0, total_words, words_per_partition):
        partition = " ".join(words[i:i + words_per_partition])
        if len(partition) > 100:  # keep only chunks long enough for meaningful QA generation
            partitions.append(partition)
    return partitions

def generate_mcqs_for_partition(Instruction, partition, temperature, top_k):
    inputs = tokenizer(alpaca_prompt.format(Instruction, partition, ""), return_tensors="pt")
    outputs = model.generate(
        **inputs,
        max_new_tokens=512,     # generation budget, independent of prompt length
        num_return_sequences=1,
        do_sample=True,         # sampling must be enabled for temperature/top_k to take effect
        temperature=temperature,
        top_k=top_k
    )
    output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return output_text

def generate_mcqs(Instruction, text, partition_count, temperature, top_k):
    partitions = partition_text(text, partition_count)
    mcqs_output = []

    for part in partitions:
        output_text = generate_mcqs_for_partition(Instruction, part, temperature, top_k)
        pattern = r'<question>(.*?)</question>.*?<answer>(.*?)</answer>'
        matches = re.findall(pattern, output_text, re.DOTALL)

        for match in matches:
            question = match[0].strip()
            correct_answer = match[1].strip()
            mcqs_output.append(f"Question: {question}\nCorrect Answer: {correct_answer}\n")

    return "\n".join(mcqs_output) if mcqs_output else "No MCQs could be generated from the input."

iface = gr.Interface(
    fn=generate_mcqs,
    inputs=[
        gr.Textbox(label="Instruction"),
        gr.Textbox(lines=10, label="Input Biology Text"),
        gr.Slider(minimum=1, maximum=10, step=1, label="Partition Count"),
        gr.Slider(minimum=0.5, maximum=1.0, step=0.05, label="Temperature"),
        gr.Slider(minimum=1, maximum=50, step=1, label="Top K")
    ],
    outputs="text",
    title="ShortQA Generator",
    description="Enter a text about Biology to generate MCQs. Adjust the sliders to change the model's generation parameters."
)

if __name__ == "__main__":
    iface.launch(debug=True, share=True)
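
The regex in generate_mcqs assumes the model wraps each pair in <question>...</question> and <answer>...</answer> tags; that format is taken from the parsing code above. A quick standalone illustration (the sample string itself is invented):

import re

sample_output = (
    "<question>What organelle produces most of a cell's ATP?</question>"
    "<answer>The mitochondrion</answer>"
)
pattern = r'<question>(.*?)</question>.*?<answer>(.*?)</answer>'
for question, answer in re.findall(pattern, sample_output, re.DOTALL):
    print(f"Question: {question}\nCorrect Answer: {answer}")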



