---
library_name: transformers
tags:
- unsloth
---

# Model Card for Model ID

This is a quantized Mistral 7B model trained for academic short-question answering. It was fine-tuned with the QLoRA technique for around 500 steps, reaching a training loss of about 0.450.

## Requirements

```python
!pip install gradio
!pip install -U xformers --index-url https://download.pytorch.org/whl/cu121
!pip install "unsloth[kaggle-new] @ git+https://github.com/unslothai/unsloth.git"

import os
os.environ["WANDB_DISABLED"] = "true"  # disable Weights & Biases logging
```

### Gradio App

```python
import re

import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM

model_id = "DisgustingOzil/Academic-ShortQA-Generator"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""


def partition_text(text, partition_size):
    """Split the input text into roughly equal word-count partitions."""
    words = text.split()
    total_words = len(words)
    # Guard against a zero step when the text has fewer words than partitions.
    words_per_partition = max(1, total_words // partition_size)
    partitions = []
    for i in range(0, total_words, words_per_partition):
        partition = " ".join(words[i:i + words_per_partition])
        if len(partition) > 100:  # ensure a meaningful length for QA generation
            partitions.append(partition)
    return partitions


def generate_mcqs_for_partition(instruction, partition, temperature, top_k):
    inputs = tokenizer(alpaca_prompt.format(instruction, partition, ""), return_tensors="pt")
    outputs = model.generate(
        **inputs,
        max_length=512,
        num_return_sequences=1,
        do_sample=True,  # sampling must be enabled for temperature/top_k to take effect
        temperature=temperature,
        top_k=top_k,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


def generate_mcqs(instruction, text, partition_count, temperature, top_k):
    partitions = partition_text(text, int(partition_count))
    mcqs_output = []
    for part in partitions:
        output_text = generate_mcqs_for_partition(instruction, part, temperature, top_k)
        # NOTE: the tag names below are an assumption; the original pattern lost its
        # markup when the card was rendered. Adjust them to match the model's actual
        # output format.
        pattern = r'<question>(.*?)</question>.*?<answer>(.*?)</answer>'
        matches = re.findall(pattern, output_text, re.DOTALL)
        for match in matches:
            question = match[0].strip()
            correct_answer = match[1].strip()
            mcqs_output.append(f"Question: {question}\nCorrect Answer: {correct_answer}\n")
    return "\n".join(mcqs_output) if mcqs_output else "No MCQs could be generated from the input."


iface = gr.Interface(
    fn=generate_mcqs,
    inputs=[
        gr.Textbox(label="Instruction"),
        gr.Textbox(lines=10, label="Input Biology Text"),
        gr.Slider(minimum=1, maximum=10, step=1, label="Partition Count"),
        gr.Slider(minimum=0.5, maximum=1.0, step=0.05, label="Temperature"),
        gr.Slider(minimum=1, maximum=50, step=1, label="Top K"),
    ],
    outputs="text",
    title="ShortQA Generator",
    description="Enter a text about Biology to generate MCQs. Adjust the sliders to change the model's generation parameters.",
)

if __name__ == "__main__":
    iface.launch(debug=True, share=True)
```
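
### Quick Inference with Unsloth

For a quick sanity check without the Gradio UI, the sketch below loads the model in 4-bit via Unsloth (matching the `unsloth` requirement and the QLoRA setup above) and runs a single generation. The instruction string and sampling parameters are illustrative assumptions, not values prescribed by the card; adjust them to your task.

```python
from unsloth import FastLanguageModel

# Assumption: loading the fine-tuned weights in 4-bit via Unsloth, consistent
# with the QLoRA fine-tuning described above.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="DisgustingOzil/Academic-ShortQA-Generator",
    max_seq_length=2048,
    dtype=None,          # auto-detect (bfloat16 on Ampere+, float16 otherwise)
    load_in_4bit=True,
)
FastLanguageModel.for_inference(model)  # enable Unsloth's faster inference path

# Same Alpaca-style prompt format used by the Gradio app above. The instruction
# text here is a hypothetical example.
prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
Generate short question-answer pairs from the given text.

### Input:
Photosynthesis converts light energy into chemical energy stored in glucose.

### Response:
"""

inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(
    **inputs,
    max_new_tokens=256,
    do_sample=True,
    temperature=0.7,
    top_k=40,
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```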