import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer

peft_model_id = "JuliaUpton/Math_AI"
config = PeftConfig.from_pretrained(peft_model_id)
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    load_in_8bit=False,
)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

# Load the LoRA adapter on top of the base model
merged_model = PeftModel.from_pretrained(model, peft_model_id)


def input_from_text(instruction):
    # Wrap the question in the [INST] prompt template the adapter was trained on
    return (
        "[INST]Below is a math inquiry, please answer it as a math expert "
        "showing your thought process.\n\n"
        f"### Inquiry:\n{instruction}\n\n### Response:[/INST]"
    )


def make_inference(instruction):
    inputs = tokenizer(input_from_text(instruction), return_tensors="pt")
    outputs = merged_model.generate(
        **inputs,
        max_new_tokens=150,
        repetition_penalty=1.7,
    )
    # Keep only the text generated after the [/INST] marker
    result = tokenizer.decode(outputs[0], skip_special_tokens=True).split("[/INST]")[1]
    return result


if __name__ == "__main__":
    # Serve the model behind a simple Gradio interface
    import gradio as gr

    gr.Interface(
        make_inference,
        [gr.Textbox(lines=5, label="Instruction")],
        gr.Textbox(label="Answer"),
        title="Math-AI",
        description="Math-AI is a generative model that answers math questions",
    ).launch()
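
# Note: despite the name, `merged_model` above is a PeftModel wrapper; the LoRA
# weights are applied at runtime rather than folded into the base weights.
# If you want a standalone merged model for faster inference, a minimal sketch
# (assuming a peft version that provides merge_and_unload(); the output path
# below is hypothetical):
#
#     standalone = merged_model.merge_and_unload()
#     standalone.save_pretrained("math-ai-merged")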