import streamlit as st
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# Define the Streamlit app
st.title("Mistral Model Integration")

# Create a text input for the user to enter their prompt
instruction = st.text_area("Enter your prompt:")


# Function to interact with the Mistral model
def mistral_model(prompt, token_limit):
    # Initialize the base model and tokenizer
    model_name = "bn22/Mistral-7B-Instruct-v0.1-sharded"
    adapters_name = "atharvapawar/flaskCodemistral-7b-mj-finetuned"
    device = "cuda" if torch.cuda.is_available() else "cpu"

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)

    # Load the fine-tuned LoRA adapter on top of the base model
    model = PeftModel.from_pretrained(model, adapters_name)
    model.to(device)

    # Wrap the prompt in the Mistral instruction format and tokenize it,
    # moving the input tensors to the same device as the model
    text = "[INST] " + prompt + " [/INST]"
    encoded = tokenizer(text, return_tensors="pt", add_special_tokens=False)
    encoded = encoded.to(device)

    # Generate a response, capping the number of newly generated tokens
    generated_ids = model.generate(**encoded, max_new_tokens=token_limit, do_sample=True)
    decoded = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)

    return decoded


# Check if the user entered a prompt
if instruction:
    # Add a slider for selecting the token limit
    token_limit = st.slider("Select token limit", min_value=10, max_value=500, value=250)

    # Create a button to trigger model inference
    if st.button("Generate Response"):
        responses = mistral_model(instruction, token_limit)
        st.write("Generated Responses:")
        for response in responses:
            st.write(response)

# Run the app with: streamlit run app.py
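
# Optional: reloading a 7B model on every button click is slow, because Streamlit
# reruns the script on each interaction. Below is a minimal sketch of caching the
# model and tokenizer across reruns with st.cache_resource (assumes Streamlit >= 1.18
# and relies on the imports at the top of this script). The load_mistral() helper
# name is illustrative and not part of the original script.
@st.cache_resource
def load_mistral():
    model_name = "bn22/Mistral-7B-Instruct-v0.1-sharded"
    adapters_name = "atharvapawar/flaskCodemistral-7b-mj-finetuned"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)
    model = PeftModel.from_pretrained(model, adapters_name)
    model.to("cuda" if torch.cuda.is_available() else "cpu")
    return tokenizer, model

# Usage sketch: calling `tokenizer, model = load_mistral()` once near the top of the
# script (and passing the pair into mistral_model) would avoid re-downloading and
# re-loading the weights on every "Generate Response" click.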