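# Streamlit demo app: wraps a PEFT/LoRA fine-tuned Mistral-7B-Instruct model
# in a simple UI where the user enters a prompt, picks a token limit, and
# views the generated response.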
import streamlit as st
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer
# Define the Streamlit app
st.title("Mistral Model Integration")
# Create a text input for the user to enter their prompt
instruction = st.text_area("Enter your prompt:")
# Function to interact with the Mistral model: loads the base model, applies the
# fine-tuned adapter, and generates a response for the given prompt.
def mistral_model(prompt, token_limit):
    # Initialize the base model and tokenizer
    model_name = "bn22/Mistral-7B-Instruct-v0.1-sharded"
    adapters_name = "atharvapawar/flaskCodemistral-7b-mj-finetuned"
    device = "cuda" if torch.cuda.is_available() else "cpu"  # fall back to CPU when no GPU is available
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)
    # Load the fine-tuned LoRA adapter on top of the base model
    model = PeftModel.from_pretrained(model, adapters_name)
    model.to(device)
    # Wrap the prompt in the Mistral instruction format and move the inputs to the same device as the model
    text = "[INST]" + prompt + "[/INST]"
    encoded = tokenizer(text, return_tensors="pt", add_special_tokens=False).to(device)
    generated_ids = model.generate(**encoded, max_length=token_limit, do_sample=True)
    decoded = tokenizer.batch_decode(generated_ids)
    return decoded
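# Optional sketch (not wired into mistral_model above): reloading a 7B model on every
# button click is slow. Assuming a Streamlit version that provides st.cache_resource
# (1.18 or later), the model and tokenizer can be loaded once per process and reused
# across reruns. The helper name load_mistral is illustrative only.
@st.cache_resource
def load_mistral(model_name: str, adapters_name: str):
    # Load the tokenizer, base model, and adapter a single time and cache the result
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)
    model = PeftModel.from_pretrained(model, adapters_name)
    return tokenizer, model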
# Check if the user entered a prompt
if instruction:
    # Add a slider for selecting the token limit
    token_limit = st.slider("Select token limit", min_value=10, max_value=500, value=250)
    # Create a button to trigger model inference
    if st.button("Generate Response"):
        responses = mistral_model(instruction, token_limit)
        st.write("Generated Responses:")
        for response in responses:
            st.write(response)
# Streamlit executes this script top to bottom on each rerun; launch the app from the
# command line with `streamlit run` (there is no st.run()).