Spaces:

atharvapawar
/

api-Codemistral-7b-mj-finetuned

Runtime error

File size: 1,853 Bytes

import streamlit as st
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer
import transformers

# Page heading rendered at the top of the Streamlit app.
st.title("Mistral Model Integration")

# Multi-line input box; `instruction` holds whatever the user typed
# and is checked for truthiness before any model work is triggered.
instruction = st.text_area(label="Enter your prompt:")

# Helper that runs the prompt through the fine-tuned Mistral model.

def mistral_model(prompt, token_limit):
    """Generate text for *prompt* with the adapter-augmented Mistral model.

    The base model, its tokenizer and the PEFT adapter are loaded from their
    pretrained identifiers on every call, so each invocation pays the full
    loading cost.
    NOTE(review): consider caching the loaded model/tokenizer (e.g. with
    Streamlit's resource cache) if repeated calls are expected.

    Args:
        prompt: User instruction text; it is wrapped in ``[INST]...[/INST]``
            before tokenization.
        token_limit: Passed to ``generate`` as ``max_length``, i.e. it bounds
            the total sequence length (prompt tokens plus generated tokens),
            not only the newly generated part.

    Returns:
        The list of strings produced by ``tokenizer.batch_decode`` on the
        generated ids (special tokens are not stripped).
    """
    model_name = "bn22/Mistral-7B-Instruct-v0.1-sharded"
    adapters_name = "atharvapawar/flaskCodemistral-7b-mj-finetuned"
    # Use the GPU only when one is actually present; unconditionally moving
    # the model to "cuda" raises on CPU-only hosts.
    device = "cuda" if torch.cuda.is_available() else "cpu"

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)

    # Attach the fine-tuned adapter weights on top of the base model.
    model = PeftModel.from_pretrained(model, adapters_name)
    model.to(device)

    # Wrap the prompt in the instruction markers; special tokens are not
    # added by the tokenizer here.
    text = "[INST]" + prompt + "[/INST]"
    encoded = tokenizer(text, return_tensors="pt", add_special_tokens=False)
    # The input tensors must live on the same device as the model, otherwise
    # generation fails with a device-mismatch error.
    encoded = encoded.to(device)

    generated_ids = model.generate(**encoded, max_length=token_limit, do_sample=True)
    decoded = tokenizer.batch_decode(generated_ids)

    return decoded

        



# The generation controls are only rendered once a prompt has been entered.
if instruction:
    # Upper bound handed to the model call; adjustable between 10 and 500.
    token_limit = st.slider(
        "Select token limit",
        min_value=10,
        max_value=500,
        value=250,
    )

    # Inference runs only on the rerun in which the button was pressed.
    generate_clicked = st.button("Generate Response")
    if generate_clicked:
        responses = mistral_model(instruction, token_limit)
        st.write("Generated Responses:")
        # Render each decoded sequence as its own element.
        for generated_text in responses:
            st.write(generated_text)

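# NOTE: Streamlit has no `st.run()` entry point and needs no
# `if __name__ == "__main__":` guard; launch the app from the command line
# with `streamlit run <this_file>.py`.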