Spaces:
Runtime error
Runtime error
| import streamlit as st | |
| import torch | |
| from peft import PeftModel | |
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
| import transformers | |
# Page header shown at the top of the Streamlit app.
st.title("Mistral Model Integration")

# Free-form prompt box; `instruction` is an empty string until the user types.
instruction = st.text_area(label="Enter your prompt:")
| # Function to interact with Mistral Model | |
| # def mistral_model(prompt, token_limit): | |
| # # Your model loading and inference code here (from the code you provided) | |
| # # ... | |
| # return responses | |
@st.cache_resource
def _load_mistral(model_name, adapters_name, device):
    """Load the tokenizer and the adapter-wrapped model, cached across reruns.

    Streamlit re-executes the whole script on every widget interaction, so the
    load is wrapped in ``st.cache_resource`` to avoid re-reading the base
    weights and the adapter on each button click.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    base_model = AutoModelForCausalLM.from_pretrained(model_name)
    # Wrap the base model with the fine-tuned PEFT adapter.
    model = PeftModel.from_pretrained(base_model, adapters_name)
    model.to(device)
    return tokenizer, model


def mistral_model(prompt, token_limit):
    """Generate text for *prompt* with the adapter-augmented Mistral model.

    Args:
        prompt: User instruction; it is wrapped in ``[INST]...[/INST]`` tags
            before tokenization.
        token_limit: Passed to ``generate`` as ``max_length``, so it bounds
            the prompt tokens plus the newly generated tokens.

    Returns:
        The list of strings produced by ``tokenizer.batch_decode`` for the
        generated sequences (special tokens are not stripped).
    """
    model_name = "bn22/Mistral-7B-Instruct-v0.1-sharded"
    adapters_name = "atharvapawar/flaskCodemistral-7b-mj-finetuned"
    # Fall back to CPU instead of crashing on hosts without a GPU.
    device = "cuda" if torch.cuda.is_available() else "cpu"

    tokenizer, model = _load_mistral(model_name, adapters_name, device)

    text = "[INST]" + prompt + "[/INST]"
    encoded = tokenizer(text, return_tensors="pt", add_special_tokens=False)
    # Inputs must live on the same device as the model, otherwise generate()
    # fails with a device-mismatch error.
    encoded = encoded.to(device)

    generated_ids = model.generate(**encoded, max_length=token_limit, do_sample=True)
    return tokenizer.batch_decode(generated_ids)
# The generation controls are only rendered once a prompt has been entered.
if instruction:
    # Slider bounds: 10 to 500, starting at 250.
    token_limit = st.slider("Select token limit", 10, 500, 250)

    # Inference runs only on the rerun triggered by the button click.
    if st.button("Generate Response"):
        responses = mistral_model(instruction, token_limit)
        st.write("Generated Responses:")
        for generated_text in responses:
            st.write(generated_text)
# To launch the app, use the Streamlit CLI (e.g. `streamlit run app.py`).
# Streamlit has no `st.run()` entry point, so no `__main__` guard is needed.