# -*- coding: utf-8 -*-
"""trymistral1.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1tBaMrUjepPv483Xhmfh9QbZHtB-SC1wG
"""
# Snapshot the packages preinstalled in the Colab environment
!pip3 freeze > requirements.txt
# Step 1: Install the necessary libraries
!pip install transformers
!pip install datasets
!pip install huggingface_hub
!pip install accelerate bitsandbytes mistral_inference  # make sure mistral_inference is installed
# Step 2: Authenticate with Hugging Face
from huggingface_hub import login

# Paste your actual Hugging Face access token between the quotes
login(token="")
# torch usually ships with Colab, but install explicitly to be safe
!pip install torch
import torch

# gradio is needed for the demo UI below (transformers and bitsandbytes were installed in Step 1)
!pip install gradio

# Sanity checks: confirm mistral_inference is present and inspect the environment
!pip show mistral_inference
!pip list
# Step 3: Load the model and tokenizer
from transformers import AutoTokenizer, MistralForCausalLM, BitsAndBytesConfig
import gradio as gr
from PIL import Image
import io
import base64
def image_to_base64(image):
    """Encode a PIL image as a base64 PNG string."""
    buffered = io.BytesIO()
    image.save(buffered, format="PNG")
    img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
    return img_str
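# Quick self-test for image_to_base64: encode a tiny synthetic image and decode
# it back to confirm the round trip preserves the image.
demo_img = Image.new("RGB", (8, 8), color="white")
restored = Image.open(io.BytesIO(base64.b64decode(image_to_base64(demo_img))))
assert restored.size == demo_img.size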
def chat_with_llava(image, question):
    """Answer a question using the text backbone of the LLaVA-Med checkpoint.

    Note: the model is loaded below with MistralForCausalLM, i.e. text-only,
    so the base64-encoded image is not actually consumed by the model.
    """
    try:
        # Convert the image to base64 (currently unused; see docstring)
        image_b64 = image_to_base64(image) if image is not None else None

        # Tokenize the question and move the tensors to the model's device
        inputs = tokenizer(question, return_tensors="pt").to(model.device)

        # Generate a response, capped at a reasonable length
        outputs = model.generate(**inputs, max_new_tokens=256)
        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        return generated_text
    except Exception as e:
        return f"Error occurred: {str(e)}"
model_id = "microsoft/llava-med-v1.5-mistral-7b" | |
bnb_config = BitsAndBytesConfig( | |
load_in_4bit=True, | |
bnb_4bit_use_double_quant=True, | |
bnb_4bit_quant_type="nf4", | |
bnb_4bit_compute_dtype=torch.bfloat16 | |
) | |
# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Load the model with MistralForCausalLM. The checkpoint declares a custom
# "llava_mistral" architecture, so this loads only the Mistral language-model
# weights (vision-tower weights are skipped with a warning) and the model
# behaves as text-only.
model = MistralForCausalLM.from_pretrained(model_id, quantization_config=bnb_config, device_map="auto")
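# Sanity checks after loading: report the quantized memory footprint
# (7B parameters at 4 bits is roughly 3.5 GB, versus ~14 GB at float16)
# and run a quick text-only smoke test through the handler above.
print(f"Model memory footprint: {model.get_memory_footprint() / 1e9:.2f} GB")
print(chat_with_llava(None, "What are the typical findings of pneumonia on a chest X-ray?"))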
# Create a Gradio interface
iface = gr.Interface(
    fn=chat_with_llava,
    inputs=[gr.Image(type="pil", label="Upload Image"), gr.Textbox(lines=1, label="Ask a question")],
    outputs=gr.Textbox(label="Response"),
    title="LLAVA Model Chat with Image",
    description="Upload an image and ask a question to the LLAVA model.",
)

# Launch the Gradio interface (in Colab, pass share=True if the inline view does not load)
iface.launch()
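# Alternative sketch: true image+text inference with a standard LLaVA checkpoint.
# Assumptions: the "llava-hf/llava-1.5-7b-hf" checkpoint (a general-domain model,
# not LLaVA-Med) and the stock transformers LLaVA classes; the Med checkpoint
# above uses a custom "llava_mistral" architecture these classes do not load.
# Run this in a fresh session so two 7B models do not share one GPU.
from transformers import AutoProcessor, LlavaForConditionalGeneration

llava_id = "llava-hf/llava-1.5-7b-hf"
processor = AutoProcessor.from_pretrained(llava_id)
llava_model = LlavaForConditionalGeneration.from_pretrained(
    llava_id, quantization_config=bnb_config, device_map="auto"
)

def chat_with_image(image, question):
    # LLaVA-1.5 prompt format: the <image> placeholder marks where pixels go
    prompt = f"USER: <image>\n{question} ASSISTANT:"
    inputs = processor(images=image, text=prompt, return_tensors="pt").to(llava_model.device)
    outputs = llava_model.generate(**inputs, max_new_tokens=256)
    return processor.decode(outputs[0], skip_special_tokens=True)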