# NOTE(review): removed scraped Hugging Face Spaces page residue
# ("Spaces: / Paused / Paused") — it is not Python and broke the module.
import os

from flask import Flask, request, jsonify
import torch
from PIL import Image
from transformers import AutoModel, AutoTokenizer, BitsAndBytesConfig

# Hugging Face API token from the environment.
# fix: `os` was used without being imported, and calling .strip() directly
# on os.getenv(...) raises AttributeError when HF_TOKEN is unset — guard it.
_raw_token = os.getenv("HF_TOKEN")
api_token = _raw_token.strip() if _raw_token else None
# Single Hub identifier shared by the model and its tokenizer
# (was duplicated as two string literals).
MODEL_ID = "ContactDoctor/Bio-Medical-MultiModal-Llama-3-8B-V1"

# 4-bit NF4 quantization with nested (double) quantization; matmuls computed
# in fp16 to keep memory low while preserving quality.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

# Model and tokenizer loading.
# NOTE(review): trust_remote_code=True executes Python shipped inside the
# model repo — acceptable only because this specific repo is trusted.
# NOTE(review): attn_implementation="flash_attention_2" requires the
# flash-attn package and a supported GPU; loading fails without them.
model = AutoModel.from_pretrained(
    MODEL_ID,
    quantization_config=bnb_config,
    device_map="auto",
    torch_dtype=torch.float16,
    trust_remote_code=True,
    attn_implementation="flash_attention_2",
    # fix: api_token was read from the environment but never used —
    # gated/private repos need it passed to from_pretrained.
    token=api_token,
)
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_ID,
    trust_remote_code=True,
    token=api_token,
)
app = Flask(__name__)


# fix: analyze() was defined but never registered with Flask, so the app
# exposed no endpoints at all.
@app.route('/analyze', methods=['POST'])
def analyze():
    """Answer a free-text question about an uploaded image.

    Expects a multipart/form-data POST with:
        image    -- the image file to analyze
        question -- the question to ask about it

    Returns:
        JSON ``{"response": <generated text>}`` on success, or
        ``{"error": ...}`` with HTTP 400 if a required field is missing.
    """
    # Validate inputs up front instead of letting Flask raise an opaque 400.
    if 'image' not in request.files or 'question' not in request.form:
        return jsonify({'error': "both an 'image' file and a 'question' field are required"}), 400

    # Preprocess image: decode the upload and normalize to RGB.
    image = Image.open(request.files['image']).convert('RGB')
    question = request.form['question']

    # Prepare input — the remote-code chat API takes the image and the
    # question together in a single user message.
    msgs = [{'role': 'user', 'content': [image, question]}]

    # Generate response; stream=True yields text chunks.
    res = model.chat(
        image=image,
        msgs=msgs,
        tokenizer=tokenizer,
        sampling=True,
        temperature=0.95,
        stream=True,
    )

    # Collect the streamed chunks (str.join instead of quadratic +=).
    generated_text = "".join(res)
    return jsonify({'response': generated_text})
# Run the Flask development server when executed directly.
# NOTE(review): debug=True enables the Werkzeug interactive debugger
# (arbitrary code execution) — development use only, never production.
if __name__ == '__main__':
    app.run(debug=True)