|
from transformers import Blip2ForConditionalGeneration |
|
from transformers import Blip2Processor |
|
from peft import PeftModel |
|
import streamlit as st |
|
from PIL import Image |
|
import torch |
|
|
|
# Checkpoint locations: the processor comes from the Hugging Face hub id,
# the base weights and the PEFT adapter from local directories.
preprocess_ckp = "Salesforce/blip2-opt-2.7b"
base_model_ckp = "./model/blip2-opt-2.7b-fp16-sharded"
peft_model_ckp = "./model/blip2_peft"

# Module-level handles; both start out unset.
processor = model = None
|
|
|
def init_model():
    """Load the BLIP-2 processor and the PEFT-adapted model into module globals.

    Populates the module-level ``processor`` and ``model`` names that
    ``main()`` reads. The model is loaded in ``torch.float16`` and moved to
    the CUDA device so that it matches the inputs ``main()`` builds with
    ``.to('cuda', torch.float16)``.

    Returns:
        None. The results are stored in the ``processor`` and ``model``
        globals.
    """
    # Without this declaration the assignments below would only create
    # function locals and the module-level names would stay ``None``.
    global processor, model

    processor = Blip2Processor.from_pretrained(preprocess_ckp)

    # Load the base weights in half precision, then attach the PEFT adapter.
    base_model = Blip2ForConditionalGeneration.from_pretrained(
        base_model_ckp,
        torch_dtype=torch.float16,
    )
    peft_model = PeftModel.from_pretrained(base_model, peft_model_ckp)

    # Inputs are sent to 'cuda' by the caller, so the model must live there too.
    model = peft_model.to("cuda")
|
|
|
|
|
|
|
|
|
|
|
def main():
    """Render the Streamlit page: upload an image and show its generated caption.

    Calls ``init_model()`` first, then waits for an uploaded file. Once a
    file is present, the image is shown in the left column and the caption
    produced by the module-level ``model`` in the right column.
    """
    st.title("Fashion Image Caption using BLIP2")

    init_model()

    uploaded = st.file_uploader("Upload image")
    if uploaded is None:
        # Nothing uploaded yet: only the title and the uploader are shown.
        return

    left_pane, right_pane = st.columns(2)

    # Left column: the uploaded picture.
    left_pane.header("Image")
    picture = Image.open(uploaded)
    left_pane.image(picture, use_column_width=True)

    # Preprocess the picture and generate the caption token ids.
    encoded = processor(images=picture, return_tensors="pt").to('cuda', torch.float16)
    token_ids = model.generate(pixel_values=encoded.pixel_values, max_length=25)
    decoded = processor.batch_decode(token_ids, skip_special_tokens=True)
    caption = decoded[0]

    # Right column: the generated caption.
    right_pane.header("Generated Caption")
    right_pane.text(caption)
|
|
|
|
|
# Script entry point: build the page only when executed as the main script.
if __name__ == "__main__":
    main()