ChrisMofus committed on
Commit
c8ff0d2
1 Parent(s): 4e4112c

Add image-text-to-text model

Browse files
Files changed (1) hide show
  1. app.py +28 -0
app.py CHANGED
@@ -1,8 +1,36 @@
 
 
1
  import streamlit as st
2
  from PIL import Image
 
 
 
 
 
 
3
 
4
  uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "png", "jpeg"])
5
 
6
  if uploaded_file is not None:
7
  image = Image.open(uploaded_file)
8
  st.image(image, caption='Uploaded Image.', use_column_width=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit app: caption an uploaded image with HuggingFaceM4/idefics2-8b-base."""
import requests
import torch
import streamlit as st
from PIL import Image
from io import BytesIO

from transformers import AutoProcessor, AutoModelForVision2Seq
from transformers.image_utils import load_image

# Fall back to CPU so the app still starts on machines without a CUDA GPU;
# a hard-coded "cuda:0" makes .to(DEVICE) raise on CPU-only hosts.
DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"


@st.cache_resource
def _load_model():
    """Load the idefics2 processor and model once per server process.

    Streamlit re-executes the whole script on every user interaction;
    without caching, the 8B model would be re-instantiated (and possibly
    re-downloaded) on every image upload.
    """
    processor = AutoProcessor.from_pretrained("HuggingFaceM4/idefics2-8b-base")
    model = AutoModelForVision2Seq.from_pretrained(
        "HuggingFaceM4/idefics2-8b-base",
    ).to(DEVICE)
    return processor, model


uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "png", "jpeg"])

if uploaded_file is not None:
    image = Image.open(uploaded_file)
    st.image(image, caption='Uploaded Image.', use_column_width=True)

    processor, model = _load_model()

    # Create inputs: a bare "<image>" prompt asks the base model to continue
    # (i.e. describe) the uploaded image.
    prompts = [
        "<image>",
    ]
    images = [image]
    inputs = processor(text=prompts, images=images, padding=True, return_tensors="pt")
    inputs = {k: v.to(DEVICE) for k, v in inputs.items()}

    # Generate without gradient tracking — this is inference only.
    with torch.no_grad():
        generated_ids = model.generate(**inputs, max_new_tokens=500)
    generated_texts = processor.batch_decode(generated_ids, skip_special_tokens=True)

    # Show the result in the app; print() only reaches the server console.
    st.write(generated_texts)