JustinLin610 committed
Commit 2c140eb
1 Parent(s): 4c61a0f

add app.py and readme

Files changed (2):
  1. README.md +6 -6
  2. app.py +51 -0
README.md CHANGED
@@ -1,13 +1,13 @@
  ---
- title: ImageBind Zeroshot Demo
- emoji: 💻
- colorFrom: blue
- colorTo: indigo
+ title: ImageBind
+ emoji: 🔥
+ colorFrom: yellow
+ colorTo: pink
  sdk: gradio
- sdk_version: 3.29.0
+ sdk_version: 3.12.0
  app_file: app.py
  pinned: false
  license: mit
  ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,56 @@
+ import data
+ import torch
+ import gradio as gr
+ from models import imagebind_model
+ from models.imagebind_model import ModalityType
+
+ # Load the pretrained ImageBind (huge) checkpoint once at startup.
+ device = "cuda:0" if torch.cuda.is_available() else "cpu"
+ model = imagebind_model.imagebind_huge(pretrained=True)
+ model.eval()
+ model.to(device)
+
+
+ def image_text_zeroshot(image, text_list):
+     # Candidate labels arrive as a single "|"-separated string,
+     # matching the examples below.
+     image_paths = [image]
+     labels = [label.strip() for label in text_list.strip().split("|")]
+     inputs = {
+         ModalityType.TEXT: data.load_and_transform_text(labels, device),
+         ModalityType.VISION: data.load_and_transform_vision_data(image_paths, device),
+     }
+
+     with torch.no_grad():
+         embeddings = model(inputs)
+
+     # Softmax over image-text similarities yields one score per label.
+     scores = torch.softmax(
+         embeddings[ModalityType.VISION] @ embeddings[ModalityType.TEXT].T,
+         dim=-1,
+     ).squeeze(0).tolist()
+
+     score_dict = {label: score for label, score in zip(labels, scores)}
+
+     return score_dict
+
+
+ inputs = [
+     gr.Image(type="filepath", label="Input image"),
+     gr.Textbox(lines=1, label="Candidate texts (separated by '|')"),
+ ]
+
+ iface = gr.Interface(
+     image_text_zeroshot,
+     inputs,
+     "label",
+     examples=[
+         [".assets/dog_image.jpg", "A dog|A car|A bird"],
+         [".assets/car_image.jpg", "A dog|A car|A bird"],
+         [".assets/bird_image.jpg", "A dog|A car|A bird"],
+     ],
+     description="Zero-shot image classification with ImageBind embeddings.",
+     title="Zero-shot Classification",
+ )
+
+ iface.launch()
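
The only numerical step in app.py is the scoring: the (1, D) vision embedding is multiplied against the transposed (N, D) text embeddings, and a softmax turns the resulting row of similarities into per-label probabilities. A minimal standalone sketch of that step with random stand-in tensors (shapes inferred from the app's usage; D = 1024 is assumed as the embedding width of imagebind_huge):

```python
import torch

D = 1024  # joint embedding width (assumed for imagebind_huge)
labels = ["A dog", "A car", "A bird"]

# Stand-in embeddings with the shapes the app produces:
# one image embedding (1, D), one text embedding per label (N, D).
vision_emb = torch.randn(1, D)
text_emb = torch.randn(len(labels), D)

# (1, D) @ (D, N) -> (1, N) similarity row; softmax -> probabilities.
scores = torch.softmax(vision_emb @ text_emb.T, dim=-1).squeeze(0).tolist()
print({label: round(score, 3) for label, score in zip(labels, scores)})
```

With real embeddings, the same computation produces the dictionary that the Interface's "label" output renders as ranked confidences.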