evelyncsb committed on
Commit
7f53b0a
1 Parent(s): f1a07a8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -39
app.py CHANGED
@@ -12,46 +12,52 @@ from imagebind.models.imagebind_model import ModalityType
12
  import torch.nn as nn
13
 
14
 
15
- device = "cuda:0" if torch.cuda.is_available() else "cpu"
16
  model = imagebind_model.imagebind_huge(pretrained=True)
17
  model.eval()
18
  model.to(device)
19
 
20
- def image_text_zeroshot(image, text_list):
21
- image_paths = [image]
22
- labels = [label.strip(" ") for label in text_list.strip(" ").split("|")]
23
- inputs = {
24
- ModalityType.TEXT: data.load_and_transform_text(labels, device),
25
- ModalityType.VISION: data.load_and_transform_vision_data(image_paths, device),
26
- }
27
-
28
- with torch.no_grad():
29
- embeddings = model(inputs)
30
-
31
- scores = (
32
- torch.softmax(
33
- embeddings[ModalityType.VISION] @ embeddings[ModalityType.TEXT].T, dim=-1
34
- )
35
- .squeeze(0)
36
- .tolist()
37
- )
38
-
39
- score_dict = {label: score for label, score in zip(labels, scores)}
40
-
41
- return score_dict
42
-
43
- def main():
44
- inputs = [
45
- gr.inputs.Textbox(lines=1, label="texts"),
46
- gr.inputs.Image(type="filepath", label="Output image")
47
- ]
48
-
49
- iface = gr.Interface(
50
- image_text_zeroshot(image, text_list),
51
- inputs,
52
- "label",
53
- description="""...""",
54
- title="ImageBind",
55
- )
56
-
57
- iface.launch()
 
 
 
 
 
 
 
12
  import torch.nn as nn
13
 
14
 
15
+ # Force CPU inference; the GPU auto-select is kept commented out — presumably because the
+ # deployment hardware has no CUDA device (TODO confirm against the hosting environment).
+ device = "cpu" #"cuda:0" if torch.cuda.is_available() else "cpu"
16
  # Load the pretrained ImageBind-Huge checkpoint, switch to inference mode, and move it to `device`.
  model = imagebind_model.imagebind_huge(pretrained=True)
17
  model.eval()
18
  model.to(device)
19
 
20
+ # def image_text_zeroshot(image, text_list):
21
+ # image_paths = [image]
22
+ # labels = [label.strip(" ") for label in text_list.strip(" ").split("|")]
23
+ # inputs = {
24
+ # ModalityType.TEXT: data.load_and_transform_text(labels, device),
25
+ # ModalityType.VISION: data.load_and_transform_vision_data(image_paths, device),
26
+ # }
27
+
28
+ # with torch.no_grad():
29
+ # embeddings = model(inputs)
30
+
31
+ # scores = (
32
+ # torch.softmax(
33
+ # embeddings[ModalityType.VISION] @ embeddings[ModalityType.TEXT].T, dim=-1
34
+ # )
35
+ # .squeeze(0)
36
+ # .tolist()
37
+ # )
38
+
39
+ # score_dict = {label: score for label, score in zip(labels, scores)}
40
+
41
+ # return score_dict
42
+
43
+ # def main():
44
+ # inputs = [
45
+ # gr.inputs.Textbox(lines=1, label="texts"),
46
+ # gr.inputs.Image(type="filepath", label="Output image")
47
+ # ]
48
+
49
+ # iface = gr.Interface(
50
+ # image_text_zeroshot(image, text_list),
51
+ # inputs,
52
+ # "label",
53
+ # description="""...""",
54
+ # title="ImageBind",
55
+ # )
56
+
57
+ # iface.launch()
58
+
59
def image_classifier(inp):
    """Placeholder classifier.

    Ignores *inp* entirely and returns a fixed label->score mapping,
    useful only for smoke-testing the Gradio wiring.
    """
    fixed_scores = {'cat': 0.3, 'dog': 0.7}
    return fixed_scores
61
+
62
+ # Minimal Gradio app: image input, label output, backed by the placeholder classifier above.
+ # NOTE(review): launched unconditionally at import time — no `if __name__ == "__main__":` guard.
+ demo = gr.Interface(fn=image_classifier, inputs="image", outputs="label")
63
+ demo.launch()