TharunSivamani committed on
Commit
3bebb65
1 Parent(s): 46f3070

initial commit

app.py ADDED
@@ -0,0 +1,41 @@
+ import gradio as gr
+ from transformers import CLIPProcessor, CLIPModel
+
+ model = CLIPModel.from_pretrained("openai/clip-vit-base-patch16")
+ processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch16")
+
+
+ def calculate_CLIP_score(image, text):
+     words = text.split(";")
+     words = [w.strip() for w in words]
+     words = list(filter(None, words))
+
+     if len(words) == 0:
+         return dict()
+
+     inputs = processor(text=words, images=image, return_tensors="pt", padding=True)
+     outputs = model(**inputs)
+     logits_per_image = outputs.logits_per_image.detach().numpy()
+
+     results_dict = {
+         label: score / 100.0
+         for label, score in zip(words, logits_per_image[0])
+     }
+
+     return results_dict
+
+
+ examples = [
+     ["images/two_dogs.jpg", "two dogs playing in the beach; a dog and a dog playing in the beach; beach"],
+     ["images/horse_field.jpg", "horse standing in a field; a field; a horse standing"],
+     ["images/human.jpg", "a man beside a river; a riverbed; a man"]
+ ]
+
+ demo = gr.Interface(
+     fn=calculate_CLIP_score,
+     inputs=["image", "text"],
+     outputs="label",
+     examples=examples,
+ )
+
+ demo.launch()
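
For context, CLIP's logits_per_image are image-text similarities scaled by the model's learned logit scale (around 100 for the released OpenAI checkpoints), so dividing by 100.0 maps each caption's score back to an approximate similarity that suits Gradio's "label" output. A minimal sketch of exercising calculate_CLIP_score outside the Gradio UI, assuming Pillow is installed and using one of the bundled example images:

    from PIL import Image

    # Load one of the example images shipped with the Space and score it
    # against a few ";"-separated candidate captions.
    image = Image.open("images/two_dogs.jpg")
    scores = calculate_CLIP_score(image, "two dogs playing in the beach; beach")

    # scores maps each caption to logits_per_image / 100.0; the largest
    # value is the caption CLIP considers the closest match to the image.
    print(scores)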
images/horse_field.jpg ADDED
images/human.jpg ADDED
images/two_dogs.jpg ADDED
requirements.txt ADDED
@@ -0,0 +1,4 @@
+ torch
+ transformers
+ gradio
+ torchvision