pharmapsychotic committed on
Commit
7ac65ca
1 Parent(s): d272750

First test for Hugging Face Spaces

Browse files
Files changed (3) hide show
  1. app.py +61 -0
  2. example.jpg +0 -0
  3. requirements.txt +9 -0
app.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import sys
3
+ import torch
4
+ import torchvision.transforms as T
5
+ import torchvision.transforms.functional as TF
6
+
7
+ sys.path.append('src/blip')
8
+ sys.path.append('src/clip')
9
+
10
+ import clip
11
+ from models.blip import blip_decoder
12
+
13
# Use CUDA when torch reports it as available; otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# --- BLIP captioning model -------------------------------------------------
print("Loading BLIP model...")
# Passed to blip_decoder as image_size and reused as the resize target when
# images are prepared for captioning.
blip_image_eval_size = 384
blip_model_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_large_caption.pth'
blip_model = blip_decoder(
    pretrained=blip_model_url,
    image_size=blip_image_eval_size,
    vit='large',
    med_config='./src/blip/configs/med_config.json',
)
blip_model.eval()  # inference mode
blip_model = blip_model.to(device)

# --- CLIP model ------------------------------------------------------------
print("Loading CLIP model...")
clip_model_name = 'ViT-L/14'
clip_model, clip_preprocess = clip.load(clip_model_name, device=device)
clip_model = clip_model.to(device)
clip_model.eval()
26
+
27
+
28
def generate_caption(pil_image):
    """Caption a PIL image with the module-level BLIP model.

    Args:
        pil_image: A ``PIL.Image.Image`` in any mode. It is converted to RGB
            before preprocessing, so RGBA, greyscale and palette images are
            accepted as well.

    Returns:
        The first element of the sequence returned by
        ``blip_model.generate`` (the app displays it in a text box).
    """
    # The Normalize step below is configured with three-channel mean/std, so
    # an image with any other channel count would raise a broadcast error.
    # Converting up front is a plain copy for images that are already RGB.
    rgb_image = pil_image.convert('RGB')

    preprocess = T.Compose([
        T.Resize(
            (blip_image_eval_size, blip_image_eval_size),
            interpolation=TF.InterpolationMode.BICUBIC,
        ),
        T.ToTensor(),
        T.Normalize(
            (0.48145466, 0.4578275, 0.40821073),
            (0.26862954, 0.26130258, 0.27577711),
        ),
    ])
    # Add a leading batch dimension and move the tensor to the model's device.
    batch = preprocess(rgb_image).unsqueeze(0).to(device)

    # Gradients are not needed for generation.
    with torch.no_grad():
        caption = blip_model.generate(
            batch, sample=False, num_beams=3, max_length=20, min_length=5
        )
    return caption[0]
38
+
39
def inference(image):
    """Gradio callback: return the caption produced for ``image``.

    Delegates directly to ``generate_caption``.
    """
    caption = generate_caption(image)
    return caption
41
+
42
# NOTE(review): gr.inputs / gr.outputs and launch(enable_queue=...) are
# reported as deprecated in newer Gradio releases - confirm the Gradio
# version this Space runs on before upgrading.

# Input and output widgets: one PIL image in, one text box out.
inputs = [gr.inputs.Image(type='pil')]
outputs = gr.outputs.Textbox(label="Output")

# Text shown on the page around the widgets.
title = "CLIP Interrogator"
description = "First test of CLIP Interrogator on HuggingSpace"
# The link targets are still empty placeholders.
article = """
<p style='text-align: center'>
<a href="">Colab Notebook</a> /
<a href="">Github repo</a>
</p>
"""

# Build the interface around the captioning callback, then start serving.
demo = gr.Interface(
    fn=inference,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
    article=article,
    examples=[['example.jpg']],
)
demo.launch(enable_queue=True)
example.jpg ADDED
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ fairscale
2
+ ftfy
3
+ Pillow
4
+ timm
5
+ torch
6
+ torchvision
7
+ transformers==4.21.2
8
+ -e git+https://github.com/openai/CLIP.git@main#egg=clip
9
+ -e git+https://github.com/salesforce/BLIP.git@main#egg=blip