Spaces: Lin-Chen/Share-Captioner

Running on Zero

ZeroGPU

by hysts (HF Staff) - opened Jun 18, 2024

←

Files changed (2) hide show

app.py CHANGED Viewed

@@ -1,23 +1,24 @@
 import gradio as gr
 import torch
 from PIL import Image
 from transformers import AutoModelForCausalLM, AutoTokenizer
 model_name = "Lin-Chen/ShareCaptioner"
 tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
 model = AutoModelForCausalLM.from_pretrained(
-    model_name, device_map="cpu", trust_remote_code=True).eval()
 model.tokenizer = tokenizer
 model.cuda()
-model.half()
 seg1 = '<|User|>:'
 seg2 = f'Analyze the image in a comprehensive and detailed manner.{model.eoh}\n<|Bot|>:'
-seg_emb1 = model.encode_text(seg1, add_special_tokens=True)
-seg_emb2 = model.encode_text(seg2, add_special_tokens=False)
 def detailed_caption(img_path):
     subs = []
     image = Image.open(img_path).convert("RGB")

 import gradio as gr
 import torch
+import spaces
 from PIL import Image
 from transformers import AutoModelForCausalLM, AutoTokenizer
 model_name = "Lin-Chen/ShareCaptioner"
 tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
 model = AutoModelForCausalLM.from_pretrained(
+    model_name, device_map="cpu", torch_dtype=torch.float16, trust_remote_code=True).eval()
 model.tokenizer = tokenizer
 model.cuda()
 seg1 = '<|User|>:'
 seg2 = f'Analyze the image in a comprehensive and detailed manner.{model.eoh}\n<|Bot|>:'
+seg_emb1 = model.encode_text(seg1, add_special_tokens=True).cuda()
+seg_emb2 = model.encode_text(seg2, add_special_tokens=False).cuda()
+@spaces.GPU
 def detailed_caption(img_path):
     subs = []
     image = Image.open(img_path).convert("RGB")

requirements.txt CHANGED Viewed

@@ -4,10 +4,11 @@ tiktoken==0.5.1
 einops==0.7.0
 transformers_stream_generator==0.0.4
 scipy==1.11.3
-torchvision==0.15.2
 pillow==10.0.1
 matplotlib==3.8.0
-gradio==3.50.2
 sentencepiece
 urllib3==1.26.18
-timm==0.6.13

 einops==0.7.0
 transformers_stream_generator==0.0.4
 scipy==1.11.3
+torch==2.1.2
+torchvision==0.16.2
 pillow==10.0.1
 matplotlib==3.8.0
 sentencepiece
 urllib3==1.26.18
+timm==1.0.3
+spaces