Baptiste Canton committed on
Commit
3825bf0
1 Parent(s): e828fe3
Files changed (2) hide show
  1. app2.py +45 -0
  2. app3.py +72 -0
app2.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ os.environ.setdefault("GRADIO_ANALYTICS_ENABLED", "False")
3
+ import logging
4
+ import gradio as gr
5
+ from pillow_heif import register_heif_opener
6
+
7
+ register_heif_opener()
8
+
9
+ from transformers import pipeline
10
+
11
+
12
# Runtime configuration, read from environment variables with defaults.
LOG_LEVEL = os.getenv("LOG_LEVEL", "DEBUG")
# The fallback is the int 200; a value coming from the environment is a str.
# Either way the result goes through int().
MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", 200))
# https://huggingface.co/models?pipeline_tag=image-to-text&sort=likes
MODEL = os.getenv("MODEL", "Salesforce/blip-image-captioning-large")

logging.basicConfig(level=LOG_LEVEL)
logger = logging.getLogger(__name__)


# Build the image-to-text pipeline once, at module import; `graptioner`
# reads it through the module-level name `captioner`.
logger.info("Loading model...")
# simpler model: "ydshieh/vit-gpt2-coco-en"
captioner = pipeline(
    "image-to-text",
    model=MODEL,
    max_new_tokens=MAX_NEW_TOKENS,
)
logger.info("Done loading model.")
29
+
30
+
31
+
32
def graptioner(img):
    """Return the generated caption for ``img``.

    Args:
        img: The image handed over by the Gradio ``Image`` input (configured
            with ``type="pil"``). May be ``None`` when the form is submitted
            without an image.

    Returns:
        The ``generated_text`` field of the first result produced by the
        module-level ``captioner`` pipeline.

    Raises:
        ValueError: If ``img`` is ``None``.
    """
    # Fail with a clear message instead of handing None to the pipeline.
    if img is None:
        raise ValueError("No image provided")
    result = captioner(img)
    return result[0]["generated_text"]
36
+
37
# Web UI: one image input (requested as type="pil") wired to `graptioner`,
# and one text output that shows the returned caption.
iface = gr.Interface(
    fn=graptioner,
    inputs=gr.components.Image(type="pil"),
    outputs=["text"],
    allow_flagging="never",
    # analytics_enabled=False
)

# Start the Gradio app with the default launch settings.
iface.launch()
app3.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import io
3
+ import logging
4
+ import os
5
+
6
+ import gradio as gr
7
+ import requests
8
+ from PIL import Image
9
+ from pillow_heif import register_heif_opener
10
+ from transformers import pipeline
11
+
12
# Disable Gradio analytics unless the environment already sets the variable.
# NOTE(review): this runs after `import gradio`; if gradio reads the variable
# at import time, this default comes too late -- confirm.
os.environ.setdefault("GRADIO_ANALYTICS_ENABLED", "False")
# Runtime configuration, read from environment variables with defaults.
LOG_LEVEL = os.getenv("LOG_LEVEL", "DEBUG")
# The fallback is the int 200; a value coming from the environment is a str.
# Either way the result goes through int().
MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", 200))
# https://huggingface.co/models?pipeline_tag=image-to-text&sort=likes
MODEL = os.getenv("MODEL", "Salesforce/blip-image-captioning-large")

# Register the HEIF opener from pillow_heif with Pillow.
register_heif_opener()

logging.basicConfig(level=LOG_LEVEL)
logger = logging.getLogger(__name__)
22
+
23
+
24
def setup_args():
    """Parse the command line and return the resulting namespace.

    The only option defined is the boolean flag ``--share``, which is
    ``False`` unless it is passed.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--share", default=False, action="store_true")
    namespace = cli.parse_args()
    return namespace
28
+
29
+
30
def load_image_from_url(url, timeout=10):
    """Download ``url`` and open the response body as a PIL image.

    Args:
        url: Address of the image to fetch.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        The image object returned by ``Image.open`` for the downloaded bytes.

    Raises:
        Exception: If the response status is not OK, or whatever error the
            download or ``Image.open`` raised. Every failure is logged and
            then re-raised unchanged.
    """
    try:
        # Without a timeout, requests waits indefinitely on a stalled server,
        # which would block the request handler.
        response = requests.get(url, timeout=timeout)
        if not response.ok:
            raise Exception("Error downloading image")
        return Image.open(io.BytesIO(response.content))
    except Exception as e:
        logger.error("Error loading image from URL: %s", e)
        raise
40
+
41
+
42
def graptioner(image, url):
    """Caption the uploaded image or, when a URL is given, the image behind it.

    Args:
        image: The uploaded image (requested from Gradio as ``type="pil"``),
            or ``None`` when nothing was uploaded.
        url: Optional image URL. When it is non-blank it takes precedence
            over ``image``.

    Returns:
        The ``generated_text`` field of the first result produced by the
        module-level ``captioner`` pipeline.

    Raises:
        ValueError: If neither an image nor a URL was supplied, or the image
            has a dimension smaller than 1.
        Exception: Propagated from ``load_image_from_url`` on download or
            decode failures.
    """
    # Normalise once so the emptiness check and the download use the same value.
    url = url.strip() if url else ""
    if url:
        image = load_image_from_url(url)
    # Guard the "submit with nothing filled in" case, which otherwise fails
    # with an AttributeError on `image.size`.
    if image is None:
        raise ValueError("No image provided: upload an image or enter an image URL")
    width, height = image.size
    if width < 1 or height < 1:
        raise ValueError("Invalid image")
    logger.debug("Loaded image size: %sx%s", width, height)
    # generate caption
    result = captioner(image)
    return result[0]["generated_text"]
52
+
53
+
54
# Script entry point: parse the CLI, load the model, then serve the UI.
if __name__ == "__main__":
    args = setup_args()
    logger.info("Loading model...")
    # simpler model: "ydshieh/vit-gpt2-coco-en"
    # An `if` body does not open a new scope, so `captioner` becomes a module
    # global; `graptioner` looks it up by that name when it is called.
    captioner = pipeline(
        "image-to-text",
        model=MODEL,
        max_new_tokens=MAX_NEW_TOKENS,
    )
    logger.info("Done loading model.")
    # Two inputs are passed to `graptioner` in order: the uploaded image
    # (requested as type="pil") and the URL text.
    iface = gr.Interface(
        fn=graptioner,
        inputs=[
            gr.Image(type="pil", label="Upload Image"),
            gr.Textbox(lines=1, placeholder="Image URL", label="Image URL"),
        ],
        outputs=["text"],
    )
    # The --share command-line flag is forwarded as launch()'s `share` argument.
    iface.launch(share=args.share)