import gradio as gr
from transformers import AutoProcessor, AutoTokenizer, AutoImageProcessor, AutoModelForCausalLM, BlipForConditionalGeneration, VisionEncoderDecoderModel
import torch

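# Download sample images used by the demo's `examples` list below.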
torch.hub.download_url_to_file('http://images.cocodataset.org/val2017/000000039769.jpg', 'cats.jpg')
torch.hub.download_url_to_file('https://huggingface.co/datasets/nielsr/textcaps-sample/resolve/main/stop_sign.png', 'stop_sign.png')
torch.hub.download_url_to_file('https://cdn.openai.com/dall-e-2/demos/text2im/astronaut/horse/photo/0.jpg', 'astronaut.jpg')

git_processor_base = AutoProcessor.from_pretrained("microsoft/git-base-coco")
git_model_base = AutoModelForCausalLM.from_pretrained("microsoft/git-base-coco")

git_processor_large = AutoProcessor.from_pretrained("microsoft/git-large-coco")
git_model_large = AutoModelForCausalLM.from_pretrained("microsoft/git-large-coco")

blip_processor_base = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
blip_model_base = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

blip_processor_large = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
blip_model_large = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")

vitgpt_processor = AutoImageProcessor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
vitgpt_model = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
vitgpt_tokenizer = AutoTokenizer.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
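
# The checkpoints above are loaded in full float32 precision. As an optional tweak for
# memory-constrained GPUs (not enabled here), from_pretrained also accepts a torch_dtype
# argument, e.g.:
#   git_model_large = AutoModelForCausalLM.from_pretrained("microsoft/git-large-coco",
#                                                          torch_dtype=torch.float16)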

device = "cuda" if torch.cuda.is_available() else "cpu"

git_model_base.to(device)
blip_model_base.to(device)
git_model_large.to(device)
blip_model_large.to(device)
vitgpt_model.to(device)

def generate_caption(processor, model, image, tokenizer=None):
    # Preprocess the PIL image into pixel values on the same device as the model.
    inputs = processor(images=image, return_tensors="pt").to(device)

    # Generate a caption of at most 50 tokens; no gradients are needed at inference time.
    with torch.no_grad():
        generated_ids = model.generate(pixel_values=inputs.pixel_values, max_length=50)

    # ViT+GPT-2 decodes with its own tokenizer; GIT and BLIP decode via their processors.
    if tokenizer is not None:
        generated_caption = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    else:
        generated_caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

    return generated_caption
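
# Standalone usage sketch (not part of the Gradio app): caption one of the sample images
# downloaded above with a single model, e.g.:
#   from PIL import Image
#   print(generate_caption(blip_processor_base, blip_model_base, Image.open("cats.jpg")))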


# Run all five models on the same image; the return order matches the `outputs` list below.
def generate_captions(image):
    caption_git_base = generate_caption(git_processor_base, git_model_base, image)

    caption_git_large = generate_caption(git_processor_large, git_model_large, image)

    caption_blip_base = generate_caption(blip_processor_base, blip_model_base, image)

    caption_blip_large = generate_caption(blip_processor_large, blip_model_large, image)

    caption_vitgpt = generate_caption(vitgpt_processor, vitgpt_model, image, vitgpt_tokenizer)

    return caption_git_base, caption_git_large, caption_blip_base, caption_blip_large, caption_vitgpt

   
examples = [["cats.jpg"], ["stop_sign.png"], ["astronaut.jpg"]]

outputs = [
    gr.Textbox(label="Caption generated by GIT-base"),
    gr.Textbox(label="Caption generated by GIT-large"),
    gr.Textbox(label="Caption generated by BLIP-base"),
    gr.Textbox(label="Caption generated by BLIP-large"),
    gr.Textbox(label="Caption generated by ViT+GPT-2"),
]

title = "Interactive demo: comparing image captioning models"
description = "Gradio Demo to compare GIT, BLIP and ViT+GPT2, 3 state-of-the-art vision+language models. To use it, simply upload your image and click 'submit', or click one of the examples to load them. Read more at the links below."
article = "<p style='text-align: center'><a href='https://huggingface.co/docs/transformers/main/model_doc/blip' target='_blank'>BLIP docs</a> | <a href='https://huggingface.co/docs/transformers/main/model_doc/git' target='_blank'>GIT docs</a></p>"

iface = gr.Interface(fn=generate_captions,
                     inputs=gr.Image(type="pil"),
                     outputs=outputs,
                     examples=examples,
                     title=title,
                     description=description,
                     article=article)
iface.queue().launch(server_name="0.0.0.0", server_port=7860)

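# Two earlier, disabled demos (an ASCII art generator and a command output viewer) are
# kept below inside a string literal and are never executed.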
'''

import gradio as gr
import numpy as np
from PIL import Image

def generate_ascii_art(image):
    try:
        # Convert the numpy array to a PIL Image
        img = Image.fromarray(np.uint8(image))

        # Resize the image to a smaller size for faster processing
        img = img.resize((80, 60))

        # Convert the image to grayscale
        img = img.convert("L")

        # Define ASCII characters to represent different intensity levels
        #ascii_chars = "@%#*+=-:. "
        ascii_chars = "$@B%8&WM#*oahkbdpqwmZO0QLCJUYXzcvunxrjft/|()1{}[]?-_+~<>i!lI;:,\\^`'. "

        # Map each pixel to an ASCII character, scaling the 0-255 intensity
        # range across the full character ramp
        ascii_image = ""
        for pixel_value in img.getdata():
            ascii_image += ascii_chars[pixel_value * len(ascii_chars) // 256]

        # Reshape the ASCII string to match the resized image dimensions
        ascii_image = "\n".join([ascii_image[i:i + img.width] for i in range(0, len(ascii_image), img.width)])

        return ascii_image
    except Exception as e:
        return f"Error: {e}"

iface = gr.Interface(
    fn=generate_ascii_art,
    inputs="image",
    outputs="text",
    title="ASCII Art Generator",
    description="Upload an image, and this app will turn it into ASCII art!  - Simple  Gradio App from  Docker",
    live=True
)

iface.launch(server_name="0.0.0.0", server_port=7860)



import gradio as gr
import subprocess

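# Note: shell=True passes the user's text straight to the system shell, so this demo is
# only appropriate in a trusted, sandboxed environment.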
def run_command(command):
    try:
        result = subprocess.check_output(command, shell=True, text=True)
        return result
    except subprocess.CalledProcessError as e:
        return f"Error: {e}"

iface = gr.Interface(
    fn=run_command,
    inputs="text",
    outputs="text",
    #live=True,
    title="Command Output Viewer",
    description="Enter a command and view its output.",
    examples=[
    ["ls"],
    ["pwd"],
    ["echo 'Hello, Gradio!'"]]
)

iface.launch(server_name="0.0.0.0", server_port=7860)
'''