Spaces:
Running
Running
File size: 6,080 Bytes
fbf5e14 ecbc33a fb6ff47 ecbc33a fb6ff47 ecbc33a fb6ff47 ecbc33a 5cf5acc ecbc33a fb6ff47 ecbc33a 6f299f7 ecbc33a 66f8fc1 41e193d 6987040 41e193d 6987040 72d6681 6987040 36c27b0 6987040 1a1b115 72d6681 6987040 ecbc33a 6987040 5ce139d e8f2900 5ce139d e8f2900 5ce139d e8f2900 ffcf10e e8f2900 c7eed2a c1f1f88 e8f2900 66f8fc1 6987040 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 |
import gradio as gr
from transformers import AutoProcessor, AutoTokenizer, AutoImageProcessor, AutoModelForCausalLM, BlipForConditionalGeneration, VisionEncoderDecoderModel
import torch
# Fetch the sample images into the working directory at import time.
# NOTE(review): the `examples` list that would reference these files is
# commented out further down — confirm whether the downloads are still needed.
_EXAMPLE_IMAGES = (
    ('http://images.cocodataset.org/val2017/000000039769.jpg', 'cats.jpg'),
    ('https://huggingface.co/datasets/nielsr/textcaps-sample/resolve/main/stop_sign.png', 'stop_sign.png'),
    ('https://cdn.openai.com/dall-e-2/demos/text2im/astronaut/horse/photo/0.jpg', 'astronaut.jpg'),
)
for _image_url, _image_path in _EXAMPLE_IMAGES:
    torch.hub.download_url_to_file(_image_url, _image_path)
# Checkpoint identifiers, named once so each processor/model pair is loaded
# from the same id.
_GIT_BASE_ID = "microsoft/git-base-coco"
_GIT_LARGE_ID = "microsoft/git-large-coco"
_BLIP_BASE_ID = "Salesforce/blip-image-captioning-base"
_BLIP_LARGE_ID = "Salesforce/blip-image-captioning-large"
_VITGPT_ID = "nlpconnect/vit-gpt2-image-captioning"

# GIT, base and large.
git_processor_base = AutoProcessor.from_pretrained(_GIT_BASE_ID)
git_model_base = AutoModelForCausalLM.from_pretrained(_GIT_BASE_ID)
git_processor_large = AutoProcessor.from_pretrained(_GIT_LARGE_ID)
git_model_large = AutoModelForCausalLM.from_pretrained(_GIT_LARGE_ID)

# BLIP, base and large.
blip_processor_base = AutoProcessor.from_pretrained(_BLIP_BASE_ID)
blip_model_base = BlipForConditionalGeneration.from_pretrained(_BLIP_BASE_ID)
blip_processor_large = AutoProcessor.from_pretrained(_BLIP_LARGE_ID)
blip_model_large = BlipForConditionalGeneration.from_pretrained(_BLIP_LARGE_ID)

# ViT+GPT-2: the image processor and the tokenizer are loaded separately.
vitgpt_processor = AutoImageProcessor.from_pretrained(_VITGPT_ID)
vitgpt_model = VisionEncoderDecoderModel.from_pretrained(_VITGPT_ID)
vitgpt_tokenizer = AutoTokenizer.from_pretrained(_VITGPT_ID)

# Use CUDA when PyTorch reports it as available, otherwise stay on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Move every model to the selected device.
for _loaded_model in (
    git_model_base,
    blip_model_base,
    git_model_large,
    blip_model_large,
    vitgpt_model,
):
    _loaded_model.to(device)
def generate_caption(processor, model, image, tokenizer=None):
    """Produce one caption for ``image`` using the given processor and model.

    Args:
        processor: Preprocessor called as
            ``processor(images=image, return_tensors="pt")``; it also decodes
            the generated ids when no ``tokenizer`` is supplied.
        model: Model whose ``generate`` method receives the pixel values.
        image: The image to caption, handed to ``processor`` unchanged.
        tokenizer: Optional decoder; when given, its ``batch_decode`` is used
            instead of the processor's.

    Returns:
        The first decoded sequence, with special tokens skipped.
    """
    # Preprocess and move the resulting tensors to the module-level `device`.
    pixel_values = processor(images=image, return_tensors="pt").to(device).pixel_values
    token_ids = model.generate(pixel_values=pixel_values, max_length=50)

    # Decode with the explicit tokenizer if there is one, else the processor.
    decoder = processor if tokenizer is None else tokenizer
    return decoder.batch_decode(token_ids, skip_special_tokens=True)[0]
def generate_captions(image):
    """Caption ``image`` with all five loaded models.

    Args:
        image: The image to caption; forwarded to ``generate_caption`` once
            per model.

    Returns:
        A 5-tuple of captions in the order GIT-base, GIT-large, BLIP-base,
        BLIP-large, ViT+GPT-2.
    """
    # (processor, model, tokenizer) per captioner; only ViT+GPT-2 decodes
    # with a separate tokenizer.
    captioners = (
        (git_processor_base, git_model_base, None),
        (git_processor_large, git_model_large, None),
        (blip_processor_base, blip_model_base, None),
        (blip_processor_large, blip_model_large, None),
        (vitgpt_processor, vitgpt_model, vitgpt_tokenizer),
    )
    return tuple(
        generate_caption(processor, model, image, tokenizer)
        for processor, model, tokenizer in captioners
    )
# Example inputs are currently disabled; the lines are kept for reference.
# examples = [["cats.jpg"], ["stop_sign.png"], ["astronaut.jpg"]]

# NOTE(review): gr.inputs / gr.outputs and Interface(enable_queue=...) look
# like the legacy Gradio API — confirm the pinned Gradio version before
# upgrading.

# One textbox per model, in the same order generate_captions returns them.
_OUTPUT_LABELS = (
    "Caption generated by GIT-base",
    "Caption generated by GIT-large",
    "Caption generated by BLIP-base",
    "Caption generated by BLIP-large",
    "Caption generated by ViT+GPT-2",
)
outputs = [gr.outputs.Textbox(label=label_text) for label_text in _OUTPUT_LABELS]

title = "Interactive demo: comparing image captioning models"
description = (
    "Gradio Demo to compare GIT, BLIP and ViT+GPT2, 3 state-of-the-art "
    "vision+language models. To use it, simply upload your image and click "
    "'submit', or click one of the examples to load them. Read more at the "
    "links below."
)
article = (
    "<p style='text-align: center'>"
    "<a href='https://huggingface.co/docs/transformers/main/model_doc/blip' target='_blank'>BLIP docs</a>"
    " | "
    "<a href='https://huggingface.co/docs/transformers/main/model_doc/git' target='_blank'>GIT docs</a>"
    "</p>"
)

iface = gr.Interface(
    fn=generate_captions,
    inputs=gr.inputs.Image(type="pil"),
    outputs=outputs,
    # examples=examples,
    title=title,
    description=description,
    article=article,
    enable_queue=True,
)

# Serve on all network interfaces at port 7860.
iface.launch(server_name="0.0.0.0", server_port=7860)
'''
import gradio as gr
import numpy as np
from PIL import Image
def generate_ascii_art(image):
try:
# Convert the numpy array to a PIL Image
img = Image.fromarray(np.uint8(image))
# Resize the image to a smaller size for faster processing
img = img.resize((80, 60))
# Convert the image to grayscale
img = img.convert("L")
# Define ASCII characters to represent different intensity levels
#ascii_chars = "@%#*+=-:. "
ascii_chars = "$@B%8&WM#*oahkbdpqwmZO0QLCJUYXzcvunxrjft/|()1{}[]?-_+~<>i!lI;:,\\^`'. "
# Convert each pixel to ASCII character based on intensity
ascii_image = ""
for pixel_value in img.getdata():
ascii_image += ascii_chars[pixel_value // 25]
# Reshape the ASCII string to match the resized image dimensions
ascii_image = "\n".join([ascii_image[i:i + img.width] for i in range(0, len(ascii_image), img.width)])
return ascii_image
except Exception as e:
return f"Error: {e}"
iface = gr.Interface(
fn=generate_ascii_art,
inputs="image",
outputs="text",
title="ASCII Art Generator",
description="Upload an image, and this app will turn it into ASCII art! - Simple Gradio App from Docker",
live=True
)
iface.launch(server_name="0.0.0.0", server_port=7860)
import gradio as gr
import subprocess
def run_command(command):
try:
result = subprocess.check_output(command, shell=True, text=True)
return result
except subprocess.CalledProcessError as e:
return f"Error: {e}"
iface = gr.Interface(
fn=run_command,
inputs="text",
outputs="text",
#live=True,
title="Command Output Viewer",
description="Enter a command and view its output.",
examples=[
["ls"],
["pwd"],
["echo 'Hello, Gradio!'"]]
)
iface.launch(server_name="0.0.0.0", server_port=7860)
'''