Spaces:
Runtime error
Runtime error
File size: 5,550 Bytes
c298f18 3cd7c47 c298f18 27aa20b 7f8b190 c298f18 556dab2 c298f18 3cd7c47 c298f18 3cd7c47 c298f18 3cd7c47 c298f18 3cd7c47 c298f18 3cd7c47 c298f18 3cd7c47 c298f18 3cd7c47 c298f18 3cd7c47 c298f18 3cd7c47 c298f18 3cd7c47 c298f18 3cd7c47 c298f18 7b56fcc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
import gradio as gr
from lavis.models import load_model_and_preprocess
import torch
# Pick the compute device once at import time: CUDA when torch reports it as
# available, CPU otherwise. Always a torch.device (the original mixed a
# torch.device on the CUDA path with a plain "cpu" string on the other).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Identifiers handed to LAVIS to select the architecture and checkpoint.
model_name = "blip2_t5_instruct"
model_type = "flant5xl"

# Load the model in eval mode together with its visual preprocessors.
# The third returned value is not used anywhere in this file.
model, vis_processors, _ = load_model_and_preprocess(
    name=model_name,
    model_type=model_type,
    is_eval=True,
    device=device,
)
def infer(image, prompt, min_len, max_len, beam_size, len_penalty, repetition_penalty, top_p, decoding_method):
    """Generate a text response for an image/prompt pair.

    Args:
        image: Input picture; preprocessed with ``vis_processors["eval"]``.
            The UI supplies a PIL image, or ``None`` when nothing was uploaded.
        prompt: Instruction text passed to the model alongside the image.
        min_len: Minimum generation length (coerced to ``int``).
        max_len: Maximum generation length (coerced to ``int``).
        beam_size: Number of beams (coerced to ``int``).
        len_penalty: Length penalty (coerced to ``float``).
        repetition_penalty: Repetition penalty (coerced to ``float``).
        top_p: Nucleus-sampling probability mass (coerced to ``float``).
        decoding_method: Nucleus sampling is enabled only when this equals
            ``"Nucleus sampling"``; any other value leaves it disabled.

    Returns:
        The first element of whatever ``model.generate`` returns
        (presumably the generated string -- confirm against LAVIS).

    Raises:
        gr.Error: If ``image`` is ``None``.
    """
    # Fail with a readable message instead of an opaque preprocessing error
    # when the user presses "Run" without providing an image.
    if image is None:
        raise gr.Error("Please provide an image before running the model.")

    use_nucleus_sampling = decoding_method == "Nucleus sampling"

    # Preprocess, add a leading batch dimension, and move to the model device.
    image = vis_processors["eval"](image).unsqueeze(0).to(device)
    samples = {
        "image": image,
        "prompt": prompt,
    }

    # UI sliders may deliver numbers as floats; the count/length arguments
    # are integral, so normalise all numeric inputs explicitly.
    output = model.generate(
        samples,
        length_penalty=float(len_penalty),
        repetition_penalty=float(repetition_penalty),
        num_beams=int(beam_size),
        max_length=int(max_len),
        min_length=int(min_len),
        top_p=float(top_p),
        use_nucleus_sampling=use_nucleus_sampling,
    )
    return output[0]
# Font preference order for the UI: the "Open Sans" Google font first, then
# generic sans-serif fallbacks.
_font_stack = [
    gr.themes.GoogleFont("Open Sans"),
    "ui-sans-serif",
    "system-ui",
    "sans-serif",
]

# Monochrome base theme with custom hues, small corner radius and the font
# stack above.
theme = gr.themes.Monochrome(
    primary_hue="indigo",
    secondary_hue="blue",
    neutral_hue="slate",
    radius_size=gr.themes.sizes.radius_sm,
    font=_font_stack,
)

# Extra CSS injected into the page: hides any element carrying the
# `generating` class.
css = ".generating {visibility: hidden}"

# One example row. Column order must match the `inputs` list given to
# gr.Examples: image, prompt, min length, max length, beam size,
# length penalty, repetition penalty, top p, decoding method.
_banff_example = [
    "banff.jpg",
    "Can you tell me about this image in detail",
    1,
    200,
    5,
    1,
    3,
    0.9,
    "Beam search",
]
examples = [_banff_example]
# Build the demo page: a header, an image/prompt/output column with a Run
# button, and a narrower column of (read-only) generation settings.
with gr.Blocks(theme=theme, analytics_enabled=False, css=css) as demo:
    gr.Markdown("## InstructBLIP: Towards General-purpose Vision-Language Models with Instruction Tuning")
    # Same text as a triple-quoted block with a leading and trailing newline,
    # written as concatenated literals so it can sit inside the indentation.
    gr.Markdown(
        "\n"
        "Unofficial demo for InstructBLIP. InstructBLIP is a new vision-language instruction-tuning framework by Salesforce that uses BLIP-2 models, achieving state-of-the-art zero-shot generalization performance on a wide range of vision-language tasks.\n"
        'The demo is based on the official <a href="https://github.com/salesforce/LAVIS/tree/main/projects/instructblip" style="text-decoration: underline;" target="_blank"> Github </a> implementation\n'
    )
    gr.HTML("<p>You can duplicate this Space to run it privately without a queue for shorter queue times : <a style='display:inline-block' href='https://huggingface.co/spaces/RamAnanth1/InstructBLIP?duplicate=true'><img src='https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=&logoWidth=14' alt='Duplicate Space'></a> </p>")

    with gr.Row():
        # Main column: what the user provides and the model's answer.
        with gr.Column(scale=3):
            image_input = gr.Image(type="pil")
            prompt_textbox = gr.Textbox(label="Prompt:", placeholder="prompt", value="Please describe what you see.", lines=2)
            output = gr.Textbox(label="Output")
            submit = gr.Button("Run", variant="primary")

        # Settings column. Every control is interactive=False, so the values
        # shown here are the ones used unless an example row overrides them.
        with gr.Column(scale=1):
            min_len = gr.Slider(
                minimum=1,
                maximum=100,
                value=25,
                step=1,
                interactive=False,
                label="Min Length",
            )
            max_len = gr.Slider(
                minimum=10,
                maximum=500,
                value=500,
                step=5,
                interactive=False,
                label="Max Length",
            )
            sampling = gr.Radio(
                choices=["Beam search", "Nucleus sampling"],
                value="Beam search",
                label="Text Decoding Method",
                interactive=False,
            )
            top_p = gr.Slider(
                minimum=0.5,
                maximum=1.0,
                value=0.9,
                step=0.1,
                interactive=False,
                label="Top p",
            )
            beam_size = gr.Slider(
                minimum=1,
                maximum=10,
                value=5,
                step=1,
                interactive=False,
                label="Beam Size",
            )
            len_penalty = gr.Slider(
                minimum=-1,
                maximum=2,
                value=0.4,
                step=0.2,
                interactive=False,
                label="Length Penalty",
            )
            # The default of 5 used to sit above the declared maximum of 3;
            # the range is widened so the default is a legal slider position
            # while the value sent to the model stays the same.
            repetition_penalty = gr.Slider(
                minimum=-1,
                maximum=5,
                value=5,
                step=0.2,
                interactive=False,
                label="Repetition Penalty",
            )

    # Single source of truth for the argument order expected by `infer`;
    # shared by the examples table and the Run button.
    infer_inputs = [
        image_input,
        prompt_textbox,
        min_len,
        max_len,
        beam_size,
        len_penalty,
        repetition_penalty,
        top_p,
        sampling,
    ]

    gr.Examples(
        examples=examples,
        inputs=infer_inputs,
        cache_examples=False,
        fn=infer,
        outputs=[output],
    )
    submit.click(infer, inputs=infer_inputs, outputs=[output])

# Enable request queuing, then start the server.
demo.queue(concurrency_count=16).launch(debug=True, share=True)
|