Spaces:
Runtime error
Runtime error
File size: 2,520 Bytes
fd6e378 4806a36 c597ebb fd6e378 646a2c5 fd6e378 c597ebb fd6e378 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 |
import lancedb
import lancedb.embeddings.imagebind
from lancedb.embeddings import get_registry
from lancedb.pydantic import LanceModel, Vector
import gradio as gr
from downloader import dowload_and_save_audio, dowload_and_save_image, base_path
import os
import shutil
# Embedding function shared by the table schema: the "imagebind" entry looked
# up in the lancedb.embeddings registry and instantiated with default options.
model = get_registry().get("imagebind").create()
class TextModel(LanceModel):
    """Row schema pairing one caption with an image and an audio clip.

    ``image_uri`` is declared through ``model.SourceField()`` and ``vector``
    through ``model.VectorField()``, so the embedding column is tied to the
    module-level embedding ``model``.
    """

    # Caption text for the item.
    text: str
    # Location of the item's image; marked as the embedding source field.
    image_uri: str = model.SourceField()
    # Location of the item's audio clip.
    audio_path: str
    # Embedding column, sized to the dimensionality reported by the model.
    vector: Vector(model.ndims()) = model.VectorField()
# Captions for the demo items; each one is matched by position with the
# downloaded image and audio file at the same index.
text_list = ["A bird", "A dragon", "A car", "A guitar", "A witch", "Thunder"]
image_paths = dowload_and_save_image()
audio_paths = dowload_and_save_audio()

# Load data
inputs = [
    {"text": caption, "audio_path": audio_file, "image_uri": image_file}
    for caption, audio_file, image_file in zip(text_list, audio_paths, image_paths)
]

# Start from an empty database directory on every run (isdir() is already
# False for a missing path, so no separate existence check is needed).
dirpath = "/tmp/lancedb"
if os.path.isdir(dirpath):
    shutil.rmtree(dirpath)

# Create the table from the schema, then insert the caption/audio/image rows.
db = lancedb.connect(dirpath)
table = db.create_table("img_bind", schema=TextModel)
table.add(inputs)
def process_image(inp_img) -> tuple[str, str]:
    """Return the caption and audio clip of the best match for an image.

    Args:
        inp_img: Image query from the Gradio ``Image`` input (configured
            with ``type="filepath"``, so presumably a path string --
            confirm against the Gradio version in use).

    Returns:
        A ``(text, audio_path)`` pair read from the top search hit.

    Raises:
        IndexError: If the search produces no rows.
    """
    # Only the single closest row is needed, so cap the search at one hit.
    matches = (
        table.search(inp_img, vector_column_name="vector")
        .limit(1)
        .to_pydantic(TextModel)
    )
    best = matches[0]
    return best.text, best.audio_path
def process_text(inp_text) -> tuple[str, str]:
    """Return the image and audio clip of the best match for a text prompt.

    Args:
        inp_text: Prompt string from the Gradio ``Textbox`` input.

    Returns:
        An ``(image_uri, audio_path)`` pair read from the top search hit.

    Raises:
        IndexError: If the search produces no rows.
    """
    # Only the single closest row is needed, so cap the search at one hit.
    matches = (
        table.search(inp_text, vector_column_name="vector")
        .limit(1)
        .to_pydantic(TextModel)
    )
    best = matches[0]
    return best.image_uri, best.audio_path
def process_audio(inp_audio) -> tuple[str, str]:
    """Return the image and caption of the best match for an audio clip.

    Args:
        inp_audio: Audio query from the Gradio ``Audio`` input (configured
            with ``type="filepath"``, so presumably a path string --
            confirm against the Gradio version in use).

    Returns:
        An ``(image_uri, text)`` pair read from the top search hit.

    Raises:
        IndexError: If the search produces no rows.
    """
    # Only the single closest row is needed, so cap the search at one hit.
    matches = (
        table.search(inp_audio, vector_column_name="vector")
        .limit(1)
        .to_pydantic(TextModel)
    )
    best = matches[0]
    return best.image_uri, best.text
# Tab 1: image query -> matching caption and audio clip.
im_to_at = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="filepath", value=image_paths[0]),
    outputs=[gr.Text(label="Output Text"), gr.Audio(label="Output Audio")],
    examples=image_paths,
    allow_flagging="never",
)

# Tab 2: text prompt -> matching image and audio clip.
txt_to_ia = gr.Interface(
    fn=process_text,
    inputs=gr.Textbox(label="Enter a prompt:"),
    outputs=[gr.Image(label="Output Image"), gr.Audio(label="Output Audio")],
    allow_flagging="never",
    examples=text_list,
)

# Tab 3: audio query -> matching image and caption.
a_to_it = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(type="filepath", value=audio_paths[0]),
    outputs=[gr.Image(label="Output Image"), gr.Text(label="Output Text")],
    examples=audio_paths,
    allow_flagging="never",
)

# Combine the three interfaces into one tabbed app; titles follow list order.
demo = gr.TabbedInterface(
    interface_list=[im_to_at, txt_to_ia, a_to_it],
    tab_names=[
        "Image to Text/Audio",
        "Text to Image/Audio",
        "Audio to Image/Text",
    ],
)
# Launch the app only when this file is executed as a script. The
# test_inputs directory under base_path is passed as an allowed path.
if __name__ == "__main__":
    demo.launch(
        share=True,
        allowed_paths=[f"{base_path}/test_inputs/"],
    )
|