File size: 2,520 Bytes
fd6e378
 
 
 
 
4806a36
c597ebb
 
fd6e378
 
 
 
 
 
 
 
 
 
646a2c5
fd6e378
 
 
 
 
 
 
 
 
c597ebb
 
 
 
fd6e378
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import lancedb
import lancedb.embeddings.imagebind
from lancedb.embeddings import get_registry
from lancedb.pydantic import LanceModel, Vector
import gradio as gr
from downloader import dowload_and_save_audio, dowload_and_save_image, base_path
import os
import shutil

# Embedding function looked up under the name "imagebind" in LanceDB's
# embedding registry; the table schema in this file takes its source field,
# vector field and vector size from it.
# NOTE(review): presumably ImageBind is what lets text, image and audio queries
# share one vector column — confirm against the LanceDB documentation.
model = get_registry().get("imagebind").create()

class TextModel(LanceModel):
    """Row schema: a text label, an image location, an audio path and a vector."""
    # Text label stored with the row.
    text: str
    # Image location, declared through model.SourceField().
    # NOTE(review): presumably this marks the column the embedding function
    # reads when rows are added — confirm against the LanceDB docs.
    image_uri: str = model.SourceField()
    # Path of the audio file stored with the row.
    audio_path: str
    # Vector column whose size comes from model.ndims(), declared through
    # model.VectorField().
    vector: Vector(model.ndims()) = model.VectorField()


# Captions for the demo items; they are paired position-by-position with the
# downloaded audio and image files below.
text_list = [
    "A bird",
    "A dragon",
    "A car",
    "A guitar",
    "A witch",
    "Thunder",
]
image_paths = dowload_and_save_image()
audio_paths = dowload_and_save_audio()

# One record per (caption, audio file, image file) triple.
inputs = [
    {"text": caption, "audio_path": audio_file, "image_uri": image_file}
    for caption, audio_file, image_file in zip(
        text_list, audio_paths, image_paths
    )
]

# Remove any database directory left by a previous run before connecting.
dirpath = "/tmp/lancedb"
if os.path.isdir(dirpath):
    shutil.rmtree(dirpath)

db = lancedb.connect(dirpath)
table = db.create_table("img_bind", schema=TextModel)
table.add(inputs)


def process_image(inp_img) -> tuple[str, str]:
    """Look up the stored row closest to an image query.

    Args:
        inp_img: Image query passed to ``table.search``.

    Returns:
        A ``(text, audio_path)`` pair taken from the single best match.
    """
    # limit(1) keeps only the top match; [0] unwraps that single row.
    actual = (
        table.search(inp_img, vector_column_name="vector")
        .limit(1)
        .to_pydantic(TextModel)[0]
    )

    return actual.text, actual.audio_path


def process_text(inp_text) -> tuple[str, str]:
    """Look up the stored row closest to a text query.

    Args:
        inp_text: Text query passed to ``table.search``.

    Returns:
        An ``(image_uri, audio_path)`` pair taken from the single best match.
    """
    # limit(1) keeps only the top match; [0] unwraps that single row.
    actual = (
        table.search(inp_text, vector_column_name="vector")
        .limit(1)
        .to_pydantic(TextModel)[0]
    )

    return actual.image_uri, actual.audio_path


def process_audio(inp_audio) -> tuple[str, str]:
    """Look up the stored row closest to an audio query.

    Args:
        inp_audio: Audio query passed to ``table.search``.

    Returns:
        An ``(image_uri, text)`` pair taken from the single best match.
    """
    # limit(1) keeps only the top match; [0] unwraps that single row.
    actual = (
        table.search(inp_audio, vector_column_name="vector")
        .limit(1)
        .to_pydantic(TextModel)[0]
    )

    return actual.image_uri, actual.text


# Tab 1: image query -> matching text and audio.
im_to_at = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="filepath", value=image_paths[0]),
    outputs=[gr.Text(label="Output Text"), gr.Audio(label="Output Audio")],
    examples=image_paths,
    allow_flagging="never",
)

# Tab 2: text query -> matching image and audio.
txt_to_ia = gr.Interface(
    fn=process_text,
    inputs=gr.Textbox(label="Enter a prompt:"),
    outputs=[gr.Image(label="Output Image"), gr.Audio(label="Output Audio")],
    allow_flagging="never",
    examples=text_list,
)

# Tab 3: audio query -> matching image and text.
a_to_it = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(type="filepath", value=audio_paths[0]),
    outputs=[gr.Image(label="Output Image"), gr.Text(label="Output Text")],
    examples=audio_paths,
    allow_flagging="never",
)

# Combine the three interfaces into one app, one tab per query modality.
demo = gr.TabbedInterface(
    interface_list=[im_to_at, txt_to_ia, a_to_it],
    tab_names=[
        "Image to Text/Audio",
        "Text to Image/Audio",
        "Audio to Image/Text",
    ],
)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    # allowed_paths names the test_inputs directory under base_path.
    # NOTE(review): presumably that is where the downloader stores the image
    # and audio files the demo serves, and share=True requests a public
    # Gradio link — confirm both against the downloader and the Gradio docs.
    demo.launch(share=True, allowed_paths=[f"{base_path}/test_inputs/"])