Spaces:
Running
Running
app update
Browse files- app.py +90 -0
- downloader.py +90 -0
- requirements.txt +4 -0
app.py
ADDED
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import lancedb
|
2 |
+
import lancedb.embeddings.imagebind
|
3 |
+
from lancedb.embeddings import get_registry
|
4 |
+
from lancedb.pydantic import LanceModel, Vector
|
5 |
+
import gradio as gr
|
6 |
+
from downloader import dowload_and_save_audio, dowload_and_save_image, base_path
|
7 |
+
|
8 |
+
# Look up the "imagebind" embedding function in LanceDB's embedding registry
# and instantiate it; the schema below uses it for its source/vector fields.
model = get_registry().get("imagebind").create()
|
9 |
+
|
10 |
+
|
11 |
+
class TextModel(LanceModel):
    """Row schema for the ``img_bind`` table.

    Each row ties together a caption, an image and an audio clip.
    ``image_uri`` is declared as the embedding source field and ``vector``
    as the embedding vector field of the module-level ``model``.
    """

    # Caption associated with the row.
    text: str
    # Image location; registered as the embedding function's source field.
    image_uri: str = model.SourceField()
    # Path of the audio clip paired with the image.
    audio_path: str
    # Embedding column, sized from the embedding function's dimensionality.
    vector: Vector(model.ndims()) = model.VectorField()
|
16 |
+
|
17 |
+
|
18 |
+
# Captions for the demo rows; zipped positionally with the downloaded files.
text_list = ["A bird", "A dragon", "A car"]
# NOTE(review): both helpers come from `downloader`; they presumably return
# lists of local file paths in the same order as `text_list` -- confirm.
image_paths = dowload_and_save_image()
audio_paths = dowload_and_save_audio()

# Load data: one record per (caption, audio clip, image) triple.
inputs = [
    {"text": caption, "audio_path": audio, "image_uri": image}
    for caption, audio, image in zip(text_list, audio_paths, image_paths)
]

db = lancedb.connect("/tmp/lancedb")
# mode="overwrite" makes start-up repeatable: with the default create mode a
# restart against the same /tmp/lancedb directory fails because "img_bind"
# already exists. The table is rebuilt from `inputs` on every start.
table = db.create_table("img_bind", schema=TextModel, mode="overwrite")
table.add(inputs)
|
31 |
+
|
32 |
+
|
33 |
+
def process_image(inp_img) -> tuple[str, str]:
    """Return the text and audio path of the top match for an image query.

    Args:
        inp_img: Image query passed to ``table.search``; the UI in this file
            supplies it from a ``gr.Image(type="filepath")`` component.

    Returns:
        A ``(text, audio_path)`` pair taken from the best-matching row.

    Raises:
        IndexError: If the search returns no rows.
    """
    hits = (
        table.search(inp_img, vector_column_name="vector")
        .limit(1)
        .to_pydantic(TextModel)
    )
    best = hits[0]
    return best.text, best.audio_path
|
41 |
+
|
42 |
+
|
43 |
+
def process_text(inp_text) -> tuple[str, str]:
    """Return the image URI and audio path of the top match for a text query.

    Args:
        inp_text: Text query passed to ``table.search``; the UI in this file
            supplies it from a ``gr.Textbox`` component.

    Returns:
        An ``(image_uri, audio_path)`` pair taken from the best-matching row.

    Raises:
        IndexError: If the search returns no rows.
    """
    hits = (
        table.search(inp_text, vector_column_name="vector")
        .limit(1)
        .to_pydantic(TextModel)
    )
    best = hits[0]
    return best.image_uri, best.audio_path
|
51 |
+
|
52 |
+
|
53 |
+
def process_audio(inp_audio) -> tuple[str, str]:
    """Return the image URI and text of the top match for an audio query.

    Args:
        inp_audio: Audio query passed to ``table.search``; the UI in this
            file supplies it from a ``gr.Audio(type="filepath")`` component.

    Returns:
        An ``(image_uri, text)`` pair taken from the best-matching row.

    Raises:
        IndexError: If the search returns no rows.
    """
    hits = (
        table.search(inp_audio, vector_column_name="vector")
        .limit(1)
        .to_pydantic(TextModel)
    )
    best = hits[0]
    return best.image_uri, best.text
|
61 |
+
|
62 |
+
|
63 |
+
# Tab 1: image query -> matching text and audio.
im_to_at = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="filepath", value=image_paths[0]),
    outputs=[gr.Text(label="Output Text"), gr.Audio(label="Output Audio")],
    examples=image_paths,
    allow_flagging="never",
)

# Tab 2: text query -> matching image and audio.
txt_to_ia = gr.Interface(
    fn=process_text,
    inputs=gr.Textbox(label="Enter a prompt:"),
    outputs=[gr.Image(label="Output Image"), gr.Audio(label="Output Audio")],
    allow_flagging="never",
    examples=text_list,
)

# Tab 3: audio query -> matching image and text.
a_to_it = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(type="filepath", value=audio_paths[0]),
    outputs=[gr.Image(label="Output Image"), gr.Text(label="Output Text")],
    examples=audio_paths,
    allow_flagging="never",
)

# Combine the three interfaces into a single tabbed app.
demo = gr.TabbedInterface(
    interface_list=[im_to_at, txt_to_ia, a_to_it],
    tab_names=[
        "Image to Text/Audio",
        "Text to Image/Audio",
        "Audio to Image/Text",
    ],
)

if __name__ == "__main__":
    # The downloaded inputs directory is passed as an allowed path so the
    # app may serve the example files from it.
    demo.launch(
        share=True,
        allowed_paths=[f"{base_path}/test_inputs/"],
    )
|
downloader.py
ADDED
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import lancedb
|
2 |
+
import lancedb.embeddings.imagebind
|
3 |
+
from lancedb.embeddings import get_registry
|
4 |
+
from lancedb.pydantic import LanceModel, Vector
|
5 |
+
import gradio as gr
|
6 |
+
from downloader import dowload_and_save_audio, dowload_and_save_image, base_path
|
7 |
+
|
8 |
+
# Look up the "imagebind" embedding function in LanceDB's embedding registry
# and instantiate it; the schema below uses it for its source/vector fields.
model = get_registry().get("imagebind").create()
|
9 |
+
|
10 |
+
|
11 |
+
class TextModel(LanceModel):
    """Row schema for the ``img_bind`` table.

    Each row ties together a caption, an image and an audio clip.
    ``image_uri`` is declared as the embedding source field and ``vector``
    as the embedding vector field of the module-level ``model``.
    """

    # Caption associated with the row.
    text: str
    # Image location; registered as the embedding function's source field.
    image_uri: str = model.SourceField()
    # Path of the audio clip paired with the image.
    audio_path: str
    # Embedding column, sized from the embedding function's dimensionality.
    vector: Vector(model.ndims()) = model.VectorField()
|
16 |
+
|
17 |
+
|
18 |
+
# Captions for the demo rows; zipped positionally with the downloaded files.
text_list = ["A bird", "A dragon", "A car"]
# NOTE(review): both helpers come from `downloader`; they presumably return
# lists of local file paths in the same order as `text_list` -- confirm.
image_paths = dowload_and_save_image()
audio_paths = dowload_and_save_audio()

# Load data: one record per (caption, audio clip, image) triple.
inputs = [
    {"text": caption, "audio_path": audio, "image_uri": image}
    for caption, audio, image in zip(text_list, audio_paths, image_paths)
]

db = lancedb.connect("/tmp/lancedb")
# mode="overwrite" makes start-up repeatable: with the default create mode a
# restart against the same /tmp/lancedb directory fails because "img_bind"
# already exists. The table is rebuilt from `inputs` on every start.
table = db.create_table("img_bind", schema=TextModel, mode="overwrite")
table.add(inputs)
|
31 |
+
|
32 |
+
|
33 |
+
def process_image(inp_img) -> tuple[str, str]:
    """Return the text and audio path of the top match for an image query.

    Args:
        inp_img: Image query passed to ``table.search``; the UI in this file
            supplies it from a ``gr.Image(type="filepath")`` component.

    Returns:
        A ``(text, audio_path)`` pair taken from the best-matching row.

    Raises:
        IndexError: If the search returns no rows.
    """
    hits = (
        table.search(inp_img, vector_column_name="vector")
        .limit(1)
        .to_pydantic(TextModel)
    )
    best = hits[0]
    return best.text, best.audio_path
|
41 |
+
|
42 |
+
|
43 |
+
def process_text(inp_text) -> tuple[str, str]:
    """Return the image URI and audio path of the top match for a text query.

    Args:
        inp_text: Text query passed to ``table.search``; the UI in this file
            supplies it from a ``gr.Textbox`` component.

    Returns:
        An ``(image_uri, audio_path)`` pair taken from the best-matching row.

    Raises:
        IndexError: If the search returns no rows.
    """
    hits = (
        table.search(inp_text, vector_column_name="vector")
        .limit(1)
        .to_pydantic(TextModel)
    )
    best = hits[0]
    return best.image_uri, best.audio_path
|
51 |
+
|
52 |
+
|
53 |
+
def process_audio(inp_audio) -> tuple[str, str]:
    """Return the image URI and text of the top match for an audio query.

    Args:
        inp_audio: Audio query passed to ``table.search``; the UI in this
            file supplies it from a ``gr.Audio(type="filepath")`` component.

    Returns:
        An ``(image_uri, text)`` pair taken from the best-matching row.

    Raises:
        IndexError: If the search returns no rows.
    """
    hits = (
        table.search(inp_audio, vector_column_name="vector")
        .limit(1)
        .to_pydantic(TextModel)
    )
    best = hits[0]
    return best.image_uri, best.text
|
61 |
+
|
62 |
+
|
63 |
+
# Tab 1: image query -> matching text and audio.
im_to_at = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="filepath", value=image_paths[0]),
    outputs=[gr.Text(label="Output Text"), gr.Audio(label="Output Audio")],
    examples=image_paths,
    allow_flagging="never",
)

# Tab 2: text query -> matching image and audio.
txt_to_ia = gr.Interface(
    fn=process_text,
    inputs=gr.Textbox(label="Enter a prompt:"),
    outputs=[gr.Image(label="Output Image"), gr.Audio(label="Output Audio")],
    allow_flagging="never",
    examples=text_list,
)

# Tab 3: audio query -> matching image and text.
a_to_it = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(type="filepath", value=audio_paths[0]),
    outputs=[gr.Image(label="Output Image"), gr.Text(label="Output Text")],
    examples=audio_paths,
    allow_flagging="never",
)

# Combine the three interfaces into a single tabbed app.
demo = gr.TabbedInterface(
    interface_list=[im_to_at, txt_to_ia, a_to_it],
    tab_names=[
        "Image to Text/Audio",
        "Text to Image/Audio",
        "Audio to Image/Text",
    ],
)

if __name__ == "__main__":
    # The downloaded inputs directory is passed as an allowed path so the
    # app may serve the example files from it.
    demo.launch(
        share=True,
        allowed_paths=[f"{base_path}/test_inputs/"],
    )
|
requirements.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
lancedb
|
2 |
+
gradio
|
3 |
+
pandas
|
4 |
+
imagebind@git+https://github.com/raghavdixit99/ImageBind.git
|