Spaces:
Configuration error
Configuration error
app add
Browse files- app.py +156 -0
- requirements.txt +4 -0
- bird_audio.wav → test_inputs/bird_audio.wav +0 -0
- bird_image.jpg → test_inputs/bird_image.jpg +0 -0
- car.jpg → test_inputs/car.jpg +0 -0
- car_audio.wav → test_inputs/car_audio.wav +0 -0
- dog_audio.wav → test_inputs/dog_audio.wav +0 -0
- dog_image.jpg → test_inputs/dog_image.jpg +0 -0
- dragon-growl-37570.wav → test_inputs/dragon-growl-37570.wav +0 -0
- dragon.jpg → test_inputs/dragon.jpg +0 -0
app.py
ADDED
@@ -0,0 +1,156 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import lancedb
|
2 |
+
import lancedb.embeddings.imagebind
|
3 |
+
from lancedb.embeddings import get_registry
|
4 |
+
from lancedb.pydantic import LanceModel, Vector
|
5 |
+
import pandas as pd
|
6 |
+
|
7 |
+
# Instantiate the "imagebind" embedding function from LanceDB's embedding
# registry; it supplies the source/vector field declarations used below.
model = get_registry().get("imagebind").create()


class TextModel(LanceModel):
    """Row schema used for the ``img_bind`` table and for search results.

    Each row ties together a caption, an image file and an audio file that
    describe the same concept, plus one embedding vector.
    """

    # Human-readable caption for the record.
    text: str
    # Declared as the embedding function's source field.
    # NOTE(review): presumably LanceDB embeds this column on insert to fill
    # `vector` -- confirm against the LanceDB embedding-function docs.
    image_uri: str = model.SourceField()
    # Path of the audio clip associated with the record (plain string column).
    audio_path: str
    # Embedding column; its width is whatever the model reports via ndims().
    vector: Vector(model.ndims()) = model.VectorField()
|
14 |
+
|
15 |
+
# Three parallel sample lists: position i in each list describes the same
# concept (caption, picture, sound).
text_list = ["A dragon", "A car", "A bird"]
image_paths = [
    "./test_inputs/dragon.jpg",
    "./test_inputs/car.jpg",
    "./test_inputs/bird_image.jpg",
]
audio_paths = [
    "./test_inputs/dragon-growl-37570.wav",
    "./test_inputs/car_audio.wav",
    "./test_inputs/bird_audio.wav",
]

# Load data: one record per concept, keyed by the table's column names.
inputs = [
    dict(zip(("text", "audio_path", "image_uri"), row))
    for row in zip(text_list, audio_paths, image_paths)
]
|
34 |
+
|
35 |
+
|
36 |
+
# Open the LanceDB database stored under the user's home directory.
db = lancedb.connect("~/lancedb")
# The database directory persists between runs, and the default create mode
# fails when "img_bind" already exists. Overwriting makes start-up repeatable
# and avoids piling up duplicate rows, since the samples are re-added below.
table = db.create_table("img_bind", schema=TextModel, mode="overwrite")
# Insert the sample records built above.
table.add(inputs)
|
39 |
+
|
40 |
+
import gradio as gr
|
41 |
+
|
42 |
+
def process_image(inp_img) -> tuple[str, str]:
    """Look up the stored record closest to an image query.

    Args:
        inp_img: Query handed to ``table.search``. The UI wires this to an
            image component configured with ``type='filepath'``.

    Returns:
        A ``(text, audio_path)`` pair taken from the top-ranked record.

    Raises:
        IndexError: If the search returns no rows.
    """
    matches = (
        table.search(inp_img, vector_column_name="vector")
        .limit(1)
        .to_pydantic(TextModel)
    )
    best = matches[0]
    return best.text, best.audio_path
|
51 |
+
|
52 |
+
def process_text(inp_text) -> tuple[str, str]:
    """Look up the stored record closest to a text prompt.

    Args:
        inp_text: Query handed to ``table.search``. The UI wires this to the
            prompt textbox.

    Returns:
        An ``(image_uri, audio_path)`` pair taken from the top-ranked record.

    Raises:
        IndexError: If the search returns no rows.
    """
    matches = (
        table.search(inp_text, vector_column_name="vector")
        .limit(1)
        .to_pydantic(TextModel)
    )
    best = matches[0]
    return best.image_uri, best.audio_path
|
61 |
+
|
62 |
+
def process_audio(inp_audio) -> tuple[str, str]:
    """Look up the stored record closest to an audio query.

    Args:
        inp_audio: Query handed to ``table.search``. The UI wires this to an
            audio component configured with ``type='filepath'``.

    Returns:
        An ``(image_uri, text)`` pair taken from the top-ranked record.

    Raises:
        IndexError: If the search returns no rows.
    """
    matches = (
        table.search(inp_audio, vector_column_name="vector")
        .limit(1)
        .to_pydantic(TextModel)
    )
    best = matches[0]
    return best.image_uri, best.text
|
71 |
+
|
72 |
+
# Custom stylesheet passed to gr.Blocks.
# NOTE(review): the first rule uses bare names (no leading '#'), so it is a
# type selector and does not match the elem_id values set on the components.
# It is left unchanged on purpose: turning it into '#output-audio,
# #output-text' would hide the outputs and the prompt textbox.
# The 'img' rule was missing its closing brace; it is now terminated.
css = """
output-audio, output-text {
    display: none;
}
img {
    width: 300px;
    height: 450px;
    object-fit: cover;
}
"""
|
82 |
+
with gr.Blocks(css=css) as app:
    # Tab 1: one column per sample image, each with its own outputs and button.
    image_panels = []
    with gr.Tab("Image to Text and Audio"):
        with gr.Row():
            for sample_image in (
                "./test_inputs/dragon.jpg",
                "./test_inputs/car.jpg",
                "./test_inputs/bird_image.jpg",
            ):
                with gr.Column():
                    picture = gr.Image(value=sample_image, type="filepath", elem_id="img")
                    sound_out = gr.Audio(label="Output Audio", elem_id="output-audio")
                    caption_out = gr.Textbox(label="Output Text", elem_id="output-text")
                    retrieve = gr.Button("Retrieve")
                image_panels.append((retrieve, picture, caption_out, sound_out))

    # Tab 2: a single free-text prompt; results appear when the prompt is submitted.
    with gr.Tab("Text to Image and Audio"):
        with gr.Row():
            with gr.Column():
                input_txt1 = gr.Textbox(label="Enter a prompt:", elem_id="output-text")
                output_audio4 = gr.Audio(label="Output Audio", elem_id="output-audio")
                output_img1 = gr.Image(type="filepath", elem_id="img")

    # Tab 3: one column per sample audio clip, each with its own outputs and button.
    audio_panels = []
    with gr.Tab("Audio to Image and Text"):
        with gr.Row():
            for sample_audio in (
                "./test_inputs/dragon-growl-37570.wav",
                "./test_inputs/car_audio.wav",
                "./test_inputs/bird_audio.wav",
            ):
                with gr.Column():
                    clip = gr.Audio(value=sample_audio, type="filepath", elem_id="output-audio")
                    picture_out = gr.Image(type="filepath", elem_id="img")
                    caption_out = gr.Textbox(label="Output Text", elem_id="output-text")
                    retrieve = gr.Button("Retrieve")
                audio_panels.append((retrieve, clip, picture_out, caption_out))

    # Click actions for buttons/Textboxes, registered after the layout is built
    # and in the same order as the columns appear.
    for retrieve, picture, caption_out, sound_out in image_panels:
        retrieve.click(process_image, inputs=[picture], outputs=[caption_out, sound_out])

    input_txt1.submit(process_text, inputs=[input_txt1], outputs=[output_img1, output_audio4])

    for retrieve, clip, picture_out, caption_out in audio_panels:
        retrieve.click(process_audio, inputs=[clip], outputs=[picture_out, caption_out])
|
153 |
+
|
154 |
+
# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    # share=True asks Gradio to also create a public share link.
    app.launch(share=True)
|
156 |
+
|
requirements.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
lancedb
|
2 |
+
gradio
|
3 |
+
pandas
|
4 |
+
imagebind@git+https://github.com/raghavdixit99/ImageBind.git
|
bird_audio.wav → test_inputs/bird_audio.wav
RENAMED
File without changes
|
bird_image.jpg → test_inputs/bird_image.jpg
RENAMED
File without changes
|
car.jpg → test_inputs/car.jpg
RENAMED
File without changes
|
car_audio.wav → test_inputs/car_audio.wav
RENAMED
File without changes
|
dog_audio.wav → test_inputs/dog_audio.wav
RENAMED
File without changes
|
dog_image.jpg → test_inputs/dog_image.jpg
RENAMED
File without changes
|
dragon-growl-37570.wav → test_inputs/dragon-growl-37570.wav
RENAMED
File without changes
|
dragon.jpg → test_inputs/dragon.jpg
RENAMED
File without changes
|