raghavd99 committed on
Commit
fd6e378
1 Parent(s): f93d3d2

app update

Browse files
Files changed (3) hide show
  1. app.py +90 -0
  2. downloader.py +90 -0
  3. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import lancedb
2
+ import lancedb.embeddings.imagebind
3
+ from lancedb.embeddings import get_registry
4
+ from lancedb.pydantic import LanceModel, Vector
5
+ import gradio as gr
6
+ from downloader import dowload_and_save_audio, dowload_and_save_image, base_path
7
+
8
# Look up LanceDB's ImageBind embedding function in the registry and
# instantiate it once; the schema below is built from this instance.
_imagebind = get_registry().get("imagebind")
model = _imagebind.create()


class TextModel(LanceModel):
    """Row schema for the table that stores the demo's text/image/audio triples.

    ``image_uri`` is declared as the embedding source field and ``vector`` as
    the embedding vector field of ``model``; ``text`` and ``audio_path`` are
    plain string columns. The vector width comes from ``model.ndims()``.
    """

    text: str
    image_uri: str = model.SourceField()
    audio_path: str
    vector: Vector(model.ndims()) = model.VectorField()
16
+
17
+
18
text_list = ["A bird", "A dragon", "A car"]
image_paths = dowload_and_save_image()
audio_paths = dowload_and_save_audio()

# Load data: one row per (text, audio, image) triple, paired by position.
inputs = [
    {"text": text, "audio_path": audio_path, "image_uri": image_uri}
    for text, audio_path, image_uri in zip(text_list, audio_paths, image_paths)
]

db = lancedb.connect("/tmp/lancedb")
# The database directory persists between runs, so a plain create_table()
# raises on the second start because "img_bind" already exists. Overwriting
# rebuilds the table from scratch and avoids accumulating duplicate rows.
table = db.create_table("img_bind", schema=TextModel, mode="overwrite")
table.add(inputs)
31
+
32
+
33
def process_image(inp_img) -> "tuple[str, str]":
    """Return the text and audio of the stored row closest to an image.

    Args:
        inp_img: The query image passed straight to ``table.search``
            (presumably a file path supplied by the Gradio image input).

    Returns:
        A ``(text, audio_path)`` pair taken from the best-matching row.
    """
    # Only the single nearest neighbour is needed.
    actual = (
        table.search(inp_img, vector_column_name="vector")
        .limit(1)
        .to_pydantic(TextModel)[0]
    )

    return actual.text, actual.audio_path
41
+
42
+
43
def process_text(inp_text) -> "tuple[str, str]":
    """Return the image and audio of the stored row closest to a text prompt.

    Args:
        inp_text: The query text passed straight to ``table.search``.

    Returns:
        An ``(image_uri, audio_path)`` pair taken from the best-matching row.
    """
    # Only the single nearest neighbour is needed.
    actual = (
        table.search(inp_text, vector_column_name="vector")
        .limit(1)
        .to_pydantic(TextModel)[0]
    )

    return actual.image_uri, actual.audio_path
51
+
52
+
53
def process_audio(inp_audio) -> "tuple[str, str]":
    """Return the image and text of the stored row closest to an audio clip.

    Args:
        inp_audio: The query audio passed straight to ``table.search``
            (presumably a file path supplied by the Gradio audio input).

    Returns:
        An ``(image_uri, text)`` pair taken from the best-matching row.
    """
    # Only the single nearest neighbour is needed.
    actual = (
        table.search(inp_audio, vector_column_name="vector")
        .limit(1)
        .to_pydantic(TextModel)[0]
    )

    return actual.image_uri, actual.text
61
+
62
+
63
# Tab 1: image query -> matching text and audio.
im_to_at = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="filepath", value=image_paths[0]),
    outputs=[gr.Text(label="Output Text"), gr.Audio(label="Output Audio")],
    examples=image_paths,
    allow_flagging="never",
)

# Tab 2: text query -> matching image and audio.
txt_to_ia = gr.Interface(
    fn=process_text,
    inputs=gr.Textbox(label="Enter a prompt:"),
    outputs=[gr.Image(label="Output Image"), gr.Audio(label="Output Audio")],
    allow_flagging="never",
    examples=text_list,
)

# Tab 3: audio query -> matching image and text.
a_to_it = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(type="filepath", value=audio_paths[0]),
    outputs=[gr.Image(label="Output Image"), gr.Text(label="Output Text")],
    examples=audio_paths,
    allow_flagging="never",
)

# Bundle the three single-purpose interfaces into one tabbed app.
demo = gr.TabbedInterface(
    interface_list=[im_to_at, txt_to_ia, a_to_it],
    tab_names=[
        "Image to Text/Audio",
        "Text to Image/Audio",
        "Audio to Image/Text",
    ],
)

if __name__ == "__main__":
    # The test_inputs directory is passed as an allowed path so Gradio may
    # serve files stored under it.
    demo.launch(share=True, allowed_paths=[f"{base_path}/test_inputs/"])
downloader.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import lancedb
2
+ import lancedb.embeddings.imagebind
3
+ from lancedb.embeddings import get_registry
4
+ from lancedb.pydantic import LanceModel, Vector
5
+ import gradio as gr
6
+ from downloader import dowload_and_save_audio, dowload_and_save_image, base_path
7
+
8
# Look up LanceDB's ImageBind embedding function in the registry and
# instantiate it once; the schema below is built from this instance.
_imagebind = get_registry().get("imagebind")
model = _imagebind.create()


class TextModel(LanceModel):
    """Row schema for the table that stores the demo's text/image/audio triples.

    ``image_uri`` is declared as the embedding source field and ``vector`` as
    the embedding vector field of ``model``; ``text`` and ``audio_path`` are
    plain string columns. The vector width comes from ``model.ndims()``.
    """

    text: str
    image_uri: str = model.SourceField()
    audio_path: str
    vector: Vector(model.ndims()) = model.VectorField()
16
+
17
+
18
text_list = ["A bird", "A dragon", "A car"]
image_paths = dowload_and_save_image()
audio_paths = dowload_and_save_audio()

# Load data: one row per (text, audio, image) triple, paired by position.
inputs = [
    {"text": text, "audio_path": audio_path, "image_uri": image_uri}
    for text, audio_path, image_uri in zip(text_list, audio_paths, image_paths)
]

db = lancedb.connect("/tmp/lancedb")
# The database directory persists between runs, so a plain create_table()
# raises on the second start because "img_bind" already exists. Overwriting
# rebuilds the table from scratch and avoids accumulating duplicate rows.
table = db.create_table("img_bind", schema=TextModel, mode="overwrite")
table.add(inputs)
31
+
32
+
33
def process_image(inp_img) -> "tuple[str, str]":
    """Return the text and audio of the stored row closest to an image.

    Args:
        inp_img: The query image passed straight to ``table.search``
            (presumably a file path supplied by the Gradio image input).

    Returns:
        A ``(text, audio_path)`` pair taken from the best-matching row.
    """
    # Only the single nearest neighbour is needed.
    actual = (
        table.search(inp_img, vector_column_name="vector")
        .limit(1)
        .to_pydantic(TextModel)[0]
    )

    return actual.text, actual.audio_path
41
+
42
+
43
def process_text(inp_text) -> "tuple[str, str]":
    """Return the image and audio of the stored row closest to a text prompt.

    Args:
        inp_text: The query text passed straight to ``table.search``.

    Returns:
        An ``(image_uri, audio_path)`` pair taken from the best-matching row.
    """
    # Only the single nearest neighbour is needed.
    actual = (
        table.search(inp_text, vector_column_name="vector")
        .limit(1)
        .to_pydantic(TextModel)[0]
    )

    return actual.image_uri, actual.audio_path
51
+
52
+
53
def process_audio(inp_audio) -> "tuple[str, str]":
    """Return the image and text of the stored row closest to an audio clip.

    Args:
        inp_audio: The query audio passed straight to ``table.search``
            (presumably a file path supplied by the Gradio audio input).

    Returns:
        An ``(image_uri, text)`` pair taken from the best-matching row.
    """
    # Only the single nearest neighbour is needed.
    actual = (
        table.search(inp_audio, vector_column_name="vector")
        .limit(1)
        .to_pydantic(TextModel)[0]
    )

    return actual.image_uri, actual.text
61
+
62
+
63
# Tab 1: image query -> matching text and audio.
im_to_at = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="filepath", value=image_paths[0]),
    outputs=[gr.Text(label="Output Text"), gr.Audio(label="Output Audio")],
    examples=image_paths,
    allow_flagging="never",
)

# Tab 2: text query -> matching image and audio.
txt_to_ia = gr.Interface(
    fn=process_text,
    inputs=gr.Textbox(label="Enter a prompt:"),
    outputs=[gr.Image(label="Output Image"), gr.Audio(label="Output Audio")],
    allow_flagging="never",
    examples=text_list,
)

# Tab 3: audio query -> matching image and text.
a_to_it = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(type="filepath", value=audio_paths[0]),
    outputs=[gr.Image(label="Output Image"), gr.Text(label="Output Text")],
    examples=audio_paths,
    allow_flagging="never",
)

# Bundle the three single-purpose interfaces into one tabbed app.
demo = gr.TabbedInterface(
    interface_list=[im_to_at, txt_to_ia, a_to_it],
    tab_names=[
        "Image to Text/Audio",
        "Text to Image/Audio",
        "Audio to Image/Text",
    ],
)

if __name__ == "__main__":
    # The test_inputs directory is passed as an allowed path so Gradio may
    # serve files stored under it.
    demo.launch(share=True, allowed_paths=[f"{base_path}/test_inputs/"])
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ lancedb
2
+ gradio
3
+ pandas
4
+ imagebind@git+https://github.com/raghavdixit99/ImageBind.git