raghavd99 committed on
Commit
d5a66a4
·
1 Parent(s): fd6e378
Files changed (2) hide show
  1. app.py +1 -1
  2. downloader.py +64 -88
app.py CHANGED
@@ -3,7 +3,7 @@ import lancedb.embeddings.imagebind
3
  from lancedb.embeddings import get_registry
4
  from lancedb.pydantic import LanceModel, Vector
5
  import gradio as gr
6
- from downloader import dowload_and_save_audio, dowload_and_save_image, base_path
7
 
8
  model = get_registry().get("imagebind").create()
9
 
 
3
  from lancedb.embeddings import get_registry
4
  from lancedb.pydantic import LanceModel, Vector
5
  import gradio as gr
6
+ from .downloader import dowload_and_save_audio, dowload_and_save_image, base_path
7
 
8
  model = get_registry().get("imagebind").create()
9
 
downloader.py CHANGED
@@ -1,90 +1,66 @@
1
- import lancedb
2
- import lancedb.embeddings.imagebind
3
- from lancedb.embeddings import get_registry
4
- from lancedb.pydantic import LanceModel, Vector
5
- import gradio as gr
6
- from downloader import dowload_and_save_audio, dowload_and_save_image, base_path
7
-
8
- model = get_registry().get("imagebind").create()
9
-
10
-
11
- class TextModel(LanceModel):
12
- text: str
13
- image_uri: str = model.SourceField()
14
- audio_path: str
15
- vector: Vector(model.ndims()) = model.VectorField()
16
-
17
-
18
- text_list = ["A bird", "A dragon", "A car"]
19
- image_paths = dowload_and_save_image()
20
- audio_paths = dowload_and_save_audio()
21
-
22
- # Load data
23
- inputs = [
24
- {"text": a, "audio_path": b, "image_uri": c}
25
- for a, b, c in zip(text_list, audio_paths, image_paths)
26
  ]
27
 
28
- db = lancedb.connect("/tmp/lancedb")
29
- table = db.create_table("img_bind", schema=TextModel)
30
- table.add(inputs)
31
-
32
-
33
- def process_image(inp_img) -> str:
34
- actual = (
35
- table.search(inp_img, vector_column_name="vector")
36
- .limit(1)
37
- .to_pydantic(TextModel)[0]
38
- )
39
-
40
- return actual.text, actual.audio_path
41
-
42
-
43
- def process_text(inp_text) -> str:
44
- actual = (
45
- table.search(inp_text, vector_column_name="vector")
46
- .limit(1)
47
- .to_pydantic(TextModel)[0]
48
- )
49
-
50
- return actual.image_uri, actual.audio_path
51
-
52
-
53
- def process_audio(inp_audio) -> str:
54
- actual = (
55
- table.search(inp_audio, vector_column_name="vector")
56
- .limit(1)
57
- .to_pydantic(TextModel)[0]
58
- )
59
-
60
- return actual.image_uri, actual.text
61
-
62
-
63
- im_to_at = gr.Interface(
64
- process_image,
65
- gr.Image(type="filepath", value=image_paths[0]),
66
- [gr.Text(label="Output Text"), gr.Audio(label="Output Audio")],
67
- examples=image_paths,
68
- allow_flagging="never",
69
- )
70
- txt_to_ia = gr.Interface(
71
- process_text,
72
- gr.Textbox(label="Enter a prompt:"),
73
- [gr.Image(label="Output Image"), gr.Audio(label="Output Audio")],
74
- allow_flagging="never",
75
- examples=text_list,
76
- )
77
- a_to_it = gr.Interface(
78
- process_audio,
79
- gr.Audio(type="filepath", value=audio_paths[0]),
80
- [gr.Image(label="Output Image"), gr.Text(label="Output Text")],
81
- examples=audio_paths,
82
- allow_flagging="never",
83
- )
84
- demo = gr.TabbedInterface(
85
- [im_to_at, txt_to_ia, a_to_it],
86
- ["Image to Text/Audio", "Text to Image/Audio", "Audio to Image/Text"],
87
- )
88
-
89
- if __name__ == "__main__":
90
- demo.launch(share=True, allowed_paths=[f"{base_path}/test_inputs/"])
 
1
+ import requests
2
+ import os
3
+ from pathlib import Path
4
+
5
+ # URLs of the raw audio files on GitHub
6
# Remote sample assets fetched by the download helpers in this module.
audio_file_urls = [
    "https://github.com/raghavdixit99/assets/raw/main/bird_audio.wav",
    "https://github.com/raghavdixit99/assets/raw/main/dragon-growl-37570.wav",
    "https://github.com/raghavdixit99/assets/raw/main/car_audio.wav",
]

# These URLs end in an opaque identifier with no file extension; the image
# download helper appends ".jpeg" when it builds the local file name.
image_urls = [
    "https://github.com/raghavdixit99/assets/assets/34462078/abf47cc4-d979-4aaa-83be-53a2115bf318",
    "https://github.com/raghavdixit99/assets/assets/34462078/93be928e-522b-4e37-889d-d4efd54b2112",
    "https://github.com/raghavdixit99/assets/assets/34462078/025deaff-632a-4829-a86c-3de6e326402f",
]

# Absolute directory containing this module; downloads are written to
# "<base_path>/test_inputs".
base_path = os.path.dirname(os.path.abspath(__file__))
18
+
19
+
20
+ # Audio files are saved locally under <base_path>/test_inputs/
21
def dowload_and_save_audio(timeout=30.0):
    """Download every URL in ``audio_file_urls`` into ``<base_path>/test_inputs``.

    Each file is stored under the last path segment of its URL. A download
    that fails (network error, timeout, or a non-200 response) is reported
    on stdout and skipped, so the returned list can be shorter than
    ``audio_file_urls``.

    Args:
        timeout: Seconds to wait for the server before giving up on a single
            request. Without a timeout ``requests.get`` may block forever.

    Returns:
        list: Local file paths (as ``str``) of the files that were saved,
        in the order of their URLs.
    """
    target_dir = Path(base_path) / "test_inputs"
    # Loop-invariant: create the destination directory once up front.
    target_dir.mkdir(parents=True, exist_ok=True)

    audio_pths = []
    for url in audio_file_urls:
        local_file_path = target_dir / url.split("/")[-1]

        try:
            response = requests.get(url, timeout=timeout)
        except requests.RequestException as exc:
            # Treat transport failures like HTTP failures: report and move on
            # instead of aborting the remaining downloads.
            print(f"Failed to download file. Error: {exc}")
            continue

        if response.status_code != 200:
            print(f"Failed to download file. Status code: {response.status_code}")
            continue

        local_file_path.write_bytes(response.content)
        audio_pths.append(str(local_file_path))
        print(
            f"Audio file downloaded successfully and saved as '{local_file_path}'."
        )
    return audio_pths
42
+
43
+
44
def dowload_and_save_image(timeout=30.0):
    """Download every URL in ``image_urls`` into ``<base_path>/test_inputs``.

    Each file is stored as ``<last URL segment>.jpeg``. A download that fails
    (network error, timeout, or a non-200 response) is reported on stdout and
    skipped, so the returned list can be shorter than ``image_urls``.

    Args:
        timeout: Seconds to wait for the server before giving up on a single
            request. Without a timeout ``requests.get`` may block forever.

    Returns:
        list: Local file paths (as ``str``) of the files that were saved,
        in the order of their URLs.
    """
    target_dir = Path(base_path) / "test_inputs"
    # Loop-invariant: create the destination directory once up front.
    target_dir.mkdir(parents=True, exist_ok=True)

    image_paths = []
    for url in image_urls:
        # The URL's last segment has no extension, so one is appended here.
        filename = url.split("/")[-1]
        local_file_path = target_dir / f"{filename}.jpeg"

        try:
            response = requests.get(url, timeout=timeout)
        except requests.RequestException as exc:
            # Treat transport failures like HTTP failures: report and move on
            # instead of aborting the remaining downloads.
            print(f"Failed to download file. Error: {exc}")
            continue

        if response.status_code != 200:
            print(f"Failed to download file. Status code: {response.status_code}")
            continue

        local_file_path.write_bytes(response.content)
        image_paths.append(str(local_file_path))
        print(
            f"Image file downloaded successfully and saved as '{local_file_path}'."
        )

    return image_paths