arxivgpt kim commited on
Commit
3e510b3
•
1 Parent(s): a8d74c6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +167 -19
app.py CHANGED
@@ -1,35 +1,183 @@
1
  import gradio as gr
2
  import requests
 
 
 
 
 
 
3
 
 
4
  def search_pexels_images(query):
5
- API_KEY = '5woz23MGx1QrSY0WHFb0BRi29JvbXPu97Hg0xnklYgHUI8G0w23FKH62' # Pexels API 키를 여기에 입력하세요.
6
- # 한 페이지당 검색 결과 수를 10개로 설정
7
  url = f"https://api.pexels.com/v1/search?query={query}&per_page=80"
8
-
9
- headers = {
10
- "Authorization": API_KEY
11
- }
12
  response = requests.get(url, headers=headers)
13
  data = response.json()
14
-
15
  images_urls = [photo['src']['medium'] for photo in data['photos']]
16
  return images_urls
17
 
 
18
  def show_search_results(query):
19
  images_urls = search_pexels_images(query)
20
  return images_urls
21
 
22
- with gr.Blocks() as app:
23
- with gr.Column():
24
- gr.Markdown("### Image SFX Generator with Pexels Image Search")
25
- search_query = gr.Textbox(label="사진 검색")
26
- search_btn = gr.Button("검색")
27
- images_output = gr.Gallery(label="검색 결과 이미지")
28
-
29
- search_btn.click(
30
- fn=show_search_results,
31
- inputs=search_query,
32
- outputs=images_output
 
 
 
 
 
 
 
 
 
 
33
  )
34
 
35
- app.launch(debug=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  import requests
3
+ import gradio as gr
4
+ from gradio_client import Client
5
+ import json
6
+ import re
7
+ from moviepy.editor import VideoFileClip
8
+ from moviepy.audio.AudioClip import AudioClip
9
 
10
# Pexels image search
def search_pexels_images(query):
    """Search Pexels for photos matching ``query`` and return their image URLs.

    Args:
        query: Free-text search term (as typed into the search textbox).

    Returns:
        A list with the ``src.medium`` URL of every photo in the response.
        The request asks for up to 80 results per page. An empty list is
        returned for a blank query or when the response has no ``photos``
        entry.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
    """
    # Function-scope import so this block does not depend on the file's
    # top-level import list.
    import os

    # A blank query cannot produce meaningful results; skip the network call.
    if not query or not query.strip():
        return []

    # NOTE(review): a credential committed to source is a leak risk. Prefer
    # setting PEXELS_API_KEY in the environment; the literal is kept only as a
    # fallback so existing deployments keep working, and should be rotated.
    api_key = os.environ.get(
        "PEXELS_API_KEY",
        '5woz23MGx1QrSY0WHFb0BRi29JvbXPu97Hg0xnklYgHUI8G0w23FKH62',
    )

    response = requests.get(
        "https://api.pexels.com/v1/search",
        headers={"Authorization": api_key},
        # Let requests URL-encode the query (spaces, '&', non-ASCII text)
        # instead of interpolating it into the URL by hand.
        params={"query": query, "per_page": 80},
        # Without a timeout a stalled connection would hang the UI callback.
        timeout=30,
    )
    # Surface HTTP errors explicitly rather than failing later on a missing key.
    response.raise_for_status()
    data = response.json()

    return [photo['src']['medium'] for photo in data.get('photos', [])]
19
 
20
# Pexels image search result display
def show_search_results(query):
    """Return the image URLs found by ``search_pexels_images`` for ``query``.

    Used as the click callback of the search button; the returned list is
    what gets displayed in the gallery output.
    """
    return search_pexels_images(query)
24
 
25
def extract_audio(video_in):
    """Extract the audio track of a video file and save it as ``audio.wav``.

    Args:
        video_in: Path of the video file to read.

    Returns:
        The path of the written audio file (``'audio.wav'``, relative to the
        current working directory).

    Raises:
        ValueError: If the video has no audio track.
    """
    # NOTE(review): the output path is fixed, so concurrent calls overwrite
    # each other's file — confirm whether that matters for this app.
    output_audio = 'audio.wav'

    video_clip = VideoFileClip(video_in)
    try:
        audio_clip = video_clip.audio
        if audio_clip is None:
            # Fail with a clear message instead of an AttributeError on None.
            raise ValueError(f"No audio track found in {video_in!r}")

        # Use 44100 Hz as the sample rate for the .wav file.
        audio_clip.write_audiofile(output_audio, fps=44100)
    finally:
        # Always release the underlying file readers, even on failure.
        video_clip.close()

    print("Audio extraction complete.")
    return output_audio
38
+
39
def _clean_kosmos_caption(full_sentence):
    """Strip the Kosmos-2 prompt prefix and cut the text after its last period."""
    # DOTALL lets the capture span line breaks; without it a multi-line
    # answer would never match.
    match = re.search(
        r'^Describe this image in detail:\s*(.*)$', full_sentence, re.DOTALL
    )
    if match:
        description = match.group(1)
        print(description)
    else:
        print("Unable to locate valid description.")
        # Fall back to the raw text so the caller still gets a caption
        # (previously `description` was left unbound on this path).
        description = full_sentence

    last_period_index = description.rfind('.')
    if last_period_index == -1:
        # No sentence terminator: keep the text rather than truncating it
        # to an empty string.
        return description
    # Drop any trailing, unfinished sentence.
    return description[:last_period_index + 1]


def get_caption_from_kosmos(image_in):
    """Caption an image with the hosted Kosmos-2 Space.

    Args:
        image_in: File path or URL of the image to describe.

    Returns:
        The description with the "Describe this image in detail:" prefix
        removed and anything after the last period dropped.
    """
    kosmos2_client = Client("https://ydshieh-kosmos-2.hf.space/")

    kosmos2_result = kosmos2_client.predict(
        image_in,    # str (filepath or URL to image) in 'Test Image' Image component
        "Detailed",  # str in 'Description Type' Radio component
        fn_index=4
    )

    print(f"KOSMOS2 RETURNS: {kosmos2_result}")

    # The second element of the result is read as the path of a JSON file.
    with open(kosmos2_result[1], 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Each entry's first item is taken as a text fragment of the caption.
    full_sentence = ' '.join(sublist[0] for sublist in data)

    truncated_caption = _clean_kosmos_caption(full_sentence)
    print(f"\nโ€”\nIMAGE CAPTION: {truncated_caption}")

    return truncated_caption
80
+
81
def get_caption(image_in):
    """Ask the hosted moondream1 Space for a one-sentence image description.

    Args:
        image_in: File path of the image to describe.

    Returns:
        Whatever the Space's ``/answer_question`` endpoint returns; it is
        also printed for debugging.
    """
    question = "Describe precisely the image in one sentence."  # 'Question' textbox
    moondream = Client("https://vikhyatk-moondream1.hf.space/")
    answer = moondream.predict(image_in, question, api_name="/answer_question")
    print(answer)
    return answer
90
+
91
def get_magnet(prompt):
    """Generate audio from a text prompt with the hosted MAGNeT Space.

    Args:
        prompt: Text description of the sound to generate.

    Returns:
        The second element of the Space's ``/predict_full`` result.
    """
    text_prompt = f"{prompt}"
    print(text_prompt)

    # Positional inputs, in the order of the Space's UI components.
    magnet_inputs = (
        "facebook/audio-magnet-medium",  # 'Model' radio
        "",                              # 'Model Path (custom models)' textbox
        text_prompt,                     # 'Input Text' textbox
        3,                               # 'Temperature'
        0.9,                             # 'Top-p'
        10,                              # 'Max CFG coefficient'
        1,                               # 'Min CFG coefficient'
        20,                              # 'Decoding Steps (stage 1)'
        10,                              # 'Decoding Steps (stage 2)'
        10,                              # 'Decoding Steps (stage 3)'
        10,                              # 'Decoding Steps (stage 4)'
        "prod-stride1 (new!)",           # 'Span Scoring' radio
    )

    magnet = Client("https://fffiloni-magnet.hf.space/")
    outputs = magnet.predict(*magnet_inputs, api_name="/predict_full")
    print(outputs)
    return outputs[1]
112
+
113
def get_audioldm(prompt):
    """Generate audio from a text prompt with the hosted AudioLDM-2 Space.

    Args:
        prompt: Text description of the sound to generate.

    Returns:
        The path returned by ``extract_audio`` after it is given the
        Space's result (presumably a video file path — verify against the
        Space's API).
    """
    # Positional inputs, in the order of the Space's UI components.
    audioldm_inputs = (
        prompt,                 # 'Input text' textbox
        "Low quality. Music.",  # 'Negative prompt' textbox
        10,                     # 'Duration (seconds)' slider (5 to 15)
        3.5,                    # 'Guidance scale' slider (0 to 7)
        45,                     # 'Seed'
        3,                      # 'Number waveforms to generate' slider (1 to 5)
    )

    audioldm = Client("https://haoheliu-audioldm2-text2audio-text2music.hf.space/")
    generated = audioldm.predict(*audioldm_inputs, fn_index=1)
    print(generated)
    return extract_audio(generated)
127
+
128
def get_audiogen(prompt):
    """Generate audio from a text prompt with the hosted AudioGen Space.

    Args:
        prompt: Text description of the sound to generate.

    Returns:
        The result of the Space's ``/infer`` endpoint, called with the
        prompt and the value ``10``.
    """
    audiogen = Client("https://fffiloni-audiogen.hf.space/")
    return audiogen.predict(prompt, 10, api_name="/infer")
136
+
137
def infer(image_in, chosen_model):
    """Caption an image, then turn the caption into audio with the chosen model.

    Args:
        image_in: File path of the input image.
        chosen_model: One of ``"MAGNet"``, ``"AudioLDM-2"`` or ``"AudioGen"``.

    Returns:
        The selected generator's result, or ``None`` when ``chosen_model``
        is none of the three known names.
    """
    # The caption is computed first, regardless of which model is selected.
    caption = get_caption(image_in)

    if chosen_model == "MAGNet":
        return get_magnet(caption)
    if chosen_model == "AudioLDM-2":
        return get_audioldm(caption)
    if chosen_model == "AudioGen":
        return get_audiogen(caption)
    return None
148
+
149
# NOTE(review): this stylesheet is defined but never passed to gr.Blocks, and
# no component carries the "col-container" id, so it currently has no effect.
css = """
#col-container{
    margin: 0 auto;
    max-width: 800px;
}
"""

# Two-tab UI: image-to-sound-effect generation and Pexels image search.
with gr.Blocks() as app:
    with gr.Tabs():
        with gr.TabItem("Image to SFX"):
            with gr.Column():
                gr.Markdown("### Image to SFX")
                image_in = gr.Image(sources=["upload"], type="filepath", label="Image input")
                chosen_model = gr.Radio(
                    label="Choose a model",
                    choices=["MAGNet", "AudioLDM-2", "AudioGen"],
                    value="AudioLDM-2",
                )
                submit_btn = gr.Button("Submit")
                audio_o = gr.Audio(label="Audio output")
                # Caption the uploaded image and generate audio with the chosen model.
                submit_btn.click(
                    fn=infer,
                    inputs=[image_in, chosen_model],
                    outputs=audio_o
                )

        with gr.TabItem("Pexels Image Search"):
            with gr.Column():
                gr.Markdown("### Pexels Image Search")
                # Korean labels restored from mis-encoded text:
                # "photo search", "search", "search result images".
                search_query = gr.Textbox(label="사진 검색")
                search_btn = gr.Button("검색")
                images_output = gr.Gallery(label="검색 결과 이미지")
                search_btn.click(
                    fn=show_search_results,
                    inputs=search_query,
                    outputs=images_output
                )

app.launch(debug=True)