"""Gradio app: image-to-audio generation plus Pexels image and video search.

The UI has three tabs:

* "Image to Audio" captions an uploaded image with a remote model and feeds
  the caption to a remote text-to-audio model chosen by the user.
* "FREE Image Search" shows Pexels photo search results in a gallery.
* "FREE Video Search" shows Pexels video search results as HTML links.
"""

import html
import json
import os
import re

import gradio as gr
import requests
from gradio_client import Client
from moviepy.audio.AudioClip import AudioClip
from moviepy.editor import VideoFileClip

# NOTE(review): this key was hard-coded in the source (twice). It is kept only
# as a fallback so the app keeps working unchanged; set PEXELS_API_KEY in the
# environment, then rotate this key and delete the fallback.
PEXELS_API_KEY = os.environ.get(
    "PEXELS_API_KEY",
    "5woz23MGx1QrSY0WHFb0BRi29JvbXPu97Hg0xnklYgHUI8G0w23FKH62",
)

# Seconds to wait for Pexels before giving up instead of hanging the UI.
PEXELS_TIMEOUT = 30


def _pexels_search(endpoint, query):
    """Query a Pexels search endpoint and return the decoded JSON body.

    Args:
        endpoint: Full URL of the Pexels search endpoint.
        query: Free-text search term; URL-encoded by ``requests``.

    Returns:
        The decoded JSON object, or an empty dict when the body is not a
        JSON object.

    Raises:
        requests.RequestException: On connection errors or timeout.
    """
    response = requests.get(
        endpoint,
        headers={"Authorization": PEXELS_API_KEY},
        # Passing params lets requests percent-encode the query text.
        params={"query": query, "per_page": 80},
        timeout=PEXELS_TIMEOUT,
    )
    try:
        data = response.json()
    except ValueError:
        print(f"Pexels returned a non-JSON response (HTTP {response.status_code}).")
        return {}
    return data if isinstance(data, dict) else {}


def search_pexels_videos(query):
    """Search Pexels for videos matching ``query``.

    Returns:
        A list of ``{"url": ..., "image": ...}`` dicts, one per video in the
        response; an empty list when the query is blank or the response has
        no ``videos`` key.
    """
    if not query or not query.strip():
        return []

    data = _pexels_search("https://api.pexels.com/videos/search", query)

    # Check for the 'videos' key; return an empty list when it is missing.
    if 'videos' not in data:
        print("No 'videos' key in response data. Returning empty list.")
        return []

    return [
        {"url": video['url'], "image": video['image']}
        for video in data['videos']
    ]


# Render Pexels video search results as HTML links.
def show_video_search_results(query):
    """Return an HTML fragment with a thumbnail and link for each video found.

    NOTE(review): the original markup was lost (only the texts
    "Video thumbnail" and "View Video" survived), so the tags below are a
    reconstruction - confirm styling against the intended layout.
    """
    videos_info = search_pexels_videos(query)
    videos_html = []
    for video in videos_info:
        # Values come from a remote API; escape them before embedding in HTML.
        thumbnail = html.escape(str(video['image']), quote=True)
        link = html.escape(str(video['url']), quote=True)
        videos_html.append(
            "<div>"
            f"<img src=\"{thumbnail}\" alt=\"Video thumbnail\"><br>"
            f"<a href=\"{link}\" target=\"_blank\">View Video</a>"
            "</div>"
        )
    return "".join(videos_html)


# Pexels image search.
def search_pexels_images(query):
    """Search Pexels for photos matching ``query``.

    Returns:
        A list of medium-size image URLs (``photo['src']['medium']``); an
        empty list when the query is blank or the response has no ``photos``
        key.
    """
    if not query or not query.strip():
        return []

    data = _pexels_search("https://api.pexels.com/v1/search", query)

    # Mirror the video search: a response without results yields an empty list
    # rather than a KeyError.
    if 'photos' not in data:
        print("No 'photos' key in response data. Returning empty list.")
        return []

    return [photo['src']['medium'] for photo in data['photos']]


# Display Pexels image search results.
def show_search_results(query):
    """Return the image URLs for ``query`` (feeds the gallery component)."""
    images_urls = search_pexels_images(query)
    return images_urls


def extract_audio(video_in):
    """Extract the audio track of ``video_in`` into ``audio.wav``.

    Args:
        video_in: Path to the input video file.

    Returns:
        The path of the written file, ``'audio.wav'``.

    Raises:
        ValueError: If the video has no audio track.
    """
    output_audio = 'audio.wav'

    # Open the video file and extract the audio.
    video_clip = VideoFileClip(video_in)
    try:
        audio_clip = video_clip.audio
        if audio_clip is None:
            raise ValueError(f"No audio track found in {video_in!r}")
        # Save the audio as a .wav file, using 44100 Hz as the sample rate.
        audio_clip.write_audiofile(output_audio, fps=44100)
    finally:
        # Close the clip even when writing fails.
        video_clip.close()

    print("Audio extraction complete.")
    return output_audio


def get_caption_from_kosmos(image_in):
    """Caption an image with the remote Kosmos-2 Space.

    Args:
        image_in: Filepath or URL of the image.

    Returns:
        The description, truncated after its last period when it has one.
    """
    kosmos2_client = Client("https://ydshieh-kosmos-2.hf.space/")

    kosmos2_result = kosmos2_client.predict(
        image_in,    # str (filepath or URL to image) in 'Test Image' Image component
        "Detailed",  # str in 'Description Type' Radio component
        fn_index=4
    )

    print(f"KOSMOS2 RETURNS: {kosmos2_result}")

    # The second element of the result is read as a path to a JSON file.
    with open(kosmos2_result[1], 'r') as f:
        data = json.load(f)

    # Presumably each entry is a list whose first item is a text fragment;
    # verify against the Space's output format.
    full_sentence = ' '.join(sublist[0] for sublist in data)

    # Match the expected format: "Describe this image in detail:" followed by
    # optional whitespace and then the description itself.
    pattern = r'^Describe this image in detail:\s*(.*)$'
    match = re.search(pattern, full_sentence)

    if match:
        description = match.group(1)
        print(description)
    else:
        print("Unable to locate valid description.")
        # Fall back to the raw text so the code below has a value to work on.
        description = full_sentence

    # Truncate the string after the last period; keep it whole when there is
    # no period, since rfind() returning -1 would otherwise empty the caption.
    last_period_index = description.rfind('.')
    if last_period_index == -1:
        truncated_caption = description
    else:
        truncated_caption = description[:last_period_index + 1]

    print(f"\n—\nIMAGE CAPTION: {truncated_caption}")

    return truncated_caption


def get_caption(image_in):
    """Ask the remote moondream1 Space for a one-sentence image description."""
    client = Client("https://vikhyatk-moondream1.hf.space/")
    result = client.predict(
        image_in,  # filepath in 'image' Image component
        "Describe precisely the image in one sentence.",  # str in 'Question' Textbox component
        api_name="/answer_question"
    )
    print(result)
    return result


def get_magnet(prompt):
    """Generate audio for ``prompt`` with the remote MAGNeT Space.

    Returns:
        The second element of the Space's result.
    """
    amended_prompt = f"{prompt}"
    print(amended_prompt)
    client = Client("https://fffiloni-magnet.hf.space/")
    result = client.predict(
        "facebook/audio-magnet-medium",  # 'Model' Radio component
        "",                              # str in 'Model Path (custom models)' Textbox component
        amended_prompt,                  # str in 'Input Text' Textbox component
        3,                               # float in 'Temperature' Number component
        0.9,                             # float in 'Top-p' Number component
        10,                              # float in 'Max CFG coefficient' Number component
        1,                               # float in 'Min CFG coefficient' Number component
        20,                              # float in 'Decoding Steps (stage 1)' Number component
        10,                              # float in 'Decoding Steps (stage 2)' Number component
        10,                              # float in 'Decoding Steps (stage 3)' Number component
        10,                              # float in 'Decoding Steps (stage 4)' Number component
        "prod-stride1 (new!)",           # 'Span Scoring' Radio component
        api_name="/predict_full"
    )
    print(result)
    return result[1]


def get_audioldm(prompt):
    """Generate audio for ``prompt`` with the remote AudioLDM-2 Space.

    The Space's result is passed to ``extract_audio``, whose output path is
    returned.
    """
    client = Client("https://haoheliu-audioldm2-text2audio-text2music.hf.space/")
    result = client.predict(
        prompt,                 # str in 'Input text' Textbox component
        "Low quality. Music.",  # str in 'Negative prompt' Textbox component
        10,                     # numeric value between 5 and 15 in 'Duration (seconds)' Slider component
        3.5,                    # numeric value between 0 and 7 in 'Guidance scale' Slider component
        45,                     # int | float in 'Seed' Number component
        3,                      # numeric value between 1 and 5 in 'Number waveforms to generate' Slider component
        fn_index=1
    )
    print(result)
    audio_result = extract_audio(result)
    return audio_result


def get_audiogen(prompt):
    """Generate audio for ``prompt`` with the remote AudioGen Space."""
    client = Client("https://fffiloni-audiogen.hf.space/")
    result = client.predict(
        prompt,
        10,
        api_name="/infer"
    )
    return result


# Maps each choice of the "Choose a model" radio to its generator function.
_AUDIO_GENERATORS = {
    "MAGNet": get_magnet,
    "AudioLDM-2": get_audioldm,
    "AudioGen": get_audiogen,
}


def infer(image_in, chosen_model):
    """Caption ``image_in`` and turn the caption into audio.

    Args:
        image_in: Filepath of the uploaded image.
        chosen_model: One of "MAGNet", "AudioLDM-2" or "AudioGen".

    Returns:
        Whatever the selected generator returns (used as the audio output).

    Raises:
        gr.Error: If no image was provided or the model name is unknown.
    """
    if not image_in:
        raise gr.Error("Please upload an image first.")

    generate = _AUDIO_GENERATORS.get(chosen_model)
    if generate is None:
        # Previously this fell through and silently returned None.
        raise gr.Error(f"Unknown model: {chosen_model}")

    caption = get_caption(image_in)
    return generate(caption)


# NOTE(review): this stylesheet is defined but never passed to gr.Blocks.
css = """
#col-container{
    margin: 0 auto;
    max-width: 800px;
}
"""

with gr.Blocks() as app:
    with gr.Tabs():
        with gr.TabItem("Image to Audio"):
            with gr.Column():
                gr.Markdown("### Image to Audio")
                image_in = gr.Image(sources=["upload"], type="filepath", label="Image input")
                chosen_model = gr.Radio(
                    label="Choose a model",
                    choices=["MAGNet", "AudioLDM-2", "AudioGen"],
                    value="AudioLDM-2",
                )
                submit_btn = gr.Button("Submit")
                audio_o = gr.Audio(label="Audio output")
                submit_btn.click(
                    fn=infer,
                    inputs=[image_in, chosen_model],
                    outputs=audio_o
                )

        with gr.TabItem("FREE Image Search"):
            with gr.Column():
                gr.Markdown("### FREE Image Search")
                search_query = gr.Textbox(label="사진 검색")
                search_btn = gr.Button("검색")
                images_output = gr.Gallery(label="검색 결과 이미지")
                search_btn.click(
                    fn=show_search_results,
                    inputs=search_query,
                    outputs=images_output
                )

        with gr.TabItem("FREE Video Search"):
            with gr.Column():
                gr.Markdown("### FREE Video Search")
                video_search_query = gr.Textbox(label="비디오 검색")
                video_search_btn = gr.Button("검색")
                # Show the video search results in an HTML component.
                videos_output = gr.HTML(label="검색 결과 동영상")
                video_search_btn.click(
                    fn=show_video_search_results,
                    inputs=video_search_query,
                    outputs=videos_output
                )

app.launch(debug=True)