File size: 3,652 Bytes
372395e
caed802
b19e4a9
f694503
04d2706
 
caed802
372395e
caed802
372395e
cb934a1
 
 
 
 
3efd05b
b5357a4
b3ab125
372395e
79b4496
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48c215d
79b4496
 
 
 
 
 
 
 
 
 
 
e7c2915
 
 
 
 
 
 
 
 
 
 
 
12bd467
 
 
 
 
 
e7c2915
 
 
79b4496
 
372395e
 
 
79b4496
372395e
 
cb934a1
ee7056c
f694503
372395e
570b690
79b4496
53f5458
 
 
 
 
79b4496
fdef21e
cb934a1
bc6b39c
372395e
a63d987
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import gradio as gr
import os
import time
from moviepy.editor import *
from share_btn import community_icon_html, loading_icon_html, share_js

token = os.environ.get('HF_TOKEN')
caption = gr.Blocks.load(name="spaces/SRDdev/Image-Caption")
audio_gen = gr.Blocks.load(name="spaces/fffiloni/audioldm-text-to-audio-generation-clone", api_key=token)

def infer(image_input, manual_caption):
    if manual_caption == None:
        cap = caption(image_input, fn_index=0)
    else:
        cap = manual_caption
    sound = audio_gen(cap, 10, 2.5, 45, 3, fn_index=0)
    
    return cap, sound[1], gr.Group.update(visible=True)

title = """
    <div style="text-align: center; max-width: 700px; margin: 0 auto;">
        <div
        style="
            display: inline-flex;
            align-items: center;
            gap: 0.8rem;
            font-size: 1.75rem;
        "
        >
        <h1 style="font-weight: 900; margin-bottom: 7px; margin-top: 5px;">
            Image to Sound Effect
        </h1>
        </div>
        <p style="margin-bottom: 10px; font-size: 94%">
        Convert an image to a corresponding sound effect generated through GPT2 Image Captioning & AudioLDM
        </p>
    </div>
"""

article = """
    
    <div class="footer">
        <p>
         
        Follow <a href="https://twitter.com/fffiloni" target="_blank">Sylvain Filoni</a> for future updates πŸ€—
        </p>
    </div>

    <div id="may-like-container" style="display: flex;justify-content: center;flex-direction: column;align-items: center;margin-bottom: 30px;">
        <p>You may also like: </p>
        
        <div id="may-like-content" style="display:flex;flex-wrap: wrap;align-items:center;height:20px;">
            
            <svg height="20" width="208" style="margin-left:4px;margin-bottom: 6px;">       
                 <a href="https://huggingface.co/spaces/haoheliu/audioldm-text-to-audio-generation" target="_blank">
                    <image href="https://img.shields.io/badge/πŸ€— Spaces-AudioLDM_Text_to_Audio-blue" src="https://img.shields.io/badge/πŸ€— Spaces-AudioLDM_Text_to_Audio-blue.png" height="20"/>
                 </a>
            </svg>

            <svg height="20" width="122" style="margin-left:4px;margin-bottom: 6px;">       
                 <a href="https://huggingface.co/spaces/fffiloni/spectrogram-to-music" target="_blank">
                    <image href="https://img.shields.io/badge/πŸ€— Spaces-Riffusion-blue" src="https://img.shields.io/badge/πŸ€— Spaces-Riffusion-blue.png" height="20"/>
                 </a>
            </svg>
            
        </div>
    </div>
"""

with gr.Blocks(css="style.css") as demo:
    with gr.Column(elem_id="col-container"):
        
        gr.HTML(title)
    
        input_img = gr.Image(type="filepath", elem_id="input-img")
        manual_cap = gr.Textbox(label="Image description (optional)")
        caption_output = gr.Textbox(label="Caption", lines=1, visible=False, elem_id="text-caption")
        sound_output = gr.Audio(label="Result", elem_id="sound-output")
        
        generate = gr.Button("Generate SFX from Image")

        with gr.Group(elem_id="share-btn-container", visible=False) as share_group:
            community_icon = gr.HTML(community_icon_html)
            loading_icon = gr.HTML(loading_icon_html)
            share_button = gr.Button("Share to community", elem_id="share-btn")

        gr.HTML(article)
        
    generate.click(infer, inputs=[input_img, manual_cap], outputs=[caption_output, sound_output, share_group], api_name="i2fx")
    share_button.click(None, [], [], _js=share_js)

demo.queue(max_size=32).launch(debug=True)