Spaces:

assasinatee
/

STAR

Sleeping

App Files Community

Yixuan Li committed 27 days ago

Commit

0509b90

1 Parent(s): 2176cd2

page optimization

Browse files

Files changed (21) hide show

app.py +72 -8
wav/human/1.wav +3 -0
wav/human/10.wav +3 -0
wav/human/2.wav +3 -0
wav/human/3.wav +3 -0
wav/human/4.wav +3 -0
wav/human/5.wav +3 -0
wav/human/6.wav +3 -0
wav/human/7.wav +3 -0
wav/human/8.wav +3 -0
wav/human/9.wav +3 -0
wav/vits/1.wav +3 -0
wav/vits/10.wav +3 -0
wav/vits/2.wav +3 -0
wav/vits/3.wav +3 -0
wav/vits/4.wav +3 -0
wav/vits/5.wav +3 -0
wav/vits/6.wav +3 -0
wav/vits/7.wav +3 -0
wav/vits/8.wav +3 -0
wav/vits/9.wav +3 -0

app.py CHANGED Viewed

@@ -89,17 +89,22 @@ def infer(audio_path: str) -> str:
     return output_file
-with gr.Blocks(title="STAR Online Inference", theme=gr.themes.Soft()) as demo:
     gr.Markdown("# STAR: Speech-to-Audio Generation via Representation Learning")
     gr.Markdown("""
 <div style="text-align: left; padding: 10px;">
 ## 🗣️ Input
 A brief input speech utterance for the overall audio scene.
-> Example:A cat meowing and young female speaking
 ### 🎙️ Input Speech Example
 """)
@@ -109,15 +114,27 @@ A brief input speech utterance for the overall audio scene.
     gr.Markdown("""
 <div style="text-align: left; padding: 10px;">
-### 🎧️ Output Audio Example
 """)
     audio = gr.Audio(value="wav/audio.wav",  label="Generated Audio Example", type="filepath")
     gr.Markdown("""
 </div>
 ---
 </div>
 """)
     with gr.Column():
@@ -125,8 +142,55 @@ A brief input speech utterance for the overall audio scene.
         btn = gr.Button("🎵Generate Audio!", variant="primary")
         output_audio = gr.Audio(label="Generated Audio", type="filepath")
         btn.click(fn=infer, inputs=input_audio, outputs=output_audio)
-demo.launch()

     return output_file
+with gr.Blocks(title="STAR Online Inference", theme=gr.themes.Glass()) as demo:
     gr.Markdown("# STAR: Speech-to-Audio Generation via Representation Learning")
     gr.Markdown("""
 <div style="text-align: left; padding: 10px;">
+##  📚️ Introduction
+STAR is the first end-to-end speech-to-audio generation framework, designed to enhance efficiency and address error propagation inherent in cascaded systems.
+Within this space, you have the opportunity to directly control our model through voice input, thereby generating the corresponding audio output.
 ## 🗣️ Input
 A brief input speech utterance for the overall audio scene.
+> Example：A cat meowing and young female speaking
 ### 🎙️ Input Speech Example
 """)
     gr.Markdown("""
 <div style="text-align: left; padding: 10px;">
+## 🎧️ Output
+ Capture both auditory events and scene cues and generate corresponding audio
+### 🔊 Output Audio Example
 """)
     audio = gr.Audio(value="wav/audio.wav",  label="Generated Audio Example", type="filepath")
     gr.Markdown("""
+<div style="text-align: left; padding: 10px;">
 </div>
 ---
 </div>
+## 🛠️ Online Inference
+You can upload your own samples, or try the quick examples provided below.
 """)
     with gr.Column():
         btn = gr.Button("🎵Generate Audio!", variant="primary")
         output_audio = gr.Audio(label="Generated Audio", type="filepath")
         btn.click(fn=infer, inputs=input_audio, outputs=output_audio)
+    gr.Markdown("""
+<div style="text-align: left; padding: 10px;">
+## 🎯 Quick Examples
+""")
+    with gr.Tabs():
+        with gr.Tab("VITS Generated Speech"):
+            gr.Markdown("| 🎧 Audio | 📝 Caption |\n|:--:|:--|")
+            gr.Examples(
+                examples=[
+                    ["wav/vits/1.wav", "A cat meowing and young female speaking"],
+                    ["wav/vits/2.wav", "Sustained industrial engine noise"],
+                    ["wav/vits/3.wav", "A woman talks and a baby whispers"],
+                    ["wav/vits/4.wav", "A man speaks followed by a toilet flush"],
+                    ["wav/vits/5.wav", "It is raining and thundering, and then a man speaks"],
+                    ["wav/vits/6.wav", "A man speaking as birds are chirping"],
+                    ["wav/vits/7.wav", "A muffled man talking as a goat baas before and after two goats baaing in the distance while wind blows into a microphone"],
+                    ["wav/vits/8.wav", "Birds chirping and a horse neighing"],
+                    ["wav/vits/9.wav", "Several church bells ringing"],
+                    ["wav/vits/10.wav", "A telephone rings with bell sounds"]
+                ],
+                inputs=[input_audio, _],
+                label="Click examples below to try!",
+                cache_examples = False,
+                examples_per_page = 5,
+            )
+        with gr.Tab("Real human Speech"):
+            gr.Markdown("| 🎧 Audio | 📝 Caption |\n|:--:|:--|")
+            gr.Examples(
+                examples=[
+                    ["wav/human/1.wav", "A cat meowing and young female speaking"],
+                    ["wav/human/2.wav", "Sustained industrial engine noise"],
+                    ["wav/human/3.wav", "A woman talks and a baby whispers"],
+                    ["wav/human/4.wav", "A man speaks followed by a toilet flush"],
+                    ["wav/human/5.wav", "It is raining and thundering, and then a man speaks"],
+                    ["wav/human/6.wav", "A man speaking as birds are chirping"],
+                    ["wav/human/7.wav", "A muffled man talking as a goat baas before and after two goats baaing in the distance while wind blows into a microphone"],
+                    ["wav/human/8.wav", "Birds chirping and a horse neighing"],
+                    ["wav/human/9.wav", "Several church bells ringing"],
+                    ["wav/human/10.wav", "A telephone rings with bell sounds"]
+                ],
+                inputs=[input_audio, _],
+                label="Click examples below to try!",
+                cache_examples = False,
+                examples_per_page = 5,
+            )
+demo.launch()

wav/human/1.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3df72406f58c50f323b0b532fb55df443628e715f03af66bc11ccd15eb649f11
+size 603726

wav/human/10.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fdd320764912961861756d32e93ffd173cd53d924a42e03c28ce7005154579e8
+size 672846

wav/human/2.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:44b06c20cc32deb1c5088e13164e858f81452f061f71290062f79e7e3a23fe9b
+size 649806

wav/human/3.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cf4cb6eebb04c6454ecc65cea8df46445d4996cdc36f15ff1f2ce5d177c3f90b
+size 617550

wav/human/4.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:64b8eb089de3be0d18bd767cba861b907042f34c9ac03907b258d36794ea3e0b
+size 732750

wav/human/5.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:115cc7b13c4d79179de4901b2443a520de644374bfcdd6e7a6d97f8b254cd814
+size 857166

wav/human/6.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c4fb1e9c2479fa9197969b8f23e3052d6225d437eb1300e1ae32511528303523
+size 640590

wav/human/7.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d616cfa29f31f92c5f63faf8e225058b3e895ae9a4b32163f7f6889b1a52c679
+size 1764942

wav/human/8.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a4aa8af69c2dcf3dd9cdcfd514832de776256b4b34fec5f49d4e9f2329c1aa7f
+size 649806

wav/human/9.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:18c32b3183f51307b660a48d97285202fc849866dead7166f387c1f985770e41
+size 488526

wav/vits/1.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9d23cb7106c66ad61d2b9717daea77385883cf71772836a8c5d18b9496dbb8d5
+size 130604

wav/vits/10.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:09eb032c3b97cff6e64f65c28c3042b62c13c26027ed5cea0a879acae08de422
+size 111660

wav/vits/2.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bd3847a5d10b320f0332986c7722c6379ae54fc619f856be08ccc9b9c5653f4f
+size 106028

wav/vits/3.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8fe02704f94e440c78aaf17feee1a1810cd12c82f70e162433b70a374eac0853
+size 116268

wav/vits/4.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9331107c335e4845cc3886e588cb09b0cde8f7e48280699f97bfa8b9cf8193cd
+size 132652

wav/vits/5.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a6ec5223af568eddbced78c5402bd02abe1339cef96de7b6bc3f11362963e170
+size 163884

wav/vits/6.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:562bc9e9f0b8c3033b34d3d9f965393f071a97f6a03d673fe9605bc39694168f
+size 101932

wav/vits/7.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1e4152c3394432b827841b1fafc994b1af3aad279025e74016c74073e9a6dc83
+size 352300

wav/vits/8.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5d208a28653f8c57cbfec0f9db7d4caeaa53af18b377991b9e92e080ff9505e8
+size 100396

wav/vits/9.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b0531c9efae71fcbc1a1739ba76335ac8b413c01b5e73ceb40ff65289c42e75b
+size 76332