Spaces:
Running
Running
poemsforaphrodite
committed on
Commit
•
160e4b0
1
Parent(s):
42ab230
Upload openvoice_app.py with huggingface_hub
Browse files
- openvoice_app.py +25 -7
openvoice_app.py
CHANGED
@@ -3,19 +3,31 @@ import torch
|
|
3 |
import argparse
|
4 |
import gradio as gr
|
5 |
import requests
|
6 |
-
import langid
|
7 |
from openvoice import se_extractor
|
8 |
from openvoice.api import BaseSpeakerTTS, ToneColorConverter
|
9 |
from dotenv import load_dotenv
|
10 |
from openai import OpenAI
|
11 |
from elevenlabs.client import ElevenLabs
|
12 |
-
from elevenlabs import play,save
|
|
|
|
|
|
|
|
|
|
|
13 |
load_dotenv()
|
14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
# Argument parsing
|
16 |
parser = argparse.ArgumentParser()
|
17 |
parser.add_argument("--share", action='store_true', default=False, help="make link public")
|
18 |
args = parser.parse_args()
|
|
|
|
|
19 |
client = ElevenLabs(api_key=os.environ.get("ELEVENLABS_API_KEY"))
|
20 |
device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
21 |
output_dir = 'outputs'
|
@@ -23,6 +35,7 @@ os.makedirs(output_dir, exist_ok=True)
|
|
23 |
|
24 |
api_key = os.environ.get("ELEVENLABS_API_KEY")
|
25 |
supported_languages = ['zh', 'en']
|
|
|
26 |
# Function to get all voices
|
27 |
def get_voices(api_key):
|
28 |
url = "https://api.elevenlabs.io/v1/voices"
|
@@ -37,7 +50,8 @@ def delete_voice(api_key, voice_id):
|
|
37 |
response = requests.request("DELETE", url, headers=headers)
|
38 |
return response.status_code, response.text
|
39 |
|
40 |
-
|
|
|
41 |
def predict(prompt, style, audio_file_pth):
|
42 |
text_hint = ''
|
43 |
if len(prompt) < 2:
|
@@ -53,7 +67,7 @@ def predict(prompt, style, audio_file_pth):
|
|
53 |
description="A trial voice model for testing",
|
54 |
files=[audio_file_pth],
|
55 |
)
|
56 |
-
#
|
57 |
audio = client.generate(text=prompt, voice=voice)
|
58 |
save(audio, f'{output_dir}/output.wav')
|
59 |
|
@@ -84,7 +98,7 @@ with gr.Blocks(gr.themes.Glass()) as demo:
|
|
84 |
style_gr = gr.Dropdown(
|
85 |
label="Style",
|
86 |
choices=['default', 'whispering', 'cheerful', 'terrified', 'angry', 'sad', 'friendly'],
|
87 |
-
info="Please upload a reference audio file that is
|
88 |
max_choices=1,
|
89 |
value="default",
|
90 |
)
|
@@ -100,11 +114,15 @@ with gr.Blocks(gr.themes.Glass()) as demo:
|
|
100 |
audio_gr = gr.Audio(label="Synthesised Audio", autoplay=True)
|
101 |
ref_audio_gr = gr.Audio(label="Reference Audio Used")
|
102 |
|
103 |
-
|
|
|
|
|
|
|
|
|
|
|
104 |
|
105 |
demo.queue()
|
106 |
demo.launch(debug=True, show_api=False, share=args.share)
|
107 |
|
108 |
# Hide Gradio footer
|
109 |
css = "footer {visibility: hidden}"
|
110 |
-
|
|
|
3 |
import argparse
|
4 |
import gradio as gr
|
5 |
import requests
|
|
|
6 |
from openvoice import se_extractor
|
7 |
from openvoice.api import BaseSpeakerTTS, ToneColorConverter
|
8 |
from dotenv import load_dotenv
|
9 |
from openai import OpenAI
|
10 |
from elevenlabs.client import ElevenLabs
|
11 |
+
from elevenlabs import play, save
|
12 |
+
from flask import Flask
|
13 |
+
from flask_limiter import Limiter
|
14 |
+
from flask_limiter.util import get_remote_address
|
15 |
+
|
16 |
+
# Load environment variables
|
17 |
load_dotenv()
|
18 |
|
19 |
+
# Initialize Flask app
|
20 |
+
app = Flask(__name__)
|
21 |
+
|
22 |
+
# Setup Limiter for rate limiting and quota management based on IP address
|
23 |
+
limiter = Limiter(get_remote_address, app=app, default_limits=["1 per 10 minutes"])
|
24 |
+
|
25 |
# Argument parsing
|
26 |
parser = argparse.ArgumentParser()
|
27 |
parser.add_argument("--share", action='store_true', default=False, help="make link public")
|
28 |
args = parser.parse_args()
|
29 |
+
|
30 |
+
# Initialize ElevenLabs client
|
31 |
client = ElevenLabs(api_key=os.environ.get("ELEVENLABS_API_KEY"))
|
32 |
device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
33 |
output_dir = 'outputs'
|
|
|
35 |
|
36 |
api_key = os.environ.get("ELEVENLABS_API_KEY")
|
37 |
supported_languages = ['zh', 'en']
|
38 |
+
|
39 |
# Function to get all voices
|
40 |
def get_voices(api_key):
|
41 |
url = "https://api.elevenlabs.io/v1/voices"
|
|
|
50 |
response = requests.request("DELETE", url, headers=headers)
|
51 |
return response.status_code, response.text
|
52 |
|
53 |
+
# Predict function with rate limiting based on IP address
|
54 |
+
@limiter.limit("1 per 10 minutes")
|
55 |
def predict(prompt, style, audio_file_pth):
|
56 |
text_hint = ''
|
57 |
if len(prompt) < 2:
|
|
|
67 |
description="A trial voice model for testing",
|
68 |
files=[audio_file_pth],
|
69 |
)
|
70 |
+
# Generate audio from text
|
71 |
audio = client.generate(text=prompt, voice=voice)
|
72 |
save(audio, f'{output_dir}/output.wav')
|
73 |
|
|
|
98 |
style_gr = gr.Dropdown(
|
99 |
label="Style",
|
100 |
choices=['default', 'whispering', 'cheerful', 'terrified', 'angry', 'sad', 'friendly'],
|
101 |
+
info="Please upload a reference audio file that is at least 1 minute long. For best results, ensure the audio is clear. You can use Adobe Podcast Enhance(https://podcast.adobe.com/enhance) to improve the audio quality before uploading.",
|
102 |
max_choices=1,
|
103 |
value="default",
|
104 |
)
|
|
|
114 |
audio_gr = gr.Audio(label="Synthesised Audio", autoplay=True)
|
115 |
ref_audio_gr = gr.Audio(label="Reference Audio Used")
|
116 |
|
117 |
+
def handle_click(prompt, style, audio_file_pth):
|
118 |
+
ip = request.remote_addr
|
119 |
+
return predict(prompt, style, audio_file_pth, ip)
|
120 |
+
|
121 |
+
tts_button.click(handle_click,
|
122 |
+
[input_text_gr, style_gr, ref_gr], outputs=[out_text_gr, audio_gr, ref_audio_gr])
|
123 |
|
124 |
demo.queue()
|
125 |
demo.launch(debug=True, show_api=False, share=args.share)
|
126 |
|
127 |
# Hide Gradio footer
|
128 |
css = "footer {visibility: hidden}"
|
|