poemsforaphrodite committed on
Commit
160e4b0
1 Parent(s): 42ab230

Upload openvoice_app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. openvoice_app.py +25 -7
openvoice_app.py CHANGED
@@ -3,19 +3,31 @@ import torch
3
  import argparse
4
  import gradio as gr
5
  import requests
6
- import langid
7
  from openvoice import se_extractor
8
  from openvoice.api import BaseSpeakerTTS, ToneColorConverter
9
  from dotenv import load_dotenv
10
  from openai import OpenAI
11
  from elevenlabs.client import ElevenLabs
12
- from elevenlabs import play,save
 
 
 
 
 
13
  load_dotenv()
14
 
 
 
 
 
 
 
15
  # Argument parsing
16
  parser = argparse.ArgumentParser()
17
  parser.add_argument("--share", action='store_true', default=False, help="make link public")
18
  args = parser.parse_args()
 
 
19
  client = ElevenLabs(api_key=os.environ.get("ELEVENLABS_API_KEY"))
20
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
21
  output_dir = 'outputs'
@@ -23,6 +35,7 @@ os.makedirs(output_dir, exist_ok=True)
23
 
24
  api_key = os.environ.get("ELEVENLABS_API_KEY")
25
  supported_languages = ['zh', 'en']
 
26
  # Function to get all voices
27
  def get_voices(api_key):
28
  url = "https://api.elevenlabs.io/v1/voices"
@@ -37,7 +50,8 @@ def delete_voice(api_key, voice_id):
37
  response = requests.request("DELETE", url, headers=headers)
38
  return response.status_code, response.text
39
 
40
-
 
41
  def predict(prompt, style, audio_file_pth):
42
  text_hint = ''
43
  if len(prompt) < 2:
@@ -53,7 +67,7 @@ def predict(prompt, style, audio_file_pth):
53
  description="A trial voice model for testing",
54
  files=[audio_file_pth],
55
  )
56
- #text should be prompt
57
  audio = client.generate(text=prompt, voice=voice)
58
  save(audio, f'{output_dir}/output.wav')
59
 
@@ -84,7 +98,7 @@ with gr.Blocks(gr.themes.Glass()) as demo:
84
  style_gr = gr.Dropdown(
85
  label="Style",
86
  choices=['default', 'whispering', 'cheerful', 'terrified', 'angry', 'sad', 'friendly'],
87
- info="Please upload a reference audio file that is atleast 1 minute long. For best results, ensure the audio is clear. You can use Adobe Podcast Enhance(https://podcast.adobe.com/enhance) to improve the audio quality before uploading.",
88
  max_choices=1,
89
  value="default",
90
  )
@@ -100,11 +114,15 @@ with gr.Blocks(gr.themes.Glass()) as demo:
100
  audio_gr = gr.Audio(label="Synthesised Audio", autoplay=True)
101
  ref_audio_gr = gr.Audio(label="Reference Audio Used")
102
 
103
- tts_button.click(predict, [input_text_gr, style_gr, ref_gr], outputs=[out_text_gr, audio_gr, ref_audio_gr])
 
 
 
 
 
104
 
105
  demo.queue()
106
  demo.launch(debug=True, show_api=False, share=args.share)
107
 
108
  # Hide Gradio footer
109
  css = "footer {visibility: hidden}"
110
-
 
3
  import argparse
4
  import gradio as gr
5
  import requests
 
6
  from openvoice import se_extractor
7
  from openvoice.api import BaseSpeakerTTS, ToneColorConverter
8
  from dotenv import load_dotenv
9
  from openai import OpenAI
10
  from elevenlabs.client import ElevenLabs
11
+ from elevenlabs import play, save
12
+ from flask import Flask
13
+ from flask_limiter import Limiter
14
+ from flask_limiter.util import get_remote_address
15
+
16
+ # Load environment variables
17
  load_dotenv()
18
 
19
+ # Initialize Flask app
20
+ app = Flask(__name__)
21
+
22
+ # Set up the Limiter for rate limiting and quota management based on IP address
23
+ limiter = Limiter(get_remote_address, app=app, default_limits=["1 per 10 minutes"])
24
+
25
  # Argument parsing
26
  parser = argparse.ArgumentParser()
27
  parser.add_argument("--share", action='store_true', default=False, help="make link public")
28
  args = parser.parse_args()
29
+
30
+ # Initialize ElevenLabs client
31
  client = ElevenLabs(api_key=os.environ.get("ELEVENLABS_API_KEY"))
32
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
33
  output_dir = 'outputs'
 
35
 
36
  api_key = os.environ.get("ELEVENLABS_API_KEY")
37
  supported_languages = ['zh', 'en']
38
+
39
  # Function to get all voices
40
  def get_voices(api_key):
41
  url = "https://api.elevenlabs.io/v1/voices"
 
50
  response = requests.request("DELETE", url, headers=headers)
51
  return response.status_code, response.text
52
 
53
+ # Predict function with rate limiting based on IP address
54
+ @limiter.limit("1 per 10 minutes")
55
  def predict(prompt, style, audio_file_pth):
56
  text_hint = ''
57
  if len(prompt) < 2:
 
67
  description="A trial voice model for testing",
68
  files=[audio_file_pth],
69
  )
70
+ # Generate audio from text
71
  audio = client.generate(text=prompt, voice=voice)
72
  save(audio, f'{output_dir}/output.wav')
73
 
 
98
  style_gr = gr.Dropdown(
99
  label="Style",
100
  choices=['default', 'whispering', 'cheerful', 'terrified', 'angry', 'sad', 'friendly'],
101
+ info="Please upload a reference audio file that is at least 1 minute long. For best results, ensure the audio is clear. You can use Adobe Podcast Enhance(https://podcast.adobe.com/enhance) to improve the audio quality before uploading.",
102
  max_choices=1,
103
  value="default",
104
  )
 
114
  audio_gr = gr.Audio(label="Synthesised Audio", autoplay=True)
115
  ref_audio_gr = gr.Audio(label="Reference Audio Used")
116
 
117
def handle_click(prompt, style, audio_file_pth):
    """Gradio click handler: forward the UI inputs to ``predict``.

    Args:
        prompt: Text entered in the input textbox.
        style: Value selected in the "Style" dropdown.
        audio_file_pth: Path of the uploaded reference audio.

    Returns:
        Whatever ``predict`` returns; the click wiring maps it onto the
        ``out_text_gr``, ``audio_gr`` and ``ref_audio_gr`` outputs.
    """
    # The previous version read ``request.remote_addr`` and passed the result
    # as a fourth positional argument. ``request`` is not among this module's
    # visible imports (only ``Flask`` is imported from flask), and ``predict``
    # is declared with exactly three parameters, so every click raised before
    # any audio was generated.
    # NOTE(review): if the caller's IP is needed here, Gradio supplies it via a
    # parameter annotated with ``gr.Request`` -- confirm against the Gradio
    # version in use before relying on it for rate limiting.
    return predict(prompt, style, audio_file_pth)
120
+
121
+ tts_button.click(handle_click,
122
+ [input_text_gr, style_gr, ref_gr], outputs=[out_text_gr, audio_gr, ref_audio_gr])
123
 
124
  demo.queue()
125
  demo.launch(debug=True, show_api=False, share=args.share)
126
 
127
  # Hide Gradio footer
128
  css = "footer {visibility: hidden}"