IliaLarchenko committed on
Commit
e98b248
•
1 Parent(s): 166b0a2

Auto streaming for TTS

Browse files
Files changed (2) hide show
  1. api/audio.py +71 -38
  2. app.py +12 -6
api/audio.py CHANGED
@@ -29,6 +29,7 @@ def numpy_audio_to_bytes(audio_data):
29
  class STTManager:
30
  def __init__(self, config):
31
  self.config = config
 
32
 
33
  def speech_to_text(self, audio, convert_to_bytes=True):
34
  if convert_to_bytes:
@@ -57,44 +58,76 @@ class STTManager:
57
 
58
 
59
  class TTSManager:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  def __init__(self, config):
61
  self.config = config
62
-
63
- def read_last_message(self, chat_display):
64
- if chat_display:
65
- text = chat_display[-1][1]
66
-
67
- headers = {"Authorization": "Bearer " + self.config.tts.key}
68
- try:
69
- if self.config.tts.type == "OPENAI_API":
70
- data = {"model": self.config.tts.name, "input": text, "voice": "alloy", "response_format": "opus"}
71
-
72
- if os.environ.get("STREAMING", False):
73
- with requests.post(self.config.tts.url, headers=headers, json=data, stream=True) as response:
74
- if response.status_code != 200:
75
- error_details = response.json().get("error", "No error message provided")
76
- raise APIError("TTS Error: OPENAI API error", status_code=response.status_code, details=error_details)
77
- else:
78
- yield from response.iter_content(chunk_size=1024)
79
- else:
80
- response = requests.post(self.config.tts.url, headers=headers, json=data)
81
- if response.status_code != 200:
82
- error_details = response.json().get("error", "No error message provided")
83
- raise APIError("TTS Error: OPENAI API error", status_code=response.status_code, details=error_details)
84
- return response.content
85
- elif self.config.tts.type == "HF_API":
86
- if os.environ.get("STREAMING", False):
87
- raise APIError("Streaming not supported for HF API TTS")
88
- else:
89
- response = requests.post(self.config.tts.url, headers=headers, json={"inputs": text})
90
- if response.status_code != 200:
91
- error_details = response.json().get("error", "No error message provided")
92
- raise APIError("TTS Error: HF API error", status_code=response.status_code, details=error_details)
93
- return response.content
94
-
95
- except APIError as e:
96
- raise
97
- except Exception as e:
98
- raise APIError(f"TTS Error: Unexpected error: {e}")
99
  else:
100
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  class STTManager:
30
  def __init__(self, config):
31
  self.config = config
32
+ self.streaming = os.getenv("STREAMING", False)
33
 
34
  def speech_to_text(self, audio, convert_to_bytes=True):
35
  if convert_to_bytes:
 
58
 
59
 
60
  class TTSManager:
61
+ def test_tts(self):
62
+ try:
63
+ self.read_text("Handshake")
64
+ return True
65
+ except:
66
+ return False
67
+
68
+ def test_tts_stream(self):
69
+ try:
70
+ for _ in self.read_text_stream("Handshake"):
71
+ pass
72
+ return True
73
+ except:
74
+ return False
75
+
76
  def __init__(self, config):
77
  self.config = config
78
+ self.status = self.test_tts()
79
+ if self.status:
80
+ self.streaming = self.test_tts_stream()
81
+ else:
82
+ self.streaming = False
83
+ if self.streaming:
84
+ self.read_last_message = self.rlm_stream
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  else:
86
+ self.read_last_message = self.rlm
87
+
88
+ def read_text(self, text):
89
+ headers = {"Authorization": "Bearer " + self.config.tts.key}
90
+ try:
91
+ if self.config.tts.type == "OPENAI_API":
92
+ data = {"model": self.config.tts.name, "input": text, "voice": "alloy", "response_format": "opus"}
93
+ response = requests.post(self.config.tts.url, headers=headers, json=data)
94
+ elif self.config.tts.type == "HF_API":
95
+ response = requests.post(self.config.tts.url, headers=headers, json={"inputs": text})
96
+ if response.status_code != 200:
97
+ error_details = response.json().get("error", "No error message provided")
98
+ raise APIError(f"TTS Error: {self.config.tts.type} error", status_code=response.status_code, details=error_details)
99
+ except APIError as e:
100
+ raise
101
+ except Exception as e:
102
+ raise APIError(f"TTS Error: Unexpected error: {e}")
103
+
104
+ return response.content
105
+
106
+ def read_text_stream(self, text):
107
+ 1 / 0
108
+ if self.config.tts.type not in ["OPENAI_API"]:
109
+ raise APIError("TTS Error: Streaming not supported for this TTS type")
110
+ headers = {"Authorization": "Bearer " + self.config.tts.key}
111
+ data = {"model": self.config.tts.name, "input": text, "voice": "alloy", "response_format": "opus"}
112
+
113
+ try:
114
+ with requests.post(self.config.tts.url, headers=headers, json=data, stream=True) as response:
115
+ if response.status_code != 200:
116
+ error_details = response.json().get("error", "No error message provided")
117
+ raise APIError("TTS Error: OPENAI API error", status_code=response.status_code, details=error_details)
118
+ else:
119
+ yield from response.iter_content(chunk_size=1024)
120
+ except StopIteration:
121
+ pass
122
+ except APIError as e:
123
+ raise
124
+ except Exception as e:
125
+ raise APIError(f"TTS Error: Unexpected error: {e}")
126
+
127
+ def rlm(self, chat_history):
128
+ if chat_history:
129
+ return self.read_text(chat_history[-1][1])
130
+
131
+ def rlm_stream(self, chat_history):
132
+ if chat_history:
133
+ yield from self.read_text_stream(chat_history[-1][1])
app.py CHANGED
@@ -46,6 +46,14 @@ def hide_solution():
46
  return solution_acc, end_btn, problem_acc, audio_input
47
 
48
 
 
 
 
 
 
 
 
 
49
  # Interface
50
 
51
  with gr.Blocks(title="AI Interviewer") as demo:
@@ -53,18 +61,16 @@ with gr.Blocks(title="AI Interviewer") as demo:
53
  gr.Markdown(instruction["demo"])
54
 
55
  started_coding = gr.State(False)
56
- audio_output = gr.Audio(label="Play audio", autoplay=True, visible=False, streaming=os.environ.get("STREAMING", False))
57
  with gr.Tab("Instruction") as instruction_tab:
58
  with gr.Row():
59
  with gr.Column(scale=2):
60
  gr.Markdown(instruction["introduction"])
61
  with gr.Column(scale=1):
62
  space = " " * 10
63
- try:
64
- audio_test = tts.text_to_speech("Handshake")
65
- gr.Markdown(f"TTS status: 🟢{space} {config.tts.name}")
66
- except:
67
- gr.Markdown(f"TTS status: 🔴{space} {config.tts.name}")
68
 
69
  try:
70
  text_test = stt.speech_to_text(audio_test, False)
 
46
  return solution_acc, end_btn, problem_acc, audio_input
47
 
48
 
49
+ def get_status_color(obj):
50
+ if obj.status:
51
+ if obj.streaming:
52
+ return "🟢"
53
+ return "🟡"
54
+ return "🔴"
55
+
56
+
57
  # Interface
58
 
59
  with gr.Blocks(title="AI Interviewer") as demo:
 
61
  gr.Markdown(instruction["demo"])
62
 
63
  started_coding = gr.State(False)
64
+ audio_output = gr.Audio(label="Play audio", autoplay=True, visible=os.environ["DEBUG"], streaming=tts.streaming)
65
  with gr.Tab("Instruction") as instruction_tab:
66
  with gr.Row():
67
  with gr.Column(scale=2):
68
  gr.Markdown(instruction["introduction"])
69
  with gr.Column(scale=1):
70
  space = " " * 10
71
+
72
+ tts_status = get_status_color(tts)
73
+ gr.Markdown(f"TTS status: {tts_status}{space}{config.tts.name}")
 
 
74
 
75
  try:
76
  text_test = stt.speech_to_text(audio_test, False)