amir22010 committed
Commit 2e12d6c · verified · 1 Parent(s): 2cdbeff

Update app.py

Files changed (1)
  1. app.py +33 -18
app.py CHANGED
@@ -2,13 +2,14 @@ import gradio as gr
 from llama_cpp import Llama
 import os
 from groq import Groq
-import numpy
 
 #tts
 from balacoon_tts import TTS
 from threading import Lock
-from io import BytesIO
 from huggingface_hub import hf_hub_download, list_repo_files
+from pydub import AudioSegment
+import io
+import tempfile
 
 #tts cpu model
 tts_model_str = "en_us_hifi_jets_cpu.addon"
@@ -22,8 +23,7 @@ for name in list_repo_files(repo_id="balacoon/tts"):
         local_dir=os.getcwd(),
     )
 
-#tts speaker
-speaker_str = "92"
+tts = TTS(os.path.join(os.getcwd(), tts_model_str))
 
 # locker that disallows access to the tts object from more than one thread
 locker = Lock()
@@ -42,6 +42,21 @@ llm = Llama.from_pretrained(
     verbose=False
 )
 
+def text_to_speech(text):
+    with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_file:
+        with locker:
+            audio_data = tts.synthesize(text, "92")
+        temp_file.write(audio_data)
+    return temp_file.name
+
+def combine_audio_files(audio_files):
+    combined = AudioSegment.empty()
+    for audio_file in audio_files:
+        segment = AudioSegment.from_wav(audio_file)
+        combined += segment
+        os.remove(audio_file)  # remove temporary chunk files after reading
+    return combined
+
 #guardrail model
 guard_llm = "llama-3.1-8b-instant"
 
@@ -96,16 +111,16 @@ async def greet(product,description):
     ]
     response = client.chat.completions.create(model=guard_llm, messages=messages, temperature=0)
     if response.choices[0].message.content != "not moderated":
-        audio_stream = BytesIO()
-        tts = TTS(os.path.join(os.getcwd(), tts_model_str))
+        audio_files = []
         a_list = ["Sorry, I can't proceed for generating marketing email. Your content needs to be moderated first. Thank you!"]
-        with locker:
-            audio_stream.write(numpy.ascontiguousarray(tts.synthesize(a_list[0], speaker_str)))
-        audio_stream.seek(0)
-        yield audio_stream
+        audio_file = text_to_speech(a_list[0])
+        audio_files.append(audio_file)
+        final_audio = combine_audio_files(audio_files)
+        output_file = "final_output.mp3"
+        final_audio.export(output_file, format="mp3")
+        yield output_file  # gr.Audio expects a file path, not an AudioSegment
     else:
-        audio_stream = BytesIO()
-        tts = TTS(os.path.join(os.getcwd(), tts_model_str))
+        audio_files = []
         output = llm.create_chat_completion(
             messages=[
                 {
@@ -122,12 +137,12 @@ async def greet(product,description):
         for chunk in output:
             delta = chunk['choices'][0]['delta']
             if 'content' in delta:
-                with locker:
-                    audio_stream.write(numpy.ascontiguousarray(tts.synthesize(delta.get('content', ''), speaker_str)))
-                # partial_message = partial_message + delta.get('content', '')
-                # yield partial_message
-        audio_stream.seek(0)
-        yield audio_stream
+                audio_file = text_to_speech(delta.get('content', ''))
+                audio_files.append(audio_file)
+        final_audio = combine_audio_files(audio_files)
+        output_file = "final_output.mp3"
+        final_audio.export(output_file, format="mp3")
+        yield output_file  # yield the exported MP3 path for gr.Audio
 
 demo = gr.Interface(fn=greet, inputs=["text","text"], outputs=gr.Audio(), concurrency_limit=10)
 demo.launch()
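A note on the new text_to_speech helper: tts.synthesize returns raw audio samples (the removed code wrapped its result in numpy.ascontiguousarray before buffering it), so writing them straight into a .wav temp file produces a headerless file that AudioSegment.from_wav will refuse to parse. Below is a minimal sketch of a header-writing variant, assuming synthesize returns 16-bit mono PCM as a NumPy array and that the model's rate is exposed via get_sampling_rate(); both are assumptions about the balacoon_tts API, not facts shown in this commit.

import wave

def text_to_speech(text):
    # delete=False keeps the file on disk after the context manager exits;
    # combine_audio_files removes it once it has been read.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
        with locker:
            samples = tts.synthesize(text, "92")  # speaker id "92", as in the commit
        with wave.open(temp_file, "wb") as wav:
            wav.setnchannels(1)                        # assumed mono
            wav.setsampwidth(2)                        # assumed 16-bit samples
            wav.setframerate(tts.get_sampling_rate())  # assumed balacoon API
            wav.writeframes(samples.tobytes())
    return temp_file.name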
 
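The commit also imports io without using it, and the tempfile-plus-pydub path round-trips every streamed chunk through disk. Under the same assumptions as above, the temp files can be skipped by building each AudioSegment straight from the raw samples and exporting once; chunks_to_mp3 below is a hypothetical helper, and MP3 export requires ffmpeg on the PATH.

def chunks_to_mp3(texts, out_path="final_output.mp3"):
    combined = AudioSegment.empty()
    for text in texts:
        with locker:
            samples = tts.synthesize(text, "92")
        combined += AudioSegment(
            data=samples.tobytes(),
            sample_width=2,                      # assumed 16-bit samples
            frame_rate=tts.get_sampling_rate(),  # assumed balacoon API
            channels=1,                          # assumed mono
        )
    combined.export(out_path, format="mp3")      # pydub shells out to ffmpeg
    return out_path

greet would then collect the streamed delta texts into a list and yield chunks_to_mp3(texts) once at the end.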