amir22010 committed
Commit 2e12d6c · verified · 1 Parent(s): 2cdbeff

Update app.py

Files changed (1)
  1. app.py +33 -18
app.py CHANGED
@@ -2,13 +2,14 @@ import gradio as gr
 from llama_cpp import Llama
 import os
 from groq import Groq
-import numpy
 
 #tts
 from balacoon_tts import TTS
 from threading import Lock
-from io import BytesIO
 from huggingface_hub import hf_hub_download, list_repo_files
+from pydub import AudioSegment
+import io
+import tempfile
 
 #tts cpu model
 tts_model_str = "en_us_hifi_jets_cpu.addon"
@@ -22,8 +23,7 @@ for name in list_repo_files(repo_id="balacoon/tts"):
         local_dir=os.getcwd(),
     )
 
-#tts speaker
-speaker_str = "92"
+tts = TTS(os.path.join(os.getcwd(), tts_model_str))
 
 # locker that disallows access to the tts object from more than one thread
 locker = Lock()
@@ -42,6 +42,21 @@ llm = Llama.from_pretrained(
     verbose=False
 )
 
+def text_to_speech(text):
+    with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_file:
+        with locker:
+            audio_data = tts.synthesize(text, "92")
+        temp_file.write(audio_data)
+    return temp_file.name
+
+def combine_audio_files(audio_files):
+    combined = AudioSegment.empty()
+    for audio_file in audio_files:
+        segment = AudioSegment.from_wav(audio_file)
+        combined += segment
+        os.remove(audio_file)  # remove temporary chunk files after reading
+    return combined
+
 #guardrail model
 guard_llm = "llama-3.1-8b-instant"
 
@@ -96,16 +111,16 @@ async def greet(product,description):
     ]
     response = client.chat.completions.create(model=guard_llm, messages=messages, temperature=0)
     if response.choices[0].message.content != "not moderated":
-        audio_stream = BytesIO()
-        tts = TTS(os.path.join(os.getcwd(), tts_model_str))
+        audio_files = []
         a_list = ["Sorry, I can't proceed for generating marketing email. Your content needs to be moderated first. Thank you!"]
-        with locker:
-            audio_stream.write(numpy.ascontiguousarray(tts.synthesize(a_list[0], speaker_str)))
-        audio_stream.seek(0)
-        yield audio_stream
+        audio_file = text_to_speech(a_list[0])
+        audio_files.append(audio_file)
+        final_audio = combine_audio_files(audio_files)
+        output_file = "final_output.mp3"
+        final_audio.export(output_file, format="mp3")
+        yield output_file  # gr.Audio expects a file path, not an AudioSegment
     else:
-        audio_stream = BytesIO()
-        tts = TTS(os.path.join(os.getcwd(), tts_model_str))
+        audio_files = []
         output = llm.create_chat_completion(
             messages=[
                 {
@@ -122,12 +137,12 @@ async def greet(product,description):
         for chunk in output:
             delta = chunk['choices'][0]['delta']
             if 'content' in delta:
-                with locker:
-                    audio_stream.write(numpy.ascontiguousarray(tts.synthesize(delta.get('content', ''), speaker_str)))
-                # partial_message = partial_message + delta.get('content', '')
-                # yield partial_message
-        audio_stream.seek(0)
-        yield audio_stream
+                audio_file = text_to_speech(delta.get('content', ''))
+                audio_files.append(audio_file)
+        final_audio = combine_audio_files(audio_files)
+        output_file = "final_output.mp3"
+        final_audio.export(output_file, format="mp3")
+        yield output_file  # yield the exported MP3 path for gr.Audio
 
 demo = gr.Interface(fn=greet, inputs=["text","text"], outputs=gr.Audio(), concurrency_limit=10)
 demo.launch()
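A note on the new text_to_speech helper: tts.synthesize returns raw audio samples (the removed code wrapped its result in numpy.ascontiguousarray before buffering it), so writing them straight into a .wav temp file produces a headerless file that AudioSegment.from_wav will refuse to parse. Below is a minimal sketch of a header-writing variant, assuming synthesize returns 16-bit mono PCM as a NumPy array and that the model's rate is exposed via get_sampling_rate(); both are assumptions about the balacoon_tts API, not facts shown in this commit.

import wave

def text_to_speech(text):
    # delete=False keeps the file on disk after the context manager exits;
    # combine_audio_files removes it once it has been read.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
        with locker:
            samples = tts.synthesize(text, "92")  # speaker id "92", as in the commit
        with wave.open(temp_file, "wb") as wav:
            wav.setnchannels(1)                        # assumed mono
            wav.setsampwidth(2)                        # assumed 16-bit samples
            wav.setframerate(tts.get_sampling_rate())  # assumed balacoon API
            wav.writeframes(samples.tobytes())
    return temp_file.name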
 
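The commit also imports io without using it, and the tempfile-plus-pydub path round-trips every streamed chunk through disk. Under the same assumptions as above, the temp files can be skipped by building each AudioSegment straight from the raw samples and exporting once; chunks_to_mp3 below is a hypothetical helper, and MP3 export requires ffmpeg on the PATH.

def chunks_to_mp3(texts, out_path="final_output.mp3"):
    combined = AudioSegment.empty()
    for text in texts:
        with locker:
            samples = tts.synthesize(text, "92")
        combined += AudioSegment(
            data=samples.tobytes(),
            sample_width=2,                      # assumed 16-bit samples
            frame_rate=tts.get_sampling_rate(),  # assumed balacoon API
            channels=1,                          # assumed mono
        )
    combined.export(out_path, format="mp3")      # pydub shells out to ffmpeg
    return out_path

greet would then collect the streamed delta texts into a list and yield chunks_to_mp3(texts) once at the end.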