rdlf committed
Commit a741cb6 · verified · Parent: 68d6ccb

Update app.py

Files changed (1): app.py (+11, -15)
app.py CHANGED
@@ -1,23 +1,20 @@
 import streamlit as st
 from PIL import Image
-from groq import Groq
 import os
-from transformers import pipeline
+from openai import OpenAI, Whisper
 
 st.image('calamo.png', caption="", use_column_width=False)
 
-import scipy.io.wavfile
-
-synthesizer = pipeline("text-to-speech", "suno/bark")
-
-
+# Initialize OpenAI and Whisper
+openai = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
+whisper = Whisper(openai)
 
 client = Groq(
     api_key=os.environ.get("GROQ_API_KEY"),
 )
+
 # Other content of your app
 st.title("plAIn Voice")
-# Add more components here
 
 # Define a function to process the input
 def process_text(input_text):
@@ -42,11 +39,13 @@ def process_text(input_text):
         model="mixtral-8x7b-32768",
     )
     return (chat_completion.choices[0].message.content)
+
 def generate_audio(input_text):
     tts = process_text(input_text)
-    speech = synthesizer(tts, forward_params={"do_sample": True})
-    scipy.io.wavfile.write("bark_out.wav", rate=speech["sampling_rate"], data=speech["audio"])
-    return "bark_out.wav"
+    speech = whisper.synthesize(tts)
+    with open("whisper_out.wav", "wb") as f:
+        f.write(speech.audio)
+    return "whisper_out.wav"
 
 user_input = st.text_input("Pega un texto para aclararlo y escuchar una lectura.")
 
@@ -55,7 +54,4 @@ if st.button('Aclarar'):
         st.write("Pega un texto aquí")
     else:
         speech_file = generate_audio(user_input)
-        st.audio(speech_file, format='audio/wav')
-
-
-
+        st.audio(speech_file, format='audio/wav')
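
Reviewer note: as committed, this change likely breaks the app in two places. The openai Python SDK exports no Whisper class (`from openai import Whisper` fails at import time), and Whisper is OpenAI's speech-to-text model, not a synthesizer, so `whisper.synthesize(tts)` has no real counterpart. The first hunk also drops `from groq import Groq` while keeping `client = Groq(...)`, which raises a NameError. Below is a minimal sketch of what a working replacement might look like, assuming the openai>=1.x SDK and its text-to-speech endpoint; the `tts-1` model, `alloy` voice, helper name, and output path are illustrative choices, not taken from this commit.

import os

from openai import OpenAI  # the SDK exposes TTS via client.audio.speech; there is no Whisper class

openai_client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

def synthesize_speech(text: str, out_path: str = "tts_out.wav") -> str:
    """Render text to a WAV file with OpenAI's TTS endpoint and return the path."""
    speech = openai_client.audio.speech.create(
        model="tts-1",          # assumed model; any published TTS model would do
        voice="alloy",          # assumed voice
        input=text,
        response_format="wav",  # matches st.audio(..., format='audio/wav')
    )
    with open(out_path, "wb") as f:
        f.write(speech.content)  # response body is the binary audio
    return out_path

With that helper, `generate_audio` reduces to `return synthesize_speech(process_text(input_text))`, and `from groq import Groq` needs to stay in the import block so `client = Groq(...)` keeps working.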