Spaces:

Amelia-James
/

voice-cloning-app

Running

App Files Community

Amelia-James committed 12 days ago

Commit

2a95377

•

1 Parent(s): 6325c8c

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -15

app.py CHANGED Viewed

@@ -1,8 +1,9 @@
 import os
 from dotenv import load_dotenv
 import streamlit as st
-from groq import Groq  # Ensure this is the correct import based on Groq's SDK
 import tempfile
 # Load environment variables
 load_dotenv()
@@ -12,7 +13,7 @@ client = Groq(api_key=os.getenv("GROQ_API_KEY"))
 # Streamlit UI
 st.title("Voice Cloning Application")
-st.markdown("Clone your voice using Groq's Whisper Model and generate natural responses.")
 # Upload audio file
 uploaded_file = st.file_uploader(
@@ -24,42 +25,50 @@ if uploaded_file is not None:
     # Display uploaded audio
     audio_format = uploaded_file.type.split('/')[-1]
     st.audio(uploaded_file, format=f"audio/{audio_format}")
-    st.write("Transcription in progress...")
     # Save the uploaded file to a temporary location
     with tempfile.NamedTemporaryFile(delete=False, suffix='.' + audio_format) as temp_audio:
         temp_audio.write(uploaded_file.read())
         temp_audio_path = temp_audio.name
-    # Transcription Logic
     try:
-        # Replace the following with the correct transcription method provided by Groq
-        transcription_response = client.transcriptions.create(
-            file_path=temp_audio_path,
-            model="whisper-large-v3"
         )
-        # Extract the transcribed text from the response
-        transcribed_text = transcription_response['transcription']  # Adjust based on actual response structure
         st.success("Transcription completed!")
         st.write("**Transcribed Text:**", transcribed_text)
-        # Placeholder for voice cloning (TTS integration can go here)
         st.markdown("---")
         st.subheader("Generate Speech from Transcription")
         tts_input = st.text_area("Enter text to generate speech:", value=transcribed_text)
         if st.button("Generate Speech"):
             if tts_input:
-                # Simulate TTS functionality (placeholder for TTS model integration)
-                st.success("Generated speech successfully! (Placeholder)")
             else:
                 st.warning("Please enter some text.")
     except Exception as e:
-        st.error(f"Error during transcription: {e}")
     finally:
-        # Clean up the temporary file
         os.remove(temp_audio_path)
 # Footer

 import os
 from dotenv import load_dotenv
 import streamlit as st
+from groq import Groq
 import tempfile
+import requests  # Use for calling APIs if Groq's SDK doesn't support transcription
 # Load environment variables
 load_dotenv()
 # Streamlit UI
 st.title("Voice Cloning Application")
+st.markdown("Clone your voice using Whisper for transcription and TTS for voice generation.")
 # Upload audio file
 uploaded_file = st.file_uploader(
     # Display uploaded audio
     audio_format = uploaded_file.type.split('/')[-1]
     st.audio(uploaded_file, format=f"audio/{audio_format}")
+    st.write("Processing your audio file...")
     # Save the uploaded file to a temporary location
     with tempfile.NamedTemporaryFile(delete=False, suffix='.' + audio_format) as temp_audio:
         temp_audio.write(uploaded_file.read())
         temp_audio_path = temp_audio.name
     try:
+        # Call transcription API (adjust as per Groq API documentation)
+        transcription_response = client.audio.transcriptions.create(
+            file=open(temp_audio_path, "rb"),  # Use binary file for API
+            model="whisper-large-v3-turbo",
+            response_format="text"  # Adjust format if needed
         )
+        # Extract the transcribed text
+        transcribed_text = transcription_response  # May vary; adjust based on API response
         st.success("Transcription completed!")
         st.write("**Transcribed Text:**", transcribed_text)
+        # Voice Cloning (TTS Integration)
         st.markdown("---")
         st.subheader("Generate Speech from Transcription")
         tts_input = st.text_area("Enter text to generate speech:", value=transcribed_text)
         if st.button("Generate Speech"):
             if tts_input:
+                # Use a TTS system to generate audio (placeholder)
+                tts_response = requests.post(
+                    "https://tts.api.url",  # Replace with actual TTS API URL
+                    json={"text": tts_input, "voice": "en-US-Wavenet-D"}  # Adjust parameters
+                )
+                if tts_response.status_code == 200:
+                    st.audio(tts_response.content, format="audio/mp3")
+                    st.success("Speech generation successful!")
+                else:
+                    st.error(f"Error in TTS: {tts_response.json()}")
             else:
                 st.warning("Please enter some text.")
     except Exception as e:
+        st.error(f"Error during processing: {e}")
     finally:
+        # Clean up temporary file
         os.remove(temp_audio_path)
 # Footer