Spaces:
Sleeping
Sleeping
File size: 2,795 Bytes
0d2a17a 2a95377 b1523cd 2a95377 0d2a17a 27002e6 0d2a17a 1564098 0d2a17a 1564098 b1523cd 2a95377 0d2a17a b1523cd af12bd1 0d2a17a 2a95377 0d2a17a 2a95377 0d2a17a b1523cd 2a95377 b1523cd 2a95377 b1523cd 0d2a17a 2a95377 b1523cd 2a95377 b1523cd 0d2a17a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
import os
from dotenv import load_dotenv
import streamlit as st
from groq import Groq
import tempfile
import requests # Use for calling APIs if Groq's SDK doesn't support transcription
# Load environment variables (e.g. from a local .env file) before reading the key.
load_dotenv()

# Page header.
st.title("Voice-Voice Application")
st.markdown("Listen your voice using Whisper for transcription and TTS for voice generation.")

# Fail early with a readable message instead of a raw traceback (or a late
# authentication error) when the API key is missing or empty.
groq_api_key = os.getenv("GROQ_API_KEY")
if not groq_api_key:
    st.error("GROQ_API_KEY is not set. Add it to your environment or .env file.")
    st.stop()

# Groq client used for the transcription request further down the script.
client = Groq(api_key=groq_api_key)

# Audio upload widget; the value is None until the user picks a file.
uploaded_file = st.file_uploader(
    "Upload your audio file for transcription",
    type=["wav", "mp3", "mp4", "m4a"],
)
if uploaded_file is not None:
    # Play the upload back so the user can confirm they picked the right file.
    audio_format = uploaded_file.type.split('/')[-1]
    st.audio(uploaded_file, format=f"audio/{audio_format}")
    st.write("Processing your audio file...")

    # Prefer the real filename extension for the temp file: the MIME subtype is
    # not always a usable extension (e.g. "audio/mpeg" for .mp3 uploads).
    file_suffix = os.path.splitext(uploaded_file.name)[1] or f".{audio_format}"

    temp_audio_path = None
    try:
        # Persist the upload to disk so it can be sent to the API as a named file.
        with tempfile.NamedTemporaryFile(delete=False, suffix=file_suffix) as temp_audio:
            temp_audio_path = temp_audio.name
            # getvalue() returns the whole buffer regardless of the current
            # read position of the uploaded file object.
            temp_audio.write(uploaded_file.getvalue())

        # Open the file in a context manager so the handle is closed before the
        # cleanup below; a leaked handle makes os.remove fail on Windows.
        with open(temp_audio_path, "rb") as audio_file:
            transcription_response = client.audio.transcriptions.create(
                file=audio_file,
                model="whisper-large-v3-turbo",
                response_format="text",
            )

        # NOTE(review): with response_format="text" the SDK is expected to hand
        # back the text directly; fall back to a `.text` attribute in case an
        # object is returned instead -- confirm against the Groq SDK version in use.
        transcribed_text = getattr(transcription_response, "text", transcription_response)

        st.success("Transcription completed!")
        st.write("**Transcribed Text:**", transcribed_text)

        # Voice Cloning (TTS Integration)
        st.markdown("---")
        st.subheader("Generate Speech from Transcription")
        tts_input = st.text_area("Enter text to generate speech:", value=transcribed_text)

        if st.button("Generate Speech"):
            if tts_input:
                # Placeholder TTS endpoint; replace with the real service URL.
                # The timeout keeps the app from hanging on an unresponsive host.
                tts_response = requests.post(
                    "https://tts.api.url",
                    json={"text": tts_input, "voice": "en-US-Wavenet-D"},
                    timeout=30,
                )
                if tts_response.status_code == 200:
                    st.audio(tts_response.content, format="audio/mp3")
                    st.success("Speech generation successful!")
                else:
                    # Error bodies are not guaranteed to be JSON, so report the
                    # raw text rather than calling .json() and risking a raise.
                    st.error(
                        f"Error in TTS (HTTP {tts_response.status_code}): {tts_response.text}"
                    )
            else:
                st.warning("Please enter some text.")
    except Exception as e:
        # Top-level UI boundary: show the failure to the user instead of a traceback.
        st.error(f"Error during processing: {e}")
    finally:
        # Clean up the temporary file, but only if it was actually created.
        if temp_audio_path is not None and os.path.exists(temp_audio_path):
            os.remove(temp_audio_path)

# Footer
st.markdown("Developed with ❤️ by Sanam Iftakhar")
|