File size: 2,795 Bytes
0d2a17a
 
 
2a95377
b1523cd
2a95377
0d2a17a
 
 
 
 
 
 
 
27002e6
 
0d2a17a
 
1564098
 
 
 
0d2a17a
 
1564098
b1523cd
 
2a95377
0d2a17a
b1523cd
 
 
 
af12bd1
0d2a17a
2a95377
 
 
 
 
0d2a17a
 
2a95377
 
0d2a17a
b1523cd
 
2a95377
b1523cd
 
 
 
 
 
2a95377
 
 
 
 
 
 
 
 
 
b1523cd
 
0d2a17a
 
2a95377
b1523cd
2a95377
b1523cd
0d2a17a
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import os
from dotenv import load_dotenv
import streamlit as st
from groq import Groq
import tempfile
import requests  # Use for calling APIs if Groq's SDK doesn't support transcription

# Load environment variables (python-dotenv; e.g. from a local .env file)
load_dotenv()

# Streamlit UI header. Rendered before the client is created so the page
# still has a title if the configuration check below stops the script.
st.title("Voice-Voice Application")
st.markdown("Listen your voice using Whisper for transcription and TTS for voice generation.")

# Initialize Groq client.
# os.getenv returns None when the variable is unset; handing that (or an
# empty string) to Groq would surface as a raw traceback or as opaque
# authentication failures later, so stop early with a readable message.
groq_api_key = os.getenv("GROQ_API_KEY")
if not groq_api_key:
    st.error("GROQ_API_KEY is not set. Add it to your environment or .env file and reload the app.")
    st.stop()

client = Groq(api_key=groq_api_key)

# Audio upload widget. The result is compared against None further down,
# i.e. no file has been provided yet on this run.
accepted_audio_types = ["wav", "mp3", "mp4", "m4a"]
uploaded_file = st.file_uploader(
    label="Upload your audio file for transcription",
    type=accepted_audio_types,
)

# Upper bound (seconds) for the TTS HTTP call so a dead endpoint cannot
# freeze the app indefinitely.
TTS_TIMEOUT_SECONDS = 30


def _transcribe_audio(audio_bytes, suffix):
    """Transcribe raw audio bytes with the Groq Whisper model.

    The bytes are staged in a named temporary file and the opened file is
    passed to the transcription endpoint. The temporary file is always
    removed, and the read handle is always closed, before returning.

    Args:
        audio_bytes: Raw content of the uploaded audio file.
        suffix: File extension including the leading dot (e.g. ``".wav"``).
            NOTE(review): presumably the service relies on the file name's
            extension to detect the audio format; confirm against Groq docs.

    Returns:
        The value returned by the client for ``response_format="text"``,
        which the caller uses directly as the transcribed text.

    Raises:
        Any exception raised by the file system or the Groq client; the
        caller reports it in the UI.
    """
    temp_audio_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_audio:
            temp_audio_path = temp_audio.name
            temp_audio.write(audio_bytes)

        # Context manager guarantees the handle is closed before os.remove;
        # an open handle would make the removal fail on Windows.
        with open(temp_audio_path, "rb") as audio_file:
            return client.audio.transcriptions.create(
                file=audio_file,  # Use binary file for API
                model="whisper-large-v3-turbo",
                response_format="text",  # Adjust format if needed
            )
    finally:
        # Clean up temporary file
        if temp_audio_path is not None:
            os.remove(temp_audio_path)


if uploaded_file is not None:
    # Display uploaded audio
    mime_subtype = uploaded_file.type.split("/")[-1]
    st.audio(uploaded_file, format=f"audio/{mime_subtype}")
    st.write("Processing your audio file...")

    # Prefer the real file extension for the temp file: MIME subtypes such
    # as "mpeg", "x-m4a" or "x-wav" are not the extensions the user uploaded.
    # Fall back to the MIME subtype only if the name has no extension.
    file_suffix = os.path.splitext(uploaded_file.name)[1].lower() or f".{mime_subtype}"

    # Identity of the current upload, used to reuse a previous transcription.
    # file_id is read defensively in case the installed Streamlit lacks it.
    upload_key = (
        getattr(uploaded_file, "file_id", None),
        uploaded_file.name,
        uploaded_file.size,
    )

    try:
        # Streamlit re-executes the whole script on every widget interaction
        # (editing the text area, pressing the button). Cache the result in
        # session state so the audio is not re-sent to the API each time.
        cached = st.session_state.get("transcription_cache")
        if cached is None or cached["key"] != upload_key:
            cached = {
                "key": upload_key,
                # getvalue() returns the full content regardless of the
                # current stream position, unlike read().
                "text": _transcribe_audio(uploaded_file.getvalue(), file_suffix),
            }
            st.session_state["transcription_cache"] = cached

        # Extract the transcribed text
        transcribed_text = cached["text"]  # May vary; adjust based on API response
        st.success("Transcription completed!")
        st.write("**Transcribed Text:**", transcribed_text)

        # Voice Cloning (TTS Integration)
        st.markdown("---")
        st.subheader("Generate Speech from Transcription")
        tts_input = st.text_area("Enter text to generate speech:", value=transcribed_text)

        if st.button("Generate Speech"):
            if tts_input:
                # Use a TTS system to generate audio (placeholder)
                tts_response = requests.post(
                    "https://tts.api.url",  # Replace with actual TTS API URL
                    json={"text": tts_input, "voice": "en-US-Wavenet-D"},  # Adjust parameters
                    timeout=TTS_TIMEOUT_SECONDS,
                )
                if tts_response.status_code == 200:
                    st.audio(tts_response.content, format="audio/mp3")
                    st.success("Speech generation successful!")
                else:
                    # Error bodies are not guaranteed to be JSON; fall back
                    # to the raw text instead of raising a decode error that
                    # would hide the real failure.
                    try:
                        error_detail = tts_response.json()
                    except ValueError:
                        error_detail = tts_response.text
                    st.error(f"Error in TTS: {error_detail}")
            else:
                st.warning("Please enter some text.")

    except Exception as e:  # UI boundary: report any failure instead of crashing the page
        st.error(f"Error during processing: {e}")

# Footer: author credit, emitted on every run of the script.
footer_text = "Developed with ❤️ by Sanam Iftakhar"
st.markdown(footer_text)