File size: 2,320 Bytes
321fb0f
 
 
ae68901
 
 
 
c370778
 
 
 
321fb0f
 
 
 
7c4c876
 
321fb0f
c370778
c39573e
7c4c876
c39573e
 
 
 
 
 
 
 
 
 
 
 
 
7c4c876
 
c39573e
 
 
7c4c876
c39573e
 
321fb0f
ae68901
321fb0f
ae68901
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
321fb0f
 
ae68901
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import torch
import torchaudio
import streamlit as st
import requests  
import streamlit.components.v1 as components  
import asyncio  
import io
import os
from dotenv import load_dotenv

# Merge variables from a local .env file into the process environment before
# anything below reads from it.
load_dotenv()

# Model identifier and task name. Neither is referenced elsewhere in the
# visible part of this script.
task = 'indri-tts'
model_id = '11mlabs/indri-0.1-124m-tts'

# Root URL of the TTS backend; None when BASE_URL is not set.
baseUrl = os.environ.get("BASE_URL")

# Page header.
st.title("Indri")
st.subheader("Ultrafast multi-modal AI")

# Maps each speaker token (the selectbox option value) to its display label:
# country flag, gender emoji, and a short persona description.
# The labels were previously mojibake (UTF-8 emoji bytes decoded with a
# single-byte codepage); the intended emoji are restored here.
# NOTE(review): generate_audio() sends the label, not the token, as the
# "speaker" field, and two tokens share the same label -- confirm which value
# the backend expects.
speakers = {
    "[spkr_63]": "🇬🇧 👨 book reader",
    "[spkr_67]": "🇺🇸 👨 influencer",
    "[spkr_68]": "🇮🇳 👨 book reader",
    "[spkr_69]": "🇮🇳 👨 book reader",
    "[spkr_70]": "🇮🇳 👨 motivational speaker",
    "[spkr_62]": "🇮🇳 👨 book reader heavy",
    "[spkr_53]": "🇮🇳 👩 recipe reciter",
    "[spkr_60]": "🇮🇳 👩 book reader",
    "[spkr_74]": "🇺🇸 👨 book reader",
    "[spkr_75]": "🇮🇳 👨 entrepreneur",
    "[spkr_76]": "🇬🇧 👨 nature lover",
    "[spkr_77]": "🇮🇳 👨 influencer",
    "[spkr_66]": "🇮🇳 👨 politician",
}

# Input widgets, grouped in a single container: the speaker picker first,
# then the text box.
with st.container():
    st.markdown("### Speaker Selection")
    # Option values are the speaker tokens; the dropdown shows their labels.
    speaker_id = st.selectbox(
        "Select a speaker:",
        options=list(speakers),
        format_func=lambda token: speakers[token],
    )

    st.markdown("### Text Input")
    text_input = st.text_area(
        "Enter text for TTS (max 200 characters):",
        max_chars=200,
    )

async def generate_audio():
    """Request speech for the current text and speaker, then play it in the page.

    Reads the module-level ``text_input``, ``speaker_id``, ``speakers`` and
    ``baseUrl``. POSTs the text and the selected speaker's label to
    ``{baseUrl}/tts``, decodes the returned audio, re-encodes it as WAV and
    passes the bytes to ``st.audio``.

    Empty input, a missing ``BASE_URL``, network errors and non-2xx responses
    are reported through Streamlit messages rather than raised.

    Declared ``async`` only because the caller drives it with ``asyncio.run``;
    the body never awaits, so the HTTP call blocks until it completes or
    times out.
    """
    import tempfile  # local import: only this function needs it

    # Whitespace-only input is treated the same as no input.
    if not (text_input and text_input.strip()):
        st.warning("Please enter text to generate audio.")
        return

    # Without this guard the request would be sent to the literal "None/tts".
    if not baseUrl:
        st.error("BASE_URL is not configured; set it in the environment or the .env file.")
        return

    # NOTE(review): this sends the display label, not the "[spkr_NN]" token --
    # confirm against the backend's expected "speaker" value.
    speaker_name = speakers[speaker_id]

    # (connect, read) timeouts in seconds, so a stalled backend cannot hang
    # the app indefinitely.
    request_timeout = (10, 120)

    try:
        response = requests.post(
            f"{baseUrl}/tts",
            json={
                "text": text_input,
                "speaker": speaker_name,
            },
            headers={
                "accept": "application/json",
                "Content-Type": "application/json",
            },
            timeout=request_timeout,
        )
    except requests.RequestException as exc:
        st.error(f"Could not reach the TTS service: {exc}")
        return

    if not response.ok:
        st.warning(f"Received invalid response format. Status Code: {response.status_code}, Response: {response.text}")
        return

    audio_tensor, sample_rate = torchaudio.load(io.BytesIO(response.content))

    # Re-encode into a per-call temporary directory instead of a fixed
    # 'output.wav' in the working directory: concurrent sessions would
    # overwrite each other's file, and it was never cleaned up.
    with tempfile.TemporaryDirectory() as tmp_dir:
        wav_path = os.path.join(tmp_dir, "output.wav")
        torchaudio.save(wav_path, audio_tensor, sample_rate=sample_rate)
        with open(wav_path, "rb") as wav_file:
            wav_bytes = wav_file.read()

    st.audio(wav_bytes, format="audio/wav")

# The button returns True only on the rerun triggered by its click; run the
# coroutine to completion on that rerun.
generate_clicked = st.button("Generate Audio", key="generate_audio")
if generate_clicked:
    asyncio.run(generate_audio())