ruslanmv committed on
Commit
d07996d
•
1 Parent(s): 57589cd

First commit

Browse files
README.md CHANGED
@@ -3,8 +3,8 @@ title: Text To Voice
3
  emoji: πŸ‘
4
  colorFrom: green
5
  colorTo: yellow
6
- sdk: gradio
7
- sdk_version: 4.36.1
8
  app_file: app.py
9
  pinned: false
10
  ---
 
3
  emoji: πŸ‘
4
  colorFrom: green
5
  colorTo: yellow
6
+ sdk: streamlit
7
+ sdk_version: 1.28.2
8
  app_file: app.py
9
  pinned: false
10
  ---
app.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import numpy as np
3
+ import torch
4
+ from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
5
+ import soundfile as sf
6
+ from io import StringIO
7
+
8
# Load models outside of function calls for efficiency.
# st.cache_resource replaces the deprecated st.cache(allow_output_mutation=True)
# for objects such as ML models that should be created once and shared.
@st.cache_resource
def load_models():
    """Load the SpeechT5 text-to-speech model, its processor and the vocoder.

    Returns:
        A ``(model, processor, vocoder)`` tuple loaded from the
        ``microsoft/speecht5_tts`` (model and processor) and
        ``microsoft/speecht5_hifigan`` (vocoder) checkpoints.
    """
    model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
    processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
    vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
    return model, processor, vocoder


model, processor, vocoder = load_models()
17
+
18
# Load speaker embeddings.
# st.cache_resource replaces the deprecated st.cache(allow_output_mutation=True).
@st.cache_resource
def get_speaker_embeddings():
    """Load the bundled speaker embedding as a tensor with a leading axis.

    Returns:
        A ``torch.Tensor`` built from
        ``cmu_us_clb_arctic-wav-arctic_a0144.npy`` with an extra dimension
        inserted at position 0.
    """
    speaker_embeddings = np.load("cmu_us_clb_arctic-wav-arctic_a0144.npy")
    return torch.tensor(speaker_embeddings).unsqueeze(0)


speaker_embeddings = get_speaker_embeddings()
25
+
26
# Improved Styling (assuming style.css is present in the working directory)
def local_css(file_name):
    """Inject the contents of a CSS file into the page inside a ``<style>`` tag.

    Args:
        file_name: Path of the stylesheet to read.
    """
    # Decode as UTF-8 explicitly so the result does not depend on the host's
    # default locale encoding.
    with open(file_name, encoding="utf-8") as f:
        st.markdown(f'<style>{f.read()}</style>', unsafe_allow_html=True)


local_css("style.css")  # Apply custom CSS styles

# Streamlit Layout
st.title("Text-to-Voice Conversion")
st.markdown("Convert your text to speech using advanced AI models.")
36
+
37
# Function to convert text to speech
def text_to_speech(text):
    """Synthesize ``text`` into one WAV file per text segment.

    The text is wrapped into segments of at most 100 characters. Segments
    break at whitespace, so a word is not cut in half between two separately
    synthesized clips; only a single word longer than the limit is split.

    Args:
        text: The text to convert.

    Returns:
        A list of paths ``speech_segment_<i>.wav``, one per segment, in order.
        The list is empty when ``text`` contains no non-whitespace characters
        or when any step raises; in the latter case the error is shown on the
        page with ``st.error``.
    """
    import textwrap

    try:
        max_length = 100  # Set a max length as per model's capability
        # textwrap.wrap returns [] for empty or whitespace-only input.
        segments = textwrap.wrap(text, width=max_length)
        audio_paths = []

        for i, segment in enumerate(segments):
            inputs = processor(text=segment, return_tensors="pt")
            spectrogram = model.generate_speech(inputs["input_ids"], speaker_embeddings)
            with torch.no_grad():
                speech = vocoder(spectrogram)
            audio_path = f"speech_segment_{i}.wav"
            sf.write(audio_path, speech.numpy(), samplerate=16000)
            audio_paths.append(audio_path)

        return audio_paths
    except Exception as e:
        # UI boundary: report the failure on the page instead of crashing the app.
        st.error(f"Error in text-to-speech conversion: {e}")
        return []
57
+
58
# Function to combine audio segments
def combine_audio_segments(paths, default_samplerate=16000):
    """Concatenate audio files, in order, into ``combined_speech.wav``.

    Args:
        paths: Audio file paths to join, in playback order.
        default_samplerate: Sample rate written to the output when ``paths``
            is empty, since there is then no input file to take it from.

    Returns:
        The output path, ``"combined_speech.wav"``. With an empty ``paths``
        the file is still written, containing zero samples.
    """
    # Pre-bind the rate so an empty ``paths`` no longer leaves it undefined.
    samplerate = default_samplerate
    chunks = []
    for path in paths:
        # As in the original, the rate of the last file read is the one used.
        data, samplerate = sf.read(path)
        chunks.append(data)

    # Join the arrays directly instead of extending a Python list sample by sample.
    combined_speech = np.concatenate(chunks) if chunks else np.zeros(0)
    sf.write("combined_speech.wav", combined_speech, samplerate)
    return "combined_speech.wav"
66
+
67
def _render_speech(source_text):
    """Convert ``source_text`` to speech and show an audio player for it.

    Shows an error instead when the text is blank, and shows no player when
    synthesis produced no audio segments.
    """
    if not source_text.strip():
        st.error("Please enter some text to convert.")
        return

    audio_paths = text_to_speech(source_text)
    if not audio_paths:
        # Nothing was synthesized (text_to_speech reports its own failures),
        # so there is nothing to combine or play.
        return

    combined_audio_path = combine_audio_segments(audio_paths)
    # Close the file deterministically rather than leaking the handle.
    with open(combined_audio_path, 'rb') as audio_file:
        audio_bytes = audio_file.read()
    st.audio(audio_bytes, format='audio/wav')


# Text Input and Conversion Button
text = st.text_area("Type your text here.")

if st.button("Convert"):
    _render_speech(text)

# File Uploader and Conversion Button
uploaded_file = st.file_uploader("Upload a text file here", type=['txt'])

if uploaded_file is not None:
    try:
        text = uploaded_file.getvalue().decode("utf-8")
    except UnicodeDecodeError:
        # A .txt extension does not guarantee the bytes are UTF-8.
        st.error("The uploaded file is not valid UTF-8 text.")
    else:
        st.write(text)

        if st.button("Convert Uploaded File", key="upload"):
            _render_speech(text)
cmu_us_awb_arctic-wav-arctic_a0002.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5db7a684ab490f21cec1628e00d461a184e369fe4eafb1ee441a796faf4ab6ae
3
+ size 2176
cmu_us_bdl_arctic-wav-arctic_a0009.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:215326eae3a428af8934c385fbe043b36c72849ca17d1d013adeb189e6bd6962
3
+ size 2176
cmu_us_clb_arctic-wav-arctic_a0144.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf67b36c47edfb1851466a1dff081b436bc6809b5ebc12811d9df0c0d0f28d0e
3
+ size 2176
cmu_us_ksp_arctic-wav-arctic_b0087.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6c5c2a38c2e400179019c560a74c4322f4ee13beda22ee601807545edee283e
3
+ size 2176
cmu_us_rms_arctic-wav-arctic_b0353.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a49dac3e9c3a71a4dbca4c364233c7915ae6e0cb71b2ceaed97296231b95cb50
3
+ size 2176
cmu_us_slt_arctic-wav-arctic_a0508.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f71ffadda3f3a4de079740a0b34963824dc644d9d5442283bd0a2b0d4f44ff0b
3
+ size 2176
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ streamlit_option_menu == 0.3.2
2
+ requests==2.28.1
3
+ streamlit-lottie==0.0.3
4
+ times==0.7
5
+ htbuilder==0.6.1
6
+ transformers==4.29.2
7
+ torch==2.0.1
8
+ soundfile==0.12.1
9
+ torchaudio == 2.0.2
10
+ sentencepiece==0.1.99
11
+ streamlit
speech.wav ADDED
Binary file (10.3 kB). View file
 
style.css ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* Custom CSS for Streamlit App */
2
+
3
+ /* Header Style */
4
+ header {
5
+ background-color: #5072A7; /* A soothing shade of blue */
6
+ color: white; /* White text for contrast */
7
+ padding: 10px; /* Adding padding for spacing */
8
+ }
9
+
10
+ /* Main Body Style */
11
+ body {
12
+ font-family: Arial, sans-serif; /* Standard readable font stack */
13
+ color: #333333; /* Dark grey color for main text */
14
+ line-height: 1.6; /* Improved line spacing for readability */
15
+ }
16
+
17
+ /* Titles and Headings */
18
+ h1, h2, h3 {
19
+ color: #333366; /* Darker shade of blue for headings */
20
+ margin-bottom: 10px; /* Adding margin for separation */
21
+ }
22
+
23
+ /* Buttons */
24
+ button {
25
+ background-color: #4CAF50; /* Green background for buttons */
26
+ color: white; /* White text for contrast */
27
+ padding: 10px 20px; /* Padding for button size */
28
+ margin: 10px 0; /* Margin for spacing */
29
+ border: none; /* Removing default border */
30
+ cursor: pointer; /* Pointer cursor for usability */
31
+ border-radius: 5px; /* Rounded corners */
32
+ transition: background-color 0.3s ease; /* Smooth hover transition */
33
+ }
34
+
35
+ button:hover {
36
+ background-color: #45a049; /* Darker green on hover */
37
+ }
38
+
39
+ /* Footer Style */
40
+ footer {
41
+ background-color: #333333; /* Dark background for footer */
42
+ color: white; /* White text for contrast */
43
+ text-align: center; /* Center-aligning text */
44
+ padding: 10px; /* Padding for spacing */
45
+ position: fixed; /* Fixed positioning */
46
+ left: 0;
47
+ bottom: 0;
48
+ width: 100%; /* Full width */
49
+ }