Kuznetsov AV committed on
Commit
092b169
1 Parent(s): 2e83bf4

Added project kuznetsov_av, main program run.py, modified README.md

Browse files
README.md CHANGED
@@ -1 +1 @@
1
- # urfu_iml_2023_1_3_hw2
 
1
+ # Программная инженерия. Практическое задание №2
kuznetsov_av/kuznetsov_av.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import pipeline
2
+ from datasets import load_dataset
3
+ import torch
4
+ import streamlit as st
5
+
6
@st.cache_resource
def load_model():
    """Construct the SpeechT5 TTS pipeline and a fixed speaker embedding.

    Decorated with ``st.cache_resource`` so the model weights and the
    x-vector dataset are fetched only once per Streamlit server process,
    not on every script rerun.

    Returns:
        tuple: ``(synthesiser, speaker_embedding)`` — the text-to-speech
        pipeline and an x-vector tensor with a leading batch dimension
        (added via ``unsqueeze(0)``).
    """
    # Text-to-speech pipeline backed by Microsoft's SpeechT5 checkpoint.
    tts_pipeline = pipeline("text-to-speech", "microsoft/speecht5_tts")

    # A single CMU Arctic x-vector (index 7306) pins the output voice.
    xvectors = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
    voice = torch.tensor(xvectors[7306]["xvector"]).unsqueeze(0)

    return tts_pipeline, voice
14
+
15
# --- Streamlit UI (flat script: Streamlit re-executes this top to bottom) ---
tts, voice = load_model()

text = st.text_area('Enter English text here')
st.write(f'You wrote {len(text)} characters.')

# Only synthesize after an explicit button press; the result is played
# back through Streamlit's built-in audio widget.
if st.button('Speech'):
    result = tts(text, forward_params={"speaker_embeddings": voice})
    st.audio(result['audio'], sample_rate=result['sampling_rate'])
kuznetsov_av/requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ datasets==2.14.6
2
+ streamlit==1.28.1
3
+ torch==2.1.0
4
+ transformers==4.35.0
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ datasets==2.14.6
2
+ streamlit==1.28.1
3
+ torch==2.1.0
4
+ transformers==4.35.0
run.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import pipeline
2
+ from datasets import load_dataset
3
+ import torch
4
+ import streamlit as st
5
+
6
@st.cache_resource
def load_speech_model():
    """Load the SpeechT5 text-to-speech pipeline and one speaker embedding.

    ``st.cache_resource`` caches the return value for the lifetime of the
    Streamlit server process, so downloads happen only on the first run.

    Returns:
        tuple: ``(synthesiser, speaker_embedding)`` — the TTS pipeline and
        an x-vector tensor given a leading batch dimension via
        ``unsqueeze(0)``.
    """
    # SpeechT5 checkpoint provides the text-to-speech model.
    tts_pipeline = pipeline("text-to-speech", "microsoft/speecht5_tts")

    # One x-vector from the CMU Arctic validation split fixes the voice.
    xvectors = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
    voice = torch.tensor(xvectors[7306]["xvector"]).unsqueeze(0)

    return tts_pipeline, voice
14
+
15
def speech_elements():
    """Render the TTS widgets: text box, character counter, Speech button,
    and — after a press — an audio player with the synthesized speech."""
    tts, voice = load_speech_model()

    text = st.text_area('Enter English text here')
    st.write(f'You wrote {len(text)} characters.')

    # Synthesis is deferred until the user explicitly asks for it.
    if st.button('Speech'):
        audio_out = tts(text, forward_params={"speaker_embeddings": voice})
        st.audio(audio_out['audio'], sample_rate=audio_out['sampling_rate'])


speech_elements()