ibvhim committed on
Commit
d324af8
·
1 Parent(s): 065e2bf

Create utils.py

Browse files
Files changed (1) hide show
  1. Voicechat-Hindi/utils.py +137 -0
Voicechat-Hindi/utils.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Some utility functions for the app."""
2
+ from base64 import b64encode
3
+ from io import BytesIO
4
+
5
+ from gtts import gTTS
6
+ from mtranslate import translate
7
+ from speech_recognition import AudioFile, Recognizer
8
+ from transformers import (BlenderbotSmallForConditionalGeneration,
9
+ BlenderbotSmallTokenizer)
10
+
11
+
12
def stt(audio: object, language: str) -> str:
    """Transcribe recorded speech into text.

    Args:
        audio: recording of the user's speech, opened via ``AudioFile``
        language (str): language code forwarded to the recognizer

    Returns:
        str: the text recognized in the recording
    """
    recognizer = Recognizer()
    with AudioFile(audio) as source:
        # Pull the whole recording into memory first ...
        recorded = recognizer.record(source)
        # ... then hand it to the recognizer's Google backend.
        return recognizer.recognize_google(recorded, language=language)
27
+
28
+
29
def to_en_translation(text: str, language: str) -> str:
    """Translate text written in the given language into English.

    Args:
        text (str): text to translate
        language (str): language the input text is written in

    Returns:
        str: the English translation
    """
    target_language = "en"
    source_language = language
    # ``translate`` is called positionally: text, target, then source.
    return translate(text, target_language, source_language)
38
+
39
+
40
def from_en_translation(text: str, language: str) -> str:
    """Translate English text into the given language.

    Args:
        text (str): English text to translate
        language (str): language to translate into

    Returns:
        str: the translated text
    """
    target_language = language
    source_language = "en"
    # ``translate`` is called positionally: text, target, then source.
    return translate(text, target_language, source_language)
49
+
50
+
51
class TextGenerationPipeline:
    """Text generation pipeline around the BlenderbotSmall model.

    The tokenizer and model are loaded once, when the class body executes,
    and are shared by every instance. Calling an instance with a string
    tokenizes it, runs ``generate`` and returns the decoded reply.
    """

    # Checkpoint name passed to both ``from_pretrained`` calls below.
    model_name = "facebook/blenderbot_small-90M"
    tokenizer = BlenderbotSmallTokenizer.from_pretrained(model_name)
    model = BlenderbotSmallForConditionalGeneration.from_pretrained(model_name)

    def __init__(self, **kwargs) -> None:
        """Store text generation parameters.

        For example: max_length=100 which generates text shorter than
        100 tokens. Visit:
        https://huggingface.co/docs/transformers/main_classes/text_generation
        for more parameters.

        Args:
            **kwargs: generation parameters, forwarded to ``generate`` on
                every call
        """
        # The parameters live directly in the instance namespace because
        # __call__ forwards that whole namespace to ``generate``. Any other
        # attribute set on an instance is therefore forwarded as well.
        self.__dict__.update(kwargs)

    def preprocess(self, text: str) -> object:
        """Tokenize input text.

        Args:
            text (str): user specified text

        Returns:
            object: tokenizer output requested as PyTorch tensors
                (``return_tensors="pt"``); ``__call__`` unpacks it with
                ``**`` into ``generate``
        """
        return self.tokenizer(text, return_tensors="pt")

    def postprocess(self, outputs) -> str:
        """Convert model output back into text.

        Args:
            outputs: output of ``generate``; only the first sequence
                (``outputs[0]``) is decoded

        Returns:
            str: generated text with special tokens stripped
        """
        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)

    def __call__(self, text: str) -> str:
        """Generate a reply for the input text.

        Args:
            text (str): user specified text

        Returns:
            str: generated text
        """
        tokenized_text = self.preprocess(text)
        # Instance attributes (the kwargs given to __init__) are passed as
        # generation parameters alongside the tokenized input.
        output = self.model.generate(**tokenized_text, **self.__dict__)
        return self.postprocess(output)
99
+
100
+
101
def tts(text: str, language: str) -> object:
    """Build a text-to-speech object for the given text.

    Args:
        text (str): generated answer of the bot
        language (str): language code passed to gTTS as ``lang``

    Returns:
        object: the gTTS object for the text
    """
    speech = gTTS(text=text, lang=language, slow=False)
    return speech
109
+
110
+
111
def tts_to_bytesio(tts_object: object) -> bytes:
    """Render a text-to-speech object into raw audio bytes.

    Args:
        tts_object (object): audio object obtained from gtts; it only needs
            to expose ``write_to_fp(fp)``

    Returns:
        bytes: everything the object wrote to the in-memory buffer
    """
    # ``getvalue`` returns the full buffer regardless of the current stream
    # position, so no rewind is needed; the ``with`` block releases the
    # buffer once the bytes have been copied out.
    with BytesIO() as buffer:
        tts_object.write_to_fp(buffer)
        return buffer.getvalue()
122
+
123
+
124
def html_audio_autoplay(bytes: bytes, mime_type: str = "audio/wav") -> str:
    """Create an HTML snippet that autoplays audio in the gradio app.

    The audio is embedded inline as a base64 ``data:`` URI, so the snippet
    does not reference any external file.

    Args:
        bytes (bytes): audio bytes
        mime_type (str): MIME type written into the data URI and the
            ``type`` attribute; defaults to ``"audio/wav"``

    Returns:
        str: HTML markup with an autoplaying ``<audio>`` element
    """
    # NOTE(review): when the bytes come from gTTS they are presumably MP3,
    # in which case mime_type="audio/mpeg" would be the accurate value -
    # confirm against the caller before changing the default.
    b64 = b64encode(bytes).decode()
    html = f"""
    <audio controls autoplay>
    <source src="data:{mime_type};base64,{b64}" type="{mime_type}">
    </audio>
    """
    return html