rayyanreda committed
Commit 270b608
1 Parent(s): a282198

Upload 9 files

audio-chatbot/audio_autoplay.py ADDED
@@ -0,0 +1,28 @@
+
+def tts_to_bytesio(tts_object: object) -> bytes:
+    """Converts a gTTS object to raw audio bytes.
+    Args:
+        tts_object (object): audio object obtained from gTTS
+    Returns:
+        bytes: audio bytes
+    """
+    bytes_object = BytesIO()
+    tts_object.write_to_fp(bytes_object)
+    bytes_object.seek(0)
+    return bytes_object.getvalue()
+
+
+def html_audio_autoplay(audio_bytes: bytes) -> str:
+    """Creates an HTML snippet that autoplays audio in the Gradio app.
+    Args:
+        audio_bytes (bytes): audio bytes
+    Returns:
+        str: HTML snippet that autoplays the audio
+    """
+    b64 = b64encode(audio_bytes).decode()
+    html = f"""
+    <audio controls autoplay>
+    <source src="data:audio/wav;base64,{b64}" type="audio/wav">
+    </audio>
+    """
+    return html
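A minimal sketch of how these two helpers are meant to chain (the text is hypothetical, and the names from import_libraries.py below are assumed to be in scope). Note that gTTS emits MP3 data, so the audio/wav label in the data URI is only nominal; browsers generally sniff the real format:

    # hypothetical usage: gTTS object -> raw bytes -> autoplaying HTML snippet
    speech = gTTS(text="Hallo, wie geht es dir?", lang="de")
    snippet = html_audio_autoplay(tts_to_bytesio(speech))
    # returning `snippet` from a Gradio function wired to an "html" output renders the player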
audio-chatbot/gradio_app.py ADDED
@@ -0,0 +1,17 @@
+Interface(
+    fn=main,
+    inputs=[
+        Audio(
+            source="microphone",
+            type="filepath",
+        ),
+    ],
+    outputs=[
+        Textbox(label="You said: "),
+        Textbox(label="AI said: "),
+        Textbox(label="AI said (English): "),
+        "html",
+    ],
+    live=True,
+    allow_flagging="never",
+).launch()
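The four outputs line up positionally with the four values returned by main() in main_func.py: recognized speech, the German reply, the English reply, and the autoplay HTML snippet; live=True re-runs the pipeline whenever a new recording arrives. A sketch of the same wiring with a handle kept for an optional public link (share=True is an assumption about how one might launch it, not part of this upload):

    # sketch: identical Interface, but launched with a temporary public URL
    demo = Interface(
        fn=main,
        inputs=[Audio(source="microphone", type="filepath")],
        outputs=[Textbox(label="You said: "),
                 Textbox(label="AI said: "),
                 Textbox(label="AI said (English): "),
                 "html"],
        live=True,
        allow_flagging="never",
    )
    demo.launch(share=True)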
audio-chatbot/import_libraries.py ADDED
@@ -0,0 +1,10 @@
+
+from base64 import b64encode
+from io import BytesIO
+
+from gradio import Audio, Interface, Textbox
+from gtts import gTTS
+from mtranslate import translate
+from speech_recognition import AudioFile, Recognizer
+from transformers import (BlenderbotSmallForConditionalGeneration,
+                          BlenderbotSmallTokenizer)
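None of the other modules repeat these imports, so the files look intended to be combined into a single script, with this block first and gradio_app.py last. A hypothetical way to assemble and run them in that order (the ordering is an assumption, not stated in the upload):

    # hypothetical assembly: concatenate the pieces in dependency order and run them
    parts = ["import_libraries.py", "audio_autoplay.py", "stt.py", "tts.py",
             "translators.py", "text_generation_pipe.py", "main_func.py", "gradio_app.py"]
    source = "\n".join(open(f"audio-chatbot/{name}").read() for name in parts)
    exec(compile(source, "app.py", "exec"))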
audio-chatbot/main_func.py ADDED
@@ -0,0 +1,30 @@
+max_answer_length = 100
+desired_language = "de"
+response_generator_pipe = TextGenerationPipeline(max_length=max_answer_length)
+
+
+def main(audio: object):
+    """Calls functions for deploying the gradio app.
+
+    It takes the user's voice input and responds
+    both verbally and in text.
+
+    Args:
+        audio (object): recorded speech of the user
+
+    Returns:
+        tuple containing
+
+        - user_speech_text (str) : recognized speech
+        - bot_response_de (str) : translated answer of the bot
+        - bot_response_en (str) : bot's original answer
+        - html (object) : autoplayer for the bot's speech
+    """
+    user_speech_text = stt(audio, desired_language)
+    translated_text = to_en_translation(user_speech_text, desired_language)
+    bot_response_en = response_generator_pipe(translated_text)
+    bot_response_de = from_en_translation(bot_response_en, desired_language)
+    bot_voice = tts(bot_response_de, desired_language)
+    bot_voice_bytes = tts_to_bytesio(bot_voice)
+    html = html_audio_autoplay(bot_voice_bytes)
+    return user_speech_text, bot_response_de, bot_response_en, html
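The flow is: speech-to-text, translate to English, generate a Blenderbot reply, translate back to German, synthesize speech, and wrap it in autoplay HTML. A hypothetical smoke test outside Gradio (the WAV filename is made up):

    # hypothetical check with a prerecorded German WAV file
    heard, answer_de, answer_en, player_html = main("sample_de.wav")
    print(heard)      # recognized German speech
    print(answer_en)  # Blenderbot's English reply
    print(answer_de)  # the reply translated back to German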
audio-chatbot/requirements.txt ADDED
@@ -0,0 +1,7 @@
+transformers==4.25.1
+--find-links https://download.pytorch.org/whl/torch_stable.html
+torch==1.13.1+cpu
+gradio==3.14.0
+SpeechRecognition==3.9.0
+mtranslate==1.8
+gTTS==2.3.0
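The --find-links line tells pip where to look for the CPU-only torch wheel (torch==1.13.1+cpu is hosted on the PyTorch index, not PyPI); the whole set installs with the usual command:

    pip install -r requirements.txt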
audio-chatbot/stt.py ADDED
@@ -0,0 +1,17 @@
+def stt(audio: object, language: str) -> str:
+    """Converts speech to text.
+    Args:
+        audio: recorded speech of the user
+        language (str): language code used for recognition
+    Returns:
+        text (str): recognized speech of the user
+    """
+    # Create a Recognizer object
+    r = Recognizer()
+    # Open the audio file
+    with AudioFile(audio) as source:
+        # Listen for the data (load audio to memory)
+        audio_data = r.record(source)
+    # Transcribe the audio using Google's speech-to-text API
+    text = r.recognize_google(audio_data, language=language)
+    return text
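recognize_google calls Google's free Web Speech API over the network, and AudioFile expects a path to a WAV/AIFF/FLAC file, which is what the Audio(type="filepath") input in gradio_app.py provides. A hypothetical direct call (the filename is made up):

    # hypothetical: transcribe a prerecorded German WAV file
    transcript = stt("recording_de.wav", "de")
    print(transcript)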
audio-chatbot/text_generation_pipe.py ADDED
@@ -0,0 +1,48 @@
+class TextGenerationPipeline:
+    """Pipeline for text generation with the Blenderbot model.
+    Returns:
+        str: generated text
+    """
+
+    # load tokenizer and the model
+    model_name = "facebook/blenderbot_small-90M"
+    tokenizer = BlenderbotSmallTokenizer.from_pretrained(model_name)
+    model = BlenderbotSmallForConditionalGeneration.from_pretrained(model_name)
+
+    def __init__(self, **kwargs):
+        """Specifies text generation parameters.
+        For example: max_length=100 generates text shorter than
+        100 tokens. Visit:
+        https://huggingface.co/docs/transformers/main_classes/text_generation
+        for more parameters
+        """
+        self.__dict__.update(kwargs)
+
+    def preprocess(self, text: str):
+        """Tokenizes input text.
+        Args:
+            text (str): user specified text
+        Returns:
+            BatchEncoding: text representation as tensors
+        """
+        return self.tokenizer(text, return_tensors="pt")
+
+    def postprocess(self, outputs) -> str:
+        """Converts tensors into text.
+        Args:
+            outputs (torch.Tensor): model text generation output
+        Returns:
+            str: generated text
+        """
+        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+    def __call__(self, text: str) -> str:
+        """Generates text from input text.
+        Args:
+            text (str): user specified text
+        Returns:
+            str: generated text
+        """
+        tokenized_text = self.preprocess(text)
+        output = self.model.generate(**tokenized_text, **self.__dict__)
+        return self.postprocess(output)
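Because tokenizer and model are class attributes, the Blenderbot weights are loaded once per process and shared by every instance, while the keyword arguments given to __init__ are forwarded to model.generate() on each call. A minimal usage sketch:

    # minimal sketch: any keyword accepted by model.generate() can go to the constructor
    pipe = TextGenerationPipeline(max_length=100)
    print(pipe("hello, how are you doing today?"))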
audio-chatbot/translators.py ADDED
@@ -0,0 +1,20 @@
+def to_en_translation(text: str, language: str) -> str:
+    """Translates text from the specified language to English.
+    Args:
+        text (str): input text
+        language (str): source language
+    Returns:
+        str: translated text
+    """
+    return translate(text, "en", language)
+
+
+def from_en_translation(text: str, language: str) -> str:
+    """Translates text from English to the specified language.
+    Args:
+        text (str): input text
+        language (str): target language
+    Returns:
+        str: translated text
+    """
+    return translate(text, language, "en")
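mtranslate's translate() takes (text, to_language, from_language), so the two wrappers simply pin the English side of the pair. A quick sketch (exact outputs depend on the translation service):

    print(to_en_translation("Wie geht es dir?", "de"))  # roughly "How are you?"
    print(from_en_translation("I am fine.", "de"))      # roughly "Mir geht es gut."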
audio-chatbot/tts.py ADDED
@@ -0,0 +1,9 @@
+def tts(text: str, language: str) -> object:
+    """Converts text into a gTTS audio object.
+    Args:
+        text (str): generated answer of the bot
+        language (str): language code for speech synthesis
+    Returns:
+        object: text-to-speech object
+    """
+    return gTTS(text=text, lang=language, slow=False)
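gTTS does not synthesize anything at construction time; audio is only fetched when write_to_fp() or save() is called, which is why main_func.py passes the returned object through tts_to_bytesio(). A small sketch:

    # hypothetical: synthesize German speech and keep the raw MP3 bytes
    voice = tts("Mir geht es gut, danke!", "de")
    audio_bytes = tts_to_bytesio(voice)   # helper from audio_autoplay.py
    # or write straight to disk: voice.save("reply.mp3")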