| |
| """Speech2Video.ipynb |
| |
| Automatically generated by Colaboratory. |
| |
| Original file is located at |
| https://colab.research.google.com/drive/1CcYNY0wwS05Ml7UVv4oY7cHjlVrhTbIq |
| """ |
|
|
# Colab only: mount the user's Google Drive at /content/drive. main() reads
# its input audio from, and writes the converted WAV to, paths under this mount.
| from google.colab import drive |
| drive.mount('/content/drive') |
|
|
# Notebook shell commands (IPython "!" syntax, not plain Python): install the
# python3-pyaudio system package and the SpeechRecognition / pydub packages
# that the imports below rely on.
| !apt-get install python3-pyaudio |
| !pip install SpeechRecognition |
| !pip install pydub |
|
|
| from pydub import AudioSegment |
| import speech_recognition as sr |
| import re |
| import nltk |
| from nltk.stem import PorterStemmer, WordNetLemmatizer |
| from nltk.tokenize import word_tokenize |
|
|
# NLTK data needed by preprocess_text(): tokenizer models for word_tokenize()
# and the WordNet corpus for WordNetLemmatizer.
nltk.download('punkt')
# Recent NLTK releases load the tokenizer from 'punkt_tab' instead of 'punkt';
# without it word_tokenize() raises LookupError. Fetching both keeps old and
# new NLTK versions working.
nltk.download('punkt_tab')
nltk.download('wordnet')
# Some NLTK releases also require the Open Multilingual WordNet data to load
# WordNet; downloading it is harmless where it is not needed.
nltk.download('omw-1.4')
|
|
# Notebook shell commands: install ModelScope (pinned to 1.4.2) before the
# modelscope imports below, plus open_clip_torch and pytorch-lightning
# (presumably runtime dependencies of the text-to-video pipeline; confirm).
| !pip install modelscope==1.4.2 |
| !pip install open_clip_torch |
| !pip install pytorch-lightning |
|
|
| from modelscope.pipelines import pipeline |
| from modelscope.outputs import OutputKeys |
|
|
# Module-level text-to-video pipeline, created once when the script is loaded;
# main() calls it as `p` with a {'text': ...} input.
p = pipeline(
    task='text-to-video-synthesis',
    model='damo/text-to-video-synthesis',
)
|
|
def convert_to_wav(input_file, output_file, input_format="ogg"):
    """Convert an audio file to WAV using pydub.

    Args:
        input_file: Path (or file object) of the audio to read.
        output_file: Path (or file object) the WAV data is written to.
        input_format: Format name handed to pydub when decoding
            ``input_file``. Defaults to ``"ogg"``, which is what the
            previously hard-coded ``AudioSegment.from_ogg`` call used.

    Returns:
        None.
    """
    audio = AudioSegment.from_file(input_file, format=input_format)

    # export() returns the output file object. When we passed a path, pydub
    # opened that file itself, so close it here instead of leaking the handle.
    # A file object supplied by the caller is left open for the caller.
    exported = audio.export(output_file, format="wav")
    if exported is not output_file:
        exported.close()
|
|
| |
def speech_to_text(audio_file):
    """Transcribe an audio file via ``Recognizer.recognize_google``.

    Args:
        audio_file: Path of the audio file opened with ``sr.AudioFile``.

    Returns:
        The recognised text, or an empty string when the audio could not be
        understood or the recognition request failed (a message is printed
        in both failure cases).
    """
    engine = sr.Recognizer()
    with sr.AudioFile(audio_file) as wav_source:
        # Read the whole file into memory before sending it for recognition.
        recording = engine.record(wav_source)
        try:
            transcript = engine.recognize_google(recording)
        except sr.UnknownValueError:
            print("Sorry, could not understand audio")
            transcript = ""
        except sr.RequestError as err:
            print("Error fetching results; {0}".format(err))
            transcript = ""
    return transcript
|
|
| |
def preprocess_text(text):
    """Strip non-letter characters from *text* and lemmatize each word.

    Every character that is not an ASCII letter or whitespace is removed,
    the remainder is split with NLTK's ``word_tokenize`` and each token is
    passed through ``WordNetLemmatizer.lemmatize`` with its default
    arguments.

    Args:
        text: Raw transcript string.

    Returns:
        The lemmatized tokens joined by single spaces; an empty string when
        no tokens remain.
    """
    letters_only = re.sub(r'[^a-zA-Z\s]', '', text)
    tokens = word_tokenize(letters_only)

    # Only lemmas feed the result. No stemming pass is run, because its
    # output would never be used.
    lemmatizer = WordNetLemmatizer()
    return ' '.join(lemmatizer.lemmatize(token) for token in tokens)
|
|
| |
def main(
    input_file="/content/drive/MyDrive/IV II PROJECT/WhatsApp Audio 2024-03-24 at 8.52.04 AM.ogg",
    output_file="/content/drive/MyDrive/IV II PROJECT/converted_audio.wav",
):
    """Run the speech-to-video flow: audio -> WAV -> text -> video download.

    Args:
        input_file: Path of the recorded audio to convert. Defaults to the
            original hard-coded recording on the mounted Drive.
        output_file: Path where the intermediate WAV file is written.

    Returns:
        None. The generated video is offered as a browser download through
        ``google.colab.files``.
    """
    convert_to_wav(input_file, output_file)

    text = speech_to_text(output_file)
    print("Text from audio:", text)

    preprocessed_text = preprocess_text(text)
    print("Preprocessed text:", preprocessed_text)

    # speech_to_text() returns "" on failure and preprocessing can strip
    # everything (e.g. digits only). Do not run the expensive video model on
    # an empty prompt.
    if not preprocessed_text.strip():
        print("No usable text was recognised; skipping video generation.")
        return

    test_text = {'text': preprocessed_text}
    output_video_path = p(test_text)[OutputKeys.OUTPUT_VIDEO]
    print('output_video_path:', output_video_path)

    # Imported here because google.colab only exists inside a Colab runtime.
    from google.colab import files
    files.download(output_video_path)


if __name__ == "__main__":
    main()
|
|
|
|