Spaces:
Running
Running
KarthickAdopleAI
committed on
Commit
•
3ae161b
1
Parent(s):
95d1f8b
Update app.py
Browse files
app.py
CHANGED
@@ -15,8 +15,9 @@ import requests
|
|
15 |
import logging
|
16 |
import os
|
17 |
from pydub import AudioSegment
|
18 |
-
from pydub.silence import split_on_silence
|
19 |
import speech_recognition as sr
|
|
|
|
|
20 |
nltk.download('punkt')
|
21 |
nltk.download('stopwords')
|
22 |
|
@@ -43,6 +44,7 @@ class VideoAnalytics:
|
|
43 |
|
44 |
self.r = sr.Recognizer()
|
45 |
|
|
|
46 |
# Initialize english text variable
|
47 |
self.english_text = ""
|
48 |
|
@@ -84,12 +86,12 @@ class VideoAnalytics:
|
|
84 |
raise e
|
85 |
|
86 |
# Function to recognize speech in the audio file
|
87 |
-
def transcribe_audio(self,path):
|
88 |
"""Transcribe speech from an audio file."""
|
89 |
try:
|
90 |
with sr.AudioFile(path) as source:
|
91 |
audio_listened = self.r.record(source)
|
92 |
-
text = self.r.recognize_google(audio_listened)
|
93 |
return text
|
94 |
except sr.UnknownValueError as e:
|
95 |
logging.error(f"Speech recognition could not understand audio: {e}")
|
@@ -99,7 +101,7 @@ class VideoAnalytics:
|
|
99 |
return ""
|
100 |
|
101 |
# Function to split the audio file into chunks on silence and apply speech recognition
|
102 |
-
def get_large_audio_transcription_on_silence(self,path):
|
103 |
"""Split the large audio file into chunks and apply speech recognition on each chunk."""
|
104 |
try:
|
105 |
sound = AudioSegment.from_file(path)
|
@@ -115,7 +117,7 @@ class VideoAnalytics:
|
|
115 |
chunk_filename = os.path.join(folder_name, f"chunk{i}.wav")
|
116 |
audio_chunk.export(chunk_filename, format="wav")
|
117 |
|
118 |
-
text = self.transcribe_audio(chunk_filename)
|
119 |
|
120 |
if text:
|
121 |
text = f"{text.capitalize()}. "
|
@@ -148,8 +150,11 @@ class VideoAnalytics:
|
|
148 |
|
149 |
# Replace 'input.mp3' and 'output.wav' with your file paths
|
150 |
audio_filename = self.mp3_to_wav("output_audio.mp3", 'output.wav')
|
151 |
-
|
152 |
-
|
|
|
|
|
|
|
153 |
# Update the transcribed_text attribute with the transcription result
|
154 |
self.transcribed_text = text
|
155 |
# Update the translation text into english_text
|
|
|
15 |
import logging
|
16 |
import os
|
17 |
from pydub import AudioSegment
|
|
|
18 |
import speech_recognition as sr
|
19 |
+
import torchaudio
|
20 |
+
from speechbrain.inference.classifiers import EncoderClassifier
|
21 |
nltk.download('punkt')
|
22 |
nltk.download('stopwords')
|
23 |
|
|
|
44 |
|
45 |
self.r = sr.Recognizer()
|
46 |
|
47 |
+
self.language_id = EncoderClassifier.from_hparams(source="speechbrain/lang-id-voxlingua107-ecapa", savedir="tmp")
|
48 |
# Initialize english text variable
|
49 |
self.english_text = ""
|
50 |
|
|
|
86 |
raise e
|
87 |
|
88 |
# Function to recognize speech in the audio file
|
89 |
+
def transcribe_audio(self,path: str,lang: str):
|
90 |
"""Transcribe speech from an audio file."""
|
91 |
try:
|
92 |
with sr.AudioFile(path) as source:
|
93 |
audio_listened = self.r.record(source)
|
94 |
+
text = self.r.recognize_google(audio_listened,language=lang)
|
95 |
return text
|
96 |
except sr.UnknownValueError as e:
|
97 |
logging.error(f"Speech recognition could not understand audio: {e}")
|
|
|
101 |
return ""
|
102 |
|
103 |
# Function to split the audio file into chunks on silence and apply speech recognition
|
104 |
+
def get_large_audio_transcription_on_silence(self,path: str,lang: str):
|
105 |
"""Split the large audio file into chunks and apply speech recognition on each chunk."""
|
106 |
try:
|
107 |
sound = AudioSegment.from_file(path)
|
|
|
117 |
chunk_filename = os.path.join(folder_name, f"chunk{i}.wav")
|
118 |
audio_chunk.export(chunk_filename, format="wav")
|
119 |
|
120 |
+
text = self.transcribe_audio(chunk_filename,lang)
|
121 |
|
122 |
if text:
|
123 |
text = f"{text.capitalize()}. "
|
|
|
150 |
|
151 |
# Replace 'input.mp3' and 'output.wav' with your file paths
|
152 |
audio_filename = self.mp3_to_wav("output_audio.mp3", 'output.wav')
|
153 |
+
# Detect the spoken language of the audio
|
154 |
+
signal = self.language_id.load_audio("/content/output_.wav")
|
155 |
+
prediction = self.language_id.classify_batch(signal)
|
156 |
+
lang = [prediction[3][0].split(":")][0][0]
|
157 |
+
text = self.get_large_audio_transcription_on_silence(audio_filename,lang)
|
158 |
# Update the transcribed_text attribute with the transcription result
|
159 |
self.transcribed_text = text
|
160 |
# Update the translation text into english_text
|