Spaces:
Runtime error
Runtime error
Applied VAD
Browse files- .gitignore +2 -1
- __pycache__/transcription_service.cpython-311.pyc +0 -0
- app.py +1 -1
- msq-ai-78bdccb055f4.json +0 -13
- requirements.txt +60 -48
- transcription_service.py +28 -45
.gitignore
CHANGED
@@ -2,4 +2,5 @@ rt/
|
|
2 |
checkopenai.py
|
3 |
lib
|
4 |
__pycache__
|
5 |
-
.env
|
|
|
|
2 |
checkopenai.py
|
3 |
lib
|
4 |
__pycache__
|
5 |
+
.env
|
6 |
+
VAD_service.ipynb
|
__pycache__/transcription_service.cpython-311.pyc
CHANGED
Binary files a/__pycache__/transcription_service.cpython-311.pyc and b/__pycache__/transcription_service.cpython-311.pyc differ
|
|
app.py
CHANGED
@@ -5,7 +5,7 @@ import os
|
|
5 |
import json
|
6 |
from transcription_service import transcribe_speech_local,translation_service
|
7 |
|
8 |
-
|
9 |
|
10 |
def translateoutput(text,language):
|
11 |
if text=="" or text is None:
|
|
|
5 |
import json
|
6 |
from transcription_service import transcribe_speech_local,translation_service
|
7 |
|
8 |
+
# SECURITY: the OpenAI API key must be supplied by the runtime environment
# (for a Hugging Face Space: a repository secret), never committed to source
# control. Any key that was ever committed here is public and must be revoked.
# The OpenAI client picks up OPENAI_API_KEY from the environment on its own,
# so nothing is assigned here; this check only surfaces a missing
# configuration at start-up instead of at the first request.
if not os.environ.get('OPENAI_API_KEY'):
    print('WARNING: OPENAI_API_KEY is not set; OpenAI requests will fail until it is configured.')
|
9 |
|
10 |
def translateoutput(text,language):
|
11 |
if text=="" or text is None:
|
msq-ai-78bdccb055f4.json
DELETED
@@ -1,13 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"type": "service_account",
|
3 |
-
"project_id": "msq-ai",
|
4 |
-
"private_key_id": "78bdccb055f435ec9a3767887d77dccded5b45fc",
|
5 |
-
"private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQDlMHDRxYIwtRXU\n2KdBCgimYiGQ/ImfleFRnNM1QDO216wzjinvXw6oQZcCvJinVvV+iA9wA5l8Qk1Q\nUMIkzcoA9UCIrscTy7+tOfnjxP9Kzib0JQlA8NBXYdm7kCNIVoxwNfz/gCD/nGYK\nTZ6VYLT+DqIXf110rVG/hgv37Wa1G4IBauphj0SMKaS6/syU2HGkC0ozOJOppSwT\norSL+72ddhUAO5bwLYBNo0x/Kg+uozbXi4S+1Qfgjs3EZUbQr2l/s5pWfxKd1m56\nhmrxEpQ3DN55s+iN5PW1FiDqOxDQf823s4LOV99/mqPfStjzskMbuFi1/uO94ijP\n71MYebJRAgMBAAECggEABfzKm75XUP6wGfIJh45TrMYYRqWEUmgb3XIVhfj+CN8w\nS14BzDQ9aamiAHMtIUUJL5OuJJDhggCo7KnZcpTyO61skM2RG5wpbyT6OWg4e6uw\nUpVDnT/VT40ZvzfMJiwFqfpNGF8wbYEYXNlLhO9EdzUqaKb+Cenrvm9J8YGRGvbW\n+boIu+zIaOAF9R5MWJO9ZwGu5C1vEUjdZ+JweuhOpQQjQVT3jQuBtAYYdNxVte8B\niE2dMqQYn4cLrzrt8Tdwk0DM+XWDyXwaC4NM8nRZXtGdKiKMS33nfd9C5Okpcgo6\nAl1TaGz5TEpsYe8UFyw2TrJeUSo31UnvVqy2LGF8CQKBgQD1jhm5k79ov4TIBDlZ\ne+SPmhK/ate7jlN4Y6dNZIx82Jin2TUrOKy0mZvh9UTt4wbpcNWhpapzV7bnUN5o\nCRZ7pyjP4x84p5j9PordbFmr9mBf3tamMOVxAUopIXJIuSMRqjo3VWuyYIv4QWAX\nzZMGZzeaDAy3b1acBZ+t3I8QyQKBgQDu8CEII+C97W/JzAVuex6cpIa3GXH73abJ\n6QKoA2m2gaZq+GDhWe5lPSbf8EFWwrzY29uMObMN2l0S38N5vLf3oSAhCoFTgSTJ\nFHULlU/zT5J/uf02GLYuLmxAuafQmj4rMJR8WNlguQzjORIE3QkOB9D612U2MWK6\n64jqlGQhSQKBgQDbbqnUaZDCBWud5hBtw8YjAZdBUTQJ2g0DZ2JJSdI1esUoMXVO\n8216Uk635Js9TgSONZPiyQ2B11+OZHjrVTuvMljJNXhgA302yV4Est+LSTckVRlS\nGB3hFzrzeH0NOcx567nsWrxMAfTy21EI1F9PeIt6cMwoXCl0cZ4ZGyEJ6QKBgQCa\noLP/vJg40WvCwFHTicHpxV+ZJtJpGKxThoO9j1feUFEOcObhb32j7ss1qRnft+7W\nuJGAUebaVDJfgIfURJkZnCBawZ2oGK9hoeeNM3emEhzQo+CGHb/oawqLaJp9mRQa\nhznQrv8eZSM9+TT8CL9PYSeQ7cwGIjeul7bneFjEYQKBgG5TGOK9lkfFI+K63m8z\n6RjEdu/76UK+4GVAekH6VjiJpUWc/ViagREp33PP1wzMw8pqELW9DZVEodjsAsFb\npNrH7nHOXEOUmDEN2jVfqVmEOkdXGeYHH7HMotFQC51GSa+oYfOdjSA0XYNDEXP2\nLvglTAsQuxm9Yf3zwvlI7hEA\n-----END PRIVATE KEY-----\n",
|
6 |
-
"client_email": "realtimetranslator@msq-ai.iam.gserviceaccount.com",
|
7 |
-
"client_id": "101839625152756595826",
|
8 |
-
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
|
9 |
-
"token_uri": "https://oauth2.googleapis.com/token",
|
10 |
-
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
|
11 |
-
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/realtimetranslator%40msq-ai.iam.gserviceaccount.com",
|
12 |
-
"universe_domain": "googleapis.com"
|
13 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
@@ -1,90 +1,98 @@
|
|
1 |
aiofiles==23.2.1
|
2 |
-
|
3 |
-
aiosignal==1.3.1
|
4 |
-
altair==5.1.2
|
5 |
annotated-types==0.6.0
|
6 |
anyio==3.7.1
|
7 |
-
|
8 |
attrs==23.1.0
|
9 |
audioread==3.0.1
|
10 |
-
|
11 |
-
certifi==2023.7.22
|
12 |
cffi==1.16.0
|
13 |
charset-normalizer==3.3.2
|
14 |
click==8.1.7
|
15 |
colorama==0.4.6
|
|
|
|
|
16 |
contourpy==1.2.0
|
17 |
cycler==0.12.1
|
|
|
18 |
decorator==5.1.1
|
19 |
distro==1.8.0
|
20 |
-
|
|
|
|
|
21 |
ffmpy==0.3.1
|
22 |
filelock==3.13.1
|
23 |
-
|
24 |
-
|
25 |
-
fsspec==2023.
|
26 |
-
|
27 |
-
google-api-python-client==2.109.0
|
28 |
-
google-auth==2.23.4
|
29 |
-
google-auth-httplib2==0.1.1
|
30 |
-
google-cloud-speech==2.22.0
|
31 |
-
googleapis-common-protos==1.61.0
|
32 |
gradio==4.7.1
|
33 |
gradio_client==0.7.0
|
34 |
-
grpcio==1.59.3
|
35 |
-
grpcio-status==1.59.3
|
36 |
h11==0.14.0
|
37 |
-
httpcore==1.0.
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
idna==3.
|
42 |
importlib-resources==6.1.1
|
|
|
|
|
|
|
|
|
43 |
Jinja2==3.1.2
|
44 |
joblib==1.3.2
|
45 |
-
jsonschema==4.
|
46 |
-
jsonschema-specifications==2023.
|
|
|
|
|
|
|
47 |
kiwisolver==1.4.5
|
48 |
lazy_loader==0.3
|
49 |
librosa==0.10.1
|
50 |
llvmlite==0.41.1
|
51 |
markdown-it-py==3.0.0
|
52 |
MarkupSafe==2.1.3
|
53 |
-
matplotlib==3.8.
|
|
|
54 |
mdurl==0.1.2
|
55 |
mpmath==1.3.0
|
56 |
msgpack==1.0.7
|
57 |
-
|
58 |
networkx==3.2.1
|
59 |
-
noisereduce==3.0.0
|
60 |
numba==0.58.1
|
61 |
-
numpy==1.26.
|
62 |
-
|
|
|
|
|
|
|
63 |
orjson==3.9.10
|
64 |
packaging==23.2
|
65 |
-
pandas==2.1.
|
|
|
66 |
Pillow==10.1.0
|
67 |
-
platformdirs==4.
|
68 |
pooch==1.8.0
|
69 |
-
|
70 |
protobuf==4.25.1
|
71 |
-
|
72 |
-
|
73 |
pycparser==2.21
|
74 |
-
pydantic==2.
|
75 |
-
pydantic_core==2.
|
76 |
pydub==0.25.1
|
77 |
-
Pygments==2.
|
78 |
pyparsing==3.1.1
|
|
|
79 |
python-dateutil==2.8.2
|
80 |
python-multipart==0.0.6
|
81 |
pytz==2023.3.post1
|
|
|
82 |
PyYAML==6.0.1
|
83 |
-
|
|
|
84 |
requests==2.31.0
|
85 |
-
rich==13.
|
86 |
-
rpds-py==0.
|
87 |
-
rsa==4.9
|
88 |
scikit-learn==1.3.2
|
89 |
scipy==1.11.4
|
90 |
semantic-version==2.10.0
|
@@ -93,18 +101,22 @@ six==1.16.0
|
|
93 |
sniffio==1.3.0
|
94 |
soundfile==0.12.1
|
95 |
soxr==0.3.7
|
|
|
96 |
starlette==0.27.0
|
97 |
sympy==1.12
|
98 |
threadpoolctl==3.2.0
|
99 |
tomlkit==0.12.0
|
100 |
toolz==0.12.0
|
101 |
-
torch==2.1.
|
|
|
|
|
|
|
102 |
tqdm==4.66.1
|
|
|
103 |
typer==0.9.0
|
104 |
-
typing_extensions==4.
|
105 |
tzdata==2023.3
|
106 |
-
|
107 |
-
urllib3==2.0.7
|
108 |
uvicorn==0.24.0.post1
|
109 |
-
|
110 |
-
|
|
|
1 |
aiofiles==23.2.1
|
2 |
+
altair==5.2.0
|
|
|
|
|
3 |
annotated-types==0.6.0
|
4 |
anyio==3.7.1
|
5 |
+
asttokens==2.4.1
|
6 |
attrs==23.1.0
|
7 |
audioread==3.0.1
|
8 |
+
certifi==2023.11.17
|
|
|
9 |
cffi==1.16.0
|
10 |
charset-normalizer==3.3.2
|
11 |
click==8.1.7
|
12 |
colorama==0.4.6
|
13 |
+
coloredlogs==15.0.1
|
14 |
+
comm==0.2.0
|
15 |
contourpy==1.2.0
|
16 |
cycler==0.12.1
|
17 |
+
debugpy==1.8.0
|
18 |
decorator==5.1.1
|
19 |
distro==1.8.0
|
20 |
+
et-xmlfile==1.1.0
|
21 |
+
executing==2.0.1
|
22 |
+
fastapi==0.105.0
|
23 |
ffmpy==0.3.1
|
24 |
filelock==3.13.1
|
25 |
+
flatbuffers==23.5.26
|
26 |
+
fonttools==4.47.0
|
27 |
+
fsspec==2023.12.2
|
28 |
+
funasr-onnx==0.2.4
|
|
|
|
|
|
|
|
|
|
|
29 |
gradio==4.7.1
|
30 |
gradio_client==0.7.0
|
|
|
|
|
31 |
h11==0.14.0
|
32 |
+
httpcore==1.0.2
|
33 |
+
httpx==0.25.2
|
34 |
+
huggingface-hub==0.19.4
|
35 |
+
humanfriendly==10.0
|
36 |
+
idna==3.6
|
37 |
importlib-resources==6.1.1
|
38 |
+
ipykernel==6.27.1
|
39 |
+
ipython==8.18.1
|
40 |
+
jedi==0.19.1
|
41 |
+
jieba==0.42.1
|
42 |
Jinja2==3.1.2
|
43 |
joblib==1.3.2
|
44 |
+
jsonschema==4.20.0
|
45 |
+
jsonschema-specifications==2023.11.2
|
46 |
+
jupyter_client==8.6.0
|
47 |
+
jupyter_core==5.5.1
|
48 |
+
kaldi-native-fbank==1.18.5
|
49 |
kiwisolver==1.4.5
|
50 |
lazy_loader==0.3
|
51 |
librosa==0.10.1
|
52 |
llvmlite==0.41.1
|
53 |
markdown-it-py==3.0.0
|
54 |
MarkupSafe==2.1.3
|
55 |
+
matplotlib==3.8.2
|
56 |
+
matplotlib-inline==0.1.6
|
57 |
mdurl==0.1.2
|
58 |
mpmath==1.3.0
|
59 |
msgpack==1.0.7
|
60 |
+
nest-asyncio==1.5.8
|
61 |
networkx==3.2.1
|
|
|
62 |
numba==0.58.1
|
63 |
+
numpy==1.26.2
|
64 |
+
onnx==1.15.0
|
65 |
+
onnxruntime==1.16.3
|
66 |
+
openai==1.5.0
|
67 |
+
openpyxl==3.1.2
|
68 |
orjson==3.9.10
|
69 |
packaging==23.2
|
70 |
+
pandas==2.1.4
|
71 |
+
parso==0.8.3
|
72 |
Pillow==10.1.0
|
73 |
+
platformdirs==4.1.0
|
74 |
pooch==1.8.0
|
75 |
+
prompt-toolkit==3.0.43
|
76 |
protobuf==4.25.1
|
77 |
+
psutil==5.9.7
|
78 |
+
pure-eval==0.2.2
|
79 |
pycparser==2.21
|
80 |
+
pydantic==2.5.2
|
81 |
+
pydantic_core==2.14.5
|
82 |
pydub==0.25.1
|
83 |
+
Pygments==2.17.2
|
84 |
pyparsing==3.1.1
|
85 |
+
pyreadline3==3.4.1
|
86 |
python-dateutil==2.8.2
|
87 |
python-multipart==0.0.6
|
88 |
pytz==2023.3.post1
|
89 |
+
pywin32==306
|
90 |
PyYAML==6.0.1
|
91 |
+
pyzmq==25.1.2
|
92 |
+
referencing==0.32.0
|
93 |
requests==2.31.0
|
94 |
+
rich==13.7.0
|
95 |
+
rpds-py==0.15.2
|
|
|
96 |
scikit-learn==1.3.2
|
97 |
scipy==1.11.4
|
98 |
semantic-version==2.10.0
|
|
|
101 |
sniffio==1.3.0
|
102 |
soundfile==0.12.1
|
103 |
soxr==0.3.7
|
104 |
+
stack-data==0.6.3
|
105 |
starlette==0.27.0
|
106 |
sympy==1.12
|
107 |
threadpoolctl==3.2.0
|
108 |
tomlkit==0.12.0
|
109 |
toolz==0.12.0
|
110 |
+
torch==2.1.2
|
111 |
+
torchaudio==2.1.2
|
112 |
+
torchvision==0.16.2
|
113 |
+
tornado==6.4
|
114 |
tqdm==4.66.1
|
115 |
+
traitlets==5.14.0
|
116 |
typer==0.9.0
|
117 |
+
typing_extensions==4.9.0
|
118 |
tzdata==2023.3
|
119 |
+
urllib3==2.1.0
|
|
|
120 |
uvicorn==0.24.0.post1
|
121 |
+
wcwidth==0.2.12
|
122 |
+
websockets==11.0.3
|
transcription_service.py
CHANGED
@@ -1,58 +1,41 @@
|
|
1 |
import os
|
2 |
-
from google.cloud import speech
|
3 |
from openai import OpenAI
|
4 |
import subprocess
|
5 |
-
|
6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
# flac_file=convert_wav_to_flac(wav_file)
|
26 |
-
# print(flac_file,'flac_file_path\n')
|
27 |
-
|
28 |
-
# """
|
29 |
-
# Transcribes a local WAV file using Google's Speech-to-Text API.
|
30 |
-
# """
|
31 |
-
# client = speech.SpeechClient()
|
32 |
-
|
33 |
-
# with open(wav_file, "rb") as audio_file:
|
34 |
-
# content = audio_file.read()
|
35 |
|
36 |
-
# audio = speech.RecognitionAudio(content=content)
|
37 |
-
# config = speech.RecognitionConfig(
|
38 |
-
# encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
|
39 |
-
# sample_rate_hertz=48000,
|
40 |
-
# # model="default",
|
41 |
-
# enable_spoken_punctuation=True,
|
42 |
-
# model="latest_short",
|
43 |
-
|
44 |
-
# # Adjust this according to your file's sample rate
|
45 |
-
# language_code=language,
|
46 |
-
# enable_automatic_punctuation=False
|
47 |
-
# )
|
48 |
|
49 |
-
|
50 |
|
51 |
-
# for result in response.results:
|
52 |
-
# return "{}".format(result.alternatives[0].transcript)
|
53 |
def transcribe_speech_local(wav_file,language):
|
54 |
-
|
55 |
client = OpenAI()
|
|
|
|
|
|
|
56 |
|
57 |
audio_file = open(wav_file, "rb")
|
58 |
transcript = client.audio.transcriptions.create(
|
|
|
1 |
import os
|
|
|
2 |
from openai import OpenAI
|
3 |
import subprocess
|
4 |
+
import torch
|
5 |
|
6 |
+
# Load the Silero VAD model and its helper functions once, at import time.
# force_reload=False lets torch.hub reuse its local cache; with
# force_reload=True every start-up re-downloaded the repository and therefore
# failed whenever the network or GitHub was unavailable.
model, utils = torch.hub.load(
    repo_or_dir='snakers4/silero-vad',
    model='silero_vad',
    force_reload=False,
    onnx=False,
)
|
10 |
+
|
11 |
+
def apply_Vad(wav_file):
    """Keep only the speech portions of ``wav_file``, rewriting it in place.

    The audio is loaded with ``sampling_rate=16000``, speech segments are
    detected with the module-level Silero VAD ``model``, and, when at least
    one segment is found, the same path is overwritten with the detected
    segments joined by ``collect_chunks``.

    Args:
        wav_file: Path of the audio file to filter. It is overwritten when
            speech is detected and left untouched otherwise.

    Returns:
        ``wav_file`` when speech was detected; ``None`` when no speech was
        found or when loading, detection or saving raised (the error is
        printed, not re-raised, so callers can treat ``None`` as "nothing to
        transcribe").
    """
    sampling_rate = 16000
    try:
        torch.set_num_threads(1)
        # The helper order mirrors the tuple returned next to the model by
        # torch.hub.load; VADIterator is not needed for one-shot filtering.
        (get_speech_timestamps,
         save_audio,
         read_audio,
         _vad_iterator,
         collect_chunks) = utils

        wav = read_audio(wav_file, sampling_rate=sampling_rate)
        speech_timestamps = get_speech_timestamps(
            wav, model, sampling_rate=sampling_rate)

        # Check for "no speech" before touching the file. Previously
        # collect_chunks/save_audio ran first, so this case only returned
        # None by way of an exception swallowed below (presumably from
        # concatenating zero chunks; verify against the silero-vad version).
        if not speech_timestamps:
            return None

        save_audio(wav_file,
                   collect_chunks(speech_timestamps, wav),
                   sampling_rate=sampling_rate)
        return wav_file
    except Exception as e:
        # Best-effort by design: a VAD failure must not crash the caller.
        print(f"apply_Vad failed for {wav_file!r}: {e}")
        return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
|
31 |
+
import os
|
32 |
|
|
|
|
|
33 |
def transcribe_speech_local(wav_file,language):
|
34 |
+
|
35 |
client = OpenAI()
|
36 |
+
wav_file=apply_Vad(wav_file)
|
37 |
+
if wav_file is None:
|
38 |
+
return ""
|
39 |
|
40 |
audio_file = open(wav_file, "rb")
|
41 |
transcript = client.audio.transcriptions.create(
|