MufinApps committed on
Commit
2ed8ecf
1 Parent(s): 11b7b41

Applied VAD

Browse files
.gitignore CHANGED
@@ -2,4 +2,5 @@ rt/
2
  checkopenai.py
3
  lib
4
  __pycache__
5
- .env
 
 
2
  checkopenai.py
3
  lib
4
  __pycache__
5
+ .env
6
+ VAD_service.ipynb
__pycache__/transcription_service.cpython-311.pyc CHANGED
Binary files a/__pycache__/transcription_service.cpython-311.pyc and b/__pycache__/transcription_service.cpython-311.pyc differ
 
app.py CHANGED
@@ -5,7 +5,7 @@ import os
5
  import json
6
  from transcription_service import transcribe_speech_local,translation_service
7
 
8
-
9
 
10
  def translateoutput(text,language):
11
  if text=="" or text is None:
 
5
  import json
6
  from transcription_service import transcribe_speech_local,translation_service
7
 
8
# SECURITY: API credentials must never be hard-coded in source control.
# Any key that was previously committed on this line must be treated as
# compromised and revoked. The key is expected to be supplied through the
# process environment (for example via the host's secret store) instead.
# Fail fast with a clear message rather than with an obscure
# authentication error on the first API call.
if not os.environ.get('OPENAI_API_KEY'):
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it in the environment before starting the app."
    )
9
 
10
  def translateoutput(text,language):
11
  if text=="" or text is None:
msq-ai-78bdccb055f4.json DELETED
@@ -1,13 +0,0 @@
1
- {
2
- "type": "service_account",
3
- "project_id": "msq-ai",
4
- "private_key_id": "78bdccb055f435ec9a3767887d77dccded5b45fc",
5
- "private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQDlMHDRxYIwtRXU\n2KdBCgimYiGQ/ImfleFRnNM1QDO216wzjinvXw6oQZcCvJinVvV+iA9wA5l8Qk1Q\nUMIkzcoA9UCIrscTy7+tOfnjxP9Kzib0JQlA8NBXYdm7kCNIVoxwNfz/gCD/nGYK\nTZ6VYLT+DqIXf110rVG/hgv37Wa1G4IBauphj0SMKaS6/syU2HGkC0ozOJOppSwT\norSL+72ddhUAO5bwLYBNo0x/Kg+uozbXi4S+1Qfgjs3EZUbQr2l/s5pWfxKd1m56\nhmrxEpQ3DN55s+iN5PW1FiDqOxDQf823s4LOV99/mqPfStjzskMbuFi1/uO94ijP\n71MYebJRAgMBAAECggEABfzKm75XUP6wGfIJh45TrMYYRqWEUmgb3XIVhfj+CN8w\nS14BzDQ9aamiAHMtIUUJL5OuJJDhggCo7KnZcpTyO61skM2RG5wpbyT6OWg4e6uw\nUpVDnT/VT40ZvzfMJiwFqfpNGF8wbYEYXNlLhO9EdzUqaKb+Cenrvm9J8YGRGvbW\n+boIu+zIaOAF9R5MWJO9ZwGu5C1vEUjdZ+JweuhOpQQjQVT3jQuBtAYYdNxVte8B\niE2dMqQYn4cLrzrt8Tdwk0DM+XWDyXwaC4NM8nRZXtGdKiKMS33nfd9C5Okpcgo6\nAl1TaGz5TEpsYe8UFyw2TrJeUSo31UnvVqy2LGF8CQKBgQD1jhm5k79ov4TIBDlZ\ne+SPmhK/ate7jlN4Y6dNZIx82Jin2TUrOKy0mZvh9UTt4wbpcNWhpapzV7bnUN5o\nCRZ7pyjP4x84p5j9PordbFmr9mBf3tamMOVxAUopIXJIuSMRqjo3VWuyYIv4QWAX\nzZMGZzeaDAy3b1acBZ+t3I8QyQKBgQDu8CEII+C97W/JzAVuex6cpIa3GXH73abJ\n6QKoA2m2gaZq+GDhWe5lPSbf8EFWwrzY29uMObMN2l0S38N5vLf3oSAhCoFTgSTJ\nFHULlU/zT5J/uf02GLYuLmxAuafQmj4rMJR8WNlguQzjORIE3QkOB9D612U2MWK6\n64jqlGQhSQKBgQDbbqnUaZDCBWud5hBtw8YjAZdBUTQJ2g0DZ2JJSdI1esUoMXVO\n8216Uk635Js9TgSONZPiyQ2B11+OZHjrVTuvMljJNXhgA302yV4Est+LSTckVRlS\nGB3hFzrzeH0NOcx567nsWrxMAfTy21EI1F9PeIt6cMwoXCl0cZ4ZGyEJ6QKBgQCa\noLP/vJg40WvCwFHTicHpxV+ZJtJpGKxThoO9j1feUFEOcObhb32j7ss1qRnft+7W\nuJGAUebaVDJfgIfURJkZnCBawZ2oGK9hoeeNM3emEhzQo+CGHb/oawqLaJp9mRQa\nhznQrv8eZSM9+TT8CL9PYSeQ7cwGIjeul7bneFjEYQKBgG5TGOK9lkfFI+K63m8z\n6RjEdu/76UK+4GVAekH6VjiJpUWc/ViagREp33PP1wzMw8pqELW9DZVEodjsAsFb\npNrH7nHOXEOUmDEN2jVfqVmEOkdXGeYHH7HMotFQC51GSa+oYfOdjSA0XYNDEXP2\nLvglTAsQuxm9Yf3zwvlI7hEA\n-----END PRIVATE KEY-----\n",
6
- "client_email": "realtimetranslator@msq-ai.iam.gserviceaccount.com",
7
- "client_id": "101839625152756595826",
8
- "auth_uri": "https://accounts.google.com/o/oauth2/auth",
9
- "token_uri": "https://oauth2.googleapis.com/token",
10
- "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
11
- "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/realtimetranslator%40msq-ai.iam.gserviceaccount.com",
12
- "universe_domain": "googleapis.com"
13
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -1,90 +1,98 @@
1
  aiofiles==23.2.1
2
- aiohttp==3.8.6
3
- aiosignal==1.3.1
4
- altair==5.1.2
5
  annotated-types==0.6.0
6
  anyio==3.7.1
7
- async-timeout==4.0.3
8
  attrs==23.1.0
9
  audioread==3.0.1
10
- cachetools==5.3.2
11
- certifi==2023.7.22
12
  cffi==1.16.0
13
  charset-normalizer==3.3.2
14
  click==8.1.7
15
  colorama==0.4.6
 
 
16
  contourpy==1.2.0
17
  cycler==0.12.1
 
18
  decorator==5.1.1
19
  distro==1.8.0
20
- fastapi==0.104.1
 
 
21
  ffmpy==0.3.1
22
  filelock==3.13.1
23
- fonttools==4.44.0
24
- frozenlist==1.4.0
25
- fsspec==2023.10.0
26
- google-api-core==2.14.0
27
- google-api-python-client==2.109.0
28
- google-auth==2.23.4
29
- google-auth-httplib2==0.1.1
30
- google-cloud-speech==2.22.0
31
- googleapis-common-protos==1.61.0
32
  gradio==4.7.1
33
  gradio_client==0.7.0
34
- grpcio==1.59.3
35
- grpcio-status==1.59.3
36
  h11==0.14.0
37
- httpcore==1.0.1
38
- httplib2==0.22.0
39
- httpx==0.25.1
40
- huggingface-hub==0.18.0
41
- idna==3.4
42
  importlib-resources==6.1.1
 
 
 
 
43
  Jinja2==3.1.2
44
  joblib==1.3.2
45
- jsonschema==4.19.2
46
- jsonschema-specifications==2023.7.1
 
 
 
47
  kiwisolver==1.4.5
48
  lazy_loader==0.3
49
  librosa==0.10.1
50
  llvmlite==0.41.1
51
  markdown-it-py==3.0.0
52
  MarkupSafe==2.1.3
53
- matplotlib==3.8.1
 
54
  mdurl==0.1.2
55
  mpmath==1.3.0
56
  msgpack==1.0.7
57
- multidict==6.0.4
58
  networkx==3.2.1
59
- noisereduce==3.0.0
60
  numba==0.58.1
61
- numpy==1.26.1
62
- openai==1.3.6
 
 
 
63
  orjson==3.9.10
64
  packaging==23.2
65
- pandas==2.1.2
 
66
  Pillow==10.1.0
67
- platformdirs==4.0.0
68
  pooch==1.8.0
69
- proto-plus==1.22.3
70
  protobuf==4.25.1
71
- pyasn1==0.5.1
72
- pyasn1-modules==0.3.0
73
  pycparser==2.21
74
- pydantic==2.4.2
75
- pydantic_core==2.10.1
76
  pydub==0.25.1
77
- Pygments==2.16.1
78
  pyparsing==3.1.1
 
79
  python-dateutil==2.8.2
80
  python-multipart==0.0.6
81
  pytz==2023.3.post1
 
82
  PyYAML==6.0.1
83
- referencing==0.30.2
 
84
  requests==2.31.0
85
- rich==13.6.0
86
- rpds-py==0.12.0
87
- rsa==4.9
88
  scikit-learn==1.3.2
89
  scipy==1.11.4
90
  semantic-version==2.10.0
@@ -93,18 +101,22 @@ six==1.16.0
93
  sniffio==1.3.0
94
  soundfile==0.12.1
95
  soxr==0.3.7
 
96
  starlette==0.27.0
97
  sympy==1.12
98
  threadpoolctl==3.2.0
99
  tomlkit==0.12.0
100
  toolz==0.12.0
101
- torch==2.1.1
 
 
 
102
  tqdm==4.66.1
 
103
  typer==0.9.0
104
- typing_extensions==4.8.0
105
  tzdata==2023.3
106
- uritemplate==4.1.1
107
- urllib3==2.0.7
108
  uvicorn==0.24.0.post1
109
- websockets==11.0.3
110
- yarl==1.9.2
 
1
  aiofiles==23.2.1
2
+ altair==5.2.0
 
 
3
  annotated-types==0.6.0
4
  anyio==3.7.1
5
+ asttokens==2.4.1
6
  attrs==23.1.0
7
  audioread==3.0.1
8
+ certifi==2023.11.17
 
9
  cffi==1.16.0
10
  charset-normalizer==3.3.2
11
  click==8.1.7
12
  colorama==0.4.6
13
+ coloredlogs==15.0.1
14
+ comm==0.2.0
15
  contourpy==1.2.0
16
  cycler==0.12.1
17
+ debugpy==1.8.0
18
  decorator==5.1.1
19
  distro==1.8.0
20
+ et-xmlfile==1.1.0
21
+ executing==2.0.1
22
+ fastapi==0.105.0
23
  ffmpy==0.3.1
24
  filelock==3.13.1
25
+ flatbuffers==23.5.26
26
+ fonttools==4.47.0
27
+ fsspec==2023.12.2
28
+ funasr-onnx==0.2.4
 
 
 
 
 
29
  gradio==4.7.1
30
  gradio_client==0.7.0
 
 
31
  h11==0.14.0
32
+ httpcore==1.0.2
33
+ httpx==0.25.2
34
+ huggingface-hub==0.19.4
35
+ humanfriendly==10.0
36
+ idna==3.6
37
  importlib-resources==6.1.1
38
+ ipykernel==6.27.1
39
+ ipython==8.18.1
40
+ jedi==0.19.1
41
+ jieba==0.42.1
42
  Jinja2==3.1.2
43
  joblib==1.3.2
44
+ jsonschema==4.20.0
45
+ jsonschema-specifications==2023.11.2
46
+ jupyter_client==8.6.0
47
+ jupyter_core==5.5.1
48
+ kaldi-native-fbank==1.18.5
49
  kiwisolver==1.4.5
50
  lazy_loader==0.3
51
  librosa==0.10.1
52
  llvmlite==0.41.1
53
  markdown-it-py==3.0.0
54
  MarkupSafe==2.1.3
55
+ matplotlib==3.8.2
56
+ matplotlib-inline==0.1.6
57
  mdurl==0.1.2
58
  mpmath==1.3.0
59
  msgpack==1.0.7
60
+ nest-asyncio==1.5.8
61
  networkx==3.2.1
 
62
  numba==0.58.1
63
+ numpy==1.26.2
64
+ onnx==1.15.0
65
+ onnxruntime==1.16.3
66
+ openai==1.5.0
67
+ openpyxl==3.1.2
68
  orjson==3.9.10
69
  packaging==23.2
70
+ pandas==2.1.4
71
+ parso==0.8.3
72
  Pillow==10.1.0
73
+ platformdirs==4.1.0
74
  pooch==1.8.0
75
+ prompt-toolkit==3.0.43
76
  protobuf==4.25.1
77
+ psutil==5.9.7
78
+ pure-eval==0.2.2
79
  pycparser==2.21
80
+ pydantic==2.5.2
81
+ pydantic_core==2.14.5
82
  pydub==0.25.1
83
+ Pygments==2.17.2
84
  pyparsing==3.1.1
85
+ pyreadline3==3.4.1
86
  python-dateutil==2.8.2
87
  python-multipart==0.0.6
88
  pytz==2023.3.post1
89
+ pywin32==306
90
  PyYAML==6.0.1
91
+ pyzmq==25.1.2
92
+ referencing==0.32.0
93
  requests==2.31.0
94
+ rich==13.7.0
95
+ rpds-py==0.15.2
 
96
  scikit-learn==1.3.2
97
  scipy==1.11.4
98
  semantic-version==2.10.0
 
101
  sniffio==1.3.0
102
  soundfile==0.12.1
103
  soxr==0.3.7
104
+ stack-data==0.6.3
105
  starlette==0.27.0
106
  sympy==1.12
107
  threadpoolctl==3.2.0
108
  tomlkit==0.12.0
109
  toolz==0.12.0
110
+ torch==2.1.2
111
+ torchaudio==2.1.2
112
+ torchvision==0.16.2
113
+ tornado==6.4
114
  tqdm==4.66.1
115
+ traitlets==5.14.0
116
  typer==0.9.0
117
+ typing_extensions==4.9.0
118
  tzdata==2023.3
119
+ urllib3==2.1.0
 
120
  uvicorn==0.24.0.post1
121
+ wcwidth==0.2.12
122
+ websockets==11.0.3
transcription_service.py CHANGED
@@ -1,58 +1,41 @@
1
  import os
2
- from google.cloud import speech
3
  from openai import OpenAI
4
  import subprocess
5
- # os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'msq-ai-78bdccb055f4.json'
6
 
 
 
 
 
 
 
7
 
8
- import os
9
-
10
- # def convert_wav_to_flac(wav_file_path):
11
- # Extract the file name and extension from the input path
12
-
13
-
14
- # Generate the output FLAC file path
15
- # flac_file_path = wav_file_path.replace(".wav", ".flac")
16
-
17
-
18
-
19
-
20
-
21
- # def transcribe_speech_local(wav_file,language):
22
- # print(wav_file,'wav_file_path\n')
23
- # flac_file=convert_wav_to_flac(wav_file)
24
-
25
- # flac_file=convert_wav_to_flac(wav_file)
26
- # print(flac_file,'flac_file_path\n')
27
-
28
- # """
29
- # Transcribes a local WAV file using Google's Speech-to-Text API.
30
- # """
31
- # client = speech.SpeechClient()
32
-
33
- # with open(wav_file, "rb") as audio_file:
34
- # content = audio_file.read()
35
 
36
- # audio = speech.RecognitionAudio(content=content)
37
- # config = speech.RecognitionConfig(
38
- # encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
39
- # sample_rate_hertz=48000,
40
- # # model="default",
41
- # enable_spoken_punctuation=True,
42
- # model="latest_short",
43
-
44
- # # Adjust this according to your file's sample rate
45
- # language_code=language,
46
- # enable_automatic_punctuation=False
47
- # )
48
 
49
- # response = client.recognize(config=config, audio=audio)
50
 
51
- # for result in response.results:
52
- # return "{}".format(result.alternatives[0].transcript)
53
  def transcribe_speech_local(wav_file,language):
54
- from openai import OpenAI
55
  client = OpenAI()
 
 
 
56
 
57
  audio_file = open(wav_file, "rb")
58
  transcript = client.audio.transcriptions.create(
 
1
  import os
 
2
  from openai import OpenAI
3
  import subprocess
4
+ import torch
5
 
6
# Load the Silero VAD model and its helper functions once, at import time.
# force_reload=False lets torch.hub reuse its local cache instead of
# re-downloading the repository unconditionally on every process start.
model, utils = torch.hub.load(
    repo_or_dir='snakers4/silero-vad',
    model='silero_vad',
    force_reload=False,
    onnx=False,
)
10
+
11
def apply_Vad(wav_file):
    """Keep only the speech segments of ``wav_file``, rewriting it in place.

    The audio is read at 16 kHz, speech regions are detected with the
    module-level ``model``, and the concatenated speech chunks are saved
    back to the same path.

    Args:
        wav_file: Path of the audio file to filter. The file is overwritten
            when speech is found.

    Returns:
        ``wav_file`` when at least one speech segment was detected, otherwise
        ``None`` (no speech detected, or any error occurred while
        reading, analysing or writing the audio).
    """
    SAMPLING_RATE = 16000
    try:
        torch.set_num_threads(1)
        # `utils` is the helper tuple returned alongside the model by
        # torch.hub.load; the fourth helper (VADIterator) is unused here.
        (get_speech_timestamps,
         save_audio,
         read_audio,
         _vad_iterator,
         collect_chunks) = utils

        wav = read_audio(wav_file, sampling_rate=SAMPLING_RATE)
        speech_timestamps = get_speech_timestamps(
            wav, model, sampling_rate=SAMPLING_RATE
        )

        # Guard before touching the file: with no speech segments there is
        # nothing to collect or save, so leave the input untouched and tell
        # the caller to skip transcription.
        if len(speech_timestamps) == 0:
            return None

        save_audio(
            wav_file,
            collect_chunks(speech_timestamps, wav),
            sampling_rate=SAMPLING_RATE,
        )
        return wav_file
    except Exception as e:
        # Best-effort filter: report the problem and signal "no usable audio".
        print(e)
        return None
 
 
 
 
 
 
 
 
 
 
 
29
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
+ import os
32
 
 
 
33
  def transcribe_speech_local(wav_file,language):
34
+
35
  client = OpenAI()
36
+ wav_file=apply_Vad(wav_file)
37
+ if wav_file is None:
38
+ return ""
39
 
40
  audio_file = open(wav_file, "rb")
41
  transcript = client.audio.transcriptions.create(