Spaces:

Mufintech
/

RealTime_Translation

Runtime error

App Files Files Community

MufinApps committed on Dec 19, 2023

Commit

2ed8ecf

•

1 Parent(s): 11b7b41

Applied VAD

Browse files

Files changed (6) hide show

.gitignore +2 -1
__pycache__/transcription_service.cpython-311.pyc +0 -0
app.py +1 -1
msq-ai-78bdccb055f4.json +0 -13
requirements.txt +60 -48
transcription_service.py +28 -45

.gitignore CHANGED Viewed

@@ -2,4 +2,5 @@ rt/
 checkopenai.py
 lib
 __pycache__
-.env

 checkopenai.py
 lib
 __pycache__
+.env
+VAD_service.ipynb

__pycache__/transcription_service.cpython-311.pyc CHANGED Viewed

Binary files a/__pycache__/transcription_service.cpython-311.pyc and b/__pycache__/transcription_service.cpython-311.pyc differ

app.py CHANGED Viewed

@@ -5,7 +5,7 @@ import os
 import json
 from transcription_service import transcribe_speech_local,translation_service
 def translateoutput(text,language):
     if text=="" or text is None:

 import json
 from transcription_service import transcribe_speech_local,translation_service
+# Never hard-code API keys in source control. OpenAI() picks up
+# OPENAI_API_KEY from the environment, so provide it as a deployment secret.
+# NOTE(review): the key previously committed on this line is exposed and
+# must be revoked.
+if not os.environ.get('OPENAI_API_KEY'):
+    raise RuntimeError(
+        'OPENAI_API_KEY is not set; configure it as an environment '
+        'variable or Space secret.'
+    )
 def translateoutput(text,language):
     if text=="" or text is None:

msq-ai-78bdccb055f4.json DELETED Viewed

@@ -1,13 +0,0 @@
-{
-  "type": "service_account",
-  "project_id": "msq-ai",
-  "private_key_id": "78bdccb055f435ec9a3767887d77dccded5b45fc",
-  "private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQDlMHDRxYIwtRXU\n2KdBCgimYiGQ/ImfleFRnNM1QDO216wzjinvXw6oQZcCvJinVvV+iA9wA5l8Qk1Q\nUMIkzcoA9UCIrscTy7+tOfnjxP9Kzib0JQlA8NBXYdm7kCNIVoxwNfz/gCD/nGYK\nTZ6VYLT+DqIXf110rVG/hgv37Wa1G4IBauphj0SMKaS6/syU2HGkC0ozOJOppSwT\norSL+72ddhUAO5bwLYBNo0x/Kg+uozbXi4S+1Qfgjs3EZUbQr2l/s5pWfxKd1m56\nhmrxEpQ3DN55s+iN5PW1FiDqOxDQf823s4LOV99/mqPfStjzskMbuFi1/uO94ijP\n71MYebJRAgMBAAECggEABfzKm75XUP6wGfIJh45TrMYYRqWEUmgb3XIVhfj+CN8w\nS14BzDQ9aamiAHMtIUUJL5OuJJDhggCo7KnZcpTyO61skM2RG5wpbyT6OWg4e6uw\nUpVDnT/VT40ZvzfMJiwFqfpNGF8wbYEYXNlLhO9EdzUqaKb+Cenrvm9J8YGRGvbW\n+boIu+zIaOAF9R5MWJO9ZwGu5C1vEUjdZ+JweuhOpQQjQVT3jQuBtAYYdNxVte8B\niE2dMqQYn4cLrzrt8Tdwk0DM+XWDyXwaC4NM8nRZXtGdKiKMS33nfd9C5Okpcgo6\nAl1TaGz5TEpsYe8UFyw2TrJeUSo31UnvVqy2LGF8CQKBgQD1jhm5k79ov4TIBDlZ\ne+SPmhK/ate7jlN4Y6dNZIx82Jin2TUrOKy0mZvh9UTt4wbpcNWhpapzV7bnUN5o\nCRZ7pyjP4x84p5j9PordbFmr9mBf3tamMOVxAUopIXJIuSMRqjo3VWuyYIv4QWAX\nzZMGZzeaDAy3b1acBZ+t3I8QyQKBgQDu8CEII+C97W/JzAVuex6cpIa3GXH73abJ\n6QKoA2m2gaZq+GDhWe5lPSbf8EFWwrzY29uMObMN2l0S38N5vLf3oSAhCoFTgSTJ\nFHULlU/zT5J/uf02GLYuLmxAuafQmj4rMJR8WNlguQzjORIE3QkOB9D612U2MWK6\n64jqlGQhSQKBgQDbbqnUaZDCBWud5hBtw8YjAZdBUTQJ2g0DZ2JJSdI1esUoMXVO\n8216Uk635Js9TgSONZPiyQ2B11+OZHjrVTuvMljJNXhgA302yV4Est+LSTckVRlS\nGB3hFzrzeH0NOcx567nsWrxMAfTy21EI1F9PeIt6cMwoXCl0cZ4ZGyEJ6QKBgQCa\noLP/vJg40WvCwFHTicHpxV+ZJtJpGKxThoO9j1feUFEOcObhb32j7ss1qRnft+7W\nuJGAUebaVDJfgIfURJkZnCBawZ2oGK9hoeeNM3emEhzQo+CGHb/oawqLaJp9mRQa\nhznQrv8eZSM9+TT8CL9PYSeQ7cwGIjeul7bneFjEYQKBgG5TGOK9lkfFI+K63m8z\n6RjEdu/76UK+4GVAekH6VjiJpUWc/ViagREp33PP1wzMw8pqELW9DZVEodjsAsFb\npNrH7nHOXEOUmDEN2jVfqVmEOkdXGeYHH7HMotFQC51GSa+oYfOdjSA0XYNDEXP2\nLvglTAsQuxm9Yf3zwvlI7hEA\n-----END PRIVATE KEY-----\n",
-  "client_email": "realtimetranslator@msq-ai.iam.gserviceaccount.com",
-  "client_id": "101839625152756595826",
-  "auth_uri": "https://accounts.google.com/o/oauth2/auth",
-  "token_uri": "https://oauth2.googleapis.com/token",
-  "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
-  "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/realtimetranslator%40msq-ai.iam.gserviceaccount.com",
-  "universe_domain": "googleapis.com"
-}

requirements.txt CHANGED Viewed

@@ -1,90 +1,98 @@
 aiofiles==23.2.1
-aiohttp==3.8.6
-aiosignal==1.3.1
-altair==5.1.2
 annotated-types==0.6.0
 anyio==3.7.1
-async-timeout==4.0.3
 attrs==23.1.0
 audioread==3.0.1
-cachetools==5.3.2
-certifi==2023.7.22
 cffi==1.16.0
 charset-normalizer==3.3.2
 click==8.1.7
 colorama==0.4.6
 contourpy==1.2.0
 cycler==0.12.1
 decorator==5.1.1
 distro==1.8.0
-fastapi==0.104.1
 ffmpy==0.3.1
 filelock==3.13.1
-fonttools==4.44.0
-frozenlist==1.4.0
-fsspec==2023.10.0
-google-api-core==2.14.0
-google-api-python-client==2.109.0
-google-auth==2.23.4
-google-auth-httplib2==0.1.1
-google-cloud-speech==2.22.0
-googleapis-common-protos==1.61.0
 gradio==4.7.1
 gradio_client==0.7.0
-grpcio==1.59.3
-grpcio-status==1.59.3
 h11==0.14.0
-httpcore==1.0.1
-httplib2==0.22.0
-httpx==0.25.1
-huggingface-hub==0.18.0
-idna==3.4
 importlib-resources==6.1.1
 Jinja2==3.1.2
 joblib==1.3.2
-jsonschema==4.19.2
-jsonschema-specifications==2023.7.1
 kiwisolver==1.4.5
 lazy_loader==0.3
 librosa==0.10.1
 llvmlite==0.41.1
 markdown-it-py==3.0.0
 MarkupSafe==2.1.3
-matplotlib==3.8.1
 mdurl==0.1.2
 mpmath==1.3.0
 msgpack==1.0.7
-multidict==6.0.4
 networkx==3.2.1
-noisereduce==3.0.0
 numba==0.58.1
-numpy==1.26.1
-openai==1.3.6
 orjson==3.9.10
 packaging==23.2
-pandas==2.1.2
 Pillow==10.1.0
-platformdirs==4.0.0
 pooch==1.8.0
-proto-plus==1.22.3
 protobuf==4.25.1
-pyasn1==0.5.1
-pyasn1-modules==0.3.0
 pycparser==2.21
-pydantic==2.4.2
-pydantic_core==2.10.1
 pydub==0.25.1
-Pygments==2.16.1
 pyparsing==3.1.1
 python-dateutil==2.8.2
 python-multipart==0.0.6
 pytz==2023.3.post1
 PyYAML==6.0.1
-referencing==0.30.2
 requests==2.31.0
-rich==13.6.0
-rpds-py==0.12.0
-rsa==4.9
 scikit-learn==1.3.2
 scipy==1.11.4
 semantic-version==2.10.0
@@ -93,18 +101,22 @@ six==1.16.0
 sniffio==1.3.0
 soundfile==0.12.1
 soxr==0.3.7
 starlette==0.27.0
 sympy==1.12
 threadpoolctl==3.2.0
 tomlkit==0.12.0
 toolz==0.12.0
-torch==2.1.1
 tqdm==4.66.1
 typer==0.9.0
-typing_extensions==4.8.0
 tzdata==2023.3
-uritemplate==4.1.1
-urllib3==2.0.7
 uvicorn==0.24.0.post1
-websockets==11.0.3
-yarl==1.9.2

 aiofiles==23.2.1
+altair==5.2.0
 annotated-types==0.6.0
 anyio==3.7.1
+asttokens==2.4.1
 attrs==23.1.0
 audioread==3.0.1
+certifi==2023.11.17
 cffi==1.16.0
 charset-normalizer==3.3.2
 click==8.1.7
 colorama==0.4.6
+coloredlogs==15.0.1
+comm==0.2.0
 contourpy==1.2.0
 cycler==0.12.1
+debugpy==1.8.0
 decorator==5.1.1
 distro==1.8.0
+et-xmlfile==1.1.0
+executing==2.0.1
+fastapi==0.105.0
 ffmpy==0.3.1
 filelock==3.13.1
+flatbuffers==23.5.26
+fonttools==4.47.0
+fsspec==2023.12.2
+funasr-onnx==0.2.4
 gradio==4.7.1
 gradio_client==0.7.0
 h11==0.14.0
+httpcore==1.0.2
+httpx==0.25.2
+huggingface-hub==0.19.4
+humanfriendly==10.0
+idna==3.6
 importlib-resources==6.1.1
+ipykernel==6.27.1
+ipython==8.18.1
+jedi==0.19.1
+jieba==0.42.1
 Jinja2==3.1.2
 joblib==1.3.2
+jsonschema==4.20.0
+jsonschema-specifications==2023.11.2
+jupyter_client==8.6.0
+jupyter_core==5.5.1
+kaldi-native-fbank==1.18.5
 kiwisolver==1.4.5
 lazy_loader==0.3
 librosa==0.10.1
 llvmlite==0.41.1
 markdown-it-py==3.0.0
 MarkupSafe==2.1.3
+matplotlib==3.8.2
+matplotlib-inline==0.1.6
 mdurl==0.1.2
 mpmath==1.3.0
 msgpack==1.0.7
+nest-asyncio==1.5.8
 networkx==3.2.1
 numba==0.58.1
+numpy==1.26.2
+onnx==1.15.0
+onnxruntime==1.16.3
+openai==1.5.0
+openpyxl==3.1.2
 orjson==3.9.10
 packaging==23.2
+pandas==2.1.4
+parso==0.8.3
 Pillow==10.1.0
+platformdirs==4.1.0
 pooch==1.8.0
+prompt-toolkit==3.0.43
 protobuf==4.25.1
+psutil==5.9.7
+pure-eval==0.2.2
 pycparser==2.21
+pydantic==2.5.2
+pydantic_core==2.14.5
 pydub==0.25.1
+Pygments==2.17.2
 pyparsing==3.1.1
+pyreadline3==3.4.1
 python-dateutil==2.8.2
 python-multipart==0.0.6
 pytz==2023.3.post1
+pywin32==306; sys_platform == "win32"
 PyYAML==6.0.1
+pyzmq==25.1.2
+referencing==0.32.0
 requests==2.31.0
+rich==13.7.0
+rpds-py==0.15.2
 scikit-learn==1.3.2
 scipy==1.11.4
 semantic-version==2.10.0
 sniffio==1.3.0
 soundfile==0.12.1
 soxr==0.3.7
+stack-data==0.6.3
 starlette==0.27.0
 sympy==1.12
 threadpoolctl==3.2.0
 tomlkit==0.12.0
 toolz==0.12.0
+torch==2.1.2
+torchaudio==2.1.2
+torchvision==0.16.2
+tornado==6.4
 tqdm==4.66.1
+traitlets==5.14.0
 typer==0.9.0
+typing_extensions==4.9.0
 tzdata==2023.3
+urllib3==2.1.0
 uvicorn==0.24.0.post1
+wcwidth==0.2.12
+websockets==11.0.3

transcription_service.py CHANGED Viewed

@@ -1,58 +1,41 @@
 import os
-from google.cloud import speech
 from openai import OpenAI
 import subprocess
-# os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'msq-ai-78bdccb055f4.json'
-import os
-# def convert_wav_to_flac(wav_file_path):
-    # Extract the file name and extension from the input path
-    # Generate the output FLAC file path
-    # flac_file_path = wav_file_path.replace(".wav", ".flac")
-# def transcribe_speech_local(wav_file,language):
-    # print(wav_file,'wav_file_path\n')
-    # flac_file=convert_wav_to_flac(wav_file)
-    # flac_file=convert_wav_to_flac(wav_file)
-    # print(flac_file,'flac_file_path\n')
-    # """
-    # Transcribes a local WAV file using Google's Speech-to-Text API.
-    # """
-    # client = speech.SpeechClient()
-    # with open(wav_file, "rb") as audio_file:
-    #     content = audio_file.read()
-    # audio = speech.RecognitionAudio(content=content)
-    # config = speech.RecognitionConfig(
-    #     encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
-    #     sample_rate_hertz=48000,
-    #     # model="default",
-    #     enable_spoken_punctuation=True,
-    #     model="latest_short",
-    #   # Adjust this according to your file's sample rate
-    #     language_code=language,
-    #     enable_automatic_punctuation=False
-    # )
-    # response = client.recognize(config=config, audio=audio)
-    # for result in response.results:
-        # return "{}".format(result.alternatives[0].transcript)
 def transcribe_speech_local(wav_file,language):
-    from openai import OpenAI
     client = OpenAI()
     audio_file = open(wav_file, "rb")
     transcript = client.audio.transcriptions.create(

 import os
 from openai import OpenAI
 import subprocess
+import torch
+# Load the Silero VAD model and its helper functions once, at import time.
+# force_reload=False lets torch.hub reuse its local cache instead of
+# re-downloading the repository on every start-up, which is slow and fails
+# whenever GitHub is unreachable or rate-limited.
+model, utils = torch.hub.load(repo_or_dir='snakers4/silero-vad',
+                            model='silero_vad',
+                            force_reload=False,
+                            onnx=False)
+def apply_Vad(wav_file):
+    """Strip non-speech audio from ``wav_file`` in place using Silero VAD.
+
+    The file is read at 16 kHz, the detected speech segments are
+    concatenated, and the result is written back to the same path.
+
+    Args:
+        wav_file: Path of the audio file to filter. It is overwritten when
+            speech is found.
+
+    Returns:
+        ``wav_file`` when at least one speech segment was detected, otherwise
+        ``None`` (no speech, or any error while reading/processing the file).
+    """
+    SAMPLING_RATE = 16000
+    try:
+        torch.set_num_threads(1)
+        # ``utils`` is the helper tuple returned by torch.hub.load at module
+        # level; the streaming VADIterator helper is not needed here.
+        (get_speech_timestamps,
+         save_audio,
+         read_audio,
+         _vad_iterator,
+         collect_chunks) = utils
+        wav = read_audio(wav_file, sampling_rate=SAMPLING_RATE)
+        speech_timestamps = get_speech_timestamps(wav, model, sampling_rate=SAMPLING_RATE)
+        if not speech_timestamps:
+            # No speech detected: return before touching the file. Passing an
+            # empty segment list to collect_chunks would only raise and be
+            # reported below as a spurious error.
+            return None
+        save_audio(wav_file,
+                   collect_chunks(speech_timestamps, wav), sampling_rate=SAMPLING_RATE)
+        return wav_file
+    except Exception as e:
+        # Best-effort: the caller treats None as "nothing to transcribe".
+        print(e)
+        return None
+import os
 def transcribe_speech_local(wav_file,language):
     client = OpenAI()
+    wav_file=apply_Vad(wav_file)
+    if wav_file is None:
+        return ""
     audio_file = open(wav_file, "rb")
     transcript = client.audio.transcriptions.create(