# Web file-viewer header captured with this file: Spaces status "Runtime error";
# file size 4,669 bytes; revision 5b0d6da. (Line-number gutter removed.)
"""
Author: Tanmay Jain
Email: csetanmayjain@gmail.com
"""
import os
import threading
import uuid
from flask import Flask, jsonify, request, send_file
from flask_cors import CORS
import nemo.collections.asr as nemo_asr
import subprocess
import config
# ASR model handles, one per language. They stay None until load_model()
# restores the corresponding NeMo checkpoint.
hi_asr_model = en_asr_model = None

# Per-language upload directories. They stay None until initialize_path()
# derives them from config.upload_files_path.
en_upload_files_path = hi_upload_files_path = None
def initialize_path():
    """Populate the module-level path globals from ``config``.

    Copies ``config.hi_am_model_path`` and ``config.en_am_model_path`` into the
    globals of the same name, derives the "hindi" and "english" upload
    directories under ``config.upload_files_path``, and creates both
    directories when they do not exist yet.
    """
    global hi_upload_files_path, en_upload_files_path, hi_am_model_path, en_am_model_path

    # Acoustic-model locations are taken from config unchanged.
    hi_am_model_path, en_am_model_path = (
        config.hi_am_model_path,
        config.en_am_model_path,
    )

    # One upload sub-directory per supported language.
    hi_upload_files_path = os.path.join(config.upload_files_path, "hindi")
    en_upload_files_path = os.path.join(config.upload_files_path, "english")

    for upload_dir in (hi_upload_files_path, en_upload_files_path):
        os.makedirs(upload_dir, exist_ok=True)
_HI_MODEL_URL = "https://storage.googleapis.com/vakyansh-open-models/conformer_models/hindi/filtered_v1_ssl_2022-07-08_19-43-25/Conformer-CTC-BPE-Large.nemo"
_EN_MODEL_URL = "https://storage.googleapis.com/vakyansh-open-models/conformer_models/english/2022-09-13_15-50-48/Conformer-CTC-BPE-Large.nemo"


def _download_model(url, dest_dir):
    """Fetch ``url`` into the directory ``dest_dir`` with ``wget``.

    Raises:
        RuntimeError: if ``wget`` exits with a non-zero status; the message
            carries wget's stderr output.
    """
    download = subprocess.run(
        ["wget", "-P", dest_dir, url], capture_output=True, text=True
    )
    if download.returncode != 0:
        raise RuntimeError(f"wget download failed: {download.stderr}")


def load_model(lang):
    """Load the ASR model for ``lang`` into its module global, at most once.

    For ``"hi"`` the model is stored in ``hi_asr_model``; for ``"en"`` in
    ``en_asr_model``. If the corresponding global is already set, nothing
    happens. Any other ``lang`` value is a no-op.

    When the configured model path (``hi_am_model_path`` /
    ``en_am_model_path``, set by ``initialize_path()``) does not exist, the
    checkpoint is downloaded there with ``wget`` first.

    Args:
        lang: Language code, ``"hi"`` or ``"en"``.

    Raises:
        RuntimeError: if the checkpoint download fails (both languages).
    """
    global hi_asr_model, en_asr_model

    if lang == "hi" and hi_asr_model is None:
        print("Loading Hindi Model")
        if not os.path.exists(hi_am_model_path):
            # Report success only after wget's exit status has been verified.
            _download_model(_HI_MODEL_URL, hi_am_model_path)
            print('done Downloading hindi asr')
        # NOTE(review): the checkpoint is restored from a fixed relative path
        # (used for local server runs), not from hi_am_model_path — confirm
        # that config.hi_am_model_path points at ./hi_am_model.
        hi_asr_model = nemo_asr.models.EncDecCTCModelBPE.restore_from('./hi_am_model/Conformer-CTC-BPE-Large.nemo')
    elif lang == "en" and en_asr_model is None:
        if not os.path.exists(en_am_model_path):
            print(en_am_model_path)
            print('not found downloading english model')
            _download_model(_EN_MODEL_URL, en_am_model_path)
            print('done Downloading')
        print("Loading English Model")
        # NOTE(review): fixed relative path as above — confirm that
        # config.en_am_model_path points at ./en_am_model.
        en_asr_model = nemo_asr.models.EncDecCTCModelBPE.restore_from('./en_am_model/Conformer-CTC-BPE-Large.nemo')
def transcribe(audio_file_path, transcription_file_path, lang, logprobs=False):
    """Transcribe one audio file and write the result to a text file.

    The model for ``lang`` must already be loaded into ``hi_asr_model`` /
    ``en_asr_model``; this function does not call ``load_model`` itself.

    Args:
        audio_file_path: Path of the audio file handed to the model.
        transcription_file_path: File that receives the transcription; it is
            created or overwritten.
        lang: Language code, ``"hi"`` or ``"en"``.
        logprobs: Forwarded unchanged to the model's ``transcribe`` call.
            NOTE(review): the result is written with ``file.write``, so this
            presumably only works when the model returns text — confirm.

    Raises:
        ValueError: if ``lang`` is neither ``"hi"`` nor ``"en"``. The check
            happens before the output file is opened, so no empty transcript
            is left behind.
    """
    if lang == "hi":
        asr_model = hi_asr_model
    elif lang == "en":
        asr_model = en_asr_model
    else:
        raise ValueError(f"Unsupported language code: {lang!r}")

    # The model takes a list of paths; only one file is passed, so take item 0.
    transcription = asr_model.transcribe([audio_file_path], logprobs=logprobs)[0]

    # Explicit UTF-8 so Hindi text is written correctly regardless of the
    # platform's default encoding; the context manager closes the file even
    # if the write fails.
    with open(transcription_file_path, "w", encoding="utf-8") as transcript_file:
        transcript_file.write(transcription)
def asr(audio_file_path, lang):
    """Transcribe ``audio_file_path`` and return the ID of the stored result.

    Calls ``initialize_path()``, creates a fresh UUID-named directory under
    the upload directory for ``lang``, and has ``transcribe`` write
    ``transcript.txt`` into it.

    Args:
        audio_file_path: Path of the audio file to transcribe.
        lang: Language code, ``"hi"`` or ``"en"``.

    Returns:
        The generated client ID (a UUID4 string) on success. For a missing or
        unsupported ``lang`` it returns a ``(jsonify(...), 400)`` tuple
        instead, so callers must handle both shapes.
        NOTE(review): ``jsonify`` normally needs a Flask application context
        — confirm this is only called from within a request.
    """
    initialize_path()

    if lang is None:
        return jsonify({'error': 'no language code provided'}), 400
    if lang not in ("hi", "en"):
        return jsonify({'error': 'Invalid language code'}), 400

    # The path globals are only read here, so no `global` statement is needed.
    upload_root = hi_upload_files_path if lang == "hi" else en_upload_files_path

    client_id = str(uuid.uuid4())
    client_id_dir = os.path.join(upload_root, client_id)
    # No exist_ok: a directory that already exists for this ID is an error.
    os.makedirs(client_id_dir)

    transcription_file_path = os.path.join(client_id_dir, "transcript.txt")
    transcribe(audio_file_path, transcription_file_path, lang)
    return client_id
def get_transcription(client_id, lang):
    """Return the path of a client's transcript file, or a status message.

    Args:
        client_id: ID previously returned by ``asr``; it must be a single
            path component.
        lang: Language code, ``"hi"`` or ``"en"``.

    Returns:
        A string in every case. On success it is the path of
        ``transcript.txt`` inside the client's directory. Otherwise it is one
        of the messages ``'No Client ID Provided'``,
        ``'no language code provided'``, ``'Invalid language code'``,
        ``"Invalid Client ID"`` or ``'your transcription file is not ready'``.
    """
    if client_id is None:
        return 'No Client ID Provided'
    if lang is None:
        return 'no language code provided'
    if lang not in ("hi", "en"):
        return 'Invalid language code'

    # client_id is joined into a filesystem path, so reject anything that is
    # not a plain directory name (path separators, "..", empty string).
    if client_id in ("", ".", "..") or os.path.basename(client_id) != client_id:
        return "Invalid Client ID"

    # Look where asr() writes: the directories set up by initialize_path().
    # Fall back to the previous fixed locations when those are still unset.
    if lang == "hi":
        print('looking in hindi file path')
        base_dir = hi_upload_files_path or 'dependency/audio_logs/hindi'
    else:
        print('looking in english file path')
        base_dir = en_upload_files_path or 'dependency/audio_logs/english'

    client_dir = os.path.join(base_dir, client_id)
    if not os.path.exists(client_dir):
        return "Invalid Client ID"

    transcript_file_path = os.path.join(client_dir, 'transcript.txt')
    if not os.path.exists(transcript_file_path):
        # The directory exists but the transcript has not been written yet.
        return 'your transcription file is not ready'

    print(transcript_file_path, "transcript file path")
    return transcript_file_path
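# NOTE: disabled standalone entry point. No `app` object is defined in the
# visible part of this module, so it cannot be re-enabled as written.
# if __name__ == '__main__':
#     initialize_path()
#     app.run(host='0.0.0.0', port=5000)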