Spaces:

AkshatJain1402
/

Nemov2

Runtime error

File size: 4,669 Bytes

5b0d6da

"""
Author: Tanmay Jain
Email: csetanmayjain@gmail.com
"""

import os
import threading
import uuid

from flask import Flask, jsonify, request, send_file
from flask_cors import CORS
import nemo.collections.asr as nemo_asr
import subprocess
import config


# ASR model handles, one per language. They start as None and are assigned
# by load_model() the first time that language is requested.
hi_asr_model = None
en_asr_model = None
# Per-language upload directories. They start as None and are assigned by
# initialize_path() from config.upload_files_path.
en_upload_files_path=None
hi_upload_files_path=None
def initialize_path():
    """Read model/upload locations from ``config`` and create the upload folders.

    Sets the module globals ``hi_am_model_path``, ``en_am_model_path``,
    ``hi_upload_files_path`` and ``en_upload_files_path``. The two upload
    directories ("hindi" and "english" under ``config.upload_files_path``)
    are created when missing; existing directories are left untouched.
    """
    global hi_upload_files_path, en_upload_files_path, hi_am_model_path, en_am_model_path

    # Acoustic-model locations are taken from config as-is.
    hi_am_model_path, en_am_model_path = config.hi_am_model_path, config.en_am_model_path

    # Each language gets its own sub-folder below the shared upload root.
    upload_root = config.upload_files_path
    hi_upload_files_path = os.path.join(upload_root, "hindi")
    en_upload_files_path = os.path.join(upload_root, "english")

    for upload_dir in (hi_upload_files_path, en_upload_files_path):
        os.makedirs(upload_dir, exist_ok=True)
 

_HI_MODEL_URL = "https://storage.googleapis.com/vakyansh-open-models/conformer_models/hindi/filtered_v1_ssl_2022-07-08_19-43-25/Conformer-CTC-BPE-Large.nemo"
_EN_MODEL_URL = "https://storage.googleapis.com/vakyansh-open-models/conformer_models/english/2022-09-13_15-50-48/Conformer-CTC-BPE-Large.nemo"


def _download_model(url, dest_dir):
    """Download *url* into directory *dest_dir* via ``wget -P``; raise on failure."""
    result = subprocess.run(
        ["wget", "-P", dest_dir, url], capture_output=True, text=True
    )
    if result.returncode != 0:
        raise RuntimeError(f"wget download failed: {result.stderr}")


def load_model(lang):
    """Load the ASR model for *lang* into its module global, downloading it if needed.

    Args:
        lang: "hi" loads ``hi_asr_model``; "en" loads ``en_asr_model``.
            Any other value is a no-op.

    Raises:
        RuntimeError: if a required download fails (non-zero wget exit code).

    A model that is already loaded is not reloaded. The download is only
    attempted when the configured model path (``hi_am_model_path`` /
    ``en_am_model_path``, set by ``initialize_path()``) does not exist.
    """
    global hi_asr_model, en_asr_model

    if lang == "hi" and hi_asr_model is None:
        print("Loading Hindi Model")
        if not os.path.exists(hi_am_model_path):
            # The return code is checked inside the helper, i.e. only when a
            # download actually ran. Checking it unconditionally would hit an
            # unbound variable whenever the model was already on disk.
            _download_model(_HI_MODEL_URL, hi_am_model_path)
            print('done Downloading hindi asr')

        # NOTE(review): the restore path is hard-coded for a local server run
        # and is not derived from hi_am_model_path -- confirm that
        # config.hi_am_model_path points at './hi_am_model'.
        hi_asr_model = nemo_asr.models.EncDecCTCModelBPE.restore_from('./hi_am_model/Conformer-CTC-BPE-Large.nemo')

    if lang == "en" and en_asr_model is None:
        if not os.path.exists(en_am_model_path):
            print(en_am_model_path)
            print('not found downloading english model')
            _download_model(_EN_MODEL_URL, en_am_model_path)
            print('done Downloading')
        print("Loading English Model")

        # NOTE(review): hard-coded like the Hindi path above -- confirm that
        # config.en_am_model_path points at './en_am_model'.
        en_asr_model = nemo_asr.models.EncDecCTCModelBPE.restore_from('./en_am_model/Conformer-CTC-BPE-Large.nemo')


def transcribe(audio_file_path, transcription_file_path, lang, logprobs=False):
    """Transcribe one audio file and write the result to a text file.

    Args:
        audio_file_path: Path of the audio file handed to the ASR model.
        transcription_file_path: Destination file; it is overwritten.
        lang: "hi" uses ``hi_asr_model``; "en" uses ``en_asr_model``.
        logprobs: Forwarded unchanged to the model's ``transcribe`` call.

    Raises:
        ValueError: if *lang* is neither "hi" nor "en". This is raised before
            any file is created.

    The model is not loaded here; ``load_model(lang)`` must already have
    populated the corresponding module global.
    """
    if lang == "hi":
        asr_model = hi_asr_model
    elif lang == "en":
        asr_model = en_asr_model
    else:
        # Fail early with a clear message instead of creating an empty
        # transcript file and then failing on an unbound local.
        raise ValueError(f"Unsupported language code: {lang!r}")

    transcription = asr_model.transcribe([audio_file_path], logprobs=logprobs)[0]

    # Explicit UTF-8 so Hindi text is written the same way regardless of the
    # platform's default encoding; the context manager closes the file even
    # if the write fails.
    with open(transcription_file_path, "w", encoding="utf-8") as f:
        f.write(transcription)

def asr(audio_file_path,lang):
    """Transcribe *audio_file_path* into a new per-request folder.

    Calls ``initialize_path()`` first, then validates *lang*. For a missing
    or unsupported language code the return value is a
    ``(jsonify(error), 400)`` tuple. Otherwise a fresh UUID4 string is
    generated, a directory with that name is created under the language's
    upload folder, the transcript is written there as ``transcript.txt``,
    and the UUID string is returned.
    """
    initialize_path()

    # Guard clauses: reject a missing or unknown language before doing work.
    if lang is None:
        return jsonify({'error': 'no language code provided'}), 400
    if lang not in ("hi", "en"):
        return jsonify({'error': 'Invalid language code'}), 400

    request_id = str(uuid.uuid4())
    upload_root = hi_upload_files_path if lang == "hi" else en_upload_files_path
    request_dir = os.path.join(upload_root, request_id)
    transcript_path = os.path.join(request_dir, "transcript.txt")

    # No exist_ok: the folder name is a fresh UUID and must not exist yet.
    os.makedirs(request_dir)

    transcribe(audio_file_path, transcript_path, lang)
    return request_id
  
def get_transcription(client_id,lang):
    """Return the transcript file path for *client_id*, or a status message.

    Args:
        client_id: Name of the per-request folder to look up.
        lang: "hi" or "en"; selects the language folder.

    Returns:
        A string in every case: the path to ``transcript.txt`` when that
        file exists, otherwise one of the messages
        'No Client ID Provided', 'no language code provided',
        'Invalid language code', "Invalid Client ID" or
        'your transcription file is not ready'.
    """
    if client_id is None:
        return  'No Client ID Provided'
    if lang is None:
        return  'no language code provided'
    if lang != "hi" and lang != "en":
        return  'Invalid language code'

    # client_id becomes part of a filesystem path, so accept only a single
    # plain path component. This rejects absolute paths and '..' segments
    # that would otherwise escape the log directory.
    if (
        not isinstance(client_id, str)
        or client_id in ("", ".", "..")
        or os.path.basename(client_id) != client_id
    ):
        return "Invalid Client ID"

    # NOTE(review): these base directories are hard-coded, whereas
    # initialize_path() derives the upload folders from
    # config.upload_files_path -- confirm both refer to the same location.
    if lang == "hi":
        print('looking in hindi file path')
        client_dir = os.path.join('dependency/audio_logs/hindi', client_id)
    else:
        print('looking in english file path')
        client_dir = os.path.join('dependency/audio_logs/english', client_id)

    if not os.path.exists(client_dir):
        return "Invalid Client ID"

    transcript_file_path = os.path.join(client_dir,'transcript.txt')
    if os.path.exists(transcript_file_path):
        print(transcript_file_path,"transcript file path")
        return transcript_file_path
    return 'your transcription file is not ready'
 

    
# if __name__ == '__main__':
#     initialize_path()
#     app.run(host='0.0.0.0', port=5000)