NeMo / main.py
dtfyu3's picture
Update script
5577945
import uuid
import numpy as np
from flask import Flask, request, jsonify
import sys
import os
import whisper
# import nemo.collections.asr as nemo_asr
# from nemo.collections.asr.models import EncDecCTCModel
# os.environ["MPLCONFIGDIR"] = "/tmp/matplotlib"
# os.environ["XDG_CACHE_HOME"] = "/tmp/.cache"
# os.environ["HF_HOME"] = "/tmp/huggingface"
# os.environ["LHOTSE_CACHE"] = "/tmp/.lhotse"
# os.environ["NEMO_CACHE_DIR"] = "./nemo_cache"
#asr_model = nemo_asr.models.ASRModel.from_pretrained(model_name="nvidia/parakeet-tdt-0.6b-v2")
#models = EncDecCTCModel.list_available_models()
#for m in models:
# print(m)
#asr_model = EncDecCTCModel.from_pretrained(model_name="stt_ru_conformer_ctc_large")
#asr_model = EncDecCTCModel.from_pretrained(model_name="stt_ru_quartznet15x5")
# asr_model = EncDecCTCModel.from_pretrained(model_name="nvidia/stt_ru_conformer_ctc")
# Load the Whisper "medium" checkpoint once at import time; transcribe_audio
# reads this module-level instance instead of loading a model per call.
asr_model = whisper.load_model("medium")
# Flask application object that the @app.route decorators register against.
app = Flask(__name__)
@app.route('/')
def hello():
    """Answer requests to the root URL with a fixed plain-text status message."""
    return "Server is alive"
def transcribe_audio(audio_path):
    """Transcribe an audio file with the module-level Whisper model.

    The model is called with ``language="ru"``.

    Args:
        audio_path: Path of the audio file handed to ``asr_model.transcribe``.

    Returns:
        A ``(payload, status)`` tuple. On success the payload is
        ``{"text": <transcript>}`` with status 200. If anything raises, the
        payload is ``{"error": <message>}`` with status 500.
    """
    try:
        output = asr_model.transcribe(audio_path, language="ru")
        text = output["text"]
        return {"text": text}, 200
    except Exception as e:
        # The payload must be a dict. A bare ``{f"..."}`` is a set literal,
        # which cannot be JSON-serialized, so the real error message would be
        # lost when the caller tries to build the response.
        return {"error": f"Ошибка обработки: {e}"}, 500
@app.route('/transcribe', methods=['POST'])
def handle_audio():
    """Handle POST /transcribe: save the uploaded file and transcribe it.

    The upload is read from the ``audio`` field of the request's files, saved
    under a unique name in ``/tmp``, and passed to ``transcribe_audio``.

    Returns:
        A Flask response tuple. Status 400 when the ``audio`` field is missing
        or the saved file is empty, 500 when an exception is raised while
        handling the upload, otherwise the payload and status produced by
        ``transcribe_audio`` with an explicit UTF-8 JSON content type.
    """
    if 'audio' not in request.files:
        return jsonify({"error": "No audio file"}), 400

    upload = request.files['audio']
    # A random UUID in the name keeps concurrent uploads from colliding.
    temp_file = f"/tmp/audio_{uuid.uuid4()}.wav"
    json_headers = {'Content-Type': 'application/json; charset=utf-8'}

    try:
        upload.save(temp_file)

        size_in_bytes = os.path.getsize(temp_file)
        if size_in_bytes == 0:
            return jsonify({"error": "Empty audio file"}), 400

        payload, status_code = transcribe_audio(temp_file)
        return jsonify(payload), status_code, json_headers
    except Exception as exc:
        return jsonify({"error": str(exc)}), 500
    finally:
        # Runs on every exit path, so the temporary file never outlives the request.
        if os.path.exists(temp_file):
            os.remove(temp_file)
if __name__ == "__main__":
    # Serve on all interfaces at port 7860.
    #
    # Debug mode is opt-in via FLASK_DEBUG=1 rather than always on. With
    # debug=True the Werkzeug interactive debugger is exposed on a server
    # bound to 0.0.0.0, and the auto-reloader re-imports this module in a
    # child process, loading the Whisper model a second time.
    debug_enabled = os.environ.get("FLASK_DEBUG", "0") == "1"
    app.run(host="0.0.0.0", port=7860, debug=debug_enabled)