"""Minimal Flask service that transcribes uploaded audio with Whisper.

Endpoints:
    GET  /            -- liveness probe.
    POST /transcribe  -- multipart upload with an ``audio`` file field;
                         responds with JSON ``{"text": ...}`` on success or
                         ``{"error": ...}`` on failure.
"""
import os
import sys
import uuid

import numpy as np
import whisper
from flask import Flask, request, jsonify

# Earlier experiments with NVIDIA NeMo models, kept for reference:
# import nemo.collections.asr as nemo_asr
# from nemo.collections.asr.models import EncDecCTCModel
# os.environ["MPLCONFIGDIR"] = "/tmp/matplotlib"
# os.environ["XDG_CACHE_HOME"] = "/tmp/.cache"
# os.environ["HF_HOME"] = "/tmp/huggingface"
# os.environ["LHOTSE_CACHE"] = "/tmp/.lhotse"
# os.environ["NEMO_CACHE_DIR"] = "./nemo_cache"
# asr_model = nemo_asr.models.ASRModel.from_pretrained(model_name="nvidia/parakeet-tdt-0.6b-v2")
# models = EncDecCTCModel.list_available_models()
# for m in models:
#     print(m)
# asr_model = EncDecCTCModel.from_pretrained(model_name="stt_ru_conformer_ctc_large")
# asr_model = EncDecCTCModel.from_pretrained(model_name="stt_ru_quartznet15x5")
# asr_model = EncDecCTCModel.from_pretrained(model_name="nvidia/stt_ru_conformer_ctc")

# Loaded once at import time and shared by every request.
asr_model = whisper.load_model("medium")

app = Flask(__name__)


@app.route('/')
def hello():
    """Liveness probe: return a fixed plain-text message."""
    return "Server is alive"


def transcribe_audio(audio_path: str):
    """Transcribe the audio file at *audio_path* as Russian speech.

    Args:
        audio_path: Path to an audio file on the local filesystem.

    Returns:
        A ``(payload, status)`` pair: ``({"text": <transcript>}, 200)`` on
        success, or ``({"error": <message>}, 500)`` if transcription raised.
    """
    try:
        output = asr_model.transcribe(audio_path, language="ru")
        return {"text": output["text"]}, 200
    except Exception as e:
        app.logger.exception("Transcription failed for %s", audio_path)
        # Must be a dict: the original returned a *set* literal here, which
        # jsonify() cannot serialize, so the real error never reached the client.
        return {"error": f"Ошибка обработки: {e}"}, 500


@app.route('/transcribe', methods=['POST'])
def handle_audio():
    """Handle an audio upload and return its transcription as JSON.

    Expects a multipart form with a file field named ``audio``. The upload is
    written to a uniquely named temporary file, transcribed, and the temporary
    file is removed afterwards regardless of the outcome.

    Returns:
        400 if the ``audio`` field is missing or the uploaded file is empty,
        500 if saving or transcription fails, otherwise 200 with the
        transcript.
    """
    if 'audio' not in request.files:
        return jsonify({"error": "No audio file"}), 400

    audio = request.files['audio']
    temp_file = f"/tmp/audio_{uuid.uuid4()}.wav"

    try:
        audio.save(temp_file)

        if os.path.getsize(temp_file) == 0:
            return jsonify({"error": "Empty audio file"}), 400

        response, status = transcribe_audio(temp_file)
        return (
            jsonify(response),
            status,
            {'Content-Type': 'application/json; charset=utf-8'},
        )
    except Exception as e:
        app.logger.exception("Failed to handle uploaded audio")
        return jsonify({"error": str(e)}), 500
    finally:
        # Best-effort cleanup: a failure to delete the temp file must not
        # replace the response that was already computed.
        try:
            os.remove(temp_file)
        except FileNotFoundError:
            pass
        except OSError:
            app.logger.warning("Could not remove temp file %s", temp_file)


if __name__ == "__main__":
    # SECURITY: the Werkzeug debugger allows arbitrary code execution, so it
    # must not be enabled unconditionally on a server bound to 0.0.0.0.
    # Opt in explicitly with FLASK_DEBUG=1 for local development.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {
        "1",
        "true",
        "yes",
        "on",
    }
    app.run(host="0.0.0.0", port=7860, debug=debug_enabled)