alessandro trinca tornidor committed on
Commit
b5c05cd
1 Parent(s): 9bc64aa

feat: add dockerfile and save yml silero model within system temp folders to support docker container execution

Browse files
.dockerignore ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .venv/
2
+ venv/
3
+ *venv*
4
+ *.pyc
5
+ __cache__
6
+ .idea
7
+ tmp/
8
+ .env*
9
+ __pycache__
10
+ .DS_Store
11
+ .pytest_cache
12
+ node_modules
13
+ dist
14
+ static/node_modules
15
+ static/dist
16
+ static/.env*
17
+ .coverage
18
+ .coverage.*
19
+ .git
20
+ static/test*/*
21
+ test*/*
22
+ htmlcov
23
+ tmp/*
24
# cached silero models lists: latest_silero_model_<language>.yml (the pattern also covers latest_silero_models.yml)
latest_silero_model*.yml
Dockerfile ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Include global ARGs at the dockerfile top
2
+ ARG ARCH="x86_64"
3
+ ARG WORKDIR_ROOT="/var/task"
4
+
5
+
6
+ FROM python:3.12-bookworm AS builder_global
7
+
8
+ ARG ARCH
9
+ ARG WORKDIR_ROOT
10
+ ARG POETRY_NO_INTERACTION
11
+ ARG POETRY_VIRTUALENVS_IN_PROJECT
12
+ ARG POETRY_VIRTUALENVS_CREATE
13
+ ARG POETRY_CACHE_DIR
14
+ ARG ZLIB1G="http://ftp.it.debian.org/debian/pool/main/z/zlib/zlib1g_1.3.dfsg-3+b1_amd64.deb"
15
+ ENV PYTHONPATH="${WORKDIR_ROOT}:${PYTHONPATH}:/usr/local/lib/python3/dist-packages"
16
+ ENV MPLCONFIGDIR=/tmp/matplotlib
17
+ ARG USER="999"
18
+
19
+
20
+ RUN echo "ARCH: $ARCH, ARG POETRY_CACHE_DIR: ${POETRY_CACHE_DIR}, ENV PYTHONPATH: $PYTHONPATH, USER: $USER ..."
21
+ # RUN groupadd -g 999 python && useradd -r -u 999 -g python python
22
+
23
# Set working directory to function root directory
WORKDIR ${WORKDIR_ROOT}
# the 'python' user/group is created only in the runtime stage (uid/gid 999), not in this one:
# use the numeric uid:gid here, since it does not need a lookup in /etc/passwd and /etc/group
COPY --chown=999:999 requirements.txt ${WORKDIR_ROOT}/

# avoid segment-geospatial exception caused by missing libGL.so.1 library
RUN echo "BUILDER: check libz.s* before start" && ls -l /usr/lib/${ARCH}-linux-gnu/libz.so*
RUN apt update && apt install -y curl ffmpeg libgl1 python3-pip && apt clean
# custom apt pinning and sources, needed to install zlib1g from the trixie suite
COPY --chown=999:999 ./dockerfiles/apt_preferences /etc/apt/preferences
COPY --chown=999:999 ./dockerfiles/debian.sources /etc/apt/sources.list.d/debian.sources
RUN apt update && apt install -t trixie zlib1g -y && apt clean
RUN echo "BUILDER: check libz.s* after install from trixie" && ls -l /usr/lib/${ARCH}-linux-gnu/libz.so*
34
+
35
+ RUN ls -l /etc/apt/sources* /etc/apt/preferences*
36
+
37
+ # poetry installation path is NOT within ${WORKDIR_ROOT}: not needed for runtime docker image
38
+ RUN python3 -m venv ${WORKDIR_ROOT}/.venv
39
+ ENV PATH="${WORKDIR_ROOT}/.venv/bin:$PATH"
40
+ RUN . ${WORKDIR_ROOT}/.venv/bin/activate && python -m pip install -r ${WORKDIR_ROOT}/requirements.txt
41
+
42
+ # USER 999
43
+
44
+
45
+ FROM python:3.12-slim-bookworm AS runtime
46
+
47
+ RUN groupadd -g 999 python && useradd -r -u 999 -g python python
48
+
49
+ ARG ARCH
50
+ ARG WORKDIR_ROOT
51
+ ENV PYTHONPATH="${WORKDIR_ROOT}:${WORKDIR_ROOT}/.venv:${PYTHONPATH}:/usr/local/lib/python3/dist-packages"
52
+ ENV MPLCONFIGDIR=/tmp/matplotlib
53
+ ENV IS_DOCKER_CONTAINER="YES"
54
+
55
+ ENV VIRTUAL_ENV=${WORKDIR_ROOT}/.venv PATH="${WORKDIR_ROOT}/.venv/bin:$PATH"
56
+
57
+ RUN apt update && apt install -y ffmpeg && apt clean
58
+ RUN echo "COPY --chown=python:python --from=builder_global /usr/lib/${ARCH}-linux-gnu/libGL.so* /usr/lib/${ARCH}-linux-gnu/"
59
+ RUN echo "RUNTIME: check libz.s* before upgrade" && ls -l /usr/lib/${ARCH}-linux-gnu/libz.so*
60
+ RUN echo "RUNTIME: remove libz.s* to force upgrade" && rm /usr/lib/${ARCH}-linux-gnu/libz.so*
61
+ COPY --chown=python:python --from=builder_global /usr/lib/${ARCH}-linux-gnu/libz.so* /usr/lib/${ARCH}-linux-gnu/
62
+ COPY --chown=python:python --from=builder_global /lib/${ARCH}-linux-gnu/libexpat.so* /lib/${ARCH}-linux-gnu/
63
+ RUN echo "RUNTIME: check libz.s* after copy" && ls -l /usr/lib/${ARCH}-linux-gnu/libz.so*
64
+ COPY --chown=python:python --from=builder_global ${WORKDIR_ROOT}/.venv ${WORKDIR_ROOT}/.venv
65
# build-time sanity checks: ffmpeg installed by apt in this stage, venv copied from the builder stage
RUN echo "check ffmpeg files..."
RUN ls -ld /usr/share/ffmpeg || echo "ffmpeg folder not found!"
RUN ls -l /usr/bin/ff* || echo "ffmpeg bin not found!"
RUN ls -l /usr/share/ffmpeg || echo "ffmpeg folder share not found!"
# source the venv activation script: the .venv folder itself is a directory and cannot be sourced
RUN . ${WORKDIR_ROOT}/.venv/bin/activate && which python && pip list
70
+
71
+ RUN echo "new WORKDIR_ROOT after hidden venv COPY --chown=python:python => ${WORKDIR_ROOT}"
72
+ RUN ls -ld ${WORKDIR_ROOT}/
73
+ RUN ls -lA ${WORKDIR_ROOT}/
74
+
75
+ COPY --chown=python:python . ${WORKDIR_ROOT}/.
76
+
77
+ RUN python --version
78
+ RUN pip list
79
+ RUN echo "PATH: ${PATH}."
80
+ RUN echo "WORKDIR_ROOT: ${WORKDIR_ROOT}."
81
+ RUN ls -l ${WORKDIR_ROOT}
82
+ RUN ls -ld ${WORKDIR_ROOT}
83
+ RUN python -c "import sys; print(sys.path)"
84
+ RUN python -c "import epitran"
85
+ RUN python -c "import flask"
86
+ RUN python -c "import pandas"
87
+ RUN python -c "from torch import Tensor"
88
+ RUN python -c "import gunicorn"
89
+ RUN df -h
90
+ RUN ls -l ${WORKDIR_ROOT}/app.py
91
+ RUN ls -l ${WORKDIR_ROOT}/static/
92
+
93
+ USER 999
94
+ ENV PATH="${WORKDIR_ROOT}:${WORKDIR_ROOT}/.venv/bin:$PATH"
95
+ RUN echo "PATH: $PATH ..."
96
+ RUN echo "PYTHONPATH: $PYTHONPATH ..."
97
+ RUN echo "MPLCONFIGDIR: $MPLCONFIGDIR ..."
98
+
99
+ CMD ["gunicorn", "--bind", "0.0.0.0:3000", "app:app"]
aip_trainer/lambdas/lambdaSpeechToScore.py CHANGED
@@ -61,7 +61,7 @@ def lambda_handler(event, context):
61
  app_logger.info(f'Loaded .ogg file {random_file_name} in {duration}s.')
62
 
63
  language_trainer_sst_lambda = trainer_SST_lambda[language]
64
- app_logger.info(f'language_trainer_sst_lambda: preparing...')
65
  result = language_trainer_sst_lambda.processAudioForGivenText(signal, real_text)
66
  app_logger.info(f'language_trainer_sst_lambda: result: {result}...')
67
 
 
61
  app_logger.info(f'Loaded .ogg file {random_file_name} in {duration}s.')
62
 
63
  language_trainer_sst_lambda = trainer_SST_lambda[language]
64
+ app_logger.info('language_trainer_sst_lambda: preparing...')
65
  result = language_trainer_sst_lambda.processAudioForGivenText(signal, real_text)
66
  app_logger.info(f'language_trainer_sst_lambda: result: {result}...')
67
 
aip_trainer/models/models.py CHANGED
@@ -1,14 +1,65 @@
 
 
 
1
  import torch.nn as nn
2
- from silero import silero_stt
3
  from silero.utils import Decoder
4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
  # second returned type here is the custom class src.silero.utils.Decoder from snakers4/silero-models
7
  def getASRModel(language: str) -> tuple[nn.Module, Decoder]:
 
8
  if language == 'de':
9
- model, decoder, _ = silero_stt(language='de', version="v4", jit_model="jit_large")
10
  elif language == 'en':
11
- model, decoder, _ = silero_stt(language='en')
12
  else:
13
  raise NotImplementedError("currenty works only for 'de' and 'en' languages, not for '{}'.".format(language))
14
 
 
1
+ import os
2
+ from pathlib import Path
3
+ import tempfile
4
  import torch.nn as nn
 
5
  from silero.utils import Decoder
6
 
7
+ from aip_trainer import app_logger
8
+
9
+
10
def silero_stt(language='en',
               version='latest',
               jit_model='jit',
               output_folder: Path | str | None = None,
               **kwargs):
    """Modified Silero Speech-To-Text Model(s) function.

    Downloads the silero models list (yml) into ``output_folder`` when it is not
    already there, then loads the jit model selected by language/version/jit_model.
    Please see https://github.com/snakers4/silero-models for usage examples.

    Args:
        language (str): language of the model, now available are ['en', 'de', 'es'].
        version: key of the model version for the given language within the models list.
        jit_model: key of the jit model variant for the given version.
        output_folder: folder where the models list yml is stored, needed in case of
            docker build. When None, the folder two levels above this module is used.
        **kwargs: forwarded unchanged to ``silero.utils.init_jit_model``.

    Returns:
        A ``(model, decoder, utils)`` tuple, where ``utils`` is the tuple
        ``(read_batch, split_into_batches, read_audio, prepare_model_input)``.

    Raises:
        FileNotFoundError: if the models list yml is still missing after the download.
        ValueError: if language, version or jit_model are not within the models list.
    """
    import torch
    from omegaconf import OmegaConf
    from silero.utils import (init_jit_model,
                              read_audio,
                              read_batch,
                              split_into_batches,
                              prepare_model_input)

    if output_folder is None:
        output_folder = Path(os.path.dirname(__file__)) / ".." / ".."
    else:
        output_folder = Path(output_folder)
    models_list_file = output_folder / f'latest_silero_model_{language}.yml'
    if not models_list_file.exists():
        app_logger.info(f"model yml for '{language}' language, '{version}' version not found, download it in folder {output_folder}...")
        # the destination folder must exist before the download starts
        output_folder.mkdir(parents=True, exist_ok=True)
        torch.hub.download_url_to_file(
            'https://raw.githubusercontent.com/snakers4/silero-models/master/models.yml',
            str(models_list_file),
            progress=True
        )
        app_logger.info(f"model yml for '{language}' language, '{version}' version in folder {output_folder}: OK!")
    # explicit exceptions instead of assert statements, which are stripped when running with -O
    if not models_list_file.exists():
        raise FileNotFoundError(f"silero models list file not found: '{models_list_file}'.")
    models = OmegaConf.load(models_list_file)
    available_languages = list(models.stt_models.keys())
    if language not in available_languages:
        raise ValueError(f"language '{language}' not available, choose one of {available_languages}.")

    # walk the models list one level at a time to report exactly which key is missing
    language_models = models.stt_models.get(language)
    version_models = language_models.get(version)
    if version_models is None:
        raise ValueError(f"version '{version}' not available for language '{language}'.")
    model_url = version_models.get(jit_model)
    if model_url is None:
        raise ValueError(f"jit model '{jit_model}' not available for language '{language}', version '{version}'.")

    model, decoder = init_jit_model(model_url=model_url, **kwargs)
    utils = (read_batch,
             split_into_batches,
             read_audio,
             prepare_model_input)

    return model, decoder, utils
54
+
55
 
56
  # second returned type here is the custom class src.silero.utils.Decoder from snakers4/silero-models
57
  def getASRModel(language: str) -> tuple[nn.Module, Decoder]:
58
+ tmp_dir = tempfile.gettempdir()
59
  if language == 'de':
60
+ model, decoder, _ = silero_stt(language='de', version="v4", jit_model="jit_large", output_folder=tmp_dir)
61
  elif language == 'en':
62
+ model, decoder, _ = silero_stt(language='en', output_folder=tmp_dir)
63
  else:
64
  raise NotImplementedError("currenty works only for 'de' and 'en' languages, not for '{}'.".format(language))
65
 
aip_trainer/pronunciationTrainer.py CHANGED
@@ -113,19 +113,19 @@ class PronunciationTrainer:
113
  def getAudioTranscript(self, recordedAudio: torch.Tensor = None):
114
  current_recorded_audio = recordedAudio
115
 
116
- app_logger.info(f'starting preprocessAudio...')
117
  current_recorded_audio = self.preprocessAudio(current_recorded_audio)
118
 
119
- app_logger.info(f'starting processAudio...')
120
  self.asr_model.processAudio(current_recorded_audio)
121
 
122
- app_logger.info(f'starting getTranscriptAndWordsLocations...')
123
  current_recorded_transcript, current_recorded_word_locations = self.getTranscriptAndWordsLocations(
124
  current_recorded_audio.shape[1])
125
- app_logger.info(f'starting convertToPhonem...')
126
  current_recorded_ipa = self.ipa_converter.convertToPhonem(current_recorded_transcript)
127
 
128
- app_logger.info(f'ok, return audio transcript!')
129
  return current_recorded_transcript, current_recorded_ipa, current_recorded_word_locations
130
 
131
  def getWordLocationsFromRecordInSeconds(self, word_locations, mapped_words_indices) -> list:
 
113
  def getAudioTranscript(self, recordedAudio: torch.Tensor = None):
114
  current_recorded_audio = recordedAudio
115
 
116
+ app_logger.info('starting preprocessAudio...')
117
  current_recorded_audio = self.preprocessAudio(current_recorded_audio)
118
 
119
+ app_logger.info('starting processAudio...')
120
  self.asr_model.processAudio(current_recorded_audio)
121
 
122
+ app_logger.info('starting getTranscriptAndWordsLocations...')
123
  current_recorded_transcript, current_recorded_word_locations = self.getTranscriptAndWordsLocations(
124
  current_recorded_audio.shape[1])
125
+ app_logger.info('starting convertToPhonem...')
126
  current_recorded_ipa = self.ipa_converter.convertToPhonem(current_recorded_transcript)
127
 
128
+ app_logger.info('ok, return audio transcript!')
129
  return current_recorded_transcript, current_recorded_ipa, current_recorded_word_locations
130
 
131
  def getWordLocationsFromRecordInSeconds(self, word_locations, mapped_words_indices) -> list:
dockerfiles/apt_preferences ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ Explanation: Uninstall or do not install any Debian-originated
2
+ Explanation: package versions other than those in the stable distro
3
+ Package: *
4
+ Pin: release a=stable
5
+ Pin-Priority: 900
6
+
7
+ Package: zlib1g
8
+ Pin: release a=trixie
9
+ Pin-Priority: -10
dockerfiles/debian.sources ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Types: deb deb-src
2
+ URIs: http://deb.debian.org/debian
3
+ Suites: bookworm bookworm-updates
4
+ Components: main
5
+ Signed-By: /usr/share/keyrings/debian-archive-keyring.gpg
6
+
7
+ Types: deb deb-src
8
+ URIs: http://deb.debian.org/debian-security
9
+ Suites: bookworm-security
10
+ Components: main
11
+ Signed-By: /usr/share/keyrings/debian-archive-keyring.gpg
12
+
13
+ Types: deb
14
+ URIs: http://deb.debian.org/debian
15
+ Suites: trixie
16
+ Components: main
17
+ Signed-By: /usr/share/keyrings/debian-archive-keyring.gpg
requirements.txt CHANGED
@@ -4,6 +4,7 @@ eng_to_ipa
4
  epitran==1.25.1
5
  flask
6
  flask_cors
 
7
  omegaconf
8
  ortools==9.11.4210
9
  pandas
@@ -11,6 +12,7 @@ pickle-mixin
11
  python-dotenv
12
  requests
13
  sentencepiece
 
14
  soundfile==0.12.1
15
  sqlalchemy
16
  structlog
 
4
  epitran==1.25.1
5
  flask
6
  flask_cors
7
+ gunicorn
8
  omegaconf
9
  ortools==9.11.4210
10
  pandas
 
12
  python-dotenv
13
  requests
14
  sentencepiece
15
+ silero==0.4.1
16
  soundfile==0.12.1
17
  sqlalchemy
18
  structlog
webApp.py CHANGED
@@ -1,8 +1,9 @@
1
  import json
 
2
  import webbrowser
3
 
4
- from aip_trainer import PROJECT_ROOT_FOLDER, app_logger
5
- from flask import Flask, render_template, request, Response
6
  from flask_cors import CORS
7
 
8
  from aip_trainer.lambdas import lambdaGetSample
@@ -30,6 +31,7 @@ def getNext():
30
  @app.route(rootPath+'/GetAccuracyFromRecordedAudio', methods=['POST'])
31
  def GetAccuracyFromRecordedAudio():
32
  try:
 
33
  event = {'body': json.dumps(request.get_json(force=True))}
34
  lambda_correct_output = lambdaSpeechToScore.lambda_handler(event, [])
35
  return lambda_correct_output
@@ -41,6 +43,9 @@ def GetAccuracyFromRecordedAudio():
41
 
42
 
43
  if __name__ == "__main__":
44
- language = 'de'
45
- webbrowser.open_new('http://127.0.0.1:3000/')
 
 
 
46
  app.run(host="0.0.0.0", port=3000) # , debug=True)
 
1
  import json
2
+ import os
3
  import webbrowser
4
 
5
+ from aip_trainer import app_logger
6
+ from flask import Flask, render_template, request
7
  from flask_cors import CORS
8
 
9
  from aip_trainer.lambdas import lambdaGetSample
 
31
  @app.route(rootPath+'/GetAccuracyFromRecordedAudio', methods=['POST'])
32
  def GetAccuracyFromRecordedAudio():
33
  try:
34
+ # TODO: add (unfinished note, originally "inserire")
35
  event = {'body': json.dumps(request.get_json(force=True))}
36
  lambda_correct_output = lambdaSpeechToScore.lambda_handler(event, [])
37
  return lambda_correct_output
 
43
 
44
 
45
  if __name__ == "__main__":
46
+ is_docker_container = os.getenv("IS_DOCKER_CONTAINER", "").lower() == "yes"
47
+ app_logger.info(f"is_docker_container:{is_docker_container}.")
48
+ if not is_docker_container:
49
+ import webbrowser
50
+ webbrowser.open_new('http://127.0.0.1:3000/')
51
  app.run(host="0.0.0.0", port=3000) # , debug=True)