alessandro trinca tornidor committed on
Commit
d804881
·
1 Parent(s): 1e30c4b

feat: first working gradio frontend with refactored lambda_handler and tests

Browse files
aip_trainer/lambdas/lambdaSpeechToScore.py CHANGED
@@ -1,6 +1,8 @@
 
1
  import base64
2
  import json
3
  import os
 
4
  import tempfile
5
  import time
6
 
@@ -25,8 +27,9 @@ def lambda_handler(event, context):
25
  data = json.loads(event['body'])
26
 
27
  real_text = data['title']
28
- file_bytes = base64.b64decode(
29
- data['base64Audio'][22:].encode('utf-8'))
 
30
  language = data['language']
31
 
32
  if len(real_text) == 0:
@@ -40,13 +43,26 @@ def lambda_handler(event, context):
40
  },
41
  'body': ''
42
  }
 
 
 
 
43
 
 
 
 
 
44
  start0 = time.time()
45
- with tempfile.NamedTemporaryFile(prefix="temp_sound_speech_score_", suffix=".ogg", delete=False) as f1:
46
- f1.write(file_bytes)
47
- duration = time.time() - start0
48
- app_logger.info(f'Saved binary in file in {duration}s.')
49
- random_file_name = f1.name
 
 
 
 
 
50
 
51
  start = time.time()
52
  app_logger.info(f'Loading .ogg file file {random_file_name} ...')
@@ -66,7 +82,8 @@ def lambda_handler(event, context):
66
  app_logger.info(f'language_trainer_sst_lambda: result: {result}...')
67
 
68
  start = time.time()
69
- os.remove(random_file_name)
 
70
  duration = time.time() - start
71
  app_logger.info(f'Deleted file {random_file_name} in {duration}s.')
72
 
@@ -127,6 +144,8 @@ def audioread_load(path, offset=0.0, duration=None, dtype=np.float32):
127
  This loads one block at a time, and then concatenates the results.
128
  """
129
 
 
 
130
  y = []
131
  app_logger.debug(f"reading audio file at path:{path} ...")
132
  with audioread.audio_open(path) as input_file:
 
1
+
2
  import base64
3
  import json
4
  import os
5
+ from pathlib import Path
6
  import tempfile
7
  import time
8
 
 
27
  data = json.loads(event['body'])
28
 
29
  real_text = data['title']
30
+ base64Audio = data["base64Audio"]
31
+ app_logger.debug(f"base64Audio:{base64Audio} ...")
32
+ file_bytes_or_audiotmpfile = base64.b64decode(base64Audio[22:].encode('utf-8'))
33
  language = data['language']
34
 
35
  if len(real_text) == 0:
 
43
  },
44
  'body': ''
45
  }
46
+ output = get_speech_to_score(real_text=real_text, file_bytes_or_audiotmpfile=file_bytes_or_audiotmpfile, language=language)
47
+ app_logger.debug(f"output: {output} ...")
48
+ return output
49
+
50
 
51
+ def get_speech_to_score(real_text: str, file_bytes_or_audiotmpfile: str | dict, language: str = "en", remove_random_file: bool = True):
52
+ app_logger.info(f"real_text:{real_text} ...")
53
+ app_logger.debug(f"file_bytes:{file_bytes_or_audiotmpfile} ...")
54
+ app_logger.info(f"language:{language} ...")
55
  start0 = time.time()
56
+
57
+ random_file_name = file_bytes_or_audiotmpfile
58
+ app_logger.debug(f"random_file_name:{random_file_name} ...")
59
+ if isinstance(file_bytes_or_audiotmpfile, (bytes, bytearray)):
60
+ app_logger.debug("writing streaming data to file on disk...")
61
+ with tempfile.NamedTemporaryFile(prefix="temp_sound_speech_score_", suffix=".ogg", delete=False) as f1:
62
+ f1.write(file_bytes_or_audiotmpfile)
63
+ duration = time.time() - start0
64
+ app_logger.info(f'Saved binary data in file in {duration}s.')
65
+ random_file_name = f1.name
66
 
67
  start = time.time()
68
  app_logger.info(f'Loading .ogg file file {random_file_name} ...')
 
82
  app_logger.info(f'language_trainer_sst_lambda: result: {result}...')
83
 
84
  start = time.time()
85
+ if remove_random_file:
86
+ os.remove(random_file_name)
87
  duration = time.time() - start
88
  app_logger.info(f'Deleted file {random_file_name} in {duration}s.')
89
 
 
144
  This loads one block at a time, and then concatenates the results.
145
  """
146
 
147
+ import shutil
148
+ shutil.copyfile(path, Path("/tmp") / f"test_en_{Path(path).name}")
149
  y = []
150
  app_logger.debug(f"reading audio file at path:{path} ...")
151
  with audioread.audio_open(path) as input_file:
aip_trainer/lambdas/routes.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+
3
+ import structlog
4
+ from fastapi import APIRouter
5
+
6
+
7
# Module-level structlog logger used by the route handlers in this module.
custom_structlog_logger = structlog.stdlib.get_logger(__name__)
# Router that collects the endpoints declared in this module.
router = APIRouter()
9
+
10
+
11
@router.get("/health")
def health():
    """Health-check endpoint.

    Logs the versions of torch and torchaudio importable in the running
    interpreter, then returns a fixed confirmation string.
    """
    # Both packages are imported inside the handler, not at module level.
    import torch
    import torchaudio

    torch_version = torch.__version__
    torchaudio_version = torchaudio.__version__
    custom_structlog_logger.info(f"Still alive, torch version:{torch_version}, torchaudio:{torchaudio_version} ...")
    return "Still alive!"
aip_trainer/utils/middlewares.py ADDED
File without changes
app.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import os
3
+ import time
4
+
5
+ import gradio as gr
6
+ import structlog
7
+ import uvicorn
8
+ from aip_trainer.lambdas import lambdaSpeechToScore
9
+ from asgi_correlation_id import CorrelationIdMiddleware
10
+ from asgi_correlation_id.context import correlation_id
11
+ from dotenv import load_dotenv
12
+ from fastapi import FastAPI, Request, Response
13
+ from uvicorn.protocols.utils import get_path_with_query_string
14
+
15
+ from aip_trainer.utils.session_logger import setup_logging
16
+ from aip_trainer.lambdas.routes import router
17
+
18
+
19
# Load variables from a local .env file (if any) before reading the settings.
load_dotenv()

# os.getenv returns the raw string and bool() of ANY non-empty string is True,
# so LOG_JSON_FORMAT=False / 0 used to switch JSON logs on. Compare against
# explicit "off" spellings instead: unset or empty still means False, and any
# other value still enables JSON logs as before.
LOG_JSON_FORMAT = os.getenv("LOG_JSON_FORMAT", "").strip().lower() not in {"", "0", "false", "no", "off"}
LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO")
setup_logging(json_logs=LOG_JSON_FORMAT, log_level=LOG_LEVEL)
logger = structlog.stdlib.get_logger(__name__)
app = FastAPI(title="Example API", version="1.0.0")
26
+
27
+
28
@app.middleware("http")
async def logging_middleware(request: Request, call_next) -> Response:
    """Log one structured access-log entry per HTTP request.

    Binds the current correlation id to the structlog context, times the
    downstream call, emits an access-log line and adds an ``X-Process-Time``
    header (seconds) to the response.

    Args:
        request: the incoming request.
        call_next: callable that forwards the request to the next handler.

    Returns:
        The downstream response, or a bare 500 response when the downstream
        call raised.
    """
    structlog.contextvars.clear_contextvars()
    # These context vars will be added to all log entries emitted during the request
    request_id = correlation_id.get()
    structlog.contextvars.bind_contextvars(request_id=request_id)

    start_time = time.perf_counter_ns()
    # If the call_next raises an error, we still want to return our own 500 response,
    # so we can add headers to it (process time, request ID...)
    response = Response(status_code=500)
    try:
        response = await call_next(request)
    except Exception:
        # TODO: Validate that we don't swallow exceptions (unit test?)
        structlog.stdlib.get_logger("api.error").exception("Uncaught exception")
        raise
    finally:
        process_time = time.perf_counter_ns() - start_time
        status_code = response.status_code
        url = get_path_with_query_string(request.scope)
        # request.client can be None (no client address supplied by the server),
        # so guard the attribute access instead of failing inside the log path.
        client = request.client
        client_host = client.host if client is not None else None
        client_port = client.port if client is not None else None
        http_method = request.method
        http_version = request.scope["http_version"]
        # Recreate the Uvicorn access log format, but add all parameters as structured information
        logger.info(
            f"""{client_host}:{client_port} - "{http_method} {url} HTTP/{http_version}" {status_code}""",
            http={
                "url": str(request.url),
                "status_code": status_code,
                "method": http_method,
                "request_id": request_id,
                "version": http_version,
            },
            network={"client": {"ip": client_host, "port": client_port}},
            duration=process_time,
        )
        # perf_counter_ns yields nanoseconds; the header is expressed in seconds.
        response.headers["X-Process-Time"] = str(process_time / 10 ** 9)
        # NOTE(review): returning from ``finally`` replaces the re-raised
        # exception with the 500 response, as the comment above describes.
        # Confirm this placement against the original indentation.
        return response
69
+
70
+
71
# Register the endpoints declared on the imported router with the FastAPI app.
app.include_router(router)
logger.info("routes included, creating gradio app")
# URL path under which the Gradio UI is mounted on the FastAPI app.
CUSTOM_GRADIO_PATH = "/"
74
+
75
+
76
def get_gradio_app():
    """Build the Gradio Blocks UI for the speech-scoring demo.

    The UI has a text box for the sentence, a text box for the language, an
    audio input (microphone or upload, passed on as a file path), a text
    output and a button that calls
    ``lambdaSpeechToScore.get_speech_to_score`` with the three inputs.

    Returns:
        The assembled ``gr.Blocks`` instance.
    """
    with gr.Blocks() as gradio_app:
        logger.info("start gradio app building...")
        gr.Markdown(
            """
            # Hello World!

            Start typing below to _see_ the *output*.

            Here a [link](https://huggingface.co/spaces/aletrn/gradio_with_fastapi).
            """
        )
        learner_transcription = gr.Textbox(
            label="Learner Transcription",
            placeholder="It is nice to wreck a nice beach",
        )
        language = gr.Textbox(
            label="language",
            placeholder="en",
        )
        learner_recording = gr.Audio(
            label="Learner Recording",
            sources=["microphone", "upload"],
            type="filepath"
        )
        text_output = gr.Textbox(lines=1, placeholder=None, label="Text Output")
        btn = gr.Button(value="get speech score")
        # The input order must match the positional parameters of
        # get_speech_to_score: real_text, file_bytes_or_audiotmpfile, language.
        btn.click(
            lambdaSpeechToScore.get_speech_to_score,
            inputs=[learner_transcription, learner_recording, language],
            outputs=[text_output]
        )
    return gradio_app
113
+
114
+
115
logger.info("mounting gradio app within FastAPI...")
gradio_app_md = get_gradio_app()
# NOTE(review): presumably added after logging_middleware so that the
# correlation id is already set when that middleware reads it — confirm
# against Starlette's middleware ordering.
app.add_middleware(CorrelationIdMiddleware)
# Rebind ``app`` to the application returned by gr.mount_gradio_app, which
# serves the Gradio UI at CUSTOM_GRADIO_PATH.
app = gr.mount_gradio_app(app, gradio_app_md, path=CUSTOM_GRADIO_PATH)
logger.info("gradio app mounted")
120
+
121
+
122
if __name__ == "__main__":
    # Local entry point: serve this module's ``app`` with uvicorn. Uvicorn's
    # own log configuration is disabled (log_config=None) and reload is on.
    server_options = {
        "host": "127.0.0.1",
        "port": 7860,
        "log_config": None,
        "reload": True,
    }
    try:
        uvicorn.run("app:app", **server_options)
    except Exception as ex:
        # Log the startup/runtime failure, then let it propagate.
        logging.error(f"ex:{ex}.")
        raise ex
tests/events/test_de.wav ADDED
Binary file (259 kB). View file
 
tests/events/test_en.wav ADDED
Binary file (196 kB). View file
 
tests/test_GetAccuracyFromRecordedAudio.py CHANGED
@@ -1,10 +1,19 @@
1
  import json
 
 
2
  import unittest
3
 
 
4
  from aip_trainer.lambdas import lambdaSpeechToScore
5
  from tests import EVENTS_FOLDER
6
 
7
 
 
 
 
 
 
 
8
  def check_output_by_field(output, key, match, expected_output):
9
  import re
10
 
@@ -17,10 +26,48 @@ def check_output_by_field(output, key, match, expected_output):
17
  return output
18
 
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  class TestGetAccuracyFromRecordedAudio(unittest.TestCase):
21
- def test_GetAccuracyFromRecordedAudio(self):
22
- self.maxDiff = None
 
 
 
 
 
 
 
23
 
 
24
  with open(EVENTS_FOLDER / "GetAccuracyFromRecordedAudio.json", "r") as src:
25
  inputs_outputs = json.load(src)
26
  inputs = inputs_outputs["inputs"]
@@ -29,23 +76,63 @@ class TestGetAccuracyFromRecordedAudio(unittest.TestCase):
29
  expected_output = outputs[event_name]
30
  output = lambdaSpeechToScore.lambda_handler(event_content, [])
31
  output = json.loads(output)
32
- assert len(output["matched_transcripts"].strip()) > 0
33
- assert len(output["matched_transcripts_ipa"].strip()) > 0
34
- assert len(output["ipa_transcript"].strip()) > 0
35
- assert len(output["real_transcripts_ipa"].strip()) > 0
36
- output = check_output_by_field(output, "is_letter_correct_all_words", '[01]+', expected_output)
37
- output = check_output_by_field(output, "end_time", '\d+\.\d+', expected_output)
38
- output = check_output_by_field(output, "start_time", '\d+\.\d+', expected_output)
39
- output = check_output_by_field(output, "pronunciation_accuracy", '\d+', expected_output)
40
- output["matched_transcripts"] = expected_output["matched_transcripts"]
41
- output["matched_transcripts_ipa"] = expected_output["matched_transcripts_ipa"]
42
- output["pronunciation_accuracy"] = expected_output["pronunciation_accuracy"]
43
- output["pair_accuracy_category"] = expected_output["pair_accuracy_category"]
44
- output["ipa_transcript"] = expected_output["ipa_transcript"]
45
- output["real_transcript"] = expected_output["real_transcript"]
46
- output["real_transcripts_ipa"] = expected_output["real_transcripts_ipa"]
47
- self.assertEqual(expected_output, output)
48
-
49
-
50
- if __name__ == '__main__':
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  unittest.main()
 
1
  import json
2
+ import os
3
+ import platform
4
  import unittest
5
 
6
+ from aip_trainer import app_logger
7
  from aip_trainer.lambdas import lambdaSpeechToScore
8
  from tests import EVENTS_FOLDER
9
 
10
 
11
# Reference sentence per language code, used as the expected text by the
# get_speech_to_score tests in this module.
text_dict = {
    "de": "Ich bin Alex, wer bist du?",
    "en": "Hi there, how are you?"
}
15
+
16
+
17
  def check_output_by_field(output, key, match, expected_output):
18
  import re
19
 
 
26
  return output
27
 
28
 
29
def check_output(self, output, expected_output):
    """Compare a speech-to-score result with the expected dictionary.

    The transcript fields must be non-empty, four fields are passed through
    ``check_output_by_field`` with a regex pattern, and seven fields are then
    overwritten with the expected values before the final dict comparison
    (so those seven are not compared for exact equality).

    Args:
        self: the running ``unittest.TestCase`` (used for its assertions).
        output: decoded result dictionary; modified in place.
        expected_output: reference dictionary.

    Raises:
        Exception: any failure is logged through ``app_logger`` and re-raised.
    """
    self.maxDiff = None
    try:
        # unittest assertions (unlike bare ``assert``) are not stripped by -O.
        for key in (
            "matched_transcripts",
            "matched_transcripts_ipa",
            "ipa_transcript",
            "real_transcripts_ipa",
        ):
            self.assertGreater(len(output[key].strip()), 0)
        # Raw strings: "\d" in a normal literal is an invalid escape sequence.
        for key, pattern in (
            ("is_letter_correct_all_words", r"[01]+"),
            ("end_time", r"\d+\.\d+"),
            ("start_time", r"\d+\.\d+"),
            ("pronunciation_accuracy", r"\d+"),
        ):
            output = check_output_by_field(output, key, pattern, expected_output)
        # Copy the expected values over these fields so they always match below.
        for key in (
            "matched_transcripts",
            "matched_transcripts_ipa",
            "pronunciation_accuracy",
            "pair_accuracy_category",
            "ipa_transcript",
            "real_transcript",
            "real_transcripts_ipa",
        ):
            output[key] = expected_output[key]
        self.assertDictEqual(expected_output, output)
    except Exception as e:
        app_logger.error(f"e:{e}.")
        # Bare raise keeps the original traceback untouched.
        raise
57
+
58
+
59
  class TestGetAccuracyFromRecordedAudio(unittest.TestCase):
60
def setUp(self):
    """Set ``PYTHONUTF8=1`` in ``os.environ`` when the platform reports Windows."""
    running_on_windows = platform.system() in ("Windows", "Win32")
    if running_on_windows:
        os.environ["PYTHONUTF8"] = "1"
63
+
64
def tearDown(self):
    """Remove ``PYTHONUTF8`` from ``os.environ`` when the platform reports Windows."""
    if platform.system() not in ("Windows", "Win32"):
        return
    # pop() with a default is a no-op when the variable is not set.
    os.environ.pop("PYTHONUTF8", None)
69
 
70
+ def test_GetAccuracyFromRecordedAudio(self):
71
  with open(EVENTS_FOLDER / "GetAccuracyFromRecordedAudio.json", "r") as src:
72
  inputs_outputs = json.load(src)
73
  inputs = inputs_outputs["inputs"]
 
76
  expected_output = outputs[event_name]
77
  output = lambdaSpeechToScore.lambda_handler(event_content, [])
78
  output = json.loads(output)
79
+ app_logger.info(
80
+ f"output type:{type(output)}, expected_output type:{type(expected_output)}."
81
+ )
82
+ check_output(self, output, expected_output)
83
+
84
def test_get_speech_to_score_en_ok(self):
    """Score the English sample recording and check the result.

    Passes the path of ``test_en.wav`` in the events folder to
    ``get_speech_to_score`` with ``remove_random_file=False``, then hands the
    decoded JSON result to ``check_output``.
    """
    lang = "en"
    reference_text = text_dict[lang]
    audio_path = EVENTS_FOLDER / f"test_{lang}.wav"
    raw_output = lambdaSpeechToScore.get_speech_to_score(
        real_text=reference_text,
        file_bytes_or_audiotmpfile=audio_path,
        language=lang,
        remove_random_file=False,
    )
    expected_output = {
        "real_transcript": reference_text,
        "ipa_transcript": "ha\u026a ha\u028a \u0259r ju",
        "pronunciation_accuracy": "69",
        "real_transcripts": reference_text,
        "matched_transcripts": "hi - how are you",
        "real_transcripts_ipa": "ha\u026a \u00f0\u025br, ha\u028a \u0259r ju?",
        "matched_transcripts_ipa": "ha\u026a ha\u028a \u0259r ju",
        "pair_accuracy_category": "0 2 0 0 0",
        "start_time": "0.2245625 1.3228125 0.852125 1.04825 1.3228125",
        "end_time": "0.559875 1.658125 1.14825 1.344375 1.658125",
        "is_letter_correct_all_words": "11 000001 111 111 1111 ",
    }
    decoded_output = json.loads(raw_output)
    check_output(self, decoded_output, expected_output)
109
+
110
def test_get_speech_to_score_de_ok(self):
    """Score the German sample recording and check the result.

    Passes the path of ``test_de.wav`` in the events folder to
    ``get_speech_to_score`` with ``remove_random_file=False``, then hands the
    decoded JSON result to ``check_output``.
    """
    lang = "de"
    reference_text = text_dict[lang]
    audio_path = EVENTS_FOLDER / f"test_{lang}.wav"
    raw_output = lambdaSpeechToScore.get_speech_to_score(
        real_text=reference_text,
        file_bytes_or_audiotmpfile=audio_path,
        language=lang,
        remove_random_file=False,
    )
    expected_output = {
        "real_transcript": reference_text,
        "ipa_transcript": "\u026a\u00e7 bi\u02d0n a\u02d0l\u025bksv\u025b\u02d0 b\u025bst\u025b\u02d0 du\u02d0",
        "pronunciation_accuracy": "63",
        "real_transcripts": reference_text,
        "matched_transcripts": "ich bin alexwe - beste du",
        "real_transcripts_ipa": "\u026a\u00e7 bi\u02d0n a\u02d0l\u025bks, v\u0250 b\u026ast du\u02d0?",
        "matched_transcripts_ipa": "\u026a\u00e7 bi\u02d0n a\u02d0l\u025bksv\u0259 - b\u0259st\u0259 du\u02d0",
        "pair_accuracy_category": "0 0 2 2 2 0",
        "start_time": "0.0 0.3075 0.62525 2.1346875 1.5785625 2.1346875",
        "end_time": "0.328 0.6458125 1.44025 2.4730625 2.15525 2.4730625",
        "is_letter_correct_all_words": "111 111 11111 000 1011 111 ",
    }
    decoded_output = json.loads(raw_output)
    check_output(self, decoded_output, expected_output)
135
+
136
+
137
# Run the test cases of this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
tests/test_data_de_en_2.pickle CHANGED
File without changes