Spaces:

aletrn
/

ai-pronunciation-trainer

Running

App Files Files Community

alessandro trinca tornidor committed on Nov 18, 2024

Commit

d804881

1 Parent(s): 1e30c4b

feat: first working gradio frontend with refactored lambda_handler and tests

Browse files

Files changed (8) hide show

aip_trainer/lambdas/lambdaSpeechToScore.py +27 -8
aip_trainer/lambdas/routes.py +16 -0
aip_trainer/utils/middlewares.py +0 -0
app.py +127 -0
tests/events/test_de.wav +0 -0
tests/events/test_en.wav +0 -0
tests/test_GetAccuracyFromRecordedAudio.py +108 -21
tests/test_data_de_en_2.pickle +0 -0

aip_trainer/lambdas/lambdaSpeechToScore.py CHANGED Viewed

@@ -1,6 +1,8 @@
 import base64
 import json
 import os
 import tempfile
 import time
@@ -25,8 +27,9 @@ def lambda_handler(event, context):
     data = json.loads(event['body'])
     real_text = data['title']
-    file_bytes = base64.b64decode(
-        data['base64Audio'][22:].encode('utf-8'))
     language = data['language']
     if len(real_text) == 0:
@@ -40,13 +43,26 @@ def lambda_handler(event, context):
             },
             'body': ''
         }
     start0 = time.time()
-    with tempfile.NamedTemporaryFile(prefix="temp_sound_speech_score_", suffix=".ogg", delete=False) as f1:
-        f1.write(file_bytes)
-        duration = time.time() - start0
-        app_logger.info(f'Saved binary in file in {duration}s.')
-        random_file_name = f1.name
     start = time.time()
     app_logger.info(f'Loading .ogg file file {random_file_name} ...')
@@ -66,7 +82,8 @@ def lambda_handler(event, context):
     app_logger.info(f'language_trainer_sst_lambda: result: {result}...')
     start = time.time()
-    os.remove(random_file_name)
     duration = time.time() - start
     app_logger.info(f'Deleted file {random_file_name} in {duration}s.')
@@ -127,6 +144,8 @@ def audioread_load(path, offset=0.0, duration=None, dtype=np.float32):
     This loads one block at a time, and then concatenates the results.
     """
     y = []
     app_logger.debug(f"reading audio file at path:{path} ...")
     with audioread.audio_open(path) as input_file:

 import base64
 import json
 import os
+from pathlib import Path
 import tempfile
 import time
     data = json.loads(event['body'])
     real_text = data['title']
+    base64Audio = data["base64Audio"]
+    app_logger.debug(f"base64Audio:{base64Audio} ...")
+    file_bytes_or_audiotmpfile = base64.b64decode(base64Audio[22:].encode('utf-8'))
     language = data['language']
     if len(real_text) == 0:
             },
             'body': ''
         }
+    output = get_speech_to_score(real_text=real_text, file_bytes_or_audiotmpfile=file_bytes_or_audiotmpfile, language=language)
+    app_logger.debug(f"output: {output} ...")
+    return output
+def get_speech_to_score(real_text: str, file_bytes_or_audiotmpfile: str | dict, language: str = "en", remove_random_file: bool = True):
+    app_logger.info(f"real_text:{real_text} ...")
+    app_logger.debug(f"file_bytes:{file_bytes_or_audiotmpfile} ...")
+    app_logger.info(f"language:{language} ...")
     start0 = time.time()
+    random_file_name = file_bytes_or_audiotmpfile
+    app_logger.debug(f"random_file_name:{random_file_name} ...")
+    if isinstance(file_bytes_or_audiotmpfile, (bytes, bytearray)):
+        app_logger.debug("writing streaming data to file on disk...")
+        with tempfile.NamedTemporaryFile(prefix="temp_sound_speech_score_", suffix=".ogg", delete=False) as f1:
+            f1.write(file_bytes_or_audiotmpfile)
+            duration = time.time() - start0
+            app_logger.info(f'Saved binary data in file in {duration}s.')
+            random_file_name = f1.name
     start = time.time()
     app_logger.info(f'Loading .ogg file file {random_file_name} ...')
     app_logger.info(f'language_trainer_sst_lambda: result: {result}...')
     start = time.time()
+    if remove_random_file:
+        os.remove(random_file_name)
     duration = time.time() - start
     app_logger.info(f'Deleted file {random_file_name} in {duration}s.')
     This loads one block at a time, and then concatenates the results.
     """
+    import shutil
+    shutil.copyfile(path, Path("/tmp") / f"test_en_{Path(path).name}")
     y = []
     app_logger.debug(f"reading audio file at path:{path} ...")
     with audioread.audio_open(path) as input_file:

aip_trainer/lambdas/routes.py ADDED Viewed

	@@ -0,0 +1,16 @@

+import random
+import structlog
+from fastapi import APIRouter
+custom_structlog_logger = structlog.stdlib.get_logger(__name__)
+router = APIRouter()
+@router.get("/health")
+def health():
+    import torch
+    import torchaudio
+    custom_structlog_logger.info(f"Still alive, torch version:{torch.__version__}, torchaudio:{torchaudio.__version__} ...")
+    return "Still alive!"

aip_trainer/utils/middlewares.py ADDED Viewed

File without changes

app.py ADDED Viewed

	@@ -0,0 +1,127 @@

+import logging
+import os
+import time
+import gradio as gr
+import structlog
+import uvicorn
+from aip_trainer.lambdas import lambdaSpeechToScore
+from asgi_correlation_id import CorrelationIdMiddleware
+from asgi_correlation_id.context import correlation_id
+from dotenv import load_dotenv
+from fastapi import FastAPI, Request, Response
+from uvicorn.protocols.utils import get_path_with_query_string
+from aip_trainer.utils.session_logger import setup_logging
+from aip_trainer.lambdas.routes import router
+load_dotenv()
+LOG_JSON_FORMAT = bool(os.getenv("LOG_JSON_FORMAT", False))
+LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO")
+setup_logging(json_logs=LOG_JSON_FORMAT, log_level=LOG_LEVEL)
+logger = structlog.stdlib.get_logger(__name__)
+app = FastAPI(title="Example API", version="1.0.0")
+@app.middleware("http")
+async def logging_middleware(request: Request, call_next) -> Response:
+    structlog.contextvars.clear_contextvars()
+    # These context vars will be added to all log entries emitted during the request
+    request_id = correlation_id.get()
+    # print(f"request_id:{request_id}.")
+    structlog.contextvars.bind_contextvars(request_id=request_id)
+    start_time = time.perf_counter_ns()
+    # If the call_next raises an error, we still want to return our own 500 response,
+    # so we can add headers to it (process time, request ID...)
+    response = Response(status_code=500)
+    try:
+        response = await call_next(request)
+    except Exception:
+        # TODO: Validate that we don't swallow exceptions (unit test?)
+        structlog.stdlib.get_logger("api.error").exception("Uncaught exception")
+        raise
+    finally:
+        process_time = time.perf_counter_ns() - start_time
+        status_code = response.status_code
+        url = get_path_with_query_string(request.scope)
+        client_host = request.client.host
+        client_port = request.client.port
+        http_method = request.method
+        http_version = request.scope["http_version"]
+        # Recreate the Uvicorn access log format, but add all parameters as structured information
+        logger.info(
+            f"""{client_host}:{client_port} - "{http_method} {url} HTTP/{http_version}" {status_code}""",
+            http={
+                "url": str(request.url),
+                "status_code": status_code,
+                "method": http_method,
+                "request_id": request_id,
+                "version": http_version,
+            },
+            network={"client": {"ip": client_host, "port": client_port}},
+            duration=process_time,
+        )
+        response.headers["X-Process-Time"] = str(process_time / 10 ** 9)
+        return response
+# Register the routes imported from aip_trainer.lambdas.routes on the FastAPI app.
+app.include_router(router)
+logger.info("routes included, creating gradio app")
+# URL path handed to gr.mount_gradio_app as the mount point of the gradio UI.
+CUSTOM_GRADIO_PATH = "/"
+def get_gradio_app():
+    with gr.Blocks() as gradio_app:
+        logger.info("start gradio app building...")
+        gr.Markdown(
+            """
+            # Hello World!
+            Start typing below to _see_ the *output*.
+            Here a [link](https://huggingface.co/spaces/aletrn/gradio_with_fastapi).
+            """
+        )
+        learner_transcription = gr.Textbox(
+            label="Learner Transcription",
+            placeholder="It is nice to wreck a nice beach",
+        )
+        language = gr.Textbox(
+            label="language",
+            placeholder="en",
+        )
+        learner_recording = gr.Audio(
+            label="Learner Recording",
+            sources=["microphone", "upload"],
+            type="filepath"
+        )
+        text_output = gr.Textbox(lines=1, placeholder=None, label="Text Output")
+        btn = gr.Button(value="get speech score")
+        """
+        event = {'body': json.dumps(request.get_json(force=True))}
+        lambda_correct_output = lambdaSpeechToScore.lambda_handler(event, [])
+        """
+        btn.click(
+            lambdaSpeechToScore.get_speech_to_score,
+            inputs=[learner_transcription, learner_recording, language],
+            outputs=[text_output]
+        )
+    return gradio_app
+logger.info("mounting gradio app within FastAPI...")
+gradio_app_md = get_gradio_app()
+app.add_middleware(CorrelationIdMiddleware)
+app = gr.mount_gradio_app(app, gradio_app_md, path=CUSTOM_GRADIO_PATH)
+logger.info("gradio app mounted")
+if __name__ == "__main__":
+    try:
+        uvicorn.run("app:app", host="127.0.0.1", port=7860, log_config=None, reload=True)
+    except Exception as ex:
+        logging.error(f"ex:{ex}.")
+        raise ex

tests/events/test_de.wav ADDED Viewed

Binary file (259 kB). View file

tests/events/test_en.wav ADDED Viewed

Binary file (196 kB). View file

tests/test_GetAccuracyFromRecordedAudio.py CHANGED Viewed

@@ -1,10 +1,19 @@
 import json
 import unittest
 from aip_trainer.lambdas import lambdaSpeechToScore
 from tests import EVENTS_FOLDER
 def check_output_by_field(output, key, match, expected_output):
     import re
@@ -17,10 +26,48 @@ def check_output_by_field(output, key, match, expected_output):
     return output
 class TestGetAccuracyFromRecordedAudio(unittest.TestCase):
-    def test_GetAccuracyFromRecordedAudio(self):
-        self.maxDiff = None
         with open(EVENTS_FOLDER / "GetAccuracyFromRecordedAudio.json", "r") as src:
             inputs_outputs = json.load(src)
         inputs = inputs_outputs["inputs"]
@@ -29,23 +76,63 @@ class TestGetAccuracyFromRecordedAudio(unittest.TestCase):
             expected_output = outputs[event_name]
             output = lambdaSpeechToScore.lambda_handler(event_content, [])
             output = json.loads(output)
-            assert len(output["matched_transcripts"].strip()) > 0
-            assert len(output["matched_transcripts_ipa"].strip()) > 0
-            assert len(output["ipa_transcript"].strip()) > 0
-            assert len(output["real_transcripts_ipa"].strip()) > 0
-            output = check_output_by_field(output, "is_letter_correct_all_words", '[01]+', expected_output)
-            output = check_output_by_field(output, "end_time", '\d+\.\d+', expected_output)
-            output = check_output_by_field(output, "start_time", '\d+\.\d+', expected_output)
-            output = check_output_by_field(output, "pronunciation_accuracy", '\d+', expected_output)
-            output["matched_transcripts"] = expected_output["matched_transcripts"]
-            output["matched_transcripts_ipa"] = expected_output["matched_transcripts_ipa"]
-            output["pronunciation_accuracy"] = expected_output["pronunciation_accuracy"]
-            output["pair_accuracy_category"] = expected_output["pair_accuracy_category"]
-            output["ipa_transcript"] = expected_output["ipa_transcript"]
-            output["real_transcript"] = expected_output["real_transcript"]
-            output["real_transcripts_ipa"] = expected_output["real_transcripts_ipa"]
-            self.assertEqual(expected_output, output)
-if __name__ == '__main__':
     unittest.main()

 import json
+import os
+import platform
 import unittest
+from aip_trainer import app_logger
 from aip_trainer.lambdas import lambdaSpeechToScore
 from tests import EVENTS_FOLDER
+text_dict = {
+    "de": "Ich bin Alex, wer bist du?",
+    "en": "Hi there, how are you?"
+}
 def check_output_by_field(output, key, match, expected_output):
     import re
     return output
+def check_output(self, output, expected_output):
+    self.maxDiff = None
+    try:
+        assert len(output["matched_transcripts"].strip()) > 0
+        assert len(output["matched_transcripts_ipa"].strip()) > 0
+        assert len(output["ipa_transcript"].strip()) > 0
+        assert len(output["real_transcripts_ipa"].strip()) > 0
+        output = check_output_by_field(
+            output, "is_letter_correct_all_words", "[01]+", expected_output
+        )
+        output = check_output_by_field(output, "end_time", "\d+\.\d+", expected_output)
+        output = check_output_by_field(
+            output, "start_time", "\d+\.\d+", expected_output
+        )
+        output = check_output_by_field(
+            output, "pronunciation_accuracy", "\d+", expected_output
+        )
+        output["matched_transcripts"] = expected_output["matched_transcripts"]
+        output["matched_transcripts_ipa"] = expected_output["matched_transcripts_ipa"]
+        output["pronunciation_accuracy"] = expected_output["pronunciation_accuracy"]
+        output["pair_accuracy_category"] = expected_output["pair_accuracy_category"]
+        output["ipa_transcript"] = expected_output["ipa_transcript"]
+        output["real_transcript"] = expected_output["real_transcript"]
+        output["real_transcripts_ipa"] = expected_output["real_transcripts_ipa"]
+        self.assertDictEqual(expected_output, output)
+    except Exception as e:
+        app_logger.error(f"e:{e}.")
+        raise e
 class TestGetAccuracyFromRecordedAudio(unittest.TestCase):
+    def setUp(self):
+        if platform.system() == "Windows" or platform.system() == "Win32":
+            os.environ["PYTHONUTF8"] = "1"
+    def tearDown(self):
+        if (
+            platform.system() == "Windows" or platform.system() == "Win32"
+        ) and "PYTHONUTF8" in os.environ:
+            del os.environ["PYTHONUTF8"]
+    def test_GetAccuracyFromRecordedAudio(self):
         with open(EVENTS_FOLDER / "GetAccuracyFromRecordedAudio.json", "r") as src:
             inputs_outputs = json.load(src)
         inputs = inputs_outputs["inputs"]
             expected_output = outputs[event_name]
             output = lambdaSpeechToScore.lambda_handler(event_content, [])
             output = json.loads(output)
+            app_logger.info(
+                f"output type:{type(output)}, expected_output type:{type(expected_output)}."
+            )
+            check_output(self, output, expected_output)
+    def test_get_speech_to_score_en_ok(self):
+        from aip_trainer.lambdas import lambdaSpeechToScore
+        language = "en"
+        path = EVENTS_FOLDER / f"test_{language}.wav"
+        output = lambdaSpeechToScore.get_speech_to_score(
+            real_text=text_dict[language],
+            file_bytes_or_audiotmpfile=path,
+            language=language,
+            remove_random_file=False,
+        )
+        expected_output = {
+            "real_transcript": text_dict[language],
+            "ipa_transcript": "ha\u026a ha\u028a \u0259r ju",
+            "pronunciation_accuracy": "69",
+            "real_transcripts": text_dict[language],
+            "matched_transcripts": "hi - how are you",
+            "real_transcripts_ipa": "ha\u026a \u00f0\u025br, ha\u028a \u0259r ju?",
+            "matched_transcripts_ipa": "ha\u026a  ha\u028a \u0259r ju",
+            "pair_accuracy_category": "0 2 0 0 0",
+            "start_time": "0.2245625 1.3228125 0.852125 1.04825 1.3228125",
+            "end_time": "0.559875 1.658125 1.14825 1.344375 1.658125",
+            "is_letter_correct_all_words": "11 000001 111 111 1111 ",
+        }
+        check_output(self, json.loads(output), expected_output)
+    def test_get_speech_to_score_de_ok(self):
+        from aip_trainer.lambdas import lambdaSpeechToScore
+        language = "de"
+        path = EVENTS_FOLDER / f"test_{language}.wav"
+        output = lambdaSpeechToScore.get_speech_to_score(
+            real_text=text_dict[language],
+            file_bytes_or_audiotmpfile=path,
+            language=language,
+            remove_random_file=False,
+        )
+        expected_output = {
+            "real_transcript": text_dict[language],
+            "ipa_transcript": "\u026a\u00e7 bi\u02d0n a\u02d0l\u025bksv\u025b\u02d0 b\u025bst\u025b\u02d0 du\u02d0",
+            "pronunciation_accuracy": "63",
+            "real_transcripts": text_dict[language],
+            "matched_transcripts": "ich bin alexwe - beste du",
+            "real_transcripts_ipa": "\u026a\u00e7 bi\u02d0n a\u02d0l\u025bks, v\u0250 b\u026ast du\u02d0?",
+            "matched_transcripts_ipa": "\u026a\u00e7 bi\u02d0n a\u02d0l\u025bksv\u0259 - b\u0259st\u0259 du\u02d0",
+            "pair_accuracy_category": "0 0 2 2 2 0",
+            "start_time": "0.0 0.3075 0.62525 2.1346875 1.5785625 2.1346875",
+            "end_time": "0.328 0.6458125 1.44025 2.4730625 2.15525 2.4730625",
+            "is_letter_correct_all_words": "111 111 11111 000 1011 111 ",
+        }
+        check_output(self, json.loads(output), expected_output)
+if __name__ == "__main__":
     unittest.main()

tests/test_data_de_en_2.pickle CHANGED Viewed

File without changes