Spaces:

aletrn
/

ai-pronunciation-trainer

Running

App Files Community

alessandro trinca tornidor committed on Nov 15, 2024

Commit

0746ae5

1 Parent(s): 51b11b3

feat: refactor get_resulting_string to reduce its complexity, improve some return typing

Browse files

Files changed (3) hide show

aip_trainer/WordMatching.py +23 -12
aip_trainer/models/AIModels.py +2 -4
aip_trainer/pronunciationTrainer.py +2 -2

aip_trainer/WordMatching.py CHANGED Viewed

@@ -90,6 +90,7 @@ def get_resulting_string(mapped_indices: np.array, words_estimated: list, words_
     WORD_NOT_FOUND_TOKEN = '-'
     number_of_real_words = len(words_real)
     for word_idx in range(number_of_real_words):
         position_of_real_word_indices = np.where(
             mapped_indices == word_idx)[0].astype(int)
@@ -108,25 +109,35 @@ def get_resulting_string(mapped_indices: np.array, words_estimated: list, words_
             error = 99999
             best_possible_combination = ''
             best_possible_idx = -1
-            for single_word_idx in position_of_real_word_indices:
-                idx_above_word = single_word_idx >= len(words_estimated)
-                if idx_above_word:
-                    continue
-                error_word = WordMetrics.edit_distance_python(
-                    words_estimated[single_word_idx], words_real[word_idx])
-                if error_word < error:
-                    error = error_word*1
-                    best_possible_combination = words_estimated[single_word_idx]
-                    best_possible_idx = single_word_idx
             mapped_words.append(best_possible_combination)
             mapped_words_indices.append(best_possible_idx)
-            continue
     return mapped_words, mapped_words_indices
-def get_best_mapped_words(words_estimated: list, words_real: list) -> list:
     word_distance_matrix = get_word_distance_matrix(
         words_estimated, words_real)

     WORD_NOT_FOUND_TOKEN = '-'
     number_of_real_words = len(words_real)
     for word_idx in range(number_of_real_words):
+        app_logger.debug(f"{word_idx} => {mapped_indices} == {word_idx}, {mapped_indices == word_idx} #")
         position_of_real_word_indices = np.where(
             mapped_indices == word_idx)[0].astype(int)
             error = 99999
             best_possible_combination = ''
             best_possible_idx = -1
+            best_possible_combination, best_possible_idx = inner_get_resulting_string(
+                best_possible_combination, best_possible_idx, error, position_of_real_word_indices,
+                word_idx, words_estimated, words_real
+            )
             mapped_words.append(best_possible_combination)
             mapped_words_indices.append(best_possible_idx)
+            # continue
     return mapped_words, mapped_words_indices
+def inner_get_resulting_string(
+        best_possible_combination, best_possible_idx, error, position_of_real_word_indices, word_idx, words_estimated, words_real
+    ):
+    for single_word_idx in position_of_real_word_indices:
+        idx_above_word = single_word_idx >= len(words_estimated)
+        if idx_above_word:
+            continue
+        error_word = WordMetrics.edit_distance_python(
+            words_estimated[single_word_idx], words_real[word_idx])
+        if error_word < error:
+            error = error_word * 1
+            best_possible_combination = words_estimated[single_word_idx]
+            best_possible_idx = single_word_idx
+    return best_possible_combination, best_possible_idx
+def get_best_mapped_words(words_estimated: list, words_real: list) -> tuple[list, list]:
     word_distance_matrix = get_word_distance_matrix(
         words_estimated, words_real)

aip_trainer/models/AIModels.py CHANGED Viewed

@@ -15,14 +15,12 @@ class NeuralASR(ModelInterfaces.IASRModel):
     def getTranscript(self) -> str:
         """Get the transcripts of the process audio"""
-        assert(self.audio_transcript is not None,
-               'Can get audio transcripts without having processed the audio')
         return self.audio_transcript
     def getWordLocations(self) -> list:
         """Get the pair of words location from audio"""
-        assert(self.word_locations_in_samples is not None,
-               'Can get word locations without having processed the audio')
         return self.word_locations_in_samples

     def getTranscript(self) -> str:
         """Get the transcripts of the process audio"""
+        assert self.audio_transcript is not None, 'Can get audio transcripts without having processed the audio'
         return self.audio_transcript
     def getWordLocations(self) -> list:
         """Get the pair of words location from audio"""
+        assert self.word_locations_in_samples is not None, 'Can get word locations without having processed the audio'
         return self.word_locations_in_samples

aip_trainer/pronunciationTrainer.py CHANGED Viewed

@@ -128,7 +128,7 @@ class PronunciationTrainer:
         app_logger.info('ok, return audio transcript!')
         return current_recorded_transcript, current_recorded_ipa, current_recorded_word_locations
-    def getWordLocationsFromRecordInSeconds(self, word_locations, mapped_words_indices) -> list:
         start_time = []
         end_time = []
         for word_idx in range(len(mapped_words_indices)):
@@ -163,7 +163,7 @@ class PronunciationTrainer:
                                                    self.ipa_converter.convertToPhonem(mapped_words[word_idx])))
         return real_and_transcribed_words, real_and_transcribed_words_ipa, mapped_words_indices
-    def getPronunciationAccuracy(self, real_and_transcribed_words_ipa) -> float:
         total_mismatches = 0.
         number_of_phonemes = 0.
         current_words_pronunciation_accuracy = []

         app_logger.info('ok, return audio transcript!')
         return current_recorded_transcript, current_recorded_ipa, current_recorded_word_locations
+    def getWordLocationsFromRecordInSeconds(self, word_locations, mapped_words_indices) -> tuple[str, str]:
         start_time = []
         end_time = []
         for word_idx in range(len(mapped_words_indices)):
                                                    self.ipa_converter.convertToPhonem(mapped_words[word_idx])))
         return real_and_transcribed_words, real_and_transcribed_words_ipa, mapped_words_indices
+    def getPronunciationAccuracy(self, real_and_transcribed_words_ipa) -> tuple[float, list]:
         total_mismatches = 0.
         number_of_phonemes = 0.
         current_words_pronunciation_accuracy = []