Spaces:
Running
Running
alessandro trinca tornidor
committed on
Commit
·
0746ae5
1
Parent(s):
51b11b3
feat: refactor get_resulting_string to reduce its complexity, improve some return typing
Browse files
aip_trainer/WordMatching.py
CHANGED
@@ -90,6 +90,7 @@ def get_resulting_string(mapped_indices: np.array, words_estimated: list, words_
|
|
90 |
WORD_NOT_FOUND_TOKEN = '-'
|
91 |
number_of_real_words = len(words_real)
|
92 |
for word_idx in range(number_of_real_words):
|
|
|
93 |
position_of_real_word_indices = np.where(
|
94 |
mapped_indices == word_idx)[0].astype(int)
|
95 |
|
@@ -108,25 +109,35 @@ def get_resulting_string(mapped_indices: np.array, words_estimated: list, words_
|
|
108 |
error = 99999
|
109 |
best_possible_combination = ''
|
110 |
best_possible_idx = -1
|
111 |
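-
for single_word_idx in position_of_real_word_indices:
|
112 |
-
idx_above_word = single_word_idx >= len(words_estimated)
|
113 |
-
if idx_above_word:
|
114 |
-
continue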
|
115 |
-
error_word = WordMetrics.edit_distance_python(
|
116 |
-
words_estimated[single_word_idx], words_real[word_idx])
|
117 |
-
if error_word < error:
|
118 |
-
error = error_word*1
|
119 |
-
best_possible_combination = words_estimated[single_word_idx]
|
120 |
-
best_possible_idx = single_word_idx
|
121 |
|
122 |
mapped_words.append(best_possible_combination)
|
123 |
mapped_words_indices.append(best_possible_idx)
|
124 |
-
continue
|
125 |
|
126 |
return mapped_words, mapped_words_indices
|
127 |
|
128 |
|
129 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
130 |
|
131 |
word_distance_matrix = get_word_distance_matrix(
|
132 |
words_estimated, words_real)
|
|
|
90 |
WORD_NOT_FOUND_TOKEN = '-'
|
91 |
number_of_real_words = len(words_real)
|
92 |
for word_idx in range(number_of_real_words):
|
93 |
+
app_logger.debug(f"{word_idx} => {mapped_indices} == {word_idx}, {mapped_indices == word_idx} #")
|
94 |
position_of_real_word_indices = np.where(
|
95 |
mapped_indices == word_idx)[0].astype(int)
|
96 |
|
|
|
109 |
error = 99999
|
110 |
best_possible_combination = ''
|
111 |
best_possible_idx = -1
|
112 |
+
best_possible_combination, best_possible_idx = inner_get_resulting_string(
|
113 |
+
best_possible_combination, best_possible_idx, error, position_of_real_word_indices,
|
114 |
+
word_idx, words_estimated, words_real
|
115 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
116 |
|
117 |
mapped_words.append(best_possible_combination)
|
118 |
mapped_words_indices.append(best_possible_idx)
|
119 |
+
# continue
|
120 |
|
121 |
return mapped_words, mapped_words_indices
|
122 |
|
123 |
|
124 |
+
def inner_get_resulting_string(
|
125 |
+
best_possible_combination, best_possible_idx, error, position_of_real_word_indices, word_idx, words_estimated, words_real
|
126 |
+
):
|
127 |
+
for single_word_idx in position_of_real_word_indices:
|
128 |
+
idx_above_word = single_word_idx >= len(words_estimated)
|
129 |
+
if idx_above_word:
|
130 |
+
continue
|
131 |
+
error_word = WordMetrics.edit_distance_python(
|
132 |
+
words_estimated[single_word_idx], words_real[word_idx])
|
133 |
+
if error_word < error:
|
134 |
+
error = error_word * 1
|
135 |
+
best_possible_combination = words_estimated[single_word_idx]
|
136 |
+
best_possible_idx = single_word_idx
|
137 |
+
return best_possible_combination, best_possible_idx
|
138 |
+
|
139 |
+
|
140 |
+
def get_best_mapped_words(words_estimated: list, words_real: list) -> tuple[list, list]:
|
141 |
|
142 |
word_distance_matrix = get_word_distance_matrix(
|
143 |
words_estimated, words_real)
|
aip_trainer/models/AIModels.py
CHANGED
@@ -15,14 +15,12 @@ class NeuralASR(ModelInterfaces.IASRModel):
|
|
15 |
|
16 |
def getTranscript(self) -> str:
|
17 |
"""Get the transcripts of the process audio"""
|
18 |
-
assert
|
19 |
-
'Can get audio transcripts without having processed the audio')
|
20 |
return self.audio_transcript
|
21 |
|
22 |
def getWordLocations(self) -> list:
|
23 |
"""Get the pair of words location from audio"""
|
24 |
-
assert
|
25 |
-
'Can get word locations without having processed the audio')
|
26 |
|
27 |
return self.word_locations_in_samples
|
28 |
|
|
|
15 |
|
16 |
def getTranscript(self) -> str:
|
17 |
"""Get the transcripts of the process audio"""
|
18 |
+
assert self.audio_transcript is not None, 'Can get audio transcripts without having processed the audio'
|
|
|
19 |
return self.audio_transcript
|
20 |
|
21 |
def getWordLocations(self) -> list:
|
22 |
"""Get the pair of words location from audio"""
|
23 |
+
assert self.word_locations_in_samples is not None, 'Can get word locations without having processed the audio'
|
|
|
24 |
|
25 |
return self.word_locations_in_samples
|
26 |
|
aip_trainer/pronunciationTrainer.py
CHANGED
@@ -128,7 +128,7 @@ class PronunciationTrainer:
|
|
128 |
app_logger.info('ok, return audio transcript!')
|
129 |
return current_recorded_transcript, current_recorded_ipa, current_recorded_word_locations
|
130 |
|
131 |
-
def getWordLocationsFromRecordInSeconds(self, word_locations, mapped_words_indices) ->
|
132 |
start_time = []
|
133 |
end_time = []
|
134 |
for word_idx in range(len(mapped_words_indices)):
|
@@ -163,7 +163,7 @@ class PronunciationTrainer:
|
|
163 |
self.ipa_converter.convertToPhonem(mapped_words[word_idx])))
|
164 |
return real_and_transcribed_words, real_and_transcribed_words_ipa, mapped_words_indices
|
165 |
|
166 |
-
def getPronunciationAccuracy(self, real_and_transcribed_words_ipa) -> float:
|
167 |
total_mismatches = 0.
|
168 |
number_of_phonemes = 0.
|
169 |
current_words_pronunciation_accuracy = []
|
|
|
128 |
app_logger.info('ok, return audio transcript!')
|
129 |
return current_recorded_transcript, current_recorded_ipa, current_recorded_word_locations
|
130 |
|
131 |
+
def getWordLocationsFromRecordInSeconds(self, word_locations, mapped_words_indices) -> tuple[str, str]:
|
132 |
start_time = []
|
133 |
end_time = []
|
134 |
for word_idx in range(len(mapped_words_indices)):
|
|
|
163 |
self.ipa_converter.convertToPhonem(mapped_words[word_idx])))
|
164 |
return real_and_transcribed_words, real_and_transcribed_words_ipa, mapped_words_indices
|
165 |
|
166 |
+
def getPronunciationAccuracy(self, real_and_transcribed_words_ipa) -> tuple[float, list]:
|
167 |
total_mismatches = 0.
|
168 |
number_of_phonemes = 0.
|
169 |
current_words_pronunciation_accuracy = []
|