Spaces:
Running
Running
alessandro trinca tornidor
committed on
Commit
·
023235e
1
Parent(s):
3fdcb38
feat: substitute sample table selection with direct input, update TestDataset tests
Browse files
- aip_trainer/lambdas/lambdaGetSample.py +7 -9
- static/css/{style-new.css → style.css} +23 -23
- static/javascript/callbacks.js +25 -69
- static/main.html +1 -2
- tests/test_dataset.py +1 -1
aip_trainer/lambdas/lambdaGetSample.py
CHANGED
@@ -60,20 +60,18 @@ def lambda_handler(event, context):
|
|
60 |
category = int(body['category'])
|
61 |
except KeyError:
|
62 |
category = 0
|
63 |
-
|
64 |
language = body['language']
|
65 |
try:
|
66 |
-
|
67 |
except KeyError:
|
68 |
-
|
69 |
-
|
70 |
-
app_logger.info(f"category={category}, language={language},
|
71 |
-
lambda_df_lang = lambda_database[language]
|
72 |
-
current_transcript = lambda_df_lang[sample_idx] if sample_idx is not None else lambda_df_lang.get_random_sample_from_df(language, category)
|
73 |
# sentence_category = getSentenceCategory(current_transcript[0])
|
74 |
-
|
|
|
75 |
|
76 |
-
app_logger.info(f"real_transcript={current_transcript}, ipa_transcript={current_ipa}.")
|
77 |
result = {
|
78 |
'real_transcript': current_transcript,
|
79 |
'ipa_transcript': current_ipa,
|
|
|
60 |
category = int(body['category'])
|
61 |
except KeyError:
|
62 |
category = 0
|
|
|
63 |
language = body['language']
|
64 |
try:
|
65 |
+
current_transcript = str(body["transcript"])
|
66 |
except KeyError:
|
67 |
+
lambda_df_lang = lambda_database[language]
|
68 |
+
current_transcript = lambda_df_lang.get_random_sample_from_df(language, category)
|
69 |
+
app_logger.info(f"category={category}, language={language}, current_transcript={current_transcript}.")
|
|
|
|
|
70 |
# sentence_category = getSentenceCategory(current_transcript[0])
|
71 |
+
current_transcript = current_transcript if isinstance(current_transcript, str) else current_transcript[0]
|
72 |
+
current_ipa = lambda_ipa_converter[language].convertToPhonem(current_transcript)
|
73 |
|
74 |
+
app_logger.info(f"real_transcript='{current_transcript}', ipa_transcript='{current_ipa}'.")
|
75 |
result = {
|
76 |
'real_transcript': current_transcript,
|
77 |
'ipa_transcript': current_ipa,
|
static/css/{style-new.css → style.css}
RENAMED
@@ -144,7 +144,7 @@ a.disabled {
|
|
144 |
left: 2%;
|
145 |
top: 63%;
|
146 |
transform: translate(-0%, -0%);
|
147 |
-
height:
|
148 |
width: 96%;
|
149 |
max-width: 96%;
|
150 |
background: #ffff;
|
@@ -152,7 +152,7 @@ a.disabled {
|
|
152 |
border-radius: 20px;
|
153 |
box-shadow: 0 0 20px 8px #d0d0d0;
|
154 |
overflow: scroll;
|
155 |
-
max-height:
|
156 |
}
|
157 |
|
158 |
.container-small {
|
@@ -240,8 +240,8 @@ a.disabled {
|
|
240 |
|
241 |
.mic-button-div {
|
242 |
position: fixed;
|
243 |
-
left:
|
244 |
-
top:
|
245 |
}
|
246 |
|
247 |
/*############### Drop-down ############# */
|
@@ -412,22 +412,22 @@ a.disabled {
|
|
412 |
box-shadow: 0 0 20px 8px #d0d0d0;
|
413 |
}
|
414 |
|
415 |
-
|
416 |
-
|
417 |
-
|
418 |
-
|
419 |
-
|
420 |
-
|
421 |
-
|
422 |
-
|
423 |
-
|
424 |
-
|
425 |
-
|
426 |
-
|
427 |
-
|
428 |
-
|
429 |
-
|
430 |
-
|
431 |
|
432 |
.icon-text {
|
433 |
font-size: 0.8em !important;
|
@@ -445,7 +445,7 @@ a.disabled {
|
|
445 |
/* 80px */
|
446 |
height: 3.5em;
|
447 |
padding-top: 0.4em;
|
448 |
-
left:
|
449 |
line-height: 0px;
|
450 |
border: 6px solid #fff;
|
451 |
border-radius: 50%;
|
@@ -460,8 +460,8 @@ a.disabled {
|
|
460 |
|
461 |
.mic-button-div {
|
462 |
position: fixed;
|
463 |
-
left:
|
464 |
-
top:
|
465 |
}
|
466 |
|
467 |
.link-icon-div {
|
|
|
144 |
left: 2%;
|
145 |
top: 63%;
|
146 |
transform: translate(-0%, -0%);
|
147 |
+
height: 10%;
|
148 |
width: 96%;
|
149 |
max-width: 96%;
|
150 |
background: #ffff;
|
|
|
152 |
border-radius: 20px;
|
153 |
box-shadow: 0 0 20px 8px #d0d0d0;
|
154 |
overflow: scroll;
|
155 |
+
max-height: 15%;
|
156 |
}
|
157 |
|
158 |
.container-small {
|
|
|
240 |
|
241 |
.mic-button-div {
|
242 |
position: fixed;
|
243 |
+
left: 50%;
|
244 |
+
top: 80%
|
245 |
}
|
246 |
|
247 |
/*############### Drop-down ############# */
|
|
|
412 |
box-shadow: 0 0 20px 8px #d0d0d0;
|
413 |
}
|
414 |
|
415 |
+
.container2 {
|
416 |
+
display: block;
|
417 |
+
position: absolute;
|
418 |
+
left: 2%;
|
419 |
+
top: 63%;
|
420 |
+
transform: translate(-0%, -0%);
|
421 |
+
height: 10%;
|
422 |
+
width: 96%;
|
423 |
+
max-width: 96%;
|
424 |
+
background: #ffff;
|
425 |
+
overflow: hidden;
|
426 |
+
border-radius: 20px;
|
427 |
+
box-shadow: 0 0 20px 8px #d0d0d0;
|
428 |
+
overflow: scroll;
|
429 |
+
max-height: 15%;
|
430 |
+
}
|
431 |
|
432 |
.icon-text {
|
433 |
font-size: 0.8em !important;
|
|
|
445 |
/* 80px */
|
446 |
height: 3.5em;
|
447 |
padding-top: 0.4em;
|
448 |
+
left: 50%;
|
449 |
line-height: 0px;
|
450 |
border: 6px solid #fff;
|
451 |
border-radius: 50%;
|
|
|
460 |
|
461 |
.mic-button-div {
|
462 |
position: fixed;
|
463 |
+
left: 50%;
|
464 |
+
top: 80%
|
465 |
}
|
466 |
|
467 |
.link-icon-div {
|
static/javascript/callbacks.js
CHANGED
@@ -23,9 +23,6 @@ let currentSoundRecorded = false;
|
|
23 |
let currentText, currentIpa, real_transcripts_ipa, matched_transcripts_ipa;
|
24 |
let wordCategories;
|
25 |
let startTime, endTime;
|
26 |
-
let allSamples = {};
|
27 |
-
let currentSamplesObj = {};
|
28 |
-
var timeout = null
|
29 |
|
30 |
// API related variables
|
31 |
let AILanguage = "de"; // Standard is German
|
@@ -180,7 +177,7 @@ const prepareUiForNextSample = async () => {
|
|
180 |
if (soundFileBad == null)
|
181 |
cacheSoundFiles();
|
182 |
|
183 |
-
updateScore(parseFloat(document.getElementById("pronunciation_accuracy").
|
184 |
|
185 |
document.getElementById("main_title").innerText = "Processing new sample...";
|
186 |
}
|
@@ -198,7 +195,6 @@ const populateSampleById = (dataById) => {
|
|
198 |
document.getElementById("recorded_ipa_script").innerText = ""
|
199 |
document.getElementById("pronunciation_accuracy").innerText = "";
|
200 |
document.getElementById("single_word_ipa_pair").innerText = "Reference | Spoken"
|
201 |
-
// document.getElementById("section_accuracy").innerText = "| Score: " + currentScore.toString() + " - (" + currentSample.toString() + ")";
|
202 |
document.getElementById("section_accuracy").innerText = `| Score: ${currentScore.toString()} - sample n: ${currentSample.toString()}`;
|
203 |
currentSample += 1;
|
204 |
|
@@ -277,7 +273,6 @@ const changeLanguage = (language, generateNewSample = false) => {
|
|
277 |
}
|
278 |
}
|
279 |
}
|
280 |
-
getTableFromSamples(allSamples, `${AILanguage}_sentence`);
|
281 |
if (generateNewSample)
|
282 |
getNextSample();
|
283 |
}
|
@@ -326,7 +321,7 @@ const startMediaDevice = () => {
|
|
326 |
try {
|
327 |
await fetch(apiMainPathSTS + '/GetAccuracyFromRecordedAudio', {
|
328 |
method: "post",
|
329 |
-
body: JSON.stringify({ "title": currentText
|
330 |
|
331 |
}).then(res => res.json()).
|
332 |
then(mediaData => {
|
@@ -349,7 +344,7 @@ const startMediaDevice = () => {
|
|
349 |
real_transcripts_ipa = mediaData.real_transcripts_ipa.split(" ")
|
350 |
matched_transcripts_ipa = mediaData.matched_transcripts_ipa.split(" ")
|
351 |
wordCategories = mediaData.pair_accuracy_category.split(" ")
|
352 |
-
let currentTextWords = currentText
|
353 |
|
354 |
coloredWords = "";
|
355 |
for (let word_idx = 0; word_idx < currentTextWords.length; word_idx++) {
|
@@ -406,7 +401,8 @@ const playSoundForAnswerAccuracy = async (accuracy) => {
|
|
406 |
const playAudio = async () => {
|
407 |
|
408 |
document.getElementById("main_title").innerText = "Generating sound...";
|
409 |
-
|
|
|
410 |
document.getElementById("main_title").innerText = "Current Sound was played";
|
411 |
|
412 |
};
|
@@ -472,7 +468,8 @@ const stopRecording = () => {
|
|
472 |
const playCurrentWord = async (word_idx) => {
|
473 |
|
474 |
document.getElementById("main_title").innerText = "Generating word...";
|
475 |
-
|
|
|
476 |
document.getElementById("main_title").innerText = "Word was played";
|
477 |
}
|
478 |
|
@@ -532,10 +529,6 @@ const wrapWordForIndividualPlayback = (word, word_idx) => {
|
|
532 |
// ########## Function to initialize server ###############
|
533 |
// This is to try to avoid aws lambda cold start
|
534 |
try {
|
535 |
-
fetch(apiMainPathSTS + '/getAllSamples').then(res => res.json()).then(dataAllSamples => {
|
536 |
-
populateAllSamples(dataAllSamples);
|
537 |
-
getTableFromSamples(dataAllSamples, `${AILanguage}_sentence`);
|
538 |
-
});
|
539 |
fetch(apiMainPathSTS + '/GetAccuracyFromRecordedAudio', {
|
540 |
method: "post",
|
541 |
body: JSON.stringify({ "title": '', "base64Audio": '', "language": AILanguage }),
|
@@ -571,67 +564,30 @@ const initializeServer = async () => {
|
|
571 |
}
|
572 |
}
|
573 |
|
574 |
-
const
|
575 |
-
|
576 |
-
|
577 |
-
|
578 |
-
|
579 |
-
|
580 |
-
|
581 |
-
|
582 |
-
|
583 |
-
|
584 |
-
|
585 |
-
|
586 |
-
|
587 |
-
}
|
588 |
-
table.appendChild(tr);
|
589 |
-
}
|
590 |
-
|
591 |
-
const createTableRow = (contentRow, sampleIdx, isFiltered = false) => {
|
592 |
-
var tr = document.createElement('tr');
|
593 |
-
tr.append(`${contentRow}`);
|
594 |
-
tr.onclick = async function () {
|
595 |
-
await prepareUiForNextSample()
|
596 |
-
// console.debug(`createTableRow:: ${isFiltered}, sampleIdx: `, sampleIdx);
|
597 |
-
await fetch(apiMainPathSample + '/getSample', {
|
598 |
-
method: "post",
|
599 |
-
body: JSON.stringify({
|
600 |
-
"language": AILanguage, "idx": sampleIdx
|
601 |
-
}),
|
602 |
-
}).then(res => {
|
603 |
-
let res2json = res.json()
|
604 |
-
// console.debug(`createTableRow:: ${isFiltered}, res2json: `, typeof res2json, "=>", res2json, "#");
|
605 |
-
return res2json
|
606 |
-
}).then(dataOnRowCreation => {
|
607 |
-
// console.debug(`createTableRow:: ${isFiltered}, dataOnRowCreation: `, typeof dataOnRowCreation, "=>", dataOnRowCreation, "#");
|
608 |
-
populateSampleById(dataOnRowCreation)
|
609 |
-
tr.style["background-color"] = "#f0f0f0";
|
610 |
-
})
|
611 |
-
};
|
612 |
-
return tr;
|
613 |
-
}
|
614 |
-
|
615 |
-
const filterAllSamples = async (obj, filter, lang) => {
|
616 |
-
if (filter == "") {
|
617 |
-
currentSamplesObj = {...obj}
|
618 |
-
};
|
619 |
-
objByLAng = obj[lang];
|
620 |
-
const filtered = Object.entries(objByLAng).filter(([key, value]) => value.toLowerCase().includes(filter));
|
621 |
-
currentSamplesObj = {
|
622 |
-
[lang]: Object.entries(filtered).map(([key, value]) => value[1])
|
623 |
-
};
|
624 |
}
|
625 |
|
626 |
-
// todo: fix the request from the rows filtered not working
|
627 |
$(document).ready(function(){
|
628 |
-
$("#field-filter-samples").on("keyup", function(e) {
|
629 |
e.preventDefault();
|
630 |
var keycode = (e.keyCode ? e.keyCode : e.which);
|
631 |
if (keycode === 13 || e.key === 'Enter') {
|
632 |
-
var valueFilter = $(this).val()
|
633 |
-
|
634 |
-
|
635 |
}
|
636 |
});
|
637 |
});
|
|
|
23 |
let currentText, currentIpa, real_transcripts_ipa, matched_transcripts_ipa;
|
24 |
let wordCategories;
|
25 |
let startTime, endTime;
|
|
|
|
|
|
|
26 |
|
27 |
// API related variables
|
28 |
let AILanguage = "de"; // Standard is German
|
|
|
177 |
if (soundFileBad == null)
|
178 |
cacheSoundFiles();
|
179 |
|
180 |
+
updateScore(parseFloat(document.getElementById("pronunciation_accuracy").innerText));
|
181 |
|
182 |
document.getElementById("main_title").innerText = "Processing new sample...";
|
183 |
}
|
|
|
195 |
document.getElementById("recorded_ipa_script").innerText = ""
|
196 |
document.getElementById("pronunciation_accuracy").innerText = "";
|
197 |
document.getElementById("single_word_ipa_pair").innerText = "Reference | Spoken"
|
|
|
198 |
document.getElementById("section_accuracy").innerText = `| Score: ${currentScore.toString()} - sample n: ${currentSample.toString()}`;
|
199 |
currentSample += 1;
|
200 |
|
|
|
273 |
}
|
274 |
}
|
275 |
}
|
|
|
276 |
if (generateNewSample)
|
277 |
getNextSample();
|
278 |
}
|
|
|
321 |
try {
|
322 |
await fetch(apiMainPathSTS + '/GetAccuracyFromRecordedAudio', {
|
323 |
method: "post",
|
324 |
+
body: JSON.stringify({ "title": currentText, "base64Audio": audioBase64, "language": AILanguage }),
|
325 |
|
326 |
}).then(res => res.json()).
|
327 |
then(mediaData => {
|
|
|
344 |
real_transcripts_ipa = mediaData.real_transcripts_ipa.split(" ")
|
345 |
matched_transcripts_ipa = mediaData.matched_transcripts_ipa.split(" ")
|
346 |
wordCategories = mediaData.pair_accuracy_category.split(" ")
|
347 |
+
let currentTextWords = currentText.split(" ")
|
348 |
|
349 |
coloredWords = "";
|
350 |
for (let word_idx = 0; word_idx < currentTextWords.length; word_idx++) {
|
|
|
401 |
const playAudio = async () => {
|
402 |
|
403 |
document.getElementById("main_title").innerText = "Generating sound...";
|
404 |
+
// console.debug(`playAudio:: currentText: `, typeof currentText, "=>", currentText, "#");
|
405 |
+
playWithMozillaApi(currentText);
|
406 |
document.getElementById("main_title").innerText = "Current Sound was played";
|
407 |
|
408 |
};
|
|
|
468 |
const playCurrentWord = async (word_idx) => {
|
469 |
|
470 |
document.getElementById("main_title").innerText = "Generating word...";
|
471 |
+
// console.debug(`playCurrentWord:: currentText: `, typeof currentText, "=>", currentText, "#");
|
472 |
+
playWithMozillaApi(currentText.split(' ')[word_idx]);
|
473 |
document.getElementById("main_title").innerText = "Word was played";
|
474 |
}
|
475 |
|
|
|
529 |
// ########## Function to initialize server ###############
|
530 |
// This is to try to avoid aws lambda cold start
|
531 |
try {
|
|
|
|
|
|
|
|
|
532 |
fetch(apiMainPathSTS + '/GetAccuracyFromRecordedAudio', {
|
533 |
method: "post",
|
534 |
body: JSON.stringify({ "title": '', "base64Audio": '', "language": AILanguage }),
|
|
|
564 |
}
|
565 |
}
|
566 |
|
567 |
+
const getSampleFromTextInput = async (AILanguage, textInput) => {
|
568 |
+
await fetch(apiMainPathSample + '/getSample', {
|
569 |
+
method: "post",
|
570 |
+
body: JSON.stringify({
|
571 |
+
"language": AILanguage, "transcript": textInput
|
572 |
+
}),
|
573 |
+
}).then(res => {
|
574 |
+
let res2json = res.json()
|
575 |
+
// console.debug(`getSampleFromTextInput:: res2json: `, typeof res2json, "=>", res2json, "#");
|
576 |
+
return res2json
|
577 |
+
}).then(dataOnInput => {
|
578 |
+
console.log(`getSampleFromTextInput:: dataOnInput: `, typeof dataOnInput, "=>", dataOnInput, "#");
|
579 |
+
populateSampleById(dataOnInput)
|
580 |
+
})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
581 |
}
|
582 |
|
|
|
583 |
$(document).ready(function(){
|
584 |
+
$("#field-filter-samples").on("keyup", async function(e) {
|
585 |
e.preventDefault();
|
586 |
var keycode = (e.keyCode ? e.keyCode : e.which);
|
587 |
if (keycode === 13 || e.key === 'Enter') {
|
588 |
+
var valueFilter = $(this).val()
|
589 |
+
// console.debug(`input:: valueFilter: `, typeof valueFilter, "=>", valueFilter, ", AILanguage: ", AILanguage, "#");
|
590 |
+
await getSampleFromTextInput(AILanguage, valueFilter);
|
591 |
}
|
592 |
});
|
593 |
});
|
static/main.html
CHANGED
@@ -18,7 +18,7 @@
|
|
18 |
></script>
|
19 |
|
20 |
<script src="static/javascript/callbacks.js"></script>
|
21 |
-
<link rel="stylesheet" href="static/css/style
|
22 |
<link href="https://fonts.googleapis.com/icon?family=Material+Icons" rel="stylesheet" />
|
23 |
</head>
|
24 |
|
@@ -127,7 +127,6 @@
|
|
127 |
<div class="container2">
|
128 |
<div id="div-field-filter-samples" style="position: absolute; width: 97%; margin: 1em;">
|
129 |
<input id="field-filter-samples" type="search" class="form-control" placeholder="Write and press enter to filter">
|
130 |
-
<div id="field-samples">{}</div>
|
131 |
</div>
|
132 |
</div>
|
133 |
|
|
|
18 |
></script>
|
19 |
|
20 |
<script src="static/javascript/callbacks.js"></script>
|
21 |
+
<link rel="stylesheet" href="static/css/style.css" />
|
22 |
<link href="https://fonts.googleapis.com/icon?family=Material+Icons" rel="stylesheet" />
|
23 |
</head>
|
24 |
|
|
|
127 |
<div class="container2">
|
128 |
<div id="div-field-filter-samples" style="position: absolute; width: 97%; margin: 1em;">
|
129 |
<input id="field-filter-samples" type="search" class="form-control" placeholder="Write and press enter to filter">
|
|
|
130 |
</div>
|
131 |
</div>
|
132 |
|
tests/test_dataset.py
CHANGED
@@ -10,7 +10,7 @@ def helper_category(category: int, threshold_min: int, threshold_max: int, n: in
|
|
10 |
event = {'body': json.dumps({'category': category, 'language': 'de'})}
|
11 |
response = lambdaGetSample.lambda_handler(event, [])
|
12 |
response_dict = json.loads(response)
|
13 |
-
number_of_words = len(response_dict['real_transcript']
|
14 |
try:
|
15 |
assert threshold_min < number_of_words <= threshold_max
|
16 |
except AssertionError:
|
|
|
10 |
event = {'body': json.dumps({'category': category, 'language': 'de'})}
|
11 |
response = lambdaGetSample.lambda_handler(event, [])
|
12 |
response_dict = json.loads(response)
|
13 |
+
number_of_words = len(response_dict['real_transcript'].split())
|
14 |
try:
|
15 |
assert threshold_min < number_of_words <= threshold_max
|
16 |
except AssertionError:
|