diff --git "a/sample.input.json" "b/sample.input.json" new file mode 100644--- /dev/null +++ "b/sample.input.json" @@ -0,0 +1,9980 @@ +{ + "sentences": [ + { + "sentence_id": "S1", + "sentence_pid": "11312/t-00017816-1", + "aphasia_type": "BROCA", + "dialogues": [ + { + "INV": [ + { + "tokens": [ + "I~aux", + "go-PRESP", + "to", + "be", + "ask-PRESP", + "you", + "to", + "do", + "some", + "talk-PRESP" + ], + "word_pos_ids": [ + 1, + 2, + 3, + 4, + 2, + 5, + 3, + 6, + 7, + 2 + ], + "word_grammar_ids": [ + [ + 1, + 3, + 1 + ], + [ + 2, + 3, + 2 + ], + [ + 3, + 0, + 3 + ], + [ + 4, + 6, + 4 + ], + [ + 5, + 6, + 2 + ], + [ + 6, + 3, + 5 + ], + [ + 7, + 6, + 6 + ], + [ + 8, + 9, + 4 + ], + [ + 9, + 6, + 5 + ], + [ + 10, + 9, + 6 + ], + [ + 11, + 9, + 7 + ], + [ + 12, + 3, + 8 + ] + ], + "word_durations": [ + [ + "I'm", + 340 + ], + [ + "going", + 320 + ], + [ + "to", + 120 + ], + [ + "be", + 200 + ], + [ + "asking", + 410 + ], + [ + "you", + 110 + ], + [ + "to", + 80 + ], + [ + "do", + 170 + ], + [ + "some", + 220 + ], + [ + "talking", + 600 + ] + ] + } + ], + "PAR": [ + { + "tokens": [ + "yeah", + "well" + ], + "word_pos_ids": [ + 8, + 8 + ], + "word_grammar_ids": [ + [ + 1, + 2, + 9 + ], + [ + 2, + 0, + 10 + ], + [ + 3, + 2, + 8 + ] + ], + "word_durations": [ + [ + "yeah", + 410 + ], + [ + "well", + 1830 + ] + ] + } + ] + }, + { + "INV": [ + { + "tokens": [ + "how", + "do", + "you", + "think", + "your", + "speech", + "be&3S", + "these", + "day-PL" + ], + "word_pos_ids": [ + 9, + 10, + 5, + 6, + 11, + 12, + 13, + 14, + 12 + ], + "word_grammar_ids": [ + [ + 1, + 4, + 11 + ], + [ + 2, + 4, + 2 + ], + [ + 3, + 4, + 1 + ], + [ + 4, + 0, + 3 + ], + [ + 5, + 6, + 12 + ], + [ + 6, + 7, + 1 + ], + [ + 7, + 4, + 5 + ], + [ + 8, + 9, + 12 + ], + [ + 9, + 7, + 13 + ], + [ + 10, + 4, + 8 + ] + ], + "word_durations": [ + [ + "how", + 120 + ], + [ + "do", + 70 + ], + [ + "you", + 120 + ], + [ + "think", + 290 + ], + [ + "your", + 80 + ], + [ + "speech", + 410 + ], + [ + "is", + 150 + ], + [ + "these", + 280 + ], + [ + "days", + 380 + ] + ] + } + ], + "PAR": [ + { + "tokens": [ + "I", + "yeah", + "you_know", + "dada", + "dada" + ], + "word_pos_ids": [ + 1, + 8, + 8, + 15, + 15 + ], + "word_grammar_ids": [ + [ + 1, + 2, + 1 + ], + [ + 2, + 4, + 9 + ], + [ + 3, + 4, + 9 + ], + [ + 4, + 0, + 3 + ], + [ + 5, + 4, + 13 + ], + [ + 6, + 4, + 8 + ] + ], + "word_durations": [ + [ + "I", + 460 + ], + [ + "yeah", + 430 + ], + [ + "you_know", + 340 + ], + [ + "dada@b", + 780 + ], + [ + "dada@b", + 650 + ] + ] + } + ] + }, + { + "INV": [ + { + "tokens": [ + "okay" + ], + "word_pos_ids": [ + 8 + ], + "word_grammar_ids": [ + [ + 1, + 0, + 10 + ], + [ + 2, + 1, + 8 + ] + ], + "word_durations": [ + [ + "okay", + 120 + ] + ] + } + ], + "PAR": [] + }, + { + "INV": [ + { + "tokens": [ + "anything", + "else" + ], + "word_pos_ids": [ + 7, + 16 + ], + "word_grammar_ids": [ + [ + 1, + 0, + 10 + ], + [ + 2, + 1, + 14 + ], + [ + 3, + 1, + 8 + ] + ], + "word_durations": [ + [ + "anything", + 390 + ], + [ + "else", + 160 + ] + ] + } + ], + "PAR": [] + }, + { + "INV": [ + { + "tokens": [ + "do", + "you", + "remember", + "when", + "you", + "have&PAST", + "your", + "stroke" + ], + "word_pos_ids": [ + 10, + 5, + 6, + 17, + 5, + 6, + 11, + 12 + ], + "word_grammar_ids": [ + [ + 1, + 3, + 2 + ], + [ + 2, + 3, + 1 + ], + [ + 3, + 0, + 3 + ], + [ + 4, + 6, + 15 + ], + [ + 5, + 6, + 1 + ], + [ + 6, + 3, + 16 + ], + [ + 7, + 8, + 12 + ], + [ + 8, + 6, + 6 + ], + [ + 9, + 3, + 8 + ] + ], + "word_durations": [ + [ + "do", + 140 + ], + [ + "you", + 100 + ], + [ + "remember", + 360 + ], + [ + "when", + 190 + ], + [ + "you", + 110 + ], + [ + "had", + 200 + ], + [ + "your", + 90 + ], + [ + "stroke", + 470 + ] + ] + } + ], + "PAR": [ + { + "tokens": [ + "yes", + "beg", + "it~cop", + "two", + "thousand", + "two", + "day-PL" + ], + "word_pos_ids": [ + 8, + 18, + 5, + 19, + 19, + 19, + 12 + ], + "word_grammar_ids": [ + [ + 1, + 0, + 17 + ], + [ + 2, + 1, + 18 + ], + [ + 3, + 4, + 1 + ], + [ + 4, + 0, + 3 + ], + [ + 5, + 6, + 19 + ], + [ + 6, + 8, + 19 + ], + [ + 7, + 8, + 19 + ], + [ + 8, + 4, + 13 + ], + [ + 9, + 4, + 8 + ] + ], + "word_durations": [ + [ + "yes", + 430 + ], + [ + "it's", + 520 + ], + [ + "two", + 330 + ], + [ + "thousand", + 770 + ], + [ + "two", + 350 + ], + [ + "days", + 540 + ], + 0 + ] + }, + { + "tokens": [ + "no" + ], + "word_pos_ids": [ + 8 + ], + "word_grammar_ids": [ + [ + 1, + 0, + 10 + ], + [ + 2, + 1, + 8 + ] + ], + "word_durations": [ + [ + "no", + 300 + ] + ] + }, + { + "tokens": [ + "after", + "New_Year's_Day", + "two", + "thousand" + ], + "word_pos_ids": [ + 20, + 21, + 19, + 19 + ], + "word_grammar_ids": [ + [ + 1, + 0, + 10 + ], + [ + 2, + 4, + 20 + ], + [ + 3, + 4, + 19 + ], + [ + 4, + 1, + 21 + ], + [ + 5, + 1, + 8 + ] + ], + "word_durations": [ + [ + "after", + 740 + ], + [ + "p:w]", + 480 + ], + [ + "two", + 950 + ], + [ + "thousand", + 880 + ] + ] + } + ] + }, + { + "INV": [ + { + "tokens": [ + "two", + "thousand" + ], + "word_pos_ids": [ + 19, + 19 + ], + "word_grammar_ids": [ + [ + 1, + 2, + 19 + ], + [ + 2, + 0, + 10 + ], + [ + 3, + 2, + 8 + ] + ], + "word_durations": [ + [ + "two", + 150 + ], + [ + "thousand", + 550 + ] + ] + } + ], + "PAR": [ + { + "tokens": [], + "word_pos_ids": [], + "word_grammar_ids": [], + "word_durations": [] + } + ] + }, + { + "INV": [ + { + "tokens": [ + "okay" + ], + "word_pos_ids": [ + 8 + ], + "word_grammar_ids": [ + [ + 1, + 0, + 10 + ], + [ + 2, + 1, + 8 + ] + ], + "word_durations": [ + [ + "(o)kay", + 90 + ] + ] + } + ], + "PAR": [] + }, + { + "INV": [ + { + "tokens": [ + "can", + "you", + "tell", + "me", + "anything", + "about", + "it" + ], + "word_pos_ids": [ + 10, + 5, + 6, + 22, + 7, + 20, + 5 + ], + "word_grammar_ids": [ + [ + 1, + 3, + 2 + ], + [ + 2, + 3, + 1 + ], + [ + 3, + 0, + 3 + ], + [ + 4, + 3, + 22 + ], + [ + 5, + 3, + 6 + ], + [ + 6, + 5, + 23 + ], + [ + 7, + 6, + 21 + ], + [ + 8, + 3, + 8 + ] + ], + "word_durations": [ + [ + "can", + 190 + ], + [ + "you", + 80 + ], + [ + "tell", + 240 + ], + [ + "me", + 170 + ], + [ + "anything", + 410 + ], + [ + "about", + 330 + ], + [ + "it", + 210 + ] + ] + } + ], + "PAR": [ + { + "tokens": [ + "I", + "do~neg", + "remember" + ], + "word_pos_ids": [ + 1, + 10, + 6 + ], + "word_grammar_ids": [ + [ + 1, + 4, + 1 + ], + [ + 2, + 4, + 2 + ], + [ + 3, + 2, + 24 + ], + [ + 4, + 0, + 3 + ], + [ + 5, + 4, + 8 + ] + ], + "word_durations": [ + [ + "&-uh", + 50 + ], + [ + "I", + 410 + ], + [ + "don't", + 660 + ] + ] + }, + { + "tokens": [ + "I", + "do~neg", + "remember" + ], + "word_pos_ids": [ + 1, + 10, + 6 + ], + "word_grammar_ids": [ + [ + 1, + 4, + 1 + ], + [ + 2, + 4, + 2 + ], + [ + 3, + 2, + 24 + ], + [ + 4, + 0, + 3 + ], + [ + 5, + 4, + 8 + ] + ], + "word_durations": [ + [ + "I", + 200 + ], + [ + "don't", + 740 + ], + [ + "p:n]", + 560 + ] + ] + } + ] + }, + { + "INV": [ + { + "tokens": [], + "word_pos_ids": [], + "word_grammar_ids": [], + "word_durations": [] + } + ], + "PAR": [] + }, + { + "INV": [ + { + "tokens": [ + "tell", + "me", + "about", + "your", + "recovery" + ], + "word_pos_ids": [ + 6, + 22, + 20, + 11, + 12 + ], + "word_grammar_ids": [ + [ + 1, + 0, + 3 + ], + [ + 2, + 1, + 6 + ], + [ + 3, + 1, + 11 + ], + [ + 4, + 5, + 12 + ], + [ + 5, + 3, + 21 + ], + [ + 6, + 1, + 8 + ] + ], + "word_durations": [ + [ + "tell", + 240 + ], + [ + "me", + 110 + ], + [ + "about", + 280 + ], + [ + "your", + 130 + ], + [ + "recovery", + 950 + ] + ] + } + ], + "PAR": [] + }, + { + "INV": [ + { + "tokens": [ + "what", + "kind-PL", + "of", + "thing-PL", + "have", + "you", + "do&PASTP", + "to", + "get", + "good&CP", + "since", + "your", + "stroke" + ], + "word_pos_ids": [ + 9, + 12, + 20, + 12, + 6, + 5, + 2, + 3, + 6, + 23, + 20, + 11, + 12 + ], + "word_grammar_ids": [ + [ + 1, + 2, + 12 + ], + [ + 2, + 0, + 10 + ], + [ + 3, + 2, + 23 + ], + [ + 4, + 5, + 1 + ], + [ + 5, + 3, + 21 + ], + [ + 6, + 7, + 1 + ], + [ + 7, + 5, + 5 + ], + [ + 8, + 9, + 4 + ], + [ + 9, + 7, + 5 + ], + [ + 10, + 9, + 13 + ], + [ + 11, + 10, + 11 + ], + [ + 12, + 13, + 12 + ], + [ + 13, + 11, + 21 + ], + [ + 14, + 2, + 8 + ] + ], + "word_durations": [ + [ + "what", + 170 + ], + [ + "kinds", + 610 + ], + [ + "of", + 230 + ], + [ + "things", + 440 + ], + [ + "have", + 190 + ], + [ + "you", + 160 + ], + [ + "done", + 680 + ], + [ + "to", + 250 + ], + [ + "get", + 300 + ], + [ + "better", + 280 + ], + [ + "since", + 370 + ], + [ + "your", + 80 + ], + [ + "stroke", + 510 + ] + ] + } + ], + "PAR": [ + { + "tokens": [ + "oh", + "beg", + "yeah", + "beg", + "x" + ], + "word_pos_ids": [ + 8, + 18, + 8, + 18, + 24 + ], + "word_grammar_ids": [ + [ + 1, + 0, + 17 + ], + [ + 2, + 1, + 18 + ], + [ + 3, + 0, + 17 + ], + [ + 4, + 3, + 18 + ], + [ + 5, + 0, + 10 + ], + [ + 6, + 5, + 8 + ] + ], + "word_durations": [ + [ + "oh", + 280 + ], + [ + "yeah", + 250 + ], + [ + "n:uk]", + 230 + ], + [ + "&-uh", + 250 + ], + [ + "&-um", + 550 + ] + ] + }, + { + "tokens": [ + "I", + "aphasia" + ], + "word_pos_ids": [ + 1, + 12 + ], + "word_grammar_ids": [ + [ + 1, + 2, + 1 + ], + [ + 2, + 0, + 10 + ], + [ + 3, + 2, + 8 + ] + ], + "word_durations": [ + [ + "I", + 410 + ], + [ + "[//]", + 940 + ] + ] + }, + { + "tokens": [ + "oh", + "beg", + "yes" + ], + "word_pos_ids": [ + 8, + 18, + 8 + ], + "word_grammar_ids": [ + [ + 1, + 0, + 17 + ], + [ + 2, + 1, + 18 + ], + [ + 3, + 0, + 10 + ], + [ + 4, + 3, + 8 + ] + ], + "word_durations": [ + [ + "oh", + 410 + ], + [ + "[/]", + 530 + ], + [ + "[/]", + 480 + ] + ] + } + ] + }, + { + "INV": [ + { + "tokens": [ + "okay", + "beg", + "good" + ], + "word_pos_ids": [ + 8, + 18, + 23 + ], + "word_grammar_ids": [ + [ + 1, + 0, + 17 + ], + [ + 2, + 1, + 18 + ], + [ + 3, + 0, + 10 + ], + [ + 4, + 3, + 8 + ] + ], + "word_durations": [ + [ + "