update example
Browse files- example_usage.ipynb +115 -135
example_usage.ipynb
CHANGED
@@ -2,12 +2,21 @@
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
-
"execution_count":
|
6 |
"id": "5920c653-448e-43b3-93eb-12d7073ad352",
|
7 |
"metadata": {
|
8 |
"tags": []
|
9 |
},
|
10 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
"source": [
|
12 |
"from espnet2.bin.asr_inference import Speech2Text\n",
|
13 |
"from espnet2.bin.asr_align import CTCSegmentation\n",
|
@@ -17,7 +26,7 @@
|
|
17 |
},
|
18 |
{
|
19 |
"cell_type": "code",
|
20 |
-
"execution_count":
|
21 |
"id": "83058587-1a8a-4b01-92ff-e9125fbe55a3",
|
22 |
"metadata": {
|
23 |
"tags": []
|
@@ -38,14 +47,14 @@
|
|
38 |
},
|
39 |
{
|
40 |
"cell_type": "code",
|
41 |
-
"execution_count":
|
42 |
"id": "5e4670d6-0949-48cf-b6b1-d9cc4cf3ad65",
|
43 |
"metadata": {
|
44 |
"tags": []
|
45 |
},
|
46 |
"outputs": [],
|
47 |
"source": [
|
48 |
-
"speech2text = Speech2Text(\"exp/config.yaml\", \"exp/valid.acc.ave_10best.pth\")"
|
49 |
]
|
50 |
},
|
51 |
{
|
@@ -60,7 +69,7 @@
|
|
60 |
},
|
61 |
{
|
62 |
"cell_type": "code",
|
63 |
-
"execution_count":
|
64 |
"id": "e8120e8e-3718-4a1a-ab7a-46ef98a6bc11",
|
65 |
"metadata": {
|
66 |
"tags": []
|
@@ -73,7 +82,7 @@
|
|
73 |
},
|
74 |
{
|
75 |
"cell_type": "code",
|
76 |
-
"execution_count":
|
77 |
"id": "eec8d4b2-c27a-4780-aeed-8aa7538f70e5",
|
78 |
"metadata": {
|
79 |
"tags": []
|
@@ -83,8 +92,8 @@
|
|
83 |
"name": "stdout",
|
84 |
"output_type": "stream",
|
85 |
"text": [
|
86 |
-
"CPU times: user
|
87 |
-
"Wall time:
|
88 |
]
|
89 |
}
|
90 |
],
|
@@ -94,7 +103,7 @@
|
|
94 |
},
|
95 |
{
|
96 |
"cell_type": "code",
|
97 |
-
"execution_count":
|
98 |
"id": "39f41a8b-94c3-42d6-a989-6c7183a6f94d",
|
99 |
"metadata": {
|
100 |
"tags": []
|
@@ -104,7 +113,7 @@
|
|
104 |
"name": "stdout",
|
105 |
"output_type": "stream",
|
106 |
"text": [
|
107 |
-
"mina tahaksin homme täna ja homme kui
|
108 |
]
|
109 |
}
|
110 |
],
|
@@ -114,7 +123,7 @@
|
|
114 |
},
|
115 |
{
|
116 |
"cell_type": "code",
|
117 |
-
"execution_count":
|
118 |
"id": "812060a6-90de-4134-8d1f-9f3d98853bc2",
|
119 |
"metadata": {
|
120 |
"tags": []
|
@@ -151,7 +160,7 @@
|
|
151 |
},
|
152 |
{
|
153 |
"cell_type": "code",
|
154 |
-
"execution_count":
|
155 |
"id": "e6b7331c-52f1-4162-b564-2e6a08b325b0",
|
156 |
"metadata": {
|
157 |
"tags": []
|
@@ -172,7 +181,7 @@
|
|
172 |
},
|
173 |
{
|
174 |
"cell_type": "code",
|
175 |
-
"execution_count":
|
176 |
"id": "e6d18b5f-3d2a-4fcf-bf4e-00480e58094a",
|
177 |
"metadata": {
|
178 |
"tags": []
|
@@ -189,13 +198,12 @@
|
|
189 |
"utt_0004 utt 3.24 3.76 -0.0037 ja\n",
|
190 |
"utt_0005 utt 3.76 4.28 -0.0000 homme\n",
|
191 |
"utt_0006 utt 5.61 6.13 -0.0001 kui\n",
|
192 |
-
"utt_0007 utt 6.17 6.69 -0.0009
|
193 |
-
"utt_0008 utt
|
194 |
-
"utt_0009 utt
|
195 |
-
"utt_0010 utt
|
196 |
-
"utt_0011 utt 9.
|
197 |
-
"utt_0012 utt
|
198 |
-
"utt_0013 utt 10.07 10.31 -0.1041 panna\n",
|
199 |
"\n"
|
200 |
]
|
201 |
}
|
@@ -216,7 +224,7 @@
|
|
216 |
},
|
217 |
{
|
218 |
"cell_type": "code",
|
219 |
-
"execution_count":
|
220 |
"id": "ae9f7e3f-b75d-4bcb-98d1-ae2f037fb4af",
|
221 |
"metadata": {
|
222 |
"tags": []
|
@@ -236,19 +244,7 @@
|
|
236 |
},
|
237 |
{
|
238 |
"cell_type": "code",
|
239 |
-
"execution_count":
|
240 |
-
"id": "93aa6281-3b73-47b7-93ca-e90fedd8d398",
|
241 |
-
"metadata": {
|
242 |
-
"tags": []
|
243 |
-
},
|
244 |
-
"outputs": [],
|
245 |
-
"source": [
|
246 |
-
"torch.set_num_threads(5)"
|
247 |
-
]
|
248 |
-
},
|
249 |
-
{
|
250 |
-
"cell_type": "code",
|
251 |
-
"execution_count": 131,
|
252 |
"id": "0215d312-1896-43f1-9782-c92aced787b7",
|
253 |
"metadata": {
|
254 |
"tags": []
|
@@ -258,8 +254,8 @@
|
|
258 |
"name": "stdout",
|
259 |
"output_type": "stream",
|
260 |
"text": [
|
261 |
-
"CPU times: user
|
262 |
-
"Wall time: 2.
|
263 |
]
|
264 |
}
|
265 |
],
|
@@ -272,7 +268,7 @@
|
|
272 |
},
|
273 |
{
|
274 |
"cell_type": "code",
|
275 |
-
"execution_count":
|
276 |
"id": "d31d6840-3a80-411a-969c-05f4a5e3e9a1",
|
277 |
"metadata": {
|
278 |
"tags": []
|
@@ -301,7 +297,7 @@
|
|
301 |
},
|
302 |
{
|
303 |
"cell_type": "code",
|
304 |
-
"execution_count":
|
305 |
"id": "53f3b63f-9b40-432b-b58c-f5b7223252ed",
|
306 |
"metadata": {
|
307 |
"tags": []
|
@@ -311,8 +307,8 @@
|
|
311 |
"name": "stdout",
|
312 |
"output_type": "stream",
|
313 |
"text": [
|
314 |
-
"CPU times: user
|
315 |
-
"Wall time:
|
316 |
]
|
317 |
}
|
318 |
],
|
@@ -322,7 +318,7 @@
|
|
322 |
},
|
323 |
{
|
324 |
"cell_type": "code",
|
325 |
-
"execution_count":
|
326 |
"id": "1b4dd747-4be2-4ace-a301-6207f7dd9a71",
|
327 |
"metadata": {
|
328 |
"tags": []
|
@@ -358,173 +354,149 @@
|
|
358 |
" <tbody>\n",
|
359 |
" <tr>\n",
|
360 |
" <th>0</th>\n",
|
361 |
-
" <td>0.
|
362 |
-
" <td>0.
|
363 |
-
" <td>-0.
|
364 |
" <td>klikid</td>\n",
|
365 |
" </tr>\n",
|
366 |
" <tr>\n",
|
367 |
" <th>1</th>\n",
|
368 |
-
" <td>0.
|
369 |
-
" <td>0.
|
370 |
-
" <td>-0.
|
371 |
" <td>neid</td>\n",
|
372 |
" </tr>\n",
|
373 |
" <tr>\n",
|
374 |
" <th>2</th>\n",
|
375 |
-
" <td>0.
|
376 |
-
" <td>1.
|
377 |
-
" <td>-0.
|
378 |
" <td>allserva</td>\n",
|
379 |
" </tr>\n",
|
380 |
" <tr>\n",
|
381 |
" <th>3</th>\n",
|
382 |
-
" <td>1.
|
383 |
-
" <td>1.
|
384 |
-
" <td>-0.
|
385 |
" <td>tekivad</td>\n",
|
386 |
" </tr>\n",
|
387 |
" <tr>\n",
|
388 |
" <th>4</th>\n",
|
389 |
-
" <td>2.
|
390 |
-
" <td>3.
|
391 |
-
" <td>-0.
|
392 |
" <td>need</td>\n",
|
393 |
" </tr>\n",
|
394 |
" <tr>\n",
|
395 |
" <th>5</th>\n",
|
396 |
-
" <td>3.
|
397 |
-
" <td>4.
|
398 |
-
" <td>-0.
|
399 |
" <td>lubaküpsiseid</td>\n",
|
400 |
" </tr>\n",
|
401 |
" <tr>\n",
|
402 |
" <th>6</th>\n",
|
403 |
-
" <td>4.
|
404 |
-
" <td>5.
|
405 |
-
" <td>-0.
|
406 |
" <td>mis</td>\n",
|
407 |
" </tr>\n",
|
408 |
" <tr>\n",
|
409 |
" <th>7</th>\n",
|
410 |
-
" <td>5.
|
411 |
-
" <td>5.
|
412 |
-
" <td>-0.
|
413 |
" <td>on</td>\n",
|
414 |
" </tr>\n",
|
415 |
" <tr>\n",
|
416 |
" <th>8</th>\n",
|
417 |
-
" <td>5.
|
418 |
-
" <td>5.
|
419 |
-
" <td>-0.
|
420 |
" <td>nagu</td>\n",
|
421 |
" </tr>\n",
|
422 |
" <tr>\n",
|
423 |
" <th>9</th>\n",
|
424 |
-
" <td>5.
|
425 |
-
" <td>5.
|
426 |
-
" <td>-0.
|
427 |
" <td>ilusti</td>\n",
|
428 |
" </tr>\n",
|
429 |
" <tr>\n",
|
430 |
" <th>10</th>\n",
|
431 |
-
" <td>5.
|
432 |
-
" <td>6.
|
433 |
-
" <td>-0.
|
434 |
" <td>kohati</td>\n",
|
435 |
" </tr>\n",
|
436 |
" <tr>\n",
|
437 |
" <th>11</th>\n",
|
438 |
-
" <td>6.
|
439 |
-
" <td>7.
|
440 |
-
" <td>-0.
|
441 |
" <td>tõlgitud</td>\n",
|
442 |
" </tr>\n",
|
443 |
" <tr>\n",
|
444 |
" <th>12</th>\n",
|
445 |
-
" <td>7.
|
446 |
-
" <td>7.
|
447 |
-
" <td>-0.
|
448 |
" <td>eesti</td>\n",
|
449 |
" </tr>\n",
|
450 |
" <tr>\n",
|
451 |
" <th>13</th>\n",
|
452 |
-
" <td>7.
|
453 |
-
" <td>7.
|
454 |
-
" <td>-0.
|
455 |
" <td>keelde</td>\n",
|
456 |
" </tr>\n",
|
457 |
" <tr>\n",
|
458 |
" <th>14</th>\n",
|
459 |
-
" <td>7.
|
460 |
-
" <td>8.
|
461 |
-
" <td>-0.
|
462 |
" <td>see</td>\n",
|
463 |
" </tr>\n",
|
464 |
" <tr>\n",
|
465 |
" <th>15</th>\n",
|
466 |
-
" <td>8.
|
467 |
-
" <td>8.
|
468 |
-
" <td>-0.
|
469 |
" <td>idee</td>\n",
|
470 |
" </tr>\n",
|
471 |
" <tr>\n",
|
472 |
" <th>16</th>\n",
|
473 |
-
" <td>8.
|
474 |
-
" <td>9.
|
475 |
-
" <td>-0.
|
476 |
" <td>arusaadavamaks</td>\n",
|
477 |
" </tr>\n",
|
478 |
-
" <tr>\n",
|
479 |
-
" <th>17</th>\n",
|
480 |
-
" <td>9.363527</td>\n",
|
481 |
-
" <td>9.584093</td>\n",
|
482 |
-
" <td>-0.390966</td>\n",
|
483 |
-
" <td>ma</td>\n",
|
484 |
-
" </tr>\n",
|
485 |
-
" <tr>\n",
|
486 |
-
" <th>18</th>\n",
|
487 |
-
" <td>9.584093</td>\n",
|
488 |
-
" <td>9.764557</td>\n",
|
489 |
-
" <td>-0.053868</td>\n",
|
490 |
-
" <td>tean</td>\n",
|
491 |
-
" </tr>\n",
|
492 |
-
" <tr>\n",
|
493 |
-
" <th>19</th>\n",
|
494 |
-
" <td>9.764557</td>\n",
|
495 |
-
" <td>9.924968</td>\n",
|
496 |
-
" <td>-0.000163</td>\n",
|
497 |
-
" <td>et</td>\n",
|
498 |
-
" </tr>\n",
|
499 |
" </tbody>\n",
|
500 |
"</table>\n",
|
501 |
"</div>"
|
502 |
],
|
503 |
"text/plain": [
|
504 |
" start end confidence words\n",
|
505 |
-
"0 0.
|
506 |
-
"1 0.
|
507 |
-
"2 0.
|
508 |
-
"3 1.
|
509 |
-
"4 2.
|
510 |
-
"5 3.
|
511 |
-
"6 4.
|
512 |
-
"7 5.
|
513 |
-
"8 5.
|
514 |
-
"9 5.
|
515 |
-
"10 5.
|
516 |
-
"11 6.
|
517 |
-
"12 7.
|
518 |
-
"13 7.
|
519 |
-
"14 7.
|
520 |
-
"15 8.
|
521 |
-
"16 8.
|
522 |
-
"17 9.363527 9.584093 -0.390966 ma\n",
|
523 |
-
"18 9.584093 9.764557 -0.053868 tean\n",
|
524 |
-
"19 9.764557 9.924968 -0.000163 et"
|
525 |
]
|
526 |
},
|
527 |
-
"execution_count":
|
528 |
"metadata": {},
|
529 |
"output_type": "execute_result"
|
530 |
}
|
@@ -540,6 +512,14 @@
|
|
540 |
"metadata": {},
|
541 |
"outputs": [],
|
542 |
"source": []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
543 |
}
|
544 |
],
|
545 |
"metadata": {
|
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
+
"execution_count": 1,
|
6 |
"id": "5920c653-448e-43b3-93eb-12d7073ad352",
|
7 |
"metadata": {
|
8 |
"tags": []
|
9 |
},
|
10 |
+
"outputs": [
|
11 |
+
{
|
12 |
+
"name": "stderr",
|
13 |
+
"output_type": "stream",
|
14 |
+
"text": [
|
15 |
+
"/opt/espnet/tools/anaconda/envs/espnet/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
16 |
+
" from .autonotebook import tqdm as notebook_tqdm\n"
|
17 |
+
]
|
18 |
+
}
|
19 |
+
],
|
20 |
"source": [
|
21 |
"from espnet2.bin.asr_inference import Speech2Text\n",
|
22 |
"from espnet2.bin.asr_align import CTCSegmentation\n",
|
|
|
26 |
},
|
27 |
{
|
28 |
"cell_type": "code",
|
29 |
+
"execution_count": 2,
|
30 |
"id": "83058587-1a8a-4b01-92ff-e9125fbe55a3",
|
31 |
"metadata": {
|
32 |
"tags": []
|
|
|
47 |
},
|
48 |
{
|
49 |
"cell_type": "code",
|
50 |
+
"execution_count": 3,
|
51 |
"id": "5e4670d6-0949-48cf-b6b1-d9cc4cf3ad65",
|
52 |
"metadata": {
|
53 |
"tags": []
|
54 |
},
|
55 |
"outputs": [],
|
56 |
"source": [
|
57 |
+
"speech2text = Speech2Text(\"exp/config.yaml\", \"exp/valid.acc.ave_10best.pth\", quantize_asr_model=True, quantize_lm=True)"
|
58 |
]
|
59 |
},
|
60 |
{
|
|
|
69 |
},
|
70 |
{
|
71 |
"cell_type": "code",
|
72 |
+
"execution_count": 4,
|
73 |
"id": "e8120e8e-3718-4a1a-ab7a-46ef98a6bc11",
|
74 |
"metadata": {
|
75 |
"tags": []
|
|
|
82 |
},
|
83 |
{
|
84 |
"cell_type": "code",
|
85 |
+
"execution_count": 5,
|
86 |
"id": "eec8d4b2-c27a-4780-aeed-8aa7538f70e5",
|
87 |
"metadata": {
|
88 |
"tags": []
|
|
|
92 |
"name": "stdout",
|
93 |
"output_type": "stream",
|
94 |
"text": [
|
95 |
+
"CPU times: user 2.64 s, sys: 6.23 ms, total: 2.65 s\n",
|
96 |
+
"Wall time: 2.66 s\n"
|
97 |
]
|
98 |
}
|
99 |
],
|
|
|
103 |
},
|
104 |
{
|
105 |
"cell_type": "code",
|
106 |
+
"execution_count": 6,
|
107 |
"id": "39f41a8b-94c3-42d6-a989-6c7183a6f94d",
|
108 |
"metadata": {
|
109 |
"tags": []
|
|
|
113 |
"name": "stdout",
|
114 |
"output_type": "stream",
|
115 |
"text": [
|
116 |
+
"mina tahaksin homme täna ja homme kui saan kolm krampsumas ise müüki panna\n"
|
117 |
]
|
118 |
}
|
119 |
],
|
|
|
123 |
},
|
124 |
{
|
125 |
"cell_type": "code",
|
126 |
+
"execution_count": 7,
|
127 |
"id": "812060a6-90de-4134-8d1f-9f3d98853bc2",
|
128 |
"metadata": {
|
129 |
"tags": []
|
|
|
160 |
},
|
161 |
{
|
162 |
"cell_type": "code",
|
163 |
+
"execution_count": 8,
|
164 |
"id": "e6b7331c-52f1-4162-b564-2e6a08b325b0",
|
165 |
"metadata": {
|
166 |
"tags": []
|
|
|
181 |
},
|
182 |
{
|
183 |
"cell_type": "code",
|
184 |
+
"execution_count": 9,
|
185 |
"id": "e6d18b5f-3d2a-4fcf-bf4e-00480e58094a",
|
186 |
"metadata": {
|
187 |
"tags": []
|
|
|
198 |
"utt_0004 utt 3.24 3.76 -0.0037 ja\n",
|
199 |
"utt_0005 utt 3.76 4.28 -0.0000 homme\n",
|
200 |
"utt_0006 utt 5.61 6.13 -0.0001 kui\n",
|
201 |
+
"utt_0007 utt 6.17 6.69 -0.0009 saan\n",
|
202 |
+
"utt_0008 utt 7.98 8.50 -0.2285 kolm\n",
|
203 |
+
"utt_0009 utt 8.50 9.34 -0.1062 krampsumas\n",
|
204 |
+
"utt_0010 utt 9.34 9.54 -0.1183 ise\n",
|
205 |
+
"utt_0011 utt 9.54 10.07 -0.2588 müüki\n",
|
206 |
+
"utt_0012 utt 10.07 10.31 -0.1041 panna\n",
|
|
|
207 |
"\n"
|
208 |
]
|
209 |
}
|
|
|
224 |
},
|
225 |
{
|
226 |
"cell_type": "code",
|
227 |
+
"execution_count": 10,
|
228 |
"id": "ae9f7e3f-b75d-4bcb-98d1-ae2f037fb4af",
|
229 |
"metadata": {
|
230 |
"tags": []
|
|
|
244 |
},
|
245 |
{
|
246 |
"cell_type": "code",
|
247 |
+
"execution_count": 11,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
248 |
"id": "0215d312-1896-43f1-9782-c92aced787b7",
|
249 |
"metadata": {
|
250 |
"tags": []
|
|
|
254 |
"name": "stdout",
|
255 |
"output_type": "stream",
|
256 |
"text": [
|
257 |
+
"CPU times: user 2.96 s, sys: 19 ms, total: 2.98 s\n",
|
258 |
+
"Wall time: 2.98 s\n"
|
259 |
]
|
260 |
}
|
261 |
],
|
|
|
268 |
},
|
269 |
{
|
270 |
"cell_type": "code",
|
271 |
+
"execution_count": 12,
|
272 |
"id": "d31d6840-3a80-411a-969c-05f4a5e3e9a1",
|
273 |
"metadata": {
|
274 |
"tags": []
|
|
|
297 |
},
|
298 |
{
|
299 |
"cell_type": "code",
|
300 |
+
"execution_count": 13,
|
301 |
"id": "53f3b63f-9b40-432b-b58c-f5b7223252ed",
|
302 |
"metadata": {
|
303 |
"tags": []
|
|
|
307 |
"name": "stdout",
|
308 |
"output_type": "stream",
|
309 |
"text": [
|
310 |
+
"CPU times: user 309 ms, sys: 8.51 ms, total: 318 ms\n",
|
311 |
+
"Wall time: 312 ms\n"
|
312 |
]
|
313 |
}
|
314 |
],
|
|
|
318 |
},
|
319 |
{
|
320 |
"cell_type": "code",
|
321 |
+
"execution_count": 14,
|
322 |
"id": "1b4dd747-4be2-4ace-a301-6207f7dd9a71",
|
323 |
"metadata": {
|
324 |
"tags": []
|
|
|
354 |
" <tbody>\n",
|
355 |
" <tr>\n",
|
356 |
" <th>0</th>\n",
|
357 |
+
" <td>0.260173</td>\n",
|
358 |
+
" <td>0.661328</td>\n",
|
359 |
+
" <td>-0.049087</td>\n",
|
360 |
" <td>klikid</td>\n",
|
361 |
" </tr>\n",
|
362 |
" <tr>\n",
|
363 |
" <th>1</th>\n",
|
364 |
+
" <td>0.661328</td>\n",
|
365 |
+
" <td>0.821789</td>\n",
|
366 |
+
" <td>-0.003573</td>\n",
|
367 |
" <td>neid</td>\n",
|
368 |
" </tr>\n",
|
369 |
" <tr>\n",
|
370 |
" <th>2</th>\n",
|
371 |
+
" <td>0.823233</td>\n",
|
372 |
+
" <td>1.784560</td>\n",
|
373 |
+
" <td>-0.001952</td>\n",
|
374 |
" <td>allserva</td>\n",
|
375 |
" </tr>\n",
|
376 |
" <tr>\n",
|
377 |
" <th>3</th>\n",
|
378 |
+
" <td>1.784560</td>\n",
|
379 |
+
" <td>1.985137</td>\n",
|
380 |
+
" <td>-0.034099</td>\n",
|
381 |
" <td>tekivad</td>\n",
|
382 |
" </tr>\n",
|
383 |
" <tr>\n",
|
384 |
" <th>4</th>\n",
|
385 |
+
" <td>2.548197</td>\n",
|
386 |
+
" <td>3.068255</td>\n",
|
387 |
+
" <td>-0.000037</td>\n",
|
388 |
" <td>need</td>\n",
|
389 |
" </tr>\n",
|
390 |
" <tr>\n",
|
391 |
" <th>5</th>\n",
|
392 |
+
" <td>3.068255</td>\n",
|
393 |
+
" <td>4.031025</td>\n",
|
394 |
+
" <td>-0.008919</td>\n",
|
395 |
" <td>lubaküpsiseid</td>\n",
|
396 |
" </tr>\n",
|
397 |
" <tr>\n",
|
398 |
" <th>6</th>\n",
|
399 |
+
" <td>4.754546</td>\n",
|
400 |
+
" <td>5.274604</td>\n",
|
401 |
+
" <td>-0.000385</td>\n",
|
402 |
" <td>mis</td>\n",
|
403 |
" </tr>\n",
|
404 |
" <tr>\n",
|
405 |
" <th>7</th>\n",
|
406 |
+
" <td>5.274604</td>\n",
|
407 |
+
" <td>5.415008</td>\n",
|
408 |
+
" <td>-0.078755</td>\n",
|
409 |
" <td>on</td>\n",
|
410 |
" </tr>\n",
|
411 |
" <tr>\n",
|
412 |
" <th>8</th>\n",
|
413 |
+
" <td>5.415008</td>\n",
|
414 |
+
" <td>5.555412</td>\n",
|
415 |
+
" <td>-0.000224</td>\n",
|
416 |
" <td>nagu</td>\n",
|
417 |
" </tr>\n",
|
418 |
" <tr>\n",
|
419 |
" <th>9</th>\n",
|
420 |
+
" <td>5.555412</td>\n",
|
421 |
+
" <td>5.836220</td>\n",
|
422 |
+
" <td>-0.000488</td>\n",
|
423 |
" <td>ilusti</td>\n",
|
424 |
" </tr>\n",
|
425 |
" <tr>\n",
|
426 |
" <th>10</th>\n",
|
427 |
+
" <td>5.836220</td>\n",
|
428 |
+
" <td>6.117029</td>\n",
|
429 |
+
" <td>-0.002274</td>\n",
|
430 |
" <td>kohati</td>\n",
|
431 |
" </tr>\n",
|
432 |
" <tr>\n",
|
433 |
" <th>11</th>\n",
|
434 |
+
" <td>6.238818</td>\n",
|
435 |
+
" <td>7.039684</td>\n",
|
436 |
+
" <td>-0.013956</td>\n",
|
437 |
" <td>tõlgitud</td>\n",
|
438 |
" </tr>\n",
|
439 |
" <tr>\n",
|
440 |
" <th>12</th>\n",
|
441 |
+
" <td>7.039684</td>\n",
|
442 |
+
" <td>7.240261</td>\n",
|
443 |
+
" <td>-0.002010</td>\n",
|
444 |
" <td>eesti</td>\n",
|
445 |
" </tr>\n",
|
446 |
" <tr>\n",
|
447 |
" <th>13</th>\n",
|
448 |
+
" <td>7.240261</td>\n",
|
449 |
+
" <td>7.681531</td>\n",
|
450 |
+
" <td>-0.002761</td>\n",
|
451 |
" <td>keelde</td>\n",
|
452 |
" </tr>\n",
|
453 |
" <tr>\n",
|
454 |
" <th>14</th>\n",
|
455 |
+
" <td>7.803320</td>\n",
|
456 |
+
" <td>8.323378</td>\n",
|
457 |
+
" <td>-0.001533</td>\n",
|
458 |
" <td>see</td>\n",
|
459 |
" </tr>\n",
|
460 |
" <tr>\n",
|
461 |
" <th>15</th>\n",
|
462 |
+
" <td>8.323378</td>\n",
|
463 |
+
" <td>8.644302</td>\n",
|
464 |
+
" <td>-0.044506</td>\n",
|
465 |
" <td>idee</td>\n",
|
466 |
" </tr>\n",
|
467 |
" <tr>\n",
|
468 |
" <th>16</th>\n",
|
469 |
+
" <td>8.644302</td>\n",
|
470 |
+
" <td>9.326264</td>\n",
|
471 |
+
" <td>-0.215737</td>\n",
|
472 |
" <td>arusaadavamaks</td>\n",
|
473 |
" </tr>\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
474 |
" </tbody>\n",
|
475 |
"</table>\n",
|
476 |
"</div>"
|
477 |
],
|
478 |
"text/plain": [
|
479 |
" start end confidence words\n",
|
480 |
+
"0 0.260173 0.661328 -0.049087 klikid\n",
|
481 |
+
"1 0.661328 0.821789 -0.003573 neid\n",
|
482 |
+
"2 0.823233 1.784560 -0.001952 allserva\n",
|
483 |
+
"3 1.784560 1.985137 -0.034099 tekivad\n",
|
484 |
+
"4 2.548197 3.068255 -0.000037 need\n",
|
485 |
+
"5 3.068255 4.031025 -0.008919 lubaküpsiseid\n",
|
486 |
+
"6 4.754546 5.274604 -0.000385 mis\n",
|
487 |
+
"7 5.274604 5.415008 -0.078755 on\n",
|
488 |
+
"8 5.415008 5.555412 -0.000224 nagu\n",
|
489 |
+
"9 5.555412 5.836220 -0.000488 ilusti\n",
|
490 |
+
"10 5.836220 6.117029 -0.002274 kohati\n",
|
491 |
+
"11 6.238818 7.039684 -0.013956 tõlgitud\n",
|
492 |
+
"12 7.039684 7.240261 -0.002010 eesti\n",
|
493 |
+
"13 7.240261 7.681531 -0.002761 keelde\n",
|
494 |
+
"14 7.803320 8.323378 -0.001533 see\n",
|
495 |
+
"15 8.323378 8.644302 -0.044506 idee\n",
|
496 |
+
"16 8.644302 9.326264 -0.215737 arusaadavamaks"
|
|
|
|
|
|
|
497 |
]
|
498 |
},
|
499 |
+
"execution_count": 14,
|
500 |
"metadata": {},
|
501 |
"output_type": "execute_result"
|
502 |
}
|
|
|
512 |
"metadata": {},
|
513 |
"outputs": [],
|
514 |
"source": []
|
515 |
+
},
|
516 |
+
{
|
517 |
+
"cell_type": "code",
|
518 |
+
"execution_count": null,
|
519 |
+
"id": "1e9d45ad-c8fc-4bab-9285-b82ff3903702",
|
520 |
+
"metadata": {},
|
521 |
+
"outputs": [],
|
522 |
+
"source": []
|
523 |
}
|
524 |
],
|
525 |
"metadata": {
|